diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,108534 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.1219932390859013, + "eval_steps": 500, + "global_step": 155000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 7.238666058618717e-05, + "grad_norm": 0.5545526742935181, + "learning_rate": 4.999934852005473e-06, + "loss": 1.5857, + "step": 10 + }, + { + "epoch": 0.00014477332117237434, + "grad_norm": 0.44032129645347595, + "learning_rate": 4.999862465344887e-06, + "loss": 1.5257, + "step": 20 + }, + { + "epoch": 0.00021715998175856154, + "grad_norm": 0.40077871084213257, + "learning_rate": 4.999790078684301e-06, + "loss": 1.4955, + "step": 30 + }, + { + "epoch": 0.0002895466423447487, + "grad_norm": 0.3221718966960907, + "learning_rate": 4.999717692023714e-06, + "loss": 1.4903, + "step": 40 + }, + { + "epoch": 0.00036193330293093586, + "grad_norm": 0.3151167631149292, + "learning_rate": 4.999645305363128e-06, + "loss": 1.4603, + "step": 50 + }, + { + "epoch": 0.0004343199635171231, + "grad_norm": 0.27057284116744995, + "learning_rate": 4.9995729187025424e-06, + "loss": 1.4502, + "step": 60 + }, + { + "epoch": 0.0005067066241033103, + "grad_norm": 0.3064720928668976, + "learning_rate": 4.999500532041956e-06, + "loss": 1.4473, + "step": 70 + }, + { + "epoch": 0.0005790932846894974, + "grad_norm": 0.2519373595714569, + "learning_rate": 4.99942814538137e-06, + "loss": 1.4421, + "step": 80 + }, + { + "epoch": 0.0006514799452756846, + "grad_norm": 0.23665937781333923, + "learning_rate": 4.999355758720783e-06, + "loss": 1.4143, + "step": 90 + }, + { + "epoch": 0.0007238666058618717, + "grad_norm": 0.26254650950431824, + "learning_rate": 4.999283372060198e-06, + "loss": 1.4186, + "step": 100 + }, + { + "epoch": 0.000796253266448059, + "grad_norm": 0.21838980913162231, + "learning_rate": 4.9992109853996105e-06, + "loss": 1.3901, + "step": 110 + }, + { + "epoch": 0.0008686399270342462, + "grad_norm": 0.22787261009216309, + "learning_rate": 4.999138598739024e-06, + "loss": 1.393, + "step": 120 + }, + { + "epoch": 0.0009410265876204333, + "grad_norm": 0.24213093519210815, + "learning_rate": 4.999066212078439e-06, + "loss": 1.3777, + "step": 130 + }, + { + "epoch": 0.0010134132482066205, + "grad_norm": 0.31372249126434326, + "learning_rate": 4.998993825417852e-06, + "loss": 1.3858, + "step": 140 + }, + { + "epoch": 0.0010857999087928076, + "grad_norm": 0.23661810159683228, + "learning_rate": 4.998921438757266e-06, + "loss": 1.3904, + "step": 150 + }, + { + "epoch": 0.0011581865693789948, + "grad_norm": 0.245035320520401, + "learning_rate": 4.9988490520966794e-06, + "loss": 1.3719, + "step": 160 + }, + { + "epoch": 0.001230573229965182, + "grad_norm": 0.2323773056268692, + "learning_rate": 4.998776665436094e-06, + "loss": 1.3662, + "step": 170 + }, + { + "epoch": 0.0013029598905513692, + "grad_norm": 0.20842741429805756, + "learning_rate": 4.9987042787755075e-06, + "loss": 1.3549, + "step": 180 + }, + { + "epoch": 0.0013753465511375563, + "grad_norm": 0.2077568769454956, + "learning_rate": 4.998631892114921e-06, + "loss": 1.351, + "step": 190 + }, + { + "epoch": 0.0014477332117237434, + "grad_norm": 0.2298651784658432, + "learning_rate": 4.998559505454335e-06, + "loss": 1.3661, + "step": 200 + }, + { + "epoch": 0.0015201198723099308, + "grad_norm": 0.23133844137191772, + "learning_rate": 4.998487118793749e-06, + "loss": 1.349, + "step": 210 + }, + { + "epoch": 0.001592506532896118, + "grad_norm": 0.2279757559299469, + "learning_rate": 4.998414732133163e-06, + "loss": 1.3391, + "step": 220 + }, + { + "epoch": 0.001664893193482305, + "grad_norm": 0.22140687704086304, + "learning_rate": 4.9983423454725765e-06, + "loss": 1.3491, + "step": 230 + }, + { + "epoch": 0.0017372798540684924, + "grad_norm": 0.2116747796535492, + "learning_rate": 4.99826995881199e-06, + "loss": 1.3488, + "step": 240 + }, + { + "epoch": 0.0018096665146546795, + "grad_norm": 0.2300599366426468, + "learning_rate": 4.9981975721514045e-06, + "loss": 1.3383, + "step": 250 + }, + { + "epoch": 0.0018820531752408666, + "grad_norm": 0.2189347743988037, + "learning_rate": 4.998125185490818e-06, + "loss": 1.3214, + "step": 260 + }, + { + "epoch": 0.001954439835827054, + "grad_norm": 0.2163819819688797, + "learning_rate": 4.998052798830232e-06, + "loss": 1.331, + "step": 270 + }, + { + "epoch": 0.002026826496413241, + "grad_norm": 0.24370183050632477, + "learning_rate": 4.997980412169645e-06, + "loss": 1.3307, + "step": 280 + }, + { + "epoch": 0.002099213156999428, + "grad_norm": 0.21121959388256073, + "learning_rate": 4.99790802550906e-06, + "loss": 1.326, + "step": 290 + }, + { + "epoch": 0.0021715998175856153, + "grad_norm": 0.2269212156534195, + "learning_rate": 4.9978356388484735e-06, + "loss": 1.3192, + "step": 300 + }, + { + "epoch": 0.0022439864781718024, + "grad_norm": 0.19639967381954193, + "learning_rate": 4.997763252187887e-06, + "loss": 1.3322, + "step": 310 + }, + { + "epoch": 0.0023163731387579895, + "grad_norm": 0.2111475169658661, + "learning_rate": 4.997690865527301e-06, + "loss": 1.3282, + "step": 320 + }, + { + "epoch": 0.002388759799344177, + "grad_norm": 0.21942846477031708, + "learning_rate": 4.997618478866715e-06, + "loss": 1.3358, + "step": 330 + }, + { + "epoch": 0.002461146459930364, + "grad_norm": 0.24269279837608337, + "learning_rate": 4.997546092206129e-06, + "loss": 1.2974, + "step": 340 + }, + { + "epoch": 0.0025335331205165513, + "grad_norm": 0.2175322324037552, + "learning_rate": 4.997473705545542e-06, + "loss": 1.3239, + "step": 350 + }, + { + "epoch": 0.0026059197811027384, + "grad_norm": 0.2417760044336319, + "learning_rate": 4.997401318884956e-06, + "loss": 1.3016, + "step": 360 + }, + { + "epoch": 0.0026783064416889255, + "grad_norm": 0.21008457243442535, + "learning_rate": 4.9973289322243705e-06, + "loss": 1.3162, + "step": 370 + }, + { + "epoch": 0.0027506931022751127, + "grad_norm": 0.21002964675426483, + "learning_rate": 4.997256545563784e-06, + "loss": 1.2806, + "step": 380 + }, + { + "epoch": 0.0028230797628612998, + "grad_norm": 0.2070387601852417, + "learning_rate": 4.997184158903198e-06, + "loss": 1.3012, + "step": 390 + }, + { + "epoch": 0.002895466423447487, + "grad_norm": 0.22568874061107635, + "learning_rate": 4.997111772242611e-06, + "loss": 1.2917, + "step": 400 + }, + { + "epoch": 0.0029678530840336744, + "grad_norm": 0.21147729456424713, + "learning_rate": 4.997039385582026e-06, + "loss": 1.3087, + "step": 410 + }, + { + "epoch": 0.0030402397446198616, + "grad_norm": 0.2232261747121811, + "learning_rate": 4.996966998921439e-06, + "loss": 1.3038, + "step": 420 + }, + { + "epoch": 0.0031126264052060487, + "grad_norm": 0.2272012084722519, + "learning_rate": 4.996894612260853e-06, + "loss": 1.2891, + "step": 430 + }, + { + "epoch": 0.003185013065792236, + "grad_norm": 0.20684392750263214, + "learning_rate": 4.996822225600267e-06, + "loss": 1.2942, + "step": 440 + }, + { + "epoch": 0.003257399726378423, + "grad_norm": 0.20101316273212433, + "learning_rate": 4.996749838939681e-06, + "loss": 1.2889, + "step": 450 + }, + { + "epoch": 0.00332978638696461, + "grad_norm": 0.22801408171653748, + "learning_rate": 4.996677452279095e-06, + "loss": 1.285, + "step": 460 + }, + { + "epoch": 0.003402173047550797, + "grad_norm": 0.222897469997406, + "learning_rate": 4.996605065618508e-06, + "loss": 1.2837, + "step": 470 + }, + { + "epoch": 0.0034745597081369847, + "grad_norm": 0.2286527454853058, + "learning_rate": 4.996532678957922e-06, + "loss": 1.2894, + "step": 480 + }, + { + "epoch": 0.003546946368723172, + "grad_norm": 0.23111595213413239, + "learning_rate": 4.9964602922973356e-06, + "loss": 1.2842, + "step": 490 + }, + { + "epoch": 0.003619333029309359, + "grad_norm": 0.20559774339199066, + "learning_rate": 4.99638790563675e-06, + "loss": 1.2789, + "step": 500 + }, + { + "epoch": 0.003691719689895546, + "grad_norm": 0.2076191008090973, + "learning_rate": 4.996315518976164e-06, + "loss": 1.2871, + "step": 510 + }, + { + "epoch": 0.003764106350481733, + "grad_norm": 0.1894315779209137, + "learning_rate": 4.996243132315577e-06, + "loss": 1.2697, + "step": 520 + }, + { + "epoch": 0.0038364930110679203, + "grad_norm": 0.21035927534103394, + "learning_rate": 4.996170745654991e-06, + "loss": 1.2744, + "step": 530 + }, + { + "epoch": 0.003908879671654108, + "grad_norm": 0.22824759781360626, + "learning_rate": 4.996098358994405e-06, + "loss": 1.2881, + "step": 540 + }, + { + "epoch": 0.003981266332240295, + "grad_norm": 0.21655461192131042, + "learning_rate": 4.996025972333819e-06, + "loss": 1.273, + "step": 550 + }, + { + "epoch": 0.004053652992826482, + "grad_norm": 0.2505953907966614, + "learning_rate": 4.9959535856732326e-06, + "loss": 1.2879, + "step": 560 + }, + { + "epoch": 0.004126039653412669, + "grad_norm": 0.21900534629821777, + "learning_rate": 4.995881199012646e-06, + "loss": 1.2763, + "step": 570 + }, + { + "epoch": 0.004198426313998856, + "grad_norm": 0.21123918890953064, + "learning_rate": 4.995808812352061e-06, + "loss": 1.2833, + "step": 580 + }, + { + "epoch": 0.0042708129745850434, + "grad_norm": 0.20754094421863556, + "learning_rate": 4.995736425691474e-06, + "loss": 1.2692, + "step": 590 + }, + { + "epoch": 0.0043431996351712306, + "grad_norm": 0.23484139144420624, + "learning_rate": 4.995664039030888e-06, + "loss": 1.2596, + "step": 600 + }, + { + "epoch": 0.004415586295757418, + "grad_norm": 0.2571958303451538, + "learning_rate": 4.9955916523703015e-06, + "loss": 1.2714, + "step": 610 + }, + { + "epoch": 0.004487972956343605, + "grad_norm": 0.21375253796577454, + "learning_rate": 4.995519265709716e-06, + "loss": 1.2718, + "step": 620 + }, + { + "epoch": 0.004560359616929792, + "grad_norm": 0.20837347209453583, + "learning_rate": 4.9954468790491296e-06, + "loss": 1.2555, + "step": 630 + }, + { + "epoch": 0.004632746277515979, + "grad_norm": 0.21926173567771912, + "learning_rate": 4.995374492388543e-06, + "loss": 1.2565, + "step": 640 + }, + { + "epoch": 0.004705132938102166, + "grad_norm": 0.21038663387298584, + "learning_rate": 4.995302105727957e-06, + "loss": 1.2713, + "step": 650 + }, + { + "epoch": 0.004777519598688354, + "grad_norm": 0.2313099056482315, + "learning_rate": 4.99522971906737e-06, + "loss": 1.2441, + "step": 660 + }, + { + "epoch": 0.004849906259274541, + "grad_norm": 0.2143174558877945, + "learning_rate": 4.995157332406784e-06, + "loss": 1.2497, + "step": 670 + }, + { + "epoch": 0.004922292919860728, + "grad_norm": 0.21607981622219086, + "learning_rate": 4.995084945746198e-06, + "loss": 1.2455, + "step": 680 + }, + { + "epoch": 0.0049946795804469155, + "grad_norm": 0.2096307873725891, + "learning_rate": 4.995012559085612e-06, + "loss": 1.2614, + "step": 690 + }, + { + "epoch": 0.005067066241033103, + "grad_norm": 0.20711645483970642, + "learning_rate": 4.994940172425026e-06, + "loss": 1.2529, + "step": 700 + }, + { + "epoch": 0.00513945290161929, + "grad_norm": 0.2200125753879547, + "learning_rate": 4.994867785764439e-06, + "loss": 1.2626, + "step": 710 + }, + { + "epoch": 0.005211839562205477, + "grad_norm": 0.24058933556079865, + "learning_rate": 4.994795399103853e-06, + "loss": 1.2615, + "step": 720 + }, + { + "epoch": 0.005284226222791664, + "grad_norm": 0.21051721274852753, + "learning_rate": 4.9947230124432674e-06, + "loss": 1.2608, + "step": 730 + }, + { + "epoch": 0.005356612883377851, + "grad_norm": 0.18748053908348083, + "learning_rate": 4.994650625782681e-06, + "loss": 1.2579, + "step": 740 + }, + { + "epoch": 0.005428999543964038, + "grad_norm": 0.4778112769126892, + "learning_rate": 4.994578239122095e-06, + "loss": 1.2514, + "step": 750 + }, + { + "epoch": 0.005501386204550225, + "grad_norm": 0.22301283478736877, + "learning_rate": 4.994505852461508e-06, + "loss": 1.2391, + "step": 760 + }, + { + "epoch": 0.0055737728651364124, + "grad_norm": 0.21762309968471527, + "learning_rate": 4.994433465800923e-06, + "loss": 1.2562, + "step": 770 + }, + { + "epoch": 0.0056461595257225996, + "grad_norm": 0.216139554977417, + "learning_rate": 4.994361079140336e-06, + "loss": 1.2587, + "step": 780 + }, + { + "epoch": 0.005718546186308787, + "grad_norm": 0.19104599952697754, + "learning_rate": 4.99428869247975e-06, + "loss": 1.2622, + "step": 790 + }, + { + "epoch": 0.005790932846894974, + "grad_norm": 0.205087810754776, + "learning_rate": 4.994216305819164e-06, + "loss": 1.2465, + "step": 800 + }, + { + "epoch": 0.005863319507481162, + "grad_norm": 0.20174795389175415, + "learning_rate": 4.994143919158578e-06, + "loss": 1.2285, + "step": 810 + }, + { + "epoch": 0.005935706168067349, + "grad_norm": 0.1900462955236435, + "learning_rate": 4.994071532497992e-06, + "loss": 1.2433, + "step": 820 + }, + { + "epoch": 0.006008092828653536, + "grad_norm": 0.21302379667758942, + "learning_rate": 4.993999145837405e-06, + "loss": 1.2355, + "step": 830 + }, + { + "epoch": 0.006080479489239723, + "grad_norm": 0.19449925422668457, + "learning_rate": 4.993926759176819e-06, + "loss": 1.2538, + "step": 840 + }, + { + "epoch": 0.00615286614982591, + "grad_norm": 0.2037796676158905, + "learning_rate": 4.993854372516233e-06, + "loss": 1.2643, + "step": 850 + }, + { + "epoch": 0.006225252810412097, + "grad_norm": 0.22023706138134003, + "learning_rate": 4.993781985855647e-06, + "loss": 1.2358, + "step": 860 + }, + { + "epoch": 0.0062976394709982845, + "grad_norm": 0.1888815462589264, + "learning_rate": 4.993709599195061e-06, + "loss": 1.2499, + "step": 870 + }, + { + "epoch": 0.006370026131584472, + "grad_norm": 0.18520642817020416, + "learning_rate": 4.993637212534474e-06, + "loss": 1.2459, + "step": 880 + }, + { + "epoch": 0.006442412792170659, + "grad_norm": 0.1910271942615509, + "learning_rate": 4.993564825873889e-06, + "loss": 1.2406, + "step": 890 + }, + { + "epoch": 0.006514799452756846, + "grad_norm": 0.1944640874862671, + "learning_rate": 4.993492439213302e-06, + "loss": 1.2331, + "step": 900 + }, + { + "epoch": 0.006587186113343033, + "grad_norm": 0.19614140689373016, + "learning_rate": 4.993420052552716e-06, + "loss": 1.2319, + "step": 910 + }, + { + "epoch": 0.00665957277392922, + "grad_norm": 0.20917336642742157, + "learning_rate": 4.9933476658921295e-06, + "loss": 1.2433, + "step": 920 + }, + { + "epoch": 0.006731959434515407, + "grad_norm": 0.18660324811935425, + "learning_rate": 4.993275279231544e-06, + "loss": 1.2266, + "step": 930 + }, + { + "epoch": 0.006804346095101594, + "grad_norm": 0.20741026103496552, + "learning_rate": 4.993202892570958e-06, + "loss": 1.2376, + "step": 940 + }, + { + "epoch": 0.006876732755687781, + "grad_norm": 0.20696569979190826, + "learning_rate": 4.993130505910371e-06, + "loss": 1.2479, + "step": 950 + }, + { + "epoch": 0.006949119416273969, + "grad_norm": 0.19880063831806183, + "learning_rate": 4.993058119249785e-06, + "loss": 1.246, + "step": 960 + }, + { + "epoch": 0.0070215060768601565, + "grad_norm": 0.21153710782527924, + "learning_rate": 4.992985732589199e-06, + "loss": 1.2399, + "step": 970 + }, + { + "epoch": 0.007093892737446344, + "grad_norm": 0.22593656182289124, + "learning_rate": 4.992913345928613e-06, + "loss": 1.2423, + "step": 980 + }, + { + "epoch": 0.007166279398032531, + "grad_norm": 0.22546051442623138, + "learning_rate": 4.9928409592680265e-06, + "loss": 1.2099, + "step": 990 + }, + { + "epoch": 0.007238666058618718, + "grad_norm": 0.2021579146385193, + "learning_rate": 4.99276857260744e-06, + "loss": 1.2264, + "step": 1000 + }, + { + "epoch": 0.007311052719204905, + "grad_norm": 0.2759735584259033, + "learning_rate": 4.992696185946855e-06, + "loss": 1.2178, + "step": 1010 + }, + { + "epoch": 0.007383439379791092, + "grad_norm": 0.20149949193000793, + "learning_rate": 4.992623799286268e-06, + "loss": 1.2289, + "step": 1020 + }, + { + "epoch": 0.007455826040377279, + "grad_norm": 0.1932690441608429, + "learning_rate": 4.992551412625682e-06, + "loss": 1.2506, + "step": 1030 + }, + { + "epoch": 0.007528212700963466, + "grad_norm": 0.2068718671798706, + "learning_rate": 4.9924790259650955e-06, + "loss": 1.215, + "step": 1040 + }, + { + "epoch": 0.0076005993615496535, + "grad_norm": 0.22313687205314636, + "learning_rate": 4.99240663930451e-06, + "loss": 1.2356, + "step": 1050 + }, + { + "epoch": 0.007672986022135841, + "grad_norm": 0.2034347951412201, + "learning_rate": 4.9923342526439235e-06, + "loss": 1.2195, + "step": 1060 + }, + { + "epoch": 0.007745372682722028, + "grad_norm": 0.2164289355278015, + "learning_rate": 4.992261865983337e-06, + "loss": 1.2069, + "step": 1070 + }, + { + "epoch": 0.007817759343308216, + "grad_norm": 0.20517635345458984, + "learning_rate": 4.992189479322751e-06, + "loss": 1.2181, + "step": 1080 + }, + { + "epoch": 0.007890146003894403, + "grad_norm": 0.19486500322818756, + "learning_rate": 4.992117092662165e-06, + "loss": 1.229, + "step": 1090 + }, + { + "epoch": 0.00796253266448059, + "grad_norm": 0.20392891764640808, + "learning_rate": 4.992044706001579e-06, + "loss": 1.2325, + "step": 1100 + }, + { + "epoch": 0.008034919325066777, + "grad_norm": 0.19018247723579407, + "learning_rate": 4.9919723193409925e-06, + "loss": 1.2242, + "step": 1110 + }, + { + "epoch": 0.008107305985652964, + "grad_norm": 0.2185811698436737, + "learning_rate": 4.991899932680406e-06, + "loss": 1.2287, + "step": 1120 + }, + { + "epoch": 0.008179692646239151, + "grad_norm": 0.20160779356956482, + "learning_rate": 4.99182754601982e-06, + "loss": 1.2131, + "step": 1130 + }, + { + "epoch": 0.008252079306825338, + "grad_norm": 0.19665688276290894, + "learning_rate": 4.991755159359234e-06, + "loss": 1.219, + "step": 1140 + }, + { + "epoch": 0.008324465967411526, + "grad_norm": 0.19984516501426697, + "learning_rate": 4.991682772698648e-06, + "loss": 1.241, + "step": 1150 + }, + { + "epoch": 0.008396852627997713, + "grad_norm": 0.19308070838451385, + "learning_rate": 4.991610386038061e-06, + "loss": 1.213, + "step": 1160 + }, + { + "epoch": 0.0084692392885839, + "grad_norm": 0.2210283875465393, + "learning_rate": 4.991537999377475e-06, + "loss": 1.2214, + "step": 1170 + }, + { + "epoch": 0.008541625949170087, + "grad_norm": 0.19817933440208435, + "learning_rate": 4.991465612716889e-06, + "loss": 1.2446, + "step": 1180 + }, + { + "epoch": 0.008614012609756274, + "grad_norm": 0.198905810713768, + "learning_rate": 4.991393226056302e-06, + "loss": 1.2252, + "step": 1190 + }, + { + "epoch": 0.008686399270342461, + "grad_norm": 0.21226166188716888, + "learning_rate": 4.991320839395717e-06, + "loss": 1.2189, + "step": 1200 + }, + { + "epoch": 0.008758785930928648, + "grad_norm": 0.20412813127040863, + "learning_rate": 4.99124845273513e-06, + "loss": 1.2299, + "step": 1210 + }, + { + "epoch": 0.008831172591514835, + "grad_norm": 0.18132169544696808, + "learning_rate": 4.991176066074544e-06, + "loss": 1.2236, + "step": 1220 + }, + { + "epoch": 0.008903559252101022, + "grad_norm": 0.2028530240058899, + "learning_rate": 4.9911036794139576e-06, + "loss": 1.2205, + "step": 1230 + }, + { + "epoch": 0.00897594591268721, + "grad_norm": 0.21371830999851227, + "learning_rate": 4.991031292753372e-06, + "loss": 1.213, + "step": 1240 + }, + { + "epoch": 0.009048332573273397, + "grad_norm": 0.20845739543437958, + "learning_rate": 4.990958906092786e-06, + "loss": 1.2153, + "step": 1250 + }, + { + "epoch": 0.009120719233859584, + "grad_norm": 0.19972443580627441, + "learning_rate": 4.990886519432199e-06, + "loss": 1.2211, + "step": 1260 + }, + { + "epoch": 0.009193105894445771, + "grad_norm": 0.21171468496322632, + "learning_rate": 4.990814132771613e-06, + "loss": 1.2159, + "step": 1270 + }, + { + "epoch": 0.009265492555031958, + "grad_norm": 0.18781454861164093, + "learning_rate": 4.9907417461110265e-06, + "loss": 1.2305, + "step": 1280 + }, + { + "epoch": 0.009337879215618145, + "grad_norm": 0.20194801688194275, + "learning_rate": 4.990669359450441e-06, + "loss": 1.2119, + "step": 1290 + }, + { + "epoch": 0.009410265876204332, + "grad_norm": 0.20740105211734772, + "learning_rate": 4.9905969727898546e-06, + "loss": 1.2171, + "step": 1300 + }, + { + "epoch": 0.00948265253679052, + "grad_norm": 0.19401483237743378, + "learning_rate": 4.990524586129268e-06, + "loss": 1.2198, + "step": 1310 + }, + { + "epoch": 0.009555039197376708, + "grad_norm": 0.21922868490219116, + "learning_rate": 4.990452199468682e-06, + "loss": 1.2135, + "step": 1320 + }, + { + "epoch": 0.009627425857962895, + "grad_norm": 0.19779783487319946, + "learning_rate": 4.990379812808096e-06, + "loss": 1.2111, + "step": 1330 + }, + { + "epoch": 0.009699812518549083, + "grad_norm": 0.23003704845905304, + "learning_rate": 4.99030742614751e-06, + "loss": 1.2131, + "step": 1340 + }, + { + "epoch": 0.00977219917913527, + "grad_norm": 0.2075931429862976, + "learning_rate": 4.9902350394869235e-06, + "loss": 1.2128, + "step": 1350 + }, + { + "epoch": 0.009844585839721457, + "grad_norm": 0.19751273095607758, + "learning_rate": 4.990162652826337e-06, + "loss": 1.199, + "step": 1360 + }, + { + "epoch": 0.009916972500307644, + "grad_norm": 0.20674757659435272, + "learning_rate": 4.9900902661657516e-06, + "loss": 1.2362, + "step": 1370 + }, + { + "epoch": 0.009989359160893831, + "grad_norm": 0.19264833629131317, + "learning_rate": 4.990017879505165e-06, + "loss": 1.229, + "step": 1380 + }, + { + "epoch": 0.010061745821480018, + "grad_norm": 0.19390186667442322, + "learning_rate": 4.989945492844579e-06, + "loss": 1.2308, + "step": 1390 + }, + { + "epoch": 0.010134132482066205, + "grad_norm": 0.20780542492866516, + "learning_rate": 4.989873106183992e-06, + "loss": 1.1996, + "step": 1400 + }, + { + "epoch": 0.010206519142652392, + "grad_norm": 0.20205456018447876, + "learning_rate": 4.989800719523407e-06, + "loss": 1.2062, + "step": 1410 + }, + { + "epoch": 0.01027890580323858, + "grad_norm": 0.2114095389842987, + "learning_rate": 4.9897283328628205e-06, + "loss": 1.2147, + "step": 1420 + }, + { + "epoch": 0.010351292463824767, + "grad_norm": 0.2318940907716751, + "learning_rate": 4.989655946202234e-06, + "loss": 1.2316, + "step": 1430 + }, + { + "epoch": 0.010423679124410954, + "grad_norm": 0.20557528734207153, + "learning_rate": 4.989583559541648e-06, + "loss": 1.2094, + "step": 1440 + }, + { + "epoch": 0.01049606578499714, + "grad_norm": 0.23943296074867249, + "learning_rate": 4.989511172881062e-06, + "loss": 1.2044, + "step": 1450 + }, + { + "epoch": 0.010568452445583328, + "grad_norm": 0.19559936225414276, + "learning_rate": 4.989438786220476e-06, + "loss": 1.2108, + "step": 1460 + }, + { + "epoch": 0.010640839106169515, + "grad_norm": 0.22766897082328796, + "learning_rate": 4.9893663995598894e-06, + "loss": 1.2156, + "step": 1470 + }, + { + "epoch": 0.010713225766755702, + "grad_norm": 0.1910102516412735, + "learning_rate": 4.989294012899303e-06, + "loss": 1.2147, + "step": 1480 + }, + { + "epoch": 0.01078561242734189, + "grad_norm": 0.20596715807914734, + "learning_rate": 4.9892216262387175e-06, + "loss": 1.2146, + "step": 1490 + }, + { + "epoch": 0.010857999087928076, + "grad_norm": 0.21965187788009644, + "learning_rate": 4.989149239578131e-06, + "loss": 1.2115, + "step": 1500 + }, + { + "epoch": 0.010930385748514264, + "grad_norm": 0.2052324265241623, + "learning_rate": 4.989076852917545e-06, + "loss": 1.2269, + "step": 1510 + }, + { + "epoch": 0.01100277240910045, + "grad_norm": 0.19294176995754242, + "learning_rate": 4.989004466256958e-06, + "loss": 1.2152, + "step": 1520 + }, + { + "epoch": 0.011075159069686638, + "grad_norm": 0.20509877800941467, + "learning_rate": 4.988932079596373e-06, + "loss": 1.2036, + "step": 1530 + }, + { + "epoch": 0.011147545730272825, + "grad_norm": 0.20573221147060394, + "learning_rate": 4.9888596929357864e-06, + "loss": 1.2033, + "step": 1540 + }, + { + "epoch": 0.011219932390859012, + "grad_norm": 0.1944323480129242, + "learning_rate": 4.9887873062752e-06, + "loss": 1.2002, + "step": 1550 + }, + { + "epoch": 0.011292319051445199, + "grad_norm": 0.20238761603832245, + "learning_rate": 4.988714919614614e-06, + "loss": 1.1859, + "step": 1560 + }, + { + "epoch": 0.011364705712031386, + "grad_norm": 0.21487738192081451, + "learning_rate": 4.988642532954028e-06, + "loss": 1.1908, + "step": 1570 + }, + { + "epoch": 0.011437092372617573, + "grad_norm": 0.19267559051513672, + "learning_rate": 4.988570146293442e-06, + "loss": 1.2108, + "step": 1580 + }, + { + "epoch": 0.01150947903320376, + "grad_norm": 0.21015968918800354, + "learning_rate": 4.988497759632855e-06, + "loss": 1.1977, + "step": 1590 + }, + { + "epoch": 0.011581865693789948, + "grad_norm": 0.1899135857820511, + "learning_rate": 4.988425372972269e-06, + "loss": 1.1996, + "step": 1600 + }, + { + "epoch": 0.011654252354376135, + "grad_norm": 0.21136519312858582, + "learning_rate": 4.9883529863116834e-06, + "loss": 1.2074, + "step": 1610 + }, + { + "epoch": 0.011726639014962324, + "grad_norm": 0.18629726767539978, + "learning_rate": 4.988280599651097e-06, + "loss": 1.1995, + "step": 1620 + }, + { + "epoch": 0.01179902567554851, + "grad_norm": 0.18262408673763275, + "learning_rate": 4.988208212990511e-06, + "loss": 1.1972, + "step": 1630 + }, + { + "epoch": 0.011871412336134698, + "grad_norm": 0.216887965798378, + "learning_rate": 4.988135826329924e-06, + "loss": 1.1925, + "step": 1640 + }, + { + "epoch": 0.011943798996720885, + "grad_norm": 0.18681691586971283, + "learning_rate": 4.988063439669339e-06, + "loss": 1.21, + "step": 1650 + }, + { + "epoch": 0.012016185657307072, + "grad_norm": 0.199030801653862, + "learning_rate": 4.987991053008752e-06, + "loss": 1.2014, + "step": 1660 + }, + { + "epoch": 0.01208857231789326, + "grad_norm": 0.23438072204589844, + "learning_rate": 4.987918666348166e-06, + "loss": 1.1913, + "step": 1670 + }, + { + "epoch": 0.012160958978479446, + "grad_norm": 0.18666905164718628, + "learning_rate": 4.98784627968758e-06, + "loss": 1.2013, + "step": 1680 + }, + { + "epoch": 0.012233345639065633, + "grad_norm": 0.20759981870651245, + "learning_rate": 4.987773893026994e-06, + "loss": 1.2058, + "step": 1690 + }, + { + "epoch": 0.01230573229965182, + "grad_norm": 0.20983868837356567, + "learning_rate": 4.987701506366407e-06, + "loss": 1.2153, + "step": 1700 + }, + { + "epoch": 0.012378118960238008, + "grad_norm": 0.21793246269226074, + "learning_rate": 4.9876291197058205e-06, + "loss": 1.1994, + "step": 1710 + }, + { + "epoch": 0.012450505620824195, + "grad_norm": 0.20397932827472687, + "learning_rate": 4.987556733045235e-06, + "loss": 1.2112, + "step": 1720 + }, + { + "epoch": 0.012522892281410382, + "grad_norm": 0.202115997672081, + "learning_rate": 4.9874843463846485e-06, + "loss": 1.2132, + "step": 1730 + }, + { + "epoch": 0.012595278941996569, + "grad_norm": 0.1943732500076294, + "learning_rate": 4.987411959724062e-06, + "loss": 1.1877, + "step": 1740 + }, + { + "epoch": 0.012667665602582756, + "grad_norm": 0.20084722340106964, + "learning_rate": 4.987339573063476e-06, + "loss": 1.1773, + "step": 1750 + }, + { + "epoch": 0.012740052263168943, + "grad_norm": 0.19970197975635529, + "learning_rate": 4.98726718640289e-06, + "loss": 1.1826, + "step": 1760 + }, + { + "epoch": 0.01281243892375513, + "grad_norm": 0.20870880782604218, + "learning_rate": 4.987194799742304e-06, + "loss": 1.1833, + "step": 1770 + }, + { + "epoch": 0.012884825584341317, + "grad_norm": 0.2071852833032608, + "learning_rate": 4.9871224130817175e-06, + "loss": 1.1955, + "step": 1780 + }, + { + "epoch": 0.012957212244927505, + "grad_norm": 0.21110813319683075, + "learning_rate": 4.987050026421131e-06, + "loss": 1.1876, + "step": 1790 + }, + { + "epoch": 0.013029598905513692, + "grad_norm": 0.18131721019744873, + "learning_rate": 4.9869776397605455e-06, + "loss": 1.1867, + "step": 1800 + }, + { + "epoch": 0.013101985566099879, + "grad_norm": 0.22368177771568298, + "learning_rate": 4.986905253099959e-06, + "loss": 1.199, + "step": 1810 + }, + { + "epoch": 0.013174372226686066, + "grad_norm": 0.19988280534744263, + "learning_rate": 4.986832866439373e-06, + "loss": 1.1972, + "step": 1820 + }, + { + "epoch": 0.013246758887272253, + "grad_norm": 0.2156451791524887, + "learning_rate": 4.986760479778786e-06, + "loss": 1.1999, + "step": 1830 + }, + { + "epoch": 0.01331914554785844, + "grad_norm": 0.1884111911058426, + "learning_rate": 4.986688093118201e-06, + "loss": 1.2085, + "step": 1840 + }, + { + "epoch": 0.013391532208444627, + "grad_norm": 0.20055466890335083, + "learning_rate": 4.9866157064576145e-06, + "loss": 1.2014, + "step": 1850 + }, + { + "epoch": 0.013463918869030814, + "grad_norm": 0.22643277049064636, + "learning_rate": 4.986543319797028e-06, + "loss": 1.194, + "step": 1860 + }, + { + "epoch": 0.013536305529617002, + "grad_norm": 0.2079644799232483, + "learning_rate": 4.986470933136442e-06, + "loss": 1.1998, + "step": 1870 + }, + { + "epoch": 0.013608692190203189, + "grad_norm": 0.19740234315395355, + "learning_rate": 4.986398546475856e-06, + "loss": 1.1917, + "step": 1880 + }, + { + "epoch": 0.013681078850789376, + "grad_norm": 0.18155385553836823, + "learning_rate": 4.98632615981527e-06, + "loss": 1.1904, + "step": 1890 + }, + { + "epoch": 0.013753465511375563, + "grad_norm": 0.1898881047964096, + "learning_rate": 4.986253773154683e-06, + "loss": 1.2094, + "step": 1900 + }, + { + "epoch": 0.013825852171961752, + "grad_norm": 0.19113953411579132, + "learning_rate": 4.986181386494097e-06, + "loss": 1.1973, + "step": 1910 + }, + { + "epoch": 0.013898238832547939, + "grad_norm": 0.18314586579799652, + "learning_rate": 4.986108999833511e-06, + "loss": 1.1954, + "step": 1920 + }, + { + "epoch": 0.013970625493134126, + "grad_norm": 0.21783554553985596, + "learning_rate": 4.986036613172925e-06, + "loss": 1.1947, + "step": 1930 + }, + { + "epoch": 0.014043012153720313, + "grad_norm": 0.18775740265846252, + "learning_rate": 4.985964226512339e-06, + "loss": 1.1773, + "step": 1940 + }, + { + "epoch": 0.0141153988143065, + "grad_norm": 0.1981116682291031, + "learning_rate": 4.985891839851752e-06, + "loss": 1.1887, + "step": 1950 + }, + { + "epoch": 0.014187785474892687, + "grad_norm": 0.19518138468265533, + "learning_rate": 4.985819453191166e-06, + "loss": 1.1832, + "step": 1960 + }, + { + "epoch": 0.014260172135478874, + "grad_norm": 0.18999353051185608, + "learning_rate": 4.98574706653058e-06, + "loss": 1.1854, + "step": 1970 + }, + { + "epoch": 0.014332558796065062, + "grad_norm": 0.18030518293380737, + "learning_rate": 4.985674679869994e-06, + "loss": 1.1873, + "step": 1980 + }, + { + "epoch": 0.014404945456651249, + "grad_norm": 0.19327767193317413, + "learning_rate": 4.985602293209408e-06, + "loss": 1.1978, + "step": 1990 + }, + { + "epoch": 0.014477332117237436, + "grad_norm": 0.18995395302772522, + "learning_rate": 4.985529906548821e-06, + "loss": 1.1941, + "step": 2000 + }, + { + "epoch": 0.014549718777823623, + "grad_norm": 0.19528520107269287, + "learning_rate": 4.985457519888236e-06, + "loss": 1.1841, + "step": 2010 + }, + { + "epoch": 0.01462210543840981, + "grad_norm": 0.20308247208595276, + "learning_rate": 4.985385133227649e-06, + "loss": 1.19, + "step": 2020 + }, + { + "epoch": 0.014694492098995997, + "grad_norm": 0.22538486123085022, + "learning_rate": 4.985312746567063e-06, + "loss": 1.1979, + "step": 2030 + }, + { + "epoch": 0.014766878759582184, + "grad_norm": 0.19460053741931915, + "learning_rate": 4.9852403599064766e-06, + "loss": 1.1837, + "step": 2040 + }, + { + "epoch": 0.014839265420168371, + "grad_norm": 0.25119996070861816, + "learning_rate": 4.985167973245891e-06, + "loss": 1.1851, + "step": 2050 + }, + { + "epoch": 0.014911652080754558, + "grad_norm": 0.1847628504037857, + "learning_rate": 4.985095586585305e-06, + "loss": 1.1955, + "step": 2060 + }, + { + "epoch": 0.014984038741340746, + "grad_norm": 0.18682530522346497, + "learning_rate": 4.985023199924718e-06, + "loss": 1.1957, + "step": 2070 + }, + { + "epoch": 0.015056425401926933, + "grad_norm": 0.18913131952285767, + "learning_rate": 4.984950813264132e-06, + "loss": 1.1825, + "step": 2080 + }, + { + "epoch": 0.01512881206251312, + "grad_norm": 0.18790295720100403, + "learning_rate": 4.984878426603546e-06, + "loss": 1.1794, + "step": 2090 + }, + { + "epoch": 0.015201198723099307, + "grad_norm": 0.17898432910442352, + "learning_rate": 4.98480603994296e-06, + "loss": 1.1643, + "step": 2100 + }, + { + "epoch": 0.015273585383685494, + "grad_norm": 0.18662859499454498, + "learning_rate": 4.9847336532823736e-06, + "loss": 1.1796, + "step": 2110 + }, + { + "epoch": 0.015345972044271681, + "grad_norm": 0.1902911514043808, + "learning_rate": 4.984661266621787e-06, + "loss": 1.1846, + "step": 2120 + }, + { + "epoch": 0.015418358704857868, + "grad_norm": 0.21089226007461548, + "learning_rate": 4.984588879961202e-06, + "loss": 1.1826, + "step": 2130 + }, + { + "epoch": 0.015490745365444055, + "grad_norm": 0.19871971011161804, + "learning_rate": 4.984516493300615e-06, + "loss": 1.1881, + "step": 2140 + }, + { + "epoch": 0.015563132026030243, + "grad_norm": 0.1826990395784378, + "learning_rate": 4.984444106640029e-06, + "loss": 1.1654, + "step": 2150 + }, + { + "epoch": 0.01563551868661643, + "grad_norm": 0.22978819906711578, + "learning_rate": 4.9843717199794425e-06, + "loss": 1.1748, + "step": 2160 + }, + { + "epoch": 0.01570790534720262, + "grad_norm": 0.22456589341163635, + "learning_rate": 4.984299333318857e-06, + "loss": 1.1922, + "step": 2170 + }, + { + "epoch": 0.015780292007788806, + "grad_norm": 0.19744843244552612, + "learning_rate": 4.984226946658271e-06, + "loss": 1.1794, + "step": 2180 + }, + { + "epoch": 0.015852678668374993, + "grad_norm": 0.19209572672843933, + "learning_rate": 4.984154559997684e-06, + "loss": 1.1828, + "step": 2190 + }, + { + "epoch": 0.01592506532896118, + "grad_norm": 0.22542759776115417, + "learning_rate": 4.984082173337098e-06, + "loss": 1.1772, + "step": 2200 + }, + { + "epoch": 0.015997451989547367, + "grad_norm": 0.21402914822101593, + "learning_rate": 4.984009786676512e-06, + "loss": 1.1847, + "step": 2210 + }, + { + "epoch": 0.016069838650133554, + "grad_norm": 0.3712976574897766, + "learning_rate": 4.983937400015926e-06, + "loss": 1.1727, + "step": 2220 + }, + { + "epoch": 0.01614222531071974, + "grad_norm": 0.21875779330730438, + "learning_rate": 4.983865013355339e-06, + "loss": 1.1844, + "step": 2230 + }, + { + "epoch": 0.01621461197130593, + "grad_norm": 0.1886206865310669, + "learning_rate": 4.983792626694753e-06, + "loss": 1.1875, + "step": 2240 + }, + { + "epoch": 0.016286998631892115, + "grad_norm": 0.2019553780555725, + "learning_rate": 4.983720240034167e-06, + "loss": 1.1729, + "step": 2250 + }, + { + "epoch": 0.016359385292478303, + "grad_norm": 0.21517504751682281, + "learning_rate": 4.98364785337358e-06, + "loss": 1.1713, + "step": 2260 + }, + { + "epoch": 0.01643177195306449, + "grad_norm": 0.18494921922683716, + "learning_rate": 4.983575466712994e-06, + "loss": 1.1786, + "step": 2270 + }, + { + "epoch": 0.016504158613650677, + "grad_norm": 0.3039611876010895, + "learning_rate": 4.9835030800524084e-06, + "loss": 1.1819, + "step": 2280 + }, + { + "epoch": 0.016576545274236864, + "grad_norm": 0.23030070960521698, + "learning_rate": 4.983430693391822e-06, + "loss": 1.1915, + "step": 2290 + }, + { + "epoch": 0.01664893193482305, + "grad_norm": 0.1895866096019745, + "learning_rate": 4.983358306731236e-06, + "loss": 1.1782, + "step": 2300 + }, + { + "epoch": 0.016721318595409238, + "grad_norm": 0.19012758135795593, + "learning_rate": 4.983285920070649e-06, + "loss": 1.1894, + "step": 2310 + }, + { + "epoch": 0.016793705255995425, + "grad_norm": 0.18805542588233948, + "learning_rate": 4.983213533410064e-06, + "loss": 1.1802, + "step": 2320 + }, + { + "epoch": 0.016866091916581612, + "grad_norm": 0.19105587899684906, + "learning_rate": 4.983141146749477e-06, + "loss": 1.1797, + "step": 2330 + }, + { + "epoch": 0.0169384785771678, + "grad_norm": 0.22354061901569366, + "learning_rate": 4.983068760088891e-06, + "loss": 1.1898, + "step": 2340 + }, + { + "epoch": 0.017010865237753987, + "grad_norm": 0.1948034018278122, + "learning_rate": 4.982996373428305e-06, + "loss": 1.166, + "step": 2350 + }, + { + "epoch": 0.017083251898340174, + "grad_norm": 0.18683524429798126, + "learning_rate": 4.982923986767719e-06, + "loss": 1.1826, + "step": 2360 + }, + { + "epoch": 0.01715563855892636, + "grad_norm": 0.19329576194286346, + "learning_rate": 4.982851600107133e-06, + "loss": 1.1853, + "step": 2370 + }, + { + "epoch": 0.017228025219512548, + "grad_norm": 0.2141970992088318, + "learning_rate": 4.982779213446546e-06, + "loss": 1.172, + "step": 2380 + }, + { + "epoch": 0.017300411880098735, + "grad_norm": 0.20139099657535553, + "learning_rate": 4.98270682678596e-06, + "loss": 1.178, + "step": 2390 + }, + { + "epoch": 0.017372798540684922, + "grad_norm": 0.22393347322940826, + "learning_rate": 4.982634440125374e-06, + "loss": 1.1724, + "step": 2400 + }, + { + "epoch": 0.01744518520127111, + "grad_norm": 0.18857796490192413, + "learning_rate": 4.982562053464788e-06, + "loss": 1.1663, + "step": 2410 + }, + { + "epoch": 0.017517571861857296, + "grad_norm": 0.18142379820346832, + "learning_rate": 4.982489666804202e-06, + "loss": 1.1972, + "step": 2420 + }, + { + "epoch": 0.017589958522443484, + "grad_norm": 0.20119883120059967, + "learning_rate": 4.982417280143615e-06, + "loss": 1.1719, + "step": 2430 + }, + { + "epoch": 0.01766234518302967, + "grad_norm": 0.20527637004852295, + "learning_rate": 4.98234489348303e-06, + "loss": 1.1612, + "step": 2440 + }, + { + "epoch": 0.017734731843615858, + "grad_norm": 0.19073791801929474, + "learning_rate": 4.982272506822443e-06, + "loss": 1.1752, + "step": 2450 + }, + { + "epoch": 0.017807118504202045, + "grad_norm": 0.19779928028583527, + "learning_rate": 4.982200120161857e-06, + "loss": 1.1877, + "step": 2460 + }, + { + "epoch": 0.017879505164788232, + "grad_norm": 0.2000398486852646, + "learning_rate": 4.9821277335012705e-06, + "loss": 1.169, + "step": 2470 + }, + { + "epoch": 0.01795189182537442, + "grad_norm": 0.19572487473487854, + "learning_rate": 4.982055346840685e-06, + "loss": 1.1985, + "step": 2480 + }, + { + "epoch": 0.018024278485960606, + "grad_norm": 0.2484935224056244, + "learning_rate": 4.981982960180099e-06, + "loss": 1.1692, + "step": 2490 + }, + { + "epoch": 0.018096665146546793, + "grad_norm": 0.18102866411209106, + "learning_rate": 4.981910573519512e-06, + "loss": 1.1632, + "step": 2500 + }, + { + "epoch": 0.01816905180713298, + "grad_norm": 0.18723329901695251, + "learning_rate": 4.981838186858926e-06, + "loss": 1.1763, + "step": 2510 + }, + { + "epoch": 0.018241438467719168, + "grad_norm": 0.201924130320549, + "learning_rate": 4.98176580019834e-06, + "loss": 1.1712, + "step": 2520 + }, + { + "epoch": 0.018313825128305355, + "grad_norm": 0.2887810468673706, + "learning_rate": 4.981693413537754e-06, + "loss": 1.1868, + "step": 2530 + }, + { + "epoch": 0.018386211788891542, + "grad_norm": 0.17751596868038177, + "learning_rate": 4.9816210268771675e-06, + "loss": 1.1815, + "step": 2540 + }, + { + "epoch": 0.01845859844947773, + "grad_norm": 0.20777058601379395, + "learning_rate": 4.981548640216581e-06, + "loss": 1.1607, + "step": 2550 + }, + { + "epoch": 0.018530985110063916, + "grad_norm": 0.2111022174358368, + "learning_rate": 4.981476253555995e-06, + "loss": 1.1709, + "step": 2560 + }, + { + "epoch": 0.018603371770650103, + "grad_norm": 0.1838046908378601, + "learning_rate": 4.981403866895409e-06, + "loss": 1.1658, + "step": 2570 + }, + { + "epoch": 0.01867575843123629, + "grad_norm": 0.20409898459911346, + "learning_rate": 4.981331480234823e-06, + "loss": 1.161, + "step": 2580 + }, + { + "epoch": 0.018748145091822477, + "grad_norm": 0.25109410285949707, + "learning_rate": 4.9812590935742365e-06, + "loss": 1.1483, + "step": 2590 + }, + { + "epoch": 0.018820531752408665, + "grad_norm": 0.25918787717819214, + "learning_rate": 4.98118670691365e-06, + "loss": 1.1781, + "step": 2600 + }, + { + "epoch": 0.01889291841299485, + "grad_norm": 0.18770872056484222, + "learning_rate": 4.9811143202530645e-06, + "loss": 1.1732, + "step": 2610 + }, + { + "epoch": 0.01896530507358104, + "grad_norm": 0.195766419172287, + "learning_rate": 4.981041933592478e-06, + "loss": 1.1673, + "step": 2620 + }, + { + "epoch": 0.019037691734167226, + "grad_norm": 0.19628261029720306, + "learning_rate": 4.980969546931892e-06, + "loss": 1.1687, + "step": 2630 + }, + { + "epoch": 0.019110078394753417, + "grad_norm": 0.22135888040065765, + "learning_rate": 4.980897160271305e-06, + "loss": 1.1664, + "step": 2640 + }, + { + "epoch": 0.019182465055339604, + "grad_norm": 0.19807446002960205, + "learning_rate": 4.98082477361072e-06, + "loss": 1.1762, + "step": 2650 + }, + { + "epoch": 0.01925485171592579, + "grad_norm": 0.3103126883506775, + "learning_rate": 4.9807523869501335e-06, + "loss": 1.1723, + "step": 2660 + }, + { + "epoch": 0.019327238376511978, + "grad_norm": 0.18378371000289917, + "learning_rate": 4.980680000289547e-06, + "loss": 1.185, + "step": 2670 + }, + { + "epoch": 0.019399625037098165, + "grad_norm": 0.1914234310388565, + "learning_rate": 4.980607613628961e-06, + "loss": 1.17, + "step": 2680 + }, + { + "epoch": 0.019472011697684352, + "grad_norm": 0.18758605420589447, + "learning_rate": 4.980535226968375e-06, + "loss": 1.1857, + "step": 2690 + }, + { + "epoch": 0.01954439835827054, + "grad_norm": 0.19784751534461975, + "learning_rate": 4.980462840307789e-06, + "loss": 1.1627, + "step": 2700 + }, + { + "epoch": 0.019616785018856726, + "grad_norm": 0.2056863158941269, + "learning_rate": 4.980390453647202e-06, + "loss": 1.1744, + "step": 2710 + }, + { + "epoch": 0.019689171679442913, + "grad_norm": 0.23083150386810303, + "learning_rate": 4.980318066986616e-06, + "loss": 1.1682, + "step": 2720 + }, + { + "epoch": 0.0197615583400291, + "grad_norm": 0.2089979201555252, + "learning_rate": 4.9802456803260305e-06, + "loss": 1.171, + "step": 2730 + }, + { + "epoch": 0.019833945000615288, + "grad_norm": 0.36265090107917786, + "learning_rate": 4.980173293665444e-06, + "loss": 1.1738, + "step": 2740 + }, + { + "epoch": 0.019906331661201475, + "grad_norm": 0.21013633906841278, + "learning_rate": 4.980100907004858e-06, + "loss": 1.1591, + "step": 2750 + }, + { + "epoch": 0.019978718321787662, + "grad_norm": 0.190397247672081, + "learning_rate": 4.980028520344271e-06, + "loss": 1.1761, + "step": 2760 + }, + { + "epoch": 0.02005110498237385, + "grad_norm": 0.2045404314994812, + "learning_rate": 4.979956133683685e-06, + "loss": 1.1701, + "step": 2770 + }, + { + "epoch": 0.020123491642960036, + "grad_norm": 0.18745577335357666, + "learning_rate": 4.9798837470230986e-06, + "loss": 1.1736, + "step": 2780 + }, + { + "epoch": 0.020195878303546223, + "grad_norm": 0.2338806539773941, + "learning_rate": 4.979811360362512e-06, + "loss": 1.1565, + "step": 2790 + }, + { + "epoch": 0.02026826496413241, + "grad_norm": 0.21875229477882385, + "learning_rate": 4.979738973701927e-06, + "loss": 1.1576, + "step": 2800 + }, + { + "epoch": 0.020340651624718598, + "grad_norm": 0.18577270209789276, + "learning_rate": 4.97966658704134e-06, + "loss": 1.1519, + "step": 2810 + }, + { + "epoch": 0.020413038285304785, + "grad_norm": 0.20935316383838654, + "learning_rate": 4.979594200380754e-06, + "loss": 1.168, + "step": 2820 + }, + { + "epoch": 0.020485424945890972, + "grad_norm": 0.2035789042711258, + "learning_rate": 4.9795218137201675e-06, + "loss": 1.1657, + "step": 2830 + }, + { + "epoch": 0.02055781160647716, + "grad_norm": 0.1842500865459442, + "learning_rate": 4.979449427059582e-06, + "loss": 1.1898, + "step": 2840 + }, + { + "epoch": 0.020630198267063346, + "grad_norm": 0.18557128310203552, + "learning_rate": 4.9793770403989956e-06, + "loss": 1.1708, + "step": 2850 + }, + { + "epoch": 0.020702584927649533, + "grad_norm": 0.19317933917045593, + "learning_rate": 4.979304653738409e-06, + "loss": 1.1572, + "step": 2860 + }, + { + "epoch": 0.02077497158823572, + "grad_norm": 0.17931902408599854, + "learning_rate": 4.979232267077823e-06, + "loss": 1.1581, + "step": 2870 + }, + { + "epoch": 0.020847358248821907, + "grad_norm": 0.20684286952018738, + "learning_rate": 4.979159880417237e-06, + "loss": 1.1828, + "step": 2880 + }, + { + "epoch": 0.020919744909408095, + "grad_norm": 0.19323229789733887, + "learning_rate": 4.979087493756651e-06, + "loss": 1.1456, + "step": 2890 + }, + { + "epoch": 0.02099213156999428, + "grad_norm": 0.19767233729362488, + "learning_rate": 4.9790151070960645e-06, + "loss": 1.1742, + "step": 2900 + }, + { + "epoch": 0.02106451823058047, + "grad_norm": 0.2398158460855484, + "learning_rate": 4.978942720435478e-06, + "loss": 1.1721, + "step": 2910 + }, + { + "epoch": 0.021136904891166656, + "grad_norm": 0.20536300539970398, + "learning_rate": 4.978870333774893e-06, + "loss": 1.158, + "step": 2920 + }, + { + "epoch": 0.021209291551752843, + "grad_norm": 0.20128795504570007, + "learning_rate": 4.978797947114306e-06, + "loss": 1.1632, + "step": 2930 + }, + { + "epoch": 0.02128167821233903, + "grad_norm": 0.19307374954223633, + "learning_rate": 4.97872556045372e-06, + "loss": 1.1718, + "step": 2940 + }, + { + "epoch": 0.021354064872925217, + "grad_norm": 0.19423744082450867, + "learning_rate": 4.9786531737931334e-06, + "loss": 1.1568, + "step": 2950 + }, + { + "epoch": 0.021426451533511404, + "grad_norm": 0.2026156485080719, + "learning_rate": 4.978580787132548e-06, + "loss": 1.168, + "step": 2960 + }, + { + "epoch": 0.02149883819409759, + "grad_norm": 0.21222802996635437, + "learning_rate": 4.9785084004719615e-06, + "loss": 1.1588, + "step": 2970 + }, + { + "epoch": 0.02157122485468378, + "grad_norm": 0.1813318431377411, + "learning_rate": 4.978436013811375e-06, + "loss": 1.1752, + "step": 2980 + }, + { + "epoch": 0.021643611515269966, + "grad_norm": 0.1816568523645401, + "learning_rate": 4.978363627150789e-06, + "loss": 1.1586, + "step": 2990 + }, + { + "epoch": 0.021715998175856153, + "grad_norm": 0.18530064821243286, + "learning_rate": 4.978291240490203e-06, + "loss": 1.1684, + "step": 3000 + }, + { + "epoch": 0.02178838483644234, + "grad_norm": 0.2098444104194641, + "learning_rate": 4.978218853829617e-06, + "loss": 1.1685, + "step": 3010 + }, + { + "epoch": 0.021860771497028527, + "grad_norm": 0.17709197103977203, + "learning_rate": 4.9781464671690304e-06, + "loss": 1.1557, + "step": 3020 + }, + { + "epoch": 0.021933158157614714, + "grad_norm": 0.21442793309688568, + "learning_rate": 4.978074080508444e-06, + "loss": 1.1649, + "step": 3030 + }, + { + "epoch": 0.0220055448182009, + "grad_norm": 0.1951083093881607, + "learning_rate": 4.9780016938478585e-06, + "loss": 1.1499, + "step": 3040 + }, + { + "epoch": 0.02207793147878709, + "grad_norm": 0.18640479445457458, + "learning_rate": 4.977929307187272e-06, + "loss": 1.1664, + "step": 3050 + }, + { + "epoch": 0.022150318139373276, + "grad_norm": 0.18357819318771362, + "learning_rate": 4.977856920526686e-06, + "loss": 1.1744, + "step": 3060 + }, + { + "epoch": 0.022222704799959463, + "grad_norm": 0.23373299837112427, + "learning_rate": 4.977784533866099e-06, + "loss": 1.1594, + "step": 3070 + }, + { + "epoch": 0.02229509146054565, + "grad_norm": 0.2267906218767166, + "learning_rate": 4.977712147205514e-06, + "loss": 1.161, + "step": 3080 + }, + { + "epoch": 0.022367478121131837, + "grad_norm": 0.19738420844078064, + "learning_rate": 4.9776397605449274e-06, + "loss": 1.1524, + "step": 3090 + }, + { + "epoch": 0.022439864781718024, + "grad_norm": 0.22471626102924347, + "learning_rate": 4.977567373884341e-06, + "loss": 1.1426, + "step": 3100 + }, + { + "epoch": 0.02251225144230421, + "grad_norm": 0.18693824112415314, + "learning_rate": 4.977494987223755e-06, + "loss": 1.1516, + "step": 3110 + }, + { + "epoch": 0.022584638102890398, + "grad_norm": 0.29740896821022034, + "learning_rate": 4.977422600563169e-06, + "loss": 1.1545, + "step": 3120 + }, + { + "epoch": 0.022657024763476585, + "grad_norm": 0.19464527070522308, + "learning_rate": 4.977350213902583e-06, + "loss": 1.1474, + "step": 3130 + }, + { + "epoch": 0.022729411424062772, + "grad_norm": 0.19471614062786102, + "learning_rate": 4.977277827241996e-06, + "loss": 1.1441, + "step": 3140 + }, + { + "epoch": 0.02280179808464896, + "grad_norm": 0.1917818784713745, + "learning_rate": 4.97720544058141e-06, + "loss": 1.1588, + "step": 3150 + }, + { + "epoch": 0.022874184745235147, + "grad_norm": 0.20581530034542084, + "learning_rate": 4.977133053920824e-06, + "loss": 1.1644, + "step": 3160 + }, + { + "epoch": 0.022946571405821334, + "grad_norm": 0.19986987113952637, + "learning_rate": 4.977060667260238e-06, + "loss": 1.153, + "step": 3170 + }, + { + "epoch": 0.02301895806640752, + "grad_norm": 0.195315420627594, + "learning_rate": 4.976988280599652e-06, + "loss": 1.1563, + "step": 3180 + }, + { + "epoch": 0.023091344726993708, + "grad_norm": 0.2056894749403, + "learning_rate": 4.976915893939065e-06, + "loss": 1.1605, + "step": 3190 + }, + { + "epoch": 0.023163731387579895, + "grad_norm": 0.19704991579055786, + "learning_rate": 4.976843507278479e-06, + "loss": 1.1635, + "step": 3200 + }, + { + "epoch": 0.023236118048166082, + "grad_norm": 0.2168043702840805, + "learning_rate": 4.976771120617893e-06, + "loss": 1.165, + "step": 3210 + }, + { + "epoch": 0.02330850470875227, + "grad_norm": 0.21489858627319336, + "learning_rate": 4.976698733957307e-06, + "loss": 1.1592, + "step": 3220 + }, + { + "epoch": 0.02338089136933846, + "grad_norm": 0.18227837979793549, + "learning_rate": 4.976626347296721e-06, + "loss": 1.1573, + "step": 3230 + }, + { + "epoch": 0.023453278029924647, + "grad_norm": 0.20387127995491028, + "learning_rate": 4.976553960636134e-06, + "loss": 1.1611, + "step": 3240 + }, + { + "epoch": 0.023525664690510834, + "grad_norm": 0.17616231739521027, + "learning_rate": 4.976481573975549e-06, + "loss": 1.1614, + "step": 3250 + }, + { + "epoch": 0.02359805135109702, + "grad_norm": 0.1885817050933838, + "learning_rate": 4.976409187314962e-06, + "loss": 1.152, + "step": 3260 + }, + { + "epoch": 0.02367043801168321, + "grad_norm": 0.22119595110416412, + "learning_rate": 4.976336800654376e-06, + "loss": 1.1541, + "step": 3270 + }, + { + "epoch": 0.023742824672269396, + "grad_norm": 0.24721552431583405, + "learning_rate": 4.9762644139937895e-06, + "loss": 1.1419, + "step": 3280 + }, + { + "epoch": 0.023815211332855583, + "grad_norm": 0.20840491354465485, + "learning_rate": 4.976192027333203e-06, + "loss": 1.157, + "step": 3290 + }, + { + "epoch": 0.02388759799344177, + "grad_norm": 0.1903543770313263, + "learning_rate": 4.976119640672617e-06, + "loss": 1.1492, + "step": 3300 + }, + { + "epoch": 0.023959984654027957, + "grad_norm": 0.1851651966571808, + "learning_rate": 4.976047254012031e-06, + "loss": 1.1473, + "step": 3310 + }, + { + "epoch": 0.024032371314614144, + "grad_norm": 0.18348610401153564, + "learning_rate": 4.975974867351445e-06, + "loss": 1.1615, + "step": 3320 + }, + { + "epoch": 0.02410475797520033, + "grad_norm": 0.19050352275371552, + "learning_rate": 4.9759024806908585e-06, + "loss": 1.1705, + "step": 3330 + }, + { + "epoch": 0.02417714463578652, + "grad_norm": 0.2005050927400589, + "learning_rate": 4.975830094030272e-06, + "loss": 1.1582, + "step": 3340 + }, + { + "epoch": 0.024249531296372705, + "grad_norm": 0.19381307065486908, + "learning_rate": 4.975757707369686e-06, + "loss": 1.165, + "step": 3350 + }, + { + "epoch": 0.024321917956958893, + "grad_norm": 0.1775844395160675, + "learning_rate": 4.9756853207091e-06, + "loss": 1.1466, + "step": 3360 + }, + { + "epoch": 0.02439430461754508, + "grad_norm": 0.1731746643781662, + "learning_rate": 4.975612934048514e-06, + "loss": 1.1399, + "step": 3370 + }, + { + "epoch": 0.024466691278131267, + "grad_norm": 0.2276047319173813, + "learning_rate": 4.975540547387927e-06, + "loss": 1.1665, + "step": 3380 + }, + { + "epoch": 0.024539077938717454, + "grad_norm": 0.19553668797016144, + "learning_rate": 4.975468160727341e-06, + "loss": 1.1632, + "step": 3390 + }, + { + "epoch": 0.02461146459930364, + "grad_norm": 0.19757987558841705, + "learning_rate": 4.9753957740667555e-06, + "loss": 1.1549, + "step": 3400 + }, + { + "epoch": 0.024683851259889828, + "grad_norm": 0.19470013678073883, + "learning_rate": 4.975323387406169e-06, + "loss": 1.1677, + "step": 3410 + }, + { + "epoch": 0.024756237920476015, + "grad_norm": 0.20019254088401794, + "learning_rate": 4.975251000745583e-06, + "loss": 1.1456, + "step": 3420 + }, + { + "epoch": 0.024828624581062202, + "grad_norm": 0.20071542263031006, + "learning_rate": 4.975178614084996e-06, + "loss": 1.149, + "step": 3430 + }, + { + "epoch": 0.02490101124164839, + "grad_norm": 0.18767118453979492, + "learning_rate": 4.975106227424411e-06, + "loss": 1.1633, + "step": 3440 + }, + { + "epoch": 0.024973397902234577, + "grad_norm": 0.19544526934623718, + "learning_rate": 4.975033840763824e-06, + "loss": 1.145, + "step": 3450 + }, + { + "epoch": 0.025045784562820764, + "grad_norm": 0.18626125156879425, + "learning_rate": 4.974961454103238e-06, + "loss": 1.1457, + "step": 3460 + }, + { + "epoch": 0.02511817122340695, + "grad_norm": 0.19342540204524994, + "learning_rate": 4.974889067442652e-06, + "loss": 1.1648, + "step": 3470 + }, + { + "epoch": 0.025190557883993138, + "grad_norm": 0.20179611444473267, + "learning_rate": 4.974816680782066e-06, + "loss": 1.1574, + "step": 3480 + }, + { + "epoch": 0.025262944544579325, + "grad_norm": 0.20184293389320374, + "learning_rate": 4.97474429412148e-06, + "loss": 1.1542, + "step": 3490 + }, + { + "epoch": 0.025335331205165512, + "grad_norm": 0.18145424127578735, + "learning_rate": 4.974671907460893e-06, + "loss": 1.1478, + "step": 3500 + }, + { + "epoch": 0.0254077178657517, + "grad_norm": 0.2179315686225891, + "learning_rate": 4.974599520800307e-06, + "loss": 1.1552, + "step": 3510 + }, + { + "epoch": 0.025480104526337886, + "grad_norm": 0.20705650746822357, + "learning_rate": 4.974527134139721e-06, + "loss": 1.1547, + "step": 3520 + }, + { + "epoch": 0.025552491186924074, + "grad_norm": 0.19656804203987122, + "learning_rate": 4.974454747479135e-06, + "loss": 1.1541, + "step": 3530 + }, + { + "epoch": 0.02562487784751026, + "grad_norm": 0.18042577803134918, + "learning_rate": 4.974382360818549e-06, + "loss": 1.1442, + "step": 3540 + }, + { + "epoch": 0.025697264508096448, + "grad_norm": 0.20010463893413544, + "learning_rate": 4.974309974157962e-06, + "loss": 1.1454, + "step": 3550 + }, + { + "epoch": 0.025769651168682635, + "grad_norm": 0.17687994241714478, + "learning_rate": 4.974237587497377e-06, + "loss": 1.1548, + "step": 3560 + }, + { + "epoch": 0.025842037829268822, + "grad_norm": 0.17953871190547943, + "learning_rate": 4.97416520083679e-06, + "loss": 1.1388, + "step": 3570 + }, + { + "epoch": 0.02591442448985501, + "grad_norm": 0.1789717972278595, + "learning_rate": 4.974092814176204e-06, + "loss": 1.1522, + "step": 3580 + }, + { + "epoch": 0.025986811150441196, + "grad_norm": 0.1992194801568985, + "learning_rate": 4.9740204275156176e-06, + "loss": 1.1413, + "step": 3590 + }, + { + "epoch": 0.026059197811027383, + "grad_norm": 0.18657977879047394, + "learning_rate": 4.973948040855032e-06, + "loss": 1.1566, + "step": 3600 + }, + { + "epoch": 0.02613158447161357, + "grad_norm": 0.1899375021457672, + "learning_rate": 4.973875654194446e-06, + "loss": 1.1589, + "step": 3610 + }, + { + "epoch": 0.026203971132199758, + "grad_norm": 0.18726755678653717, + "learning_rate": 4.973803267533859e-06, + "loss": 1.1479, + "step": 3620 + }, + { + "epoch": 0.026276357792785945, + "grad_norm": 0.19596411287784576, + "learning_rate": 4.973730880873273e-06, + "loss": 1.1555, + "step": 3630 + }, + { + "epoch": 0.026348744453372132, + "grad_norm": 0.19859635829925537, + "learning_rate": 4.973658494212687e-06, + "loss": 1.1535, + "step": 3640 + }, + { + "epoch": 0.02642113111395832, + "grad_norm": 0.2296416461467743, + "learning_rate": 4.973586107552101e-06, + "loss": 1.129, + "step": 3650 + }, + { + "epoch": 0.026493517774544506, + "grad_norm": 0.1784886121749878, + "learning_rate": 4.973513720891515e-06, + "loss": 1.148, + "step": 3660 + }, + { + "epoch": 0.026565904435130693, + "grad_norm": 0.19031654298305511, + "learning_rate": 4.973441334230928e-06, + "loss": 1.1486, + "step": 3670 + }, + { + "epoch": 0.02663829109571688, + "grad_norm": 0.192035511136055, + "learning_rate": 4.973368947570343e-06, + "loss": 1.1436, + "step": 3680 + }, + { + "epoch": 0.026710677756303067, + "grad_norm": 0.19924819469451904, + "learning_rate": 4.973296560909756e-06, + "loss": 1.1522, + "step": 3690 + }, + { + "epoch": 0.026783064416889255, + "grad_norm": 0.17904998362064362, + "learning_rate": 4.97322417424917e-06, + "loss": 1.1435, + "step": 3700 + }, + { + "epoch": 0.02685545107747544, + "grad_norm": 0.17476427555084229, + "learning_rate": 4.9731517875885835e-06, + "loss": 1.1375, + "step": 3710 + }, + { + "epoch": 0.02692783773806163, + "grad_norm": 0.18144498765468597, + "learning_rate": 4.973079400927998e-06, + "loss": 1.1413, + "step": 3720 + }, + { + "epoch": 0.027000224398647816, + "grad_norm": 0.29269012808799744, + "learning_rate": 4.973007014267412e-06, + "loss": 1.1434, + "step": 3730 + }, + { + "epoch": 0.027072611059234003, + "grad_norm": 0.3143029510974884, + "learning_rate": 4.972934627606825e-06, + "loss": 1.127, + "step": 3740 + }, + { + "epoch": 0.02714499771982019, + "grad_norm": 0.2110947221517563, + "learning_rate": 4.972862240946239e-06, + "loss": 1.1477, + "step": 3750 + }, + { + "epoch": 0.027217384380406377, + "grad_norm": 0.18889427185058594, + "learning_rate": 4.972789854285653e-06, + "loss": 1.1458, + "step": 3760 + }, + { + "epoch": 0.027289771040992564, + "grad_norm": 0.19204457104206085, + "learning_rate": 4.972717467625067e-06, + "loss": 1.1528, + "step": 3770 + }, + { + "epoch": 0.02736215770157875, + "grad_norm": 0.19252029061317444, + "learning_rate": 4.9726450809644805e-06, + "loss": 1.1411, + "step": 3780 + }, + { + "epoch": 0.02743454436216494, + "grad_norm": 0.1872684508562088, + "learning_rate": 4.972572694303894e-06, + "loss": 1.1486, + "step": 3790 + }, + { + "epoch": 0.027506931022751126, + "grad_norm": 0.189093217253685, + "learning_rate": 4.972500307643308e-06, + "loss": 1.151, + "step": 3800 + }, + { + "epoch": 0.027579317683337316, + "grad_norm": 0.19076119363307953, + "learning_rate": 4.972427920982722e-06, + "loss": 1.1442, + "step": 3810 + }, + { + "epoch": 0.027651704343923503, + "grad_norm": 0.2054983228445053, + "learning_rate": 4.972355534322135e-06, + "loss": 1.149, + "step": 3820 + }, + { + "epoch": 0.02772409100450969, + "grad_norm": 0.19762204587459564, + "learning_rate": 4.9722831476615494e-06, + "loss": 1.1373, + "step": 3830 + }, + { + "epoch": 0.027796477665095878, + "grad_norm": 0.20214678347110748, + "learning_rate": 4.972210761000963e-06, + "loss": 1.1302, + "step": 3840 + }, + { + "epoch": 0.027868864325682065, + "grad_norm": 0.19313126802444458, + "learning_rate": 4.972138374340377e-06, + "loss": 1.152, + "step": 3850 + }, + { + "epoch": 0.027941250986268252, + "grad_norm": 0.19163060188293457, + "learning_rate": 4.97206598767979e-06, + "loss": 1.1401, + "step": 3860 + }, + { + "epoch": 0.02801363764685444, + "grad_norm": 0.1946219801902771, + "learning_rate": 4.971993601019205e-06, + "loss": 1.1586, + "step": 3870 + }, + { + "epoch": 0.028086024307440626, + "grad_norm": 0.18935218453407288, + "learning_rate": 4.971921214358618e-06, + "loss": 1.1657, + "step": 3880 + }, + { + "epoch": 0.028158410968026813, + "grad_norm": 0.864253044128418, + "learning_rate": 4.971848827698032e-06, + "loss": 1.1543, + "step": 3890 + }, + { + "epoch": 0.028230797628613, + "grad_norm": 0.1859940141439438, + "learning_rate": 4.971776441037446e-06, + "loss": 1.1487, + "step": 3900 + }, + { + "epoch": 0.028303184289199187, + "grad_norm": 0.18587446212768555, + "learning_rate": 4.97170405437686e-06, + "loss": 1.1423, + "step": 3910 + }, + { + "epoch": 0.028375570949785375, + "grad_norm": 0.22516964375972748, + "learning_rate": 4.971631667716274e-06, + "loss": 1.1224, + "step": 3920 + }, + { + "epoch": 0.02844795761037156, + "grad_norm": 0.19674985110759735, + "learning_rate": 4.971559281055687e-06, + "loss": 1.1492, + "step": 3930 + }, + { + "epoch": 0.02852034427095775, + "grad_norm": 0.1964777559041977, + "learning_rate": 4.971486894395101e-06, + "loss": 1.1433, + "step": 3940 + }, + { + "epoch": 0.028592730931543936, + "grad_norm": 0.1828773319721222, + "learning_rate": 4.9714145077345145e-06, + "loss": 1.1462, + "step": 3950 + }, + { + "epoch": 0.028665117592130123, + "grad_norm": 0.19454605877399445, + "learning_rate": 4.971342121073929e-06, + "loss": 1.137, + "step": 3960 + }, + { + "epoch": 0.02873750425271631, + "grad_norm": 0.20798833668231964, + "learning_rate": 4.971269734413343e-06, + "loss": 1.1352, + "step": 3970 + }, + { + "epoch": 0.028809890913302497, + "grad_norm": 0.1934082806110382, + "learning_rate": 4.971197347752756e-06, + "loss": 1.1256, + "step": 3980 + }, + { + "epoch": 0.028882277573888684, + "grad_norm": 0.19269952178001404, + "learning_rate": 4.97112496109217e-06, + "loss": 1.147, + "step": 3990 + }, + { + "epoch": 0.02895466423447487, + "grad_norm": 0.1940053403377533, + "learning_rate": 4.971052574431584e-06, + "loss": 1.1449, + "step": 4000 + }, + { + "epoch": 0.02902705089506106, + "grad_norm": 0.18359413743019104, + "learning_rate": 4.970980187770998e-06, + "loss": 1.1507, + "step": 4010 + }, + { + "epoch": 0.029099437555647246, + "grad_norm": 0.19637279212474823, + "learning_rate": 4.9709078011104115e-06, + "loss": 1.14, + "step": 4020 + }, + { + "epoch": 0.029171824216233433, + "grad_norm": 0.19355088472366333, + "learning_rate": 4.970835414449825e-06, + "loss": 1.1521, + "step": 4030 + }, + { + "epoch": 0.02924421087681962, + "grad_norm": 0.18721257150173187, + "learning_rate": 4.97076302778924e-06, + "loss": 1.1391, + "step": 4040 + }, + { + "epoch": 0.029316597537405807, + "grad_norm": 0.2009786069393158, + "learning_rate": 4.970690641128653e-06, + "loss": 1.1205, + "step": 4050 + }, + { + "epoch": 0.029388984197991994, + "grad_norm": 0.19062365591526031, + "learning_rate": 4.970618254468067e-06, + "loss": 1.1506, + "step": 4060 + }, + { + "epoch": 0.02946137085857818, + "grad_norm": 0.2137022167444229, + "learning_rate": 4.9705458678074805e-06, + "loss": 1.1407, + "step": 4070 + }, + { + "epoch": 0.02953375751916437, + "grad_norm": 0.18308189511299133, + "learning_rate": 4.970473481146895e-06, + "loss": 1.1483, + "step": 4080 + }, + { + "epoch": 0.029606144179750556, + "grad_norm": 0.1955224871635437, + "learning_rate": 4.9704010944863085e-06, + "loss": 1.1315, + "step": 4090 + }, + { + "epoch": 0.029678530840336743, + "grad_norm": 0.1963161826133728, + "learning_rate": 4.970328707825722e-06, + "loss": 1.1519, + "step": 4100 + }, + { + "epoch": 0.02975091750092293, + "grad_norm": 0.20369768142700195, + "learning_rate": 4.970256321165136e-06, + "loss": 1.1286, + "step": 4110 + }, + { + "epoch": 0.029823304161509117, + "grad_norm": 0.18953342735767365, + "learning_rate": 4.97018393450455e-06, + "loss": 1.1546, + "step": 4120 + }, + { + "epoch": 0.029895690822095304, + "grad_norm": 0.22297842800617218, + "learning_rate": 4.970111547843964e-06, + "loss": 1.1373, + "step": 4130 + }, + { + "epoch": 0.02996807748268149, + "grad_norm": 0.23974451422691345, + "learning_rate": 4.9700391611833775e-06, + "loss": 1.1384, + "step": 4140 + }, + { + "epoch": 0.03004046414326768, + "grad_norm": 0.2132536619901657, + "learning_rate": 4.969966774522791e-06, + "loss": 1.145, + "step": 4150 + }, + { + "epoch": 0.030112850803853865, + "grad_norm": 0.18748712539672852, + "learning_rate": 4.9698943878622056e-06, + "loss": 1.129, + "step": 4160 + }, + { + "epoch": 0.030185237464440053, + "grad_norm": 0.18784211575984955, + "learning_rate": 4.969822001201619e-06, + "loss": 1.1376, + "step": 4170 + }, + { + "epoch": 0.03025762412502624, + "grad_norm": 0.19498911499977112, + "learning_rate": 4.969749614541033e-06, + "loss": 1.1282, + "step": 4180 + }, + { + "epoch": 0.030330010785612427, + "grad_norm": 0.19395703077316284, + "learning_rate": 4.969677227880446e-06, + "loss": 1.1319, + "step": 4190 + }, + { + "epoch": 0.030402397446198614, + "grad_norm": 0.1815170794725418, + "learning_rate": 4.969604841219861e-06, + "loss": 1.135, + "step": 4200 + }, + { + "epoch": 0.0304747841067848, + "grad_norm": 0.18967507779598236, + "learning_rate": 4.9695324545592745e-06, + "loss": 1.1281, + "step": 4210 + }, + { + "epoch": 0.030547170767370988, + "grad_norm": 0.19109271466732025, + "learning_rate": 4.969460067898688e-06, + "loss": 1.1424, + "step": 4220 + }, + { + "epoch": 0.030619557427957175, + "grad_norm": 0.23208124935626984, + "learning_rate": 4.969387681238102e-06, + "loss": 1.1408, + "step": 4230 + }, + { + "epoch": 0.030691944088543362, + "grad_norm": 0.18746989965438843, + "learning_rate": 4.969315294577516e-06, + "loss": 1.143, + "step": 4240 + }, + { + "epoch": 0.03076433074912955, + "grad_norm": 0.21662504971027374, + "learning_rate": 4.96924290791693e-06, + "loss": 1.1353, + "step": 4250 + }, + { + "epoch": 0.030836717409715737, + "grad_norm": 0.2007625848054886, + "learning_rate": 4.969170521256343e-06, + "loss": 1.1345, + "step": 4260 + }, + { + "epoch": 0.030909104070301924, + "grad_norm": 0.18364295363426208, + "learning_rate": 4.969098134595757e-06, + "loss": 1.1357, + "step": 4270 + }, + { + "epoch": 0.03098149073088811, + "grad_norm": 0.19245269894599915, + "learning_rate": 4.9690257479351715e-06, + "loss": 1.1326, + "step": 4280 + }, + { + "epoch": 0.031053877391474298, + "grad_norm": 0.1952354460954666, + "learning_rate": 4.968953361274585e-06, + "loss": 1.1253, + "step": 4290 + }, + { + "epoch": 0.031126264052060485, + "grad_norm": 0.18464967608451843, + "learning_rate": 4.968880974613999e-06, + "loss": 1.1324, + "step": 4300 + }, + { + "epoch": 0.031198650712646672, + "grad_norm": 0.19587458670139313, + "learning_rate": 4.968808587953412e-06, + "loss": 1.1399, + "step": 4310 + }, + { + "epoch": 0.03127103737323286, + "grad_norm": 0.1744762659072876, + "learning_rate": 4.968736201292827e-06, + "loss": 1.1389, + "step": 4320 + }, + { + "epoch": 0.031343424033819046, + "grad_norm": 0.17935672402381897, + "learning_rate": 4.96866381463224e-06, + "loss": 1.1392, + "step": 4330 + }, + { + "epoch": 0.03141581069440524, + "grad_norm": 0.19451332092285156, + "learning_rate": 4.968591427971654e-06, + "loss": 1.1409, + "step": 4340 + }, + { + "epoch": 0.03148819735499142, + "grad_norm": 0.20897798240184784, + "learning_rate": 4.968519041311068e-06, + "loss": 1.1205, + "step": 4350 + }, + { + "epoch": 0.03156058401557761, + "grad_norm": 0.20010434091091156, + "learning_rate": 4.968446654650481e-06, + "loss": 1.1408, + "step": 4360 + }, + { + "epoch": 0.031632970676163795, + "grad_norm": 0.19705431163311005, + "learning_rate": 4.968374267989895e-06, + "loss": 1.1392, + "step": 4370 + }, + { + "epoch": 0.031705357336749986, + "grad_norm": 0.19512054324150085, + "learning_rate": 4.9683018813293085e-06, + "loss": 1.1481, + "step": 4380 + }, + { + "epoch": 0.03177774399733617, + "grad_norm": 0.2135220468044281, + "learning_rate": 4.968229494668723e-06, + "loss": 1.1294, + "step": 4390 + }, + { + "epoch": 0.03185013065792236, + "grad_norm": 0.18849720060825348, + "learning_rate": 4.9681571080081366e-06, + "loss": 1.137, + "step": 4400 + }, + { + "epoch": 0.03192251731850854, + "grad_norm": 0.18328508734703064, + "learning_rate": 4.96808472134755e-06, + "loss": 1.1302, + "step": 4410 + }, + { + "epoch": 0.031994903979094734, + "grad_norm": 0.19263693690299988, + "learning_rate": 4.968012334686964e-06, + "loss": 1.1344, + "step": 4420 + }, + { + "epoch": 0.03206729063968092, + "grad_norm": 0.17624565958976746, + "learning_rate": 4.967939948026378e-06, + "loss": 1.1283, + "step": 4430 + }, + { + "epoch": 0.03213967730026711, + "grad_norm": 0.2214570790529251, + "learning_rate": 4.967867561365792e-06, + "loss": 1.1212, + "step": 4440 + }, + { + "epoch": 0.03221206396085329, + "grad_norm": 0.19538843631744385, + "learning_rate": 4.9677951747052055e-06, + "loss": 1.1519, + "step": 4450 + }, + { + "epoch": 0.03228445062143948, + "grad_norm": 0.19535070657730103, + "learning_rate": 4.967722788044619e-06, + "loss": 1.1206, + "step": 4460 + }, + { + "epoch": 0.032356837282025666, + "grad_norm": 0.20394045114517212, + "learning_rate": 4.967650401384034e-06, + "loss": 1.1546, + "step": 4470 + }, + { + "epoch": 0.03242922394261186, + "grad_norm": 0.18652796745300293, + "learning_rate": 4.967578014723447e-06, + "loss": 1.1371, + "step": 4480 + }, + { + "epoch": 0.03250161060319804, + "grad_norm": 0.18646922707557678, + "learning_rate": 4.967505628062861e-06, + "loss": 1.1479, + "step": 4490 + }, + { + "epoch": 0.03257399726378423, + "grad_norm": 0.1993158608675003, + "learning_rate": 4.9674332414022744e-06, + "loss": 1.1402, + "step": 4500 + }, + { + "epoch": 0.032646383924370415, + "grad_norm": 0.18076984584331512, + "learning_rate": 4.967360854741689e-06, + "loss": 1.1291, + "step": 4510 + }, + { + "epoch": 0.032718770584956605, + "grad_norm": 0.9336857199668884, + "learning_rate": 4.9672884680811025e-06, + "loss": 1.136, + "step": 4520 + }, + { + "epoch": 0.03279115724554279, + "grad_norm": 0.1966332644224167, + "learning_rate": 4.967216081420516e-06, + "loss": 1.1248, + "step": 4530 + }, + { + "epoch": 0.03286354390612898, + "grad_norm": 0.20412597060203552, + "learning_rate": 4.96714369475993e-06, + "loss": 1.1375, + "step": 4540 + }, + { + "epoch": 0.03293593056671516, + "grad_norm": 0.1918439120054245, + "learning_rate": 4.967071308099344e-06, + "loss": 1.1298, + "step": 4550 + }, + { + "epoch": 0.033008317227301354, + "grad_norm": 0.18617042899131775, + "learning_rate": 4.966998921438758e-06, + "loss": 1.1299, + "step": 4560 + }, + { + "epoch": 0.03308070388788754, + "grad_norm": 0.18576890230178833, + "learning_rate": 4.9669265347781714e-06, + "loss": 1.127, + "step": 4570 + }, + { + "epoch": 0.03315309054847373, + "grad_norm": 0.18330788612365723, + "learning_rate": 4.966854148117585e-06, + "loss": 1.1222, + "step": 4580 + }, + { + "epoch": 0.03322547720905991, + "grad_norm": 0.19359582662582397, + "learning_rate": 4.966781761456999e-06, + "loss": 1.1218, + "step": 4590 + }, + { + "epoch": 0.0332978638696461, + "grad_norm": 0.19401496648788452, + "learning_rate": 4.966709374796413e-06, + "loss": 1.1386, + "step": 4600 + }, + { + "epoch": 0.033370250530232286, + "grad_norm": 0.18727423250675201, + "learning_rate": 4.966636988135827e-06, + "loss": 1.1225, + "step": 4610 + }, + { + "epoch": 0.033442637190818476, + "grad_norm": 0.19162550568580627, + "learning_rate": 4.96656460147524e-06, + "loss": 1.1226, + "step": 4620 + }, + { + "epoch": 0.03351502385140466, + "grad_norm": 0.18989378213882446, + "learning_rate": 4.966492214814654e-06, + "loss": 1.1409, + "step": 4630 + }, + { + "epoch": 0.03358741051199085, + "grad_norm": 0.17028242349624634, + "learning_rate": 4.9664198281540684e-06, + "loss": 1.1353, + "step": 4640 + }, + { + "epoch": 0.033659797172577034, + "grad_norm": 0.18227070569992065, + "learning_rate": 4.966347441493482e-06, + "loss": 1.136, + "step": 4650 + }, + { + "epoch": 0.033732183833163225, + "grad_norm": 0.1902882605791092, + "learning_rate": 4.966275054832896e-06, + "loss": 1.1392, + "step": 4660 + }, + { + "epoch": 0.03380457049374941, + "grad_norm": 0.17739436030387878, + "learning_rate": 4.966202668172309e-06, + "loss": 1.1258, + "step": 4670 + }, + { + "epoch": 0.0338769571543356, + "grad_norm": 0.187397763133049, + "learning_rate": 4.966130281511724e-06, + "loss": 1.1261, + "step": 4680 + }, + { + "epoch": 0.03394934381492179, + "grad_norm": 0.17765313386917114, + "learning_rate": 4.966057894851137e-06, + "loss": 1.1252, + "step": 4690 + }, + { + "epoch": 0.03402173047550797, + "grad_norm": 0.18505384027957916, + "learning_rate": 4.965985508190551e-06, + "loss": 1.1257, + "step": 4700 + }, + { + "epoch": 0.034094117136094164, + "grad_norm": 0.19399258494377136, + "learning_rate": 4.965913121529965e-06, + "loss": 1.1236, + "step": 4710 + }, + { + "epoch": 0.03416650379668035, + "grad_norm": 0.17900791764259338, + "learning_rate": 4.965840734869379e-06, + "loss": 1.13, + "step": 4720 + }, + { + "epoch": 0.03423889045726654, + "grad_norm": 0.17912939190864563, + "learning_rate": 4.965768348208793e-06, + "loss": 1.1416, + "step": 4730 + }, + { + "epoch": 0.03431127711785272, + "grad_norm": 0.1887941062450409, + "learning_rate": 4.965695961548206e-06, + "loss": 1.1308, + "step": 4740 + }, + { + "epoch": 0.03438366377843891, + "grad_norm": 0.19085676968097687, + "learning_rate": 4.96562357488762e-06, + "loss": 1.1374, + "step": 4750 + }, + { + "epoch": 0.034456050439025096, + "grad_norm": 0.1932060867547989, + "learning_rate": 4.965551188227034e-06, + "loss": 1.1412, + "step": 4760 + }, + { + "epoch": 0.03452843709961129, + "grad_norm": 0.1827419102191925, + "learning_rate": 4.965478801566448e-06, + "loss": 1.1221, + "step": 4770 + }, + { + "epoch": 0.03460082376019747, + "grad_norm": 0.31538698077201843, + "learning_rate": 4.965406414905862e-06, + "loss": 1.1423, + "step": 4780 + }, + { + "epoch": 0.03467321042078366, + "grad_norm": 0.23767271637916565, + "learning_rate": 4.965334028245275e-06, + "loss": 1.1166, + "step": 4790 + }, + { + "epoch": 0.034745597081369844, + "grad_norm": 0.1903272569179535, + "learning_rate": 4.96526164158469e-06, + "loss": 1.1551, + "step": 4800 + }, + { + "epoch": 0.034817983741956035, + "grad_norm": 0.1915259212255478, + "learning_rate": 4.965189254924103e-06, + "loss": 1.1427, + "step": 4810 + }, + { + "epoch": 0.03489037040254222, + "grad_norm": 0.18546457588672638, + "learning_rate": 4.965116868263517e-06, + "loss": 1.1335, + "step": 4820 + }, + { + "epoch": 0.03496275706312841, + "grad_norm": 0.17920000851154327, + "learning_rate": 4.9650444816029305e-06, + "loss": 1.1362, + "step": 4830 + }, + { + "epoch": 0.03503514372371459, + "grad_norm": 0.1886454075574875, + "learning_rate": 4.964972094942345e-06, + "loss": 1.1328, + "step": 4840 + }, + { + "epoch": 0.035107530384300784, + "grad_norm": 0.18787150084972382, + "learning_rate": 4.964899708281759e-06, + "loss": 1.1359, + "step": 4850 + }, + { + "epoch": 0.03517991704488697, + "grad_norm": 0.18325144052505493, + "learning_rate": 4.964827321621172e-06, + "loss": 1.1281, + "step": 4860 + }, + { + "epoch": 0.03525230370547316, + "grad_norm": 0.17951847612857819, + "learning_rate": 4.964754934960586e-06, + "loss": 1.1304, + "step": 4870 + }, + { + "epoch": 0.03532469036605934, + "grad_norm": 0.17793092131614685, + "learning_rate": 4.9646825482999995e-06, + "loss": 1.1348, + "step": 4880 + }, + { + "epoch": 0.03539707702664553, + "grad_norm": 0.19709095358848572, + "learning_rate": 4.964610161639413e-06, + "loss": 1.1046, + "step": 4890 + }, + { + "epoch": 0.035469463687231716, + "grad_norm": 0.2029866725206375, + "learning_rate": 4.964537774978827e-06, + "loss": 1.1269, + "step": 4900 + }, + { + "epoch": 0.035541850347817906, + "grad_norm": 0.1873743087053299, + "learning_rate": 4.964465388318241e-06, + "loss": 1.1214, + "step": 4910 + }, + { + "epoch": 0.03561423700840409, + "grad_norm": 0.18011152744293213, + "learning_rate": 4.964393001657655e-06, + "loss": 1.1244, + "step": 4920 + }, + { + "epoch": 0.03568662366899028, + "grad_norm": 0.2078821361064911, + "learning_rate": 4.964320614997068e-06, + "loss": 1.123, + "step": 4930 + }, + { + "epoch": 0.035759010329576464, + "grad_norm": 0.22960899770259857, + "learning_rate": 4.964248228336482e-06, + "loss": 1.1354, + "step": 4940 + }, + { + "epoch": 0.035831396990162655, + "grad_norm": 0.19076983630657196, + "learning_rate": 4.9641758416758965e-06, + "loss": 1.1237, + "step": 4950 + }, + { + "epoch": 0.03590378365074884, + "grad_norm": 0.19131101667881012, + "learning_rate": 4.96410345501531e-06, + "loss": 1.1253, + "step": 4960 + }, + { + "epoch": 0.03597617031133503, + "grad_norm": 0.1835022121667862, + "learning_rate": 4.964031068354724e-06, + "loss": 1.1338, + "step": 4970 + }, + { + "epoch": 0.03604855697192121, + "grad_norm": 0.17867615818977356, + "learning_rate": 4.963958681694137e-06, + "loss": 1.112, + "step": 4980 + }, + { + "epoch": 0.0361209436325074, + "grad_norm": 0.18737174570560455, + "learning_rate": 4.963886295033552e-06, + "loss": 1.1277, + "step": 4990 + }, + { + "epoch": 0.03619333029309359, + "grad_norm": 0.21853433549404144, + "learning_rate": 4.963813908372965e-06, + "loss": 1.1324, + "step": 5000 + }, + { + "epoch": 0.03626571695367978, + "grad_norm": 0.18206347525119781, + "learning_rate": 4.963741521712379e-06, + "loss": 1.1275, + "step": 5010 + }, + { + "epoch": 0.03633810361426596, + "grad_norm": 0.20839586853981018, + "learning_rate": 4.963669135051793e-06, + "loss": 1.1235, + "step": 5020 + }, + { + "epoch": 0.03641049027485215, + "grad_norm": 0.208791121840477, + "learning_rate": 4.963596748391207e-06, + "loss": 1.1197, + "step": 5030 + }, + { + "epoch": 0.036482876935438335, + "grad_norm": 0.18400295078754425, + "learning_rate": 4.963524361730621e-06, + "loss": 1.1413, + "step": 5040 + }, + { + "epoch": 0.036555263596024526, + "grad_norm": 0.1914309412240982, + "learning_rate": 4.963451975070034e-06, + "loss": 1.1389, + "step": 5050 + }, + { + "epoch": 0.03662765025661071, + "grad_norm": 0.18529996275901794, + "learning_rate": 4.963379588409448e-06, + "loss": 1.1152, + "step": 5060 + }, + { + "epoch": 0.0367000369171969, + "grad_norm": 0.1869628131389618, + "learning_rate": 4.963307201748862e-06, + "loss": 1.1276, + "step": 5070 + }, + { + "epoch": 0.036772423577783084, + "grad_norm": 0.1835557073354721, + "learning_rate": 4.963234815088276e-06, + "loss": 1.1408, + "step": 5080 + }, + { + "epoch": 0.036844810238369274, + "grad_norm": 0.19675718247890472, + "learning_rate": 4.96316242842769e-06, + "loss": 1.1053, + "step": 5090 + }, + { + "epoch": 0.03691719689895546, + "grad_norm": 0.18781916797161102, + "learning_rate": 4.963090041767103e-06, + "loss": 1.1331, + "step": 5100 + }, + { + "epoch": 0.03698958355954165, + "grad_norm": 0.24024321138858795, + "learning_rate": 4.963017655106518e-06, + "loss": 1.1233, + "step": 5110 + }, + { + "epoch": 0.03706197022012783, + "grad_norm": 0.17619359493255615, + "learning_rate": 4.962945268445931e-06, + "loss": 1.123, + "step": 5120 + }, + { + "epoch": 0.03713435688071402, + "grad_norm": 0.19211329519748688, + "learning_rate": 4.962872881785345e-06, + "loss": 1.1162, + "step": 5130 + }, + { + "epoch": 0.037206743541300207, + "grad_norm": 0.18998998403549194, + "learning_rate": 4.9628004951247586e-06, + "loss": 1.1298, + "step": 5140 + }, + { + "epoch": 0.0372791302018864, + "grad_norm": 0.18230006098747253, + "learning_rate": 4.962728108464173e-06, + "loss": 1.1313, + "step": 5150 + }, + { + "epoch": 0.03735151686247258, + "grad_norm": 0.18055368959903717, + "learning_rate": 4.962655721803587e-06, + "loss": 1.1395, + "step": 5160 + }, + { + "epoch": 0.03742390352305877, + "grad_norm": 0.1782815009355545, + "learning_rate": 4.962583335143e-06, + "loss": 1.1295, + "step": 5170 + }, + { + "epoch": 0.037496290183644955, + "grad_norm": 0.21609275043010712, + "learning_rate": 4.962510948482414e-06, + "loss": 1.1273, + "step": 5180 + }, + { + "epoch": 0.037568676844231146, + "grad_norm": 0.19607985019683838, + "learning_rate": 4.962438561821828e-06, + "loss": 1.116, + "step": 5190 + }, + { + "epoch": 0.03764106350481733, + "grad_norm": 0.26767560839653015, + "learning_rate": 4.962366175161242e-06, + "loss": 1.1085, + "step": 5200 + }, + { + "epoch": 0.03771345016540352, + "grad_norm": 0.2847156524658203, + "learning_rate": 4.962293788500656e-06, + "loss": 1.1067, + "step": 5210 + }, + { + "epoch": 0.0377858368259897, + "grad_norm": 0.20309562981128693, + "learning_rate": 4.962221401840069e-06, + "loss": 1.1204, + "step": 5220 + }, + { + "epoch": 0.037858223486575894, + "grad_norm": 0.20030486583709717, + "learning_rate": 4.962149015179483e-06, + "loss": 1.1203, + "step": 5230 + }, + { + "epoch": 0.03793061014716208, + "grad_norm": 0.16900622844696045, + "learning_rate": 4.962076628518897e-06, + "loss": 1.1398, + "step": 5240 + }, + { + "epoch": 0.03800299680774827, + "grad_norm": 0.18107786774635315, + "learning_rate": 4.962004241858311e-06, + "loss": 1.1224, + "step": 5250 + }, + { + "epoch": 0.03807538346833445, + "grad_norm": 0.19224514067173004, + "learning_rate": 4.9619318551977245e-06, + "loss": 1.1198, + "step": 5260 + }, + { + "epoch": 0.03814777012892064, + "grad_norm": 0.18875695765018463, + "learning_rate": 4.961859468537138e-06, + "loss": 1.1281, + "step": 5270 + }, + { + "epoch": 0.03822015678950683, + "grad_norm": 0.20869621634483337, + "learning_rate": 4.961787081876553e-06, + "loss": 1.0947, + "step": 5280 + }, + { + "epoch": 0.03829254345009302, + "grad_norm": 0.18768785893917084, + "learning_rate": 4.961714695215966e-06, + "loss": 1.1132, + "step": 5290 + }, + { + "epoch": 0.03836493011067921, + "grad_norm": 0.19424548745155334, + "learning_rate": 4.96164230855538e-06, + "loss": 1.1311, + "step": 5300 + }, + { + "epoch": 0.03843731677126539, + "grad_norm": 0.18979331851005554, + "learning_rate": 4.9615699218947934e-06, + "loss": 1.1254, + "step": 5310 + }, + { + "epoch": 0.03850970343185158, + "grad_norm": 0.18203093111515045, + "learning_rate": 4.961497535234208e-06, + "loss": 1.133, + "step": 5320 + }, + { + "epoch": 0.038582090092437765, + "grad_norm": 0.1737833172082901, + "learning_rate": 4.9614251485736215e-06, + "loss": 1.1132, + "step": 5330 + }, + { + "epoch": 0.038654476753023956, + "grad_norm": 0.18952320516109467, + "learning_rate": 4.961352761913035e-06, + "loss": 1.1311, + "step": 5340 + }, + { + "epoch": 0.03872686341361014, + "grad_norm": 0.25203409790992737, + "learning_rate": 4.961280375252449e-06, + "loss": 1.1111, + "step": 5350 + }, + { + "epoch": 0.03879925007419633, + "grad_norm": 0.20354746282100677, + "learning_rate": 4.961207988591863e-06, + "loss": 1.1149, + "step": 5360 + }, + { + "epoch": 0.038871636734782514, + "grad_norm": 0.21158255636692047, + "learning_rate": 4.961135601931277e-06, + "loss": 1.1342, + "step": 5370 + }, + { + "epoch": 0.038944023395368704, + "grad_norm": 0.19399100542068481, + "learning_rate": 4.9610632152706904e-06, + "loss": 1.1173, + "step": 5380 + }, + { + "epoch": 0.03901641005595489, + "grad_norm": 0.18055978417396545, + "learning_rate": 4.960990828610104e-06, + "loss": 1.1228, + "step": 5390 + }, + { + "epoch": 0.03908879671654108, + "grad_norm": 0.17884358763694763, + "learning_rate": 4.9609184419495185e-06, + "loss": 1.1214, + "step": 5400 + }, + { + "epoch": 0.03916118337712726, + "grad_norm": 0.1775633841753006, + "learning_rate": 4.960846055288932e-06, + "loss": 1.121, + "step": 5410 + }, + { + "epoch": 0.03923357003771345, + "grad_norm": 0.18697598576545715, + "learning_rate": 4.960773668628345e-06, + "loss": 1.147, + "step": 5420 + }, + { + "epoch": 0.039305956698299636, + "grad_norm": 0.8238439559936523, + "learning_rate": 4.960701281967759e-06, + "loss": 1.1239, + "step": 5430 + }, + { + "epoch": 0.03937834335888583, + "grad_norm": 0.18985433876514435, + "learning_rate": 4.960628895307173e-06, + "loss": 1.1123, + "step": 5440 + }, + { + "epoch": 0.03945073001947201, + "grad_norm": 0.18031606078147888, + "learning_rate": 4.960556508646587e-06, + "loss": 1.1203, + "step": 5450 + }, + { + "epoch": 0.0395231166800582, + "grad_norm": 0.16933594644069672, + "learning_rate": 4.960484121986e-06, + "loss": 1.118, + "step": 5460 + }, + { + "epoch": 0.039595503340644385, + "grad_norm": 0.1860426515340805, + "learning_rate": 4.960411735325415e-06, + "loss": 1.1193, + "step": 5470 + }, + { + "epoch": 0.039667890001230575, + "grad_norm": 0.18198014795780182, + "learning_rate": 4.960339348664828e-06, + "loss": 1.1229, + "step": 5480 + }, + { + "epoch": 0.03974027666181676, + "grad_norm": 0.19671514630317688, + "learning_rate": 4.960266962004242e-06, + "loss": 1.1263, + "step": 5490 + }, + { + "epoch": 0.03981266332240295, + "grad_norm": 0.18220868706703186, + "learning_rate": 4.9601945753436555e-06, + "loss": 1.122, + "step": 5500 + }, + { + "epoch": 0.03988504998298913, + "grad_norm": 0.19989323616027832, + "learning_rate": 4.96012218868307e-06, + "loss": 1.1202, + "step": 5510 + }, + { + "epoch": 0.039957436643575324, + "grad_norm": 0.1794479936361313, + "learning_rate": 4.960049802022484e-06, + "loss": 1.1221, + "step": 5520 + }, + { + "epoch": 0.04002982330416151, + "grad_norm": 0.23208436369895935, + "learning_rate": 4.959977415361897e-06, + "loss": 1.1197, + "step": 5530 + }, + { + "epoch": 0.0401022099647477, + "grad_norm": 0.20182034373283386, + "learning_rate": 4.959905028701311e-06, + "loss": 1.1272, + "step": 5540 + }, + { + "epoch": 0.04017459662533388, + "grad_norm": 0.18753409385681152, + "learning_rate": 4.959832642040725e-06, + "loss": 1.1078, + "step": 5550 + }, + { + "epoch": 0.04024698328592007, + "grad_norm": 0.20286722481250763, + "learning_rate": 4.959760255380139e-06, + "loss": 1.1251, + "step": 5560 + }, + { + "epoch": 0.040319369946506256, + "grad_norm": 0.21133583784103394, + "learning_rate": 4.9596878687195525e-06, + "loss": 1.1299, + "step": 5570 + }, + { + "epoch": 0.04039175660709245, + "grad_norm": 0.1748242825269699, + "learning_rate": 4.959615482058966e-06, + "loss": 1.1263, + "step": 5580 + }, + { + "epoch": 0.04046414326767863, + "grad_norm": 0.19774052500724792, + "learning_rate": 4.959543095398381e-06, + "loss": 1.1134, + "step": 5590 + }, + { + "epoch": 0.04053652992826482, + "grad_norm": 0.2021917700767517, + "learning_rate": 4.959470708737794e-06, + "loss": 1.1222, + "step": 5600 + }, + { + "epoch": 0.040608916588851005, + "grad_norm": 0.20801125466823578, + "learning_rate": 4.959398322077208e-06, + "loss": 1.1187, + "step": 5610 + }, + { + "epoch": 0.040681303249437195, + "grad_norm": 0.17507006227970123, + "learning_rate": 4.9593259354166215e-06, + "loss": 1.1209, + "step": 5620 + }, + { + "epoch": 0.04075368991002338, + "grad_norm": 0.1864837110042572, + "learning_rate": 4.959253548756036e-06, + "loss": 1.1278, + "step": 5630 + }, + { + "epoch": 0.04082607657060957, + "grad_norm": 0.17824417352676392, + "learning_rate": 4.9591811620954495e-06, + "loss": 1.114, + "step": 5640 + }, + { + "epoch": 0.04089846323119575, + "grad_norm": 0.18606680631637573, + "learning_rate": 4.959108775434863e-06, + "loss": 1.133, + "step": 5650 + }, + { + "epoch": 0.040970849891781944, + "grad_norm": 0.1736188679933548, + "learning_rate": 4.959036388774277e-06, + "loss": 1.1071, + "step": 5660 + }, + { + "epoch": 0.04104323655236813, + "grad_norm": 0.1926575005054474, + "learning_rate": 4.958964002113691e-06, + "loss": 1.1257, + "step": 5670 + }, + { + "epoch": 0.04111562321295432, + "grad_norm": 0.1755741983652115, + "learning_rate": 4.958891615453105e-06, + "loss": 1.108, + "step": 5680 + }, + { + "epoch": 0.0411880098735405, + "grad_norm": 0.19758310914039612, + "learning_rate": 4.9588192287925185e-06, + "loss": 1.1305, + "step": 5690 + }, + { + "epoch": 0.04126039653412669, + "grad_norm": 0.21883004903793335, + "learning_rate": 4.958746842131932e-06, + "loss": 1.1153, + "step": 5700 + }, + { + "epoch": 0.041332783194712876, + "grad_norm": 0.19593234360218048, + "learning_rate": 4.9586744554713466e-06, + "loss": 1.1249, + "step": 5710 + }, + { + "epoch": 0.041405169855299066, + "grad_norm": 0.19684365391731262, + "learning_rate": 4.95860206881076e-06, + "loss": 1.1193, + "step": 5720 + }, + { + "epoch": 0.04147755651588525, + "grad_norm": 0.19557367265224457, + "learning_rate": 4.958529682150174e-06, + "loss": 1.1251, + "step": 5730 + }, + { + "epoch": 0.04154994317647144, + "grad_norm": 0.21626479923725128, + "learning_rate": 4.958457295489587e-06, + "loss": 1.1123, + "step": 5740 + }, + { + "epoch": 0.041622329837057624, + "grad_norm": 0.18775388598442078, + "learning_rate": 4.958384908829002e-06, + "loss": 1.128, + "step": 5750 + }, + { + "epoch": 0.041694716497643815, + "grad_norm": 0.19671566784381866, + "learning_rate": 4.9583125221684155e-06, + "loss": 1.1164, + "step": 5760 + }, + { + "epoch": 0.04176710315823, + "grad_norm": 0.23783642053604126, + "learning_rate": 4.958240135507829e-06, + "loss": 1.1256, + "step": 5770 + }, + { + "epoch": 0.04183948981881619, + "grad_norm": 0.18829865753650665, + "learning_rate": 4.958167748847243e-06, + "loss": 1.1186, + "step": 5780 + }, + { + "epoch": 0.04191187647940237, + "grad_norm": 0.1892513930797577, + "learning_rate": 4.958095362186657e-06, + "loss": 1.1372, + "step": 5790 + }, + { + "epoch": 0.04198426313998856, + "grad_norm": 0.2102990746498108, + "learning_rate": 4.958022975526071e-06, + "loss": 1.1055, + "step": 5800 + }, + { + "epoch": 0.04205664980057475, + "grad_norm": 0.18884964287281036, + "learning_rate": 4.957950588865484e-06, + "loss": 1.1284, + "step": 5810 + }, + { + "epoch": 0.04212903646116094, + "grad_norm": 0.17858710885047913, + "learning_rate": 4.957878202204898e-06, + "loss": 1.1122, + "step": 5820 + }, + { + "epoch": 0.04220142312174712, + "grad_norm": 0.1783481240272522, + "learning_rate": 4.957805815544312e-06, + "loss": 1.1289, + "step": 5830 + }, + { + "epoch": 0.04227380978233331, + "grad_norm": 0.18287035822868347, + "learning_rate": 4.957733428883726e-06, + "loss": 1.1005, + "step": 5840 + }, + { + "epoch": 0.042346196442919495, + "grad_norm": 0.20345322787761688, + "learning_rate": 4.95766104222314e-06, + "loss": 1.1115, + "step": 5850 + }, + { + "epoch": 0.042418583103505686, + "grad_norm": 0.1760493516921997, + "learning_rate": 4.957588655562553e-06, + "loss": 1.1119, + "step": 5860 + }, + { + "epoch": 0.04249096976409188, + "grad_norm": 0.1835738718509674, + "learning_rate": 4.957516268901967e-06, + "loss": 1.1132, + "step": 5870 + }, + { + "epoch": 0.04256335642467806, + "grad_norm": 0.17547370493412018, + "learning_rate": 4.957443882241381e-06, + "loss": 1.1409, + "step": 5880 + }, + { + "epoch": 0.04263574308526425, + "grad_norm": 0.17572353780269623, + "learning_rate": 4.957371495580795e-06, + "loss": 1.1132, + "step": 5890 + }, + { + "epoch": 0.042708129745850434, + "grad_norm": 0.18828994035720825, + "learning_rate": 4.957299108920209e-06, + "loss": 1.1331, + "step": 5900 + }, + { + "epoch": 0.042780516406436625, + "grad_norm": 0.20104007422924042, + "learning_rate": 4.957226722259622e-06, + "loss": 1.0998, + "step": 5910 + }, + { + "epoch": 0.04285290306702281, + "grad_norm": 0.1787911206483841, + "learning_rate": 4.957154335599037e-06, + "loss": 1.105, + "step": 5920 + }, + { + "epoch": 0.042925289727609, + "grad_norm": 0.1929769665002823, + "learning_rate": 4.95708194893845e-06, + "loss": 1.1265, + "step": 5930 + }, + { + "epoch": 0.04299767638819518, + "grad_norm": 0.18230368196964264, + "learning_rate": 4.957009562277864e-06, + "loss": 1.1114, + "step": 5940 + }, + { + "epoch": 0.043070063048781373, + "grad_norm": 0.18805713951587677, + "learning_rate": 4.956937175617278e-06, + "loss": 1.1119, + "step": 5950 + }, + { + "epoch": 0.04314244970936756, + "grad_norm": 0.178778737783432, + "learning_rate": 4.956864788956691e-06, + "loss": 1.105, + "step": 5960 + }, + { + "epoch": 0.04321483636995375, + "grad_norm": 0.19835583865642548, + "learning_rate": 4.956792402296105e-06, + "loss": 1.1146, + "step": 5970 + }, + { + "epoch": 0.04328722303053993, + "grad_norm": 0.18930941820144653, + "learning_rate": 4.956720015635519e-06, + "loss": 1.1172, + "step": 5980 + }, + { + "epoch": 0.04335960969112612, + "grad_norm": 0.18036046624183655, + "learning_rate": 4.956647628974933e-06, + "loss": 1.1233, + "step": 5990 + }, + { + "epoch": 0.043431996351712306, + "grad_norm": 0.18780753016471863, + "learning_rate": 4.9565752423143465e-06, + "loss": 1.1101, + "step": 6000 + }, + { + "epoch": 0.043504383012298496, + "grad_norm": 0.2190426141023636, + "learning_rate": 4.95650285565376e-06, + "loss": 1.1199, + "step": 6010 + }, + { + "epoch": 0.04357676967288468, + "grad_norm": 0.20030994713306427, + "learning_rate": 4.956430468993174e-06, + "loss": 1.1194, + "step": 6020 + }, + { + "epoch": 0.04364915633347087, + "grad_norm": 0.17827999591827393, + "learning_rate": 4.956358082332588e-06, + "loss": 1.1142, + "step": 6030 + }, + { + "epoch": 0.043721542994057054, + "grad_norm": 0.22356738150119781, + "learning_rate": 4.956285695672002e-06, + "loss": 1.1218, + "step": 6040 + }, + { + "epoch": 0.043793929654643245, + "grad_norm": 0.18917842209339142, + "learning_rate": 4.9562133090114154e-06, + "loss": 1.1133, + "step": 6050 + }, + { + "epoch": 0.04386631631522943, + "grad_norm": 0.18079480528831482, + "learning_rate": 4.956140922350829e-06, + "loss": 1.1137, + "step": 6060 + }, + { + "epoch": 0.04393870297581562, + "grad_norm": 0.18289583921432495, + "learning_rate": 4.9560685356902435e-06, + "loss": 1.1261, + "step": 6070 + }, + { + "epoch": 0.0440110896364018, + "grad_norm": 0.2138090580701828, + "learning_rate": 4.955996149029657e-06, + "loss": 1.1005, + "step": 6080 + }, + { + "epoch": 0.04408347629698799, + "grad_norm": 0.20163343846797943, + "learning_rate": 4.955923762369071e-06, + "loss": 1.1079, + "step": 6090 + }, + { + "epoch": 0.04415586295757418, + "grad_norm": 0.20078091323375702, + "learning_rate": 4.955851375708484e-06, + "loss": 1.0869, + "step": 6100 + }, + { + "epoch": 0.04422824961816037, + "grad_norm": 0.2066076546907425, + "learning_rate": 4.955778989047899e-06, + "loss": 1.1208, + "step": 6110 + }, + { + "epoch": 0.04430063627874655, + "grad_norm": 0.18639633059501648, + "learning_rate": 4.9557066023873124e-06, + "loss": 1.1256, + "step": 6120 + }, + { + "epoch": 0.04437302293933274, + "grad_norm": 0.1844189614057541, + "learning_rate": 4.955634215726726e-06, + "loss": 1.1258, + "step": 6130 + }, + { + "epoch": 0.044445409599918925, + "grad_norm": 0.1896459013223648, + "learning_rate": 4.95556182906614e-06, + "loss": 1.1393, + "step": 6140 + }, + { + "epoch": 0.044517796260505116, + "grad_norm": 0.17254671454429626, + "learning_rate": 4.955489442405554e-06, + "loss": 1.1274, + "step": 6150 + }, + { + "epoch": 0.0445901829210913, + "grad_norm": 0.22296860814094543, + "learning_rate": 4.955417055744968e-06, + "loss": 1.125, + "step": 6160 + }, + { + "epoch": 0.04466256958167749, + "grad_norm": 0.20404869318008423, + "learning_rate": 4.955344669084381e-06, + "loss": 1.1093, + "step": 6170 + }, + { + "epoch": 0.044734956242263674, + "grad_norm": 0.18639911711215973, + "learning_rate": 4.955272282423795e-06, + "loss": 1.1051, + "step": 6180 + }, + { + "epoch": 0.044807342902849864, + "grad_norm": 0.1889095902442932, + "learning_rate": 4.9551998957632095e-06, + "loss": 1.1199, + "step": 6190 + }, + { + "epoch": 0.04487972956343605, + "grad_norm": 0.19607719779014587, + "learning_rate": 4.955127509102623e-06, + "loss": 1.1052, + "step": 6200 + }, + { + "epoch": 0.04495211622402224, + "grad_norm": 0.18083210289478302, + "learning_rate": 4.955055122442037e-06, + "loss": 1.129, + "step": 6210 + }, + { + "epoch": 0.04502450288460842, + "grad_norm": 0.2071446180343628, + "learning_rate": 4.95498273578145e-06, + "loss": 1.0987, + "step": 6220 + }, + { + "epoch": 0.04509688954519461, + "grad_norm": 0.1783336102962494, + "learning_rate": 4.954910349120865e-06, + "loss": 1.1306, + "step": 6230 + }, + { + "epoch": 0.045169276205780796, + "grad_norm": 0.18706344068050385, + "learning_rate": 4.954837962460278e-06, + "loss": 1.1217, + "step": 6240 + }, + { + "epoch": 0.04524166286636699, + "grad_norm": 0.19343775510787964, + "learning_rate": 4.954765575799692e-06, + "loss": 1.1079, + "step": 6250 + }, + { + "epoch": 0.04531404952695317, + "grad_norm": 0.18519844114780426, + "learning_rate": 4.954693189139106e-06, + "loss": 1.1148, + "step": 6260 + }, + { + "epoch": 0.04538643618753936, + "grad_norm": 0.2034037858247757, + "learning_rate": 4.95462080247852e-06, + "loss": 1.1, + "step": 6270 + }, + { + "epoch": 0.045458822848125545, + "grad_norm": 0.22080808877944946, + "learning_rate": 4.954548415817934e-06, + "loss": 1.1046, + "step": 6280 + }, + { + "epoch": 0.045531209508711736, + "grad_norm": 0.19020305573940277, + "learning_rate": 4.954476029157347e-06, + "loss": 1.1003, + "step": 6290 + }, + { + "epoch": 0.04560359616929792, + "grad_norm": 0.18018420040607452, + "learning_rate": 4.954403642496761e-06, + "loss": 1.1247, + "step": 6300 + }, + { + "epoch": 0.04567598282988411, + "grad_norm": 0.18034905195236206, + "learning_rate": 4.954331255836175e-06, + "loss": 1.103, + "step": 6310 + }, + { + "epoch": 0.04574836949047029, + "grad_norm": 0.18934369087219238, + "learning_rate": 4.954258869175589e-06, + "loss": 1.1229, + "step": 6320 + }, + { + "epoch": 0.045820756151056484, + "grad_norm": 0.18742787837982178, + "learning_rate": 4.954186482515003e-06, + "loss": 1.1131, + "step": 6330 + }, + { + "epoch": 0.04589314281164267, + "grad_norm": 0.1858285516500473, + "learning_rate": 4.954114095854416e-06, + "loss": 1.1242, + "step": 6340 + }, + { + "epoch": 0.04596552947222886, + "grad_norm": 0.17056511342525482, + "learning_rate": 4.954041709193831e-06, + "loss": 1.0997, + "step": 6350 + }, + { + "epoch": 0.04603791613281504, + "grad_norm": 0.2107161581516266, + "learning_rate": 4.953969322533244e-06, + "loss": 1.1075, + "step": 6360 + }, + { + "epoch": 0.04611030279340123, + "grad_norm": 0.17084477841854095, + "learning_rate": 4.953896935872658e-06, + "loss": 1.1113, + "step": 6370 + }, + { + "epoch": 0.046182689453987416, + "grad_norm": 0.18742406368255615, + "learning_rate": 4.9538245492120715e-06, + "loss": 1.115, + "step": 6380 + }, + { + "epoch": 0.04625507611457361, + "grad_norm": 0.18658484518527985, + "learning_rate": 4.953752162551486e-06, + "loss": 1.112, + "step": 6390 + }, + { + "epoch": 0.04632746277515979, + "grad_norm": 0.19460472464561462, + "learning_rate": 4.9536797758909e-06, + "loss": 1.1297, + "step": 6400 + }, + { + "epoch": 0.04639984943574598, + "grad_norm": 0.1897081881761551, + "learning_rate": 4.953607389230313e-06, + "loss": 1.1045, + "step": 6410 + }, + { + "epoch": 0.046472236096332165, + "grad_norm": 0.1974371373653412, + "learning_rate": 4.953535002569727e-06, + "loss": 1.106, + "step": 6420 + }, + { + "epoch": 0.046544622756918355, + "grad_norm": 0.18914976716041565, + "learning_rate": 4.953462615909141e-06, + "loss": 1.1199, + "step": 6430 + }, + { + "epoch": 0.04661700941750454, + "grad_norm": 0.18014445900917053, + "learning_rate": 4.953390229248555e-06, + "loss": 1.1011, + "step": 6440 + }, + { + "epoch": 0.04668939607809073, + "grad_norm": 0.17030683159828186, + "learning_rate": 4.9533178425879686e-06, + "loss": 1.1098, + "step": 6450 + }, + { + "epoch": 0.04676178273867692, + "grad_norm": 0.19075506925582886, + "learning_rate": 4.953245455927382e-06, + "loss": 1.1064, + "step": 6460 + }, + { + "epoch": 0.046834169399263104, + "grad_norm": 0.19481076300144196, + "learning_rate": 4.953173069266796e-06, + "loss": 1.1031, + "step": 6470 + }, + { + "epoch": 0.046906556059849294, + "grad_norm": 0.1836860328912735, + "learning_rate": 4.953100682606209e-06, + "loss": 1.1134, + "step": 6480 + }, + { + "epoch": 0.04697894272043548, + "grad_norm": 0.18626393377780914, + "learning_rate": 4.953028295945623e-06, + "loss": 1.1165, + "step": 6490 + }, + { + "epoch": 0.04705132938102167, + "grad_norm": 0.17867518961429596, + "learning_rate": 4.9529559092850375e-06, + "loss": 1.1102, + "step": 6500 + }, + { + "epoch": 0.04712371604160785, + "grad_norm": 0.1940499097108841, + "learning_rate": 4.952883522624451e-06, + "loss": 1.1063, + "step": 6510 + }, + { + "epoch": 0.04719610270219404, + "grad_norm": 0.17317171394824982, + "learning_rate": 4.952811135963865e-06, + "loss": 1.1254, + "step": 6520 + }, + { + "epoch": 0.047268489362780226, + "grad_norm": 0.19576644897460938, + "learning_rate": 4.952738749303278e-06, + "loss": 1.1251, + "step": 6530 + }, + { + "epoch": 0.04734087602336642, + "grad_norm": 0.17316751182079315, + "learning_rate": 4.952666362642693e-06, + "loss": 1.1097, + "step": 6540 + }, + { + "epoch": 0.0474132626839526, + "grad_norm": 0.1811741143465042, + "learning_rate": 4.952593975982106e-06, + "loss": 1.122, + "step": 6550 + }, + { + "epoch": 0.04748564934453879, + "grad_norm": 0.1905275136232376, + "learning_rate": 4.95252158932152e-06, + "loss": 1.1074, + "step": 6560 + }, + { + "epoch": 0.047558036005124975, + "grad_norm": 0.19564856588840485, + "learning_rate": 4.952449202660934e-06, + "loss": 1.1139, + "step": 6570 + }, + { + "epoch": 0.047630422665711165, + "grad_norm": 0.17164203524589539, + "learning_rate": 4.952376816000348e-06, + "loss": 1.1148, + "step": 6580 + }, + { + "epoch": 0.04770280932629735, + "grad_norm": 0.19059689342975616, + "learning_rate": 4.952304429339762e-06, + "loss": 1.1193, + "step": 6590 + }, + { + "epoch": 0.04777519598688354, + "grad_norm": 0.18997924029827118, + "learning_rate": 4.952232042679175e-06, + "loss": 1.1091, + "step": 6600 + }, + { + "epoch": 0.04784758264746972, + "grad_norm": 0.2226713001728058, + "learning_rate": 4.952159656018589e-06, + "loss": 1.1131, + "step": 6610 + }, + { + "epoch": 0.047919969308055914, + "grad_norm": 0.18311481177806854, + "learning_rate": 4.9520872693580026e-06, + "loss": 1.1152, + "step": 6620 + }, + { + "epoch": 0.0479923559686421, + "grad_norm": 0.18285690248012543, + "learning_rate": 4.952014882697417e-06, + "loss": 1.1209, + "step": 6630 + }, + { + "epoch": 0.04806474262922829, + "grad_norm": 0.18869620561599731, + "learning_rate": 4.951942496036831e-06, + "loss": 1.0976, + "step": 6640 + }, + { + "epoch": 0.04813712928981447, + "grad_norm": 0.17298665642738342, + "learning_rate": 4.951870109376244e-06, + "loss": 1.1105, + "step": 6650 + }, + { + "epoch": 0.04820951595040066, + "grad_norm": 0.1727328598499298, + "learning_rate": 4.951797722715658e-06, + "loss": 1.1042, + "step": 6660 + }, + { + "epoch": 0.048281902610986846, + "grad_norm": 0.2000247985124588, + "learning_rate": 4.951725336055072e-06, + "loss": 1.1056, + "step": 6670 + }, + { + "epoch": 0.04835428927157304, + "grad_norm": 0.18341891467571259, + "learning_rate": 4.951652949394486e-06, + "loss": 1.1081, + "step": 6680 + }, + { + "epoch": 0.04842667593215922, + "grad_norm": 0.17036312818527222, + "learning_rate": 4.9515805627339e-06, + "loss": 1.1055, + "step": 6690 + }, + { + "epoch": 0.04849906259274541, + "grad_norm": 0.1787406951189041, + "learning_rate": 4.951508176073313e-06, + "loss": 1.1104, + "step": 6700 + }, + { + "epoch": 0.048571449253331594, + "grad_norm": 0.21522028744220734, + "learning_rate": 4.951435789412728e-06, + "loss": 1.1306, + "step": 6710 + }, + { + "epoch": 0.048643835913917785, + "grad_norm": 0.18588437139987946, + "learning_rate": 4.951363402752141e-06, + "loss": 1.1115, + "step": 6720 + }, + { + "epoch": 0.04871622257450397, + "grad_norm": 0.18174554407596588, + "learning_rate": 4.951291016091555e-06, + "loss": 1.1072, + "step": 6730 + }, + { + "epoch": 0.04878860923509016, + "grad_norm": 0.20772916078567505, + "learning_rate": 4.9512186294309685e-06, + "loss": 1.1141, + "step": 6740 + }, + { + "epoch": 0.04886099589567634, + "grad_norm": 0.1825239658355713, + "learning_rate": 4.951146242770383e-06, + "loss": 1.0956, + "step": 6750 + }, + { + "epoch": 0.048933382556262534, + "grad_norm": 0.1815953105688095, + "learning_rate": 4.951073856109797e-06, + "loss": 1.1061, + "step": 6760 + }, + { + "epoch": 0.04900576921684872, + "grad_norm": 0.20214617252349854, + "learning_rate": 4.95100146944921e-06, + "loss": 1.1076, + "step": 6770 + }, + { + "epoch": 0.04907815587743491, + "grad_norm": 0.18477530777454376, + "learning_rate": 4.950929082788624e-06, + "loss": 1.1158, + "step": 6780 + }, + { + "epoch": 0.04915054253802109, + "grad_norm": 0.18946325778961182, + "learning_rate": 4.950856696128038e-06, + "loss": 1.0897, + "step": 6790 + }, + { + "epoch": 0.04922292919860728, + "grad_norm": 0.18577060103416443, + "learning_rate": 4.950784309467452e-06, + "loss": 1.1187, + "step": 6800 + }, + { + "epoch": 0.049295315859193466, + "grad_norm": 0.21096892654895782, + "learning_rate": 4.9507119228068655e-06, + "loss": 1.117, + "step": 6810 + }, + { + "epoch": 0.049367702519779656, + "grad_norm": 0.18540360033512115, + "learning_rate": 4.950639536146279e-06, + "loss": 1.1034, + "step": 6820 + }, + { + "epoch": 0.04944008918036584, + "grad_norm": 0.18726834654808044, + "learning_rate": 4.950567149485694e-06, + "loss": 1.1083, + "step": 6830 + }, + { + "epoch": 0.04951247584095203, + "grad_norm": 0.1806015968322754, + "learning_rate": 4.950494762825107e-06, + "loss": 1.1017, + "step": 6840 + }, + { + "epoch": 0.049584862501538214, + "grad_norm": 0.19054830074310303, + "learning_rate": 4.950422376164521e-06, + "loss": 1.1093, + "step": 6850 + }, + { + "epoch": 0.049657249162124405, + "grad_norm": 0.18223927915096283, + "learning_rate": 4.9503499895039344e-06, + "loss": 1.1138, + "step": 6860 + }, + { + "epoch": 0.04972963582271059, + "grad_norm": 0.18125773966312408, + "learning_rate": 4.950277602843349e-06, + "loss": 1.0931, + "step": 6870 + }, + { + "epoch": 0.04980202248329678, + "grad_norm": 0.18000420928001404, + "learning_rate": 4.9502052161827625e-06, + "loss": 1.1019, + "step": 6880 + }, + { + "epoch": 0.04987440914388296, + "grad_norm": 0.21712656319141388, + "learning_rate": 4.950132829522176e-06, + "loss": 1.1233, + "step": 6890 + }, + { + "epoch": 0.04994679580446915, + "grad_norm": 0.20299050211906433, + "learning_rate": 4.95006044286159e-06, + "loss": 1.1055, + "step": 6900 + }, + { + "epoch": 0.05001918246505534, + "grad_norm": 0.1789911538362503, + "learning_rate": 4.949988056201004e-06, + "loss": 1.0976, + "step": 6910 + }, + { + "epoch": 0.05009156912564153, + "grad_norm": 0.18480534851551056, + "learning_rate": 4.949915669540418e-06, + "loss": 1.1133, + "step": 6920 + }, + { + "epoch": 0.05016395578622771, + "grad_norm": 0.17870941758155823, + "learning_rate": 4.9498432828798315e-06, + "loss": 1.1187, + "step": 6930 + }, + { + "epoch": 0.0502363424468139, + "grad_norm": 0.18759605288505554, + "learning_rate": 4.949770896219245e-06, + "loss": 1.0889, + "step": 6940 + }, + { + "epoch": 0.050308729107400085, + "grad_norm": 0.19115033745765686, + "learning_rate": 4.9496985095586595e-06, + "loss": 1.1194, + "step": 6950 + }, + { + "epoch": 0.050381115767986276, + "grad_norm": 0.19017775356769562, + "learning_rate": 4.949626122898073e-06, + "loss": 1.1183, + "step": 6960 + }, + { + "epoch": 0.05045350242857246, + "grad_norm": 0.16906103491783142, + "learning_rate": 4.949553736237487e-06, + "loss": 1.1165, + "step": 6970 + }, + { + "epoch": 0.05052588908915865, + "grad_norm": 0.18214966356754303, + "learning_rate": 4.9494813495769e-06, + "loss": 1.0915, + "step": 6980 + }, + { + "epoch": 0.050598275749744834, + "grad_norm": 0.19243939220905304, + "learning_rate": 4.949408962916315e-06, + "loss": 1.1123, + "step": 6990 + }, + { + "epoch": 0.050670662410331024, + "grad_norm": 0.1790931522846222, + "learning_rate": 4.9493365762557285e-06, + "loss": 1.1139, + "step": 7000 + }, + { + "epoch": 0.05074304907091721, + "grad_norm": 0.1852511316537857, + "learning_rate": 4.949264189595141e-06, + "loss": 1.1077, + "step": 7010 + }, + { + "epoch": 0.0508154357315034, + "grad_norm": 0.24233588576316833, + "learning_rate": 4.949191802934556e-06, + "loss": 1.1023, + "step": 7020 + }, + { + "epoch": 0.05088782239208959, + "grad_norm": 0.22311051189899445, + "learning_rate": 4.949119416273969e-06, + "loss": 1.0985, + "step": 7030 + }, + { + "epoch": 0.05096020905267577, + "grad_norm": 0.21093599498271942, + "learning_rate": 4.949047029613383e-06, + "loss": 1.0985, + "step": 7040 + }, + { + "epoch": 0.05103259571326196, + "grad_norm": 0.19992297887802124, + "learning_rate": 4.9489746429527965e-06, + "loss": 1.1149, + "step": 7050 + }, + { + "epoch": 0.05110498237384815, + "grad_norm": 0.23999755084514618, + "learning_rate": 4.948902256292211e-06, + "loss": 1.1069, + "step": 7060 + }, + { + "epoch": 0.05117736903443434, + "grad_norm": 0.1987914890050888, + "learning_rate": 4.948829869631625e-06, + "loss": 1.1077, + "step": 7070 + }, + { + "epoch": 0.05124975569502052, + "grad_norm": 0.2037319839000702, + "learning_rate": 4.948757482971038e-06, + "loss": 1.107, + "step": 7080 + }, + { + "epoch": 0.05132214235560671, + "grad_norm": 0.1777563840150833, + "learning_rate": 4.948685096310452e-06, + "loss": 1.1182, + "step": 7090 + }, + { + "epoch": 0.051394529016192896, + "grad_norm": 0.18838225305080414, + "learning_rate": 4.948612709649866e-06, + "loss": 1.1226, + "step": 7100 + }, + { + "epoch": 0.051466915676779086, + "grad_norm": 0.19094857573509216, + "learning_rate": 4.94854032298928e-06, + "loss": 1.0974, + "step": 7110 + }, + { + "epoch": 0.05153930233736527, + "grad_norm": 0.22589007019996643, + "learning_rate": 4.9484679363286935e-06, + "loss": 1.1015, + "step": 7120 + }, + { + "epoch": 0.05161168899795146, + "grad_norm": 0.1975346952676773, + "learning_rate": 4.948395549668107e-06, + "loss": 1.1143, + "step": 7130 + }, + { + "epoch": 0.051684075658537644, + "grad_norm": 0.17119309306144714, + "learning_rate": 4.948323163007522e-06, + "loss": 1.1089, + "step": 7140 + }, + { + "epoch": 0.051756462319123835, + "grad_norm": 0.1853228211402893, + "learning_rate": 4.948250776346935e-06, + "loss": 1.1202, + "step": 7150 + }, + { + "epoch": 0.05182884897971002, + "grad_norm": 0.21450157463550568, + "learning_rate": 4.948178389686349e-06, + "loss": 1.1093, + "step": 7160 + }, + { + "epoch": 0.05190123564029621, + "grad_norm": 0.1728169471025467, + "learning_rate": 4.9481060030257625e-06, + "loss": 1.0914, + "step": 7170 + }, + { + "epoch": 0.05197362230088239, + "grad_norm": 0.21489717066287994, + "learning_rate": 4.948033616365177e-06, + "loss": 1.0987, + "step": 7180 + }, + { + "epoch": 0.05204600896146858, + "grad_norm": 0.19044430553913116, + "learning_rate": 4.9479612297045906e-06, + "loss": 1.1089, + "step": 7190 + }, + { + "epoch": 0.05211839562205477, + "grad_norm": 0.18932093679904938, + "learning_rate": 4.947888843044004e-06, + "loss": 1.1093, + "step": 7200 + }, + { + "epoch": 0.05219078228264096, + "grad_norm": 0.18193960189819336, + "learning_rate": 4.947816456383418e-06, + "loss": 1.1116, + "step": 7210 + }, + { + "epoch": 0.05226316894322714, + "grad_norm": 0.18055914342403412, + "learning_rate": 4.947744069722832e-06, + "loss": 1.1052, + "step": 7220 + }, + { + "epoch": 0.05233555560381333, + "grad_norm": 0.18837417662143707, + "learning_rate": 4.947671683062246e-06, + "loss": 1.0917, + "step": 7230 + }, + { + "epoch": 0.052407942264399515, + "grad_norm": 0.18194933235645294, + "learning_rate": 4.9475992964016595e-06, + "loss": 1.0992, + "step": 7240 + }, + { + "epoch": 0.052480328924985706, + "grad_norm": 0.2614051103591919, + "learning_rate": 4.947526909741073e-06, + "loss": 1.0997, + "step": 7250 + }, + { + "epoch": 0.05255271558557189, + "grad_norm": 0.1798228770494461, + "learning_rate": 4.947454523080487e-06, + "loss": 1.1035, + "step": 7260 + }, + { + "epoch": 0.05262510224615808, + "grad_norm": 0.1857793927192688, + "learning_rate": 4.947382136419901e-06, + "loss": 1.099, + "step": 7270 + }, + { + "epoch": 0.052697488906744264, + "grad_norm": 0.1768251359462738, + "learning_rate": 4.947309749759315e-06, + "loss": 1.0987, + "step": 7280 + }, + { + "epoch": 0.052769875567330454, + "grad_norm": 0.18754634261131287, + "learning_rate": 4.947237363098728e-06, + "loss": 1.098, + "step": 7290 + }, + { + "epoch": 0.05284226222791664, + "grad_norm": 0.18987387418746948, + "learning_rate": 4.947164976438142e-06, + "loss": 1.1097, + "step": 7300 + }, + { + "epoch": 0.05291464888850283, + "grad_norm": 0.17656712234020233, + "learning_rate": 4.9470925897775565e-06, + "loss": 1.1031, + "step": 7310 + }, + { + "epoch": 0.05298703554908901, + "grad_norm": 0.20419563353061676, + "learning_rate": 4.94702020311697e-06, + "loss": 1.1133, + "step": 7320 + }, + { + "epoch": 0.0530594222096752, + "grad_norm": 0.19128115475177765, + "learning_rate": 4.946947816456384e-06, + "loss": 1.1022, + "step": 7330 + }, + { + "epoch": 0.053131808870261386, + "grad_norm": 0.17749454081058502, + "learning_rate": 4.946875429795797e-06, + "loss": 1.1134, + "step": 7340 + }, + { + "epoch": 0.05320419553084758, + "grad_norm": 0.18553034961223602, + "learning_rate": 4.946803043135212e-06, + "loss": 1.1059, + "step": 7350 + }, + { + "epoch": 0.05327658219143376, + "grad_norm": 0.2216414511203766, + "learning_rate": 4.946730656474625e-06, + "loss": 1.1099, + "step": 7360 + }, + { + "epoch": 0.05334896885201995, + "grad_norm": 0.1996411681175232, + "learning_rate": 4.946658269814039e-06, + "loss": 1.0844, + "step": 7370 + }, + { + "epoch": 0.053421355512606135, + "grad_norm": 0.17310328781604767, + "learning_rate": 4.946585883153453e-06, + "loss": 1.0895, + "step": 7380 + }, + { + "epoch": 0.053493742173192325, + "grad_norm": 0.18325687944889069, + "learning_rate": 4.946513496492867e-06, + "loss": 1.1017, + "step": 7390 + }, + { + "epoch": 0.05356612883377851, + "grad_norm": 0.1925450563430786, + "learning_rate": 4.946441109832281e-06, + "loss": 1.1152, + "step": 7400 + }, + { + "epoch": 0.0536385154943647, + "grad_norm": 0.20546835660934448, + "learning_rate": 4.946368723171694e-06, + "loss": 1.0937, + "step": 7410 + }, + { + "epoch": 0.05371090215495088, + "grad_norm": 0.19936460256576538, + "learning_rate": 4.946296336511108e-06, + "loss": 1.0876, + "step": 7420 + }, + { + "epoch": 0.053783288815537074, + "grad_norm": 0.18377749621868134, + "learning_rate": 4.9462239498505224e-06, + "loss": 1.1009, + "step": 7430 + }, + { + "epoch": 0.05385567547612326, + "grad_norm": 0.18654756247997284, + "learning_rate": 4.946151563189936e-06, + "loss": 1.1017, + "step": 7440 + }, + { + "epoch": 0.05392806213670945, + "grad_norm": 0.17584462463855743, + "learning_rate": 4.94607917652935e-06, + "loss": 1.1111, + "step": 7450 + }, + { + "epoch": 0.05400044879729563, + "grad_norm": 0.1823701709508896, + "learning_rate": 4.946006789868763e-06, + "loss": 1.0983, + "step": 7460 + }, + { + "epoch": 0.05407283545788182, + "grad_norm": 0.17409732937812805, + "learning_rate": 4.945934403208178e-06, + "loss": 1.1101, + "step": 7470 + }, + { + "epoch": 0.054145222118468006, + "grad_norm": 0.19750286638736725, + "learning_rate": 4.945862016547591e-06, + "loss": 1.1043, + "step": 7480 + }, + { + "epoch": 0.0542176087790542, + "grad_norm": 0.18154850602149963, + "learning_rate": 4.945789629887005e-06, + "loss": 1.1078, + "step": 7490 + }, + { + "epoch": 0.05428999543964038, + "grad_norm": 0.17519652843475342, + "learning_rate": 4.945717243226419e-06, + "loss": 1.0995, + "step": 7500 + }, + { + "epoch": 0.05436238210022657, + "grad_norm": 0.18698933720588684, + "learning_rate": 4.945644856565833e-06, + "loss": 1.0991, + "step": 7510 + }, + { + "epoch": 0.054434768760812755, + "grad_norm": 0.1779969483613968, + "learning_rate": 4.945572469905247e-06, + "loss": 1.1029, + "step": 7520 + }, + { + "epoch": 0.054507155421398945, + "grad_norm": 0.1794944554567337, + "learning_rate": 4.94550008324466e-06, + "loss": 1.0927, + "step": 7530 + }, + { + "epoch": 0.05457954208198513, + "grad_norm": 0.1742369830608368, + "learning_rate": 4.945427696584074e-06, + "loss": 1.1085, + "step": 7540 + }, + { + "epoch": 0.05465192874257132, + "grad_norm": 0.17996317148208618, + "learning_rate": 4.9453553099234875e-06, + "loss": 1.1053, + "step": 7550 + }, + { + "epoch": 0.0547243154031575, + "grad_norm": 0.18489129841327667, + "learning_rate": 4.945282923262901e-06, + "loss": 1.0965, + "step": 7560 + }, + { + "epoch": 0.054796702063743694, + "grad_norm": 0.2375306636095047, + "learning_rate": 4.945210536602315e-06, + "loss": 1.1016, + "step": 7570 + }, + { + "epoch": 0.05486908872432988, + "grad_norm": 0.18841832876205444, + "learning_rate": 4.945138149941729e-06, + "loss": 1.1089, + "step": 7580 + }, + { + "epoch": 0.05494147538491607, + "grad_norm": 0.17497693002223969, + "learning_rate": 4.945065763281143e-06, + "loss": 1.0913, + "step": 7590 + }, + { + "epoch": 0.05501386204550225, + "grad_norm": 0.20578007400035858, + "learning_rate": 4.9449933766205564e-06, + "loss": 1.0975, + "step": 7600 + }, + { + "epoch": 0.05508624870608844, + "grad_norm": 0.1815917193889618, + "learning_rate": 4.94492098995997e-06, + "loss": 1.0931, + "step": 7610 + }, + { + "epoch": 0.05515863536667463, + "grad_norm": 0.18187057971954346, + "learning_rate": 4.9448486032993845e-06, + "loss": 1.0993, + "step": 7620 + }, + { + "epoch": 0.055231022027260816, + "grad_norm": 0.1740749329328537, + "learning_rate": 4.944776216638798e-06, + "loss": 1.0941, + "step": 7630 + }, + { + "epoch": 0.05530340868784701, + "grad_norm": 0.17215760052204132, + "learning_rate": 4.944703829978212e-06, + "loss": 1.1036, + "step": 7640 + }, + { + "epoch": 0.05537579534843319, + "grad_norm": 0.21924471855163574, + "learning_rate": 4.944631443317625e-06, + "loss": 1.0897, + "step": 7650 + }, + { + "epoch": 0.05544818200901938, + "grad_norm": 0.1766793131828308, + "learning_rate": 4.94455905665704e-06, + "loss": 1.0977, + "step": 7660 + }, + { + "epoch": 0.055520568669605565, + "grad_norm": 0.20563791692256927, + "learning_rate": 4.9444866699964535e-06, + "loss": 1.0981, + "step": 7670 + }, + { + "epoch": 0.055592955330191755, + "grad_norm": 0.17593294382095337, + "learning_rate": 4.944414283335867e-06, + "loss": 1.0932, + "step": 7680 + }, + { + "epoch": 0.05566534199077794, + "grad_norm": 0.18366055190563202, + "learning_rate": 4.944341896675281e-06, + "loss": 1.1067, + "step": 7690 + }, + { + "epoch": 0.05573772865136413, + "grad_norm": 0.19099287688732147, + "learning_rate": 4.944269510014695e-06, + "loss": 1.0877, + "step": 7700 + }, + { + "epoch": 0.05581011531195031, + "grad_norm": 0.17935815453529358, + "learning_rate": 4.944197123354109e-06, + "loss": 1.0943, + "step": 7710 + }, + { + "epoch": 0.055882501972536504, + "grad_norm": 0.18561848998069763, + "learning_rate": 4.944124736693522e-06, + "loss": 1.1099, + "step": 7720 + }, + { + "epoch": 0.05595488863312269, + "grad_norm": 0.18966755270957947, + "learning_rate": 4.944052350032936e-06, + "loss": 1.0981, + "step": 7730 + }, + { + "epoch": 0.05602727529370888, + "grad_norm": 0.18499694764614105, + "learning_rate": 4.9439799633723505e-06, + "loss": 1.103, + "step": 7740 + }, + { + "epoch": 0.05609966195429506, + "grad_norm": 0.17918939888477325, + "learning_rate": 4.943907576711764e-06, + "loss": 1.1007, + "step": 7750 + }, + { + "epoch": 0.05617204861488125, + "grad_norm": 0.19508133828639984, + "learning_rate": 4.943835190051178e-06, + "loss": 1.1104, + "step": 7760 + }, + { + "epoch": 0.056244435275467436, + "grad_norm": 0.19462929666042328, + "learning_rate": 4.943762803390591e-06, + "loss": 1.1089, + "step": 7770 + }, + { + "epoch": 0.056316821936053627, + "grad_norm": 0.3181241750717163, + "learning_rate": 4.943690416730006e-06, + "loss": 1.1036, + "step": 7780 + }, + { + "epoch": 0.05638920859663981, + "grad_norm": 0.175667867064476, + "learning_rate": 4.943618030069419e-06, + "loss": 1.1137, + "step": 7790 + }, + { + "epoch": 0.056461595257226, + "grad_norm": 0.17442381381988525, + "learning_rate": 4.943545643408833e-06, + "loss": 1.0801, + "step": 7800 + }, + { + "epoch": 0.056533981917812184, + "grad_norm": 0.17181989550590515, + "learning_rate": 4.943473256748247e-06, + "loss": 1.1102, + "step": 7810 + }, + { + "epoch": 0.056606368578398375, + "grad_norm": 0.19522684812545776, + "learning_rate": 4.943400870087661e-06, + "loss": 1.103, + "step": 7820 + }, + { + "epoch": 0.05667875523898456, + "grad_norm": 0.3031037747859955, + "learning_rate": 4.943328483427075e-06, + "loss": 1.1028, + "step": 7830 + }, + { + "epoch": 0.05675114189957075, + "grad_norm": 0.21572640538215637, + "learning_rate": 4.943256096766488e-06, + "loss": 1.0969, + "step": 7840 + }, + { + "epoch": 0.05682352856015693, + "grad_norm": 0.1849983185529709, + "learning_rate": 4.943183710105902e-06, + "loss": 1.0988, + "step": 7850 + }, + { + "epoch": 0.05689591522074312, + "grad_norm": 0.17931009829044342, + "learning_rate": 4.9431113234453155e-06, + "loss": 1.0871, + "step": 7860 + }, + { + "epoch": 0.05696830188132931, + "grad_norm": 0.19952841103076935, + "learning_rate": 4.94303893678473e-06, + "loss": 1.1064, + "step": 7870 + }, + { + "epoch": 0.0570406885419155, + "grad_norm": 0.19907739758491516, + "learning_rate": 4.942966550124144e-06, + "loss": 1.0837, + "step": 7880 + }, + { + "epoch": 0.05711307520250168, + "grad_norm": 0.1972847580909729, + "learning_rate": 4.942894163463557e-06, + "loss": 1.0981, + "step": 7890 + }, + { + "epoch": 0.05718546186308787, + "grad_norm": 0.17437316477298737, + "learning_rate": 4.942821776802971e-06, + "loss": 1.0794, + "step": 7900 + }, + { + "epoch": 0.057257848523674056, + "grad_norm": 0.24157637357711792, + "learning_rate": 4.942749390142385e-06, + "loss": 1.0933, + "step": 7910 + }, + { + "epoch": 0.057330235184260246, + "grad_norm": 0.17602761089801788, + "learning_rate": 4.942677003481799e-06, + "loss": 1.1007, + "step": 7920 + }, + { + "epoch": 0.05740262184484643, + "grad_norm": 0.17508484423160553, + "learning_rate": 4.9426046168212126e-06, + "loss": 1.1054, + "step": 7930 + }, + { + "epoch": 0.05747500850543262, + "grad_norm": 0.18290212750434875, + "learning_rate": 4.942532230160626e-06, + "loss": 1.0949, + "step": 7940 + }, + { + "epoch": 0.057547395166018804, + "grad_norm": 0.17786473035812378, + "learning_rate": 4.942459843500041e-06, + "loss": 1.1041, + "step": 7950 + }, + { + "epoch": 0.057619781826604995, + "grad_norm": 0.19254614412784576, + "learning_rate": 4.942387456839454e-06, + "loss": 1.0975, + "step": 7960 + }, + { + "epoch": 0.05769216848719118, + "grad_norm": 0.16799108684062958, + "learning_rate": 4.942315070178868e-06, + "loss": 1.0947, + "step": 7970 + }, + { + "epoch": 0.05776455514777737, + "grad_norm": 0.21218262612819672, + "learning_rate": 4.9422426835182815e-06, + "loss": 1.0826, + "step": 7980 + }, + { + "epoch": 0.05783694180836355, + "grad_norm": 0.1867387741804123, + "learning_rate": 4.942170296857696e-06, + "loss": 1.0918, + "step": 7990 + }, + { + "epoch": 0.05790932846894974, + "grad_norm": 0.19001053273677826, + "learning_rate": 4.9420979101971096e-06, + "loss": 1.113, + "step": 8000 + }, + { + "epoch": 0.05798171512953593, + "grad_norm": 0.23286646604537964, + "learning_rate": 4.942025523536523e-06, + "loss": 1.0957, + "step": 8010 + }, + { + "epoch": 0.05805410179012212, + "grad_norm": 0.184943288564682, + "learning_rate": 4.941953136875937e-06, + "loss": 1.1023, + "step": 8020 + }, + { + "epoch": 0.0581264884507083, + "grad_norm": 0.20318259298801422, + "learning_rate": 4.941880750215351e-06, + "loss": 1.1059, + "step": 8030 + }, + { + "epoch": 0.05819887511129449, + "grad_norm": 0.17356903851032257, + "learning_rate": 4.941808363554765e-06, + "loss": 1.0779, + "step": 8040 + }, + { + "epoch": 0.058271261771880675, + "grad_norm": 0.19621440768241882, + "learning_rate": 4.9417359768941785e-06, + "loss": 1.0961, + "step": 8050 + }, + { + "epoch": 0.058343648432466866, + "grad_norm": 0.18054349720478058, + "learning_rate": 4.941663590233592e-06, + "loss": 1.0977, + "step": 8060 + }, + { + "epoch": 0.05841603509305305, + "grad_norm": 0.18659920990467072, + "learning_rate": 4.941591203573006e-06, + "loss": 1.0989, + "step": 8070 + }, + { + "epoch": 0.05848842175363924, + "grad_norm": 0.1870632916688919, + "learning_rate": 4.941518816912419e-06, + "loss": 1.1024, + "step": 8080 + }, + { + "epoch": 0.058560808414225424, + "grad_norm": 0.18549518287181854, + "learning_rate": 4.941446430251833e-06, + "loss": 1.0826, + "step": 8090 + }, + { + "epoch": 0.058633195074811614, + "grad_norm": 0.1786888837814331, + "learning_rate": 4.941374043591247e-06, + "loss": 1.0928, + "step": 8100 + }, + { + "epoch": 0.0587055817353978, + "grad_norm": 0.1912764608860016, + "learning_rate": 4.941301656930661e-06, + "loss": 1.0815, + "step": 8110 + }, + { + "epoch": 0.05877796839598399, + "grad_norm": 0.17277614772319794, + "learning_rate": 4.941229270270075e-06, + "loss": 1.0938, + "step": 8120 + }, + { + "epoch": 0.05885035505657017, + "grad_norm": 0.18441365659236908, + "learning_rate": 4.941156883609488e-06, + "loss": 1.1084, + "step": 8130 + }, + { + "epoch": 0.05892274171715636, + "grad_norm": 0.24493847787380219, + "learning_rate": 4.941084496948903e-06, + "loss": 1.0924, + "step": 8140 + }, + { + "epoch": 0.058995128377742546, + "grad_norm": 0.1992846131324768, + "learning_rate": 4.941012110288316e-06, + "loss": 1.0969, + "step": 8150 + }, + { + "epoch": 0.05906751503832874, + "grad_norm": 0.20065009593963623, + "learning_rate": 4.94093972362773e-06, + "loss": 1.1019, + "step": 8160 + }, + { + "epoch": 0.05913990169891492, + "grad_norm": 0.17659001052379608, + "learning_rate": 4.940867336967144e-06, + "loss": 1.0926, + "step": 8170 + }, + { + "epoch": 0.05921228835950111, + "grad_norm": 0.19576965272426605, + "learning_rate": 4.940794950306558e-06, + "loss": 1.1049, + "step": 8180 + }, + { + "epoch": 0.059284675020087295, + "grad_norm": 0.20090840756893158, + "learning_rate": 4.940722563645972e-06, + "loss": 1.1027, + "step": 8190 + }, + { + "epoch": 0.059357061680673485, + "grad_norm": 0.16997766494750977, + "learning_rate": 4.940650176985385e-06, + "loss": 1.0915, + "step": 8200 + }, + { + "epoch": 0.059429448341259676, + "grad_norm": 0.20561368763446808, + "learning_rate": 4.940577790324799e-06, + "loss": 1.0983, + "step": 8210 + }, + { + "epoch": 0.05950183500184586, + "grad_norm": 0.20617088675498962, + "learning_rate": 4.940505403664213e-06, + "loss": 1.0889, + "step": 8220 + }, + { + "epoch": 0.05957422166243205, + "grad_norm": 0.1871093362569809, + "learning_rate": 4.940433017003627e-06, + "loss": 1.0964, + "step": 8230 + }, + { + "epoch": 0.059646608323018234, + "grad_norm": 0.1626369059085846, + "learning_rate": 4.940360630343041e-06, + "loss": 1.0938, + "step": 8240 + }, + { + "epoch": 0.059718994983604425, + "grad_norm": 0.18901404738426208, + "learning_rate": 4.940288243682454e-06, + "loss": 1.0823, + "step": 8250 + }, + { + "epoch": 0.05979138164419061, + "grad_norm": 0.18634545803070068, + "learning_rate": 4.940215857021869e-06, + "loss": 1.0863, + "step": 8260 + }, + { + "epoch": 0.0598637683047768, + "grad_norm": 0.1944178342819214, + "learning_rate": 4.940143470361282e-06, + "loss": 1.0806, + "step": 8270 + }, + { + "epoch": 0.05993615496536298, + "grad_norm": 0.1961442083120346, + "learning_rate": 4.940071083700696e-06, + "loss": 1.0876, + "step": 8280 + }, + { + "epoch": 0.06000854162594917, + "grad_norm": 0.17273077368736267, + "learning_rate": 4.9399986970401095e-06, + "loss": 1.091, + "step": 8290 + }, + { + "epoch": 0.06008092828653536, + "grad_norm": 0.24428923428058624, + "learning_rate": 4.939926310379524e-06, + "loss": 1.0987, + "step": 8300 + }, + { + "epoch": 0.06015331494712155, + "grad_norm": 0.18236348032951355, + "learning_rate": 4.939853923718938e-06, + "loss": 1.0951, + "step": 8310 + }, + { + "epoch": 0.06022570160770773, + "grad_norm": 0.18725799024105072, + "learning_rate": 4.939781537058351e-06, + "loss": 1.0989, + "step": 8320 + }, + { + "epoch": 0.06029808826829392, + "grad_norm": 0.21253715455532074, + "learning_rate": 4.939709150397765e-06, + "loss": 1.1017, + "step": 8330 + }, + { + "epoch": 0.060370474928880105, + "grad_norm": 0.17409272491931915, + "learning_rate": 4.939636763737179e-06, + "loss": 1.0809, + "step": 8340 + }, + { + "epoch": 0.060442861589466296, + "grad_norm": 0.21270409226417542, + "learning_rate": 4.939564377076593e-06, + "loss": 1.084, + "step": 8350 + }, + { + "epoch": 0.06051524825005248, + "grad_norm": 0.17901867628097534, + "learning_rate": 4.9394919904160065e-06, + "loss": 1.0841, + "step": 8360 + }, + { + "epoch": 0.06058763491063867, + "grad_norm": 0.18769490718841553, + "learning_rate": 4.93941960375542e-06, + "loss": 1.0789, + "step": 8370 + }, + { + "epoch": 0.060660021571224854, + "grad_norm": 0.18529854714870453, + "learning_rate": 4.939347217094835e-06, + "loss": 1.0867, + "step": 8380 + }, + { + "epoch": 0.060732408231811044, + "grad_norm": 0.18796664476394653, + "learning_rate": 4.939274830434248e-06, + "loss": 1.0871, + "step": 8390 + }, + { + "epoch": 0.06080479489239723, + "grad_norm": 0.1806957870721817, + "learning_rate": 4.939202443773662e-06, + "loss": 1.1102, + "step": 8400 + }, + { + "epoch": 0.06087718155298342, + "grad_norm": 0.19365540146827698, + "learning_rate": 4.9391300571130754e-06, + "loss": 1.0842, + "step": 8410 + }, + { + "epoch": 0.0609495682135696, + "grad_norm": 0.18744197487831116, + "learning_rate": 4.93905767045249e-06, + "loss": 1.1065, + "step": 8420 + }, + { + "epoch": 0.06102195487415579, + "grad_norm": 0.1846928596496582, + "learning_rate": 4.9389852837919035e-06, + "loss": 1.0979, + "step": 8430 + }, + { + "epoch": 0.061094341534741976, + "grad_norm": 0.17853565514087677, + "learning_rate": 4.938912897131317e-06, + "loss": 1.0965, + "step": 8440 + }, + { + "epoch": 0.06116672819532817, + "grad_norm": 0.18652281165122986, + "learning_rate": 4.938840510470731e-06, + "loss": 1.0835, + "step": 8450 + }, + { + "epoch": 0.06123911485591435, + "grad_norm": 0.2018202245235443, + "learning_rate": 4.938768123810145e-06, + "loss": 1.0853, + "step": 8460 + }, + { + "epoch": 0.06131150151650054, + "grad_norm": 0.17888695001602173, + "learning_rate": 4.938695737149559e-06, + "loss": 1.0994, + "step": 8470 + }, + { + "epoch": 0.061383888177086725, + "grad_norm": 0.18451561033725739, + "learning_rate": 4.9386233504889725e-06, + "loss": 1.0874, + "step": 8480 + }, + { + "epoch": 0.061456274837672915, + "grad_norm": 0.18623459339141846, + "learning_rate": 4.938550963828386e-06, + "loss": 1.108, + "step": 8490 + }, + { + "epoch": 0.0615286614982591, + "grad_norm": 0.1710231751203537, + "learning_rate": 4.9384785771678e-06, + "loss": 1.0908, + "step": 8500 + }, + { + "epoch": 0.06160104815884529, + "grad_norm": 0.2325315922498703, + "learning_rate": 4.938406190507214e-06, + "loss": 1.0932, + "step": 8510 + }, + { + "epoch": 0.06167343481943147, + "grad_norm": 0.17885486781597137, + "learning_rate": 4.938333803846628e-06, + "loss": 1.088, + "step": 8520 + }, + { + "epoch": 0.061745821480017664, + "grad_norm": 0.18864506483078003, + "learning_rate": 4.938261417186041e-06, + "loss": 1.0981, + "step": 8530 + }, + { + "epoch": 0.06181820814060385, + "grad_norm": 0.18470600247383118, + "learning_rate": 4.938189030525455e-06, + "loss": 1.0974, + "step": 8540 + }, + { + "epoch": 0.06189059480119004, + "grad_norm": 0.18703562021255493, + "learning_rate": 4.9381166438648695e-06, + "loss": 1.0987, + "step": 8550 + }, + { + "epoch": 0.06196298146177622, + "grad_norm": 0.18582236766815186, + "learning_rate": 4.938044257204283e-06, + "loss": 1.0925, + "step": 8560 + }, + { + "epoch": 0.06203536812236241, + "grad_norm": 0.2140107899904251, + "learning_rate": 4.937971870543697e-06, + "loss": 1.0907, + "step": 8570 + }, + { + "epoch": 0.062107754782948596, + "grad_norm": 0.19368021190166473, + "learning_rate": 4.93789948388311e-06, + "loss": 1.0877, + "step": 8580 + }, + { + "epoch": 0.06218014144353479, + "grad_norm": 0.17043587565422058, + "learning_rate": 4.937827097222525e-06, + "loss": 1.0851, + "step": 8590 + }, + { + "epoch": 0.06225252810412097, + "grad_norm": 0.18179139494895935, + "learning_rate": 4.9377547105619375e-06, + "loss": 1.095, + "step": 8600 + }, + { + "epoch": 0.06232491476470716, + "grad_norm": 0.19978579878807068, + "learning_rate": 4.937682323901352e-06, + "loss": 1.0989, + "step": 8610 + }, + { + "epoch": 0.062397301425293344, + "grad_norm": 0.1800697147846222, + "learning_rate": 4.937609937240766e-06, + "loss": 1.1074, + "step": 8620 + }, + { + "epoch": 0.062469688085879535, + "grad_norm": 0.176766499876976, + "learning_rate": 4.937537550580179e-06, + "loss": 1.0897, + "step": 8630 + }, + { + "epoch": 0.06254207474646573, + "grad_norm": 0.19286136329174042, + "learning_rate": 4.937465163919593e-06, + "loss": 1.1041, + "step": 8640 + }, + { + "epoch": 0.0626144614070519, + "grad_norm": 0.2166859209537506, + "learning_rate": 4.937392777259007e-06, + "loss": 1.0827, + "step": 8650 + }, + { + "epoch": 0.06268684806763809, + "grad_norm": 0.17568807303905487, + "learning_rate": 4.937320390598421e-06, + "loss": 1.1016, + "step": 8660 + }, + { + "epoch": 0.06275923472822428, + "grad_norm": 0.19446556270122528, + "learning_rate": 4.9372480039378346e-06, + "loss": 1.0951, + "step": 8670 + }, + { + "epoch": 0.06283162138881047, + "grad_norm": 0.1830175518989563, + "learning_rate": 4.937175617277248e-06, + "loss": 1.0891, + "step": 8680 + }, + { + "epoch": 0.06290400804939665, + "grad_norm": 0.170004740357399, + "learning_rate": 4.937103230616662e-06, + "loss": 1.0872, + "step": 8690 + }, + { + "epoch": 0.06297639470998284, + "grad_norm": 0.1958620399236679, + "learning_rate": 4.937030843956076e-06, + "loss": 1.0987, + "step": 8700 + }, + { + "epoch": 0.06304878137056903, + "grad_norm": 0.19869394600391388, + "learning_rate": 4.93695845729549e-06, + "loss": 1.0727, + "step": 8710 + }, + { + "epoch": 0.06312116803115522, + "grad_norm": 0.17475168406963348, + "learning_rate": 4.9368860706349035e-06, + "loss": 1.0991, + "step": 8720 + }, + { + "epoch": 0.0631935546917414, + "grad_norm": 0.19080884754657745, + "learning_rate": 4.936813683974317e-06, + "loss": 1.0988, + "step": 8730 + }, + { + "epoch": 0.06326594135232759, + "grad_norm": 0.17002183198928833, + "learning_rate": 4.9367412973137316e-06, + "loss": 1.0859, + "step": 8740 + }, + { + "epoch": 0.06333832801291378, + "grad_norm": 0.19736173748970032, + "learning_rate": 4.936668910653145e-06, + "loss": 1.0757, + "step": 8750 + }, + { + "epoch": 0.06341071467349997, + "grad_norm": 0.20306764543056488, + "learning_rate": 4.936596523992559e-06, + "loss": 1.091, + "step": 8760 + }, + { + "epoch": 0.06348310133408615, + "grad_norm": 0.16739881038665771, + "learning_rate": 4.936524137331972e-06, + "loss": 1.0796, + "step": 8770 + }, + { + "epoch": 0.06355548799467234, + "grad_norm": 0.18593810498714447, + "learning_rate": 4.936451750671387e-06, + "loss": 1.0916, + "step": 8780 + }, + { + "epoch": 0.06362787465525853, + "grad_norm": 0.17287929356098175, + "learning_rate": 4.9363793640108005e-06, + "loss": 1.0747, + "step": 8790 + }, + { + "epoch": 0.06370026131584472, + "grad_norm": 0.19802750647068024, + "learning_rate": 4.936306977350214e-06, + "loss": 1.1005, + "step": 8800 + }, + { + "epoch": 0.06377264797643091, + "grad_norm": 0.17878587543964386, + "learning_rate": 4.936234590689628e-06, + "loss": 1.075, + "step": 8810 + }, + { + "epoch": 0.06384503463701709, + "grad_norm": 0.1824096143245697, + "learning_rate": 4.936162204029042e-06, + "loss": 1.0706, + "step": 8820 + }, + { + "epoch": 0.06391742129760328, + "grad_norm": 0.20461733639240265, + "learning_rate": 4.936089817368456e-06, + "loss": 1.0816, + "step": 8830 + }, + { + "epoch": 0.06398980795818947, + "grad_norm": 0.17579731345176697, + "learning_rate": 4.936017430707869e-06, + "loss": 1.0805, + "step": 8840 + }, + { + "epoch": 0.06406219461877566, + "grad_norm": 0.18281228840351105, + "learning_rate": 4.935945044047283e-06, + "loss": 1.0857, + "step": 8850 + }, + { + "epoch": 0.06413458127936184, + "grad_norm": 0.21296283602714539, + "learning_rate": 4.9358726573866975e-06, + "loss": 1.098, + "step": 8860 + }, + { + "epoch": 0.06420696793994803, + "grad_norm": 0.28200826048851013, + "learning_rate": 4.935800270726111e-06, + "loss": 1.0917, + "step": 8870 + }, + { + "epoch": 0.06427935460053422, + "grad_norm": 0.1753077208995819, + "learning_rate": 4.935727884065525e-06, + "loss": 1.0908, + "step": 8880 + }, + { + "epoch": 0.06435174126112041, + "grad_norm": 0.17760993540287018, + "learning_rate": 4.935655497404938e-06, + "loss": 1.0828, + "step": 8890 + }, + { + "epoch": 0.06442412792170658, + "grad_norm": 0.1909879595041275, + "learning_rate": 4.935583110744353e-06, + "loss": 1.0867, + "step": 8900 + }, + { + "epoch": 0.06449651458229277, + "grad_norm": 0.19046929478645325, + "learning_rate": 4.9355107240837664e-06, + "loss": 1.0913, + "step": 8910 + }, + { + "epoch": 0.06456890124287896, + "grad_norm": 0.19228294491767883, + "learning_rate": 4.93543833742318e-06, + "loss": 1.0848, + "step": 8920 + }, + { + "epoch": 0.06464128790346516, + "grad_norm": 0.1879161149263382, + "learning_rate": 4.935365950762594e-06, + "loss": 1.0996, + "step": 8930 + }, + { + "epoch": 0.06471367456405133, + "grad_norm": 0.18242213129997253, + "learning_rate": 4.935293564102008e-06, + "loss": 1.0873, + "step": 8940 + }, + { + "epoch": 0.06478606122463752, + "grad_norm": 0.19306018948554993, + "learning_rate": 4.935221177441422e-06, + "loss": 1.092, + "step": 8950 + }, + { + "epoch": 0.06485844788522371, + "grad_norm": 0.18336234986782074, + "learning_rate": 4.935148790780835e-06, + "loss": 1.1006, + "step": 8960 + }, + { + "epoch": 0.0649308345458099, + "grad_norm": 0.1992163509130478, + "learning_rate": 4.935076404120249e-06, + "loss": 1.0969, + "step": 8970 + }, + { + "epoch": 0.06500322120639608, + "grad_norm": 0.187247171998024, + "learning_rate": 4.9350040174596634e-06, + "loss": 1.0934, + "step": 8980 + }, + { + "epoch": 0.06507560786698227, + "grad_norm": 0.35581186413764954, + "learning_rate": 4.934931630799077e-06, + "loss": 1.0897, + "step": 8990 + }, + { + "epoch": 0.06514799452756846, + "grad_norm": 0.18312859535217285, + "learning_rate": 4.934859244138491e-06, + "loss": 1.0841, + "step": 9000 + }, + { + "epoch": 0.06522038118815465, + "grad_norm": 0.18880075216293335, + "learning_rate": 4.934786857477904e-06, + "loss": 1.0709, + "step": 9010 + }, + { + "epoch": 0.06529276784874083, + "grad_norm": 0.1736619919538498, + "learning_rate": 4.934714470817319e-06, + "loss": 1.0891, + "step": 9020 + }, + { + "epoch": 0.06536515450932702, + "grad_norm": 0.1821936070919037, + "learning_rate": 4.934642084156732e-06, + "loss": 1.0867, + "step": 9030 + }, + { + "epoch": 0.06543754116991321, + "grad_norm": 0.16732348501682281, + "learning_rate": 4.934569697496146e-06, + "loss": 1.0913, + "step": 9040 + }, + { + "epoch": 0.0655099278304994, + "grad_norm": 0.17859619855880737, + "learning_rate": 4.93449731083556e-06, + "loss": 1.0942, + "step": 9050 + }, + { + "epoch": 0.06558231449108558, + "grad_norm": 0.1915702074766159, + "learning_rate": 4.934424924174974e-06, + "loss": 1.0917, + "step": 9060 + }, + { + "epoch": 0.06565470115167177, + "grad_norm": 0.17785701155662537, + "learning_rate": 4.934352537514388e-06, + "loss": 1.0749, + "step": 9070 + }, + { + "epoch": 0.06572708781225796, + "grad_norm": 0.17183399200439453, + "learning_rate": 4.934280150853801e-06, + "loss": 1.0896, + "step": 9080 + }, + { + "epoch": 0.06579947447284415, + "grad_norm": 0.16996833682060242, + "learning_rate": 4.934207764193215e-06, + "loss": 1.0842, + "step": 9090 + }, + { + "epoch": 0.06587186113343033, + "grad_norm": 0.17851723730564117, + "learning_rate": 4.934135377532629e-06, + "loss": 1.0824, + "step": 9100 + }, + { + "epoch": 0.06594424779401652, + "grad_norm": 0.18523283302783966, + "learning_rate": 4.934062990872043e-06, + "loss": 1.081, + "step": 9110 + }, + { + "epoch": 0.06601663445460271, + "grad_norm": 0.1726391464471817, + "learning_rate": 4.933990604211457e-06, + "loss": 1.0866, + "step": 9120 + }, + { + "epoch": 0.0660890211151889, + "grad_norm": 0.18292652070522308, + "learning_rate": 4.93391821755087e-06, + "loss": 1.0933, + "step": 9130 + }, + { + "epoch": 0.06616140777577507, + "grad_norm": 0.18601487576961517, + "learning_rate": 4.933845830890284e-06, + "loss": 1.0834, + "step": 9140 + }, + { + "epoch": 0.06623379443636127, + "grad_norm": 0.16572198271751404, + "learning_rate": 4.9337734442296974e-06, + "loss": 1.0698, + "step": 9150 + }, + { + "epoch": 0.06630618109694746, + "grad_norm": 0.18648765981197357, + "learning_rate": 4.933701057569111e-06, + "loss": 1.0965, + "step": 9160 + }, + { + "epoch": 0.06637856775753365, + "grad_norm": 0.1860000491142273, + "learning_rate": 4.9336286709085255e-06, + "loss": 1.0782, + "step": 9170 + }, + { + "epoch": 0.06645095441811982, + "grad_norm": 0.2106804996728897, + "learning_rate": 4.933556284247939e-06, + "loss": 1.0899, + "step": 9180 + }, + { + "epoch": 0.06652334107870601, + "grad_norm": 0.16858477890491486, + "learning_rate": 4.933483897587353e-06, + "loss": 1.0902, + "step": 9190 + }, + { + "epoch": 0.0665957277392922, + "grad_norm": 0.17290134727954865, + "learning_rate": 4.933411510926766e-06, + "loss": 1.0991, + "step": 9200 + }, + { + "epoch": 0.0666681143998784, + "grad_norm": 0.18629100918769836, + "learning_rate": 4.933339124266181e-06, + "loss": 1.0849, + "step": 9210 + }, + { + "epoch": 0.06674050106046457, + "grad_norm": 0.18276071548461914, + "learning_rate": 4.9332667376055945e-06, + "loss": 1.08, + "step": 9220 + }, + { + "epoch": 0.06681288772105076, + "grad_norm": 0.2082335501909256, + "learning_rate": 4.933194350945008e-06, + "loss": 1.0747, + "step": 9230 + }, + { + "epoch": 0.06688527438163695, + "grad_norm": 0.20962758362293243, + "learning_rate": 4.933121964284422e-06, + "loss": 1.0784, + "step": 9240 + }, + { + "epoch": 0.06695766104222314, + "grad_norm": 0.20550638437271118, + "learning_rate": 4.933049577623836e-06, + "loss": 1.0856, + "step": 9250 + }, + { + "epoch": 0.06703004770280932, + "grad_norm": 0.17833620309829712, + "learning_rate": 4.93297719096325e-06, + "loss": 1.0797, + "step": 9260 + }, + { + "epoch": 0.06710243436339551, + "grad_norm": 0.17725925147533417, + "learning_rate": 4.932904804302663e-06, + "loss": 1.0872, + "step": 9270 + }, + { + "epoch": 0.0671748210239817, + "grad_norm": 0.1763269156217575, + "learning_rate": 4.932832417642077e-06, + "loss": 1.0956, + "step": 9280 + }, + { + "epoch": 0.06724720768456789, + "grad_norm": 0.17126107215881348, + "learning_rate": 4.932760030981491e-06, + "loss": 1.0727, + "step": 9290 + }, + { + "epoch": 0.06731959434515407, + "grad_norm": 0.16544747352600098, + "learning_rate": 4.932687644320905e-06, + "loss": 1.0782, + "step": 9300 + }, + { + "epoch": 0.06739198100574026, + "grad_norm": 0.175096794962883, + "learning_rate": 4.932615257660319e-06, + "loss": 1.0884, + "step": 9310 + }, + { + "epoch": 0.06746436766632645, + "grad_norm": 0.1752748042345047, + "learning_rate": 4.932542870999732e-06, + "loss": 1.0963, + "step": 9320 + }, + { + "epoch": 0.06753675432691264, + "grad_norm": 0.17110379040241241, + "learning_rate": 4.932470484339146e-06, + "loss": 1.1028, + "step": 9330 + }, + { + "epoch": 0.06760914098749882, + "grad_norm": 0.20907478034496307, + "learning_rate": 4.93239809767856e-06, + "loss": 1.0764, + "step": 9340 + }, + { + "epoch": 0.06768152764808501, + "grad_norm": 0.19506807625293732, + "learning_rate": 4.932325711017974e-06, + "loss": 1.065, + "step": 9350 + }, + { + "epoch": 0.0677539143086712, + "grad_norm": 0.1756713092327118, + "learning_rate": 4.932253324357388e-06, + "loss": 1.1012, + "step": 9360 + }, + { + "epoch": 0.06782630096925739, + "grad_norm": 0.18884608149528503, + "learning_rate": 4.932180937696801e-06, + "loss": 1.0739, + "step": 9370 + }, + { + "epoch": 0.06789868762984358, + "grad_norm": 0.19393788278102875, + "learning_rate": 4.932108551036216e-06, + "loss": 1.0933, + "step": 9380 + }, + { + "epoch": 0.06797107429042976, + "grad_norm": 0.18844221532344818, + "learning_rate": 4.932036164375629e-06, + "loss": 1.0796, + "step": 9390 + }, + { + "epoch": 0.06804346095101595, + "grad_norm": 0.17534899711608887, + "learning_rate": 4.931963777715043e-06, + "loss": 1.0807, + "step": 9400 + }, + { + "epoch": 0.06811584761160214, + "grad_norm": 0.1886800080537796, + "learning_rate": 4.9318913910544566e-06, + "loss": 1.0834, + "step": 9410 + }, + { + "epoch": 0.06818823427218833, + "grad_norm": 0.20587556064128876, + "learning_rate": 4.931819004393871e-06, + "loss": 1.1054, + "step": 9420 + }, + { + "epoch": 0.0682606209327745, + "grad_norm": 0.18019132316112518, + "learning_rate": 4.931746617733285e-06, + "loss": 1.076, + "step": 9430 + }, + { + "epoch": 0.0683330075933607, + "grad_norm": 0.18654106557369232, + "learning_rate": 4.931674231072698e-06, + "loss": 1.0768, + "step": 9440 + }, + { + "epoch": 0.06840539425394689, + "grad_norm": 0.1790568232536316, + "learning_rate": 4.931601844412112e-06, + "loss": 1.0751, + "step": 9450 + }, + { + "epoch": 0.06847778091453308, + "grad_norm": 0.17355284094810486, + "learning_rate": 4.931529457751526e-06, + "loss": 1.0755, + "step": 9460 + }, + { + "epoch": 0.06855016757511925, + "grad_norm": 0.18090367317199707, + "learning_rate": 4.93145707109094e-06, + "loss": 1.0927, + "step": 9470 + }, + { + "epoch": 0.06862255423570544, + "grad_norm": 0.18429531157016754, + "learning_rate": 4.9313846844303536e-06, + "loss": 1.0857, + "step": 9480 + }, + { + "epoch": 0.06869494089629163, + "grad_norm": 0.16996966302394867, + "learning_rate": 4.931312297769767e-06, + "loss": 1.0955, + "step": 9490 + }, + { + "epoch": 0.06876732755687782, + "grad_norm": 0.17073926329612732, + "learning_rate": 4.931239911109182e-06, + "loss": 1.0934, + "step": 9500 + }, + { + "epoch": 0.068839714217464, + "grad_norm": 0.19295451045036316, + "learning_rate": 4.931167524448595e-06, + "loss": 1.0843, + "step": 9510 + }, + { + "epoch": 0.06891210087805019, + "grad_norm": 0.19370391964912415, + "learning_rate": 4.931095137788009e-06, + "loss": 1.0939, + "step": 9520 + }, + { + "epoch": 0.06898448753863638, + "grad_norm": 0.18708017468452454, + "learning_rate": 4.9310227511274225e-06, + "loss": 1.0761, + "step": 9530 + }, + { + "epoch": 0.06905687419922257, + "grad_norm": 0.16977204382419586, + "learning_rate": 4.930950364466837e-06, + "loss": 1.0833, + "step": 9540 + }, + { + "epoch": 0.06912926085980875, + "grad_norm": 0.1928476095199585, + "learning_rate": 4.9308779778062506e-06, + "loss": 1.0773, + "step": 9550 + }, + { + "epoch": 0.06920164752039494, + "grad_norm": 0.19466859102249146, + "learning_rate": 4.930805591145664e-06, + "loss": 1.07, + "step": 9560 + }, + { + "epoch": 0.06927403418098113, + "grad_norm": 0.18574132025241852, + "learning_rate": 4.930733204485078e-06, + "loss": 1.0887, + "step": 9570 + }, + { + "epoch": 0.06934642084156732, + "grad_norm": 0.17645835876464844, + "learning_rate": 4.930660817824492e-06, + "loss": 1.0841, + "step": 9580 + }, + { + "epoch": 0.0694188075021535, + "grad_norm": 0.17621047794818878, + "learning_rate": 4.930588431163906e-06, + "loss": 1.0909, + "step": 9590 + }, + { + "epoch": 0.06949119416273969, + "grad_norm": 0.1808473765850067, + "learning_rate": 4.9305160445033195e-06, + "loss": 1.0885, + "step": 9600 + }, + { + "epoch": 0.06956358082332588, + "grad_norm": 0.1906941682100296, + "learning_rate": 4.930443657842733e-06, + "loss": 1.0699, + "step": 9610 + }, + { + "epoch": 0.06963596748391207, + "grad_norm": 0.18440672755241394, + "learning_rate": 4.9303712711821476e-06, + "loss": 1.0753, + "step": 9620 + }, + { + "epoch": 0.06970835414449825, + "grad_norm": 0.31207403540611267, + "learning_rate": 4.930298884521561e-06, + "loss": 1.0821, + "step": 9630 + }, + { + "epoch": 0.06978074080508444, + "grad_norm": 0.178600013256073, + "learning_rate": 4.930226497860975e-06, + "loss": 1.0827, + "step": 9640 + }, + { + "epoch": 0.06985312746567063, + "grad_norm": 0.1899259239435196, + "learning_rate": 4.930154111200388e-06, + "loss": 1.0673, + "step": 9650 + }, + { + "epoch": 0.06992551412625682, + "grad_norm": 0.1868361085653305, + "learning_rate": 4.930081724539802e-06, + "loss": 1.0838, + "step": 9660 + }, + { + "epoch": 0.069997900786843, + "grad_norm": 0.19484713673591614, + "learning_rate": 4.930009337879216e-06, + "loss": 1.0806, + "step": 9670 + }, + { + "epoch": 0.07007028744742919, + "grad_norm": 0.1849289834499359, + "learning_rate": 4.929936951218629e-06, + "loss": 1.0847, + "step": 9680 + }, + { + "epoch": 0.07014267410801538, + "grad_norm": 0.1799035519361496, + "learning_rate": 4.929864564558044e-06, + "loss": 1.0791, + "step": 9690 + }, + { + "epoch": 0.07021506076860157, + "grad_norm": 0.19453303515911102, + "learning_rate": 4.929792177897457e-06, + "loss": 1.0923, + "step": 9700 + }, + { + "epoch": 0.07028744742918774, + "grad_norm": 0.20945830643177032, + "learning_rate": 4.929719791236871e-06, + "loss": 1.1022, + "step": 9710 + }, + { + "epoch": 0.07035983408977393, + "grad_norm": 0.1825156956911087, + "learning_rate": 4.929647404576285e-06, + "loss": 1.0819, + "step": 9720 + }, + { + "epoch": 0.07043222075036012, + "grad_norm": 0.18250644207000732, + "learning_rate": 4.929575017915699e-06, + "loss": 1.0681, + "step": 9730 + }, + { + "epoch": 0.07050460741094632, + "grad_norm": 0.20941105484962463, + "learning_rate": 4.929502631255113e-06, + "loss": 1.0651, + "step": 9740 + }, + { + "epoch": 0.07057699407153249, + "grad_norm": 0.1895821988582611, + "learning_rate": 4.929430244594526e-06, + "loss": 1.0919, + "step": 9750 + }, + { + "epoch": 0.07064938073211868, + "grad_norm": 0.16658397018909454, + "learning_rate": 4.92935785793394e-06, + "loss": 1.0787, + "step": 9760 + }, + { + "epoch": 0.07072176739270487, + "grad_norm": 0.19257685542106628, + "learning_rate": 4.929285471273354e-06, + "loss": 1.0788, + "step": 9770 + }, + { + "epoch": 0.07079415405329106, + "grad_norm": 0.17767976224422455, + "learning_rate": 4.929213084612768e-06, + "loss": 1.0762, + "step": 9780 + }, + { + "epoch": 0.07086654071387724, + "grad_norm": 0.2234843373298645, + "learning_rate": 4.929140697952182e-06, + "loss": 1.0751, + "step": 9790 + }, + { + "epoch": 0.07093892737446343, + "grad_norm": 0.2335948348045349, + "learning_rate": 4.929068311291595e-06, + "loss": 1.0819, + "step": 9800 + }, + { + "epoch": 0.07101131403504962, + "grad_norm": 0.1768556535243988, + "learning_rate": 4.92899592463101e-06, + "loss": 1.0837, + "step": 9810 + }, + { + "epoch": 0.07108370069563581, + "grad_norm": 0.17308947443962097, + "learning_rate": 4.928923537970423e-06, + "loss": 1.074, + "step": 9820 + }, + { + "epoch": 0.07115608735622199, + "grad_norm": 0.18633995950222015, + "learning_rate": 4.928851151309837e-06, + "loss": 1.0755, + "step": 9830 + }, + { + "epoch": 0.07122847401680818, + "grad_norm": 0.19569677114486694, + "learning_rate": 4.9287787646492505e-06, + "loss": 1.0815, + "step": 9840 + }, + { + "epoch": 0.07130086067739437, + "grad_norm": 0.19831666350364685, + "learning_rate": 4.928706377988665e-06, + "loss": 1.084, + "step": 9850 + }, + { + "epoch": 0.07137324733798056, + "grad_norm": 0.18258638679981232, + "learning_rate": 4.928633991328079e-06, + "loss": 1.0666, + "step": 9860 + }, + { + "epoch": 0.07144563399856674, + "grad_norm": 0.16318519413471222, + "learning_rate": 4.928561604667492e-06, + "loss": 1.0754, + "step": 9870 + }, + { + "epoch": 0.07151802065915293, + "grad_norm": 0.17054226994514465, + "learning_rate": 4.928489218006906e-06, + "loss": 1.0815, + "step": 9880 + }, + { + "epoch": 0.07159040731973912, + "grad_norm": 0.1713060736656189, + "learning_rate": 4.92841683134632e-06, + "loss": 1.0822, + "step": 9890 + }, + { + "epoch": 0.07166279398032531, + "grad_norm": 0.17956681549549103, + "learning_rate": 4.928344444685734e-06, + "loss": 1.0747, + "step": 9900 + }, + { + "epoch": 0.07173518064091149, + "grad_norm": 0.19176030158996582, + "learning_rate": 4.9282720580251475e-06, + "loss": 1.074, + "step": 9910 + }, + { + "epoch": 0.07180756730149768, + "grad_norm": 0.17614677548408508, + "learning_rate": 4.928199671364561e-06, + "loss": 1.0894, + "step": 9920 + }, + { + "epoch": 0.07187995396208387, + "grad_norm": 0.1864258348941803, + "learning_rate": 4.928127284703975e-06, + "loss": 1.0826, + "step": 9930 + }, + { + "epoch": 0.07195234062267006, + "grad_norm": 0.18494221568107605, + "learning_rate": 4.928054898043389e-06, + "loss": 1.0613, + "step": 9940 + }, + { + "epoch": 0.07202472728325623, + "grad_norm": 0.17159555852413177, + "learning_rate": 4.927982511382803e-06, + "loss": 1.073, + "step": 9950 + }, + { + "epoch": 0.07209711394384243, + "grad_norm": 0.18387450277805328, + "learning_rate": 4.9279101247222165e-06, + "loss": 1.0729, + "step": 9960 + }, + { + "epoch": 0.07216950060442862, + "grad_norm": 0.1786179393529892, + "learning_rate": 4.92783773806163e-06, + "loss": 1.0867, + "step": 9970 + }, + { + "epoch": 0.0722418872650148, + "grad_norm": 0.1791893094778061, + "learning_rate": 4.9277653514010445e-06, + "loss": 1.0783, + "step": 9980 + }, + { + "epoch": 0.072314273925601, + "grad_norm": 0.18308618664741516, + "learning_rate": 4.927692964740458e-06, + "loss": 1.0667, + "step": 9990 + }, + { + "epoch": 0.07238666058618717, + "grad_norm": 0.18215444684028625, + "learning_rate": 4.927620578079872e-06, + "loss": 1.0804, + "step": 10000 + }, + { + "epoch": 0.07245904724677336, + "grad_norm": 0.1691853404045105, + "learning_rate": 4.927548191419285e-06, + "loss": 1.0837, + "step": 10010 + }, + { + "epoch": 0.07253143390735955, + "grad_norm": 0.1819877028465271, + "learning_rate": 4.9274758047587e-06, + "loss": 1.0799, + "step": 10020 + }, + { + "epoch": 0.07260382056794575, + "grad_norm": 0.17936040461063385, + "learning_rate": 4.9274034180981135e-06, + "loss": 1.0675, + "step": 10030 + }, + { + "epoch": 0.07267620722853192, + "grad_norm": 0.17777326703071594, + "learning_rate": 4.927331031437527e-06, + "loss": 1.0674, + "step": 10040 + }, + { + "epoch": 0.07274859388911811, + "grad_norm": 0.22706569731235504, + "learning_rate": 4.927258644776941e-06, + "loss": 1.0901, + "step": 10050 + }, + { + "epoch": 0.0728209805497043, + "grad_norm": 0.17453064024448395, + "learning_rate": 4.927186258116355e-06, + "loss": 1.0665, + "step": 10060 + }, + { + "epoch": 0.0728933672102905, + "grad_norm": 0.1731829047203064, + "learning_rate": 4.927113871455769e-06, + "loss": 1.0734, + "step": 10070 + }, + { + "epoch": 0.07296575387087667, + "grad_norm": 0.1648159921169281, + "learning_rate": 4.927041484795182e-06, + "loss": 1.0762, + "step": 10080 + }, + { + "epoch": 0.07303814053146286, + "grad_norm": 0.19320763647556305, + "learning_rate": 4.926969098134596e-06, + "loss": 1.0672, + "step": 10090 + }, + { + "epoch": 0.07311052719204905, + "grad_norm": 0.17869828641414642, + "learning_rate": 4.9268967114740105e-06, + "loss": 1.0748, + "step": 10100 + }, + { + "epoch": 0.07318291385263524, + "grad_norm": 0.17317935824394226, + "learning_rate": 4.926824324813424e-06, + "loss": 1.0725, + "step": 10110 + }, + { + "epoch": 0.07325530051322142, + "grad_norm": 0.18840764462947845, + "learning_rate": 4.926751938152838e-06, + "loss": 1.0902, + "step": 10120 + }, + { + "epoch": 0.07332768717380761, + "grad_norm": 0.18961584568023682, + "learning_rate": 4.926679551492251e-06, + "loss": 1.0677, + "step": 10130 + }, + { + "epoch": 0.0734000738343938, + "grad_norm": 0.1739116609096527, + "learning_rate": 4.926607164831666e-06, + "loss": 1.0744, + "step": 10140 + }, + { + "epoch": 0.07347246049497999, + "grad_norm": 0.19770686328411102, + "learning_rate": 4.926534778171079e-06, + "loss": 1.0968, + "step": 10150 + }, + { + "epoch": 0.07354484715556617, + "grad_norm": 0.17552542686462402, + "learning_rate": 4.926462391510493e-06, + "loss": 1.0851, + "step": 10160 + }, + { + "epoch": 0.07361723381615236, + "grad_norm": 0.18830542266368866, + "learning_rate": 4.926390004849907e-06, + "loss": 1.0855, + "step": 10170 + }, + { + "epoch": 0.07368962047673855, + "grad_norm": 0.2055501937866211, + "learning_rate": 4.926317618189321e-06, + "loss": 1.0803, + "step": 10180 + }, + { + "epoch": 0.07376200713732474, + "grad_norm": 0.18813519179821014, + "learning_rate": 4.926245231528734e-06, + "loss": 1.071, + "step": 10190 + }, + { + "epoch": 0.07383439379791092, + "grad_norm": 0.2169976532459259, + "learning_rate": 4.9261728448681475e-06, + "loss": 1.0872, + "step": 10200 + }, + { + "epoch": 0.0739067804584971, + "grad_norm": 0.2091887891292572, + "learning_rate": 4.926100458207562e-06, + "loss": 1.0811, + "step": 10210 + }, + { + "epoch": 0.0739791671190833, + "grad_norm": 0.17298966646194458, + "learning_rate": 4.9260280715469756e-06, + "loss": 1.0846, + "step": 10220 + }, + { + "epoch": 0.07405155377966949, + "grad_norm": 0.19214802980422974, + "learning_rate": 4.925955684886389e-06, + "loss": 1.0804, + "step": 10230 + }, + { + "epoch": 0.07412394044025566, + "grad_norm": 0.18159951269626617, + "learning_rate": 4.925883298225803e-06, + "loss": 1.0818, + "step": 10240 + }, + { + "epoch": 0.07419632710084186, + "grad_norm": 0.2073182910680771, + "learning_rate": 4.925810911565217e-06, + "loss": 1.0717, + "step": 10250 + }, + { + "epoch": 0.07426871376142805, + "grad_norm": 0.17683623731136322, + "learning_rate": 4.925738524904631e-06, + "loss": 1.0691, + "step": 10260 + }, + { + "epoch": 0.07434110042201424, + "grad_norm": 0.17853355407714844, + "learning_rate": 4.9256661382440445e-06, + "loss": 1.0807, + "step": 10270 + }, + { + "epoch": 0.07441348708260041, + "grad_norm": 0.1861315667629242, + "learning_rate": 4.925593751583458e-06, + "loss": 1.075, + "step": 10280 + }, + { + "epoch": 0.0744858737431866, + "grad_norm": 0.22674742341041565, + "learning_rate": 4.9255213649228726e-06, + "loss": 1.0824, + "step": 10290 + }, + { + "epoch": 0.0745582604037728, + "grad_norm": 0.1941600888967514, + "learning_rate": 4.925448978262286e-06, + "loss": 1.0663, + "step": 10300 + }, + { + "epoch": 0.07463064706435898, + "grad_norm": 0.1759897917509079, + "learning_rate": 4.9253765916017e-06, + "loss": 1.0761, + "step": 10310 + }, + { + "epoch": 0.07470303372494516, + "grad_norm": 0.1744421124458313, + "learning_rate": 4.925304204941113e-06, + "loss": 1.0705, + "step": 10320 + }, + { + "epoch": 0.07477542038553135, + "grad_norm": 0.1926373541355133, + "learning_rate": 4.925231818280528e-06, + "loss": 1.0714, + "step": 10330 + }, + { + "epoch": 0.07484780704611754, + "grad_norm": 0.19619183242321014, + "learning_rate": 4.9251594316199415e-06, + "loss": 1.0728, + "step": 10340 + }, + { + "epoch": 0.07492019370670373, + "grad_norm": 0.23807357251644135, + "learning_rate": 4.925087044959355e-06, + "loss": 1.0772, + "step": 10350 + }, + { + "epoch": 0.07499258036728991, + "grad_norm": 0.1988050788640976, + "learning_rate": 4.925014658298769e-06, + "loss": 1.0829, + "step": 10360 + }, + { + "epoch": 0.0750649670278761, + "grad_norm": 0.16706973314285278, + "learning_rate": 4.924942271638183e-06, + "loss": 1.0642, + "step": 10370 + }, + { + "epoch": 0.07513735368846229, + "grad_norm": 0.19122160971164703, + "learning_rate": 4.924869884977597e-06, + "loss": 1.0973, + "step": 10380 + }, + { + "epoch": 0.07520974034904848, + "grad_norm": 0.1846979856491089, + "learning_rate": 4.92479749831701e-06, + "loss": 1.0775, + "step": 10390 + }, + { + "epoch": 0.07528212700963466, + "grad_norm": 0.17939844727516174, + "learning_rate": 4.924725111656424e-06, + "loss": 1.0644, + "step": 10400 + }, + { + "epoch": 0.07535451367022085, + "grad_norm": 0.17780403792858124, + "learning_rate": 4.9246527249958385e-06, + "loss": 1.0655, + "step": 10410 + }, + { + "epoch": 0.07542690033080704, + "grad_norm": 0.2274876832962036, + "learning_rate": 4.924580338335252e-06, + "loss": 1.0866, + "step": 10420 + }, + { + "epoch": 0.07549928699139323, + "grad_norm": 0.23748233914375305, + "learning_rate": 4.924507951674666e-06, + "loss": 1.0637, + "step": 10430 + }, + { + "epoch": 0.0755716736519794, + "grad_norm": 0.22824735939502716, + "learning_rate": 4.924435565014079e-06, + "loss": 1.0908, + "step": 10440 + }, + { + "epoch": 0.0756440603125656, + "grad_norm": 0.1775079220533371, + "learning_rate": 4.924363178353494e-06, + "loss": 1.0736, + "step": 10450 + }, + { + "epoch": 0.07571644697315179, + "grad_norm": 0.1669524759054184, + "learning_rate": 4.9242907916929074e-06, + "loss": 1.0772, + "step": 10460 + }, + { + "epoch": 0.07578883363373798, + "grad_norm": 0.19081354141235352, + "learning_rate": 4.924218405032321e-06, + "loss": 1.0668, + "step": 10470 + }, + { + "epoch": 0.07586122029432416, + "grad_norm": 0.18212305009365082, + "learning_rate": 4.924146018371735e-06, + "loss": 1.0824, + "step": 10480 + }, + { + "epoch": 0.07593360695491035, + "grad_norm": 0.19392240047454834, + "learning_rate": 4.924073631711149e-06, + "loss": 1.086, + "step": 10490 + }, + { + "epoch": 0.07600599361549654, + "grad_norm": 0.17266540229320526, + "learning_rate": 4.924001245050563e-06, + "loss": 1.0713, + "step": 10500 + }, + { + "epoch": 0.07607838027608273, + "grad_norm": 0.16641221940517426, + "learning_rate": 4.923928858389976e-06, + "loss": 1.0734, + "step": 10510 + }, + { + "epoch": 0.0761507669366689, + "grad_norm": 0.19562187790870667, + "learning_rate": 4.92385647172939e-06, + "loss": 1.0696, + "step": 10520 + }, + { + "epoch": 0.0762231535972551, + "grad_norm": 0.1799498200416565, + "learning_rate": 4.923784085068804e-06, + "loss": 1.0691, + "step": 10530 + }, + { + "epoch": 0.07629554025784129, + "grad_norm": 0.21454447507858276, + "learning_rate": 4.923711698408218e-06, + "loss": 1.0651, + "step": 10540 + }, + { + "epoch": 0.07636792691842748, + "grad_norm": 0.16438615322113037, + "learning_rate": 4.923639311747632e-06, + "loss": 1.0758, + "step": 10550 + }, + { + "epoch": 0.07644031357901367, + "grad_norm": 0.17331558465957642, + "learning_rate": 4.923566925087045e-06, + "loss": 1.092, + "step": 10560 + }, + { + "epoch": 0.07651270023959984, + "grad_norm": 0.17129570245742798, + "learning_rate": 4.923494538426459e-06, + "loss": 1.0644, + "step": 10570 + }, + { + "epoch": 0.07658508690018603, + "grad_norm": 0.17949286103248596, + "learning_rate": 4.923422151765873e-06, + "loss": 1.0765, + "step": 10580 + }, + { + "epoch": 0.07665747356077222, + "grad_norm": 0.20298872888088226, + "learning_rate": 4.923349765105287e-06, + "loss": 1.0813, + "step": 10590 + }, + { + "epoch": 0.07672986022135841, + "grad_norm": 0.17841701209545135, + "learning_rate": 4.923277378444701e-06, + "loss": 1.0763, + "step": 10600 + }, + { + "epoch": 0.07680224688194459, + "grad_norm": 0.1926995813846588, + "learning_rate": 4.923204991784114e-06, + "loss": 1.0666, + "step": 10610 + }, + { + "epoch": 0.07687463354253078, + "grad_norm": 0.20982502400875092, + "learning_rate": 4.923132605123529e-06, + "loss": 1.0674, + "step": 10620 + }, + { + "epoch": 0.07694702020311697, + "grad_norm": 0.18079523742198944, + "learning_rate": 4.923060218462942e-06, + "loss": 1.0782, + "step": 10630 + }, + { + "epoch": 0.07701940686370316, + "grad_norm": 0.18885524570941925, + "learning_rate": 4.922987831802356e-06, + "loss": 1.0675, + "step": 10640 + }, + { + "epoch": 0.07709179352428934, + "grad_norm": 0.1845076084136963, + "learning_rate": 4.9229154451417695e-06, + "loss": 1.0691, + "step": 10650 + }, + { + "epoch": 0.07716418018487553, + "grad_norm": 0.18656429648399353, + "learning_rate": 4.922843058481184e-06, + "loss": 1.0698, + "step": 10660 + }, + { + "epoch": 0.07723656684546172, + "grad_norm": 0.1843183934688568, + "learning_rate": 4.922770671820598e-06, + "loss": 1.0685, + "step": 10670 + }, + { + "epoch": 0.07730895350604791, + "grad_norm": 0.22134332358837128, + "learning_rate": 4.922698285160011e-06, + "loss": 1.0716, + "step": 10680 + }, + { + "epoch": 0.07738134016663409, + "grad_norm": 0.23194286227226257, + "learning_rate": 4.922625898499425e-06, + "loss": 1.0655, + "step": 10690 + }, + { + "epoch": 0.07745372682722028, + "grad_norm": 0.18523964285850525, + "learning_rate": 4.922553511838839e-06, + "loss": 1.0793, + "step": 10700 + }, + { + "epoch": 0.07752611348780647, + "grad_norm": 0.19442328810691833, + "learning_rate": 4.922481125178253e-06, + "loss": 1.0722, + "step": 10710 + }, + { + "epoch": 0.07759850014839266, + "grad_norm": 0.17840822041034698, + "learning_rate": 4.922408738517666e-06, + "loss": 1.0698, + "step": 10720 + }, + { + "epoch": 0.07767088680897884, + "grad_norm": 0.1624884307384491, + "learning_rate": 4.92233635185708e-06, + "loss": 1.0767, + "step": 10730 + }, + { + "epoch": 0.07774327346956503, + "grad_norm": 0.1825413703918457, + "learning_rate": 4.922263965196494e-06, + "loss": 1.0707, + "step": 10740 + }, + { + "epoch": 0.07781566013015122, + "grad_norm": 0.18537434935569763, + "learning_rate": 4.922191578535907e-06, + "loss": 1.0802, + "step": 10750 + }, + { + "epoch": 0.07788804679073741, + "grad_norm": 0.16724461317062378, + "learning_rate": 4.922119191875321e-06, + "loss": 1.0879, + "step": 10760 + }, + { + "epoch": 0.07796043345132359, + "grad_norm": 0.20884546637535095, + "learning_rate": 4.9220468052147355e-06, + "loss": 1.0533, + "step": 10770 + }, + { + "epoch": 0.07803282011190978, + "grad_norm": 0.19194482266902924, + "learning_rate": 4.921974418554149e-06, + "loss": 1.083, + "step": 10780 + }, + { + "epoch": 0.07810520677249597, + "grad_norm": 0.17105208337306976, + "learning_rate": 4.921902031893563e-06, + "loss": 1.0832, + "step": 10790 + }, + { + "epoch": 0.07817759343308216, + "grad_norm": 0.18788471817970276, + "learning_rate": 4.921829645232976e-06, + "loss": 1.0891, + "step": 10800 + }, + { + "epoch": 0.07824998009366833, + "grad_norm": 0.20326700806617737, + "learning_rate": 4.921757258572391e-06, + "loss": 1.0925, + "step": 10810 + }, + { + "epoch": 0.07832236675425452, + "grad_norm": 0.17056359350681305, + "learning_rate": 4.921684871911804e-06, + "loss": 1.06, + "step": 10820 + }, + { + "epoch": 0.07839475341484071, + "grad_norm": 0.17205092310905457, + "learning_rate": 4.921612485251218e-06, + "loss": 1.0682, + "step": 10830 + }, + { + "epoch": 0.0784671400754269, + "grad_norm": 0.17760831117630005, + "learning_rate": 4.921540098590632e-06, + "loss": 1.079, + "step": 10840 + }, + { + "epoch": 0.07853952673601308, + "grad_norm": 0.2105284333229065, + "learning_rate": 4.921467711930046e-06, + "loss": 1.0787, + "step": 10850 + }, + { + "epoch": 0.07861191339659927, + "grad_norm": 0.1877516359090805, + "learning_rate": 4.92139532526946e-06, + "loss": 1.0756, + "step": 10860 + }, + { + "epoch": 0.07868430005718546, + "grad_norm": 0.15930220484733582, + "learning_rate": 4.921322938608873e-06, + "loss": 1.0785, + "step": 10870 + }, + { + "epoch": 0.07875668671777165, + "grad_norm": 0.19607755541801453, + "learning_rate": 4.921250551948287e-06, + "loss": 1.0908, + "step": 10880 + }, + { + "epoch": 0.07882907337835783, + "grad_norm": 0.18164999783039093, + "learning_rate": 4.921178165287701e-06, + "loss": 1.049, + "step": 10890 + }, + { + "epoch": 0.07890146003894402, + "grad_norm": 0.17521744966506958, + "learning_rate": 4.921105778627115e-06, + "loss": 1.0801, + "step": 10900 + }, + { + "epoch": 0.07897384669953021, + "grad_norm": 0.1904889941215515, + "learning_rate": 4.921033391966529e-06, + "loss": 1.0697, + "step": 10910 + }, + { + "epoch": 0.0790462333601164, + "grad_norm": 0.1861046850681305, + "learning_rate": 4.920961005305942e-06, + "loss": 1.0619, + "step": 10920 + }, + { + "epoch": 0.07911862002070258, + "grad_norm": 0.18160459399223328, + "learning_rate": 4.920888618645357e-06, + "loss": 1.075, + "step": 10930 + }, + { + "epoch": 0.07919100668128877, + "grad_norm": 0.1644423007965088, + "learning_rate": 4.92081623198477e-06, + "loss": 1.0704, + "step": 10940 + }, + { + "epoch": 0.07926339334187496, + "grad_norm": 0.192913219332695, + "learning_rate": 4.920743845324184e-06, + "loss": 1.0905, + "step": 10950 + }, + { + "epoch": 0.07933578000246115, + "grad_norm": 0.18490082025527954, + "learning_rate": 4.9206714586635976e-06, + "loss": 1.0704, + "step": 10960 + }, + { + "epoch": 0.07940816666304733, + "grad_norm": 0.17639735341072083, + "learning_rate": 4.920599072003012e-06, + "loss": 1.0789, + "step": 10970 + }, + { + "epoch": 0.07948055332363352, + "grad_norm": 0.1718800812959671, + "learning_rate": 4.920526685342426e-06, + "loss": 1.0849, + "step": 10980 + }, + { + "epoch": 0.07955293998421971, + "grad_norm": 0.1801319122314453, + "learning_rate": 4.920454298681839e-06, + "loss": 1.0621, + "step": 10990 + }, + { + "epoch": 0.0796253266448059, + "grad_norm": 0.17438428103923798, + "learning_rate": 4.920381912021253e-06, + "loss": 1.0694, + "step": 11000 + }, + { + "epoch": 0.07969771330539208, + "grad_norm": 0.19011586904525757, + "learning_rate": 4.920309525360667e-06, + "loss": 1.0672, + "step": 11010 + }, + { + "epoch": 0.07977009996597827, + "grad_norm": 0.17366407811641693, + "learning_rate": 4.920237138700081e-06, + "loss": 1.0692, + "step": 11020 + }, + { + "epoch": 0.07984248662656446, + "grad_norm": 0.18017128109931946, + "learning_rate": 4.9201647520394946e-06, + "loss": 1.0839, + "step": 11030 + }, + { + "epoch": 0.07991487328715065, + "grad_norm": 0.18107180297374725, + "learning_rate": 4.920092365378908e-06, + "loss": 1.0789, + "step": 11040 + }, + { + "epoch": 0.07998725994773682, + "grad_norm": 0.1829778552055359, + "learning_rate": 4.920019978718323e-06, + "loss": 1.0716, + "step": 11050 + }, + { + "epoch": 0.08005964660832302, + "grad_norm": 0.17643770575523376, + "learning_rate": 4.919947592057736e-06, + "loss": 1.0778, + "step": 11060 + }, + { + "epoch": 0.0801320332689092, + "grad_norm": 0.17540425062179565, + "learning_rate": 4.91987520539715e-06, + "loss": 1.0685, + "step": 11070 + }, + { + "epoch": 0.0802044199294954, + "grad_norm": 0.18910111486911774, + "learning_rate": 4.9198028187365635e-06, + "loss": 1.0639, + "step": 11080 + }, + { + "epoch": 0.08027680659008157, + "grad_norm": 0.17153383791446686, + "learning_rate": 4.919730432075978e-06, + "loss": 1.0872, + "step": 11090 + }, + { + "epoch": 0.08034919325066776, + "grad_norm": 0.16545794904232025, + "learning_rate": 4.9196580454153916e-06, + "loss": 1.0681, + "step": 11100 + }, + { + "epoch": 0.08042157991125395, + "grad_norm": 0.19072282314300537, + "learning_rate": 4.919585658754805e-06, + "loss": 1.0723, + "step": 11110 + }, + { + "epoch": 0.08049396657184014, + "grad_norm": 0.190969780087471, + "learning_rate": 4.919513272094219e-06, + "loss": 1.0853, + "step": 11120 + }, + { + "epoch": 0.08056635323242634, + "grad_norm": 0.17297950387001038, + "learning_rate": 4.919440885433633e-06, + "loss": 1.071, + "step": 11130 + }, + { + "epoch": 0.08063873989301251, + "grad_norm": 0.19058924913406372, + "learning_rate": 4.919368498773047e-06, + "loss": 1.0879, + "step": 11140 + }, + { + "epoch": 0.0807111265535987, + "grad_norm": 0.19707483053207397, + "learning_rate": 4.9192961121124605e-06, + "loss": 1.0816, + "step": 11150 + }, + { + "epoch": 0.0807835132141849, + "grad_norm": 0.1789708286523819, + "learning_rate": 4.919223725451874e-06, + "loss": 1.0836, + "step": 11160 + }, + { + "epoch": 0.08085589987477108, + "grad_norm": 0.17154861986637115, + "learning_rate": 4.919151338791288e-06, + "loss": 1.0631, + "step": 11170 + }, + { + "epoch": 0.08092828653535726, + "grad_norm": 0.1575436145067215, + "learning_rate": 4.919078952130702e-06, + "loss": 1.0772, + "step": 11180 + }, + { + "epoch": 0.08100067319594345, + "grad_norm": 0.18095077574253082, + "learning_rate": 4.919006565470116e-06, + "loss": 1.0588, + "step": 11190 + }, + { + "epoch": 0.08107305985652964, + "grad_norm": 0.18087738752365112, + "learning_rate": 4.9189341788095294e-06, + "loss": 1.0859, + "step": 11200 + }, + { + "epoch": 0.08114544651711583, + "grad_norm": 0.41991138458251953, + "learning_rate": 4.918861792148943e-06, + "loss": 1.0671, + "step": 11210 + }, + { + "epoch": 0.08121783317770201, + "grad_norm": 0.19493845105171204, + "learning_rate": 4.9187894054883575e-06, + "loss": 1.0691, + "step": 11220 + }, + { + "epoch": 0.0812902198382882, + "grad_norm": 0.16449686884880066, + "learning_rate": 4.918717018827771e-06, + "loss": 1.0713, + "step": 11230 + }, + { + "epoch": 0.08136260649887439, + "grad_norm": 0.16916941106319427, + "learning_rate": 4.918644632167185e-06, + "loss": 1.0779, + "step": 11240 + }, + { + "epoch": 0.08143499315946058, + "grad_norm": 0.1702130287885666, + "learning_rate": 4.918572245506598e-06, + "loss": 1.0677, + "step": 11250 + }, + { + "epoch": 0.08150737982004676, + "grad_norm": 0.1953752636909485, + "learning_rate": 4.918499858846012e-06, + "loss": 1.0766, + "step": 11260 + }, + { + "epoch": 0.08157976648063295, + "grad_norm": 0.18306571245193481, + "learning_rate": 4.918427472185426e-06, + "loss": 1.0737, + "step": 11270 + }, + { + "epoch": 0.08165215314121914, + "grad_norm": 0.15860667824745178, + "learning_rate": 4.91835508552484e-06, + "loss": 1.0552, + "step": 11280 + }, + { + "epoch": 0.08172453980180533, + "grad_norm": 0.1841406524181366, + "learning_rate": 4.918282698864254e-06, + "loss": 1.0779, + "step": 11290 + }, + { + "epoch": 0.0817969264623915, + "grad_norm": 0.22922413051128387, + "learning_rate": 4.918210312203667e-06, + "loss": 1.0727, + "step": 11300 + }, + { + "epoch": 0.0818693131229777, + "grad_norm": 0.17833411693572998, + "learning_rate": 4.918137925543081e-06, + "loss": 1.0751, + "step": 11310 + }, + { + "epoch": 0.08194169978356389, + "grad_norm": 0.1674196720123291, + "learning_rate": 4.9180655388824945e-06, + "loss": 1.0797, + "step": 11320 + }, + { + "epoch": 0.08201408644415008, + "grad_norm": 0.18351003527641296, + "learning_rate": 4.917993152221909e-06, + "loss": 1.0893, + "step": 11330 + }, + { + "epoch": 0.08208647310473625, + "grad_norm": 0.17393292486667633, + "learning_rate": 4.917920765561323e-06, + "loss": 1.073, + "step": 11340 + }, + { + "epoch": 0.08215885976532245, + "grad_norm": 0.18493473529815674, + "learning_rate": 4.917848378900736e-06, + "loss": 1.0783, + "step": 11350 + }, + { + "epoch": 0.08223124642590864, + "grad_norm": 0.18359433114528656, + "learning_rate": 4.91777599224015e-06, + "loss": 1.0538, + "step": 11360 + }, + { + "epoch": 0.08230363308649483, + "grad_norm": 0.1751643419265747, + "learning_rate": 4.917703605579564e-06, + "loss": 1.0779, + "step": 11370 + }, + { + "epoch": 0.082376019747081, + "grad_norm": 0.16881217062473297, + "learning_rate": 4.917631218918978e-06, + "loss": 1.0722, + "step": 11380 + }, + { + "epoch": 0.0824484064076672, + "grad_norm": 0.19173048436641693, + "learning_rate": 4.9175588322583915e-06, + "loss": 1.0616, + "step": 11390 + }, + { + "epoch": 0.08252079306825338, + "grad_norm": 0.1824052780866623, + "learning_rate": 4.917486445597805e-06, + "loss": 1.0691, + "step": 11400 + }, + { + "epoch": 0.08259317972883957, + "grad_norm": 0.1803470402956009, + "learning_rate": 4.91741405893722e-06, + "loss": 1.0615, + "step": 11410 + }, + { + "epoch": 0.08266556638942575, + "grad_norm": 0.18563689291477203, + "learning_rate": 4.917341672276633e-06, + "loss": 1.0701, + "step": 11420 + }, + { + "epoch": 0.08273795305001194, + "grad_norm": 0.17787225544452667, + "learning_rate": 4.917269285616047e-06, + "loss": 1.0812, + "step": 11430 + }, + { + "epoch": 0.08281033971059813, + "grad_norm": 0.1732112318277359, + "learning_rate": 4.9171968989554605e-06, + "loss": 1.0754, + "step": 11440 + }, + { + "epoch": 0.08288272637118432, + "grad_norm": 0.19006459414958954, + "learning_rate": 4.917124512294875e-06, + "loss": 1.0577, + "step": 11450 + }, + { + "epoch": 0.0829551130317705, + "grad_norm": 0.17567865550518036, + "learning_rate": 4.9170521256342885e-06, + "loss": 1.0775, + "step": 11460 + }, + { + "epoch": 0.08302749969235669, + "grad_norm": 0.1794678121805191, + "learning_rate": 4.916979738973702e-06, + "loss": 1.0655, + "step": 11470 + }, + { + "epoch": 0.08309988635294288, + "grad_norm": 0.20485854148864746, + "learning_rate": 4.916907352313116e-06, + "loss": 1.065, + "step": 11480 + }, + { + "epoch": 0.08317227301352907, + "grad_norm": 0.2057936191558838, + "learning_rate": 4.91683496565253e-06, + "loss": 1.0769, + "step": 11490 + }, + { + "epoch": 0.08324465967411525, + "grad_norm": 0.17958250641822815, + "learning_rate": 4.916762578991944e-06, + "loss": 1.0644, + "step": 11500 + }, + { + "epoch": 0.08331704633470144, + "grad_norm": 0.16068291664123535, + "learning_rate": 4.9166901923313575e-06, + "loss": 1.0761, + "step": 11510 + }, + { + "epoch": 0.08338943299528763, + "grad_norm": 0.18293248116970062, + "learning_rate": 4.916617805670771e-06, + "loss": 1.0632, + "step": 11520 + }, + { + "epoch": 0.08346181965587382, + "grad_norm": 0.19636189937591553, + "learning_rate": 4.9165454190101855e-06, + "loss": 1.0827, + "step": 11530 + }, + { + "epoch": 0.08353420631646, + "grad_norm": 0.18141409754753113, + "learning_rate": 4.916473032349599e-06, + "loss": 1.077, + "step": 11540 + }, + { + "epoch": 0.08360659297704619, + "grad_norm": 0.18197061121463776, + "learning_rate": 4.916400645689013e-06, + "loss": 1.0639, + "step": 11550 + }, + { + "epoch": 0.08367897963763238, + "grad_norm": 0.18665997684001923, + "learning_rate": 4.916328259028426e-06, + "loss": 1.0709, + "step": 11560 + }, + { + "epoch": 0.08375136629821857, + "grad_norm": 0.18202146887779236, + "learning_rate": 4.916255872367841e-06, + "loss": 1.0797, + "step": 11570 + }, + { + "epoch": 0.08382375295880475, + "grad_norm": 0.16882570087909698, + "learning_rate": 4.9161834857072545e-06, + "loss": 1.0683, + "step": 11580 + }, + { + "epoch": 0.08389613961939094, + "grad_norm": 0.17632512748241425, + "learning_rate": 4.916111099046668e-06, + "loss": 1.0526, + "step": 11590 + }, + { + "epoch": 0.08396852627997713, + "grad_norm": 0.1826392412185669, + "learning_rate": 4.916038712386082e-06, + "loss": 1.0694, + "step": 11600 + }, + { + "epoch": 0.08404091294056332, + "grad_norm": 0.17934630811214447, + "learning_rate": 4.915966325725496e-06, + "loss": 1.0621, + "step": 11610 + }, + { + "epoch": 0.0841132996011495, + "grad_norm": 0.23041057586669922, + "learning_rate": 4.91589393906491e-06, + "loss": 1.0716, + "step": 11620 + }, + { + "epoch": 0.08418568626173568, + "grad_norm": 0.18051432073116302, + "learning_rate": 4.915821552404323e-06, + "loss": 1.0688, + "step": 11630 + }, + { + "epoch": 0.08425807292232187, + "grad_norm": 0.18257080018520355, + "learning_rate": 4.915749165743737e-06, + "loss": 1.0637, + "step": 11640 + }, + { + "epoch": 0.08433045958290807, + "grad_norm": 0.17362256348133087, + "learning_rate": 4.9156767790831515e-06, + "loss": 1.0703, + "step": 11650 + }, + { + "epoch": 0.08440284624349424, + "grad_norm": 0.1855854094028473, + "learning_rate": 4.915604392422565e-06, + "loss": 1.0711, + "step": 11660 + }, + { + "epoch": 0.08447523290408043, + "grad_norm": 0.17203544080257416, + "learning_rate": 4.915532005761979e-06, + "loss": 1.0666, + "step": 11670 + }, + { + "epoch": 0.08454761956466662, + "grad_norm": 0.18600568175315857, + "learning_rate": 4.915459619101392e-06, + "loss": 1.0635, + "step": 11680 + }, + { + "epoch": 0.08462000622525281, + "grad_norm": 0.1972518265247345, + "learning_rate": 4.915387232440807e-06, + "loss": 1.0761, + "step": 11690 + }, + { + "epoch": 0.08469239288583899, + "grad_norm": 0.1868593841791153, + "learning_rate": 4.91531484578022e-06, + "loss": 1.0943, + "step": 11700 + }, + { + "epoch": 0.08476477954642518, + "grad_norm": 0.18218737840652466, + "learning_rate": 4.915242459119634e-06, + "loss": 1.0787, + "step": 11710 + }, + { + "epoch": 0.08483716620701137, + "grad_norm": 0.19324436783790588, + "learning_rate": 4.915170072459048e-06, + "loss": 1.0549, + "step": 11720 + }, + { + "epoch": 0.08490955286759756, + "grad_norm": 0.16747941076755524, + "learning_rate": 4.915097685798462e-06, + "loss": 1.0751, + "step": 11730 + }, + { + "epoch": 0.08498193952818375, + "grad_norm": 0.1799047440290451, + "learning_rate": 4.915025299137876e-06, + "loss": 1.072, + "step": 11740 + }, + { + "epoch": 0.08505432618876993, + "grad_norm": 0.17832840979099274, + "learning_rate": 4.914952912477289e-06, + "loss": 1.0627, + "step": 11750 + }, + { + "epoch": 0.08512671284935612, + "grad_norm": 0.21101170778274536, + "learning_rate": 4.914880525816703e-06, + "loss": 1.0887, + "step": 11760 + }, + { + "epoch": 0.08519909950994231, + "grad_norm": 0.20275020599365234, + "learning_rate": 4.9148081391561166e-06, + "loss": 1.0752, + "step": 11770 + }, + { + "epoch": 0.0852714861705285, + "grad_norm": 0.173346146941185, + "learning_rate": 4.91473575249553e-06, + "loss": 1.0769, + "step": 11780 + }, + { + "epoch": 0.08534387283111468, + "grad_norm": 0.19621768593788147, + "learning_rate": 4.914663365834944e-06, + "loss": 1.0652, + "step": 11790 + }, + { + "epoch": 0.08541625949170087, + "grad_norm": 0.19705529510974884, + "learning_rate": 4.914590979174358e-06, + "loss": 1.0729, + "step": 11800 + }, + { + "epoch": 0.08548864615228706, + "grad_norm": 0.1750185489654541, + "learning_rate": 4.914518592513772e-06, + "loss": 1.0859, + "step": 11810 + }, + { + "epoch": 0.08556103281287325, + "grad_norm": 0.17165139317512512, + "learning_rate": 4.9144462058531855e-06, + "loss": 1.0737, + "step": 11820 + }, + { + "epoch": 0.08563341947345943, + "grad_norm": 0.18447789549827576, + "learning_rate": 4.914373819192599e-06, + "loss": 1.0659, + "step": 11830 + }, + { + "epoch": 0.08570580613404562, + "grad_norm": 0.18128694593906403, + "learning_rate": 4.9143014325320136e-06, + "loss": 1.0664, + "step": 11840 + }, + { + "epoch": 0.08577819279463181, + "grad_norm": 0.1879102736711502, + "learning_rate": 4.914229045871427e-06, + "loss": 1.0826, + "step": 11850 + }, + { + "epoch": 0.085850579455218, + "grad_norm": 0.1664225310087204, + "learning_rate": 4.914156659210841e-06, + "loss": 1.0587, + "step": 11860 + }, + { + "epoch": 0.08592296611580418, + "grad_norm": 0.18422931432724, + "learning_rate": 4.914084272550254e-06, + "loss": 1.0793, + "step": 11870 + }, + { + "epoch": 0.08599535277639037, + "grad_norm": 0.17721515893936157, + "learning_rate": 4.914011885889669e-06, + "loss": 1.0602, + "step": 11880 + }, + { + "epoch": 0.08606773943697656, + "grad_norm": 0.17775796353816986, + "learning_rate": 4.9139394992290825e-06, + "loss": 1.0734, + "step": 11890 + }, + { + "epoch": 0.08614012609756275, + "grad_norm": 0.19729198515415192, + "learning_rate": 4.913867112568496e-06, + "loss": 1.0694, + "step": 11900 + }, + { + "epoch": 0.08621251275814892, + "grad_norm": 0.17630425095558167, + "learning_rate": 4.91379472590791e-06, + "loss": 1.0646, + "step": 11910 + }, + { + "epoch": 0.08628489941873511, + "grad_norm": 0.1867901086807251, + "learning_rate": 4.913722339247324e-06, + "loss": 1.0823, + "step": 11920 + }, + { + "epoch": 0.0863572860793213, + "grad_norm": 0.17957784235477448, + "learning_rate": 4.913649952586738e-06, + "loss": 1.0621, + "step": 11930 + }, + { + "epoch": 0.0864296727399075, + "grad_norm": 0.18513678014278412, + "learning_rate": 4.9135775659261514e-06, + "loss": 1.0737, + "step": 11940 + }, + { + "epoch": 0.08650205940049367, + "grad_norm": 0.18073897063732147, + "learning_rate": 4.913505179265565e-06, + "loss": 1.0567, + "step": 11950 + }, + { + "epoch": 0.08657444606107986, + "grad_norm": 0.19034330546855927, + "learning_rate": 4.913432792604979e-06, + "loss": 1.0701, + "step": 11960 + }, + { + "epoch": 0.08664683272166605, + "grad_norm": 0.19168923795223236, + "learning_rate": 4.913360405944393e-06, + "loss": 1.0592, + "step": 11970 + }, + { + "epoch": 0.08671921938225224, + "grad_norm": 0.1833886355161667, + "learning_rate": 4.913288019283807e-06, + "loss": 1.0812, + "step": 11980 + }, + { + "epoch": 0.08679160604283842, + "grad_norm": 0.1789073795080185, + "learning_rate": 4.91321563262322e-06, + "loss": 1.0813, + "step": 11990 + }, + { + "epoch": 0.08686399270342461, + "grad_norm": 0.19956181943416595, + "learning_rate": 4.913143245962634e-06, + "loss": 1.0602, + "step": 12000 + }, + { + "epoch": 0.0869363793640108, + "grad_norm": 0.16924233734607697, + "learning_rate": 4.9130708593020484e-06, + "loss": 1.0682, + "step": 12010 + }, + { + "epoch": 0.08700876602459699, + "grad_norm": 0.20101603865623474, + "learning_rate": 4.912998472641462e-06, + "loss": 1.0896, + "step": 12020 + }, + { + "epoch": 0.08708115268518317, + "grad_norm": 0.17488181591033936, + "learning_rate": 4.912926085980876e-06, + "loss": 1.0551, + "step": 12030 + }, + { + "epoch": 0.08715353934576936, + "grad_norm": 0.18475006520748138, + "learning_rate": 4.912853699320289e-06, + "loss": 1.0825, + "step": 12040 + }, + { + "epoch": 0.08722592600635555, + "grad_norm": 0.2061099112033844, + "learning_rate": 4.912781312659704e-06, + "loss": 1.0836, + "step": 12050 + }, + { + "epoch": 0.08729831266694174, + "grad_norm": 0.7837763428688049, + "learning_rate": 4.912708925999117e-06, + "loss": 1.0716, + "step": 12060 + }, + { + "epoch": 0.08737069932752792, + "grad_norm": 0.2470492273569107, + "learning_rate": 4.912636539338531e-06, + "loss": 1.0686, + "step": 12070 + }, + { + "epoch": 0.08744308598811411, + "grad_norm": 0.18343280255794525, + "learning_rate": 4.912564152677945e-06, + "loss": 1.0704, + "step": 12080 + }, + { + "epoch": 0.0875154726487003, + "grad_norm": 0.18261706829071045, + "learning_rate": 4.912491766017359e-06, + "loss": 1.0632, + "step": 12090 + }, + { + "epoch": 0.08758785930928649, + "grad_norm": 0.25385862588882446, + "learning_rate": 4.912419379356773e-06, + "loss": 1.0686, + "step": 12100 + }, + { + "epoch": 0.08766024596987267, + "grad_norm": 0.1851423978805542, + "learning_rate": 4.912346992696186e-06, + "loss": 1.0647, + "step": 12110 + }, + { + "epoch": 0.08773263263045886, + "grad_norm": 0.16765649616718292, + "learning_rate": 4.9122746060356e-06, + "loss": 1.0625, + "step": 12120 + }, + { + "epoch": 0.08780501929104505, + "grad_norm": 0.17632648348808289, + "learning_rate": 4.912202219375014e-06, + "loss": 1.0767, + "step": 12130 + }, + { + "epoch": 0.08787740595163124, + "grad_norm": 0.20516112446784973, + "learning_rate": 4.912129832714428e-06, + "loss": 1.0764, + "step": 12140 + }, + { + "epoch": 0.08794979261221741, + "grad_norm": 0.19637328386306763, + "learning_rate": 4.912057446053842e-06, + "loss": 1.0819, + "step": 12150 + }, + { + "epoch": 0.0880221792728036, + "grad_norm": 0.17330960929393768, + "learning_rate": 4.911985059393255e-06, + "loss": 1.0731, + "step": 12160 + }, + { + "epoch": 0.0880945659333898, + "grad_norm": 0.17274819314479828, + "learning_rate": 4.91191267273267e-06, + "loss": 1.0811, + "step": 12170 + }, + { + "epoch": 0.08816695259397599, + "grad_norm": 0.18579480051994324, + "learning_rate": 4.911840286072083e-06, + "loss": 1.0769, + "step": 12180 + }, + { + "epoch": 0.08823933925456216, + "grad_norm": 0.18373064696788788, + "learning_rate": 4.911767899411497e-06, + "loss": 1.0749, + "step": 12190 + }, + { + "epoch": 0.08831172591514835, + "grad_norm": 0.1849633753299713, + "learning_rate": 4.9116955127509105e-06, + "loss": 1.0789, + "step": 12200 + }, + { + "epoch": 0.08838411257573454, + "grad_norm": 0.1739869862794876, + "learning_rate": 4.911623126090325e-06, + "loss": 1.0805, + "step": 12210 + }, + { + "epoch": 0.08845649923632073, + "grad_norm": 0.18374896049499512, + "learning_rate": 4.911550739429739e-06, + "loss": 1.0491, + "step": 12220 + }, + { + "epoch": 0.08852888589690691, + "grad_norm": 0.20483864843845367, + "learning_rate": 4.911478352769152e-06, + "loss": 1.0698, + "step": 12230 + }, + { + "epoch": 0.0886012725574931, + "grad_norm": 0.17698989808559418, + "learning_rate": 4.911405966108566e-06, + "loss": 1.0757, + "step": 12240 + }, + { + "epoch": 0.08867365921807929, + "grad_norm": 0.16468937695026398, + "learning_rate": 4.91133357944798e-06, + "loss": 1.0577, + "step": 12250 + }, + { + "epoch": 0.08874604587866548, + "grad_norm": 0.18862901628017426, + "learning_rate": 4.911261192787394e-06, + "loss": 1.0779, + "step": 12260 + }, + { + "epoch": 0.08881843253925166, + "grad_norm": 0.17492325603961945, + "learning_rate": 4.9111888061268075e-06, + "loss": 1.0662, + "step": 12270 + }, + { + "epoch": 0.08889081919983785, + "grad_norm": 0.16824495792388916, + "learning_rate": 4.911116419466221e-06, + "loss": 1.0617, + "step": 12280 + }, + { + "epoch": 0.08896320586042404, + "grad_norm": 0.17155864834785461, + "learning_rate": 4.911044032805636e-06, + "loss": 1.058, + "step": 12290 + }, + { + "epoch": 0.08903559252101023, + "grad_norm": 0.17374593019485474, + "learning_rate": 4.910971646145049e-06, + "loss": 1.0538, + "step": 12300 + }, + { + "epoch": 0.08910797918159642, + "grad_norm": 0.170820415019989, + "learning_rate": 4.910899259484462e-06, + "loss": 1.0719, + "step": 12310 + }, + { + "epoch": 0.0891803658421826, + "grad_norm": 0.1844668984413147, + "learning_rate": 4.9108268728238765e-06, + "loss": 1.058, + "step": 12320 + }, + { + "epoch": 0.08925275250276879, + "grad_norm": 0.17644762992858887, + "learning_rate": 4.91075448616329e-06, + "loss": 1.0533, + "step": 12330 + }, + { + "epoch": 0.08932513916335498, + "grad_norm": 0.19445423781871796, + "learning_rate": 4.910682099502704e-06, + "loss": 1.0617, + "step": 12340 + }, + { + "epoch": 0.08939752582394117, + "grad_norm": 0.20080770552158356, + "learning_rate": 4.910609712842117e-06, + "loss": 1.0592, + "step": 12350 + }, + { + "epoch": 0.08946991248452735, + "grad_norm": 0.17183251678943634, + "learning_rate": 4.910537326181532e-06, + "loss": 1.0629, + "step": 12360 + }, + { + "epoch": 0.08954229914511354, + "grad_norm": 0.17015236616134644, + "learning_rate": 4.910464939520945e-06, + "loss": 1.0571, + "step": 12370 + }, + { + "epoch": 0.08961468580569973, + "grad_norm": 0.19391165673732758, + "learning_rate": 4.910392552860359e-06, + "loss": 1.0634, + "step": 12380 + }, + { + "epoch": 0.08968707246628592, + "grad_norm": 0.21788957715034485, + "learning_rate": 4.910320166199773e-06, + "loss": 1.0633, + "step": 12390 + }, + { + "epoch": 0.0897594591268721, + "grad_norm": 0.1834946870803833, + "learning_rate": 4.910247779539187e-06, + "loss": 1.0594, + "step": 12400 + }, + { + "epoch": 0.08983184578745829, + "grad_norm": 0.1719583421945572, + "learning_rate": 4.910175392878601e-06, + "loss": 1.0485, + "step": 12410 + }, + { + "epoch": 0.08990423244804448, + "grad_norm": 0.21385973691940308, + "learning_rate": 4.910103006218014e-06, + "loss": 1.068, + "step": 12420 + }, + { + "epoch": 0.08997661910863067, + "grad_norm": 0.1749151051044464, + "learning_rate": 4.910030619557428e-06, + "loss": 1.0658, + "step": 12430 + }, + { + "epoch": 0.09004900576921684, + "grad_norm": 0.17845019698143005, + "learning_rate": 4.909958232896842e-06, + "loss": 1.0693, + "step": 12440 + }, + { + "epoch": 0.09012139242980303, + "grad_norm": 0.16557128727436066, + "learning_rate": 4.909885846236256e-06, + "loss": 1.05, + "step": 12450 + }, + { + "epoch": 0.09019377909038923, + "grad_norm": 0.1660662740468979, + "learning_rate": 4.90981345957567e-06, + "loss": 1.0756, + "step": 12460 + }, + { + "epoch": 0.09026616575097542, + "grad_norm": 0.17898871004581451, + "learning_rate": 4.909741072915083e-06, + "loss": 1.0692, + "step": 12470 + }, + { + "epoch": 0.09033855241156159, + "grad_norm": 0.18854671716690063, + "learning_rate": 4.909668686254498e-06, + "loss": 1.0648, + "step": 12480 + }, + { + "epoch": 0.09041093907214778, + "grad_norm": 0.18120893836021423, + "learning_rate": 4.909596299593911e-06, + "loss": 1.0646, + "step": 12490 + }, + { + "epoch": 0.09048332573273397, + "grad_norm": 0.1960660070180893, + "learning_rate": 4.909523912933325e-06, + "loss": 1.067, + "step": 12500 + }, + { + "epoch": 0.09055571239332016, + "grad_norm": 0.2270880788564682, + "learning_rate": 4.9094515262727386e-06, + "loss": 1.057, + "step": 12510 + }, + { + "epoch": 0.09062809905390634, + "grad_norm": 0.19658519327640533, + "learning_rate": 4.909379139612153e-06, + "loss": 1.0789, + "step": 12520 + }, + { + "epoch": 0.09070048571449253, + "grad_norm": 0.1924048811197281, + "learning_rate": 4.909306752951567e-06, + "loss": 1.0608, + "step": 12530 + }, + { + "epoch": 0.09077287237507872, + "grad_norm": 0.1948341578245163, + "learning_rate": 4.90923436629098e-06, + "loss": 1.0594, + "step": 12540 + }, + { + "epoch": 0.09084525903566491, + "grad_norm": 0.21154290437698364, + "learning_rate": 4.909161979630394e-06, + "loss": 1.0635, + "step": 12550 + }, + { + "epoch": 0.09091764569625109, + "grad_norm": 0.18303339183330536, + "learning_rate": 4.909089592969808e-06, + "loss": 1.0802, + "step": 12560 + }, + { + "epoch": 0.09099003235683728, + "grad_norm": 0.17858447134494781, + "learning_rate": 4.909017206309222e-06, + "loss": 1.065, + "step": 12570 + }, + { + "epoch": 0.09106241901742347, + "grad_norm": 0.186380535364151, + "learning_rate": 4.9089448196486356e-06, + "loss": 1.0611, + "step": 12580 + }, + { + "epoch": 0.09113480567800966, + "grad_norm": 0.17138642072677612, + "learning_rate": 4.908872432988049e-06, + "loss": 1.0598, + "step": 12590 + }, + { + "epoch": 0.09120719233859584, + "grad_norm": 0.21660137176513672, + "learning_rate": 4.908800046327463e-06, + "loss": 1.0604, + "step": 12600 + }, + { + "epoch": 0.09127957899918203, + "grad_norm": 0.17236590385437012, + "learning_rate": 4.908727659666877e-06, + "loss": 1.0554, + "step": 12610 + }, + { + "epoch": 0.09135196565976822, + "grad_norm": 0.16949652135372162, + "learning_rate": 4.908655273006291e-06, + "loss": 1.0523, + "step": 12620 + }, + { + "epoch": 0.09142435232035441, + "grad_norm": 0.18573276698589325, + "learning_rate": 4.9085828863457045e-06, + "loss": 1.0574, + "step": 12630 + }, + { + "epoch": 0.09149673898094059, + "grad_norm": 0.17692014575004578, + "learning_rate": 4.908510499685118e-06, + "loss": 1.0683, + "step": 12640 + }, + { + "epoch": 0.09156912564152678, + "grad_norm": 0.17320817708969116, + "learning_rate": 4.908438113024533e-06, + "loss": 1.0725, + "step": 12650 + }, + { + "epoch": 0.09164151230211297, + "grad_norm": 0.18194791674613953, + "learning_rate": 4.908365726363946e-06, + "loss": 1.0769, + "step": 12660 + }, + { + "epoch": 0.09171389896269916, + "grad_norm": 0.17334748804569244, + "learning_rate": 4.90829333970336e-06, + "loss": 1.0648, + "step": 12670 + }, + { + "epoch": 0.09178628562328534, + "grad_norm": 0.18237444758415222, + "learning_rate": 4.9082209530427734e-06, + "loss": 1.0701, + "step": 12680 + }, + { + "epoch": 0.09185867228387153, + "grad_norm": 0.18935726583003998, + "learning_rate": 4.908148566382188e-06, + "loss": 1.0576, + "step": 12690 + }, + { + "epoch": 0.09193105894445772, + "grad_norm": 0.17094674706459045, + "learning_rate": 4.9080761797216015e-06, + "loss": 1.0695, + "step": 12700 + }, + { + "epoch": 0.09200344560504391, + "grad_norm": 0.1917349398136139, + "learning_rate": 4.908003793061015e-06, + "loss": 1.061, + "step": 12710 + }, + { + "epoch": 0.09207583226563008, + "grad_norm": 0.18850326538085938, + "learning_rate": 4.907931406400429e-06, + "loss": 1.0772, + "step": 12720 + }, + { + "epoch": 0.09214821892621627, + "grad_norm": 0.20601466298103333, + "learning_rate": 4.907859019739843e-06, + "loss": 1.0626, + "step": 12730 + }, + { + "epoch": 0.09222060558680246, + "grad_norm": 0.1699492633342743, + "learning_rate": 4.907786633079257e-06, + "loss": 1.054, + "step": 12740 + }, + { + "epoch": 0.09229299224738866, + "grad_norm": 0.16967886686325073, + "learning_rate": 4.9077142464186704e-06, + "loss": 1.0633, + "step": 12750 + }, + { + "epoch": 0.09236537890797483, + "grad_norm": 0.16590727865695953, + "learning_rate": 4.907641859758084e-06, + "loss": 1.0436, + "step": 12760 + }, + { + "epoch": 0.09243776556856102, + "grad_norm": 0.18494842946529388, + "learning_rate": 4.9075694730974985e-06, + "loss": 1.0639, + "step": 12770 + }, + { + "epoch": 0.09251015222914721, + "grad_norm": 0.19467203319072723, + "learning_rate": 4.907497086436912e-06, + "loss": 1.059, + "step": 12780 + }, + { + "epoch": 0.0925825388897334, + "grad_norm": 0.1718284636735916, + "learning_rate": 4.907424699776326e-06, + "loss": 1.0615, + "step": 12790 + }, + { + "epoch": 0.09265492555031958, + "grad_norm": 0.16538646817207336, + "learning_rate": 4.907352313115739e-06, + "loss": 1.0653, + "step": 12800 + }, + { + "epoch": 0.09272731221090577, + "grad_norm": 0.17664389312267303, + "learning_rate": 4.907279926455154e-06, + "loss": 1.0595, + "step": 12810 + }, + { + "epoch": 0.09279969887149196, + "grad_norm": 0.1798931062221527, + "learning_rate": 4.9072075397945674e-06, + "loss": 1.0578, + "step": 12820 + }, + { + "epoch": 0.09287208553207815, + "grad_norm": 0.20928549766540527, + "learning_rate": 4.907135153133981e-06, + "loss": 1.0699, + "step": 12830 + }, + { + "epoch": 0.09294447219266433, + "grad_norm": 0.1914556324481964, + "learning_rate": 4.907062766473395e-06, + "loss": 1.078, + "step": 12840 + }, + { + "epoch": 0.09301685885325052, + "grad_norm": 0.17535455524921417, + "learning_rate": 4.906990379812808e-06, + "loss": 1.062, + "step": 12850 + }, + { + "epoch": 0.09308924551383671, + "grad_norm": 0.209900364279747, + "learning_rate": 4.906917993152222e-06, + "loss": 1.0651, + "step": 12860 + }, + { + "epoch": 0.0931616321744229, + "grad_norm": 0.19271911680698395, + "learning_rate": 4.9068456064916355e-06, + "loss": 1.0695, + "step": 12870 + }, + { + "epoch": 0.09323401883500908, + "grad_norm": 0.17206203937530518, + "learning_rate": 4.90677321983105e-06, + "loss": 1.054, + "step": 12880 + }, + { + "epoch": 0.09330640549559527, + "grad_norm": 0.17323225736618042, + "learning_rate": 4.906700833170464e-06, + "loss": 1.0545, + "step": 12890 + }, + { + "epoch": 0.09337879215618146, + "grad_norm": 0.18585805594921112, + "learning_rate": 4.906628446509877e-06, + "loss": 1.0638, + "step": 12900 + }, + { + "epoch": 0.09345117881676765, + "grad_norm": 0.18131840229034424, + "learning_rate": 4.906556059849291e-06, + "loss": 1.0575, + "step": 12910 + }, + { + "epoch": 0.09352356547735384, + "grad_norm": 0.18972773849964142, + "learning_rate": 4.906483673188705e-06, + "loss": 1.0561, + "step": 12920 + }, + { + "epoch": 0.09359595213794002, + "grad_norm": 0.17242597043514252, + "learning_rate": 4.906411286528119e-06, + "loss": 1.0754, + "step": 12930 + }, + { + "epoch": 0.09366833879852621, + "grad_norm": 0.17874747514724731, + "learning_rate": 4.9063388998675325e-06, + "loss": 1.0769, + "step": 12940 + }, + { + "epoch": 0.0937407254591124, + "grad_norm": 0.1628153920173645, + "learning_rate": 4.906266513206946e-06, + "loss": 1.0701, + "step": 12950 + }, + { + "epoch": 0.09381311211969859, + "grad_norm": 0.1775561273097992, + "learning_rate": 4.906194126546361e-06, + "loss": 1.0496, + "step": 12960 + }, + { + "epoch": 0.09388549878028477, + "grad_norm": 0.17346498370170593, + "learning_rate": 4.906121739885774e-06, + "loss": 1.0592, + "step": 12970 + }, + { + "epoch": 0.09395788544087096, + "grad_norm": 0.1621558666229248, + "learning_rate": 4.906049353225188e-06, + "loss": 1.0639, + "step": 12980 + }, + { + "epoch": 0.09403027210145715, + "grad_norm": 0.16274607181549072, + "learning_rate": 4.9059769665646015e-06, + "loss": 1.0593, + "step": 12990 + }, + { + "epoch": 0.09410265876204334, + "grad_norm": 0.24660325050354004, + "learning_rate": 4.905904579904016e-06, + "loss": 1.0687, + "step": 13000 + }, + { + "epoch": 0.09417504542262951, + "grad_norm": 0.17829085886478424, + "learning_rate": 4.9058321932434295e-06, + "loss": 1.061, + "step": 13010 + }, + { + "epoch": 0.0942474320832157, + "grad_norm": 0.19128848612308502, + "learning_rate": 4.905759806582843e-06, + "loss": 1.0612, + "step": 13020 + }, + { + "epoch": 0.0943198187438019, + "grad_norm": 0.1639917492866516, + "learning_rate": 4.905687419922257e-06, + "loss": 1.0514, + "step": 13030 + }, + { + "epoch": 0.09439220540438809, + "grad_norm": 0.18937060236930847, + "learning_rate": 4.905615033261671e-06, + "loss": 1.0471, + "step": 13040 + }, + { + "epoch": 0.09446459206497426, + "grad_norm": 0.20053894817829132, + "learning_rate": 4.905542646601085e-06, + "loss": 1.051, + "step": 13050 + }, + { + "epoch": 0.09453697872556045, + "grad_norm": 0.17322196066379547, + "learning_rate": 4.9054702599404985e-06, + "loss": 1.0626, + "step": 13060 + }, + { + "epoch": 0.09460936538614664, + "grad_norm": 0.18326787650585175, + "learning_rate": 4.905397873279912e-06, + "loss": 1.0556, + "step": 13070 + }, + { + "epoch": 0.09468175204673283, + "grad_norm": 0.18732202053070068, + "learning_rate": 4.9053254866193265e-06, + "loss": 1.0697, + "step": 13080 + }, + { + "epoch": 0.09475413870731901, + "grad_norm": 0.1722288727760315, + "learning_rate": 4.90525309995874e-06, + "loss": 1.0558, + "step": 13090 + }, + { + "epoch": 0.0948265253679052, + "grad_norm": 0.1705879271030426, + "learning_rate": 4.905180713298154e-06, + "loss": 1.0436, + "step": 13100 + }, + { + "epoch": 0.09489891202849139, + "grad_norm": 0.19368794560432434, + "learning_rate": 4.905108326637567e-06, + "loss": 1.0541, + "step": 13110 + }, + { + "epoch": 0.09497129868907758, + "grad_norm": 0.16703177988529205, + "learning_rate": 4.905035939976982e-06, + "loss": 1.061, + "step": 13120 + }, + { + "epoch": 0.09504368534966376, + "grad_norm": 0.1757340431213379, + "learning_rate": 4.9049635533163955e-06, + "loss": 1.054, + "step": 13130 + }, + { + "epoch": 0.09511607201024995, + "grad_norm": 0.1705889105796814, + "learning_rate": 4.904891166655809e-06, + "loss": 1.0499, + "step": 13140 + }, + { + "epoch": 0.09518845867083614, + "grad_norm": 0.18532103300094604, + "learning_rate": 4.904818779995223e-06, + "loss": 1.073, + "step": 13150 + }, + { + "epoch": 0.09526084533142233, + "grad_norm": 0.17054064571857452, + "learning_rate": 4.904746393334637e-06, + "loss": 1.0548, + "step": 13160 + }, + { + "epoch": 0.09533323199200851, + "grad_norm": 0.18268738687038422, + "learning_rate": 4.904674006674051e-06, + "loss": 1.0489, + "step": 13170 + }, + { + "epoch": 0.0954056186525947, + "grad_norm": 0.1656617373228073, + "learning_rate": 4.904601620013464e-06, + "loss": 1.048, + "step": 13180 + }, + { + "epoch": 0.09547800531318089, + "grad_norm": 0.16891372203826904, + "learning_rate": 4.904529233352878e-06, + "loss": 1.0719, + "step": 13190 + }, + { + "epoch": 0.09555039197376708, + "grad_norm": 0.19069869816303253, + "learning_rate": 4.904456846692292e-06, + "loss": 1.0673, + "step": 13200 + }, + { + "epoch": 0.09562277863435326, + "grad_norm": 0.17400185763835907, + "learning_rate": 4.904384460031706e-06, + "loss": 1.0586, + "step": 13210 + }, + { + "epoch": 0.09569516529493945, + "grad_norm": 0.17865540087223053, + "learning_rate": 4.90431207337112e-06, + "loss": 1.0509, + "step": 13220 + }, + { + "epoch": 0.09576755195552564, + "grad_norm": 0.22971458733081818, + "learning_rate": 4.904239686710533e-06, + "loss": 1.0619, + "step": 13230 + }, + { + "epoch": 0.09583993861611183, + "grad_norm": 0.16705693304538727, + "learning_rate": 4.904167300049947e-06, + "loss": 1.0616, + "step": 13240 + }, + { + "epoch": 0.095912325276698, + "grad_norm": 0.17722009122371674, + "learning_rate": 4.904094913389361e-06, + "loss": 1.046, + "step": 13250 + }, + { + "epoch": 0.0959847119372842, + "grad_norm": 0.18104061484336853, + "learning_rate": 4.904022526728775e-06, + "loss": 1.0516, + "step": 13260 + }, + { + "epoch": 0.09605709859787039, + "grad_norm": 0.1646488904953003, + "learning_rate": 4.903950140068189e-06, + "loss": 1.0612, + "step": 13270 + }, + { + "epoch": 0.09612948525845658, + "grad_norm": 0.17843876779079437, + "learning_rate": 4.903877753407602e-06, + "loss": 1.0584, + "step": 13280 + }, + { + "epoch": 0.09620187191904275, + "grad_norm": 0.18090327084064484, + "learning_rate": 4.903805366747017e-06, + "loss": 1.063, + "step": 13290 + }, + { + "epoch": 0.09627425857962894, + "grad_norm": 0.17758344113826752, + "learning_rate": 4.90373298008643e-06, + "loss": 1.0633, + "step": 13300 + }, + { + "epoch": 0.09634664524021513, + "grad_norm": 0.17629143595695496, + "learning_rate": 4.903660593425844e-06, + "loss": 1.0546, + "step": 13310 + }, + { + "epoch": 0.09641903190080132, + "grad_norm": 0.17930828034877777, + "learning_rate": 4.9035882067652576e-06, + "loss": 1.0679, + "step": 13320 + }, + { + "epoch": 0.0964914185613875, + "grad_norm": 0.22573702037334442, + "learning_rate": 4.903515820104672e-06, + "loss": 1.062, + "step": 13330 + }, + { + "epoch": 0.09656380522197369, + "grad_norm": 0.19880887866020203, + "learning_rate": 4.903443433444086e-06, + "loss": 1.0614, + "step": 13340 + }, + { + "epoch": 0.09663619188255988, + "grad_norm": 0.20379464328289032, + "learning_rate": 4.903371046783499e-06, + "loss": 1.0585, + "step": 13350 + }, + { + "epoch": 0.09670857854314607, + "grad_norm": 0.1923993080854416, + "learning_rate": 4.903298660122913e-06, + "loss": 1.0647, + "step": 13360 + }, + { + "epoch": 0.09678096520373225, + "grad_norm": 0.18714511394500732, + "learning_rate": 4.9032262734623265e-06, + "loss": 1.0439, + "step": 13370 + }, + { + "epoch": 0.09685335186431844, + "grad_norm": 0.20776358246803284, + "learning_rate": 4.90315388680174e-06, + "loss": 1.0717, + "step": 13380 + }, + { + "epoch": 0.09692573852490463, + "grad_norm": 0.1841840147972107, + "learning_rate": 4.903081500141154e-06, + "loss": 1.0716, + "step": 13390 + }, + { + "epoch": 0.09699812518549082, + "grad_norm": 0.1891954392194748, + "learning_rate": 4.903009113480568e-06, + "loss": 1.0572, + "step": 13400 + }, + { + "epoch": 0.097070511846077, + "grad_norm": 0.2061716616153717, + "learning_rate": 4.902936726819982e-06, + "loss": 1.0587, + "step": 13410 + }, + { + "epoch": 0.09714289850666319, + "grad_norm": 0.1809329241514206, + "learning_rate": 4.9028643401593954e-06, + "loss": 1.0772, + "step": 13420 + }, + { + "epoch": 0.09721528516724938, + "grad_norm": 0.18182168900966644, + "learning_rate": 4.902791953498809e-06, + "loss": 1.0707, + "step": 13430 + }, + { + "epoch": 0.09728767182783557, + "grad_norm": 0.16881795227527618, + "learning_rate": 4.9027195668382235e-06, + "loss": 1.0617, + "step": 13440 + }, + { + "epoch": 0.09736005848842175, + "grad_norm": 0.18087784945964813, + "learning_rate": 4.902647180177637e-06, + "loss": 1.0566, + "step": 13450 + }, + { + "epoch": 0.09743244514900794, + "grad_norm": 0.19071871042251587, + "learning_rate": 4.902574793517051e-06, + "loss": 1.0479, + "step": 13460 + }, + { + "epoch": 0.09750483180959413, + "grad_norm": 0.18914766609668732, + "learning_rate": 4.902502406856464e-06, + "loss": 1.0657, + "step": 13470 + }, + { + "epoch": 0.09757721847018032, + "grad_norm": 0.16709379851818085, + "learning_rate": 4.902430020195879e-06, + "loss": 1.0522, + "step": 13480 + }, + { + "epoch": 0.09764960513076651, + "grad_norm": 0.178353950381279, + "learning_rate": 4.9023576335352924e-06, + "loss": 1.0385, + "step": 13490 + }, + { + "epoch": 0.09772199179135269, + "grad_norm": 0.18520021438598633, + "learning_rate": 4.902285246874706e-06, + "loss": 1.0431, + "step": 13500 + }, + { + "epoch": 0.09779437845193888, + "grad_norm": 0.1669263392686844, + "learning_rate": 4.90221286021412e-06, + "loss": 1.0552, + "step": 13510 + }, + { + "epoch": 0.09786676511252507, + "grad_norm": 0.1864238828420639, + "learning_rate": 4.902140473553534e-06, + "loss": 1.0536, + "step": 13520 + }, + { + "epoch": 0.09793915177311126, + "grad_norm": 0.1777682602405548, + "learning_rate": 4.902068086892948e-06, + "loss": 1.0604, + "step": 13530 + }, + { + "epoch": 0.09801153843369743, + "grad_norm": 0.1826665848493576, + "learning_rate": 4.901995700232361e-06, + "loss": 1.0642, + "step": 13540 + }, + { + "epoch": 0.09808392509428362, + "grad_norm": 0.17385782301425934, + "learning_rate": 4.901923313571775e-06, + "loss": 1.0643, + "step": 13550 + }, + { + "epoch": 0.09815631175486982, + "grad_norm": 0.18327641487121582, + "learning_rate": 4.9018509269111894e-06, + "loss": 1.0438, + "step": 13560 + }, + { + "epoch": 0.098228698415456, + "grad_norm": 0.18838289380073547, + "learning_rate": 4.901778540250603e-06, + "loss": 1.0529, + "step": 13570 + }, + { + "epoch": 0.09830108507604218, + "grad_norm": 0.1839076280593872, + "learning_rate": 4.901706153590017e-06, + "loss": 1.0553, + "step": 13580 + }, + { + "epoch": 0.09837347173662837, + "grad_norm": 0.21451406180858612, + "learning_rate": 4.90163376692943e-06, + "loss": 1.0549, + "step": 13590 + }, + { + "epoch": 0.09844585839721456, + "grad_norm": 0.1959172636270523, + "learning_rate": 4.901561380268845e-06, + "loss": 1.0824, + "step": 13600 + }, + { + "epoch": 0.09851824505780075, + "grad_norm": 0.17677393555641174, + "learning_rate": 4.901488993608258e-06, + "loss": 1.0723, + "step": 13610 + }, + { + "epoch": 0.09859063171838693, + "grad_norm": 0.17665037512779236, + "learning_rate": 4.901416606947672e-06, + "loss": 1.0616, + "step": 13620 + }, + { + "epoch": 0.09866301837897312, + "grad_norm": 0.20014891028404236, + "learning_rate": 4.901344220287086e-06, + "loss": 1.0484, + "step": 13630 + }, + { + "epoch": 0.09873540503955931, + "grad_norm": 0.18088600039482117, + "learning_rate": 4.9012718336265e-06, + "loss": 1.0507, + "step": 13640 + }, + { + "epoch": 0.0988077917001455, + "grad_norm": 0.19113531708717346, + "learning_rate": 4.901199446965914e-06, + "loss": 1.063, + "step": 13650 + }, + { + "epoch": 0.09888017836073168, + "grad_norm": 0.18885605037212372, + "learning_rate": 4.901127060305327e-06, + "loss": 1.0525, + "step": 13660 + }, + { + "epoch": 0.09895256502131787, + "grad_norm": 0.21783652901649475, + "learning_rate": 4.901054673644741e-06, + "loss": 1.0417, + "step": 13670 + }, + { + "epoch": 0.09902495168190406, + "grad_norm": 0.17940004169940948, + "learning_rate": 4.900982286984155e-06, + "loss": 1.0507, + "step": 13680 + }, + { + "epoch": 0.09909733834249025, + "grad_norm": 0.18767113983631134, + "learning_rate": 4.900909900323569e-06, + "loss": 1.0595, + "step": 13690 + }, + { + "epoch": 0.09916972500307643, + "grad_norm": 0.21696849167346954, + "learning_rate": 4.900837513662983e-06, + "loss": 1.0526, + "step": 13700 + }, + { + "epoch": 0.09924211166366262, + "grad_norm": 0.18748457729816437, + "learning_rate": 4.900765127002396e-06, + "loss": 1.0464, + "step": 13710 + }, + { + "epoch": 0.09931449832424881, + "grad_norm": 0.16807325184345245, + "learning_rate": 4.900692740341811e-06, + "loss": 1.0509, + "step": 13720 + }, + { + "epoch": 0.099386884984835, + "grad_norm": 0.19087384641170502, + "learning_rate": 4.900620353681224e-06, + "loss": 1.0599, + "step": 13730 + }, + { + "epoch": 0.09945927164542118, + "grad_norm": 0.2038954794406891, + "learning_rate": 4.900547967020638e-06, + "loss": 1.064, + "step": 13740 + }, + { + "epoch": 0.09953165830600737, + "grad_norm": 0.18865635991096497, + "learning_rate": 4.9004755803600515e-06, + "loss": 1.0605, + "step": 13750 + }, + { + "epoch": 0.09960404496659356, + "grad_norm": 0.1682719886302948, + "learning_rate": 4.900403193699466e-06, + "loss": 1.053, + "step": 13760 + }, + { + "epoch": 0.09967643162717975, + "grad_norm": 0.17732466757297516, + "learning_rate": 4.90033080703888e-06, + "loss": 1.0541, + "step": 13770 + }, + { + "epoch": 0.09974881828776593, + "grad_norm": 0.18870952725410461, + "learning_rate": 4.900258420378293e-06, + "loss": 1.0635, + "step": 13780 + }, + { + "epoch": 0.09982120494835212, + "grad_norm": 0.17982196807861328, + "learning_rate": 4.900186033717707e-06, + "loss": 1.064, + "step": 13790 + }, + { + "epoch": 0.0998935916089383, + "grad_norm": 0.17786167562007904, + "learning_rate": 4.900113647057121e-06, + "loss": 1.0619, + "step": 13800 + }, + { + "epoch": 0.0999659782695245, + "grad_norm": 0.1713530421257019, + "learning_rate": 4.900041260396535e-06, + "loss": 1.051, + "step": 13810 + }, + { + "epoch": 0.10003836493011067, + "grad_norm": 0.17207489907741547, + "learning_rate": 4.8999688737359485e-06, + "loss": 1.0744, + "step": 13820 + }, + { + "epoch": 0.10011075159069686, + "grad_norm": 0.18473108112812042, + "learning_rate": 4.899896487075362e-06, + "loss": 1.0505, + "step": 13830 + }, + { + "epoch": 0.10018313825128305, + "grad_norm": 0.16173668205738068, + "learning_rate": 4.899824100414776e-06, + "loss": 1.054, + "step": 13840 + }, + { + "epoch": 0.10025552491186925, + "grad_norm": 0.18166185915470123, + "learning_rate": 4.89975171375419e-06, + "loss": 1.0566, + "step": 13850 + }, + { + "epoch": 0.10032791157245542, + "grad_norm": 0.1847759187221527, + "learning_rate": 4.899679327093604e-06, + "loss": 1.0589, + "step": 13860 + }, + { + "epoch": 0.10040029823304161, + "grad_norm": 0.23197191953659058, + "learning_rate": 4.8996069404330175e-06, + "loss": 1.0468, + "step": 13870 + }, + { + "epoch": 0.1004726848936278, + "grad_norm": 0.1927204728126526, + "learning_rate": 4.899534553772431e-06, + "loss": 1.0602, + "step": 13880 + }, + { + "epoch": 0.100545071554214, + "grad_norm": 0.18343763053417206, + "learning_rate": 4.8994621671118456e-06, + "loss": 1.0436, + "step": 13890 + }, + { + "epoch": 0.10061745821480017, + "grad_norm": 0.18282248079776764, + "learning_rate": 4.899389780451258e-06, + "loss": 1.0505, + "step": 13900 + }, + { + "epoch": 0.10068984487538636, + "grad_norm": 0.17692866921424866, + "learning_rate": 4.899317393790673e-06, + "loss": 1.0826, + "step": 13910 + }, + { + "epoch": 0.10076223153597255, + "grad_norm": 0.17446507513523102, + "learning_rate": 4.899245007130086e-06, + "loss": 1.06, + "step": 13920 + }, + { + "epoch": 0.10083461819655874, + "grad_norm": 0.1633169800043106, + "learning_rate": 4.8991726204695e-06, + "loss": 1.0358, + "step": 13930 + }, + { + "epoch": 0.10090700485714492, + "grad_norm": 0.17512056231498718, + "learning_rate": 4.899100233808914e-06, + "loss": 1.0552, + "step": 13940 + }, + { + "epoch": 0.10097939151773111, + "grad_norm": 0.1872796267271042, + "learning_rate": 4.899027847148328e-06, + "loss": 1.0467, + "step": 13950 + }, + { + "epoch": 0.1010517781783173, + "grad_norm": 0.17610350251197815, + "learning_rate": 4.898955460487742e-06, + "loss": 1.0536, + "step": 13960 + }, + { + "epoch": 0.10112416483890349, + "grad_norm": 0.16492041945457458, + "learning_rate": 4.898883073827155e-06, + "loss": 1.0724, + "step": 13970 + }, + { + "epoch": 0.10119655149948967, + "grad_norm": 0.16583912074565887, + "learning_rate": 4.898810687166569e-06, + "loss": 1.0532, + "step": 13980 + }, + { + "epoch": 0.10126893816007586, + "grad_norm": 0.20664115250110626, + "learning_rate": 4.8987383005059826e-06, + "loss": 1.0643, + "step": 13990 + }, + { + "epoch": 0.10134132482066205, + "grad_norm": 0.17064620554447174, + "learning_rate": 4.898665913845397e-06, + "loss": 1.042, + "step": 14000 + }, + { + "epoch": 0.10141371148124824, + "grad_norm": 0.17799104750156403, + "learning_rate": 4.898593527184811e-06, + "loss": 1.0575, + "step": 14010 + }, + { + "epoch": 0.10148609814183442, + "grad_norm": 0.16274908185005188, + "learning_rate": 4.898521140524224e-06, + "loss": 1.0514, + "step": 14020 + }, + { + "epoch": 0.1015584848024206, + "grad_norm": 0.1757798045873642, + "learning_rate": 4.898448753863638e-06, + "loss": 1.0512, + "step": 14030 + }, + { + "epoch": 0.1016308714630068, + "grad_norm": 0.18405269086360931, + "learning_rate": 4.898376367203052e-06, + "loss": 1.0545, + "step": 14040 + }, + { + "epoch": 0.10170325812359299, + "grad_norm": 0.1723915934562683, + "learning_rate": 4.898303980542466e-06, + "loss": 1.0563, + "step": 14050 + }, + { + "epoch": 0.10177564478417918, + "grad_norm": 0.18672221899032593, + "learning_rate": 4.8982315938818796e-06, + "loss": 1.0542, + "step": 14060 + }, + { + "epoch": 0.10184803144476536, + "grad_norm": 0.20487704873085022, + "learning_rate": 4.898159207221293e-06, + "loss": 1.0567, + "step": 14070 + }, + { + "epoch": 0.10192041810535155, + "grad_norm": 0.1787646859884262, + "learning_rate": 4.898086820560708e-06, + "loss": 1.0683, + "step": 14080 + }, + { + "epoch": 0.10199280476593774, + "grad_norm": 0.18574008345603943, + "learning_rate": 4.898014433900121e-06, + "loss": 1.0517, + "step": 14090 + }, + { + "epoch": 0.10206519142652393, + "grad_norm": 0.17313173413276672, + "learning_rate": 4.897942047239535e-06, + "loss": 1.0451, + "step": 14100 + }, + { + "epoch": 0.1021375780871101, + "grad_norm": 0.18771642446517944, + "learning_rate": 4.8978696605789485e-06, + "loss": 1.0599, + "step": 14110 + }, + { + "epoch": 0.1022099647476963, + "grad_norm": 0.22109109163284302, + "learning_rate": 4.897797273918363e-06, + "loss": 1.0622, + "step": 14120 + }, + { + "epoch": 0.10228235140828248, + "grad_norm": 0.1844063252210617, + "learning_rate": 4.8977248872577766e-06, + "loss": 1.0602, + "step": 14130 + }, + { + "epoch": 0.10235473806886868, + "grad_norm": 0.18542051315307617, + "learning_rate": 4.89765250059719e-06, + "loss": 1.0554, + "step": 14140 + }, + { + "epoch": 0.10242712472945485, + "grad_norm": 0.17631708085536957, + "learning_rate": 4.897580113936604e-06, + "loss": 1.053, + "step": 14150 + }, + { + "epoch": 0.10249951139004104, + "grad_norm": 0.17057740688323975, + "learning_rate": 4.897507727276018e-06, + "loss": 1.0535, + "step": 14160 + }, + { + "epoch": 0.10257189805062723, + "grad_norm": 0.17483165860176086, + "learning_rate": 4.897435340615432e-06, + "loss": 1.0433, + "step": 14170 + }, + { + "epoch": 0.10264428471121342, + "grad_norm": 0.18016666173934937, + "learning_rate": 4.8973629539548455e-06, + "loss": 1.031, + "step": 14180 + }, + { + "epoch": 0.1027166713717996, + "grad_norm": 0.1845754235982895, + "learning_rate": 4.897290567294259e-06, + "loss": 1.0498, + "step": 14190 + }, + { + "epoch": 0.10278905803238579, + "grad_norm": 0.1925055980682373, + "learning_rate": 4.897218180633674e-06, + "loss": 1.0588, + "step": 14200 + }, + { + "epoch": 0.10286144469297198, + "grad_norm": 0.19144387543201447, + "learning_rate": 4.897145793973087e-06, + "loss": 1.0514, + "step": 14210 + }, + { + "epoch": 0.10293383135355817, + "grad_norm": 0.1973779946565628, + "learning_rate": 4.897073407312501e-06, + "loss": 1.0468, + "step": 14220 + }, + { + "epoch": 0.10300621801414435, + "grad_norm": 0.16684280335903168, + "learning_rate": 4.8970010206519144e-06, + "loss": 1.0578, + "step": 14230 + }, + { + "epoch": 0.10307860467473054, + "grad_norm": 0.22712835669517517, + "learning_rate": 4.896928633991329e-06, + "loss": 1.055, + "step": 14240 + }, + { + "epoch": 0.10315099133531673, + "grad_norm": 0.189208522439003, + "learning_rate": 4.8968562473307425e-06, + "loss": 1.0338, + "step": 14250 + }, + { + "epoch": 0.10322337799590292, + "grad_norm": 0.18186455965042114, + "learning_rate": 4.896783860670156e-06, + "loss": 1.0497, + "step": 14260 + }, + { + "epoch": 0.1032957646564891, + "grad_norm": 0.1724977344274521, + "learning_rate": 4.89671147400957e-06, + "loss": 1.0475, + "step": 14270 + }, + { + "epoch": 0.10336815131707529, + "grad_norm": 0.1740335077047348, + "learning_rate": 4.896639087348984e-06, + "loss": 1.0406, + "step": 14280 + }, + { + "epoch": 0.10344053797766148, + "grad_norm": 0.21872451901435852, + "learning_rate": 4.896566700688398e-06, + "loss": 1.0514, + "step": 14290 + }, + { + "epoch": 0.10351292463824767, + "grad_norm": 0.18021239340305328, + "learning_rate": 4.8964943140278114e-06, + "loss": 1.0619, + "step": 14300 + }, + { + "epoch": 0.10358531129883385, + "grad_norm": 0.17084094882011414, + "learning_rate": 4.896421927367225e-06, + "loss": 1.0567, + "step": 14310 + }, + { + "epoch": 0.10365769795942004, + "grad_norm": 0.18873821198940277, + "learning_rate": 4.8963495407066395e-06, + "loss": 1.0565, + "step": 14320 + }, + { + "epoch": 0.10373008462000623, + "grad_norm": 0.16363616287708282, + "learning_rate": 4.896277154046053e-06, + "loss": 1.0444, + "step": 14330 + }, + { + "epoch": 0.10380247128059242, + "grad_norm": 0.20980127155780792, + "learning_rate": 4.896204767385467e-06, + "loss": 1.0515, + "step": 14340 + }, + { + "epoch": 0.1038748579411786, + "grad_norm": 0.17421722412109375, + "learning_rate": 4.89613238072488e-06, + "loss": 1.0513, + "step": 14350 + }, + { + "epoch": 0.10394724460176478, + "grad_norm": 0.20598512887954712, + "learning_rate": 4.896059994064295e-06, + "loss": 1.0549, + "step": 14360 + }, + { + "epoch": 0.10401963126235098, + "grad_norm": 0.1713586449623108, + "learning_rate": 4.8959876074037084e-06, + "loss": 1.0456, + "step": 14370 + }, + { + "epoch": 0.10409201792293717, + "grad_norm": 0.1770513504743576, + "learning_rate": 4.895915220743122e-06, + "loss": 1.0492, + "step": 14380 + }, + { + "epoch": 0.10416440458352334, + "grad_norm": 0.18118791282176971, + "learning_rate": 4.895842834082536e-06, + "loss": 1.0547, + "step": 14390 + }, + { + "epoch": 0.10423679124410953, + "grad_norm": 0.18534211814403534, + "learning_rate": 4.89577044742195e-06, + "loss": 1.057, + "step": 14400 + }, + { + "epoch": 0.10430917790469572, + "grad_norm": 0.20665541291236877, + "learning_rate": 4.895698060761364e-06, + "loss": 1.0455, + "step": 14410 + }, + { + "epoch": 0.10438156456528191, + "grad_norm": 0.17217250168323517, + "learning_rate": 4.895625674100777e-06, + "loss": 1.0524, + "step": 14420 + }, + { + "epoch": 0.10445395122586809, + "grad_norm": 0.1958875209093094, + "learning_rate": 4.895553287440191e-06, + "loss": 1.0457, + "step": 14430 + }, + { + "epoch": 0.10452633788645428, + "grad_norm": 0.18812295794487, + "learning_rate": 4.895480900779605e-06, + "loss": 1.0523, + "step": 14440 + }, + { + "epoch": 0.10459872454704047, + "grad_norm": 0.16309994459152222, + "learning_rate": 4.895408514119018e-06, + "loss": 1.0446, + "step": 14450 + }, + { + "epoch": 0.10467111120762666, + "grad_norm": 0.1937844455242157, + "learning_rate": 4.895336127458432e-06, + "loss": 1.0534, + "step": 14460 + }, + { + "epoch": 0.10474349786821284, + "grad_norm": 0.17462307214736938, + "learning_rate": 4.895263740797846e-06, + "loss": 1.0362, + "step": 14470 + }, + { + "epoch": 0.10481588452879903, + "grad_norm": 0.17586156725883484, + "learning_rate": 4.89519135413726e-06, + "loss": 1.034, + "step": 14480 + }, + { + "epoch": 0.10488827118938522, + "grad_norm": 0.1698175072669983, + "learning_rate": 4.8951189674766735e-06, + "loss": 1.0439, + "step": 14490 + }, + { + "epoch": 0.10496065784997141, + "grad_norm": 0.18947486579418182, + "learning_rate": 4.895046580816087e-06, + "loss": 1.0507, + "step": 14500 + }, + { + "epoch": 0.10503304451055759, + "grad_norm": 0.17008043825626373, + "learning_rate": 4.894974194155502e-06, + "loss": 1.0368, + "step": 14510 + }, + { + "epoch": 0.10510543117114378, + "grad_norm": 0.18763844668865204, + "learning_rate": 4.894901807494915e-06, + "loss": 1.0634, + "step": 14520 + }, + { + "epoch": 0.10517781783172997, + "grad_norm": 0.18135419487953186, + "learning_rate": 4.894829420834329e-06, + "loss": 1.0502, + "step": 14530 + }, + { + "epoch": 0.10525020449231616, + "grad_norm": 0.18853351473808289, + "learning_rate": 4.8947570341737425e-06, + "loss": 1.0538, + "step": 14540 + }, + { + "epoch": 0.10532259115290234, + "grad_norm": 0.17148560285568237, + "learning_rate": 4.894684647513157e-06, + "loss": 1.0524, + "step": 14550 + }, + { + "epoch": 0.10539497781348853, + "grad_norm": 0.18300309777259827, + "learning_rate": 4.8946122608525705e-06, + "loss": 1.0528, + "step": 14560 + }, + { + "epoch": 0.10546736447407472, + "grad_norm": 0.17703156173229218, + "learning_rate": 4.894539874191984e-06, + "loss": 1.057, + "step": 14570 + }, + { + "epoch": 0.10553975113466091, + "grad_norm": 0.18114346265792847, + "learning_rate": 4.894467487531398e-06, + "loss": 1.0477, + "step": 14580 + }, + { + "epoch": 0.10561213779524709, + "grad_norm": 0.18363863229751587, + "learning_rate": 4.894395100870812e-06, + "loss": 1.0485, + "step": 14590 + }, + { + "epoch": 0.10568452445583328, + "grad_norm": 0.17243093252182007, + "learning_rate": 4.894322714210226e-06, + "loss": 1.0512, + "step": 14600 + }, + { + "epoch": 0.10575691111641947, + "grad_norm": 0.16274864971637726, + "learning_rate": 4.8942503275496395e-06, + "loss": 1.0321, + "step": 14610 + }, + { + "epoch": 0.10582929777700566, + "grad_norm": 0.1914278268814087, + "learning_rate": 4.894177940889053e-06, + "loss": 1.0623, + "step": 14620 + }, + { + "epoch": 0.10590168443759183, + "grad_norm": 0.187461256980896, + "learning_rate": 4.894105554228467e-06, + "loss": 1.0426, + "step": 14630 + }, + { + "epoch": 0.10597407109817802, + "grad_norm": 0.17549605667591095, + "learning_rate": 4.894033167567881e-06, + "loss": 1.0341, + "step": 14640 + }, + { + "epoch": 0.10604645775876421, + "grad_norm": 0.17880699038505554, + "learning_rate": 4.893960780907295e-06, + "loss": 1.0549, + "step": 14650 + }, + { + "epoch": 0.1061188444193504, + "grad_norm": 0.17565539479255676, + "learning_rate": 4.893888394246708e-06, + "loss": 1.048, + "step": 14660 + }, + { + "epoch": 0.1061912310799366, + "grad_norm": 0.17139199376106262, + "learning_rate": 4.893816007586122e-06, + "loss": 1.0557, + "step": 14670 + }, + { + "epoch": 0.10626361774052277, + "grad_norm": 0.18786922097206116, + "learning_rate": 4.8937436209255365e-06, + "loss": 1.059, + "step": 14680 + }, + { + "epoch": 0.10633600440110896, + "grad_norm": 0.18301083147525787, + "learning_rate": 4.89367123426495e-06, + "loss": 1.0397, + "step": 14690 + }, + { + "epoch": 0.10640839106169515, + "grad_norm": 0.1840226799249649, + "learning_rate": 4.893598847604364e-06, + "loss": 1.0548, + "step": 14700 + }, + { + "epoch": 0.10648077772228134, + "grad_norm": 0.18379652500152588, + "learning_rate": 4.893526460943777e-06, + "loss": 1.0441, + "step": 14710 + }, + { + "epoch": 0.10655316438286752, + "grad_norm": 0.17776694893836975, + "learning_rate": 4.893454074283192e-06, + "loss": 1.0594, + "step": 14720 + }, + { + "epoch": 0.10662555104345371, + "grad_norm": 0.17674773931503296, + "learning_rate": 4.893381687622605e-06, + "loss": 1.0594, + "step": 14730 + }, + { + "epoch": 0.1066979377040399, + "grad_norm": 0.17039507627487183, + "learning_rate": 4.893309300962019e-06, + "loss": 1.0525, + "step": 14740 + }, + { + "epoch": 0.10677032436462609, + "grad_norm": 0.16657809913158417, + "learning_rate": 4.893236914301433e-06, + "loss": 1.0669, + "step": 14750 + }, + { + "epoch": 0.10684271102521227, + "grad_norm": 0.17446079850196838, + "learning_rate": 4.893164527640847e-06, + "loss": 1.0464, + "step": 14760 + }, + { + "epoch": 0.10691509768579846, + "grad_norm": 0.16530625522136688, + "learning_rate": 4.893092140980261e-06, + "loss": 1.0426, + "step": 14770 + }, + { + "epoch": 0.10698748434638465, + "grad_norm": 0.180899515748024, + "learning_rate": 4.893019754319674e-06, + "loss": 1.0426, + "step": 14780 + }, + { + "epoch": 0.10705987100697084, + "grad_norm": 0.18140670657157898, + "learning_rate": 4.892947367659088e-06, + "loss": 1.0468, + "step": 14790 + }, + { + "epoch": 0.10713225766755702, + "grad_norm": 0.1905767023563385, + "learning_rate": 4.892874980998502e-06, + "loss": 1.0518, + "step": 14800 + }, + { + "epoch": 0.10720464432814321, + "grad_norm": 0.17752863466739655, + "learning_rate": 4.892802594337916e-06, + "loss": 1.048, + "step": 14810 + }, + { + "epoch": 0.1072770309887294, + "grad_norm": 0.16803748905658722, + "learning_rate": 4.89273020767733e-06, + "loss": 1.0472, + "step": 14820 + }, + { + "epoch": 0.10734941764931559, + "grad_norm": 0.17945846915245056, + "learning_rate": 4.892657821016743e-06, + "loss": 1.0396, + "step": 14830 + }, + { + "epoch": 0.10742180430990177, + "grad_norm": 0.19865509867668152, + "learning_rate": 4.892585434356158e-06, + "loss": 1.0592, + "step": 14840 + }, + { + "epoch": 0.10749419097048796, + "grad_norm": 0.18445666134357452, + "learning_rate": 4.892513047695571e-06, + "loss": 1.0411, + "step": 14850 + }, + { + "epoch": 0.10756657763107415, + "grad_norm": 0.1645526885986328, + "learning_rate": 4.892440661034985e-06, + "loss": 1.0588, + "step": 14860 + }, + { + "epoch": 0.10763896429166034, + "grad_norm": 0.1656254678964615, + "learning_rate": 4.8923682743743986e-06, + "loss": 1.0547, + "step": 14870 + }, + { + "epoch": 0.10771135095224652, + "grad_norm": 0.1796533614397049, + "learning_rate": 4.892295887713813e-06, + "loss": 1.0504, + "step": 14880 + }, + { + "epoch": 0.1077837376128327, + "grad_norm": 0.17627054452896118, + "learning_rate": 4.892223501053227e-06, + "loss": 1.064, + "step": 14890 + }, + { + "epoch": 0.1078561242734189, + "grad_norm": 0.1647876799106598, + "learning_rate": 4.89215111439264e-06, + "loss": 1.0305, + "step": 14900 + }, + { + "epoch": 0.10792851093400509, + "grad_norm": 0.17485207319259644, + "learning_rate": 4.892078727732054e-06, + "loss": 1.058, + "step": 14910 + }, + { + "epoch": 0.10800089759459126, + "grad_norm": 0.18584197759628296, + "learning_rate": 4.892006341071468e-06, + "loss": 1.0459, + "step": 14920 + }, + { + "epoch": 0.10807328425517745, + "grad_norm": 0.17022201418876648, + "learning_rate": 4.891933954410882e-06, + "loss": 1.0758, + "step": 14930 + }, + { + "epoch": 0.10814567091576364, + "grad_norm": 0.159866064786911, + "learning_rate": 4.891861567750296e-06, + "loss": 1.0397, + "step": 14940 + }, + { + "epoch": 0.10821805757634984, + "grad_norm": 0.16421310603618622, + "learning_rate": 4.891789181089709e-06, + "loss": 1.0472, + "step": 14950 + }, + { + "epoch": 0.10829044423693601, + "grad_norm": 0.17276668548583984, + "learning_rate": 4.891716794429123e-06, + "loss": 1.0433, + "step": 14960 + }, + { + "epoch": 0.1083628308975222, + "grad_norm": 0.1813632696866989, + "learning_rate": 4.8916444077685364e-06, + "loss": 1.0355, + "step": 14970 + }, + { + "epoch": 0.1084352175581084, + "grad_norm": 0.16496697068214417, + "learning_rate": 4.89157202110795e-06, + "loss": 1.0368, + "step": 14980 + }, + { + "epoch": 0.10850760421869458, + "grad_norm": 0.1636180728673935, + "learning_rate": 4.8914996344473645e-06, + "loss": 1.042, + "step": 14990 + }, + { + "epoch": 0.10857999087928076, + "grad_norm": 0.1825898438692093, + "learning_rate": 4.891427247786778e-06, + "loss": 1.0519, + "step": 15000 + }, + { + "epoch": 0.10865237753986695, + "grad_norm": 0.19418931007385254, + "learning_rate": 4.891354861126192e-06, + "loss": 1.0613, + "step": 15010 + }, + { + "epoch": 0.10872476420045314, + "grad_norm": 0.1902722269296646, + "learning_rate": 4.891282474465605e-06, + "loss": 1.0433, + "step": 15020 + }, + { + "epoch": 0.10879715086103933, + "grad_norm": 0.1725703924894333, + "learning_rate": 4.89121008780502e-06, + "loss": 1.0537, + "step": 15030 + }, + { + "epoch": 0.10886953752162551, + "grad_norm": 0.2096461057662964, + "learning_rate": 4.8911377011444334e-06, + "loss": 1.0416, + "step": 15040 + }, + { + "epoch": 0.1089419241822117, + "grad_norm": 0.1624659150838852, + "learning_rate": 4.891065314483847e-06, + "loss": 1.0572, + "step": 15050 + }, + { + "epoch": 0.10901431084279789, + "grad_norm": 0.1570434719324112, + "learning_rate": 4.890992927823261e-06, + "loss": 1.0396, + "step": 15060 + }, + { + "epoch": 0.10908669750338408, + "grad_norm": 0.17029106616973877, + "learning_rate": 4.890920541162675e-06, + "loss": 1.0596, + "step": 15070 + }, + { + "epoch": 0.10915908416397026, + "grad_norm": 0.16916610300540924, + "learning_rate": 4.890848154502089e-06, + "loss": 1.0534, + "step": 15080 + }, + { + "epoch": 0.10923147082455645, + "grad_norm": 0.18326231837272644, + "learning_rate": 4.890775767841502e-06, + "loss": 1.0517, + "step": 15090 + }, + { + "epoch": 0.10930385748514264, + "grad_norm": 0.2638685405254364, + "learning_rate": 4.890703381180916e-06, + "loss": 1.0556, + "step": 15100 + }, + { + "epoch": 0.10937624414572883, + "grad_norm": 0.17363910377025604, + "learning_rate": 4.8906309945203304e-06, + "loss": 1.0463, + "step": 15110 + }, + { + "epoch": 0.109448630806315, + "grad_norm": 0.17666375637054443, + "learning_rate": 4.890558607859744e-06, + "loss": 1.0603, + "step": 15120 + }, + { + "epoch": 0.1095210174669012, + "grad_norm": 0.16066348552703857, + "learning_rate": 4.890486221199158e-06, + "loss": 1.0578, + "step": 15130 + }, + { + "epoch": 0.10959340412748739, + "grad_norm": 0.18254084885120392, + "learning_rate": 4.890413834538571e-06, + "loss": 1.0439, + "step": 15140 + }, + { + "epoch": 0.10966579078807358, + "grad_norm": 0.18753020465373993, + "learning_rate": 4.890341447877986e-06, + "loss": 1.0529, + "step": 15150 + }, + { + "epoch": 0.10973817744865975, + "grad_norm": 0.21900314092636108, + "learning_rate": 4.890269061217399e-06, + "loss": 1.0389, + "step": 15160 + }, + { + "epoch": 0.10981056410924595, + "grad_norm": 0.17943327128887177, + "learning_rate": 4.890196674556813e-06, + "loss": 1.0558, + "step": 15170 + }, + { + "epoch": 0.10988295076983214, + "grad_norm": 0.16995948553085327, + "learning_rate": 4.890124287896227e-06, + "loss": 1.0474, + "step": 15180 + }, + { + "epoch": 0.10995533743041833, + "grad_norm": 0.17363294959068298, + "learning_rate": 4.890051901235641e-06, + "loss": 1.0629, + "step": 15190 + }, + { + "epoch": 0.1100277240910045, + "grad_norm": 0.1756804883480072, + "learning_rate": 4.889979514575055e-06, + "loss": 1.0547, + "step": 15200 + }, + { + "epoch": 0.1101001107515907, + "grad_norm": 0.1712425798177719, + "learning_rate": 4.889907127914468e-06, + "loss": 1.047, + "step": 15210 + }, + { + "epoch": 0.11017249741217688, + "grad_norm": 0.1906772404909134, + "learning_rate": 4.889834741253882e-06, + "loss": 1.0554, + "step": 15220 + }, + { + "epoch": 0.11024488407276307, + "grad_norm": 0.1886027455329895, + "learning_rate": 4.8897623545932955e-06, + "loss": 1.0429, + "step": 15230 + }, + { + "epoch": 0.11031727073334927, + "grad_norm": 0.17662407457828522, + "learning_rate": 4.88968996793271e-06, + "loss": 1.0659, + "step": 15240 + }, + { + "epoch": 0.11038965739393544, + "grad_norm": 0.1786973476409912, + "learning_rate": 4.889617581272124e-06, + "loss": 1.0657, + "step": 15250 + }, + { + "epoch": 0.11046204405452163, + "grad_norm": 0.18255244195461273, + "learning_rate": 4.889545194611537e-06, + "loss": 1.0411, + "step": 15260 + }, + { + "epoch": 0.11053443071510782, + "grad_norm": 0.18926455080509186, + "learning_rate": 4.889472807950951e-06, + "loss": 1.0492, + "step": 15270 + }, + { + "epoch": 0.11060681737569401, + "grad_norm": 0.17178624868392944, + "learning_rate": 4.889400421290365e-06, + "loss": 1.0424, + "step": 15280 + }, + { + "epoch": 0.11067920403628019, + "grad_norm": 0.17576183378696442, + "learning_rate": 4.889328034629779e-06, + "loss": 1.0464, + "step": 15290 + }, + { + "epoch": 0.11075159069686638, + "grad_norm": 0.19050145149230957, + "learning_rate": 4.8892556479691925e-06, + "loss": 1.0531, + "step": 15300 + }, + { + "epoch": 0.11082397735745257, + "grad_norm": 0.17656132578849792, + "learning_rate": 4.889183261308606e-06, + "loss": 1.0312, + "step": 15310 + }, + { + "epoch": 0.11089636401803876, + "grad_norm": 0.20985843241214752, + "learning_rate": 4.889110874648021e-06, + "loss": 1.0405, + "step": 15320 + }, + { + "epoch": 0.11096875067862494, + "grad_norm": 0.17128346860408783, + "learning_rate": 4.889038487987434e-06, + "loss": 1.0542, + "step": 15330 + }, + { + "epoch": 0.11104113733921113, + "grad_norm": 0.18605564534664154, + "learning_rate": 4.888966101326848e-06, + "loss": 1.0492, + "step": 15340 + }, + { + "epoch": 0.11111352399979732, + "grad_norm": 0.17718972265720367, + "learning_rate": 4.8888937146662615e-06, + "loss": 1.0379, + "step": 15350 + }, + { + "epoch": 0.11118591066038351, + "grad_norm": 0.263354629278183, + "learning_rate": 4.888821328005676e-06, + "loss": 1.0562, + "step": 15360 + }, + { + "epoch": 0.11125829732096969, + "grad_norm": 0.2226659655570984, + "learning_rate": 4.8887489413450895e-06, + "loss": 1.0404, + "step": 15370 + }, + { + "epoch": 0.11133068398155588, + "grad_norm": 0.18347443640232086, + "learning_rate": 4.888676554684503e-06, + "loss": 1.0458, + "step": 15380 + }, + { + "epoch": 0.11140307064214207, + "grad_norm": 0.2046961486339569, + "learning_rate": 4.888604168023917e-06, + "loss": 1.0678, + "step": 15390 + }, + { + "epoch": 0.11147545730272826, + "grad_norm": 0.18448476493358612, + "learning_rate": 4.888531781363331e-06, + "loss": 1.0627, + "step": 15400 + }, + { + "epoch": 0.11154784396331444, + "grad_norm": 0.17062750458717346, + "learning_rate": 4.888459394702745e-06, + "loss": 1.0404, + "step": 15410 + }, + { + "epoch": 0.11162023062390063, + "grad_norm": 0.18066982924938202, + "learning_rate": 4.8883870080421585e-06, + "loss": 1.0386, + "step": 15420 + }, + { + "epoch": 0.11169261728448682, + "grad_norm": 0.18624179065227509, + "learning_rate": 4.888314621381572e-06, + "loss": 1.0547, + "step": 15430 + }, + { + "epoch": 0.11176500394507301, + "grad_norm": 0.23639068007469177, + "learning_rate": 4.8882422347209866e-06, + "loss": 1.0424, + "step": 15440 + }, + { + "epoch": 0.11183739060565918, + "grad_norm": 0.2034512162208557, + "learning_rate": 4.8881698480604e-06, + "loss": 1.0349, + "step": 15450 + }, + { + "epoch": 0.11190977726624537, + "grad_norm": 0.1779569387435913, + "learning_rate": 4.888097461399814e-06, + "loss": 1.0517, + "step": 15460 + }, + { + "epoch": 0.11198216392683157, + "grad_norm": 0.1710672378540039, + "learning_rate": 4.888025074739227e-06, + "loss": 1.0476, + "step": 15470 + }, + { + "epoch": 0.11205455058741776, + "grad_norm": 0.1749650537967682, + "learning_rate": 4.887952688078642e-06, + "loss": 1.0481, + "step": 15480 + }, + { + "epoch": 0.11212693724800393, + "grad_norm": 0.17475520074367523, + "learning_rate": 4.887880301418055e-06, + "loss": 1.0611, + "step": 15490 + }, + { + "epoch": 0.11219932390859012, + "grad_norm": 0.16181065142154694, + "learning_rate": 4.887807914757468e-06, + "loss": 1.0518, + "step": 15500 + }, + { + "epoch": 0.11227171056917631, + "grad_norm": 0.2130517065525055, + "learning_rate": 4.887735528096883e-06, + "loss": 1.0441, + "step": 15510 + }, + { + "epoch": 0.1123440972297625, + "grad_norm": 0.18250152468681335, + "learning_rate": 4.887663141436296e-06, + "loss": 1.0524, + "step": 15520 + }, + { + "epoch": 0.11241648389034868, + "grad_norm": 0.17363341152668, + "learning_rate": 4.88759075477571e-06, + "loss": 1.0465, + "step": 15530 + }, + { + "epoch": 0.11248887055093487, + "grad_norm": 0.17302308976650238, + "learning_rate": 4.8875183681151236e-06, + "loss": 1.0467, + "step": 15540 + }, + { + "epoch": 0.11256125721152106, + "grad_norm": 0.18607738614082336, + "learning_rate": 4.887445981454538e-06, + "loss": 1.0449, + "step": 15550 + }, + { + "epoch": 0.11263364387210725, + "grad_norm": 0.2096407413482666, + "learning_rate": 4.887373594793952e-06, + "loss": 1.0541, + "step": 15560 + }, + { + "epoch": 0.11270603053269343, + "grad_norm": 0.17433767020702362, + "learning_rate": 4.887301208133365e-06, + "loss": 1.052, + "step": 15570 + }, + { + "epoch": 0.11277841719327962, + "grad_norm": 0.16963066160678864, + "learning_rate": 4.887228821472779e-06, + "loss": 1.0411, + "step": 15580 + }, + { + "epoch": 0.11285080385386581, + "grad_norm": 0.19870556890964508, + "learning_rate": 4.887156434812193e-06, + "loss": 1.0467, + "step": 15590 + }, + { + "epoch": 0.112923190514452, + "grad_norm": 0.17876464128494263, + "learning_rate": 4.887084048151607e-06, + "loss": 1.0401, + "step": 15600 + }, + { + "epoch": 0.11299557717503818, + "grad_norm": 0.15952976047992706, + "learning_rate": 4.8870116614910206e-06, + "loss": 1.0339, + "step": 15610 + }, + { + "epoch": 0.11306796383562437, + "grad_norm": 0.17333056032657623, + "learning_rate": 4.886939274830434e-06, + "loss": 1.0465, + "step": 15620 + }, + { + "epoch": 0.11314035049621056, + "grad_norm": 0.1765410602092743, + "learning_rate": 4.886866888169849e-06, + "loss": 1.0402, + "step": 15630 + }, + { + "epoch": 0.11321273715679675, + "grad_norm": 0.17773103713989258, + "learning_rate": 4.886794501509262e-06, + "loss": 1.0483, + "step": 15640 + }, + { + "epoch": 0.11328512381738293, + "grad_norm": 0.18877708911895752, + "learning_rate": 4.886722114848676e-06, + "loss": 1.0528, + "step": 15650 + }, + { + "epoch": 0.11335751047796912, + "grad_norm": 0.20413969457149506, + "learning_rate": 4.8866497281880895e-06, + "loss": 1.0667, + "step": 15660 + }, + { + "epoch": 0.11342989713855531, + "grad_norm": 0.16380807757377625, + "learning_rate": 4.886577341527504e-06, + "loss": 1.0583, + "step": 15670 + }, + { + "epoch": 0.1135022837991415, + "grad_norm": 0.17399290204048157, + "learning_rate": 4.886504954866918e-06, + "loss": 1.0675, + "step": 15680 + }, + { + "epoch": 0.11357467045972768, + "grad_norm": 0.18681904673576355, + "learning_rate": 4.886432568206331e-06, + "loss": 1.0452, + "step": 15690 + }, + { + "epoch": 0.11364705712031387, + "grad_norm": 0.15756523609161377, + "learning_rate": 4.886360181545745e-06, + "loss": 1.0391, + "step": 15700 + }, + { + "epoch": 0.11371944378090006, + "grad_norm": 0.18316251039505005, + "learning_rate": 4.886287794885159e-06, + "loss": 1.0474, + "step": 15710 + }, + { + "epoch": 0.11379183044148625, + "grad_norm": 0.19340310990810394, + "learning_rate": 4.886215408224573e-06, + "loss": 1.0438, + "step": 15720 + }, + { + "epoch": 0.11386421710207242, + "grad_norm": 0.16754977405071259, + "learning_rate": 4.8861430215639865e-06, + "loss": 1.0435, + "step": 15730 + }, + { + "epoch": 0.11393660376265861, + "grad_norm": 0.16273082792758942, + "learning_rate": 4.8860706349034e-06, + "loss": 1.0434, + "step": 15740 + }, + { + "epoch": 0.1140089904232448, + "grad_norm": 0.19182349741458893, + "learning_rate": 4.885998248242815e-06, + "loss": 1.0516, + "step": 15750 + }, + { + "epoch": 0.114081377083831, + "grad_norm": 0.2044076770544052, + "learning_rate": 4.885925861582228e-06, + "loss": 1.042, + "step": 15760 + }, + { + "epoch": 0.11415376374441717, + "grad_norm": 0.17020849883556366, + "learning_rate": 4.885853474921642e-06, + "loss": 1.0367, + "step": 15770 + }, + { + "epoch": 0.11422615040500336, + "grad_norm": 0.17558561265468597, + "learning_rate": 4.8857810882610554e-06, + "loss": 1.0528, + "step": 15780 + }, + { + "epoch": 0.11429853706558955, + "grad_norm": 0.20141629874706268, + "learning_rate": 4.88570870160047e-06, + "loss": 1.0485, + "step": 15790 + }, + { + "epoch": 0.11437092372617574, + "grad_norm": 0.17599183320999146, + "learning_rate": 4.8856363149398835e-06, + "loss": 1.0499, + "step": 15800 + }, + { + "epoch": 0.11444331038676192, + "grad_norm": 0.17712847888469696, + "learning_rate": 4.885563928279297e-06, + "loss": 1.0386, + "step": 15810 + }, + { + "epoch": 0.11451569704734811, + "grad_norm": 0.19977372884750366, + "learning_rate": 4.885491541618711e-06, + "loss": 1.0372, + "step": 15820 + }, + { + "epoch": 0.1145880837079343, + "grad_norm": 0.17353418469429016, + "learning_rate": 4.885419154958125e-06, + "loss": 1.0647, + "step": 15830 + }, + { + "epoch": 0.11466047036852049, + "grad_norm": 0.1808134913444519, + "learning_rate": 4.885346768297539e-06, + "loss": 1.0505, + "step": 15840 + }, + { + "epoch": 0.11473285702910668, + "grad_norm": 0.1848040372133255, + "learning_rate": 4.8852743816369524e-06, + "loss": 1.0363, + "step": 15850 + }, + { + "epoch": 0.11480524368969286, + "grad_norm": 0.1654992550611496, + "learning_rate": 4.885201994976366e-06, + "loss": 1.0361, + "step": 15860 + }, + { + "epoch": 0.11487763035027905, + "grad_norm": 0.17101933062076569, + "learning_rate": 4.88512960831578e-06, + "loss": 1.0407, + "step": 15870 + }, + { + "epoch": 0.11495001701086524, + "grad_norm": 0.2501929998397827, + "learning_rate": 4.885057221655194e-06, + "loss": 1.0494, + "step": 15880 + }, + { + "epoch": 0.11502240367145143, + "grad_norm": 0.17349718511104584, + "learning_rate": 4.884984834994608e-06, + "loss": 1.0417, + "step": 15890 + }, + { + "epoch": 0.11509479033203761, + "grad_norm": 0.20508350431919098, + "learning_rate": 4.884912448334021e-06, + "loss": 1.0481, + "step": 15900 + }, + { + "epoch": 0.1151671769926238, + "grad_norm": 0.17247234284877777, + "learning_rate": 4.884840061673435e-06, + "loss": 1.0362, + "step": 15910 + }, + { + "epoch": 0.11523956365320999, + "grad_norm": 0.16966472566127777, + "learning_rate": 4.8847676750128495e-06, + "loss": 1.0501, + "step": 15920 + }, + { + "epoch": 0.11531195031379618, + "grad_norm": 0.18211546540260315, + "learning_rate": 4.884695288352263e-06, + "loss": 1.0437, + "step": 15930 + }, + { + "epoch": 0.11538433697438236, + "grad_norm": 0.1784241944551468, + "learning_rate": 4.884622901691677e-06, + "loss": 1.0439, + "step": 15940 + }, + { + "epoch": 0.11545672363496855, + "grad_norm": 0.18033133447170258, + "learning_rate": 4.88455051503109e-06, + "loss": 1.0441, + "step": 15950 + }, + { + "epoch": 0.11552911029555474, + "grad_norm": 0.1717543601989746, + "learning_rate": 4.884478128370505e-06, + "loss": 1.032, + "step": 15960 + }, + { + "epoch": 0.11560149695614093, + "grad_norm": 0.18930160999298096, + "learning_rate": 4.884405741709918e-06, + "loss": 1.0425, + "step": 15970 + }, + { + "epoch": 0.1156738836167271, + "grad_norm": 0.17767852544784546, + "learning_rate": 4.884333355049332e-06, + "loss": 1.039, + "step": 15980 + }, + { + "epoch": 0.1157462702773133, + "grad_norm": 0.20390869677066803, + "learning_rate": 4.884260968388746e-06, + "loss": 1.0554, + "step": 15990 + }, + { + "epoch": 0.11581865693789949, + "grad_norm": 0.17186413705348969, + "learning_rate": 4.88418858172816e-06, + "loss": 1.0445, + "step": 16000 + }, + { + "epoch": 0.11589104359848568, + "grad_norm": 0.17237228155136108, + "learning_rate": 4.884116195067574e-06, + "loss": 1.0376, + "step": 16010 + }, + { + "epoch": 0.11596343025907185, + "grad_norm": 0.17973795533180237, + "learning_rate": 4.884043808406987e-06, + "loss": 1.0304, + "step": 16020 + }, + { + "epoch": 0.11603581691965804, + "grad_norm": 0.16578522324562073, + "learning_rate": 4.883971421746401e-06, + "loss": 1.0475, + "step": 16030 + }, + { + "epoch": 0.11610820358024423, + "grad_norm": 0.16535358130931854, + "learning_rate": 4.8838990350858145e-06, + "loss": 1.0501, + "step": 16040 + }, + { + "epoch": 0.11618059024083043, + "grad_norm": 0.18786129355430603, + "learning_rate": 4.883826648425228e-06, + "loss": 1.05, + "step": 16050 + }, + { + "epoch": 0.1162529769014166, + "grad_norm": 0.19526006281375885, + "learning_rate": 4.883754261764642e-06, + "loss": 1.0365, + "step": 16060 + }, + { + "epoch": 0.11632536356200279, + "grad_norm": 0.166265606880188, + "learning_rate": 4.883681875104056e-06, + "loss": 1.0481, + "step": 16070 + }, + { + "epoch": 0.11639775022258898, + "grad_norm": 0.18460646271705627, + "learning_rate": 4.88360948844347e-06, + "loss": 1.0422, + "step": 16080 + }, + { + "epoch": 0.11647013688317517, + "grad_norm": 0.16810445487499237, + "learning_rate": 4.8835371017828835e-06, + "loss": 1.0535, + "step": 16090 + }, + { + "epoch": 0.11654252354376135, + "grad_norm": 0.469312459230423, + "learning_rate": 4.883464715122297e-06, + "loss": 1.0527, + "step": 16100 + }, + { + "epoch": 0.11661491020434754, + "grad_norm": 0.1745699644088745, + "learning_rate": 4.8833923284617115e-06, + "loss": 1.0331, + "step": 16110 + }, + { + "epoch": 0.11668729686493373, + "grad_norm": 0.17050036787986755, + "learning_rate": 4.883319941801125e-06, + "loss": 1.0449, + "step": 16120 + }, + { + "epoch": 0.11675968352551992, + "grad_norm": 0.18037088215351105, + "learning_rate": 4.883247555140539e-06, + "loss": 1.044, + "step": 16130 + }, + { + "epoch": 0.1168320701861061, + "grad_norm": 0.23608429729938507, + "learning_rate": 4.883175168479952e-06, + "loss": 1.0464, + "step": 16140 + }, + { + "epoch": 0.11690445684669229, + "grad_norm": 0.175536647439003, + "learning_rate": 4.883102781819367e-06, + "loss": 1.051, + "step": 16150 + }, + { + "epoch": 0.11697684350727848, + "grad_norm": 0.17264996469020844, + "learning_rate": 4.8830303951587805e-06, + "loss": 1.0428, + "step": 16160 + }, + { + "epoch": 0.11704923016786467, + "grad_norm": 0.17153437435626984, + "learning_rate": 4.882958008498194e-06, + "loss": 1.035, + "step": 16170 + }, + { + "epoch": 0.11712161682845085, + "grad_norm": 0.16962124407291412, + "learning_rate": 4.882885621837608e-06, + "loss": 1.0428, + "step": 16180 + }, + { + "epoch": 0.11719400348903704, + "grad_norm": 0.17661970853805542, + "learning_rate": 4.882813235177022e-06, + "loss": 1.0442, + "step": 16190 + }, + { + "epoch": 0.11726639014962323, + "grad_norm": 0.1694341003894806, + "learning_rate": 4.882740848516436e-06, + "loss": 1.0409, + "step": 16200 + }, + { + "epoch": 0.11733877681020942, + "grad_norm": 0.1740081012248993, + "learning_rate": 4.882668461855849e-06, + "loss": 1.042, + "step": 16210 + }, + { + "epoch": 0.1174111634707956, + "grad_norm": 0.20887626707553864, + "learning_rate": 4.882596075195263e-06, + "loss": 1.0573, + "step": 16220 + }, + { + "epoch": 0.11748355013138179, + "grad_norm": 0.1686887890100479, + "learning_rate": 4.8825236885346775e-06, + "loss": 1.0396, + "step": 16230 + }, + { + "epoch": 0.11755593679196798, + "grad_norm": 0.654279351234436, + "learning_rate": 4.882451301874091e-06, + "loss": 1.0511, + "step": 16240 + }, + { + "epoch": 0.11762832345255417, + "grad_norm": 0.18497861921787262, + "learning_rate": 4.882378915213505e-06, + "loss": 1.0568, + "step": 16250 + }, + { + "epoch": 0.11770071011314034, + "grad_norm": 0.1842462718486786, + "learning_rate": 4.882306528552918e-06, + "loss": 1.0534, + "step": 16260 + }, + { + "epoch": 0.11777309677372653, + "grad_norm": 0.16614331305027008, + "learning_rate": 4.882234141892333e-06, + "loss": 1.0546, + "step": 16270 + }, + { + "epoch": 0.11784548343431273, + "grad_norm": 0.17406275868415833, + "learning_rate": 4.882161755231746e-06, + "loss": 1.0451, + "step": 16280 + }, + { + "epoch": 0.11791787009489892, + "grad_norm": 0.18107841908931732, + "learning_rate": 4.88208936857116e-06, + "loss": 1.0556, + "step": 16290 + }, + { + "epoch": 0.11799025675548509, + "grad_norm": 0.17502503097057343, + "learning_rate": 4.882016981910574e-06, + "loss": 1.0464, + "step": 16300 + }, + { + "epoch": 0.11806264341607128, + "grad_norm": 0.18071506917476654, + "learning_rate": 4.881944595249988e-06, + "loss": 1.0354, + "step": 16310 + }, + { + "epoch": 0.11813503007665747, + "grad_norm": 0.16707313060760498, + "learning_rate": 4.881872208589402e-06, + "loss": 1.0548, + "step": 16320 + }, + { + "epoch": 0.11820741673724366, + "grad_norm": 0.18432946503162384, + "learning_rate": 4.881799821928815e-06, + "loss": 1.0413, + "step": 16330 + }, + { + "epoch": 0.11827980339782984, + "grad_norm": 0.19203749299049377, + "learning_rate": 4.881727435268229e-06, + "loss": 1.0507, + "step": 16340 + }, + { + "epoch": 0.11835219005841603, + "grad_norm": 0.20910878479480743, + "learning_rate": 4.881655048607643e-06, + "loss": 1.0511, + "step": 16350 + }, + { + "epoch": 0.11842457671900222, + "grad_norm": 0.16124652326107025, + "learning_rate": 4.881582661947057e-06, + "loss": 1.0418, + "step": 16360 + }, + { + "epoch": 0.11849696337958841, + "grad_norm": 0.1836594045162201, + "learning_rate": 4.881510275286471e-06, + "loss": 1.0411, + "step": 16370 + }, + { + "epoch": 0.11856935004017459, + "grad_norm": 0.1791921854019165, + "learning_rate": 4.881437888625884e-06, + "loss": 1.0399, + "step": 16380 + }, + { + "epoch": 0.11864173670076078, + "grad_norm": 0.1730756163597107, + "learning_rate": 4.881365501965299e-06, + "loss": 1.0393, + "step": 16390 + }, + { + "epoch": 0.11871412336134697, + "grad_norm": 0.17524994909763336, + "learning_rate": 4.881293115304712e-06, + "loss": 1.0573, + "step": 16400 + }, + { + "epoch": 0.11878651002193316, + "grad_norm": 0.17954038083553314, + "learning_rate": 4.881220728644126e-06, + "loss": 1.0378, + "step": 16410 + }, + { + "epoch": 0.11885889668251935, + "grad_norm": 0.17099116742610931, + "learning_rate": 4.88114834198354e-06, + "loss": 1.047, + "step": 16420 + }, + { + "epoch": 0.11893128334310553, + "grad_norm": 0.1616821140050888, + "learning_rate": 4.881075955322954e-06, + "loss": 1.0512, + "step": 16430 + }, + { + "epoch": 0.11900367000369172, + "grad_norm": 0.18617095053195953, + "learning_rate": 4.881003568662368e-06, + "loss": 1.0634, + "step": 16440 + }, + { + "epoch": 0.11907605666427791, + "grad_norm": 0.19976121187210083, + "learning_rate": 4.880931182001781e-06, + "loss": 1.0464, + "step": 16450 + }, + { + "epoch": 0.1191484433248641, + "grad_norm": 0.17326410114765167, + "learning_rate": 4.880858795341195e-06, + "loss": 1.0619, + "step": 16460 + }, + { + "epoch": 0.11922082998545028, + "grad_norm": 0.16668163239955902, + "learning_rate": 4.880786408680609e-06, + "loss": 1.03, + "step": 16470 + }, + { + "epoch": 0.11929321664603647, + "grad_norm": 0.16474226117134094, + "learning_rate": 4.880714022020023e-06, + "loss": 1.0532, + "step": 16480 + }, + { + "epoch": 0.11936560330662266, + "grad_norm": 0.18454322218894958, + "learning_rate": 4.880641635359437e-06, + "loss": 1.0407, + "step": 16490 + }, + { + "epoch": 0.11943798996720885, + "grad_norm": 0.18234814703464508, + "learning_rate": 4.88056924869885e-06, + "loss": 1.0338, + "step": 16500 + }, + { + "epoch": 0.11951037662779503, + "grad_norm": 0.2152404487133026, + "learning_rate": 4.880496862038264e-06, + "loss": 1.0384, + "step": 16510 + }, + { + "epoch": 0.11958276328838122, + "grad_norm": 0.1833188831806183, + "learning_rate": 4.880424475377678e-06, + "loss": 1.052, + "step": 16520 + }, + { + "epoch": 0.11965514994896741, + "grad_norm": 0.16804316639900208, + "learning_rate": 4.880352088717092e-06, + "loss": 1.0423, + "step": 16530 + }, + { + "epoch": 0.1197275366095536, + "grad_norm": 0.21003463864326477, + "learning_rate": 4.8802797020565055e-06, + "loss": 1.0444, + "step": 16540 + }, + { + "epoch": 0.11979992327013977, + "grad_norm": 0.17130064964294434, + "learning_rate": 4.880207315395919e-06, + "loss": 1.0572, + "step": 16550 + }, + { + "epoch": 0.11987230993072596, + "grad_norm": 0.17350070178508759, + "learning_rate": 4.880134928735333e-06, + "loss": 1.0547, + "step": 16560 + }, + { + "epoch": 0.11994469659131216, + "grad_norm": 0.17210082709789276, + "learning_rate": 4.880062542074746e-06, + "loss": 1.0394, + "step": 16570 + }, + { + "epoch": 0.12001708325189835, + "grad_norm": 0.17453570663928986, + "learning_rate": 4.879990155414161e-06, + "loss": 1.044, + "step": 16580 + }, + { + "epoch": 0.12008946991248452, + "grad_norm": 0.16361692547798157, + "learning_rate": 4.8799177687535744e-06, + "loss": 1.0393, + "step": 16590 + }, + { + "epoch": 0.12016185657307071, + "grad_norm": 0.17388883233070374, + "learning_rate": 4.879845382092988e-06, + "loss": 1.0354, + "step": 16600 + }, + { + "epoch": 0.1202342432336569, + "grad_norm": 0.18685205280780792, + "learning_rate": 4.879772995432402e-06, + "loss": 1.0414, + "step": 16610 + }, + { + "epoch": 0.1203066298942431, + "grad_norm": 0.20947107672691345, + "learning_rate": 4.879700608771816e-06, + "loss": 1.0482, + "step": 16620 + }, + { + "epoch": 0.12037901655482927, + "grad_norm": 0.16179795563220978, + "learning_rate": 4.87962822211123e-06, + "loss": 1.0336, + "step": 16630 + }, + { + "epoch": 0.12045140321541546, + "grad_norm": 0.18635103106498718, + "learning_rate": 4.879555835450643e-06, + "loss": 1.0425, + "step": 16640 + }, + { + "epoch": 0.12052378987600165, + "grad_norm": 0.17454616725444794, + "learning_rate": 4.879483448790057e-06, + "loss": 1.0383, + "step": 16650 + }, + { + "epoch": 0.12059617653658784, + "grad_norm": 0.17645971477031708, + "learning_rate": 4.879411062129471e-06, + "loss": 1.036, + "step": 16660 + }, + { + "epoch": 0.12066856319717402, + "grad_norm": 0.20391476154327393, + "learning_rate": 4.879338675468885e-06, + "loss": 1.0259, + "step": 16670 + }, + { + "epoch": 0.12074094985776021, + "grad_norm": 0.1724318414926529, + "learning_rate": 4.879266288808299e-06, + "loss": 1.0425, + "step": 16680 + }, + { + "epoch": 0.1208133365183464, + "grad_norm": 0.17643970251083374, + "learning_rate": 4.879193902147712e-06, + "loss": 1.0654, + "step": 16690 + }, + { + "epoch": 0.12088572317893259, + "grad_norm": 0.339347243309021, + "learning_rate": 4.879121515487126e-06, + "loss": 1.048, + "step": 16700 + }, + { + "epoch": 0.12095810983951877, + "grad_norm": 0.17284011840820312, + "learning_rate": 4.87904912882654e-06, + "loss": 1.0377, + "step": 16710 + }, + { + "epoch": 0.12103049650010496, + "grad_norm": 0.1662074625492096, + "learning_rate": 4.878976742165954e-06, + "loss": 1.0272, + "step": 16720 + }, + { + "epoch": 0.12110288316069115, + "grad_norm": 0.1813957393169403, + "learning_rate": 4.878904355505368e-06, + "loss": 1.0351, + "step": 16730 + }, + { + "epoch": 0.12117526982127734, + "grad_norm": 0.17036522924900055, + "learning_rate": 4.878831968844781e-06, + "loss": 1.0373, + "step": 16740 + }, + { + "epoch": 0.12124765648186352, + "grad_norm": 0.17929202318191528, + "learning_rate": 4.878759582184196e-06, + "loss": 1.0419, + "step": 16750 + }, + { + "epoch": 0.12132004314244971, + "grad_norm": 0.173272043466568, + "learning_rate": 4.878687195523609e-06, + "loss": 1.0535, + "step": 16760 + }, + { + "epoch": 0.1213924298030359, + "grad_norm": 0.18325883150100708, + "learning_rate": 4.878614808863023e-06, + "loss": 1.0403, + "step": 16770 + }, + { + "epoch": 0.12146481646362209, + "grad_norm": 0.18894407153129578, + "learning_rate": 4.8785424222024365e-06, + "loss": 1.0472, + "step": 16780 + }, + { + "epoch": 0.12153720312420827, + "grad_norm": 0.17379698157310486, + "learning_rate": 4.878470035541851e-06, + "loss": 1.0422, + "step": 16790 + }, + { + "epoch": 0.12160958978479446, + "grad_norm": 0.17601001262664795, + "learning_rate": 4.878397648881265e-06, + "loss": 1.0497, + "step": 16800 + }, + { + "epoch": 0.12168197644538065, + "grad_norm": 0.16135278344154358, + "learning_rate": 4.878325262220678e-06, + "loss": 1.0349, + "step": 16810 + }, + { + "epoch": 0.12175436310596684, + "grad_norm": 0.17922668159008026, + "learning_rate": 4.878252875560092e-06, + "loss": 1.0377, + "step": 16820 + }, + { + "epoch": 0.12182674976655301, + "grad_norm": 0.17484663426876068, + "learning_rate": 4.878180488899506e-06, + "loss": 1.0463, + "step": 16830 + }, + { + "epoch": 0.1218991364271392, + "grad_norm": 0.16210487484931946, + "learning_rate": 4.87810810223892e-06, + "loss": 1.0429, + "step": 16840 + }, + { + "epoch": 0.1219715230877254, + "grad_norm": 0.17364494502544403, + "learning_rate": 4.8780357155783335e-06, + "loss": 1.0426, + "step": 16850 + }, + { + "epoch": 0.12204390974831159, + "grad_norm": 0.1732948124408722, + "learning_rate": 4.877963328917747e-06, + "loss": 1.0417, + "step": 16860 + }, + { + "epoch": 0.12211629640889776, + "grad_norm": 0.18654009699821472, + "learning_rate": 4.877890942257162e-06, + "loss": 1.0351, + "step": 16870 + }, + { + "epoch": 0.12218868306948395, + "grad_norm": 0.17695607244968414, + "learning_rate": 4.877818555596575e-06, + "loss": 1.0412, + "step": 16880 + }, + { + "epoch": 0.12226106973007014, + "grad_norm": 0.173911452293396, + "learning_rate": 4.877746168935989e-06, + "loss": 1.0419, + "step": 16890 + }, + { + "epoch": 0.12233345639065633, + "grad_norm": 0.17585676908493042, + "learning_rate": 4.8776737822754025e-06, + "loss": 1.0414, + "step": 16900 + }, + { + "epoch": 0.12240584305124251, + "grad_norm": 0.179653599858284, + "learning_rate": 4.877601395614817e-06, + "loss": 1.0412, + "step": 16910 + }, + { + "epoch": 0.1224782297118287, + "grad_norm": 0.16631865501403809, + "learning_rate": 4.8775290089542306e-06, + "loss": 1.0408, + "step": 16920 + }, + { + "epoch": 0.12255061637241489, + "grad_norm": 0.17161540687084198, + "learning_rate": 4.877456622293644e-06, + "loss": 1.0323, + "step": 16930 + }, + { + "epoch": 0.12262300303300108, + "grad_norm": 0.1741219311952591, + "learning_rate": 4.877384235633058e-06, + "loss": 1.0435, + "step": 16940 + }, + { + "epoch": 0.12269538969358726, + "grad_norm": 0.2024671733379364, + "learning_rate": 4.877311848972472e-06, + "loss": 1.0446, + "step": 16950 + }, + { + "epoch": 0.12276777635417345, + "grad_norm": 0.1737942099571228, + "learning_rate": 4.877239462311886e-06, + "loss": 1.0464, + "step": 16960 + }, + { + "epoch": 0.12284016301475964, + "grad_norm": 0.21153798699378967, + "learning_rate": 4.8771670756512995e-06, + "loss": 1.0438, + "step": 16970 + }, + { + "epoch": 0.12291254967534583, + "grad_norm": 0.17885242402553558, + "learning_rate": 4.877094688990713e-06, + "loss": 1.0402, + "step": 16980 + }, + { + "epoch": 0.12298493633593202, + "grad_norm": 0.18069690465927124, + "learning_rate": 4.8770223023301276e-06, + "loss": 1.0519, + "step": 16990 + }, + { + "epoch": 0.1230573229965182, + "grad_norm": 0.17245696485042572, + "learning_rate": 4.876949915669541e-06, + "loss": 1.0395, + "step": 17000 + }, + { + "epoch": 0.12312970965710439, + "grad_norm": 0.16451863944530487, + "learning_rate": 4.876877529008955e-06, + "loss": 1.0378, + "step": 17010 + }, + { + "epoch": 0.12320209631769058, + "grad_norm": 0.18624931573867798, + "learning_rate": 4.876805142348368e-06, + "loss": 1.0513, + "step": 17020 + }, + { + "epoch": 0.12327448297827677, + "grad_norm": 0.16225986182689667, + "learning_rate": 4.876732755687783e-06, + "loss": 1.0374, + "step": 17030 + }, + { + "epoch": 0.12334686963886295, + "grad_norm": 0.19967621564865112, + "learning_rate": 4.8766603690271965e-06, + "loss": 1.0455, + "step": 17040 + }, + { + "epoch": 0.12341925629944914, + "grad_norm": 0.18386250734329224, + "learning_rate": 4.87658798236661e-06, + "loss": 1.0349, + "step": 17050 + }, + { + "epoch": 0.12349164296003533, + "grad_norm": 0.17461447417736053, + "learning_rate": 4.876515595706024e-06, + "loss": 1.0374, + "step": 17060 + }, + { + "epoch": 0.12356402962062152, + "grad_norm": 0.16911447048187256, + "learning_rate": 4.876443209045438e-06, + "loss": 1.0312, + "step": 17070 + }, + { + "epoch": 0.1236364162812077, + "grad_norm": 0.17958052456378937, + "learning_rate": 4.876370822384851e-06, + "loss": 1.038, + "step": 17080 + }, + { + "epoch": 0.12370880294179389, + "grad_norm": 0.16912564635276794, + "learning_rate": 4.8762984357242646e-06, + "loss": 1.0268, + "step": 17090 + }, + { + "epoch": 0.12378118960238008, + "grad_norm": 0.18362846970558167, + "learning_rate": 4.876226049063679e-06, + "loss": 1.0457, + "step": 17100 + }, + { + "epoch": 0.12385357626296627, + "grad_norm": 0.18593281507492065, + "learning_rate": 4.876153662403093e-06, + "loss": 1.0296, + "step": 17110 + }, + { + "epoch": 0.12392596292355244, + "grad_norm": 0.1746986359357834, + "learning_rate": 4.876081275742506e-06, + "loss": 1.0429, + "step": 17120 + }, + { + "epoch": 0.12399834958413863, + "grad_norm": 0.16610664129257202, + "learning_rate": 4.87600888908192e-06, + "loss": 1.0432, + "step": 17130 + }, + { + "epoch": 0.12407073624472482, + "grad_norm": 0.1753586083650589, + "learning_rate": 4.875936502421334e-06, + "loss": 1.0408, + "step": 17140 + }, + { + "epoch": 0.12414312290531102, + "grad_norm": 0.1812318116426468, + "learning_rate": 4.875864115760748e-06, + "loss": 1.0482, + "step": 17150 + }, + { + "epoch": 0.12421550956589719, + "grad_norm": 0.18393771350383759, + "learning_rate": 4.875791729100162e-06, + "loss": 1.0439, + "step": 17160 + }, + { + "epoch": 0.12428789622648338, + "grad_norm": 0.2628847062587738, + "learning_rate": 4.875719342439575e-06, + "loss": 1.0498, + "step": 17170 + }, + { + "epoch": 0.12436028288706957, + "grad_norm": 0.18083707988262177, + "learning_rate": 4.87564695577899e-06, + "loss": 1.0428, + "step": 17180 + }, + { + "epoch": 0.12443266954765576, + "grad_norm": 0.19616307318210602, + "learning_rate": 4.875574569118403e-06, + "loss": 1.0391, + "step": 17190 + }, + { + "epoch": 0.12450505620824194, + "grad_norm": 0.16640251874923706, + "learning_rate": 4.875502182457817e-06, + "loss": 1.0412, + "step": 17200 + }, + { + "epoch": 0.12457744286882813, + "grad_norm": 0.2135634571313858, + "learning_rate": 4.8754297957972305e-06, + "loss": 1.0415, + "step": 17210 + }, + { + "epoch": 0.12464982952941432, + "grad_norm": 0.17053596675395966, + "learning_rate": 4.875357409136645e-06, + "loss": 1.0467, + "step": 17220 + }, + { + "epoch": 0.12472221619000051, + "grad_norm": 0.16077913343906403, + "learning_rate": 4.875285022476059e-06, + "loss": 1.0309, + "step": 17230 + }, + { + "epoch": 0.12479460285058669, + "grad_norm": 0.17686612904071808, + "learning_rate": 4.875212635815472e-06, + "loss": 1.0304, + "step": 17240 + }, + { + "epoch": 0.12486698951117288, + "grad_norm": 0.1883876919746399, + "learning_rate": 4.875140249154886e-06, + "loss": 1.0413, + "step": 17250 + }, + { + "epoch": 0.12493937617175907, + "grad_norm": 0.20872871577739716, + "learning_rate": 4.8750678624943e-06, + "loss": 1.0434, + "step": 17260 + }, + { + "epoch": 0.12501176283234525, + "grad_norm": 0.17581568658351898, + "learning_rate": 4.874995475833714e-06, + "loss": 1.0488, + "step": 17270 + }, + { + "epoch": 0.12508414949293145, + "grad_norm": 0.17273147404193878, + "learning_rate": 4.8749230891731275e-06, + "loss": 1.0498, + "step": 17280 + }, + { + "epoch": 0.12515653615351763, + "grad_norm": 0.21244202554225922, + "learning_rate": 4.874850702512541e-06, + "loss": 1.0332, + "step": 17290 + }, + { + "epoch": 0.1252289228141038, + "grad_norm": 0.17866666615009308, + "learning_rate": 4.874778315851955e-06, + "loss": 1.0346, + "step": 17300 + }, + { + "epoch": 0.12530130947469, + "grad_norm": 0.1749383807182312, + "learning_rate": 4.874705929191369e-06, + "loss": 1.0452, + "step": 17310 + }, + { + "epoch": 0.12537369613527619, + "grad_norm": 0.175296813249588, + "learning_rate": 4.874633542530783e-06, + "loss": 1.0376, + "step": 17320 + }, + { + "epoch": 0.1254460827958624, + "grad_norm": 0.18073537945747375, + "learning_rate": 4.8745611558701964e-06, + "loss": 1.0316, + "step": 17330 + }, + { + "epoch": 0.12551846945644857, + "grad_norm": 0.18828389048576355, + "learning_rate": 4.87448876920961e-06, + "loss": 1.0376, + "step": 17340 + }, + { + "epoch": 0.12559085611703474, + "grad_norm": 0.25895076990127563, + "learning_rate": 4.8744163825490245e-06, + "loss": 1.0266, + "step": 17350 + }, + { + "epoch": 0.12566324277762095, + "grad_norm": 0.200386181473732, + "learning_rate": 4.874343995888438e-06, + "loss": 1.0438, + "step": 17360 + }, + { + "epoch": 0.12573562943820712, + "grad_norm": 0.1703101396560669, + "learning_rate": 4.874271609227852e-06, + "loss": 1.0397, + "step": 17370 + }, + { + "epoch": 0.1258080160987933, + "grad_norm": 0.18138134479522705, + "learning_rate": 4.874199222567265e-06, + "loss": 1.0447, + "step": 17380 + }, + { + "epoch": 0.1258804027593795, + "grad_norm": 0.16725878417491913, + "learning_rate": 4.87412683590668e-06, + "loss": 1.0437, + "step": 17390 + }, + { + "epoch": 0.12595278941996568, + "grad_norm": 0.19038145244121552, + "learning_rate": 4.8740544492460934e-06, + "loss": 1.0377, + "step": 17400 + }, + { + "epoch": 0.1260251760805519, + "grad_norm": 0.20098719000816345, + "learning_rate": 4.873982062585507e-06, + "loss": 1.0406, + "step": 17410 + }, + { + "epoch": 0.12609756274113806, + "grad_norm": 0.17163150012493134, + "learning_rate": 4.873909675924921e-06, + "loss": 1.0316, + "step": 17420 + }, + { + "epoch": 0.12616994940172424, + "grad_norm": 0.17717736959457397, + "learning_rate": 4.873837289264335e-06, + "loss": 1.0378, + "step": 17430 + }, + { + "epoch": 0.12624233606231045, + "grad_norm": 0.1882064789533615, + "learning_rate": 4.873764902603749e-06, + "loss": 1.0284, + "step": 17440 + }, + { + "epoch": 0.12631472272289662, + "grad_norm": 0.1787366420030594, + "learning_rate": 4.873692515943162e-06, + "loss": 1.0418, + "step": 17450 + }, + { + "epoch": 0.1263871093834828, + "grad_norm": 0.17708836495876312, + "learning_rate": 4.873620129282576e-06, + "loss": 1.047, + "step": 17460 + }, + { + "epoch": 0.126459496044069, + "grad_norm": 0.1702563464641571, + "learning_rate": 4.8735477426219905e-06, + "loss": 1.038, + "step": 17470 + }, + { + "epoch": 0.12653188270465518, + "grad_norm": 0.1806415468454361, + "learning_rate": 4.873475355961404e-06, + "loss": 1.0358, + "step": 17480 + }, + { + "epoch": 0.12660426936524138, + "grad_norm": 0.24682064354419708, + "learning_rate": 4.873402969300818e-06, + "loss": 1.0479, + "step": 17490 + }, + { + "epoch": 0.12667665602582756, + "grad_norm": 0.18831071257591248, + "learning_rate": 4.873330582640231e-06, + "loss": 1.0333, + "step": 17500 + }, + { + "epoch": 0.12674904268641374, + "grad_norm": 0.17342287302017212, + "learning_rate": 4.873258195979646e-06, + "loss": 1.0253, + "step": 17510 + }, + { + "epoch": 0.12682142934699994, + "grad_norm": 0.17534632980823517, + "learning_rate": 4.873185809319059e-06, + "loss": 1.0478, + "step": 17520 + }, + { + "epoch": 0.12689381600758612, + "grad_norm": 0.16948920488357544, + "learning_rate": 4.873113422658473e-06, + "loss": 1.0396, + "step": 17530 + }, + { + "epoch": 0.1269662026681723, + "grad_norm": 0.19890448451042175, + "learning_rate": 4.873041035997887e-06, + "loss": 1.035, + "step": 17540 + }, + { + "epoch": 0.1270385893287585, + "grad_norm": 0.17209777235984802, + "learning_rate": 4.872968649337301e-06, + "loss": 1.031, + "step": 17550 + }, + { + "epoch": 0.12711097598934468, + "grad_norm": 0.1808318793773651, + "learning_rate": 4.872896262676715e-06, + "loss": 1.0491, + "step": 17560 + }, + { + "epoch": 0.12718336264993088, + "grad_norm": 0.24130041897296906, + "learning_rate": 4.872823876016128e-06, + "loss": 1.0217, + "step": 17570 + }, + { + "epoch": 0.12725574931051706, + "grad_norm": 0.22134606540203094, + "learning_rate": 4.872751489355542e-06, + "loss": 1.0452, + "step": 17580 + }, + { + "epoch": 0.12732813597110323, + "grad_norm": 0.18467597663402557, + "learning_rate": 4.872679102694956e-06, + "loss": 1.0515, + "step": 17590 + }, + { + "epoch": 0.12740052263168944, + "grad_norm": 0.1730322241783142, + "learning_rate": 4.87260671603437e-06, + "loss": 1.0315, + "step": 17600 + }, + { + "epoch": 0.12747290929227562, + "grad_norm": 0.1717580109834671, + "learning_rate": 4.872534329373783e-06, + "loss": 1.0351, + "step": 17610 + }, + { + "epoch": 0.12754529595286182, + "grad_norm": 0.17170915007591248, + "learning_rate": 4.872461942713197e-06, + "loss": 1.033, + "step": 17620 + }, + { + "epoch": 0.127617682613448, + "grad_norm": 0.17856451869010925, + "learning_rate": 4.872389556052611e-06, + "loss": 1.0123, + "step": 17630 + }, + { + "epoch": 0.12769006927403417, + "grad_norm": 0.19595351815223694, + "learning_rate": 4.8723171693920245e-06, + "loss": 1.0247, + "step": 17640 + }, + { + "epoch": 0.12776245593462038, + "grad_norm": 0.16732360422611237, + "learning_rate": 4.872244782731438e-06, + "loss": 1.0421, + "step": 17650 + }, + { + "epoch": 0.12783484259520655, + "grad_norm": 0.18419161438941956, + "learning_rate": 4.8721723960708526e-06, + "loss": 1.0563, + "step": 17660 + }, + { + "epoch": 0.12790722925579273, + "grad_norm": 0.20935377478599548, + "learning_rate": 4.872100009410266e-06, + "loss": 1.0111, + "step": 17670 + }, + { + "epoch": 0.12797961591637894, + "grad_norm": 0.1993681639432907, + "learning_rate": 4.87202762274968e-06, + "loss": 1.0407, + "step": 17680 + }, + { + "epoch": 0.1280520025769651, + "grad_norm": 0.16351039707660675, + "learning_rate": 4.871955236089093e-06, + "loss": 1.0276, + "step": 17690 + }, + { + "epoch": 0.12812438923755132, + "grad_norm": 0.17967389523983002, + "learning_rate": 4.871882849428508e-06, + "loss": 1.0442, + "step": 17700 + }, + { + "epoch": 0.1281967758981375, + "grad_norm": 0.1645398736000061, + "learning_rate": 4.8718104627679215e-06, + "loss": 1.0285, + "step": 17710 + }, + { + "epoch": 0.12826916255872367, + "grad_norm": 0.16454453766345978, + "learning_rate": 4.871738076107335e-06, + "loss": 1.0418, + "step": 17720 + }, + { + "epoch": 0.12834154921930988, + "grad_norm": 0.16446729004383087, + "learning_rate": 4.871665689446749e-06, + "loss": 1.0381, + "step": 17730 + }, + { + "epoch": 0.12841393587989605, + "grad_norm": 0.16874778270721436, + "learning_rate": 4.871593302786163e-06, + "loss": 1.0239, + "step": 17740 + }, + { + "epoch": 0.12848632254048223, + "grad_norm": 0.17436982691287994, + "learning_rate": 4.871520916125577e-06, + "loss": 1.0345, + "step": 17750 + }, + { + "epoch": 0.12855870920106843, + "grad_norm": 0.18663202226161957, + "learning_rate": 4.87144852946499e-06, + "loss": 1.0282, + "step": 17760 + }, + { + "epoch": 0.1286310958616546, + "grad_norm": 0.1899365931749344, + "learning_rate": 4.871376142804404e-06, + "loss": 1.0335, + "step": 17770 + }, + { + "epoch": 0.12870348252224081, + "grad_norm": 0.17862582206726074, + "learning_rate": 4.8713037561438185e-06, + "loss": 1.0453, + "step": 17780 + }, + { + "epoch": 0.128775869182827, + "grad_norm": 0.1662217378616333, + "learning_rate": 4.871231369483232e-06, + "loss": 1.0389, + "step": 17790 + }, + { + "epoch": 0.12884825584341317, + "grad_norm": 0.1717958003282547, + "learning_rate": 4.871158982822646e-06, + "loss": 1.0385, + "step": 17800 + }, + { + "epoch": 0.12892064250399937, + "grad_norm": 0.17093853652477264, + "learning_rate": 4.871086596162059e-06, + "loss": 1.0347, + "step": 17810 + }, + { + "epoch": 0.12899302916458555, + "grad_norm": 0.212626650929451, + "learning_rate": 4.871014209501474e-06, + "loss": 1.0299, + "step": 17820 + }, + { + "epoch": 0.12906541582517173, + "grad_norm": 0.16492265462875366, + "learning_rate": 4.870941822840887e-06, + "loss": 1.0366, + "step": 17830 + }, + { + "epoch": 0.12913780248575793, + "grad_norm": 0.17654773592948914, + "learning_rate": 4.870869436180301e-06, + "loss": 1.0359, + "step": 17840 + }, + { + "epoch": 0.1292101891463441, + "grad_norm": 0.17455124855041504, + "learning_rate": 4.870797049519715e-06, + "loss": 1.0263, + "step": 17850 + }, + { + "epoch": 0.1292825758069303, + "grad_norm": 0.16440659761428833, + "learning_rate": 4.870724662859129e-06, + "loss": 1.0193, + "step": 17860 + }, + { + "epoch": 0.1293549624675165, + "grad_norm": 0.1920333057641983, + "learning_rate": 4.870652276198543e-06, + "loss": 1.0347, + "step": 17870 + }, + { + "epoch": 0.12942734912810266, + "grad_norm": 0.18916349112987518, + "learning_rate": 4.870579889537956e-06, + "loss": 1.036, + "step": 17880 + }, + { + "epoch": 0.12949973578868887, + "grad_norm": 0.20579378306865692, + "learning_rate": 4.87050750287737e-06, + "loss": 1.0291, + "step": 17890 + }, + { + "epoch": 0.12957212244927505, + "grad_norm": 0.16314570605754852, + "learning_rate": 4.870435116216784e-06, + "loss": 1.0316, + "step": 17900 + }, + { + "epoch": 0.12964450910986122, + "grad_norm": 0.18022793531417847, + "learning_rate": 4.870362729556198e-06, + "loss": 1.0444, + "step": 17910 + }, + { + "epoch": 0.12971689577044743, + "grad_norm": 0.1798829734325409, + "learning_rate": 4.870290342895612e-06, + "loss": 1.0351, + "step": 17920 + }, + { + "epoch": 0.1297892824310336, + "grad_norm": 0.1796688586473465, + "learning_rate": 4.870217956235025e-06, + "loss": 1.0431, + "step": 17930 + }, + { + "epoch": 0.1298616690916198, + "grad_norm": 0.1828147917985916, + "learning_rate": 4.870145569574439e-06, + "loss": 1.0343, + "step": 17940 + }, + { + "epoch": 0.12993405575220598, + "grad_norm": 0.21611171960830688, + "learning_rate": 4.870073182913853e-06, + "loss": 1.0343, + "step": 17950 + }, + { + "epoch": 0.13000644241279216, + "grad_norm": 0.17004896700382233, + "learning_rate": 4.870000796253267e-06, + "loss": 1.0468, + "step": 17960 + }, + { + "epoch": 0.13007882907337837, + "grad_norm": 0.18028271198272705, + "learning_rate": 4.869928409592681e-06, + "loss": 1.0504, + "step": 17970 + }, + { + "epoch": 0.13015121573396454, + "grad_norm": 0.18797443807125092, + "learning_rate": 4.869856022932094e-06, + "loss": 1.0274, + "step": 17980 + }, + { + "epoch": 0.13022360239455072, + "grad_norm": 0.18303246796131134, + "learning_rate": 4.869783636271509e-06, + "loss": 1.0087, + "step": 17990 + }, + { + "epoch": 0.13029598905513692, + "grad_norm": 0.18918801844120026, + "learning_rate": 4.869711249610922e-06, + "loss": 1.0417, + "step": 18000 + }, + { + "epoch": 0.1303683757157231, + "grad_norm": 0.16673238575458527, + "learning_rate": 4.869638862950336e-06, + "loss": 1.0255, + "step": 18010 + }, + { + "epoch": 0.1304407623763093, + "grad_norm": 0.16749219596385956, + "learning_rate": 4.8695664762897495e-06, + "loss": 1.0267, + "step": 18020 + }, + { + "epoch": 0.13051314903689548, + "grad_norm": 0.1816338449716568, + "learning_rate": 4.869494089629164e-06, + "loss": 1.0432, + "step": 18030 + }, + { + "epoch": 0.13058553569748166, + "grad_norm": 0.1622341275215149, + "learning_rate": 4.869421702968578e-06, + "loss": 1.023, + "step": 18040 + }, + { + "epoch": 0.13065792235806786, + "grad_norm": 0.1742738038301468, + "learning_rate": 4.869349316307991e-06, + "loss": 1.0301, + "step": 18050 + }, + { + "epoch": 0.13073030901865404, + "grad_norm": 0.16464455425739288, + "learning_rate": 4.869276929647405e-06, + "loss": 1.0151, + "step": 18060 + }, + { + "epoch": 0.13080269567924022, + "grad_norm": 0.16827940940856934, + "learning_rate": 4.869204542986819e-06, + "loss": 1.0237, + "step": 18070 + }, + { + "epoch": 0.13087508233982642, + "grad_norm": 0.17623087763786316, + "learning_rate": 4.869132156326233e-06, + "loss": 1.0521, + "step": 18080 + }, + { + "epoch": 0.1309474690004126, + "grad_norm": 0.1728028506040573, + "learning_rate": 4.8690597696656465e-06, + "loss": 1.0373, + "step": 18090 + }, + { + "epoch": 0.1310198556609988, + "grad_norm": 0.17844252288341522, + "learning_rate": 4.86898738300506e-06, + "loss": 1.0461, + "step": 18100 + }, + { + "epoch": 0.13109224232158498, + "grad_norm": 0.16047002375125885, + "learning_rate": 4.868914996344475e-06, + "loss": 1.0312, + "step": 18110 + }, + { + "epoch": 0.13116462898217116, + "grad_norm": 0.18902169167995453, + "learning_rate": 4.868842609683888e-06, + "loss": 1.0386, + "step": 18120 + }, + { + "epoch": 0.13123701564275736, + "grad_norm": 0.18937572836875916, + "learning_rate": 4.868770223023302e-06, + "loss": 1.0421, + "step": 18130 + }, + { + "epoch": 0.13130940230334354, + "grad_norm": 0.17996446788311005, + "learning_rate": 4.8686978363627154e-06, + "loss": 1.0457, + "step": 18140 + }, + { + "epoch": 0.1313817889639297, + "grad_norm": 0.1748812198638916, + "learning_rate": 4.868625449702129e-06, + "loss": 1.0539, + "step": 18150 + }, + { + "epoch": 0.13145417562451592, + "grad_norm": 0.17151296138763428, + "learning_rate": 4.868553063041543e-06, + "loss": 1.0268, + "step": 18160 + }, + { + "epoch": 0.1315265622851021, + "grad_norm": 0.17143696546554565, + "learning_rate": 4.868480676380956e-06, + "loss": 1.0318, + "step": 18170 + }, + { + "epoch": 0.1315989489456883, + "grad_norm": 0.167258158326149, + "learning_rate": 4.868408289720371e-06, + "loss": 1.0264, + "step": 18180 + }, + { + "epoch": 0.13167133560627448, + "grad_norm": 0.17534078657627106, + "learning_rate": 4.868335903059784e-06, + "loss": 1.0408, + "step": 18190 + }, + { + "epoch": 0.13174372226686065, + "grad_norm": 0.17414483428001404, + "learning_rate": 4.868263516399198e-06, + "loss": 1.0265, + "step": 18200 + }, + { + "epoch": 0.13181610892744686, + "grad_norm": 0.1946071982383728, + "learning_rate": 4.868191129738612e-06, + "loss": 1.0186, + "step": 18210 + }, + { + "epoch": 0.13188849558803303, + "grad_norm": 0.1678534746170044, + "learning_rate": 4.868118743078026e-06, + "loss": 1.0336, + "step": 18220 + }, + { + "epoch": 0.13196088224861924, + "grad_norm": 0.1735198199748993, + "learning_rate": 4.86804635641744e-06, + "loss": 1.0279, + "step": 18230 + }, + { + "epoch": 0.13203326890920541, + "grad_norm": 0.19873082637786865, + "learning_rate": 4.867973969756853e-06, + "loss": 1.0294, + "step": 18240 + }, + { + "epoch": 0.1321056555697916, + "grad_norm": 0.17502813041210175, + "learning_rate": 4.867901583096267e-06, + "loss": 1.0235, + "step": 18250 + }, + { + "epoch": 0.1321780422303778, + "grad_norm": 0.18205420672893524, + "learning_rate": 4.867829196435681e-06, + "loss": 1.0364, + "step": 18260 + }, + { + "epoch": 0.13225042889096397, + "grad_norm": 0.16406495869159698, + "learning_rate": 4.867756809775095e-06, + "loss": 1.0416, + "step": 18270 + }, + { + "epoch": 0.13232281555155015, + "grad_norm": 0.2004392445087433, + "learning_rate": 4.867684423114509e-06, + "loss": 1.0369, + "step": 18280 + }, + { + "epoch": 0.13239520221213635, + "grad_norm": 0.16325534880161285, + "learning_rate": 4.867612036453922e-06, + "loss": 1.0315, + "step": 18290 + }, + { + "epoch": 0.13246758887272253, + "grad_norm": 0.17578786611557007, + "learning_rate": 4.867539649793337e-06, + "loss": 1.0262, + "step": 18300 + }, + { + "epoch": 0.13253997553330873, + "grad_norm": 0.18025441467761993, + "learning_rate": 4.86746726313275e-06, + "loss": 1.0369, + "step": 18310 + }, + { + "epoch": 0.1326123621938949, + "grad_norm": 0.1580292284488678, + "learning_rate": 4.867394876472164e-06, + "loss": 1.023, + "step": 18320 + }, + { + "epoch": 0.1326847488544811, + "grad_norm": 0.16052091121673584, + "learning_rate": 4.8673224898115775e-06, + "loss": 1.0332, + "step": 18330 + }, + { + "epoch": 0.1327571355150673, + "grad_norm": 0.17163227498531342, + "learning_rate": 4.867250103150992e-06, + "loss": 1.0384, + "step": 18340 + }, + { + "epoch": 0.13282952217565347, + "grad_norm": 0.17826439440250397, + "learning_rate": 4.867177716490406e-06, + "loss": 1.0355, + "step": 18350 + }, + { + "epoch": 0.13290190883623965, + "grad_norm": 0.16754212975502014, + "learning_rate": 4.867105329829819e-06, + "loss": 1.044, + "step": 18360 + }, + { + "epoch": 0.13297429549682585, + "grad_norm": 0.17139877378940582, + "learning_rate": 4.867032943169233e-06, + "loss": 1.0249, + "step": 18370 + }, + { + "epoch": 0.13304668215741203, + "grad_norm": 0.1751633733510971, + "learning_rate": 4.866960556508647e-06, + "loss": 1.0354, + "step": 18380 + }, + { + "epoch": 0.13311906881799823, + "grad_norm": 0.1777397096157074, + "learning_rate": 4.866888169848061e-06, + "loss": 1.0224, + "step": 18390 + }, + { + "epoch": 0.1331914554785844, + "grad_norm": 0.1866157501935959, + "learning_rate": 4.8668157831874746e-06, + "loss": 1.0231, + "step": 18400 + }, + { + "epoch": 0.13326384213917059, + "grad_norm": 0.2078569382429123, + "learning_rate": 4.866743396526888e-06, + "loss": 1.0317, + "step": 18410 + }, + { + "epoch": 0.1333362287997568, + "grad_norm": 0.1714392751455307, + "learning_rate": 4.866671009866303e-06, + "loss": 1.0303, + "step": 18420 + }, + { + "epoch": 0.13340861546034297, + "grad_norm": 0.1851416379213333, + "learning_rate": 4.866598623205716e-06, + "loss": 1.0321, + "step": 18430 + }, + { + "epoch": 0.13348100212092914, + "grad_norm": 0.17238068580627441, + "learning_rate": 4.86652623654513e-06, + "loss": 1.0313, + "step": 18440 + }, + { + "epoch": 0.13355338878151535, + "grad_norm": 0.18135380744934082, + "learning_rate": 4.8664538498845435e-06, + "loss": 1.0359, + "step": 18450 + }, + { + "epoch": 0.13362577544210152, + "grad_norm": 0.1726706624031067, + "learning_rate": 4.866381463223958e-06, + "loss": 1.0164, + "step": 18460 + }, + { + "epoch": 0.13369816210268773, + "grad_norm": 0.1773628294467926, + "learning_rate": 4.8663090765633716e-06, + "loss": 1.0397, + "step": 18470 + }, + { + "epoch": 0.1337705487632739, + "grad_norm": 0.17249688506126404, + "learning_rate": 4.866236689902785e-06, + "loss": 1.0314, + "step": 18480 + }, + { + "epoch": 0.13384293542386008, + "grad_norm": 0.17601031064987183, + "learning_rate": 4.866164303242199e-06, + "loss": 1.0412, + "step": 18490 + }, + { + "epoch": 0.1339153220844463, + "grad_norm": 0.17362290620803833, + "learning_rate": 4.866091916581613e-06, + "loss": 1.0335, + "step": 18500 + }, + { + "epoch": 0.13398770874503246, + "grad_norm": 0.17129851877689362, + "learning_rate": 4.866019529921027e-06, + "loss": 1.0282, + "step": 18510 + }, + { + "epoch": 0.13406009540561864, + "grad_norm": 0.18305164575576782, + "learning_rate": 4.8659471432604405e-06, + "loss": 1.0276, + "step": 18520 + }, + { + "epoch": 0.13413248206620484, + "grad_norm": 0.16306072473526, + "learning_rate": 4.865874756599854e-06, + "loss": 1.026, + "step": 18530 + }, + { + "epoch": 0.13420486872679102, + "grad_norm": 0.17902541160583496, + "learning_rate": 4.865802369939268e-06, + "loss": 1.0372, + "step": 18540 + }, + { + "epoch": 0.13427725538737723, + "grad_norm": 0.17102956771850586, + "learning_rate": 4.865729983278682e-06, + "loss": 1.0272, + "step": 18550 + }, + { + "epoch": 0.1343496420479634, + "grad_norm": 0.1690719872713089, + "learning_rate": 4.865657596618096e-06, + "loss": 1.0411, + "step": 18560 + }, + { + "epoch": 0.13442202870854958, + "grad_norm": 0.18555520474910736, + "learning_rate": 4.865585209957509e-06, + "loss": 1.0406, + "step": 18570 + }, + { + "epoch": 0.13449441536913578, + "grad_norm": 0.17667360603809357, + "learning_rate": 4.865512823296923e-06, + "loss": 1.0216, + "step": 18580 + }, + { + "epoch": 0.13456680202972196, + "grad_norm": 0.15590178966522217, + "learning_rate": 4.8654404366363375e-06, + "loss": 1.025, + "step": 18590 + }, + { + "epoch": 0.13463918869030814, + "grad_norm": 0.17240816354751587, + "learning_rate": 4.865368049975751e-06, + "loss": 1.0211, + "step": 18600 + }, + { + "epoch": 0.13471157535089434, + "grad_norm": 0.16866736114025116, + "learning_rate": 4.865295663315165e-06, + "loss": 1.0422, + "step": 18610 + }, + { + "epoch": 0.13478396201148052, + "grad_norm": 0.17667965590953827, + "learning_rate": 4.865223276654578e-06, + "loss": 1.0312, + "step": 18620 + }, + { + "epoch": 0.13485634867206672, + "grad_norm": 0.176561638712883, + "learning_rate": 4.865150889993993e-06, + "loss": 1.0363, + "step": 18630 + }, + { + "epoch": 0.1349287353326529, + "grad_norm": 0.18116410076618195, + "learning_rate": 4.8650785033334064e-06, + "loss": 1.0333, + "step": 18640 + }, + { + "epoch": 0.13500112199323908, + "grad_norm": 0.17595870792865753, + "learning_rate": 4.86500611667282e-06, + "loss": 1.0195, + "step": 18650 + }, + { + "epoch": 0.13507350865382528, + "grad_norm": 0.17091989517211914, + "learning_rate": 4.864933730012234e-06, + "loss": 1.0266, + "step": 18660 + }, + { + "epoch": 0.13514589531441146, + "grad_norm": 0.18030595779418945, + "learning_rate": 4.864861343351647e-06, + "loss": 1.0377, + "step": 18670 + }, + { + "epoch": 0.13521828197499763, + "grad_norm": 0.1966775506734848, + "learning_rate": 4.864788956691061e-06, + "loss": 1.0233, + "step": 18680 + }, + { + "epoch": 0.13529066863558384, + "grad_norm": 0.1909208595752716, + "learning_rate": 4.8647165700304745e-06, + "loss": 1.0294, + "step": 18690 + }, + { + "epoch": 0.13536305529617002, + "grad_norm": 0.17203876376152039, + "learning_rate": 4.864644183369889e-06, + "loss": 1.0422, + "step": 18700 + }, + { + "epoch": 0.13543544195675622, + "grad_norm": 0.164755716919899, + "learning_rate": 4.864571796709303e-06, + "loss": 1.0307, + "step": 18710 + }, + { + "epoch": 0.1355078286173424, + "grad_norm": 0.17235048115253448, + "learning_rate": 4.864499410048716e-06, + "loss": 1.025, + "step": 18720 + }, + { + "epoch": 0.13558021527792857, + "grad_norm": 0.17906124889850616, + "learning_rate": 4.86442702338813e-06, + "loss": 1.0251, + "step": 18730 + }, + { + "epoch": 0.13565260193851478, + "grad_norm": 0.1699845939874649, + "learning_rate": 4.864354636727544e-06, + "loss": 1.0513, + "step": 18740 + }, + { + "epoch": 0.13572498859910095, + "grad_norm": 0.2316533476114273, + "learning_rate": 4.864282250066958e-06, + "loss": 1.0299, + "step": 18750 + }, + { + "epoch": 0.13579737525968716, + "grad_norm": 0.1772417426109314, + "learning_rate": 4.8642098634063715e-06, + "loss": 1.014, + "step": 18760 + }, + { + "epoch": 0.13586976192027334, + "grad_norm": 0.17594146728515625, + "learning_rate": 4.864137476745785e-06, + "loss": 1.0376, + "step": 18770 + }, + { + "epoch": 0.1359421485808595, + "grad_norm": 0.1800992637872696, + "learning_rate": 4.8640650900852e-06, + "loss": 1.0218, + "step": 18780 + }, + { + "epoch": 0.13601453524144572, + "grad_norm": 0.2082536369562149, + "learning_rate": 4.863992703424613e-06, + "loss": 1.029, + "step": 18790 + }, + { + "epoch": 0.1360869219020319, + "grad_norm": 0.17295007407665253, + "learning_rate": 4.863920316764027e-06, + "loss": 1.0321, + "step": 18800 + }, + { + "epoch": 0.13615930856261807, + "grad_norm": 0.2445904165506363, + "learning_rate": 4.8638479301034404e-06, + "loss": 1.034, + "step": 18810 + }, + { + "epoch": 0.13623169522320427, + "grad_norm": 0.16870233416557312, + "learning_rate": 4.863775543442855e-06, + "loss": 1.0237, + "step": 18820 + }, + { + "epoch": 0.13630408188379045, + "grad_norm": 0.1675586849451065, + "learning_rate": 4.8637031567822685e-06, + "loss": 1.0238, + "step": 18830 + }, + { + "epoch": 0.13637646854437666, + "grad_norm": 0.1609523743391037, + "learning_rate": 4.863630770121682e-06, + "loss": 1.0141, + "step": 18840 + }, + { + "epoch": 0.13644885520496283, + "grad_norm": 0.1770259588956833, + "learning_rate": 4.863558383461096e-06, + "loss": 1.0507, + "step": 18850 + }, + { + "epoch": 0.136521241865549, + "grad_norm": 0.17467375099658966, + "learning_rate": 4.86348599680051e-06, + "loss": 1.0226, + "step": 18860 + }, + { + "epoch": 0.1365936285261352, + "grad_norm": 0.17384403944015503, + "learning_rate": 4.863413610139924e-06, + "loss": 1.0342, + "step": 18870 + }, + { + "epoch": 0.1366660151867214, + "grad_norm": 0.18507815897464752, + "learning_rate": 4.8633412234793374e-06, + "loss": 1.0338, + "step": 18880 + }, + { + "epoch": 0.13673840184730757, + "grad_norm": 0.173783078789711, + "learning_rate": 4.863268836818751e-06, + "loss": 1.0354, + "step": 18890 + }, + { + "epoch": 0.13681078850789377, + "grad_norm": 0.1754179745912552, + "learning_rate": 4.8631964501581655e-06, + "loss": 1.031, + "step": 18900 + }, + { + "epoch": 0.13688317516847995, + "grad_norm": 0.17524704337120056, + "learning_rate": 4.863124063497579e-06, + "loss": 1.0221, + "step": 18910 + }, + { + "epoch": 0.13695556182906615, + "grad_norm": 0.1778000146150589, + "learning_rate": 4.863051676836993e-06, + "loss": 1.0122, + "step": 18920 + }, + { + "epoch": 0.13702794848965233, + "grad_norm": 0.19337837398052216, + "learning_rate": 4.862979290176406e-06, + "loss": 1.0321, + "step": 18930 + }, + { + "epoch": 0.1371003351502385, + "grad_norm": 0.16937635838985443, + "learning_rate": 4.862906903515821e-06, + "loss": 1.0281, + "step": 18940 + }, + { + "epoch": 0.1371727218108247, + "grad_norm": 0.161665141582489, + "learning_rate": 4.8628345168552345e-06, + "loss": 1.0256, + "step": 18950 + }, + { + "epoch": 0.1372451084714109, + "grad_norm": 0.18332736194133759, + "learning_rate": 4.862762130194648e-06, + "loss": 1.034, + "step": 18960 + }, + { + "epoch": 0.13731749513199706, + "grad_norm": 0.17856566607952118, + "learning_rate": 4.862689743534062e-06, + "loss": 1.0334, + "step": 18970 + }, + { + "epoch": 0.13738988179258327, + "grad_norm": 0.16888518631458282, + "learning_rate": 4.862617356873476e-06, + "loss": 1.0146, + "step": 18980 + }, + { + "epoch": 0.13746226845316944, + "grad_norm": 0.18556654453277588, + "learning_rate": 4.86254497021289e-06, + "loss": 1.0241, + "step": 18990 + }, + { + "epoch": 0.13753465511375565, + "grad_norm": 0.21083232760429382, + "learning_rate": 4.862472583552303e-06, + "loss": 1.0315, + "step": 19000 + }, + { + "epoch": 0.13760704177434183, + "grad_norm": 0.194644495844841, + "learning_rate": 4.862400196891717e-06, + "loss": 1.0312, + "step": 19010 + }, + { + "epoch": 0.137679428434928, + "grad_norm": 0.19104093313217163, + "learning_rate": 4.8623278102311315e-06, + "loss": 1.0296, + "step": 19020 + }, + { + "epoch": 0.1377518150955142, + "grad_norm": 0.16693130135536194, + "learning_rate": 4.862255423570545e-06, + "loss": 1.0217, + "step": 19030 + }, + { + "epoch": 0.13782420175610038, + "grad_norm": 0.18064992129802704, + "learning_rate": 4.862183036909959e-06, + "loss": 1.0308, + "step": 19040 + }, + { + "epoch": 0.13789658841668656, + "grad_norm": 0.19371187686920166, + "learning_rate": 4.862110650249372e-06, + "loss": 1.032, + "step": 19050 + }, + { + "epoch": 0.13796897507727277, + "grad_norm": 0.17446281015872955, + "learning_rate": 4.862038263588787e-06, + "loss": 1.0323, + "step": 19060 + }, + { + "epoch": 0.13804136173785894, + "grad_norm": 0.17750053107738495, + "learning_rate": 4.8619658769282e-06, + "loss": 1.0269, + "step": 19070 + }, + { + "epoch": 0.13811374839844515, + "grad_norm": 0.1809011548757553, + "learning_rate": 4.861893490267614e-06, + "loss": 1.0242, + "step": 19080 + }, + { + "epoch": 0.13818613505903132, + "grad_norm": 0.19166511297225952, + "learning_rate": 4.861821103607028e-06, + "loss": 1.0228, + "step": 19090 + }, + { + "epoch": 0.1382585217196175, + "grad_norm": 0.17810720205307007, + "learning_rate": 4.861748716946442e-06, + "loss": 1.038, + "step": 19100 + }, + { + "epoch": 0.1383309083802037, + "grad_norm": 0.16615994274616241, + "learning_rate": 4.861676330285856e-06, + "loss": 1.027, + "step": 19110 + }, + { + "epoch": 0.13840329504078988, + "grad_norm": 0.16814115643501282, + "learning_rate": 4.861603943625269e-06, + "loss": 1.0254, + "step": 19120 + }, + { + "epoch": 0.13847568170137606, + "grad_norm": 0.1832701861858368, + "learning_rate": 4.861531556964683e-06, + "loss": 1.029, + "step": 19130 + }, + { + "epoch": 0.13854806836196226, + "grad_norm": 0.17128700017929077, + "learning_rate": 4.861459170304097e-06, + "loss": 1.0295, + "step": 19140 + }, + { + "epoch": 0.13862045502254844, + "grad_norm": 0.1750701367855072, + "learning_rate": 4.861386783643511e-06, + "loss": 1.0251, + "step": 19150 + }, + { + "epoch": 0.13869284168313464, + "grad_norm": 0.2172747105360031, + "learning_rate": 4.861314396982925e-06, + "loss": 1.022, + "step": 19160 + }, + { + "epoch": 0.13876522834372082, + "grad_norm": 0.17995861172676086, + "learning_rate": 4.861242010322338e-06, + "loss": 1.0308, + "step": 19170 + }, + { + "epoch": 0.138837615004307, + "grad_norm": 0.17807912826538086, + "learning_rate": 4.861169623661752e-06, + "loss": 1.0221, + "step": 19180 + }, + { + "epoch": 0.1389100016648932, + "grad_norm": 0.17291560769081116, + "learning_rate": 4.861097237001166e-06, + "loss": 1.023, + "step": 19190 + }, + { + "epoch": 0.13898238832547938, + "grad_norm": 0.21126574277877808, + "learning_rate": 4.861024850340579e-06, + "loss": 1.0267, + "step": 19200 + }, + { + "epoch": 0.13905477498606555, + "grad_norm": 0.178102508187294, + "learning_rate": 4.8609524636799936e-06, + "loss": 1.028, + "step": 19210 + }, + { + "epoch": 0.13912716164665176, + "grad_norm": 0.18170644342899323, + "learning_rate": 4.860880077019407e-06, + "loss": 1.0209, + "step": 19220 + }, + { + "epoch": 0.13919954830723794, + "grad_norm": 0.17773956060409546, + "learning_rate": 4.860807690358821e-06, + "loss": 1.0319, + "step": 19230 + }, + { + "epoch": 0.13927193496782414, + "grad_norm": 0.25084516406059265, + "learning_rate": 4.860735303698234e-06, + "loss": 1.027, + "step": 19240 + }, + { + "epoch": 0.13934432162841032, + "grad_norm": 0.18037767708301544, + "learning_rate": 4.860662917037649e-06, + "loss": 1.0325, + "step": 19250 + }, + { + "epoch": 0.1394167082889965, + "grad_norm": 0.1826416552066803, + "learning_rate": 4.8605905303770625e-06, + "loss": 1.0292, + "step": 19260 + }, + { + "epoch": 0.1394890949495827, + "grad_norm": 0.16938439011573792, + "learning_rate": 4.860518143716476e-06, + "loss": 1.0303, + "step": 19270 + }, + { + "epoch": 0.13956148161016887, + "grad_norm": 0.18216344714164734, + "learning_rate": 4.86044575705589e-06, + "loss": 1.0351, + "step": 19280 + }, + { + "epoch": 0.13963386827075505, + "grad_norm": 0.17035633325576782, + "learning_rate": 4.860373370395304e-06, + "loss": 1.0357, + "step": 19290 + }, + { + "epoch": 0.13970625493134126, + "grad_norm": 0.17245067656040192, + "learning_rate": 4.860300983734718e-06, + "loss": 1.0377, + "step": 19300 + }, + { + "epoch": 0.13977864159192743, + "grad_norm": 0.17423754930496216, + "learning_rate": 4.860228597074131e-06, + "loss": 1.0384, + "step": 19310 + }, + { + "epoch": 0.13985102825251364, + "grad_norm": 0.20927661657333374, + "learning_rate": 4.860156210413545e-06, + "loss": 1.0141, + "step": 19320 + }, + { + "epoch": 0.13992341491309981, + "grad_norm": 0.18298327922821045, + "learning_rate": 4.860083823752959e-06, + "loss": 1.0114, + "step": 19330 + }, + { + "epoch": 0.139995801573686, + "grad_norm": 0.1714993566274643, + "learning_rate": 4.860011437092373e-06, + "loss": 1.0275, + "step": 19340 + }, + { + "epoch": 0.1400681882342722, + "grad_norm": 0.1708700954914093, + "learning_rate": 4.859939050431787e-06, + "loss": 1.0364, + "step": 19350 + }, + { + "epoch": 0.14014057489485837, + "grad_norm": 0.18635700643062592, + "learning_rate": 4.8598666637712e-06, + "loss": 1.034, + "step": 19360 + }, + { + "epoch": 0.14021296155544458, + "grad_norm": 0.17284467816352844, + "learning_rate": 4.859794277110614e-06, + "loss": 1.0332, + "step": 19370 + }, + { + "epoch": 0.14028534821603075, + "grad_norm": 0.17173859477043152, + "learning_rate": 4.859721890450028e-06, + "loss": 1.0151, + "step": 19380 + }, + { + "epoch": 0.14035773487661693, + "grad_norm": 0.17994263768196106, + "learning_rate": 4.859649503789442e-06, + "loss": 1.0139, + "step": 19390 + }, + { + "epoch": 0.14043012153720313, + "grad_norm": 0.21482296288013458, + "learning_rate": 4.859577117128856e-06, + "loss": 1.0116, + "step": 19400 + }, + { + "epoch": 0.1405025081977893, + "grad_norm": 0.17747275531291962, + "learning_rate": 4.859504730468269e-06, + "loss": 1.0267, + "step": 19410 + }, + { + "epoch": 0.1405748948583755, + "grad_norm": 0.17190630733966827, + "learning_rate": 4.859432343807684e-06, + "loss": 1.0314, + "step": 19420 + }, + { + "epoch": 0.1406472815189617, + "grad_norm": 0.1733713150024414, + "learning_rate": 4.859359957147097e-06, + "loss": 1.0189, + "step": 19430 + }, + { + "epoch": 0.14071966817954787, + "grad_norm": 0.18654613196849823, + "learning_rate": 4.859287570486511e-06, + "loss": 1.0353, + "step": 19440 + }, + { + "epoch": 0.14079205484013407, + "grad_norm": 0.19242019951343536, + "learning_rate": 4.859215183825925e-06, + "loss": 1.0235, + "step": 19450 + }, + { + "epoch": 0.14086444150072025, + "grad_norm": 0.17690154910087585, + "learning_rate": 4.859142797165339e-06, + "loss": 1.0262, + "step": 19460 + }, + { + "epoch": 0.14093682816130643, + "grad_norm": 0.17208655178546906, + "learning_rate": 4.859070410504753e-06, + "loss": 1.026, + "step": 19470 + }, + { + "epoch": 0.14100921482189263, + "grad_norm": 0.1762927770614624, + "learning_rate": 4.858998023844166e-06, + "loss": 1.0367, + "step": 19480 + }, + { + "epoch": 0.1410816014824788, + "grad_norm": 0.18810449540615082, + "learning_rate": 4.85892563718358e-06, + "loss": 1.0267, + "step": 19490 + }, + { + "epoch": 0.14115398814306498, + "grad_norm": 0.23858876526355743, + "learning_rate": 4.858853250522994e-06, + "loss": 1.033, + "step": 19500 + }, + { + "epoch": 0.1412263748036512, + "grad_norm": 0.186942458152771, + "learning_rate": 4.858780863862408e-06, + "loss": 1.0223, + "step": 19510 + }, + { + "epoch": 0.14129876146423737, + "grad_norm": 0.1946224570274353, + "learning_rate": 4.858708477201822e-06, + "loss": 1.0169, + "step": 19520 + }, + { + "epoch": 0.14137114812482357, + "grad_norm": 0.1714710295200348, + "learning_rate": 4.858636090541235e-06, + "loss": 1.0199, + "step": 19530 + }, + { + "epoch": 0.14144353478540975, + "grad_norm": 0.17258310317993164, + "learning_rate": 4.85856370388065e-06, + "loss": 1.0274, + "step": 19540 + }, + { + "epoch": 0.14151592144599592, + "grad_norm": 0.18983785808086395, + "learning_rate": 4.858491317220063e-06, + "loss": 1.0382, + "step": 19550 + }, + { + "epoch": 0.14158830810658213, + "grad_norm": 0.17009888589382172, + "learning_rate": 4.858418930559477e-06, + "loss": 1.0138, + "step": 19560 + }, + { + "epoch": 0.1416606947671683, + "grad_norm": 0.17958617210388184, + "learning_rate": 4.8583465438988905e-06, + "loss": 1.0258, + "step": 19570 + }, + { + "epoch": 0.14173308142775448, + "grad_norm": 0.17548643052577972, + "learning_rate": 4.858274157238305e-06, + "loss": 1.0213, + "step": 19580 + }, + { + "epoch": 0.14180546808834069, + "grad_norm": 0.16976386308670044, + "learning_rate": 4.858201770577719e-06, + "loss": 1.0292, + "step": 19590 + }, + { + "epoch": 0.14187785474892686, + "grad_norm": 0.16231513023376465, + "learning_rate": 4.858129383917132e-06, + "loss": 1.0299, + "step": 19600 + }, + { + "epoch": 0.14195024140951307, + "grad_norm": 0.17872096598148346, + "learning_rate": 4.858056997256546e-06, + "loss": 1.0275, + "step": 19610 + }, + { + "epoch": 0.14202262807009924, + "grad_norm": 0.17281471192836761, + "learning_rate": 4.85798461059596e-06, + "loss": 1.0325, + "step": 19620 + }, + { + "epoch": 0.14209501473068542, + "grad_norm": 0.19203288853168488, + "learning_rate": 4.857912223935374e-06, + "loss": 1.0231, + "step": 19630 + }, + { + "epoch": 0.14216740139127163, + "grad_norm": 0.1869240552186966, + "learning_rate": 4.8578398372747875e-06, + "loss": 1.0303, + "step": 19640 + }, + { + "epoch": 0.1422397880518578, + "grad_norm": 0.18349246680736542, + "learning_rate": 4.857767450614201e-06, + "loss": 1.051, + "step": 19650 + }, + { + "epoch": 0.14231217471244398, + "grad_norm": 0.16620957851409912, + "learning_rate": 4.857695063953616e-06, + "loss": 1.0302, + "step": 19660 + }, + { + "epoch": 0.14238456137303018, + "grad_norm": 0.16156761348247528, + "learning_rate": 4.857622677293029e-06, + "loss": 1.0181, + "step": 19670 + }, + { + "epoch": 0.14245694803361636, + "grad_norm": 0.19458822906017303, + "learning_rate": 4.857550290632443e-06, + "loss": 1.027, + "step": 19680 + }, + { + "epoch": 0.14252933469420256, + "grad_norm": 0.17083300650119781, + "learning_rate": 4.8574779039718565e-06, + "loss": 1.0365, + "step": 19690 + }, + { + "epoch": 0.14260172135478874, + "grad_norm": 0.1720832884311676, + "learning_rate": 4.857405517311271e-06, + "loss": 1.0303, + "step": 19700 + }, + { + "epoch": 0.14267410801537492, + "grad_norm": 0.17224328219890594, + "learning_rate": 4.8573331306506845e-06, + "loss": 1.0318, + "step": 19710 + }, + { + "epoch": 0.14274649467596112, + "grad_norm": 0.16815780103206635, + "learning_rate": 4.857260743990098e-06, + "loss": 1.041, + "step": 19720 + }, + { + "epoch": 0.1428188813365473, + "grad_norm": 0.19083335995674133, + "learning_rate": 4.857188357329512e-06, + "loss": 1.0303, + "step": 19730 + }, + { + "epoch": 0.14289126799713348, + "grad_norm": 0.167524054646492, + "learning_rate": 4.857115970668925e-06, + "loss": 1.0343, + "step": 19740 + }, + { + "epoch": 0.14296365465771968, + "grad_norm": 0.17465034127235413, + "learning_rate": 4.857043584008339e-06, + "loss": 1.0161, + "step": 19750 + }, + { + "epoch": 0.14303604131830586, + "grad_norm": 0.18341541290283203, + "learning_rate": 4.856971197347753e-06, + "loss": 1.0134, + "step": 19760 + }, + { + "epoch": 0.14310842797889206, + "grad_norm": 0.1703953891992569, + "learning_rate": 4.856898810687167e-06, + "loss": 1.0333, + "step": 19770 + }, + { + "epoch": 0.14318081463947824, + "grad_norm": 0.17636318504810333, + "learning_rate": 4.856826424026581e-06, + "loss": 1.0271, + "step": 19780 + }, + { + "epoch": 0.14325320130006441, + "grad_norm": 0.17411428689956665, + "learning_rate": 4.856754037365994e-06, + "loss": 1.0399, + "step": 19790 + }, + { + "epoch": 0.14332558796065062, + "grad_norm": 0.18185856938362122, + "learning_rate": 4.856681650705408e-06, + "loss": 1.0305, + "step": 19800 + }, + { + "epoch": 0.1433979746212368, + "grad_norm": 0.17615829408168793, + "learning_rate": 4.856609264044822e-06, + "loss": 1.0266, + "step": 19810 + }, + { + "epoch": 0.14347036128182297, + "grad_norm": 0.19287104904651642, + "learning_rate": 4.856536877384236e-06, + "loss": 1.012, + "step": 19820 + }, + { + "epoch": 0.14354274794240918, + "grad_norm": 0.17633087933063507, + "learning_rate": 4.85646449072365e-06, + "loss": 1.0288, + "step": 19830 + }, + { + "epoch": 0.14361513460299535, + "grad_norm": 0.16220209002494812, + "learning_rate": 4.856392104063063e-06, + "loss": 1.0151, + "step": 19840 + }, + { + "epoch": 0.14368752126358156, + "grad_norm": 0.16982801258563995, + "learning_rate": 4.856319717402478e-06, + "loss": 1.0261, + "step": 19850 + }, + { + "epoch": 0.14375990792416773, + "grad_norm": 0.1811235100030899, + "learning_rate": 4.856247330741891e-06, + "loss": 1.0323, + "step": 19860 + }, + { + "epoch": 0.1438322945847539, + "grad_norm": 0.17505569756031036, + "learning_rate": 4.856174944081305e-06, + "loss": 1.0212, + "step": 19870 + }, + { + "epoch": 0.14390468124534012, + "grad_norm": 0.17915651202201843, + "learning_rate": 4.8561025574207186e-06, + "loss": 1.0347, + "step": 19880 + }, + { + "epoch": 0.1439770679059263, + "grad_norm": 0.16607633233070374, + "learning_rate": 4.856030170760133e-06, + "loss": 1.0331, + "step": 19890 + }, + { + "epoch": 0.14404945456651247, + "grad_norm": 0.1832243949174881, + "learning_rate": 4.855957784099547e-06, + "loss": 1.0218, + "step": 19900 + }, + { + "epoch": 0.14412184122709867, + "grad_norm": 0.17596963047981262, + "learning_rate": 4.85588539743896e-06, + "loss": 1.0321, + "step": 19910 + }, + { + "epoch": 0.14419422788768485, + "grad_norm": 0.17509303987026215, + "learning_rate": 4.855813010778374e-06, + "loss": 1.0224, + "step": 19920 + }, + { + "epoch": 0.14426661454827105, + "grad_norm": 0.159066841006279, + "learning_rate": 4.855740624117788e-06, + "loss": 1.0205, + "step": 19930 + }, + { + "epoch": 0.14433900120885723, + "grad_norm": 0.18700556457042694, + "learning_rate": 4.855668237457202e-06, + "loss": 1.0297, + "step": 19940 + }, + { + "epoch": 0.1444113878694434, + "grad_norm": 0.17160135507583618, + "learning_rate": 4.8555958507966156e-06, + "loss": 1.034, + "step": 19950 + }, + { + "epoch": 0.1444837745300296, + "grad_norm": 0.17142252624034882, + "learning_rate": 4.855523464136029e-06, + "loss": 1.0225, + "step": 19960 + }, + { + "epoch": 0.1445561611906158, + "grad_norm": 0.18951663374900818, + "learning_rate": 4.855451077475443e-06, + "loss": 1.0276, + "step": 19970 + }, + { + "epoch": 0.144628547851202, + "grad_norm": 0.1778341382741928, + "learning_rate": 4.855378690814857e-06, + "loss": 1.0116, + "step": 19980 + }, + { + "epoch": 0.14470093451178817, + "grad_norm": 0.1711929589509964, + "learning_rate": 4.855306304154271e-06, + "loss": 1.0469, + "step": 19990 + }, + { + "epoch": 0.14477332117237435, + "grad_norm": 0.1792709082365036, + "learning_rate": 4.8552339174936845e-06, + "loss": 1.0369, + "step": 20000 + }, + { + "epoch": 0.14484570783296055, + "grad_norm": 0.1821603775024414, + "learning_rate": 4.855161530833098e-06, + "loss": 1.0374, + "step": 20010 + }, + { + "epoch": 0.14491809449354673, + "grad_norm": 0.17707721889019012, + "learning_rate": 4.8550891441725126e-06, + "loss": 1.0276, + "step": 20020 + }, + { + "epoch": 0.1449904811541329, + "grad_norm": 0.18702900409698486, + "learning_rate": 4.855016757511926e-06, + "loss": 1.0198, + "step": 20030 + }, + { + "epoch": 0.1450628678147191, + "grad_norm": 0.16952428221702576, + "learning_rate": 4.85494437085134e-06, + "loss": 1.0333, + "step": 20040 + }, + { + "epoch": 0.1451352544753053, + "grad_norm": 0.17305567860603333, + "learning_rate": 4.854871984190753e-06, + "loss": 1.0139, + "step": 20050 + }, + { + "epoch": 0.1452076411358915, + "grad_norm": 0.17391686141490936, + "learning_rate": 4.854799597530168e-06, + "loss": 1.029, + "step": 20060 + }, + { + "epoch": 0.14528002779647767, + "grad_norm": 0.17623725533485413, + "learning_rate": 4.8547272108695815e-06, + "loss": 1.0245, + "step": 20070 + }, + { + "epoch": 0.14535241445706384, + "grad_norm": 0.17333059012889862, + "learning_rate": 4.854654824208995e-06, + "loss": 1.0297, + "step": 20080 + }, + { + "epoch": 0.14542480111765005, + "grad_norm": 0.1761116087436676, + "learning_rate": 4.854582437548409e-06, + "loss": 1.0236, + "step": 20090 + }, + { + "epoch": 0.14549718777823623, + "grad_norm": 0.16909906268119812, + "learning_rate": 4.854510050887823e-06, + "loss": 1.0219, + "step": 20100 + }, + { + "epoch": 0.1455695744388224, + "grad_norm": 0.1722634732723236, + "learning_rate": 4.854437664227237e-06, + "loss": 1.0271, + "step": 20110 + }, + { + "epoch": 0.1456419610994086, + "grad_norm": 0.32046079635620117, + "learning_rate": 4.85436527756665e-06, + "loss": 1.0361, + "step": 20120 + }, + { + "epoch": 0.14571434775999478, + "grad_norm": 0.18109475076198578, + "learning_rate": 4.854292890906064e-06, + "loss": 1.0265, + "step": 20130 + }, + { + "epoch": 0.145786734420581, + "grad_norm": 0.17990946769714355, + "learning_rate": 4.8542205042454785e-06, + "loss": 1.033, + "step": 20140 + }, + { + "epoch": 0.14585912108116716, + "grad_norm": 0.17502890527248383, + "learning_rate": 4.854148117584892e-06, + "loss": 1.0363, + "step": 20150 + }, + { + "epoch": 0.14593150774175334, + "grad_norm": 0.17126423120498657, + "learning_rate": 4.854075730924306e-06, + "loss": 1.0339, + "step": 20160 + }, + { + "epoch": 0.14600389440233955, + "grad_norm": 0.1773746907711029, + "learning_rate": 4.854003344263719e-06, + "loss": 1.038, + "step": 20170 + }, + { + "epoch": 0.14607628106292572, + "grad_norm": 0.1729922592639923, + "learning_rate": 4.853930957603134e-06, + "loss": 1.0151, + "step": 20180 + }, + { + "epoch": 0.1461486677235119, + "grad_norm": 0.1790771633386612, + "learning_rate": 4.8538585709425474e-06, + "loss": 1.0436, + "step": 20190 + }, + { + "epoch": 0.1462210543840981, + "grad_norm": 0.17333535850048065, + "learning_rate": 4.853786184281961e-06, + "loss": 1.0241, + "step": 20200 + }, + { + "epoch": 0.14629344104468428, + "grad_norm": 0.19140805304050446, + "learning_rate": 4.853713797621375e-06, + "loss": 1.0242, + "step": 20210 + }, + { + "epoch": 0.14636582770527048, + "grad_norm": 0.16760674118995667, + "learning_rate": 4.853641410960789e-06, + "loss": 1.0257, + "step": 20220 + }, + { + "epoch": 0.14643821436585666, + "grad_norm": 0.17233441770076752, + "learning_rate": 4.853569024300203e-06, + "loss": 1.0338, + "step": 20230 + }, + { + "epoch": 0.14651060102644284, + "grad_norm": 0.17265230417251587, + "learning_rate": 4.853496637639616e-06, + "loss": 1.037, + "step": 20240 + }, + { + "epoch": 0.14658298768702904, + "grad_norm": 0.17247281968593597, + "learning_rate": 4.85342425097903e-06, + "loss": 1.0141, + "step": 20250 + }, + { + "epoch": 0.14665537434761522, + "grad_norm": 0.1745438128709793, + "learning_rate": 4.853351864318444e-06, + "loss": 1.023, + "step": 20260 + }, + { + "epoch": 0.1467277610082014, + "grad_norm": 0.15753747522830963, + "learning_rate": 4.853279477657857e-06, + "loss": 1.0326, + "step": 20270 + }, + { + "epoch": 0.1468001476687876, + "grad_norm": 0.16413787007331848, + "learning_rate": 4.853207090997271e-06, + "loss": 1.021, + "step": 20280 + }, + { + "epoch": 0.14687253432937378, + "grad_norm": 0.1920785754919052, + "learning_rate": 4.853134704336685e-06, + "loss": 1.039, + "step": 20290 + }, + { + "epoch": 0.14694492098995998, + "grad_norm": 0.1672215759754181, + "learning_rate": 4.853062317676099e-06, + "loss": 1.0227, + "step": 20300 + }, + { + "epoch": 0.14701730765054616, + "grad_norm": 0.17362061142921448, + "learning_rate": 4.8529899310155125e-06, + "loss": 1.0167, + "step": 20310 + }, + { + "epoch": 0.14708969431113234, + "grad_norm": 0.22914515435695648, + "learning_rate": 4.852917544354926e-06, + "loss": 1.0369, + "step": 20320 + }, + { + "epoch": 0.14716208097171854, + "grad_norm": 0.2375790923833847, + "learning_rate": 4.852845157694341e-06, + "loss": 1.0304, + "step": 20330 + }, + { + "epoch": 0.14723446763230472, + "grad_norm": 0.1776515245437622, + "learning_rate": 4.852772771033754e-06, + "loss": 1.0327, + "step": 20340 + }, + { + "epoch": 0.1473068542928909, + "grad_norm": 0.18703965842723846, + "learning_rate": 4.852700384373168e-06, + "loss": 1.0377, + "step": 20350 + }, + { + "epoch": 0.1473792409534771, + "grad_norm": 0.18549805879592896, + "learning_rate": 4.8526279977125814e-06, + "loss": 1.0297, + "step": 20360 + }, + { + "epoch": 0.14745162761406327, + "grad_norm": 0.1732458919286728, + "learning_rate": 4.852555611051996e-06, + "loss": 1.0432, + "step": 20370 + }, + { + "epoch": 0.14752401427464948, + "grad_norm": 0.1937321275472641, + "learning_rate": 4.8524832243914095e-06, + "loss": 1.0219, + "step": 20380 + }, + { + "epoch": 0.14759640093523566, + "grad_norm": 0.2144630402326584, + "learning_rate": 4.852410837730823e-06, + "loss": 1.025, + "step": 20390 + }, + { + "epoch": 0.14766878759582183, + "grad_norm": 0.1818886548280716, + "learning_rate": 4.852338451070237e-06, + "loss": 1.0292, + "step": 20400 + }, + { + "epoch": 0.14774117425640804, + "grad_norm": 0.1875666230916977, + "learning_rate": 4.852266064409651e-06, + "loss": 1.0262, + "step": 20410 + }, + { + "epoch": 0.1478135609169942, + "grad_norm": 0.15505819022655487, + "learning_rate": 4.852193677749065e-06, + "loss": 1.0158, + "step": 20420 + }, + { + "epoch": 0.1478859475775804, + "grad_norm": 0.16805370151996613, + "learning_rate": 4.8521212910884785e-06, + "loss": 1.0337, + "step": 20430 + }, + { + "epoch": 0.1479583342381666, + "grad_norm": 0.18419931828975677, + "learning_rate": 4.852048904427892e-06, + "loss": 1.0314, + "step": 20440 + }, + { + "epoch": 0.14803072089875277, + "grad_norm": 0.1946050375699997, + "learning_rate": 4.8519765177673065e-06, + "loss": 1.0217, + "step": 20450 + }, + { + "epoch": 0.14810310755933898, + "grad_norm": 0.1928732991218567, + "learning_rate": 4.85190413110672e-06, + "loss": 1.0272, + "step": 20460 + }, + { + "epoch": 0.14817549421992515, + "grad_norm": 0.20039114356040955, + "learning_rate": 4.851831744446134e-06, + "loss": 1.0294, + "step": 20470 + }, + { + "epoch": 0.14824788088051133, + "grad_norm": 0.1852181851863861, + "learning_rate": 4.851759357785547e-06, + "loss": 1.0188, + "step": 20480 + }, + { + "epoch": 0.14832026754109753, + "grad_norm": 0.16493256390094757, + "learning_rate": 4.851686971124962e-06, + "loss": 1.0354, + "step": 20490 + }, + { + "epoch": 0.1483926542016837, + "grad_norm": 0.17907851934432983, + "learning_rate": 4.8516145844643755e-06, + "loss": 1.0362, + "step": 20500 + }, + { + "epoch": 0.14846504086226991, + "grad_norm": 0.19379720091819763, + "learning_rate": 4.851542197803789e-06, + "loss": 1.0367, + "step": 20510 + }, + { + "epoch": 0.1485374275228561, + "grad_norm": 0.1685412973165512, + "learning_rate": 4.851469811143203e-06, + "loss": 1.0292, + "step": 20520 + }, + { + "epoch": 0.14860981418344227, + "grad_norm": 0.17223286628723145, + "learning_rate": 4.851397424482617e-06, + "loss": 1.0302, + "step": 20530 + }, + { + "epoch": 0.14868220084402847, + "grad_norm": 0.17199258506298065, + "learning_rate": 4.851325037822031e-06, + "loss": 1.0299, + "step": 20540 + }, + { + "epoch": 0.14875458750461465, + "grad_norm": 0.1630372256040573, + "learning_rate": 4.851252651161444e-06, + "loss": 1.0267, + "step": 20550 + }, + { + "epoch": 0.14882697416520083, + "grad_norm": 0.17937202751636505, + "learning_rate": 4.851180264500858e-06, + "loss": 1.0288, + "step": 20560 + }, + { + "epoch": 0.14889936082578703, + "grad_norm": 0.1685437560081482, + "learning_rate": 4.851107877840272e-06, + "loss": 1.0382, + "step": 20570 + }, + { + "epoch": 0.1489717474863732, + "grad_norm": 0.7421558499336243, + "learning_rate": 4.851035491179686e-06, + "loss": 1.0395, + "step": 20580 + }, + { + "epoch": 0.1490441341469594, + "grad_norm": 0.1732497662305832, + "learning_rate": 4.8509631045191e-06, + "loss": 1.024, + "step": 20590 + }, + { + "epoch": 0.1491165208075456, + "grad_norm": 0.15808378159999847, + "learning_rate": 4.850890717858513e-06, + "loss": 1.0197, + "step": 20600 + }, + { + "epoch": 0.14918890746813177, + "grad_norm": 0.177708238363266, + "learning_rate": 4.850818331197927e-06, + "loss": 1.026, + "step": 20610 + }, + { + "epoch": 0.14926129412871797, + "grad_norm": 0.16309256851673126, + "learning_rate": 4.850745944537341e-06, + "loss": 1.0317, + "step": 20620 + }, + { + "epoch": 0.14933368078930415, + "grad_norm": 0.16510923206806183, + "learning_rate": 4.850673557876755e-06, + "loss": 1.0207, + "step": 20630 + }, + { + "epoch": 0.14940606744989032, + "grad_norm": 0.18781432509422302, + "learning_rate": 4.850601171216169e-06, + "loss": 1.0201, + "step": 20640 + }, + { + "epoch": 0.14947845411047653, + "grad_norm": 0.19044159352779388, + "learning_rate": 4.850528784555582e-06, + "loss": 1.0272, + "step": 20650 + }, + { + "epoch": 0.1495508407710627, + "grad_norm": 0.17361673712730408, + "learning_rate": 4.850456397894997e-06, + "loss": 1.0183, + "step": 20660 + }, + { + "epoch": 0.1496232274316489, + "grad_norm": 0.1650736778974533, + "learning_rate": 4.85038401123441e-06, + "loss": 1.0283, + "step": 20670 + }, + { + "epoch": 0.14969561409223509, + "grad_norm": 0.16908712685108185, + "learning_rate": 4.850311624573824e-06, + "loss": 1.0159, + "step": 20680 + }, + { + "epoch": 0.14976800075282126, + "grad_norm": 0.17872673273086548, + "learning_rate": 4.8502392379132376e-06, + "loss": 1.0202, + "step": 20690 + }, + { + "epoch": 0.14984038741340747, + "grad_norm": 0.16816575825214386, + "learning_rate": 4.850166851252652e-06, + "loss": 1.0228, + "step": 20700 + }, + { + "epoch": 0.14991277407399364, + "grad_norm": 0.16545483469963074, + "learning_rate": 4.850094464592066e-06, + "loss": 1.026, + "step": 20710 + }, + { + "epoch": 0.14998516073457982, + "grad_norm": 0.17488573491573334, + "learning_rate": 4.850022077931479e-06, + "loss": 1.0393, + "step": 20720 + }, + { + "epoch": 0.15005754739516602, + "grad_norm": 0.16280938684940338, + "learning_rate": 4.849949691270893e-06, + "loss": 1.0309, + "step": 20730 + }, + { + "epoch": 0.1501299340557522, + "grad_norm": 0.18939673900604248, + "learning_rate": 4.849877304610307e-06, + "loss": 1.034, + "step": 20740 + }, + { + "epoch": 0.1502023207163384, + "grad_norm": 0.16721650958061218, + "learning_rate": 4.849804917949721e-06, + "loss": 1.0227, + "step": 20750 + }, + { + "epoch": 0.15027470737692458, + "grad_norm": 0.1724279224872589, + "learning_rate": 4.8497325312891346e-06, + "loss": 1.0207, + "step": 20760 + }, + { + "epoch": 0.15034709403751076, + "grad_norm": 0.1731773465871811, + "learning_rate": 4.849660144628548e-06, + "loss": 1.032, + "step": 20770 + }, + { + "epoch": 0.15041948069809696, + "grad_norm": 0.15489158034324646, + "learning_rate": 4.849587757967963e-06, + "loss": 1.0216, + "step": 20780 + }, + { + "epoch": 0.15049186735868314, + "grad_norm": 0.1700417697429657, + "learning_rate": 4.849515371307375e-06, + "loss": 1.0307, + "step": 20790 + }, + { + "epoch": 0.15056425401926932, + "grad_norm": 0.19111177325248718, + "learning_rate": 4.849442984646789e-06, + "loss": 1.0101, + "step": 20800 + }, + { + "epoch": 0.15063664067985552, + "grad_norm": 0.17551042139530182, + "learning_rate": 4.8493705979862035e-06, + "loss": 1.0204, + "step": 20810 + }, + { + "epoch": 0.1507090273404417, + "grad_norm": 0.18322902917861938, + "learning_rate": 4.849298211325617e-06, + "loss": 1.0231, + "step": 20820 + }, + { + "epoch": 0.1507814140010279, + "grad_norm": 0.23962242901325226, + "learning_rate": 4.849225824665031e-06, + "loss": 1.0276, + "step": 20830 + }, + { + "epoch": 0.15085380066161408, + "grad_norm": 0.24165092408657074, + "learning_rate": 4.849153438004444e-06, + "loss": 1.0114, + "step": 20840 + }, + { + "epoch": 0.15092618732220026, + "grad_norm": 0.16115596890449524, + "learning_rate": 4.849081051343859e-06, + "loss": 1.0203, + "step": 20850 + }, + { + "epoch": 0.15099857398278646, + "grad_norm": 0.1693454384803772, + "learning_rate": 4.849008664683272e-06, + "loss": 1.0206, + "step": 20860 + }, + { + "epoch": 0.15107096064337264, + "grad_norm": 0.1848178654909134, + "learning_rate": 4.848936278022686e-06, + "loss": 1.0206, + "step": 20870 + }, + { + "epoch": 0.1511433473039588, + "grad_norm": 0.17669790983200073, + "learning_rate": 4.8488638913621e-06, + "loss": 1.0219, + "step": 20880 + }, + { + "epoch": 0.15121573396454502, + "grad_norm": 0.17501038312911987, + "learning_rate": 4.848791504701514e-06, + "loss": 1.0268, + "step": 20890 + }, + { + "epoch": 0.1512881206251312, + "grad_norm": 0.17689432203769684, + "learning_rate": 4.848719118040928e-06, + "loss": 1.0276, + "step": 20900 + }, + { + "epoch": 0.1513605072857174, + "grad_norm": 0.19285716116428375, + "learning_rate": 4.848646731380341e-06, + "loss": 1.0148, + "step": 20910 + }, + { + "epoch": 0.15143289394630358, + "grad_norm": 0.17144864797592163, + "learning_rate": 4.848574344719755e-06, + "loss": 1.0195, + "step": 20920 + }, + { + "epoch": 0.15150528060688975, + "grad_norm": 0.18200930953025818, + "learning_rate": 4.8485019580591694e-06, + "loss": 1.0325, + "step": 20930 + }, + { + "epoch": 0.15157766726747596, + "grad_norm": 0.17781402170658112, + "learning_rate": 4.848429571398583e-06, + "loss": 1.028, + "step": 20940 + }, + { + "epoch": 0.15165005392806213, + "grad_norm": 0.16629651188850403, + "learning_rate": 4.848357184737997e-06, + "loss": 1.0354, + "step": 20950 + }, + { + "epoch": 0.1517224405886483, + "grad_norm": 0.1842113435268402, + "learning_rate": 4.84828479807741e-06, + "loss": 1.0181, + "step": 20960 + }, + { + "epoch": 0.15179482724923452, + "grad_norm": 0.16665078699588776, + "learning_rate": 4.848212411416825e-06, + "loss": 1.0333, + "step": 20970 + }, + { + "epoch": 0.1518672139098207, + "grad_norm": 0.17545108497142792, + "learning_rate": 4.848140024756238e-06, + "loss": 1.0224, + "step": 20980 + }, + { + "epoch": 0.1519396005704069, + "grad_norm": 0.17534632980823517, + "learning_rate": 4.848067638095652e-06, + "loss": 1.035, + "step": 20990 + }, + { + "epoch": 0.15201198723099307, + "grad_norm": 0.16326074302196503, + "learning_rate": 4.847995251435066e-06, + "loss": 1.0243, + "step": 21000 + }, + { + "epoch": 0.15208437389157925, + "grad_norm": 0.16196899116039276, + "learning_rate": 4.84792286477448e-06, + "loss": 1.0239, + "step": 21010 + }, + { + "epoch": 0.15215676055216545, + "grad_norm": 0.17832061648368835, + "learning_rate": 4.847850478113894e-06, + "loss": 1.0319, + "step": 21020 + }, + { + "epoch": 0.15222914721275163, + "grad_norm": 0.16528917849063873, + "learning_rate": 4.847778091453307e-06, + "loss": 1.024, + "step": 21030 + }, + { + "epoch": 0.1523015338733378, + "grad_norm": 0.18502266705036163, + "learning_rate": 4.847705704792721e-06, + "loss": 1.0254, + "step": 21040 + }, + { + "epoch": 0.152373920533924, + "grad_norm": 0.21463727951049805, + "learning_rate": 4.847633318132135e-06, + "loss": 1.0331, + "step": 21050 + }, + { + "epoch": 0.1524463071945102, + "grad_norm": 0.1813105046749115, + "learning_rate": 4.847560931471549e-06, + "loss": 1.022, + "step": 21060 + }, + { + "epoch": 0.1525186938550964, + "grad_norm": 0.17173029482364655, + "learning_rate": 4.847488544810963e-06, + "loss": 1.0358, + "step": 21070 + }, + { + "epoch": 0.15259108051568257, + "grad_norm": 0.17391446232795715, + "learning_rate": 4.847416158150376e-06, + "loss": 1.0353, + "step": 21080 + }, + { + "epoch": 0.15266346717626875, + "grad_norm": 0.18049128353595734, + "learning_rate": 4.847343771489791e-06, + "loss": 1.0328, + "step": 21090 + }, + { + "epoch": 0.15273585383685495, + "grad_norm": 0.16343386471271515, + "learning_rate": 4.847271384829204e-06, + "loss": 1.0207, + "step": 21100 + }, + { + "epoch": 0.15280824049744113, + "grad_norm": 0.17857620120048523, + "learning_rate": 4.847198998168618e-06, + "loss": 1.0256, + "step": 21110 + }, + { + "epoch": 0.15288062715802733, + "grad_norm": 0.17615753412246704, + "learning_rate": 4.8471266115080315e-06, + "loss": 1.0197, + "step": 21120 + }, + { + "epoch": 0.1529530138186135, + "grad_norm": 0.26710131764411926, + "learning_rate": 4.847054224847446e-06, + "loss": 1.0344, + "step": 21130 + }, + { + "epoch": 0.15302540047919969, + "grad_norm": 0.17339998483657837, + "learning_rate": 4.84698183818686e-06, + "loss": 1.0031, + "step": 21140 + }, + { + "epoch": 0.1530977871397859, + "grad_norm": 0.1859639585018158, + "learning_rate": 4.846909451526273e-06, + "loss": 1.0236, + "step": 21150 + }, + { + "epoch": 0.15317017380037207, + "grad_norm": 0.16950179636478424, + "learning_rate": 4.846837064865687e-06, + "loss": 1.0251, + "step": 21160 + }, + { + "epoch": 0.15324256046095824, + "grad_norm": 0.18173764646053314, + "learning_rate": 4.846764678205101e-06, + "loss": 1.0126, + "step": 21170 + }, + { + "epoch": 0.15331494712154445, + "grad_norm": 0.1840379685163498, + "learning_rate": 4.846692291544515e-06, + "loss": 1.0327, + "step": 21180 + }, + { + "epoch": 0.15338733378213062, + "grad_norm": 0.1827646642923355, + "learning_rate": 4.8466199048839285e-06, + "loss": 1.0287, + "step": 21190 + }, + { + "epoch": 0.15345972044271683, + "grad_norm": 0.20936237275600433, + "learning_rate": 4.846547518223342e-06, + "loss": 1.0154, + "step": 21200 + }, + { + "epoch": 0.153532107103303, + "grad_norm": 0.20079828798770905, + "learning_rate": 4.846475131562756e-06, + "loss": 1.0166, + "step": 21210 + }, + { + "epoch": 0.15360449376388918, + "grad_norm": 0.1706727296113968, + "learning_rate": 4.84640274490217e-06, + "loss": 1.0317, + "step": 21220 + }, + { + "epoch": 0.1536768804244754, + "grad_norm": 0.1730235069990158, + "learning_rate": 4.846330358241584e-06, + "loss": 1.0206, + "step": 21230 + }, + { + "epoch": 0.15374926708506156, + "grad_norm": 0.17368663847446442, + "learning_rate": 4.8462579715809975e-06, + "loss": 1.026, + "step": 21240 + }, + { + "epoch": 0.15382165374564774, + "grad_norm": 0.19320766627788544, + "learning_rate": 4.846185584920411e-06, + "loss": 1.0455, + "step": 21250 + }, + { + "epoch": 0.15389404040623395, + "grad_norm": 0.17245323956012726, + "learning_rate": 4.8461131982598255e-06, + "loss": 1.0254, + "step": 21260 + }, + { + "epoch": 0.15396642706682012, + "grad_norm": 0.28055688738822937, + "learning_rate": 4.846040811599239e-06, + "loss": 1.0158, + "step": 21270 + }, + { + "epoch": 0.15403881372740633, + "grad_norm": 0.1800316721200943, + "learning_rate": 4.845968424938653e-06, + "loss": 1.0278, + "step": 21280 + }, + { + "epoch": 0.1541112003879925, + "grad_norm": 0.16145852208137512, + "learning_rate": 4.845896038278066e-06, + "loss": 1.0221, + "step": 21290 + }, + { + "epoch": 0.15418358704857868, + "grad_norm": 0.16589269042015076, + "learning_rate": 4.845823651617481e-06, + "loss": 1.029, + "step": 21300 + }, + { + "epoch": 0.15425597370916488, + "grad_norm": 0.17117495834827423, + "learning_rate": 4.8457512649568945e-06, + "loss": 1.012, + "step": 21310 + }, + { + "epoch": 0.15432836036975106, + "grad_norm": 0.16930687427520752, + "learning_rate": 4.845678878296308e-06, + "loss": 1.0132, + "step": 21320 + }, + { + "epoch": 0.15440074703033724, + "grad_norm": 0.15988081693649292, + "learning_rate": 4.845606491635722e-06, + "loss": 1.0226, + "step": 21330 + }, + { + "epoch": 0.15447313369092344, + "grad_norm": 0.16869573295116425, + "learning_rate": 4.845534104975135e-06, + "loss": 1.0183, + "step": 21340 + }, + { + "epoch": 0.15454552035150962, + "grad_norm": 0.1683894544839859, + "learning_rate": 4.845461718314549e-06, + "loss": 1.02, + "step": 21350 + }, + { + "epoch": 0.15461790701209582, + "grad_norm": 0.16887131333351135, + "learning_rate": 4.8453893316539625e-06, + "loss": 1.0191, + "step": 21360 + }, + { + "epoch": 0.154690293672682, + "grad_norm": 0.1669628620147705, + "learning_rate": 4.845316944993377e-06, + "loss": 1.0215, + "step": 21370 + }, + { + "epoch": 0.15476268033326818, + "grad_norm": 0.16394099593162537, + "learning_rate": 4.845244558332791e-06, + "loss": 1.0435, + "step": 21380 + }, + { + "epoch": 0.15483506699385438, + "grad_norm": 0.298396497964859, + "learning_rate": 4.845172171672204e-06, + "loss": 1.0062, + "step": 21390 + }, + { + "epoch": 0.15490745365444056, + "grad_norm": 0.1778578758239746, + "learning_rate": 4.845099785011618e-06, + "loss": 1.0383, + "step": 21400 + }, + { + "epoch": 0.15497984031502673, + "grad_norm": 0.16672207415103912, + "learning_rate": 4.845027398351032e-06, + "loss": 1.0257, + "step": 21410 + }, + { + "epoch": 0.15505222697561294, + "grad_norm": 0.1904023140668869, + "learning_rate": 4.844955011690446e-06, + "loss": 1.0183, + "step": 21420 + }, + { + "epoch": 0.15512461363619912, + "grad_norm": 0.19985061883926392, + "learning_rate": 4.8448826250298596e-06, + "loss": 1.0243, + "step": 21430 + }, + { + "epoch": 0.15519700029678532, + "grad_norm": 0.18588073551654816, + "learning_rate": 4.844810238369273e-06, + "loss": 1.0255, + "step": 21440 + }, + { + "epoch": 0.1552693869573715, + "grad_norm": 0.18870490789413452, + "learning_rate": 4.844737851708688e-06, + "loss": 1.0333, + "step": 21450 + }, + { + "epoch": 0.15534177361795767, + "grad_norm": 0.1897239238023758, + "learning_rate": 4.844665465048101e-06, + "loss": 1.0228, + "step": 21460 + }, + { + "epoch": 0.15541416027854388, + "grad_norm": 0.2040310651063919, + "learning_rate": 4.844593078387515e-06, + "loss": 1.0144, + "step": 21470 + }, + { + "epoch": 0.15548654693913005, + "grad_norm": 0.18327052891254425, + "learning_rate": 4.8445206917269285e-06, + "loss": 1.0259, + "step": 21480 + }, + { + "epoch": 0.15555893359971623, + "grad_norm": 0.2118920236825943, + "learning_rate": 4.844448305066343e-06, + "loss": 1.0299, + "step": 21490 + }, + { + "epoch": 0.15563132026030244, + "grad_norm": 0.17221951484680176, + "learning_rate": 4.8443759184057566e-06, + "loss": 1.0316, + "step": 21500 + }, + { + "epoch": 0.1557037069208886, + "grad_norm": 0.1662265807390213, + "learning_rate": 4.84430353174517e-06, + "loss": 1.0094, + "step": 21510 + }, + { + "epoch": 0.15577609358147482, + "grad_norm": 0.18612660467624664, + "learning_rate": 4.844231145084584e-06, + "loss": 1.0185, + "step": 21520 + }, + { + "epoch": 0.155848480242061, + "grad_norm": 0.17847616970539093, + "learning_rate": 4.844158758423998e-06, + "loss": 1.0138, + "step": 21530 + }, + { + "epoch": 0.15592086690264717, + "grad_norm": 0.17742417752742767, + "learning_rate": 4.844086371763412e-06, + "loss": 1.0129, + "step": 21540 + }, + { + "epoch": 0.15599325356323337, + "grad_norm": 0.17097236216068268, + "learning_rate": 4.8440139851028255e-06, + "loss": 1.0194, + "step": 21550 + }, + { + "epoch": 0.15606564022381955, + "grad_norm": 0.2134924679994583, + "learning_rate": 4.843941598442239e-06, + "loss": 1.0192, + "step": 21560 + }, + { + "epoch": 0.15613802688440573, + "grad_norm": 0.17918936908245087, + "learning_rate": 4.8438692117816536e-06, + "loss": 1.0286, + "step": 21570 + }, + { + "epoch": 0.15621041354499193, + "grad_norm": 0.16962049901485443, + "learning_rate": 4.843796825121067e-06, + "loss": 1.0243, + "step": 21580 + }, + { + "epoch": 0.1562828002055781, + "grad_norm": 0.18653210997581482, + "learning_rate": 4.843724438460481e-06, + "loss": 1.0321, + "step": 21590 + }, + { + "epoch": 0.15635518686616431, + "grad_norm": 0.1704837828874588, + "learning_rate": 4.843652051799894e-06, + "loss": 1.031, + "step": 21600 + }, + { + "epoch": 0.1564275735267505, + "grad_norm": 0.18651624023914337, + "learning_rate": 4.843579665139309e-06, + "loss": 1.0029, + "step": 21610 + }, + { + "epoch": 0.15649996018733667, + "grad_norm": 0.18372347950935364, + "learning_rate": 4.8435072784787225e-06, + "loss": 1.0356, + "step": 21620 + }, + { + "epoch": 0.15657234684792287, + "grad_norm": 0.18019145727157593, + "learning_rate": 4.843434891818136e-06, + "loss": 1.0289, + "step": 21630 + }, + { + "epoch": 0.15664473350850905, + "grad_norm": 0.1659417301416397, + "learning_rate": 4.84336250515755e-06, + "loss": 1.0187, + "step": 21640 + }, + { + "epoch": 0.15671712016909523, + "grad_norm": 0.18040567636489868, + "learning_rate": 4.843290118496964e-06, + "loss": 1.0321, + "step": 21650 + }, + { + "epoch": 0.15678950682968143, + "grad_norm": 0.1708545982837677, + "learning_rate": 4.843217731836378e-06, + "loss": 1.0207, + "step": 21660 + }, + { + "epoch": 0.1568618934902676, + "grad_norm": 0.16297031939029694, + "learning_rate": 4.8431453451757914e-06, + "loss": 1.0146, + "step": 21670 + }, + { + "epoch": 0.1569342801508538, + "grad_norm": 0.19065815210342407, + "learning_rate": 4.843072958515205e-06, + "loss": 1.0256, + "step": 21680 + }, + { + "epoch": 0.15700666681144, + "grad_norm": 0.17626740038394928, + "learning_rate": 4.8430005718546195e-06, + "loss": 1.025, + "step": 21690 + }, + { + "epoch": 0.15707905347202616, + "grad_norm": 0.1907099336385727, + "learning_rate": 4.842928185194033e-06, + "loss": 1.0163, + "step": 21700 + }, + { + "epoch": 0.15715144013261237, + "grad_norm": 0.1677362322807312, + "learning_rate": 4.842855798533447e-06, + "loss": 1.0341, + "step": 21710 + }, + { + "epoch": 0.15722382679319855, + "grad_norm": 0.18757909536361694, + "learning_rate": 4.84278341187286e-06, + "loss": 1.0201, + "step": 21720 + }, + { + "epoch": 0.15729621345378475, + "grad_norm": 0.16965140402317047, + "learning_rate": 4.842711025212275e-06, + "loss": 1.0217, + "step": 21730 + }, + { + "epoch": 0.15736860011437093, + "grad_norm": 0.16649499535560608, + "learning_rate": 4.8426386385516884e-06, + "loss": 1.0142, + "step": 21740 + }, + { + "epoch": 0.1574409867749571, + "grad_norm": 0.1895909309387207, + "learning_rate": 4.842566251891102e-06, + "loss": 1.017, + "step": 21750 + }, + { + "epoch": 0.1575133734355433, + "grad_norm": 0.17109711468219757, + "learning_rate": 4.842493865230516e-06, + "loss": 1.0203, + "step": 21760 + }, + { + "epoch": 0.15758576009612948, + "grad_norm": 0.16221435368061066, + "learning_rate": 4.84242147856993e-06, + "loss": 1.0098, + "step": 21770 + }, + { + "epoch": 0.15765814675671566, + "grad_norm": 0.17318131029605865, + "learning_rate": 4.842349091909344e-06, + "loss": 1.0307, + "step": 21780 + }, + { + "epoch": 0.15773053341730187, + "grad_norm": 0.17340058088302612, + "learning_rate": 4.842276705248757e-06, + "loss": 1.021, + "step": 21790 + }, + { + "epoch": 0.15780292007788804, + "grad_norm": 0.16992861032485962, + "learning_rate": 4.842204318588171e-06, + "loss": 1.0237, + "step": 21800 + }, + { + "epoch": 0.15787530673847425, + "grad_norm": 0.17952974140644073, + "learning_rate": 4.842131931927585e-06, + "loss": 1.0169, + "step": 21810 + }, + { + "epoch": 0.15794769339906042, + "grad_norm": 0.19869837164878845, + "learning_rate": 4.842059545266999e-06, + "loss": 1.0267, + "step": 21820 + }, + { + "epoch": 0.1580200800596466, + "grad_norm": 0.1871790587902069, + "learning_rate": 4.841987158606413e-06, + "loss": 1.017, + "step": 21830 + }, + { + "epoch": 0.1580924667202328, + "grad_norm": 0.183842271566391, + "learning_rate": 4.841914771945826e-06, + "loss": 1.0101, + "step": 21840 + }, + { + "epoch": 0.15816485338081898, + "grad_norm": 0.17067475616931915, + "learning_rate": 4.84184238528524e-06, + "loss": 1.0251, + "step": 21850 + }, + { + "epoch": 0.15823724004140516, + "grad_norm": 0.17248547077178955, + "learning_rate": 4.8417699986246535e-06, + "loss": 1.0154, + "step": 21860 + }, + { + "epoch": 0.15830962670199136, + "grad_norm": 0.17312128841876984, + "learning_rate": 4.841697611964067e-06, + "loss": 1.0098, + "step": 21870 + }, + { + "epoch": 0.15838201336257754, + "grad_norm": 0.17725808918476105, + "learning_rate": 4.841625225303482e-06, + "loss": 1.016, + "step": 21880 + }, + { + "epoch": 0.15845440002316374, + "grad_norm": 0.16956385970115662, + "learning_rate": 4.841552838642895e-06, + "loss": 1.0214, + "step": 21890 + }, + { + "epoch": 0.15852678668374992, + "grad_norm": 0.16064795851707458, + "learning_rate": 4.841480451982309e-06, + "loss": 1.0117, + "step": 21900 + }, + { + "epoch": 0.1585991733443361, + "grad_norm": 0.1749652624130249, + "learning_rate": 4.8414080653217225e-06, + "loss": 1.0257, + "step": 21910 + }, + { + "epoch": 0.1586715600049223, + "grad_norm": 0.18944776058197021, + "learning_rate": 4.841335678661137e-06, + "loss": 0.9973, + "step": 21920 + }, + { + "epoch": 0.15874394666550848, + "grad_norm": 0.24421219527721405, + "learning_rate": 4.8412632920005505e-06, + "loss": 1.0302, + "step": 21930 + }, + { + "epoch": 0.15881633332609466, + "grad_norm": 0.1638653576374054, + "learning_rate": 4.841190905339964e-06, + "loss": 1.0135, + "step": 21940 + }, + { + "epoch": 0.15888871998668086, + "grad_norm": 0.18156132102012634, + "learning_rate": 4.841118518679378e-06, + "loss": 1.0098, + "step": 21950 + }, + { + "epoch": 0.15896110664726704, + "grad_norm": 0.170707106590271, + "learning_rate": 4.841046132018792e-06, + "loss": 1.0142, + "step": 21960 + }, + { + "epoch": 0.15903349330785324, + "grad_norm": 0.17522519826889038, + "learning_rate": 4.840973745358206e-06, + "loss": 1.0156, + "step": 21970 + }, + { + "epoch": 0.15910587996843942, + "grad_norm": 0.17544645071029663, + "learning_rate": 4.8409013586976195e-06, + "loss": 1.0216, + "step": 21980 + }, + { + "epoch": 0.1591782666290256, + "grad_norm": 0.16112075746059418, + "learning_rate": 4.840828972037033e-06, + "loss": 1.0137, + "step": 21990 + }, + { + "epoch": 0.1592506532896118, + "grad_norm": 0.166812926530838, + "learning_rate": 4.840756585376447e-06, + "loss": 1.0234, + "step": 22000 + }, + { + "epoch": 0.15932303995019798, + "grad_norm": 0.19277237355709076, + "learning_rate": 4.840684198715861e-06, + "loss": 1.0112, + "step": 22010 + }, + { + "epoch": 0.15939542661078415, + "grad_norm": 0.18085378408432007, + "learning_rate": 4.840611812055275e-06, + "loss": 1.0177, + "step": 22020 + }, + { + "epoch": 0.15946781327137036, + "grad_norm": 0.2057359516620636, + "learning_rate": 4.840539425394688e-06, + "loss": 1.0294, + "step": 22030 + }, + { + "epoch": 0.15954019993195653, + "grad_norm": 0.165378138422966, + "learning_rate": 4.840467038734102e-06, + "loss": 1.0127, + "step": 22040 + }, + { + "epoch": 0.15961258659254274, + "grad_norm": 0.18065567314624786, + "learning_rate": 4.8403946520735165e-06, + "loss": 1.0238, + "step": 22050 + }, + { + "epoch": 0.15968497325312891, + "grad_norm": 0.1699487715959549, + "learning_rate": 4.84032226541293e-06, + "loss": 1.0217, + "step": 22060 + }, + { + "epoch": 0.1597573599137151, + "grad_norm": 0.22719596326351166, + "learning_rate": 4.840249878752344e-06, + "loss": 1.032, + "step": 22070 + }, + { + "epoch": 0.1598297465743013, + "grad_norm": 0.17852869629859924, + "learning_rate": 4.840177492091757e-06, + "loss": 1.0022, + "step": 22080 + }, + { + "epoch": 0.15990213323488747, + "grad_norm": 0.18880029022693634, + "learning_rate": 4.840105105431172e-06, + "loss": 1.0214, + "step": 22090 + }, + { + "epoch": 0.15997451989547365, + "grad_norm": 0.15838246047496796, + "learning_rate": 4.840032718770585e-06, + "loss": 1.0162, + "step": 22100 + }, + { + "epoch": 0.16004690655605985, + "grad_norm": 0.15795861184597015, + "learning_rate": 4.839960332109999e-06, + "loss": 1.0199, + "step": 22110 + }, + { + "epoch": 0.16011929321664603, + "grad_norm": 0.2182161509990692, + "learning_rate": 4.839887945449413e-06, + "loss": 1.0107, + "step": 22120 + }, + { + "epoch": 0.16019167987723223, + "grad_norm": 0.16949397325515747, + "learning_rate": 4.839815558788827e-06, + "loss": 1.0171, + "step": 22130 + }, + { + "epoch": 0.1602640665378184, + "grad_norm": 0.16689515113830566, + "learning_rate": 4.839743172128241e-06, + "loss": 1.0128, + "step": 22140 + }, + { + "epoch": 0.1603364531984046, + "grad_norm": 0.19180209934711456, + "learning_rate": 4.839670785467654e-06, + "loss": 1.0162, + "step": 22150 + }, + { + "epoch": 0.1604088398589908, + "grad_norm": 0.16757026314735413, + "learning_rate": 4.839598398807068e-06, + "loss": 1.0052, + "step": 22160 + }, + { + "epoch": 0.16048122651957697, + "grad_norm": 0.1716785430908203, + "learning_rate": 4.839526012146482e-06, + "loss": 1.0202, + "step": 22170 + }, + { + "epoch": 0.16055361318016315, + "grad_norm": 0.17269423604011536, + "learning_rate": 4.839453625485896e-06, + "loss": 1.0103, + "step": 22180 + }, + { + "epoch": 0.16062599984074935, + "grad_norm": 0.22084921598434448, + "learning_rate": 4.83938123882531e-06, + "loss": 1.0143, + "step": 22190 + }, + { + "epoch": 0.16069838650133553, + "grad_norm": 0.18779946863651276, + "learning_rate": 4.839308852164723e-06, + "loss": 1.016, + "step": 22200 + }, + { + "epoch": 0.16077077316192173, + "grad_norm": 0.17556487023830414, + "learning_rate": 4.839236465504138e-06, + "loss": 1.0178, + "step": 22210 + }, + { + "epoch": 0.1608431598225079, + "grad_norm": 0.164267897605896, + "learning_rate": 4.839164078843551e-06, + "loss": 1.0224, + "step": 22220 + }, + { + "epoch": 0.16091554648309409, + "grad_norm": 0.20508578419685364, + "learning_rate": 4.839091692182965e-06, + "loss": 1.0087, + "step": 22230 + }, + { + "epoch": 0.1609879331436803, + "grad_norm": 0.46417108178138733, + "learning_rate": 4.8390193055223786e-06, + "loss": 1.0203, + "step": 22240 + }, + { + "epoch": 0.16106031980426647, + "grad_norm": 0.19800300896167755, + "learning_rate": 4.838946918861793e-06, + "loss": 1.0203, + "step": 22250 + }, + { + "epoch": 0.16113270646485267, + "grad_norm": 0.17719148099422455, + "learning_rate": 4.838874532201207e-06, + "loss": 1.0306, + "step": 22260 + }, + { + "epoch": 0.16120509312543885, + "grad_norm": 0.16901084780693054, + "learning_rate": 4.83880214554062e-06, + "loss": 1.014, + "step": 22270 + }, + { + "epoch": 0.16127747978602502, + "grad_norm": 0.1782984584569931, + "learning_rate": 4.838729758880034e-06, + "loss": 1.0268, + "step": 22280 + }, + { + "epoch": 0.16134986644661123, + "grad_norm": 0.18408305943012238, + "learning_rate": 4.838657372219448e-06, + "loss": 1.0163, + "step": 22290 + }, + { + "epoch": 0.1614222531071974, + "grad_norm": 0.16512493789196014, + "learning_rate": 4.838584985558862e-06, + "loss": 1.0251, + "step": 22300 + }, + { + "epoch": 0.16149463976778358, + "grad_norm": 0.180845245718956, + "learning_rate": 4.8385125988982756e-06, + "loss": 1.019, + "step": 22310 + }, + { + "epoch": 0.1615670264283698, + "grad_norm": 0.17334243655204773, + "learning_rate": 4.838440212237689e-06, + "loss": 1.0123, + "step": 22320 + }, + { + "epoch": 0.16163941308895596, + "grad_norm": 0.16331173479557037, + "learning_rate": 4.838367825577104e-06, + "loss": 1.0139, + "step": 22330 + }, + { + "epoch": 0.16171179974954217, + "grad_norm": 0.17761701345443726, + "learning_rate": 4.838295438916517e-06, + "loss": 1.0381, + "step": 22340 + }, + { + "epoch": 0.16178418641012834, + "grad_norm": 0.162800133228302, + "learning_rate": 4.838223052255931e-06, + "loss": 1.026, + "step": 22350 + }, + { + "epoch": 0.16185657307071452, + "grad_norm": 0.18399173021316528, + "learning_rate": 4.8381506655953445e-06, + "loss": 1.0155, + "step": 22360 + }, + { + "epoch": 0.16192895973130073, + "grad_norm": 0.18210694193840027, + "learning_rate": 4.838078278934759e-06, + "loss": 1.0326, + "step": 22370 + }, + { + "epoch": 0.1620013463918869, + "grad_norm": 0.19371944665908813, + "learning_rate": 4.838005892274173e-06, + "loss": 1.0091, + "step": 22380 + }, + { + "epoch": 0.16207373305247308, + "grad_norm": 0.2175704389810562, + "learning_rate": 4.837933505613585e-06, + "loss": 1.0094, + "step": 22390 + }, + { + "epoch": 0.16214611971305928, + "grad_norm": 0.17889146506786346, + "learning_rate": 4.837861118953e-06, + "loss": 1.0349, + "step": 22400 + }, + { + "epoch": 0.16221850637364546, + "grad_norm": 0.17176759243011475, + "learning_rate": 4.8377887322924134e-06, + "loss": 1.0191, + "step": 22410 + }, + { + "epoch": 0.16229089303423166, + "grad_norm": 0.17913545668125153, + "learning_rate": 4.837716345631827e-06, + "loss": 1.0136, + "step": 22420 + }, + { + "epoch": 0.16236327969481784, + "grad_norm": 0.17989039421081543, + "learning_rate": 4.837643958971241e-06, + "loss": 1.0442, + "step": 22430 + }, + { + "epoch": 0.16243566635540402, + "grad_norm": 0.165592759847641, + "learning_rate": 4.837571572310655e-06, + "loss": 1.0248, + "step": 22440 + }, + { + "epoch": 0.16250805301599022, + "grad_norm": 0.179484561085701, + "learning_rate": 4.837499185650069e-06, + "loss": 1.0218, + "step": 22450 + }, + { + "epoch": 0.1625804396765764, + "grad_norm": 0.17062394320964813, + "learning_rate": 4.837426798989482e-06, + "loss": 1.0168, + "step": 22460 + }, + { + "epoch": 0.16265282633716258, + "grad_norm": 0.1736491620540619, + "learning_rate": 4.837354412328896e-06, + "loss": 1.0122, + "step": 22470 + }, + { + "epoch": 0.16272521299774878, + "grad_norm": 0.17264223098754883, + "learning_rate": 4.8372820256683104e-06, + "loss": 1.0215, + "step": 22480 + }, + { + "epoch": 0.16279759965833496, + "grad_norm": 0.18589048087596893, + "learning_rate": 4.837209639007724e-06, + "loss": 1.0197, + "step": 22490 + }, + { + "epoch": 0.16286998631892116, + "grad_norm": 0.16932156682014465, + "learning_rate": 4.837137252347138e-06, + "loss": 1.0257, + "step": 22500 + }, + { + "epoch": 0.16294237297950734, + "grad_norm": 0.17345447838306427, + "learning_rate": 4.837064865686551e-06, + "loss": 1.0228, + "step": 22510 + }, + { + "epoch": 0.16301475964009352, + "grad_norm": 0.16551077365875244, + "learning_rate": 4.836992479025966e-06, + "loss": 1.0266, + "step": 22520 + }, + { + "epoch": 0.16308714630067972, + "grad_norm": 0.1778588742017746, + "learning_rate": 4.836920092365379e-06, + "loss": 1.0194, + "step": 22530 + }, + { + "epoch": 0.1631595329612659, + "grad_norm": 0.16605743765830994, + "learning_rate": 4.836847705704793e-06, + "loss": 1.0151, + "step": 22540 + }, + { + "epoch": 0.16323191962185207, + "grad_norm": 0.18560338020324707, + "learning_rate": 4.836775319044207e-06, + "loss": 1.017, + "step": 22550 + }, + { + "epoch": 0.16330430628243828, + "grad_norm": 0.1800818145275116, + "learning_rate": 4.836702932383621e-06, + "loss": 1.0375, + "step": 22560 + }, + { + "epoch": 0.16337669294302445, + "grad_norm": 0.20372332632541656, + "learning_rate": 4.836630545723035e-06, + "loss": 1.0101, + "step": 22570 + }, + { + "epoch": 0.16344907960361066, + "grad_norm": 0.17202037572860718, + "learning_rate": 4.836558159062448e-06, + "loss": 1.0169, + "step": 22580 + }, + { + "epoch": 0.16352146626419684, + "grad_norm": 0.17716839909553528, + "learning_rate": 4.836485772401862e-06, + "loss": 1.0207, + "step": 22590 + }, + { + "epoch": 0.163593852924783, + "grad_norm": 0.18483179807662964, + "learning_rate": 4.836413385741276e-06, + "loss": 1.004, + "step": 22600 + }, + { + "epoch": 0.16366623958536922, + "grad_norm": 0.18177522718906403, + "learning_rate": 4.83634099908069e-06, + "loss": 1.005, + "step": 22610 + }, + { + "epoch": 0.1637386262459554, + "grad_norm": 0.1648116558790207, + "learning_rate": 4.836268612420104e-06, + "loss": 1.0109, + "step": 22620 + }, + { + "epoch": 0.16381101290654157, + "grad_norm": 0.169694185256958, + "learning_rate": 4.836196225759517e-06, + "loss": 1.0065, + "step": 22630 + }, + { + "epoch": 0.16388339956712777, + "grad_norm": 0.1901792734861374, + "learning_rate": 4.836123839098931e-06, + "loss": 1.0206, + "step": 22640 + }, + { + "epoch": 0.16395578622771395, + "grad_norm": 0.1724863350391388, + "learning_rate": 4.836051452438345e-06, + "loss": 1.0276, + "step": 22650 + }, + { + "epoch": 0.16402817288830016, + "grad_norm": 0.17653776705265045, + "learning_rate": 4.835979065777759e-06, + "loss": 1.0317, + "step": 22660 + }, + { + "epoch": 0.16410055954888633, + "grad_norm": 0.1690499484539032, + "learning_rate": 4.8359066791171725e-06, + "loss": 1.0252, + "step": 22670 + }, + { + "epoch": 0.1641729462094725, + "grad_norm": 0.1637679785490036, + "learning_rate": 4.835834292456586e-06, + "loss": 1.0172, + "step": 22680 + }, + { + "epoch": 0.1642453328700587, + "grad_norm": 0.17211398482322693, + "learning_rate": 4.835761905796001e-06, + "loss": 1.018, + "step": 22690 + }, + { + "epoch": 0.1643177195306449, + "grad_norm": 0.18994399905204773, + "learning_rate": 4.835689519135414e-06, + "loss": 1.0102, + "step": 22700 + }, + { + "epoch": 0.16439010619123107, + "grad_norm": 0.17697584629058838, + "learning_rate": 4.835617132474828e-06, + "loss": 1.0071, + "step": 22710 + }, + { + "epoch": 0.16446249285181727, + "grad_norm": 0.17998354136943817, + "learning_rate": 4.8355447458142415e-06, + "loss": 1.0074, + "step": 22720 + }, + { + "epoch": 0.16453487951240345, + "grad_norm": 0.17211481928825378, + "learning_rate": 4.835472359153656e-06, + "loss": 1.0121, + "step": 22730 + }, + { + "epoch": 0.16460726617298965, + "grad_norm": 0.19927141070365906, + "learning_rate": 4.8353999724930695e-06, + "loss": 1.026, + "step": 22740 + }, + { + "epoch": 0.16467965283357583, + "grad_norm": 0.18247176706790924, + "learning_rate": 4.835327585832483e-06, + "loss": 1.0036, + "step": 22750 + }, + { + "epoch": 0.164752039494162, + "grad_norm": 0.1697113811969757, + "learning_rate": 4.835255199171897e-06, + "loss": 1.011, + "step": 22760 + }, + { + "epoch": 0.1648244261547482, + "grad_norm": 0.1692165732383728, + "learning_rate": 4.835182812511311e-06, + "loss": 1.0219, + "step": 22770 + }, + { + "epoch": 0.1648968128153344, + "grad_norm": 0.18225090205669403, + "learning_rate": 4.835110425850725e-06, + "loss": 1.0236, + "step": 22780 + }, + { + "epoch": 0.16496919947592056, + "grad_norm": 0.17765475809574127, + "learning_rate": 4.8350380391901385e-06, + "loss": 1.0097, + "step": 22790 + }, + { + "epoch": 0.16504158613650677, + "grad_norm": 0.17133235931396484, + "learning_rate": 4.834965652529552e-06, + "loss": 1.0139, + "step": 22800 + }, + { + "epoch": 0.16511397279709294, + "grad_norm": 0.1649719774723053, + "learning_rate": 4.8348932658689665e-06, + "loss": 1.0256, + "step": 22810 + }, + { + "epoch": 0.16518635945767915, + "grad_norm": 0.1826830953359604, + "learning_rate": 4.83482087920838e-06, + "loss": 1.0307, + "step": 22820 + }, + { + "epoch": 0.16525874611826533, + "grad_norm": 0.16446372866630554, + "learning_rate": 4.834748492547794e-06, + "loss": 1.0148, + "step": 22830 + }, + { + "epoch": 0.1653311327788515, + "grad_norm": 0.16916634142398834, + "learning_rate": 4.834676105887207e-06, + "loss": 1.0137, + "step": 22840 + }, + { + "epoch": 0.1654035194394377, + "grad_norm": 0.18869148194789886, + "learning_rate": 4.834603719226622e-06, + "loss": 1.0212, + "step": 22850 + }, + { + "epoch": 0.16547590610002388, + "grad_norm": 0.2165064662694931, + "learning_rate": 4.8345313325660355e-06, + "loss": 1.0222, + "step": 22860 + }, + { + "epoch": 0.1655482927606101, + "grad_norm": 0.17655514180660248, + "learning_rate": 4.834458945905449e-06, + "loss": 1.0227, + "step": 22870 + }, + { + "epoch": 0.16562067942119627, + "grad_norm": 0.17349255084991455, + "learning_rate": 4.834386559244863e-06, + "loss": 1.0176, + "step": 22880 + }, + { + "epoch": 0.16569306608178244, + "grad_norm": 0.1745917946100235, + "learning_rate": 4.834314172584277e-06, + "loss": 1.0105, + "step": 22890 + }, + { + "epoch": 0.16576545274236865, + "grad_norm": 0.17585240304470062, + "learning_rate": 4.834241785923691e-06, + "loss": 1.0113, + "step": 22900 + }, + { + "epoch": 0.16583783940295482, + "grad_norm": 0.1638375222682953, + "learning_rate": 4.834169399263104e-06, + "loss": 1.0205, + "step": 22910 + }, + { + "epoch": 0.165910226063541, + "grad_norm": 0.24316856265068054, + "learning_rate": 4.834097012602518e-06, + "loss": 1.0187, + "step": 22920 + }, + { + "epoch": 0.1659826127241272, + "grad_norm": 0.18205313384532928, + "learning_rate": 4.834024625941932e-06, + "loss": 1.0046, + "step": 22930 + }, + { + "epoch": 0.16605499938471338, + "grad_norm": 0.16292521357536316, + "learning_rate": 4.833952239281345e-06, + "loss": 1.0175, + "step": 22940 + }, + { + "epoch": 0.16612738604529959, + "grad_norm": 0.1718023717403412, + "learning_rate": 4.833879852620759e-06, + "loss": 1.0144, + "step": 22950 + }, + { + "epoch": 0.16619977270588576, + "grad_norm": 0.17306740581989288, + "learning_rate": 4.833807465960173e-06, + "loss": 1.0277, + "step": 22960 + }, + { + "epoch": 0.16627215936647194, + "grad_norm": 0.2184477001428604, + "learning_rate": 4.833735079299587e-06, + "loss": 1.0086, + "step": 22970 + }, + { + "epoch": 0.16634454602705814, + "grad_norm": 0.18039968609809875, + "learning_rate": 4.8336626926390006e-06, + "loss": 1.0144, + "step": 22980 + }, + { + "epoch": 0.16641693268764432, + "grad_norm": 0.1612757295370102, + "learning_rate": 4.833590305978414e-06, + "loss": 1.0096, + "step": 22990 + }, + { + "epoch": 0.1664893193482305, + "grad_norm": 0.16724608838558197, + "learning_rate": 4.833517919317829e-06, + "loss": 1.0224, + "step": 23000 + }, + { + "epoch": 0.1665617060088167, + "grad_norm": 0.19500578939914703, + "learning_rate": 4.833445532657242e-06, + "loss": 1.0279, + "step": 23010 + }, + { + "epoch": 0.16663409266940288, + "grad_norm": 0.1689131259918213, + "learning_rate": 4.833373145996656e-06, + "loss": 1.0225, + "step": 23020 + }, + { + "epoch": 0.16670647932998908, + "grad_norm": 0.18741093575954437, + "learning_rate": 4.8333007593360695e-06, + "loss": 1.0067, + "step": 23030 + }, + { + "epoch": 0.16677886599057526, + "grad_norm": 0.16117849946022034, + "learning_rate": 4.833228372675484e-06, + "loss": 1.0135, + "step": 23040 + }, + { + "epoch": 0.16685125265116144, + "grad_norm": 0.17075563967227936, + "learning_rate": 4.8331559860148976e-06, + "loss": 1.0091, + "step": 23050 + }, + { + "epoch": 0.16692363931174764, + "grad_norm": 0.16465437412261963, + "learning_rate": 4.833083599354311e-06, + "loss": 1.013, + "step": 23060 + }, + { + "epoch": 0.16699602597233382, + "grad_norm": 0.17264023423194885, + "learning_rate": 4.833011212693725e-06, + "loss": 1.0298, + "step": 23070 + }, + { + "epoch": 0.16706841263292, + "grad_norm": 0.27394869923591614, + "learning_rate": 4.832938826033139e-06, + "loss": 1.0207, + "step": 23080 + }, + { + "epoch": 0.1671407992935062, + "grad_norm": 0.16792115569114685, + "learning_rate": 4.832866439372553e-06, + "loss": 1.0246, + "step": 23090 + }, + { + "epoch": 0.16721318595409237, + "grad_norm": 0.17486481368541718, + "learning_rate": 4.8327940527119665e-06, + "loss": 0.9999, + "step": 23100 + }, + { + "epoch": 0.16728557261467858, + "grad_norm": 0.18093885481357574, + "learning_rate": 4.83272166605138e-06, + "loss": 1.0158, + "step": 23110 + }, + { + "epoch": 0.16735795927526476, + "grad_norm": 0.16081741452217102, + "learning_rate": 4.8326492793907946e-06, + "loss": 1.0061, + "step": 23120 + }, + { + "epoch": 0.16743034593585093, + "grad_norm": 0.1673981100320816, + "learning_rate": 4.832576892730208e-06, + "loss": 1.0152, + "step": 23130 + }, + { + "epoch": 0.16750273259643714, + "grad_norm": 0.1669563502073288, + "learning_rate": 4.832504506069622e-06, + "loss": 1.0121, + "step": 23140 + }, + { + "epoch": 0.16757511925702331, + "grad_norm": 0.19718532264232635, + "learning_rate": 4.8324321194090354e-06, + "loss": 1.0223, + "step": 23150 + }, + { + "epoch": 0.1676475059176095, + "grad_norm": 0.2987012565135956, + "learning_rate": 4.83235973274845e-06, + "loss": 1.0097, + "step": 23160 + }, + { + "epoch": 0.1677198925781957, + "grad_norm": 0.16320352256298065, + "learning_rate": 4.8322873460878635e-06, + "loss": 1.006, + "step": 23170 + }, + { + "epoch": 0.16779227923878187, + "grad_norm": 0.16298076510429382, + "learning_rate": 4.832214959427277e-06, + "loss": 1.0035, + "step": 23180 + }, + { + "epoch": 0.16786466589936808, + "grad_norm": 0.1666620373725891, + "learning_rate": 4.832142572766691e-06, + "loss": 1.0136, + "step": 23190 + }, + { + "epoch": 0.16793705255995425, + "grad_norm": 0.16718272864818573, + "learning_rate": 4.832070186106105e-06, + "loss": 1.0126, + "step": 23200 + }, + { + "epoch": 0.16800943922054043, + "grad_norm": 0.2104543298482895, + "learning_rate": 4.831997799445519e-06, + "loss": 1.0133, + "step": 23210 + }, + { + "epoch": 0.16808182588112663, + "grad_norm": 0.17877444624900818, + "learning_rate": 4.8319254127849324e-06, + "loss": 1.0188, + "step": 23220 + }, + { + "epoch": 0.1681542125417128, + "grad_norm": 0.16311542689800262, + "learning_rate": 4.831853026124346e-06, + "loss": 1.0098, + "step": 23230 + }, + { + "epoch": 0.168226599202299, + "grad_norm": 0.1771518886089325, + "learning_rate": 4.83178063946376e-06, + "loss": 1.0004, + "step": 23240 + }, + { + "epoch": 0.1682989858628852, + "grad_norm": 0.20852121710777283, + "learning_rate": 4.831708252803174e-06, + "loss": 1.014, + "step": 23250 + }, + { + "epoch": 0.16837137252347137, + "grad_norm": 0.17537963390350342, + "learning_rate": 4.831635866142588e-06, + "loss": 1.0188, + "step": 23260 + }, + { + "epoch": 0.16844375918405757, + "grad_norm": 0.16795283555984497, + "learning_rate": 4.831563479482001e-06, + "loss": 1.0063, + "step": 23270 + }, + { + "epoch": 0.16851614584464375, + "grad_norm": 0.18271319568157196, + "learning_rate": 4.831491092821415e-06, + "loss": 1.0066, + "step": 23280 + }, + { + "epoch": 0.16858853250522993, + "grad_norm": 0.16966833174228668, + "learning_rate": 4.8314187061608294e-06, + "loss": 1.0109, + "step": 23290 + }, + { + "epoch": 0.16866091916581613, + "grad_norm": 0.1862793117761612, + "learning_rate": 4.831346319500243e-06, + "loss": 1.0195, + "step": 23300 + }, + { + "epoch": 0.1687333058264023, + "grad_norm": 0.17365515232086182, + "learning_rate": 4.831273932839657e-06, + "loss": 1.04, + "step": 23310 + }, + { + "epoch": 0.16880569248698848, + "grad_norm": 0.17782513797283173, + "learning_rate": 4.83120154617907e-06, + "loss": 1.0078, + "step": 23320 + }, + { + "epoch": 0.1688780791475747, + "grad_norm": 0.16336967051029205, + "learning_rate": 4.831129159518485e-06, + "loss": 1.0081, + "step": 23330 + }, + { + "epoch": 0.16895046580816087, + "grad_norm": 0.18843859434127808, + "learning_rate": 4.831056772857898e-06, + "loss": 1.0103, + "step": 23340 + }, + { + "epoch": 0.16902285246874707, + "grad_norm": 0.1720830351114273, + "learning_rate": 4.830984386197312e-06, + "loss": 1.0168, + "step": 23350 + }, + { + "epoch": 0.16909523912933325, + "grad_norm": 0.24049195647239685, + "learning_rate": 4.830911999536726e-06, + "loss": 1.0181, + "step": 23360 + }, + { + "epoch": 0.16916762578991942, + "grad_norm": 0.17097869515419006, + "learning_rate": 4.83083961287614e-06, + "loss": 1.0042, + "step": 23370 + }, + { + "epoch": 0.16924001245050563, + "grad_norm": 0.2235918790102005, + "learning_rate": 4.830767226215554e-06, + "loss": 1.0094, + "step": 23380 + }, + { + "epoch": 0.1693123991110918, + "grad_norm": 0.16128119826316833, + "learning_rate": 4.830694839554967e-06, + "loss": 1.0185, + "step": 23390 + }, + { + "epoch": 0.16938478577167798, + "grad_norm": 0.16598336398601532, + "learning_rate": 4.830622452894381e-06, + "loss": 1.0118, + "step": 23400 + }, + { + "epoch": 0.16945717243226419, + "grad_norm": 0.16268615424633026, + "learning_rate": 4.830550066233795e-06, + "loss": 1.0171, + "step": 23410 + }, + { + "epoch": 0.16952955909285036, + "grad_norm": 0.1621619611978531, + "learning_rate": 4.830477679573209e-06, + "loss": 1.0176, + "step": 23420 + }, + { + "epoch": 0.16960194575343657, + "grad_norm": 0.1687730848789215, + "learning_rate": 4.830405292912623e-06, + "loss": 1.0058, + "step": 23430 + }, + { + "epoch": 0.16967433241402274, + "grad_norm": 0.17967501282691956, + "learning_rate": 4.830332906252036e-06, + "loss": 1.0141, + "step": 23440 + }, + { + "epoch": 0.16974671907460892, + "grad_norm": 0.23088906705379486, + "learning_rate": 4.83026051959145e-06, + "loss": 1.0283, + "step": 23450 + }, + { + "epoch": 0.16981910573519512, + "grad_norm": 0.16651782393455505, + "learning_rate": 4.8301881329308635e-06, + "loss": 1.0155, + "step": 23460 + }, + { + "epoch": 0.1698914923957813, + "grad_norm": 0.16330918669700623, + "learning_rate": 4.830115746270277e-06, + "loss": 1.0126, + "step": 23470 + }, + { + "epoch": 0.1699638790563675, + "grad_norm": 0.1758406162261963, + "learning_rate": 4.8300433596096915e-06, + "loss": 1.007, + "step": 23480 + }, + { + "epoch": 0.17003626571695368, + "grad_norm": 0.18241257965564728, + "learning_rate": 4.829970972949105e-06, + "loss": 1.0032, + "step": 23490 + }, + { + "epoch": 0.17010865237753986, + "grad_norm": 0.18768127262592316, + "learning_rate": 4.829898586288519e-06, + "loss": 1.0193, + "step": 23500 + }, + { + "epoch": 0.17018103903812606, + "grad_norm": 0.1854964941740036, + "learning_rate": 4.829826199627932e-06, + "loss": 1.0254, + "step": 23510 + }, + { + "epoch": 0.17025342569871224, + "grad_norm": 0.164667010307312, + "learning_rate": 4.829753812967347e-06, + "loss": 1.0093, + "step": 23520 + }, + { + "epoch": 0.17032581235929842, + "grad_norm": 0.1953144520521164, + "learning_rate": 4.8296814263067605e-06, + "loss": 1.0138, + "step": 23530 + }, + { + "epoch": 0.17039819901988462, + "grad_norm": 0.170278862118721, + "learning_rate": 4.829609039646174e-06, + "loss": 1.0114, + "step": 23540 + }, + { + "epoch": 0.1704705856804708, + "grad_norm": 0.19750620424747467, + "learning_rate": 4.829536652985588e-06, + "loss": 1.0215, + "step": 23550 + }, + { + "epoch": 0.170542972341057, + "grad_norm": 0.19091200828552246, + "learning_rate": 4.829464266325002e-06, + "loss": 1.0166, + "step": 23560 + }, + { + "epoch": 0.17061535900164318, + "grad_norm": 0.1851140260696411, + "learning_rate": 4.829391879664416e-06, + "loss": 1.0062, + "step": 23570 + }, + { + "epoch": 0.17068774566222936, + "grad_norm": 0.19524310529232025, + "learning_rate": 4.829319493003829e-06, + "loss": 1.0217, + "step": 23580 + }, + { + "epoch": 0.17076013232281556, + "grad_norm": 0.17677630484104156, + "learning_rate": 4.829247106343243e-06, + "loss": 1.0066, + "step": 23590 + }, + { + "epoch": 0.17083251898340174, + "grad_norm": 0.1704902946949005, + "learning_rate": 4.8291747196826575e-06, + "loss": 1.0033, + "step": 23600 + }, + { + "epoch": 0.17090490564398791, + "grad_norm": 0.16769947111606598, + "learning_rate": 4.829102333022071e-06, + "loss": 0.9979, + "step": 23610 + }, + { + "epoch": 0.17097729230457412, + "grad_norm": 0.16838715970516205, + "learning_rate": 4.829029946361485e-06, + "loss": 1.0022, + "step": 23620 + }, + { + "epoch": 0.1710496789651603, + "grad_norm": 0.18555735051631927, + "learning_rate": 4.828957559700898e-06, + "loss": 1.0036, + "step": 23630 + }, + { + "epoch": 0.1711220656257465, + "grad_norm": 0.17811746895313263, + "learning_rate": 4.828885173040313e-06, + "loss": 1.0122, + "step": 23640 + }, + { + "epoch": 0.17119445228633268, + "grad_norm": 0.17292849719524384, + "learning_rate": 4.828812786379726e-06, + "loss": 0.9983, + "step": 23650 + }, + { + "epoch": 0.17126683894691885, + "grad_norm": 0.18011000752449036, + "learning_rate": 4.82874039971914e-06, + "loss": 1.0149, + "step": 23660 + }, + { + "epoch": 0.17133922560750506, + "grad_norm": 0.1814233809709549, + "learning_rate": 4.828668013058554e-06, + "loss": 1.0192, + "step": 23670 + }, + { + "epoch": 0.17141161226809123, + "grad_norm": 0.17261269688606262, + "learning_rate": 4.828595626397968e-06, + "loss": 1.0306, + "step": 23680 + }, + { + "epoch": 0.1714839989286774, + "grad_norm": 0.18298877775669098, + "learning_rate": 4.828523239737382e-06, + "loss": 1.0157, + "step": 23690 + }, + { + "epoch": 0.17155638558926362, + "grad_norm": 0.16827493906021118, + "learning_rate": 4.828450853076795e-06, + "loss": 1.0056, + "step": 23700 + }, + { + "epoch": 0.1716287722498498, + "grad_norm": 0.21955229341983795, + "learning_rate": 4.828378466416209e-06, + "loss": 1.0198, + "step": 23710 + }, + { + "epoch": 0.171701158910436, + "grad_norm": 0.17503350973129272, + "learning_rate": 4.828306079755623e-06, + "loss": 1.0191, + "step": 23720 + }, + { + "epoch": 0.17177354557102217, + "grad_norm": 0.1629331111907959, + "learning_rate": 4.828233693095037e-06, + "loss": 1.0135, + "step": 23730 + }, + { + "epoch": 0.17184593223160835, + "grad_norm": 0.1784103661775589, + "learning_rate": 4.828161306434451e-06, + "loss": 1.0184, + "step": 23740 + }, + { + "epoch": 0.17191831889219455, + "grad_norm": 0.17589734494686127, + "learning_rate": 4.828088919773864e-06, + "loss": 1.0039, + "step": 23750 + }, + { + "epoch": 0.17199070555278073, + "grad_norm": 0.17863091826438904, + "learning_rate": 4.828016533113279e-06, + "loss": 0.9991, + "step": 23760 + }, + { + "epoch": 0.1720630922133669, + "grad_norm": 0.16851244866847992, + "learning_rate": 4.827944146452692e-06, + "loss": 1.0054, + "step": 23770 + }, + { + "epoch": 0.1721354788739531, + "grad_norm": 0.1785619854927063, + "learning_rate": 4.827871759792106e-06, + "loss": 1.0108, + "step": 23780 + }, + { + "epoch": 0.1722078655345393, + "grad_norm": 0.18070313334465027, + "learning_rate": 4.8277993731315196e-06, + "loss": 1.0221, + "step": 23790 + }, + { + "epoch": 0.1722802521951255, + "grad_norm": 0.16967083513736725, + "learning_rate": 4.827726986470934e-06, + "loss": 1.0178, + "step": 23800 + }, + { + "epoch": 0.17235263885571167, + "grad_norm": 0.19012100994586945, + "learning_rate": 4.827654599810348e-06, + "loss": 1.0037, + "step": 23810 + }, + { + "epoch": 0.17242502551629785, + "grad_norm": 0.18341712653636932, + "learning_rate": 4.827582213149761e-06, + "loss": 1.0065, + "step": 23820 + }, + { + "epoch": 0.17249741217688405, + "grad_norm": 0.1624983698129654, + "learning_rate": 4.827509826489175e-06, + "loss": 1.0026, + "step": 23830 + }, + { + "epoch": 0.17256979883747023, + "grad_norm": 0.1717177778482437, + "learning_rate": 4.827437439828589e-06, + "loss": 1.0206, + "step": 23840 + }, + { + "epoch": 0.1726421854980564, + "grad_norm": 0.18346939980983734, + "learning_rate": 4.827365053168003e-06, + "loss": 1.0212, + "step": 23850 + }, + { + "epoch": 0.1727145721586426, + "grad_norm": 0.2184140831232071, + "learning_rate": 4.8272926665074166e-06, + "loss": 1.0239, + "step": 23860 + }, + { + "epoch": 0.1727869588192288, + "grad_norm": 0.15121838450431824, + "learning_rate": 4.82722027984683e-06, + "loss": 0.9996, + "step": 23870 + }, + { + "epoch": 0.172859345479815, + "grad_norm": 0.1719536930322647, + "learning_rate": 4.827147893186244e-06, + "loss": 1.0067, + "step": 23880 + }, + { + "epoch": 0.17293173214040117, + "grad_norm": 0.18498043715953827, + "learning_rate": 4.827075506525658e-06, + "loss": 1.011, + "step": 23890 + }, + { + "epoch": 0.17300411880098734, + "grad_norm": 0.16693322360515594, + "learning_rate": 4.827003119865072e-06, + "loss": 1.001, + "step": 23900 + }, + { + "epoch": 0.17307650546157355, + "grad_norm": 0.19714364409446716, + "learning_rate": 4.8269307332044855e-06, + "loss": 1.0202, + "step": 23910 + }, + { + "epoch": 0.17314889212215973, + "grad_norm": 0.1966882348060608, + "learning_rate": 4.826858346543899e-06, + "loss": 1.0124, + "step": 23920 + }, + { + "epoch": 0.1732212787827459, + "grad_norm": 0.1639029085636139, + "learning_rate": 4.826785959883314e-06, + "loss": 1.012, + "step": 23930 + }, + { + "epoch": 0.1732936654433321, + "grad_norm": 0.18476232886314392, + "learning_rate": 4.826713573222727e-06, + "loss": 1.0252, + "step": 23940 + }, + { + "epoch": 0.17336605210391828, + "grad_norm": 0.1863066405057907, + "learning_rate": 4.826641186562141e-06, + "loss": 1.0282, + "step": 23950 + }, + { + "epoch": 0.1734384387645045, + "grad_norm": 0.1653691977262497, + "learning_rate": 4.8265687999015544e-06, + "loss": 1.0146, + "step": 23960 + }, + { + "epoch": 0.17351082542509066, + "grad_norm": 0.18651318550109863, + "learning_rate": 4.826496413240969e-06, + "loss": 1.0117, + "step": 23970 + }, + { + "epoch": 0.17358321208567684, + "grad_norm": 0.16590899229049683, + "learning_rate": 4.826424026580382e-06, + "loss": 1.0129, + "step": 23980 + }, + { + "epoch": 0.17365559874626305, + "grad_norm": 0.15830731391906738, + "learning_rate": 4.826351639919796e-06, + "loss": 1.0247, + "step": 23990 + }, + { + "epoch": 0.17372798540684922, + "grad_norm": 0.20496408641338348, + "learning_rate": 4.82627925325921e-06, + "loss": 1.0094, + "step": 24000 + }, + { + "epoch": 0.1738003720674354, + "grad_norm": 0.17806655168533325, + "learning_rate": 4.826206866598623e-06, + "loss": 1.0311, + "step": 24010 + }, + { + "epoch": 0.1738727587280216, + "grad_norm": 0.18243646621704102, + "learning_rate": 4.826134479938037e-06, + "loss": 1.001, + "step": 24020 + }, + { + "epoch": 0.17394514538860778, + "grad_norm": 0.16999110579490662, + "learning_rate": 4.826062093277451e-06, + "loss": 1.0067, + "step": 24030 + }, + { + "epoch": 0.17401753204919398, + "grad_norm": 0.16639530658721924, + "learning_rate": 4.825989706616865e-06, + "loss": 0.9953, + "step": 24040 + }, + { + "epoch": 0.17408991870978016, + "grad_norm": 0.20730364322662354, + "learning_rate": 4.825917319956279e-06, + "loss": 1.0058, + "step": 24050 + }, + { + "epoch": 0.17416230537036634, + "grad_norm": 0.17685486376285553, + "learning_rate": 4.825844933295692e-06, + "loss": 1.005, + "step": 24060 + }, + { + "epoch": 0.17423469203095254, + "grad_norm": 0.16955289244651794, + "learning_rate": 4.825772546635106e-06, + "loss": 1.0101, + "step": 24070 + }, + { + "epoch": 0.17430707869153872, + "grad_norm": 0.1870541274547577, + "learning_rate": 4.82570015997452e-06, + "loss": 1.0102, + "step": 24080 + }, + { + "epoch": 0.17437946535212492, + "grad_norm": 0.22333884239196777, + "learning_rate": 4.825627773313934e-06, + "loss": 1.0087, + "step": 24090 + }, + { + "epoch": 0.1744518520127111, + "grad_norm": 0.18519848585128784, + "learning_rate": 4.825555386653348e-06, + "loss": 1.005, + "step": 24100 + }, + { + "epoch": 0.17452423867329728, + "grad_norm": 0.18657277524471283, + "learning_rate": 4.825482999992761e-06, + "loss": 1.0142, + "step": 24110 + }, + { + "epoch": 0.17459662533388348, + "grad_norm": 0.20050521194934845, + "learning_rate": 4.825410613332176e-06, + "loss": 1.0069, + "step": 24120 + }, + { + "epoch": 0.17466901199446966, + "grad_norm": 0.18028657138347626, + "learning_rate": 4.825338226671589e-06, + "loss": 1.0045, + "step": 24130 + }, + { + "epoch": 0.17474139865505584, + "grad_norm": 0.17001697421073914, + "learning_rate": 4.825265840011003e-06, + "loss": 1.011, + "step": 24140 + }, + { + "epoch": 0.17481378531564204, + "grad_norm": 0.16994266211986542, + "learning_rate": 4.8251934533504165e-06, + "loss": 1.0136, + "step": 24150 + }, + { + "epoch": 0.17488617197622822, + "grad_norm": 0.17373910546302795, + "learning_rate": 4.825121066689831e-06, + "loss": 1.0038, + "step": 24160 + }, + { + "epoch": 0.17495855863681442, + "grad_norm": 0.17860816419124603, + "learning_rate": 4.825048680029245e-06, + "loss": 1.0161, + "step": 24170 + }, + { + "epoch": 0.1750309452974006, + "grad_norm": 0.16933861374855042, + "learning_rate": 4.824976293368658e-06, + "loss": 1.0084, + "step": 24180 + }, + { + "epoch": 0.17510333195798677, + "grad_norm": 0.18152697384357452, + "learning_rate": 4.824903906708072e-06, + "loss": 0.9913, + "step": 24190 + }, + { + "epoch": 0.17517571861857298, + "grad_norm": 0.15832629799842834, + "learning_rate": 4.824831520047486e-06, + "loss": 1.0034, + "step": 24200 + }, + { + "epoch": 0.17524810527915916, + "grad_norm": 0.1804628223180771, + "learning_rate": 4.8247591333869e-06, + "loss": 1.0056, + "step": 24210 + }, + { + "epoch": 0.17532049193974533, + "grad_norm": 0.1735430359840393, + "learning_rate": 4.8246867467263135e-06, + "loss": 1.0073, + "step": 24220 + }, + { + "epoch": 0.17539287860033154, + "grad_norm": 0.176886186003685, + "learning_rate": 4.824614360065727e-06, + "loss": 1.0106, + "step": 24230 + }, + { + "epoch": 0.1754652652609177, + "grad_norm": 0.16210952401161194, + "learning_rate": 4.824541973405142e-06, + "loss": 1.0199, + "step": 24240 + }, + { + "epoch": 0.17553765192150392, + "grad_norm": 0.16794593632221222, + "learning_rate": 4.824469586744555e-06, + "loss": 0.9955, + "step": 24250 + }, + { + "epoch": 0.1756100385820901, + "grad_norm": 0.18100325763225555, + "learning_rate": 4.824397200083969e-06, + "loss": 1.0069, + "step": 24260 + }, + { + "epoch": 0.17568242524267627, + "grad_norm": 0.16271761059761047, + "learning_rate": 4.8243248134233825e-06, + "loss": 1.0122, + "step": 24270 + }, + { + "epoch": 0.17575481190326248, + "grad_norm": 0.16619524359703064, + "learning_rate": 4.824252426762797e-06, + "loss": 1.0177, + "step": 24280 + }, + { + "epoch": 0.17582719856384865, + "grad_norm": 0.18534304201602936, + "learning_rate": 4.8241800401022105e-06, + "loss": 1.0133, + "step": 24290 + }, + { + "epoch": 0.17589958522443483, + "grad_norm": 0.179828941822052, + "learning_rate": 4.824107653441624e-06, + "loss": 1.0115, + "step": 24300 + }, + { + "epoch": 0.17597197188502103, + "grad_norm": 0.19218121469020844, + "learning_rate": 4.824035266781038e-06, + "loss": 1.0002, + "step": 24310 + }, + { + "epoch": 0.1760443585456072, + "grad_norm": 0.1730458289384842, + "learning_rate": 4.823962880120452e-06, + "loss": 1.0093, + "step": 24320 + }, + { + "epoch": 0.17611674520619341, + "grad_norm": 0.1910993903875351, + "learning_rate": 4.823890493459866e-06, + "loss": 1.0133, + "step": 24330 + }, + { + "epoch": 0.1761891318667796, + "grad_norm": 0.1970120370388031, + "learning_rate": 4.8238181067992795e-06, + "loss": 1.0102, + "step": 24340 + }, + { + "epoch": 0.17626151852736577, + "grad_norm": 0.17400617897510529, + "learning_rate": 4.823745720138693e-06, + "loss": 1.0102, + "step": 24350 + }, + { + "epoch": 0.17633390518795197, + "grad_norm": 0.17096106708049774, + "learning_rate": 4.8236733334781075e-06, + "loss": 1.0161, + "step": 24360 + }, + { + "epoch": 0.17640629184853815, + "grad_norm": 0.185289666056633, + "learning_rate": 4.823600946817521e-06, + "loss": 1.0013, + "step": 24370 + }, + { + "epoch": 0.17647867850912433, + "grad_norm": 0.17100411653518677, + "learning_rate": 4.823528560156935e-06, + "loss": 0.994, + "step": 24380 + }, + { + "epoch": 0.17655106516971053, + "grad_norm": 0.1876966655254364, + "learning_rate": 4.823456173496348e-06, + "loss": 1.0041, + "step": 24390 + }, + { + "epoch": 0.1766234518302967, + "grad_norm": 0.1617269068956375, + "learning_rate": 4.823383786835763e-06, + "loss": 0.997, + "step": 24400 + }, + { + "epoch": 0.1766958384908829, + "grad_norm": 0.17999807000160217, + "learning_rate": 4.8233114001751765e-06, + "loss": 1.0085, + "step": 24410 + }, + { + "epoch": 0.1767682251514691, + "grad_norm": 0.16914209723472595, + "learning_rate": 4.82323901351459e-06, + "loss": 1.0218, + "step": 24420 + }, + { + "epoch": 0.17684061181205527, + "grad_norm": 0.20446030795574188, + "learning_rate": 4.823166626854004e-06, + "loss": 1.0115, + "step": 24430 + }, + { + "epoch": 0.17691299847264147, + "grad_norm": 0.16619598865509033, + "learning_rate": 4.823094240193418e-06, + "loss": 1.0197, + "step": 24440 + }, + { + "epoch": 0.17698538513322765, + "grad_norm": 0.173335999250412, + "learning_rate": 4.823021853532832e-06, + "loss": 1.0128, + "step": 24450 + }, + { + "epoch": 0.17705777179381382, + "grad_norm": 0.17360666394233704, + "learning_rate": 4.822949466872245e-06, + "loss": 1.0073, + "step": 24460 + }, + { + "epoch": 0.17713015845440003, + "grad_norm": 0.16045695543289185, + "learning_rate": 4.822877080211659e-06, + "loss": 0.999, + "step": 24470 + }, + { + "epoch": 0.1772025451149862, + "grad_norm": 0.17634133994579315, + "learning_rate": 4.822804693551073e-06, + "loss": 1.0113, + "step": 24480 + }, + { + "epoch": 0.1772749317755724, + "grad_norm": 0.15489248931407928, + "learning_rate": 4.822732306890487e-06, + "loss": 1.0033, + "step": 24490 + }, + { + "epoch": 0.17734731843615859, + "grad_norm": 0.17164482176303864, + "learning_rate": 4.822659920229901e-06, + "loss": 1.0036, + "step": 24500 + }, + { + "epoch": 0.17741970509674476, + "grad_norm": 0.17662885785102844, + "learning_rate": 4.822587533569314e-06, + "loss": 1.0162, + "step": 24510 + }, + { + "epoch": 0.17749209175733097, + "grad_norm": 0.1665455847978592, + "learning_rate": 4.822515146908728e-06, + "loss": 1.0061, + "step": 24520 + }, + { + "epoch": 0.17756447841791714, + "grad_norm": 0.17285911738872528, + "learning_rate": 4.8224427602481416e-06, + "loss": 1.0259, + "step": 24530 + }, + { + "epoch": 0.17763686507850332, + "grad_norm": 0.1706143617630005, + "learning_rate": 4.822370373587555e-06, + "loss": 1.0258, + "step": 24540 + }, + { + "epoch": 0.17770925173908952, + "grad_norm": 0.17605163156986237, + "learning_rate": 4.82229798692697e-06, + "loss": 1.0184, + "step": 24550 + }, + { + "epoch": 0.1777816383996757, + "grad_norm": 0.1811869591474533, + "learning_rate": 4.822225600266383e-06, + "loss": 1.018, + "step": 24560 + }, + { + "epoch": 0.1778540250602619, + "grad_norm": 0.16938798129558563, + "learning_rate": 4.822153213605797e-06, + "loss": 1.0101, + "step": 24570 + }, + { + "epoch": 0.17792641172084808, + "grad_norm": 0.17794403433799744, + "learning_rate": 4.8220808269452105e-06, + "loss": 1.0068, + "step": 24580 + }, + { + "epoch": 0.17799879838143426, + "grad_norm": 0.1750117838382721, + "learning_rate": 4.822008440284625e-06, + "loss": 1.0183, + "step": 24590 + }, + { + "epoch": 0.17807118504202046, + "grad_norm": 0.19311663508415222, + "learning_rate": 4.8219360536240386e-06, + "loss": 1.0059, + "step": 24600 + }, + { + "epoch": 0.17814357170260664, + "grad_norm": 0.17027834057807922, + "learning_rate": 4.821863666963452e-06, + "loss": 1.0117, + "step": 24610 + }, + { + "epoch": 0.17821595836319284, + "grad_norm": 0.16142655909061432, + "learning_rate": 4.821791280302866e-06, + "loss": 1.0039, + "step": 24620 + }, + { + "epoch": 0.17828834502377902, + "grad_norm": 0.20361775159835815, + "learning_rate": 4.82171889364228e-06, + "loss": 0.9984, + "step": 24630 + }, + { + "epoch": 0.1783607316843652, + "grad_norm": 0.1685153841972351, + "learning_rate": 4.821646506981694e-06, + "loss": 1.0199, + "step": 24640 + }, + { + "epoch": 0.1784331183449514, + "grad_norm": 0.17604348063468933, + "learning_rate": 4.8215741203211075e-06, + "loss": 1.0111, + "step": 24650 + }, + { + "epoch": 0.17850550500553758, + "grad_norm": 0.17381654679775238, + "learning_rate": 4.821501733660521e-06, + "loss": 1.0142, + "step": 24660 + }, + { + "epoch": 0.17857789166612376, + "grad_norm": 0.1625945121049881, + "learning_rate": 4.821429346999935e-06, + "loss": 1.0036, + "step": 24670 + }, + { + "epoch": 0.17865027832670996, + "grad_norm": 0.16681507229804993, + "learning_rate": 4.821356960339349e-06, + "loss": 1.0174, + "step": 24680 + }, + { + "epoch": 0.17872266498729614, + "grad_norm": 0.17053383588790894, + "learning_rate": 4.821284573678763e-06, + "loss": 1.0073, + "step": 24690 + }, + { + "epoch": 0.17879505164788234, + "grad_norm": 0.16991731524467468, + "learning_rate": 4.8212121870181764e-06, + "loss": 1.0232, + "step": 24700 + }, + { + "epoch": 0.17886743830846852, + "grad_norm": 0.18369825184345245, + "learning_rate": 4.82113980035759e-06, + "loss": 1.0144, + "step": 24710 + }, + { + "epoch": 0.1789398249690547, + "grad_norm": 0.18356874585151672, + "learning_rate": 4.8210674136970045e-06, + "loss": 1.0159, + "step": 24720 + }, + { + "epoch": 0.1790122116296409, + "grad_norm": 0.21696501970291138, + "learning_rate": 4.820995027036418e-06, + "loss": 1.001, + "step": 24730 + }, + { + "epoch": 0.17908459829022708, + "grad_norm": 0.1677326261997223, + "learning_rate": 4.820922640375832e-06, + "loss": 1.02, + "step": 24740 + }, + { + "epoch": 0.17915698495081325, + "grad_norm": 0.17450115084648132, + "learning_rate": 4.820850253715245e-06, + "loss": 1.0215, + "step": 24750 + }, + { + "epoch": 0.17922937161139946, + "grad_norm": 0.1796158254146576, + "learning_rate": 4.82077786705466e-06, + "loss": 1.0213, + "step": 24760 + }, + { + "epoch": 0.17930175827198563, + "grad_norm": 0.1706731915473938, + "learning_rate": 4.8207054803940734e-06, + "loss": 1.0065, + "step": 24770 + }, + { + "epoch": 0.17937414493257184, + "grad_norm": 0.16157269477844238, + "learning_rate": 4.820633093733487e-06, + "loss": 1.0122, + "step": 24780 + }, + { + "epoch": 0.17944653159315802, + "grad_norm": 0.17014949023723602, + "learning_rate": 4.820560707072901e-06, + "loss": 1.0163, + "step": 24790 + }, + { + "epoch": 0.1795189182537442, + "grad_norm": 0.17551864683628082, + "learning_rate": 4.820488320412315e-06, + "loss": 1.001, + "step": 24800 + }, + { + "epoch": 0.1795913049143304, + "grad_norm": 0.17576205730438232, + "learning_rate": 4.820415933751729e-06, + "loss": 1.0058, + "step": 24810 + }, + { + "epoch": 0.17966369157491657, + "grad_norm": 0.16241174936294556, + "learning_rate": 4.820343547091142e-06, + "loss": 1.001, + "step": 24820 + }, + { + "epoch": 0.17973607823550275, + "grad_norm": 0.2229170948266983, + "learning_rate": 4.820271160430556e-06, + "loss": 1.0258, + "step": 24830 + }, + { + "epoch": 0.17980846489608895, + "grad_norm": 0.16988316178321838, + "learning_rate": 4.8201987737699704e-06, + "loss": 1.013, + "step": 24840 + }, + { + "epoch": 0.17988085155667513, + "grad_norm": 0.2061333805322647, + "learning_rate": 4.820126387109384e-06, + "loss": 1.0025, + "step": 24850 + }, + { + "epoch": 0.17995323821726134, + "grad_norm": 0.19015344977378845, + "learning_rate": 4.820054000448798e-06, + "loss": 1.014, + "step": 24860 + }, + { + "epoch": 0.1800256248778475, + "grad_norm": 0.17161577939987183, + "learning_rate": 4.819981613788211e-06, + "loss": 0.9984, + "step": 24870 + }, + { + "epoch": 0.1800980115384337, + "grad_norm": 0.17109227180480957, + "learning_rate": 4.819909227127626e-06, + "loss": 1.0087, + "step": 24880 + }, + { + "epoch": 0.1801703981990199, + "grad_norm": 0.1876406967639923, + "learning_rate": 4.819836840467039e-06, + "loss": 1.0225, + "step": 24890 + }, + { + "epoch": 0.18024278485960607, + "grad_norm": 0.17785070836544037, + "learning_rate": 4.819764453806453e-06, + "loss": 1.0034, + "step": 24900 + }, + { + "epoch": 0.18031517152019225, + "grad_norm": 0.17659418284893036, + "learning_rate": 4.819692067145867e-06, + "loss": 1.0196, + "step": 24910 + }, + { + "epoch": 0.18038755818077845, + "grad_norm": 0.19268226623535156, + "learning_rate": 4.819619680485281e-06, + "loss": 1.0012, + "step": 24920 + }, + { + "epoch": 0.18045994484136463, + "grad_norm": 0.1869351863861084, + "learning_rate": 4.819547293824695e-06, + "loss": 1.0112, + "step": 24930 + }, + { + "epoch": 0.18053233150195083, + "grad_norm": 0.17009639739990234, + "learning_rate": 4.819474907164108e-06, + "loss": 1.0161, + "step": 24940 + }, + { + "epoch": 0.180604718162537, + "grad_norm": 0.20268967747688293, + "learning_rate": 4.819402520503522e-06, + "loss": 1.0077, + "step": 24950 + }, + { + "epoch": 0.18067710482312319, + "grad_norm": 0.1628827154636383, + "learning_rate": 4.819330133842936e-06, + "loss": 1.0127, + "step": 24960 + }, + { + "epoch": 0.1807494914837094, + "grad_norm": 0.18746638298034668, + "learning_rate": 4.81925774718235e-06, + "loss": 1.015, + "step": 24970 + }, + { + "epoch": 0.18082187814429557, + "grad_norm": 0.16319945454597473, + "learning_rate": 4.819185360521764e-06, + "loss": 0.9972, + "step": 24980 + }, + { + "epoch": 0.18089426480488174, + "grad_norm": 0.1594797521829605, + "learning_rate": 4.819112973861177e-06, + "loss": 1.0161, + "step": 24990 + }, + { + "epoch": 0.18096665146546795, + "grad_norm": 0.20105616748332977, + "learning_rate": 4.819040587200592e-06, + "loss": 1.014, + "step": 25000 + }, + { + "epoch": 0.18103903812605412, + "grad_norm": 0.1811130940914154, + "learning_rate": 4.818968200540005e-06, + "loss": 1.0114, + "step": 25010 + }, + { + "epoch": 0.18111142478664033, + "grad_norm": 0.17586207389831543, + "learning_rate": 4.818895813879419e-06, + "loss": 1.0274, + "step": 25020 + }, + { + "epoch": 0.1811838114472265, + "grad_norm": 0.19107623398303986, + "learning_rate": 4.8188234272188325e-06, + "loss": 1.0176, + "step": 25030 + }, + { + "epoch": 0.18125619810781268, + "grad_norm": 0.16346633434295654, + "learning_rate": 4.818751040558246e-06, + "loss": 1.0137, + "step": 25040 + }, + { + "epoch": 0.1813285847683989, + "grad_norm": 0.17446771264076233, + "learning_rate": 4.81867865389766e-06, + "loss": 0.989, + "step": 25050 + }, + { + "epoch": 0.18140097142898506, + "grad_norm": 0.1719013899564743, + "learning_rate": 4.818606267237073e-06, + "loss": 1.0155, + "step": 25060 + }, + { + "epoch": 0.18147335808957124, + "grad_norm": 0.1678323745727539, + "learning_rate": 4.818533880576488e-06, + "loss": 1.0097, + "step": 25070 + }, + { + "epoch": 0.18154574475015745, + "grad_norm": 0.1604251116514206, + "learning_rate": 4.8184614939159015e-06, + "loss": 1.0109, + "step": 25080 + }, + { + "epoch": 0.18161813141074362, + "grad_norm": 0.20874586701393127, + "learning_rate": 4.818389107255315e-06, + "loss": 1.0054, + "step": 25090 + }, + { + "epoch": 0.18169051807132983, + "grad_norm": 0.17557349801063538, + "learning_rate": 4.818316720594729e-06, + "loss": 1.0165, + "step": 25100 + }, + { + "epoch": 0.181762904731916, + "grad_norm": 0.17090950906276703, + "learning_rate": 4.818244333934143e-06, + "loss": 1.0049, + "step": 25110 + }, + { + "epoch": 0.18183529139250218, + "grad_norm": 0.20330511033535004, + "learning_rate": 4.818171947273557e-06, + "loss": 1.0095, + "step": 25120 + }, + { + "epoch": 0.18190767805308838, + "grad_norm": 0.1726309359073639, + "learning_rate": 4.81809956061297e-06, + "loss": 1.0183, + "step": 25130 + }, + { + "epoch": 0.18198006471367456, + "grad_norm": 0.18963780999183655, + "learning_rate": 4.818027173952384e-06, + "loss": 0.9986, + "step": 25140 + }, + { + "epoch": 0.18205245137426074, + "grad_norm": 0.18106186389923096, + "learning_rate": 4.8179547872917985e-06, + "loss": 1.0016, + "step": 25150 + }, + { + "epoch": 0.18212483803484694, + "grad_norm": 0.17316314578056335, + "learning_rate": 4.817882400631212e-06, + "loss": 0.9989, + "step": 25160 + }, + { + "epoch": 0.18219722469543312, + "grad_norm": 0.17553074657917023, + "learning_rate": 4.817810013970626e-06, + "loss": 0.9927, + "step": 25170 + }, + { + "epoch": 0.18226961135601932, + "grad_norm": 0.21649324893951416, + "learning_rate": 4.817737627310039e-06, + "loss": 1.0065, + "step": 25180 + }, + { + "epoch": 0.1823419980166055, + "grad_norm": 0.19755081832408905, + "learning_rate": 4.817665240649454e-06, + "loss": 1.0105, + "step": 25190 + }, + { + "epoch": 0.18241438467719168, + "grad_norm": 0.15995916724205017, + "learning_rate": 4.817592853988867e-06, + "loss": 1.0064, + "step": 25200 + }, + { + "epoch": 0.18248677133777788, + "grad_norm": 0.17277465760707855, + "learning_rate": 4.817520467328281e-06, + "loss": 1.0104, + "step": 25210 + }, + { + "epoch": 0.18255915799836406, + "grad_norm": 0.17675970494747162, + "learning_rate": 4.817448080667695e-06, + "loss": 0.9952, + "step": 25220 + }, + { + "epoch": 0.18263154465895026, + "grad_norm": 0.18293781578540802, + "learning_rate": 4.817375694007109e-06, + "loss": 0.9949, + "step": 25230 + }, + { + "epoch": 0.18270393131953644, + "grad_norm": 0.18516694009304047, + "learning_rate": 4.817303307346523e-06, + "loss": 1.0042, + "step": 25240 + }, + { + "epoch": 0.18277631798012262, + "grad_norm": 0.1832907646894455, + "learning_rate": 4.817230920685936e-06, + "loss": 1.0153, + "step": 25250 + }, + { + "epoch": 0.18284870464070882, + "grad_norm": 0.17651574313640594, + "learning_rate": 4.81715853402535e-06, + "loss": 1.0011, + "step": 25260 + }, + { + "epoch": 0.182921091301295, + "grad_norm": 0.17321477830410004, + "learning_rate": 4.8170861473647636e-06, + "loss": 0.9985, + "step": 25270 + }, + { + "epoch": 0.18299347796188117, + "grad_norm": 0.16472671926021576, + "learning_rate": 4.817013760704178e-06, + "loss": 1.0067, + "step": 25280 + }, + { + "epoch": 0.18306586462246738, + "grad_norm": 0.16202005743980408, + "learning_rate": 4.816941374043592e-06, + "loss": 1.0109, + "step": 25290 + }, + { + "epoch": 0.18313825128305355, + "grad_norm": 0.19252698123455048, + "learning_rate": 4.816868987383005e-06, + "loss": 0.9949, + "step": 25300 + }, + { + "epoch": 0.18321063794363976, + "grad_norm": 0.18542930483818054, + "learning_rate": 4.816796600722419e-06, + "loss": 1.0235, + "step": 25310 + }, + { + "epoch": 0.18328302460422594, + "grad_norm": 0.17838090658187866, + "learning_rate": 4.816724214061833e-06, + "loss": 1.0196, + "step": 25320 + }, + { + "epoch": 0.1833554112648121, + "grad_norm": 0.1758023202419281, + "learning_rate": 4.816651827401247e-06, + "loss": 1.0076, + "step": 25330 + }, + { + "epoch": 0.18342779792539832, + "grad_norm": 0.16187690198421478, + "learning_rate": 4.8165794407406606e-06, + "loss": 1.0086, + "step": 25340 + }, + { + "epoch": 0.1835001845859845, + "grad_norm": 0.16450618207454681, + "learning_rate": 4.816507054080074e-06, + "loss": 1.0065, + "step": 25350 + }, + { + "epoch": 0.18357257124657067, + "grad_norm": 0.18235477805137634, + "learning_rate": 4.816434667419489e-06, + "loss": 1.0105, + "step": 25360 + }, + { + "epoch": 0.18364495790715687, + "grad_norm": 0.1665814220905304, + "learning_rate": 4.816362280758902e-06, + "loss": 0.9997, + "step": 25370 + }, + { + "epoch": 0.18371734456774305, + "grad_norm": 0.17312000691890717, + "learning_rate": 4.816289894098316e-06, + "loss": 1.0143, + "step": 25380 + }, + { + "epoch": 0.18378973122832926, + "grad_norm": 0.16814294457435608, + "learning_rate": 4.8162175074377295e-06, + "loss": 0.987, + "step": 25390 + }, + { + "epoch": 0.18386211788891543, + "grad_norm": 0.17651145160198212, + "learning_rate": 4.816145120777144e-06, + "loss": 1.0058, + "step": 25400 + }, + { + "epoch": 0.1839345045495016, + "grad_norm": 0.1924390345811844, + "learning_rate": 4.816072734116558e-06, + "loss": 1.0145, + "step": 25410 + }, + { + "epoch": 0.18400689121008781, + "grad_norm": 0.1660076081752777, + "learning_rate": 4.816000347455971e-06, + "loss": 0.9972, + "step": 25420 + }, + { + "epoch": 0.184079277870674, + "grad_norm": 0.16928330063819885, + "learning_rate": 4.815927960795385e-06, + "loss": 1.0043, + "step": 25430 + }, + { + "epoch": 0.18415166453126017, + "grad_norm": 0.2533145546913147, + "learning_rate": 4.815855574134799e-06, + "loss": 0.9977, + "step": 25440 + }, + { + "epoch": 0.18422405119184637, + "grad_norm": 0.17467600107192993, + "learning_rate": 4.815783187474213e-06, + "loss": 1.0004, + "step": 25450 + }, + { + "epoch": 0.18429643785243255, + "grad_norm": 0.16953355073928833, + "learning_rate": 4.8157108008136265e-06, + "loss": 1.0088, + "step": 25460 + }, + { + "epoch": 0.18436882451301875, + "grad_norm": 0.17765529453754425, + "learning_rate": 4.81563841415304e-06, + "loss": 1.0144, + "step": 25470 + }, + { + "epoch": 0.18444121117360493, + "grad_norm": 0.16797038912773132, + "learning_rate": 4.815566027492455e-06, + "loss": 1.0036, + "step": 25480 + }, + { + "epoch": 0.1845135978341911, + "grad_norm": 0.17482724785804749, + "learning_rate": 4.815493640831868e-06, + "loss": 1.0054, + "step": 25490 + }, + { + "epoch": 0.1845859844947773, + "grad_norm": 0.18403145670890808, + "learning_rate": 4.815421254171282e-06, + "loss": 1.0197, + "step": 25500 + }, + { + "epoch": 0.1846583711553635, + "grad_norm": 0.1704588383436203, + "learning_rate": 4.8153488675106954e-06, + "loss": 1.0074, + "step": 25510 + }, + { + "epoch": 0.18473075781594966, + "grad_norm": 0.1732064187526703, + "learning_rate": 4.81527648085011e-06, + "loss": 1.0107, + "step": 25520 + }, + { + "epoch": 0.18480314447653587, + "grad_norm": 0.16858525574207306, + "learning_rate": 4.8152040941895235e-06, + "loss": 1.0017, + "step": 25530 + }, + { + "epoch": 0.18487553113712205, + "grad_norm": 0.17689578235149384, + "learning_rate": 4.815131707528937e-06, + "loss": 0.995, + "step": 25540 + }, + { + "epoch": 0.18494791779770825, + "grad_norm": 0.16487176716327667, + "learning_rate": 4.815059320868351e-06, + "loss": 1.0149, + "step": 25550 + }, + { + "epoch": 0.18502030445829443, + "grad_norm": 0.15995535254478455, + "learning_rate": 4.814986934207765e-06, + "loss": 1.0098, + "step": 25560 + }, + { + "epoch": 0.1850926911188806, + "grad_norm": 0.18028096854686737, + "learning_rate": 4.814914547547178e-06, + "loss": 0.9971, + "step": 25570 + }, + { + "epoch": 0.1851650777794668, + "grad_norm": 0.1847427487373352, + "learning_rate": 4.814842160886592e-06, + "loss": 0.9958, + "step": 25580 + }, + { + "epoch": 0.18523746444005298, + "grad_norm": 0.17257408797740936, + "learning_rate": 4.814769774226006e-06, + "loss": 1.0213, + "step": 25590 + }, + { + "epoch": 0.18530985110063916, + "grad_norm": 0.17929920554161072, + "learning_rate": 4.81469738756542e-06, + "loss": 1.0089, + "step": 25600 + }, + { + "epoch": 0.18538223776122537, + "grad_norm": 0.16596031188964844, + "learning_rate": 4.814625000904833e-06, + "loss": 1.0069, + "step": 25610 + }, + { + "epoch": 0.18545462442181154, + "grad_norm": 0.17687474191188812, + "learning_rate": 4.814552614244247e-06, + "loss": 1.0072, + "step": 25620 + }, + { + "epoch": 0.18552701108239775, + "grad_norm": 0.18298721313476562, + "learning_rate": 4.814480227583661e-06, + "loss": 1.0038, + "step": 25630 + }, + { + "epoch": 0.18559939774298392, + "grad_norm": 0.17622758448123932, + "learning_rate": 4.814407840923075e-06, + "loss": 1.0052, + "step": 25640 + }, + { + "epoch": 0.1856717844035701, + "grad_norm": 0.17824731767177582, + "learning_rate": 4.814335454262489e-06, + "loss": 1.0, + "step": 25650 + }, + { + "epoch": 0.1857441710641563, + "grad_norm": 0.16804032027721405, + "learning_rate": 4.814263067601902e-06, + "loss": 1.0189, + "step": 25660 + }, + { + "epoch": 0.18581655772474248, + "grad_norm": 0.16727906465530396, + "learning_rate": 4.814190680941317e-06, + "loss": 1.0104, + "step": 25670 + }, + { + "epoch": 0.18588894438532866, + "grad_norm": 0.16687215864658356, + "learning_rate": 4.81411829428073e-06, + "loss": 1.0073, + "step": 25680 + }, + { + "epoch": 0.18596133104591486, + "grad_norm": 0.16591374576091766, + "learning_rate": 4.814045907620144e-06, + "loss": 1.0267, + "step": 25690 + }, + { + "epoch": 0.18603371770650104, + "grad_norm": 0.19451576471328735, + "learning_rate": 4.8139735209595575e-06, + "loss": 0.9904, + "step": 25700 + }, + { + "epoch": 0.18610610436708724, + "grad_norm": 0.179254949092865, + "learning_rate": 4.813901134298972e-06, + "loss": 1.0082, + "step": 25710 + }, + { + "epoch": 0.18617849102767342, + "grad_norm": 0.17360366880893707, + "learning_rate": 4.813828747638386e-06, + "loss": 1.0003, + "step": 25720 + }, + { + "epoch": 0.1862508776882596, + "grad_norm": 0.15899336338043213, + "learning_rate": 4.813756360977799e-06, + "loss": 1.0011, + "step": 25730 + }, + { + "epoch": 0.1863232643488458, + "grad_norm": 0.16736361384391785, + "learning_rate": 4.813683974317213e-06, + "loss": 1.0155, + "step": 25740 + }, + { + "epoch": 0.18639565100943198, + "grad_norm": 0.18747742474079132, + "learning_rate": 4.813611587656627e-06, + "loss": 0.9963, + "step": 25750 + }, + { + "epoch": 0.18646803767001816, + "grad_norm": 0.177093505859375, + "learning_rate": 4.813539200996041e-06, + "loss": 1.0041, + "step": 25760 + }, + { + "epoch": 0.18654042433060436, + "grad_norm": 0.16255925595760345, + "learning_rate": 4.8134668143354545e-06, + "loss": 1.0181, + "step": 25770 + }, + { + "epoch": 0.18661281099119054, + "grad_norm": 0.1680876463651657, + "learning_rate": 4.813394427674868e-06, + "loss": 1.0146, + "step": 25780 + }, + { + "epoch": 0.18668519765177674, + "grad_norm": 0.16530559957027435, + "learning_rate": 4.813322041014283e-06, + "loss": 0.9985, + "step": 25790 + }, + { + "epoch": 0.18675758431236292, + "grad_norm": 0.17303499579429626, + "learning_rate": 4.813249654353696e-06, + "loss": 1.0045, + "step": 25800 + }, + { + "epoch": 0.1868299709729491, + "grad_norm": 0.18719731271266937, + "learning_rate": 4.81317726769311e-06, + "loss": 1.0188, + "step": 25810 + }, + { + "epoch": 0.1869023576335353, + "grad_norm": 0.17420874536037445, + "learning_rate": 4.8131048810325235e-06, + "loss": 1.0054, + "step": 25820 + }, + { + "epoch": 0.18697474429412148, + "grad_norm": 0.16283327341079712, + "learning_rate": 4.813032494371938e-06, + "loss": 1.01, + "step": 25830 + }, + { + "epoch": 0.18704713095470768, + "grad_norm": 0.16841505467891693, + "learning_rate": 4.8129601077113515e-06, + "loss": 1.0008, + "step": 25840 + }, + { + "epoch": 0.18711951761529386, + "grad_norm": 0.16842392086982727, + "learning_rate": 4.812887721050765e-06, + "loss": 0.9914, + "step": 25850 + }, + { + "epoch": 0.18719190427588003, + "grad_norm": 0.16453711688518524, + "learning_rate": 4.812815334390179e-06, + "loss": 1.0025, + "step": 25860 + }, + { + "epoch": 0.18726429093646624, + "grad_norm": 0.15623344480991364, + "learning_rate": 4.812742947729593e-06, + "loss": 1.0011, + "step": 25870 + }, + { + "epoch": 0.18733667759705241, + "grad_norm": 0.1698412448167801, + "learning_rate": 4.812670561069007e-06, + "loss": 1.0064, + "step": 25880 + }, + { + "epoch": 0.1874090642576386, + "grad_norm": 0.17024236917495728, + "learning_rate": 4.8125981744084205e-06, + "loss": 0.9973, + "step": 25890 + }, + { + "epoch": 0.1874814509182248, + "grad_norm": 0.1828537881374359, + "learning_rate": 4.812525787747834e-06, + "loss": 1.0015, + "step": 25900 + }, + { + "epoch": 0.18755383757881097, + "grad_norm": 0.25711390376091003, + "learning_rate": 4.812453401087248e-06, + "loss": 1.0139, + "step": 25910 + }, + { + "epoch": 0.18762622423939718, + "grad_norm": 0.17483864724636078, + "learning_rate": 4.812381014426662e-06, + "loss": 1.0105, + "step": 25920 + }, + { + "epoch": 0.18769861089998335, + "grad_norm": 0.18660502135753632, + "learning_rate": 4.812308627766076e-06, + "loss": 1.0001, + "step": 25930 + }, + { + "epoch": 0.18777099756056953, + "grad_norm": 0.16732463240623474, + "learning_rate": 4.812236241105489e-06, + "loss": 0.9918, + "step": 25940 + }, + { + "epoch": 0.18784338422115573, + "grad_norm": 0.16313007473945618, + "learning_rate": 4.812163854444903e-06, + "loss": 0.9983, + "step": 25950 + }, + { + "epoch": 0.1879157708817419, + "grad_norm": 0.17107586562633514, + "learning_rate": 4.8120914677843175e-06, + "loss": 1.0116, + "step": 25960 + }, + { + "epoch": 0.1879881575423281, + "grad_norm": 0.17458048462867737, + "learning_rate": 4.812019081123731e-06, + "loss": 0.998, + "step": 25970 + }, + { + "epoch": 0.1880605442029143, + "grad_norm": 0.16045020520687103, + "learning_rate": 4.811946694463145e-06, + "loss": 0.9783, + "step": 25980 + }, + { + "epoch": 0.18813293086350047, + "grad_norm": 0.1742679625749588, + "learning_rate": 4.811874307802558e-06, + "loss": 1.0056, + "step": 25990 + }, + { + "epoch": 0.18820531752408667, + "grad_norm": 0.21241635084152222, + "learning_rate": 4.811801921141973e-06, + "loss": 1.0108, + "step": 26000 + }, + { + "epoch": 0.18827770418467285, + "grad_norm": 0.2042105346918106, + "learning_rate": 4.811729534481386e-06, + "loss": 0.9975, + "step": 26010 + }, + { + "epoch": 0.18835009084525903, + "grad_norm": 0.18309561908245087, + "learning_rate": 4.8116571478208e-06, + "loss": 1.0094, + "step": 26020 + }, + { + "epoch": 0.18842247750584523, + "grad_norm": 0.17432443797588348, + "learning_rate": 4.811584761160214e-06, + "loss": 0.9972, + "step": 26030 + }, + { + "epoch": 0.1884948641664314, + "grad_norm": 0.17750132083892822, + "learning_rate": 4.811512374499628e-06, + "loss": 0.9996, + "step": 26040 + }, + { + "epoch": 0.18856725082701759, + "grad_norm": 0.17087271809577942, + "learning_rate": 4.811439987839042e-06, + "loss": 1.0022, + "step": 26050 + }, + { + "epoch": 0.1886396374876038, + "grad_norm": 0.1847958117723465, + "learning_rate": 4.811367601178455e-06, + "loss": 1.0217, + "step": 26060 + }, + { + "epoch": 0.18871202414818997, + "grad_norm": 0.20137399435043335, + "learning_rate": 4.811295214517869e-06, + "loss": 0.9926, + "step": 26070 + }, + { + "epoch": 0.18878441080877617, + "grad_norm": 0.18774452805519104, + "learning_rate": 4.811222827857283e-06, + "loss": 1.0058, + "step": 26080 + }, + { + "epoch": 0.18885679746936235, + "grad_norm": 0.18850480020046234, + "learning_rate": 4.811150441196697e-06, + "loss": 0.9942, + "step": 26090 + }, + { + "epoch": 0.18892918412994852, + "grad_norm": 0.18268634378910065, + "learning_rate": 4.81107805453611e-06, + "loss": 0.9998, + "step": 26100 + }, + { + "epoch": 0.18900157079053473, + "grad_norm": 0.16116634011268616, + "learning_rate": 4.811005667875524e-06, + "loss": 0.9966, + "step": 26110 + }, + { + "epoch": 0.1890739574511209, + "grad_norm": 0.18132539093494415, + "learning_rate": 4.810933281214938e-06, + "loss": 1.006, + "step": 26120 + }, + { + "epoch": 0.18914634411170708, + "grad_norm": 0.19577182829380035, + "learning_rate": 4.8108608945543515e-06, + "loss": 1.0125, + "step": 26130 + }, + { + "epoch": 0.1892187307722933, + "grad_norm": 0.2031572461128235, + "learning_rate": 4.810788507893765e-06, + "loss": 1.0017, + "step": 26140 + }, + { + "epoch": 0.18929111743287946, + "grad_norm": 0.1615428924560547, + "learning_rate": 4.81071612123318e-06, + "loss": 0.9947, + "step": 26150 + }, + { + "epoch": 0.18936350409346567, + "grad_norm": 0.18610787391662598, + "learning_rate": 4.810643734572593e-06, + "loss": 1.0, + "step": 26160 + }, + { + "epoch": 0.18943589075405184, + "grad_norm": 0.1651063859462738, + "learning_rate": 4.810571347912007e-06, + "loss": 0.9999, + "step": 26170 + }, + { + "epoch": 0.18950827741463802, + "grad_norm": 0.1639026403427124, + "learning_rate": 4.8104989612514204e-06, + "loss": 1.0054, + "step": 26180 + }, + { + "epoch": 0.18958066407522423, + "grad_norm": 0.17324158549308777, + "learning_rate": 4.810426574590835e-06, + "loss": 1.0045, + "step": 26190 + }, + { + "epoch": 0.1896530507358104, + "grad_norm": 0.1581469178199768, + "learning_rate": 4.8103541879302485e-06, + "loss": 0.9932, + "step": 26200 + }, + { + "epoch": 0.18972543739639658, + "grad_norm": 0.17741291224956512, + "learning_rate": 4.810281801269662e-06, + "loss": 0.9954, + "step": 26210 + }, + { + "epoch": 0.18979782405698278, + "grad_norm": 0.21022439002990723, + "learning_rate": 4.810209414609076e-06, + "loss": 1.0093, + "step": 26220 + }, + { + "epoch": 0.18987021071756896, + "grad_norm": 0.1657036393880844, + "learning_rate": 4.81013702794849e-06, + "loss": 0.9934, + "step": 26230 + }, + { + "epoch": 0.18994259737815516, + "grad_norm": 0.17548425495624542, + "learning_rate": 4.810064641287904e-06, + "loss": 1.0048, + "step": 26240 + }, + { + "epoch": 0.19001498403874134, + "grad_norm": 0.17935971915721893, + "learning_rate": 4.8099922546273174e-06, + "loss": 1.0143, + "step": 26250 + }, + { + "epoch": 0.19008737069932752, + "grad_norm": 0.16744934022426605, + "learning_rate": 4.809919867966731e-06, + "loss": 0.9974, + "step": 26260 + }, + { + "epoch": 0.19015975735991372, + "grad_norm": 0.18624848127365112, + "learning_rate": 4.8098474813061455e-06, + "loss": 1.0077, + "step": 26270 + }, + { + "epoch": 0.1902321440204999, + "grad_norm": 0.21752850711345673, + "learning_rate": 4.809775094645559e-06, + "loss": 1.0113, + "step": 26280 + }, + { + "epoch": 0.19030453068108608, + "grad_norm": 0.16781753301620483, + "learning_rate": 4.809702707984973e-06, + "loss": 1.0066, + "step": 26290 + }, + { + "epoch": 0.19037691734167228, + "grad_norm": 0.16836953163146973, + "learning_rate": 4.809630321324386e-06, + "loss": 1.0001, + "step": 26300 + }, + { + "epoch": 0.19044930400225846, + "grad_norm": 0.2364221215248108, + "learning_rate": 4.809557934663801e-06, + "loss": 0.9922, + "step": 26310 + }, + { + "epoch": 0.19052169066284466, + "grad_norm": 0.18490445613861084, + "learning_rate": 4.8094855480032144e-06, + "loss": 1.0104, + "step": 26320 + }, + { + "epoch": 0.19059407732343084, + "grad_norm": 0.1624281406402588, + "learning_rate": 4.809413161342628e-06, + "loss": 0.9976, + "step": 26330 + }, + { + "epoch": 0.19066646398401702, + "grad_norm": 0.17725110054016113, + "learning_rate": 4.809340774682042e-06, + "loss": 1.0087, + "step": 26340 + }, + { + "epoch": 0.19073885064460322, + "grad_norm": 0.1625022143125534, + "learning_rate": 4.809268388021456e-06, + "loss": 1.0036, + "step": 26350 + }, + { + "epoch": 0.1908112373051894, + "grad_norm": 0.1683979332447052, + "learning_rate": 4.80919600136087e-06, + "loss": 0.9992, + "step": 26360 + }, + { + "epoch": 0.1908836239657756, + "grad_norm": 0.22802671790122986, + "learning_rate": 4.809123614700283e-06, + "loss": 1.0043, + "step": 26370 + }, + { + "epoch": 0.19095601062636178, + "grad_norm": 0.16152718663215637, + "learning_rate": 4.809051228039697e-06, + "loss": 0.9968, + "step": 26380 + }, + { + "epoch": 0.19102839728694795, + "grad_norm": 0.17749905586242676, + "learning_rate": 4.8089788413791115e-06, + "loss": 1.0124, + "step": 26390 + }, + { + "epoch": 0.19110078394753416, + "grad_norm": 0.16626082360744476, + "learning_rate": 4.808906454718525e-06, + "loss": 0.9949, + "step": 26400 + }, + { + "epoch": 0.19117317060812034, + "grad_norm": 0.1978878676891327, + "learning_rate": 4.808834068057939e-06, + "loss": 1.0022, + "step": 26410 + }, + { + "epoch": 0.1912455572687065, + "grad_norm": 0.1693107634782791, + "learning_rate": 4.808761681397352e-06, + "loss": 0.9924, + "step": 26420 + }, + { + "epoch": 0.19131794392929272, + "grad_norm": 0.18437570333480835, + "learning_rate": 4.808689294736767e-06, + "loss": 1.0065, + "step": 26430 + }, + { + "epoch": 0.1913903305898789, + "grad_norm": 0.1534915566444397, + "learning_rate": 4.80861690807618e-06, + "loss": 1.0144, + "step": 26440 + }, + { + "epoch": 0.1914627172504651, + "grad_norm": 0.17550812661647797, + "learning_rate": 4.808544521415594e-06, + "loss": 0.9951, + "step": 26450 + }, + { + "epoch": 0.19153510391105127, + "grad_norm": 0.19502116739749908, + "learning_rate": 4.808472134755008e-06, + "loss": 0.9999, + "step": 26460 + }, + { + "epoch": 0.19160749057163745, + "grad_norm": 0.1792958527803421, + "learning_rate": 4.808399748094422e-06, + "loss": 0.9968, + "step": 26470 + }, + { + "epoch": 0.19167987723222366, + "grad_norm": 0.16823455691337585, + "learning_rate": 4.808327361433836e-06, + "loss": 1.0029, + "step": 26480 + }, + { + "epoch": 0.19175226389280983, + "grad_norm": 0.17597924172878265, + "learning_rate": 4.808254974773249e-06, + "loss": 1.008, + "step": 26490 + }, + { + "epoch": 0.191824650553396, + "grad_norm": 0.1772463321685791, + "learning_rate": 4.808182588112663e-06, + "loss": 0.9921, + "step": 26500 + }, + { + "epoch": 0.1918970372139822, + "grad_norm": 0.1926368921995163, + "learning_rate": 4.808110201452077e-06, + "loss": 0.9993, + "step": 26510 + }, + { + "epoch": 0.1919694238745684, + "grad_norm": 0.16659504175186157, + "learning_rate": 4.808037814791491e-06, + "loss": 1.0063, + "step": 26520 + }, + { + "epoch": 0.1920418105351546, + "grad_norm": 0.1627788245677948, + "learning_rate": 4.807965428130905e-06, + "loss": 0.9983, + "step": 26530 + }, + { + "epoch": 0.19211419719574077, + "grad_norm": 0.30011650919914246, + "learning_rate": 4.807893041470318e-06, + "loss": 0.9909, + "step": 26540 + }, + { + "epoch": 0.19218658385632695, + "grad_norm": 0.17447486519813538, + "learning_rate": 4.807820654809732e-06, + "loss": 0.9992, + "step": 26550 + }, + { + "epoch": 0.19225897051691315, + "grad_norm": 0.17184774577617645, + "learning_rate": 4.807748268149146e-06, + "loss": 1.0044, + "step": 26560 + }, + { + "epoch": 0.19233135717749933, + "grad_norm": 0.17131789028644562, + "learning_rate": 4.80767588148856e-06, + "loss": 1.0048, + "step": 26570 + }, + { + "epoch": 0.1924037438380855, + "grad_norm": 0.1681704968214035, + "learning_rate": 4.8076034948279735e-06, + "loss": 1.0105, + "step": 26580 + }, + { + "epoch": 0.1924761304986717, + "grad_norm": 0.18246489763259888, + "learning_rate": 4.807531108167387e-06, + "loss": 0.9853, + "step": 26590 + }, + { + "epoch": 0.1925485171592579, + "grad_norm": 0.1730792075395584, + "learning_rate": 4.807458721506802e-06, + "loss": 1.0048, + "step": 26600 + }, + { + "epoch": 0.1926209038198441, + "grad_norm": 0.1860353797674179, + "learning_rate": 4.807386334846215e-06, + "loss": 1.0024, + "step": 26610 + }, + { + "epoch": 0.19269329048043027, + "grad_norm": 0.16695286333560944, + "learning_rate": 4.807313948185629e-06, + "loss": 1.0142, + "step": 26620 + }, + { + "epoch": 0.19276567714101644, + "grad_norm": 0.17245034873485565, + "learning_rate": 4.8072415615250425e-06, + "loss": 1.0003, + "step": 26630 + }, + { + "epoch": 0.19283806380160265, + "grad_norm": 0.16842077672481537, + "learning_rate": 4.807169174864456e-06, + "loss": 1.0141, + "step": 26640 + }, + { + "epoch": 0.19291045046218883, + "grad_norm": 0.16834910213947296, + "learning_rate": 4.80709678820387e-06, + "loss": 0.9903, + "step": 26650 + }, + { + "epoch": 0.192982837122775, + "grad_norm": 0.1826198846101761, + "learning_rate": 4.807024401543284e-06, + "loss": 1.0076, + "step": 26660 + }, + { + "epoch": 0.1930552237833612, + "grad_norm": 0.17187774181365967, + "learning_rate": 4.806952014882698e-06, + "loss": 1.0037, + "step": 26670 + }, + { + "epoch": 0.19312761044394738, + "grad_norm": 0.1815541386604309, + "learning_rate": 4.806879628222111e-06, + "loss": 1.0086, + "step": 26680 + }, + { + "epoch": 0.1931999971045336, + "grad_norm": 0.18151985108852386, + "learning_rate": 4.806807241561525e-06, + "loss": 0.9894, + "step": 26690 + }, + { + "epoch": 0.19327238376511977, + "grad_norm": 0.17643725872039795, + "learning_rate": 4.806734854900939e-06, + "loss": 1.0044, + "step": 26700 + }, + { + "epoch": 0.19334477042570594, + "grad_norm": 0.17447534203529358, + "learning_rate": 4.806662468240353e-06, + "loss": 1.0114, + "step": 26710 + }, + { + "epoch": 0.19341715708629215, + "grad_norm": 0.16424672305583954, + "learning_rate": 4.806590081579767e-06, + "loss": 1.0033, + "step": 26720 + }, + { + "epoch": 0.19348954374687832, + "grad_norm": 0.16802680492401123, + "learning_rate": 4.80651769491918e-06, + "loss": 1.0072, + "step": 26730 + }, + { + "epoch": 0.1935619304074645, + "grad_norm": 0.17925448715686798, + "learning_rate": 4.806445308258594e-06, + "loss": 0.9935, + "step": 26740 + }, + { + "epoch": 0.1936343170680507, + "grad_norm": 0.17291328310966492, + "learning_rate": 4.806372921598008e-06, + "loss": 0.9942, + "step": 26750 + }, + { + "epoch": 0.19370670372863688, + "grad_norm": 0.18986187875270844, + "learning_rate": 4.806300534937422e-06, + "loss": 1.0095, + "step": 26760 + }, + { + "epoch": 0.19377909038922309, + "grad_norm": 0.15968811511993408, + "learning_rate": 4.806228148276836e-06, + "loss": 1.0083, + "step": 26770 + }, + { + "epoch": 0.19385147704980926, + "grad_norm": 0.18246155977249146, + "learning_rate": 4.806155761616249e-06, + "loss": 0.9959, + "step": 26780 + }, + { + "epoch": 0.19392386371039544, + "grad_norm": 0.1724570244550705, + "learning_rate": 4.806083374955664e-06, + "loss": 0.993, + "step": 26790 + }, + { + "epoch": 0.19399625037098164, + "grad_norm": 0.16867926716804504, + "learning_rate": 4.806010988295077e-06, + "loss": 1.0064, + "step": 26800 + }, + { + "epoch": 0.19406863703156782, + "grad_norm": 0.16909553110599518, + "learning_rate": 4.805938601634491e-06, + "loss": 1.0073, + "step": 26810 + }, + { + "epoch": 0.194141023692154, + "grad_norm": 0.2134760469198227, + "learning_rate": 4.8058662149739046e-06, + "loss": 0.9893, + "step": 26820 + }, + { + "epoch": 0.1942134103527402, + "grad_norm": 0.16521146893501282, + "learning_rate": 4.805793828313319e-06, + "loss": 0.9962, + "step": 26830 + }, + { + "epoch": 0.19428579701332638, + "grad_norm": 0.16288983821868896, + "learning_rate": 4.805721441652733e-06, + "loss": 0.9958, + "step": 26840 + }, + { + "epoch": 0.19435818367391258, + "grad_norm": 0.17485104501247406, + "learning_rate": 4.805649054992146e-06, + "loss": 1.0048, + "step": 26850 + }, + { + "epoch": 0.19443057033449876, + "grad_norm": 0.15907074511051178, + "learning_rate": 4.80557666833156e-06, + "loss": 1.0056, + "step": 26860 + }, + { + "epoch": 0.19450295699508494, + "grad_norm": 0.17798353731632233, + "learning_rate": 4.805504281670974e-06, + "loss": 1.0024, + "step": 26870 + }, + { + "epoch": 0.19457534365567114, + "grad_norm": 0.18437181413173676, + "learning_rate": 4.805431895010388e-06, + "loss": 0.9971, + "step": 26880 + }, + { + "epoch": 0.19464773031625732, + "grad_norm": 0.15452776849269867, + "learning_rate": 4.805359508349802e-06, + "loss": 1.0092, + "step": 26890 + }, + { + "epoch": 0.1947201169768435, + "grad_norm": 0.16308918595314026, + "learning_rate": 4.805287121689215e-06, + "loss": 1.0055, + "step": 26900 + }, + { + "epoch": 0.1947925036374297, + "grad_norm": 0.1587022840976715, + "learning_rate": 4.80521473502863e-06, + "loss": 1.0006, + "step": 26910 + }, + { + "epoch": 0.19486489029801587, + "grad_norm": 0.1751098334789276, + "learning_rate": 4.805142348368043e-06, + "loss": 0.9859, + "step": 26920 + }, + { + "epoch": 0.19493727695860208, + "grad_norm": 0.15153715014457703, + "learning_rate": 4.805069961707457e-06, + "loss": 1.0112, + "step": 26930 + }, + { + "epoch": 0.19500966361918826, + "grad_norm": 0.16435284912586212, + "learning_rate": 4.8049975750468705e-06, + "loss": 1.0057, + "step": 26940 + }, + { + "epoch": 0.19508205027977443, + "grad_norm": 0.17554335296154022, + "learning_rate": 4.804925188386285e-06, + "loss": 1.0051, + "step": 26950 + }, + { + "epoch": 0.19515443694036064, + "grad_norm": 0.23302575945854187, + "learning_rate": 4.804852801725699e-06, + "loss": 1.0029, + "step": 26960 + }, + { + "epoch": 0.1952268236009468, + "grad_norm": 0.16776177287101746, + "learning_rate": 4.804780415065112e-06, + "loss": 0.9986, + "step": 26970 + }, + { + "epoch": 0.19529921026153302, + "grad_norm": 0.15875661373138428, + "learning_rate": 4.804708028404526e-06, + "loss": 0.9944, + "step": 26980 + }, + { + "epoch": 0.1953715969221192, + "grad_norm": 0.16342279314994812, + "learning_rate": 4.80463564174394e-06, + "loss": 1.0014, + "step": 26990 + }, + { + "epoch": 0.19544398358270537, + "grad_norm": 0.16927549242973328, + "learning_rate": 4.804563255083354e-06, + "loss": 1.0005, + "step": 27000 + }, + { + "epoch": 0.19551637024329158, + "grad_norm": 0.17107799649238586, + "learning_rate": 4.8044908684227675e-06, + "loss": 0.9865, + "step": 27010 + }, + { + "epoch": 0.19558875690387775, + "grad_norm": 0.156640887260437, + "learning_rate": 4.804418481762181e-06, + "loss": 0.9957, + "step": 27020 + }, + { + "epoch": 0.19566114356446393, + "grad_norm": 0.20385904610157013, + "learning_rate": 4.804346095101596e-06, + "loss": 0.9996, + "step": 27030 + }, + { + "epoch": 0.19573353022505013, + "grad_norm": 0.15625648200511932, + "learning_rate": 4.804273708441009e-06, + "loss": 1.0155, + "step": 27040 + }, + { + "epoch": 0.1958059168856363, + "grad_norm": 0.17145240306854248, + "learning_rate": 4.804201321780423e-06, + "loss": 0.9947, + "step": 27050 + }, + { + "epoch": 0.19587830354622252, + "grad_norm": 0.20977072417736053, + "learning_rate": 4.8041289351198364e-06, + "loss": 1.0036, + "step": 27060 + }, + { + "epoch": 0.1959506902068087, + "grad_norm": 0.163489431142807, + "learning_rate": 4.804056548459251e-06, + "loss": 1.0106, + "step": 27070 + }, + { + "epoch": 0.19602307686739487, + "grad_norm": 0.17504650354385376, + "learning_rate": 4.8039841617986645e-06, + "loss": 0.9896, + "step": 27080 + }, + { + "epoch": 0.19609546352798107, + "grad_norm": 0.16525472700595856, + "learning_rate": 4.803911775138078e-06, + "loss": 1.0065, + "step": 27090 + }, + { + "epoch": 0.19616785018856725, + "grad_norm": 0.20987266302108765, + "learning_rate": 4.803839388477492e-06, + "loss": 1.0003, + "step": 27100 + }, + { + "epoch": 0.19624023684915343, + "grad_norm": 0.17910590767860413, + "learning_rate": 4.803767001816906e-06, + "loss": 0.9886, + "step": 27110 + }, + { + "epoch": 0.19631262350973963, + "grad_norm": 0.17571011185646057, + "learning_rate": 4.80369461515632e-06, + "loss": 0.9865, + "step": 27120 + }, + { + "epoch": 0.1963850101703258, + "grad_norm": 0.1787402182817459, + "learning_rate": 4.8036222284957334e-06, + "loss": 1.0061, + "step": 27130 + }, + { + "epoch": 0.196457396830912, + "grad_norm": 0.18772943317890167, + "learning_rate": 4.803549841835147e-06, + "loss": 0.9935, + "step": 27140 + }, + { + "epoch": 0.1965297834914982, + "grad_norm": 0.17668381333351135, + "learning_rate": 4.803477455174561e-06, + "loss": 1.008, + "step": 27150 + }, + { + "epoch": 0.19660217015208437, + "grad_norm": 0.16147243976593018, + "learning_rate": 4.803405068513974e-06, + "loss": 0.996, + "step": 27160 + }, + { + "epoch": 0.19667455681267057, + "grad_norm": 0.1802648901939392, + "learning_rate": 4.803332681853388e-06, + "loss": 1.0033, + "step": 27170 + }, + { + "epoch": 0.19674694347325675, + "grad_norm": 0.1721193492412567, + "learning_rate": 4.803260295192802e-06, + "loss": 1.0023, + "step": 27180 + }, + { + "epoch": 0.19681933013384292, + "grad_norm": 0.1651720553636551, + "learning_rate": 4.803187908532216e-06, + "loss": 1.0125, + "step": 27190 + }, + { + "epoch": 0.19689171679442913, + "grad_norm": 0.2428152859210968, + "learning_rate": 4.80311552187163e-06, + "loss": 1.0131, + "step": 27200 + }, + { + "epoch": 0.1969641034550153, + "grad_norm": 0.17188434302806854, + "learning_rate": 4.803043135211043e-06, + "loss": 0.9972, + "step": 27210 + }, + { + "epoch": 0.1970364901156015, + "grad_norm": 0.1973094493150711, + "learning_rate": 4.802970748550458e-06, + "loss": 0.9983, + "step": 27220 + }, + { + "epoch": 0.19710887677618769, + "grad_norm": 0.16881392896175385, + "learning_rate": 4.802898361889871e-06, + "loss": 1.0107, + "step": 27230 + }, + { + "epoch": 0.19718126343677386, + "grad_norm": 0.17442533373832703, + "learning_rate": 4.802825975229285e-06, + "loss": 1.0153, + "step": 27240 + }, + { + "epoch": 0.19725365009736007, + "grad_norm": 0.18059037625789642, + "learning_rate": 4.8027535885686985e-06, + "loss": 1.0168, + "step": 27250 + }, + { + "epoch": 0.19732603675794624, + "grad_norm": 0.19683484733104706, + "learning_rate": 4.802681201908113e-06, + "loss": 0.9942, + "step": 27260 + }, + { + "epoch": 0.19739842341853242, + "grad_norm": 0.1710311323404312, + "learning_rate": 4.802608815247527e-06, + "loss": 1.0155, + "step": 27270 + }, + { + "epoch": 0.19747081007911862, + "grad_norm": 0.16936126351356506, + "learning_rate": 4.80253642858694e-06, + "loss": 0.9969, + "step": 27280 + }, + { + "epoch": 0.1975431967397048, + "grad_norm": 0.19875222444534302, + "learning_rate": 4.802464041926354e-06, + "loss": 0.9913, + "step": 27290 + }, + { + "epoch": 0.197615583400291, + "grad_norm": 0.16911375522613525, + "learning_rate": 4.802391655265768e-06, + "loss": 1.0025, + "step": 27300 + }, + { + "epoch": 0.19768797006087718, + "grad_norm": 0.16510553658008575, + "learning_rate": 4.802319268605182e-06, + "loss": 1.0083, + "step": 27310 + }, + { + "epoch": 0.19776035672146336, + "grad_norm": 0.17663301527500153, + "learning_rate": 4.8022468819445955e-06, + "loss": 0.9859, + "step": 27320 + }, + { + "epoch": 0.19783274338204956, + "grad_norm": 0.18130449950695038, + "learning_rate": 4.802174495284009e-06, + "loss": 1.0023, + "step": 27330 + }, + { + "epoch": 0.19790513004263574, + "grad_norm": 0.1663258969783783, + "learning_rate": 4.802102108623423e-06, + "loss": 1.0032, + "step": 27340 + }, + { + "epoch": 0.19797751670322192, + "grad_norm": 0.17057499289512634, + "learning_rate": 4.802029721962837e-06, + "loss": 0.9985, + "step": 27350 + }, + { + "epoch": 0.19804990336380812, + "grad_norm": 0.1627529412508011, + "learning_rate": 4.801957335302251e-06, + "loss": 1.021, + "step": 27360 + }, + { + "epoch": 0.1981222900243943, + "grad_norm": 0.1599203497171402, + "learning_rate": 4.8018849486416645e-06, + "loss": 1.0064, + "step": 27370 + }, + { + "epoch": 0.1981946766849805, + "grad_norm": 0.199107825756073, + "learning_rate": 4.801812561981078e-06, + "loss": 1.0142, + "step": 27380 + }, + { + "epoch": 0.19826706334556668, + "grad_norm": 0.19576513767242432, + "learning_rate": 4.8017401753204926e-06, + "loss": 1.0037, + "step": 27390 + }, + { + "epoch": 0.19833945000615286, + "grad_norm": 0.16550497710704803, + "learning_rate": 4.801667788659906e-06, + "loss": 0.9853, + "step": 27400 + }, + { + "epoch": 0.19841183666673906, + "grad_norm": 0.17709434032440186, + "learning_rate": 4.80159540199932e-06, + "loss": 1.0031, + "step": 27410 + }, + { + "epoch": 0.19848422332732524, + "grad_norm": 0.1671217530965805, + "learning_rate": 4.801523015338733e-06, + "loss": 0.9927, + "step": 27420 + }, + { + "epoch": 0.19855660998791141, + "grad_norm": 0.16205404698848724, + "learning_rate": 4.801450628678148e-06, + "loss": 1.004, + "step": 27430 + }, + { + "epoch": 0.19862899664849762, + "grad_norm": 0.21994493901729584, + "learning_rate": 4.8013782420175615e-06, + "loss": 0.9901, + "step": 27440 + }, + { + "epoch": 0.1987013833090838, + "grad_norm": 0.17155443131923676, + "learning_rate": 4.801305855356975e-06, + "loss": 1.011, + "step": 27450 + }, + { + "epoch": 0.19877376996967, + "grad_norm": 0.1630110889673233, + "learning_rate": 4.801233468696389e-06, + "loss": 0.9974, + "step": 27460 + }, + { + "epoch": 0.19884615663025618, + "grad_norm": 0.17589977383613586, + "learning_rate": 4.801161082035803e-06, + "loss": 0.9816, + "step": 27470 + }, + { + "epoch": 0.19891854329084235, + "grad_norm": 0.16798698902130127, + "learning_rate": 4.801088695375217e-06, + "loss": 1.0086, + "step": 27480 + }, + { + "epoch": 0.19899092995142856, + "grad_norm": 0.18393242359161377, + "learning_rate": 4.80101630871463e-06, + "loss": 1.01, + "step": 27490 + }, + { + "epoch": 0.19906331661201473, + "grad_norm": 0.1627999097108841, + "learning_rate": 4.800943922054044e-06, + "loss": 1.0033, + "step": 27500 + }, + { + "epoch": 0.1991357032726009, + "grad_norm": 0.1707964688539505, + "learning_rate": 4.8008715353934585e-06, + "loss": 0.9966, + "step": 27510 + }, + { + "epoch": 0.19920808993318712, + "grad_norm": 0.154254749417305, + "learning_rate": 4.800799148732872e-06, + "loss": 0.9943, + "step": 27520 + }, + { + "epoch": 0.1992804765937733, + "grad_norm": 0.17467810213565826, + "learning_rate": 4.800726762072286e-06, + "loss": 0.9986, + "step": 27530 + }, + { + "epoch": 0.1993528632543595, + "grad_norm": 0.17175132036209106, + "learning_rate": 4.800654375411699e-06, + "loss": 1.0, + "step": 27540 + }, + { + "epoch": 0.19942524991494567, + "grad_norm": 0.17211025953292847, + "learning_rate": 4.800581988751114e-06, + "loss": 1.011, + "step": 27550 + }, + { + "epoch": 0.19949763657553185, + "grad_norm": 0.16861172020435333, + "learning_rate": 4.800509602090527e-06, + "loss": 1.0082, + "step": 27560 + }, + { + "epoch": 0.19957002323611805, + "grad_norm": 0.20472027361392975, + "learning_rate": 4.800437215429941e-06, + "loss": 0.9906, + "step": 27570 + }, + { + "epoch": 0.19964240989670423, + "grad_norm": 0.16416484117507935, + "learning_rate": 4.800364828769355e-06, + "loss": 0.9934, + "step": 27580 + }, + { + "epoch": 0.19971479655729044, + "grad_norm": 0.16409383714199066, + "learning_rate": 4.800292442108769e-06, + "loss": 0.9885, + "step": 27590 + }, + { + "epoch": 0.1997871832178766, + "grad_norm": 0.17067715525627136, + "learning_rate": 4.800220055448183e-06, + "loss": 0.9954, + "step": 27600 + }, + { + "epoch": 0.1998595698784628, + "grad_norm": 0.18361663818359375, + "learning_rate": 4.800147668787596e-06, + "loss": 1.0033, + "step": 27610 + }, + { + "epoch": 0.199931956539049, + "grad_norm": 0.1660345494747162, + "learning_rate": 4.80007528212701e-06, + "loss": 1.0032, + "step": 27620 + }, + { + "epoch": 0.20000434319963517, + "grad_norm": 0.16931919753551483, + "learning_rate": 4.8000028954664244e-06, + "loss": 1.0006, + "step": 27630 + }, + { + "epoch": 0.20007672986022135, + "grad_norm": 0.1660221815109253, + "learning_rate": 4.799930508805838e-06, + "loss": 1.0126, + "step": 27640 + }, + { + "epoch": 0.20014911652080755, + "grad_norm": 0.179294615983963, + "learning_rate": 4.799858122145252e-06, + "loss": 1.0143, + "step": 27650 + }, + { + "epoch": 0.20022150318139373, + "grad_norm": 0.19794364273548126, + "learning_rate": 4.799785735484665e-06, + "loss": 1.0057, + "step": 27660 + }, + { + "epoch": 0.20029388984197993, + "grad_norm": 0.1863810420036316, + "learning_rate": 4.79971334882408e-06, + "loss": 0.9972, + "step": 27670 + }, + { + "epoch": 0.2003662765025661, + "grad_norm": 0.17206966876983643, + "learning_rate": 4.799640962163493e-06, + "loss": 1.0025, + "step": 27680 + }, + { + "epoch": 0.2004386631631523, + "grad_norm": 0.18598827719688416, + "learning_rate": 4.799568575502906e-06, + "loss": 1.0035, + "step": 27690 + }, + { + "epoch": 0.2005110498237385, + "grad_norm": 0.17448210716247559, + "learning_rate": 4.799496188842321e-06, + "loss": 1.0121, + "step": 27700 + }, + { + "epoch": 0.20058343648432467, + "grad_norm": 0.1639571338891983, + "learning_rate": 4.799423802181734e-06, + "loss": 1.0005, + "step": 27710 + }, + { + "epoch": 0.20065582314491084, + "grad_norm": 0.16156673431396484, + "learning_rate": 4.799351415521148e-06, + "loss": 0.9937, + "step": 27720 + }, + { + "epoch": 0.20072820980549705, + "grad_norm": 0.1673559546470642, + "learning_rate": 4.7992790288605614e-06, + "loss": 0.9872, + "step": 27730 + }, + { + "epoch": 0.20080059646608323, + "grad_norm": 0.16240374743938446, + "learning_rate": 4.799206642199976e-06, + "loss": 1.0194, + "step": 27740 + }, + { + "epoch": 0.20087298312666943, + "grad_norm": 0.16212816536426544, + "learning_rate": 4.7991342555393895e-06, + "loss": 0.9928, + "step": 27750 + }, + { + "epoch": 0.2009453697872556, + "grad_norm": 0.15658411383628845, + "learning_rate": 4.799061868878803e-06, + "loss": 0.9976, + "step": 27760 + }, + { + "epoch": 0.20101775644784178, + "grad_norm": 0.16336016356945038, + "learning_rate": 4.798989482218217e-06, + "loss": 1.0189, + "step": 27770 + }, + { + "epoch": 0.201090143108428, + "grad_norm": 0.18533754348754883, + "learning_rate": 4.798917095557631e-06, + "loss": 0.9867, + "step": 27780 + }, + { + "epoch": 0.20116252976901416, + "grad_norm": 0.1857583224773407, + "learning_rate": 4.798844708897045e-06, + "loss": 0.9994, + "step": 27790 + }, + { + "epoch": 0.20123491642960034, + "grad_norm": 0.19050684571266174, + "learning_rate": 4.7987723222364584e-06, + "loss": 0.997, + "step": 27800 + }, + { + "epoch": 0.20130730309018655, + "grad_norm": 0.17281419038772583, + "learning_rate": 4.798699935575872e-06, + "loss": 1.0018, + "step": 27810 + }, + { + "epoch": 0.20137968975077272, + "grad_norm": 0.169466033577919, + "learning_rate": 4.7986275489152865e-06, + "loss": 0.997, + "step": 27820 + }, + { + "epoch": 0.20145207641135893, + "grad_norm": 0.16908937692642212, + "learning_rate": 4.7985551622547e-06, + "loss": 1.0002, + "step": 27830 + }, + { + "epoch": 0.2015244630719451, + "grad_norm": 0.17299243807792664, + "learning_rate": 4.798482775594114e-06, + "loss": 1.0038, + "step": 27840 + }, + { + "epoch": 0.20159684973253128, + "grad_norm": 0.16563472151756287, + "learning_rate": 4.798410388933527e-06, + "loss": 0.9915, + "step": 27850 + }, + { + "epoch": 0.20166923639311748, + "grad_norm": 0.19840186834335327, + "learning_rate": 4.798338002272942e-06, + "loss": 1.0037, + "step": 27860 + }, + { + "epoch": 0.20174162305370366, + "grad_norm": 0.17237645387649536, + "learning_rate": 4.7982656156123554e-06, + "loss": 1.012, + "step": 27870 + }, + { + "epoch": 0.20181400971428984, + "grad_norm": 0.18129993975162506, + "learning_rate": 4.798193228951769e-06, + "loss": 1.0009, + "step": 27880 + }, + { + "epoch": 0.20188639637487604, + "grad_norm": 0.16735847294330597, + "learning_rate": 4.798120842291183e-06, + "loss": 1.0198, + "step": 27890 + }, + { + "epoch": 0.20195878303546222, + "grad_norm": 0.16451966762542725, + "learning_rate": 4.798048455630597e-06, + "loss": 1.0038, + "step": 27900 + }, + { + "epoch": 0.20203116969604842, + "grad_norm": 0.1658269166946411, + "learning_rate": 4.797976068970011e-06, + "loss": 0.9934, + "step": 27910 + }, + { + "epoch": 0.2021035563566346, + "grad_norm": 0.17041842639446259, + "learning_rate": 4.797903682309424e-06, + "loss": 0.9873, + "step": 27920 + }, + { + "epoch": 0.20217594301722078, + "grad_norm": 0.18290521204471588, + "learning_rate": 4.797831295648838e-06, + "loss": 0.9885, + "step": 27930 + }, + { + "epoch": 0.20224832967780698, + "grad_norm": 0.173233300447464, + "learning_rate": 4.797758908988252e-06, + "loss": 0.9977, + "step": 27940 + }, + { + "epoch": 0.20232071633839316, + "grad_norm": 0.1651332825422287, + "learning_rate": 4.797686522327666e-06, + "loss": 0.9989, + "step": 27950 + }, + { + "epoch": 0.20239310299897934, + "grad_norm": 0.18143868446350098, + "learning_rate": 4.79761413566708e-06, + "loss": 1.0013, + "step": 27960 + }, + { + "epoch": 0.20246548965956554, + "grad_norm": 0.18052855134010315, + "learning_rate": 4.797541749006493e-06, + "loss": 1.0127, + "step": 27970 + }, + { + "epoch": 0.20253787632015172, + "grad_norm": 0.1647488921880722, + "learning_rate": 4.797469362345907e-06, + "loss": 0.9766, + "step": 27980 + }, + { + "epoch": 0.20261026298073792, + "grad_norm": 0.18632908165454865, + "learning_rate": 4.797396975685321e-06, + "loss": 0.9951, + "step": 27990 + }, + { + "epoch": 0.2026826496413241, + "grad_norm": 0.1738438606262207, + "learning_rate": 4.797324589024735e-06, + "loss": 1.0055, + "step": 28000 + }, + { + "epoch": 0.20275503630191027, + "grad_norm": 0.17190273106098175, + "learning_rate": 4.797252202364149e-06, + "loss": 1.003, + "step": 28010 + }, + { + "epoch": 0.20282742296249648, + "grad_norm": 0.16988669335842133, + "learning_rate": 4.797179815703562e-06, + "loss": 0.9932, + "step": 28020 + }, + { + "epoch": 0.20289980962308266, + "grad_norm": 0.16658180952072144, + "learning_rate": 4.797107429042977e-06, + "loss": 1.0075, + "step": 28030 + }, + { + "epoch": 0.20297219628366883, + "grad_norm": 0.16553157567977905, + "learning_rate": 4.79703504238239e-06, + "loss": 1.0056, + "step": 28040 + }, + { + "epoch": 0.20304458294425504, + "grad_norm": 0.17701905965805054, + "learning_rate": 4.796962655721804e-06, + "loss": 1.0115, + "step": 28050 + }, + { + "epoch": 0.2031169696048412, + "grad_norm": 0.16136251389980316, + "learning_rate": 4.7968902690612175e-06, + "loss": 0.9909, + "step": 28060 + }, + { + "epoch": 0.20318935626542742, + "grad_norm": 0.17143204808235168, + "learning_rate": 4.796817882400632e-06, + "loss": 0.9944, + "step": 28070 + }, + { + "epoch": 0.2032617429260136, + "grad_norm": 0.16544604301452637, + "learning_rate": 4.796745495740046e-06, + "loss": 1.0009, + "step": 28080 + }, + { + "epoch": 0.20333412958659977, + "grad_norm": 0.1553160548210144, + "learning_rate": 4.796673109079459e-06, + "loss": 1.0123, + "step": 28090 + }, + { + "epoch": 0.20340651624718598, + "grad_norm": 0.16683773696422577, + "learning_rate": 4.796600722418873e-06, + "loss": 1.0095, + "step": 28100 + }, + { + "epoch": 0.20347890290777215, + "grad_norm": 0.16187171638011932, + "learning_rate": 4.796528335758287e-06, + "loss": 0.9973, + "step": 28110 + }, + { + "epoch": 0.20355128956835836, + "grad_norm": 0.1778879463672638, + "learning_rate": 4.796455949097701e-06, + "loss": 1.012, + "step": 28120 + }, + { + "epoch": 0.20362367622894453, + "grad_norm": 0.1824272871017456, + "learning_rate": 4.7963835624371146e-06, + "loss": 0.9874, + "step": 28130 + }, + { + "epoch": 0.2036960628895307, + "grad_norm": 0.15840831398963928, + "learning_rate": 4.796311175776528e-06, + "loss": 1.0078, + "step": 28140 + }, + { + "epoch": 0.20376844955011691, + "grad_norm": 0.1720242202281952, + "learning_rate": 4.796238789115943e-06, + "loss": 0.9946, + "step": 28150 + }, + { + "epoch": 0.2038408362107031, + "grad_norm": 0.18525436520576477, + "learning_rate": 4.796166402455356e-06, + "loss": 0.9854, + "step": 28160 + }, + { + "epoch": 0.20391322287128927, + "grad_norm": 0.16889312863349915, + "learning_rate": 4.79609401579477e-06, + "loss": 0.9788, + "step": 28170 + }, + { + "epoch": 0.20398560953187547, + "grad_norm": 0.1737358719110489, + "learning_rate": 4.7960216291341835e-06, + "loss": 1.008, + "step": 28180 + }, + { + "epoch": 0.20405799619246165, + "grad_norm": 0.1770784854888916, + "learning_rate": 4.795949242473598e-06, + "loss": 0.9884, + "step": 28190 + }, + { + "epoch": 0.20413038285304785, + "grad_norm": 0.17560461163520813, + "learning_rate": 4.7958768558130116e-06, + "loss": 0.9923, + "step": 28200 + }, + { + "epoch": 0.20420276951363403, + "grad_norm": 0.210534930229187, + "learning_rate": 4.795804469152425e-06, + "loss": 1.0002, + "step": 28210 + }, + { + "epoch": 0.2042751561742202, + "grad_norm": 0.1665802001953125, + "learning_rate": 4.795732082491839e-06, + "loss": 0.9887, + "step": 28220 + }, + { + "epoch": 0.2043475428348064, + "grad_norm": 0.17272193729877472, + "learning_rate": 4.795659695831252e-06, + "loss": 1.007, + "step": 28230 + }, + { + "epoch": 0.2044199294953926, + "grad_norm": 0.16487373411655426, + "learning_rate": 4.795587309170666e-06, + "loss": 1.0067, + "step": 28240 + }, + { + "epoch": 0.20449231615597876, + "grad_norm": 0.16630111634731293, + "learning_rate": 4.79551492251008e-06, + "loss": 0.9795, + "step": 28250 + }, + { + "epoch": 0.20456470281656497, + "grad_norm": 0.17112283408641815, + "learning_rate": 4.795442535849494e-06, + "loss": 1.001, + "step": 28260 + }, + { + "epoch": 0.20463708947715115, + "grad_norm": 0.17622940242290497, + "learning_rate": 4.795370149188908e-06, + "loss": 0.9859, + "step": 28270 + }, + { + "epoch": 0.20470947613773735, + "grad_norm": 0.16943074762821198, + "learning_rate": 4.795297762528321e-06, + "loss": 1.0008, + "step": 28280 + }, + { + "epoch": 0.20478186279832353, + "grad_norm": 0.16673767566680908, + "learning_rate": 4.795225375867735e-06, + "loss": 0.9991, + "step": 28290 + }, + { + "epoch": 0.2048542494589097, + "grad_norm": 0.16490839421749115, + "learning_rate": 4.795152989207149e-06, + "loss": 1.0065, + "step": 28300 + }, + { + "epoch": 0.2049266361194959, + "grad_norm": 0.16834768652915955, + "learning_rate": 4.795080602546563e-06, + "loss": 1.0002, + "step": 28310 + }, + { + "epoch": 0.20499902278008209, + "grad_norm": 0.15660522878170013, + "learning_rate": 4.795008215885977e-06, + "loss": 1.008, + "step": 28320 + }, + { + "epoch": 0.20507140944066826, + "grad_norm": 0.1734970211982727, + "learning_rate": 4.79493582922539e-06, + "loss": 0.9975, + "step": 28330 + }, + { + "epoch": 0.20514379610125447, + "grad_norm": 0.17806610465049744, + "learning_rate": 4.794863442564805e-06, + "loss": 0.9932, + "step": 28340 + }, + { + "epoch": 0.20521618276184064, + "grad_norm": 0.1722010225057602, + "learning_rate": 4.794791055904218e-06, + "loss": 0.9841, + "step": 28350 + }, + { + "epoch": 0.20528856942242685, + "grad_norm": 0.1715139001607895, + "learning_rate": 4.794718669243632e-06, + "loss": 0.9977, + "step": 28360 + }, + { + "epoch": 0.20536095608301302, + "grad_norm": 0.17599177360534668, + "learning_rate": 4.7946462825830456e-06, + "loss": 1.0029, + "step": 28370 + }, + { + "epoch": 0.2054333427435992, + "grad_norm": 0.16538375616073608, + "learning_rate": 4.79457389592246e-06, + "loss": 0.999, + "step": 28380 + }, + { + "epoch": 0.2055057294041854, + "grad_norm": 0.1635717749595642, + "learning_rate": 4.794501509261874e-06, + "loss": 1.0158, + "step": 28390 + }, + { + "epoch": 0.20557811606477158, + "grad_norm": 0.17137548327445984, + "learning_rate": 4.794429122601287e-06, + "loss": 1.0063, + "step": 28400 + }, + { + "epoch": 0.20565050272535776, + "grad_norm": 0.26129329204559326, + "learning_rate": 4.794356735940701e-06, + "loss": 1.0029, + "step": 28410 + }, + { + "epoch": 0.20572288938594396, + "grad_norm": 0.19296756386756897, + "learning_rate": 4.794284349280115e-06, + "loss": 0.9955, + "step": 28420 + }, + { + "epoch": 0.20579527604653014, + "grad_norm": 0.17898808419704437, + "learning_rate": 4.794211962619529e-06, + "loss": 0.9865, + "step": 28430 + }, + { + "epoch": 0.20586766270711634, + "grad_norm": 0.16566985845565796, + "learning_rate": 4.794139575958943e-06, + "loss": 0.9895, + "step": 28440 + }, + { + "epoch": 0.20594004936770252, + "grad_norm": 0.1738710254430771, + "learning_rate": 4.794067189298356e-06, + "loss": 1.0066, + "step": 28450 + }, + { + "epoch": 0.2060124360282887, + "grad_norm": 0.18403783440589905, + "learning_rate": 4.793994802637771e-06, + "loss": 0.9924, + "step": 28460 + }, + { + "epoch": 0.2060848226888749, + "grad_norm": 0.1839311420917511, + "learning_rate": 4.793922415977184e-06, + "loss": 0.9973, + "step": 28470 + }, + { + "epoch": 0.20615720934946108, + "grad_norm": 0.17490077018737793, + "learning_rate": 4.793850029316598e-06, + "loss": 1.0049, + "step": 28480 + }, + { + "epoch": 0.20622959601004726, + "grad_norm": 0.17090052366256714, + "learning_rate": 4.7937776426560115e-06, + "loss": 0.987, + "step": 28490 + }, + { + "epoch": 0.20630198267063346, + "grad_norm": 0.1909414380788803, + "learning_rate": 4.793705255995426e-06, + "loss": 1.0039, + "step": 28500 + }, + { + "epoch": 0.20637436933121964, + "grad_norm": 0.18218588829040527, + "learning_rate": 4.79363286933484e-06, + "loss": 1.0067, + "step": 28510 + }, + { + "epoch": 0.20644675599180584, + "grad_norm": 0.2051839381456375, + "learning_rate": 4.793560482674253e-06, + "loss": 0.9922, + "step": 28520 + }, + { + "epoch": 0.20651914265239202, + "grad_norm": 0.17690230906009674, + "learning_rate": 4.793488096013667e-06, + "loss": 0.9973, + "step": 28530 + }, + { + "epoch": 0.2065915293129782, + "grad_norm": 0.32738569378852844, + "learning_rate": 4.793415709353081e-06, + "loss": 0.998, + "step": 28540 + }, + { + "epoch": 0.2066639159735644, + "grad_norm": 0.16709066927433014, + "learning_rate": 4.793343322692495e-06, + "loss": 0.9968, + "step": 28550 + }, + { + "epoch": 0.20673630263415058, + "grad_norm": 0.16835230588912964, + "learning_rate": 4.7932709360319085e-06, + "loss": 0.9936, + "step": 28560 + }, + { + "epoch": 0.20680868929473675, + "grad_norm": 0.18569424748420715, + "learning_rate": 4.793198549371322e-06, + "loss": 0.9828, + "step": 28570 + }, + { + "epoch": 0.20688107595532296, + "grad_norm": 0.16183647513389587, + "learning_rate": 4.793126162710736e-06, + "loss": 1.0112, + "step": 28580 + }, + { + "epoch": 0.20695346261590913, + "grad_norm": 0.1736779808998108, + "learning_rate": 4.79305377605015e-06, + "loss": 0.9918, + "step": 28590 + }, + { + "epoch": 0.20702584927649534, + "grad_norm": 0.6868466138839722, + "learning_rate": 4.792981389389564e-06, + "loss": 0.9918, + "step": 28600 + }, + { + "epoch": 0.20709823593708152, + "grad_norm": 0.1731596291065216, + "learning_rate": 4.7929090027289774e-06, + "loss": 0.9935, + "step": 28610 + }, + { + "epoch": 0.2071706225976677, + "grad_norm": 0.18018291890621185, + "learning_rate": 4.792836616068391e-06, + "loss": 1.0124, + "step": 28620 + }, + { + "epoch": 0.2072430092582539, + "grad_norm": 0.1815289855003357, + "learning_rate": 4.7927642294078055e-06, + "loss": 1.0116, + "step": 28630 + }, + { + "epoch": 0.20731539591884007, + "grad_norm": 0.18109048902988434, + "learning_rate": 4.792691842747219e-06, + "loss": 0.9968, + "step": 28640 + }, + { + "epoch": 0.20738778257942625, + "grad_norm": 0.1723286211490631, + "learning_rate": 4.792619456086633e-06, + "loss": 1.0088, + "step": 28650 + }, + { + "epoch": 0.20746016924001245, + "grad_norm": 0.17731237411499023, + "learning_rate": 4.792547069426046e-06, + "loss": 1.0042, + "step": 28660 + }, + { + "epoch": 0.20753255590059863, + "grad_norm": 0.17550349235534668, + "learning_rate": 4.792474682765461e-06, + "loss": 1.0046, + "step": 28670 + }, + { + "epoch": 0.20760494256118484, + "grad_norm": 0.17998819053173065, + "learning_rate": 4.7924022961048745e-06, + "loss": 0.9919, + "step": 28680 + }, + { + "epoch": 0.207677329221771, + "grad_norm": 0.1708000898361206, + "learning_rate": 4.792329909444288e-06, + "loss": 1.0019, + "step": 28690 + }, + { + "epoch": 0.2077497158823572, + "grad_norm": 0.16459348797798157, + "learning_rate": 4.792257522783702e-06, + "loss": 1.0008, + "step": 28700 + }, + { + "epoch": 0.2078221025429434, + "grad_norm": 0.17152118682861328, + "learning_rate": 4.792185136123116e-06, + "loss": 0.9919, + "step": 28710 + }, + { + "epoch": 0.20789448920352957, + "grad_norm": 0.1598610132932663, + "learning_rate": 4.79211274946253e-06, + "loss": 1.0093, + "step": 28720 + }, + { + "epoch": 0.20796687586411577, + "grad_norm": 0.18231916427612305, + "learning_rate": 4.792040362801943e-06, + "loss": 1.0098, + "step": 28730 + }, + { + "epoch": 0.20803926252470195, + "grad_norm": 0.16905340552330017, + "learning_rate": 4.791967976141357e-06, + "loss": 0.992, + "step": 28740 + }, + { + "epoch": 0.20811164918528813, + "grad_norm": 0.1717272698879242, + "learning_rate": 4.791895589480771e-06, + "loss": 0.9876, + "step": 28750 + }, + { + "epoch": 0.20818403584587433, + "grad_norm": 0.16113172471523285, + "learning_rate": 4.791823202820184e-06, + "loss": 0.9987, + "step": 28760 + }, + { + "epoch": 0.2082564225064605, + "grad_norm": 0.18557120859622955, + "learning_rate": 4.791750816159598e-06, + "loss": 1.0109, + "step": 28770 + }, + { + "epoch": 0.20832880916704669, + "grad_norm": 0.17728063464164734, + "learning_rate": 4.791678429499012e-06, + "loss": 0.9922, + "step": 28780 + }, + { + "epoch": 0.2084011958276329, + "grad_norm": 0.19753523170948029, + "learning_rate": 4.791606042838426e-06, + "loss": 1.0016, + "step": 28790 + }, + { + "epoch": 0.20847358248821907, + "grad_norm": 0.16373999416828156, + "learning_rate": 4.7915336561778395e-06, + "loss": 0.9975, + "step": 28800 + }, + { + "epoch": 0.20854596914880527, + "grad_norm": 0.17161045968532562, + "learning_rate": 4.791461269517253e-06, + "loss": 0.9926, + "step": 28810 + }, + { + "epoch": 0.20861835580939145, + "grad_norm": 0.16987286508083344, + "learning_rate": 4.791388882856668e-06, + "loss": 0.9974, + "step": 28820 + }, + { + "epoch": 0.20869074246997762, + "grad_norm": 0.2174030840396881, + "learning_rate": 4.791316496196081e-06, + "loss": 1.003, + "step": 28830 + }, + { + "epoch": 0.20876312913056383, + "grad_norm": 0.1620844602584839, + "learning_rate": 4.791244109535495e-06, + "loss": 1.0006, + "step": 28840 + }, + { + "epoch": 0.20883551579115, + "grad_norm": 0.18147195875644684, + "learning_rate": 4.7911717228749085e-06, + "loss": 1.0035, + "step": 28850 + }, + { + "epoch": 0.20890790245173618, + "grad_norm": 0.1799267679452896, + "learning_rate": 4.791099336214323e-06, + "loss": 1.0044, + "step": 28860 + }, + { + "epoch": 0.2089802891123224, + "grad_norm": 0.2170770764350891, + "learning_rate": 4.7910269495537366e-06, + "loss": 0.9663, + "step": 28870 + }, + { + "epoch": 0.20905267577290856, + "grad_norm": 0.17900538444519043, + "learning_rate": 4.79095456289315e-06, + "loss": 0.99, + "step": 28880 + }, + { + "epoch": 0.20912506243349477, + "grad_norm": 0.1686086356639862, + "learning_rate": 4.790882176232564e-06, + "loss": 1.0001, + "step": 28890 + }, + { + "epoch": 0.20919744909408095, + "grad_norm": 0.19090880453586578, + "learning_rate": 4.790809789571978e-06, + "loss": 1.0074, + "step": 28900 + }, + { + "epoch": 0.20926983575466712, + "grad_norm": 0.16574802994728088, + "learning_rate": 4.790737402911392e-06, + "loss": 0.9998, + "step": 28910 + }, + { + "epoch": 0.20934222241525333, + "grad_norm": 0.17160232365131378, + "learning_rate": 4.7906650162508055e-06, + "loss": 1.0036, + "step": 28920 + }, + { + "epoch": 0.2094146090758395, + "grad_norm": 0.17954379320144653, + "learning_rate": 4.790592629590219e-06, + "loss": 0.9932, + "step": 28930 + }, + { + "epoch": 0.20948699573642568, + "grad_norm": 0.17119750380516052, + "learning_rate": 4.7905202429296336e-06, + "loss": 0.9804, + "step": 28940 + }, + { + "epoch": 0.20955938239701188, + "grad_norm": 0.16228660941123962, + "learning_rate": 4.790447856269047e-06, + "loss": 1.0078, + "step": 28950 + }, + { + "epoch": 0.20963176905759806, + "grad_norm": 0.16871187090873718, + "learning_rate": 4.790375469608461e-06, + "loss": 1.0053, + "step": 28960 + }, + { + "epoch": 0.20970415571818427, + "grad_norm": 0.1674191802740097, + "learning_rate": 4.790303082947874e-06, + "loss": 1.0003, + "step": 28970 + }, + { + "epoch": 0.20977654237877044, + "grad_norm": 0.16644033789634705, + "learning_rate": 4.790230696287289e-06, + "loss": 0.9774, + "step": 28980 + }, + { + "epoch": 0.20984892903935662, + "grad_norm": 0.17086544632911682, + "learning_rate": 4.7901583096267025e-06, + "loss": 1.0096, + "step": 28990 + }, + { + "epoch": 0.20992131569994282, + "grad_norm": 0.16685600578784943, + "learning_rate": 4.790085922966116e-06, + "loss": 0.9982, + "step": 29000 + }, + { + "epoch": 0.209993702360529, + "grad_norm": 0.16532373428344727, + "learning_rate": 4.79001353630553e-06, + "loss": 0.976, + "step": 29010 + }, + { + "epoch": 0.21006608902111518, + "grad_norm": 0.36561527848243713, + "learning_rate": 4.789941149644944e-06, + "loss": 0.9778, + "step": 29020 + }, + { + "epoch": 0.21013847568170138, + "grad_norm": 0.1807478368282318, + "learning_rate": 4.789868762984358e-06, + "loss": 1.0065, + "step": 29030 + }, + { + "epoch": 0.21021086234228756, + "grad_norm": 0.17527340352535248, + "learning_rate": 4.789796376323771e-06, + "loss": 0.998, + "step": 29040 + }, + { + "epoch": 0.21028324900287376, + "grad_norm": 0.1826309859752655, + "learning_rate": 4.789723989663185e-06, + "loss": 0.9983, + "step": 29050 + }, + { + "epoch": 0.21035563566345994, + "grad_norm": 0.17075999081134796, + "learning_rate": 4.7896516030025995e-06, + "loss": 1.0071, + "step": 29060 + }, + { + "epoch": 0.21042802232404612, + "grad_norm": 0.20827743411064148, + "learning_rate": 4.789579216342013e-06, + "loss": 0.9932, + "step": 29070 + }, + { + "epoch": 0.21050040898463232, + "grad_norm": 0.18631266057491302, + "learning_rate": 4.789506829681427e-06, + "loss": 1.0037, + "step": 29080 + }, + { + "epoch": 0.2105727956452185, + "grad_norm": 0.1681118607521057, + "learning_rate": 4.78943444302084e-06, + "loss": 0.9982, + "step": 29090 + }, + { + "epoch": 0.21064518230580467, + "grad_norm": 0.16737908124923706, + "learning_rate": 4.789362056360255e-06, + "loss": 0.9948, + "step": 29100 + }, + { + "epoch": 0.21071756896639088, + "grad_norm": 0.17825300991535187, + "learning_rate": 4.789289669699668e-06, + "loss": 1.0048, + "step": 29110 + }, + { + "epoch": 0.21078995562697705, + "grad_norm": 0.16629593074321747, + "learning_rate": 4.789217283039082e-06, + "loss": 1.002, + "step": 29120 + }, + { + "epoch": 0.21086234228756326, + "grad_norm": 0.16453807055950165, + "learning_rate": 4.789144896378496e-06, + "loss": 0.9952, + "step": 29130 + }, + { + "epoch": 0.21093472894814944, + "grad_norm": 0.18079112470149994, + "learning_rate": 4.78907250971791e-06, + "loss": 0.994, + "step": 29140 + }, + { + "epoch": 0.2110071156087356, + "grad_norm": 0.16734014451503754, + "learning_rate": 4.789000123057324e-06, + "loss": 0.9832, + "step": 29150 + }, + { + "epoch": 0.21107950226932182, + "grad_norm": 0.16586540639400482, + "learning_rate": 4.788927736396737e-06, + "loss": 1.0078, + "step": 29160 + }, + { + "epoch": 0.211151888929908, + "grad_norm": 0.1610448658466339, + "learning_rate": 4.788855349736151e-06, + "loss": 0.9818, + "step": 29170 + }, + { + "epoch": 0.21122427559049417, + "grad_norm": 0.16332942247390747, + "learning_rate": 4.788782963075565e-06, + "loss": 0.9989, + "step": 29180 + }, + { + "epoch": 0.21129666225108037, + "grad_norm": 0.17163671553134918, + "learning_rate": 4.788710576414979e-06, + "loss": 1.0085, + "step": 29190 + }, + { + "epoch": 0.21136904891166655, + "grad_norm": 0.17151540517807007, + "learning_rate": 4.788638189754393e-06, + "loss": 0.9963, + "step": 29200 + }, + { + "epoch": 0.21144143557225276, + "grad_norm": 0.16174449026584625, + "learning_rate": 4.788565803093806e-06, + "loss": 1.0069, + "step": 29210 + }, + { + "epoch": 0.21151382223283893, + "grad_norm": 0.1772051900625229, + "learning_rate": 4.78849341643322e-06, + "loss": 1.0059, + "step": 29220 + }, + { + "epoch": 0.2115862088934251, + "grad_norm": 0.1666724532842636, + "learning_rate": 4.788421029772634e-06, + "loss": 0.9863, + "step": 29230 + }, + { + "epoch": 0.21165859555401131, + "grad_norm": 0.1606568545103073, + "learning_rate": 4.788348643112048e-06, + "loss": 0.9924, + "step": 29240 + }, + { + "epoch": 0.2117309822145975, + "grad_norm": 0.1638329178094864, + "learning_rate": 4.788276256451462e-06, + "loss": 0.9987, + "step": 29250 + }, + { + "epoch": 0.21180336887518367, + "grad_norm": 0.17658962309360504, + "learning_rate": 4.788203869790875e-06, + "loss": 0.9978, + "step": 29260 + }, + { + "epoch": 0.21187575553576987, + "grad_norm": 0.17163963615894318, + "learning_rate": 4.78813148313029e-06, + "loss": 0.9986, + "step": 29270 + }, + { + "epoch": 0.21194814219635605, + "grad_norm": 0.17665480077266693, + "learning_rate": 4.7880590964697024e-06, + "loss": 0.9921, + "step": 29280 + }, + { + "epoch": 0.21202052885694225, + "grad_norm": 0.17104417085647583, + "learning_rate": 4.787986709809117e-06, + "loss": 0.9853, + "step": 29290 + }, + { + "epoch": 0.21209291551752843, + "grad_norm": 0.17565962672233582, + "learning_rate": 4.7879143231485305e-06, + "loss": 1.0039, + "step": 29300 + }, + { + "epoch": 0.2121653021781146, + "grad_norm": 0.2072620391845703, + "learning_rate": 4.787841936487944e-06, + "loss": 0.9881, + "step": 29310 + }, + { + "epoch": 0.2122376888387008, + "grad_norm": 0.16268736124038696, + "learning_rate": 4.787769549827358e-06, + "loss": 0.9767, + "step": 29320 + }, + { + "epoch": 0.212310075499287, + "grad_norm": 0.19353044033050537, + "learning_rate": 4.787697163166772e-06, + "loss": 1.002, + "step": 29330 + }, + { + "epoch": 0.2123824621598732, + "grad_norm": 0.1635683923959732, + "learning_rate": 4.787624776506186e-06, + "loss": 0.9846, + "step": 29340 + }, + { + "epoch": 0.21245484882045937, + "grad_norm": 0.16228455305099487, + "learning_rate": 4.7875523898455994e-06, + "loss": 0.996, + "step": 29350 + }, + { + "epoch": 0.21252723548104555, + "grad_norm": 0.18255174160003662, + "learning_rate": 4.787480003185013e-06, + "loss": 0.9805, + "step": 29360 + }, + { + "epoch": 0.21259962214163175, + "grad_norm": 0.17667743563652039, + "learning_rate": 4.787407616524427e-06, + "loss": 0.994, + "step": 29370 + }, + { + "epoch": 0.21267200880221793, + "grad_norm": 0.15924212336540222, + "learning_rate": 4.787335229863841e-06, + "loss": 0.9877, + "step": 29380 + }, + { + "epoch": 0.2127443954628041, + "grad_norm": 0.27366387844085693, + "learning_rate": 4.787262843203255e-06, + "loss": 0.9891, + "step": 29390 + }, + { + "epoch": 0.2128167821233903, + "grad_norm": 0.17497357726097107, + "learning_rate": 4.787190456542668e-06, + "loss": 0.9717, + "step": 29400 + }, + { + "epoch": 0.21288916878397648, + "grad_norm": 0.17088007926940918, + "learning_rate": 4.787118069882082e-06, + "loss": 0.9952, + "step": 29410 + }, + { + "epoch": 0.2129615554445627, + "grad_norm": 0.16863124072551727, + "learning_rate": 4.7870456832214965e-06, + "loss": 0.9988, + "step": 29420 + }, + { + "epoch": 0.21303394210514887, + "grad_norm": 0.16611674427986145, + "learning_rate": 4.78697329656091e-06, + "loss": 0.989, + "step": 29430 + }, + { + "epoch": 0.21310632876573504, + "grad_norm": 0.2923133373260498, + "learning_rate": 4.786900909900324e-06, + "loss": 1.0054, + "step": 29440 + }, + { + "epoch": 0.21317871542632125, + "grad_norm": 0.18671530485153198, + "learning_rate": 4.786828523239737e-06, + "loss": 1.003, + "step": 29450 + }, + { + "epoch": 0.21325110208690742, + "grad_norm": 0.17407409846782684, + "learning_rate": 4.786756136579152e-06, + "loss": 0.9949, + "step": 29460 + }, + { + "epoch": 0.2133234887474936, + "grad_norm": 0.27453675866127014, + "learning_rate": 4.786683749918565e-06, + "loss": 0.9984, + "step": 29470 + }, + { + "epoch": 0.2133958754080798, + "grad_norm": 0.16514623165130615, + "learning_rate": 4.786611363257979e-06, + "loss": 0.9985, + "step": 29480 + }, + { + "epoch": 0.21346826206866598, + "grad_norm": 0.17301639914512634, + "learning_rate": 4.786538976597393e-06, + "loss": 1.0006, + "step": 29490 + }, + { + "epoch": 0.21354064872925219, + "grad_norm": 0.1778867542743683, + "learning_rate": 4.786466589936807e-06, + "loss": 0.9948, + "step": 29500 + }, + { + "epoch": 0.21361303538983836, + "grad_norm": 0.1603461503982544, + "learning_rate": 4.786394203276221e-06, + "loss": 0.9911, + "step": 29510 + }, + { + "epoch": 0.21368542205042454, + "grad_norm": 0.234296053647995, + "learning_rate": 4.786321816615634e-06, + "loss": 0.9971, + "step": 29520 + }, + { + "epoch": 0.21375780871101074, + "grad_norm": 0.158965066075325, + "learning_rate": 4.786249429955048e-06, + "loss": 0.9987, + "step": 29530 + }, + { + "epoch": 0.21383019537159692, + "grad_norm": 0.1825021207332611, + "learning_rate": 4.786177043294462e-06, + "loss": 0.9916, + "step": 29540 + }, + { + "epoch": 0.2139025820321831, + "grad_norm": 0.18081267178058624, + "learning_rate": 4.786104656633876e-06, + "loss": 0.9879, + "step": 29550 + }, + { + "epoch": 0.2139749686927693, + "grad_norm": 0.1616426259279251, + "learning_rate": 4.78603226997329e-06, + "loss": 0.9958, + "step": 29560 + }, + { + "epoch": 0.21404735535335548, + "grad_norm": 0.17399275302886963, + "learning_rate": 4.785959883312703e-06, + "loss": 1.0119, + "step": 29570 + }, + { + "epoch": 0.21411974201394168, + "grad_norm": 0.22257418930530548, + "learning_rate": 4.785887496652118e-06, + "loss": 0.9888, + "step": 29580 + }, + { + "epoch": 0.21419212867452786, + "grad_norm": 0.162775918841362, + "learning_rate": 4.785815109991531e-06, + "loss": 0.9989, + "step": 29590 + }, + { + "epoch": 0.21426451533511404, + "grad_norm": 0.16926290094852448, + "learning_rate": 4.785742723330945e-06, + "loss": 0.9965, + "step": 29600 + }, + { + "epoch": 0.21433690199570024, + "grad_norm": 0.18034987151622772, + "learning_rate": 4.7856703366703585e-06, + "loss": 0.995, + "step": 29610 + }, + { + "epoch": 0.21440928865628642, + "grad_norm": 0.17207445204257965, + "learning_rate": 4.785597950009773e-06, + "loss": 1.0046, + "step": 29620 + }, + { + "epoch": 0.2144816753168726, + "grad_norm": 0.21093519032001495, + "learning_rate": 4.785525563349187e-06, + "loss": 0.9917, + "step": 29630 + }, + { + "epoch": 0.2145540619774588, + "grad_norm": 0.16876211762428284, + "learning_rate": 4.7854531766886e-06, + "loss": 0.9903, + "step": 29640 + }, + { + "epoch": 0.21462644863804498, + "grad_norm": 0.24008895456790924, + "learning_rate": 4.785380790028014e-06, + "loss": 0.9946, + "step": 29650 + }, + { + "epoch": 0.21469883529863118, + "grad_norm": 0.16969846189022064, + "learning_rate": 4.785308403367428e-06, + "loss": 0.9987, + "step": 29660 + }, + { + "epoch": 0.21477122195921736, + "grad_norm": 0.1585167944431305, + "learning_rate": 4.785236016706842e-06, + "loss": 0.9916, + "step": 29670 + }, + { + "epoch": 0.21484360861980353, + "grad_norm": 0.16442425549030304, + "learning_rate": 4.7851636300462556e-06, + "loss": 0.9765, + "step": 29680 + }, + { + "epoch": 0.21491599528038974, + "grad_norm": 0.19266870617866516, + "learning_rate": 4.785091243385669e-06, + "loss": 0.9818, + "step": 29690 + }, + { + "epoch": 0.21498838194097591, + "grad_norm": 0.16447675228118896, + "learning_rate": 4.785018856725084e-06, + "loss": 1.0, + "step": 29700 + }, + { + "epoch": 0.2150607686015621, + "grad_norm": 0.1561741828918457, + "learning_rate": 4.784946470064497e-06, + "loss": 0.9833, + "step": 29710 + }, + { + "epoch": 0.2151331552621483, + "grad_norm": 0.1617199033498764, + "learning_rate": 4.784874083403911e-06, + "loss": 0.997, + "step": 29720 + }, + { + "epoch": 0.21520554192273447, + "grad_norm": 0.16869542002677917, + "learning_rate": 4.7848016967433245e-06, + "loss": 0.9877, + "step": 29730 + }, + { + "epoch": 0.21527792858332068, + "grad_norm": 0.1657578945159912, + "learning_rate": 4.784729310082739e-06, + "loss": 0.984, + "step": 29740 + }, + { + "epoch": 0.21535031524390685, + "grad_norm": 0.1783364713191986, + "learning_rate": 4.7846569234221526e-06, + "loss": 0.9972, + "step": 29750 + }, + { + "epoch": 0.21542270190449303, + "grad_norm": 0.16609250009059906, + "learning_rate": 4.784584536761566e-06, + "loss": 0.9815, + "step": 29760 + }, + { + "epoch": 0.21549508856507923, + "grad_norm": 0.16367986798286438, + "learning_rate": 4.78451215010098e-06, + "loss": 0.9864, + "step": 29770 + }, + { + "epoch": 0.2155674752256654, + "grad_norm": 0.1632063388824463, + "learning_rate": 4.784439763440394e-06, + "loss": 0.9857, + "step": 29780 + }, + { + "epoch": 0.2156398618862516, + "grad_norm": 0.2034599334001541, + "learning_rate": 4.784367376779808e-06, + "loss": 0.9864, + "step": 29790 + }, + { + "epoch": 0.2157122485468378, + "grad_norm": 0.15968771278858185, + "learning_rate": 4.7842949901192215e-06, + "loss": 0.982, + "step": 29800 + }, + { + "epoch": 0.21578463520742397, + "grad_norm": 0.16494300961494446, + "learning_rate": 4.784222603458635e-06, + "loss": 0.9871, + "step": 29810 + }, + { + "epoch": 0.21585702186801017, + "grad_norm": 0.1890561729669571, + "learning_rate": 4.784150216798049e-06, + "loss": 0.9845, + "step": 29820 + }, + { + "epoch": 0.21592940852859635, + "grad_norm": 0.18601875007152557, + "learning_rate": 4.784077830137462e-06, + "loss": 0.9968, + "step": 29830 + }, + { + "epoch": 0.21600179518918253, + "grad_norm": 0.18021321296691895, + "learning_rate": 4.784005443476876e-06, + "loss": 0.9904, + "step": 29840 + }, + { + "epoch": 0.21607418184976873, + "grad_norm": 0.17945921421051025, + "learning_rate": 4.78393305681629e-06, + "loss": 0.9995, + "step": 29850 + }, + { + "epoch": 0.2161465685103549, + "grad_norm": 0.16598504781723022, + "learning_rate": 4.783860670155704e-06, + "loss": 1.0009, + "step": 29860 + }, + { + "epoch": 0.2162189551709411, + "grad_norm": 0.1580289602279663, + "learning_rate": 4.783788283495118e-06, + "loss": 0.9999, + "step": 29870 + }, + { + "epoch": 0.2162913418315273, + "grad_norm": 0.16476024687290192, + "learning_rate": 4.783715896834531e-06, + "loss": 0.9882, + "step": 29880 + }, + { + "epoch": 0.21636372849211347, + "grad_norm": 0.17372757196426392, + "learning_rate": 4.783643510173946e-06, + "loss": 0.9873, + "step": 29890 + }, + { + "epoch": 0.21643611515269967, + "grad_norm": 0.17424210906028748, + "learning_rate": 4.783571123513359e-06, + "loss": 0.9848, + "step": 29900 + }, + { + "epoch": 0.21650850181328585, + "grad_norm": 0.16753096878528595, + "learning_rate": 4.783498736852773e-06, + "loss": 0.9753, + "step": 29910 + }, + { + "epoch": 0.21658088847387202, + "grad_norm": 0.16922076046466827, + "learning_rate": 4.783426350192187e-06, + "loss": 0.9874, + "step": 29920 + }, + { + "epoch": 0.21665327513445823, + "grad_norm": 0.18576353788375854, + "learning_rate": 4.783353963531601e-06, + "loss": 0.9952, + "step": 29930 + }, + { + "epoch": 0.2167256617950444, + "grad_norm": 0.15824037790298462, + "learning_rate": 4.783281576871015e-06, + "loss": 0.9921, + "step": 29940 + }, + { + "epoch": 0.2167980484556306, + "grad_norm": 0.18869732320308685, + "learning_rate": 4.783209190210428e-06, + "loss": 1.0058, + "step": 29950 + }, + { + "epoch": 0.2168704351162168, + "grad_norm": 0.17720650136470795, + "learning_rate": 4.783136803549842e-06, + "loss": 0.9959, + "step": 29960 + }, + { + "epoch": 0.21694282177680296, + "grad_norm": 0.17192226648330688, + "learning_rate": 4.783064416889256e-06, + "loss": 0.9915, + "step": 29970 + }, + { + "epoch": 0.21701520843738917, + "grad_norm": 0.17724481225013733, + "learning_rate": 4.78299203022867e-06, + "loss": 0.9957, + "step": 29980 + }, + { + "epoch": 0.21708759509797534, + "grad_norm": 0.16848604381084442, + "learning_rate": 4.782919643568084e-06, + "loss": 0.995, + "step": 29990 + }, + { + "epoch": 0.21715998175856152, + "grad_norm": 0.16832269728183746, + "learning_rate": 4.782847256907497e-06, + "loss": 1.0102, + "step": 30000 + }, + { + "epoch": 0.21723236841914773, + "grad_norm": 0.16782933473587036, + "learning_rate": 4.782774870246911e-06, + "loss": 0.9875, + "step": 30010 + }, + { + "epoch": 0.2173047550797339, + "grad_norm": 0.17606335878372192, + "learning_rate": 4.782702483586325e-06, + "loss": 0.9907, + "step": 30020 + }, + { + "epoch": 0.2173771417403201, + "grad_norm": 0.16667865216732025, + "learning_rate": 4.782630096925739e-06, + "loss": 0.9906, + "step": 30030 + }, + { + "epoch": 0.21744952840090628, + "grad_norm": 0.17801982164382935, + "learning_rate": 4.7825577102651525e-06, + "loss": 0.995, + "step": 30040 + }, + { + "epoch": 0.21752191506149246, + "grad_norm": 0.19045329093933105, + "learning_rate": 4.782485323604566e-06, + "loss": 0.9944, + "step": 30050 + }, + { + "epoch": 0.21759430172207866, + "grad_norm": 0.19208842515945435, + "learning_rate": 4.782412936943981e-06, + "loss": 0.9972, + "step": 30060 + }, + { + "epoch": 0.21766668838266484, + "grad_norm": 0.19080910086631775, + "learning_rate": 4.782340550283394e-06, + "loss": 0.9907, + "step": 30070 + }, + { + "epoch": 0.21773907504325102, + "grad_norm": 0.16448453068733215, + "learning_rate": 4.782268163622808e-06, + "loss": 0.9954, + "step": 30080 + }, + { + "epoch": 0.21781146170383722, + "grad_norm": 0.16412755846977234, + "learning_rate": 4.7821957769622214e-06, + "loss": 0.9929, + "step": 30090 + }, + { + "epoch": 0.2178838483644234, + "grad_norm": 0.17973798513412476, + "learning_rate": 4.782123390301636e-06, + "loss": 0.9898, + "step": 30100 + }, + { + "epoch": 0.2179562350250096, + "grad_norm": 0.16456930339336395, + "learning_rate": 4.7820510036410495e-06, + "loss": 0.9943, + "step": 30110 + }, + { + "epoch": 0.21802862168559578, + "grad_norm": 0.1790180802345276, + "learning_rate": 4.781978616980463e-06, + "loss": 1.0058, + "step": 30120 + }, + { + "epoch": 0.21810100834618196, + "grad_norm": 0.15905308723449707, + "learning_rate": 4.781906230319877e-06, + "loss": 0.985, + "step": 30130 + }, + { + "epoch": 0.21817339500676816, + "grad_norm": 0.17000477015972137, + "learning_rate": 4.781833843659291e-06, + "loss": 0.9802, + "step": 30140 + }, + { + "epoch": 0.21824578166735434, + "grad_norm": 0.19705595076084137, + "learning_rate": 4.781761456998705e-06, + "loss": 1.002, + "step": 30150 + }, + { + "epoch": 0.21831816832794051, + "grad_norm": 0.16319391131401062, + "learning_rate": 4.7816890703381185e-06, + "loss": 0.997, + "step": 30160 + }, + { + "epoch": 0.21839055498852672, + "grad_norm": 0.1748110055923462, + "learning_rate": 4.781616683677532e-06, + "loss": 0.9865, + "step": 30170 + }, + { + "epoch": 0.2184629416491129, + "grad_norm": 0.16149266064167023, + "learning_rate": 4.7815442970169465e-06, + "loss": 0.9952, + "step": 30180 + }, + { + "epoch": 0.2185353283096991, + "grad_norm": 0.22713536024093628, + "learning_rate": 4.78147191035636e-06, + "loss": 1.0025, + "step": 30190 + }, + { + "epoch": 0.21860771497028528, + "grad_norm": 0.1786499321460724, + "learning_rate": 4.781399523695774e-06, + "loss": 0.9896, + "step": 30200 + }, + { + "epoch": 0.21868010163087145, + "grad_norm": 0.19902971386909485, + "learning_rate": 4.781327137035187e-06, + "loss": 0.9976, + "step": 30210 + }, + { + "epoch": 0.21875248829145766, + "grad_norm": 0.17950847744941711, + "learning_rate": 4.781254750374602e-06, + "loss": 1.0033, + "step": 30220 + }, + { + "epoch": 0.21882487495204384, + "grad_norm": 0.1599850207567215, + "learning_rate": 4.7811823637140155e-06, + "loss": 0.9735, + "step": 30230 + }, + { + "epoch": 0.21889726161263, + "grad_norm": 0.15841835737228394, + "learning_rate": 4.781109977053429e-06, + "loss": 1.0022, + "step": 30240 + }, + { + "epoch": 0.21896964827321622, + "grad_norm": 0.17060723900794983, + "learning_rate": 4.781037590392843e-06, + "loss": 0.9909, + "step": 30250 + }, + { + "epoch": 0.2190420349338024, + "grad_norm": 0.16815410554409027, + "learning_rate": 4.780965203732257e-06, + "loss": 0.9865, + "step": 30260 + }, + { + "epoch": 0.2191144215943886, + "grad_norm": 0.2049429714679718, + "learning_rate": 4.780892817071671e-06, + "loss": 0.978, + "step": 30270 + }, + { + "epoch": 0.21918680825497477, + "grad_norm": 0.1744859516620636, + "learning_rate": 4.780820430411084e-06, + "loss": 0.9946, + "step": 30280 + }, + { + "epoch": 0.21925919491556095, + "grad_norm": 0.17535239458084106, + "learning_rate": 4.780748043750498e-06, + "loss": 0.9893, + "step": 30290 + }, + { + "epoch": 0.21933158157614716, + "grad_norm": 0.16993948817253113, + "learning_rate": 4.7806756570899125e-06, + "loss": 0.995, + "step": 30300 + }, + { + "epoch": 0.21940396823673333, + "grad_norm": 0.1737825572490692, + "learning_rate": 4.780603270429326e-06, + "loss": 0.979, + "step": 30310 + }, + { + "epoch": 0.2194763548973195, + "grad_norm": 0.19781017303466797, + "learning_rate": 4.78053088376874e-06, + "loss": 0.9894, + "step": 30320 + }, + { + "epoch": 0.2195487415579057, + "grad_norm": 0.15426485240459442, + "learning_rate": 4.780458497108153e-06, + "loss": 0.9906, + "step": 30330 + }, + { + "epoch": 0.2196211282184919, + "grad_norm": 0.17458288371562958, + "learning_rate": 4.780386110447567e-06, + "loss": 0.9847, + "step": 30340 + }, + { + "epoch": 0.2196935148790781, + "grad_norm": 0.17659978568553925, + "learning_rate": 4.7803137237869805e-06, + "loss": 0.9937, + "step": 30350 + }, + { + "epoch": 0.21976590153966427, + "grad_norm": 0.16080795228481293, + "learning_rate": 4.780241337126394e-06, + "loss": 0.975, + "step": 30360 + }, + { + "epoch": 0.21983828820025045, + "grad_norm": 0.1639893800020218, + "learning_rate": 4.780168950465809e-06, + "loss": 1.0061, + "step": 30370 + }, + { + "epoch": 0.21991067486083665, + "grad_norm": 0.17804419994354248, + "learning_rate": 4.780096563805222e-06, + "loss": 0.9952, + "step": 30380 + }, + { + "epoch": 0.21998306152142283, + "grad_norm": 0.17205168306827545, + "learning_rate": 4.780024177144636e-06, + "loss": 0.9953, + "step": 30390 + }, + { + "epoch": 0.220055448182009, + "grad_norm": 0.2006746232509613, + "learning_rate": 4.7799517904840495e-06, + "loss": 0.9882, + "step": 30400 + }, + { + "epoch": 0.2201278348425952, + "grad_norm": 0.18707719445228577, + "learning_rate": 4.779879403823464e-06, + "loss": 1.0003, + "step": 30410 + }, + { + "epoch": 0.2202002215031814, + "grad_norm": 0.178603395819664, + "learning_rate": 4.7798070171628776e-06, + "loss": 0.9928, + "step": 30420 + }, + { + "epoch": 0.2202726081637676, + "grad_norm": 0.19374102354049683, + "learning_rate": 4.779734630502291e-06, + "loss": 0.9865, + "step": 30430 + }, + { + "epoch": 0.22034499482435377, + "grad_norm": 0.1638360470533371, + "learning_rate": 4.779662243841705e-06, + "loss": 0.9922, + "step": 30440 + }, + { + "epoch": 0.22041738148493994, + "grad_norm": 0.16339033842086792, + "learning_rate": 4.779589857181119e-06, + "loss": 0.995, + "step": 30450 + }, + { + "epoch": 0.22048976814552615, + "grad_norm": 0.17286980152130127, + "learning_rate": 4.779517470520533e-06, + "loss": 0.9862, + "step": 30460 + }, + { + "epoch": 0.22056215480611233, + "grad_norm": 0.17059637606143951, + "learning_rate": 4.7794450838599465e-06, + "loss": 0.988, + "step": 30470 + }, + { + "epoch": 0.22063454146669853, + "grad_norm": 0.15443108975887299, + "learning_rate": 4.77937269719936e-06, + "loss": 0.9902, + "step": 30480 + }, + { + "epoch": 0.2207069281272847, + "grad_norm": 0.16582870483398438, + "learning_rate": 4.7793003105387746e-06, + "loss": 0.9947, + "step": 30490 + }, + { + "epoch": 0.22077931478787088, + "grad_norm": 0.16786135733127594, + "learning_rate": 4.779227923878188e-06, + "loss": 0.9932, + "step": 30500 + }, + { + "epoch": 0.2208517014484571, + "grad_norm": 0.18378998339176178, + "learning_rate": 4.779155537217602e-06, + "loss": 0.9923, + "step": 30510 + }, + { + "epoch": 0.22092408810904327, + "grad_norm": 0.1661590337753296, + "learning_rate": 4.779083150557015e-06, + "loss": 0.9963, + "step": 30520 + }, + { + "epoch": 0.22099647476962944, + "grad_norm": 0.16045540571212769, + "learning_rate": 4.77901076389643e-06, + "loss": 0.9888, + "step": 30530 + }, + { + "epoch": 0.22106886143021565, + "grad_norm": 0.17073200643062592, + "learning_rate": 4.7789383772358435e-06, + "loss": 0.9863, + "step": 30540 + }, + { + "epoch": 0.22114124809080182, + "grad_norm": 0.2749057710170746, + "learning_rate": 4.778865990575257e-06, + "loss": 0.9993, + "step": 30550 + }, + { + "epoch": 0.22121363475138803, + "grad_norm": 0.17265433073043823, + "learning_rate": 4.778793603914671e-06, + "loss": 0.9898, + "step": 30560 + }, + { + "epoch": 0.2212860214119742, + "grad_norm": 0.1878434419631958, + "learning_rate": 4.778721217254085e-06, + "loss": 0.998, + "step": 30570 + }, + { + "epoch": 0.22135840807256038, + "grad_norm": 0.16679958999156952, + "learning_rate": 4.778648830593499e-06, + "loss": 0.9897, + "step": 30580 + }, + { + "epoch": 0.22143079473314659, + "grad_norm": 0.18159635365009308, + "learning_rate": 4.778576443932912e-06, + "loss": 0.9953, + "step": 30590 + }, + { + "epoch": 0.22150318139373276, + "grad_norm": 0.18782582879066467, + "learning_rate": 4.778504057272326e-06, + "loss": 0.9949, + "step": 30600 + }, + { + "epoch": 0.22157556805431894, + "grad_norm": 0.16997790336608887, + "learning_rate": 4.77843167061174e-06, + "loss": 0.9956, + "step": 30610 + }, + { + "epoch": 0.22164795471490514, + "grad_norm": 0.19015435874462128, + "learning_rate": 4.778359283951154e-06, + "loss": 0.9788, + "step": 30620 + }, + { + "epoch": 0.22172034137549132, + "grad_norm": 0.15905068814754486, + "learning_rate": 4.778286897290568e-06, + "loss": 0.9862, + "step": 30630 + }, + { + "epoch": 0.22179272803607752, + "grad_norm": 0.21960864961147308, + "learning_rate": 4.778214510629981e-06, + "loss": 0.9855, + "step": 30640 + }, + { + "epoch": 0.2218651146966637, + "grad_norm": 0.1731971949338913, + "learning_rate": 4.778142123969395e-06, + "loss": 0.9855, + "step": 30650 + }, + { + "epoch": 0.22193750135724988, + "grad_norm": 0.1737217754125595, + "learning_rate": 4.7780697373088094e-06, + "loss": 0.9929, + "step": 30660 + }, + { + "epoch": 0.22200988801783608, + "grad_norm": 0.16913598775863647, + "learning_rate": 4.777997350648223e-06, + "loss": 0.9939, + "step": 30670 + }, + { + "epoch": 0.22208227467842226, + "grad_norm": 0.19872261583805084, + "learning_rate": 4.777924963987637e-06, + "loss": 1.0059, + "step": 30680 + }, + { + "epoch": 0.22215466133900844, + "grad_norm": 0.17020894587039948, + "learning_rate": 4.77785257732705e-06, + "loss": 0.9992, + "step": 30690 + }, + { + "epoch": 0.22222704799959464, + "grad_norm": 0.19177848100662231, + "learning_rate": 4.777780190666465e-06, + "loss": 0.9826, + "step": 30700 + }, + { + "epoch": 0.22229943466018082, + "grad_norm": 0.17280960083007812, + "learning_rate": 4.777707804005878e-06, + "loss": 0.9868, + "step": 30710 + }, + { + "epoch": 0.22237182132076702, + "grad_norm": 0.1845826506614685, + "learning_rate": 4.777635417345292e-06, + "loss": 0.994, + "step": 30720 + }, + { + "epoch": 0.2224442079813532, + "grad_norm": 0.20441372692584991, + "learning_rate": 4.777563030684706e-06, + "loss": 0.9852, + "step": 30730 + }, + { + "epoch": 0.22251659464193937, + "grad_norm": 0.17806535959243774, + "learning_rate": 4.77749064402412e-06, + "loss": 0.9922, + "step": 30740 + }, + { + "epoch": 0.22258898130252558, + "grad_norm": 0.1843547821044922, + "learning_rate": 4.777418257363534e-06, + "loss": 0.9931, + "step": 30750 + }, + { + "epoch": 0.22266136796311176, + "grad_norm": 0.1745116412639618, + "learning_rate": 4.777345870702947e-06, + "loss": 0.9939, + "step": 30760 + }, + { + "epoch": 0.22273375462369793, + "grad_norm": 0.18230880796909332, + "learning_rate": 4.777273484042361e-06, + "loss": 0.9821, + "step": 30770 + }, + { + "epoch": 0.22280614128428414, + "grad_norm": 0.17460425198078156, + "learning_rate": 4.777201097381775e-06, + "loss": 0.9932, + "step": 30780 + }, + { + "epoch": 0.2228785279448703, + "grad_norm": 0.16138465702533722, + "learning_rate": 4.777128710721189e-06, + "loss": 0.9921, + "step": 30790 + }, + { + "epoch": 0.22295091460545652, + "grad_norm": 0.1702491044998169, + "learning_rate": 4.777056324060603e-06, + "loss": 1.0047, + "step": 30800 + }, + { + "epoch": 0.2230233012660427, + "grad_norm": 0.1869451254606247, + "learning_rate": 4.776983937400016e-06, + "loss": 0.9977, + "step": 30810 + }, + { + "epoch": 0.22309568792662887, + "grad_norm": 0.17127086222171783, + "learning_rate": 4.776911550739431e-06, + "loss": 1.0077, + "step": 30820 + }, + { + "epoch": 0.22316807458721508, + "grad_norm": 0.17618906497955322, + "learning_rate": 4.776839164078844e-06, + "loss": 0.9919, + "step": 30830 + }, + { + "epoch": 0.22324046124780125, + "grad_norm": 0.19127856194972992, + "learning_rate": 4.776766777418258e-06, + "loss": 0.9839, + "step": 30840 + }, + { + "epoch": 0.22331284790838743, + "grad_norm": 0.16776026785373688, + "learning_rate": 4.7766943907576715e-06, + "loss": 0.9903, + "step": 30850 + }, + { + "epoch": 0.22338523456897363, + "grad_norm": 0.17897354066371918, + "learning_rate": 4.776622004097086e-06, + "loss": 0.9847, + "step": 30860 + }, + { + "epoch": 0.2234576212295598, + "grad_norm": 0.16106632351875305, + "learning_rate": 4.776549617436499e-06, + "loss": 1.0027, + "step": 30870 + }, + { + "epoch": 0.22353000789014602, + "grad_norm": 0.15779045224189758, + "learning_rate": 4.776477230775912e-06, + "loss": 0.9942, + "step": 30880 + }, + { + "epoch": 0.2236023945507322, + "grad_norm": 0.19655205309391022, + "learning_rate": 4.776404844115327e-06, + "loss": 1.0076, + "step": 30890 + }, + { + "epoch": 0.22367478121131837, + "grad_norm": 0.1667492836713791, + "learning_rate": 4.7763324574547405e-06, + "loss": 0.9975, + "step": 30900 + }, + { + "epoch": 0.22374716787190457, + "grad_norm": 0.18029555678367615, + "learning_rate": 4.776260070794154e-06, + "loss": 0.9857, + "step": 30910 + }, + { + "epoch": 0.22381955453249075, + "grad_norm": 0.1663108766078949, + "learning_rate": 4.776187684133568e-06, + "loss": 0.9952, + "step": 30920 + }, + { + "epoch": 0.22389194119307693, + "grad_norm": 0.18549077212810516, + "learning_rate": 4.776115297472982e-06, + "loss": 0.9825, + "step": 30930 + }, + { + "epoch": 0.22396432785366313, + "grad_norm": 0.17091688513755798, + "learning_rate": 4.776042910812396e-06, + "loss": 1.0017, + "step": 30940 + }, + { + "epoch": 0.2240367145142493, + "grad_norm": 0.18328119814395905, + "learning_rate": 4.775970524151809e-06, + "loss": 0.9957, + "step": 30950 + }, + { + "epoch": 0.2241091011748355, + "grad_norm": 0.18230809271335602, + "learning_rate": 4.775898137491223e-06, + "loss": 1.0105, + "step": 30960 + }, + { + "epoch": 0.2241814878354217, + "grad_norm": 0.17327918112277985, + "learning_rate": 4.7758257508306375e-06, + "loss": 0.9785, + "step": 30970 + }, + { + "epoch": 0.22425387449600787, + "grad_norm": 0.1592261642217636, + "learning_rate": 4.775753364170051e-06, + "loss": 0.9965, + "step": 30980 + }, + { + "epoch": 0.22432626115659407, + "grad_norm": 0.16673846542835236, + "learning_rate": 4.775680977509465e-06, + "loss": 0.986, + "step": 30990 + }, + { + "epoch": 0.22439864781718025, + "grad_norm": 0.1908463090658188, + "learning_rate": 4.775608590848878e-06, + "loss": 1.0013, + "step": 31000 + }, + { + "epoch": 0.22447103447776642, + "grad_norm": 0.16515257954597473, + "learning_rate": 4.775536204188293e-06, + "loss": 1.0092, + "step": 31010 + }, + { + "epoch": 0.22454342113835263, + "grad_norm": 0.1733829379081726, + "learning_rate": 4.775463817527706e-06, + "loss": 0.9834, + "step": 31020 + }, + { + "epoch": 0.2246158077989388, + "grad_norm": 0.17411421239376068, + "learning_rate": 4.77539143086712e-06, + "loss": 0.9866, + "step": 31030 + }, + { + "epoch": 0.224688194459525, + "grad_norm": 0.16972704231739044, + "learning_rate": 4.775319044206534e-06, + "loss": 0.986, + "step": 31040 + }, + { + "epoch": 0.22476058112011119, + "grad_norm": 0.1979493349790573, + "learning_rate": 4.775246657545948e-06, + "loss": 0.9816, + "step": 31050 + }, + { + "epoch": 0.22483296778069736, + "grad_norm": 0.18027926981449127, + "learning_rate": 4.775174270885362e-06, + "loss": 0.9853, + "step": 31060 + }, + { + "epoch": 0.22490535444128357, + "grad_norm": 0.16481108963489532, + "learning_rate": 4.775101884224775e-06, + "loss": 0.9876, + "step": 31070 + }, + { + "epoch": 0.22497774110186974, + "grad_norm": 0.1805502474308014, + "learning_rate": 4.775029497564189e-06, + "loss": 0.9897, + "step": 31080 + }, + { + "epoch": 0.22505012776245595, + "grad_norm": 0.1621004194021225, + "learning_rate": 4.774957110903603e-06, + "loss": 0.9909, + "step": 31090 + }, + { + "epoch": 0.22512251442304212, + "grad_norm": 0.1652892827987671, + "learning_rate": 4.774884724243017e-06, + "loss": 0.9781, + "step": 31100 + }, + { + "epoch": 0.2251949010836283, + "grad_norm": 0.16680368781089783, + "learning_rate": 4.774812337582431e-06, + "loss": 0.9995, + "step": 31110 + }, + { + "epoch": 0.2252672877442145, + "grad_norm": 0.17814873158931732, + "learning_rate": 4.774739950921844e-06, + "loss": 0.9923, + "step": 31120 + }, + { + "epoch": 0.22533967440480068, + "grad_norm": 0.15377861261367798, + "learning_rate": 4.774667564261259e-06, + "loss": 0.9837, + "step": 31130 + }, + { + "epoch": 0.22541206106538686, + "grad_norm": 0.18391862511634827, + "learning_rate": 4.774595177600672e-06, + "loss": 0.9995, + "step": 31140 + }, + { + "epoch": 0.22548444772597306, + "grad_norm": 0.17734524607658386, + "learning_rate": 4.774522790940086e-06, + "loss": 1.0083, + "step": 31150 + }, + { + "epoch": 0.22555683438655924, + "grad_norm": 0.17555080354213715, + "learning_rate": 4.7744504042794996e-06, + "loss": 1.0057, + "step": 31160 + }, + { + "epoch": 0.22562922104714545, + "grad_norm": 0.16660809516906738, + "learning_rate": 4.774378017618914e-06, + "loss": 0.9793, + "step": 31170 + }, + { + "epoch": 0.22570160770773162, + "grad_norm": 0.19567494094371796, + "learning_rate": 4.774305630958328e-06, + "loss": 0.9865, + "step": 31180 + }, + { + "epoch": 0.2257739943683178, + "grad_norm": 0.15271180868148804, + "learning_rate": 4.774233244297741e-06, + "loss": 0.9831, + "step": 31190 + }, + { + "epoch": 0.225846381028904, + "grad_norm": 0.16450783610343933, + "learning_rate": 4.774160857637155e-06, + "loss": 0.996, + "step": 31200 + }, + { + "epoch": 0.22591876768949018, + "grad_norm": 0.2669644057750702, + "learning_rate": 4.774088470976569e-06, + "loss": 0.9876, + "step": 31210 + }, + { + "epoch": 0.22599115435007636, + "grad_norm": 0.1570923775434494, + "learning_rate": 4.774016084315983e-06, + "loss": 0.9838, + "step": 31220 + }, + { + "epoch": 0.22606354101066256, + "grad_norm": 0.1822049915790558, + "learning_rate": 4.7739436976553966e-06, + "loss": 0.976, + "step": 31230 + }, + { + "epoch": 0.22613592767124874, + "grad_norm": 0.17041492462158203, + "learning_rate": 4.77387131099481e-06, + "loss": 1.0074, + "step": 31240 + }, + { + "epoch": 0.22620831433183494, + "grad_norm": 0.1686597615480423, + "learning_rate": 4.773798924334224e-06, + "loss": 1.0004, + "step": 31250 + }, + { + "epoch": 0.22628070099242112, + "grad_norm": 0.16727478802204132, + "learning_rate": 4.773726537673638e-06, + "loss": 1.0044, + "step": 31260 + }, + { + "epoch": 0.2263530876530073, + "grad_norm": 0.16306477785110474, + "learning_rate": 4.773654151013052e-06, + "loss": 0.9966, + "step": 31270 + }, + { + "epoch": 0.2264254743135935, + "grad_norm": 0.17092132568359375, + "learning_rate": 4.7735817643524655e-06, + "loss": 1.0063, + "step": 31280 + }, + { + "epoch": 0.22649786097417968, + "grad_norm": 0.16931623220443726, + "learning_rate": 4.773509377691879e-06, + "loss": 0.9868, + "step": 31290 + }, + { + "epoch": 0.22657024763476585, + "grad_norm": 0.1603298783302307, + "learning_rate": 4.7734369910312936e-06, + "loss": 0.9837, + "step": 31300 + }, + { + "epoch": 0.22664263429535206, + "grad_norm": 0.19515341520309448, + "learning_rate": 4.773364604370707e-06, + "loss": 0.9844, + "step": 31310 + }, + { + "epoch": 0.22671502095593823, + "grad_norm": 0.16910241544246674, + "learning_rate": 4.773292217710121e-06, + "loss": 0.9948, + "step": 31320 + }, + { + "epoch": 0.22678740761652444, + "grad_norm": 0.19384412467479706, + "learning_rate": 4.773219831049534e-06, + "loss": 0.9617, + "step": 31330 + }, + { + "epoch": 0.22685979427711062, + "grad_norm": 0.17638161778450012, + "learning_rate": 4.773147444388949e-06, + "loss": 0.9865, + "step": 31340 + }, + { + "epoch": 0.2269321809376968, + "grad_norm": 0.17400164902210236, + "learning_rate": 4.7730750577283625e-06, + "loss": 0.997, + "step": 31350 + }, + { + "epoch": 0.227004567598283, + "grad_norm": 0.18208061158657074, + "learning_rate": 4.773002671067776e-06, + "loss": 1.0006, + "step": 31360 + }, + { + "epoch": 0.22707695425886917, + "grad_norm": 0.17312349379062653, + "learning_rate": 4.77293028440719e-06, + "loss": 0.973, + "step": 31370 + }, + { + "epoch": 0.22714934091945535, + "grad_norm": 0.18779698014259338, + "learning_rate": 4.772857897746604e-06, + "loss": 0.9899, + "step": 31380 + }, + { + "epoch": 0.22722172758004155, + "grad_norm": 0.1715383231639862, + "learning_rate": 4.772785511086018e-06, + "loss": 0.9891, + "step": 31390 + }, + { + "epoch": 0.22729411424062773, + "grad_norm": 0.18335622549057007, + "learning_rate": 4.772713124425431e-06, + "loss": 0.9925, + "step": 31400 + }, + { + "epoch": 0.22736650090121394, + "grad_norm": 0.1920541524887085, + "learning_rate": 4.772640737764845e-06, + "loss": 0.9875, + "step": 31410 + }, + { + "epoch": 0.2274388875618001, + "grad_norm": 0.16819268465042114, + "learning_rate": 4.772568351104259e-06, + "loss": 0.9909, + "step": 31420 + }, + { + "epoch": 0.2275112742223863, + "grad_norm": 0.1579744815826416, + "learning_rate": 4.772495964443672e-06, + "loss": 0.9903, + "step": 31430 + }, + { + "epoch": 0.2275836608829725, + "grad_norm": 0.1756497621536255, + "learning_rate": 4.772423577783086e-06, + "loss": 0.9948, + "step": 31440 + }, + { + "epoch": 0.22765604754355867, + "grad_norm": 0.1611769050359726, + "learning_rate": 4.7723511911225e-06, + "loss": 0.9925, + "step": 31450 + }, + { + "epoch": 0.22772843420414485, + "grad_norm": 0.19173139333724976, + "learning_rate": 4.772278804461914e-06, + "loss": 0.9761, + "step": 31460 + }, + { + "epoch": 0.22780082086473105, + "grad_norm": 0.20270651578903198, + "learning_rate": 4.772206417801328e-06, + "loss": 0.9898, + "step": 31470 + }, + { + "epoch": 0.22787320752531723, + "grad_norm": 0.17298614978790283, + "learning_rate": 4.772134031140741e-06, + "loss": 0.9871, + "step": 31480 + }, + { + "epoch": 0.22794559418590343, + "grad_norm": 0.15793482959270477, + "learning_rate": 4.772061644480156e-06, + "loss": 0.9874, + "step": 31490 + }, + { + "epoch": 0.2280179808464896, + "grad_norm": 0.1665399670600891, + "learning_rate": 4.771989257819569e-06, + "loss": 0.9963, + "step": 31500 + }, + { + "epoch": 0.2280903675070758, + "grad_norm": 0.1729333996772766, + "learning_rate": 4.771916871158983e-06, + "loss": 0.9995, + "step": 31510 + }, + { + "epoch": 0.228162754167662, + "grad_norm": 0.1829392910003662, + "learning_rate": 4.7718444844983965e-06, + "loss": 0.9848, + "step": 31520 + }, + { + "epoch": 0.22823514082824817, + "grad_norm": 0.17102506756782532, + "learning_rate": 4.771772097837811e-06, + "loss": 0.9867, + "step": 31530 + }, + { + "epoch": 0.22830752748883434, + "grad_norm": 0.1742861568927765, + "learning_rate": 4.771699711177225e-06, + "loss": 0.9888, + "step": 31540 + }, + { + "epoch": 0.22837991414942055, + "grad_norm": 0.17398032546043396, + "learning_rate": 4.771627324516638e-06, + "loss": 0.9985, + "step": 31550 + }, + { + "epoch": 0.22845230081000673, + "grad_norm": 0.168184295296669, + "learning_rate": 4.771554937856052e-06, + "loss": 0.9877, + "step": 31560 + }, + { + "epoch": 0.22852468747059293, + "grad_norm": 0.1651362031698227, + "learning_rate": 4.771482551195466e-06, + "loss": 0.996, + "step": 31570 + }, + { + "epoch": 0.2285970741311791, + "grad_norm": 0.17777453362941742, + "learning_rate": 4.77141016453488e-06, + "loss": 0.9945, + "step": 31580 + }, + { + "epoch": 0.22866946079176528, + "grad_norm": 0.17504815757274628, + "learning_rate": 4.7713377778742935e-06, + "loss": 1.0095, + "step": 31590 + }, + { + "epoch": 0.2287418474523515, + "grad_norm": 0.16557878255844116, + "learning_rate": 4.771265391213707e-06, + "loss": 0.9934, + "step": 31600 + }, + { + "epoch": 0.22881423411293766, + "grad_norm": 0.1588304191827774, + "learning_rate": 4.771193004553122e-06, + "loss": 0.9674, + "step": 31610 + }, + { + "epoch": 0.22888662077352384, + "grad_norm": 0.17581112682819366, + "learning_rate": 4.771120617892535e-06, + "loss": 0.9903, + "step": 31620 + }, + { + "epoch": 0.22895900743411005, + "grad_norm": 0.15916821360588074, + "learning_rate": 4.771048231231949e-06, + "loss": 0.9846, + "step": 31630 + }, + { + "epoch": 0.22903139409469622, + "grad_norm": 0.16646379232406616, + "learning_rate": 4.7709758445713625e-06, + "loss": 0.9747, + "step": 31640 + }, + { + "epoch": 0.22910378075528243, + "grad_norm": 0.18210920691490173, + "learning_rate": 4.770903457910777e-06, + "loss": 0.9999, + "step": 31650 + }, + { + "epoch": 0.2291761674158686, + "grad_norm": 0.1829901784658432, + "learning_rate": 4.7708310712501905e-06, + "loss": 0.9917, + "step": 31660 + }, + { + "epoch": 0.22924855407645478, + "grad_norm": 0.16289442777633667, + "learning_rate": 4.770758684589604e-06, + "loss": 0.9858, + "step": 31670 + }, + { + "epoch": 0.22932094073704098, + "grad_norm": 0.1644997000694275, + "learning_rate": 4.770686297929018e-06, + "loss": 0.9918, + "step": 31680 + }, + { + "epoch": 0.22939332739762716, + "grad_norm": 0.1583040952682495, + "learning_rate": 4.770613911268432e-06, + "loss": 0.9852, + "step": 31690 + }, + { + "epoch": 0.22946571405821337, + "grad_norm": 0.15572409331798553, + "learning_rate": 4.770541524607846e-06, + "loss": 1.0034, + "step": 31700 + }, + { + "epoch": 0.22953810071879954, + "grad_norm": 0.185507133603096, + "learning_rate": 4.7704691379472595e-06, + "loss": 0.996, + "step": 31710 + }, + { + "epoch": 0.22961048737938572, + "grad_norm": 0.16780561208724976, + "learning_rate": 4.770396751286673e-06, + "loss": 0.9902, + "step": 31720 + }, + { + "epoch": 0.22968287403997192, + "grad_norm": 0.1608486771583557, + "learning_rate": 4.7703243646260875e-06, + "loss": 0.9956, + "step": 31730 + }, + { + "epoch": 0.2297552607005581, + "grad_norm": 0.16754546761512756, + "learning_rate": 4.770251977965501e-06, + "loss": 0.9933, + "step": 31740 + }, + { + "epoch": 0.22982764736114428, + "grad_norm": 0.16332760453224182, + "learning_rate": 4.770179591304915e-06, + "loss": 0.9819, + "step": 31750 + }, + { + "epoch": 0.22990003402173048, + "grad_norm": 0.23022449016571045, + "learning_rate": 4.770107204644328e-06, + "loss": 0.9909, + "step": 31760 + }, + { + "epoch": 0.22997242068231666, + "grad_norm": 0.18331989645957947, + "learning_rate": 4.770034817983743e-06, + "loss": 1.0003, + "step": 31770 + }, + { + "epoch": 0.23004480734290286, + "grad_norm": 0.1962156593799591, + "learning_rate": 4.7699624313231565e-06, + "loss": 0.9884, + "step": 31780 + }, + { + "epoch": 0.23011719400348904, + "grad_norm": 0.16017916798591614, + "learning_rate": 4.76989004466257e-06, + "loss": 0.9935, + "step": 31790 + }, + { + "epoch": 0.23018958066407522, + "grad_norm": 0.16973306238651276, + "learning_rate": 4.769817658001984e-06, + "loss": 1.0018, + "step": 31800 + }, + { + "epoch": 0.23026196732466142, + "grad_norm": 0.1658056229352951, + "learning_rate": 4.769745271341398e-06, + "loss": 0.9564, + "step": 31810 + }, + { + "epoch": 0.2303343539852476, + "grad_norm": 0.1671288162469864, + "learning_rate": 4.769672884680812e-06, + "loss": 0.9972, + "step": 31820 + }, + { + "epoch": 0.23040674064583377, + "grad_norm": 0.17118290066719055, + "learning_rate": 4.769600498020225e-06, + "loss": 0.9972, + "step": 31830 + }, + { + "epoch": 0.23047912730641998, + "grad_norm": 0.1895531266927719, + "learning_rate": 4.769528111359639e-06, + "loss": 0.9919, + "step": 31840 + }, + { + "epoch": 0.23055151396700616, + "grad_norm": 0.1677425503730774, + "learning_rate": 4.769455724699053e-06, + "loss": 0.9709, + "step": 31850 + }, + { + "epoch": 0.23062390062759236, + "grad_norm": 0.15633396804332733, + "learning_rate": 4.769383338038467e-06, + "loss": 0.9762, + "step": 31860 + }, + { + "epoch": 0.23069628728817854, + "grad_norm": 0.15433230996131897, + "learning_rate": 4.769310951377881e-06, + "loss": 0.9902, + "step": 31870 + }, + { + "epoch": 0.2307686739487647, + "grad_norm": 0.17825239896774292, + "learning_rate": 4.769238564717294e-06, + "loss": 0.9933, + "step": 31880 + }, + { + "epoch": 0.23084106060935092, + "grad_norm": 0.1626371145248413, + "learning_rate": 4.769166178056708e-06, + "loss": 0.9893, + "step": 31890 + }, + { + "epoch": 0.2309134472699371, + "grad_norm": 0.17792868614196777, + "learning_rate": 4.769093791396122e-06, + "loss": 0.9825, + "step": 31900 + }, + { + "epoch": 0.23098583393052327, + "grad_norm": 0.1791241466999054, + "learning_rate": 4.769021404735536e-06, + "loss": 0.9916, + "step": 31910 + }, + { + "epoch": 0.23105822059110948, + "grad_norm": 0.1636131852865219, + "learning_rate": 4.76894901807495e-06, + "loss": 1.0103, + "step": 31920 + }, + { + "epoch": 0.23113060725169565, + "grad_norm": 0.1715201586484909, + "learning_rate": 4.768876631414363e-06, + "loss": 0.988, + "step": 31930 + }, + { + "epoch": 0.23120299391228186, + "grad_norm": 0.19205105304718018, + "learning_rate": 4.768804244753777e-06, + "loss": 0.9843, + "step": 31940 + }, + { + "epoch": 0.23127538057286803, + "grad_norm": 0.177320197224617, + "learning_rate": 4.7687318580931905e-06, + "loss": 1.0003, + "step": 31950 + }, + { + "epoch": 0.2313477672334542, + "grad_norm": 0.16277745366096497, + "learning_rate": 4.768659471432605e-06, + "loss": 0.9858, + "step": 31960 + }, + { + "epoch": 0.23142015389404041, + "grad_norm": 0.16934184730052948, + "learning_rate": 4.7685870847720186e-06, + "loss": 1.0059, + "step": 31970 + }, + { + "epoch": 0.2314925405546266, + "grad_norm": 0.17568084597587585, + "learning_rate": 4.768514698111432e-06, + "loss": 0.9886, + "step": 31980 + }, + { + "epoch": 0.23156492721521277, + "grad_norm": 0.16883017122745514, + "learning_rate": 4.768442311450846e-06, + "loss": 1.0016, + "step": 31990 + }, + { + "epoch": 0.23163731387579897, + "grad_norm": 0.170290008187294, + "learning_rate": 4.76836992479026e-06, + "loss": 0.9923, + "step": 32000 + }, + { + "epoch": 0.23170970053638515, + "grad_norm": 0.17787618935108185, + "learning_rate": 4.768297538129674e-06, + "loss": 0.974, + "step": 32010 + }, + { + "epoch": 0.23178208719697135, + "grad_norm": 0.16933457553386688, + "learning_rate": 4.7682251514690875e-06, + "loss": 0.9861, + "step": 32020 + }, + { + "epoch": 0.23185447385755753, + "grad_norm": 0.1682525873184204, + "learning_rate": 4.768152764808501e-06, + "loss": 0.9785, + "step": 32030 + }, + { + "epoch": 0.2319268605181437, + "grad_norm": 0.17858469486236572, + "learning_rate": 4.768080378147915e-06, + "loss": 0.9788, + "step": 32040 + }, + { + "epoch": 0.2319992471787299, + "grad_norm": 0.16218888759613037, + "learning_rate": 4.768007991487329e-06, + "loss": 0.9851, + "step": 32050 + }, + { + "epoch": 0.2320716338393161, + "grad_norm": 0.1646839827299118, + "learning_rate": 4.767935604826743e-06, + "loss": 0.9798, + "step": 32060 + }, + { + "epoch": 0.23214402049990226, + "grad_norm": 0.20934203267097473, + "learning_rate": 4.767863218166156e-06, + "loss": 0.9786, + "step": 32070 + }, + { + "epoch": 0.23221640716048847, + "grad_norm": 0.1663549393415451, + "learning_rate": 4.76779083150557e-06, + "loss": 0.9759, + "step": 32080 + }, + { + "epoch": 0.23228879382107465, + "grad_norm": 0.16773077845573425, + "learning_rate": 4.7677184448449845e-06, + "loss": 0.997, + "step": 32090 + }, + { + "epoch": 0.23236118048166085, + "grad_norm": 0.18589721620082855, + "learning_rate": 4.767646058184398e-06, + "loss": 0.991, + "step": 32100 + }, + { + "epoch": 0.23243356714224703, + "grad_norm": 0.17491789162158966, + "learning_rate": 4.767573671523812e-06, + "loss": 0.9837, + "step": 32110 + }, + { + "epoch": 0.2325059538028332, + "grad_norm": 0.18429701030254364, + "learning_rate": 4.767501284863225e-06, + "loss": 0.9795, + "step": 32120 + }, + { + "epoch": 0.2325783404634194, + "grad_norm": 0.16141755878925323, + "learning_rate": 4.76742889820264e-06, + "loss": 1.0045, + "step": 32130 + }, + { + "epoch": 0.23265072712400559, + "grad_norm": 0.18277022242546082, + "learning_rate": 4.7673565115420534e-06, + "loss": 0.9937, + "step": 32140 + }, + { + "epoch": 0.23272311378459176, + "grad_norm": 0.17076079547405243, + "learning_rate": 4.767284124881467e-06, + "loss": 0.9845, + "step": 32150 + }, + { + "epoch": 0.23279550044517797, + "grad_norm": 0.20235513150691986, + "learning_rate": 4.767211738220881e-06, + "loss": 0.9765, + "step": 32160 + }, + { + "epoch": 0.23286788710576414, + "grad_norm": 0.2140243798494339, + "learning_rate": 4.767139351560295e-06, + "loss": 0.9939, + "step": 32170 + }, + { + "epoch": 0.23294027376635035, + "grad_norm": 0.17782087624073029, + "learning_rate": 4.767066964899709e-06, + "loss": 0.9904, + "step": 32180 + }, + { + "epoch": 0.23301266042693652, + "grad_norm": 0.15998302400112152, + "learning_rate": 4.766994578239122e-06, + "loss": 0.9819, + "step": 32190 + }, + { + "epoch": 0.2330850470875227, + "grad_norm": 0.17034156620502472, + "learning_rate": 4.766922191578536e-06, + "loss": 0.9825, + "step": 32200 + }, + { + "epoch": 0.2331574337481089, + "grad_norm": 0.1846659928560257, + "learning_rate": 4.7668498049179504e-06, + "loss": 0.9788, + "step": 32210 + }, + { + "epoch": 0.23322982040869508, + "grad_norm": 0.1705406755208969, + "learning_rate": 4.766777418257364e-06, + "loss": 0.9898, + "step": 32220 + }, + { + "epoch": 0.2333022070692813, + "grad_norm": 0.16298960149288177, + "learning_rate": 4.766705031596778e-06, + "loss": 1.008, + "step": 32230 + }, + { + "epoch": 0.23337459372986746, + "grad_norm": 0.1563328206539154, + "learning_rate": 4.766632644936191e-06, + "loss": 0.9991, + "step": 32240 + }, + { + "epoch": 0.23344698039045364, + "grad_norm": 0.19101522862911224, + "learning_rate": 4.766560258275606e-06, + "loss": 0.9866, + "step": 32250 + }, + { + "epoch": 0.23351936705103984, + "grad_norm": 0.16439121961593628, + "learning_rate": 4.766487871615019e-06, + "loss": 0.9864, + "step": 32260 + }, + { + "epoch": 0.23359175371162602, + "grad_norm": 0.16966257989406586, + "learning_rate": 4.766415484954433e-06, + "loss": 0.99, + "step": 32270 + }, + { + "epoch": 0.2336641403722122, + "grad_norm": 0.17119558155536652, + "learning_rate": 4.766343098293847e-06, + "loss": 0.985, + "step": 32280 + }, + { + "epoch": 0.2337365270327984, + "grad_norm": 0.16533613204956055, + "learning_rate": 4.766270711633261e-06, + "loss": 0.9725, + "step": 32290 + }, + { + "epoch": 0.23380891369338458, + "grad_norm": 0.21587282419204712, + "learning_rate": 4.766198324972675e-06, + "loss": 0.9851, + "step": 32300 + }, + { + "epoch": 0.23388130035397078, + "grad_norm": 0.1782499998807907, + "learning_rate": 4.766125938312088e-06, + "loss": 0.9908, + "step": 32310 + }, + { + "epoch": 0.23395368701455696, + "grad_norm": 0.17237718403339386, + "learning_rate": 4.766053551651502e-06, + "loss": 0.9953, + "step": 32320 + }, + { + "epoch": 0.23402607367514314, + "grad_norm": 0.16807182133197784, + "learning_rate": 4.765981164990916e-06, + "loss": 0.9937, + "step": 32330 + }, + { + "epoch": 0.23409846033572934, + "grad_norm": 0.17400771379470825, + "learning_rate": 4.76590877833033e-06, + "loss": 0.9917, + "step": 32340 + }, + { + "epoch": 0.23417084699631552, + "grad_norm": 0.16965161263942719, + "learning_rate": 4.765836391669744e-06, + "loss": 0.9828, + "step": 32350 + }, + { + "epoch": 0.2342432336569017, + "grad_norm": 0.17362885177135468, + "learning_rate": 4.765764005009157e-06, + "loss": 0.9778, + "step": 32360 + }, + { + "epoch": 0.2343156203174879, + "grad_norm": 0.16530746221542358, + "learning_rate": 4.765691618348572e-06, + "loss": 1.0049, + "step": 32370 + }, + { + "epoch": 0.23438800697807408, + "grad_norm": 0.19061051309108734, + "learning_rate": 4.765619231687985e-06, + "loss": 1.0037, + "step": 32380 + }, + { + "epoch": 0.23446039363866028, + "grad_norm": 0.19029873609542847, + "learning_rate": 4.765546845027399e-06, + "loss": 0.9754, + "step": 32390 + }, + { + "epoch": 0.23453278029924646, + "grad_norm": 0.17064446210861206, + "learning_rate": 4.7654744583668125e-06, + "loss": 0.996, + "step": 32400 + }, + { + "epoch": 0.23460516695983263, + "grad_norm": 0.17918898165225983, + "learning_rate": 4.765402071706227e-06, + "loss": 1.0014, + "step": 32410 + }, + { + "epoch": 0.23467755362041884, + "grad_norm": 0.16727639734745026, + "learning_rate": 4.765329685045641e-06, + "loss": 0.9911, + "step": 32420 + }, + { + "epoch": 0.23474994028100502, + "grad_norm": 0.16551131010055542, + "learning_rate": 4.765257298385054e-06, + "loss": 1.0024, + "step": 32430 + }, + { + "epoch": 0.2348223269415912, + "grad_norm": 0.18284402787685394, + "learning_rate": 4.765184911724468e-06, + "loss": 0.9876, + "step": 32440 + }, + { + "epoch": 0.2348947136021774, + "grad_norm": 0.1902831643819809, + "learning_rate": 4.765112525063882e-06, + "loss": 0.9884, + "step": 32450 + }, + { + "epoch": 0.23496710026276357, + "grad_norm": 0.19926203787326813, + "learning_rate": 4.765040138403295e-06, + "loss": 0.9889, + "step": 32460 + }, + { + "epoch": 0.23503948692334978, + "grad_norm": 0.1620095819234848, + "learning_rate": 4.764967751742709e-06, + "loss": 0.9882, + "step": 32470 + }, + { + "epoch": 0.23511187358393595, + "grad_norm": 0.19026455283164978, + "learning_rate": 4.764895365082123e-06, + "loss": 0.9652, + "step": 32480 + }, + { + "epoch": 0.23518426024452213, + "grad_norm": 0.18305736780166626, + "learning_rate": 4.764822978421537e-06, + "loss": 0.984, + "step": 32490 + }, + { + "epoch": 0.23525664690510834, + "grad_norm": 0.16484969854354858, + "learning_rate": 4.76475059176095e-06, + "loss": 0.9798, + "step": 32500 + }, + { + "epoch": 0.2353290335656945, + "grad_norm": 0.1810574233531952, + "learning_rate": 4.764678205100364e-06, + "loss": 0.9762, + "step": 32510 + }, + { + "epoch": 0.2354014202262807, + "grad_norm": 0.16701748967170715, + "learning_rate": 4.7646058184397785e-06, + "loss": 0.9907, + "step": 32520 + }, + { + "epoch": 0.2354738068868669, + "grad_norm": 0.17856666445732117, + "learning_rate": 4.764533431779192e-06, + "loss": 0.9772, + "step": 32530 + }, + { + "epoch": 0.23554619354745307, + "grad_norm": 0.19198378920555115, + "learning_rate": 4.764461045118606e-06, + "loss": 0.9729, + "step": 32540 + }, + { + "epoch": 0.23561858020803927, + "grad_norm": 0.18789614737033844, + "learning_rate": 4.764388658458019e-06, + "loss": 0.986, + "step": 32550 + }, + { + "epoch": 0.23569096686862545, + "grad_norm": 0.1816016137599945, + "learning_rate": 4.764316271797434e-06, + "loss": 0.9902, + "step": 32560 + }, + { + "epoch": 0.23576335352921163, + "grad_norm": 0.17151084542274475, + "learning_rate": 4.764243885136847e-06, + "loss": 1.0051, + "step": 32570 + }, + { + "epoch": 0.23583574018979783, + "grad_norm": 0.16930073499679565, + "learning_rate": 4.764171498476261e-06, + "loss": 0.9954, + "step": 32580 + }, + { + "epoch": 0.235908126850384, + "grad_norm": 0.16622294485569, + "learning_rate": 4.764099111815675e-06, + "loss": 0.9795, + "step": 32590 + }, + { + "epoch": 0.23598051351097019, + "grad_norm": 0.1712275892496109, + "learning_rate": 4.764026725155089e-06, + "loss": 0.978, + "step": 32600 + }, + { + "epoch": 0.2360529001715564, + "grad_norm": 0.17688055336475372, + "learning_rate": 4.763954338494503e-06, + "loss": 0.9731, + "step": 32610 + }, + { + "epoch": 0.23612528683214257, + "grad_norm": 0.1737777143716812, + "learning_rate": 4.763881951833916e-06, + "loss": 0.9967, + "step": 32620 + }, + { + "epoch": 0.23619767349272877, + "grad_norm": 0.2481534481048584, + "learning_rate": 4.76380956517333e-06, + "loss": 0.9943, + "step": 32630 + }, + { + "epoch": 0.23627006015331495, + "grad_norm": 0.1767176389694214, + "learning_rate": 4.7637371785127436e-06, + "loss": 0.9884, + "step": 32640 + }, + { + "epoch": 0.23634244681390112, + "grad_norm": 0.17765220999717712, + "learning_rate": 4.763664791852158e-06, + "loss": 0.9768, + "step": 32650 + }, + { + "epoch": 0.23641483347448733, + "grad_norm": 0.17526815831661224, + "learning_rate": 4.763592405191572e-06, + "loss": 0.9778, + "step": 32660 + }, + { + "epoch": 0.2364872201350735, + "grad_norm": 0.22117316722869873, + "learning_rate": 4.763520018530985e-06, + "loss": 0.9754, + "step": 32670 + }, + { + "epoch": 0.23655960679565968, + "grad_norm": 0.17404435575008392, + "learning_rate": 4.763447631870399e-06, + "loss": 0.9928, + "step": 32680 + }, + { + "epoch": 0.2366319934562459, + "grad_norm": 0.17994847893714905, + "learning_rate": 4.763375245209813e-06, + "loss": 0.9899, + "step": 32690 + }, + { + "epoch": 0.23670438011683206, + "grad_norm": 0.2047555297613144, + "learning_rate": 4.763302858549227e-06, + "loss": 0.986, + "step": 32700 + }, + { + "epoch": 0.23677676677741827, + "grad_norm": 0.19384850561618805, + "learning_rate": 4.7632304718886406e-06, + "loss": 0.9829, + "step": 32710 + }, + { + "epoch": 0.23684915343800444, + "grad_norm": 0.18670782446861267, + "learning_rate": 4.763158085228054e-06, + "loss": 0.979, + "step": 32720 + }, + { + "epoch": 0.23692154009859062, + "grad_norm": 0.1731204390525818, + "learning_rate": 4.763085698567469e-06, + "loss": 0.9934, + "step": 32730 + }, + { + "epoch": 0.23699392675917683, + "grad_norm": 0.1660483479499817, + "learning_rate": 4.763013311906882e-06, + "loss": 0.9775, + "step": 32740 + }, + { + "epoch": 0.237066313419763, + "grad_norm": 0.1656530648469925, + "learning_rate": 4.762940925246296e-06, + "loss": 0.9899, + "step": 32750 + }, + { + "epoch": 0.23713870008034918, + "grad_norm": 0.16526754200458527, + "learning_rate": 4.7628685385857095e-06, + "loss": 0.9766, + "step": 32760 + }, + { + "epoch": 0.23721108674093538, + "grad_norm": 0.16604174673557281, + "learning_rate": 4.762796151925124e-06, + "loss": 0.9883, + "step": 32770 + }, + { + "epoch": 0.23728347340152156, + "grad_norm": 0.1578882932662964, + "learning_rate": 4.7627237652645376e-06, + "loss": 0.9888, + "step": 32780 + }, + { + "epoch": 0.23735586006210777, + "grad_norm": 0.1657019555568695, + "learning_rate": 4.762651378603951e-06, + "loss": 0.9808, + "step": 32790 + }, + { + "epoch": 0.23742824672269394, + "grad_norm": 0.2883431017398834, + "learning_rate": 4.762578991943365e-06, + "loss": 0.9934, + "step": 32800 + }, + { + "epoch": 0.23750063338328012, + "grad_norm": 0.17559395730495453, + "learning_rate": 4.762506605282779e-06, + "loss": 0.9899, + "step": 32810 + }, + { + "epoch": 0.23757302004386632, + "grad_norm": 0.16590864956378937, + "learning_rate": 4.762434218622193e-06, + "loss": 0.9745, + "step": 32820 + }, + { + "epoch": 0.2376454067044525, + "grad_norm": 0.17122603952884674, + "learning_rate": 4.7623618319616065e-06, + "loss": 0.9744, + "step": 32830 + }, + { + "epoch": 0.2377177933650387, + "grad_norm": 0.16767603158950806, + "learning_rate": 4.76228944530102e-06, + "loss": 0.9875, + "step": 32840 + }, + { + "epoch": 0.23779018002562488, + "grad_norm": 0.16585096716880798, + "learning_rate": 4.7622170586404346e-06, + "loss": 0.9873, + "step": 32850 + }, + { + "epoch": 0.23786256668621106, + "grad_norm": 0.16618821024894714, + "learning_rate": 4.762144671979848e-06, + "loss": 0.9841, + "step": 32860 + }, + { + "epoch": 0.23793495334679726, + "grad_norm": 0.18130016326904297, + "learning_rate": 4.762072285319262e-06, + "loss": 0.9948, + "step": 32870 + }, + { + "epoch": 0.23800734000738344, + "grad_norm": 0.16200359165668488, + "learning_rate": 4.7619998986586754e-06, + "loss": 0.9913, + "step": 32880 + }, + { + "epoch": 0.23807972666796962, + "grad_norm": 0.16042807698249817, + "learning_rate": 4.76192751199809e-06, + "loss": 0.9835, + "step": 32890 + }, + { + "epoch": 0.23815211332855582, + "grad_norm": 0.1726037710905075, + "learning_rate": 4.7618551253375035e-06, + "loss": 0.9665, + "step": 32900 + }, + { + "epoch": 0.238224499989142, + "grad_norm": 0.16508442163467407, + "learning_rate": 4.761782738676917e-06, + "loss": 1.0037, + "step": 32910 + }, + { + "epoch": 0.2382968866497282, + "grad_norm": 0.16878628730773926, + "learning_rate": 4.761710352016331e-06, + "loss": 0.9897, + "step": 32920 + }, + { + "epoch": 0.23836927331031438, + "grad_norm": 0.17071186006069183, + "learning_rate": 4.761637965355745e-06, + "loss": 1.0066, + "step": 32930 + }, + { + "epoch": 0.23844165997090055, + "grad_norm": 0.16093328595161438, + "learning_rate": 4.761565578695159e-06, + "loss": 0.9938, + "step": 32940 + }, + { + "epoch": 0.23851404663148676, + "grad_norm": 0.181466743350029, + "learning_rate": 4.7614931920345724e-06, + "loss": 0.9863, + "step": 32950 + }, + { + "epoch": 0.23858643329207294, + "grad_norm": 0.19711147248744965, + "learning_rate": 4.761420805373986e-06, + "loss": 0.9867, + "step": 32960 + }, + { + "epoch": 0.2386588199526591, + "grad_norm": 0.17717312276363373, + "learning_rate": 4.7613484187134005e-06, + "loss": 0.9923, + "step": 32970 + }, + { + "epoch": 0.23873120661324532, + "grad_norm": 0.17629402875900269, + "learning_rate": 4.761276032052814e-06, + "loss": 1.0001, + "step": 32980 + }, + { + "epoch": 0.2388035932738315, + "grad_norm": 0.16870364546775818, + "learning_rate": 4.761203645392227e-06, + "loss": 0.9904, + "step": 32990 + }, + { + "epoch": 0.2388759799344177, + "grad_norm": 0.17565900087356567, + "learning_rate": 4.761131258731641e-06, + "loss": 0.9871, + "step": 33000 + }, + { + "epoch": 0.23894836659500387, + "grad_norm": 0.20653510093688965, + "learning_rate": 4.761058872071055e-06, + "loss": 0.9983, + "step": 33010 + }, + { + "epoch": 0.23902075325559005, + "grad_norm": 0.165169820189476, + "learning_rate": 4.760986485410469e-06, + "loss": 0.9812, + "step": 33020 + }, + { + "epoch": 0.23909313991617626, + "grad_norm": 0.29448917508125305, + "learning_rate": 4.760914098749882e-06, + "loss": 0.9681, + "step": 33030 + }, + { + "epoch": 0.23916552657676243, + "grad_norm": 0.1737428605556488, + "learning_rate": 4.760841712089297e-06, + "loss": 0.9895, + "step": 33040 + }, + { + "epoch": 0.2392379132373486, + "grad_norm": 0.15930554270744324, + "learning_rate": 4.76076932542871e-06, + "loss": 0.9923, + "step": 33050 + }, + { + "epoch": 0.23931029989793481, + "grad_norm": 0.15467441082000732, + "learning_rate": 4.760696938768124e-06, + "loss": 0.9791, + "step": 33060 + }, + { + "epoch": 0.239382686558521, + "grad_norm": 0.16547755897045135, + "learning_rate": 4.7606245521075375e-06, + "loss": 0.9836, + "step": 33070 + }, + { + "epoch": 0.2394550732191072, + "grad_norm": 0.1736575812101364, + "learning_rate": 4.760552165446952e-06, + "loss": 0.9818, + "step": 33080 + }, + { + "epoch": 0.23952745987969337, + "grad_norm": 0.17099529504776, + "learning_rate": 4.760479778786366e-06, + "loss": 0.9936, + "step": 33090 + }, + { + "epoch": 0.23959984654027955, + "grad_norm": 0.16323433816432953, + "learning_rate": 4.760407392125779e-06, + "loss": 0.9748, + "step": 33100 + }, + { + "epoch": 0.23967223320086575, + "grad_norm": 0.174141988158226, + "learning_rate": 4.760335005465193e-06, + "loss": 0.9935, + "step": 33110 + }, + { + "epoch": 0.23974461986145193, + "grad_norm": 0.17322736978530884, + "learning_rate": 4.760262618804607e-06, + "loss": 0.9852, + "step": 33120 + }, + { + "epoch": 0.2398170065220381, + "grad_norm": 0.1788867712020874, + "learning_rate": 4.760190232144021e-06, + "loss": 0.9934, + "step": 33130 + }, + { + "epoch": 0.2398893931826243, + "grad_norm": 0.1551644206047058, + "learning_rate": 4.7601178454834345e-06, + "loss": 0.9858, + "step": 33140 + }, + { + "epoch": 0.2399617798432105, + "grad_norm": 0.16504356265068054, + "learning_rate": 4.760045458822848e-06, + "loss": 0.974, + "step": 33150 + }, + { + "epoch": 0.2400341665037967, + "grad_norm": 0.16401821374893188, + "learning_rate": 4.759973072162263e-06, + "loss": 0.9849, + "step": 33160 + }, + { + "epoch": 0.24010655316438287, + "grad_norm": 0.1985578089952469, + "learning_rate": 4.759900685501676e-06, + "loss": 0.9823, + "step": 33170 + }, + { + "epoch": 0.24017893982496905, + "grad_norm": 0.1741316169500351, + "learning_rate": 4.75982829884109e-06, + "loss": 0.9926, + "step": 33180 + }, + { + "epoch": 0.24025132648555525, + "grad_norm": 0.17418281733989716, + "learning_rate": 4.7597559121805035e-06, + "loss": 0.9871, + "step": 33190 + }, + { + "epoch": 0.24032371314614143, + "grad_norm": 0.16395236551761627, + "learning_rate": 4.759683525519918e-06, + "loss": 0.9792, + "step": 33200 + }, + { + "epoch": 0.2403960998067276, + "grad_norm": 0.1797424554824829, + "learning_rate": 4.7596111388593315e-06, + "loss": 0.9794, + "step": 33210 + }, + { + "epoch": 0.2404684864673138, + "grad_norm": 0.15799680352210999, + "learning_rate": 4.759538752198745e-06, + "loss": 0.9801, + "step": 33220 + }, + { + "epoch": 0.24054087312789998, + "grad_norm": 0.1746169626712799, + "learning_rate": 4.759466365538159e-06, + "loss": 0.9923, + "step": 33230 + }, + { + "epoch": 0.2406132597884862, + "grad_norm": 0.16156165301799774, + "learning_rate": 4.759393978877573e-06, + "loss": 0.9762, + "step": 33240 + }, + { + "epoch": 0.24068564644907237, + "grad_norm": 0.1618238091468811, + "learning_rate": 4.759321592216987e-06, + "loss": 0.9839, + "step": 33250 + }, + { + "epoch": 0.24075803310965854, + "grad_norm": 0.17595741152763367, + "learning_rate": 4.7592492055564005e-06, + "loss": 0.9902, + "step": 33260 + }, + { + "epoch": 0.24083041977024475, + "grad_norm": 0.16890239715576172, + "learning_rate": 4.759176818895814e-06, + "loss": 0.9763, + "step": 33270 + }, + { + "epoch": 0.24090280643083092, + "grad_norm": 0.17769628763198853, + "learning_rate": 4.759104432235228e-06, + "loss": 0.9644, + "step": 33280 + }, + { + "epoch": 0.2409751930914171, + "grad_norm": 0.17169831693172455, + "learning_rate": 4.759032045574642e-06, + "loss": 0.9999, + "step": 33290 + }, + { + "epoch": 0.2410475797520033, + "grad_norm": 0.16996458172798157, + "learning_rate": 4.758959658914056e-06, + "loss": 0.9833, + "step": 33300 + }, + { + "epoch": 0.24111996641258948, + "grad_norm": 0.1796058863401413, + "learning_rate": 4.758887272253469e-06, + "loss": 0.9757, + "step": 33310 + }, + { + "epoch": 0.24119235307317569, + "grad_norm": 0.17819707095623016, + "learning_rate": 4.758814885592883e-06, + "loss": 0.9874, + "step": 33320 + }, + { + "epoch": 0.24126473973376186, + "grad_norm": 0.16091227531433105, + "learning_rate": 4.7587424989322975e-06, + "loss": 0.9809, + "step": 33330 + }, + { + "epoch": 0.24133712639434804, + "grad_norm": 0.17495249211788177, + "learning_rate": 4.758670112271711e-06, + "loss": 0.9992, + "step": 33340 + }, + { + "epoch": 0.24140951305493424, + "grad_norm": 0.16773755848407745, + "learning_rate": 4.758597725611125e-06, + "loss": 0.9884, + "step": 33350 + }, + { + "epoch": 0.24148189971552042, + "grad_norm": 0.23348310589790344, + "learning_rate": 4.758525338950538e-06, + "loss": 0.9905, + "step": 33360 + }, + { + "epoch": 0.2415542863761066, + "grad_norm": 0.1686268001794815, + "learning_rate": 4.758452952289953e-06, + "loss": 1.0038, + "step": 33370 + }, + { + "epoch": 0.2416266730366928, + "grad_norm": 0.15924613177776337, + "learning_rate": 4.758380565629366e-06, + "loss": 0.9911, + "step": 33380 + }, + { + "epoch": 0.24169905969727898, + "grad_norm": 0.19010992348194122, + "learning_rate": 4.75830817896878e-06, + "loss": 0.9824, + "step": 33390 + }, + { + "epoch": 0.24177144635786518, + "grad_norm": 0.18180611729621887, + "learning_rate": 4.758235792308194e-06, + "loss": 0.9813, + "step": 33400 + }, + { + "epoch": 0.24184383301845136, + "grad_norm": 0.1697949916124344, + "learning_rate": 4.758163405647608e-06, + "loss": 0.9783, + "step": 33410 + }, + { + "epoch": 0.24191621967903754, + "grad_norm": 0.1699085682630539, + "learning_rate": 4.758091018987022e-06, + "loss": 0.9754, + "step": 33420 + }, + { + "epoch": 0.24198860633962374, + "grad_norm": 0.18635696172714233, + "learning_rate": 4.758018632326435e-06, + "loss": 0.9779, + "step": 33430 + }, + { + "epoch": 0.24206099300020992, + "grad_norm": 0.17529450356960297, + "learning_rate": 4.757946245665849e-06, + "loss": 0.9768, + "step": 33440 + }, + { + "epoch": 0.24213337966079612, + "grad_norm": 0.1667739450931549, + "learning_rate": 4.757873859005263e-06, + "loss": 0.9699, + "step": 33450 + }, + { + "epoch": 0.2422057663213823, + "grad_norm": 0.17115871608257294, + "learning_rate": 4.757801472344677e-06, + "loss": 0.9964, + "step": 33460 + }, + { + "epoch": 0.24227815298196848, + "grad_norm": 0.19105277955532074, + "learning_rate": 4.757729085684091e-06, + "loss": 0.9917, + "step": 33470 + }, + { + "epoch": 0.24235053964255468, + "grad_norm": 0.16427116096019745, + "learning_rate": 4.757656699023504e-06, + "loss": 0.9769, + "step": 33480 + }, + { + "epoch": 0.24242292630314086, + "grad_norm": 0.17592819035053253, + "learning_rate": 4.757584312362919e-06, + "loss": 0.9819, + "step": 33490 + }, + { + "epoch": 0.24249531296372703, + "grad_norm": 0.22349461913108826, + "learning_rate": 4.757511925702332e-06, + "loss": 0.9884, + "step": 33500 + }, + { + "epoch": 0.24256769962431324, + "grad_norm": 0.15861837565898895, + "learning_rate": 4.757439539041746e-06, + "loss": 0.9811, + "step": 33510 + }, + { + "epoch": 0.24264008628489941, + "grad_norm": 0.1849350929260254, + "learning_rate": 4.7573671523811596e-06, + "loss": 0.9861, + "step": 33520 + }, + { + "epoch": 0.24271247294548562, + "grad_norm": 0.18118155002593994, + "learning_rate": 4.757294765720573e-06, + "loss": 0.9915, + "step": 33530 + }, + { + "epoch": 0.2427848596060718, + "grad_norm": 0.1626245677471161, + "learning_rate": 4.757222379059987e-06, + "loss": 1.0021, + "step": 33540 + }, + { + "epoch": 0.24285724626665797, + "grad_norm": 0.1634633094072342, + "learning_rate": 4.7571499923994e-06, + "loss": 0.982, + "step": 33550 + }, + { + "epoch": 0.24292963292724418, + "grad_norm": 0.23890525102615356, + "learning_rate": 4.757077605738815e-06, + "loss": 0.9864, + "step": 33560 + }, + { + "epoch": 0.24300201958783035, + "grad_norm": 0.1567389816045761, + "learning_rate": 4.7570052190782285e-06, + "loss": 0.9626, + "step": 33570 + }, + { + "epoch": 0.24307440624841653, + "grad_norm": 0.16346383094787598, + "learning_rate": 4.756932832417642e-06, + "loss": 0.9792, + "step": 33580 + }, + { + "epoch": 0.24314679290900273, + "grad_norm": 0.1675373762845993, + "learning_rate": 4.756860445757056e-06, + "loss": 0.9689, + "step": 33590 + }, + { + "epoch": 0.2432191795695889, + "grad_norm": 0.1947060525417328, + "learning_rate": 4.75678805909647e-06, + "loss": 0.9719, + "step": 33600 + }, + { + "epoch": 0.24329156623017512, + "grad_norm": 0.17084960639476776, + "learning_rate": 4.756715672435884e-06, + "loss": 0.9709, + "step": 33610 + }, + { + "epoch": 0.2433639528907613, + "grad_norm": 0.16235877573490143, + "learning_rate": 4.756643285775297e-06, + "loss": 0.9808, + "step": 33620 + }, + { + "epoch": 0.24343633955134747, + "grad_norm": 0.16720472276210785, + "learning_rate": 4.756570899114711e-06, + "loss": 0.9817, + "step": 33630 + }, + { + "epoch": 0.24350872621193367, + "grad_norm": 0.16974382102489471, + "learning_rate": 4.7564985124541255e-06, + "loss": 0.9774, + "step": 33640 + }, + { + "epoch": 0.24358111287251985, + "grad_norm": 0.16924746334552765, + "learning_rate": 4.756426125793539e-06, + "loss": 0.9672, + "step": 33650 + }, + { + "epoch": 0.24365349953310603, + "grad_norm": 0.19946692883968353, + "learning_rate": 4.756353739132953e-06, + "loss": 1.0038, + "step": 33660 + }, + { + "epoch": 0.24372588619369223, + "grad_norm": 0.17359711229801178, + "learning_rate": 4.756281352472366e-06, + "loss": 0.9763, + "step": 33670 + }, + { + "epoch": 0.2437982728542784, + "grad_norm": 0.1743346005678177, + "learning_rate": 4.756208965811781e-06, + "loss": 0.9859, + "step": 33680 + }, + { + "epoch": 0.2438706595148646, + "grad_norm": 0.1658143252134323, + "learning_rate": 4.7561365791511944e-06, + "loss": 1.0053, + "step": 33690 + }, + { + "epoch": 0.2439430461754508, + "grad_norm": 0.17681175470352173, + "learning_rate": 4.756064192490608e-06, + "loss": 0.9842, + "step": 33700 + }, + { + "epoch": 0.24401543283603697, + "grad_norm": 0.17499324679374695, + "learning_rate": 4.755991805830022e-06, + "loss": 0.9822, + "step": 33710 + }, + { + "epoch": 0.24408781949662317, + "grad_norm": 0.15076082944869995, + "learning_rate": 4.755919419169436e-06, + "loss": 0.97, + "step": 33720 + }, + { + "epoch": 0.24416020615720935, + "grad_norm": 0.17643196880817413, + "learning_rate": 4.75584703250885e-06, + "loss": 0.9888, + "step": 33730 + }, + { + "epoch": 0.24423259281779552, + "grad_norm": 0.24239002168178558, + "learning_rate": 4.755774645848263e-06, + "loss": 0.9886, + "step": 33740 + }, + { + "epoch": 0.24430497947838173, + "grad_norm": 0.16595035791397095, + "learning_rate": 4.755702259187677e-06, + "loss": 0.991, + "step": 33750 + }, + { + "epoch": 0.2443773661389679, + "grad_norm": 0.1724718064069748, + "learning_rate": 4.7556298725270914e-06, + "loss": 0.9744, + "step": 33760 + }, + { + "epoch": 0.2444497527995541, + "grad_norm": 0.17158448696136475, + "learning_rate": 4.755557485866505e-06, + "loss": 0.9853, + "step": 33770 + }, + { + "epoch": 0.2445221394601403, + "grad_norm": 0.23816993832588196, + "learning_rate": 4.755485099205919e-06, + "loss": 1.0021, + "step": 33780 + }, + { + "epoch": 0.24459452612072646, + "grad_norm": 0.16831299662590027, + "learning_rate": 4.755412712545332e-06, + "loss": 0.9886, + "step": 33790 + }, + { + "epoch": 0.24466691278131267, + "grad_norm": 0.1661224067211151, + "learning_rate": 4.755340325884747e-06, + "loss": 1.002, + "step": 33800 + }, + { + "epoch": 0.24473929944189884, + "grad_norm": 0.16861529648303986, + "learning_rate": 4.75526793922416e-06, + "loss": 0.9858, + "step": 33810 + }, + { + "epoch": 0.24481168610248502, + "grad_norm": 0.17031748592853546, + "learning_rate": 4.755195552563574e-06, + "loss": 0.9806, + "step": 33820 + }, + { + "epoch": 0.24488407276307123, + "grad_norm": 0.17578351497650146, + "learning_rate": 4.755123165902988e-06, + "loss": 0.9785, + "step": 33830 + }, + { + "epoch": 0.2449564594236574, + "grad_norm": 0.18001072108745575, + "learning_rate": 4.755050779242402e-06, + "loss": 0.9939, + "step": 33840 + }, + { + "epoch": 0.2450288460842436, + "grad_norm": 0.18239019811153412, + "learning_rate": 4.754978392581816e-06, + "loss": 0.9748, + "step": 33850 + }, + { + "epoch": 0.24510123274482978, + "grad_norm": 0.17754653096199036, + "learning_rate": 4.754906005921229e-06, + "loss": 0.989, + "step": 33860 + }, + { + "epoch": 0.24517361940541596, + "grad_norm": 0.1725796014070511, + "learning_rate": 4.754833619260643e-06, + "loss": 0.9888, + "step": 33870 + }, + { + "epoch": 0.24524600606600216, + "grad_norm": 0.17696940898895264, + "learning_rate": 4.754761232600057e-06, + "loss": 0.9773, + "step": 33880 + }, + { + "epoch": 0.24531839272658834, + "grad_norm": 0.18559856712818146, + "learning_rate": 4.754688845939471e-06, + "loss": 0.9736, + "step": 33890 + }, + { + "epoch": 0.24539077938717452, + "grad_norm": 0.17658168077468872, + "learning_rate": 4.754616459278885e-06, + "loss": 1.0012, + "step": 33900 + }, + { + "epoch": 0.24546316604776072, + "grad_norm": 0.15067683160305023, + "learning_rate": 4.754544072618298e-06, + "loss": 0.982, + "step": 33910 + }, + { + "epoch": 0.2455355527083469, + "grad_norm": 0.18849797546863556, + "learning_rate": 4.754471685957712e-06, + "loss": 0.9798, + "step": 33920 + }, + { + "epoch": 0.2456079393689331, + "grad_norm": 0.16818571090698242, + "learning_rate": 4.754399299297126e-06, + "loss": 0.9772, + "step": 33930 + }, + { + "epoch": 0.24568032602951928, + "grad_norm": 0.17547225952148438, + "learning_rate": 4.75432691263654e-06, + "loss": 0.9829, + "step": 33940 + }, + { + "epoch": 0.24575271269010546, + "grad_norm": 0.17536889016628265, + "learning_rate": 4.7542545259759535e-06, + "loss": 0.9678, + "step": 33950 + }, + { + "epoch": 0.24582509935069166, + "grad_norm": 0.17033427953720093, + "learning_rate": 4.754182139315367e-06, + "loss": 0.9967, + "step": 33960 + }, + { + "epoch": 0.24589748601127784, + "grad_norm": 0.18375976383686066, + "learning_rate": 4.754109752654782e-06, + "loss": 0.9946, + "step": 33970 + }, + { + "epoch": 0.24596987267186404, + "grad_norm": 0.15287001430988312, + "learning_rate": 4.754037365994195e-06, + "loss": 0.9848, + "step": 33980 + }, + { + "epoch": 0.24604225933245022, + "grad_norm": 0.17237313091754913, + "learning_rate": 4.753964979333609e-06, + "loss": 0.9876, + "step": 33990 + }, + { + "epoch": 0.2461146459930364, + "grad_norm": 0.16331906616687775, + "learning_rate": 4.7538925926730225e-06, + "loss": 0.9934, + "step": 34000 + }, + { + "epoch": 0.2461870326536226, + "grad_norm": 0.17287370562553406, + "learning_rate": 4.753820206012437e-06, + "loss": 0.9888, + "step": 34010 + }, + { + "epoch": 0.24625941931420878, + "grad_norm": 0.16767475008964539, + "learning_rate": 4.7537478193518505e-06, + "loss": 0.9969, + "step": 34020 + }, + { + "epoch": 0.24633180597479495, + "grad_norm": 0.1597083956003189, + "learning_rate": 4.753675432691264e-06, + "loss": 0.9748, + "step": 34030 + }, + { + "epoch": 0.24640419263538116, + "grad_norm": 0.17577607929706573, + "learning_rate": 4.753603046030678e-06, + "loss": 0.9894, + "step": 34040 + }, + { + "epoch": 0.24647657929596734, + "grad_norm": 0.1721176654100418, + "learning_rate": 4.753530659370091e-06, + "loss": 0.9785, + "step": 34050 + }, + { + "epoch": 0.24654896595655354, + "grad_norm": 0.17841075360774994, + "learning_rate": 4.753458272709505e-06, + "loss": 0.9734, + "step": 34060 + }, + { + "epoch": 0.24662135261713972, + "grad_norm": 0.18525052070617676, + "learning_rate": 4.753385886048919e-06, + "loss": 0.9842, + "step": 34070 + }, + { + "epoch": 0.2466937392777259, + "grad_norm": 0.18916094303131104, + "learning_rate": 4.753313499388333e-06, + "loss": 0.9919, + "step": 34080 + }, + { + "epoch": 0.2467661259383121, + "grad_norm": 0.21833638846874237, + "learning_rate": 4.753241112727747e-06, + "loss": 0.9952, + "step": 34090 + }, + { + "epoch": 0.24683851259889827, + "grad_norm": 0.1600702404975891, + "learning_rate": 4.75316872606716e-06, + "loss": 0.9731, + "step": 34100 + }, + { + "epoch": 0.24691089925948445, + "grad_norm": 0.16110806167125702, + "learning_rate": 4.753096339406574e-06, + "loss": 0.9743, + "step": 34110 + }, + { + "epoch": 0.24698328592007066, + "grad_norm": 0.16948141157627106, + "learning_rate": 4.753023952745988e-06, + "loss": 0.9904, + "step": 34120 + }, + { + "epoch": 0.24705567258065683, + "grad_norm": 0.16437900066375732, + "learning_rate": 4.752951566085402e-06, + "loss": 0.9714, + "step": 34130 + }, + { + "epoch": 0.24712805924124304, + "grad_norm": 0.1707761436700821, + "learning_rate": 4.752879179424816e-06, + "loss": 0.9948, + "step": 34140 + }, + { + "epoch": 0.2472004459018292, + "grad_norm": 0.17766614258289337, + "learning_rate": 4.752806792764229e-06, + "loss": 0.9811, + "step": 34150 + }, + { + "epoch": 0.2472728325624154, + "grad_norm": 0.16982759535312653, + "learning_rate": 4.752734406103644e-06, + "loss": 0.988, + "step": 34160 + }, + { + "epoch": 0.2473452192230016, + "grad_norm": 0.1820429265499115, + "learning_rate": 4.752662019443057e-06, + "loss": 0.9801, + "step": 34170 + }, + { + "epoch": 0.24741760588358777, + "grad_norm": 0.17136429250240326, + "learning_rate": 4.752589632782471e-06, + "loss": 0.975, + "step": 34180 + }, + { + "epoch": 0.24748999254417395, + "grad_norm": 0.17014792561531067, + "learning_rate": 4.7525172461218846e-06, + "loss": 0.985, + "step": 34190 + }, + { + "epoch": 0.24756237920476015, + "grad_norm": 0.17695526778697968, + "learning_rate": 4.752444859461299e-06, + "loss": 0.9866, + "step": 34200 + }, + { + "epoch": 0.24763476586534633, + "grad_norm": 0.18461161851882935, + "learning_rate": 4.752372472800713e-06, + "loss": 0.9922, + "step": 34210 + }, + { + "epoch": 0.24770715252593253, + "grad_norm": 0.23707006871700287, + "learning_rate": 4.752300086140126e-06, + "loss": 0.9964, + "step": 34220 + }, + { + "epoch": 0.2477795391865187, + "grad_norm": 0.16921405494213104, + "learning_rate": 4.75222769947954e-06, + "loss": 0.9831, + "step": 34230 + }, + { + "epoch": 0.2478519258471049, + "grad_norm": 0.18006683886051178, + "learning_rate": 4.752155312818954e-06, + "loss": 0.9915, + "step": 34240 + }, + { + "epoch": 0.2479243125076911, + "grad_norm": 0.1586627960205078, + "learning_rate": 4.752082926158368e-06, + "loss": 0.9765, + "step": 34250 + }, + { + "epoch": 0.24799669916827727, + "grad_norm": 0.17502956092357635, + "learning_rate": 4.7520105394977816e-06, + "loss": 0.9833, + "step": 34260 + }, + { + "epoch": 0.24806908582886344, + "grad_norm": 0.17137262225151062, + "learning_rate": 4.751938152837195e-06, + "loss": 0.9813, + "step": 34270 + }, + { + "epoch": 0.24814147248944965, + "grad_norm": 0.1806812733411789, + "learning_rate": 4.75186576617661e-06, + "loss": 0.9949, + "step": 34280 + }, + { + "epoch": 0.24821385915003583, + "grad_norm": 0.17505182325839996, + "learning_rate": 4.751793379516023e-06, + "loss": 0.9949, + "step": 34290 + }, + { + "epoch": 0.24828624581062203, + "grad_norm": 0.17659908533096313, + "learning_rate": 4.751720992855437e-06, + "loss": 0.9938, + "step": 34300 + }, + { + "epoch": 0.2483586324712082, + "grad_norm": 0.18105976283550262, + "learning_rate": 4.7516486061948505e-06, + "loss": 0.9778, + "step": 34310 + }, + { + "epoch": 0.24843101913179438, + "grad_norm": 0.18048040568828583, + "learning_rate": 4.751576219534265e-06, + "loss": 0.9918, + "step": 34320 + }, + { + "epoch": 0.2485034057923806, + "grad_norm": 0.17734596133232117, + "learning_rate": 4.7515038328736786e-06, + "loss": 0.9889, + "step": 34330 + }, + { + "epoch": 0.24857579245296677, + "grad_norm": 0.16741421818733215, + "learning_rate": 4.751431446213092e-06, + "loss": 0.9854, + "step": 34340 + }, + { + "epoch": 0.24864817911355294, + "grad_norm": 0.17737902700901031, + "learning_rate": 4.751359059552506e-06, + "loss": 0.9793, + "step": 34350 + }, + { + "epoch": 0.24872056577413915, + "grad_norm": 0.1893605887889862, + "learning_rate": 4.75128667289192e-06, + "loss": 0.9778, + "step": 34360 + }, + { + "epoch": 0.24879295243472532, + "grad_norm": 0.16508178412914276, + "learning_rate": 4.751214286231334e-06, + "loss": 0.9873, + "step": 34370 + }, + { + "epoch": 0.24886533909531153, + "grad_norm": 0.1740712970495224, + "learning_rate": 4.7511418995707475e-06, + "loss": 0.9862, + "step": 34380 + }, + { + "epoch": 0.2489377257558977, + "grad_norm": 0.1603364646434784, + "learning_rate": 4.751069512910161e-06, + "loss": 0.978, + "step": 34390 + }, + { + "epoch": 0.24901011241648388, + "grad_norm": 0.18249155580997467, + "learning_rate": 4.750997126249576e-06, + "loss": 0.9909, + "step": 34400 + }, + { + "epoch": 0.24908249907707009, + "grad_norm": 0.1699957698583603, + "learning_rate": 4.750924739588989e-06, + "loss": 0.9915, + "step": 34410 + }, + { + "epoch": 0.24915488573765626, + "grad_norm": 0.17712600529193878, + "learning_rate": 4.750852352928403e-06, + "loss": 0.976, + "step": 34420 + }, + { + "epoch": 0.24922727239824244, + "grad_norm": 0.17744386196136475, + "learning_rate": 4.7507799662678164e-06, + "loss": 0.9728, + "step": 34430 + }, + { + "epoch": 0.24929965905882864, + "grad_norm": 0.15788348019123077, + "learning_rate": 4.750707579607231e-06, + "loss": 0.9756, + "step": 34440 + }, + { + "epoch": 0.24937204571941482, + "grad_norm": 0.1607343703508377, + "learning_rate": 4.7506351929466445e-06, + "loss": 0.9801, + "step": 34450 + }, + { + "epoch": 0.24944443238000102, + "grad_norm": 0.18192967772483826, + "learning_rate": 4.750562806286058e-06, + "loss": 0.992, + "step": 34460 + }, + { + "epoch": 0.2495168190405872, + "grad_norm": 0.17542362213134766, + "learning_rate": 4.750490419625472e-06, + "loss": 0.9864, + "step": 34470 + }, + { + "epoch": 0.24958920570117338, + "grad_norm": 0.17715929448604584, + "learning_rate": 4.750418032964886e-06, + "loss": 0.9606, + "step": 34480 + }, + { + "epoch": 0.24966159236175958, + "grad_norm": 0.16454724967479706, + "learning_rate": 4.7503456463043e-06, + "loss": 0.9867, + "step": 34490 + }, + { + "epoch": 0.24973397902234576, + "grad_norm": 0.1616222858428955, + "learning_rate": 4.7502732596437134e-06, + "loss": 0.9823, + "step": 34500 + }, + { + "epoch": 0.24980636568293194, + "grad_norm": 0.21941696107387543, + "learning_rate": 4.750200872983127e-06, + "loss": 0.9895, + "step": 34510 + }, + { + "epoch": 0.24987875234351814, + "grad_norm": 0.20772811770439148, + "learning_rate": 4.750128486322541e-06, + "loss": 0.9953, + "step": 34520 + }, + { + "epoch": 0.24995113900410432, + "grad_norm": 0.172512024641037, + "learning_rate": 4.750056099661955e-06, + "loss": 0.9808, + "step": 34530 + }, + { + "epoch": 0.2500235256646905, + "grad_norm": 0.1570970118045807, + "learning_rate": 4.749983713001369e-06, + "loss": 0.9893, + "step": 34540 + }, + { + "epoch": 0.25009591232527667, + "grad_norm": 0.1649353802204132, + "learning_rate": 4.749911326340782e-06, + "loss": 0.9848, + "step": 34550 + }, + { + "epoch": 0.2501682989858629, + "grad_norm": 0.18175838887691498, + "learning_rate": 4.749838939680196e-06, + "loss": 0.9766, + "step": 34560 + }, + { + "epoch": 0.2502406856464491, + "grad_norm": 0.16872644424438477, + "learning_rate": 4.7497665530196104e-06, + "loss": 0.9747, + "step": 34570 + }, + { + "epoch": 0.25031307230703526, + "grad_norm": 0.16489218175411224, + "learning_rate": 4.749694166359023e-06, + "loss": 0.9861, + "step": 34580 + }, + { + "epoch": 0.25038545896762143, + "grad_norm": 0.15996752679347992, + "learning_rate": 4.749621779698438e-06, + "loss": 0.9866, + "step": 34590 + }, + { + "epoch": 0.2504578456282076, + "grad_norm": 0.1725120097398758, + "learning_rate": 4.749549393037851e-06, + "loss": 0.9784, + "step": 34600 + }, + { + "epoch": 0.25053023228879384, + "grad_norm": 0.167087122797966, + "learning_rate": 4.749477006377265e-06, + "loss": 0.9864, + "step": 34610 + }, + { + "epoch": 0.25060261894938, + "grad_norm": 0.1733073741197586, + "learning_rate": 4.7494046197166785e-06, + "loss": 0.964, + "step": 34620 + }, + { + "epoch": 0.2506750056099662, + "grad_norm": 0.1666453331708908, + "learning_rate": 4.749332233056093e-06, + "loss": 0.9815, + "step": 34630 + }, + { + "epoch": 0.25074739227055237, + "grad_norm": 0.2046760767698288, + "learning_rate": 4.749259846395507e-06, + "loss": 0.974, + "step": 34640 + }, + { + "epoch": 0.25081977893113855, + "grad_norm": 0.1714048534631729, + "learning_rate": 4.74918745973492e-06, + "loss": 0.9842, + "step": 34650 + }, + { + "epoch": 0.2508921655917248, + "grad_norm": 0.17349305748939514, + "learning_rate": 4.749115073074334e-06, + "loss": 0.9822, + "step": 34660 + }, + { + "epoch": 0.25096455225231096, + "grad_norm": 0.1701555848121643, + "learning_rate": 4.749042686413748e-06, + "loss": 0.9751, + "step": 34670 + }, + { + "epoch": 0.25103693891289713, + "grad_norm": 0.1766299158334732, + "learning_rate": 4.748970299753162e-06, + "loss": 0.9783, + "step": 34680 + }, + { + "epoch": 0.2511093255734833, + "grad_norm": 0.16215893626213074, + "learning_rate": 4.7488979130925755e-06, + "loss": 0.9798, + "step": 34690 + }, + { + "epoch": 0.2511817122340695, + "grad_norm": 0.16153815388679504, + "learning_rate": 4.748825526431989e-06, + "loss": 0.9638, + "step": 34700 + }, + { + "epoch": 0.2512540988946557, + "grad_norm": 0.17262734472751617, + "learning_rate": 4.748753139771403e-06, + "loss": 0.9585, + "step": 34710 + }, + { + "epoch": 0.2513264855552419, + "grad_norm": 0.1748388707637787, + "learning_rate": 4.748680753110817e-06, + "loss": 0.9755, + "step": 34720 + }, + { + "epoch": 0.2513988722158281, + "grad_norm": 0.16861870884895325, + "learning_rate": 4.748608366450231e-06, + "loss": 0.9901, + "step": 34730 + }, + { + "epoch": 0.25147125887641425, + "grad_norm": 0.1585874855518341, + "learning_rate": 4.7485359797896445e-06, + "loss": 0.9944, + "step": 34740 + }, + { + "epoch": 0.2515436455370004, + "grad_norm": 0.16749711334705353, + "learning_rate": 4.748463593129058e-06, + "loss": 0.967, + "step": 34750 + }, + { + "epoch": 0.2516160321975866, + "grad_norm": 0.19604967534542084, + "learning_rate": 4.7483912064684725e-06, + "loss": 0.991, + "step": 34760 + }, + { + "epoch": 0.25168841885817284, + "grad_norm": 0.16052144765853882, + "learning_rate": 4.748318819807886e-06, + "loss": 0.9695, + "step": 34770 + }, + { + "epoch": 0.251760805518759, + "grad_norm": 0.18518640100955963, + "learning_rate": 4.7482464331473e-06, + "loss": 0.9893, + "step": 34780 + }, + { + "epoch": 0.2518331921793452, + "grad_norm": 0.18303367495536804, + "learning_rate": 4.748174046486713e-06, + "loss": 0.9683, + "step": 34790 + }, + { + "epoch": 0.25190557883993137, + "grad_norm": 0.178907573223114, + "learning_rate": 4.748101659826128e-06, + "loss": 0.9822, + "step": 34800 + }, + { + "epoch": 0.25197796550051754, + "grad_norm": 0.1701265573501587, + "learning_rate": 4.7480292731655415e-06, + "loss": 0.9788, + "step": 34810 + }, + { + "epoch": 0.2520503521611038, + "grad_norm": 0.15824520587921143, + "learning_rate": 4.747956886504955e-06, + "loss": 0.9705, + "step": 34820 + }, + { + "epoch": 0.25212273882168995, + "grad_norm": 0.16767218708992004, + "learning_rate": 4.747884499844369e-06, + "loss": 0.9993, + "step": 34830 + }, + { + "epoch": 0.25219512548227613, + "grad_norm": 0.16409531235694885, + "learning_rate": 4.747812113183783e-06, + "loss": 0.986, + "step": 34840 + }, + { + "epoch": 0.2522675121428623, + "grad_norm": 0.17260675132274628, + "learning_rate": 4.747739726523197e-06, + "loss": 0.9677, + "step": 34850 + }, + { + "epoch": 0.2523398988034485, + "grad_norm": 0.15789243578910828, + "learning_rate": 4.74766733986261e-06, + "loss": 0.9795, + "step": 34860 + }, + { + "epoch": 0.2524122854640347, + "grad_norm": 0.9138031005859375, + "learning_rate": 4.747594953202024e-06, + "loss": 0.9824, + "step": 34870 + }, + { + "epoch": 0.2524846721246209, + "grad_norm": 0.1509862244129181, + "learning_rate": 4.7475225665414385e-06, + "loss": 0.9766, + "step": 34880 + }, + { + "epoch": 0.25255705878520707, + "grad_norm": 0.15754196047782898, + "learning_rate": 4.747450179880852e-06, + "loss": 0.9781, + "step": 34890 + }, + { + "epoch": 0.25262944544579324, + "grad_norm": 0.1629686951637268, + "learning_rate": 4.747377793220266e-06, + "loss": 0.9731, + "step": 34900 + }, + { + "epoch": 0.2527018321063794, + "grad_norm": 0.17362354695796967, + "learning_rate": 4.747305406559679e-06, + "loss": 0.9777, + "step": 34910 + }, + { + "epoch": 0.2527742187669656, + "grad_norm": 0.1758502572774887, + "learning_rate": 4.747233019899094e-06, + "loss": 0.9851, + "step": 34920 + }, + { + "epoch": 0.25284660542755183, + "grad_norm": 0.16180376708507538, + "learning_rate": 4.747160633238507e-06, + "loss": 0.9728, + "step": 34930 + }, + { + "epoch": 0.252918992088138, + "grad_norm": 0.18018090724945068, + "learning_rate": 4.747088246577921e-06, + "loss": 0.9682, + "step": 34940 + }, + { + "epoch": 0.2529913787487242, + "grad_norm": 0.1669149100780487, + "learning_rate": 4.747015859917335e-06, + "loss": 0.9744, + "step": 34950 + }, + { + "epoch": 0.25306376540931036, + "grad_norm": 0.16746865212917328, + "learning_rate": 4.746943473256749e-06, + "loss": 0.986, + "step": 34960 + }, + { + "epoch": 0.25313615206989654, + "grad_norm": 0.1823040395975113, + "learning_rate": 4.746871086596163e-06, + "loss": 0.9739, + "step": 34970 + }, + { + "epoch": 0.25320853873048277, + "grad_norm": 0.17070458829402924, + "learning_rate": 4.746798699935576e-06, + "loss": 0.9951, + "step": 34980 + }, + { + "epoch": 0.25328092539106895, + "grad_norm": 0.16862110793590546, + "learning_rate": 4.74672631327499e-06, + "loss": 0.9872, + "step": 34990 + }, + { + "epoch": 0.2533533120516551, + "grad_norm": 0.17179900407791138, + "learning_rate": 4.746653926614404e-06, + "loss": 0.9854, + "step": 35000 + }, + { + "epoch": 0.2534256987122413, + "grad_norm": 0.17400874197483063, + "learning_rate": 4.746581539953818e-06, + "loss": 0.9763, + "step": 35010 + }, + { + "epoch": 0.2534980853728275, + "grad_norm": 0.20990721881389618, + "learning_rate": 4.746509153293232e-06, + "loss": 0.9752, + "step": 35020 + }, + { + "epoch": 0.2535704720334137, + "grad_norm": 0.17433054745197296, + "learning_rate": 4.746436766632645e-06, + "loss": 0.9773, + "step": 35030 + }, + { + "epoch": 0.2536428586939999, + "grad_norm": 0.17180733382701874, + "learning_rate": 4.74636437997206e-06, + "loss": 0.9757, + "step": 35040 + }, + { + "epoch": 0.25371524535458606, + "grad_norm": 0.19720107316970825, + "learning_rate": 4.746291993311473e-06, + "loss": 0.9789, + "step": 35050 + }, + { + "epoch": 0.25378763201517224, + "grad_norm": 0.18892458081245422, + "learning_rate": 4.746219606650887e-06, + "loss": 0.9868, + "step": 35060 + }, + { + "epoch": 0.2538600186757584, + "grad_norm": 0.18329143524169922, + "learning_rate": 4.7461472199903006e-06, + "loss": 0.9795, + "step": 35070 + }, + { + "epoch": 0.2539324053363446, + "grad_norm": 0.17473115026950836, + "learning_rate": 4.746074833329715e-06, + "loss": 0.9798, + "step": 35080 + }, + { + "epoch": 0.2540047919969308, + "grad_norm": 0.17442865669727325, + "learning_rate": 4.746002446669129e-06, + "loss": 0.9803, + "step": 35090 + }, + { + "epoch": 0.254077178657517, + "grad_norm": 0.17204399406909943, + "learning_rate": 4.745930060008542e-06, + "loss": 0.9771, + "step": 35100 + }, + { + "epoch": 0.2541495653181032, + "grad_norm": 0.1870034784078598, + "learning_rate": 4.745857673347956e-06, + "loss": 0.9718, + "step": 35110 + }, + { + "epoch": 0.25422195197868935, + "grad_norm": 0.1639021635055542, + "learning_rate": 4.7457852866873695e-06, + "loss": 0.9805, + "step": 35120 + }, + { + "epoch": 0.25429433863927553, + "grad_norm": 0.16658686101436615, + "learning_rate": 4.745712900026783e-06, + "loss": 0.982, + "step": 35130 + }, + { + "epoch": 0.25436672529986176, + "grad_norm": 0.164857879281044, + "learning_rate": 4.745640513366197e-06, + "loss": 0.9885, + "step": 35140 + }, + { + "epoch": 0.25443911196044794, + "grad_norm": 0.18031135201454163, + "learning_rate": 4.745568126705611e-06, + "loss": 0.9763, + "step": 35150 + }, + { + "epoch": 0.2545114986210341, + "grad_norm": 0.1674937754869461, + "learning_rate": 4.745495740045025e-06, + "loss": 0.9891, + "step": 35160 + }, + { + "epoch": 0.2545838852816203, + "grad_norm": 0.16859376430511475, + "learning_rate": 4.7454233533844384e-06, + "loss": 0.9738, + "step": 35170 + }, + { + "epoch": 0.25465627194220647, + "grad_norm": 0.1623847633600235, + "learning_rate": 4.745350966723852e-06, + "loss": 0.9772, + "step": 35180 + }, + { + "epoch": 0.2547286586027927, + "grad_norm": 0.25162413716316223, + "learning_rate": 4.7452785800632665e-06, + "loss": 0.9767, + "step": 35190 + }, + { + "epoch": 0.2548010452633789, + "grad_norm": 0.16929900646209717, + "learning_rate": 4.74520619340268e-06, + "loss": 0.9745, + "step": 35200 + }, + { + "epoch": 0.25487343192396505, + "grad_norm": 0.18202784657478333, + "learning_rate": 4.745133806742094e-06, + "loss": 0.9777, + "step": 35210 + }, + { + "epoch": 0.25494581858455123, + "grad_norm": 0.16864082217216492, + "learning_rate": 4.745061420081507e-06, + "loss": 0.9695, + "step": 35220 + }, + { + "epoch": 0.2550182052451374, + "grad_norm": 0.18244844675064087, + "learning_rate": 4.744989033420922e-06, + "loss": 0.9793, + "step": 35230 + }, + { + "epoch": 0.25509059190572364, + "grad_norm": 0.15838764607906342, + "learning_rate": 4.7449166467603354e-06, + "loss": 0.9749, + "step": 35240 + }, + { + "epoch": 0.2551629785663098, + "grad_norm": 0.1680920273065567, + "learning_rate": 4.744844260099749e-06, + "loss": 0.9788, + "step": 35250 + }, + { + "epoch": 0.255235365226896, + "grad_norm": 0.1684524267911911, + "learning_rate": 4.744771873439163e-06, + "loss": 0.9859, + "step": 35260 + }, + { + "epoch": 0.25530775188748217, + "grad_norm": 0.16353952884674072, + "learning_rate": 4.744699486778577e-06, + "loss": 0.9842, + "step": 35270 + }, + { + "epoch": 0.25538013854806835, + "grad_norm": 0.17483973503112793, + "learning_rate": 4.744627100117991e-06, + "loss": 0.9818, + "step": 35280 + }, + { + "epoch": 0.2554525252086545, + "grad_norm": 0.16171082854270935, + "learning_rate": 4.744554713457404e-06, + "loss": 0.9814, + "step": 35290 + }, + { + "epoch": 0.25552491186924076, + "grad_norm": 0.16629914939403534, + "learning_rate": 4.744482326796818e-06, + "loss": 0.9932, + "step": 35300 + }, + { + "epoch": 0.25559729852982693, + "grad_norm": 0.1688779890537262, + "learning_rate": 4.744409940136232e-06, + "loss": 0.9748, + "step": 35310 + }, + { + "epoch": 0.2556696851904131, + "grad_norm": 0.18332324922084808, + "learning_rate": 4.744337553475646e-06, + "loss": 0.983, + "step": 35320 + }, + { + "epoch": 0.2557420718509993, + "grad_norm": 3.5396459102630615, + "learning_rate": 4.74426516681506e-06, + "loss": 0.9881, + "step": 35330 + }, + { + "epoch": 0.25581445851158546, + "grad_norm": 0.16247138381004333, + "learning_rate": 4.744192780154473e-06, + "loss": 0.9893, + "step": 35340 + }, + { + "epoch": 0.2558868451721717, + "grad_norm": 0.17081671953201294, + "learning_rate": 4.744120393493887e-06, + "loss": 0.9899, + "step": 35350 + }, + { + "epoch": 0.25595923183275787, + "grad_norm": 0.1555139124393463, + "learning_rate": 4.744048006833301e-06, + "loss": 0.9697, + "step": 35360 + }, + { + "epoch": 0.25603161849334405, + "grad_norm": 0.19195972383022308, + "learning_rate": 4.743975620172715e-06, + "loss": 0.9819, + "step": 35370 + }, + { + "epoch": 0.2561040051539302, + "grad_norm": 0.2612147927284241, + "learning_rate": 4.743903233512129e-06, + "loss": 0.9856, + "step": 35380 + }, + { + "epoch": 0.2561763918145164, + "grad_norm": 0.17265565693378448, + "learning_rate": 4.743830846851542e-06, + "loss": 0.973, + "step": 35390 + }, + { + "epoch": 0.25624877847510263, + "grad_norm": 0.17444762587547302, + "learning_rate": 4.743758460190957e-06, + "loss": 0.9811, + "step": 35400 + }, + { + "epoch": 0.2563211651356888, + "grad_norm": 0.17848478257656097, + "learning_rate": 4.74368607353037e-06, + "loss": 0.9701, + "step": 35410 + }, + { + "epoch": 0.256393551796275, + "grad_norm": 0.16675600409507751, + "learning_rate": 4.743613686869784e-06, + "loss": 0.9836, + "step": 35420 + }, + { + "epoch": 0.25646593845686116, + "grad_norm": 0.16786222159862518, + "learning_rate": 4.7435413002091975e-06, + "loss": 0.9816, + "step": 35430 + }, + { + "epoch": 0.25653832511744734, + "grad_norm": 0.18785636126995087, + "learning_rate": 4.743468913548612e-06, + "loss": 0.9693, + "step": 35440 + }, + { + "epoch": 0.2566107117780335, + "grad_norm": 0.16622446477413177, + "learning_rate": 4.743396526888026e-06, + "loss": 0.9818, + "step": 35450 + }, + { + "epoch": 0.25668309843861975, + "grad_norm": 0.1868736445903778, + "learning_rate": 4.743324140227439e-06, + "loss": 0.9806, + "step": 35460 + }, + { + "epoch": 0.2567554850992059, + "grad_norm": 0.17753900587558746, + "learning_rate": 4.743251753566853e-06, + "loss": 0.9739, + "step": 35470 + }, + { + "epoch": 0.2568278717597921, + "grad_norm": 0.18363986909389496, + "learning_rate": 4.743179366906267e-06, + "loss": 0.9767, + "step": 35480 + }, + { + "epoch": 0.2569002584203783, + "grad_norm": 0.1745624840259552, + "learning_rate": 4.743106980245681e-06, + "loss": 0.9889, + "step": 35490 + }, + { + "epoch": 0.25697264508096446, + "grad_norm": 0.16066277027130127, + "learning_rate": 4.7430345935850945e-06, + "loss": 0.9692, + "step": 35500 + }, + { + "epoch": 0.2570450317415507, + "grad_norm": 0.16065356135368347, + "learning_rate": 4.742962206924508e-06, + "loss": 0.9864, + "step": 35510 + }, + { + "epoch": 0.25711741840213687, + "grad_norm": 0.1665170043706894, + "learning_rate": 4.742889820263923e-06, + "loss": 0.979, + "step": 35520 + }, + { + "epoch": 0.25718980506272304, + "grad_norm": 0.15713395178318024, + "learning_rate": 4.742817433603336e-06, + "loss": 0.9603, + "step": 35530 + }, + { + "epoch": 0.2572621917233092, + "grad_norm": 0.18510933220386505, + "learning_rate": 4.74274504694275e-06, + "loss": 0.9726, + "step": 35540 + }, + { + "epoch": 0.2573345783838954, + "grad_norm": 0.16515351831912994, + "learning_rate": 4.7426726602821635e-06, + "loss": 0.9782, + "step": 35550 + }, + { + "epoch": 0.25740696504448163, + "grad_norm": 0.17086593806743622, + "learning_rate": 4.742600273621578e-06, + "loss": 0.9634, + "step": 35560 + }, + { + "epoch": 0.2574793517050678, + "grad_norm": 0.17308638989925385, + "learning_rate": 4.7425278869609915e-06, + "loss": 0.978, + "step": 35570 + }, + { + "epoch": 0.257551738365654, + "grad_norm": 0.18150749802589417, + "learning_rate": 4.742455500300405e-06, + "loss": 0.976, + "step": 35580 + }, + { + "epoch": 0.25762412502624016, + "grad_norm": 0.1714148074388504, + "learning_rate": 4.742383113639819e-06, + "loss": 0.9786, + "step": 35590 + }, + { + "epoch": 0.25769651168682634, + "grad_norm": 0.16008371114730835, + "learning_rate": 4.742310726979233e-06, + "loss": 0.9789, + "step": 35600 + }, + { + "epoch": 0.2577688983474125, + "grad_norm": 0.17920182645320892, + "learning_rate": 4.742238340318647e-06, + "loss": 0.9875, + "step": 35610 + }, + { + "epoch": 0.25784128500799874, + "grad_norm": 0.1833036094903946, + "learning_rate": 4.7421659536580605e-06, + "loss": 1.0031, + "step": 35620 + }, + { + "epoch": 0.2579136716685849, + "grad_norm": 0.17967666685581207, + "learning_rate": 4.742093566997474e-06, + "loss": 0.9975, + "step": 35630 + }, + { + "epoch": 0.2579860583291711, + "grad_norm": 0.29100435972213745, + "learning_rate": 4.742021180336888e-06, + "loss": 0.974, + "step": 35640 + }, + { + "epoch": 0.2580584449897573, + "grad_norm": 0.1635904461145401, + "learning_rate": 4.741948793676301e-06, + "loss": 0.9854, + "step": 35650 + }, + { + "epoch": 0.25813083165034345, + "grad_norm": 0.20104598999023438, + "learning_rate": 4.741876407015715e-06, + "loss": 0.9847, + "step": 35660 + }, + { + "epoch": 0.2582032183109297, + "grad_norm": 0.1948116570711136, + "learning_rate": 4.741804020355129e-06, + "loss": 0.9766, + "step": 35670 + }, + { + "epoch": 0.25827560497151586, + "grad_norm": 0.18129467964172363, + "learning_rate": 4.741731633694543e-06, + "loss": 0.9788, + "step": 35680 + }, + { + "epoch": 0.25834799163210204, + "grad_norm": 0.19102171063423157, + "learning_rate": 4.741659247033957e-06, + "loss": 0.9826, + "step": 35690 + }, + { + "epoch": 0.2584203782926882, + "grad_norm": 0.15986351668834686, + "learning_rate": 4.74158686037337e-06, + "loss": 0.9818, + "step": 35700 + }, + { + "epoch": 0.2584927649532744, + "grad_norm": 0.16768378019332886, + "learning_rate": 4.741514473712785e-06, + "loss": 0.9833, + "step": 35710 + }, + { + "epoch": 0.2585651516138606, + "grad_norm": 0.16951607167720795, + "learning_rate": 4.741442087052198e-06, + "loss": 0.9796, + "step": 35720 + }, + { + "epoch": 0.2586375382744468, + "grad_norm": 0.18605226278305054, + "learning_rate": 4.741369700391612e-06, + "loss": 0.9878, + "step": 35730 + }, + { + "epoch": 0.258709924935033, + "grad_norm": 0.15698987245559692, + "learning_rate": 4.7412973137310256e-06, + "loss": 0.9664, + "step": 35740 + }, + { + "epoch": 0.25878231159561915, + "grad_norm": 0.17848019301891327, + "learning_rate": 4.74122492707044e-06, + "loss": 0.968, + "step": 35750 + }, + { + "epoch": 0.25885469825620533, + "grad_norm": 0.18096424639225006, + "learning_rate": 4.741152540409854e-06, + "loss": 0.9781, + "step": 35760 + }, + { + "epoch": 0.25892708491679156, + "grad_norm": 0.16998852789402008, + "learning_rate": 4.741080153749267e-06, + "loss": 0.9852, + "step": 35770 + }, + { + "epoch": 0.25899947157737774, + "grad_norm": 0.16071717441082, + "learning_rate": 4.741007767088681e-06, + "loss": 0.9852, + "step": 35780 + }, + { + "epoch": 0.2590718582379639, + "grad_norm": 0.16710597276687622, + "learning_rate": 4.740935380428095e-06, + "loss": 0.9728, + "step": 35790 + }, + { + "epoch": 0.2591442448985501, + "grad_norm": 0.15692850947380066, + "learning_rate": 4.740862993767509e-06, + "loss": 0.983, + "step": 35800 + }, + { + "epoch": 0.25921663155913627, + "grad_norm": 0.16748061776161194, + "learning_rate": 4.7407906071069226e-06, + "loss": 0.984, + "step": 35810 + }, + { + "epoch": 0.25928901821972244, + "grad_norm": 0.16068707406520844, + "learning_rate": 4.740718220446336e-06, + "loss": 0.9745, + "step": 35820 + }, + { + "epoch": 0.2593614048803087, + "grad_norm": 0.17079469561576843, + "learning_rate": 4.740645833785751e-06, + "loss": 0.9656, + "step": 35830 + }, + { + "epoch": 0.25943379154089485, + "grad_norm": 0.18786334991455078, + "learning_rate": 4.740573447125164e-06, + "loss": 0.9619, + "step": 35840 + }, + { + "epoch": 0.25950617820148103, + "grad_norm": 0.1623920202255249, + "learning_rate": 4.740501060464578e-06, + "loss": 0.9768, + "step": 35850 + }, + { + "epoch": 0.2595785648620672, + "grad_norm": 0.16342291235923767, + "learning_rate": 4.7404286738039915e-06, + "loss": 0.9867, + "step": 35860 + }, + { + "epoch": 0.2596509515226534, + "grad_norm": 0.16781893372535706, + "learning_rate": 4.740356287143406e-06, + "loss": 0.9758, + "step": 35870 + }, + { + "epoch": 0.2597233381832396, + "grad_norm": 0.1712564378976822, + "learning_rate": 4.74028390048282e-06, + "loss": 0.9558, + "step": 35880 + }, + { + "epoch": 0.2597957248438258, + "grad_norm": 0.16046634316444397, + "learning_rate": 4.740211513822233e-06, + "loss": 0.9723, + "step": 35890 + }, + { + "epoch": 0.25986811150441197, + "grad_norm": 0.1630384773015976, + "learning_rate": 4.740139127161647e-06, + "loss": 0.9766, + "step": 35900 + }, + { + "epoch": 0.25994049816499815, + "grad_norm": 0.16825400292873383, + "learning_rate": 4.740066740501061e-06, + "loss": 0.9824, + "step": 35910 + }, + { + "epoch": 0.2600128848255843, + "grad_norm": 0.17138569056987762, + "learning_rate": 4.739994353840475e-06, + "loss": 0.9749, + "step": 35920 + }, + { + "epoch": 0.26008527148617056, + "grad_norm": 0.1562861055135727, + "learning_rate": 4.7399219671798885e-06, + "loss": 0.9786, + "step": 35930 + }, + { + "epoch": 0.26015765814675673, + "grad_norm": 0.1865861713886261, + "learning_rate": 4.739849580519302e-06, + "loss": 0.9761, + "step": 35940 + }, + { + "epoch": 0.2602300448073429, + "grad_norm": 0.168324813246727, + "learning_rate": 4.739777193858716e-06, + "loss": 0.9813, + "step": 35950 + }, + { + "epoch": 0.2603024314679291, + "grad_norm": 0.1690807342529297, + "learning_rate": 4.73970480719813e-06, + "loss": 0.9779, + "step": 35960 + }, + { + "epoch": 0.26037481812851526, + "grad_norm": 0.17235450446605682, + "learning_rate": 4.739632420537544e-06, + "loss": 0.9858, + "step": 35970 + }, + { + "epoch": 0.26044720478910144, + "grad_norm": 0.188417449593544, + "learning_rate": 4.7395600338769574e-06, + "loss": 0.9765, + "step": 35980 + }, + { + "epoch": 0.26051959144968767, + "grad_norm": 0.17113593220710754, + "learning_rate": 4.739487647216371e-06, + "loss": 0.9687, + "step": 35990 + }, + { + "epoch": 0.26059197811027385, + "grad_norm": 0.1908288598060608, + "learning_rate": 4.7394152605557855e-06, + "loss": 0.9831, + "step": 36000 + }, + { + "epoch": 0.26066436477086, + "grad_norm": 0.17590002715587616, + "learning_rate": 4.739342873895199e-06, + "loss": 0.9856, + "step": 36010 + }, + { + "epoch": 0.2607367514314462, + "grad_norm": 0.16687145829200745, + "learning_rate": 4.739270487234613e-06, + "loss": 0.9793, + "step": 36020 + }, + { + "epoch": 0.2608091380920324, + "grad_norm": 0.1600182056427002, + "learning_rate": 4.739198100574026e-06, + "loss": 0.9738, + "step": 36030 + }, + { + "epoch": 0.2608815247526186, + "grad_norm": 0.16335642337799072, + "learning_rate": 4.739125713913441e-06, + "loss": 0.9686, + "step": 36040 + }, + { + "epoch": 0.2609539114132048, + "grad_norm": 0.16597791016101837, + "learning_rate": 4.7390533272528544e-06, + "loss": 0.9796, + "step": 36050 + }, + { + "epoch": 0.26102629807379096, + "grad_norm": 0.1797637939453125, + "learning_rate": 4.738980940592268e-06, + "loss": 0.9771, + "step": 36060 + }, + { + "epoch": 0.26109868473437714, + "grad_norm": 0.17320837080478668, + "learning_rate": 4.738908553931682e-06, + "loss": 0.9889, + "step": 36070 + }, + { + "epoch": 0.2611710713949633, + "grad_norm": 0.16971297562122345, + "learning_rate": 4.738836167271096e-06, + "loss": 0.9947, + "step": 36080 + }, + { + "epoch": 0.26124345805554955, + "grad_norm": 0.17000246047973633, + "learning_rate": 4.73876378061051e-06, + "loss": 0.9631, + "step": 36090 + }, + { + "epoch": 0.2613158447161357, + "grad_norm": 0.16442660987377167, + "learning_rate": 4.738691393949923e-06, + "loss": 0.9566, + "step": 36100 + }, + { + "epoch": 0.2613882313767219, + "grad_norm": 0.15749797224998474, + "learning_rate": 4.738619007289337e-06, + "loss": 0.9857, + "step": 36110 + }, + { + "epoch": 0.2614606180373081, + "grad_norm": 0.1590905487537384, + "learning_rate": 4.7385466206287514e-06, + "loss": 0.9741, + "step": 36120 + }, + { + "epoch": 0.26153300469789426, + "grad_norm": 0.20736895501613617, + "learning_rate": 4.738474233968165e-06, + "loss": 0.9741, + "step": 36130 + }, + { + "epoch": 0.26160539135848043, + "grad_norm": 0.1713234931230545, + "learning_rate": 4.738401847307579e-06, + "loss": 0.9652, + "step": 36140 + }, + { + "epoch": 0.26167777801906666, + "grad_norm": 0.17479397356510162, + "learning_rate": 4.738329460646992e-06, + "loss": 0.962, + "step": 36150 + }, + { + "epoch": 0.26175016467965284, + "grad_norm": 0.15319569408893585, + "learning_rate": 4.738257073986407e-06, + "loss": 0.9914, + "step": 36160 + }, + { + "epoch": 0.261822551340239, + "grad_norm": 0.17199110984802246, + "learning_rate": 4.7381846873258195e-06, + "loss": 0.9701, + "step": 36170 + }, + { + "epoch": 0.2618949380008252, + "grad_norm": 0.17923112213611603, + "learning_rate": 4.738112300665233e-06, + "loss": 0.9866, + "step": 36180 + }, + { + "epoch": 0.26196732466141137, + "grad_norm": 0.17701056599617004, + "learning_rate": 4.738039914004648e-06, + "loss": 0.9905, + "step": 36190 + }, + { + "epoch": 0.2620397113219976, + "grad_norm": 0.18318752944469452, + "learning_rate": 4.737967527344061e-06, + "loss": 0.9769, + "step": 36200 + }, + { + "epoch": 0.2621120979825838, + "grad_norm": 0.16948123276233673, + "learning_rate": 4.737895140683475e-06, + "loss": 0.9701, + "step": 36210 + }, + { + "epoch": 0.26218448464316996, + "grad_norm": 0.16804496943950653, + "learning_rate": 4.7378227540228885e-06, + "loss": 0.9758, + "step": 36220 + }, + { + "epoch": 0.26225687130375613, + "grad_norm": 0.16858185827732086, + "learning_rate": 4.737750367362303e-06, + "loss": 0.9778, + "step": 36230 + }, + { + "epoch": 0.2623292579643423, + "grad_norm": 0.16800491511821747, + "learning_rate": 4.7376779807017165e-06, + "loss": 0.96, + "step": 36240 + }, + { + "epoch": 0.26240164462492854, + "grad_norm": 0.16772626340389252, + "learning_rate": 4.73760559404113e-06, + "loss": 0.958, + "step": 36250 + }, + { + "epoch": 0.2624740312855147, + "grad_norm": 0.16602137684822083, + "learning_rate": 4.737533207380544e-06, + "loss": 0.9761, + "step": 36260 + }, + { + "epoch": 0.2625464179461009, + "grad_norm": 0.17461955547332764, + "learning_rate": 4.737460820719958e-06, + "loss": 0.9546, + "step": 36270 + }, + { + "epoch": 0.2626188046066871, + "grad_norm": 0.1623477339744568, + "learning_rate": 4.737388434059372e-06, + "loss": 0.981, + "step": 36280 + }, + { + "epoch": 0.26269119126727325, + "grad_norm": 0.17813843488693237, + "learning_rate": 4.7373160473987855e-06, + "loss": 0.9779, + "step": 36290 + }, + { + "epoch": 0.2627635779278594, + "grad_norm": 0.18442265689373016, + "learning_rate": 4.737243660738199e-06, + "loss": 0.9541, + "step": 36300 + }, + { + "epoch": 0.26283596458844566, + "grad_norm": 0.16500839591026306, + "learning_rate": 4.7371712740776135e-06, + "loss": 0.9601, + "step": 36310 + }, + { + "epoch": 0.26290835124903184, + "grad_norm": 0.17317786812782288, + "learning_rate": 4.737098887417027e-06, + "loss": 0.9704, + "step": 36320 + }, + { + "epoch": 0.262980737909618, + "grad_norm": 0.1605556458234787, + "learning_rate": 4.737026500756441e-06, + "loss": 0.9793, + "step": 36330 + }, + { + "epoch": 0.2630531245702042, + "grad_norm": 0.16761939227581024, + "learning_rate": 4.736954114095854e-06, + "loss": 0.9669, + "step": 36340 + }, + { + "epoch": 0.26312551123079037, + "grad_norm": 0.19816842675209045, + "learning_rate": 4.736881727435269e-06, + "loss": 0.9749, + "step": 36350 + }, + { + "epoch": 0.2631978978913766, + "grad_norm": 0.15934444963932037, + "learning_rate": 4.7368093407746825e-06, + "loss": 0.9762, + "step": 36360 + }, + { + "epoch": 0.2632702845519628, + "grad_norm": 0.16128577291965485, + "learning_rate": 4.736736954114096e-06, + "loss": 0.9648, + "step": 36370 + }, + { + "epoch": 0.26334267121254895, + "grad_norm": 0.1702321320772171, + "learning_rate": 4.73666456745351e-06, + "loss": 0.9762, + "step": 36380 + }, + { + "epoch": 0.26341505787313513, + "grad_norm": 0.16586679220199585, + "learning_rate": 4.736592180792924e-06, + "loss": 0.9676, + "step": 36390 + }, + { + "epoch": 0.2634874445337213, + "grad_norm": 0.18088088929653168, + "learning_rate": 4.736519794132338e-06, + "loss": 0.974, + "step": 36400 + }, + { + "epoch": 0.26355983119430754, + "grad_norm": 0.2222166806459427, + "learning_rate": 4.736447407471751e-06, + "loss": 0.9776, + "step": 36410 + }, + { + "epoch": 0.2636322178548937, + "grad_norm": 0.15918581187725067, + "learning_rate": 4.736375020811165e-06, + "loss": 0.9787, + "step": 36420 + }, + { + "epoch": 0.2637046045154799, + "grad_norm": 0.1603013426065445, + "learning_rate": 4.7363026341505795e-06, + "loss": 0.9617, + "step": 36430 + }, + { + "epoch": 0.26377699117606607, + "grad_norm": 0.1752447783946991, + "learning_rate": 4.736230247489993e-06, + "loss": 0.9724, + "step": 36440 + }, + { + "epoch": 0.26384937783665224, + "grad_norm": 0.27862676978111267, + "learning_rate": 4.736157860829407e-06, + "loss": 0.9675, + "step": 36450 + }, + { + "epoch": 0.2639217644972385, + "grad_norm": 0.1704714149236679, + "learning_rate": 4.73608547416882e-06, + "loss": 0.962, + "step": 36460 + }, + { + "epoch": 0.26399415115782465, + "grad_norm": 0.2055920511484146, + "learning_rate": 4.736013087508235e-06, + "loss": 0.9712, + "step": 36470 + }, + { + "epoch": 0.26406653781841083, + "grad_norm": 0.168257936835289, + "learning_rate": 4.735940700847648e-06, + "loss": 0.9722, + "step": 36480 + }, + { + "epoch": 0.264138924478997, + "grad_norm": 0.1615269035100937, + "learning_rate": 4.735868314187062e-06, + "loss": 0.9708, + "step": 36490 + }, + { + "epoch": 0.2642113111395832, + "grad_norm": 0.16784435510635376, + "learning_rate": 4.735795927526476e-06, + "loss": 0.9741, + "step": 36500 + }, + { + "epoch": 0.26428369780016936, + "grad_norm": 0.16948504745960236, + "learning_rate": 4.73572354086589e-06, + "loss": 0.9692, + "step": 36510 + }, + { + "epoch": 0.2643560844607556, + "grad_norm": 0.16476517915725708, + "learning_rate": 4.735651154205304e-06, + "loss": 0.9673, + "step": 36520 + }, + { + "epoch": 0.26442847112134177, + "grad_norm": 0.18049409985542297, + "learning_rate": 4.735578767544717e-06, + "loss": 0.9676, + "step": 36530 + }, + { + "epoch": 0.26450085778192794, + "grad_norm": 0.1603485643863678, + "learning_rate": 4.735506380884131e-06, + "loss": 0.9789, + "step": 36540 + }, + { + "epoch": 0.2645732444425141, + "grad_norm": 0.16971318423748016, + "learning_rate": 4.735433994223545e-06, + "loss": 0.9847, + "step": 36550 + }, + { + "epoch": 0.2646456311031003, + "grad_norm": 0.19369632005691528, + "learning_rate": 4.735361607562959e-06, + "loss": 0.9731, + "step": 36560 + }, + { + "epoch": 0.26471801776368653, + "grad_norm": 0.1709279865026474, + "learning_rate": 4.735289220902373e-06, + "loss": 0.9596, + "step": 36570 + }, + { + "epoch": 0.2647904044242727, + "grad_norm": 0.1615041047334671, + "learning_rate": 4.735216834241786e-06, + "loss": 0.9585, + "step": 36580 + }, + { + "epoch": 0.2648627910848589, + "grad_norm": 0.17136956751346588, + "learning_rate": 4.7351444475812e-06, + "loss": 0.9857, + "step": 36590 + }, + { + "epoch": 0.26493517774544506, + "grad_norm": 0.1596122533082962, + "learning_rate": 4.735072060920614e-06, + "loss": 0.9646, + "step": 36600 + }, + { + "epoch": 0.26500756440603124, + "grad_norm": 0.16344700753688812, + "learning_rate": 4.734999674260028e-06, + "loss": 0.9773, + "step": 36610 + }, + { + "epoch": 0.26507995106661747, + "grad_norm": 0.17619621753692627, + "learning_rate": 4.734927287599442e-06, + "loss": 0.97, + "step": 36620 + }, + { + "epoch": 0.26515233772720365, + "grad_norm": 0.16314248740673065, + "learning_rate": 4.734854900938855e-06, + "loss": 0.9712, + "step": 36630 + }, + { + "epoch": 0.2652247243877898, + "grad_norm": 0.1628977507352829, + "learning_rate": 4.73478251427827e-06, + "loss": 0.9813, + "step": 36640 + }, + { + "epoch": 0.265297111048376, + "grad_norm": 0.18795722723007202, + "learning_rate": 4.734710127617683e-06, + "loss": 0.9799, + "step": 36650 + }, + { + "epoch": 0.2653694977089622, + "grad_norm": 0.16708257794380188, + "learning_rate": 4.734637740957097e-06, + "loss": 0.964, + "step": 36660 + }, + { + "epoch": 0.26544188436954835, + "grad_norm": 0.15541145205497742, + "learning_rate": 4.7345653542965105e-06, + "loss": 0.9709, + "step": 36670 + }, + { + "epoch": 0.2655142710301346, + "grad_norm": 0.6976906657218933, + "learning_rate": 4.734492967635925e-06, + "loss": 0.9633, + "step": 36680 + }, + { + "epoch": 0.26558665769072076, + "grad_norm": 0.18083471059799194, + "learning_rate": 4.734420580975339e-06, + "loss": 0.9868, + "step": 36690 + }, + { + "epoch": 0.26565904435130694, + "grad_norm": 0.16377471387386322, + "learning_rate": 4.734348194314752e-06, + "loss": 0.9749, + "step": 36700 + }, + { + "epoch": 0.2657314310118931, + "grad_norm": 0.17875248193740845, + "learning_rate": 4.734275807654166e-06, + "loss": 0.9759, + "step": 36710 + }, + { + "epoch": 0.2658038176724793, + "grad_norm": 0.1629532426595688, + "learning_rate": 4.7342034209935794e-06, + "loss": 0.9764, + "step": 36720 + }, + { + "epoch": 0.2658762043330655, + "grad_norm": 0.18493175506591797, + "learning_rate": 4.734131034332993e-06, + "loss": 0.9693, + "step": 36730 + }, + { + "epoch": 0.2659485909936517, + "grad_norm": 0.1734851896762848, + "learning_rate": 4.734058647672407e-06, + "loss": 0.9604, + "step": 36740 + }, + { + "epoch": 0.2660209776542379, + "grad_norm": 0.15318694710731506, + "learning_rate": 4.733986261011821e-06, + "loss": 0.9816, + "step": 36750 + }, + { + "epoch": 0.26609336431482405, + "grad_norm": 0.1794171929359436, + "learning_rate": 4.733913874351235e-06, + "loss": 0.977, + "step": 36760 + }, + { + "epoch": 0.26616575097541023, + "grad_norm": 0.18346603214740753, + "learning_rate": 4.733841487690648e-06, + "loss": 0.9839, + "step": 36770 + }, + { + "epoch": 0.26623813763599646, + "grad_norm": 0.16709034144878387, + "learning_rate": 4.733769101030062e-06, + "loss": 0.9943, + "step": 36780 + }, + { + "epoch": 0.26631052429658264, + "grad_norm": 0.16048607230186462, + "learning_rate": 4.7336967143694764e-06, + "loss": 0.963, + "step": 36790 + }, + { + "epoch": 0.2663829109571688, + "grad_norm": 0.1640567034482956, + "learning_rate": 4.73362432770889e-06, + "loss": 0.9744, + "step": 36800 + }, + { + "epoch": 0.266455297617755, + "grad_norm": 0.18880528211593628, + "learning_rate": 4.733551941048304e-06, + "loss": 0.9689, + "step": 36810 + }, + { + "epoch": 0.26652768427834117, + "grad_norm": 0.17127233743667603, + "learning_rate": 4.733479554387717e-06, + "loss": 0.9747, + "step": 36820 + }, + { + "epoch": 0.26660007093892735, + "grad_norm": 0.17141376435756683, + "learning_rate": 4.733407167727132e-06, + "loss": 0.9732, + "step": 36830 + }, + { + "epoch": 0.2666724575995136, + "grad_norm": 0.1688418984413147, + "learning_rate": 4.733334781066545e-06, + "loss": 0.9744, + "step": 36840 + }, + { + "epoch": 0.26674484426009976, + "grad_norm": 0.18066221475601196, + "learning_rate": 4.733262394405959e-06, + "loss": 0.9735, + "step": 36850 + }, + { + "epoch": 0.26681723092068593, + "grad_norm": 0.15719619393348694, + "learning_rate": 4.733190007745373e-06, + "loss": 0.9797, + "step": 36860 + }, + { + "epoch": 0.2668896175812721, + "grad_norm": 0.1722886562347412, + "learning_rate": 4.733117621084787e-06, + "loss": 0.9744, + "step": 36870 + }, + { + "epoch": 0.2669620042418583, + "grad_norm": 0.19963102042675018, + "learning_rate": 4.733045234424201e-06, + "loss": 0.9828, + "step": 36880 + }, + { + "epoch": 0.2670343909024445, + "grad_norm": 0.16132299602031708, + "learning_rate": 4.732972847763614e-06, + "loss": 0.9712, + "step": 36890 + }, + { + "epoch": 0.2671067775630307, + "grad_norm": 0.16427114605903625, + "learning_rate": 4.732900461103028e-06, + "loss": 0.9779, + "step": 36900 + }, + { + "epoch": 0.26717916422361687, + "grad_norm": 0.18113258481025696, + "learning_rate": 4.732828074442442e-06, + "loss": 0.9688, + "step": 36910 + }, + { + "epoch": 0.26725155088420305, + "grad_norm": 0.1653916984796524, + "learning_rate": 4.732755687781856e-06, + "loss": 0.9717, + "step": 36920 + }, + { + "epoch": 0.2673239375447892, + "grad_norm": 0.179592102766037, + "learning_rate": 4.73268330112127e-06, + "loss": 0.9656, + "step": 36930 + }, + { + "epoch": 0.26739632420537546, + "grad_norm": 0.19769518077373505, + "learning_rate": 4.732610914460683e-06, + "loss": 0.9637, + "step": 36940 + }, + { + "epoch": 0.26746871086596163, + "grad_norm": 0.17146340012550354, + "learning_rate": 4.732538527800098e-06, + "loss": 0.9619, + "step": 36950 + }, + { + "epoch": 0.2675410975265478, + "grad_norm": 0.1564941555261612, + "learning_rate": 4.732466141139511e-06, + "loss": 0.9688, + "step": 36960 + }, + { + "epoch": 0.267613484187134, + "grad_norm": 0.1622597575187683, + "learning_rate": 4.732393754478925e-06, + "loss": 0.9848, + "step": 36970 + }, + { + "epoch": 0.26768587084772016, + "grad_norm": 0.17522403597831726, + "learning_rate": 4.7323213678183385e-06, + "loss": 0.9857, + "step": 36980 + }, + { + "epoch": 0.2677582575083064, + "grad_norm": 0.18009519577026367, + "learning_rate": 4.732248981157753e-06, + "loss": 0.9746, + "step": 36990 + }, + { + "epoch": 0.2678306441688926, + "grad_norm": 0.16934752464294434, + "learning_rate": 4.732176594497167e-06, + "loss": 0.9715, + "step": 37000 + }, + { + "epoch": 0.26790303082947875, + "grad_norm": 0.17400366067886353, + "learning_rate": 4.73210420783658e-06, + "loss": 0.985, + "step": 37010 + }, + { + "epoch": 0.2679754174900649, + "grad_norm": 0.15376758575439453, + "learning_rate": 4.732031821175994e-06, + "loss": 0.976, + "step": 37020 + }, + { + "epoch": 0.2680478041506511, + "grad_norm": 0.17052818834781647, + "learning_rate": 4.731959434515408e-06, + "loss": 0.9854, + "step": 37030 + }, + { + "epoch": 0.2681201908112373, + "grad_norm": 0.16806358098983765, + "learning_rate": 4.731887047854822e-06, + "loss": 0.9739, + "step": 37040 + }, + { + "epoch": 0.2681925774718235, + "grad_norm": 0.16140809655189514, + "learning_rate": 4.7318146611942355e-06, + "loss": 0.9864, + "step": 37050 + }, + { + "epoch": 0.2682649641324097, + "grad_norm": 0.16904619336128235, + "learning_rate": 4.731742274533649e-06, + "loss": 0.9594, + "step": 37060 + }, + { + "epoch": 0.26833735079299587, + "grad_norm": 0.1671932488679886, + "learning_rate": 4.731669887873064e-06, + "loss": 0.9694, + "step": 37070 + }, + { + "epoch": 0.26840973745358204, + "grad_norm": 0.17153862118721008, + "learning_rate": 4.731597501212477e-06, + "loss": 0.9811, + "step": 37080 + }, + { + "epoch": 0.2684821241141682, + "grad_norm": 0.16389037668704987, + "learning_rate": 4.731525114551891e-06, + "loss": 0.9758, + "step": 37090 + }, + { + "epoch": 0.26855451077475445, + "grad_norm": 0.16687390208244324, + "learning_rate": 4.7314527278913045e-06, + "loss": 0.9687, + "step": 37100 + }, + { + "epoch": 0.26862689743534063, + "grad_norm": 0.2017953097820282, + "learning_rate": 4.731380341230719e-06, + "loss": 0.9653, + "step": 37110 + }, + { + "epoch": 0.2686992840959268, + "grad_norm": 0.15654923021793365, + "learning_rate": 4.7313079545701326e-06, + "loss": 0.98, + "step": 37120 + }, + { + "epoch": 0.268771670756513, + "grad_norm": 0.18685537576675415, + "learning_rate": 4.731235567909546e-06, + "loss": 0.9759, + "step": 37130 + }, + { + "epoch": 0.26884405741709916, + "grad_norm": 0.1896306425333023, + "learning_rate": 4.73116318124896e-06, + "loss": 0.975, + "step": 37140 + }, + { + "epoch": 0.2689164440776854, + "grad_norm": 0.16155771911144257, + "learning_rate": 4.731090794588374e-06, + "loss": 0.9734, + "step": 37150 + }, + { + "epoch": 0.26898883073827157, + "grad_norm": 0.18064841628074646, + "learning_rate": 4.731018407927788e-06, + "loss": 0.9724, + "step": 37160 + }, + { + "epoch": 0.26906121739885774, + "grad_norm": 0.1620803028345108, + "learning_rate": 4.7309460212672015e-06, + "loss": 0.9608, + "step": 37170 + }, + { + "epoch": 0.2691336040594439, + "grad_norm": 0.15905041992664337, + "learning_rate": 4.730873634606615e-06, + "loss": 0.974, + "step": 37180 + }, + { + "epoch": 0.2692059907200301, + "grad_norm": 0.16241392493247986, + "learning_rate": 4.730801247946029e-06, + "loss": 0.971, + "step": 37190 + }, + { + "epoch": 0.2692783773806163, + "grad_norm": 0.1652408093214035, + "learning_rate": 4.730728861285443e-06, + "loss": 0.9561, + "step": 37200 + }, + { + "epoch": 0.2693507640412025, + "grad_norm": 0.1611439734697342, + "learning_rate": 4.730656474624857e-06, + "loss": 0.9889, + "step": 37210 + }, + { + "epoch": 0.2694231507017887, + "grad_norm": 0.16467610001564026, + "learning_rate": 4.73058408796427e-06, + "loss": 0.9639, + "step": 37220 + }, + { + "epoch": 0.26949553736237486, + "grad_norm": 0.17998045682907104, + "learning_rate": 4.730511701303684e-06, + "loss": 0.9692, + "step": 37230 + }, + { + "epoch": 0.26956792402296104, + "grad_norm": 0.16246424615383148, + "learning_rate": 4.730439314643098e-06, + "loss": 0.981, + "step": 37240 + }, + { + "epoch": 0.2696403106835472, + "grad_norm": 0.15857288241386414, + "learning_rate": 4.730366927982511e-06, + "loss": 0.9596, + "step": 37250 + }, + { + "epoch": 0.26971269734413345, + "grad_norm": 0.17110489308834076, + "learning_rate": 4.730294541321926e-06, + "loss": 0.9907, + "step": 37260 + }, + { + "epoch": 0.2697850840047196, + "grad_norm": 0.16102251410484314, + "learning_rate": 4.730222154661339e-06, + "loss": 0.9728, + "step": 37270 + }, + { + "epoch": 0.2698574706653058, + "grad_norm": 0.15859219431877136, + "learning_rate": 4.730149768000753e-06, + "loss": 0.9693, + "step": 37280 + }, + { + "epoch": 0.269929857325892, + "grad_norm": 0.16720248758792877, + "learning_rate": 4.7300773813401666e-06, + "loss": 0.975, + "step": 37290 + }, + { + "epoch": 0.27000224398647815, + "grad_norm": 0.1726258546113968, + "learning_rate": 4.730004994679581e-06, + "loss": 0.9823, + "step": 37300 + }, + { + "epoch": 0.2700746306470644, + "grad_norm": 0.17747798562049866, + "learning_rate": 4.729932608018995e-06, + "loss": 0.9637, + "step": 37310 + }, + { + "epoch": 0.27014701730765056, + "grad_norm": 0.17301778495311737, + "learning_rate": 4.729860221358408e-06, + "loss": 0.9755, + "step": 37320 + }, + { + "epoch": 0.27021940396823674, + "grad_norm": 0.1715429127216339, + "learning_rate": 4.729787834697822e-06, + "loss": 0.9838, + "step": 37330 + }, + { + "epoch": 0.2702917906288229, + "grad_norm": 0.17954185605049133, + "learning_rate": 4.729715448037236e-06, + "loss": 0.981, + "step": 37340 + }, + { + "epoch": 0.2703641772894091, + "grad_norm": 0.1605345904827118, + "learning_rate": 4.72964306137665e-06, + "loss": 0.9726, + "step": 37350 + }, + { + "epoch": 0.27043656394999527, + "grad_norm": 0.1734982430934906, + "learning_rate": 4.7295706747160636e-06, + "loss": 0.9621, + "step": 37360 + }, + { + "epoch": 0.2705089506105815, + "grad_norm": 0.16955071687698364, + "learning_rate": 4.729498288055477e-06, + "loss": 0.9831, + "step": 37370 + }, + { + "epoch": 0.2705813372711677, + "grad_norm": 0.18525801599025726, + "learning_rate": 4.729425901394891e-06, + "loss": 0.9803, + "step": 37380 + }, + { + "epoch": 0.27065372393175385, + "grad_norm": 0.1983799785375595, + "learning_rate": 4.729353514734305e-06, + "loss": 0.9768, + "step": 37390 + }, + { + "epoch": 0.27072611059234003, + "grad_norm": 0.169651597738266, + "learning_rate": 4.729281128073719e-06, + "loss": 0.9703, + "step": 37400 + }, + { + "epoch": 0.2707984972529262, + "grad_norm": 0.20270761847496033, + "learning_rate": 4.7292087414131325e-06, + "loss": 0.9909, + "step": 37410 + }, + { + "epoch": 0.27087088391351244, + "grad_norm": 0.19444452226161957, + "learning_rate": 4.729136354752546e-06, + "loss": 0.9811, + "step": 37420 + }, + { + "epoch": 0.2709432705740986, + "grad_norm": 0.17284293472766876, + "learning_rate": 4.729063968091961e-06, + "loss": 0.9821, + "step": 37430 + }, + { + "epoch": 0.2710156572346848, + "grad_norm": 0.1620352417230606, + "learning_rate": 4.728991581431374e-06, + "loss": 0.9725, + "step": 37440 + }, + { + "epoch": 0.27108804389527097, + "grad_norm": 0.17737331986427307, + "learning_rate": 4.728919194770788e-06, + "loss": 0.9739, + "step": 37450 + }, + { + "epoch": 0.27116043055585715, + "grad_norm": 0.15988625586032867, + "learning_rate": 4.7288468081102014e-06, + "loss": 0.9737, + "step": 37460 + }, + { + "epoch": 0.2712328172164434, + "grad_norm": 0.1713685542345047, + "learning_rate": 4.728774421449616e-06, + "loss": 0.98, + "step": 37470 + }, + { + "epoch": 0.27130520387702955, + "grad_norm": 0.17601056396961212, + "learning_rate": 4.7287020347890295e-06, + "loss": 0.9833, + "step": 37480 + }, + { + "epoch": 0.27137759053761573, + "grad_norm": 0.16959619522094727, + "learning_rate": 4.728629648128443e-06, + "loss": 0.9658, + "step": 37490 + }, + { + "epoch": 0.2714499771982019, + "grad_norm": 0.174661785364151, + "learning_rate": 4.728557261467857e-06, + "loss": 0.9721, + "step": 37500 + }, + { + "epoch": 0.2715223638587881, + "grad_norm": 0.16935496032238007, + "learning_rate": 4.728484874807271e-06, + "loss": 0.963, + "step": 37510 + }, + { + "epoch": 0.2715947505193743, + "grad_norm": 0.1676948070526123, + "learning_rate": 4.728412488146685e-06, + "loss": 0.969, + "step": 37520 + }, + { + "epoch": 0.2716671371799605, + "grad_norm": 0.17413829267024994, + "learning_rate": 4.7283401014860984e-06, + "loss": 0.9767, + "step": 37530 + }, + { + "epoch": 0.27173952384054667, + "grad_norm": 0.1575450748205185, + "learning_rate": 4.728267714825512e-06, + "loss": 0.9855, + "step": 37540 + }, + { + "epoch": 0.27181191050113285, + "grad_norm": 0.15776337683200836, + "learning_rate": 4.7281953281649265e-06, + "loss": 0.9669, + "step": 37550 + }, + { + "epoch": 0.271884297161719, + "grad_norm": 0.16901899874210358, + "learning_rate": 4.72812294150434e-06, + "loss": 0.9696, + "step": 37560 + }, + { + "epoch": 0.2719566838223052, + "grad_norm": 0.18395544588565826, + "learning_rate": 4.728050554843754e-06, + "loss": 0.9743, + "step": 37570 + }, + { + "epoch": 0.27202907048289143, + "grad_norm": 0.15982641279697418, + "learning_rate": 4.727978168183167e-06, + "loss": 0.9691, + "step": 37580 + }, + { + "epoch": 0.2721014571434776, + "grad_norm": 0.16578301787376404, + "learning_rate": 4.727905781522582e-06, + "loss": 0.9807, + "step": 37590 + }, + { + "epoch": 0.2721738438040638, + "grad_norm": 0.1732582002878189, + "learning_rate": 4.7278333948619954e-06, + "loss": 0.9776, + "step": 37600 + }, + { + "epoch": 0.27224623046464996, + "grad_norm": 0.1836758553981781, + "learning_rate": 4.727761008201409e-06, + "loss": 0.9741, + "step": 37610 + }, + { + "epoch": 0.27231861712523614, + "grad_norm": 0.1627260446548462, + "learning_rate": 4.727688621540823e-06, + "loss": 0.9747, + "step": 37620 + }, + { + "epoch": 0.27239100378582237, + "grad_norm": 0.16746656596660614, + "learning_rate": 4.727616234880237e-06, + "loss": 0.9808, + "step": 37630 + }, + { + "epoch": 0.27246339044640855, + "grad_norm": 0.18597693741321564, + "learning_rate": 4.727543848219651e-06, + "loss": 0.9778, + "step": 37640 + }, + { + "epoch": 0.2725357771069947, + "grad_norm": 0.1623985916376114, + "learning_rate": 4.727471461559064e-06, + "loss": 0.9741, + "step": 37650 + }, + { + "epoch": 0.2726081637675809, + "grad_norm": 0.1913863867521286, + "learning_rate": 4.727399074898478e-06, + "loss": 0.9838, + "step": 37660 + }, + { + "epoch": 0.2726805504281671, + "grad_norm": 0.1891731321811676, + "learning_rate": 4.7273266882378925e-06, + "loss": 0.9743, + "step": 37670 + }, + { + "epoch": 0.2727529370887533, + "grad_norm": 0.1674305498600006, + "learning_rate": 4.727254301577306e-06, + "loss": 0.9798, + "step": 37680 + }, + { + "epoch": 0.2728253237493395, + "grad_norm": 0.1666889786720276, + "learning_rate": 4.72718191491672e-06, + "loss": 0.9566, + "step": 37690 + }, + { + "epoch": 0.27289771040992566, + "grad_norm": 0.17124493420124054, + "learning_rate": 4.727109528256133e-06, + "loss": 0.9701, + "step": 37700 + }, + { + "epoch": 0.27297009707051184, + "grad_norm": 0.17866984009742737, + "learning_rate": 4.727037141595548e-06, + "loss": 0.9893, + "step": 37710 + }, + { + "epoch": 0.273042483731098, + "grad_norm": 0.16738882660865784, + "learning_rate": 4.726964754934961e-06, + "loss": 0.9757, + "step": 37720 + }, + { + "epoch": 0.2731148703916842, + "grad_norm": 0.1580776870250702, + "learning_rate": 4.726892368274375e-06, + "loss": 0.9758, + "step": 37730 + }, + { + "epoch": 0.2731872570522704, + "grad_norm": 0.1834014505147934, + "learning_rate": 4.726819981613789e-06, + "loss": 0.9687, + "step": 37740 + }, + { + "epoch": 0.2732596437128566, + "grad_norm": 0.15350131690502167, + "learning_rate": 4.726747594953203e-06, + "loss": 0.9674, + "step": 37750 + }, + { + "epoch": 0.2733320303734428, + "grad_norm": 0.1681162267923355, + "learning_rate": 4.726675208292616e-06, + "loss": 0.9744, + "step": 37760 + }, + { + "epoch": 0.27340441703402896, + "grad_norm": 0.17525209486484528, + "learning_rate": 4.7266028216320295e-06, + "loss": 0.9663, + "step": 37770 + }, + { + "epoch": 0.27347680369461513, + "grad_norm": 0.16645973920822144, + "learning_rate": 4.726530434971444e-06, + "loss": 0.9631, + "step": 37780 + }, + { + "epoch": 0.27354919035520137, + "grad_norm": 0.15926945209503174, + "learning_rate": 4.7264580483108575e-06, + "loss": 0.9706, + "step": 37790 + }, + { + "epoch": 0.27362157701578754, + "grad_norm": 0.18003691732883453, + "learning_rate": 4.726385661650271e-06, + "loss": 0.961, + "step": 37800 + }, + { + "epoch": 0.2736939636763737, + "grad_norm": 0.1554681807756424, + "learning_rate": 4.726313274989685e-06, + "loss": 0.9708, + "step": 37810 + }, + { + "epoch": 0.2737663503369599, + "grad_norm": 0.165873184800148, + "learning_rate": 4.726240888329099e-06, + "loss": 0.9873, + "step": 37820 + }, + { + "epoch": 0.2738387369975461, + "grad_norm": 0.16069376468658447, + "learning_rate": 4.726168501668513e-06, + "loss": 0.9704, + "step": 37830 + }, + { + "epoch": 0.2739111236581323, + "grad_norm": 0.16646680235862732, + "learning_rate": 4.7260961150079265e-06, + "loss": 0.9794, + "step": 37840 + }, + { + "epoch": 0.2739835103187185, + "grad_norm": 0.1622561812400818, + "learning_rate": 4.72602372834734e-06, + "loss": 0.9655, + "step": 37850 + }, + { + "epoch": 0.27405589697930466, + "grad_norm": 0.17769238352775574, + "learning_rate": 4.7259513416867546e-06, + "loss": 0.9845, + "step": 37860 + }, + { + "epoch": 0.27412828363989084, + "grad_norm": 0.1598799079656601, + "learning_rate": 4.725878955026168e-06, + "loss": 0.9684, + "step": 37870 + }, + { + "epoch": 0.274200670300477, + "grad_norm": 0.15777985751628876, + "learning_rate": 4.725806568365582e-06, + "loss": 0.9754, + "step": 37880 + }, + { + "epoch": 0.2742730569610632, + "grad_norm": 0.16836735606193542, + "learning_rate": 4.725734181704995e-06, + "loss": 0.9621, + "step": 37890 + }, + { + "epoch": 0.2743454436216494, + "grad_norm": 0.1695423424243927, + "learning_rate": 4.72566179504441e-06, + "loss": 0.9573, + "step": 37900 + }, + { + "epoch": 0.2744178302822356, + "grad_norm": 0.17105446755886078, + "learning_rate": 4.7255894083838235e-06, + "loss": 0.9736, + "step": 37910 + }, + { + "epoch": 0.2744902169428218, + "grad_norm": 0.17636360228061676, + "learning_rate": 4.725517021723237e-06, + "loss": 0.9589, + "step": 37920 + }, + { + "epoch": 0.27456260360340795, + "grad_norm": 0.2081209272146225, + "learning_rate": 4.725444635062651e-06, + "loss": 0.9717, + "step": 37930 + }, + { + "epoch": 0.2746349902639941, + "grad_norm": 0.1700514554977417, + "learning_rate": 4.725372248402065e-06, + "loss": 0.9788, + "step": 37940 + }, + { + "epoch": 0.27470737692458036, + "grad_norm": 0.18585331737995148, + "learning_rate": 4.725299861741479e-06, + "loss": 0.9673, + "step": 37950 + }, + { + "epoch": 0.27477976358516654, + "grad_norm": 0.1638772189617157, + "learning_rate": 4.725227475080892e-06, + "loss": 0.977, + "step": 37960 + }, + { + "epoch": 0.2748521502457527, + "grad_norm": 0.16300378739833832, + "learning_rate": 4.725155088420306e-06, + "loss": 0.9904, + "step": 37970 + }, + { + "epoch": 0.2749245369063389, + "grad_norm": 0.16224254667758942, + "learning_rate": 4.72508270175972e-06, + "loss": 0.9626, + "step": 37980 + }, + { + "epoch": 0.27499692356692507, + "grad_norm": 0.1576361060142517, + "learning_rate": 4.725010315099134e-06, + "loss": 0.9815, + "step": 37990 + }, + { + "epoch": 0.2750693102275113, + "grad_norm": 0.265109121799469, + "learning_rate": 4.724937928438548e-06, + "loss": 0.9741, + "step": 38000 + }, + { + "epoch": 0.2751416968880975, + "grad_norm": 0.16923700273036957, + "learning_rate": 4.724865541777961e-06, + "loss": 0.9641, + "step": 38010 + }, + { + "epoch": 0.27521408354868365, + "grad_norm": 0.15976910293102264, + "learning_rate": 4.724793155117375e-06, + "loss": 0.9833, + "step": 38020 + }, + { + "epoch": 0.27528647020926983, + "grad_norm": 0.1535775065422058, + "learning_rate": 4.724720768456789e-06, + "loss": 0.985, + "step": 38030 + }, + { + "epoch": 0.275358856869856, + "grad_norm": 0.1646791249513626, + "learning_rate": 4.724648381796203e-06, + "loss": 0.9652, + "step": 38040 + }, + { + "epoch": 0.2754312435304422, + "grad_norm": 0.17050626873970032, + "learning_rate": 4.724575995135617e-06, + "loss": 0.9634, + "step": 38050 + }, + { + "epoch": 0.2755036301910284, + "grad_norm": 0.16307681798934937, + "learning_rate": 4.72450360847503e-06, + "loss": 0.9639, + "step": 38060 + }, + { + "epoch": 0.2755760168516146, + "grad_norm": 0.17553727328777313, + "learning_rate": 4.724431221814445e-06, + "loss": 0.9771, + "step": 38070 + }, + { + "epoch": 0.27564840351220077, + "grad_norm": 0.1655888557434082, + "learning_rate": 4.724358835153858e-06, + "loss": 0.9717, + "step": 38080 + }, + { + "epoch": 0.27572079017278694, + "grad_norm": 0.1691659390926361, + "learning_rate": 4.724286448493272e-06, + "loss": 0.9491, + "step": 38090 + }, + { + "epoch": 0.2757931768333731, + "grad_norm": 0.15853264927864075, + "learning_rate": 4.7242140618326856e-06, + "loss": 0.9666, + "step": 38100 + }, + { + "epoch": 0.27586556349395935, + "grad_norm": 0.15642456710338593, + "learning_rate": 4.7241416751721e-06, + "loss": 0.9665, + "step": 38110 + }, + { + "epoch": 0.27593795015454553, + "grad_norm": 0.16785617172718048, + "learning_rate": 4.724069288511514e-06, + "loss": 0.9697, + "step": 38120 + }, + { + "epoch": 0.2760103368151317, + "grad_norm": 0.2900194823741913, + "learning_rate": 4.723996901850927e-06, + "loss": 0.9717, + "step": 38130 + }, + { + "epoch": 0.2760827234757179, + "grad_norm": 0.23790918290615082, + "learning_rate": 4.723924515190341e-06, + "loss": 0.9698, + "step": 38140 + }, + { + "epoch": 0.27615511013630406, + "grad_norm": 0.15893487632274628, + "learning_rate": 4.723852128529755e-06, + "loss": 0.975, + "step": 38150 + }, + { + "epoch": 0.2762274967968903, + "grad_norm": 0.15974752604961395, + "learning_rate": 4.723779741869169e-06, + "loss": 0.9819, + "step": 38160 + }, + { + "epoch": 0.27629988345747647, + "grad_norm": 0.16646121442317963, + "learning_rate": 4.723707355208583e-06, + "loss": 0.9664, + "step": 38170 + }, + { + "epoch": 0.27637227011806265, + "grad_norm": 0.15674103796482086, + "learning_rate": 4.723634968547996e-06, + "loss": 0.9783, + "step": 38180 + }, + { + "epoch": 0.2764446567786488, + "grad_norm": 0.1640247255563736, + "learning_rate": 4.723562581887411e-06, + "loss": 0.9746, + "step": 38190 + }, + { + "epoch": 0.276517043439235, + "grad_norm": 0.18196041882038116, + "learning_rate": 4.723490195226824e-06, + "loss": 0.9697, + "step": 38200 + }, + { + "epoch": 0.27658943009982123, + "grad_norm": 0.16162289679050446, + "learning_rate": 4.723417808566238e-06, + "loss": 0.9823, + "step": 38210 + }, + { + "epoch": 0.2766618167604074, + "grad_norm": 0.16687557101249695, + "learning_rate": 4.7233454219056515e-06, + "loss": 0.9688, + "step": 38220 + }, + { + "epoch": 0.2767342034209936, + "grad_norm": 0.16218598186969757, + "learning_rate": 4.723273035245066e-06, + "loss": 0.9888, + "step": 38230 + }, + { + "epoch": 0.27680659008157976, + "grad_norm": 0.15919815003871918, + "learning_rate": 4.72320064858448e-06, + "loss": 0.9664, + "step": 38240 + }, + { + "epoch": 0.27687897674216594, + "grad_norm": 0.15851202607154846, + "learning_rate": 4.723128261923893e-06, + "loss": 0.9657, + "step": 38250 + }, + { + "epoch": 0.2769513634027521, + "grad_norm": 0.17171426117420197, + "learning_rate": 4.723055875263307e-06, + "loss": 0.9671, + "step": 38260 + }, + { + "epoch": 0.27702375006333835, + "grad_norm": 0.1656324565410614, + "learning_rate": 4.722983488602721e-06, + "loss": 0.9644, + "step": 38270 + }, + { + "epoch": 0.2770961367239245, + "grad_norm": 0.16705715656280518, + "learning_rate": 4.722911101942135e-06, + "loss": 0.969, + "step": 38280 + }, + { + "epoch": 0.2771685233845107, + "grad_norm": 0.1705339401960373, + "learning_rate": 4.722838715281548e-06, + "loss": 0.9657, + "step": 38290 + }, + { + "epoch": 0.2772409100450969, + "grad_norm": 0.18638475239276886, + "learning_rate": 4.722766328620962e-06, + "loss": 0.9715, + "step": 38300 + }, + { + "epoch": 0.27731329670568305, + "grad_norm": 0.16748540103435516, + "learning_rate": 4.722693941960376e-06, + "loss": 0.9616, + "step": 38310 + }, + { + "epoch": 0.2773856833662693, + "grad_norm": 0.15937179327011108, + "learning_rate": 4.722621555299789e-06, + "loss": 0.9764, + "step": 38320 + }, + { + "epoch": 0.27745807002685546, + "grad_norm": 0.19340196251869202, + "learning_rate": 4.722549168639203e-06, + "loss": 0.9746, + "step": 38330 + }, + { + "epoch": 0.27753045668744164, + "grad_norm": 0.17282438278198242, + "learning_rate": 4.7224767819786174e-06, + "loss": 0.9691, + "step": 38340 + }, + { + "epoch": 0.2776028433480278, + "grad_norm": 0.17909948527812958, + "learning_rate": 4.722404395318031e-06, + "loss": 0.9802, + "step": 38350 + }, + { + "epoch": 0.277675230008614, + "grad_norm": 0.1845092922449112, + "learning_rate": 4.722332008657445e-06, + "loss": 0.9723, + "step": 38360 + }, + { + "epoch": 0.2777476166692002, + "grad_norm": 0.18336127698421478, + "learning_rate": 4.722259621996858e-06, + "loss": 0.9782, + "step": 38370 + }, + { + "epoch": 0.2778200033297864, + "grad_norm": 0.1571909338235855, + "learning_rate": 4.722187235336273e-06, + "loss": 0.9594, + "step": 38380 + }, + { + "epoch": 0.2778923899903726, + "grad_norm": 0.18232674896717072, + "learning_rate": 4.722114848675686e-06, + "loss": 0.9559, + "step": 38390 + }, + { + "epoch": 0.27796477665095876, + "grad_norm": 0.20004482567310333, + "learning_rate": 4.7220424620151e-06, + "loss": 0.9725, + "step": 38400 + }, + { + "epoch": 0.27803716331154493, + "grad_norm": 0.18284298479557037, + "learning_rate": 4.721970075354514e-06, + "loss": 0.9719, + "step": 38410 + }, + { + "epoch": 0.2781095499721311, + "grad_norm": 0.16327325999736786, + "learning_rate": 4.721897688693928e-06, + "loss": 0.9698, + "step": 38420 + }, + { + "epoch": 0.27818193663271734, + "grad_norm": 0.16780653595924377, + "learning_rate": 4.721825302033342e-06, + "loss": 0.9678, + "step": 38430 + }, + { + "epoch": 0.2782543232933035, + "grad_norm": 0.16471821069717407, + "learning_rate": 4.721752915372755e-06, + "loss": 0.9706, + "step": 38440 + }, + { + "epoch": 0.2783267099538897, + "grad_norm": 0.1728796362876892, + "learning_rate": 4.721680528712169e-06, + "loss": 0.9657, + "step": 38450 + }, + { + "epoch": 0.27839909661447587, + "grad_norm": 0.15473975241184235, + "learning_rate": 4.721608142051583e-06, + "loss": 0.9683, + "step": 38460 + }, + { + "epoch": 0.27847148327506205, + "grad_norm": 0.15741673111915588, + "learning_rate": 4.721535755390997e-06, + "loss": 0.9775, + "step": 38470 + }, + { + "epoch": 0.2785438699356483, + "grad_norm": 0.1729719191789627, + "learning_rate": 4.721463368730411e-06, + "loss": 0.973, + "step": 38480 + }, + { + "epoch": 0.27861625659623446, + "grad_norm": 0.16401894390583038, + "learning_rate": 4.721390982069824e-06, + "loss": 0.9589, + "step": 38490 + }, + { + "epoch": 0.27868864325682063, + "grad_norm": 0.16134223341941833, + "learning_rate": 4.721318595409239e-06, + "loss": 0.9797, + "step": 38500 + }, + { + "epoch": 0.2787610299174068, + "grad_norm": 0.1627780795097351, + "learning_rate": 4.721246208748652e-06, + "loss": 0.976, + "step": 38510 + }, + { + "epoch": 0.278833416577993, + "grad_norm": 0.15430185198783875, + "learning_rate": 4.721173822088066e-06, + "loss": 0.9683, + "step": 38520 + }, + { + "epoch": 0.2789058032385792, + "grad_norm": 0.18733610212802887, + "learning_rate": 4.7211014354274795e-06, + "loss": 0.9688, + "step": 38530 + }, + { + "epoch": 0.2789781898991654, + "grad_norm": 0.16492965817451477, + "learning_rate": 4.721029048766894e-06, + "loss": 0.9683, + "step": 38540 + }, + { + "epoch": 0.2790505765597516, + "grad_norm": 0.1765565425157547, + "learning_rate": 4.720956662106308e-06, + "loss": 0.9645, + "step": 38550 + }, + { + "epoch": 0.27912296322033775, + "grad_norm": 0.18229708075523376, + "learning_rate": 4.720884275445721e-06, + "loss": 0.9562, + "step": 38560 + }, + { + "epoch": 0.2791953498809239, + "grad_norm": 0.175114244222641, + "learning_rate": 4.720811888785135e-06, + "loss": 0.959, + "step": 38570 + }, + { + "epoch": 0.2792677365415101, + "grad_norm": 0.18212807178497314, + "learning_rate": 4.720739502124549e-06, + "loss": 0.9662, + "step": 38580 + }, + { + "epoch": 0.27934012320209634, + "grad_norm": 0.20221389830112457, + "learning_rate": 4.720667115463963e-06, + "loss": 0.9644, + "step": 38590 + }, + { + "epoch": 0.2794125098626825, + "grad_norm": 0.16933338344097137, + "learning_rate": 4.7205947288033766e-06, + "loss": 0.9783, + "step": 38600 + }, + { + "epoch": 0.2794848965232687, + "grad_norm": 0.16471229493618011, + "learning_rate": 4.72052234214279e-06, + "loss": 0.9816, + "step": 38610 + }, + { + "epoch": 0.27955728318385487, + "grad_norm": 0.16443496942520142, + "learning_rate": 4.720449955482204e-06, + "loss": 0.9757, + "step": 38620 + }, + { + "epoch": 0.27962966984444104, + "grad_norm": 0.17601965367794037, + "learning_rate": 4.720377568821618e-06, + "loss": 0.9697, + "step": 38630 + }, + { + "epoch": 0.2797020565050273, + "grad_norm": 0.1578618586063385, + "learning_rate": 4.720305182161032e-06, + "loss": 0.9539, + "step": 38640 + }, + { + "epoch": 0.27977444316561345, + "grad_norm": 0.1648857295513153, + "learning_rate": 4.7202327955004455e-06, + "loss": 0.9604, + "step": 38650 + }, + { + "epoch": 0.27984682982619963, + "grad_norm": 0.1769489347934723, + "learning_rate": 4.720160408839859e-06, + "loss": 0.9612, + "step": 38660 + }, + { + "epoch": 0.2799192164867858, + "grad_norm": 0.17651750147342682, + "learning_rate": 4.7200880221792736e-06, + "loss": 0.976, + "step": 38670 + }, + { + "epoch": 0.279991603147372, + "grad_norm": 0.1547250896692276, + "learning_rate": 4.720015635518687e-06, + "loss": 0.9679, + "step": 38680 + }, + { + "epoch": 0.2800639898079582, + "grad_norm": 0.15348443388938904, + "learning_rate": 4.719943248858101e-06, + "loss": 0.9766, + "step": 38690 + }, + { + "epoch": 0.2801363764685444, + "grad_norm": 0.1610197126865387, + "learning_rate": 4.719870862197514e-06, + "loss": 0.9837, + "step": 38700 + }, + { + "epoch": 0.28020876312913057, + "grad_norm": 0.21418476104736328, + "learning_rate": 4.719798475536929e-06, + "loss": 0.9665, + "step": 38710 + }, + { + "epoch": 0.28028114978971674, + "grad_norm": 0.17171064019203186, + "learning_rate": 4.7197260888763425e-06, + "loss": 0.9687, + "step": 38720 + }, + { + "epoch": 0.2803535364503029, + "grad_norm": 0.1554517149925232, + "learning_rate": 4.719653702215756e-06, + "loss": 0.9636, + "step": 38730 + }, + { + "epoch": 0.28042592311088915, + "grad_norm": 0.16150425374507904, + "learning_rate": 4.71958131555517e-06, + "loss": 0.978, + "step": 38740 + }, + { + "epoch": 0.28049830977147533, + "grad_norm": 0.16200552880764008, + "learning_rate": 4.719508928894584e-06, + "loss": 0.9618, + "step": 38750 + }, + { + "epoch": 0.2805706964320615, + "grad_norm": 0.16305282711982727, + "learning_rate": 4.719436542233998e-06, + "loss": 0.9762, + "step": 38760 + }, + { + "epoch": 0.2806430830926477, + "grad_norm": 0.16002973914146423, + "learning_rate": 4.719364155573411e-06, + "loss": 0.9829, + "step": 38770 + }, + { + "epoch": 0.28071546975323386, + "grad_norm": 0.1760619580745697, + "learning_rate": 4.719291768912825e-06, + "loss": 0.9737, + "step": 38780 + }, + { + "epoch": 0.28078785641382004, + "grad_norm": 0.15010814368724823, + "learning_rate": 4.7192193822522395e-06, + "loss": 0.9658, + "step": 38790 + }, + { + "epoch": 0.28086024307440627, + "grad_norm": 0.1825767457485199, + "learning_rate": 4.719146995591653e-06, + "loss": 0.975, + "step": 38800 + }, + { + "epoch": 0.28093262973499245, + "grad_norm": 0.17386701703071594, + "learning_rate": 4.719074608931067e-06, + "loss": 0.9684, + "step": 38810 + }, + { + "epoch": 0.2810050163955786, + "grad_norm": 0.15754207968711853, + "learning_rate": 4.71900222227048e-06, + "loss": 0.9836, + "step": 38820 + }, + { + "epoch": 0.2810774030561648, + "grad_norm": 0.1641159951686859, + "learning_rate": 4.718929835609894e-06, + "loss": 0.9741, + "step": 38830 + }, + { + "epoch": 0.281149789716751, + "grad_norm": 0.16686658561229706, + "learning_rate": 4.7188574489493076e-06, + "loss": 0.9835, + "step": 38840 + }, + { + "epoch": 0.2812221763773372, + "grad_norm": 0.17113393545150757, + "learning_rate": 4.718785062288721e-06, + "loss": 0.9675, + "step": 38850 + }, + { + "epoch": 0.2812945630379234, + "grad_norm": 0.19541935622692108, + "learning_rate": 4.718712675628136e-06, + "loss": 0.971, + "step": 38860 + }, + { + "epoch": 0.28136694969850956, + "grad_norm": 0.171500563621521, + "learning_rate": 4.718640288967549e-06, + "loss": 0.9693, + "step": 38870 + }, + { + "epoch": 0.28143933635909574, + "grad_norm": 0.1516040712594986, + "learning_rate": 4.718567902306963e-06, + "loss": 0.9637, + "step": 38880 + }, + { + "epoch": 0.2815117230196819, + "grad_norm": 0.1661519557237625, + "learning_rate": 4.7184955156463765e-06, + "loss": 0.9658, + "step": 38890 + }, + { + "epoch": 0.28158410968026815, + "grad_norm": 0.16510553658008575, + "learning_rate": 4.718423128985791e-06, + "loss": 0.9776, + "step": 38900 + }, + { + "epoch": 0.2816564963408543, + "grad_norm": 0.386662095785141, + "learning_rate": 4.718350742325205e-06, + "loss": 0.964, + "step": 38910 + }, + { + "epoch": 0.2817288830014405, + "grad_norm": 0.1649596393108368, + "learning_rate": 4.718278355664618e-06, + "loss": 0.9717, + "step": 38920 + }, + { + "epoch": 0.2818012696620267, + "grad_norm": 0.17319610714912415, + "learning_rate": 4.718205969004032e-06, + "loss": 0.9752, + "step": 38930 + }, + { + "epoch": 0.28187365632261285, + "grad_norm": 0.16159221529960632, + "learning_rate": 4.718133582343446e-06, + "loss": 0.9815, + "step": 38940 + }, + { + "epoch": 0.28194604298319903, + "grad_norm": 0.17223593592643738, + "learning_rate": 4.71806119568286e-06, + "loss": 0.9752, + "step": 38950 + }, + { + "epoch": 0.28201842964378526, + "grad_norm": 0.15941330790519714, + "learning_rate": 4.7179888090222735e-06, + "loss": 0.97, + "step": 38960 + }, + { + "epoch": 0.28209081630437144, + "grad_norm": 0.17768092453479767, + "learning_rate": 4.717916422361687e-06, + "loss": 0.9608, + "step": 38970 + }, + { + "epoch": 0.2821632029649576, + "grad_norm": 0.18235769867897034, + "learning_rate": 4.717844035701102e-06, + "loss": 0.9795, + "step": 38980 + }, + { + "epoch": 0.2822355896255438, + "grad_norm": 0.1654520183801651, + "learning_rate": 4.717771649040515e-06, + "loss": 0.9743, + "step": 38990 + }, + { + "epoch": 0.28230797628612997, + "grad_norm": 0.17783403396606445, + "learning_rate": 4.717699262379929e-06, + "loss": 0.977, + "step": 39000 + }, + { + "epoch": 0.2823803629467162, + "grad_norm": 0.15796136856079102, + "learning_rate": 4.7176268757193424e-06, + "loss": 0.9702, + "step": 39010 + }, + { + "epoch": 0.2824527496073024, + "grad_norm": 0.16155339777469635, + "learning_rate": 4.717554489058757e-06, + "loss": 0.9734, + "step": 39020 + }, + { + "epoch": 0.28252513626788855, + "grad_norm": 0.16858787834644318, + "learning_rate": 4.7174821023981705e-06, + "loss": 0.9644, + "step": 39030 + }, + { + "epoch": 0.28259752292847473, + "grad_norm": 0.17033177614212036, + "learning_rate": 4.717409715737584e-06, + "loss": 0.981, + "step": 39040 + }, + { + "epoch": 0.2826699095890609, + "grad_norm": 0.17069222033023834, + "learning_rate": 4.717337329076998e-06, + "loss": 0.974, + "step": 39050 + }, + { + "epoch": 0.28274229624964714, + "grad_norm": 0.17629200220108032, + "learning_rate": 4.717264942416412e-06, + "loss": 0.9717, + "step": 39060 + }, + { + "epoch": 0.2828146829102333, + "grad_norm": 0.1703384816646576, + "learning_rate": 4.717192555755826e-06, + "loss": 0.9781, + "step": 39070 + }, + { + "epoch": 0.2828870695708195, + "grad_norm": 0.16142185032367706, + "learning_rate": 4.7171201690952394e-06, + "loss": 0.9576, + "step": 39080 + }, + { + "epoch": 0.28295945623140567, + "grad_norm": 0.1647561937570572, + "learning_rate": 4.717047782434653e-06, + "loss": 0.9738, + "step": 39090 + }, + { + "epoch": 0.28303184289199185, + "grad_norm": 0.16790105402469635, + "learning_rate": 4.7169753957740675e-06, + "loss": 0.9632, + "step": 39100 + }, + { + "epoch": 0.283104229552578, + "grad_norm": 0.16049116849899292, + "learning_rate": 4.716903009113481e-06, + "loss": 0.9809, + "step": 39110 + }, + { + "epoch": 0.28317661621316426, + "grad_norm": 0.16163350641727448, + "learning_rate": 4.716830622452895e-06, + "loss": 0.9568, + "step": 39120 + }, + { + "epoch": 0.28324900287375043, + "grad_norm": 0.16700412333011627, + "learning_rate": 4.716758235792308e-06, + "loss": 0.9724, + "step": 39130 + }, + { + "epoch": 0.2833213895343366, + "grad_norm": 0.17913348972797394, + "learning_rate": 4.716685849131723e-06, + "loss": 0.9634, + "step": 39140 + }, + { + "epoch": 0.2833937761949228, + "grad_norm": 0.1574385166168213, + "learning_rate": 4.7166134624711365e-06, + "loss": 0.98, + "step": 39150 + }, + { + "epoch": 0.28346616285550896, + "grad_norm": 0.15847839415073395, + "learning_rate": 4.71654107581055e-06, + "loss": 0.9686, + "step": 39160 + }, + { + "epoch": 0.2835385495160952, + "grad_norm": 0.18322902917861938, + "learning_rate": 4.716468689149964e-06, + "loss": 0.9731, + "step": 39170 + }, + { + "epoch": 0.28361093617668137, + "grad_norm": 0.155546173453331, + "learning_rate": 4.716396302489378e-06, + "loss": 0.9539, + "step": 39180 + }, + { + "epoch": 0.28368332283726755, + "grad_norm": 0.17779536545276642, + "learning_rate": 4.716323915828792e-06, + "loss": 0.9655, + "step": 39190 + }, + { + "epoch": 0.2837557094978537, + "grad_norm": 0.1730605959892273, + "learning_rate": 4.716251529168205e-06, + "loss": 0.9726, + "step": 39200 + }, + { + "epoch": 0.2838280961584399, + "grad_norm": 0.19149181246757507, + "learning_rate": 4.716179142507619e-06, + "loss": 0.9723, + "step": 39210 + }, + { + "epoch": 0.28390048281902613, + "grad_norm": 0.15683695673942566, + "learning_rate": 4.716106755847033e-06, + "loss": 0.974, + "step": 39220 + }, + { + "epoch": 0.2839728694796123, + "grad_norm": 0.1871178299188614, + "learning_rate": 4.716034369186447e-06, + "loss": 0.9814, + "step": 39230 + }, + { + "epoch": 0.2840452561401985, + "grad_norm": 0.25293681025505066, + "learning_rate": 4.715961982525861e-06, + "loss": 0.9697, + "step": 39240 + }, + { + "epoch": 0.28411764280078466, + "grad_norm": 0.18877732753753662, + "learning_rate": 4.715889595865274e-06, + "loss": 0.9743, + "step": 39250 + }, + { + "epoch": 0.28419002946137084, + "grad_norm": 0.17080391943454742, + "learning_rate": 4.715817209204688e-06, + "loss": 0.9627, + "step": 39260 + }, + { + "epoch": 0.2842624161219571, + "grad_norm": 0.1600443720817566, + "learning_rate": 4.715744822544102e-06, + "loss": 0.9624, + "step": 39270 + }, + { + "epoch": 0.28433480278254325, + "grad_norm": 0.18293151259422302, + "learning_rate": 4.715672435883516e-06, + "loss": 0.9775, + "step": 39280 + }, + { + "epoch": 0.2844071894431294, + "grad_norm": 0.17442253232002258, + "learning_rate": 4.71560004922293e-06, + "loss": 0.9586, + "step": 39290 + }, + { + "epoch": 0.2844795761037156, + "grad_norm": 0.1556355059146881, + "learning_rate": 4.715527662562343e-06, + "loss": 0.9798, + "step": 39300 + }, + { + "epoch": 0.2845519627643018, + "grad_norm": 0.18220852315425873, + "learning_rate": 4.715455275901758e-06, + "loss": 0.97, + "step": 39310 + }, + { + "epoch": 0.28462434942488796, + "grad_norm": 0.1559944748878479, + "learning_rate": 4.715382889241171e-06, + "loss": 0.9807, + "step": 39320 + }, + { + "epoch": 0.2846967360854742, + "grad_norm": 0.1595684438943863, + "learning_rate": 4.715310502580585e-06, + "loss": 0.9567, + "step": 39330 + }, + { + "epoch": 0.28476912274606037, + "grad_norm": 0.18207839131355286, + "learning_rate": 4.7152381159199985e-06, + "loss": 0.9801, + "step": 39340 + }, + { + "epoch": 0.28484150940664654, + "grad_norm": 0.1691935509443283, + "learning_rate": 4.715165729259413e-06, + "loss": 0.9671, + "step": 39350 + }, + { + "epoch": 0.2849138960672327, + "grad_norm": 0.16009429097175598, + "learning_rate": 4.715093342598826e-06, + "loss": 0.9612, + "step": 39360 + }, + { + "epoch": 0.2849862827278189, + "grad_norm": 0.16721150279045105, + "learning_rate": 4.71502095593824e-06, + "loss": 0.9633, + "step": 39370 + }, + { + "epoch": 0.28505866938840513, + "grad_norm": 0.16945244371891022, + "learning_rate": 4.714948569277654e-06, + "loss": 0.9679, + "step": 39380 + }, + { + "epoch": 0.2851310560489913, + "grad_norm": 0.16579945385456085, + "learning_rate": 4.7148761826170675e-06, + "loss": 0.9525, + "step": 39390 + }, + { + "epoch": 0.2852034427095775, + "grad_norm": 0.17413263022899628, + "learning_rate": 4.714803795956481e-06, + "loss": 0.9631, + "step": 39400 + }, + { + "epoch": 0.28527582937016366, + "grad_norm": 0.16317112743854523, + "learning_rate": 4.714731409295895e-06, + "loss": 0.9699, + "step": 39410 + }, + { + "epoch": 0.28534821603074983, + "grad_norm": 0.16122910380363464, + "learning_rate": 4.714659022635309e-06, + "loss": 0.9658, + "step": 39420 + }, + { + "epoch": 0.28542060269133607, + "grad_norm": 0.17023932933807373, + "learning_rate": 4.714586635974723e-06, + "loss": 0.9697, + "step": 39430 + }, + { + "epoch": 0.28549298935192224, + "grad_norm": 0.17641928791999817, + "learning_rate": 4.714514249314136e-06, + "loss": 0.9823, + "step": 39440 + }, + { + "epoch": 0.2855653760125084, + "grad_norm": 0.16606158018112183, + "learning_rate": 4.71444186265355e-06, + "loss": 0.9721, + "step": 39450 + }, + { + "epoch": 0.2856377626730946, + "grad_norm": 0.1534787118434906, + "learning_rate": 4.7143694759929645e-06, + "loss": 0.976, + "step": 39460 + }, + { + "epoch": 0.2857101493336808, + "grad_norm": 0.15776877105236053, + "learning_rate": 4.714297089332378e-06, + "loss": 0.9708, + "step": 39470 + }, + { + "epoch": 0.28578253599426695, + "grad_norm": 0.1588585376739502, + "learning_rate": 4.714224702671792e-06, + "loss": 0.9766, + "step": 39480 + }, + { + "epoch": 0.2858549226548532, + "grad_norm": 0.16405107080936432, + "learning_rate": 4.714152316011205e-06, + "loss": 0.9766, + "step": 39490 + }, + { + "epoch": 0.28592730931543936, + "grad_norm": 0.15826919674873352, + "learning_rate": 4.71407992935062e-06, + "loss": 0.987, + "step": 39500 + }, + { + "epoch": 0.28599969597602554, + "grad_norm": 0.166813924908638, + "learning_rate": 4.714007542690033e-06, + "loss": 0.9616, + "step": 39510 + }, + { + "epoch": 0.2860720826366117, + "grad_norm": 0.15735673904418945, + "learning_rate": 4.713935156029447e-06, + "loss": 0.9719, + "step": 39520 + }, + { + "epoch": 0.2861444692971979, + "grad_norm": 0.16577894985675812, + "learning_rate": 4.713862769368861e-06, + "loss": 0.9661, + "step": 39530 + }, + { + "epoch": 0.2862168559577841, + "grad_norm": 0.1636783629655838, + "learning_rate": 4.713790382708275e-06, + "loss": 0.966, + "step": 39540 + }, + { + "epoch": 0.2862892426183703, + "grad_norm": 0.16773077845573425, + "learning_rate": 4.713717996047689e-06, + "loss": 0.9675, + "step": 39550 + }, + { + "epoch": 0.2863616292789565, + "grad_norm": 0.16528858244419098, + "learning_rate": 4.713645609387102e-06, + "loss": 0.953, + "step": 39560 + }, + { + "epoch": 0.28643401593954265, + "grad_norm": 0.178873211145401, + "learning_rate": 4.713573222726516e-06, + "loss": 0.9768, + "step": 39570 + }, + { + "epoch": 0.28650640260012883, + "grad_norm": 0.18183675408363342, + "learning_rate": 4.71350083606593e-06, + "loss": 0.9704, + "step": 39580 + }, + { + "epoch": 0.28657878926071506, + "grad_norm": 0.16120202839374542, + "learning_rate": 4.713428449405344e-06, + "loss": 0.9816, + "step": 39590 + }, + { + "epoch": 0.28665117592130124, + "grad_norm": 0.20635010302066803, + "learning_rate": 4.713356062744758e-06, + "loss": 0.9583, + "step": 39600 + }, + { + "epoch": 0.2867235625818874, + "grad_norm": 0.1598854809999466, + "learning_rate": 4.713283676084171e-06, + "loss": 0.9664, + "step": 39610 + }, + { + "epoch": 0.2867959492424736, + "grad_norm": 0.16559354960918427, + "learning_rate": 4.713211289423586e-06, + "loss": 0.9564, + "step": 39620 + }, + { + "epoch": 0.28686833590305977, + "grad_norm": 0.18220894038677216, + "learning_rate": 4.713138902762999e-06, + "loss": 0.9688, + "step": 39630 + }, + { + "epoch": 0.28694072256364594, + "grad_norm": 0.16989223659038544, + "learning_rate": 4.713066516102413e-06, + "loss": 0.9781, + "step": 39640 + }, + { + "epoch": 0.2870131092242322, + "grad_norm": 0.16962113976478577, + "learning_rate": 4.712994129441827e-06, + "loss": 0.9771, + "step": 39650 + }, + { + "epoch": 0.28708549588481835, + "grad_norm": 0.16623146831989288, + "learning_rate": 4.712921742781241e-06, + "loss": 0.9551, + "step": 39660 + }, + { + "epoch": 0.28715788254540453, + "grad_norm": 0.1545698344707489, + "learning_rate": 4.712849356120655e-06, + "loss": 0.9565, + "step": 39670 + }, + { + "epoch": 0.2872302692059907, + "grad_norm": 0.19971857964992523, + "learning_rate": 4.712776969460068e-06, + "loss": 0.9566, + "step": 39680 + }, + { + "epoch": 0.2873026558665769, + "grad_norm": 0.1566108763217926, + "learning_rate": 4.712704582799482e-06, + "loss": 0.9696, + "step": 39690 + }, + { + "epoch": 0.2873750425271631, + "grad_norm": 0.16341611742973328, + "learning_rate": 4.712632196138896e-06, + "loss": 0.9674, + "step": 39700 + }, + { + "epoch": 0.2874474291877493, + "grad_norm": 0.18505293130874634, + "learning_rate": 4.71255980947831e-06, + "loss": 0.9693, + "step": 39710 + }, + { + "epoch": 0.28751981584833547, + "grad_norm": 0.167031392455101, + "learning_rate": 4.712487422817724e-06, + "loss": 0.9645, + "step": 39720 + }, + { + "epoch": 0.28759220250892165, + "grad_norm": 0.17869937419891357, + "learning_rate": 4.712415036157137e-06, + "loss": 0.9805, + "step": 39730 + }, + { + "epoch": 0.2876645891695078, + "grad_norm": 0.16606245934963226, + "learning_rate": 4.712342649496552e-06, + "loss": 0.9612, + "step": 39740 + }, + { + "epoch": 0.28773697583009405, + "grad_norm": 0.19421431422233582, + "learning_rate": 4.712270262835965e-06, + "loss": 0.9731, + "step": 39750 + }, + { + "epoch": 0.28780936249068023, + "grad_norm": 0.17621836066246033, + "learning_rate": 4.712197876175379e-06, + "loss": 0.9697, + "step": 39760 + }, + { + "epoch": 0.2878817491512664, + "grad_norm": 0.1548440009355545, + "learning_rate": 4.7121254895147925e-06, + "loss": 0.9666, + "step": 39770 + }, + { + "epoch": 0.2879541358118526, + "grad_norm": 0.18306025862693787, + "learning_rate": 4.712053102854207e-06, + "loss": 0.9628, + "step": 39780 + }, + { + "epoch": 0.28802652247243876, + "grad_norm": 0.16512498259544373, + "learning_rate": 4.711980716193621e-06, + "loss": 0.968, + "step": 39790 + }, + { + "epoch": 0.28809890913302494, + "grad_norm": 0.17831869423389435, + "learning_rate": 4.711908329533034e-06, + "loss": 0.9739, + "step": 39800 + }, + { + "epoch": 0.28817129579361117, + "grad_norm": 0.15997859835624695, + "learning_rate": 4.711835942872448e-06, + "loss": 0.9684, + "step": 39810 + }, + { + "epoch": 0.28824368245419735, + "grad_norm": 0.1619550734758377, + "learning_rate": 4.711763556211862e-06, + "loss": 0.9792, + "step": 39820 + }, + { + "epoch": 0.2883160691147835, + "grad_norm": 0.1704218089580536, + "learning_rate": 4.711691169551276e-06, + "loss": 0.9676, + "step": 39830 + }, + { + "epoch": 0.2883884557753697, + "grad_norm": 0.17263592779636383, + "learning_rate": 4.7116187828906895e-06, + "loss": 0.9774, + "step": 39840 + }, + { + "epoch": 0.2884608424359559, + "grad_norm": 0.1720539629459381, + "learning_rate": 4.711546396230103e-06, + "loss": 0.9691, + "step": 39850 + }, + { + "epoch": 0.2885332290965421, + "grad_norm": 0.2420406937599182, + "learning_rate": 4.711474009569517e-06, + "loss": 0.9788, + "step": 39860 + }, + { + "epoch": 0.2886056157571283, + "grad_norm": 0.17428769171237946, + "learning_rate": 4.711401622908931e-06, + "loss": 0.9727, + "step": 39870 + }, + { + "epoch": 0.28867800241771446, + "grad_norm": 0.17469775676727295, + "learning_rate": 4.711329236248345e-06, + "loss": 0.9748, + "step": 39880 + }, + { + "epoch": 0.28875038907830064, + "grad_norm": 0.17111310362815857, + "learning_rate": 4.7112568495877585e-06, + "loss": 0.9626, + "step": 39890 + }, + { + "epoch": 0.2888227757388868, + "grad_norm": 0.19012178480625153, + "learning_rate": 4.711184462927172e-06, + "loss": 0.9675, + "step": 39900 + }, + { + "epoch": 0.28889516239947305, + "grad_norm": 0.16587407886981964, + "learning_rate": 4.711112076266586e-06, + "loss": 0.9665, + "step": 39910 + }, + { + "epoch": 0.2889675490600592, + "grad_norm": 0.17747098207473755, + "learning_rate": 4.711039689605999e-06, + "loss": 0.975, + "step": 39920 + }, + { + "epoch": 0.2890399357206454, + "grad_norm": 0.15955297648906708, + "learning_rate": 4.710967302945414e-06, + "loss": 0.9633, + "step": 39930 + }, + { + "epoch": 0.2891123223812316, + "grad_norm": 0.1651538610458374, + "learning_rate": 4.710894916284827e-06, + "loss": 0.9611, + "step": 39940 + }, + { + "epoch": 0.28918470904181776, + "grad_norm": 0.15846842527389526, + "learning_rate": 4.710822529624241e-06, + "loss": 0.9699, + "step": 39950 + }, + { + "epoch": 0.289257095702404, + "grad_norm": 0.17200906574726105, + "learning_rate": 4.710750142963655e-06, + "loss": 0.9759, + "step": 39960 + }, + { + "epoch": 0.28932948236299016, + "grad_norm": 0.18310053646564484, + "learning_rate": 4.710677756303069e-06, + "loss": 0.9855, + "step": 39970 + }, + { + "epoch": 0.28940186902357634, + "grad_norm": 0.17805758118629456, + "learning_rate": 4.710605369642483e-06, + "loss": 0.9754, + "step": 39980 + }, + { + "epoch": 0.2894742556841625, + "grad_norm": 0.164411723613739, + "learning_rate": 4.710532982981896e-06, + "loss": 0.9643, + "step": 39990 + }, + { + "epoch": 0.2895466423447487, + "grad_norm": 0.17421521246433258, + "learning_rate": 4.71046059632131e-06, + "loss": 0.9709, + "step": 40000 + }, + { + "epoch": 0.28961902900533487, + "grad_norm": 0.17790335416793823, + "learning_rate": 4.710388209660724e-06, + "loss": 0.9615, + "step": 40010 + }, + { + "epoch": 0.2896914156659211, + "grad_norm": 0.17614027857780457, + "learning_rate": 4.710315823000138e-06, + "loss": 0.972, + "step": 40020 + }, + { + "epoch": 0.2897638023265073, + "grad_norm": 0.17423102259635925, + "learning_rate": 4.710243436339552e-06, + "loss": 0.9799, + "step": 40030 + }, + { + "epoch": 0.28983618898709346, + "grad_norm": 0.16734719276428223, + "learning_rate": 4.710171049678965e-06, + "loss": 0.9802, + "step": 40040 + }, + { + "epoch": 0.28990857564767963, + "grad_norm": 0.18581393361091614, + "learning_rate": 4.710098663018379e-06, + "loss": 0.9651, + "step": 40050 + }, + { + "epoch": 0.2899809623082658, + "grad_norm": 0.16907867789268494, + "learning_rate": 4.710026276357793e-06, + "loss": 0.981, + "step": 40060 + }, + { + "epoch": 0.29005334896885204, + "grad_norm": 0.164125457406044, + "learning_rate": 4.709953889697207e-06, + "loss": 0.9739, + "step": 40070 + }, + { + "epoch": 0.2901257356294382, + "grad_norm": 0.1936989575624466, + "learning_rate": 4.7098815030366205e-06, + "loss": 0.9565, + "step": 40080 + }, + { + "epoch": 0.2901981222900244, + "grad_norm": 0.16564348340034485, + "learning_rate": 4.709809116376034e-06, + "loss": 0.9671, + "step": 40090 + }, + { + "epoch": 0.2902705089506106, + "grad_norm": 0.17586882412433624, + "learning_rate": 4.709736729715449e-06, + "loss": 0.9709, + "step": 40100 + }, + { + "epoch": 0.29034289561119675, + "grad_norm": 0.18018198013305664, + "learning_rate": 4.709664343054862e-06, + "loss": 0.9725, + "step": 40110 + }, + { + "epoch": 0.290415282271783, + "grad_norm": 0.1636233627796173, + "learning_rate": 4.709591956394276e-06, + "loss": 0.9762, + "step": 40120 + }, + { + "epoch": 0.29048766893236916, + "grad_norm": 0.1633533090353012, + "learning_rate": 4.7095195697336895e-06, + "loss": 0.972, + "step": 40130 + }, + { + "epoch": 0.29056005559295534, + "grad_norm": 0.18192791938781738, + "learning_rate": 4.709447183073104e-06, + "loss": 0.9612, + "step": 40140 + }, + { + "epoch": 0.2906324422535415, + "grad_norm": 0.17899948358535767, + "learning_rate": 4.7093747964125176e-06, + "loss": 0.9708, + "step": 40150 + }, + { + "epoch": 0.2907048289141277, + "grad_norm": 0.17250950634479523, + "learning_rate": 4.709302409751931e-06, + "loss": 0.9764, + "step": 40160 + }, + { + "epoch": 0.29077721557471387, + "grad_norm": 0.1687665432691574, + "learning_rate": 4.709230023091345e-06, + "loss": 0.9702, + "step": 40170 + }, + { + "epoch": 0.2908496022353001, + "grad_norm": 0.1669207513332367, + "learning_rate": 4.709157636430759e-06, + "loss": 0.9824, + "step": 40180 + }, + { + "epoch": 0.2909219888958863, + "grad_norm": 0.16087938845157623, + "learning_rate": 4.709085249770173e-06, + "loss": 0.9607, + "step": 40190 + }, + { + "epoch": 0.29099437555647245, + "grad_norm": 0.18034091591835022, + "learning_rate": 4.7090128631095865e-06, + "loss": 0.9671, + "step": 40200 + }, + { + "epoch": 0.2910667622170586, + "grad_norm": 0.1801455020904541, + "learning_rate": 4.708940476449e-06, + "loss": 0.975, + "step": 40210 + }, + { + "epoch": 0.2911391488776448, + "grad_norm": 0.16992124915122986, + "learning_rate": 4.7088680897884146e-06, + "loss": 0.9597, + "step": 40220 + }, + { + "epoch": 0.29121153553823104, + "grad_norm": 0.1644294410943985, + "learning_rate": 4.708795703127828e-06, + "loss": 0.97, + "step": 40230 + }, + { + "epoch": 0.2912839221988172, + "grad_norm": 0.16996628046035767, + "learning_rate": 4.708723316467242e-06, + "loss": 0.9661, + "step": 40240 + }, + { + "epoch": 0.2913563088594034, + "grad_norm": 0.1804681122303009, + "learning_rate": 4.708650929806655e-06, + "loss": 0.9691, + "step": 40250 + }, + { + "epoch": 0.29142869551998957, + "grad_norm": 0.1625206172466278, + "learning_rate": 4.70857854314607e-06, + "loss": 0.9725, + "step": 40260 + }, + { + "epoch": 0.29150108218057574, + "grad_norm": 0.15384940803050995, + "learning_rate": 4.7085061564854835e-06, + "loss": 0.9657, + "step": 40270 + }, + { + "epoch": 0.291573468841162, + "grad_norm": 0.18380320072174072, + "learning_rate": 4.708433769824897e-06, + "loss": 0.9646, + "step": 40280 + }, + { + "epoch": 0.29164585550174815, + "grad_norm": 0.17061907052993774, + "learning_rate": 4.708361383164311e-06, + "loss": 0.9681, + "step": 40290 + }, + { + "epoch": 0.29171824216233433, + "grad_norm": 0.18094012141227722, + "learning_rate": 4.708288996503725e-06, + "loss": 0.9638, + "step": 40300 + }, + { + "epoch": 0.2917906288229205, + "grad_norm": 0.1657949686050415, + "learning_rate": 4.708216609843139e-06, + "loss": 0.9689, + "step": 40310 + }, + { + "epoch": 0.2918630154835067, + "grad_norm": 0.28880590200424194, + "learning_rate": 4.708144223182552e-06, + "loss": 0.9702, + "step": 40320 + }, + { + "epoch": 0.29193540214409286, + "grad_norm": 0.16735604405403137, + "learning_rate": 4.708071836521966e-06, + "loss": 0.9845, + "step": 40330 + }, + { + "epoch": 0.2920077888046791, + "grad_norm": 0.17504291236400604, + "learning_rate": 4.7079994498613805e-06, + "loss": 0.9572, + "step": 40340 + }, + { + "epoch": 0.29208017546526527, + "grad_norm": 0.15563970804214478, + "learning_rate": 4.707927063200794e-06, + "loss": 0.9574, + "step": 40350 + }, + { + "epoch": 0.29215256212585144, + "grad_norm": 0.15227772295475006, + "learning_rate": 4.707854676540208e-06, + "loss": 0.9612, + "step": 40360 + }, + { + "epoch": 0.2922249487864376, + "grad_norm": 0.16515760123729706, + "learning_rate": 4.707782289879621e-06, + "loss": 0.9519, + "step": 40370 + }, + { + "epoch": 0.2922973354470238, + "grad_norm": 0.17312021553516388, + "learning_rate": 4.707709903219036e-06, + "loss": 0.9707, + "step": 40380 + }, + { + "epoch": 0.29236972210761003, + "grad_norm": 0.16830036044120789, + "learning_rate": 4.7076375165584494e-06, + "loss": 0.9557, + "step": 40390 + }, + { + "epoch": 0.2924421087681962, + "grad_norm": 0.15725110471248627, + "learning_rate": 4.707565129897863e-06, + "loss": 0.9652, + "step": 40400 + }, + { + "epoch": 0.2925144954287824, + "grad_norm": 0.16694726049900055, + "learning_rate": 4.707492743237277e-06, + "loss": 0.9682, + "step": 40410 + }, + { + "epoch": 0.29258688208936856, + "grad_norm": 0.16128145158290863, + "learning_rate": 4.70742035657669e-06, + "loss": 0.9658, + "step": 40420 + }, + { + "epoch": 0.29265926874995474, + "grad_norm": 0.16756875813007355, + "learning_rate": 4.707347969916104e-06, + "loss": 0.9768, + "step": 40430 + }, + { + "epoch": 0.29273165541054097, + "grad_norm": 0.1622963845729828, + "learning_rate": 4.7072755832555175e-06, + "loss": 0.9754, + "step": 40440 + }, + { + "epoch": 0.29280404207112715, + "grad_norm": 0.15665726363658905, + "learning_rate": 4.707203196594932e-06, + "loss": 0.9553, + "step": 40450 + }, + { + "epoch": 0.2928764287317133, + "grad_norm": 0.17446359992027283, + "learning_rate": 4.707130809934346e-06, + "loss": 0.9724, + "step": 40460 + }, + { + "epoch": 0.2929488153922995, + "grad_norm": 0.16420406103134155, + "learning_rate": 4.707058423273759e-06, + "loss": 0.972, + "step": 40470 + }, + { + "epoch": 0.2930212020528857, + "grad_norm": 0.1752181053161621, + "learning_rate": 4.706986036613173e-06, + "loss": 0.9645, + "step": 40480 + }, + { + "epoch": 0.2930935887134719, + "grad_norm": 0.16292783617973328, + "learning_rate": 4.706913649952587e-06, + "loss": 0.9608, + "step": 40490 + }, + { + "epoch": 0.2931659753740581, + "grad_norm": 0.15894421935081482, + "learning_rate": 4.706841263292001e-06, + "loss": 0.955, + "step": 40500 + }, + { + "epoch": 0.29323836203464426, + "grad_norm": 0.1527421772480011, + "learning_rate": 4.7067688766314145e-06, + "loss": 0.9607, + "step": 40510 + }, + { + "epoch": 0.29331074869523044, + "grad_norm": 0.1576659232378006, + "learning_rate": 4.706696489970828e-06, + "loss": 0.9614, + "step": 40520 + }, + { + "epoch": 0.2933831353558166, + "grad_norm": 0.1652728021144867, + "learning_rate": 4.706624103310243e-06, + "loss": 0.9544, + "step": 40530 + }, + { + "epoch": 0.2934555220164028, + "grad_norm": 0.16338014602661133, + "learning_rate": 4.706551716649656e-06, + "loss": 0.9539, + "step": 40540 + }, + { + "epoch": 0.293527908676989, + "grad_norm": 0.16020265221595764, + "learning_rate": 4.70647932998907e-06, + "loss": 0.9732, + "step": 40550 + }, + { + "epoch": 0.2936002953375752, + "grad_norm": 0.17139659821987152, + "learning_rate": 4.7064069433284834e-06, + "loss": 0.9645, + "step": 40560 + }, + { + "epoch": 0.2936726819981614, + "grad_norm": 0.1736135184764862, + "learning_rate": 4.706334556667898e-06, + "loss": 0.9746, + "step": 40570 + }, + { + "epoch": 0.29374506865874755, + "grad_norm": 0.16610664129257202, + "learning_rate": 4.7062621700073115e-06, + "loss": 0.9673, + "step": 40580 + }, + { + "epoch": 0.29381745531933373, + "grad_norm": 0.195650115609169, + "learning_rate": 4.706189783346725e-06, + "loss": 0.9613, + "step": 40590 + }, + { + "epoch": 0.29388984197991996, + "grad_norm": 0.1645543873310089, + "learning_rate": 4.706117396686139e-06, + "loss": 0.9577, + "step": 40600 + }, + { + "epoch": 0.29396222864050614, + "grad_norm": 0.17229747772216797, + "learning_rate": 4.706045010025553e-06, + "loss": 0.9701, + "step": 40610 + }, + { + "epoch": 0.2940346153010923, + "grad_norm": 0.18451061844825745, + "learning_rate": 4.705972623364967e-06, + "loss": 0.9746, + "step": 40620 + }, + { + "epoch": 0.2941070019616785, + "grad_norm": 0.16431106626987457, + "learning_rate": 4.7059002367043805e-06, + "loss": 0.9575, + "step": 40630 + }, + { + "epoch": 0.29417938862226467, + "grad_norm": 0.15613338351249695, + "learning_rate": 4.705827850043794e-06, + "loss": 0.9684, + "step": 40640 + }, + { + "epoch": 0.2942517752828509, + "grad_norm": 0.1578802615404129, + "learning_rate": 4.705755463383208e-06, + "loss": 0.9641, + "step": 40650 + }, + { + "epoch": 0.2943241619434371, + "grad_norm": 0.1636672019958496, + "learning_rate": 4.705683076722622e-06, + "loss": 0.9686, + "step": 40660 + }, + { + "epoch": 0.29439654860402326, + "grad_norm": 0.2066519856452942, + "learning_rate": 4.705610690062036e-06, + "loss": 0.9649, + "step": 40670 + }, + { + "epoch": 0.29446893526460943, + "grad_norm": 0.16676735877990723, + "learning_rate": 4.705538303401449e-06, + "loss": 0.9666, + "step": 40680 + }, + { + "epoch": 0.2945413219251956, + "grad_norm": 0.17513099312782288, + "learning_rate": 4.705465916740863e-06, + "loss": 0.9768, + "step": 40690 + }, + { + "epoch": 0.2946137085857818, + "grad_norm": 0.17959053814411163, + "learning_rate": 4.7053935300802775e-06, + "loss": 0.9748, + "step": 40700 + }, + { + "epoch": 0.294686095246368, + "grad_norm": 0.16386069357395172, + "learning_rate": 4.705321143419691e-06, + "loss": 0.9679, + "step": 40710 + }, + { + "epoch": 0.2947584819069542, + "grad_norm": 0.16159318387508392, + "learning_rate": 4.705248756759105e-06, + "loss": 0.9673, + "step": 40720 + }, + { + "epoch": 0.29483086856754037, + "grad_norm": 0.16015946865081787, + "learning_rate": 4.705176370098518e-06, + "loss": 0.9542, + "step": 40730 + }, + { + "epoch": 0.29490325522812655, + "grad_norm": 0.16827359795570374, + "learning_rate": 4.705103983437933e-06, + "loss": 0.973, + "step": 40740 + }, + { + "epoch": 0.2949756418887127, + "grad_norm": 0.16736365854740143, + "learning_rate": 4.705031596777346e-06, + "loss": 0.9778, + "step": 40750 + }, + { + "epoch": 0.29504802854929896, + "grad_norm": 0.17061574757099152, + "learning_rate": 4.70495921011676e-06, + "loss": 0.9558, + "step": 40760 + }, + { + "epoch": 0.29512041520988513, + "grad_norm": 0.16562294960021973, + "learning_rate": 4.704886823456174e-06, + "loss": 0.9805, + "step": 40770 + }, + { + "epoch": 0.2951928018704713, + "grad_norm": 0.17279978096485138, + "learning_rate": 4.704814436795588e-06, + "loss": 0.9656, + "step": 40780 + }, + { + "epoch": 0.2952651885310575, + "grad_norm": 0.20112721621990204, + "learning_rate": 4.704742050135002e-06, + "loss": 0.96, + "step": 40790 + }, + { + "epoch": 0.29533757519164366, + "grad_norm": 0.1690005213022232, + "learning_rate": 4.704669663474415e-06, + "loss": 0.961, + "step": 40800 + }, + { + "epoch": 0.2954099618522299, + "grad_norm": 0.16406698524951935, + "learning_rate": 4.704597276813829e-06, + "loss": 0.9705, + "step": 40810 + }, + { + "epoch": 0.2954823485128161, + "grad_norm": 0.16599249839782715, + "learning_rate": 4.704524890153243e-06, + "loss": 0.9577, + "step": 40820 + }, + { + "epoch": 0.29555473517340225, + "grad_norm": 0.19940651953220367, + "learning_rate": 4.704452503492657e-06, + "loss": 0.9678, + "step": 40830 + }, + { + "epoch": 0.2956271218339884, + "grad_norm": 0.1629924476146698, + "learning_rate": 4.704380116832071e-06, + "loss": 0.9655, + "step": 40840 + }, + { + "epoch": 0.2956995084945746, + "grad_norm": 0.17306756973266602, + "learning_rate": 4.704307730171484e-06, + "loss": 0.974, + "step": 40850 + }, + { + "epoch": 0.2957718951551608, + "grad_norm": 0.15905143320560455, + "learning_rate": 4.704235343510899e-06, + "loss": 0.9605, + "step": 40860 + }, + { + "epoch": 0.295844281815747, + "grad_norm": 0.172456756234169, + "learning_rate": 4.704162956850312e-06, + "loss": 0.9663, + "step": 40870 + }, + { + "epoch": 0.2959166684763332, + "grad_norm": 0.16704027354717255, + "learning_rate": 4.704090570189726e-06, + "loss": 0.9636, + "step": 40880 + }, + { + "epoch": 0.29598905513691937, + "grad_norm": 0.17687085270881653, + "learning_rate": 4.7040181835291396e-06, + "loss": 0.9696, + "step": 40890 + }, + { + "epoch": 0.29606144179750554, + "grad_norm": 0.16065765917301178, + "learning_rate": 4.703945796868554e-06, + "loss": 0.9753, + "step": 40900 + }, + { + "epoch": 0.2961338284580917, + "grad_norm": 0.2607700526714325, + "learning_rate": 4.703873410207968e-06, + "loss": 0.9674, + "step": 40910 + }, + { + "epoch": 0.29620621511867795, + "grad_norm": 0.1637168526649475, + "learning_rate": 4.703801023547381e-06, + "loss": 0.9579, + "step": 40920 + }, + { + "epoch": 0.29627860177926413, + "grad_norm": 0.16693775355815887, + "learning_rate": 4.703728636886795e-06, + "loss": 0.9653, + "step": 40930 + }, + { + "epoch": 0.2963509884398503, + "grad_norm": 0.16066871583461761, + "learning_rate": 4.703656250226209e-06, + "loss": 0.9694, + "step": 40940 + }, + { + "epoch": 0.2964233751004365, + "grad_norm": 0.16873207688331604, + "learning_rate": 4.703583863565622e-06, + "loss": 0.9804, + "step": 40950 + }, + { + "epoch": 0.29649576176102266, + "grad_norm": 0.17132726311683655, + "learning_rate": 4.703511476905036e-06, + "loss": 0.9662, + "step": 40960 + }, + { + "epoch": 0.2965681484216089, + "grad_norm": 0.16967126727104187, + "learning_rate": 4.70343909024445e-06, + "loss": 0.9615, + "step": 40970 + }, + { + "epoch": 0.29664053508219507, + "grad_norm": 0.16376537084579468, + "learning_rate": 4.703366703583864e-06, + "loss": 0.9654, + "step": 40980 + }, + { + "epoch": 0.29671292174278124, + "grad_norm": 0.18798090517520905, + "learning_rate": 4.703294316923277e-06, + "loss": 0.9677, + "step": 40990 + }, + { + "epoch": 0.2967853084033674, + "grad_norm": 0.1704019010066986, + "learning_rate": 4.703221930262691e-06, + "loss": 0.9697, + "step": 41000 + }, + { + "epoch": 0.2968576950639536, + "grad_norm": 0.15598341822624207, + "learning_rate": 4.7031495436021055e-06, + "loss": 0.9642, + "step": 41010 + }, + { + "epoch": 0.29693008172453983, + "grad_norm": 0.17868080735206604, + "learning_rate": 4.703077156941519e-06, + "loss": 0.9692, + "step": 41020 + }, + { + "epoch": 0.297002468385126, + "grad_norm": 0.16686975955963135, + "learning_rate": 4.703004770280933e-06, + "loss": 0.9695, + "step": 41030 + }, + { + "epoch": 0.2970748550457122, + "grad_norm": 0.16504326462745667, + "learning_rate": 4.702932383620346e-06, + "loss": 0.9697, + "step": 41040 + }, + { + "epoch": 0.29714724170629836, + "grad_norm": 0.17151391506195068, + "learning_rate": 4.702859996959761e-06, + "loss": 0.9686, + "step": 41050 + }, + { + "epoch": 0.29721962836688454, + "grad_norm": 0.16817405819892883, + "learning_rate": 4.702787610299174e-06, + "loss": 0.9692, + "step": 41060 + }, + { + "epoch": 0.2972920150274707, + "grad_norm": 0.1832583099603653, + "learning_rate": 4.702715223638588e-06, + "loss": 0.9846, + "step": 41070 + }, + { + "epoch": 0.29736440168805695, + "grad_norm": 0.171138733625412, + "learning_rate": 4.702642836978002e-06, + "loss": 0.9749, + "step": 41080 + }, + { + "epoch": 0.2974367883486431, + "grad_norm": 0.15057332813739777, + "learning_rate": 4.702570450317416e-06, + "loss": 0.9668, + "step": 41090 + }, + { + "epoch": 0.2975091750092293, + "grad_norm": 0.16757220029830933, + "learning_rate": 4.70249806365683e-06, + "loss": 0.9772, + "step": 41100 + }, + { + "epoch": 0.2975815616698155, + "grad_norm": 0.20239600539207458, + "learning_rate": 4.702425676996243e-06, + "loss": 0.9588, + "step": 41110 + }, + { + "epoch": 0.29765394833040165, + "grad_norm": 0.17006611824035645, + "learning_rate": 4.702353290335657e-06, + "loss": 0.9626, + "step": 41120 + }, + { + "epoch": 0.2977263349909879, + "grad_norm": 0.17428216338157654, + "learning_rate": 4.7022809036750714e-06, + "loss": 0.963, + "step": 41130 + }, + { + "epoch": 0.29779872165157406, + "grad_norm": 0.16272498667240143, + "learning_rate": 4.702208517014485e-06, + "loss": 0.9712, + "step": 41140 + }, + { + "epoch": 0.29787110831216024, + "grad_norm": 0.15650291740894318, + "learning_rate": 4.702136130353899e-06, + "loss": 0.9564, + "step": 41150 + }, + { + "epoch": 0.2979434949727464, + "grad_norm": 0.17676356434822083, + "learning_rate": 4.702063743693312e-06, + "loss": 0.9722, + "step": 41160 + }, + { + "epoch": 0.2980158816333326, + "grad_norm": 0.17107968032360077, + "learning_rate": 4.701991357032727e-06, + "loss": 0.9638, + "step": 41170 + }, + { + "epoch": 0.2980882682939188, + "grad_norm": 0.16407465934753418, + "learning_rate": 4.70191897037214e-06, + "loss": 0.9646, + "step": 41180 + }, + { + "epoch": 0.298160654954505, + "grad_norm": 0.1607753485441208, + "learning_rate": 4.701846583711554e-06, + "loss": 0.9712, + "step": 41190 + }, + { + "epoch": 0.2982330416150912, + "grad_norm": 0.1688770204782486, + "learning_rate": 4.701774197050968e-06, + "loss": 0.9638, + "step": 41200 + }, + { + "epoch": 0.29830542827567735, + "grad_norm": 0.16739125549793243, + "learning_rate": 4.701701810390382e-06, + "loss": 0.9555, + "step": 41210 + }, + { + "epoch": 0.29837781493626353, + "grad_norm": 0.17620337009429932, + "learning_rate": 4.701629423729796e-06, + "loss": 0.9672, + "step": 41220 + }, + { + "epoch": 0.2984502015968497, + "grad_norm": 0.16256146132946014, + "learning_rate": 4.701557037069209e-06, + "loss": 0.9719, + "step": 41230 + }, + { + "epoch": 0.29852258825743594, + "grad_norm": 0.17721201479434967, + "learning_rate": 4.701484650408623e-06, + "loss": 0.9726, + "step": 41240 + }, + { + "epoch": 0.2985949749180221, + "grad_norm": 0.1829133778810501, + "learning_rate": 4.701412263748037e-06, + "loss": 0.9632, + "step": 41250 + }, + { + "epoch": 0.2986673615786083, + "grad_norm": 0.15692444145679474, + "learning_rate": 4.701339877087451e-06, + "loss": 0.9641, + "step": 41260 + }, + { + "epoch": 0.29873974823919447, + "grad_norm": 0.1705268919467926, + "learning_rate": 4.701267490426865e-06, + "loss": 0.9684, + "step": 41270 + }, + { + "epoch": 0.29881213489978065, + "grad_norm": 0.16059453785419464, + "learning_rate": 4.701195103766278e-06, + "loss": 0.9658, + "step": 41280 + }, + { + "epoch": 0.2988845215603669, + "grad_norm": 0.16799260675907135, + "learning_rate": 4.701122717105692e-06, + "loss": 0.9679, + "step": 41290 + }, + { + "epoch": 0.29895690822095305, + "grad_norm": 0.15362030267715454, + "learning_rate": 4.701050330445106e-06, + "loss": 0.9547, + "step": 41300 + }, + { + "epoch": 0.29902929488153923, + "grad_norm": 0.15962745249271393, + "learning_rate": 4.70097794378452e-06, + "loss": 0.9745, + "step": 41310 + }, + { + "epoch": 0.2991016815421254, + "grad_norm": 0.17490293085575104, + "learning_rate": 4.7009055571239335e-06, + "loss": 0.965, + "step": 41320 + }, + { + "epoch": 0.2991740682027116, + "grad_norm": 0.1677936166524887, + "learning_rate": 4.700833170463347e-06, + "loss": 0.9546, + "step": 41330 + }, + { + "epoch": 0.2992464548632978, + "grad_norm": 0.16804441809654236, + "learning_rate": 4.700760783802762e-06, + "loss": 0.9712, + "step": 41340 + }, + { + "epoch": 0.299318841523884, + "grad_norm": 0.21531559526920319, + "learning_rate": 4.700688397142175e-06, + "loss": 0.9689, + "step": 41350 + }, + { + "epoch": 0.29939122818447017, + "grad_norm": 0.18383803963661194, + "learning_rate": 4.700616010481589e-06, + "loss": 0.9736, + "step": 41360 + }, + { + "epoch": 0.29946361484505635, + "grad_norm": 0.17313213646411896, + "learning_rate": 4.7005436238210024e-06, + "loss": 0.9589, + "step": 41370 + }, + { + "epoch": 0.2995360015056425, + "grad_norm": 0.18341746926307678, + "learning_rate": 4.700471237160417e-06, + "loss": 0.9629, + "step": 41380 + }, + { + "epoch": 0.2996083881662287, + "grad_norm": 0.1760926991701126, + "learning_rate": 4.7003988504998305e-06, + "loss": 0.9681, + "step": 41390 + }, + { + "epoch": 0.29968077482681493, + "grad_norm": 0.18593202531337738, + "learning_rate": 4.700326463839244e-06, + "loss": 0.9667, + "step": 41400 + }, + { + "epoch": 0.2997531614874011, + "grad_norm": 0.16508474946022034, + "learning_rate": 4.700254077178658e-06, + "loss": 0.9772, + "step": 41410 + }, + { + "epoch": 0.2998255481479873, + "grad_norm": 0.16628380119800568, + "learning_rate": 4.700181690518072e-06, + "loss": 0.9648, + "step": 41420 + }, + { + "epoch": 0.29989793480857346, + "grad_norm": 0.19085149466991425, + "learning_rate": 4.700109303857486e-06, + "loss": 0.9726, + "step": 41430 + }, + { + "epoch": 0.29997032146915964, + "grad_norm": 0.16664601862430573, + "learning_rate": 4.7000369171968995e-06, + "loss": 0.9628, + "step": 41440 + }, + { + "epoch": 0.30004270812974587, + "grad_norm": 0.17423871159553528, + "learning_rate": 4.699964530536313e-06, + "loss": 0.966, + "step": 41450 + }, + { + "epoch": 0.30011509479033205, + "grad_norm": 0.18593478202819824, + "learning_rate": 4.6998921438757275e-06, + "loss": 0.9664, + "step": 41460 + }, + { + "epoch": 0.3001874814509182, + "grad_norm": 0.19175831973552704, + "learning_rate": 4.699819757215141e-06, + "loss": 0.9789, + "step": 41470 + }, + { + "epoch": 0.3002598681115044, + "grad_norm": 0.15855441987514496, + "learning_rate": 4.699747370554554e-06, + "loss": 0.9516, + "step": 41480 + }, + { + "epoch": 0.3003322547720906, + "grad_norm": 0.2066076695919037, + "learning_rate": 4.699674983893968e-06, + "loss": 0.9594, + "step": 41490 + }, + { + "epoch": 0.3004046414326768, + "grad_norm": 0.9006432890892029, + "learning_rate": 4.699602597233382e-06, + "loss": 0.9761, + "step": 41500 + }, + { + "epoch": 0.300477028093263, + "grad_norm": 0.17038564383983612, + "learning_rate": 4.699530210572796e-06, + "loss": 0.9686, + "step": 41510 + }, + { + "epoch": 0.30054941475384916, + "grad_norm": 0.16303139925003052, + "learning_rate": 4.699457823912209e-06, + "loss": 0.9774, + "step": 41520 + }, + { + "epoch": 0.30062180141443534, + "grad_norm": 0.16983075439929962, + "learning_rate": 4.699385437251624e-06, + "loss": 0.9731, + "step": 41530 + }, + { + "epoch": 0.3006941880750215, + "grad_norm": 0.17250801622867584, + "learning_rate": 4.699313050591037e-06, + "loss": 0.9432, + "step": 41540 + }, + { + "epoch": 0.3007665747356077, + "grad_norm": 0.16830387711524963, + "learning_rate": 4.699240663930451e-06, + "loss": 0.964, + "step": 41550 + }, + { + "epoch": 0.3008389613961939, + "grad_norm": 0.16739524900913239, + "learning_rate": 4.6991682772698645e-06, + "loss": 0.9785, + "step": 41560 + }, + { + "epoch": 0.3009113480567801, + "grad_norm": 0.17023219168186188, + "learning_rate": 4.699095890609279e-06, + "loss": 0.9729, + "step": 41570 + }, + { + "epoch": 0.3009837347173663, + "grad_norm": 0.17168667912483215, + "learning_rate": 4.699023503948693e-06, + "loss": 0.961, + "step": 41580 + }, + { + "epoch": 0.30105612137795246, + "grad_norm": 0.16515317559242249, + "learning_rate": 4.698951117288106e-06, + "loss": 0.9731, + "step": 41590 + }, + { + "epoch": 0.30112850803853863, + "grad_norm": 0.16387787461280823, + "learning_rate": 4.69887873062752e-06, + "loss": 0.9587, + "step": 41600 + }, + { + "epoch": 0.30120089469912487, + "grad_norm": 0.18492285907268524, + "learning_rate": 4.698806343966934e-06, + "loss": 0.9635, + "step": 41610 + }, + { + "epoch": 0.30127328135971104, + "grad_norm": 0.15610884130001068, + "learning_rate": 4.698733957306348e-06, + "loss": 0.9579, + "step": 41620 + }, + { + "epoch": 0.3013456680202972, + "grad_norm": 0.1656864881515503, + "learning_rate": 4.6986615706457616e-06, + "loss": 0.9637, + "step": 41630 + }, + { + "epoch": 0.3014180546808834, + "grad_norm": 0.16656151413917542, + "learning_rate": 4.698589183985175e-06, + "loss": 0.9704, + "step": 41640 + }, + { + "epoch": 0.3014904413414696, + "grad_norm": 0.163262277841568, + "learning_rate": 4.69851679732459e-06, + "loss": 0.954, + "step": 41650 + }, + { + "epoch": 0.3015628280020558, + "grad_norm": 0.17654089629650116, + "learning_rate": 4.698444410664003e-06, + "loss": 0.981, + "step": 41660 + }, + { + "epoch": 0.301635214662642, + "grad_norm": 0.16009564697742462, + "learning_rate": 4.698372024003417e-06, + "loss": 0.964, + "step": 41670 + }, + { + "epoch": 0.30170760132322816, + "grad_norm": 0.17271243035793304, + "learning_rate": 4.6982996373428305e-06, + "loss": 0.9637, + "step": 41680 + }, + { + "epoch": 0.30177998798381434, + "grad_norm": 0.16152308881282806, + "learning_rate": 4.698227250682245e-06, + "loss": 0.9601, + "step": 41690 + }, + { + "epoch": 0.3018523746444005, + "grad_norm": 0.16311658918857574, + "learning_rate": 4.6981548640216586e-06, + "loss": 0.9648, + "step": 41700 + }, + { + "epoch": 0.30192476130498674, + "grad_norm": 0.17179886996746063, + "learning_rate": 4.698082477361072e-06, + "loss": 0.9636, + "step": 41710 + }, + { + "epoch": 0.3019971479655729, + "grad_norm": 0.16331078112125397, + "learning_rate": 4.698010090700486e-06, + "loss": 0.9694, + "step": 41720 + }, + { + "epoch": 0.3020695346261591, + "grad_norm": 0.16688616573810577, + "learning_rate": 4.6979377040399e-06, + "loss": 0.9646, + "step": 41730 + }, + { + "epoch": 0.3021419212867453, + "grad_norm": 0.19026783108711243, + "learning_rate": 4.697865317379314e-06, + "loss": 0.9706, + "step": 41740 + }, + { + "epoch": 0.30221430794733145, + "grad_norm": 0.169419065117836, + "learning_rate": 4.6977929307187275e-06, + "loss": 0.9559, + "step": 41750 + }, + { + "epoch": 0.3022866946079176, + "grad_norm": 0.1800675243139267, + "learning_rate": 4.697720544058141e-06, + "loss": 0.9822, + "step": 41760 + }, + { + "epoch": 0.30235908126850386, + "grad_norm": 0.15072910487651825, + "learning_rate": 4.6976481573975556e-06, + "loss": 0.962, + "step": 41770 + }, + { + "epoch": 0.30243146792909004, + "grad_norm": 0.15950879454612732, + "learning_rate": 4.697575770736969e-06, + "loss": 0.9697, + "step": 41780 + }, + { + "epoch": 0.3025038545896762, + "grad_norm": 0.1841602772474289, + "learning_rate": 4.697503384076383e-06, + "loss": 0.9731, + "step": 41790 + }, + { + "epoch": 0.3025762412502624, + "grad_norm": 0.22333543002605438, + "learning_rate": 4.697430997415796e-06, + "loss": 0.9594, + "step": 41800 + }, + { + "epoch": 0.30264862791084857, + "grad_norm": 0.17645373940467834, + "learning_rate": 4.697358610755211e-06, + "loss": 0.9595, + "step": 41810 + }, + { + "epoch": 0.3027210145714348, + "grad_norm": 0.16420337557792664, + "learning_rate": 4.6972862240946245e-06, + "loss": 0.9829, + "step": 41820 + }, + { + "epoch": 0.302793401232021, + "grad_norm": 0.1616903692483902, + "learning_rate": 4.697213837434038e-06, + "loss": 0.9627, + "step": 41830 + }, + { + "epoch": 0.30286578789260715, + "grad_norm": 0.1770700216293335, + "learning_rate": 4.697141450773452e-06, + "loss": 0.9546, + "step": 41840 + }, + { + "epoch": 0.30293817455319333, + "grad_norm": 0.16673099994659424, + "learning_rate": 4.697069064112866e-06, + "loss": 0.972, + "step": 41850 + }, + { + "epoch": 0.3030105612137795, + "grad_norm": 0.15225054323673248, + "learning_rate": 4.69699667745228e-06, + "loss": 0.9707, + "step": 41860 + }, + { + "epoch": 0.30308294787436574, + "grad_norm": 0.1643424779176712, + "learning_rate": 4.6969242907916934e-06, + "loss": 0.9601, + "step": 41870 + }, + { + "epoch": 0.3031553345349519, + "grad_norm": 0.20740066468715668, + "learning_rate": 4.696851904131107e-06, + "loss": 0.962, + "step": 41880 + }, + { + "epoch": 0.3032277211955381, + "grad_norm": 0.17226648330688477, + "learning_rate": 4.696779517470521e-06, + "loss": 0.9739, + "step": 41890 + }, + { + "epoch": 0.30330010785612427, + "grad_norm": 0.17776334285736084, + "learning_rate": 4.696707130809935e-06, + "loss": 0.979, + "step": 41900 + }, + { + "epoch": 0.30337249451671044, + "grad_norm": 0.15923330187797546, + "learning_rate": 4.696634744149349e-06, + "loss": 0.9613, + "step": 41910 + }, + { + "epoch": 0.3034448811772966, + "grad_norm": 0.1711985170841217, + "learning_rate": 4.696562357488762e-06, + "loss": 0.9588, + "step": 41920 + }, + { + "epoch": 0.30351726783788285, + "grad_norm": 0.1879815310239792, + "learning_rate": 4.696489970828176e-06, + "loss": 0.9679, + "step": 41930 + }, + { + "epoch": 0.30358965449846903, + "grad_norm": 0.17883126437664032, + "learning_rate": 4.6964175841675904e-06, + "loss": 0.9755, + "step": 41940 + }, + { + "epoch": 0.3036620411590552, + "grad_norm": 0.16554105281829834, + "learning_rate": 4.696345197507004e-06, + "loss": 0.9611, + "step": 41950 + }, + { + "epoch": 0.3037344278196414, + "grad_norm": 0.1649336963891983, + "learning_rate": 4.696272810846418e-06, + "loss": 0.9611, + "step": 41960 + }, + { + "epoch": 0.30380681448022756, + "grad_norm": 0.15734368562698364, + "learning_rate": 4.696200424185831e-06, + "loss": 0.952, + "step": 41970 + }, + { + "epoch": 0.3038792011408138, + "grad_norm": 0.17791064083576202, + "learning_rate": 4.696128037525246e-06, + "loss": 0.9507, + "step": 41980 + }, + { + "epoch": 0.30395158780139997, + "grad_norm": 0.18303173780441284, + "learning_rate": 4.696055650864659e-06, + "loss": 0.9595, + "step": 41990 + }, + { + "epoch": 0.30402397446198615, + "grad_norm": 0.1724705994129181, + "learning_rate": 4.695983264204073e-06, + "loss": 0.9704, + "step": 42000 + }, + { + "epoch": 0.3040963611225723, + "grad_norm": 0.164495587348938, + "learning_rate": 4.695910877543487e-06, + "loss": 0.9598, + "step": 42010 + }, + { + "epoch": 0.3041687477831585, + "grad_norm": 0.16240546107292175, + "learning_rate": 4.6958384908829e-06, + "loss": 0.9621, + "step": 42020 + }, + { + "epoch": 0.30424113444374473, + "grad_norm": 0.17704802751541138, + "learning_rate": 4.695766104222314e-06, + "loss": 0.9589, + "step": 42030 + }, + { + "epoch": 0.3043135211043309, + "grad_norm": 0.17840299010276794, + "learning_rate": 4.695693717561728e-06, + "loss": 0.9773, + "step": 42040 + }, + { + "epoch": 0.3043859077649171, + "grad_norm": 0.18059472739696503, + "learning_rate": 4.695621330901142e-06, + "loss": 0.9743, + "step": 42050 + }, + { + "epoch": 0.30445829442550326, + "grad_norm": 0.18101860582828522, + "learning_rate": 4.6955489442405555e-06, + "loss": 0.9618, + "step": 42060 + }, + { + "epoch": 0.30453068108608944, + "grad_norm": 0.16693522036075592, + "learning_rate": 4.695476557579969e-06, + "loss": 0.9533, + "step": 42070 + }, + { + "epoch": 0.3046030677466756, + "grad_norm": 0.1905452311038971, + "learning_rate": 4.695404170919383e-06, + "loss": 0.9576, + "step": 42080 + }, + { + "epoch": 0.30467545440726185, + "grad_norm": 0.16793328523635864, + "learning_rate": 4.695331784258797e-06, + "loss": 0.9592, + "step": 42090 + }, + { + "epoch": 0.304747841067848, + "grad_norm": 0.17872589826583862, + "learning_rate": 4.695259397598211e-06, + "loss": 0.9747, + "step": 42100 + }, + { + "epoch": 0.3048202277284342, + "grad_norm": 0.1794547736644745, + "learning_rate": 4.6951870109376244e-06, + "loss": 0.951, + "step": 42110 + }, + { + "epoch": 0.3048926143890204, + "grad_norm": 0.1795600950717926, + "learning_rate": 4.695114624277038e-06, + "loss": 0.9495, + "step": 42120 + }, + { + "epoch": 0.30496500104960655, + "grad_norm": 0.16469037532806396, + "learning_rate": 4.6950422376164525e-06, + "loss": 0.9677, + "step": 42130 + }, + { + "epoch": 0.3050373877101928, + "grad_norm": 0.15928620100021362, + "learning_rate": 4.694969850955866e-06, + "loss": 0.9597, + "step": 42140 + }, + { + "epoch": 0.30510977437077896, + "grad_norm": 0.17253397405147552, + "learning_rate": 4.69489746429528e-06, + "loss": 0.9603, + "step": 42150 + }, + { + "epoch": 0.30518216103136514, + "grad_norm": 0.1894710212945938, + "learning_rate": 4.694825077634693e-06, + "loss": 0.9537, + "step": 42160 + }, + { + "epoch": 0.3052545476919513, + "grad_norm": 0.16059796512126923, + "learning_rate": 4.694752690974108e-06, + "loss": 0.9595, + "step": 42170 + }, + { + "epoch": 0.3053269343525375, + "grad_norm": 0.156858429312706, + "learning_rate": 4.6946803043135215e-06, + "loss": 0.9604, + "step": 42180 + }, + { + "epoch": 0.3053993210131237, + "grad_norm": 0.17593984305858612, + "learning_rate": 4.694607917652935e-06, + "loss": 0.955, + "step": 42190 + }, + { + "epoch": 0.3054717076737099, + "grad_norm": 0.17821641266345978, + "learning_rate": 4.694535530992349e-06, + "loss": 0.959, + "step": 42200 + }, + { + "epoch": 0.3055440943342961, + "grad_norm": 0.25540754199028015, + "learning_rate": 4.694463144331763e-06, + "loss": 0.9672, + "step": 42210 + }, + { + "epoch": 0.30561648099488226, + "grad_norm": 0.19185206294059753, + "learning_rate": 4.694390757671177e-06, + "loss": 0.9605, + "step": 42220 + }, + { + "epoch": 0.30568886765546843, + "grad_norm": 0.21133938431739807, + "learning_rate": 4.69431837101059e-06, + "loss": 0.9783, + "step": 42230 + }, + { + "epoch": 0.30576125431605466, + "grad_norm": 0.17313557863235474, + "learning_rate": 4.694245984350004e-06, + "loss": 0.964, + "step": 42240 + }, + { + "epoch": 0.30583364097664084, + "grad_norm": 0.18542583286762238, + "learning_rate": 4.6941735976894185e-06, + "loss": 0.9428, + "step": 42250 + }, + { + "epoch": 0.305906027637227, + "grad_norm": 0.17269088327884674, + "learning_rate": 4.694101211028832e-06, + "loss": 0.974, + "step": 42260 + }, + { + "epoch": 0.3059784142978132, + "grad_norm": 0.16022367775440216, + "learning_rate": 4.694028824368246e-06, + "loss": 0.9644, + "step": 42270 + }, + { + "epoch": 0.30605080095839937, + "grad_norm": 0.17748059332370758, + "learning_rate": 4.693956437707659e-06, + "loss": 0.9604, + "step": 42280 + }, + { + "epoch": 0.30612318761898555, + "grad_norm": 0.16204720735549927, + "learning_rate": 4.693884051047074e-06, + "loss": 0.9672, + "step": 42290 + }, + { + "epoch": 0.3061955742795718, + "grad_norm": 0.16240718960762024, + "learning_rate": 4.693811664386487e-06, + "loss": 0.9703, + "step": 42300 + }, + { + "epoch": 0.30626796094015796, + "grad_norm": 0.1833665817975998, + "learning_rate": 4.693739277725901e-06, + "loss": 0.9671, + "step": 42310 + }, + { + "epoch": 0.30634034760074413, + "grad_norm": 0.16456997394561768, + "learning_rate": 4.693666891065315e-06, + "loss": 0.9566, + "step": 42320 + }, + { + "epoch": 0.3064127342613303, + "grad_norm": 0.1582067459821701, + "learning_rate": 4.693594504404729e-06, + "loss": 0.9696, + "step": 42330 + }, + { + "epoch": 0.3064851209219165, + "grad_norm": 0.16254618763923645, + "learning_rate": 4.693522117744143e-06, + "loss": 0.9624, + "step": 42340 + }, + { + "epoch": 0.3065575075825027, + "grad_norm": 0.22306321561336517, + "learning_rate": 4.693449731083556e-06, + "loss": 0.9663, + "step": 42350 + }, + { + "epoch": 0.3066298942430889, + "grad_norm": 0.1735800951719284, + "learning_rate": 4.69337734442297e-06, + "loss": 0.9727, + "step": 42360 + }, + { + "epoch": 0.3067022809036751, + "grad_norm": 0.16660434007644653, + "learning_rate": 4.693304957762384e-06, + "loss": 0.9558, + "step": 42370 + }, + { + "epoch": 0.30677466756426125, + "grad_norm": 0.1673038899898529, + "learning_rate": 4.693232571101798e-06, + "loss": 0.9772, + "step": 42380 + }, + { + "epoch": 0.3068470542248474, + "grad_norm": 0.1805497705936432, + "learning_rate": 4.693160184441212e-06, + "loss": 0.9679, + "step": 42390 + }, + { + "epoch": 0.30691944088543366, + "grad_norm": 0.16256369650363922, + "learning_rate": 4.693087797780625e-06, + "loss": 0.9636, + "step": 42400 + }, + { + "epoch": 0.30699182754601984, + "grad_norm": 0.16821902990341187, + "learning_rate": 4.69301541112004e-06, + "loss": 0.9604, + "step": 42410 + }, + { + "epoch": 0.307064214206606, + "grad_norm": 0.170756995677948, + "learning_rate": 4.692943024459453e-06, + "loss": 0.9493, + "step": 42420 + }, + { + "epoch": 0.3071366008671922, + "grad_norm": 0.1680414229631424, + "learning_rate": 4.692870637798867e-06, + "loss": 0.9669, + "step": 42430 + }, + { + "epoch": 0.30720898752777837, + "grad_norm": 0.17271284759044647, + "learning_rate": 4.6927982511382806e-06, + "loss": 0.9572, + "step": 42440 + }, + { + "epoch": 0.30728137418836454, + "grad_norm": 0.20150049030780792, + "learning_rate": 4.692725864477695e-06, + "loss": 0.976, + "step": 42450 + }, + { + "epoch": 0.3073537608489508, + "grad_norm": 0.16273252665996552, + "learning_rate": 4.692653477817109e-06, + "loss": 0.9646, + "step": 42460 + }, + { + "epoch": 0.30742614750953695, + "grad_norm": 0.17150864005088806, + "learning_rate": 4.692581091156522e-06, + "loss": 0.9734, + "step": 42470 + }, + { + "epoch": 0.30749853417012313, + "grad_norm": 0.16339828073978424, + "learning_rate": 4.692508704495936e-06, + "loss": 0.959, + "step": 42480 + }, + { + "epoch": 0.3075709208307093, + "grad_norm": 0.17035742104053497, + "learning_rate": 4.69243631783535e-06, + "loss": 0.9637, + "step": 42490 + }, + { + "epoch": 0.3076433074912955, + "grad_norm": 0.17820675671100616, + "learning_rate": 4.692363931174764e-06, + "loss": 0.9647, + "step": 42500 + }, + { + "epoch": 0.3077156941518817, + "grad_norm": 0.15689398348331451, + "learning_rate": 4.6922915445141776e-06, + "loss": 0.966, + "step": 42510 + }, + { + "epoch": 0.3077880808124679, + "grad_norm": 0.17147964239120483, + "learning_rate": 4.692219157853591e-06, + "loss": 0.9748, + "step": 42520 + }, + { + "epoch": 0.30786046747305407, + "grad_norm": 0.17200268805027008, + "learning_rate": 4.692146771193005e-06, + "loss": 0.9656, + "step": 42530 + }, + { + "epoch": 0.30793285413364024, + "grad_norm": 0.18745240569114685, + "learning_rate": 4.692074384532418e-06, + "loss": 0.9666, + "step": 42540 + }, + { + "epoch": 0.3080052407942264, + "grad_norm": 0.16291543841362, + "learning_rate": 4.692001997871832e-06, + "loss": 0.9636, + "step": 42550 + }, + { + "epoch": 0.30807762745481265, + "grad_norm": 0.16443657875061035, + "learning_rate": 4.6919296112112465e-06, + "loss": 0.9673, + "step": 42560 + }, + { + "epoch": 0.30815001411539883, + "grad_norm": 0.1615900695323944, + "learning_rate": 4.69185722455066e-06, + "loss": 0.9578, + "step": 42570 + }, + { + "epoch": 0.308222400775985, + "grad_norm": 0.17126181721687317, + "learning_rate": 4.691784837890074e-06, + "loss": 0.9695, + "step": 42580 + }, + { + "epoch": 0.3082947874365712, + "grad_norm": 0.15183715522289276, + "learning_rate": 4.691712451229487e-06, + "loss": 0.9695, + "step": 42590 + }, + { + "epoch": 0.30836717409715736, + "grad_norm": 0.15649786591529846, + "learning_rate": 4.691640064568902e-06, + "loss": 0.9661, + "step": 42600 + }, + { + "epoch": 0.30843956075774354, + "grad_norm": 0.15690255165100098, + "learning_rate": 4.691567677908315e-06, + "loss": 0.9723, + "step": 42610 + }, + { + "epoch": 0.30851194741832977, + "grad_norm": 0.16935759782791138, + "learning_rate": 4.691495291247729e-06, + "loss": 0.9786, + "step": 42620 + }, + { + "epoch": 0.30858433407891595, + "grad_norm": 0.19255200028419495, + "learning_rate": 4.691422904587143e-06, + "loss": 0.9625, + "step": 42630 + }, + { + "epoch": 0.3086567207395021, + "grad_norm": 0.16277821362018585, + "learning_rate": 4.691350517926557e-06, + "loss": 0.9501, + "step": 42640 + }, + { + "epoch": 0.3087291074000883, + "grad_norm": 0.15837381780147552, + "learning_rate": 4.691278131265971e-06, + "loss": 0.9564, + "step": 42650 + }, + { + "epoch": 0.3088014940606745, + "grad_norm": 0.16267119348049164, + "learning_rate": 4.691205744605384e-06, + "loss": 0.961, + "step": 42660 + }, + { + "epoch": 0.3088738807212607, + "grad_norm": 0.17577193677425385, + "learning_rate": 4.691133357944798e-06, + "loss": 0.9807, + "step": 42670 + }, + { + "epoch": 0.3089462673818469, + "grad_norm": 0.16068010032176971, + "learning_rate": 4.691060971284212e-06, + "loss": 0.9623, + "step": 42680 + }, + { + "epoch": 0.30901865404243306, + "grad_norm": 0.16693639755249023, + "learning_rate": 4.690988584623626e-06, + "loss": 0.9602, + "step": 42690 + }, + { + "epoch": 0.30909104070301924, + "grad_norm": 0.17629244923591614, + "learning_rate": 4.69091619796304e-06, + "loss": 0.9785, + "step": 42700 + }, + { + "epoch": 0.3091634273636054, + "grad_norm": 0.15694406628608704, + "learning_rate": 4.690843811302453e-06, + "loss": 0.9756, + "step": 42710 + }, + { + "epoch": 0.30923581402419165, + "grad_norm": 0.16043122112751007, + "learning_rate": 4.690771424641867e-06, + "loss": 0.9697, + "step": 42720 + }, + { + "epoch": 0.3093082006847778, + "grad_norm": 0.18830914795398712, + "learning_rate": 4.690699037981281e-06, + "loss": 0.9642, + "step": 42730 + }, + { + "epoch": 0.309380587345364, + "grad_norm": 0.1912577599287033, + "learning_rate": 4.690626651320695e-06, + "loss": 0.962, + "step": 42740 + }, + { + "epoch": 0.3094529740059502, + "grad_norm": 0.16711075603961945, + "learning_rate": 4.690554264660109e-06, + "loss": 0.959, + "step": 42750 + }, + { + "epoch": 0.30952536066653635, + "grad_norm": 0.17442958056926727, + "learning_rate": 4.690481877999522e-06, + "loss": 0.9752, + "step": 42760 + }, + { + "epoch": 0.3095977473271226, + "grad_norm": 0.17477945983409882, + "learning_rate": 4.690409491338937e-06, + "loss": 0.9708, + "step": 42770 + }, + { + "epoch": 0.30967013398770876, + "grad_norm": 0.1687520295381546, + "learning_rate": 4.69033710467835e-06, + "loss": 0.9602, + "step": 42780 + }, + { + "epoch": 0.30974252064829494, + "grad_norm": 0.1918717324733734, + "learning_rate": 4.690264718017764e-06, + "loss": 0.9622, + "step": 42790 + }, + { + "epoch": 0.3098149073088811, + "grad_norm": 0.17581704258918762, + "learning_rate": 4.6901923313571775e-06, + "loss": 0.9502, + "step": 42800 + }, + { + "epoch": 0.3098872939694673, + "grad_norm": 0.1803891360759735, + "learning_rate": 4.690119944696592e-06, + "loss": 0.9743, + "step": 42810 + }, + { + "epoch": 0.30995968063005347, + "grad_norm": 0.16164544224739075, + "learning_rate": 4.690047558036006e-06, + "loss": 0.9597, + "step": 42820 + }, + { + "epoch": 0.3100320672906397, + "grad_norm": 0.15992704033851624, + "learning_rate": 4.689975171375419e-06, + "loss": 0.9745, + "step": 42830 + }, + { + "epoch": 0.3101044539512259, + "grad_norm": 0.16522592306137085, + "learning_rate": 4.689902784714833e-06, + "loss": 0.9392, + "step": 42840 + }, + { + "epoch": 0.31017684061181205, + "grad_norm": 0.17075496912002563, + "learning_rate": 4.689830398054247e-06, + "loss": 0.9706, + "step": 42850 + }, + { + "epoch": 0.31024922727239823, + "grad_norm": 0.16021926701068878, + "learning_rate": 4.689758011393661e-06, + "loss": 0.9651, + "step": 42860 + }, + { + "epoch": 0.3103216139329844, + "grad_norm": 0.16339780390262604, + "learning_rate": 4.6896856247330745e-06, + "loss": 0.9626, + "step": 42870 + }, + { + "epoch": 0.31039400059357064, + "grad_norm": 0.15929320454597473, + "learning_rate": 4.689613238072488e-06, + "loss": 0.9484, + "step": 42880 + }, + { + "epoch": 0.3104663872541568, + "grad_norm": 0.1761467158794403, + "learning_rate": 4.689540851411903e-06, + "loss": 0.9623, + "step": 42890 + }, + { + "epoch": 0.310538773914743, + "grad_norm": 0.16710932552814484, + "learning_rate": 4.689468464751316e-06, + "loss": 0.959, + "step": 42900 + }, + { + "epoch": 0.31061116057532917, + "grad_norm": 0.16465498507022858, + "learning_rate": 4.68939607809073e-06, + "loss": 0.9584, + "step": 42910 + }, + { + "epoch": 0.31068354723591535, + "grad_norm": 0.1967269629240036, + "learning_rate": 4.6893236914301435e-06, + "loss": 0.9583, + "step": 42920 + }, + { + "epoch": 0.3107559338965016, + "grad_norm": 0.16868357360363007, + "learning_rate": 4.689251304769558e-06, + "loss": 0.9559, + "step": 42930 + }, + { + "epoch": 0.31082832055708776, + "grad_norm": 0.15778405964374542, + "learning_rate": 4.6891789181089715e-06, + "loss": 0.9707, + "step": 42940 + }, + { + "epoch": 0.31090070721767393, + "grad_norm": 0.20243608951568604, + "learning_rate": 4.689106531448385e-06, + "loss": 0.9471, + "step": 42950 + }, + { + "epoch": 0.3109730938782601, + "grad_norm": 0.17772892117500305, + "learning_rate": 4.689034144787799e-06, + "loss": 0.9589, + "step": 42960 + }, + { + "epoch": 0.3110454805388463, + "grad_norm": 0.17439591884613037, + "learning_rate": 4.688961758127213e-06, + "loss": 0.9604, + "step": 42970 + }, + { + "epoch": 0.31111786719943246, + "grad_norm": 0.15958340466022491, + "learning_rate": 4.688889371466627e-06, + "loss": 0.967, + "step": 42980 + }, + { + "epoch": 0.3111902538600187, + "grad_norm": 0.16158387064933777, + "learning_rate": 4.6888169848060405e-06, + "loss": 0.9569, + "step": 42990 + }, + { + "epoch": 0.31126264052060487, + "grad_norm": 0.17039985954761505, + "learning_rate": 4.688744598145454e-06, + "loss": 0.9571, + "step": 43000 + }, + { + "epoch": 0.31133502718119105, + "grad_norm": 0.1581193506717682, + "learning_rate": 4.6886722114848685e-06, + "loss": 0.9568, + "step": 43010 + }, + { + "epoch": 0.3114074138417772, + "grad_norm": 0.16639426350593567, + "learning_rate": 4.688599824824282e-06, + "loss": 0.9495, + "step": 43020 + }, + { + "epoch": 0.3114798005023634, + "grad_norm": 0.1791532039642334, + "learning_rate": 4.688527438163696e-06, + "loss": 0.973, + "step": 43030 + }, + { + "epoch": 0.31155218716294963, + "grad_norm": 0.16932880878448486, + "learning_rate": 4.688455051503109e-06, + "loss": 0.966, + "step": 43040 + }, + { + "epoch": 0.3116245738235358, + "grad_norm": 0.17780663073062897, + "learning_rate": 4.688382664842524e-06, + "loss": 0.9554, + "step": 43050 + }, + { + "epoch": 0.311696960484122, + "grad_norm": 0.1656964272260666, + "learning_rate": 4.6883102781819375e-06, + "loss": 0.9728, + "step": 43060 + }, + { + "epoch": 0.31176934714470816, + "grad_norm": 0.15379230678081512, + "learning_rate": 4.68823789152135e-06, + "loss": 0.9739, + "step": 43070 + }, + { + "epoch": 0.31184173380529434, + "grad_norm": 0.17420606315135956, + "learning_rate": 4.688165504860765e-06, + "loss": 0.9573, + "step": 43080 + }, + { + "epoch": 0.3119141204658806, + "grad_norm": 0.17237995564937592, + "learning_rate": 4.688093118200178e-06, + "loss": 0.9633, + "step": 43090 + }, + { + "epoch": 0.31198650712646675, + "grad_norm": 0.254451185464859, + "learning_rate": 4.688020731539592e-06, + "loss": 0.9597, + "step": 43100 + }, + { + "epoch": 0.3120588937870529, + "grad_norm": 0.17835448682308197, + "learning_rate": 4.6879483448790056e-06, + "loss": 0.9491, + "step": 43110 + }, + { + "epoch": 0.3121312804476391, + "grad_norm": 0.1625124216079712, + "learning_rate": 4.68787595821842e-06, + "loss": 0.9623, + "step": 43120 + }, + { + "epoch": 0.3122036671082253, + "grad_norm": 0.1648617535829544, + "learning_rate": 4.687803571557834e-06, + "loss": 0.9626, + "step": 43130 + }, + { + "epoch": 0.31227605376881146, + "grad_norm": 0.15769249200820923, + "learning_rate": 4.687731184897247e-06, + "loss": 0.9585, + "step": 43140 + }, + { + "epoch": 0.3123484404293977, + "grad_norm": 0.16843098402023315, + "learning_rate": 4.687658798236661e-06, + "loss": 0.9519, + "step": 43150 + }, + { + "epoch": 0.31242082708998387, + "grad_norm": 0.17640751600265503, + "learning_rate": 4.687586411576075e-06, + "loss": 0.9519, + "step": 43160 + }, + { + "epoch": 0.31249321375057004, + "grad_norm": 0.2160252183675766, + "learning_rate": 4.687514024915489e-06, + "loss": 0.9505, + "step": 43170 + }, + { + "epoch": 0.3125656004111562, + "grad_norm": 0.1599891185760498, + "learning_rate": 4.6874416382549026e-06, + "loss": 0.9592, + "step": 43180 + }, + { + "epoch": 0.3126379870717424, + "grad_norm": 0.17435148358345032, + "learning_rate": 4.687369251594316e-06, + "loss": 0.9647, + "step": 43190 + }, + { + "epoch": 0.31271037373232863, + "grad_norm": 0.18090033531188965, + "learning_rate": 4.687296864933731e-06, + "loss": 0.9587, + "step": 43200 + }, + { + "epoch": 0.3127827603929148, + "grad_norm": 0.15829293429851532, + "learning_rate": 4.687224478273144e-06, + "loss": 0.9601, + "step": 43210 + }, + { + "epoch": 0.312855147053501, + "grad_norm": 0.17692400515079498, + "learning_rate": 4.687152091612558e-06, + "loss": 0.964, + "step": 43220 + }, + { + "epoch": 0.31292753371408716, + "grad_norm": 0.18780209124088287, + "learning_rate": 4.6870797049519715e-06, + "loss": 0.9679, + "step": 43230 + }, + { + "epoch": 0.31299992037467333, + "grad_norm": 0.20248176157474518, + "learning_rate": 4.687007318291386e-06, + "loss": 0.9639, + "step": 43240 + }, + { + "epoch": 0.31307230703525957, + "grad_norm": 0.1714249551296234, + "learning_rate": 4.6869349316307996e-06, + "loss": 0.9524, + "step": 43250 + }, + { + "epoch": 0.31314469369584574, + "grad_norm": 0.1626296043395996, + "learning_rate": 4.686862544970213e-06, + "loss": 0.9643, + "step": 43260 + }, + { + "epoch": 0.3132170803564319, + "grad_norm": 0.21206989884376526, + "learning_rate": 4.686790158309627e-06, + "loss": 0.9681, + "step": 43270 + }, + { + "epoch": 0.3132894670170181, + "grad_norm": 0.16204078495502472, + "learning_rate": 4.686717771649041e-06, + "loss": 0.9671, + "step": 43280 + }, + { + "epoch": 0.3133618536776043, + "grad_norm": 0.1752130091190338, + "learning_rate": 4.686645384988455e-06, + "loss": 0.969, + "step": 43290 + }, + { + "epoch": 0.31343424033819045, + "grad_norm": 0.16563290357589722, + "learning_rate": 4.6865729983278685e-06, + "loss": 0.9686, + "step": 43300 + }, + { + "epoch": 0.3135066269987767, + "grad_norm": 0.16079412400722504, + "learning_rate": 4.686500611667282e-06, + "loss": 0.9783, + "step": 43310 + }, + { + "epoch": 0.31357901365936286, + "grad_norm": 0.19905321300029755, + "learning_rate": 4.686428225006696e-06, + "loss": 0.9657, + "step": 43320 + }, + { + "epoch": 0.31365140031994904, + "grad_norm": 0.1775355190038681, + "learning_rate": 4.68635583834611e-06, + "loss": 0.9536, + "step": 43330 + }, + { + "epoch": 0.3137237869805352, + "grad_norm": 0.16202837228775024, + "learning_rate": 4.686283451685524e-06, + "loss": 0.9783, + "step": 43340 + }, + { + "epoch": 0.3137961736411214, + "grad_norm": 0.17238853871822357, + "learning_rate": 4.686211065024937e-06, + "loss": 0.9829, + "step": 43350 + }, + { + "epoch": 0.3138685603017076, + "grad_norm": 0.16124895215034485, + "learning_rate": 4.686138678364351e-06, + "loss": 0.9646, + "step": 43360 + }, + { + "epoch": 0.3139409469622938, + "grad_norm": 0.3990856111049652, + "learning_rate": 4.6860662917037655e-06, + "loss": 0.9583, + "step": 43370 + }, + { + "epoch": 0.31401333362288, + "grad_norm": 0.168771892786026, + "learning_rate": 4.685993905043179e-06, + "loss": 0.9749, + "step": 43380 + }, + { + "epoch": 0.31408572028346615, + "grad_norm": 0.16756808757781982, + "learning_rate": 4.685921518382593e-06, + "loss": 0.9551, + "step": 43390 + }, + { + "epoch": 0.31415810694405233, + "grad_norm": 0.16926801204681396, + "learning_rate": 4.685849131722006e-06, + "loss": 0.9635, + "step": 43400 + }, + { + "epoch": 0.31423049360463856, + "grad_norm": 0.17521944642066956, + "learning_rate": 4.685776745061421e-06, + "loss": 0.967, + "step": 43410 + }, + { + "epoch": 0.31430288026522474, + "grad_norm": 0.16481991112232208, + "learning_rate": 4.6857043584008344e-06, + "loss": 0.9695, + "step": 43420 + }, + { + "epoch": 0.3143752669258109, + "grad_norm": 0.15918724238872528, + "learning_rate": 4.685631971740248e-06, + "loss": 0.9679, + "step": 43430 + }, + { + "epoch": 0.3144476535863971, + "grad_norm": 0.1576610803604126, + "learning_rate": 4.685559585079662e-06, + "loss": 0.9572, + "step": 43440 + }, + { + "epoch": 0.31452004024698327, + "grad_norm": 0.16008280217647552, + "learning_rate": 4.685487198419076e-06, + "loss": 0.9626, + "step": 43450 + }, + { + "epoch": 0.3145924269075695, + "grad_norm": 0.16831348836421967, + "learning_rate": 4.68541481175849e-06, + "loss": 0.9513, + "step": 43460 + }, + { + "epoch": 0.3146648135681557, + "grad_norm": 0.2367899864912033, + "learning_rate": 4.685342425097903e-06, + "loss": 0.9394, + "step": 43470 + }, + { + "epoch": 0.31473720022874185, + "grad_norm": 0.16397932171821594, + "learning_rate": 4.685270038437317e-06, + "loss": 0.9559, + "step": 43480 + }, + { + "epoch": 0.31480958688932803, + "grad_norm": 0.16159677505493164, + "learning_rate": 4.6851976517767314e-06, + "loss": 0.9623, + "step": 43490 + }, + { + "epoch": 0.3148819735499142, + "grad_norm": 0.16152982413768768, + "learning_rate": 4.685125265116145e-06, + "loss": 0.9662, + "step": 43500 + }, + { + "epoch": 0.3149543602105004, + "grad_norm": 0.19313648343086243, + "learning_rate": 4.685052878455559e-06, + "loss": 0.9582, + "step": 43510 + }, + { + "epoch": 0.3150267468710866, + "grad_norm": 0.16740714013576508, + "learning_rate": 4.684980491794972e-06, + "loss": 0.959, + "step": 43520 + }, + { + "epoch": 0.3150991335316728, + "grad_norm": 0.1715911626815796, + "learning_rate": 4.684908105134387e-06, + "loss": 0.9513, + "step": 43530 + }, + { + "epoch": 0.31517152019225897, + "grad_norm": 0.1693735122680664, + "learning_rate": 4.6848357184738e-06, + "loss": 0.9747, + "step": 43540 + }, + { + "epoch": 0.31524390685284515, + "grad_norm": 0.16398315131664276, + "learning_rate": 4.684763331813214e-06, + "loss": 0.9639, + "step": 43550 + }, + { + "epoch": 0.3153162935134313, + "grad_norm": 0.16864167153835297, + "learning_rate": 4.684690945152628e-06, + "loss": 0.9509, + "step": 43560 + }, + { + "epoch": 0.31538868017401755, + "grad_norm": 0.15336643159389496, + "learning_rate": 4.684618558492042e-06, + "loss": 0.9669, + "step": 43570 + }, + { + "epoch": 0.31546106683460373, + "grad_norm": 0.17718195915222168, + "learning_rate": 4.684546171831456e-06, + "loss": 0.9624, + "step": 43580 + }, + { + "epoch": 0.3155334534951899, + "grad_norm": 0.17874468863010406, + "learning_rate": 4.684473785170869e-06, + "loss": 0.9659, + "step": 43590 + }, + { + "epoch": 0.3156058401557761, + "grad_norm": 0.1714981645345688, + "learning_rate": 4.684401398510283e-06, + "loss": 0.9621, + "step": 43600 + }, + { + "epoch": 0.31567822681636226, + "grad_norm": 0.15359127521514893, + "learning_rate": 4.6843290118496965e-06, + "loss": 0.9701, + "step": 43610 + }, + { + "epoch": 0.3157506134769485, + "grad_norm": 0.2659284472465515, + "learning_rate": 4.68425662518911e-06, + "loss": 0.9573, + "step": 43620 + }, + { + "epoch": 0.31582300013753467, + "grad_norm": 0.15532797574996948, + "learning_rate": 4.684184238528524e-06, + "loss": 0.96, + "step": 43630 + }, + { + "epoch": 0.31589538679812085, + "grad_norm": 0.17984911799430847, + "learning_rate": 4.684111851867938e-06, + "loss": 0.9591, + "step": 43640 + }, + { + "epoch": 0.315967773458707, + "grad_norm": 0.16166174411773682, + "learning_rate": 4.684039465207352e-06, + "loss": 0.9545, + "step": 43650 + }, + { + "epoch": 0.3160401601192932, + "grad_norm": 0.16693688929080963, + "learning_rate": 4.6839670785467655e-06, + "loss": 0.972, + "step": 43660 + }, + { + "epoch": 0.3161125467798794, + "grad_norm": 0.1600932478904724, + "learning_rate": 4.683894691886179e-06, + "loss": 0.9576, + "step": 43670 + }, + { + "epoch": 0.3161849334404656, + "grad_norm": 0.1789911538362503, + "learning_rate": 4.6838223052255935e-06, + "loss": 0.9519, + "step": 43680 + }, + { + "epoch": 0.3162573201010518, + "grad_norm": 0.16878649592399597, + "learning_rate": 4.683749918565007e-06, + "loss": 0.9537, + "step": 43690 + }, + { + "epoch": 0.31632970676163796, + "grad_norm": 0.18558557331562042, + "learning_rate": 4.683677531904421e-06, + "loss": 0.9598, + "step": 43700 + }, + { + "epoch": 0.31640209342222414, + "grad_norm": 0.1579427868127823, + "learning_rate": 4.683605145243834e-06, + "loss": 0.9635, + "step": 43710 + }, + { + "epoch": 0.3164744800828103, + "grad_norm": 0.16161714494228363, + "learning_rate": 4.683532758583249e-06, + "loss": 0.9515, + "step": 43720 + }, + { + "epoch": 0.31654686674339655, + "grad_norm": 0.15619002282619476, + "learning_rate": 4.6834603719226625e-06, + "loss": 0.9658, + "step": 43730 + }, + { + "epoch": 0.3166192534039827, + "grad_norm": 0.17198404669761658, + "learning_rate": 4.683387985262076e-06, + "loss": 0.9492, + "step": 43740 + }, + { + "epoch": 0.3166916400645689, + "grad_norm": 0.18472731113433838, + "learning_rate": 4.68331559860149e-06, + "loss": 0.9596, + "step": 43750 + }, + { + "epoch": 0.3167640267251551, + "grad_norm": 0.17256297171115875, + "learning_rate": 4.683243211940904e-06, + "loss": 0.9668, + "step": 43760 + }, + { + "epoch": 0.31683641338574126, + "grad_norm": 0.17887750267982483, + "learning_rate": 4.683170825280318e-06, + "loss": 0.9639, + "step": 43770 + }, + { + "epoch": 0.3169088000463275, + "grad_norm": 0.17158237099647522, + "learning_rate": 4.683098438619731e-06, + "loss": 0.9678, + "step": 43780 + }, + { + "epoch": 0.31698118670691366, + "grad_norm": 0.1695108711719513, + "learning_rate": 4.683026051959145e-06, + "loss": 0.9517, + "step": 43790 + }, + { + "epoch": 0.31705357336749984, + "grad_norm": 0.1559198945760727, + "learning_rate": 4.6829536652985595e-06, + "loss": 0.9627, + "step": 43800 + }, + { + "epoch": 0.317125960028086, + "grad_norm": 0.15818607807159424, + "learning_rate": 4.682881278637973e-06, + "loss": 0.9667, + "step": 43810 + }, + { + "epoch": 0.3171983466886722, + "grad_norm": 0.1614452749490738, + "learning_rate": 4.682808891977387e-06, + "loss": 0.9597, + "step": 43820 + }, + { + "epoch": 0.31727073334925837, + "grad_norm": 0.17955391108989716, + "learning_rate": 4.6827365053168e-06, + "loss": 0.9531, + "step": 43830 + }, + { + "epoch": 0.3173431200098446, + "grad_norm": 0.1729903519153595, + "learning_rate": 4.682664118656215e-06, + "loss": 0.9437, + "step": 43840 + }, + { + "epoch": 0.3174155066704308, + "grad_norm": 0.17108261585235596, + "learning_rate": 4.682591731995628e-06, + "loss": 0.9588, + "step": 43850 + }, + { + "epoch": 0.31748789333101696, + "grad_norm": 0.16403260827064514, + "learning_rate": 4.682519345335042e-06, + "loss": 0.9561, + "step": 43860 + }, + { + "epoch": 0.31756027999160313, + "grad_norm": 0.17436562478542328, + "learning_rate": 4.682446958674456e-06, + "loss": 0.9576, + "step": 43870 + }, + { + "epoch": 0.3176326666521893, + "grad_norm": 0.16612768173217773, + "learning_rate": 4.68237457201387e-06, + "loss": 0.9558, + "step": 43880 + }, + { + "epoch": 0.31770505331277554, + "grad_norm": 0.16263121366500854, + "learning_rate": 4.682302185353284e-06, + "loss": 0.9564, + "step": 43890 + }, + { + "epoch": 0.3177774399733617, + "grad_norm": 0.24618317186832428, + "learning_rate": 4.682229798692697e-06, + "loss": 0.9667, + "step": 43900 + }, + { + "epoch": 0.3178498266339479, + "grad_norm": 0.16008852422237396, + "learning_rate": 4.682157412032111e-06, + "loss": 0.973, + "step": 43910 + }, + { + "epoch": 0.3179222132945341, + "grad_norm": 0.16652582585811615, + "learning_rate": 4.682085025371525e-06, + "loss": 0.9534, + "step": 43920 + }, + { + "epoch": 0.31799459995512025, + "grad_norm": 0.19293102622032166, + "learning_rate": 4.682012638710939e-06, + "loss": 0.9546, + "step": 43930 + }, + { + "epoch": 0.3180669866157065, + "grad_norm": 0.16652163863182068, + "learning_rate": 4.681940252050353e-06, + "loss": 0.9599, + "step": 43940 + }, + { + "epoch": 0.31813937327629266, + "grad_norm": 0.16719068586826324, + "learning_rate": 4.681867865389766e-06, + "loss": 0.9807, + "step": 43950 + }, + { + "epoch": 0.31821175993687884, + "grad_norm": 0.16578418016433716, + "learning_rate": 4.68179547872918e-06, + "loss": 0.9627, + "step": 43960 + }, + { + "epoch": 0.318284146597465, + "grad_norm": 0.20773844420909882, + "learning_rate": 4.681723092068594e-06, + "loss": 0.9506, + "step": 43970 + }, + { + "epoch": 0.3183565332580512, + "grad_norm": 0.20871974527835846, + "learning_rate": 4.681650705408008e-06, + "loss": 0.9609, + "step": 43980 + }, + { + "epoch": 0.3184289199186374, + "grad_norm": 0.19961602985858917, + "learning_rate": 4.6815783187474216e-06, + "loss": 0.9696, + "step": 43990 + }, + { + "epoch": 0.3185013065792236, + "grad_norm": 0.1632424145936966, + "learning_rate": 4.681505932086835e-06, + "loss": 0.9569, + "step": 44000 + }, + { + "epoch": 0.3185736932398098, + "grad_norm": 0.15213897824287415, + "learning_rate": 4.68143354542625e-06, + "loss": 0.9503, + "step": 44010 + }, + { + "epoch": 0.31864607990039595, + "grad_norm": 0.16649749875068665, + "learning_rate": 4.681361158765663e-06, + "loss": 0.9645, + "step": 44020 + }, + { + "epoch": 0.3187184665609821, + "grad_norm": 0.16990630328655243, + "learning_rate": 4.681288772105077e-06, + "loss": 0.9532, + "step": 44030 + }, + { + "epoch": 0.3187908532215683, + "grad_norm": 0.15943020582199097, + "learning_rate": 4.6812163854444905e-06, + "loss": 0.9601, + "step": 44040 + }, + { + "epoch": 0.31886323988215454, + "grad_norm": 0.1748354434967041, + "learning_rate": 4.681143998783905e-06, + "loss": 0.9482, + "step": 44050 + }, + { + "epoch": 0.3189356265427407, + "grad_norm": 0.16795013844966888, + "learning_rate": 4.6810716121233186e-06, + "loss": 0.9617, + "step": 44060 + }, + { + "epoch": 0.3190080132033269, + "grad_norm": 0.17851009964942932, + "learning_rate": 4.680999225462732e-06, + "loss": 0.9599, + "step": 44070 + }, + { + "epoch": 0.31908039986391307, + "grad_norm": 0.16591772437095642, + "learning_rate": 4.680926838802146e-06, + "loss": 0.9617, + "step": 44080 + }, + { + "epoch": 0.31915278652449924, + "grad_norm": 0.16842985153198242, + "learning_rate": 4.68085445214156e-06, + "loss": 0.9649, + "step": 44090 + }, + { + "epoch": 0.3192251731850855, + "grad_norm": 0.1664043366909027, + "learning_rate": 4.680782065480974e-06, + "loss": 0.9515, + "step": 44100 + }, + { + "epoch": 0.31929755984567165, + "grad_norm": 0.1651202142238617, + "learning_rate": 4.6807096788203875e-06, + "loss": 0.9699, + "step": 44110 + }, + { + "epoch": 0.31936994650625783, + "grad_norm": 0.18120470643043518, + "learning_rate": 4.680637292159801e-06, + "loss": 0.96, + "step": 44120 + }, + { + "epoch": 0.319442333166844, + "grad_norm": 0.1606922298669815, + "learning_rate": 4.680564905499215e-06, + "loss": 0.9478, + "step": 44130 + }, + { + "epoch": 0.3195147198274302, + "grad_norm": 0.16963617503643036, + "learning_rate": 4.680492518838628e-06, + "loss": 0.9625, + "step": 44140 + }, + { + "epoch": 0.3195871064880164, + "grad_norm": 0.17569056153297424, + "learning_rate": 4.680420132178042e-06, + "loss": 0.9659, + "step": 44150 + }, + { + "epoch": 0.3196594931486026, + "grad_norm": 0.16258999705314636, + "learning_rate": 4.6803477455174564e-06, + "loss": 0.9563, + "step": 44160 + }, + { + "epoch": 0.31973187980918877, + "grad_norm": 0.22039775550365448, + "learning_rate": 4.68027535885687e-06, + "loss": 0.9474, + "step": 44170 + }, + { + "epoch": 0.31980426646977494, + "grad_norm": 0.1699448972940445, + "learning_rate": 4.680202972196284e-06, + "loss": 0.9796, + "step": 44180 + }, + { + "epoch": 0.3198766531303611, + "grad_norm": 0.15425299108028412, + "learning_rate": 4.680130585535697e-06, + "loss": 0.9552, + "step": 44190 + }, + { + "epoch": 0.3199490397909473, + "grad_norm": 0.17444351315498352, + "learning_rate": 4.680058198875112e-06, + "loss": 0.9538, + "step": 44200 + }, + { + "epoch": 0.32002142645153353, + "grad_norm": 0.17369182407855988, + "learning_rate": 4.679985812214525e-06, + "loss": 0.9556, + "step": 44210 + }, + { + "epoch": 0.3200938131121197, + "grad_norm": 0.17337919771671295, + "learning_rate": 4.679913425553939e-06, + "loss": 0.9626, + "step": 44220 + }, + { + "epoch": 0.3201661997727059, + "grad_norm": 0.1717376410961151, + "learning_rate": 4.679841038893353e-06, + "loss": 0.9431, + "step": 44230 + }, + { + "epoch": 0.32023858643329206, + "grad_norm": 0.16457335650920868, + "learning_rate": 4.679768652232767e-06, + "loss": 0.9493, + "step": 44240 + }, + { + "epoch": 0.32031097309387824, + "grad_norm": 0.17030712962150574, + "learning_rate": 4.679696265572181e-06, + "loss": 0.9625, + "step": 44250 + }, + { + "epoch": 0.32038335975446447, + "grad_norm": 0.1790560930967331, + "learning_rate": 4.679623878911594e-06, + "loss": 0.9629, + "step": 44260 + }, + { + "epoch": 0.32045574641505065, + "grad_norm": 0.15533442795276642, + "learning_rate": 4.679551492251008e-06, + "loss": 0.965, + "step": 44270 + }, + { + "epoch": 0.3205281330756368, + "grad_norm": 0.15475206077098846, + "learning_rate": 4.679479105590422e-06, + "loss": 0.9614, + "step": 44280 + }, + { + "epoch": 0.320600519736223, + "grad_norm": 0.15645426511764526, + "learning_rate": 4.679406718929836e-06, + "loss": 0.9481, + "step": 44290 + }, + { + "epoch": 0.3206729063968092, + "grad_norm": 0.17138896882534027, + "learning_rate": 4.67933433226925e-06, + "loss": 0.9605, + "step": 44300 + }, + { + "epoch": 0.3207452930573954, + "grad_norm": 0.17752832174301147, + "learning_rate": 4.679261945608663e-06, + "loss": 0.9538, + "step": 44310 + }, + { + "epoch": 0.3208176797179816, + "grad_norm": 0.17925305664539337, + "learning_rate": 4.679189558948078e-06, + "loss": 0.9536, + "step": 44320 + }, + { + "epoch": 0.32089006637856776, + "grad_norm": 0.16660036146640778, + "learning_rate": 4.679117172287491e-06, + "loss": 0.9566, + "step": 44330 + }, + { + "epoch": 0.32096245303915394, + "grad_norm": 0.16511604189872742, + "learning_rate": 4.679044785626905e-06, + "loss": 0.9473, + "step": 44340 + }, + { + "epoch": 0.3210348396997401, + "grad_norm": 0.17227579653263092, + "learning_rate": 4.6789723989663185e-06, + "loss": 0.9642, + "step": 44350 + }, + { + "epoch": 0.3211072263603263, + "grad_norm": 0.17164403200149536, + "learning_rate": 4.678900012305733e-06, + "loss": 0.9601, + "step": 44360 + }, + { + "epoch": 0.3211796130209125, + "grad_norm": 0.1575680375099182, + "learning_rate": 4.678827625645147e-06, + "loss": 0.9672, + "step": 44370 + }, + { + "epoch": 0.3212519996814987, + "grad_norm": 0.15979178249835968, + "learning_rate": 4.67875523898456e-06, + "loss": 0.9615, + "step": 44380 + }, + { + "epoch": 0.3213243863420849, + "grad_norm": 0.19064490497112274, + "learning_rate": 4.678682852323974e-06, + "loss": 0.9672, + "step": 44390 + }, + { + "epoch": 0.32139677300267105, + "grad_norm": 0.16969703137874603, + "learning_rate": 4.678610465663388e-06, + "loss": 0.9418, + "step": 44400 + }, + { + "epoch": 0.32146915966325723, + "grad_norm": 0.1616748571395874, + "learning_rate": 4.678538079002802e-06, + "loss": 0.9613, + "step": 44410 + }, + { + "epoch": 0.32154154632384346, + "grad_norm": 0.15733925998210907, + "learning_rate": 4.6784656923422155e-06, + "loss": 0.9679, + "step": 44420 + }, + { + "epoch": 0.32161393298442964, + "grad_norm": 0.1610860526561737, + "learning_rate": 4.678393305681629e-06, + "loss": 0.9692, + "step": 44430 + }, + { + "epoch": 0.3216863196450158, + "grad_norm": 0.16648943722248077, + "learning_rate": 4.678320919021044e-06, + "loss": 0.9565, + "step": 44440 + }, + { + "epoch": 0.321758706305602, + "grad_norm": 0.186976358294487, + "learning_rate": 4.678248532360457e-06, + "loss": 0.9474, + "step": 44450 + }, + { + "epoch": 0.32183109296618817, + "grad_norm": 0.1633586436510086, + "learning_rate": 4.678176145699871e-06, + "loss": 0.9468, + "step": 44460 + }, + { + "epoch": 0.3219034796267744, + "grad_norm": 0.197808638215065, + "learning_rate": 4.6781037590392845e-06, + "loss": 0.9601, + "step": 44470 + }, + { + "epoch": 0.3219758662873606, + "grad_norm": 0.15820321440696716, + "learning_rate": 4.678031372378699e-06, + "loss": 0.9537, + "step": 44480 + }, + { + "epoch": 0.32204825294794676, + "grad_norm": 0.1628575474023819, + "learning_rate": 4.6779589857181125e-06, + "loss": 0.9503, + "step": 44490 + }, + { + "epoch": 0.32212063960853293, + "grad_norm": 0.1750411093235016, + "learning_rate": 4.677886599057526e-06, + "loss": 0.9594, + "step": 44500 + }, + { + "epoch": 0.3221930262691191, + "grad_norm": 0.17343439161777496, + "learning_rate": 4.67781421239694e-06, + "loss": 0.9584, + "step": 44510 + }, + { + "epoch": 0.32226541292970534, + "grad_norm": 0.16742408275604248, + "learning_rate": 4.677741825736354e-06, + "loss": 0.9638, + "step": 44520 + }, + { + "epoch": 0.3223377995902915, + "grad_norm": 0.1644875556230545, + "learning_rate": 4.677669439075768e-06, + "loss": 0.9536, + "step": 44530 + }, + { + "epoch": 0.3224101862508777, + "grad_norm": 0.16791434586048126, + "learning_rate": 4.6775970524151815e-06, + "loss": 0.9669, + "step": 44540 + }, + { + "epoch": 0.32248257291146387, + "grad_norm": 0.16774770617485046, + "learning_rate": 4.677524665754595e-06, + "loss": 0.9569, + "step": 44550 + }, + { + "epoch": 0.32255495957205005, + "grad_norm": 0.15673328936100006, + "learning_rate": 4.677452279094009e-06, + "loss": 0.9394, + "step": 44560 + }, + { + "epoch": 0.3226273462326362, + "grad_norm": 0.17572014033794403, + "learning_rate": 4.677379892433423e-06, + "loss": 0.957, + "step": 44570 + }, + { + "epoch": 0.32269973289322246, + "grad_norm": 0.18698441982269287, + "learning_rate": 4.677307505772837e-06, + "loss": 0.9637, + "step": 44580 + }, + { + "epoch": 0.32277211955380863, + "grad_norm": 0.17538724839687347, + "learning_rate": 4.67723511911225e-06, + "loss": 0.968, + "step": 44590 + }, + { + "epoch": 0.3228445062143948, + "grad_norm": 0.16755321621894836, + "learning_rate": 4.677162732451664e-06, + "loss": 0.9542, + "step": 44600 + }, + { + "epoch": 0.322916892874981, + "grad_norm": 0.1889658123254776, + "learning_rate": 4.6770903457910785e-06, + "loss": 0.9668, + "step": 44610 + }, + { + "epoch": 0.32298927953556716, + "grad_norm": 0.15155941247940063, + "learning_rate": 4.677017959130492e-06, + "loss": 0.963, + "step": 44620 + }, + { + "epoch": 0.3230616661961534, + "grad_norm": 0.17296727001667023, + "learning_rate": 4.676945572469906e-06, + "loss": 0.9658, + "step": 44630 + }, + { + "epoch": 0.3231340528567396, + "grad_norm": 0.15777067840099335, + "learning_rate": 4.676873185809319e-06, + "loss": 0.956, + "step": 44640 + }, + { + "epoch": 0.32320643951732575, + "grad_norm": 0.1767292618751526, + "learning_rate": 4.676800799148734e-06, + "loss": 0.9635, + "step": 44650 + }, + { + "epoch": 0.3232788261779119, + "grad_norm": 0.1630258858203888, + "learning_rate": 4.6767284124881466e-06, + "loss": 0.9621, + "step": 44660 + }, + { + "epoch": 0.3233512128384981, + "grad_norm": 0.16284741461277008, + "learning_rate": 4.676656025827561e-06, + "loss": 0.9644, + "step": 44670 + }, + { + "epoch": 0.32342359949908434, + "grad_norm": 0.16810114681720734, + "learning_rate": 4.676583639166975e-06, + "loss": 0.9712, + "step": 44680 + }, + { + "epoch": 0.3234959861596705, + "grad_norm": 0.16822989284992218, + "learning_rate": 4.676511252506388e-06, + "loss": 0.9616, + "step": 44690 + }, + { + "epoch": 0.3235683728202567, + "grad_norm": 0.1614549160003662, + "learning_rate": 4.676438865845802e-06, + "loss": 0.9493, + "step": 44700 + }, + { + "epoch": 0.32364075948084287, + "grad_norm": 0.1698823720216751, + "learning_rate": 4.676366479185216e-06, + "loss": 0.9479, + "step": 44710 + }, + { + "epoch": 0.32371314614142904, + "grad_norm": 0.16118445992469788, + "learning_rate": 4.67629409252463e-06, + "loss": 0.9754, + "step": 44720 + }, + { + "epoch": 0.3237855328020152, + "grad_norm": 0.16466830670833588, + "learning_rate": 4.6762217058640436e-06, + "loss": 0.978, + "step": 44730 + }, + { + "epoch": 0.32385791946260145, + "grad_norm": 0.1972661316394806, + "learning_rate": 4.676149319203457e-06, + "loss": 0.9743, + "step": 44740 + }, + { + "epoch": 0.32393030612318763, + "grad_norm": 0.17572824656963348, + "learning_rate": 4.676076932542871e-06, + "loss": 0.9744, + "step": 44750 + }, + { + "epoch": 0.3240026927837738, + "grad_norm": 0.18128469586372375, + "learning_rate": 4.676004545882285e-06, + "loss": 0.9504, + "step": 44760 + }, + { + "epoch": 0.32407507944436, + "grad_norm": 0.2079382985830307, + "learning_rate": 4.675932159221699e-06, + "loss": 0.9561, + "step": 44770 + }, + { + "epoch": 0.32414746610494616, + "grad_norm": 0.15759167075157166, + "learning_rate": 4.6758597725611125e-06, + "loss": 0.9527, + "step": 44780 + }, + { + "epoch": 0.3242198527655324, + "grad_norm": 0.1726982742547989, + "learning_rate": 4.675787385900526e-06, + "loss": 0.9738, + "step": 44790 + }, + { + "epoch": 0.32429223942611857, + "grad_norm": 0.2257843315601349, + "learning_rate": 4.6757149992399406e-06, + "loss": 0.9718, + "step": 44800 + }, + { + "epoch": 0.32436462608670474, + "grad_norm": 0.16954675316810608, + "learning_rate": 4.675642612579354e-06, + "loss": 0.9619, + "step": 44810 + }, + { + "epoch": 0.3244370127472909, + "grad_norm": 0.17804615199565887, + "learning_rate": 4.675570225918768e-06, + "loss": 0.9657, + "step": 44820 + }, + { + "epoch": 0.3245093994078771, + "grad_norm": 0.18081621825695038, + "learning_rate": 4.675497839258181e-06, + "loss": 0.9528, + "step": 44830 + }, + { + "epoch": 0.32458178606846333, + "grad_norm": 0.19913703203201294, + "learning_rate": 4.675425452597596e-06, + "loss": 0.9552, + "step": 44840 + }, + { + "epoch": 0.3246541727290495, + "grad_norm": 0.15979525446891785, + "learning_rate": 4.6753530659370095e-06, + "loss": 0.9465, + "step": 44850 + }, + { + "epoch": 0.3247265593896357, + "grad_norm": 0.1660212129354477, + "learning_rate": 4.675280679276423e-06, + "loss": 0.9517, + "step": 44860 + }, + { + "epoch": 0.32479894605022186, + "grad_norm": 0.1735403835773468, + "learning_rate": 4.675208292615837e-06, + "loss": 0.9641, + "step": 44870 + }, + { + "epoch": 0.32487133271080804, + "grad_norm": 0.15234126150608063, + "learning_rate": 4.675135905955251e-06, + "loss": 0.9705, + "step": 44880 + }, + { + "epoch": 0.3249437193713942, + "grad_norm": 0.16520267724990845, + "learning_rate": 4.675063519294665e-06, + "loss": 0.9587, + "step": 44890 + }, + { + "epoch": 0.32501610603198045, + "grad_norm": 0.1735251098871231, + "learning_rate": 4.6749911326340784e-06, + "loss": 0.9668, + "step": 44900 + }, + { + "epoch": 0.3250884926925666, + "grad_norm": 0.17319133877754211, + "learning_rate": 4.674918745973492e-06, + "loss": 0.9523, + "step": 44910 + }, + { + "epoch": 0.3251608793531528, + "grad_norm": 0.17401674389839172, + "learning_rate": 4.6748463593129065e-06, + "loss": 0.9708, + "step": 44920 + }, + { + "epoch": 0.325233266013739, + "grad_norm": 0.16565395891666412, + "learning_rate": 4.67477397265232e-06, + "loss": 0.9531, + "step": 44930 + }, + { + "epoch": 0.32530565267432515, + "grad_norm": 0.18349653482437134, + "learning_rate": 4.674701585991734e-06, + "loss": 0.9586, + "step": 44940 + }, + { + "epoch": 0.3253780393349114, + "grad_norm": 0.15808264911174774, + "learning_rate": 4.674629199331147e-06, + "loss": 0.9453, + "step": 44950 + }, + { + "epoch": 0.32545042599549756, + "grad_norm": 0.16079016029834747, + "learning_rate": 4.674556812670562e-06, + "loss": 0.9568, + "step": 44960 + }, + { + "epoch": 0.32552281265608374, + "grad_norm": 0.1582205444574356, + "learning_rate": 4.6744844260099754e-06, + "loss": 0.9383, + "step": 44970 + }, + { + "epoch": 0.3255951993166699, + "grad_norm": 0.1639067679643631, + "learning_rate": 4.674412039349389e-06, + "loss": 0.9554, + "step": 44980 + }, + { + "epoch": 0.3256675859772561, + "grad_norm": 0.17060793936252594, + "learning_rate": 4.674339652688803e-06, + "loss": 0.9534, + "step": 44990 + }, + { + "epoch": 0.3257399726378423, + "grad_norm": 0.18423733115196228, + "learning_rate": 4.674267266028217e-06, + "loss": 0.9478, + "step": 45000 + }, + { + "epoch": 0.3258123592984285, + "grad_norm": 0.18846924602985382, + "learning_rate": 4.674194879367631e-06, + "loss": 0.9734, + "step": 45010 + }, + { + "epoch": 0.3258847459590147, + "grad_norm": 0.16957645118236542, + "learning_rate": 4.674122492707044e-06, + "loss": 0.9489, + "step": 45020 + }, + { + "epoch": 0.32595713261960085, + "grad_norm": 0.15767230093479156, + "learning_rate": 4.674050106046458e-06, + "loss": 0.9565, + "step": 45030 + }, + { + "epoch": 0.32602951928018703, + "grad_norm": 0.15942777693271637, + "learning_rate": 4.6739777193858724e-06, + "loss": 0.9586, + "step": 45040 + }, + { + "epoch": 0.3261019059407732, + "grad_norm": 0.16869819164276123, + "learning_rate": 4.673905332725286e-06, + "loss": 0.9599, + "step": 45050 + }, + { + "epoch": 0.32617429260135944, + "grad_norm": 0.17203491926193237, + "learning_rate": 4.6738329460647e-06, + "loss": 0.9671, + "step": 45060 + }, + { + "epoch": 0.3262466792619456, + "grad_norm": 0.15850797295570374, + "learning_rate": 4.673760559404113e-06, + "loss": 0.9588, + "step": 45070 + }, + { + "epoch": 0.3263190659225318, + "grad_norm": 0.16606780886650085, + "learning_rate": 4.673688172743528e-06, + "loss": 0.9549, + "step": 45080 + }, + { + "epoch": 0.32639145258311797, + "grad_norm": 0.1549416184425354, + "learning_rate": 4.673615786082941e-06, + "loss": 0.9558, + "step": 45090 + }, + { + "epoch": 0.32646383924370415, + "grad_norm": 0.16568376123905182, + "learning_rate": 4.673543399422355e-06, + "loss": 0.9557, + "step": 45100 + }, + { + "epoch": 0.3265362259042904, + "grad_norm": 0.17167599499225616, + "learning_rate": 4.673471012761769e-06, + "loss": 0.9581, + "step": 45110 + }, + { + "epoch": 0.32660861256487655, + "grad_norm": 0.1696111410856247, + "learning_rate": 4.673398626101183e-06, + "loss": 0.9523, + "step": 45120 + }, + { + "epoch": 0.32668099922546273, + "grad_norm": 0.15967723727226257, + "learning_rate": 4.673326239440597e-06, + "loss": 0.9456, + "step": 45130 + }, + { + "epoch": 0.3267533858860489, + "grad_norm": 0.1622992306947708, + "learning_rate": 4.67325385278001e-06, + "loss": 0.9549, + "step": 45140 + }, + { + "epoch": 0.3268257725466351, + "grad_norm": 0.17789630591869354, + "learning_rate": 4.673181466119424e-06, + "loss": 0.9494, + "step": 45150 + }, + { + "epoch": 0.3268981592072213, + "grad_norm": 0.16522136330604553, + "learning_rate": 4.673109079458838e-06, + "loss": 0.974, + "step": 45160 + }, + { + "epoch": 0.3269705458678075, + "grad_norm": 0.17646393179893494, + "learning_rate": 4.673036692798252e-06, + "loss": 0.9629, + "step": 45170 + }, + { + "epoch": 0.32704293252839367, + "grad_norm": 0.16956348717212677, + "learning_rate": 4.672964306137666e-06, + "loss": 0.96, + "step": 45180 + }, + { + "epoch": 0.32711531918897985, + "grad_norm": 0.166047602891922, + "learning_rate": 4.672891919477079e-06, + "loss": 0.9432, + "step": 45190 + }, + { + "epoch": 0.327187705849566, + "grad_norm": 0.1728239208459854, + "learning_rate": 4.672819532816493e-06, + "loss": 0.9631, + "step": 45200 + }, + { + "epoch": 0.32726009251015226, + "grad_norm": 0.16295363008975983, + "learning_rate": 4.6727471461559065e-06, + "loss": 0.9609, + "step": 45210 + }, + { + "epoch": 0.32733247917073843, + "grad_norm": 0.16612671315670013, + "learning_rate": 4.67267475949532e-06, + "loss": 0.9499, + "step": 45220 + }, + { + "epoch": 0.3274048658313246, + "grad_norm": 0.16281329095363617, + "learning_rate": 4.6726023728347345e-06, + "loss": 0.9565, + "step": 45230 + }, + { + "epoch": 0.3274772524919108, + "grad_norm": 0.15651023387908936, + "learning_rate": 4.672529986174148e-06, + "loss": 0.9623, + "step": 45240 + }, + { + "epoch": 0.32754963915249696, + "grad_norm": 0.21686317026615143, + "learning_rate": 4.672457599513562e-06, + "loss": 0.9478, + "step": 45250 + }, + { + "epoch": 0.32762202581308314, + "grad_norm": 0.16553688049316406, + "learning_rate": 4.672385212852975e-06, + "loss": 0.9527, + "step": 45260 + }, + { + "epoch": 0.32769441247366937, + "grad_norm": 0.16378672420978546, + "learning_rate": 4.67231282619239e-06, + "loss": 0.9625, + "step": 45270 + }, + { + "epoch": 0.32776679913425555, + "grad_norm": 0.16440148651599884, + "learning_rate": 4.6722404395318035e-06, + "loss": 0.9716, + "step": 45280 + }, + { + "epoch": 0.3278391857948417, + "grad_norm": 0.17290063202381134, + "learning_rate": 4.672168052871217e-06, + "loss": 0.9489, + "step": 45290 + }, + { + "epoch": 0.3279115724554279, + "grad_norm": 0.16323043406009674, + "learning_rate": 4.672095666210631e-06, + "loss": 0.9543, + "step": 45300 + }, + { + "epoch": 0.3279839591160141, + "grad_norm": 0.3352501690387726, + "learning_rate": 4.672023279550045e-06, + "loss": 0.964, + "step": 45310 + }, + { + "epoch": 0.3280563457766003, + "grad_norm": 0.16609349846839905, + "learning_rate": 4.671950892889459e-06, + "loss": 0.9649, + "step": 45320 + }, + { + "epoch": 0.3281287324371865, + "grad_norm": 0.17989186942577362, + "learning_rate": 4.671878506228872e-06, + "loss": 0.9558, + "step": 45330 + }, + { + "epoch": 0.32820111909777266, + "grad_norm": 0.1710348278284073, + "learning_rate": 4.671806119568286e-06, + "loss": 0.9666, + "step": 45340 + }, + { + "epoch": 0.32827350575835884, + "grad_norm": 0.16703477501869202, + "learning_rate": 4.6717337329077e-06, + "loss": 0.9709, + "step": 45350 + }, + { + "epoch": 0.328345892418945, + "grad_norm": 0.16510546207427979, + "learning_rate": 4.671661346247114e-06, + "loss": 0.9561, + "step": 45360 + }, + { + "epoch": 0.32841827907953125, + "grad_norm": 0.17808891832828522, + "learning_rate": 4.671588959586528e-06, + "loss": 0.9671, + "step": 45370 + }, + { + "epoch": 0.3284906657401174, + "grad_norm": 0.15966980159282684, + "learning_rate": 4.671516572925941e-06, + "loss": 0.9631, + "step": 45380 + }, + { + "epoch": 0.3285630524007036, + "grad_norm": 0.18822209537029266, + "learning_rate": 4.671444186265355e-06, + "loss": 0.9539, + "step": 45390 + }, + { + "epoch": 0.3286354390612898, + "grad_norm": 0.16146880388259888, + "learning_rate": 4.671371799604769e-06, + "loss": 0.9603, + "step": 45400 + }, + { + "epoch": 0.32870782572187596, + "grad_norm": 0.16964489221572876, + "learning_rate": 4.671299412944183e-06, + "loss": 0.9706, + "step": 45410 + }, + { + "epoch": 0.32878021238246213, + "grad_norm": 0.17912639677524567, + "learning_rate": 4.671227026283597e-06, + "loss": 0.9671, + "step": 45420 + }, + { + "epoch": 0.32885259904304837, + "grad_norm": 0.15870171785354614, + "learning_rate": 4.67115463962301e-06, + "loss": 0.9596, + "step": 45430 + }, + { + "epoch": 0.32892498570363454, + "grad_norm": 0.16293965280056, + "learning_rate": 4.671082252962425e-06, + "loss": 0.9536, + "step": 45440 + }, + { + "epoch": 0.3289973723642207, + "grad_norm": 0.1799435019493103, + "learning_rate": 4.671009866301838e-06, + "loss": 0.953, + "step": 45450 + }, + { + "epoch": 0.3290697590248069, + "grad_norm": 0.16505300998687744, + "learning_rate": 4.670937479641252e-06, + "loss": 0.9579, + "step": 45460 + }, + { + "epoch": 0.3291421456853931, + "grad_norm": 0.1620611697435379, + "learning_rate": 4.6708650929806656e-06, + "loss": 0.9364, + "step": 45470 + }, + { + "epoch": 0.3292145323459793, + "grad_norm": 0.16515015065670013, + "learning_rate": 4.67079270632008e-06, + "loss": 0.9711, + "step": 45480 + }, + { + "epoch": 0.3292869190065655, + "grad_norm": 0.16195544600486755, + "learning_rate": 4.670720319659494e-06, + "loss": 0.9485, + "step": 45490 + }, + { + "epoch": 0.32935930566715166, + "grad_norm": 0.1693519651889801, + "learning_rate": 4.670647932998907e-06, + "loss": 0.9392, + "step": 45500 + }, + { + "epoch": 0.32943169232773784, + "grad_norm": 0.16994911432266235, + "learning_rate": 4.670575546338321e-06, + "loss": 0.9574, + "step": 45510 + }, + { + "epoch": 0.329504078988324, + "grad_norm": 0.16897796094417572, + "learning_rate": 4.670503159677735e-06, + "loss": 0.9588, + "step": 45520 + }, + { + "epoch": 0.32957646564891024, + "grad_norm": 0.1709592044353485, + "learning_rate": 4.670430773017149e-06, + "loss": 0.9397, + "step": 45530 + }, + { + "epoch": 0.3296488523094964, + "grad_norm": 0.177882120013237, + "learning_rate": 4.6703583863565626e-06, + "loss": 0.9669, + "step": 45540 + }, + { + "epoch": 0.3297212389700826, + "grad_norm": 0.18497739732265472, + "learning_rate": 4.670285999695976e-06, + "loss": 0.9451, + "step": 45550 + }, + { + "epoch": 0.3297936256306688, + "grad_norm": 0.16033156216144562, + "learning_rate": 4.670213613035391e-06, + "loss": 0.9574, + "step": 45560 + }, + { + "epoch": 0.32986601229125495, + "grad_norm": 0.1641259789466858, + "learning_rate": 4.670141226374804e-06, + "loss": 0.94, + "step": 45570 + }, + { + "epoch": 0.3299383989518411, + "grad_norm": 0.1763799786567688, + "learning_rate": 4.670068839714218e-06, + "loss": 0.9508, + "step": 45580 + }, + { + "epoch": 0.33001078561242736, + "grad_norm": 0.17956474423408508, + "learning_rate": 4.6699964530536315e-06, + "loss": 0.9598, + "step": 45590 + }, + { + "epoch": 0.33008317227301354, + "grad_norm": 0.21450495719909668, + "learning_rate": 4.669924066393046e-06, + "loss": 0.9572, + "step": 45600 + }, + { + "epoch": 0.3301555589335997, + "grad_norm": 0.1846141368150711, + "learning_rate": 4.66985167973246e-06, + "loss": 0.968, + "step": 45610 + }, + { + "epoch": 0.3302279455941859, + "grad_norm": 0.1598512977361679, + "learning_rate": 4.669779293071873e-06, + "loss": 0.9591, + "step": 45620 + }, + { + "epoch": 0.33030033225477207, + "grad_norm": 0.15021023154258728, + "learning_rate": 4.669706906411287e-06, + "loss": 0.9389, + "step": 45630 + }, + { + "epoch": 0.3303727189153583, + "grad_norm": 0.16507980227470398, + "learning_rate": 4.669634519750701e-06, + "loss": 0.9638, + "step": 45640 + }, + { + "epoch": 0.3304451055759445, + "grad_norm": 0.16119886934757233, + "learning_rate": 4.669562133090115e-06, + "loss": 0.9521, + "step": 45650 + }, + { + "epoch": 0.33051749223653065, + "grad_norm": 0.19781506061553955, + "learning_rate": 4.6694897464295285e-06, + "loss": 0.9473, + "step": 45660 + }, + { + "epoch": 0.33058987889711683, + "grad_norm": 0.1651494801044464, + "learning_rate": 4.669417359768942e-06, + "loss": 0.9482, + "step": 45670 + }, + { + "epoch": 0.330662265557703, + "grad_norm": 0.16081689298152924, + "learning_rate": 4.669344973108357e-06, + "loss": 0.9678, + "step": 45680 + }, + { + "epoch": 0.33073465221828924, + "grad_norm": 0.1630435436964035, + "learning_rate": 4.66927258644777e-06, + "loss": 0.9627, + "step": 45690 + }, + { + "epoch": 0.3308070388788754, + "grad_norm": 0.2068457007408142, + "learning_rate": 4.669200199787184e-06, + "loss": 0.9521, + "step": 45700 + }, + { + "epoch": 0.3308794255394616, + "grad_norm": 0.168662890791893, + "learning_rate": 4.6691278131265974e-06, + "loss": 0.9453, + "step": 45710 + }, + { + "epoch": 0.33095181220004777, + "grad_norm": 0.18764568865299225, + "learning_rate": 4.669055426466011e-06, + "loss": 0.9483, + "step": 45720 + }, + { + "epoch": 0.33102419886063394, + "grad_norm": 0.1561020016670227, + "learning_rate": 4.668983039805425e-06, + "loss": 0.9565, + "step": 45730 + }, + { + "epoch": 0.3310965855212202, + "grad_norm": 0.22187693417072296, + "learning_rate": 4.668910653144838e-06, + "loss": 0.9594, + "step": 45740 + }, + { + "epoch": 0.33116897218180635, + "grad_norm": 0.16155502200126648, + "learning_rate": 4.668838266484253e-06, + "loss": 0.9664, + "step": 45750 + }, + { + "epoch": 0.33124135884239253, + "grad_norm": 0.1851801574230194, + "learning_rate": 4.668765879823666e-06, + "loss": 0.9517, + "step": 45760 + }, + { + "epoch": 0.3313137455029787, + "grad_norm": 0.16832536458969116, + "learning_rate": 4.66869349316308e-06, + "loss": 0.9532, + "step": 45770 + }, + { + "epoch": 0.3313861321635649, + "grad_norm": 0.17694327235221863, + "learning_rate": 4.668621106502494e-06, + "loss": 0.9466, + "step": 45780 + }, + { + "epoch": 0.33145851882415106, + "grad_norm": 0.16291755437850952, + "learning_rate": 4.668548719841908e-06, + "loss": 0.9544, + "step": 45790 + }, + { + "epoch": 0.3315309054847373, + "grad_norm": 0.1645163744688034, + "learning_rate": 4.668476333181322e-06, + "loss": 0.9519, + "step": 45800 + }, + { + "epoch": 0.33160329214532347, + "grad_norm": 0.1698862910270691, + "learning_rate": 4.668403946520735e-06, + "loss": 0.9583, + "step": 45810 + }, + { + "epoch": 0.33167567880590965, + "grad_norm": 0.16201475262641907, + "learning_rate": 4.668331559860149e-06, + "loss": 0.9518, + "step": 45820 + }, + { + "epoch": 0.3317480654664958, + "grad_norm": 0.2548742890357971, + "learning_rate": 4.668259173199563e-06, + "loss": 0.9571, + "step": 45830 + }, + { + "epoch": 0.331820452127082, + "grad_norm": 0.17558850347995758, + "learning_rate": 4.668186786538977e-06, + "loss": 0.9502, + "step": 45840 + }, + { + "epoch": 0.33189283878766823, + "grad_norm": 0.2049540877342224, + "learning_rate": 4.668114399878391e-06, + "loss": 0.9686, + "step": 45850 + }, + { + "epoch": 0.3319652254482544, + "grad_norm": 0.15438438951969147, + "learning_rate": 4.668042013217804e-06, + "loss": 0.9603, + "step": 45860 + }, + { + "epoch": 0.3320376121088406, + "grad_norm": 0.15590500831604004, + "learning_rate": 4.667969626557219e-06, + "loss": 0.9566, + "step": 45870 + }, + { + "epoch": 0.33210999876942676, + "grad_norm": 0.15624962747097015, + "learning_rate": 4.667897239896632e-06, + "loss": 0.9586, + "step": 45880 + }, + { + "epoch": 0.33218238543001294, + "grad_norm": 0.15918295085430145, + "learning_rate": 4.667824853236046e-06, + "loss": 0.951, + "step": 45890 + }, + { + "epoch": 0.33225477209059917, + "grad_norm": 0.16058406233787537, + "learning_rate": 4.6677524665754595e-06, + "loss": 0.958, + "step": 45900 + }, + { + "epoch": 0.33232715875118535, + "grad_norm": 0.1862640231847763, + "learning_rate": 4.667680079914874e-06, + "loss": 0.9443, + "step": 45910 + }, + { + "epoch": 0.3323995454117715, + "grad_norm": 0.1649313122034073, + "learning_rate": 4.667607693254288e-06, + "loss": 0.9523, + "step": 45920 + }, + { + "epoch": 0.3324719320723577, + "grad_norm": 0.17208698391914368, + "learning_rate": 4.667535306593701e-06, + "loss": 0.9776, + "step": 45930 + }, + { + "epoch": 0.3325443187329439, + "grad_norm": 0.19187243282794952, + "learning_rate": 4.667462919933115e-06, + "loss": 0.9472, + "step": 45940 + }, + { + "epoch": 0.33261670539353005, + "grad_norm": 0.17830708622932434, + "learning_rate": 4.667390533272529e-06, + "loss": 0.9662, + "step": 45950 + }, + { + "epoch": 0.3326890920541163, + "grad_norm": 0.16068105399608612, + "learning_rate": 4.667318146611943e-06, + "loss": 0.9581, + "step": 45960 + }, + { + "epoch": 0.33276147871470246, + "grad_norm": 0.15991735458374023, + "learning_rate": 4.6672457599513565e-06, + "loss": 0.9495, + "step": 45970 + }, + { + "epoch": 0.33283386537528864, + "grad_norm": 0.20799915492534637, + "learning_rate": 4.66717337329077e-06, + "loss": 0.9464, + "step": 45980 + }, + { + "epoch": 0.3329062520358748, + "grad_norm": 0.16113980114459991, + "learning_rate": 4.667100986630184e-06, + "loss": 0.9635, + "step": 45990 + }, + { + "epoch": 0.332978638696461, + "grad_norm": 0.20556610822677612, + "learning_rate": 4.667028599969598e-06, + "loss": 0.9714, + "step": 46000 + }, + { + "epoch": 0.3330510253570472, + "grad_norm": 0.17026638984680176, + "learning_rate": 4.666956213309012e-06, + "loss": 0.9572, + "step": 46010 + }, + { + "epoch": 0.3331234120176334, + "grad_norm": 0.1647808998823166, + "learning_rate": 4.6668838266484255e-06, + "loss": 0.9645, + "step": 46020 + }, + { + "epoch": 0.3331957986782196, + "grad_norm": 0.17708559334278107, + "learning_rate": 4.666811439987839e-06, + "loss": 0.9605, + "step": 46030 + }, + { + "epoch": 0.33326818533880576, + "grad_norm": 0.17343778908252716, + "learning_rate": 4.6667390533272535e-06, + "loss": 0.9694, + "step": 46040 + }, + { + "epoch": 0.33334057199939193, + "grad_norm": 0.18600593507289886, + "learning_rate": 4.666666666666667e-06, + "loss": 0.9542, + "step": 46050 + }, + { + "epoch": 0.33341295865997816, + "grad_norm": 0.1633295863866806, + "learning_rate": 4.666594280006081e-06, + "loss": 0.9564, + "step": 46060 + }, + { + "epoch": 0.33348534532056434, + "grad_norm": 0.16525378823280334, + "learning_rate": 4.666521893345494e-06, + "loss": 0.9482, + "step": 46070 + }, + { + "epoch": 0.3335577319811505, + "grad_norm": 0.16414770483970642, + "learning_rate": 4.666449506684909e-06, + "loss": 0.9537, + "step": 46080 + }, + { + "epoch": 0.3336301186417367, + "grad_norm": 0.1624254733324051, + "learning_rate": 4.6663771200243225e-06, + "loss": 0.9482, + "step": 46090 + }, + { + "epoch": 0.33370250530232287, + "grad_norm": 0.18863092362880707, + "learning_rate": 4.666304733363736e-06, + "loss": 0.9574, + "step": 46100 + }, + { + "epoch": 0.33377489196290905, + "grad_norm": 0.16010300815105438, + "learning_rate": 4.66623234670315e-06, + "loss": 0.9503, + "step": 46110 + }, + { + "epoch": 0.3338472786234953, + "grad_norm": 0.16204185783863068, + "learning_rate": 4.666159960042564e-06, + "loss": 0.9577, + "step": 46120 + }, + { + "epoch": 0.33391966528408146, + "grad_norm": 0.16618123650550842, + "learning_rate": 4.666087573381978e-06, + "loss": 0.9709, + "step": 46130 + }, + { + "epoch": 0.33399205194466763, + "grad_norm": 0.1760234236717224, + "learning_rate": 4.666015186721391e-06, + "loss": 0.9694, + "step": 46140 + }, + { + "epoch": 0.3340644386052538, + "grad_norm": 0.26092368364334106, + "learning_rate": 4.665942800060805e-06, + "loss": 0.9568, + "step": 46150 + }, + { + "epoch": 0.33413682526584, + "grad_norm": 0.17464442551136017, + "learning_rate": 4.6658704134002195e-06, + "loss": 0.9586, + "step": 46160 + }, + { + "epoch": 0.3342092119264262, + "grad_norm": 0.15855874121189117, + "learning_rate": 4.665798026739633e-06, + "loss": 0.9455, + "step": 46170 + }, + { + "epoch": 0.3342815985870124, + "grad_norm": 0.1912544220685959, + "learning_rate": 4.665725640079047e-06, + "loss": 0.9646, + "step": 46180 + }, + { + "epoch": 0.3343539852475986, + "grad_norm": 0.1546599417924881, + "learning_rate": 4.66565325341846e-06, + "loss": 0.9577, + "step": 46190 + }, + { + "epoch": 0.33442637190818475, + "grad_norm": 0.17187544703483582, + "learning_rate": 4.665580866757875e-06, + "loss": 0.9353, + "step": 46200 + }, + { + "epoch": 0.3344987585687709, + "grad_norm": 0.17121565341949463, + "learning_rate": 4.665508480097288e-06, + "loss": 0.9612, + "step": 46210 + }, + { + "epoch": 0.33457114522935716, + "grad_norm": 0.15924374759197235, + "learning_rate": 4.665436093436702e-06, + "loss": 0.9596, + "step": 46220 + }, + { + "epoch": 0.33464353188994334, + "grad_norm": 0.19250038266181946, + "learning_rate": 4.665363706776116e-06, + "loss": 0.9633, + "step": 46230 + }, + { + "epoch": 0.3347159185505295, + "grad_norm": 0.17079396545886993, + "learning_rate": 4.66529132011553e-06, + "loss": 0.9491, + "step": 46240 + }, + { + "epoch": 0.3347883052111157, + "grad_norm": 0.19403786957263947, + "learning_rate": 4.665218933454943e-06, + "loss": 0.9462, + "step": 46250 + }, + { + "epoch": 0.33486069187170187, + "grad_norm": 0.18236984312534332, + "learning_rate": 4.6651465467943565e-06, + "loss": 0.9459, + "step": 46260 + }, + { + "epoch": 0.3349330785322881, + "grad_norm": 0.14901652932167053, + "learning_rate": 4.665074160133771e-06, + "loss": 0.9659, + "step": 46270 + }, + { + "epoch": 0.3350054651928743, + "grad_norm": 0.17317718267440796, + "learning_rate": 4.6650017734731846e-06, + "loss": 0.9512, + "step": 46280 + }, + { + "epoch": 0.33507785185346045, + "grad_norm": 0.16543887555599213, + "learning_rate": 4.664929386812598e-06, + "loss": 0.9405, + "step": 46290 + }, + { + "epoch": 0.33515023851404663, + "grad_norm": 0.16926343739032745, + "learning_rate": 4.664857000152012e-06, + "loss": 0.9575, + "step": 46300 + }, + { + "epoch": 0.3352226251746328, + "grad_norm": 0.16885744035243988, + "learning_rate": 4.664784613491426e-06, + "loss": 0.9496, + "step": 46310 + }, + { + "epoch": 0.335295011835219, + "grad_norm": 0.15920351445674896, + "learning_rate": 4.66471222683084e-06, + "loss": 0.9579, + "step": 46320 + }, + { + "epoch": 0.3353673984958052, + "grad_norm": 0.15638776123523712, + "learning_rate": 4.6646398401702535e-06, + "loss": 0.9599, + "step": 46330 + }, + { + "epoch": 0.3354397851563914, + "grad_norm": 0.15919549763202667, + "learning_rate": 4.664567453509667e-06, + "loss": 0.953, + "step": 46340 + }, + { + "epoch": 0.33551217181697757, + "grad_norm": 0.1604597270488739, + "learning_rate": 4.664495066849082e-06, + "loss": 0.9597, + "step": 46350 + }, + { + "epoch": 0.33558455847756374, + "grad_norm": 0.19076739251613617, + "learning_rate": 4.664422680188495e-06, + "loss": 0.9795, + "step": 46360 + }, + { + "epoch": 0.3356569451381499, + "grad_norm": 0.15637139976024628, + "learning_rate": 4.664350293527909e-06, + "loss": 0.9549, + "step": 46370 + }, + { + "epoch": 0.33572933179873615, + "grad_norm": 0.17368392646312714, + "learning_rate": 4.6642779068673224e-06, + "loss": 0.9644, + "step": 46380 + }, + { + "epoch": 0.33580171845932233, + "grad_norm": 0.1643470972776413, + "learning_rate": 4.664205520206737e-06, + "loss": 0.9517, + "step": 46390 + }, + { + "epoch": 0.3358741051199085, + "grad_norm": 0.17304439842700958, + "learning_rate": 4.6641331335461505e-06, + "loss": 0.9686, + "step": 46400 + }, + { + "epoch": 0.3359464917804947, + "grad_norm": 0.15978851914405823, + "learning_rate": 4.664060746885564e-06, + "loss": 0.9658, + "step": 46410 + }, + { + "epoch": 0.33601887844108086, + "grad_norm": 0.16269192099571228, + "learning_rate": 4.663988360224978e-06, + "loss": 0.9419, + "step": 46420 + }, + { + "epoch": 0.3360912651016671, + "grad_norm": 0.16838060319423676, + "learning_rate": 4.663915973564392e-06, + "loss": 0.9444, + "step": 46430 + }, + { + "epoch": 0.33616365176225327, + "grad_norm": 0.1612749993801117, + "learning_rate": 4.663843586903806e-06, + "loss": 0.9599, + "step": 46440 + }, + { + "epoch": 0.33623603842283944, + "grad_norm": 0.1748184859752655, + "learning_rate": 4.6637712002432194e-06, + "loss": 0.9482, + "step": 46450 + }, + { + "epoch": 0.3363084250834256, + "grad_norm": 0.1573769748210907, + "learning_rate": 4.663698813582633e-06, + "loss": 0.9552, + "step": 46460 + }, + { + "epoch": 0.3363808117440118, + "grad_norm": 0.16278138756752014, + "learning_rate": 4.6636264269220475e-06, + "loss": 0.9531, + "step": 46470 + }, + { + "epoch": 0.336453198404598, + "grad_norm": 0.19514703750610352, + "learning_rate": 4.663554040261461e-06, + "loss": 0.9648, + "step": 46480 + }, + { + "epoch": 0.3365255850651842, + "grad_norm": 0.15883156657218933, + "learning_rate": 4.663481653600875e-06, + "loss": 0.9489, + "step": 46490 + }, + { + "epoch": 0.3365979717257704, + "grad_norm": 0.20735056698322296, + "learning_rate": 4.663409266940288e-06, + "loss": 0.9552, + "step": 46500 + }, + { + "epoch": 0.33667035838635656, + "grad_norm": 0.15435609221458435, + "learning_rate": 4.663336880279703e-06, + "loss": 0.9573, + "step": 46510 + }, + { + "epoch": 0.33674274504694274, + "grad_norm": 0.15631069242954254, + "learning_rate": 4.6632644936191164e-06, + "loss": 0.9501, + "step": 46520 + }, + { + "epoch": 0.3368151317075289, + "grad_norm": 0.17219269275665283, + "learning_rate": 4.66319210695853e-06, + "loss": 0.9483, + "step": 46530 + }, + { + "epoch": 0.33688751836811515, + "grad_norm": 0.19126524031162262, + "learning_rate": 4.663119720297944e-06, + "loss": 0.9449, + "step": 46540 + }, + { + "epoch": 0.3369599050287013, + "grad_norm": 0.16317123174667358, + "learning_rate": 4.663047333637358e-06, + "loss": 0.9521, + "step": 46550 + }, + { + "epoch": 0.3370322916892875, + "grad_norm": 0.15335845947265625, + "learning_rate": 4.662974946976772e-06, + "loss": 0.9538, + "step": 46560 + }, + { + "epoch": 0.3371046783498737, + "grad_norm": 0.1824522316455841, + "learning_rate": 4.662902560316185e-06, + "loss": 0.9511, + "step": 46570 + }, + { + "epoch": 0.33717706501045985, + "grad_norm": 0.16539111733436584, + "learning_rate": 4.662830173655599e-06, + "loss": 0.9501, + "step": 46580 + }, + { + "epoch": 0.3372494516710461, + "grad_norm": 0.16969068348407745, + "learning_rate": 4.6627577869950134e-06, + "loss": 0.9363, + "step": 46590 + }, + { + "epoch": 0.33732183833163226, + "grad_norm": 0.15810436010360718, + "learning_rate": 4.662685400334427e-06, + "loss": 0.9563, + "step": 46600 + }, + { + "epoch": 0.33739422499221844, + "grad_norm": 0.1647157222032547, + "learning_rate": 4.662613013673841e-06, + "loss": 0.9552, + "step": 46610 + }, + { + "epoch": 0.3374666116528046, + "grad_norm": 0.1537630707025528, + "learning_rate": 4.662540627013254e-06, + "loss": 0.956, + "step": 46620 + }, + { + "epoch": 0.3375389983133908, + "grad_norm": 0.16150793433189392, + "learning_rate": 4.662468240352668e-06, + "loss": 0.9567, + "step": 46630 + }, + { + "epoch": 0.33761138497397697, + "grad_norm": 0.17372149229049683, + "learning_rate": 4.662395853692082e-06, + "loss": 0.9595, + "step": 46640 + }, + { + "epoch": 0.3376837716345632, + "grad_norm": 0.18082787096500397, + "learning_rate": 4.662323467031496e-06, + "loss": 0.9577, + "step": 46650 + }, + { + "epoch": 0.3377561582951494, + "grad_norm": 0.17019541561603546, + "learning_rate": 4.66225108037091e-06, + "loss": 0.9532, + "step": 46660 + }, + { + "epoch": 0.33782854495573555, + "grad_norm": 0.15364877879619598, + "learning_rate": 4.662178693710323e-06, + "loss": 0.9584, + "step": 46670 + }, + { + "epoch": 0.33790093161632173, + "grad_norm": 0.15855498611927032, + "learning_rate": 4.662106307049738e-06, + "loss": 0.9702, + "step": 46680 + }, + { + "epoch": 0.3379733182769079, + "grad_norm": 0.23772907257080078, + "learning_rate": 4.662033920389151e-06, + "loss": 0.9503, + "step": 46690 + }, + { + "epoch": 0.33804570493749414, + "grad_norm": 0.16535037755966187, + "learning_rate": 4.661961533728565e-06, + "loss": 0.9469, + "step": 46700 + }, + { + "epoch": 0.3381180915980803, + "grad_norm": 0.17086680233478546, + "learning_rate": 4.6618891470679785e-06, + "loss": 0.9591, + "step": 46710 + }, + { + "epoch": 0.3381904782586665, + "grad_norm": 0.16868987679481506, + "learning_rate": 4.661816760407393e-06, + "loss": 0.9501, + "step": 46720 + }, + { + "epoch": 0.33826286491925267, + "grad_norm": 0.16112284362316132, + "learning_rate": 4.661744373746807e-06, + "loss": 0.9581, + "step": 46730 + }, + { + "epoch": 0.33833525157983885, + "grad_norm": 0.17157232761383057, + "learning_rate": 4.66167198708622e-06, + "loss": 0.9574, + "step": 46740 + }, + { + "epoch": 0.3384076382404251, + "grad_norm": 0.16588850319385529, + "learning_rate": 4.661599600425634e-06, + "loss": 0.9397, + "step": 46750 + }, + { + "epoch": 0.33848002490101126, + "grad_norm": 0.16449251770973206, + "learning_rate": 4.661527213765048e-06, + "loss": 0.9504, + "step": 46760 + }, + { + "epoch": 0.33855241156159743, + "grad_norm": 0.17265821993350983, + "learning_rate": 4.661454827104462e-06, + "loss": 0.9542, + "step": 46770 + }, + { + "epoch": 0.3386247982221836, + "grad_norm": 0.1667519360780716, + "learning_rate": 4.661382440443875e-06, + "loss": 0.9454, + "step": 46780 + }, + { + "epoch": 0.3386971848827698, + "grad_norm": 0.16387122869491577, + "learning_rate": 4.661310053783289e-06, + "loss": 0.9588, + "step": 46790 + }, + { + "epoch": 0.33876957154335596, + "grad_norm": 0.15918706357479095, + "learning_rate": 4.661237667122703e-06, + "loss": 0.9506, + "step": 46800 + }, + { + "epoch": 0.3388419582039422, + "grad_norm": 0.18269628286361694, + "learning_rate": 4.661165280462116e-06, + "loss": 0.9541, + "step": 46810 + }, + { + "epoch": 0.33891434486452837, + "grad_norm": 0.16732244193553925, + "learning_rate": 4.66109289380153e-06, + "loss": 0.9683, + "step": 46820 + }, + { + "epoch": 0.33898673152511455, + "grad_norm": 0.17757059633731842, + "learning_rate": 4.6610205071409445e-06, + "loss": 0.9507, + "step": 46830 + }, + { + "epoch": 0.3390591181857007, + "grad_norm": 0.16032300889492035, + "learning_rate": 4.660948120480358e-06, + "loss": 0.9469, + "step": 46840 + }, + { + "epoch": 0.3391315048462869, + "grad_norm": 0.1686878800392151, + "learning_rate": 4.660875733819772e-06, + "loss": 0.9517, + "step": 46850 + }, + { + "epoch": 0.33920389150687313, + "grad_norm": 0.24848999083042145, + "learning_rate": 4.660803347159185e-06, + "loss": 0.9647, + "step": 46860 + }, + { + "epoch": 0.3392762781674593, + "grad_norm": 0.16146300733089447, + "learning_rate": 4.6607309604986e-06, + "loss": 0.969, + "step": 46870 + }, + { + "epoch": 0.3393486648280455, + "grad_norm": 0.17109398543834686, + "learning_rate": 4.660658573838013e-06, + "loss": 0.9475, + "step": 46880 + }, + { + "epoch": 0.33942105148863166, + "grad_norm": 0.16845859587192535, + "learning_rate": 4.660586187177427e-06, + "loss": 0.9659, + "step": 46890 + }, + { + "epoch": 0.33949343814921784, + "grad_norm": 0.17198343575000763, + "learning_rate": 4.660513800516841e-06, + "loss": 0.951, + "step": 46900 + }, + { + "epoch": 0.3395658248098041, + "grad_norm": 0.1685718148946762, + "learning_rate": 4.660441413856255e-06, + "loss": 0.9511, + "step": 46910 + }, + { + "epoch": 0.33963821147039025, + "grad_norm": 0.17462746798992157, + "learning_rate": 4.660369027195669e-06, + "loss": 0.9518, + "step": 46920 + }, + { + "epoch": 0.3397105981309764, + "grad_norm": 0.15987993776798248, + "learning_rate": 4.660296640535082e-06, + "loss": 0.9521, + "step": 46930 + }, + { + "epoch": 0.3397829847915626, + "grad_norm": 0.16598616540431976, + "learning_rate": 4.660224253874496e-06, + "loss": 0.9543, + "step": 46940 + }, + { + "epoch": 0.3398553714521488, + "grad_norm": 0.16702087223529816, + "learning_rate": 4.66015186721391e-06, + "loss": 0.9475, + "step": 46950 + }, + { + "epoch": 0.339927758112735, + "grad_norm": 0.20819000899791718, + "learning_rate": 4.660079480553324e-06, + "loss": 0.9534, + "step": 46960 + }, + { + "epoch": 0.3400001447733212, + "grad_norm": 0.1900683492422104, + "learning_rate": 4.660007093892738e-06, + "loss": 0.9521, + "step": 46970 + }, + { + "epoch": 0.34007253143390737, + "grad_norm": 0.15491485595703125, + "learning_rate": 4.659934707232151e-06, + "loss": 0.9561, + "step": 46980 + }, + { + "epoch": 0.34014491809449354, + "grad_norm": 0.17071206867694855, + "learning_rate": 4.659862320571566e-06, + "loss": 0.9486, + "step": 46990 + }, + { + "epoch": 0.3402173047550797, + "grad_norm": 0.17024581134319305, + "learning_rate": 4.659789933910979e-06, + "loss": 0.9481, + "step": 47000 + }, + { + "epoch": 0.3402896914156659, + "grad_norm": 0.1717842072248459, + "learning_rate": 4.659717547250393e-06, + "loss": 0.9476, + "step": 47010 + }, + { + "epoch": 0.34036207807625213, + "grad_norm": 0.16945697367191315, + "learning_rate": 4.6596451605898066e-06, + "loss": 0.9598, + "step": 47020 + }, + { + "epoch": 0.3404344647368383, + "grad_norm": 0.1646365374326706, + "learning_rate": 4.659572773929221e-06, + "loss": 0.9447, + "step": 47030 + }, + { + "epoch": 0.3405068513974245, + "grad_norm": 0.1725275218486786, + "learning_rate": 4.659500387268635e-06, + "loss": 0.9625, + "step": 47040 + }, + { + "epoch": 0.34057923805801066, + "grad_norm": 0.15354198217391968, + "learning_rate": 4.659428000608048e-06, + "loss": 0.9535, + "step": 47050 + }, + { + "epoch": 0.34065162471859683, + "grad_norm": 0.15781398117542267, + "learning_rate": 4.659355613947462e-06, + "loss": 0.9584, + "step": 47060 + }, + { + "epoch": 0.34072401137918307, + "grad_norm": 0.22969284653663635, + "learning_rate": 4.659283227286876e-06, + "loss": 0.9555, + "step": 47070 + }, + { + "epoch": 0.34079639803976924, + "grad_norm": 0.16445045173168182, + "learning_rate": 4.65921084062629e-06, + "loss": 0.9676, + "step": 47080 + }, + { + "epoch": 0.3408687847003554, + "grad_norm": 0.16403111815452576, + "learning_rate": 4.6591384539657036e-06, + "loss": 0.9633, + "step": 47090 + }, + { + "epoch": 0.3409411713609416, + "grad_norm": 0.17574258148670197, + "learning_rate": 4.659066067305117e-06, + "loss": 0.94, + "step": 47100 + }, + { + "epoch": 0.3410135580215278, + "grad_norm": 0.16448186337947845, + "learning_rate": 4.658993680644532e-06, + "loss": 0.9693, + "step": 47110 + }, + { + "epoch": 0.341085944682114, + "grad_norm": 0.15972661972045898, + "learning_rate": 4.658921293983945e-06, + "loss": 0.9592, + "step": 47120 + }, + { + "epoch": 0.3411583313427002, + "grad_norm": 0.15439611673355103, + "learning_rate": 4.658848907323359e-06, + "loss": 0.9685, + "step": 47130 + }, + { + "epoch": 0.34123071800328636, + "grad_norm": 0.1713487207889557, + "learning_rate": 4.6587765206627725e-06, + "loss": 0.9415, + "step": 47140 + }, + { + "epoch": 0.34130310466387254, + "grad_norm": 0.15895883738994598, + "learning_rate": 4.658704134002187e-06, + "loss": 0.9427, + "step": 47150 + }, + { + "epoch": 0.3413754913244587, + "grad_norm": 0.15406803786754608, + "learning_rate": 4.658631747341601e-06, + "loss": 0.9464, + "step": 47160 + }, + { + "epoch": 0.3414478779850449, + "grad_norm": 0.1744023710489273, + "learning_rate": 4.658559360681014e-06, + "loss": 0.9538, + "step": 47170 + }, + { + "epoch": 0.3415202646456311, + "grad_norm": 0.16929666697978973, + "learning_rate": 4.658486974020428e-06, + "loss": 0.9457, + "step": 47180 + }, + { + "epoch": 0.3415926513062173, + "grad_norm": 0.1510300189256668, + "learning_rate": 4.658414587359842e-06, + "loss": 0.9548, + "step": 47190 + }, + { + "epoch": 0.3416650379668035, + "grad_norm": 0.1694670170545578, + "learning_rate": 4.658342200699256e-06, + "loss": 0.9565, + "step": 47200 + }, + { + "epoch": 0.34173742462738965, + "grad_norm": 0.1610107421875, + "learning_rate": 4.6582698140386695e-06, + "loss": 0.9569, + "step": 47210 + }, + { + "epoch": 0.34180981128797583, + "grad_norm": 0.1698133498430252, + "learning_rate": 4.658197427378083e-06, + "loss": 0.9563, + "step": 47220 + }, + { + "epoch": 0.34188219794856206, + "grad_norm": 0.17170600593090057, + "learning_rate": 4.658125040717497e-06, + "loss": 0.9611, + "step": 47230 + }, + { + "epoch": 0.34195458460914824, + "grad_norm": 0.1630273014307022, + "learning_rate": 4.658052654056911e-06, + "loss": 0.9517, + "step": 47240 + }, + { + "epoch": 0.3420269712697344, + "grad_norm": 0.1633155643939972, + "learning_rate": 4.657980267396325e-06, + "loss": 0.9635, + "step": 47250 + }, + { + "epoch": 0.3420993579303206, + "grad_norm": 0.1674155741930008, + "learning_rate": 4.6579078807357384e-06, + "loss": 0.944, + "step": 47260 + }, + { + "epoch": 0.34217174459090677, + "grad_norm": 0.18419358134269714, + "learning_rate": 4.657835494075152e-06, + "loss": 0.9554, + "step": 47270 + }, + { + "epoch": 0.342244131251493, + "grad_norm": 0.17149895429611206, + "learning_rate": 4.6577631074145665e-06, + "loss": 0.9533, + "step": 47280 + }, + { + "epoch": 0.3423165179120792, + "grad_norm": 0.15570290386676788, + "learning_rate": 4.65769072075398e-06, + "loss": 0.9394, + "step": 47290 + }, + { + "epoch": 0.34238890457266535, + "grad_norm": 0.1557348072528839, + "learning_rate": 4.657618334093394e-06, + "loss": 0.9467, + "step": 47300 + }, + { + "epoch": 0.34246129123325153, + "grad_norm": 0.16203594207763672, + "learning_rate": 4.657545947432807e-06, + "loss": 0.9445, + "step": 47310 + }, + { + "epoch": 0.3425336778938377, + "grad_norm": 0.20134076476097107, + "learning_rate": 4.657473560772221e-06, + "loss": 0.948, + "step": 47320 + }, + { + "epoch": 0.3426060645544239, + "grad_norm": 0.1600342094898224, + "learning_rate": 4.657401174111635e-06, + "loss": 0.9633, + "step": 47330 + }, + { + "epoch": 0.3426784512150101, + "grad_norm": 0.17118972539901733, + "learning_rate": 4.657328787451049e-06, + "loss": 0.9599, + "step": 47340 + }, + { + "epoch": 0.3427508378755963, + "grad_norm": 0.15433147549629211, + "learning_rate": 4.657256400790463e-06, + "loss": 0.9496, + "step": 47350 + }, + { + "epoch": 0.34282322453618247, + "grad_norm": 0.1639435738325119, + "learning_rate": 4.657184014129876e-06, + "loss": 0.9455, + "step": 47360 + }, + { + "epoch": 0.34289561119676865, + "grad_norm": 0.1590513288974762, + "learning_rate": 4.65711162746929e-06, + "loss": 0.968, + "step": 47370 + }, + { + "epoch": 0.3429679978573548, + "grad_norm": 0.15320120751857758, + "learning_rate": 4.657039240808704e-06, + "loss": 0.9613, + "step": 47380 + }, + { + "epoch": 0.34304038451794105, + "grad_norm": 0.1623704880475998, + "learning_rate": 4.656966854148118e-06, + "loss": 0.949, + "step": 47390 + }, + { + "epoch": 0.34311277117852723, + "grad_norm": 0.28263241052627563, + "learning_rate": 4.656894467487532e-06, + "loss": 0.9426, + "step": 47400 + }, + { + "epoch": 0.3431851578391134, + "grad_norm": 0.1681540608406067, + "learning_rate": 4.656822080826945e-06, + "loss": 0.9484, + "step": 47410 + }, + { + "epoch": 0.3432575444996996, + "grad_norm": 0.17275528609752655, + "learning_rate": 4.656749694166359e-06, + "loss": 0.9456, + "step": 47420 + }, + { + "epoch": 0.34332993116028576, + "grad_norm": 0.1764938235282898, + "learning_rate": 4.656677307505773e-06, + "loss": 0.9555, + "step": 47430 + }, + { + "epoch": 0.343402317820872, + "grad_norm": 0.18242251873016357, + "learning_rate": 4.656604920845187e-06, + "loss": 0.9464, + "step": 47440 + }, + { + "epoch": 0.34347470448145817, + "grad_norm": 0.1684602051973343, + "learning_rate": 4.6565325341846005e-06, + "loss": 0.9521, + "step": 47450 + }, + { + "epoch": 0.34354709114204435, + "grad_norm": 0.448024719953537, + "learning_rate": 4.656460147524014e-06, + "loss": 0.9466, + "step": 47460 + }, + { + "epoch": 0.3436194778026305, + "grad_norm": 0.20128124952316284, + "learning_rate": 4.656387760863429e-06, + "loss": 0.9388, + "step": 47470 + }, + { + "epoch": 0.3436918644632167, + "grad_norm": 0.18526539206504822, + "learning_rate": 4.656315374202842e-06, + "loss": 0.9565, + "step": 47480 + }, + { + "epoch": 0.34376425112380293, + "grad_norm": 0.1590823531150818, + "learning_rate": 4.656242987542256e-06, + "loss": 0.9447, + "step": 47490 + }, + { + "epoch": 0.3438366377843891, + "grad_norm": 0.21153424680233002, + "learning_rate": 4.6561706008816695e-06, + "loss": 0.9503, + "step": 47500 + }, + { + "epoch": 0.3439090244449753, + "grad_norm": 0.18983012437820435, + "learning_rate": 4.656098214221084e-06, + "loss": 0.9614, + "step": 47510 + }, + { + "epoch": 0.34398141110556146, + "grad_norm": 0.1883876472711563, + "learning_rate": 4.6560258275604975e-06, + "loss": 0.9572, + "step": 47520 + }, + { + "epoch": 0.34405379776614764, + "grad_norm": 0.15937159955501556, + "learning_rate": 4.655953440899911e-06, + "loss": 0.9579, + "step": 47530 + }, + { + "epoch": 0.3441261844267338, + "grad_norm": 0.17712438106536865, + "learning_rate": 4.655881054239325e-06, + "loss": 0.9693, + "step": 47540 + }, + { + "epoch": 0.34419857108732005, + "grad_norm": 0.17279085516929626, + "learning_rate": 4.655808667578739e-06, + "loss": 0.9572, + "step": 47550 + }, + { + "epoch": 0.3442709577479062, + "grad_norm": 0.16314037144184113, + "learning_rate": 4.655736280918153e-06, + "loss": 0.9495, + "step": 47560 + }, + { + "epoch": 0.3443433444084924, + "grad_norm": 0.1699339896440506, + "learning_rate": 4.6556638942575665e-06, + "loss": 0.9369, + "step": 47570 + }, + { + "epoch": 0.3444157310690786, + "grad_norm": 0.16625383496284485, + "learning_rate": 4.65559150759698e-06, + "loss": 0.9402, + "step": 47580 + }, + { + "epoch": 0.34448811772966476, + "grad_norm": 0.15905995666980743, + "learning_rate": 4.6555191209363946e-06, + "loss": 0.96, + "step": 47590 + }, + { + "epoch": 0.344560504390251, + "grad_norm": 0.1678093671798706, + "learning_rate": 4.655446734275808e-06, + "loss": 0.9517, + "step": 47600 + }, + { + "epoch": 0.34463289105083716, + "grad_norm": 0.1573289930820465, + "learning_rate": 4.655374347615222e-06, + "loss": 0.9514, + "step": 47610 + }, + { + "epoch": 0.34470527771142334, + "grad_norm": 0.1935252696275711, + "learning_rate": 4.655301960954635e-06, + "loss": 0.9646, + "step": 47620 + }, + { + "epoch": 0.3447776643720095, + "grad_norm": 0.16098780930042267, + "learning_rate": 4.65522957429405e-06, + "loss": 0.9546, + "step": 47630 + }, + { + "epoch": 0.3448500510325957, + "grad_norm": 0.1690308153629303, + "learning_rate": 4.6551571876334635e-06, + "loss": 0.9697, + "step": 47640 + }, + { + "epoch": 0.3449224376931819, + "grad_norm": 0.159522145986557, + "learning_rate": 4.655084800972877e-06, + "loss": 0.9472, + "step": 47650 + }, + { + "epoch": 0.3449948243537681, + "grad_norm": 0.2347215861082077, + "learning_rate": 4.655012414312291e-06, + "loss": 0.9475, + "step": 47660 + }, + { + "epoch": 0.3450672110143543, + "grad_norm": 0.16307930648326874, + "learning_rate": 4.654940027651705e-06, + "loss": 0.9627, + "step": 47670 + }, + { + "epoch": 0.34513959767494046, + "grad_norm": 0.4456617832183838, + "learning_rate": 4.654867640991119e-06, + "loss": 0.9653, + "step": 47680 + }, + { + "epoch": 0.34521198433552663, + "grad_norm": 0.19592955708503723, + "learning_rate": 4.654795254330532e-06, + "loss": 0.958, + "step": 47690 + }, + { + "epoch": 0.3452843709961128, + "grad_norm": 0.1693570464849472, + "learning_rate": 4.654722867669946e-06, + "loss": 0.9591, + "step": 47700 + }, + { + "epoch": 0.34535675765669904, + "grad_norm": 0.16405141353607178, + "learning_rate": 4.6546504810093605e-06, + "loss": 0.9568, + "step": 47710 + }, + { + "epoch": 0.3454291443172852, + "grad_norm": 0.15746448934078217, + "learning_rate": 4.654578094348774e-06, + "loss": 0.9587, + "step": 47720 + }, + { + "epoch": 0.3455015309778714, + "grad_norm": 0.17681455612182617, + "learning_rate": 4.654505707688188e-06, + "loss": 0.9533, + "step": 47730 + }, + { + "epoch": 0.3455739176384576, + "grad_norm": 0.14864014089107513, + "learning_rate": 4.654433321027601e-06, + "loss": 0.9444, + "step": 47740 + }, + { + "epoch": 0.34564630429904375, + "grad_norm": 0.17312194406986237, + "learning_rate": 4.654360934367016e-06, + "loss": 0.9546, + "step": 47750 + }, + { + "epoch": 0.34571869095963, + "grad_norm": 0.17767411470413208, + "learning_rate": 4.654288547706429e-06, + "loss": 0.9547, + "step": 47760 + }, + { + "epoch": 0.34579107762021616, + "grad_norm": 0.16189977526664734, + "learning_rate": 4.654216161045843e-06, + "loss": 0.959, + "step": 47770 + }, + { + "epoch": 0.34586346428080234, + "grad_norm": 0.15381406247615814, + "learning_rate": 4.654143774385257e-06, + "loss": 0.9532, + "step": 47780 + }, + { + "epoch": 0.3459358509413885, + "grad_norm": 0.1887199878692627, + "learning_rate": 4.654071387724671e-06, + "loss": 0.9523, + "step": 47790 + }, + { + "epoch": 0.3460082376019747, + "grad_norm": 0.1715165674686432, + "learning_rate": 4.653999001064085e-06, + "loss": 0.9387, + "step": 47800 + }, + { + "epoch": 0.3460806242625609, + "grad_norm": 0.17425018548965454, + "learning_rate": 4.653926614403498e-06, + "loss": 0.9659, + "step": 47810 + }, + { + "epoch": 0.3461530109231471, + "grad_norm": 0.16004815697669983, + "learning_rate": 4.653854227742912e-06, + "loss": 0.9524, + "step": 47820 + }, + { + "epoch": 0.3462253975837333, + "grad_norm": 0.16618654131889343, + "learning_rate": 4.653781841082326e-06, + "loss": 0.9573, + "step": 47830 + }, + { + "epoch": 0.34629778424431945, + "grad_norm": 0.18237411975860596, + "learning_rate": 4.653709454421739e-06, + "loss": 0.9503, + "step": 47840 + }, + { + "epoch": 0.3463701709049056, + "grad_norm": 0.17747117578983307, + "learning_rate": 4.653637067761153e-06, + "loss": 0.951, + "step": 47850 + }, + { + "epoch": 0.3464425575654918, + "grad_norm": 0.1547970473766327, + "learning_rate": 4.653564681100567e-06, + "loss": 0.9493, + "step": 47860 + }, + { + "epoch": 0.34651494422607804, + "grad_norm": 0.20299063622951508, + "learning_rate": 4.653492294439981e-06, + "loss": 0.9549, + "step": 47870 + }, + { + "epoch": 0.3465873308866642, + "grad_norm": 0.16217590868473053, + "learning_rate": 4.6534199077793945e-06, + "loss": 0.951, + "step": 47880 + }, + { + "epoch": 0.3466597175472504, + "grad_norm": 0.21849872171878815, + "learning_rate": 4.653347521118808e-06, + "loss": 0.9457, + "step": 47890 + }, + { + "epoch": 0.34673210420783657, + "grad_norm": 0.22776605188846588, + "learning_rate": 4.653275134458223e-06, + "loss": 0.9705, + "step": 47900 + }, + { + "epoch": 0.34680449086842274, + "grad_norm": 0.1702580451965332, + "learning_rate": 4.653202747797636e-06, + "loss": 0.9504, + "step": 47910 + }, + { + "epoch": 0.346876877529009, + "grad_norm": 0.16190893948078156, + "learning_rate": 4.65313036113705e-06, + "loss": 0.9618, + "step": 47920 + }, + { + "epoch": 0.34694926418959515, + "grad_norm": 0.18085525929927826, + "learning_rate": 4.6530579744764634e-06, + "loss": 0.9564, + "step": 47930 + }, + { + "epoch": 0.34702165085018133, + "grad_norm": 0.16075456142425537, + "learning_rate": 4.652985587815878e-06, + "loss": 0.9576, + "step": 47940 + }, + { + "epoch": 0.3470940375107675, + "grad_norm": 0.1670777052640915, + "learning_rate": 4.6529132011552915e-06, + "loss": 0.9598, + "step": 47950 + }, + { + "epoch": 0.3471664241713537, + "grad_norm": 0.1586279422044754, + "learning_rate": 4.652840814494705e-06, + "loss": 0.9469, + "step": 47960 + }, + { + "epoch": 0.3472388108319399, + "grad_norm": 0.163706973195076, + "learning_rate": 4.652768427834119e-06, + "loss": 0.9456, + "step": 47970 + }, + { + "epoch": 0.3473111974925261, + "grad_norm": 0.16007377207279205, + "learning_rate": 4.652696041173533e-06, + "loss": 0.9445, + "step": 47980 + }, + { + "epoch": 0.34738358415311227, + "grad_norm": 0.18147924542427063, + "learning_rate": 4.652623654512947e-06, + "loss": 0.9527, + "step": 47990 + }, + { + "epoch": 0.34745597081369844, + "grad_norm": 0.1525728702545166, + "learning_rate": 4.6525512678523604e-06, + "loss": 0.9419, + "step": 48000 + }, + { + "epoch": 0.3475283574742846, + "grad_norm": 0.168410062789917, + "learning_rate": 4.652478881191774e-06, + "loss": 0.9464, + "step": 48010 + }, + { + "epoch": 0.3476007441348708, + "grad_norm": 0.16513432562351227, + "learning_rate": 4.652406494531188e-06, + "loss": 0.9467, + "step": 48020 + }, + { + "epoch": 0.34767313079545703, + "grad_norm": 0.14979685842990875, + "learning_rate": 4.652334107870602e-06, + "loss": 0.9547, + "step": 48030 + }, + { + "epoch": 0.3477455174560432, + "grad_norm": 0.17704786360263824, + "learning_rate": 4.652261721210016e-06, + "loss": 0.9491, + "step": 48040 + }, + { + "epoch": 0.3478179041166294, + "grad_norm": 0.1605810523033142, + "learning_rate": 4.652189334549429e-06, + "loss": 0.9566, + "step": 48050 + }, + { + "epoch": 0.34789029077721556, + "grad_norm": 0.1907358467578888, + "learning_rate": 4.652116947888843e-06, + "loss": 0.9568, + "step": 48060 + }, + { + "epoch": 0.34796267743780174, + "grad_norm": 0.16195763647556305, + "learning_rate": 4.6520445612282574e-06, + "loss": 0.9532, + "step": 48070 + }, + { + "epoch": 0.34803506409838797, + "grad_norm": 0.1610960215330124, + "learning_rate": 4.651972174567671e-06, + "loss": 0.9452, + "step": 48080 + }, + { + "epoch": 0.34810745075897415, + "grad_norm": 0.17993375658988953, + "learning_rate": 4.651899787907085e-06, + "loss": 0.9497, + "step": 48090 + }, + { + "epoch": 0.3481798374195603, + "grad_norm": 0.16861571371555328, + "learning_rate": 4.651827401246498e-06, + "loss": 0.9503, + "step": 48100 + }, + { + "epoch": 0.3482522240801465, + "grad_norm": 0.16078142821788788, + "learning_rate": 4.651755014585913e-06, + "loss": 0.9387, + "step": 48110 + }, + { + "epoch": 0.3483246107407327, + "grad_norm": 0.1687428504228592, + "learning_rate": 4.651682627925326e-06, + "loss": 0.9459, + "step": 48120 + }, + { + "epoch": 0.3483969974013189, + "grad_norm": 0.17936325073242188, + "learning_rate": 4.65161024126474e-06, + "loss": 0.944, + "step": 48130 + }, + { + "epoch": 0.3484693840619051, + "grad_norm": 0.16002655029296875, + "learning_rate": 4.651537854604154e-06, + "loss": 0.9528, + "step": 48140 + }, + { + "epoch": 0.34854177072249126, + "grad_norm": 0.16870175302028656, + "learning_rate": 4.651465467943568e-06, + "loss": 0.9461, + "step": 48150 + }, + { + "epoch": 0.34861415738307744, + "grad_norm": 0.37288644909858704, + "learning_rate": 4.651393081282982e-06, + "loss": 0.9438, + "step": 48160 + }, + { + "epoch": 0.3486865440436636, + "grad_norm": 0.1594185084104538, + "learning_rate": 4.651320694622395e-06, + "loss": 0.9631, + "step": 48170 + }, + { + "epoch": 0.34875893070424985, + "grad_norm": 0.16958339512348175, + "learning_rate": 4.651248307961809e-06, + "loss": 0.9672, + "step": 48180 + }, + { + "epoch": 0.348831317364836, + "grad_norm": 0.2117815464735031, + "learning_rate": 4.651175921301223e-06, + "loss": 0.9624, + "step": 48190 + }, + { + "epoch": 0.3489037040254222, + "grad_norm": 0.16270878911018372, + "learning_rate": 4.651103534640637e-06, + "loss": 0.9621, + "step": 48200 + }, + { + "epoch": 0.3489760906860084, + "grad_norm": 0.1788318157196045, + "learning_rate": 4.651031147980051e-06, + "loss": 0.9604, + "step": 48210 + }, + { + "epoch": 0.34904847734659455, + "grad_norm": 0.20249681174755096, + "learning_rate": 4.650958761319464e-06, + "loss": 0.9452, + "step": 48220 + }, + { + "epoch": 0.34912086400718073, + "grad_norm": 0.16005674004554749, + "learning_rate": 4.650886374658879e-06, + "loss": 0.9375, + "step": 48230 + }, + { + "epoch": 0.34919325066776696, + "grad_norm": 0.182306170463562, + "learning_rate": 4.650813987998292e-06, + "loss": 0.9549, + "step": 48240 + }, + { + "epoch": 0.34926563732835314, + "grad_norm": 0.1851535588502884, + "learning_rate": 4.650741601337706e-06, + "loss": 0.9459, + "step": 48250 + }, + { + "epoch": 0.3493380239889393, + "grad_norm": 0.1800885647535324, + "learning_rate": 4.6506692146771195e-06, + "loss": 0.9594, + "step": 48260 + }, + { + "epoch": 0.3494104106495255, + "grad_norm": 0.1562681943178177, + "learning_rate": 4.650596828016534e-06, + "loss": 0.9548, + "step": 48270 + }, + { + "epoch": 0.34948279731011167, + "grad_norm": 0.164944589138031, + "learning_rate": 4.650524441355948e-06, + "loss": 0.9595, + "step": 48280 + }, + { + "epoch": 0.3495551839706979, + "grad_norm": 0.15770290791988373, + "learning_rate": 4.650452054695361e-06, + "loss": 0.9573, + "step": 48290 + }, + { + "epoch": 0.3496275706312841, + "grad_norm": 0.19235900044441223, + "learning_rate": 4.650379668034775e-06, + "loss": 0.9381, + "step": 48300 + }, + { + "epoch": 0.34969995729187026, + "grad_norm": 0.17154589295387268, + "learning_rate": 4.650307281374189e-06, + "loss": 0.9434, + "step": 48310 + }, + { + "epoch": 0.34977234395245643, + "grad_norm": 0.19060273468494415, + "learning_rate": 4.650234894713603e-06, + "loss": 0.9666, + "step": 48320 + }, + { + "epoch": 0.3498447306130426, + "grad_norm": 0.16510871052742004, + "learning_rate": 4.6501625080530165e-06, + "loss": 0.9498, + "step": 48330 + }, + { + "epoch": 0.34991711727362884, + "grad_norm": 0.1761731505393982, + "learning_rate": 4.65009012139243e-06, + "loss": 0.9522, + "step": 48340 + }, + { + "epoch": 0.349989503934215, + "grad_norm": 0.17731162905693054, + "learning_rate": 4.650017734731845e-06, + "loss": 0.9507, + "step": 48350 + }, + { + "epoch": 0.3500618905948012, + "grad_norm": 0.17625099420547485, + "learning_rate": 4.649945348071258e-06, + "loss": 0.9528, + "step": 48360 + }, + { + "epoch": 0.35013427725538737, + "grad_norm": 0.16574423015117645, + "learning_rate": 4.649872961410671e-06, + "loss": 0.969, + "step": 48370 + }, + { + "epoch": 0.35020666391597355, + "grad_norm": 0.16655471920967102, + "learning_rate": 4.6498005747500855e-06, + "loss": 0.9462, + "step": 48380 + }, + { + "epoch": 0.3502790505765597, + "grad_norm": 0.17919887602329254, + "learning_rate": 4.649728188089499e-06, + "loss": 0.9669, + "step": 48390 + }, + { + "epoch": 0.35035143723714596, + "grad_norm": 0.1737910956144333, + "learning_rate": 4.649655801428913e-06, + "loss": 0.9544, + "step": 48400 + }, + { + "epoch": 0.35042382389773213, + "grad_norm": 0.1664748638868332, + "learning_rate": 4.649583414768326e-06, + "loss": 0.9394, + "step": 48410 + }, + { + "epoch": 0.3504962105583183, + "grad_norm": 0.1567506492137909, + "learning_rate": 4.649511028107741e-06, + "loss": 0.9453, + "step": 48420 + }, + { + "epoch": 0.3505685972189045, + "grad_norm": 0.17070920765399933, + "learning_rate": 4.649438641447154e-06, + "loss": 0.9549, + "step": 48430 + }, + { + "epoch": 0.35064098387949066, + "grad_norm": 0.16167403757572174, + "learning_rate": 4.649366254786568e-06, + "loss": 0.9453, + "step": 48440 + }, + { + "epoch": 0.3507133705400769, + "grad_norm": 0.15973427891731262, + "learning_rate": 4.649293868125982e-06, + "loss": 0.9561, + "step": 48450 + }, + { + "epoch": 0.3507857572006631, + "grad_norm": 0.16272811591625214, + "learning_rate": 4.649221481465396e-06, + "loss": 0.9554, + "step": 48460 + }, + { + "epoch": 0.35085814386124925, + "grad_norm": 0.1659621149301529, + "learning_rate": 4.64914909480481e-06, + "loss": 0.9474, + "step": 48470 + }, + { + "epoch": 0.3509305305218354, + "grad_norm": 0.16306394338607788, + "learning_rate": 4.649076708144223e-06, + "loss": 0.9511, + "step": 48480 + }, + { + "epoch": 0.3510029171824216, + "grad_norm": 0.1689397245645523, + "learning_rate": 4.649004321483637e-06, + "loss": 0.9528, + "step": 48490 + }, + { + "epoch": 0.35107530384300784, + "grad_norm": 0.16257858276367188, + "learning_rate": 4.648931934823051e-06, + "loss": 0.9544, + "step": 48500 + }, + { + "epoch": 0.351147690503594, + "grad_norm": 0.1643674671649933, + "learning_rate": 4.648859548162465e-06, + "loss": 0.9441, + "step": 48510 + }, + { + "epoch": 0.3512200771641802, + "grad_norm": 0.18621480464935303, + "learning_rate": 4.648787161501879e-06, + "loss": 0.9677, + "step": 48520 + }, + { + "epoch": 0.35129246382476637, + "grad_norm": 0.15909579396247864, + "learning_rate": 4.648714774841292e-06, + "loss": 0.9404, + "step": 48530 + }, + { + "epoch": 0.35136485048535254, + "grad_norm": 0.16073808073997498, + "learning_rate": 4.648642388180707e-06, + "loss": 0.9649, + "step": 48540 + }, + { + "epoch": 0.3514372371459387, + "grad_norm": 0.15749648213386536, + "learning_rate": 4.64857000152012e-06, + "loss": 0.9665, + "step": 48550 + }, + { + "epoch": 0.35150962380652495, + "grad_norm": 0.18623562157154083, + "learning_rate": 4.648497614859534e-06, + "loss": 0.9529, + "step": 48560 + }, + { + "epoch": 0.35158201046711113, + "grad_norm": 0.16134946048259735, + "learning_rate": 4.6484252281989476e-06, + "loss": 0.9499, + "step": 48570 + }, + { + "epoch": 0.3516543971276973, + "grad_norm": 0.15732122957706451, + "learning_rate": 4.648352841538362e-06, + "loss": 0.9572, + "step": 48580 + }, + { + "epoch": 0.3517267837882835, + "grad_norm": 0.1690632700920105, + "learning_rate": 4.648280454877776e-06, + "loss": 0.9506, + "step": 48590 + }, + { + "epoch": 0.35179917044886966, + "grad_norm": 0.16241760551929474, + "learning_rate": 4.648208068217189e-06, + "loss": 0.9518, + "step": 48600 + }, + { + "epoch": 0.3518715571094559, + "grad_norm": 0.1884649395942688, + "learning_rate": 4.648135681556603e-06, + "loss": 0.9577, + "step": 48610 + }, + { + "epoch": 0.35194394377004207, + "grad_norm": 0.16294220089912415, + "learning_rate": 4.648063294896017e-06, + "loss": 0.9432, + "step": 48620 + }, + { + "epoch": 0.35201633043062824, + "grad_norm": 0.18577656149864197, + "learning_rate": 4.647990908235431e-06, + "loss": 0.9513, + "step": 48630 + }, + { + "epoch": 0.3520887170912144, + "grad_norm": 0.16119709610939026, + "learning_rate": 4.647918521574845e-06, + "loss": 0.9478, + "step": 48640 + }, + { + "epoch": 0.3521611037518006, + "grad_norm": 0.1651376187801361, + "learning_rate": 4.647846134914258e-06, + "loss": 0.9487, + "step": 48650 + }, + { + "epoch": 0.35223349041238683, + "grad_norm": 0.18918465077877045, + "learning_rate": 4.647773748253672e-06, + "loss": 0.9562, + "step": 48660 + }, + { + "epoch": 0.352305877072973, + "grad_norm": 0.1643255203962326, + "learning_rate": 4.647701361593086e-06, + "loss": 0.9502, + "step": 48670 + }, + { + "epoch": 0.3523782637335592, + "grad_norm": 0.16544142365455627, + "learning_rate": 4.6476289749325e-06, + "loss": 0.9399, + "step": 48680 + }, + { + "epoch": 0.35245065039414536, + "grad_norm": 0.1629703789949417, + "learning_rate": 4.6475565882719135e-06, + "loss": 0.9514, + "step": 48690 + }, + { + "epoch": 0.35252303705473154, + "grad_norm": 0.15862901508808136, + "learning_rate": 4.647484201611327e-06, + "loss": 0.949, + "step": 48700 + }, + { + "epoch": 0.35259542371531777, + "grad_norm": 0.1527785062789917, + "learning_rate": 4.647411814950742e-06, + "loss": 0.9548, + "step": 48710 + }, + { + "epoch": 0.35266781037590395, + "grad_norm": 0.1874808520078659, + "learning_rate": 4.647339428290155e-06, + "loss": 0.9653, + "step": 48720 + }, + { + "epoch": 0.3527401970364901, + "grad_norm": 0.18184229731559753, + "learning_rate": 4.647267041629569e-06, + "loss": 0.9433, + "step": 48730 + }, + { + "epoch": 0.3528125836970763, + "grad_norm": 0.16013579070568085, + "learning_rate": 4.6471946549689824e-06, + "loss": 0.9545, + "step": 48740 + }, + { + "epoch": 0.3528849703576625, + "grad_norm": 0.1711476594209671, + "learning_rate": 4.647122268308397e-06, + "loss": 0.9541, + "step": 48750 + }, + { + "epoch": 0.35295735701824865, + "grad_norm": 0.16671577095985413, + "learning_rate": 4.6470498816478105e-06, + "loss": 0.9568, + "step": 48760 + }, + { + "epoch": 0.3530297436788349, + "grad_norm": 0.2846975326538086, + "learning_rate": 4.646977494987224e-06, + "loss": 0.9496, + "step": 48770 + }, + { + "epoch": 0.35310213033942106, + "grad_norm": 0.15886595845222473, + "learning_rate": 4.646905108326638e-06, + "loss": 0.9667, + "step": 48780 + }, + { + "epoch": 0.35317451700000724, + "grad_norm": 0.1654849648475647, + "learning_rate": 4.646832721666052e-06, + "loss": 0.9483, + "step": 48790 + }, + { + "epoch": 0.3532469036605934, + "grad_norm": 0.1595975011587143, + "learning_rate": 4.646760335005466e-06, + "loss": 0.952, + "step": 48800 + }, + { + "epoch": 0.3533192903211796, + "grad_norm": 0.19034674763679504, + "learning_rate": 4.6466879483448794e-06, + "loss": 0.9593, + "step": 48810 + }, + { + "epoch": 0.3533916769817658, + "grad_norm": 0.16194772720336914, + "learning_rate": 4.646615561684293e-06, + "loss": 0.9536, + "step": 48820 + }, + { + "epoch": 0.353464063642352, + "grad_norm": 0.19409111142158508, + "learning_rate": 4.6465431750237075e-06, + "loss": 0.9578, + "step": 48830 + }, + { + "epoch": 0.3535364503029382, + "grad_norm": 0.15368357300758362, + "learning_rate": 4.646470788363121e-06, + "loss": 0.9635, + "step": 48840 + }, + { + "epoch": 0.35360883696352435, + "grad_norm": 0.1651766002178192, + "learning_rate": 4.646398401702535e-06, + "loss": 0.9542, + "step": 48850 + }, + { + "epoch": 0.35368122362411053, + "grad_norm": 0.16302619874477386, + "learning_rate": 4.646326015041948e-06, + "loss": 0.9451, + "step": 48860 + }, + { + "epoch": 0.35375361028469676, + "grad_norm": 0.16752517223358154, + "learning_rate": 4.646253628381363e-06, + "loss": 0.956, + "step": 48870 + }, + { + "epoch": 0.35382599694528294, + "grad_norm": 0.16661721467971802, + "learning_rate": 4.6461812417207765e-06, + "loss": 0.9501, + "step": 48880 + }, + { + "epoch": 0.3538983836058691, + "grad_norm": 0.16403451561927795, + "learning_rate": 4.64610885506019e-06, + "loss": 0.945, + "step": 48890 + }, + { + "epoch": 0.3539707702664553, + "grad_norm": 0.16170132160186768, + "learning_rate": 4.646036468399604e-06, + "loss": 0.9528, + "step": 48900 + }, + { + "epoch": 0.35404315692704147, + "grad_norm": 0.15352827310562134, + "learning_rate": 4.645964081739017e-06, + "loss": 0.964, + "step": 48910 + }, + { + "epoch": 0.35411554358762765, + "grad_norm": 0.2216578722000122, + "learning_rate": 4.645891695078431e-06, + "loss": 0.9571, + "step": 48920 + }, + { + "epoch": 0.3541879302482139, + "grad_norm": 0.1692575216293335, + "learning_rate": 4.6458193084178445e-06, + "loss": 0.9552, + "step": 48930 + }, + { + "epoch": 0.35426031690880005, + "grad_norm": 0.16259534657001495, + "learning_rate": 4.645746921757259e-06, + "loss": 0.9497, + "step": 48940 + }, + { + "epoch": 0.35433270356938623, + "grad_norm": 0.2892276644706726, + "learning_rate": 4.645674535096673e-06, + "loss": 0.9429, + "step": 48950 + }, + { + "epoch": 0.3544050902299724, + "grad_norm": 0.1655515879392624, + "learning_rate": 4.645602148436086e-06, + "loss": 0.9362, + "step": 48960 + }, + { + "epoch": 0.3544774768905586, + "grad_norm": 0.1588042974472046, + "learning_rate": 4.6455297617755e-06, + "loss": 0.9581, + "step": 48970 + }, + { + "epoch": 0.3545498635511448, + "grad_norm": 0.16203071177005768, + "learning_rate": 4.645457375114914e-06, + "loss": 0.9525, + "step": 48980 + }, + { + "epoch": 0.354622250211731, + "grad_norm": 0.1807693988084793, + "learning_rate": 4.645384988454328e-06, + "loss": 0.9412, + "step": 48990 + }, + { + "epoch": 0.35469463687231717, + "grad_norm": 0.14859171211719513, + "learning_rate": 4.6453126017937415e-06, + "loss": 0.955, + "step": 49000 + }, + { + "epoch": 0.35476702353290335, + "grad_norm": 0.17705701291561127, + "learning_rate": 4.645240215133155e-06, + "loss": 0.9459, + "step": 49010 + }, + { + "epoch": 0.3548394101934895, + "grad_norm": 0.1593550145626068, + "learning_rate": 4.64516782847257e-06, + "loss": 0.9333, + "step": 49020 + }, + { + "epoch": 0.35491179685407576, + "grad_norm": 0.16969063878059387, + "learning_rate": 4.645095441811983e-06, + "loss": 0.9572, + "step": 49030 + }, + { + "epoch": 0.35498418351466193, + "grad_norm": 0.16061443090438843, + "learning_rate": 4.645023055151397e-06, + "loss": 0.9582, + "step": 49040 + }, + { + "epoch": 0.3550565701752481, + "grad_norm": 0.17726626992225647, + "learning_rate": 4.6449506684908105e-06, + "loss": 0.9508, + "step": 49050 + }, + { + "epoch": 0.3551289568358343, + "grad_norm": 0.1747400313615799, + "learning_rate": 4.644878281830225e-06, + "loss": 0.9436, + "step": 49060 + }, + { + "epoch": 0.35520134349642046, + "grad_norm": 0.1950230598449707, + "learning_rate": 4.6448058951696385e-06, + "loss": 0.9571, + "step": 49070 + }, + { + "epoch": 0.35527373015700664, + "grad_norm": 0.1573755443096161, + "learning_rate": 4.644733508509052e-06, + "loss": 0.9574, + "step": 49080 + }, + { + "epoch": 0.35534611681759287, + "grad_norm": 0.17132724821567535, + "learning_rate": 4.644661121848466e-06, + "loss": 0.9586, + "step": 49090 + }, + { + "epoch": 0.35541850347817905, + "grad_norm": 0.18015651404857635, + "learning_rate": 4.64458873518788e-06, + "loss": 0.9312, + "step": 49100 + }, + { + "epoch": 0.3554908901387652, + "grad_norm": 0.17248773574829102, + "learning_rate": 4.644516348527294e-06, + "loss": 0.9444, + "step": 49110 + }, + { + "epoch": 0.3555632767993514, + "grad_norm": 0.16664332151412964, + "learning_rate": 4.6444439618667075e-06, + "loss": 0.9524, + "step": 49120 + }, + { + "epoch": 0.3556356634599376, + "grad_norm": 0.16630926728248596, + "learning_rate": 4.644371575206121e-06, + "loss": 0.9397, + "step": 49130 + }, + { + "epoch": 0.3557080501205238, + "grad_norm": 0.17303983867168427, + "learning_rate": 4.6442991885455356e-06, + "loss": 0.9622, + "step": 49140 + }, + { + "epoch": 0.35578043678111, + "grad_norm": 0.15973907709121704, + "learning_rate": 4.644226801884949e-06, + "loss": 0.9364, + "step": 49150 + }, + { + "epoch": 0.35585282344169616, + "grad_norm": 0.1687871366739273, + "learning_rate": 4.644154415224363e-06, + "loss": 0.9577, + "step": 49160 + }, + { + "epoch": 0.35592521010228234, + "grad_norm": 0.16706955432891846, + "learning_rate": 4.644082028563776e-06, + "loss": 0.9619, + "step": 49170 + }, + { + "epoch": 0.3559975967628685, + "grad_norm": 0.19494742155075073, + "learning_rate": 4.644009641903191e-06, + "loss": 0.947, + "step": 49180 + }, + { + "epoch": 0.35606998342345475, + "grad_norm": 0.1793723702430725, + "learning_rate": 4.6439372552426045e-06, + "loss": 0.9618, + "step": 49190 + }, + { + "epoch": 0.3561423700840409, + "grad_norm": 0.16906292736530304, + "learning_rate": 4.643864868582018e-06, + "loss": 0.9433, + "step": 49200 + }, + { + "epoch": 0.3562147567446271, + "grad_norm": 0.1776910424232483, + "learning_rate": 4.643792481921432e-06, + "loss": 0.9434, + "step": 49210 + }, + { + "epoch": 0.3562871434052133, + "grad_norm": 0.17651747167110443, + "learning_rate": 4.643720095260846e-06, + "loss": 0.9524, + "step": 49220 + }, + { + "epoch": 0.35635953006579946, + "grad_norm": 0.1679781824350357, + "learning_rate": 4.64364770860026e-06, + "loss": 0.9429, + "step": 49230 + }, + { + "epoch": 0.3564319167263857, + "grad_norm": 0.17428244650363922, + "learning_rate": 4.643575321939673e-06, + "loss": 0.9497, + "step": 49240 + }, + { + "epoch": 0.35650430338697187, + "grad_norm": 0.16317783296108246, + "learning_rate": 4.643502935279087e-06, + "loss": 0.9579, + "step": 49250 + }, + { + "epoch": 0.35657669004755804, + "grad_norm": 0.17340287566184998, + "learning_rate": 4.643430548618501e-06, + "loss": 0.9584, + "step": 49260 + }, + { + "epoch": 0.3566490767081442, + "grad_norm": 0.17054349184036255, + "learning_rate": 4.643358161957915e-06, + "loss": 0.9482, + "step": 49270 + }, + { + "epoch": 0.3567214633687304, + "grad_norm": 0.15321429073810577, + "learning_rate": 4.643285775297329e-06, + "loss": 0.9532, + "step": 49280 + }, + { + "epoch": 0.3567938500293166, + "grad_norm": 0.16249078512191772, + "learning_rate": 4.643213388636742e-06, + "loss": 0.9366, + "step": 49290 + }, + { + "epoch": 0.3568662366899028, + "grad_norm": 0.23813359439373016, + "learning_rate": 4.643141001976156e-06, + "loss": 0.9628, + "step": 49300 + }, + { + "epoch": 0.356938623350489, + "grad_norm": 0.18279701471328735, + "learning_rate": 4.64306861531557e-06, + "loss": 0.962, + "step": 49310 + }, + { + "epoch": 0.35701101001107516, + "grad_norm": 0.1736493855714798, + "learning_rate": 4.642996228654984e-06, + "loss": 0.9578, + "step": 49320 + }, + { + "epoch": 0.35708339667166134, + "grad_norm": 0.16122330725193024, + "learning_rate": 4.642923841994398e-06, + "loss": 0.9503, + "step": 49330 + }, + { + "epoch": 0.3571557833322475, + "grad_norm": 0.15669801831245422, + "learning_rate": 4.642851455333811e-06, + "loss": 0.9562, + "step": 49340 + }, + { + "epoch": 0.35722816999283374, + "grad_norm": 0.16338752210140228, + "learning_rate": 4.642779068673226e-06, + "loss": 0.9554, + "step": 49350 + }, + { + "epoch": 0.3573005566534199, + "grad_norm": 0.16918496787548065, + "learning_rate": 4.642706682012639e-06, + "loss": 0.946, + "step": 49360 + }, + { + "epoch": 0.3573729433140061, + "grad_norm": 0.17653869092464447, + "learning_rate": 4.642634295352053e-06, + "loss": 0.9528, + "step": 49370 + }, + { + "epoch": 0.3574453299745923, + "grad_norm": 0.1767107993364334, + "learning_rate": 4.642561908691467e-06, + "loss": 0.9456, + "step": 49380 + }, + { + "epoch": 0.35751771663517845, + "grad_norm": 0.16276022791862488, + "learning_rate": 4.642489522030881e-06, + "loss": 0.9418, + "step": 49390 + }, + { + "epoch": 0.3575901032957647, + "grad_norm": 0.15793178975582123, + "learning_rate": 4.642417135370295e-06, + "loss": 0.9514, + "step": 49400 + }, + { + "epoch": 0.35766248995635086, + "grad_norm": 0.16991020739078522, + "learning_rate": 4.642344748709708e-06, + "loss": 0.939, + "step": 49410 + }, + { + "epoch": 0.35773487661693704, + "grad_norm": 0.1681062877178192, + "learning_rate": 4.642272362049122e-06, + "loss": 0.9462, + "step": 49420 + }, + { + "epoch": 0.3578072632775232, + "grad_norm": 0.18085841834545135, + "learning_rate": 4.6421999753885355e-06, + "loss": 0.945, + "step": 49430 + }, + { + "epoch": 0.3578796499381094, + "grad_norm": 0.1598934829235077, + "learning_rate": 4.642127588727949e-06, + "loss": 0.9579, + "step": 49440 + }, + { + "epoch": 0.35795203659869557, + "grad_norm": 0.19113032519817352, + "learning_rate": 4.642055202067363e-06, + "loss": 0.9484, + "step": 49450 + }, + { + "epoch": 0.3580244232592818, + "grad_norm": 0.16052620112895966, + "learning_rate": 4.641982815406777e-06, + "loss": 0.9557, + "step": 49460 + }, + { + "epoch": 0.358096809919868, + "grad_norm": 0.16908268630504608, + "learning_rate": 4.641910428746191e-06, + "loss": 0.9453, + "step": 49470 + }, + { + "epoch": 0.35816919658045415, + "grad_norm": 0.19930988550186157, + "learning_rate": 4.6418380420856044e-06, + "loss": 0.9453, + "step": 49480 + }, + { + "epoch": 0.35824158324104033, + "grad_norm": 0.16531233489513397, + "learning_rate": 4.641765655425018e-06, + "loss": 0.9414, + "step": 49490 + }, + { + "epoch": 0.3583139699016265, + "grad_norm": 0.16358982026576996, + "learning_rate": 4.6416932687644325e-06, + "loss": 0.9555, + "step": 49500 + }, + { + "epoch": 0.35838635656221274, + "grad_norm": 0.16903631389141083, + "learning_rate": 4.641620882103846e-06, + "loss": 0.9489, + "step": 49510 + }, + { + "epoch": 0.3584587432227989, + "grad_norm": 0.1676577776670456, + "learning_rate": 4.64154849544326e-06, + "loss": 0.956, + "step": 49520 + }, + { + "epoch": 0.3585311298833851, + "grad_norm": 0.1515856236219406, + "learning_rate": 4.641476108782673e-06, + "loss": 0.9528, + "step": 49530 + }, + { + "epoch": 0.35860351654397127, + "grad_norm": 0.16165132820606232, + "learning_rate": 4.641403722122088e-06, + "loss": 0.9508, + "step": 49540 + }, + { + "epoch": 0.35867590320455744, + "grad_norm": 0.1639813631772995, + "learning_rate": 4.6413313354615014e-06, + "loss": 0.9484, + "step": 49550 + }, + { + "epoch": 0.3587482898651437, + "grad_norm": 0.1556709110736847, + "learning_rate": 4.641258948800915e-06, + "loss": 0.9368, + "step": 49560 + }, + { + "epoch": 0.35882067652572985, + "grad_norm": 0.16210544109344482, + "learning_rate": 4.641186562140329e-06, + "loss": 0.9479, + "step": 49570 + }, + { + "epoch": 0.35889306318631603, + "grad_norm": 0.16633464395999908, + "learning_rate": 4.641114175479743e-06, + "loss": 0.9517, + "step": 49580 + }, + { + "epoch": 0.3589654498469022, + "grad_norm": 0.16160385310649872, + "learning_rate": 4.641041788819157e-06, + "loss": 0.9523, + "step": 49590 + }, + { + "epoch": 0.3590378365074884, + "grad_norm": 0.16910237073898315, + "learning_rate": 4.64096940215857e-06, + "loss": 0.9453, + "step": 49600 + }, + { + "epoch": 0.35911022316807456, + "grad_norm": 0.20249851047992706, + "learning_rate": 4.640897015497984e-06, + "loss": 0.9446, + "step": 49610 + }, + { + "epoch": 0.3591826098286608, + "grad_norm": 0.16274423897266388, + "learning_rate": 4.6408246288373985e-06, + "loss": 0.951, + "step": 49620 + }, + { + "epoch": 0.35925499648924697, + "grad_norm": 0.16763456165790558, + "learning_rate": 4.640752242176812e-06, + "loss": 0.9379, + "step": 49630 + }, + { + "epoch": 0.35932738314983315, + "grad_norm": 0.15942569077014923, + "learning_rate": 4.640679855516226e-06, + "loss": 0.9472, + "step": 49640 + }, + { + "epoch": 0.3593997698104193, + "grad_norm": 0.17928923666477203, + "learning_rate": 4.640607468855639e-06, + "loss": 0.9577, + "step": 49650 + }, + { + "epoch": 0.3594721564710055, + "grad_norm": 0.16758985817432404, + "learning_rate": 4.640535082195054e-06, + "loss": 0.9494, + "step": 49660 + }, + { + "epoch": 0.35954454313159173, + "grad_norm": 0.18791483342647552, + "learning_rate": 4.640462695534467e-06, + "loss": 0.9423, + "step": 49670 + }, + { + "epoch": 0.3596169297921779, + "grad_norm": 0.19120506942272186, + "learning_rate": 4.640390308873881e-06, + "loss": 0.9602, + "step": 49680 + }, + { + "epoch": 0.3596893164527641, + "grad_norm": 0.17304813861846924, + "learning_rate": 4.640317922213295e-06, + "loss": 0.9485, + "step": 49690 + }, + { + "epoch": 0.35976170311335026, + "grad_norm": 0.16456131637096405, + "learning_rate": 4.640245535552709e-06, + "loss": 0.9379, + "step": 49700 + }, + { + "epoch": 0.35983408977393644, + "grad_norm": 0.1641353815793991, + "learning_rate": 4.640173148892123e-06, + "loss": 0.9485, + "step": 49710 + }, + { + "epoch": 0.35990647643452267, + "grad_norm": 0.17807172238826752, + "learning_rate": 4.640100762231536e-06, + "loss": 0.939, + "step": 49720 + }, + { + "epoch": 0.35997886309510885, + "grad_norm": 0.17257729172706604, + "learning_rate": 4.64002837557095e-06, + "loss": 0.9506, + "step": 49730 + }, + { + "epoch": 0.360051249755695, + "grad_norm": 0.16776679456233978, + "learning_rate": 4.639955988910364e-06, + "loss": 0.9543, + "step": 49740 + }, + { + "epoch": 0.3601236364162812, + "grad_norm": 0.16268086433410645, + "learning_rate": 4.639883602249778e-06, + "loss": 0.9506, + "step": 49750 + }, + { + "epoch": 0.3601960230768674, + "grad_norm": 0.18560020625591278, + "learning_rate": 4.639811215589192e-06, + "loss": 0.952, + "step": 49760 + }, + { + "epoch": 0.36026840973745355, + "grad_norm": 0.1764240264892578, + "learning_rate": 4.639738828928605e-06, + "loss": 0.9481, + "step": 49770 + }, + { + "epoch": 0.3603407963980398, + "grad_norm": 0.18193010985851288, + "learning_rate": 4.63966644226802e-06, + "loss": 0.9451, + "step": 49780 + }, + { + "epoch": 0.36041318305862596, + "grad_norm": 0.15828032791614532, + "learning_rate": 4.639594055607433e-06, + "loss": 0.9549, + "step": 49790 + }, + { + "epoch": 0.36048556971921214, + "grad_norm": 0.20637375116348267, + "learning_rate": 4.639521668946847e-06, + "loss": 0.9618, + "step": 49800 + }, + { + "epoch": 0.3605579563797983, + "grad_norm": 0.15375225245952606, + "learning_rate": 4.6394492822862605e-06, + "loss": 0.9475, + "step": 49810 + }, + { + "epoch": 0.3606303430403845, + "grad_norm": 0.16718937456607819, + "learning_rate": 4.639376895625675e-06, + "loss": 0.9517, + "step": 49820 + }, + { + "epoch": 0.3607027297009707, + "grad_norm": 0.1495947241783142, + "learning_rate": 4.639304508965089e-06, + "loss": 0.9442, + "step": 49830 + }, + { + "epoch": 0.3607751163615569, + "grad_norm": 0.16354574263095856, + "learning_rate": 4.639232122304502e-06, + "loss": 0.9516, + "step": 49840 + }, + { + "epoch": 0.3608475030221431, + "grad_norm": 0.16006174683570862, + "learning_rate": 4.639159735643916e-06, + "loss": 0.9571, + "step": 49850 + }, + { + "epoch": 0.36091988968272926, + "grad_norm": 0.1511203497648239, + "learning_rate": 4.63908734898333e-06, + "loss": 0.9498, + "step": 49860 + }, + { + "epoch": 0.36099227634331543, + "grad_norm": 0.16783535480499268, + "learning_rate": 4.639014962322744e-06, + "loss": 0.9507, + "step": 49870 + }, + { + "epoch": 0.36106466300390166, + "grad_norm": 0.1579425036907196, + "learning_rate": 4.6389425756621576e-06, + "loss": 0.945, + "step": 49880 + }, + { + "epoch": 0.36113704966448784, + "grad_norm": 0.16360943019390106, + "learning_rate": 4.638870189001571e-06, + "loss": 0.9373, + "step": 49890 + }, + { + "epoch": 0.361209436325074, + "grad_norm": 0.15648721158504486, + "learning_rate": 4.638797802340985e-06, + "loss": 0.9672, + "step": 49900 + }, + { + "epoch": 0.3612818229856602, + "grad_norm": 0.16514001786708832, + "learning_rate": 4.638725415680399e-06, + "loss": 0.9446, + "step": 49910 + }, + { + "epoch": 0.36135420964624637, + "grad_norm": 0.20961642265319824, + "learning_rate": 4.638653029019813e-06, + "loss": 0.9411, + "step": 49920 + }, + { + "epoch": 0.3614265963068326, + "grad_norm": 0.1592184156179428, + "learning_rate": 4.6385806423592265e-06, + "loss": 0.9524, + "step": 49930 + }, + { + "epoch": 0.3614989829674188, + "grad_norm": 0.16908025741577148, + "learning_rate": 4.63850825569864e-06, + "loss": 0.9332, + "step": 49940 + }, + { + "epoch": 0.36157136962800496, + "grad_norm": 0.15697824954986572, + "learning_rate": 4.6384358690380546e-06, + "loss": 0.9351, + "step": 49950 + }, + { + "epoch": 0.36164375628859113, + "grad_norm": 0.16380582749843597, + "learning_rate": 4.638363482377467e-06, + "loss": 0.9462, + "step": 49960 + }, + { + "epoch": 0.3617161429491773, + "grad_norm": 0.15993693470954895, + "learning_rate": 4.638291095716882e-06, + "loss": 0.9584, + "step": 49970 + }, + { + "epoch": 0.3617885296097635, + "grad_norm": 0.17109087109565735, + "learning_rate": 4.638218709056295e-06, + "loss": 0.9411, + "step": 49980 + }, + { + "epoch": 0.3618609162703497, + "grad_norm": 0.16840317845344543, + "learning_rate": 4.638146322395709e-06, + "loss": 0.9362, + "step": 49990 + }, + { + "epoch": 0.3619333029309359, + "grad_norm": 0.16615648567676544, + "learning_rate": 4.638073935735123e-06, + "loss": 0.9501, + "step": 50000 + }, + { + "epoch": 0.3620056895915221, + "grad_norm": 0.16905289888381958, + "learning_rate": 4.638001549074537e-06, + "loss": 0.9524, + "step": 50010 + }, + { + "epoch": 0.36207807625210825, + "grad_norm": 0.17198826372623444, + "learning_rate": 4.637929162413951e-06, + "loss": 0.9435, + "step": 50020 + }, + { + "epoch": 0.3621504629126944, + "grad_norm": 0.17047207057476044, + "learning_rate": 4.637856775753364e-06, + "loss": 0.9492, + "step": 50030 + }, + { + "epoch": 0.36222284957328066, + "grad_norm": 0.18584772944450378, + "learning_rate": 4.637784389092778e-06, + "loss": 0.9482, + "step": 50040 + }, + { + "epoch": 0.36229523623386684, + "grad_norm": 0.16773472726345062, + "learning_rate": 4.637712002432192e-06, + "loss": 0.9572, + "step": 50050 + }, + { + "epoch": 0.362367622894453, + "grad_norm": 0.16679717600345612, + "learning_rate": 4.637639615771606e-06, + "loss": 0.9529, + "step": 50060 + }, + { + "epoch": 0.3624400095550392, + "grad_norm": 0.15634585916996002, + "learning_rate": 4.63756722911102e-06, + "loss": 0.9401, + "step": 50070 + }, + { + "epoch": 0.36251239621562537, + "grad_norm": 0.15569652616977692, + "learning_rate": 4.637494842450433e-06, + "loss": 0.9416, + "step": 50080 + }, + { + "epoch": 0.3625847828762116, + "grad_norm": 0.16276825964450836, + "learning_rate": 4.637422455789847e-06, + "loss": 0.9422, + "step": 50090 + }, + { + "epoch": 0.3626571695367978, + "grad_norm": 0.16607090830802917, + "learning_rate": 4.637350069129261e-06, + "loss": 0.9587, + "step": 50100 + }, + { + "epoch": 0.36272955619738395, + "grad_norm": 0.17144577205181122, + "learning_rate": 4.637277682468675e-06, + "loss": 0.9356, + "step": 50110 + }, + { + "epoch": 0.36280194285797013, + "grad_norm": 0.16335241496562958, + "learning_rate": 4.637205295808089e-06, + "loss": 0.9581, + "step": 50120 + }, + { + "epoch": 0.3628743295185563, + "grad_norm": 0.17196884751319885, + "learning_rate": 4.637132909147502e-06, + "loss": 0.9345, + "step": 50130 + }, + { + "epoch": 0.3629467161791425, + "grad_norm": 0.1646929383277893, + "learning_rate": 4.637060522486917e-06, + "loss": 0.9388, + "step": 50140 + }, + { + "epoch": 0.3630191028397287, + "grad_norm": 0.16134804487228394, + "learning_rate": 4.63698813582633e-06, + "loss": 0.9546, + "step": 50150 + }, + { + "epoch": 0.3630914895003149, + "grad_norm": 0.16641758382320404, + "learning_rate": 4.636915749165744e-06, + "loss": 0.9681, + "step": 50160 + }, + { + "epoch": 0.36316387616090107, + "grad_norm": 0.1564699113368988, + "learning_rate": 4.6368433625051575e-06, + "loss": 0.9527, + "step": 50170 + }, + { + "epoch": 0.36323626282148724, + "grad_norm": 0.16455498337745667, + "learning_rate": 4.636770975844572e-06, + "loss": 0.9407, + "step": 50180 + }, + { + "epoch": 0.3633086494820734, + "grad_norm": 0.16031110286712646, + "learning_rate": 4.636698589183986e-06, + "loss": 0.9418, + "step": 50190 + }, + { + "epoch": 0.36338103614265965, + "grad_norm": 0.1539280265569687, + "learning_rate": 4.636626202523399e-06, + "loss": 0.9496, + "step": 50200 + }, + { + "epoch": 0.36345342280324583, + "grad_norm": 0.20575940608978271, + "learning_rate": 4.636553815862813e-06, + "loss": 0.9332, + "step": 50210 + }, + { + "epoch": 0.363525809463832, + "grad_norm": 0.17900900542736053, + "learning_rate": 4.636481429202227e-06, + "loss": 0.9533, + "step": 50220 + }, + { + "epoch": 0.3635981961244182, + "grad_norm": 0.1708286702632904, + "learning_rate": 4.636409042541641e-06, + "loss": 0.9504, + "step": 50230 + }, + { + "epoch": 0.36367058278500436, + "grad_norm": 0.16686797142028809, + "learning_rate": 4.6363366558810545e-06, + "loss": 0.951, + "step": 50240 + }, + { + "epoch": 0.3637429694455906, + "grad_norm": 0.1779949814081192, + "learning_rate": 4.636264269220468e-06, + "loss": 0.954, + "step": 50250 + }, + { + "epoch": 0.36381535610617677, + "grad_norm": 0.16261711716651917, + "learning_rate": 4.636191882559883e-06, + "loss": 0.9552, + "step": 50260 + }, + { + "epoch": 0.36388774276676294, + "grad_norm": 0.15804466605186462, + "learning_rate": 4.636119495899296e-06, + "loss": 0.952, + "step": 50270 + }, + { + "epoch": 0.3639601294273491, + "grad_norm": 0.18854446709156036, + "learning_rate": 4.63604710923871e-06, + "loss": 0.937, + "step": 50280 + }, + { + "epoch": 0.3640325160879353, + "grad_norm": 0.16362988948822021, + "learning_rate": 4.6359747225781234e-06, + "loss": 0.9439, + "step": 50290 + }, + { + "epoch": 0.3641049027485215, + "grad_norm": 0.1906929761171341, + "learning_rate": 4.635902335917538e-06, + "loss": 0.9522, + "step": 50300 + }, + { + "epoch": 0.3641772894091077, + "grad_norm": 0.1719408482313156, + "learning_rate": 4.6358299492569515e-06, + "loss": 0.9409, + "step": 50310 + }, + { + "epoch": 0.3642496760696939, + "grad_norm": 0.17714013159275055, + "learning_rate": 4.635757562596365e-06, + "loss": 0.9425, + "step": 50320 + }, + { + "epoch": 0.36432206273028006, + "grad_norm": 0.1718074232339859, + "learning_rate": 4.635685175935779e-06, + "loss": 0.9431, + "step": 50330 + }, + { + "epoch": 0.36439444939086624, + "grad_norm": 0.16074173152446747, + "learning_rate": 4.635612789275193e-06, + "loss": 0.9598, + "step": 50340 + }, + { + "epoch": 0.3644668360514524, + "grad_norm": 0.1573699712753296, + "learning_rate": 4.635540402614607e-06, + "loss": 0.9416, + "step": 50350 + }, + { + "epoch": 0.36453922271203865, + "grad_norm": 0.15923991799354553, + "learning_rate": 4.6354680159540205e-06, + "loss": 0.9591, + "step": 50360 + }, + { + "epoch": 0.3646116093726248, + "grad_norm": 0.17145873606204987, + "learning_rate": 4.635395629293434e-06, + "loss": 0.9399, + "step": 50370 + }, + { + "epoch": 0.364683996033211, + "grad_norm": 0.16001589596271515, + "learning_rate": 4.6353232426328485e-06, + "loss": 0.9558, + "step": 50380 + }, + { + "epoch": 0.3647563826937972, + "grad_norm": 0.16999055445194244, + "learning_rate": 4.635250855972262e-06, + "loss": 0.9505, + "step": 50390 + }, + { + "epoch": 0.36482876935438335, + "grad_norm": 0.1643807590007782, + "learning_rate": 4.635178469311676e-06, + "loss": 0.9385, + "step": 50400 + }, + { + "epoch": 0.3649011560149696, + "grad_norm": 0.1788293868303299, + "learning_rate": 4.635106082651089e-06, + "loss": 0.9312, + "step": 50410 + }, + { + "epoch": 0.36497354267555576, + "grad_norm": 0.17690715193748474, + "learning_rate": 4.635033695990504e-06, + "loss": 0.9524, + "step": 50420 + }, + { + "epoch": 0.36504592933614194, + "grad_norm": 0.17854566872119904, + "learning_rate": 4.6349613093299175e-06, + "loss": 0.952, + "step": 50430 + }, + { + "epoch": 0.3651183159967281, + "grad_norm": 0.4458518326282501, + "learning_rate": 4.634888922669331e-06, + "loss": 0.9479, + "step": 50440 + }, + { + "epoch": 0.3651907026573143, + "grad_norm": 0.15616470575332642, + "learning_rate": 4.634816536008745e-06, + "loss": 0.9488, + "step": 50450 + }, + { + "epoch": 0.3652630893179005, + "grad_norm": 0.16164493560791016, + "learning_rate": 4.634744149348159e-06, + "loss": 0.9507, + "step": 50460 + }, + { + "epoch": 0.3653354759784867, + "grad_norm": 0.16681145131587982, + "learning_rate": 4.634671762687573e-06, + "loss": 0.9473, + "step": 50470 + }, + { + "epoch": 0.3654078626390729, + "grad_norm": 0.1800214797258377, + "learning_rate": 4.634599376026986e-06, + "loss": 0.9655, + "step": 50480 + }, + { + "epoch": 0.36548024929965905, + "grad_norm": 0.1790829747915268, + "learning_rate": 4.6345269893664e-06, + "loss": 0.9581, + "step": 50490 + }, + { + "epoch": 0.36555263596024523, + "grad_norm": 0.1491861194372177, + "learning_rate": 4.634454602705814e-06, + "loss": 0.9409, + "step": 50500 + }, + { + "epoch": 0.3656250226208314, + "grad_norm": 0.15858380496501923, + "learning_rate": 4.634382216045227e-06, + "loss": 0.9392, + "step": 50510 + }, + { + "epoch": 0.36569740928141764, + "grad_norm": 0.17849485576152802, + "learning_rate": 4.634309829384641e-06, + "loss": 0.9595, + "step": 50520 + }, + { + "epoch": 0.3657697959420038, + "grad_norm": 0.16567130386829376, + "learning_rate": 4.634237442724055e-06, + "loss": 0.9496, + "step": 50530 + }, + { + "epoch": 0.36584218260259, + "grad_norm": 0.22058269381523132, + "learning_rate": 4.634165056063469e-06, + "loss": 0.9507, + "step": 50540 + }, + { + "epoch": 0.36591456926317617, + "grad_norm": 0.16320207715034485, + "learning_rate": 4.6340926694028825e-06, + "loss": 0.9408, + "step": 50550 + }, + { + "epoch": 0.36598695592376235, + "grad_norm": 0.19167020916938782, + "learning_rate": 4.634020282742296e-06, + "loss": 0.9467, + "step": 50560 + }, + { + "epoch": 0.3660593425843486, + "grad_norm": 0.17240388691425323, + "learning_rate": 4.633947896081711e-06, + "loss": 0.9623, + "step": 50570 + }, + { + "epoch": 0.36613172924493476, + "grad_norm": 0.20315563678741455, + "learning_rate": 4.633875509421124e-06, + "loss": 0.9401, + "step": 50580 + }, + { + "epoch": 0.36620411590552093, + "grad_norm": 0.1570211499929428, + "learning_rate": 4.633803122760538e-06, + "loss": 0.9451, + "step": 50590 + }, + { + "epoch": 0.3662765025661071, + "grad_norm": 0.18627873063087463, + "learning_rate": 4.6337307360999515e-06, + "loss": 0.9548, + "step": 50600 + }, + { + "epoch": 0.3663488892266933, + "grad_norm": 0.19180041551589966, + "learning_rate": 4.633658349439366e-06, + "loss": 0.9445, + "step": 50610 + }, + { + "epoch": 0.3664212758872795, + "grad_norm": 0.19508033990859985, + "learning_rate": 4.6335859627787796e-06, + "loss": 0.9493, + "step": 50620 + }, + { + "epoch": 0.3664936625478657, + "grad_norm": 0.18519370257854462, + "learning_rate": 4.633513576118193e-06, + "loss": 0.9308, + "step": 50630 + }, + { + "epoch": 0.36656604920845187, + "grad_norm": 0.1680838167667389, + "learning_rate": 4.633441189457607e-06, + "loss": 0.9335, + "step": 50640 + }, + { + "epoch": 0.36663843586903805, + "grad_norm": 0.16373924911022186, + "learning_rate": 4.633368802797021e-06, + "loss": 0.9413, + "step": 50650 + }, + { + "epoch": 0.3667108225296242, + "grad_norm": 0.16160275042057037, + "learning_rate": 4.633296416136435e-06, + "loss": 0.9487, + "step": 50660 + }, + { + "epoch": 0.3667832091902104, + "grad_norm": 0.1721445471048355, + "learning_rate": 4.6332240294758485e-06, + "loss": 0.9351, + "step": 50670 + }, + { + "epoch": 0.36685559585079663, + "grad_norm": 0.1686573028564453, + "learning_rate": 4.633151642815262e-06, + "loss": 0.9441, + "step": 50680 + }, + { + "epoch": 0.3669279825113828, + "grad_norm": 0.15622854232788086, + "learning_rate": 4.633079256154676e-06, + "loss": 0.9586, + "step": 50690 + }, + { + "epoch": 0.367000369171969, + "grad_norm": 0.14963869750499725, + "learning_rate": 4.63300686949409e-06, + "loss": 0.9496, + "step": 50700 + }, + { + "epoch": 0.36707275583255516, + "grad_norm": 0.17733648419380188, + "learning_rate": 4.632934482833504e-06, + "loss": 0.9453, + "step": 50710 + }, + { + "epoch": 0.36714514249314134, + "grad_norm": 0.15873117744922638, + "learning_rate": 4.632862096172917e-06, + "loss": 0.9551, + "step": 50720 + }, + { + "epoch": 0.3672175291537276, + "grad_norm": 0.15206924080848694, + "learning_rate": 4.632789709512331e-06, + "loss": 0.9318, + "step": 50730 + }, + { + "epoch": 0.36728991581431375, + "grad_norm": 0.16713659465312958, + "learning_rate": 4.6327173228517455e-06, + "loss": 0.9512, + "step": 50740 + }, + { + "epoch": 0.3673623024748999, + "grad_norm": 0.1614990532398224, + "learning_rate": 4.632644936191159e-06, + "loss": 0.9528, + "step": 50750 + }, + { + "epoch": 0.3674346891354861, + "grad_norm": 0.16960258781909943, + "learning_rate": 4.632572549530573e-06, + "loss": 0.9465, + "step": 50760 + }, + { + "epoch": 0.3675070757960723, + "grad_norm": 0.19546057283878326, + "learning_rate": 4.632500162869986e-06, + "loss": 0.9463, + "step": 50770 + }, + { + "epoch": 0.3675794624566585, + "grad_norm": 0.1588112711906433, + "learning_rate": 4.632427776209401e-06, + "loss": 0.9295, + "step": 50780 + }, + { + "epoch": 0.3676518491172447, + "grad_norm": 0.17281554639339447, + "learning_rate": 4.632355389548814e-06, + "loss": 0.956, + "step": 50790 + }, + { + "epoch": 0.36772423577783087, + "grad_norm": 0.15607163310050964, + "learning_rate": 4.632283002888228e-06, + "loss": 0.9452, + "step": 50800 + }, + { + "epoch": 0.36779662243841704, + "grad_norm": 0.16770051419734955, + "learning_rate": 4.632210616227642e-06, + "loss": 0.9582, + "step": 50810 + }, + { + "epoch": 0.3678690090990032, + "grad_norm": 0.15941555798053741, + "learning_rate": 4.632138229567056e-06, + "loss": 0.9394, + "step": 50820 + }, + { + "epoch": 0.3679413957595894, + "grad_norm": 0.1637001931667328, + "learning_rate": 4.63206584290647e-06, + "loss": 0.9377, + "step": 50830 + }, + { + "epoch": 0.36801378242017563, + "grad_norm": 0.17539703845977783, + "learning_rate": 4.631993456245883e-06, + "loss": 0.9532, + "step": 50840 + }, + { + "epoch": 0.3680861690807618, + "grad_norm": 0.15488503873348236, + "learning_rate": 4.631921069585297e-06, + "loss": 0.9572, + "step": 50850 + }, + { + "epoch": 0.368158555741348, + "grad_norm": 0.17145054042339325, + "learning_rate": 4.6318486829247114e-06, + "loss": 0.9558, + "step": 50860 + }, + { + "epoch": 0.36823094240193416, + "grad_norm": 0.16223368048667908, + "learning_rate": 4.631776296264125e-06, + "loss": 0.9489, + "step": 50870 + }, + { + "epoch": 0.36830332906252033, + "grad_norm": 0.17038589715957642, + "learning_rate": 4.631703909603539e-06, + "loss": 0.9607, + "step": 50880 + }, + { + "epoch": 0.36837571572310657, + "grad_norm": 0.17411848902702332, + "learning_rate": 4.631631522942952e-06, + "loss": 0.9467, + "step": 50890 + }, + { + "epoch": 0.36844810238369274, + "grad_norm": 0.15501435101032257, + "learning_rate": 4.631559136282367e-06, + "loss": 0.946, + "step": 50900 + }, + { + "epoch": 0.3685204890442789, + "grad_norm": 0.15863391757011414, + "learning_rate": 4.63148674962178e-06, + "loss": 0.9441, + "step": 50910 + }, + { + "epoch": 0.3685928757048651, + "grad_norm": 0.17681151628494263, + "learning_rate": 4.631414362961194e-06, + "loss": 0.9548, + "step": 50920 + }, + { + "epoch": 0.3686652623654513, + "grad_norm": 0.1619083285331726, + "learning_rate": 4.631341976300608e-06, + "loss": 0.9488, + "step": 50930 + }, + { + "epoch": 0.3687376490260375, + "grad_norm": 0.16921238601207733, + "learning_rate": 4.631269589640022e-06, + "loss": 0.9414, + "step": 50940 + }, + { + "epoch": 0.3688100356866237, + "grad_norm": 0.16636481881141663, + "learning_rate": 4.631197202979436e-06, + "loss": 0.941, + "step": 50950 + }, + { + "epoch": 0.36888242234720986, + "grad_norm": 0.16682793200016022, + "learning_rate": 4.631124816318849e-06, + "loss": 0.9447, + "step": 50960 + }, + { + "epoch": 0.36895480900779604, + "grad_norm": 0.1645735502243042, + "learning_rate": 4.631052429658263e-06, + "loss": 0.9495, + "step": 50970 + }, + { + "epoch": 0.3690271956683822, + "grad_norm": 0.16850468516349792, + "learning_rate": 4.630980042997677e-06, + "loss": 0.9512, + "step": 50980 + }, + { + "epoch": 0.36909958232896845, + "grad_norm": 0.16100184619426727, + "learning_rate": 4.630907656337091e-06, + "loss": 0.9483, + "step": 50990 + }, + { + "epoch": 0.3691719689895546, + "grad_norm": 0.1832258701324463, + "learning_rate": 4.630835269676505e-06, + "loss": 0.9398, + "step": 51000 + }, + { + "epoch": 0.3692443556501408, + "grad_norm": 0.1679748147726059, + "learning_rate": 4.630762883015918e-06, + "loss": 0.9545, + "step": 51010 + }, + { + "epoch": 0.369316742310727, + "grad_norm": 0.5284795165061951, + "learning_rate": 4.630690496355332e-06, + "loss": 0.9481, + "step": 51020 + }, + { + "epoch": 0.36938912897131315, + "grad_norm": 0.16089926660060883, + "learning_rate": 4.6306181096947454e-06, + "loss": 0.9499, + "step": 51030 + }, + { + "epoch": 0.36946151563189933, + "grad_norm": 0.16643081605434418, + "learning_rate": 4.630545723034159e-06, + "loss": 0.948, + "step": 51040 + }, + { + "epoch": 0.36953390229248556, + "grad_norm": 0.24914591014385223, + "learning_rate": 4.6304733363735735e-06, + "loss": 0.9461, + "step": 51050 + }, + { + "epoch": 0.36960628895307174, + "grad_norm": 0.16817229986190796, + "learning_rate": 4.630400949712987e-06, + "loss": 0.9431, + "step": 51060 + }, + { + "epoch": 0.3696786756136579, + "grad_norm": 0.15578976273536682, + "learning_rate": 4.630328563052401e-06, + "loss": 0.9523, + "step": 51070 + }, + { + "epoch": 0.3697510622742441, + "grad_norm": 0.1646818220615387, + "learning_rate": 4.630256176391814e-06, + "loss": 0.9489, + "step": 51080 + }, + { + "epoch": 0.36982344893483027, + "grad_norm": 0.15397506952285767, + "learning_rate": 4.630183789731229e-06, + "loss": 0.939, + "step": 51090 + }, + { + "epoch": 0.3698958355954165, + "grad_norm": 0.1653456836938858, + "learning_rate": 4.6301114030706424e-06, + "loss": 0.9552, + "step": 51100 + }, + { + "epoch": 0.3699682222560027, + "grad_norm": 0.1986481249332428, + "learning_rate": 4.630039016410056e-06, + "loss": 0.9519, + "step": 51110 + }, + { + "epoch": 0.37004060891658885, + "grad_norm": 0.17512935400009155, + "learning_rate": 4.62996662974947e-06, + "loss": 0.9582, + "step": 51120 + }, + { + "epoch": 0.37011299557717503, + "grad_norm": 0.17302803695201874, + "learning_rate": 4.629894243088884e-06, + "loss": 0.9275, + "step": 51130 + }, + { + "epoch": 0.3701853822377612, + "grad_norm": 0.16957373917102814, + "learning_rate": 4.629821856428298e-06, + "loss": 0.9455, + "step": 51140 + }, + { + "epoch": 0.37025776889834744, + "grad_norm": 0.1609341949224472, + "learning_rate": 4.629749469767711e-06, + "loss": 0.9449, + "step": 51150 + }, + { + "epoch": 0.3703301555589336, + "grad_norm": 0.2060326784849167, + "learning_rate": 4.629677083107125e-06, + "loss": 0.9487, + "step": 51160 + }, + { + "epoch": 0.3704025422195198, + "grad_norm": 0.15667761862277985, + "learning_rate": 4.6296046964465395e-06, + "loss": 0.9421, + "step": 51170 + }, + { + "epoch": 0.37047492888010597, + "grad_norm": 0.15692569315433502, + "learning_rate": 4.629532309785953e-06, + "loss": 0.9383, + "step": 51180 + }, + { + "epoch": 0.37054731554069215, + "grad_norm": 0.15926668047904968, + "learning_rate": 4.629459923125367e-06, + "loss": 0.9483, + "step": 51190 + }, + { + "epoch": 0.3706197022012783, + "grad_norm": 0.16595801711082458, + "learning_rate": 4.62938753646478e-06, + "loss": 0.9489, + "step": 51200 + }, + { + "epoch": 0.37069208886186455, + "grad_norm": 0.1648326963186264, + "learning_rate": 4.629315149804195e-06, + "loss": 0.9634, + "step": 51210 + }, + { + "epoch": 0.37076447552245073, + "grad_norm": 0.15535977482795715, + "learning_rate": 4.629242763143608e-06, + "loss": 0.9404, + "step": 51220 + }, + { + "epoch": 0.3708368621830369, + "grad_norm": 0.1696339249610901, + "learning_rate": 4.629170376483022e-06, + "loss": 0.9539, + "step": 51230 + }, + { + "epoch": 0.3709092488436231, + "grad_norm": 0.15960168838500977, + "learning_rate": 4.629097989822436e-06, + "loss": 0.9377, + "step": 51240 + }, + { + "epoch": 0.37098163550420926, + "grad_norm": 0.20475220680236816, + "learning_rate": 4.62902560316185e-06, + "loss": 0.9529, + "step": 51250 + }, + { + "epoch": 0.3710540221647955, + "grad_norm": 0.16864748299121857, + "learning_rate": 4.628953216501264e-06, + "loss": 0.9446, + "step": 51260 + }, + { + "epoch": 0.37112640882538167, + "grad_norm": 0.1571142077445984, + "learning_rate": 4.628880829840677e-06, + "loss": 0.9558, + "step": 51270 + }, + { + "epoch": 0.37119879548596785, + "grad_norm": 0.19135212898254395, + "learning_rate": 4.628808443180091e-06, + "loss": 0.9484, + "step": 51280 + }, + { + "epoch": 0.371271182146554, + "grad_norm": 0.15567857027053833, + "learning_rate": 4.628736056519505e-06, + "loss": 0.9503, + "step": 51290 + }, + { + "epoch": 0.3713435688071402, + "grad_norm": 0.16534672677516937, + "learning_rate": 4.628663669858919e-06, + "loss": 0.9329, + "step": 51300 + }, + { + "epoch": 0.37141595546772643, + "grad_norm": 0.16259850561618805, + "learning_rate": 4.628591283198333e-06, + "loss": 0.9419, + "step": 51310 + }, + { + "epoch": 0.3714883421283126, + "grad_norm": 0.20146603882312775, + "learning_rate": 4.628518896537746e-06, + "loss": 0.956, + "step": 51320 + }, + { + "epoch": 0.3715607287888988, + "grad_norm": 0.18213625252246857, + "learning_rate": 4.62844650987716e-06, + "loss": 0.9639, + "step": 51330 + }, + { + "epoch": 0.37163311544948496, + "grad_norm": 0.15779289603233337, + "learning_rate": 4.628374123216574e-06, + "loss": 0.9425, + "step": 51340 + }, + { + "epoch": 0.37170550211007114, + "grad_norm": 0.16919735074043274, + "learning_rate": 4.628301736555988e-06, + "loss": 0.9501, + "step": 51350 + }, + { + "epoch": 0.3717778887706573, + "grad_norm": 0.1676865667104721, + "learning_rate": 4.6282293498954016e-06, + "loss": 0.9453, + "step": 51360 + }, + { + "epoch": 0.37185027543124355, + "grad_norm": 0.15803247690200806, + "learning_rate": 4.628156963234815e-06, + "loss": 0.9435, + "step": 51370 + }, + { + "epoch": 0.3719226620918297, + "grad_norm": 0.1553627997636795, + "learning_rate": 4.62808457657423e-06, + "loss": 0.9638, + "step": 51380 + }, + { + "epoch": 0.3719950487524159, + "grad_norm": 0.16494029760360718, + "learning_rate": 4.628012189913643e-06, + "loss": 0.9366, + "step": 51390 + }, + { + "epoch": 0.3720674354130021, + "grad_norm": 0.16437461972236633, + "learning_rate": 4.627939803253057e-06, + "loss": 0.94, + "step": 51400 + }, + { + "epoch": 0.37213982207358826, + "grad_norm": 0.15786989033222198, + "learning_rate": 4.6278674165924705e-06, + "loss": 0.9574, + "step": 51410 + }, + { + "epoch": 0.3722122087341745, + "grad_norm": 0.16579070687294006, + "learning_rate": 4.627795029931885e-06, + "loss": 0.949, + "step": 51420 + }, + { + "epoch": 0.37228459539476066, + "grad_norm": 0.16631585359573364, + "learning_rate": 4.6277226432712986e-06, + "loss": 0.9393, + "step": 51430 + }, + { + "epoch": 0.37235698205534684, + "grad_norm": 0.21473245322704315, + "learning_rate": 4.627650256610712e-06, + "loss": 0.9482, + "step": 51440 + }, + { + "epoch": 0.372429368715933, + "grad_norm": 0.17821331322193146, + "learning_rate": 4.627577869950126e-06, + "loss": 0.9511, + "step": 51450 + }, + { + "epoch": 0.3725017553765192, + "grad_norm": 0.16437208652496338, + "learning_rate": 4.62750548328954e-06, + "loss": 0.9522, + "step": 51460 + }, + { + "epoch": 0.3725741420371054, + "grad_norm": 0.1748124063014984, + "learning_rate": 4.627433096628954e-06, + "loss": 0.951, + "step": 51470 + }, + { + "epoch": 0.3726465286976916, + "grad_norm": 0.1691410392522812, + "learning_rate": 4.6273607099683675e-06, + "loss": 0.9452, + "step": 51480 + }, + { + "epoch": 0.3727189153582778, + "grad_norm": 0.15897352993488312, + "learning_rate": 4.627288323307781e-06, + "loss": 0.9503, + "step": 51490 + }, + { + "epoch": 0.37279130201886396, + "grad_norm": 0.17863501608371735, + "learning_rate": 4.6272159366471956e-06, + "loss": 0.9485, + "step": 51500 + }, + { + "epoch": 0.37286368867945013, + "grad_norm": 0.1577998846769333, + "learning_rate": 4.627143549986609e-06, + "loss": 0.9468, + "step": 51510 + }, + { + "epoch": 0.3729360753400363, + "grad_norm": 0.17091219127178192, + "learning_rate": 4.627071163326023e-06, + "loss": 0.949, + "step": 51520 + }, + { + "epoch": 0.37300846200062254, + "grad_norm": 0.16450020670890808, + "learning_rate": 4.626998776665436e-06, + "loss": 0.9375, + "step": 51530 + }, + { + "epoch": 0.3730808486612087, + "grad_norm": 0.20446856319904327, + "learning_rate": 4.626926390004851e-06, + "loss": 0.945, + "step": 51540 + }, + { + "epoch": 0.3731532353217949, + "grad_norm": 0.18482069671154022, + "learning_rate": 4.626854003344264e-06, + "loss": 0.9365, + "step": 51550 + }, + { + "epoch": 0.3732256219823811, + "grad_norm": 0.2597537934780121, + "learning_rate": 4.626781616683677e-06, + "loss": 0.9474, + "step": 51560 + }, + { + "epoch": 0.37329800864296725, + "grad_norm": 0.16503006219863892, + "learning_rate": 4.626709230023092e-06, + "loss": 0.9384, + "step": 51570 + }, + { + "epoch": 0.3733703953035535, + "grad_norm": 0.14666348695755005, + "learning_rate": 4.626636843362505e-06, + "loss": 0.9408, + "step": 51580 + }, + { + "epoch": 0.37344278196413966, + "grad_norm": 0.1652412712574005, + "learning_rate": 4.626564456701919e-06, + "loss": 0.9559, + "step": 51590 + }, + { + "epoch": 0.37351516862472584, + "grad_norm": 0.15990935266017914, + "learning_rate": 4.626492070041333e-06, + "loss": 0.9363, + "step": 51600 + }, + { + "epoch": 0.373587555285312, + "grad_norm": 0.16256429255008698, + "learning_rate": 4.626419683380747e-06, + "loss": 0.947, + "step": 51610 + }, + { + "epoch": 0.3736599419458982, + "grad_norm": 0.166569322347641, + "learning_rate": 4.626347296720161e-06, + "loss": 0.9548, + "step": 51620 + }, + { + "epoch": 0.3737323286064844, + "grad_norm": 0.18789708614349365, + "learning_rate": 4.626274910059574e-06, + "loss": 0.9502, + "step": 51630 + }, + { + "epoch": 0.3738047152670706, + "grad_norm": 0.16426165401935577, + "learning_rate": 4.626202523398988e-06, + "loss": 0.942, + "step": 51640 + }, + { + "epoch": 0.3738771019276568, + "grad_norm": 0.15513013303279877, + "learning_rate": 4.626130136738402e-06, + "loss": 0.9541, + "step": 51650 + }, + { + "epoch": 0.37394948858824295, + "grad_norm": 0.1891675591468811, + "learning_rate": 4.626057750077816e-06, + "loss": 0.956, + "step": 51660 + }, + { + "epoch": 0.3740218752488291, + "grad_norm": 0.17240507900714874, + "learning_rate": 4.62598536341723e-06, + "loss": 0.9564, + "step": 51670 + }, + { + "epoch": 0.37409426190941536, + "grad_norm": 0.16830044984817505, + "learning_rate": 4.625912976756643e-06, + "loss": 0.9326, + "step": 51680 + }, + { + "epoch": 0.37416664857000154, + "grad_norm": 0.16331635415554047, + "learning_rate": 4.625840590096058e-06, + "loss": 0.945, + "step": 51690 + }, + { + "epoch": 0.3742390352305877, + "grad_norm": 0.15753015875816345, + "learning_rate": 4.625768203435471e-06, + "loss": 0.9527, + "step": 51700 + }, + { + "epoch": 0.3743114218911739, + "grad_norm": 0.16955150663852692, + "learning_rate": 4.625695816774885e-06, + "loss": 0.9453, + "step": 51710 + }, + { + "epoch": 0.37438380855176007, + "grad_norm": 0.17448243498802185, + "learning_rate": 4.6256234301142985e-06, + "loss": 0.9526, + "step": 51720 + }, + { + "epoch": 0.37445619521234624, + "grad_norm": 0.17462722957134247, + "learning_rate": 4.625551043453713e-06, + "loss": 0.9496, + "step": 51730 + }, + { + "epoch": 0.3745285818729325, + "grad_norm": 0.15947075188159943, + "learning_rate": 4.625478656793127e-06, + "loss": 0.9412, + "step": 51740 + }, + { + "epoch": 0.37460096853351865, + "grad_norm": 0.16033557057380676, + "learning_rate": 4.62540627013254e-06, + "loss": 0.9442, + "step": 51750 + }, + { + "epoch": 0.37467335519410483, + "grad_norm": 0.15499283373355865, + "learning_rate": 4.625333883471954e-06, + "loss": 0.9436, + "step": 51760 + }, + { + "epoch": 0.374745741854691, + "grad_norm": 0.16719605028629303, + "learning_rate": 4.625261496811368e-06, + "loss": 0.9456, + "step": 51770 + }, + { + "epoch": 0.3748181285152772, + "grad_norm": 0.16747929155826569, + "learning_rate": 4.625189110150782e-06, + "loss": 0.9396, + "step": 51780 + }, + { + "epoch": 0.3748905151758634, + "grad_norm": 0.16449685394763947, + "learning_rate": 4.6251167234901955e-06, + "loss": 0.9478, + "step": 51790 + }, + { + "epoch": 0.3749629018364496, + "grad_norm": 0.16664811968803406, + "learning_rate": 4.625044336829609e-06, + "loss": 0.9462, + "step": 51800 + }, + { + "epoch": 0.37503528849703577, + "grad_norm": 0.16544151306152344, + "learning_rate": 4.624971950169024e-06, + "loss": 0.9416, + "step": 51810 + }, + { + "epoch": 0.37510767515762194, + "grad_norm": 0.1873922049999237, + "learning_rate": 4.624899563508437e-06, + "loss": 0.9471, + "step": 51820 + }, + { + "epoch": 0.3751800618182081, + "grad_norm": 0.1557958424091339, + "learning_rate": 4.624827176847851e-06, + "loss": 0.9429, + "step": 51830 + }, + { + "epoch": 0.37525244847879435, + "grad_norm": 0.1720801442861557, + "learning_rate": 4.6247547901872644e-06, + "loss": 0.9529, + "step": 51840 + }, + { + "epoch": 0.37532483513938053, + "grad_norm": 0.2492019236087799, + "learning_rate": 4.624682403526679e-06, + "loss": 0.9338, + "step": 51850 + }, + { + "epoch": 0.3753972217999667, + "grad_norm": 0.15926505625247955, + "learning_rate": 4.6246100168660925e-06, + "loss": 0.9455, + "step": 51860 + }, + { + "epoch": 0.3754696084605529, + "grad_norm": 0.16041946411132812, + "learning_rate": 4.624537630205506e-06, + "loss": 0.9507, + "step": 51870 + }, + { + "epoch": 0.37554199512113906, + "grad_norm": 0.1602320671081543, + "learning_rate": 4.62446524354492e-06, + "loss": 0.9493, + "step": 51880 + }, + { + "epoch": 0.37561438178172524, + "grad_norm": 0.18607132136821747, + "learning_rate": 4.624392856884334e-06, + "loss": 0.9591, + "step": 51890 + }, + { + "epoch": 0.37568676844231147, + "grad_norm": 0.1767975091934204, + "learning_rate": 4.624320470223748e-06, + "loss": 0.9571, + "step": 51900 + }, + { + "epoch": 0.37575915510289765, + "grad_norm": 0.15797516703605652, + "learning_rate": 4.6242480835631615e-06, + "loss": 0.9301, + "step": 51910 + }, + { + "epoch": 0.3758315417634838, + "grad_norm": 0.16194427013397217, + "learning_rate": 4.624175696902575e-06, + "loss": 0.9337, + "step": 51920 + }, + { + "epoch": 0.37590392842407, + "grad_norm": 0.15718939900398254, + "learning_rate": 4.624103310241989e-06, + "loss": 0.9361, + "step": 51930 + }, + { + "epoch": 0.3759763150846562, + "grad_norm": 0.15875135362148285, + "learning_rate": 4.624030923581403e-06, + "loss": 0.9309, + "step": 51940 + }, + { + "epoch": 0.3760487017452424, + "grad_norm": 0.1533413827419281, + "learning_rate": 4.623958536920817e-06, + "loss": 0.9316, + "step": 51950 + }, + { + "epoch": 0.3761210884058286, + "grad_norm": 0.21360956132411957, + "learning_rate": 4.62388615026023e-06, + "loss": 0.9535, + "step": 51960 + }, + { + "epoch": 0.37619347506641476, + "grad_norm": 0.16147691011428833, + "learning_rate": 4.623813763599644e-06, + "loss": 0.9492, + "step": 51970 + }, + { + "epoch": 0.37626586172700094, + "grad_norm": 0.1707833856344223, + "learning_rate": 4.6237413769390585e-06, + "loss": 0.9319, + "step": 51980 + }, + { + "epoch": 0.3763382483875871, + "grad_norm": 0.17225024104118347, + "learning_rate": 4.623668990278472e-06, + "loss": 0.9323, + "step": 51990 + }, + { + "epoch": 0.37641063504817335, + "grad_norm": 0.16834914684295654, + "learning_rate": 4.623596603617886e-06, + "loss": 0.9474, + "step": 52000 + }, + { + "epoch": 0.3764830217087595, + "grad_norm": 0.1597452461719513, + "learning_rate": 4.623524216957299e-06, + "loss": 0.938, + "step": 52010 + }, + { + "epoch": 0.3765554083693457, + "grad_norm": 0.15453481674194336, + "learning_rate": 4.623451830296714e-06, + "loss": 0.9591, + "step": 52020 + }, + { + "epoch": 0.3766277950299319, + "grad_norm": 0.16118237376213074, + "learning_rate": 4.623379443636127e-06, + "loss": 0.9533, + "step": 52030 + }, + { + "epoch": 0.37670018169051805, + "grad_norm": 0.1625969558954239, + "learning_rate": 4.623307056975541e-06, + "loss": 0.9415, + "step": 52040 + }, + { + "epoch": 0.37677256835110423, + "grad_norm": 0.1555289477109909, + "learning_rate": 4.623234670314955e-06, + "loss": 0.9301, + "step": 52050 + }, + { + "epoch": 0.37684495501169046, + "grad_norm": 0.16258127987384796, + "learning_rate": 4.623162283654369e-06, + "loss": 0.9373, + "step": 52060 + }, + { + "epoch": 0.37691734167227664, + "grad_norm": 0.15739430487155914, + "learning_rate": 4.623089896993783e-06, + "loss": 0.9482, + "step": 52070 + }, + { + "epoch": 0.3769897283328628, + "grad_norm": 0.1597059667110443, + "learning_rate": 4.623017510333196e-06, + "loss": 0.9505, + "step": 52080 + }, + { + "epoch": 0.377062114993449, + "grad_norm": 0.16315515339374542, + "learning_rate": 4.62294512367261e-06, + "loss": 0.9478, + "step": 52090 + }, + { + "epoch": 0.37713450165403517, + "grad_norm": 0.17426295578479767, + "learning_rate": 4.6228727370120236e-06, + "loss": 0.9359, + "step": 52100 + }, + { + "epoch": 0.3772068883146214, + "grad_norm": 0.16051162779331207, + "learning_rate": 4.622800350351437e-06, + "loss": 0.9403, + "step": 52110 + }, + { + "epoch": 0.3772792749752076, + "grad_norm": 0.17289088666439056, + "learning_rate": 4.622727963690851e-06, + "loss": 0.9381, + "step": 52120 + }, + { + "epoch": 0.37735166163579376, + "grad_norm": 0.17087596654891968, + "learning_rate": 4.622655577030265e-06, + "loss": 0.9479, + "step": 52130 + }, + { + "epoch": 0.37742404829637993, + "grad_norm": 0.16720347106456757, + "learning_rate": 4.622583190369679e-06, + "loss": 0.936, + "step": 52140 + }, + { + "epoch": 0.3774964349569661, + "grad_norm": 0.18241596221923828, + "learning_rate": 4.6225108037090925e-06, + "loss": 0.943, + "step": 52150 + }, + { + "epoch": 0.37756882161755234, + "grad_norm": 0.17475546896457672, + "learning_rate": 4.622438417048506e-06, + "loss": 0.9408, + "step": 52160 + }, + { + "epoch": 0.3776412082781385, + "grad_norm": 0.15939339995384216, + "learning_rate": 4.6223660303879206e-06, + "loss": 0.948, + "step": 52170 + }, + { + "epoch": 0.3777135949387247, + "grad_norm": 0.1768542230129242, + "learning_rate": 4.622293643727334e-06, + "loss": 0.9561, + "step": 52180 + }, + { + "epoch": 0.37778598159931087, + "grad_norm": 0.15933936834335327, + "learning_rate": 4.622221257066748e-06, + "loss": 0.9486, + "step": 52190 + }, + { + "epoch": 0.37785836825989705, + "grad_norm": 0.16799773275852203, + "learning_rate": 4.622148870406161e-06, + "loss": 0.9352, + "step": 52200 + }, + { + "epoch": 0.3779307549204833, + "grad_norm": 0.2672235369682312, + "learning_rate": 4.622076483745576e-06, + "loss": 0.9393, + "step": 52210 + }, + { + "epoch": 0.37800314158106946, + "grad_norm": 0.1704738289117813, + "learning_rate": 4.6220040970849895e-06, + "loss": 0.941, + "step": 52220 + }, + { + "epoch": 0.37807552824165563, + "grad_norm": 0.16287149488925934, + "learning_rate": 4.621931710424403e-06, + "loss": 0.9248, + "step": 52230 + }, + { + "epoch": 0.3781479149022418, + "grad_norm": 0.1523078829050064, + "learning_rate": 4.621859323763817e-06, + "loss": 0.9593, + "step": 52240 + }, + { + "epoch": 0.378220301562828, + "grad_norm": 0.16731417179107666, + "learning_rate": 4.621786937103231e-06, + "loss": 0.9534, + "step": 52250 + }, + { + "epoch": 0.37829268822341416, + "grad_norm": 0.2127581089735031, + "learning_rate": 4.621714550442645e-06, + "loss": 0.9454, + "step": 52260 + }, + { + "epoch": 0.3783650748840004, + "grad_norm": 0.17544521391391754, + "learning_rate": 4.621642163782058e-06, + "loss": 0.9495, + "step": 52270 + }, + { + "epoch": 0.3784374615445866, + "grad_norm": 0.1671367883682251, + "learning_rate": 4.621569777121472e-06, + "loss": 0.9431, + "step": 52280 + }, + { + "epoch": 0.37850984820517275, + "grad_norm": 0.15478673577308655, + "learning_rate": 4.6214973904608865e-06, + "loss": 0.9475, + "step": 52290 + }, + { + "epoch": 0.3785822348657589, + "grad_norm": 0.1520804613828659, + "learning_rate": 4.6214250038003e-06, + "loss": 0.9435, + "step": 52300 + }, + { + "epoch": 0.3786546215263451, + "grad_norm": 0.1676923781633377, + "learning_rate": 4.621352617139714e-06, + "loss": 0.9409, + "step": 52310 + }, + { + "epoch": 0.37872700818693134, + "grad_norm": 0.15937332808971405, + "learning_rate": 4.621280230479127e-06, + "loss": 0.9341, + "step": 52320 + }, + { + "epoch": 0.3787993948475175, + "grad_norm": 0.16753974556922913, + "learning_rate": 4.621207843818542e-06, + "loss": 0.9533, + "step": 52330 + }, + { + "epoch": 0.3788717815081037, + "grad_norm": 0.17047527432441711, + "learning_rate": 4.621135457157955e-06, + "loss": 0.948, + "step": 52340 + }, + { + "epoch": 0.37894416816868987, + "grad_norm": 0.1727355569601059, + "learning_rate": 4.621063070497369e-06, + "loss": 0.9421, + "step": 52350 + }, + { + "epoch": 0.37901655482927604, + "grad_norm": 0.1628546565771103, + "learning_rate": 4.620990683836783e-06, + "loss": 0.9464, + "step": 52360 + }, + { + "epoch": 0.3790889414898623, + "grad_norm": 0.17123012244701385, + "learning_rate": 4.620918297176197e-06, + "loss": 0.9472, + "step": 52370 + }, + { + "epoch": 0.37916132815044845, + "grad_norm": 0.1762952357530594, + "learning_rate": 4.620845910515611e-06, + "loss": 0.9495, + "step": 52380 + }, + { + "epoch": 0.37923371481103463, + "grad_norm": 0.17170877754688263, + "learning_rate": 4.620773523855024e-06, + "loss": 0.9504, + "step": 52390 + }, + { + "epoch": 0.3793061014716208, + "grad_norm": 0.16942241787910461, + "learning_rate": 4.620701137194438e-06, + "loss": 0.9413, + "step": 52400 + }, + { + "epoch": 0.379378488132207, + "grad_norm": 0.15613293647766113, + "learning_rate": 4.6206287505338524e-06, + "loss": 0.9331, + "step": 52410 + }, + { + "epoch": 0.37945087479279316, + "grad_norm": 0.18218636512756348, + "learning_rate": 4.620556363873266e-06, + "loss": 0.9552, + "step": 52420 + }, + { + "epoch": 0.3795232614533794, + "grad_norm": 0.16046783328056335, + "learning_rate": 4.62048397721268e-06, + "loss": 0.9505, + "step": 52430 + }, + { + "epoch": 0.37959564811396557, + "grad_norm": 0.16807179152965546, + "learning_rate": 4.620411590552093e-06, + "loss": 0.9554, + "step": 52440 + }, + { + "epoch": 0.37966803477455174, + "grad_norm": 0.18919555842876434, + "learning_rate": 4.620339203891508e-06, + "loss": 0.9436, + "step": 52450 + }, + { + "epoch": 0.3797404214351379, + "grad_norm": 0.16013583540916443, + "learning_rate": 4.620266817230921e-06, + "loss": 0.9366, + "step": 52460 + }, + { + "epoch": 0.3798128080957241, + "grad_norm": 0.16914841532707214, + "learning_rate": 4.620194430570335e-06, + "loss": 0.9469, + "step": 52470 + }, + { + "epoch": 0.37988519475631033, + "grad_norm": 0.1607542335987091, + "learning_rate": 4.620122043909749e-06, + "loss": 0.9475, + "step": 52480 + }, + { + "epoch": 0.3799575814168965, + "grad_norm": 0.15814971923828125, + "learning_rate": 4.620049657249163e-06, + "loss": 0.9308, + "step": 52490 + }, + { + "epoch": 0.3800299680774827, + "grad_norm": 0.1508866399526596, + "learning_rate": 4.619977270588577e-06, + "loss": 0.9608, + "step": 52500 + }, + { + "epoch": 0.38010235473806886, + "grad_norm": 0.22690674662590027, + "learning_rate": 4.61990488392799e-06, + "loss": 0.9331, + "step": 52510 + }, + { + "epoch": 0.38017474139865504, + "grad_norm": 0.1720598340034485, + "learning_rate": 4.619832497267404e-06, + "loss": 0.9616, + "step": 52520 + }, + { + "epoch": 0.38024712805924127, + "grad_norm": 0.16492782533168793, + "learning_rate": 4.619760110606818e-06, + "loss": 0.9386, + "step": 52530 + }, + { + "epoch": 0.38031951471982745, + "grad_norm": 0.1547376811504364, + "learning_rate": 4.619687723946232e-06, + "loss": 0.9459, + "step": 52540 + }, + { + "epoch": 0.3803919013804136, + "grad_norm": 0.18093635141849518, + "learning_rate": 4.619615337285646e-06, + "loss": 0.9616, + "step": 52550 + }, + { + "epoch": 0.3804642880409998, + "grad_norm": 0.16667254269123077, + "learning_rate": 4.619542950625059e-06, + "loss": 0.9431, + "step": 52560 + }, + { + "epoch": 0.380536674701586, + "grad_norm": 0.15666356682777405, + "learning_rate": 4.619470563964473e-06, + "loss": 0.9355, + "step": 52570 + }, + { + "epoch": 0.38060906136217215, + "grad_norm": 0.21181073784828186, + "learning_rate": 4.619398177303887e-06, + "loss": 0.9516, + "step": 52580 + }, + { + "epoch": 0.3806814480227584, + "grad_norm": 0.16820743680000305, + "learning_rate": 4.619325790643301e-06, + "loss": 0.942, + "step": 52590 + }, + { + "epoch": 0.38075383468334456, + "grad_norm": 0.19321396946907043, + "learning_rate": 4.6192534039827145e-06, + "loss": 0.9395, + "step": 52600 + }, + { + "epoch": 0.38082622134393074, + "grad_norm": 0.16616934537887573, + "learning_rate": 4.619181017322128e-06, + "loss": 0.9376, + "step": 52610 + }, + { + "epoch": 0.3808986080045169, + "grad_norm": 0.19867192208766937, + "learning_rate": 4.619108630661542e-06, + "loss": 0.945, + "step": 52620 + }, + { + "epoch": 0.3809709946651031, + "grad_norm": 0.15694333612918854, + "learning_rate": 4.619036244000955e-06, + "loss": 0.9276, + "step": 52630 + }, + { + "epoch": 0.3810433813256893, + "grad_norm": 0.16102519631385803, + "learning_rate": 4.61896385734037e-06, + "loss": 0.9493, + "step": 52640 + }, + { + "epoch": 0.3811157679862755, + "grad_norm": 0.15607930719852448, + "learning_rate": 4.6188914706797835e-06, + "loss": 0.9453, + "step": 52650 + }, + { + "epoch": 0.3811881546468617, + "grad_norm": 0.1629965603351593, + "learning_rate": 4.618819084019197e-06, + "loss": 0.9471, + "step": 52660 + }, + { + "epoch": 0.38126054130744785, + "grad_norm": 0.15766113996505737, + "learning_rate": 4.618746697358611e-06, + "loss": 0.9396, + "step": 52670 + }, + { + "epoch": 0.38133292796803403, + "grad_norm": 0.1607961505651474, + "learning_rate": 4.618674310698025e-06, + "loss": 0.9483, + "step": 52680 + }, + { + "epoch": 0.38140531462862026, + "grad_norm": 0.1705811619758606, + "learning_rate": 4.618601924037439e-06, + "loss": 0.9418, + "step": 52690 + }, + { + "epoch": 0.38147770128920644, + "grad_norm": 0.1601417511701584, + "learning_rate": 4.618529537376852e-06, + "loss": 0.9441, + "step": 52700 + }, + { + "epoch": 0.3815500879497926, + "grad_norm": 0.1780814677476883, + "learning_rate": 4.618457150716266e-06, + "loss": 0.9551, + "step": 52710 + }, + { + "epoch": 0.3816224746103788, + "grad_norm": 0.15561501681804657, + "learning_rate": 4.61838476405568e-06, + "loss": 0.9423, + "step": 52720 + }, + { + "epoch": 0.38169486127096497, + "grad_norm": 0.15728692710399628, + "learning_rate": 4.618312377395094e-06, + "loss": 0.9354, + "step": 52730 + }, + { + "epoch": 0.3817672479315512, + "grad_norm": 0.20734450221061707, + "learning_rate": 4.618239990734508e-06, + "loss": 0.9548, + "step": 52740 + }, + { + "epoch": 0.3818396345921374, + "grad_norm": 0.17452536523342133, + "learning_rate": 4.618167604073921e-06, + "loss": 0.9418, + "step": 52750 + }, + { + "epoch": 0.38191202125272355, + "grad_norm": 0.17230959236621857, + "learning_rate": 4.618095217413335e-06, + "loss": 0.9517, + "step": 52760 + }, + { + "epoch": 0.38198440791330973, + "grad_norm": 0.16141583025455475, + "learning_rate": 4.618022830752749e-06, + "loss": 0.9378, + "step": 52770 + }, + { + "epoch": 0.3820567945738959, + "grad_norm": 0.16719284653663635, + "learning_rate": 4.617950444092163e-06, + "loss": 0.9458, + "step": 52780 + }, + { + "epoch": 0.3821291812344821, + "grad_norm": 0.1570596694946289, + "learning_rate": 4.617878057431577e-06, + "loss": 0.9495, + "step": 52790 + }, + { + "epoch": 0.3822015678950683, + "grad_norm": 0.17521663010120392, + "learning_rate": 4.61780567077099e-06, + "loss": 0.9413, + "step": 52800 + }, + { + "epoch": 0.3822739545556545, + "grad_norm": 0.1793406456708908, + "learning_rate": 4.617733284110405e-06, + "loss": 0.9346, + "step": 52810 + }, + { + "epoch": 0.38234634121624067, + "grad_norm": 0.18589749932289124, + "learning_rate": 4.617660897449818e-06, + "loss": 0.9405, + "step": 52820 + }, + { + "epoch": 0.38241872787682685, + "grad_norm": 0.15368783473968506, + "learning_rate": 4.617588510789232e-06, + "loss": 0.9345, + "step": 52830 + }, + { + "epoch": 0.382491114537413, + "grad_norm": 0.1717151403427124, + "learning_rate": 4.6175161241286456e-06, + "loss": 0.9413, + "step": 52840 + }, + { + "epoch": 0.38256350119799926, + "grad_norm": 0.1748562455177307, + "learning_rate": 4.61744373746806e-06, + "loss": 0.9458, + "step": 52850 + }, + { + "epoch": 0.38263588785858543, + "grad_norm": 0.17434179782867432, + "learning_rate": 4.617371350807474e-06, + "loss": 0.9401, + "step": 52860 + }, + { + "epoch": 0.3827082745191716, + "grad_norm": 0.1836162507534027, + "learning_rate": 4.617298964146887e-06, + "loss": 0.9455, + "step": 52870 + }, + { + "epoch": 0.3827806611797578, + "grad_norm": 0.15718111395835876, + "learning_rate": 4.617226577486301e-06, + "loss": 0.9388, + "step": 52880 + }, + { + "epoch": 0.38285304784034396, + "grad_norm": 0.15703000128269196, + "learning_rate": 4.617154190825715e-06, + "loss": 0.9423, + "step": 52890 + }, + { + "epoch": 0.3829254345009302, + "grad_norm": 0.16674445569515228, + "learning_rate": 4.617081804165129e-06, + "loss": 0.951, + "step": 52900 + }, + { + "epoch": 0.38299782116151637, + "grad_norm": 0.157858207821846, + "learning_rate": 4.6170094175045426e-06, + "loss": 0.9484, + "step": 52910 + }, + { + "epoch": 0.38307020782210255, + "grad_norm": 0.17335231602191925, + "learning_rate": 4.616937030843956e-06, + "loss": 0.9482, + "step": 52920 + }, + { + "epoch": 0.3831425944826887, + "grad_norm": 0.16465920209884644, + "learning_rate": 4.616864644183371e-06, + "loss": 0.9378, + "step": 52930 + }, + { + "epoch": 0.3832149811432749, + "grad_norm": 0.17838311195373535, + "learning_rate": 4.616792257522784e-06, + "loss": 0.956, + "step": 52940 + }, + { + "epoch": 0.3832873678038611, + "grad_norm": 0.1656845659017563, + "learning_rate": 4.616719870862198e-06, + "loss": 0.9409, + "step": 52950 + }, + { + "epoch": 0.3833597544644473, + "grad_norm": 0.15346546471118927, + "learning_rate": 4.6166474842016115e-06, + "loss": 0.9334, + "step": 52960 + }, + { + "epoch": 0.3834321411250335, + "grad_norm": 0.17252103984355927, + "learning_rate": 4.616575097541026e-06, + "loss": 0.9422, + "step": 52970 + }, + { + "epoch": 0.38350452778561966, + "grad_norm": 0.16853582859039307, + "learning_rate": 4.6165027108804396e-06, + "loss": 0.944, + "step": 52980 + }, + { + "epoch": 0.38357691444620584, + "grad_norm": 0.17308548092842102, + "learning_rate": 4.616430324219853e-06, + "loss": 0.9396, + "step": 52990 + }, + { + "epoch": 0.383649301106792, + "grad_norm": 0.15582707524299622, + "learning_rate": 4.616357937559267e-06, + "loss": 0.9445, + "step": 53000 + }, + { + "epoch": 0.38372168776737825, + "grad_norm": 0.15688510239124298, + "learning_rate": 4.616285550898681e-06, + "loss": 0.9359, + "step": 53010 + }, + { + "epoch": 0.3837940744279644, + "grad_norm": 0.16674372553825378, + "learning_rate": 4.616213164238095e-06, + "loss": 0.9378, + "step": 53020 + }, + { + "epoch": 0.3838664610885506, + "grad_norm": 0.15617065131664276, + "learning_rate": 4.6161407775775085e-06, + "loss": 0.9443, + "step": 53030 + }, + { + "epoch": 0.3839388477491368, + "grad_norm": 0.18050551414489746, + "learning_rate": 4.616068390916922e-06, + "loss": 0.9282, + "step": 53040 + }, + { + "epoch": 0.38401123440972296, + "grad_norm": 0.18156303465366364, + "learning_rate": 4.6159960042563366e-06, + "loss": 0.9467, + "step": 53050 + }, + { + "epoch": 0.3840836210703092, + "grad_norm": 0.1625535637140274, + "learning_rate": 4.61592361759575e-06, + "loss": 0.9361, + "step": 53060 + }, + { + "epoch": 0.38415600773089537, + "grad_norm": 0.150069460272789, + "learning_rate": 4.615851230935164e-06, + "loss": 0.9384, + "step": 53070 + }, + { + "epoch": 0.38422839439148154, + "grad_norm": 0.17181698977947235, + "learning_rate": 4.615778844274577e-06, + "loss": 0.9275, + "step": 53080 + }, + { + "epoch": 0.3843007810520677, + "grad_norm": 0.16848745942115784, + "learning_rate": 4.615706457613992e-06, + "loss": 0.9463, + "step": 53090 + }, + { + "epoch": 0.3843731677126539, + "grad_norm": 0.15281365811824799, + "learning_rate": 4.6156340709534055e-06, + "loss": 0.9435, + "step": 53100 + }, + { + "epoch": 0.3844455543732401, + "grad_norm": 0.16574618220329285, + "learning_rate": 4.615561684292819e-06, + "loss": 0.9379, + "step": 53110 + }, + { + "epoch": 0.3845179410338263, + "grad_norm": 0.1556422859430313, + "learning_rate": 4.615489297632233e-06, + "loss": 0.9404, + "step": 53120 + }, + { + "epoch": 0.3845903276944125, + "grad_norm": 0.16364730894565582, + "learning_rate": 4.615416910971647e-06, + "loss": 0.9436, + "step": 53130 + }, + { + "epoch": 0.38466271435499866, + "grad_norm": 0.18387825787067413, + "learning_rate": 4.61534452431106e-06, + "loss": 0.9542, + "step": 53140 + }, + { + "epoch": 0.38473510101558483, + "grad_norm": 0.16818496584892273, + "learning_rate": 4.615272137650474e-06, + "loss": 0.9338, + "step": 53150 + }, + { + "epoch": 0.384807487676171, + "grad_norm": 0.17835207283496857, + "learning_rate": 4.615199750989888e-06, + "loss": 0.949, + "step": 53160 + }, + { + "epoch": 0.38487987433675724, + "grad_norm": 0.15151627361774445, + "learning_rate": 4.615127364329302e-06, + "loss": 0.9357, + "step": 53170 + }, + { + "epoch": 0.3849522609973434, + "grad_norm": 0.1659085601568222, + "learning_rate": 4.615054977668715e-06, + "loss": 0.939, + "step": 53180 + }, + { + "epoch": 0.3850246476579296, + "grad_norm": 0.19778911769390106, + "learning_rate": 4.614982591008129e-06, + "loss": 0.95, + "step": 53190 + }, + { + "epoch": 0.3850970343185158, + "grad_norm": 0.1595744490623474, + "learning_rate": 4.614910204347543e-06, + "loss": 0.9478, + "step": 53200 + }, + { + "epoch": 0.38516942097910195, + "grad_norm": 0.16915087401866913, + "learning_rate": 4.614837817686957e-06, + "loss": 0.9462, + "step": 53210 + }, + { + "epoch": 0.3852418076396882, + "grad_norm": 0.19996778666973114, + "learning_rate": 4.614765431026371e-06, + "loss": 0.9251, + "step": 53220 + }, + { + "epoch": 0.38531419430027436, + "grad_norm": 0.17590823769569397, + "learning_rate": 4.614693044365784e-06, + "loss": 0.9577, + "step": 53230 + }, + { + "epoch": 0.38538658096086054, + "grad_norm": 0.16508999466896057, + "learning_rate": 4.614620657705199e-06, + "loss": 0.9411, + "step": 53240 + }, + { + "epoch": 0.3854589676214467, + "grad_norm": 0.15081745386123657, + "learning_rate": 4.614548271044612e-06, + "loss": 0.9475, + "step": 53250 + }, + { + "epoch": 0.3855313542820329, + "grad_norm": 0.17549897730350494, + "learning_rate": 4.614475884384026e-06, + "loss": 0.9367, + "step": 53260 + }, + { + "epoch": 0.38560374094261907, + "grad_norm": 0.1669255644083023, + "learning_rate": 4.6144034977234395e-06, + "loss": 0.9399, + "step": 53270 + }, + { + "epoch": 0.3856761276032053, + "grad_norm": 0.15719322860240936, + "learning_rate": 4.614331111062854e-06, + "loss": 0.9427, + "step": 53280 + }, + { + "epoch": 0.3857485142637915, + "grad_norm": 0.1589440554380417, + "learning_rate": 4.614258724402268e-06, + "loss": 0.9389, + "step": 53290 + }, + { + "epoch": 0.38582090092437765, + "grad_norm": 0.1575065553188324, + "learning_rate": 4.614186337741681e-06, + "loss": 0.9479, + "step": 53300 + }, + { + "epoch": 0.38589328758496383, + "grad_norm": 0.18554028868675232, + "learning_rate": 4.614113951081095e-06, + "loss": 0.9357, + "step": 53310 + }, + { + "epoch": 0.38596567424555, + "grad_norm": 0.1604183316230774, + "learning_rate": 4.614041564420509e-06, + "loss": 0.9588, + "step": 53320 + }, + { + "epoch": 0.38603806090613624, + "grad_norm": 0.18416175246238708, + "learning_rate": 4.613969177759923e-06, + "loss": 0.9475, + "step": 53330 + }, + { + "epoch": 0.3861104475667224, + "grad_norm": 0.17273585498332977, + "learning_rate": 4.6138967910993365e-06, + "loss": 0.9493, + "step": 53340 + }, + { + "epoch": 0.3861828342273086, + "grad_norm": 0.15361787378787994, + "learning_rate": 4.61382440443875e-06, + "loss": 0.9467, + "step": 53350 + }, + { + "epoch": 0.38625522088789477, + "grad_norm": 0.16739359498023987, + "learning_rate": 4.613752017778164e-06, + "loss": 0.9473, + "step": 53360 + }, + { + "epoch": 0.38632760754848094, + "grad_norm": 0.17038732767105103, + "learning_rate": 4.613679631117578e-06, + "loss": 0.9526, + "step": 53370 + }, + { + "epoch": 0.3863999942090672, + "grad_norm": 0.16086862981319427, + "learning_rate": 4.613607244456992e-06, + "loss": 0.9518, + "step": 53380 + }, + { + "epoch": 0.38647238086965335, + "grad_norm": 0.18573077023029327, + "learning_rate": 4.6135348577964055e-06, + "loss": 0.9483, + "step": 53390 + }, + { + "epoch": 0.38654476753023953, + "grad_norm": 0.17494763433933258, + "learning_rate": 4.613462471135819e-06, + "loss": 0.9391, + "step": 53400 + }, + { + "epoch": 0.3866171541908257, + "grad_norm": 0.19013044238090515, + "learning_rate": 4.6133900844752335e-06, + "loss": 0.9467, + "step": 53410 + }, + { + "epoch": 0.3866895408514119, + "grad_norm": 0.16894245147705078, + "learning_rate": 4.613317697814647e-06, + "loss": 0.9394, + "step": 53420 + }, + { + "epoch": 0.3867619275119981, + "grad_norm": 0.15704815089702606, + "learning_rate": 4.613245311154061e-06, + "loss": 0.9343, + "step": 53430 + }, + { + "epoch": 0.3868343141725843, + "grad_norm": 0.15952664613723755, + "learning_rate": 4.613172924493474e-06, + "loss": 0.9402, + "step": 53440 + }, + { + "epoch": 0.38690670083317047, + "grad_norm": 0.15518204867839813, + "learning_rate": 4.613100537832889e-06, + "loss": 0.9413, + "step": 53450 + }, + { + "epoch": 0.38697908749375665, + "grad_norm": 0.16507835686206818, + "learning_rate": 4.6130281511723025e-06, + "loss": 0.9396, + "step": 53460 + }, + { + "epoch": 0.3870514741543428, + "grad_norm": 0.17540588974952698, + "learning_rate": 4.612955764511716e-06, + "loss": 0.9663, + "step": 53470 + }, + { + "epoch": 0.387123860814929, + "grad_norm": 0.16836467385292053, + "learning_rate": 4.61288337785113e-06, + "loss": 0.9458, + "step": 53480 + }, + { + "epoch": 0.38719624747551523, + "grad_norm": 0.17496466636657715, + "learning_rate": 4.612810991190544e-06, + "loss": 0.9404, + "step": 53490 + }, + { + "epoch": 0.3872686341361014, + "grad_norm": 0.17381425201892853, + "learning_rate": 4.612738604529958e-06, + "loss": 0.9517, + "step": 53500 + }, + { + "epoch": 0.3873410207966876, + "grad_norm": 0.15964674949645996, + "learning_rate": 4.612666217869371e-06, + "loss": 0.9452, + "step": 53510 + }, + { + "epoch": 0.38741340745727376, + "grad_norm": 0.16532573103904724, + "learning_rate": 4.612593831208785e-06, + "loss": 0.9341, + "step": 53520 + }, + { + "epoch": 0.38748579411785994, + "grad_norm": 0.17337869107723236, + "learning_rate": 4.6125214445481995e-06, + "loss": 0.9402, + "step": 53530 + }, + { + "epoch": 0.38755818077844617, + "grad_norm": 0.1612296998500824, + "learning_rate": 4.612449057887613e-06, + "loss": 0.9458, + "step": 53540 + }, + { + "epoch": 0.38763056743903235, + "grad_norm": 0.1577647626399994, + "learning_rate": 4.612376671227027e-06, + "loss": 0.9357, + "step": 53550 + }, + { + "epoch": 0.3877029540996185, + "grad_norm": 0.1562313437461853, + "learning_rate": 4.61230428456644e-06, + "loss": 0.9474, + "step": 53560 + }, + { + "epoch": 0.3877753407602047, + "grad_norm": 0.1620018631219864, + "learning_rate": 4.612231897905855e-06, + "loss": 0.9408, + "step": 53570 + }, + { + "epoch": 0.3878477274207909, + "grad_norm": 0.21448646485805511, + "learning_rate": 4.612159511245268e-06, + "loss": 0.954, + "step": 53580 + }, + { + "epoch": 0.3879201140813771, + "grad_norm": 0.16676433384418488, + "learning_rate": 4.612087124584682e-06, + "loss": 0.9484, + "step": 53590 + }, + { + "epoch": 0.3879925007419633, + "grad_norm": 0.16710571944713593, + "learning_rate": 4.612014737924096e-06, + "loss": 0.9479, + "step": 53600 + }, + { + "epoch": 0.38806488740254946, + "grad_norm": 0.1688128262758255, + "learning_rate": 4.61194235126351e-06, + "loss": 0.941, + "step": 53610 + }, + { + "epoch": 0.38813727406313564, + "grad_norm": 0.15826979279518127, + "learning_rate": 4.611869964602924e-06, + "loss": 0.9407, + "step": 53620 + }, + { + "epoch": 0.3882096607237218, + "grad_norm": 0.3778460621833801, + "learning_rate": 4.611797577942337e-06, + "loss": 0.9465, + "step": 53630 + }, + { + "epoch": 0.388282047384308, + "grad_norm": 0.1658174842596054, + "learning_rate": 4.611725191281751e-06, + "loss": 0.9414, + "step": 53640 + }, + { + "epoch": 0.3883544340448942, + "grad_norm": 0.16166210174560547, + "learning_rate": 4.611652804621165e-06, + "loss": 0.9495, + "step": 53650 + }, + { + "epoch": 0.3884268207054804, + "grad_norm": 0.17662234604358673, + "learning_rate": 4.611580417960579e-06, + "loss": 0.9516, + "step": 53660 + }, + { + "epoch": 0.3884992073660666, + "grad_norm": 0.16207055747509003, + "learning_rate": 4.611508031299992e-06, + "loss": 0.9417, + "step": 53670 + }, + { + "epoch": 0.38857159402665276, + "grad_norm": 0.17646333575248718, + "learning_rate": 4.611435644639406e-06, + "loss": 0.9415, + "step": 53680 + }, + { + "epoch": 0.38864398068723893, + "grad_norm": 0.15925532579421997, + "learning_rate": 4.61136325797882e-06, + "loss": 0.9373, + "step": 53690 + }, + { + "epoch": 0.38871636734782516, + "grad_norm": 0.15857084095478058, + "learning_rate": 4.6112908713182335e-06, + "loss": 0.9461, + "step": 53700 + }, + { + "epoch": 0.38878875400841134, + "grad_norm": 0.16620729863643646, + "learning_rate": 4.611218484657647e-06, + "loss": 0.9419, + "step": 53710 + }, + { + "epoch": 0.3888611406689975, + "grad_norm": 0.14982983469963074, + "learning_rate": 4.6111460979970616e-06, + "loss": 0.9434, + "step": 53720 + }, + { + "epoch": 0.3889335273295837, + "grad_norm": 0.16091054677963257, + "learning_rate": 4.611073711336475e-06, + "loss": 0.939, + "step": 53730 + }, + { + "epoch": 0.38900591399016987, + "grad_norm": 0.17934353649616241, + "learning_rate": 4.611001324675889e-06, + "loss": 0.9422, + "step": 53740 + }, + { + "epoch": 0.3890783006507561, + "grad_norm": 0.22706985473632812, + "learning_rate": 4.610928938015302e-06, + "loss": 0.9458, + "step": 53750 + }, + { + "epoch": 0.3891506873113423, + "grad_norm": 0.19281072914600372, + "learning_rate": 4.610856551354717e-06, + "loss": 0.9466, + "step": 53760 + }, + { + "epoch": 0.38922307397192846, + "grad_norm": 0.17095661163330078, + "learning_rate": 4.6107841646941305e-06, + "loss": 0.9353, + "step": 53770 + }, + { + "epoch": 0.38929546063251463, + "grad_norm": 0.1668483316898346, + "learning_rate": 4.610711778033544e-06, + "loss": 0.948, + "step": 53780 + }, + { + "epoch": 0.3893678472931008, + "grad_norm": 0.1767456829547882, + "learning_rate": 4.610639391372958e-06, + "loss": 0.9435, + "step": 53790 + }, + { + "epoch": 0.389440233953687, + "grad_norm": 0.15454381704330444, + "learning_rate": 4.610567004712372e-06, + "loss": 0.9393, + "step": 53800 + }, + { + "epoch": 0.3895126206142732, + "grad_norm": 0.16419751942157745, + "learning_rate": 4.610494618051786e-06, + "loss": 0.9305, + "step": 53810 + }, + { + "epoch": 0.3895850072748594, + "grad_norm": 0.1819341629743576, + "learning_rate": 4.610422231391199e-06, + "loss": 0.9309, + "step": 53820 + }, + { + "epoch": 0.3896573939354456, + "grad_norm": 0.15951742231845856, + "learning_rate": 4.610349844730613e-06, + "loss": 0.945, + "step": 53830 + }, + { + "epoch": 0.38972978059603175, + "grad_norm": 0.15791021287441254, + "learning_rate": 4.6102774580700275e-06, + "loss": 0.9441, + "step": 53840 + }, + { + "epoch": 0.3898021672566179, + "grad_norm": 0.16241905093193054, + "learning_rate": 4.610205071409441e-06, + "loss": 0.9372, + "step": 53850 + }, + { + "epoch": 0.38987455391720416, + "grad_norm": 0.1761135458946228, + "learning_rate": 4.610132684748855e-06, + "loss": 0.9509, + "step": 53860 + }, + { + "epoch": 0.38994694057779034, + "grad_norm": 0.16063787043094635, + "learning_rate": 4.610060298088268e-06, + "loss": 0.9453, + "step": 53870 + }, + { + "epoch": 0.3900193272383765, + "grad_norm": 0.17681707441806793, + "learning_rate": 4.609987911427683e-06, + "loss": 0.9474, + "step": 53880 + }, + { + "epoch": 0.3900917138989627, + "grad_norm": 0.18907004594802856, + "learning_rate": 4.6099155247670964e-06, + "loss": 0.9496, + "step": 53890 + }, + { + "epoch": 0.39016410055954887, + "grad_norm": 0.1664309799671173, + "learning_rate": 4.60984313810651e-06, + "loss": 0.9371, + "step": 53900 + }, + { + "epoch": 0.3902364872201351, + "grad_norm": 0.16614876687526703, + "learning_rate": 4.609770751445924e-06, + "loss": 0.9383, + "step": 53910 + }, + { + "epoch": 0.3903088738807213, + "grad_norm": 0.16756542026996613, + "learning_rate": 4.609698364785338e-06, + "loss": 0.9444, + "step": 53920 + }, + { + "epoch": 0.39038126054130745, + "grad_norm": 0.19262923300266266, + "learning_rate": 4.609625978124752e-06, + "loss": 0.9412, + "step": 53930 + }, + { + "epoch": 0.3904536472018936, + "grad_norm": 0.1710319072008133, + "learning_rate": 4.609553591464165e-06, + "loss": 0.9517, + "step": 53940 + }, + { + "epoch": 0.3905260338624798, + "grad_norm": 0.15746866166591644, + "learning_rate": 4.609481204803579e-06, + "loss": 0.9364, + "step": 53950 + }, + { + "epoch": 0.39059842052306604, + "grad_norm": 0.16860923171043396, + "learning_rate": 4.6094088181429934e-06, + "loss": 0.9342, + "step": 53960 + }, + { + "epoch": 0.3906708071836522, + "grad_norm": 0.20959050953388214, + "learning_rate": 4.609336431482407e-06, + "loss": 0.9483, + "step": 53970 + }, + { + "epoch": 0.3907431938442384, + "grad_norm": 0.1634407639503479, + "learning_rate": 4.609264044821821e-06, + "loss": 0.9353, + "step": 53980 + }, + { + "epoch": 0.39081558050482457, + "grad_norm": 0.16322919726371765, + "learning_rate": 4.609191658161234e-06, + "loss": 0.9264, + "step": 53990 + }, + { + "epoch": 0.39088796716541074, + "grad_norm": 0.14622409641742706, + "learning_rate": 4.609119271500648e-06, + "loss": 0.9348, + "step": 54000 + }, + { + "epoch": 0.3909603538259969, + "grad_norm": 0.1500609666109085, + "learning_rate": 4.609046884840062e-06, + "loss": 0.9372, + "step": 54010 + }, + { + "epoch": 0.39103274048658315, + "grad_norm": 0.18767625093460083, + "learning_rate": 4.608974498179476e-06, + "loss": 0.9268, + "step": 54020 + }, + { + "epoch": 0.39110512714716933, + "grad_norm": 0.16851924359798431, + "learning_rate": 4.60890211151889e-06, + "loss": 0.9589, + "step": 54030 + }, + { + "epoch": 0.3911775138077555, + "grad_norm": 0.1965654194355011, + "learning_rate": 4.608829724858303e-06, + "loss": 0.9422, + "step": 54040 + }, + { + "epoch": 0.3912499004683417, + "grad_norm": 0.15963523089885712, + "learning_rate": 4.608757338197718e-06, + "loss": 0.9442, + "step": 54050 + }, + { + "epoch": 0.39132228712892786, + "grad_norm": 0.16074392199516296, + "learning_rate": 4.608684951537131e-06, + "loss": 0.9486, + "step": 54060 + }, + { + "epoch": 0.3913946737895141, + "grad_norm": 0.1722477674484253, + "learning_rate": 4.608612564876545e-06, + "loss": 0.9376, + "step": 54070 + }, + { + "epoch": 0.39146706045010027, + "grad_norm": 0.16781273484230042, + "learning_rate": 4.6085401782159585e-06, + "loss": 0.9461, + "step": 54080 + }, + { + "epoch": 0.39153944711068644, + "grad_norm": 0.17819204926490784, + "learning_rate": 4.608467791555373e-06, + "loss": 0.9365, + "step": 54090 + }, + { + "epoch": 0.3916118337712726, + "grad_norm": 0.1631801426410675, + "learning_rate": 4.608395404894787e-06, + "loss": 0.9405, + "step": 54100 + }, + { + "epoch": 0.3916842204318588, + "grad_norm": 0.1547449231147766, + "learning_rate": 4.6083230182342e-06, + "loss": 0.9377, + "step": 54110 + }, + { + "epoch": 0.39175660709244503, + "grad_norm": 0.16404055058956146, + "learning_rate": 4.608250631573614e-06, + "loss": 0.9356, + "step": 54120 + }, + { + "epoch": 0.3918289937530312, + "grad_norm": 0.1674279123544693, + "learning_rate": 4.608178244913028e-06, + "loss": 0.9219, + "step": 54130 + }, + { + "epoch": 0.3919013804136174, + "grad_norm": 0.14838223159313202, + "learning_rate": 4.608105858252442e-06, + "loss": 0.9361, + "step": 54140 + }, + { + "epoch": 0.39197376707420356, + "grad_norm": 0.16514058411121368, + "learning_rate": 4.6080334715918555e-06, + "loss": 0.9414, + "step": 54150 + }, + { + "epoch": 0.39204615373478974, + "grad_norm": 0.16169683635234833, + "learning_rate": 4.607961084931269e-06, + "loss": 0.9349, + "step": 54160 + }, + { + "epoch": 0.3921185403953759, + "grad_norm": 0.1660471111536026, + "learning_rate": 4.607888698270684e-06, + "loss": 0.9509, + "step": 54170 + }, + { + "epoch": 0.39219092705596215, + "grad_norm": 0.1746322214603424, + "learning_rate": 4.607816311610097e-06, + "loss": 0.9371, + "step": 54180 + }, + { + "epoch": 0.3922633137165483, + "grad_norm": 0.16147038340568542, + "learning_rate": 4.607743924949511e-06, + "loss": 0.9442, + "step": 54190 + }, + { + "epoch": 0.3923357003771345, + "grad_norm": 0.1733742505311966, + "learning_rate": 4.6076715382889245e-06, + "loss": 0.9382, + "step": 54200 + }, + { + "epoch": 0.3924080870377207, + "grad_norm": 0.18472987413406372, + "learning_rate": 4.607599151628338e-06, + "loss": 0.9304, + "step": 54210 + }, + { + "epoch": 0.39248047369830685, + "grad_norm": 0.16237011551856995, + "learning_rate": 4.607526764967752e-06, + "loss": 0.9406, + "step": 54220 + }, + { + "epoch": 0.3925528603588931, + "grad_norm": 0.2291804850101471, + "learning_rate": 4.607454378307165e-06, + "loss": 0.9429, + "step": 54230 + }, + { + "epoch": 0.39262524701947926, + "grad_norm": 0.15868277847766876, + "learning_rate": 4.60738199164658e-06, + "loss": 0.9355, + "step": 54240 + }, + { + "epoch": 0.39269763368006544, + "grad_norm": 0.17885923385620117, + "learning_rate": 4.607309604985993e-06, + "loss": 0.9412, + "step": 54250 + }, + { + "epoch": 0.3927700203406516, + "grad_norm": 0.1670595407485962, + "learning_rate": 4.607237218325407e-06, + "loss": 0.9417, + "step": 54260 + }, + { + "epoch": 0.3928424070012378, + "grad_norm": 0.15766389667987823, + "learning_rate": 4.607164831664821e-06, + "loss": 0.9434, + "step": 54270 + }, + { + "epoch": 0.392914793661824, + "grad_norm": 0.16400344669818878, + "learning_rate": 4.607092445004235e-06, + "loss": 0.9412, + "step": 54280 + }, + { + "epoch": 0.3929871803224102, + "grad_norm": 0.16506126523017883, + "learning_rate": 4.607020058343649e-06, + "loss": 0.9409, + "step": 54290 + }, + { + "epoch": 0.3930595669829964, + "grad_norm": 0.16669459640979767, + "learning_rate": 4.606947671683062e-06, + "loss": 0.9329, + "step": 54300 + }, + { + "epoch": 0.39313195364358255, + "grad_norm": 0.18172895908355713, + "learning_rate": 4.606875285022476e-06, + "loss": 0.9446, + "step": 54310 + }, + { + "epoch": 0.39320434030416873, + "grad_norm": 0.1678895652294159, + "learning_rate": 4.60680289836189e-06, + "loss": 0.9421, + "step": 54320 + }, + { + "epoch": 0.3932767269647549, + "grad_norm": 0.15718461573123932, + "learning_rate": 4.606730511701304e-06, + "loss": 0.9324, + "step": 54330 + }, + { + "epoch": 0.39334911362534114, + "grad_norm": 0.16904427111148834, + "learning_rate": 4.606658125040718e-06, + "loss": 0.9321, + "step": 54340 + }, + { + "epoch": 0.3934215002859273, + "grad_norm": 0.16376027464866638, + "learning_rate": 4.606585738380131e-06, + "loss": 0.9483, + "step": 54350 + }, + { + "epoch": 0.3934938869465135, + "grad_norm": 0.15465879440307617, + "learning_rate": 4.606513351719546e-06, + "loss": 0.931, + "step": 54360 + }, + { + "epoch": 0.39356627360709967, + "grad_norm": 0.171060249209404, + "learning_rate": 4.606440965058959e-06, + "loss": 0.9366, + "step": 54370 + }, + { + "epoch": 0.39363866026768585, + "grad_norm": 0.1620955467224121, + "learning_rate": 4.606368578398373e-06, + "loss": 0.9333, + "step": 54380 + }, + { + "epoch": 0.3937110469282721, + "grad_norm": 0.1702883541584015, + "learning_rate": 4.6062961917377866e-06, + "loss": 0.9442, + "step": 54390 + }, + { + "epoch": 0.39378343358885826, + "grad_norm": 0.16319170594215393, + "learning_rate": 4.606223805077201e-06, + "loss": 0.9315, + "step": 54400 + }, + { + "epoch": 0.39385582024944443, + "grad_norm": 0.16425073146820068, + "learning_rate": 4.606151418416615e-06, + "loss": 0.9407, + "step": 54410 + }, + { + "epoch": 0.3939282069100306, + "grad_norm": 0.1441926509141922, + "learning_rate": 4.606079031756028e-06, + "loss": 0.9393, + "step": 54420 + }, + { + "epoch": 0.3940005935706168, + "grad_norm": 0.1872769445180893, + "learning_rate": 4.606006645095442e-06, + "loss": 0.9417, + "step": 54430 + }, + { + "epoch": 0.394072980231203, + "grad_norm": 0.15714821219444275, + "learning_rate": 4.605934258434856e-06, + "loss": 0.9343, + "step": 54440 + }, + { + "epoch": 0.3941453668917892, + "grad_norm": 0.16071169078350067, + "learning_rate": 4.60586187177427e-06, + "loss": 0.9447, + "step": 54450 + }, + { + "epoch": 0.39421775355237537, + "grad_norm": 0.15835289657115936, + "learning_rate": 4.6057894851136836e-06, + "loss": 0.95, + "step": 54460 + }, + { + "epoch": 0.39429014021296155, + "grad_norm": 0.15297073125839233, + "learning_rate": 4.605717098453097e-06, + "loss": 0.9402, + "step": 54470 + }, + { + "epoch": 0.3943625268735477, + "grad_norm": 0.1636633574962616, + "learning_rate": 4.605644711792512e-06, + "loss": 0.9444, + "step": 54480 + }, + { + "epoch": 0.39443491353413396, + "grad_norm": 0.15706664323806763, + "learning_rate": 4.605572325131925e-06, + "loss": 0.9578, + "step": 54490 + }, + { + "epoch": 0.39450730019472013, + "grad_norm": 0.15871857106685638, + "learning_rate": 4.605499938471339e-06, + "loss": 0.9271, + "step": 54500 + }, + { + "epoch": 0.3945796868553063, + "grad_norm": 0.16093158721923828, + "learning_rate": 4.6054275518107525e-06, + "loss": 0.9584, + "step": 54510 + }, + { + "epoch": 0.3946520735158925, + "grad_norm": 0.2100452184677124, + "learning_rate": 4.605355165150167e-06, + "loss": 0.9364, + "step": 54520 + }, + { + "epoch": 0.39472446017647866, + "grad_norm": 0.1580246388912201, + "learning_rate": 4.6052827784895806e-06, + "loss": 0.9394, + "step": 54530 + }, + { + "epoch": 0.39479684683706484, + "grad_norm": 0.16030821204185486, + "learning_rate": 4.605210391828994e-06, + "loss": 0.9389, + "step": 54540 + }, + { + "epoch": 0.3948692334976511, + "grad_norm": 0.17322495579719543, + "learning_rate": 4.605138005168408e-06, + "loss": 0.9363, + "step": 54550 + }, + { + "epoch": 0.39494162015823725, + "grad_norm": 0.19092749059200287, + "learning_rate": 4.605065618507822e-06, + "loss": 0.9458, + "step": 54560 + }, + { + "epoch": 0.3950140068188234, + "grad_norm": 0.14710497856140137, + "learning_rate": 4.604993231847236e-06, + "loss": 0.9424, + "step": 54570 + }, + { + "epoch": 0.3950863934794096, + "grad_norm": 0.17393282055854797, + "learning_rate": 4.6049208451866495e-06, + "loss": 0.9462, + "step": 54580 + }, + { + "epoch": 0.3951587801399958, + "grad_norm": 0.1747346818447113, + "learning_rate": 4.604848458526063e-06, + "loss": 0.9349, + "step": 54590 + }, + { + "epoch": 0.395231166800582, + "grad_norm": 0.1654556542634964, + "learning_rate": 4.604776071865477e-06, + "loss": 0.9427, + "step": 54600 + }, + { + "epoch": 0.3953035534611682, + "grad_norm": 0.1744566261768341, + "learning_rate": 4.604703685204891e-06, + "loss": 0.9456, + "step": 54610 + }, + { + "epoch": 0.39537594012175437, + "grad_norm": 0.1695273369550705, + "learning_rate": 4.604631298544305e-06, + "loss": 0.943, + "step": 54620 + }, + { + "epoch": 0.39544832678234054, + "grad_norm": 0.17201577126979828, + "learning_rate": 4.6045589118837184e-06, + "loss": 0.9452, + "step": 54630 + }, + { + "epoch": 0.3955207134429267, + "grad_norm": 0.18179161846637726, + "learning_rate": 4.604486525223132e-06, + "loss": 0.93, + "step": 54640 + }, + { + "epoch": 0.39559310010351295, + "grad_norm": 0.16751891374588013, + "learning_rate": 4.6044141385625465e-06, + "loss": 0.9283, + "step": 54650 + }, + { + "epoch": 0.39566548676409913, + "grad_norm": 0.15544885396957397, + "learning_rate": 4.60434175190196e-06, + "loss": 0.946, + "step": 54660 + }, + { + "epoch": 0.3957378734246853, + "grad_norm": 0.15887774527072906, + "learning_rate": 4.604269365241374e-06, + "loss": 0.9441, + "step": 54670 + }, + { + "epoch": 0.3958102600852715, + "grad_norm": 0.16352955996990204, + "learning_rate": 4.604196978580787e-06, + "loss": 0.9427, + "step": 54680 + }, + { + "epoch": 0.39588264674585766, + "grad_norm": 0.1617666482925415, + "learning_rate": 4.604124591920202e-06, + "loss": 0.9415, + "step": 54690 + }, + { + "epoch": 0.39595503340644383, + "grad_norm": 0.15895050764083862, + "learning_rate": 4.6040522052596154e-06, + "loss": 0.953, + "step": 54700 + }, + { + "epoch": 0.39602742006703007, + "grad_norm": 0.158672496676445, + "learning_rate": 4.603979818599029e-06, + "loss": 0.9369, + "step": 54710 + }, + { + "epoch": 0.39609980672761624, + "grad_norm": 0.17853258550167084, + "learning_rate": 4.603907431938443e-06, + "loss": 0.9429, + "step": 54720 + }, + { + "epoch": 0.3961721933882024, + "grad_norm": 0.1713215857744217, + "learning_rate": 4.603835045277856e-06, + "loss": 0.935, + "step": 54730 + }, + { + "epoch": 0.3962445800487886, + "grad_norm": 0.1543017029762268, + "learning_rate": 4.60376265861727e-06, + "loss": 0.9297, + "step": 54740 + }, + { + "epoch": 0.3963169667093748, + "grad_norm": 0.17130827903747559, + "learning_rate": 4.603690271956684e-06, + "loss": 0.9472, + "step": 54750 + }, + { + "epoch": 0.396389353369961, + "grad_norm": 0.1728035807609558, + "learning_rate": 4.603617885296098e-06, + "loss": 0.9448, + "step": 54760 + }, + { + "epoch": 0.3964617400305472, + "grad_norm": 0.16144831478595734, + "learning_rate": 4.603545498635512e-06, + "loss": 0.9515, + "step": 54770 + }, + { + "epoch": 0.39653412669113336, + "grad_norm": 0.15876467525959015, + "learning_rate": 4.603473111974925e-06, + "loss": 0.945, + "step": 54780 + }, + { + "epoch": 0.39660651335171954, + "grad_norm": 0.1569388210773468, + "learning_rate": 4.603400725314339e-06, + "loss": 0.9366, + "step": 54790 + }, + { + "epoch": 0.3966789000123057, + "grad_norm": 0.1643538773059845, + "learning_rate": 4.603328338653753e-06, + "loss": 0.9337, + "step": 54800 + }, + { + "epoch": 0.39675128667289195, + "grad_norm": 0.15085361897945404, + "learning_rate": 4.603255951993167e-06, + "loss": 0.9506, + "step": 54810 + }, + { + "epoch": 0.3968236733334781, + "grad_norm": 0.15235082805156708, + "learning_rate": 4.6031835653325805e-06, + "loss": 0.9261, + "step": 54820 + }, + { + "epoch": 0.3968960599940643, + "grad_norm": 0.15740101039409637, + "learning_rate": 4.603111178671994e-06, + "loss": 0.9483, + "step": 54830 + }, + { + "epoch": 0.3969684466546505, + "grad_norm": 0.17915703356266022, + "learning_rate": 4.603038792011409e-06, + "loss": 0.9333, + "step": 54840 + }, + { + "epoch": 0.39704083331523665, + "grad_norm": 0.18708273768424988, + "learning_rate": 4.602966405350822e-06, + "loss": 0.9457, + "step": 54850 + }, + { + "epoch": 0.39711321997582283, + "grad_norm": 0.1566181778907776, + "learning_rate": 4.602894018690236e-06, + "loss": 0.9296, + "step": 54860 + }, + { + "epoch": 0.39718560663640906, + "grad_norm": 0.20385728776454926, + "learning_rate": 4.6028216320296495e-06, + "loss": 0.9426, + "step": 54870 + }, + { + "epoch": 0.39725799329699524, + "grad_norm": 0.15828682482242584, + "learning_rate": 4.602749245369064e-06, + "loss": 0.9304, + "step": 54880 + }, + { + "epoch": 0.3973303799575814, + "grad_norm": 0.16250182688236237, + "learning_rate": 4.6026768587084775e-06, + "loss": 0.9444, + "step": 54890 + }, + { + "epoch": 0.3974027666181676, + "grad_norm": 0.16645143926143646, + "learning_rate": 4.602604472047891e-06, + "loss": 0.9462, + "step": 54900 + }, + { + "epoch": 0.39747515327875377, + "grad_norm": 0.17297451198101044, + "learning_rate": 4.602532085387305e-06, + "loss": 0.9392, + "step": 54910 + }, + { + "epoch": 0.39754753993934, + "grad_norm": 0.16319099068641663, + "learning_rate": 4.602459698726719e-06, + "loss": 0.9384, + "step": 54920 + }, + { + "epoch": 0.3976199265999262, + "grad_norm": 0.17599353194236755, + "learning_rate": 4.602387312066133e-06, + "loss": 0.9392, + "step": 54930 + }, + { + "epoch": 0.39769231326051235, + "grad_norm": 0.16673387587070465, + "learning_rate": 4.6023149254055465e-06, + "loss": 0.9367, + "step": 54940 + }, + { + "epoch": 0.39776469992109853, + "grad_norm": 0.17548586428165436, + "learning_rate": 4.60224253874496e-06, + "loss": 0.9384, + "step": 54950 + }, + { + "epoch": 0.3978370865816847, + "grad_norm": 0.15889111161231995, + "learning_rate": 4.6021701520843745e-06, + "loss": 0.9319, + "step": 54960 + }, + { + "epoch": 0.39790947324227094, + "grad_norm": 0.15126605331897736, + "learning_rate": 4.602097765423788e-06, + "loss": 0.9389, + "step": 54970 + }, + { + "epoch": 0.3979818599028571, + "grad_norm": 0.1905619353055954, + "learning_rate": 4.602025378763202e-06, + "loss": 0.9516, + "step": 54980 + }, + { + "epoch": 0.3980542465634433, + "grad_norm": 0.18187619745731354, + "learning_rate": 4.601952992102615e-06, + "loss": 0.9277, + "step": 54990 + }, + { + "epoch": 0.39812663322402947, + "grad_norm": 0.1634269803762436, + "learning_rate": 4.60188060544203e-06, + "loss": 0.9381, + "step": 55000 + }, + { + "epoch": 0.39819901988461565, + "grad_norm": 0.16369114816188812, + "learning_rate": 4.6018082187814435e-06, + "loss": 0.9403, + "step": 55010 + }, + { + "epoch": 0.3982714065452018, + "grad_norm": 0.17202910780906677, + "learning_rate": 4.601735832120857e-06, + "loss": 0.9411, + "step": 55020 + }, + { + "epoch": 0.39834379320578805, + "grad_norm": 0.17395305633544922, + "learning_rate": 4.601663445460271e-06, + "loss": 0.9236, + "step": 55030 + }, + { + "epoch": 0.39841617986637423, + "grad_norm": 0.17423111200332642, + "learning_rate": 4.601591058799685e-06, + "loss": 0.9404, + "step": 55040 + }, + { + "epoch": 0.3984885665269604, + "grad_norm": 0.15829895436763763, + "learning_rate": 4.601518672139099e-06, + "loss": 0.9254, + "step": 55050 + }, + { + "epoch": 0.3985609531875466, + "grad_norm": 0.17353561520576477, + "learning_rate": 4.601446285478512e-06, + "loss": 0.9326, + "step": 55060 + }, + { + "epoch": 0.39863333984813276, + "grad_norm": 0.1960097998380661, + "learning_rate": 4.601373898817926e-06, + "loss": 0.9582, + "step": 55070 + }, + { + "epoch": 0.398705726508719, + "grad_norm": 0.15898558497428894, + "learning_rate": 4.6013015121573405e-06, + "loss": 0.9513, + "step": 55080 + }, + { + "epoch": 0.39877811316930517, + "grad_norm": 0.17398889362812042, + "learning_rate": 4.601229125496754e-06, + "loss": 0.9299, + "step": 55090 + }, + { + "epoch": 0.39885049982989135, + "grad_norm": 0.21108154952526093, + "learning_rate": 4.601156738836168e-06, + "loss": 0.9514, + "step": 55100 + }, + { + "epoch": 0.3989228864904775, + "grad_norm": 0.15072329342365265, + "learning_rate": 4.601084352175581e-06, + "loss": 0.9276, + "step": 55110 + }, + { + "epoch": 0.3989952731510637, + "grad_norm": 0.18398626148700714, + "learning_rate": 4.601011965514996e-06, + "loss": 0.9373, + "step": 55120 + }, + { + "epoch": 0.39906765981164993, + "grad_norm": 0.26923868060112, + "learning_rate": 4.600939578854409e-06, + "loss": 0.9442, + "step": 55130 + }, + { + "epoch": 0.3991400464722361, + "grad_norm": 0.16427594423294067, + "learning_rate": 4.600867192193823e-06, + "loss": 0.9444, + "step": 55140 + }, + { + "epoch": 0.3992124331328223, + "grad_norm": 0.17052388191223145, + "learning_rate": 4.600794805533237e-06, + "loss": 0.947, + "step": 55150 + }, + { + "epoch": 0.39928481979340846, + "grad_norm": 0.18360772728919983, + "learning_rate": 4.600722418872651e-06, + "loss": 0.9419, + "step": 55160 + }, + { + "epoch": 0.39935720645399464, + "grad_norm": 0.16440702974796295, + "learning_rate": 4.600650032212065e-06, + "loss": 0.9214, + "step": 55170 + }, + { + "epoch": 0.39942959311458087, + "grad_norm": 0.15219788253307343, + "learning_rate": 4.600577645551478e-06, + "loss": 0.957, + "step": 55180 + }, + { + "epoch": 0.39950197977516705, + "grad_norm": 0.16597037017345428, + "learning_rate": 4.600505258890892e-06, + "loss": 0.9424, + "step": 55190 + }, + { + "epoch": 0.3995743664357532, + "grad_norm": 0.2117663323879242, + "learning_rate": 4.600432872230306e-06, + "loss": 0.9397, + "step": 55200 + }, + { + "epoch": 0.3996467530963394, + "grad_norm": 0.16794174909591675, + "learning_rate": 4.60036048556972e-06, + "loss": 0.9553, + "step": 55210 + }, + { + "epoch": 0.3997191397569256, + "grad_norm": 0.1689329296350479, + "learning_rate": 4.600288098909134e-06, + "loss": 0.9436, + "step": 55220 + }, + { + "epoch": 0.39979152641751176, + "grad_norm": 0.1604575663805008, + "learning_rate": 4.600215712248547e-06, + "loss": 0.942, + "step": 55230 + }, + { + "epoch": 0.399863913078098, + "grad_norm": 0.16758602857589722, + "learning_rate": 4.600143325587961e-06, + "loss": 0.9411, + "step": 55240 + }, + { + "epoch": 0.39993629973868416, + "grad_norm": 0.167888805270195, + "learning_rate": 4.600070938927375e-06, + "loss": 0.9467, + "step": 55250 + }, + { + "epoch": 0.40000868639927034, + "grad_norm": 0.16341939568519592, + "learning_rate": 4.599998552266788e-06, + "loss": 0.9469, + "step": 55260 + }, + { + "epoch": 0.4000810730598565, + "grad_norm": 0.15733623504638672, + "learning_rate": 4.5999261656062026e-06, + "loss": 0.9472, + "step": 55270 + }, + { + "epoch": 0.4001534597204427, + "grad_norm": 0.15709537267684937, + "learning_rate": 4.599853778945616e-06, + "loss": 0.9363, + "step": 55280 + }, + { + "epoch": 0.4002258463810289, + "grad_norm": 0.1718069463968277, + "learning_rate": 4.59978139228503e-06, + "loss": 0.9339, + "step": 55290 + }, + { + "epoch": 0.4002982330416151, + "grad_norm": 0.15917843580245972, + "learning_rate": 4.599709005624443e-06, + "loss": 0.9363, + "step": 55300 + }, + { + "epoch": 0.4003706197022013, + "grad_norm": 0.15500162541866302, + "learning_rate": 4.599636618963858e-06, + "loss": 0.9295, + "step": 55310 + }, + { + "epoch": 0.40044300636278746, + "grad_norm": 0.1756095588207245, + "learning_rate": 4.5995642323032715e-06, + "loss": 0.9356, + "step": 55320 + }, + { + "epoch": 0.40051539302337363, + "grad_norm": 0.16309203207492828, + "learning_rate": 4.599491845642685e-06, + "loss": 0.9378, + "step": 55330 + }, + { + "epoch": 0.40058777968395987, + "grad_norm": 0.16361068189144135, + "learning_rate": 4.599419458982099e-06, + "loss": 0.9376, + "step": 55340 + }, + { + "epoch": 0.40066016634454604, + "grad_norm": 0.16683772206306458, + "learning_rate": 4.599347072321513e-06, + "loss": 0.9455, + "step": 55350 + }, + { + "epoch": 0.4007325530051322, + "grad_norm": 0.1727033108472824, + "learning_rate": 4.599274685660927e-06, + "loss": 0.9312, + "step": 55360 + }, + { + "epoch": 0.4008049396657184, + "grad_norm": 0.22760114073753357, + "learning_rate": 4.5992022990003404e-06, + "loss": 0.9331, + "step": 55370 + }, + { + "epoch": 0.4008773263263046, + "grad_norm": 0.16040877997875214, + "learning_rate": 4.599129912339754e-06, + "loss": 0.9554, + "step": 55380 + }, + { + "epoch": 0.40094971298689075, + "grad_norm": 0.17181342840194702, + "learning_rate": 4.599057525679168e-06, + "loss": 0.9522, + "step": 55390 + }, + { + "epoch": 0.401022099647477, + "grad_norm": 0.16263987123966217, + "learning_rate": 4.598985139018582e-06, + "loss": 0.9407, + "step": 55400 + }, + { + "epoch": 0.40109448630806316, + "grad_norm": 0.16319935023784637, + "learning_rate": 4.598912752357996e-06, + "loss": 0.9337, + "step": 55410 + }, + { + "epoch": 0.40116687296864934, + "grad_norm": 0.17521025240421295, + "learning_rate": 4.598840365697409e-06, + "loss": 0.9386, + "step": 55420 + }, + { + "epoch": 0.4012392596292355, + "grad_norm": 0.16432663798332214, + "learning_rate": 4.598767979036823e-06, + "loss": 0.9387, + "step": 55430 + }, + { + "epoch": 0.4013116462898217, + "grad_norm": 0.16351726651191711, + "learning_rate": 4.5986955923762374e-06, + "loss": 0.9424, + "step": 55440 + }, + { + "epoch": 0.4013840329504079, + "grad_norm": 0.15134920179843903, + "learning_rate": 4.598623205715651e-06, + "loss": 0.9225, + "step": 55450 + }, + { + "epoch": 0.4014564196109941, + "grad_norm": 0.1916009783744812, + "learning_rate": 4.598550819055065e-06, + "loss": 0.942, + "step": 55460 + }, + { + "epoch": 0.4015288062715803, + "grad_norm": 0.1707172840833664, + "learning_rate": 4.598478432394478e-06, + "loss": 0.9395, + "step": 55470 + }, + { + "epoch": 0.40160119293216645, + "grad_norm": 0.20359551906585693, + "learning_rate": 4.598406045733893e-06, + "loss": 0.9398, + "step": 55480 + }, + { + "epoch": 0.4016735795927526, + "grad_norm": 0.16758215427398682, + "learning_rate": 4.598333659073306e-06, + "loss": 0.9317, + "step": 55490 + }, + { + "epoch": 0.40174596625333886, + "grad_norm": 0.17241047322750092, + "learning_rate": 4.59826127241272e-06, + "loss": 0.9463, + "step": 55500 + }, + { + "epoch": 0.40181835291392504, + "grad_norm": 0.17256489396095276, + "learning_rate": 4.598188885752134e-06, + "loss": 0.9383, + "step": 55510 + }, + { + "epoch": 0.4018907395745112, + "grad_norm": 0.16239801049232483, + "learning_rate": 4.598116499091548e-06, + "loss": 0.9344, + "step": 55520 + }, + { + "epoch": 0.4019631262350974, + "grad_norm": 0.15352807939052582, + "learning_rate": 4.598044112430962e-06, + "loss": 0.9391, + "step": 55530 + }, + { + "epoch": 0.40203551289568357, + "grad_norm": 0.15835584700107574, + "learning_rate": 4.597971725770375e-06, + "loss": 0.9441, + "step": 55540 + }, + { + "epoch": 0.40210789955626974, + "grad_norm": 0.17135757207870483, + "learning_rate": 4.597899339109789e-06, + "loss": 0.9403, + "step": 55550 + }, + { + "epoch": 0.402180286216856, + "grad_norm": 0.18612030148506165, + "learning_rate": 4.597826952449203e-06, + "loss": 0.954, + "step": 55560 + }, + { + "epoch": 0.40225267287744215, + "grad_norm": 0.16078492999076843, + "learning_rate": 4.597754565788617e-06, + "loss": 0.947, + "step": 55570 + }, + { + "epoch": 0.40232505953802833, + "grad_norm": 0.16282187402248383, + "learning_rate": 4.597682179128031e-06, + "loss": 0.9369, + "step": 55580 + }, + { + "epoch": 0.4023974461986145, + "grad_norm": 0.18029029667377472, + "learning_rate": 4.597609792467444e-06, + "loss": 0.9432, + "step": 55590 + }, + { + "epoch": 0.4024698328592007, + "grad_norm": 0.15934710204601288, + "learning_rate": 4.597537405806859e-06, + "loss": 0.9534, + "step": 55600 + }, + { + "epoch": 0.4025422195197869, + "grad_norm": 0.16303454339504242, + "learning_rate": 4.597465019146272e-06, + "loss": 0.9452, + "step": 55610 + }, + { + "epoch": 0.4026146061803731, + "grad_norm": 0.16990971565246582, + "learning_rate": 4.597392632485686e-06, + "loss": 0.9413, + "step": 55620 + }, + { + "epoch": 0.40268699284095927, + "grad_norm": 0.18538545072078705, + "learning_rate": 4.5973202458250995e-06, + "loss": 0.9341, + "step": 55630 + }, + { + "epoch": 0.40275937950154544, + "grad_norm": 0.16913770139217377, + "learning_rate": 4.597247859164514e-06, + "loss": 0.9415, + "step": 55640 + }, + { + "epoch": 0.4028317661621316, + "grad_norm": 0.1886037439107895, + "learning_rate": 4.597175472503928e-06, + "loss": 0.9407, + "step": 55650 + }, + { + "epoch": 0.40290415282271785, + "grad_norm": 0.1722469925880432, + "learning_rate": 4.597103085843341e-06, + "loss": 0.945, + "step": 55660 + }, + { + "epoch": 0.40297653948330403, + "grad_norm": 0.16154325008392334, + "learning_rate": 4.597030699182755e-06, + "loss": 0.9329, + "step": 55670 + }, + { + "epoch": 0.4030489261438902, + "grad_norm": 0.16594886779785156, + "learning_rate": 4.596958312522169e-06, + "loss": 0.946, + "step": 55680 + }, + { + "epoch": 0.4031213128044764, + "grad_norm": 0.15325994789600372, + "learning_rate": 4.596885925861583e-06, + "loss": 0.9348, + "step": 55690 + }, + { + "epoch": 0.40319369946506256, + "grad_norm": 0.1615227907896042, + "learning_rate": 4.5968135392009965e-06, + "loss": 0.9294, + "step": 55700 + }, + { + "epoch": 0.4032660861256488, + "grad_norm": 0.1949525773525238, + "learning_rate": 4.59674115254041e-06, + "loss": 0.9388, + "step": 55710 + }, + { + "epoch": 0.40333847278623497, + "grad_norm": 0.1611870676279068, + "learning_rate": 4.596668765879825e-06, + "loss": 0.9464, + "step": 55720 + }, + { + "epoch": 0.40341085944682115, + "grad_norm": 0.16078144311904907, + "learning_rate": 4.596596379219238e-06, + "loss": 0.9239, + "step": 55730 + }, + { + "epoch": 0.4034832461074073, + "grad_norm": 0.16597573459148407, + "learning_rate": 4.596523992558652e-06, + "loss": 0.9325, + "step": 55740 + }, + { + "epoch": 0.4035556327679935, + "grad_norm": 0.16398844122886658, + "learning_rate": 4.5964516058980655e-06, + "loss": 0.942, + "step": 55750 + }, + { + "epoch": 0.4036280194285797, + "grad_norm": 0.16814014315605164, + "learning_rate": 4.59637921923748e-06, + "loss": 0.9423, + "step": 55760 + }, + { + "epoch": 0.4037004060891659, + "grad_norm": 0.1564667820930481, + "learning_rate": 4.5963068325768935e-06, + "loss": 0.939, + "step": 55770 + }, + { + "epoch": 0.4037727927497521, + "grad_norm": 0.16583354771137238, + "learning_rate": 4.596234445916307e-06, + "loss": 0.9374, + "step": 55780 + }, + { + "epoch": 0.40384517941033826, + "grad_norm": 0.1725219190120697, + "learning_rate": 4.596162059255721e-06, + "loss": 0.9323, + "step": 55790 + }, + { + "epoch": 0.40391756607092444, + "grad_norm": 0.15034452080726624, + "learning_rate": 4.596089672595134e-06, + "loss": 0.9276, + "step": 55800 + }, + { + "epoch": 0.4039899527315106, + "grad_norm": 0.17058463394641876, + "learning_rate": 4.596017285934548e-06, + "loss": 0.9497, + "step": 55810 + }, + { + "epoch": 0.40406233939209685, + "grad_norm": 0.15496298670768738, + "learning_rate": 4.595944899273962e-06, + "loss": 0.9416, + "step": 55820 + }, + { + "epoch": 0.404134726052683, + "grad_norm": 0.16956356167793274, + "learning_rate": 4.595872512613376e-06, + "loss": 0.9354, + "step": 55830 + }, + { + "epoch": 0.4042071127132692, + "grad_norm": 0.15078359842300415, + "learning_rate": 4.59580012595279e-06, + "loss": 0.9287, + "step": 55840 + }, + { + "epoch": 0.4042794993738554, + "grad_norm": 0.1605517864227295, + "learning_rate": 4.595727739292203e-06, + "loss": 0.9434, + "step": 55850 + }, + { + "epoch": 0.40435188603444155, + "grad_norm": 0.16339904069900513, + "learning_rate": 4.595655352631617e-06, + "loss": 0.9406, + "step": 55860 + }, + { + "epoch": 0.4044242726950278, + "grad_norm": 0.18486179411411285, + "learning_rate": 4.595582965971031e-06, + "loss": 0.9444, + "step": 55870 + }, + { + "epoch": 0.40449665935561396, + "grad_norm": 0.16515012085437775, + "learning_rate": 4.595510579310445e-06, + "loss": 0.9375, + "step": 55880 + }, + { + "epoch": 0.40456904601620014, + "grad_norm": 0.16577614843845367, + "learning_rate": 4.595438192649859e-06, + "loss": 0.9506, + "step": 55890 + }, + { + "epoch": 0.4046414326767863, + "grad_norm": 0.16598597168922424, + "learning_rate": 4.595365805989272e-06, + "loss": 0.9229, + "step": 55900 + }, + { + "epoch": 0.4047138193373725, + "grad_norm": 0.1684425175189972, + "learning_rate": 4.595293419328687e-06, + "loss": 0.9365, + "step": 55910 + }, + { + "epoch": 0.40478620599795867, + "grad_norm": 0.15921281278133392, + "learning_rate": 4.5952210326681e-06, + "loss": 0.9394, + "step": 55920 + }, + { + "epoch": 0.4048585926585449, + "grad_norm": 0.16952553391456604, + "learning_rate": 4.595148646007514e-06, + "loss": 0.9461, + "step": 55930 + }, + { + "epoch": 0.4049309793191311, + "grad_norm": 0.1839471012353897, + "learning_rate": 4.5950762593469276e-06, + "loss": 0.9523, + "step": 55940 + }, + { + "epoch": 0.40500336597971726, + "grad_norm": 0.16442671418190002, + "learning_rate": 4.595003872686342e-06, + "loss": 0.9517, + "step": 55950 + }, + { + "epoch": 0.40507575264030343, + "grad_norm": 0.1691816747188568, + "learning_rate": 4.594931486025756e-06, + "loss": 0.9393, + "step": 55960 + }, + { + "epoch": 0.4051481393008896, + "grad_norm": 0.17636831104755402, + "learning_rate": 4.594859099365169e-06, + "loss": 0.9443, + "step": 55970 + }, + { + "epoch": 0.40522052596147584, + "grad_norm": 0.16882002353668213, + "learning_rate": 4.594786712704583e-06, + "loss": 0.9407, + "step": 55980 + }, + { + "epoch": 0.405292912622062, + "grad_norm": 0.16932517290115356, + "learning_rate": 4.594714326043997e-06, + "loss": 0.9443, + "step": 55990 + }, + { + "epoch": 0.4053652992826482, + "grad_norm": 0.17036424577236176, + "learning_rate": 4.594641939383411e-06, + "loss": 0.9496, + "step": 56000 + }, + { + "epoch": 0.40543768594323437, + "grad_norm": 0.16463550925254822, + "learning_rate": 4.5945695527228246e-06, + "loss": 0.9404, + "step": 56010 + }, + { + "epoch": 0.40551007260382055, + "grad_norm": 0.16589607298374176, + "learning_rate": 4.594497166062238e-06, + "loss": 0.9475, + "step": 56020 + }, + { + "epoch": 0.4055824592644068, + "grad_norm": 0.18529045581817627, + "learning_rate": 4.594424779401652e-06, + "loss": 0.9464, + "step": 56030 + }, + { + "epoch": 0.40565484592499296, + "grad_norm": 0.15794023871421814, + "learning_rate": 4.594352392741066e-06, + "loss": 0.9315, + "step": 56040 + }, + { + "epoch": 0.40572723258557913, + "grad_norm": 0.16272369027137756, + "learning_rate": 4.59428000608048e-06, + "loss": 0.9486, + "step": 56050 + }, + { + "epoch": 0.4057996192461653, + "grad_norm": 0.16422978043556213, + "learning_rate": 4.5942076194198935e-06, + "loss": 0.9569, + "step": 56060 + }, + { + "epoch": 0.4058720059067515, + "grad_norm": 0.15604273974895477, + "learning_rate": 4.594135232759307e-06, + "loss": 0.9499, + "step": 56070 + }, + { + "epoch": 0.40594439256733766, + "grad_norm": 0.17885945737361908, + "learning_rate": 4.5940628460987216e-06, + "loss": 0.9409, + "step": 56080 + }, + { + "epoch": 0.4060167792279239, + "grad_norm": 0.19414786994457245, + "learning_rate": 4.593990459438135e-06, + "loss": 0.9399, + "step": 56090 + }, + { + "epoch": 0.4060891658885101, + "grad_norm": 0.1650743931531906, + "learning_rate": 4.593918072777549e-06, + "loss": 0.9367, + "step": 56100 + }, + { + "epoch": 0.40616155254909625, + "grad_norm": 0.1537282019853592, + "learning_rate": 4.5938456861169624e-06, + "loss": 0.9325, + "step": 56110 + }, + { + "epoch": 0.4062339392096824, + "grad_norm": 0.16103175282478333, + "learning_rate": 4.593773299456377e-06, + "loss": 0.9262, + "step": 56120 + }, + { + "epoch": 0.4063063258702686, + "grad_norm": 0.1802387833595276, + "learning_rate": 4.5937009127957905e-06, + "loss": 0.9475, + "step": 56130 + }, + { + "epoch": 0.40637871253085484, + "grad_norm": 0.16064372658729553, + "learning_rate": 4.593628526135204e-06, + "loss": 0.9182, + "step": 56140 + }, + { + "epoch": 0.406451099191441, + "grad_norm": 0.15981996059417725, + "learning_rate": 4.593556139474618e-06, + "loss": 0.934, + "step": 56150 + }, + { + "epoch": 0.4065234858520272, + "grad_norm": 0.153685063123703, + "learning_rate": 4.593483752814032e-06, + "loss": 0.9424, + "step": 56160 + }, + { + "epoch": 0.40659587251261337, + "grad_norm": 0.1737099438905716, + "learning_rate": 4.593411366153446e-06, + "loss": 0.9264, + "step": 56170 + }, + { + "epoch": 0.40666825917319954, + "grad_norm": 0.15085779130458832, + "learning_rate": 4.5933389794928594e-06, + "loss": 0.9393, + "step": 56180 + }, + { + "epoch": 0.4067406458337858, + "grad_norm": 0.16470050811767578, + "learning_rate": 4.593266592832273e-06, + "loss": 0.9372, + "step": 56190 + }, + { + "epoch": 0.40681303249437195, + "grad_norm": 0.17371892929077148, + "learning_rate": 4.5931942061716875e-06, + "loss": 0.9426, + "step": 56200 + }, + { + "epoch": 0.40688541915495813, + "grad_norm": 0.1519443243741989, + "learning_rate": 4.593121819511101e-06, + "loss": 0.9331, + "step": 56210 + }, + { + "epoch": 0.4069578058155443, + "grad_norm": 0.1687236726284027, + "learning_rate": 4.593049432850515e-06, + "loss": 0.9469, + "step": 56220 + }, + { + "epoch": 0.4070301924761305, + "grad_norm": 0.1575406938791275, + "learning_rate": 4.592977046189928e-06, + "loss": 0.938, + "step": 56230 + }, + { + "epoch": 0.4071025791367167, + "grad_norm": 0.15486252307891846, + "learning_rate": 4.592904659529343e-06, + "loss": 0.9325, + "step": 56240 + }, + { + "epoch": 0.4071749657973029, + "grad_norm": 0.16617028415203094, + "learning_rate": 4.5928322728687564e-06, + "loss": 0.9382, + "step": 56250 + }, + { + "epoch": 0.40724735245788907, + "grad_norm": 0.16430149972438812, + "learning_rate": 4.59275988620817e-06, + "loss": 0.9316, + "step": 56260 + }, + { + "epoch": 0.40731973911847524, + "grad_norm": 0.17749901115894318, + "learning_rate": 4.592687499547584e-06, + "loss": 0.9332, + "step": 56270 + }, + { + "epoch": 0.4073921257790614, + "grad_norm": 0.21616603434085846, + "learning_rate": 4.592615112886998e-06, + "loss": 0.9409, + "step": 56280 + }, + { + "epoch": 0.4074645124396476, + "grad_norm": 0.1658083200454712, + "learning_rate": 4.592542726226412e-06, + "loss": 0.9445, + "step": 56290 + }, + { + "epoch": 0.40753689910023383, + "grad_norm": 0.15681838989257812, + "learning_rate": 4.592470339565825e-06, + "loss": 0.9343, + "step": 56300 + }, + { + "epoch": 0.40760928576082, + "grad_norm": 0.15368938446044922, + "learning_rate": 4.592397952905239e-06, + "loss": 0.9424, + "step": 56310 + }, + { + "epoch": 0.4076816724214062, + "grad_norm": 0.24562257528305054, + "learning_rate": 4.5923255662446534e-06, + "loss": 0.9345, + "step": 56320 + }, + { + "epoch": 0.40775405908199236, + "grad_norm": 0.1594623327255249, + "learning_rate": 4.592253179584066e-06, + "loss": 0.9462, + "step": 56330 + }, + { + "epoch": 0.40782644574257854, + "grad_norm": 0.1747690588235855, + "learning_rate": 4.59218079292348e-06, + "loss": 0.9347, + "step": 56340 + }, + { + "epoch": 0.40789883240316477, + "grad_norm": 0.1642814725637436, + "learning_rate": 4.592108406262894e-06, + "loss": 0.9402, + "step": 56350 + }, + { + "epoch": 0.40797121906375095, + "grad_norm": 0.15779854357242584, + "learning_rate": 4.592036019602308e-06, + "loss": 0.9327, + "step": 56360 + }, + { + "epoch": 0.4080436057243371, + "grad_norm": 0.15080593526363373, + "learning_rate": 4.5919636329417215e-06, + "loss": 0.9267, + "step": 56370 + }, + { + "epoch": 0.4081159923849233, + "grad_norm": 0.1907733529806137, + "learning_rate": 4.591891246281135e-06, + "loss": 0.9358, + "step": 56380 + }, + { + "epoch": 0.4081883790455095, + "grad_norm": 0.18039512634277344, + "learning_rate": 4.59181885962055e-06, + "loss": 0.9398, + "step": 56390 + }, + { + "epoch": 0.4082607657060957, + "grad_norm": 0.17534703016281128, + "learning_rate": 4.591746472959963e-06, + "loss": 0.94, + "step": 56400 + }, + { + "epoch": 0.4083331523666819, + "grad_norm": 0.16364185512065887, + "learning_rate": 4.591674086299377e-06, + "loss": 0.9374, + "step": 56410 + }, + { + "epoch": 0.40840553902726806, + "grad_norm": 0.16373589634895325, + "learning_rate": 4.5916016996387905e-06, + "loss": 0.9334, + "step": 56420 + }, + { + "epoch": 0.40847792568785424, + "grad_norm": 0.249722421169281, + "learning_rate": 4.591529312978205e-06, + "loss": 0.9358, + "step": 56430 + }, + { + "epoch": 0.4085503123484404, + "grad_norm": 0.16310162842273712, + "learning_rate": 4.5914569263176185e-06, + "loss": 0.9516, + "step": 56440 + }, + { + "epoch": 0.4086226990090266, + "grad_norm": 0.15711535513401031, + "learning_rate": 4.591384539657032e-06, + "loss": 0.9374, + "step": 56450 + }, + { + "epoch": 0.4086950856696128, + "grad_norm": 0.16699931025505066, + "learning_rate": 4.591312152996446e-06, + "loss": 0.9478, + "step": 56460 + }, + { + "epoch": 0.408767472330199, + "grad_norm": 0.16310204565525055, + "learning_rate": 4.59123976633586e-06, + "loss": 0.9335, + "step": 56470 + }, + { + "epoch": 0.4088398589907852, + "grad_norm": 0.1538310945034027, + "learning_rate": 4.591167379675274e-06, + "loss": 0.9424, + "step": 56480 + }, + { + "epoch": 0.40891224565137135, + "grad_norm": 0.1658681482076645, + "learning_rate": 4.5910949930146875e-06, + "loss": 0.9302, + "step": 56490 + }, + { + "epoch": 0.40898463231195753, + "grad_norm": 0.18781772255897522, + "learning_rate": 4.591022606354101e-06, + "loss": 0.9369, + "step": 56500 + }, + { + "epoch": 0.40905701897254376, + "grad_norm": 0.17703670263290405, + "learning_rate": 4.5909502196935155e-06, + "loss": 0.9407, + "step": 56510 + }, + { + "epoch": 0.40912940563312994, + "grad_norm": 0.16049979627132416, + "learning_rate": 4.590877833032929e-06, + "loss": 0.9452, + "step": 56520 + }, + { + "epoch": 0.4092017922937161, + "grad_norm": 0.16507183015346527, + "learning_rate": 4.590805446372343e-06, + "loss": 0.9304, + "step": 56530 + }, + { + "epoch": 0.4092741789543023, + "grad_norm": 0.17175514996051788, + "learning_rate": 4.590733059711756e-06, + "loss": 0.939, + "step": 56540 + }, + { + "epoch": 0.40934656561488847, + "grad_norm": 0.15628288686275482, + "learning_rate": 4.590660673051171e-06, + "loss": 0.9413, + "step": 56550 + }, + { + "epoch": 0.4094189522754747, + "grad_norm": 0.17340143024921417, + "learning_rate": 4.5905882863905845e-06, + "loss": 0.9347, + "step": 56560 + }, + { + "epoch": 0.4094913389360609, + "grad_norm": 0.17264612019062042, + "learning_rate": 4.590515899729998e-06, + "loss": 0.9275, + "step": 56570 + }, + { + "epoch": 0.40956372559664705, + "grad_norm": 0.1592729389667511, + "learning_rate": 4.590443513069412e-06, + "loss": 0.936, + "step": 56580 + }, + { + "epoch": 0.40963611225723323, + "grad_norm": 0.17017214000225067, + "learning_rate": 4.590371126408826e-06, + "loss": 0.9455, + "step": 56590 + }, + { + "epoch": 0.4097084989178194, + "grad_norm": 0.25028762221336365, + "learning_rate": 4.59029873974824e-06, + "loss": 0.9368, + "step": 56600 + }, + { + "epoch": 0.4097808855784056, + "grad_norm": 0.1710430234670639, + "learning_rate": 4.590226353087653e-06, + "loss": 0.932, + "step": 56610 + }, + { + "epoch": 0.4098532722389918, + "grad_norm": 0.18898555636405945, + "learning_rate": 4.590153966427067e-06, + "loss": 0.9284, + "step": 56620 + }, + { + "epoch": 0.409925658899578, + "grad_norm": 0.15929999947547913, + "learning_rate": 4.590081579766481e-06, + "loss": 0.9334, + "step": 56630 + }, + { + "epoch": 0.40999804556016417, + "grad_norm": 0.18224512040615082, + "learning_rate": 4.590009193105895e-06, + "loss": 0.9523, + "step": 56640 + }, + { + "epoch": 0.41007043222075035, + "grad_norm": 0.16598254442214966, + "learning_rate": 4.589936806445309e-06, + "loss": 0.9455, + "step": 56650 + }, + { + "epoch": 0.4101428188813365, + "grad_norm": 0.1503967046737671, + "learning_rate": 4.589864419784722e-06, + "loss": 0.9469, + "step": 56660 + }, + { + "epoch": 0.41021520554192276, + "grad_norm": 0.17737853527069092, + "learning_rate": 4.589792033124136e-06, + "loss": 0.946, + "step": 56670 + }, + { + "epoch": 0.41028759220250893, + "grad_norm": 0.16925589740276337, + "learning_rate": 4.58971964646355e-06, + "loss": 0.9484, + "step": 56680 + }, + { + "epoch": 0.4103599788630951, + "grad_norm": 0.15373647212982178, + "learning_rate": 4.589647259802964e-06, + "loss": 0.9357, + "step": 56690 + }, + { + "epoch": 0.4104323655236813, + "grad_norm": 0.1628134548664093, + "learning_rate": 4.589574873142378e-06, + "loss": 0.9328, + "step": 56700 + }, + { + "epoch": 0.41050475218426746, + "grad_norm": 0.1642216444015503, + "learning_rate": 4.589502486481791e-06, + "loss": 0.9391, + "step": 56710 + }, + { + "epoch": 0.4105771388448537, + "grad_norm": 0.18736900389194489, + "learning_rate": 4.589430099821206e-06, + "loss": 0.9277, + "step": 56720 + }, + { + "epoch": 0.41064952550543987, + "grad_norm": 0.16991931200027466, + "learning_rate": 4.589357713160619e-06, + "loss": 0.9257, + "step": 56730 + }, + { + "epoch": 0.41072191216602605, + "grad_norm": 0.1831955909729004, + "learning_rate": 4.589285326500033e-06, + "loss": 0.9251, + "step": 56740 + }, + { + "epoch": 0.4107942988266122, + "grad_norm": 0.16035877168178558, + "learning_rate": 4.5892129398394466e-06, + "loss": 0.9424, + "step": 56750 + }, + { + "epoch": 0.4108666854871984, + "grad_norm": 0.16036030650138855, + "learning_rate": 4.589140553178861e-06, + "loss": 0.9377, + "step": 56760 + }, + { + "epoch": 0.4109390721477846, + "grad_norm": 0.1828073412179947, + "learning_rate": 4.589068166518275e-06, + "loss": 0.9371, + "step": 56770 + }, + { + "epoch": 0.4110114588083708, + "grad_norm": 0.16397011280059814, + "learning_rate": 4.588995779857688e-06, + "loss": 0.9394, + "step": 56780 + }, + { + "epoch": 0.411083845468957, + "grad_norm": 0.14961424469947815, + "learning_rate": 4.588923393197102e-06, + "loss": 0.9346, + "step": 56790 + }, + { + "epoch": 0.41115623212954316, + "grad_norm": 0.16437679529190063, + "learning_rate": 4.588851006536516e-06, + "loss": 0.9223, + "step": 56800 + }, + { + "epoch": 0.41122861879012934, + "grad_norm": 0.15859009325504303, + "learning_rate": 4.58877861987593e-06, + "loss": 0.9479, + "step": 56810 + }, + { + "epoch": 0.4113010054507155, + "grad_norm": 0.15533125400543213, + "learning_rate": 4.5887062332153436e-06, + "loss": 0.9188, + "step": 56820 + }, + { + "epoch": 0.41137339211130175, + "grad_norm": 0.16156156361103058, + "learning_rate": 4.588633846554757e-06, + "loss": 0.9446, + "step": 56830 + }, + { + "epoch": 0.4114457787718879, + "grad_norm": 0.169430211186409, + "learning_rate": 4.588561459894172e-06, + "loss": 0.9298, + "step": 56840 + }, + { + "epoch": 0.4115181654324741, + "grad_norm": 0.18322457373142242, + "learning_rate": 4.588489073233585e-06, + "loss": 0.9431, + "step": 56850 + }, + { + "epoch": 0.4115905520930603, + "grad_norm": 0.17053550481796265, + "learning_rate": 4.588416686572998e-06, + "loss": 0.9349, + "step": 56860 + }, + { + "epoch": 0.41166293875364646, + "grad_norm": 0.18169362843036652, + "learning_rate": 4.5883442999124125e-06, + "loss": 0.9257, + "step": 56870 + }, + { + "epoch": 0.4117353254142327, + "grad_norm": 0.2942195236682892, + "learning_rate": 4.588271913251826e-06, + "loss": 0.9326, + "step": 56880 + }, + { + "epoch": 0.41180771207481887, + "grad_norm": 0.21721670031547546, + "learning_rate": 4.58819952659124e-06, + "loss": 0.9412, + "step": 56890 + }, + { + "epoch": 0.41188009873540504, + "grad_norm": 0.1641770750284195, + "learning_rate": 4.588127139930653e-06, + "loss": 0.9299, + "step": 56900 + }, + { + "epoch": 0.4119524853959912, + "grad_norm": 0.17069405317306519, + "learning_rate": 4.588054753270068e-06, + "loss": 0.9389, + "step": 56910 + }, + { + "epoch": 0.4120248720565774, + "grad_norm": 0.16552869975566864, + "learning_rate": 4.5879823666094814e-06, + "loss": 0.943, + "step": 56920 + }, + { + "epoch": 0.41209725871716363, + "grad_norm": 0.2061159610748291, + "learning_rate": 4.587909979948895e-06, + "loss": 0.9433, + "step": 56930 + }, + { + "epoch": 0.4121696453777498, + "grad_norm": 0.16571536660194397, + "learning_rate": 4.587837593288309e-06, + "loss": 0.9441, + "step": 56940 + }, + { + "epoch": 0.412242032038336, + "grad_norm": 0.15703128278255463, + "learning_rate": 4.587765206627723e-06, + "loss": 0.94, + "step": 56950 + }, + { + "epoch": 0.41231441869892216, + "grad_norm": 0.16326795518398285, + "learning_rate": 4.587692819967137e-06, + "loss": 0.9363, + "step": 56960 + }, + { + "epoch": 0.41238680535950833, + "grad_norm": 0.16321924328804016, + "learning_rate": 4.58762043330655e-06, + "loss": 0.9219, + "step": 56970 + }, + { + "epoch": 0.4124591920200945, + "grad_norm": 0.16104656457901, + "learning_rate": 4.587548046645964e-06, + "loss": 0.9407, + "step": 56980 + }, + { + "epoch": 0.41253157868068074, + "grad_norm": 0.16342920064926147, + "learning_rate": 4.5874756599853784e-06, + "loss": 0.9396, + "step": 56990 + }, + { + "epoch": 0.4126039653412669, + "grad_norm": 0.18352803587913513, + "learning_rate": 4.587403273324792e-06, + "loss": 0.9289, + "step": 57000 + }, + { + "epoch": 0.4126763520018531, + "grad_norm": 0.14929810166358948, + "learning_rate": 4.587330886664206e-06, + "loss": 0.9403, + "step": 57010 + }, + { + "epoch": 0.4127487386624393, + "grad_norm": 0.15996743738651276, + "learning_rate": 4.587258500003619e-06, + "loss": 0.9487, + "step": 57020 + }, + { + "epoch": 0.41282112532302545, + "grad_norm": 0.15918391942977905, + "learning_rate": 4.587186113343034e-06, + "loss": 0.9334, + "step": 57030 + }, + { + "epoch": 0.4128935119836117, + "grad_norm": 0.16165630519390106, + "learning_rate": 4.587113726682447e-06, + "loss": 0.9335, + "step": 57040 + }, + { + "epoch": 0.41296589864419786, + "grad_norm": 0.16731780767440796, + "learning_rate": 4.587041340021861e-06, + "loss": 0.9401, + "step": 57050 + }, + { + "epoch": 0.41303828530478404, + "grad_norm": 0.21615374088287354, + "learning_rate": 4.586968953361275e-06, + "loss": 0.9381, + "step": 57060 + }, + { + "epoch": 0.4131106719653702, + "grad_norm": 0.17595277726650238, + "learning_rate": 4.586896566700689e-06, + "loss": 0.9435, + "step": 57070 + }, + { + "epoch": 0.4131830586259564, + "grad_norm": 0.2352835237979889, + "learning_rate": 4.586824180040103e-06, + "loss": 0.9311, + "step": 57080 + }, + { + "epoch": 0.4132554452865426, + "grad_norm": 0.15337133407592773, + "learning_rate": 4.586751793379516e-06, + "loss": 0.9489, + "step": 57090 + }, + { + "epoch": 0.4133278319471288, + "grad_norm": 0.18751218914985657, + "learning_rate": 4.58667940671893e-06, + "loss": 0.9385, + "step": 57100 + }, + { + "epoch": 0.413400218607715, + "grad_norm": 0.3108257055282593, + "learning_rate": 4.586607020058344e-06, + "loss": 0.9399, + "step": 57110 + }, + { + "epoch": 0.41347260526830115, + "grad_norm": 0.15695133805274963, + "learning_rate": 4.586534633397758e-06, + "loss": 0.931, + "step": 57120 + }, + { + "epoch": 0.41354499192888733, + "grad_norm": 0.1587478369474411, + "learning_rate": 4.586462246737172e-06, + "loss": 0.9228, + "step": 57130 + }, + { + "epoch": 0.4136173785894735, + "grad_norm": 0.17306017875671387, + "learning_rate": 4.586389860076585e-06, + "loss": 0.9294, + "step": 57140 + }, + { + "epoch": 0.41368976525005974, + "grad_norm": 0.1602945774793625, + "learning_rate": 4.586317473416e-06, + "loss": 0.9387, + "step": 57150 + }, + { + "epoch": 0.4137621519106459, + "grad_norm": 0.17871308326721191, + "learning_rate": 4.586245086755413e-06, + "loss": 0.9349, + "step": 57160 + }, + { + "epoch": 0.4138345385712321, + "grad_norm": 0.18266141414642334, + "learning_rate": 4.586172700094827e-06, + "loss": 0.9463, + "step": 57170 + }, + { + "epoch": 0.41390692523181827, + "grad_norm": 0.15963898599147797, + "learning_rate": 4.5861003134342405e-06, + "loss": 0.9383, + "step": 57180 + }, + { + "epoch": 0.41397931189240444, + "grad_norm": 0.18138666450977325, + "learning_rate": 4.586027926773655e-06, + "loss": 0.9332, + "step": 57190 + }, + { + "epoch": 0.4140516985529907, + "grad_norm": 0.16729888319969177, + "learning_rate": 4.585955540113069e-06, + "loss": 0.9286, + "step": 57200 + }, + { + "epoch": 0.41412408521357685, + "grad_norm": 0.17177051305770874, + "learning_rate": 4.585883153452482e-06, + "loss": 0.9317, + "step": 57210 + }, + { + "epoch": 0.41419647187416303, + "grad_norm": 0.17003445327281952, + "learning_rate": 4.585810766791896e-06, + "loss": 0.9415, + "step": 57220 + }, + { + "epoch": 0.4142688585347492, + "grad_norm": 0.3177216649055481, + "learning_rate": 4.58573838013131e-06, + "loss": 0.9546, + "step": 57230 + }, + { + "epoch": 0.4143412451953354, + "grad_norm": 0.15487100183963776, + "learning_rate": 4.585665993470724e-06, + "loss": 0.9362, + "step": 57240 + }, + { + "epoch": 0.4144136318559216, + "grad_norm": 0.158743754029274, + "learning_rate": 4.5855936068101375e-06, + "loss": 0.941, + "step": 57250 + }, + { + "epoch": 0.4144860185165078, + "grad_norm": 0.16190417110919952, + "learning_rate": 4.585521220149551e-06, + "loss": 0.9304, + "step": 57260 + }, + { + "epoch": 0.41455840517709397, + "grad_norm": 0.16800768673419952, + "learning_rate": 4.585448833488965e-06, + "loss": 0.9331, + "step": 57270 + }, + { + "epoch": 0.41463079183768015, + "grad_norm": 0.15928290784358978, + "learning_rate": 4.585376446828379e-06, + "loss": 0.9298, + "step": 57280 + }, + { + "epoch": 0.4147031784982663, + "grad_norm": 0.17402148246765137, + "learning_rate": 4.585304060167793e-06, + "loss": 0.9381, + "step": 57290 + }, + { + "epoch": 0.4147755651588525, + "grad_norm": 0.16189469397068024, + "learning_rate": 4.5852316735072065e-06, + "loss": 0.9407, + "step": 57300 + }, + { + "epoch": 0.41484795181943873, + "grad_norm": 0.16404691338539124, + "learning_rate": 4.58515928684662e-06, + "loss": 0.9396, + "step": 57310 + }, + { + "epoch": 0.4149203384800249, + "grad_norm": 0.15537384152412415, + "learning_rate": 4.5850869001860345e-06, + "loss": 0.9291, + "step": 57320 + }, + { + "epoch": 0.4149927251406111, + "grad_norm": 0.16837313771247864, + "learning_rate": 4.585014513525448e-06, + "loss": 0.9345, + "step": 57330 + }, + { + "epoch": 0.41506511180119726, + "grad_norm": 0.17618413269519806, + "learning_rate": 4.584942126864862e-06, + "loss": 0.9325, + "step": 57340 + }, + { + "epoch": 0.41513749846178344, + "grad_norm": 0.16522805392742157, + "learning_rate": 4.584869740204275e-06, + "loss": 0.9328, + "step": 57350 + }, + { + "epoch": 0.41520988512236967, + "grad_norm": 0.18333765864372253, + "learning_rate": 4.58479735354369e-06, + "loss": 0.9246, + "step": 57360 + }, + { + "epoch": 0.41528227178295585, + "grad_norm": 0.18241846561431885, + "learning_rate": 4.5847249668831035e-06, + "loss": 0.9327, + "step": 57370 + }, + { + "epoch": 0.415354658443542, + "grad_norm": 0.1651061475276947, + "learning_rate": 4.584652580222517e-06, + "loss": 0.9293, + "step": 57380 + }, + { + "epoch": 0.4154270451041282, + "grad_norm": 0.14222125709056854, + "learning_rate": 4.584580193561931e-06, + "loss": 0.9349, + "step": 57390 + }, + { + "epoch": 0.4154994317647144, + "grad_norm": 0.17054183781147003, + "learning_rate": 4.584507806901344e-06, + "loss": 0.9352, + "step": 57400 + }, + { + "epoch": 0.4155718184253006, + "grad_norm": 0.16434767842292786, + "learning_rate": 4.584435420240758e-06, + "loss": 0.9448, + "step": 57410 + }, + { + "epoch": 0.4156442050858868, + "grad_norm": 0.15329429507255554, + "learning_rate": 4.584363033580172e-06, + "loss": 0.9352, + "step": 57420 + }, + { + "epoch": 0.41571659174647296, + "grad_norm": 0.1638394594192505, + "learning_rate": 4.584290646919586e-06, + "loss": 0.927, + "step": 57430 + }, + { + "epoch": 0.41578897840705914, + "grad_norm": 0.1653471738100052, + "learning_rate": 4.584218260259e-06, + "loss": 0.9451, + "step": 57440 + }, + { + "epoch": 0.4158613650676453, + "grad_norm": 0.1822325438261032, + "learning_rate": 4.584145873598413e-06, + "loss": 0.9372, + "step": 57450 + }, + { + "epoch": 0.41593375172823155, + "grad_norm": 0.16440334916114807, + "learning_rate": 4.584073486937827e-06, + "loss": 0.9434, + "step": 57460 + }, + { + "epoch": 0.4160061383888177, + "grad_norm": 0.1566876322031021, + "learning_rate": 4.584001100277241e-06, + "loss": 0.9454, + "step": 57470 + }, + { + "epoch": 0.4160785250494039, + "grad_norm": 0.15633165836334229, + "learning_rate": 4.583928713616655e-06, + "loss": 0.9438, + "step": 57480 + }, + { + "epoch": 0.4161509117099901, + "grad_norm": 0.1904531866312027, + "learning_rate": 4.5838563269560686e-06, + "loss": 0.9433, + "step": 57490 + }, + { + "epoch": 0.41622329837057626, + "grad_norm": 0.15552623569965363, + "learning_rate": 4.583783940295482e-06, + "loss": 0.9351, + "step": 57500 + }, + { + "epoch": 0.41629568503116243, + "grad_norm": 0.2083786427974701, + "learning_rate": 4.583711553634897e-06, + "loss": 0.9324, + "step": 57510 + }, + { + "epoch": 0.41636807169174866, + "grad_norm": 0.16016191244125366, + "learning_rate": 4.58363916697431e-06, + "loss": 0.9311, + "step": 57520 + }, + { + "epoch": 0.41644045835233484, + "grad_norm": 0.1558094024658203, + "learning_rate": 4.583566780313724e-06, + "loss": 0.9262, + "step": 57530 + }, + { + "epoch": 0.416512845012921, + "grad_norm": 0.16131691634655, + "learning_rate": 4.5834943936531375e-06, + "loss": 0.9343, + "step": 57540 + }, + { + "epoch": 0.4165852316735072, + "grad_norm": 0.1501682698726654, + "learning_rate": 4.583422006992552e-06, + "loss": 0.944, + "step": 57550 + }, + { + "epoch": 0.41665761833409337, + "grad_norm": 0.18568944931030273, + "learning_rate": 4.5833496203319656e-06, + "loss": 0.9248, + "step": 57560 + }, + { + "epoch": 0.4167300049946796, + "grad_norm": 0.16975927352905273, + "learning_rate": 4.583277233671379e-06, + "loss": 0.9348, + "step": 57570 + }, + { + "epoch": 0.4168023916552658, + "grad_norm": 0.15231750905513763, + "learning_rate": 4.583204847010793e-06, + "loss": 0.9445, + "step": 57580 + }, + { + "epoch": 0.41687477831585196, + "grad_norm": 0.2114260494709015, + "learning_rate": 4.583132460350207e-06, + "loss": 0.9306, + "step": 57590 + }, + { + "epoch": 0.41694716497643813, + "grad_norm": 0.17215648293495178, + "learning_rate": 4.583060073689621e-06, + "loss": 0.9206, + "step": 57600 + }, + { + "epoch": 0.4170195516370243, + "grad_norm": 0.16319842636585236, + "learning_rate": 4.5829876870290345e-06, + "loss": 0.9321, + "step": 57610 + }, + { + "epoch": 0.41709193829761054, + "grad_norm": 0.1795549988746643, + "learning_rate": 4.582915300368448e-06, + "loss": 0.9346, + "step": 57620 + }, + { + "epoch": 0.4171643249581967, + "grad_norm": 0.18302929401397705, + "learning_rate": 4.582842913707863e-06, + "loss": 0.9413, + "step": 57630 + }, + { + "epoch": 0.4172367116187829, + "grad_norm": 0.16946756839752197, + "learning_rate": 4.582770527047276e-06, + "loss": 0.9397, + "step": 57640 + }, + { + "epoch": 0.4173090982793691, + "grad_norm": 0.16745789349079132, + "learning_rate": 4.58269814038669e-06, + "loss": 0.9381, + "step": 57650 + }, + { + "epoch": 0.41738148493995525, + "grad_norm": 0.15687045454978943, + "learning_rate": 4.5826257537261034e-06, + "loss": 0.9341, + "step": 57660 + }, + { + "epoch": 0.4174538716005414, + "grad_norm": 0.1755257248878479, + "learning_rate": 4.582553367065518e-06, + "loss": 0.9442, + "step": 57670 + }, + { + "epoch": 0.41752625826112766, + "grad_norm": 0.1668943166732788, + "learning_rate": 4.5824809804049315e-06, + "loss": 0.9397, + "step": 57680 + }, + { + "epoch": 0.41759864492171384, + "grad_norm": 0.16578532755374908, + "learning_rate": 4.582408593744345e-06, + "loss": 0.9375, + "step": 57690 + }, + { + "epoch": 0.4176710315823, + "grad_norm": 0.15865838527679443, + "learning_rate": 4.582336207083759e-06, + "loss": 0.9431, + "step": 57700 + }, + { + "epoch": 0.4177434182428862, + "grad_norm": 0.1591697633266449, + "learning_rate": 4.582263820423173e-06, + "loss": 0.9313, + "step": 57710 + }, + { + "epoch": 0.41781580490347237, + "grad_norm": 0.23013950884342194, + "learning_rate": 4.582191433762587e-06, + "loss": 0.9357, + "step": 57720 + }, + { + "epoch": 0.4178881915640586, + "grad_norm": 0.1570298671722412, + "learning_rate": 4.5821190471020004e-06, + "loss": 0.9206, + "step": 57730 + }, + { + "epoch": 0.4179605782246448, + "grad_norm": 0.16404032707214355, + "learning_rate": 4.582046660441414e-06, + "loss": 0.9465, + "step": 57740 + }, + { + "epoch": 0.41803296488523095, + "grad_norm": 0.16130004823207855, + "learning_rate": 4.5819742737808285e-06, + "loss": 0.9389, + "step": 57750 + }, + { + "epoch": 0.4181053515458171, + "grad_norm": 0.1546400487422943, + "learning_rate": 4.581901887120242e-06, + "loss": 0.9361, + "step": 57760 + }, + { + "epoch": 0.4181777382064033, + "grad_norm": 0.2353212833404541, + "learning_rate": 4.581829500459656e-06, + "loss": 0.9321, + "step": 57770 + }, + { + "epoch": 0.41825012486698954, + "grad_norm": 0.16489247977733612, + "learning_rate": 4.581757113799069e-06, + "loss": 0.9414, + "step": 57780 + }, + { + "epoch": 0.4183225115275757, + "grad_norm": 0.17833788692951202, + "learning_rate": 4.581684727138484e-06, + "loss": 0.9375, + "step": 57790 + }, + { + "epoch": 0.4183948981881619, + "grad_norm": 0.16194747388362885, + "learning_rate": 4.5816123404778974e-06, + "loss": 0.9359, + "step": 57800 + }, + { + "epoch": 0.41846728484874807, + "grad_norm": 0.16397050023078918, + "learning_rate": 4.581539953817311e-06, + "loss": 0.9314, + "step": 57810 + }, + { + "epoch": 0.41853967150933424, + "grad_norm": 0.18386350572109222, + "learning_rate": 4.581467567156725e-06, + "loss": 0.9458, + "step": 57820 + }, + { + "epoch": 0.4186120581699204, + "grad_norm": 0.17196989059448242, + "learning_rate": 4.581395180496139e-06, + "loss": 0.9213, + "step": 57830 + }, + { + "epoch": 0.41868444483050665, + "grad_norm": 0.15747104585170746, + "learning_rate": 4.581322793835553e-06, + "loss": 0.9346, + "step": 57840 + }, + { + "epoch": 0.41875683149109283, + "grad_norm": 0.19495005905628204, + "learning_rate": 4.581250407174966e-06, + "loss": 0.9384, + "step": 57850 + }, + { + "epoch": 0.418829218151679, + "grad_norm": 0.15831686556339264, + "learning_rate": 4.58117802051438e-06, + "loss": 0.9382, + "step": 57860 + }, + { + "epoch": 0.4189016048122652, + "grad_norm": 0.16458268463611603, + "learning_rate": 4.5811056338537945e-06, + "loss": 0.92, + "step": 57870 + }, + { + "epoch": 0.41897399147285136, + "grad_norm": 0.1708781123161316, + "learning_rate": 4.581033247193208e-06, + "loss": 0.9377, + "step": 57880 + }, + { + "epoch": 0.4190463781334376, + "grad_norm": 0.18208129703998566, + "learning_rate": 4.580960860532622e-06, + "loss": 0.9399, + "step": 57890 + }, + { + "epoch": 0.41911876479402377, + "grad_norm": 0.15881425142288208, + "learning_rate": 4.580888473872035e-06, + "loss": 0.9298, + "step": 57900 + }, + { + "epoch": 0.41919115145460994, + "grad_norm": 0.1868477463722229, + "learning_rate": 4.580816087211449e-06, + "loss": 0.9473, + "step": 57910 + }, + { + "epoch": 0.4192635381151961, + "grad_norm": 0.17207710444927216, + "learning_rate": 4.5807437005508625e-06, + "loss": 0.9367, + "step": 57920 + }, + { + "epoch": 0.4193359247757823, + "grad_norm": 0.19365693628787994, + "learning_rate": 4.580671313890276e-06, + "loss": 0.9333, + "step": 57930 + }, + { + "epoch": 0.41940831143636853, + "grad_norm": 0.1523778736591339, + "learning_rate": 4.580598927229691e-06, + "loss": 0.9295, + "step": 57940 + }, + { + "epoch": 0.4194806980969547, + "grad_norm": 0.15326263010501862, + "learning_rate": 4.580526540569104e-06, + "loss": 0.9508, + "step": 57950 + }, + { + "epoch": 0.4195530847575409, + "grad_norm": 0.1532769799232483, + "learning_rate": 4.580454153908518e-06, + "loss": 0.9285, + "step": 57960 + }, + { + "epoch": 0.41962547141812706, + "grad_norm": 0.15870678424835205, + "learning_rate": 4.5803817672479315e-06, + "loss": 0.9472, + "step": 57970 + }, + { + "epoch": 0.41969785807871324, + "grad_norm": 0.1667070984840393, + "learning_rate": 4.580309380587346e-06, + "loss": 0.939, + "step": 57980 + }, + { + "epoch": 0.41977024473929947, + "grad_norm": 0.4463184177875519, + "learning_rate": 4.5802369939267595e-06, + "loss": 0.9313, + "step": 57990 + }, + { + "epoch": 0.41984263139988565, + "grad_norm": 0.15991568565368652, + "learning_rate": 4.580164607266173e-06, + "loss": 0.9425, + "step": 58000 + }, + { + "epoch": 0.4199150180604718, + "grad_norm": 0.16935986280441284, + "learning_rate": 4.580092220605587e-06, + "loss": 0.9291, + "step": 58010 + }, + { + "epoch": 0.419987404721058, + "grad_norm": 0.17099948227405548, + "learning_rate": 4.580019833945001e-06, + "loss": 0.9335, + "step": 58020 + }, + { + "epoch": 0.4200597913816442, + "grad_norm": 0.15163956582546234, + "learning_rate": 4.579947447284415e-06, + "loss": 0.9305, + "step": 58030 + }, + { + "epoch": 0.42013217804223035, + "grad_norm": 0.1671072095632553, + "learning_rate": 4.5798750606238285e-06, + "loss": 0.9379, + "step": 58040 + }, + { + "epoch": 0.4202045647028166, + "grad_norm": 0.16619133949279785, + "learning_rate": 4.579802673963242e-06, + "loss": 0.9309, + "step": 58050 + }, + { + "epoch": 0.42027695136340276, + "grad_norm": 0.16173526644706726, + "learning_rate": 4.579730287302656e-06, + "loss": 0.9369, + "step": 58060 + }, + { + "epoch": 0.42034933802398894, + "grad_norm": 0.16461287438869476, + "learning_rate": 4.57965790064207e-06, + "loss": 0.9386, + "step": 58070 + }, + { + "epoch": 0.4204217246845751, + "grad_norm": 0.19412462413311005, + "learning_rate": 4.579585513981484e-06, + "loss": 0.9284, + "step": 58080 + }, + { + "epoch": 0.4204941113451613, + "grad_norm": 0.18617956340312958, + "learning_rate": 4.579513127320897e-06, + "loss": 0.9284, + "step": 58090 + }, + { + "epoch": 0.4205664980057475, + "grad_norm": 0.1594972461462021, + "learning_rate": 4.579440740660311e-06, + "loss": 0.9367, + "step": 58100 + }, + { + "epoch": 0.4206388846663337, + "grad_norm": 0.1762949526309967, + "learning_rate": 4.5793683539997255e-06, + "loss": 0.9305, + "step": 58110 + }, + { + "epoch": 0.4207112713269199, + "grad_norm": 0.20204119384288788, + "learning_rate": 4.579295967339139e-06, + "loss": 0.939, + "step": 58120 + }, + { + "epoch": 0.42078365798750605, + "grad_norm": 0.16462816298007965, + "learning_rate": 4.579223580678553e-06, + "loss": 0.9302, + "step": 58130 + }, + { + "epoch": 0.42085604464809223, + "grad_norm": 0.15783581137657166, + "learning_rate": 4.579151194017966e-06, + "loss": 0.9372, + "step": 58140 + }, + { + "epoch": 0.42092843130867846, + "grad_norm": 0.1847236305475235, + "learning_rate": 4.579078807357381e-06, + "loss": 0.9338, + "step": 58150 + }, + { + "epoch": 0.42100081796926464, + "grad_norm": 0.16038931906223297, + "learning_rate": 4.579006420696794e-06, + "loss": 0.9347, + "step": 58160 + }, + { + "epoch": 0.4210732046298508, + "grad_norm": 0.17341868579387665, + "learning_rate": 4.578934034036208e-06, + "loss": 0.9468, + "step": 58170 + }, + { + "epoch": 0.421145591290437, + "grad_norm": 0.17107468843460083, + "learning_rate": 4.578861647375622e-06, + "loss": 0.9385, + "step": 58180 + }, + { + "epoch": 0.42121797795102317, + "grad_norm": 0.16987687349319458, + "learning_rate": 4.578789260715036e-06, + "loss": 0.9256, + "step": 58190 + }, + { + "epoch": 0.42129036461160935, + "grad_norm": 0.15816541016101837, + "learning_rate": 4.57871687405445e-06, + "loss": 0.9309, + "step": 58200 + }, + { + "epoch": 0.4213627512721956, + "grad_norm": 0.17220225930213928, + "learning_rate": 4.578644487393863e-06, + "loss": 0.931, + "step": 58210 + }, + { + "epoch": 0.42143513793278176, + "grad_norm": 0.15358738601207733, + "learning_rate": 4.578572100733277e-06, + "loss": 0.9363, + "step": 58220 + }, + { + "epoch": 0.42150752459336793, + "grad_norm": 0.1880992203950882, + "learning_rate": 4.578499714072691e-06, + "loss": 0.9336, + "step": 58230 + }, + { + "epoch": 0.4215799112539541, + "grad_norm": 0.1707928329706192, + "learning_rate": 4.578427327412105e-06, + "loss": 0.9444, + "step": 58240 + }, + { + "epoch": 0.4216522979145403, + "grad_norm": 0.16345572471618652, + "learning_rate": 4.578354940751519e-06, + "loss": 0.9401, + "step": 58250 + }, + { + "epoch": 0.4217246845751265, + "grad_norm": 0.15612851083278656, + "learning_rate": 4.578282554090932e-06, + "loss": 0.9325, + "step": 58260 + }, + { + "epoch": 0.4217970712357127, + "grad_norm": 0.15962456166744232, + "learning_rate": 4.578210167430347e-06, + "loss": 0.933, + "step": 58270 + }, + { + "epoch": 0.42186945789629887, + "grad_norm": 0.16563986241817474, + "learning_rate": 4.57813778076976e-06, + "loss": 0.9284, + "step": 58280 + }, + { + "epoch": 0.42194184455688505, + "grad_norm": 0.18625997006893158, + "learning_rate": 4.578065394109174e-06, + "loss": 0.927, + "step": 58290 + }, + { + "epoch": 0.4220142312174712, + "grad_norm": 0.16373422741889954, + "learning_rate": 4.5779930074485876e-06, + "loss": 0.9354, + "step": 58300 + }, + { + "epoch": 0.42208661787805746, + "grad_norm": 0.16232380270957947, + "learning_rate": 4.577920620788002e-06, + "loss": 0.9257, + "step": 58310 + }, + { + "epoch": 0.42215900453864363, + "grad_norm": 0.1764630377292633, + "learning_rate": 4.577848234127416e-06, + "loss": 0.9374, + "step": 58320 + }, + { + "epoch": 0.4222313911992298, + "grad_norm": 0.1576317995786667, + "learning_rate": 4.577775847466829e-06, + "loss": 0.9403, + "step": 58330 + }, + { + "epoch": 0.422303777859816, + "grad_norm": 0.18781952559947968, + "learning_rate": 4.577703460806243e-06, + "loss": 0.9278, + "step": 58340 + }, + { + "epoch": 0.42237616452040216, + "grad_norm": 0.1682749092578888, + "learning_rate": 4.577631074145657e-06, + "loss": 0.9339, + "step": 58350 + }, + { + "epoch": 0.42244855118098834, + "grad_norm": 0.17663206160068512, + "learning_rate": 4.577558687485071e-06, + "loss": 0.937, + "step": 58360 + }, + { + "epoch": 0.4225209378415746, + "grad_norm": 0.16293081641197205, + "learning_rate": 4.577486300824485e-06, + "loss": 0.924, + "step": 58370 + }, + { + "epoch": 0.42259332450216075, + "grad_norm": 0.17560496926307678, + "learning_rate": 4.577413914163898e-06, + "loss": 0.9319, + "step": 58380 + }, + { + "epoch": 0.4226657111627469, + "grad_norm": 0.1667516529560089, + "learning_rate": 4.577341527503313e-06, + "loss": 0.9406, + "step": 58390 + }, + { + "epoch": 0.4227380978233331, + "grad_norm": 0.15829908847808838, + "learning_rate": 4.577269140842726e-06, + "loss": 0.9388, + "step": 58400 + }, + { + "epoch": 0.4228104844839193, + "grad_norm": 0.16750116646289825, + "learning_rate": 4.57719675418214e-06, + "loss": 0.9415, + "step": 58410 + }, + { + "epoch": 0.4228828711445055, + "grad_norm": 0.21571747958660126, + "learning_rate": 4.5771243675215535e-06, + "loss": 0.9387, + "step": 58420 + }, + { + "epoch": 0.4229552578050917, + "grad_norm": 0.16604144871234894, + "learning_rate": 4.577051980860968e-06, + "loss": 0.9345, + "step": 58430 + }, + { + "epoch": 0.42302764446567787, + "grad_norm": 0.16571690142154694, + "learning_rate": 4.576979594200382e-06, + "loss": 0.9398, + "step": 58440 + }, + { + "epoch": 0.42310003112626404, + "grad_norm": 0.1598815768957138, + "learning_rate": 4.576907207539794e-06, + "loss": 0.938, + "step": 58450 + }, + { + "epoch": 0.4231724177868502, + "grad_norm": 0.16254471242427826, + "learning_rate": 4.576834820879209e-06, + "loss": 0.9153, + "step": 58460 + }, + { + "epoch": 0.42324480444743645, + "grad_norm": 0.17115645110607147, + "learning_rate": 4.5767624342186224e-06, + "loss": 0.936, + "step": 58470 + }, + { + "epoch": 0.42331719110802263, + "grad_norm": 0.1529657244682312, + "learning_rate": 4.576690047558036e-06, + "loss": 0.9381, + "step": 58480 + }, + { + "epoch": 0.4233895777686088, + "grad_norm": 0.15278260409832, + "learning_rate": 4.57661766089745e-06, + "loss": 0.9446, + "step": 58490 + }, + { + "epoch": 0.423461964429195, + "grad_norm": 0.15874941647052765, + "learning_rate": 4.576545274236864e-06, + "loss": 0.943, + "step": 58500 + }, + { + "epoch": 0.42353435108978116, + "grad_norm": 0.1559789478778839, + "learning_rate": 4.576472887576278e-06, + "loss": 0.9342, + "step": 58510 + }, + { + "epoch": 0.42360673775036733, + "grad_norm": 0.1483520269393921, + "learning_rate": 4.576400500915691e-06, + "loss": 0.927, + "step": 58520 + }, + { + "epoch": 0.42367912441095357, + "grad_norm": 0.16274036467075348, + "learning_rate": 4.576328114255105e-06, + "loss": 0.9228, + "step": 58530 + }, + { + "epoch": 0.42375151107153974, + "grad_norm": 0.16001389920711517, + "learning_rate": 4.5762557275945194e-06, + "loss": 0.9284, + "step": 58540 + }, + { + "epoch": 0.4238238977321259, + "grad_norm": 0.16106966137886047, + "learning_rate": 4.576183340933933e-06, + "loss": 0.9275, + "step": 58550 + }, + { + "epoch": 0.4238962843927121, + "grad_norm": 0.17756831645965576, + "learning_rate": 4.576110954273347e-06, + "loss": 0.9324, + "step": 58560 + }, + { + "epoch": 0.4239686710532983, + "grad_norm": 0.18322084844112396, + "learning_rate": 4.57603856761276e-06, + "loss": 0.9276, + "step": 58570 + }, + { + "epoch": 0.4240410577138845, + "grad_norm": 0.17278200387954712, + "learning_rate": 4.575966180952175e-06, + "loss": 0.9419, + "step": 58580 + }, + { + "epoch": 0.4241134443744707, + "grad_norm": 0.16556476056575775, + "learning_rate": 4.575893794291588e-06, + "loss": 0.9291, + "step": 58590 + }, + { + "epoch": 0.42418583103505686, + "grad_norm": 0.15898211300373077, + "learning_rate": 4.575821407631002e-06, + "loss": 0.9342, + "step": 58600 + }, + { + "epoch": 0.42425821769564304, + "grad_norm": 0.16985763609409332, + "learning_rate": 4.575749020970416e-06, + "loss": 0.9345, + "step": 58610 + }, + { + "epoch": 0.4243306043562292, + "grad_norm": 0.16115114092826843, + "learning_rate": 4.57567663430983e-06, + "loss": 0.9404, + "step": 58620 + }, + { + "epoch": 0.42440299101681545, + "grad_norm": 0.16031764447689056, + "learning_rate": 4.575604247649244e-06, + "loss": 0.9347, + "step": 58630 + }, + { + "epoch": 0.4244753776774016, + "grad_norm": 0.15465863049030304, + "learning_rate": 4.575531860988657e-06, + "loss": 0.9354, + "step": 58640 + }, + { + "epoch": 0.4245477643379878, + "grad_norm": 0.17583829164505005, + "learning_rate": 4.575459474328071e-06, + "loss": 0.9308, + "step": 58650 + }, + { + "epoch": 0.424620150998574, + "grad_norm": 0.16195513308048248, + "learning_rate": 4.575387087667485e-06, + "loss": 0.9312, + "step": 58660 + }, + { + "epoch": 0.42469253765916015, + "grad_norm": 0.24764384329319, + "learning_rate": 4.575314701006899e-06, + "loss": 0.9256, + "step": 58670 + }, + { + "epoch": 0.4247649243197464, + "grad_norm": 0.15488171577453613, + "learning_rate": 4.575242314346313e-06, + "loss": 0.9204, + "step": 58680 + }, + { + "epoch": 0.42483731098033256, + "grad_norm": 0.16304126381874084, + "learning_rate": 4.575169927685726e-06, + "loss": 0.9366, + "step": 58690 + }, + { + "epoch": 0.42490969764091874, + "grad_norm": 0.15404012799263, + "learning_rate": 4.57509754102514e-06, + "loss": 0.9253, + "step": 58700 + }, + { + "epoch": 0.4249820843015049, + "grad_norm": 0.17125609517097473, + "learning_rate": 4.575025154364554e-06, + "loss": 0.9325, + "step": 58710 + }, + { + "epoch": 0.4250544709620911, + "grad_norm": 0.17699052393436432, + "learning_rate": 4.574952767703968e-06, + "loss": 0.9248, + "step": 58720 + }, + { + "epoch": 0.42512685762267727, + "grad_norm": 0.1671576052904129, + "learning_rate": 4.5748803810433815e-06, + "loss": 0.9407, + "step": 58730 + }, + { + "epoch": 0.4251992442832635, + "grad_norm": 0.48583319783210754, + "learning_rate": 4.574807994382795e-06, + "loss": 0.9151, + "step": 58740 + }, + { + "epoch": 0.4252716309438497, + "grad_norm": 0.15986119210720062, + "learning_rate": 4.57473560772221e-06, + "loss": 0.9351, + "step": 58750 + }, + { + "epoch": 0.42534401760443585, + "grad_norm": 0.14684616029262543, + "learning_rate": 4.574663221061623e-06, + "loss": 0.9456, + "step": 58760 + }, + { + "epoch": 0.42541640426502203, + "grad_norm": 0.2594200074672699, + "learning_rate": 4.574590834401037e-06, + "loss": 0.9368, + "step": 58770 + }, + { + "epoch": 0.4254887909256082, + "grad_norm": 0.15739043056964874, + "learning_rate": 4.5745184477404505e-06, + "loss": 0.9336, + "step": 58780 + }, + { + "epoch": 0.42556117758619444, + "grad_norm": 0.1712053120136261, + "learning_rate": 4.574446061079865e-06, + "loss": 0.9341, + "step": 58790 + }, + { + "epoch": 0.4256335642467806, + "grad_norm": 0.1579931527376175, + "learning_rate": 4.5743736744192785e-06, + "loss": 0.9315, + "step": 58800 + }, + { + "epoch": 0.4257059509073668, + "grad_norm": 0.16759207844734192, + "learning_rate": 4.574301287758692e-06, + "loss": 0.9334, + "step": 58810 + }, + { + "epoch": 0.42577833756795297, + "grad_norm": 0.15357771515846252, + "learning_rate": 4.574228901098106e-06, + "loss": 0.9341, + "step": 58820 + }, + { + "epoch": 0.42585072422853915, + "grad_norm": 0.16257642209529877, + "learning_rate": 4.57415651443752e-06, + "loss": 0.9319, + "step": 58830 + }, + { + "epoch": 0.4259231108891254, + "grad_norm": 0.1543009728193283, + "learning_rate": 4.574084127776934e-06, + "loss": 0.9544, + "step": 58840 + }, + { + "epoch": 0.42599549754971155, + "grad_norm": 0.18277451395988464, + "learning_rate": 4.5740117411163475e-06, + "loss": 0.9316, + "step": 58850 + }, + { + "epoch": 0.42606788421029773, + "grad_norm": 0.1689336895942688, + "learning_rate": 4.573939354455761e-06, + "loss": 0.9406, + "step": 58860 + }, + { + "epoch": 0.4261402708708839, + "grad_norm": 0.16068103909492493, + "learning_rate": 4.5738669677951756e-06, + "loss": 0.9412, + "step": 58870 + }, + { + "epoch": 0.4262126575314701, + "grad_norm": 0.1947869062423706, + "learning_rate": 4.573794581134589e-06, + "loss": 0.9303, + "step": 58880 + }, + { + "epoch": 0.42628504419205626, + "grad_norm": 0.16800038516521454, + "learning_rate": 4.573722194474003e-06, + "loss": 0.9349, + "step": 58890 + }, + { + "epoch": 0.4263574308526425, + "grad_norm": 0.16128326952457428, + "learning_rate": 4.573649807813416e-06, + "loss": 0.9433, + "step": 58900 + }, + { + "epoch": 0.42642981751322867, + "grad_norm": 0.16078263521194458, + "learning_rate": 4.573577421152831e-06, + "loss": 0.9305, + "step": 58910 + }, + { + "epoch": 0.42650220417381485, + "grad_norm": 0.20979563891887665, + "learning_rate": 4.5735050344922445e-06, + "loss": 0.9481, + "step": 58920 + }, + { + "epoch": 0.426574590834401, + "grad_norm": 0.16073863208293915, + "learning_rate": 4.573432647831658e-06, + "loss": 0.9395, + "step": 58930 + }, + { + "epoch": 0.4266469774949872, + "grad_norm": 0.17131203413009644, + "learning_rate": 4.573360261171072e-06, + "loss": 0.9441, + "step": 58940 + }, + { + "epoch": 0.42671936415557343, + "grad_norm": 0.150382399559021, + "learning_rate": 4.573287874510486e-06, + "loss": 0.926, + "step": 58950 + }, + { + "epoch": 0.4267917508161596, + "grad_norm": 0.15955784916877747, + "learning_rate": 4.5732154878499e-06, + "loss": 0.9287, + "step": 58960 + }, + { + "epoch": 0.4268641374767458, + "grad_norm": 0.16103602945804596, + "learning_rate": 4.573143101189313e-06, + "loss": 0.9269, + "step": 58970 + }, + { + "epoch": 0.42693652413733196, + "grad_norm": 0.17586109042167664, + "learning_rate": 4.573070714528727e-06, + "loss": 0.9279, + "step": 58980 + }, + { + "epoch": 0.42700891079791814, + "grad_norm": 0.1569269299507141, + "learning_rate": 4.572998327868141e-06, + "loss": 0.932, + "step": 58990 + }, + { + "epoch": 0.42708129745850437, + "grad_norm": 0.1663513034582138, + "learning_rate": 4.572925941207554e-06, + "loss": 0.9276, + "step": 59000 + }, + { + "epoch": 0.42715368411909055, + "grad_norm": 0.16358311474323273, + "learning_rate": 4.572853554546968e-06, + "loss": 0.936, + "step": 59010 + }, + { + "epoch": 0.4272260707796767, + "grad_norm": 0.15506871044635773, + "learning_rate": 4.572781167886382e-06, + "loss": 0.9271, + "step": 59020 + }, + { + "epoch": 0.4272984574402629, + "grad_norm": 0.156698539853096, + "learning_rate": 4.572708781225796e-06, + "loss": 0.9327, + "step": 59030 + }, + { + "epoch": 0.4273708441008491, + "grad_norm": 0.1768382340669632, + "learning_rate": 4.5726363945652096e-06, + "loss": 0.9408, + "step": 59040 + }, + { + "epoch": 0.42744323076143526, + "grad_norm": 0.15888147056102753, + "learning_rate": 4.572564007904623e-06, + "loss": 0.9294, + "step": 59050 + }, + { + "epoch": 0.4275156174220215, + "grad_norm": 0.1897684782743454, + "learning_rate": 4.572491621244038e-06, + "loss": 0.9478, + "step": 59060 + }, + { + "epoch": 0.42758800408260766, + "grad_norm": 0.18450258672237396, + "learning_rate": 4.572419234583451e-06, + "loss": 0.9202, + "step": 59070 + }, + { + "epoch": 0.42766039074319384, + "grad_norm": 0.16853807866573334, + "learning_rate": 4.572346847922865e-06, + "loss": 0.9362, + "step": 59080 + }, + { + "epoch": 0.42773277740378, + "grad_norm": 0.16247627139091492, + "learning_rate": 4.5722744612622785e-06, + "loss": 0.9351, + "step": 59090 + }, + { + "epoch": 0.4278051640643662, + "grad_norm": 0.17450258135795593, + "learning_rate": 4.572202074601693e-06, + "loss": 0.9304, + "step": 59100 + }, + { + "epoch": 0.4278775507249524, + "grad_norm": 0.22442488372325897, + "learning_rate": 4.572129687941107e-06, + "loss": 0.9176, + "step": 59110 + }, + { + "epoch": 0.4279499373855386, + "grad_norm": 0.1596069484949112, + "learning_rate": 4.57205730128052e-06, + "loss": 0.9467, + "step": 59120 + }, + { + "epoch": 0.4280223240461248, + "grad_norm": 0.16665290296077728, + "learning_rate": 4.571984914619934e-06, + "loss": 0.9333, + "step": 59130 + }, + { + "epoch": 0.42809471070671096, + "grad_norm": 0.15915903449058533, + "learning_rate": 4.571912527959348e-06, + "loss": 0.9386, + "step": 59140 + }, + { + "epoch": 0.42816709736729713, + "grad_norm": 0.16277122497558594, + "learning_rate": 4.571840141298762e-06, + "loss": 0.9298, + "step": 59150 + }, + { + "epoch": 0.42823948402788337, + "grad_norm": 0.14925530552864075, + "learning_rate": 4.5717677546381755e-06, + "loss": 0.9454, + "step": 59160 + }, + { + "epoch": 0.42831187068846954, + "grad_norm": 0.1798754781484604, + "learning_rate": 4.571695367977589e-06, + "loss": 0.9397, + "step": 59170 + }, + { + "epoch": 0.4283842573490557, + "grad_norm": 0.16380873322486877, + "learning_rate": 4.571622981317004e-06, + "loss": 0.9515, + "step": 59180 + }, + { + "epoch": 0.4284566440096419, + "grad_norm": 0.16432473063468933, + "learning_rate": 4.571550594656417e-06, + "loss": 0.9351, + "step": 59190 + }, + { + "epoch": 0.4285290306702281, + "grad_norm": 0.14648674428462982, + "learning_rate": 4.571478207995831e-06, + "loss": 0.9283, + "step": 59200 + }, + { + "epoch": 0.4286014173308143, + "grad_norm": 0.1510661244392395, + "learning_rate": 4.5714058213352444e-06, + "loss": 0.9295, + "step": 59210 + }, + { + "epoch": 0.4286738039914005, + "grad_norm": 0.18739067018032074, + "learning_rate": 4.571333434674659e-06, + "loss": 0.937, + "step": 59220 + }, + { + "epoch": 0.42874619065198666, + "grad_norm": 0.16375543177127838, + "learning_rate": 4.5712610480140725e-06, + "loss": 0.9354, + "step": 59230 + }, + { + "epoch": 0.42881857731257284, + "grad_norm": 0.16390521824359894, + "learning_rate": 4.571188661353486e-06, + "loss": 0.9353, + "step": 59240 + }, + { + "epoch": 0.428890963973159, + "grad_norm": 0.14936009049415588, + "learning_rate": 4.5711162746929e-06, + "loss": 0.9253, + "step": 59250 + }, + { + "epoch": 0.4289633506337452, + "grad_norm": 0.16098490357398987, + "learning_rate": 4.571043888032314e-06, + "loss": 0.9187, + "step": 59260 + }, + { + "epoch": 0.4290357372943314, + "grad_norm": 0.16449196636676788, + "learning_rate": 4.570971501371728e-06, + "loss": 0.93, + "step": 59270 + }, + { + "epoch": 0.4291081239549176, + "grad_norm": 0.44586077332496643, + "learning_rate": 4.5708991147111414e-06, + "loss": 0.9421, + "step": 59280 + }, + { + "epoch": 0.4291805106155038, + "grad_norm": 0.15456175804138184, + "learning_rate": 4.570826728050555e-06, + "loss": 0.9223, + "step": 59290 + }, + { + "epoch": 0.42925289727608995, + "grad_norm": 0.17588649690151215, + "learning_rate": 4.570754341389969e-06, + "loss": 0.9438, + "step": 59300 + }, + { + "epoch": 0.4293252839366761, + "grad_norm": 0.15864711999893188, + "learning_rate": 4.570681954729383e-06, + "loss": 0.9313, + "step": 59310 + }, + { + "epoch": 0.42939767059726236, + "grad_norm": 0.16494864225387573, + "learning_rate": 4.570609568068797e-06, + "loss": 0.9457, + "step": 59320 + }, + { + "epoch": 0.42947005725784854, + "grad_norm": 0.15359218418598175, + "learning_rate": 4.57053718140821e-06, + "loss": 0.9295, + "step": 59330 + }, + { + "epoch": 0.4295424439184347, + "grad_norm": 0.16325341165065765, + "learning_rate": 4.570464794747624e-06, + "loss": 0.9434, + "step": 59340 + }, + { + "epoch": 0.4296148305790209, + "grad_norm": 0.15335196256637573, + "learning_rate": 4.5703924080870385e-06, + "loss": 0.9255, + "step": 59350 + }, + { + "epoch": 0.42968721723960707, + "grad_norm": 0.15363959968090057, + "learning_rate": 4.570320021426452e-06, + "loss": 0.9349, + "step": 59360 + }, + { + "epoch": 0.4297596039001933, + "grad_norm": 0.16724181175231934, + "learning_rate": 4.570247634765866e-06, + "loss": 0.9281, + "step": 59370 + }, + { + "epoch": 0.4298319905607795, + "grad_norm": 0.1659107208251953, + "learning_rate": 4.570175248105279e-06, + "loss": 0.9412, + "step": 59380 + }, + { + "epoch": 0.42990437722136565, + "grad_norm": 0.17168617248535156, + "learning_rate": 4.570102861444694e-06, + "loss": 0.9401, + "step": 59390 + }, + { + "epoch": 0.42997676388195183, + "grad_norm": 0.15352968871593475, + "learning_rate": 4.570030474784107e-06, + "loss": 0.9271, + "step": 59400 + }, + { + "epoch": 0.430049150542538, + "grad_norm": 0.17253489792346954, + "learning_rate": 4.569958088123521e-06, + "loss": 0.9324, + "step": 59410 + }, + { + "epoch": 0.4301215372031242, + "grad_norm": 0.15288107097148895, + "learning_rate": 4.569885701462935e-06, + "loss": 0.9274, + "step": 59420 + }, + { + "epoch": 0.4301939238637104, + "grad_norm": 0.17181651294231415, + "learning_rate": 4.569813314802349e-06, + "loss": 0.9399, + "step": 59430 + }, + { + "epoch": 0.4302663105242966, + "grad_norm": 0.16761989891529083, + "learning_rate": 4.569740928141763e-06, + "loss": 0.938, + "step": 59440 + }, + { + "epoch": 0.43033869718488277, + "grad_norm": 0.16204999387264252, + "learning_rate": 4.569668541481176e-06, + "loss": 0.9332, + "step": 59450 + }, + { + "epoch": 0.43041108384546894, + "grad_norm": 0.1525745391845703, + "learning_rate": 4.56959615482059e-06, + "loss": 0.93, + "step": 59460 + }, + { + "epoch": 0.4304834705060551, + "grad_norm": 0.17561571300029755, + "learning_rate": 4.569523768160004e-06, + "loss": 0.9375, + "step": 59470 + }, + { + "epoch": 0.43055585716664135, + "grad_norm": 0.1534716933965683, + "learning_rate": 4.569451381499418e-06, + "loss": 0.9513, + "step": 59480 + }, + { + "epoch": 0.43062824382722753, + "grad_norm": 0.16287165880203247, + "learning_rate": 4.569378994838832e-06, + "loss": 0.9542, + "step": 59490 + }, + { + "epoch": 0.4307006304878137, + "grad_norm": 0.1597660928964615, + "learning_rate": 4.569306608178245e-06, + "loss": 0.926, + "step": 59500 + }, + { + "epoch": 0.4307730171483999, + "grad_norm": 0.18294881284236908, + "learning_rate": 4.569234221517659e-06, + "loss": 0.9382, + "step": 59510 + }, + { + "epoch": 0.43084540380898606, + "grad_norm": 0.1525103747844696, + "learning_rate": 4.5691618348570725e-06, + "loss": 0.937, + "step": 59520 + }, + { + "epoch": 0.4309177904695723, + "grad_norm": 0.15872421860694885, + "learning_rate": 4.569089448196486e-06, + "loss": 0.9225, + "step": 59530 + }, + { + "epoch": 0.43099017713015847, + "grad_norm": 0.15989533066749573, + "learning_rate": 4.5690170615359005e-06, + "loss": 0.926, + "step": 59540 + }, + { + "epoch": 0.43106256379074465, + "grad_norm": 0.16288037598133087, + "learning_rate": 4.568944674875314e-06, + "loss": 0.9269, + "step": 59550 + }, + { + "epoch": 0.4311349504513308, + "grad_norm": 0.16788716614246368, + "learning_rate": 4.568872288214728e-06, + "loss": 0.9176, + "step": 59560 + }, + { + "epoch": 0.431207337111917, + "grad_norm": 0.15858720242977142, + "learning_rate": 4.568799901554141e-06, + "loss": 0.9246, + "step": 59570 + }, + { + "epoch": 0.4312797237725032, + "grad_norm": 0.17023035883903503, + "learning_rate": 4.568727514893556e-06, + "loss": 0.9416, + "step": 59580 + }, + { + "epoch": 0.4313521104330894, + "grad_norm": 0.16211800277233124, + "learning_rate": 4.5686551282329695e-06, + "loss": 0.9295, + "step": 59590 + }, + { + "epoch": 0.4314244970936756, + "grad_norm": 0.16199994087219238, + "learning_rate": 4.568582741572383e-06, + "loss": 0.9308, + "step": 59600 + }, + { + "epoch": 0.43149688375426176, + "grad_norm": 0.1606810986995697, + "learning_rate": 4.568510354911797e-06, + "loss": 0.9309, + "step": 59610 + }, + { + "epoch": 0.43156927041484794, + "grad_norm": 0.20290134847164154, + "learning_rate": 4.568437968251211e-06, + "loss": 0.9311, + "step": 59620 + }, + { + "epoch": 0.4316416570754341, + "grad_norm": 0.18084190785884857, + "learning_rate": 4.568365581590625e-06, + "loss": 0.9283, + "step": 59630 + }, + { + "epoch": 0.43171404373602035, + "grad_norm": 0.15636208653450012, + "learning_rate": 4.568293194930038e-06, + "loss": 0.939, + "step": 59640 + }, + { + "epoch": 0.4317864303966065, + "grad_norm": 0.16248819231987, + "learning_rate": 4.568220808269452e-06, + "loss": 0.9457, + "step": 59650 + }, + { + "epoch": 0.4318588170571927, + "grad_norm": 0.16725608706474304, + "learning_rate": 4.5681484216088665e-06, + "loss": 0.9239, + "step": 59660 + }, + { + "epoch": 0.4319312037177789, + "grad_norm": 0.15592262148857117, + "learning_rate": 4.56807603494828e-06, + "loss": 0.9274, + "step": 59670 + }, + { + "epoch": 0.43200359037836505, + "grad_norm": 0.16762572526931763, + "learning_rate": 4.568003648287694e-06, + "loss": 0.9387, + "step": 59680 + }, + { + "epoch": 0.4320759770389513, + "grad_norm": 0.16156117618083954, + "learning_rate": 4.567931261627107e-06, + "loss": 0.9486, + "step": 59690 + }, + { + "epoch": 0.43214836369953746, + "grad_norm": 0.1718205362558365, + "learning_rate": 4.567858874966522e-06, + "loss": 0.928, + "step": 59700 + }, + { + "epoch": 0.43222075036012364, + "grad_norm": 0.15854863822460175, + "learning_rate": 4.567786488305935e-06, + "loss": 0.9373, + "step": 59710 + }, + { + "epoch": 0.4322931370207098, + "grad_norm": 0.1781233698129654, + "learning_rate": 4.567714101645349e-06, + "loss": 0.9324, + "step": 59720 + }, + { + "epoch": 0.432365523681296, + "grad_norm": 0.15047183632850647, + "learning_rate": 4.567641714984763e-06, + "loss": 0.9246, + "step": 59730 + }, + { + "epoch": 0.4324379103418822, + "grad_norm": 0.1613796353340149, + "learning_rate": 4.567569328324177e-06, + "loss": 0.9407, + "step": 59740 + }, + { + "epoch": 0.4325102970024684, + "grad_norm": 0.15587370097637177, + "learning_rate": 4.567496941663591e-06, + "loss": 0.9229, + "step": 59750 + }, + { + "epoch": 0.4325826836630546, + "grad_norm": 0.15507788956165314, + "learning_rate": 4.567424555003004e-06, + "loss": 0.9281, + "step": 59760 + }, + { + "epoch": 0.43265507032364076, + "grad_norm": 0.15560147166252136, + "learning_rate": 4.567352168342418e-06, + "loss": 0.9467, + "step": 59770 + }, + { + "epoch": 0.43272745698422693, + "grad_norm": 0.1680835783481598, + "learning_rate": 4.567279781681832e-06, + "loss": 0.9317, + "step": 59780 + }, + { + "epoch": 0.4327998436448131, + "grad_norm": 0.18736176192760468, + "learning_rate": 4.567207395021246e-06, + "loss": 0.9314, + "step": 59790 + }, + { + "epoch": 0.43287223030539934, + "grad_norm": 0.1607762575149536, + "learning_rate": 4.56713500836066e-06, + "loss": 0.9391, + "step": 59800 + }, + { + "epoch": 0.4329446169659855, + "grad_norm": 0.16497263312339783, + "learning_rate": 4.567062621700073e-06, + "loss": 0.9347, + "step": 59810 + }, + { + "epoch": 0.4330170036265717, + "grad_norm": 0.14769291877746582, + "learning_rate": 4.566990235039488e-06, + "loss": 0.9089, + "step": 59820 + }, + { + "epoch": 0.43308939028715787, + "grad_norm": 0.1630723625421524, + "learning_rate": 4.566917848378901e-06, + "loss": 0.9294, + "step": 59830 + }, + { + "epoch": 0.43316177694774405, + "grad_norm": 0.1905062049627304, + "learning_rate": 4.566845461718315e-06, + "loss": 0.9248, + "step": 59840 + }, + { + "epoch": 0.4332341636083303, + "grad_norm": 0.16832837462425232, + "learning_rate": 4.566773075057729e-06, + "loss": 0.9424, + "step": 59850 + }, + { + "epoch": 0.43330655026891646, + "grad_norm": 0.16252058744430542, + "learning_rate": 4.566700688397143e-06, + "loss": 0.9272, + "step": 59860 + }, + { + "epoch": 0.43337893692950263, + "grad_norm": 0.18382105231285095, + "learning_rate": 4.566628301736557e-06, + "loss": 0.9257, + "step": 59870 + }, + { + "epoch": 0.4334513235900888, + "grad_norm": 0.16314184665679932, + "learning_rate": 4.56655591507597e-06, + "loss": 0.9395, + "step": 59880 + }, + { + "epoch": 0.433523710250675, + "grad_norm": 0.17523783445358276, + "learning_rate": 4.566483528415384e-06, + "loss": 0.926, + "step": 59890 + }, + { + "epoch": 0.4335960969112612, + "grad_norm": 0.16567225754261017, + "learning_rate": 4.566411141754798e-06, + "loss": 0.9382, + "step": 59900 + }, + { + "epoch": 0.4336684835718474, + "grad_norm": 0.16518938541412354, + "learning_rate": 4.566338755094212e-06, + "loss": 0.9248, + "step": 59910 + }, + { + "epoch": 0.4337408702324336, + "grad_norm": 0.147177055478096, + "learning_rate": 4.566266368433626e-06, + "loss": 0.9317, + "step": 59920 + }, + { + "epoch": 0.43381325689301975, + "grad_norm": 0.1555219441652298, + "learning_rate": 4.566193981773039e-06, + "loss": 0.9323, + "step": 59930 + }, + { + "epoch": 0.4338856435536059, + "grad_norm": 0.1518564671278, + "learning_rate": 4.566121595112453e-06, + "loss": 0.9413, + "step": 59940 + }, + { + "epoch": 0.4339580302141921, + "grad_norm": 0.1824176162481308, + "learning_rate": 4.566049208451867e-06, + "loss": 0.9254, + "step": 59950 + }, + { + "epoch": 0.43403041687477834, + "grad_norm": 0.16142070293426514, + "learning_rate": 4.565976821791281e-06, + "loss": 0.9313, + "step": 59960 + }, + { + "epoch": 0.4341028035353645, + "grad_norm": 0.1917445957660675, + "learning_rate": 4.5659044351306945e-06, + "loss": 0.9205, + "step": 59970 + }, + { + "epoch": 0.4341751901959507, + "grad_norm": 0.1893223375082016, + "learning_rate": 4.565832048470108e-06, + "loss": 0.932, + "step": 59980 + }, + { + "epoch": 0.43424757685653687, + "grad_norm": 0.16585801541805267, + "learning_rate": 4.565759661809523e-06, + "loss": 0.9321, + "step": 59990 + }, + { + "epoch": 0.43431996351712304, + "grad_norm": 0.1668858528137207, + "learning_rate": 4.565687275148936e-06, + "loss": 0.9322, + "step": 60000 + }, + { + "epoch": 0.4343923501777093, + "grad_norm": 0.16714520752429962, + "learning_rate": 4.56561488848835e-06, + "loss": 0.9363, + "step": 60010 + }, + { + "epoch": 0.43446473683829545, + "grad_norm": 0.5191177129745483, + "learning_rate": 4.5655425018277634e-06, + "loss": 0.9313, + "step": 60020 + }, + { + "epoch": 0.43453712349888163, + "grad_norm": 0.16211098432540894, + "learning_rate": 4.565470115167178e-06, + "loss": 0.9287, + "step": 60030 + }, + { + "epoch": 0.4346095101594678, + "grad_norm": 0.15494702756404877, + "learning_rate": 4.565397728506591e-06, + "loss": 0.9223, + "step": 60040 + }, + { + "epoch": 0.434681896820054, + "grad_norm": 0.1619596928358078, + "learning_rate": 4.565325341846005e-06, + "loss": 0.9234, + "step": 60050 + }, + { + "epoch": 0.4347542834806402, + "grad_norm": 0.1678442656993866, + "learning_rate": 4.565252955185419e-06, + "loss": 0.931, + "step": 60060 + }, + { + "epoch": 0.4348266701412264, + "grad_norm": 0.1571994423866272, + "learning_rate": 4.565180568524832e-06, + "loss": 0.9274, + "step": 60070 + }, + { + "epoch": 0.43489905680181257, + "grad_norm": 0.16465076804161072, + "learning_rate": 4.565108181864246e-06, + "loss": 0.9344, + "step": 60080 + }, + { + "epoch": 0.43497144346239874, + "grad_norm": 0.1529812514781952, + "learning_rate": 4.56503579520366e-06, + "loss": 0.9154, + "step": 60090 + }, + { + "epoch": 0.4350438301229849, + "grad_norm": 0.1658780574798584, + "learning_rate": 4.564963408543074e-06, + "loss": 0.9321, + "step": 60100 + }, + { + "epoch": 0.4351162167835711, + "grad_norm": 0.15867140889167786, + "learning_rate": 4.564891021882488e-06, + "loss": 0.9317, + "step": 60110 + }, + { + "epoch": 0.43518860344415733, + "grad_norm": 0.1777503490447998, + "learning_rate": 4.564818635221901e-06, + "loss": 0.9306, + "step": 60120 + }, + { + "epoch": 0.4352609901047435, + "grad_norm": 0.15686187148094177, + "learning_rate": 4.564746248561315e-06, + "loss": 0.9357, + "step": 60130 + }, + { + "epoch": 0.4353333767653297, + "grad_norm": 0.15787990391254425, + "learning_rate": 4.564673861900729e-06, + "loss": 0.9264, + "step": 60140 + }, + { + "epoch": 0.43540576342591586, + "grad_norm": 0.16923680901527405, + "learning_rate": 4.564601475240143e-06, + "loss": 0.9343, + "step": 60150 + }, + { + "epoch": 0.43547815008650204, + "grad_norm": 0.1720929890871048, + "learning_rate": 4.564529088579557e-06, + "loss": 0.9384, + "step": 60160 + }, + { + "epoch": 0.43555053674708827, + "grad_norm": 0.2149965465068817, + "learning_rate": 4.56445670191897e-06, + "loss": 0.9439, + "step": 60170 + }, + { + "epoch": 0.43562292340767444, + "grad_norm": 0.15889641642570496, + "learning_rate": 4.564384315258385e-06, + "loss": 0.926, + "step": 60180 + }, + { + "epoch": 0.4356953100682606, + "grad_norm": 0.203876331448555, + "learning_rate": 4.564311928597798e-06, + "loss": 0.9298, + "step": 60190 + }, + { + "epoch": 0.4357676967288468, + "grad_norm": 0.15438738465309143, + "learning_rate": 4.564239541937212e-06, + "loss": 0.9273, + "step": 60200 + }, + { + "epoch": 0.435840083389433, + "grad_norm": 0.1764325052499771, + "learning_rate": 4.5641671552766255e-06, + "loss": 0.9513, + "step": 60210 + }, + { + "epoch": 0.4359124700500192, + "grad_norm": 0.1527344286441803, + "learning_rate": 4.56409476861604e-06, + "loss": 0.9259, + "step": 60220 + }, + { + "epoch": 0.4359848567106054, + "grad_norm": 0.15969283878803253, + "learning_rate": 4.564022381955454e-06, + "loss": 0.9343, + "step": 60230 + }, + { + "epoch": 0.43605724337119156, + "grad_norm": 0.1652412861585617, + "learning_rate": 4.563949995294867e-06, + "loss": 0.9318, + "step": 60240 + }, + { + "epoch": 0.43612963003177774, + "grad_norm": 0.1574745625257492, + "learning_rate": 4.563877608634281e-06, + "loss": 0.9204, + "step": 60250 + }, + { + "epoch": 0.4362020166923639, + "grad_norm": 0.16842874884605408, + "learning_rate": 4.563805221973695e-06, + "loss": 0.928, + "step": 60260 + }, + { + "epoch": 0.4362744033529501, + "grad_norm": 0.17006808519363403, + "learning_rate": 4.563732835313109e-06, + "loss": 0.9243, + "step": 60270 + }, + { + "epoch": 0.4363467900135363, + "grad_norm": 0.16430647671222687, + "learning_rate": 4.5636604486525225e-06, + "loss": 0.9332, + "step": 60280 + }, + { + "epoch": 0.4364191766741225, + "grad_norm": 0.15201468765735626, + "learning_rate": 4.563588061991936e-06, + "loss": 0.9301, + "step": 60290 + }, + { + "epoch": 0.4364915633347087, + "grad_norm": 0.15162621438503265, + "learning_rate": 4.563515675331351e-06, + "loss": 0.9387, + "step": 60300 + }, + { + "epoch": 0.43656394999529485, + "grad_norm": 0.16102218627929688, + "learning_rate": 4.563443288670764e-06, + "loss": 0.9418, + "step": 60310 + }, + { + "epoch": 0.43663633665588103, + "grad_norm": 0.1847027838230133, + "learning_rate": 4.563370902010178e-06, + "loss": 0.9279, + "step": 60320 + }, + { + "epoch": 0.43670872331646726, + "grad_norm": 0.18758545815944672, + "learning_rate": 4.5632985153495915e-06, + "loss": 0.9381, + "step": 60330 + }, + { + "epoch": 0.43678110997705344, + "grad_norm": 0.16631188988685608, + "learning_rate": 4.563226128689006e-06, + "loss": 0.933, + "step": 60340 + }, + { + "epoch": 0.4368534966376396, + "grad_norm": 0.1626015156507492, + "learning_rate": 4.5631537420284196e-06, + "loss": 0.9305, + "step": 60350 + }, + { + "epoch": 0.4369258832982258, + "grad_norm": 0.16860458254814148, + "learning_rate": 4.563081355367833e-06, + "loss": 0.9341, + "step": 60360 + }, + { + "epoch": 0.43699826995881197, + "grad_norm": 0.15493610501289368, + "learning_rate": 4.563008968707247e-06, + "loss": 0.9321, + "step": 60370 + }, + { + "epoch": 0.4370706566193982, + "grad_norm": 0.16478878259658813, + "learning_rate": 4.562936582046661e-06, + "loss": 0.9218, + "step": 60380 + }, + { + "epoch": 0.4371430432799844, + "grad_norm": 0.16642285883426666, + "learning_rate": 4.562864195386075e-06, + "loss": 0.9375, + "step": 60390 + }, + { + "epoch": 0.43721542994057055, + "grad_norm": 0.1687694787979126, + "learning_rate": 4.5627918087254885e-06, + "loss": 0.922, + "step": 60400 + }, + { + "epoch": 0.43728781660115673, + "grad_norm": 0.15206386148929596, + "learning_rate": 4.562719422064902e-06, + "loss": 0.9289, + "step": 60410 + }, + { + "epoch": 0.4373602032617429, + "grad_norm": 0.1521196812391281, + "learning_rate": 4.5626470354043166e-06, + "loss": 0.9391, + "step": 60420 + }, + { + "epoch": 0.43743258992232914, + "grad_norm": 0.1528043895959854, + "learning_rate": 4.56257464874373e-06, + "loss": 0.9537, + "step": 60430 + }, + { + "epoch": 0.4375049765829153, + "grad_norm": 0.16489632427692413, + "learning_rate": 4.562502262083144e-06, + "loss": 0.9462, + "step": 60440 + }, + { + "epoch": 0.4375773632435015, + "grad_norm": 0.1611616611480713, + "learning_rate": 4.562429875422557e-06, + "loss": 0.94, + "step": 60450 + }, + { + "epoch": 0.43764974990408767, + "grad_norm": 0.1894521713256836, + "learning_rate": 4.562357488761972e-06, + "loss": 0.937, + "step": 60460 + }, + { + "epoch": 0.43772213656467385, + "grad_norm": 0.1543973833322525, + "learning_rate": 4.5622851021013855e-06, + "loss": 0.9228, + "step": 60470 + }, + { + "epoch": 0.43779452322526, + "grad_norm": 0.15751250088214874, + "learning_rate": 4.562212715440799e-06, + "loss": 0.9269, + "step": 60480 + }, + { + "epoch": 0.43786690988584626, + "grad_norm": 0.15188553929328918, + "learning_rate": 4.562140328780213e-06, + "loss": 0.9333, + "step": 60490 + }, + { + "epoch": 0.43793929654643243, + "grad_norm": 0.1614580750465393, + "learning_rate": 4.562067942119627e-06, + "loss": 0.9432, + "step": 60500 + }, + { + "epoch": 0.4380116832070186, + "grad_norm": 0.18022243678569794, + "learning_rate": 4.561995555459041e-06, + "loss": 0.9232, + "step": 60510 + }, + { + "epoch": 0.4380840698676048, + "grad_norm": 0.16363121569156647, + "learning_rate": 4.561923168798454e-06, + "loss": 0.9374, + "step": 60520 + }, + { + "epoch": 0.43815645652819096, + "grad_norm": 0.16248828172683716, + "learning_rate": 4.561850782137868e-06, + "loss": 0.9284, + "step": 60530 + }, + { + "epoch": 0.4382288431887772, + "grad_norm": 0.20947439968585968, + "learning_rate": 4.5617783954772825e-06, + "loss": 0.9374, + "step": 60540 + }, + { + "epoch": 0.43830122984936337, + "grad_norm": 0.1605253666639328, + "learning_rate": 4.561706008816696e-06, + "loss": 0.9469, + "step": 60550 + }, + { + "epoch": 0.43837361650994955, + "grad_norm": 0.1608964055776596, + "learning_rate": 4.56163362215611e-06, + "loss": 0.9317, + "step": 60560 + }, + { + "epoch": 0.4384460031705357, + "grad_norm": 0.18973439931869507, + "learning_rate": 4.561561235495523e-06, + "loss": 0.9383, + "step": 60570 + }, + { + "epoch": 0.4385183898311219, + "grad_norm": 0.16821689903736115, + "learning_rate": 4.561488848834937e-06, + "loss": 0.9244, + "step": 60580 + }, + { + "epoch": 0.43859077649170813, + "grad_norm": 0.16728858649730682, + "learning_rate": 4.561416462174351e-06, + "loss": 0.9172, + "step": 60590 + }, + { + "epoch": 0.4386631631522943, + "grad_norm": 0.15404008328914642, + "learning_rate": 4.561344075513764e-06, + "loss": 0.9352, + "step": 60600 + }, + { + "epoch": 0.4387355498128805, + "grad_norm": 0.20690277218818665, + "learning_rate": 4.561271688853179e-06, + "loss": 0.9347, + "step": 60610 + }, + { + "epoch": 0.43880793647346666, + "grad_norm": 0.16291332244873047, + "learning_rate": 4.561199302192592e-06, + "loss": 0.9295, + "step": 60620 + }, + { + "epoch": 0.43888032313405284, + "grad_norm": 0.18108299374580383, + "learning_rate": 4.561126915532006e-06, + "loss": 0.9339, + "step": 60630 + }, + { + "epoch": 0.438952709794639, + "grad_norm": 0.15784204006195068, + "learning_rate": 4.5610545288714195e-06, + "loss": 0.9267, + "step": 60640 + }, + { + "epoch": 0.43902509645522525, + "grad_norm": 0.15953408181667328, + "learning_rate": 4.560982142210834e-06, + "loss": 0.9392, + "step": 60650 + }, + { + "epoch": 0.4390974831158114, + "grad_norm": 0.14857307076454163, + "learning_rate": 4.560909755550248e-06, + "loss": 0.9322, + "step": 60660 + }, + { + "epoch": 0.4391698697763976, + "grad_norm": 0.16134792566299438, + "learning_rate": 4.560837368889661e-06, + "loss": 0.9428, + "step": 60670 + }, + { + "epoch": 0.4392422564369838, + "grad_norm": 0.15805503726005554, + "learning_rate": 4.560764982229075e-06, + "loss": 0.9246, + "step": 60680 + }, + { + "epoch": 0.43931464309756996, + "grad_norm": 0.16385342180728912, + "learning_rate": 4.560692595568489e-06, + "loss": 0.9318, + "step": 60690 + }, + { + "epoch": 0.4393870297581562, + "grad_norm": 0.16679242253303528, + "learning_rate": 4.560620208907903e-06, + "loss": 0.915, + "step": 60700 + }, + { + "epoch": 0.43945941641874237, + "grad_norm": 0.15610092878341675, + "learning_rate": 4.5605478222473165e-06, + "loss": 0.926, + "step": 60710 + }, + { + "epoch": 0.43953180307932854, + "grad_norm": 0.16888998448848724, + "learning_rate": 4.56047543558673e-06, + "loss": 0.9093, + "step": 60720 + }, + { + "epoch": 0.4396041897399147, + "grad_norm": 0.31106677651405334, + "learning_rate": 4.560403048926144e-06, + "loss": 0.9279, + "step": 60730 + }, + { + "epoch": 0.4396765764005009, + "grad_norm": 0.16773858666419983, + "learning_rate": 4.560330662265558e-06, + "loss": 0.9385, + "step": 60740 + }, + { + "epoch": 0.43974896306108713, + "grad_norm": 0.1473310887813568, + "learning_rate": 4.560258275604972e-06, + "loss": 0.9347, + "step": 60750 + }, + { + "epoch": 0.4398213497216733, + "grad_norm": 0.17835259437561035, + "learning_rate": 4.5601858889443854e-06, + "loss": 0.9369, + "step": 60760 + }, + { + "epoch": 0.4398937363822595, + "grad_norm": 0.15652461349964142, + "learning_rate": 4.560113502283799e-06, + "loss": 0.9309, + "step": 60770 + }, + { + "epoch": 0.43996612304284566, + "grad_norm": 0.15282343327999115, + "learning_rate": 4.5600411156232135e-06, + "loss": 0.927, + "step": 60780 + }, + { + "epoch": 0.44003850970343183, + "grad_norm": 0.16254651546478271, + "learning_rate": 4.559968728962627e-06, + "loss": 0.9371, + "step": 60790 + }, + { + "epoch": 0.440110896364018, + "grad_norm": 0.16506025195121765, + "learning_rate": 4.559896342302041e-06, + "loss": 0.9312, + "step": 60800 + }, + { + "epoch": 0.44018328302460424, + "grad_norm": 0.16597755253314972, + "learning_rate": 4.559823955641454e-06, + "loss": 0.9336, + "step": 60810 + }, + { + "epoch": 0.4402556696851904, + "grad_norm": 0.15858708322048187, + "learning_rate": 4.559751568980869e-06, + "loss": 0.9463, + "step": 60820 + }, + { + "epoch": 0.4403280563457766, + "grad_norm": 0.16279590129852295, + "learning_rate": 4.5596791823202824e-06, + "loss": 0.9377, + "step": 60830 + }, + { + "epoch": 0.4404004430063628, + "grad_norm": 0.16087251901626587, + "learning_rate": 4.559606795659696e-06, + "loss": 0.9271, + "step": 60840 + }, + { + "epoch": 0.44047282966694895, + "grad_norm": 0.17947107553482056, + "learning_rate": 4.55953440899911e-06, + "loss": 0.9367, + "step": 60850 + }, + { + "epoch": 0.4405452163275352, + "grad_norm": 0.17072343826293945, + "learning_rate": 4.559462022338524e-06, + "loss": 0.9294, + "step": 60860 + }, + { + "epoch": 0.44061760298812136, + "grad_norm": 0.16467486321926117, + "learning_rate": 4.559389635677938e-06, + "loss": 0.9368, + "step": 60870 + }, + { + "epoch": 0.44068998964870754, + "grad_norm": 0.1682046353816986, + "learning_rate": 4.559317249017351e-06, + "loss": 0.9272, + "step": 60880 + }, + { + "epoch": 0.4407623763092937, + "grad_norm": 0.16743090748786926, + "learning_rate": 4.559244862356765e-06, + "loss": 0.9356, + "step": 60890 + }, + { + "epoch": 0.4408347629698799, + "grad_norm": 0.16245803236961365, + "learning_rate": 4.5591724756961795e-06, + "loss": 0.9326, + "step": 60900 + }, + { + "epoch": 0.4409071496304661, + "grad_norm": 0.1696007251739502, + "learning_rate": 4.559100089035593e-06, + "loss": 0.9131, + "step": 60910 + }, + { + "epoch": 0.4409795362910523, + "grad_norm": 0.16754589974880219, + "learning_rate": 4.559027702375007e-06, + "loss": 0.9268, + "step": 60920 + }, + { + "epoch": 0.4410519229516385, + "grad_norm": 0.17982327938079834, + "learning_rate": 4.55895531571442e-06, + "loss": 0.9366, + "step": 60930 + }, + { + "epoch": 0.44112430961222465, + "grad_norm": 0.15991713106632233, + "learning_rate": 4.558882929053835e-06, + "loss": 0.9275, + "step": 60940 + }, + { + "epoch": 0.44119669627281083, + "grad_norm": 0.16579997539520264, + "learning_rate": 4.558810542393248e-06, + "loss": 0.935, + "step": 60950 + }, + { + "epoch": 0.44126908293339706, + "grad_norm": 0.1842290610074997, + "learning_rate": 4.558738155732662e-06, + "loss": 0.9342, + "step": 60960 + }, + { + "epoch": 0.44134146959398324, + "grad_norm": 0.1753285974264145, + "learning_rate": 4.558665769072076e-06, + "loss": 0.9382, + "step": 60970 + }, + { + "epoch": 0.4414138562545694, + "grad_norm": 0.1587180495262146, + "learning_rate": 4.55859338241149e-06, + "loss": 0.9397, + "step": 60980 + }, + { + "epoch": 0.4414862429151556, + "grad_norm": 0.2102600336074829, + "learning_rate": 4.558520995750904e-06, + "loss": 0.9305, + "step": 60990 + }, + { + "epoch": 0.44155862957574177, + "grad_norm": 0.1546296626329422, + "learning_rate": 4.558448609090317e-06, + "loss": 0.9228, + "step": 61000 + }, + { + "epoch": 0.44163101623632794, + "grad_norm": 0.16108326613903046, + "learning_rate": 4.558376222429731e-06, + "loss": 0.94, + "step": 61010 + }, + { + "epoch": 0.4417034028969142, + "grad_norm": 0.17151787877082825, + "learning_rate": 4.558303835769145e-06, + "loss": 0.9354, + "step": 61020 + }, + { + "epoch": 0.44177578955750035, + "grad_norm": 0.17950987815856934, + "learning_rate": 4.558231449108559e-06, + "loss": 0.9282, + "step": 61030 + }, + { + "epoch": 0.44184817621808653, + "grad_norm": 0.16002346575260162, + "learning_rate": 4.558159062447973e-06, + "loss": 0.9411, + "step": 61040 + }, + { + "epoch": 0.4419205628786727, + "grad_norm": 0.15132936835289001, + "learning_rate": 4.558086675787386e-06, + "loss": 0.9273, + "step": 61050 + }, + { + "epoch": 0.4419929495392589, + "grad_norm": 0.19634242355823517, + "learning_rate": 4.558014289126801e-06, + "loss": 0.9273, + "step": 61060 + }, + { + "epoch": 0.4420653361998451, + "grad_norm": 0.17866051197052002, + "learning_rate": 4.557941902466214e-06, + "loss": 0.9362, + "step": 61070 + }, + { + "epoch": 0.4421377228604313, + "grad_norm": 0.16125331819057465, + "learning_rate": 4.557869515805628e-06, + "loss": 0.9297, + "step": 61080 + }, + { + "epoch": 0.44221010952101747, + "grad_norm": 0.16221415996551514, + "learning_rate": 4.5577971291450416e-06, + "loss": 0.925, + "step": 61090 + }, + { + "epoch": 0.44228249618160365, + "grad_norm": 0.1527089923620224, + "learning_rate": 4.557724742484455e-06, + "loss": 0.9194, + "step": 61100 + }, + { + "epoch": 0.4423548828421898, + "grad_norm": 0.17006568610668182, + "learning_rate": 4.557652355823869e-06, + "loss": 0.9305, + "step": 61110 + }, + { + "epoch": 0.44242726950277605, + "grad_norm": 0.1684480905532837, + "learning_rate": 4.557579969163282e-06, + "loss": 0.9277, + "step": 61120 + }, + { + "epoch": 0.44249965616336223, + "grad_norm": 0.16048870980739594, + "learning_rate": 4.557507582502697e-06, + "loss": 0.9333, + "step": 61130 + }, + { + "epoch": 0.4425720428239484, + "grad_norm": 0.16578523814678192, + "learning_rate": 4.5574351958421105e-06, + "loss": 0.9288, + "step": 61140 + }, + { + "epoch": 0.4426444294845346, + "grad_norm": 0.16607344150543213, + "learning_rate": 4.557362809181524e-06, + "loss": 0.9125, + "step": 61150 + }, + { + "epoch": 0.44271681614512076, + "grad_norm": 0.1659523993730545, + "learning_rate": 4.557290422520938e-06, + "loss": 0.9324, + "step": 61160 + }, + { + "epoch": 0.44278920280570694, + "grad_norm": 0.17935402691364288, + "learning_rate": 4.557218035860352e-06, + "loss": 0.9362, + "step": 61170 + }, + { + "epoch": 0.44286158946629317, + "grad_norm": 0.16320429742336273, + "learning_rate": 4.557145649199766e-06, + "loss": 0.9305, + "step": 61180 + }, + { + "epoch": 0.44293397612687935, + "grad_norm": 0.15417033433914185, + "learning_rate": 4.557073262539179e-06, + "loss": 0.9378, + "step": 61190 + }, + { + "epoch": 0.4430063627874655, + "grad_norm": 0.14864222705364227, + "learning_rate": 4.557000875878593e-06, + "loss": 0.9401, + "step": 61200 + }, + { + "epoch": 0.4430787494480517, + "grad_norm": 0.189174622297287, + "learning_rate": 4.5569284892180075e-06, + "loss": 0.9357, + "step": 61210 + }, + { + "epoch": 0.4431511361086379, + "grad_norm": 0.15933531522750854, + "learning_rate": 4.556856102557421e-06, + "loss": 0.9296, + "step": 61220 + }, + { + "epoch": 0.4432235227692241, + "grad_norm": 0.15585920214653015, + "learning_rate": 4.556783715896835e-06, + "loss": 0.9277, + "step": 61230 + }, + { + "epoch": 0.4432959094298103, + "grad_norm": 0.17847231030464172, + "learning_rate": 4.556711329236248e-06, + "loss": 0.9382, + "step": 61240 + }, + { + "epoch": 0.44336829609039646, + "grad_norm": 0.15778236091136932, + "learning_rate": 4.556638942575663e-06, + "loss": 0.9191, + "step": 61250 + }, + { + "epoch": 0.44344068275098264, + "grad_norm": 0.15541084110736847, + "learning_rate": 4.556566555915076e-06, + "loss": 0.9389, + "step": 61260 + }, + { + "epoch": 0.4435130694115688, + "grad_norm": 0.17525739967823029, + "learning_rate": 4.55649416925449e-06, + "loss": 0.9447, + "step": 61270 + }, + { + "epoch": 0.44358545607215505, + "grad_norm": 0.15737952291965485, + "learning_rate": 4.556421782593904e-06, + "loss": 0.9284, + "step": 61280 + }, + { + "epoch": 0.4436578427327412, + "grad_norm": 0.16541706025600433, + "learning_rate": 4.556349395933318e-06, + "loss": 0.9284, + "step": 61290 + }, + { + "epoch": 0.4437302293933274, + "grad_norm": 0.1597539633512497, + "learning_rate": 4.556277009272732e-06, + "loss": 0.9297, + "step": 61300 + }, + { + "epoch": 0.4438026160539136, + "grad_norm": 0.15804599225521088, + "learning_rate": 4.556204622612145e-06, + "loss": 0.9233, + "step": 61310 + }, + { + "epoch": 0.44387500271449976, + "grad_norm": 0.1523711085319519, + "learning_rate": 4.556132235951559e-06, + "loss": 0.9259, + "step": 61320 + }, + { + "epoch": 0.44394738937508593, + "grad_norm": 0.18194106221199036, + "learning_rate": 4.556059849290973e-06, + "loss": 0.937, + "step": 61330 + }, + { + "epoch": 0.44401977603567216, + "grad_norm": 0.15899352729320526, + "learning_rate": 4.555987462630387e-06, + "loss": 0.9282, + "step": 61340 + }, + { + "epoch": 0.44409216269625834, + "grad_norm": 0.14628228545188904, + "learning_rate": 4.555915075969801e-06, + "loss": 0.9209, + "step": 61350 + }, + { + "epoch": 0.4441645493568445, + "grad_norm": 0.15761525928974152, + "learning_rate": 4.555842689309214e-06, + "loss": 0.9334, + "step": 61360 + }, + { + "epoch": 0.4442369360174307, + "grad_norm": 0.1836678832769394, + "learning_rate": 4.555770302648628e-06, + "loss": 0.9142, + "step": 61370 + }, + { + "epoch": 0.44430932267801687, + "grad_norm": 0.1548013538122177, + "learning_rate": 4.555697915988042e-06, + "loss": 0.9247, + "step": 61380 + }, + { + "epoch": 0.4443817093386031, + "grad_norm": 0.16135182976722717, + "learning_rate": 4.555625529327456e-06, + "loss": 0.9311, + "step": 61390 + }, + { + "epoch": 0.4444540959991893, + "grad_norm": 0.1575031578540802, + "learning_rate": 4.55555314266687e-06, + "loss": 0.9356, + "step": 61400 + }, + { + "epoch": 0.44452648265977546, + "grad_norm": 0.170990988612175, + "learning_rate": 4.555480756006283e-06, + "loss": 0.9471, + "step": 61410 + }, + { + "epoch": 0.44459886932036163, + "grad_norm": 0.157973051071167, + "learning_rate": 4.555408369345698e-06, + "loss": 0.9324, + "step": 61420 + }, + { + "epoch": 0.4446712559809478, + "grad_norm": 0.15711332857608795, + "learning_rate": 4.555335982685111e-06, + "loss": 0.9344, + "step": 61430 + }, + { + "epoch": 0.44474364264153404, + "grad_norm": 0.16744282841682434, + "learning_rate": 4.555263596024525e-06, + "loss": 0.9255, + "step": 61440 + }, + { + "epoch": 0.4448160293021202, + "grad_norm": 0.16048872470855713, + "learning_rate": 4.5551912093639385e-06, + "loss": 0.9338, + "step": 61450 + }, + { + "epoch": 0.4448884159627064, + "grad_norm": 0.17134064435958862, + "learning_rate": 4.555118822703353e-06, + "loss": 0.9264, + "step": 61460 + }, + { + "epoch": 0.4449608026232926, + "grad_norm": 0.15895238518714905, + "learning_rate": 4.555046436042767e-06, + "loss": 0.9251, + "step": 61470 + }, + { + "epoch": 0.44503318928387875, + "grad_norm": 0.16355475783348083, + "learning_rate": 4.55497404938218e-06, + "loss": 0.9235, + "step": 61480 + }, + { + "epoch": 0.445105575944465, + "grad_norm": 0.15274769067764282, + "learning_rate": 4.554901662721594e-06, + "loss": 0.9244, + "step": 61490 + }, + { + "epoch": 0.44517796260505116, + "grad_norm": 0.24952305853366852, + "learning_rate": 4.554829276061008e-06, + "loss": 0.9222, + "step": 61500 + }, + { + "epoch": 0.44525034926563734, + "grad_norm": 0.17668789625167847, + "learning_rate": 4.554756889400422e-06, + "loss": 0.9155, + "step": 61510 + }, + { + "epoch": 0.4453227359262235, + "grad_norm": 0.22048163414001465, + "learning_rate": 4.5546845027398355e-06, + "loss": 0.921, + "step": 61520 + }, + { + "epoch": 0.4453951225868097, + "grad_norm": 0.1538519263267517, + "learning_rate": 4.554612116079249e-06, + "loss": 0.9302, + "step": 61530 + }, + { + "epoch": 0.44546750924739587, + "grad_norm": 0.1744624376296997, + "learning_rate": 4.554539729418664e-06, + "loss": 0.9266, + "step": 61540 + }, + { + "epoch": 0.4455398959079821, + "grad_norm": 0.17444263398647308, + "learning_rate": 4.554467342758077e-06, + "loss": 0.9306, + "step": 61550 + }, + { + "epoch": 0.4456122825685683, + "grad_norm": 0.16291213035583496, + "learning_rate": 4.554394956097491e-06, + "loss": 0.9388, + "step": 61560 + }, + { + "epoch": 0.44568466922915445, + "grad_norm": 0.16425669193267822, + "learning_rate": 4.5543225694369044e-06, + "loss": 0.9165, + "step": 61570 + }, + { + "epoch": 0.4457570558897406, + "grad_norm": 0.16991227865219116, + "learning_rate": 4.554250182776319e-06, + "loss": 0.9208, + "step": 61580 + }, + { + "epoch": 0.4458294425503268, + "grad_norm": 0.17110396921634674, + "learning_rate": 4.5541777961157325e-06, + "loss": 0.9288, + "step": 61590 + }, + { + "epoch": 0.44590182921091304, + "grad_norm": 0.17774710059165955, + "learning_rate": 4.554105409455146e-06, + "loss": 0.9334, + "step": 61600 + }, + { + "epoch": 0.4459742158714992, + "grad_norm": 0.18292616307735443, + "learning_rate": 4.55403302279456e-06, + "loss": 0.9189, + "step": 61610 + }, + { + "epoch": 0.4460466025320854, + "grad_norm": 0.15850049257278442, + "learning_rate": 4.553960636133974e-06, + "loss": 0.9296, + "step": 61620 + }, + { + "epoch": 0.44611898919267157, + "grad_norm": 0.18073786795139313, + "learning_rate": 4.553888249473387e-06, + "loss": 0.9159, + "step": 61630 + }, + { + "epoch": 0.44619137585325774, + "grad_norm": 0.15304653346538544, + "learning_rate": 4.553815862812801e-06, + "loss": 0.9312, + "step": 61640 + }, + { + "epoch": 0.446263762513844, + "grad_norm": 0.18145766854286194, + "learning_rate": 4.553743476152215e-06, + "loss": 0.9203, + "step": 61650 + }, + { + "epoch": 0.44633614917443015, + "grad_norm": 0.16524021327495575, + "learning_rate": 4.553671089491629e-06, + "loss": 0.9272, + "step": 61660 + }, + { + "epoch": 0.44640853583501633, + "grad_norm": 0.17342203855514526, + "learning_rate": 4.553598702831042e-06, + "loss": 0.9182, + "step": 61670 + }, + { + "epoch": 0.4464809224956025, + "grad_norm": 0.165330708026886, + "learning_rate": 4.553526316170456e-06, + "loss": 0.9312, + "step": 61680 + }, + { + "epoch": 0.4465533091561887, + "grad_norm": 0.166887104511261, + "learning_rate": 4.55345392950987e-06, + "loss": 0.9259, + "step": 61690 + }, + { + "epoch": 0.44662569581677486, + "grad_norm": 0.16618217527866364, + "learning_rate": 4.553381542849284e-06, + "loss": 0.9182, + "step": 61700 + }, + { + "epoch": 0.4466980824773611, + "grad_norm": 0.15841081738471985, + "learning_rate": 4.553309156188698e-06, + "loss": 0.9348, + "step": 61710 + }, + { + "epoch": 0.44677046913794727, + "grad_norm": 0.15774931013584137, + "learning_rate": 4.553236769528111e-06, + "loss": 0.9285, + "step": 61720 + }, + { + "epoch": 0.44684285579853344, + "grad_norm": 0.16773931682109833, + "learning_rate": 4.553164382867526e-06, + "loss": 0.9249, + "step": 61730 + }, + { + "epoch": 0.4469152424591196, + "grad_norm": 0.16781432926654816, + "learning_rate": 4.553091996206939e-06, + "loss": 0.9372, + "step": 61740 + }, + { + "epoch": 0.4469876291197058, + "grad_norm": 0.15466833114624023, + "learning_rate": 4.553019609546353e-06, + "loss": 0.9349, + "step": 61750 + }, + { + "epoch": 0.44706001578029203, + "grad_norm": 0.1791720688343048, + "learning_rate": 4.5529472228857665e-06, + "loss": 0.9285, + "step": 61760 + }, + { + "epoch": 0.4471324024408782, + "grad_norm": 0.15556438267230988, + "learning_rate": 4.552874836225181e-06, + "loss": 0.9269, + "step": 61770 + }, + { + "epoch": 0.4472047891014644, + "grad_norm": 0.1610628068447113, + "learning_rate": 4.552802449564595e-06, + "loss": 0.919, + "step": 61780 + }, + { + "epoch": 0.44727717576205056, + "grad_norm": 0.16492611169815063, + "learning_rate": 4.552730062904008e-06, + "loss": 0.9167, + "step": 61790 + }, + { + "epoch": 0.44734956242263674, + "grad_norm": 0.15720947086811066, + "learning_rate": 4.552657676243422e-06, + "loss": 0.9226, + "step": 61800 + }, + { + "epoch": 0.44742194908322297, + "grad_norm": 0.1607959121465683, + "learning_rate": 4.552585289582836e-06, + "loss": 0.9386, + "step": 61810 + }, + { + "epoch": 0.44749433574380915, + "grad_norm": 0.19096055626869202, + "learning_rate": 4.55251290292225e-06, + "loss": 0.9261, + "step": 61820 + }, + { + "epoch": 0.4475667224043953, + "grad_norm": 0.1570555865764618, + "learning_rate": 4.5524405162616636e-06, + "loss": 0.9382, + "step": 61830 + }, + { + "epoch": 0.4476391090649815, + "grad_norm": 0.16010279953479767, + "learning_rate": 4.552368129601077e-06, + "loss": 0.9222, + "step": 61840 + }, + { + "epoch": 0.4477114957255677, + "grad_norm": 0.1684756577014923, + "learning_rate": 4.552295742940492e-06, + "loss": 0.9308, + "step": 61850 + }, + { + "epoch": 0.44778388238615385, + "grad_norm": 0.15752823650836945, + "learning_rate": 4.552223356279905e-06, + "loss": 0.9248, + "step": 61860 + }, + { + "epoch": 0.4478562690467401, + "grad_norm": 0.15489928424358368, + "learning_rate": 4.552150969619319e-06, + "loss": 0.9236, + "step": 61870 + }, + { + "epoch": 0.44792865570732626, + "grad_norm": 0.1638929694890976, + "learning_rate": 4.5520785829587325e-06, + "loss": 0.9329, + "step": 61880 + }, + { + "epoch": 0.44800104236791244, + "grad_norm": 0.15785187482833862, + "learning_rate": 4.552006196298147e-06, + "loss": 0.9346, + "step": 61890 + }, + { + "epoch": 0.4480734290284986, + "grad_norm": 0.16453564167022705, + "learning_rate": 4.5519338096375606e-06, + "loss": 0.9417, + "step": 61900 + }, + { + "epoch": 0.4481458156890848, + "grad_norm": 0.15828080475330353, + "learning_rate": 4.551861422976974e-06, + "loss": 0.9302, + "step": 61910 + }, + { + "epoch": 0.448218202349671, + "grad_norm": 0.1843886375427246, + "learning_rate": 4.551789036316388e-06, + "loss": 0.9294, + "step": 61920 + }, + { + "epoch": 0.4482905890102572, + "grad_norm": 0.17021195590496063, + "learning_rate": 4.551716649655802e-06, + "loss": 0.9208, + "step": 61930 + }, + { + "epoch": 0.4483629756708434, + "grad_norm": 0.16324369609355927, + "learning_rate": 4.551644262995216e-06, + "loss": 0.9285, + "step": 61940 + }, + { + "epoch": 0.44843536233142955, + "grad_norm": 0.16327841579914093, + "learning_rate": 4.5515718763346295e-06, + "loss": 0.9336, + "step": 61950 + }, + { + "epoch": 0.44850774899201573, + "grad_norm": 0.16559500992298126, + "learning_rate": 4.551499489674043e-06, + "loss": 0.9416, + "step": 61960 + }, + { + "epoch": 0.44858013565260196, + "grad_norm": 0.16372480988502502, + "learning_rate": 4.551427103013457e-06, + "loss": 0.9315, + "step": 61970 + }, + { + "epoch": 0.44865252231318814, + "grad_norm": 0.16200987994670868, + "learning_rate": 4.551354716352871e-06, + "loss": 0.9153, + "step": 61980 + }, + { + "epoch": 0.4487249089737743, + "grad_norm": 0.17236877977848053, + "learning_rate": 4.551282329692285e-06, + "loss": 0.9357, + "step": 61990 + }, + { + "epoch": 0.4487972956343605, + "grad_norm": 0.22826656699180603, + "learning_rate": 4.551209943031698e-06, + "loss": 0.9265, + "step": 62000 + }, + { + "epoch": 0.44886968229494667, + "grad_norm": 0.1597038060426712, + "learning_rate": 4.551137556371112e-06, + "loss": 0.9292, + "step": 62010 + }, + { + "epoch": 0.44894206895553285, + "grad_norm": 0.15479369461536407, + "learning_rate": 4.5510651697105265e-06, + "loss": 0.9452, + "step": 62020 + }, + { + "epoch": 0.4490144556161191, + "grad_norm": 0.1617387980222702, + "learning_rate": 4.55099278304994e-06, + "loss": 0.9311, + "step": 62030 + }, + { + "epoch": 0.44908684227670526, + "grad_norm": 0.17810650169849396, + "learning_rate": 4.550920396389354e-06, + "loss": 0.9337, + "step": 62040 + }, + { + "epoch": 0.44915922893729143, + "grad_norm": 0.15863506495952606, + "learning_rate": 4.550848009728767e-06, + "loss": 0.9391, + "step": 62050 + }, + { + "epoch": 0.4492316155978776, + "grad_norm": 0.18483230471611023, + "learning_rate": 4.550775623068182e-06, + "loss": 0.924, + "step": 62060 + }, + { + "epoch": 0.4493040022584638, + "grad_norm": 0.15691833198070526, + "learning_rate": 4.550703236407595e-06, + "loss": 0.9286, + "step": 62070 + }, + { + "epoch": 0.44937638891905, + "grad_norm": 0.15761037170886993, + "learning_rate": 4.550630849747009e-06, + "loss": 0.9414, + "step": 62080 + }, + { + "epoch": 0.4494487755796362, + "grad_norm": 0.15282666683197021, + "learning_rate": 4.550558463086423e-06, + "loss": 0.931, + "step": 62090 + }, + { + "epoch": 0.44952116224022237, + "grad_norm": 0.1992768943309784, + "learning_rate": 4.550486076425837e-06, + "loss": 0.938, + "step": 62100 + }, + { + "epoch": 0.44959354890080855, + "grad_norm": 0.15177763998508453, + "learning_rate": 4.550413689765251e-06, + "loss": 0.9436, + "step": 62110 + }, + { + "epoch": 0.4496659355613947, + "grad_norm": 0.16348634660243988, + "learning_rate": 4.550341303104664e-06, + "loss": 0.9394, + "step": 62120 + }, + { + "epoch": 0.44973832222198096, + "grad_norm": 0.18338032066822052, + "learning_rate": 4.550268916444078e-06, + "loss": 0.9319, + "step": 62130 + }, + { + "epoch": 0.44981070888256713, + "grad_norm": 0.22670073807239532, + "learning_rate": 4.5501965297834924e-06, + "loss": 0.936, + "step": 62140 + }, + { + "epoch": 0.4498830955431533, + "grad_norm": 0.17168883979320526, + "learning_rate": 4.550124143122906e-06, + "loss": 0.9335, + "step": 62150 + }, + { + "epoch": 0.4499554822037395, + "grad_norm": 0.16021141409873962, + "learning_rate": 4.550051756462319e-06, + "loss": 0.9373, + "step": 62160 + }, + { + "epoch": 0.45002786886432566, + "grad_norm": 0.1671023815870285, + "learning_rate": 4.549979369801733e-06, + "loss": 0.927, + "step": 62170 + }, + { + "epoch": 0.4501002555249119, + "grad_norm": 0.15953001379966736, + "learning_rate": 4.549906983141147e-06, + "loss": 0.9296, + "step": 62180 + }, + { + "epoch": 0.4501726421854981, + "grad_norm": 0.19120389223098755, + "learning_rate": 4.5498345964805605e-06, + "loss": 0.9239, + "step": 62190 + }, + { + "epoch": 0.45024502884608425, + "grad_norm": 0.1692996323108673, + "learning_rate": 4.549762209819974e-06, + "loss": 0.9201, + "step": 62200 + }, + { + "epoch": 0.4503174155066704, + "grad_norm": 0.15161260962486267, + "learning_rate": 4.549689823159389e-06, + "loss": 0.933, + "step": 62210 + }, + { + "epoch": 0.4503898021672566, + "grad_norm": 0.1791296750307083, + "learning_rate": 4.549617436498802e-06, + "loss": 0.9206, + "step": 62220 + }, + { + "epoch": 0.4504621888278428, + "grad_norm": 0.16728129982948303, + "learning_rate": 4.549545049838216e-06, + "loss": 0.9243, + "step": 62230 + }, + { + "epoch": 0.450534575488429, + "grad_norm": 0.15448437631130219, + "learning_rate": 4.5494726631776294e-06, + "loss": 0.922, + "step": 62240 + }, + { + "epoch": 0.4506069621490152, + "grad_norm": 0.17442266643047333, + "learning_rate": 4.549400276517044e-06, + "loss": 0.9266, + "step": 62250 + }, + { + "epoch": 0.45067934880960137, + "grad_norm": 0.16572901606559753, + "learning_rate": 4.5493278898564575e-06, + "loss": 0.9262, + "step": 62260 + }, + { + "epoch": 0.45075173547018754, + "grad_norm": 0.19222629070281982, + "learning_rate": 4.549255503195871e-06, + "loss": 0.9271, + "step": 62270 + }, + { + "epoch": 0.4508241221307737, + "grad_norm": 0.180272176861763, + "learning_rate": 4.549183116535285e-06, + "loss": 0.9252, + "step": 62280 + }, + { + "epoch": 0.45089650879135995, + "grad_norm": 0.1528458446264267, + "learning_rate": 4.549110729874699e-06, + "loss": 0.916, + "step": 62290 + }, + { + "epoch": 0.45096889545194613, + "grad_norm": 0.16604572534561157, + "learning_rate": 4.549038343214113e-06, + "loss": 0.9209, + "step": 62300 + }, + { + "epoch": 0.4510412821125323, + "grad_norm": 0.1774618774652481, + "learning_rate": 4.5489659565535264e-06, + "loss": 0.9266, + "step": 62310 + }, + { + "epoch": 0.4511136687731185, + "grad_norm": 0.16191911697387695, + "learning_rate": 4.54889356989294e-06, + "loss": 0.9276, + "step": 62320 + }, + { + "epoch": 0.45118605543370466, + "grad_norm": 0.18299001455307007, + "learning_rate": 4.5488211832323545e-06, + "loss": 0.9334, + "step": 62330 + }, + { + "epoch": 0.4512584420942909, + "grad_norm": 0.14922909438610077, + "learning_rate": 4.548748796571768e-06, + "loss": 0.9232, + "step": 62340 + }, + { + "epoch": 0.45133082875487707, + "grad_norm": 0.14895036816596985, + "learning_rate": 4.548676409911182e-06, + "loss": 0.9299, + "step": 62350 + }, + { + "epoch": 0.45140321541546324, + "grad_norm": 0.15128493309020996, + "learning_rate": 4.548604023250595e-06, + "loss": 0.9409, + "step": 62360 + }, + { + "epoch": 0.4514756020760494, + "grad_norm": 0.1607513725757599, + "learning_rate": 4.54853163659001e-06, + "loss": 0.9143, + "step": 62370 + }, + { + "epoch": 0.4515479887366356, + "grad_norm": 0.1618734747171402, + "learning_rate": 4.5484592499294235e-06, + "loss": 0.9249, + "step": 62380 + }, + { + "epoch": 0.4516203753972218, + "grad_norm": 0.18562345206737518, + "learning_rate": 4.548386863268837e-06, + "loss": 0.9292, + "step": 62390 + }, + { + "epoch": 0.451692762057808, + "grad_norm": 0.17333509027957916, + "learning_rate": 4.548314476608251e-06, + "loss": 0.929, + "step": 62400 + }, + { + "epoch": 0.4517651487183942, + "grad_norm": 0.1685020625591278, + "learning_rate": 4.548242089947665e-06, + "loss": 0.9247, + "step": 62410 + }, + { + "epoch": 0.45183753537898036, + "grad_norm": 0.16872678697109222, + "learning_rate": 4.548169703287079e-06, + "loss": 0.9225, + "step": 62420 + }, + { + "epoch": 0.45190992203956654, + "grad_norm": 0.15158583223819733, + "learning_rate": 4.548097316626492e-06, + "loss": 0.9277, + "step": 62430 + }, + { + "epoch": 0.4519823087001527, + "grad_norm": 0.18446789681911469, + "learning_rate": 4.548024929965906e-06, + "loss": 0.9227, + "step": 62440 + }, + { + "epoch": 0.45205469536073895, + "grad_norm": 0.17495040595531464, + "learning_rate": 4.5479525433053205e-06, + "loss": 0.9362, + "step": 62450 + }, + { + "epoch": 0.4521270820213251, + "grad_norm": 0.25016602873802185, + "learning_rate": 4.547880156644734e-06, + "loss": 0.9354, + "step": 62460 + }, + { + "epoch": 0.4521994686819113, + "grad_norm": 0.1564486026763916, + "learning_rate": 4.547807769984148e-06, + "loss": 0.9244, + "step": 62470 + }, + { + "epoch": 0.4522718553424975, + "grad_norm": 0.1539490669965744, + "learning_rate": 4.547735383323561e-06, + "loss": 0.9209, + "step": 62480 + }, + { + "epoch": 0.45234424200308365, + "grad_norm": 0.17183518409729004, + "learning_rate": 4.547662996662976e-06, + "loss": 0.9361, + "step": 62490 + }, + { + "epoch": 0.4524166286636699, + "grad_norm": 0.17086255550384521, + "learning_rate": 4.547590610002389e-06, + "loss": 0.9142, + "step": 62500 + }, + { + "epoch": 0.45248901532425606, + "grad_norm": 0.16200555860996246, + "learning_rate": 4.547518223341803e-06, + "loss": 0.942, + "step": 62510 + }, + { + "epoch": 0.45256140198484224, + "grad_norm": 0.1565650850534439, + "learning_rate": 4.547445836681217e-06, + "loss": 0.938, + "step": 62520 + }, + { + "epoch": 0.4526337886454284, + "grad_norm": 0.17040525376796722, + "learning_rate": 4.547373450020631e-06, + "loss": 0.9279, + "step": 62530 + }, + { + "epoch": 0.4527061753060146, + "grad_norm": 0.17260707914829254, + "learning_rate": 4.547301063360045e-06, + "loss": 0.9246, + "step": 62540 + }, + { + "epoch": 0.45277856196660077, + "grad_norm": 0.16091448068618774, + "learning_rate": 4.547228676699458e-06, + "loss": 0.9301, + "step": 62550 + }, + { + "epoch": 0.452850948627187, + "grad_norm": 0.15594513714313507, + "learning_rate": 4.547156290038872e-06, + "loss": 0.9388, + "step": 62560 + }, + { + "epoch": 0.4529233352877732, + "grad_norm": 0.17561206221580505, + "learning_rate": 4.547083903378286e-06, + "loss": 0.9311, + "step": 62570 + }, + { + "epoch": 0.45299572194835935, + "grad_norm": 0.1688334196805954, + "learning_rate": 4.5470115167177e-06, + "loss": 0.9374, + "step": 62580 + }, + { + "epoch": 0.45306810860894553, + "grad_norm": 0.26716697216033936, + "learning_rate": 4.546939130057114e-06, + "loss": 0.9332, + "step": 62590 + }, + { + "epoch": 0.4531404952695317, + "grad_norm": 0.16558301448822021, + "learning_rate": 4.546866743396527e-06, + "loss": 0.9361, + "step": 62600 + }, + { + "epoch": 0.45321288193011794, + "grad_norm": 0.16215580701828003, + "learning_rate": 4.546794356735941e-06, + "loss": 0.944, + "step": 62610 + }, + { + "epoch": 0.4532852685907041, + "grad_norm": 0.17625215649604797, + "learning_rate": 4.546721970075355e-06, + "loss": 0.9408, + "step": 62620 + }, + { + "epoch": 0.4533576552512903, + "grad_norm": 0.1480870544910431, + "learning_rate": 4.546649583414769e-06, + "loss": 0.929, + "step": 62630 + }, + { + "epoch": 0.45343004191187647, + "grad_norm": 0.17280828952789307, + "learning_rate": 4.5465771967541826e-06, + "loss": 0.9177, + "step": 62640 + }, + { + "epoch": 0.45350242857246265, + "grad_norm": 0.1757204234600067, + "learning_rate": 4.546504810093596e-06, + "loss": 0.9396, + "step": 62650 + }, + { + "epoch": 0.4535748152330489, + "grad_norm": 0.16843682527542114, + "learning_rate": 4.546432423433011e-06, + "loss": 0.9311, + "step": 62660 + }, + { + "epoch": 0.45364720189363505, + "grad_norm": 0.14993290603160858, + "learning_rate": 4.546360036772424e-06, + "loss": 0.9257, + "step": 62670 + }, + { + "epoch": 0.45371958855422123, + "grad_norm": 0.1843591332435608, + "learning_rate": 4.546287650111838e-06, + "loss": 0.9305, + "step": 62680 + }, + { + "epoch": 0.4537919752148074, + "grad_norm": 0.1545378565788269, + "learning_rate": 4.5462152634512515e-06, + "loss": 0.93, + "step": 62690 + }, + { + "epoch": 0.4538643618753936, + "grad_norm": 0.1681130826473236, + "learning_rate": 4.546142876790665e-06, + "loss": 0.9191, + "step": 62700 + }, + { + "epoch": 0.4539367485359798, + "grad_norm": 0.15913967788219452, + "learning_rate": 4.546070490130079e-06, + "loss": 0.9356, + "step": 62710 + }, + { + "epoch": 0.454009135196566, + "grad_norm": 0.16639158129692078, + "learning_rate": 4.545998103469493e-06, + "loss": 0.9405, + "step": 62720 + }, + { + "epoch": 0.45408152185715217, + "grad_norm": 0.1884455382823944, + "learning_rate": 4.545925716808907e-06, + "loss": 0.931, + "step": 62730 + }, + { + "epoch": 0.45415390851773835, + "grad_norm": 0.15364967286586761, + "learning_rate": 4.54585333014832e-06, + "loss": 0.9278, + "step": 62740 + }, + { + "epoch": 0.4542262951783245, + "grad_norm": 0.15758123993873596, + "learning_rate": 4.545780943487734e-06, + "loss": 0.9337, + "step": 62750 + }, + { + "epoch": 0.4542986818389107, + "grad_norm": 0.16586291790008545, + "learning_rate": 4.545708556827148e-06, + "loss": 0.9298, + "step": 62760 + }, + { + "epoch": 0.45437106849949693, + "grad_norm": 0.15221761167049408, + "learning_rate": 4.545636170166562e-06, + "loss": 0.9273, + "step": 62770 + }, + { + "epoch": 0.4544434551600831, + "grad_norm": 0.16838103532791138, + "learning_rate": 4.545563783505976e-06, + "loss": 0.9418, + "step": 62780 + }, + { + "epoch": 0.4545158418206693, + "grad_norm": 0.15279430150985718, + "learning_rate": 4.545491396845389e-06, + "loss": 0.9251, + "step": 62790 + }, + { + "epoch": 0.45458822848125546, + "grad_norm": 0.161879301071167, + "learning_rate": 4.545419010184803e-06, + "loss": 0.9355, + "step": 62800 + }, + { + "epoch": 0.45466061514184164, + "grad_norm": 0.15122537314891815, + "learning_rate": 4.545346623524217e-06, + "loss": 0.9282, + "step": 62810 + }, + { + "epoch": 0.45473300180242787, + "grad_norm": 0.1683684140443802, + "learning_rate": 4.545274236863631e-06, + "loss": 0.9341, + "step": 62820 + }, + { + "epoch": 0.45480538846301405, + "grad_norm": 0.16994281113147736, + "learning_rate": 4.545201850203045e-06, + "loss": 0.9273, + "step": 62830 + }, + { + "epoch": 0.4548777751236002, + "grad_norm": 0.15986701846122742, + "learning_rate": 4.545129463542458e-06, + "loss": 0.9214, + "step": 62840 + }, + { + "epoch": 0.4549501617841864, + "grad_norm": 0.1651432365179062, + "learning_rate": 4.545057076881873e-06, + "loss": 0.932, + "step": 62850 + }, + { + "epoch": 0.4550225484447726, + "grad_norm": 0.1712128072977066, + "learning_rate": 4.544984690221286e-06, + "loss": 0.9326, + "step": 62860 + }, + { + "epoch": 0.4550949351053588, + "grad_norm": 0.17609210312366486, + "learning_rate": 4.5449123035607e-06, + "loss": 0.938, + "step": 62870 + }, + { + "epoch": 0.455167321765945, + "grad_norm": 0.17556455731391907, + "learning_rate": 4.544839916900114e-06, + "loss": 0.9311, + "step": 62880 + }, + { + "epoch": 0.45523970842653116, + "grad_norm": 0.1542162448167801, + "learning_rate": 4.544767530239528e-06, + "loss": 0.9262, + "step": 62890 + }, + { + "epoch": 0.45531209508711734, + "grad_norm": 0.21663711965084076, + "learning_rate": 4.544695143578942e-06, + "loss": 0.9301, + "step": 62900 + }, + { + "epoch": 0.4553844817477035, + "grad_norm": 0.1632581651210785, + "learning_rate": 4.544622756918355e-06, + "loss": 0.942, + "step": 62910 + }, + { + "epoch": 0.4554568684082897, + "grad_norm": 0.1683775782585144, + "learning_rate": 4.544550370257769e-06, + "loss": 0.9313, + "step": 62920 + }, + { + "epoch": 0.4555292550688759, + "grad_norm": 0.16265662014484406, + "learning_rate": 4.544477983597183e-06, + "loss": 0.9255, + "step": 62930 + }, + { + "epoch": 0.4556016417294621, + "grad_norm": 0.16888990998268127, + "learning_rate": 4.544405596936597e-06, + "loss": 0.9327, + "step": 62940 + }, + { + "epoch": 0.4556740283900483, + "grad_norm": 0.1617661714553833, + "learning_rate": 4.544333210276011e-06, + "loss": 0.9365, + "step": 62950 + }, + { + "epoch": 0.45574641505063446, + "grad_norm": 0.15065929293632507, + "learning_rate": 4.544260823615424e-06, + "loss": 0.9411, + "step": 62960 + }, + { + "epoch": 0.45581880171122063, + "grad_norm": 0.15729419887065887, + "learning_rate": 4.544188436954839e-06, + "loss": 0.9305, + "step": 62970 + }, + { + "epoch": 0.45589118837180687, + "grad_norm": 0.1598915308713913, + "learning_rate": 4.544116050294252e-06, + "loss": 0.9236, + "step": 62980 + }, + { + "epoch": 0.45596357503239304, + "grad_norm": 0.1573273241519928, + "learning_rate": 4.544043663633666e-06, + "loss": 0.9221, + "step": 62990 + }, + { + "epoch": 0.4560359616929792, + "grad_norm": 0.17503662407398224, + "learning_rate": 4.5439712769730795e-06, + "loss": 0.9322, + "step": 63000 + }, + { + "epoch": 0.4561083483535654, + "grad_norm": 0.16443364322185516, + "learning_rate": 4.543898890312494e-06, + "loss": 0.9312, + "step": 63010 + }, + { + "epoch": 0.4561807350141516, + "grad_norm": 0.15545983612537384, + "learning_rate": 4.543826503651908e-06, + "loss": 0.9267, + "step": 63020 + }, + { + "epoch": 0.4562531216747378, + "grad_norm": 0.25189727544784546, + "learning_rate": 4.543754116991321e-06, + "loss": 0.919, + "step": 63030 + }, + { + "epoch": 0.456325508335324, + "grad_norm": 0.17259828746318817, + "learning_rate": 4.543681730330735e-06, + "loss": 0.9431, + "step": 63040 + }, + { + "epoch": 0.45639789499591016, + "grad_norm": 0.1540062129497528, + "learning_rate": 4.543609343670149e-06, + "loss": 0.9239, + "step": 63050 + }, + { + "epoch": 0.45647028165649634, + "grad_norm": 0.16821375489234924, + "learning_rate": 4.543536957009563e-06, + "loss": 0.9303, + "step": 63060 + }, + { + "epoch": 0.4565426683170825, + "grad_norm": 0.15353234112262726, + "learning_rate": 4.5434645703489765e-06, + "loss": 0.9242, + "step": 63070 + }, + { + "epoch": 0.4566150549776687, + "grad_norm": 0.15900860726833344, + "learning_rate": 4.54339218368839e-06, + "loss": 0.9232, + "step": 63080 + }, + { + "epoch": 0.4566874416382549, + "grad_norm": 0.2517249882221222, + "learning_rate": 4.543319797027805e-06, + "loss": 0.9415, + "step": 63090 + }, + { + "epoch": 0.4567598282988411, + "grad_norm": 0.16736070811748505, + "learning_rate": 4.543247410367218e-06, + "loss": 0.9241, + "step": 63100 + }, + { + "epoch": 0.4568322149594273, + "grad_norm": 0.1665504276752472, + "learning_rate": 4.543175023706632e-06, + "loss": 0.9348, + "step": 63110 + }, + { + "epoch": 0.45690460162001345, + "grad_norm": 0.1781657338142395, + "learning_rate": 4.5431026370460455e-06, + "loss": 0.9379, + "step": 63120 + }, + { + "epoch": 0.4569769882805996, + "grad_norm": 0.15879395604133606, + "learning_rate": 4.54303025038546e-06, + "loss": 0.9426, + "step": 63130 + }, + { + "epoch": 0.45704937494118586, + "grad_norm": 0.1565670371055603, + "learning_rate": 4.5429578637248735e-06, + "loss": 0.9176, + "step": 63140 + }, + { + "epoch": 0.45712176160177204, + "grad_norm": 0.15761685371398926, + "learning_rate": 4.542885477064287e-06, + "loss": 0.9228, + "step": 63150 + }, + { + "epoch": 0.4571941482623582, + "grad_norm": 0.15627005696296692, + "learning_rate": 4.542813090403701e-06, + "loss": 0.9354, + "step": 63160 + }, + { + "epoch": 0.4572665349229444, + "grad_norm": 0.15798023343086243, + "learning_rate": 4.542740703743115e-06, + "loss": 0.9131, + "step": 63170 + }, + { + "epoch": 0.45733892158353057, + "grad_norm": 0.1624073088169098, + "learning_rate": 4.542668317082529e-06, + "loss": 0.927, + "step": 63180 + }, + { + "epoch": 0.4574113082441168, + "grad_norm": 0.15683312714099884, + "learning_rate": 4.5425959304219425e-06, + "loss": 0.9402, + "step": 63190 + }, + { + "epoch": 0.457483694904703, + "grad_norm": 0.18702444434165955, + "learning_rate": 4.542523543761356e-06, + "loss": 0.9368, + "step": 63200 + }, + { + "epoch": 0.45755608156528915, + "grad_norm": 0.15603706240653992, + "learning_rate": 4.54245115710077e-06, + "loss": 0.9298, + "step": 63210 + }, + { + "epoch": 0.45762846822587533, + "grad_norm": 0.15618003904819489, + "learning_rate": 4.542378770440183e-06, + "loss": 0.9393, + "step": 63220 + }, + { + "epoch": 0.4577008548864615, + "grad_norm": 0.1605190932750702, + "learning_rate": 4.542306383779597e-06, + "loss": 0.9349, + "step": 63230 + }, + { + "epoch": 0.4577732415470477, + "grad_norm": 0.16419483721256256, + "learning_rate": 4.542233997119011e-06, + "loss": 0.9356, + "step": 63240 + }, + { + "epoch": 0.4578456282076339, + "grad_norm": 0.15912272036075592, + "learning_rate": 4.542161610458425e-06, + "loss": 0.932, + "step": 63250 + }, + { + "epoch": 0.4579180148682201, + "grad_norm": 0.16210229694843292, + "learning_rate": 4.542089223797839e-06, + "loss": 0.9353, + "step": 63260 + }, + { + "epoch": 0.45799040152880627, + "grad_norm": 0.16142286360263824, + "learning_rate": 4.542016837137252e-06, + "loss": 0.9464, + "step": 63270 + }, + { + "epoch": 0.45806278818939244, + "grad_norm": 0.21197547018527985, + "learning_rate": 4.541944450476667e-06, + "loss": 0.9221, + "step": 63280 + }, + { + "epoch": 0.4581351748499786, + "grad_norm": 0.17035934329032898, + "learning_rate": 4.54187206381608e-06, + "loss": 0.9299, + "step": 63290 + }, + { + "epoch": 0.45820756151056485, + "grad_norm": 0.20072509348392487, + "learning_rate": 4.541799677155494e-06, + "loss": 0.9152, + "step": 63300 + }, + { + "epoch": 0.45827994817115103, + "grad_norm": 0.1753024309873581, + "learning_rate": 4.5417272904949075e-06, + "loss": 0.9106, + "step": 63310 + }, + { + "epoch": 0.4583523348317372, + "grad_norm": 0.16419456899166107, + "learning_rate": 4.541654903834322e-06, + "loss": 0.9305, + "step": 63320 + }, + { + "epoch": 0.4584247214923234, + "grad_norm": 0.15261687338352203, + "learning_rate": 4.541582517173736e-06, + "loss": 0.9305, + "step": 63330 + }, + { + "epoch": 0.45849710815290956, + "grad_norm": 0.16415435075759888, + "learning_rate": 4.541510130513149e-06, + "loss": 0.9342, + "step": 63340 + }, + { + "epoch": 0.4585694948134958, + "grad_norm": 0.15612776577472687, + "learning_rate": 4.541437743852563e-06, + "loss": 0.9125, + "step": 63350 + }, + { + "epoch": 0.45864188147408197, + "grad_norm": 0.1619674116373062, + "learning_rate": 4.541365357191977e-06, + "loss": 0.9358, + "step": 63360 + }, + { + "epoch": 0.45871426813466815, + "grad_norm": 0.1574641913175583, + "learning_rate": 4.541292970531391e-06, + "loss": 0.9322, + "step": 63370 + }, + { + "epoch": 0.4587866547952543, + "grad_norm": 0.15913856029510498, + "learning_rate": 4.5412205838708046e-06, + "loss": 0.9454, + "step": 63380 + }, + { + "epoch": 0.4588590414558405, + "grad_norm": 0.15667736530303955, + "learning_rate": 4.541148197210218e-06, + "loss": 0.9376, + "step": 63390 + }, + { + "epoch": 0.45893142811642673, + "grad_norm": 0.15261350572109222, + "learning_rate": 4.541075810549632e-06, + "loss": 0.9474, + "step": 63400 + }, + { + "epoch": 0.4590038147770129, + "grad_norm": 0.1693621575832367, + "learning_rate": 4.541003423889046e-06, + "loss": 0.9309, + "step": 63410 + }, + { + "epoch": 0.4590762014375991, + "grad_norm": 0.15610769391059875, + "learning_rate": 4.54093103722846e-06, + "loss": 0.9228, + "step": 63420 + }, + { + "epoch": 0.45914858809818526, + "grad_norm": 0.17140141129493713, + "learning_rate": 4.5408586505678735e-06, + "loss": 0.9351, + "step": 63430 + }, + { + "epoch": 0.45922097475877144, + "grad_norm": 0.16948135197162628, + "learning_rate": 4.540786263907287e-06, + "loss": 0.9241, + "step": 63440 + }, + { + "epoch": 0.4592933614193576, + "grad_norm": 0.15839575231075287, + "learning_rate": 4.5407138772467016e-06, + "loss": 0.9205, + "step": 63450 + }, + { + "epoch": 0.45936574807994385, + "grad_norm": 0.16708028316497803, + "learning_rate": 4.540641490586115e-06, + "loss": 0.9118, + "step": 63460 + }, + { + "epoch": 0.45943813474053, + "grad_norm": 0.16213096678256989, + "learning_rate": 4.540569103925529e-06, + "loss": 0.9338, + "step": 63470 + }, + { + "epoch": 0.4595105214011162, + "grad_norm": 0.16923314332962036, + "learning_rate": 4.540496717264942e-06, + "loss": 0.9167, + "step": 63480 + }, + { + "epoch": 0.4595829080617024, + "grad_norm": 0.15057291090488434, + "learning_rate": 4.540424330604357e-06, + "loss": 0.9275, + "step": 63490 + }, + { + "epoch": 0.45965529472228855, + "grad_norm": 0.15825194120407104, + "learning_rate": 4.5403519439437705e-06, + "loss": 0.9367, + "step": 63500 + }, + { + "epoch": 0.4597276813828748, + "grad_norm": 0.16979360580444336, + "learning_rate": 4.540279557283184e-06, + "loss": 0.9326, + "step": 63510 + }, + { + "epoch": 0.45980006804346096, + "grad_norm": 0.16070100665092468, + "learning_rate": 4.540207170622598e-06, + "loss": 0.9278, + "step": 63520 + }, + { + "epoch": 0.45987245470404714, + "grad_norm": 0.15810810029506683, + "learning_rate": 4.540134783962012e-06, + "loss": 0.9187, + "step": 63530 + }, + { + "epoch": 0.4599448413646333, + "grad_norm": 0.15783362090587616, + "learning_rate": 4.540062397301426e-06, + "loss": 0.9289, + "step": 63540 + }, + { + "epoch": 0.4600172280252195, + "grad_norm": 0.15892358124256134, + "learning_rate": 4.539990010640839e-06, + "loss": 0.9234, + "step": 63550 + }, + { + "epoch": 0.4600896146858057, + "grad_norm": 0.15859895944595337, + "learning_rate": 4.539917623980253e-06, + "loss": 0.9265, + "step": 63560 + }, + { + "epoch": 0.4601620013463919, + "grad_norm": 0.16993699967861176, + "learning_rate": 4.5398452373196675e-06, + "loss": 0.9481, + "step": 63570 + }, + { + "epoch": 0.4602343880069781, + "grad_norm": 0.1587960124015808, + "learning_rate": 4.539772850659081e-06, + "loss": 0.9308, + "step": 63580 + }, + { + "epoch": 0.46030677466756426, + "grad_norm": 0.17357853055000305, + "learning_rate": 4.539700463998495e-06, + "loss": 0.9403, + "step": 63590 + }, + { + "epoch": 0.46037916132815043, + "grad_norm": 0.172526553273201, + "learning_rate": 4.539628077337908e-06, + "loss": 0.9237, + "step": 63600 + }, + { + "epoch": 0.4604515479887366, + "grad_norm": 0.1549219787120819, + "learning_rate": 4.539555690677323e-06, + "loss": 0.9297, + "step": 63610 + }, + { + "epoch": 0.46052393464932284, + "grad_norm": 0.1645515263080597, + "learning_rate": 4.5394833040167364e-06, + "loss": 0.9213, + "step": 63620 + }, + { + "epoch": 0.460596321309909, + "grad_norm": 0.1633620709180832, + "learning_rate": 4.53941091735615e-06, + "loss": 0.9293, + "step": 63630 + }, + { + "epoch": 0.4606687079704952, + "grad_norm": 0.17372886836528778, + "learning_rate": 4.539338530695564e-06, + "loss": 0.9202, + "step": 63640 + }, + { + "epoch": 0.46074109463108137, + "grad_norm": 0.37556585669517517, + "learning_rate": 4.539266144034978e-06, + "loss": 0.9344, + "step": 63650 + }, + { + "epoch": 0.46081348129166755, + "grad_norm": 0.161403089761734, + "learning_rate": 4.539193757374392e-06, + "loss": 0.9468, + "step": 63660 + }, + { + "epoch": 0.4608858679522538, + "grad_norm": 0.1543300747871399, + "learning_rate": 4.539121370713805e-06, + "loss": 0.9162, + "step": 63670 + }, + { + "epoch": 0.46095825461283996, + "grad_norm": 0.2087777554988861, + "learning_rate": 4.539048984053219e-06, + "loss": 0.9278, + "step": 63680 + }, + { + "epoch": 0.46103064127342613, + "grad_norm": 0.15856918692588806, + "learning_rate": 4.5389765973926334e-06, + "loss": 0.9155, + "step": 63690 + }, + { + "epoch": 0.4611030279340123, + "grad_norm": 0.156173437833786, + "learning_rate": 4.538904210732047e-06, + "loss": 0.9355, + "step": 63700 + }, + { + "epoch": 0.4611754145945985, + "grad_norm": 0.1753517985343933, + "learning_rate": 4.538831824071461e-06, + "loss": 0.9267, + "step": 63710 + }, + { + "epoch": 0.4612478012551847, + "grad_norm": 0.18503955006599426, + "learning_rate": 4.538759437410874e-06, + "loss": 0.9342, + "step": 63720 + }, + { + "epoch": 0.4613201879157709, + "grad_norm": 0.1558404415845871, + "learning_rate": 4.538687050750289e-06, + "loss": 0.9214, + "step": 63730 + }, + { + "epoch": 0.4613925745763571, + "grad_norm": 0.154067724943161, + "learning_rate": 4.538614664089702e-06, + "loss": 0.9238, + "step": 63740 + }, + { + "epoch": 0.46146496123694325, + "grad_norm": 0.1731044501066208, + "learning_rate": 4.538542277429115e-06, + "loss": 0.9452, + "step": 63750 + }, + { + "epoch": 0.4615373478975294, + "grad_norm": 0.17740470170974731, + "learning_rate": 4.53846989076853e-06, + "loss": 0.931, + "step": 63760 + }, + { + "epoch": 0.4616097345581156, + "grad_norm": 0.15755429863929749, + "learning_rate": 4.538397504107943e-06, + "loss": 0.918, + "step": 63770 + }, + { + "epoch": 0.46168212121870184, + "grad_norm": 0.15609146654605865, + "learning_rate": 4.538325117447357e-06, + "loss": 0.9306, + "step": 63780 + }, + { + "epoch": 0.461754507879288, + "grad_norm": 0.17046941816806793, + "learning_rate": 4.5382527307867704e-06, + "loss": 0.927, + "step": 63790 + }, + { + "epoch": 0.4618268945398742, + "grad_norm": 0.1781318038702011, + "learning_rate": 4.538180344126185e-06, + "loss": 0.9184, + "step": 63800 + }, + { + "epoch": 0.46189928120046037, + "grad_norm": 0.16463392972946167, + "learning_rate": 4.5381079574655985e-06, + "loss": 0.929, + "step": 63810 + }, + { + "epoch": 0.46197166786104654, + "grad_norm": 0.1558876931667328, + "learning_rate": 4.538035570805012e-06, + "loss": 0.9194, + "step": 63820 + }, + { + "epoch": 0.4620440545216328, + "grad_norm": 0.17599675059318542, + "learning_rate": 4.537963184144426e-06, + "loss": 0.9254, + "step": 63830 + }, + { + "epoch": 0.46211644118221895, + "grad_norm": 0.1566462218761444, + "learning_rate": 4.53789079748384e-06, + "loss": 0.9412, + "step": 63840 + }, + { + "epoch": 0.46218882784280513, + "grad_norm": 0.15512123703956604, + "learning_rate": 4.537818410823254e-06, + "loss": 0.9367, + "step": 63850 + }, + { + "epoch": 0.4622612145033913, + "grad_norm": 0.16452373564243317, + "learning_rate": 4.5377460241626675e-06, + "loss": 0.9274, + "step": 63860 + }, + { + "epoch": 0.4623336011639775, + "grad_norm": 0.16838426887989044, + "learning_rate": 4.537673637502081e-06, + "loss": 0.9143, + "step": 63870 + }, + { + "epoch": 0.4624059878245637, + "grad_norm": 0.16255596280097961, + "learning_rate": 4.5376012508414955e-06, + "loss": 0.9174, + "step": 63880 + }, + { + "epoch": 0.4624783744851499, + "grad_norm": 0.18602770566940308, + "learning_rate": 4.537528864180909e-06, + "loss": 0.9149, + "step": 63890 + }, + { + "epoch": 0.46255076114573607, + "grad_norm": 0.19781062006950378, + "learning_rate": 4.537456477520323e-06, + "loss": 0.936, + "step": 63900 + }, + { + "epoch": 0.46262314780632224, + "grad_norm": 0.1478962004184723, + "learning_rate": 4.537384090859736e-06, + "loss": 0.9191, + "step": 63910 + }, + { + "epoch": 0.4626955344669084, + "grad_norm": 0.15507012605667114, + "learning_rate": 4.537311704199151e-06, + "loss": 0.9242, + "step": 63920 + }, + { + "epoch": 0.46276792112749465, + "grad_norm": 0.17668579518795013, + "learning_rate": 4.5372393175385645e-06, + "loss": 0.9339, + "step": 63930 + }, + { + "epoch": 0.46284030778808083, + "grad_norm": 0.14959698915481567, + "learning_rate": 4.537166930877978e-06, + "loss": 0.9114, + "step": 63940 + }, + { + "epoch": 0.462912694448667, + "grad_norm": 0.17925885319709778, + "learning_rate": 4.537094544217392e-06, + "loss": 0.9206, + "step": 63950 + }, + { + "epoch": 0.4629850811092532, + "grad_norm": 0.16243740916252136, + "learning_rate": 4.537022157556806e-06, + "loss": 0.9196, + "step": 63960 + }, + { + "epoch": 0.46305746776983936, + "grad_norm": 0.3166872560977936, + "learning_rate": 4.53694977089622e-06, + "loss": 0.9352, + "step": 63970 + }, + { + "epoch": 0.46312985443042554, + "grad_norm": 0.16058658063411713, + "learning_rate": 4.536877384235633e-06, + "loss": 0.9097, + "step": 63980 + }, + { + "epoch": 0.46320224109101177, + "grad_norm": 0.15642769634723663, + "learning_rate": 4.536804997575047e-06, + "loss": 0.9307, + "step": 63990 + }, + { + "epoch": 0.46327462775159794, + "grad_norm": 0.41400203108787537, + "learning_rate": 4.5367326109144615e-06, + "loss": 0.9321, + "step": 64000 + }, + { + "epoch": 0.4633470144121841, + "grad_norm": 0.17071636021137238, + "learning_rate": 4.536660224253875e-06, + "loss": 0.9323, + "step": 64010 + }, + { + "epoch": 0.4634194010727703, + "grad_norm": 0.16446176171302795, + "learning_rate": 4.536587837593289e-06, + "loss": 0.9172, + "step": 64020 + }, + { + "epoch": 0.4634917877333565, + "grad_norm": 0.17229726910591125, + "learning_rate": 4.536515450932702e-06, + "loss": 0.9246, + "step": 64030 + }, + { + "epoch": 0.4635641743939427, + "grad_norm": 0.14730413258075714, + "learning_rate": 4.536443064272116e-06, + "loss": 0.9353, + "step": 64040 + }, + { + "epoch": 0.4636365610545289, + "grad_norm": 0.1702919751405716, + "learning_rate": 4.53637067761153e-06, + "loss": 0.9239, + "step": 64050 + }, + { + "epoch": 0.46370894771511506, + "grad_norm": 0.15090477466583252, + "learning_rate": 4.536298290950944e-06, + "loss": 0.9189, + "step": 64060 + }, + { + "epoch": 0.46378133437570124, + "grad_norm": 0.15662406384944916, + "learning_rate": 4.536225904290358e-06, + "loss": 0.9161, + "step": 64070 + }, + { + "epoch": 0.4638537210362874, + "grad_norm": 0.15603668987751007, + "learning_rate": 4.536153517629771e-06, + "loss": 0.9194, + "step": 64080 + }, + { + "epoch": 0.46392610769687365, + "grad_norm": 0.16202674806118011, + "learning_rate": 4.536081130969186e-06, + "loss": 0.9402, + "step": 64090 + }, + { + "epoch": 0.4639984943574598, + "grad_norm": 0.2072402536869049, + "learning_rate": 4.536008744308599e-06, + "loss": 0.9153, + "step": 64100 + }, + { + "epoch": 0.464070881018046, + "grad_norm": 0.16444271802902222, + "learning_rate": 4.535936357648013e-06, + "loss": 0.9216, + "step": 64110 + }, + { + "epoch": 0.4641432676786322, + "grad_norm": 0.19051018357276917, + "learning_rate": 4.5358639709874266e-06, + "loss": 0.9233, + "step": 64120 + }, + { + "epoch": 0.46421565433921835, + "grad_norm": 0.1983797252178192, + "learning_rate": 4.535791584326841e-06, + "loss": 0.9345, + "step": 64130 + }, + { + "epoch": 0.46428804099980453, + "grad_norm": 0.17344747483730316, + "learning_rate": 4.535719197666255e-06, + "loss": 0.9311, + "step": 64140 + }, + { + "epoch": 0.46436042766039076, + "grad_norm": 0.16626109182834625, + "learning_rate": 4.535646811005668e-06, + "loss": 0.9188, + "step": 64150 + }, + { + "epoch": 0.46443281432097694, + "grad_norm": 0.162200927734375, + "learning_rate": 4.535574424345082e-06, + "loss": 0.9248, + "step": 64160 + }, + { + "epoch": 0.4645052009815631, + "grad_norm": 0.43572789430618286, + "learning_rate": 4.535502037684496e-06, + "loss": 0.9225, + "step": 64170 + }, + { + "epoch": 0.4645775876421493, + "grad_norm": 0.18060703575611115, + "learning_rate": 4.53542965102391e-06, + "loss": 0.9357, + "step": 64180 + }, + { + "epoch": 0.46464997430273547, + "grad_norm": 0.16320358216762543, + "learning_rate": 4.5353572643633236e-06, + "loss": 0.9227, + "step": 64190 + }, + { + "epoch": 0.4647223609633217, + "grad_norm": 0.1623474657535553, + "learning_rate": 4.535284877702737e-06, + "loss": 0.9375, + "step": 64200 + }, + { + "epoch": 0.4647947476239079, + "grad_norm": 0.1604037880897522, + "learning_rate": 4.535212491042152e-06, + "loss": 0.9255, + "step": 64210 + }, + { + "epoch": 0.46486713428449405, + "grad_norm": 0.2360948622226715, + "learning_rate": 4.535140104381565e-06, + "loss": 0.9329, + "step": 64220 + }, + { + "epoch": 0.46493952094508023, + "grad_norm": 0.17647242546081543, + "learning_rate": 4.535067717720979e-06, + "loss": 0.9377, + "step": 64230 + }, + { + "epoch": 0.4650119076056664, + "grad_norm": 0.1619800180196762, + "learning_rate": 4.5349953310603925e-06, + "loss": 0.9352, + "step": 64240 + }, + { + "epoch": 0.46508429426625264, + "grad_norm": 0.2056133896112442, + "learning_rate": 4.534922944399807e-06, + "loss": 0.9284, + "step": 64250 + }, + { + "epoch": 0.4651566809268388, + "grad_norm": 0.16904328763484955, + "learning_rate": 4.5348505577392206e-06, + "loss": 0.9397, + "step": 64260 + }, + { + "epoch": 0.465229067587425, + "grad_norm": 0.17163747549057007, + "learning_rate": 4.534778171078634e-06, + "loss": 0.9299, + "step": 64270 + }, + { + "epoch": 0.46530145424801117, + "grad_norm": 0.15540345013141632, + "learning_rate": 4.534705784418048e-06, + "loss": 0.9273, + "step": 64280 + }, + { + "epoch": 0.46537384090859735, + "grad_norm": 0.16602273285388947, + "learning_rate": 4.534633397757461e-06, + "loss": 0.9229, + "step": 64290 + }, + { + "epoch": 0.4654462275691835, + "grad_norm": 0.15529848635196686, + "learning_rate": 4.534561011096875e-06, + "loss": 0.9227, + "step": 64300 + }, + { + "epoch": 0.46551861422976976, + "grad_norm": 0.15982522070407867, + "learning_rate": 4.534488624436289e-06, + "loss": 0.9184, + "step": 64310 + }, + { + "epoch": 0.46559100089035593, + "grad_norm": 0.16728608310222626, + "learning_rate": 4.534416237775703e-06, + "loss": 0.9283, + "step": 64320 + }, + { + "epoch": 0.4656633875509421, + "grad_norm": 0.1668313890695572, + "learning_rate": 4.534343851115117e-06, + "loss": 0.9257, + "step": 64330 + }, + { + "epoch": 0.4657357742115283, + "grad_norm": 0.16521133482456207, + "learning_rate": 4.53427146445453e-06, + "loss": 0.9199, + "step": 64340 + }, + { + "epoch": 0.46580816087211446, + "grad_norm": 0.1640012413263321, + "learning_rate": 4.534199077793944e-06, + "loss": 0.9251, + "step": 64350 + }, + { + "epoch": 0.4658805475327007, + "grad_norm": 0.15635322034358978, + "learning_rate": 4.5341266911333584e-06, + "loss": 0.9295, + "step": 64360 + }, + { + "epoch": 0.46595293419328687, + "grad_norm": 0.1682094931602478, + "learning_rate": 4.534054304472772e-06, + "loss": 0.9379, + "step": 64370 + }, + { + "epoch": 0.46602532085387305, + "grad_norm": 0.1606939285993576, + "learning_rate": 4.533981917812186e-06, + "loss": 0.925, + "step": 64380 + }, + { + "epoch": 0.4660977075144592, + "grad_norm": 0.1753852218389511, + "learning_rate": 4.533909531151599e-06, + "loss": 0.9238, + "step": 64390 + }, + { + "epoch": 0.4661700941750454, + "grad_norm": 0.20732562243938446, + "learning_rate": 4.533837144491014e-06, + "loss": 0.9268, + "step": 64400 + }, + { + "epoch": 0.46624248083563163, + "grad_norm": 0.16475224494934082, + "learning_rate": 4.533764757830427e-06, + "loss": 0.9164, + "step": 64410 + }, + { + "epoch": 0.4663148674962178, + "grad_norm": 0.17399507761001587, + "learning_rate": 4.533692371169841e-06, + "loss": 0.9204, + "step": 64420 + }, + { + "epoch": 0.466387254156804, + "grad_norm": 0.2416888028383255, + "learning_rate": 4.533619984509255e-06, + "loss": 0.9399, + "step": 64430 + }, + { + "epoch": 0.46645964081739016, + "grad_norm": 0.1748601794242859, + "learning_rate": 4.533547597848669e-06, + "loss": 0.9227, + "step": 64440 + }, + { + "epoch": 0.46653202747797634, + "grad_norm": 0.15142974257469177, + "learning_rate": 4.533475211188083e-06, + "loss": 0.9286, + "step": 64450 + }, + { + "epoch": 0.4666044141385626, + "grad_norm": 0.14932338893413544, + "learning_rate": 4.533402824527496e-06, + "loss": 0.9318, + "step": 64460 + }, + { + "epoch": 0.46667680079914875, + "grad_norm": 0.16411228477954865, + "learning_rate": 4.53333043786691e-06, + "loss": 0.9183, + "step": 64470 + }, + { + "epoch": 0.4667491874597349, + "grad_norm": 0.16134792566299438, + "learning_rate": 4.533258051206324e-06, + "loss": 0.9311, + "step": 64480 + }, + { + "epoch": 0.4668215741203211, + "grad_norm": 0.15893922746181488, + "learning_rate": 4.533185664545738e-06, + "loss": 0.9197, + "step": 64490 + }, + { + "epoch": 0.4668939607809073, + "grad_norm": 0.17452390491962433, + "learning_rate": 4.533113277885152e-06, + "loss": 0.9372, + "step": 64500 + }, + { + "epoch": 0.46696634744149346, + "grad_norm": 0.16843093931674957, + "learning_rate": 4.533040891224565e-06, + "loss": 0.9192, + "step": 64510 + }, + { + "epoch": 0.4670387341020797, + "grad_norm": 0.15596753358840942, + "learning_rate": 4.53296850456398e-06, + "loss": 0.9322, + "step": 64520 + }, + { + "epoch": 0.46711112076266587, + "grad_norm": 0.1649407148361206, + "learning_rate": 4.532896117903393e-06, + "loss": 0.9248, + "step": 64530 + }, + { + "epoch": 0.46718350742325204, + "grad_norm": 0.19480155408382416, + "learning_rate": 4.532823731242807e-06, + "loss": 0.9253, + "step": 64540 + }, + { + "epoch": 0.4672558940838382, + "grad_norm": 0.1667308658361435, + "learning_rate": 4.5327513445822205e-06, + "loss": 0.9249, + "step": 64550 + }, + { + "epoch": 0.4673282807444244, + "grad_norm": 0.16336029767990112, + "learning_rate": 4.532678957921635e-06, + "loss": 0.924, + "step": 64560 + }, + { + "epoch": 0.46740066740501063, + "grad_norm": 0.1569215953350067, + "learning_rate": 4.532606571261049e-06, + "loss": 0.9127, + "step": 64570 + }, + { + "epoch": 0.4674730540655968, + "grad_norm": 0.16221965849399567, + "learning_rate": 4.532534184600462e-06, + "loss": 0.9228, + "step": 64580 + }, + { + "epoch": 0.467545440726183, + "grad_norm": 0.16710902750492096, + "learning_rate": 4.532461797939876e-06, + "loss": 0.918, + "step": 64590 + }, + { + "epoch": 0.46761782738676916, + "grad_norm": 0.17595240473747253, + "learning_rate": 4.53238941127929e-06, + "loss": 0.9273, + "step": 64600 + }, + { + "epoch": 0.46769021404735533, + "grad_norm": 0.16132394969463348, + "learning_rate": 4.532317024618704e-06, + "loss": 0.927, + "step": 64610 + }, + { + "epoch": 0.46776260070794157, + "grad_norm": 0.14544646441936493, + "learning_rate": 4.5322446379581175e-06, + "loss": 0.925, + "step": 64620 + }, + { + "epoch": 0.46783498736852774, + "grad_norm": 0.1635567545890808, + "learning_rate": 4.532172251297531e-06, + "loss": 0.9207, + "step": 64630 + }, + { + "epoch": 0.4679073740291139, + "grad_norm": 0.15619686245918274, + "learning_rate": 4.532099864636945e-06, + "loss": 0.914, + "step": 64640 + }, + { + "epoch": 0.4679797606897001, + "grad_norm": 0.17399942874908447, + "learning_rate": 4.532027477976359e-06, + "loss": 0.9301, + "step": 64650 + }, + { + "epoch": 0.4680521473502863, + "grad_norm": 0.15665535628795624, + "learning_rate": 4.531955091315773e-06, + "loss": 0.9231, + "step": 64660 + }, + { + "epoch": 0.46812453401087245, + "grad_norm": 0.16816446185112, + "learning_rate": 4.5318827046551865e-06, + "loss": 0.9334, + "step": 64670 + }, + { + "epoch": 0.4681969206714587, + "grad_norm": 0.16004996001720428, + "learning_rate": 4.5318103179946e-06, + "loss": 0.9201, + "step": 64680 + }, + { + "epoch": 0.46826930733204486, + "grad_norm": 0.15259157121181488, + "learning_rate": 4.5317379313340145e-06, + "loss": 0.9352, + "step": 64690 + }, + { + "epoch": 0.46834169399263104, + "grad_norm": 0.1640315055847168, + "learning_rate": 4.531665544673428e-06, + "loss": 0.9199, + "step": 64700 + }, + { + "epoch": 0.4684140806532172, + "grad_norm": 0.15679945051670074, + "learning_rate": 4.531593158012842e-06, + "loss": 0.9235, + "step": 64710 + }, + { + "epoch": 0.4684864673138034, + "grad_norm": 0.16989102959632874, + "learning_rate": 4.531520771352255e-06, + "loss": 0.9157, + "step": 64720 + }, + { + "epoch": 0.4685588539743896, + "grad_norm": 0.16276511549949646, + "learning_rate": 4.53144838469167e-06, + "loss": 0.9257, + "step": 64730 + }, + { + "epoch": 0.4686312406349758, + "grad_norm": 0.1543579399585724, + "learning_rate": 4.5313759980310835e-06, + "loss": 0.91, + "step": 64740 + }, + { + "epoch": 0.468703627295562, + "grad_norm": 0.1687450408935547, + "learning_rate": 4.531303611370497e-06, + "loss": 0.9423, + "step": 64750 + }, + { + "epoch": 0.46877601395614815, + "grad_norm": 0.15913762152194977, + "learning_rate": 4.531231224709911e-06, + "loss": 0.9296, + "step": 64760 + }, + { + "epoch": 0.46884840061673433, + "grad_norm": 0.18352152407169342, + "learning_rate": 4.531158838049325e-06, + "loss": 0.9307, + "step": 64770 + }, + { + "epoch": 0.46892078727732056, + "grad_norm": 0.16911835968494415, + "learning_rate": 4.531086451388739e-06, + "loss": 0.9296, + "step": 64780 + }, + { + "epoch": 0.46899317393790674, + "grad_norm": 0.17307941615581512, + "learning_rate": 4.531014064728152e-06, + "loss": 0.9198, + "step": 64790 + }, + { + "epoch": 0.4690655605984929, + "grad_norm": 0.16192714869976044, + "learning_rate": 4.530941678067566e-06, + "loss": 0.9313, + "step": 64800 + }, + { + "epoch": 0.4691379472590791, + "grad_norm": 0.16948965191841125, + "learning_rate": 4.53086929140698e-06, + "loss": 0.9317, + "step": 64810 + }, + { + "epoch": 0.46921033391966527, + "grad_norm": 0.18143318593502045, + "learning_rate": 4.530796904746393e-06, + "loss": 0.9153, + "step": 64820 + }, + { + "epoch": 0.46928272058025144, + "grad_norm": 0.15184900164604187, + "learning_rate": 4.530724518085807e-06, + "loss": 0.9259, + "step": 64830 + }, + { + "epoch": 0.4693551072408377, + "grad_norm": 0.23423556983470917, + "learning_rate": 4.530652131425221e-06, + "loss": 0.9186, + "step": 64840 + }, + { + "epoch": 0.46942749390142385, + "grad_norm": 0.15671993792057037, + "learning_rate": 4.530579744764635e-06, + "loss": 0.9295, + "step": 64850 + }, + { + "epoch": 0.46949988056201003, + "grad_norm": 0.1595701277256012, + "learning_rate": 4.5305073581040486e-06, + "loss": 0.917, + "step": 64860 + }, + { + "epoch": 0.4695722672225962, + "grad_norm": 0.1630265712738037, + "learning_rate": 4.530434971443462e-06, + "loss": 0.9234, + "step": 64870 + }, + { + "epoch": 0.4696446538831824, + "grad_norm": 0.16177038848400116, + "learning_rate": 4.530362584782877e-06, + "loss": 0.9365, + "step": 64880 + }, + { + "epoch": 0.4697170405437686, + "grad_norm": 0.16307857632637024, + "learning_rate": 4.53029019812229e-06, + "loss": 0.9355, + "step": 64890 + }, + { + "epoch": 0.4697894272043548, + "grad_norm": 0.15251775085926056, + "learning_rate": 4.530217811461704e-06, + "loss": 0.9335, + "step": 64900 + }, + { + "epoch": 0.46986181386494097, + "grad_norm": 0.172450453042984, + "learning_rate": 4.5301454248011175e-06, + "loss": 0.9269, + "step": 64910 + }, + { + "epoch": 0.46993420052552715, + "grad_norm": 0.1565367430448532, + "learning_rate": 4.530073038140532e-06, + "loss": 0.9327, + "step": 64920 + }, + { + "epoch": 0.4700065871861133, + "grad_norm": 0.3131227493286133, + "learning_rate": 4.5300006514799456e-06, + "loss": 0.9213, + "step": 64930 + }, + { + "epoch": 0.47007897384669955, + "grad_norm": 0.1655091792345047, + "learning_rate": 4.529928264819359e-06, + "loss": 0.9409, + "step": 64940 + }, + { + "epoch": 0.47015136050728573, + "grad_norm": 0.1536223590373993, + "learning_rate": 4.529855878158773e-06, + "loss": 0.92, + "step": 64950 + }, + { + "epoch": 0.4702237471678719, + "grad_norm": 0.1615433245897293, + "learning_rate": 4.529783491498187e-06, + "loss": 0.9194, + "step": 64960 + }, + { + "epoch": 0.4702961338284581, + "grad_norm": 0.15238776803016663, + "learning_rate": 4.529711104837601e-06, + "loss": 0.9202, + "step": 64970 + }, + { + "epoch": 0.47036852048904426, + "grad_norm": 0.15168006718158722, + "learning_rate": 4.5296387181770145e-06, + "loss": 0.9292, + "step": 64980 + }, + { + "epoch": 0.47044090714963044, + "grad_norm": 0.15964418649673462, + "learning_rate": 4.529566331516428e-06, + "loss": 0.9273, + "step": 64990 + }, + { + "epoch": 0.47051329381021667, + "grad_norm": 0.1611081212759018, + "learning_rate": 4.5294939448558426e-06, + "loss": 0.9226, + "step": 65000 + }, + { + "epoch": 0.47058568047080285, + "grad_norm": 0.168733611702919, + "learning_rate": 4.529421558195256e-06, + "loss": 0.9212, + "step": 65010 + }, + { + "epoch": 0.470658067131389, + "grad_norm": 0.17109130322933197, + "learning_rate": 4.52934917153467e-06, + "loss": 0.9261, + "step": 65020 + }, + { + "epoch": 0.4707304537919752, + "grad_norm": 0.16328299045562744, + "learning_rate": 4.529276784874083e-06, + "loss": 0.9293, + "step": 65030 + }, + { + "epoch": 0.4708028404525614, + "grad_norm": 0.16657480597496033, + "learning_rate": 4.529204398213498e-06, + "loss": 0.9165, + "step": 65040 + }, + { + "epoch": 0.4708752271131476, + "grad_norm": 0.1572638899087906, + "learning_rate": 4.5291320115529115e-06, + "loss": 0.9346, + "step": 65050 + }, + { + "epoch": 0.4709476137737338, + "grad_norm": 0.16349254548549652, + "learning_rate": 4.529059624892325e-06, + "loss": 0.9294, + "step": 65060 + }, + { + "epoch": 0.47102000043431996, + "grad_norm": 0.16738827526569366, + "learning_rate": 4.528987238231739e-06, + "loss": 0.9208, + "step": 65070 + }, + { + "epoch": 0.47109238709490614, + "grad_norm": 0.1925373077392578, + "learning_rate": 4.528914851571153e-06, + "loss": 0.9135, + "step": 65080 + }, + { + "epoch": 0.4711647737554923, + "grad_norm": 0.1586436629295349, + "learning_rate": 4.528842464910567e-06, + "loss": 0.929, + "step": 65090 + }, + { + "epoch": 0.47123716041607855, + "grad_norm": 0.15072402358055115, + "learning_rate": 4.5287700782499804e-06, + "loss": 0.9166, + "step": 65100 + }, + { + "epoch": 0.4713095470766647, + "grad_norm": 0.1562575101852417, + "learning_rate": 4.528697691589394e-06, + "loss": 0.9136, + "step": 65110 + }, + { + "epoch": 0.4713819337372509, + "grad_norm": 1.8733214139938354, + "learning_rate": 4.5286253049288085e-06, + "loss": 0.9095, + "step": 65120 + }, + { + "epoch": 0.4714543203978371, + "grad_norm": 0.1686108261346817, + "learning_rate": 4.528552918268222e-06, + "loss": 0.9196, + "step": 65130 + }, + { + "epoch": 0.47152670705842326, + "grad_norm": 0.17261675000190735, + "learning_rate": 4.528480531607636e-06, + "loss": 0.9238, + "step": 65140 + }, + { + "epoch": 0.4715990937190095, + "grad_norm": 0.19091512262821198, + "learning_rate": 4.528408144947049e-06, + "loss": 0.9248, + "step": 65150 + }, + { + "epoch": 0.47167148037959566, + "grad_norm": 0.1787658929824829, + "learning_rate": 4.528335758286464e-06, + "loss": 0.9335, + "step": 65160 + }, + { + "epoch": 0.47174386704018184, + "grad_norm": 0.1631409078836441, + "learning_rate": 4.5282633716258774e-06, + "loss": 0.9183, + "step": 65170 + }, + { + "epoch": 0.471816253700768, + "grad_norm": 0.16262884438037872, + "learning_rate": 4.528190984965291e-06, + "loss": 0.9278, + "step": 65180 + }, + { + "epoch": 0.4718886403613542, + "grad_norm": 0.16872116923332214, + "learning_rate": 4.528118598304705e-06, + "loss": 0.9303, + "step": 65190 + }, + { + "epoch": 0.47196102702194037, + "grad_norm": 0.17301858961582184, + "learning_rate": 4.528046211644119e-06, + "loss": 0.941, + "step": 65200 + }, + { + "epoch": 0.4720334136825266, + "grad_norm": 0.17453503608703613, + "learning_rate": 4.527973824983533e-06, + "loss": 0.9196, + "step": 65210 + }, + { + "epoch": 0.4721058003431128, + "grad_norm": 0.1696014106273651, + "learning_rate": 4.527901438322946e-06, + "loss": 0.9275, + "step": 65220 + }, + { + "epoch": 0.47217818700369896, + "grad_norm": 0.18995621800422668, + "learning_rate": 4.52782905166236e-06, + "loss": 0.9225, + "step": 65230 + }, + { + "epoch": 0.47225057366428513, + "grad_norm": 0.1921137422323227, + "learning_rate": 4.5277566650017744e-06, + "loss": 0.9353, + "step": 65240 + }, + { + "epoch": 0.4723229603248713, + "grad_norm": 0.15778936445713043, + "learning_rate": 4.527684278341188e-06, + "loss": 0.9268, + "step": 65250 + }, + { + "epoch": 0.47239534698545754, + "grad_norm": 0.17273864150047302, + "learning_rate": 4.527611891680602e-06, + "loss": 0.9206, + "step": 65260 + }, + { + "epoch": 0.4724677336460437, + "grad_norm": 0.16963228583335876, + "learning_rate": 4.527539505020015e-06, + "loss": 0.9257, + "step": 65270 + }, + { + "epoch": 0.4725401203066299, + "grad_norm": 0.15939907729625702, + "learning_rate": 4.527467118359429e-06, + "loss": 0.9197, + "step": 65280 + }, + { + "epoch": 0.4726125069672161, + "grad_norm": 0.16947107017040253, + "learning_rate": 4.527394731698843e-06, + "loss": 0.9286, + "step": 65290 + }, + { + "epoch": 0.47268489362780225, + "grad_norm": 0.2224172055721283, + "learning_rate": 4.527322345038257e-06, + "loss": 0.9384, + "step": 65300 + }, + { + "epoch": 0.4727572802883885, + "grad_norm": 0.16941924393177032, + "learning_rate": 4.527249958377671e-06, + "loss": 0.9309, + "step": 65310 + }, + { + "epoch": 0.47282966694897466, + "grad_norm": 0.17061692476272583, + "learning_rate": 4.527177571717084e-06, + "loss": 0.9248, + "step": 65320 + }, + { + "epoch": 0.47290205360956084, + "grad_norm": 0.18717791140079498, + "learning_rate": 4.527105185056499e-06, + "loss": 0.9196, + "step": 65330 + }, + { + "epoch": 0.472974440270147, + "grad_norm": 0.163266122341156, + "learning_rate": 4.5270327983959114e-06, + "loss": 0.9331, + "step": 65340 + }, + { + "epoch": 0.4730468269307332, + "grad_norm": 0.16074928641319275, + "learning_rate": 4.526960411735326e-06, + "loss": 0.9223, + "step": 65350 + }, + { + "epoch": 0.47311921359131937, + "grad_norm": 0.15289919078350067, + "learning_rate": 4.5268880250747395e-06, + "loss": 0.9275, + "step": 65360 + }, + { + "epoch": 0.4731916002519056, + "grad_norm": 0.15109410881996155, + "learning_rate": 4.526815638414153e-06, + "loss": 0.9392, + "step": 65370 + }, + { + "epoch": 0.4732639869124918, + "grad_norm": 0.1612526923418045, + "learning_rate": 4.526743251753567e-06, + "loss": 0.9317, + "step": 65380 + }, + { + "epoch": 0.47333637357307795, + "grad_norm": 0.1555621474981308, + "learning_rate": 4.526670865092981e-06, + "loss": 0.9258, + "step": 65390 + }, + { + "epoch": 0.4734087602336641, + "grad_norm": 0.1611545830965042, + "learning_rate": 4.526598478432395e-06, + "loss": 0.9283, + "step": 65400 + }, + { + "epoch": 0.4734811468942503, + "grad_norm": 0.15748530626296997, + "learning_rate": 4.5265260917718085e-06, + "loss": 0.928, + "step": 65410 + }, + { + "epoch": 0.47355353355483654, + "grad_norm": 0.15606287121772766, + "learning_rate": 4.526453705111222e-06, + "loss": 0.9109, + "step": 65420 + }, + { + "epoch": 0.4736259202154227, + "grad_norm": 0.15752071142196655, + "learning_rate": 4.526381318450636e-06, + "loss": 0.9277, + "step": 65430 + }, + { + "epoch": 0.4736983068760089, + "grad_norm": 0.15977409482002258, + "learning_rate": 4.52630893179005e-06, + "loss": 0.9256, + "step": 65440 + }, + { + "epoch": 0.47377069353659507, + "grad_norm": 0.18297438323497772, + "learning_rate": 4.526236545129464e-06, + "loss": 0.9271, + "step": 65450 + }, + { + "epoch": 0.47384308019718124, + "grad_norm": 0.16020241379737854, + "learning_rate": 4.526164158468877e-06, + "loss": 0.9215, + "step": 65460 + }, + { + "epoch": 0.4739154668577675, + "grad_norm": 0.16484937071800232, + "learning_rate": 4.526091771808291e-06, + "loss": 0.9252, + "step": 65470 + }, + { + "epoch": 0.47398785351835365, + "grad_norm": 0.15512751042842865, + "learning_rate": 4.5260193851477055e-06, + "loss": 0.9287, + "step": 65480 + }, + { + "epoch": 0.47406024017893983, + "grad_norm": 0.17977750301361084, + "learning_rate": 4.525946998487119e-06, + "loss": 0.9197, + "step": 65490 + }, + { + "epoch": 0.474132626839526, + "grad_norm": 0.17437465488910675, + "learning_rate": 4.525874611826533e-06, + "loss": 0.9398, + "step": 65500 + }, + { + "epoch": 0.4742050135001122, + "grad_norm": 0.1768895983695984, + "learning_rate": 4.525802225165946e-06, + "loss": 0.9288, + "step": 65510 + }, + { + "epoch": 0.47427740016069836, + "grad_norm": 0.15914186835289001, + "learning_rate": 4.525729838505361e-06, + "loss": 0.9102, + "step": 65520 + }, + { + "epoch": 0.4743497868212846, + "grad_norm": 0.1690351516008377, + "learning_rate": 4.525657451844774e-06, + "loss": 0.9324, + "step": 65530 + }, + { + "epoch": 0.47442217348187077, + "grad_norm": 0.15182551741600037, + "learning_rate": 4.525585065184188e-06, + "loss": 0.9163, + "step": 65540 + }, + { + "epoch": 0.47449456014245694, + "grad_norm": 0.17388121783733368, + "learning_rate": 4.525512678523602e-06, + "loss": 0.9364, + "step": 65550 + }, + { + "epoch": 0.4745669468030431, + "grad_norm": 0.1595803201198578, + "learning_rate": 4.525440291863016e-06, + "loss": 0.934, + "step": 65560 + }, + { + "epoch": 0.4746393334636293, + "grad_norm": 0.15675079822540283, + "learning_rate": 4.52536790520243e-06, + "loss": 0.9422, + "step": 65570 + }, + { + "epoch": 0.47471172012421553, + "grad_norm": 0.37786662578582764, + "learning_rate": 4.525295518541843e-06, + "loss": 0.9165, + "step": 65580 + }, + { + "epoch": 0.4747841067848017, + "grad_norm": 0.16655652225017548, + "learning_rate": 4.525223131881257e-06, + "loss": 0.9209, + "step": 65590 + }, + { + "epoch": 0.4748564934453879, + "grad_norm": 0.18225440382957458, + "learning_rate": 4.525150745220671e-06, + "loss": 0.9251, + "step": 65600 + }, + { + "epoch": 0.47492888010597406, + "grad_norm": 0.17587362229824066, + "learning_rate": 4.525078358560085e-06, + "loss": 0.9156, + "step": 65610 + }, + { + "epoch": 0.47500126676656024, + "grad_norm": 0.1652805507183075, + "learning_rate": 4.525005971899499e-06, + "loss": 0.9384, + "step": 65620 + }, + { + "epoch": 0.47507365342714647, + "grad_norm": 0.19912905991077423, + "learning_rate": 4.524933585238912e-06, + "loss": 0.9288, + "step": 65630 + }, + { + "epoch": 0.47514604008773265, + "grad_norm": 0.16045509278774261, + "learning_rate": 4.524861198578327e-06, + "loss": 0.9382, + "step": 65640 + }, + { + "epoch": 0.4752184267483188, + "grad_norm": 0.15262702107429504, + "learning_rate": 4.52478881191774e-06, + "loss": 0.9139, + "step": 65650 + }, + { + "epoch": 0.475290813408905, + "grad_norm": 0.14551232755184174, + "learning_rate": 4.524716425257154e-06, + "loss": 0.9337, + "step": 65660 + }, + { + "epoch": 0.4753632000694912, + "grad_norm": 0.1558513045310974, + "learning_rate": 4.5246440385965676e-06, + "loss": 0.923, + "step": 65670 + }, + { + "epoch": 0.4754355867300774, + "grad_norm": 0.15776222944259644, + "learning_rate": 4.524571651935982e-06, + "loss": 0.9251, + "step": 65680 + }, + { + "epoch": 0.4755079733906636, + "grad_norm": 0.15665759146213531, + "learning_rate": 4.524499265275396e-06, + "loss": 0.9342, + "step": 65690 + }, + { + "epoch": 0.47558036005124976, + "grad_norm": 0.16208912432193756, + "learning_rate": 4.524426878614809e-06, + "loss": 0.9177, + "step": 65700 + }, + { + "epoch": 0.47565274671183594, + "grad_norm": 0.15631617605686188, + "learning_rate": 4.524354491954223e-06, + "loss": 0.9125, + "step": 65710 + }, + { + "epoch": 0.4757251333724221, + "grad_norm": 0.14883607625961304, + "learning_rate": 4.524282105293637e-06, + "loss": 0.9268, + "step": 65720 + }, + { + "epoch": 0.4757975200330083, + "grad_norm": 0.1741674840450287, + "learning_rate": 4.524209718633051e-06, + "loss": 0.9362, + "step": 65730 + }, + { + "epoch": 0.4758699066935945, + "grad_norm": 0.17127837240695953, + "learning_rate": 4.5241373319724646e-06, + "loss": 0.9356, + "step": 65740 + }, + { + "epoch": 0.4759422933541807, + "grad_norm": 0.15458397567272186, + "learning_rate": 4.524064945311878e-06, + "loss": 0.9194, + "step": 65750 + }, + { + "epoch": 0.4760146800147669, + "grad_norm": 0.17374902963638306, + "learning_rate": 4.523992558651293e-06, + "loss": 0.9327, + "step": 65760 + }, + { + "epoch": 0.47608706667535305, + "grad_norm": 0.16826695203781128, + "learning_rate": 4.523920171990706e-06, + "loss": 0.9353, + "step": 65770 + }, + { + "epoch": 0.47615945333593923, + "grad_norm": 0.17883165180683136, + "learning_rate": 4.52384778533012e-06, + "loss": 0.9363, + "step": 65780 + }, + { + "epoch": 0.47623183999652546, + "grad_norm": 0.15835782885551453, + "learning_rate": 4.5237753986695335e-06, + "loss": 0.9221, + "step": 65790 + }, + { + "epoch": 0.47630422665711164, + "grad_norm": 0.1708236187696457, + "learning_rate": 4.523703012008948e-06, + "loss": 0.932, + "step": 65800 + }, + { + "epoch": 0.4763766133176978, + "grad_norm": 0.15170232951641083, + "learning_rate": 4.5236306253483616e-06, + "loss": 0.9146, + "step": 65810 + }, + { + "epoch": 0.476448999978284, + "grad_norm": 0.23991255462169647, + "learning_rate": 4.523558238687775e-06, + "loss": 0.9293, + "step": 65820 + }, + { + "epoch": 0.47652138663887017, + "grad_norm": 0.15470875799655914, + "learning_rate": 4.523485852027189e-06, + "loss": 0.9262, + "step": 65830 + }, + { + "epoch": 0.4765937732994564, + "grad_norm": 0.1519736647605896, + "learning_rate": 4.523413465366603e-06, + "loss": 0.9267, + "step": 65840 + }, + { + "epoch": 0.4766661599600426, + "grad_norm": 0.15895114839076996, + "learning_rate": 4.523341078706017e-06, + "loss": 0.9305, + "step": 65850 + }, + { + "epoch": 0.47673854662062876, + "grad_norm": 0.21327605843544006, + "learning_rate": 4.5232686920454305e-06, + "loss": 0.9106, + "step": 65860 + }, + { + "epoch": 0.47681093328121493, + "grad_norm": 0.15450114011764526, + "learning_rate": 4.523196305384844e-06, + "loss": 0.9167, + "step": 65870 + }, + { + "epoch": 0.4768833199418011, + "grad_norm": 0.17385606467723846, + "learning_rate": 4.523123918724258e-06, + "loss": 0.9264, + "step": 65880 + }, + { + "epoch": 0.4769557066023873, + "grad_norm": 0.15806423127651215, + "learning_rate": 4.523051532063671e-06, + "loss": 0.9311, + "step": 65890 + }, + { + "epoch": 0.4770280932629735, + "grad_norm": 0.15671569108963013, + "learning_rate": 4.522979145403085e-06, + "loss": 0.9292, + "step": 65900 + }, + { + "epoch": 0.4771004799235597, + "grad_norm": 0.18585889041423798, + "learning_rate": 4.5229067587424994e-06, + "loss": 0.9216, + "step": 65910 + }, + { + "epoch": 0.47717286658414587, + "grad_norm": 0.1866776943206787, + "learning_rate": 4.522834372081913e-06, + "loss": 0.9221, + "step": 65920 + }, + { + "epoch": 0.47724525324473205, + "grad_norm": 0.1546396166086197, + "learning_rate": 4.522761985421327e-06, + "loss": 0.9335, + "step": 65930 + }, + { + "epoch": 0.4773176399053182, + "grad_norm": 0.16696631908416748, + "learning_rate": 4.52268959876074e-06, + "loss": 0.9369, + "step": 65940 + }, + { + "epoch": 0.47739002656590446, + "grad_norm": 0.1489332616329193, + "learning_rate": 4.522617212100155e-06, + "loss": 0.9173, + "step": 65950 + }, + { + "epoch": 0.47746241322649063, + "grad_norm": 0.16513670980930328, + "learning_rate": 4.522544825439568e-06, + "loss": 0.9357, + "step": 65960 + }, + { + "epoch": 0.4775347998870768, + "grad_norm": 0.16803127527236938, + "learning_rate": 4.522472438778982e-06, + "loss": 0.9284, + "step": 65970 + }, + { + "epoch": 0.477607186547663, + "grad_norm": 0.16002456843852997, + "learning_rate": 4.522400052118396e-06, + "loss": 0.9268, + "step": 65980 + }, + { + "epoch": 0.47767957320824916, + "grad_norm": 0.17356227338314056, + "learning_rate": 4.52232766545781e-06, + "loss": 0.9188, + "step": 65990 + }, + { + "epoch": 0.4777519598688354, + "grad_norm": 0.15143811702728271, + "learning_rate": 4.522255278797224e-06, + "loss": 0.925, + "step": 66000 + }, + { + "epoch": 0.4778243465294216, + "grad_norm": 0.16914960741996765, + "learning_rate": 4.522182892136637e-06, + "loss": 0.9254, + "step": 66010 + }, + { + "epoch": 0.47789673319000775, + "grad_norm": 0.18097072839736938, + "learning_rate": 4.522110505476051e-06, + "loss": 0.9262, + "step": 66020 + }, + { + "epoch": 0.4779691198505939, + "grad_norm": 0.15266379714012146, + "learning_rate": 4.522038118815465e-06, + "loss": 0.9339, + "step": 66030 + }, + { + "epoch": 0.4780415065111801, + "grad_norm": 0.16532346606254578, + "learning_rate": 4.521965732154879e-06, + "loss": 0.9308, + "step": 66040 + }, + { + "epoch": 0.4781138931717663, + "grad_norm": 0.15536561608314514, + "learning_rate": 4.521893345494293e-06, + "loss": 0.9382, + "step": 66050 + }, + { + "epoch": 0.4781862798323525, + "grad_norm": 0.17557422816753387, + "learning_rate": 4.521820958833706e-06, + "loss": 0.9254, + "step": 66060 + }, + { + "epoch": 0.4782586664929387, + "grad_norm": 0.1694338172674179, + "learning_rate": 4.52174857217312e-06, + "loss": 0.9163, + "step": 66070 + }, + { + "epoch": 0.47833105315352487, + "grad_norm": 0.1753464937210083, + "learning_rate": 4.521676185512534e-06, + "loss": 0.9185, + "step": 66080 + }, + { + "epoch": 0.47840343981411104, + "grad_norm": 0.17190249264240265, + "learning_rate": 4.521603798851948e-06, + "loss": 0.9321, + "step": 66090 + }, + { + "epoch": 0.4784758264746972, + "grad_norm": 0.19599749147891998, + "learning_rate": 4.5215314121913615e-06, + "loss": 0.9146, + "step": 66100 + }, + { + "epoch": 0.47854821313528345, + "grad_norm": 0.15936939418315887, + "learning_rate": 4.521459025530775e-06, + "loss": 0.9135, + "step": 66110 + }, + { + "epoch": 0.47862059979586963, + "grad_norm": 0.1538030058145523, + "learning_rate": 4.52138663887019e-06, + "loss": 0.9398, + "step": 66120 + }, + { + "epoch": 0.4786929864564558, + "grad_norm": 0.15321306884288788, + "learning_rate": 4.521314252209603e-06, + "loss": 0.9364, + "step": 66130 + }, + { + "epoch": 0.478765373117042, + "grad_norm": 0.17162488400936127, + "learning_rate": 4.521241865549017e-06, + "loss": 0.9273, + "step": 66140 + }, + { + "epoch": 0.47883775977762816, + "grad_norm": 0.14776068925857544, + "learning_rate": 4.5211694788884305e-06, + "loss": 0.9183, + "step": 66150 + }, + { + "epoch": 0.4789101464382144, + "grad_norm": 0.16864345967769623, + "learning_rate": 4.521097092227845e-06, + "loss": 0.9167, + "step": 66160 + }, + { + "epoch": 0.47898253309880057, + "grad_norm": 0.1763739287853241, + "learning_rate": 4.5210247055672585e-06, + "loss": 0.9372, + "step": 66170 + }, + { + "epoch": 0.47905491975938674, + "grad_norm": 0.1718112826347351, + "learning_rate": 4.520952318906672e-06, + "loss": 0.9237, + "step": 66180 + }, + { + "epoch": 0.4791273064199729, + "grad_norm": 0.1696150302886963, + "learning_rate": 4.520879932246086e-06, + "loss": 0.9025, + "step": 66190 + }, + { + "epoch": 0.4791996930805591, + "grad_norm": 0.15991909801959991, + "learning_rate": 4.5208075455855e-06, + "loss": 0.9252, + "step": 66200 + }, + { + "epoch": 0.47927207974114533, + "grad_norm": 0.15851566195487976, + "learning_rate": 4.520735158924914e-06, + "loss": 0.9346, + "step": 66210 + }, + { + "epoch": 0.4793444664017315, + "grad_norm": 0.1567084640264511, + "learning_rate": 4.5206627722643275e-06, + "loss": 0.9178, + "step": 66220 + }, + { + "epoch": 0.4794168530623177, + "grad_norm": 0.20427605509757996, + "learning_rate": 4.520590385603741e-06, + "loss": 0.9117, + "step": 66230 + }, + { + "epoch": 0.47948923972290386, + "grad_norm": 0.17024697363376617, + "learning_rate": 4.5205179989431555e-06, + "loss": 0.9149, + "step": 66240 + }, + { + "epoch": 0.47956162638349004, + "grad_norm": 0.15808874368667603, + "learning_rate": 4.520445612282569e-06, + "loss": 0.9303, + "step": 66250 + }, + { + "epoch": 0.4796340130440762, + "grad_norm": 0.15470923483371735, + "learning_rate": 4.520373225621983e-06, + "loss": 0.9306, + "step": 66260 + }, + { + "epoch": 0.47970639970466245, + "grad_norm": 0.15453216433525085, + "learning_rate": 4.520300838961396e-06, + "loss": 0.9206, + "step": 66270 + }, + { + "epoch": 0.4797787863652486, + "grad_norm": 0.15643690526485443, + "learning_rate": 4.520228452300811e-06, + "loss": 0.9228, + "step": 66280 + }, + { + "epoch": 0.4798511730258348, + "grad_norm": 0.16580794751644135, + "learning_rate": 4.5201560656402245e-06, + "loss": 0.9398, + "step": 66290 + }, + { + "epoch": 0.479923559686421, + "grad_norm": 0.15789580345153809, + "learning_rate": 4.520083678979638e-06, + "loss": 0.9263, + "step": 66300 + }, + { + "epoch": 0.47999594634700715, + "grad_norm": 0.1554155945777893, + "learning_rate": 4.520011292319052e-06, + "loss": 0.9264, + "step": 66310 + }, + { + "epoch": 0.4800683330075934, + "grad_norm": 0.15116317570209503, + "learning_rate": 4.519938905658466e-06, + "loss": 0.9056, + "step": 66320 + }, + { + "epoch": 0.48014071966817956, + "grad_norm": 0.1687956303358078, + "learning_rate": 4.51986651899788e-06, + "loss": 0.9263, + "step": 66330 + }, + { + "epoch": 0.48021310632876574, + "grad_norm": 0.16320586204528809, + "learning_rate": 4.519794132337293e-06, + "loss": 0.9258, + "step": 66340 + }, + { + "epoch": 0.4802854929893519, + "grad_norm": 0.16672076284885406, + "learning_rate": 4.519721745676707e-06, + "loss": 0.9144, + "step": 66350 + }, + { + "epoch": 0.4803578796499381, + "grad_norm": 0.1586775779724121, + "learning_rate": 4.5196493590161215e-06, + "loss": 0.9242, + "step": 66360 + }, + { + "epoch": 0.4804302663105243, + "grad_norm": 0.15324345231056213, + "learning_rate": 4.519576972355535e-06, + "loss": 0.9235, + "step": 66370 + }, + { + "epoch": 0.4805026529711105, + "grad_norm": 0.1635131537914276, + "learning_rate": 4.519504585694949e-06, + "loss": 0.9354, + "step": 66380 + }, + { + "epoch": 0.4805750396316967, + "grad_norm": 0.16541409492492676, + "learning_rate": 4.519432199034362e-06, + "loss": 0.9113, + "step": 66390 + }, + { + "epoch": 0.48064742629228285, + "grad_norm": 0.1589830070734024, + "learning_rate": 4.519359812373776e-06, + "loss": 0.9271, + "step": 66400 + }, + { + "epoch": 0.48071981295286903, + "grad_norm": 0.1620132029056549, + "learning_rate": 4.5192874257131896e-06, + "loss": 0.9318, + "step": 66410 + }, + { + "epoch": 0.4807921996134552, + "grad_norm": 0.1495799869298935, + "learning_rate": 4.519215039052603e-06, + "loss": 0.908, + "step": 66420 + }, + { + "epoch": 0.48086458627404144, + "grad_norm": 0.17354585230350494, + "learning_rate": 4.519142652392018e-06, + "loss": 0.9334, + "step": 66430 + }, + { + "epoch": 0.4809369729346276, + "grad_norm": 0.18685661256313324, + "learning_rate": 4.519070265731431e-06, + "loss": 0.9207, + "step": 66440 + }, + { + "epoch": 0.4810093595952138, + "grad_norm": 0.16093280911445618, + "learning_rate": 4.518997879070845e-06, + "loss": 0.9236, + "step": 66450 + }, + { + "epoch": 0.48108174625579997, + "grad_norm": 0.15578316152095795, + "learning_rate": 4.5189254924102585e-06, + "loss": 0.918, + "step": 66460 + }, + { + "epoch": 0.48115413291638615, + "grad_norm": 0.1621652990579605, + "learning_rate": 4.518853105749673e-06, + "loss": 0.9276, + "step": 66470 + }, + { + "epoch": 0.4812265195769724, + "grad_norm": 0.16943508386611938, + "learning_rate": 4.5187807190890866e-06, + "loss": 0.9465, + "step": 66480 + }, + { + "epoch": 0.48129890623755855, + "grad_norm": 0.17045237123966217, + "learning_rate": 4.5187083324285e-06, + "loss": 0.9317, + "step": 66490 + }, + { + "epoch": 0.48137129289814473, + "grad_norm": 0.16214360296726227, + "learning_rate": 4.518635945767914e-06, + "loss": 0.9179, + "step": 66500 + }, + { + "epoch": 0.4814436795587309, + "grad_norm": 0.15441229939460754, + "learning_rate": 4.518563559107328e-06, + "loss": 0.9354, + "step": 66510 + }, + { + "epoch": 0.4815160662193171, + "grad_norm": 0.14522579312324524, + "learning_rate": 4.518491172446742e-06, + "loss": 0.9209, + "step": 66520 + }, + { + "epoch": 0.4815884528799033, + "grad_norm": 0.15296971797943115, + "learning_rate": 4.5184187857861555e-06, + "loss": 0.9226, + "step": 66530 + }, + { + "epoch": 0.4816608395404895, + "grad_norm": 0.15403321385383606, + "learning_rate": 4.518346399125569e-06, + "loss": 0.9182, + "step": 66540 + }, + { + "epoch": 0.48173322620107567, + "grad_norm": 0.16520103812217712, + "learning_rate": 4.5182740124649836e-06, + "loss": 0.9209, + "step": 66550 + }, + { + "epoch": 0.48180561286166185, + "grad_norm": 0.16668029129505157, + "learning_rate": 4.518201625804397e-06, + "loss": 0.9384, + "step": 66560 + }, + { + "epoch": 0.481877999522248, + "grad_norm": 0.16062092781066895, + "learning_rate": 4.518129239143811e-06, + "loss": 0.9313, + "step": 66570 + }, + { + "epoch": 0.4819503861828342, + "grad_norm": 0.15920652449131012, + "learning_rate": 4.518056852483224e-06, + "loss": 0.9146, + "step": 66580 + }, + { + "epoch": 0.48202277284342043, + "grad_norm": 0.1484328955411911, + "learning_rate": 4.517984465822639e-06, + "loss": 0.9161, + "step": 66590 + }, + { + "epoch": 0.4820951595040066, + "grad_norm": 0.16604140400886536, + "learning_rate": 4.5179120791620525e-06, + "loss": 0.9102, + "step": 66600 + }, + { + "epoch": 0.4821675461645928, + "grad_norm": 0.16721735894680023, + "learning_rate": 4.517839692501466e-06, + "loss": 0.9208, + "step": 66610 + }, + { + "epoch": 0.48223993282517896, + "grad_norm": 0.17339125275611877, + "learning_rate": 4.51776730584088e-06, + "loss": 0.9244, + "step": 66620 + }, + { + "epoch": 0.48231231948576514, + "grad_norm": 0.1743367463350296, + "learning_rate": 4.517694919180294e-06, + "loss": 0.9294, + "step": 66630 + }, + { + "epoch": 0.48238470614635137, + "grad_norm": 0.1753169298171997, + "learning_rate": 4.517622532519708e-06, + "loss": 0.9206, + "step": 66640 + }, + { + "epoch": 0.48245709280693755, + "grad_norm": 0.16888897120952606, + "learning_rate": 4.5175501458591214e-06, + "loss": 0.9175, + "step": 66650 + }, + { + "epoch": 0.4825294794675237, + "grad_norm": 0.18841847777366638, + "learning_rate": 4.517477759198535e-06, + "loss": 0.9228, + "step": 66660 + }, + { + "epoch": 0.4826018661281099, + "grad_norm": 0.1644550859928131, + "learning_rate": 4.517405372537949e-06, + "loss": 0.9295, + "step": 66670 + }, + { + "epoch": 0.4826742527886961, + "grad_norm": 0.15950177609920502, + "learning_rate": 4.517332985877363e-06, + "loss": 0.9303, + "step": 66680 + }, + { + "epoch": 0.4827466394492823, + "grad_norm": 0.1527274250984192, + "learning_rate": 4.517260599216777e-06, + "loss": 0.924, + "step": 66690 + }, + { + "epoch": 0.4828190261098685, + "grad_norm": 0.1690022051334381, + "learning_rate": 4.51718821255619e-06, + "loss": 0.9212, + "step": 66700 + }, + { + "epoch": 0.48289141277045466, + "grad_norm": 0.17802709341049194, + "learning_rate": 4.517115825895604e-06, + "loss": 0.9246, + "step": 66710 + }, + { + "epoch": 0.48296379943104084, + "grad_norm": 0.15989567339420319, + "learning_rate": 4.5170434392350184e-06, + "loss": 0.9205, + "step": 66720 + }, + { + "epoch": 0.483036186091627, + "grad_norm": 0.16513442993164062, + "learning_rate": 4.516971052574432e-06, + "loss": 0.9294, + "step": 66730 + }, + { + "epoch": 0.4831085727522132, + "grad_norm": 0.157196044921875, + "learning_rate": 4.516898665913846e-06, + "loss": 0.9186, + "step": 66740 + }, + { + "epoch": 0.4831809594127994, + "grad_norm": 0.17779704928398132, + "learning_rate": 4.516826279253259e-06, + "loss": 0.9111, + "step": 66750 + }, + { + "epoch": 0.4832533460733856, + "grad_norm": 0.15791267156600952, + "learning_rate": 4.516753892592674e-06, + "loss": 0.938, + "step": 66760 + }, + { + "epoch": 0.4833257327339718, + "grad_norm": 0.16559448838233948, + "learning_rate": 4.516681505932087e-06, + "loss": 0.9158, + "step": 66770 + }, + { + "epoch": 0.48339811939455796, + "grad_norm": 0.1510489284992218, + "learning_rate": 4.516609119271501e-06, + "loss": 0.9308, + "step": 66780 + }, + { + "epoch": 0.48347050605514413, + "grad_norm": 0.16266123950481415, + "learning_rate": 4.516536732610915e-06, + "loss": 0.9223, + "step": 66790 + }, + { + "epoch": 0.48354289271573037, + "grad_norm": 0.16285644471645355, + "learning_rate": 4.516464345950329e-06, + "loss": 0.9035, + "step": 66800 + }, + { + "epoch": 0.48361527937631654, + "grad_norm": 0.16523510217666626, + "learning_rate": 4.516391959289743e-06, + "loss": 0.923, + "step": 66810 + }, + { + "epoch": 0.4836876660369027, + "grad_norm": 0.1816108375787735, + "learning_rate": 4.516319572629156e-06, + "loss": 0.9195, + "step": 66820 + }, + { + "epoch": 0.4837600526974889, + "grad_norm": 0.1596839874982834, + "learning_rate": 4.51624718596857e-06, + "loss": 0.932, + "step": 66830 + }, + { + "epoch": 0.4838324393580751, + "grad_norm": 0.16277308762073517, + "learning_rate": 4.516174799307984e-06, + "loss": 0.9247, + "step": 66840 + }, + { + "epoch": 0.4839048260186613, + "grad_norm": 0.1530946046113968, + "learning_rate": 4.516102412647398e-06, + "loss": 0.9213, + "step": 66850 + }, + { + "epoch": 0.4839772126792475, + "grad_norm": 0.16606640815734863, + "learning_rate": 4.516030025986812e-06, + "loss": 0.9209, + "step": 66860 + }, + { + "epoch": 0.48404959933983366, + "grad_norm": 0.180929034948349, + "learning_rate": 4.515957639326225e-06, + "loss": 0.9209, + "step": 66870 + }, + { + "epoch": 0.48412198600041983, + "grad_norm": 0.15386876463890076, + "learning_rate": 4.51588525266564e-06, + "loss": 0.9208, + "step": 66880 + }, + { + "epoch": 0.484194372661006, + "grad_norm": 0.17483779788017273, + "learning_rate": 4.515812866005053e-06, + "loss": 0.9147, + "step": 66890 + }, + { + "epoch": 0.48426675932159224, + "grad_norm": 0.17675867676734924, + "learning_rate": 4.515740479344467e-06, + "loss": 0.9216, + "step": 66900 + }, + { + "epoch": 0.4843391459821784, + "grad_norm": 0.16373442113399506, + "learning_rate": 4.5156680926838805e-06, + "loss": 0.9273, + "step": 66910 + }, + { + "epoch": 0.4844115326427646, + "grad_norm": 0.16552437841892242, + "learning_rate": 4.515595706023295e-06, + "loss": 0.9343, + "step": 66920 + }, + { + "epoch": 0.4844839193033508, + "grad_norm": 0.16545453667640686, + "learning_rate": 4.515523319362708e-06, + "loss": 0.9205, + "step": 66930 + }, + { + "epoch": 0.48455630596393695, + "grad_norm": 0.17034026980400085, + "learning_rate": 4.515450932702121e-06, + "loss": 0.9213, + "step": 66940 + }, + { + "epoch": 0.4846286926245231, + "grad_norm": 0.2646459639072418, + "learning_rate": 4.515378546041536e-06, + "loss": 0.9172, + "step": 66950 + }, + { + "epoch": 0.48470107928510936, + "grad_norm": 0.16209463775157928, + "learning_rate": 4.5153061593809495e-06, + "loss": 0.9155, + "step": 66960 + }, + { + "epoch": 0.48477346594569554, + "grad_norm": 0.1605486124753952, + "learning_rate": 4.515233772720363e-06, + "loss": 0.9242, + "step": 66970 + }, + { + "epoch": 0.4848458526062817, + "grad_norm": 0.16971814632415771, + "learning_rate": 4.515161386059777e-06, + "loss": 0.9143, + "step": 66980 + }, + { + "epoch": 0.4849182392668679, + "grad_norm": 0.17342694103717804, + "learning_rate": 4.515088999399191e-06, + "loss": 0.9302, + "step": 66990 + }, + { + "epoch": 0.48499062592745407, + "grad_norm": 0.15016865730285645, + "learning_rate": 4.515016612738605e-06, + "loss": 0.9206, + "step": 67000 + }, + { + "epoch": 0.4850630125880403, + "grad_norm": 0.15992531180381775, + "learning_rate": 4.514944226078018e-06, + "loss": 0.9153, + "step": 67010 + }, + { + "epoch": 0.4851353992486265, + "grad_norm": 0.1591133326292038, + "learning_rate": 4.514871839417432e-06, + "loss": 0.9185, + "step": 67020 + }, + { + "epoch": 0.48520778590921265, + "grad_norm": 0.16176247596740723, + "learning_rate": 4.5147994527568465e-06, + "loss": 0.9246, + "step": 67030 + }, + { + "epoch": 0.48528017256979883, + "grad_norm": 0.15282560884952545, + "learning_rate": 4.51472706609626e-06, + "loss": 0.9323, + "step": 67040 + }, + { + "epoch": 0.485352559230385, + "grad_norm": 0.1511177271604538, + "learning_rate": 4.514654679435674e-06, + "loss": 0.9358, + "step": 67050 + }, + { + "epoch": 0.48542494589097124, + "grad_norm": 0.15375123918056488, + "learning_rate": 4.514582292775087e-06, + "loss": 0.9322, + "step": 67060 + }, + { + "epoch": 0.4854973325515574, + "grad_norm": 0.21929968893527985, + "learning_rate": 4.514509906114502e-06, + "loss": 0.9129, + "step": 67070 + }, + { + "epoch": 0.4855697192121436, + "grad_norm": 0.16412512958049774, + "learning_rate": 4.514437519453915e-06, + "loss": 0.9264, + "step": 67080 + }, + { + "epoch": 0.48564210587272977, + "grad_norm": 0.15203996002674103, + "learning_rate": 4.514365132793329e-06, + "loss": 0.922, + "step": 67090 + }, + { + "epoch": 0.48571449253331594, + "grad_norm": 0.15428316593170166, + "learning_rate": 4.514292746132743e-06, + "loss": 0.9196, + "step": 67100 + }, + { + "epoch": 0.4857868791939021, + "grad_norm": 0.171985924243927, + "learning_rate": 4.514220359472157e-06, + "loss": 0.9349, + "step": 67110 + }, + { + "epoch": 0.48585926585448835, + "grad_norm": 0.16062787175178528, + "learning_rate": 4.514147972811571e-06, + "loss": 0.9108, + "step": 67120 + }, + { + "epoch": 0.48593165251507453, + "grad_norm": 0.15661896765232086, + "learning_rate": 4.514075586150984e-06, + "loss": 0.9236, + "step": 67130 + }, + { + "epoch": 0.4860040391756607, + "grad_norm": 0.16929559409618378, + "learning_rate": 4.514003199490398e-06, + "loss": 0.9322, + "step": 67140 + }, + { + "epoch": 0.4860764258362469, + "grad_norm": 0.1664290726184845, + "learning_rate": 4.513930812829812e-06, + "loss": 0.9166, + "step": 67150 + }, + { + "epoch": 0.48614881249683306, + "grad_norm": 0.17513130605220795, + "learning_rate": 4.513858426169226e-06, + "loss": 0.917, + "step": 67160 + }, + { + "epoch": 0.4862211991574193, + "grad_norm": 0.16142228245735168, + "learning_rate": 4.51378603950864e-06, + "loss": 0.9296, + "step": 67170 + }, + { + "epoch": 0.48629358581800547, + "grad_norm": 0.17332206666469574, + "learning_rate": 4.513713652848053e-06, + "loss": 0.9092, + "step": 67180 + }, + { + "epoch": 0.48636597247859165, + "grad_norm": 0.15262171626091003, + "learning_rate": 4.513641266187468e-06, + "loss": 0.935, + "step": 67190 + }, + { + "epoch": 0.4864383591391778, + "grad_norm": 0.1601051539182663, + "learning_rate": 4.513568879526881e-06, + "loss": 0.9203, + "step": 67200 + }, + { + "epoch": 0.486510745799764, + "grad_norm": 0.16139155626296997, + "learning_rate": 4.513496492866295e-06, + "loss": 0.9336, + "step": 67210 + }, + { + "epoch": 0.48658313246035023, + "grad_norm": 0.182013601064682, + "learning_rate": 4.5134241062057086e-06, + "loss": 0.9269, + "step": 67220 + }, + { + "epoch": 0.4866555191209364, + "grad_norm": 0.19671277701854706, + "learning_rate": 4.513351719545123e-06, + "loss": 0.924, + "step": 67230 + }, + { + "epoch": 0.4867279057815226, + "grad_norm": 0.17239131033420563, + "learning_rate": 4.513279332884537e-06, + "loss": 0.9346, + "step": 67240 + }, + { + "epoch": 0.48680029244210876, + "grad_norm": 0.1634131819009781, + "learning_rate": 4.51320694622395e-06, + "loss": 0.9198, + "step": 67250 + }, + { + "epoch": 0.48687267910269494, + "grad_norm": 0.1805330216884613, + "learning_rate": 4.513134559563364e-06, + "loss": 0.9201, + "step": 67260 + }, + { + "epoch": 0.4869450657632811, + "grad_norm": 0.15409274399280548, + "learning_rate": 4.513062172902778e-06, + "loss": 0.9268, + "step": 67270 + }, + { + "epoch": 0.48701745242386735, + "grad_norm": 0.14754238724708557, + "learning_rate": 4.512989786242192e-06, + "loss": 0.92, + "step": 67280 + }, + { + "epoch": 0.4870898390844535, + "grad_norm": 0.14895077049732208, + "learning_rate": 4.5129173995816056e-06, + "loss": 0.926, + "step": 67290 + }, + { + "epoch": 0.4871622257450397, + "grad_norm": 0.16120226681232452, + "learning_rate": 4.512845012921019e-06, + "loss": 0.9242, + "step": 67300 + }, + { + "epoch": 0.4872346124056259, + "grad_norm": 0.16055871546268463, + "learning_rate": 4.512772626260433e-06, + "loss": 0.9222, + "step": 67310 + }, + { + "epoch": 0.48730699906621205, + "grad_norm": 0.16574375331401825, + "learning_rate": 4.512700239599847e-06, + "loss": 0.9205, + "step": 67320 + }, + { + "epoch": 0.4873793857267983, + "grad_norm": 0.15389981865882874, + "learning_rate": 4.512627852939261e-06, + "loss": 0.9151, + "step": 67330 + }, + { + "epoch": 0.48745177238738446, + "grad_norm": 0.15907318890094757, + "learning_rate": 4.5125554662786745e-06, + "loss": 0.9225, + "step": 67340 + }, + { + "epoch": 0.48752415904797064, + "grad_norm": 0.16848038136959076, + "learning_rate": 4.512483079618088e-06, + "loss": 0.9333, + "step": 67350 + }, + { + "epoch": 0.4875965457085568, + "grad_norm": 0.15980379283428192, + "learning_rate": 4.512410692957503e-06, + "loss": 0.9227, + "step": 67360 + }, + { + "epoch": 0.487668932369143, + "grad_norm": 0.15230245888233185, + "learning_rate": 4.512338306296916e-06, + "loss": 0.9143, + "step": 67370 + }, + { + "epoch": 0.4877413190297292, + "grad_norm": 0.16187123954296112, + "learning_rate": 4.51226591963633e-06, + "loss": 0.9224, + "step": 67380 + }, + { + "epoch": 0.4878137056903154, + "grad_norm": 0.16506314277648926, + "learning_rate": 4.5121935329757434e-06, + "loss": 0.9233, + "step": 67390 + }, + { + "epoch": 0.4878860923509016, + "grad_norm": 0.21859927475452423, + "learning_rate": 4.512121146315158e-06, + "loss": 0.9181, + "step": 67400 + }, + { + "epoch": 0.48795847901148776, + "grad_norm": 0.16471314430236816, + "learning_rate": 4.5120487596545715e-06, + "loss": 0.9317, + "step": 67410 + }, + { + "epoch": 0.48803086567207393, + "grad_norm": 0.16795067489147186, + "learning_rate": 4.511976372993985e-06, + "loss": 0.9162, + "step": 67420 + }, + { + "epoch": 0.48810325233266016, + "grad_norm": 0.20342256128787994, + "learning_rate": 4.511903986333399e-06, + "loss": 0.9284, + "step": 67430 + }, + { + "epoch": 0.48817563899324634, + "grad_norm": 0.31586503982543945, + "learning_rate": 4.511831599672813e-06, + "loss": 0.9252, + "step": 67440 + }, + { + "epoch": 0.4882480256538325, + "grad_norm": 0.18031372129917145, + "learning_rate": 4.511759213012227e-06, + "loss": 0.9218, + "step": 67450 + }, + { + "epoch": 0.4883204123144187, + "grad_norm": 0.15259408950805664, + "learning_rate": 4.5116868263516404e-06, + "loss": 0.9252, + "step": 67460 + }, + { + "epoch": 0.48839279897500487, + "grad_norm": 0.15039771795272827, + "learning_rate": 4.511614439691054e-06, + "loss": 0.9228, + "step": 67470 + }, + { + "epoch": 0.48846518563559105, + "grad_norm": 0.15142498910427094, + "learning_rate": 4.511542053030468e-06, + "loss": 0.8998, + "step": 67480 + }, + { + "epoch": 0.4885375722961773, + "grad_norm": 0.15406163036823273, + "learning_rate": 4.511469666369881e-06, + "loss": 0.9219, + "step": 67490 + }, + { + "epoch": 0.48860995895676346, + "grad_norm": 0.16428889334201813, + "learning_rate": 4.511397279709295e-06, + "loss": 0.9298, + "step": 67500 + }, + { + "epoch": 0.48868234561734963, + "grad_norm": 0.15274979174137115, + "learning_rate": 4.511324893048709e-06, + "loss": 0.9374, + "step": 67510 + }, + { + "epoch": 0.4887547322779358, + "grad_norm": 0.15351366996765137, + "learning_rate": 4.511252506388123e-06, + "loss": 0.9262, + "step": 67520 + }, + { + "epoch": 0.488827118938522, + "grad_norm": 0.18971464037895203, + "learning_rate": 4.511180119727537e-06, + "loss": 0.9329, + "step": 67530 + }, + { + "epoch": 0.4888995055991082, + "grad_norm": 0.15166032314300537, + "learning_rate": 4.51110773306695e-06, + "loss": 0.9232, + "step": 67540 + }, + { + "epoch": 0.4889718922596944, + "grad_norm": 0.15017013251781464, + "learning_rate": 4.511035346406365e-06, + "loss": 0.9279, + "step": 67550 + }, + { + "epoch": 0.4890442789202806, + "grad_norm": 0.16144691407680511, + "learning_rate": 4.510962959745778e-06, + "loss": 0.9267, + "step": 67560 + }, + { + "epoch": 0.48911666558086675, + "grad_norm": 0.1623421162366867, + "learning_rate": 4.510890573085192e-06, + "loss": 0.9219, + "step": 67570 + }, + { + "epoch": 0.4891890522414529, + "grad_norm": 0.15922850370407104, + "learning_rate": 4.5108181864246055e-06, + "loss": 0.919, + "step": 67580 + }, + { + "epoch": 0.48926143890203916, + "grad_norm": 0.16896429657936096, + "learning_rate": 4.51074579976402e-06, + "loss": 0.9263, + "step": 67590 + }, + { + "epoch": 0.48933382556262534, + "grad_norm": 0.1584937423467636, + "learning_rate": 4.510673413103434e-06, + "loss": 0.93, + "step": 67600 + }, + { + "epoch": 0.4894062122232115, + "grad_norm": 0.15415899455547333, + "learning_rate": 4.510601026442847e-06, + "loss": 0.9178, + "step": 67610 + }, + { + "epoch": 0.4894785988837977, + "grad_norm": 0.15234239399433136, + "learning_rate": 4.510528639782261e-06, + "loss": 0.9174, + "step": 67620 + }, + { + "epoch": 0.48955098554438387, + "grad_norm": 0.1630704402923584, + "learning_rate": 4.510456253121675e-06, + "loss": 0.926, + "step": 67630 + }, + { + "epoch": 0.48962337220497004, + "grad_norm": 0.15613876283168793, + "learning_rate": 4.510383866461089e-06, + "loss": 0.9233, + "step": 67640 + }, + { + "epoch": 0.4896957588655563, + "grad_norm": 0.16620004177093506, + "learning_rate": 4.5103114798005025e-06, + "loss": 0.9264, + "step": 67650 + }, + { + "epoch": 0.48976814552614245, + "grad_norm": 0.15522471070289612, + "learning_rate": 4.510239093139916e-06, + "loss": 0.923, + "step": 67660 + }, + { + "epoch": 0.4898405321867286, + "grad_norm": 0.15571705996990204, + "learning_rate": 4.510166706479331e-06, + "loss": 0.927, + "step": 67670 + }, + { + "epoch": 0.4899129188473148, + "grad_norm": 0.15183138847351074, + "learning_rate": 4.510094319818744e-06, + "loss": 0.9162, + "step": 67680 + }, + { + "epoch": 0.489985305507901, + "grad_norm": 0.1504392772912979, + "learning_rate": 4.510021933158158e-06, + "loss": 0.9206, + "step": 67690 + }, + { + "epoch": 0.4900576921684872, + "grad_norm": 0.2166612446308136, + "learning_rate": 4.5099495464975715e-06, + "loss": 0.9206, + "step": 67700 + }, + { + "epoch": 0.4901300788290734, + "grad_norm": 0.19607232511043549, + "learning_rate": 4.509877159836986e-06, + "loss": 0.9225, + "step": 67710 + }, + { + "epoch": 0.49020246548965957, + "grad_norm": 0.1525793820619583, + "learning_rate": 4.5098047731763995e-06, + "loss": 0.9161, + "step": 67720 + }, + { + "epoch": 0.49027485215024574, + "grad_norm": 0.16443735361099243, + "learning_rate": 4.509732386515813e-06, + "loss": 0.9173, + "step": 67730 + }, + { + "epoch": 0.4903472388108319, + "grad_norm": 0.17903602123260498, + "learning_rate": 4.509659999855227e-06, + "loss": 0.9225, + "step": 67740 + }, + { + "epoch": 0.49041962547141815, + "grad_norm": 0.1686403751373291, + "learning_rate": 4.509587613194641e-06, + "loss": 0.9224, + "step": 67750 + }, + { + "epoch": 0.49049201213200433, + "grad_norm": 0.36816513538360596, + "learning_rate": 4.509515226534055e-06, + "loss": 0.9186, + "step": 67760 + }, + { + "epoch": 0.4905643987925905, + "grad_norm": 0.15775349736213684, + "learning_rate": 4.5094428398734685e-06, + "loss": 0.9361, + "step": 67770 + }, + { + "epoch": 0.4906367854531767, + "grad_norm": 0.17417992651462555, + "learning_rate": 4.509370453212882e-06, + "loss": 0.9245, + "step": 67780 + }, + { + "epoch": 0.49070917211376286, + "grad_norm": 0.1487663984298706, + "learning_rate": 4.5092980665522965e-06, + "loss": 0.9302, + "step": 67790 + }, + { + "epoch": 0.49078155877434904, + "grad_norm": 0.1552221029996872, + "learning_rate": 4.50922567989171e-06, + "loss": 0.926, + "step": 67800 + }, + { + "epoch": 0.49085394543493527, + "grad_norm": 0.16190670430660248, + "learning_rate": 4.509153293231124e-06, + "loss": 0.9284, + "step": 67810 + }, + { + "epoch": 0.49092633209552144, + "grad_norm": 0.17371685802936554, + "learning_rate": 4.509080906570537e-06, + "loss": 0.9413, + "step": 67820 + }, + { + "epoch": 0.4909987187561076, + "grad_norm": 0.1726863831281662, + "learning_rate": 4.509008519909952e-06, + "loss": 0.9315, + "step": 67830 + }, + { + "epoch": 0.4910711054166938, + "grad_norm": 0.15722721815109253, + "learning_rate": 4.5089361332493655e-06, + "loss": 0.9261, + "step": 67840 + }, + { + "epoch": 0.49114349207728, + "grad_norm": 0.15430855751037598, + "learning_rate": 4.508863746588779e-06, + "loss": 0.9288, + "step": 67850 + }, + { + "epoch": 0.4912158787378662, + "grad_norm": 0.1556122750043869, + "learning_rate": 4.508791359928193e-06, + "loss": 0.9227, + "step": 67860 + }, + { + "epoch": 0.4912882653984524, + "grad_norm": 0.1647377461194992, + "learning_rate": 4.508718973267607e-06, + "loss": 0.9096, + "step": 67870 + }, + { + "epoch": 0.49136065205903856, + "grad_norm": 0.1618121862411499, + "learning_rate": 4.508646586607021e-06, + "loss": 0.9199, + "step": 67880 + }, + { + "epoch": 0.49143303871962474, + "grad_norm": 0.16955524682998657, + "learning_rate": 4.508574199946434e-06, + "loss": 0.9285, + "step": 67890 + }, + { + "epoch": 0.4915054253802109, + "grad_norm": 0.15921622514724731, + "learning_rate": 4.508501813285848e-06, + "loss": 0.9237, + "step": 67900 + }, + { + "epoch": 0.49157781204079715, + "grad_norm": 0.17753024399280548, + "learning_rate": 4.5084294266252625e-06, + "loss": 0.9264, + "step": 67910 + }, + { + "epoch": 0.4916501987013833, + "grad_norm": 0.15799757838249207, + "learning_rate": 4.508357039964676e-06, + "loss": 0.914, + "step": 67920 + }, + { + "epoch": 0.4917225853619695, + "grad_norm": 0.17389525473117828, + "learning_rate": 4.50828465330409e-06, + "loss": 0.935, + "step": 67930 + }, + { + "epoch": 0.4917949720225557, + "grad_norm": 0.15797245502471924, + "learning_rate": 4.508212266643503e-06, + "loss": 0.9193, + "step": 67940 + }, + { + "epoch": 0.49186735868314185, + "grad_norm": 0.1706414818763733, + "learning_rate": 4.508139879982917e-06, + "loss": 0.9221, + "step": 67950 + }, + { + "epoch": 0.4919397453437281, + "grad_norm": 0.17515702545642853, + "learning_rate": 4.508067493322331e-06, + "loss": 0.924, + "step": 67960 + }, + { + "epoch": 0.49201213200431426, + "grad_norm": 0.16736344993114471, + "learning_rate": 4.507995106661745e-06, + "loss": 0.9147, + "step": 67970 + }, + { + "epoch": 0.49208451866490044, + "grad_norm": 0.24362386763095856, + "learning_rate": 4.507922720001159e-06, + "loss": 0.9217, + "step": 67980 + }, + { + "epoch": 0.4921569053254866, + "grad_norm": 0.1629912108182907, + "learning_rate": 4.507850333340572e-06, + "loss": 0.921, + "step": 67990 + }, + { + "epoch": 0.4922292919860728, + "grad_norm": 0.16399963200092316, + "learning_rate": 4.507777946679986e-06, + "loss": 0.9234, + "step": 68000 + }, + { + "epoch": 0.49230167864665897, + "grad_norm": 0.15990719199180603, + "learning_rate": 4.5077055600193995e-06, + "loss": 0.9207, + "step": 68010 + }, + { + "epoch": 0.4923740653072452, + "grad_norm": 0.1633741706609726, + "learning_rate": 4.507633173358814e-06, + "loss": 0.9153, + "step": 68020 + }, + { + "epoch": 0.4924464519678314, + "grad_norm": 0.15352605283260345, + "learning_rate": 4.5075607866982276e-06, + "loss": 0.9147, + "step": 68030 + }, + { + "epoch": 0.49251883862841755, + "grad_norm": 0.15996570885181427, + "learning_rate": 4.507488400037641e-06, + "loss": 0.9193, + "step": 68040 + }, + { + "epoch": 0.49259122528900373, + "grad_norm": 0.15362408757209778, + "learning_rate": 4.507416013377055e-06, + "loss": 0.9308, + "step": 68050 + }, + { + "epoch": 0.4926636119495899, + "grad_norm": 0.1646050661802292, + "learning_rate": 4.507343626716469e-06, + "loss": 0.9204, + "step": 68060 + }, + { + "epoch": 0.49273599861017614, + "grad_norm": 0.1549074798822403, + "learning_rate": 4.507271240055883e-06, + "loss": 0.9312, + "step": 68070 + }, + { + "epoch": 0.4928083852707623, + "grad_norm": 0.16783104836940765, + "learning_rate": 4.5071988533952965e-06, + "loss": 0.9225, + "step": 68080 + }, + { + "epoch": 0.4928807719313485, + "grad_norm": 0.16458235681056976, + "learning_rate": 4.50712646673471e-06, + "loss": 0.936, + "step": 68090 + }, + { + "epoch": 0.49295315859193467, + "grad_norm": 0.16786985099315643, + "learning_rate": 4.507054080074124e-06, + "loss": 0.9155, + "step": 68100 + }, + { + "epoch": 0.49302554525252085, + "grad_norm": 0.16184143722057343, + "learning_rate": 4.506981693413538e-06, + "loss": 0.9124, + "step": 68110 + }, + { + "epoch": 0.4930979319131071, + "grad_norm": 0.1618042141199112, + "learning_rate": 4.506909306752952e-06, + "loss": 0.9182, + "step": 68120 + }, + { + "epoch": 0.49317031857369326, + "grad_norm": 0.47443485260009766, + "learning_rate": 4.5068369200923654e-06, + "loss": 0.9171, + "step": 68130 + }, + { + "epoch": 0.49324270523427943, + "grad_norm": 0.16205495595932007, + "learning_rate": 4.506764533431779e-06, + "loss": 0.9191, + "step": 68140 + }, + { + "epoch": 0.4933150918948656, + "grad_norm": 0.15748542547225952, + "learning_rate": 4.5066921467711935e-06, + "loss": 0.929, + "step": 68150 + }, + { + "epoch": 0.4933874785554518, + "grad_norm": 0.15539519488811493, + "learning_rate": 4.506619760110607e-06, + "loss": 0.9244, + "step": 68160 + }, + { + "epoch": 0.49345986521603796, + "grad_norm": 0.15781527757644653, + "learning_rate": 4.506547373450021e-06, + "loss": 0.9215, + "step": 68170 + }, + { + "epoch": 0.4935322518766242, + "grad_norm": 0.15772010385990143, + "learning_rate": 4.506474986789434e-06, + "loss": 0.9173, + "step": 68180 + }, + { + "epoch": 0.49360463853721037, + "grad_norm": 0.15966345369815826, + "learning_rate": 4.506402600128849e-06, + "loss": 0.9222, + "step": 68190 + }, + { + "epoch": 0.49367702519779655, + "grad_norm": 0.15698853135108948, + "learning_rate": 4.5063302134682624e-06, + "loss": 0.9244, + "step": 68200 + }, + { + "epoch": 0.4937494118583827, + "grad_norm": 0.16982385516166687, + "learning_rate": 4.506257826807676e-06, + "loss": 0.9258, + "step": 68210 + }, + { + "epoch": 0.4938217985189689, + "grad_norm": 0.19245341420173645, + "learning_rate": 4.50618544014709e-06, + "loss": 0.9141, + "step": 68220 + }, + { + "epoch": 0.49389418517955513, + "grad_norm": 0.14945447444915771, + "learning_rate": 4.506113053486504e-06, + "loss": 0.9339, + "step": 68230 + }, + { + "epoch": 0.4939665718401413, + "grad_norm": 0.16145005822181702, + "learning_rate": 4.506040666825918e-06, + "loss": 0.9186, + "step": 68240 + }, + { + "epoch": 0.4940389585007275, + "grad_norm": 0.15670140087604523, + "learning_rate": 4.505968280165331e-06, + "loss": 0.9277, + "step": 68250 + }, + { + "epoch": 0.49411134516131366, + "grad_norm": 0.4954744577407837, + "learning_rate": 4.505895893504745e-06, + "loss": 0.9307, + "step": 68260 + }, + { + "epoch": 0.49418373182189984, + "grad_norm": 0.15574707090854645, + "learning_rate": 4.5058235068441594e-06, + "loss": 0.9254, + "step": 68270 + }, + { + "epoch": 0.4942561184824861, + "grad_norm": 0.1754721999168396, + "learning_rate": 4.505751120183573e-06, + "loss": 0.9294, + "step": 68280 + }, + { + "epoch": 0.49432850514307225, + "grad_norm": 0.18038561940193176, + "learning_rate": 4.505678733522987e-06, + "loss": 0.9175, + "step": 68290 + }, + { + "epoch": 0.4944008918036584, + "grad_norm": 0.16185778379440308, + "learning_rate": 4.5056063468624e-06, + "loss": 0.9344, + "step": 68300 + }, + { + "epoch": 0.4944732784642446, + "grad_norm": 0.1598776876926422, + "learning_rate": 4.505533960201815e-06, + "loss": 0.9228, + "step": 68310 + }, + { + "epoch": 0.4945456651248308, + "grad_norm": 0.4865586459636688, + "learning_rate": 4.505461573541228e-06, + "loss": 0.9308, + "step": 68320 + }, + { + "epoch": 0.49461805178541696, + "grad_norm": 0.1811351776123047, + "learning_rate": 4.505389186880642e-06, + "loss": 0.9199, + "step": 68330 + }, + { + "epoch": 0.4946904384460032, + "grad_norm": 0.15315648913383484, + "learning_rate": 4.505316800220056e-06, + "loss": 0.9264, + "step": 68340 + }, + { + "epoch": 0.49476282510658937, + "grad_norm": 0.1656007021665573, + "learning_rate": 4.50524441355947e-06, + "loss": 0.9213, + "step": 68350 + }, + { + "epoch": 0.49483521176717554, + "grad_norm": 0.17960675060749054, + "learning_rate": 4.505172026898884e-06, + "loss": 0.9258, + "step": 68360 + }, + { + "epoch": 0.4949075984277617, + "grad_norm": 0.18778973817825317, + "learning_rate": 4.505099640238297e-06, + "loss": 0.9233, + "step": 68370 + }, + { + "epoch": 0.4949799850883479, + "grad_norm": 0.1481725573539734, + "learning_rate": 4.505027253577711e-06, + "loss": 0.9133, + "step": 68380 + }, + { + "epoch": 0.49505237174893413, + "grad_norm": 0.171866312623024, + "learning_rate": 4.504954866917125e-06, + "loss": 0.9296, + "step": 68390 + }, + { + "epoch": 0.4951247584095203, + "grad_norm": 0.16532866656780243, + "learning_rate": 4.504882480256539e-06, + "loss": 0.8981, + "step": 68400 + }, + { + "epoch": 0.4951971450701065, + "grad_norm": 0.1649603694677353, + "learning_rate": 4.504810093595953e-06, + "loss": 0.9199, + "step": 68410 + }, + { + "epoch": 0.49526953173069266, + "grad_norm": 0.1862333118915558, + "learning_rate": 4.504737706935366e-06, + "loss": 0.9284, + "step": 68420 + }, + { + "epoch": 0.49534191839127883, + "grad_norm": 0.17143741250038147, + "learning_rate": 4.504665320274781e-06, + "loss": 0.914, + "step": 68430 + }, + { + "epoch": 0.49541430505186507, + "grad_norm": 0.16334125399589539, + "learning_rate": 4.504592933614194e-06, + "loss": 0.9182, + "step": 68440 + }, + { + "epoch": 0.49548669171245124, + "grad_norm": 0.18255673348903656, + "learning_rate": 4.504520546953608e-06, + "loss": 0.9099, + "step": 68450 + }, + { + "epoch": 0.4955590783730374, + "grad_norm": 0.16726234555244446, + "learning_rate": 4.5044481602930215e-06, + "loss": 0.9263, + "step": 68460 + }, + { + "epoch": 0.4956314650336236, + "grad_norm": 0.16593337059020996, + "learning_rate": 4.504375773632436e-06, + "loss": 0.9165, + "step": 68470 + }, + { + "epoch": 0.4957038516942098, + "grad_norm": 0.173760324716568, + "learning_rate": 4.50430338697185e-06, + "loss": 0.9228, + "step": 68480 + }, + { + "epoch": 0.49577623835479595, + "grad_norm": 0.1431112438440323, + "learning_rate": 4.504231000311263e-06, + "loss": 0.9308, + "step": 68490 + }, + { + "epoch": 0.4958486250153822, + "grad_norm": 0.22450746595859528, + "learning_rate": 4.504158613650677e-06, + "loss": 0.9277, + "step": 68500 + }, + { + "epoch": 0.49592101167596836, + "grad_norm": 0.16971862316131592, + "learning_rate": 4.504086226990091e-06, + "loss": 0.924, + "step": 68510 + }, + { + "epoch": 0.49599339833655454, + "grad_norm": 0.15365462005138397, + "learning_rate": 4.504013840329504e-06, + "loss": 0.9172, + "step": 68520 + }, + { + "epoch": 0.4960657849971407, + "grad_norm": 0.17213942110538483, + "learning_rate": 4.503941453668918e-06, + "loss": 0.912, + "step": 68530 + }, + { + "epoch": 0.4961381716577269, + "grad_norm": 0.16275885701179504, + "learning_rate": 4.503869067008332e-06, + "loss": 0.9113, + "step": 68540 + }, + { + "epoch": 0.4962105583183131, + "grad_norm": 0.1540212631225586, + "learning_rate": 4.503796680347746e-06, + "loss": 0.9075, + "step": 68550 + }, + { + "epoch": 0.4962829449788993, + "grad_norm": 0.15798866748809814, + "learning_rate": 4.503724293687159e-06, + "loss": 0.9271, + "step": 68560 + }, + { + "epoch": 0.4963553316394855, + "grad_norm": 0.1525816023349762, + "learning_rate": 4.503651907026573e-06, + "loss": 0.913, + "step": 68570 + }, + { + "epoch": 0.49642771830007165, + "grad_norm": 0.1655115783214569, + "learning_rate": 4.5035795203659875e-06, + "loss": 0.9094, + "step": 68580 + }, + { + "epoch": 0.49650010496065783, + "grad_norm": 0.16454185545444489, + "learning_rate": 4.503507133705401e-06, + "loss": 0.9232, + "step": 68590 + }, + { + "epoch": 0.49657249162124406, + "grad_norm": 0.15450453758239746, + "learning_rate": 4.503434747044815e-06, + "loss": 0.9256, + "step": 68600 + }, + { + "epoch": 0.49664487828183024, + "grad_norm": 0.15737608075141907, + "learning_rate": 4.503362360384228e-06, + "loss": 0.913, + "step": 68610 + }, + { + "epoch": 0.4967172649424164, + "grad_norm": 0.16449777781963348, + "learning_rate": 4.503289973723643e-06, + "loss": 0.9236, + "step": 68620 + }, + { + "epoch": 0.4967896516030026, + "grad_norm": 0.15372395515441895, + "learning_rate": 4.503217587063056e-06, + "loss": 0.9174, + "step": 68630 + }, + { + "epoch": 0.49686203826358877, + "grad_norm": 0.1783866584300995, + "learning_rate": 4.50314520040247e-06, + "loss": 0.9295, + "step": 68640 + }, + { + "epoch": 0.496934424924175, + "grad_norm": 0.1594487428665161, + "learning_rate": 4.503072813741884e-06, + "loss": 0.9224, + "step": 68650 + }, + { + "epoch": 0.4970068115847612, + "grad_norm": 0.16570931673049927, + "learning_rate": 4.503000427081298e-06, + "loss": 0.9139, + "step": 68660 + }, + { + "epoch": 0.49707919824534735, + "grad_norm": 0.16301476955413818, + "learning_rate": 4.502928040420712e-06, + "loss": 0.9185, + "step": 68670 + }, + { + "epoch": 0.49715158490593353, + "grad_norm": 0.17501094937324524, + "learning_rate": 4.502855653760125e-06, + "loss": 0.9253, + "step": 68680 + }, + { + "epoch": 0.4972239715665197, + "grad_norm": 0.1674194633960724, + "learning_rate": 4.502783267099539e-06, + "loss": 0.9193, + "step": 68690 + }, + { + "epoch": 0.4972963582271059, + "grad_norm": 0.16636104881763458, + "learning_rate": 4.502710880438953e-06, + "loss": 0.9098, + "step": 68700 + }, + { + "epoch": 0.4973687448876921, + "grad_norm": 0.16473974287509918, + "learning_rate": 4.502638493778367e-06, + "loss": 0.9239, + "step": 68710 + }, + { + "epoch": 0.4974411315482783, + "grad_norm": 0.17471805214881897, + "learning_rate": 4.502566107117781e-06, + "loss": 0.9208, + "step": 68720 + }, + { + "epoch": 0.49751351820886447, + "grad_norm": 0.16630442440509796, + "learning_rate": 4.502493720457194e-06, + "loss": 0.9233, + "step": 68730 + }, + { + "epoch": 0.49758590486945065, + "grad_norm": 0.14597173035144806, + "learning_rate": 4.502421333796608e-06, + "loss": 0.916, + "step": 68740 + }, + { + "epoch": 0.4976582915300368, + "grad_norm": 0.1661393642425537, + "learning_rate": 4.502348947136022e-06, + "loss": 0.9205, + "step": 68750 + }, + { + "epoch": 0.49773067819062305, + "grad_norm": 0.15321460366249084, + "learning_rate": 4.502276560475436e-06, + "loss": 0.9276, + "step": 68760 + }, + { + "epoch": 0.49780306485120923, + "grad_norm": 0.15991902351379395, + "learning_rate": 4.5022041738148496e-06, + "loss": 0.9238, + "step": 68770 + }, + { + "epoch": 0.4978754515117954, + "grad_norm": 0.17004790902137756, + "learning_rate": 4.502131787154263e-06, + "loss": 0.9171, + "step": 68780 + }, + { + "epoch": 0.4979478381723816, + "grad_norm": 0.17030557990074158, + "learning_rate": 4.502059400493678e-06, + "loss": 0.9158, + "step": 68790 + }, + { + "epoch": 0.49802022483296776, + "grad_norm": 0.16426852345466614, + "learning_rate": 4.501987013833091e-06, + "loss": 0.927, + "step": 68800 + }, + { + "epoch": 0.498092611493554, + "grad_norm": 0.14977115392684937, + "learning_rate": 4.501914627172505e-06, + "loss": 0.9155, + "step": 68810 + }, + { + "epoch": 0.49816499815414017, + "grad_norm": 0.16252385079860687, + "learning_rate": 4.5018422405119185e-06, + "loss": 0.9082, + "step": 68820 + }, + { + "epoch": 0.49823738481472635, + "grad_norm": 0.15293535590171814, + "learning_rate": 4.501769853851333e-06, + "loss": 0.9093, + "step": 68830 + }, + { + "epoch": 0.4983097714753125, + "grad_norm": 0.14948716759681702, + "learning_rate": 4.501697467190747e-06, + "loss": 0.9099, + "step": 68840 + }, + { + "epoch": 0.4983821581358987, + "grad_norm": 0.14924530684947968, + "learning_rate": 4.50162508053016e-06, + "loss": 0.9288, + "step": 68850 + }, + { + "epoch": 0.4984545447964849, + "grad_norm": 0.16742658615112305, + "learning_rate": 4.501552693869574e-06, + "loss": 0.9359, + "step": 68860 + }, + { + "epoch": 0.4985269314570711, + "grad_norm": 0.1566425859928131, + "learning_rate": 4.501480307208988e-06, + "loss": 0.9298, + "step": 68870 + }, + { + "epoch": 0.4985993181176573, + "grad_norm": 0.15507277846336365, + "learning_rate": 4.501407920548402e-06, + "loss": 0.9209, + "step": 68880 + }, + { + "epoch": 0.49867170477824346, + "grad_norm": 0.2236766368150711, + "learning_rate": 4.5013355338878155e-06, + "loss": 0.9195, + "step": 68890 + }, + { + "epoch": 0.49874409143882964, + "grad_norm": 0.14580419659614563, + "learning_rate": 4.501263147227229e-06, + "loss": 0.9099, + "step": 68900 + }, + { + "epoch": 0.4988164780994158, + "grad_norm": 0.1595289558172226, + "learning_rate": 4.501190760566644e-06, + "loss": 0.9197, + "step": 68910 + }, + { + "epoch": 0.49888886476000205, + "grad_norm": 0.15660600364208221, + "learning_rate": 4.501118373906057e-06, + "loss": 0.9246, + "step": 68920 + }, + { + "epoch": 0.4989612514205882, + "grad_norm": 0.1797994077205658, + "learning_rate": 4.501045987245471e-06, + "loss": 0.9251, + "step": 68930 + }, + { + "epoch": 0.4990336380811744, + "grad_norm": 0.16895923018455505, + "learning_rate": 4.5009736005848844e-06, + "loss": 0.9208, + "step": 68940 + }, + { + "epoch": 0.4991060247417606, + "grad_norm": 0.17603491246700287, + "learning_rate": 4.500901213924299e-06, + "loss": 0.9205, + "step": 68950 + }, + { + "epoch": 0.49917841140234676, + "grad_norm": 0.1691259890794754, + "learning_rate": 4.5008288272637125e-06, + "loss": 0.9282, + "step": 68960 + }, + { + "epoch": 0.499250798062933, + "grad_norm": 0.1619505137205124, + "learning_rate": 4.500756440603126e-06, + "loss": 0.9324, + "step": 68970 + }, + { + "epoch": 0.49932318472351916, + "grad_norm": 0.16095148026943207, + "learning_rate": 4.50068405394254e-06, + "loss": 0.9167, + "step": 68980 + }, + { + "epoch": 0.49939557138410534, + "grad_norm": 0.14836084842681885, + "learning_rate": 4.500611667281954e-06, + "loss": 0.9127, + "step": 68990 + }, + { + "epoch": 0.4994679580446915, + "grad_norm": 0.15759669244289398, + "learning_rate": 4.500539280621368e-06, + "loss": 0.9219, + "step": 69000 + }, + { + "epoch": 0.4995403447052777, + "grad_norm": 0.17617014050483704, + "learning_rate": 4.5004668939607814e-06, + "loss": 0.9272, + "step": 69010 + }, + { + "epoch": 0.49961273136586387, + "grad_norm": 0.15363344550132751, + "learning_rate": 4.500394507300195e-06, + "loss": 0.914, + "step": 69020 + }, + { + "epoch": 0.4996851180264501, + "grad_norm": 0.163772851228714, + "learning_rate": 4.5003221206396095e-06, + "loss": 0.9352, + "step": 69030 + }, + { + "epoch": 0.4997575046870363, + "grad_norm": 0.16267885267734528, + "learning_rate": 4.500249733979023e-06, + "loss": 0.9052, + "step": 69040 + }, + { + "epoch": 0.49982989134762246, + "grad_norm": 0.18130633234977722, + "learning_rate": 4.500177347318436e-06, + "loss": 0.9282, + "step": 69050 + }, + { + "epoch": 0.49990227800820863, + "grad_norm": 0.1978328377008438, + "learning_rate": 4.50010496065785e-06, + "loss": 0.9345, + "step": 69060 + }, + { + "epoch": 0.4999746646687948, + "grad_norm": 0.15893995761871338, + "learning_rate": 4.500032573997264e-06, + "loss": 0.9136, + "step": 69070 + }, + { + "epoch": 0.500047051329381, + "grad_norm": 0.17726098001003265, + "learning_rate": 4.499960187336678e-06, + "loss": 0.9239, + "step": 69080 + }, + { + "epoch": 0.5001194379899672, + "grad_norm": 0.20466522872447968, + "learning_rate": 4.499887800676091e-06, + "loss": 0.916, + "step": 69090 + }, + { + "epoch": 0.5001918246505533, + "grad_norm": 0.1550358682870865, + "learning_rate": 4.499815414015506e-06, + "loss": 0.9195, + "step": 69100 + }, + { + "epoch": 0.5002642113111396, + "grad_norm": 0.14892955124378204, + "learning_rate": 4.499743027354919e-06, + "loss": 0.9103, + "step": 69110 + }, + { + "epoch": 0.5003365979717258, + "grad_norm": 0.1611659675836563, + "learning_rate": 4.499670640694333e-06, + "loss": 0.9123, + "step": 69120 + }, + { + "epoch": 0.500408984632312, + "grad_norm": 0.19101987779140472, + "learning_rate": 4.4995982540337465e-06, + "loss": 0.9135, + "step": 69130 + }, + { + "epoch": 0.5004813712928982, + "grad_norm": 0.1674869805574417, + "learning_rate": 4.499525867373161e-06, + "loss": 0.9236, + "step": 69140 + }, + { + "epoch": 0.5005537579534843, + "grad_norm": 0.15739434957504272, + "learning_rate": 4.499453480712575e-06, + "loss": 0.9267, + "step": 69150 + }, + { + "epoch": 0.5006261446140705, + "grad_norm": 0.17267724871635437, + "learning_rate": 4.499381094051988e-06, + "loss": 0.9148, + "step": 69160 + }, + { + "epoch": 0.5006985312746567, + "grad_norm": 0.1497461199760437, + "learning_rate": 4.499308707391402e-06, + "loss": 0.9141, + "step": 69170 + }, + { + "epoch": 0.5007709179352429, + "grad_norm": 0.15880879759788513, + "learning_rate": 4.499236320730816e-06, + "loss": 0.9333, + "step": 69180 + }, + { + "epoch": 0.500843304595829, + "grad_norm": 0.1679236739873886, + "learning_rate": 4.49916393407023e-06, + "loss": 0.902, + "step": 69190 + }, + { + "epoch": 0.5009156912564152, + "grad_norm": 0.1678447723388672, + "learning_rate": 4.4990915474096435e-06, + "loss": 0.9136, + "step": 69200 + }, + { + "epoch": 0.5009880779170015, + "grad_norm": 0.16553092002868652, + "learning_rate": 4.499019160749057e-06, + "loss": 0.9205, + "step": 69210 + }, + { + "epoch": 0.5010604645775877, + "grad_norm": 0.15976086258888245, + "learning_rate": 4.498946774088472e-06, + "loss": 0.9124, + "step": 69220 + }, + { + "epoch": 0.5011328512381739, + "grad_norm": 0.1608666628599167, + "learning_rate": 4.498874387427885e-06, + "loss": 0.9233, + "step": 69230 + }, + { + "epoch": 0.50120523789876, + "grad_norm": 0.19113683700561523, + "learning_rate": 4.498802000767299e-06, + "loss": 0.9208, + "step": 69240 + }, + { + "epoch": 0.5012776245593462, + "grad_norm": 0.1584874838590622, + "learning_rate": 4.4987296141067125e-06, + "loss": 0.9106, + "step": 69250 + }, + { + "epoch": 0.5013500112199324, + "grad_norm": 0.17393231391906738, + "learning_rate": 4.498657227446127e-06, + "loss": 0.915, + "step": 69260 + }, + { + "epoch": 0.5014223978805186, + "grad_norm": 0.1741904765367508, + "learning_rate": 4.4985848407855405e-06, + "loss": 0.9318, + "step": 69270 + }, + { + "epoch": 0.5014947845411047, + "grad_norm": 0.15421254932880402, + "learning_rate": 4.498512454124954e-06, + "loss": 0.9146, + "step": 69280 + }, + { + "epoch": 0.5015671712016909, + "grad_norm": 0.19196033477783203, + "learning_rate": 4.498440067464368e-06, + "loss": 0.9116, + "step": 69290 + }, + { + "epoch": 0.5016395578622771, + "grad_norm": 0.15833470225334167, + "learning_rate": 4.498367680803782e-06, + "loss": 0.9232, + "step": 69300 + }, + { + "epoch": 0.5017119445228633, + "grad_norm": 0.1550120860338211, + "learning_rate": 4.498295294143196e-06, + "loss": 0.9222, + "step": 69310 + }, + { + "epoch": 0.5017843311834496, + "grad_norm": 0.15786628425121307, + "learning_rate": 4.4982229074826095e-06, + "loss": 0.936, + "step": 69320 + }, + { + "epoch": 0.5018567178440357, + "grad_norm": 0.1613067090511322, + "learning_rate": 4.498150520822023e-06, + "loss": 0.9028, + "step": 69330 + }, + { + "epoch": 0.5019291045046219, + "grad_norm": 0.20446714758872986, + "learning_rate": 4.498078134161437e-06, + "loss": 0.8997, + "step": 69340 + }, + { + "epoch": 0.5020014911652081, + "grad_norm": 0.1550551950931549, + "learning_rate": 4.498005747500851e-06, + "loss": 0.9199, + "step": 69350 + }, + { + "epoch": 0.5020738778257943, + "grad_norm": 0.15930670499801636, + "learning_rate": 4.497933360840265e-06, + "loss": 0.9126, + "step": 69360 + }, + { + "epoch": 0.5021462644863804, + "grad_norm": 0.16248895227909088, + "learning_rate": 4.497860974179678e-06, + "loss": 0.9354, + "step": 69370 + }, + { + "epoch": 0.5022186511469666, + "grad_norm": 0.1518125683069229, + "learning_rate": 4.497788587519092e-06, + "loss": 0.9201, + "step": 69380 + }, + { + "epoch": 0.5022910378075528, + "grad_norm": 0.16246938705444336, + "learning_rate": 4.4977162008585065e-06, + "loss": 0.9231, + "step": 69390 + }, + { + "epoch": 0.502363424468139, + "grad_norm": 0.17307987809181213, + "learning_rate": 4.49764381419792e-06, + "loss": 0.9279, + "step": 69400 + }, + { + "epoch": 0.5024358111287252, + "grad_norm": 0.1634259670972824, + "learning_rate": 4.497571427537334e-06, + "loss": 0.9189, + "step": 69410 + }, + { + "epoch": 0.5025081977893114, + "grad_norm": 0.1655704826116562, + "learning_rate": 4.497499040876747e-06, + "loss": 0.8979, + "step": 69420 + }, + { + "epoch": 0.5025805844498976, + "grad_norm": 0.16415700316429138, + "learning_rate": 4.497426654216162e-06, + "loss": 0.9202, + "step": 69430 + }, + { + "epoch": 0.5026529711104838, + "grad_norm": 0.16875231266021729, + "learning_rate": 4.497354267555575e-06, + "loss": 0.9152, + "step": 69440 + }, + { + "epoch": 0.50272535777107, + "grad_norm": 0.19432953000068665, + "learning_rate": 4.497281880894989e-06, + "loss": 0.9217, + "step": 69450 + }, + { + "epoch": 0.5027977444316561, + "grad_norm": 0.15906265377998352, + "learning_rate": 4.497209494234403e-06, + "loss": 0.9239, + "step": 69460 + }, + { + "epoch": 0.5028701310922423, + "grad_norm": 0.16084323823451996, + "learning_rate": 4.497137107573817e-06, + "loss": 0.9084, + "step": 69470 + }, + { + "epoch": 0.5029425177528285, + "grad_norm": 0.16277502477169037, + "learning_rate": 4.497064720913231e-06, + "loss": 0.9318, + "step": 69480 + }, + { + "epoch": 0.5030149044134147, + "grad_norm": 0.16054733097553253, + "learning_rate": 4.496992334252644e-06, + "loss": 0.9187, + "step": 69490 + }, + { + "epoch": 0.5030872910740009, + "grad_norm": 0.14251933991909027, + "learning_rate": 4.496919947592058e-06, + "loss": 0.9271, + "step": 69500 + }, + { + "epoch": 0.503159677734587, + "grad_norm": 0.1685430407524109, + "learning_rate": 4.496847560931472e-06, + "loss": 0.9193, + "step": 69510 + }, + { + "epoch": 0.5032320643951732, + "grad_norm": 0.15440599620342255, + "learning_rate": 4.496775174270886e-06, + "loss": 0.9206, + "step": 69520 + }, + { + "epoch": 0.5033044510557595, + "grad_norm": 0.15421442687511444, + "learning_rate": 4.4967027876103e-06, + "loss": 0.923, + "step": 69530 + }, + { + "epoch": 0.5033768377163457, + "grad_norm": 0.1564491093158722, + "learning_rate": 4.496630400949713e-06, + "loss": 0.9216, + "step": 69540 + }, + { + "epoch": 0.5034492243769318, + "grad_norm": 0.16401636600494385, + "learning_rate": 4.496558014289128e-06, + "loss": 0.9185, + "step": 69550 + }, + { + "epoch": 0.503521611037518, + "grad_norm": 0.1628132462501526, + "learning_rate": 4.496485627628541e-06, + "loss": 0.9235, + "step": 69560 + }, + { + "epoch": 0.5035939976981042, + "grad_norm": 0.16760440170764923, + "learning_rate": 4.496413240967955e-06, + "loss": 0.9184, + "step": 69570 + }, + { + "epoch": 0.5036663843586904, + "grad_norm": 0.15936897695064545, + "learning_rate": 4.496340854307369e-06, + "loss": 0.9206, + "step": 69580 + }, + { + "epoch": 0.5037387710192766, + "grad_norm": 0.15765340626239777, + "learning_rate": 4.496268467646782e-06, + "loss": 0.9306, + "step": 69590 + }, + { + "epoch": 0.5038111576798627, + "grad_norm": 0.17861410975456238, + "learning_rate": 4.496196080986196e-06, + "loss": 0.9187, + "step": 69600 + }, + { + "epoch": 0.5038835443404489, + "grad_norm": 0.24393630027770996, + "learning_rate": 4.4961236943256094e-06, + "loss": 0.9149, + "step": 69610 + }, + { + "epoch": 0.5039559310010351, + "grad_norm": 0.20668023824691772, + "learning_rate": 4.496051307665024e-06, + "loss": 0.9286, + "step": 69620 + }, + { + "epoch": 0.5040283176616213, + "grad_norm": 0.15350449085235596, + "learning_rate": 4.4959789210044375e-06, + "loss": 0.9045, + "step": 69630 + }, + { + "epoch": 0.5041007043222075, + "grad_norm": 0.16766700148582458, + "learning_rate": 4.495906534343851e-06, + "loss": 0.9155, + "step": 69640 + }, + { + "epoch": 0.5041730909827937, + "grad_norm": 0.15862970054149628, + "learning_rate": 4.495834147683265e-06, + "loss": 0.916, + "step": 69650 + }, + { + "epoch": 0.5042454776433799, + "grad_norm": 0.1742592453956604, + "learning_rate": 4.495761761022679e-06, + "loss": 0.9168, + "step": 69660 + }, + { + "epoch": 0.5043178643039661, + "grad_norm": 0.15328940749168396, + "learning_rate": 4.495689374362093e-06, + "loss": 0.9151, + "step": 69670 + }, + { + "epoch": 0.5043902509645523, + "grad_norm": 0.15543635189533234, + "learning_rate": 4.4956169877015064e-06, + "loss": 0.917, + "step": 69680 + }, + { + "epoch": 0.5044626376251384, + "grad_norm": 0.1722266972064972, + "learning_rate": 4.49554460104092e-06, + "loss": 0.9218, + "step": 69690 + }, + { + "epoch": 0.5045350242857246, + "grad_norm": 0.1570882499217987, + "learning_rate": 4.4954722143803345e-06, + "loss": 0.9202, + "step": 69700 + }, + { + "epoch": 0.5046074109463108, + "grad_norm": 0.15502651035785675, + "learning_rate": 4.495399827719748e-06, + "loss": 0.9209, + "step": 69710 + }, + { + "epoch": 0.504679797606897, + "grad_norm": 0.1683962345123291, + "learning_rate": 4.495327441059162e-06, + "loss": 0.9239, + "step": 69720 + }, + { + "epoch": 0.5047521842674831, + "grad_norm": 0.15740440785884857, + "learning_rate": 4.495255054398575e-06, + "loss": 0.9221, + "step": 69730 + }, + { + "epoch": 0.5048245709280694, + "grad_norm": 0.15507307648658752, + "learning_rate": 4.49518266773799e-06, + "loss": 0.9148, + "step": 69740 + }, + { + "epoch": 0.5048969575886556, + "grad_norm": 0.14979131519794464, + "learning_rate": 4.4951102810774034e-06, + "loss": 0.9056, + "step": 69750 + }, + { + "epoch": 0.5049693442492418, + "grad_norm": 0.1572350412607193, + "learning_rate": 4.495037894416817e-06, + "loss": 0.9233, + "step": 69760 + }, + { + "epoch": 0.505041730909828, + "grad_norm": 0.15610437095165253, + "learning_rate": 4.494965507756231e-06, + "loss": 0.9252, + "step": 69770 + }, + { + "epoch": 0.5051141175704141, + "grad_norm": 0.15537583827972412, + "learning_rate": 4.494893121095645e-06, + "loss": 0.9243, + "step": 69780 + }, + { + "epoch": 0.5051865042310003, + "grad_norm": 0.19823171198368073, + "learning_rate": 4.494820734435059e-06, + "loss": 0.9286, + "step": 69790 + }, + { + "epoch": 0.5052588908915865, + "grad_norm": 0.15276482701301575, + "learning_rate": 4.494748347774472e-06, + "loss": 0.9221, + "step": 69800 + }, + { + "epoch": 0.5053312775521727, + "grad_norm": 0.15829552710056305, + "learning_rate": 4.494675961113886e-06, + "loss": 0.9058, + "step": 69810 + }, + { + "epoch": 0.5054036642127588, + "grad_norm": 0.1616862267255783, + "learning_rate": 4.4946035744533004e-06, + "loss": 0.9224, + "step": 69820 + }, + { + "epoch": 0.505476050873345, + "grad_norm": 0.15852747857570648, + "learning_rate": 4.494531187792714e-06, + "loss": 0.9288, + "step": 69830 + }, + { + "epoch": 0.5055484375339312, + "grad_norm": 0.16059504449367523, + "learning_rate": 4.494458801132128e-06, + "loss": 0.9153, + "step": 69840 + }, + { + "epoch": 0.5056208241945175, + "grad_norm": 0.16823314130306244, + "learning_rate": 4.494386414471541e-06, + "loss": 0.9232, + "step": 69850 + }, + { + "epoch": 0.5056932108551037, + "grad_norm": 0.14530618488788605, + "learning_rate": 4.494314027810956e-06, + "loss": 0.9221, + "step": 69860 + }, + { + "epoch": 0.5057655975156898, + "grad_norm": 0.15647926926612854, + "learning_rate": 4.494241641150369e-06, + "loss": 0.9297, + "step": 69870 + }, + { + "epoch": 0.505837984176276, + "grad_norm": 0.18810240924358368, + "learning_rate": 4.494169254489783e-06, + "loss": 0.9132, + "step": 69880 + }, + { + "epoch": 0.5059103708368622, + "grad_norm": 0.16139045357704163, + "learning_rate": 4.494096867829197e-06, + "loss": 0.9025, + "step": 69890 + }, + { + "epoch": 0.5059827574974484, + "grad_norm": 0.14970675110816956, + "learning_rate": 4.494024481168611e-06, + "loss": 0.9192, + "step": 69900 + }, + { + "epoch": 0.5060551441580345, + "grad_norm": 0.17512159049510956, + "learning_rate": 4.493952094508025e-06, + "loss": 0.9159, + "step": 69910 + }, + { + "epoch": 0.5061275308186207, + "grad_norm": 0.1656593680381775, + "learning_rate": 4.493879707847438e-06, + "loss": 0.9287, + "step": 69920 + }, + { + "epoch": 0.5061999174792069, + "grad_norm": 0.14336960017681122, + "learning_rate": 4.493807321186852e-06, + "loss": 0.913, + "step": 69930 + }, + { + "epoch": 0.5062723041397931, + "grad_norm": 0.15477393567562103, + "learning_rate": 4.493734934526266e-06, + "loss": 0.9236, + "step": 69940 + }, + { + "epoch": 0.5063446908003794, + "grad_norm": 0.16996079683303833, + "learning_rate": 4.49366254786568e-06, + "loss": 0.9288, + "step": 69950 + }, + { + "epoch": 0.5064170774609655, + "grad_norm": 0.16285812854766846, + "learning_rate": 4.493590161205094e-06, + "loss": 0.9243, + "step": 69960 + }, + { + "epoch": 0.5064894641215517, + "grad_norm": 0.16963927447795868, + "learning_rate": 4.493517774544507e-06, + "loss": 0.9154, + "step": 69970 + }, + { + "epoch": 0.5065618507821379, + "grad_norm": 0.17112816870212555, + "learning_rate": 4.493445387883921e-06, + "loss": 0.923, + "step": 69980 + }, + { + "epoch": 0.5066342374427241, + "grad_norm": 0.17348773777484894, + "learning_rate": 4.493373001223335e-06, + "loss": 0.9348, + "step": 69990 + }, + { + "epoch": 0.5067066241033102, + "grad_norm": 0.16326965391635895, + "learning_rate": 4.493300614562749e-06, + "loss": 0.9317, + "step": 70000 + }, + { + "epoch": 0.5067790107638964, + "grad_norm": 0.16024067997932434, + "learning_rate": 4.4932282279021625e-06, + "loss": 0.9252, + "step": 70010 + }, + { + "epoch": 0.5068513974244826, + "grad_norm": 0.1615953892469406, + "learning_rate": 4.493155841241576e-06, + "loss": 0.9135, + "step": 70020 + }, + { + "epoch": 0.5069237840850688, + "grad_norm": 0.14714552462100983, + "learning_rate": 4.493083454580991e-06, + "loss": 0.9307, + "step": 70030 + }, + { + "epoch": 0.506996170745655, + "grad_norm": 0.15000593662261963, + "learning_rate": 4.493011067920404e-06, + "loss": 0.9198, + "step": 70040 + }, + { + "epoch": 0.5070685574062411, + "grad_norm": 0.16868354380130768, + "learning_rate": 4.492938681259818e-06, + "loss": 0.9192, + "step": 70050 + }, + { + "epoch": 0.5071409440668274, + "grad_norm": 0.1756649762392044, + "learning_rate": 4.4928662945992315e-06, + "loss": 0.9145, + "step": 70060 + }, + { + "epoch": 0.5072133307274136, + "grad_norm": 0.14338277280330658, + "learning_rate": 4.492793907938646e-06, + "loss": 0.9162, + "step": 70070 + }, + { + "epoch": 0.5072857173879998, + "grad_norm": 0.15879562497138977, + "learning_rate": 4.4927215212780596e-06, + "loss": 0.9136, + "step": 70080 + }, + { + "epoch": 0.507358104048586, + "grad_norm": 0.16323305666446686, + "learning_rate": 4.492649134617473e-06, + "loss": 0.9207, + "step": 70090 + }, + { + "epoch": 0.5074304907091721, + "grad_norm": 0.15792213380336761, + "learning_rate": 4.492576747956887e-06, + "loss": 0.9112, + "step": 70100 + }, + { + "epoch": 0.5075028773697583, + "grad_norm": 0.15665297210216522, + "learning_rate": 4.4925043612963e-06, + "loss": 0.9182, + "step": 70110 + }, + { + "epoch": 0.5075752640303445, + "grad_norm": 0.16642248630523682, + "learning_rate": 4.492431974635714e-06, + "loss": 0.9249, + "step": 70120 + }, + { + "epoch": 0.5076476506909307, + "grad_norm": 0.1554255336523056, + "learning_rate": 4.492359587975128e-06, + "loss": 0.9203, + "step": 70130 + }, + { + "epoch": 0.5077200373515168, + "grad_norm": 0.17137949168682098, + "learning_rate": 4.492287201314542e-06, + "loss": 0.9248, + "step": 70140 + }, + { + "epoch": 0.507792424012103, + "grad_norm": 0.16739679872989655, + "learning_rate": 4.492214814653956e-06, + "loss": 0.9192, + "step": 70150 + }, + { + "epoch": 0.5078648106726892, + "grad_norm": 0.16433091461658478, + "learning_rate": 4.492142427993369e-06, + "loss": 0.919, + "step": 70160 + }, + { + "epoch": 0.5079371973332755, + "grad_norm": 0.16552671790122986, + "learning_rate": 4.492070041332783e-06, + "loss": 0.9291, + "step": 70170 + }, + { + "epoch": 0.5080095839938616, + "grad_norm": 0.15935184061527252, + "learning_rate": 4.491997654672197e-06, + "loss": 0.9255, + "step": 70180 + }, + { + "epoch": 0.5080819706544478, + "grad_norm": 0.16075530648231506, + "learning_rate": 4.491925268011611e-06, + "loss": 0.9286, + "step": 70190 + }, + { + "epoch": 0.508154357315034, + "grad_norm": 0.16693368554115295, + "learning_rate": 4.491852881351025e-06, + "loss": 0.9312, + "step": 70200 + }, + { + "epoch": 0.5082267439756202, + "grad_norm": 0.1627953052520752, + "learning_rate": 4.491780494690438e-06, + "loss": 0.914, + "step": 70210 + }, + { + "epoch": 0.5082991306362064, + "grad_norm": 0.17071197926998138, + "learning_rate": 4.491708108029853e-06, + "loss": 0.9145, + "step": 70220 + }, + { + "epoch": 0.5083715172967925, + "grad_norm": 0.1666022539138794, + "learning_rate": 4.491635721369266e-06, + "loss": 0.9181, + "step": 70230 + }, + { + "epoch": 0.5084439039573787, + "grad_norm": 0.16290847957134247, + "learning_rate": 4.49156333470868e-06, + "loss": 0.9289, + "step": 70240 + }, + { + "epoch": 0.5085162906179649, + "grad_norm": 0.2862056791782379, + "learning_rate": 4.4914909480480936e-06, + "loss": 0.9133, + "step": 70250 + }, + { + "epoch": 0.5085886772785511, + "grad_norm": 0.1533055603504181, + "learning_rate": 4.491418561387508e-06, + "loss": 0.9179, + "step": 70260 + }, + { + "epoch": 0.5086610639391373, + "grad_norm": 0.16260363161563873, + "learning_rate": 4.491346174726922e-06, + "loss": 0.9203, + "step": 70270 + }, + { + "epoch": 0.5087334505997235, + "grad_norm": 0.17325079441070557, + "learning_rate": 4.491273788066335e-06, + "loss": 0.9229, + "step": 70280 + }, + { + "epoch": 0.5088058372603097, + "grad_norm": 0.17838117480278015, + "learning_rate": 4.491201401405749e-06, + "loss": 0.9184, + "step": 70290 + }, + { + "epoch": 0.5088782239208959, + "grad_norm": 0.17130929231643677, + "learning_rate": 4.491129014745163e-06, + "loss": 0.9226, + "step": 70300 + }, + { + "epoch": 0.508950610581482, + "grad_norm": 0.1562015265226364, + "learning_rate": 4.491056628084577e-06, + "loss": 0.9384, + "step": 70310 + }, + { + "epoch": 0.5090229972420682, + "grad_norm": 0.1642456352710724, + "learning_rate": 4.490984241423991e-06, + "loss": 0.9392, + "step": 70320 + }, + { + "epoch": 0.5090953839026544, + "grad_norm": 0.16102585196495056, + "learning_rate": 4.490911854763404e-06, + "loss": 0.9269, + "step": 70330 + }, + { + "epoch": 0.5091677705632406, + "grad_norm": 0.16825130581855774, + "learning_rate": 4.490839468102819e-06, + "loss": 0.9312, + "step": 70340 + }, + { + "epoch": 0.5092401572238268, + "grad_norm": 0.18951553106307983, + "learning_rate": 4.490767081442232e-06, + "loss": 0.9149, + "step": 70350 + }, + { + "epoch": 0.5093125438844129, + "grad_norm": 0.1539865881204605, + "learning_rate": 4.490694694781646e-06, + "loss": 0.9176, + "step": 70360 + }, + { + "epoch": 0.5093849305449991, + "grad_norm": 0.16370318830013275, + "learning_rate": 4.4906223081210595e-06, + "loss": 0.9208, + "step": 70370 + }, + { + "epoch": 0.5094573172055854, + "grad_norm": 0.1988501250743866, + "learning_rate": 4.490549921460474e-06, + "loss": 0.9306, + "step": 70380 + }, + { + "epoch": 0.5095297038661716, + "grad_norm": 0.15401297807693481, + "learning_rate": 4.490477534799888e-06, + "loss": 0.931, + "step": 70390 + }, + { + "epoch": 0.5096020905267578, + "grad_norm": 0.15856841206550598, + "learning_rate": 4.490405148139301e-06, + "loss": 0.9152, + "step": 70400 + }, + { + "epoch": 0.5096744771873439, + "grad_norm": 0.15859901905059814, + "learning_rate": 4.490332761478715e-06, + "loss": 0.9092, + "step": 70410 + }, + { + "epoch": 0.5097468638479301, + "grad_norm": 0.16617213189601898, + "learning_rate": 4.490260374818129e-06, + "loss": 0.9061, + "step": 70420 + }, + { + "epoch": 0.5098192505085163, + "grad_norm": 0.159602552652359, + "learning_rate": 4.490187988157543e-06, + "loss": 0.9199, + "step": 70430 + }, + { + "epoch": 0.5098916371691025, + "grad_norm": 0.17478512227535248, + "learning_rate": 4.4901156014969565e-06, + "loss": 0.9304, + "step": 70440 + }, + { + "epoch": 0.5099640238296886, + "grad_norm": 0.1556614488363266, + "learning_rate": 4.49004321483637e-06, + "loss": 0.9188, + "step": 70450 + }, + { + "epoch": 0.5100364104902748, + "grad_norm": 0.15821588039398193, + "learning_rate": 4.489970828175785e-06, + "loss": 0.9139, + "step": 70460 + }, + { + "epoch": 0.510108797150861, + "grad_norm": 0.1509390026330948, + "learning_rate": 4.489898441515198e-06, + "loss": 0.9246, + "step": 70470 + }, + { + "epoch": 0.5101811838114473, + "grad_norm": 0.15749859809875488, + "learning_rate": 4.489826054854612e-06, + "loss": 0.9151, + "step": 70480 + }, + { + "epoch": 0.5102535704720335, + "grad_norm": 0.1502166986465454, + "learning_rate": 4.4897536681940254e-06, + "loss": 0.9084, + "step": 70490 + }, + { + "epoch": 0.5103259571326196, + "grad_norm": 0.15976519882678986, + "learning_rate": 4.48968128153344e-06, + "loss": 0.9106, + "step": 70500 + }, + { + "epoch": 0.5103983437932058, + "grad_norm": 0.15369580686092377, + "learning_rate": 4.4896088948728535e-06, + "loss": 0.9203, + "step": 70510 + }, + { + "epoch": 0.510470730453792, + "grad_norm": 0.17383261024951935, + "learning_rate": 4.489536508212267e-06, + "loss": 0.9298, + "step": 70520 + }, + { + "epoch": 0.5105431171143782, + "grad_norm": 0.1546318680047989, + "learning_rate": 4.489464121551681e-06, + "loss": 0.9357, + "step": 70530 + }, + { + "epoch": 0.5106155037749643, + "grad_norm": 0.15537676215171814, + "learning_rate": 4.489391734891095e-06, + "loss": 0.9096, + "step": 70540 + }, + { + "epoch": 0.5106878904355505, + "grad_norm": 0.14807634055614471, + "learning_rate": 4.489319348230509e-06, + "loss": 0.9225, + "step": 70550 + }, + { + "epoch": 0.5107602770961367, + "grad_norm": 0.17068465054035187, + "learning_rate": 4.4892469615699224e-06, + "loss": 0.9229, + "step": 70560 + }, + { + "epoch": 0.5108326637567229, + "grad_norm": 0.16985973715782166, + "learning_rate": 4.489174574909336e-06, + "loss": 0.918, + "step": 70570 + }, + { + "epoch": 0.510905050417309, + "grad_norm": 0.1610952913761139, + "learning_rate": 4.48910218824875e-06, + "loss": 0.9191, + "step": 70580 + }, + { + "epoch": 0.5109774370778953, + "grad_norm": 0.19032908976078033, + "learning_rate": 4.489029801588164e-06, + "loss": 0.9158, + "step": 70590 + }, + { + "epoch": 0.5110498237384815, + "grad_norm": 0.1557486355304718, + "learning_rate": 4.488957414927578e-06, + "loss": 0.9177, + "step": 70600 + }, + { + "epoch": 0.5111222103990677, + "grad_norm": 0.15214890241622925, + "learning_rate": 4.488885028266991e-06, + "loss": 0.9175, + "step": 70610 + }, + { + "epoch": 0.5111945970596539, + "grad_norm": 0.16953812539577484, + "learning_rate": 4.488812641606405e-06, + "loss": 0.9178, + "step": 70620 + }, + { + "epoch": 0.51126698372024, + "grad_norm": 0.16255024075508118, + "learning_rate": 4.4887402549458195e-06, + "loss": 0.9245, + "step": 70630 + }, + { + "epoch": 0.5113393703808262, + "grad_norm": 0.15664204955101013, + "learning_rate": 4.488667868285232e-06, + "loss": 0.9172, + "step": 70640 + }, + { + "epoch": 0.5114117570414124, + "grad_norm": 0.1526808738708496, + "learning_rate": 4.488595481624647e-06, + "loss": 0.9152, + "step": 70650 + }, + { + "epoch": 0.5114841437019986, + "grad_norm": 0.18234345316886902, + "learning_rate": 4.48852309496406e-06, + "loss": 0.9152, + "step": 70660 + }, + { + "epoch": 0.5115565303625847, + "grad_norm": 0.15891316533088684, + "learning_rate": 4.488450708303474e-06, + "loss": 0.9184, + "step": 70670 + }, + { + "epoch": 0.5116289170231709, + "grad_norm": 0.15766972303390503, + "learning_rate": 4.4883783216428875e-06, + "loss": 0.9197, + "step": 70680 + }, + { + "epoch": 0.5117013036837571, + "grad_norm": 0.16315676271915436, + "learning_rate": 4.488305934982302e-06, + "loss": 0.9245, + "step": 70690 + }, + { + "epoch": 0.5117736903443434, + "grad_norm": 0.15983930230140686, + "learning_rate": 4.488233548321716e-06, + "loss": 0.9192, + "step": 70700 + }, + { + "epoch": 0.5118460770049296, + "grad_norm": 0.14471487700939178, + "learning_rate": 4.488161161661129e-06, + "loss": 0.9174, + "step": 70710 + }, + { + "epoch": 0.5119184636655157, + "grad_norm": 0.1613370180130005, + "learning_rate": 4.488088775000543e-06, + "loss": 0.9036, + "step": 70720 + }, + { + "epoch": 0.5119908503261019, + "grad_norm": 0.16052919626235962, + "learning_rate": 4.488016388339957e-06, + "loss": 0.9103, + "step": 70730 + }, + { + "epoch": 0.5120632369866881, + "grad_norm": 0.16880005598068237, + "learning_rate": 4.487944001679371e-06, + "loss": 0.9251, + "step": 70740 + }, + { + "epoch": 0.5121356236472743, + "grad_norm": 0.1468336135149002, + "learning_rate": 4.4878716150187845e-06, + "loss": 0.9009, + "step": 70750 + }, + { + "epoch": 0.5122080103078605, + "grad_norm": 0.16562113165855408, + "learning_rate": 4.487799228358198e-06, + "loss": 0.9216, + "step": 70760 + }, + { + "epoch": 0.5122803969684466, + "grad_norm": 0.15913574397563934, + "learning_rate": 4.487726841697612e-06, + "loss": 0.9216, + "step": 70770 + }, + { + "epoch": 0.5123527836290328, + "grad_norm": 0.1655045747756958, + "learning_rate": 4.487654455037026e-06, + "loss": 0.9086, + "step": 70780 + }, + { + "epoch": 0.512425170289619, + "grad_norm": 0.15185129642486572, + "learning_rate": 4.48758206837644e-06, + "loss": 0.9242, + "step": 70790 + }, + { + "epoch": 0.5124975569502053, + "grad_norm": 0.1671728938817978, + "learning_rate": 4.4875096817158535e-06, + "loss": 0.9166, + "step": 70800 + }, + { + "epoch": 0.5125699436107914, + "grad_norm": 0.1865541785955429, + "learning_rate": 4.487437295055267e-06, + "loss": 0.9236, + "step": 70810 + }, + { + "epoch": 0.5126423302713776, + "grad_norm": 0.1669655591249466, + "learning_rate": 4.4873649083946816e-06, + "loss": 0.9196, + "step": 70820 + }, + { + "epoch": 0.5127147169319638, + "grad_norm": 0.17508293688297272, + "learning_rate": 4.487292521734095e-06, + "loss": 0.9211, + "step": 70830 + }, + { + "epoch": 0.51278710359255, + "grad_norm": 0.1667715311050415, + "learning_rate": 4.487220135073509e-06, + "loss": 0.9144, + "step": 70840 + }, + { + "epoch": 0.5128594902531362, + "grad_norm": 0.17849986255168915, + "learning_rate": 4.487147748412922e-06, + "loss": 0.9205, + "step": 70850 + }, + { + "epoch": 0.5129318769137223, + "grad_norm": 0.15619997680187225, + "learning_rate": 4.487075361752337e-06, + "loss": 0.9379, + "step": 70860 + }, + { + "epoch": 0.5130042635743085, + "grad_norm": 0.16368311643600464, + "learning_rate": 4.4870029750917505e-06, + "loss": 0.9072, + "step": 70870 + }, + { + "epoch": 0.5130766502348947, + "grad_norm": 0.16930024325847626, + "learning_rate": 4.486930588431164e-06, + "loss": 0.91, + "step": 70880 + }, + { + "epoch": 0.5131490368954809, + "grad_norm": 0.17948120832443237, + "learning_rate": 4.486858201770578e-06, + "loss": 0.9104, + "step": 70890 + }, + { + "epoch": 0.513221423556067, + "grad_norm": 0.19163572788238525, + "learning_rate": 4.486785815109992e-06, + "loss": 0.8944, + "step": 70900 + }, + { + "epoch": 0.5132938102166533, + "grad_norm": 0.15345646440982819, + "learning_rate": 4.486713428449406e-06, + "loss": 0.9268, + "step": 70910 + }, + { + "epoch": 0.5133661968772395, + "grad_norm": 0.16913288831710815, + "learning_rate": 4.486641041788819e-06, + "loss": 0.9235, + "step": 70920 + }, + { + "epoch": 0.5134385835378257, + "grad_norm": 0.17389893531799316, + "learning_rate": 4.486568655128233e-06, + "loss": 0.9248, + "step": 70930 + }, + { + "epoch": 0.5135109701984119, + "grad_norm": 0.16675566136837006, + "learning_rate": 4.4864962684676475e-06, + "loss": 0.9113, + "step": 70940 + }, + { + "epoch": 0.513583356858998, + "grad_norm": 0.15058213472366333, + "learning_rate": 4.486423881807061e-06, + "loss": 0.9248, + "step": 70950 + }, + { + "epoch": 0.5136557435195842, + "grad_norm": 0.33218497037887573, + "learning_rate": 4.486351495146475e-06, + "loss": 0.9035, + "step": 70960 + }, + { + "epoch": 0.5137281301801704, + "grad_norm": 0.1726812720298767, + "learning_rate": 4.486279108485888e-06, + "loss": 0.904, + "step": 70970 + }, + { + "epoch": 0.5138005168407566, + "grad_norm": 0.1540847271680832, + "learning_rate": 4.486206721825303e-06, + "loss": 0.9153, + "step": 70980 + }, + { + "epoch": 0.5138729035013427, + "grad_norm": 0.1801622062921524, + "learning_rate": 4.486134335164716e-06, + "loss": 0.9145, + "step": 70990 + }, + { + "epoch": 0.5139452901619289, + "grad_norm": 0.14557209610939026, + "learning_rate": 4.48606194850413e-06, + "loss": 0.9214, + "step": 71000 + }, + { + "epoch": 0.5140176768225152, + "grad_norm": 0.1920100599527359, + "learning_rate": 4.485989561843544e-06, + "loss": 0.9251, + "step": 71010 + }, + { + "epoch": 0.5140900634831014, + "grad_norm": 0.1555316001176834, + "learning_rate": 4.485917175182958e-06, + "loss": 0.9179, + "step": 71020 + }, + { + "epoch": 0.5141624501436876, + "grad_norm": 0.16274884343147278, + "learning_rate": 4.485844788522372e-06, + "loss": 0.9098, + "step": 71030 + }, + { + "epoch": 0.5142348368042737, + "grad_norm": 0.17160306870937347, + "learning_rate": 4.485772401861785e-06, + "loss": 0.918, + "step": 71040 + }, + { + "epoch": 0.5143072234648599, + "grad_norm": 0.1791687160730362, + "learning_rate": 4.485700015201199e-06, + "loss": 0.921, + "step": 71050 + }, + { + "epoch": 0.5143796101254461, + "grad_norm": 0.1617984026670456, + "learning_rate": 4.485627628540613e-06, + "loss": 0.9106, + "step": 71060 + }, + { + "epoch": 0.5144519967860323, + "grad_norm": 0.1553729772567749, + "learning_rate": 4.485555241880027e-06, + "loss": 0.9223, + "step": 71070 + }, + { + "epoch": 0.5145243834466184, + "grad_norm": 0.2470700591802597, + "learning_rate": 4.485482855219441e-06, + "loss": 0.9151, + "step": 71080 + }, + { + "epoch": 0.5145967701072046, + "grad_norm": 0.16699855029582977, + "learning_rate": 4.485410468558854e-06, + "loss": 0.9129, + "step": 71090 + }, + { + "epoch": 0.5146691567677908, + "grad_norm": 0.21680283546447754, + "learning_rate": 4.485338081898269e-06, + "loss": 0.9169, + "step": 71100 + }, + { + "epoch": 0.514741543428377, + "grad_norm": 0.16673427820205688, + "learning_rate": 4.485265695237682e-06, + "loss": 0.9362, + "step": 71110 + }, + { + "epoch": 0.5148139300889633, + "grad_norm": 0.1613384336233139, + "learning_rate": 4.485193308577096e-06, + "loss": 0.9249, + "step": 71120 + }, + { + "epoch": 0.5148863167495494, + "grad_norm": 0.1817154735326767, + "learning_rate": 4.48512092191651e-06, + "loss": 0.9287, + "step": 71130 + }, + { + "epoch": 0.5149587034101356, + "grad_norm": 0.1700638085603714, + "learning_rate": 4.485048535255924e-06, + "loss": 0.9126, + "step": 71140 + }, + { + "epoch": 0.5150310900707218, + "grad_norm": 0.17622961103916168, + "learning_rate": 4.484976148595338e-06, + "loss": 0.9131, + "step": 71150 + }, + { + "epoch": 0.515103476731308, + "grad_norm": 0.1659720242023468, + "learning_rate": 4.484903761934751e-06, + "loss": 0.909, + "step": 71160 + }, + { + "epoch": 0.5151758633918941, + "grad_norm": 0.16669756174087524, + "learning_rate": 4.484831375274165e-06, + "loss": 0.9083, + "step": 71170 + }, + { + "epoch": 0.5152482500524803, + "grad_norm": 0.16893939673900604, + "learning_rate": 4.4847589886135785e-06, + "loss": 0.9122, + "step": 71180 + }, + { + "epoch": 0.5153206367130665, + "grad_norm": 0.15991714596748352, + "learning_rate": 4.484686601952992e-06, + "loss": 0.915, + "step": 71190 + }, + { + "epoch": 0.5153930233736527, + "grad_norm": 0.15963132679462433, + "learning_rate": 4.484614215292406e-06, + "loss": 0.9193, + "step": 71200 + }, + { + "epoch": 0.5154654100342388, + "grad_norm": 0.1559552103281021, + "learning_rate": 4.48454182863182e-06, + "loss": 0.9138, + "step": 71210 + }, + { + "epoch": 0.515537796694825, + "grad_norm": 0.17159438133239746, + "learning_rate": 4.484469441971234e-06, + "loss": 0.9046, + "step": 71220 + }, + { + "epoch": 0.5156101833554113, + "grad_norm": 0.15353378653526306, + "learning_rate": 4.4843970553106474e-06, + "loss": 0.9144, + "step": 71230 + }, + { + "epoch": 0.5156825700159975, + "grad_norm": 0.16255958378314972, + "learning_rate": 4.484324668650061e-06, + "loss": 0.9019, + "step": 71240 + }, + { + "epoch": 0.5157549566765837, + "grad_norm": 0.16722968220710754, + "learning_rate": 4.4842522819894755e-06, + "loss": 0.9364, + "step": 71250 + }, + { + "epoch": 0.5158273433371698, + "grad_norm": 0.16965439915657043, + "learning_rate": 4.484179895328889e-06, + "loss": 0.9201, + "step": 71260 + }, + { + "epoch": 0.515899729997756, + "grad_norm": 0.16112767159938812, + "learning_rate": 4.484107508668303e-06, + "loss": 0.9206, + "step": 71270 + }, + { + "epoch": 0.5159721166583422, + "grad_norm": 0.1666184961795807, + "learning_rate": 4.484035122007716e-06, + "loss": 0.9133, + "step": 71280 + }, + { + "epoch": 0.5160445033189284, + "grad_norm": 0.16528722643852234, + "learning_rate": 4.483962735347131e-06, + "loss": 0.9199, + "step": 71290 + }, + { + "epoch": 0.5161168899795145, + "grad_norm": 0.1569664627313614, + "learning_rate": 4.4838903486865444e-06, + "loss": 0.9239, + "step": 71300 + }, + { + "epoch": 0.5161892766401007, + "grad_norm": 0.1667070984840393, + "learning_rate": 4.483817962025958e-06, + "loss": 0.9269, + "step": 71310 + }, + { + "epoch": 0.5162616633006869, + "grad_norm": 0.15985529124736786, + "learning_rate": 4.483745575365372e-06, + "loss": 0.913, + "step": 71320 + }, + { + "epoch": 0.5163340499612732, + "grad_norm": 0.17231838405132294, + "learning_rate": 4.483673188704786e-06, + "loss": 0.9246, + "step": 71330 + }, + { + "epoch": 0.5164064366218594, + "grad_norm": 0.16398270428180695, + "learning_rate": 4.4836008020442e-06, + "loss": 0.919, + "step": 71340 + }, + { + "epoch": 0.5164788232824455, + "grad_norm": 0.25354573130607605, + "learning_rate": 4.483528415383613e-06, + "loss": 0.9369, + "step": 71350 + }, + { + "epoch": 0.5165512099430317, + "grad_norm": 0.15637996792793274, + "learning_rate": 4.483456028723027e-06, + "loss": 0.9173, + "step": 71360 + }, + { + "epoch": 0.5166235966036179, + "grad_norm": 0.1540151685476303, + "learning_rate": 4.4833836420624415e-06, + "loss": 0.9172, + "step": 71370 + }, + { + "epoch": 0.5166959832642041, + "grad_norm": 0.1704026758670807, + "learning_rate": 4.483311255401855e-06, + "loss": 0.9171, + "step": 71380 + }, + { + "epoch": 0.5167683699247902, + "grad_norm": 0.152908593416214, + "learning_rate": 4.483238868741269e-06, + "loss": 0.9149, + "step": 71390 + }, + { + "epoch": 0.5168407565853764, + "grad_norm": 0.15788275003433228, + "learning_rate": 4.483166482080682e-06, + "loss": 0.9224, + "step": 71400 + }, + { + "epoch": 0.5169131432459626, + "grad_norm": 0.1816600263118744, + "learning_rate": 4.483094095420096e-06, + "loss": 0.9313, + "step": 71410 + }, + { + "epoch": 0.5169855299065488, + "grad_norm": 0.3124025762081146, + "learning_rate": 4.48302170875951e-06, + "loss": 0.9189, + "step": 71420 + }, + { + "epoch": 0.517057916567135, + "grad_norm": 0.16039884090423584, + "learning_rate": 4.482949322098924e-06, + "loss": 0.929, + "step": 71430 + }, + { + "epoch": 0.5171303032277212, + "grad_norm": 0.1553005576133728, + "learning_rate": 4.482876935438338e-06, + "loss": 0.9131, + "step": 71440 + }, + { + "epoch": 0.5172026898883074, + "grad_norm": 0.16902105510234833, + "learning_rate": 4.482804548777751e-06, + "loss": 0.9111, + "step": 71450 + }, + { + "epoch": 0.5172750765488936, + "grad_norm": 0.17077742516994476, + "learning_rate": 4.482732162117166e-06, + "loss": 0.9269, + "step": 71460 + }, + { + "epoch": 0.5173474632094798, + "grad_norm": 0.17665700614452362, + "learning_rate": 4.482659775456579e-06, + "loss": 0.908, + "step": 71470 + }, + { + "epoch": 0.517419849870066, + "grad_norm": 0.16787657141685486, + "learning_rate": 4.482587388795993e-06, + "loss": 0.9067, + "step": 71480 + }, + { + "epoch": 0.5174922365306521, + "grad_norm": 0.168588787317276, + "learning_rate": 4.4825150021354065e-06, + "loss": 0.9111, + "step": 71490 + }, + { + "epoch": 0.5175646231912383, + "grad_norm": 0.16920819878578186, + "learning_rate": 4.482442615474821e-06, + "loss": 0.9262, + "step": 71500 + }, + { + "epoch": 0.5176370098518245, + "grad_norm": 0.17827217280864716, + "learning_rate": 4.482370228814235e-06, + "loss": 0.9352, + "step": 71510 + }, + { + "epoch": 0.5177093965124107, + "grad_norm": 0.18853449821472168, + "learning_rate": 4.482297842153648e-06, + "loss": 0.9177, + "step": 71520 + }, + { + "epoch": 0.5177817831729968, + "grad_norm": 0.1763719618320465, + "learning_rate": 4.482225455493062e-06, + "loss": 0.9231, + "step": 71530 + }, + { + "epoch": 0.5178541698335831, + "grad_norm": 0.15667404234409332, + "learning_rate": 4.482153068832476e-06, + "loss": 0.9272, + "step": 71540 + }, + { + "epoch": 0.5179265564941693, + "grad_norm": 0.15414214134216309, + "learning_rate": 4.48208068217189e-06, + "loss": 0.9289, + "step": 71550 + }, + { + "epoch": 0.5179989431547555, + "grad_norm": 0.15999796986579895, + "learning_rate": 4.4820082955113036e-06, + "loss": 0.9122, + "step": 71560 + }, + { + "epoch": 0.5180713298153417, + "grad_norm": 0.1718224138021469, + "learning_rate": 4.481935908850717e-06, + "loss": 0.9087, + "step": 71570 + }, + { + "epoch": 0.5181437164759278, + "grad_norm": 0.14979562163352966, + "learning_rate": 4.481863522190132e-06, + "loss": 0.9066, + "step": 71580 + }, + { + "epoch": 0.518216103136514, + "grad_norm": 0.16340069472789764, + "learning_rate": 4.481791135529545e-06, + "loss": 0.923, + "step": 71590 + }, + { + "epoch": 0.5182884897971002, + "grad_norm": 0.15064361691474915, + "learning_rate": 4.481718748868959e-06, + "loss": 0.9123, + "step": 71600 + }, + { + "epoch": 0.5183608764576864, + "grad_norm": 0.16256332397460938, + "learning_rate": 4.4816463622083725e-06, + "loss": 0.9243, + "step": 71610 + }, + { + "epoch": 0.5184332631182725, + "grad_norm": 0.16027949750423431, + "learning_rate": 4.481573975547787e-06, + "loss": 0.9185, + "step": 71620 + }, + { + "epoch": 0.5185056497788587, + "grad_norm": 0.14856921136379242, + "learning_rate": 4.4815015888872006e-06, + "loss": 0.914, + "step": 71630 + }, + { + "epoch": 0.5185780364394449, + "grad_norm": 0.17712818086147308, + "learning_rate": 4.481429202226614e-06, + "loss": 0.9299, + "step": 71640 + }, + { + "epoch": 0.5186504231000312, + "grad_norm": 0.15580609440803528, + "learning_rate": 4.481356815566028e-06, + "loss": 0.924, + "step": 71650 + }, + { + "epoch": 0.5187228097606174, + "grad_norm": 0.15209463238716125, + "learning_rate": 4.481284428905442e-06, + "loss": 0.9227, + "step": 71660 + }, + { + "epoch": 0.5187951964212035, + "grad_norm": 0.1534406542778015, + "learning_rate": 4.481212042244856e-06, + "loss": 0.9206, + "step": 71670 + }, + { + "epoch": 0.5188675830817897, + "grad_norm": 0.1686408370733261, + "learning_rate": 4.4811396555842695e-06, + "loss": 0.9148, + "step": 71680 + }, + { + "epoch": 0.5189399697423759, + "grad_norm": 0.1665174961090088, + "learning_rate": 4.481067268923683e-06, + "loss": 0.9151, + "step": 71690 + }, + { + "epoch": 0.5190123564029621, + "grad_norm": 0.17360328137874603, + "learning_rate": 4.480994882263097e-06, + "loss": 0.9171, + "step": 71700 + }, + { + "epoch": 0.5190847430635482, + "grad_norm": 0.14976924657821655, + "learning_rate": 4.48092249560251e-06, + "loss": 0.9203, + "step": 71710 + }, + { + "epoch": 0.5191571297241344, + "grad_norm": 0.16571447253227234, + "learning_rate": 4.480850108941924e-06, + "loss": 0.9062, + "step": 71720 + }, + { + "epoch": 0.5192295163847206, + "grad_norm": 0.1702362298965454, + "learning_rate": 4.480777722281338e-06, + "loss": 0.9272, + "step": 71730 + }, + { + "epoch": 0.5193019030453068, + "grad_norm": 0.16296398639678955, + "learning_rate": 4.480705335620752e-06, + "loss": 0.915, + "step": 71740 + }, + { + "epoch": 0.5193742897058929, + "grad_norm": 0.15773184597492218, + "learning_rate": 4.480632948960166e-06, + "loss": 0.9159, + "step": 71750 + }, + { + "epoch": 0.5194466763664792, + "grad_norm": 0.15796466171741486, + "learning_rate": 4.480560562299579e-06, + "loss": 0.923, + "step": 71760 + }, + { + "epoch": 0.5195190630270654, + "grad_norm": 0.16592220962047577, + "learning_rate": 4.480488175638994e-06, + "loss": 0.9204, + "step": 71770 + }, + { + "epoch": 0.5195914496876516, + "grad_norm": 0.161960631608963, + "learning_rate": 4.480415788978407e-06, + "loss": 0.9235, + "step": 71780 + }, + { + "epoch": 0.5196638363482378, + "grad_norm": 0.17310741543769836, + "learning_rate": 4.480343402317821e-06, + "loss": 0.9062, + "step": 71790 + }, + { + "epoch": 0.5197362230088239, + "grad_norm": 0.1718437820672989, + "learning_rate": 4.4802710156572346e-06, + "loss": 0.9064, + "step": 71800 + }, + { + "epoch": 0.5198086096694101, + "grad_norm": 0.16727685928344727, + "learning_rate": 4.480198628996649e-06, + "loss": 0.9363, + "step": 71810 + }, + { + "epoch": 0.5198809963299963, + "grad_norm": 0.17579713463783264, + "learning_rate": 4.480126242336063e-06, + "loss": 0.9249, + "step": 71820 + }, + { + "epoch": 0.5199533829905825, + "grad_norm": 0.15877732634544373, + "learning_rate": 4.480053855675476e-06, + "loss": 0.9297, + "step": 71830 + }, + { + "epoch": 0.5200257696511686, + "grad_norm": 0.1620732545852661, + "learning_rate": 4.47998146901489e-06, + "loss": 0.9168, + "step": 71840 + }, + { + "epoch": 0.5200981563117548, + "grad_norm": 0.17274366319179535, + "learning_rate": 4.479909082354304e-06, + "loss": 0.9086, + "step": 71850 + }, + { + "epoch": 0.5201705429723411, + "grad_norm": 0.1550474315881729, + "learning_rate": 4.479836695693718e-06, + "loss": 0.9206, + "step": 71860 + }, + { + "epoch": 0.5202429296329273, + "grad_norm": 0.17657595872879028, + "learning_rate": 4.479764309033132e-06, + "loss": 0.9213, + "step": 71870 + }, + { + "epoch": 0.5203153162935135, + "grad_norm": 0.16544876992702484, + "learning_rate": 4.479691922372545e-06, + "loss": 0.929, + "step": 71880 + }, + { + "epoch": 0.5203877029540996, + "grad_norm": 0.1544915735721588, + "learning_rate": 4.47961953571196e-06, + "loss": 0.9057, + "step": 71890 + }, + { + "epoch": 0.5204600896146858, + "grad_norm": 0.15595407783985138, + "learning_rate": 4.479547149051373e-06, + "loss": 0.9144, + "step": 71900 + }, + { + "epoch": 0.520532476275272, + "grad_norm": 0.1735614836215973, + "learning_rate": 4.479474762390787e-06, + "loss": 0.9156, + "step": 71910 + }, + { + "epoch": 0.5206048629358582, + "grad_norm": 0.1538870483636856, + "learning_rate": 4.4794023757302005e-06, + "loss": 0.9117, + "step": 71920 + }, + { + "epoch": 0.5206772495964443, + "grad_norm": 0.15817460417747498, + "learning_rate": 4.479329989069615e-06, + "loss": 0.9233, + "step": 71930 + }, + { + "epoch": 0.5207496362570305, + "grad_norm": 0.16701272130012512, + "learning_rate": 4.479257602409029e-06, + "loss": 0.9218, + "step": 71940 + }, + { + "epoch": 0.5208220229176167, + "grad_norm": 0.15565438568592072, + "learning_rate": 4.479185215748442e-06, + "loss": 0.9113, + "step": 71950 + }, + { + "epoch": 0.5208944095782029, + "grad_norm": 0.15946054458618164, + "learning_rate": 4.479112829087856e-06, + "loss": 0.9165, + "step": 71960 + }, + { + "epoch": 0.5209667962387892, + "grad_norm": 0.1618921309709549, + "learning_rate": 4.47904044242727e-06, + "loss": 0.9241, + "step": 71970 + }, + { + "epoch": 0.5210391828993753, + "grad_norm": 0.19003738462924957, + "learning_rate": 4.478968055766684e-06, + "loss": 0.9101, + "step": 71980 + }, + { + "epoch": 0.5211115695599615, + "grad_norm": 0.16518519818782806, + "learning_rate": 4.4788956691060975e-06, + "loss": 0.9038, + "step": 71990 + }, + { + "epoch": 0.5211839562205477, + "grad_norm": 0.15592487156391144, + "learning_rate": 4.478823282445511e-06, + "loss": 0.9162, + "step": 72000 + }, + { + "epoch": 0.5212563428811339, + "grad_norm": 0.17761656641960144, + "learning_rate": 4.478750895784925e-06, + "loss": 0.9126, + "step": 72010 + }, + { + "epoch": 0.52132872954172, + "grad_norm": 0.1610282063484192, + "learning_rate": 4.478678509124339e-06, + "loss": 0.9199, + "step": 72020 + }, + { + "epoch": 0.5214011162023062, + "grad_norm": 0.16208316385746002, + "learning_rate": 4.478606122463753e-06, + "loss": 0.9129, + "step": 72030 + }, + { + "epoch": 0.5214735028628924, + "grad_norm": 0.17312684655189514, + "learning_rate": 4.4785337358031664e-06, + "loss": 0.9022, + "step": 72040 + }, + { + "epoch": 0.5215458895234786, + "grad_norm": 0.16080522537231445, + "learning_rate": 4.47846134914258e-06, + "loss": 0.908, + "step": 72050 + }, + { + "epoch": 0.5216182761840648, + "grad_norm": 0.17630934715270996, + "learning_rate": 4.4783889624819945e-06, + "loss": 0.9169, + "step": 72060 + }, + { + "epoch": 0.521690662844651, + "grad_norm": 0.1585136502981186, + "learning_rate": 4.478316575821408e-06, + "loss": 0.9079, + "step": 72070 + }, + { + "epoch": 0.5217630495052372, + "grad_norm": 0.15829670429229736, + "learning_rate": 4.478244189160822e-06, + "loss": 0.9209, + "step": 72080 + }, + { + "epoch": 0.5218354361658234, + "grad_norm": 0.1633504331111908, + "learning_rate": 4.478171802500235e-06, + "loss": 0.9202, + "step": 72090 + }, + { + "epoch": 0.5219078228264096, + "grad_norm": 0.1850529909133911, + "learning_rate": 4.47809941583965e-06, + "loss": 0.9244, + "step": 72100 + }, + { + "epoch": 0.5219802094869957, + "grad_norm": 0.16101692616939545, + "learning_rate": 4.4780270291790635e-06, + "loss": 0.9122, + "step": 72110 + }, + { + "epoch": 0.5220525961475819, + "grad_norm": 0.16224358975887299, + "learning_rate": 4.477954642518477e-06, + "loss": 0.9149, + "step": 72120 + }, + { + "epoch": 0.5221249828081681, + "grad_norm": 0.14560241997241974, + "learning_rate": 4.477882255857891e-06, + "loss": 0.9203, + "step": 72130 + }, + { + "epoch": 0.5221973694687543, + "grad_norm": 0.15428347885608673, + "learning_rate": 4.477809869197305e-06, + "loss": 0.9212, + "step": 72140 + }, + { + "epoch": 0.5222697561293405, + "grad_norm": 0.1555236577987671, + "learning_rate": 4.477737482536719e-06, + "loss": 0.9223, + "step": 72150 + }, + { + "epoch": 0.5223421427899266, + "grad_norm": 0.16414514183998108, + "learning_rate": 4.477665095876132e-06, + "loss": 0.9153, + "step": 72160 + }, + { + "epoch": 0.5224145294505128, + "grad_norm": 0.1530592143535614, + "learning_rate": 4.477592709215546e-06, + "loss": 0.9168, + "step": 72170 + }, + { + "epoch": 0.5224869161110991, + "grad_norm": 0.17093126475811005, + "learning_rate": 4.4775203225549605e-06, + "loss": 0.9175, + "step": 72180 + }, + { + "epoch": 0.5225593027716853, + "grad_norm": 0.16496798396110535, + "learning_rate": 4.477447935894374e-06, + "loss": 0.9173, + "step": 72190 + }, + { + "epoch": 0.5226316894322715, + "grad_norm": 0.2403537482023239, + "learning_rate": 4.477375549233788e-06, + "loss": 0.9239, + "step": 72200 + }, + { + "epoch": 0.5227040760928576, + "grad_norm": 0.1915457397699356, + "learning_rate": 4.477303162573201e-06, + "loss": 0.9279, + "step": 72210 + }, + { + "epoch": 0.5227764627534438, + "grad_norm": 0.1616457849740982, + "learning_rate": 4.477230775912616e-06, + "loss": 0.9078, + "step": 72220 + }, + { + "epoch": 0.52284884941403, + "grad_norm": 0.19186237454414368, + "learning_rate": 4.4771583892520285e-06, + "loss": 0.9307, + "step": 72230 + }, + { + "epoch": 0.5229212360746162, + "grad_norm": 0.16343003511428833, + "learning_rate": 4.477086002591442e-06, + "loss": 0.9148, + "step": 72240 + }, + { + "epoch": 0.5229936227352023, + "grad_norm": 0.15922591090202332, + "learning_rate": 4.477013615930857e-06, + "loss": 0.9101, + "step": 72250 + }, + { + "epoch": 0.5230660093957885, + "grad_norm": 0.1629839390516281, + "learning_rate": 4.47694122927027e-06, + "loss": 0.9133, + "step": 72260 + }, + { + "epoch": 0.5231383960563747, + "grad_norm": 0.1665726751089096, + "learning_rate": 4.476868842609684e-06, + "loss": 0.9105, + "step": 72270 + }, + { + "epoch": 0.5232107827169609, + "grad_norm": 0.22242015600204468, + "learning_rate": 4.4767964559490975e-06, + "loss": 0.9184, + "step": 72280 + }, + { + "epoch": 0.5232831693775472, + "grad_norm": 0.15692086517810822, + "learning_rate": 4.476724069288512e-06, + "loss": 0.9071, + "step": 72290 + }, + { + "epoch": 0.5233555560381333, + "grad_norm": 0.16244766116142273, + "learning_rate": 4.4766516826279255e-06, + "loss": 0.924, + "step": 72300 + }, + { + "epoch": 0.5234279426987195, + "grad_norm": 0.1637459546327591, + "learning_rate": 4.476579295967339e-06, + "loss": 0.9181, + "step": 72310 + }, + { + "epoch": 0.5235003293593057, + "grad_norm": 0.16626401245594025, + "learning_rate": 4.476506909306753e-06, + "loss": 0.9203, + "step": 72320 + }, + { + "epoch": 0.5235727160198919, + "grad_norm": 0.15717044472694397, + "learning_rate": 4.476434522646167e-06, + "loss": 0.9104, + "step": 72330 + }, + { + "epoch": 0.523645102680478, + "grad_norm": 0.15641281008720398, + "learning_rate": 4.476362135985581e-06, + "loss": 0.9045, + "step": 72340 + }, + { + "epoch": 0.5237174893410642, + "grad_norm": 0.15991607308387756, + "learning_rate": 4.4762897493249945e-06, + "loss": 0.9179, + "step": 72350 + }, + { + "epoch": 0.5237898760016504, + "grad_norm": 0.14962872862815857, + "learning_rate": 4.476217362664408e-06, + "loss": 0.9184, + "step": 72360 + }, + { + "epoch": 0.5238622626622366, + "grad_norm": 0.14683006703853607, + "learning_rate": 4.4761449760038226e-06, + "loss": 0.9173, + "step": 72370 + }, + { + "epoch": 0.5239346493228227, + "grad_norm": 0.17192301154136658, + "learning_rate": 4.476072589343236e-06, + "loss": 0.9299, + "step": 72380 + }, + { + "epoch": 0.524007035983409, + "grad_norm": 0.16342616081237793, + "learning_rate": 4.47600020268265e-06, + "loss": 0.9071, + "step": 72390 + }, + { + "epoch": 0.5240794226439952, + "grad_norm": 0.1558561623096466, + "learning_rate": 4.475927816022063e-06, + "loss": 0.9077, + "step": 72400 + }, + { + "epoch": 0.5241518093045814, + "grad_norm": 0.18777623772621155, + "learning_rate": 4.475855429361478e-06, + "loss": 0.912, + "step": 72410 + }, + { + "epoch": 0.5242241959651676, + "grad_norm": 0.16315610706806183, + "learning_rate": 4.4757830427008915e-06, + "loss": 0.9415, + "step": 72420 + }, + { + "epoch": 0.5242965826257537, + "grad_norm": 0.17795760929584503, + "learning_rate": 4.475710656040305e-06, + "loss": 0.9197, + "step": 72430 + }, + { + "epoch": 0.5243689692863399, + "grad_norm": 0.16951826214790344, + "learning_rate": 4.475638269379719e-06, + "loss": 0.908, + "step": 72440 + }, + { + "epoch": 0.5244413559469261, + "grad_norm": 0.18716560304164886, + "learning_rate": 4.475565882719133e-06, + "loss": 0.9293, + "step": 72450 + }, + { + "epoch": 0.5245137426075123, + "grad_norm": 0.24817276000976562, + "learning_rate": 4.475493496058547e-06, + "loss": 0.9103, + "step": 72460 + }, + { + "epoch": 0.5245861292680984, + "grad_norm": 0.15385325253009796, + "learning_rate": 4.47542110939796e-06, + "loss": 0.9171, + "step": 72470 + }, + { + "epoch": 0.5246585159286846, + "grad_norm": 0.15869303047657013, + "learning_rate": 4.475348722737374e-06, + "loss": 0.9223, + "step": 72480 + }, + { + "epoch": 0.5247309025892708, + "grad_norm": 0.14980655908584595, + "learning_rate": 4.4752763360767885e-06, + "loss": 0.9182, + "step": 72490 + }, + { + "epoch": 0.5248032892498571, + "grad_norm": 0.16237030923366547, + "learning_rate": 4.475203949416202e-06, + "loss": 0.9184, + "step": 72500 + }, + { + "epoch": 0.5248756759104433, + "grad_norm": 0.15368181467056274, + "learning_rate": 4.475131562755616e-06, + "loss": 0.9164, + "step": 72510 + }, + { + "epoch": 0.5249480625710294, + "grad_norm": 0.16227343678474426, + "learning_rate": 4.475059176095029e-06, + "loss": 0.9022, + "step": 72520 + }, + { + "epoch": 0.5250204492316156, + "grad_norm": 0.15485748648643494, + "learning_rate": 4.474986789434444e-06, + "loss": 0.9167, + "step": 72530 + }, + { + "epoch": 0.5250928358922018, + "grad_norm": 0.17310456931591034, + "learning_rate": 4.474914402773857e-06, + "loss": 0.928, + "step": 72540 + }, + { + "epoch": 0.525165222552788, + "grad_norm": 0.1546664834022522, + "learning_rate": 4.474842016113271e-06, + "loss": 0.9162, + "step": 72550 + }, + { + "epoch": 0.5252376092133741, + "grad_norm": 0.17586833238601685, + "learning_rate": 4.474769629452685e-06, + "loss": 0.9126, + "step": 72560 + }, + { + "epoch": 0.5253099958739603, + "grad_norm": 0.1783231645822525, + "learning_rate": 4.474697242792099e-06, + "loss": 0.9093, + "step": 72570 + }, + { + "epoch": 0.5253823825345465, + "grad_norm": 0.17169401049613953, + "learning_rate": 4.474624856131513e-06, + "loss": 0.9267, + "step": 72580 + }, + { + "epoch": 0.5254547691951327, + "grad_norm": 0.478100448846817, + "learning_rate": 4.474552469470926e-06, + "loss": 0.9089, + "step": 72590 + }, + { + "epoch": 0.5255271558557189, + "grad_norm": 0.1514614224433899, + "learning_rate": 4.47448008281034e-06, + "loss": 0.9243, + "step": 72600 + }, + { + "epoch": 0.5255995425163051, + "grad_norm": 0.15338928997516632, + "learning_rate": 4.4744076961497544e-06, + "loss": 0.9111, + "step": 72610 + }, + { + "epoch": 0.5256719291768913, + "grad_norm": 0.16090184450149536, + "learning_rate": 4.474335309489168e-06, + "loss": 0.9188, + "step": 72620 + }, + { + "epoch": 0.5257443158374775, + "grad_norm": 0.1639074832201004, + "learning_rate": 4.474262922828582e-06, + "loss": 0.9235, + "step": 72630 + }, + { + "epoch": 0.5258167024980637, + "grad_norm": 0.16050408780574799, + "learning_rate": 4.474190536167995e-06, + "loss": 0.9176, + "step": 72640 + }, + { + "epoch": 0.5258890891586498, + "grad_norm": 0.1604773849248886, + "learning_rate": 4.474118149507409e-06, + "loss": 0.9258, + "step": 72650 + }, + { + "epoch": 0.525961475819236, + "grad_norm": 0.15453247725963593, + "learning_rate": 4.474045762846823e-06, + "loss": 0.9273, + "step": 72660 + }, + { + "epoch": 0.5260338624798222, + "grad_norm": 0.17497363686561584, + "learning_rate": 4.473973376186237e-06, + "loss": 0.9219, + "step": 72670 + }, + { + "epoch": 0.5261062491404084, + "grad_norm": 0.15889976918697357, + "learning_rate": 4.473900989525651e-06, + "loss": 0.9325, + "step": 72680 + }, + { + "epoch": 0.5261786358009946, + "grad_norm": 0.18632720410823822, + "learning_rate": 4.473828602865064e-06, + "loss": 0.9188, + "step": 72690 + }, + { + "epoch": 0.5262510224615807, + "grad_norm": 0.16045477986335754, + "learning_rate": 4.473756216204479e-06, + "loss": 0.9261, + "step": 72700 + }, + { + "epoch": 0.526323409122167, + "grad_norm": 0.16817103326320648, + "learning_rate": 4.473683829543892e-06, + "loss": 0.9212, + "step": 72710 + }, + { + "epoch": 0.5263957957827532, + "grad_norm": 0.1566298007965088, + "learning_rate": 4.473611442883306e-06, + "loss": 0.9188, + "step": 72720 + }, + { + "epoch": 0.5264681824433394, + "grad_norm": 0.1550060659646988, + "learning_rate": 4.4735390562227195e-06, + "loss": 0.9263, + "step": 72730 + }, + { + "epoch": 0.5265405691039255, + "grad_norm": 0.15189586579799652, + "learning_rate": 4.473466669562134e-06, + "loss": 0.9202, + "step": 72740 + }, + { + "epoch": 0.5266129557645117, + "grad_norm": 0.14698752760887146, + "learning_rate": 4.473394282901548e-06, + "loss": 0.9105, + "step": 72750 + }, + { + "epoch": 0.5266853424250979, + "grad_norm": 0.16903573274612427, + "learning_rate": 4.473321896240961e-06, + "loss": 0.9135, + "step": 72760 + }, + { + "epoch": 0.5267577290856841, + "grad_norm": 0.16911180317401886, + "learning_rate": 4.473249509580375e-06, + "loss": 0.9249, + "step": 72770 + }, + { + "epoch": 0.5268301157462703, + "grad_norm": 0.1543751209974289, + "learning_rate": 4.4731771229197884e-06, + "loss": 0.9126, + "step": 72780 + }, + { + "epoch": 0.5269025024068564, + "grad_norm": 0.15658721327781677, + "learning_rate": 4.473104736259202e-06, + "loss": 0.9152, + "step": 72790 + }, + { + "epoch": 0.5269748890674426, + "grad_norm": 0.16438843309879303, + "learning_rate": 4.473032349598616e-06, + "loss": 0.9226, + "step": 72800 + }, + { + "epoch": 0.5270472757280288, + "grad_norm": 0.17810095846652985, + "learning_rate": 4.47295996293803e-06, + "loss": 0.9093, + "step": 72810 + }, + { + "epoch": 0.5271196623886151, + "grad_norm": 0.1657225638628006, + "learning_rate": 4.472887576277444e-06, + "loss": 0.9233, + "step": 72820 + }, + { + "epoch": 0.5271920490492013, + "grad_norm": 0.16005633771419525, + "learning_rate": 4.472815189616857e-06, + "loss": 0.9201, + "step": 72830 + }, + { + "epoch": 0.5272644357097874, + "grad_norm": 0.16548793017864227, + "learning_rate": 4.472742802956271e-06, + "loss": 0.9304, + "step": 72840 + }, + { + "epoch": 0.5273368223703736, + "grad_norm": 0.14762021601200104, + "learning_rate": 4.4726704162956855e-06, + "loss": 0.9137, + "step": 72850 + }, + { + "epoch": 0.5274092090309598, + "grad_norm": 0.17930978536605835, + "learning_rate": 4.472598029635099e-06, + "loss": 0.9203, + "step": 72860 + }, + { + "epoch": 0.527481595691546, + "grad_norm": 0.1988268494606018, + "learning_rate": 4.472525642974513e-06, + "loss": 0.9219, + "step": 72870 + }, + { + "epoch": 0.5275539823521321, + "grad_norm": 0.1670868992805481, + "learning_rate": 4.472453256313926e-06, + "loss": 0.9083, + "step": 72880 + }, + { + "epoch": 0.5276263690127183, + "grad_norm": 0.14923833310604095, + "learning_rate": 4.472380869653341e-06, + "loss": 0.9207, + "step": 72890 + }, + { + "epoch": 0.5276987556733045, + "grad_norm": 0.15868178009986877, + "learning_rate": 4.472308482992754e-06, + "loss": 0.9055, + "step": 72900 + }, + { + "epoch": 0.5277711423338907, + "grad_norm": 0.1550142616033554, + "learning_rate": 4.472236096332168e-06, + "loss": 0.906, + "step": 72910 + }, + { + "epoch": 0.527843528994477, + "grad_norm": 0.15354187786579132, + "learning_rate": 4.472163709671582e-06, + "loss": 0.9138, + "step": 72920 + }, + { + "epoch": 0.5279159156550631, + "grad_norm": 0.15525110065937042, + "learning_rate": 4.472091323010996e-06, + "loss": 0.9195, + "step": 72930 + }, + { + "epoch": 0.5279883023156493, + "grad_norm": 0.15831002593040466, + "learning_rate": 4.47201893635041e-06, + "loss": 0.9161, + "step": 72940 + }, + { + "epoch": 0.5280606889762355, + "grad_norm": 0.169021874666214, + "learning_rate": 4.471946549689823e-06, + "loss": 0.9256, + "step": 72950 + }, + { + "epoch": 0.5281330756368217, + "grad_norm": 0.14959047734737396, + "learning_rate": 4.471874163029237e-06, + "loss": 0.8988, + "step": 72960 + }, + { + "epoch": 0.5282054622974078, + "grad_norm": 0.1501626968383789, + "learning_rate": 4.471801776368651e-06, + "loss": 0.9179, + "step": 72970 + }, + { + "epoch": 0.528277848957994, + "grad_norm": 0.15244172513484955, + "learning_rate": 4.471729389708065e-06, + "loss": 0.9218, + "step": 72980 + }, + { + "epoch": 0.5283502356185802, + "grad_norm": 0.21361258625984192, + "learning_rate": 4.471657003047479e-06, + "loss": 0.9205, + "step": 72990 + }, + { + "epoch": 0.5284226222791664, + "grad_norm": 0.1592887043952942, + "learning_rate": 4.471584616386892e-06, + "loss": 0.9192, + "step": 73000 + }, + { + "epoch": 0.5284950089397525, + "grad_norm": 0.15822738409042358, + "learning_rate": 4.471512229726307e-06, + "loss": 0.9095, + "step": 73010 + }, + { + "epoch": 0.5285673956003387, + "grad_norm": 0.18526582419872284, + "learning_rate": 4.47143984306572e-06, + "loss": 0.922, + "step": 73020 + }, + { + "epoch": 0.528639782260925, + "grad_norm": 0.16115911304950714, + "learning_rate": 4.471367456405134e-06, + "loss": 0.9071, + "step": 73030 + }, + { + "epoch": 0.5287121689215112, + "grad_norm": 0.16934490203857422, + "learning_rate": 4.4712950697445475e-06, + "loss": 0.9206, + "step": 73040 + }, + { + "epoch": 0.5287845555820974, + "grad_norm": 0.15864363312721252, + "learning_rate": 4.471222683083962e-06, + "loss": 0.9344, + "step": 73050 + }, + { + "epoch": 0.5288569422426835, + "grad_norm": 0.16181667149066925, + "learning_rate": 4.471150296423376e-06, + "loss": 0.9177, + "step": 73060 + }, + { + "epoch": 0.5289293289032697, + "grad_norm": 0.15380777418613434, + "learning_rate": 4.471077909762789e-06, + "loss": 0.9181, + "step": 73070 + }, + { + "epoch": 0.5290017155638559, + "grad_norm": 0.1445995569229126, + "learning_rate": 4.471005523102203e-06, + "loss": 0.9232, + "step": 73080 + }, + { + "epoch": 0.5290741022244421, + "grad_norm": 0.1677917093038559, + "learning_rate": 4.470933136441617e-06, + "loss": 0.9231, + "step": 73090 + }, + { + "epoch": 0.5291464888850282, + "grad_norm": 0.19374777376651764, + "learning_rate": 4.470860749781031e-06, + "loss": 0.9182, + "step": 73100 + }, + { + "epoch": 0.5292188755456144, + "grad_norm": 0.18379537761211395, + "learning_rate": 4.4707883631204446e-06, + "loss": 0.9132, + "step": 73110 + }, + { + "epoch": 0.5292912622062006, + "grad_norm": 0.1719239056110382, + "learning_rate": 4.470715976459858e-06, + "loss": 0.9188, + "step": 73120 + }, + { + "epoch": 0.5293636488667868, + "grad_norm": 0.15979036688804626, + "learning_rate": 4.470643589799273e-06, + "loss": 0.9093, + "step": 73130 + }, + { + "epoch": 0.5294360355273731, + "grad_norm": 0.15309134125709534, + "learning_rate": 4.470571203138686e-06, + "loss": 0.9195, + "step": 73140 + }, + { + "epoch": 0.5295084221879592, + "grad_norm": 0.1547461748123169, + "learning_rate": 4.4704988164781e-06, + "loss": 0.9073, + "step": 73150 + }, + { + "epoch": 0.5295808088485454, + "grad_norm": 0.17491716146469116, + "learning_rate": 4.4704264298175135e-06, + "loss": 0.9356, + "step": 73160 + }, + { + "epoch": 0.5296531955091316, + "grad_norm": 0.15330342948436737, + "learning_rate": 4.470354043156928e-06, + "loss": 0.9124, + "step": 73170 + }, + { + "epoch": 0.5297255821697178, + "grad_norm": 0.1625756025314331, + "learning_rate": 4.4702816564963416e-06, + "loss": 0.9254, + "step": 73180 + }, + { + "epoch": 0.529797968830304, + "grad_norm": 0.1609082669019699, + "learning_rate": 4.470209269835755e-06, + "loss": 0.9108, + "step": 73190 + }, + { + "epoch": 0.5298703554908901, + "grad_norm": 0.15871219336986542, + "learning_rate": 4.470136883175169e-06, + "loss": 0.9182, + "step": 73200 + }, + { + "epoch": 0.5299427421514763, + "grad_norm": 0.20798322558403015, + "learning_rate": 4.470064496514583e-06, + "loss": 0.9201, + "step": 73210 + }, + { + "epoch": 0.5300151288120625, + "grad_norm": 0.15819989144802094, + "learning_rate": 4.469992109853997e-06, + "loss": 0.9164, + "step": 73220 + }, + { + "epoch": 0.5300875154726487, + "grad_norm": 0.16910409927368164, + "learning_rate": 4.4699197231934105e-06, + "loss": 0.912, + "step": 73230 + }, + { + "epoch": 0.5301599021332349, + "grad_norm": 0.16933558881282806, + "learning_rate": 4.469847336532824e-06, + "loss": 0.9084, + "step": 73240 + }, + { + "epoch": 0.5302322887938211, + "grad_norm": 0.1705089658498764, + "learning_rate": 4.469774949872238e-06, + "loss": 0.9249, + "step": 73250 + }, + { + "epoch": 0.5303046754544073, + "grad_norm": 0.16231994330883026, + "learning_rate": 4.469702563211652e-06, + "loss": 0.9106, + "step": 73260 + }, + { + "epoch": 0.5303770621149935, + "grad_norm": 0.1603698879480362, + "learning_rate": 4.469630176551066e-06, + "loss": 0.9157, + "step": 73270 + }, + { + "epoch": 0.5304494487755796, + "grad_norm": 0.1552126109600067, + "learning_rate": 4.469557789890479e-06, + "loss": 0.9121, + "step": 73280 + }, + { + "epoch": 0.5305218354361658, + "grad_norm": 0.16099673509597778, + "learning_rate": 4.469485403229893e-06, + "loss": 0.9216, + "step": 73290 + }, + { + "epoch": 0.530594222096752, + "grad_norm": 0.16103273630142212, + "learning_rate": 4.469413016569307e-06, + "loss": 0.9206, + "step": 73300 + }, + { + "epoch": 0.5306666087573382, + "grad_norm": 0.16438160836696625, + "learning_rate": 4.46934062990872e-06, + "loss": 0.9218, + "step": 73310 + }, + { + "epoch": 0.5307389954179244, + "grad_norm": 0.17345352470874786, + "learning_rate": 4.469268243248135e-06, + "loss": 0.9085, + "step": 73320 + }, + { + "epoch": 0.5308113820785105, + "grad_norm": 0.15736123919487, + "learning_rate": 4.469195856587548e-06, + "loss": 0.9238, + "step": 73330 + }, + { + "epoch": 0.5308837687390967, + "grad_norm": 0.17544583976268768, + "learning_rate": 4.469123469926962e-06, + "loss": 0.9176, + "step": 73340 + }, + { + "epoch": 0.530956155399683, + "grad_norm": 0.17113934457302094, + "learning_rate": 4.469051083266376e-06, + "loss": 0.9092, + "step": 73350 + }, + { + "epoch": 0.5310285420602692, + "grad_norm": 0.1495228260755539, + "learning_rate": 4.46897869660579e-06, + "loss": 0.9151, + "step": 73360 + }, + { + "epoch": 0.5311009287208553, + "grad_norm": 0.16176773607730865, + "learning_rate": 4.468906309945204e-06, + "loss": 0.8991, + "step": 73370 + }, + { + "epoch": 0.5311733153814415, + "grad_norm": 0.15699774026870728, + "learning_rate": 4.468833923284617e-06, + "loss": 0.9167, + "step": 73380 + }, + { + "epoch": 0.5312457020420277, + "grad_norm": 0.15788871049880981, + "learning_rate": 4.468761536624031e-06, + "loss": 0.8985, + "step": 73390 + }, + { + "epoch": 0.5313180887026139, + "grad_norm": 0.155131995677948, + "learning_rate": 4.468689149963445e-06, + "loss": 0.9163, + "step": 73400 + }, + { + "epoch": 0.5313904753632, + "grad_norm": 0.15978825092315674, + "learning_rate": 4.468616763302859e-06, + "loss": 0.911, + "step": 73410 + }, + { + "epoch": 0.5314628620237862, + "grad_norm": 0.18127478659152985, + "learning_rate": 4.468544376642273e-06, + "loss": 0.9225, + "step": 73420 + }, + { + "epoch": 0.5315352486843724, + "grad_norm": 0.15009476244449615, + "learning_rate": 4.468471989981686e-06, + "loss": 0.9069, + "step": 73430 + }, + { + "epoch": 0.5316076353449586, + "grad_norm": 0.15387962758541107, + "learning_rate": 4.4683996033211e-06, + "loss": 0.9209, + "step": 73440 + }, + { + "epoch": 0.5316800220055449, + "grad_norm": 0.15109896659851074, + "learning_rate": 4.468327216660514e-06, + "loss": 0.9285, + "step": 73450 + }, + { + "epoch": 0.531752408666131, + "grad_norm": 0.20869199931621552, + "learning_rate": 4.468254829999928e-06, + "loss": 0.9189, + "step": 73460 + }, + { + "epoch": 0.5318247953267172, + "grad_norm": 0.15885184705257416, + "learning_rate": 4.4681824433393415e-06, + "loss": 0.9301, + "step": 73470 + }, + { + "epoch": 0.5318971819873034, + "grad_norm": 0.2871398627758026, + "learning_rate": 4.468110056678755e-06, + "loss": 0.9291, + "step": 73480 + }, + { + "epoch": 0.5319695686478896, + "grad_norm": 0.14929638803005219, + "learning_rate": 4.46803767001817e-06, + "loss": 0.9143, + "step": 73490 + }, + { + "epoch": 0.5320419553084758, + "grad_norm": 0.15041810274124146, + "learning_rate": 4.467965283357583e-06, + "loss": 0.9164, + "step": 73500 + }, + { + "epoch": 0.5321143419690619, + "grad_norm": 0.1545180082321167, + "learning_rate": 4.467892896696997e-06, + "loss": 0.9144, + "step": 73510 + }, + { + "epoch": 0.5321867286296481, + "grad_norm": 0.15915392339229584, + "learning_rate": 4.4678205100364104e-06, + "loss": 0.9088, + "step": 73520 + }, + { + "epoch": 0.5322591152902343, + "grad_norm": 0.1939249187707901, + "learning_rate": 4.467748123375825e-06, + "loss": 0.9066, + "step": 73530 + }, + { + "epoch": 0.5323315019508205, + "grad_norm": 0.1595548391342163, + "learning_rate": 4.4676757367152385e-06, + "loss": 0.9067, + "step": 73540 + }, + { + "epoch": 0.5324038886114066, + "grad_norm": 0.15864749252796173, + "learning_rate": 4.467603350054652e-06, + "loss": 0.9134, + "step": 73550 + }, + { + "epoch": 0.5324762752719929, + "grad_norm": 0.171446293592453, + "learning_rate": 4.467530963394066e-06, + "loss": 0.9157, + "step": 73560 + }, + { + "epoch": 0.5325486619325791, + "grad_norm": 0.15038402378559113, + "learning_rate": 4.46745857673348e-06, + "loss": 0.9159, + "step": 73570 + }, + { + "epoch": 0.5326210485931653, + "grad_norm": 0.15996231138706207, + "learning_rate": 4.467386190072894e-06, + "loss": 0.9059, + "step": 73580 + }, + { + "epoch": 0.5326934352537515, + "grad_norm": 0.16054002940654755, + "learning_rate": 4.4673138034123075e-06, + "loss": 0.9136, + "step": 73590 + }, + { + "epoch": 0.5327658219143376, + "grad_norm": 0.15865951776504517, + "learning_rate": 4.467241416751721e-06, + "loss": 0.9048, + "step": 73600 + }, + { + "epoch": 0.5328382085749238, + "grad_norm": 0.15602022409439087, + "learning_rate": 4.4671690300911355e-06, + "loss": 0.901, + "step": 73610 + }, + { + "epoch": 0.53291059523551, + "grad_norm": 0.16050325334072113, + "learning_rate": 4.467096643430549e-06, + "loss": 0.9012, + "step": 73620 + }, + { + "epoch": 0.5329829818960962, + "grad_norm": 0.15464062988758087, + "learning_rate": 4.467024256769963e-06, + "loss": 0.918, + "step": 73630 + }, + { + "epoch": 0.5330553685566823, + "grad_norm": 0.16735030710697174, + "learning_rate": 4.466951870109376e-06, + "loss": 0.9048, + "step": 73640 + }, + { + "epoch": 0.5331277552172685, + "grad_norm": 0.17225484549999237, + "learning_rate": 4.466879483448791e-06, + "loss": 0.9131, + "step": 73650 + }, + { + "epoch": 0.5332001418778547, + "grad_norm": 0.17411339282989502, + "learning_rate": 4.4668070967882045e-06, + "loss": 0.9063, + "step": 73660 + }, + { + "epoch": 0.533272528538441, + "grad_norm": 0.19198282063007355, + "learning_rate": 4.466734710127618e-06, + "loss": 0.9239, + "step": 73670 + }, + { + "epoch": 0.5333449151990272, + "grad_norm": 0.16125676035881042, + "learning_rate": 4.466662323467032e-06, + "loss": 0.9251, + "step": 73680 + }, + { + "epoch": 0.5334173018596133, + "grad_norm": 0.1499517261981964, + "learning_rate": 4.466589936806446e-06, + "loss": 0.913, + "step": 73690 + }, + { + "epoch": 0.5334896885201995, + "grad_norm": 0.16353091597557068, + "learning_rate": 4.46651755014586e-06, + "loss": 0.9166, + "step": 73700 + }, + { + "epoch": 0.5335620751807857, + "grad_norm": 0.14786536991596222, + "learning_rate": 4.466445163485273e-06, + "loss": 0.9174, + "step": 73710 + }, + { + "epoch": 0.5336344618413719, + "grad_norm": 0.15844851732254028, + "learning_rate": 4.466372776824687e-06, + "loss": 0.9162, + "step": 73720 + }, + { + "epoch": 0.533706848501958, + "grad_norm": 0.18850095570087433, + "learning_rate": 4.4663003901641015e-06, + "loss": 0.9141, + "step": 73730 + }, + { + "epoch": 0.5337792351625442, + "grad_norm": 0.1814296692609787, + "learning_rate": 4.466228003503515e-06, + "loss": 0.9087, + "step": 73740 + }, + { + "epoch": 0.5338516218231304, + "grad_norm": 0.1553386151790619, + "learning_rate": 4.466155616842929e-06, + "loss": 0.9115, + "step": 73750 + }, + { + "epoch": 0.5339240084837166, + "grad_norm": 0.1544741988182068, + "learning_rate": 4.466083230182342e-06, + "loss": 0.9066, + "step": 73760 + }, + { + "epoch": 0.5339963951443029, + "grad_norm": 0.15202949941158295, + "learning_rate": 4.466010843521757e-06, + "loss": 0.9164, + "step": 73770 + }, + { + "epoch": 0.534068781804889, + "grad_norm": 0.157356858253479, + "learning_rate": 4.46593845686117e-06, + "loss": 0.9272, + "step": 73780 + }, + { + "epoch": 0.5341411684654752, + "grad_norm": 0.1528637409210205, + "learning_rate": 4.465866070200584e-06, + "loss": 0.9253, + "step": 73790 + }, + { + "epoch": 0.5342135551260614, + "grad_norm": 0.1515146940946579, + "learning_rate": 4.465793683539998e-06, + "loss": 0.9025, + "step": 73800 + }, + { + "epoch": 0.5342859417866476, + "grad_norm": 0.1715908944606781, + "learning_rate": 4.465721296879412e-06, + "loss": 0.9274, + "step": 73810 + }, + { + "epoch": 0.5343583284472337, + "grad_norm": 0.17339174449443817, + "learning_rate": 4.465648910218826e-06, + "loss": 0.9196, + "step": 73820 + }, + { + "epoch": 0.5344307151078199, + "grad_norm": 0.18492698669433594, + "learning_rate": 4.4655765235582385e-06, + "loss": 0.9117, + "step": 73830 + }, + { + "epoch": 0.5345031017684061, + "grad_norm": 0.18511684238910675, + "learning_rate": 4.465504136897653e-06, + "loss": 0.9192, + "step": 73840 + }, + { + "epoch": 0.5345754884289923, + "grad_norm": 0.15983940660953522, + "learning_rate": 4.4654317502370666e-06, + "loss": 0.9144, + "step": 73850 + }, + { + "epoch": 0.5346478750895785, + "grad_norm": 0.1514192372560501, + "learning_rate": 4.46535936357648e-06, + "loss": 0.9289, + "step": 73860 + }, + { + "epoch": 0.5347202617501646, + "grad_norm": 0.1741372048854828, + "learning_rate": 4.465286976915894e-06, + "loss": 0.9224, + "step": 73870 + }, + { + "epoch": 0.5347926484107509, + "grad_norm": 0.1461644023656845, + "learning_rate": 4.465214590255308e-06, + "loss": 0.9044, + "step": 73880 + }, + { + "epoch": 0.5348650350713371, + "grad_norm": 0.1568828821182251, + "learning_rate": 4.465142203594722e-06, + "loss": 0.9098, + "step": 73890 + }, + { + "epoch": 0.5349374217319233, + "grad_norm": 0.16432945430278778, + "learning_rate": 4.4650698169341355e-06, + "loss": 0.9187, + "step": 73900 + }, + { + "epoch": 0.5350098083925094, + "grad_norm": 0.16285908222198486, + "learning_rate": 4.464997430273549e-06, + "loss": 0.9172, + "step": 73910 + }, + { + "epoch": 0.5350821950530956, + "grad_norm": 0.15735489130020142, + "learning_rate": 4.4649250436129636e-06, + "loss": 0.9198, + "step": 73920 + }, + { + "epoch": 0.5351545817136818, + "grad_norm": 0.1569201946258545, + "learning_rate": 4.464852656952377e-06, + "loss": 0.9162, + "step": 73930 + }, + { + "epoch": 0.535226968374268, + "grad_norm": 0.1620626151561737, + "learning_rate": 4.464780270291791e-06, + "loss": 0.9158, + "step": 73940 + }, + { + "epoch": 0.5352993550348542, + "grad_norm": 0.16271735727787018, + "learning_rate": 4.464707883631204e-06, + "loss": 0.9015, + "step": 73950 + }, + { + "epoch": 0.5353717416954403, + "grad_norm": 0.23674176633358002, + "learning_rate": 4.464635496970619e-06, + "loss": 0.9143, + "step": 73960 + }, + { + "epoch": 0.5354441283560265, + "grad_norm": 0.18053658306598663, + "learning_rate": 4.4645631103100325e-06, + "loss": 0.9125, + "step": 73970 + }, + { + "epoch": 0.5355165150166128, + "grad_norm": 0.16268426179885864, + "learning_rate": 4.464490723649446e-06, + "loss": 0.9208, + "step": 73980 + }, + { + "epoch": 0.535588901677199, + "grad_norm": 0.15412411093711853, + "learning_rate": 4.46441833698886e-06, + "loss": 0.9183, + "step": 73990 + }, + { + "epoch": 0.5356612883377851, + "grad_norm": 0.18611110746860504, + "learning_rate": 4.464345950328274e-06, + "loss": 0.9177, + "step": 74000 + }, + { + "epoch": 0.5357336749983713, + "grad_norm": 0.14643755555152893, + "learning_rate": 4.464273563667688e-06, + "loss": 0.9048, + "step": 74010 + }, + { + "epoch": 0.5358060616589575, + "grad_norm": 0.17977450788021088, + "learning_rate": 4.464201177007101e-06, + "loss": 0.9166, + "step": 74020 + }, + { + "epoch": 0.5358784483195437, + "grad_norm": 0.159438818693161, + "learning_rate": 4.464128790346515e-06, + "loss": 0.9098, + "step": 74030 + }, + { + "epoch": 0.5359508349801299, + "grad_norm": 0.1566077172756195, + "learning_rate": 4.464056403685929e-06, + "loss": 0.9198, + "step": 74040 + }, + { + "epoch": 0.536023221640716, + "grad_norm": 0.17558586597442627, + "learning_rate": 4.463984017025343e-06, + "loss": 0.9303, + "step": 74050 + }, + { + "epoch": 0.5360956083013022, + "grad_norm": 0.15491542220115662, + "learning_rate": 4.463911630364757e-06, + "loss": 0.9193, + "step": 74060 + }, + { + "epoch": 0.5361679949618884, + "grad_norm": 0.17469727993011475, + "learning_rate": 4.46383924370417e-06, + "loss": 0.9117, + "step": 74070 + }, + { + "epoch": 0.5362403816224746, + "grad_norm": 0.1587485820055008, + "learning_rate": 4.463766857043584e-06, + "loss": 0.9028, + "step": 74080 + }, + { + "epoch": 0.5363127682830608, + "grad_norm": 0.15624359250068665, + "learning_rate": 4.4636944703829984e-06, + "loss": 0.9028, + "step": 74090 + }, + { + "epoch": 0.536385154943647, + "grad_norm": 0.17197231948375702, + "learning_rate": 4.463622083722412e-06, + "loss": 0.9141, + "step": 74100 + }, + { + "epoch": 0.5364575416042332, + "grad_norm": 0.1522126942873001, + "learning_rate": 4.463549697061826e-06, + "loss": 0.9279, + "step": 74110 + }, + { + "epoch": 0.5365299282648194, + "grad_norm": 0.1569167822599411, + "learning_rate": 4.463477310401239e-06, + "loss": 0.9168, + "step": 74120 + }, + { + "epoch": 0.5366023149254056, + "grad_norm": 0.18380215764045715, + "learning_rate": 4.463404923740654e-06, + "loss": 0.9146, + "step": 74130 + }, + { + "epoch": 0.5366747015859917, + "grad_norm": 0.1586330085992813, + "learning_rate": 4.463332537080067e-06, + "loss": 0.8948, + "step": 74140 + }, + { + "epoch": 0.5367470882465779, + "grad_norm": 0.1561429798603058, + "learning_rate": 4.463260150419481e-06, + "loss": 0.9113, + "step": 74150 + }, + { + "epoch": 0.5368194749071641, + "grad_norm": 0.1750173717737198, + "learning_rate": 4.463187763758895e-06, + "loss": 0.9277, + "step": 74160 + }, + { + "epoch": 0.5368918615677503, + "grad_norm": 0.15227210521697998, + "learning_rate": 4.463115377098309e-06, + "loss": 0.9042, + "step": 74170 + }, + { + "epoch": 0.5369642482283364, + "grad_norm": 0.15368933975696564, + "learning_rate": 4.463042990437723e-06, + "loss": 0.9009, + "step": 74180 + }, + { + "epoch": 0.5370366348889226, + "grad_norm": 0.1501675844192505, + "learning_rate": 4.462970603777136e-06, + "loss": 0.9168, + "step": 74190 + }, + { + "epoch": 0.5371090215495089, + "grad_norm": 0.16025373339653015, + "learning_rate": 4.46289821711655e-06, + "loss": 0.9125, + "step": 74200 + }, + { + "epoch": 0.5371814082100951, + "grad_norm": 0.1600399762392044, + "learning_rate": 4.462825830455964e-06, + "loss": 0.8988, + "step": 74210 + }, + { + "epoch": 0.5372537948706813, + "grad_norm": 0.1532607227563858, + "learning_rate": 4.462753443795378e-06, + "loss": 0.9254, + "step": 74220 + }, + { + "epoch": 0.5373261815312674, + "grad_norm": 0.1633194237947464, + "learning_rate": 4.462681057134792e-06, + "loss": 0.9143, + "step": 74230 + }, + { + "epoch": 0.5373985681918536, + "grad_norm": 0.15434294939041138, + "learning_rate": 4.462608670474205e-06, + "loss": 0.9139, + "step": 74240 + }, + { + "epoch": 0.5374709548524398, + "grad_norm": 0.15733137726783752, + "learning_rate": 4.46253628381362e-06, + "loss": 0.9254, + "step": 74250 + }, + { + "epoch": 0.537543341513026, + "grad_norm": 0.16783663630485535, + "learning_rate": 4.462463897153033e-06, + "loss": 0.9132, + "step": 74260 + }, + { + "epoch": 0.5376157281736121, + "grad_norm": 0.16016171872615814, + "learning_rate": 4.462391510492447e-06, + "loss": 0.9161, + "step": 74270 + }, + { + "epoch": 0.5376881148341983, + "grad_norm": 0.1455632746219635, + "learning_rate": 4.4623191238318605e-06, + "loss": 0.9178, + "step": 74280 + }, + { + "epoch": 0.5377605014947845, + "grad_norm": 0.16944627463817596, + "learning_rate": 4.462246737171275e-06, + "loss": 0.9171, + "step": 74290 + }, + { + "epoch": 0.5378328881553708, + "grad_norm": 0.16209137439727783, + "learning_rate": 4.462174350510689e-06, + "loss": 0.9148, + "step": 74300 + }, + { + "epoch": 0.537905274815957, + "grad_norm": 0.16766659915447235, + "learning_rate": 4.462101963850102e-06, + "loss": 0.9054, + "step": 74310 + }, + { + "epoch": 0.5379776614765431, + "grad_norm": 0.15169428288936615, + "learning_rate": 4.462029577189516e-06, + "loss": 0.916, + "step": 74320 + }, + { + "epoch": 0.5380500481371293, + "grad_norm": 0.17565733194351196, + "learning_rate": 4.46195719052893e-06, + "loss": 0.9056, + "step": 74330 + }, + { + "epoch": 0.5381224347977155, + "grad_norm": 0.1468237191438675, + "learning_rate": 4.461884803868344e-06, + "loss": 0.9199, + "step": 74340 + }, + { + "epoch": 0.5381948214583017, + "grad_norm": 0.15995702147483826, + "learning_rate": 4.4618124172077575e-06, + "loss": 0.9205, + "step": 74350 + }, + { + "epoch": 0.5382672081188878, + "grad_norm": 0.28701356053352356, + "learning_rate": 4.461740030547171e-06, + "loss": 0.9032, + "step": 74360 + }, + { + "epoch": 0.538339594779474, + "grad_norm": 0.15283870697021484, + "learning_rate": 4.461667643886585e-06, + "loss": 0.919, + "step": 74370 + }, + { + "epoch": 0.5384119814400602, + "grad_norm": 0.2051803320646286, + "learning_rate": 4.461595257225998e-06, + "loss": 0.9115, + "step": 74380 + }, + { + "epoch": 0.5384843681006464, + "grad_norm": 0.16268160939216614, + "learning_rate": 4.461522870565412e-06, + "loss": 0.9043, + "step": 74390 + }, + { + "epoch": 0.5385567547612325, + "grad_norm": 0.16522175073623657, + "learning_rate": 4.4614504839048265e-06, + "loss": 0.9193, + "step": 74400 + }, + { + "epoch": 0.5386291414218188, + "grad_norm": 0.1583668738603592, + "learning_rate": 4.46137809724424e-06, + "loss": 0.9083, + "step": 74410 + }, + { + "epoch": 0.538701528082405, + "grad_norm": 0.15010447800159454, + "learning_rate": 4.461305710583654e-06, + "loss": 0.9025, + "step": 74420 + }, + { + "epoch": 0.5387739147429912, + "grad_norm": 0.16112802922725677, + "learning_rate": 4.461233323923067e-06, + "loss": 0.9152, + "step": 74430 + }, + { + "epoch": 0.5388463014035774, + "grad_norm": 0.16676628589630127, + "learning_rate": 4.461160937262482e-06, + "loss": 0.9022, + "step": 74440 + }, + { + "epoch": 0.5389186880641635, + "grad_norm": 0.1548004299402237, + "learning_rate": 4.461088550601895e-06, + "loss": 0.9131, + "step": 74450 + }, + { + "epoch": 0.5389910747247497, + "grad_norm": 0.17125369608402252, + "learning_rate": 4.461016163941309e-06, + "loss": 0.9151, + "step": 74460 + }, + { + "epoch": 0.5390634613853359, + "grad_norm": 0.1690664291381836, + "learning_rate": 4.460943777280723e-06, + "loss": 0.9138, + "step": 74470 + }, + { + "epoch": 0.5391358480459221, + "grad_norm": 0.17033086717128754, + "learning_rate": 4.460871390620137e-06, + "loss": 0.8982, + "step": 74480 + }, + { + "epoch": 0.5392082347065082, + "grad_norm": 0.15503831207752228, + "learning_rate": 4.460799003959551e-06, + "loss": 0.9268, + "step": 74490 + }, + { + "epoch": 0.5392806213670944, + "grad_norm": 0.15129579603672028, + "learning_rate": 4.460726617298964e-06, + "loss": 0.9067, + "step": 74500 + }, + { + "epoch": 0.5393530080276807, + "grad_norm": 0.16346019506454468, + "learning_rate": 4.460654230638378e-06, + "loss": 0.9138, + "step": 74510 + }, + { + "epoch": 0.5394253946882669, + "grad_norm": 0.15942130982875824, + "learning_rate": 4.460581843977792e-06, + "loss": 0.9109, + "step": 74520 + }, + { + "epoch": 0.5394977813488531, + "grad_norm": 0.14833667874336243, + "learning_rate": 4.460509457317206e-06, + "loss": 0.9065, + "step": 74530 + }, + { + "epoch": 0.5395701680094392, + "grad_norm": 0.15694496035575867, + "learning_rate": 4.46043707065662e-06, + "loss": 0.9225, + "step": 74540 + }, + { + "epoch": 0.5396425546700254, + "grad_norm": 0.15417462587356567, + "learning_rate": 4.460364683996033e-06, + "loss": 0.8993, + "step": 74550 + }, + { + "epoch": 0.5397149413306116, + "grad_norm": 0.16096115112304688, + "learning_rate": 4.460292297335448e-06, + "loss": 0.9104, + "step": 74560 + }, + { + "epoch": 0.5397873279911978, + "grad_norm": 0.1507575362920761, + "learning_rate": 4.460219910674861e-06, + "loss": 0.9165, + "step": 74570 + }, + { + "epoch": 0.539859714651784, + "grad_norm": 0.16418473422527313, + "learning_rate": 4.460147524014275e-06, + "loss": 0.9192, + "step": 74580 + }, + { + "epoch": 0.5399321013123701, + "grad_norm": 0.1715206801891327, + "learning_rate": 4.4600751373536886e-06, + "loss": 0.9093, + "step": 74590 + }, + { + "epoch": 0.5400044879729563, + "grad_norm": 0.1661735624074936, + "learning_rate": 4.460002750693103e-06, + "loss": 0.9224, + "step": 74600 + }, + { + "epoch": 0.5400768746335425, + "grad_norm": 0.15840347111225128, + "learning_rate": 4.459930364032517e-06, + "loss": 0.9192, + "step": 74610 + }, + { + "epoch": 0.5401492612941288, + "grad_norm": 0.15837319195270538, + "learning_rate": 4.45985797737193e-06, + "loss": 0.9191, + "step": 74620 + }, + { + "epoch": 0.540221647954715, + "grad_norm": 0.1872469186782837, + "learning_rate": 4.459785590711344e-06, + "loss": 0.9116, + "step": 74630 + }, + { + "epoch": 0.5402940346153011, + "grad_norm": 0.16932313144207, + "learning_rate": 4.459713204050758e-06, + "loss": 0.8963, + "step": 74640 + }, + { + "epoch": 0.5403664212758873, + "grad_norm": 0.1552034318447113, + "learning_rate": 4.459640817390172e-06, + "loss": 0.9058, + "step": 74650 + }, + { + "epoch": 0.5404388079364735, + "grad_norm": 0.1520053595304489, + "learning_rate": 4.4595684307295856e-06, + "loss": 0.9006, + "step": 74660 + }, + { + "epoch": 0.5405111945970597, + "grad_norm": 0.15014509856700897, + "learning_rate": 4.459496044068999e-06, + "loss": 0.9045, + "step": 74670 + }, + { + "epoch": 0.5405835812576458, + "grad_norm": 0.18093499541282654, + "learning_rate": 4.459423657408413e-06, + "loss": 0.9013, + "step": 74680 + }, + { + "epoch": 0.540655967918232, + "grad_norm": 0.1501227766275406, + "learning_rate": 4.459351270747827e-06, + "loss": 0.9043, + "step": 74690 + }, + { + "epoch": 0.5407283545788182, + "grad_norm": 0.1506822109222412, + "learning_rate": 4.459278884087241e-06, + "loss": 0.9163, + "step": 74700 + }, + { + "epoch": 0.5408007412394044, + "grad_norm": 0.1608346849679947, + "learning_rate": 4.4592064974266545e-06, + "loss": 0.911, + "step": 74710 + }, + { + "epoch": 0.5408731278999905, + "grad_norm": 0.1549476683139801, + "learning_rate": 4.459134110766068e-06, + "loss": 0.9132, + "step": 74720 + }, + { + "epoch": 0.5409455145605768, + "grad_norm": 0.15968433022499084, + "learning_rate": 4.4590617241054826e-06, + "loss": 0.9119, + "step": 74730 + }, + { + "epoch": 0.541017901221163, + "grad_norm": 0.17671333253383636, + "learning_rate": 4.458989337444896e-06, + "loss": 0.9038, + "step": 74740 + }, + { + "epoch": 0.5410902878817492, + "grad_norm": 0.15897606313228607, + "learning_rate": 4.45891695078431e-06, + "loss": 0.9038, + "step": 74750 + }, + { + "epoch": 0.5411626745423354, + "grad_norm": 0.15166926383972168, + "learning_rate": 4.458844564123723e-06, + "loss": 0.9155, + "step": 74760 + }, + { + "epoch": 0.5412350612029215, + "grad_norm": 0.14908485114574432, + "learning_rate": 4.458772177463138e-06, + "loss": 0.9107, + "step": 74770 + }, + { + "epoch": 0.5413074478635077, + "grad_norm": 0.15309011936187744, + "learning_rate": 4.4586997908025515e-06, + "loss": 0.9124, + "step": 74780 + }, + { + "epoch": 0.5413798345240939, + "grad_norm": 0.15603891015052795, + "learning_rate": 4.458627404141965e-06, + "loss": 0.9081, + "step": 74790 + }, + { + "epoch": 0.5414522211846801, + "grad_norm": 0.17669987678527832, + "learning_rate": 4.458555017481379e-06, + "loss": 0.9093, + "step": 74800 + }, + { + "epoch": 0.5415246078452662, + "grad_norm": 0.1566879153251648, + "learning_rate": 4.458482630820793e-06, + "loss": 0.9079, + "step": 74810 + }, + { + "epoch": 0.5415969945058524, + "grad_norm": 0.17799584567546844, + "learning_rate": 4.458410244160207e-06, + "loss": 0.9145, + "step": 74820 + }, + { + "epoch": 0.5416693811664387, + "grad_norm": 0.1597883254289627, + "learning_rate": 4.4583378574996204e-06, + "loss": 0.9082, + "step": 74830 + }, + { + "epoch": 0.5417417678270249, + "grad_norm": 0.16738228499889374, + "learning_rate": 4.458265470839034e-06, + "loss": 0.9198, + "step": 74840 + }, + { + "epoch": 0.541814154487611, + "grad_norm": 0.16331464052200317, + "learning_rate": 4.4581930841784485e-06, + "loss": 0.9269, + "step": 74850 + }, + { + "epoch": 0.5418865411481972, + "grad_norm": 0.1695605218410492, + "learning_rate": 4.458120697517862e-06, + "loss": 0.9043, + "step": 74860 + }, + { + "epoch": 0.5419589278087834, + "grad_norm": 0.15982206165790558, + "learning_rate": 4.458048310857276e-06, + "loss": 0.9224, + "step": 74870 + }, + { + "epoch": 0.5420313144693696, + "grad_norm": 0.16445820033550262, + "learning_rate": 4.457975924196689e-06, + "loss": 0.9069, + "step": 74880 + }, + { + "epoch": 0.5421037011299558, + "grad_norm": 0.18443961441516876, + "learning_rate": 4.457903537536103e-06, + "loss": 0.9108, + "step": 74890 + }, + { + "epoch": 0.5421760877905419, + "grad_norm": 0.1700538992881775, + "learning_rate": 4.457831150875517e-06, + "loss": 0.924, + "step": 74900 + }, + { + "epoch": 0.5422484744511281, + "grad_norm": 0.174394428730011, + "learning_rate": 4.45775876421493e-06, + "loss": 0.9144, + "step": 74910 + }, + { + "epoch": 0.5423208611117143, + "grad_norm": 0.1507367342710495, + "learning_rate": 4.457686377554345e-06, + "loss": 0.9172, + "step": 74920 + }, + { + "epoch": 0.5423932477723005, + "grad_norm": 0.1828458607196808, + "learning_rate": 4.457613990893758e-06, + "loss": 0.9018, + "step": 74930 + }, + { + "epoch": 0.5424656344328868, + "grad_norm": 0.15712609887123108, + "learning_rate": 4.457541604233172e-06, + "loss": 0.9145, + "step": 74940 + }, + { + "epoch": 0.5425380210934729, + "grad_norm": 0.1707613319158554, + "learning_rate": 4.4574692175725855e-06, + "loss": 0.9128, + "step": 74950 + }, + { + "epoch": 0.5426104077540591, + "grad_norm": 0.17930477857589722, + "learning_rate": 4.457396830912e-06, + "loss": 0.9068, + "step": 74960 + }, + { + "epoch": 0.5426827944146453, + "grad_norm": 0.1521557718515396, + "learning_rate": 4.457324444251414e-06, + "loss": 0.9035, + "step": 74970 + }, + { + "epoch": 0.5427551810752315, + "grad_norm": 0.17281807959079742, + "learning_rate": 4.457252057590827e-06, + "loss": 0.9113, + "step": 74980 + }, + { + "epoch": 0.5428275677358176, + "grad_norm": 0.147269606590271, + "learning_rate": 4.457179670930241e-06, + "loss": 0.9171, + "step": 74990 + }, + { + "epoch": 0.5428999543964038, + "grad_norm": 0.1692981719970703, + "learning_rate": 4.457107284269655e-06, + "loss": 0.9118, + "step": 75000 + }, + { + "epoch": 0.54297234105699, + "grad_norm": 0.16155540943145752, + "learning_rate": 4.457034897609069e-06, + "loss": 0.9116, + "step": 75010 + }, + { + "epoch": 0.5430447277175762, + "grad_norm": 0.15603849291801453, + "learning_rate": 4.4569625109484825e-06, + "loss": 0.9051, + "step": 75020 + }, + { + "epoch": 0.5431171143781623, + "grad_norm": 0.16101206839084625, + "learning_rate": 4.456890124287896e-06, + "loss": 0.9154, + "step": 75030 + }, + { + "epoch": 0.5431895010387486, + "grad_norm": 0.15974144637584686, + "learning_rate": 4.456817737627311e-06, + "loss": 0.9182, + "step": 75040 + }, + { + "epoch": 0.5432618876993348, + "grad_norm": 0.169901043176651, + "learning_rate": 4.456745350966724e-06, + "loss": 0.9084, + "step": 75050 + }, + { + "epoch": 0.543334274359921, + "grad_norm": 0.16322177648544312, + "learning_rate": 4.456672964306138e-06, + "loss": 0.9178, + "step": 75060 + }, + { + "epoch": 0.5434066610205072, + "grad_norm": 0.16943545639514923, + "learning_rate": 4.4566005776455514e-06, + "loss": 0.9102, + "step": 75070 + }, + { + "epoch": 0.5434790476810933, + "grad_norm": 0.16489852964878082, + "learning_rate": 4.456528190984966e-06, + "loss": 0.9265, + "step": 75080 + }, + { + "epoch": 0.5435514343416795, + "grad_norm": 0.198415607213974, + "learning_rate": 4.4564558043243795e-06, + "loss": 0.9159, + "step": 75090 + }, + { + "epoch": 0.5436238210022657, + "grad_norm": 0.15088145434856415, + "learning_rate": 4.456383417663793e-06, + "loss": 0.9112, + "step": 75100 + }, + { + "epoch": 0.5436962076628519, + "grad_norm": 0.1523219645023346, + "learning_rate": 4.456311031003207e-06, + "loss": 0.9206, + "step": 75110 + }, + { + "epoch": 0.543768594323438, + "grad_norm": 0.1473480463027954, + "learning_rate": 4.456238644342621e-06, + "loss": 0.918, + "step": 75120 + }, + { + "epoch": 0.5438409809840242, + "grad_norm": 0.1520926058292389, + "learning_rate": 4.456166257682035e-06, + "loss": 0.9063, + "step": 75130 + }, + { + "epoch": 0.5439133676446104, + "grad_norm": 0.15161623060703278, + "learning_rate": 4.4560938710214485e-06, + "loss": 0.9107, + "step": 75140 + }, + { + "epoch": 0.5439857543051967, + "grad_norm": 0.15313850343227386, + "learning_rate": 4.456021484360862e-06, + "loss": 0.9073, + "step": 75150 + }, + { + "epoch": 0.5440581409657829, + "grad_norm": 0.14688560366630554, + "learning_rate": 4.4559490977002765e-06, + "loss": 0.9086, + "step": 75160 + }, + { + "epoch": 0.544130527626369, + "grad_norm": 0.15404599905014038, + "learning_rate": 4.45587671103969e-06, + "loss": 0.9174, + "step": 75170 + }, + { + "epoch": 0.5442029142869552, + "grad_norm": 0.16538633406162262, + "learning_rate": 4.455804324379104e-06, + "loss": 0.9209, + "step": 75180 + }, + { + "epoch": 0.5442753009475414, + "grad_norm": 0.16180939972400665, + "learning_rate": 4.455731937718517e-06, + "loss": 0.9071, + "step": 75190 + }, + { + "epoch": 0.5443476876081276, + "grad_norm": 0.1648901402950287, + "learning_rate": 4.455659551057932e-06, + "loss": 0.9119, + "step": 75200 + }, + { + "epoch": 0.5444200742687137, + "grad_norm": 0.1480080932378769, + "learning_rate": 4.4555871643973455e-06, + "loss": 0.9057, + "step": 75210 + }, + { + "epoch": 0.5444924609292999, + "grad_norm": 0.16776438057422638, + "learning_rate": 4.455514777736759e-06, + "loss": 0.9274, + "step": 75220 + }, + { + "epoch": 0.5445648475898861, + "grad_norm": 0.1602822095155716, + "learning_rate": 4.455442391076173e-06, + "loss": 0.9214, + "step": 75230 + }, + { + "epoch": 0.5446372342504723, + "grad_norm": 0.14920540153980255, + "learning_rate": 4.455370004415587e-06, + "loss": 0.922, + "step": 75240 + }, + { + "epoch": 0.5447096209110585, + "grad_norm": 0.14813263714313507, + "learning_rate": 4.455297617755001e-06, + "loss": 0.9304, + "step": 75250 + }, + { + "epoch": 0.5447820075716447, + "grad_norm": 0.16396941244602203, + "learning_rate": 4.455225231094414e-06, + "loss": 0.9075, + "step": 75260 + }, + { + "epoch": 0.5448543942322309, + "grad_norm": 0.15782274305820465, + "learning_rate": 4.455152844433828e-06, + "loss": 0.9049, + "step": 75270 + }, + { + "epoch": 0.5449267808928171, + "grad_norm": 0.15898077189922333, + "learning_rate": 4.4550804577732425e-06, + "loss": 0.9095, + "step": 75280 + }, + { + "epoch": 0.5449991675534033, + "grad_norm": 0.15519340336322784, + "learning_rate": 4.455008071112656e-06, + "loss": 0.9105, + "step": 75290 + }, + { + "epoch": 0.5450715542139895, + "grad_norm": 0.1506635993719101, + "learning_rate": 4.45493568445207e-06, + "loss": 0.9096, + "step": 75300 + }, + { + "epoch": 0.5451439408745756, + "grad_norm": 0.15947678685188293, + "learning_rate": 4.454863297791483e-06, + "loss": 0.9099, + "step": 75310 + }, + { + "epoch": 0.5452163275351618, + "grad_norm": 0.1622234433889389, + "learning_rate": 4.454790911130897e-06, + "loss": 0.9208, + "step": 75320 + }, + { + "epoch": 0.545288714195748, + "grad_norm": 0.16322208940982819, + "learning_rate": 4.454718524470311e-06, + "loss": 0.8956, + "step": 75330 + }, + { + "epoch": 0.5453611008563342, + "grad_norm": 0.16314153373241425, + "learning_rate": 4.454646137809725e-06, + "loss": 0.9099, + "step": 75340 + }, + { + "epoch": 0.5454334875169203, + "grad_norm": 0.15650366246700287, + "learning_rate": 4.454573751149139e-06, + "loss": 0.9143, + "step": 75350 + }, + { + "epoch": 0.5455058741775066, + "grad_norm": 0.16160249710083008, + "learning_rate": 4.454501364488552e-06, + "loss": 0.9086, + "step": 75360 + }, + { + "epoch": 0.5455782608380928, + "grad_norm": 0.18455857038497925, + "learning_rate": 4.454428977827967e-06, + "loss": 0.916, + "step": 75370 + }, + { + "epoch": 0.545650647498679, + "grad_norm": 0.16547948122024536, + "learning_rate": 4.45435659116738e-06, + "loss": 0.9188, + "step": 75380 + }, + { + "epoch": 0.5457230341592652, + "grad_norm": 0.15037128329277039, + "learning_rate": 4.454284204506794e-06, + "loss": 0.9107, + "step": 75390 + }, + { + "epoch": 0.5457954208198513, + "grad_norm": 0.15458130836486816, + "learning_rate": 4.4542118178462076e-06, + "loss": 0.9162, + "step": 75400 + }, + { + "epoch": 0.5458678074804375, + "grad_norm": 0.17746815085411072, + "learning_rate": 4.454139431185622e-06, + "loss": 0.9108, + "step": 75410 + }, + { + "epoch": 0.5459401941410237, + "grad_norm": 0.15650658309459686, + "learning_rate": 4.454067044525035e-06, + "loss": 0.9192, + "step": 75420 + }, + { + "epoch": 0.5460125808016099, + "grad_norm": 0.16250117123126984, + "learning_rate": 4.453994657864449e-06, + "loss": 0.9132, + "step": 75430 + }, + { + "epoch": 0.546084967462196, + "grad_norm": 0.1670369803905487, + "learning_rate": 4.453922271203863e-06, + "loss": 0.9135, + "step": 75440 + }, + { + "epoch": 0.5461573541227822, + "grad_norm": 1.3092010021209717, + "learning_rate": 4.4538498845432765e-06, + "loss": 0.9078, + "step": 75450 + }, + { + "epoch": 0.5462297407833684, + "grad_norm": 0.17468515038490295, + "learning_rate": 4.45377749788269e-06, + "loss": 0.9231, + "step": 75460 + }, + { + "epoch": 0.5463021274439547, + "grad_norm": 0.17053230106830597, + "learning_rate": 4.453705111222104e-06, + "loss": 0.9126, + "step": 75470 + }, + { + "epoch": 0.5463745141045409, + "grad_norm": 0.17557454109191895, + "learning_rate": 4.453632724561518e-06, + "loss": 0.9228, + "step": 75480 + }, + { + "epoch": 0.546446900765127, + "grad_norm": 0.17532938718795776, + "learning_rate": 4.453560337900932e-06, + "loss": 0.899, + "step": 75490 + }, + { + "epoch": 0.5465192874257132, + "grad_norm": 0.15332704782485962, + "learning_rate": 4.453487951240345e-06, + "loss": 0.9157, + "step": 75500 + }, + { + "epoch": 0.5465916740862994, + "grad_norm": 0.18341203033924103, + "learning_rate": 4.453415564579759e-06, + "loss": 0.9216, + "step": 75510 + }, + { + "epoch": 0.5466640607468856, + "grad_norm": 0.15997199714183807, + "learning_rate": 4.4533431779191735e-06, + "loss": 0.9244, + "step": 75520 + }, + { + "epoch": 0.5467364474074717, + "grad_norm": 0.1543131321668625, + "learning_rate": 4.453270791258587e-06, + "loss": 0.9075, + "step": 75530 + }, + { + "epoch": 0.5468088340680579, + "grad_norm": 0.16920112073421478, + "learning_rate": 4.453198404598001e-06, + "loss": 0.912, + "step": 75540 + }, + { + "epoch": 0.5468812207286441, + "grad_norm": 0.15737563371658325, + "learning_rate": 4.453126017937414e-06, + "loss": 0.9132, + "step": 75550 + }, + { + "epoch": 0.5469536073892303, + "grad_norm": 0.175185427069664, + "learning_rate": 4.453053631276829e-06, + "loss": 0.9211, + "step": 75560 + }, + { + "epoch": 0.5470259940498166, + "grad_norm": 0.163910910487175, + "learning_rate": 4.4529812446162424e-06, + "loss": 0.9075, + "step": 75570 + }, + { + "epoch": 0.5470983807104027, + "grad_norm": 0.185495063662529, + "learning_rate": 4.452908857955656e-06, + "loss": 0.929, + "step": 75580 + }, + { + "epoch": 0.5471707673709889, + "grad_norm": 0.159888356924057, + "learning_rate": 4.45283647129507e-06, + "loss": 0.9159, + "step": 75590 + }, + { + "epoch": 0.5472431540315751, + "grad_norm": 0.14591063559055328, + "learning_rate": 4.452764084634484e-06, + "loss": 0.9206, + "step": 75600 + }, + { + "epoch": 0.5473155406921613, + "grad_norm": 0.16549073159694672, + "learning_rate": 4.452691697973898e-06, + "loss": 0.8996, + "step": 75610 + }, + { + "epoch": 0.5473879273527474, + "grad_norm": 0.16032549738883972, + "learning_rate": 4.452619311313311e-06, + "loss": 0.9198, + "step": 75620 + }, + { + "epoch": 0.5474603140133336, + "grad_norm": 0.15921436250209808, + "learning_rate": 4.452546924652725e-06, + "loss": 0.8986, + "step": 75630 + }, + { + "epoch": 0.5475327006739198, + "grad_norm": 0.15605443716049194, + "learning_rate": 4.4524745379921394e-06, + "loss": 0.911, + "step": 75640 + }, + { + "epoch": 0.547605087334506, + "grad_norm": 0.1790502518415451, + "learning_rate": 4.452402151331553e-06, + "loss": 0.9153, + "step": 75650 + }, + { + "epoch": 0.5476774739950921, + "grad_norm": 0.16580620408058167, + "learning_rate": 4.452329764670967e-06, + "loss": 0.9162, + "step": 75660 + }, + { + "epoch": 0.5477498606556783, + "grad_norm": 0.19182774424552917, + "learning_rate": 4.45225737801038e-06, + "loss": 0.9188, + "step": 75670 + }, + { + "epoch": 0.5478222473162646, + "grad_norm": 0.16452908515930176, + "learning_rate": 4.452184991349795e-06, + "loss": 0.9187, + "step": 75680 + }, + { + "epoch": 0.5478946339768508, + "grad_norm": 0.15444496273994446, + "learning_rate": 4.452112604689208e-06, + "loss": 0.9169, + "step": 75690 + }, + { + "epoch": 0.547967020637437, + "grad_norm": 0.149451345205307, + "learning_rate": 4.452040218028622e-06, + "loss": 0.9236, + "step": 75700 + }, + { + "epoch": 0.5480394072980231, + "grad_norm": 0.16976690292358398, + "learning_rate": 4.451967831368036e-06, + "loss": 0.9086, + "step": 75710 + }, + { + "epoch": 0.5481117939586093, + "grad_norm": 0.14885394275188446, + "learning_rate": 4.45189544470745e-06, + "loss": 0.9012, + "step": 75720 + }, + { + "epoch": 0.5481841806191955, + "grad_norm": 0.1846446394920349, + "learning_rate": 4.451823058046864e-06, + "loss": 0.9183, + "step": 75730 + }, + { + "epoch": 0.5482565672797817, + "grad_norm": 0.15907852351665497, + "learning_rate": 4.451750671386277e-06, + "loss": 0.9219, + "step": 75740 + }, + { + "epoch": 0.5483289539403678, + "grad_norm": 0.1452861726284027, + "learning_rate": 4.451678284725691e-06, + "loss": 0.9102, + "step": 75750 + }, + { + "epoch": 0.548401340600954, + "grad_norm": 0.16507689654827118, + "learning_rate": 4.451605898065105e-06, + "loss": 0.9243, + "step": 75760 + }, + { + "epoch": 0.5484737272615402, + "grad_norm": 0.15828417241573334, + "learning_rate": 4.451533511404519e-06, + "loss": 0.9028, + "step": 75770 + }, + { + "epoch": 0.5485461139221264, + "grad_norm": 0.18392127752304077, + "learning_rate": 4.451461124743933e-06, + "loss": 0.908, + "step": 75780 + }, + { + "epoch": 0.5486185005827127, + "grad_norm": 0.15240317583084106, + "learning_rate": 4.451388738083346e-06, + "loss": 0.9013, + "step": 75790 + }, + { + "epoch": 0.5486908872432988, + "grad_norm": 0.14687864482402802, + "learning_rate": 4.451316351422761e-06, + "loss": 0.9184, + "step": 75800 + }, + { + "epoch": 0.548763273903885, + "grad_norm": 0.15710905194282532, + "learning_rate": 4.451243964762174e-06, + "loss": 0.9165, + "step": 75810 + }, + { + "epoch": 0.5488356605644712, + "grad_norm": 0.1660161018371582, + "learning_rate": 4.451171578101588e-06, + "loss": 0.9045, + "step": 75820 + }, + { + "epoch": 0.5489080472250574, + "grad_norm": 0.1465510129928589, + "learning_rate": 4.4510991914410015e-06, + "loss": 0.8998, + "step": 75830 + }, + { + "epoch": 0.5489804338856435, + "grad_norm": 0.17025746405124664, + "learning_rate": 4.451026804780416e-06, + "loss": 0.9093, + "step": 75840 + }, + { + "epoch": 0.5490528205462297, + "grad_norm": 0.18562918901443481, + "learning_rate": 4.45095441811983e-06, + "loss": 0.9195, + "step": 75850 + }, + { + "epoch": 0.5491252072068159, + "grad_norm": 0.1641816943883896, + "learning_rate": 4.450882031459243e-06, + "loss": 0.9039, + "step": 75860 + }, + { + "epoch": 0.5491975938674021, + "grad_norm": 0.19206029176712036, + "learning_rate": 4.450809644798657e-06, + "loss": 0.9224, + "step": 75870 + }, + { + "epoch": 0.5492699805279883, + "grad_norm": 0.15584112703800201, + "learning_rate": 4.450737258138071e-06, + "loss": 0.9171, + "step": 75880 + }, + { + "epoch": 0.5493423671885745, + "grad_norm": 0.1531420797109604, + "learning_rate": 4.450664871477485e-06, + "loss": 0.8936, + "step": 75890 + }, + { + "epoch": 0.5494147538491607, + "grad_norm": 0.17042067646980286, + "learning_rate": 4.4505924848168985e-06, + "loss": 0.9002, + "step": 75900 + }, + { + "epoch": 0.5494871405097469, + "grad_norm": 0.15222638845443726, + "learning_rate": 4.450520098156312e-06, + "loss": 0.9058, + "step": 75910 + }, + { + "epoch": 0.5495595271703331, + "grad_norm": 0.1593593806028366, + "learning_rate": 4.450447711495726e-06, + "loss": 0.8956, + "step": 75920 + }, + { + "epoch": 0.5496319138309192, + "grad_norm": 0.16259454190731049, + "learning_rate": 4.45037532483514e-06, + "loss": 0.9124, + "step": 75930 + }, + { + "epoch": 0.5497043004915054, + "grad_norm": 0.23078764975070953, + "learning_rate": 4.450302938174554e-06, + "loss": 0.9127, + "step": 75940 + }, + { + "epoch": 0.5497766871520916, + "grad_norm": 0.1593347191810608, + "learning_rate": 4.4502305515139675e-06, + "loss": 0.8963, + "step": 75950 + }, + { + "epoch": 0.5498490738126778, + "grad_norm": 0.1672598272562027, + "learning_rate": 4.450158164853381e-06, + "loss": 0.9212, + "step": 75960 + }, + { + "epoch": 0.549921460473264, + "grad_norm": 0.1680530607700348, + "learning_rate": 4.450085778192795e-06, + "loss": 0.9194, + "step": 75970 + }, + { + "epoch": 0.5499938471338501, + "grad_norm": 0.15082144737243652, + "learning_rate": 4.450013391532208e-06, + "loss": 0.9028, + "step": 75980 + }, + { + "epoch": 0.5500662337944363, + "grad_norm": 0.1934494823217392, + "learning_rate": 4.449941004871623e-06, + "loss": 0.907, + "step": 75990 + }, + { + "epoch": 0.5501386204550226, + "grad_norm": 0.15488490462303162, + "learning_rate": 4.449868618211036e-06, + "loss": 0.9231, + "step": 76000 + }, + { + "epoch": 0.5502110071156088, + "grad_norm": 0.1557077169418335, + "learning_rate": 4.44979623155045e-06, + "loss": 0.9252, + "step": 76010 + }, + { + "epoch": 0.550283393776195, + "grad_norm": 0.16591419279575348, + "learning_rate": 4.449723844889864e-06, + "loss": 0.9057, + "step": 76020 + }, + { + "epoch": 0.5503557804367811, + "grad_norm": 0.16087841987609863, + "learning_rate": 4.449651458229278e-06, + "loss": 0.9173, + "step": 76030 + }, + { + "epoch": 0.5504281670973673, + "grad_norm": 0.16236597299575806, + "learning_rate": 4.449579071568692e-06, + "loss": 0.9211, + "step": 76040 + }, + { + "epoch": 0.5505005537579535, + "grad_norm": 0.17655915021896362, + "learning_rate": 4.449506684908105e-06, + "loss": 0.9211, + "step": 76050 + }, + { + "epoch": 0.5505729404185397, + "grad_norm": 0.1689653992652893, + "learning_rate": 4.449434298247519e-06, + "loss": 0.9139, + "step": 76060 + }, + { + "epoch": 0.5506453270791258, + "grad_norm": 0.16405482590198517, + "learning_rate": 4.449361911586933e-06, + "loss": 0.9165, + "step": 76070 + }, + { + "epoch": 0.550717713739712, + "grad_norm": 0.16635659337043762, + "learning_rate": 4.449289524926347e-06, + "loss": 0.8991, + "step": 76080 + }, + { + "epoch": 0.5507901004002982, + "grad_norm": 0.1841815710067749, + "learning_rate": 4.449217138265761e-06, + "loss": 0.9175, + "step": 76090 + }, + { + "epoch": 0.5508624870608844, + "grad_norm": 0.15842583775520325, + "learning_rate": 4.449144751605174e-06, + "loss": 0.9144, + "step": 76100 + }, + { + "epoch": 0.5509348737214707, + "grad_norm": 0.16369058191776276, + "learning_rate": 4.449072364944588e-06, + "loss": 0.9071, + "step": 76110 + }, + { + "epoch": 0.5510072603820568, + "grad_norm": 0.16025404632091522, + "learning_rate": 4.448999978284002e-06, + "loss": 0.9108, + "step": 76120 + }, + { + "epoch": 0.551079647042643, + "grad_norm": 0.15726248919963837, + "learning_rate": 4.448927591623416e-06, + "loss": 0.9048, + "step": 76130 + }, + { + "epoch": 0.5511520337032292, + "grad_norm": 0.149103045463562, + "learning_rate": 4.4488552049628296e-06, + "loss": 0.9041, + "step": 76140 + }, + { + "epoch": 0.5512244203638154, + "grad_norm": 0.1646459996700287, + "learning_rate": 4.448782818302243e-06, + "loss": 0.9207, + "step": 76150 + }, + { + "epoch": 0.5512968070244015, + "grad_norm": 0.16346558928489685, + "learning_rate": 4.448710431641658e-06, + "loss": 0.9152, + "step": 76160 + }, + { + "epoch": 0.5513691936849877, + "grad_norm": 0.15449324250221252, + "learning_rate": 4.448638044981071e-06, + "loss": 0.9028, + "step": 76170 + }, + { + "epoch": 0.5514415803455739, + "grad_norm": 0.1610867977142334, + "learning_rate": 4.448565658320485e-06, + "loss": 0.9255, + "step": 76180 + }, + { + "epoch": 0.5515139670061601, + "grad_norm": 0.15043221414089203, + "learning_rate": 4.4484932716598985e-06, + "loss": 0.9077, + "step": 76190 + }, + { + "epoch": 0.5515863536667462, + "grad_norm": 0.18590010702610016, + "learning_rate": 4.448420884999313e-06, + "loss": 0.9125, + "step": 76200 + }, + { + "epoch": 0.5516587403273325, + "grad_norm": 0.16317640244960785, + "learning_rate": 4.4483484983387266e-06, + "loss": 0.9219, + "step": 76210 + }, + { + "epoch": 0.5517311269879187, + "grad_norm": 0.15565603971481323, + "learning_rate": 4.44827611167814e-06, + "loss": 0.9128, + "step": 76220 + }, + { + "epoch": 0.5518035136485049, + "grad_norm": 0.1605430394411087, + "learning_rate": 4.448203725017554e-06, + "loss": 0.9104, + "step": 76230 + }, + { + "epoch": 0.5518759003090911, + "grad_norm": 0.17234712839126587, + "learning_rate": 4.448131338356968e-06, + "loss": 0.9054, + "step": 76240 + }, + { + "epoch": 0.5519482869696772, + "grad_norm": 0.15464408695697784, + "learning_rate": 4.448058951696382e-06, + "loss": 0.9115, + "step": 76250 + }, + { + "epoch": 0.5520206736302634, + "grad_norm": 0.15122352540493011, + "learning_rate": 4.4479865650357955e-06, + "loss": 0.9039, + "step": 76260 + }, + { + "epoch": 0.5520930602908496, + "grad_norm": 0.1647047996520996, + "learning_rate": 4.447914178375209e-06, + "loss": 0.9161, + "step": 76270 + }, + { + "epoch": 0.5521654469514358, + "grad_norm": 0.16340136528015137, + "learning_rate": 4.4478417917146236e-06, + "loss": 0.9097, + "step": 76280 + }, + { + "epoch": 0.5522378336120219, + "grad_norm": 0.1576181948184967, + "learning_rate": 4.447769405054037e-06, + "loss": 0.9111, + "step": 76290 + }, + { + "epoch": 0.5523102202726081, + "grad_norm": 0.17520330846309662, + "learning_rate": 4.447697018393451e-06, + "loss": 0.9019, + "step": 76300 + }, + { + "epoch": 0.5523826069331943, + "grad_norm": 0.16088417172431946, + "learning_rate": 4.447624631732864e-06, + "loss": 0.9159, + "step": 76310 + }, + { + "epoch": 0.5524549935937806, + "grad_norm": 0.15434597432613373, + "learning_rate": 4.447552245072279e-06, + "loss": 0.9033, + "step": 76320 + }, + { + "epoch": 0.5525273802543668, + "grad_norm": 0.16620974242687225, + "learning_rate": 4.4474798584116925e-06, + "loss": 0.9203, + "step": 76330 + }, + { + "epoch": 0.5525997669149529, + "grad_norm": 0.159925177693367, + "learning_rate": 4.447407471751106e-06, + "loss": 0.9086, + "step": 76340 + }, + { + "epoch": 0.5526721535755391, + "grad_norm": 0.19963112473487854, + "learning_rate": 4.44733508509052e-06, + "loss": 0.9212, + "step": 76350 + }, + { + "epoch": 0.5527445402361253, + "grad_norm": 0.1715710163116455, + "learning_rate": 4.447262698429934e-06, + "loss": 0.9205, + "step": 76360 + }, + { + "epoch": 0.5528169268967115, + "grad_norm": 0.1506585329771042, + "learning_rate": 4.447190311769348e-06, + "loss": 0.9063, + "step": 76370 + }, + { + "epoch": 0.5528893135572976, + "grad_norm": 0.16564472019672394, + "learning_rate": 4.4471179251087614e-06, + "loss": 0.9322, + "step": 76380 + }, + { + "epoch": 0.5529617002178838, + "grad_norm": 0.1629706174135208, + "learning_rate": 4.447045538448175e-06, + "loss": 0.8981, + "step": 76390 + }, + { + "epoch": 0.55303408687847, + "grad_norm": 0.15394346415996552, + "learning_rate": 4.4469731517875895e-06, + "loss": 0.918, + "step": 76400 + }, + { + "epoch": 0.5531064735390562, + "grad_norm": 0.1582847237586975, + "learning_rate": 4.446900765127003e-06, + "loss": 0.9083, + "step": 76410 + }, + { + "epoch": 0.5531788601996425, + "grad_norm": 0.16456353664398193, + "learning_rate": 4.446828378466417e-06, + "loss": 0.9225, + "step": 76420 + }, + { + "epoch": 0.5532512468602286, + "grad_norm": 0.1710321456193924, + "learning_rate": 4.44675599180583e-06, + "loss": 0.916, + "step": 76430 + }, + { + "epoch": 0.5533236335208148, + "grad_norm": 0.15686561167240143, + "learning_rate": 4.446683605145245e-06, + "loss": 0.9092, + "step": 76440 + }, + { + "epoch": 0.553396020181401, + "grad_norm": 0.15640173852443695, + "learning_rate": 4.4466112184846584e-06, + "loss": 0.9061, + "step": 76450 + }, + { + "epoch": 0.5534684068419872, + "grad_norm": 0.1626739203929901, + "learning_rate": 4.446538831824072e-06, + "loss": 0.9156, + "step": 76460 + }, + { + "epoch": 0.5535407935025733, + "grad_norm": 0.15522697567939758, + "learning_rate": 4.446466445163486e-06, + "loss": 0.8936, + "step": 76470 + }, + { + "epoch": 0.5536131801631595, + "grad_norm": 0.15257471799850464, + "learning_rate": 4.446394058502899e-06, + "loss": 0.9053, + "step": 76480 + }, + { + "epoch": 0.5536855668237457, + "grad_norm": 0.16405083239078522, + "learning_rate": 4.446321671842313e-06, + "loss": 0.9124, + "step": 76490 + }, + { + "epoch": 0.5537579534843319, + "grad_norm": 0.1680813431739807, + "learning_rate": 4.4462492851817265e-06, + "loss": 0.9134, + "step": 76500 + }, + { + "epoch": 0.553830340144918, + "grad_norm": 0.16853106021881104, + "learning_rate": 4.446176898521141e-06, + "loss": 0.926, + "step": 76510 + }, + { + "epoch": 0.5539027268055042, + "grad_norm": 0.164813831448555, + "learning_rate": 4.446104511860555e-06, + "loss": 0.9091, + "step": 76520 + }, + { + "epoch": 0.5539751134660905, + "grad_norm": 0.1515646129846573, + "learning_rate": 4.446032125199968e-06, + "loss": 0.9083, + "step": 76530 + }, + { + "epoch": 0.5540475001266767, + "grad_norm": 0.16372545063495636, + "learning_rate": 4.445959738539382e-06, + "loss": 0.8965, + "step": 76540 + }, + { + "epoch": 0.5541198867872629, + "grad_norm": 0.1647547334432602, + "learning_rate": 4.445887351878796e-06, + "loss": 0.8986, + "step": 76550 + }, + { + "epoch": 0.554192273447849, + "grad_norm": 0.1702655404806137, + "learning_rate": 4.44581496521821e-06, + "loss": 0.9111, + "step": 76560 + }, + { + "epoch": 0.5542646601084352, + "grad_norm": 0.1671968400478363, + "learning_rate": 4.4457425785576235e-06, + "loss": 0.9038, + "step": 76570 + }, + { + "epoch": 0.5543370467690214, + "grad_norm": 0.23342488706111908, + "learning_rate": 4.445670191897037e-06, + "loss": 0.8895, + "step": 76580 + }, + { + "epoch": 0.5544094334296076, + "grad_norm": 0.17012055218219757, + "learning_rate": 4.445597805236452e-06, + "loss": 0.9223, + "step": 76590 + }, + { + "epoch": 0.5544818200901938, + "grad_norm": 0.153640478849411, + "learning_rate": 4.445525418575865e-06, + "loss": 0.9189, + "step": 76600 + }, + { + "epoch": 0.5545542067507799, + "grad_norm": 0.1719810962677002, + "learning_rate": 4.445453031915279e-06, + "loss": 0.8992, + "step": 76610 + }, + { + "epoch": 0.5546265934113661, + "grad_norm": 0.16828425228595734, + "learning_rate": 4.4453806452546925e-06, + "loss": 0.9154, + "step": 76620 + }, + { + "epoch": 0.5546989800719523, + "grad_norm": 0.1485888808965683, + "learning_rate": 4.445308258594107e-06, + "loss": 0.9044, + "step": 76630 + }, + { + "epoch": 0.5547713667325386, + "grad_norm": 0.15807074308395386, + "learning_rate": 4.4452358719335205e-06, + "loss": 0.9193, + "step": 76640 + }, + { + "epoch": 0.5548437533931248, + "grad_norm": 0.2003334015607834, + "learning_rate": 4.445163485272934e-06, + "loss": 0.9226, + "step": 76650 + }, + { + "epoch": 0.5549161400537109, + "grad_norm": 0.16058491170406342, + "learning_rate": 4.445091098612348e-06, + "loss": 0.9208, + "step": 76660 + }, + { + "epoch": 0.5549885267142971, + "grad_norm": 0.22712364792823792, + "learning_rate": 4.445018711951762e-06, + "loss": 0.9116, + "step": 76670 + }, + { + "epoch": 0.5550609133748833, + "grad_norm": 0.16164818406105042, + "learning_rate": 4.444946325291176e-06, + "loss": 0.9143, + "step": 76680 + }, + { + "epoch": 0.5551333000354695, + "grad_norm": 0.1746838092803955, + "learning_rate": 4.4448739386305895e-06, + "loss": 0.9179, + "step": 76690 + }, + { + "epoch": 0.5552056866960556, + "grad_norm": 0.1547100841999054, + "learning_rate": 4.444801551970003e-06, + "loss": 0.9122, + "step": 76700 + }, + { + "epoch": 0.5552780733566418, + "grad_norm": 0.1596509963274002, + "learning_rate": 4.444729165309417e-06, + "loss": 0.908, + "step": 76710 + }, + { + "epoch": 0.555350460017228, + "grad_norm": 0.16364139318466187, + "learning_rate": 4.444656778648831e-06, + "loss": 0.9158, + "step": 76720 + }, + { + "epoch": 0.5554228466778142, + "grad_norm": 0.17858783900737762, + "learning_rate": 4.444584391988245e-06, + "loss": 0.9134, + "step": 76730 + }, + { + "epoch": 0.5554952333384005, + "grad_norm": 0.14799803495407104, + "learning_rate": 4.444512005327658e-06, + "loss": 0.9138, + "step": 76740 + }, + { + "epoch": 0.5555676199989866, + "grad_norm": 0.156327486038208, + "learning_rate": 4.444439618667072e-06, + "loss": 0.9222, + "step": 76750 + }, + { + "epoch": 0.5556400066595728, + "grad_norm": 0.16460415720939636, + "learning_rate": 4.4443672320064865e-06, + "loss": 0.9107, + "step": 76760 + }, + { + "epoch": 0.555712393320159, + "grad_norm": 0.1606915295124054, + "learning_rate": 4.4442948453459e-06, + "loss": 0.8984, + "step": 76770 + }, + { + "epoch": 0.5557847799807452, + "grad_norm": 0.15452681481838226, + "learning_rate": 4.444222458685314e-06, + "loss": 0.8986, + "step": 76780 + }, + { + "epoch": 0.5558571666413313, + "grad_norm": 0.16996990144252777, + "learning_rate": 4.444150072024727e-06, + "loss": 0.919, + "step": 76790 + }, + { + "epoch": 0.5559295533019175, + "grad_norm": 0.16095289587974548, + "learning_rate": 4.444077685364142e-06, + "loss": 0.9196, + "step": 76800 + }, + { + "epoch": 0.5560019399625037, + "grad_norm": 0.16664938628673553, + "learning_rate": 4.444005298703555e-06, + "loss": 0.915, + "step": 76810 + }, + { + "epoch": 0.5560743266230899, + "grad_norm": 0.15813925862312317, + "learning_rate": 4.443932912042969e-06, + "loss": 0.9096, + "step": 76820 + }, + { + "epoch": 0.556146713283676, + "grad_norm": 0.1628894805908203, + "learning_rate": 4.443860525382383e-06, + "loss": 0.9181, + "step": 76830 + }, + { + "epoch": 0.5562190999442622, + "grad_norm": 0.16485193371772766, + "learning_rate": 4.443788138721797e-06, + "loss": 0.8998, + "step": 76840 + }, + { + "epoch": 0.5562914866048485, + "grad_norm": 0.15740840137004852, + "learning_rate": 4.443715752061211e-06, + "loss": 0.9215, + "step": 76850 + }, + { + "epoch": 0.5563638732654347, + "grad_norm": 0.16431400179862976, + "learning_rate": 4.443643365400624e-06, + "loss": 0.9179, + "step": 76860 + }, + { + "epoch": 0.5564362599260209, + "grad_norm": 0.17064224183559418, + "learning_rate": 4.443570978740038e-06, + "loss": 0.9115, + "step": 76870 + }, + { + "epoch": 0.556508646586607, + "grad_norm": 0.15299154818058014, + "learning_rate": 4.443498592079452e-06, + "loss": 0.9046, + "step": 76880 + }, + { + "epoch": 0.5565810332471932, + "grad_norm": 0.19055993854999542, + "learning_rate": 4.443426205418866e-06, + "loss": 0.9223, + "step": 76890 + }, + { + "epoch": 0.5566534199077794, + "grad_norm": 0.16058650612831116, + "learning_rate": 4.44335381875828e-06, + "loss": 0.9083, + "step": 76900 + }, + { + "epoch": 0.5567258065683656, + "grad_norm": 0.15559203922748566, + "learning_rate": 4.443281432097693e-06, + "loss": 0.9158, + "step": 76910 + }, + { + "epoch": 0.5567981932289517, + "grad_norm": 0.4795348346233368, + "learning_rate": 4.443209045437108e-06, + "loss": 0.9116, + "step": 76920 + }, + { + "epoch": 0.5568705798895379, + "grad_norm": 0.1700964719057083, + "learning_rate": 4.443136658776521e-06, + "loss": 0.907, + "step": 76930 + }, + { + "epoch": 0.5569429665501241, + "grad_norm": 0.15994440019130707, + "learning_rate": 4.443064272115935e-06, + "loss": 0.9222, + "step": 76940 + }, + { + "epoch": 0.5570153532107104, + "grad_norm": 0.17749464511871338, + "learning_rate": 4.4429918854553486e-06, + "loss": 0.9154, + "step": 76950 + }, + { + "epoch": 0.5570877398712966, + "grad_norm": 0.16541603207588196, + "learning_rate": 4.442919498794763e-06, + "loss": 0.9177, + "step": 76960 + }, + { + "epoch": 0.5571601265318827, + "grad_norm": 0.16451773047447205, + "learning_rate": 4.442847112134177e-06, + "loss": 0.9144, + "step": 76970 + }, + { + "epoch": 0.5572325131924689, + "grad_norm": 0.16148695349693298, + "learning_rate": 4.44277472547359e-06, + "loss": 0.9084, + "step": 76980 + }, + { + "epoch": 0.5573048998530551, + "grad_norm": 0.18457120656967163, + "learning_rate": 4.442702338813004e-06, + "loss": 0.9008, + "step": 76990 + }, + { + "epoch": 0.5573772865136413, + "grad_norm": 0.1575312465429306, + "learning_rate": 4.442629952152418e-06, + "loss": 0.9129, + "step": 77000 + }, + { + "epoch": 0.5574496731742274, + "grad_norm": 0.16345492005348206, + "learning_rate": 4.442557565491831e-06, + "loss": 0.9194, + "step": 77010 + }, + { + "epoch": 0.5575220598348136, + "grad_norm": 0.17905868589878082, + "learning_rate": 4.442485178831245e-06, + "loss": 0.9271, + "step": 77020 + }, + { + "epoch": 0.5575944464953998, + "grad_norm": 0.15946638584136963, + "learning_rate": 4.442412792170659e-06, + "loss": 0.9158, + "step": 77030 + }, + { + "epoch": 0.557666833155986, + "grad_norm": 0.1656288057565689, + "learning_rate": 4.442340405510073e-06, + "loss": 0.897, + "step": 77040 + }, + { + "epoch": 0.5577392198165722, + "grad_norm": 0.15466462075710297, + "learning_rate": 4.442268018849486e-06, + "loss": 0.9194, + "step": 77050 + }, + { + "epoch": 0.5578116064771584, + "grad_norm": 0.17180196940898895, + "learning_rate": 4.4421956321889e-06, + "loss": 0.9056, + "step": 77060 + }, + { + "epoch": 0.5578839931377446, + "grad_norm": 0.16797685623168945, + "learning_rate": 4.4421232455283145e-06, + "loss": 0.9069, + "step": 77070 + }, + { + "epoch": 0.5579563797983308, + "grad_norm": 0.15991143882274628, + "learning_rate": 4.442050858867728e-06, + "loss": 0.9102, + "step": 77080 + }, + { + "epoch": 0.558028766458917, + "grad_norm": 0.1536579132080078, + "learning_rate": 4.441978472207142e-06, + "loss": 0.9134, + "step": 77090 + }, + { + "epoch": 0.5581011531195031, + "grad_norm": 0.15524393320083618, + "learning_rate": 4.441906085546555e-06, + "loss": 0.9237, + "step": 77100 + }, + { + "epoch": 0.5581735397800893, + "grad_norm": 0.15506607294082642, + "learning_rate": 4.44183369888597e-06, + "loss": 0.925, + "step": 77110 + }, + { + "epoch": 0.5582459264406755, + "grad_norm": 0.1472669243812561, + "learning_rate": 4.4417613122253834e-06, + "loss": 0.8972, + "step": 77120 + }, + { + "epoch": 0.5583183131012617, + "grad_norm": 0.17821435630321503, + "learning_rate": 4.441688925564797e-06, + "loss": 0.9146, + "step": 77130 + }, + { + "epoch": 0.5583906997618479, + "grad_norm": 0.152136892080307, + "learning_rate": 4.441616538904211e-06, + "loss": 0.9101, + "step": 77140 + }, + { + "epoch": 0.558463086422434, + "grad_norm": 0.16555529832839966, + "learning_rate": 4.441544152243625e-06, + "loss": 0.915, + "step": 77150 + }, + { + "epoch": 0.5585354730830202, + "grad_norm": 0.18034303188323975, + "learning_rate": 4.441471765583039e-06, + "loss": 0.9174, + "step": 77160 + }, + { + "epoch": 0.5586078597436065, + "grad_norm": 0.1521337479352951, + "learning_rate": 4.441399378922452e-06, + "loss": 0.9194, + "step": 77170 + }, + { + "epoch": 0.5586802464041927, + "grad_norm": 0.1529371738433838, + "learning_rate": 4.441326992261866e-06, + "loss": 0.893, + "step": 77180 + }, + { + "epoch": 0.5587526330647788, + "grad_norm": 0.18080340325832367, + "learning_rate": 4.4412546056012804e-06, + "loss": 0.921, + "step": 77190 + }, + { + "epoch": 0.558825019725365, + "grad_norm": 0.15729157626628876, + "learning_rate": 4.441182218940694e-06, + "loss": 0.905, + "step": 77200 + }, + { + "epoch": 0.5588974063859512, + "grad_norm": 0.16402077674865723, + "learning_rate": 4.441109832280108e-06, + "loss": 0.9141, + "step": 77210 + }, + { + "epoch": 0.5589697930465374, + "grad_norm": 0.14309856295585632, + "learning_rate": 4.441037445619521e-06, + "loss": 0.909, + "step": 77220 + }, + { + "epoch": 0.5590421797071236, + "grad_norm": 0.168483704328537, + "learning_rate": 4.440965058958936e-06, + "loss": 0.9038, + "step": 77230 + }, + { + "epoch": 0.5591145663677097, + "grad_norm": 0.15855084359645844, + "learning_rate": 4.440892672298349e-06, + "loss": 0.9107, + "step": 77240 + }, + { + "epoch": 0.5591869530282959, + "grad_norm": 0.2141026258468628, + "learning_rate": 4.440820285637763e-06, + "loss": 0.917, + "step": 77250 + }, + { + "epoch": 0.5592593396888821, + "grad_norm": 0.14907251298427582, + "learning_rate": 4.440747898977177e-06, + "loss": 0.9142, + "step": 77260 + }, + { + "epoch": 0.5593317263494684, + "grad_norm": 0.16853876411914825, + "learning_rate": 4.440675512316591e-06, + "loss": 0.9273, + "step": 77270 + }, + { + "epoch": 0.5594041130100545, + "grad_norm": 0.15877121686935425, + "learning_rate": 4.440603125656005e-06, + "loss": 0.9126, + "step": 77280 + }, + { + "epoch": 0.5594764996706407, + "grad_norm": 0.1590542197227478, + "learning_rate": 4.440530738995418e-06, + "loss": 0.9254, + "step": 77290 + }, + { + "epoch": 0.5595488863312269, + "grad_norm": 0.15544173121452332, + "learning_rate": 4.440458352334832e-06, + "loss": 0.9253, + "step": 77300 + }, + { + "epoch": 0.5596212729918131, + "grad_norm": 0.15453557670116425, + "learning_rate": 4.440385965674246e-06, + "loss": 0.9185, + "step": 77310 + }, + { + "epoch": 0.5596936596523993, + "grad_norm": 0.16094282269477844, + "learning_rate": 4.44031357901366e-06, + "loss": 0.9194, + "step": 77320 + }, + { + "epoch": 0.5597660463129854, + "grad_norm": 0.15466651320457458, + "learning_rate": 4.440241192353074e-06, + "loss": 0.9163, + "step": 77330 + }, + { + "epoch": 0.5598384329735716, + "grad_norm": 0.1567269265651703, + "learning_rate": 4.440168805692487e-06, + "loss": 0.9179, + "step": 77340 + }, + { + "epoch": 0.5599108196341578, + "grad_norm": 0.2014339566230774, + "learning_rate": 4.440096419031901e-06, + "loss": 0.9073, + "step": 77350 + }, + { + "epoch": 0.559983206294744, + "grad_norm": 0.16425397992134094, + "learning_rate": 4.440024032371315e-06, + "loss": 0.8897, + "step": 77360 + }, + { + "epoch": 0.5600555929553301, + "grad_norm": 0.16576462984085083, + "learning_rate": 4.439951645710729e-06, + "loss": 0.9062, + "step": 77370 + }, + { + "epoch": 0.5601279796159164, + "grad_norm": 0.15141081809997559, + "learning_rate": 4.4398792590501425e-06, + "loss": 0.9214, + "step": 77380 + }, + { + "epoch": 0.5602003662765026, + "grad_norm": 0.16057878732681274, + "learning_rate": 4.439806872389556e-06, + "loss": 0.9085, + "step": 77390 + }, + { + "epoch": 0.5602727529370888, + "grad_norm": 0.1669279783964157, + "learning_rate": 4.439734485728971e-06, + "loss": 0.9171, + "step": 77400 + }, + { + "epoch": 0.560345139597675, + "grad_norm": 0.17052066326141357, + "learning_rate": 4.439662099068384e-06, + "loss": 0.9034, + "step": 77410 + }, + { + "epoch": 0.5604175262582611, + "grad_norm": 0.16968189179897308, + "learning_rate": 4.439589712407798e-06, + "loss": 0.9182, + "step": 77420 + }, + { + "epoch": 0.5604899129188473, + "grad_norm": 0.1690019816160202, + "learning_rate": 4.4395173257472115e-06, + "loss": 0.9208, + "step": 77430 + }, + { + "epoch": 0.5605622995794335, + "grad_norm": 0.22552861273288727, + "learning_rate": 4.439444939086626e-06, + "loss": 0.9157, + "step": 77440 + }, + { + "epoch": 0.5606346862400197, + "grad_norm": 0.16138851642608643, + "learning_rate": 4.4393725524260395e-06, + "loss": 0.9054, + "step": 77450 + }, + { + "epoch": 0.5607070729006058, + "grad_norm": 0.15333735942840576, + "learning_rate": 4.439300165765453e-06, + "loss": 0.9084, + "step": 77460 + }, + { + "epoch": 0.560779459561192, + "grad_norm": 0.26510128378868103, + "learning_rate": 4.439227779104867e-06, + "loss": 0.9149, + "step": 77470 + }, + { + "epoch": 0.5608518462217783, + "grad_norm": 0.14910390973091125, + "learning_rate": 4.439155392444281e-06, + "loss": 0.9052, + "step": 77480 + }, + { + "epoch": 0.5609242328823645, + "grad_norm": 0.15675272047519684, + "learning_rate": 4.439083005783695e-06, + "loss": 0.9103, + "step": 77490 + }, + { + "epoch": 0.5609966195429507, + "grad_norm": 0.16790616512298584, + "learning_rate": 4.4390106191231085e-06, + "loss": 0.8932, + "step": 77500 + }, + { + "epoch": 0.5610690062035368, + "grad_norm": 0.15522590279579163, + "learning_rate": 4.438938232462522e-06, + "loss": 0.9203, + "step": 77510 + }, + { + "epoch": 0.561141392864123, + "grad_norm": 0.1486007571220398, + "learning_rate": 4.4388658458019365e-06, + "loss": 0.893, + "step": 77520 + }, + { + "epoch": 0.5612137795247092, + "grad_norm": 0.16432353854179382, + "learning_rate": 4.43879345914135e-06, + "loss": 0.9105, + "step": 77530 + }, + { + "epoch": 0.5612861661852954, + "grad_norm": 0.16237597167491913, + "learning_rate": 4.438721072480763e-06, + "loss": 0.9097, + "step": 77540 + }, + { + "epoch": 0.5613585528458815, + "grad_norm": 0.15193864703178406, + "learning_rate": 4.438648685820177e-06, + "loss": 0.9242, + "step": 77550 + }, + { + "epoch": 0.5614309395064677, + "grad_norm": 0.18787966668605804, + "learning_rate": 4.438576299159591e-06, + "loss": 0.9021, + "step": 77560 + }, + { + "epoch": 0.5615033261670539, + "grad_norm": 0.17147335410118103, + "learning_rate": 4.438503912499005e-06, + "loss": 0.9167, + "step": 77570 + }, + { + "epoch": 0.5615757128276401, + "grad_norm": 0.16556903719902039, + "learning_rate": 4.438431525838418e-06, + "loss": 0.9053, + "step": 77580 + }, + { + "epoch": 0.5616480994882264, + "grad_norm": 0.17590190470218658, + "learning_rate": 4.438359139177833e-06, + "loss": 0.9204, + "step": 77590 + }, + { + "epoch": 0.5617204861488125, + "grad_norm": 0.173880934715271, + "learning_rate": 4.438286752517246e-06, + "loss": 0.9062, + "step": 77600 + }, + { + "epoch": 0.5617928728093987, + "grad_norm": 0.16164129972457886, + "learning_rate": 4.43821436585666e-06, + "loss": 0.9107, + "step": 77610 + }, + { + "epoch": 0.5618652594699849, + "grad_norm": 0.1576722264289856, + "learning_rate": 4.4381419791960736e-06, + "loss": 0.9131, + "step": 77620 + }, + { + "epoch": 0.5619376461305711, + "grad_norm": 0.17891205847263336, + "learning_rate": 4.438069592535488e-06, + "loss": 0.9132, + "step": 77630 + }, + { + "epoch": 0.5620100327911572, + "grad_norm": 0.149394229054451, + "learning_rate": 4.437997205874902e-06, + "loss": 0.9029, + "step": 77640 + }, + { + "epoch": 0.5620824194517434, + "grad_norm": 0.1493150144815445, + "learning_rate": 4.437924819214315e-06, + "loss": 0.9246, + "step": 77650 + }, + { + "epoch": 0.5621548061123296, + "grad_norm": 0.1570667326450348, + "learning_rate": 4.437852432553729e-06, + "loss": 0.9175, + "step": 77660 + }, + { + "epoch": 0.5622271927729158, + "grad_norm": 0.1554926335811615, + "learning_rate": 4.437780045893143e-06, + "loss": 0.907, + "step": 77670 + }, + { + "epoch": 0.562299579433502, + "grad_norm": 0.18290413916110992, + "learning_rate": 4.437707659232557e-06, + "loss": 0.9032, + "step": 77680 + }, + { + "epoch": 0.5623719660940881, + "grad_norm": 0.1693306416273117, + "learning_rate": 4.4376352725719706e-06, + "loss": 0.9073, + "step": 77690 + }, + { + "epoch": 0.5624443527546744, + "grad_norm": 0.16810953617095947, + "learning_rate": 4.437562885911384e-06, + "loss": 0.9203, + "step": 77700 + }, + { + "epoch": 0.5625167394152606, + "grad_norm": 0.1598738580942154, + "learning_rate": 4.437490499250799e-06, + "loss": 0.919, + "step": 77710 + }, + { + "epoch": 0.5625891260758468, + "grad_norm": 0.17527292668819427, + "learning_rate": 4.437418112590212e-06, + "loss": 0.9263, + "step": 77720 + }, + { + "epoch": 0.562661512736433, + "grad_norm": 0.14632010459899902, + "learning_rate": 4.437345725929626e-06, + "loss": 0.9096, + "step": 77730 + }, + { + "epoch": 0.5627338993970191, + "grad_norm": 0.16045227646827698, + "learning_rate": 4.4372733392690395e-06, + "loss": 0.9228, + "step": 77740 + }, + { + "epoch": 0.5628062860576053, + "grad_norm": 0.1670956164598465, + "learning_rate": 4.437200952608454e-06, + "loss": 0.9122, + "step": 77750 + }, + { + "epoch": 0.5628786727181915, + "grad_norm": 0.15484373271465302, + "learning_rate": 4.4371285659478676e-06, + "loss": 0.9101, + "step": 77760 + }, + { + "epoch": 0.5629510593787777, + "grad_norm": 0.1494317650794983, + "learning_rate": 4.437056179287281e-06, + "loss": 0.9119, + "step": 77770 + }, + { + "epoch": 0.5630234460393638, + "grad_norm": 0.15059392154216766, + "learning_rate": 4.436983792626695e-06, + "loss": 0.9092, + "step": 77780 + }, + { + "epoch": 0.56309583269995, + "grad_norm": 0.19551579654216766, + "learning_rate": 4.436911405966109e-06, + "loss": 0.9181, + "step": 77790 + }, + { + "epoch": 0.5631682193605363, + "grad_norm": 0.14669376611709595, + "learning_rate": 4.436839019305523e-06, + "loss": 0.9141, + "step": 77800 + }, + { + "epoch": 0.5632406060211225, + "grad_norm": 0.16056394577026367, + "learning_rate": 4.4367666326449365e-06, + "loss": 0.9127, + "step": 77810 + }, + { + "epoch": 0.5633129926817086, + "grad_norm": 0.15070192515850067, + "learning_rate": 4.43669424598435e-06, + "loss": 0.8972, + "step": 77820 + }, + { + "epoch": 0.5633853793422948, + "grad_norm": 0.16001105308532715, + "learning_rate": 4.436621859323765e-06, + "loss": 0.9019, + "step": 77830 + }, + { + "epoch": 0.563457766002881, + "grad_norm": 0.14969342947006226, + "learning_rate": 4.436549472663178e-06, + "loss": 0.9138, + "step": 77840 + }, + { + "epoch": 0.5635301526634672, + "grad_norm": 0.15888501703739166, + "learning_rate": 4.436477086002592e-06, + "loss": 0.9168, + "step": 77850 + }, + { + "epoch": 0.5636025393240534, + "grad_norm": 0.16971467435359955, + "learning_rate": 4.4364046993420054e-06, + "loss": 0.9011, + "step": 77860 + }, + { + "epoch": 0.5636749259846395, + "grad_norm": 0.18575704097747803, + "learning_rate": 4.43633231268142e-06, + "loss": 0.8922, + "step": 77870 + }, + { + "epoch": 0.5637473126452257, + "grad_norm": 0.161865696310997, + "learning_rate": 4.4362599260208335e-06, + "loss": 0.9045, + "step": 77880 + }, + { + "epoch": 0.5638196993058119, + "grad_norm": 0.22121845185756683, + "learning_rate": 4.436187539360247e-06, + "loss": 0.9184, + "step": 77890 + }, + { + "epoch": 0.5638920859663981, + "grad_norm": 0.15823763608932495, + "learning_rate": 4.436115152699661e-06, + "loss": 0.898, + "step": 77900 + }, + { + "epoch": 0.5639644726269843, + "grad_norm": 0.1676093190908432, + "learning_rate": 4.436042766039075e-06, + "loss": 0.9173, + "step": 77910 + }, + { + "epoch": 0.5640368592875705, + "grad_norm": 0.15236294269561768, + "learning_rate": 4.435970379378489e-06, + "loss": 0.9052, + "step": 77920 + }, + { + "epoch": 0.5641092459481567, + "grad_norm": 0.16066612303256989, + "learning_rate": 4.4358979927179024e-06, + "loss": 0.9102, + "step": 77930 + }, + { + "epoch": 0.5641816326087429, + "grad_norm": 0.162504181265831, + "learning_rate": 4.435825606057316e-06, + "loss": 0.8935, + "step": 77940 + }, + { + "epoch": 0.564254019269329, + "grad_norm": 0.1564251035451889, + "learning_rate": 4.4357532193967305e-06, + "loss": 0.9219, + "step": 77950 + }, + { + "epoch": 0.5643264059299152, + "grad_norm": 0.14474442601203918, + "learning_rate": 4.435680832736144e-06, + "loss": 0.9118, + "step": 77960 + }, + { + "epoch": 0.5643987925905014, + "grad_norm": 0.1615808606147766, + "learning_rate": 4.435608446075558e-06, + "loss": 0.9025, + "step": 77970 + }, + { + "epoch": 0.5644711792510876, + "grad_norm": 0.16067107021808624, + "learning_rate": 4.435536059414971e-06, + "loss": 0.9064, + "step": 77980 + }, + { + "epoch": 0.5645435659116738, + "grad_norm": 0.1663515865802765, + "learning_rate": 4.435463672754385e-06, + "loss": 0.8929, + "step": 77990 + }, + { + "epoch": 0.5646159525722599, + "grad_norm": 0.15532717108726501, + "learning_rate": 4.4353912860937994e-06, + "loss": 0.9231, + "step": 78000 + }, + { + "epoch": 0.5646883392328462, + "grad_norm": 0.16401223838329315, + "learning_rate": 4.435318899433213e-06, + "loss": 0.9048, + "step": 78010 + }, + { + "epoch": 0.5647607258934324, + "grad_norm": 0.15295349061489105, + "learning_rate": 4.435246512772627e-06, + "loss": 0.9135, + "step": 78020 + }, + { + "epoch": 0.5648331125540186, + "grad_norm": 0.16399818658828735, + "learning_rate": 4.43517412611204e-06, + "loss": 0.9169, + "step": 78030 + }, + { + "epoch": 0.5649054992146048, + "grad_norm": 0.15173108875751495, + "learning_rate": 4.435101739451455e-06, + "loss": 0.9184, + "step": 78040 + }, + { + "epoch": 0.5649778858751909, + "grad_norm": 0.15851818025112152, + "learning_rate": 4.435029352790868e-06, + "loss": 0.907, + "step": 78050 + }, + { + "epoch": 0.5650502725357771, + "grad_norm": 0.14833968877792358, + "learning_rate": 4.434956966130282e-06, + "loss": 0.9057, + "step": 78060 + }, + { + "epoch": 0.5651226591963633, + "grad_norm": 0.15308476984500885, + "learning_rate": 4.434884579469696e-06, + "loss": 0.9124, + "step": 78070 + }, + { + "epoch": 0.5651950458569495, + "grad_norm": 0.1614348590373993, + "learning_rate": 4.434812192809109e-06, + "loss": 0.9196, + "step": 78080 + }, + { + "epoch": 0.5652674325175356, + "grad_norm": 0.15268519520759583, + "learning_rate": 4.434739806148523e-06, + "loss": 0.9167, + "step": 78090 + }, + { + "epoch": 0.5653398191781218, + "grad_norm": 0.20000995695590973, + "learning_rate": 4.434667419487937e-06, + "loss": 0.8982, + "step": 78100 + }, + { + "epoch": 0.565412205838708, + "grad_norm": 0.19799144566059113, + "learning_rate": 4.434595032827351e-06, + "loss": 0.9041, + "step": 78110 + }, + { + "epoch": 0.5654845924992943, + "grad_norm": 0.16405373811721802, + "learning_rate": 4.4345226461667645e-06, + "loss": 0.9125, + "step": 78120 + }, + { + "epoch": 0.5655569791598805, + "grad_norm": 0.14727067947387695, + "learning_rate": 4.434450259506178e-06, + "loss": 0.9143, + "step": 78130 + }, + { + "epoch": 0.5656293658204666, + "grad_norm": 0.15156599879264832, + "learning_rate": 4.434377872845592e-06, + "loss": 0.9086, + "step": 78140 + }, + { + "epoch": 0.5657017524810528, + "grad_norm": 0.16756990551948547, + "learning_rate": 4.434305486185006e-06, + "loss": 0.919, + "step": 78150 + }, + { + "epoch": 0.565774139141639, + "grad_norm": 0.159501850605011, + "learning_rate": 4.43423309952442e-06, + "loss": 0.8931, + "step": 78160 + }, + { + "epoch": 0.5658465258022252, + "grad_norm": 0.1752566546201706, + "learning_rate": 4.4341607128638335e-06, + "loss": 0.9072, + "step": 78170 + }, + { + "epoch": 0.5659189124628113, + "grad_norm": 0.15048715472221375, + "learning_rate": 4.434088326203247e-06, + "loss": 0.8956, + "step": 78180 + }, + { + "epoch": 0.5659912991233975, + "grad_norm": 0.18843425810337067, + "learning_rate": 4.4340159395426615e-06, + "loss": 0.9209, + "step": 78190 + }, + { + "epoch": 0.5660636857839837, + "grad_norm": 0.23466771841049194, + "learning_rate": 4.433943552882075e-06, + "loss": 0.9072, + "step": 78200 + }, + { + "epoch": 0.5661360724445699, + "grad_norm": 0.1651633083820343, + "learning_rate": 4.433871166221489e-06, + "loss": 0.9061, + "step": 78210 + }, + { + "epoch": 0.566208459105156, + "grad_norm": 0.18153083324432373, + "learning_rate": 4.433798779560902e-06, + "loss": 0.9043, + "step": 78220 + }, + { + "epoch": 0.5662808457657423, + "grad_norm": 0.14733073115348816, + "learning_rate": 4.433726392900317e-06, + "loss": 0.9011, + "step": 78230 + }, + { + "epoch": 0.5663532324263285, + "grad_norm": 0.16242343187332153, + "learning_rate": 4.4336540062397305e-06, + "loss": 0.9121, + "step": 78240 + }, + { + "epoch": 0.5664256190869147, + "grad_norm": 0.2215704470872879, + "learning_rate": 4.433581619579144e-06, + "loss": 0.9137, + "step": 78250 + }, + { + "epoch": 0.5664980057475009, + "grad_norm": 0.15027424693107605, + "learning_rate": 4.433509232918558e-06, + "loss": 0.911, + "step": 78260 + }, + { + "epoch": 0.566570392408087, + "grad_norm": 0.15450255572795868, + "learning_rate": 4.433436846257972e-06, + "loss": 0.9016, + "step": 78270 + }, + { + "epoch": 0.5666427790686732, + "grad_norm": 0.15546143054962158, + "learning_rate": 4.433364459597386e-06, + "loss": 0.9196, + "step": 78280 + }, + { + "epoch": 0.5667151657292594, + "grad_norm": 0.2212473452091217, + "learning_rate": 4.433292072936799e-06, + "loss": 0.9095, + "step": 78290 + }, + { + "epoch": 0.5667875523898456, + "grad_norm": 0.21143324673175812, + "learning_rate": 4.433219686276213e-06, + "loss": 0.9151, + "step": 78300 + }, + { + "epoch": 0.5668599390504317, + "grad_norm": 0.15912073850631714, + "learning_rate": 4.4331472996156275e-06, + "loss": 0.9131, + "step": 78310 + }, + { + "epoch": 0.5669323257110179, + "grad_norm": 0.16560253500938416, + "learning_rate": 4.433074912955041e-06, + "loss": 0.9257, + "step": 78320 + }, + { + "epoch": 0.5670047123716042, + "grad_norm": 0.15693403780460358, + "learning_rate": 4.433002526294455e-06, + "loss": 0.9085, + "step": 78330 + }, + { + "epoch": 0.5670770990321904, + "grad_norm": 0.15633919835090637, + "learning_rate": 4.432930139633868e-06, + "loss": 0.9304, + "step": 78340 + }, + { + "epoch": 0.5671494856927766, + "grad_norm": 0.2166070193052292, + "learning_rate": 4.432857752973283e-06, + "loss": 0.9092, + "step": 78350 + }, + { + "epoch": 0.5672218723533627, + "grad_norm": 0.154709130525589, + "learning_rate": 4.432785366312696e-06, + "loss": 0.9122, + "step": 78360 + }, + { + "epoch": 0.5672942590139489, + "grad_norm": 0.1582053154706955, + "learning_rate": 4.43271297965211e-06, + "loss": 0.9011, + "step": 78370 + }, + { + "epoch": 0.5673666456745351, + "grad_norm": 0.16059152781963348, + "learning_rate": 4.432640592991524e-06, + "loss": 0.9082, + "step": 78380 + }, + { + "epoch": 0.5674390323351213, + "grad_norm": 0.1610398292541504, + "learning_rate": 4.432568206330938e-06, + "loss": 0.9072, + "step": 78390 + }, + { + "epoch": 0.5675114189957075, + "grad_norm": 0.1508665531873703, + "learning_rate": 4.432495819670352e-06, + "loss": 0.9011, + "step": 78400 + }, + { + "epoch": 0.5675838056562936, + "grad_norm": 0.16580963134765625, + "learning_rate": 4.432423433009765e-06, + "loss": 0.9156, + "step": 78410 + }, + { + "epoch": 0.5676561923168798, + "grad_norm": 0.17013192176818848, + "learning_rate": 4.432351046349179e-06, + "loss": 0.9119, + "step": 78420 + }, + { + "epoch": 0.567728578977466, + "grad_norm": 0.15810072422027588, + "learning_rate": 4.432278659688593e-06, + "loss": 0.9018, + "step": 78430 + }, + { + "epoch": 0.5678009656380523, + "grad_norm": 0.1526464819908142, + "learning_rate": 4.432206273028007e-06, + "loss": 0.9058, + "step": 78440 + }, + { + "epoch": 0.5678733522986384, + "grad_norm": 0.15462712943553925, + "learning_rate": 4.432133886367421e-06, + "loss": 0.9117, + "step": 78450 + }, + { + "epoch": 0.5679457389592246, + "grad_norm": 0.1546926200389862, + "learning_rate": 4.432061499706834e-06, + "loss": 0.9001, + "step": 78460 + }, + { + "epoch": 0.5680181256198108, + "grad_norm": 0.152653768658638, + "learning_rate": 4.431989113046249e-06, + "loss": 0.8992, + "step": 78470 + }, + { + "epoch": 0.568090512280397, + "grad_norm": 0.20859988033771515, + "learning_rate": 4.431916726385662e-06, + "loss": 0.9182, + "step": 78480 + }, + { + "epoch": 0.5681628989409832, + "grad_norm": 0.16723498702049255, + "learning_rate": 4.431844339725076e-06, + "loss": 0.898, + "step": 78490 + }, + { + "epoch": 0.5682352856015693, + "grad_norm": 0.16271011531352997, + "learning_rate": 4.4317719530644896e-06, + "loss": 0.9084, + "step": 78500 + }, + { + "epoch": 0.5683076722621555, + "grad_norm": 0.15765948593616486, + "learning_rate": 4.431699566403904e-06, + "loss": 0.9245, + "step": 78510 + }, + { + "epoch": 0.5683800589227417, + "grad_norm": 0.15575428307056427, + "learning_rate": 4.431627179743318e-06, + "loss": 0.9282, + "step": 78520 + }, + { + "epoch": 0.5684524455833279, + "grad_norm": 0.17210975289344788, + "learning_rate": 4.431554793082731e-06, + "loss": 0.9234, + "step": 78530 + }, + { + "epoch": 0.5685248322439141, + "grad_norm": 0.15548361837863922, + "learning_rate": 4.431482406422145e-06, + "loss": 0.9141, + "step": 78540 + }, + { + "epoch": 0.5685972189045003, + "grad_norm": 0.15747930109500885, + "learning_rate": 4.431410019761559e-06, + "loss": 0.9105, + "step": 78550 + }, + { + "epoch": 0.5686696055650865, + "grad_norm": 0.19277207553386688, + "learning_rate": 4.431337633100973e-06, + "loss": 0.8969, + "step": 78560 + }, + { + "epoch": 0.5687419922256727, + "grad_norm": 0.15888191759586334, + "learning_rate": 4.431265246440387e-06, + "loss": 0.9133, + "step": 78570 + }, + { + "epoch": 0.5688143788862589, + "grad_norm": 0.18703103065490723, + "learning_rate": 4.4311928597798e-06, + "loss": 0.9204, + "step": 78580 + }, + { + "epoch": 0.568886765546845, + "grad_norm": 0.15291064977645874, + "learning_rate": 4.431120473119214e-06, + "loss": 0.9055, + "step": 78590 + }, + { + "epoch": 0.5689591522074312, + "grad_norm": 0.15586498379707336, + "learning_rate": 4.4310480864586274e-06, + "loss": 0.9007, + "step": 78600 + }, + { + "epoch": 0.5690315388680174, + "grad_norm": 0.17706800997257233, + "learning_rate": 4.430975699798041e-06, + "loss": 0.9223, + "step": 78610 + }, + { + "epoch": 0.5691039255286036, + "grad_norm": 0.1869611293077469, + "learning_rate": 4.4309033131374555e-06, + "loss": 0.9168, + "step": 78620 + }, + { + "epoch": 0.5691763121891897, + "grad_norm": 0.20068153738975525, + "learning_rate": 4.430830926476869e-06, + "loss": 0.9092, + "step": 78630 + }, + { + "epoch": 0.5692486988497759, + "grad_norm": 0.15373751521110535, + "learning_rate": 4.430758539816283e-06, + "loss": 0.9068, + "step": 78640 + }, + { + "epoch": 0.5693210855103622, + "grad_norm": 0.161079540848732, + "learning_rate": 4.430686153155696e-06, + "loss": 0.9072, + "step": 78650 + }, + { + "epoch": 0.5693934721709484, + "grad_norm": 0.17405648529529572, + "learning_rate": 4.430613766495111e-06, + "loss": 0.9152, + "step": 78660 + }, + { + "epoch": 0.5694658588315346, + "grad_norm": 0.20295019447803497, + "learning_rate": 4.4305413798345244e-06, + "loss": 0.9121, + "step": 78670 + }, + { + "epoch": 0.5695382454921207, + "grad_norm": 0.16853387653827667, + "learning_rate": 4.430468993173938e-06, + "loss": 0.9134, + "step": 78680 + }, + { + "epoch": 0.5696106321527069, + "grad_norm": 0.21727709472179413, + "learning_rate": 4.430396606513352e-06, + "loss": 0.9049, + "step": 78690 + }, + { + "epoch": 0.5696830188132931, + "grad_norm": 0.15580429136753082, + "learning_rate": 4.430324219852766e-06, + "loss": 0.9035, + "step": 78700 + }, + { + "epoch": 0.5697554054738793, + "grad_norm": 0.1704913228750229, + "learning_rate": 4.43025183319218e-06, + "loss": 0.9269, + "step": 78710 + }, + { + "epoch": 0.5698277921344654, + "grad_norm": 0.1460624784231186, + "learning_rate": 4.430179446531593e-06, + "loss": 0.9113, + "step": 78720 + }, + { + "epoch": 0.5699001787950516, + "grad_norm": 0.16129523515701294, + "learning_rate": 4.430107059871007e-06, + "loss": 0.9033, + "step": 78730 + }, + { + "epoch": 0.5699725654556378, + "grad_norm": 0.15936952829360962, + "learning_rate": 4.4300346732104214e-06, + "loss": 0.9164, + "step": 78740 + }, + { + "epoch": 0.570044952116224, + "grad_norm": 0.15949364006519318, + "learning_rate": 4.429962286549835e-06, + "loss": 0.9013, + "step": 78750 + }, + { + "epoch": 0.5701173387768103, + "grad_norm": 0.16319353878498077, + "learning_rate": 4.429889899889249e-06, + "loss": 0.9086, + "step": 78760 + }, + { + "epoch": 0.5701897254373964, + "grad_norm": 0.16823211312294006, + "learning_rate": 4.429817513228662e-06, + "loss": 0.9163, + "step": 78770 + }, + { + "epoch": 0.5702621120979826, + "grad_norm": 0.16069024801254272, + "learning_rate": 4.429745126568076e-06, + "loss": 0.9148, + "step": 78780 + }, + { + "epoch": 0.5703344987585688, + "grad_norm": 0.15689004957675934, + "learning_rate": 4.42967273990749e-06, + "loss": 0.9087, + "step": 78790 + }, + { + "epoch": 0.570406885419155, + "grad_norm": 0.19310441613197327, + "learning_rate": 4.429600353246904e-06, + "loss": 0.9031, + "step": 78800 + }, + { + "epoch": 0.5704792720797411, + "grad_norm": 0.15190792083740234, + "learning_rate": 4.429527966586318e-06, + "loss": 0.908, + "step": 78810 + }, + { + "epoch": 0.5705516587403273, + "grad_norm": 0.1643807291984558, + "learning_rate": 4.429455579925731e-06, + "loss": 0.9052, + "step": 78820 + }, + { + "epoch": 0.5706240454009135, + "grad_norm": 0.16099394857883453, + "learning_rate": 4.429383193265146e-06, + "loss": 0.8997, + "step": 78830 + }, + { + "epoch": 0.5706964320614997, + "grad_norm": 0.1543867588043213, + "learning_rate": 4.429310806604559e-06, + "loss": 0.9139, + "step": 78840 + }, + { + "epoch": 0.5707688187220858, + "grad_norm": 0.1512497216463089, + "learning_rate": 4.429238419943973e-06, + "loss": 0.8972, + "step": 78850 + }, + { + "epoch": 0.5708412053826721, + "grad_norm": 0.1591501533985138, + "learning_rate": 4.4291660332833865e-06, + "loss": 0.9097, + "step": 78860 + }, + { + "epoch": 0.5709135920432583, + "grad_norm": 0.1647246778011322, + "learning_rate": 4.429093646622801e-06, + "loss": 0.903, + "step": 78870 + }, + { + "epoch": 0.5709859787038445, + "grad_norm": 0.1652226597070694, + "learning_rate": 4.429021259962215e-06, + "loss": 0.9128, + "step": 78880 + }, + { + "epoch": 0.5710583653644307, + "grad_norm": 0.16023515164852142, + "learning_rate": 4.428948873301628e-06, + "loss": 0.9068, + "step": 78890 + }, + { + "epoch": 0.5711307520250168, + "grad_norm": 0.16677772998809814, + "learning_rate": 4.428876486641042e-06, + "loss": 0.9067, + "step": 78900 + }, + { + "epoch": 0.571203138685603, + "grad_norm": 0.15754565596580505, + "learning_rate": 4.428804099980456e-06, + "loss": 0.8981, + "step": 78910 + }, + { + "epoch": 0.5712755253461892, + "grad_norm": 0.1568695604801178, + "learning_rate": 4.42873171331987e-06, + "loss": 0.9127, + "step": 78920 + }, + { + "epoch": 0.5713479120067754, + "grad_norm": 0.14470389485359192, + "learning_rate": 4.4286593266592835e-06, + "loss": 0.908, + "step": 78930 + }, + { + "epoch": 0.5714202986673615, + "grad_norm": 0.17773394286632538, + "learning_rate": 4.428586939998697e-06, + "loss": 0.9145, + "step": 78940 + }, + { + "epoch": 0.5714926853279477, + "grad_norm": 0.18091265857219696, + "learning_rate": 4.428514553338112e-06, + "loss": 0.9129, + "step": 78950 + }, + { + "epoch": 0.5715650719885339, + "grad_norm": 0.17283041775226593, + "learning_rate": 4.428442166677525e-06, + "loss": 0.9103, + "step": 78960 + }, + { + "epoch": 0.5716374586491202, + "grad_norm": 0.17135843634605408, + "learning_rate": 4.428369780016939e-06, + "loss": 0.9144, + "step": 78970 + }, + { + "epoch": 0.5717098453097064, + "grad_norm": 0.15438847243785858, + "learning_rate": 4.4282973933563525e-06, + "loss": 0.9039, + "step": 78980 + }, + { + "epoch": 0.5717822319702925, + "grad_norm": 0.16643427312374115, + "learning_rate": 4.428225006695767e-06, + "loss": 0.9042, + "step": 78990 + }, + { + "epoch": 0.5718546186308787, + "grad_norm": 0.17289410531520844, + "learning_rate": 4.4281526200351805e-06, + "loss": 0.909, + "step": 79000 + }, + { + "epoch": 0.5719270052914649, + "grad_norm": 0.15444664657115936, + "learning_rate": 4.428080233374594e-06, + "loss": 0.9129, + "step": 79010 + }, + { + "epoch": 0.5719993919520511, + "grad_norm": 0.1560271829366684, + "learning_rate": 4.428007846714008e-06, + "loss": 0.9051, + "step": 79020 + }, + { + "epoch": 0.5720717786126372, + "grad_norm": 0.1660393625497818, + "learning_rate": 4.427935460053422e-06, + "loss": 0.9067, + "step": 79030 + }, + { + "epoch": 0.5721441652732234, + "grad_norm": 0.16919827461242676, + "learning_rate": 4.427863073392836e-06, + "loss": 0.9075, + "step": 79040 + }, + { + "epoch": 0.5722165519338096, + "grad_norm": 0.16163021326065063, + "learning_rate": 4.4277906867322495e-06, + "loss": 0.9167, + "step": 79050 + }, + { + "epoch": 0.5722889385943958, + "grad_norm": 0.15363195538520813, + "learning_rate": 4.427718300071663e-06, + "loss": 0.9091, + "step": 79060 + }, + { + "epoch": 0.5723613252549821, + "grad_norm": 0.22884926199913025, + "learning_rate": 4.4276459134110776e-06, + "loss": 0.9061, + "step": 79070 + }, + { + "epoch": 0.5724337119155682, + "grad_norm": 0.17008480429649353, + "learning_rate": 4.427573526750491e-06, + "loss": 0.8936, + "step": 79080 + }, + { + "epoch": 0.5725060985761544, + "grad_norm": 0.1554049849510193, + "learning_rate": 4.427501140089905e-06, + "loss": 0.9041, + "step": 79090 + }, + { + "epoch": 0.5725784852367406, + "grad_norm": 0.15605086088180542, + "learning_rate": 4.427428753429318e-06, + "loss": 0.9061, + "step": 79100 + }, + { + "epoch": 0.5726508718973268, + "grad_norm": 0.18999440968036652, + "learning_rate": 4.427356366768733e-06, + "loss": 0.918, + "step": 79110 + }, + { + "epoch": 0.572723258557913, + "grad_norm": 0.14956019818782806, + "learning_rate": 4.4272839801081465e-06, + "loss": 0.9107, + "step": 79120 + }, + { + "epoch": 0.5727956452184991, + "grad_norm": 0.16016794741153717, + "learning_rate": 4.427211593447559e-06, + "loss": 0.9077, + "step": 79130 + }, + { + "epoch": 0.5728680318790853, + "grad_norm": 0.16640208661556244, + "learning_rate": 4.427139206786974e-06, + "loss": 0.9131, + "step": 79140 + }, + { + "epoch": 0.5729404185396715, + "grad_norm": 0.14928004145622253, + "learning_rate": 4.427066820126387e-06, + "loss": 0.9216, + "step": 79150 + }, + { + "epoch": 0.5730128052002577, + "grad_norm": 0.17138387262821198, + "learning_rate": 4.426994433465801e-06, + "loss": 0.912, + "step": 79160 + }, + { + "epoch": 0.5730851918608438, + "grad_norm": 0.16592368483543396, + "learning_rate": 4.4269220468052146e-06, + "loss": 0.9082, + "step": 79170 + }, + { + "epoch": 0.5731575785214301, + "grad_norm": 0.15400424599647522, + "learning_rate": 4.426849660144629e-06, + "loss": 0.8956, + "step": 79180 + }, + { + "epoch": 0.5732299651820163, + "grad_norm": 0.16034607589244843, + "learning_rate": 4.426777273484043e-06, + "loss": 0.9104, + "step": 79190 + }, + { + "epoch": 0.5733023518426025, + "grad_norm": 0.16131481528282166, + "learning_rate": 4.426704886823456e-06, + "loss": 0.9076, + "step": 79200 + }, + { + "epoch": 0.5733747385031887, + "grad_norm": 0.15663005411624908, + "learning_rate": 4.42663250016287e-06, + "loss": 0.9027, + "step": 79210 + }, + { + "epoch": 0.5734471251637748, + "grad_norm": 0.15593023598194122, + "learning_rate": 4.426560113502284e-06, + "loss": 0.8946, + "step": 79220 + }, + { + "epoch": 0.573519511824361, + "grad_norm": 0.15859454870224, + "learning_rate": 4.426487726841698e-06, + "loss": 0.908, + "step": 79230 + }, + { + "epoch": 0.5735918984849472, + "grad_norm": 0.2031136453151703, + "learning_rate": 4.4264153401811116e-06, + "loss": 0.903, + "step": 79240 + }, + { + "epoch": 0.5736642851455334, + "grad_norm": 0.1812455952167511, + "learning_rate": 4.426342953520525e-06, + "loss": 0.9156, + "step": 79250 + }, + { + "epoch": 0.5737366718061195, + "grad_norm": 0.16677896678447723, + "learning_rate": 4.42627056685994e-06, + "loss": 0.9107, + "step": 79260 + }, + { + "epoch": 0.5738090584667057, + "grad_norm": 0.19645458459854126, + "learning_rate": 4.426198180199353e-06, + "loss": 0.9055, + "step": 79270 + }, + { + "epoch": 0.5738814451272919, + "grad_norm": 0.15948496758937836, + "learning_rate": 4.426125793538767e-06, + "loss": 0.9226, + "step": 79280 + }, + { + "epoch": 0.5739538317878782, + "grad_norm": 0.152080699801445, + "learning_rate": 4.4260534068781805e-06, + "loss": 0.9106, + "step": 79290 + }, + { + "epoch": 0.5740262184484644, + "grad_norm": 0.16088615357875824, + "learning_rate": 4.425981020217595e-06, + "loss": 0.9034, + "step": 79300 + }, + { + "epoch": 0.5740986051090505, + "grad_norm": 0.1661704033613205, + "learning_rate": 4.425908633557009e-06, + "loss": 0.9064, + "step": 79310 + }, + { + "epoch": 0.5741709917696367, + "grad_norm": 0.175064355134964, + "learning_rate": 4.425836246896422e-06, + "loss": 0.9014, + "step": 79320 + }, + { + "epoch": 0.5742433784302229, + "grad_norm": 0.1588856726884842, + "learning_rate": 4.425763860235836e-06, + "loss": 0.904, + "step": 79330 + }, + { + "epoch": 0.5743157650908091, + "grad_norm": 0.16363567113876343, + "learning_rate": 4.42569147357525e-06, + "loss": 0.9076, + "step": 79340 + }, + { + "epoch": 0.5743881517513952, + "grad_norm": 0.1526729166507721, + "learning_rate": 4.425619086914664e-06, + "loss": 0.9174, + "step": 79350 + }, + { + "epoch": 0.5744605384119814, + "grad_norm": 0.15896102786064148, + "learning_rate": 4.4255467002540775e-06, + "loss": 0.9168, + "step": 79360 + }, + { + "epoch": 0.5745329250725676, + "grad_norm": 0.15912756323814392, + "learning_rate": 4.425474313593491e-06, + "loss": 0.9176, + "step": 79370 + }, + { + "epoch": 0.5746053117331538, + "grad_norm": 0.15687337517738342, + "learning_rate": 4.425401926932905e-06, + "loss": 0.9177, + "step": 79380 + }, + { + "epoch": 0.57467769839374, + "grad_norm": 0.1485118865966797, + "learning_rate": 4.425329540272319e-06, + "loss": 0.9097, + "step": 79390 + }, + { + "epoch": 0.5747500850543262, + "grad_norm": 0.1573365032672882, + "learning_rate": 4.425257153611733e-06, + "loss": 0.9106, + "step": 79400 + }, + { + "epoch": 0.5748224717149124, + "grad_norm": 0.16917355358600616, + "learning_rate": 4.4251847669511464e-06, + "loss": 0.9, + "step": 79410 + }, + { + "epoch": 0.5748948583754986, + "grad_norm": 0.15793545544147491, + "learning_rate": 4.42511238029056e-06, + "loss": 0.9028, + "step": 79420 + }, + { + "epoch": 0.5749672450360848, + "grad_norm": 0.16056816279888153, + "learning_rate": 4.4250399936299745e-06, + "loss": 0.9103, + "step": 79430 + }, + { + "epoch": 0.5750396316966709, + "grad_norm": 0.21023601293563843, + "learning_rate": 4.424967606969388e-06, + "loss": 0.9131, + "step": 79440 + }, + { + "epoch": 0.5751120183572571, + "grad_norm": 0.16455906629562378, + "learning_rate": 4.424895220308802e-06, + "loss": 0.9136, + "step": 79450 + }, + { + "epoch": 0.5751844050178433, + "grad_norm": 0.15624572336673737, + "learning_rate": 4.424822833648215e-06, + "loss": 0.8955, + "step": 79460 + }, + { + "epoch": 0.5752567916784295, + "grad_norm": 0.15977784991264343, + "learning_rate": 4.42475044698763e-06, + "loss": 0.9127, + "step": 79470 + }, + { + "epoch": 0.5753291783390156, + "grad_norm": 0.15706151723861694, + "learning_rate": 4.4246780603270434e-06, + "loss": 0.9218, + "step": 79480 + }, + { + "epoch": 0.5754015649996018, + "grad_norm": 0.1580117791891098, + "learning_rate": 4.424605673666457e-06, + "loss": 0.9063, + "step": 79490 + }, + { + "epoch": 0.5754739516601881, + "grad_norm": 0.1586533933877945, + "learning_rate": 4.424533287005871e-06, + "loss": 0.9001, + "step": 79500 + }, + { + "epoch": 0.5755463383207743, + "grad_norm": 0.15484578907489777, + "learning_rate": 4.424460900345285e-06, + "loss": 0.9188, + "step": 79510 + }, + { + "epoch": 0.5756187249813605, + "grad_norm": 0.16581545770168304, + "learning_rate": 4.424388513684699e-06, + "loss": 0.9029, + "step": 79520 + }, + { + "epoch": 0.5756911116419466, + "grad_norm": 0.2024676501750946, + "learning_rate": 4.424316127024112e-06, + "loss": 0.9066, + "step": 79530 + }, + { + "epoch": 0.5757634983025328, + "grad_norm": 0.17313936352729797, + "learning_rate": 4.424243740363526e-06, + "loss": 0.9075, + "step": 79540 + }, + { + "epoch": 0.575835884963119, + "grad_norm": 0.15734641253948212, + "learning_rate": 4.4241713537029404e-06, + "loss": 0.891, + "step": 79550 + }, + { + "epoch": 0.5759082716237052, + "grad_norm": 0.16273874044418335, + "learning_rate": 4.424098967042354e-06, + "loss": 0.8987, + "step": 79560 + }, + { + "epoch": 0.5759806582842913, + "grad_norm": 0.1653600037097931, + "learning_rate": 4.424026580381768e-06, + "loss": 0.9086, + "step": 79570 + }, + { + "epoch": 0.5760530449448775, + "grad_norm": 0.14946460723876953, + "learning_rate": 4.423954193721181e-06, + "loss": 0.9125, + "step": 79580 + }, + { + "epoch": 0.5761254316054637, + "grad_norm": 0.15209373831748962, + "learning_rate": 4.423881807060596e-06, + "loss": 0.9055, + "step": 79590 + }, + { + "epoch": 0.5761978182660499, + "grad_norm": 0.15624649822711945, + "learning_rate": 4.423809420400009e-06, + "loss": 0.9077, + "step": 79600 + }, + { + "epoch": 0.5762702049266362, + "grad_norm": 0.15583081543445587, + "learning_rate": 4.423737033739423e-06, + "loss": 0.8949, + "step": 79610 + }, + { + "epoch": 0.5763425915872223, + "grad_norm": 0.17833974957466125, + "learning_rate": 4.423664647078837e-06, + "loss": 0.9099, + "step": 79620 + }, + { + "epoch": 0.5764149782478085, + "grad_norm": 0.15480463206768036, + "learning_rate": 4.423592260418251e-06, + "loss": 0.9146, + "step": 79630 + }, + { + "epoch": 0.5764873649083947, + "grad_norm": 0.17085537314414978, + "learning_rate": 4.423519873757665e-06, + "loss": 0.9139, + "step": 79640 + }, + { + "epoch": 0.5765597515689809, + "grad_norm": 0.14300264418125153, + "learning_rate": 4.423447487097078e-06, + "loss": 0.9212, + "step": 79650 + }, + { + "epoch": 0.576632138229567, + "grad_norm": 0.1670181304216385, + "learning_rate": 4.423375100436492e-06, + "loss": 0.8988, + "step": 79660 + }, + { + "epoch": 0.5767045248901532, + "grad_norm": 0.16258497536182404, + "learning_rate": 4.4233027137759055e-06, + "loss": 0.9033, + "step": 79670 + }, + { + "epoch": 0.5767769115507394, + "grad_norm": 0.16200661659240723, + "learning_rate": 4.423230327115319e-06, + "loss": 0.9015, + "step": 79680 + }, + { + "epoch": 0.5768492982113256, + "grad_norm": 0.1577218770980835, + "learning_rate": 4.423157940454733e-06, + "loss": 0.9133, + "step": 79690 + }, + { + "epoch": 0.5769216848719118, + "grad_norm": 0.1620890349149704, + "learning_rate": 4.423085553794147e-06, + "loss": 0.9146, + "step": 79700 + }, + { + "epoch": 0.576994071532498, + "grad_norm": 0.18708209693431854, + "learning_rate": 4.423013167133561e-06, + "loss": 0.8999, + "step": 79710 + }, + { + "epoch": 0.5770664581930842, + "grad_norm": 0.1733100414276123, + "learning_rate": 4.4229407804729745e-06, + "loss": 0.9122, + "step": 79720 + }, + { + "epoch": 0.5771388448536704, + "grad_norm": 0.15812858939170837, + "learning_rate": 4.422868393812388e-06, + "loss": 0.907, + "step": 79730 + }, + { + "epoch": 0.5772112315142566, + "grad_norm": 0.1608811616897583, + "learning_rate": 4.4227960071518025e-06, + "loss": 0.9055, + "step": 79740 + }, + { + "epoch": 0.5772836181748427, + "grad_norm": 0.17216205596923828, + "learning_rate": 4.422723620491216e-06, + "loss": 0.919, + "step": 79750 + }, + { + "epoch": 0.5773560048354289, + "grad_norm": 0.36230647563934326, + "learning_rate": 4.42265123383063e-06, + "loss": 0.9139, + "step": 79760 + }, + { + "epoch": 0.5774283914960151, + "grad_norm": 0.15506798028945923, + "learning_rate": 4.422578847170043e-06, + "loss": 0.9009, + "step": 79770 + }, + { + "epoch": 0.5775007781566013, + "grad_norm": 0.1626974493265152, + "learning_rate": 4.422506460509458e-06, + "loss": 0.8987, + "step": 79780 + }, + { + "epoch": 0.5775731648171875, + "grad_norm": 0.1675325185060501, + "learning_rate": 4.4224340738488715e-06, + "loss": 0.9121, + "step": 79790 + }, + { + "epoch": 0.5776455514777736, + "grad_norm": 0.15305018424987793, + "learning_rate": 4.422361687188285e-06, + "loss": 0.9116, + "step": 79800 + }, + { + "epoch": 0.5777179381383598, + "grad_norm": 0.16215454041957855, + "learning_rate": 4.422289300527699e-06, + "loss": 0.9109, + "step": 79810 + }, + { + "epoch": 0.5777903247989461, + "grad_norm": 0.1520712971687317, + "learning_rate": 4.422216913867113e-06, + "loss": 0.9134, + "step": 79820 + }, + { + "epoch": 0.5778627114595323, + "grad_norm": 0.16776016354560852, + "learning_rate": 4.422144527206527e-06, + "loss": 0.9024, + "step": 79830 + }, + { + "epoch": 0.5779350981201185, + "grad_norm": 0.16810259222984314, + "learning_rate": 4.42207214054594e-06, + "loss": 0.9109, + "step": 79840 + }, + { + "epoch": 0.5780074847807046, + "grad_norm": 0.16688929498195648, + "learning_rate": 4.421999753885354e-06, + "loss": 0.9119, + "step": 79850 + }, + { + "epoch": 0.5780798714412908, + "grad_norm": 0.15468403697013855, + "learning_rate": 4.4219273672247685e-06, + "loss": 0.9151, + "step": 79860 + }, + { + "epoch": 0.578152258101877, + "grad_norm": 0.14916957914829254, + "learning_rate": 4.421854980564182e-06, + "loss": 0.9155, + "step": 79870 + }, + { + "epoch": 0.5782246447624632, + "grad_norm": 0.15850642323493958, + "learning_rate": 4.421782593903596e-06, + "loss": 0.9105, + "step": 79880 + }, + { + "epoch": 0.5782970314230493, + "grad_norm": 0.15652024745941162, + "learning_rate": 4.421710207243009e-06, + "loss": 0.9018, + "step": 79890 + }, + { + "epoch": 0.5783694180836355, + "grad_norm": 0.15815021097660065, + "learning_rate": 4.421637820582424e-06, + "loss": 0.9002, + "step": 79900 + }, + { + "epoch": 0.5784418047442217, + "grad_norm": 0.15399345755577087, + "learning_rate": 4.421565433921837e-06, + "loss": 0.9184, + "step": 79910 + }, + { + "epoch": 0.578514191404808, + "grad_norm": 0.16379056870937347, + "learning_rate": 4.421493047261251e-06, + "loss": 0.8932, + "step": 79920 + }, + { + "epoch": 0.5785865780653942, + "grad_norm": 0.1618824154138565, + "learning_rate": 4.421420660600665e-06, + "loss": 0.9028, + "step": 79930 + }, + { + "epoch": 0.5786589647259803, + "grad_norm": 0.1554708182811737, + "learning_rate": 4.421348273940079e-06, + "loss": 0.8998, + "step": 79940 + }, + { + "epoch": 0.5787313513865665, + "grad_norm": 0.16406132280826569, + "learning_rate": 4.421275887279493e-06, + "loss": 0.9058, + "step": 79950 + }, + { + "epoch": 0.5788037380471527, + "grad_norm": 0.15660354495048523, + "learning_rate": 4.421203500618906e-06, + "loss": 0.9134, + "step": 79960 + }, + { + "epoch": 0.5788761247077389, + "grad_norm": 0.15395161509513855, + "learning_rate": 4.42113111395832e-06, + "loss": 0.8982, + "step": 79970 + }, + { + "epoch": 0.578948511368325, + "grad_norm": 0.15442846715450287, + "learning_rate": 4.421058727297734e-06, + "loss": 0.9148, + "step": 79980 + }, + { + "epoch": 0.5790208980289112, + "grad_norm": 0.156655415892601, + "learning_rate": 4.420986340637148e-06, + "loss": 0.9125, + "step": 79990 + }, + { + "epoch": 0.5790932846894974, + "grad_norm": 0.16474436223506927, + "learning_rate": 4.420913953976562e-06, + "loss": 0.9103, + "step": 80000 + }, + { + "epoch": 0.5791656713500836, + "grad_norm": 0.17078536748886108, + "learning_rate": 4.420841567315975e-06, + "loss": 0.9094, + "step": 80010 + }, + { + "epoch": 0.5792380580106697, + "grad_norm": 0.1581786423921585, + "learning_rate": 4.420769180655389e-06, + "loss": 0.8923, + "step": 80020 + }, + { + "epoch": 0.579310444671256, + "grad_norm": 0.16456280648708344, + "learning_rate": 4.420696793994803e-06, + "loss": 0.9152, + "step": 80030 + }, + { + "epoch": 0.5793828313318422, + "grad_norm": 0.1432550996541977, + "learning_rate": 4.420624407334217e-06, + "loss": 0.8975, + "step": 80040 + }, + { + "epoch": 0.5794552179924284, + "grad_norm": 0.1518087089061737, + "learning_rate": 4.4205520206736306e-06, + "loss": 0.9096, + "step": 80050 + }, + { + "epoch": 0.5795276046530146, + "grad_norm": 0.15951353311538696, + "learning_rate": 4.420479634013044e-06, + "loss": 0.8996, + "step": 80060 + }, + { + "epoch": 0.5795999913136007, + "grad_norm": 0.16211937367916107, + "learning_rate": 4.420407247352459e-06, + "loss": 0.912, + "step": 80070 + }, + { + "epoch": 0.5796723779741869, + "grad_norm": 0.14895065128803253, + "learning_rate": 4.420334860691872e-06, + "loss": 0.9152, + "step": 80080 + }, + { + "epoch": 0.5797447646347731, + "grad_norm": 0.14833849668502808, + "learning_rate": 4.420262474031286e-06, + "loss": 0.9243, + "step": 80090 + }, + { + "epoch": 0.5798171512953593, + "grad_norm": 0.1672501266002655, + "learning_rate": 4.4201900873706995e-06, + "loss": 0.9072, + "step": 80100 + }, + { + "epoch": 0.5798895379559454, + "grad_norm": 0.16042830049991608, + "learning_rate": 4.420117700710114e-06, + "loss": 0.9099, + "step": 80110 + }, + { + "epoch": 0.5799619246165316, + "grad_norm": 0.16218118369579315, + "learning_rate": 4.420045314049528e-06, + "loss": 0.9036, + "step": 80120 + }, + { + "epoch": 0.5800343112771178, + "grad_norm": 0.1452185958623886, + "learning_rate": 4.419972927388941e-06, + "loss": 0.9013, + "step": 80130 + }, + { + "epoch": 0.5801066979377041, + "grad_norm": 0.1564382016658783, + "learning_rate": 4.419900540728355e-06, + "loss": 0.8933, + "step": 80140 + }, + { + "epoch": 0.5801790845982903, + "grad_norm": 0.1605406403541565, + "learning_rate": 4.419828154067769e-06, + "loss": 0.8964, + "step": 80150 + }, + { + "epoch": 0.5802514712588764, + "grad_norm": 0.16458991169929504, + "learning_rate": 4.419755767407183e-06, + "loss": 0.9147, + "step": 80160 + }, + { + "epoch": 0.5803238579194626, + "grad_norm": 0.16406084597110748, + "learning_rate": 4.4196833807465965e-06, + "loss": 0.9032, + "step": 80170 + }, + { + "epoch": 0.5803962445800488, + "grad_norm": 0.16171269118785858, + "learning_rate": 4.41961099408601e-06, + "loss": 0.8972, + "step": 80180 + }, + { + "epoch": 0.580468631240635, + "grad_norm": 0.1607089638710022, + "learning_rate": 4.419538607425424e-06, + "loss": 0.8913, + "step": 80190 + }, + { + "epoch": 0.5805410179012211, + "grad_norm": 0.15454769134521484, + "learning_rate": 4.419466220764837e-06, + "loss": 0.9014, + "step": 80200 + }, + { + "epoch": 0.5806134045618073, + "grad_norm": 0.15976151823997498, + "learning_rate": 4.419393834104251e-06, + "loss": 0.8891, + "step": 80210 + }, + { + "epoch": 0.5806857912223935, + "grad_norm": 0.15922151505947113, + "learning_rate": 4.4193214474436654e-06, + "loss": 0.9069, + "step": 80220 + }, + { + "epoch": 0.5807581778829797, + "grad_norm": 0.1480971723794937, + "learning_rate": 4.419249060783079e-06, + "loss": 0.9023, + "step": 80230 + }, + { + "epoch": 0.580830564543566, + "grad_norm": 0.3126855492591858, + "learning_rate": 4.419176674122493e-06, + "loss": 0.9113, + "step": 80240 + }, + { + "epoch": 0.5809029512041521, + "grad_norm": 0.15098969638347626, + "learning_rate": 4.419104287461906e-06, + "loss": 0.9212, + "step": 80250 + }, + { + "epoch": 0.5809753378647383, + "grad_norm": 0.16232796013355255, + "learning_rate": 4.419031900801321e-06, + "loss": 0.8948, + "step": 80260 + }, + { + "epoch": 0.5810477245253245, + "grad_norm": 0.16059771180152893, + "learning_rate": 4.418959514140734e-06, + "loss": 0.9039, + "step": 80270 + }, + { + "epoch": 0.5811201111859107, + "grad_norm": 0.15262576937675476, + "learning_rate": 4.418887127480148e-06, + "loss": 0.9075, + "step": 80280 + }, + { + "epoch": 0.5811924978464968, + "grad_norm": 0.15464602410793304, + "learning_rate": 4.418814740819562e-06, + "loss": 0.9119, + "step": 80290 + }, + { + "epoch": 0.581264884507083, + "grad_norm": 0.1607392579317093, + "learning_rate": 4.418742354158976e-06, + "loss": 0.903, + "step": 80300 + }, + { + "epoch": 0.5813372711676692, + "grad_norm": 0.16211529076099396, + "learning_rate": 4.41866996749839e-06, + "loss": 0.9033, + "step": 80310 + }, + { + "epoch": 0.5814096578282554, + "grad_norm": 0.1596810221672058, + "learning_rate": 4.418597580837803e-06, + "loss": 0.9072, + "step": 80320 + }, + { + "epoch": 0.5814820444888416, + "grad_norm": 0.15583199262619019, + "learning_rate": 4.418525194177217e-06, + "loss": 0.9146, + "step": 80330 + }, + { + "epoch": 0.5815544311494277, + "grad_norm": 0.1569822132587433, + "learning_rate": 4.418452807516631e-06, + "loss": 0.9046, + "step": 80340 + }, + { + "epoch": 0.581626817810014, + "grad_norm": 0.15323781967163086, + "learning_rate": 4.418380420856045e-06, + "loss": 0.9146, + "step": 80350 + }, + { + "epoch": 0.5816992044706002, + "grad_norm": 0.161820650100708, + "learning_rate": 4.418308034195459e-06, + "loss": 0.9147, + "step": 80360 + }, + { + "epoch": 0.5817715911311864, + "grad_norm": 0.1547018438577652, + "learning_rate": 4.418235647534872e-06, + "loss": 0.9052, + "step": 80370 + }, + { + "epoch": 0.5818439777917725, + "grad_norm": 0.16685166954994202, + "learning_rate": 4.418163260874287e-06, + "loss": 0.9076, + "step": 80380 + }, + { + "epoch": 0.5819163644523587, + "grad_norm": 0.1596868634223938, + "learning_rate": 4.4180908742137e-06, + "loss": 0.8918, + "step": 80390 + }, + { + "epoch": 0.5819887511129449, + "grad_norm": 0.16917213797569275, + "learning_rate": 4.418018487553114e-06, + "loss": 0.8946, + "step": 80400 + }, + { + "epoch": 0.5820611377735311, + "grad_norm": 0.15056735277175903, + "learning_rate": 4.4179461008925275e-06, + "loss": 0.9045, + "step": 80410 + }, + { + "epoch": 0.5821335244341173, + "grad_norm": 0.1574123352766037, + "learning_rate": 4.417873714231942e-06, + "loss": 0.9067, + "step": 80420 + }, + { + "epoch": 0.5822059110947034, + "grad_norm": 0.15226131677627563, + "learning_rate": 4.417801327571356e-06, + "loss": 0.9183, + "step": 80430 + }, + { + "epoch": 0.5822782977552896, + "grad_norm": 0.15113304555416107, + "learning_rate": 4.417728940910769e-06, + "loss": 0.9102, + "step": 80440 + }, + { + "epoch": 0.5823506844158759, + "grad_norm": 0.14975102245807648, + "learning_rate": 4.417656554250183e-06, + "loss": 0.902, + "step": 80450 + }, + { + "epoch": 0.5824230710764621, + "grad_norm": 0.1500435620546341, + "learning_rate": 4.417584167589597e-06, + "loss": 0.9046, + "step": 80460 + }, + { + "epoch": 0.5824954577370483, + "grad_norm": 0.15473416447639465, + "learning_rate": 4.417511780929011e-06, + "loss": 0.9146, + "step": 80470 + }, + { + "epoch": 0.5825678443976344, + "grad_norm": 0.1687445044517517, + "learning_rate": 4.4174393942684245e-06, + "loss": 0.9172, + "step": 80480 + }, + { + "epoch": 0.5826402310582206, + "grad_norm": 0.18456241488456726, + "learning_rate": 4.417367007607838e-06, + "loss": 0.9059, + "step": 80490 + }, + { + "epoch": 0.5827126177188068, + "grad_norm": 0.16103129088878632, + "learning_rate": 4.417294620947253e-06, + "loss": 0.9129, + "step": 80500 + }, + { + "epoch": 0.582785004379393, + "grad_norm": 0.15495523810386658, + "learning_rate": 4.417222234286666e-06, + "loss": 0.8998, + "step": 80510 + }, + { + "epoch": 0.5828573910399791, + "grad_norm": 0.1617932915687561, + "learning_rate": 4.41714984762608e-06, + "loss": 0.9129, + "step": 80520 + }, + { + "epoch": 0.5829297777005653, + "grad_norm": 0.1493421494960785, + "learning_rate": 4.4170774609654935e-06, + "loss": 0.8968, + "step": 80530 + }, + { + "epoch": 0.5830021643611515, + "grad_norm": 0.15675191581249237, + "learning_rate": 4.417005074304908e-06, + "loss": 0.9182, + "step": 80540 + }, + { + "epoch": 0.5830745510217377, + "grad_norm": 0.16273775696754456, + "learning_rate": 4.4169326876443216e-06, + "loss": 0.9103, + "step": 80550 + }, + { + "epoch": 0.583146937682324, + "grad_norm": 0.171338751912117, + "learning_rate": 4.416860300983735e-06, + "loss": 0.9151, + "step": 80560 + }, + { + "epoch": 0.5832193243429101, + "grad_norm": 0.16506828367710114, + "learning_rate": 4.416787914323149e-06, + "loss": 0.9137, + "step": 80570 + }, + { + "epoch": 0.5832917110034963, + "grad_norm": 0.1581031233072281, + "learning_rate": 4.416715527662563e-06, + "loss": 0.8992, + "step": 80580 + }, + { + "epoch": 0.5833640976640825, + "grad_norm": 0.1575007438659668, + "learning_rate": 4.416643141001977e-06, + "loss": 0.9053, + "step": 80590 + }, + { + "epoch": 0.5834364843246687, + "grad_norm": 0.17189882695674896, + "learning_rate": 4.4165707543413905e-06, + "loss": 0.9148, + "step": 80600 + }, + { + "epoch": 0.5835088709852548, + "grad_norm": 0.16765065491199493, + "learning_rate": 4.416498367680804e-06, + "loss": 0.9063, + "step": 80610 + }, + { + "epoch": 0.583581257645841, + "grad_norm": 0.15300755202770233, + "learning_rate": 4.416425981020218e-06, + "loss": 0.9059, + "step": 80620 + }, + { + "epoch": 0.5836536443064272, + "grad_norm": 0.16409176588058472, + "learning_rate": 4.416353594359632e-06, + "loss": 0.9074, + "step": 80630 + }, + { + "epoch": 0.5837260309670134, + "grad_norm": 0.1527976095676422, + "learning_rate": 4.416281207699046e-06, + "loss": 0.9057, + "step": 80640 + }, + { + "epoch": 0.5837984176275995, + "grad_norm": 0.17555196583271027, + "learning_rate": 4.416208821038459e-06, + "loss": 0.918, + "step": 80650 + }, + { + "epoch": 0.5838708042881857, + "grad_norm": 0.15772706270217896, + "learning_rate": 4.416136434377873e-06, + "loss": 0.9064, + "step": 80660 + }, + { + "epoch": 0.583943190948772, + "grad_norm": 0.16107985377311707, + "learning_rate": 4.4160640477172875e-06, + "loss": 0.9153, + "step": 80670 + }, + { + "epoch": 0.5840155776093582, + "grad_norm": 0.27143919467926025, + "learning_rate": 4.415991661056701e-06, + "loss": 0.9147, + "step": 80680 + }, + { + "epoch": 0.5840879642699444, + "grad_norm": 0.1891665756702423, + "learning_rate": 4.415919274396115e-06, + "loss": 0.9206, + "step": 80690 + }, + { + "epoch": 0.5841603509305305, + "grad_norm": 0.15576356649398804, + "learning_rate": 4.415846887735528e-06, + "loss": 0.9045, + "step": 80700 + }, + { + "epoch": 0.5842327375911167, + "grad_norm": 0.16257961094379425, + "learning_rate": 4.415774501074943e-06, + "loss": 0.8978, + "step": 80710 + }, + { + "epoch": 0.5843051242517029, + "grad_norm": 0.17086383700370789, + "learning_rate": 4.4157021144143556e-06, + "loss": 0.9039, + "step": 80720 + }, + { + "epoch": 0.5843775109122891, + "grad_norm": 0.205778568983078, + "learning_rate": 4.41562972775377e-06, + "loss": 0.9195, + "step": 80730 + }, + { + "epoch": 0.5844498975728752, + "grad_norm": 0.15765634179115295, + "learning_rate": 4.415557341093184e-06, + "loss": 0.9038, + "step": 80740 + }, + { + "epoch": 0.5845222842334614, + "grad_norm": 0.14532588422298431, + "learning_rate": 4.415484954432597e-06, + "loss": 0.9122, + "step": 80750 + }, + { + "epoch": 0.5845946708940476, + "grad_norm": 0.172866091132164, + "learning_rate": 4.415412567772011e-06, + "loss": 0.9122, + "step": 80760 + }, + { + "epoch": 0.5846670575546339, + "grad_norm": 0.16551901400089264, + "learning_rate": 4.415340181111425e-06, + "loss": 0.9049, + "step": 80770 + }, + { + "epoch": 0.5847394442152201, + "grad_norm": 0.15034866333007812, + "learning_rate": 4.415267794450839e-06, + "loss": 0.9006, + "step": 80780 + }, + { + "epoch": 0.5848118308758062, + "grad_norm": 0.1626167744398117, + "learning_rate": 4.4151954077902526e-06, + "loss": 0.9122, + "step": 80790 + }, + { + "epoch": 0.5848842175363924, + "grad_norm": 0.2411961555480957, + "learning_rate": 4.415123021129666e-06, + "loss": 0.9039, + "step": 80800 + }, + { + "epoch": 0.5849566041969786, + "grad_norm": 0.16269677877426147, + "learning_rate": 4.41505063446908e-06, + "loss": 0.923, + "step": 80810 + }, + { + "epoch": 0.5850289908575648, + "grad_norm": 0.15651679039001465, + "learning_rate": 4.414978247808494e-06, + "loss": 0.9018, + "step": 80820 + }, + { + "epoch": 0.585101377518151, + "grad_norm": 0.1721392273902893, + "learning_rate": 4.414905861147908e-06, + "loss": 0.9058, + "step": 80830 + }, + { + "epoch": 0.5851737641787371, + "grad_norm": 0.1486913561820984, + "learning_rate": 4.4148334744873215e-06, + "loss": 0.9067, + "step": 80840 + }, + { + "epoch": 0.5852461508393233, + "grad_norm": 0.16526785492897034, + "learning_rate": 4.414761087826735e-06, + "loss": 0.9126, + "step": 80850 + }, + { + "epoch": 0.5853185374999095, + "grad_norm": 0.18631219863891602, + "learning_rate": 4.41468870116615e-06, + "loss": 0.9067, + "step": 80860 + }, + { + "epoch": 0.5853909241604957, + "grad_norm": 0.15394018590450287, + "learning_rate": 4.414616314505563e-06, + "loss": 0.9065, + "step": 80870 + }, + { + "epoch": 0.5854633108210819, + "grad_norm": 0.15232212841510773, + "learning_rate": 4.414543927844977e-06, + "loss": 0.907, + "step": 80880 + }, + { + "epoch": 0.5855356974816681, + "grad_norm": 0.15810738503932953, + "learning_rate": 4.4144715411843904e-06, + "loss": 0.9133, + "step": 80890 + }, + { + "epoch": 0.5856080841422543, + "grad_norm": 0.15687547624111176, + "learning_rate": 4.414399154523805e-06, + "loss": 0.9222, + "step": 80900 + }, + { + "epoch": 0.5856804708028405, + "grad_norm": 0.1554637998342514, + "learning_rate": 4.4143267678632185e-06, + "loss": 0.9004, + "step": 80910 + }, + { + "epoch": 0.5857528574634266, + "grad_norm": 0.14952871203422546, + "learning_rate": 4.414254381202632e-06, + "loss": 0.9071, + "step": 80920 + }, + { + "epoch": 0.5858252441240128, + "grad_norm": 0.15082089602947235, + "learning_rate": 4.414181994542046e-06, + "loss": 0.9173, + "step": 80930 + }, + { + "epoch": 0.585897630784599, + "grad_norm": 0.1659436672925949, + "learning_rate": 4.41410960788146e-06, + "loss": 0.9038, + "step": 80940 + }, + { + "epoch": 0.5859700174451852, + "grad_norm": 0.16675877571105957, + "learning_rate": 4.414037221220874e-06, + "loss": 0.9022, + "step": 80950 + }, + { + "epoch": 0.5860424041057714, + "grad_norm": 0.17415930330753326, + "learning_rate": 4.4139648345602874e-06, + "loss": 0.912, + "step": 80960 + }, + { + "epoch": 0.5861147907663575, + "grad_norm": 0.16868185997009277, + "learning_rate": 4.413892447899701e-06, + "loss": 0.9139, + "step": 80970 + }, + { + "epoch": 0.5861871774269438, + "grad_norm": 0.1576145589351654, + "learning_rate": 4.4138200612391155e-06, + "loss": 0.9084, + "step": 80980 + }, + { + "epoch": 0.58625956408753, + "grad_norm": 0.16312648355960846, + "learning_rate": 4.413747674578529e-06, + "loss": 0.9107, + "step": 80990 + }, + { + "epoch": 0.5863319507481162, + "grad_norm": 0.1908407360315323, + "learning_rate": 4.413675287917943e-06, + "loss": 0.9057, + "step": 81000 + }, + { + "epoch": 0.5864043374087023, + "grad_norm": 0.19931037724018097, + "learning_rate": 4.413602901257356e-06, + "loss": 0.9036, + "step": 81010 + }, + { + "epoch": 0.5864767240692885, + "grad_norm": 0.15866592526435852, + "learning_rate": 4.413530514596771e-06, + "loss": 0.9027, + "step": 81020 + }, + { + "epoch": 0.5865491107298747, + "grad_norm": 0.1934008151292801, + "learning_rate": 4.4134581279361844e-06, + "loss": 0.9087, + "step": 81030 + }, + { + "epoch": 0.5866214973904609, + "grad_norm": 0.1468328833580017, + "learning_rate": 4.413385741275598e-06, + "loss": 0.9099, + "step": 81040 + }, + { + "epoch": 0.586693884051047, + "grad_norm": 0.16777294874191284, + "learning_rate": 4.413313354615012e-06, + "loss": 0.8982, + "step": 81050 + }, + { + "epoch": 0.5867662707116332, + "grad_norm": 0.15594753623008728, + "learning_rate": 4.413240967954426e-06, + "loss": 0.9146, + "step": 81060 + }, + { + "epoch": 0.5868386573722194, + "grad_norm": 0.15250714123249054, + "learning_rate": 4.41316858129384e-06, + "loss": 0.9137, + "step": 81070 + }, + { + "epoch": 0.5869110440328056, + "grad_norm": 0.15024009346961975, + "learning_rate": 4.413096194633253e-06, + "loss": 0.9134, + "step": 81080 + }, + { + "epoch": 0.5869834306933919, + "grad_norm": 0.1556905061006546, + "learning_rate": 4.413023807972667e-06, + "loss": 0.9178, + "step": 81090 + }, + { + "epoch": 0.587055817353978, + "grad_norm": 0.1638568490743637, + "learning_rate": 4.4129514213120815e-06, + "loss": 0.9126, + "step": 81100 + }, + { + "epoch": 0.5871282040145642, + "grad_norm": 0.15844887495040894, + "learning_rate": 4.412879034651495e-06, + "loss": 0.9135, + "step": 81110 + }, + { + "epoch": 0.5872005906751504, + "grad_norm": 0.1853482723236084, + "learning_rate": 4.412806647990909e-06, + "loss": 0.897, + "step": 81120 + }, + { + "epoch": 0.5872729773357366, + "grad_norm": 0.15754495561122894, + "learning_rate": 4.412734261330322e-06, + "loss": 0.9092, + "step": 81130 + }, + { + "epoch": 0.5873453639963228, + "grad_norm": 0.16885913908481598, + "learning_rate": 4.412661874669737e-06, + "loss": 0.9082, + "step": 81140 + }, + { + "epoch": 0.5874177506569089, + "grad_norm": 0.16286881268024445, + "learning_rate": 4.41258948800915e-06, + "loss": 0.893, + "step": 81150 + }, + { + "epoch": 0.5874901373174951, + "grad_norm": 0.14878816902637482, + "learning_rate": 4.412517101348564e-06, + "loss": 0.9028, + "step": 81160 + }, + { + "epoch": 0.5875625239780813, + "grad_norm": 0.17409685254096985, + "learning_rate": 4.412444714687978e-06, + "loss": 0.9155, + "step": 81170 + }, + { + "epoch": 0.5876349106386675, + "grad_norm": 0.1579102873802185, + "learning_rate": 4.412372328027392e-06, + "loss": 0.9146, + "step": 81180 + }, + { + "epoch": 0.5877072972992536, + "grad_norm": 0.15453718602657318, + "learning_rate": 4.412299941366806e-06, + "loss": 0.8998, + "step": 81190 + }, + { + "epoch": 0.5877796839598399, + "grad_norm": 0.16412879526615143, + "learning_rate": 4.412227554706219e-06, + "loss": 0.907, + "step": 81200 + }, + { + "epoch": 0.5878520706204261, + "grad_norm": 0.1556544452905655, + "learning_rate": 4.412155168045633e-06, + "loss": 0.9067, + "step": 81210 + }, + { + "epoch": 0.5879244572810123, + "grad_norm": 0.1514187902212143, + "learning_rate": 4.412082781385047e-06, + "loss": 0.909, + "step": 81220 + }, + { + "epoch": 0.5879968439415985, + "grad_norm": 0.154129296541214, + "learning_rate": 4.412010394724461e-06, + "loss": 0.9017, + "step": 81230 + }, + { + "epoch": 0.5880692306021846, + "grad_norm": 0.16333553194999695, + "learning_rate": 4.411938008063875e-06, + "loss": 0.9039, + "step": 81240 + }, + { + "epoch": 0.5881416172627708, + "grad_norm": 0.1498739868402481, + "learning_rate": 4.411865621403288e-06, + "loss": 0.8942, + "step": 81250 + }, + { + "epoch": 0.588214003923357, + "grad_norm": 0.1580556333065033, + "learning_rate": 4.411793234742702e-06, + "loss": 0.8984, + "step": 81260 + }, + { + "epoch": 0.5882863905839432, + "grad_norm": 0.15687990188598633, + "learning_rate": 4.4117208480821155e-06, + "loss": 0.9096, + "step": 81270 + }, + { + "epoch": 0.5883587772445293, + "grad_norm": 0.17736192047595978, + "learning_rate": 4.411648461421529e-06, + "loss": 0.9031, + "step": 81280 + }, + { + "epoch": 0.5884311639051155, + "grad_norm": 0.1527496576309204, + "learning_rate": 4.4115760747609435e-06, + "loss": 0.8972, + "step": 81290 + }, + { + "epoch": 0.5885035505657018, + "grad_norm": 0.17745208740234375, + "learning_rate": 4.411503688100357e-06, + "loss": 0.917, + "step": 81300 + }, + { + "epoch": 0.588575937226288, + "grad_norm": 0.20360904932022095, + "learning_rate": 4.411431301439771e-06, + "loss": 0.9189, + "step": 81310 + }, + { + "epoch": 0.5886483238868742, + "grad_norm": 0.1483248472213745, + "learning_rate": 4.411358914779184e-06, + "loss": 0.9098, + "step": 81320 + }, + { + "epoch": 0.5887207105474603, + "grad_norm": 0.1655409038066864, + "learning_rate": 4.411286528118599e-06, + "loss": 0.9164, + "step": 81330 + }, + { + "epoch": 0.5887930972080465, + "grad_norm": 0.1597280353307724, + "learning_rate": 4.4112141414580125e-06, + "loss": 0.9058, + "step": 81340 + }, + { + "epoch": 0.5888654838686327, + "grad_norm": 0.16111089289188385, + "learning_rate": 4.411141754797426e-06, + "loss": 0.9048, + "step": 81350 + }, + { + "epoch": 0.5889378705292189, + "grad_norm": 0.16747364401817322, + "learning_rate": 4.41106936813684e-06, + "loss": 0.9183, + "step": 81360 + }, + { + "epoch": 0.589010257189805, + "grad_norm": 0.14955104887485504, + "learning_rate": 4.410996981476254e-06, + "loss": 0.9064, + "step": 81370 + }, + { + "epoch": 0.5890826438503912, + "grad_norm": 0.15736231207847595, + "learning_rate": 4.410924594815668e-06, + "loss": 0.9048, + "step": 81380 + }, + { + "epoch": 0.5891550305109774, + "grad_norm": 0.20818248391151428, + "learning_rate": 4.410852208155081e-06, + "loss": 0.8868, + "step": 81390 + }, + { + "epoch": 0.5892274171715636, + "grad_norm": 0.15236660838127136, + "learning_rate": 4.410779821494495e-06, + "loss": 0.9082, + "step": 81400 + }, + { + "epoch": 0.5892998038321499, + "grad_norm": 0.1863219141960144, + "learning_rate": 4.4107074348339095e-06, + "loss": 0.9106, + "step": 81410 + }, + { + "epoch": 0.589372190492736, + "grad_norm": 0.14689262211322784, + "learning_rate": 4.410635048173323e-06, + "loss": 0.9158, + "step": 81420 + }, + { + "epoch": 0.5894445771533222, + "grad_norm": 0.15385663509368896, + "learning_rate": 4.410562661512737e-06, + "loss": 0.9033, + "step": 81430 + }, + { + "epoch": 0.5895169638139084, + "grad_norm": 0.18120580911636353, + "learning_rate": 4.41049027485215e-06, + "loss": 0.9147, + "step": 81440 + }, + { + "epoch": 0.5895893504744946, + "grad_norm": 0.16570447385311127, + "learning_rate": 4.410417888191564e-06, + "loss": 0.9152, + "step": 81450 + }, + { + "epoch": 0.5896617371350807, + "grad_norm": 0.15214945375919342, + "learning_rate": 4.410345501530978e-06, + "loss": 0.9046, + "step": 81460 + }, + { + "epoch": 0.5897341237956669, + "grad_norm": 0.1923670470714569, + "learning_rate": 4.410273114870392e-06, + "loss": 0.918, + "step": 81470 + }, + { + "epoch": 0.5898065104562531, + "grad_norm": 0.1635698527097702, + "learning_rate": 4.410200728209806e-06, + "loss": 0.9058, + "step": 81480 + }, + { + "epoch": 0.5898788971168393, + "grad_norm": 0.1771516501903534, + "learning_rate": 4.410128341549219e-06, + "loss": 0.9068, + "step": 81490 + }, + { + "epoch": 0.5899512837774255, + "grad_norm": 0.16354140639305115, + "learning_rate": 4.410055954888634e-06, + "loss": 0.9066, + "step": 81500 + }, + { + "epoch": 0.5900236704380117, + "grad_norm": 0.16545943915843964, + "learning_rate": 4.409983568228047e-06, + "loss": 0.9138, + "step": 81510 + }, + { + "epoch": 0.5900960570985979, + "grad_norm": 0.15025997161865234, + "learning_rate": 4.409911181567461e-06, + "loss": 0.8911, + "step": 81520 + }, + { + "epoch": 0.5901684437591841, + "grad_norm": 0.17573940753936768, + "learning_rate": 4.4098387949068746e-06, + "loss": 0.9041, + "step": 81530 + }, + { + "epoch": 0.5902408304197703, + "grad_norm": 0.17626507580280304, + "learning_rate": 4.409766408246289e-06, + "loss": 0.9036, + "step": 81540 + }, + { + "epoch": 0.5903132170803564, + "grad_norm": 0.16366511583328247, + "learning_rate": 4.409694021585703e-06, + "loss": 0.9016, + "step": 81550 + }, + { + "epoch": 0.5903856037409426, + "grad_norm": 0.17350682616233826, + "learning_rate": 4.409621634925116e-06, + "loss": 0.9112, + "step": 81560 + }, + { + "epoch": 0.5904579904015288, + "grad_norm": 0.16151121258735657, + "learning_rate": 4.40954924826453e-06, + "loss": 0.9051, + "step": 81570 + }, + { + "epoch": 0.590530377062115, + "grad_norm": 0.16566744446754456, + "learning_rate": 4.409476861603944e-06, + "loss": 0.9037, + "step": 81580 + }, + { + "epoch": 0.5906027637227012, + "grad_norm": 0.16139444708824158, + "learning_rate": 4.409404474943358e-06, + "loss": 0.907, + "step": 81590 + }, + { + "epoch": 0.5906751503832873, + "grad_norm": 0.16810381412506104, + "learning_rate": 4.409332088282772e-06, + "loss": 0.8919, + "step": 81600 + }, + { + "epoch": 0.5907475370438735, + "grad_norm": 0.1652746945619583, + "learning_rate": 4.409259701622185e-06, + "loss": 0.9017, + "step": 81610 + }, + { + "epoch": 0.5908199237044598, + "grad_norm": 0.1639404594898224, + "learning_rate": 4.4091873149616e-06, + "loss": 0.8995, + "step": 81620 + }, + { + "epoch": 0.590892310365046, + "grad_norm": 0.1466279774904251, + "learning_rate": 4.409114928301013e-06, + "loss": 0.9177, + "step": 81630 + }, + { + "epoch": 0.5909646970256321, + "grad_norm": 0.16004787385463715, + "learning_rate": 4.409042541640427e-06, + "loss": 0.9039, + "step": 81640 + }, + { + "epoch": 0.5910370836862183, + "grad_norm": 0.15307894349098206, + "learning_rate": 4.4089701549798405e-06, + "loss": 0.9036, + "step": 81650 + }, + { + "epoch": 0.5911094703468045, + "grad_norm": 0.19654370844364166, + "learning_rate": 4.408897768319255e-06, + "loss": 0.9107, + "step": 81660 + }, + { + "epoch": 0.5911818570073907, + "grad_norm": 0.17902538180351257, + "learning_rate": 4.408825381658669e-06, + "loss": 0.9045, + "step": 81670 + }, + { + "epoch": 0.5912542436679769, + "grad_norm": 0.1651085615158081, + "learning_rate": 4.408752994998082e-06, + "loss": 0.9106, + "step": 81680 + }, + { + "epoch": 0.591326630328563, + "grad_norm": 0.1591925323009491, + "learning_rate": 4.408680608337496e-06, + "loss": 0.904, + "step": 81690 + }, + { + "epoch": 0.5913990169891492, + "grad_norm": 0.1673898696899414, + "learning_rate": 4.40860822167691e-06, + "loss": 0.9005, + "step": 81700 + }, + { + "epoch": 0.5914714036497354, + "grad_norm": 0.16726025938987732, + "learning_rate": 4.408535835016324e-06, + "loss": 0.9051, + "step": 81710 + }, + { + "epoch": 0.5915437903103216, + "grad_norm": 0.16342008113861084, + "learning_rate": 4.4084634483557375e-06, + "loss": 0.9207, + "step": 81720 + }, + { + "epoch": 0.5916161769709078, + "grad_norm": 0.15118926763534546, + "learning_rate": 4.408391061695151e-06, + "loss": 0.9022, + "step": 81730 + }, + { + "epoch": 0.591688563631494, + "grad_norm": 0.15618331730365753, + "learning_rate": 4.408318675034566e-06, + "loss": 0.9089, + "step": 81740 + }, + { + "epoch": 0.5917609502920802, + "grad_norm": 0.16020120680332184, + "learning_rate": 4.408246288373979e-06, + "loss": 0.9077, + "step": 81750 + }, + { + "epoch": 0.5918333369526664, + "grad_norm": 0.15385285019874573, + "learning_rate": 4.408173901713393e-06, + "loss": 0.909, + "step": 81760 + }, + { + "epoch": 0.5919057236132526, + "grad_norm": 0.15946872532367706, + "learning_rate": 4.4081015150528064e-06, + "loss": 0.9251, + "step": 81770 + }, + { + "epoch": 0.5919781102738387, + "grad_norm": 0.16759130358695984, + "learning_rate": 4.40802912839222e-06, + "loss": 0.9045, + "step": 81780 + }, + { + "epoch": 0.5920504969344249, + "grad_norm": 0.1478552371263504, + "learning_rate": 4.407956741731634e-06, + "loss": 0.9143, + "step": 81790 + }, + { + "epoch": 0.5921228835950111, + "grad_norm": 0.18161530792713165, + "learning_rate": 4.407884355071047e-06, + "loss": 0.9115, + "step": 81800 + }, + { + "epoch": 0.5921952702555973, + "grad_norm": 0.16163845360279083, + "learning_rate": 4.407811968410462e-06, + "loss": 0.8982, + "step": 81810 + }, + { + "epoch": 0.5922676569161834, + "grad_norm": 0.15115785598754883, + "learning_rate": 4.407739581749875e-06, + "loss": 0.9166, + "step": 81820 + }, + { + "epoch": 0.5923400435767697, + "grad_norm": 0.1659201681613922, + "learning_rate": 4.407667195089289e-06, + "loss": 0.9008, + "step": 81830 + }, + { + "epoch": 0.5924124302373559, + "grad_norm": 0.1570078432559967, + "learning_rate": 4.407594808428703e-06, + "loss": 0.8988, + "step": 81840 + }, + { + "epoch": 0.5924848168979421, + "grad_norm": 0.1584419459104538, + "learning_rate": 4.407522421768117e-06, + "loss": 0.9097, + "step": 81850 + }, + { + "epoch": 0.5925572035585283, + "grad_norm": 0.18121954798698425, + "learning_rate": 4.407450035107531e-06, + "loss": 0.8932, + "step": 81860 + }, + { + "epoch": 0.5926295902191144, + "grad_norm": 0.15848958492279053, + "learning_rate": 4.407377648446944e-06, + "loss": 0.9048, + "step": 81870 + }, + { + "epoch": 0.5927019768797006, + "grad_norm": 0.17608241736888885, + "learning_rate": 4.407305261786358e-06, + "loss": 0.9077, + "step": 81880 + }, + { + "epoch": 0.5927743635402868, + "grad_norm": 0.16470035910606384, + "learning_rate": 4.407232875125772e-06, + "loss": 0.9114, + "step": 81890 + }, + { + "epoch": 0.592846750200873, + "grad_norm": 0.14142341911792755, + "learning_rate": 4.407160488465186e-06, + "loss": 0.8911, + "step": 81900 + }, + { + "epoch": 0.5929191368614591, + "grad_norm": 0.151789128780365, + "learning_rate": 4.4070881018046e-06, + "loss": 0.9079, + "step": 81910 + }, + { + "epoch": 0.5929915235220453, + "grad_norm": 0.15422183275222778, + "learning_rate": 4.407015715144013e-06, + "loss": 0.9028, + "step": 81920 + }, + { + "epoch": 0.5930639101826315, + "grad_norm": 0.1466582864522934, + "learning_rate": 4.406943328483428e-06, + "loss": 0.9025, + "step": 81930 + }, + { + "epoch": 0.5931362968432178, + "grad_norm": 0.16983705759048462, + "learning_rate": 4.406870941822841e-06, + "loss": 0.9048, + "step": 81940 + }, + { + "epoch": 0.593208683503804, + "grad_norm": 0.1635872721672058, + "learning_rate": 4.406798555162255e-06, + "loss": 0.901, + "step": 81950 + }, + { + "epoch": 0.5932810701643901, + "grad_norm": 0.17607009410858154, + "learning_rate": 4.4067261685016685e-06, + "loss": 0.9248, + "step": 81960 + }, + { + "epoch": 0.5933534568249763, + "grad_norm": 0.1671140491962433, + "learning_rate": 4.406653781841083e-06, + "loss": 0.9005, + "step": 81970 + }, + { + "epoch": 0.5934258434855625, + "grad_norm": 0.17591434717178345, + "learning_rate": 4.406581395180497e-06, + "loss": 0.909, + "step": 81980 + }, + { + "epoch": 0.5934982301461487, + "grad_norm": 0.17302151024341583, + "learning_rate": 4.40650900851991e-06, + "loss": 0.9135, + "step": 81990 + }, + { + "epoch": 0.5935706168067348, + "grad_norm": 0.169133722782135, + "learning_rate": 4.406436621859324e-06, + "loss": 0.8986, + "step": 82000 + }, + { + "epoch": 0.593643003467321, + "grad_norm": 0.1573852002620697, + "learning_rate": 4.406364235198738e-06, + "loss": 0.9064, + "step": 82010 + }, + { + "epoch": 0.5937153901279072, + "grad_norm": 0.1580667495727539, + "learning_rate": 4.406291848538152e-06, + "loss": 0.9076, + "step": 82020 + }, + { + "epoch": 0.5937877767884934, + "grad_norm": 0.15899869799613953, + "learning_rate": 4.4062194618775655e-06, + "loss": 0.9142, + "step": 82030 + }, + { + "epoch": 0.5938601634490797, + "grad_norm": 0.17980434000492096, + "learning_rate": 4.406147075216979e-06, + "loss": 0.9041, + "step": 82040 + }, + { + "epoch": 0.5939325501096658, + "grad_norm": 0.1649356186389923, + "learning_rate": 4.406074688556393e-06, + "loss": 0.9299, + "step": 82050 + }, + { + "epoch": 0.594004936770252, + "grad_norm": 0.15136809647083282, + "learning_rate": 4.406002301895807e-06, + "loss": 0.9082, + "step": 82060 + }, + { + "epoch": 0.5940773234308382, + "grad_norm": 0.15223738551139832, + "learning_rate": 4.405929915235221e-06, + "loss": 0.9107, + "step": 82070 + }, + { + "epoch": 0.5941497100914244, + "grad_norm": 0.1510484218597412, + "learning_rate": 4.4058575285746345e-06, + "loss": 0.9013, + "step": 82080 + }, + { + "epoch": 0.5942220967520105, + "grad_norm": 0.17146191000938416, + "learning_rate": 4.405785141914048e-06, + "loss": 0.9101, + "step": 82090 + }, + { + "epoch": 0.5942944834125967, + "grad_norm": 0.19103531539440155, + "learning_rate": 4.4057127552534626e-06, + "loss": 0.9044, + "step": 82100 + }, + { + "epoch": 0.5943668700731829, + "grad_norm": 0.1852683573961258, + "learning_rate": 4.405640368592876e-06, + "loss": 0.905, + "step": 82110 + }, + { + "epoch": 0.5944392567337691, + "grad_norm": 0.17115405201911926, + "learning_rate": 4.40556798193229e-06, + "loss": 0.903, + "step": 82120 + }, + { + "epoch": 0.5945116433943552, + "grad_norm": 0.1565488874912262, + "learning_rate": 4.405495595271703e-06, + "loss": 0.8996, + "step": 82130 + }, + { + "epoch": 0.5945840300549414, + "grad_norm": 0.16356854140758514, + "learning_rate": 4.405423208611118e-06, + "loss": 0.9087, + "step": 82140 + }, + { + "epoch": 0.5946564167155277, + "grad_norm": 0.16093464195728302, + "learning_rate": 4.4053508219505315e-06, + "loss": 0.9107, + "step": 82150 + }, + { + "epoch": 0.5947288033761139, + "grad_norm": 0.16113322973251343, + "learning_rate": 4.405278435289945e-06, + "loss": 0.9077, + "step": 82160 + }, + { + "epoch": 0.5948011900367001, + "grad_norm": 0.19098584353923798, + "learning_rate": 4.405206048629359e-06, + "loss": 0.8965, + "step": 82170 + }, + { + "epoch": 0.5948735766972862, + "grad_norm": 0.46058592200279236, + "learning_rate": 4.405133661968773e-06, + "loss": 0.9037, + "step": 82180 + }, + { + "epoch": 0.5949459633578724, + "grad_norm": 0.15246206521987915, + "learning_rate": 4.405061275308187e-06, + "loss": 0.9013, + "step": 82190 + }, + { + "epoch": 0.5950183500184586, + "grad_norm": 0.16424813866615295, + "learning_rate": 4.4049888886476e-06, + "loss": 0.8992, + "step": 82200 + }, + { + "epoch": 0.5950907366790448, + "grad_norm": 0.16102401912212372, + "learning_rate": 4.404916501987014e-06, + "loss": 0.9074, + "step": 82210 + }, + { + "epoch": 0.595163123339631, + "grad_norm": 0.16575241088867188, + "learning_rate": 4.4048441153264285e-06, + "loss": 0.9125, + "step": 82220 + }, + { + "epoch": 0.5952355100002171, + "grad_norm": 0.15276582539081573, + "learning_rate": 4.404771728665842e-06, + "loss": 0.8948, + "step": 82230 + }, + { + "epoch": 0.5953078966608033, + "grad_norm": 0.16686709225177765, + "learning_rate": 4.404699342005256e-06, + "loss": 0.9082, + "step": 82240 + }, + { + "epoch": 0.5953802833213895, + "grad_norm": 0.19125671684741974, + "learning_rate": 4.404626955344669e-06, + "loss": 0.9021, + "step": 82250 + }, + { + "epoch": 0.5954526699819758, + "grad_norm": 0.15619054436683655, + "learning_rate": 4.404554568684084e-06, + "loss": 0.9076, + "step": 82260 + }, + { + "epoch": 0.595525056642562, + "grad_norm": 0.1474914848804474, + "learning_rate": 4.404482182023497e-06, + "loss": 0.9121, + "step": 82270 + }, + { + "epoch": 0.5955974433031481, + "grad_norm": 0.1519433706998825, + "learning_rate": 4.404409795362911e-06, + "loss": 0.9154, + "step": 82280 + }, + { + "epoch": 0.5956698299637343, + "grad_norm": 0.15717659890651703, + "learning_rate": 4.404337408702325e-06, + "loss": 0.9113, + "step": 82290 + }, + { + "epoch": 0.5957422166243205, + "grad_norm": 0.1966000646352768, + "learning_rate": 4.404265022041739e-06, + "loss": 0.9156, + "step": 82300 + }, + { + "epoch": 0.5958146032849067, + "grad_norm": 0.16030791401863098, + "learning_rate": 4.404192635381152e-06, + "loss": 0.9195, + "step": 82310 + }, + { + "epoch": 0.5958869899454928, + "grad_norm": 0.16544552147388458, + "learning_rate": 4.4041202487205655e-06, + "loss": 0.9117, + "step": 82320 + }, + { + "epoch": 0.595959376606079, + "grad_norm": 0.15709573030471802, + "learning_rate": 4.40404786205998e-06, + "loss": 0.9049, + "step": 82330 + }, + { + "epoch": 0.5960317632666652, + "grad_norm": 0.15841509401798248, + "learning_rate": 4.403975475399394e-06, + "loss": 0.9054, + "step": 82340 + }, + { + "epoch": 0.5961041499272514, + "grad_norm": 0.15718714892864227, + "learning_rate": 4.403903088738807e-06, + "loss": 0.8977, + "step": 82350 + }, + { + "epoch": 0.5961765365878376, + "grad_norm": 0.1640692800283432, + "learning_rate": 4.403830702078221e-06, + "loss": 0.9121, + "step": 82360 + }, + { + "epoch": 0.5962489232484238, + "grad_norm": 0.1538037806749344, + "learning_rate": 4.403758315417635e-06, + "loss": 0.9113, + "step": 82370 + }, + { + "epoch": 0.59632130990901, + "grad_norm": 0.18401135504245758, + "learning_rate": 4.403685928757049e-06, + "loss": 0.8984, + "step": 82380 + }, + { + "epoch": 0.5963936965695962, + "grad_norm": 0.1522941142320633, + "learning_rate": 4.4036135420964625e-06, + "loss": 0.9004, + "step": 82390 + }, + { + "epoch": 0.5964660832301824, + "grad_norm": 0.16432693600654602, + "learning_rate": 4.403541155435876e-06, + "loss": 0.9088, + "step": 82400 + }, + { + "epoch": 0.5965384698907685, + "grad_norm": 0.16186141967773438, + "learning_rate": 4.403468768775291e-06, + "loss": 0.8907, + "step": 82410 + }, + { + "epoch": 0.5966108565513547, + "grad_norm": 0.16021370887756348, + "learning_rate": 4.403396382114704e-06, + "loss": 0.906, + "step": 82420 + }, + { + "epoch": 0.5966832432119409, + "grad_norm": 0.1591646671295166, + "learning_rate": 4.403323995454118e-06, + "loss": 0.9104, + "step": 82430 + }, + { + "epoch": 0.5967556298725271, + "grad_norm": 0.16218051314353943, + "learning_rate": 4.4032516087935314e-06, + "loss": 0.8933, + "step": 82440 + }, + { + "epoch": 0.5968280165331132, + "grad_norm": 0.1592613160610199, + "learning_rate": 4.403179222132946e-06, + "loss": 0.9027, + "step": 82450 + }, + { + "epoch": 0.5969004031936994, + "grad_norm": 0.15859758853912354, + "learning_rate": 4.4031068354723595e-06, + "loss": 0.9131, + "step": 82460 + }, + { + "epoch": 0.5969727898542857, + "grad_norm": 0.1566203236579895, + "learning_rate": 4.403034448811773e-06, + "loss": 0.9032, + "step": 82470 + }, + { + "epoch": 0.5970451765148719, + "grad_norm": 0.14999212324619293, + "learning_rate": 4.402962062151187e-06, + "loss": 0.9117, + "step": 82480 + }, + { + "epoch": 0.597117563175458, + "grad_norm": 0.18934468924999237, + "learning_rate": 4.402889675490601e-06, + "loss": 0.909, + "step": 82490 + }, + { + "epoch": 0.5971899498360442, + "grad_norm": 0.15167193114757538, + "learning_rate": 4.402817288830015e-06, + "loss": 0.8999, + "step": 82500 + }, + { + "epoch": 0.5972623364966304, + "grad_norm": 0.16032280027866364, + "learning_rate": 4.4027449021694284e-06, + "loss": 0.8981, + "step": 82510 + }, + { + "epoch": 0.5973347231572166, + "grad_norm": 0.16216394305229187, + "learning_rate": 4.402672515508842e-06, + "loss": 0.9033, + "step": 82520 + }, + { + "epoch": 0.5974071098178028, + "grad_norm": 0.15446747839450836, + "learning_rate": 4.4026001288482565e-06, + "loss": 0.9062, + "step": 82530 + }, + { + "epoch": 0.5974794964783889, + "grad_norm": 0.1542857438325882, + "learning_rate": 4.40252774218767e-06, + "loss": 0.9097, + "step": 82540 + }, + { + "epoch": 0.5975518831389751, + "grad_norm": 0.1710725724697113, + "learning_rate": 4.402455355527084e-06, + "loss": 0.9131, + "step": 82550 + }, + { + "epoch": 0.5976242697995613, + "grad_norm": 0.16666147112846375, + "learning_rate": 4.402382968866497e-06, + "loss": 0.905, + "step": 82560 + }, + { + "epoch": 0.5976966564601475, + "grad_norm": 0.17649833858013153, + "learning_rate": 4.402310582205912e-06, + "loss": 0.8965, + "step": 82570 + }, + { + "epoch": 0.5977690431207338, + "grad_norm": 0.15569385886192322, + "learning_rate": 4.4022381955453255e-06, + "loss": 0.8984, + "step": 82580 + }, + { + "epoch": 0.5978414297813199, + "grad_norm": 0.1647312492132187, + "learning_rate": 4.402165808884739e-06, + "loss": 0.9114, + "step": 82590 + }, + { + "epoch": 0.5979138164419061, + "grad_norm": 0.15604494512081146, + "learning_rate": 4.402093422224153e-06, + "loss": 0.9011, + "step": 82600 + }, + { + "epoch": 0.5979862031024923, + "grad_norm": 0.16003774106502533, + "learning_rate": 4.402021035563567e-06, + "loss": 0.9036, + "step": 82610 + }, + { + "epoch": 0.5980585897630785, + "grad_norm": 0.15925242006778717, + "learning_rate": 4.401948648902981e-06, + "loss": 0.8946, + "step": 82620 + }, + { + "epoch": 0.5981309764236646, + "grad_norm": 0.1530761569738388, + "learning_rate": 4.401876262242394e-06, + "loss": 0.9071, + "step": 82630 + }, + { + "epoch": 0.5982033630842508, + "grad_norm": 0.15942499041557312, + "learning_rate": 4.401803875581808e-06, + "loss": 0.9162, + "step": 82640 + }, + { + "epoch": 0.598275749744837, + "grad_norm": 0.1493762582540512, + "learning_rate": 4.4017314889212225e-06, + "loss": 0.905, + "step": 82650 + }, + { + "epoch": 0.5983481364054232, + "grad_norm": 0.1528092622756958, + "learning_rate": 4.401659102260636e-06, + "loss": 0.9097, + "step": 82660 + }, + { + "epoch": 0.5984205230660093, + "grad_norm": 0.18498297035694122, + "learning_rate": 4.40158671560005e-06, + "loss": 0.8937, + "step": 82670 + }, + { + "epoch": 0.5984929097265956, + "grad_norm": 0.19264182448387146, + "learning_rate": 4.401514328939463e-06, + "loss": 0.9167, + "step": 82680 + }, + { + "epoch": 0.5985652963871818, + "grad_norm": 0.14750641584396362, + "learning_rate": 4.401441942278877e-06, + "loss": 0.913, + "step": 82690 + }, + { + "epoch": 0.598637683047768, + "grad_norm": 0.2046324908733368, + "learning_rate": 4.401369555618291e-06, + "loss": 0.9094, + "step": 82700 + }, + { + "epoch": 0.5987100697083542, + "grad_norm": 0.16712640225887299, + "learning_rate": 4.401297168957705e-06, + "loss": 0.9057, + "step": 82710 + }, + { + "epoch": 0.5987824563689403, + "grad_norm": 0.1629089117050171, + "learning_rate": 4.401224782297119e-06, + "loss": 0.9194, + "step": 82720 + }, + { + "epoch": 0.5988548430295265, + "grad_norm": 0.15542463958263397, + "learning_rate": 4.401152395636532e-06, + "loss": 0.8874, + "step": 82730 + }, + { + "epoch": 0.5989272296901127, + "grad_norm": 0.16220228374004364, + "learning_rate": 4.401080008975947e-06, + "loss": 0.9143, + "step": 82740 + }, + { + "epoch": 0.5989996163506989, + "grad_norm": 0.15400896966457367, + "learning_rate": 4.40100762231536e-06, + "loss": 0.8985, + "step": 82750 + }, + { + "epoch": 0.599072003011285, + "grad_norm": 0.16913191974163055, + "learning_rate": 4.400935235654774e-06, + "loss": 0.9012, + "step": 82760 + }, + { + "epoch": 0.5991443896718712, + "grad_norm": 0.1506996899843216, + "learning_rate": 4.4008628489941875e-06, + "loss": 0.9109, + "step": 82770 + }, + { + "epoch": 0.5992167763324574, + "grad_norm": 0.15028440952301025, + "learning_rate": 4.400790462333602e-06, + "loss": 0.9002, + "step": 82780 + }, + { + "epoch": 0.5992891629930437, + "grad_norm": 0.15182295441627502, + "learning_rate": 4.400718075673016e-06, + "loss": 0.8906, + "step": 82790 + }, + { + "epoch": 0.5993615496536299, + "grad_norm": 0.1721028983592987, + "learning_rate": 4.400645689012429e-06, + "loss": 0.9059, + "step": 82800 + }, + { + "epoch": 0.599433936314216, + "grad_norm": 0.1762334257364273, + "learning_rate": 4.400573302351843e-06, + "loss": 0.8982, + "step": 82810 + }, + { + "epoch": 0.5995063229748022, + "grad_norm": 0.15542656183242798, + "learning_rate": 4.400500915691257e-06, + "loss": 0.9063, + "step": 82820 + }, + { + "epoch": 0.5995787096353884, + "grad_norm": 0.1622530221939087, + "learning_rate": 4.400428529030671e-06, + "loss": 0.906, + "step": 82830 + }, + { + "epoch": 0.5996510962959746, + "grad_norm": 0.19038499891757965, + "learning_rate": 4.400356142370084e-06, + "loss": 0.9196, + "step": 82840 + }, + { + "epoch": 0.5997234829565607, + "grad_norm": 0.15594981610774994, + "learning_rate": 4.400283755709498e-06, + "loss": 0.8982, + "step": 82850 + }, + { + "epoch": 0.5997958696171469, + "grad_norm": 0.1520778387784958, + "learning_rate": 4.400211369048912e-06, + "loss": 0.912, + "step": 82860 + }, + { + "epoch": 0.5998682562777331, + "grad_norm": 0.15902015566825867, + "learning_rate": 4.400138982388325e-06, + "loss": 0.9152, + "step": 82870 + }, + { + "epoch": 0.5999406429383193, + "grad_norm": 0.1633022576570511, + "learning_rate": 4.400066595727739e-06, + "loss": 0.8891, + "step": 82880 + }, + { + "epoch": 0.6000130295989056, + "grad_norm": 0.1755109429359436, + "learning_rate": 4.3999942090671535e-06, + "loss": 0.906, + "step": 82890 + }, + { + "epoch": 0.6000854162594917, + "grad_norm": 0.15575723350048065, + "learning_rate": 4.399921822406567e-06, + "loss": 0.9105, + "step": 82900 + }, + { + "epoch": 0.6001578029200779, + "grad_norm": 0.1682191789150238, + "learning_rate": 4.399849435745981e-06, + "loss": 0.8904, + "step": 82910 + }, + { + "epoch": 0.6002301895806641, + "grad_norm": 0.1746690571308136, + "learning_rate": 4.399777049085394e-06, + "loss": 0.9284, + "step": 82920 + }, + { + "epoch": 0.6003025762412503, + "grad_norm": 0.16141672432422638, + "learning_rate": 4.399704662424809e-06, + "loss": 0.8947, + "step": 82930 + }, + { + "epoch": 0.6003749629018365, + "grad_norm": 0.17700308561325073, + "learning_rate": 4.399632275764222e-06, + "loss": 0.9184, + "step": 82940 + }, + { + "epoch": 0.6004473495624226, + "grad_norm": 0.16985797882080078, + "learning_rate": 4.399559889103636e-06, + "loss": 0.905, + "step": 82950 + }, + { + "epoch": 0.6005197362230088, + "grad_norm": 0.17443716526031494, + "learning_rate": 4.39948750244305e-06, + "loss": 0.9197, + "step": 82960 + }, + { + "epoch": 0.600592122883595, + "grad_norm": 0.1507861465215683, + "learning_rate": 4.399415115782464e-06, + "loss": 0.9189, + "step": 82970 + }, + { + "epoch": 0.6006645095441812, + "grad_norm": 0.16039422154426575, + "learning_rate": 4.399342729121878e-06, + "loss": 0.9017, + "step": 82980 + }, + { + "epoch": 0.6007368962047673, + "grad_norm": 0.15274271368980408, + "learning_rate": 4.399270342461291e-06, + "loss": 0.9117, + "step": 82990 + }, + { + "epoch": 0.6008092828653536, + "grad_norm": 0.18588446080684662, + "learning_rate": 4.399197955800705e-06, + "loss": 0.9046, + "step": 83000 + }, + { + "epoch": 0.6008816695259398, + "grad_norm": 0.15456266701221466, + "learning_rate": 4.399125569140119e-06, + "loss": 0.8961, + "step": 83010 + }, + { + "epoch": 0.600954056186526, + "grad_norm": 0.17130137979984283, + "learning_rate": 4.399053182479533e-06, + "loss": 0.9042, + "step": 83020 + }, + { + "epoch": 0.6010264428471122, + "grad_norm": 0.15653465688228607, + "learning_rate": 4.398980795818947e-06, + "loss": 0.9129, + "step": 83030 + }, + { + "epoch": 0.6010988295076983, + "grad_norm": 0.1832706183195114, + "learning_rate": 4.39890840915836e-06, + "loss": 0.9146, + "step": 83040 + }, + { + "epoch": 0.6011712161682845, + "grad_norm": 0.1502702534198761, + "learning_rate": 4.398836022497775e-06, + "loss": 0.8918, + "step": 83050 + }, + { + "epoch": 0.6012436028288707, + "grad_norm": 0.19595682621002197, + "learning_rate": 4.398763635837188e-06, + "loss": 0.8992, + "step": 83060 + }, + { + "epoch": 0.6013159894894569, + "grad_norm": 0.16274172067642212, + "learning_rate": 4.398691249176602e-06, + "loss": 0.8992, + "step": 83070 + }, + { + "epoch": 0.601388376150043, + "grad_norm": 0.15060241520404816, + "learning_rate": 4.398618862516016e-06, + "loss": 0.9031, + "step": 83080 + }, + { + "epoch": 0.6014607628106292, + "grad_norm": 0.19672121107578278, + "learning_rate": 4.39854647585543e-06, + "loss": 0.9027, + "step": 83090 + }, + { + "epoch": 0.6015331494712154, + "grad_norm": 0.1655958890914917, + "learning_rate": 4.398474089194844e-06, + "loss": 0.9133, + "step": 83100 + }, + { + "epoch": 0.6016055361318017, + "grad_norm": 0.16771474480628967, + "learning_rate": 4.398401702534257e-06, + "loss": 0.9074, + "step": 83110 + }, + { + "epoch": 0.6016779227923879, + "grad_norm": 0.16556835174560547, + "learning_rate": 4.398329315873671e-06, + "loss": 0.8982, + "step": 83120 + }, + { + "epoch": 0.601750309452974, + "grad_norm": 0.15332920849323273, + "learning_rate": 4.398256929213085e-06, + "loss": 0.8985, + "step": 83130 + }, + { + "epoch": 0.6018226961135602, + "grad_norm": 0.1574867218732834, + "learning_rate": 4.398184542552499e-06, + "loss": 0.9085, + "step": 83140 + }, + { + "epoch": 0.6018950827741464, + "grad_norm": 0.17546038329601288, + "learning_rate": 4.398112155891913e-06, + "loss": 0.9062, + "step": 83150 + }, + { + "epoch": 0.6019674694347326, + "grad_norm": 0.1600867062807083, + "learning_rate": 4.398039769231326e-06, + "loss": 0.914, + "step": 83160 + }, + { + "epoch": 0.6020398560953187, + "grad_norm": 0.1494045853614807, + "learning_rate": 4.397967382570741e-06, + "loss": 0.8953, + "step": 83170 + }, + { + "epoch": 0.6021122427559049, + "grad_norm": 0.15468309819698334, + "learning_rate": 4.397894995910154e-06, + "loss": 0.9284, + "step": 83180 + }, + { + "epoch": 0.6021846294164911, + "grad_norm": 0.2398003190755844, + "learning_rate": 4.397822609249568e-06, + "loss": 0.9161, + "step": 83190 + }, + { + "epoch": 0.6022570160770773, + "grad_norm": 0.1596948802471161, + "learning_rate": 4.3977502225889815e-06, + "loss": 0.8963, + "step": 83200 + }, + { + "epoch": 0.6023294027376636, + "grad_norm": 0.16264113783836365, + "learning_rate": 4.397677835928396e-06, + "loss": 0.9142, + "step": 83210 + }, + { + "epoch": 0.6024017893982497, + "grad_norm": 0.1623883843421936, + "learning_rate": 4.39760544926781e-06, + "loss": 0.8912, + "step": 83220 + }, + { + "epoch": 0.6024741760588359, + "grad_norm": 0.16414840519428253, + "learning_rate": 4.397533062607223e-06, + "loss": 0.9061, + "step": 83230 + }, + { + "epoch": 0.6025465627194221, + "grad_norm": 0.16708609461784363, + "learning_rate": 4.397460675946637e-06, + "loss": 0.9081, + "step": 83240 + }, + { + "epoch": 0.6026189493800083, + "grad_norm": 0.15145167708396912, + "learning_rate": 4.397388289286051e-06, + "loss": 0.8966, + "step": 83250 + }, + { + "epoch": 0.6026913360405944, + "grad_norm": 0.31119629740715027, + "learning_rate": 4.397315902625465e-06, + "loss": 0.9119, + "step": 83260 + }, + { + "epoch": 0.6027637227011806, + "grad_norm": 0.16512273252010345, + "learning_rate": 4.3972435159648785e-06, + "loss": 0.9058, + "step": 83270 + }, + { + "epoch": 0.6028361093617668, + "grad_norm": 0.2106596678495407, + "learning_rate": 4.397171129304292e-06, + "loss": 0.896, + "step": 83280 + }, + { + "epoch": 0.602908496022353, + "grad_norm": 0.16792406141757965, + "learning_rate": 4.397098742643706e-06, + "loss": 0.9028, + "step": 83290 + }, + { + "epoch": 0.6029808826829391, + "grad_norm": 0.1586570292711258, + "learning_rate": 4.39702635598312e-06, + "loss": 0.9083, + "step": 83300 + }, + { + "epoch": 0.6030532693435253, + "grad_norm": 0.18892773985862732, + "learning_rate": 4.396953969322534e-06, + "loss": 0.9072, + "step": 83310 + }, + { + "epoch": 0.6031256560041116, + "grad_norm": 0.17914731800556183, + "learning_rate": 4.3968815826619475e-06, + "loss": 0.8988, + "step": 83320 + }, + { + "epoch": 0.6031980426646978, + "grad_norm": 0.15073609352111816, + "learning_rate": 4.396809196001361e-06, + "loss": 0.9003, + "step": 83330 + }, + { + "epoch": 0.603270429325284, + "grad_norm": 0.16799314320087433, + "learning_rate": 4.3967368093407755e-06, + "loss": 0.9045, + "step": 83340 + }, + { + "epoch": 0.6033428159858701, + "grad_norm": 0.16455517709255219, + "learning_rate": 4.396664422680189e-06, + "loss": 0.9059, + "step": 83350 + }, + { + "epoch": 0.6034152026464563, + "grad_norm": 0.22611026465892792, + "learning_rate": 4.396592036019603e-06, + "loss": 0.9087, + "step": 83360 + }, + { + "epoch": 0.6034875893070425, + "grad_norm": 0.15722450613975525, + "learning_rate": 4.396519649359016e-06, + "loss": 0.8941, + "step": 83370 + }, + { + "epoch": 0.6035599759676287, + "grad_norm": 0.1701124906539917, + "learning_rate": 4.39644726269843e-06, + "loss": 0.9138, + "step": 83380 + }, + { + "epoch": 0.6036323626282148, + "grad_norm": 0.1639043539762497, + "learning_rate": 4.396374876037844e-06, + "loss": 0.91, + "step": 83390 + }, + { + "epoch": 0.603704749288801, + "grad_norm": 0.15046794712543488, + "learning_rate": 4.396302489377258e-06, + "loss": 0.898, + "step": 83400 + }, + { + "epoch": 0.6037771359493872, + "grad_norm": 0.1862155944108963, + "learning_rate": 4.396230102716672e-06, + "loss": 0.9124, + "step": 83410 + }, + { + "epoch": 0.6038495226099735, + "grad_norm": 0.16142509877681732, + "learning_rate": 4.396157716056085e-06, + "loss": 0.8921, + "step": 83420 + }, + { + "epoch": 0.6039219092705597, + "grad_norm": 0.14882884919643402, + "learning_rate": 4.396085329395499e-06, + "loss": 0.9176, + "step": 83430 + }, + { + "epoch": 0.6039942959311458, + "grad_norm": 0.1499185711145401, + "learning_rate": 4.396012942734913e-06, + "loss": 0.9056, + "step": 83440 + }, + { + "epoch": 0.604066682591732, + "grad_norm": 0.15857931971549988, + "learning_rate": 4.395940556074327e-06, + "loss": 0.8983, + "step": 83450 + }, + { + "epoch": 0.6041390692523182, + "grad_norm": 0.16343648731708527, + "learning_rate": 4.395868169413741e-06, + "loss": 0.9089, + "step": 83460 + }, + { + "epoch": 0.6042114559129044, + "grad_norm": 0.1648421734571457, + "learning_rate": 4.395795782753154e-06, + "loss": 0.9001, + "step": 83470 + }, + { + "epoch": 0.6042838425734905, + "grad_norm": 0.15347935259342194, + "learning_rate": 4.395723396092568e-06, + "loss": 0.9173, + "step": 83480 + }, + { + "epoch": 0.6043562292340767, + "grad_norm": 0.1825142502784729, + "learning_rate": 4.395651009431982e-06, + "loss": 0.8914, + "step": 83490 + }, + { + "epoch": 0.6044286158946629, + "grad_norm": 0.17557092010974884, + "learning_rate": 4.395578622771396e-06, + "loss": 0.8938, + "step": 83500 + }, + { + "epoch": 0.6045010025552491, + "grad_norm": 0.15079079568386078, + "learning_rate": 4.3955062361108095e-06, + "loss": 0.8986, + "step": 83510 + }, + { + "epoch": 0.6045733892158353, + "grad_norm": 0.16225622594356537, + "learning_rate": 4.395433849450223e-06, + "loss": 0.8948, + "step": 83520 + }, + { + "epoch": 0.6046457758764215, + "grad_norm": 0.14633281528949738, + "learning_rate": 4.395361462789638e-06, + "loss": 0.9021, + "step": 83530 + }, + { + "epoch": 0.6047181625370077, + "grad_norm": 0.15504805743694305, + "learning_rate": 4.395289076129051e-06, + "loss": 0.9164, + "step": 83540 + }, + { + "epoch": 0.6047905491975939, + "grad_norm": 0.1608634889125824, + "learning_rate": 4.395216689468465e-06, + "loss": 0.9065, + "step": 83550 + }, + { + "epoch": 0.6048629358581801, + "grad_norm": 0.15265637636184692, + "learning_rate": 4.3951443028078785e-06, + "loss": 0.9109, + "step": 83560 + }, + { + "epoch": 0.6049353225187662, + "grad_norm": 0.16638517379760742, + "learning_rate": 4.395071916147293e-06, + "loss": 0.8959, + "step": 83570 + }, + { + "epoch": 0.6050077091793524, + "grad_norm": 0.15558472275733948, + "learning_rate": 4.3949995294867066e-06, + "loss": 0.8929, + "step": 83580 + }, + { + "epoch": 0.6050800958399386, + "grad_norm": 0.1535881906747818, + "learning_rate": 4.39492714282612e-06, + "loss": 0.9172, + "step": 83590 + }, + { + "epoch": 0.6051524825005248, + "grad_norm": 0.14769315719604492, + "learning_rate": 4.394854756165534e-06, + "loss": 0.9152, + "step": 83600 + }, + { + "epoch": 0.605224869161111, + "grad_norm": 0.1421438306570053, + "learning_rate": 4.394782369504948e-06, + "loss": 0.91, + "step": 83610 + }, + { + "epoch": 0.6052972558216971, + "grad_norm": 0.18984383344650269, + "learning_rate": 4.394709982844362e-06, + "loss": 0.9114, + "step": 83620 + }, + { + "epoch": 0.6053696424822833, + "grad_norm": 0.15073958039283752, + "learning_rate": 4.3946375961837755e-06, + "loss": 0.9094, + "step": 83630 + }, + { + "epoch": 0.6054420291428696, + "grad_norm": 0.15686622262001038, + "learning_rate": 4.394565209523189e-06, + "loss": 0.9015, + "step": 83640 + }, + { + "epoch": 0.6055144158034558, + "grad_norm": 0.15890394151210785, + "learning_rate": 4.3944928228626036e-06, + "loss": 0.9264, + "step": 83650 + }, + { + "epoch": 0.605586802464042, + "grad_norm": 0.15338025987148285, + "learning_rate": 4.394420436202017e-06, + "loss": 0.8971, + "step": 83660 + }, + { + "epoch": 0.6056591891246281, + "grad_norm": 0.1526980698108673, + "learning_rate": 4.394348049541431e-06, + "loss": 0.9015, + "step": 83670 + }, + { + "epoch": 0.6057315757852143, + "grad_norm": 0.15152287483215332, + "learning_rate": 4.394275662880844e-06, + "loss": 0.8988, + "step": 83680 + }, + { + "epoch": 0.6058039624458005, + "grad_norm": 0.15338623523712158, + "learning_rate": 4.394203276220259e-06, + "loss": 0.9021, + "step": 83690 + }, + { + "epoch": 0.6058763491063867, + "grad_norm": 0.1615828573703766, + "learning_rate": 4.3941308895596725e-06, + "loss": 0.9109, + "step": 83700 + }, + { + "epoch": 0.6059487357669728, + "grad_norm": 0.16938403248786926, + "learning_rate": 4.394058502899086e-06, + "loss": 0.8999, + "step": 83710 + }, + { + "epoch": 0.606021122427559, + "grad_norm": 0.154579296708107, + "learning_rate": 4.3939861162385e-06, + "loss": 0.8897, + "step": 83720 + }, + { + "epoch": 0.6060935090881452, + "grad_norm": 0.1727149486541748, + "learning_rate": 4.393913729577914e-06, + "loss": 0.9162, + "step": 83730 + }, + { + "epoch": 0.6061658957487315, + "grad_norm": 0.16071633994579315, + "learning_rate": 4.393841342917328e-06, + "loss": 0.9169, + "step": 83740 + }, + { + "epoch": 0.6062382824093177, + "grad_norm": 0.17092163860797882, + "learning_rate": 4.393768956256741e-06, + "loss": 0.9072, + "step": 83750 + }, + { + "epoch": 0.6063106690699038, + "grad_norm": 0.15256325900554657, + "learning_rate": 4.393696569596155e-06, + "loss": 0.8937, + "step": 83760 + }, + { + "epoch": 0.60638305573049, + "grad_norm": 0.16219750046730042, + "learning_rate": 4.3936241829355695e-06, + "loss": 0.9149, + "step": 83770 + }, + { + "epoch": 0.6064554423910762, + "grad_norm": 0.15577305853366852, + "learning_rate": 4.393551796274983e-06, + "loss": 0.9136, + "step": 83780 + }, + { + "epoch": 0.6065278290516624, + "grad_norm": 0.20615074038505554, + "learning_rate": 4.393479409614397e-06, + "loss": 0.9098, + "step": 83790 + }, + { + "epoch": 0.6066002157122485, + "grad_norm": 0.1500859558582306, + "learning_rate": 4.39340702295381e-06, + "loss": 0.908, + "step": 83800 + }, + { + "epoch": 0.6066726023728347, + "grad_norm": 0.1555171012878418, + "learning_rate": 4.393334636293225e-06, + "loss": 0.8914, + "step": 83810 + }, + { + "epoch": 0.6067449890334209, + "grad_norm": 0.16960106790065765, + "learning_rate": 4.3932622496326384e-06, + "loss": 0.9178, + "step": 83820 + }, + { + "epoch": 0.6068173756940071, + "grad_norm": 0.1492023915052414, + "learning_rate": 4.393189862972052e-06, + "loss": 0.9121, + "step": 83830 + }, + { + "epoch": 0.6068897623545932, + "grad_norm": 0.16274462640285492, + "learning_rate": 4.393117476311466e-06, + "loss": 0.9152, + "step": 83840 + }, + { + "epoch": 0.6069621490151795, + "grad_norm": 0.1625206470489502, + "learning_rate": 4.39304508965088e-06, + "loss": 0.9116, + "step": 83850 + }, + { + "epoch": 0.6070345356757657, + "grad_norm": 0.15123465657234192, + "learning_rate": 4.392972702990294e-06, + "loss": 0.9061, + "step": 83860 + }, + { + "epoch": 0.6071069223363519, + "grad_norm": 0.17103265225887299, + "learning_rate": 4.392900316329707e-06, + "loss": 0.9021, + "step": 83870 + }, + { + "epoch": 0.6071793089969381, + "grad_norm": 0.19635428488254547, + "learning_rate": 4.392827929669121e-06, + "loss": 0.8978, + "step": 83880 + }, + { + "epoch": 0.6072516956575242, + "grad_norm": 0.14300402998924255, + "learning_rate": 4.3927555430085354e-06, + "loss": 0.9072, + "step": 83890 + }, + { + "epoch": 0.6073240823181104, + "grad_norm": 0.1507839858531952, + "learning_rate": 4.392683156347948e-06, + "loss": 0.9102, + "step": 83900 + }, + { + "epoch": 0.6073964689786966, + "grad_norm": 0.16211600601673126, + "learning_rate": 4.392610769687362e-06, + "loss": 0.9038, + "step": 83910 + }, + { + "epoch": 0.6074688556392828, + "grad_norm": 0.1655724048614502, + "learning_rate": 4.392538383026776e-06, + "loss": 0.9158, + "step": 83920 + }, + { + "epoch": 0.6075412422998689, + "grad_norm": 0.16653649508953094, + "learning_rate": 4.39246599636619e-06, + "loss": 0.8987, + "step": 83930 + }, + { + "epoch": 0.6076136289604551, + "grad_norm": 0.1796729415655136, + "learning_rate": 4.3923936097056035e-06, + "loss": 0.9065, + "step": 83940 + }, + { + "epoch": 0.6076860156210414, + "grad_norm": 0.16883957386016846, + "learning_rate": 4.392321223045017e-06, + "loss": 0.9116, + "step": 83950 + }, + { + "epoch": 0.6077584022816276, + "grad_norm": 0.18032489717006683, + "learning_rate": 4.392248836384432e-06, + "loss": 0.8919, + "step": 83960 + }, + { + "epoch": 0.6078307889422138, + "grad_norm": 0.18664510548114777, + "learning_rate": 4.392176449723845e-06, + "loss": 0.9099, + "step": 83970 + }, + { + "epoch": 0.6079031756027999, + "grad_norm": 0.16412018239498138, + "learning_rate": 4.392104063063259e-06, + "loss": 0.902, + "step": 83980 + }, + { + "epoch": 0.6079755622633861, + "grad_norm": 0.17010684311389923, + "learning_rate": 4.3920316764026724e-06, + "loss": 0.9389, + "step": 83990 + }, + { + "epoch": 0.6080479489239723, + "grad_norm": 0.150782972574234, + "learning_rate": 4.391959289742087e-06, + "loss": 0.8912, + "step": 84000 + }, + { + "epoch": 0.6081203355845585, + "grad_norm": 0.17593304812908173, + "learning_rate": 4.3918869030815005e-06, + "loss": 0.8924, + "step": 84010 + }, + { + "epoch": 0.6081927222451446, + "grad_norm": 0.1518871784210205, + "learning_rate": 4.391814516420914e-06, + "loss": 0.9028, + "step": 84020 + }, + { + "epoch": 0.6082651089057308, + "grad_norm": 0.1600775122642517, + "learning_rate": 4.391742129760328e-06, + "loss": 0.9032, + "step": 84030 + }, + { + "epoch": 0.608337495566317, + "grad_norm": 0.1590421050786972, + "learning_rate": 4.391669743099742e-06, + "loss": 0.8984, + "step": 84040 + }, + { + "epoch": 0.6084098822269032, + "grad_norm": 0.17040304839611053, + "learning_rate": 4.391597356439156e-06, + "loss": 0.8969, + "step": 84050 + }, + { + "epoch": 0.6084822688874895, + "grad_norm": 0.1723751425743103, + "learning_rate": 4.3915249697785694e-06, + "loss": 0.8904, + "step": 84060 + }, + { + "epoch": 0.6085546555480756, + "grad_norm": 0.14594772458076477, + "learning_rate": 4.391452583117983e-06, + "loss": 0.9142, + "step": 84070 + }, + { + "epoch": 0.6086270422086618, + "grad_norm": 0.16909512877464294, + "learning_rate": 4.391380196457397e-06, + "loss": 0.8943, + "step": 84080 + }, + { + "epoch": 0.608699428869248, + "grad_norm": 0.1714186817407608, + "learning_rate": 4.391307809796811e-06, + "loss": 0.9039, + "step": 84090 + }, + { + "epoch": 0.6087718155298342, + "grad_norm": 0.16077734529972076, + "learning_rate": 4.391235423136225e-06, + "loss": 0.912, + "step": 84100 + }, + { + "epoch": 0.6088442021904203, + "grad_norm": 0.1786806732416153, + "learning_rate": 4.391163036475638e-06, + "loss": 0.915, + "step": 84110 + }, + { + "epoch": 0.6089165888510065, + "grad_norm": 0.15787489712238312, + "learning_rate": 4.391090649815052e-06, + "loss": 0.9037, + "step": 84120 + }, + { + "epoch": 0.6089889755115927, + "grad_norm": 0.14963692426681519, + "learning_rate": 4.3910182631544665e-06, + "loss": 0.8954, + "step": 84130 + }, + { + "epoch": 0.6090613621721789, + "grad_norm": 0.15370801091194153, + "learning_rate": 4.39094587649388e-06, + "loss": 0.9128, + "step": 84140 + }, + { + "epoch": 0.609133748832765, + "grad_norm": 0.16809116303920746, + "learning_rate": 4.390873489833294e-06, + "loss": 0.9006, + "step": 84150 + }, + { + "epoch": 0.6092061354933512, + "grad_norm": 0.16092905402183533, + "learning_rate": 4.390801103172707e-06, + "loss": 0.8976, + "step": 84160 + }, + { + "epoch": 0.6092785221539375, + "grad_norm": 0.1607845276594162, + "learning_rate": 4.390728716512122e-06, + "loss": 0.9273, + "step": 84170 + }, + { + "epoch": 0.6093509088145237, + "grad_norm": 0.15682317316532135, + "learning_rate": 4.390656329851535e-06, + "loss": 0.9062, + "step": 84180 + }, + { + "epoch": 0.6094232954751099, + "grad_norm": 0.154841810464859, + "learning_rate": 4.390583943190949e-06, + "loss": 0.9231, + "step": 84190 + }, + { + "epoch": 0.609495682135696, + "grad_norm": 0.16527412831783295, + "learning_rate": 4.390511556530363e-06, + "loss": 0.9122, + "step": 84200 + }, + { + "epoch": 0.6095680687962822, + "grad_norm": 0.15766416490077972, + "learning_rate": 4.390439169869777e-06, + "loss": 0.8958, + "step": 84210 + }, + { + "epoch": 0.6096404554568684, + "grad_norm": 0.17075148224830627, + "learning_rate": 4.390366783209191e-06, + "loss": 0.9055, + "step": 84220 + }, + { + "epoch": 0.6097128421174546, + "grad_norm": 0.15440765023231506, + "learning_rate": 4.390294396548604e-06, + "loss": 0.9209, + "step": 84230 + }, + { + "epoch": 0.6097852287780408, + "grad_norm": 0.1687108278274536, + "learning_rate": 4.390222009888018e-06, + "loss": 0.9068, + "step": 84240 + }, + { + "epoch": 0.6098576154386269, + "grad_norm": 0.15964575111865997, + "learning_rate": 4.390149623227432e-06, + "loss": 0.9099, + "step": 84250 + }, + { + "epoch": 0.6099300020992131, + "grad_norm": 0.15133626759052277, + "learning_rate": 4.390077236566846e-06, + "loss": 0.912, + "step": 84260 + }, + { + "epoch": 0.6100023887597994, + "grad_norm": 0.16388767957687378, + "learning_rate": 4.39000484990626e-06, + "loss": 0.9091, + "step": 84270 + }, + { + "epoch": 0.6100747754203856, + "grad_norm": 0.16977226734161377, + "learning_rate": 4.389932463245673e-06, + "loss": 0.9041, + "step": 84280 + }, + { + "epoch": 0.6101471620809717, + "grad_norm": 0.2612716555595398, + "learning_rate": 4.389860076585088e-06, + "loss": 0.9024, + "step": 84290 + }, + { + "epoch": 0.6102195487415579, + "grad_norm": 0.17897847294807434, + "learning_rate": 4.389787689924501e-06, + "loss": 0.898, + "step": 84300 + }, + { + "epoch": 0.6102919354021441, + "grad_norm": 0.16913948953151703, + "learning_rate": 4.389715303263915e-06, + "loss": 0.9011, + "step": 84310 + }, + { + "epoch": 0.6103643220627303, + "grad_norm": 0.1693422794342041, + "learning_rate": 4.3896429166033286e-06, + "loss": 0.91, + "step": 84320 + }, + { + "epoch": 0.6104367087233165, + "grad_norm": 0.14860454201698303, + "learning_rate": 4.389570529942743e-06, + "loss": 0.9087, + "step": 84330 + }, + { + "epoch": 0.6105090953839026, + "grad_norm": 0.17605887353420258, + "learning_rate": 4.389498143282157e-06, + "loss": 0.8988, + "step": 84340 + }, + { + "epoch": 0.6105814820444888, + "grad_norm": 0.2861873507499695, + "learning_rate": 4.38942575662157e-06, + "loss": 0.8955, + "step": 84350 + }, + { + "epoch": 0.610653868705075, + "grad_norm": 0.1683700680732727, + "learning_rate": 4.389353369960984e-06, + "loss": 0.8973, + "step": 84360 + }, + { + "epoch": 0.6107262553656612, + "grad_norm": 0.15339066088199615, + "learning_rate": 4.389280983300398e-06, + "loss": 0.9137, + "step": 84370 + }, + { + "epoch": 0.6107986420262475, + "grad_norm": 0.15601833164691925, + "learning_rate": 4.389208596639812e-06, + "loss": 0.9166, + "step": 84380 + }, + { + "epoch": 0.6108710286868336, + "grad_norm": 0.16695159673690796, + "learning_rate": 4.3891362099792256e-06, + "loss": 0.902, + "step": 84390 + }, + { + "epoch": 0.6109434153474198, + "grad_norm": 0.1545722484588623, + "learning_rate": 4.389063823318639e-06, + "loss": 0.9112, + "step": 84400 + }, + { + "epoch": 0.611015802008006, + "grad_norm": 0.17581118643283844, + "learning_rate": 4.388991436658054e-06, + "loss": 0.9104, + "step": 84410 + }, + { + "epoch": 0.6110881886685922, + "grad_norm": 0.16836950182914734, + "learning_rate": 4.388919049997467e-06, + "loss": 0.9007, + "step": 84420 + }, + { + "epoch": 0.6111605753291783, + "grad_norm": 0.1791825294494629, + "learning_rate": 4.38884666333688e-06, + "loss": 0.8978, + "step": 84430 + }, + { + "epoch": 0.6112329619897645, + "grad_norm": 0.17053841054439545, + "learning_rate": 4.3887742766762945e-06, + "loss": 0.9075, + "step": 84440 + }, + { + "epoch": 0.6113053486503507, + "grad_norm": 0.15153911709785461, + "learning_rate": 4.388701890015708e-06, + "loss": 0.9048, + "step": 84450 + }, + { + "epoch": 0.6113777353109369, + "grad_norm": 0.15731872618198395, + "learning_rate": 4.388629503355122e-06, + "loss": 0.9043, + "step": 84460 + }, + { + "epoch": 0.611450121971523, + "grad_norm": 0.15405894815921783, + "learning_rate": 4.388557116694535e-06, + "loss": 0.9059, + "step": 84470 + }, + { + "epoch": 0.6115225086321093, + "grad_norm": 0.15265247225761414, + "learning_rate": 4.38848473003395e-06, + "loss": 0.903, + "step": 84480 + }, + { + "epoch": 0.6115948952926955, + "grad_norm": 0.16418184340000153, + "learning_rate": 4.388412343373363e-06, + "loss": 0.898, + "step": 84490 + }, + { + "epoch": 0.6116672819532817, + "grad_norm": 0.16227209568023682, + "learning_rate": 4.388339956712777e-06, + "loss": 0.9145, + "step": 84500 + }, + { + "epoch": 0.6117396686138679, + "grad_norm": 0.20757746696472168, + "learning_rate": 4.388267570052191e-06, + "loss": 0.9047, + "step": 84510 + }, + { + "epoch": 0.611812055274454, + "grad_norm": 0.1717897206544876, + "learning_rate": 4.388195183391605e-06, + "loss": 0.8976, + "step": 84520 + }, + { + "epoch": 0.6118844419350402, + "grad_norm": 0.1659894585609436, + "learning_rate": 4.388122796731019e-06, + "loss": 0.9076, + "step": 84530 + }, + { + "epoch": 0.6119568285956264, + "grad_norm": 0.15432026982307434, + "learning_rate": 4.388050410070432e-06, + "loss": 0.9111, + "step": 84540 + }, + { + "epoch": 0.6120292152562126, + "grad_norm": 0.15473110973834991, + "learning_rate": 4.387978023409846e-06, + "loss": 0.9041, + "step": 84550 + }, + { + "epoch": 0.6121016019167987, + "grad_norm": 0.18219618499279022, + "learning_rate": 4.3879056367492604e-06, + "loss": 0.8956, + "step": 84560 + }, + { + "epoch": 0.6121739885773849, + "grad_norm": 0.15730948746204376, + "learning_rate": 4.387833250088674e-06, + "loss": 0.9085, + "step": 84570 + }, + { + "epoch": 0.6122463752379711, + "grad_norm": 0.14876548945903778, + "learning_rate": 4.387760863428088e-06, + "loss": 0.9082, + "step": 84580 + }, + { + "epoch": 0.6123187618985574, + "grad_norm": 0.1581803560256958, + "learning_rate": 4.387688476767501e-06, + "loss": 0.9161, + "step": 84590 + }, + { + "epoch": 0.6123911485591436, + "grad_norm": 0.1603010892868042, + "learning_rate": 4.387616090106916e-06, + "loss": 0.9022, + "step": 84600 + }, + { + "epoch": 0.6124635352197297, + "grad_norm": 0.17957136034965515, + "learning_rate": 4.387543703446329e-06, + "loss": 0.9077, + "step": 84610 + }, + { + "epoch": 0.6125359218803159, + "grad_norm": 0.1617906540632248, + "learning_rate": 4.387471316785743e-06, + "loss": 0.9247, + "step": 84620 + }, + { + "epoch": 0.6126083085409021, + "grad_norm": 0.1665765643119812, + "learning_rate": 4.387398930125157e-06, + "loss": 0.9073, + "step": 84630 + }, + { + "epoch": 0.6126806952014883, + "grad_norm": 0.1667291522026062, + "learning_rate": 4.387326543464571e-06, + "loss": 0.8995, + "step": 84640 + }, + { + "epoch": 0.6127530818620744, + "grad_norm": 0.1612149178981781, + "learning_rate": 4.387254156803985e-06, + "loss": 0.9066, + "step": 84650 + }, + { + "epoch": 0.6128254685226606, + "grad_norm": 0.15186448395252228, + "learning_rate": 4.387181770143398e-06, + "loss": 0.9118, + "step": 84660 + }, + { + "epoch": 0.6128978551832468, + "grad_norm": 0.15720537304878235, + "learning_rate": 4.387109383482812e-06, + "loss": 0.8964, + "step": 84670 + }, + { + "epoch": 0.612970241843833, + "grad_norm": 0.1654808670282364, + "learning_rate": 4.387036996822226e-06, + "loss": 0.9022, + "step": 84680 + }, + { + "epoch": 0.6130426285044192, + "grad_norm": 0.15336300432682037, + "learning_rate": 4.38696461016164e-06, + "loss": 0.9128, + "step": 84690 + }, + { + "epoch": 0.6131150151650054, + "grad_norm": 0.15341922640800476, + "learning_rate": 4.386892223501054e-06, + "loss": 0.9072, + "step": 84700 + }, + { + "epoch": 0.6131874018255916, + "grad_norm": 0.15757760405540466, + "learning_rate": 4.386819836840467e-06, + "loss": 0.895, + "step": 84710 + }, + { + "epoch": 0.6132597884861778, + "grad_norm": 0.1582041084766388, + "learning_rate": 4.386747450179881e-06, + "loss": 0.9047, + "step": 84720 + }, + { + "epoch": 0.613332175146764, + "grad_norm": 0.19254916906356812, + "learning_rate": 4.386675063519295e-06, + "loss": 0.8918, + "step": 84730 + }, + { + "epoch": 0.6134045618073501, + "grad_norm": 0.14358896017074585, + "learning_rate": 4.386602676858709e-06, + "loss": 0.8909, + "step": 84740 + }, + { + "epoch": 0.6134769484679363, + "grad_norm": 0.15055955946445465, + "learning_rate": 4.3865302901981225e-06, + "loss": 0.9093, + "step": 84750 + }, + { + "epoch": 0.6135493351285225, + "grad_norm": 0.1698206514120102, + "learning_rate": 4.386457903537536e-06, + "loss": 0.8958, + "step": 84760 + }, + { + "epoch": 0.6136217217891087, + "grad_norm": 0.1727665215730667, + "learning_rate": 4.386385516876951e-06, + "loss": 0.9116, + "step": 84770 + }, + { + "epoch": 0.6136941084496949, + "grad_norm": 0.1568523496389389, + "learning_rate": 4.386313130216364e-06, + "loss": 0.9059, + "step": 84780 + }, + { + "epoch": 0.613766495110281, + "grad_norm": 0.17023316025733948, + "learning_rate": 4.386240743555778e-06, + "loss": 0.9019, + "step": 84790 + }, + { + "epoch": 0.6138388817708673, + "grad_norm": 0.15724539756774902, + "learning_rate": 4.3861683568951914e-06, + "loss": 0.9167, + "step": 84800 + }, + { + "epoch": 0.6139112684314535, + "grad_norm": 0.17113758623600006, + "learning_rate": 4.386095970234606e-06, + "loss": 0.8977, + "step": 84810 + }, + { + "epoch": 0.6139836550920397, + "grad_norm": 0.166314959526062, + "learning_rate": 4.3860235835740195e-06, + "loss": 0.9104, + "step": 84820 + }, + { + "epoch": 0.6140560417526258, + "grad_norm": 0.1817166805267334, + "learning_rate": 4.385951196913433e-06, + "loss": 0.902, + "step": 84830 + }, + { + "epoch": 0.614128428413212, + "grad_norm": 0.1507468968629837, + "learning_rate": 4.385878810252847e-06, + "loss": 0.8974, + "step": 84840 + }, + { + "epoch": 0.6142008150737982, + "grad_norm": 0.15710368752479553, + "learning_rate": 4.385806423592261e-06, + "loss": 0.9036, + "step": 84850 + }, + { + "epoch": 0.6142732017343844, + "grad_norm": 0.14728538691997528, + "learning_rate": 4.385734036931675e-06, + "loss": 0.9031, + "step": 84860 + }, + { + "epoch": 0.6143455883949706, + "grad_norm": 0.15462151169776917, + "learning_rate": 4.3856616502710885e-06, + "loss": 0.9021, + "step": 84870 + }, + { + "epoch": 0.6144179750555567, + "grad_norm": 0.1638130396604538, + "learning_rate": 4.385589263610502e-06, + "loss": 0.92, + "step": 84880 + }, + { + "epoch": 0.6144903617161429, + "grad_norm": 0.1549723744392395, + "learning_rate": 4.3855168769499165e-06, + "loss": 0.9028, + "step": 84890 + }, + { + "epoch": 0.6145627483767291, + "grad_norm": 0.15805752575397491, + "learning_rate": 4.38544449028933e-06, + "loss": 0.899, + "step": 84900 + }, + { + "epoch": 0.6146351350373154, + "grad_norm": 0.17812702059745789, + "learning_rate": 4.385372103628744e-06, + "loss": 0.9045, + "step": 84910 + }, + { + "epoch": 0.6147075216979015, + "grad_norm": 0.15838688611984253, + "learning_rate": 4.385299716968157e-06, + "loss": 0.9154, + "step": 84920 + }, + { + "epoch": 0.6147799083584877, + "grad_norm": 0.16311374306678772, + "learning_rate": 4.385227330307572e-06, + "loss": 0.8976, + "step": 84930 + }, + { + "epoch": 0.6148522950190739, + "grad_norm": 0.15058547258377075, + "learning_rate": 4.3851549436469855e-06, + "loss": 0.8918, + "step": 84940 + }, + { + "epoch": 0.6149246816796601, + "grad_norm": 0.15357309579849243, + "learning_rate": 4.385082556986399e-06, + "loss": 0.9086, + "step": 84950 + }, + { + "epoch": 0.6149970683402463, + "grad_norm": 0.15655238926410675, + "learning_rate": 4.385010170325813e-06, + "loss": 0.9006, + "step": 84960 + }, + { + "epoch": 0.6150694550008324, + "grad_norm": 0.17502596974372864, + "learning_rate": 4.384937783665226e-06, + "loss": 0.8993, + "step": 84970 + }, + { + "epoch": 0.6151418416614186, + "grad_norm": 0.1721956878900528, + "learning_rate": 4.38486539700464e-06, + "loss": 0.8934, + "step": 84980 + }, + { + "epoch": 0.6152142283220048, + "grad_norm": 0.1560133844614029, + "learning_rate": 4.3847930103440535e-06, + "loss": 0.9097, + "step": 84990 + }, + { + "epoch": 0.615286614982591, + "grad_norm": 0.15889939665794373, + "learning_rate": 4.384720623683468e-06, + "loss": 0.8942, + "step": 85000 + }, + { + "epoch": 0.6153590016431773, + "grad_norm": 0.16215567290782928, + "learning_rate": 4.384648237022882e-06, + "loss": 0.9017, + "step": 85010 + }, + { + "epoch": 0.6154313883037634, + "grad_norm": 0.16176781058311462, + "learning_rate": 4.384575850362295e-06, + "loss": 0.8989, + "step": 85020 + }, + { + "epoch": 0.6155037749643496, + "grad_norm": 0.1544492542743683, + "learning_rate": 4.384503463701709e-06, + "loss": 0.9044, + "step": 85030 + }, + { + "epoch": 0.6155761616249358, + "grad_norm": 0.14962157607078552, + "learning_rate": 4.384431077041123e-06, + "loss": 0.8937, + "step": 85040 + }, + { + "epoch": 0.615648548285522, + "grad_norm": 0.16228331625461578, + "learning_rate": 4.384358690380537e-06, + "loss": 0.9054, + "step": 85050 + }, + { + "epoch": 0.6157209349461081, + "grad_norm": 0.1617509126663208, + "learning_rate": 4.3842863037199506e-06, + "loss": 0.8999, + "step": 85060 + }, + { + "epoch": 0.6157933216066943, + "grad_norm": 0.15040123462677002, + "learning_rate": 4.384213917059364e-06, + "loss": 0.9004, + "step": 85070 + }, + { + "epoch": 0.6158657082672805, + "grad_norm": 0.15811018645763397, + "learning_rate": 4.384141530398779e-06, + "loss": 0.8944, + "step": 85080 + }, + { + "epoch": 0.6159380949278667, + "grad_norm": 0.17127995193004608, + "learning_rate": 4.384069143738192e-06, + "loss": 0.8957, + "step": 85090 + }, + { + "epoch": 0.6160104815884528, + "grad_norm": 0.16223451495170593, + "learning_rate": 4.383996757077606e-06, + "loss": 0.8946, + "step": 85100 + }, + { + "epoch": 0.616082868249039, + "grad_norm": 0.1593468189239502, + "learning_rate": 4.3839243704170195e-06, + "loss": 0.908, + "step": 85110 + }, + { + "epoch": 0.6161552549096253, + "grad_norm": 0.16201543807983398, + "learning_rate": 4.383851983756434e-06, + "loss": 0.9083, + "step": 85120 + }, + { + "epoch": 0.6162276415702115, + "grad_norm": 0.15319061279296875, + "learning_rate": 4.3837795970958476e-06, + "loss": 0.9078, + "step": 85130 + }, + { + "epoch": 0.6163000282307977, + "grad_norm": 0.15424944460391998, + "learning_rate": 4.383707210435261e-06, + "loss": 0.9044, + "step": 85140 + }, + { + "epoch": 0.6163724148913838, + "grad_norm": 0.17157965898513794, + "learning_rate": 4.383634823774675e-06, + "loss": 0.9082, + "step": 85150 + }, + { + "epoch": 0.61644480155197, + "grad_norm": 0.15128067135810852, + "learning_rate": 4.383562437114089e-06, + "loss": 0.9109, + "step": 85160 + }, + { + "epoch": 0.6165171882125562, + "grad_norm": 0.170233815908432, + "learning_rate": 4.383490050453503e-06, + "loss": 0.901, + "step": 85170 + }, + { + "epoch": 0.6165895748731424, + "grad_norm": 0.16256168484687805, + "learning_rate": 4.3834176637929165e-06, + "loss": 0.9104, + "step": 85180 + }, + { + "epoch": 0.6166619615337285, + "grad_norm": 0.16661547124385834, + "learning_rate": 4.38334527713233e-06, + "loss": 0.9079, + "step": 85190 + }, + { + "epoch": 0.6167343481943147, + "grad_norm": 0.17816273868083954, + "learning_rate": 4.3832728904717446e-06, + "loss": 0.9076, + "step": 85200 + }, + { + "epoch": 0.6168067348549009, + "grad_norm": 0.16999828815460205, + "learning_rate": 4.383200503811158e-06, + "loss": 0.9152, + "step": 85210 + }, + { + "epoch": 0.6168791215154871, + "grad_norm": 0.14718665182590485, + "learning_rate": 4.383128117150572e-06, + "loss": 0.8982, + "step": 85220 + }, + { + "epoch": 0.6169515081760734, + "grad_norm": 0.2667122781276703, + "learning_rate": 4.383055730489985e-06, + "loss": 0.9059, + "step": 85230 + }, + { + "epoch": 0.6170238948366595, + "grad_norm": 0.1408415138721466, + "learning_rate": 4.3829833438294e-06, + "loss": 0.9018, + "step": 85240 + }, + { + "epoch": 0.6170962814972457, + "grad_norm": 0.16204825043678284, + "learning_rate": 4.3829109571688135e-06, + "loss": 0.8952, + "step": 85250 + }, + { + "epoch": 0.6171686681578319, + "grad_norm": 0.1726893186569214, + "learning_rate": 4.382838570508227e-06, + "loss": 0.9127, + "step": 85260 + }, + { + "epoch": 0.6172410548184181, + "grad_norm": 0.1730419099330902, + "learning_rate": 4.382766183847641e-06, + "loss": 0.9003, + "step": 85270 + }, + { + "epoch": 0.6173134414790042, + "grad_norm": 0.17287707328796387, + "learning_rate": 4.382693797187055e-06, + "loss": 0.8961, + "step": 85280 + }, + { + "epoch": 0.6173858281395904, + "grad_norm": 0.1501106470823288, + "learning_rate": 4.382621410526469e-06, + "loss": 0.9104, + "step": 85290 + }, + { + "epoch": 0.6174582148001766, + "grad_norm": 0.16863562166690826, + "learning_rate": 4.382549023865882e-06, + "loss": 0.8988, + "step": 85300 + }, + { + "epoch": 0.6175306014607628, + "grad_norm": 0.1819497048854828, + "learning_rate": 4.382476637205296e-06, + "loss": 0.9, + "step": 85310 + }, + { + "epoch": 0.617602988121349, + "grad_norm": 0.15243135392665863, + "learning_rate": 4.3824042505447105e-06, + "loss": 0.9003, + "step": 85320 + }, + { + "epoch": 0.6176753747819352, + "grad_norm": 0.1578928381204605, + "learning_rate": 4.382331863884124e-06, + "loss": 0.9003, + "step": 85330 + }, + { + "epoch": 0.6177477614425214, + "grad_norm": 0.15596744418144226, + "learning_rate": 4.382259477223538e-06, + "loss": 0.9036, + "step": 85340 + }, + { + "epoch": 0.6178201481031076, + "grad_norm": 0.16625262796878815, + "learning_rate": 4.382187090562951e-06, + "loss": 0.9022, + "step": 85350 + }, + { + "epoch": 0.6178925347636938, + "grad_norm": 0.14543043076992035, + "learning_rate": 4.382114703902365e-06, + "loss": 0.8922, + "step": 85360 + }, + { + "epoch": 0.61796492142428, + "grad_norm": 0.14668937027454376, + "learning_rate": 4.3820423172417794e-06, + "loss": 0.8963, + "step": 85370 + }, + { + "epoch": 0.6180373080848661, + "grad_norm": 0.15720996260643005, + "learning_rate": 4.381969930581193e-06, + "loss": 0.9104, + "step": 85380 + }, + { + "epoch": 0.6181096947454523, + "grad_norm": 0.20788639783859253, + "learning_rate": 4.381897543920607e-06, + "loss": 0.908, + "step": 85390 + }, + { + "epoch": 0.6181820814060385, + "grad_norm": 0.1491943746805191, + "learning_rate": 4.38182515726002e-06, + "loss": 0.9125, + "step": 85400 + }, + { + "epoch": 0.6182544680666247, + "grad_norm": 0.16570164263248444, + "learning_rate": 4.381752770599435e-06, + "loss": 0.8988, + "step": 85410 + }, + { + "epoch": 0.6183268547272108, + "grad_norm": 0.15333351492881775, + "learning_rate": 4.381680383938848e-06, + "loss": 0.9015, + "step": 85420 + }, + { + "epoch": 0.618399241387797, + "grad_norm": 0.15450377762317657, + "learning_rate": 4.381607997278262e-06, + "loss": 0.9206, + "step": 85430 + }, + { + "epoch": 0.6184716280483833, + "grad_norm": 0.15417149662971497, + "learning_rate": 4.381535610617676e-06, + "loss": 0.9062, + "step": 85440 + }, + { + "epoch": 0.6185440147089695, + "grad_norm": 0.1625894010066986, + "learning_rate": 4.38146322395709e-06, + "loss": 0.9074, + "step": 85450 + }, + { + "epoch": 0.6186164013695556, + "grad_norm": 0.15775802731513977, + "learning_rate": 4.381390837296504e-06, + "loss": 0.9068, + "step": 85460 + }, + { + "epoch": 0.6186887880301418, + "grad_norm": 0.16794852912425995, + "learning_rate": 4.381318450635917e-06, + "loss": 0.896, + "step": 85470 + }, + { + "epoch": 0.618761174690728, + "grad_norm": 0.158334419131279, + "learning_rate": 4.381246063975331e-06, + "loss": 0.8872, + "step": 85480 + }, + { + "epoch": 0.6188335613513142, + "grad_norm": 0.15617749094963074, + "learning_rate": 4.3811736773147445e-06, + "loss": 0.9026, + "step": 85490 + }, + { + "epoch": 0.6189059480119004, + "grad_norm": 0.16775725781917572, + "learning_rate": 4.381101290654158e-06, + "loss": 0.9062, + "step": 85500 + }, + { + "epoch": 0.6189783346724865, + "grad_norm": 0.16923066973686218, + "learning_rate": 4.381028903993572e-06, + "loss": 0.9077, + "step": 85510 + }, + { + "epoch": 0.6190507213330727, + "grad_norm": 0.16332891583442688, + "learning_rate": 4.380956517332986e-06, + "loss": 0.8984, + "step": 85520 + }, + { + "epoch": 0.6191231079936589, + "grad_norm": 0.1725182682275772, + "learning_rate": 4.3808841306724e-06, + "loss": 0.8937, + "step": 85530 + }, + { + "epoch": 0.6191954946542452, + "grad_norm": 0.17352226376533508, + "learning_rate": 4.3808117440118134e-06, + "loss": 0.9188, + "step": 85540 + }, + { + "epoch": 0.6192678813148313, + "grad_norm": 0.1709306836128235, + "learning_rate": 4.380739357351227e-06, + "loss": 0.8912, + "step": 85550 + }, + { + "epoch": 0.6193402679754175, + "grad_norm": 0.2021743208169937, + "learning_rate": 4.3806669706906415e-06, + "loss": 0.9081, + "step": 85560 + }, + { + "epoch": 0.6194126546360037, + "grad_norm": 0.14920145273208618, + "learning_rate": 4.380594584030055e-06, + "loss": 0.8997, + "step": 85570 + }, + { + "epoch": 0.6194850412965899, + "grad_norm": 0.1603516936302185, + "learning_rate": 4.380522197369469e-06, + "loss": 0.9109, + "step": 85580 + }, + { + "epoch": 0.619557427957176, + "grad_norm": 0.14569969475269318, + "learning_rate": 4.380449810708882e-06, + "loss": 0.896, + "step": 85590 + }, + { + "epoch": 0.6196298146177622, + "grad_norm": 0.16068822145462036, + "learning_rate": 4.380377424048297e-06, + "loss": 0.8974, + "step": 85600 + }, + { + "epoch": 0.6197022012783484, + "grad_norm": 0.16494551301002502, + "learning_rate": 4.3803050373877105e-06, + "loss": 0.8991, + "step": 85610 + }, + { + "epoch": 0.6197745879389346, + "grad_norm": 0.15262630581855774, + "learning_rate": 4.380232650727124e-06, + "loss": 0.8966, + "step": 85620 + }, + { + "epoch": 0.6198469745995208, + "grad_norm": 0.15079142153263092, + "learning_rate": 4.380160264066538e-06, + "loss": 0.8946, + "step": 85630 + }, + { + "epoch": 0.6199193612601069, + "grad_norm": 0.15024054050445557, + "learning_rate": 4.380087877405952e-06, + "loss": 0.9155, + "step": 85640 + }, + { + "epoch": 0.6199917479206932, + "grad_norm": 0.17157381772994995, + "learning_rate": 4.380015490745366e-06, + "loss": 0.9006, + "step": 85650 + }, + { + "epoch": 0.6200641345812794, + "grad_norm": 0.1453818827867508, + "learning_rate": 4.379943104084779e-06, + "loss": 0.9009, + "step": 85660 + }, + { + "epoch": 0.6201365212418656, + "grad_norm": 0.15377745032310486, + "learning_rate": 4.379870717424193e-06, + "loss": 0.8998, + "step": 85670 + }, + { + "epoch": 0.6202089079024518, + "grad_norm": 0.16755272448062897, + "learning_rate": 4.3797983307636075e-06, + "loss": 0.8894, + "step": 85680 + }, + { + "epoch": 0.6202812945630379, + "grad_norm": 0.1771673858165741, + "learning_rate": 4.379725944103021e-06, + "loss": 0.9014, + "step": 85690 + }, + { + "epoch": 0.6203536812236241, + "grad_norm": 0.1448373943567276, + "learning_rate": 4.379653557442435e-06, + "loss": 0.8946, + "step": 85700 + }, + { + "epoch": 0.6204260678842103, + "grad_norm": 0.15097440779209137, + "learning_rate": 4.379581170781848e-06, + "loss": 0.8906, + "step": 85710 + }, + { + "epoch": 0.6204984545447965, + "grad_norm": 0.1607360988855362, + "learning_rate": 4.379508784121263e-06, + "loss": 0.9011, + "step": 85720 + }, + { + "epoch": 0.6205708412053826, + "grad_norm": 0.16542325913906097, + "learning_rate": 4.379436397460676e-06, + "loss": 0.9062, + "step": 85730 + }, + { + "epoch": 0.6206432278659688, + "grad_norm": 0.18820612132549286, + "learning_rate": 4.37936401080009e-06, + "loss": 0.9047, + "step": 85740 + }, + { + "epoch": 0.620715614526555, + "grad_norm": 0.15945078432559967, + "learning_rate": 4.379291624139504e-06, + "loss": 0.9064, + "step": 85750 + }, + { + "epoch": 0.6207880011871413, + "grad_norm": 0.2132113128900528, + "learning_rate": 4.379219237478918e-06, + "loss": 0.9078, + "step": 85760 + }, + { + "epoch": 0.6208603878477275, + "grad_norm": 0.15202796459197998, + "learning_rate": 4.379146850818332e-06, + "loss": 0.901, + "step": 85770 + }, + { + "epoch": 0.6209327745083136, + "grad_norm": 0.15480341017246246, + "learning_rate": 4.379074464157745e-06, + "loss": 0.9037, + "step": 85780 + }, + { + "epoch": 0.6210051611688998, + "grad_norm": 0.22787299752235413, + "learning_rate": 4.379002077497159e-06, + "loss": 0.9112, + "step": 85790 + }, + { + "epoch": 0.621077547829486, + "grad_norm": 0.16583016514778137, + "learning_rate": 4.378929690836573e-06, + "loss": 0.9095, + "step": 85800 + }, + { + "epoch": 0.6211499344900722, + "grad_norm": 0.1868494153022766, + "learning_rate": 4.378857304175987e-06, + "loss": 0.8924, + "step": 85810 + }, + { + "epoch": 0.6212223211506583, + "grad_norm": 0.18490175902843475, + "learning_rate": 4.378784917515401e-06, + "loss": 0.8871, + "step": 85820 + }, + { + "epoch": 0.6212947078112445, + "grad_norm": 0.15634137392044067, + "learning_rate": 4.378712530854814e-06, + "loss": 0.9004, + "step": 85830 + }, + { + "epoch": 0.6213670944718307, + "grad_norm": 0.24686381220817566, + "learning_rate": 4.378640144194229e-06, + "loss": 0.8762, + "step": 85840 + }, + { + "epoch": 0.6214394811324169, + "grad_norm": 0.1837376058101654, + "learning_rate": 4.378567757533642e-06, + "loss": 0.9133, + "step": 85850 + }, + { + "epoch": 0.6215118677930032, + "grad_norm": 0.16701044142246246, + "learning_rate": 4.378495370873056e-06, + "loss": 0.9067, + "step": 85860 + }, + { + "epoch": 0.6215842544535893, + "grad_norm": 0.15616366267204285, + "learning_rate": 4.3784229842124696e-06, + "loss": 0.9053, + "step": 85870 + }, + { + "epoch": 0.6216566411141755, + "grad_norm": 0.1612260639667511, + "learning_rate": 4.378350597551884e-06, + "loss": 0.8892, + "step": 85880 + }, + { + "epoch": 0.6217290277747617, + "grad_norm": 0.17558954656124115, + "learning_rate": 4.378278210891298e-06, + "loss": 0.9048, + "step": 85890 + }, + { + "epoch": 0.6218014144353479, + "grad_norm": 0.175954669713974, + "learning_rate": 4.378205824230711e-06, + "loss": 0.8994, + "step": 85900 + }, + { + "epoch": 0.621873801095934, + "grad_norm": 0.1546930968761444, + "learning_rate": 4.378133437570125e-06, + "loss": 0.8875, + "step": 85910 + }, + { + "epoch": 0.6219461877565202, + "grad_norm": 0.1583179086446762, + "learning_rate": 4.378061050909539e-06, + "loss": 0.8928, + "step": 85920 + }, + { + "epoch": 0.6220185744171064, + "grad_norm": 0.15575550496578217, + "learning_rate": 4.377988664248953e-06, + "loss": 0.9023, + "step": 85930 + }, + { + "epoch": 0.6220909610776926, + "grad_norm": 0.1556166708469391, + "learning_rate": 4.3779162775883666e-06, + "loss": 0.9032, + "step": 85940 + }, + { + "epoch": 0.6221633477382787, + "grad_norm": 0.16194866597652435, + "learning_rate": 4.37784389092778e-06, + "loss": 0.9187, + "step": 85950 + }, + { + "epoch": 0.6222357343988649, + "grad_norm": 0.15536653995513916, + "learning_rate": 4.377771504267194e-06, + "loss": 0.907, + "step": 85960 + }, + { + "epoch": 0.6223081210594512, + "grad_norm": 0.15262702107429504, + "learning_rate": 4.377699117606608e-06, + "loss": 0.8914, + "step": 85970 + }, + { + "epoch": 0.6223805077200374, + "grad_norm": 0.26444554328918457, + "learning_rate": 4.377626730946022e-06, + "loss": 0.9096, + "step": 85980 + }, + { + "epoch": 0.6224528943806236, + "grad_norm": 0.14140740036964417, + "learning_rate": 4.3775543442854355e-06, + "loss": 0.898, + "step": 85990 + }, + { + "epoch": 0.6225252810412097, + "grad_norm": 0.17096258699893951, + "learning_rate": 4.377481957624849e-06, + "loss": 0.9021, + "step": 86000 + }, + { + "epoch": 0.6225976677017959, + "grad_norm": 0.16008111834526062, + "learning_rate": 4.3774095709642636e-06, + "loss": 0.8982, + "step": 86010 + }, + { + "epoch": 0.6226700543623821, + "grad_norm": 0.1551225781440735, + "learning_rate": 4.377337184303676e-06, + "loss": 0.9093, + "step": 86020 + }, + { + "epoch": 0.6227424410229683, + "grad_norm": 0.14841316640377045, + "learning_rate": 4.377264797643091e-06, + "loss": 0.902, + "step": 86030 + }, + { + "epoch": 0.6228148276835545, + "grad_norm": 0.18173016607761383, + "learning_rate": 4.377192410982504e-06, + "loss": 0.9144, + "step": 86040 + }, + { + "epoch": 0.6228872143441406, + "grad_norm": 0.14934363961219788, + "learning_rate": 4.377120024321918e-06, + "loss": 0.8974, + "step": 86050 + }, + { + "epoch": 0.6229596010047268, + "grad_norm": 0.17030176520347595, + "learning_rate": 4.377047637661332e-06, + "loss": 0.8943, + "step": 86060 + }, + { + "epoch": 0.623031987665313, + "grad_norm": 0.16661794483661652, + "learning_rate": 4.376975251000746e-06, + "loss": 0.9049, + "step": 86070 + }, + { + "epoch": 0.6231043743258993, + "grad_norm": 0.16051805019378662, + "learning_rate": 4.37690286434016e-06, + "loss": 0.899, + "step": 86080 + }, + { + "epoch": 0.6231767609864854, + "grad_norm": 0.16653110086917877, + "learning_rate": 4.376830477679573e-06, + "loss": 0.9057, + "step": 86090 + }, + { + "epoch": 0.6232491476470716, + "grad_norm": 0.16507036983966827, + "learning_rate": 4.376758091018987e-06, + "loss": 0.907, + "step": 86100 + }, + { + "epoch": 0.6233215343076578, + "grad_norm": 0.16348743438720703, + "learning_rate": 4.3766857043584014e-06, + "loss": 0.9077, + "step": 86110 + }, + { + "epoch": 0.623393920968244, + "grad_norm": 0.15697596967220306, + "learning_rate": 4.376613317697815e-06, + "loss": 0.9028, + "step": 86120 + }, + { + "epoch": 0.6234663076288302, + "grad_norm": 0.15257367491722107, + "learning_rate": 4.376540931037229e-06, + "loss": 0.896, + "step": 86130 + }, + { + "epoch": 0.6235386942894163, + "grad_norm": 0.16256658732891083, + "learning_rate": 4.376468544376642e-06, + "loss": 0.9138, + "step": 86140 + }, + { + "epoch": 0.6236110809500025, + "grad_norm": 0.19833579659461975, + "learning_rate": 4.376396157716056e-06, + "loss": 0.9078, + "step": 86150 + }, + { + "epoch": 0.6236834676105887, + "grad_norm": 0.15901575982570648, + "learning_rate": 4.37632377105547e-06, + "loss": 0.8936, + "step": 86160 + }, + { + "epoch": 0.6237558542711749, + "grad_norm": 0.16411714255809784, + "learning_rate": 4.376251384394884e-06, + "loss": 0.9072, + "step": 86170 + }, + { + "epoch": 0.6238282409317611, + "grad_norm": 0.1543898582458496, + "learning_rate": 4.376178997734298e-06, + "loss": 0.91, + "step": 86180 + }, + { + "epoch": 0.6239006275923473, + "grad_norm": 0.15518857538700104, + "learning_rate": 4.376106611073711e-06, + "loss": 0.9141, + "step": 86190 + }, + { + "epoch": 0.6239730142529335, + "grad_norm": 0.15521982312202454, + "learning_rate": 4.376034224413126e-06, + "loss": 0.9045, + "step": 86200 + }, + { + "epoch": 0.6240454009135197, + "grad_norm": 0.1619000881910324, + "learning_rate": 4.375961837752539e-06, + "loss": 0.8963, + "step": 86210 + }, + { + "epoch": 0.6241177875741059, + "grad_norm": 0.15840552747249603, + "learning_rate": 4.375889451091953e-06, + "loss": 0.8982, + "step": 86220 + }, + { + "epoch": 0.624190174234692, + "grad_norm": 0.1774725317955017, + "learning_rate": 4.3758170644313665e-06, + "loss": 0.9161, + "step": 86230 + }, + { + "epoch": 0.6242625608952782, + "grad_norm": 0.160127192735672, + "learning_rate": 4.375744677770781e-06, + "loss": 0.8993, + "step": 86240 + }, + { + "epoch": 0.6243349475558644, + "grad_norm": 0.1587544083595276, + "learning_rate": 4.375672291110195e-06, + "loss": 0.892, + "step": 86250 + }, + { + "epoch": 0.6244073342164506, + "grad_norm": 0.1745489090681076, + "learning_rate": 4.375599904449608e-06, + "loss": 0.9089, + "step": 86260 + }, + { + "epoch": 0.6244797208770367, + "grad_norm": 0.1463412344455719, + "learning_rate": 4.375527517789022e-06, + "loss": 0.8934, + "step": 86270 + }, + { + "epoch": 0.6245521075376229, + "grad_norm": 0.16872276365756989, + "learning_rate": 4.375455131128436e-06, + "loss": 0.9004, + "step": 86280 + }, + { + "epoch": 0.6246244941982092, + "grad_norm": 0.1642751842737198, + "learning_rate": 4.37538274446785e-06, + "loss": 0.8958, + "step": 86290 + }, + { + "epoch": 0.6246968808587954, + "grad_norm": 0.17172108590602875, + "learning_rate": 4.3753103578072635e-06, + "loss": 0.9021, + "step": 86300 + }, + { + "epoch": 0.6247692675193816, + "grad_norm": 0.1646968573331833, + "learning_rate": 4.375237971146677e-06, + "loss": 0.8952, + "step": 86310 + }, + { + "epoch": 0.6248416541799677, + "grad_norm": 0.15084651112556458, + "learning_rate": 4.375165584486092e-06, + "loss": 0.894, + "step": 86320 + }, + { + "epoch": 0.6249140408405539, + "grad_norm": 0.15649060904979706, + "learning_rate": 4.375093197825505e-06, + "loss": 0.9019, + "step": 86330 + }, + { + "epoch": 0.6249864275011401, + "grad_norm": 0.16130280494689941, + "learning_rate": 4.375020811164919e-06, + "loss": 0.9091, + "step": 86340 + }, + { + "epoch": 0.6250588141617263, + "grad_norm": 0.15296019613742828, + "learning_rate": 4.3749484245043325e-06, + "loss": 0.9127, + "step": 86350 + }, + { + "epoch": 0.6251312008223124, + "grad_norm": 0.15848460793495178, + "learning_rate": 4.374876037843747e-06, + "loss": 0.8889, + "step": 86360 + }, + { + "epoch": 0.6252035874828986, + "grad_norm": 0.15533143281936646, + "learning_rate": 4.3748036511831605e-06, + "loss": 0.9016, + "step": 86370 + }, + { + "epoch": 0.6252759741434848, + "grad_norm": 0.15972629189491272, + "learning_rate": 4.374731264522574e-06, + "loss": 0.9124, + "step": 86380 + }, + { + "epoch": 0.6253483608040711, + "grad_norm": 0.18219304084777832, + "learning_rate": 4.374658877861988e-06, + "loss": 0.8992, + "step": 86390 + }, + { + "epoch": 0.6254207474646573, + "grad_norm": 0.14863041043281555, + "learning_rate": 4.374586491201402e-06, + "loss": 0.8854, + "step": 86400 + }, + { + "epoch": 0.6254931341252434, + "grad_norm": 0.1658528745174408, + "learning_rate": 4.374514104540816e-06, + "loss": 0.8979, + "step": 86410 + }, + { + "epoch": 0.6255655207858296, + "grad_norm": 0.15939390659332275, + "learning_rate": 4.3744417178802295e-06, + "loss": 0.9003, + "step": 86420 + }, + { + "epoch": 0.6256379074464158, + "grad_norm": 0.17238759994506836, + "learning_rate": 4.374369331219643e-06, + "loss": 0.9056, + "step": 86430 + }, + { + "epoch": 0.625710294107002, + "grad_norm": 0.1758257895708084, + "learning_rate": 4.3742969445590575e-06, + "loss": 0.9052, + "step": 86440 + }, + { + "epoch": 0.6257826807675881, + "grad_norm": 0.1517690122127533, + "learning_rate": 4.374224557898471e-06, + "loss": 0.8987, + "step": 86450 + }, + { + "epoch": 0.6258550674281743, + "grad_norm": 0.16682100296020508, + "learning_rate": 4.374152171237885e-06, + "loss": 0.9072, + "step": 86460 + }, + { + "epoch": 0.6259274540887605, + "grad_norm": 0.14764508605003357, + "learning_rate": 4.374079784577298e-06, + "loss": 0.8948, + "step": 86470 + }, + { + "epoch": 0.6259998407493467, + "grad_norm": 0.1788049340248108, + "learning_rate": 4.374007397916713e-06, + "loss": 0.8991, + "step": 86480 + }, + { + "epoch": 0.6260722274099328, + "grad_norm": 0.15142115950584412, + "learning_rate": 4.3739350112561265e-06, + "loss": 0.9062, + "step": 86490 + }, + { + "epoch": 0.6261446140705191, + "grad_norm": 0.1992928683757782, + "learning_rate": 4.37386262459554e-06, + "loss": 0.9139, + "step": 86500 + }, + { + "epoch": 0.6262170007311053, + "grad_norm": 0.1716473549604416, + "learning_rate": 4.373790237934954e-06, + "loss": 0.8933, + "step": 86510 + }, + { + "epoch": 0.6262893873916915, + "grad_norm": 0.19439038634300232, + "learning_rate": 4.373717851274368e-06, + "loss": 0.8969, + "step": 86520 + }, + { + "epoch": 0.6263617740522777, + "grad_norm": 0.13993479311466217, + "learning_rate": 4.373645464613782e-06, + "loss": 0.9131, + "step": 86530 + }, + { + "epoch": 0.6264341607128638, + "grad_norm": 0.1706664115190506, + "learning_rate": 4.373573077953195e-06, + "loss": 0.9075, + "step": 86540 + }, + { + "epoch": 0.62650654737345, + "grad_norm": 0.17721545696258545, + "learning_rate": 4.373500691292609e-06, + "loss": 0.9067, + "step": 86550 + }, + { + "epoch": 0.6265789340340362, + "grad_norm": 0.1569727510213852, + "learning_rate": 4.373428304632023e-06, + "loss": 0.9073, + "step": 86560 + }, + { + "epoch": 0.6266513206946224, + "grad_norm": 0.15974320471286774, + "learning_rate": 4.373355917971436e-06, + "loss": 0.9023, + "step": 86570 + }, + { + "epoch": 0.6267237073552085, + "grad_norm": 0.14838136732578278, + "learning_rate": 4.37328353131085e-06, + "loss": 0.8885, + "step": 86580 + }, + { + "epoch": 0.6267960940157947, + "grad_norm": 0.1551826149225235, + "learning_rate": 4.373211144650264e-06, + "loss": 0.905, + "step": 86590 + }, + { + "epoch": 0.6268684806763809, + "grad_norm": 0.158196359872818, + "learning_rate": 4.373138757989678e-06, + "loss": 0.8934, + "step": 86600 + }, + { + "epoch": 0.6269408673369672, + "grad_norm": 0.17901091277599335, + "learning_rate": 4.3730663713290916e-06, + "loss": 0.919, + "step": 86610 + }, + { + "epoch": 0.6270132539975534, + "grad_norm": 0.1554396152496338, + "learning_rate": 4.372993984668505e-06, + "loss": 0.896, + "step": 86620 + }, + { + "epoch": 0.6270856406581395, + "grad_norm": 0.1537899672985077, + "learning_rate": 4.37292159800792e-06, + "loss": 0.8975, + "step": 86630 + }, + { + "epoch": 0.6271580273187257, + "grad_norm": 0.16763430833816528, + "learning_rate": 4.372849211347333e-06, + "loss": 0.8944, + "step": 86640 + }, + { + "epoch": 0.6272304139793119, + "grad_norm": 0.15677796304225922, + "learning_rate": 4.372776824686747e-06, + "loss": 0.9166, + "step": 86650 + }, + { + "epoch": 0.6273028006398981, + "grad_norm": 0.15729351341724396, + "learning_rate": 4.3727044380261605e-06, + "loss": 0.9052, + "step": 86660 + }, + { + "epoch": 0.6273751873004842, + "grad_norm": 0.16490468382835388, + "learning_rate": 4.372632051365575e-06, + "loss": 0.8986, + "step": 86670 + }, + { + "epoch": 0.6274475739610704, + "grad_norm": 0.16459213197231293, + "learning_rate": 4.3725596647049886e-06, + "loss": 0.8967, + "step": 86680 + }, + { + "epoch": 0.6275199606216566, + "grad_norm": 0.16037428379058838, + "learning_rate": 4.372487278044402e-06, + "loss": 0.9144, + "step": 86690 + }, + { + "epoch": 0.6275923472822428, + "grad_norm": 0.16006019711494446, + "learning_rate": 4.372414891383816e-06, + "loss": 0.9089, + "step": 86700 + }, + { + "epoch": 0.6276647339428291, + "grad_norm": 0.1548503190279007, + "learning_rate": 4.37234250472323e-06, + "loss": 0.9, + "step": 86710 + }, + { + "epoch": 0.6277371206034152, + "grad_norm": 0.21827849745750427, + "learning_rate": 4.372270118062644e-06, + "loss": 0.8975, + "step": 86720 + }, + { + "epoch": 0.6278095072640014, + "grad_norm": 0.16216625273227692, + "learning_rate": 4.3721977314020575e-06, + "loss": 0.9022, + "step": 86730 + }, + { + "epoch": 0.6278818939245876, + "grad_norm": 0.17740298807621002, + "learning_rate": 4.372125344741471e-06, + "loss": 0.9114, + "step": 86740 + }, + { + "epoch": 0.6279542805851738, + "grad_norm": 0.15223225951194763, + "learning_rate": 4.372052958080885e-06, + "loss": 0.886, + "step": 86750 + }, + { + "epoch": 0.62802666724576, + "grad_norm": 0.1714017689228058, + "learning_rate": 4.371980571420299e-06, + "loss": 0.8929, + "step": 86760 + }, + { + "epoch": 0.6280990539063461, + "grad_norm": 0.17561236023902893, + "learning_rate": 4.371908184759713e-06, + "loss": 0.9036, + "step": 86770 + }, + { + "epoch": 0.6281714405669323, + "grad_norm": 0.16735588014125824, + "learning_rate": 4.371835798099126e-06, + "loss": 0.9127, + "step": 86780 + }, + { + "epoch": 0.6282438272275185, + "grad_norm": 0.15860114991664886, + "learning_rate": 4.37176341143854e-06, + "loss": 0.9031, + "step": 86790 + }, + { + "epoch": 0.6283162138881047, + "grad_norm": 0.23566380143165588, + "learning_rate": 4.3716910247779545e-06, + "loss": 0.9014, + "step": 86800 + }, + { + "epoch": 0.6283886005486908, + "grad_norm": 0.18236784636974335, + "learning_rate": 4.371618638117368e-06, + "loss": 0.8992, + "step": 86810 + }, + { + "epoch": 0.6284609872092771, + "grad_norm": 0.1653483808040619, + "learning_rate": 4.371546251456782e-06, + "loss": 0.9042, + "step": 86820 + }, + { + "epoch": 0.6285333738698633, + "grad_norm": 0.15193116664886475, + "learning_rate": 4.371473864796195e-06, + "loss": 0.8964, + "step": 86830 + }, + { + "epoch": 0.6286057605304495, + "grad_norm": 0.15298867225646973, + "learning_rate": 4.37140147813561e-06, + "loss": 0.8856, + "step": 86840 + }, + { + "epoch": 0.6286781471910357, + "grad_norm": 0.15323877334594727, + "learning_rate": 4.3713290914750234e-06, + "loss": 0.9021, + "step": 86850 + }, + { + "epoch": 0.6287505338516218, + "grad_norm": 0.159663587808609, + "learning_rate": 4.371256704814437e-06, + "loss": 0.8852, + "step": 86860 + }, + { + "epoch": 0.628822920512208, + "grad_norm": 0.1943221390247345, + "learning_rate": 4.371184318153851e-06, + "loss": 0.9021, + "step": 86870 + }, + { + "epoch": 0.6288953071727942, + "grad_norm": 0.16797471046447754, + "learning_rate": 4.371111931493265e-06, + "loss": 0.9143, + "step": 86880 + }, + { + "epoch": 0.6289676938333804, + "grad_norm": 0.17109879851341248, + "learning_rate": 4.371039544832679e-06, + "loss": 0.905, + "step": 86890 + }, + { + "epoch": 0.6290400804939665, + "grad_norm": 0.17516149580478668, + "learning_rate": 4.370967158172092e-06, + "loss": 0.9148, + "step": 86900 + }, + { + "epoch": 0.6291124671545527, + "grad_norm": 0.15488648414611816, + "learning_rate": 4.370894771511506e-06, + "loss": 0.9063, + "step": 86910 + }, + { + "epoch": 0.629184853815139, + "grad_norm": 0.16343455016613007, + "learning_rate": 4.3708223848509204e-06, + "loss": 0.9098, + "step": 86920 + }, + { + "epoch": 0.6292572404757252, + "grad_norm": 0.1545439064502716, + "learning_rate": 4.370749998190334e-06, + "loss": 0.9044, + "step": 86930 + }, + { + "epoch": 0.6293296271363114, + "grad_norm": 0.14563705027103424, + "learning_rate": 4.370677611529748e-06, + "loss": 0.8909, + "step": 86940 + }, + { + "epoch": 0.6294020137968975, + "grad_norm": 0.16632063686847687, + "learning_rate": 4.370605224869161e-06, + "loss": 0.9051, + "step": 86950 + }, + { + "epoch": 0.6294744004574837, + "grad_norm": 0.1433282047510147, + "learning_rate": 4.370532838208576e-06, + "loss": 0.9143, + "step": 86960 + }, + { + "epoch": 0.6295467871180699, + "grad_norm": 0.16106697916984558, + "learning_rate": 4.370460451547989e-06, + "loss": 0.8937, + "step": 86970 + }, + { + "epoch": 0.6296191737786561, + "grad_norm": 0.15034160017967224, + "learning_rate": 4.370388064887403e-06, + "loss": 0.9059, + "step": 86980 + }, + { + "epoch": 0.6296915604392422, + "grad_norm": 0.15533864498138428, + "learning_rate": 4.370315678226817e-06, + "loss": 0.9062, + "step": 86990 + }, + { + "epoch": 0.6297639470998284, + "grad_norm": 0.17288139462471008, + "learning_rate": 4.370243291566231e-06, + "loss": 0.896, + "step": 87000 + }, + { + "epoch": 0.6298363337604146, + "grad_norm": 0.1603853702545166, + "learning_rate": 4.370170904905645e-06, + "loss": 0.9013, + "step": 87010 + }, + { + "epoch": 0.6299087204210008, + "grad_norm": 0.1669483482837677, + "learning_rate": 4.370098518245058e-06, + "loss": 0.8986, + "step": 87020 + }, + { + "epoch": 0.629981107081587, + "grad_norm": 0.15310995280742645, + "learning_rate": 4.370026131584472e-06, + "loss": 0.9085, + "step": 87030 + }, + { + "epoch": 0.6300534937421732, + "grad_norm": 0.1482187807559967, + "learning_rate": 4.369953744923886e-06, + "loss": 0.9044, + "step": 87040 + }, + { + "epoch": 0.6301258804027594, + "grad_norm": 0.16005839407444, + "learning_rate": 4.3698813582633e-06, + "loss": 0.9033, + "step": 87050 + }, + { + "epoch": 0.6301982670633456, + "grad_norm": 0.1608879268169403, + "learning_rate": 4.369808971602714e-06, + "loss": 0.8938, + "step": 87060 + }, + { + "epoch": 0.6302706537239318, + "grad_norm": 0.15144924819469452, + "learning_rate": 4.369736584942127e-06, + "loss": 0.905, + "step": 87070 + }, + { + "epoch": 0.6303430403845179, + "grad_norm": 0.1928846538066864, + "learning_rate": 4.369664198281541e-06, + "loss": 0.897, + "step": 87080 + }, + { + "epoch": 0.6304154270451041, + "grad_norm": 0.15390628576278687, + "learning_rate": 4.3695918116209545e-06, + "loss": 0.908, + "step": 87090 + }, + { + "epoch": 0.6304878137056903, + "grad_norm": 0.15502314269542694, + "learning_rate": 4.369519424960368e-06, + "loss": 0.8921, + "step": 87100 + }, + { + "epoch": 0.6305602003662765, + "grad_norm": 0.1564696878194809, + "learning_rate": 4.3694470382997825e-06, + "loss": 0.8971, + "step": 87110 + }, + { + "epoch": 0.6306325870268626, + "grad_norm": 0.1649816632270813, + "learning_rate": 4.369374651639196e-06, + "loss": 0.898, + "step": 87120 + }, + { + "epoch": 0.6307049736874488, + "grad_norm": 0.16276071965694427, + "learning_rate": 4.36930226497861e-06, + "loss": 0.8981, + "step": 87130 + }, + { + "epoch": 0.6307773603480351, + "grad_norm": 0.14670120179653168, + "learning_rate": 4.369229878318023e-06, + "loss": 0.8952, + "step": 87140 + }, + { + "epoch": 0.6308497470086213, + "grad_norm": 0.15809543430805206, + "learning_rate": 4.369157491657438e-06, + "loss": 0.9064, + "step": 87150 + }, + { + "epoch": 0.6309221336692075, + "grad_norm": 0.16645848751068115, + "learning_rate": 4.3690851049968515e-06, + "loss": 0.8973, + "step": 87160 + }, + { + "epoch": 0.6309945203297936, + "grad_norm": 0.1631806641817093, + "learning_rate": 4.369012718336265e-06, + "loss": 0.8984, + "step": 87170 + }, + { + "epoch": 0.6310669069903798, + "grad_norm": 0.20448337495326996, + "learning_rate": 4.368940331675679e-06, + "loss": 0.9061, + "step": 87180 + }, + { + "epoch": 0.631139293650966, + "grad_norm": 0.3554750084877014, + "learning_rate": 4.368867945015093e-06, + "loss": 0.9172, + "step": 87190 + }, + { + "epoch": 0.6312116803115522, + "grad_norm": 0.15781621634960175, + "learning_rate": 4.368795558354507e-06, + "loss": 0.9017, + "step": 87200 + }, + { + "epoch": 0.6312840669721383, + "grad_norm": 0.16704939305782318, + "learning_rate": 4.36872317169392e-06, + "loss": 0.8875, + "step": 87210 + }, + { + "epoch": 0.6313564536327245, + "grad_norm": 0.16684958338737488, + "learning_rate": 4.368650785033334e-06, + "loss": 0.9216, + "step": 87220 + }, + { + "epoch": 0.6314288402933107, + "grad_norm": 0.16323356330394745, + "learning_rate": 4.3685783983727485e-06, + "loss": 0.9072, + "step": 87230 + }, + { + "epoch": 0.631501226953897, + "grad_norm": 0.16825905442237854, + "learning_rate": 4.368506011712162e-06, + "loss": 0.8985, + "step": 87240 + }, + { + "epoch": 0.6315736136144832, + "grad_norm": 0.1925610899925232, + "learning_rate": 4.368433625051576e-06, + "loss": 0.9055, + "step": 87250 + }, + { + "epoch": 0.6316460002750693, + "grad_norm": 0.16499613225460052, + "learning_rate": 4.368361238390989e-06, + "loss": 0.9002, + "step": 87260 + }, + { + "epoch": 0.6317183869356555, + "grad_norm": 0.16235573589801788, + "learning_rate": 4.368288851730404e-06, + "loss": 0.9047, + "step": 87270 + }, + { + "epoch": 0.6317907735962417, + "grad_norm": 0.15934467315673828, + "learning_rate": 4.368216465069817e-06, + "loss": 0.8983, + "step": 87280 + }, + { + "epoch": 0.6318631602568279, + "grad_norm": 0.15878109633922577, + "learning_rate": 4.368144078409231e-06, + "loss": 0.9026, + "step": 87290 + }, + { + "epoch": 0.631935546917414, + "grad_norm": 0.14932307600975037, + "learning_rate": 4.368071691748645e-06, + "loss": 0.8983, + "step": 87300 + }, + { + "epoch": 0.6320079335780002, + "grad_norm": 0.1681598424911499, + "learning_rate": 4.367999305088059e-06, + "loss": 0.9061, + "step": 87310 + }, + { + "epoch": 0.6320803202385864, + "grad_norm": 0.14897821843624115, + "learning_rate": 4.367926918427473e-06, + "loss": 0.8846, + "step": 87320 + }, + { + "epoch": 0.6321527068991726, + "grad_norm": 0.4469514787197113, + "learning_rate": 4.367854531766886e-06, + "loss": 0.892, + "step": 87330 + }, + { + "epoch": 0.6322250935597588, + "grad_norm": 0.15947720408439636, + "learning_rate": 4.3677821451063e-06, + "loss": 0.8925, + "step": 87340 + }, + { + "epoch": 0.632297480220345, + "grad_norm": 0.15741553902626038, + "learning_rate": 4.367709758445714e-06, + "loss": 0.8912, + "step": 87350 + }, + { + "epoch": 0.6323698668809312, + "grad_norm": 0.1720518171787262, + "learning_rate": 4.367637371785128e-06, + "loss": 0.9073, + "step": 87360 + }, + { + "epoch": 0.6324422535415174, + "grad_norm": 0.14960655570030212, + "learning_rate": 4.367564985124542e-06, + "loss": 0.8995, + "step": 87370 + }, + { + "epoch": 0.6325146402021036, + "grad_norm": 0.16579873859882355, + "learning_rate": 4.367492598463955e-06, + "loss": 0.8921, + "step": 87380 + }, + { + "epoch": 0.6325870268626897, + "grad_norm": 0.15160775184631348, + "learning_rate": 4.367420211803369e-06, + "loss": 0.8939, + "step": 87390 + }, + { + "epoch": 0.6326594135232759, + "grad_norm": 0.15509670972824097, + "learning_rate": 4.367347825142783e-06, + "loss": 0.8962, + "step": 87400 + }, + { + "epoch": 0.6327318001838621, + "grad_norm": 0.16785472631454468, + "learning_rate": 4.367275438482197e-06, + "loss": 0.9102, + "step": 87410 + }, + { + "epoch": 0.6328041868444483, + "grad_norm": 0.18528971076011658, + "learning_rate": 4.3672030518216106e-06, + "loss": 0.901, + "step": 87420 + }, + { + "epoch": 0.6328765735050345, + "grad_norm": 0.1553627848625183, + "learning_rate": 4.367130665161024e-06, + "loss": 0.9006, + "step": 87430 + }, + { + "epoch": 0.6329489601656206, + "grad_norm": 0.15704572200775146, + "learning_rate": 4.367058278500439e-06, + "loss": 0.8956, + "step": 87440 + }, + { + "epoch": 0.6330213468262069, + "grad_norm": 0.16988041996955872, + "learning_rate": 4.366985891839852e-06, + "loss": 0.8945, + "step": 87450 + }, + { + "epoch": 0.6330937334867931, + "grad_norm": 0.15681475400924683, + "learning_rate": 4.366913505179266e-06, + "loss": 0.8865, + "step": 87460 + }, + { + "epoch": 0.6331661201473793, + "grad_norm": 0.15510614216327667, + "learning_rate": 4.3668411185186795e-06, + "loss": 0.8965, + "step": 87470 + }, + { + "epoch": 0.6332385068079655, + "grad_norm": 0.16827818751335144, + "learning_rate": 4.366768731858094e-06, + "loss": 0.892, + "step": 87480 + }, + { + "epoch": 0.6333108934685516, + "grad_norm": 0.1560061275959015, + "learning_rate": 4.3666963451975076e-06, + "loss": 0.9041, + "step": 87490 + }, + { + "epoch": 0.6333832801291378, + "grad_norm": 0.15482322871685028, + "learning_rate": 4.366623958536921e-06, + "loss": 0.9012, + "step": 87500 + }, + { + "epoch": 0.633455666789724, + "grad_norm": 0.1474655419588089, + "learning_rate": 4.366551571876335e-06, + "loss": 0.896, + "step": 87510 + }, + { + "epoch": 0.6335280534503102, + "grad_norm": 0.15873880684375763, + "learning_rate": 4.366479185215749e-06, + "loss": 0.8961, + "step": 87520 + }, + { + "epoch": 0.6336004401108963, + "grad_norm": 0.2057919055223465, + "learning_rate": 4.366406798555163e-06, + "loss": 0.9079, + "step": 87530 + }, + { + "epoch": 0.6336728267714825, + "grad_norm": 0.1464945524930954, + "learning_rate": 4.3663344118945765e-06, + "loss": 0.9039, + "step": 87540 + }, + { + "epoch": 0.6337452134320687, + "grad_norm": 0.15978756546974182, + "learning_rate": 4.36626202523399e-06, + "loss": 0.9076, + "step": 87550 + }, + { + "epoch": 0.633817600092655, + "grad_norm": 0.15238073468208313, + "learning_rate": 4.366189638573405e-06, + "loss": 0.8772, + "step": 87560 + }, + { + "epoch": 0.6338899867532412, + "grad_norm": 0.1541658192873001, + "learning_rate": 4.366117251912818e-06, + "loss": 0.9014, + "step": 87570 + }, + { + "epoch": 0.6339623734138273, + "grad_norm": 0.15558382868766785, + "learning_rate": 4.366044865252232e-06, + "loss": 0.9151, + "step": 87580 + }, + { + "epoch": 0.6340347600744135, + "grad_norm": 0.22495369613170624, + "learning_rate": 4.3659724785916454e-06, + "loss": 0.9017, + "step": 87590 + }, + { + "epoch": 0.6341071467349997, + "grad_norm": 0.14666642248630524, + "learning_rate": 4.36590009193106e-06, + "loss": 0.906, + "step": 87600 + }, + { + "epoch": 0.6341795333955859, + "grad_norm": 0.1527988612651825, + "learning_rate": 4.365827705270473e-06, + "loss": 0.8978, + "step": 87610 + }, + { + "epoch": 0.634251920056172, + "grad_norm": 0.2135559469461441, + "learning_rate": 4.365755318609886e-06, + "loss": 0.9118, + "step": 87620 + }, + { + "epoch": 0.6343243067167582, + "grad_norm": 0.15013383328914642, + "learning_rate": 4.365682931949301e-06, + "loss": 0.8956, + "step": 87630 + }, + { + "epoch": 0.6343966933773444, + "grad_norm": 0.15739452838897705, + "learning_rate": 4.365610545288714e-06, + "loss": 0.8912, + "step": 87640 + }, + { + "epoch": 0.6344690800379306, + "grad_norm": 0.16141614317893982, + "learning_rate": 4.365538158628128e-06, + "loss": 0.9124, + "step": 87650 + }, + { + "epoch": 0.6345414666985167, + "grad_norm": 0.1528901308774948, + "learning_rate": 4.365465771967542e-06, + "loss": 0.903, + "step": 87660 + }, + { + "epoch": 0.634613853359103, + "grad_norm": 0.1768021136522293, + "learning_rate": 4.365393385306956e-06, + "loss": 0.8949, + "step": 87670 + }, + { + "epoch": 0.6346862400196892, + "grad_norm": 0.15449446439743042, + "learning_rate": 4.36532099864637e-06, + "loss": 0.9019, + "step": 87680 + }, + { + "epoch": 0.6347586266802754, + "grad_norm": 0.19363653659820557, + "learning_rate": 4.365248611985783e-06, + "loss": 0.9071, + "step": 87690 + }, + { + "epoch": 0.6348310133408616, + "grad_norm": 0.1683684140443802, + "learning_rate": 4.365176225325197e-06, + "loss": 0.895, + "step": 87700 + }, + { + "epoch": 0.6349034000014477, + "grad_norm": 0.15621653199195862, + "learning_rate": 4.365103838664611e-06, + "loss": 0.8939, + "step": 87710 + }, + { + "epoch": 0.6349757866620339, + "grad_norm": 0.16487151384353638, + "learning_rate": 4.365031452004025e-06, + "loss": 0.8915, + "step": 87720 + }, + { + "epoch": 0.6350481733226201, + "grad_norm": 0.1636609584093094, + "learning_rate": 4.364959065343439e-06, + "loss": 0.895, + "step": 87730 + }, + { + "epoch": 0.6351205599832063, + "grad_norm": 0.1616763025522232, + "learning_rate": 4.364886678682852e-06, + "loss": 0.9072, + "step": 87740 + }, + { + "epoch": 0.6351929466437924, + "grad_norm": 0.1616402268409729, + "learning_rate": 4.364814292022267e-06, + "loss": 0.8905, + "step": 87750 + }, + { + "epoch": 0.6352653333043786, + "grad_norm": 0.1978462189435959, + "learning_rate": 4.36474190536168e-06, + "loss": 0.9042, + "step": 87760 + }, + { + "epoch": 0.6353377199649649, + "grad_norm": 0.16519363224506378, + "learning_rate": 4.364669518701094e-06, + "loss": 0.8818, + "step": 87770 + }, + { + "epoch": 0.6354101066255511, + "grad_norm": 0.16044704616069794, + "learning_rate": 4.3645971320405075e-06, + "loss": 0.8968, + "step": 87780 + }, + { + "epoch": 0.6354824932861373, + "grad_norm": 0.16723746061325073, + "learning_rate": 4.364524745379922e-06, + "loss": 0.9008, + "step": 87790 + }, + { + "epoch": 0.6355548799467234, + "grad_norm": 0.15763086080551147, + "learning_rate": 4.364452358719336e-06, + "loss": 0.9046, + "step": 87800 + }, + { + "epoch": 0.6356272666073096, + "grad_norm": 0.16559143364429474, + "learning_rate": 4.364379972058749e-06, + "loss": 0.9074, + "step": 87810 + }, + { + "epoch": 0.6356996532678958, + "grad_norm": 0.17605692148208618, + "learning_rate": 4.364307585398163e-06, + "loss": 0.8845, + "step": 87820 + }, + { + "epoch": 0.635772039928482, + "grad_norm": 0.1688181757926941, + "learning_rate": 4.364235198737577e-06, + "loss": 0.9096, + "step": 87830 + }, + { + "epoch": 0.6358444265890681, + "grad_norm": 0.15427358448505402, + "learning_rate": 4.364162812076991e-06, + "loss": 0.9048, + "step": 87840 + }, + { + "epoch": 0.6359168132496543, + "grad_norm": 0.17729778587818146, + "learning_rate": 4.3640904254164045e-06, + "loss": 0.8993, + "step": 87850 + }, + { + "epoch": 0.6359891999102405, + "grad_norm": 0.17065225541591644, + "learning_rate": 4.364018038755818e-06, + "loss": 0.8954, + "step": 87860 + }, + { + "epoch": 0.6360615865708267, + "grad_norm": 0.15446516871452332, + "learning_rate": 4.363945652095233e-06, + "loss": 0.9051, + "step": 87870 + }, + { + "epoch": 0.636133973231413, + "grad_norm": 0.19369186460971832, + "learning_rate": 4.363873265434646e-06, + "loss": 0.8988, + "step": 87880 + }, + { + "epoch": 0.6362063598919991, + "grad_norm": 0.15911339223384857, + "learning_rate": 4.36380087877406e-06, + "loss": 0.9014, + "step": 87890 + }, + { + "epoch": 0.6362787465525853, + "grad_norm": 0.1556956171989441, + "learning_rate": 4.3637284921134735e-06, + "loss": 0.8952, + "step": 87900 + }, + { + "epoch": 0.6363511332131715, + "grad_norm": 0.176447331905365, + "learning_rate": 4.363656105452888e-06, + "loss": 0.8821, + "step": 87910 + }, + { + "epoch": 0.6364235198737577, + "grad_norm": 0.15189889073371887, + "learning_rate": 4.3635837187923015e-06, + "loss": 0.896, + "step": 87920 + }, + { + "epoch": 0.6364959065343438, + "grad_norm": 0.20345427095890045, + "learning_rate": 4.363511332131715e-06, + "loss": 0.9084, + "step": 87930 + }, + { + "epoch": 0.63656829319493, + "grad_norm": 0.1482352763414383, + "learning_rate": 4.363438945471129e-06, + "loss": 0.8937, + "step": 87940 + }, + { + "epoch": 0.6366406798555162, + "grad_norm": 0.16106894612312317, + "learning_rate": 4.363366558810543e-06, + "loss": 0.8992, + "step": 87950 + }, + { + "epoch": 0.6367130665161024, + "grad_norm": 0.16146771609783173, + "learning_rate": 4.363294172149957e-06, + "loss": 0.8968, + "step": 87960 + }, + { + "epoch": 0.6367854531766886, + "grad_norm": 0.15578705072402954, + "learning_rate": 4.3632217854893705e-06, + "loss": 0.9023, + "step": 87970 + }, + { + "epoch": 0.6368578398372748, + "grad_norm": 0.19550129771232605, + "learning_rate": 4.363149398828784e-06, + "loss": 0.9035, + "step": 87980 + }, + { + "epoch": 0.636930226497861, + "grad_norm": 0.18265287578105927, + "learning_rate": 4.3630770121681985e-06, + "loss": 0.9067, + "step": 87990 + }, + { + "epoch": 0.6370026131584472, + "grad_norm": 0.14970242977142334, + "learning_rate": 4.363004625507612e-06, + "loss": 0.9117, + "step": 88000 + }, + { + "epoch": 0.6370749998190334, + "grad_norm": 0.1472383439540863, + "learning_rate": 4.362932238847026e-06, + "loss": 0.8982, + "step": 88010 + }, + { + "epoch": 0.6371473864796195, + "grad_norm": 0.14995744824409485, + "learning_rate": 4.362859852186439e-06, + "loss": 0.8951, + "step": 88020 + }, + { + "epoch": 0.6372197731402057, + "grad_norm": 0.15421155095100403, + "learning_rate": 4.362787465525853e-06, + "loss": 0.8772, + "step": 88030 + }, + { + "epoch": 0.6372921598007919, + "grad_norm": 0.1862330138683319, + "learning_rate": 4.3627150788652675e-06, + "loss": 0.9068, + "step": 88040 + }, + { + "epoch": 0.6373645464613781, + "grad_norm": 0.15791213512420654, + "learning_rate": 4.362642692204681e-06, + "loss": 0.8989, + "step": 88050 + }, + { + "epoch": 0.6374369331219643, + "grad_norm": 0.1469462811946869, + "learning_rate": 4.362570305544095e-06, + "loss": 0.8983, + "step": 88060 + }, + { + "epoch": 0.6375093197825504, + "grad_norm": 0.15179064869880676, + "learning_rate": 4.362497918883508e-06, + "loss": 0.893, + "step": 88070 + }, + { + "epoch": 0.6375817064431366, + "grad_norm": 0.15898427367210388, + "learning_rate": 4.362425532222923e-06, + "loss": 0.8959, + "step": 88080 + }, + { + "epoch": 0.6376540931037229, + "grad_norm": 0.1530866026878357, + "learning_rate": 4.362353145562336e-06, + "loss": 0.8991, + "step": 88090 + }, + { + "epoch": 0.6377264797643091, + "grad_norm": 0.15585550665855408, + "learning_rate": 4.36228075890175e-06, + "loss": 0.8949, + "step": 88100 + }, + { + "epoch": 0.6377988664248952, + "grad_norm": 0.2193233221769333, + "learning_rate": 4.362208372241164e-06, + "loss": 0.9041, + "step": 88110 + }, + { + "epoch": 0.6378712530854814, + "grad_norm": 0.144108384847641, + "learning_rate": 4.362135985580578e-06, + "loss": 0.9028, + "step": 88120 + }, + { + "epoch": 0.6379436397460676, + "grad_norm": 0.18072602152824402, + "learning_rate": 4.362063598919992e-06, + "loss": 0.9006, + "step": 88130 + }, + { + "epoch": 0.6380160264066538, + "grad_norm": 0.15635623037815094, + "learning_rate": 4.361991212259405e-06, + "loss": 0.9059, + "step": 88140 + }, + { + "epoch": 0.63808841306724, + "grad_norm": 0.16746172308921814, + "learning_rate": 4.361918825598819e-06, + "loss": 0.8977, + "step": 88150 + }, + { + "epoch": 0.6381607997278261, + "grad_norm": 0.1613510549068451, + "learning_rate": 4.3618464389382326e-06, + "loss": 0.9092, + "step": 88160 + }, + { + "epoch": 0.6382331863884123, + "grad_norm": 0.1660555601119995, + "learning_rate": 4.361774052277646e-06, + "loss": 0.908, + "step": 88170 + }, + { + "epoch": 0.6383055730489985, + "grad_norm": 0.19197924435138702, + "learning_rate": 4.36170166561706e-06, + "loss": 0.8946, + "step": 88180 + }, + { + "epoch": 0.6383779597095847, + "grad_norm": 0.1495658904314041, + "learning_rate": 4.361629278956474e-06, + "loss": 0.9006, + "step": 88190 + }, + { + "epoch": 0.638450346370171, + "grad_norm": 0.15957580506801605, + "learning_rate": 4.361556892295888e-06, + "loss": 0.9024, + "step": 88200 + }, + { + "epoch": 0.6385227330307571, + "grad_norm": 0.1745164394378662, + "learning_rate": 4.3614845056353015e-06, + "loss": 0.8863, + "step": 88210 + }, + { + "epoch": 0.6385951196913433, + "grad_norm": 0.15032242238521576, + "learning_rate": 4.361412118974715e-06, + "loss": 0.8906, + "step": 88220 + }, + { + "epoch": 0.6386675063519295, + "grad_norm": 0.15983961522579193, + "learning_rate": 4.3613397323141296e-06, + "loss": 0.9114, + "step": 88230 + }, + { + "epoch": 0.6387398930125157, + "grad_norm": 0.21465584635734558, + "learning_rate": 4.361267345653543e-06, + "loss": 0.8914, + "step": 88240 + }, + { + "epoch": 0.6388122796731018, + "grad_norm": 0.15455862879753113, + "learning_rate": 4.361194958992957e-06, + "loss": 0.9126, + "step": 88250 + }, + { + "epoch": 0.638884666333688, + "grad_norm": 0.14769776165485382, + "learning_rate": 4.36112257233237e-06, + "loss": 0.8858, + "step": 88260 + }, + { + "epoch": 0.6389570529942742, + "grad_norm": 1.265124797821045, + "learning_rate": 4.361050185671785e-06, + "loss": 0.8948, + "step": 88270 + }, + { + "epoch": 0.6390294396548604, + "grad_norm": 0.1922004669904709, + "learning_rate": 4.3609777990111985e-06, + "loss": 0.9017, + "step": 88280 + }, + { + "epoch": 0.6391018263154465, + "grad_norm": 0.16533154249191284, + "learning_rate": 4.360905412350612e-06, + "loss": 0.8987, + "step": 88290 + }, + { + "epoch": 0.6391742129760328, + "grad_norm": 0.15893571078777313, + "learning_rate": 4.360833025690026e-06, + "loss": 0.8976, + "step": 88300 + }, + { + "epoch": 0.639246599636619, + "grad_norm": 0.150678813457489, + "learning_rate": 4.36076063902944e-06, + "loss": 0.894, + "step": 88310 + }, + { + "epoch": 0.6393189862972052, + "grad_norm": 0.14992178976535797, + "learning_rate": 4.360688252368854e-06, + "loss": 0.9037, + "step": 88320 + }, + { + "epoch": 0.6393913729577914, + "grad_norm": 0.1573602557182312, + "learning_rate": 4.3606158657082674e-06, + "loss": 0.9089, + "step": 88330 + }, + { + "epoch": 0.6394637596183775, + "grad_norm": 0.1497740000486374, + "learning_rate": 4.360543479047681e-06, + "loss": 0.9014, + "step": 88340 + }, + { + "epoch": 0.6395361462789637, + "grad_norm": 0.1509128361940384, + "learning_rate": 4.3604710923870955e-06, + "loss": 0.9033, + "step": 88350 + }, + { + "epoch": 0.6396085329395499, + "grad_norm": 0.15733207762241364, + "learning_rate": 4.360398705726509e-06, + "loss": 0.9074, + "step": 88360 + }, + { + "epoch": 0.6396809196001361, + "grad_norm": 0.16280068457126617, + "learning_rate": 4.360326319065923e-06, + "loss": 0.9005, + "step": 88370 + }, + { + "epoch": 0.6397533062607222, + "grad_norm": 0.15421195328235626, + "learning_rate": 4.360253932405336e-06, + "loss": 0.9046, + "step": 88380 + }, + { + "epoch": 0.6398256929213084, + "grad_norm": 0.22842425107955933, + "learning_rate": 4.360181545744751e-06, + "loss": 0.9101, + "step": 88390 + }, + { + "epoch": 0.6398980795818946, + "grad_norm": 0.1527772694826126, + "learning_rate": 4.3601091590841644e-06, + "loss": 0.9021, + "step": 88400 + }, + { + "epoch": 0.6399704662424809, + "grad_norm": 0.15494686365127563, + "learning_rate": 4.360036772423578e-06, + "loss": 0.8911, + "step": 88410 + }, + { + "epoch": 0.6400428529030671, + "grad_norm": 0.16015708446502686, + "learning_rate": 4.359964385762992e-06, + "loss": 0.916, + "step": 88420 + }, + { + "epoch": 0.6401152395636532, + "grad_norm": 0.15892557799816132, + "learning_rate": 4.359891999102406e-06, + "loss": 0.8768, + "step": 88430 + }, + { + "epoch": 0.6401876262242394, + "grad_norm": 0.1498948186635971, + "learning_rate": 4.35981961244182e-06, + "loss": 0.9047, + "step": 88440 + }, + { + "epoch": 0.6402600128848256, + "grad_norm": 0.1665375530719757, + "learning_rate": 4.359747225781233e-06, + "loss": 0.8941, + "step": 88450 + }, + { + "epoch": 0.6403323995454118, + "grad_norm": 0.15249140560626984, + "learning_rate": 4.359674839120647e-06, + "loss": 0.8964, + "step": 88460 + }, + { + "epoch": 0.640404786205998, + "grad_norm": 0.1530652791261673, + "learning_rate": 4.3596024524600614e-06, + "loss": 0.9092, + "step": 88470 + }, + { + "epoch": 0.6404771728665841, + "grad_norm": 0.1995309293270111, + "learning_rate": 4.359530065799475e-06, + "loss": 0.8985, + "step": 88480 + }, + { + "epoch": 0.6405495595271703, + "grad_norm": 0.17197419703006744, + "learning_rate": 4.359457679138889e-06, + "loss": 0.9123, + "step": 88490 + }, + { + "epoch": 0.6406219461877565, + "grad_norm": 0.17037421464920044, + "learning_rate": 4.359385292478302e-06, + "loss": 0.8954, + "step": 88500 + }, + { + "epoch": 0.6406943328483428, + "grad_norm": 0.14726117253303528, + "learning_rate": 4.359312905817717e-06, + "loss": 0.9045, + "step": 88510 + }, + { + "epoch": 0.6407667195089289, + "grad_norm": 0.14530207216739655, + "learning_rate": 4.35924051915713e-06, + "loss": 0.8884, + "step": 88520 + }, + { + "epoch": 0.6408391061695151, + "grad_norm": 0.15528494119644165, + "learning_rate": 4.359168132496544e-06, + "loss": 0.8854, + "step": 88530 + }, + { + "epoch": 0.6409114928301013, + "grad_norm": 0.16812381148338318, + "learning_rate": 4.359095745835958e-06, + "loss": 0.9137, + "step": 88540 + }, + { + "epoch": 0.6409838794906875, + "grad_norm": 0.15406684577465057, + "learning_rate": 4.359023359175372e-06, + "loss": 0.9013, + "step": 88550 + }, + { + "epoch": 0.6410562661512736, + "grad_norm": 0.15804636478424072, + "learning_rate": 4.358950972514786e-06, + "loss": 0.903, + "step": 88560 + }, + { + "epoch": 0.6411286528118598, + "grad_norm": 0.15636689960956573, + "learning_rate": 4.358878585854199e-06, + "loss": 0.9006, + "step": 88570 + }, + { + "epoch": 0.641201039472446, + "grad_norm": 0.18669186532497406, + "learning_rate": 4.358806199193613e-06, + "loss": 0.9171, + "step": 88580 + }, + { + "epoch": 0.6412734261330322, + "grad_norm": 0.17967639863491058, + "learning_rate": 4.358733812533027e-06, + "loss": 0.8925, + "step": 88590 + }, + { + "epoch": 0.6413458127936184, + "grad_norm": 0.15877005457878113, + "learning_rate": 4.358661425872441e-06, + "loss": 0.8923, + "step": 88600 + }, + { + "epoch": 0.6414181994542045, + "grad_norm": 0.1480860710144043, + "learning_rate": 4.358589039211855e-06, + "loss": 0.9054, + "step": 88610 + }, + { + "epoch": 0.6414905861147908, + "grad_norm": 0.15211063623428345, + "learning_rate": 4.358516652551268e-06, + "loss": 0.9001, + "step": 88620 + }, + { + "epoch": 0.641562972775377, + "grad_norm": 0.1518232822418213, + "learning_rate": 4.358444265890682e-06, + "loss": 0.9031, + "step": 88630 + }, + { + "epoch": 0.6416353594359632, + "grad_norm": 0.1446765810251236, + "learning_rate": 4.358371879230096e-06, + "loss": 0.9166, + "step": 88640 + }, + { + "epoch": 0.6417077460965493, + "grad_norm": 0.17465344071388245, + "learning_rate": 4.35829949256951e-06, + "loss": 0.8882, + "step": 88650 + }, + { + "epoch": 0.6417801327571355, + "grad_norm": 0.1594177931547165, + "learning_rate": 4.3582271059089235e-06, + "loss": 0.9004, + "step": 88660 + }, + { + "epoch": 0.6418525194177217, + "grad_norm": 0.1680103987455368, + "learning_rate": 4.358154719248337e-06, + "loss": 0.891, + "step": 88670 + }, + { + "epoch": 0.6419249060783079, + "grad_norm": 0.1576722413301468, + "learning_rate": 4.358082332587751e-06, + "loss": 0.9137, + "step": 88680 + }, + { + "epoch": 0.641997292738894, + "grad_norm": 0.14920315146446228, + "learning_rate": 4.358009945927164e-06, + "loss": 0.8867, + "step": 88690 + }, + { + "epoch": 0.6420696793994802, + "grad_norm": 0.15702413022518158, + "learning_rate": 4.357937559266579e-06, + "loss": 0.8899, + "step": 88700 + }, + { + "epoch": 0.6421420660600664, + "grad_norm": 0.14902466535568237, + "learning_rate": 4.3578651726059925e-06, + "loss": 0.9046, + "step": 88710 + }, + { + "epoch": 0.6422144527206526, + "grad_norm": 0.1598339080810547, + "learning_rate": 4.357792785945406e-06, + "loss": 0.896, + "step": 88720 + }, + { + "epoch": 0.6422868393812389, + "grad_norm": 0.15564772486686707, + "learning_rate": 4.35772039928482e-06, + "loss": 0.9102, + "step": 88730 + }, + { + "epoch": 0.642359226041825, + "grad_norm": 0.14430661499500275, + "learning_rate": 4.357648012624234e-06, + "loss": 0.8899, + "step": 88740 + }, + { + "epoch": 0.6424316127024112, + "grad_norm": 0.15602971613407135, + "learning_rate": 4.357575625963648e-06, + "loss": 0.8982, + "step": 88750 + }, + { + "epoch": 0.6425039993629974, + "grad_norm": 0.16379621624946594, + "learning_rate": 4.357503239303061e-06, + "loss": 0.9031, + "step": 88760 + }, + { + "epoch": 0.6425763860235836, + "grad_norm": 0.15604230761528015, + "learning_rate": 4.357430852642475e-06, + "loss": 0.8916, + "step": 88770 + }, + { + "epoch": 0.6426487726841698, + "grad_norm": 0.15678240358829498, + "learning_rate": 4.3573584659818895e-06, + "loss": 0.8947, + "step": 88780 + }, + { + "epoch": 0.6427211593447559, + "grad_norm": 0.1667027622461319, + "learning_rate": 4.357286079321303e-06, + "loss": 0.8967, + "step": 88790 + }, + { + "epoch": 0.6427935460053421, + "grad_norm": 0.1623423546552658, + "learning_rate": 4.357213692660717e-06, + "loss": 0.8882, + "step": 88800 + }, + { + "epoch": 0.6428659326659283, + "grad_norm": 0.15376125276088715, + "learning_rate": 4.35714130600013e-06, + "loss": 0.8956, + "step": 88810 + }, + { + "epoch": 0.6429383193265145, + "grad_norm": 0.1492316871881485, + "learning_rate": 4.357068919339544e-06, + "loss": 0.8974, + "step": 88820 + }, + { + "epoch": 0.6430107059871008, + "grad_norm": 0.1571977287530899, + "learning_rate": 4.356996532678958e-06, + "loss": 0.9096, + "step": 88830 + }, + { + "epoch": 0.6430830926476869, + "grad_norm": 0.1929909884929657, + "learning_rate": 4.356924146018372e-06, + "loss": 0.8849, + "step": 88840 + }, + { + "epoch": 0.6431554793082731, + "grad_norm": 0.15631908178329468, + "learning_rate": 4.356851759357786e-06, + "loss": 0.9151, + "step": 88850 + }, + { + "epoch": 0.6432278659688593, + "grad_norm": 0.14949972927570343, + "learning_rate": 4.356779372697199e-06, + "loss": 0.8962, + "step": 88860 + }, + { + "epoch": 0.6433002526294455, + "grad_norm": 0.16994498670101166, + "learning_rate": 4.356706986036614e-06, + "loss": 0.8988, + "step": 88870 + }, + { + "epoch": 0.6433726392900316, + "grad_norm": 0.16940979659557343, + "learning_rate": 4.356634599376027e-06, + "loss": 0.9085, + "step": 88880 + }, + { + "epoch": 0.6434450259506178, + "grad_norm": 0.15808720886707306, + "learning_rate": 4.356562212715441e-06, + "loss": 0.9043, + "step": 88890 + }, + { + "epoch": 0.643517412611204, + "grad_norm": 0.15764841437339783, + "learning_rate": 4.3564898260548546e-06, + "loss": 0.8932, + "step": 88900 + }, + { + "epoch": 0.6435897992717902, + "grad_norm": 0.15186621248722076, + "learning_rate": 4.356417439394269e-06, + "loss": 0.9061, + "step": 88910 + }, + { + "epoch": 0.6436621859323763, + "grad_norm": 0.14821207523345947, + "learning_rate": 4.356345052733683e-06, + "loss": 0.8926, + "step": 88920 + }, + { + "epoch": 0.6437345725929625, + "grad_norm": 0.15951776504516602, + "learning_rate": 4.356272666073096e-06, + "loss": 0.887, + "step": 88930 + }, + { + "epoch": 0.6438069592535488, + "grad_norm": 0.1618291735649109, + "learning_rate": 4.35620027941251e-06, + "loss": 0.8978, + "step": 88940 + }, + { + "epoch": 0.643879345914135, + "grad_norm": 0.1718137413263321, + "learning_rate": 4.356127892751924e-06, + "loss": 0.902, + "step": 88950 + }, + { + "epoch": 0.6439517325747212, + "grad_norm": 0.1487615555524826, + "learning_rate": 4.356055506091338e-06, + "loss": 0.897, + "step": 88960 + }, + { + "epoch": 0.6440241192353073, + "grad_norm": 0.1924024522304535, + "learning_rate": 4.3559831194307516e-06, + "loss": 0.9099, + "step": 88970 + }, + { + "epoch": 0.6440965058958935, + "grad_norm": 0.15762527287006378, + "learning_rate": 4.355910732770165e-06, + "loss": 0.891, + "step": 88980 + }, + { + "epoch": 0.6441688925564797, + "grad_norm": 0.16442440450191498, + "learning_rate": 4.35583834610958e-06, + "loss": 0.8914, + "step": 88990 + }, + { + "epoch": 0.6442412792170659, + "grad_norm": 0.15198470652103424, + "learning_rate": 4.355765959448993e-06, + "loss": 0.9036, + "step": 89000 + }, + { + "epoch": 0.644313665877652, + "grad_norm": 0.16357183456420898, + "learning_rate": 4.355693572788407e-06, + "loss": 0.9018, + "step": 89010 + }, + { + "epoch": 0.6443860525382382, + "grad_norm": 0.15125641226768494, + "learning_rate": 4.3556211861278205e-06, + "loss": 0.9005, + "step": 89020 + }, + { + "epoch": 0.6444584391988244, + "grad_norm": 0.1542976200580597, + "learning_rate": 4.355548799467235e-06, + "loss": 0.9062, + "step": 89030 + }, + { + "epoch": 0.6445308258594107, + "grad_norm": 0.16405089199543, + "learning_rate": 4.3554764128066486e-06, + "loss": 0.8959, + "step": 89040 + }, + { + "epoch": 0.6446032125199969, + "grad_norm": 0.15201352536678314, + "learning_rate": 4.355404026146062e-06, + "loss": 0.8916, + "step": 89050 + }, + { + "epoch": 0.644675599180583, + "grad_norm": 0.1623867303133011, + "learning_rate": 4.355331639485476e-06, + "loss": 0.889, + "step": 89060 + }, + { + "epoch": 0.6447479858411692, + "grad_norm": 0.1591363102197647, + "learning_rate": 4.35525925282489e-06, + "loss": 0.9057, + "step": 89070 + }, + { + "epoch": 0.6448203725017554, + "grad_norm": 0.16368667781352997, + "learning_rate": 4.355186866164304e-06, + "loss": 0.8983, + "step": 89080 + }, + { + "epoch": 0.6448927591623416, + "grad_norm": 0.15838485956192017, + "learning_rate": 4.3551144795037175e-06, + "loss": 0.8975, + "step": 89090 + }, + { + "epoch": 0.6449651458229277, + "grad_norm": 0.14503420889377594, + "learning_rate": 4.355042092843131e-06, + "loss": 0.8942, + "step": 89100 + }, + { + "epoch": 0.6450375324835139, + "grad_norm": 0.1557312160730362, + "learning_rate": 4.354969706182546e-06, + "loss": 0.8893, + "step": 89110 + }, + { + "epoch": 0.6451099191441001, + "grad_norm": 0.16098271310329437, + "learning_rate": 4.354897319521959e-06, + "loss": 0.9019, + "step": 89120 + }, + { + "epoch": 0.6451823058046863, + "grad_norm": 0.2473098188638687, + "learning_rate": 4.354824932861373e-06, + "loss": 0.8977, + "step": 89130 + }, + { + "epoch": 0.6452546924652725, + "grad_norm": 0.15116575360298157, + "learning_rate": 4.3547525462007864e-06, + "loss": 0.896, + "step": 89140 + }, + { + "epoch": 0.6453270791258587, + "grad_norm": 0.16768385469913483, + "learning_rate": 4.354680159540201e-06, + "loss": 0.8977, + "step": 89150 + }, + { + "epoch": 0.6453994657864449, + "grad_norm": 0.15651331841945648, + "learning_rate": 4.3546077728796145e-06, + "loss": 0.8983, + "step": 89160 + }, + { + "epoch": 0.6454718524470311, + "grad_norm": 0.18052604794502258, + "learning_rate": 4.354535386219028e-06, + "loss": 0.9187, + "step": 89170 + }, + { + "epoch": 0.6455442391076173, + "grad_norm": 0.15085764229297638, + "learning_rate": 4.354462999558442e-06, + "loss": 0.8947, + "step": 89180 + }, + { + "epoch": 0.6456166257682034, + "grad_norm": 0.15222260355949402, + "learning_rate": 4.354390612897856e-06, + "loss": 0.8996, + "step": 89190 + }, + { + "epoch": 0.6456890124287896, + "grad_norm": 0.16957657039165497, + "learning_rate": 4.354318226237269e-06, + "loss": 0.9059, + "step": 89200 + }, + { + "epoch": 0.6457613990893758, + "grad_norm": 0.17988649010658264, + "learning_rate": 4.354245839576683e-06, + "loss": 0.9038, + "step": 89210 + }, + { + "epoch": 0.645833785749962, + "grad_norm": 0.15836915373802185, + "learning_rate": 4.354173452916097e-06, + "loss": 0.9061, + "step": 89220 + }, + { + "epoch": 0.6459061724105482, + "grad_norm": 0.16917668282985687, + "learning_rate": 4.354101066255511e-06, + "loss": 0.9056, + "step": 89230 + }, + { + "epoch": 0.6459785590711343, + "grad_norm": 0.17027825117111206, + "learning_rate": 4.354028679594924e-06, + "loss": 0.8929, + "step": 89240 + }, + { + "epoch": 0.6460509457317205, + "grad_norm": 0.15539249777793884, + "learning_rate": 4.353956292934338e-06, + "loss": 0.8926, + "step": 89250 + }, + { + "epoch": 0.6461233323923068, + "grad_norm": 0.17285488545894623, + "learning_rate": 4.353883906273752e-06, + "loss": 0.9006, + "step": 89260 + }, + { + "epoch": 0.646195719052893, + "grad_norm": 0.15279121696949005, + "learning_rate": 4.353811519613166e-06, + "loss": 0.9059, + "step": 89270 + }, + { + "epoch": 0.6462681057134791, + "grad_norm": 0.16332434117794037, + "learning_rate": 4.35373913295258e-06, + "loss": 0.8942, + "step": 89280 + }, + { + "epoch": 0.6463404923740653, + "grad_norm": 0.15591338276863098, + "learning_rate": 4.353666746291993e-06, + "loss": 0.8902, + "step": 89290 + }, + { + "epoch": 0.6464128790346515, + "grad_norm": 0.15603341162204742, + "learning_rate": 4.353594359631408e-06, + "loss": 0.8933, + "step": 89300 + }, + { + "epoch": 0.6464852656952377, + "grad_norm": 0.17558693885803223, + "learning_rate": 4.353521972970821e-06, + "loss": 0.8918, + "step": 89310 + }, + { + "epoch": 0.6465576523558239, + "grad_norm": 0.1703271120786667, + "learning_rate": 4.353449586310235e-06, + "loss": 0.8969, + "step": 89320 + }, + { + "epoch": 0.64663003901641, + "grad_norm": 0.15776929259300232, + "learning_rate": 4.3533771996496485e-06, + "loss": 0.8974, + "step": 89330 + }, + { + "epoch": 0.6467024256769962, + "grad_norm": 0.1533646285533905, + "learning_rate": 4.353304812989063e-06, + "loss": 0.8994, + "step": 89340 + }, + { + "epoch": 0.6467748123375824, + "grad_norm": 0.3359949290752411, + "learning_rate": 4.353232426328477e-06, + "loss": 0.9038, + "step": 89350 + }, + { + "epoch": 0.6468471989981687, + "grad_norm": 0.1611877977848053, + "learning_rate": 4.35316003966789e-06, + "loss": 0.8968, + "step": 89360 + }, + { + "epoch": 0.6469195856587548, + "grad_norm": 0.1490904539823532, + "learning_rate": 4.353087653007304e-06, + "loss": 0.8771, + "step": 89370 + }, + { + "epoch": 0.646991972319341, + "grad_norm": 0.15412895381450653, + "learning_rate": 4.353015266346718e-06, + "loss": 0.9061, + "step": 89380 + }, + { + "epoch": 0.6470643589799272, + "grad_norm": 0.16382527351379395, + "learning_rate": 4.352942879686132e-06, + "loss": 0.8989, + "step": 89390 + }, + { + "epoch": 0.6471367456405134, + "grad_norm": 0.15625497698783875, + "learning_rate": 4.3528704930255455e-06, + "loss": 0.9106, + "step": 89400 + }, + { + "epoch": 0.6472091323010996, + "grad_norm": 0.149552121758461, + "learning_rate": 4.352798106364959e-06, + "loss": 0.9041, + "step": 89410 + }, + { + "epoch": 0.6472815189616857, + "grad_norm": 0.16269992291927338, + "learning_rate": 4.352725719704373e-06, + "loss": 0.9069, + "step": 89420 + }, + { + "epoch": 0.6473539056222719, + "grad_norm": 0.15287677943706512, + "learning_rate": 4.352653333043787e-06, + "loss": 0.8955, + "step": 89430 + }, + { + "epoch": 0.6474262922828581, + "grad_norm": 0.1846160590648651, + "learning_rate": 4.352580946383201e-06, + "loss": 0.8977, + "step": 89440 + }, + { + "epoch": 0.6474986789434443, + "grad_norm": 0.15473540127277374, + "learning_rate": 4.3525085597226145e-06, + "loss": 0.8942, + "step": 89450 + }, + { + "epoch": 0.6475710656040304, + "grad_norm": 0.16567891836166382, + "learning_rate": 4.352436173062028e-06, + "loss": 0.9138, + "step": 89460 + }, + { + "epoch": 0.6476434522646167, + "grad_norm": 0.15014810860157013, + "learning_rate": 4.3523637864014425e-06, + "loss": 0.9014, + "step": 89470 + }, + { + "epoch": 0.6477158389252029, + "grad_norm": 0.15278102457523346, + "learning_rate": 4.352291399740856e-06, + "loss": 0.8891, + "step": 89480 + }, + { + "epoch": 0.6477882255857891, + "grad_norm": 0.15812081098556519, + "learning_rate": 4.35221901308027e-06, + "loss": 0.9111, + "step": 89490 + }, + { + "epoch": 0.6478606122463753, + "grad_norm": 0.15760880708694458, + "learning_rate": 4.352146626419683e-06, + "loss": 0.8998, + "step": 89500 + }, + { + "epoch": 0.6479329989069614, + "grad_norm": 0.15096792578697205, + "learning_rate": 4.352074239759098e-06, + "loss": 0.8996, + "step": 89510 + }, + { + "epoch": 0.6480053855675476, + "grad_norm": 0.16857214272022247, + "learning_rate": 4.3520018530985115e-06, + "loss": 0.8953, + "step": 89520 + }, + { + "epoch": 0.6480777722281338, + "grad_norm": 0.16111542284488678, + "learning_rate": 4.351929466437925e-06, + "loss": 0.8887, + "step": 89530 + }, + { + "epoch": 0.64815015888872, + "grad_norm": 0.15649932622909546, + "learning_rate": 4.351857079777339e-06, + "loss": 0.8869, + "step": 89540 + }, + { + "epoch": 0.6482225455493061, + "grad_norm": 0.1554279923439026, + "learning_rate": 4.351784693116753e-06, + "loss": 0.8913, + "step": 89550 + }, + { + "epoch": 0.6482949322098923, + "grad_norm": 0.15444056689739227, + "learning_rate": 4.351712306456167e-06, + "loss": 0.8978, + "step": 89560 + }, + { + "epoch": 0.6483673188704785, + "grad_norm": 0.16410863399505615, + "learning_rate": 4.35163991979558e-06, + "loss": 0.9021, + "step": 89570 + }, + { + "epoch": 0.6484397055310648, + "grad_norm": 0.181893453001976, + "learning_rate": 4.351567533134994e-06, + "loss": 0.8952, + "step": 89580 + }, + { + "epoch": 0.648512092191651, + "grad_norm": 0.15560483932495117, + "learning_rate": 4.3514951464744085e-06, + "loss": 0.8824, + "step": 89590 + }, + { + "epoch": 0.6485844788522371, + "grad_norm": 0.16299884021282196, + "learning_rate": 4.351422759813822e-06, + "loss": 0.8884, + "step": 89600 + }, + { + "epoch": 0.6486568655128233, + "grad_norm": 0.16149957478046417, + "learning_rate": 4.351350373153236e-06, + "loss": 0.8942, + "step": 89610 + }, + { + "epoch": 0.6487292521734095, + "grad_norm": 0.15844495594501495, + "learning_rate": 4.351277986492649e-06, + "loss": 0.8916, + "step": 89620 + }, + { + "epoch": 0.6488016388339957, + "grad_norm": 0.15929342806339264, + "learning_rate": 4.351205599832064e-06, + "loss": 0.9046, + "step": 89630 + }, + { + "epoch": 0.6488740254945818, + "grad_norm": 0.14573869109153748, + "learning_rate": 4.351133213171477e-06, + "loss": 0.8981, + "step": 89640 + }, + { + "epoch": 0.648946412155168, + "grad_norm": 0.14629115164279938, + "learning_rate": 4.351060826510891e-06, + "loss": 0.9027, + "step": 89650 + }, + { + "epoch": 0.6490187988157542, + "grad_norm": 0.16613242030143738, + "learning_rate": 4.350988439850305e-06, + "loss": 0.8883, + "step": 89660 + }, + { + "epoch": 0.6490911854763404, + "grad_norm": 0.18370899558067322, + "learning_rate": 4.350916053189719e-06, + "loss": 0.8997, + "step": 89670 + }, + { + "epoch": 0.6491635721369267, + "grad_norm": 0.15053485333919525, + "learning_rate": 4.350843666529133e-06, + "loss": 0.8909, + "step": 89680 + }, + { + "epoch": 0.6492359587975128, + "grad_norm": 0.1765306442975998, + "learning_rate": 4.350771279868546e-06, + "loss": 0.8805, + "step": 89690 + }, + { + "epoch": 0.649308345458099, + "grad_norm": 0.1697237193584442, + "learning_rate": 4.35069889320796e-06, + "loss": 0.9051, + "step": 89700 + }, + { + "epoch": 0.6493807321186852, + "grad_norm": 0.15003421902656555, + "learning_rate": 4.350626506547374e-06, + "loss": 0.9008, + "step": 89710 + }, + { + "epoch": 0.6494531187792714, + "grad_norm": 0.15888899564743042, + "learning_rate": 4.350554119886788e-06, + "loss": 0.9043, + "step": 89720 + }, + { + "epoch": 0.6495255054398575, + "grad_norm": 0.15244218707084656, + "learning_rate": 4.350481733226202e-06, + "loss": 0.898, + "step": 89730 + }, + { + "epoch": 0.6495978921004437, + "grad_norm": 0.1715979427099228, + "learning_rate": 4.350409346565615e-06, + "loss": 0.8955, + "step": 89740 + }, + { + "epoch": 0.6496702787610299, + "grad_norm": 0.15656600892543793, + "learning_rate": 4.350336959905029e-06, + "loss": 0.8979, + "step": 89750 + }, + { + "epoch": 0.6497426654216161, + "grad_norm": 0.1492892950773239, + "learning_rate": 4.3502645732444425e-06, + "loss": 0.8935, + "step": 89760 + }, + { + "epoch": 0.6498150520822022, + "grad_norm": 0.15817688405513763, + "learning_rate": 4.350192186583856e-06, + "loss": 0.8975, + "step": 89770 + }, + { + "epoch": 0.6498874387427884, + "grad_norm": 0.17220188677310944, + "learning_rate": 4.3501197999232706e-06, + "loss": 0.904, + "step": 89780 + }, + { + "epoch": 0.6499598254033747, + "grad_norm": 0.15756267309188843, + "learning_rate": 4.350047413262684e-06, + "loss": 0.8945, + "step": 89790 + }, + { + "epoch": 0.6500322120639609, + "grad_norm": 0.14775143563747406, + "learning_rate": 4.349975026602098e-06, + "loss": 0.892, + "step": 89800 + }, + { + "epoch": 0.6501045987245471, + "grad_norm": 0.14889222383499146, + "learning_rate": 4.3499026399415114e-06, + "loss": 0.883, + "step": 89810 + }, + { + "epoch": 0.6501769853851332, + "grad_norm": 0.15578392148017883, + "learning_rate": 4.349830253280926e-06, + "loss": 0.8863, + "step": 89820 + }, + { + "epoch": 0.6502493720457194, + "grad_norm": 0.15579867362976074, + "learning_rate": 4.3497578666203395e-06, + "loss": 0.9008, + "step": 89830 + }, + { + "epoch": 0.6503217587063056, + "grad_norm": 0.1711921989917755, + "learning_rate": 4.349685479959753e-06, + "loss": 0.9018, + "step": 89840 + }, + { + "epoch": 0.6503941453668918, + "grad_norm": 0.17373885214328766, + "learning_rate": 4.349613093299167e-06, + "loss": 0.8876, + "step": 89850 + }, + { + "epoch": 0.650466532027478, + "grad_norm": 0.15983055531978607, + "learning_rate": 4.349540706638581e-06, + "loss": 0.9056, + "step": 89860 + }, + { + "epoch": 0.6505389186880641, + "grad_norm": 0.17014628648757935, + "learning_rate": 4.349468319977995e-06, + "loss": 0.8816, + "step": 89870 + }, + { + "epoch": 0.6506113053486503, + "grad_norm": 0.15592381358146667, + "learning_rate": 4.3493959333174084e-06, + "loss": 0.8784, + "step": 89880 + }, + { + "epoch": 0.6506836920092366, + "grad_norm": 0.16757424175739288, + "learning_rate": 4.349323546656822e-06, + "loss": 0.8985, + "step": 89890 + }, + { + "epoch": 0.6507560786698228, + "grad_norm": 0.15414102375507355, + "learning_rate": 4.3492511599962365e-06, + "loss": 0.8938, + "step": 89900 + }, + { + "epoch": 0.650828465330409, + "grad_norm": 0.16440652310848236, + "learning_rate": 4.34917877333565e-06, + "loss": 0.9051, + "step": 89910 + }, + { + "epoch": 0.6509008519909951, + "grad_norm": 0.17210443317890167, + "learning_rate": 4.349106386675064e-06, + "loss": 0.8878, + "step": 89920 + }, + { + "epoch": 0.6509732386515813, + "grad_norm": 0.17794910073280334, + "learning_rate": 4.349034000014477e-06, + "loss": 0.9026, + "step": 89930 + }, + { + "epoch": 0.6510456253121675, + "grad_norm": 0.15437255799770355, + "learning_rate": 4.348961613353892e-06, + "loss": 0.9072, + "step": 89940 + }, + { + "epoch": 0.6511180119727537, + "grad_norm": 0.15506230294704437, + "learning_rate": 4.3488892266933054e-06, + "loss": 0.8998, + "step": 89950 + }, + { + "epoch": 0.6511903986333398, + "grad_norm": 0.1564485728740692, + "learning_rate": 4.348816840032719e-06, + "loss": 0.8746, + "step": 89960 + }, + { + "epoch": 0.651262785293926, + "grad_norm": 0.15721721947193146, + "learning_rate": 4.348744453372133e-06, + "loss": 0.8953, + "step": 89970 + }, + { + "epoch": 0.6513351719545122, + "grad_norm": 0.16001653671264648, + "learning_rate": 4.348672066711547e-06, + "loss": 0.9131, + "step": 89980 + }, + { + "epoch": 0.6514075586150984, + "grad_norm": 0.15154358744621277, + "learning_rate": 4.348599680050961e-06, + "loss": 0.8969, + "step": 89990 + }, + { + "epoch": 0.6514799452756846, + "grad_norm": 0.17069634795188904, + "learning_rate": 4.348527293390374e-06, + "loss": 0.9152, + "step": 90000 + }, + { + "epoch": 0.6515523319362708, + "grad_norm": 0.16475622355937958, + "learning_rate": 4.348454906729788e-06, + "loss": 0.9022, + "step": 90010 + }, + { + "epoch": 0.651624718596857, + "grad_norm": 0.16313569247722626, + "learning_rate": 4.3483825200692024e-06, + "loss": 0.8895, + "step": 90020 + }, + { + "epoch": 0.6516971052574432, + "grad_norm": 0.15203681588172913, + "learning_rate": 4.348310133408616e-06, + "loss": 0.8818, + "step": 90030 + }, + { + "epoch": 0.6517694919180294, + "grad_norm": 0.1589788943529129, + "learning_rate": 4.34823774674803e-06, + "loss": 0.9019, + "step": 90040 + }, + { + "epoch": 0.6518418785786155, + "grad_norm": 0.1522030234336853, + "learning_rate": 4.348165360087443e-06, + "loss": 0.8986, + "step": 90050 + }, + { + "epoch": 0.6519142652392017, + "grad_norm": 0.15555304288864136, + "learning_rate": 4.348092973426857e-06, + "loss": 0.9048, + "step": 90060 + }, + { + "epoch": 0.6519866518997879, + "grad_norm": 0.15009433031082153, + "learning_rate": 4.348020586766271e-06, + "loss": 0.9054, + "step": 90070 + }, + { + "epoch": 0.6520590385603741, + "grad_norm": 0.1470109075307846, + "learning_rate": 4.347948200105685e-06, + "loss": 0.9064, + "step": 90080 + }, + { + "epoch": 0.6521314252209602, + "grad_norm": 0.17592494189739227, + "learning_rate": 4.347875813445099e-06, + "loss": 0.8949, + "step": 90090 + }, + { + "epoch": 0.6522038118815464, + "grad_norm": 0.14667287468910217, + "learning_rate": 4.347803426784512e-06, + "loss": 0.9068, + "step": 90100 + }, + { + "epoch": 0.6522761985421327, + "grad_norm": 0.15610606968402863, + "learning_rate": 4.347731040123927e-06, + "loss": 0.8909, + "step": 90110 + }, + { + "epoch": 0.6523485852027189, + "grad_norm": 0.17158207297325134, + "learning_rate": 4.34765865346334e-06, + "loss": 0.8969, + "step": 90120 + }, + { + "epoch": 0.652420971863305, + "grad_norm": 0.15405572950839996, + "learning_rate": 4.347586266802754e-06, + "loss": 0.9042, + "step": 90130 + }, + { + "epoch": 0.6524933585238912, + "grad_norm": 0.1571338176727295, + "learning_rate": 4.3475138801421675e-06, + "loss": 0.8955, + "step": 90140 + }, + { + "epoch": 0.6525657451844774, + "grad_norm": 0.14696629345417023, + "learning_rate": 4.347441493481582e-06, + "loss": 0.8909, + "step": 90150 + }, + { + "epoch": 0.6526381318450636, + "grad_norm": 0.16139934957027435, + "learning_rate": 4.347369106820996e-06, + "loss": 0.8968, + "step": 90160 + }, + { + "epoch": 0.6527105185056498, + "grad_norm": 0.15784136950969696, + "learning_rate": 4.347296720160409e-06, + "loss": 0.9019, + "step": 90170 + }, + { + "epoch": 0.6527829051662359, + "grad_norm": 0.1769580841064453, + "learning_rate": 4.347224333499823e-06, + "loss": 0.9009, + "step": 90180 + }, + { + "epoch": 0.6528552918268221, + "grad_norm": 0.15210415422916412, + "learning_rate": 4.347151946839237e-06, + "loss": 0.8969, + "step": 90190 + }, + { + "epoch": 0.6529276784874083, + "grad_norm": 0.15724553167819977, + "learning_rate": 4.347079560178651e-06, + "loss": 0.8949, + "step": 90200 + }, + { + "epoch": 0.6530000651479946, + "grad_norm": 0.15788982808589935, + "learning_rate": 4.3470071735180645e-06, + "loss": 0.8914, + "step": 90210 + }, + { + "epoch": 0.6530724518085808, + "grad_norm": 0.16926227509975433, + "learning_rate": 4.346934786857478e-06, + "loss": 0.8868, + "step": 90220 + }, + { + "epoch": 0.6531448384691669, + "grad_norm": 0.16521447896957397, + "learning_rate": 4.346862400196893e-06, + "loss": 0.8966, + "step": 90230 + }, + { + "epoch": 0.6532172251297531, + "grad_norm": 0.17298093438148499, + "learning_rate": 4.346790013536306e-06, + "loss": 0.8916, + "step": 90240 + }, + { + "epoch": 0.6532896117903393, + "grad_norm": 0.1661769151687622, + "learning_rate": 4.34671762687572e-06, + "loss": 0.8891, + "step": 90250 + }, + { + "epoch": 0.6533619984509255, + "grad_norm": 0.16557759046554565, + "learning_rate": 4.3466452402151335e-06, + "loss": 0.9243, + "step": 90260 + }, + { + "epoch": 0.6534343851115116, + "grad_norm": 0.15999002754688263, + "learning_rate": 4.346572853554547e-06, + "loss": 0.8908, + "step": 90270 + }, + { + "epoch": 0.6535067717720978, + "grad_norm": 0.15427353978157043, + "learning_rate": 4.346500466893961e-06, + "loss": 0.9055, + "step": 90280 + }, + { + "epoch": 0.653579158432684, + "grad_norm": 0.16352301836013794, + "learning_rate": 4.346428080233374e-06, + "loss": 0.8789, + "step": 90290 + }, + { + "epoch": 0.6536515450932702, + "grad_norm": 0.15975026786327362, + "learning_rate": 4.346355693572789e-06, + "loss": 0.9023, + "step": 90300 + }, + { + "epoch": 0.6537239317538563, + "grad_norm": 0.17519934475421906, + "learning_rate": 4.346283306912202e-06, + "loss": 0.903, + "step": 90310 + }, + { + "epoch": 0.6537963184144426, + "grad_norm": 0.1745595932006836, + "learning_rate": 4.346210920251616e-06, + "loss": 0.9025, + "step": 90320 + }, + { + "epoch": 0.6538687050750288, + "grad_norm": 0.16801311075687408, + "learning_rate": 4.34613853359103e-06, + "loss": 0.8968, + "step": 90330 + }, + { + "epoch": 0.653941091735615, + "grad_norm": 0.1629055142402649, + "learning_rate": 4.346066146930444e-06, + "loss": 0.8973, + "step": 90340 + }, + { + "epoch": 0.6540134783962012, + "grad_norm": 0.17443734407424927, + "learning_rate": 4.345993760269858e-06, + "loss": 0.8937, + "step": 90350 + }, + { + "epoch": 0.6540858650567873, + "grad_norm": 0.15698504447937012, + "learning_rate": 4.345921373609271e-06, + "loss": 0.902, + "step": 90360 + }, + { + "epoch": 0.6541582517173735, + "grad_norm": 0.1658298522233963, + "learning_rate": 4.345848986948685e-06, + "loss": 0.9065, + "step": 90370 + }, + { + "epoch": 0.6542306383779597, + "grad_norm": 0.17639166116714478, + "learning_rate": 4.345776600288099e-06, + "loss": 0.8894, + "step": 90380 + }, + { + "epoch": 0.6543030250385459, + "grad_norm": 0.17266248166561127, + "learning_rate": 4.345704213627513e-06, + "loss": 0.8957, + "step": 90390 + }, + { + "epoch": 0.654375411699132, + "grad_norm": 0.1632227897644043, + "learning_rate": 4.345631826966927e-06, + "loss": 0.8963, + "step": 90400 + }, + { + "epoch": 0.6544477983597182, + "grad_norm": 0.16069155931472778, + "learning_rate": 4.34555944030634e-06, + "loss": 0.9013, + "step": 90410 + }, + { + "epoch": 0.6545201850203045, + "grad_norm": 0.1587107628583908, + "learning_rate": 4.345487053645755e-06, + "loss": 0.8857, + "step": 90420 + }, + { + "epoch": 0.6545925716808907, + "grad_norm": 0.16049619019031525, + "learning_rate": 4.345414666985168e-06, + "loss": 0.9042, + "step": 90430 + }, + { + "epoch": 0.6546649583414769, + "grad_norm": 0.16169418394565582, + "learning_rate": 4.345342280324582e-06, + "loss": 0.8816, + "step": 90440 + }, + { + "epoch": 0.654737345002063, + "grad_norm": 0.17584997415542603, + "learning_rate": 4.3452698936639956e-06, + "loss": 0.8948, + "step": 90450 + }, + { + "epoch": 0.6548097316626492, + "grad_norm": 0.15903721749782562, + "learning_rate": 4.34519750700341e-06, + "loss": 0.8819, + "step": 90460 + }, + { + "epoch": 0.6548821183232354, + "grad_norm": 0.1641675978899002, + "learning_rate": 4.345125120342824e-06, + "loss": 0.8808, + "step": 90470 + }, + { + "epoch": 0.6549545049838216, + "grad_norm": 0.15496835112571716, + "learning_rate": 4.345052733682237e-06, + "loss": 0.8954, + "step": 90480 + }, + { + "epoch": 0.6550268916444077, + "grad_norm": 0.15261691808700562, + "learning_rate": 4.344980347021651e-06, + "loss": 0.9, + "step": 90490 + }, + { + "epoch": 0.6550992783049939, + "grad_norm": 0.15243463218212128, + "learning_rate": 4.344907960361065e-06, + "loss": 0.9019, + "step": 90500 + }, + { + "epoch": 0.6551716649655801, + "grad_norm": 0.16035859286785126, + "learning_rate": 4.344835573700479e-06, + "loss": 0.9019, + "step": 90510 + }, + { + "epoch": 0.6552440516261663, + "grad_norm": 0.17638221383094788, + "learning_rate": 4.3447631870398926e-06, + "loss": 0.9142, + "step": 90520 + }, + { + "epoch": 0.6553164382867526, + "grad_norm": 0.15220975875854492, + "learning_rate": 4.344690800379306e-06, + "loss": 0.9064, + "step": 90530 + }, + { + "epoch": 0.6553888249473387, + "grad_norm": 0.14705011248588562, + "learning_rate": 4.344618413718721e-06, + "loss": 0.8898, + "step": 90540 + }, + { + "epoch": 0.6554612116079249, + "grad_norm": 0.164079487323761, + "learning_rate": 4.344546027058134e-06, + "loss": 0.905, + "step": 90550 + }, + { + "epoch": 0.6555335982685111, + "grad_norm": 0.15466266870498657, + "learning_rate": 4.344473640397548e-06, + "loss": 0.9071, + "step": 90560 + }, + { + "epoch": 0.6556059849290973, + "grad_norm": 0.1539478898048401, + "learning_rate": 4.3444012537369615e-06, + "loss": 0.8988, + "step": 90570 + }, + { + "epoch": 0.6556783715896835, + "grad_norm": 0.15907570719718933, + "learning_rate": 4.344328867076376e-06, + "loss": 0.8937, + "step": 90580 + }, + { + "epoch": 0.6557507582502696, + "grad_norm": 0.16037173569202423, + "learning_rate": 4.34425648041579e-06, + "loss": 0.8899, + "step": 90590 + }, + { + "epoch": 0.6558231449108558, + "grad_norm": 0.1608818769454956, + "learning_rate": 4.344184093755203e-06, + "loss": 0.8924, + "step": 90600 + }, + { + "epoch": 0.655895531571442, + "grad_norm": 0.17891094088554382, + "learning_rate": 4.344111707094617e-06, + "loss": 0.904, + "step": 90610 + }, + { + "epoch": 0.6559679182320282, + "grad_norm": 0.15963943302631378, + "learning_rate": 4.344039320434031e-06, + "loss": 0.8965, + "step": 90620 + }, + { + "epoch": 0.6560403048926143, + "grad_norm": 0.1626274734735489, + "learning_rate": 4.343966933773445e-06, + "loss": 0.8977, + "step": 90630 + }, + { + "epoch": 0.6561126915532006, + "grad_norm": 0.14938683807849884, + "learning_rate": 4.3438945471128585e-06, + "loss": 0.8972, + "step": 90640 + }, + { + "epoch": 0.6561850782137868, + "grad_norm": 0.15236683189868927, + "learning_rate": 4.343822160452272e-06, + "loss": 0.8866, + "step": 90650 + }, + { + "epoch": 0.656257464874373, + "grad_norm": 0.1745002120733261, + "learning_rate": 4.343749773791686e-06, + "loss": 0.8951, + "step": 90660 + }, + { + "epoch": 0.6563298515349592, + "grad_norm": 0.15428422391414642, + "learning_rate": 4.3436773871311e-06, + "loss": 0.9125, + "step": 90670 + }, + { + "epoch": 0.6564022381955453, + "grad_norm": 0.18317624926567078, + "learning_rate": 4.343605000470514e-06, + "loss": 0.8919, + "step": 90680 + }, + { + "epoch": 0.6564746248561315, + "grad_norm": 0.15780498087406158, + "learning_rate": 4.3435326138099274e-06, + "loss": 0.9001, + "step": 90690 + }, + { + "epoch": 0.6565470115167177, + "grad_norm": 0.1752166748046875, + "learning_rate": 4.343460227149341e-06, + "loss": 0.9079, + "step": 90700 + }, + { + "epoch": 0.6566193981773039, + "grad_norm": 0.17048229277133942, + "learning_rate": 4.3433878404887555e-06, + "loss": 0.8877, + "step": 90710 + }, + { + "epoch": 0.65669178483789, + "grad_norm": 0.15478329360485077, + "learning_rate": 4.343315453828169e-06, + "loss": 0.8885, + "step": 90720 + }, + { + "epoch": 0.6567641714984762, + "grad_norm": 0.15634417533874512, + "learning_rate": 4.343243067167583e-06, + "loss": 0.8944, + "step": 90730 + }, + { + "epoch": 0.6568365581590625, + "grad_norm": 0.15285581350326538, + "learning_rate": 4.343170680506996e-06, + "loss": 0.9014, + "step": 90740 + }, + { + "epoch": 0.6569089448196487, + "grad_norm": 0.1574464589357376, + "learning_rate": 4.343098293846411e-06, + "loss": 0.8962, + "step": 90750 + }, + { + "epoch": 0.6569813314802349, + "grad_norm": 0.19475358724594116, + "learning_rate": 4.3430259071858244e-06, + "loss": 0.8974, + "step": 90760 + }, + { + "epoch": 0.657053718140821, + "grad_norm": 0.21129895746707916, + "learning_rate": 4.342953520525238e-06, + "loss": 0.8898, + "step": 90770 + }, + { + "epoch": 0.6571261048014072, + "grad_norm": 0.1641937643289566, + "learning_rate": 4.342881133864652e-06, + "loss": 0.8914, + "step": 90780 + }, + { + "epoch": 0.6571984914619934, + "grad_norm": 0.158699169754982, + "learning_rate": 4.342808747204066e-06, + "loss": 0.8855, + "step": 90790 + }, + { + "epoch": 0.6572708781225796, + "grad_norm": 0.1632438600063324, + "learning_rate": 4.342736360543479e-06, + "loss": 0.8882, + "step": 90800 + }, + { + "epoch": 0.6573432647831657, + "grad_norm": 0.16449430584907532, + "learning_rate": 4.342663973882893e-06, + "loss": 0.8886, + "step": 90810 + }, + { + "epoch": 0.6574156514437519, + "grad_norm": 0.16445982456207275, + "learning_rate": 4.342591587222307e-06, + "loss": 0.8873, + "step": 90820 + }, + { + "epoch": 0.6574880381043381, + "grad_norm": 0.18143968284130096, + "learning_rate": 4.342519200561721e-06, + "loss": 0.9047, + "step": 90830 + }, + { + "epoch": 0.6575604247649243, + "grad_norm": 0.15910190343856812, + "learning_rate": 4.342446813901134e-06, + "loss": 0.8998, + "step": 90840 + }, + { + "epoch": 0.6576328114255106, + "grad_norm": 0.1646713763475418, + "learning_rate": 4.342374427240548e-06, + "loss": 0.9012, + "step": 90850 + }, + { + "epoch": 0.6577051980860967, + "grad_norm": 0.1441211849451065, + "learning_rate": 4.342302040579962e-06, + "loss": 0.897, + "step": 90860 + }, + { + "epoch": 0.6577775847466829, + "grad_norm": 0.15770696103572845, + "learning_rate": 4.342229653919376e-06, + "loss": 0.9049, + "step": 90870 + }, + { + "epoch": 0.6578499714072691, + "grad_norm": 0.1501597911119461, + "learning_rate": 4.3421572672587895e-06, + "loss": 0.901, + "step": 90880 + }, + { + "epoch": 0.6579223580678553, + "grad_norm": 0.16416409611701965, + "learning_rate": 4.342084880598203e-06, + "loss": 0.9032, + "step": 90890 + }, + { + "epoch": 0.6579947447284414, + "grad_norm": 0.16274628043174744, + "learning_rate": 4.342012493937618e-06, + "loss": 0.887, + "step": 90900 + }, + { + "epoch": 0.6580671313890276, + "grad_norm": 0.14996199309825897, + "learning_rate": 4.341940107277031e-06, + "loss": 0.8815, + "step": 90910 + }, + { + "epoch": 0.6581395180496138, + "grad_norm": 0.2303011268377304, + "learning_rate": 4.341867720616445e-06, + "loss": 0.8799, + "step": 90920 + }, + { + "epoch": 0.6582119047102, + "grad_norm": 0.14957544207572937, + "learning_rate": 4.3417953339558585e-06, + "loss": 0.903, + "step": 90930 + }, + { + "epoch": 0.6582842913707861, + "grad_norm": 0.15761502087116241, + "learning_rate": 4.341722947295273e-06, + "loss": 0.9141, + "step": 90940 + }, + { + "epoch": 0.6583566780313724, + "grad_norm": 0.17332914471626282, + "learning_rate": 4.3416505606346865e-06, + "loss": 0.8998, + "step": 90950 + }, + { + "epoch": 0.6584290646919586, + "grad_norm": 0.1537407785654068, + "learning_rate": 4.3415781739741e-06, + "loss": 0.8852, + "step": 90960 + }, + { + "epoch": 0.6585014513525448, + "grad_norm": 0.14254407584667206, + "learning_rate": 4.341505787313514e-06, + "loss": 0.8973, + "step": 90970 + }, + { + "epoch": 0.658573838013131, + "grad_norm": 0.1535002887248993, + "learning_rate": 4.341433400652928e-06, + "loss": 0.8981, + "step": 90980 + }, + { + "epoch": 0.6586462246737171, + "grad_norm": 0.15489903092384338, + "learning_rate": 4.341361013992342e-06, + "loss": 0.8854, + "step": 90990 + }, + { + "epoch": 0.6587186113343033, + "grad_norm": 0.16978639364242554, + "learning_rate": 4.3412886273317555e-06, + "loss": 0.9023, + "step": 91000 + }, + { + "epoch": 0.6587909979948895, + "grad_norm": 0.1680452525615692, + "learning_rate": 4.341216240671169e-06, + "loss": 0.9008, + "step": 91010 + }, + { + "epoch": 0.6588633846554757, + "grad_norm": 0.15573959052562714, + "learning_rate": 4.3411438540105835e-06, + "loss": 0.889, + "step": 91020 + }, + { + "epoch": 0.6589357713160618, + "grad_norm": 0.1434423178434372, + "learning_rate": 4.341071467349997e-06, + "loss": 0.8804, + "step": 91030 + }, + { + "epoch": 0.659008157976648, + "grad_norm": 0.16832110285758972, + "learning_rate": 4.340999080689411e-06, + "loss": 0.9065, + "step": 91040 + }, + { + "epoch": 0.6590805446372342, + "grad_norm": 0.15029281377792358, + "learning_rate": 4.340926694028824e-06, + "loss": 0.8851, + "step": 91050 + }, + { + "epoch": 0.6591529312978205, + "grad_norm": 0.1768171489238739, + "learning_rate": 4.340854307368239e-06, + "loss": 0.9025, + "step": 91060 + }, + { + "epoch": 0.6592253179584067, + "grad_norm": 0.14737635850906372, + "learning_rate": 4.3407819207076525e-06, + "loss": 0.9048, + "step": 91070 + }, + { + "epoch": 0.6592977046189928, + "grad_norm": 0.1616610735654831, + "learning_rate": 4.340709534047066e-06, + "loss": 0.9036, + "step": 91080 + }, + { + "epoch": 0.659370091279579, + "grad_norm": 0.15665307641029358, + "learning_rate": 4.34063714738648e-06, + "loss": 0.8956, + "step": 91090 + }, + { + "epoch": 0.6594424779401652, + "grad_norm": 0.15417052805423737, + "learning_rate": 4.340564760725894e-06, + "loss": 0.9022, + "step": 91100 + }, + { + "epoch": 0.6595148646007514, + "grad_norm": 0.14735175669193268, + "learning_rate": 4.340492374065308e-06, + "loss": 0.8875, + "step": 91110 + }, + { + "epoch": 0.6595872512613375, + "grad_norm": 0.18449713289737701, + "learning_rate": 4.340419987404721e-06, + "loss": 0.8886, + "step": 91120 + }, + { + "epoch": 0.6596596379219237, + "grad_norm": 0.15880465507507324, + "learning_rate": 4.340347600744135e-06, + "loss": 0.8961, + "step": 91130 + }, + { + "epoch": 0.6597320245825099, + "grad_norm": 0.1495341658592224, + "learning_rate": 4.3402752140835495e-06, + "loss": 0.8861, + "step": 91140 + }, + { + "epoch": 0.6598044112430961, + "grad_norm": 0.1608497053384781, + "learning_rate": 4.340202827422963e-06, + "loss": 0.8905, + "step": 91150 + }, + { + "epoch": 0.6598767979036823, + "grad_norm": 0.18891900777816772, + "learning_rate": 4.340130440762377e-06, + "loss": 0.894, + "step": 91160 + }, + { + "epoch": 0.6599491845642685, + "grad_norm": 0.1456620842218399, + "learning_rate": 4.34005805410179e-06, + "loss": 0.8951, + "step": 91170 + }, + { + "epoch": 0.6600215712248547, + "grad_norm": 0.16527323424816132, + "learning_rate": 4.339985667441205e-06, + "loss": 0.8914, + "step": 91180 + }, + { + "epoch": 0.6600939578854409, + "grad_norm": 0.15947329998016357, + "learning_rate": 4.339913280780618e-06, + "loss": 0.8964, + "step": 91190 + }, + { + "epoch": 0.6601663445460271, + "grad_norm": 0.1482989341020584, + "learning_rate": 4.339840894120032e-06, + "loss": 0.8923, + "step": 91200 + }, + { + "epoch": 0.6602387312066132, + "grad_norm": 0.15714868903160095, + "learning_rate": 4.339768507459446e-06, + "loss": 0.892, + "step": 91210 + }, + { + "epoch": 0.6603111178671994, + "grad_norm": 0.18459376692771912, + "learning_rate": 4.33969612079886e-06, + "loss": 0.8944, + "step": 91220 + }, + { + "epoch": 0.6603835045277856, + "grad_norm": 0.1577734649181366, + "learning_rate": 4.339623734138274e-06, + "loss": 0.8919, + "step": 91230 + }, + { + "epoch": 0.6604558911883718, + "grad_norm": 0.19559817016124725, + "learning_rate": 4.339551347477687e-06, + "loss": 0.9021, + "step": 91240 + }, + { + "epoch": 0.660528277848958, + "grad_norm": 0.16929206252098083, + "learning_rate": 4.339478960817101e-06, + "loss": 0.9089, + "step": 91250 + }, + { + "epoch": 0.6606006645095441, + "grad_norm": 0.1482090950012207, + "learning_rate": 4.339406574156515e-06, + "loss": 0.8995, + "step": 91260 + }, + { + "epoch": 0.6606730511701304, + "grad_norm": 0.17333853244781494, + "learning_rate": 4.339334187495929e-06, + "loss": 0.8998, + "step": 91270 + }, + { + "epoch": 0.6607454378307166, + "grad_norm": 0.16415739059448242, + "learning_rate": 4.339261800835343e-06, + "loss": 0.8956, + "step": 91280 + }, + { + "epoch": 0.6608178244913028, + "grad_norm": 0.14835873246192932, + "learning_rate": 4.339189414174756e-06, + "loss": 0.8809, + "step": 91290 + }, + { + "epoch": 0.660890211151889, + "grad_norm": 0.16368795931339264, + "learning_rate": 4.33911702751417e-06, + "loss": 0.9006, + "step": 91300 + }, + { + "epoch": 0.6609625978124751, + "grad_norm": 0.1738886833190918, + "learning_rate": 4.339044640853584e-06, + "loss": 0.8993, + "step": 91310 + }, + { + "epoch": 0.6610349844730613, + "grad_norm": 0.1818006932735443, + "learning_rate": 4.338972254192998e-06, + "loss": 0.8997, + "step": 91320 + }, + { + "epoch": 0.6611073711336475, + "grad_norm": 0.16817954182624817, + "learning_rate": 4.338899867532412e-06, + "loss": 0.8994, + "step": 91330 + }, + { + "epoch": 0.6611797577942337, + "grad_norm": 0.15932884812355042, + "learning_rate": 4.338827480871825e-06, + "loss": 0.8976, + "step": 91340 + }, + { + "epoch": 0.6612521444548198, + "grad_norm": 0.15406905114650726, + "learning_rate": 4.338755094211239e-06, + "loss": 0.8906, + "step": 91350 + }, + { + "epoch": 0.661324531115406, + "grad_norm": 0.16480666399002075, + "learning_rate": 4.3386827075506524e-06, + "loss": 0.9069, + "step": 91360 + }, + { + "epoch": 0.6613969177759922, + "grad_norm": 0.17040395736694336, + "learning_rate": 4.338610320890067e-06, + "loss": 0.8925, + "step": 91370 + }, + { + "epoch": 0.6614693044365785, + "grad_norm": 0.1497850865125656, + "learning_rate": 4.3385379342294805e-06, + "loss": 0.8956, + "step": 91380 + }, + { + "epoch": 0.6615416910971647, + "grad_norm": 0.326239675283432, + "learning_rate": 4.338465547568894e-06, + "loss": 0.8959, + "step": 91390 + }, + { + "epoch": 0.6616140777577508, + "grad_norm": 0.17361514270305634, + "learning_rate": 4.338393160908308e-06, + "loss": 0.9122, + "step": 91400 + }, + { + "epoch": 0.661686464418337, + "grad_norm": 0.16059482097625732, + "learning_rate": 4.338320774247722e-06, + "loss": 0.8971, + "step": 91410 + }, + { + "epoch": 0.6617588510789232, + "grad_norm": 0.16254779696464539, + "learning_rate": 4.338248387587136e-06, + "loss": 0.8937, + "step": 91420 + }, + { + "epoch": 0.6618312377395094, + "grad_norm": 0.16201868653297424, + "learning_rate": 4.3381760009265494e-06, + "loss": 0.8988, + "step": 91430 + }, + { + "epoch": 0.6619036244000955, + "grad_norm": 0.15529035031795502, + "learning_rate": 4.338103614265963e-06, + "loss": 0.8974, + "step": 91440 + }, + { + "epoch": 0.6619760110606817, + "grad_norm": 0.18519388139247894, + "learning_rate": 4.3380312276053775e-06, + "loss": 0.8969, + "step": 91450 + }, + { + "epoch": 0.6620483977212679, + "grad_norm": 0.14886817336082458, + "learning_rate": 4.337958840944791e-06, + "loss": 0.8927, + "step": 91460 + }, + { + "epoch": 0.6621207843818541, + "grad_norm": 0.18947485089302063, + "learning_rate": 4.337886454284205e-06, + "loss": 0.8954, + "step": 91470 + }, + { + "epoch": 0.6621931710424404, + "grad_norm": 0.14915327727794647, + "learning_rate": 4.337814067623618e-06, + "loss": 0.904, + "step": 91480 + }, + { + "epoch": 0.6622655577030265, + "grad_norm": 0.3526378273963928, + "learning_rate": 4.337741680963032e-06, + "loss": 0.907, + "step": 91490 + }, + { + "epoch": 0.6623379443636127, + "grad_norm": 0.17231549322605133, + "learning_rate": 4.3376692943024464e-06, + "loss": 0.8878, + "step": 91500 + }, + { + "epoch": 0.6624103310241989, + "grad_norm": 0.15187296271324158, + "learning_rate": 4.33759690764186e-06, + "loss": 0.8893, + "step": 91510 + }, + { + "epoch": 0.6624827176847851, + "grad_norm": 0.16822843253612518, + "learning_rate": 4.337524520981274e-06, + "loss": 0.893, + "step": 91520 + }, + { + "epoch": 0.6625551043453712, + "grad_norm": 0.16215179860591888, + "learning_rate": 4.337452134320687e-06, + "loss": 0.893, + "step": 91530 + }, + { + "epoch": 0.6626274910059574, + "grad_norm": 0.15672698616981506, + "learning_rate": 4.337379747660102e-06, + "loss": 0.8875, + "step": 91540 + }, + { + "epoch": 0.6626998776665436, + "grad_norm": 0.14577756822109222, + "learning_rate": 4.337307360999515e-06, + "loss": 0.9028, + "step": 91550 + }, + { + "epoch": 0.6627722643271298, + "grad_norm": 0.1537584662437439, + "learning_rate": 4.337234974338929e-06, + "loss": 0.8978, + "step": 91560 + }, + { + "epoch": 0.6628446509877159, + "grad_norm": 0.15644492208957672, + "learning_rate": 4.337162587678343e-06, + "loss": 0.886, + "step": 91570 + }, + { + "epoch": 0.6629170376483021, + "grad_norm": 0.16348648071289062, + "learning_rate": 4.337090201017757e-06, + "loss": 0.9081, + "step": 91580 + }, + { + "epoch": 0.6629894243088884, + "grad_norm": 0.15281915664672852, + "learning_rate": 4.337017814357171e-06, + "loss": 0.8931, + "step": 91590 + }, + { + "epoch": 0.6630618109694746, + "grad_norm": 0.1640012115240097, + "learning_rate": 4.336945427696584e-06, + "loss": 0.9045, + "step": 91600 + }, + { + "epoch": 0.6631341976300608, + "grad_norm": 0.17783017456531525, + "learning_rate": 4.336873041035998e-06, + "loss": 0.8932, + "step": 91610 + }, + { + "epoch": 0.6632065842906469, + "grad_norm": 0.23584623634815216, + "learning_rate": 4.336800654375412e-06, + "loss": 0.9074, + "step": 91620 + }, + { + "epoch": 0.6632789709512331, + "grad_norm": 0.15153855085372925, + "learning_rate": 4.336728267714826e-06, + "loss": 0.8817, + "step": 91630 + }, + { + "epoch": 0.6633513576118193, + "grad_norm": 0.1572607457637787, + "learning_rate": 4.33665588105424e-06, + "loss": 0.8969, + "step": 91640 + }, + { + "epoch": 0.6634237442724055, + "grad_norm": 0.15882018208503723, + "learning_rate": 4.336583494393653e-06, + "loss": 0.8956, + "step": 91650 + }, + { + "epoch": 0.6634961309329916, + "grad_norm": 0.19259294867515564, + "learning_rate": 4.336511107733068e-06, + "loss": 0.8947, + "step": 91660 + }, + { + "epoch": 0.6635685175935778, + "grad_norm": 0.15729625523090363, + "learning_rate": 4.336438721072481e-06, + "loss": 0.8938, + "step": 91670 + }, + { + "epoch": 0.663640904254164, + "grad_norm": 0.18913884460926056, + "learning_rate": 4.336366334411895e-06, + "loss": 0.898, + "step": 91680 + }, + { + "epoch": 0.6637132909147502, + "grad_norm": 0.23208874464035034, + "learning_rate": 4.3362939477513085e-06, + "loss": 0.8831, + "step": 91690 + }, + { + "epoch": 0.6637856775753365, + "grad_norm": 0.15006226301193237, + "learning_rate": 4.336221561090723e-06, + "loss": 0.8933, + "step": 91700 + }, + { + "epoch": 0.6638580642359226, + "grad_norm": 0.17437438666820526, + "learning_rate": 4.336149174430137e-06, + "loss": 0.8922, + "step": 91710 + }, + { + "epoch": 0.6639304508965088, + "grad_norm": 0.1585850864648819, + "learning_rate": 4.33607678776955e-06, + "loss": 0.8929, + "step": 91720 + }, + { + "epoch": 0.664002837557095, + "grad_norm": 0.15892000496387482, + "learning_rate": 4.336004401108964e-06, + "loss": 0.8981, + "step": 91730 + }, + { + "epoch": 0.6640752242176812, + "grad_norm": 0.15951044857501984, + "learning_rate": 4.335932014448378e-06, + "loss": 0.8968, + "step": 91740 + }, + { + "epoch": 0.6641476108782673, + "grad_norm": 0.16389575600624084, + "learning_rate": 4.335859627787792e-06, + "loss": 0.9076, + "step": 91750 + }, + { + "epoch": 0.6642199975388535, + "grad_norm": 0.23026235401630402, + "learning_rate": 4.3357872411272055e-06, + "loss": 0.8934, + "step": 91760 + }, + { + "epoch": 0.6642923841994397, + "grad_norm": 0.15285776555538177, + "learning_rate": 4.335714854466619e-06, + "loss": 0.8961, + "step": 91770 + }, + { + "epoch": 0.6643647708600259, + "grad_norm": 0.15406441688537598, + "learning_rate": 4.335642467806034e-06, + "loss": 0.8853, + "step": 91780 + }, + { + "epoch": 0.664437157520612, + "grad_norm": 0.1550244837999344, + "learning_rate": 4.335570081145447e-06, + "loss": 0.9001, + "step": 91790 + }, + { + "epoch": 0.6645095441811983, + "grad_norm": 0.16414350271224976, + "learning_rate": 4.335497694484861e-06, + "loss": 0.9078, + "step": 91800 + }, + { + "epoch": 0.6645819308417845, + "grad_norm": 0.15932433307170868, + "learning_rate": 4.3354253078242745e-06, + "loss": 0.8989, + "step": 91810 + }, + { + "epoch": 0.6646543175023707, + "grad_norm": 0.1505679041147232, + "learning_rate": 4.335352921163689e-06, + "loss": 0.8885, + "step": 91820 + }, + { + "epoch": 0.6647267041629569, + "grad_norm": 0.16631951928138733, + "learning_rate": 4.3352805345031026e-06, + "loss": 0.8972, + "step": 91830 + }, + { + "epoch": 0.664799090823543, + "grad_norm": 0.17262007296085358, + "learning_rate": 4.335208147842516e-06, + "loss": 0.8933, + "step": 91840 + }, + { + "epoch": 0.6648714774841292, + "grad_norm": 0.16659270226955414, + "learning_rate": 4.33513576118193e-06, + "loss": 0.9059, + "step": 91850 + }, + { + "epoch": 0.6649438641447154, + "grad_norm": 0.15563061833381653, + "learning_rate": 4.335063374521343e-06, + "loss": 0.9071, + "step": 91860 + }, + { + "epoch": 0.6650162508053016, + "grad_norm": 0.16777054965496063, + "learning_rate": 4.334990987860757e-06, + "loss": 0.8994, + "step": 91870 + }, + { + "epoch": 0.6650886374658878, + "grad_norm": 0.15433162450790405, + "learning_rate": 4.334918601200171e-06, + "loss": 0.9064, + "step": 91880 + }, + { + "epoch": 0.6651610241264739, + "grad_norm": 0.16042456030845642, + "learning_rate": 4.334846214539585e-06, + "loss": 0.8952, + "step": 91890 + }, + { + "epoch": 0.6652334107870601, + "grad_norm": 0.189911887049675, + "learning_rate": 4.334773827878999e-06, + "loss": 0.8903, + "step": 91900 + }, + { + "epoch": 0.6653057974476464, + "grad_norm": 0.1497250199317932, + "learning_rate": 4.334701441218412e-06, + "loss": 0.8836, + "step": 91910 + }, + { + "epoch": 0.6653781841082326, + "grad_norm": 0.16076631844043732, + "learning_rate": 4.334629054557826e-06, + "loss": 0.8971, + "step": 91920 + }, + { + "epoch": 0.6654505707688187, + "grad_norm": 0.15558098256587982, + "learning_rate": 4.33455666789724e-06, + "loss": 0.8838, + "step": 91930 + }, + { + "epoch": 0.6655229574294049, + "grad_norm": 0.1555255949497223, + "learning_rate": 4.334484281236654e-06, + "loss": 0.9026, + "step": 91940 + }, + { + "epoch": 0.6655953440899911, + "grad_norm": 0.15714456140995026, + "learning_rate": 4.334411894576068e-06, + "loss": 0.8889, + "step": 91950 + }, + { + "epoch": 0.6656677307505773, + "grad_norm": 0.15416781604290009, + "learning_rate": 4.334339507915481e-06, + "loss": 0.8943, + "step": 91960 + }, + { + "epoch": 0.6657401174111635, + "grad_norm": 0.16116608679294586, + "learning_rate": 4.334267121254896e-06, + "loss": 0.8844, + "step": 91970 + }, + { + "epoch": 0.6658125040717496, + "grad_norm": 0.141585111618042, + "learning_rate": 4.334194734594309e-06, + "loss": 0.8845, + "step": 91980 + }, + { + "epoch": 0.6658848907323358, + "grad_norm": 0.1526327133178711, + "learning_rate": 4.334122347933723e-06, + "loss": 0.8922, + "step": 91990 + }, + { + "epoch": 0.665957277392922, + "grad_norm": 0.16217315196990967, + "learning_rate": 4.3340499612731366e-06, + "loss": 0.889, + "step": 92000 + }, + { + "epoch": 0.6660296640535083, + "grad_norm": 0.16622312366962433, + "learning_rate": 4.333977574612551e-06, + "loss": 0.8993, + "step": 92010 + }, + { + "epoch": 0.6661020507140945, + "grad_norm": 0.19960935413837433, + "learning_rate": 4.333905187951965e-06, + "loss": 0.8982, + "step": 92020 + }, + { + "epoch": 0.6661744373746806, + "grad_norm": 0.16707240045070648, + "learning_rate": 4.333832801291378e-06, + "loss": 0.9088, + "step": 92030 + }, + { + "epoch": 0.6662468240352668, + "grad_norm": 0.15086571872234344, + "learning_rate": 4.333760414630792e-06, + "loss": 0.8991, + "step": 92040 + }, + { + "epoch": 0.666319210695853, + "grad_norm": 0.17651046812534332, + "learning_rate": 4.333688027970206e-06, + "loss": 0.8987, + "step": 92050 + }, + { + "epoch": 0.6663915973564392, + "grad_norm": 0.1702297031879425, + "learning_rate": 4.33361564130962e-06, + "loss": 0.898, + "step": 92060 + }, + { + "epoch": 0.6664639840170253, + "grad_norm": 0.17619454860687256, + "learning_rate": 4.333543254649034e-06, + "loss": 0.894, + "step": 92070 + }, + { + "epoch": 0.6665363706776115, + "grad_norm": 0.16067981719970703, + "learning_rate": 4.333470867988447e-06, + "loss": 0.9031, + "step": 92080 + }, + { + "epoch": 0.6666087573381977, + "grad_norm": 0.16957126557826996, + "learning_rate": 4.333398481327861e-06, + "loss": 0.8966, + "step": 92090 + }, + { + "epoch": 0.6666811439987839, + "grad_norm": 0.1614599972963333, + "learning_rate": 4.333326094667275e-06, + "loss": 0.8976, + "step": 92100 + }, + { + "epoch": 0.66675353065937, + "grad_norm": 0.17003393173217773, + "learning_rate": 4.333253708006689e-06, + "loss": 0.9072, + "step": 92110 + }, + { + "epoch": 0.6668259173199563, + "grad_norm": 0.17658734321594238, + "learning_rate": 4.3331813213461025e-06, + "loss": 0.8964, + "step": 92120 + }, + { + "epoch": 0.6668983039805425, + "grad_norm": 0.15573443472385406, + "learning_rate": 4.333108934685516e-06, + "loss": 0.8984, + "step": 92130 + }, + { + "epoch": 0.6669706906411287, + "grad_norm": 0.17280741035938263, + "learning_rate": 4.333036548024931e-06, + "loss": 0.8982, + "step": 92140 + }, + { + "epoch": 0.6670430773017149, + "grad_norm": 0.1615643948316574, + "learning_rate": 4.332964161364344e-06, + "loss": 0.8958, + "step": 92150 + }, + { + "epoch": 0.667115463962301, + "grad_norm": 0.16449616849422455, + "learning_rate": 4.332891774703758e-06, + "loss": 0.8938, + "step": 92160 + }, + { + "epoch": 0.6671878506228872, + "grad_norm": 0.1627790331840515, + "learning_rate": 4.3328193880431714e-06, + "loss": 0.8861, + "step": 92170 + }, + { + "epoch": 0.6672602372834734, + "grad_norm": 0.15564194321632385, + "learning_rate": 4.332747001382586e-06, + "loss": 0.9061, + "step": 92180 + }, + { + "epoch": 0.6673326239440596, + "grad_norm": 0.18332041800022125, + "learning_rate": 4.3326746147219995e-06, + "loss": 0.9071, + "step": 92190 + }, + { + "epoch": 0.6674050106046457, + "grad_norm": 0.165358766913414, + "learning_rate": 4.332602228061413e-06, + "loss": 0.8964, + "step": 92200 + }, + { + "epoch": 0.6674773972652319, + "grad_norm": 0.1612190455198288, + "learning_rate": 4.332529841400827e-06, + "loss": 0.8976, + "step": 92210 + }, + { + "epoch": 0.6675497839258181, + "grad_norm": 0.16194748878479004, + "learning_rate": 4.332457454740241e-06, + "loss": 0.8953, + "step": 92220 + }, + { + "epoch": 0.6676221705864044, + "grad_norm": 0.16928349435329437, + "learning_rate": 4.332385068079655e-06, + "loss": 0.9029, + "step": 92230 + }, + { + "epoch": 0.6676945572469906, + "grad_norm": 0.15044300258159637, + "learning_rate": 4.3323126814190684e-06, + "loss": 0.8986, + "step": 92240 + }, + { + "epoch": 0.6677669439075767, + "grad_norm": 0.1588992476463318, + "learning_rate": 4.332240294758482e-06, + "loss": 0.9034, + "step": 92250 + }, + { + "epoch": 0.6678393305681629, + "grad_norm": 0.16093967854976654, + "learning_rate": 4.3321679080978965e-06, + "loss": 0.8845, + "step": 92260 + }, + { + "epoch": 0.6679117172287491, + "grad_norm": 0.17110575735569, + "learning_rate": 4.33209552143731e-06, + "loss": 0.8848, + "step": 92270 + }, + { + "epoch": 0.6679841038893353, + "grad_norm": 0.15584023296833038, + "learning_rate": 4.332023134776724e-06, + "loss": 0.8858, + "step": 92280 + }, + { + "epoch": 0.6680564905499214, + "grad_norm": 0.16196756064891815, + "learning_rate": 4.331950748116137e-06, + "loss": 0.8951, + "step": 92290 + }, + { + "epoch": 0.6681288772105076, + "grad_norm": 0.1788647472858429, + "learning_rate": 4.331878361455552e-06, + "loss": 0.8988, + "step": 92300 + }, + { + "epoch": 0.6682012638710938, + "grad_norm": 0.1447635442018509, + "learning_rate": 4.3318059747949655e-06, + "loss": 0.8857, + "step": 92310 + }, + { + "epoch": 0.66827365053168, + "grad_norm": 0.16333886981010437, + "learning_rate": 4.331733588134379e-06, + "loss": 0.8831, + "step": 92320 + }, + { + "epoch": 0.6683460371922663, + "grad_norm": 0.16351155936717987, + "learning_rate": 4.331661201473793e-06, + "loss": 0.9038, + "step": 92330 + }, + { + "epoch": 0.6684184238528524, + "grad_norm": 0.16826938092708588, + "learning_rate": 4.331588814813207e-06, + "loss": 0.8982, + "step": 92340 + }, + { + "epoch": 0.6684908105134386, + "grad_norm": 0.16592121124267578, + "learning_rate": 4.331516428152621e-06, + "loss": 0.8828, + "step": 92350 + }, + { + "epoch": 0.6685631971740248, + "grad_norm": 0.1630096733570099, + "learning_rate": 4.331444041492034e-06, + "loss": 0.8867, + "step": 92360 + }, + { + "epoch": 0.668635583834611, + "grad_norm": 0.18398292362689972, + "learning_rate": 4.331371654831448e-06, + "loss": 0.9008, + "step": 92370 + }, + { + "epoch": 0.6687079704951971, + "grad_norm": 0.16176696121692657, + "learning_rate": 4.3312992681708625e-06, + "loss": 0.9025, + "step": 92380 + }, + { + "epoch": 0.6687803571557833, + "grad_norm": 0.15559855103492737, + "learning_rate": 4.331226881510275e-06, + "loss": 0.9031, + "step": 92390 + }, + { + "epoch": 0.6688527438163695, + "grad_norm": 0.16191458702087402, + "learning_rate": 4.331154494849689e-06, + "loss": 0.8954, + "step": 92400 + }, + { + "epoch": 0.6689251304769557, + "grad_norm": 0.15984484553337097, + "learning_rate": 4.331082108189103e-06, + "loss": 0.8888, + "step": 92410 + }, + { + "epoch": 0.6689975171375419, + "grad_norm": 0.15444859862327576, + "learning_rate": 4.331009721528517e-06, + "loss": 0.8966, + "step": 92420 + }, + { + "epoch": 0.669069903798128, + "grad_norm": 0.1950903832912445, + "learning_rate": 4.3309373348679305e-06, + "loss": 0.901, + "step": 92430 + }, + { + "epoch": 0.6691422904587143, + "grad_norm": 0.1582936942577362, + "learning_rate": 4.330864948207344e-06, + "loss": 0.891, + "step": 92440 + }, + { + "epoch": 0.6692146771193005, + "grad_norm": 0.15177102386951447, + "learning_rate": 4.330792561546759e-06, + "loss": 0.8893, + "step": 92450 + }, + { + "epoch": 0.6692870637798867, + "grad_norm": 0.14646144211292267, + "learning_rate": 4.330720174886172e-06, + "loss": 0.8964, + "step": 92460 + }, + { + "epoch": 0.6693594504404728, + "grad_norm": 0.17326298356056213, + "learning_rate": 4.330647788225586e-06, + "loss": 0.8839, + "step": 92470 + }, + { + "epoch": 0.669431837101059, + "grad_norm": 0.16231794655323029, + "learning_rate": 4.3305754015649995e-06, + "loss": 0.8983, + "step": 92480 + }, + { + "epoch": 0.6695042237616452, + "grad_norm": 0.1864873170852661, + "learning_rate": 4.330503014904414e-06, + "loss": 0.897, + "step": 92490 + }, + { + "epoch": 0.6695766104222314, + "grad_norm": 0.16087661683559418, + "learning_rate": 4.3304306282438275e-06, + "loss": 0.8815, + "step": 92500 + }, + { + "epoch": 0.6696489970828176, + "grad_norm": 0.1563531458377838, + "learning_rate": 4.330358241583241e-06, + "loss": 0.8971, + "step": 92510 + }, + { + "epoch": 0.6697213837434037, + "grad_norm": 0.15433651208877563, + "learning_rate": 4.330285854922655e-06, + "loss": 0.8948, + "step": 92520 + }, + { + "epoch": 0.6697937704039899, + "grad_norm": 0.18349480628967285, + "learning_rate": 4.330213468262069e-06, + "loss": 0.9053, + "step": 92530 + }, + { + "epoch": 0.6698661570645762, + "grad_norm": 0.186500683426857, + "learning_rate": 4.330141081601483e-06, + "loss": 0.8941, + "step": 92540 + }, + { + "epoch": 0.6699385437251624, + "grad_norm": 0.17492738366127014, + "learning_rate": 4.3300686949408965e-06, + "loss": 0.8914, + "step": 92550 + }, + { + "epoch": 0.6700109303857485, + "grad_norm": 0.1956550031900406, + "learning_rate": 4.32999630828031e-06, + "loss": 0.8992, + "step": 92560 + }, + { + "epoch": 0.6700833170463347, + "grad_norm": 0.1706896424293518, + "learning_rate": 4.3299239216197246e-06, + "loss": 0.8879, + "step": 92570 + }, + { + "epoch": 0.6701557037069209, + "grad_norm": 0.16048724949359894, + "learning_rate": 4.329851534959138e-06, + "loss": 0.8983, + "step": 92580 + }, + { + "epoch": 0.6702280903675071, + "grad_norm": 0.14965306222438812, + "learning_rate": 4.329779148298552e-06, + "loss": 0.8997, + "step": 92590 + }, + { + "epoch": 0.6703004770280933, + "grad_norm": 0.15428891777992249, + "learning_rate": 4.329706761637965e-06, + "loss": 0.8933, + "step": 92600 + }, + { + "epoch": 0.6703728636886794, + "grad_norm": 0.15892021358013153, + "learning_rate": 4.32963437497738e-06, + "loss": 0.8951, + "step": 92610 + }, + { + "epoch": 0.6704452503492656, + "grad_norm": 0.1577247977256775, + "learning_rate": 4.3295619883167935e-06, + "loss": 0.8999, + "step": 92620 + }, + { + "epoch": 0.6705176370098518, + "grad_norm": 0.15700013935565948, + "learning_rate": 4.329489601656207e-06, + "loss": 0.8752, + "step": 92630 + }, + { + "epoch": 0.670590023670438, + "grad_norm": 0.18034988641738892, + "learning_rate": 4.329417214995621e-06, + "loss": 0.9094, + "step": 92640 + }, + { + "epoch": 0.6706624103310243, + "grad_norm": 0.15093854069709778, + "learning_rate": 4.329344828335035e-06, + "loss": 0.909, + "step": 92650 + }, + { + "epoch": 0.6707347969916104, + "grad_norm": 0.15224012732505798, + "learning_rate": 4.329272441674449e-06, + "loss": 0.8964, + "step": 92660 + }, + { + "epoch": 0.6708071836521966, + "grad_norm": 0.16921748220920563, + "learning_rate": 4.329200055013862e-06, + "loss": 0.893, + "step": 92670 + }, + { + "epoch": 0.6708795703127828, + "grad_norm": 0.15135690569877625, + "learning_rate": 4.329127668353276e-06, + "loss": 0.8859, + "step": 92680 + }, + { + "epoch": 0.670951956973369, + "grad_norm": 0.15335379540920258, + "learning_rate": 4.3290552816926905e-06, + "loss": 0.8956, + "step": 92690 + }, + { + "epoch": 0.6710243436339551, + "grad_norm": 0.1561633050441742, + "learning_rate": 4.328982895032104e-06, + "loss": 0.8801, + "step": 92700 + }, + { + "epoch": 0.6710967302945413, + "grad_norm": 0.1546168178319931, + "learning_rate": 4.328910508371518e-06, + "loss": 0.9001, + "step": 92710 + }, + { + "epoch": 0.6711691169551275, + "grad_norm": 0.16860635578632355, + "learning_rate": 4.328838121710931e-06, + "loss": 0.9041, + "step": 92720 + }, + { + "epoch": 0.6712415036157137, + "grad_norm": 0.1650303453207016, + "learning_rate": 4.328765735050345e-06, + "loss": 0.8919, + "step": 92730 + }, + { + "epoch": 0.6713138902762998, + "grad_norm": 0.15512806177139282, + "learning_rate": 4.328693348389759e-06, + "loss": 0.884, + "step": 92740 + }, + { + "epoch": 0.671386276936886, + "grad_norm": 0.17656339704990387, + "learning_rate": 4.328620961729173e-06, + "loss": 0.8928, + "step": 92750 + }, + { + "epoch": 0.6714586635974723, + "grad_norm": 0.1842651218175888, + "learning_rate": 4.328548575068587e-06, + "loss": 0.9057, + "step": 92760 + }, + { + "epoch": 0.6715310502580585, + "grad_norm": 0.15689325332641602, + "learning_rate": 4.328476188408e-06, + "loss": 0.8871, + "step": 92770 + }, + { + "epoch": 0.6716034369186447, + "grad_norm": 0.16614311933517456, + "learning_rate": 4.328403801747415e-06, + "loss": 0.8998, + "step": 92780 + }, + { + "epoch": 0.6716758235792308, + "grad_norm": 0.16957563161849976, + "learning_rate": 4.328331415086828e-06, + "loss": 0.8937, + "step": 92790 + }, + { + "epoch": 0.671748210239817, + "grad_norm": 0.15638667345046997, + "learning_rate": 4.328259028426242e-06, + "loss": 0.894, + "step": 92800 + }, + { + "epoch": 0.6718205969004032, + "grad_norm": 0.16604597866535187, + "learning_rate": 4.328186641765656e-06, + "loss": 0.8891, + "step": 92810 + }, + { + "epoch": 0.6718929835609894, + "grad_norm": 0.1521102339029312, + "learning_rate": 4.32811425510507e-06, + "loss": 0.8946, + "step": 92820 + }, + { + "epoch": 0.6719653702215755, + "grad_norm": 0.1847507506608963, + "learning_rate": 4.328041868444484e-06, + "loss": 0.906, + "step": 92830 + }, + { + "epoch": 0.6720377568821617, + "grad_norm": 0.15854564309120178, + "learning_rate": 4.327969481783897e-06, + "loss": 0.8913, + "step": 92840 + }, + { + "epoch": 0.6721101435427479, + "grad_norm": 0.16507163643836975, + "learning_rate": 4.327897095123311e-06, + "loss": 0.8888, + "step": 92850 + }, + { + "epoch": 0.6721825302033342, + "grad_norm": 0.1640719473361969, + "learning_rate": 4.327824708462725e-06, + "loss": 0.8969, + "step": 92860 + }, + { + "epoch": 0.6722549168639204, + "grad_norm": 0.15480723977088928, + "learning_rate": 4.327752321802139e-06, + "loss": 0.9023, + "step": 92870 + }, + { + "epoch": 0.6723273035245065, + "grad_norm": 0.1514626145362854, + "learning_rate": 4.327679935141553e-06, + "loss": 0.8934, + "step": 92880 + }, + { + "epoch": 0.6723996901850927, + "grad_norm": 0.16035489737987518, + "learning_rate": 4.327607548480966e-06, + "loss": 0.8847, + "step": 92890 + }, + { + "epoch": 0.6724720768456789, + "grad_norm": 0.15631358325481415, + "learning_rate": 4.327535161820381e-06, + "loss": 0.9011, + "step": 92900 + }, + { + "epoch": 0.6725444635062651, + "grad_norm": 0.16039063036441803, + "learning_rate": 4.327462775159794e-06, + "loss": 0.8872, + "step": 92910 + }, + { + "epoch": 0.6726168501668512, + "grad_norm": 0.15340082347393036, + "learning_rate": 4.327390388499207e-06, + "loss": 0.8918, + "step": 92920 + }, + { + "epoch": 0.6726892368274374, + "grad_norm": 0.16243009269237518, + "learning_rate": 4.3273180018386215e-06, + "loss": 0.8937, + "step": 92930 + }, + { + "epoch": 0.6727616234880236, + "grad_norm": 0.15922003984451294, + "learning_rate": 4.327245615178035e-06, + "loss": 0.8906, + "step": 92940 + }, + { + "epoch": 0.6728340101486098, + "grad_norm": 0.1569734811782837, + "learning_rate": 4.327173228517449e-06, + "loss": 0.8916, + "step": 92950 + }, + { + "epoch": 0.672906396809196, + "grad_norm": 0.1412716507911682, + "learning_rate": 4.327100841856862e-06, + "loss": 0.881, + "step": 92960 + }, + { + "epoch": 0.6729787834697822, + "grad_norm": 0.16564616560935974, + "learning_rate": 4.327028455196277e-06, + "loss": 0.8876, + "step": 92970 + }, + { + "epoch": 0.6730511701303684, + "grad_norm": 0.18585346639156342, + "learning_rate": 4.3269560685356904e-06, + "loss": 0.8932, + "step": 92980 + }, + { + "epoch": 0.6731235567909546, + "grad_norm": 0.15012194216251373, + "learning_rate": 4.326883681875104e-06, + "loss": 0.8916, + "step": 92990 + }, + { + "epoch": 0.6731959434515408, + "grad_norm": 0.14845649898052216, + "learning_rate": 4.326811295214518e-06, + "loss": 0.8963, + "step": 93000 + }, + { + "epoch": 0.673268330112127, + "grad_norm": 0.15878477692604065, + "learning_rate": 4.326738908553932e-06, + "loss": 0.8983, + "step": 93010 + }, + { + "epoch": 0.6733407167727131, + "grad_norm": 0.15004906058311462, + "learning_rate": 4.326666521893346e-06, + "loss": 0.8929, + "step": 93020 + }, + { + "epoch": 0.6734131034332993, + "grad_norm": 0.16124626994132996, + "learning_rate": 4.326594135232759e-06, + "loss": 0.8837, + "step": 93030 + }, + { + "epoch": 0.6734854900938855, + "grad_norm": 0.15695308148860931, + "learning_rate": 4.326521748572173e-06, + "loss": 0.8991, + "step": 93040 + }, + { + "epoch": 0.6735578767544717, + "grad_norm": 0.1830526888370514, + "learning_rate": 4.3264493619115874e-06, + "loss": 0.8826, + "step": 93050 + }, + { + "epoch": 0.6736302634150578, + "grad_norm": 0.1436559408903122, + "learning_rate": 4.326376975251001e-06, + "loss": 0.8939, + "step": 93060 + }, + { + "epoch": 0.673702650075644, + "grad_norm": 0.15827639400959015, + "learning_rate": 4.326304588590415e-06, + "loss": 0.907, + "step": 93070 + }, + { + "epoch": 0.6737750367362303, + "grad_norm": 0.15548427402973175, + "learning_rate": 4.326232201929828e-06, + "loss": 0.9136, + "step": 93080 + }, + { + "epoch": 0.6738474233968165, + "grad_norm": 0.15338309109210968, + "learning_rate": 4.326159815269243e-06, + "loss": 0.8979, + "step": 93090 + }, + { + "epoch": 0.6739198100574026, + "grad_norm": 0.16544108092784882, + "learning_rate": 4.326087428608656e-06, + "loss": 0.8817, + "step": 93100 + }, + { + "epoch": 0.6739921967179888, + "grad_norm": 0.16785688698291779, + "learning_rate": 4.32601504194807e-06, + "loss": 0.8912, + "step": 93110 + }, + { + "epoch": 0.674064583378575, + "grad_norm": 0.17321330308914185, + "learning_rate": 4.325942655287484e-06, + "loss": 0.8863, + "step": 93120 + }, + { + "epoch": 0.6741369700391612, + "grad_norm": 0.16258351504802704, + "learning_rate": 4.325870268626898e-06, + "loss": 0.9073, + "step": 93130 + }, + { + "epoch": 0.6742093566997474, + "grad_norm": 0.16058233380317688, + "learning_rate": 4.325797881966312e-06, + "loss": 0.9084, + "step": 93140 + }, + { + "epoch": 0.6742817433603335, + "grad_norm": 0.16303308308124542, + "learning_rate": 4.325725495305725e-06, + "loss": 0.8965, + "step": 93150 + }, + { + "epoch": 0.6743541300209197, + "grad_norm": 0.15831464529037476, + "learning_rate": 4.325653108645139e-06, + "loss": 0.8865, + "step": 93160 + }, + { + "epoch": 0.6744265166815059, + "grad_norm": 0.1554798036813736, + "learning_rate": 4.325580721984553e-06, + "loss": 0.8803, + "step": 93170 + }, + { + "epoch": 0.6744989033420922, + "grad_norm": 0.16166193783283234, + "learning_rate": 4.325508335323967e-06, + "loss": 0.9018, + "step": 93180 + }, + { + "epoch": 0.6745712900026783, + "grad_norm": 0.15309026837348938, + "learning_rate": 4.325435948663381e-06, + "loss": 0.8972, + "step": 93190 + }, + { + "epoch": 0.6746436766632645, + "grad_norm": 0.15099987387657166, + "learning_rate": 4.325363562002794e-06, + "loss": 0.9075, + "step": 93200 + }, + { + "epoch": 0.6747160633238507, + "grad_norm": 0.17467179894447327, + "learning_rate": 4.325291175342209e-06, + "loss": 0.9024, + "step": 93210 + }, + { + "epoch": 0.6747884499844369, + "grad_norm": 0.15384790301322937, + "learning_rate": 4.325218788681622e-06, + "loss": 0.9, + "step": 93220 + }, + { + "epoch": 0.674860836645023, + "grad_norm": 0.1675538271665573, + "learning_rate": 4.325146402021036e-06, + "loss": 0.893, + "step": 93230 + }, + { + "epoch": 0.6749332233056092, + "grad_norm": 0.15132692456245422, + "learning_rate": 4.3250740153604495e-06, + "loss": 0.9021, + "step": 93240 + }, + { + "epoch": 0.6750056099661954, + "grad_norm": 0.15508437156677246, + "learning_rate": 4.325001628699864e-06, + "loss": 0.9039, + "step": 93250 + }, + { + "epoch": 0.6750779966267816, + "grad_norm": 0.15347804129123688, + "learning_rate": 4.324929242039278e-06, + "loss": 0.8787, + "step": 93260 + }, + { + "epoch": 0.6751503832873678, + "grad_norm": 0.15671610832214355, + "learning_rate": 4.324856855378691e-06, + "loss": 0.8973, + "step": 93270 + }, + { + "epoch": 0.6752227699479539, + "grad_norm": 0.15267059206962585, + "learning_rate": 4.324784468718105e-06, + "loss": 0.8862, + "step": 93280 + }, + { + "epoch": 0.6752951566085402, + "grad_norm": 0.1843479573726654, + "learning_rate": 4.324712082057519e-06, + "loss": 0.9027, + "step": 93290 + }, + { + "epoch": 0.6753675432691264, + "grad_norm": 0.18705353140830994, + "learning_rate": 4.324639695396933e-06, + "loss": 0.8873, + "step": 93300 + }, + { + "epoch": 0.6754399299297126, + "grad_norm": 0.16083325445652008, + "learning_rate": 4.3245673087363466e-06, + "loss": 0.8973, + "step": 93310 + }, + { + "epoch": 0.6755123165902988, + "grad_norm": 0.16239725053310394, + "learning_rate": 4.32449492207576e-06, + "loss": 0.8963, + "step": 93320 + }, + { + "epoch": 0.6755847032508849, + "grad_norm": 0.15586166083812714, + "learning_rate": 4.324422535415174e-06, + "loss": 0.8988, + "step": 93330 + }, + { + "epoch": 0.6756570899114711, + "grad_norm": 0.17255674302577972, + "learning_rate": 4.324350148754588e-06, + "loss": 0.8979, + "step": 93340 + }, + { + "epoch": 0.6757294765720573, + "grad_norm": 0.15488573908805847, + "learning_rate": 4.324277762094002e-06, + "loss": 0.8938, + "step": 93350 + }, + { + "epoch": 0.6758018632326435, + "grad_norm": 0.1733180284500122, + "learning_rate": 4.3242053754334155e-06, + "loss": 0.8936, + "step": 93360 + }, + { + "epoch": 0.6758742498932296, + "grad_norm": 0.16540446877479553, + "learning_rate": 4.324132988772829e-06, + "loss": 0.8812, + "step": 93370 + }, + { + "epoch": 0.6759466365538158, + "grad_norm": 0.16037392616271973, + "learning_rate": 4.3240606021122436e-06, + "loss": 0.8979, + "step": 93380 + }, + { + "epoch": 0.6760190232144021, + "grad_norm": 0.17287902534008026, + "learning_rate": 4.323988215451657e-06, + "loss": 0.8928, + "step": 93390 + }, + { + "epoch": 0.6760914098749883, + "grad_norm": 0.19038693606853485, + "learning_rate": 4.323915828791071e-06, + "loss": 0.896, + "step": 93400 + }, + { + "epoch": 0.6761637965355745, + "grad_norm": 0.14845329523086548, + "learning_rate": 4.323843442130484e-06, + "loss": 0.8969, + "step": 93410 + }, + { + "epoch": 0.6762361831961606, + "grad_norm": 0.15594539046287537, + "learning_rate": 4.323771055469899e-06, + "loss": 0.8967, + "step": 93420 + }, + { + "epoch": 0.6763085698567468, + "grad_norm": 0.1664988100528717, + "learning_rate": 4.3236986688093125e-06, + "loss": 0.8907, + "step": 93430 + }, + { + "epoch": 0.676380956517333, + "grad_norm": 0.16152945160865784, + "learning_rate": 4.323626282148726e-06, + "loss": 0.8887, + "step": 93440 + }, + { + "epoch": 0.6764533431779192, + "grad_norm": 0.16336587071418762, + "learning_rate": 4.32355389548814e-06, + "loss": 0.8937, + "step": 93450 + }, + { + "epoch": 0.6765257298385053, + "grad_norm": 0.17990297079086304, + "learning_rate": 4.323481508827553e-06, + "loss": 0.8885, + "step": 93460 + }, + { + "epoch": 0.6765981164990915, + "grad_norm": 0.1632242649793625, + "learning_rate": 4.323409122166967e-06, + "loss": 0.9011, + "step": 93470 + }, + { + "epoch": 0.6766705031596777, + "grad_norm": 0.1489850878715515, + "learning_rate": 4.323336735506381e-06, + "loss": 0.8834, + "step": 93480 + }, + { + "epoch": 0.6767428898202639, + "grad_norm": 0.16526971757411957, + "learning_rate": 4.323264348845795e-06, + "loss": 0.8924, + "step": 93490 + }, + { + "epoch": 0.6768152764808502, + "grad_norm": 0.14482301473617554, + "learning_rate": 4.323191962185209e-06, + "loss": 0.8818, + "step": 93500 + }, + { + "epoch": 0.6768876631414363, + "grad_norm": 0.19359660148620605, + "learning_rate": 4.323119575524622e-06, + "loss": 0.9027, + "step": 93510 + }, + { + "epoch": 0.6769600498020225, + "grad_norm": 0.15199698507785797, + "learning_rate": 4.323047188864036e-06, + "loss": 0.9103, + "step": 93520 + }, + { + "epoch": 0.6770324364626087, + "grad_norm": 0.1570311337709427, + "learning_rate": 4.32297480220345e-06, + "loss": 0.8866, + "step": 93530 + }, + { + "epoch": 0.6771048231231949, + "grad_norm": 0.16493958234786987, + "learning_rate": 4.322902415542864e-06, + "loss": 0.8965, + "step": 93540 + }, + { + "epoch": 0.677177209783781, + "grad_norm": 0.15966171026229858, + "learning_rate": 4.322830028882278e-06, + "loss": 0.9059, + "step": 93550 + }, + { + "epoch": 0.6772495964443672, + "grad_norm": 0.15570668876171112, + "learning_rate": 4.322757642221691e-06, + "loss": 0.9031, + "step": 93560 + }, + { + "epoch": 0.6773219831049534, + "grad_norm": 0.15496212244033813, + "learning_rate": 4.322685255561106e-06, + "loss": 0.9177, + "step": 93570 + }, + { + "epoch": 0.6773943697655396, + "grad_norm": 0.15817132592201233, + "learning_rate": 4.322612868900519e-06, + "loss": 0.8852, + "step": 93580 + }, + { + "epoch": 0.6774667564261257, + "grad_norm": 0.15422877669334412, + "learning_rate": 4.322540482239933e-06, + "loss": 0.8944, + "step": 93590 + }, + { + "epoch": 0.6775391430867119, + "grad_norm": 0.15881921350955963, + "learning_rate": 4.3224680955793465e-06, + "loss": 0.9014, + "step": 93600 + }, + { + "epoch": 0.6776115297472982, + "grad_norm": 0.24979665875434875, + "learning_rate": 4.322395708918761e-06, + "loss": 0.8926, + "step": 93610 + }, + { + "epoch": 0.6776839164078844, + "grad_norm": 0.19371944665908813, + "learning_rate": 4.322323322258175e-06, + "loss": 0.879, + "step": 93620 + }, + { + "epoch": 0.6777563030684706, + "grad_norm": 0.15385429561138153, + "learning_rate": 4.322250935597588e-06, + "loss": 0.9084, + "step": 93630 + }, + { + "epoch": 0.6778286897290567, + "grad_norm": 0.16214215755462646, + "learning_rate": 4.322178548937002e-06, + "loss": 0.8968, + "step": 93640 + }, + { + "epoch": 0.6779010763896429, + "grad_norm": 0.21187572181224823, + "learning_rate": 4.322106162276416e-06, + "loss": 0.8969, + "step": 93650 + }, + { + "epoch": 0.6779734630502291, + "grad_norm": 0.15346659719944, + "learning_rate": 4.32203377561583e-06, + "loss": 0.8985, + "step": 93660 + }, + { + "epoch": 0.6780458497108153, + "grad_norm": 0.14978167414665222, + "learning_rate": 4.3219613889552435e-06, + "loss": 0.8998, + "step": 93670 + }, + { + "epoch": 0.6781182363714015, + "grad_norm": 0.16475534439086914, + "learning_rate": 4.321889002294657e-06, + "loss": 0.8935, + "step": 93680 + }, + { + "epoch": 0.6781906230319876, + "grad_norm": 0.1509210169315338, + "learning_rate": 4.321816615634072e-06, + "loss": 0.9166, + "step": 93690 + }, + { + "epoch": 0.6782630096925738, + "grad_norm": 0.15706129372119904, + "learning_rate": 4.321744228973485e-06, + "loss": 0.884, + "step": 93700 + }, + { + "epoch": 0.6783353963531601, + "grad_norm": 0.19146853685379028, + "learning_rate": 4.321671842312899e-06, + "loss": 0.8988, + "step": 93710 + }, + { + "epoch": 0.6784077830137463, + "grad_norm": 0.14886470139026642, + "learning_rate": 4.3215994556523124e-06, + "loss": 0.8954, + "step": 93720 + }, + { + "epoch": 0.6784801696743324, + "grad_norm": 0.15223486721515656, + "learning_rate": 4.321527068991727e-06, + "loss": 0.8994, + "step": 93730 + }, + { + "epoch": 0.6785525563349186, + "grad_norm": 0.1696760505437851, + "learning_rate": 4.3214546823311405e-06, + "loss": 0.9005, + "step": 93740 + }, + { + "epoch": 0.6786249429955048, + "grad_norm": 0.17853619158267975, + "learning_rate": 4.321382295670554e-06, + "loss": 0.883, + "step": 93750 + }, + { + "epoch": 0.678697329656091, + "grad_norm": 0.1491808295249939, + "learning_rate": 4.321309909009968e-06, + "loss": 0.8852, + "step": 93760 + }, + { + "epoch": 0.6787697163166772, + "grad_norm": 0.19821669161319733, + "learning_rate": 4.321237522349382e-06, + "loss": 0.8976, + "step": 93770 + }, + { + "epoch": 0.6788421029772633, + "grad_norm": 0.1570088416337967, + "learning_rate": 4.321165135688796e-06, + "loss": 0.8913, + "step": 93780 + }, + { + "epoch": 0.6789144896378495, + "grad_norm": 0.15688063204288483, + "learning_rate": 4.3210927490282094e-06, + "loss": 0.9008, + "step": 93790 + }, + { + "epoch": 0.6789868762984357, + "grad_norm": 0.19363504648208618, + "learning_rate": 4.321020362367623e-06, + "loss": 0.8946, + "step": 93800 + }, + { + "epoch": 0.6790592629590219, + "grad_norm": 0.16203875839710236, + "learning_rate": 4.3209479757070375e-06, + "loss": 0.8867, + "step": 93810 + }, + { + "epoch": 0.6791316496196081, + "grad_norm": 0.15617011487483978, + "learning_rate": 4.320875589046451e-06, + "loss": 0.9006, + "step": 93820 + }, + { + "epoch": 0.6792040362801943, + "grad_norm": 0.1507921814918518, + "learning_rate": 4.320803202385865e-06, + "loss": 0.9161, + "step": 93830 + }, + { + "epoch": 0.6792764229407805, + "grad_norm": 0.16400021314620972, + "learning_rate": 4.320730815725278e-06, + "loss": 0.8899, + "step": 93840 + }, + { + "epoch": 0.6793488096013667, + "grad_norm": 0.15807297825813293, + "learning_rate": 4.320658429064693e-06, + "loss": 0.9041, + "step": 93850 + }, + { + "epoch": 0.6794211962619529, + "grad_norm": 0.15214060246944427, + "learning_rate": 4.3205860424041065e-06, + "loss": 0.8978, + "step": 93860 + }, + { + "epoch": 0.679493582922539, + "grad_norm": 0.16088810563087463, + "learning_rate": 4.32051365574352e-06, + "loss": 0.8974, + "step": 93870 + }, + { + "epoch": 0.6795659695831252, + "grad_norm": 0.1684071570634842, + "learning_rate": 4.320441269082934e-06, + "loss": 0.8949, + "step": 93880 + }, + { + "epoch": 0.6796383562437114, + "grad_norm": 0.15487946569919586, + "learning_rate": 4.320368882422348e-06, + "loss": 0.8905, + "step": 93890 + }, + { + "epoch": 0.6797107429042976, + "grad_norm": 0.1507687270641327, + "learning_rate": 4.320296495761762e-06, + "loss": 0.8959, + "step": 93900 + }, + { + "epoch": 0.6797831295648837, + "grad_norm": 0.16048942506313324, + "learning_rate": 4.320224109101175e-06, + "loss": 0.8903, + "step": 93910 + }, + { + "epoch": 0.67985551622547, + "grad_norm": 0.1652323603630066, + "learning_rate": 4.320151722440589e-06, + "loss": 0.8897, + "step": 93920 + }, + { + "epoch": 0.6799279028860562, + "grad_norm": 0.16126669943332672, + "learning_rate": 4.3200793357800035e-06, + "loss": 0.8921, + "step": 93930 + }, + { + "epoch": 0.6800002895466424, + "grad_norm": 0.15348832309246063, + "learning_rate": 4.320006949119417e-06, + "loss": 0.8927, + "step": 93940 + }, + { + "epoch": 0.6800726762072286, + "grad_norm": 0.2017420381307602, + "learning_rate": 4.319934562458831e-06, + "loss": 0.8945, + "step": 93950 + }, + { + "epoch": 0.6801450628678147, + "grad_norm": 0.17592447996139526, + "learning_rate": 4.319862175798244e-06, + "loss": 0.8905, + "step": 93960 + }, + { + "epoch": 0.6802174495284009, + "grad_norm": 0.15097962319850922, + "learning_rate": 4.319789789137658e-06, + "loss": 0.8964, + "step": 93970 + }, + { + "epoch": 0.6802898361889871, + "grad_norm": 0.16152705252170563, + "learning_rate": 4.3197174024770715e-06, + "loss": 0.8875, + "step": 93980 + }, + { + "epoch": 0.6803622228495733, + "grad_norm": 0.17407093942165375, + "learning_rate": 4.319645015816485e-06, + "loss": 0.9065, + "step": 93990 + }, + { + "epoch": 0.6804346095101594, + "grad_norm": 0.16338643431663513, + "learning_rate": 4.3195726291559e-06, + "loss": 0.907, + "step": 94000 + }, + { + "epoch": 0.6805069961707456, + "grad_norm": 0.15193772315979004, + "learning_rate": 4.319500242495313e-06, + "loss": 0.8908, + "step": 94010 + }, + { + "epoch": 0.6805793828313318, + "grad_norm": 0.15826401114463806, + "learning_rate": 4.319427855834727e-06, + "loss": 0.889, + "step": 94020 + }, + { + "epoch": 0.6806517694919181, + "grad_norm": 0.15474501252174377, + "learning_rate": 4.3193554691741405e-06, + "loss": 0.8907, + "step": 94030 + }, + { + "epoch": 0.6807241561525043, + "grad_norm": 0.1633521169424057, + "learning_rate": 4.319283082513555e-06, + "loss": 0.8997, + "step": 94040 + }, + { + "epoch": 0.6807965428130904, + "grad_norm": 0.19621436297893524, + "learning_rate": 4.3192106958529686e-06, + "loss": 0.9016, + "step": 94050 + }, + { + "epoch": 0.6808689294736766, + "grad_norm": 0.15351617336273193, + "learning_rate": 4.319138309192382e-06, + "loss": 0.9004, + "step": 94060 + }, + { + "epoch": 0.6809413161342628, + "grad_norm": 0.1630709171295166, + "learning_rate": 4.319065922531796e-06, + "loss": 0.8924, + "step": 94070 + }, + { + "epoch": 0.681013702794849, + "grad_norm": 0.17487677931785583, + "learning_rate": 4.31899353587121e-06, + "loss": 0.8983, + "step": 94080 + }, + { + "epoch": 0.6810860894554351, + "grad_norm": 0.15708361566066742, + "learning_rate": 4.318921149210624e-06, + "loss": 0.8904, + "step": 94090 + }, + { + "epoch": 0.6811584761160213, + "grad_norm": 0.15445072948932648, + "learning_rate": 4.3188487625500375e-06, + "loss": 0.9044, + "step": 94100 + }, + { + "epoch": 0.6812308627766075, + "grad_norm": 0.16823984682559967, + "learning_rate": 4.318776375889451e-06, + "loss": 0.8893, + "step": 94110 + }, + { + "epoch": 0.6813032494371937, + "grad_norm": 0.1907905787229538, + "learning_rate": 4.318703989228865e-06, + "loss": 0.906, + "step": 94120 + }, + { + "epoch": 0.6813756360977798, + "grad_norm": 0.1714591085910797, + "learning_rate": 4.318631602568279e-06, + "loss": 0.8967, + "step": 94130 + }, + { + "epoch": 0.6814480227583661, + "grad_norm": 0.15924982726573944, + "learning_rate": 4.318559215907693e-06, + "loss": 0.8899, + "step": 94140 + }, + { + "epoch": 0.6815204094189523, + "grad_norm": 0.18446829915046692, + "learning_rate": 4.318486829247106e-06, + "loss": 0.9054, + "step": 94150 + }, + { + "epoch": 0.6815927960795385, + "grad_norm": 0.1584208905696869, + "learning_rate": 4.31841444258652e-06, + "loss": 0.9035, + "step": 94160 + }, + { + "epoch": 0.6816651827401247, + "grad_norm": 0.15730392932891846, + "learning_rate": 4.3183420559259345e-06, + "loss": 0.9004, + "step": 94170 + }, + { + "epoch": 0.6817375694007108, + "grad_norm": 0.15665248036384583, + "learning_rate": 4.318269669265348e-06, + "loss": 0.8768, + "step": 94180 + }, + { + "epoch": 0.681809956061297, + "grad_norm": 0.15125800669193268, + "learning_rate": 4.318197282604762e-06, + "loss": 0.9052, + "step": 94190 + }, + { + "epoch": 0.6818823427218832, + "grad_norm": 0.15670722723007202, + "learning_rate": 4.318124895944175e-06, + "loss": 0.8914, + "step": 94200 + }, + { + "epoch": 0.6819547293824694, + "grad_norm": 0.16642498970031738, + "learning_rate": 4.31805250928359e-06, + "loss": 0.8978, + "step": 94210 + }, + { + "epoch": 0.6820271160430555, + "grad_norm": 0.17175577580928802, + "learning_rate": 4.317980122623003e-06, + "loss": 0.9012, + "step": 94220 + }, + { + "epoch": 0.6820995027036417, + "grad_norm": 0.16106824576854706, + "learning_rate": 4.317907735962417e-06, + "loss": 0.8985, + "step": 94230 + }, + { + "epoch": 0.682171889364228, + "grad_norm": 0.15547014772891998, + "learning_rate": 4.317835349301831e-06, + "loss": 0.9019, + "step": 94240 + }, + { + "epoch": 0.6822442760248142, + "grad_norm": 0.16611367464065552, + "learning_rate": 4.317762962641245e-06, + "loss": 0.8933, + "step": 94250 + }, + { + "epoch": 0.6823166626854004, + "grad_norm": 0.33474913239479065, + "learning_rate": 4.317690575980659e-06, + "loss": 0.8996, + "step": 94260 + }, + { + "epoch": 0.6823890493459865, + "grad_norm": 0.17985767126083374, + "learning_rate": 4.317618189320072e-06, + "loss": 0.9111, + "step": 94270 + }, + { + "epoch": 0.6824614360065727, + "grad_norm": 0.14204993844032288, + "learning_rate": 4.317545802659486e-06, + "loss": 0.9077, + "step": 94280 + }, + { + "epoch": 0.6825338226671589, + "grad_norm": 0.14709369838237762, + "learning_rate": 4.3174734159989e-06, + "loss": 0.8779, + "step": 94290 + }, + { + "epoch": 0.6826062093277451, + "grad_norm": 0.16410696506500244, + "learning_rate": 4.317401029338314e-06, + "loss": 0.8993, + "step": 94300 + }, + { + "epoch": 0.6826785959883312, + "grad_norm": 0.15575379133224487, + "learning_rate": 4.317328642677728e-06, + "loss": 0.8887, + "step": 94310 + }, + { + "epoch": 0.6827509826489174, + "grad_norm": 0.16469940543174744, + "learning_rate": 4.317256256017141e-06, + "loss": 0.8954, + "step": 94320 + }, + { + "epoch": 0.6828233693095036, + "grad_norm": 0.15340863168239594, + "learning_rate": 4.317183869356556e-06, + "loss": 0.8975, + "step": 94330 + }, + { + "epoch": 0.6828957559700898, + "grad_norm": 0.16109062731266022, + "learning_rate": 4.317111482695969e-06, + "loss": 0.8972, + "step": 94340 + }, + { + "epoch": 0.6829681426306761, + "grad_norm": 0.16391950845718384, + "learning_rate": 4.317039096035383e-06, + "loss": 0.9028, + "step": 94350 + }, + { + "epoch": 0.6830405292912622, + "grad_norm": 0.15392933785915375, + "learning_rate": 4.316966709374797e-06, + "loss": 0.8991, + "step": 94360 + }, + { + "epoch": 0.6831129159518484, + "grad_norm": 0.16828075051307678, + "learning_rate": 4.316894322714211e-06, + "loss": 0.9025, + "step": 94370 + }, + { + "epoch": 0.6831853026124346, + "grad_norm": 0.147588312625885, + "learning_rate": 4.316821936053625e-06, + "loss": 0.9008, + "step": 94380 + }, + { + "epoch": 0.6832576892730208, + "grad_norm": 0.14783324301242828, + "learning_rate": 4.316749549393038e-06, + "loss": 0.9054, + "step": 94390 + }, + { + "epoch": 0.683330075933607, + "grad_norm": 0.1539517641067505, + "learning_rate": 4.316677162732452e-06, + "loss": 0.8891, + "step": 94400 + }, + { + "epoch": 0.6834024625941931, + "grad_norm": 0.15323641896247864, + "learning_rate": 4.316604776071866e-06, + "loss": 0.9112, + "step": 94410 + }, + { + "epoch": 0.6834748492547793, + "grad_norm": 0.1703089028596878, + "learning_rate": 4.31653238941128e-06, + "loss": 0.8885, + "step": 94420 + }, + { + "epoch": 0.6835472359153655, + "grad_norm": 0.15150368213653564, + "learning_rate": 4.316460002750694e-06, + "loss": 0.8762, + "step": 94430 + }, + { + "epoch": 0.6836196225759517, + "grad_norm": 0.15605489909648895, + "learning_rate": 4.316387616090107e-06, + "loss": 0.9036, + "step": 94440 + }, + { + "epoch": 0.683692009236538, + "grad_norm": 0.14873306453227997, + "learning_rate": 4.316315229429522e-06, + "loss": 0.8936, + "step": 94450 + }, + { + "epoch": 0.6837643958971241, + "grad_norm": 0.15489499270915985, + "learning_rate": 4.316242842768935e-06, + "loss": 0.8927, + "step": 94460 + }, + { + "epoch": 0.6838367825577103, + "grad_norm": 0.14702056348323822, + "learning_rate": 4.316170456108349e-06, + "loss": 0.8919, + "step": 94470 + }, + { + "epoch": 0.6839091692182965, + "grad_norm": 0.17012947797775269, + "learning_rate": 4.3160980694477625e-06, + "loss": 0.9027, + "step": 94480 + }, + { + "epoch": 0.6839815558788827, + "grad_norm": 0.23859137296676636, + "learning_rate": 4.316025682787177e-06, + "loss": 0.8989, + "step": 94490 + }, + { + "epoch": 0.6840539425394688, + "grad_norm": 0.14270347356796265, + "learning_rate": 4.315953296126591e-06, + "loss": 0.8966, + "step": 94500 + }, + { + "epoch": 0.684126329200055, + "grad_norm": 0.15070217847824097, + "learning_rate": 4.315880909466003e-06, + "loss": 0.8967, + "step": 94510 + }, + { + "epoch": 0.6841987158606412, + "grad_norm": 0.14996741712093353, + "learning_rate": 4.315808522805418e-06, + "loss": 0.8973, + "step": 94520 + }, + { + "epoch": 0.6842711025212274, + "grad_norm": 0.1512274593114853, + "learning_rate": 4.3157361361448314e-06, + "loss": 0.8937, + "step": 94530 + }, + { + "epoch": 0.6843434891818135, + "grad_norm": 0.1495598703622818, + "learning_rate": 4.315663749484245e-06, + "loss": 0.894, + "step": 94540 + }, + { + "epoch": 0.6844158758423997, + "grad_norm": 0.1526460349559784, + "learning_rate": 4.315591362823659e-06, + "loss": 0.9058, + "step": 94550 + }, + { + "epoch": 0.684488262502986, + "grad_norm": 0.22177492082118988, + "learning_rate": 4.315518976163073e-06, + "loss": 0.8909, + "step": 94560 + }, + { + "epoch": 0.6845606491635722, + "grad_norm": 0.1475263386964798, + "learning_rate": 4.315446589502487e-06, + "loss": 0.8894, + "step": 94570 + }, + { + "epoch": 0.6846330358241584, + "grad_norm": 0.15898674726486206, + "learning_rate": 4.3153742028419e-06, + "loss": 0.9013, + "step": 94580 + }, + { + "epoch": 0.6847054224847445, + "grad_norm": 0.160345658659935, + "learning_rate": 4.315301816181314e-06, + "loss": 0.8894, + "step": 94590 + }, + { + "epoch": 0.6847778091453307, + "grad_norm": 0.16944102942943573, + "learning_rate": 4.3152294295207285e-06, + "loss": 0.9001, + "step": 94600 + }, + { + "epoch": 0.6848501958059169, + "grad_norm": 0.17007231712341309, + "learning_rate": 4.315157042860142e-06, + "loss": 0.9066, + "step": 94610 + }, + { + "epoch": 0.6849225824665031, + "grad_norm": 0.14328494668006897, + "learning_rate": 4.315084656199556e-06, + "loss": 0.8942, + "step": 94620 + }, + { + "epoch": 0.6849949691270892, + "grad_norm": 0.17920292913913727, + "learning_rate": 4.315012269538969e-06, + "loss": 0.9016, + "step": 94630 + }, + { + "epoch": 0.6850673557876754, + "grad_norm": 0.15425662696361542, + "learning_rate": 4.314939882878384e-06, + "loss": 0.885, + "step": 94640 + }, + { + "epoch": 0.6851397424482616, + "grad_norm": 0.15390869975090027, + "learning_rate": 4.314867496217797e-06, + "loss": 0.8941, + "step": 94650 + }, + { + "epoch": 0.6852121291088478, + "grad_norm": 0.15865157544612885, + "learning_rate": 4.314795109557211e-06, + "loss": 0.9016, + "step": 94660 + }, + { + "epoch": 0.685284515769434, + "grad_norm": 0.14358533918857574, + "learning_rate": 4.314722722896625e-06, + "loss": 0.8833, + "step": 94670 + }, + { + "epoch": 0.6853569024300202, + "grad_norm": 0.15622635185718536, + "learning_rate": 4.314650336236039e-06, + "loss": 0.8932, + "step": 94680 + }, + { + "epoch": 0.6854292890906064, + "grad_norm": 0.1485530436038971, + "learning_rate": 4.314577949575453e-06, + "loss": 0.9085, + "step": 94690 + }, + { + "epoch": 0.6855016757511926, + "grad_norm": 0.1571887731552124, + "learning_rate": 4.314505562914866e-06, + "loss": 0.9022, + "step": 94700 + }, + { + "epoch": 0.6855740624117788, + "grad_norm": 0.15338723361492157, + "learning_rate": 4.31443317625428e-06, + "loss": 0.8992, + "step": 94710 + }, + { + "epoch": 0.6856464490723649, + "grad_norm": 0.14642809331417084, + "learning_rate": 4.314360789593694e-06, + "loss": 0.8907, + "step": 94720 + }, + { + "epoch": 0.6857188357329511, + "grad_norm": 0.1597226858139038, + "learning_rate": 4.314288402933108e-06, + "loss": 0.9018, + "step": 94730 + }, + { + "epoch": 0.6857912223935373, + "grad_norm": 0.16934312880039215, + "learning_rate": 4.314216016272522e-06, + "loss": 0.8962, + "step": 94740 + }, + { + "epoch": 0.6858636090541235, + "grad_norm": 0.16490796208381653, + "learning_rate": 4.314143629611935e-06, + "loss": 0.8977, + "step": 94750 + }, + { + "epoch": 0.6859359957147096, + "grad_norm": 0.1624918133020401, + "learning_rate": 4.314071242951349e-06, + "loss": 0.8968, + "step": 94760 + }, + { + "epoch": 0.6860083823752959, + "grad_norm": 0.16102252900600433, + "learning_rate": 4.313998856290763e-06, + "loss": 0.8991, + "step": 94770 + }, + { + "epoch": 0.6860807690358821, + "grad_norm": 0.15310189127922058, + "learning_rate": 4.313926469630177e-06, + "loss": 0.894, + "step": 94780 + }, + { + "epoch": 0.6861531556964683, + "grad_norm": 0.15352487564086914, + "learning_rate": 4.3138540829695906e-06, + "loss": 0.8779, + "step": 94790 + }, + { + "epoch": 0.6862255423570545, + "grad_norm": 0.15823419392108917, + "learning_rate": 4.313781696309004e-06, + "loss": 0.906, + "step": 94800 + }, + { + "epoch": 0.6862979290176406, + "grad_norm": 0.14976033568382263, + "learning_rate": 4.313709309648419e-06, + "loss": 0.8872, + "step": 94810 + }, + { + "epoch": 0.6863703156782268, + "grad_norm": 0.1584375500679016, + "learning_rate": 4.313636922987832e-06, + "loss": 0.8847, + "step": 94820 + }, + { + "epoch": 0.686442702338813, + "grad_norm": 0.16619914770126343, + "learning_rate": 4.313564536327246e-06, + "loss": 0.8942, + "step": 94830 + }, + { + "epoch": 0.6865150889993992, + "grad_norm": 0.16585594415664673, + "learning_rate": 4.3134921496666595e-06, + "loss": 0.8958, + "step": 94840 + }, + { + "epoch": 0.6865874756599853, + "grad_norm": 0.1556151807308197, + "learning_rate": 4.313419763006074e-06, + "loss": 0.9076, + "step": 94850 + }, + { + "epoch": 0.6866598623205715, + "grad_norm": 0.16103480756282806, + "learning_rate": 4.3133473763454876e-06, + "loss": 0.8927, + "step": 94860 + }, + { + "epoch": 0.6867322489811577, + "grad_norm": 0.15770234167575836, + "learning_rate": 4.313274989684901e-06, + "loss": 0.8901, + "step": 94870 + }, + { + "epoch": 0.686804635641744, + "grad_norm": 0.15498584508895874, + "learning_rate": 4.313202603024315e-06, + "loss": 0.9026, + "step": 94880 + }, + { + "epoch": 0.6868770223023302, + "grad_norm": 0.15212376415729523, + "learning_rate": 4.313130216363729e-06, + "loss": 0.8837, + "step": 94890 + }, + { + "epoch": 0.6869494089629163, + "grad_norm": 0.16598445177078247, + "learning_rate": 4.313057829703143e-06, + "loss": 0.8998, + "step": 94900 + }, + { + "epoch": 0.6870217956235025, + "grad_norm": 0.15342400968074799, + "learning_rate": 4.3129854430425565e-06, + "loss": 0.8858, + "step": 94910 + }, + { + "epoch": 0.6870941822840887, + "grad_norm": 0.1750396341085434, + "learning_rate": 4.31291305638197e-06, + "loss": 0.8823, + "step": 94920 + }, + { + "epoch": 0.6871665689446749, + "grad_norm": 0.14746147394180298, + "learning_rate": 4.3128406697213846e-06, + "loss": 0.8892, + "step": 94930 + }, + { + "epoch": 0.687238955605261, + "grad_norm": 0.16607217490673065, + "learning_rate": 4.312768283060798e-06, + "loss": 0.9003, + "step": 94940 + }, + { + "epoch": 0.6873113422658472, + "grad_norm": 0.15274251997470856, + "learning_rate": 4.312695896400212e-06, + "loss": 0.8967, + "step": 94950 + }, + { + "epoch": 0.6873837289264334, + "grad_norm": 0.15561649203300476, + "learning_rate": 4.312623509739625e-06, + "loss": 0.8926, + "step": 94960 + }, + { + "epoch": 0.6874561155870196, + "grad_norm": 0.1617891639471054, + "learning_rate": 4.31255112307904e-06, + "loss": 0.8919, + "step": 94970 + }, + { + "epoch": 0.6875285022476059, + "grad_norm": 0.15291917324066162, + "learning_rate": 4.3124787364184535e-06, + "loss": 0.8961, + "step": 94980 + }, + { + "epoch": 0.687600888908192, + "grad_norm": 0.15395525097846985, + "learning_rate": 4.312406349757867e-06, + "loss": 0.9067, + "step": 94990 + }, + { + "epoch": 0.6876732755687782, + "grad_norm": 0.15367929637432098, + "learning_rate": 4.312333963097281e-06, + "loss": 0.8899, + "step": 95000 + }, + { + "epoch": 0.6877456622293644, + "grad_norm": 0.16362635791301727, + "learning_rate": 4.312261576436695e-06, + "loss": 0.8971, + "step": 95010 + }, + { + "epoch": 0.6878180488899506, + "grad_norm": 0.1670594960451126, + "learning_rate": 4.312189189776109e-06, + "loss": 0.8869, + "step": 95020 + }, + { + "epoch": 0.6878904355505367, + "grad_norm": 0.161848783493042, + "learning_rate": 4.312116803115522e-06, + "loss": 0.8845, + "step": 95030 + }, + { + "epoch": 0.6879628222111229, + "grad_norm": 0.16515681147575378, + "learning_rate": 4.312044416454936e-06, + "loss": 0.8909, + "step": 95040 + }, + { + "epoch": 0.6880352088717091, + "grad_norm": 0.14597424864768982, + "learning_rate": 4.31197202979435e-06, + "loss": 0.896, + "step": 95050 + }, + { + "epoch": 0.6881075955322953, + "grad_norm": 0.1595359891653061, + "learning_rate": 4.311899643133763e-06, + "loss": 0.8876, + "step": 95060 + }, + { + "epoch": 0.6881799821928815, + "grad_norm": 0.1476903259754181, + "learning_rate": 4.311827256473177e-06, + "loss": 0.8913, + "step": 95070 + }, + { + "epoch": 0.6882523688534676, + "grad_norm": 0.18288491666316986, + "learning_rate": 4.311754869812591e-06, + "loss": 0.9013, + "step": 95080 + }, + { + "epoch": 0.6883247555140539, + "grad_norm": 0.16732849180698395, + "learning_rate": 4.311682483152005e-06, + "loss": 0.8962, + "step": 95090 + }, + { + "epoch": 0.6883971421746401, + "grad_norm": 0.15418116748332977, + "learning_rate": 4.311610096491419e-06, + "loss": 0.8913, + "step": 95100 + }, + { + "epoch": 0.6884695288352263, + "grad_norm": 0.15506073832511902, + "learning_rate": 4.311537709830832e-06, + "loss": 0.9081, + "step": 95110 + }, + { + "epoch": 0.6885419154958125, + "grad_norm": 0.15247192978858948, + "learning_rate": 4.311465323170247e-06, + "loss": 0.9026, + "step": 95120 + }, + { + "epoch": 0.6886143021563986, + "grad_norm": 0.15282821655273438, + "learning_rate": 4.31139293650966e-06, + "loss": 0.8913, + "step": 95130 + }, + { + "epoch": 0.6886866888169848, + "grad_norm": 0.14951321482658386, + "learning_rate": 4.311320549849074e-06, + "loss": 0.8937, + "step": 95140 + }, + { + "epoch": 0.688759075477571, + "grad_norm": 0.15877053141593933, + "learning_rate": 4.3112481631884875e-06, + "loss": 0.889, + "step": 95150 + }, + { + "epoch": 0.6888314621381572, + "grad_norm": 0.15322931110858917, + "learning_rate": 4.311175776527902e-06, + "loss": 0.8997, + "step": 95160 + }, + { + "epoch": 0.6889038487987433, + "grad_norm": 0.16098260879516602, + "learning_rate": 4.311103389867316e-06, + "loss": 0.8823, + "step": 95170 + }, + { + "epoch": 0.6889762354593295, + "grad_norm": 0.16160458326339722, + "learning_rate": 4.311031003206729e-06, + "loss": 0.8972, + "step": 95180 + }, + { + "epoch": 0.6890486221199157, + "grad_norm": 0.16651996970176697, + "learning_rate": 4.310958616546143e-06, + "loss": 0.8879, + "step": 95190 + }, + { + "epoch": 0.689121008780502, + "grad_norm": 0.1751406192779541, + "learning_rate": 4.310886229885557e-06, + "loss": 0.9041, + "step": 95200 + }, + { + "epoch": 0.6891933954410882, + "grad_norm": 0.1623631864786148, + "learning_rate": 4.310813843224971e-06, + "loss": 0.8999, + "step": 95210 + }, + { + "epoch": 0.6892657821016743, + "grad_norm": 0.1833324134349823, + "learning_rate": 4.3107414565643845e-06, + "loss": 0.8994, + "step": 95220 + }, + { + "epoch": 0.6893381687622605, + "grad_norm": 0.1521463841199875, + "learning_rate": 4.310669069903798e-06, + "loss": 0.9037, + "step": 95230 + }, + { + "epoch": 0.6894105554228467, + "grad_norm": 0.16113896667957306, + "learning_rate": 4.310596683243213e-06, + "loss": 0.8856, + "step": 95240 + }, + { + "epoch": 0.6894829420834329, + "grad_norm": 0.1714179962873459, + "learning_rate": 4.310524296582626e-06, + "loss": 0.8825, + "step": 95250 + }, + { + "epoch": 0.689555328744019, + "grad_norm": 0.16432242095470428, + "learning_rate": 4.31045190992204e-06, + "loss": 0.8871, + "step": 95260 + }, + { + "epoch": 0.6896277154046052, + "grad_norm": 0.16391827166080475, + "learning_rate": 4.3103795232614534e-06, + "loss": 0.8978, + "step": 95270 + }, + { + "epoch": 0.6897001020651914, + "grad_norm": 0.1498943716287613, + "learning_rate": 4.310307136600868e-06, + "loss": 0.8863, + "step": 95280 + }, + { + "epoch": 0.6897724887257776, + "grad_norm": 0.1618964523077011, + "learning_rate": 4.3102347499402815e-06, + "loss": 0.8955, + "step": 95290 + }, + { + "epoch": 0.6898448753863639, + "grad_norm": 0.1474188268184662, + "learning_rate": 4.310162363279695e-06, + "loss": 0.8935, + "step": 95300 + }, + { + "epoch": 0.68991726204695, + "grad_norm": 0.1580621600151062, + "learning_rate": 4.310089976619109e-06, + "loss": 0.8884, + "step": 95310 + }, + { + "epoch": 0.6899896487075362, + "grad_norm": 0.16119031608104706, + "learning_rate": 4.310017589958523e-06, + "loss": 0.8994, + "step": 95320 + }, + { + "epoch": 0.6900620353681224, + "grad_norm": 0.19750839471817017, + "learning_rate": 4.309945203297937e-06, + "loss": 0.8847, + "step": 95330 + }, + { + "epoch": 0.6901344220287086, + "grad_norm": 0.15879863500595093, + "learning_rate": 4.3098728166373505e-06, + "loss": 0.8885, + "step": 95340 + }, + { + "epoch": 0.6902068086892947, + "grad_norm": 0.14561788737773895, + "learning_rate": 4.309800429976764e-06, + "loss": 0.8881, + "step": 95350 + }, + { + "epoch": 0.6902791953498809, + "grad_norm": 0.16508562862873077, + "learning_rate": 4.3097280433161785e-06, + "loss": 0.8895, + "step": 95360 + }, + { + "epoch": 0.6903515820104671, + "grad_norm": 0.14818058907985687, + "learning_rate": 4.309655656655592e-06, + "loss": 0.8812, + "step": 95370 + }, + { + "epoch": 0.6904239686710533, + "grad_norm": 0.15891733765602112, + "learning_rate": 4.309583269995006e-06, + "loss": 0.8962, + "step": 95380 + }, + { + "epoch": 0.6904963553316394, + "grad_norm": 0.15448154509067535, + "learning_rate": 4.309510883334419e-06, + "loss": 0.8874, + "step": 95390 + }, + { + "epoch": 0.6905687419922256, + "grad_norm": 0.15475550293922424, + "learning_rate": 4.309438496673833e-06, + "loss": 0.8954, + "step": 95400 + }, + { + "epoch": 0.6906411286528119, + "grad_norm": 0.1788392812013626, + "learning_rate": 4.3093661100132475e-06, + "loss": 0.8905, + "step": 95410 + }, + { + "epoch": 0.6907135153133981, + "grad_norm": 0.17869041860103607, + "learning_rate": 4.309293723352661e-06, + "loss": 0.8919, + "step": 95420 + }, + { + "epoch": 0.6907859019739843, + "grad_norm": 0.1565256416797638, + "learning_rate": 4.309221336692075e-06, + "loss": 0.8814, + "step": 95430 + }, + { + "epoch": 0.6908582886345704, + "grad_norm": 0.30670905113220215, + "learning_rate": 4.309148950031488e-06, + "loss": 0.8798, + "step": 95440 + }, + { + "epoch": 0.6909306752951566, + "grad_norm": 0.1644957959651947, + "learning_rate": 4.309076563370903e-06, + "loss": 0.8952, + "step": 95450 + }, + { + "epoch": 0.6910030619557428, + "grad_norm": 0.17856526374816895, + "learning_rate": 4.309004176710316e-06, + "loss": 0.9032, + "step": 95460 + }, + { + "epoch": 0.691075448616329, + "grad_norm": 0.1568627655506134, + "learning_rate": 4.30893179004973e-06, + "loss": 0.893, + "step": 95470 + }, + { + "epoch": 0.6911478352769151, + "grad_norm": 0.2152179330587387, + "learning_rate": 4.308859403389144e-06, + "loss": 0.8926, + "step": 95480 + }, + { + "epoch": 0.6912202219375013, + "grad_norm": 0.16184626519680023, + "learning_rate": 4.308787016728558e-06, + "loss": 0.8783, + "step": 95490 + }, + { + "epoch": 0.6912926085980875, + "grad_norm": 0.1587613821029663, + "learning_rate": 4.308714630067972e-06, + "loss": 0.8855, + "step": 95500 + }, + { + "epoch": 0.6913649952586738, + "grad_norm": 0.18116620182991028, + "learning_rate": 4.308642243407385e-06, + "loss": 0.8977, + "step": 95510 + }, + { + "epoch": 0.69143738191926, + "grad_norm": 0.14871811866760254, + "learning_rate": 4.308569856746799e-06, + "loss": 0.8962, + "step": 95520 + }, + { + "epoch": 0.6915097685798461, + "grad_norm": 0.14631856977939606, + "learning_rate": 4.308497470086213e-06, + "loss": 0.8987, + "step": 95530 + }, + { + "epoch": 0.6915821552404323, + "grad_norm": 0.16045540571212769, + "learning_rate": 4.308425083425627e-06, + "loss": 0.8954, + "step": 95540 + }, + { + "epoch": 0.6916545419010185, + "grad_norm": 0.22909776866436005, + "learning_rate": 4.308352696765041e-06, + "loss": 0.9038, + "step": 95550 + }, + { + "epoch": 0.6917269285616047, + "grad_norm": 0.1496800035238266, + "learning_rate": 4.308280310104454e-06, + "loss": 0.8881, + "step": 95560 + }, + { + "epoch": 0.6917993152221908, + "grad_norm": 0.16365250945091248, + "learning_rate": 4.308207923443868e-06, + "loss": 0.8915, + "step": 95570 + }, + { + "epoch": 0.691871701882777, + "grad_norm": 0.16174180805683136, + "learning_rate": 4.3081355367832815e-06, + "loss": 0.9061, + "step": 95580 + }, + { + "epoch": 0.6919440885433632, + "grad_norm": 0.1649826467037201, + "learning_rate": 4.308063150122695e-06, + "loss": 0.8954, + "step": 95590 + }, + { + "epoch": 0.6920164752039494, + "grad_norm": 0.17469272017478943, + "learning_rate": 4.3079907634621096e-06, + "loss": 0.887, + "step": 95600 + }, + { + "epoch": 0.6920888618645356, + "grad_norm": 0.1564358025789261, + "learning_rate": 4.307918376801523e-06, + "loss": 0.8889, + "step": 95610 + }, + { + "epoch": 0.6921612485251218, + "grad_norm": 0.1583586186170578, + "learning_rate": 4.307845990140937e-06, + "loss": 0.8859, + "step": 95620 + }, + { + "epoch": 0.692233635185708, + "grad_norm": 0.1620096117258072, + "learning_rate": 4.30777360348035e-06, + "loss": 0.8862, + "step": 95630 + }, + { + "epoch": 0.6923060218462942, + "grad_norm": 0.16032646596431732, + "learning_rate": 4.307701216819765e-06, + "loss": 0.8894, + "step": 95640 + }, + { + "epoch": 0.6923784085068804, + "grad_norm": 0.21608605980873108, + "learning_rate": 4.3076288301591785e-06, + "loss": 0.8925, + "step": 95650 + }, + { + "epoch": 0.6924507951674665, + "grad_norm": 0.14863435924053192, + "learning_rate": 4.307556443498592e-06, + "loss": 0.8989, + "step": 95660 + }, + { + "epoch": 0.6925231818280527, + "grad_norm": 0.16338156163692474, + "learning_rate": 4.307484056838006e-06, + "loss": 0.8888, + "step": 95670 + }, + { + "epoch": 0.6925955684886389, + "grad_norm": 0.15900051593780518, + "learning_rate": 4.30741167017742e-06, + "loss": 0.8827, + "step": 95680 + }, + { + "epoch": 0.6926679551492251, + "grad_norm": 0.16256722807884216, + "learning_rate": 4.307339283516834e-06, + "loss": 0.8878, + "step": 95690 + }, + { + "epoch": 0.6927403418098113, + "grad_norm": 0.15759159624576569, + "learning_rate": 4.307266896856247e-06, + "loss": 0.8929, + "step": 95700 + }, + { + "epoch": 0.6928127284703974, + "grad_norm": 0.15648800134658813, + "learning_rate": 4.307194510195661e-06, + "loss": 0.8818, + "step": 95710 + }, + { + "epoch": 0.6928851151309836, + "grad_norm": 0.1834617257118225, + "learning_rate": 4.3071221235350755e-06, + "loss": 0.8918, + "step": 95720 + }, + { + "epoch": 0.6929575017915699, + "grad_norm": 0.162217915058136, + "learning_rate": 4.307049736874489e-06, + "loss": 0.9029, + "step": 95730 + }, + { + "epoch": 0.6930298884521561, + "grad_norm": 0.15800444781780243, + "learning_rate": 4.306977350213903e-06, + "loss": 0.8949, + "step": 95740 + }, + { + "epoch": 0.6931022751127422, + "grad_norm": 0.16938433051109314, + "learning_rate": 4.306904963553316e-06, + "loss": 0.8983, + "step": 95750 + }, + { + "epoch": 0.6931746617733284, + "grad_norm": 0.16872501373291016, + "learning_rate": 4.306832576892731e-06, + "loss": 0.8944, + "step": 95760 + }, + { + "epoch": 0.6932470484339146, + "grad_norm": 0.1726616472005844, + "learning_rate": 4.306760190232144e-06, + "loss": 0.8868, + "step": 95770 + }, + { + "epoch": 0.6933194350945008, + "grad_norm": 0.1653449535369873, + "learning_rate": 4.306687803571558e-06, + "loss": 0.8915, + "step": 95780 + }, + { + "epoch": 0.693391821755087, + "grad_norm": 0.15586352348327637, + "learning_rate": 4.306615416910972e-06, + "loss": 0.8864, + "step": 95790 + }, + { + "epoch": 0.6934642084156731, + "grad_norm": 0.16180157661437988, + "learning_rate": 4.306543030250386e-06, + "loss": 0.8912, + "step": 95800 + }, + { + "epoch": 0.6935365950762593, + "grad_norm": 0.16136401891708374, + "learning_rate": 4.3064706435898e-06, + "loss": 0.8899, + "step": 95810 + }, + { + "epoch": 0.6936089817368455, + "grad_norm": 0.1651676446199417, + "learning_rate": 4.306398256929213e-06, + "loss": 0.8796, + "step": 95820 + }, + { + "epoch": 0.6936813683974318, + "grad_norm": 0.16563965380191803, + "learning_rate": 4.306325870268627e-06, + "loss": 0.8822, + "step": 95830 + }, + { + "epoch": 0.693753755058018, + "grad_norm": 0.15649251639842987, + "learning_rate": 4.3062534836080414e-06, + "loss": 0.8911, + "step": 95840 + }, + { + "epoch": 0.6938261417186041, + "grad_norm": 0.1557883769273758, + "learning_rate": 4.306181096947455e-06, + "loss": 0.9081, + "step": 95850 + }, + { + "epoch": 0.6938985283791903, + "grad_norm": 0.18621514737606049, + "learning_rate": 4.306108710286869e-06, + "loss": 0.8983, + "step": 95860 + }, + { + "epoch": 0.6939709150397765, + "grad_norm": 0.16845759749412537, + "learning_rate": 4.306036323626282e-06, + "loss": 0.8874, + "step": 95870 + }, + { + "epoch": 0.6940433017003627, + "grad_norm": 0.16313624382019043, + "learning_rate": 4.305963936965697e-06, + "loss": 0.8855, + "step": 95880 + }, + { + "epoch": 0.6941156883609488, + "grad_norm": 0.160096675157547, + "learning_rate": 4.30589155030511e-06, + "loss": 0.8966, + "step": 95890 + }, + { + "epoch": 0.694188075021535, + "grad_norm": 0.15571869909763336, + "learning_rate": 4.305819163644524e-06, + "loss": 0.8865, + "step": 95900 + }, + { + "epoch": 0.6942604616821212, + "grad_norm": 0.14915607869625092, + "learning_rate": 4.305746776983938e-06, + "loss": 0.8954, + "step": 95910 + }, + { + "epoch": 0.6943328483427074, + "grad_norm": 0.15486833453178406, + "learning_rate": 4.305674390323352e-06, + "loss": 0.893, + "step": 95920 + }, + { + "epoch": 0.6944052350032935, + "grad_norm": 0.16171999275684357, + "learning_rate": 4.305602003662766e-06, + "loss": 0.8914, + "step": 95930 + }, + { + "epoch": 0.6944776216638798, + "grad_norm": 0.16727794706821442, + "learning_rate": 4.305529617002179e-06, + "loss": 0.8979, + "step": 95940 + }, + { + "epoch": 0.694550008324466, + "grad_norm": 0.19044362008571625, + "learning_rate": 4.305457230341593e-06, + "loss": 0.8751, + "step": 95950 + }, + { + "epoch": 0.6946223949850522, + "grad_norm": 0.16111010313034058, + "learning_rate": 4.305384843681007e-06, + "loss": 0.9069, + "step": 95960 + }, + { + "epoch": 0.6946947816456384, + "grad_norm": 0.20112472772598267, + "learning_rate": 4.305312457020421e-06, + "loss": 0.8972, + "step": 95970 + }, + { + "epoch": 0.6947671683062245, + "grad_norm": 0.19165970385074615, + "learning_rate": 4.305240070359835e-06, + "loss": 0.9029, + "step": 95980 + }, + { + "epoch": 0.6948395549668107, + "grad_norm": 0.1580514758825302, + "learning_rate": 4.305167683699248e-06, + "loss": 0.8961, + "step": 95990 + }, + { + "epoch": 0.6949119416273969, + "grad_norm": 0.1498570591211319, + "learning_rate": 4.305095297038662e-06, + "loss": 0.8919, + "step": 96000 + }, + { + "epoch": 0.6949843282879831, + "grad_norm": 0.15419946610927582, + "learning_rate": 4.305022910378076e-06, + "loss": 0.888, + "step": 96010 + }, + { + "epoch": 0.6950567149485692, + "grad_norm": 0.17555716633796692, + "learning_rate": 4.30495052371749e-06, + "loss": 0.8925, + "step": 96020 + }, + { + "epoch": 0.6951291016091554, + "grad_norm": 0.15583516657352448, + "learning_rate": 4.3048781370569035e-06, + "loss": 0.8886, + "step": 96030 + }, + { + "epoch": 0.6952014882697416, + "grad_norm": 0.15992936491966248, + "learning_rate": 4.304805750396317e-06, + "loss": 0.9023, + "step": 96040 + }, + { + "epoch": 0.6952738749303279, + "grad_norm": 0.16176897287368774, + "learning_rate": 4.304733363735732e-06, + "loss": 0.8834, + "step": 96050 + }, + { + "epoch": 0.6953462615909141, + "grad_norm": 0.14862500131130219, + "learning_rate": 4.304660977075145e-06, + "loss": 0.9043, + "step": 96060 + }, + { + "epoch": 0.6954186482515002, + "grad_norm": 0.16168230772018433, + "learning_rate": 4.304588590414559e-06, + "loss": 0.9008, + "step": 96070 + }, + { + "epoch": 0.6954910349120864, + "grad_norm": 0.15463055670261383, + "learning_rate": 4.3045162037539725e-06, + "loss": 0.8975, + "step": 96080 + }, + { + "epoch": 0.6955634215726726, + "grad_norm": 0.15740707516670227, + "learning_rate": 4.304443817093387e-06, + "loss": 0.8999, + "step": 96090 + }, + { + "epoch": 0.6956358082332588, + "grad_norm": 0.1548217087984085, + "learning_rate": 4.3043714304328e-06, + "loss": 0.8997, + "step": 96100 + }, + { + "epoch": 0.695708194893845, + "grad_norm": 0.17392714321613312, + "learning_rate": 4.304299043772214e-06, + "loss": 0.8889, + "step": 96110 + }, + { + "epoch": 0.6957805815544311, + "grad_norm": 0.16982245445251465, + "learning_rate": 4.304226657111628e-06, + "loss": 0.8908, + "step": 96120 + }, + { + "epoch": 0.6958529682150173, + "grad_norm": 0.33574557304382324, + "learning_rate": 4.304154270451041e-06, + "loss": 0.8943, + "step": 96130 + }, + { + "epoch": 0.6959253548756035, + "grad_norm": 0.1604243814945221, + "learning_rate": 4.304081883790455e-06, + "loss": 0.891, + "step": 96140 + }, + { + "epoch": 0.6959977415361898, + "grad_norm": 0.1695748269557953, + "learning_rate": 4.3040094971298695e-06, + "loss": 0.9052, + "step": 96150 + }, + { + "epoch": 0.6960701281967759, + "grad_norm": 0.15508553385734558, + "learning_rate": 4.303937110469283e-06, + "loss": 0.8917, + "step": 96160 + }, + { + "epoch": 0.6961425148573621, + "grad_norm": 0.16269119083881378, + "learning_rate": 4.303864723808697e-06, + "loss": 0.891, + "step": 96170 + }, + { + "epoch": 0.6962149015179483, + "grad_norm": 0.15743182599544525, + "learning_rate": 4.30379233714811e-06, + "loss": 0.8944, + "step": 96180 + }, + { + "epoch": 0.6962872881785345, + "grad_norm": 0.15545742213726044, + "learning_rate": 4.303719950487524e-06, + "loss": 0.8862, + "step": 96190 + }, + { + "epoch": 0.6963596748391206, + "grad_norm": 0.2056836634874344, + "learning_rate": 4.303647563826938e-06, + "loss": 0.8911, + "step": 96200 + }, + { + "epoch": 0.6964320614997068, + "grad_norm": 0.15505336225032806, + "learning_rate": 4.303575177166352e-06, + "loss": 0.8959, + "step": 96210 + }, + { + "epoch": 0.696504448160293, + "grad_norm": 0.155269056558609, + "learning_rate": 4.303502790505766e-06, + "loss": 0.8989, + "step": 96220 + }, + { + "epoch": 0.6965768348208792, + "grad_norm": 0.16013725101947784, + "learning_rate": 4.303430403845179e-06, + "loss": 0.8941, + "step": 96230 + }, + { + "epoch": 0.6966492214814654, + "grad_norm": 0.15824046730995178, + "learning_rate": 4.303358017184594e-06, + "loss": 0.8998, + "step": 96240 + }, + { + "epoch": 0.6967216081420515, + "grad_norm": 0.17412789165973663, + "learning_rate": 4.303285630524007e-06, + "loss": 0.9062, + "step": 96250 + }, + { + "epoch": 0.6967939948026378, + "grad_norm": 0.22454731166362762, + "learning_rate": 4.303213243863421e-06, + "loss": 0.8884, + "step": 96260 + }, + { + "epoch": 0.696866381463224, + "grad_norm": 0.16182060539722443, + "learning_rate": 4.3031408572028345e-06, + "loss": 0.8888, + "step": 96270 + }, + { + "epoch": 0.6969387681238102, + "grad_norm": 0.17886275053024292, + "learning_rate": 4.303068470542249e-06, + "loss": 0.8823, + "step": 96280 + }, + { + "epoch": 0.6970111547843963, + "grad_norm": 0.1985713690519333, + "learning_rate": 4.302996083881663e-06, + "loss": 0.8882, + "step": 96290 + }, + { + "epoch": 0.6970835414449825, + "grad_norm": 0.1513681411743164, + "learning_rate": 4.302923697221076e-06, + "loss": 0.9012, + "step": 96300 + }, + { + "epoch": 0.6971559281055687, + "grad_norm": 0.15492072701454163, + "learning_rate": 4.30285131056049e-06, + "loss": 0.899, + "step": 96310 + }, + { + "epoch": 0.6972283147661549, + "grad_norm": 0.15350612998008728, + "learning_rate": 4.302778923899904e-06, + "loss": 0.9022, + "step": 96320 + }, + { + "epoch": 0.697300701426741, + "grad_norm": 0.15936022996902466, + "learning_rate": 4.302706537239318e-06, + "loss": 0.8985, + "step": 96330 + }, + { + "epoch": 0.6973730880873272, + "grad_norm": 0.15187810361385345, + "learning_rate": 4.3026341505787316e-06, + "loss": 0.8867, + "step": 96340 + }, + { + "epoch": 0.6974454747479134, + "grad_norm": 0.16198959946632385, + "learning_rate": 4.302561763918145e-06, + "loss": 0.903, + "step": 96350 + }, + { + "epoch": 0.6975178614084997, + "grad_norm": 0.1528894305229187, + "learning_rate": 4.30248937725756e-06, + "loss": 0.8793, + "step": 96360 + }, + { + "epoch": 0.6975902480690859, + "grad_norm": 0.17313164472579956, + "learning_rate": 4.302416990596973e-06, + "loss": 0.8992, + "step": 96370 + }, + { + "epoch": 0.697662634729672, + "grad_norm": 0.15843333303928375, + "learning_rate": 4.302344603936387e-06, + "loss": 0.8963, + "step": 96380 + }, + { + "epoch": 0.6977350213902582, + "grad_norm": 0.1584440916776657, + "learning_rate": 4.3022722172758005e-06, + "loss": 0.9057, + "step": 96390 + }, + { + "epoch": 0.6978074080508444, + "grad_norm": 0.14555677771568298, + "learning_rate": 4.302199830615215e-06, + "loss": 0.8841, + "step": 96400 + }, + { + "epoch": 0.6978797947114306, + "grad_norm": 0.22453060746192932, + "learning_rate": 4.3021274439546286e-06, + "loss": 0.8912, + "step": 96410 + }, + { + "epoch": 0.6979521813720168, + "grad_norm": 0.16137515008449554, + "learning_rate": 4.302055057294042e-06, + "loss": 0.8757, + "step": 96420 + }, + { + "epoch": 0.6980245680326029, + "grad_norm": 0.2278577834367752, + "learning_rate": 4.301982670633456e-06, + "loss": 0.8918, + "step": 96430 + }, + { + "epoch": 0.6980969546931891, + "grad_norm": 0.1604139506816864, + "learning_rate": 4.30191028397287e-06, + "loss": 0.8988, + "step": 96440 + }, + { + "epoch": 0.6981693413537753, + "grad_norm": 0.1709796041250229, + "learning_rate": 4.301837897312284e-06, + "loss": 0.8999, + "step": 96450 + }, + { + "epoch": 0.6982417280143615, + "grad_norm": 0.16104479134082794, + "learning_rate": 4.3017655106516975e-06, + "loss": 0.8961, + "step": 96460 + }, + { + "epoch": 0.6983141146749478, + "grad_norm": 0.1653842329978943, + "learning_rate": 4.301693123991111e-06, + "loss": 0.8805, + "step": 96470 + }, + { + "epoch": 0.6983865013355339, + "grad_norm": 0.16090551018714905, + "learning_rate": 4.3016207373305256e-06, + "loss": 0.9097, + "step": 96480 + }, + { + "epoch": 0.6984588879961201, + "grad_norm": 0.3694705367088318, + "learning_rate": 4.301548350669939e-06, + "loss": 0.8856, + "step": 96490 + }, + { + "epoch": 0.6985312746567063, + "grad_norm": 0.1503128856420517, + "learning_rate": 4.301475964009353e-06, + "loss": 0.8856, + "step": 96500 + }, + { + "epoch": 0.6986036613172925, + "grad_norm": 0.16498462855815887, + "learning_rate": 4.301403577348766e-06, + "loss": 0.8975, + "step": 96510 + }, + { + "epoch": 0.6986760479778786, + "grad_norm": 0.1551860123872757, + "learning_rate": 4.301331190688181e-06, + "loss": 0.895, + "step": 96520 + }, + { + "epoch": 0.6987484346384648, + "grad_norm": 0.16694217920303345, + "learning_rate": 4.3012588040275945e-06, + "loss": 0.8932, + "step": 96530 + }, + { + "epoch": 0.698820821299051, + "grad_norm": 0.9841502904891968, + "learning_rate": 4.301186417367008e-06, + "loss": 0.9151, + "step": 96540 + }, + { + "epoch": 0.6988932079596372, + "grad_norm": 0.3107115626335144, + "learning_rate": 4.301114030706422e-06, + "loss": 0.8826, + "step": 96550 + }, + { + "epoch": 0.6989655946202233, + "grad_norm": 0.3002294898033142, + "learning_rate": 4.301041644045836e-06, + "loss": 0.8991, + "step": 96560 + }, + { + "epoch": 0.6990379812808095, + "grad_norm": 0.1559462547302246, + "learning_rate": 4.30096925738525e-06, + "loss": 0.8887, + "step": 96570 + }, + { + "epoch": 0.6991103679413958, + "grad_norm": 0.1544226109981537, + "learning_rate": 4.3008968707246634e-06, + "loss": 0.8921, + "step": 96580 + }, + { + "epoch": 0.699182754601982, + "grad_norm": 0.18448671698570251, + "learning_rate": 4.300824484064077e-06, + "loss": 0.8846, + "step": 96590 + }, + { + "epoch": 0.6992551412625682, + "grad_norm": 0.1595505028963089, + "learning_rate": 4.3007520974034915e-06, + "loss": 0.8977, + "step": 96600 + }, + { + "epoch": 0.6993275279231543, + "grad_norm": 0.15914291143417358, + "learning_rate": 4.300679710742905e-06, + "loss": 0.9054, + "step": 96610 + }, + { + "epoch": 0.6993999145837405, + "grad_norm": 0.14967529475688934, + "learning_rate": 4.300607324082319e-06, + "loss": 0.8893, + "step": 96620 + }, + { + "epoch": 0.6994723012443267, + "grad_norm": 0.1701008379459381, + "learning_rate": 4.300534937421732e-06, + "loss": 0.8854, + "step": 96630 + }, + { + "epoch": 0.6995446879049129, + "grad_norm": 0.15182742476463318, + "learning_rate": 4.300462550761146e-06, + "loss": 0.8878, + "step": 96640 + }, + { + "epoch": 0.699617074565499, + "grad_norm": 0.16047948598861694, + "learning_rate": 4.30039016410056e-06, + "loss": 0.8819, + "step": 96650 + }, + { + "epoch": 0.6996894612260852, + "grad_norm": 0.15404923260211945, + "learning_rate": 4.300317777439973e-06, + "loss": 0.8876, + "step": 96660 + }, + { + "epoch": 0.6997618478866714, + "grad_norm": 0.14895722270011902, + "learning_rate": 4.300245390779388e-06, + "loss": 0.8889, + "step": 96670 + }, + { + "epoch": 0.6998342345472577, + "grad_norm": 0.1480094939470291, + "learning_rate": 4.300173004118801e-06, + "loss": 0.8798, + "step": 96680 + }, + { + "epoch": 0.6999066212078439, + "grad_norm": 0.15281951427459717, + "learning_rate": 4.300100617458215e-06, + "loss": 0.8828, + "step": 96690 + }, + { + "epoch": 0.69997900786843, + "grad_norm": 0.15964744985103607, + "learning_rate": 4.3000282307976285e-06, + "loss": 0.8885, + "step": 96700 + }, + { + "epoch": 0.7000513945290162, + "grad_norm": 0.15603557229042053, + "learning_rate": 4.299955844137043e-06, + "loss": 0.8863, + "step": 96710 + }, + { + "epoch": 0.7001237811896024, + "grad_norm": 0.1674613654613495, + "learning_rate": 4.299883457476457e-06, + "loss": 0.8982, + "step": 96720 + }, + { + "epoch": 0.7001961678501886, + "grad_norm": 0.15703196823596954, + "learning_rate": 4.29981107081587e-06, + "loss": 0.8825, + "step": 96730 + }, + { + "epoch": 0.7002685545107747, + "grad_norm": 0.17176085710525513, + "learning_rate": 4.299738684155284e-06, + "loss": 0.8873, + "step": 96740 + }, + { + "epoch": 0.7003409411713609, + "grad_norm": 0.16086554527282715, + "learning_rate": 4.299666297494698e-06, + "loss": 0.8854, + "step": 96750 + }, + { + "epoch": 0.7004133278319471, + "grad_norm": 0.16075126826763153, + "learning_rate": 4.299593910834112e-06, + "loss": 0.9035, + "step": 96760 + }, + { + "epoch": 0.7004857144925333, + "grad_norm": 0.14747561514377594, + "learning_rate": 4.2995215241735255e-06, + "loss": 0.8975, + "step": 96770 + }, + { + "epoch": 0.7005581011531195, + "grad_norm": 0.16974276304244995, + "learning_rate": 4.299449137512939e-06, + "loss": 0.8975, + "step": 96780 + }, + { + "epoch": 0.7006304878137057, + "grad_norm": 0.16168342530727386, + "learning_rate": 4.299376750852353e-06, + "loss": 0.8864, + "step": 96790 + }, + { + "epoch": 0.7007028744742919, + "grad_norm": 0.1794668585062027, + "learning_rate": 4.299304364191767e-06, + "loss": 0.8973, + "step": 96800 + }, + { + "epoch": 0.7007752611348781, + "grad_norm": 0.16029122471809387, + "learning_rate": 4.299231977531181e-06, + "loss": 0.8895, + "step": 96810 + }, + { + "epoch": 0.7008476477954643, + "grad_norm": 0.18477699160575867, + "learning_rate": 4.2991595908705945e-06, + "loss": 0.8888, + "step": 96820 + }, + { + "epoch": 0.7009200344560504, + "grad_norm": 0.14988726377487183, + "learning_rate": 4.299087204210008e-06, + "loss": 0.8774, + "step": 96830 + }, + { + "epoch": 0.7009924211166366, + "grad_norm": 0.1730933040380478, + "learning_rate": 4.2990148175494225e-06, + "loss": 0.9006, + "step": 96840 + }, + { + "epoch": 0.7010648077772228, + "grad_norm": 0.1729445457458496, + "learning_rate": 4.298942430888836e-06, + "loss": 0.892, + "step": 96850 + }, + { + "epoch": 0.701137194437809, + "grad_norm": 0.15627533197402954, + "learning_rate": 4.29887004422825e-06, + "loss": 0.8931, + "step": 96860 + }, + { + "epoch": 0.7012095810983952, + "grad_norm": 0.16592872142791748, + "learning_rate": 4.298797657567663e-06, + "loss": 0.9052, + "step": 96870 + }, + { + "epoch": 0.7012819677589813, + "grad_norm": 0.15809662640094757, + "learning_rate": 4.298725270907078e-06, + "loss": 0.8885, + "step": 96880 + }, + { + "epoch": 0.7013543544195676, + "grad_norm": 0.1509890854358673, + "learning_rate": 4.2986528842464915e-06, + "loss": 0.8884, + "step": 96890 + }, + { + "epoch": 0.7014267410801538, + "grad_norm": 0.14822304248809814, + "learning_rate": 4.298580497585905e-06, + "loss": 0.8768, + "step": 96900 + }, + { + "epoch": 0.70149912774074, + "grad_norm": 0.15169335901737213, + "learning_rate": 4.298508110925319e-06, + "loss": 0.8876, + "step": 96910 + }, + { + "epoch": 0.7015715144013261, + "grad_norm": 0.16953204572200775, + "learning_rate": 4.298435724264733e-06, + "loss": 0.9045, + "step": 96920 + }, + { + "epoch": 0.7016439010619123, + "grad_norm": 0.16186213493347168, + "learning_rate": 4.298363337604147e-06, + "loss": 0.8963, + "step": 96930 + }, + { + "epoch": 0.7017162877224985, + "grad_norm": 0.16287651658058167, + "learning_rate": 4.29829095094356e-06, + "loss": 0.9003, + "step": 96940 + }, + { + "epoch": 0.7017886743830847, + "grad_norm": 0.17633283138275146, + "learning_rate": 4.298218564282974e-06, + "loss": 0.8685, + "step": 96950 + }, + { + "epoch": 0.7018610610436709, + "grad_norm": 0.22455504536628723, + "learning_rate": 4.2981461776223885e-06, + "loss": 0.9052, + "step": 96960 + }, + { + "epoch": 0.701933447704257, + "grad_norm": 0.14891460537910461, + "learning_rate": 4.298073790961802e-06, + "loss": 0.8836, + "step": 96970 + }, + { + "epoch": 0.7020058343648432, + "grad_norm": 0.1637570559978485, + "learning_rate": 4.298001404301216e-06, + "loss": 0.8994, + "step": 96980 + }, + { + "epoch": 0.7020782210254294, + "grad_norm": 0.1491258293390274, + "learning_rate": 4.297929017640629e-06, + "loss": 0.8972, + "step": 96990 + }, + { + "epoch": 0.7021506076860157, + "grad_norm": 0.15172043442726135, + "learning_rate": 4.297856630980044e-06, + "loss": 0.8946, + "step": 97000 + }, + { + "epoch": 0.7022229943466018, + "grad_norm": 0.1903785616159439, + "learning_rate": 4.297784244319457e-06, + "loss": 0.885, + "step": 97010 + }, + { + "epoch": 0.702295381007188, + "grad_norm": 0.1513489931821823, + "learning_rate": 4.297711857658871e-06, + "loss": 0.8886, + "step": 97020 + }, + { + "epoch": 0.7023677676677742, + "grad_norm": 0.14946502447128296, + "learning_rate": 4.297639470998285e-06, + "loss": 0.8916, + "step": 97030 + }, + { + "epoch": 0.7024401543283604, + "grad_norm": 0.16224405169487, + "learning_rate": 4.297567084337699e-06, + "loss": 0.8992, + "step": 97040 + }, + { + "epoch": 0.7025125409889466, + "grad_norm": 0.17303119599819183, + "learning_rate": 4.297494697677113e-06, + "loss": 0.9058, + "step": 97050 + }, + { + "epoch": 0.7025849276495327, + "grad_norm": 0.16080045700073242, + "learning_rate": 4.297422311016526e-06, + "loss": 0.8884, + "step": 97060 + }, + { + "epoch": 0.7026573143101189, + "grad_norm": 0.1557897925376892, + "learning_rate": 4.29734992435594e-06, + "loss": 0.8916, + "step": 97070 + }, + { + "epoch": 0.7027297009707051, + "grad_norm": 0.14745230972766876, + "learning_rate": 4.297277537695354e-06, + "loss": 0.888, + "step": 97080 + }, + { + "epoch": 0.7028020876312913, + "grad_norm": 0.16206741333007812, + "learning_rate": 4.297205151034768e-06, + "loss": 0.889, + "step": 97090 + }, + { + "epoch": 0.7028744742918774, + "grad_norm": 0.23554551601409912, + "learning_rate": 4.297132764374182e-06, + "loss": 0.8866, + "step": 97100 + }, + { + "epoch": 0.7029468609524637, + "grad_norm": 0.15564242005348206, + "learning_rate": 4.297060377713595e-06, + "loss": 0.8908, + "step": 97110 + }, + { + "epoch": 0.7030192476130499, + "grad_norm": 0.15459449589252472, + "learning_rate": 4.29698799105301e-06, + "loss": 0.8898, + "step": 97120 + }, + { + "epoch": 0.7030916342736361, + "grad_norm": 0.1636834740638733, + "learning_rate": 4.296915604392423e-06, + "loss": 0.8917, + "step": 97130 + }, + { + "epoch": 0.7031640209342223, + "grad_norm": 0.1554258018732071, + "learning_rate": 4.296843217731837e-06, + "loss": 0.887, + "step": 97140 + }, + { + "epoch": 0.7032364075948084, + "grad_norm": 0.1681572049856186, + "learning_rate": 4.2967708310712506e-06, + "loss": 0.8967, + "step": 97150 + }, + { + "epoch": 0.7033087942553946, + "grad_norm": 0.15658222138881683, + "learning_rate": 4.296698444410664e-06, + "loss": 0.8905, + "step": 97160 + }, + { + "epoch": 0.7033811809159808, + "grad_norm": 0.159566730260849, + "learning_rate": 4.296626057750078e-06, + "loss": 0.8944, + "step": 97170 + }, + { + "epoch": 0.703453567576567, + "grad_norm": 0.16223838925361633, + "learning_rate": 4.296553671089491e-06, + "loss": 0.8846, + "step": 97180 + }, + { + "epoch": 0.7035259542371531, + "grad_norm": 0.16831691563129425, + "learning_rate": 4.296481284428906e-06, + "loss": 0.8883, + "step": 97190 + }, + { + "epoch": 0.7035983408977393, + "grad_norm": 0.17067785561084747, + "learning_rate": 4.2964088977683195e-06, + "loss": 0.8852, + "step": 97200 + }, + { + "epoch": 0.7036707275583256, + "grad_norm": 0.1619025319814682, + "learning_rate": 4.296336511107733e-06, + "loss": 0.8959, + "step": 97210 + }, + { + "epoch": 0.7037431142189118, + "grad_norm": 0.17006492614746094, + "learning_rate": 4.296264124447147e-06, + "loss": 0.8926, + "step": 97220 + }, + { + "epoch": 0.703815500879498, + "grad_norm": 0.16066719591617584, + "learning_rate": 4.296191737786561e-06, + "loss": 0.8944, + "step": 97230 + }, + { + "epoch": 0.7038878875400841, + "grad_norm": 0.16666296124458313, + "learning_rate": 4.296119351125975e-06, + "loss": 0.8865, + "step": 97240 + }, + { + "epoch": 0.7039602742006703, + "grad_norm": 0.152059406042099, + "learning_rate": 4.296046964465388e-06, + "loss": 0.8949, + "step": 97250 + }, + { + "epoch": 0.7040326608612565, + "grad_norm": 0.15612152218818665, + "learning_rate": 4.295974577804802e-06, + "loss": 0.9075, + "step": 97260 + }, + { + "epoch": 0.7041050475218427, + "grad_norm": 0.15522941946983337, + "learning_rate": 4.2959021911442165e-06, + "loss": 0.8996, + "step": 97270 + }, + { + "epoch": 0.7041774341824288, + "grad_norm": 0.1641325205564499, + "learning_rate": 4.29582980448363e-06, + "loss": 0.8991, + "step": 97280 + }, + { + "epoch": 0.704249820843015, + "grad_norm": 0.1451716423034668, + "learning_rate": 4.295757417823044e-06, + "loss": 0.9109, + "step": 97290 + }, + { + "epoch": 0.7043222075036012, + "grad_norm": 0.211027592420578, + "learning_rate": 4.295685031162457e-06, + "loss": 0.8876, + "step": 97300 + }, + { + "epoch": 0.7043945941641874, + "grad_norm": 0.15049049258232117, + "learning_rate": 4.295612644501872e-06, + "loss": 0.8982, + "step": 97310 + }, + { + "epoch": 0.7044669808247737, + "grad_norm": 0.1562298983335495, + "learning_rate": 4.2955402578412854e-06, + "loss": 0.9015, + "step": 97320 + }, + { + "epoch": 0.7045393674853598, + "grad_norm": 0.1571105718612671, + "learning_rate": 4.295467871180699e-06, + "loss": 0.8958, + "step": 97330 + }, + { + "epoch": 0.704611754145946, + "grad_norm": 0.1848883330821991, + "learning_rate": 4.295395484520113e-06, + "loss": 0.894, + "step": 97340 + }, + { + "epoch": 0.7046841408065322, + "grad_norm": 0.15157540142536163, + "learning_rate": 4.295323097859527e-06, + "loss": 0.8881, + "step": 97350 + }, + { + "epoch": 0.7047565274671184, + "grad_norm": 0.16974295675754547, + "learning_rate": 4.295250711198941e-06, + "loss": 0.8998, + "step": 97360 + }, + { + "epoch": 0.7048289141277045, + "grad_norm": 0.15676848590373993, + "learning_rate": 4.295178324538354e-06, + "loss": 0.8847, + "step": 97370 + }, + { + "epoch": 0.7049013007882907, + "grad_norm": 0.15643902122974396, + "learning_rate": 4.295105937877768e-06, + "loss": 0.9044, + "step": 97380 + }, + { + "epoch": 0.7049736874488769, + "grad_norm": 0.15624825656414032, + "learning_rate": 4.2950335512171824e-06, + "loss": 0.8963, + "step": 97390 + }, + { + "epoch": 0.7050460741094631, + "grad_norm": 0.16203194856643677, + "learning_rate": 4.294961164556596e-06, + "loss": 0.8655, + "step": 97400 + }, + { + "epoch": 0.7051184607700492, + "grad_norm": 0.16650012135505676, + "learning_rate": 4.29488877789601e-06, + "loss": 0.9065, + "step": 97410 + }, + { + "epoch": 0.7051908474306355, + "grad_norm": 0.16026648879051208, + "learning_rate": 4.294816391235423e-06, + "loss": 0.9028, + "step": 97420 + }, + { + "epoch": 0.7052632340912217, + "grad_norm": 0.15564897656440735, + "learning_rate": 4.294744004574837e-06, + "loss": 0.8955, + "step": 97430 + }, + { + "epoch": 0.7053356207518079, + "grad_norm": 0.15881158411502838, + "learning_rate": 4.294671617914251e-06, + "loss": 0.9105, + "step": 97440 + }, + { + "epoch": 0.7054080074123941, + "grad_norm": 0.14864668250083923, + "learning_rate": 4.294599231253665e-06, + "loss": 0.8795, + "step": 97450 + }, + { + "epoch": 0.7054803940729802, + "grad_norm": 0.16540266573429108, + "learning_rate": 4.294526844593079e-06, + "loss": 0.8881, + "step": 97460 + }, + { + "epoch": 0.7055527807335664, + "grad_norm": 0.1469249576330185, + "learning_rate": 4.294454457932492e-06, + "loss": 0.8988, + "step": 97470 + }, + { + "epoch": 0.7056251673941526, + "grad_norm": 0.1481475979089737, + "learning_rate": 4.294382071271907e-06, + "loss": 0.9061, + "step": 97480 + }, + { + "epoch": 0.7056975540547388, + "grad_norm": 0.14965546131134033, + "learning_rate": 4.29430968461132e-06, + "loss": 0.9011, + "step": 97490 + }, + { + "epoch": 0.705769940715325, + "grad_norm": 0.1463448852300644, + "learning_rate": 4.294237297950734e-06, + "loss": 0.8892, + "step": 97500 + }, + { + "epoch": 0.7058423273759111, + "grad_norm": 0.157759428024292, + "learning_rate": 4.2941649112901475e-06, + "loss": 0.8854, + "step": 97510 + }, + { + "epoch": 0.7059147140364973, + "grad_norm": 0.15894946455955505, + "learning_rate": 4.294092524629562e-06, + "loss": 0.8911, + "step": 97520 + }, + { + "epoch": 0.7059871006970836, + "grad_norm": 0.16521485149860382, + "learning_rate": 4.294020137968976e-06, + "loss": 0.8939, + "step": 97530 + }, + { + "epoch": 0.7060594873576698, + "grad_norm": 0.16126827895641327, + "learning_rate": 4.293947751308389e-06, + "loss": 0.8838, + "step": 97540 + }, + { + "epoch": 0.706131874018256, + "grad_norm": 0.18010060489177704, + "learning_rate": 4.293875364647803e-06, + "loss": 0.9076, + "step": 97550 + }, + { + "epoch": 0.7062042606788421, + "grad_norm": 0.15842019021511078, + "learning_rate": 4.293802977987217e-06, + "loss": 0.8983, + "step": 97560 + }, + { + "epoch": 0.7062766473394283, + "grad_norm": 0.14757438004016876, + "learning_rate": 4.293730591326631e-06, + "loss": 0.893, + "step": 97570 + }, + { + "epoch": 0.7063490340000145, + "grad_norm": 0.15686437487602234, + "learning_rate": 4.2936582046660445e-06, + "loss": 0.8923, + "step": 97580 + }, + { + "epoch": 0.7064214206606007, + "grad_norm": 0.15694831311702728, + "learning_rate": 4.293585818005458e-06, + "loss": 0.9027, + "step": 97590 + }, + { + "epoch": 0.7064938073211868, + "grad_norm": 0.15480537712574005, + "learning_rate": 4.293513431344873e-06, + "loss": 0.8853, + "step": 97600 + }, + { + "epoch": 0.706566193981773, + "grad_norm": 0.17100360989570618, + "learning_rate": 4.293441044684286e-06, + "loss": 0.8935, + "step": 97610 + }, + { + "epoch": 0.7066385806423592, + "grad_norm": 0.15572969615459442, + "learning_rate": 4.2933686580237e-06, + "loss": 0.8777, + "step": 97620 + }, + { + "epoch": 0.7067109673029454, + "grad_norm": 0.16251128911972046, + "learning_rate": 4.2932962713631135e-06, + "loss": 0.8977, + "step": 97630 + }, + { + "epoch": 0.7067833539635316, + "grad_norm": 0.15367302298545837, + "learning_rate": 4.293223884702528e-06, + "loss": 0.8939, + "step": 97640 + }, + { + "epoch": 0.7068557406241178, + "grad_norm": 0.1467921882867813, + "learning_rate": 4.2931514980419415e-06, + "loss": 0.8904, + "step": 97650 + }, + { + "epoch": 0.706928127284704, + "grad_norm": 0.14249904453754425, + "learning_rate": 4.293079111381355e-06, + "loss": 0.8977, + "step": 97660 + }, + { + "epoch": 0.7070005139452902, + "grad_norm": 0.15507516264915466, + "learning_rate": 4.293006724720769e-06, + "loss": 0.9006, + "step": 97670 + }, + { + "epoch": 0.7070729006058764, + "grad_norm": 0.147030770778656, + "learning_rate": 4.292934338060183e-06, + "loss": 0.8973, + "step": 97680 + }, + { + "epoch": 0.7071452872664625, + "grad_norm": 0.14783620834350586, + "learning_rate": 4.292861951399596e-06, + "loss": 0.8842, + "step": 97690 + }, + { + "epoch": 0.7072176739270487, + "grad_norm": 0.16624851524829865, + "learning_rate": 4.29278956473901e-06, + "loss": 0.8908, + "step": 97700 + }, + { + "epoch": 0.7072900605876349, + "grad_norm": 0.13825160264968872, + "learning_rate": 4.292717178078424e-06, + "loss": 0.8955, + "step": 97710 + }, + { + "epoch": 0.7073624472482211, + "grad_norm": 0.1511533409357071, + "learning_rate": 4.292644791417838e-06, + "loss": 0.8846, + "step": 97720 + }, + { + "epoch": 0.7074348339088072, + "grad_norm": 0.1551799476146698, + "learning_rate": 4.292572404757251e-06, + "loss": 0.8946, + "step": 97730 + }, + { + "epoch": 0.7075072205693935, + "grad_norm": 0.1462963968515396, + "learning_rate": 4.292500018096665e-06, + "loss": 0.8802, + "step": 97740 + }, + { + "epoch": 0.7075796072299797, + "grad_norm": 0.15262961387634277, + "learning_rate": 4.292427631436079e-06, + "loss": 0.9035, + "step": 97750 + }, + { + "epoch": 0.7076519938905659, + "grad_norm": 0.1675291806459427, + "learning_rate": 4.292355244775493e-06, + "loss": 0.8842, + "step": 97760 + }, + { + "epoch": 0.707724380551152, + "grad_norm": 0.15412959456443787, + "learning_rate": 4.292282858114907e-06, + "loss": 0.8899, + "step": 97770 + }, + { + "epoch": 0.7077967672117382, + "grad_norm": 0.16402597725391388, + "learning_rate": 4.29221047145432e-06, + "loss": 0.8956, + "step": 97780 + }, + { + "epoch": 0.7078691538723244, + "grad_norm": 0.15542785823345184, + "learning_rate": 4.292138084793735e-06, + "loss": 0.8926, + "step": 97790 + }, + { + "epoch": 0.7079415405329106, + "grad_norm": 0.16312408447265625, + "learning_rate": 4.292065698133148e-06, + "loss": 0.9012, + "step": 97800 + }, + { + "epoch": 0.7080139271934968, + "grad_norm": 0.16359078884124756, + "learning_rate": 4.291993311472562e-06, + "loss": 0.889, + "step": 97810 + }, + { + "epoch": 0.7080863138540829, + "grad_norm": 0.1581135094165802, + "learning_rate": 4.2919209248119756e-06, + "loss": 0.8854, + "step": 97820 + }, + { + "epoch": 0.7081587005146691, + "grad_norm": 0.16701552271842957, + "learning_rate": 4.29184853815139e-06, + "loss": 0.8877, + "step": 97830 + }, + { + "epoch": 0.7082310871752553, + "grad_norm": 0.18646441400051117, + "learning_rate": 4.291776151490804e-06, + "loss": 0.8825, + "step": 97840 + }, + { + "epoch": 0.7083034738358416, + "grad_norm": 0.15613111853599548, + "learning_rate": 4.291703764830217e-06, + "loss": 0.8889, + "step": 97850 + }, + { + "epoch": 0.7083758604964278, + "grad_norm": 0.15725113451480865, + "learning_rate": 4.291631378169631e-06, + "loss": 0.892, + "step": 97860 + }, + { + "epoch": 0.7084482471570139, + "grad_norm": 0.1500454545021057, + "learning_rate": 4.291558991509045e-06, + "loss": 0.8845, + "step": 97870 + }, + { + "epoch": 0.7085206338176001, + "grad_norm": 0.14962556958198547, + "learning_rate": 4.291486604848459e-06, + "loss": 0.9005, + "step": 97880 + }, + { + "epoch": 0.7085930204781863, + "grad_norm": 0.15193293988704681, + "learning_rate": 4.2914142181878726e-06, + "loss": 0.898, + "step": 97890 + }, + { + "epoch": 0.7086654071387725, + "grad_norm": 0.19393183290958405, + "learning_rate": 4.291341831527286e-06, + "loss": 0.8808, + "step": 97900 + }, + { + "epoch": 0.7087377937993586, + "grad_norm": 0.16170886158943176, + "learning_rate": 4.291269444866701e-06, + "loss": 0.8892, + "step": 97910 + }, + { + "epoch": 0.7088101804599448, + "grad_norm": 0.16696853935718536, + "learning_rate": 4.291197058206114e-06, + "loss": 0.8952, + "step": 97920 + }, + { + "epoch": 0.708882567120531, + "grad_norm": 0.14689412713050842, + "learning_rate": 4.291124671545528e-06, + "loss": 0.8976, + "step": 97930 + }, + { + "epoch": 0.7089549537811172, + "grad_norm": 0.18432478606700897, + "learning_rate": 4.2910522848849415e-06, + "loss": 0.888, + "step": 97940 + }, + { + "epoch": 0.7090273404417035, + "grad_norm": 0.14978989958763123, + "learning_rate": 4.290979898224356e-06, + "loss": 0.8878, + "step": 97950 + }, + { + "epoch": 0.7090997271022896, + "grad_norm": 0.14927172660827637, + "learning_rate": 4.2909075115637696e-06, + "loss": 0.9018, + "step": 97960 + }, + { + "epoch": 0.7091721137628758, + "grad_norm": 0.1598445326089859, + "learning_rate": 4.290835124903183e-06, + "loss": 0.8776, + "step": 97970 + }, + { + "epoch": 0.709244500423462, + "grad_norm": 0.15578649938106537, + "learning_rate": 4.290762738242597e-06, + "loss": 0.8925, + "step": 97980 + }, + { + "epoch": 0.7093168870840482, + "grad_norm": 0.15951481461524963, + "learning_rate": 4.290690351582011e-06, + "loss": 0.8979, + "step": 97990 + }, + { + "epoch": 0.7093892737446343, + "grad_norm": 0.15647555887699127, + "learning_rate": 4.290617964921425e-06, + "loss": 0.9094, + "step": 98000 + }, + { + "epoch": 0.7094616604052205, + "grad_norm": 0.15051069855690002, + "learning_rate": 4.2905455782608385e-06, + "loss": 0.8905, + "step": 98010 + }, + { + "epoch": 0.7095340470658067, + "grad_norm": 0.17179308831691742, + "learning_rate": 4.290473191600252e-06, + "loss": 0.883, + "step": 98020 + }, + { + "epoch": 0.7096064337263929, + "grad_norm": 0.16709086298942566, + "learning_rate": 4.290400804939666e-06, + "loss": 0.9033, + "step": 98030 + }, + { + "epoch": 0.709678820386979, + "grad_norm": 0.17320266366004944, + "learning_rate": 4.29032841827908e-06, + "loss": 0.9088, + "step": 98040 + }, + { + "epoch": 0.7097512070475652, + "grad_norm": 0.16562733054161072, + "learning_rate": 4.290256031618494e-06, + "loss": 0.8882, + "step": 98050 + }, + { + "epoch": 0.7098235937081515, + "grad_norm": 0.15800151228904724, + "learning_rate": 4.2901836449579074e-06, + "loss": 0.8935, + "step": 98060 + }, + { + "epoch": 0.7098959803687377, + "grad_norm": 0.15598443150520325, + "learning_rate": 4.290111258297321e-06, + "loss": 0.8878, + "step": 98070 + }, + { + "epoch": 0.7099683670293239, + "grad_norm": 0.14312607049942017, + "learning_rate": 4.2900388716367355e-06, + "loss": 0.8901, + "step": 98080 + }, + { + "epoch": 0.71004075368991, + "grad_norm": 0.16290390491485596, + "learning_rate": 4.289966484976149e-06, + "loss": 0.8821, + "step": 98090 + }, + { + "epoch": 0.7101131403504962, + "grad_norm": 0.1655735820531845, + "learning_rate": 4.289894098315563e-06, + "loss": 0.8994, + "step": 98100 + }, + { + "epoch": 0.7101855270110824, + "grad_norm": 0.1505131721496582, + "learning_rate": 4.289821711654976e-06, + "loss": 0.8779, + "step": 98110 + }, + { + "epoch": 0.7102579136716686, + "grad_norm": 0.1438320130109787, + "learning_rate": 4.289749324994391e-06, + "loss": 0.8743, + "step": 98120 + }, + { + "epoch": 0.7103303003322547, + "grad_norm": 0.18201330304145813, + "learning_rate": 4.2896769383338044e-06, + "loss": 0.8858, + "step": 98130 + }, + { + "epoch": 0.7104026869928409, + "grad_norm": 0.18367905914783478, + "learning_rate": 4.289604551673218e-06, + "loss": 0.9101, + "step": 98140 + }, + { + "epoch": 0.7104750736534271, + "grad_norm": 0.15578269958496094, + "learning_rate": 4.289532165012632e-06, + "loss": 0.893, + "step": 98150 + }, + { + "epoch": 0.7105474603140133, + "grad_norm": 0.15970683097839355, + "learning_rate": 4.289459778352046e-06, + "loss": 0.8996, + "step": 98160 + }, + { + "epoch": 0.7106198469745996, + "grad_norm": 0.16508597135543823, + "learning_rate": 4.28938739169146e-06, + "loss": 0.8862, + "step": 98170 + }, + { + "epoch": 0.7106922336351857, + "grad_norm": 0.174842968583107, + "learning_rate": 4.289315005030873e-06, + "loss": 0.9011, + "step": 98180 + }, + { + "epoch": 0.7107646202957719, + "grad_norm": 0.17075632512569427, + "learning_rate": 4.289242618370287e-06, + "loss": 0.8765, + "step": 98190 + }, + { + "epoch": 0.7108370069563581, + "grad_norm": 0.15258832275867462, + "learning_rate": 4.2891702317097014e-06, + "loss": 0.895, + "step": 98200 + }, + { + "epoch": 0.7109093936169443, + "grad_norm": 0.14689311385154724, + "learning_rate": 4.289097845049115e-06, + "loss": 0.8874, + "step": 98210 + }, + { + "epoch": 0.7109817802775305, + "grad_norm": 0.16319765150547028, + "learning_rate": 4.289025458388528e-06, + "loss": 0.8929, + "step": 98220 + }, + { + "epoch": 0.7110541669381166, + "grad_norm": 0.16500282287597656, + "learning_rate": 4.288953071727942e-06, + "loss": 0.887, + "step": 98230 + }, + { + "epoch": 0.7111265535987028, + "grad_norm": 0.15623874962329865, + "learning_rate": 4.288880685067356e-06, + "loss": 0.8959, + "step": 98240 + }, + { + "epoch": 0.711198940259289, + "grad_norm": 0.15647245943546295, + "learning_rate": 4.2888082984067695e-06, + "loss": 0.8864, + "step": 98250 + }, + { + "epoch": 0.7112713269198752, + "grad_norm": 0.1665528565645218, + "learning_rate": 4.288735911746183e-06, + "loss": 0.8903, + "step": 98260 + }, + { + "epoch": 0.7113437135804614, + "grad_norm": 0.14643557369709015, + "learning_rate": 4.288663525085598e-06, + "loss": 0.8978, + "step": 98270 + }, + { + "epoch": 0.7114161002410476, + "grad_norm": 0.154141366481781, + "learning_rate": 4.288591138425011e-06, + "loss": 0.8978, + "step": 98280 + }, + { + "epoch": 0.7114884869016338, + "grad_norm": 0.14892645180225372, + "learning_rate": 4.288518751764425e-06, + "loss": 0.8925, + "step": 98290 + }, + { + "epoch": 0.71156087356222, + "grad_norm": 0.17983193695545197, + "learning_rate": 4.2884463651038384e-06, + "loss": 0.8798, + "step": 98300 + }, + { + "epoch": 0.7116332602228062, + "grad_norm": 0.15208591520786285, + "learning_rate": 4.288373978443253e-06, + "loss": 0.8917, + "step": 98310 + }, + { + "epoch": 0.7117056468833923, + "grad_norm": 0.15199977159500122, + "learning_rate": 4.2883015917826665e-06, + "loss": 0.8878, + "step": 98320 + }, + { + "epoch": 0.7117780335439785, + "grad_norm": 0.16912555694580078, + "learning_rate": 4.28822920512208e-06, + "loss": 0.8876, + "step": 98330 + }, + { + "epoch": 0.7118504202045647, + "grad_norm": 0.18677960336208344, + "learning_rate": 4.288156818461494e-06, + "loss": 0.8971, + "step": 98340 + }, + { + "epoch": 0.7119228068651509, + "grad_norm": 0.16484694182872772, + "learning_rate": 4.288084431800908e-06, + "loss": 0.8867, + "step": 98350 + }, + { + "epoch": 0.711995193525737, + "grad_norm": 0.18507961928844452, + "learning_rate": 4.288012045140322e-06, + "loss": 0.9005, + "step": 98360 + }, + { + "epoch": 0.7120675801863232, + "grad_norm": 0.16130656003952026, + "learning_rate": 4.2879396584797355e-06, + "loss": 0.9067, + "step": 98370 + }, + { + "epoch": 0.7121399668469095, + "grad_norm": 0.16613750159740448, + "learning_rate": 4.287867271819149e-06, + "loss": 0.8972, + "step": 98380 + }, + { + "epoch": 0.7122123535074957, + "grad_norm": 0.1898043304681778, + "learning_rate": 4.2877948851585635e-06, + "loss": 0.8951, + "step": 98390 + }, + { + "epoch": 0.7122847401680819, + "grad_norm": 0.15496717393398285, + "learning_rate": 4.287722498497977e-06, + "loss": 0.8843, + "step": 98400 + }, + { + "epoch": 0.712357126828668, + "grad_norm": 0.15950420498847961, + "learning_rate": 4.287650111837391e-06, + "loss": 0.8907, + "step": 98410 + }, + { + "epoch": 0.7124295134892542, + "grad_norm": 0.16205261647701263, + "learning_rate": 4.287577725176804e-06, + "loss": 0.8871, + "step": 98420 + }, + { + "epoch": 0.7125019001498404, + "grad_norm": 0.16304804384708405, + "learning_rate": 4.287505338516219e-06, + "loss": 0.8884, + "step": 98430 + }, + { + "epoch": 0.7125742868104266, + "grad_norm": 0.15807406604290009, + "learning_rate": 4.2874329518556325e-06, + "loss": 0.8893, + "step": 98440 + }, + { + "epoch": 0.7126466734710127, + "grad_norm": 0.16383838653564453, + "learning_rate": 4.287360565195046e-06, + "loss": 0.9048, + "step": 98450 + }, + { + "epoch": 0.7127190601315989, + "grad_norm": 0.17542411386966705, + "learning_rate": 4.28728817853446e-06, + "loss": 0.8776, + "step": 98460 + }, + { + "epoch": 0.7127914467921851, + "grad_norm": 0.18369115889072418, + "learning_rate": 4.287215791873874e-06, + "loss": 0.888, + "step": 98470 + }, + { + "epoch": 0.7128638334527714, + "grad_norm": 0.21220123767852783, + "learning_rate": 4.287143405213288e-06, + "loss": 0.8996, + "step": 98480 + }, + { + "epoch": 0.7129362201133576, + "grad_norm": 0.17068584263324738, + "learning_rate": 4.287071018552701e-06, + "loss": 0.8882, + "step": 98490 + }, + { + "epoch": 0.7130086067739437, + "grad_norm": 0.15225493907928467, + "learning_rate": 4.286998631892115e-06, + "loss": 0.8924, + "step": 98500 + }, + { + "epoch": 0.7130809934345299, + "grad_norm": 0.14265893399715424, + "learning_rate": 4.2869262452315295e-06, + "loss": 0.8832, + "step": 98510 + }, + { + "epoch": 0.7131533800951161, + "grad_norm": 0.18743206560611725, + "learning_rate": 4.286853858570943e-06, + "loss": 0.9077, + "step": 98520 + }, + { + "epoch": 0.7132257667557023, + "grad_norm": 0.15794509649276733, + "learning_rate": 4.286781471910357e-06, + "loss": 0.8924, + "step": 98530 + }, + { + "epoch": 0.7132981534162884, + "grad_norm": 0.16649536788463593, + "learning_rate": 4.28670908524977e-06, + "loss": 0.8789, + "step": 98540 + }, + { + "epoch": 0.7133705400768746, + "grad_norm": 0.1717004030942917, + "learning_rate": 4.286636698589185e-06, + "loss": 0.8966, + "step": 98550 + }, + { + "epoch": 0.7134429267374608, + "grad_norm": 0.1472935825586319, + "learning_rate": 4.286564311928598e-06, + "loss": 0.8928, + "step": 98560 + }, + { + "epoch": 0.713515313398047, + "grad_norm": 0.1435527801513672, + "learning_rate": 4.286491925268012e-06, + "loss": 0.872, + "step": 98570 + }, + { + "epoch": 0.7135877000586331, + "grad_norm": 0.17352406680583954, + "learning_rate": 4.286419538607426e-06, + "loss": 0.883, + "step": 98580 + }, + { + "epoch": 0.7136600867192194, + "grad_norm": 0.14839379489421844, + "learning_rate": 4.28634715194684e-06, + "loss": 0.8919, + "step": 98590 + }, + { + "epoch": 0.7137324733798056, + "grad_norm": 0.1555548906326294, + "learning_rate": 4.286274765286254e-06, + "loss": 0.8863, + "step": 98600 + }, + { + "epoch": 0.7138048600403918, + "grad_norm": 0.1623990833759308, + "learning_rate": 4.286202378625667e-06, + "loss": 0.8832, + "step": 98610 + }, + { + "epoch": 0.713877246700978, + "grad_norm": 0.19231371581554413, + "learning_rate": 4.286129991965081e-06, + "loss": 0.8975, + "step": 98620 + }, + { + "epoch": 0.7139496333615641, + "grad_norm": 0.15244658291339874, + "learning_rate": 4.286057605304495e-06, + "loss": 0.9031, + "step": 98630 + }, + { + "epoch": 0.7140220200221503, + "grad_norm": 0.15277956426143646, + "learning_rate": 4.285985218643909e-06, + "loss": 0.888, + "step": 98640 + }, + { + "epoch": 0.7140944066827365, + "grad_norm": 0.15656965970993042, + "learning_rate": 4.285912831983323e-06, + "loss": 0.8905, + "step": 98650 + }, + { + "epoch": 0.7141667933433227, + "grad_norm": 0.18114785850048065, + "learning_rate": 4.285840445322736e-06, + "loss": 0.8754, + "step": 98660 + }, + { + "epoch": 0.7142391800039088, + "grad_norm": 0.23680664598941803, + "learning_rate": 4.28576805866215e-06, + "loss": 0.894, + "step": 98670 + }, + { + "epoch": 0.714311566664495, + "grad_norm": 0.14668525755405426, + "learning_rate": 4.285695672001564e-06, + "loss": 0.8903, + "step": 98680 + }, + { + "epoch": 0.7143839533250812, + "grad_norm": 0.1626420021057129, + "learning_rate": 4.285623285340978e-06, + "loss": 0.8976, + "step": 98690 + }, + { + "epoch": 0.7144563399856675, + "grad_norm": 0.154447540640831, + "learning_rate": 4.2855508986803916e-06, + "loss": 0.8961, + "step": 98700 + }, + { + "epoch": 0.7145287266462537, + "grad_norm": 0.15234994888305664, + "learning_rate": 4.285478512019805e-06, + "loss": 0.8995, + "step": 98710 + }, + { + "epoch": 0.7146011133068398, + "grad_norm": 0.15986403822898865, + "learning_rate": 4.28540612535922e-06, + "loss": 0.8968, + "step": 98720 + }, + { + "epoch": 0.714673499967426, + "grad_norm": 0.16405628621578217, + "learning_rate": 4.285333738698633e-06, + "loss": 0.8877, + "step": 98730 + }, + { + "epoch": 0.7147458866280122, + "grad_norm": 0.14801183342933655, + "learning_rate": 4.285261352038047e-06, + "loss": 0.8865, + "step": 98740 + }, + { + "epoch": 0.7148182732885984, + "grad_norm": 0.1582917869091034, + "learning_rate": 4.2851889653774605e-06, + "loss": 0.8968, + "step": 98750 + }, + { + "epoch": 0.7148906599491845, + "grad_norm": 0.14904256165027618, + "learning_rate": 4.285116578716874e-06, + "loss": 0.8825, + "step": 98760 + }, + { + "epoch": 0.7149630466097707, + "grad_norm": 0.15857091546058655, + "learning_rate": 4.285044192056288e-06, + "loss": 0.8883, + "step": 98770 + }, + { + "epoch": 0.7150354332703569, + "grad_norm": 0.1512639969587326, + "learning_rate": 4.284971805395702e-06, + "loss": 0.9077, + "step": 98780 + }, + { + "epoch": 0.7151078199309431, + "grad_norm": 0.1620493084192276, + "learning_rate": 4.284899418735116e-06, + "loss": 0.8921, + "step": 98790 + }, + { + "epoch": 0.7151802065915294, + "grad_norm": 0.15523122251033783, + "learning_rate": 4.2848270320745294e-06, + "loss": 0.8975, + "step": 98800 + }, + { + "epoch": 0.7152525932521155, + "grad_norm": 0.15379586815834045, + "learning_rate": 4.284754645413943e-06, + "loss": 0.9056, + "step": 98810 + }, + { + "epoch": 0.7153249799127017, + "grad_norm": 0.15808962285518646, + "learning_rate": 4.2846822587533575e-06, + "loss": 0.8923, + "step": 98820 + }, + { + "epoch": 0.7153973665732879, + "grad_norm": 0.21590928733348846, + "learning_rate": 4.284609872092771e-06, + "loss": 0.8851, + "step": 98830 + }, + { + "epoch": 0.7154697532338741, + "grad_norm": 0.16815008223056793, + "learning_rate": 4.284537485432185e-06, + "loss": 0.8893, + "step": 98840 + }, + { + "epoch": 0.7155421398944602, + "grad_norm": 0.15785428881645203, + "learning_rate": 4.284465098771598e-06, + "loss": 0.8874, + "step": 98850 + }, + { + "epoch": 0.7156145265550464, + "grad_norm": 0.1511503905057907, + "learning_rate": 4.284392712111012e-06, + "loss": 0.8789, + "step": 98860 + }, + { + "epoch": 0.7156869132156326, + "grad_norm": 0.15221667289733887, + "learning_rate": 4.2843203254504264e-06, + "loss": 0.8909, + "step": 98870 + }, + { + "epoch": 0.7157592998762188, + "grad_norm": 0.15201707184314728, + "learning_rate": 4.28424793878984e-06, + "loss": 0.8908, + "step": 98880 + }, + { + "epoch": 0.715831686536805, + "grad_norm": 0.1675233393907547, + "learning_rate": 4.284175552129254e-06, + "loss": 0.9029, + "step": 98890 + }, + { + "epoch": 0.7159040731973911, + "grad_norm": 0.1582505851984024, + "learning_rate": 4.284103165468667e-06, + "loss": 0.894, + "step": 98900 + }, + { + "epoch": 0.7159764598579774, + "grad_norm": 0.14627264440059662, + "learning_rate": 4.284030778808082e-06, + "loss": 0.8948, + "step": 98910 + }, + { + "epoch": 0.7160488465185636, + "grad_norm": 0.15614593029022217, + "learning_rate": 4.283958392147495e-06, + "loss": 0.8895, + "step": 98920 + }, + { + "epoch": 0.7161212331791498, + "grad_norm": 0.17109185457229614, + "learning_rate": 4.283886005486909e-06, + "loss": 0.9064, + "step": 98930 + }, + { + "epoch": 0.716193619839736, + "grad_norm": 0.17292575538158417, + "learning_rate": 4.283813618826323e-06, + "loss": 0.8936, + "step": 98940 + }, + { + "epoch": 0.7162660065003221, + "grad_norm": 0.15228956937789917, + "learning_rate": 4.283741232165737e-06, + "loss": 0.8948, + "step": 98950 + }, + { + "epoch": 0.7163383931609083, + "grad_norm": 0.15983666479587555, + "learning_rate": 4.283668845505151e-06, + "loss": 0.892, + "step": 98960 + }, + { + "epoch": 0.7164107798214945, + "grad_norm": 0.15323320031166077, + "learning_rate": 4.283596458844564e-06, + "loss": 0.885, + "step": 98970 + }, + { + "epoch": 0.7164831664820807, + "grad_norm": 0.1651761531829834, + "learning_rate": 4.283524072183978e-06, + "loss": 0.8907, + "step": 98980 + }, + { + "epoch": 0.7165555531426668, + "grad_norm": 0.1607891172170639, + "learning_rate": 4.283451685523392e-06, + "loss": 0.8986, + "step": 98990 + }, + { + "epoch": 0.716627939803253, + "grad_norm": 0.15800441801548004, + "learning_rate": 4.283379298862806e-06, + "loss": 0.8871, + "step": 99000 + }, + { + "epoch": 0.7167003264638393, + "grad_norm": 0.15280944108963013, + "learning_rate": 4.28330691220222e-06, + "loss": 0.892, + "step": 99010 + }, + { + "epoch": 0.7167727131244255, + "grad_norm": 0.15718095004558563, + "learning_rate": 4.283234525541633e-06, + "loss": 0.8885, + "step": 99020 + }, + { + "epoch": 0.7168450997850117, + "grad_norm": 0.15296663343906403, + "learning_rate": 4.283162138881048e-06, + "loss": 0.8945, + "step": 99030 + }, + { + "epoch": 0.7169174864455978, + "grad_norm": 0.1575295478105545, + "learning_rate": 4.283089752220461e-06, + "loss": 0.895, + "step": 99040 + }, + { + "epoch": 0.716989873106184, + "grad_norm": 0.16054439544677734, + "learning_rate": 4.283017365559875e-06, + "loss": 0.8788, + "step": 99050 + }, + { + "epoch": 0.7170622597667702, + "grad_norm": 0.15393316745758057, + "learning_rate": 4.2829449788992885e-06, + "loss": 0.881, + "step": 99060 + }, + { + "epoch": 0.7171346464273564, + "grad_norm": 0.16581709682941437, + "learning_rate": 4.282872592238703e-06, + "loss": 0.8949, + "step": 99070 + }, + { + "epoch": 0.7172070330879425, + "grad_norm": 0.16176964342594147, + "learning_rate": 4.282800205578117e-06, + "loss": 0.8834, + "step": 99080 + }, + { + "epoch": 0.7172794197485287, + "grad_norm": 0.15313786268234253, + "learning_rate": 4.28272781891753e-06, + "loss": 0.8849, + "step": 99090 + }, + { + "epoch": 0.7173518064091149, + "grad_norm": 0.1565057635307312, + "learning_rate": 4.282655432256944e-06, + "loss": 0.8898, + "step": 99100 + }, + { + "epoch": 0.7174241930697011, + "grad_norm": 0.17131567001342773, + "learning_rate": 4.282583045596358e-06, + "loss": 0.8981, + "step": 99110 + }, + { + "epoch": 0.7174965797302874, + "grad_norm": 0.16490551829338074, + "learning_rate": 4.282510658935772e-06, + "loss": 0.8875, + "step": 99120 + }, + { + "epoch": 0.7175689663908735, + "grad_norm": 0.14752908051013947, + "learning_rate": 4.2824382722751855e-06, + "loss": 0.8903, + "step": 99130 + }, + { + "epoch": 0.7176413530514597, + "grad_norm": 0.1532769799232483, + "learning_rate": 4.282365885614599e-06, + "loss": 0.8946, + "step": 99140 + }, + { + "epoch": 0.7177137397120459, + "grad_norm": 0.16182607412338257, + "learning_rate": 4.282293498954014e-06, + "loss": 0.8948, + "step": 99150 + }, + { + "epoch": 0.7177861263726321, + "grad_norm": 0.1951036900281906, + "learning_rate": 4.282221112293427e-06, + "loss": 0.8873, + "step": 99160 + }, + { + "epoch": 0.7178585130332182, + "grad_norm": 0.16654542088508606, + "learning_rate": 4.282148725632841e-06, + "loss": 0.8903, + "step": 99170 + }, + { + "epoch": 0.7179308996938044, + "grad_norm": 0.17811834812164307, + "learning_rate": 4.2820763389722545e-06, + "loss": 0.8951, + "step": 99180 + }, + { + "epoch": 0.7180032863543906, + "grad_norm": 0.14857879281044006, + "learning_rate": 4.282003952311669e-06, + "loss": 0.8927, + "step": 99190 + }, + { + "epoch": 0.7180756730149768, + "grad_norm": 0.16096261143684387, + "learning_rate": 4.2819315656510825e-06, + "loss": 0.8919, + "step": 99200 + }, + { + "epoch": 0.7181480596755629, + "grad_norm": 0.15119074285030365, + "learning_rate": 4.281859178990496e-06, + "loss": 0.8969, + "step": 99210 + }, + { + "epoch": 0.7182204463361491, + "grad_norm": 0.15937365591526031, + "learning_rate": 4.28178679232991e-06, + "loss": 0.8907, + "step": 99220 + }, + { + "epoch": 0.7182928329967354, + "grad_norm": 0.15309041738510132, + "learning_rate": 4.281714405669324e-06, + "loss": 0.8832, + "step": 99230 + }, + { + "epoch": 0.7183652196573216, + "grad_norm": 0.20644572377204895, + "learning_rate": 4.281642019008738e-06, + "loss": 0.8935, + "step": 99240 + }, + { + "epoch": 0.7184376063179078, + "grad_norm": 0.17840343713760376, + "learning_rate": 4.2815696323481515e-06, + "loss": 0.8981, + "step": 99250 + }, + { + "epoch": 0.7185099929784939, + "grad_norm": 0.16128534078598022, + "learning_rate": 4.281497245687565e-06, + "loss": 0.8925, + "step": 99260 + }, + { + "epoch": 0.7185823796390801, + "grad_norm": 0.16106021404266357, + "learning_rate": 4.2814248590269796e-06, + "loss": 0.8902, + "step": 99270 + }, + { + "epoch": 0.7186547662996663, + "grad_norm": 0.15783260762691498, + "learning_rate": 4.281352472366392e-06, + "loss": 0.878, + "step": 99280 + }, + { + "epoch": 0.7187271529602525, + "grad_norm": 0.17158517241477966, + "learning_rate": 4.281280085705806e-06, + "loss": 0.895, + "step": 99290 + }, + { + "epoch": 0.7187995396208386, + "grad_norm": 0.16213983297348022, + "learning_rate": 4.28120769904522e-06, + "loss": 0.8746, + "step": 99300 + }, + { + "epoch": 0.7188719262814248, + "grad_norm": 0.17305901646614075, + "learning_rate": 4.281135312384634e-06, + "loss": 0.8907, + "step": 99310 + }, + { + "epoch": 0.718944312942011, + "grad_norm": 0.18499404191970825, + "learning_rate": 4.281062925724048e-06, + "loss": 0.884, + "step": 99320 + }, + { + "epoch": 0.7190166996025973, + "grad_norm": 0.15206129848957062, + "learning_rate": 4.280990539063461e-06, + "loss": 0.8882, + "step": 99330 + }, + { + "epoch": 0.7190890862631835, + "grad_norm": 0.15170209109783173, + "learning_rate": 4.280918152402876e-06, + "loss": 0.9006, + "step": 99340 + }, + { + "epoch": 0.7191614729237696, + "grad_norm": 0.14841248095035553, + "learning_rate": 4.280845765742289e-06, + "loss": 0.8907, + "step": 99350 + }, + { + "epoch": 0.7192338595843558, + "grad_norm": 0.16877888143062592, + "learning_rate": 4.280773379081703e-06, + "loss": 0.8853, + "step": 99360 + }, + { + "epoch": 0.719306246244942, + "grad_norm": 0.14914186298847198, + "learning_rate": 4.2807009924211166e-06, + "loss": 0.9001, + "step": 99370 + }, + { + "epoch": 0.7193786329055282, + "grad_norm": 0.15811428427696228, + "learning_rate": 4.280628605760531e-06, + "loss": 0.8884, + "step": 99380 + }, + { + "epoch": 0.7194510195661143, + "grad_norm": 0.15445317327976227, + "learning_rate": 4.280556219099945e-06, + "loss": 0.8969, + "step": 99390 + }, + { + "epoch": 0.7195234062267005, + "grad_norm": 0.1613607257604599, + "learning_rate": 4.280483832439358e-06, + "loss": 0.8747, + "step": 99400 + }, + { + "epoch": 0.7195957928872867, + "grad_norm": 0.1554788202047348, + "learning_rate": 4.280411445778772e-06, + "loss": 0.8949, + "step": 99410 + }, + { + "epoch": 0.7196681795478729, + "grad_norm": 0.15722106397151947, + "learning_rate": 4.280339059118186e-06, + "loss": 0.888, + "step": 99420 + }, + { + "epoch": 0.719740566208459, + "grad_norm": 0.1679772436618805, + "learning_rate": 4.2802666724576e-06, + "loss": 0.8853, + "step": 99430 + }, + { + "epoch": 0.7198129528690453, + "grad_norm": 0.676620602607727, + "learning_rate": 4.2801942857970136e-06, + "loss": 0.884, + "step": 99440 + }, + { + "epoch": 0.7198853395296315, + "grad_norm": 0.15480059385299683, + "learning_rate": 4.280121899136427e-06, + "loss": 0.8963, + "step": 99450 + }, + { + "epoch": 0.7199577261902177, + "grad_norm": 0.15194228291511536, + "learning_rate": 4.280049512475841e-06, + "loss": 0.8896, + "step": 99460 + }, + { + "epoch": 0.7200301128508039, + "grad_norm": 0.17319074273109436, + "learning_rate": 4.279977125815255e-06, + "loss": 0.8986, + "step": 99470 + }, + { + "epoch": 0.72010249951139, + "grad_norm": 0.15072764456272125, + "learning_rate": 4.279904739154669e-06, + "loss": 0.8776, + "step": 99480 + }, + { + "epoch": 0.7201748861719762, + "grad_norm": 0.16070221364498138, + "learning_rate": 4.2798323524940825e-06, + "loss": 0.8977, + "step": 99490 + }, + { + "epoch": 0.7202472728325624, + "grad_norm": 0.156993567943573, + "learning_rate": 4.279759965833496e-06, + "loss": 0.8952, + "step": 99500 + }, + { + "epoch": 0.7203196594931486, + "grad_norm": 0.1609060913324356, + "learning_rate": 4.2796875791729106e-06, + "loss": 0.896, + "step": 99510 + }, + { + "epoch": 0.7203920461537348, + "grad_norm": 0.15467387437820435, + "learning_rate": 4.279615192512324e-06, + "loss": 0.8862, + "step": 99520 + }, + { + "epoch": 0.7204644328143209, + "grad_norm": 0.2120065689086914, + "learning_rate": 4.279542805851738e-06, + "loss": 0.8828, + "step": 99530 + }, + { + "epoch": 0.7205368194749071, + "grad_norm": 0.16634128987789154, + "learning_rate": 4.279470419191151e-06, + "loss": 0.8791, + "step": 99540 + }, + { + "epoch": 0.7206092061354934, + "grad_norm": 0.18684551119804382, + "learning_rate": 4.279398032530566e-06, + "loss": 0.8796, + "step": 99550 + }, + { + "epoch": 0.7206815927960796, + "grad_norm": 0.17714783549308777, + "learning_rate": 4.2793256458699795e-06, + "loss": 0.8972, + "step": 99560 + }, + { + "epoch": 0.7207539794566657, + "grad_norm": 0.14424735307693481, + "learning_rate": 4.279253259209393e-06, + "loss": 0.8898, + "step": 99570 + }, + { + "epoch": 0.7208263661172519, + "grad_norm": 0.1793050915002823, + "learning_rate": 4.279180872548807e-06, + "loss": 0.8837, + "step": 99580 + }, + { + "epoch": 0.7208987527778381, + "grad_norm": 0.15566468238830566, + "learning_rate": 4.279108485888221e-06, + "loss": 0.8823, + "step": 99590 + }, + { + "epoch": 0.7209711394384243, + "grad_norm": 0.14767152070999146, + "learning_rate": 4.279036099227635e-06, + "loss": 0.8748, + "step": 99600 + }, + { + "epoch": 0.7210435260990105, + "grad_norm": 0.1632547825574875, + "learning_rate": 4.2789637125670484e-06, + "loss": 0.8904, + "step": 99610 + }, + { + "epoch": 0.7211159127595966, + "grad_norm": 0.15623623132705688, + "learning_rate": 4.278891325906462e-06, + "loss": 0.8852, + "step": 99620 + }, + { + "epoch": 0.7211882994201828, + "grad_norm": 0.20870515704154968, + "learning_rate": 4.2788189392458765e-06, + "loss": 0.8955, + "step": 99630 + }, + { + "epoch": 0.721260686080769, + "grad_norm": 0.15594063699245453, + "learning_rate": 4.27874655258529e-06, + "loss": 0.9089, + "step": 99640 + }, + { + "epoch": 0.7213330727413553, + "grad_norm": 0.16253912448883057, + "learning_rate": 4.278674165924704e-06, + "loss": 0.8859, + "step": 99650 + }, + { + "epoch": 0.7214054594019415, + "grad_norm": 0.3714236319065094, + "learning_rate": 4.278601779264117e-06, + "loss": 0.8875, + "step": 99660 + }, + { + "epoch": 0.7214778460625276, + "grad_norm": 0.14919225871562958, + "learning_rate": 4.278529392603532e-06, + "loss": 0.8868, + "step": 99670 + }, + { + "epoch": 0.7215502327231138, + "grad_norm": 0.16863149404525757, + "learning_rate": 4.2784570059429454e-06, + "loss": 0.8998, + "step": 99680 + }, + { + "epoch": 0.7216226193837, + "grad_norm": 0.15315097570419312, + "learning_rate": 4.278384619282359e-06, + "loss": 0.8859, + "step": 99690 + }, + { + "epoch": 0.7216950060442862, + "grad_norm": 0.14236433804035187, + "learning_rate": 4.278312232621773e-06, + "loss": 0.8921, + "step": 99700 + }, + { + "epoch": 0.7217673927048723, + "grad_norm": 0.16768266260623932, + "learning_rate": 4.278239845961187e-06, + "loss": 0.8741, + "step": 99710 + }, + { + "epoch": 0.7218397793654585, + "grad_norm": 0.1807376593351364, + "learning_rate": 4.278167459300601e-06, + "loss": 0.888, + "step": 99720 + }, + { + "epoch": 0.7219121660260447, + "grad_norm": 0.16242581605911255, + "learning_rate": 4.278095072640014e-06, + "loss": 0.8871, + "step": 99730 + }, + { + "epoch": 0.7219845526866309, + "grad_norm": 0.1657247543334961, + "learning_rate": 4.278022685979428e-06, + "loss": 0.8905, + "step": 99740 + }, + { + "epoch": 0.722056939347217, + "grad_norm": 0.1562010794878006, + "learning_rate": 4.2779502993188424e-06, + "loss": 0.865, + "step": 99750 + }, + { + "epoch": 0.7221293260078033, + "grad_norm": 0.1501951813697815, + "learning_rate": 4.277877912658256e-06, + "loss": 0.8991, + "step": 99760 + }, + { + "epoch": 0.7222017126683895, + "grad_norm": 0.15544991195201874, + "learning_rate": 4.27780552599767e-06, + "loss": 0.8812, + "step": 99770 + }, + { + "epoch": 0.7222740993289757, + "grad_norm": 0.17391003668308258, + "learning_rate": 4.277733139337083e-06, + "loss": 0.8923, + "step": 99780 + }, + { + "epoch": 0.7223464859895619, + "grad_norm": 0.16744978725910187, + "learning_rate": 4.277660752676498e-06, + "loss": 0.8973, + "step": 99790 + }, + { + "epoch": 0.722418872650148, + "grad_norm": 0.1574922800064087, + "learning_rate": 4.277588366015911e-06, + "loss": 0.8932, + "step": 99800 + }, + { + "epoch": 0.7224912593107342, + "grad_norm": 0.14947380125522614, + "learning_rate": 4.277515979355324e-06, + "loss": 0.8859, + "step": 99810 + }, + { + "epoch": 0.7225636459713204, + "grad_norm": 0.16751673817634583, + "learning_rate": 4.277443592694739e-06, + "loss": 0.8879, + "step": 99820 + }, + { + "epoch": 0.7226360326319066, + "grad_norm": 0.1539609283208847, + "learning_rate": 4.277371206034152e-06, + "loss": 0.8991, + "step": 99830 + }, + { + "epoch": 0.7227084192924927, + "grad_norm": 0.19024856388568878, + "learning_rate": 4.277298819373566e-06, + "loss": 0.8886, + "step": 99840 + }, + { + "epoch": 0.7227808059530789, + "grad_norm": 0.1468716859817505, + "learning_rate": 4.2772264327129795e-06, + "loss": 0.8967, + "step": 99850 + }, + { + "epoch": 0.7228531926136652, + "grad_norm": 0.15970027446746826, + "learning_rate": 4.277154046052394e-06, + "loss": 0.8892, + "step": 99860 + }, + { + "epoch": 0.7229255792742514, + "grad_norm": 0.1565774530172348, + "learning_rate": 4.2770816593918075e-06, + "loss": 0.8853, + "step": 99870 + }, + { + "epoch": 0.7229979659348376, + "grad_norm": 0.19593319296836853, + "learning_rate": 4.277009272731221e-06, + "loss": 0.8866, + "step": 99880 + }, + { + "epoch": 0.7230703525954237, + "grad_norm": 0.14920704066753387, + "learning_rate": 4.276936886070635e-06, + "loss": 0.8842, + "step": 99890 + }, + { + "epoch": 0.7231427392560099, + "grad_norm": 0.18510152399539948, + "learning_rate": 4.276864499410049e-06, + "loss": 0.8763, + "step": 99900 + }, + { + "epoch": 0.7232151259165961, + "grad_norm": 0.16031725704669952, + "learning_rate": 4.276792112749463e-06, + "loss": 0.8836, + "step": 99910 + }, + { + "epoch": 0.7232875125771823, + "grad_norm": 0.18583592772483826, + "learning_rate": 4.2767197260888765e-06, + "loss": 0.8894, + "step": 99920 + }, + { + "epoch": 0.7233598992377684, + "grad_norm": 0.15419869124889374, + "learning_rate": 4.27664733942829e-06, + "loss": 0.8858, + "step": 99930 + }, + { + "epoch": 0.7234322858983546, + "grad_norm": 0.15521828830242157, + "learning_rate": 4.2765749527677045e-06, + "loss": 0.893, + "step": 99940 + }, + { + "epoch": 0.7235046725589408, + "grad_norm": 0.15606524050235748, + "learning_rate": 4.276502566107118e-06, + "loss": 0.9011, + "step": 99950 + }, + { + "epoch": 0.723577059219527, + "grad_norm": 0.15376631915569305, + "learning_rate": 4.276430179446532e-06, + "loss": 0.8898, + "step": 99960 + }, + { + "epoch": 0.7236494458801133, + "grad_norm": 0.14439062774181366, + "learning_rate": 4.276357792785945e-06, + "loss": 0.8911, + "step": 99970 + }, + { + "epoch": 0.7237218325406994, + "grad_norm": 0.1527758240699768, + "learning_rate": 4.27628540612536e-06, + "loss": 0.8911, + "step": 99980 + }, + { + "epoch": 0.7237942192012856, + "grad_norm": 0.15964572131633759, + "learning_rate": 4.2762130194647735e-06, + "loss": 0.8951, + "step": 99990 + }, + { + "epoch": 0.7238666058618718, + "grad_norm": 0.15122631192207336, + "learning_rate": 4.276140632804187e-06, + "loss": 0.8813, + "step": 100000 + }, + { + "epoch": 0.723938992522458, + "grad_norm": 0.16392894089221954, + "learning_rate": 4.276068246143601e-06, + "loss": 0.8888, + "step": 100010 + }, + { + "epoch": 0.7240113791830441, + "grad_norm": 0.1608874499797821, + "learning_rate": 4.275995859483015e-06, + "loss": 0.8825, + "step": 100020 + }, + { + "epoch": 0.7240837658436303, + "grad_norm": 0.16042576730251312, + "learning_rate": 4.275923472822429e-06, + "loss": 0.8939, + "step": 100030 + }, + { + "epoch": 0.7241561525042165, + "grad_norm": 0.15833930671215057, + "learning_rate": 4.275851086161842e-06, + "loss": 0.8763, + "step": 100040 + }, + { + "epoch": 0.7242285391648027, + "grad_norm": 0.1503579467535019, + "learning_rate": 4.275778699501256e-06, + "loss": 0.8852, + "step": 100050 + }, + { + "epoch": 0.7243009258253889, + "grad_norm": 0.1671973466873169, + "learning_rate": 4.2757063128406705e-06, + "loss": 0.8856, + "step": 100060 + }, + { + "epoch": 0.724373312485975, + "grad_norm": 0.14335432648658752, + "learning_rate": 4.275633926180084e-06, + "loss": 0.8775, + "step": 100070 + }, + { + "epoch": 0.7244456991465613, + "grad_norm": 0.1621866524219513, + "learning_rate": 4.275561539519498e-06, + "loss": 0.8868, + "step": 100080 + }, + { + "epoch": 0.7245180858071475, + "grad_norm": 0.1560746431350708, + "learning_rate": 4.275489152858911e-06, + "loss": 0.8857, + "step": 100090 + }, + { + "epoch": 0.7245904724677337, + "grad_norm": 0.1637454777956009, + "learning_rate": 4.275416766198325e-06, + "loss": 0.8863, + "step": 100100 + }, + { + "epoch": 0.7246628591283198, + "grad_norm": 0.16514307260513306, + "learning_rate": 4.275344379537739e-06, + "loss": 0.893, + "step": 100110 + }, + { + "epoch": 0.724735245788906, + "grad_norm": 0.15878024697303772, + "learning_rate": 4.275271992877153e-06, + "loss": 0.8852, + "step": 100120 + }, + { + "epoch": 0.7248076324494922, + "grad_norm": 0.15798494219779968, + "learning_rate": 4.275199606216567e-06, + "loss": 0.8922, + "step": 100130 + }, + { + "epoch": 0.7248800191100784, + "grad_norm": 0.1547410637140274, + "learning_rate": 4.27512721955598e-06, + "loss": 0.8779, + "step": 100140 + }, + { + "epoch": 0.7249524057706646, + "grad_norm": 0.14897017180919647, + "learning_rate": 4.275054832895395e-06, + "loss": 0.8924, + "step": 100150 + }, + { + "epoch": 0.7250247924312507, + "grad_norm": 0.1668456345796585, + "learning_rate": 4.274982446234808e-06, + "loss": 0.8803, + "step": 100160 + }, + { + "epoch": 0.7250971790918369, + "grad_norm": 0.1529316008090973, + "learning_rate": 4.274910059574222e-06, + "loss": 0.8984, + "step": 100170 + }, + { + "epoch": 0.7251695657524232, + "grad_norm": 0.14754995703697205, + "learning_rate": 4.2748376729136356e-06, + "loss": 0.8735, + "step": 100180 + }, + { + "epoch": 0.7252419524130094, + "grad_norm": 0.16086558997631073, + "learning_rate": 4.27476528625305e-06, + "loss": 0.8852, + "step": 100190 + }, + { + "epoch": 0.7253143390735955, + "grad_norm": 0.16596639156341553, + "learning_rate": 4.274692899592464e-06, + "loss": 0.9067, + "step": 100200 + }, + { + "epoch": 0.7253867257341817, + "grad_norm": 0.18739081919193268, + "learning_rate": 4.274620512931877e-06, + "loss": 0.8872, + "step": 100210 + }, + { + "epoch": 0.7254591123947679, + "grad_norm": 0.15418687462806702, + "learning_rate": 4.274548126271291e-06, + "loss": 0.8934, + "step": 100220 + }, + { + "epoch": 0.7255314990553541, + "grad_norm": 0.16099806129932404, + "learning_rate": 4.274475739610705e-06, + "loss": 0.8943, + "step": 100230 + }, + { + "epoch": 0.7256038857159403, + "grad_norm": 0.15143683552742004, + "learning_rate": 4.274403352950119e-06, + "loss": 0.8825, + "step": 100240 + }, + { + "epoch": 0.7256762723765264, + "grad_norm": 0.15264058113098145, + "learning_rate": 4.2743309662895326e-06, + "loss": 0.8817, + "step": 100250 + }, + { + "epoch": 0.7257486590371126, + "grad_norm": 0.1594517081975937, + "learning_rate": 4.274258579628946e-06, + "loss": 0.8968, + "step": 100260 + }, + { + "epoch": 0.7258210456976988, + "grad_norm": 0.1539693921804428, + "learning_rate": 4.274186192968361e-06, + "loss": 0.8994, + "step": 100270 + }, + { + "epoch": 0.725893432358285, + "grad_norm": 0.1556989848613739, + "learning_rate": 4.274113806307774e-06, + "loss": 0.8952, + "step": 100280 + }, + { + "epoch": 0.7259658190188713, + "grad_norm": 0.15270110964775085, + "learning_rate": 4.274041419647188e-06, + "loss": 0.8867, + "step": 100290 + }, + { + "epoch": 0.7260382056794574, + "grad_norm": 0.16615337133407593, + "learning_rate": 4.2739690329866015e-06, + "loss": 0.881, + "step": 100300 + }, + { + "epoch": 0.7261105923400436, + "grad_norm": 0.15638002753257751, + "learning_rate": 4.273896646326016e-06, + "loss": 0.8889, + "step": 100310 + }, + { + "epoch": 0.7261829790006298, + "grad_norm": 0.1828213930130005, + "learning_rate": 4.27382425966543e-06, + "loss": 0.8921, + "step": 100320 + }, + { + "epoch": 0.726255365661216, + "grad_norm": 0.14612999558448792, + "learning_rate": 4.273751873004843e-06, + "loss": 0.889, + "step": 100330 + }, + { + "epoch": 0.7263277523218021, + "grad_norm": 0.15252217650413513, + "learning_rate": 4.273679486344257e-06, + "loss": 0.8879, + "step": 100340 + }, + { + "epoch": 0.7264001389823883, + "grad_norm": 0.1537008434534073, + "learning_rate": 4.2736070996836704e-06, + "loss": 0.899, + "step": 100350 + }, + { + "epoch": 0.7264725256429745, + "grad_norm": 0.1628774255514145, + "learning_rate": 4.273534713023084e-06, + "loss": 0.8934, + "step": 100360 + }, + { + "epoch": 0.7265449123035607, + "grad_norm": 0.1571381837129593, + "learning_rate": 4.273462326362498e-06, + "loss": 0.8825, + "step": 100370 + }, + { + "epoch": 0.7266172989641468, + "grad_norm": 0.16100090742111206, + "learning_rate": 4.273389939701912e-06, + "loss": 0.8901, + "step": 100380 + }, + { + "epoch": 0.7266896856247331, + "grad_norm": 0.14620473980903625, + "learning_rate": 4.273317553041326e-06, + "loss": 0.8886, + "step": 100390 + }, + { + "epoch": 0.7267620722853193, + "grad_norm": 0.15393929183483124, + "learning_rate": 4.273245166380739e-06, + "loss": 0.8974, + "step": 100400 + }, + { + "epoch": 0.7268344589459055, + "grad_norm": 0.16305720806121826, + "learning_rate": 4.273172779720153e-06, + "loss": 0.884, + "step": 100410 + }, + { + "epoch": 0.7269068456064917, + "grad_norm": 0.1579166203737259, + "learning_rate": 4.2731003930595674e-06, + "loss": 0.8762, + "step": 100420 + }, + { + "epoch": 0.7269792322670778, + "grad_norm": 0.14451733231544495, + "learning_rate": 4.273028006398981e-06, + "loss": 0.8893, + "step": 100430 + }, + { + "epoch": 0.727051618927664, + "grad_norm": 0.14748463034629822, + "learning_rate": 4.272955619738395e-06, + "loss": 0.8861, + "step": 100440 + }, + { + "epoch": 0.7271240055882502, + "grad_norm": 0.16086864471435547, + "learning_rate": 4.272883233077808e-06, + "loss": 0.8885, + "step": 100450 + }, + { + "epoch": 0.7271963922488364, + "grad_norm": 0.14903460443019867, + "learning_rate": 4.272810846417223e-06, + "loss": 0.8832, + "step": 100460 + }, + { + "epoch": 0.7272687789094225, + "grad_norm": 0.16146931052207947, + "learning_rate": 4.272738459756636e-06, + "loss": 0.8856, + "step": 100470 + }, + { + "epoch": 0.7273411655700087, + "grad_norm": 0.1918579488992691, + "learning_rate": 4.27266607309605e-06, + "loss": 0.8953, + "step": 100480 + }, + { + "epoch": 0.7274135522305949, + "grad_norm": 0.1559261530637741, + "learning_rate": 4.272593686435464e-06, + "loss": 0.8855, + "step": 100490 + }, + { + "epoch": 0.7274859388911812, + "grad_norm": 0.15730531513690948, + "learning_rate": 4.272521299774878e-06, + "loss": 0.8946, + "step": 100500 + }, + { + "epoch": 0.7275583255517674, + "grad_norm": 0.1542884111404419, + "learning_rate": 4.272448913114292e-06, + "loss": 0.8932, + "step": 100510 + }, + { + "epoch": 0.7276307122123535, + "grad_norm": 0.15835349261760712, + "learning_rate": 4.272376526453705e-06, + "loss": 0.903, + "step": 100520 + }, + { + "epoch": 0.7277030988729397, + "grad_norm": 0.1627766340970993, + "learning_rate": 4.272304139793119e-06, + "loss": 0.8901, + "step": 100530 + }, + { + "epoch": 0.7277754855335259, + "grad_norm": 0.15794923901557922, + "learning_rate": 4.272231753132533e-06, + "loss": 0.8939, + "step": 100540 + }, + { + "epoch": 0.7278478721941121, + "grad_norm": 0.15838219225406647, + "learning_rate": 4.272159366471947e-06, + "loss": 0.8933, + "step": 100550 + }, + { + "epoch": 0.7279202588546982, + "grad_norm": 0.15067455172538757, + "learning_rate": 4.272086979811361e-06, + "loss": 0.8892, + "step": 100560 + }, + { + "epoch": 0.7279926455152844, + "grad_norm": 0.158193901181221, + "learning_rate": 4.272014593150774e-06, + "loss": 0.8793, + "step": 100570 + }, + { + "epoch": 0.7280650321758706, + "grad_norm": 0.15686438977718353, + "learning_rate": 4.271942206490189e-06, + "loss": 0.8775, + "step": 100580 + }, + { + "epoch": 0.7281374188364568, + "grad_norm": 0.1513579934835434, + "learning_rate": 4.271869819829602e-06, + "loss": 0.8954, + "step": 100590 + }, + { + "epoch": 0.728209805497043, + "grad_norm": 0.15484221279621124, + "learning_rate": 4.271797433169016e-06, + "loss": 0.8944, + "step": 100600 + }, + { + "epoch": 0.7282821921576292, + "grad_norm": 0.14668524265289307, + "learning_rate": 4.2717250465084295e-06, + "loss": 0.8753, + "step": 100610 + }, + { + "epoch": 0.7283545788182154, + "grad_norm": 0.1556718647480011, + "learning_rate": 4.271652659847844e-06, + "loss": 0.8875, + "step": 100620 + }, + { + "epoch": 0.7284269654788016, + "grad_norm": 0.16115809977054596, + "learning_rate": 4.271580273187258e-06, + "loss": 0.8909, + "step": 100630 + }, + { + "epoch": 0.7284993521393878, + "grad_norm": 0.152574360370636, + "learning_rate": 4.271507886526671e-06, + "loss": 0.8916, + "step": 100640 + }, + { + "epoch": 0.728571738799974, + "grad_norm": 0.16087447106838226, + "learning_rate": 4.271435499866085e-06, + "loss": 0.8859, + "step": 100650 + }, + { + "epoch": 0.7286441254605601, + "grad_norm": 0.21453756093978882, + "learning_rate": 4.271363113205499e-06, + "loss": 0.8862, + "step": 100660 + }, + { + "epoch": 0.7287165121211463, + "grad_norm": 0.156850203871727, + "learning_rate": 4.271290726544913e-06, + "loss": 0.8844, + "step": 100670 + }, + { + "epoch": 0.7287888987817325, + "grad_norm": 0.16212424635887146, + "learning_rate": 4.2712183398843265e-06, + "loss": 0.8892, + "step": 100680 + }, + { + "epoch": 0.7288612854423187, + "grad_norm": 0.1557214856147766, + "learning_rate": 4.27114595322374e-06, + "loss": 0.8843, + "step": 100690 + }, + { + "epoch": 0.7289336721029048, + "grad_norm": 0.1972065418958664, + "learning_rate": 4.271073566563154e-06, + "loss": 0.8747, + "step": 100700 + }, + { + "epoch": 0.7290060587634911, + "grad_norm": 0.1524554044008255, + "learning_rate": 4.271001179902568e-06, + "loss": 0.8857, + "step": 100710 + }, + { + "epoch": 0.7290784454240773, + "grad_norm": 0.15759634971618652, + "learning_rate": 4.270928793241982e-06, + "loss": 0.8856, + "step": 100720 + }, + { + "epoch": 0.7291508320846635, + "grad_norm": 0.1459013819694519, + "learning_rate": 4.2708564065813955e-06, + "loss": 0.8737, + "step": 100730 + }, + { + "epoch": 0.7292232187452496, + "grad_norm": 0.15204448997974396, + "learning_rate": 4.270784019920809e-06, + "loss": 0.8969, + "step": 100740 + }, + { + "epoch": 0.7292956054058358, + "grad_norm": 0.16425342857837677, + "learning_rate": 4.2707116332602235e-06, + "loss": 0.8876, + "step": 100750 + }, + { + "epoch": 0.729367992066422, + "grad_norm": 0.16584093868732452, + "learning_rate": 4.270639246599637e-06, + "loss": 0.885, + "step": 100760 + }, + { + "epoch": 0.7294403787270082, + "grad_norm": 0.16400320827960968, + "learning_rate": 4.270566859939051e-06, + "loss": 0.8939, + "step": 100770 + }, + { + "epoch": 0.7295127653875944, + "grad_norm": 0.15366198122501373, + "learning_rate": 4.270494473278464e-06, + "loss": 0.8716, + "step": 100780 + }, + { + "epoch": 0.7295851520481805, + "grad_norm": 0.15150944888591766, + "learning_rate": 4.270422086617879e-06, + "loss": 0.89, + "step": 100790 + }, + { + "epoch": 0.7296575387087667, + "grad_norm": 0.1443614810705185, + "learning_rate": 4.2703496999572925e-06, + "loss": 0.8889, + "step": 100800 + }, + { + "epoch": 0.7297299253693529, + "grad_norm": 0.15036936104297638, + "learning_rate": 4.270277313296706e-06, + "loss": 0.9045, + "step": 100810 + }, + { + "epoch": 0.7298023120299392, + "grad_norm": 0.16487093269824982, + "learning_rate": 4.27020492663612e-06, + "loss": 0.8815, + "step": 100820 + }, + { + "epoch": 0.7298746986905253, + "grad_norm": 0.15365450084209442, + "learning_rate": 4.270132539975534e-06, + "loss": 0.894, + "step": 100830 + }, + { + "epoch": 0.7299470853511115, + "grad_norm": 0.1608169972896576, + "learning_rate": 4.270060153314948e-06, + "loss": 0.9013, + "step": 100840 + }, + { + "epoch": 0.7300194720116977, + "grad_norm": 0.16800221800804138, + "learning_rate": 4.269987766654361e-06, + "loss": 0.8778, + "step": 100850 + }, + { + "epoch": 0.7300918586722839, + "grad_norm": 0.15907716751098633, + "learning_rate": 4.269915379993775e-06, + "loss": 0.8859, + "step": 100860 + }, + { + "epoch": 0.73016424533287, + "grad_norm": 0.15400275588035583, + "learning_rate": 4.269842993333189e-06, + "loss": 0.8904, + "step": 100870 + }, + { + "epoch": 0.7302366319934562, + "grad_norm": 0.1577254682779312, + "learning_rate": 4.269770606672602e-06, + "loss": 0.8833, + "step": 100880 + }, + { + "epoch": 0.7303090186540424, + "grad_norm": 0.1583620309829712, + "learning_rate": 4.269698220012016e-06, + "loss": 0.8808, + "step": 100890 + }, + { + "epoch": 0.7303814053146286, + "grad_norm": 0.1512048840522766, + "learning_rate": 4.26962583335143e-06, + "loss": 0.892, + "step": 100900 + }, + { + "epoch": 0.7304537919752148, + "grad_norm": 0.1588258147239685, + "learning_rate": 4.269553446690844e-06, + "loss": 0.8959, + "step": 100910 + }, + { + "epoch": 0.730526178635801, + "grad_norm": 0.15298683941364288, + "learning_rate": 4.2694810600302576e-06, + "loss": 0.885, + "step": 100920 + }, + { + "epoch": 0.7305985652963872, + "grad_norm": 0.15516585111618042, + "learning_rate": 4.269408673369671e-06, + "loss": 0.8925, + "step": 100930 + }, + { + "epoch": 0.7306709519569734, + "grad_norm": 0.1673891842365265, + "learning_rate": 4.269336286709086e-06, + "loss": 0.9016, + "step": 100940 + }, + { + "epoch": 0.7307433386175596, + "grad_norm": 0.16964946687221527, + "learning_rate": 4.269263900048499e-06, + "loss": 0.9035, + "step": 100950 + }, + { + "epoch": 0.7308157252781458, + "grad_norm": 0.21331481635570526, + "learning_rate": 4.269191513387913e-06, + "loss": 0.8832, + "step": 100960 + }, + { + "epoch": 0.7308881119387319, + "grad_norm": 0.1504168063402176, + "learning_rate": 4.2691191267273265e-06, + "loss": 0.8759, + "step": 100970 + }, + { + "epoch": 0.7309604985993181, + "grad_norm": 0.14905038475990295, + "learning_rate": 4.269046740066741e-06, + "loss": 0.887, + "step": 100980 + }, + { + "epoch": 0.7310328852599043, + "grad_norm": 0.1641206294298172, + "learning_rate": 4.2689743534061546e-06, + "loss": 0.8813, + "step": 100990 + }, + { + "epoch": 0.7311052719204905, + "grad_norm": 0.3293008804321289, + "learning_rate": 4.268901966745568e-06, + "loss": 0.874, + "step": 101000 + }, + { + "epoch": 0.7311776585810766, + "grad_norm": 0.1844899207353592, + "learning_rate": 4.268829580084982e-06, + "loss": 0.8987, + "step": 101010 + }, + { + "epoch": 0.7312500452416628, + "grad_norm": 0.17655082046985626, + "learning_rate": 4.268757193424396e-06, + "loss": 0.8979, + "step": 101020 + }, + { + "epoch": 0.7313224319022491, + "grad_norm": 0.15127238631248474, + "learning_rate": 4.26868480676381e-06, + "loss": 0.8825, + "step": 101030 + }, + { + "epoch": 0.7313948185628353, + "grad_norm": 0.15606975555419922, + "learning_rate": 4.2686124201032235e-06, + "loss": 0.8874, + "step": 101040 + }, + { + "epoch": 0.7314672052234215, + "grad_norm": 0.1606336385011673, + "learning_rate": 4.268540033442637e-06, + "loss": 0.8774, + "step": 101050 + }, + { + "epoch": 0.7315395918840076, + "grad_norm": 0.16057442128658295, + "learning_rate": 4.268467646782052e-06, + "loss": 0.8981, + "step": 101060 + }, + { + "epoch": 0.7316119785445938, + "grad_norm": 0.1635402888059616, + "learning_rate": 4.268395260121465e-06, + "loss": 0.8952, + "step": 101070 + }, + { + "epoch": 0.73168436520518, + "grad_norm": 0.1442507654428482, + "learning_rate": 4.268322873460879e-06, + "loss": 0.8981, + "step": 101080 + }, + { + "epoch": 0.7317567518657662, + "grad_norm": 0.13882668316364288, + "learning_rate": 4.2682504868002924e-06, + "loss": 0.8905, + "step": 101090 + }, + { + "epoch": 0.7318291385263523, + "grad_norm": 0.15066657960414886, + "learning_rate": 4.268178100139707e-06, + "loss": 0.8987, + "step": 101100 + }, + { + "epoch": 0.7319015251869385, + "grad_norm": 0.1645701825618744, + "learning_rate": 4.2681057134791205e-06, + "loss": 0.8886, + "step": 101110 + }, + { + "epoch": 0.7319739118475247, + "grad_norm": 0.15226510167121887, + "learning_rate": 4.268033326818534e-06, + "loss": 0.8932, + "step": 101120 + }, + { + "epoch": 0.7320462985081109, + "grad_norm": 0.15741746127605438, + "learning_rate": 4.267960940157948e-06, + "loss": 0.8793, + "step": 101130 + }, + { + "epoch": 0.7321186851686972, + "grad_norm": 0.15548457205295563, + "learning_rate": 4.267888553497362e-06, + "loss": 0.881, + "step": 101140 + }, + { + "epoch": 0.7321910718292833, + "grad_norm": 0.2032490223646164, + "learning_rate": 4.267816166836776e-06, + "loss": 0.8916, + "step": 101150 + }, + { + "epoch": 0.7322634584898695, + "grad_norm": 0.2026352435350418, + "learning_rate": 4.2677437801761894e-06, + "loss": 0.8932, + "step": 101160 + }, + { + "epoch": 0.7323358451504557, + "grad_norm": 0.15487812459468842, + "learning_rate": 4.267671393515603e-06, + "loss": 0.8805, + "step": 101170 + }, + { + "epoch": 0.7324082318110419, + "grad_norm": 0.16896316409111023, + "learning_rate": 4.2675990068550175e-06, + "loss": 0.8811, + "step": 101180 + }, + { + "epoch": 0.732480618471628, + "grad_norm": 0.20788662135601044, + "learning_rate": 4.267526620194431e-06, + "loss": 0.8858, + "step": 101190 + }, + { + "epoch": 0.7325530051322142, + "grad_norm": 0.15774105489253998, + "learning_rate": 4.267454233533845e-06, + "loss": 0.9004, + "step": 101200 + }, + { + "epoch": 0.7326253917928004, + "grad_norm": 0.1576681286096573, + "learning_rate": 4.267381846873258e-06, + "loss": 0.9003, + "step": 101210 + }, + { + "epoch": 0.7326977784533866, + "grad_norm": 0.19305309653282166, + "learning_rate": 4.267309460212673e-06, + "loss": 0.894, + "step": 101220 + }, + { + "epoch": 0.7327701651139727, + "grad_norm": 0.16262653470039368, + "learning_rate": 4.2672370735520864e-06, + "loss": 0.8774, + "step": 101230 + }, + { + "epoch": 0.732842551774559, + "grad_norm": 0.14455868303775787, + "learning_rate": 4.2671646868915e-06, + "loss": 0.8963, + "step": 101240 + }, + { + "epoch": 0.7329149384351452, + "grad_norm": 0.19319617748260498, + "learning_rate": 4.267092300230914e-06, + "loss": 0.8832, + "step": 101250 + }, + { + "epoch": 0.7329873250957314, + "grad_norm": 0.2210075408220291, + "learning_rate": 4.267019913570328e-06, + "loss": 0.8884, + "step": 101260 + }, + { + "epoch": 0.7330597117563176, + "grad_norm": 0.16288064420223236, + "learning_rate": 4.266947526909742e-06, + "loss": 0.8762, + "step": 101270 + }, + { + "epoch": 0.7331320984169037, + "grad_norm": 0.16447566449642181, + "learning_rate": 4.266875140249155e-06, + "loss": 0.8895, + "step": 101280 + }, + { + "epoch": 0.7332044850774899, + "grad_norm": 0.15744997560977936, + "learning_rate": 4.266802753588569e-06, + "loss": 0.8948, + "step": 101290 + }, + { + "epoch": 0.7332768717380761, + "grad_norm": 0.1907864809036255, + "learning_rate": 4.2667303669279835e-06, + "loss": 0.8816, + "step": 101300 + }, + { + "epoch": 0.7333492583986623, + "grad_norm": 0.15406233072280884, + "learning_rate": 4.266657980267397e-06, + "loss": 0.8851, + "step": 101310 + }, + { + "epoch": 0.7334216450592485, + "grad_norm": 0.16773687303066254, + "learning_rate": 4.266585593606811e-06, + "loss": 0.8864, + "step": 101320 + }, + { + "epoch": 0.7334940317198346, + "grad_norm": 0.15521180629730225, + "learning_rate": 4.266513206946224e-06, + "loss": 0.8834, + "step": 101330 + }, + { + "epoch": 0.7335664183804208, + "grad_norm": 0.15735216438770294, + "learning_rate": 4.266440820285638e-06, + "loss": 0.8814, + "step": 101340 + }, + { + "epoch": 0.7336388050410071, + "grad_norm": 0.14961345493793488, + "learning_rate": 4.266368433625052e-06, + "loss": 0.8863, + "step": 101350 + }, + { + "epoch": 0.7337111917015933, + "grad_norm": 0.15976914763450623, + "learning_rate": 4.266296046964466e-06, + "loss": 0.8944, + "step": 101360 + }, + { + "epoch": 0.7337835783621794, + "grad_norm": 0.16493217647075653, + "learning_rate": 4.26622366030388e-06, + "loss": 0.8897, + "step": 101370 + }, + { + "epoch": 0.7338559650227656, + "grad_norm": 0.15834347903728485, + "learning_rate": 4.266151273643293e-06, + "loss": 0.8842, + "step": 101380 + }, + { + "epoch": 0.7339283516833518, + "grad_norm": 0.14904192090034485, + "learning_rate": 4.266078886982708e-06, + "loss": 0.8796, + "step": 101390 + }, + { + "epoch": 0.734000738343938, + "grad_norm": 0.16918309032917023, + "learning_rate": 4.2660065003221205e-06, + "loss": 0.8919, + "step": 101400 + }, + { + "epoch": 0.7340731250045242, + "grad_norm": 0.14453521370887756, + "learning_rate": 4.265934113661535e-06, + "loss": 0.888, + "step": 101410 + }, + { + "epoch": 0.7341455116651103, + "grad_norm": 0.15393070876598358, + "learning_rate": 4.2658617270009485e-06, + "loss": 0.8833, + "step": 101420 + }, + { + "epoch": 0.7342178983256965, + "grad_norm": 0.1721397191286087, + "learning_rate": 4.265789340340362e-06, + "loss": 0.8918, + "step": 101430 + }, + { + "epoch": 0.7342902849862827, + "grad_norm": 0.15190811455249786, + "learning_rate": 4.265716953679776e-06, + "loss": 0.8889, + "step": 101440 + }, + { + "epoch": 0.734362671646869, + "grad_norm": 0.14311714470386505, + "learning_rate": 4.26564456701919e-06, + "loss": 0.8907, + "step": 101450 + }, + { + "epoch": 0.7344350583074551, + "grad_norm": 0.17075756192207336, + "learning_rate": 4.265572180358604e-06, + "loss": 0.8994, + "step": 101460 + }, + { + "epoch": 0.7345074449680413, + "grad_norm": 0.1581360548734665, + "learning_rate": 4.2654997936980175e-06, + "loss": 0.8876, + "step": 101470 + }, + { + "epoch": 0.7345798316286275, + "grad_norm": 0.15147803723812103, + "learning_rate": 4.265427407037431e-06, + "loss": 0.9041, + "step": 101480 + }, + { + "epoch": 0.7346522182892137, + "grad_norm": 0.161724254488945, + "learning_rate": 4.265355020376845e-06, + "loss": 0.8929, + "step": 101490 + }, + { + "epoch": 0.7347246049497999, + "grad_norm": 0.1595461070537567, + "learning_rate": 4.265282633716259e-06, + "loss": 0.8882, + "step": 101500 + }, + { + "epoch": 0.734796991610386, + "grad_norm": 0.15513767302036285, + "learning_rate": 4.265210247055673e-06, + "loss": 0.8998, + "step": 101510 + }, + { + "epoch": 0.7348693782709722, + "grad_norm": 0.15473343431949615, + "learning_rate": 4.265137860395086e-06, + "loss": 0.9025, + "step": 101520 + }, + { + "epoch": 0.7349417649315584, + "grad_norm": 0.16891096532344818, + "learning_rate": 4.2650654737345e-06, + "loss": 0.8868, + "step": 101530 + }, + { + "epoch": 0.7350141515921446, + "grad_norm": 0.15746982395648956, + "learning_rate": 4.2649930870739145e-06, + "loss": 0.9042, + "step": 101540 + }, + { + "epoch": 0.7350865382527307, + "grad_norm": 0.16814929246902466, + "learning_rate": 4.264920700413328e-06, + "loss": 0.9029, + "step": 101550 + }, + { + "epoch": 0.735158924913317, + "grad_norm": 0.14766864478588104, + "learning_rate": 4.264848313752742e-06, + "loss": 0.8741, + "step": 101560 + }, + { + "epoch": 0.7352313115739032, + "grad_norm": 0.16124971210956573, + "learning_rate": 4.264775927092155e-06, + "loss": 0.8771, + "step": 101570 + }, + { + "epoch": 0.7353036982344894, + "grad_norm": 0.16348862648010254, + "learning_rate": 4.26470354043157e-06, + "loss": 0.8955, + "step": 101580 + }, + { + "epoch": 0.7353760848950756, + "grad_norm": 0.16436149179935455, + "learning_rate": 4.264631153770983e-06, + "loss": 0.8889, + "step": 101590 + }, + { + "epoch": 0.7354484715556617, + "grad_norm": 0.14332278072834015, + "learning_rate": 4.264558767110397e-06, + "loss": 0.8866, + "step": 101600 + }, + { + "epoch": 0.7355208582162479, + "grad_norm": 0.15618641674518585, + "learning_rate": 4.264486380449811e-06, + "loss": 0.8853, + "step": 101610 + }, + { + "epoch": 0.7355932448768341, + "grad_norm": 0.1528848558664322, + "learning_rate": 4.264413993789225e-06, + "loss": 0.8882, + "step": 101620 + }, + { + "epoch": 0.7356656315374203, + "grad_norm": 0.1625969558954239, + "learning_rate": 4.264341607128639e-06, + "loss": 0.8774, + "step": 101630 + }, + { + "epoch": 0.7357380181980064, + "grad_norm": 0.16516810655593872, + "learning_rate": 4.264269220468052e-06, + "loss": 0.887, + "step": 101640 + }, + { + "epoch": 0.7358104048585926, + "grad_norm": 0.1680750846862793, + "learning_rate": 4.264196833807466e-06, + "loss": 0.887, + "step": 101650 + }, + { + "epoch": 0.7358827915191788, + "grad_norm": 0.16871851682662964, + "learning_rate": 4.26412444714688e-06, + "loss": 0.8886, + "step": 101660 + }, + { + "epoch": 0.7359551781797651, + "grad_norm": 0.15388178825378418, + "learning_rate": 4.264052060486294e-06, + "loss": 0.8906, + "step": 101670 + }, + { + "epoch": 0.7360275648403513, + "grad_norm": 0.15053468942642212, + "learning_rate": 4.263979673825708e-06, + "loss": 0.8882, + "step": 101680 + }, + { + "epoch": 0.7360999515009374, + "grad_norm": 0.16372324526309967, + "learning_rate": 4.263907287165121e-06, + "loss": 0.8683, + "step": 101690 + }, + { + "epoch": 0.7361723381615236, + "grad_norm": 0.1610361635684967, + "learning_rate": 4.263834900504536e-06, + "loss": 0.8854, + "step": 101700 + }, + { + "epoch": 0.7362447248221098, + "grad_norm": 0.16573579609394073, + "learning_rate": 4.263762513843949e-06, + "loss": 0.9022, + "step": 101710 + }, + { + "epoch": 0.736317111482696, + "grad_norm": 0.16359181702136993, + "learning_rate": 4.263690127183363e-06, + "loss": 0.8873, + "step": 101720 + }, + { + "epoch": 0.7363894981432821, + "grad_norm": 0.296243280172348, + "learning_rate": 4.2636177405227766e-06, + "loss": 0.8911, + "step": 101730 + }, + { + "epoch": 0.7364618848038683, + "grad_norm": 0.16400732100009918, + "learning_rate": 4.263545353862191e-06, + "loss": 0.8859, + "step": 101740 + }, + { + "epoch": 0.7365342714644545, + "grad_norm": 0.15146301686763763, + "learning_rate": 4.263472967201605e-06, + "loss": 0.8912, + "step": 101750 + }, + { + "epoch": 0.7366066581250407, + "grad_norm": 0.1514797806739807, + "learning_rate": 4.263400580541018e-06, + "loss": 0.8805, + "step": 101760 + }, + { + "epoch": 0.736679044785627, + "grad_norm": 0.15184786915779114, + "learning_rate": 4.263328193880432e-06, + "loss": 0.8869, + "step": 101770 + }, + { + "epoch": 0.7367514314462131, + "grad_norm": 0.1572055220603943, + "learning_rate": 4.263255807219846e-06, + "loss": 0.8827, + "step": 101780 + }, + { + "epoch": 0.7368238181067993, + "grad_norm": 0.16821490228176117, + "learning_rate": 4.26318342055926e-06, + "loss": 0.9025, + "step": 101790 + }, + { + "epoch": 0.7368962047673855, + "grad_norm": 0.14948932826519012, + "learning_rate": 4.263111033898674e-06, + "loss": 0.8809, + "step": 101800 + }, + { + "epoch": 0.7369685914279717, + "grad_norm": 0.1437656432390213, + "learning_rate": 4.263038647238087e-06, + "loss": 0.894, + "step": 101810 + }, + { + "epoch": 0.7370409780885578, + "grad_norm": 0.16621340811252594, + "learning_rate": 4.262966260577502e-06, + "loss": 0.881, + "step": 101820 + }, + { + "epoch": 0.737113364749144, + "grad_norm": 0.14426559209823608, + "learning_rate": 4.262893873916915e-06, + "loss": 0.8927, + "step": 101830 + }, + { + "epoch": 0.7371857514097302, + "grad_norm": 0.16779235005378723, + "learning_rate": 4.262821487256329e-06, + "loss": 0.889, + "step": 101840 + }, + { + "epoch": 0.7372581380703164, + "grad_norm": 0.14878766238689423, + "learning_rate": 4.2627491005957425e-06, + "loss": 0.8903, + "step": 101850 + }, + { + "epoch": 0.7373305247309025, + "grad_norm": 0.15575487911701202, + "learning_rate": 4.262676713935157e-06, + "loss": 0.8779, + "step": 101860 + }, + { + "epoch": 0.7374029113914887, + "grad_norm": 0.1539398729801178, + "learning_rate": 4.262604327274571e-06, + "loss": 0.8843, + "step": 101870 + }, + { + "epoch": 0.737475298052075, + "grad_norm": 0.1627013087272644, + "learning_rate": 4.262531940613984e-06, + "loss": 0.8893, + "step": 101880 + }, + { + "epoch": 0.7375476847126612, + "grad_norm": 0.17876599729061127, + "learning_rate": 4.262459553953398e-06, + "loss": 0.8884, + "step": 101890 + }, + { + "epoch": 0.7376200713732474, + "grad_norm": 0.16160640120506287, + "learning_rate": 4.262387167292812e-06, + "loss": 0.8803, + "step": 101900 + }, + { + "epoch": 0.7376924580338335, + "grad_norm": 0.18297509849071503, + "learning_rate": 4.262314780632226e-06, + "loss": 0.8956, + "step": 101910 + }, + { + "epoch": 0.7377648446944197, + "grad_norm": 0.1643095761537552, + "learning_rate": 4.2622423939716395e-06, + "loss": 0.8855, + "step": 101920 + }, + { + "epoch": 0.7378372313550059, + "grad_norm": 0.17934858798980713, + "learning_rate": 4.262170007311053e-06, + "loss": 0.882, + "step": 101930 + }, + { + "epoch": 0.7379096180155921, + "grad_norm": 0.15763209760189056, + "learning_rate": 4.262097620650467e-06, + "loss": 0.8848, + "step": 101940 + }, + { + "epoch": 0.7379820046761782, + "grad_norm": 0.1518256813287735, + "learning_rate": 4.26202523398988e-06, + "loss": 0.8955, + "step": 101950 + }, + { + "epoch": 0.7380543913367644, + "grad_norm": 0.15647168457508087, + "learning_rate": 4.261952847329294e-06, + "loss": 0.9097, + "step": 101960 + }, + { + "epoch": 0.7381267779973506, + "grad_norm": 0.15015296638011932, + "learning_rate": 4.2618804606687084e-06, + "loss": 0.8752, + "step": 101970 + }, + { + "epoch": 0.7381991646579369, + "grad_norm": 0.15466606616973877, + "learning_rate": 4.261808074008122e-06, + "loss": 0.8849, + "step": 101980 + }, + { + "epoch": 0.7382715513185231, + "grad_norm": 0.17152267694473267, + "learning_rate": 4.261735687347536e-06, + "loss": 0.8728, + "step": 101990 + }, + { + "epoch": 0.7383439379791092, + "grad_norm": 0.14666572213172913, + "learning_rate": 4.261663300686949e-06, + "loss": 0.8757, + "step": 102000 + }, + { + "epoch": 0.7384163246396954, + "grad_norm": 0.1615079790353775, + "learning_rate": 4.261590914026364e-06, + "loss": 0.895, + "step": 102010 + }, + { + "epoch": 0.7384887113002816, + "grad_norm": 0.17585870623588562, + "learning_rate": 4.261518527365777e-06, + "loss": 0.8775, + "step": 102020 + }, + { + "epoch": 0.7385610979608678, + "grad_norm": 0.177878275513649, + "learning_rate": 4.261446140705191e-06, + "loss": 0.8664, + "step": 102030 + }, + { + "epoch": 0.738633484621454, + "grad_norm": 0.15934938192367554, + "learning_rate": 4.261373754044605e-06, + "loss": 0.8739, + "step": 102040 + }, + { + "epoch": 0.7387058712820401, + "grad_norm": 0.1503947675228119, + "learning_rate": 4.261301367384019e-06, + "loss": 0.8869, + "step": 102050 + }, + { + "epoch": 0.7387782579426263, + "grad_norm": 0.1672418862581253, + "learning_rate": 4.261228980723433e-06, + "loss": 0.8923, + "step": 102060 + }, + { + "epoch": 0.7388506446032125, + "grad_norm": 0.15783722698688507, + "learning_rate": 4.261156594062846e-06, + "loss": 0.9022, + "step": 102070 + }, + { + "epoch": 0.7389230312637987, + "grad_norm": 0.15607894957065582, + "learning_rate": 4.26108420740226e-06, + "loss": 0.8853, + "step": 102080 + }, + { + "epoch": 0.738995417924385, + "grad_norm": 0.14554698765277863, + "learning_rate": 4.261011820741674e-06, + "loss": 0.8896, + "step": 102090 + }, + { + "epoch": 0.7390678045849711, + "grad_norm": 0.15600426495075226, + "learning_rate": 4.260939434081088e-06, + "loss": 0.8755, + "step": 102100 + }, + { + "epoch": 0.7391401912455573, + "grad_norm": 0.14329098165035248, + "learning_rate": 4.260867047420502e-06, + "loss": 0.9022, + "step": 102110 + }, + { + "epoch": 0.7392125779061435, + "grad_norm": 0.1529379040002823, + "learning_rate": 4.260794660759915e-06, + "loss": 0.8871, + "step": 102120 + }, + { + "epoch": 0.7392849645667297, + "grad_norm": 0.15350185334682465, + "learning_rate": 4.260722274099329e-06, + "loss": 0.8908, + "step": 102130 + }, + { + "epoch": 0.7393573512273158, + "grad_norm": 0.17035044729709625, + "learning_rate": 4.260649887438743e-06, + "loss": 0.8809, + "step": 102140 + }, + { + "epoch": 0.739429737887902, + "grad_norm": 0.14859919250011444, + "learning_rate": 4.260577500778157e-06, + "loss": 0.901, + "step": 102150 + }, + { + "epoch": 0.7395021245484882, + "grad_norm": 0.15911567211151123, + "learning_rate": 4.2605051141175705e-06, + "loss": 0.8832, + "step": 102160 + }, + { + "epoch": 0.7395745112090744, + "grad_norm": 0.17544224858283997, + "learning_rate": 4.260432727456984e-06, + "loss": 0.884, + "step": 102170 + }, + { + "epoch": 0.7396468978696605, + "grad_norm": 0.15622149407863617, + "learning_rate": 4.260360340796399e-06, + "loss": 0.8888, + "step": 102180 + }, + { + "epoch": 0.7397192845302467, + "grad_norm": 0.20478832721710205, + "learning_rate": 4.260287954135812e-06, + "loss": 0.8916, + "step": 102190 + }, + { + "epoch": 0.739791671190833, + "grad_norm": 0.15085692703723907, + "learning_rate": 4.260215567475226e-06, + "loss": 0.8811, + "step": 102200 + }, + { + "epoch": 0.7398640578514192, + "grad_norm": 0.15468019247055054, + "learning_rate": 4.2601431808146395e-06, + "loss": 0.8901, + "step": 102210 + }, + { + "epoch": 0.7399364445120054, + "grad_norm": 0.15799905359745026, + "learning_rate": 4.260070794154054e-06, + "loss": 0.8872, + "step": 102220 + }, + { + "epoch": 0.7400088311725915, + "grad_norm": 0.14824278652668, + "learning_rate": 4.2599984074934675e-06, + "loss": 0.8929, + "step": 102230 + }, + { + "epoch": 0.7400812178331777, + "grad_norm": 0.15417352318763733, + "learning_rate": 4.259926020832881e-06, + "loss": 0.8917, + "step": 102240 + }, + { + "epoch": 0.7401536044937639, + "grad_norm": 0.16246527433395386, + "learning_rate": 4.259853634172295e-06, + "loss": 0.8828, + "step": 102250 + }, + { + "epoch": 0.7402259911543501, + "grad_norm": 0.14646829664707184, + "learning_rate": 4.259781247511709e-06, + "loss": 0.8966, + "step": 102260 + }, + { + "epoch": 0.7402983778149362, + "grad_norm": 0.15677151083946228, + "learning_rate": 4.259708860851123e-06, + "loss": 0.8897, + "step": 102270 + }, + { + "epoch": 0.7403707644755224, + "grad_norm": 0.16189172863960266, + "learning_rate": 4.2596364741905365e-06, + "loss": 0.8789, + "step": 102280 + }, + { + "epoch": 0.7404431511361086, + "grad_norm": 0.15842679142951965, + "learning_rate": 4.25956408752995e-06, + "loss": 0.8876, + "step": 102290 + }, + { + "epoch": 0.7405155377966949, + "grad_norm": 0.14995113015174866, + "learning_rate": 4.2594917008693646e-06, + "loss": 0.8881, + "step": 102300 + }, + { + "epoch": 0.740587924457281, + "grad_norm": 0.1539173424243927, + "learning_rate": 4.259419314208778e-06, + "loss": 0.8848, + "step": 102310 + }, + { + "epoch": 0.7406603111178672, + "grad_norm": 0.17772093415260315, + "learning_rate": 4.259346927548192e-06, + "loss": 0.8902, + "step": 102320 + }, + { + "epoch": 0.7407326977784534, + "grad_norm": 0.1554667055606842, + "learning_rate": 4.259274540887605e-06, + "loss": 0.8865, + "step": 102330 + }, + { + "epoch": 0.7408050844390396, + "grad_norm": 0.15641778707504272, + "learning_rate": 4.25920215422702e-06, + "loss": 0.9031, + "step": 102340 + }, + { + "epoch": 0.7408774710996258, + "grad_norm": 0.15458346903324127, + "learning_rate": 4.2591297675664335e-06, + "loss": 0.8843, + "step": 102350 + }, + { + "epoch": 0.7409498577602119, + "grad_norm": 0.15070627629756927, + "learning_rate": 4.259057380905847e-06, + "loss": 0.8975, + "step": 102360 + }, + { + "epoch": 0.7410222444207981, + "grad_norm": 0.15110282599925995, + "learning_rate": 4.258984994245261e-06, + "loss": 0.8928, + "step": 102370 + }, + { + "epoch": 0.7410946310813843, + "grad_norm": 0.1538836807012558, + "learning_rate": 4.258912607584675e-06, + "loss": 0.8946, + "step": 102380 + }, + { + "epoch": 0.7411670177419705, + "grad_norm": 0.15167924761772156, + "learning_rate": 4.258840220924089e-06, + "loss": 0.8839, + "step": 102390 + }, + { + "epoch": 0.7412394044025566, + "grad_norm": 0.15393514931201935, + "learning_rate": 4.258767834263502e-06, + "loss": 0.887, + "step": 102400 + }, + { + "epoch": 0.7413117910631429, + "grad_norm": 0.1549709439277649, + "learning_rate": 4.258695447602916e-06, + "loss": 0.8871, + "step": 102410 + }, + { + "epoch": 0.7413841777237291, + "grad_norm": 0.16167686879634857, + "learning_rate": 4.2586230609423305e-06, + "loss": 0.8783, + "step": 102420 + }, + { + "epoch": 0.7414565643843153, + "grad_norm": 0.15924765169620514, + "learning_rate": 4.258550674281744e-06, + "loss": 0.8769, + "step": 102430 + }, + { + "epoch": 0.7415289510449015, + "grad_norm": 0.15444104373455048, + "learning_rate": 4.258478287621158e-06, + "loss": 0.8888, + "step": 102440 + }, + { + "epoch": 0.7416013377054876, + "grad_norm": 0.14428524672985077, + "learning_rate": 4.258405900960571e-06, + "loss": 0.8683, + "step": 102450 + }, + { + "epoch": 0.7416737243660738, + "grad_norm": 0.16138869524002075, + "learning_rate": 4.258333514299985e-06, + "loss": 0.8838, + "step": 102460 + }, + { + "epoch": 0.74174611102666, + "grad_norm": 0.16185913980007172, + "learning_rate": 4.2582611276393986e-06, + "loss": 0.8833, + "step": 102470 + }, + { + "epoch": 0.7418184976872462, + "grad_norm": 0.1485951989889145, + "learning_rate": 4.258188740978812e-06, + "loss": 0.8956, + "step": 102480 + }, + { + "epoch": 0.7418908843478323, + "grad_norm": 0.16537578403949738, + "learning_rate": 4.258116354318227e-06, + "loss": 0.8927, + "step": 102490 + }, + { + "epoch": 0.7419632710084185, + "grad_norm": 0.14754781126976013, + "learning_rate": 4.25804396765764e-06, + "loss": 0.881, + "step": 102500 + }, + { + "epoch": 0.7420356576690048, + "grad_norm": 0.14885316789150238, + "learning_rate": 4.257971580997054e-06, + "loss": 0.8927, + "step": 102510 + }, + { + "epoch": 0.742108044329591, + "grad_norm": 0.17432914674282074, + "learning_rate": 4.2578991943364675e-06, + "loss": 0.8965, + "step": 102520 + }, + { + "epoch": 0.7421804309901772, + "grad_norm": 0.1665223240852356, + "learning_rate": 4.257826807675882e-06, + "loss": 0.8862, + "step": 102530 + }, + { + "epoch": 0.7422528176507633, + "grad_norm": 0.15308286249637604, + "learning_rate": 4.257754421015296e-06, + "loss": 0.8913, + "step": 102540 + }, + { + "epoch": 0.7423252043113495, + "grad_norm": 0.1635119467973709, + "learning_rate": 4.257682034354709e-06, + "loss": 0.8989, + "step": 102550 + }, + { + "epoch": 0.7423975909719357, + "grad_norm": 0.15072479844093323, + "learning_rate": 4.257609647694123e-06, + "loss": 0.8836, + "step": 102560 + }, + { + "epoch": 0.7424699776325219, + "grad_norm": 0.17337563633918762, + "learning_rate": 4.257537261033537e-06, + "loss": 0.8954, + "step": 102570 + }, + { + "epoch": 0.742542364293108, + "grad_norm": 0.14887109398841858, + "learning_rate": 4.257464874372951e-06, + "loss": 0.8954, + "step": 102580 + }, + { + "epoch": 0.7426147509536942, + "grad_norm": 0.16400760412216187, + "learning_rate": 4.2573924877123645e-06, + "loss": 0.8847, + "step": 102590 + }, + { + "epoch": 0.7426871376142804, + "grad_norm": 0.15407805144786835, + "learning_rate": 4.257320101051778e-06, + "loss": 0.8885, + "step": 102600 + }, + { + "epoch": 0.7427595242748666, + "grad_norm": 0.15420646965503693, + "learning_rate": 4.257247714391193e-06, + "loss": 0.8844, + "step": 102610 + }, + { + "epoch": 0.7428319109354529, + "grad_norm": 0.15996070206165314, + "learning_rate": 4.257175327730606e-06, + "loss": 0.8854, + "step": 102620 + }, + { + "epoch": 0.742904297596039, + "grad_norm": 0.16860343515872955, + "learning_rate": 4.25710294107002e-06, + "loss": 0.8879, + "step": 102630 + }, + { + "epoch": 0.7429766842566252, + "grad_norm": 0.15210293233394623, + "learning_rate": 4.2570305544094334e-06, + "loss": 0.8922, + "step": 102640 + }, + { + "epoch": 0.7430490709172114, + "grad_norm": 0.17375917732715607, + "learning_rate": 4.256958167748848e-06, + "loss": 0.8811, + "step": 102650 + }, + { + "epoch": 0.7431214575777976, + "grad_norm": 0.16159164905548096, + "learning_rate": 4.2568857810882615e-06, + "loss": 0.8704, + "step": 102660 + }, + { + "epoch": 0.7431938442383837, + "grad_norm": 0.1597278118133545, + "learning_rate": 4.256813394427675e-06, + "loss": 0.8723, + "step": 102670 + }, + { + "epoch": 0.7432662308989699, + "grad_norm": 0.16430126130580902, + "learning_rate": 4.256741007767089e-06, + "loss": 0.8836, + "step": 102680 + }, + { + "epoch": 0.7433386175595561, + "grad_norm": 0.15877282619476318, + "learning_rate": 4.256668621106503e-06, + "loss": 0.887, + "step": 102690 + }, + { + "epoch": 0.7434110042201423, + "grad_norm": 0.15375186502933502, + "learning_rate": 4.256596234445917e-06, + "loss": 0.8792, + "step": 102700 + }, + { + "epoch": 0.7434833908807285, + "grad_norm": 0.177475243806839, + "learning_rate": 4.2565238477853304e-06, + "loss": 0.8842, + "step": 102710 + }, + { + "epoch": 0.7435557775413146, + "grad_norm": 0.14837265014648438, + "learning_rate": 4.256451461124744e-06, + "loss": 0.8891, + "step": 102720 + }, + { + "epoch": 0.7436281642019009, + "grad_norm": 0.1412384957075119, + "learning_rate": 4.2563790744641585e-06, + "loss": 0.8849, + "step": 102730 + }, + { + "epoch": 0.7437005508624871, + "grad_norm": 0.15624116361141205, + "learning_rate": 4.256306687803572e-06, + "loss": 0.893, + "step": 102740 + }, + { + "epoch": 0.7437729375230733, + "grad_norm": 0.15126636624336243, + "learning_rate": 4.256234301142986e-06, + "loss": 0.8776, + "step": 102750 + }, + { + "epoch": 0.7438453241836595, + "grad_norm": 0.1584903746843338, + "learning_rate": 4.256161914482399e-06, + "loss": 0.8855, + "step": 102760 + }, + { + "epoch": 0.7439177108442456, + "grad_norm": 0.1627122163772583, + "learning_rate": 4.256089527821813e-06, + "loss": 0.89, + "step": 102770 + }, + { + "epoch": 0.7439900975048318, + "grad_norm": 0.15770822763442993, + "learning_rate": 4.2560171411612274e-06, + "loss": 0.8928, + "step": 102780 + }, + { + "epoch": 0.744062484165418, + "grad_norm": 0.1587304323911667, + "learning_rate": 4.255944754500641e-06, + "loss": 0.8867, + "step": 102790 + }, + { + "epoch": 0.7441348708260042, + "grad_norm": 0.15314078330993652, + "learning_rate": 4.255872367840055e-06, + "loss": 0.8957, + "step": 102800 + }, + { + "epoch": 0.7442072574865903, + "grad_norm": 0.14915263652801514, + "learning_rate": 4.255799981179468e-06, + "loss": 0.8805, + "step": 102810 + }, + { + "epoch": 0.7442796441471765, + "grad_norm": 0.14680497348308563, + "learning_rate": 4.255727594518883e-06, + "loss": 0.8882, + "step": 102820 + }, + { + "epoch": 0.7443520308077628, + "grad_norm": 0.15903522074222565, + "learning_rate": 4.255655207858296e-06, + "loss": 0.8976, + "step": 102830 + }, + { + "epoch": 0.744424417468349, + "grad_norm": 0.1559790074825287, + "learning_rate": 4.25558282119771e-06, + "loss": 0.8856, + "step": 102840 + }, + { + "epoch": 0.7444968041289352, + "grad_norm": 0.15489821135997772, + "learning_rate": 4.255510434537124e-06, + "loss": 0.8878, + "step": 102850 + }, + { + "epoch": 0.7445691907895213, + "grad_norm": 0.15823949873447418, + "learning_rate": 4.255438047876538e-06, + "loss": 0.8764, + "step": 102860 + }, + { + "epoch": 0.7446415774501075, + "grad_norm": 0.1716890037059784, + "learning_rate": 4.255365661215952e-06, + "loss": 0.8994, + "step": 102870 + }, + { + "epoch": 0.7447139641106937, + "grad_norm": 0.15007559955120087, + "learning_rate": 4.255293274555365e-06, + "loss": 0.8947, + "step": 102880 + }, + { + "epoch": 0.7447863507712799, + "grad_norm": 0.16091489791870117, + "learning_rate": 4.255220887894779e-06, + "loss": 0.8847, + "step": 102890 + }, + { + "epoch": 0.744858737431866, + "grad_norm": 0.1675153523683548, + "learning_rate": 4.255148501234193e-06, + "loss": 0.8804, + "step": 102900 + }, + { + "epoch": 0.7449311240924522, + "grad_norm": 0.15456238389015198, + "learning_rate": 4.255076114573607e-06, + "loss": 0.8986, + "step": 102910 + }, + { + "epoch": 0.7450035107530384, + "grad_norm": 0.15441204607486725, + "learning_rate": 4.255003727913021e-06, + "loss": 0.8742, + "step": 102920 + }, + { + "epoch": 0.7450758974136246, + "grad_norm": 0.16027353703975677, + "learning_rate": 4.254931341252434e-06, + "loss": 0.8856, + "step": 102930 + }, + { + "epoch": 0.7451482840742109, + "grad_norm": 0.16300475597381592, + "learning_rate": 4.254858954591849e-06, + "loss": 0.882, + "step": 102940 + }, + { + "epoch": 0.745220670734797, + "grad_norm": 0.15692980587482452, + "learning_rate": 4.254786567931262e-06, + "loss": 0.8875, + "step": 102950 + }, + { + "epoch": 0.7452930573953832, + "grad_norm": 0.16681601107120514, + "learning_rate": 4.254714181270676e-06, + "loss": 0.8866, + "step": 102960 + }, + { + "epoch": 0.7453654440559694, + "grad_norm": 0.16296398639678955, + "learning_rate": 4.2546417946100895e-06, + "loss": 0.8969, + "step": 102970 + }, + { + "epoch": 0.7454378307165556, + "grad_norm": 0.1497073471546173, + "learning_rate": 4.254569407949504e-06, + "loss": 0.8949, + "step": 102980 + }, + { + "epoch": 0.7455102173771417, + "grad_norm": 0.17505697906017303, + "learning_rate": 4.254497021288917e-06, + "loss": 0.8935, + "step": 102990 + }, + { + "epoch": 0.7455826040377279, + "grad_norm": 0.15840458869934082, + "learning_rate": 4.25442463462833e-06, + "loss": 0.8957, + "step": 103000 + }, + { + "epoch": 0.7456549906983141, + "grad_norm": 0.1545899212360382, + "learning_rate": 4.254352247967745e-06, + "loss": 0.8853, + "step": 103010 + }, + { + "epoch": 0.7457273773589003, + "grad_norm": 0.1966148018836975, + "learning_rate": 4.2542798613071585e-06, + "loss": 0.8883, + "step": 103020 + }, + { + "epoch": 0.7457997640194864, + "grad_norm": 0.1721884310245514, + "learning_rate": 4.254207474646572e-06, + "loss": 0.8887, + "step": 103030 + }, + { + "epoch": 0.7458721506800726, + "grad_norm": 0.19111919403076172, + "learning_rate": 4.254135087985986e-06, + "loss": 0.8829, + "step": 103040 + }, + { + "epoch": 0.7459445373406589, + "grad_norm": 0.15273134410381317, + "learning_rate": 4.2540627013254e-06, + "loss": 0.8952, + "step": 103050 + }, + { + "epoch": 0.7460169240012451, + "grad_norm": 0.16259609162807465, + "learning_rate": 4.253990314664814e-06, + "loss": 0.8866, + "step": 103060 + }, + { + "epoch": 0.7460893106618313, + "grad_norm": 0.15013474225997925, + "learning_rate": 4.253917928004227e-06, + "loss": 0.8792, + "step": 103070 + }, + { + "epoch": 0.7461616973224174, + "grad_norm": 0.17235596477985382, + "learning_rate": 4.253845541343641e-06, + "loss": 0.9079, + "step": 103080 + }, + { + "epoch": 0.7462340839830036, + "grad_norm": 0.16108207404613495, + "learning_rate": 4.2537731546830555e-06, + "loss": 0.8797, + "step": 103090 + }, + { + "epoch": 0.7463064706435898, + "grad_norm": 0.16072994470596313, + "learning_rate": 4.253700768022469e-06, + "loss": 0.8804, + "step": 103100 + }, + { + "epoch": 0.746378857304176, + "grad_norm": 0.1662396937608719, + "learning_rate": 4.253628381361883e-06, + "loss": 0.8866, + "step": 103110 + }, + { + "epoch": 0.7464512439647621, + "grad_norm": 0.16367369890213013, + "learning_rate": 4.253555994701296e-06, + "loss": 0.8975, + "step": 103120 + }, + { + "epoch": 0.7465236306253483, + "grad_norm": 0.15681147575378418, + "learning_rate": 4.253483608040711e-06, + "loss": 0.8995, + "step": 103130 + }, + { + "epoch": 0.7465960172859345, + "grad_norm": 0.1485588252544403, + "learning_rate": 4.253411221380124e-06, + "loss": 0.8863, + "step": 103140 + }, + { + "epoch": 0.7466684039465208, + "grad_norm": 0.1582004874944687, + "learning_rate": 4.253338834719538e-06, + "loss": 0.8895, + "step": 103150 + }, + { + "epoch": 0.746740790607107, + "grad_norm": 0.17631734907627106, + "learning_rate": 4.253266448058952e-06, + "loss": 0.8993, + "step": 103160 + }, + { + "epoch": 0.7468131772676931, + "grad_norm": 0.15485930442810059, + "learning_rate": 4.253194061398366e-06, + "loss": 0.8882, + "step": 103170 + }, + { + "epoch": 0.7468855639282793, + "grad_norm": 0.18264351785182953, + "learning_rate": 4.25312167473778e-06, + "loss": 0.8989, + "step": 103180 + }, + { + "epoch": 0.7469579505888655, + "grad_norm": 0.15158303081989288, + "learning_rate": 4.253049288077193e-06, + "loss": 0.9056, + "step": 103190 + }, + { + "epoch": 0.7470303372494517, + "grad_norm": 0.1658327281475067, + "learning_rate": 4.252976901416607e-06, + "loss": 0.8896, + "step": 103200 + }, + { + "epoch": 0.7471027239100378, + "grad_norm": 0.15178616344928741, + "learning_rate": 4.252904514756021e-06, + "loss": 0.8784, + "step": 103210 + }, + { + "epoch": 0.747175110570624, + "grad_norm": 0.15876880288124084, + "learning_rate": 4.252832128095435e-06, + "loss": 0.8802, + "step": 103220 + }, + { + "epoch": 0.7472474972312102, + "grad_norm": 0.15720294415950775, + "learning_rate": 4.252759741434849e-06, + "loss": 0.8837, + "step": 103230 + }, + { + "epoch": 0.7473198838917964, + "grad_norm": 0.16239741444587708, + "learning_rate": 4.252687354774262e-06, + "loss": 0.8917, + "step": 103240 + }, + { + "epoch": 0.7473922705523826, + "grad_norm": 0.19867613911628723, + "learning_rate": 4.252614968113677e-06, + "loss": 0.8822, + "step": 103250 + }, + { + "epoch": 0.7474646572129688, + "grad_norm": 0.15661980211734772, + "learning_rate": 4.25254258145309e-06, + "loss": 0.8789, + "step": 103260 + }, + { + "epoch": 0.747537043873555, + "grad_norm": 0.16807293891906738, + "learning_rate": 4.252470194792504e-06, + "loss": 0.9001, + "step": 103270 + }, + { + "epoch": 0.7476094305341412, + "grad_norm": 0.15940342843532562, + "learning_rate": 4.252397808131918e-06, + "loss": 0.8969, + "step": 103280 + }, + { + "epoch": 0.7476818171947274, + "grad_norm": 0.1640319973230362, + "learning_rate": 4.252325421471332e-06, + "loss": 0.8795, + "step": 103290 + }, + { + "epoch": 0.7477542038553135, + "grad_norm": 0.17689263820648193, + "learning_rate": 4.252253034810746e-06, + "loss": 0.891, + "step": 103300 + }, + { + "epoch": 0.7478265905158997, + "grad_norm": 0.14825983345508575, + "learning_rate": 4.252180648150159e-06, + "loss": 0.8791, + "step": 103310 + }, + { + "epoch": 0.7478989771764859, + "grad_norm": 0.162586972117424, + "learning_rate": 4.252108261489573e-06, + "loss": 0.8809, + "step": 103320 + }, + { + "epoch": 0.7479713638370721, + "grad_norm": 0.1549188792705536, + "learning_rate": 4.252035874828987e-06, + "loss": 0.8842, + "step": 103330 + }, + { + "epoch": 0.7480437504976583, + "grad_norm": 0.1467401683330536, + "learning_rate": 4.251963488168401e-06, + "loss": 0.8806, + "step": 103340 + }, + { + "epoch": 0.7481161371582444, + "grad_norm": 0.16220062971115112, + "learning_rate": 4.251891101507815e-06, + "loss": 0.8836, + "step": 103350 + }, + { + "epoch": 0.7481885238188307, + "grad_norm": 0.1533544659614563, + "learning_rate": 4.251818714847228e-06, + "loss": 0.8901, + "step": 103360 + }, + { + "epoch": 0.7482609104794169, + "grad_norm": 0.1560657024383545, + "learning_rate": 4.251746328186642e-06, + "loss": 0.8769, + "step": 103370 + }, + { + "epoch": 0.7483332971400031, + "grad_norm": 0.1645650416612625, + "learning_rate": 4.251673941526056e-06, + "loss": 0.8804, + "step": 103380 + }, + { + "epoch": 0.7484056838005892, + "grad_norm": 0.1500735580921173, + "learning_rate": 4.25160155486547e-06, + "loss": 0.8849, + "step": 103390 + }, + { + "epoch": 0.7484780704611754, + "grad_norm": 0.1593628227710724, + "learning_rate": 4.2515291682048835e-06, + "loss": 0.8793, + "step": 103400 + }, + { + "epoch": 0.7485504571217616, + "grad_norm": 0.16192486882209778, + "learning_rate": 4.251456781544297e-06, + "loss": 0.8873, + "step": 103410 + }, + { + "epoch": 0.7486228437823478, + "grad_norm": 0.147709459066391, + "learning_rate": 4.251384394883712e-06, + "loss": 0.8904, + "step": 103420 + }, + { + "epoch": 0.748695230442934, + "grad_norm": 0.16684521734714508, + "learning_rate": 4.251312008223125e-06, + "loss": 0.895, + "step": 103430 + }, + { + "epoch": 0.7487676171035201, + "grad_norm": 0.14957979321479797, + "learning_rate": 4.251239621562539e-06, + "loss": 0.8818, + "step": 103440 + }, + { + "epoch": 0.7488400037641063, + "grad_norm": 0.15316711366176605, + "learning_rate": 4.2511672349019524e-06, + "loss": 0.8816, + "step": 103450 + }, + { + "epoch": 0.7489123904246925, + "grad_norm": 0.15910635888576508, + "learning_rate": 4.251094848241367e-06, + "loss": 0.8801, + "step": 103460 + }, + { + "epoch": 0.7489847770852788, + "grad_norm": 0.1638316661119461, + "learning_rate": 4.2510224615807805e-06, + "loss": 0.8842, + "step": 103470 + }, + { + "epoch": 0.749057163745865, + "grad_norm": 0.15128594636917114, + "learning_rate": 4.250950074920194e-06, + "loss": 0.8978, + "step": 103480 + }, + { + "epoch": 0.7491295504064511, + "grad_norm": 0.15200836956501007, + "learning_rate": 4.250877688259608e-06, + "loss": 0.8843, + "step": 103490 + }, + { + "epoch": 0.7492019370670373, + "grad_norm": 0.19085319340229034, + "learning_rate": 4.250805301599022e-06, + "loss": 0.8784, + "step": 103500 + }, + { + "epoch": 0.7492743237276235, + "grad_norm": 0.15487466752529144, + "learning_rate": 4.250732914938436e-06, + "loss": 0.8954, + "step": 103510 + }, + { + "epoch": 0.7493467103882097, + "grad_norm": 0.15988904237747192, + "learning_rate": 4.2506605282778494e-06, + "loss": 0.8951, + "step": 103520 + }, + { + "epoch": 0.7494190970487958, + "grad_norm": 0.1502448171377182, + "learning_rate": 4.250588141617263e-06, + "loss": 0.8985, + "step": 103530 + }, + { + "epoch": 0.749491483709382, + "grad_norm": 0.153715580701828, + "learning_rate": 4.250515754956677e-06, + "loss": 0.8955, + "step": 103540 + }, + { + "epoch": 0.7495638703699682, + "grad_norm": 0.1608363538980484, + "learning_rate": 4.25044336829609e-06, + "loss": 0.8897, + "step": 103550 + }, + { + "epoch": 0.7496362570305544, + "grad_norm": 0.1529805064201355, + "learning_rate": 4.250370981635504e-06, + "loss": 0.8936, + "step": 103560 + }, + { + "epoch": 0.7497086436911405, + "grad_norm": 0.15650229156017303, + "learning_rate": 4.250298594974918e-06, + "loss": 0.8815, + "step": 103570 + }, + { + "epoch": 0.7497810303517268, + "grad_norm": 0.1722974181175232, + "learning_rate": 4.250226208314332e-06, + "loss": 0.9026, + "step": 103580 + }, + { + "epoch": 0.749853417012313, + "grad_norm": 0.16492018103599548, + "learning_rate": 4.250153821653746e-06, + "loss": 0.8814, + "step": 103590 + }, + { + "epoch": 0.7499258036728992, + "grad_norm": 0.15420758724212646, + "learning_rate": 4.250081434993159e-06, + "loss": 0.888, + "step": 103600 + }, + { + "epoch": 0.7499981903334854, + "grad_norm": 0.15207520127296448, + "learning_rate": 4.250009048332574e-06, + "loss": 0.8778, + "step": 103610 + }, + { + "epoch": 0.7500705769940715, + "grad_norm": 0.15864045917987823, + "learning_rate": 4.249936661671987e-06, + "loss": 0.885, + "step": 103620 + }, + { + "epoch": 0.7501429636546577, + "grad_norm": 0.16581547260284424, + "learning_rate": 4.249864275011401e-06, + "loss": 0.8845, + "step": 103630 + }, + { + "epoch": 0.7502153503152439, + "grad_norm": 0.16255062818527222, + "learning_rate": 4.2497918883508145e-06, + "loss": 0.8849, + "step": 103640 + }, + { + "epoch": 0.7502877369758301, + "grad_norm": 0.15730057656764984, + "learning_rate": 4.249719501690229e-06, + "loss": 0.8975, + "step": 103650 + }, + { + "epoch": 0.7503601236364162, + "grad_norm": 0.16164420545101166, + "learning_rate": 4.249647115029643e-06, + "loss": 0.8865, + "step": 103660 + }, + { + "epoch": 0.7504325102970024, + "grad_norm": 0.1671602427959442, + "learning_rate": 4.249574728369056e-06, + "loss": 0.8888, + "step": 103670 + }, + { + "epoch": 0.7505048969575887, + "grad_norm": 0.1699591726064682, + "learning_rate": 4.24950234170847e-06, + "loss": 0.87, + "step": 103680 + }, + { + "epoch": 0.7505772836181749, + "grad_norm": 0.16800114512443542, + "learning_rate": 4.249429955047884e-06, + "loss": 0.9011, + "step": 103690 + }, + { + "epoch": 0.7506496702787611, + "grad_norm": 0.18705791234970093, + "learning_rate": 4.249357568387298e-06, + "loss": 0.8766, + "step": 103700 + }, + { + "epoch": 0.7507220569393472, + "grad_norm": 0.1909557431936264, + "learning_rate": 4.2492851817267115e-06, + "loss": 0.9045, + "step": 103710 + }, + { + "epoch": 0.7507944435999334, + "grad_norm": 0.15778642892837524, + "learning_rate": 4.249212795066125e-06, + "loss": 0.8715, + "step": 103720 + }, + { + "epoch": 0.7508668302605196, + "grad_norm": 0.1488863229751587, + "learning_rate": 4.24914040840554e-06, + "loss": 0.8756, + "step": 103730 + }, + { + "epoch": 0.7509392169211058, + "grad_norm": 0.15307924151420593, + "learning_rate": 4.249068021744953e-06, + "loss": 0.8738, + "step": 103740 + }, + { + "epoch": 0.7510116035816919, + "grad_norm": 0.18600323796272278, + "learning_rate": 4.248995635084367e-06, + "loss": 0.8883, + "step": 103750 + }, + { + "epoch": 0.7510839902422781, + "grad_norm": 0.16874092817306519, + "learning_rate": 4.2489232484237805e-06, + "loss": 0.8835, + "step": 103760 + }, + { + "epoch": 0.7511563769028643, + "grad_norm": 0.1458248794078827, + "learning_rate": 4.248850861763195e-06, + "loss": 0.878, + "step": 103770 + }, + { + "epoch": 0.7512287635634505, + "grad_norm": 0.1547769457101822, + "learning_rate": 4.2487784751026086e-06, + "loss": 0.888, + "step": 103780 + }, + { + "epoch": 0.7513011502240368, + "grad_norm": 0.15332648158073425, + "learning_rate": 4.248706088442022e-06, + "loss": 0.8808, + "step": 103790 + }, + { + "epoch": 0.7513735368846229, + "grad_norm": 0.16886559128761292, + "learning_rate": 4.248633701781436e-06, + "loss": 0.884, + "step": 103800 + }, + { + "epoch": 0.7514459235452091, + "grad_norm": 0.16709497570991516, + "learning_rate": 4.24856131512085e-06, + "loss": 0.8972, + "step": 103810 + }, + { + "epoch": 0.7515183102057953, + "grad_norm": 0.15135438740253448, + "learning_rate": 4.248488928460264e-06, + "loss": 0.8969, + "step": 103820 + }, + { + "epoch": 0.7515906968663815, + "grad_norm": 0.15672151744365692, + "learning_rate": 4.2484165417996775e-06, + "loss": 0.893, + "step": 103830 + }, + { + "epoch": 0.7516630835269676, + "grad_norm": 0.14277760684490204, + "learning_rate": 4.248344155139091e-06, + "loss": 0.8874, + "step": 103840 + }, + { + "epoch": 0.7517354701875538, + "grad_norm": 0.15540048480033875, + "learning_rate": 4.2482717684785056e-06, + "loss": 0.8904, + "step": 103850 + }, + { + "epoch": 0.75180785684814, + "grad_norm": 0.14748062193393707, + "learning_rate": 4.248199381817919e-06, + "loss": 0.8929, + "step": 103860 + }, + { + "epoch": 0.7518802435087262, + "grad_norm": 0.154066801071167, + "learning_rate": 4.248126995157333e-06, + "loss": 0.8819, + "step": 103870 + }, + { + "epoch": 0.7519526301693124, + "grad_norm": 0.15664027631282806, + "learning_rate": 4.248054608496746e-06, + "loss": 0.8937, + "step": 103880 + }, + { + "epoch": 0.7520250168298986, + "grad_norm": 0.15651507675647736, + "learning_rate": 4.247982221836161e-06, + "loss": 0.8824, + "step": 103890 + }, + { + "epoch": 0.7520974034904848, + "grad_norm": 0.15763552486896515, + "learning_rate": 4.2479098351755745e-06, + "loss": 0.8743, + "step": 103900 + }, + { + "epoch": 0.752169790151071, + "grad_norm": 0.15285298228263855, + "learning_rate": 4.247837448514988e-06, + "loss": 0.8947, + "step": 103910 + }, + { + "epoch": 0.7522421768116572, + "grad_norm": 0.15118758380413055, + "learning_rate": 4.247765061854402e-06, + "loss": 0.8839, + "step": 103920 + }, + { + "epoch": 0.7523145634722433, + "grad_norm": 0.16061747074127197, + "learning_rate": 4.247692675193816e-06, + "loss": 0.8882, + "step": 103930 + }, + { + "epoch": 0.7523869501328295, + "grad_norm": 0.15958862006664276, + "learning_rate": 4.24762028853323e-06, + "loss": 0.8741, + "step": 103940 + }, + { + "epoch": 0.7524593367934157, + "grad_norm": 0.152791365981102, + "learning_rate": 4.247547901872643e-06, + "loss": 0.8921, + "step": 103950 + }, + { + "epoch": 0.7525317234540019, + "grad_norm": 0.15019190311431885, + "learning_rate": 4.247475515212057e-06, + "loss": 0.8939, + "step": 103960 + }, + { + "epoch": 0.752604110114588, + "grad_norm": 0.15106353163719177, + "learning_rate": 4.2474031285514715e-06, + "loss": 0.877, + "step": 103970 + }, + { + "epoch": 0.7526764967751742, + "grad_norm": 0.15266717970371246, + "learning_rate": 4.247330741890885e-06, + "loss": 0.8791, + "step": 103980 + }, + { + "epoch": 0.7527488834357604, + "grad_norm": 0.16882237792015076, + "learning_rate": 4.247258355230299e-06, + "loss": 0.8827, + "step": 103990 + }, + { + "epoch": 0.7528212700963467, + "grad_norm": 0.15489928424358368, + "learning_rate": 4.247185968569712e-06, + "loss": 0.8958, + "step": 104000 + }, + { + "epoch": 0.7528936567569329, + "grad_norm": 0.15682126581668854, + "learning_rate": 4.247113581909126e-06, + "loss": 0.8914, + "step": 104010 + }, + { + "epoch": 0.752966043417519, + "grad_norm": 0.1847575455904007, + "learning_rate": 4.24704119524854e-06, + "loss": 0.8897, + "step": 104020 + }, + { + "epoch": 0.7530384300781052, + "grad_norm": 0.15872369706630707, + "learning_rate": 4.246968808587954e-06, + "loss": 0.8898, + "step": 104030 + }, + { + "epoch": 0.7531108167386914, + "grad_norm": 0.16287031769752502, + "learning_rate": 4.246896421927368e-06, + "loss": 0.8857, + "step": 104040 + }, + { + "epoch": 0.7531832033992776, + "grad_norm": 0.14909902215003967, + "learning_rate": 4.246824035266781e-06, + "loss": 0.8943, + "step": 104050 + }, + { + "epoch": 0.7532555900598638, + "grad_norm": 0.1613544374704361, + "learning_rate": 4.246751648606195e-06, + "loss": 0.8926, + "step": 104060 + }, + { + "epoch": 0.7533279767204499, + "grad_norm": 0.14487162232398987, + "learning_rate": 4.2466792619456085e-06, + "loss": 0.8836, + "step": 104070 + }, + { + "epoch": 0.7534003633810361, + "grad_norm": 0.16244520246982574, + "learning_rate": 4.246606875285023e-06, + "loss": 0.8857, + "step": 104080 + }, + { + "epoch": 0.7534727500416223, + "grad_norm": 0.15103542804718018, + "learning_rate": 4.246534488624437e-06, + "loss": 0.8759, + "step": 104090 + }, + { + "epoch": 0.7535451367022085, + "grad_norm": 0.15711627900600433, + "learning_rate": 4.24646210196385e-06, + "loss": 0.8965, + "step": 104100 + }, + { + "epoch": 0.7536175233627948, + "grad_norm": 0.15874861180782318, + "learning_rate": 4.246389715303264e-06, + "loss": 0.9034, + "step": 104110 + }, + { + "epoch": 0.7536899100233809, + "grad_norm": 0.14917586743831635, + "learning_rate": 4.246317328642678e-06, + "loss": 0.8858, + "step": 104120 + }, + { + "epoch": 0.7537622966839671, + "grad_norm": 0.14904716610908508, + "learning_rate": 4.246244941982092e-06, + "loss": 0.8882, + "step": 104130 + }, + { + "epoch": 0.7538346833445533, + "grad_norm": 0.15895779430866241, + "learning_rate": 4.2461725553215055e-06, + "loss": 0.8892, + "step": 104140 + }, + { + "epoch": 0.7539070700051395, + "grad_norm": 0.2372187227010727, + "learning_rate": 4.246100168660919e-06, + "loss": 0.89, + "step": 104150 + }, + { + "epoch": 0.7539794566657256, + "grad_norm": 0.18698617815971375, + "learning_rate": 4.246027782000333e-06, + "loss": 0.9019, + "step": 104160 + }, + { + "epoch": 0.7540518433263118, + "grad_norm": 0.16438809037208557, + "learning_rate": 4.245955395339747e-06, + "loss": 0.8853, + "step": 104170 + }, + { + "epoch": 0.754124229986898, + "grad_norm": 0.17748364806175232, + "learning_rate": 4.245883008679161e-06, + "loss": 0.8724, + "step": 104180 + }, + { + "epoch": 0.7541966166474842, + "grad_norm": 0.14284367859363556, + "learning_rate": 4.2458106220185744e-06, + "loss": 0.889, + "step": 104190 + }, + { + "epoch": 0.7542690033080703, + "grad_norm": 0.15544088184833527, + "learning_rate": 4.245738235357988e-06, + "loss": 0.875, + "step": 104200 + }, + { + "epoch": 0.7543413899686566, + "grad_norm": 0.16390086710453033, + "learning_rate": 4.2456658486974025e-06, + "loss": 0.8853, + "step": 104210 + }, + { + "epoch": 0.7544137766292428, + "grad_norm": 0.15978281199932098, + "learning_rate": 4.245593462036816e-06, + "loss": 0.8848, + "step": 104220 + }, + { + "epoch": 0.754486163289829, + "grad_norm": 0.1489153504371643, + "learning_rate": 4.24552107537623e-06, + "loss": 0.8817, + "step": 104230 + }, + { + "epoch": 0.7545585499504152, + "grad_norm": 0.15438929200172424, + "learning_rate": 4.245448688715643e-06, + "loss": 0.8807, + "step": 104240 + }, + { + "epoch": 0.7546309366110013, + "grad_norm": 0.18427646160125732, + "learning_rate": 4.245376302055058e-06, + "loss": 0.89, + "step": 104250 + }, + { + "epoch": 0.7547033232715875, + "grad_norm": 0.15060459077358246, + "learning_rate": 4.2453039153944714e-06, + "loss": 0.8951, + "step": 104260 + }, + { + "epoch": 0.7547757099321737, + "grad_norm": 0.16309115290641785, + "learning_rate": 4.245231528733885e-06, + "loss": 0.8921, + "step": 104270 + }, + { + "epoch": 0.7548480965927599, + "grad_norm": 0.15942274034023285, + "learning_rate": 4.245159142073299e-06, + "loss": 0.8872, + "step": 104280 + }, + { + "epoch": 0.754920483253346, + "grad_norm": 0.1642897129058838, + "learning_rate": 4.245086755412713e-06, + "loss": 0.8889, + "step": 104290 + }, + { + "epoch": 0.7549928699139322, + "grad_norm": 0.15134483575820923, + "learning_rate": 4.245014368752127e-06, + "loss": 0.8769, + "step": 104300 + }, + { + "epoch": 0.7550652565745184, + "grad_norm": 0.16363342106342316, + "learning_rate": 4.24494198209154e-06, + "loss": 0.8751, + "step": 104310 + }, + { + "epoch": 0.7551376432351047, + "grad_norm": 0.1584373414516449, + "learning_rate": 4.244869595430954e-06, + "loss": 0.8846, + "step": 104320 + }, + { + "epoch": 0.7552100298956909, + "grad_norm": 0.15007972717285156, + "learning_rate": 4.2447972087703685e-06, + "loss": 0.8924, + "step": 104330 + }, + { + "epoch": 0.755282416556277, + "grad_norm": 0.16009405255317688, + "learning_rate": 4.244724822109782e-06, + "loss": 0.8788, + "step": 104340 + }, + { + "epoch": 0.7553548032168632, + "grad_norm": 0.14448218047618866, + "learning_rate": 4.244652435449196e-06, + "loss": 0.8778, + "step": 104350 + }, + { + "epoch": 0.7554271898774494, + "grad_norm": 0.1468927413225174, + "learning_rate": 4.244580048788609e-06, + "loss": 0.8914, + "step": 104360 + }, + { + "epoch": 0.7554995765380356, + "grad_norm": 0.14824378490447998, + "learning_rate": 4.244507662128024e-06, + "loss": 0.8868, + "step": 104370 + }, + { + "epoch": 0.7555719631986217, + "grad_norm": 0.15983250737190247, + "learning_rate": 4.244435275467437e-06, + "loss": 0.8888, + "step": 104380 + }, + { + "epoch": 0.7556443498592079, + "grad_norm": 0.15000095963478088, + "learning_rate": 4.244362888806851e-06, + "loss": 0.9001, + "step": 104390 + }, + { + "epoch": 0.7557167365197941, + "grad_norm": 0.15501455962657928, + "learning_rate": 4.244290502146265e-06, + "loss": 0.8791, + "step": 104400 + }, + { + "epoch": 0.7557891231803803, + "grad_norm": 0.14795927703380585, + "learning_rate": 4.244218115485679e-06, + "loss": 0.8769, + "step": 104410 + }, + { + "epoch": 0.7558615098409666, + "grad_norm": 0.16325373947620392, + "learning_rate": 4.244145728825093e-06, + "loss": 0.8827, + "step": 104420 + }, + { + "epoch": 0.7559338965015527, + "grad_norm": 0.15596868097782135, + "learning_rate": 4.244073342164506e-06, + "loss": 0.8761, + "step": 104430 + }, + { + "epoch": 0.7560062831621389, + "grad_norm": 0.14845876395702362, + "learning_rate": 4.24400095550392e-06, + "loss": 0.8798, + "step": 104440 + }, + { + "epoch": 0.7560786698227251, + "grad_norm": 0.1506635695695877, + "learning_rate": 4.243928568843334e-06, + "loss": 0.8831, + "step": 104450 + }, + { + "epoch": 0.7561510564833113, + "grad_norm": 0.14508011937141418, + "learning_rate": 4.243856182182748e-06, + "loss": 0.8923, + "step": 104460 + }, + { + "epoch": 0.7562234431438974, + "grad_norm": 0.16762535274028778, + "learning_rate": 4.243783795522162e-06, + "loss": 0.8903, + "step": 104470 + }, + { + "epoch": 0.7562958298044836, + "grad_norm": 0.15426135063171387, + "learning_rate": 4.243711408861575e-06, + "loss": 0.8721, + "step": 104480 + }, + { + "epoch": 0.7563682164650698, + "grad_norm": 0.15148262679576874, + "learning_rate": 4.24363902220099e-06, + "loss": 0.8973, + "step": 104490 + }, + { + "epoch": 0.756440603125656, + "grad_norm": 0.14398479461669922, + "learning_rate": 4.243566635540403e-06, + "loss": 0.8937, + "step": 104500 + }, + { + "epoch": 0.7565129897862422, + "grad_norm": 0.16111652553081512, + "learning_rate": 4.243494248879817e-06, + "loss": 0.8981, + "step": 104510 + }, + { + "epoch": 0.7565853764468283, + "grad_norm": 0.14975638687610626, + "learning_rate": 4.2434218622192306e-06, + "loss": 0.8924, + "step": 104520 + }, + { + "epoch": 0.7566577631074146, + "grad_norm": 0.1521756798028946, + "learning_rate": 4.243349475558645e-06, + "loss": 0.88, + "step": 104530 + }, + { + "epoch": 0.7567301497680008, + "grad_norm": 0.17231915891170502, + "learning_rate": 4.243277088898059e-06, + "loss": 0.8897, + "step": 104540 + }, + { + "epoch": 0.756802536428587, + "grad_norm": 0.16323207318782806, + "learning_rate": 4.243204702237472e-06, + "loss": 0.8935, + "step": 104550 + }, + { + "epoch": 0.7568749230891731, + "grad_norm": 0.14698879420757294, + "learning_rate": 4.243132315576886e-06, + "loss": 0.8892, + "step": 104560 + }, + { + "epoch": 0.7569473097497593, + "grad_norm": 0.15942469239234924, + "learning_rate": 4.2430599289163e-06, + "loss": 0.8943, + "step": 104570 + }, + { + "epoch": 0.7570196964103455, + "grad_norm": 0.16518035531044006, + "learning_rate": 4.242987542255713e-06, + "loss": 0.8947, + "step": 104580 + }, + { + "epoch": 0.7570920830709317, + "grad_norm": 0.14789389073848724, + "learning_rate": 4.242915155595127e-06, + "loss": 0.8858, + "step": 104590 + }, + { + "epoch": 0.7571644697315179, + "grad_norm": 0.1392948478460312, + "learning_rate": 4.242842768934541e-06, + "loss": 0.8891, + "step": 104600 + }, + { + "epoch": 0.757236856392104, + "grad_norm": 0.15600177645683289, + "learning_rate": 4.242770382273955e-06, + "loss": 0.9023, + "step": 104610 + }, + { + "epoch": 0.7573092430526902, + "grad_norm": 0.16960683465003967, + "learning_rate": 4.242697995613368e-06, + "loss": 0.879, + "step": 104620 + }, + { + "epoch": 0.7573816297132764, + "grad_norm": 0.15232223272323608, + "learning_rate": 4.242625608952782e-06, + "loss": 0.8809, + "step": 104630 + }, + { + "epoch": 0.7574540163738627, + "grad_norm": 0.1578724980354309, + "learning_rate": 4.2425532222921965e-06, + "loss": 0.9066, + "step": 104640 + }, + { + "epoch": 0.7575264030344488, + "grad_norm": 0.1544656753540039, + "learning_rate": 4.24248083563161e-06, + "loss": 0.8877, + "step": 104650 + }, + { + "epoch": 0.757598789695035, + "grad_norm": 0.154565691947937, + "learning_rate": 4.242408448971024e-06, + "loss": 0.8876, + "step": 104660 + }, + { + "epoch": 0.7576711763556212, + "grad_norm": 0.15601760149002075, + "learning_rate": 4.242336062310437e-06, + "loss": 0.8946, + "step": 104670 + }, + { + "epoch": 0.7577435630162074, + "grad_norm": 0.16208244860172272, + "learning_rate": 4.242263675649852e-06, + "loss": 0.8835, + "step": 104680 + }, + { + "epoch": 0.7578159496767936, + "grad_norm": 0.1622067540884018, + "learning_rate": 4.242191288989265e-06, + "loss": 0.8915, + "step": 104690 + }, + { + "epoch": 0.7578883363373797, + "grad_norm": 0.1567181497812271, + "learning_rate": 4.242118902328679e-06, + "loss": 0.8854, + "step": 104700 + }, + { + "epoch": 0.7579607229979659, + "grad_norm": 0.17146632075309753, + "learning_rate": 4.242046515668093e-06, + "loss": 0.8797, + "step": 104710 + }, + { + "epoch": 0.7580331096585521, + "grad_norm": 0.15992100536823273, + "learning_rate": 4.241974129007507e-06, + "loss": 0.8882, + "step": 104720 + }, + { + "epoch": 0.7581054963191383, + "grad_norm": 0.14678719639778137, + "learning_rate": 4.241901742346921e-06, + "loss": 0.8827, + "step": 104730 + }, + { + "epoch": 0.7581778829797245, + "grad_norm": 0.15735352039337158, + "learning_rate": 4.241829355686334e-06, + "loss": 0.8882, + "step": 104740 + }, + { + "epoch": 0.7582502696403107, + "grad_norm": 0.16120299696922302, + "learning_rate": 4.241756969025748e-06, + "loss": 0.8893, + "step": 104750 + }, + { + "epoch": 0.7583226563008969, + "grad_norm": 0.15673673152923584, + "learning_rate": 4.241684582365162e-06, + "loss": 0.8916, + "step": 104760 + }, + { + "epoch": 0.7583950429614831, + "grad_norm": 0.1934574395418167, + "learning_rate": 4.241612195704576e-06, + "loss": 0.8908, + "step": 104770 + }, + { + "epoch": 0.7584674296220693, + "grad_norm": 0.17176105082035065, + "learning_rate": 4.24153980904399e-06, + "loss": 0.8849, + "step": 104780 + }, + { + "epoch": 0.7585398162826554, + "grad_norm": 0.16662052273750305, + "learning_rate": 4.241467422383403e-06, + "loss": 0.8946, + "step": 104790 + }, + { + "epoch": 0.7586122029432416, + "grad_norm": 0.14775685966014862, + "learning_rate": 4.241395035722817e-06, + "loss": 0.8891, + "step": 104800 + }, + { + "epoch": 0.7586845896038278, + "grad_norm": 0.15374141931533813, + "learning_rate": 4.241322649062231e-06, + "loss": 0.8965, + "step": 104810 + }, + { + "epoch": 0.758756976264414, + "grad_norm": 0.15306535363197327, + "learning_rate": 4.241250262401645e-06, + "loss": 0.8876, + "step": 104820 + }, + { + "epoch": 0.7588293629250001, + "grad_norm": 0.14979711174964905, + "learning_rate": 4.241177875741059e-06, + "loss": 0.8802, + "step": 104830 + }, + { + "epoch": 0.7589017495855863, + "grad_norm": 0.15853987634181976, + "learning_rate": 4.241105489080472e-06, + "loss": 0.8827, + "step": 104840 + }, + { + "epoch": 0.7589741362461726, + "grad_norm": 0.15730775892734528, + "learning_rate": 4.241033102419887e-06, + "loss": 0.8964, + "step": 104850 + }, + { + "epoch": 0.7590465229067588, + "grad_norm": 0.1486206203699112, + "learning_rate": 4.2409607157593e-06, + "loss": 0.9028, + "step": 104860 + }, + { + "epoch": 0.759118909567345, + "grad_norm": 0.15792833268642426, + "learning_rate": 4.240888329098714e-06, + "loss": 0.8772, + "step": 104870 + }, + { + "epoch": 0.7591912962279311, + "grad_norm": 0.16453076899051666, + "learning_rate": 4.2408159424381275e-06, + "loss": 0.8917, + "step": 104880 + }, + { + "epoch": 0.7592636828885173, + "grad_norm": 0.1514229029417038, + "learning_rate": 4.240743555777542e-06, + "loss": 0.8884, + "step": 104890 + }, + { + "epoch": 0.7593360695491035, + "grad_norm": 0.15336290001869202, + "learning_rate": 4.240671169116956e-06, + "loss": 0.8828, + "step": 104900 + }, + { + "epoch": 0.7594084562096897, + "grad_norm": 0.1626693159341812, + "learning_rate": 4.240598782456369e-06, + "loss": 0.8948, + "step": 104910 + }, + { + "epoch": 0.7594808428702758, + "grad_norm": 0.15300175547599792, + "learning_rate": 4.240526395795783e-06, + "loss": 0.8984, + "step": 104920 + }, + { + "epoch": 0.759553229530862, + "grad_norm": 0.1569639891386032, + "learning_rate": 4.240454009135197e-06, + "loss": 0.869, + "step": 104930 + }, + { + "epoch": 0.7596256161914482, + "grad_norm": 0.15857458114624023, + "learning_rate": 4.240381622474611e-06, + "loss": 0.8881, + "step": 104940 + }, + { + "epoch": 0.7596980028520345, + "grad_norm": 0.15155275166034698, + "learning_rate": 4.2403092358140245e-06, + "loss": 0.8706, + "step": 104950 + }, + { + "epoch": 0.7597703895126207, + "grad_norm": 0.16041214764118195, + "learning_rate": 4.240236849153438e-06, + "loss": 0.8975, + "step": 104960 + }, + { + "epoch": 0.7598427761732068, + "grad_norm": 0.16059066355228424, + "learning_rate": 4.240164462492853e-06, + "loss": 0.8794, + "step": 104970 + }, + { + "epoch": 0.759915162833793, + "grad_norm": 0.15635879337787628, + "learning_rate": 4.240092075832266e-06, + "loss": 0.8938, + "step": 104980 + }, + { + "epoch": 0.7599875494943792, + "grad_norm": 0.14419741928577423, + "learning_rate": 4.24001968917168e-06, + "loss": 0.8759, + "step": 104990 + }, + { + "epoch": 0.7600599361549654, + "grad_norm": 0.18103042244911194, + "learning_rate": 4.2399473025110934e-06, + "loss": 0.8966, + "step": 105000 + }, + { + "epoch": 0.7601323228155515, + "grad_norm": 0.15891040861606598, + "learning_rate": 4.239874915850508e-06, + "loss": 0.8981, + "step": 105010 + }, + { + "epoch": 0.7602047094761377, + "grad_norm": 0.17460259795188904, + "learning_rate": 4.2398025291899215e-06, + "loss": 0.8862, + "step": 105020 + }, + { + "epoch": 0.7602770961367239, + "grad_norm": 0.17469242215156555, + "learning_rate": 4.239730142529335e-06, + "loss": 0.8902, + "step": 105030 + }, + { + "epoch": 0.7603494827973101, + "grad_norm": 0.1552739441394806, + "learning_rate": 4.239657755868749e-06, + "loss": 0.8872, + "step": 105040 + }, + { + "epoch": 0.7604218694578962, + "grad_norm": 0.16493083536624908, + "learning_rate": 4.239585369208163e-06, + "loss": 0.8876, + "step": 105050 + }, + { + "epoch": 0.7604942561184825, + "grad_norm": 0.1514502614736557, + "learning_rate": 4.239512982547577e-06, + "loss": 0.876, + "step": 105060 + }, + { + "epoch": 0.7605666427790687, + "grad_norm": 0.16472308337688446, + "learning_rate": 4.2394405958869905e-06, + "loss": 0.8893, + "step": 105070 + }, + { + "epoch": 0.7606390294396549, + "grad_norm": 0.15388494729995728, + "learning_rate": 4.239368209226404e-06, + "loss": 0.8934, + "step": 105080 + }, + { + "epoch": 0.7607114161002411, + "grad_norm": 0.1528458446264267, + "learning_rate": 4.2392958225658185e-06, + "loss": 0.8948, + "step": 105090 + }, + { + "epoch": 0.7607838027608272, + "grad_norm": 0.1468532532453537, + "learning_rate": 4.239223435905232e-06, + "loss": 0.8768, + "step": 105100 + }, + { + "epoch": 0.7608561894214134, + "grad_norm": 0.14190086722373962, + "learning_rate": 4.239151049244645e-06, + "loss": 0.8829, + "step": 105110 + }, + { + "epoch": 0.7609285760819996, + "grad_norm": 0.1714107096195221, + "learning_rate": 4.239078662584059e-06, + "loss": 0.8895, + "step": 105120 + }, + { + "epoch": 0.7610009627425858, + "grad_norm": 0.15026678144931793, + "learning_rate": 4.239006275923473e-06, + "loss": 0.895, + "step": 105130 + }, + { + "epoch": 0.761073349403172, + "grad_norm": 0.16360147297382355, + "learning_rate": 4.238933889262887e-06, + "loss": 0.8825, + "step": 105140 + }, + { + "epoch": 0.7611457360637581, + "grad_norm": 0.17714418470859528, + "learning_rate": 4.2388615026023e-06, + "loss": 0.8734, + "step": 105150 + }, + { + "epoch": 0.7612181227243443, + "grad_norm": 0.1679549664258957, + "learning_rate": 4.238789115941715e-06, + "loss": 0.8955, + "step": 105160 + }, + { + "epoch": 0.7612905093849306, + "grad_norm": 0.1560448259115219, + "learning_rate": 4.238716729281128e-06, + "loss": 0.893, + "step": 105170 + }, + { + "epoch": 0.7613628960455168, + "grad_norm": 0.15543989837169647, + "learning_rate": 4.238644342620542e-06, + "loss": 0.8896, + "step": 105180 + }, + { + "epoch": 0.761435282706103, + "grad_norm": 0.15329879522323608, + "learning_rate": 4.2385719559599555e-06, + "loss": 0.8894, + "step": 105190 + }, + { + "epoch": 0.7615076693666891, + "grad_norm": 0.15145981311798096, + "learning_rate": 4.23849956929937e-06, + "loss": 0.8888, + "step": 105200 + }, + { + "epoch": 0.7615800560272753, + "grad_norm": 0.14573727548122406, + "learning_rate": 4.238427182638784e-06, + "loss": 0.8841, + "step": 105210 + }, + { + "epoch": 0.7616524426878615, + "grad_norm": 0.15317559242248535, + "learning_rate": 4.238354795978197e-06, + "loss": 0.8869, + "step": 105220 + }, + { + "epoch": 0.7617248293484477, + "grad_norm": 0.17463774979114532, + "learning_rate": 4.238282409317611e-06, + "loss": 0.8971, + "step": 105230 + }, + { + "epoch": 0.7617972160090338, + "grad_norm": 0.14841240644454956, + "learning_rate": 4.238210022657025e-06, + "loss": 0.8845, + "step": 105240 + }, + { + "epoch": 0.76186960266962, + "grad_norm": 0.19012483954429626, + "learning_rate": 4.238137635996439e-06, + "loss": 0.8936, + "step": 105250 + }, + { + "epoch": 0.7619419893302062, + "grad_norm": 0.2200062870979309, + "learning_rate": 4.2380652493358525e-06, + "loss": 0.8938, + "step": 105260 + }, + { + "epoch": 0.7620143759907925, + "grad_norm": 0.16414809226989746, + "learning_rate": 4.237992862675266e-06, + "loss": 0.8925, + "step": 105270 + }, + { + "epoch": 0.7620867626513786, + "grad_norm": 0.16722236573696136, + "learning_rate": 4.237920476014681e-06, + "loss": 0.8895, + "step": 105280 + }, + { + "epoch": 0.7621591493119648, + "grad_norm": 0.17089565098285675, + "learning_rate": 4.237848089354094e-06, + "loss": 0.8922, + "step": 105290 + }, + { + "epoch": 0.762231535972551, + "grad_norm": 0.1593043953180313, + "learning_rate": 4.237775702693508e-06, + "loss": 0.8754, + "step": 105300 + }, + { + "epoch": 0.7623039226331372, + "grad_norm": 0.16244159638881683, + "learning_rate": 4.2377033160329215e-06, + "loss": 0.8918, + "step": 105310 + }, + { + "epoch": 0.7623763092937234, + "grad_norm": 0.155963733792305, + "learning_rate": 4.237630929372336e-06, + "loss": 0.8795, + "step": 105320 + }, + { + "epoch": 0.7624486959543095, + "grad_norm": 0.15185341238975525, + "learning_rate": 4.2375585427117496e-06, + "loss": 0.8946, + "step": 105330 + }, + { + "epoch": 0.7625210826148957, + "grad_norm": 0.16116154193878174, + "learning_rate": 4.237486156051163e-06, + "loss": 0.8874, + "step": 105340 + }, + { + "epoch": 0.7625934692754819, + "grad_norm": 0.15430016815662384, + "learning_rate": 4.237413769390577e-06, + "loss": 0.8861, + "step": 105350 + }, + { + "epoch": 0.7626658559360681, + "grad_norm": 0.15945422649383545, + "learning_rate": 4.237341382729991e-06, + "loss": 0.8794, + "step": 105360 + }, + { + "epoch": 0.7627382425966542, + "grad_norm": 0.1454370766878128, + "learning_rate": 4.237268996069405e-06, + "loss": 0.8782, + "step": 105370 + }, + { + "epoch": 0.7628106292572405, + "grad_norm": 0.14304585754871368, + "learning_rate": 4.2371966094088185e-06, + "loss": 0.8795, + "step": 105380 + }, + { + "epoch": 0.7628830159178267, + "grad_norm": 0.165323406457901, + "learning_rate": 4.237124222748232e-06, + "loss": 0.8894, + "step": 105390 + }, + { + "epoch": 0.7629554025784129, + "grad_norm": 0.1540648490190506, + "learning_rate": 4.2370518360876466e-06, + "loss": 0.8751, + "step": 105400 + }, + { + "epoch": 0.763027789238999, + "grad_norm": 0.1511969268321991, + "learning_rate": 4.23697944942706e-06, + "loss": 0.8812, + "step": 105410 + }, + { + "epoch": 0.7631001758995852, + "grad_norm": 0.15881909430027008, + "learning_rate": 4.236907062766474e-06, + "loss": 0.8935, + "step": 105420 + }, + { + "epoch": 0.7631725625601714, + "grad_norm": 0.1552649736404419, + "learning_rate": 4.236834676105887e-06, + "loss": 0.8867, + "step": 105430 + }, + { + "epoch": 0.7632449492207576, + "grad_norm": 0.16431519389152527, + "learning_rate": 4.236762289445301e-06, + "loss": 0.8761, + "step": 105440 + }, + { + "epoch": 0.7633173358813438, + "grad_norm": 0.17564263939857483, + "learning_rate": 4.2366899027847155e-06, + "loss": 0.8907, + "step": 105450 + }, + { + "epoch": 0.7633897225419299, + "grad_norm": 0.15047433972358704, + "learning_rate": 4.236617516124129e-06, + "loss": 0.8874, + "step": 105460 + }, + { + "epoch": 0.7634621092025161, + "grad_norm": 0.2053581178188324, + "learning_rate": 4.236545129463543e-06, + "loss": 0.8866, + "step": 105470 + }, + { + "epoch": 0.7635344958631024, + "grad_norm": 0.16297610104084015, + "learning_rate": 4.236472742802956e-06, + "loss": 0.8818, + "step": 105480 + }, + { + "epoch": 0.7636068825236886, + "grad_norm": 0.1815577745437622, + "learning_rate": 4.236400356142371e-06, + "loss": 0.8894, + "step": 105490 + }, + { + "epoch": 0.7636792691842748, + "grad_norm": 0.14771369099617004, + "learning_rate": 4.236327969481784e-06, + "loss": 0.8805, + "step": 105500 + }, + { + "epoch": 0.7637516558448609, + "grad_norm": 0.22556520998477936, + "learning_rate": 4.236255582821198e-06, + "loss": 0.8771, + "step": 105510 + }, + { + "epoch": 0.7638240425054471, + "grad_norm": 0.15500441193580627, + "learning_rate": 4.236183196160612e-06, + "loss": 0.8816, + "step": 105520 + }, + { + "epoch": 0.7638964291660333, + "grad_norm": 0.18774858117103577, + "learning_rate": 4.236110809500026e-06, + "loss": 0.8809, + "step": 105530 + }, + { + "epoch": 0.7639688158266195, + "grad_norm": 0.15794971585273743, + "learning_rate": 4.23603842283944e-06, + "loss": 0.886, + "step": 105540 + }, + { + "epoch": 0.7640412024872056, + "grad_norm": 0.16550391912460327, + "learning_rate": 4.235966036178853e-06, + "loss": 0.8963, + "step": 105550 + }, + { + "epoch": 0.7641135891477918, + "grad_norm": 0.15253488719463348, + "learning_rate": 4.235893649518267e-06, + "loss": 0.8904, + "step": 105560 + }, + { + "epoch": 0.764185975808378, + "grad_norm": 0.16290663182735443, + "learning_rate": 4.2358212628576814e-06, + "loss": 0.8908, + "step": 105570 + }, + { + "epoch": 0.7642583624689642, + "grad_norm": 0.16055020689964294, + "learning_rate": 4.235748876197095e-06, + "loss": 0.8807, + "step": 105580 + }, + { + "epoch": 0.7643307491295505, + "grad_norm": 0.1527642160654068, + "learning_rate": 4.235676489536509e-06, + "loss": 0.8882, + "step": 105590 + }, + { + "epoch": 0.7644031357901366, + "grad_norm": 0.1476249247789383, + "learning_rate": 4.235604102875922e-06, + "loss": 0.8715, + "step": 105600 + }, + { + "epoch": 0.7644755224507228, + "grad_norm": 0.15459424257278442, + "learning_rate": 4.235531716215337e-06, + "loss": 0.8853, + "step": 105610 + }, + { + "epoch": 0.764547909111309, + "grad_norm": 0.17739763855934143, + "learning_rate": 4.23545932955475e-06, + "loss": 0.8846, + "step": 105620 + }, + { + "epoch": 0.7646202957718952, + "grad_norm": 0.158504918217659, + "learning_rate": 4.235386942894164e-06, + "loss": 0.8892, + "step": 105630 + }, + { + "epoch": 0.7646926824324813, + "grad_norm": 0.16947391629219055, + "learning_rate": 4.235314556233578e-06, + "loss": 0.8886, + "step": 105640 + }, + { + "epoch": 0.7647650690930675, + "grad_norm": 0.15372443199157715, + "learning_rate": 4.235242169572991e-06, + "loss": 0.8757, + "step": 105650 + }, + { + "epoch": 0.7648374557536537, + "grad_norm": 0.16209058463573456, + "learning_rate": 4.235169782912405e-06, + "loss": 0.8973, + "step": 105660 + }, + { + "epoch": 0.7649098424142399, + "grad_norm": 0.15519694983959198, + "learning_rate": 4.2350973962518184e-06, + "loss": 0.8926, + "step": 105670 + }, + { + "epoch": 0.764982229074826, + "grad_norm": 0.15895330905914307, + "learning_rate": 4.235025009591233e-06, + "loss": 0.8882, + "step": 105680 + }, + { + "epoch": 0.7650546157354122, + "grad_norm": 0.14420805871486664, + "learning_rate": 4.2349526229306465e-06, + "loss": 0.8822, + "step": 105690 + }, + { + "epoch": 0.7651270023959985, + "grad_norm": 0.15106813609600067, + "learning_rate": 4.23488023627006e-06, + "loss": 0.8966, + "step": 105700 + }, + { + "epoch": 0.7651993890565847, + "grad_norm": 0.14976173639297485, + "learning_rate": 4.234807849609474e-06, + "loss": 0.8817, + "step": 105710 + }, + { + "epoch": 0.7652717757171709, + "grad_norm": 0.16191081702709198, + "learning_rate": 4.234735462948888e-06, + "loss": 0.8792, + "step": 105720 + }, + { + "epoch": 0.765344162377757, + "grad_norm": 0.15428221225738525, + "learning_rate": 4.234663076288302e-06, + "loss": 0.8836, + "step": 105730 + }, + { + "epoch": 0.7654165490383432, + "grad_norm": 0.15489520132541656, + "learning_rate": 4.2345906896277154e-06, + "loss": 0.8786, + "step": 105740 + }, + { + "epoch": 0.7654889356989294, + "grad_norm": 0.16444027423858643, + "learning_rate": 4.234518302967129e-06, + "loss": 0.883, + "step": 105750 + }, + { + "epoch": 0.7655613223595156, + "grad_norm": 0.1668035089969635, + "learning_rate": 4.2344459163065435e-06, + "loss": 0.893, + "step": 105760 + }, + { + "epoch": 0.7656337090201017, + "grad_norm": 0.15421725809574127, + "learning_rate": 4.234373529645957e-06, + "loss": 0.8888, + "step": 105770 + }, + { + "epoch": 0.7657060956806879, + "grad_norm": 0.15628722310066223, + "learning_rate": 4.234301142985371e-06, + "loss": 0.8877, + "step": 105780 + }, + { + "epoch": 0.7657784823412741, + "grad_norm": 0.16434313356876373, + "learning_rate": 4.234228756324784e-06, + "loss": 0.8856, + "step": 105790 + }, + { + "epoch": 0.7658508690018604, + "grad_norm": 0.14600999653339386, + "learning_rate": 4.234156369664199e-06, + "loss": 0.8916, + "step": 105800 + }, + { + "epoch": 0.7659232556624466, + "grad_norm": 0.23364056646823883, + "learning_rate": 4.2340839830036125e-06, + "loss": 0.8865, + "step": 105810 + }, + { + "epoch": 0.7659956423230327, + "grad_norm": 0.15147335827350616, + "learning_rate": 4.234011596343026e-06, + "loss": 0.875, + "step": 105820 + }, + { + "epoch": 0.7660680289836189, + "grad_norm": 0.16839240491390228, + "learning_rate": 4.23393920968244e-06, + "loss": 0.8807, + "step": 105830 + }, + { + "epoch": 0.7661404156442051, + "grad_norm": 0.1934797465801239, + "learning_rate": 4.233866823021854e-06, + "loss": 0.8983, + "step": 105840 + }, + { + "epoch": 0.7662128023047913, + "grad_norm": 0.15738998353481293, + "learning_rate": 4.233794436361268e-06, + "loss": 0.8784, + "step": 105850 + }, + { + "epoch": 0.7662851889653775, + "grad_norm": 0.14478467404842377, + "learning_rate": 4.233722049700681e-06, + "loss": 0.896, + "step": 105860 + }, + { + "epoch": 0.7663575756259636, + "grad_norm": 0.17345765233039856, + "learning_rate": 4.233649663040095e-06, + "loss": 0.8857, + "step": 105870 + }, + { + "epoch": 0.7664299622865498, + "grad_norm": 0.15138719975948334, + "learning_rate": 4.2335772763795095e-06, + "loss": 0.8979, + "step": 105880 + }, + { + "epoch": 0.766502348947136, + "grad_norm": 0.15025945007801056, + "learning_rate": 4.233504889718923e-06, + "loss": 0.8786, + "step": 105890 + }, + { + "epoch": 0.7665747356077222, + "grad_norm": 0.16647055745124817, + "learning_rate": 4.233432503058337e-06, + "loss": 0.897, + "step": 105900 + }, + { + "epoch": 0.7666471222683084, + "grad_norm": 0.16044731438159943, + "learning_rate": 4.23336011639775e-06, + "loss": 0.8921, + "step": 105910 + }, + { + "epoch": 0.7667195089288946, + "grad_norm": 0.1512782722711563, + "learning_rate": 4.233287729737165e-06, + "loss": 0.8839, + "step": 105920 + }, + { + "epoch": 0.7667918955894808, + "grad_norm": 0.16747620701789856, + "learning_rate": 4.233215343076578e-06, + "loss": 0.8857, + "step": 105930 + }, + { + "epoch": 0.766864282250067, + "grad_norm": 0.17304562032222748, + "learning_rate": 4.233142956415992e-06, + "loss": 0.8897, + "step": 105940 + }, + { + "epoch": 0.7669366689106532, + "grad_norm": 0.15581056475639343, + "learning_rate": 4.233070569755406e-06, + "loss": 0.8906, + "step": 105950 + }, + { + "epoch": 0.7670090555712393, + "grad_norm": 0.17081882059574127, + "learning_rate": 4.23299818309482e-06, + "loss": 0.8815, + "step": 105960 + }, + { + "epoch": 0.7670814422318255, + "grad_norm": 0.15507552027702332, + "learning_rate": 4.232925796434234e-06, + "loss": 0.8847, + "step": 105970 + }, + { + "epoch": 0.7671538288924117, + "grad_norm": 0.16203370690345764, + "learning_rate": 4.232853409773647e-06, + "loss": 0.8817, + "step": 105980 + }, + { + "epoch": 0.7672262155529979, + "grad_norm": 0.1585424691438675, + "learning_rate": 4.232781023113061e-06, + "loss": 0.8779, + "step": 105990 + }, + { + "epoch": 0.767298602213584, + "grad_norm": 0.15579846501350403, + "learning_rate": 4.232708636452475e-06, + "loss": 0.8773, + "step": 106000 + }, + { + "epoch": 0.7673709888741703, + "grad_norm": 0.1483168751001358, + "learning_rate": 4.232636249791889e-06, + "loss": 0.8916, + "step": 106010 + }, + { + "epoch": 0.7674433755347565, + "grad_norm": 0.18050970137119293, + "learning_rate": 4.232563863131303e-06, + "loss": 0.8731, + "step": 106020 + }, + { + "epoch": 0.7675157621953427, + "grad_norm": 0.17349663376808167, + "learning_rate": 4.232491476470716e-06, + "loss": 0.883, + "step": 106030 + }, + { + "epoch": 0.7675881488559289, + "grad_norm": 0.17154832184314728, + "learning_rate": 4.23241908981013e-06, + "loss": 0.8769, + "step": 106040 + }, + { + "epoch": 0.767660535516515, + "grad_norm": 0.15172946453094482, + "learning_rate": 4.232346703149544e-06, + "loss": 0.8715, + "step": 106050 + }, + { + "epoch": 0.7677329221771012, + "grad_norm": 0.16642993688583374, + "learning_rate": 4.232274316488958e-06, + "loss": 0.8857, + "step": 106060 + }, + { + "epoch": 0.7678053088376874, + "grad_norm": 0.15337328612804413, + "learning_rate": 4.2322019298283716e-06, + "loss": 0.872, + "step": 106070 + }, + { + "epoch": 0.7678776954982736, + "grad_norm": 0.15362398326396942, + "learning_rate": 4.232129543167785e-06, + "loss": 0.8852, + "step": 106080 + }, + { + "epoch": 0.7679500821588597, + "grad_norm": 0.16665634512901306, + "learning_rate": 4.2320571565072e-06, + "loss": 0.8877, + "step": 106090 + }, + { + "epoch": 0.7680224688194459, + "grad_norm": 0.14505624771118164, + "learning_rate": 4.231984769846613e-06, + "loss": 0.8936, + "step": 106100 + }, + { + "epoch": 0.7680948554800321, + "grad_norm": 0.16204003989696503, + "learning_rate": 4.231912383186027e-06, + "loss": 0.8903, + "step": 106110 + }, + { + "epoch": 0.7681672421406184, + "grad_norm": 0.16394628584384918, + "learning_rate": 4.2318399965254405e-06, + "loss": 0.8986, + "step": 106120 + }, + { + "epoch": 0.7682396288012046, + "grad_norm": 0.15969203412532806, + "learning_rate": 4.231767609864855e-06, + "loss": 0.8769, + "step": 106130 + }, + { + "epoch": 0.7683120154617907, + "grad_norm": 0.1562148928642273, + "learning_rate": 4.2316952232042686e-06, + "loss": 0.872, + "step": 106140 + }, + { + "epoch": 0.7683844021223769, + "grad_norm": 0.171428382396698, + "learning_rate": 4.231622836543682e-06, + "loss": 0.8811, + "step": 106150 + }, + { + "epoch": 0.7684567887829631, + "grad_norm": 0.16853584349155426, + "learning_rate": 4.231550449883096e-06, + "loss": 0.8836, + "step": 106160 + }, + { + "epoch": 0.7685291754435493, + "grad_norm": 0.15788713097572327, + "learning_rate": 4.23147806322251e-06, + "loss": 0.8912, + "step": 106170 + }, + { + "epoch": 0.7686015621041354, + "grad_norm": 0.15766339004039764, + "learning_rate": 4.231405676561923e-06, + "loss": 0.8992, + "step": 106180 + }, + { + "epoch": 0.7686739487647216, + "grad_norm": 0.14977054297924042, + "learning_rate": 4.2313332899013375e-06, + "loss": 0.8844, + "step": 106190 + }, + { + "epoch": 0.7687463354253078, + "grad_norm": 0.15274566411972046, + "learning_rate": 4.231260903240751e-06, + "loss": 0.8939, + "step": 106200 + }, + { + "epoch": 0.768818722085894, + "grad_norm": 0.17646168172359467, + "learning_rate": 4.231188516580165e-06, + "loss": 0.8851, + "step": 106210 + }, + { + "epoch": 0.7688911087464801, + "grad_norm": 0.16536547243595123, + "learning_rate": 4.231116129919578e-06, + "loss": 0.879, + "step": 106220 + }, + { + "epoch": 0.7689634954070664, + "grad_norm": 0.17476871609687805, + "learning_rate": 4.231043743258992e-06, + "loss": 0.8759, + "step": 106230 + }, + { + "epoch": 0.7690358820676526, + "grad_norm": 0.1473916620016098, + "learning_rate": 4.230971356598406e-06, + "loss": 0.8778, + "step": 106240 + }, + { + "epoch": 0.7691082687282388, + "grad_norm": 0.15702074766159058, + "learning_rate": 4.23089896993782e-06, + "loss": 0.8932, + "step": 106250 + }, + { + "epoch": 0.769180655388825, + "grad_norm": 0.14984412491321564, + "learning_rate": 4.230826583277234e-06, + "loss": 0.8931, + "step": 106260 + }, + { + "epoch": 0.7692530420494111, + "grad_norm": 0.15122853219509125, + "learning_rate": 4.230754196616647e-06, + "loss": 0.8831, + "step": 106270 + }, + { + "epoch": 0.7693254287099973, + "grad_norm": 0.16905367374420166, + "learning_rate": 4.230681809956062e-06, + "loss": 0.8764, + "step": 106280 + }, + { + "epoch": 0.7693978153705835, + "grad_norm": 0.15527789294719696, + "learning_rate": 4.230609423295475e-06, + "loss": 0.8804, + "step": 106290 + }, + { + "epoch": 0.7694702020311697, + "grad_norm": 0.16785407066345215, + "learning_rate": 4.230537036634889e-06, + "loss": 0.8924, + "step": 106300 + }, + { + "epoch": 0.7695425886917558, + "grad_norm": 0.15901590883731842, + "learning_rate": 4.230464649974303e-06, + "loss": 0.8922, + "step": 106310 + }, + { + "epoch": 0.769614975352342, + "grad_norm": 0.17190630733966827, + "learning_rate": 4.230392263313717e-06, + "loss": 0.8802, + "step": 106320 + }, + { + "epoch": 0.7696873620129283, + "grad_norm": 0.27805203199386597, + "learning_rate": 4.230319876653131e-06, + "loss": 0.8847, + "step": 106330 + }, + { + "epoch": 0.7697597486735145, + "grad_norm": 0.1641608327627182, + "learning_rate": 4.230247489992544e-06, + "loss": 0.8951, + "step": 106340 + }, + { + "epoch": 0.7698321353341007, + "grad_norm": 0.15645158290863037, + "learning_rate": 4.230175103331958e-06, + "loss": 0.8771, + "step": 106350 + }, + { + "epoch": 0.7699045219946868, + "grad_norm": 0.15807659924030304, + "learning_rate": 4.230102716671372e-06, + "loss": 0.8841, + "step": 106360 + }, + { + "epoch": 0.769976908655273, + "grad_norm": 0.15633799135684967, + "learning_rate": 4.230030330010786e-06, + "loss": 0.8917, + "step": 106370 + }, + { + "epoch": 0.7700492953158592, + "grad_norm": 0.15708708763122559, + "learning_rate": 4.2299579433502e-06, + "loss": 0.8789, + "step": 106380 + }, + { + "epoch": 0.7701216819764454, + "grad_norm": 0.15080325305461884, + "learning_rate": 4.229885556689613e-06, + "loss": 0.8861, + "step": 106390 + }, + { + "epoch": 0.7701940686370315, + "grad_norm": 0.14527060091495514, + "learning_rate": 4.229813170029028e-06, + "loss": 0.8835, + "step": 106400 + }, + { + "epoch": 0.7702664552976177, + "grad_norm": 0.1920609176158905, + "learning_rate": 4.229740783368441e-06, + "loss": 0.8916, + "step": 106410 + }, + { + "epoch": 0.7703388419582039, + "grad_norm": 0.14528389275074005, + "learning_rate": 4.229668396707855e-06, + "loss": 0.8716, + "step": 106420 + }, + { + "epoch": 0.7704112286187901, + "grad_norm": 0.1617295742034912, + "learning_rate": 4.2295960100472685e-06, + "loss": 0.8786, + "step": 106430 + }, + { + "epoch": 0.7704836152793764, + "grad_norm": 0.15510478615760803, + "learning_rate": 4.229523623386683e-06, + "loss": 0.8832, + "step": 106440 + }, + { + "epoch": 0.7705560019399625, + "grad_norm": 0.1545783132314682, + "learning_rate": 4.229451236726097e-06, + "loss": 0.8805, + "step": 106450 + }, + { + "epoch": 0.7706283886005487, + "grad_norm": 0.15689776837825775, + "learning_rate": 4.22937885006551e-06, + "loss": 0.8718, + "step": 106460 + }, + { + "epoch": 0.7707007752611349, + "grad_norm": 0.1707046777009964, + "learning_rate": 4.229306463404924e-06, + "loss": 0.8843, + "step": 106470 + }, + { + "epoch": 0.7707731619217211, + "grad_norm": 0.16164378821849823, + "learning_rate": 4.229234076744338e-06, + "loss": 0.8876, + "step": 106480 + }, + { + "epoch": 0.7708455485823072, + "grad_norm": 0.16344380378723145, + "learning_rate": 4.229161690083752e-06, + "loss": 0.8894, + "step": 106490 + }, + { + "epoch": 0.7709179352428934, + "grad_norm": 0.15482939779758453, + "learning_rate": 4.2290893034231655e-06, + "loss": 0.887, + "step": 106500 + }, + { + "epoch": 0.7709903219034796, + "grad_norm": 0.16493044793605804, + "learning_rate": 4.229016916762579e-06, + "loss": 0.8868, + "step": 106510 + }, + { + "epoch": 0.7710627085640658, + "grad_norm": 0.15701046586036682, + "learning_rate": 4.228944530101994e-06, + "loss": 0.8802, + "step": 106520 + }, + { + "epoch": 0.771135095224652, + "grad_norm": 0.15896005928516388, + "learning_rate": 4.228872143441407e-06, + "loss": 0.8828, + "step": 106530 + }, + { + "epoch": 0.7712074818852381, + "grad_norm": 0.15199479460716248, + "learning_rate": 4.228799756780821e-06, + "loss": 0.8855, + "step": 106540 + }, + { + "epoch": 0.7712798685458244, + "grad_norm": 0.14489343762397766, + "learning_rate": 4.2287273701202345e-06, + "loss": 0.8969, + "step": 106550 + }, + { + "epoch": 0.7713522552064106, + "grad_norm": 0.1755223423242569, + "learning_rate": 4.228654983459649e-06, + "loss": 0.8788, + "step": 106560 + }, + { + "epoch": 0.7714246418669968, + "grad_norm": 0.17546701431274414, + "learning_rate": 4.2285825967990625e-06, + "loss": 0.8877, + "step": 106570 + }, + { + "epoch": 0.771497028527583, + "grad_norm": 0.1525055468082428, + "learning_rate": 4.228510210138476e-06, + "loss": 0.8842, + "step": 106580 + }, + { + "epoch": 0.7715694151881691, + "grad_norm": 0.14916154742240906, + "learning_rate": 4.22843782347789e-06, + "loss": 0.8826, + "step": 106590 + }, + { + "epoch": 0.7716418018487553, + "grad_norm": 0.16282054781913757, + "learning_rate": 4.228365436817304e-06, + "loss": 0.8843, + "step": 106600 + }, + { + "epoch": 0.7717141885093415, + "grad_norm": 0.16194520890712738, + "learning_rate": 4.228293050156718e-06, + "loss": 0.8881, + "step": 106610 + }, + { + "epoch": 0.7717865751699277, + "grad_norm": 0.1717979609966278, + "learning_rate": 4.2282206634961315e-06, + "loss": 0.8785, + "step": 106620 + }, + { + "epoch": 0.7718589618305138, + "grad_norm": 0.24075248837471008, + "learning_rate": 4.228148276835545e-06, + "loss": 0.8907, + "step": 106630 + }, + { + "epoch": 0.7719313484911, + "grad_norm": 0.3043558597564697, + "learning_rate": 4.2280758901749595e-06, + "loss": 0.8813, + "step": 106640 + }, + { + "epoch": 0.7720037351516863, + "grad_norm": 0.17792464792728424, + "learning_rate": 4.228003503514373e-06, + "loss": 0.8881, + "step": 106650 + }, + { + "epoch": 0.7720761218122725, + "grad_norm": 0.16710121929645538, + "learning_rate": 4.227931116853787e-06, + "loss": 0.8809, + "step": 106660 + }, + { + "epoch": 0.7721485084728587, + "grad_norm": 0.14636112749576569, + "learning_rate": 4.2278587301932e-06, + "loss": 0.8869, + "step": 106670 + }, + { + "epoch": 0.7722208951334448, + "grad_norm": 0.16227993369102478, + "learning_rate": 4.227786343532614e-06, + "loss": 0.8696, + "step": 106680 + }, + { + "epoch": 0.772293281794031, + "grad_norm": 0.15392231941223145, + "learning_rate": 4.2277139568720285e-06, + "loss": 0.8921, + "step": 106690 + }, + { + "epoch": 0.7723656684546172, + "grad_norm": 0.1547156274318695, + "learning_rate": 4.227641570211442e-06, + "loss": 0.8892, + "step": 106700 + }, + { + "epoch": 0.7724380551152034, + "grad_norm": 0.15668383240699768, + "learning_rate": 4.227569183550856e-06, + "loss": 0.8795, + "step": 106710 + }, + { + "epoch": 0.7725104417757895, + "grad_norm": 0.15469415485858917, + "learning_rate": 4.227496796890269e-06, + "loss": 0.8863, + "step": 106720 + }, + { + "epoch": 0.7725828284363757, + "grad_norm": 0.16153530776500702, + "learning_rate": 4.227424410229683e-06, + "loss": 0.8813, + "step": 106730 + }, + { + "epoch": 0.7726552150969619, + "grad_norm": 0.15066730976104736, + "learning_rate": 4.2273520235690965e-06, + "loss": 0.8876, + "step": 106740 + }, + { + "epoch": 0.7727276017575481, + "grad_norm": 0.15237820148468018, + "learning_rate": 4.227279636908511e-06, + "loss": 0.8915, + "step": 106750 + }, + { + "epoch": 0.7727999884181344, + "grad_norm": 0.1508363038301468, + "learning_rate": 4.227207250247925e-06, + "loss": 0.8846, + "step": 106760 + }, + { + "epoch": 0.7728723750787205, + "grad_norm": 0.16356633603572845, + "learning_rate": 4.227134863587338e-06, + "loss": 0.8851, + "step": 106770 + }, + { + "epoch": 0.7729447617393067, + "grad_norm": 0.1548301726579666, + "learning_rate": 4.227062476926752e-06, + "loss": 0.8941, + "step": 106780 + }, + { + "epoch": 0.7730171483998929, + "grad_norm": 0.18178719282150269, + "learning_rate": 4.226990090266166e-06, + "loss": 0.8839, + "step": 106790 + }, + { + "epoch": 0.7730895350604791, + "grad_norm": 0.16349080204963684, + "learning_rate": 4.22691770360558e-06, + "loss": 0.8818, + "step": 106800 + }, + { + "epoch": 0.7731619217210652, + "grad_norm": 0.15550731122493744, + "learning_rate": 4.2268453169449936e-06, + "loss": 0.8791, + "step": 106810 + }, + { + "epoch": 0.7732343083816514, + "grad_norm": 0.15300001204013824, + "learning_rate": 4.226772930284407e-06, + "loss": 0.8669, + "step": 106820 + }, + { + "epoch": 0.7733066950422376, + "grad_norm": 0.15288126468658447, + "learning_rate": 4.226700543623821e-06, + "loss": 0.8753, + "step": 106830 + }, + { + "epoch": 0.7733790817028238, + "grad_norm": 0.17350119352340698, + "learning_rate": 4.226628156963235e-06, + "loss": 0.8931, + "step": 106840 + }, + { + "epoch": 0.7734514683634099, + "grad_norm": 0.144486203789711, + "learning_rate": 4.226555770302649e-06, + "loss": 0.8797, + "step": 106850 + }, + { + "epoch": 0.7735238550239962, + "grad_norm": 0.15180061757564545, + "learning_rate": 4.2264833836420625e-06, + "loss": 0.883, + "step": 106860 + }, + { + "epoch": 0.7735962416845824, + "grad_norm": 0.16879640519618988, + "learning_rate": 4.226410996981476e-06, + "loss": 0.8795, + "step": 106870 + }, + { + "epoch": 0.7736686283451686, + "grad_norm": 0.17711849510669708, + "learning_rate": 4.2263386103208906e-06, + "loss": 0.881, + "step": 106880 + }, + { + "epoch": 0.7737410150057548, + "grad_norm": 0.15794023871421814, + "learning_rate": 4.226266223660304e-06, + "loss": 0.883, + "step": 106890 + }, + { + "epoch": 0.7738134016663409, + "grad_norm": 0.16354835033416748, + "learning_rate": 4.226193836999718e-06, + "loss": 0.8808, + "step": 106900 + }, + { + "epoch": 0.7738857883269271, + "grad_norm": 0.16571597754955292, + "learning_rate": 4.226121450339131e-06, + "loss": 0.879, + "step": 106910 + }, + { + "epoch": 0.7739581749875133, + "grad_norm": 0.16619838774204254, + "learning_rate": 4.226049063678546e-06, + "loss": 0.8966, + "step": 106920 + }, + { + "epoch": 0.7740305616480995, + "grad_norm": 0.16915611922740936, + "learning_rate": 4.2259766770179595e-06, + "loss": 0.867, + "step": 106930 + }, + { + "epoch": 0.7741029483086856, + "grad_norm": 0.15059566497802734, + "learning_rate": 4.225904290357373e-06, + "loss": 0.8969, + "step": 106940 + }, + { + "epoch": 0.7741753349692718, + "grad_norm": 0.15447822213172913, + "learning_rate": 4.225831903696787e-06, + "loss": 0.8692, + "step": 106950 + }, + { + "epoch": 0.774247721629858, + "grad_norm": 0.15346454083919525, + "learning_rate": 4.225759517036201e-06, + "loss": 0.879, + "step": 106960 + }, + { + "epoch": 0.7743201082904443, + "grad_norm": 0.15431329607963562, + "learning_rate": 4.225687130375615e-06, + "loss": 0.8781, + "step": 106970 + }, + { + "epoch": 0.7743924949510305, + "grad_norm": 0.15542955696582794, + "learning_rate": 4.225614743715028e-06, + "loss": 0.8815, + "step": 106980 + }, + { + "epoch": 0.7744648816116166, + "grad_norm": 0.15447083115577698, + "learning_rate": 4.225542357054442e-06, + "loss": 0.8799, + "step": 106990 + }, + { + "epoch": 0.7745372682722028, + "grad_norm": 0.17327377200126648, + "learning_rate": 4.2254699703938565e-06, + "loss": 0.8878, + "step": 107000 + }, + { + "epoch": 0.774609654932789, + "grad_norm": 0.15756526589393616, + "learning_rate": 4.22539758373327e-06, + "loss": 0.8798, + "step": 107010 + }, + { + "epoch": 0.7746820415933752, + "grad_norm": 0.15248951315879822, + "learning_rate": 4.225325197072684e-06, + "loss": 0.8899, + "step": 107020 + }, + { + "epoch": 0.7747544282539613, + "grad_norm": 0.1477757692337036, + "learning_rate": 4.225252810412097e-06, + "loss": 0.8826, + "step": 107030 + }, + { + "epoch": 0.7748268149145475, + "grad_norm": 0.1861177235841751, + "learning_rate": 4.225180423751512e-06, + "loss": 0.8875, + "step": 107040 + }, + { + "epoch": 0.7748992015751337, + "grad_norm": 0.14727658033370972, + "learning_rate": 4.2251080370909254e-06, + "loss": 0.8786, + "step": 107050 + }, + { + "epoch": 0.7749715882357199, + "grad_norm": 0.15688657760620117, + "learning_rate": 4.225035650430339e-06, + "loss": 0.8887, + "step": 107060 + }, + { + "epoch": 0.775043974896306, + "grad_norm": 0.15266485512256622, + "learning_rate": 4.224963263769753e-06, + "loss": 0.8883, + "step": 107070 + }, + { + "epoch": 0.7751163615568923, + "grad_norm": 0.15053589642047882, + "learning_rate": 4.224890877109167e-06, + "loss": 0.8763, + "step": 107080 + }, + { + "epoch": 0.7751887482174785, + "grad_norm": 0.1497945785522461, + "learning_rate": 4.224818490448581e-06, + "loss": 0.8768, + "step": 107090 + }, + { + "epoch": 0.7752611348780647, + "grad_norm": 0.16105857491493225, + "learning_rate": 4.224746103787994e-06, + "loss": 0.8819, + "step": 107100 + }, + { + "epoch": 0.7753335215386509, + "grad_norm": 0.15862345695495605, + "learning_rate": 4.224673717127408e-06, + "loss": 0.8753, + "step": 107110 + }, + { + "epoch": 0.775405908199237, + "grad_norm": 0.15614831447601318, + "learning_rate": 4.2246013304668224e-06, + "loss": 0.8811, + "step": 107120 + }, + { + "epoch": 0.7754782948598232, + "grad_norm": 0.19445110857486725, + "learning_rate": 4.224528943806236e-06, + "loss": 0.8964, + "step": 107130 + }, + { + "epoch": 0.7755506815204094, + "grad_norm": 0.15942369401454926, + "learning_rate": 4.22445655714565e-06, + "loss": 0.8658, + "step": 107140 + }, + { + "epoch": 0.7756230681809956, + "grad_norm": 0.1634572595357895, + "learning_rate": 4.224384170485063e-06, + "loss": 0.8844, + "step": 107150 + }, + { + "epoch": 0.7756954548415818, + "grad_norm": 0.14864301681518555, + "learning_rate": 4.224311783824478e-06, + "loss": 0.8885, + "step": 107160 + }, + { + "epoch": 0.7757678415021679, + "grad_norm": 0.1603207290172577, + "learning_rate": 4.224239397163891e-06, + "loss": 0.8811, + "step": 107170 + }, + { + "epoch": 0.7758402281627542, + "grad_norm": 0.15209531784057617, + "learning_rate": 4.224167010503305e-06, + "loss": 0.8856, + "step": 107180 + }, + { + "epoch": 0.7759126148233404, + "grad_norm": 0.1611732542514801, + "learning_rate": 4.224094623842719e-06, + "loss": 0.8802, + "step": 107190 + }, + { + "epoch": 0.7759850014839266, + "grad_norm": 0.16331136226654053, + "learning_rate": 4.224022237182133e-06, + "loss": 0.8931, + "step": 107200 + }, + { + "epoch": 0.7760573881445127, + "grad_norm": 0.17706800997257233, + "learning_rate": 4.223949850521547e-06, + "loss": 0.8834, + "step": 107210 + }, + { + "epoch": 0.7761297748050989, + "grad_norm": 0.14724914729595184, + "learning_rate": 4.22387746386096e-06, + "loss": 0.8868, + "step": 107220 + }, + { + "epoch": 0.7762021614656851, + "grad_norm": 0.16513299942016602, + "learning_rate": 4.223805077200374e-06, + "loss": 0.8752, + "step": 107230 + }, + { + "epoch": 0.7762745481262713, + "grad_norm": 0.17045806348323822, + "learning_rate": 4.2237326905397875e-06, + "loss": 0.8909, + "step": 107240 + }, + { + "epoch": 0.7763469347868575, + "grad_norm": 0.189479261636734, + "learning_rate": 4.223660303879201e-06, + "loss": 0.884, + "step": 107250 + }, + { + "epoch": 0.7764193214474436, + "grad_norm": 0.15895524621009827, + "learning_rate": 4.223587917218615e-06, + "loss": 0.8878, + "step": 107260 + }, + { + "epoch": 0.7764917081080298, + "grad_norm": 0.16213759779930115, + "learning_rate": 4.223515530558029e-06, + "loss": 0.8825, + "step": 107270 + }, + { + "epoch": 0.776564094768616, + "grad_norm": 0.1568875014781952, + "learning_rate": 4.223443143897443e-06, + "loss": 0.8751, + "step": 107280 + }, + { + "epoch": 0.7766364814292023, + "grad_norm": 0.1578979790210724, + "learning_rate": 4.2233707572368565e-06, + "loss": 0.883, + "step": 107290 + }, + { + "epoch": 0.7767088680897885, + "grad_norm": 0.1542271375656128, + "learning_rate": 4.22329837057627e-06, + "loss": 0.8786, + "step": 107300 + }, + { + "epoch": 0.7767812547503746, + "grad_norm": 0.16791146993637085, + "learning_rate": 4.2232259839156845e-06, + "loss": 0.8914, + "step": 107310 + }, + { + "epoch": 0.7768536414109608, + "grad_norm": 0.1576090306043625, + "learning_rate": 4.223153597255098e-06, + "loss": 0.894, + "step": 107320 + }, + { + "epoch": 0.776926028071547, + "grad_norm": 0.15662311017513275, + "learning_rate": 4.223081210594512e-06, + "loss": 0.8998, + "step": 107330 + }, + { + "epoch": 0.7769984147321332, + "grad_norm": 0.14646415412425995, + "learning_rate": 4.223008823933925e-06, + "loss": 0.867, + "step": 107340 + }, + { + "epoch": 0.7770708013927193, + "grad_norm": 0.14766213297843933, + "learning_rate": 4.22293643727334e-06, + "loss": 0.8937, + "step": 107350 + }, + { + "epoch": 0.7771431880533055, + "grad_norm": 0.1753314882516861, + "learning_rate": 4.2228640506127535e-06, + "loss": 0.8736, + "step": 107360 + }, + { + "epoch": 0.7772155747138917, + "grad_norm": 0.15868641436100006, + "learning_rate": 4.222791663952167e-06, + "loss": 0.9, + "step": 107370 + }, + { + "epoch": 0.7772879613744779, + "grad_norm": 0.15650039911270142, + "learning_rate": 4.222719277291581e-06, + "loss": 0.886, + "step": 107380 + }, + { + "epoch": 0.7773603480350642, + "grad_norm": 0.17144428193569183, + "learning_rate": 4.222646890630995e-06, + "loss": 0.892, + "step": 107390 + }, + { + "epoch": 0.7774327346956503, + "grad_norm": 0.1921321600675583, + "learning_rate": 4.222574503970409e-06, + "loss": 0.8881, + "step": 107400 + }, + { + "epoch": 0.7775051213562365, + "grad_norm": 0.1584097146987915, + "learning_rate": 4.222502117309822e-06, + "loss": 0.8806, + "step": 107410 + }, + { + "epoch": 0.7775775080168227, + "grad_norm": 0.15249648690223694, + "learning_rate": 4.222429730649236e-06, + "loss": 0.8956, + "step": 107420 + }, + { + "epoch": 0.7776498946774089, + "grad_norm": 0.15863776206970215, + "learning_rate": 4.2223573439886505e-06, + "loss": 0.8773, + "step": 107430 + }, + { + "epoch": 0.777722281337995, + "grad_norm": 0.16656135022640228, + "learning_rate": 4.222284957328064e-06, + "loss": 0.8973, + "step": 107440 + }, + { + "epoch": 0.7777946679985812, + "grad_norm": 0.15339358150959015, + "learning_rate": 4.222212570667478e-06, + "loss": 0.8851, + "step": 107450 + }, + { + "epoch": 0.7778670546591674, + "grad_norm": 0.1573115736246109, + "learning_rate": 4.222140184006891e-06, + "loss": 0.8841, + "step": 107460 + }, + { + "epoch": 0.7779394413197536, + "grad_norm": 0.1542723923921585, + "learning_rate": 4.222067797346305e-06, + "loss": 0.8954, + "step": 107470 + }, + { + "epoch": 0.7780118279803397, + "grad_norm": 0.1786673665046692, + "learning_rate": 4.221995410685719e-06, + "loss": 0.8873, + "step": 107480 + }, + { + "epoch": 0.7780842146409259, + "grad_norm": 0.17204001545906067, + "learning_rate": 4.221923024025133e-06, + "loss": 0.8873, + "step": 107490 + }, + { + "epoch": 0.7781566013015122, + "grad_norm": 0.14830465614795685, + "learning_rate": 4.221850637364547e-06, + "loss": 0.8775, + "step": 107500 + }, + { + "epoch": 0.7782289879620984, + "grad_norm": 0.1740427315235138, + "learning_rate": 4.22177825070396e-06, + "loss": 0.8825, + "step": 107510 + }, + { + "epoch": 0.7783013746226846, + "grad_norm": 0.1551501452922821, + "learning_rate": 4.221705864043375e-06, + "loss": 0.9021, + "step": 107520 + }, + { + "epoch": 0.7783737612832707, + "grad_norm": 0.1553511619567871, + "learning_rate": 4.221633477382788e-06, + "loss": 0.8773, + "step": 107530 + }, + { + "epoch": 0.7784461479438569, + "grad_norm": 0.15967567265033722, + "learning_rate": 4.221561090722202e-06, + "loss": 0.8829, + "step": 107540 + }, + { + "epoch": 0.7785185346044431, + "grad_norm": 0.14767323434352875, + "learning_rate": 4.2214887040616156e-06, + "loss": 0.8687, + "step": 107550 + }, + { + "epoch": 0.7785909212650293, + "grad_norm": 0.14567668735980988, + "learning_rate": 4.22141631740103e-06, + "loss": 0.8757, + "step": 107560 + }, + { + "epoch": 0.7786633079256154, + "grad_norm": 0.14703209698200226, + "learning_rate": 4.221343930740444e-06, + "loss": 0.8835, + "step": 107570 + }, + { + "epoch": 0.7787356945862016, + "grad_norm": 0.15349993109703064, + "learning_rate": 4.221271544079857e-06, + "loss": 0.8685, + "step": 107580 + }, + { + "epoch": 0.7788080812467878, + "grad_norm": 0.16429787874221802, + "learning_rate": 4.221199157419271e-06, + "loss": 0.8891, + "step": 107590 + }, + { + "epoch": 0.778880467907374, + "grad_norm": 0.15371567010879517, + "learning_rate": 4.221126770758685e-06, + "loss": 0.8729, + "step": 107600 + }, + { + "epoch": 0.7789528545679603, + "grad_norm": 0.1502668410539627, + "learning_rate": 4.221054384098099e-06, + "loss": 0.8834, + "step": 107610 + }, + { + "epoch": 0.7790252412285464, + "grad_norm": 0.166198268532753, + "learning_rate": 4.2209819974375126e-06, + "loss": 0.876, + "step": 107620 + }, + { + "epoch": 0.7790976278891326, + "grad_norm": 0.1724855899810791, + "learning_rate": 4.220909610776926e-06, + "loss": 0.8752, + "step": 107630 + }, + { + "epoch": 0.7791700145497188, + "grad_norm": 0.15645526349544525, + "learning_rate": 4.220837224116341e-06, + "loss": 0.8787, + "step": 107640 + }, + { + "epoch": 0.779242401210305, + "grad_norm": 0.16536635160446167, + "learning_rate": 4.220764837455754e-06, + "loss": 0.8852, + "step": 107650 + }, + { + "epoch": 0.7793147878708911, + "grad_norm": 0.1508960872888565, + "learning_rate": 4.220692450795168e-06, + "loss": 0.8768, + "step": 107660 + }, + { + "epoch": 0.7793871745314773, + "grad_norm": 0.1536785364151001, + "learning_rate": 4.2206200641345815e-06, + "loss": 0.8825, + "step": 107670 + }, + { + "epoch": 0.7794595611920635, + "grad_norm": 0.16157682240009308, + "learning_rate": 4.220547677473996e-06, + "loss": 0.8861, + "step": 107680 + }, + { + "epoch": 0.7795319478526497, + "grad_norm": 0.15818680822849274, + "learning_rate": 4.2204752908134096e-06, + "loss": 0.8745, + "step": 107690 + }, + { + "epoch": 0.7796043345132359, + "grad_norm": 0.16059935092926025, + "learning_rate": 4.220402904152823e-06, + "loss": 0.8934, + "step": 107700 + }, + { + "epoch": 0.7796767211738221, + "grad_norm": 0.15591976046562195, + "learning_rate": 4.220330517492237e-06, + "loss": 0.8838, + "step": 107710 + }, + { + "epoch": 0.7797491078344083, + "grad_norm": 0.15530046820640564, + "learning_rate": 4.220258130831651e-06, + "loss": 0.8794, + "step": 107720 + }, + { + "epoch": 0.7798214944949945, + "grad_norm": 0.1756300926208496, + "learning_rate": 4.220185744171065e-06, + "loss": 0.8819, + "step": 107730 + }, + { + "epoch": 0.7798938811555807, + "grad_norm": 0.16055084764957428, + "learning_rate": 4.2201133575104785e-06, + "loss": 0.8594, + "step": 107740 + }, + { + "epoch": 0.7799662678161668, + "grad_norm": 0.1619112342596054, + "learning_rate": 4.220040970849892e-06, + "loss": 0.8905, + "step": 107750 + }, + { + "epoch": 0.780038654476753, + "grad_norm": 0.15275640785694122, + "learning_rate": 4.2199685841893066e-06, + "loss": 0.8799, + "step": 107760 + }, + { + "epoch": 0.7801110411373392, + "grad_norm": 0.1480998545885086, + "learning_rate": 4.219896197528719e-06, + "loss": 0.8672, + "step": 107770 + }, + { + "epoch": 0.7801834277979254, + "grad_norm": 0.15729357302188873, + "learning_rate": 4.219823810868133e-06, + "loss": 0.8956, + "step": 107780 + }, + { + "epoch": 0.7802558144585116, + "grad_norm": 0.1509513109922409, + "learning_rate": 4.2197514242075474e-06, + "loss": 0.8796, + "step": 107790 + }, + { + "epoch": 0.7803282011190977, + "grad_norm": 0.15310880541801453, + "learning_rate": 4.219679037546961e-06, + "loss": 0.8726, + "step": 107800 + }, + { + "epoch": 0.7804005877796839, + "grad_norm": 0.16130521893501282, + "learning_rate": 4.219606650886375e-06, + "loss": 0.8855, + "step": 107810 + }, + { + "epoch": 0.7804729744402702, + "grad_norm": 0.18404695391654968, + "learning_rate": 4.219534264225788e-06, + "loss": 0.8839, + "step": 107820 + }, + { + "epoch": 0.7805453611008564, + "grad_norm": 0.16077350080013275, + "learning_rate": 4.219461877565203e-06, + "loss": 0.8933, + "step": 107830 + }, + { + "epoch": 0.7806177477614425, + "grad_norm": 0.1624414175748825, + "learning_rate": 4.219389490904616e-06, + "loss": 0.871, + "step": 107840 + }, + { + "epoch": 0.7806901344220287, + "grad_norm": 0.16145184636116028, + "learning_rate": 4.21931710424403e-06, + "loss": 0.8888, + "step": 107850 + }, + { + "epoch": 0.7807625210826149, + "grad_norm": 0.17542187869548798, + "learning_rate": 4.219244717583444e-06, + "loss": 0.8951, + "step": 107860 + }, + { + "epoch": 0.7808349077432011, + "grad_norm": 0.14952528476715088, + "learning_rate": 4.219172330922858e-06, + "loss": 0.8886, + "step": 107870 + }, + { + "epoch": 0.7809072944037873, + "grad_norm": 0.1723993569612503, + "learning_rate": 4.219099944262272e-06, + "loss": 0.8807, + "step": 107880 + }, + { + "epoch": 0.7809796810643734, + "grad_norm": 0.1505005955696106, + "learning_rate": 4.219027557601685e-06, + "loss": 0.8711, + "step": 107890 + }, + { + "epoch": 0.7810520677249596, + "grad_norm": 0.15281620621681213, + "learning_rate": 4.218955170941099e-06, + "loss": 0.887, + "step": 107900 + }, + { + "epoch": 0.7811244543855458, + "grad_norm": 0.1522083282470703, + "learning_rate": 4.218882784280513e-06, + "loss": 0.8973, + "step": 107910 + }, + { + "epoch": 0.7811968410461321, + "grad_norm": 0.16179485619068146, + "learning_rate": 4.218810397619927e-06, + "loss": 0.8856, + "step": 107920 + }, + { + "epoch": 0.7812692277067183, + "grad_norm": 0.15184660255908966, + "learning_rate": 4.218738010959341e-06, + "loss": 0.8856, + "step": 107930 + }, + { + "epoch": 0.7813416143673044, + "grad_norm": 0.19640418887138367, + "learning_rate": 4.218665624298754e-06, + "loss": 0.8837, + "step": 107940 + }, + { + "epoch": 0.7814140010278906, + "grad_norm": 0.16016776859760284, + "learning_rate": 4.218593237638169e-06, + "loss": 0.887, + "step": 107950 + }, + { + "epoch": 0.7814863876884768, + "grad_norm": 0.1551421880722046, + "learning_rate": 4.218520850977582e-06, + "loss": 0.8665, + "step": 107960 + }, + { + "epoch": 0.781558774349063, + "grad_norm": 0.16472944617271423, + "learning_rate": 4.218448464316996e-06, + "loss": 0.8814, + "step": 107970 + }, + { + "epoch": 0.7816311610096491, + "grad_norm": 0.1580583155155182, + "learning_rate": 4.2183760776564095e-06, + "loss": 0.8823, + "step": 107980 + }, + { + "epoch": 0.7817035476702353, + "grad_norm": 0.1603555530309677, + "learning_rate": 4.218303690995824e-06, + "loss": 0.872, + "step": 107990 + }, + { + "epoch": 0.7817759343308215, + "grad_norm": 0.1556985229253769, + "learning_rate": 4.218231304335238e-06, + "loss": 0.8787, + "step": 108000 + }, + { + "epoch": 0.7818483209914077, + "grad_norm": 0.1534159928560257, + "learning_rate": 4.218158917674651e-06, + "loss": 0.9064, + "step": 108010 + }, + { + "epoch": 0.7819207076519938, + "grad_norm": 0.1716359406709671, + "learning_rate": 4.218086531014065e-06, + "loss": 0.8834, + "step": 108020 + }, + { + "epoch": 0.7819930943125801, + "grad_norm": 0.15171417593955994, + "learning_rate": 4.218014144353479e-06, + "loss": 0.8825, + "step": 108030 + }, + { + "epoch": 0.7820654809731663, + "grad_norm": 0.1647900491952896, + "learning_rate": 4.217941757692893e-06, + "loss": 0.8718, + "step": 108040 + }, + { + "epoch": 0.7821378676337525, + "grad_norm": 0.1467769294977188, + "learning_rate": 4.2178693710323065e-06, + "loss": 0.8832, + "step": 108050 + }, + { + "epoch": 0.7822102542943387, + "grad_norm": 0.15823620557785034, + "learning_rate": 4.21779698437172e-06, + "loss": 0.8914, + "step": 108060 + }, + { + "epoch": 0.7822826409549248, + "grad_norm": 0.17605745792388916, + "learning_rate": 4.217724597711134e-06, + "loss": 0.8871, + "step": 108070 + }, + { + "epoch": 0.782355027615511, + "grad_norm": 0.15321215987205505, + "learning_rate": 4.217652211050548e-06, + "loss": 0.8906, + "step": 108080 + }, + { + "epoch": 0.7824274142760972, + "grad_norm": 0.15181642770767212, + "learning_rate": 4.217579824389962e-06, + "loss": 0.8662, + "step": 108090 + }, + { + "epoch": 0.7824998009366834, + "grad_norm": 0.17298097908496857, + "learning_rate": 4.2175074377293755e-06, + "loss": 0.8795, + "step": 108100 + }, + { + "epoch": 0.7825721875972695, + "grad_norm": 0.1724858433008194, + "learning_rate": 4.217435051068789e-06, + "loss": 0.9011, + "step": 108110 + }, + { + "epoch": 0.7826445742578557, + "grad_norm": 0.1518513262271881, + "learning_rate": 4.2173626644082035e-06, + "loss": 0.8938, + "step": 108120 + }, + { + "epoch": 0.7827169609184419, + "grad_norm": 0.15819215774536133, + "learning_rate": 4.217290277747617e-06, + "loss": 0.8725, + "step": 108130 + }, + { + "epoch": 0.7827893475790282, + "grad_norm": 0.1756516396999359, + "learning_rate": 4.217217891087031e-06, + "loss": 0.8867, + "step": 108140 + }, + { + "epoch": 0.7828617342396144, + "grad_norm": 0.1819458156824112, + "learning_rate": 4.217145504426444e-06, + "loss": 0.8883, + "step": 108150 + }, + { + "epoch": 0.7829341209002005, + "grad_norm": 0.14967991411685944, + "learning_rate": 4.217073117765859e-06, + "loss": 0.8814, + "step": 108160 + }, + { + "epoch": 0.7830065075607867, + "grad_norm": 0.15189316868782043, + "learning_rate": 4.2170007311052725e-06, + "loss": 0.8822, + "step": 108170 + }, + { + "epoch": 0.7830788942213729, + "grad_norm": 0.159009650349617, + "learning_rate": 4.216928344444686e-06, + "loss": 0.8774, + "step": 108180 + }, + { + "epoch": 0.7831512808819591, + "grad_norm": 0.15388379991054535, + "learning_rate": 4.2168559577841e-06, + "loss": 0.8821, + "step": 108190 + }, + { + "epoch": 0.7832236675425452, + "grad_norm": 0.1746244877576828, + "learning_rate": 4.216783571123514e-06, + "loss": 0.8937, + "step": 108200 + }, + { + "epoch": 0.7832960542031314, + "grad_norm": 0.1544540673494339, + "learning_rate": 4.216711184462928e-06, + "loss": 0.8892, + "step": 108210 + }, + { + "epoch": 0.7833684408637176, + "grad_norm": 0.15060152113437653, + "learning_rate": 4.216638797802341e-06, + "loss": 0.8786, + "step": 108220 + }, + { + "epoch": 0.7834408275243038, + "grad_norm": 0.15450206398963928, + "learning_rate": 4.216566411141755e-06, + "loss": 0.8823, + "step": 108230 + }, + { + "epoch": 0.7835132141848901, + "grad_norm": 0.1713588386774063, + "learning_rate": 4.2164940244811695e-06, + "loss": 0.896, + "step": 108240 + }, + { + "epoch": 0.7835856008454762, + "grad_norm": 0.1668989658355713, + "learning_rate": 4.216421637820583e-06, + "loss": 0.8828, + "step": 108250 + }, + { + "epoch": 0.7836579875060624, + "grad_norm": 0.1604335904121399, + "learning_rate": 4.216349251159997e-06, + "loss": 0.8747, + "step": 108260 + }, + { + "epoch": 0.7837303741666486, + "grad_norm": 0.1643938422203064, + "learning_rate": 4.21627686449941e-06, + "loss": 0.8799, + "step": 108270 + }, + { + "epoch": 0.7838027608272348, + "grad_norm": 0.1590445637702942, + "learning_rate": 4.216204477838825e-06, + "loss": 0.8859, + "step": 108280 + }, + { + "epoch": 0.783875147487821, + "grad_norm": 0.16419881582260132, + "learning_rate": 4.216132091178238e-06, + "loss": 0.8834, + "step": 108290 + }, + { + "epoch": 0.7839475341484071, + "grad_norm": 0.14862844347953796, + "learning_rate": 4.216059704517651e-06, + "loss": 0.8793, + "step": 108300 + }, + { + "epoch": 0.7840199208089933, + "grad_norm": 0.1492660492658615, + "learning_rate": 4.215987317857066e-06, + "loss": 0.8808, + "step": 108310 + }, + { + "epoch": 0.7840923074695795, + "grad_norm": 0.14114709198474884, + "learning_rate": 4.215914931196479e-06, + "loss": 0.865, + "step": 108320 + }, + { + "epoch": 0.7841646941301657, + "grad_norm": 0.16671665012836456, + "learning_rate": 4.215842544535893e-06, + "loss": 0.8726, + "step": 108330 + }, + { + "epoch": 0.7842370807907518, + "grad_norm": 0.1602155566215515, + "learning_rate": 4.2157701578753065e-06, + "loss": 0.8726, + "step": 108340 + }, + { + "epoch": 0.7843094674513381, + "grad_norm": 0.1590067595243454, + "learning_rate": 4.215697771214721e-06, + "loss": 0.8723, + "step": 108350 + }, + { + "epoch": 0.7843818541119243, + "grad_norm": 0.16449514031410217, + "learning_rate": 4.2156253845541346e-06, + "loss": 0.8795, + "step": 108360 + }, + { + "epoch": 0.7844542407725105, + "grad_norm": 0.16773192584514618, + "learning_rate": 4.215552997893548e-06, + "loss": 0.8772, + "step": 108370 + }, + { + "epoch": 0.7845266274330966, + "grad_norm": 0.15345753729343414, + "learning_rate": 4.215480611232962e-06, + "loss": 0.8893, + "step": 108380 + }, + { + "epoch": 0.7845990140936828, + "grad_norm": 0.1570970118045807, + "learning_rate": 4.215408224572376e-06, + "loss": 0.8911, + "step": 108390 + }, + { + "epoch": 0.784671400754269, + "grad_norm": 0.480564683675766, + "learning_rate": 4.21533583791179e-06, + "loss": 0.881, + "step": 108400 + }, + { + "epoch": 0.7847437874148552, + "grad_norm": 0.1536029726266861, + "learning_rate": 4.2152634512512035e-06, + "loss": 0.8657, + "step": 108410 + }, + { + "epoch": 0.7848161740754414, + "grad_norm": 0.15583275258541107, + "learning_rate": 4.215191064590617e-06, + "loss": 0.8856, + "step": 108420 + }, + { + "epoch": 0.7848885607360275, + "grad_norm": 0.17179587483406067, + "learning_rate": 4.2151186779300316e-06, + "loss": 0.8915, + "step": 108430 + }, + { + "epoch": 0.7849609473966137, + "grad_norm": 0.16397567093372345, + "learning_rate": 4.215046291269445e-06, + "loss": 0.87, + "step": 108440 + }, + { + "epoch": 0.7850333340572, + "grad_norm": 0.1486414670944214, + "learning_rate": 4.214973904608859e-06, + "loss": 0.8715, + "step": 108450 + }, + { + "epoch": 0.7851057207177862, + "grad_norm": 0.14893367886543274, + "learning_rate": 4.214901517948272e-06, + "loss": 0.88, + "step": 108460 + }, + { + "epoch": 0.7851781073783723, + "grad_norm": 0.15935064852237701, + "learning_rate": 4.214829131287687e-06, + "loss": 0.8744, + "step": 108470 + }, + { + "epoch": 0.7852504940389585, + "grad_norm": 0.16078276932239532, + "learning_rate": 4.2147567446271005e-06, + "loss": 0.8863, + "step": 108480 + }, + { + "epoch": 0.7853228806995447, + "grad_norm": 0.16032767295837402, + "learning_rate": 4.214684357966514e-06, + "loss": 0.8813, + "step": 108490 + }, + { + "epoch": 0.7853952673601309, + "grad_norm": 0.2287614494562149, + "learning_rate": 4.214611971305928e-06, + "loss": 0.8818, + "step": 108500 + }, + { + "epoch": 0.785467654020717, + "grad_norm": 0.16381344199180603, + "learning_rate": 4.214539584645342e-06, + "loss": 0.884, + "step": 108510 + }, + { + "epoch": 0.7855400406813032, + "grad_norm": 0.16530375182628632, + "learning_rate": 4.214467197984756e-06, + "loss": 0.8746, + "step": 108520 + }, + { + "epoch": 0.7856124273418894, + "grad_norm": 0.14450208842754364, + "learning_rate": 4.2143948113241694e-06, + "loss": 0.8855, + "step": 108530 + }, + { + "epoch": 0.7856848140024756, + "grad_norm": 0.16523830592632294, + "learning_rate": 4.214322424663583e-06, + "loss": 0.8822, + "step": 108540 + }, + { + "epoch": 0.7857572006630618, + "grad_norm": 0.175279900431633, + "learning_rate": 4.2142500380029975e-06, + "loss": 0.8739, + "step": 108550 + }, + { + "epoch": 0.785829587323648, + "grad_norm": 0.16809509694576263, + "learning_rate": 4.214177651342411e-06, + "loss": 0.8929, + "step": 108560 + }, + { + "epoch": 0.7859019739842342, + "grad_norm": 0.14289383590221405, + "learning_rate": 4.214105264681825e-06, + "loss": 0.8728, + "step": 108570 + }, + { + "epoch": 0.7859743606448204, + "grad_norm": 0.14453014731407166, + "learning_rate": 4.214032878021238e-06, + "loss": 0.8962, + "step": 108580 + }, + { + "epoch": 0.7860467473054066, + "grad_norm": 0.1566169410943985, + "learning_rate": 4.213960491360653e-06, + "loss": 0.8758, + "step": 108590 + }, + { + "epoch": 0.7861191339659928, + "grad_norm": 0.1643407642841339, + "learning_rate": 4.2138881047000664e-06, + "loss": 0.882, + "step": 108600 + }, + { + "epoch": 0.7861915206265789, + "grad_norm": 0.16235338151454926, + "learning_rate": 4.21381571803948e-06, + "loss": 0.8701, + "step": 108610 + }, + { + "epoch": 0.7862639072871651, + "grad_norm": 0.153314471244812, + "learning_rate": 4.213743331378894e-06, + "loss": 0.8827, + "step": 108620 + }, + { + "epoch": 0.7863362939477513, + "grad_norm": 0.15935984253883362, + "learning_rate": 4.213670944718308e-06, + "loss": 0.8793, + "step": 108630 + }, + { + "epoch": 0.7864086806083375, + "grad_norm": 0.15511925518512726, + "learning_rate": 4.213598558057722e-06, + "loss": 0.8916, + "step": 108640 + }, + { + "epoch": 0.7864810672689236, + "grad_norm": 0.15609268844127655, + "learning_rate": 4.213526171397135e-06, + "loss": 0.8937, + "step": 108650 + }, + { + "epoch": 0.7865534539295098, + "grad_norm": 0.14593185484409332, + "learning_rate": 4.213453784736549e-06, + "loss": 0.8766, + "step": 108660 + }, + { + "epoch": 0.7866258405900961, + "grad_norm": 0.16142848134040833, + "learning_rate": 4.2133813980759634e-06, + "loss": 0.8711, + "step": 108670 + }, + { + "epoch": 0.7866982272506823, + "grad_norm": 0.15131999552249908, + "learning_rate": 4.213309011415377e-06, + "loss": 0.8837, + "step": 108680 + }, + { + "epoch": 0.7867706139112685, + "grad_norm": 0.16211840510368347, + "learning_rate": 4.213236624754791e-06, + "loss": 0.8916, + "step": 108690 + }, + { + "epoch": 0.7868430005718546, + "grad_norm": 0.16512945294380188, + "learning_rate": 4.213164238094204e-06, + "loss": 0.8844, + "step": 108700 + }, + { + "epoch": 0.7869153872324408, + "grad_norm": 0.15159179270267487, + "learning_rate": 4.213091851433618e-06, + "loss": 0.8814, + "step": 108710 + }, + { + "epoch": 0.786987773893027, + "grad_norm": 0.17902809381484985, + "learning_rate": 4.213019464773032e-06, + "loss": 0.8774, + "step": 108720 + }, + { + "epoch": 0.7870601605536132, + "grad_norm": 0.16519932448863983, + "learning_rate": 4.212947078112446e-06, + "loss": 0.8849, + "step": 108730 + }, + { + "epoch": 0.7871325472141993, + "grad_norm": 0.16074302792549133, + "learning_rate": 4.21287469145186e-06, + "loss": 0.8875, + "step": 108740 + }, + { + "epoch": 0.7872049338747855, + "grad_norm": 0.1652543991804123, + "learning_rate": 4.212802304791273e-06, + "loss": 0.8727, + "step": 108750 + }, + { + "epoch": 0.7872773205353717, + "grad_norm": 0.17259052395820618, + "learning_rate": 4.212729918130688e-06, + "loss": 0.8849, + "step": 108760 + }, + { + "epoch": 0.787349707195958, + "grad_norm": 0.17187197506427765, + "learning_rate": 4.212657531470101e-06, + "loss": 0.8853, + "step": 108770 + }, + { + "epoch": 0.7874220938565442, + "grad_norm": 0.1663118600845337, + "learning_rate": 4.212585144809515e-06, + "loss": 0.8764, + "step": 108780 + }, + { + "epoch": 0.7874944805171303, + "grad_norm": 0.15814106166362762, + "learning_rate": 4.2125127581489285e-06, + "loss": 0.8648, + "step": 108790 + }, + { + "epoch": 0.7875668671777165, + "grad_norm": 0.17825469374656677, + "learning_rate": 4.212440371488343e-06, + "loss": 0.8818, + "step": 108800 + }, + { + "epoch": 0.7876392538383027, + "grad_norm": 0.191095769405365, + "learning_rate": 4.212367984827757e-06, + "loss": 0.8861, + "step": 108810 + }, + { + "epoch": 0.7877116404988889, + "grad_norm": 0.16488607227802277, + "learning_rate": 4.21229559816717e-06, + "loss": 0.8915, + "step": 108820 + }, + { + "epoch": 0.787784027159475, + "grad_norm": 0.15883904695510864, + "learning_rate": 4.212223211506584e-06, + "loss": 0.8806, + "step": 108830 + }, + { + "epoch": 0.7878564138200612, + "grad_norm": 0.1884288191795349, + "learning_rate": 4.2121508248459975e-06, + "loss": 0.8955, + "step": 108840 + }, + { + "epoch": 0.7879288004806474, + "grad_norm": 0.15324948728084564, + "learning_rate": 4.212078438185411e-06, + "loss": 0.8832, + "step": 108850 + }, + { + "epoch": 0.7880011871412336, + "grad_norm": 0.16065652668476105, + "learning_rate": 4.2120060515248255e-06, + "loss": 0.8755, + "step": 108860 + }, + { + "epoch": 0.7880735738018197, + "grad_norm": 0.15885667502880096, + "learning_rate": 4.211933664864239e-06, + "loss": 0.8754, + "step": 108870 + }, + { + "epoch": 0.788145960462406, + "grad_norm": 0.1391364336013794, + "learning_rate": 4.211861278203653e-06, + "loss": 0.8806, + "step": 108880 + }, + { + "epoch": 0.7882183471229922, + "grad_norm": 0.15506839752197266, + "learning_rate": 4.211788891543066e-06, + "loss": 0.8831, + "step": 108890 + }, + { + "epoch": 0.7882907337835784, + "grad_norm": 0.15888062119483948, + "learning_rate": 4.21171650488248e-06, + "loss": 0.8819, + "step": 108900 + }, + { + "epoch": 0.7883631204441646, + "grad_norm": 0.1553460657596588, + "learning_rate": 4.2116441182218945e-06, + "loss": 0.8916, + "step": 108910 + }, + { + "epoch": 0.7884355071047507, + "grad_norm": 0.14751103520393372, + "learning_rate": 4.211571731561308e-06, + "loss": 0.876, + "step": 108920 + }, + { + "epoch": 0.7885078937653369, + "grad_norm": 0.17180080711841583, + "learning_rate": 4.211499344900722e-06, + "loss": 0.8759, + "step": 108930 + }, + { + "epoch": 0.7885802804259231, + "grad_norm": 0.15554192662239075, + "learning_rate": 4.211426958240135e-06, + "loss": 0.8813, + "step": 108940 + }, + { + "epoch": 0.7886526670865093, + "grad_norm": 0.1551918089389801, + "learning_rate": 4.21135457157955e-06, + "loss": 0.884, + "step": 108950 + }, + { + "epoch": 0.7887250537470955, + "grad_norm": 0.17789483070373535, + "learning_rate": 4.211282184918963e-06, + "loss": 0.8821, + "step": 108960 + }, + { + "epoch": 0.7887974404076816, + "grad_norm": 0.16537193953990936, + "learning_rate": 4.211209798258377e-06, + "loss": 0.8805, + "step": 108970 + }, + { + "epoch": 0.7888698270682679, + "grad_norm": 0.15402130782604218, + "learning_rate": 4.211137411597791e-06, + "loss": 0.8804, + "step": 108980 + }, + { + "epoch": 0.7889422137288541, + "grad_norm": 0.15392087399959564, + "learning_rate": 4.211065024937205e-06, + "loss": 0.8975, + "step": 108990 + }, + { + "epoch": 0.7890146003894403, + "grad_norm": 0.1604469269514084, + "learning_rate": 4.210992638276619e-06, + "loss": 0.8778, + "step": 109000 + }, + { + "epoch": 0.7890869870500264, + "grad_norm": 0.14916957914829254, + "learning_rate": 4.210920251616032e-06, + "loss": 0.8859, + "step": 109010 + }, + { + "epoch": 0.7891593737106126, + "grad_norm": 0.15784524381160736, + "learning_rate": 4.210847864955446e-06, + "loss": 0.8833, + "step": 109020 + }, + { + "epoch": 0.7892317603711988, + "grad_norm": 0.26077041029930115, + "learning_rate": 4.21077547829486e-06, + "loss": 0.8962, + "step": 109030 + }, + { + "epoch": 0.789304147031785, + "grad_norm": 0.16116316616535187, + "learning_rate": 4.210703091634274e-06, + "loss": 0.8888, + "step": 109040 + }, + { + "epoch": 0.7893765336923712, + "grad_norm": 0.24238155782222748, + "learning_rate": 4.210630704973688e-06, + "loss": 0.8827, + "step": 109050 + }, + { + "epoch": 0.7894489203529573, + "grad_norm": 0.14621655642986298, + "learning_rate": 4.210558318313101e-06, + "loss": 0.8893, + "step": 109060 + }, + { + "epoch": 0.7895213070135435, + "grad_norm": 0.2018691748380661, + "learning_rate": 4.210485931652516e-06, + "loss": 0.8831, + "step": 109070 + }, + { + "epoch": 0.7895936936741297, + "grad_norm": 0.148194819688797, + "learning_rate": 4.210413544991929e-06, + "loss": 0.876, + "step": 109080 + }, + { + "epoch": 0.789666080334716, + "grad_norm": 0.15002113580703735, + "learning_rate": 4.210341158331343e-06, + "loss": 0.8754, + "step": 109090 + }, + { + "epoch": 0.7897384669953021, + "grad_norm": 0.15236304700374603, + "learning_rate": 4.2102687716707566e-06, + "loss": 0.8849, + "step": 109100 + }, + { + "epoch": 0.7898108536558883, + "grad_norm": 0.17015591263771057, + "learning_rate": 4.210196385010171e-06, + "loss": 0.8818, + "step": 109110 + }, + { + "epoch": 0.7898832403164745, + "grad_norm": 0.16506314277648926, + "learning_rate": 4.210123998349585e-06, + "loss": 0.8789, + "step": 109120 + }, + { + "epoch": 0.7899556269770607, + "grad_norm": 0.16899830102920532, + "learning_rate": 4.210051611688998e-06, + "loss": 0.8763, + "step": 109130 + }, + { + "epoch": 0.7900280136376469, + "grad_norm": 0.16103564202785492, + "learning_rate": 4.209979225028412e-06, + "loss": 0.8816, + "step": 109140 + }, + { + "epoch": 0.790100400298233, + "grad_norm": 0.15541161596775055, + "learning_rate": 4.209906838367826e-06, + "loss": 0.8864, + "step": 109150 + }, + { + "epoch": 0.7901727869588192, + "grad_norm": 0.15353739261627197, + "learning_rate": 4.20983445170724e-06, + "loss": 0.8881, + "step": 109160 + }, + { + "epoch": 0.7902451736194054, + "grad_norm": 0.1619664430618286, + "learning_rate": 4.2097620650466536e-06, + "loss": 0.8927, + "step": 109170 + }, + { + "epoch": 0.7903175602799916, + "grad_norm": 0.15257126092910767, + "learning_rate": 4.209689678386067e-06, + "loss": 0.8725, + "step": 109180 + }, + { + "epoch": 0.7903899469405777, + "grad_norm": 0.1462344229221344, + "learning_rate": 4.209617291725482e-06, + "loss": 0.8893, + "step": 109190 + }, + { + "epoch": 0.790462333601164, + "grad_norm": 0.15440794825553894, + "learning_rate": 4.209544905064895e-06, + "loss": 0.8796, + "step": 109200 + }, + { + "epoch": 0.7905347202617502, + "grad_norm": 0.1578925997018814, + "learning_rate": 4.209472518404309e-06, + "loss": 0.8796, + "step": 109210 + }, + { + "epoch": 0.7906071069223364, + "grad_norm": 0.1685589849948883, + "learning_rate": 4.2094001317437225e-06, + "loss": 0.8761, + "step": 109220 + }, + { + "epoch": 0.7906794935829226, + "grad_norm": 0.16151396930217743, + "learning_rate": 4.209327745083137e-06, + "loss": 0.8849, + "step": 109230 + }, + { + "epoch": 0.7907518802435087, + "grad_norm": 0.16625353693962097, + "learning_rate": 4.2092553584225506e-06, + "loss": 0.8767, + "step": 109240 + }, + { + "epoch": 0.7908242669040949, + "grad_norm": 0.16339325904846191, + "learning_rate": 4.209182971761964e-06, + "loss": 0.8856, + "step": 109250 + }, + { + "epoch": 0.7908966535646811, + "grad_norm": 0.16669736802577972, + "learning_rate": 4.209110585101378e-06, + "loss": 0.887, + "step": 109260 + }, + { + "epoch": 0.7909690402252673, + "grad_norm": 0.15320435166358948, + "learning_rate": 4.209038198440792e-06, + "loss": 0.8849, + "step": 109270 + }, + { + "epoch": 0.7910414268858534, + "grad_norm": 0.1580091118812561, + "learning_rate": 4.208965811780206e-06, + "loss": 0.8786, + "step": 109280 + }, + { + "epoch": 0.7911138135464396, + "grad_norm": 0.15802930295467377, + "learning_rate": 4.2088934251196195e-06, + "loss": 0.8855, + "step": 109290 + }, + { + "epoch": 0.7911862002070259, + "grad_norm": 0.1613604873418808, + "learning_rate": 4.208821038459033e-06, + "loss": 0.8679, + "step": 109300 + }, + { + "epoch": 0.7912585868676121, + "grad_norm": 0.17970921099185944, + "learning_rate": 4.208748651798448e-06, + "loss": 0.8885, + "step": 109310 + }, + { + "epoch": 0.7913309735281983, + "grad_norm": 0.18362759053707123, + "learning_rate": 4.208676265137861e-06, + "loss": 0.8811, + "step": 109320 + }, + { + "epoch": 0.7914033601887844, + "grad_norm": 0.16677528619766235, + "learning_rate": 4.208603878477275e-06, + "loss": 0.8826, + "step": 109330 + }, + { + "epoch": 0.7914757468493706, + "grad_norm": 0.17493285238742828, + "learning_rate": 4.2085314918166884e-06, + "loss": 0.873, + "step": 109340 + }, + { + "epoch": 0.7915481335099568, + "grad_norm": 0.16501615941524506, + "learning_rate": 4.208459105156102e-06, + "loss": 0.8832, + "step": 109350 + }, + { + "epoch": 0.791620520170543, + "grad_norm": 0.15602637827396393, + "learning_rate": 4.208386718495516e-06, + "loss": 0.8867, + "step": 109360 + }, + { + "epoch": 0.7916929068311291, + "grad_norm": 0.1481214165687561, + "learning_rate": 4.208314331834929e-06, + "loss": 0.8815, + "step": 109370 + }, + { + "epoch": 0.7917652934917153, + "grad_norm": 0.1560535579919815, + "learning_rate": 4.208241945174344e-06, + "loss": 0.8858, + "step": 109380 + }, + { + "epoch": 0.7918376801523015, + "grad_norm": 0.21015238761901855, + "learning_rate": 4.208169558513757e-06, + "loss": 0.8827, + "step": 109390 + }, + { + "epoch": 0.7919100668128877, + "grad_norm": 0.16126051545143127, + "learning_rate": 4.208097171853171e-06, + "loss": 0.8978, + "step": 109400 + }, + { + "epoch": 0.791982453473474, + "grad_norm": 0.15194587409496307, + "learning_rate": 4.208024785192585e-06, + "loss": 0.8785, + "step": 109410 + }, + { + "epoch": 0.7920548401340601, + "grad_norm": 0.1646006554365158, + "learning_rate": 4.207952398531999e-06, + "loss": 0.88, + "step": 109420 + }, + { + "epoch": 0.7921272267946463, + "grad_norm": 0.21086090803146362, + "learning_rate": 4.207880011871413e-06, + "loss": 0.8909, + "step": 109430 + }, + { + "epoch": 0.7921996134552325, + "grad_norm": 0.1583826094865799, + "learning_rate": 4.207807625210826e-06, + "loss": 0.8872, + "step": 109440 + }, + { + "epoch": 0.7922720001158187, + "grad_norm": 0.15756238996982574, + "learning_rate": 4.20773523855024e-06, + "loss": 0.8844, + "step": 109450 + }, + { + "epoch": 0.7923443867764048, + "grad_norm": 0.1494230180978775, + "learning_rate": 4.207662851889654e-06, + "loss": 0.8814, + "step": 109460 + }, + { + "epoch": 0.792416773436991, + "grad_norm": 0.17796126008033752, + "learning_rate": 4.207590465229068e-06, + "loss": 0.8824, + "step": 109470 + }, + { + "epoch": 0.7924891600975772, + "grad_norm": 0.16645440459251404, + "learning_rate": 4.207518078568482e-06, + "loss": 0.892, + "step": 109480 + }, + { + "epoch": 0.7925615467581634, + "grad_norm": 0.16064368188381195, + "learning_rate": 4.207445691907895e-06, + "loss": 0.8843, + "step": 109490 + }, + { + "epoch": 0.7926339334187495, + "grad_norm": 0.18398964405059814, + "learning_rate": 4.207373305247309e-06, + "loss": 0.8878, + "step": 109500 + }, + { + "epoch": 0.7927063200793357, + "grad_norm": 0.14704552292823792, + "learning_rate": 4.207300918586723e-06, + "loss": 0.8858, + "step": 109510 + }, + { + "epoch": 0.792778706739922, + "grad_norm": 0.15785852074623108, + "learning_rate": 4.207228531926137e-06, + "loss": 0.8876, + "step": 109520 + }, + { + "epoch": 0.7928510934005082, + "grad_norm": 0.16036728024482727, + "learning_rate": 4.2071561452655505e-06, + "loss": 0.8926, + "step": 109530 + }, + { + "epoch": 0.7929234800610944, + "grad_norm": 0.1538962423801422, + "learning_rate": 4.207083758604964e-06, + "loss": 0.89, + "step": 109540 + }, + { + "epoch": 0.7929958667216805, + "grad_norm": 0.15451432764530182, + "learning_rate": 4.207011371944379e-06, + "loss": 0.8863, + "step": 109550 + }, + { + "epoch": 0.7930682533822667, + "grad_norm": 0.1714572012424469, + "learning_rate": 4.206938985283792e-06, + "loss": 0.8779, + "step": 109560 + }, + { + "epoch": 0.7931406400428529, + "grad_norm": 0.15013469755649567, + "learning_rate": 4.206866598623206e-06, + "loss": 0.8753, + "step": 109570 + }, + { + "epoch": 0.7932130267034391, + "grad_norm": 0.15864507853984833, + "learning_rate": 4.2067942119626195e-06, + "loss": 0.8817, + "step": 109580 + }, + { + "epoch": 0.7932854133640252, + "grad_norm": 0.18490475416183472, + "learning_rate": 4.206721825302034e-06, + "loss": 0.8838, + "step": 109590 + }, + { + "epoch": 0.7933578000246114, + "grad_norm": 0.15001821517944336, + "learning_rate": 4.2066494386414475e-06, + "loss": 0.8807, + "step": 109600 + }, + { + "epoch": 0.7934301866851976, + "grad_norm": 0.1856260895729065, + "learning_rate": 4.206577051980861e-06, + "loss": 0.8846, + "step": 109610 + }, + { + "epoch": 0.7935025733457839, + "grad_norm": 0.14746098220348358, + "learning_rate": 4.206504665320275e-06, + "loss": 0.8684, + "step": 109620 + }, + { + "epoch": 0.7935749600063701, + "grad_norm": 0.16342267394065857, + "learning_rate": 4.206432278659689e-06, + "loss": 0.8689, + "step": 109630 + }, + { + "epoch": 0.7936473466669562, + "grad_norm": 0.15328651666641235, + "learning_rate": 4.206359891999103e-06, + "loss": 0.8678, + "step": 109640 + }, + { + "epoch": 0.7937197333275424, + "grad_norm": 0.164528951048851, + "learning_rate": 4.2062875053385165e-06, + "loss": 0.8783, + "step": 109650 + }, + { + "epoch": 0.7937921199881286, + "grad_norm": 0.1499549150466919, + "learning_rate": 4.20621511867793e-06, + "loss": 0.8813, + "step": 109660 + }, + { + "epoch": 0.7938645066487148, + "grad_norm": 0.14804977178573608, + "learning_rate": 4.2061427320173445e-06, + "loss": 0.8748, + "step": 109670 + }, + { + "epoch": 0.793936893309301, + "grad_norm": 0.1729278266429901, + "learning_rate": 4.206070345356758e-06, + "loss": 0.8744, + "step": 109680 + }, + { + "epoch": 0.7940092799698871, + "grad_norm": 0.15878424048423767, + "learning_rate": 4.205997958696172e-06, + "loss": 0.8887, + "step": 109690 + }, + { + "epoch": 0.7940816666304733, + "grad_norm": 0.15942630171775818, + "learning_rate": 4.205925572035585e-06, + "loss": 0.8915, + "step": 109700 + }, + { + "epoch": 0.7941540532910595, + "grad_norm": 0.16669262945652008, + "learning_rate": 4.205853185375e-06, + "loss": 0.8915, + "step": 109710 + }, + { + "epoch": 0.7942264399516457, + "grad_norm": 0.1515069305896759, + "learning_rate": 4.2057807987144135e-06, + "loss": 0.8784, + "step": 109720 + }, + { + "epoch": 0.794298826612232, + "grad_norm": 0.1678621619939804, + "learning_rate": 4.205708412053827e-06, + "loss": 0.8901, + "step": 109730 + }, + { + "epoch": 0.7943712132728181, + "grad_norm": 0.17552782595157623, + "learning_rate": 4.205636025393241e-06, + "loss": 0.8664, + "step": 109740 + }, + { + "epoch": 0.7944435999334043, + "grad_norm": 0.1632026731967926, + "learning_rate": 4.205563638732655e-06, + "loss": 0.8854, + "step": 109750 + }, + { + "epoch": 0.7945159865939905, + "grad_norm": 0.15322089195251465, + "learning_rate": 4.205491252072069e-06, + "loss": 0.8788, + "step": 109760 + }, + { + "epoch": 0.7945883732545767, + "grad_norm": 0.1526355892419815, + "learning_rate": 4.205418865411482e-06, + "loss": 0.8773, + "step": 109770 + }, + { + "epoch": 0.7946607599151628, + "grad_norm": 0.1596207320690155, + "learning_rate": 4.205346478750896e-06, + "loss": 0.8807, + "step": 109780 + }, + { + "epoch": 0.794733146575749, + "grad_norm": 0.16599130630493164, + "learning_rate": 4.2052740920903105e-06, + "loss": 0.8716, + "step": 109790 + }, + { + "epoch": 0.7948055332363352, + "grad_norm": 0.1544189453125, + "learning_rate": 4.205201705429724e-06, + "loss": 0.8769, + "step": 109800 + }, + { + "epoch": 0.7948779198969214, + "grad_norm": 0.1597534716129303, + "learning_rate": 4.205129318769138e-06, + "loss": 0.8723, + "step": 109810 + }, + { + "epoch": 0.7949503065575075, + "grad_norm": 0.15642835199832916, + "learning_rate": 4.205056932108551e-06, + "loss": 0.8835, + "step": 109820 + }, + { + "epoch": 0.7950226932180938, + "grad_norm": 0.16617697477340698, + "learning_rate": 4.204984545447966e-06, + "loss": 0.8745, + "step": 109830 + }, + { + "epoch": 0.79509507987868, + "grad_norm": 0.15826547145843506, + "learning_rate": 4.204912158787379e-06, + "loss": 0.8885, + "step": 109840 + }, + { + "epoch": 0.7951674665392662, + "grad_norm": 0.16026552021503448, + "learning_rate": 4.204839772126793e-06, + "loss": 0.8901, + "step": 109850 + }, + { + "epoch": 0.7952398531998524, + "grad_norm": 0.16128748655319214, + "learning_rate": 4.204767385466207e-06, + "loss": 0.8934, + "step": 109860 + }, + { + "epoch": 0.7953122398604385, + "grad_norm": 0.1773352324962616, + "learning_rate": 4.204694998805621e-06, + "loss": 0.8806, + "step": 109870 + }, + { + "epoch": 0.7953846265210247, + "grad_norm": 0.16145089268684387, + "learning_rate": 4.204622612145035e-06, + "loss": 0.8779, + "step": 109880 + }, + { + "epoch": 0.7954570131816109, + "grad_norm": 0.14754821360111237, + "learning_rate": 4.2045502254844475e-06, + "loss": 0.8763, + "step": 109890 + }, + { + "epoch": 0.7955293998421971, + "grad_norm": 0.15587793290615082, + "learning_rate": 4.204477838823862e-06, + "loss": 0.8794, + "step": 109900 + }, + { + "epoch": 0.7956017865027832, + "grad_norm": 0.14746980369091034, + "learning_rate": 4.2044054521632756e-06, + "loss": 0.887, + "step": 109910 + }, + { + "epoch": 0.7956741731633694, + "grad_norm": 0.15477393567562103, + "learning_rate": 4.204333065502689e-06, + "loss": 0.8808, + "step": 109920 + }, + { + "epoch": 0.7957465598239556, + "grad_norm": 0.14774759113788605, + "learning_rate": 4.204260678842103e-06, + "loss": 0.8813, + "step": 109930 + }, + { + "epoch": 0.7958189464845419, + "grad_norm": 0.1507684290409088, + "learning_rate": 4.204188292181517e-06, + "loss": 0.8908, + "step": 109940 + }, + { + "epoch": 0.795891333145128, + "grad_norm": 0.15101462602615356, + "learning_rate": 4.204115905520931e-06, + "loss": 0.8836, + "step": 109950 + }, + { + "epoch": 0.7959637198057142, + "grad_norm": 0.14433503150939941, + "learning_rate": 4.2040435188603445e-06, + "loss": 0.8769, + "step": 109960 + }, + { + "epoch": 0.7960361064663004, + "grad_norm": 0.17462539672851562, + "learning_rate": 4.203971132199758e-06, + "loss": 0.8851, + "step": 109970 + }, + { + "epoch": 0.7961084931268866, + "grad_norm": 0.1587734818458557, + "learning_rate": 4.2038987455391726e-06, + "loss": 0.8821, + "step": 109980 + }, + { + "epoch": 0.7961808797874728, + "grad_norm": 0.14876559376716614, + "learning_rate": 4.203826358878586e-06, + "loss": 0.8717, + "step": 109990 + }, + { + "epoch": 0.7962532664480589, + "grad_norm": 0.1661464273929596, + "learning_rate": 4.203753972218e-06, + "loss": 0.8884, + "step": 110000 + }, + { + "epoch": 0.7963256531086451, + "grad_norm": 0.21970897912979126, + "learning_rate": 4.203681585557413e-06, + "loss": 0.8812, + "step": 110010 + }, + { + "epoch": 0.7963980397692313, + "grad_norm": 0.15922003984451294, + "learning_rate": 4.203609198896828e-06, + "loss": 0.8924, + "step": 110020 + }, + { + "epoch": 0.7964704264298175, + "grad_norm": 0.1996181309223175, + "learning_rate": 4.2035368122362415e-06, + "loss": 0.8815, + "step": 110030 + }, + { + "epoch": 0.7965428130904036, + "grad_norm": 0.14531823992729187, + "learning_rate": 4.203464425575655e-06, + "loss": 0.8849, + "step": 110040 + }, + { + "epoch": 0.7966151997509899, + "grad_norm": 0.15441548824310303, + "learning_rate": 4.203392038915069e-06, + "loss": 0.8793, + "step": 110050 + }, + { + "epoch": 0.7966875864115761, + "grad_norm": 0.15467193722724915, + "learning_rate": 4.203319652254483e-06, + "loss": 0.8773, + "step": 110060 + }, + { + "epoch": 0.7967599730721623, + "grad_norm": 0.15833494067192078, + "learning_rate": 4.203247265593897e-06, + "loss": 0.8748, + "step": 110070 + }, + { + "epoch": 0.7968323597327485, + "grad_norm": 0.1572531759738922, + "learning_rate": 4.2031748789333104e-06, + "loss": 0.8769, + "step": 110080 + }, + { + "epoch": 0.7969047463933346, + "grad_norm": 0.16923896968364716, + "learning_rate": 4.203102492272724e-06, + "loss": 0.8714, + "step": 110090 + }, + { + "epoch": 0.7969771330539208, + "grad_norm": 0.15771649777889252, + "learning_rate": 4.2030301056121385e-06, + "loss": 0.8861, + "step": 110100 + }, + { + "epoch": 0.797049519714507, + "grad_norm": 0.1439589411020279, + "learning_rate": 4.202957718951552e-06, + "loss": 0.8705, + "step": 110110 + }, + { + "epoch": 0.7971219063750932, + "grad_norm": 0.14609919488430023, + "learning_rate": 4.202885332290966e-06, + "loss": 0.883, + "step": 110120 + }, + { + "epoch": 0.7971942930356793, + "grad_norm": 0.15220913290977478, + "learning_rate": 4.202812945630379e-06, + "loss": 0.889, + "step": 110130 + }, + { + "epoch": 0.7972666796962655, + "grad_norm": 0.16518516838550568, + "learning_rate": 4.202740558969793e-06, + "loss": 0.8745, + "step": 110140 + }, + { + "epoch": 0.7973390663568518, + "grad_norm": 0.15049389004707336, + "learning_rate": 4.2026681723092074e-06, + "loss": 0.8761, + "step": 110150 + }, + { + "epoch": 0.797411453017438, + "grad_norm": 0.13817547261714935, + "learning_rate": 4.202595785648621e-06, + "loss": 0.8687, + "step": 110160 + }, + { + "epoch": 0.7974838396780242, + "grad_norm": 0.15669967234134674, + "learning_rate": 4.202523398988035e-06, + "loss": 0.8928, + "step": 110170 + }, + { + "epoch": 0.7975562263386103, + "grad_norm": 0.15119336545467377, + "learning_rate": 4.202451012327448e-06, + "loss": 0.8827, + "step": 110180 + }, + { + "epoch": 0.7976286129991965, + "grad_norm": 0.14785991609096527, + "learning_rate": 4.202378625666863e-06, + "loss": 0.8917, + "step": 110190 + }, + { + "epoch": 0.7977009996597827, + "grad_norm": 0.15478040277957916, + "learning_rate": 4.202306239006276e-06, + "loss": 0.873, + "step": 110200 + }, + { + "epoch": 0.7977733863203689, + "grad_norm": 0.17014062404632568, + "learning_rate": 4.20223385234569e-06, + "loss": 0.885, + "step": 110210 + }, + { + "epoch": 0.797845772980955, + "grad_norm": 0.1526460498571396, + "learning_rate": 4.202161465685104e-06, + "loss": 0.8793, + "step": 110220 + }, + { + "epoch": 0.7979181596415412, + "grad_norm": 0.15299007296562195, + "learning_rate": 4.202089079024518e-06, + "loss": 0.8705, + "step": 110230 + }, + { + "epoch": 0.7979905463021274, + "grad_norm": 0.1601126492023468, + "learning_rate": 4.202016692363932e-06, + "loss": 0.8854, + "step": 110240 + }, + { + "epoch": 0.7980629329627136, + "grad_norm": 0.14891035854816437, + "learning_rate": 4.201944305703345e-06, + "loss": 0.8736, + "step": 110250 + }, + { + "epoch": 0.7981353196232999, + "grad_norm": 0.17252424359321594, + "learning_rate": 4.201871919042759e-06, + "loss": 0.8858, + "step": 110260 + }, + { + "epoch": 0.798207706283886, + "grad_norm": 0.16554145514965057, + "learning_rate": 4.201799532382173e-06, + "loss": 0.8679, + "step": 110270 + }, + { + "epoch": 0.7982800929444722, + "grad_norm": 0.17143869400024414, + "learning_rate": 4.201727145721587e-06, + "loss": 0.8895, + "step": 110280 + }, + { + "epoch": 0.7983524796050584, + "grad_norm": 0.14797469973564148, + "learning_rate": 4.201654759061001e-06, + "loss": 0.8776, + "step": 110290 + }, + { + "epoch": 0.7984248662656446, + "grad_norm": 0.15571478009223938, + "learning_rate": 4.201582372400414e-06, + "loss": 0.8765, + "step": 110300 + }, + { + "epoch": 0.7984972529262307, + "grad_norm": 0.1487504541873932, + "learning_rate": 4.201509985739829e-06, + "loss": 0.8822, + "step": 110310 + }, + { + "epoch": 0.7985696395868169, + "grad_norm": 0.15721207857131958, + "learning_rate": 4.201437599079242e-06, + "loss": 0.8609, + "step": 110320 + }, + { + "epoch": 0.7986420262474031, + "grad_norm": 0.14724335074424744, + "learning_rate": 4.201365212418656e-06, + "loss": 0.8808, + "step": 110330 + }, + { + "epoch": 0.7987144129079893, + "grad_norm": 0.1642737239599228, + "learning_rate": 4.2012928257580695e-06, + "loss": 0.8882, + "step": 110340 + }, + { + "epoch": 0.7987867995685755, + "grad_norm": 0.1529485136270523, + "learning_rate": 4.201220439097484e-06, + "loss": 0.8793, + "step": 110350 + }, + { + "epoch": 0.7988591862291617, + "grad_norm": 0.14472009241580963, + "learning_rate": 4.201148052436898e-06, + "loss": 0.8705, + "step": 110360 + }, + { + "epoch": 0.7989315728897479, + "grad_norm": 0.16149692237377167, + "learning_rate": 4.201075665776311e-06, + "loss": 0.8788, + "step": 110370 + }, + { + "epoch": 0.7990039595503341, + "grad_norm": 0.16890741884708405, + "learning_rate": 4.201003279115725e-06, + "loss": 0.8714, + "step": 110380 + }, + { + "epoch": 0.7990763462109203, + "grad_norm": 0.16988790035247803, + "learning_rate": 4.200930892455139e-06, + "loss": 0.8946, + "step": 110390 + }, + { + "epoch": 0.7991487328715065, + "grad_norm": 0.14879997074604034, + "learning_rate": 4.200858505794553e-06, + "loss": 0.8776, + "step": 110400 + }, + { + "epoch": 0.7992211195320926, + "grad_norm": 0.38371655344963074, + "learning_rate": 4.2007861191339665e-06, + "loss": 0.892, + "step": 110410 + }, + { + "epoch": 0.7992935061926788, + "grad_norm": 0.14874762296676636, + "learning_rate": 4.20071373247338e-06, + "loss": 0.8787, + "step": 110420 + }, + { + "epoch": 0.799365892853265, + "grad_norm": 0.1458701342344284, + "learning_rate": 4.200641345812794e-06, + "loss": 0.8788, + "step": 110430 + }, + { + "epoch": 0.7994382795138512, + "grad_norm": 0.1612052172422409, + "learning_rate": 4.200568959152207e-06, + "loss": 0.8816, + "step": 110440 + }, + { + "epoch": 0.7995106661744373, + "grad_norm": 0.1589481681585312, + "learning_rate": 4.200496572491621e-06, + "loss": 0.8847, + "step": 110450 + }, + { + "epoch": 0.7995830528350235, + "grad_norm": 0.15650621056556702, + "learning_rate": 4.2004241858310355e-06, + "loss": 0.8862, + "step": 110460 + }, + { + "epoch": 0.7996554394956098, + "grad_norm": 0.1470274180173874, + "learning_rate": 4.200351799170449e-06, + "loss": 0.8822, + "step": 110470 + }, + { + "epoch": 0.799727826156196, + "grad_norm": 0.15329478681087494, + "learning_rate": 4.200279412509863e-06, + "loss": 0.8777, + "step": 110480 + }, + { + "epoch": 0.7998002128167822, + "grad_norm": 0.16613824665546417, + "learning_rate": 4.200207025849276e-06, + "loss": 0.8915, + "step": 110490 + }, + { + "epoch": 0.7998725994773683, + "grad_norm": 0.15577919781208038, + "learning_rate": 4.200134639188691e-06, + "loss": 0.8799, + "step": 110500 + }, + { + "epoch": 0.7999449861379545, + "grad_norm": 0.16274091601371765, + "learning_rate": 4.200062252528104e-06, + "loss": 0.8822, + "step": 110510 + }, + { + "epoch": 0.8000173727985407, + "grad_norm": 0.16509264707565308, + "learning_rate": 4.199989865867518e-06, + "loss": 0.8921, + "step": 110520 + }, + { + "epoch": 0.8000897594591269, + "grad_norm": 0.16428282856941223, + "learning_rate": 4.199917479206932e-06, + "loss": 0.861, + "step": 110530 + }, + { + "epoch": 0.800162146119713, + "grad_norm": 0.15787632763385773, + "learning_rate": 4.199845092546346e-06, + "loss": 0.8679, + "step": 110540 + }, + { + "epoch": 0.8002345327802992, + "grad_norm": 0.21818390488624573, + "learning_rate": 4.19977270588576e-06, + "loss": 0.8813, + "step": 110550 + }, + { + "epoch": 0.8003069194408854, + "grad_norm": 0.16257944703102112, + "learning_rate": 4.199700319225173e-06, + "loss": 0.8811, + "step": 110560 + }, + { + "epoch": 0.8003793061014716, + "grad_norm": 0.16678005456924438, + "learning_rate": 4.199627932564587e-06, + "loss": 0.8873, + "step": 110570 + }, + { + "epoch": 0.8004516927620579, + "grad_norm": 0.15862931311130524, + "learning_rate": 4.199555545904001e-06, + "loss": 0.8825, + "step": 110580 + }, + { + "epoch": 0.800524079422644, + "grad_norm": 0.18531429767608643, + "learning_rate": 4.199483159243415e-06, + "loss": 0.8642, + "step": 110590 + }, + { + "epoch": 0.8005964660832302, + "grad_norm": 0.15238986909389496, + "learning_rate": 4.199410772582829e-06, + "loss": 0.882, + "step": 110600 + }, + { + "epoch": 0.8006688527438164, + "grad_norm": 0.15185806155204773, + "learning_rate": 4.199338385922242e-06, + "loss": 0.8812, + "step": 110610 + }, + { + "epoch": 0.8007412394044026, + "grad_norm": 0.14736582338809967, + "learning_rate": 4.199265999261657e-06, + "loss": 0.8764, + "step": 110620 + }, + { + "epoch": 0.8008136260649887, + "grad_norm": 0.191138356924057, + "learning_rate": 4.19919361260107e-06, + "loss": 0.8821, + "step": 110630 + }, + { + "epoch": 0.8008860127255749, + "grad_norm": 0.1546011120080948, + "learning_rate": 4.199121225940484e-06, + "loss": 0.8797, + "step": 110640 + }, + { + "epoch": 0.8009583993861611, + "grad_norm": 0.14093956351280212, + "learning_rate": 4.1990488392798976e-06, + "loss": 0.8744, + "step": 110650 + }, + { + "epoch": 0.8010307860467473, + "grad_norm": 0.14860482513904572, + "learning_rate": 4.198976452619312e-06, + "loss": 0.8811, + "step": 110660 + }, + { + "epoch": 0.8011031727073334, + "grad_norm": 0.15203125774860382, + "learning_rate": 4.198904065958726e-06, + "loss": 0.8726, + "step": 110670 + }, + { + "epoch": 0.8011755593679197, + "grad_norm": 0.1518326997756958, + "learning_rate": 4.198831679298139e-06, + "loss": 0.8747, + "step": 110680 + }, + { + "epoch": 0.8012479460285059, + "grad_norm": 0.16520649194717407, + "learning_rate": 4.198759292637553e-06, + "loss": 0.8794, + "step": 110690 + }, + { + "epoch": 0.8013203326890921, + "grad_norm": 0.16305768489837646, + "learning_rate": 4.198686905976967e-06, + "loss": 0.8866, + "step": 110700 + }, + { + "epoch": 0.8013927193496783, + "grad_norm": 0.15262234210968018, + "learning_rate": 4.198614519316381e-06, + "loss": 0.8865, + "step": 110710 + }, + { + "epoch": 0.8014651060102644, + "grad_norm": 0.163343608379364, + "learning_rate": 4.1985421326557946e-06, + "loss": 0.8781, + "step": 110720 + }, + { + "epoch": 0.8015374926708506, + "grad_norm": 0.15089716017246246, + "learning_rate": 4.198469745995208e-06, + "loss": 0.883, + "step": 110730 + }, + { + "epoch": 0.8016098793314368, + "grad_norm": 0.1560911238193512, + "learning_rate": 4.198397359334622e-06, + "loss": 0.8744, + "step": 110740 + }, + { + "epoch": 0.801682265992023, + "grad_norm": 1.2095776796340942, + "learning_rate": 4.198324972674036e-06, + "loss": 0.8845, + "step": 110750 + }, + { + "epoch": 0.8017546526526091, + "grad_norm": 0.15118081867694855, + "learning_rate": 4.19825258601345e-06, + "loss": 0.8761, + "step": 110760 + }, + { + "epoch": 0.8018270393131953, + "grad_norm": 0.15144529938697815, + "learning_rate": 4.1981801993528635e-06, + "loss": 0.879, + "step": 110770 + }, + { + "epoch": 0.8018994259737815, + "grad_norm": 0.16330844163894653, + "learning_rate": 4.198107812692277e-06, + "loss": 0.8809, + "step": 110780 + }, + { + "epoch": 0.8019718126343678, + "grad_norm": 0.14975833892822266, + "learning_rate": 4.198035426031692e-06, + "loss": 0.8788, + "step": 110790 + }, + { + "epoch": 0.802044199294954, + "grad_norm": 0.15585237741470337, + "learning_rate": 4.197963039371105e-06, + "loss": 0.882, + "step": 110800 + }, + { + "epoch": 0.8021165859555401, + "grad_norm": 0.14996300637722015, + "learning_rate": 4.197890652710519e-06, + "loss": 0.8803, + "step": 110810 + }, + { + "epoch": 0.8021889726161263, + "grad_norm": 0.1632111817598343, + "learning_rate": 4.1978182660499324e-06, + "loss": 0.8709, + "step": 110820 + }, + { + "epoch": 0.8022613592767125, + "grad_norm": 0.1543271541595459, + "learning_rate": 4.197745879389347e-06, + "loss": 0.8823, + "step": 110830 + }, + { + "epoch": 0.8023337459372987, + "grad_norm": 0.25150734186172485, + "learning_rate": 4.1976734927287605e-06, + "loss": 0.8912, + "step": 110840 + }, + { + "epoch": 0.8024061325978848, + "grad_norm": 0.14821644127368927, + "learning_rate": 4.197601106068174e-06, + "loss": 0.8951, + "step": 110850 + }, + { + "epoch": 0.802478519258471, + "grad_norm": 0.5984487533569336, + "learning_rate": 4.197528719407588e-06, + "loss": 0.8829, + "step": 110860 + }, + { + "epoch": 0.8025509059190572, + "grad_norm": 0.1596580147743225, + "learning_rate": 4.197456332747002e-06, + "loss": 0.8795, + "step": 110870 + }, + { + "epoch": 0.8026232925796434, + "grad_norm": 0.1528296023607254, + "learning_rate": 4.197383946086416e-06, + "loss": 0.8802, + "step": 110880 + }, + { + "epoch": 0.8026956792402297, + "grad_norm": 0.15530245006084442, + "learning_rate": 4.1973115594258294e-06, + "loss": 0.8859, + "step": 110890 + }, + { + "epoch": 0.8027680659008158, + "grad_norm": 0.1550951898097992, + "learning_rate": 4.197239172765243e-06, + "loss": 0.8788, + "step": 110900 + }, + { + "epoch": 0.802840452561402, + "grad_norm": 0.17830154299736023, + "learning_rate": 4.1971667861046575e-06, + "loss": 0.8782, + "step": 110910 + }, + { + "epoch": 0.8029128392219882, + "grad_norm": 0.1467135101556778, + "learning_rate": 4.197094399444071e-06, + "loss": 0.8776, + "step": 110920 + }, + { + "epoch": 0.8029852258825744, + "grad_norm": 0.1502387672662735, + "learning_rate": 4.197022012783485e-06, + "loss": 0.8782, + "step": 110930 + }, + { + "epoch": 0.8030576125431605, + "grad_norm": 0.14576847851276398, + "learning_rate": 4.196949626122898e-06, + "loss": 0.8663, + "step": 110940 + }, + { + "epoch": 0.8031299992037467, + "grad_norm": 0.14512509107589722, + "learning_rate": 4.196877239462312e-06, + "loss": 0.878, + "step": 110950 + }, + { + "epoch": 0.8032023858643329, + "grad_norm": 0.1578640192747116, + "learning_rate": 4.196804852801726e-06, + "loss": 0.8812, + "step": 110960 + }, + { + "epoch": 0.8032747725249191, + "grad_norm": 0.1540568768978119, + "learning_rate": 4.196732466141139e-06, + "loss": 0.8948, + "step": 110970 + }, + { + "epoch": 0.8033471591855053, + "grad_norm": 0.16598433256149292, + "learning_rate": 4.196660079480554e-06, + "loss": 0.8928, + "step": 110980 + }, + { + "epoch": 0.8034195458460914, + "grad_norm": 0.15447190403938293, + "learning_rate": 4.196587692819967e-06, + "loss": 0.8822, + "step": 110990 + }, + { + "epoch": 0.8034919325066777, + "grad_norm": 0.15389618277549744, + "learning_rate": 4.196515306159381e-06, + "loss": 0.872, + "step": 111000 + }, + { + "epoch": 0.8035643191672639, + "grad_norm": 0.1530335396528244, + "learning_rate": 4.1964429194987945e-06, + "loss": 0.871, + "step": 111010 + }, + { + "epoch": 0.8036367058278501, + "grad_norm": 0.15146398544311523, + "learning_rate": 4.196370532838209e-06, + "loss": 0.8834, + "step": 111020 + }, + { + "epoch": 0.8037090924884362, + "grad_norm": 0.15704941749572754, + "learning_rate": 4.196298146177623e-06, + "loss": 0.9041, + "step": 111030 + }, + { + "epoch": 0.8037814791490224, + "grad_norm": 0.15479826927185059, + "learning_rate": 4.196225759517036e-06, + "loss": 0.8853, + "step": 111040 + }, + { + "epoch": 0.8038538658096086, + "grad_norm": 0.1650967299938202, + "learning_rate": 4.19615337285645e-06, + "loss": 0.8816, + "step": 111050 + }, + { + "epoch": 0.8039262524701948, + "grad_norm": 0.15710927546024323, + "learning_rate": 4.196080986195864e-06, + "loss": 0.8873, + "step": 111060 + }, + { + "epoch": 0.803998639130781, + "grad_norm": 0.15104107558727264, + "learning_rate": 4.196008599535278e-06, + "loss": 0.8729, + "step": 111070 + }, + { + "epoch": 0.8040710257913671, + "grad_norm": 0.15065807104110718, + "learning_rate": 4.1959362128746915e-06, + "loss": 0.8849, + "step": 111080 + }, + { + "epoch": 0.8041434124519533, + "grad_norm": 0.16708062589168549, + "learning_rate": 4.195863826214105e-06, + "loss": 0.8839, + "step": 111090 + }, + { + "epoch": 0.8042157991125395, + "grad_norm": 0.155837744474411, + "learning_rate": 4.19579143955352e-06, + "loss": 0.8737, + "step": 111100 + }, + { + "epoch": 0.8042881857731258, + "grad_norm": 0.1683686077594757, + "learning_rate": 4.195719052892933e-06, + "loss": 0.8767, + "step": 111110 + }, + { + "epoch": 0.804360572433712, + "grad_norm": 0.15654446184635162, + "learning_rate": 4.195646666232347e-06, + "loss": 0.8707, + "step": 111120 + }, + { + "epoch": 0.8044329590942981, + "grad_norm": 0.15169669687747955, + "learning_rate": 4.1955742795717605e-06, + "loss": 0.883, + "step": 111130 + }, + { + "epoch": 0.8045053457548843, + "grad_norm": 0.15075841546058655, + "learning_rate": 4.195501892911175e-06, + "loss": 0.8914, + "step": 111140 + }, + { + "epoch": 0.8045777324154705, + "grad_norm": 0.16259559988975525, + "learning_rate": 4.1954295062505885e-06, + "loss": 0.8853, + "step": 111150 + }, + { + "epoch": 0.8046501190760567, + "grad_norm": 0.2971009910106659, + "learning_rate": 4.195357119590002e-06, + "loss": 0.8743, + "step": 111160 + }, + { + "epoch": 0.8047225057366428, + "grad_norm": 0.17599447071552277, + "learning_rate": 4.195284732929416e-06, + "loss": 0.8837, + "step": 111170 + }, + { + "epoch": 0.804794892397229, + "grad_norm": 0.1559291034936905, + "learning_rate": 4.19521234626883e-06, + "loss": 0.8789, + "step": 111180 + }, + { + "epoch": 0.8048672790578152, + "grad_norm": 0.14445562660694122, + "learning_rate": 4.195139959608244e-06, + "loss": 0.8738, + "step": 111190 + }, + { + "epoch": 0.8049396657184014, + "grad_norm": 0.1521482765674591, + "learning_rate": 4.1950675729476575e-06, + "loss": 0.8897, + "step": 111200 + }, + { + "epoch": 0.8050120523789877, + "grad_norm": 0.15571485459804535, + "learning_rate": 4.194995186287071e-06, + "loss": 0.8981, + "step": 111210 + }, + { + "epoch": 0.8050844390395738, + "grad_norm": 0.1559680998325348, + "learning_rate": 4.1949227996264855e-06, + "loss": 0.89, + "step": 111220 + }, + { + "epoch": 0.80515682570016, + "grad_norm": 0.1483583301305771, + "learning_rate": 4.194850412965899e-06, + "loss": 0.8838, + "step": 111230 + }, + { + "epoch": 0.8052292123607462, + "grad_norm": 0.15938538312911987, + "learning_rate": 4.194778026305313e-06, + "loss": 0.8759, + "step": 111240 + }, + { + "epoch": 0.8053015990213324, + "grad_norm": 0.15668749809265137, + "learning_rate": 4.194705639644726e-06, + "loss": 0.8753, + "step": 111250 + }, + { + "epoch": 0.8053739856819185, + "grad_norm": 0.1507354974746704, + "learning_rate": 4.194633252984141e-06, + "loss": 0.8911, + "step": 111260 + }, + { + "epoch": 0.8054463723425047, + "grad_norm": 0.15574651956558228, + "learning_rate": 4.1945608663235545e-06, + "loss": 0.879, + "step": 111270 + }, + { + "epoch": 0.8055187590030909, + "grad_norm": 0.2068503350019455, + "learning_rate": 4.194488479662968e-06, + "loss": 0.8897, + "step": 111280 + }, + { + "epoch": 0.8055911456636771, + "grad_norm": 0.1596960425376892, + "learning_rate": 4.194416093002382e-06, + "loss": 0.8782, + "step": 111290 + }, + { + "epoch": 0.8056635323242632, + "grad_norm": 0.1368640810251236, + "learning_rate": 4.194343706341796e-06, + "loss": 0.8767, + "step": 111300 + }, + { + "epoch": 0.8057359189848494, + "grad_norm": 0.18080635368824005, + "learning_rate": 4.19427131968121e-06, + "loss": 0.8798, + "step": 111310 + }, + { + "epoch": 0.8058083056454357, + "grad_norm": 0.1573970466852188, + "learning_rate": 4.194198933020623e-06, + "loss": 0.8708, + "step": 111320 + }, + { + "epoch": 0.8058806923060219, + "grad_norm": 0.1502685248851776, + "learning_rate": 4.194126546360037e-06, + "loss": 0.8842, + "step": 111330 + }, + { + "epoch": 0.8059530789666081, + "grad_norm": 0.15076951682567596, + "learning_rate": 4.1940541596994515e-06, + "loss": 0.8821, + "step": 111340 + }, + { + "epoch": 0.8060254656271942, + "grad_norm": 0.15020160377025604, + "learning_rate": 4.193981773038865e-06, + "loss": 0.886, + "step": 111350 + }, + { + "epoch": 0.8060978522877804, + "grad_norm": 0.356048047542572, + "learning_rate": 4.193909386378279e-06, + "loss": 0.8936, + "step": 111360 + }, + { + "epoch": 0.8061702389483666, + "grad_norm": 0.15093164145946503, + "learning_rate": 4.193836999717692e-06, + "loss": 0.8714, + "step": 111370 + }, + { + "epoch": 0.8062426256089528, + "grad_norm": 0.16395637392997742, + "learning_rate": 4.193764613057106e-06, + "loss": 0.876, + "step": 111380 + }, + { + "epoch": 0.8063150122695389, + "grad_norm": 0.15686947107315063, + "learning_rate": 4.19369222639652e-06, + "loss": 0.8807, + "step": 111390 + }, + { + "epoch": 0.8063873989301251, + "grad_norm": 0.15679305791854858, + "learning_rate": 4.193619839735934e-06, + "loss": 0.8945, + "step": 111400 + }, + { + "epoch": 0.8064597855907113, + "grad_norm": 0.15202337503433228, + "learning_rate": 4.193547453075348e-06, + "loss": 0.8781, + "step": 111410 + }, + { + "epoch": 0.8065321722512976, + "grad_norm": 0.1549692004919052, + "learning_rate": 4.193475066414761e-06, + "loss": 0.8694, + "step": 111420 + }, + { + "epoch": 0.8066045589118838, + "grad_norm": 0.15202471613883972, + "learning_rate": 4.193402679754176e-06, + "loss": 0.8718, + "step": 111430 + }, + { + "epoch": 0.8066769455724699, + "grad_norm": 0.15716615319252014, + "learning_rate": 4.193330293093589e-06, + "loss": 0.888, + "step": 111440 + }, + { + "epoch": 0.8067493322330561, + "grad_norm": 2.5106468200683594, + "learning_rate": 4.193257906433003e-06, + "loss": 0.87, + "step": 111450 + }, + { + "epoch": 0.8068217188936423, + "grad_norm": 0.15499623119831085, + "learning_rate": 4.1931855197724166e-06, + "loss": 0.8882, + "step": 111460 + }, + { + "epoch": 0.8068941055542285, + "grad_norm": 0.14410343766212463, + "learning_rate": 4.193113133111831e-06, + "loss": 0.8796, + "step": 111470 + }, + { + "epoch": 0.8069664922148146, + "grad_norm": 0.14761362969875336, + "learning_rate": 4.193040746451244e-06, + "loss": 0.8715, + "step": 111480 + }, + { + "epoch": 0.8070388788754008, + "grad_norm": 0.1514003425836563, + "learning_rate": 4.192968359790658e-06, + "loss": 0.8761, + "step": 111490 + }, + { + "epoch": 0.807111265535987, + "grad_norm": 0.15471474826335907, + "learning_rate": 4.192895973130072e-06, + "loss": 0.877, + "step": 111500 + }, + { + "epoch": 0.8071836521965732, + "grad_norm": 0.17473427951335907, + "learning_rate": 4.1928235864694855e-06, + "loss": 0.8767, + "step": 111510 + }, + { + "epoch": 0.8072560388571594, + "grad_norm": 0.14548401534557343, + "learning_rate": 4.192751199808899e-06, + "loss": 0.8961, + "step": 111520 + }, + { + "epoch": 0.8073284255177456, + "grad_norm": 0.15971076488494873, + "learning_rate": 4.192678813148313e-06, + "loss": 0.8899, + "step": 111530 + }, + { + "epoch": 0.8074008121783318, + "grad_norm": 0.16011503338813782, + "learning_rate": 4.192606426487727e-06, + "loss": 0.8803, + "step": 111540 + }, + { + "epoch": 0.807473198838918, + "grad_norm": 0.15592749416828156, + "learning_rate": 4.192534039827141e-06, + "loss": 0.878, + "step": 111550 + }, + { + "epoch": 0.8075455854995042, + "grad_norm": 0.14613130688667297, + "learning_rate": 4.1924616531665544e-06, + "loss": 0.8678, + "step": 111560 + }, + { + "epoch": 0.8076179721600903, + "grad_norm": 0.15929226577281952, + "learning_rate": 4.192389266505968e-06, + "loss": 0.8775, + "step": 111570 + }, + { + "epoch": 0.8076903588206765, + "grad_norm": 0.1534443497657776, + "learning_rate": 4.1923168798453825e-06, + "loss": 0.8827, + "step": 111580 + }, + { + "epoch": 0.8077627454812627, + "grad_norm": 0.145346000790596, + "learning_rate": 4.192244493184796e-06, + "loss": 0.8658, + "step": 111590 + }, + { + "epoch": 0.8078351321418489, + "grad_norm": 0.14903073012828827, + "learning_rate": 4.19217210652421e-06, + "loss": 0.8829, + "step": 111600 + }, + { + "epoch": 0.807907518802435, + "grad_norm": 0.14224445819854736, + "learning_rate": 4.192099719863623e-06, + "loss": 0.8843, + "step": 111610 + }, + { + "epoch": 0.8079799054630212, + "grad_norm": 0.3385683596134186, + "learning_rate": 4.192027333203038e-06, + "loss": 0.8851, + "step": 111620 + }, + { + "epoch": 0.8080522921236074, + "grad_norm": 0.14634902775287628, + "learning_rate": 4.1919549465424514e-06, + "loss": 0.8848, + "step": 111630 + }, + { + "epoch": 0.8081246787841937, + "grad_norm": 0.16326604783535004, + "learning_rate": 4.191882559881865e-06, + "loss": 0.8758, + "step": 111640 + }, + { + "epoch": 0.8081970654447799, + "grad_norm": 0.15901891887187958, + "learning_rate": 4.191810173221279e-06, + "loss": 0.8679, + "step": 111650 + }, + { + "epoch": 0.808269452105366, + "grad_norm": 0.15597860515117645, + "learning_rate": 4.191737786560693e-06, + "loss": 0.8848, + "step": 111660 + }, + { + "epoch": 0.8083418387659522, + "grad_norm": 0.15394975244998932, + "learning_rate": 4.191665399900107e-06, + "loss": 0.8767, + "step": 111670 + }, + { + "epoch": 0.8084142254265384, + "grad_norm": 0.1595843881368637, + "learning_rate": 4.19159301323952e-06, + "loss": 0.8717, + "step": 111680 + }, + { + "epoch": 0.8084866120871246, + "grad_norm": 0.15284503996372223, + "learning_rate": 4.191520626578934e-06, + "loss": 0.8774, + "step": 111690 + }, + { + "epoch": 0.8085589987477108, + "grad_norm": 0.16651558876037598, + "learning_rate": 4.1914482399183484e-06, + "loss": 0.8686, + "step": 111700 + }, + { + "epoch": 0.8086313854082969, + "grad_norm": 0.16672812402248383, + "learning_rate": 4.191375853257762e-06, + "loss": 0.8755, + "step": 111710 + }, + { + "epoch": 0.8087037720688831, + "grad_norm": 0.1647217869758606, + "learning_rate": 4.191303466597176e-06, + "loss": 0.8887, + "step": 111720 + }, + { + "epoch": 0.8087761587294693, + "grad_norm": 0.16164937615394592, + "learning_rate": 4.191231079936589e-06, + "loss": 0.8733, + "step": 111730 + }, + { + "epoch": 0.8088485453900556, + "grad_norm": 0.1532217264175415, + "learning_rate": 4.191158693276004e-06, + "loss": 0.877, + "step": 111740 + }, + { + "epoch": 0.8089209320506417, + "grad_norm": 0.16159088909626007, + "learning_rate": 4.191086306615417e-06, + "loss": 0.8977, + "step": 111750 + }, + { + "epoch": 0.8089933187112279, + "grad_norm": 0.1658451408147812, + "learning_rate": 4.191013919954831e-06, + "loss": 0.8906, + "step": 111760 + }, + { + "epoch": 0.8090657053718141, + "grad_norm": 0.43993157148361206, + "learning_rate": 4.190941533294245e-06, + "loss": 0.8782, + "step": 111770 + }, + { + "epoch": 0.8091380920324003, + "grad_norm": 0.1686534732580185, + "learning_rate": 4.190869146633659e-06, + "loss": 0.8833, + "step": 111780 + }, + { + "epoch": 0.8092104786929865, + "grad_norm": 0.16583459079265594, + "learning_rate": 4.190796759973073e-06, + "loss": 0.8893, + "step": 111790 + }, + { + "epoch": 0.8092828653535726, + "grad_norm": 0.21636223793029785, + "learning_rate": 4.190724373312486e-06, + "loss": 0.89, + "step": 111800 + }, + { + "epoch": 0.8093552520141588, + "grad_norm": 0.22253377735614777, + "learning_rate": 4.1906519866519e-06, + "loss": 0.8847, + "step": 111810 + }, + { + "epoch": 0.809427638674745, + "grad_norm": 0.15331260859966278, + "learning_rate": 4.190579599991314e-06, + "loss": 0.875, + "step": 111820 + }, + { + "epoch": 0.8095000253353312, + "grad_norm": 0.1543157696723938, + "learning_rate": 4.190507213330728e-06, + "loss": 0.8843, + "step": 111830 + }, + { + "epoch": 0.8095724119959173, + "grad_norm": 0.17324155569076538, + "learning_rate": 4.190434826670142e-06, + "loss": 0.8831, + "step": 111840 + }, + { + "epoch": 0.8096447986565036, + "grad_norm": 0.16567941009998322, + "learning_rate": 4.190362440009555e-06, + "loss": 0.8866, + "step": 111850 + }, + { + "epoch": 0.8097171853170898, + "grad_norm": 0.15467321872711182, + "learning_rate": 4.19029005334897e-06, + "loss": 0.8789, + "step": 111860 + }, + { + "epoch": 0.809789571977676, + "grad_norm": 0.1545843631029129, + "learning_rate": 4.190217666688383e-06, + "loss": 0.8849, + "step": 111870 + }, + { + "epoch": 0.8098619586382622, + "grad_norm": 0.15270915627479553, + "learning_rate": 4.190145280027797e-06, + "loss": 0.8768, + "step": 111880 + }, + { + "epoch": 0.8099343452988483, + "grad_norm": 0.14915820956230164, + "learning_rate": 4.1900728933672105e-06, + "loss": 0.878, + "step": 111890 + }, + { + "epoch": 0.8100067319594345, + "grad_norm": 0.15363983809947968, + "learning_rate": 4.190000506706625e-06, + "loss": 0.8841, + "step": 111900 + }, + { + "epoch": 0.8100791186200207, + "grad_norm": 0.1456158310174942, + "learning_rate": 4.189928120046039e-06, + "loss": 0.8803, + "step": 111910 + }, + { + "epoch": 0.8101515052806069, + "grad_norm": 0.16374152898788452, + "learning_rate": 4.189855733385452e-06, + "loss": 0.8793, + "step": 111920 + }, + { + "epoch": 0.810223891941193, + "grad_norm": 0.15842416882514954, + "learning_rate": 4.189783346724866e-06, + "loss": 0.8783, + "step": 111930 + }, + { + "epoch": 0.8102962786017792, + "grad_norm": 0.15646661818027496, + "learning_rate": 4.18971096006428e-06, + "loss": 0.8745, + "step": 111940 + }, + { + "epoch": 0.8103686652623655, + "grad_norm": 0.16457587480545044, + "learning_rate": 4.189638573403694e-06, + "loss": 0.8782, + "step": 111950 + }, + { + "epoch": 0.8104410519229517, + "grad_norm": 0.14522863924503326, + "learning_rate": 4.1895661867431075e-06, + "loss": 0.874, + "step": 111960 + }, + { + "epoch": 0.8105134385835379, + "grad_norm": 0.15910083055496216, + "learning_rate": 4.189493800082521e-06, + "loss": 0.8912, + "step": 111970 + }, + { + "epoch": 0.810585825244124, + "grad_norm": 0.164401113986969, + "learning_rate": 4.189421413421935e-06, + "loss": 0.8799, + "step": 111980 + }, + { + "epoch": 0.8106582119047102, + "grad_norm": 0.15682414174079895, + "learning_rate": 4.189349026761349e-06, + "loss": 0.8782, + "step": 111990 + }, + { + "epoch": 0.8107305985652964, + "grad_norm": 0.17602966725826263, + "learning_rate": 4.189276640100763e-06, + "loss": 0.8776, + "step": 112000 + }, + { + "epoch": 0.8108029852258826, + "grad_norm": 0.15074023604393005, + "learning_rate": 4.1892042534401765e-06, + "loss": 0.8892, + "step": 112010 + }, + { + "epoch": 0.8108753718864687, + "grad_norm": 0.1462225317955017, + "learning_rate": 4.18913186677959e-06, + "loss": 0.8541, + "step": 112020 + }, + { + "epoch": 0.8109477585470549, + "grad_norm": 0.15102677047252655, + "learning_rate": 4.189059480119004e-06, + "loss": 0.8776, + "step": 112030 + }, + { + "epoch": 0.8110201452076411, + "grad_norm": 0.40401482582092285, + "learning_rate": 4.188987093458417e-06, + "loss": 0.8831, + "step": 112040 + }, + { + "epoch": 0.8110925318682273, + "grad_norm": 0.3952445983886719, + "learning_rate": 4.188914706797832e-06, + "loss": 0.878, + "step": 112050 + }, + { + "epoch": 0.8111649185288136, + "grad_norm": 0.15976792573928833, + "learning_rate": 4.188842320137245e-06, + "loss": 0.8804, + "step": 112060 + }, + { + "epoch": 0.8112373051893997, + "grad_norm": 0.14807958900928497, + "learning_rate": 4.188769933476659e-06, + "loss": 0.8783, + "step": 112070 + }, + { + "epoch": 0.8113096918499859, + "grad_norm": 0.15941010415554047, + "learning_rate": 4.188697546816073e-06, + "loss": 0.8738, + "step": 112080 + }, + { + "epoch": 0.8113820785105721, + "grad_norm": 0.14998309314250946, + "learning_rate": 4.188625160155487e-06, + "loss": 0.8743, + "step": 112090 + }, + { + "epoch": 0.8114544651711583, + "grad_norm": 0.15989407896995544, + "learning_rate": 4.188552773494901e-06, + "loss": 0.8853, + "step": 112100 + }, + { + "epoch": 0.8115268518317444, + "grad_norm": 0.15547819435596466, + "learning_rate": 4.188480386834314e-06, + "loss": 0.8755, + "step": 112110 + }, + { + "epoch": 0.8115992384923306, + "grad_norm": 0.15554924309253693, + "learning_rate": 4.188408000173728e-06, + "loss": 0.8842, + "step": 112120 + }, + { + "epoch": 0.8116716251529168, + "grad_norm": 0.15123188495635986, + "learning_rate": 4.188335613513142e-06, + "loss": 0.8748, + "step": 112130 + }, + { + "epoch": 0.811744011813503, + "grad_norm": 0.16083499789237976, + "learning_rate": 4.188263226852556e-06, + "loss": 0.8765, + "step": 112140 + }, + { + "epoch": 0.8118163984740892, + "grad_norm": 0.14771822094917297, + "learning_rate": 4.18819084019197e-06, + "loss": 0.875, + "step": 112150 + }, + { + "epoch": 0.8118887851346753, + "grad_norm": 0.1459280550479889, + "learning_rate": 4.188118453531383e-06, + "loss": 0.8924, + "step": 112160 + }, + { + "epoch": 0.8119611717952616, + "grad_norm": 0.1660243421792984, + "learning_rate": 4.188046066870797e-06, + "loss": 0.8802, + "step": 112170 + }, + { + "epoch": 0.8120335584558478, + "grad_norm": 0.17147773504257202, + "learning_rate": 4.187973680210211e-06, + "loss": 0.883, + "step": 112180 + }, + { + "epoch": 0.812105945116434, + "grad_norm": 0.16224630177021027, + "learning_rate": 4.187901293549625e-06, + "loss": 0.8721, + "step": 112190 + }, + { + "epoch": 0.8121783317770201, + "grad_norm": 0.1489465981721878, + "learning_rate": 4.1878289068890386e-06, + "loss": 0.8739, + "step": 112200 + }, + { + "epoch": 0.8122507184376063, + "grad_norm": 0.14807015657424927, + "learning_rate": 4.187756520228452e-06, + "loss": 0.8842, + "step": 112210 + }, + { + "epoch": 0.8123231050981925, + "grad_norm": 0.15409156680107117, + "learning_rate": 4.187684133567867e-06, + "loss": 0.8738, + "step": 112220 + }, + { + "epoch": 0.8123954917587787, + "grad_norm": 0.16499954462051392, + "learning_rate": 4.18761174690728e-06, + "loss": 0.8848, + "step": 112230 + }, + { + "epoch": 0.8124678784193649, + "grad_norm": 0.15704099833965302, + "learning_rate": 4.187539360246694e-06, + "loss": 0.8767, + "step": 112240 + }, + { + "epoch": 0.812540265079951, + "grad_norm": 0.1506892442703247, + "learning_rate": 4.1874669735861075e-06, + "loss": 0.8802, + "step": 112250 + }, + { + "epoch": 0.8126126517405372, + "grad_norm": 0.1446923166513443, + "learning_rate": 4.187394586925522e-06, + "loss": 0.8753, + "step": 112260 + }, + { + "epoch": 0.8126850384011235, + "grad_norm": 0.16159547865390778, + "learning_rate": 4.187322200264936e-06, + "loss": 0.8751, + "step": 112270 + }, + { + "epoch": 0.8127574250617097, + "grad_norm": 0.17383888363838196, + "learning_rate": 4.187249813604349e-06, + "loss": 0.8793, + "step": 112280 + }, + { + "epoch": 0.8128298117222958, + "grad_norm": 0.1629524528980255, + "learning_rate": 4.187177426943763e-06, + "loss": 0.8804, + "step": 112290 + }, + { + "epoch": 0.812902198382882, + "grad_norm": 0.17006246745586395, + "learning_rate": 4.187105040283177e-06, + "loss": 0.8731, + "step": 112300 + }, + { + "epoch": 0.8129745850434682, + "grad_norm": 0.16095757484436035, + "learning_rate": 4.187032653622591e-06, + "loss": 0.8835, + "step": 112310 + }, + { + "epoch": 0.8130469717040544, + "grad_norm": 0.16775591671466827, + "learning_rate": 4.1869602669620045e-06, + "loss": 0.8859, + "step": 112320 + }, + { + "epoch": 0.8131193583646406, + "grad_norm": 0.16004826128482819, + "learning_rate": 4.186887880301418e-06, + "loss": 0.865, + "step": 112330 + }, + { + "epoch": 0.8131917450252267, + "grad_norm": 0.15483014285564423, + "learning_rate": 4.186815493640833e-06, + "loss": 0.8848, + "step": 112340 + }, + { + "epoch": 0.8132641316858129, + "grad_norm": 0.16190212965011597, + "learning_rate": 4.186743106980246e-06, + "loss": 0.8848, + "step": 112350 + }, + { + "epoch": 0.8133365183463991, + "grad_norm": 0.14579403400421143, + "learning_rate": 4.18667072031966e-06, + "loss": 0.8696, + "step": 112360 + }, + { + "epoch": 0.8134089050069853, + "grad_norm": 0.16750074923038483, + "learning_rate": 4.1865983336590734e-06, + "loss": 0.8826, + "step": 112370 + }, + { + "epoch": 0.8134812916675715, + "grad_norm": 0.1532454639673233, + "learning_rate": 4.186525946998488e-06, + "loss": 0.8722, + "step": 112380 + }, + { + "epoch": 0.8135536783281577, + "grad_norm": 0.15037626028060913, + "learning_rate": 4.1864535603379015e-06, + "loss": 0.8787, + "step": 112390 + }, + { + "epoch": 0.8136260649887439, + "grad_norm": 0.16376477479934692, + "learning_rate": 4.186381173677315e-06, + "loss": 0.8689, + "step": 112400 + }, + { + "epoch": 0.8136984516493301, + "grad_norm": 0.15625233948230743, + "learning_rate": 4.186308787016729e-06, + "loss": 0.8647, + "step": 112410 + }, + { + "epoch": 0.8137708383099163, + "grad_norm": 0.1553266942501068, + "learning_rate": 4.186236400356143e-06, + "loss": 0.8792, + "step": 112420 + }, + { + "epoch": 0.8138432249705024, + "grad_norm": 0.1567511111497879, + "learning_rate": 4.186164013695557e-06, + "loss": 0.8802, + "step": 112430 + }, + { + "epoch": 0.8139156116310886, + "grad_norm": 0.15767507255077362, + "learning_rate": 4.1860916270349704e-06, + "loss": 0.8783, + "step": 112440 + }, + { + "epoch": 0.8139879982916748, + "grad_norm": 0.1529453545808792, + "learning_rate": 4.186019240374384e-06, + "loss": 0.8856, + "step": 112450 + }, + { + "epoch": 0.814060384952261, + "grad_norm": 0.14311917126178741, + "learning_rate": 4.1859468537137985e-06, + "loss": 0.8824, + "step": 112460 + }, + { + "epoch": 0.8141327716128471, + "grad_norm": 0.1506299078464508, + "learning_rate": 4.185874467053212e-06, + "loss": 0.8952, + "step": 112470 + }, + { + "epoch": 0.8142051582734334, + "grad_norm": 0.14917585253715515, + "learning_rate": 4.185802080392626e-06, + "loss": 0.8784, + "step": 112480 + }, + { + "epoch": 0.8142775449340196, + "grad_norm": 0.16028118133544922, + "learning_rate": 4.185729693732039e-06, + "loss": 0.8708, + "step": 112490 + }, + { + "epoch": 0.8143499315946058, + "grad_norm": 0.18150749802589417, + "learning_rate": 4.185657307071454e-06, + "loss": 0.8822, + "step": 112500 + }, + { + "epoch": 0.814422318255192, + "grad_norm": 0.1553160399198532, + "learning_rate": 4.1855849204108674e-06, + "loss": 0.8889, + "step": 112510 + }, + { + "epoch": 0.8144947049157781, + "grad_norm": 0.15606872737407684, + "learning_rate": 4.185512533750281e-06, + "loss": 0.8843, + "step": 112520 + }, + { + "epoch": 0.8145670915763643, + "grad_norm": 0.1746978759765625, + "learning_rate": 4.185440147089695e-06, + "loss": 0.8768, + "step": 112530 + }, + { + "epoch": 0.8146394782369505, + "grad_norm": 0.20451655983924866, + "learning_rate": 4.185367760429108e-06, + "loss": 0.8694, + "step": 112540 + }, + { + "epoch": 0.8147118648975367, + "grad_norm": 0.15979830920696259, + "learning_rate": 4.185295373768522e-06, + "loss": 0.8778, + "step": 112550 + }, + { + "epoch": 0.8147842515581228, + "grad_norm": 0.15347421169281006, + "learning_rate": 4.1852229871079355e-06, + "loss": 0.8757, + "step": 112560 + }, + { + "epoch": 0.814856638218709, + "grad_norm": 0.1704648733139038, + "learning_rate": 4.18515060044735e-06, + "loss": 0.8759, + "step": 112570 + }, + { + "epoch": 0.8149290248792952, + "grad_norm": 0.14998449385166168, + "learning_rate": 4.185078213786764e-06, + "loss": 0.8819, + "step": 112580 + }, + { + "epoch": 0.8150014115398815, + "grad_norm": 0.1555691659450531, + "learning_rate": 4.185005827126177e-06, + "loss": 0.8858, + "step": 112590 + }, + { + "epoch": 0.8150737982004677, + "grad_norm": 0.15047410130500793, + "learning_rate": 4.184933440465591e-06, + "loss": 0.8789, + "step": 112600 + }, + { + "epoch": 0.8151461848610538, + "grad_norm": 0.14893095195293427, + "learning_rate": 4.184861053805005e-06, + "loss": 0.8807, + "step": 112610 + }, + { + "epoch": 0.81521857152164, + "grad_norm": 0.14945295453071594, + "learning_rate": 4.184788667144419e-06, + "loss": 0.8863, + "step": 112620 + }, + { + "epoch": 0.8152909581822262, + "grad_norm": 0.1564187854528427, + "learning_rate": 4.1847162804838325e-06, + "loss": 0.8864, + "step": 112630 + }, + { + "epoch": 0.8153633448428124, + "grad_norm": 0.15783673524856567, + "learning_rate": 4.184643893823246e-06, + "loss": 0.8753, + "step": 112640 + }, + { + "epoch": 0.8154357315033985, + "grad_norm": 0.16287674009799957, + "learning_rate": 4.184571507162661e-06, + "loss": 0.8739, + "step": 112650 + }, + { + "epoch": 0.8155081181639847, + "grad_norm": 0.1520308554172516, + "learning_rate": 4.184499120502074e-06, + "loss": 0.8993, + "step": 112660 + }, + { + "epoch": 0.8155805048245709, + "grad_norm": 0.14746682345867157, + "learning_rate": 4.184426733841488e-06, + "loss": 0.8857, + "step": 112670 + }, + { + "epoch": 0.8156528914851571, + "grad_norm": 0.15371263027191162, + "learning_rate": 4.1843543471809015e-06, + "loss": 0.869, + "step": 112680 + }, + { + "epoch": 0.8157252781457432, + "grad_norm": 0.1501394808292389, + "learning_rate": 4.184281960520316e-06, + "loss": 0.8704, + "step": 112690 + }, + { + "epoch": 0.8157976648063295, + "grad_norm": 0.18657702207565308, + "learning_rate": 4.1842095738597295e-06, + "loss": 0.8919, + "step": 112700 + }, + { + "epoch": 0.8158700514669157, + "grad_norm": 0.15628504753112793, + "learning_rate": 4.184137187199143e-06, + "loss": 0.8848, + "step": 112710 + }, + { + "epoch": 0.8159424381275019, + "grad_norm": 0.14718109369277954, + "learning_rate": 4.184064800538557e-06, + "loss": 0.8754, + "step": 112720 + }, + { + "epoch": 0.8160148247880881, + "grad_norm": 0.15502269566059113, + "learning_rate": 4.183992413877971e-06, + "loss": 0.8684, + "step": 112730 + }, + { + "epoch": 0.8160872114486742, + "grad_norm": 0.15216968953609467, + "learning_rate": 4.183920027217385e-06, + "loss": 0.8889, + "step": 112740 + }, + { + "epoch": 0.8161595981092604, + "grad_norm": 0.23924054205417633, + "learning_rate": 4.1838476405567985e-06, + "loss": 0.8764, + "step": 112750 + }, + { + "epoch": 0.8162319847698466, + "grad_norm": 0.1529170572757721, + "learning_rate": 4.183775253896212e-06, + "loss": 0.8778, + "step": 112760 + }, + { + "epoch": 0.8163043714304328, + "grad_norm": 0.15385037660598755, + "learning_rate": 4.1837028672356266e-06, + "loss": 0.8746, + "step": 112770 + }, + { + "epoch": 0.816376758091019, + "grad_norm": 0.15637922286987305, + "learning_rate": 4.18363048057504e-06, + "loss": 0.8715, + "step": 112780 + }, + { + "epoch": 0.8164491447516051, + "grad_norm": 0.1572360396385193, + "learning_rate": 4.183558093914454e-06, + "loss": 0.8725, + "step": 112790 + }, + { + "epoch": 0.8165215314121914, + "grad_norm": 0.14621710777282715, + "learning_rate": 4.183485707253867e-06, + "loss": 0.8683, + "step": 112800 + }, + { + "epoch": 0.8165939180727776, + "grad_norm": 0.15248066186904907, + "learning_rate": 4.183413320593281e-06, + "loss": 0.8836, + "step": 112810 + }, + { + "epoch": 0.8166663047333638, + "grad_norm": 0.16547314822673798, + "learning_rate": 4.1833409339326955e-06, + "loss": 0.8836, + "step": 112820 + }, + { + "epoch": 0.81673869139395, + "grad_norm": 0.15647459030151367, + "learning_rate": 4.183268547272109e-06, + "loss": 0.88, + "step": 112830 + }, + { + "epoch": 0.8168110780545361, + "grad_norm": 0.16002123057842255, + "learning_rate": 4.183196160611523e-06, + "loss": 0.887, + "step": 112840 + }, + { + "epoch": 0.8168834647151223, + "grad_norm": 0.14930887520313263, + "learning_rate": 4.183123773950936e-06, + "loss": 0.8787, + "step": 112850 + }, + { + "epoch": 0.8169558513757085, + "grad_norm": 0.15242883563041687, + "learning_rate": 4.183051387290351e-06, + "loss": 0.8712, + "step": 112860 + }, + { + "epoch": 0.8170282380362947, + "grad_norm": 0.1501031219959259, + "learning_rate": 4.182979000629764e-06, + "loss": 0.8827, + "step": 112870 + }, + { + "epoch": 0.8171006246968808, + "grad_norm": 0.14793910086154938, + "learning_rate": 4.182906613969178e-06, + "loss": 0.8773, + "step": 112880 + }, + { + "epoch": 0.817173011357467, + "grad_norm": 0.14584796130657196, + "learning_rate": 4.182834227308592e-06, + "loss": 0.8774, + "step": 112890 + }, + { + "epoch": 0.8172453980180532, + "grad_norm": 0.16166086494922638, + "learning_rate": 4.182761840648006e-06, + "loss": 0.8875, + "step": 112900 + }, + { + "epoch": 0.8173177846786395, + "grad_norm": 0.16169136762619019, + "learning_rate": 4.18268945398742e-06, + "loss": 0.876, + "step": 112910 + }, + { + "epoch": 0.8173901713392256, + "grad_norm": 0.15373431146144867, + "learning_rate": 4.182617067326833e-06, + "loss": 0.8835, + "step": 112920 + }, + { + "epoch": 0.8174625579998118, + "grad_norm": 0.15151308476924896, + "learning_rate": 4.182544680666247e-06, + "loss": 0.8961, + "step": 112930 + }, + { + "epoch": 0.817534944660398, + "grad_norm": 0.150752454996109, + "learning_rate": 4.182472294005661e-06, + "loss": 0.8785, + "step": 112940 + }, + { + "epoch": 0.8176073313209842, + "grad_norm": 0.15306270122528076, + "learning_rate": 4.182399907345075e-06, + "loss": 0.8856, + "step": 112950 + }, + { + "epoch": 0.8176797179815704, + "grad_norm": 0.15430527925491333, + "learning_rate": 4.182327520684489e-06, + "loss": 0.8826, + "step": 112960 + }, + { + "epoch": 0.8177521046421565, + "grad_norm": 0.1908227801322937, + "learning_rate": 4.182255134023902e-06, + "loss": 0.8828, + "step": 112970 + }, + { + "epoch": 0.8178244913027427, + "grad_norm": 0.15534339845180511, + "learning_rate": 4.182182747363317e-06, + "loss": 0.8882, + "step": 112980 + }, + { + "epoch": 0.8178968779633289, + "grad_norm": 0.1552184671163559, + "learning_rate": 4.18211036070273e-06, + "loss": 0.875, + "step": 112990 + }, + { + "epoch": 0.8179692646239151, + "grad_norm": 0.16356508433818817, + "learning_rate": 4.182037974042144e-06, + "loss": 0.8764, + "step": 113000 + }, + { + "epoch": 0.8180416512845012, + "grad_norm": 0.14462150633335114, + "learning_rate": 4.1819655873815576e-06, + "loss": 0.8844, + "step": 113010 + }, + { + "epoch": 0.8181140379450875, + "grad_norm": 0.15308737754821777, + "learning_rate": 4.181893200720972e-06, + "loss": 0.8758, + "step": 113020 + }, + { + "epoch": 0.8181864246056737, + "grad_norm": 0.1581387221813202, + "learning_rate": 4.181820814060386e-06, + "loss": 0.886, + "step": 113030 + }, + { + "epoch": 0.8182588112662599, + "grad_norm": 0.16034311056137085, + "learning_rate": 4.181748427399799e-06, + "loss": 0.8668, + "step": 113040 + }, + { + "epoch": 0.818331197926846, + "grad_norm": 0.15236437320709229, + "learning_rate": 4.181676040739213e-06, + "loss": 0.8861, + "step": 113050 + }, + { + "epoch": 0.8184035845874322, + "grad_norm": 0.20184694230556488, + "learning_rate": 4.181603654078627e-06, + "loss": 0.8921, + "step": 113060 + }, + { + "epoch": 0.8184759712480184, + "grad_norm": 0.15360352396965027, + "learning_rate": 4.18153126741804e-06, + "loss": 0.8988, + "step": 113070 + }, + { + "epoch": 0.8185483579086046, + "grad_norm": 0.1674332171678543, + "learning_rate": 4.181458880757454e-06, + "loss": 0.8742, + "step": 113080 + }, + { + "epoch": 0.8186207445691908, + "grad_norm": 0.16225215792655945, + "learning_rate": 4.181386494096868e-06, + "loss": 0.877, + "step": 113090 + }, + { + "epoch": 0.8186931312297769, + "grad_norm": 0.1606452614068985, + "learning_rate": 4.181314107436282e-06, + "loss": 0.8686, + "step": 113100 + }, + { + "epoch": 0.8187655178903631, + "grad_norm": 0.1443140208721161, + "learning_rate": 4.1812417207756954e-06, + "loss": 0.9018, + "step": 113110 + }, + { + "epoch": 0.8188379045509494, + "grad_norm": 0.16372881829738617, + "learning_rate": 4.181169334115109e-06, + "loss": 0.8783, + "step": 113120 + }, + { + "epoch": 0.8189102912115356, + "grad_norm": 0.1642576903104782, + "learning_rate": 4.1810969474545235e-06, + "loss": 0.8701, + "step": 113130 + }, + { + "epoch": 0.8189826778721218, + "grad_norm": 0.15896013379096985, + "learning_rate": 4.181024560793937e-06, + "loss": 0.8781, + "step": 113140 + }, + { + "epoch": 0.8190550645327079, + "grad_norm": 0.17695783078670502, + "learning_rate": 4.180952174133351e-06, + "loss": 0.8797, + "step": 113150 + }, + { + "epoch": 0.8191274511932941, + "grad_norm": 0.15274715423583984, + "learning_rate": 4.180879787472764e-06, + "loss": 0.8736, + "step": 113160 + }, + { + "epoch": 0.8191998378538803, + "grad_norm": 0.14954541623592377, + "learning_rate": 4.180807400812179e-06, + "loss": 0.8771, + "step": 113170 + }, + { + "epoch": 0.8192722245144665, + "grad_norm": 0.14884912967681885, + "learning_rate": 4.1807350141515924e-06, + "loss": 0.8703, + "step": 113180 + }, + { + "epoch": 0.8193446111750526, + "grad_norm": 0.18822531402111053, + "learning_rate": 4.180662627491006e-06, + "loss": 0.8762, + "step": 113190 + }, + { + "epoch": 0.8194169978356388, + "grad_norm": 0.16255638003349304, + "learning_rate": 4.18059024083042e-06, + "loss": 0.8758, + "step": 113200 + }, + { + "epoch": 0.819489384496225, + "grad_norm": 0.16536031663417816, + "learning_rate": 4.180517854169834e-06, + "loss": 0.8704, + "step": 113210 + }, + { + "epoch": 0.8195617711568112, + "grad_norm": 0.16202905774116516, + "learning_rate": 4.180445467509248e-06, + "loss": 0.8886, + "step": 113220 + }, + { + "epoch": 0.8196341578173975, + "grad_norm": 0.2304823398590088, + "learning_rate": 4.180373080848661e-06, + "loss": 0.8815, + "step": 113230 + }, + { + "epoch": 0.8197065444779836, + "grad_norm": 0.15564051270484924, + "learning_rate": 4.180300694188075e-06, + "loss": 0.8844, + "step": 113240 + }, + { + "epoch": 0.8197789311385698, + "grad_norm": 0.1432119905948639, + "learning_rate": 4.1802283075274894e-06, + "loss": 0.8713, + "step": 113250 + }, + { + "epoch": 0.819851317799156, + "grad_norm": 0.16350963711738586, + "learning_rate": 4.180155920866903e-06, + "loss": 0.8831, + "step": 113260 + }, + { + "epoch": 0.8199237044597422, + "grad_norm": 0.16729813814163208, + "learning_rate": 4.180083534206317e-06, + "loss": 0.879, + "step": 113270 + }, + { + "epoch": 0.8199960911203283, + "grad_norm": 0.1555587649345398, + "learning_rate": 4.18001114754573e-06, + "loss": 0.8793, + "step": 113280 + }, + { + "epoch": 0.8200684777809145, + "grad_norm": 0.1441824585199356, + "learning_rate": 4.179938760885145e-06, + "loss": 0.8703, + "step": 113290 + }, + { + "epoch": 0.8201408644415007, + "grad_norm": 0.1523018628358841, + "learning_rate": 4.179866374224558e-06, + "loss": 0.8796, + "step": 113300 + }, + { + "epoch": 0.8202132511020869, + "grad_norm": 0.15191130340099335, + "learning_rate": 4.179793987563972e-06, + "loss": 0.8681, + "step": 113310 + }, + { + "epoch": 0.820285637762673, + "grad_norm": 0.14424999058246613, + "learning_rate": 4.179721600903386e-06, + "loss": 0.8875, + "step": 113320 + }, + { + "epoch": 0.8203580244232593, + "grad_norm": 0.1524539440870285, + "learning_rate": 4.1796492142428e-06, + "loss": 0.8752, + "step": 113330 + }, + { + "epoch": 0.8204304110838455, + "grad_norm": 0.14882370829582214, + "learning_rate": 4.179576827582214e-06, + "loss": 0.8885, + "step": 113340 + }, + { + "epoch": 0.8205027977444317, + "grad_norm": 0.15619249641895294, + "learning_rate": 4.179504440921627e-06, + "loss": 0.8795, + "step": 113350 + }, + { + "epoch": 0.8205751844050179, + "grad_norm": 0.16282761096954346, + "learning_rate": 4.179432054261041e-06, + "loss": 0.8792, + "step": 113360 + }, + { + "epoch": 0.820647571065604, + "grad_norm": 0.15634344518184662, + "learning_rate": 4.179359667600455e-06, + "loss": 0.8811, + "step": 113370 + }, + { + "epoch": 0.8207199577261902, + "grad_norm": 0.15985414385795593, + "learning_rate": 4.179287280939869e-06, + "loss": 0.8803, + "step": 113380 + }, + { + "epoch": 0.8207923443867764, + "grad_norm": 0.1588483601808548, + "learning_rate": 4.179214894279283e-06, + "loss": 0.8858, + "step": 113390 + }, + { + "epoch": 0.8208647310473626, + "grad_norm": 0.1567647010087967, + "learning_rate": 4.179142507618696e-06, + "loss": 0.8786, + "step": 113400 + }, + { + "epoch": 0.8209371177079487, + "grad_norm": 0.18194611370563507, + "learning_rate": 4.17907012095811e-06, + "loss": 0.8917, + "step": 113410 + }, + { + "epoch": 0.8210095043685349, + "grad_norm": 0.1811460703611374, + "learning_rate": 4.178997734297524e-06, + "loss": 0.8777, + "step": 113420 + }, + { + "epoch": 0.8210818910291211, + "grad_norm": 0.15702205896377563, + "learning_rate": 4.178925347636938e-06, + "loss": 0.8673, + "step": 113430 + }, + { + "epoch": 0.8211542776897074, + "grad_norm": 0.1575247198343277, + "learning_rate": 4.1788529609763515e-06, + "loss": 0.8919, + "step": 113440 + }, + { + "epoch": 0.8212266643502936, + "grad_norm": 0.16069388389587402, + "learning_rate": 4.178780574315765e-06, + "loss": 0.8805, + "step": 113450 + }, + { + "epoch": 0.8212990510108797, + "grad_norm": 0.15721113979816437, + "learning_rate": 4.17870818765518e-06, + "loss": 0.8744, + "step": 113460 + }, + { + "epoch": 0.8213714376714659, + "grad_norm": 0.18812179565429688, + "learning_rate": 4.178635800994593e-06, + "loss": 0.884, + "step": 113470 + }, + { + "epoch": 0.8214438243320521, + "grad_norm": 0.1596520096063614, + "learning_rate": 4.178563414334007e-06, + "loss": 0.878, + "step": 113480 + }, + { + "epoch": 0.8215162109926383, + "grad_norm": 0.15708090364933014, + "learning_rate": 4.1784910276734205e-06, + "loss": 0.8749, + "step": 113490 + }, + { + "epoch": 0.8215885976532245, + "grad_norm": 0.1581474244594574, + "learning_rate": 4.178418641012835e-06, + "loss": 0.8897, + "step": 113500 + }, + { + "epoch": 0.8216609843138106, + "grad_norm": 0.15259206295013428, + "learning_rate": 4.1783462543522486e-06, + "loss": 0.8834, + "step": 113510 + }, + { + "epoch": 0.8217333709743968, + "grad_norm": 0.15696731209754944, + "learning_rate": 4.178273867691662e-06, + "loss": 0.8793, + "step": 113520 + }, + { + "epoch": 0.821805757634983, + "grad_norm": 0.15300579369068146, + "learning_rate": 4.178201481031076e-06, + "loss": 0.878, + "step": 113530 + }, + { + "epoch": 0.8218781442955692, + "grad_norm": 0.15157224237918854, + "learning_rate": 4.17812909437049e-06, + "loss": 0.8733, + "step": 113540 + }, + { + "epoch": 0.8219505309561554, + "grad_norm": 0.15784545242786407, + "learning_rate": 4.178056707709904e-06, + "loss": 0.879, + "step": 113550 + }, + { + "epoch": 0.8220229176167416, + "grad_norm": 0.1615738868713379, + "learning_rate": 4.1779843210493175e-06, + "loss": 0.8809, + "step": 113560 + }, + { + "epoch": 0.8220953042773278, + "grad_norm": 0.16567973792552948, + "learning_rate": 4.177911934388731e-06, + "loss": 0.8967, + "step": 113570 + }, + { + "epoch": 0.822167690937914, + "grad_norm": 0.15392477810382843, + "learning_rate": 4.1778395477281456e-06, + "loss": 0.8712, + "step": 113580 + }, + { + "epoch": 0.8222400775985002, + "grad_norm": 0.17302165925502777, + "learning_rate": 4.177767161067559e-06, + "loss": 0.8868, + "step": 113590 + }, + { + "epoch": 0.8223124642590863, + "grad_norm": 0.16119389235973358, + "learning_rate": 4.177694774406972e-06, + "loss": 0.8805, + "step": 113600 + }, + { + "epoch": 0.8223848509196725, + "grad_norm": 0.16274970769882202, + "learning_rate": 4.177622387746386e-06, + "loss": 0.8818, + "step": 113610 + }, + { + "epoch": 0.8224572375802587, + "grad_norm": 0.1551283448934555, + "learning_rate": 4.1775500010858e-06, + "loss": 0.8802, + "step": 113620 + }, + { + "epoch": 0.8225296242408449, + "grad_norm": 0.1489950567483902, + "learning_rate": 4.177477614425214e-06, + "loss": 0.8803, + "step": 113630 + }, + { + "epoch": 0.822602010901431, + "grad_norm": 0.15743660926818848, + "learning_rate": 4.177405227764627e-06, + "loss": 0.8856, + "step": 113640 + }, + { + "epoch": 0.8226743975620173, + "grad_norm": 0.16645437479019165, + "learning_rate": 4.177332841104042e-06, + "loss": 0.8809, + "step": 113650 + }, + { + "epoch": 0.8227467842226035, + "grad_norm": 0.14299704134464264, + "learning_rate": 4.177260454443455e-06, + "loss": 0.8884, + "step": 113660 + }, + { + "epoch": 0.8228191708831897, + "grad_norm": 0.1666107475757599, + "learning_rate": 4.177188067782869e-06, + "loss": 0.8872, + "step": 113670 + }, + { + "epoch": 0.8228915575437759, + "grad_norm": 0.1572570949792862, + "learning_rate": 4.1771156811222826e-06, + "loss": 0.8743, + "step": 113680 + }, + { + "epoch": 0.822963944204362, + "grad_norm": 0.15440626442432404, + "learning_rate": 4.177043294461697e-06, + "loss": 0.8864, + "step": 113690 + }, + { + "epoch": 0.8230363308649482, + "grad_norm": 0.14731813967227936, + "learning_rate": 4.176970907801111e-06, + "loss": 0.8812, + "step": 113700 + }, + { + "epoch": 0.8231087175255344, + "grad_norm": 0.15947289764881134, + "learning_rate": 4.176898521140524e-06, + "loss": 0.8917, + "step": 113710 + }, + { + "epoch": 0.8231811041861206, + "grad_norm": 0.15511277318000793, + "learning_rate": 4.176826134479938e-06, + "loss": 0.8687, + "step": 113720 + }, + { + "epoch": 0.8232534908467067, + "grad_norm": 0.14976176619529724, + "learning_rate": 4.176753747819352e-06, + "loss": 0.8789, + "step": 113730 + }, + { + "epoch": 0.8233258775072929, + "grad_norm": 0.1585080772638321, + "learning_rate": 4.176681361158766e-06, + "loss": 0.8886, + "step": 113740 + }, + { + "epoch": 0.8233982641678791, + "grad_norm": 0.1489742547273636, + "learning_rate": 4.1766089744981796e-06, + "loss": 0.8814, + "step": 113750 + }, + { + "epoch": 0.8234706508284654, + "grad_norm": 0.15232981741428375, + "learning_rate": 4.176536587837593e-06, + "loss": 0.8869, + "step": 113760 + }, + { + "epoch": 0.8235430374890516, + "grad_norm": 0.15081308782100677, + "learning_rate": 4.176464201177008e-06, + "loss": 0.8853, + "step": 113770 + }, + { + "epoch": 0.8236154241496377, + "grad_norm": 0.1606776863336563, + "learning_rate": 4.176391814516421e-06, + "loss": 0.8917, + "step": 113780 + }, + { + "epoch": 0.8236878108102239, + "grad_norm": 0.15332511067390442, + "learning_rate": 4.176319427855835e-06, + "loss": 0.8662, + "step": 113790 + }, + { + "epoch": 0.8237601974708101, + "grad_norm": 0.15498222410678864, + "learning_rate": 4.1762470411952485e-06, + "loss": 0.8738, + "step": 113800 + }, + { + "epoch": 0.8238325841313963, + "grad_norm": 0.174130380153656, + "learning_rate": 4.176174654534663e-06, + "loss": 0.8872, + "step": 113810 + }, + { + "epoch": 0.8239049707919824, + "grad_norm": 0.15380537509918213, + "learning_rate": 4.176102267874077e-06, + "loss": 0.8666, + "step": 113820 + }, + { + "epoch": 0.8239773574525686, + "grad_norm": 0.15310508012771606, + "learning_rate": 4.17602988121349e-06, + "loss": 0.8979, + "step": 113830 + }, + { + "epoch": 0.8240497441131548, + "grad_norm": 0.1675233244895935, + "learning_rate": 4.175957494552904e-06, + "loss": 0.8791, + "step": 113840 + }, + { + "epoch": 0.824122130773741, + "grad_norm": 0.14090007543563843, + "learning_rate": 4.175885107892318e-06, + "loss": 0.8752, + "step": 113850 + }, + { + "epoch": 0.8241945174343273, + "grad_norm": 0.15238140523433685, + "learning_rate": 4.175812721231732e-06, + "loss": 0.868, + "step": 113860 + }, + { + "epoch": 0.8242669040949134, + "grad_norm": 0.16735051572322845, + "learning_rate": 4.1757403345711455e-06, + "loss": 0.8843, + "step": 113870 + }, + { + "epoch": 0.8243392907554996, + "grad_norm": 0.18536004424095154, + "learning_rate": 4.175667947910559e-06, + "loss": 0.8736, + "step": 113880 + }, + { + "epoch": 0.8244116774160858, + "grad_norm": 0.15690474212169647, + "learning_rate": 4.175595561249974e-06, + "loss": 0.8772, + "step": 113890 + }, + { + "epoch": 0.824484064076672, + "grad_norm": 0.17251384258270264, + "learning_rate": 4.175523174589387e-06, + "loss": 0.878, + "step": 113900 + }, + { + "epoch": 0.8245564507372581, + "grad_norm": 0.15271571278572083, + "learning_rate": 4.175450787928801e-06, + "loss": 0.8737, + "step": 113910 + }, + { + "epoch": 0.8246288373978443, + "grad_norm": 0.1521008163690567, + "learning_rate": 4.1753784012682144e-06, + "loss": 0.89, + "step": 113920 + }, + { + "epoch": 0.8247012240584305, + "grad_norm": 0.15803080797195435, + "learning_rate": 4.175306014607629e-06, + "loss": 0.8795, + "step": 113930 + }, + { + "epoch": 0.8247736107190167, + "grad_norm": 0.14416874945163727, + "learning_rate": 4.1752336279470425e-06, + "loss": 0.8853, + "step": 113940 + }, + { + "epoch": 0.8248459973796028, + "grad_norm": 0.16256214678287506, + "learning_rate": 4.175161241286456e-06, + "loss": 0.8969, + "step": 113950 + }, + { + "epoch": 0.824918384040189, + "grad_norm": 0.15503908693790436, + "learning_rate": 4.17508885462587e-06, + "loss": 0.8903, + "step": 113960 + }, + { + "epoch": 0.8249907707007753, + "grad_norm": 0.23007570207118988, + "learning_rate": 4.175016467965284e-06, + "loss": 0.8737, + "step": 113970 + }, + { + "epoch": 0.8250631573613615, + "grad_norm": 0.15308153629302979, + "learning_rate": 4.174944081304698e-06, + "loss": 0.8723, + "step": 113980 + }, + { + "epoch": 0.8251355440219477, + "grad_norm": 0.14756432175636292, + "learning_rate": 4.1748716946441114e-06, + "loss": 0.8749, + "step": 113990 + }, + { + "epoch": 0.8252079306825338, + "grad_norm": 0.17486760020256042, + "learning_rate": 4.174799307983525e-06, + "loss": 0.8632, + "step": 114000 + }, + { + "epoch": 0.82528031734312, + "grad_norm": 0.1607871949672699, + "learning_rate": 4.1747269213229395e-06, + "loss": 0.8716, + "step": 114010 + }, + { + "epoch": 0.8253527040037062, + "grad_norm": 0.1484338939189911, + "learning_rate": 4.174654534662353e-06, + "loss": 0.8746, + "step": 114020 + }, + { + "epoch": 0.8254250906642924, + "grad_norm": 0.21677958965301514, + "learning_rate": 4.174582148001767e-06, + "loss": 0.8857, + "step": 114030 + }, + { + "epoch": 0.8254974773248785, + "grad_norm": 0.16989293694496155, + "learning_rate": 4.17450976134118e-06, + "loss": 0.8781, + "step": 114040 + }, + { + "epoch": 0.8255698639854647, + "grad_norm": 0.1774820238351822, + "learning_rate": 4.174437374680594e-06, + "loss": 0.8694, + "step": 114050 + }, + { + "epoch": 0.8256422506460509, + "grad_norm": 0.15557865798473358, + "learning_rate": 4.1743649880200085e-06, + "loss": 0.8745, + "step": 114060 + }, + { + "epoch": 0.8257146373066371, + "grad_norm": 0.15898308157920837, + "learning_rate": 4.174292601359422e-06, + "loss": 0.8887, + "step": 114070 + }, + { + "epoch": 0.8257870239672234, + "grad_norm": 0.17024999856948853, + "learning_rate": 4.174220214698836e-06, + "loss": 0.8772, + "step": 114080 + }, + { + "epoch": 0.8258594106278095, + "grad_norm": 0.1510993391275406, + "learning_rate": 4.174147828038249e-06, + "loss": 0.8678, + "step": 114090 + }, + { + "epoch": 0.8259317972883957, + "grad_norm": 0.15980158746242523, + "learning_rate": 4.174075441377664e-06, + "loss": 0.8756, + "step": 114100 + }, + { + "epoch": 0.8260041839489819, + "grad_norm": 0.15219931304454803, + "learning_rate": 4.174003054717077e-06, + "loss": 0.8959, + "step": 114110 + }, + { + "epoch": 0.8260765706095681, + "grad_norm": 0.14390040934085846, + "learning_rate": 4.173930668056491e-06, + "loss": 0.8794, + "step": 114120 + }, + { + "epoch": 0.8261489572701542, + "grad_norm": 0.14578518271446228, + "learning_rate": 4.173858281395905e-06, + "loss": 0.8719, + "step": 114130 + }, + { + "epoch": 0.8262213439307404, + "grad_norm": 0.1575828641653061, + "learning_rate": 4.173785894735318e-06, + "loss": 0.8812, + "step": 114140 + }, + { + "epoch": 0.8262937305913266, + "grad_norm": 0.21511991322040558, + "learning_rate": 4.173713508074732e-06, + "loss": 0.8674, + "step": 114150 + }, + { + "epoch": 0.8263661172519128, + "grad_norm": 0.16267074644565582, + "learning_rate": 4.173641121414146e-06, + "loss": 0.8874, + "step": 114160 + }, + { + "epoch": 0.826438503912499, + "grad_norm": 0.14810219407081604, + "learning_rate": 4.17356873475356e-06, + "loss": 0.8732, + "step": 114170 + }, + { + "epoch": 0.8265108905730852, + "grad_norm": 0.14335951209068298, + "learning_rate": 4.1734963480929735e-06, + "loss": 0.8814, + "step": 114180 + }, + { + "epoch": 0.8265832772336714, + "grad_norm": 0.14947238564491272, + "learning_rate": 4.173423961432387e-06, + "loss": 0.8886, + "step": 114190 + }, + { + "epoch": 0.8266556638942576, + "grad_norm": 0.15292997658252716, + "learning_rate": 4.173351574771801e-06, + "loss": 0.8808, + "step": 114200 + }, + { + "epoch": 0.8267280505548438, + "grad_norm": 0.16210046410560608, + "learning_rate": 4.173279188111215e-06, + "loss": 0.8807, + "step": 114210 + }, + { + "epoch": 0.82680043721543, + "grad_norm": 0.1552553027868271, + "learning_rate": 4.173206801450629e-06, + "loss": 0.9006, + "step": 114220 + }, + { + "epoch": 0.8268728238760161, + "grad_norm": 0.15879151225090027, + "learning_rate": 4.1731344147900425e-06, + "loss": 0.8699, + "step": 114230 + }, + { + "epoch": 0.8269452105366023, + "grad_norm": 0.1656922549009323, + "learning_rate": 4.173062028129456e-06, + "loss": 0.8742, + "step": 114240 + }, + { + "epoch": 0.8270175971971885, + "grad_norm": 0.18779303133487701, + "learning_rate": 4.1729896414688705e-06, + "loss": 0.876, + "step": 114250 + }, + { + "epoch": 0.8270899838577747, + "grad_norm": 0.15145491063594818, + "learning_rate": 4.172917254808284e-06, + "loss": 0.8818, + "step": 114260 + }, + { + "epoch": 0.8271623705183608, + "grad_norm": 0.1533774733543396, + "learning_rate": 4.172844868147698e-06, + "loss": 0.8762, + "step": 114270 + }, + { + "epoch": 0.827234757178947, + "grad_norm": 0.15074725449085236, + "learning_rate": 4.172772481487111e-06, + "loss": 0.8766, + "step": 114280 + }, + { + "epoch": 0.8273071438395333, + "grad_norm": 0.15934348106384277, + "learning_rate": 4.172700094826526e-06, + "loss": 0.8789, + "step": 114290 + }, + { + "epoch": 0.8273795305001195, + "grad_norm": 0.1573365181684494, + "learning_rate": 4.1726277081659395e-06, + "loss": 0.8783, + "step": 114300 + }, + { + "epoch": 0.8274519171607057, + "grad_norm": 0.15947797894477844, + "learning_rate": 4.172555321505353e-06, + "loss": 0.8645, + "step": 114310 + }, + { + "epoch": 0.8275243038212918, + "grad_norm": 0.1520025134086609, + "learning_rate": 4.172482934844767e-06, + "loss": 0.8753, + "step": 114320 + }, + { + "epoch": 0.827596690481878, + "grad_norm": 0.15688809752464294, + "learning_rate": 4.172410548184181e-06, + "loss": 0.8712, + "step": 114330 + }, + { + "epoch": 0.8276690771424642, + "grad_norm": 0.23521029949188232, + "learning_rate": 4.172338161523595e-06, + "loss": 0.8903, + "step": 114340 + }, + { + "epoch": 0.8277414638030504, + "grad_norm": 0.15197953581809998, + "learning_rate": 4.172265774863008e-06, + "loss": 0.8847, + "step": 114350 + }, + { + "epoch": 0.8278138504636365, + "grad_norm": 0.151088684797287, + "learning_rate": 4.172193388202422e-06, + "loss": 0.8656, + "step": 114360 + }, + { + "epoch": 0.8278862371242227, + "grad_norm": 0.14569281041622162, + "learning_rate": 4.1721210015418365e-06, + "loss": 0.8814, + "step": 114370 + }, + { + "epoch": 0.8279586237848089, + "grad_norm": 0.1544409692287445, + "learning_rate": 4.17204861488125e-06, + "loss": 0.8876, + "step": 114380 + }, + { + "epoch": 0.8280310104453952, + "grad_norm": 0.17135676741600037, + "learning_rate": 4.171976228220664e-06, + "loss": 0.8809, + "step": 114390 + }, + { + "epoch": 0.8281033971059814, + "grad_norm": 0.15658897161483765, + "learning_rate": 4.171903841560077e-06, + "loss": 0.8838, + "step": 114400 + }, + { + "epoch": 0.8281757837665675, + "grad_norm": 0.14812763035297394, + "learning_rate": 4.171831454899492e-06, + "loss": 0.8784, + "step": 114410 + }, + { + "epoch": 0.8282481704271537, + "grad_norm": 0.15863005816936493, + "learning_rate": 4.171759068238905e-06, + "loss": 0.89, + "step": 114420 + }, + { + "epoch": 0.8283205570877399, + "grad_norm": 0.15445846319198608, + "learning_rate": 4.171686681578319e-06, + "loss": 0.8813, + "step": 114430 + }, + { + "epoch": 0.8283929437483261, + "grad_norm": 0.15649083256721497, + "learning_rate": 4.171614294917733e-06, + "loss": 0.8706, + "step": 114440 + }, + { + "epoch": 0.8284653304089122, + "grad_norm": 0.15367910265922546, + "learning_rate": 4.171541908257147e-06, + "loss": 0.8636, + "step": 114450 + }, + { + "epoch": 0.8285377170694984, + "grad_norm": 0.16421107947826385, + "learning_rate": 4.171469521596561e-06, + "loss": 0.8749, + "step": 114460 + }, + { + "epoch": 0.8286101037300846, + "grad_norm": 0.1638004332780838, + "learning_rate": 4.171397134935974e-06, + "loss": 0.8675, + "step": 114470 + }, + { + "epoch": 0.8286824903906708, + "grad_norm": 0.15033425390720367, + "learning_rate": 4.171324748275388e-06, + "loss": 0.8858, + "step": 114480 + }, + { + "epoch": 0.8287548770512569, + "grad_norm": 0.16598929464817047, + "learning_rate": 4.171252361614802e-06, + "loss": 0.8906, + "step": 114490 + }, + { + "epoch": 0.8288272637118432, + "grad_norm": 0.17145119607448578, + "learning_rate": 4.171179974954216e-06, + "loss": 0.8716, + "step": 114500 + }, + { + "epoch": 0.8288996503724294, + "grad_norm": 0.20420026779174805, + "learning_rate": 4.17110758829363e-06, + "loss": 0.8753, + "step": 114510 + }, + { + "epoch": 0.8289720370330156, + "grad_norm": 0.15310752391815186, + "learning_rate": 4.171035201633043e-06, + "loss": 0.8767, + "step": 114520 + }, + { + "epoch": 0.8290444236936018, + "grad_norm": 0.15935362875461578, + "learning_rate": 4.170962814972458e-06, + "loss": 0.8764, + "step": 114530 + }, + { + "epoch": 0.8291168103541879, + "grad_norm": 0.16182856261730194, + "learning_rate": 4.170890428311871e-06, + "loss": 0.8739, + "step": 114540 + }, + { + "epoch": 0.8291891970147741, + "grad_norm": 0.15881557762622833, + "learning_rate": 4.170818041651285e-06, + "loss": 0.8855, + "step": 114550 + }, + { + "epoch": 0.8292615836753603, + "grad_norm": 0.1685194969177246, + "learning_rate": 4.170745654990699e-06, + "loss": 0.8758, + "step": 114560 + }, + { + "epoch": 0.8293339703359465, + "grad_norm": 0.14720526337623596, + "learning_rate": 4.170673268330113e-06, + "loss": 0.873, + "step": 114570 + }, + { + "epoch": 0.8294063569965326, + "grad_norm": 0.16784626245498657, + "learning_rate": 4.170600881669527e-06, + "loss": 0.8853, + "step": 114580 + }, + { + "epoch": 0.8294787436571188, + "grad_norm": 0.1470102220773697, + "learning_rate": 4.17052849500894e-06, + "loss": 0.8832, + "step": 114590 + }, + { + "epoch": 0.829551130317705, + "grad_norm": 0.16039815545082092, + "learning_rate": 4.170456108348354e-06, + "loss": 0.8632, + "step": 114600 + }, + { + "epoch": 0.8296235169782913, + "grad_norm": 0.1540583372116089, + "learning_rate": 4.170383721687768e-06, + "loss": 0.8813, + "step": 114610 + }, + { + "epoch": 0.8296959036388775, + "grad_norm": 0.15052036941051483, + "learning_rate": 4.170311335027182e-06, + "loss": 0.8849, + "step": 114620 + }, + { + "epoch": 0.8297682902994636, + "grad_norm": 0.1492500752210617, + "learning_rate": 4.170238948366596e-06, + "loss": 0.885, + "step": 114630 + }, + { + "epoch": 0.8298406769600498, + "grad_norm": 0.15592175722122192, + "learning_rate": 4.170166561706009e-06, + "loss": 0.8654, + "step": 114640 + }, + { + "epoch": 0.829913063620636, + "grad_norm": 0.15170545876026154, + "learning_rate": 4.170094175045423e-06, + "loss": 0.8728, + "step": 114650 + }, + { + "epoch": 0.8299854502812222, + "grad_norm": 0.45990192890167236, + "learning_rate": 4.1700217883848364e-06, + "loss": 0.8802, + "step": 114660 + }, + { + "epoch": 0.8300578369418083, + "grad_norm": 0.1639576107263565, + "learning_rate": 4.16994940172425e-06, + "loss": 0.8959, + "step": 114670 + }, + { + "epoch": 0.8301302236023945, + "grad_norm": 0.15342135727405548, + "learning_rate": 4.1698770150636645e-06, + "loss": 0.8907, + "step": 114680 + }, + { + "epoch": 0.8302026102629807, + "grad_norm": 0.15938065946102142, + "learning_rate": 4.169804628403078e-06, + "loss": 0.873, + "step": 114690 + }, + { + "epoch": 0.8302749969235669, + "grad_norm": 0.15566521883010864, + "learning_rate": 4.169732241742492e-06, + "loss": 0.8848, + "step": 114700 + }, + { + "epoch": 0.8303473835841532, + "grad_norm": 0.1593281626701355, + "learning_rate": 4.169659855081905e-06, + "loss": 0.8775, + "step": 114710 + }, + { + "epoch": 0.8304197702447393, + "grad_norm": 0.15656022727489471, + "learning_rate": 4.16958746842132e-06, + "loss": 0.8735, + "step": 114720 + }, + { + "epoch": 0.8304921569053255, + "grad_norm": 0.15564171969890594, + "learning_rate": 4.1695150817607334e-06, + "loss": 0.8776, + "step": 114730 + }, + { + "epoch": 0.8305645435659117, + "grad_norm": 0.15010464191436768, + "learning_rate": 4.169442695100147e-06, + "loss": 0.8914, + "step": 114740 + }, + { + "epoch": 0.8306369302264979, + "grad_norm": 0.15904924273490906, + "learning_rate": 4.169370308439561e-06, + "loss": 0.8804, + "step": 114750 + }, + { + "epoch": 0.830709316887084, + "grad_norm": 0.161235973238945, + "learning_rate": 4.169297921778975e-06, + "loss": 0.8811, + "step": 114760 + }, + { + "epoch": 0.8307817035476702, + "grad_norm": 0.15518715977668762, + "learning_rate": 4.169225535118389e-06, + "loss": 0.8712, + "step": 114770 + }, + { + "epoch": 0.8308540902082564, + "grad_norm": 0.15426094830036163, + "learning_rate": 4.169153148457802e-06, + "loss": 0.881, + "step": 114780 + }, + { + "epoch": 0.8309264768688426, + "grad_norm": 0.15717415511608124, + "learning_rate": 4.169080761797216e-06, + "loss": 0.8656, + "step": 114790 + }, + { + "epoch": 0.8309988635294288, + "grad_norm": 0.19086652994155884, + "learning_rate": 4.1690083751366305e-06, + "loss": 0.8722, + "step": 114800 + }, + { + "epoch": 0.8310712501900149, + "grad_norm": 0.18602590262889862, + "learning_rate": 4.168935988476044e-06, + "loss": 0.8677, + "step": 114810 + }, + { + "epoch": 0.8311436368506012, + "grad_norm": 0.16731368005275726, + "learning_rate": 4.168863601815458e-06, + "loss": 0.8814, + "step": 114820 + }, + { + "epoch": 0.8312160235111874, + "grad_norm": 0.162908136844635, + "learning_rate": 4.168791215154871e-06, + "loss": 0.8795, + "step": 114830 + }, + { + "epoch": 0.8312884101717736, + "grad_norm": 0.15628595650196075, + "learning_rate": 4.168718828494285e-06, + "loss": 0.8818, + "step": 114840 + }, + { + "epoch": 0.8313607968323597, + "grad_norm": 0.15655048191547394, + "learning_rate": 4.168646441833699e-06, + "loss": 0.8835, + "step": 114850 + }, + { + "epoch": 0.8314331834929459, + "grad_norm": 0.15166090428829193, + "learning_rate": 4.168574055173113e-06, + "loss": 0.888, + "step": 114860 + }, + { + "epoch": 0.8315055701535321, + "grad_norm": 0.19675646722316742, + "learning_rate": 4.168501668512527e-06, + "loss": 0.8821, + "step": 114870 + }, + { + "epoch": 0.8315779568141183, + "grad_norm": 0.15683965384960175, + "learning_rate": 4.16842928185194e-06, + "loss": 0.8894, + "step": 114880 + }, + { + "epoch": 0.8316503434747045, + "grad_norm": 0.15531839430332184, + "learning_rate": 4.168356895191355e-06, + "loss": 0.8811, + "step": 114890 + }, + { + "epoch": 0.8317227301352906, + "grad_norm": 0.1496214121580124, + "learning_rate": 4.168284508530768e-06, + "loss": 0.873, + "step": 114900 + }, + { + "epoch": 0.8317951167958768, + "grad_norm": 0.16152338683605194, + "learning_rate": 4.168212121870182e-06, + "loss": 0.8725, + "step": 114910 + }, + { + "epoch": 0.8318675034564631, + "grad_norm": 0.17109687626361847, + "learning_rate": 4.1681397352095955e-06, + "loss": 0.8678, + "step": 114920 + }, + { + "epoch": 0.8319398901170493, + "grad_norm": 0.15160439908504486, + "learning_rate": 4.16806734854901e-06, + "loss": 0.8881, + "step": 114930 + }, + { + "epoch": 0.8320122767776355, + "grad_norm": 0.15334677696228027, + "learning_rate": 4.167994961888424e-06, + "loss": 0.882, + "step": 114940 + }, + { + "epoch": 0.8320846634382216, + "grad_norm": 0.16867488622665405, + "learning_rate": 4.167922575227837e-06, + "loss": 0.8815, + "step": 114950 + }, + { + "epoch": 0.8321570500988078, + "grad_norm": 0.16307266056537628, + "learning_rate": 4.167850188567251e-06, + "loss": 0.8737, + "step": 114960 + }, + { + "epoch": 0.832229436759394, + "grad_norm": 0.17095071077346802, + "learning_rate": 4.167777801906665e-06, + "loss": 0.868, + "step": 114970 + }, + { + "epoch": 0.8323018234199802, + "grad_norm": 0.1598438024520874, + "learning_rate": 4.167705415246079e-06, + "loss": 0.8826, + "step": 114980 + }, + { + "epoch": 0.8323742100805663, + "grad_norm": 0.15811651945114136, + "learning_rate": 4.1676330285854925e-06, + "loss": 0.8842, + "step": 114990 + }, + { + "epoch": 0.8324465967411525, + "grad_norm": 0.14370112121105194, + "learning_rate": 4.167560641924906e-06, + "loss": 0.8631, + "step": 115000 + }, + { + "epoch": 0.8325189834017387, + "grad_norm": 0.15269970893859863, + "learning_rate": 4.167488255264321e-06, + "loss": 0.8808, + "step": 115010 + }, + { + "epoch": 0.8325913700623249, + "grad_norm": 0.15361453592777252, + "learning_rate": 4.167415868603734e-06, + "loss": 0.87, + "step": 115020 + }, + { + "epoch": 0.8326637567229112, + "grad_norm": 0.1495579481124878, + "learning_rate": 4.167343481943148e-06, + "loss": 0.8709, + "step": 115030 + }, + { + "epoch": 0.8327361433834973, + "grad_norm": 0.14331714808940887, + "learning_rate": 4.1672710952825615e-06, + "loss": 0.8837, + "step": 115040 + }, + { + "epoch": 0.8328085300440835, + "grad_norm": 0.2528897225856781, + "learning_rate": 4.167198708621976e-06, + "loss": 0.8815, + "step": 115050 + }, + { + "epoch": 0.8328809167046697, + "grad_norm": 0.15450528264045715, + "learning_rate": 4.1671263219613896e-06, + "loss": 0.8856, + "step": 115060 + }, + { + "epoch": 0.8329533033652559, + "grad_norm": 0.1559913456439972, + "learning_rate": 4.167053935300803e-06, + "loss": 0.8908, + "step": 115070 + }, + { + "epoch": 0.833025690025842, + "grad_norm": 0.1556912660598755, + "learning_rate": 4.166981548640217e-06, + "loss": 0.8851, + "step": 115080 + }, + { + "epoch": 0.8330980766864282, + "grad_norm": 0.15410785377025604, + "learning_rate": 4.166909161979631e-06, + "loss": 0.8828, + "step": 115090 + }, + { + "epoch": 0.8331704633470144, + "grad_norm": 0.20597724616527557, + "learning_rate": 4.166836775319045e-06, + "loss": 0.8725, + "step": 115100 + }, + { + "epoch": 0.8332428500076006, + "grad_norm": 0.1543237566947937, + "learning_rate": 4.1667643886584585e-06, + "loss": 0.8656, + "step": 115110 + }, + { + "epoch": 0.8333152366681867, + "grad_norm": 0.14461509883403778, + "learning_rate": 4.166692001997872e-06, + "loss": 0.8763, + "step": 115120 + }, + { + "epoch": 0.8333876233287729, + "grad_norm": 0.14833268523216248, + "learning_rate": 4.1666196153372866e-06, + "loss": 0.8725, + "step": 115130 + }, + { + "epoch": 0.8334600099893592, + "grad_norm": 0.16073955595493317, + "learning_rate": 4.1665472286767e-06, + "loss": 0.8727, + "step": 115140 + }, + { + "epoch": 0.8335323966499454, + "grad_norm": 0.13932476937770844, + "learning_rate": 4.166474842016114e-06, + "loss": 0.8792, + "step": 115150 + }, + { + "epoch": 0.8336047833105316, + "grad_norm": 0.1595870852470398, + "learning_rate": 4.166402455355527e-06, + "loss": 0.8769, + "step": 115160 + }, + { + "epoch": 0.8336771699711177, + "grad_norm": 0.14972224831581116, + "learning_rate": 4.166330068694942e-06, + "loss": 0.8873, + "step": 115170 + }, + { + "epoch": 0.8337495566317039, + "grad_norm": 0.14646044373512268, + "learning_rate": 4.1662576820343555e-06, + "loss": 0.8845, + "step": 115180 + }, + { + "epoch": 0.8338219432922901, + "grad_norm": 0.1509612500667572, + "learning_rate": 4.166185295373768e-06, + "loss": 0.8673, + "step": 115190 + }, + { + "epoch": 0.8338943299528763, + "grad_norm": 0.1453905701637268, + "learning_rate": 4.166112908713183e-06, + "loss": 0.8754, + "step": 115200 + }, + { + "epoch": 0.8339667166134624, + "grad_norm": 0.14863267540931702, + "learning_rate": 4.166040522052596e-06, + "loss": 0.869, + "step": 115210 + }, + { + "epoch": 0.8340391032740486, + "grad_norm": 0.1638236790895462, + "learning_rate": 4.16596813539201e-06, + "loss": 0.8878, + "step": 115220 + }, + { + "epoch": 0.8341114899346348, + "grad_norm": 0.17066480219364166, + "learning_rate": 4.1658957487314236e-06, + "loss": 0.8719, + "step": 115230 + }, + { + "epoch": 0.8341838765952211, + "grad_norm": 0.16603226959705353, + "learning_rate": 4.165823362070838e-06, + "loss": 0.8733, + "step": 115240 + }, + { + "epoch": 0.8342562632558073, + "grad_norm": 0.141592338681221, + "learning_rate": 4.165750975410252e-06, + "loss": 0.8729, + "step": 115250 + }, + { + "epoch": 0.8343286499163934, + "grad_norm": 0.15747976303100586, + "learning_rate": 4.165678588749665e-06, + "loss": 0.8633, + "step": 115260 + }, + { + "epoch": 0.8344010365769796, + "grad_norm": 0.1882728487253189, + "learning_rate": 4.165606202089079e-06, + "loss": 0.8736, + "step": 115270 + }, + { + "epoch": 0.8344734232375658, + "grad_norm": 0.16340763866901398, + "learning_rate": 4.165533815428493e-06, + "loss": 0.8885, + "step": 115280 + }, + { + "epoch": 0.834545809898152, + "grad_norm": 0.1481885462999344, + "learning_rate": 4.165461428767907e-06, + "loss": 0.8885, + "step": 115290 + }, + { + "epoch": 0.8346181965587381, + "grad_norm": 0.15606743097305298, + "learning_rate": 4.165389042107321e-06, + "loss": 0.8767, + "step": 115300 + }, + { + "epoch": 0.8346905832193243, + "grad_norm": 0.14937707781791687, + "learning_rate": 4.165316655446734e-06, + "loss": 0.875, + "step": 115310 + }, + { + "epoch": 0.8347629698799105, + "grad_norm": 0.16030281782150269, + "learning_rate": 4.165244268786149e-06, + "loss": 0.8741, + "step": 115320 + }, + { + "epoch": 0.8348353565404967, + "grad_norm": 0.15192563831806183, + "learning_rate": 4.165171882125562e-06, + "loss": 0.8686, + "step": 115330 + }, + { + "epoch": 0.8349077432010829, + "grad_norm": 0.15150035917758942, + "learning_rate": 4.165099495464976e-06, + "loss": 0.8875, + "step": 115340 + }, + { + "epoch": 0.8349801298616691, + "grad_norm": 0.1519050896167755, + "learning_rate": 4.1650271088043895e-06, + "loss": 0.8844, + "step": 115350 + }, + { + "epoch": 0.8350525165222553, + "grad_norm": 0.15744005143642426, + "learning_rate": 4.164954722143804e-06, + "loss": 0.8619, + "step": 115360 + }, + { + "epoch": 0.8351249031828415, + "grad_norm": 0.16231480240821838, + "learning_rate": 4.164882335483218e-06, + "loss": 0.8722, + "step": 115370 + }, + { + "epoch": 0.8351972898434277, + "grad_norm": 0.15285032987594604, + "learning_rate": 4.164809948822631e-06, + "loss": 0.8796, + "step": 115380 + }, + { + "epoch": 0.8352696765040138, + "grad_norm": 0.16190151870250702, + "learning_rate": 4.164737562162045e-06, + "loss": 0.8804, + "step": 115390 + }, + { + "epoch": 0.8353420631646, + "grad_norm": 0.16439662873744965, + "learning_rate": 4.164665175501459e-06, + "loss": 0.8944, + "step": 115400 + }, + { + "epoch": 0.8354144498251862, + "grad_norm": 0.14439240097999573, + "learning_rate": 4.164592788840873e-06, + "loss": 0.8802, + "step": 115410 + }, + { + "epoch": 0.8354868364857724, + "grad_norm": 0.14769050478935242, + "learning_rate": 4.1645204021802865e-06, + "loss": 0.8807, + "step": 115420 + }, + { + "epoch": 0.8355592231463586, + "grad_norm": 0.15976214408874512, + "learning_rate": 4.1644480155197e-06, + "loss": 0.8796, + "step": 115430 + }, + { + "epoch": 0.8356316098069447, + "grad_norm": 0.15138176083564758, + "learning_rate": 4.164375628859115e-06, + "loss": 0.8711, + "step": 115440 + }, + { + "epoch": 0.835703996467531, + "grad_norm": 0.15986670553684235, + "learning_rate": 4.164303242198528e-06, + "loss": 0.889, + "step": 115450 + }, + { + "epoch": 0.8357763831281172, + "grad_norm": 0.1555429995059967, + "learning_rate": 4.164230855537942e-06, + "loss": 0.865, + "step": 115460 + }, + { + "epoch": 0.8358487697887034, + "grad_norm": 0.15238524973392487, + "learning_rate": 4.1641584688773554e-06, + "loss": 0.8672, + "step": 115470 + }, + { + "epoch": 0.8359211564492895, + "grad_norm": 0.16362081468105316, + "learning_rate": 4.164086082216769e-06, + "loss": 0.8658, + "step": 115480 + }, + { + "epoch": 0.8359935431098757, + "grad_norm": 0.16536161303520203, + "learning_rate": 4.1640136955561835e-06, + "loss": 0.8911, + "step": 115490 + }, + { + "epoch": 0.8360659297704619, + "grad_norm": 0.15160474181175232, + "learning_rate": 4.163941308895597e-06, + "loss": 0.8678, + "step": 115500 + }, + { + "epoch": 0.8361383164310481, + "grad_norm": 0.16615518927574158, + "learning_rate": 4.163868922235011e-06, + "loss": 0.8866, + "step": 115510 + }, + { + "epoch": 0.8362107030916343, + "grad_norm": 0.15683193504810333, + "learning_rate": 4.163796535574424e-06, + "loss": 0.866, + "step": 115520 + }, + { + "epoch": 0.8362830897522204, + "grad_norm": 0.149534672498703, + "learning_rate": 4.163724148913839e-06, + "loss": 0.8748, + "step": 115530 + }, + { + "epoch": 0.8363554764128066, + "grad_norm": 0.15053287148475647, + "learning_rate": 4.1636517622532525e-06, + "loss": 0.8713, + "step": 115540 + }, + { + "epoch": 0.8364278630733928, + "grad_norm": 0.16546009480953217, + "learning_rate": 4.163579375592666e-06, + "loss": 0.8776, + "step": 115550 + }, + { + "epoch": 0.8365002497339791, + "grad_norm": 0.1460997462272644, + "learning_rate": 4.16350698893208e-06, + "loss": 0.8818, + "step": 115560 + }, + { + "epoch": 0.8365726363945652, + "grad_norm": 0.15319761633872986, + "learning_rate": 4.163434602271494e-06, + "loss": 0.8797, + "step": 115570 + }, + { + "epoch": 0.8366450230551514, + "grad_norm": 0.234059140086174, + "learning_rate": 4.163362215610908e-06, + "loss": 0.8659, + "step": 115580 + }, + { + "epoch": 0.8367174097157376, + "grad_norm": 0.147258460521698, + "learning_rate": 4.163289828950321e-06, + "loss": 0.8754, + "step": 115590 + }, + { + "epoch": 0.8367897963763238, + "grad_norm": 0.15908755362033844, + "learning_rate": 4.163217442289735e-06, + "loss": 0.872, + "step": 115600 + }, + { + "epoch": 0.83686218303691, + "grad_norm": 0.15638591349124908, + "learning_rate": 4.1631450556291495e-06, + "loss": 0.8971, + "step": 115610 + }, + { + "epoch": 0.8369345696974961, + "grad_norm": 0.1701616644859314, + "learning_rate": 4.163072668968563e-06, + "loss": 0.8726, + "step": 115620 + }, + { + "epoch": 0.8370069563580823, + "grad_norm": 0.15191833674907684, + "learning_rate": 4.163000282307977e-06, + "loss": 0.8767, + "step": 115630 + }, + { + "epoch": 0.8370793430186685, + "grad_norm": 0.15473678708076477, + "learning_rate": 4.16292789564739e-06, + "loss": 0.8773, + "step": 115640 + }, + { + "epoch": 0.8371517296792547, + "grad_norm": 0.15959081053733826, + "learning_rate": 4.162855508986805e-06, + "loss": 0.8745, + "step": 115650 + }, + { + "epoch": 0.8372241163398408, + "grad_norm": 0.1590321958065033, + "learning_rate": 4.162783122326218e-06, + "loss": 0.8818, + "step": 115660 + }, + { + "epoch": 0.8372965030004271, + "grad_norm": 0.16567932069301605, + "learning_rate": 4.162710735665632e-06, + "loss": 0.8798, + "step": 115670 + }, + { + "epoch": 0.8373688896610133, + "grad_norm": 0.15759676694869995, + "learning_rate": 4.162638349005046e-06, + "loss": 0.8689, + "step": 115680 + }, + { + "epoch": 0.8374412763215995, + "grad_norm": 0.15198834240436554, + "learning_rate": 4.16256596234446e-06, + "loss": 0.8867, + "step": 115690 + }, + { + "epoch": 0.8375136629821857, + "grad_norm": 0.1595429629087448, + "learning_rate": 4.162493575683874e-06, + "loss": 0.8792, + "step": 115700 + }, + { + "epoch": 0.8375860496427718, + "grad_norm": 0.15620209276676178, + "learning_rate": 4.162421189023287e-06, + "loss": 0.8751, + "step": 115710 + }, + { + "epoch": 0.837658436303358, + "grad_norm": 0.18191352486610413, + "learning_rate": 4.162348802362701e-06, + "loss": 0.8682, + "step": 115720 + }, + { + "epoch": 0.8377308229639442, + "grad_norm": 0.1610066294670105, + "learning_rate": 4.1622764157021145e-06, + "loss": 0.8813, + "step": 115730 + }, + { + "epoch": 0.8378032096245304, + "grad_norm": 0.16806887090206146, + "learning_rate": 4.162204029041528e-06, + "loss": 0.8786, + "step": 115740 + }, + { + "epoch": 0.8378755962851165, + "grad_norm": 0.1434309184551239, + "learning_rate": 4.162131642380942e-06, + "loss": 0.8844, + "step": 115750 + }, + { + "epoch": 0.8379479829457027, + "grad_norm": 0.17224647104740143, + "learning_rate": 4.162059255720356e-06, + "loss": 0.8848, + "step": 115760 + }, + { + "epoch": 0.838020369606289, + "grad_norm": 0.1476389765739441, + "learning_rate": 4.16198686905977e-06, + "loss": 0.8813, + "step": 115770 + }, + { + "epoch": 0.8380927562668752, + "grad_norm": 0.1633952558040619, + "learning_rate": 4.1619144823991835e-06, + "loss": 0.8842, + "step": 115780 + }, + { + "epoch": 0.8381651429274614, + "grad_norm": 0.16024617850780487, + "learning_rate": 4.161842095738597e-06, + "loss": 0.8829, + "step": 115790 + }, + { + "epoch": 0.8382375295880475, + "grad_norm": 0.15520702302455902, + "learning_rate": 4.1617697090780116e-06, + "loss": 0.8744, + "step": 115800 + }, + { + "epoch": 0.8383099162486337, + "grad_norm": 0.161240816116333, + "learning_rate": 4.161697322417425e-06, + "loss": 0.8912, + "step": 115810 + }, + { + "epoch": 0.8383823029092199, + "grad_norm": 0.14296600222587585, + "learning_rate": 4.161624935756839e-06, + "loss": 0.8727, + "step": 115820 + }, + { + "epoch": 0.8384546895698061, + "grad_norm": 0.16847427189350128, + "learning_rate": 4.161552549096252e-06, + "loss": 0.8812, + "step": 115830 + }, + { + "epoch": 0.8385270762303922, + "grad_norm": 0.14801815152168274, + "learning_rate": 4.161480162435667e-06, + "loss": 0.8822, + "step": 115840 + }, + { + "epoch": 0.8385994628909784, + "grad_norm": 0.15415239334106445, + "learning_rate": 4.1614077757750805e-06, + "loss": 0.8754, + "step": 115850 + }, + { + "epoch": 0.8386718495515646, + "grad_norm": 0.1483432799577713, + "learning_rate": 4.161335389114494e-06, + "loss": 0.8831, + "step": 115860 + }, + { + "epoch": 0.8387442362121508, + "grad_norm": 0.1458449810743332, + "learning_rate": 4.161263002453908e-06, + "loss": 0.8847, + "step": 115870 + }, + { + "epoch": 0.8388166228727371, + "grad_norm": 0.15387925505638123, + "learning_rate": 4.161190615793322e-06, + "loss": 0.8719, + "step": 115880 + }, + { + "epoch": 0.8388890095333232, + "grad_norm": 0.14547225832939148, + "learning_rate": 4.161118229132736e-06, + "loss": 0.8866, + "step": 115890 + }, + { + "epoch": 0.8389613961939094, + "grad_norm": 0.15607166290283203, + "learning_rate": 4.161045842472149e-06, + "loss": 0.8811, + "step": 115900 + }, + { + "epoch": 0.8390337828544956, + "grad_norm": 0.14948482811450958, + "learning_rate": 4.160973455811563e-06, + "loss": 0.8838, + "step": 115910 + }, + { + "epoch": 0.8391061695150818, + "grad_norm": 0.16144073009490967, + "learning_rate": 4.1609010691509775e-06, + "loss": 0.8786, + "step": 115920 + }, + { + "epoch": 0.839178556175668, + "grad_norm": 0.15592895448207855, + "learning_rate": 4.160828682490391e-06, + "loss": 0.8812, + "step": 115930 + }, + { + "epoch": 0.8392509428362541, + "grad_norm": 0.18400424718856812, + "learning_rate": 4.160756295829805e-06, + "loss": 0.87, + "step": 115940 + }, + { + "epoch": 0.8393233294968403, + "grad_norm": 0.1547369509935379, + "learning_rate": 4.160683909169218e-06, + "loss": 0.8918, + "step": 115950 + }, + { + "epoch": 0.8393957161574265, + "grad_norm": 0.16612297296524048, + "learning_rate": 4.160611522508633e-06, + "loss": 0.8735, + "step": 115960 + }, + { + "epoch": 0.8394681028180127, + "grad_norm": 0.14709250628948212, + "learning_rate": 4.160539135848046e-06, + "loss": 0.8752, + "step": 115970 + }, + { + "epoch": 0.8395404894785989, + "grad_norm": 0.15569816529750824, + "learning_rate": 4.16046674918746e-06, + "loss": 0.8725, + "step": 115980 + }, + { + "epoch": 0.8396128761391851, + "grad_norm": 0.16147053241729736, + "learning_rate": 4.160394362526874e-06, + "loss": 0.8772, + "step": 115990 + }, + { + "epoch": 0.8396852627997713, + "grad_norm": 0.15154972672462463, + "learning_rate": 4.160321975866288e-06, + "loss": 0.881, + "step": 116000 + }, + { + "epoch": 0.8397576494603575, + "grad_norm": 0.15269294381141663, + "learning_rate": 4.160249589205702e-06, + "loss": 0.883, + "step": 116010 + }, + { + "epoch": 0.8398300361209436, + "grad_norm": 0.17886610329151154, + "learning_rate": 4.160177202545115e-06, + "loss": 0.8735, + "step": 116020 + }, + { + "epoch": 0.8399024227815298, + "grad_norm": 0.15143749117851257, + "learning_rate": 4.160104815884529e-06, + "loss": 0.8752, + "step": 116030 + }, + { + "epoch": 0.839974809442116, + "grad_norm": 0.15021578967571259, + "learning_rate": 4.1600324292239434e-06, + "loss": 0.8707, + "step": 116040 + }, + { + "epoch": 0.8400471961027022, + "grad_norm": 0.15974397957324982, + "learning_rate": 4.159960042563357e-06, + "loss": 0.8743, + "step": 116050 + }, + { + "epoch": 0.8401195827632884, + "grad_norm": 0.15328460931777954, + "learning_rate": 4.159887655902771e-06, + "loss": 0.878, + "step": 116060 + }, + { + "epoch": 0.8401919694238745, + "grad_norm": 0.16074135899543762, + "learning_rate": 4.159815269242184e-06, + "loss": 0.8775, + "step": 116070 + }, + { + "epoch": 0.8402643560844607, + "grad_norm": 0.14766909182071686, + "learning_rate": 4.159742882581598e-06, + "loss": 0.8734, + "step": 116080 + }, + { + "epoch": 0.840336742745047, + "grad_norm": 0.1620090901851654, + "learning_rate": 4.159670495921012e-06, + "loss": 0.884, + "step": 116090 + }, + { + "epoch": 0.8404091294056332, + "grad_norm": 0.1521306186914444, + "learning_rate": 4.159598109260426e-06, + "loss": 0.8904, + "step": 116100 + }, + { + "epoch": 0.8404815160662193, + "grad_norm": 0.15059085190296173, + "learning_rate": 4.15952572259984e-06, + "loss": 0.8783, + "step": 116110 + }, + { + "epoch": 0.8405539027268055, + "grad_norm": 0.159867063164711, + "learning_rate": 4.159453335939253e-06, + "loss": 0.8777, + "step": 116120 + }, + { + "epoch": 0.8406262893873917, + "grad_norm": 0.15567173063755035, + "learning_rate": 4.159380949278668e-06, + "loss": 0.8751, + "step": 116130 + }, + { + "epoch": 0.8406986760479779, + "grad_norm": 0.16036923229694366, + "learning_rate": 4.159308562618081e-06, + "loss": 0.8946, + "step": 116140 + }, + { + "epoch": 0.840771062708564, + "grad_norm": 0.151793971657753, + "learning_rate": 4.159236175957495e-06, + "loss": 0.8785, + "step": 116150 + }, + { + "epoch": 0.8408434493691502, + "grad_norm": 0.17096100747585297, + "learning_rate": 4.1591637892969085e-06, + "loss": 0.8795, + "step": 116160 + }, + { + "epoch": 0.8409158360297364, + "grad_norm": 0.1503993719816208, + "learning_rate": 4.159091402636323e-06, + "loss": 0.8877, + "step": 116170 + }, + { + "epoch": 0.8409882226903226, + "grad_norm": 0.1676565408706665, + "learning_rate": 4.159019015975737e-06, + "loss": 0.8669, + "step": 116180 + }, + { + "epoch": 0.8410606093509088, + "grad_norm": 0.14891086518764496, + "learning_rate": 4.15894662931515e-06, + "loss": 0.885, + "step": 116190 + }, + { + "epoch": 0.841132996011495, + "grad_norm": 0.15103653073310852, + "learning_rate": 4.158874242654564e-06, + "loss": 0.8782, + "step": 116200 + }, + { + "epoch": 0.8412053826720812, + "grad_norm": 0.15480820834636688, + "learning_rate": 4.158801855993978e-06, + "loss": 0.8741, + "step": 116210 + }, + { + "epoch": 0.8412777693326674, + "grad_norm": 0.17282061278820038, + "learning_rate": 4.158729469333392e-06, + "loss": 0.8843, + "step": 116220 + }, + { + "epoch": 0.8413501559932536, + "grad_norm": 0.15457768738269806, + "learning_rate": 4.1586570826728055e-06, + "loss": 0.8808, + "step": 116230 + }, + { + "epoch": 0.8414225426538398, + "grad_norm": 0.16778331995010376, + "learning_rate": 4.158584696012219e-06, + "loss": 0.8677, + "step": 116240 + }, + { + "epoch": 0.8414949293144259, + "grad_norm": 0.15565545856952667, + "learning_rate": 4.158512309351633e-06, + "loss": 0.8793, + "step": 116250 + }, + { + "epoch": 0.8415673159750121, + "grad_norm": 0.14847850799560547, + "learning_rate": 4.158439922691046e-06, + "loss": 0.8677, + "step": 116260 + }, + { + "epoch": 0.8416397026355983, + "grad_norm": 0.15020453929901123, + "learning_rate": 4.15836753603046e-06, + "loss": 0.8863, + "step": 116270 + }, + { + "epoch": 0.8417120892961845, + "grad_norm": 0.16893228888511658, + "learning_rate": 4.1582951493698745e-06, + "loss": 0.8815, + "step": 116280 + }, + { + "epoch": 0.8417844759567706, + "grad_norm": 0.17535464465618134, + "learning_rate": 4.158222762709288e-06, + "loss": 0.8735, + "step": 116290 + }, + { + "epoch": 0.8418568626173569, + "grad_norm": 0.15316392481327057, + "learning_rate": 4.158150376048702e-06, + "loss": 0.876, + "step": 116300 + }, + { + "epoch": 0.8419292492779431, + "grad_norm": 0.16685836017131805, + "learning_rate": 4.158077989388115e-06, + "loss": 0.873, + "step": 116310 + }, + { + "epoch": 0.8420016359385293, + "grad_norm": 0.16232609748840332, + "learning_rate": 4.15800560272753e-06, + "loss": 0.8792, + "step": 116320 + }, + { + "epoch": 0.8420740225991155, + "grad_norm": 0.1534125953912735, + "learning_rate": 4.157933216066943e-06, + "loss": 0.8802, + "step": 116330 + }, + { + "epoch": 0.8421464092597016, + "grad_norm": 0.16465212404727936, + "learning_rate": 4.157860829406357e-06, + "loss": 0.8792, + "step": 116340 + }, + { + "epoch": 0.8422187959202878, + "grad_norm": 0.1542976200580597, + "learning_rate": 4.157788442745771e-06, + "loss": 0.8803, + "step": 116350 + }, + { + "epoch": 0.842291182580874, + "grad_norm": 0.1525125801563263, + "learning_rate": 4.157716056085185e-06, + "loss": 0.8787, + "step": 116360 + }, + { + "epoch": 0.8423635692414602, + "grad_norm": 0.16965779662132263, + "learning_rate": 4.157643669424599e-06, + "loss": 0.8799, + "step": 116370 + }, + { + "epoch": 0.8424359559020463, + "grad_norm": 0.14481857419013977, + "learning_rate": 4.157571282764012e-06, + "loss": 0.8803, + "step": 116380 + }, + { + "epoch": 0.8425083425626325, + "grad_norm": 0.15240401029586792, + "learning_rate": 4.157498896103426e-06, + "loss": 0.8813, + "step": 116390 + }, + { + "epoch": 0.8425807292232187, + "grad_norm": 0.17337167263031006, + "learning_rate": 4.15742650944284e-06, + "loss": 0.8711, + "step": 116400 + }, + { + "epoch": 0.842653115883805, + "grad_norm": 0.14756740629673004, + "learning_rate": 4.157354122782254e-06, + "loss": 0.8615, + "step": 116410 + }, + { + "epoch": 0.8427255025443912, + "grad_norm": 0.16462619602680206, + "learning_rate": 4.157281736121668e-06, + "loss": 0.8717, + "step": 116420 + }, + { + "epoch": 0.8427978892049773, + "grad_norm": 0.1649799644947052, + "learning_rate": 4.157209349461081e-06, + "loss": 0.8778, + "step": 116430 + }, + { + "epoch": 0.8428702758655635, + "grad_norm": 0.1735529750585556, + "learning_rate": 4.157136962800496e-06, + "loss": 0.8802, + "step": 116440 + }, + { + "epoch": 0.8429426625261497, + "grad_norm": 0.19218334555625916, + "learning_rate": 4.157064576139909e-06, + "loss": 0.8801, + "step": 116450 + }, + { + "epoch": 0.8430150491867359, + "grad_norm": 0.15204769372940063, + "learning_rate": 4.156992189479323e-06, + "loss": 0.8728, + "step": 116460 + }, + { + "epoch": 0.843087435847322, + "grad_norm": 0.15448568761348724, + "learning_rate": 4.1569198028187365e-06, + "loss": 0.8687, + "step": 116470 + }, + { + "epoch": 0.8431598225079082, + "grad_norm": 0.15800927579402924, + "learning_rate": 4.156847416158151e-06, + "loss": 0.8742, + "step": 116480 + }, + { + "epoch": 0.8432322091684944, + "grad_norm": 0.1503394991159439, + "learning_rate": 4.156775029497565e-06, + "loss": 0.875, + "step": 116490 + }, + { + "epoch": 0.8433045958290806, + "grad_norm": 0.5347150564193726, + "learning_rate": 4.156702642836978e-06, + "loss": 0.8686, + "step": 116500 + }, + { + "epoch": 0.8433769824896667, + "grad_norm": 0.1692754626274109, + "learning_rate": 4.156630256176392e-06, + "loss": 0.8775, + "step": 116510 + }, + { + "epoch": 0.843449369150253, + "grad_norm": 0.15633852779865265, + "learning_rate": 4.156557869515806e-06, + "loss": 0.8743, + "step": 116520 + }, + { + "epoch": 0.8435217558108392, + "grad_norm": 0.15997891128063202, + "learning_rate": 4.15648548285522e-06, + "loss": 0.8746, + "step": 116530 + }, + { + "epoch": 0.8435941424714254, + "grad_norm": 0.15815088152885437, + "learning_rate": 4.1564130961946336e-06, + "loss": 0.8701, + "step": 116540 + }, + { + "epoch": 0.8436665291320116, + "grad_norm": 0.1581558734178543, + "learning_rate": 4.156340709534047e-06, + "loss": 0.8803, + "step": 116550 + }, + { + "epoch": 0.8437389157925977, + "grad_norm": 0.15266630053520203, + "learning_rate": 4.156268322873462e-06, + "loss": 0.8638, + "step": 116560 + }, + { + "epoch": 0.8438113024531839, + "grad_norm": 0.15142551064491272, + "learning_rate": 4.156195936212875e-06, + "loss": 0.8699, + "step": 116570 + }, + { + "epoch": 0.8438836891137701, + "grad_norm": 0.1561920940876007, + "learning_rate": 4.156123549552289e-06, + "loss": 0.8756, + "step": 116580 + }, + { + "epoch": 0.8439560757743563, + "grad_norm": 0.16398534178733826, + "learning_rate": 4.1560511628917025e-06, + "loss": 0.8767, + "step": 116590 + }, + { + "epoch": 0.8440284624349425, + "grad_norm": 0.1780395209789276, + "learning_rate": 4.155978776231117e-06, + "loss": 0.8656, + "step": 116600 + }, + { + "epoch": 0.8441008490955286, + "grad_norm": 0.16467039287090302, + "learning_rate": 4.1559063895705306e-06, + "loss": 0.8734, + "step": 116610 + }, + { + "epoch": 0.8441732357561149, + "grad_norm": 0.16053692996501923, + "learning_rate": 4.155834002909944e-06, + "loss": 0.8777, + "step": 116620 + }, + { + "epoch": 0.8442456224167011, + "grad_norm": 0.15318801999092102, + "learning_rate": 4.155761616249358e-06, + "loss": 0.8797, + "step": 116630 + }, + { + "epoch": 0.8443180090772873, + "grad_norm": 0.1649479866027832, + "learning_rate": 4.155689229588772e-06, + "loss": 0.886, + "step": 116640 + }, + { + "epoch": 0.8443903957378734, + "grad_norm": 0.15145239233970642, + "learning_rate": 4.155616842928186e-06, + "loss": 0.8599, + "step": 116650 + }, + { + "epoch": 0.8444627823984596, + "grad_norm": 0.15609456598758698, + "learning_rate": 4.1555444562675995e-06, + "loss": 0.8787, + "step": 116660 + }, + { + "epoch": 0.8445351690590458, + "grad_norm": 0.14667485654354095, + "learning_rate": 4.155472069607013e-06, + "loss": 0.8736, + "step": 116670 + }, + { + "epoch": 0.844607555719632, + "grad_norm": 0.1538429707288742, + "learning_rate": 4.1553996829464276e-06, + "loss": 0.8859, + "step": 116680 + }, + { + "epoch": 0.8446799423802182, + "grad_norm": 0.1470920741558075, + "learning_rate": 4.155327296285841e-06, + "loss": 0.8801, + "step": 116690 + }, + { + "epoch": 0.8447523290408043, + "grad_norm": 0.2034630924463272, + "learning_rate": 4.155254909625255e-06, + "loss": 0.8741, + "step": 116700 + }, + { + "epoch": 0.8448247157013905, + "grad_norm": 0.14927048981189728, + "learning_rate": 4.155182522964668e-06, + "loss": 0.876, + "step": 116710 + }, + { + "epoch": 0.8448971023619767, + "grad_norm": 0.16222061216831207, + "learning_rate": 4.155110136304082e-06, + "loss": 0.8748, + "step": 116720 + }, + { + "epoch": 0.844969489022563, + "grad_norm": 0.15882587432861328, + "learning_rate": 4.1550377496434965e-06, + "loss": 0.885, + "step": 116730 + }, + { + "epoch": 0.8450418756831491, + "grad_norm": 0.1530759185552597, + "learning_rate": 4.15496536298291e-06, + "loss": 0.8758, + "step": 116740 + }, + { + "epoch": 0.8451142623437353, + "grad_norm": 0.15886208415031433, + "learning_rate": 4.154892976322324e-06, + "loss": 0.8756, + "step": 116750 + }, + { + "epoch": 0.8451866490043215, + "grad_norm": 0.1658525913953781, + "learning_rate": 4.154820589661737e-06, + "loss": 0.8642, + "step": 116760 + }, + { + "epoch": 0.8452590356649077, + "grad_norm": 0.15931586921215057, + "learning_rate": 4.154748203001152e-06, + "loss": 0.8726, + "step": 116770 + }, + { + "epoch": 0.8453314223254939, + "grad_norm": 0.16602514684200287, + "learning_rate": 4.154675816340565e-06, + "loss": 0.8694, + "step": 116780 + }, + { + "epoch": 0.84540380898608, + "grad_norm": 0.14427199959754944, + "learning_rate": 4.154603429679979e-06, + "loss": 0.8666, + "step": 116790 + }, + { + "epoch": 0.8454761956466662, + "grad_norm": 0.15520738065242767, + "learning_rate": 4.154531043019393e-06, + "loss": 0.8778, + "step": 116800 + }, + { + "epoch": 0.8455485823072524, + "grad_norm": 0.15710857510566711, + "learning_rate": 4.154458656358806e-06, + "loss": 0.8681, + "step": 116810 + }, + { + "epoch": 0.8456209689678386, + "grad_norm": 0.1453404724597931, + "learning_rate": 4.15438626969822e-06, + "loss": 0.8734, + "step": 116820 + }, + { + "epoch": 0.8456933556284248, + "grad_norm": 0.16362370550632477, + "learning_rate": 4.154313883037634e-06, + "loss": 0.8842, + "step": 116830 + }, + { + "epoch": 0.845765742289011, + "grad_norm": 0.14579138159751892, + "learning_rate": 4.154241496377048e-06, + "loss": 0.8635, + "step": 116840 + }, + { + "epoch": 0.8458381289495972, + "grad_norm": 0.1520829200744629, + "learning_rate": 4.154169109716462e-06, + "loss": 0.8801, + "step": 116850 + }, + { + "epoch": 0.8459105156101834, + "grad_norm": 0.1613858938217163, + "learning_rate": 4.154096723055875e-06, + "loss": 0.8858, + "step": 116860 + }, + { + "epoch": 0.8459829022707696, + "grad_norm": 0.14903327822685242, + "learning_rate": 4.154024336395289e-06, + "loss": 0.8801, + "step": 116870 + }, + { + "epoch": 0.8460552889313557, + "grad_norm": 0.2931915819644928, + "learning_rate": 4.153951949734703e-06, + "loss": 0.8663, + "step": 116880 + }, + { + "epoch": 0.8461276755919419, + "grad_norm": 0.1518881767988205, + "learning_rate": 4.153879563074117e-06, + "loss": 0.8729, + "step": 116890 + }, + { + "epoch": 0.8462000622525281, + "grad_norm": 0.15249276161193848, + "learning_rate": 4.1538071764135305e-06, + "loss": 0.8809, + "step": 116900 + }, + { + "epoch": 0.8462724489131143, + "grad_norm": 0.1551447957754135, + "learning_rate": 4.153734789752944e-06, + "loss": 0.8857, + "step": 116910 + }, + { + "epoch": 0.8463448355737004, + "grad_norm": 0.17283110320568085, + "learning_rate": 4.153662403092359e-06, + "loss": 0.8796, + "step": 116920 + }, + { + "epoch": 0.8464172222342866, + "grad_norm": 0.1588117629289627, + "learning_rate": 4.153590016431772e-06, + "loss": 0.8738, + "step": 116930 + }, + { + "epoch": 0.8464896088948729, + "grad_norm": 0.15109075605869293, + "learning_rate": 4.153517629771186e-06, + "loss": 0.8842, + "step": 116940 + }, + { + "epoch": 0.8465619955554591, + "grad_norm": 0.15133486688137054, + "learning_rate": 4.1534452431105994e-06, + "loss": 0.8784, + "step": 116950 + }, + { + "epoch": 0.8466343822160453, + "grad_norm": 0.1452382504940033, + "learning_rate": 4.153372856450014e-06, + "loss": 0.8816, + "step": 116960 + }, + { + "epoch": 0.8467067688766314, + "grad_norm": 0.14603115618228912, + "learning_rate": 4.1533004697894275e-06, + "loss": 0.89, + "step": 116970 + }, + { + "epoch": 0.8467791555372176, + "grad_norm": 0.15353167057037354, + "learning_rate": 4.153228083128841e-06, + "loss": 0.8786, + "step": 116980 + }, + { + "epoch": 0.8468515421978038, + "grad_norm": 0.14847798645496368, + "learning_rate": 4.153155696468255e-06, + "loss": 0.8782, + "step": 116990 + }, + { + "epoch": 0.84692392885839, + "grad_norm": 0.15351562201976776, + "learning_rate": 4.153083309807669e-06, + "loss": 0.8894, + "step": 117000 + }, + { + "epoch": 0.8469963155189761, + "grad_norm": 0.14607466757297516, + "learning_rate": 4.153010923147083e-06, + "loss": 0.8665, + "step": 117010 + }, + { + "epoch": 0.8470687021795623, + "grad_norm": 0.1430024653673172, + "learning_rate": 4.1529385364864964e-06, + "loss": 0.8724, + "step": 117020 + }, + { + "epoch": 0.8471410888401485, + "grad_norm": 0.15325815975666046, + "learning_rate": 4.15286614982591e-06, + "loss": 0.863, + "step": 117030 + }, + { + "epoch": 0.8472134755007347, + "grad_norm": 0.14200498163700104, + "learning_rate": 4.1527937631653245e-06, + "loss": 0.8753, + "step": 117040 + }, + { + "epoch": 0.847285862161321, + "grad_norm": 0.17369510233402252, + "learning_rate": 4.152721376504738e-06, + "loss": 0.8768, + "step": 117050 + }, + { + "epoch": 0.8473582488219071, + "grad_norm": 0.15766370296478271, + "learning_rate": 4.152648989844152e-06, + "loss": 0.877, + "step": 117060 + }, + { + "epoch": 0.8474306354824933, + "grad_norm": 0.16461563110351562, + "learning_rate": 4.152576603183565e-06, + "loss": 0.8692, + "step": 117070 + }, + { + "epoch": 0.8475030221430795, + "grad_norm": 0.1531338095664978, + "learning_rate": 4.15250421652298e-06, + "loss": 0.8897, + "step": 117080 + }, + { + "epoch": 0.8475754088036657, + "grad_norm": 0.15825554728507996, + "learning_rate": 4.1524318298623935e-06, + "loss": 0.8883, + "step": 117090 + }, + { + "epoch": 0.8476477954642518, + "grad_norm": 0.1514403074979782, + "learning_rate": 4.152359443201807e-06, + "loss": 0.886, + "step": 117100 + }, + { + "epoch": 0.847720182124838, + "grad_norm": 0.1458583027124405, + "learning_rate": 4.152287056541221e-06, + "loss": 0.8762, + "step": 117110 + }, + { + "epoch": 0.8477925687854242, + "grad_norm": 0.18490411341190338, + "learning_rate": 4.152214669880635e-06, + "loss": 0.8753, + "step": 117120 + }, + { + "epoch": 0.8478649554460104, + "grad_norm": 0.16968803107738495, + "learning_rate": 4.152142283220049e-06, + "loss": 0.8885, + "step": 117130 + }, + { + "epoch": 0.8479373421065965, + "grad_norm": 0.15850697457790375, + "learning_rate": 4.152069896559462e-06, + "loss": 0.8867, + "step": 117140 + }, + { + "epoch": 0.8480097287671828, + "grad_norm": 0.15073266625404358, + "learning_rate": 4.151997509898876e-06, + "loss": 0.8838, + "step": 117150 + }, + { + "epoch": 0.848082115427769, + "grad_norm": 0.1633184403181076, + "learning_rate": 4.1519251232382905e-06, + "loss": 0.8646, + "step": 117160 + }, + { + "epoch": 0.8481545020883552, + "grad_norm": 0.1570475846529007, + "learning_rate": 4.151852736577704e-06, + "loss": 0.8869, + "step": 117170 + }, + { + "epoch": 0.8482268887489414, + "grad_norm": 0.152681365609169, + "learning_rate": 4.151780349917118e-06, + "loss": 0.874, + "step": 117180 + }, + { + "epoch": 0.8482992754095275, + "grad_norm": 0.15066421031951904, + "learning_rate": 4.151707963256531e-06, + "loss": 0.8739, + "step": 117190 + }, + { + "epoch": 0.8483716620701137, + "grad_norm": 0.14346186816692352, + "learning_rate": 4.151635576595946e-06, + "loss": 0.8754, + "step": 117200 + }, + { + "epoch": 0.8484440487306999, + "grad_norm": 0.14868153631687164, + "learning_rate": 4.151563189935359e-06, + "loss": 0.8744, + "step": 117210 + }, + { + "epoch": 0.8485164353912861, + "grad_norm": 0.16770720481872559, + "learning_rate": 4.151490803274773e-06, + "loss": 0.8894, + "step": 117220 + }, + { + "epoch": 0.8485888220518722, + "grad_norm": 0.15046921372413635, + "learning_rate": 4.151418416614187e-06, + "loss": 0.8617, + "step": 117230 + }, + { + "epoch": 0.8486612087124584, + "grad_norm": 0.596076488494873, + "learning_rate": 4.151346029953601e-06, + "loss": 0.8723, + "step": 117240 + }, + { + "epoch": 0.8487335953730446, + "grad_norm": 0.15184585750102997, + "learning_rate": 4.151273643293015e-06, + "loss": 0.8841, + "step": 117250 + }, + { + "epoch": 0.8488059820336309, + "grad_norm": 0.14896786212921143, + "learning_rate": 4.151201256632428e-06, + "loss": 0.8806, + "step": 117260 + }, + { + "epoch": 0.8488783686942171, + "grad_norm": 0.15315642952919006, + "learning_rate": 4.151128869971842e-06, + "loss": 0.8845, + "step": 117270 + }, + { + "epoch": 0.8489507553548032, + "grad_norm": 0.15430568158626556, + "learning_rate": 4.151056483311256e-06, + "loss": 0.8775, + "step": 117280 + }, + { + "epoch": 0.8490231420153894, + "grad_norm": 0.170336052775383, + "learning_rate": 4.15098409665067e-06, + "loss": 0.8547, + "step": 117290 + }, + { + "epoch": 0.8490955286759756, + "grad_norm": 0.14343459904193878, + "learning_rate": 4.150911709990084e-06, + "loss": 0.873, + "step": 117300 + }, + { + "epoch": 0.8491679153365618, + "grad_norm": 0.20575720071792603, + "learning_rate": 4.150839323329497e-06, + "loss": 0.873, + "step": 117310 + }, + { + "epoch": 0.849240301997148, + "grad_norm": 0.15282517671585083, + "learning_rate": 4.150766936668911e-06, + "loss": 0.8895, + "step": 117320 + }, + { + "epoch": 0.8493126886577341, + "grad_norm": 0.1475241631269455, + "learning_rate": 4.1506945500083245e-06, + "loss": 0.8727, + "step": 117330 + }, + { + "epoch": 0.8493850753183203, + "grad_norm": 0.1757318675518036, + "learning_rate": 4.150622163347738e-06, + "loss": 0.8726, + "step": 117340 + }, + { + "epoch": 0.8494574619789065, + "grad_norm": 0.1519278883934021, + "learning_rate": 4.1505497766871526e-06, + "loss": 0.8614, + "step": 117350 + }, + { + "epoch": 0.8495298486394928, + "grad_norm": 0.14336329698562622, + "learning_rate": 4.150477390026566e-06, + "loss": 0.879, + "step": 117360 + }, + { + "epoch": 0.849602235300079, + "grad_norm": 0.15254177153110504, + "learning_rate": 4.15040500336598e-06, + "loss": 0.8768, + "step": 117370 + }, + { + "epoch": 0.8496746219606651, + "grad_norm": 0.15974296629428864, + "learning_rate": 4.150332616705393e-06, + "loss": 0.8659, + "step": 117380 + }, + { + "epoch": 0.8497470086212513, + "grad_norm": 0.15729989111423492, + "learning_rate": 4.150260230044808e-06, + "loss": 0.8687, + "step": 117390 + }, + { + "epoch": 0.8498193952818375, + "grad_norm": 0.15651953220367432, + "learning_rate": 4.1501878433842215e-06, + "loss": 0.8749, + "step": 117400 + }, + { + "epoch": 0.8498917819424237, + "grad_norm": 0.14588718116283417, + "learning_rate": 4.150115456723635e-06, + "loss": 0.8758, + "step": 117410 + }, + { + "epoch": 0.8499641686030098, + "grad_norm": 0.15261155366897583, + "learning_rate": 4.150043070063049e-06, + "loss": 0.8791, + "step": 117420 + }, + { + "epoch": 0.850036555263596, + "grad_norm": 0.16166460514068604, + "learning_rate": 4.149970683402463e-06, + "loss": 0.8829, + "step": 117430 + }, + { + "epoch": 0.8501089419241822, + "grad_norm": 0.1595904678106308, + "learning_rate": 4.149898296741877e-06, + "loss": 0.8713, + "step": 117440 + }, + { + "epoch": 0.8501813285847684, + "grad_norm": 0.1586706042289734, + "learning_rate": 4.14982591008129e-06, + "loss": 0.8689, + "step": 117450 + }, + { + "epoch": 0.8502537152453545, + "grad_norm": 0.17200522124767303, + "learning_rate": 4.149753523420704e-06, + "loss": 0.88, + "step": 117460 + }, + { + "epoch": 0.8503261019059408, + "grad_norm": 0.17994257807731628, + "learning_rate": 4.1496811367601185e-06, + "loss": 0.8885, + "step": 117470 + }, + { + "epoch": 0.850398488566527, + "grad_norm": 0.1691487431526184, + "learning_rate": 4.149608750099532e-06, + "loss": 0.8759, + "step": 117480 + }, + { + "epoch": 0.8504708752271132, + "grad_norm": 0.1492878645658493, + "learning_rate": 4.149536363438946e-06, + "loss": 0.8862, + "step": 117490 + }, + { + "epoch": 0.8505432618876994, + "grad_norm": 0.15258543193340302, + "learning_rate": 4.149463976778359e-06, + "loss": 0.8684, + "step": 117500 + }, + { + "epoch": 0.8506156485482855, + "grad_norm": 0.14704549312591553, + "learning_rate": 4.149391590117773e-06, + "loss": 0.8682, + "step": 117510 + }, + { + "epoch": 0.8506880352088717, + "grad_norm": 0.1570146381855011, + "learning_rate": 4.1493192034571874e-06, + "loss": 0.8638, + "step": 117520 + }, + { + "epoch": 0.8507604218694579, + "grad_norm": 0.1731637567281723, + "learning_rate": 4.149246816796601e-06, + "loss": 0.8875, + "step": 117530 + }, + { + "epoch": 0.8508328085300441, + "grad_norm": 0.15617217123508453, + "learning_rate": 4.149174430136015e-06, + "loss": 0.8981, + "step": 117540 + }, + { + "epoch": 0.8509051951906302, + "grad_norm": 0.15860138833522797, + "learning_rate": 4.149102043475428e-06, + "loss": 0.8783, + "step": 117550 + }, + { + "epoch": 0.8509775818512164, + "grad_norm": 0.15152351558208466, + "learning_rate": 4.149029656814843e-06, + "loss": 0.8784, + "step": 117560 + }, + { + "epoch": 0.8510499685118026, + "grad_norm": 0.15892478823661804, + "learning_rate": 4.148957270154256e-06, + "loss": 0.8943, + "step": 117570 + }, + { + "epoch": 0.8511223551723889, + "grad_norm": 0.13866350054740906, + "learning_rate": 4.14888488349367e-06, + "loss": 0.8671, + "step": 117580 + }, + { + "epoch": 0.851194741832975, + "grad_norm": 0.22664818167686462, + "learning_rate": 4.148812496833084e-06, + "loss": 0.87, + "step": 117590 + }, + { + "epoch": 0.8512671284935612, + "grad_norm": 0.1927105337381363, + "learning_rate": 4.148740110172498e-06, + "loss": 0.8713, + "step": 117600 + }, + { + "epoch": 0.8513395151541474, + "grad_norm": 0.19467182457447052, + "learning_rate": 4.148667723511912e-06, + "loss": 0.879, + "step": 117610 + }, + { + "epoch": 0.8514119018147336, + "grad_norm": 0.1516137421131134, + "learning_rate": 4.148595336851325e-06, + "loss": 0.8693, + "step": 117620 + }, + { + "epoch": 0.8514842884753198, + "grad_norm": 0.15127213299274445, + "learning_rate": 4.148522950190739e-06, + "loss": 0.8648, + "step": 117630 + }, + { + "epoch": 0.8515566751359059, + "grad_norm": 0.16966821253299713, + "learning_rate": 4.148450563530153e-06, + "loss": 0.8681, + "step": 117640 + }, + { + "epoch": 0.8516290617964921, + "grad_norm": 0.17517682909965515, + "learning_rate": 4.148378176869567e-06, + "loss": 0.8782, + "step": 117650 + }, + { + "epoch": 0.8517014484570783, + "grad_norm": 0.154166117310524, + "learning_rate": 4.148305790208981e-06, + "loss": 0.8838, + "step": 117660 + }, + { + "epoch": 0.8517738351176645, + "grad_norm": 0.16335949301719666, + "learning_rate": 4.148233403548394e-06, + "loss": 0.88, + "step": 117670 + }, + { + "epoch": 0.8518462217782508, + "grad_norm": 0.15556417405605316, + "learning_rate": 4.148161016887809e-06, + "loss": 0.8693, + "step": 117680 + }, + { + "epoch": 0.8519186084388369, + "grad_norm": 0.14646877348423004, + "learning_rate": 4.148088630227222e-06, + "loss": 0.8817, + "step": 117690 + }, + { + "epoch": 0.8519909950994231, + "grad_norm": 0.15498358011245728, + "learning_rate": 4.148016243566636e-06, + "loss": 0.875, + "step": 117700 + }, + { + "epoch": 0.8520633817600093, + "grad_norm": 0.17575348913669586, + "learning_rate": 4.1479438569060495e-06, + "loss": 0.8792, + "step": 117710 + }, + { + "epoch": 0.8521357684205955, + "grad_norm": 0.14134739339351654, + "learning_rate": 4.147871470245464e-06, + "loss": 0.8778, + "step": 117720 + }, + { + "epoch": 0.8522081550811816, + "grad_norm": 0.18208590149879456, + "learning_rate": 4.147799083584878e-06, + "loss": 0.8799, + "step": 117730 + }, + { + "epoch": 0.8522805417417678, + "grad_norm": 0.15532927215099335, + "learning_rate": 4.147726696924291e-06, + "loss": 0.8852, + "step": 117740 + }, + { + "epoch": 0.852352928402354, + "grad_norm": 0.15121373534202576, + "learning_rate": 4.147654310263705e-06, + "loss": 0.8639, + "step": 117750 + }, + { + "epoch": 0.8524253150629402, + "grad_norm": 0.14457499980926514, + "learning_rate": 4.147581923603119e-06, + "loss": 0.8829, + "step": 117760 + }, + { + "epoch": 0.8524977017235263, + "grad_norm": 0.1544269621372223, + "learning_rate": 4.147509536942533e-06, + "loss": 0.8777, + "step": 117770 + }, + { + "epoch": 0.8525700883841125, + "grad_norm": 0.15642063319683075, + "learning_rate": 4.1474371502819465e-06, + "loss": 0.879, + "step": 117780 + }, + { + "epoch": 0.8526424750446988, + "grad_norm": 0.3743261694908142, + "learning_rate": 4.14736476362136e-06, + "loss": 0.8811, + "step": 117790 + }, + { + "epoch": 0.852714861705285, + "grad_norm": 0.14951781928539276, + "learning_rate": 4.147292376960775e-06, + "loss": 0.8695, + "step": 117800 + }, + { + "epoch": 0.8527872483658712, + "grad_norm": 0.153753861784935, + "learning_rate": 4.147219990300188e-06, + "loss": 0.8706, + "step": 117810 + }, + { + "epoch": 0.8528596350264573, + "grad_norm": 0.15326263010501862, + "learning_rate": 4.147147603639602e-06, + "loss": 0.8746, + "step": 117820 + }, + { + "epoch": 0.8529320216870435, + "grad_norm": 0.16046510636806488, + "learning_rate": 4.1470752169790155e-06, + "loss": 0.8765, + "step": 117830 + }, + { + "epoch": 0.8530044083476297, + "grad_norm": 0.15566720068454742, + "learning_rate": 4.147002830318429e-06, + "loss": 0.8806, + "step": 117840 + }, + { + "epoch": 0.8530767950082159, + "grad_norm": 0.16101893782615662, + "learning_rate": 4.146930443657843e-06, + "loss": 0.8745, + "step": 117850 + }, + { + "epoch": 0.853149181668802, + "grad_norm": 0.15617725253105164, + "learning_rate": 4.146858056997256e-06, + "loss": 0.8773, + "step": 117860 + }, + { + "epoch": 0.8532215683293882, + "grad_norm": 0.19228564202785492, + "learning_rate": 4.146785670336671e-06, + "loss": 0.8827, + "step": 117870 + }, + { + "epoch": 0.8532939549899744, + "grad_norm": 0.17179349064826965, + "learning_rate": 4.146713283676084e-06, + "loss": 0.8657, + "step": 117880 + }, + { + "epoch": 0.8533663416505607, + "grad_norm": 0.1750807911157608, + "learning_rate": 4.146640897015498e-06, + "loss": 0.8613, + "step": 117890 + }, + { + "epoch": 0.8534387283111469, + "grad_norm": 0.17412197589874268, + "learning_rate": 4.146568510354912e-06, + "loss": 0.8641, + "step": 117900 + }, + { + "epoch": 0.853511114971733, + "grad_norm": 0.154750794172287, + "learning_rate": 4.146496123694326e-06, + "loss": 0.8799, + "step": 117910 + }, + { + "epoch": 0.8535835016323192, + "grad_norm": 0.17946046590805054, + "learning_rate": 4.14642373703374e-06, + "loss": 0.8661, + "step": 117920 + }, + { + "epoch": 0.8536558882929054, + "grad_norm": 0.17114438116550446, + "learning_rate": 4.146351350373153e-06, + "loss": 0.8755, + "step": 117930 + }, + { + "epoch": 0.8537282749534916, + "grad_norm": 0.15185768902301788, + "learning_rate": 4.146278963712567e-06, + "loss": 0.8746, + "step": 117940 + }, + { + "epoch": 0.8538006616140777, + "grad_norm": 0.17097008228302002, + "learning_rate": 4.146206577051981e-06, + "loss": 0.8732, + "step": 117950 + }, + { + "epoch": 0.8538730482746639, + "grad_norm": 0.154009148478508, + "learning_rate": 4.146134190391395e-06, + "loss": 0.8758, + "step": 117960 + }, + { + "epoch": 0.8539454349352501, + "grad_norm": 0.15704505145549774, + "learning_rate": 4.146061803730809e-06, + "loss": 0.8888, + "step": 117970 + }, + { + "epoch": 0.8540178215958363, + "grad_norm": 0.19270089268684387, + "learning_rate": 4.145989417070222e-06, + "loss": 0.8657, + "step": 117980 + }, + { + "epoch": 0.8540902082564225, + "grad_norm": 0.16790702939033508, + "learning_rate": 4.145917030409637e-06, + "loss": 0.8659, + "step": 117990 + }, + { + "epoch": 0.8541625949170087, + "grad_norm": 0.15922467410564423, + "learning_rate": 4.14584464374905e-06, + "loss": 0.8762, + "step": 118000 + }, + { + "epoch": 0.8542349815775949, + "grad_norm": 0.14866970479488373, + "learning_rate": 4.145772257088464e-06, + "loss": 0.881, + "step": 118010 + }, + { + "epoch": 0.8543073682381811, + "grad_norm": 0.15993216633796692, + "learning_rate": 4.1456998704278776e-06, + "loss": 0.8748, + "step": 118020 + }, + { + "epoch": 0.8543797548987673, + "grad_norm": 0.1770089566707611, + "learning_rate": 4.145627483767292e-06, + "loss": 0.8689, + "step": 118030 + }, + { + "epoch": 0.8544521415593535, + "grad_norm": 0.1457049697637558, + "learning_rate": 4.145555097106706e-06, + "loss": 0.8777, + "step": 118040 + }, + { + "epoch": 0.8545245282199396, + "grad_norm": 0.1655452698469162, + "learning_rate": 4.145482710446119e-06, + "loss": 0.8742, + "step": 118050 + }, + { + "epoch": 0.8545969148805258, + "grad_norm": 0.15786688029766083, + "learning_rate": 4.145410323785533e-06, + "loss": 0.8852, + "step": 118060 + }, + { + "epoch": 0.854669301541112, + "grad_norm": 0.14149673283100128, + "learning_rate": 4.145337937124947e-06, + "loss": 0.8803, + "step": 118070 + }, + { + "epoch": 0.8547416882016982, + "grad_norm": 0.19031822681427002, + "learning_rate": 4.145265550464361e-06, + "loss": 0.8798, + "step": 118080 + }, + { + "epoch": 0.8548140748622843, + "grad_norm": 0.1616319864988327, + "learning_rate": 4.1451931638037746e-06, + "loss": 0.8837, + "step": 118090 + }, + { + "epoch": 0.8548864615228705, + "grad_norm": 0.16045629978179932, + "learning_rate": 4.145120777143188e-06, + "loss": 0.8746, + "step": 118100 + }, + { + "epoch": 0.8549588481834568, + "grad_norm": 0.14599646627902985, + "learning_rate": 4.145048390482602e-06, + "loss": 0.878, + "step": 118110 + }, + { + "epoch": 0.855031234844043, + "grad_norm": 0.1440078318119049, + "learning_rate": 4.144976003822016e-06, + "loss": 0.8766, + "step": 118120 + }, + { + "epoch": 0.8551036215046292, + "grad_norm": 0.15769658982753754, + "learning_rate": 4.14490361716143e-06, + "loss": 0.8753, + "step": 118130 + }, + { + "epoch": 0.8551760081652153, + "grad_norm": 0.15050294995307922, + "learning_rate": 4.1448312305008435e-06, + "loss": 0.8737, + "step": 118140 + }, + { + "epoch": 0.8552483948258015, + "grad_norm": 0.15845508873462677, + "learning_rate": 4.144758843840257e-06, + "loss": 0.8665, + "step": 118150 + }, + { + "epoch": 0.8553207814863877, + "grad_norm": 0.16100691258907318, + "learning_rate": 4.1446864571796716e-06, + "loss": 0.8818, + "step": 118160 + }, + { + "epoch": 0.8553931681469739, + "grad_norm": 0.1659129559993744, + "learning_rate": 4.144614070519085e-06, + "loss": 0.874, + "step": 118170 + }, + { + "epoch": 0.85546555480756, + "grad_norm": 0.15678264200687408, + "learning_rate": 4.144541683858499e-06, + "loss": 0.8712, + "step": 118180 + }, + { + "epoch": 0.8555379414681462, + "grad_norm": 0.16013620793819427, + "learning_rate": 4.144469297197912e-06, + "loss": 0.8695, + "step": 118190 + }, + { + "epoch": 0.8556103281287324, + "grad_norm": 0.16265377402305603, + "learning_rate": 4.144396910537327e-06, + "loss": 0.8779, + "step": 118200 + }, + { + "epoch": 0.8556827147893187, + "grad_norm": 0.15654130280017853, + "learning_rate": 4.1443245238767405e-06, + "loss": 0.8683, + "step": 118210 + }, + { + "epoch": 0.8557551014499049, + "grad_norm": 0.17270372807979584, + "learning_rate": 4.144252137216154e-06, + "loss": 0.8814, + "step": 118220 + }, + { + "epoch": 0.855827488110491, + "grad_norm": 0.15407659113407135, + "learning_rate": 4.144179750555568e-06, + "loss": 0.8727, + "step": 118230 + }, + { + "epoch": 0.8558998747710772, + "grad_norm": 0.14619654417037964, + "learning_rate": 4.144107363894982e-06, + "loss": 0.8825, + "step": 118240 + }, + { + "epoch": 0.8559722614316634, + "grad_norm": 0.15296125411987305, + "learning_rate": 4.144034977234396e-06, + "loss": 0.8842, + "step": 118250 + }, + { + "epoch": 0.8560446480922496, + "grad_norm": 0.14285480976104736, + "learning_rate": 4.143962590573809e-06, + "loss": 0.8817, + "step": 118260 + }, + { + "epoch": 0.8561170347528357, + "grad_norm": 0.15438437461853027, + "learning_rate": 4.143890203913223e-06, + "loss": 0.8721, + "step": 118270 + }, + { + "epoch": 0.8561894214134219, + "grad_norm": 0.14615854620933533, + "learning_rate": 4.1438178172526375e-06, + "loss": 0.8703, + "step": 118280 + }, + { + "epoch": 0.8562618080740081, + "grad_norm": 0.1698932945728302, + "learning_rate": 4.143745430592051e-06, + "loss": 0.8623, + "step": 118290 + }, + { + "epoch": 0.8563341947345943, + "grad_norm": 0.14447180926799774, + "learning_rate": 4.143673043931465e-06, + "loss": 0.8622, + "step": 118300 + }, + { + "epoch": 0.8564065813951804, + "grad_norm": 0.15869450569152832, + "learning_rate": 4.143600657270878e-06, + "loss": 0.8625, + "step": 118310 + }, + { + "epoch": 0.8564789680557667, + "grad_norm": 0.16322019696235657, + "learning_rate": 4.143528270610293e-06, + "loss": 0.8714, + "step": 118320 + }, + { + "epoch": 0.8565513547163529, + "grad_norm": 0.15476880967617035, + "learning_rate": 4.1434558839497064e-06, + "loss": 0.8786, + "step": 118330 + }, + { + "epoch": 0.8566237413769391, + "grad_norm": 0.17096053063869476, + "learning_rate": 4.14338349728912e-06, + "loss": 0.8831, + "step": 118340 + }, + { + "epoch": 0.8566961280375253, + "grad_norm": 0.15711814165115356, + "learning_rate": 4.143311110628534e-06, + "loss": 0.8487, + "step": 118350 + }, + { + "epoch": 0.8567685146981114, + "grad_norm": 0.15190854668617249, + "learning_rate": 4.143238723967948e-06, + "loss": 0.872, + "step": 118360 + }, + { + "epoch": 0.8568409013586976, + "grad_norm": 0.15767568349838257, + "learning_rate": 4.143166337307361e-06, + "loss": 0.8787, + "step": 118370 + }, + { + "epoch": 0.8569132880192838, + "grad_norm": 0.13986888527870178, + "learning_rate": 4.1430939506467745e-06, + "loss": 0.8605, + "step": 118380 + }, + { + "epoch": 0.85698567467987, + "grad_norm": 0.1628597527742386, + "learning_rate": 4.143021563986189e-06, + "loss": 0.8824, + "step": 118390 + }, + { + "epoch": 0.8570580613404561, + "grad_norm": 0.14581960439682007, + "learning_rate": 4.142949177325603e-06, + "loss": 0.8637, + "step": 118400 + }, + { + "epoch": 0.8571304480010423, + "grad_norm": 0.15314272046089172, + "learning_rate": 4.142876790665016e-06, + "loss": 0.871, + "step": 118410 + }, + { + "epoch": 0.8572028346616286, + "grad_norm": 0.13960693776607513, + "learning_rate": 4.14280440400443e-06, + "loss": 0.8791, + "step": 118420 + }, + { + "epoch": 0.8572752213222148, + "grad_norm": 0.31276777386665344, + "learning_rate": 4.142732017343844e-06, + "loss": 0.8804, + "step": 118430 + }, + { + "epoch": 0.857347607982801, + "grad_norm": 0.15629543364048004, + "learning_rate": 4.142659630683258e-06, + "loss": 0.8891, + "step": 118440 + }, + { + "epoch": 0.8574199946433871, + "grad_norm": 0.15342466533184052, + "learning_rate": 4.1425872440226715e-06, + "loss": 0.883, + "step": 118450 + }, + { + "epoch": 0.8574923813039733, + "grad_norm": 0.14505906403064728, + "learning_rate": 4.142514857362085e-06, + "loss": 0.8773, + "step": 118460 + }, + { + "epoch": 0.8575647679645595, + "grad_norm": 0.17075826227664948, + "learning_rate": 4.1424424707015e-06, + "loss": 0.8876, + "step": 118470 + }, + { + "epoch": 0.8576371546251457, + "grad_norm": 0.14717842638492584, + "learning_rate": 4.142370084040913e-06, + "loss": 0.875, + "step": 118480 + }, + { + "epoch": 0.8577095412857318, + "grad_norm": 0.1587383896112442, + "learning_rate": 4.142297697380327e-06, + "loss": 0.878, + "step": 118490 + }, + { + "epoch": 0.857781927946318, + "grad_norm": 0.15151575207710266, + "learning_rate": 4.1422253107197404e-06, + "loss": 0.8881, + "step": 118500 + }, + { + "epoch": 0.8578543146069042, + "grad_norm": 0.17024588584899902, + "learning_rate": 4.142152924059155e-06, + "loss": 0.8746, + "step": 118510 + }, + { + "epoch": 0.8579267012674904, + "grad_norm": 0.16007643938064575, + "learning_rate": 4.1420805373985685e-06, + "loss": 0.8731, + "step": 118520 + }, + { + "epoch": 0.8579990879280767, + "grad_norm": 0.1677238941192627, + "learning_rate": 4.142008150737982e-06, + "loss": 0.87, + "step": 118530 + }, + { + "epoch": 0.8580714745886628, + "grad_norm": 0.1518600583076477, + "learning_rate": 4.141935764077396e-06, + "loss": 0.8786, + "step": 118540 + }, + { + "epoch": 0.858143861249249, + "grad_norm": 0.15245488286018372, + "learning_rate": 4.14186337741681e-06, + "loss": 0.8882, + "step": 118550 + }, + { + "epoch": 0.8582162479098352, + "grad_norm": 0.16277900338172913, + "learning_rate": 4.141790990756224e-06, + "loss": 0.8778, + "step": 118560 + }, + { + "epoch": 0.8582886345704214, + "grad_norm": 0.1517462432384491, + "learning_rate": 4.1417186040956375e-06, + "loss": 0.8821, + "step": 118570 + }, + { + "epoch": 0.8583610212310075, + "grad_norm": 0.1491396576166153, + "learning_rate": 4.141646217435051e-06, + "loss": 0.8738, + "step": 118580 + }, + { + "epoch": 0.8584334078915937, + "grad_norm": 0.19470219314098358, + "learning_rate": 4.1415738307744655e-06, + "loss": 0.8708, + "step": 118590 + }, + { + "epoch": 0.8585057945521799, + "grad_norm": 0.30775144696235657, + "learning_rate": 4.141501444113879e-06, + "loss": 0.8803, + "step": 118600 + }, + { + "epoch": 0.8585781812127661, + "grad_norm": 0.154507115483284, + "learning_rate": 4.141429057453293e-06, + "loss": 0.8731, + "step": 118610 + }, + { + "epoch": 0.8586505678733523, + "grad_norm": 0.17000029981136322, + "learning_rate": 4.141356670792706e-06, + "loss": 0.8884, + "step": 118620 + }, + { + "epoch": 0.8587229545339384, + "grad_norm": 0.15258820354938507, + "learning_rate": 4.141284284132121e-06, + "loss": 0.8584, + "step": 118630 + }, + { + "epoch": 0.8587953411945247, + "grad_norm": 0.15323597192764282, + "learning_rate": 4.1412118974715345e-06, + "loss": 0.8639, + "step": 118640 + }, + { + "epoch": 0.8588677278551109, + "grad_norm": 0.1578139066696167, + "learning_rate": 4.141139510810948e-06, + "loss": 0.8929, + "step": 118650 + }, + { + "epoch": 0.8589401145156971, + "grad_norm": 0.16174130141735077, + "learning_rate": 4.141067124150362e-06, + "loss": 0.887, + "step": 118660 + }, + { + "epoch": 0.8590125011762832, + "grad_norm": 0.15842093527317047, + "learning_rate": 4.140994737489776e-06, + "loss": 0.8724, + "step": 118670 + }, + { + "epoch": 0.8590848878368694, + "grad_norm": 0.1600816696882248, + "learning_rate": 4.14092235082919e-06, + "loss": 0.8744, + "step": 118680 + }, + { + "epoch": 0.8591572744974556, + "grad_norm": 0.17777082324028015, + "learning_rate": 4.140849964168603e-06, + "loss": 0.8832, + "step": 118690 + }, + { + "epoch": 0.8592296611580418, + "grad_norm": 0.15657740831375122, + "learning_rate": 4.140777577508017e-06, + "loss": 0.8791, + "step": 118700 + }, + { + "epoch": 0.859302047818628, + "grad_norm": 0.16433501243591309, + "learning_rate": 4.1407051908474315e-06, + "loss": 0.879, + "step": 118710 + }, + { + "epoch": 0.8593744344792141, + "grad_norm": 0.148828387260437, + "learning_rate": 4.140632804186845e-06, + "loss": 0.8825, + "step": 118720 + }, + { + "epoch": 0.8594468211398003, + "grad_norm": 0.1595117151737213, + "learning_rate": 4.140560417526259e-06, + "loss": 0.8729, + "step": 118730 + }, + { + "epoch": 0.8595192078003866, + "grad_norm": 0.15022175014019012, + "learning_rate": 4.140488030865672e-06, + "loss": 0.8778, + "step": 118740 + }, + { + "epoch": 0.8595915944609728, + "grad_norm": 0.16430038213729858, + "learning_rate": 4.140415644205086e-06, + "loss": 0.8535, + "step": 118750 + }, + { + "epoch": 0.859663981121559, + "grad_norm": 0.1567695438861847, + "learning_rate": 4.1403432575445e-06, + "loss": 0.8784, + "step": 118760 + }, + { + "epoch": 0.8597363677821451, + "grad_norm": 0.15546707808971405, + "learning_rate": 4.140270870883914e-06, + "loss": 0.874, + "step": 118770 + }, + { + "epoch": 0.8598087544427313, + "grad_norm": 0.16317854821681976, + "learning_rate": 4.140198484223328e-06, + "loss": 0.8694, + "step": 118780 + }, + { + "epoch": 0.8598811411033175, + "grad_norm": 0.15389098227024078, + "learning_rate": 4.140126097562741e-06, + "loss": 0.8778, + "step": 118790 + }, + { + "epoch": 0.8599535277639037, + "grad_norm": 0.17565035820007324, + "learning_rate": 4.140053710902156e-06, + "loss": 0.8855, + "step": 118800 + }, + { + "epoch": 0.8600259144244898, + "grad_norm": 0.16407647728919983, + "learning_rate": 4.139981324241569e-06, + "loss": 0.8706, + "step": 118810 + }, + { + "epoch": 0.860098301085076, + "grad_norm": 0.15503662824630737, + "learning_rate": 4.139908937580983e-06, + "loss": 0.8748, + "step": 118820 + }, + { + "epoch": 0.8601706877456622, + "grad_norm": 0.15762311220169067, + "learning_rate": 4.1398365509203966e-06, + "loss": 0.8765, + "step": 118830 + }, + { + "epoch": 0.8602430744062484, + "grad_norm": 0.16244664788246155, + "learning_rate": 4.139764164259811e-06, + "loss": 0.8762, + "step": 118840 + }, + { + "epoch": 0.8603154610668347, + "grad_norm": 0.15617172420024872, + "learning_rate": 4.139691777599225e-06, + "loss": 0.8736, + "step": 118850 + }, + { + "epoch": 0.8603878477274208, + "grad_norm": 0.1622217446565628, + "learning_rate": 4.139619390938638e-06, + "loss": 0.8735, + "step": 118860 + }, + { + "epoch": 0.860460234388007, + "grad_norm": 0.15860269963741302, + "learning_rate": 4.139547004278052e-06, + "loss": 0.8614, + "step": 118870 + }, + { + "epoch": 0.8605326210485932, + "grad_norm": 0.1792127788066864, + "learning_rate": 4.139474617617466e-06, + "loss": 0.8649, + "step": 118880 + }, + { + "epoch": 0.8606050077091794, + "grad_norm": 0.14843524992465973, + "learning_rate": 4.13940223095688e-06, + "loss": 0.8749, + "step": 118890 + }, + { + "epoch": 0.8606773943697655, + "grad_norm": 0.15046702325344086, + "learning_rate": 4.1393298442962936e-06, + "loss": 0.8697, + "step": 118900 + }, + { + "epoch": 0.8607497810303517, + "grad_norm": 0.15024615824222565, + "learning_rate": 4.139257457635707e-06, + "loss": 0.8616, + "step": 118910 + }, + { + "epoch": 0.8608221676909379, + "grad_norm": 0.16122505068778992, + "learning_rate": 4.139185070975121e-06, + "loss": 0.8701, + "step": 118920 + }, + { + "epoch": 0.8608945543515241, + "grad_norm": 0.16019709408283234, + "learning_rate": 4.139112684314534e-06, + "loss": 0.8733, + "step": 118930 + }, + { + "epoch": 0.8609669410121102, + "grad_norm": 0.21425500512123108, + "learning_rate": 4.139040297653948e-06, + "loss": 0.8828, + "step": 118940 + }, + { + "epoch": 0.8610393276726965, + "grad_norm": 0.15902172029018402, + "learning_rate": 4.1389679109933625e-06, + "loss": 0.8829, + "step": 118950 + }, + { + "epoch": 0.8611117143332827, + "grad_norm": 0.15424564480781555, + "learning_rate": 4.138895524332776e-06, + "loss": 0.8682, + "step": 118960 + }, + { + "epoch": 0.8611841009938689, + "grad_norm": 0.14803998172283173, + "learning_rate": 4.13882313767219e-06, + "loss": 0.8536, + "step": 118970 + }, + { + "epoch": 0.8612564876544551, + "grad_norm": 0.17855031788349152, + "learning_rate": 4.138750751011603e-06, + "loss": 0.8756, + "step": 118980 + }, + { + "epoch": 0.8613288743150412, + "grad_norm": 0.19807036221027374, + "learning_rate": 4.138678364351018e-06, + "loss": 0.8816, + "step": 118990 + }, + { + "epoch": 0.8614012609756274, + "grad_norm": 0.15931545197963715, + "learning_rate": 4.138605977690431e-06, + "loss": 0.8715, + "step": 119000 + }, + { + "epoch": 0.8614736476362136, + "grad_norm": 0.14279036223888397, + "learning_rate": 4.138533591029845e-06, + "loss": 0.8736, + "step": 119010 + }, + { + "epoch": 0.8615460342967998, + "grad_norm": 0.14687687158584595, + "learning_rate": 4.138461204369259e-06, + "loss": 0.8761, + "step": 119020 + }, + { + "epoch": 0.8616184209573859, + "grad_norm": 0.15899838507175446, + "learning_rate": 4.138388817708673e-06, + "loss": 0.8743, + "step": 119030 + }, + { + "epoch": 0.8616908076179721, + "grad_norm": 0.18828865885734558, + "learning_rate": 4.138316431048087e-06, + "loss": 0.8716, + "step": 119040 + }, + { + "epoch": 0.8617631942785583, + "grad_norm": 0.17458096146583557, + "learning_rate": 4.1382440443875e-06, + "loss": 0.8581, + "step": 119050 + }, + { + "epoch": 0.8618355809391446, + "grad_norm": 0.1492881029844284, + "learning_rate": 4.138171657726914e-06, + "loss": 0.8767, + "step": 119060 + }, + { + "epoch": 0.8619079675997308, + "grad_norm": 0.1459776908159256, + "learning_rate": 4.1380992710663284e-06, + "loss": 0.8777, + "step": 119070 + }, + { + "epoch": 0.8619803542603169, + "grad_norm": 0.15040789544582367, + "learning_rate": 4.138026884405742e-06, + "loss": 0.8845, + "step": 119080 + }, + { + "epoch": 0.8620527409209031, + "grad_norm": 0.14359919726848602, + "learning_rate": 4.137954497745156e-06, + "loss": 0.8878, + "step": 119090 + }, + { + "epoch": 0.8621251275814893, + "grad_norm": 0.1604946255683899, + "learning_rate": 4.137882111084569e-06, + "loss": 0.875, + "step": 119100 + }, + { + "epoch": 0.8621975142420755, + "grad_norm": 0.15070165693759918, + "learning_rate": 4.137809724423984e-06, + "loss": 0.8829, + "step": 119110 + }, + { + "epoch": 0.8622699009026616, + "grad_norm": 0.15339529514312744, + "learning_rate": 4.137737337763397e-06, + "loss": 0.881, + "step": 119120 + }, + { + "epoch": 0.8623422875632478, + "grad_norm": 0.15151731669902802, + "learning_rate": 4.137664951102811e-06, + "loss": 0.8773, + "step": 119130 + }, + { + "epoch": 0.862414674223834, + "grad_norm": 0.17294517159461975, + "learning_rate": 4.137592564442225e-06, + "loss": 0.8631, + "step": 119140 + }, + { + "epoch": 0.8624870608844202, + "grad_norm": 0.1720060110092163, + "learning_rate": 4.137520177781639e-06, + "loss": 0.8931, + "step": 119150 + }, + { + "epoch": 0.8625594475450064, + "grad_norm": 0.15391665697097778, + "learning_rate": 4.137447791121053e-06, + "loss": 0.8624, + "step": 119160 + }, + { + "epoch": 0.8626318342055926, + "grad_norm": 0.14894205331802368, + "learning_rate": 4.137375404460466e-06, + "loss": 0.8758, + "step": 119170 + }, + { + "epoch": 0.8627042208661788, + "grad_norm": 0.16286589205265045, + "learning_rate": 4.13730301779988e-06, + "loss": 0.8836, + "step": 119180 + }, + { + "epoch": 0.862776607526765, + "grad_norm": 0.1608862280845642, + "learning_rate": 4.137230631139294e-06, + "loss": 0.8801, + "step": 119190 + }, + { + "epoch": 0.8628489941873512, + "grad_norm": 0.1520557999610901, + "learning_rate": 4.137158244478708e-06, + "loss": 0.8725, + "step": 119200 + }, + { + "epoch": 0.8629213808479373, + "grad_norm": 0.17940600216388702, + "learning_rate": 4.137085857818122e-06, + "loss": 0.8713, + "step": 119210 + }, + { + "epoch": 0.8629937675085235, + "grad_norm": 0.14350375533103943, + "learning_rate": 4.137013471157535e-06, + "loss": 0.8842, + "step": 119220 + }, + { + "epoch": 0.8630661541691097, + "grad_norm": 0.15040133893489838, + "learning_rate": 4.13694108449695e-06, + "loss": 0.8723, + "step": 119230 + }, + { + "epoch": 0.8631385408296959, + "grad_norm": 0.16370560228824615, + "learning_rate": 4.136868697836363e-06, + "loss": 0.8713, + "step": 119240 + }, + { + "epoch": 0.863210927490282, + "grad_norm": 0.16433537006378174, + "learning_rate": 4.136796311175777e-06, + "loss": 0.8691, + "step": 119250 + }, + { + "epoch": 0.8632833141508682, + "grad_norm": 0.15914808213710785, + "learning_rate": 4.1367239245151905e-06, + "loss": 0.8863, + "step": 119260 + }, + { + "epoch": 0.8633557008114545, + "grad_norm": 0.16615451872348785, + "learning_rate": 4.136651537854605e-06, + "loss": 0.8857, + "step": 119270 + }, + { + "epoch": 0.8634280874720407, + "grad_norm": 0.157662034034729, + "learning_rate": 4.136579151194019e-06, + "loss": 0.887, + "step": 119280 + }, + { + "epoch": 0.8635004741326269, + "grad_norm": 0.5979766249656677, + "learning_rate": 4.136506764533432e-06, + "loss": 0.8737, + "step": 119290 + }, + { + "epoch": 0.863572860793213, + "grad_norm": 0.15033407509326935, + "learning_rate": 4.136434377872846e-06, + "loss": 0.8869, + "step": 119300 + }, + { + "epoch": 0.8636452474537992, + "grad_norm": 0.1554785966873169, + "learning_rate": 4.13636199121226e-06, + "loss": 0.8854, + "step": 119310 + }, + { + "epoch": 0.8637176341143854, + "grad_norm": 0.17543356120586395, + "learning_rate": 4.136289604551674e-06, + "loss": 0.8879, + "step": 119320 + }, + { + "epoch": 0.8637900207749716, + "grad_norm": 0.14748305082321167, + "learning_rate": 4.1362172178910875e-06, + "loss": 0.8729, + "step": 119330 + }, + { + "epoch": 0.8638624074355578, + "grad_norm": 0.15673819184303284, + "learning_rate": 4.136144831230501e-06, + "loss": 0.8585, + "step": 119340 + }, + { + "epoch": 0.8639347940961439, + "grad_norm": 0.1523292362689972, + "learning_rate": 4.136072444569916e-06, + "loss": 0.8832, + "step": 119350 + }, + { + "epoch": 0.8640071807567301, + "grad_norm": 0.2544263005256653, + "learning_rate": 4.136000057909329e-06, + "loss": 0.8712, + "step": 119360 + }, + { + "epoch": 0.8640795674173163, + "grad_norm": 0.15314503014087677, + "learning_rate": 4.135927671248743e-06, + "loss": 0.8739, + "step": 119370 + }, + { + "epoch": 0.8641519540779026, + "grad_norm": 0.15046413242816925, + "learning_rate": 4.1358552845881565e-06, + "loss": 0.8834, + "step": 119380 + }, + { + "epoch": 0.8642243407384887, + "grad_norm": 0.4037892520427704, + "learning_rate": 4.13578289792757e-06, + "loss": 0.8753, + "step": 119390 + }, + { + "epoch": 0.8642967273990749, + "grad_norm": 0.15684808790683746, + "learning_rate": 4.1357105112669845e-06, + "loss": 0.8746, + "step": 119400 + }, + { + "epoch": 0.8643691140596611, + "grad_norm": 0.15342099964618683, + "learning_rate": 4.135638124606398e-06, + "loss": 0.8746, + "step": 119410 + }, + { + "epoch": 0.8644415007202473, + "grad_norm": 0.15408830344676971, + "learning_rate": 4.135565737945812e-06, + "loss": 0.8748, + "step": 119420 + }, + { + "epoch": 0.8645138873808335, + "grad_norm": 0.16404138505458832, + "learning_rate": 4.135493351285225e-06, + "loss": 0.8657, + "step": 119430 + }, + { + "epoch": 0.8645862740414196, + "grad_norm": 0.14688260853290558, + "learning_rate": 4.135420964624639e-06, + "loss": 0.8836, + "step": 119440 + }, + { + "epoch": 0.8646586607020058, + "grad_norm": 0.16032230854034424, + "learning_rate": 4.135348577964053e-06, + "loss": 0.8735, + "step": 119450 + }, + { + "epoch": 0.864731047362592, + "grad_norm": 0.14813324809074402, + "learning_rate": 4.135276191303467e-06, + "loss": 0.8774, + "step": 119460 + }, + { + "epoch": 0.8648034340231782, + "grad_norm": 0.1528783142566681, + "learning_rate": 4.135203804642881e-06, + "loss": 0.8737, + "step": 119470 + }, + { + "epoch": 0.8648758206837645, + "grad_norm": 0.15664593875408173, + "learning_rate": 4.135131417982294e-06, + "loss": 0.88, + "step": 119480 + }, + { + "epoch": 0.8649482073443506, + "grad_norm": 0.1477356255054474, + "learning_rate": 4.135059031321708e-06, + "loss": 0.8672, + "step": 119490 + }, + { + "epoch": 0.8650205940049368, + "grad_norm": 0.1740371733903885, + "learning_rate": 4.134986644661122e-06, + "loss": 0.8781, + "step": 119500 + }, + { + "epoch": 0.865092980665523, + "grad_norm": 0.15743084251880646, + "learning_rate": 4.134914258000536e-06, + "loss": 0.8829, + "step": 119510 + }, + { + "epoch": 0.8651653673261092, + "grad_norm": 0.14371630549430847, + "learning_rate": 4.13484187133995e-06, + "loss": 0.8789, + "step": 119520 + }, + { + "epoch": 0.8652377539866953, + "grad_norm": 0.1605149209499359, + "learning_rate": 4.134769484679363e-06, + "loss": 0.8863, + "step": 119530 + }, + { + "epoch": 0.8653101406472815, + "grad_norm": 0.16169756650924683, + "learning_rate": 4.134697098018777e-06, + "loss": 0.8839, + "step": 119540 + }, + { + "epoch": 0.8653825273078677, + "grad_norm": 0.15459372103214264, + "learning_rate": 4.134624711358191e-06, + "loss": 0.8706, + "step": 119550 + }, + { + "epoch": 0.8654549139684539, + "grad_norm": 0.2985534369945526, + "learning_rate": 4.134552324697605e-06, + "loss": 0.8732, + "step": 119560 + }, + { + "epoch": 0.86552730062904, + "grad_norm": 0.1482762098312378, + "learning_rate": 4.1344799380370186e-06, + "loss": 0.8806, + "step": 119570 + }, + { + "epoch": 0.8655996872896262, + "grad_norm": 0.15104518830776215, + "learning_rate": 4.134407551376432e-06, + "loss": 0.884, + "step": 119580 + }, + { + "epoch": 0.8656720739502125, + "grad_norm": 0.15297864377498627, + "learning_rate": 4.134335164715847e-06, + "loss": 0.8779, + "step": 119590 + }, + { + "epoch": 0.8657444606107987, + "grad_norm": 0.1583460420370102, + "learning_rate": 4.13426277805526e-06, + "loss": 0.8681, + "step": 119600 + }, + { + "epoch": 0.8658168472713849, + "grad_norm": 0.15396851301193237, + "learning_rate": 4.134190391394674e-06, + "loss": 0.8802, + "step": 119610 + }, + { + "epoch": 0.865889233931971, + "grad_norm": 0.16072429716587067, + "learning_rate": 4.1341180047340875e-06, + "loss": 0.8759, + "step": 119620 + }, + { + "epoch": 0.8659616205925572, + "grad_norm": 0.14890296757221222, + "learning_rate": 4.134045618073502e-06, + "loss": 0.8725, + "step": 119630 + }, + { + "epoch": 0.8660340072531434, + "grad_norm": 0.1567048579454422, + "learning_rate": 4.1339732314129156e-06, + "loss": 0.8839, + "step": 119640 + }, + { + "epoch": 0.8661063939137296, + "grad_norm": 0.15268154442310333, + "learning_rate": 4.133900844752329e-06, + "loss": 0.8609, + "step": 119650 + }, + { + "epoch": 0.8661787805743157, + "grad_norm": 0.15023602545261383, + "learning_rate": 4.133828458091743e-06, + "loss": 0.8758, + "step": 119660 + }, + { + "epoch": 0.8662511672349019, + "grad_norm": 0.15714387595653534, + "learning_rate": 4.133756071431157e-06, + "loss": 0.873, + "step": 119670 + }, + { + "epoch": 0.8663235538954881, + "grad_norm": 0.15032929182052612, + "learning_rate": 4.133683684770571e-06, + "loss": 0.8631, + "step": 119680 + }, + { + "epoch": 0.8663959405560743, + "grad_norm": 0.14457829296588898, + "learning_rate": 4.1336112981099845e-06, + "loss": 0.8716, + "step": 119690 + }, + { + "epoch": 0.8664683272166606, + "grad_norm": 0.17244747281074524, + "learning_rate": 4.133538911449398e-06, + "loss": 0.8842, + "step": 119700 + }, + { + "epoch": 0.8665407138772467, + "grad_norm": 0.15346111357212067, + "learning_rate": 4.1334665247888126e-06, + "loss": 0.884, + "step": 119710 + }, + { + "epoch": 0.8666131005378329, + "grad_norm": 0.1665041148662567, + "learning_rate": 4.133394138128226e-06, + "loss": 0.8807, + "step": 119720 + }, + { + "epoch": 0.8666854871984191, + "grad_norm": 0.1579190045595169, + "learning_rate": 4.13332175146764e-06, + "loss": 0.882, + "step": 119730 + }, + { + "epoch": 0.8667578738590053, + "grad_norm": 1.4202749729156494, + "learning_rate": 4.133249364807053e-06, + "loss": 0.8748, + "step": 119740 + }, + { + "epoch": 0.8668302605195914, + "grad_norm": 0.1646961271762848, + "learning_rate": 4.133176978146468e-06, + "loss": 0.8626, + "step": 119750 + }, + { + "epoch": 0.8669026471801776, + "grad_norm": 0.1607542634010315, + "learning_rate": 4.1331045914858815e-06, + "loss": 0.8768, + "step": 119760 + }, + { + "epoch": 0.8669750338407638, + "grad_norm": 0.1496744155883789, + "learning_rate": 4.133032204825295e-06, + "loss": 0.8702, + "step": 119770 + }, + { + "epoch": 0.86704742050135, + "grad_norm": 0.14871948957443237, + "learning_rate": 4.132959818164709e-06, + "loss": 0.8846, + "step": 119780 + }, + { + "epoch": 0.8671198071619362, + "grad_norm": 0.14933007955551147, + "learning_rate": 4.132887431504123e-06, + "loss": 0.8641, + "step": 119790 + }, + { + "epoch": 0.8671921938225224, + "grad_norm": 0.16172604262828827, + "learning_rate": 4.132815044843537e-06, + "loss": 0.8764, + "step": 119800 + }, + { + "epoch": 0.8672645804831086, + "grad_norm": 0.1538577377796173, + "learning_rate": 4.1327426581829504e-06, + "loss": 0.8734, + "step": 119810 + }, + { + "epoch": 0.8673369671436948, + "grad_norm": 0.14605380594730377, + "learning_rate": 4.132670271522364e-06, + "loss": 0.8692, + "step": 119820 + }, + { + "epoch": 0.867409353804281, + "grad_norm": 0.16161808371543884, + "learning_rate": 4.1325978848617785e-06, + "loss": 0.8729, + "step": 119830 + }, + { + "epoch": 0.8674817404648671, + "grad_norm": 0.2714543044567108, + "learning_rate": 4.132525498201192e-06, + "loss": 0.8787, + "step": 119840 + }, + { + "epoch": 0.8675541271254533, + "grad_norm": 0.1541425883769989, + "learning_rate": 4.132453111540606e-06, + "loss": 0.853, + "step": 119850 + }, + { + "epoch": 0.8676265137860395, + "grad_norm": 0.15290531516075134, + "learning_rate": 4.132380724880019e-06, + "loss": 0.8684, + "step": 119860 + }, + { + "epoch": 0.8676989004466257, + "grad_norm": 0.1534222811460495, + "learning_rate": 4.132308338219434e-06, + "loss": 0.8673, + "step": 119870 + }, + { + "epoch": 0.8677712871072119, + "grad_norm": 0.16768594086170197, + "learning_rate": 4.1322359515588474e-06, + "loss": 0.8653, + "step": 119880 + }, + { + "epoch": 0.867843673767798, + "grad_norm": 0.1455199420452118, + "learning_rate": 4.132163564898261e-06, + "loss": 0.8762, + "step": 119890 + }, + { + "epoch": 0.8679160604283842, + "grad_norm": 0.15997451543807983, + "learning_rate": 4.132091178237675e-06, + "loss": 0.884, + "step": 119900 + }, + { + "epoch": 0.8679884470889705, + "grad_norm": 0.15855015814304352, + "learning_rate": 4.132018791577089e-06, + "loss": 0.8886, + "step": 119910 + }, + { + "epoch": 0.8680608337495567, + "grad_norm": 0.1565515547990799, + "learning_rate": 4.131946404916503e-06, + "loss": 0.8721, + "step": 119920 + }, + { + "epoch": 0.8681332204101428, + "grad_norm": 0.1573457270860672, + "learning_rate": 4.131874018255916e-06, + "loss": 0.8682, + "step": 119930 + }, + { + "epoch": 0.868205607070729, + "grad_norm": 0.1630558967590332, + "learning_rate": 4.13180163159533e-06, + "loss": 0.8654, + "step": 119940 + }, + { + "epoch": 0.8682779937313152, + "grad_norm": 0.1974228322505951, + "learning_rate": 4.1317292449347444e-06, + "loss": 0.8581, + "step": 119950 + }, + { + "epoch": 0.8683503803919014, + "grad_norm": 0.14881473779678345, + "learning_rate": 4.131656858274157e-06, + "loss": 0.8827, + "step": 119960 + }, + { + "epoch": 0.8684227670524876, + "grad_norm": 0.16378778219223022, + "learning_rate": 4.131584471613571e-06, + "loss": 0.8598, + "step": 119970 + }, + { + "epoch": 0.8684951537130737, + "grad_norm": 0.1859802007675171, + "learning_rate": 4.131512084952985e-06, + "loss": 0.8768, + "step": 119980 + }, + { + "epoch": 0.8685675403736599, + "grad_norm": 0.16006912291049957, + "learning_rate": 4.131439698292399e-06, + "loss": 0.8718, + "step": 119990 + }, + { + "epoch": 0.8686399270342461, + "grad_norm": 0.25071021914482117, + "learning_rate": 4.1313673116318125e-06, + "loss": 0.8708, + "step": 120000 + }, + { + "epoch": 0.8687123136948323, + "grad_norm": 0.15014329552650452, + "learning_rate": 4.131294924971226e-06, + "loss": 0.8719, + "step": 120010 + }, + { + "epoch": 0.8687847003554185, + "grad_norm": 0.1454050987958908, + "learning_rate": 4.131222538310641e-06, + "loss": 0.87, + "step": 120020 + }, + { + "epoch": 0.8688570870160047, + "grad_norm": 0.1666693240404129, + "learning_rate": 4.131150151650054e-06, + "loss": 0.8761, + "step": 120030 + }, + { + "epoch": 0.8689294736765909, + "grad_norm": 0.16259945929050446, + "learning_rate": 4.131077764989468e-06, + "loss": 0.8679, + "step": 120040 + }, + { + "epoch": 0.8690018603371771, + "grad_norm": 0.16060245037078857, + "learning_rate": 4.1310053783288815e-06, + "loss": 0.872, + "step": 120050 + }, + { + "epoch": 0.8690742469977633, + "grad_norm": 0.3217458724975586, + "learning_rate": 4.130932991668296e-06, + "loss": 0.8638, + "step": 120060 + }, + { + "epoch": 0.8691466336583494, + "grad_norm": 0.14817246794700623, + "learning_rate": 4.1308606050077095e-06, + "loss": 0.8703, + "step": 120070 + }, + { + "epoch": 0.8692190203189356, + "grad_norm": 0.1485157608985901, + "learning_rate": 4.130788218347123e-06, + "loss": 0.8652, + "step": 120080 + }, + { + "epoch": 0.8692914069795218, + "grad_norm": 0.16461946070194244, + "learning_rate": 4.130715831686537e-06, + "loss": 0.858, + "step": 120090 + }, + { + "epoch": 0.869363793640108, + "grad_norm": 0.15408070385456085, + "learning_rate": 4.130643445025951e-06, + "loss": 0.8723, + "step": 120100 + }, + { + "epoch": 0.8694361803006941, + "grad_norm": 0.15531690418720245, + "learning_rate": 4.130571058365365e-06, + "loss": 0.8761, + "step": 120110 + }, + { + "epoch": 0.8695085669612804, + "grad_norm": 0.15016768872737885, + "learning_rate": 4.1304986717047785e-06, + "loss": 0.8732, + "step": 120120 + }, + { + "epoch": 0.8695809536218666, + "grad_norm": 0.1652849316596985, + "learning_rate": 4.130426285044192e-06, + "loss": 0.8677, + "step": 120130 + }, + { + "epoch": 0.8696533402824528, + "grad_norm": 0.15715721249580383, + "learning_rate": 4.1303538983836065e-06, + "loss": 0.8773, + "step": 120140 + }, + { + "epoch": 0.869725726943039, + "grad_norm": 0.2709084749221802, + "learning_rate": 4.13028151172302e-06, + "loss": 0.8724, + "step": 120150 + }, + { + "epoch": 0.8697981136036251, + "grad_norm": 0.147217258810997, + "learning_rate": 4.130209125062434e-06, + "loss": 0.8757, + "step": 120160 + }, + { + "epoch": 0.8698705002642113, + "grad_norm": 0.15031427145004272, + "learning_rate": 4.130136738401847e-06, + "loss": 0.8757, + "step": 120170 + }, + { + "epoch": 0.8699428869247975, + "grad_norm": 0.1699194461107254, + "learning_rate": 4.130064351741261e-06, + "loss": 0.8655, + "step": 120180 + }, + { + "epoch": 0.8700152735853837, + "grad_norm": 0.17292308807373047, + "learning_rate": 4.1299919650806755e-06, + "loss": 0.8879, + "step": 120190 + }, + { + "epoch": 0.8700876602459698, + "grad_norm": 0.15235485136508942, + "learning_rate": 4.129919578420089e-06, + "loss": 0.8707, + "step": 120200 + }, + { + "epoch": 0.870160046906556, + "grad_norm": 0.15500420331954956, + "learning_rate": 4.129847191759503e-06, + "loss": 0.8743, + "step": 120210 + }, + { + "epoch": 0.8702324335671422, + "grad_norm": 0.1642414629459381, + "learning_rate": 4.129774805098916e-06, + "loss": 0.8661, + "step": 120220 + }, + { + "epoch": 0.8703048202277285, + "grad_norm": 0.16339725255966187, + "learning_rate": 4.129702418438331e-06, + "loss": 0.8755, + "step": 120230 + }, + { + "epoch": 0.8703772068883147, + "grad_norm": 0.1617591381072998, + "learning_rate": 4.129630031777744e-06, + "loss": 0.8659, + "step": 120240 + }, + { + "epoch": 0.8704495935489008, + "grad_norm": 0.14315065741539001, + "learning_rate": 4.129557645117158e-06, + "loss": 0.8774, + "step": 120250 + }, + { + "epoch": 0.870521980209487, + "grad_norm": 0.16367150843143463, + "learning_rate": 4.129485258456572e-06, + "loss": 0.8774, + "step": 120260 + }, + { + "epoch": 0.8705943668700732, + "grad_norm": 0.1460050344467163, + "learning_rate": 4.129412871795986e-06, + "loss": 0.8779, + "step": 120270 + }, + { + "epoch": 0.8706667535306594, + "grad_norm": 0.17741134762763977, + "learning_rate": 4.1293404851354e-06, + "loss": 0.8806, + "step": 120280 + }, + { + "epoch": 0.8707391401912455, + "grad_norm": 0.1527169644832611, + "learning_rate": 4.129268098474813e-06, + "loss": 0.8783, + "step": 120290 + }, + { + "epoch": 0.8708115268518317, + "grad_norm": 0.1656719595193863, + "learning_rate": 4.129195711814227e-06, + "loss": 0.874, + "step": 120300 + }, + { + "epoch": 0.8708839135124179, + "grad_norm": 0.15612590312957764, + "learning_rate": 4.129123325153641e-06, + "loss": 0.8766, + "step": 120310 + }, + { + "epoch": 0.8709563001730041, + "grad_norm": 0.182773619890213, + "learning_rate": 4.129050938493055e-06, + "loss": 0.863, + "step": 120320 + }, + { + "epoch": 0.8710286868335904, + "grad_norm": 0.1543162316083908, + "learning_rate": 4.128978551832469e-06, + "loss": 0.8738, + "step": 120330 + }, + { + "epoch": 0.8711010734941765, + "grad_norm": 0.18090280890464783, + "learning_rate": 4.128906165171882e-06, + "loss": 0.8703, + "step": 120340 + }, + { + "epoch": 0.8711734601547627, + "grad_norm": 0.15451836585998535, + "learning_rate": 4.128833778511297e-06, + "loss": 0.8816, + "step": 120350 + }, + { + "epoch": 0.8712458468153489, + "grad_norm": 0.146149680018425, + "learning_rate": 4.12876139185071e-06, + "loss": 0.8759, + "step": 120360 + }, + { + "epoch": 0.8713182334759351, + "grad_norm": 0.14919118583202362, + "learning_rate": 4.128689005190124e-06, + "loss": 0.8645, + "step": 120370 + }, + { + "epoch": 0.8713906201365212, + "grad_norm": 0.6079381704330444, + "learning_rate": 4.1286166185295376e-06, + "loss": 0.8763, + "step": 120380 + }, + { + "epoch": 0.8714630067971074, + "grad_norm": 0.162376269698143, + "learning_rate": 4.128544231868952e-06, + "loss": 0.8675, + "step": 120390 + }, + { + "epoch": 0.8715353934576936, + "grad_norm": 0.16012080013751984, + "learning_rate": 4.128471845208366e-06, + "loss": 0.874, + "step": 120400 + }, + { + "epoch": 0.8716077801182798, + "grad_norm": 0.16266874969005585, + "learning_rate": 4.128399458547779e-06, + "loss": 0.868, + "step": 120410 + }, + { + "epoch": 0.871680166778866, + "grad_norm": 0.15482492744922638, + "learning_rate": 4.128327071887193e-06, + "loss": 0.8837, + "step": 120420 + }, + { + "epoch": 0.8717525534394521, + "grad_norm": 0.15119405090808868, + "learning_rate": 4.128254685226607e-06, + "loss": 0.8738, + "step": 120430 + }, + { + "epoch": 0.8718249401000384, + "grad_norm": 0.1444867104291916, + "learning_rate": 4.128182298566021e-06, + "loss": 0.8617, + "step": 120440 + }, + { + "epoch": 0.8718973267606246, + "grad_norm": 0.15251514315605164, + "learning_rate": 4.1281099119054346e-06, + "loss": 0.8706, + "step": 120450 + }, + { + "epoch": 0.8719697134212108, + "grad_norm": 0.16033178567886353, + "learning_rate": 4.128037525244848e-06, + "loss": 0.8756, + "step": 120460 + }, + { + "epoch": 0.872042100081797, + "grad_norm": 0.1551380306482315, + "learning_rate": 4.127965138584263e-06, + "loss": 0.8642, + "step": 120470 + }, + { + "epoch": 0.8721144867423831, + "grad_norm": 0.14764218032360077, + "learning_rate": 4.127892751923676e-06, + "loss": 0.8646, + "step": 120480 + }, + { + "epoch": 0.8721868734029693, + "grad_norm": 0.14466458559036255, + "learning_rate": 4.127820365263089e-06, + "loss": 0.8727, + "step": 120490 + }, + { + "epoch": 0.8722592600635555, + "grad_norm": 0.15080726146697998, + "learning_rate": 4.1277479786025035e-06, + "loss": 0.8934, + "step": 120500 + }, + { + "epoch": 0.8723316467241417, + "grad_norm": 0.16469760239124298, + "learning_rate": 4.127675591941917e-06, + "loss": 0.8752, + "step": 120510 + }, + { + "epoch": 0.8724040333847278, + "grad_norm": 0.1857549250125885, + "learning_rate": 4.127603205281331e-06, + "loss": 0.866, + "step": 120520 + }, + { + "epoch": 0.872476420045314, + "grad_norm": 0.18460987508296967, + "learning_rate": 4.127530818620744e-06, + "loss": 0.8576, + "step": 120530 + }, + { + "epoch": 0.8725488067059002, + "grad_norm": 0.15353545546531677, + "learning_rate": 4.127458431960159e-06, + "loss": 0.8828, + "step": 120540 + }, + { + "epoch": 0.8726211933664865, + "grad_norm": 0.14069363474845886, + "learning_rate": 4.1273860452995724e-06, + "loss": 0.868, + "step": 120550 + }, + { + "epoch": 0.8726935800270726, + "grad_norm": 0.1678398996591568, + "learning_rate": 4.127313658638986e-06, + "loss": 0.8802, + "step": 120560 + }, + { + "epoch": 0.8727659666876588, + "grad_norm": 0.15249879658222198, + "learning_rate": 4.1272412719784e-06, + "loss": 0.8821, + "step": 120570 + }, + { + "epoch": 0.872838353348245, + "grad_norm": 0.154356449842453, + "learning_rate": 4.127168885317814e-06, + "loss": 0.8736, + "step": 120580 + }, + { + "epoch": 0.8729107400088312, + "grad_norm": 0.16177409887313843, + "learning_rate": 4.127096498657228e-06, + "loss": 0.8793, + "step": 120590 + }, + { + "epoch": 0.8729831266694174, + "grad_norm": 0.14435283839702606, + "learning_rate": 4.127024111996641e-06, + "loss": 0.8678, + "step": 120600 + }, + { + "epoch": 0.8730555133300035, + "grad_norm": 0.1692737489938736, + "learning_rate": 4.126951725336055e-06, + "loss": 0.8701, + "step": 120610 + }, + { + "epoch": 0.8731278999905897, + "grad_norm": 0.14820191264152527, + "learning_rate": 4.1268793386754694e-06, + "loss": 0.8674, + "step": 120620 + }, + { + "epoch": 0.8732002866511759, + "grad_norm": 0.15043896436691284, + "learning_rate": 4.126806952014883e-06, + "loss": 0.8733, + "step": 120630 + }, + { + "epoch": 0.8732726733117621, + "grad_norm": 0.1535714864730835, + "learning_rate": 4.126734565354297e-06, + "loss": 0.8787, + "step": 120640 + }, + { + "epoch": 0.8733450599723483, + "grad_norm": 0.1565728783607483, + "learning_rate": 4.12666217869371e-06, + "loss": 0.8652, + "step": 120650 + }, + { + "epoch": 0.8734174466329345, + "grad_norm": 0.15889082849025726, + "learning_rate": 4.126589792033125e-06, + "loss": 0.8727, + "step": 120660 + }, + { + "epoch": 0.8734898332935207, + "grad_norm": 0.15119129419326782, + "learning_rate": 4.126517405372538e-06, + "loss": 0.8723, + "step": 120670 + }, + { + "epoch": 0.8735622199541069, + "grad_norm": 0.14388339221477509, + "learning_rate": 4.126445018711952e-06, + "loss": 0.8686, + "step": 120680 + }, + { + "epoch": 0.873634606614693, + "grad_norm": 0.14522388577461243, + "learning_rate": 4.126372632051366e-06, + "loss": 0.8696, + "step": 120690 + }, + { + "epoch": 0.8737069932752792, + "grad_norm": 0.15283215045928955, + "learning_rate": 4.12630024539078e-06, + "loss": 0.8731, + "step": 120700 + }, + { + "epoch": 0.8737793799358654, + "grad_norm": 0.15034788846969604, + "learning_rate": 4.126227858730194e-06, + "loss": 0.8837, + "step": 120710 + }, + { + "epoch": 0.8738517665964516, + "grad_norm": 0.15683099627494812, + "learning_rate": 4.126155472069607e-06, + "loss": 0.8686, + "step": 120720 + }, + { + "epoch": 0.8739241532570378, + "grad_norm": 0.19069434702396393, + "learning_rate": 4.126083085409021e-06, + "loss": 0.8651, + "step": 120730 + }, + { + "epoch": 0.8739965399176239, + "grad_norm": 0.15311843156814575, + "learning_rate": 4.126010698748435e-06, + "loss": 0.8822, + "step": 120740 + }, + { + "epoch": 0.8740689265782101, + "grad_norm": 0.1613323837518692, + "learning_rate": 4.125938312087849e-06, + "loss": 0.8744, + "step": 120750 + }, + { + "epoch": 0.8741413132387964, + "grad_norm": 0.1653766632080078, + "learning_rate": 4.125865925427263e-06, + "loss": 0.8658, + "step": 120760 + }, + { + "epoch": 0.8742136998993826, + "grad_norm": 0.20106831192970276, + "learning_rate": 4.125793538766676e-06, + "loss": 0.867, + "step": 120770 + }, + { + "epoch": 0.8742860865599688, + "grad_norm": 0.1640590876340866, + "learning_rate": 4.12572115210609e-06, + "loss": 0.8731, + "step": 120780 + }, + { + "epoch": 0.8743584732205549, + "grad_norm": 0.16234862804412842, + "learning_rate": 4.125648765445504e-06, + "loss": 0.8798, + "step": 120790 + }, + { + "epoch": 0.8744308598811411, + "grad_norm": 0.16816940903663635, + "learning_rate": 4.125576378784918e-06, + "loss": 0.8721, + "step": 120800 + }, + { + "epoch": 0.8745032465417273, + "grad_norm": 0.15265819430351257, + "learning_rate": 4.1255039921243315e-06, + "loss": 0.8658, + "step": 120810 + }, + { + "epoch": 0.8745756332023135, + "grad_norm": 0.15596769750118256, + "learning_rate": 4.125431605463745e-06, + "loss": 0.8784, + "step": 120820 + }, + { + "epoch": 0.8746480198628996, + "grad_norm": 0.15130725502967834, + "learning_rate": 4.12535921880316e-06, + "loss": 0.8847, + "step": 120830 + }, + { + "epoch": 0.8747204065234858, + "grad_norm": 0.22041760385036469, + "learning_rate": 4.125286832142573e-06, + "loss": 0.8765, + "step": 120840 + }, + { + "epoch": 0.874792793184072, + "grad_norm": 0.14767880737781525, + "learning_rate": 4.125214445481987e-06, + "loss": 0.8817, + "step": 120850 + }, + { + "epoch": 0.8748651798446583, + "grad_norm": 0.21697768568992615, + "learning_rate": 4.1251420588214005e-06, + "loss": 0.8704, + "step": 120860 + }, + { + "epoch": 0.8749375665052445, + "grad_norm": 0.1474272608757019, + "learning_rate": 4.125069672160815e-06, + "loss": 0.8718, + "step": 120870 + }, + { + "epoch": 0.8750099531658306, + "grad_norm": 0.18004824221134186, + "learning_rate": 4.1249972855002285e-06, + "loss": 0.8832, + "step": 120880 + }, + { + "epoch": 0.8750823398264168, + "grad_norm": 0.1615532785654068, + "learning_rate": 4.124924898839642e-06, + "loss": 0.869, + "step": 120890 + }, + { + "epoch": 0.875154726487003, + "grad_norm": 0.14899741113185883, + "learning_rate": 4.124852512179056e-06, + "loss": 0.8693, + "step": 120900 + }, + { + "epoch": 0.8752271131475892, + "grad_norm": 0.16730321943759918, + "learning_rate": 4.12478012551847e-06, + "loss": 0.8847, + "step": 120910 + }, + { + "epoch": 0.8752994998081753, + "grad_norm": 0.15044957399368286, + "learning_rate": 4.124707738857884e-06, + "loss": 0.8684, + "step": 120920 + }, + { + "epoch": 0.8753718864687615, + "grad_norm": 0.17763184010982513, + "learning_rate": 4.1246353521972975e-06, + "loss": 0.8853, + "step": 120930 + }, + { + "epoch": 0.8754442731293477, + "grad_norm": 0.16393078863620758, + "learning_rate": 4.124562965536711e-06, + "loss": 0.8696, + "step": 120940 + }, + { + "epoch": 0.8755166597899339, + "grad_norm": 0.15573619306087494, + "learning_rate": 4.1244905788761255e-06, + "loss": 0.8609, + "step": 120950 + }, + { + "epoch": 0.87558904645052, + "grad_norm": 0.15625816583633423, + "learning_rate": 4.124418192215539e-06, + "loss": 0.8735, + "step": 120960 + }, + { + "epoch": 0.8756614331111063, + "grad_norm": 0.14690211415290833, + "learning_rate": 4.124345805554953e-06, + "loss": 0.8688, + "step": 120970 + }, + { + "epoch": 0.8757338197716925, + "grad_norm": 0.17025691270828247, + "learning_rate": 4.124273418894366e-06, + "loss": 0.8582, + "step": 120980 + }, + { + "epoch": 0.8758062064322787, + "grad_norm": 0.159259632229805, + "learning_rate": 4.124201032233781e-06, + "loss": 0.8892, + "step": 120990 + }, + { + "epoch": 0.8758785930928649, + "grad_norm": 0.16585783660411835, + "learning_rate": 4.1241286455731945e-06, + "loss": 0.8723, + "step": 121000 + }, + { + "epoch": 0.875950979753451, + "grad_norm": 0.15400756895542145, + "learning_rate": 4.124056258912608e-06, + "loss": 0.8656, + "step": 121010 + }, + { + "epoch": 0.8760233664140372, + "grad_norm": 0.16609486937522888, + "learning_rate": 4.123983872252022e-06, + "loss": 0.8726, + "step": 121020 + }, + { + "epoch": 0.8760957530746234, + "grad_norm": 0.16967037320137024, + "learning_rate": 4.123911485591435e-06, + "loss": 0.8606, + "step": 121030 + }, + { + "epoch": 0.8761681397352096, + "grad_norm": 0.1568365842103958, + "learning_rate": 4.123839098930849e-06, + "loss": 0.8669, + "step": 121040 + }, + { + "epoch": 0.8762405263957957, + "grad_norm": 0.16394557058811188, + "learning_rate": 4.1237667122702626e-06, + "loss": 0.8723, + "step": 121050 + }, + { + "epoch": 0.8763129130563819, + "grad_norm": 0.15548640489578247, + "learning_rate": 4.123694325609677e-06, + "loss": 0.8712, + "step": 121060 + }, + { + "epoch": 0.8763852997169681, + "grad_norm": 0.1536964625120163, + "learning_rate": 4.123621938949091e-06, + "loss": 0.8664, + "step": 121070 + }, + { + "epoch": 0.8764576863775544, + "grad_norm": 0.14496274292469025, + "learning_rate": 4.123549552288504e-06, + "loss": 0.8656, + "step": 121080 + }, + { + "epoch": 0.8765300730381406, + "grad_norm": 0.16302034258842468, + "learning_rate": 4.123477165627918e-06, + "loss": 0.8763, + "step": 121090 + }, + { + "epoch": 0.8766024596987267, + "grad_norm": 0.15400786697864532, + "learning_rate": 4.123404778967332e-06, + "loss": 0.8673, + "step": 121100 + }, + { + "epoch": 0.8766748463593129, + "grad_norm": 0.1585310846567154, + "learning_rate": 4.123332392306746e-06, + "loss": 0.874, + "step": 121110 + }, + { + "epoch": 0.8767472330198991, + "grad_norm": 0.1589457243680954, + "learning_rate": 4.1232600056461596e-06, + "loss": 0.8741, + "step": 121120 + }, + { + "epoch": 0.8768196196804853, + "grad_norm": 0.16599847376346588, + "learning_rate": 4.123187618985573e-06, + "loss": 0.8783, + "step": 121130 + }, + { + "epoch": 0.8768920063410715, + "grad_norm": 0.1981654316186905, + "learning_rate": 4.123115232324988e-06, + "loss": 0.8759, + "step": 121140 + }, + { + "epoch": 0.8769643930016576, + "grad_norm": 0.19641701877117157, + "learning_rate": 4.123042845664401e-06, + "loss": 0.8701, + "step": 121150 + }, + { + "epoch": 0.8770367796622438, + "grad_norm": 0.16379612684249878, + "learning_rate": 4.122970459003815e-06, + "loss": 0.8603, + "step": 121160 + }, + { + "epoch": 0.87710916632283, + "grad_norm": 0.16375406086444855, + "learning_rate": 4.1228980723432285e-06, + "loss": 0.8772, + "step": 121170 + }, + { + "epoch": 0.8771815529834163, + "grad_norm": 0.16766558587551117, + "learning_rate": 4.122825685682643e-06, + "loss": 0.887, + "step": 121180 + }, + { + "epoch": 0.8772539396440024, + "grad_norm": 0.14811073243618011, + "learning_rate": 4.1227532990220566e-06, + "loss": 0.8706, + "step": 121190 + }, + { + "epoch": 0.8773263263045886, + "grad_norm": 0.16084469854831696, + "learning_rate": 4.12268091236147e-06, + "loss": 0.873, + "step": 121200 + }, + { + "epoch": 0.8773987129651748, + "grad_norm": 0.17017702758312225, + "learning_rate": 4.122608525700884e-06, + "loss": 0.873, + "step": 121210 + }, + { + "epoch": 0.877471099625761, + "grad_norm": 0.1788460612297058, + "learning_rate": 4.122536139040298e-06, + "loss": 0.875, + "step": 121220 + }, + { + "epoch": 0.8775434862863472, + "grad_norm": 0.1578466147184372, + "learning_rate": 4.122463752379712e-06, + "loss": 0.8784, + "step": 121230 + }, + { + "epoch": 0.8776158729469333, + "grad_norm": 0.14853112399578094, + "learning_rate": 4.1223913657191255e-06, + "loss": 0.8869, + "step": 121240 + }, + { + "epoch": 0.8776882596075195, + "grad_norm": 0.15817275643348694, + "learning_rate": 4.122318979058539e-06, + "loss": 0.871, + "step": 121250 + }, + { + "epoch": 0.8777606462681057, + "grad_norm": 0.1494825780391693, + "learning_rate": 4.122246592397954e-06, + "loss": 0.8697, + "step": 121260 + }, + { + "epoch": 0.8778330329286919, + "grad_norm": 0.1578221619129181, + "learning_rate": 4.122174205737367e-06, + "loss": 0.8748, + "step": 121270 + }, + { + "epoch": 0.877905419589278, + "grad_norm": 0.14515846967697144, + "learning_rate": 4.122101819076781e-06, + "loss": 0.8718, + "step": 121280 + }, + { + "epoch": 0.8779778062498643, + "grad_norm": 0.15049944818019867, + "learning_rate": 4.1220294324161944e-06, + "loss": 0.8619, + "step": 121290 + }, + { + "epoch": 0.8780501929104505, + "grad_norm": 0.15272891521453857, + "learning_rate": 4.121957045755609e-06, + "loss": 0.8802, + "step": 121300 + }, + { + "epoch": 0.8781225795710367, + "grad_norm": 0.16228324174880981, + "learning_rate": 4.1218846590950225e-06, + "loss": 0.8673, + "step": 121310 + }, + { + "epoch": 0.8781949662316229, + "grad_norm": 0.15620221197605133, + "learning_rate": 4.121812272434436e-06, + "loss": 0.8788, + "step": 121320 + }, + { + "epoch": 0.878267352892209, + "grad_norm": 0.16147710382938385, + "learning_rate": 4.12173988577385e-06, + "loss": 0.887, + "step": 121330 + }, + { + "epoch": 0.8783397395527952, + "grad_norm": 0.15584251284599304, + "learning_rate": 4.121667499113264e-06, + "loss": 0.8716, + "step": 121340 + }, + { + "epoch": 0.8784121262133814, + "grad_norm": 0.14494866132736206, + "learning_rate": 4.121595112452678e-06, + "loss": 0.8705, + "step": 121350 + }, + { + "epoch": 0.8784845128739676, + "grad_norm": 0.15706604719161987, + "learning_rate": 4.1215227257920914e-06, + "loss": 0.8711, + "step": 121360 + }, + { + "epoch": 0.8785568995345537, + "grad_norm": 0.15880705416202545, + "learning_rate": 4.121450339131505e-06, + "loss": 0.8671, + "step": 121370 + }, + { + "epoch": 0.8786292861951399, + "grad_norm": 0.1646871417760849, + "learning_rate": 4.1213779524709195e-06, + "loss": 0.8686, + "step": 121380 + }, + { + "epoch": 0.8787016728557262, + "grad_norm": 0.14550799131393433, + "learning_rate": 4.121305565810333e-06, + "loss": 0.8771, + "step": 121390 + }, + { + "epoch": 0.8787740595163124, + "grad_norm": 0.16066545248031616, + "learning_rate": 4.121233179149747e-06, + "loss": 0.8774, + "step": 121400 + }, + { + "epoch": 0.8788464461768986, + "grad_norm": 0.16294661164283752, + "learning_rate": 4.12116079248916e-06, + "loss": 0.889, + "step": 121410 + }, + { + "epoch": 0.8789188328374847, + "grad_norm": 0.16425122320652008, + "learning_rate": 4.121088405828574e-06, + "loss": 0.8681, + "step": 121420 + }, + { + "epoch": 0.8789912194980709, + "grad_norm": 0.1627512127161026, + "learning_rate": 4.1210160191679884e-06, + "loss": 0.8666, + "step": 121430 + }, + { + "epoch": 0.8790636061586571, + "grad_norm": 0.13960957527160645, + "learning_rate": 4.120943632507402e-06, + "loss": 0.8701, + "step": 121440 + }, + { + "epoch": 0.8791359928192433, + "grad_norm": 0.16151773929595947, + "learning_rate": 4.120871245846816e-06, + "loss": 0.8845, + "step": 121450 + }, + { + "epoch": 0.8792083794798294, + "grad_norm": 0.14862598478794098, + "learning_rate": 4.120798859186229e-06, + "loss": 0.8767, + "step": 121460 + }, + { + "epoch": 0.8792807661404156, + "grad_norm": 0.15480075776576996, + "learning_rate": 4.120726472525644e-06, + "loss": 0.8746, + "step": 121470 + }, + { + "epoch": 0.8793531528010018, + "grad_norm": 0.15683260560035706, + "learning_rate": 4.120654085865057e-06, + "loss": 0.8714, + "step": 121480 + }, + { + "epoch": 0.879425539461588, + "grad_norm": 0.15848079323768616, + "learning_rate": 4.120581699204471e-06, + "loss": 0.8831, + "step": 121490 + }, + { + "epoch": 0.8794979261221743, + "grad_norm": 0.16250035166740417, + "learning_rate": 4.120509312543885e-06, + "loss": 0.8825, + "step": 121500 + }, + { + "epoch": 0.8795703127827604, + "grad_norm": 0.21030132472515106, + "learning_rate": 4.120436925883299e-06, + "loss": 0.8641, + "step": 121510 + }, + { + "epoch": 0.8796426994433466, + "grad_norm": 0.1805466115474701, + "learning_rate": 4.120364539222713e-06, + "loss": 0.872, + "step": 121520 + }, + { + "epoch": 0.8797150861039328, + "grad_norm": 0.14784234762191772, + "learning_rate": 4.120292152562126e-06, + "loss": 0.8725, + "step": 121530 + }, + { + "epoch": 0.879787472764519, + "grad_norm": 0.16358047723770142, + "learning_rate": 4.12021976590154e-06, + "loss": 0.8708, + "step": 121540 + }, + { + "epoch": 0.8798598594251051, + "grad_norm": 0.16846664249897003, + "learning_rate": 4.1201473792409535e-06, + "loss": 0.874, + "step": 121550 + }, + { + "epoch": 0.8799322460856913, + "grad_norm": 0.15270091593265533, + "learning_rate": 4.120074992580367e-06, + "loss": 0.8719, + "step": 121560 + }, + { + "epoch": 0.8800046327462775, + "grad_norm": 0.1571713536977768, + "learning_rate": 4.120002605919781e-06, + "loss": 0.8754, + "step": 121570 + }, + { + "epoch": 0.8800770194068637, + "grad_norm": 0.1490432471036911, + "learning_rate": 4.119930219259195e-06, + "loss": 0.8702, + "step": 121580 + }, + { + "epoch": 0.8801494060674498, + "grad_norm": 0.14698143303394318, + "learning_rate": 4.119857832598609e-06, + "loss": 0.8756, + "step": 121590 + }, + { + "epoch": 0.880221792728036, + "grad_norm": 0.14973314106464386, + "learning_rate": 4.1197854459380225e-06, + "loss": 0.8748, + "step": 121600 + }, + { + "epoch": 0.8802941793886223, + "grad_norm": 0.20785781741142273, + "learning_rate": 4.119713059277436e-06, + "loss": 0.8852, + "step": 121610 + }, + { + "epoch": 0.8803665660492085, + "grad_norm": 0.16038250923156738, + "learning_rate": 4.1196406726168505e-06, + "loss": 0.8608, + "step": 121620 + }, + { + "epoch": 0.8804389527097947, + "grad_norm": 0.15908242762088776, + "learning_rate": 4.119568285956264e-06, + "loss": 0.8659, + "step": 121630 + }, + { + "epoch": 0.8805113393703808, + "grad_norm": 0.15829870104789734, + "learning_rate": 4.119495899295678e-06, + "loss": 0.869, + "step": 121640 + }, + { + "epoch": 0.880583726030967, + "grad_norm": 0.16468794643878937, + "learning_rate": 4.119423512635091e-06, + "loss": 0.8803, + "step": 121650 + }, + { + "epoch": 0.8806561126915532, + "grad_norm": 0.15394102036952972, + "learning_rate": 4.119351125974506e-06, + "loss": 0.8783, + "step": 121660 + }, + { + "epoch": 0.8807284993521394, + "grad_norm": 0.15285733342170715, + "learning_rate": 4.1192787393139195e-06, + "loss": 0.8683, + "step": 121670 + }, + { + "epoch": 0.8808008860127255, + "grad_norm": 0.16268157958984375, + "learning_rate": 4.119206352653333e-06, + "loss": 0.8668, + "step": 121680 + }, + { + "epoch": 0.8808732726733117, + "grad_norm": 0.15528251230716705, + "learning_rate": 4.119133965992747e-06, + "loss": 0.8625, + "step": 121690 + }, + { + "epoch": 0.8809456593338979, + "grad_norm": 0.15688292682170868, + "learning_rate": 4.119061579332161e-06, + "loss": 0.8734, + "step": 121700 + }, + { + "epoch": 0.8810180459944842, + "grad_norm": 0.15693509578704834, + "learning_rate": 4.118989192671575e-06, + "loss": 0.8748, + "step": 121710 + }, + { + "epoch": 0.8810904326550704, + "grad_norm": 0.16579411923885345, + "learning_rate": 4.118916806010988e-06, + "loss": 0.8751, + "step": 121720 + }, + { + "epoch": 0.8811628193156565, + "grad_norm": 0.16935928165912628, + "learning_rate": 4.118844419350402e-06, + "loss": 0.8638, + "step": 121730 + }, + { + "epoch": 0.8812352059762427, + "grad_norm": 0.194980189204216, + "learning_rate": 4.1187720326898165e-06, + "loss": 0.8801, + "step": 121740 + }, + { + "epoch": 0.8813075926368289, + "grad_norm": 0.16746121644973755, + "learning_rate": 4.11869964602923e-06, + "loss": 0.8841, + "step": 121750 + }, + { + "epoch": 0.8813799792974151, + "grad_norm": 0.1961222141981125, + "learning_rate": 4.118627259368644e-06, + "loss": 0.8761, + "step": 121760 + }, + { + "epoch": 0.8814523659580012, + "grad_norm": 0.15616725385189056, + "learning_rate": 4.118554872708057e-06, + "loss": 0.869, + "step": 121770 + }, + { + "epoch": 0.8815247526185874, + "grad_norm": 0.15364345908164978, + "learning_rate": 4.118482486047472e-06, + "loss": 0.8766, + "step": 121780 + }, + { + "epoch": 0.8815971392791736, + "grad_norm": 0.15156231820583344, + "learning_rate": 4.118410099386885e-06, + "loss": 0.8673, + "step": 121790 + }, + { + "epoch": 0.8816695259397598, + "grad_norm": 0.2237502783536911, + "learning_rate": 4.118337712726299e-06, + "loss": 0.8718, + "step": 121800 + }, + { + "epoch": 0.881741912600346, + "grad_norm": 0.14461927115917206, + "learning_rate": 4.118265326065713e-06, + "loss": 0.872, + "step": 121810 + }, + { + "epoch": 0.8818142992609322, + "grad_norm": 0.17010053992271423, + "learning_rate": 4.118192939405127e-06, + "loss": 0.8745, + "step": 121820 + }, + { + "epoch": 0.8818866859215184, + "grad_norm": 0.16288408637046814, + "learning_rate": 4.118120552744541e-06, + "loss": 0.8762, + "step": 121830 + }, + { + "epoch": 0.8819590725821046, + "grad_norm": 0.1665002405643463, + "learning_rate": 4.118048166083954e-06, + "loss": 0.8678, + "step": 121840 + }, + { + "epoch": 0.8820314592426908, + "grad_norm": 0.16218537092208862, + "learning_rate": 4.117975779423368e-06, + "loss": 0.8829, + "step": 121850 + }, + { + "epoch": 0.882103845903277, + "grad_norm": 0.14851604402065277, + "learning_rate": 4.117903392762782e-06, + "loss": 0.874, + "step": 121860 + }, + { + "epoch": 0.8821762325638631, + "grad_norm": 0.1481228917837143, + "learning_rate": 4.117831006102196e-06, + "loss": 0.868, + "step": 121870 + }, + { + "epoch": 0.8822486192244493, + "grad_norm": 0.16877874732017517, + "learning_rate": 4.11775861944161e-06, + "loss": 0.8754, + "step": 121880 + }, + { + "epoch": 0.8823210058850355, + "grad_norm": 0.15102118253707886, + "learning_rate": 4.117686232781023e-06, + "loss": 0.8721, + "step": 121890 + }, + { + "epoch": 0.8823933925456217, + "grad_norm": 0.15276771783828735, + "learning_rate": 4.117613846120438e-06, + "loss": 0.8833, + "step": 121900 + }, + { + "epoch": 0.8824657792062078, + "grad_norm": 0.14909601211547852, + "learning_rate": 4.117541459459851e-06, + "loss": 0.8613, + "step": 121910 + }, + { + "epoch": 0.8825381658667941, + "grad_norm": 0.15736235678195953, + "learning_rate": 4.117469072799265e-06, + "loss": 0.8756, + "step": 121920 + }, + { + "epoch": 0.8826105525273803, + "grad_norm": 0.1750316172838211, + "learning_rate": 4.1173966861386786e-06, + "loss": 0.8851, + "step": 121930 + }, + { + "epoch": 0.8826829391879665, + "grad_norm": 0.14961692690849304, + "learning_rate": 4.117324299478093e-06, + "loss": 0.871, + "step": 121940 + }, + { + "epoch": 0.8827553258485527, + "grad_norm": 0.13990993797779083, + "learning_rate": 4.117251912817507e-06, + "loss": 0.8573, + "step": 121950 + }, + { + "epoch": 0.8828277125091388, + "grad_norm": 0.1395825743675232, + "learning_rate": 4.11717952615692e-06, + "loss": 0.8589, + "step": 121960 + }, + { + "epoch": 0.882900099169725, + "grad_norm": 0.15364444255828857, + "learning_rate": 4.117107139496334e-06, + "loss": 0.867, + "step": 121970 + }, + { + "epoch": 0.8829724858303112, + "grad_norm": 3.056195020675659, + "learning_rate": 4.117034752835748e-06, + "loss": 0.882, + "step": 121980 + }, + { + "epoch": 0.8830448724908974, + "grad_norm": 0.14895865321159363, + "learning_rate": 4.116962366175162e-06, + "loss": 0.8752, + "step": 121990 + }, + { + "epoch": 0.8831172591514835, + "grad_norm": 0.1695874035358429, + "learning_rate": 4.116889979514576e-06, + "loss": 0.877, + "step": 122000 + }, + { + "epoch": 0.8831896458120697, + "grad_norm": 0.1436607986688614, + "learning_rate": 4.116817592853989e-06, + "loss": 0.8785, + "step": 122010 + }, + { + "epoch": 0.8832620324726559, + "grad_norm": 0.15994448959827423, + "learning_rate": 4.116745206193403e-06, + "loss": 0.8697, + "step": 122020 + }, + { + "epoch": 0.8833344191332422, + "grad_norm": 0.15391020476818085, + "learning_rate": 4.116672819532817e-06, + "loss": 0.8702, + "step": 122030 + }, + { + "epoch": 0.8834068057938284, + "grad_norm": 0.1519947201013565, + "learning_rate": 4.116600432872231e-06, + "loss": 0.8728, + "step": 122040 + }, + { + "epoch": 0.8834791924544145, + "grad_norm": 0.14958973228931427, + "learning_rate": 4.1165280462116445e-06, + "loss": 0.8603, + "step": 122050 + }, + { + "epoch": 0.8835515791150007, + "grad_norm": 0.15004867315292358, + "learning_rate": 4.116455659551058e-06, + "loss": 0.878, + "step": 122060 + }, + { + "epoch": 0.8836239657755869, + "grad_norm": 0.16854992508888245, + "learning_rate": 4.116383272890473e-06, + "loss": 0.8622, + "step": 122070 + }, + { + "epoch": 0.8836963524361731, + "grad_norm": 0.14855122566223145, + "learning_rate": 4.116310886229885e-06, + "loss": 0.8679, + "step": 122080 + }, + { + "epoch": 0.8837687390967592, + "grad_norm": 0.17604343593120575, + "learning_rate": 4.1162384995693e-06, + "loss": 0.8664, + "step": 122090 + }, + { + "epoch": 0.8838411257573454, + "grad_norm": 0.15962982177734375, + "learning_rate": 4.1161661129087134e-06, + "loss": 0.8831, + "step": 122100 + }, + { + "epoch": 0.8839135124179316, + "grad_norm": 0.15554451942443848, + "learning_rate": 4.116093726248127e-06, + "loss": 0.8701, + "step": 122110 + }, + { + "epoch": 0.8839858990785178, + "grad_norm": 0.15746499598026276, + "learning_rate": 4.116021339587541e-06, + "loss": 0.8682, + "step": 122120 + }, + { + "epoch": 0.8840582857391039, + "grad_norm": 0.1579037606716156, + "learning_rate": 4.115948952926955e-06, + "loss": 0.8738, + "step": 122130 + }, + { + "epoch": 0.8841306723996902, + "grad_norm": 0.16659896075725555, + "learning_rate": 4.115876566266369e-06, + "loss": 0.874, + "step": 122140 + }, + { + "epoch": 0.8842030590602764, + "grad_norm": 0.183345228433609, + "learning_rate": 4.115804179605782e-06, + "loss": 0.8697, + "step": 122150 + }, + { + "epoch": 0.8842754457208626, + "grad_norm": 0.15425361692905426, + "learning_rate": 4.115731792945196e-06, + "loss": 0.8637, + "step": 122160 + }, + { + "epoch": 0.8843478323814488, + "grad_norm": 0.1493588238954544, + "learning_rate": 4.1156594062846104e-06, + "loss": 0.8512, + "step": 122170 + }, + { + "epoch": 0.8844202190420349, + "grad_norm": 0.14579205214977264, + "learning_rate": 4.115587019624024e-06, + "loss": 0.8685, + "step": 122180 + }, + { + "epoch": 0.8844926057026211, + "grad_norm": 0.14941352605819702, + "learning_rate": 4.115514632963438e-06, + "loss": 0.8795, + "step": 122190 + }, + { + "epoch": 0.8845649923632073, + "grad_norm": 0.16284583508968353, + "learning_rate": 4.115442246302851e-06, + "loss": 0.8814, + "step": 122200 + }, + { + "epoch": 0.8846373790237935, + "grad_norm": 0.15186572074890137, + "learning_rate": 4.115369859642265e-06, + "loss": 0.8693, + "step": 122210 + }, + { + "epoch": 0.8847097656843796, + "grad_norm": 0.15203732252120972, + "learning_rate": 4.115297472981679e-06, + "loss": 0.879, + "step": 122220 + }, + { + "epoch": 0.8847821523449658, + "grad_norm": 0.15315940976142883, + "learning_rate": 4.115225086321093e-06, + "loss": 0.8679, + "step": 122230 + }, + { + "epoch": 0.8848545390055521, + "grad_norm": 0.15874212980270386, + "learning_rate": 4.115152699660507e-06, + "loss": 0.8919, + "step": 122240 + }, + { + "epoch": 0.8849269256661383, + "grad_norm": 0.15111921727657318, + "learning_rate": 4.11508031299992e-06, + "loss": 0.876, + "step": 122250 + }, + { + "epoch": 0.8849993123267245, + "grad_norm": 0.16939522325992584, + "learning_rate": 4.115007926339335e-06, + "loss": 0.8728, + "step": 122260 + }, + { + "epoch": 0.8850716989873106, + "grad_norm": 0.14914065599441528, + "learning_rate": 4.114935539678748e-06, + "loss": 0.8666, + "step": 122270 + }, + { + "epoch": 0.8851440856478968, + "grad_norm": 0.17796002328395844, + "learning_rate": 4.114863153018162e-06, + "loss": 0.86, + "step": 122280 + }, + { + "epoch": 0.885216472308483, + "grad_norm": 0.14601661264896393, + "learning_rate": 4.1147907663575755e-06, + "loss": 0.8799, + "step": 122290 + }, + { + "epoch": 0.8852888589690692, + "grad_norm": 0.14311879873275757, + "learning_rate": 4.11471837969699e-06, + "loss": 0.872, + "step": 122300 + }, + { + "epoch": 0.8853612456296553, + "grad_norm": 0.14968661963939667, + "learning_rate": 4.114645993036404e-06, + "loss": 0.8701, + "step": 122310 + }, + { + "epoch": 0.8854336322902415, + "grad_norm": 0.15330290794372559, + "learning_rate": 4.114573606375817e-06, + "loss": 0.8773, + "step": 122320 + }, + { + "epoch": 0.8855060189508277, + "grad_norm": 0.14958740770816803, + "learning_rate": 4.114501219715231e-06, + "loss": 0.8634, + "step": 122330 + }, + { + "epoch": 0.8855784056114139, + "grad_norm": 0.15254485607147217, + "learning_rate": 4.114428833054645e-06, + "loss": 0.863, + "step": 122340 + }, + { + "epoch": 0.8856507922720002, + "grad_norm": 0.18143147230148315, + "learning_rate": 4.114356446394059e-06, + "loss": 0.862, + "step": 122350 + }, + { + "epoch": 0.8857231789325863, + "grad_norm": 0.1426178365945816, + "learning_rate": 4.1142840597334725e-06, + "loss": 0.8654, + "step": 122360 + }, + { + "epoch": 0.8857955655931725, + "grad_norm": 0.15494516491889954, + "learning_rate": 4.114211673072886e-06, + "loss": 0.8658, + "step": 122370 + }, + { + "epoch": 0.8858679522537587, + "grad_norm": 0.15428562462329865, + "learning_rate": 4.114139286412301e-06, + "loss": 0.8713, + "step": 122380 + }, + { + "epoch": 0.8859403389143449, + "grad_norm": 0.14963556826114655, + "learning_rate": 4.114066899751714e-06, + "loss": 0.8621, + "step": 122390 + }, + { + "epoch": 0.886012725574931, + "grad_norm": 0.16026413440704346, + "learning_rate": 4.113994513091128e-06, + "loss": 0.8767, + "step": 122400 + }, + { + "epoch": 0.8860851122355172, + "grad_norm": 0.14730548858642578, + "learning_rate": 4.1139221264305415e-06, + "loss": 0.866, + "step": 122410 + }, + { + "epoch": 0.8861574988961034, + "grad_norm": 0.14938196539878845, + "learning_rate": 4.113849739769956e-06, + "loss": 0.8596, + "step": 122420 + }, + { + "epoch": 0.8862298855566896, + "grad_norm": 0.16436588764190674, + "learning_rate": 4.1137773531093695e-06, + "loss": 0.8769, + "step": 122430 + }, + { + "epoch": 0.8863022722172758, + "grad_norm": 0.15649719536304474, + "learning_rate": 4.113704966448783e-06, + "loss": 0.872, + "step": 122440 + }, + { + "epoch": 0.886374658877862, + "grad_norm": 0.1726832240819931, + "learning_rate": 4.113632579788197e-06, + "loss": 0.8779, + "step": 122450 + }, + { + "epoch": 0.8864470455384482, + "grad_norm": 0.14762026071548462, + "learning_rate": 4.113560193127611e-06, + "loss": 0.8725, + "step": 122460 + }, + { + "epoch": 0.8865194321990344, + "grad_norm": 0.14345690608024597, + "learning_rate": 4.113487806467025e-06, + "loss": 0.8574, + "step": 122470 + }, + { + "epoch": 0.8865918188596206, + "grad_norm": 0.16033095121383667, + "learning_rate": 4.1134154198064385e-06, + "loss": 0.8639, + "step": 122480 + }, + { + "epoch": 0.8866642055202067, + "grad_norm": 0.14597992599010468, + "learning_rate": 4.113343033145852e-06, + "loss": 0.8729, + "step": 122490 + }, + { + "epoch": 0.8867365921807929, + "grad_norm": 0.16098947823047638, + "learning_rate": 4.1132706464852666e-06, + "loss": 0.8659, + "step": 122500 + }, + { + "epoch": 0.8868089788413791, + "grad_norm": 0.14861907064914703, + "learning_rate": 4.11319825982468e-06, + "loss": 0.8686, + "step": 122510 + }, + { + "epoch": 0.8868813655019653, + "grad_norm": 0.15192726254463196, + "learning_rate": 4.113125873164094e-06, + "loss": 0.873, + "step": 122520 + }, + { + "epoch": 0.8869537521625515, + "grad_norm": 0.16257117688655853, + "learning_rate": 4.113053486503507e-06, + "loss": 0.8881, + "step": 122530 + }, + { + "epoch": 0.8870261388231376, + "grad_norm": 0.15565155446529388, + "learning_rate": 4.112981099842922e-06, + "loss": 0.8813, + "step": 122540 + }, + { + "epoch": 0.8870985254837238, + "grad_norm": 0.16683930158615112, + "learning_rate": 4.1129087131823355e-06, + "loss": 0.8844, + "step": 122550 + }, + { + "epoch": 0.8871709121443101, + "grad_norm": 0.16827575862407684, + "learning_rate": 4.112836326521749e-06, + "loss": 0.8845, + "step": 122560 + }, + { + "epoch": 0.8872432988048963, + "grad_norm": 0.14905016124248505, + "learning_rate": 4.112763939861163e-06, + "loss": 0.8852, + "step": 122570 + }, + { + "epoch": 0.8873156854654825, + "grad_norm": 0.15825332701206207, + "learning_rate": 4.112691553200577e-06, + "loss": 0.8702, + "step": 122580 + }, + { + "epoch": 0.8873880721260686, + "grad_norm": 0.15095843374729156, + "learning_rate": 4.112619166539991e-06, + "loss": 0.8667, + "step": 122590 + }, + { + "epoch": 0.8874604587866548, + "grad_norm": 0.15348178148269653, + "learning_rate": 4.112546779879404e-06, + "loss": 0.8696, + "step": 122600 + }, + { + "epoch": 0.887532845447241, + "grad_norm": 0.14814035594463348, + "learning_rate": 4.112474393218818e-06, + "loss": 0.8742, + "step": 122610 + }, + { + "epoch": 0.8876052321078272, + "grad_norm": 0.15248076617717743, + "learning_rate": 4.112402006558232e-06, + "loss": 0.8855, + "step": 122620 + }, + { + "epoch": 0.8876776187684133, + "grad_norm": 0.15458175539970398, + "learning_rate": 4.112329619897645e-06, + "loss": 0.8722, + "step": 122630 + }, + { + "epoch": 0.8877500054289995, + "grad_norm": 0.15646861493587494, + "learning_rate": 4.112257233237059e-06, + "loss": 0.87, + "step": 122640 + }, + { + "epoch": 0.8878223920895857, + "grad_norm": 0.14675036072731018, + "learning_rate": 4.112184846576473e-06, + "loss": 0.8763, + "step": 122650 + }, + { + "epoch": 0.8878947787501719, + "grad_norm": 0.16464652121067047, + "learning_rate": 4.112112459915887e-06, + "loss": 0.8817, + "step": 122660 + }, + { + "epoch": 0.8879671654107582, + "grad_norm": 0.15563102066516876, + "learning_rate": 4.1120400732553006e-06, + "loss": 0.8728, + "step": 122670 + }, + { + "epoch": 0.8880395520713443, + "grad_norm": 0.15346914529800415, + "learning_rate": 4.111967686594714e-06, + "loss": 0.8636, + "step": 122680 + }, + { + "epoch": 0.8881119387319305, + "grad_norm": 0.15571348369121552, + "learning_rate": 4.111895299934129e-06, + "loss": 0.8643, + "step": 122690 + }, + { + "epoch": 0.8881843253925167, + "grad_norm": 0.16230016946792603, + "learning_rate": 4.111822913273542e-06, + "loss": 0.8615, + "step": 122700 + }, + { + "epoch": 0.8882567120531029, + "grad_norm": 0.1600496470928192, + "learning_rate": 4.111750526612956e-06, + "loss": 0.8824, + "step": 122710 + }, + { + "epoch": 0.888329098713689, + "grad_norm": 0.14802932739257812, + "learning_rate": 4.1116781399523695e-06, + "loss": 0.8607, + "step": 122720 + }, + { + "epoch": 0.8884014853742752, + "grad_norm": 0.14707952737808228, + "learning_rate": 4.111605753291784e-06, + "loss": 0.8645, + "step": 122730 + }, + { + "epoch": 0.8884738720348614, + "grad_norm": 0.15505805611610413, + "learning_rate": 4.1115333666311976e-06, + "loss": 0.874, + "step": 122740 + }, + { + "epoch": 0.8885462586954476, + "grad_norm": 0.1629432737827301, + "learning_rate": 4.111460979970611e-06, + "loss": 0.8765, + "step": 122750 + }, + { + "epoch": 0.8886186453560337, + "grad_norm": 0.15821245312690735, + "learning_rate": 4.111388593310025e-06, + "loss": 0.8836, + "step": 122760 + }, + { + "epoch": 0.88869103201662, + "grad_norm": 0.1640791893005371, + "learning_rate": 4.111316206649439e-06, + "loss": 0.8718, + "step": 122770 + }, + { + "epoch": 0.8887634186772062, + "grad_norm": 0.1496168076992035, + "learning_rate": 4.111243819988853e-06, + "loss": 0.8776, + "step": 122780 + }, + { + "epoch": 0.8888358053377924, + "grad_norm": 0.15713010728359222, + "learning_rate": 4.1111714333282665e-06, + "loss": 0.8767, + "step": 122790 + }, + { + "epoch": 0.8889081919983786, + "grad_norm": 0.15316422283649445, + "learning_rate": 4.11109904666768e-06, + "loss": 0.8752, + "step": 122800 + }, + { + "epoch": 0.8889805786589647, + "grad_norm": 0.17414002120494843, + "learning_rate": 4.111026660007095e-06, + "loss": 0.8743, + "step": 122810 + }, + { + "epoch": 0.8890529653195509, + "grad_norm": 0.1568707376718521, + "learning_rate": 4.110954273346508e-06, + "loss": 0.8779, + "step": 122820 + }, + { + "epoch": 0.8891253519801371, + "grad_norm": 0.15081565082073212, + "learning_rate": 4.110881886685922e-06, + "loss": 0.8679, + "step": 122830 + }, + { + "epoch": 0.8891977386407233, + "grad_norm": 0.14779068529605865, + "learning_rate": 4.1108095000253354e-06, + "loss": 0.8696, + "step": 122840 + }, + { + "epoch": 0.8892701253013094, + "grad_norm": 0.17924214899539948, + "learning_rate": 4.110737113364749e-06, + "loss": 0.8915, + "step": 122850 + }, + { + "epoch": 0.8893425119618956, + "grad_norm": 0.1565881222486496, + "learning_rate": 4.1106647267041635e-06, + "loss": 0.8798, + "step": 122860 + }, + { + "epoch": 0.8894148986224818, + "grad_norm": 0.16054075956344604, + "learning_rate": 4.110592340043577e-06, + "loss": 0.8543, + "step": 122870 + }, + { + "epoch": 0.8894872852830681, + "grad_norm": 0.1694210320711136, + "learning_rate": 4.110519953382991e-06, + "loss": 0.877, + "step": 122880 + }, + { + "epoch": 0.8895596719436543, + "grad_norm": 0.15159261226654053, + "learning_rate": 4.110447566722404e-06, + "loss": 0.8787, + "step": 122890 + }, + { + "epoch": 0.8896320586042404, + "grad_norm": 0.1697186380624771, + "learning_rate": 4.110375180061819e-06, + "loss": 0.8796, + "step": 122900 + }, + { + "epoch": 0.8897044452648266, + "grad_norm": 0.1503906100988388, + "learning_rate": 4.1103027934012324e-06, + "loss": 0.873, + "step": 122910 + }, + { + "epoch": 0.8897768319254128, + "grad_norm": 0.14617328345775604, + "learning_rate": 4.110230406740646e-06, + "loss": 0.8758, + "step": 122920 + }, + { + "epoch": 0.889849218585999, + "grad_norm": 0.16019441187381744, + "learning_rate": 4.11015802008006e-06, + "loss": 0.8714, + "step": 122930 + }, + { + "epoch": 0.8899216052465851, + "grad_norm": 0.14964258670806885, + "learning_rate": 4.110085633419474e-06, + "loss": 0.8641, + "step": 122940 + }, + { + "epoch": 0.8899939919071713, + "grad_norm": 0.15857601165771484, + "learning_rate": 4.110013246758888e-06, + "loss": 0.8818, + "step": 122950 + }, + { + "epoch": 0.8900663785677575, + "grad_norm": 0.1659708172082901, + "learning_rate": 4.109940860098301e-06, + "loss": 0.8809, + "step": 122960 + }, + { + "epoch": 0.8901387652283437, + "grad_norm": 0.15709270536899567, + "learning_rate": 4.109868473437715e-06, + "loss": 0.878, + "step": 122970 + }, + { + "epoch": 0.89021115188893, + "grad_norm": 0.15638253092765808, + "learning_rate": 4.1097960867771294e-06, + "loss": 0.88, + "step": 122980 + }, + { + "epoch": 0.8902835385495161, + "grad_norm": 0.2203262895345688, + "learning_rate": 4.109723700116543e-06, + "loss": 0.872, + "step": 122990 + }, + { + "epoch": 0.8903559252101023, + "grad_norm": 0.1518968641757965, + "learning_rate": 4.109651313455957e-06, + "loss": 0.8815, + "step": 123000 + }, + { + "epoch": 0.8904283118706885, + "grad_norm": 0.1435840129852295, + "learning_rate": 4.10957892679537e-06, + "loss": 0.8743, + "step": 123010 + }, + { + "epoch": 0.8905006985312747, + "grad_norm": 0.15603044629096985, + "learning_rate": 4.109506540134785e-06, + "loss": 0.8781, + "step": 123020 + }, + { + "epoch": 0.8905730851918608, + "grad_norm": 0.15523672103881836, + "learning_rate": 4.109434153474198e-06, + "loss": 0.8721, + "step": 123030 + }, + { + "epoch": 0.890645471852447, + "grad_norm": 0.15901529788970947, + "learning_rate": 4.109361766813612e-06, + "loss": 0.8793, + "step": 123040 + }, + { + "epoch": 0.8907178585130332, + "grad_norm": 0.14832401275634766, + "learning_rate": 4.109289380153026e-06, + "loss": 0.8713, + "step": 123050 + }, + { + "epoch": 0.8907902451736194, + "grad_norm": 0.14594705402851105, + "learning_rate": 4.10921699349244e-06, + "loss": 0.8588, + "step": 123060 + }, + { + "epoch": 0.8908626318342056, + "grad_norm": 0.21617244184017181, + "learning_rate": 4.109144606831854e-06, + "loss": 0.8714, + "step": 123070 + }, + { + "epoch": 0.8909350184947917, + "grad_norm": 0.15614116191864014, + "learning_rate": 4.109072220171267e-06, + "loss": 0.8754, + "step": 123080 + }, + { + "epoch": 0.891007405155378, + "grad_norm": 0.17854410409927368, + "learning_rate": 4.108999833510681e-06, + "loss": 0.8709, + "step": 123090 + }, + { + "epoch": 0.8910797918159642, + "grad_norm": 0.146412193775177, + "learning_rate": 4.108927446850095e-06, + "loss": 0.8688, + "step": 123100 + }, + { + "epoch": 0.8911521784765504, + "grad_norm": 0.16947603225708008, + "learning_rate": 4.108855060189509e-06, + "loss": 0.8774, + "step": 123110 + }, + { + "epoch": 0.8912245651371365, + "grad_norm": 0.16192446649074554, + "learning_rate": 4.108782673528923e-06, + "loss": 0.8858, + "step": 123120 + }, + { + "epoch": 0.8912969517977227, + "grad_norm": 0.15947166085243225, + "learning_rate": 4.108710286868336e-06, + "loss": 0.8668, + "step": 123130 + }, + { + "epoch": 0.8913693384583089, + "grad_norm": 0.14823932945728302, + "learning_rate": 4.108637900207751e-06, + "loss": 0.8724, + "step": 123140 + }, + { + "epoch": 0.8914417251188951, + "grad_norm": 0.15632663667201996, + "learning_rate": 4.1085655135471635e-06, + "loss": 0.8697, + "step": 123150 + }, + { + "epoch": 0.8915141117794813, + "grad_norm": 0.18125022947788239, + "learning_rate": 4.108493126886577e-06, + "loss": 0.8844, + "step": 123160 + }, + { + "epoch": 0.8915864984400674, + "grad_norm": 0.1472315490245819, + "learning_rate": 4.1084207402259915e-06, + "loss": 0.8656, + "step": 123170 + }, + { + "epoch": 0.8916588851006536, + "grad_norm": 0.16403494775295258, + "learning_rate": 4.108348353565405e-06, + "loss": 0.8732, + "step": 123180 + }, + { + "epoch": 0.8917312717612398, + "grad_norm": 0.17012234032154083, + "learning_rate": 4.108275966904819e-06, + "loss": 0.8715, + "step": 123190 + }, + { + "epoch": 0.8918036584218261, + "grad_norm": 0.16016776859760284, + "learning_rate": 4.108203580244232e-06, + "loss": 0.8698, + "step": 123200 + }, + { + "epoch": 0.8918760450824122, + "grad_norm": 0.1564156860113144, + "learning_rate": 4.108131193583647e-06, + "loss": 0.8808, + "step": 123210 + }, + { + "epoch": 0.8919484317429984, + "grad_norm": 0.14456337690353394, + "learning_rate": 4.1080588069230605e-06, + "loss": 0.8633, + "step": 123220 + }, + { + "epoch": 0.8920208184035846, + "grad_norm": 0.1573518067598343, + "learning_rate": 4.107986420262474e-06, + "loss": 0.873, + "step": 123230 + }, + { + "epoch": 0.8920932050641708, + "grad_norm": 0.14703720808029175, + "learning_rate": 4.107914033601888e-06, + "loss": 0.8733, + "step": 123240 + }, + { + "epoch": 0.892165591724757, + "grad_norm": 0.14381085336208344, + "learning_rate": 4.107841646941302e-06, + "loss": 0.8629, + "step": 123250 + }, + { + "epoch": 0.8922379783853431, + "grad_norm": 0.15476030111312866, + "learning_rate": 4.107769260280716e-06, + "loss": 0.8765, + "step": 123260 + }, + { + "epoch": 0.8923103650459293, + "grad_norm": 0.1462145894765854, + "learning_rate": 4.107696873620129e-06, + "loss": 0.8677, + "step": 123270 + }, + { + "epoch": 0.8923827517065155, + "grad_norm": 0.18984442949295044, + "learning_rate": 4.107624486959543e-06, + "loss": 0.8851, + "step": 123280 + }, + { + "epoch": 0.8924551383671017, + "grad_norm": 0.15868966281414032, + "learning_rate": 4.1075521002989575e-06, + "loss": 0.864, + "step": 123290 + }, + { + "epoch": 0.892527525027688, + "grad_norm": 0.1640649437904358, + "learning_rate": 4.107479713638371e-06, + "loss": 0.8773, + "step": 123300 + }, + { + "epoch": 0.8925999116882741, + "grad_norm": 0.16619038581848145, + "learning_rate": 4.107407326977785e-06, + "loss": 0.8643, + "step": 123310 + }, + { + "epoch": 0.8926722983488603, + "grad_norm": 0.14695781469345093, + "learning_rate": 4.107334940317198e-06, + "loss": 0.8755, + "step": 123320 + }, + { + "epoch": 0.8927446850094465, + "grad_norm": 0.14879372715950012, + "learning_rate": 4.107262553656613e-06, + "loss": 0.8826, + "step": 123330 + }, + { + "epoch": 0.8928170716700327, + "grad_norm": 0.16833491623401642, + "learning_rate": 4.107190166996026e-06, + "loss": 0.8893, + "step": 123340 + }, + { + "epoch": 0.8928894583306188, + "grad_norm": 0.15543371438980103, + "learning_rate": 4.10711778033544e-06, + "loss": 0.8869, + "step": 123350 + }, + { + "epoch": 0.892961844991205, + "grad_norm": 0.15344348549842834, + "learning_rate": 4.107045393674854e-06, + "loss": 0.8791, + "step": 123360 + }, + { + "epoch": 0.8930342316517912, + "grad_norm": 0.15514753758907318, + "learning_rate": 4.106973007014268e-06, + "loss": 0.8844, + "step": 123370 + }, + { + "epoch": 0.8931066183123774, + "grad_norm": 0.16524678468704224, + "learning_rate": 4.106900620353682e-06, + "loss": 0.867, + "step": 123380 + }, + { + "epoch": 0.8931790049729635, + "grad_norm": 0.16478219628334045, + "learning_rate": 4.106828233693095e-06, + "loss": 0.8817, + "step": 123390 + }, + { + "epoch": 0.8932513916335497, + "grad_norm": 0.15810853242874146, + "learning_rate": 4.106755847032509e-06, + "loss": 0.8733, + "step": 123400 + }, + { + "epoch": 0.893323778294136, + "grad_norm": 0.1545628160238266, + "learning_rate": 4.106683460371923e-06, + "loss": 0.8761, + "step": 123410 + }, + { + "epoch": 0.8933961649547222, + "grad_norm": 0.176656574010849, + "learning_rate": 4.106611073711337e-06, + "loss": 0.8779, + "step": 123420 + }, + { + "epoch": 0.8934685516153084, + "grad_norm": 0.15695984661579132, + "learning_rate": 4.106538687050751e-06, + "loss": 0.8856, + "step": 123430 + }, + { + "epoch": 0.8935409382758945, + "grad_norm": 0.1536366492509842, + "learning_rate": 4.106466300390164e-06, + "loss": 0.8712, + "step": 123440 + }, + { + "epoch": 0.8936133249364807, + "grad_norm": 0.2222108393907547, + "learning_rate": 4.106393913729578e-06, + "loss": 0.8721, + "step": 123450 + }, + { + "epoch": 0.8936857115970669, + "grad_norm": 0.15123595297336578, + "learning_rate": 4.106321527068992e-06, + "loss": 0.8617, + "step": 123460 + }, + { + "epoch": 0.8937580982576531, + "grad_norm": 0.14488713443279266, + "learning_rate": 4.106249140408406e-06, + "loss": 0.8686, + "step": 123470 + }, + { + "epoch": 0.8938304849182392, + "grad_norm": 0.18304714560508728, + "learning_rate": 4.1061767537478196e-06, + "loss": 0.8768, + "step": 123480 + }, + { + "epoch": 0.8939028715788254, + "grad_norm": 0.16296693682670593, + "learning_rate": 4.106104367087233e-06, + "loss": 0.8661, + "step": 123490 + }, + { + "epoch": 0.8939752582394116, + "grad_norm": 0.14711807668209076, + "learning_rate": 4.106031980426648e-06, + "loss": 0.8769, + "step": 123500 + }, + { + "epoch": 0.8940476448999978, + "grad_norm": 0.1431189477443695, + "learning_rate": 4.105959593766061e-06, + "loss": 0.8894, + "step": 123510 + }, + { + "epoch": 0.8941200315605841, + "grad_norm": 0.1538097858428955, + "learning_rate": 4.105887207105475e-06, + "loss": 0.8737, + "step": 123520 + }, + { + "epoch": 0.8941924182211702, + "grad_norm": 0.15925562381744385, + "learning_rate": 4.1058148204448885e-06, + "loss": 0.8678, + "step": 123530 + }, + { + "epoch": 0.8942648048817564, + "grad_norm": 0.15328852832317352, + "learning_rate": 4.105742433784303e-06, + "loss": 0.8692, + "step": 123540 + }, + { + "epoch": 0.8943371915423426, + "grad_norm": 0.17088687419891357, + "learning_rate": 4.105670047123717e-06, + "loss": 0.8799, + "step": 123550 + }, + { + "epoch": 0.8944095782029288, + "grad_norm": 0.16045989096164703, + "learning_rate": 4.10559766046313e-06, + "loss": 0.8686, + "step": 123560 + }, + { + "epoch": 0.894481964863515, + "grad_norm": 0.17168870568275452, + "learning_rate": 4.105525273802544e-06, + "loss": 0.8733, + "step": 123570 + }, + { + "epoch": 0.8945543515241011, + "grad_norm": 0.16414594650268555, + "learning_rate": 4.105452887141958e-06, + "loss": 0.8873, + "step": 123580 + }, + { + "epoch": 0.8946267381846873, + "grad_norm": 0.15379905700683594, + "learning_rate": 4.105380500481372e-06, + "loss": 0.8712, + "step": 123590 + }, + { + "epoch": 0.8946991248452735, + "grad_norm": 0.1613912731409073, + "learning_rate": 4.1053081138207855e-06, + "loss": 0.8756, + "step": 123600 + }, + { + "epoch": 0.8947715115058597, + "grad_norm": 0.1442667692899704, + "learning_rate": 4.105235727160199e-06, + "loss": 0.8774, + "step": 123610 + }, + { + "epoch": 0.8948438981664459, + "grad_norm": 0.14742349088191986, + "learning_rate": 4.105163340499614e-06, + "loss": 0.8749, + "step": 123620 + }, + { + "epoch": 0.8949162848270321, + "grad_norm": 0.15726658701896667, + "learning_rate": 4.105090953839027e-06, + "loss": 0.8714, + "step": 123630 + }, + { + "epoch": 0.8949886714876183, + "grad_norm": 0.1479179859161377, + "learning_rate": 4.105018567178441e-06, + "loss": 0.8764, + "step": 123640 + }, + { + "epoch": 0.8950610581482045, + "grad_norm": 0.14696402847766876, + "learning_rate": 4.1049461805178544e-06, + "loss": 0.8743, + "step": 123650 + }, + { + "epoch": 0.8951334448087906, + "grad_norm": 0.19279779493808746, + "learning_rate": 4.104873793857269e-06, + "loss": 0.876, + "step": 123660 + }, + { + "epoch": 0.8952058314693768, + "grad_norm": 0.1497318148612976, + "learning_rate": 4.1048014071966825e-06, + "loss": 0.8704, + "step": 123670 + }, + { + "epoch": 0.895278218129963, + "grad_norm": 0.16485776007175446, + "learning_rate": 4.104729020536095e-06, + "loss": 0.882, + "step": 123680 + }, + { + "epoch": 0.8953506047905492, + "grad_norm": 0.18929380178451538, + "learning_rate": 4.10465663387551e-06, + "loss": 0.8752, + "step": 123690 + }, + { + "epoch": 0.8954229914511354, + "grad_norm": 0.1474197655916214, + "learning_rate": 4.104584247214923e-06, + "loss": 0.8665, + "step": 123700 + }, + { + "epoch": 0.8954953781117215, + "grad_norm": 0.1479775607585907, + "learning_rate": 4.104511860554337e-06, + "loss": 0.8665, + "step": 123710 + }, + { + "epoch": 0.8955677647723077, + "grad_norm": 0.26325809955596924, + "learning_rate": 4.104439473893751e-06, + "loss": 0.8675, + "step": 123720 + }, + { + "epoch": 0.895640151432894, + "grad_norm": 0.16428937017917633, + "learning_rate": 4.104367087233165e-06, + "loss": 0.8761, + "step": 123730 + }, + { + "epoch": 0.8957125380934802, + "grad_norm": 0.14757543802261353, + "learning_rate": 4.104294700572579e-06, + "loss": 0.8641, + "step": 123740 + }, + { + "epoch": 0.8957849247540663, + "grad_norm": 0.15158720314502716, + "learning_rate": 4.104222313911992e-06, + "loss": 0.8716, + "step": 123750 + }, + { + "epoch": 0.8958573114146525, + "grad_norm": 0.15300332009792328, + "learning_rate": 4.104149927251406e-06, + "loss": 0.8746, + "step": 123760 + }, + { + "epoch": 0.8959296980752387, + "grad_norm": 0.14569737017154694, + "learning_rate": 4.10407754059082e-06, + "loss": 0.8769, + "step": 123770 + }, + { + "epoch": 0.8960020847358249, + "grad_norm": 0.1715804487466812, + "learning_rate": 4.104005153930234e-06, + "loss": 0.871, + "step": 123780 + }, + { + "epoch": 0.896074471396411, + "grad_norm": 0.149086594581604, + "learning_rate": 4.103932767269648e-06, + "loss": 0.8719, + "step": 123790 + }, + { + "epoch": 0.8961468580569972, + "grad_norm": 0.14175446331501007, + "learning_rate": 4.103860380609061e-06, + "loss": 0.8699, + "step": 123800 + }, + { + "epoch": 0.8962192447175834, + "grad_norm": 0.15866614878177643, + "learning_rate": 4.103787993948476e-06, + "loss": 0.8739, + "step": 123810 + }, + { + "epoch": 0.8962916313781696, + "grad_norm": 0.15101435780525208, + "learning_rate": 4.103715607287889e-06, + "loss": 0.8812, + "step": 123820 + }, + { + "epoch": 0.8963640180387559, + "grad_norm": 0.16456730663776398, + "learning_rate": 4.103643220627303e-06, + "loss": 0.8643, + "step": 123830 + }, + { + "epoch": 0.896436404699342, + "grad_norm": 0.15458862483501434, + "learning_rate": 4.1035708339667165e-06, + "loss": 0.8746, + "step": 123840 + }, + { + "epoch": 0.8965087913599282, + "grad_norm": 0.15625207126140594, + "learning_rate": 4.103498447306131e-06, + "loss": 0.8852, + "step": 123850 + }, + { + "epoch": 0.8965811780205144, + "grad_norm": 0.1557016670703888, + "learning_rate": 4.103426060645545e-06, + "loss": 0.8615, + "step": 123860 + }, + { + "epoch": 0.8966535646811006, + "grad_norm": 0.15922847390174866, + "learning_rate": 4.103353673984958e-06, + "loss": 0.8678, + "step": 123870 + }, + { + "epoch": 0.8967259513416868, + "grad_norm": 0.15058737993240356, + "learning_rate": 4.103281287324372e-06, + "loss": 0.8657, + "step": 123880 + }, + { + "epoch": 0.8967983380022729, + "grad_norm": 0.15507520735263824, + "learning_rate": 4.103208900663786e-06, + "loss": 0.8639, + "step": 123890 + }, + { + "epoch": 0.8968707246628591, + "grad_norm": 0.1978985220193863, + "learning_rate": 4.1031365140032e-06, + "loss": 0.8783, + "step": 123900 + }, + { + "epoch": 0.8969431113234453, + "grad_norm": 0.1425454467535019, + "learning_rate": 4.1030641273426135e-06, + "loss": 0.8741, + "step": 123910 + }, + { + "epoch": 0.8970154979840315, + "grad_norm": 0.14902305603027344, + "learning_rate": 4.102991740682027e-06, + "loss": 0.8698, + "step": 123920 + }, + { + "epoch": 0.8970878846446176, + "grad_norm": 0.1497499793767929, + "learning_rate": 4.102919354021442e-06, + "loss": 0.8657, + "step": 123930 + }, + { + "epoch": 0.8971602713052039, + "grad_norm": 0.16021916270256042, + "learning_rate": 4.102846967360855e-06, + "loss": 0.8808, + "step": 123940 + }, + { + "epoch": 0.8972326579657901, + "grad_norm": 0.14823076128959656, + "learning_rate": 4.102774580700269e-06, + "loss": 0.8701, + "step": 123950 + }, + { + "epoch": 0.8973050446263763, + "grad_norm": 0.16842210292816162, + "learning_rate": 4.1027021940396825e-06, + "loss": 0.8782, + "step": 123960 + }, + { + "epoch": 0.8973774312869625, + "grad_norm": 0.14396853744983673, + "learning_rate": 4.102629807379097e-06, + "loss": 0.8804, + "step": 123970 + }, + { + "epoch": 0.8974498179475486, + "grad_norm": 0.1455616056919098, + "learning_rate": 4.1025574207185105e-06, + "loss": 0.869, + "step": 123980 + }, + { + "epoch": 0.8975222046081348, + "grad_norm": 0.14303292334079742, + "learning_rate": 4.102485034057924e-06, + "loss": 0.8642, + "step": 123990 + }, + { + "epoch": 0.897594591268721, + "grad_norm": 0.15414045751094818, + "learning_rate": 4.102412647397338e-06, + "loss": 0.8639, + "step": 124000 + }, + { + "epoch": 0.8976669779293072, + "grad_norm": 0.15850482881069183, + "learning_rate": 4.102340260736752e-06, + "loss": 0.8639, + "step": 124010 + }, + { + "epoch": 0.8977393645898933, + "grad_norm": 0.15761762857437134, + "learning_rate": 4.102267874076166e-06, + "loss": 0.8722, + "step": 124020 + }, + { + "epoch": 0.8978117512504795, + "grad_norm": 0.1510854810476303, + "learning_rate": 4.1021954874155795e-06, + "loss": 0.8801, + "step": 124030 + }, + { + "epoch": 0.8978841379110657, + "grad_norm": 0.14972640573978424, + "learning_rate": 4.102123100754993e-06, + "loss": 0.8731, + "step": 124040 + }, + { + "epoch": 0.897956524571652, + "grad_norm": 0.15360723435878754, + "learning_rate": 4.1020507140944076e-06, + "loss": 0.8713, + "step": 124050 + }, + { + "epoch": 0.8980289112322382, + "grad_norm": 0.16010282933712006, + "learning_rate": 4.101978327433821e-06, + "loss": 0.8626, + "step": 124060 + }, + { + "epoch": 0.8981012978928243, + "grad_norm": 0.15653282403945923, + "learning_rate": 4.101905940773235e-06, + "loss": 0.8747, + "step": 124070 + }, + { + "epoch": 0.8981736845534105, + "grad_norm": 0.14662018418312073, + "learning_rate": 4.101833554112648e-06, + "loss": 0.8779, + "step": 124080 + }, + { + "epoch": 0.8982460712139967, + "grad_norm": 0.1570512056350708, + "learning_rate": 4.101761167452062e-06, + "loss": 0.8845, + "step": 124090 + }, + { + "epoch": 0.8983184578745829, + "grad_norm": 0.16235743463039398, + "learning_rate": 4.1016887807914765e-06, + "loss": 0.8702, + "step": 124100 + }, + { + "epoch": 0.898390844535169, + "grad_norm": 0.1602431684732437, + "learning_rate": 4.10161639413089e-06, + "loss": 0.8693, + "step": 124110 + }, + { + "epoch": 0.8984632311957552, + "grad_norm": 0.15021947026252747, + "learning_rate": 4.101544007470304e-06, + "loss": 0.8704, + "step": 124120 + }, + { + "epoch": 0.8985356178563414, + "grad_norm": 0.17709344625473022, + "learning_rate": 4.101471620809717e-06, + "loss": 0.8725, + "step": 124130 + }, + { + "epoch": 0.8986080045169276, + "grad_norm": 0.3144311308860779, + "learning_rate": 4.101399234149132e-06, + "loss": 0.8644, + "step": 124140 + }, + { + "epoch": 0.8986803911775139, + "grad_norm": 0.1658957153558731, + "learning_rate": 4.101326847488545e-06, + "loss": 0.8645, + "step": 124150 + }, + { + "epoch": 0.8987527778381, + "grad_norm": 0.1864367425441742, + "learning_rate": 4.101254460827959e-06, + "loss": 0.8769, + "step": 124160 + }, + { + "epoch": 0.8988251644986862, + "grad_norm": 0.1641472429037094, + "learning_rate": 4.101182074167373e-06, + "loss": 0.8743, + "step": 124170 + }, + { + "epoch": 0.8988975511592724, + "grad_norm": 0.156347393989563, + "learning_rate": 4.101109687506787e-06, + "loss": 0.863, + "step": 124180 + }, + { + "epoch": 0.8989699378198586, + "grad_norm": 0.14861541986465454, + "learning_rate": 4.101037300846201e-06, + "loss": 0.8728, + "step": 124190 + }, + { + "epoch": 0.8990423244804447, + "grad_norm": 0.15074244141578674, + "learning_rate": 4.100964914185614e-06, + "loss": 0.8771, + "step": 124200 + }, + { + "epoch": 0.8991147111410309, + "grad_norm": 0.14890417456626892, + "learning_rate": 4.100892527525028e-06, + "loss": 0.8639, + "step": 124210 + }, + { + "epoch": 0.8991870978016171, + "grad_norm": 0.1627875566482544, + "learning_rate": 4.1008201408644416e-06, + "loss": 0.8691, + "step": 124220 + }, + { + "epoch": 0.8992594844622033, + "grad_norm": 0.15136182308197021, + "learning_rate": 4.100747754203855e-06, + "loss": 0.8836, + "step": 124230 + }, + { + "epoch": 0.8993318711227895, + "grad_norm": 0.14906705915927887, + "learning_rate": 4.100675367543269e-06, + "loss": 0.8721, + "step": 124240 + }, + { + "epoch": 0.8994042577833756, + "grad_norm": 0.14865903556346893, + "learning_rate": 4.100602980882683e-06, + "loss": 0.873, + "step": 124250 + }, + { + "epoch": 0.8994766444439619, + "grad_norm": 0.19429634511470795, + "learning_rate": 4.100530594222097e-06, + "loss": 0.8822, + "step": 124260 + }, + { + "epoch": 0.8995490311045481, + "grad_norm": 0.16122131049633026, + "learning_rate": 4.1004582075615105e-06, + "loss": 0.874, + "step": 124270 + }, + { + "epoch": 0.8996214177651343, + "grad_norm": 0.14246490597724915, + "learning_rate": 4.100385820900924e-06, + "loss": 0.8657, + "step": 124280 + }, + { + "epoch": 0.8996938044257204, + "grad_norm": 0.15221592783927917, + "learning_rate": 4.100313434240339e-06, + "loss": 0.8718, + "step": 124290 + }, + { + "epoch": 0.8997661910863066, + "grad_norm": 0.1513119488954544, + "learning_rate": 4.100241047579752e-06, + "loss": 0.8552, + "step": 124300 + }, + { + "epoch": 0.8998385777468928, + "grad_norm": 0.1578131765127182, + "learning_rate": 4.100168660919166e-06, + "loss": 0.873, + "step": 124310 + }, + { + "epoch": 0.899910964407479, + "grad_norm": 0.1699373573064804, + "learning_rate": 4.1000962742585794e-06, + "loss": 0.883, + "step": 124320 + }, + { + "epoch": 0.8999833510680652, + "grad_norm": 0.16044111549854279, + "learning_rate": 4.100023887597994e-06, + "loss": 0.8759, + "step": 124330 + }, + { + "epoch": 0.9000557377286513, + "grad_norm": 0.16332702338695526, + "learning_rate": 4.0999515009374075e-06, + "loss": 0.8708, + "step": 124340 + }, + { + "epoch": 0.9001281243892375, + "grad_norm": 0.15864451229572296, + "learning_rate": 4.099879114276821e-06, + "loss": 0.8728, + "step": 124350 + }, + { + "epoch": 0.9002005110498238, + "grad_norm": 0.16290296614170074, + "learning_rate": 4.099806727616235e-06, + "loss": 0.8597, + "step": 124360 + }, + { + "epoch": 0.90027289771041, + "grad_norm": 0.16254830360412598, + "learning_rate": 4.099734340955649e-06, + "loss": 0.8672, + "step": 124370 + }, + { + "epoch": 0.9003452843709961, + "grad_norm": 0.15096203982830048, + "learning_rate": 4.099661954295063e-06, + "loss": 0.884, + "step": 124380 + }, + { + "epoch": 0.9004176710315823, + "grad_norm": 0.1599651724100113, + "learning_rate": 4.0995895676344764e-06, + "loss": 0.8764, + "step": 124390 + }, + { + "epoch": 0.9004900576921685, + "grad_norm": 0.1575443595647812, + "learning_rate": 4.09951718097389e-06, + "loss": 0.8833, + "step": 124400 + }, + { + "epoch": 0.9005624443527547, + "grad_norm": 0.15898630023002625, + "learning_rate": 4.0994447943133045e-06, + "loss": 0.8664, + "step": 124410 + }, + { + "epoch": 0.9006348310133409, + "grad_norm": 0.14191798865795135, + "learning_rate": 4.099372407652718e-06, + "loss": 0.8758, + "step": 124420 + }, + { + "epoch": 0.900707217673927, + "grad_norm": 0.15920232236385345, + "learning_rate": 4.099300020992132e-06, + "loss": 0.8628, + "step": 124430 + }, + { + "epoch": 0.9007796043345132, + "grad_norm": 0.15842455625534058, + "learning_rate": 4.099227634331545e-06, + "loss": 0.8698, + "step": 124440 + }, + { + "epoch": 0.9008519909950994, + "grad_norm": 0.1533687710762024, + "learning_rate": 4.09915524767096e-06, + "loss": 0.8723, + "step": 124450 + }, + { + "epoch": 0.9009243776556856, + "grad_norm": 0.16112767159938812, + "learning_rate": 4.0990828610103734e-06, + "loss": 0.8559, + "step": 124460 + }, + { + "epoch": 0.9009967643162718, + "grad_norm": 0.15855056047439575, + "learning_rate": 4.099010474349787e-06, + "loss": 0.8793, + "step": 124470 + }, + { + "epoch": 0.901069150976858, + "grad_norm": 0.15525835752487183, + "learning_rate": 4.098938087689201e-06, + "loss": 0.8706, + "step": 124480 + }, + { + "epoch": 0.9011415376374442, + "grad_norm": 0.14787094295024872, + "learning_rate": 4.098865701028615e-06, + "loss": 0.8685, + "step": 124490 + }, + { + "epoch": 0.9012139242980304, + "grad_norm": 0.17414870858192444, + "learning_rate": 4.098793314368029e-06, + "loss": 0.8715, + "step": 124500 + }, + { + "epoch": 0.9012863109586166, + "grad_norm": 0.15117833018302917, + "learning_rate": 4.098720927707442e-06, + "loss": 0.8646, + "step": 124510 + }, + { + "epoch": 0.9013586976192027, + "grad_norm": 0.22992941737174988, + "learning_rate": 4.098648541046856e-06, + "loss": 0.8764, + "step": 124520 + }, + { + "epoch": 0.9014310842797889, + "grad_norm": 0.15416789054870605, + "learning_rate": 4.0985761543862705e-06, + "loss": 0.881, + "step": 124530 + }, + { + "epoch": 0.9015034709403751, + "grad_norm": 0.17136387526988983, + "learning_rate": 4.098503767725684e-06, + "loss": 0.8801, + "step": 124540 + }, + { + "epoch": 0.9015758576009613, + "grad_norm": 0.1540147364139557, + "learning_rate": 4.098431381065098e-06, + "loss": 0.8798, + "step": 124550 + }, + { + "epoch": 0.9016482442615474, + "grad_norm": 0.15776564180850983, + "learning_rate": 4.098358994404511e-06, + "loss": 0.8718, + "step": 124560 + }, + { + "epoch": 0.9017206309221336, + "grad_norm": 0.15084987878799438, + "learning_rate": 4.098286607743926e-06, + "loss": 0.8809, + "step": 124570 + }, + { + "epoch": 0.9017930175827199, + "grad_norm": 0.15924064815044403, + "learning_rate": 4.098214221083339e-06, + "loss": 0.8815, + "step": 124580 + }, + { + "epoch": 0.9018654042433061, + "grad_norm": 0.14319369196891785, + "learning_rate": 4.098141834422753e-06, + "loss": 0.8619, + "step": 124590 + }, + { + "epoch": 0.9019377909038923, + "grad_norm": 0.1575179100036621, + "learning_rate": 4.098069447762167e-06, + "loss": 0.8651, + "step": 124600 + }, + { + "epoch": 0.9020101775644784, + "grad_norm": 0.1562921404838562, + "learning_rate": 4.097997061101581e-06, + "loss": 0.8687, + "step": 124610 + }, + { + "epoch": 0.9020825642250646, + "grad_norm": 0.1615796834230423, + "learning_rate": 4.097924674440995e-06, + "loss": 0.8539, + "step": 124620 + }, + { + "epoch": 0.9021549508856508, + "grad_norm": 0.14882075786590576, + "learning_rate": 4.097852287780408e-06, + "loss": 0.8667, + "step": 124630 + }, + { + "epoch": 0.902227337546237, + "grad_norm": 0.15828663110733032, + "learning_rate": 4.097779901119822e-06, + "loss": 0.8749, + "step": 124640 + }, + { + "epoch": 0.9022997242068231, + "grad_norm": 0.15658116340637207, + "learning_rate": 4.097707514459236e-06, + "loss": 0.8891, + "step": 124650 + }, + { + "epoch": 0.9023721108674093, + "grad_norm": 0.15174657106399536, + "learning_rate": 4.09763512779865e-06, + "loss": 0.8691, + "step": 124660 + }, + { + "epoch": 0.9024444975279955, + "grad_norm": 0.14801457524299622, + "learning_rate": 4.097562741138064e-06, + "loss": 0.8774, + "step": 124670 + }, + { + "epoch": 0.9025168841885818, + "grad_norm": 0.17308656871318817, + "learning_rate": 4.097490354477477e-06, + "loss": 0.8792, + "step": 124680 + }, + { + "epoch": 0.902589270849168, + "grad_norm": 0.13914184272289276, + "learning_rate": 4.097417967816891e-06, + "loss": 0.8703, + "step": 124690 + }, + { + "epoch": 0.9026616575097541, + "grad_norm": 0.16653895378112793, + "learning_rate": 4.097345581156305e-06, + "loss": 0.8584, + "step": 124700 + }, + { + "epoch": 0.9027340441703403, + "grad_norm": 0.15545400977134705, + "learning_rate": 4.097273194495719e-06, + "loss": 0.8746, + "step": 124710 + }, + { + "epoch": 0.9028064308309265, + "grad_norm": 0.15247924625873566, + "learning_rate": 4.0972008078351325e-06, + "loss": 0.8756, + "step": 124720 + }, + { + "epoch": 0.9028788174915127, + "grad_norm": 0.15128053724765778, + "learning_rate": 4.097128421174546e-06, + "loss": 0.8757, + "step": 124730 + }, + { + "epoch": 0.9029512041520988, + "grad_norm": 0.1656970977783203, + "learning_rate": 4.09705603451396e-06, + "loss": 0.864, + "step": 124740 + }, + { + "epoch": 0.903023590812685, + "grad_norm": 0.15432408452033997, + "learning_rate": 4.096983647853373e-06, + "loss": 0.8673, + "step": 124750 + }, + { + "epoch": 0.9030959774732712, + "grad_norm": 0.14504657685756683, + "learning_rate": 4.096911261192788e-06, + "loss": 0.8722, + "step": 124760 + }, + { + "epoch": 0.9031683641338574, + "grad_norm": 0.181019127368927, + "learning_rate": 4.0968388745322015e-06, + "loss": 0.8777, + "step": 124770 + }, + { + "epoch": 0.9032407507944435, + "grad_norm": 0.1841985583305359, + "learning_rate": 4.096766487871615e-06, + "loss": 0.8764, + "step": 124780 + }, + { + "epoch": 0.9033131374550298, + "grad_norm": 0.14566998183727264, + "learning_rate": 4.096694101211029e-06, + "loss": 0.8788, + "step": 124790 + }, + { + "epoch": 0.903385524115616, + "grad_norm": 0.15821722149848938, + "learning_rate": 4.096621714550443e-06, + "loss": 0.8675, + "step": 124800 + }, + { + "epoch": 0.9034579107762022, + "grad_norm": 0.14918698370456696, + "learning_rate": 4.096549327889857e-06, + "loss": 0.8612, + "step": 124810 + }, + { + "epoch": 0.9035302974367884, + "grad_norm": 0.16349859535694122, + "learning_rate": 4.09647694122927e-06, + "loss": 0.8707, + "step": 124820 + }, + { + "epoch": 0.9036026840973745, + "grad_norm": 0.15375465154647827, + "learning_rate": 4.096404554568684e-06, + "loss": 0.8806, + "step": 124830 + }, + { + "epoch": 0.9036750707579607, + "grad_norm": 0.152941033244133, + "learning_rate": 4.0963321679080985e-06, + "loss": 0.881, + "step": 124840 + }, + { + "epoch": 0.9037474574185469, + "grad_norm": 0.22806885838508606, + "learning_rate": 4.096259781247512e-06, + "loss": 0.8626, + "step": 124850 + }, + { + "epoch": 0.9038198440791331, + "grad_norm": 0.15706026554107666, + "learning_rate": 4.096187394586926e-06, + "loss": 0.8711, + "step": 124860 + }, + { + "epoch": 0.9038922307397192, + "grad_norm": 0.1524626761674881, + "learning_rate": 4.096115007926339e-06, + "loss": 0.8779, + "step": 124870 + }, + { + "epoch": 0.9039646174003054, + "grad_norm": 0.16618412733078003, + "learning_rate": 4.096042621265753e-06, + "loss": 0.8715, + "step": 124880 + }, + { + "epoch": 0.9040370040608917, + "grad_norm": 0.1479671746492386, + "learning_rate": 4.095970234605167e-06, + "loss": 0.8798, + "step": 124890 + }, + { + "epoch": 0.9041093907214779, + "grad_norm": 0.22761158645153046, + "learning_rate": 4.095897847944581e-06, + "loss": 0.8708, + "step": 124900 + }, + { + "epoch": 0.9041817773820641, + "grad_norm": 0.15918904542922974, + "learning_rate": 4.095825461283995e-06, + "loss": 0.8679, + "step": 124910 + }, + { + "epoch": 0.9042541640426502, + "grad_norm": 0.16984111070632935, + "learning_rate": 4.095753074623408e-06, + "loss": 0.8686, + "step": 124920 + }, + { + "epoch": 0.9043265507032364, + "grad_norm": 0.15530432760715485, + "learning_rate": 4.095680687962823e-06, + "loss": 0.8733, + "step": 124930 + }, + { + "epoch": 0.9043989373638226, + "grad_norm": 0.15321482717990875, + "learning_rate": 4.095608301302236e-06, + "loss": 0.868, + "step": 124940 + }, + { + "epoch": 0.9044713240244088, + "grad_norm": 0.15977735817432404, + "learning_rate": 4.09553591464165e-06, + "loss": 0.8704, + "step": 124950 + }, + { + "epoch": 0.904543710684995, + "grad_norm": 0.14585714042186737, + "learning_rate": 4.0954635279810636e-06, + "loss": 0.8688, + "step": 124960 + }, + { + "epoch": 0.9046160973455811, + "grad_norm": 0.16191209852695465, + "learning_rate": 4.095391141320478e-06, + "loss": 0.8638, + "step": 124970 + }, + { + "epoch": 0.9046884840061673, + "grad_norm": 0.27564188838005066, + "learning_rate": 4.095318754659892e-06, + "loss": 0.8708, + "step": 124980 + }, + { + "epoch": 0.9047608706667535, + "grad_norm": 0.14919497072696686, + "learning_rate": 4.095246367999305e-06, + "loss": 0.8662, + "step": 124990 + }, + { + "epoch": 0.9048332573273398, + "grad_norm": 0.1528129130601883, + "learning_rate": 4.095173981338719e-06, + "loss": 0.8759, + "step": 125000 + }, + { + "epoch": 0.904905643987926, + "grad_norm": 0.15867576003074646, + "learning_rate": 4.095101594678133e-06, + "loss": 0.888, + "step": 125010 + }, + { + "epoch": 0.9049780306485121, + "grad_norm": 0.16656945645809174, + "learning_rate": 4.095029208017547e-06, + "loss": 0.8661, + "step": 125020 + }, + { + "epoch": 0.9050504173090983, + "grad_norm": 0.1601569950580597, + "learning_rate": 4.094956821356961e-06, + "loss": 0.8645, + "step": 125030 + }, + { + "epoch": 0.9051228039696845, + "grad_norm": 0.1524617075920105, + "learning_rate": 4.094884434696374e-06, + "loss": 0.8727, + "step": 125040 + }, + { + "epoch": 0.9051951906302707, + "grad_norm": 0.15600845217704773, + "learning_rate": 4.094812048035789e-06, + "loss": 0.8757, + "step": 125050 + }, + { + "epoch": 0.9052675772908568, + "grad_norm": 0.15215645730495453, + "learning_rate": 4.094739661375202e-06, + "loss": 0.8556, + "step": 125060 + }, + { + "epoch": 0.905339963951443, + "grad_norm": 0.14857865869998932, + "learning_rate": 4.094667274714616e-06, + "loss": 0.8629, + "step": 125070 + }, + { + "epoch": 0.9054123506120292, + "grad_norm": 0.15590626001358032, + "learning_rate": 4.0945948880540295e-06, + "loss": 0.8733, + "step": 125080 + }, + { + "epoch": 0.9054847372726154, + "grad_norm": 0.15361756086349487, + "learning_rate": 4.094522501393444e-06, + "loss": 0.8775, + "step": 125090 + }, + { + "epoch": 0.9055571239332015, + "grad_norm": 0.14809496700763702, + "learning_rate": 4.094450114732858e-06, + "loss": 0.8752, + "step": 125100 + }, + { + "epoch": 0.9056295105937878, + "grad_norm": 0.1699819564819336, + "learning_rate": 4.094377728072271e-06, + "loss": 0.8768, + "step": 125110 + }, + { + "epoch": 0.905701897254374, + "grad_norm": 0.3279818892478943, + "learning_rate": 4.094305341411685e-06, + "loss": 0.8726, + "step": 125120 + }, + { + "epoch": 0.9057742839149602, + "grad_norm": 0.14810748398303986, + "learning_rate": 4.094232954751099e-06, + "loss": 0.874, + "step": 125130 + }, + { + "epoch": 0.9058466705755464, + "grad_norm": 0.14194615185260773, + "learning_rate": 4.094160568090513e-06, + "loss": 0.8608, + "step": 125140 + }, + { + "epoch": 0.9059190572361325, + "grad_norm": 0.1805509328842163, + "learning_rate": 4.0940881814299265e-06, + "loss": 0.8603, + "step": 125150 + }, + { + "epoch": 0.9059914438967187, + "grad_norm": 0.15983721613883972, + "learning_rate": 4.09401579476934e-06, + "loss": 0.8724, + "step": 125160 + }, + { + "epoch": 0.9060638305573049, + "grad_norm": 0.15340054035186768, + "learning_rate": 4.093943408108755e-06, + "loss": 0.8659, + "step": 125170 + }, + { + "epoch": 0.9061362172178911, + "grad_norm": 0.14701798558235168, + "learning_rate": 4.093871021448168e-06, + "loss": 0.8699, + "step": 125180 + }, + { + "epoch": 0.9062086038784772, + "grad_norm": 0.14838139712810516, + "learning_rate": 4.093798634787582e-06, + "loss": 0.8712, + "step": 125190 + }, + { + "epoch": 0.9062809905390634, + "grad_norm": 0.1553456038236618, + "learning_rate": 4.0937262481269954e-06, + "loss": 0.8742, + "step": 125200 + }, + { + "epoch": 0.9063533771996497, + "grad_norm": 0.15716488659381866, + "learning_rate": 4.09365386146641e-06, + "loss": 0.8726, + "step": 125210 + }, + { + "epoch": 0.9064257638602359, + "grad_norm": 0.15702371299266815, + "learning_rate": 4.0935814748058235e-06, + "loss": 0.8847, + "step": 125220 + }, + { + "epoch": 0.906498150520822, + "grad_norm": 0.16816936433315277, + "learning_rate": 4.093509088145237e-06, + "loss": 0.8732, + "step": 125230 + }, + { + "epoch": 0.9065705371814082, + "grad_norm": 0.14697718620300293, + "learning_rate": 4.093436701484651e-06, + "loss": 0.8725, + "step": 125240 + }, + { + "epoch": 0.9066429238419944, + "grad_norm": 0.1436859369277954, + "learning_rate": 4.093364314824065e-06, + "loss": 0.8681, + "step": 125250 + }, + { + "epoch": 0.9067153105025806, + "grad_norm": 0.1546768695116043, + "learning_rate": 4.093291928163479e-06, + "loss": 0.8904, + "step": 125260 + }, + { + "epoch": 0.9067876971631668, + "grad_norm": 0.16405488550662994, + "learning_rate": 4.093219541502892e-06, + "loss": 0.8736, + "step": 125270 + }, + { + "epoch": 0.9068600838237529, + "grad_norm": 0.15231408178806305, + "learning_rate": 4.093147154842306e-06, + "loss": 0.8566, + "step": 125280 + }, + { + "epoch": 0.9069324704843391, + "grad_norm": 0.15230032801628113, + "learning_rate": 4.09307476818172e-06, + "loss": 0.8713, + "step": 125290 + }, + { + "epoch": 0.9070048571449253, + "grad_norm": 0.15494680404663086, + "learning_rate": 4.093002381521133e-06, + "loss": 0.8725, + "step": 125300 + }, + { + "epoch": 0.9070772438055115, + "grad_norm": 0.17046596109867096, + "learning_rate": 4.092929994860547e-06, + "loss": 0.8698, + "step": 125310 + }, + { + "epoch": 0.9071496304660978, + "grad_norm": 0.16179198026657104, + "learning_rate": 4.092857608199961e-06, + "loss": 0.8697, + "step": 125320 + }, + { + "epoch": 0.9072220171266839, + "grad_norm": 0.15248672664165497, + "learning_rate": 4.092785221539375e-06, + "loss": 0.8528, + "step": 125330 + }, + { + "epoch": 0.9072944037872701, + "grad_norm": 0.1498788446187973, + "learning_rate": 4.092712834878789e-06, + "loss": 0.8753, + "step": 125340 + }, + { + "epoch": 0.9073667904478563, + "grad_norm": 0.1529855877161026, + "learning_rate": 4.092640448218202e-06, + "loss": 0.8749, + "step": 125350 + }, + { + "epoch": 0.9074391771084425, + "grad_norm": 0.154828742146492, + "learning_rate": 4.092568061557617e-06, + "loss": 0.8729, + "step": 125360 + }, + { + "epoch": 0.9075115637690286, + "grad_norm": 0.15640468895435333, + "learning_rate": 4.09249567489703e-06, + "loss": 0.8613, + "step": 125370 + }, + { + "epoch": 0.9075839504296148, + "grad_norm": 0.16197548806667328, + "learning_rate": 4.092423288236444e-06, + "loss": 0.8664, + "step": 125380 + }, + { + "epoch": 0.907656337090201, + "grad_norm": 0.15120939910411835, + "learning_rate": 4.0923509015758575e-06, + "loss": 0.8811, + "step": 125390 + }, + { + "epoch": 0.9077287237507872, + "grad_norm": 0.14062069356441498, + "learning_rate": 4.092278514915272e-06, + "loss": 0.8531, + "step": 125400 + }, + { + "epoch": 0.9078011104113733, + "grad_norm": 0.15563814342021942, + "learning_rate": 4.092206128254686e-06, + "loss": 0.8861, + "step": 125410 + }, + { + "epoch": 0.9078734970719596, + "grad_norm": 0.16093406081199646, + "learning_rate": 4.092133741594099e-06, + "loss": 0.8706, + "step": 125420 + }, + { + "epoch": 0.9079458837325458, + "grad_norm": 0.15003357827663422, + "learning_rate": 4.092061354933513e-06, + "loss": 0.8709, + "step": 125430 + }, + { + "epoch": 0.908018270393132, + "grad_norm": 0.16230422258377075, + "learning_rate": 4.091988968272927e-06, + "loss": 0.8653, + "step": 125440 + }, + { + "epoch": 0.9080906570537182, + "grad_norm": 0.14883364737033844, + "learning_rate": 4.091916581612341e-06, + "loss": 0.8632, + "step": 125450 + }, + { + "epoch": 0.9081630437143043, + "grad_norm": 0.15273550152778625, + "learning_rate": 4.0918441949517545e-06, + "loss": 0.8826, + "step": 125460 + }, + { + "epoch": 0.9082354303748905, + "grad_norm": 0.1566764861345291, + "learning_rate": 4.091771808291168e-06, + "loss": 0.8754, + "step": 125470 + }, + { + "epoch": 0.9083078170354767, + "grad_norm": 0.17311665415763855, + "learning_rate": 4.091699421630582e-06, + "loss": 0.8671, + "step": 125480 + }, + { + "epoch": 0.9083802036960629, + "grad_norm": 0.15010863542556763, + "learning_rate": 4.091627034969996e-06, + "loss": 0.8626, + "step": 125490 + }, + { + "epoch": 0.908452590356649, + "grad_norm": 0.1590012162923813, + "learning_rate": 4.09155464830941e-06, + "loss": 0.8547, + "step": 125500 + }, + { + "epoch": 0.9085249770172352, + "grad_norm": 0.6873774528503418, + "learning_rate": 4.0914822616488235e-06, + "loss": 0.8869, + "step": 125510 + }, + { + "epoch": 0.9085973636778214, + "grad_norm": 0.1598593294620514, + "learning_rate": 4.091409874988237e-06, + "loss": 0.8807, + "step": 125520 + }, + { + "epoch": 0.9086697503384077, + "grad_norm": 0.1523318886756897, + "learning_rate": 4.0913374883276516e-06, + "loss": 0.8734, + "step": 125530 + }, + { + "epoch": 0.9087421369989939, + "grad_norm": 0.15472367405891418, + "learning_rate": 4.091265101667065e-06, + "loss": 0.8717, + "step": 125540 + }, + { + "epoch": 0.90881452365958, + "grad_norm": 0.15248602628707886, + "learning_rate": 4.091192715006479e-06, + "loss": 0.8718, + "step": 125550 + }, + { + "epoch": 0.9088869103201662, + "grad_norm": 0.15343599021434784, + "learning_rate": 4.091120328345892e-06, + "loss": 0.878, + "step": 125560 + }, + { + "epoch": 0.9089592969807524, + "grad_norm": 0.16457286477088928, + "learning_rate": 4.091047941685307e-06, + "loss": 0.8691, + "step": 125570 + }, + { + "epoch": 0.9090316836413386, + "grad_norm": 0.15641097724437714, + "learning_rate": 4.0909755550247205e-06, + "loss": 0.8795, + "step": 125580 + }, + { + "epoch": 0.9091040703019247, + "grad_norm": 0.18575336039066315, + "learning_rate": 4.090903168364134e-06, + "loss": 0.8739, + "step": 125590 + }, + { + "epoch": 0.9091764569625109, + "grad_norm": 0.1508977711200714, + "learning_rate": 4.090830781703548e-06, + "loss": 0.8898, + "step": 125600 + }, + { + "epoch": 0.9092488436230971, + "grad_norm": 0.14920225739479065, + "learning_rate": 4.090758395042962e-06, + "loss": 0.8587, + "step": 125610 + }, + { + "epoch": 0.9093212302836833, + "grad_norm": 0.15833619236946106, + "learning_rate": 4.090686008382376e-06, + "loss": 0.8692, + "step": 125620 + }, + { + "epoch": 0.9093936169442695, + "grad_norm": 0.15184055268764496, + "learning_rate": 4.090613621721789e-06, + "loss": 0.8775, + "step": 125630 + }, + { + "epoch": 0.9094660036048557, + "grad_norm": 0.14898662269115448, + "learning_rate": 4.090541235061203e-06, + "loss": 0.8754, + "step": 125640 + }, + { + "epoch": 0.9095383902654419, + "grad_norm": 0.16337692737579346, + "learning_rate": 4.0904688484006175e-06, + "loss": 0.875, + "step": 125650 + }, + { + "epoch": 0.9096107769260281, + "grad_norm": 0.15499453246593475, + "learning_rate": 4.090396461740031e-06, + "loss": 0.8584, + "step": 125660 + }, + { + "epoch": 0.9096831635866143, + "grad_norm": 0.15772931277751923, + "learning_rate": 4.090324075079445e-06, + "loss": 0.8766, + "step": 125670 + }, + { + "epoch": 0.9097555502472005, + "grad_norm": 0.14884252846240997, + "learning_rate": 4.090251688418858e-06, + "loss": 0.8841, + "step": 125680 + }, + { + "epoch": 0.9098279369077866, + "grad_norm": 0.14398325979709625, + "learning_rate": 4.090179301758273e-06, + "loss": 0.876, + "step": 125690 + }, + { + "epoch": 0.9099003235683728, + "grad_norm": 0.1522645205259323, + "learning_rate": 4.090106915097686e-06, + "loss": 0.8766, + "step": 125700 + }, + { + "epoch": 0.909972710228959, + "grad_norm": 0.18375910818576813, + "learning_rate": 4.0900345284371e-06, + "loss": 0.8715, + "step": 125710 + }, + { + "epoch": 0.9100450968895452, + "grad_norm": 0.1611146479845047, + "learning_rate": 4.089962141776514e-06, + "loss": 0.8661, + "step": 125720 + }, + { + "epoch": 0.9101174835501313, + "grad_norm": 0.2902297079563141, + "learning_rate": 4.089889755115928e-06, + "loss": 0.8631, + "step": 125730 + }, + { + "epoch": 0.9101898702107176, + "grad_norm": 0.18060381710529327, + "learning_rate": 4.089817368455342e-06, + "loss": 0.8654, + "step": 125740 + }, + { + "epoch": 0.9102622568713038, + "grad_norm": 0.15165190398693085, + "learning_rate": 4.089744981794755e-06, + "loss": 0.8734, + "step": 125750 + }, + { + "epoch": 0.91033464353189, + "grad_norm": 0.16775788366794586, + "learning_rate": 4.089672595134169e-06, + "loss": 0.8643, + "step": 125760 + }, + { + "epoch": 0.9104070301924762, + "grad_norm": 0.1607564091682434, + "learning_rate": 4.0896002084735834e-06, + "loss": 0.8653, + "step": 125770 + }, + { + "epoch": 0.9104794168530623, + "grad_norm": 0.15660177171230316, + "learning_rate": 4.089527821812997e-06, + "loss": 0.8727, + "step": 125780 + }, + { + "epoch": 0.9105518035136485, + "grad_norm": 0.14147257804870605, + "learning_rate": 4.089455435152411e-06, + "loss": 0.8755, + "step": 125790 + }, + { + "epoch": 0.9106241901742347, + "grad_norm": 0.1649179309606552, + "learning_rate": 4.089383048491824e-06, + "loss": 0.87, + "step": 125800 + }, + { + "epoch": 0.9106965768348209, + "grad_norm": 0.15854406356811523, + "learning_rate": 4.089310661831238e-06, + "loss": 0.8649, + "step": 125810 + }, + { + "epoch": 0.910768963495407, + "grad_norm": 0.16019472479820251, + "learning_rate": 4.0892382751706515e-06, + "loss": 0.8806, + "step": 125820 + }, + { + "epoch": 0.9108413501559932, + "grad_norm": 0.16454534232616425, + "learning_rate": 4.089165888510065e-06, + "loss": 0.874, + "step": 125830 + }, + { + "epoch": 0.9109137368165794, + "grad_norm": 0.14616376161575317, + "learning_rate": 4.08909350184948e-06, + "loss": 0.8781, + "step": 125840 + }, + { + "epoch": 0.9109861234771657, + "grad_norm": 0.16948239505290985, + "learning_rate": 4.089021115188893e-06, + "loss": 0.8746, + "step": 125850 + }, + { + "epoch": 0.9110585101377519, + "grad_norm": 0.1634904146194458, + "learning_rate": 4.088948728528307e-06, + "loss": 0.8729, + "step": 125860 + }, + { + "epoch": 0.911130896798338, + "grad_norm": 0.16780352592468262, + "learning_rate": 4.0888763418677204e-06, + "loss": 0.8731, + "step": 125870 + }, + { + "epoch": 0.9112032834589242, + "grad_norm": 0.1554594188928604, + "learning_rate": 4.088803955207135e-06, + "loss": 0.8751, + "step": 125880 + }, + { + "epoch": 0.9112756701195104, + "grad_norm": 0.1543656289577484, + "learning_rate": 4.0887315685465485e-06, + "loss": 0.8811, + "step": 125890 + }, + { + "epoch": 0.9113480567800966, + "grad_norm": 0.14425186812877655, + "learning_rate": 4.088659181885962e-06, + "loss": 0.8709, + "step": 125900 + }, + { + "epoch": 0.9114204434406827, + "grad_norm": 0.15529589354991913, + "learning_rate": 4.088586795225376e-06, + "loss": 0.8594, + "step": 125910 + }, + { + "epoch": 0.9114928301012689, + "grad_norm": 0.1789928823709488, + "learning_rate": 4.08851440856479e-06, + "loss": 0.867, + "step": 125920 + }, + { + "epoch": 0.9115652167618551, + "grad_norm": 0.15948614478111267, + "learning_rate": 4.088442021904204e-06, + "loss": 0.8622, + "step": 125930 + }, + { + "epoch": 0.9116376034224413, + "grad_norm": 0.18289946019649506, + "learning_rate": 4.0883696352436174e-06, + "loss": 0.8776, + "step": 125940 + }, + { + "epoch": 0.9117099900830276, + "grad_norm": 0.16194994747638702, + "learning_rate": 4.088297248583031e-06, + "loss": 0.8787, + "step": 125950 + }, + { + "epoch": 0.9117823767436137, + "grad_norm": 0.15668463706970215, + "learning_rate": 4.0882248619224455e-06, + "loss": 0.8665, + "step": 125960 + }, + { + "epoch": 0.9118547634041999, + "grad_norm": 0.14782801270484924, + "learning_rate": 4.088152475261859e-06, + "loss": 0.8671, + "step": 125970 + }, + { + "epoch": 0.9119271500647861, + "grad_norm": 0.14951111376285553, + "learning_rate": 4.088080088601273e-06, + "loss": 0.8714, + "step": 125980 + }, + { + "epoch": 0.9119995367253723, + "grad_norm": 0.15753866732120514, + "learning_rate": 4.088007701940686e-06, + "loss": 0.8804, + "step": 125990 + }, + { + "epoch": 0.9120719233859584, + "grad_norm": 0.1543973982334137, + "learning_rate": 4.087935315280101e-06, + "loss": 0.8618, + "step": 126000 + }, + { + "epoch": 0.9121443100465446, + "grad_norm": 0.16389751434326172, + "learning_rate": 4.0878629286195145e-06, + "loss": 0.865, + "step": 126010 + }, + { + "epoch": 0.9122166967071308, + "grad_norm": 0.15064431726932526, + "learning_rate": 4.087790541958928e-06, + "loss": 0.8691, + "step": 126020 + }, + { + "epoch": 0.912289083367717, + "grad_norm": 0.15450918674468994, + "learning_rate": 4.087718155298342e-06, + "loss": 0.8802, + "step": 126030 + }, + { + "epoch": 0.9123614700283031, + "grad_norm": 0.15472367405891418, + "learning_rate": 4.087645768637756e-06, + "loss": 0.8812, + "step": 126040 + }, + { + "epoch": 0.9124338566888893, + "grad_norm": 0.16118258237838745, + "learning_rate": 4.08757338197717e-06, + "loss": 0.8693, + "step": 126050 + }, + { + "epoch": 0.9125062433494756, + "grad_norm": 0.21667267382144928, + "learning_rate": 4.087500995316583e-06, + "loss": 0.8782, + "step": 126060 + }, + { + "epoch": 0.9125786300100618, + "grad_norm": 0.1615353375673294, + "learning_rate": 4.087428608655997e-06, + "loss": 0.8707, + "step": 126070 + }, + { + "epoch": 0.912651016670648, + "grad_norm": 0.15932497382164001, + "learning_rate": 4.0873562219954115e-06, + "loss": 0.8674, + "step": 126080 + }, + { + "epoch": 0.9127234033312341, + "grad_norm": 0.14429889619350433, + "learning_rate": 4.087283835334825e-06, + "loss": 0.8676, + "step": 126090 + }, + { + "epoch": 0.9127957899918203, + "grad_norm": 0.1498817354440689, + "learning_rate": 4.087211448674239e-06, + "loss": 0.8839, + "step": 126100 + }, + { + "epoch": 0.9128681766524065, + "grad_norm": 0.1536143273115158, + "learning_rate": 4.087139062013652e-06, + "loss": 0.866, + "step": 126110 + }, + { + "epoch": 0.9129405633129927, + "grad_norm": 0.14300012588500977, + "learning_rate": 4.087066675353066e-06, + "loss": 0.8694, + "step": 126120 + }, + { + "epoch": 0.9130129499735788, + "grad_norm": 0.15428438782691956, + "learning_rate": 4.08699428869248e-06, + "loss": 0.8779, + "step": 126130 + }, + { + "epoch": 0.913085336634165, + "grad_norm": 0.17595326900482178, + "learning_rate": 4.086921902031894e-06, + "loss": 0.8634, + "step": 126140 + }, + { + "epoch": 0.9131577232947512, + "grad_norm": 0.15784429013729095, + "learning_rate": 4.086849515371308e-06, + "loss": 0.8764, + "step": 126150 + }, + { + "epoch": 0.9132301099553374, + "grad_norm": 0.15594114363193512, + "learning_rate": 4.086777128710721e-06, + "loss": 0.8722, + "step": 126160 + }, + { + "epoch": 0.9133024966159237, + "grad_norm": 0.14309413731098175, + "learning_rate": 4.086704742050136e-06, + "loss": 0.8726, + "step": 126170 + }, + { + "epoch": 0.9133748832765098, + "grad_norm": 0.15265756845474243, + "learning_rate": 4.086632355389549e-06, + "loss": 0.8784, + "step": 126180 + }, + { + "epoch": 0.913447269937096, + "grad_norm": 0.20048628747463226, + "learning_rate": 4.086559968728963e-06, + "loss": 0.8717, + "step": 126190 + }, + { + "epoch": 0.9135196565976822, + "grad_norm": 0.15623369812965393, + "learning_rate": 4.0864875820683765e-06, + "loss": 0.8691, + "step": 126200 + }, + { + "epoch": 0.9135920432582684, + "grad_norm": 0.1530992090702057, + "learning_rate": 4.086415195407791e-06, + "loss": 0.8678, + "step": 126210 + }, + { + "epoch": 0.9136644299188545, + "grad_norm": 0.15348680317401886, + "learning_rate": 4.086342808747205e-06, + "loss": 0.8609, + "step": 126220 + }, + { + "epoch": 0.9137368165794407, + "grad_norm": 0.1623658686876297, + "learning_rate": 4.086270422086618e-06, + "loss": 0.874, + "step": 126230 + }, + { + "epoch": 0.9138092032400269, + "grad_norm": 0.16523189842700958, + "learning_rate": 4.086198035426032e-06, + "loss": 0.8834, + "step": 126240 + }, + { + "epoch": 0.9138815899006131, + "grad_norm": 0.148997500538826, + "learning_rate": 4.086125648765446e-06, + "loss": 0.8689, + "step": 126250 + }, + { + "epoch": 0.9139539765611993, + "grad_norm": 0.15621952712535858, + "learning_rate": 4.08605326210486e-06, + "loss": 0.8786, + "step": 126260 + }, + { + "epoch": 0.9140263632217855, + "grad_norm": 0.14902496337890625, + "learning_rate": 4.0859808754442736e-06, + "loss": 0.87, + "step": 126270 + }, + { + "epoch": 0.9140987498823717, + "grad_norm": 0.15417183935642242, + "learning_rate": 4.085908488783687e-06, + "loss": 0.8674, + "step": 126280 + }, + { + "epoch": 0.9141711365429579, + "grad_norm": 0.19318684935569763, + "learning_rate": 4.085836102123102e-06, + "loss": 0.858, + "step": 126290 + }, + { + "epoch": 0.9142435232035441, + "grad_norm": 0.15379764139652252, + "learning_rate": 4.085763715462515e-06, + "loss": 0.8635, + "step": 126300 + }, + { + "epoch": 0.9143159098641302, + "grad_norm": 0.37275516986846924, + "learning_rate": 4.085691328801929e-06, + "loss": 0.8751, + "step": 126310 + }, + { + "epoch": 0.9143882965247164, + "grad_norm": 0.1587948054075241, + "learning_rate": 4.0856189421413425e-06, + "loss": 0.8655, + "step": 126320 + }, + { + "epoch": 0.9144606831853026, + "grad_norm": 0.15148475766181946, + "learning_rate": 4.085546555480756e-06, + "loss": 0.863, + "step": 126330 + }, + { + "epoch": 0.9145330698458888, + "grad_norm": 0.16496942937374115, + "learning_rate": 4.08547416882017e-06, + "loss": 0.8457, + "step": 126340 + }, + { + "epoch": 0.914605456506475, + "grad_norm": 0.15391036868095398, + "learning_rate": 4.085401782159583e-06, + "loss": 0.8728, + "step": 126350 + }, + { + "epoch": 0.9146778431670611, + "grad_norm": 0.15899689495563507, + "learning_rate": 4.085329395498998e-06, + "loss": 0.8644, + "step": 126360 + }, + { + "epoch": 0.9147502298276473, + "grad_norm": 0.1451529711484909, + "learning_rate": 4.085257008838411e-06, + "loss": 0.871, + "step": 126370 + }, + { + "epoch": 0.9148226164882336, + "grad_norm": 0.14316491782665253, + "learning_rate": 4.085184622177825e-06, + "loss": 0.8741, + "step": 126380 + }, + { + "epoch": 0.9148950031488198, + "grad_norm": 0.1564132571220398, + "learning_rate": 4.085112235517239e-06, + "loss": 0.8763, + "step": 126390 + }, + { + "epoch": 0.914967389809406, + "grad_norm": 0.16010133922100067, + "learning_rate": 4.085039848856653e-06, + "loss": 0.8729, + "step": 126400 + }, + { + "epoch": 0.9150397764699921, + "grad_norm": 0.15824563801288605, + "learning_rate": 4.084967462196067e-06, + "loss": 0.8694, + "step": 126410 + }, + { + "epoch": 0.9151121631305783, + "grad_norm": 0.16496235132217407, + "learning_rate": 4.08489507553548e-06, + "loss": 0.886, + "step": 126420 + }, + { + "epoch": 0.9151845497911645, + "grad_norm": 0.15750516951084137, + "learning_rate": 4.084822688874894e-06, + "loss": 0.8668, + "step": 126430 + }, + { + "epoch": 0.9152569364517507, + "grad_norm": 0.1529252976179123, + "learning_rate": 4.084750302214308e-06, + "loss": 0.8663, + "step": 126440 + }, + { + "epoch": 0.9153293231123368, + "grad_norm": 0.15626297891139984, + "learning_rate": 4.084677915553722e-06, + "loss": 0.8725, + "step": 126450 + }, + { + "epoch": 0.915401709772923, + "grad_norm": 0.15462850034236908, + "learning_rate": 4.084605528893136e-06, + "loss": 0.8787, + "step": 126460 + }, + { + "epoch": 0.9154740964335092, + "grad_norm": 0.15536385774612427, + "learning_rate": 4.084533142232549e-06, + "loss": 0.8707, + "step": 126470 + }, + { + "epoch": 0.9155464830940954, + "grad_norm": 0.1599668562412262, + "learning_rate": 4.084460755571964e-06, + "loss": 0.8703, + "step": 126480 + }, + { + "epoch": 0.9156188697546817, + "grad_norm": 0.1455414891242981, + "learning_rate": 4.084388368911377e-06, + "loss": 0.8686, + "step": 126490 + }, + { + "epoch": 0.9156912564152678, + "grad_norm": 0.15002378821372986, + "learning_rate": 4.084315982250791e-06, + "loss": 0.8834, + "step": 126500 + }, + { + "epoch": 0.915763643075854, + "grad_norm": 0.16886621713638306, + "learning_rate": 4.084243595590205e-06, + "loss": 0.8663, + "step": 126510 + }, + { + "epoch": 0.9158360297364402, + "grad_norm": 0.15072478353977203, + "learning_rate": 4.084171208929619e-06, + "loss": 0.8776, + "step": 126520 + }, + { + "epoch": 0.9159084163970264, + "grad_norm": 0.18889220058918, + "learning_rate": 4.084098822269033e-06, + "loss": 0.8762, + "step": 126530 + }, + { + "epoch": 0.9159808030576125, + "grad_norm": 0.1520804464817047, + "learning_rate": 4.084026435608446e-06, + "loss": 0.8669, + "step": 126540 + }, + { + "epoch": 0.9160531897181987, + "grad_norm": 0.16211113333702087, + "learning_rate": 4.08395404894786e-06, + "loss": 0.8734, + "step": 126550 + }, + { + "epoch": 0.9161255763787849, + "grad_norm": 0.21941564977169037, + "learning_rate": 4.083881662287274e-06, + "loss": 0.876, + "step": 126560 + }, + { + "epoch": 0.9161979630393711, + "grad_norm": 0.14378584921360016, + "learning_rate": 4.083809275626688e-06, + "loss": 0.8728, + "step": 126570 + }, + { + "epoch": 0.9162703496999572, + "grad_norm": 0.14332708716392517, + "learning_rate": 4.083736888966102e-06, + "loss": 0.8764, + "step": 126580 + }, + { + "epoch": 0.9163427363605435, + "grad_norm": 0.14835390448570251, + "learning_rate": 4.083664502305515e-06, + "loss": 0.8651, + "step": 126590 + }, + { + "epoch": 0.9164151230211297, + "grad_norm": 0.1542014479637146, + "learning_rate": 4.08359211564493e-06, + "loss": 0.8678, + "step": 126600 + }, + { + "epoch": 0.9164875096817159, + "grad_norm": 0.1536477953195572, + "learning_rate": 4.083519728984343e-06, + "loss": 0.88, + "step": 126610 + }, + { + "epoch": 0.9165598963423021, + "grad_norm": 0.16406361758708954, + "learning_rate": 4.083447342323757e-06, + "loss": 0.8792, + "step": 126620 + }, + { + "epoch": 0.9166322830028882, + "grad_norm": 0.14225560426712036, + "learning_rate": 4.0833749556631705e-06, + "loss": 0.878, + "step": 126630 + }, + { + "epoch": 0.9167046696634744, + "grad_norm": 0.1528317928314209, + "learning_rate": 4.083302569002585e-06, + "loss": 0.8706, + "step": 126640 + }, + { + "epoch": 0.9167770563240606, + "grad_norm": 0.1531572937965393, + "learning_rate": 4.083230182341999e-06, + "loss": 0.8681, + "step": 126650 + }, + { + "epoch": 0.9168494429846468, + "grad_norm": 0.163166806101799, + "learning_rate": 4.083157795681412e-06, + "loss": 0.8694, + "step": 126660 + }, + { + "epoch": 0.9169218296452329, + "grad_norm": 0.1512100100517273, + "learning_rate": 4.083085409020826e-06, + "loss": 0.8655, + "step": 126670 + }, + { + "epoch": 0.9169942163058191, + "grad_norm": 0.14937952160835266, + "learning_rate": 4.08301302236024e-06, + "loss": 0.8743, + "step": 126680 + }, + { + "epoch": 0.9170666029664053, + "grad_norm": 0.16589467227458954, + "learning_rate": 4.082940635699654e-06, + "loss": 0.8736, + "step": 126690 + }, + { + "epoch": 0.9171389896269916, + "grad_norm": 0.15574873983860016, + "learning_rate": 4.0828682490390675e-06, + "loss": 0.8724, + "step": 126700 + }, + { + "epoch": 0.9172113762875778, + "grad_norm": 0.15169155597686768, + "learning_rate": 4.082795862378481e-06, + "loss": 0.8803, + "step": 126710 + }, + { + "epoch": 0.9172837629481639, + "grad_norm": 0.15095655620098114, + "learning_rate": 4.082723475717896e-06, + "loss": 0.8695, + "step": 126720 + }, + { + "epoch": 0.9173561496087501, + "grad_norm": 0.144984170794487, + "learning_rate": 4.082651089057309e-06, + "loss": 0.8814, + "step": 126730 + }, + { + "epoch": 0.9174285362693363, + "grad_norm": 0.15031631290912628, + "learning_rate": 4.082578702396723e-06, + "loss": 0.8549, + "step": 126740 + }, + { + "epoch": 0.9175009229299225, + "grad_norm": 0.15027374029159546, + "learning_rate": 4.0825063157361364e-06, + "loss": 0.8781, + "step": 126750 + }, + { + "epoch": 0.9175733095905086, + "grad_norm": 0.1555909812450409, + "learning_rate": 4.08243392907555e-06, + "loss": 0.8563, + "step": 126760 + }, + { + "epoch": 0.9176456962510948, + "grad_norm": 0.1572585105895996, + "learning_rate": 4.0823615424149645e-06, + "loss": 0.88, + "step": 126770 + }, + { + "epoch": 0.917718082911681, + "grad_norm": 0.15121687948703766, + "learning_rate": 4.082289155754378e-06, + "loss": 0.864, + "step": 126780 + }, + { + "epoch": 0.9177904695722672, + "grad_norm": 0.14316906034946442, + "learning_rate": 4.082216769093792e-06, + "loss": 0.877, + "step": 126790 + }, + { + "epoch": 0.9178628562328535, + "grad_norm": 0.1634669452905655, + "learning_rate": 4.082144382433205e-06, + "loss": 0.8763, + "step": 126800 + }, + { + "epoch": 0.9179352428934396, + "grad_norm": 0.1625238060951233, + "learning_rate": 4.08207199577262e-06, + "loss": 0.8826, + "step": 126810 + }, + { + "epoch": 0.9180076295540258, + "grad_norm": 0.1542353332042694, + "learning_rate": 4.0819996091120335e-06, + "loss": 0.8715, + "step": 126820 + }, + { + "epoch": 0.918080016214612, + "grad_norm": 0.156948059797287, + "learning_rate": 4.081927222451447e-06, + "loss": 0.8703, + "step": 126830 + }, + { + "epoch": 0.9181524028751982, + "grad_norm": 0.17528340220451355, + "learning_rate": 4.081854835790861e-06, + "loss": 0.8721, + "step": 126840 + }, + { + "epoch": 0.9182247895357843, + "grad_norm": 0.15125548839569092, + "learning_rate": 4.081782449130275e-06, + "loss": 0.8732, + "step": 126850 + }, + { + "epoch": 0.9182971761963705, + "grad_norm": 0.1517082154750824, + "learning_rate": 4.081710062469688e-06, + "loss": 0.8781, + "step": 126860 + }, + { + "epoch": 0.9183695628569567, + "grad_norm": 0.16170631349086761, + "learning_rate": 4.081637675809102e-06, + "loss": 0.8683, + "step": 126870 + }, + { + "epoch": 0.9184419495175429, + "grad_norm": 0.15255410969257355, + "learning_rate": 4.081565289148516e-06, + "loss": 0.8665, + "step": 126880 + }, + { + "epoch": 0.918514336178129, + "grad_norm": 0.158643901348114, + "learning_rate": 4.08149290248793e-06, + "loss": 0.8795, + "step": 126890 + }, + { + "epoch": 0.9185867228387152, + "grad_norm": 0.15852761268615723, + "learning_rate": 4.081420515827343e-06, + "loss": 0.87, + "step": 126900 + }, + { + "epoch": 0.9186591094993015, + "grad_norm": 0.2482609897851944, + "learning_rate": 4.081348129166757e-06, + "loss": 0.8684, + "step": 126910 + }, + { + "epoch": 0.9187314961598877, + "grad_norm": 0.16423265635967255, + "learning_rate": 4.081275742506171e-06, + "loss": 0.8796, + "step": 126920 + }, + { + "epoch": 0.9188038828204739, + "grad_norm": 0.1586569845676422, + "learning_rate": 4.081203355845585e-06, + "loss": 0.8862, + "step": 126930 + }, + { + "epoch": 0.91887626948106, + "grad_norm": 0.1470942199230194, + "learning_rate": 4.0811309691849985e-06, + "loss": 0.8605, + "step": 126940 + }, + { + "epoch": 0.9189486561416462, + "grad_norm": 0.17311806976795197, + "learning_rate": 4.081058582524412e-06, + "loss": 0.8771, + "step": 126950 + }, + { + "epoch": 0.9190210428022324, + "grad_norm": 0.16579419374465942, + "learning_rate": 4.080986195863827e-06, + "loss": 0.8782, + "step": 126960 + }, + { + "epoch": 0.9190934294628186, + "grad_norm": 0.1688244491815567, + "learning_rate": 4.08091380920324e-06, + "loss": 0.8752, + "step": 126970 + }, + { + "epoch": 0.9191658161234048, + "grad_norm": 0.14942839741706848, + "learning_rate": 4.080841422542654e-06, + "loss": 0.8588, + "step": 126980 + }, + { + "epoch": 0.9192382027839909, + "grad_norm": 0.18559300899505615, + "learning_rate": 4.0807690358820675e-06, + "loss": 0.8744, + "step": 126990 + }, + { + "epoch": 0.9193105894445771, + "grad_norm": 0.15597479045391083, + "learning_rate": 4.080696649221482e-06, + "loss": 0.8656, + "step": 127000 + }, + { + "epoch": 0.9193829761051633, + "grad_norm": 0.15126492083072662, + "learning_rate": 4.0806242625608956e-06, + "loss": 0.87, + "step": 127010 + }, + { + "epoch": 0.9194553627657496, + "grad_norm": 0.1641504019498825, + "learning_rate": 4.080551875900309e-06, + "loss": 0.8796, + "step": 127020 + }, + { + "epoch": 0.9195277494263357, + "grad_norm": 0.15217795968055725, + "learning_rate": 4.080479489239723e-06, + "loss": 0.8741, + "step": 127030 + }, + { + "epoch": 0.9196001360869219, + "grad_norm": 0.16015304625034332, + "learning_rate": 4.080407102579137e-06, + "loss": 0.8903, + "step": 127040 + }, + { + "epoch": 0.9196725227475081, + "grad_norm": 0.15204580128192902, + "learning_rate": 4.080334715918551e-06, + "loss": 0.8619, + "step": 127050 + }, + { + "epoch": 0.9197449094080943, + "grad_norm": 0.14324235916137695, + "learning_rate": 4.0802623292579645e-06, + "loss": 0.8609, + "step": 127060 + }, + { + "epoch": 0.9198172960686805, + "grad_norm": 0.1518729031085968, + "learning_rate": 4.080189942597378e-06, + "loss": 0.8761, + "step": 127070 + }, + { + "epoch": 0.9198896827292666, + "grad_norm": 0.15307843685150146, + "learning_rate": 4.0801175559367926e-06, + "loss": 0.8673, + "step": 127080 + }, + { + "epoch": 0.9199620693898528, + "grad_norm": 0.14242909848690033, + "learning_rate": 4.080045169276206e-06, + "loss": 0.8756, + "step": 127090 + }, + { + "epoch": 0.920034456050439, + "grad_norm": 0.16166019439697266, + "learning_rate": 4.07997278261562e-06, + "loss": 0.8742, + "step": 127100 + }, + { + "epoch": 0.9201068427110252, + "grad_norm": 0.15393179655075073, + "learning_rate": 4.079900395955033e-06, + "loss": 0.8783, + "step": 127110 + }, + { + "epoch": 0.9201792293716115, + "grad_norm": 0.16669778525829315, + "learning_rate": 4.079828009294448e-06, + "loss": 0.8653, + "step": 127120 + }, + { + "epoch": 0.9202516160321976, + "grad_norm": 0.15323293209075928, + "learning_rate": 4.0797556226338615e-06, + "loss": 0.8681, + "step": 127130 + }, + { + "epoch": 0.9203240026927838, + "grad_norm": 0.1925155520439148, + "learning_rate": 4.079683235973275e-06, + "loss": 0.8669, + "step": 127140 + }, + { + "epoch": 0.92039638935337, + "grad_norm": 0.16045774519443512, + "learning_rate": 4.079610849312689e-06, + "loss": 0.8688, + "step": 127150 + }, + { + "epoch": 0.9204687760139562, + "grad_norm": 0.16183599829673767, + "learning_rate": 4.079538462652103e-06, + "loss": 0.8602, + "step": 127160 + }, + { + "epoch": 0.9205411626745423, + "grad_norm": 0.15572576224803925, + "learning_rate": 4.079466075991517e-06, + "loss": 0.8722, + "step": 127170 + }, + { + "epoch": 0.9206135493351285, + "grad_norm": 0.15877743065357208, + "learning_rate": 4.07939368933093e-06, + "loss": 0.8783, + "step": 127180 + }, + { + "epoch": 0.9206859359957147, + "grad_norm": 0.14954781532287598, + "learning_rate": 4.079321302670344e-06, + "loss": 0.8597, + "step": 127190 + }, + { + "epoch": 0.9207583226563009, + "grad_norm": 0.1656097173690796, + "learning_rate": 4.0792489160097585e-06, + "loss": 0.8725, + "step": 127200 + }, + { + "epoch": 0.920830709316887, + "grad_norm": 0.15802302956581116, + "learning_rate": 4.079176529349172e-06, + "loss": 0.8739, + "step": 127210 + }, + { + "epoch": 0.9209030959774732, + "grad_norm": 0.14850866794586182, + "learning_rate": 4.079104142688586e-06, + "loss": 0.854, + "step": 127220 + }, + { + "epoch": 0.9209754826380595, + "grad_norm": 0.15718205273151398, + "learning_rate": 4.079031756027999e-06, + "loss": 0.876, + "step": 127230 + }, + { + "epoch": 0.9210478692986457, + "grad_norm": 0.15593793988227844, + "learning_rate": 4.078959369367414e-06, + "loss": 0.8773, + "step": 127240 + }, + { + "epoch": 0.9211202559592319, + "grad_norm": 0.14483477175235748, + "learning_rate": 4.0788869827068274e-06, + "loss": 0.8722, + "step": 127250 + }, + { + "epoch": 0.921192642619818, + "grad_norm": 0.16658411920070648, + "learning_rate": 4.078814596046241e-06, + "loss": 0.8677, + "step": 127260 + }, + { + "epoch": 0.9212650292804042, + "grad_norm": 0.1465682089328766, + "learning_rate": 4.078742209385655e-06, + "loss": 0.8688, + "step": 127270 + }, + { + "epoch": 0.9213374159409904, + "grad_norm": 0.16307535767555237, + "learning_rate": 4.078669822725069e-06, + "loss": 0.8704, + "step": 127280 + }, + { + "epoch": 0.9214098026015766, + "grad_norm": 0.303844153881073, + "learning_rate": 4.078597436064483e-06, + "loss": 0.8708, + "step": 127290 + }, + { + "epoch": 0.9214821892621627, + "grad_norm": 0.14299264550209045, + "learning_rate": 4.078525049403896e-06, + "loss": 0.8653, + "step": 127300 + }, + { + "epoch": 0.9215545759227489, + "grad_norm": 0.14078418910503387, + "learning_rate": 4.07845266274331e-06, + "loss": 0.8736, + "step": 127310 + }, + { + "epoch": 0.9216269625833351, + "grad_norm": 0.14355109632015228, + "learning_rate": 4.0783802760827244e-06, + "loss": 0.8658, + "step": 127320 + }, + { + "epoch": 0.9216993492439214, + "grad_norm": 0.18450787663459778, + "learning_rate": 4.078307889422138e-06, + "loss": 0.8626, + "step": 127330 + }, + { + "epoch": 0.9217717359045076, + "grad_norm": 0.16404852271080017, + "learning_rate": 4.078235502761552e-06, + "loss": 0.8701, + "step": 127340 + }, + { + "epoch": 0.9218441225650937, + "grad_norm": 0.15658390522003174, + "learning_rate": 4.078163116100965e-06, + "loss": 0.8798, + "step": 127350 + }, + { + "epoch": 0.9219165092256799, + "grad_norm": 0.2226731926202774, + "learning_rate": 4.078090729440379e-06, + "loss": 0.8801, + "step": 127360 + }, + { + "epoch": 0.9219888958862661, + "grad_norm": 0.19657254219055176, + "learning_rate": 4.078018342779793e-06, + "loss": 0.8749, + "step": 127370 + }, + { + "epoch": 0.9220612825468523, + "grad_norm": 0.15708957612514496, + "learning_rate": 4.077945956119207e-06, + "loss": 0.8697, + "step": 127380 + }, + { + "epoch": 0.9221336692074384, + "grad_norm": 0.16604341566562653, + "learning_rate": 4.077873569458621e-06, + "loss": 0.8535, + "step": 127390 + }, + { + "epoch": 0.9222060558680246, + "grad_norm": 0.14287860691547394, + "learning_rate": 4.077801182798034e-06, + "loss": 0.8556, + "step": 127400 + }, + { + "epoch": 0.9222784425286108, + "grad_norm": 0.1693638414144516, + "learning_rate": 4.077728796137448e-06, + "loss": 0.8789, + "step": 127410 + }, + { + "epoch": 0.922350829189197, + "grad_norm": 0.1491190791130066, + "learning_rate": 4.0776564094768614e-06, + "loss": 0.863, + "step": 127420 + }, + { + "epoch": 0.9224232158497832, + "grad_norm": 0.14451493322849274, + "learning_rate": 4.077584022816276e-06, + "loss": 0.8768, + "step": 127430 + }, + { + "epoch": 0.9224956025103694, + "grad_norm": 0.155923992395401, + "learning_rate": 4.0775116361556895e-06, + "loss": 0.8745, + "step": 127440 + }, + { + "epoch": 0.9225679891709556, + "grad_norm": 0.1764858216047287, + "learning_rate": 4.077439249495103e-06, + "loss": 0.8792, + "step": 127450 + }, + { + "epoch": 0.9226403758315418, + "grad_norm": 0.16587840020656586, + "learning_rate": 4.077366862834517e-06, + "loss": 0.8666, + "step": 127460 + }, + { + "epoch": 0.922712762492128, + "grad_norm": 0.16507935523986816, + "learning_rate": 4.077294476173931e-06, + "loss": 0.8765, + "step": 127470 + }, + { + "epoch": 0.9227851491527141, + "grad_norm": 0.15929803252220154, + "learning_rate": 4.077222089513345e-06, + "loss": 0.8667, + "step": 127480 + }, + { + "epoch": 0.9228575358133003, + "grad_norm": 0.15834636986255646, + "learning_rate": 4.0771497028527584e-06, + "loss": 0.8802, + "step": 127490 + }, + { + "epoch": 0.9229299224738865, + "grad_norm": 0.1508595049381256, + "learning_rate": 4.077077316192172e-06, + "loss": 0.8584, + "step": 127500 + }, + { + "epoch": 0.9230023091344727, + "grad_norm": 0.14436863362789154, + "learning_rate": 4.0770049295315865e-06, + "loss": 0.8725, + "step": 127510 + }, + { + "epoch": 0.9230746957950589, + "grad_norm": 0.17195680737495422, + "learning_rate": 4.076932542871e-06, + "loss": 0.8717, + "step": 127520 + }, + { + "epoch": 0.923147082455645, + "grad_norm": 0.16129696369171143, + "learning_rate": 4.076860156210414e-06, + "loss": 0.8821, + "step": 127530 + }, + { + "epoch": 0.9232194691162312, + "grad_norm": 0.15177470445632935, + "learning_rate": 4.076787769549827e-06, + "loss": 0.8636, + "step": 127540 + }, + { + "epoch": 0.9232918557768175, + "grad_norm": 0.16349272429943085, + "learning_rate": 4.076715382889241e-06, + "loss": 0.8705, + "step": 127550 + }, + { + "epoch": 0.9233642424374037, + "grad_norm": 0.14663663506507874, + "learning_rate": 4.0766429962286555e-06, + "loss": 0.8571, + "step": 127560 + }, + { + "epoch": 0.9234366290979898, + "grad_norm": 0.16342896223068237, + "learning_rate": 4.076570609568069e-06, + "loss": 0.8687, + "step": 127570 + }, + { + "epoch": 0.923509015758576, + "grad_norm": 0.15321224927902222, + "learning_rate": 4.076498222907483e-06, + "loss": 0.8622, + "step": 127580 + }, + { + "epoch": 0.9235814024191622, + "grad_norm": 0.1598549485206604, + "learning_rate": 4.076425836246896e-06, + "loss": 0.8786, + "step": 127590 + }, + { + "epoch": 0.9236537890797484, + "grad_norm": 0.16147591173648834, + "learning_rate": 4.076353449586311e-06, + "loss": 0.872, + "step": 127600 + }, + { + "epoch": 0.9237261757403346, + "grad_norm": 0.14562930166721344, + "learning_rate": 4.076281062925724e-06, + "loss": 0.8682, + "step": 127610 + }, + { + "epoch": 0.9237985624009207, + "grad_norm": 0.1647154986858368, + "learning_rate": 4.076208676265138e-06, + "loss": 0.8815, + "step": 127620 + }, + { + "epoch": 0.9238709490615069, + "grad_norm": 0.1684718132019043, + "learning_rate": 4.076136289604552e-06, + "loss": 0.8714, + "step": 127630 + }, + { + "epoch": 0.9239433357220931, + "grad_norm": 0.16165629029273987, + "learning_rate": 4.076063902943966e-06, + "loss": 0.8705, + "step": 127640 + }, + { + "epoch": 0.9240157223826794, + "grad_norm": 0.15720994770526886, + "learning_rate": 4.07599151628338e-06, + "loss": 0.8629, + "step": 127650 + }, + { + "epoch": 0.9240881090432655, + "grad_norm": 0.16634640097618103, + "learning_rate": 4.075919129622793e-06, + "loss": 0.883, + "step": 127660 + }, + { + "epoch": 0.9241604957038517, + "grad_norm": 0.1512623280286789, + "learning_rate": 4.075846742962207e-06, + "loss": 0.8628, + "step": 127670 + }, + { + "epoch": 0.9242328823644379, + "grad_norm": 0.14942419528961182, + "learning_rate": 4.075774356301621e-06, + "loss": 0.8543, + "step": 127680 + }, + { + "epoch": 0.9243052690250241, + "grad_norm": 0.15057924389839172, + "learning_rate": 4.075701969641035e-06, + "loss": 0.8695, + "step": 127690 + }, + { + "epoch": 0.9243776556856103, + "grad_norm": 0.15089672803878784, + "learning_rate": 4.075629582980449e-06, + "loss": 0.8924, + "step": 127700 + }, + { + "epoch": 0.9244500423461964, + "grad_norm": 0.15450294315814972, + "learning_rate": 4.075557196319862e-06, + "loss": 0.8642, + "step": 127710 + }, + { + "epoch": 0.9245224290067826, + "grad_norm": 0.14952602982521057, + "learning_rate": 4.075484809659277e-06, + "loss": 0.8694, + "step": 127720 + }, + { + "epoch": 0.9245948156673688, + "grad_norm": 0.16396868228912354, + "learning_rate": 4.07541242299869e-06, + "loss": 0.8905, + "step": 127730 + }, + { + "epoch": 0.924667202327955, + "grad_norm": 0.1575574278831482, + "learning_rate": 4.075340036338104e-06, + "loss": 0.8772, + "step": 127740 + }, + { + "epoch": 0.9247395889885411, + "grad_norm": 0.16081462800502777, + "learning_rate": 4.0752676496775176e-06, + "loss": 0.8768, + "step": 127750 + }, + { + "epoch": 0.9248119756491274, + "grad_norm": 0.14505434036254883, + "learning_rate": 4.075195263016932e-06, + "loss": 0.8524, + "step": 127760 + }, + { + "epoch": 0.9248843623097136, + "grad_norm": 0.15242122113704681, + "learning_rate": 4.075122876356346e-06, + "loss": 0.8701, + "step": 127770 + }, + { + "epoch": 0.9249567489702998, + "grad_norm": 0.14712516963481903, + "learning_rate": 4.075050489695759e-06, + "loss": 0.8619, + "step": 127780 + }, + { + "epoch": 0.925029135630886, + "grad_norm": 0.15457618236541748, + "learning_rate": 4.074978103035173e-06, + "loss": 0.8745, + "step": 127790 + }, + { + "epoch": 0.9251015222914721, + "grad_norm": 0.15267246961593628, + "learning_rate": 4.074905716374587e-06, + "loss": 0.8584, + "step": 127800 + }, + { + "epoch": 0.9251739089520583, + "grad_norm": 0.15300290286540985, + "learning_rate": 4.074833329714001e-06, + "loss": 0.8626, + "step": 127810 + }, + { + "epoch": 0.9252462956126445, + "grad_norm": 0.14224673807621002, + "learning_rate": 4.0747609430534146e-06, + "loss": 0.8753, + "step": 127820 + }, + { + "epoch": 0.9253186822732307, + "grad_norm": 0.15373513102531433, + "learning_rate": 4.074688556392828e-06, + "loss": 0.8811, + "step": 127830 + }, + { + "epoch": 0.9253910689338168, + "grad_norm": 0.15759536623954773, + "learning_rate": 4.074616169732243e-06, + "loss": 0.8691, + "step": 127840 + }, + { + "epoch": 0.925463455594403, + "grad_norm": 0.16300243139266968, + "learning_rate": 4.074543783071656e-06, + "loss": 0.8709, + "step": 127850 + }, + { + "epoch": 0.9255358422549893, + "grad_norm": 0.16847741603851318, + "learning_rate": 4.07447139641107e-06, + "loss": 0.8689, + "step": 127860 + }, + { + "epoch": 0.9256082289155755, + "grad_norm": 0.16154174506664276, + "learning_rate": 4.0743990097504835e-06, + "loss": 0.8533, + "step": 127870 + }, + { + "epoch": 0.9256806155761617, + "grad_norm": 0.15363070368766785, + "learning_rate": 4.074326623089898e-06, + "loss": 0.8616, + "step": 127880 + }, + { + "epoch": 0.9257530022367478, + "grad_norm": 0.14643029868602753, + "learning_rate": 4.0742542364293116e-06, + "loss": 0.8536, + "step": 127890 + }, + { + "epoch": 0.925825388897334, + "grad_norm": 0.14773331582546234, + "learning_rate": 4.074181849768725e-06, + "loss": 0.8606, + "step": 127900 + }, + { + "epoch": 0.9258977755579202, + "grad_norm": 0.1448742300271988, + "learning_rate": 4.074109463108139e-06, + "loss": 0.8676, + "step": 127910 + }, + { + "epoch": 0.9259701622185064, + "grad_norm": 0.18105871975421906, + "learning_rate": 4.074037076447552e-06, + "loss": 0.8664, + "step": 127920 + }, + { + "epoch": 0.9260425488790925, + "grad_norm": 0.1581772118806839, + "learning_rate": 4.073964689786966e-06, + "loss": 0.8803, + "step": 127930 + }, + { + "epoch": 0.9261149355396787, + "grad_norm": 0.1465241014957428, + "learning_rate": 4.07389230312638e-06, + "loss": 0.8762, + "step": 127940 + }, + { + "epoch": 0.9261873222002649, + "grad_norm": 0.16206571459770203, + "learning_rate": 4.073819916465794e-06, + "loss": 0.8781, + "step": 127950 + }, + { + "epoch": 0.9262597088608511, + "grad_norm": 0.1485998034477234, + "learning_rate": 4.073747529805208e-06, + "loss": 0.8712, + "step": 127960 + }, + { + "epoch": 0.9263320955214374, + "grad_norm": 0.15023688971996307, + "learning_rate": 4.073675143144621e-06, + "loss": 0.8626, + "step": 127970 + }, + { + "epoch": 0.9264044821820235, + "grad_norm": 0.14640280604362488, + "learning_rate": 4.073602756484035e-06, + "loss": 0.8683, + "step": 127980 + }, + { + "epoch": 0.9264768688426097, + "grad_norm": 0.15513427555561066, + "learning_rate": 4.073530369823449e-06, + "loss": 0.8722, + "step": 127990 + }, + { + "epoch": 0.9265492555031959, + "grad_norm": 0.14883030951023102, + "learning_rate": 4.073457983162863e-06, + "loss": 0.8616, + "step": 128000 + }, + { + "epoch": 0.9266216421637821, + "grad_norm": 0.17207932472229004, + "learning_rate": 4.073385596502277e-06, + "loss": 0.8649, + "step": 128010 + }, + { + "epoch": 0.9266940288243682, + "grad_norm": 0.14062434434890747, + "learning_rate": 4.07331320984169e-06, + "loss": 0.8589, + "step": 128020 + }, + { + "epoch": 0.9267664154849544, + "grad_norm": 0.14233818650245667, + "learning_rate": 4.073240823181105e-06, + "loss": 0.8628, + "step": 128030 + }, + { + "epoch": 0.9268388021455406, + "grad_norm": 0.15095971524715424, + "learning_rate": 4.073168436520518e-06, + "loss": 0.8647, + "step": 128040 + }, + { + "epoch": 0.9269111888061268, + "grad_norm": 0.164928138256073, + "learning_rate": 4.073096049859932e-06, + "loss": 0.8679, + "step": 128050 + }, + { + "epoch": 0.926983575466713, + "grad_norm": 0.15687911212444305, + "learning_rate": 4.073023663199346e-06, + "loss": 0.8691, + "step": 128060 + }, + { + "epoch": 0.9270559621272991, + "grad_norm": 0.16557860374450684, + "learning_rate": 4.07295127653876e-06, + "loss": 0.8834, + "step": 128070 + }, + { + "epoch": 0.9271283487878854, + "grad_norm": 0.15757247805595398, + "learning_rate": 4.072878889878174e-06, + "loss": 0.8759, + "step": 128080 + }, + { + "epoch": 0.9272007354484716, + "grad_norm": 0.15857826173305511, + "learning_rate": 4.072806503217587e-06, + "loss": 0.8754, + "step": 128090 + }, + { + "epoch": 0.9272731221090578, + "grad_norm": 0.15188080072402954, + "learning_rate": 4.072734116557001e-06, + "loss": 0.8669, + "step": 128100 + }, + { + "epoch": 0.927345508769644, + "grad_norm": 0.15522122383117676, + "learning_rate": 4.072661729896415e-06, + "loss": 0.8643, + "step": 128110 + }, + { + "epoch": 0.9274178954302301, + "grad_norm": 0.14088667929172516, + "learning_rate": 4.072589343235829e-06, + "loss": 0.8784, + "step": 128120 + }, + { + "epoch": 0.9274902820908163, + "grad_norm": 0.1532406508922577, + "learning_rate": 4.072516956575243e-06, + "loss": 0.8637, + "step": 128130 + }, + { + "epoch": 0.9275626687514025, + "grad_norm": 0.14696896076202393, + "learning_rate": 4.072444569914656e-06, + "loss": 0.8601, + "step": 128140 + }, + { + "epoch": 0.9276350554119887, + "grad_norm": 0.1489928513765335, + "learning_rate": 4.07237218325407e-06, + "loss": 0.8698, + "step": 128150 + }, + { + "epoch": 0.9277074420725748, + "grad_norm": 0.1538485586643219, + "learning_rate": 4.072299796593484e-06, + "loss": 0.8817, + "step": 128160 + }, + { + "epoch": 0.927779828733161, + "grad_norm": 0.14813606441020966, + "learning_rate": 4.072227409932898e-06, + "loss": 0.8636, + "step": 128170 + }, + { + "epoch": 0.9278522153937473, + "grad_norm": 0.1586228609085083, + "learning_rate": 4.0721550232723115e-06, + "loss": 0.868, + "step": 128180 + }, + { + "epoch": 0.9279246020543335, + "grad_norm": 0.21530722081661224, + "learning_rate": 4.072082636611725e-06, + "loss": 0.8735, + "step": 128190 + }, + { + "epoch": 0.9279969887149196, + "grad_norm": 0.18316511809825897, + "learning_rate": 4.07201024995114e-06, + "loss": 0.8693, + "step": 128200 + }, + { + "epoch": 0.9280693753755058, + "grad_norm": 0.15171143412590027, + "learning_rate": 4.071937863290553e-06, + "loss": 0.8617, + "step": 128210 + }, + { + "epoch": 0.928141762036092, + "grad_norm": 0.15644243359565735, + "learning_rate": 4.071865476629967e-06, + "loss": 0.8728, + "step": 128220 + }, + { + "epoch": 0.9282141486966782, + "grad_norm": 0.16042940318584442, + "learning_rate": 4.0717930899693804e-06, + "loss": 0.8778, + "step": 128230 + }, + { + "epoch": 0.9282865353572644, + "grad_norm": 0.15380287170410156, + "learning_rate": 4.071720703308795e-06, + "loss": 0.8594, + "step": 128240 + }, + { + "epoch": 0.9283589220178505, + "grad_norm": 0.15428663790225983, + "learning_rate": 4.0716483166482085e-06, + "loss": 0.8817, + "step": 128250 + }, + { + "epoch": 0.9284313086784367, + "grad_norm": 0.14984562993049622, + "learning_rate": 4.071575929987622e-06, + "loss": 0.8781, + "step": 128260 + }, + { + "epoch": 0.9285036953390229, + "grad_norm": 0.16001556813716888, + "learning_rate": 4.071503543327036e-06, + "loss": 0.8666, + "step": 128270 + }, + { + "epoch": 0.9285760819996091, + "grad_norm": 0.1445271223783493, + "learning_rate": 4.07143115666645e-06, + "loss": 0.8772, + "step": 128280 + }, + { + "epoch": 0.9286484686601953, + "grad_norm": 0.1755678355693817, + "learning_rate": 4.071358770005864e-06, + "loss": 0.8805, + "step": 128290 + }, + { + "epoch": 0.9287208553207815, + "grad_norm": 0.16216261684894562, + "learning_rate": 4.0712863833452775e-06, + "loss": 0.8694, + "step": 128300 + }, + { + "epoch": 0.9287932419813677, + "grad_norm": 0.15545831620693207, + "learning_rate": 4.071213996684691e-06, + "loss": 0.8681, + "step": 128310 + }, + { + "epoch": 0.9288656286419539, + "grad_norm": 0.15312260389328003, + "learning_rate": 4.0711416100241055e-06, + "loss": 0.8721, + "step": 128320 + }, + { + "epoch": 0.92893801530254, + "grad_norm": 0.1504194438457489, + "learning_rate": 4.071069223363519e-06, + "loss": 0.8624, + "step": 128330 + }, + { + "epoch": 0.9290104019631262, + "grad_norm": 0.15465429425239563, + "learning_rate": 4.070996836702933e-06, + "loss": 0.8799, + "step": 128340 + }, + { + "epoch": 0.9290827886237124, + "grad_norm": 0.14774566888809204, + "learning_rate": 4.070924450042346e-06, + "loss": 0.8717, + "step": 128350 + }, + { + "epoch": 0.9291551752842986, + "grad_norm": 0.16979655623435974, + "learning_rate": 4.070852063381761e-06, + "loss": 0.8552, + "step": 128360 + }, + { + "epoch": 0.9292275619448848, + "grad_norm": 0.15105919539928436, + "learning_rate": 4.0707796767211745e-06, + "loss": 0.8624, + "step": 128370 + }, + { + "epoch": 0.9292999486054709, + "grad_norm": 0.155971959233284, + "learning_rate": 4.070707290060588e-06, + "loss": 0.8592, + "step": 128380 + }, + { + "epoch": 0.9293723352660572, + "grad_norm": 0.162667378783226, + "learning_rate": 4.070634903400002e-06, + "loss": 0.8771, + "step": 128390 + }, + { + "epoch": 0.9294447219266434, + "grad_norm": 0.1585136353969574, + "learning_rate": 4.070562516739416e-06, + "loss": 0.8714, + "step": 128400 + }, + { + "epoch": 0.9295171085872296, + "grad_norm": 0.14089208841323853, + "learning_rate": 4.07049013007883e-06, + "loss": 0.8691, + "step": 128410 + }, + { + "epoch": 0.9295894952478158, + "grad_norm": 0.14816270768642426, + "learning_rate": 4.070417743418243e-06, + "loss": 0.8754, + "step": 128420 + }, + { + "epoch": 0.9296618819084019, + "grad_norm": 0.14089035987854004, + "learning_rate": 4.070345356757657e-06, + "loss": 0.869, + "step": 128430 + }, + { + "epoch": 0.9297342685689881, + "grad_norm": 0.15903660655021667, + "learning_rate": 4.0702729700970715e-06, + "loss": 0.8669, + "step": 128440 + }, + { + "epoch": 0.9298066552295743, + "grad_norm": 0.15594375133514404, + "learning_rate": 4.070200583436484e-06, + "loss": 0.8594, + "step": 128450 + }, + { + "epoch": 0.9298790418901605, + "grad_norm": 0.15002770721912384, + "learning_rate": 4.070128196775898e-06, + "loss": 0.858, + "step": 128460 + }, + { + "epoch": 0.9299514285507466, + "grad_norm": 0.1600421667098999, + "learning_rate": 4.070055810115312e-06, + "loss": 0.8613, + "step": 128470 + }, + { + "epoch": 0.9300238152113328, + "grad_norm": 0.16615080833435059, + "learning_rate": 4.069983423454726e-06, + "loss": 0.8763, + "step": 128480 + }, + { + "epoch": 0.930096201871919, + "grad_norm": 0.15148785710334778, + "learning_rate": 4.0699110367941396e-06, + "loss": 0.8695, + "step": 128490 + }, + { + "epoch": 0.9301685885325053, + "grad_norm": 0.14840847253799438, + "learning_rate": 4.069838650133553e-06, + "loss": 0.8739, + "step": 128500 + }, + { + "epoch": 0.9302409751930915, + "grad_norm": 0.13647682964801788, + "learning_rate": 4.069766263472968e-06, + "loss": 0.8672, + "step": 128510 + }, + { + "epoch": 0.9303133618536776, + "grad_norm": 0.1561964750289917, + "learning_rate": 4.069693876812381e-06, + "loss": 0.8637, + "step": 128520 + }, + { + "epoch": 0.9303857485142638, + "grad_norm": 0.22199493646621704, + "learning_rate": 4.069621490151795e-06, + "loss": 0.8707, + "step": 128530 + }, + { + "epoch": 0.93045813517485, + "grad_norm": 0.1517588496208191, + "learning_rate": 4.0695491034912085e-06, + "loss": 0.8663, + "step": 128540 + }, + { + "epoch": 0.9305305218354362, + "grad_norm": 0.15259967744350433, + "learning_rate": 4.069476716830623e-06, + "loss": 0.8684, + "step": 128550 + }, + { + "epoch": 0.9306029084960223, + "grad_norm": 0.15910792350769043, + "learning_rate": 4.0694043301700366e-06, + "loss": 0.8676, + "step": 128560 + }, + { + "epoch": 0.9306752951566085, + "grad_norm": 0.16429047286510468, + "learning_rate": 4.06933194350945e-06, + "loss": 0.8783, + "step": 128570 + }, + { + "epoch": 0.9307476818171947, + "grad_norm": 0.1512821912765503, + "learning_rate": 4.069259556848864e-06, + "loss": 0.8691, + "step": 128580 + }, + { + "epoch": 0.9308200684777809, + "grad_norm": 0.154913067817688, + "learning_rate": 4.069187170188278e-06, + "loss": 0.8761, + "step": 128590 + }, + { + "epoch": 0.930892455138367, + "grad_norm": 0.14742477238178253, + "learning_rate": 4.069114783527692e-06, + "loss": 0.8685, + "step": 128600 + }, + { + "epoch": 0.9309648417989533, + "grad_norm": 0.15343081951141357, + "learning_rate": 4.0690423968671055e-06, + "loss": 0.8675, + "step": 128610 + }, + { + "epoch": 0.9310372284595395, + "grad_norm": 0.18212413787841797, + "learning_rate": 4.068970010206519e-06, + "loss": 0.8671, + "step": 128620 + }, + { + "epoch": 0.9311096151201257, + "grad_norm": 0.14590677618980408, + "learning_rate": 4.0688976235459336e-06, + "loss": 0.8758, + "step": 128630 + }, + { + "epoch": 0.9311820017807119, + "grad_norm": 0.15685223042964935, + "learning_rate": 4.068825236885347e-06, + "loss": 0.8634, + "step": 128640 + }, + { + "epoch": 0.931254388441298, + "grad_norm": 0.1610851287841797, + "learning_rate": 4.068752850224761e-06, + "loss": 0.8678, + "step": 128650 + }, + { + "epoch": 0.9313267751018842, + "grad_norm": 0.1528211534023285, + "learning_rate": 4.068680463564174e-06, + "loss": 0.8797, + "step": 128660 + }, + { + "epoch": 0.9313991617624704, + "grad_norm": 0.1573326289653778, + "learning_rate": 4.068608076903589e-06, + "loss": 0.8785, + "step": 128670 + }, + { + "epoch": 0.9314715484230566, + "grad_norm": 0.15934540331363678, + "learning_rate": 4.0685356902430025e-06, + "loss": 0.8626, + "step": 128680 + }, + { + "epoch": 0.9315439350836427, + "grad_norm": 0.14272333681583405, + "learning_rate": 4.068463303582416e-06, + "loss": 0.8685, + "step": 128690 + }, + { + "epoch": 0.9316163217442289, + "grad_norm": 0.1473149210214615, + "learning_rate": 4.06839091692183e-06, + "loss": 0.8655, + "step": 128700 + }, + { + "epoch": 0.9316887084048152, + "grad_norm": 0.1499226689338684, + "learning_rate": 4.068318530261244e-06, + "loss": 0.8633, + "step": 128710 + }, + { + "epoch": 0.9317610950654014, + "grad_norm": 0.14663182199001312, + "learning_rate": 4.068246143600658e-06, + "loss": 0.852, + "step": 128720 + }, + { + "epoch": 0.9318334817259876, + "grad_norm": 0.15294471383094788, + "learning_rate": 4.068173756940071e-06, + "loss": 0.8769, + "step": 128730 + }, + { + "epoch": 0.9319058683865737, + "grad_norm": 0.16538985073566437, + "learning_rate": 4.068101370279485e-06, + "loss": 0.8754, + "step": 128740 + }, + { + "epoch": 0.9319782550471599, + "grad_norm": 0.14424091577529907, + "learning_rate": 4.0680289836188995e-06, + "loss": 0.8745, + "step": 128750 + }, + { + "epoch": 0.9320506417077461, + "grad_norm": 0.18575291335582733, + "learning_rate": 4.067956596958313e-06, + "loss": 0.8542, + "step": 128760 + }, + { + "epoch": 0.9321230283683323, + "grad_norm": 0.156608447432518, + "learning_rate": 4.067884210297727e-06, + "loss": 0.8703, + "step": 128770 + }, + { + "epoch": 0.9321954150289185, + "grad_norm": 0.16681206226348877, + "learning_rate": 4.06781182363714e-06, + "loss": 0.8691, + "step": 128780 + }, + { + "epoch": 0.9322678016895046, + "grad_norm": 0.18856379389762878, + "learning_rate": 4.067739436976554e-06, + "loss": 0.8727, + "step": 128790 + }, + { + "epoch": 0.9323401883500908, + "grad_norm": 0.15221746265888214, + "learning_rate": 4.0676670503159684e-06, + "loss": 0.8771, + "step": 128800 + }, + { + "epoch": 0.932412575010677, + "grad_norm": 0.15797759592533112, + "learning_rate": 4.067594663655382e-06, + "loss": 0.8682, + "step": 128810 + }, + { + "epoch": 0.9324849616712633, + "grad_norm": 0.1687854826450348, + "learning_rate": 4.067522276994796e-06, + "loss": 0.8743, + "step": 128820 + }, + { + "epoch": 0.9325573483318494, + "grad_norm": 0.15136481821537018, + "learning_rate": 4.067449890334209e-06, + "loss": 0.8712, + "step": 128830 + }, + { + "epoch": 0.9326297349924356, + "grad_norm": 0.1667127013206482, + "learning_rate": 4.067377503673624e-06, + "loss": 0.862, + "step": 128840 + }, + { + "epoch": 0.9327021216530218, + "grad_norm": 0.1545359492301941, + "learning_rate": 4.067305117013037e-06, + "loss": 0.8564, + "step": 128850 + }, + { + "epoch": 0.932774508313608, + "grad_norm": 0.18718428909778595, + "learning_rate": 4.067232730352451e-06, + "loss": 0.8584, + "step": 128860 + }, + { + "epoch": 0.9328468949741942, + "grad_norm": 0.14906947314739227, + "learning_rate": 4.067160343691865e-06, + "loss": 0.8737, + "step": 128870 + }, + { + "epoch": 0.9329192816347803, + "grad_norm": 0.1455237865447998, + "learning_rate": 4.067087957031279e-06, + "loss": 0.872, + "step": 128880 + }, + { + "epoch": 0.9329916682953665, + "grad_norm": 0.1723933070898056, + "learning_rate": 4.067015570370693e-06, + "loss": 0.8785, + "step": 128890 + }, + { + "epoch": 0.9330640549559527, + "grad_norm": 0.1570318043231964, + "learning_rate": 4.066943183710106e-06, + "loss": 0.8693, + "step": 128900 + }, + { + "epoch": 0.9331364416165389, + "grad_norm": 0.16334332525730133, + "learning_rate": 4.06687079704952e-06, + "loss": 0.8744, + "step": 128910 + }, + { + "epoch": 0.9332088282771251, + "grad_norm": 0.15056326985359192, + "learning_rate": 4.066798410388934e-06, + "loss": 0.8666, + "step": 128920 + }, + { + "epoch": 0.9332812149377113, + "grad_norm": 0.15324917435646057, + "learning_rate": 4.066726023728348e-06, + "loss": 0.8814, + "step": 128930 + }, + { + "epoch": 0.9333536015982975, + "grad_norm": 0.1748197376728058, + "learning_rate": 4.066653637067762e-06, + "loss": 0.869, + "step": 128940 + }, + { + "epoch": 0.9334259882588837, + "grad_norm": 0.1483113169670105, + "learning_rate": 4.066581250407175e-06, + "loss": 0.8726, + "step": 128950 + }, + { + "epoch": 0.9334983749194699, + "grad_norm": 0.14639945328235626, + "learning_rate": 4.06650886374659e-06, + "loss": 0.8641, + "step": 128960 + }, + { + "epoch": 0.933570761580056, + "grad_norm": 0.1470886915922165, + "learning_rate": 4.066436477086003e-06, + "loss": 0.8671, + "step": 128970 + }, + { + "epoch": 0.9336431482406422, + "grad_norm": 0.15743905305862427, + "learning_rate": 4.066364090425416e-06, + "loss": 0.8751, + "step": 128980 + }, + { + "epoch": 0.9337155349012284, + "grad_norm": 0.14820967614650726, + "learning_rate": 4.0662917037648305e-06, + "loss": 0.8643, + "step": 128990 + }, + { + "epoch": 0.9337879215618146, + "grad_norm": 0.15819282829761505, + "learning_rate": 4.066219317104244e-06, + "loss": 0.8715, + "step": 129000 + }, + { + "epoch": 0.9338603082224007, + "grad_norm": 0.16119031608104706, + "learning_rate": 4.066146930443658e-06, + "loss": 0.885, + "step": 129010 + }, + { + "epoch": 0.9339326948829869, + "grad_norm": 0.1570100635290146, + "learning_rate": 4.066074543783071e-06, + "loss": 0.863, + "step": 129020 + }, + { + "epoch": 0.9340050815435732, + "grad_norm": 0.15377597510814667, + "learning_rate": 4.066002157122486e-06, + "loss": 0.8705, + "step": 129030 + }, + { + "epoch": 0.9340774682041594, + "grad_norm": 0.1488153040409088, + "learning_rate": 4.0659297704618995e-06, + "loss": 0.8709, + "step": 129040 + }, + { + "epoch": 0.9341498548647456, + "grad_norm": 0.1504792720079422, + "learning_rate": 4.065857383801313e-06, + "loss": 0.8645, + "step": 129050 + }, + { + "epoch": 0.9342222415253317, + "grad_norm": 0.15076880156993866, + "learning_rate": 4.065784997140727e-06, + "loss": 0.8786, + "step": 129060 + }, + { + "epoch": 0.9342946281859179, + "grad_norm": 0.15644735097885132, + "learning_rate": 4.065712610480141e-06, + "loss": 0.8497, + "step": 129070 + }, + { + "epoch": 0.9343670148465041, + "grad_norm": 0.15488304197788239, + "learning_rate": 4.065640223819555e-06, + "loss": 0.8716, + "step": 129080 + }, + { + "epoch": 0.9344394015070903, + "grad_norm": 0.15287454426288605, + "learning_rate": 4.065567837158968e-06, + "loss": 0.8699, + "step": 129090 + }, + { + "epoch": 0.9345117881676764, + "grad_norm": 0.15674547851085663, + "learning_rate": 4.065495450498382e-06, + "loss": 0.8544, + "step": 129100 + }, + { + "epoch": 0.9345841748282626, + "grad_norm": 0.14883847534656525, + "learning_rate": 4.0654230638377965e-06, + "loss": 0.8748, + "step": 129110 + }, + { + "epoch": 0.9346565614888488, + "grad_norm": 0.1638948619365692, + "learning_rate": 4.06535067717721e-06, + "loss": 0.8734, + "step": 129120 + }, + { + "epoch": 0.934728948149435, + "grad_norm": 0.1496628224849701, + "learning_rate": 4.065278290516624e-06, + "loss": 0.8622, + "step": 129130 + }, + { + "epoch": 0.9348013348100213, + "grad_norm": 0.15044906735420227, + "learning_rate": 4.065205903856037e-06, + "loss": 0.8845, + "step": 129140 + }, + { + "epoch": 0.9348737214706074, + "grad_norm": 0.1459684520959854, + "learning_rate": 4.065133517195452e-06, + "loss": 0.8753, + "step": 129150 + }, + { + "epoch": 0.9349461081311936, + "grad_norm": 0.14683480560779572, + "learning_rate": 4.065061130534865e-06, + "loss": 0.8597, + "step": 129160 + }, + { + "epoch": 0.9350184947917798, + "grad_norm": 0.17215889692306519, + "learning_rate": 4.064988743874279e-06, + "loss": 0.8613, + "step": 129170 + }, + { + "epoch": 0.935090881452366, + "grad_norm": 0.15645846724510193, + "learning_rate": 4.064916357213693e-06, + "loss": 0.8649, + "step": 129180 + }, + { + "epoch": 0.9351632681129521, + "grad_norm": 0.15681461989879608, + "learning_rate": 4.064843970553107e-06, + "loss": 0.8675, + "step": 129190 + }, + { + "epoch": 0.9352356547735383, + "grad_norm": 0.15553514659404755, + "learning_rate": 4.064771583892521e-06, + "loss": 0.865, + "step": 129200 + }, + { + "epoch": 0.9353080414341245, + "grad_norm": 0.1579878181219101, + "learning_rate": 4.064699197231934e-06, + "loss": 0.8564, + "step": 129210 + }, + { + "epoch": 0.9353804280947107, + "grad_norm": 0.1529635339975357, + "learning_rate": 4.064626810571348e-06, + "loss": 0.8701, + "step": 129220 + }, + { + "epoch": 0.9354528147552968, + "grad_norm": 0.17497135698795319, + "learning_rate": 4.064554423910762e-06, + "loss": 0.858, + "step": 129230 + }, + { + "epoch": 0.9355252014158831, + "grad_norm": 0.1518746018409729, + "learning_rate": 4.064482037250176e-06, + "loss": 0.8595, + "step": 129240 + }, + { + "epoch": 0.9355975880764693, + "grad_norm": 0.15486803650856018, + "learning_rate": 4.06440965058959e-06, + "loss": 0.8762, + "step": 129250 + }, + { + "epoch": 0.9356699747370555, + "grad_norm": 0.16987523436546326, + "learning_rate": 4.064337263929003e-06, + "loss": 0.8698, + "step": 129260 + }, + { + "epoch": 0.9357423613976417, + "grad_norm": 0.15838555991649628, + "learning_rate": 4.064264877268418e-06, + "loss": 0.8614, + "step": 129270 + }, + { + "epoch": 0.9358147480582278, + "grad_norm": 0.15161006152629852, + "learning_rate": 4.064192490607831e-06, + "loss": 0.8582, + "step": 129280 + }, + { + "epoch": 0.935887134718814, + "grad_norm": 0.16381420195102692, + "learning_rate": 4.064120103947245e-06, + "loss": 0.8612, + "step": 129290 + }, + { + "epoch": 0.9359595213794002, + "grad_norm": 0.15253403782844543, + "learning_rate": 4.0640477172866586e-06, + "loss": 0.8672, + "step": 129300 + }, + { + "epoch": 0.9360319080399864, + "grad_norm": 0.14792008697986603, + "learning_rate": 4.063975330626073e-06, + "loss": 0.8663, + "step": 129310 + }, + { + "epoch": 0.9361042947005725, + "grad_norm": 0.15635430812835693, + "learning_rate": 4.063902943965487e-06, + "loss": 0.8706, + "step": 129320 + }, + { + "epoch": 0.9361766813611587, + "grad_norm": 0.16064366698265076, + "learning_rate": 4.0638305573049e-06, + "loss": 0.8765, + "step": 129330 + }, + { + "epoch": 0.9362490680217449, + "grad_norm": 0.156932070851326, + "learning_rate": 4.063758170644314e-06, + "loss": 0.881, + "step": 129340 + }, + { + "epoch": 0.9363214546823312, + "grad_norm": 0.1553388088941574, + "learning_rate": 4.063685783983728e-06, + "loss": 0.8881, + "step": 129350 + }, + { + "epoch": 0.9363938413429174, + "grad_norm": 0.13980628550052643, + "learning_rate": 4.063613397323142e-06, + "loss": 0.8828, + "step": 129360 + }, + { + "epoch": 0.9364662280035035, + "grad_norm": 0.14694495499134064, + "learning_rate": 4.0635410106625556e-06, + "loss": 0.8716, + "step": 129370 + }, + { + "epoch": 0.9365386146640897, + "grad_norm": 0.15779894590377808, + "learning_rate": 4.063468624001969e-06, + "loss": 0.8684, + "step": 129380 + }, + { + "epoch": 0.9366110013246759, + "grad_norm": 0.15921930968761444, + "learning_rate": 4.063396237341384e-06, + "loss": 0.868, + "step": 129390 + }, + { + "epoch": 0.9366833879852621, + "grad_norm": 0.15195946395397186, + "learning_rate": 4.063323850680797e-06, + "loss": 0.8714, + "step": 129400 + }, + { + "epoch": 0.9367557746458482, + "grad_norm": 0.1530548632144928, + "learning_rate": 4.063251464020211e-06, + "loss": 0.8691, + "step": 129410 + }, + { + "epoch": 0.9368281613064344, + "grad_norm": 0.15170645713806152, + "learning_rate": 4.0631790773596245e-06, + "loss": 0.8762, + "step": 129420 + }, + { + "epoch": 0.9369005479670206, + "grad_norm": 0.15962299704551697, + "learning_rate": 4.063106690699038e-06, + "loss": 0.871, + "step": 129430 + }, + { + "epoch": 0.9369729346276068, + "grad_norm": 0.16227169334888458, + "learning_rate": 4.0630343040384526e-06, + "loss": 0.8512, + "step": 129440 + }, + { + "epoch": 0.9370453212881931, + "grad_norm": 0.1503174751996994, + "learning_rate": 4.062961917377866e-06, + "loss": 0.8683, + "step": 129450 + }, + { + "epoch": 0.9371177079487792, + "grad_norm": 0.1538417786359787, + "learning_rate": 4.06288953071728e-06, + "loss": 0.8637, + "step": 129460 + }, + { + "epoch": 0.9371900946093654, + "grad_norm": 0.14984267950057983, + "learning_rate": 4.062817144056693e-06, + "loss": 0.8632, + "step": 129470 + }, + { + "epoch": 0.9372624812699516, + "grad_norm": 0.1558646708726883, + "learning_rate": 4.062744757396108e-06, + "loss": 0.8677, + "step": 129480 + }, + { + "epoch": 0.9373348679305378, + "grad_norm": 0.16419059038162231, + "learning_rate": 4.0626723707355215e-06, + "loss": 0.8634, + "step": 129490 + }, + { + "epoch": 0.937407254591124, + "grad_norm": 0.15692633390426636, + "learning_rate": 4.062599984074935e-06, + "loss": 0.8717, + "step": 129500 + }, + { + "epoch": 0.9374796412517101, + "grad_norm": 0.15984247624874115, + "learning_rate": 4.062527597414349e-06, + "loss": 0.8783, + "step": 129510 + }, + { + "epoch": 0.9375520279122963, + "grad_norm": 0.15747103095054626, + "learning_rate": 4.062455210753762e-06, + "loss": 0.8726, + "step": 129520 + }, + { + "epoch": 0.9376244145728825, + "grad_norm": 0.15458211302757263, + "learning_rate": 4.062382824093176e-06, + "loss": 0.8605, + "step": 129530 + }, + { + "epoch": 0.9376968012334687, + "grad_norm": 0.14359694719314575, + "learning_rate": 4.0623104374325904e-06, + "loss": 0.8852, + "step": 129540 + }, + { + "epoch": 0.9377691878940548, + "grad_norm": 0.16120558977127075, + "learning_rate": 4.062238050772004e-06, + "loss": 0.8745, + "step": 129550 + }, + { + "epoch": 0.9378415745546411, + "grad_norm": 0.15251369774341583, + "learning_rate": 4.062165664111418e-06, + "loss": 0.8723, + "step": 129560 + }, + { + "epoch": 0.9379139612152273, + "grad_norm": 0.15283681452274323, + "learning_rate": 4.062093277450831e-06, + "loss": 0.8648, + "step": 129570 + }, + { + "epoch": 0.9379863478758135, + "grad_norm": 0.15063002705574036, + "learning_rate": 4.062020890790245e-06, + "loss": 0.8652, + "step": 129580 + }, + { + "epoch": 0.9380587345363997, + "grad_norm": 0.1525861620903015, + "learning_rate": 4.061948504129659e-06, + "loss": 0.8836, + "step": 129590 + }, + { + "epoch": 0.9381311211969858, + "grad_norm": 0.1666775345802307, + "learning_rate": 4.061876117469073e-06, + "loss": 0.8762, + "step": 129600 + }, + { + "epoch": 0.938203507857572, + "grad_norm": 0.15316708385944366, + "learning_rate": 4.061803730808487e-06, + "loss": 0.8482, + "step": 129610 + }, + { + "epoch": 0.9382758945181582, + "grad_norm": 0.15615612268447876, + "learning_rate": 4.0617313441479e-06, + "loss": 0.8639, + "step": 129620 + }, + { + "epoch": 0.9383482811787444, + "grad_norm": 0.13969020545482635, + "learning_rate": 4.061658957487315e-06, + "loss": 0.8829, + "step": 129630 + }, + { + "epoch": 0.9384206678393305, + "grad_norm": 0.1508314609527588, + "learning_rate": 4.061586570826728e-06, + "loss": 0.8611, + "step": 129640 + }, + { + "epoch": 0.9384930544999167, + "grad_norm": 0.16280554234981537, + "learning_rate": 4.061514184166142e-06, + "loss": 0.883, + "step": 129650 + }, + { + "epoch": 0.9385654411605029, + "grad_norm": 0.15639394521713257, + "learning_rate": 4.0614417975055555e-06, + "loss": 0.8711, + "step": 129660 + }, + { + "epoch": 0.9386378278210892, + "grad_norm": 0.1565892994403839, + "learning_rate": 4.06136941084497e-06, + "loss": 0.8667, + "step": 129670 + }, + { + "epoch": 0.9387102144816754, + "grad_norm": 0.16035987436771393, + "learning_rate": 4.061297024184384e-06, + "loss": 0.8624, + "step": 129680 + }, + { + "epoch": 0.9387826011422615, + "grad_norm": 0.15157711505889893, + "learning_rate": 4.061224637523797e-06, + "loss": 0.8751, + "step": 129690 + }, + { + "epoch": 0.9388549878028477, + "grad_norm": 0.15233691036701202, + "learning_rate": 4.061152250863211e-06, + "loss": 0.8743, + "step": 129700 + }, + { + "epoch": 0.9389273744634339, + "grad_norm": 0.15194357931613922, + "learning_rate": 4.061079864202625e-06, + "loss": 0.875, + "step": 129710 + }, + { + "epoch": 0.9389997611240201, + "grad_norm": 0.1659833937883377, + "learning_rate": 4.061007477542039e-06, + "loss": 0.8714, + "step": 129720 + }, + { + "epoch": 0.9390721477846062, + "grad_norm": 0.15151038765907288, + "learning_rate": 4.0609350908814525e-06, + "loss": 0.863, + "step": 129730 + }, + { + "epoch": 0.9391445344451924, + "grad_norm": 0.16103753447532654, + "learning_rate": 4.060862704220866e-06, + "loss": 0.8715, + "step": 129740 + }, + { + "epoch": 0.9392169211057786, + "grad_norm": 0.1853325515985489, + "learning_rate": 4.060790317560281e-06, + "loss": 0.8664, + "step": 129750 + }, + { + "epoch": 0.9392893077663648, + "grad_norm": 0.15947386622428894, + "learning_rate": 4.060717930899694e-06, + "loss": 0.8698, + "step": 129760 + }, + { + "epoch": 0.939361694426951, + "grad_norm": 0.14465877413749695, + "learning_rate": 4.060645544239108e-06, + "loss": 0.8654, + "step": 129770 + }, + { + "epoch": 0.9394340810875372, + "grad_norm": 0.15076486766338348, + "learning_rate": 4.0605731575785215e-06, + "loss": 0.8733, + "step": 129780 + }, + { + "epoch": 0.9395064677481234, + "grad_norm": 0.14492474496364594, + "learning_rate": 4.060500770917936e-06, + "loss": 0.8629, + "step": 129790 + }, + { + "epoch": 0.9395788544087096, + "grad_norm": 0.15159480273723602, + "learning_rate": 4.0604283842573495e-06, + "loss": 0.875, + "step": 129800 + }, + { + "epoch": 0.9396512410692958, + "grad_norm": 0.14613956212997437, + "learning_rate": 4.060355997596763e-06, + "loss": 0.8781, + "step": 129810 + }, + { + "epoch": 0.9397236277298819, + "grad_norm": 0.16877029836177826, + "learning_rate": 4.060283610936177e-06, + "loss": 0.8768, + "step": 129820 + }, + { + "epoch": 0.9397960143904681, + "grad_norm": 0.15022648870944977, + "learning_rate": 4.060211224275591e-06, + "loss": 0.8699, + "step": 129830 + }, + { + "epoch": 0.9398684010510543, + "grad_norm": 0.1675875037908554, + "learning_rate": 4.060138837615005e-06, + "loss": 0.8626, + "step": 129840 + }, + { + "epoch": 0.9399407877116405, + "grad_norm": 0.15652434527873993, + "learning_rate": 4.0600664509544185e-06, + "loss": 0.8588, + "step": 129850 + }, + { + "epoch": 0.9400131743722266, + "grad_norm": 0.16846764087677002, + "learning_rate": 4.059994064293832e-06, + "loss": 0.859, + "step": 129860 + }, + { + "epoch": 0.9400855610328128, + "grad_norm": 0.1613306701183319, + "learning_rate": 4.0599216776332465e-06, + "loss": 0.8593, + "step": 129870 + }, + { + "epoch": 0.9401579476933991, + "grad_norm": 0.14691142737865448, + "learning_rate": 4.05984929097266e-06, + "loss": 0.8657, + "step": 129880 + }, + { + "epoch": 0.9402303343539853, + "grad_norm": 0.20344461500644684, + "learning_rate": 4.059776904312074e-06, + "loss": 0.8688, + "step": 129890 + }, + { + "epoch": 0.9403027210145715, + "grad_norm": 0.1496155560016632, + "learning_rate": 4.059704517651487e-06, + "loss": 0.862, + "step": 129900 + }, + { + "epoch": 0.9403751076751576, + "grad_norm": 0.14814414083957672, + "learning_rate": 4.059632130990902e-06, + "loss": 0.8479, + "step": 129910 + }, + { + "epoch": 0.9404474943357438, + "grad_norm": 0.1548141986131668, + "learning_rate": 4.0595597443303155e-06, + "loss": 0.8626, + "step": 129920 + }, + { + "epoch": 0.94051988099633, + "grad_norm": 0.14892597496509552, + "learning_rate": 4.059487357669729e-06, + "loss": 0.8761, + "step": 129930 + }, + { + "epoch": 0.9405922676569162, + "grad_norm": 0.14724701642990112, + "learning_rate": 4.059414971009143e-06, + "loss": 0.8737, + "step": 129940 + }, + { + "epoch": 0.9406646543175023, + "grad_norm": 0.1490176022052765, + "learning_rate": 4.059342584348557e-06, + "loss": 0.8762, + "step": 129950 + }, + { + "epoch": 0.9407370409780885, + "grad_norm": 0.1549617052078247, + "learning_rate": 4.059270197687971e-06, + "loss": 0.876, + "step": 129960 + }, + { + "epoch": 0.9408094276386747, + "grad_norm": 0.1603880673646927, + "learning_rate": 4.059197811027384e-06, + "loss": 0.8737, + "step": 129970 + }, + { + "epoch": 0.9408818142992609, + "grad_norm": 0.14581482112407684, + "learning_rate": 4.059125424366798e-06, + "loss": 0.8723, + "step": 129980 + }, + { + "epoch": 0.9409542009598472, + "grad_norm": 0.14896036684513092, + "learning_rate": 4.0590530377062125e-06, + "loss": 0.8674, + "step": 129990 + }, + { + "epoch": 0.9410265876204333, + "grad_norm": 0.1456650048494339, + "learning_rate": 4.058980651045626e-06, + "loss": 0.8697, + "step": 130000 + }, + { + "epoch": 0.9410989742810195, + "grad_norm": 0.16549433767795563, + "learning_rate": 4.05890826438504e-06, + "loss": 0.8668, + "step": 130010 + }, + { + "epoch": 0.9411713609416057, + "grad_norm": 0.15788212418556213, + "learning_rate": 4.058835877724453e-06, + "loss": 0.8884, + "step": 130020 + }, + { + "epoch": 0.9412437476021919, + "grad_norm": 0.20761068165302277, + "learning_rate": 4.058763491063867e-06, + "loss": 0.8641, + "step": 130030 + }, + { + "epoch": 0.941316134262778, + "grad_norm": 0.1683483123779297, + "learning_rate": 4.0586911044032806e-06, + "loss": 0.852, + "step": 130040 + }, + { + "epoch": 0.9413885209233642, + "grad_norm": 0.16688086092472076, + "learning_rate": 4.058618717742694e-06, + "loss": 0.8703, + "step": 130050 + }, + { + "epoch": 0.9414609075839504, + "grad_norm": 0.15116585791110992, + "learning_rate": 4.058546331082109e-06, + "loss": 0.8814, + "step": 130060 + }, + { + "epoch": 0.9415332942445366, + "grad_norm": 0.1498250663280487, + "learning_rate": 4.058473944421522e-06, + "loss": 0.8785, + "step": 130070 + }, + { + "epoch": 0.9416056809051228, + "grad_norm": 0.14295758306980133, + "learning_rate": 4.058401557760936e-06, + "loss": 0.8625, + "step": 130080 + }, + { + "epoch": 0.941678067565709, + "grad_norm": 0.16779643297195435, + "learning_rate": 4.0583291711003495e-06, + "loss": 0.8589, + "step": 130090 + }, + { + "epoch": 0.9417504542262952, + "grad_norm": 0.14837302267551422, + "learning_rate": 4.058256784439764e-06, + "loss": 0.8672, + "step": 130100 + }, + { + "epoch": 0.9418228408868814, + "grad_norm": 0.15054334700107574, + "learning_rate": 4.0581843977791776e-06, + "loss": 0.856, + "step": 130110 + }, + { + "epoch": 0.9418952275474676, + "grad_norm": 0.1450866162776947, + "learning_rate": 4.058112011118591e-06, + "loss": 0.8576, + "step": 130120 + }, + { + "epoch": 0.9419676142080537, + "grad_norm": 0.1485680341720581, + "learning_rate": 4.058039624458005e-06, + "loss": 0.8712, + "step": 130130 + }, + { + "epoch": 0.9420400008686399, + "grad_norm": 0.14476777613162994, + "learning_rate": 4.057967237797419e-06, + "loss": 0.8852, + "step": 130140 + }, + { + "epoch": 0.9421123875292261, + "grad_norm": 0.14453761279582977, + "learning_rate": 4.057894851136833e-06, + "loss": 0.8608, + "step": 130150 + }, + { + "epoch": 0.9421847741898123, + "grad_norm": 0.1491234302520752, + "learning_rate": 4.0578224644762465e-06, + "loss": 0.8662, + "step": 130160 + }, + { + "epoch": 0.9422571608503985, + "grad_norm": 0.1594741940498352, + "learning_rate": 4.05775007781566e-06, + "loss": 0.8748, + "step": 130170 + }, + { + "epoch": 0.9423295475109846, + "grad_norm": 0.15636101365089417, + "learning_rate": 4.0576776911550746e-06, + "loss": 0.8674, + "step": 130180 + }, + { + "epoch": 0.9424019341715708, + "grad_norm": 0.1590276062488556, + "learning_rate": 4.057605304494488e-06, + "loss": 0.8547, + "step": 130190 + }, + { + "epoch": 0.9424743208321571, + "grad_norm": 0.15569545328617096, + "learning_rate": 4.057532917833902e-06, + "loss": 0.8607, + "step": 130200 + }, + { + "epoch": 0.9425467074927433, + "grad_norm": 0.15706123411655426, + "learning_rate": 4.057460531173315e-06, + "loss": 0.8647, + "step": 130210 + }, + { + "epoch": 0.9426190941533295, + "grad_norm": 0.17118285596370697, + "learning_rate": 4.057388144512729e-06, + "loss": 0.8688, + "step": 130220 + }, + { + "epoch": 0.9426914808139156, + "grad_norm": 0.1491112858057022, + "learning_rate": 4.0573157578521435e-06, + "loss": 0.8635, + "step": 130230 + }, + { + "epoch": 0.9427638674745018, + "grad_norm": 0.15699629485607147, + "learning_rate": 4.057243371191557e-06, + "loss": 0.8717, + "step": 130240 + }, + { + "epoch": 0.942836254135088, + "grad_norm": 0.1544734388589859, + "learning_rate": 4.057170984530971e-06, + "loss": 0.8608, + "step": 130250 + }, + { + "epoch": 0.9429086407956742, + "grad_norm": 0.15462660789489746, + "learning_rate": 4.057098597870384e-06, + "loss": 0.8624, + "step": 130260 + }, + { + "epoch": 0.9429810274562603, + "grad_norm": 0.1702507883310318, + "learning_rate": 4.057026211209799e-06, + "loss": 0.8809, + "step": 130270 + }, + { + "epoch": 0.9430534141168465, + "grad_norm": 0.1495058238506317, + "learning_rate": 4.0569538245492124e-06, + "loss": 0.8564, + "step": 130280 + }, + { + "epoch": 0.9431258007774327, + "grad_norm": 0.1570599526166916, + "learning_rate": 4.056881437888626e-06, + "loss": 0.8738, + "step": 130290 + }, + { + "epoch": 0.943198187438019, + "grad_norm": 0.14418993890285492, + "learning_rate": 4.05680905122804e-06, + "loss": 0.8863, + "step": 130300 + }, + { + "epoch": 0.9432705740986052, + "grad_norm": 0.1544889211654663, + "learning_rate": 4.056736664567454e-06, + "loss": 0.878, + "step": 130310 + }, + { + "epoch": 0.9433429607591913, + "grad_norm": 0.151493638753891, + "learning_rate": 4.056664277906868e-06, + "loss": 0.8675, + "step": 130320 + }, + { + "epoch": 0.9434153474197775, + "grad_norm": 0.15243935585021973, + "learning_rate": 4.056591891246281e-06, + "loss": 0.8659, + "step": 130330 + }, + { + "epoch": 0.9434877340803637, + "grad_norm": 0.15640780329704285, + "learning_rate": 4.056519504585695e-06, + "loss": 0.8789, + "step": 130340 + }, + { + "epoch": 0.9435601207409499, + "grad_norm": 0.16906537115573883, + "learning_rate": 4.0564471179251094e-06, + "loss": 0.8718, + "step": 130350 + }, + { + "epoch": 0.943632507401536, + "grad_norm": 0.13774484395980835, + "learning_rate": 4.056374731264523e-06, + "loss": 0.8684, + "step": 130360 + }, + { + "epoch": 0.9437048940621222, + "grad_norm": 0.18474681675434113, + "learning_rate": 4.056302344603937e-06, + "loss": 0.8737, + "step": 130370 + }, + { + "epoch": 0.9437772807227084, + "grad_norm": 0.15171298384666443, + "learning_rate": 4.05622995794335e-06, + "loss": 0.8742, + "step": 130380 + }, + { + "epoch": 0.9438496673832946, + "grad_norm": 0.1634998321533203, + "learning_rate": 4.056157571282765e-06, + "loss": 0.8671, + "step": 130390 + }, + { + "epoch": 0.9439220540438807, + "grad_norm": 0.15286368131637573, + "learning_rate": 4.056085184622178e-06, + "loss": 0.874, + "step": 130400 + }, + { + "epoch": 0.943994440704467, + "grad_norm": 0.17988328635692596, + "learning_rate": 4.056012797961592e-06, + "loss": 0.8764, + "step": 130410 + }, + { + "epoch": 0.9440668273650532, + "grad_norm": 0.1676689237356186, + "learning_rate": 4.055940411301006e-06, + "loss": 0.8714, + "step": 130420 + }, + { + "epoch": 0.9441392140256394, + "grad_norm": 0.15611009299755096, + "learning_rate": 4.05586802464042e-06, + "loss": 0.8603, + "step": 130430 + }, + { + "epoch": 0.9442116006862256, + "grad_norm": 0.1466500610113144, + "learning_rate": 4.055795637979834e-06, + "loss": 0.8706, + "step": 130440 + }, + { + "epoch": 0.9442839873468117, + "grad_norm": 0.1896083503961563, + "learning_rate": 4.055723251319247e-06, + "loss": 0.8681, + "step": 130450 + }, + { + "epoch": 0.9443563740073979, + "grad_norm": 0.15304064750671387, + "learning_rate": 4.055650864658661e-06, + "loss": 0.8754, + "step": 130460 + }, + { + "epoch": 0.9444287606679841, + "grad_norm": 0.15458323061466217, + "learning_rate": 4.055578477998075e-06, + "loss": 0.8714, + "step": 130470 + }, + { + "epoch": 0.9445011473285703, + "grad_norm": 0.15180428326129913, + "learning_rate": 4.055506091337489e-06, + "loss": 0.87, + "step": 130480 + }, + { + "epoch": 0.9445735339891564, + "grad_norm": 0.1695541888475418, + "learning_rate": 4.055433704676903e-06, + "loss": 0.862, + "step": 130490 + }, + { + "epoch": 0.9446459206497426, + "grad_norm": 0.1498434692621231, + "learning_rate": 4.055361318016316e-06, + "loss": 0.8605, + "step": 130500 + }, + { + "epoch": 0.9447183073103288, + "grad_norm": 0.1435045748949051, + "learning_rate": 4.055288931355731e-06, + "loss": 0.8729, + "step": 130510 + }, + { + "epoch": 0.9447906939709151, + "grad_norm": 0.14459870755672455, + "learning_rate": 4.055216544695144e-06, + "loss": 0.8589, + "step": 130520 + }, + { + "epoch": 0.9448630806315013, + "grad_norm": 0.1518053263425827, + "learning_rate": 4.055144158034558e-06, + "loss": 0.8549, + "step": 130530 + }, + { + "epoch": 0.9449354672920874, + "grad_norm": 0.1755157858133316, + "learning_rate": 4.0550717713739715e-06, + "loss": 0.8627, + "step": 130540 + }, + { + "epoch": 0.9450078539526736, + "grad_norm": 0.16715474426746368, + "learning_rate": 4.054999384713386e-06, + "loss": 0.8732, + "step": 130550 + }, + { + "epoch": 0.9450802406132598, + "grad_norm": 0.16011476516723633, + "learning_rate": 4.0549269980528e-06, + "loss": 0.8697, + "step": 130560 + }, + { + "epoch": 0.945152627273846, + "grad_norm": 0.1612127423286438, + "learning_rate": 4.054854611392212e-06, + "loss": 0.8733, + "step": 130570 + }, + { + "epoch": 0.9452250139344321, + "grad_norm": 0.15211746096611023, + "learning_rate": 4.054782224731627e-06, + "loss": 0.8724, + "step": 130580 + }, + { + "epoch": 0.9452974005950183, + "grad_norm": 0.1653570979833603, + "learning_rate": 4.0547098380710405e-06, + "loss": 0.8681, + "step": 130590 + }, + { + "epoch": 0.9453697872556045, + "grad_norm": 0.14813409745693207, + "learning_rate": 4.054637451410454e-06, + "loss": 0.8693, + "step": 130600 + }, + { + "epoch": 0.9454421739161907, + "grad_norm": 0.15209084749221802, + "learning_rate": 4.054565064749868e-06, + "loss": 0.8623, + "step": 130610 + }, + { + "epoch": 0.945514560576777, + "grad_norm": 0.15478059649467468, + "learning_rate": 4.054492678089282e-06, + "loss": 0.874, + "step": 130620 + }, + { + "epoch": 0.9455869472373631, + "grad_norm": 0.3934093713760376, + "learning_rate": 4.054420291428696e-06, + "loss": 0.8708, + "step": 130630 + }, + { + "epoch": 0.9456593338979493, + "grad_norm": 0.1564350724220276, + "learning_rate": 4.054347904768109e-06, + "loss": 0.8693, + "step": 130640 + }, + { + "epoch": 0.9457317205585355, + "grad_norm": 0.14715883135795593, + "learning_rate": 4.054275518107523e-06, + "loss": 0.8701, + "step": 130650 + }, + { + "epoch": 0.9458041072191217, + "grad_norm": 0.1694241613149643, + "learning_rate": 4.0542031314469375e-06, + "loss": 0.8761, + "step": 130660 + }, + { + "epoch": 0.9458764938797078, + "grad_norm": 0.16506516933441162, + "learning_rate": 4.054130744786351e-06, + "loss": 0.8759, + "step": 130670 + }, + { + "epoch": 0.945948880540294, + "grad_norm": 0.14801914989948273, + "learning_rate": 4.054058358125765e-06, + "loss": 0.8667, + "step": 130680 + }, + { + "epoch": 0.9460212672008802, + "grad_norm": 0.15450334548950195, + "learning_rate": 4.053985971465178e-06, + "loss": 0.8587, + "step": 130690 + }, + { + "epoch": 0.9460936538614664, + "grad_norm": 0.1489882618188858, + "learning_rate": 4.053913584804593e-06, + "loss": 0.8715, + "step": 130700 + }, + { + "epoch": 0.9461660405220526, + "grad_norm": 0.1521863490343094, + "learning_rate": 4.053841198144006e-06, + "loss": 0.8708, + "step": 130710 + }, + { + "epoch": 0.9462384271826387, + "grad_norm": 0.15034830570220947, + "learning_rate": 4.05376881148342e-06, + "loss": 0.8874, + "step": 130720 + }, + { + "epoch": 0.946310813843225, + "grad_norm": 0.1745532602071762, + "learning_rate": 4.053696424822834e-06, + "loss": 0.8755, + "step": 130730 + }, + { + "epoch": 0.9463832005038112, + "grad_norm": 0.15382978320121765, + "learning_rate": 4.053624038162248e-06, + "loss": 0.8506, + "step": 130740 + }, + { + "epoch": 0.9464555871643974, + "grad_norm": 0.16145150363445282, + "learning_rate": 4.053551651501662e-06, + "loss": 0.8639, + "step": 130750 + }, + { + "epoch": 0.9465279738249835, + "grad_norm": 0.14968262612819672, + "learning_rate": 4.053479264841075e-06, + "loss": 0.8719, + "step": 130760 + }, + { + "epoch": 0.9466003604855697, + "grad_norm": 0.17151197791099548, + "learning_rate": 4.053406878180489e-06, + "loss": 0.8707, + "step": 130770 + }, + { + "epoch": 0.9466727471461559, + "grad_norm": 0.149154931306839, + "learning_rate": 4.053334491519903e-06, + "loss": 0.8683, + "step": 130780 + }, + { + "epoch": 0.9467451338067421, + "grad_norm": 0.19101570546627045, + "learning_rate": 4.053262104859317e-06, + "loss": 0.8669, + "step": 130790 + }, + { + "epoch": 0.9468175204673283, + "grad_norm": 0.14117294549942017, + "learning_rate": 4.053189718198731e-06, + "loss": 0.8595, + "step": 130800 + }, + { + "epoch": 0.9468899071279144, + "grad_norm": 0.14957231283187866, + "learning_rate": 4.053117331538144e-06, + "loss": 0.863, + "step": 130810 + }, + { + "epoch": 0.9469622937885006, + "grad_norm": 0.1667657345533371, + "learning_rate": 4.053044944877558e-06, + "loss": 0.8785, + "step": 130820 + }, + { + "epoch": 0.9470346804490869, + "grad_norm": 0.1488480567932129, + "learning_rate": 4.052972558216972e-06, + "loss": 0.8482, + "step": 130830 + }, + { + "epoch": 0.9471070671096731, + "grad_norm": 0.16403774917125702, + "learning_rate": 4.052900171556386e-06, + "loss": 0.8699, + "step": 130840 + }, + { + "epoch": 0.9471794537702592, + "grad_norm": 0.14479616284370422, + "learning_rate": 4.0528277848957996e-06, + "loss": 0.8675, + "step": 130850 + }, + { + "epoch": 0.9472518404308454, + "grad_norm": 0.157504141330719, + "learning_rate": 4.052755398235213e-06, + "loss": 0.862, + "step": 130860 + }, + { + "epoch": 0.9473242270914316, + "grad_norm": 0.15569128096103668, + "learning_rate": 4.052683011574628e-06, + "loss": 0.8629, + "step": 130870 + }, + { + "epoch": 0.9473966137520178, + "grad_norm": 0.15487146377563477, + "learning_rate": 4.052610624914041e-06, + "loss": 0.87, + "step": 130880 + }, + { + "epoch": 0.947469000412604, + "grad_norm": 0.152805894613266, + "learning_rate": 4.052538238253455e-06, + "loss": 0.8711, + "step": 130890 + }, + { + "epoch": 0.9475413870731901, + "grad_norm": 0.15563584864139557, + "learning_rate": 4.0524658515928685e-06, + "loss": 0.8485, + "step": 130900 + }, + { + "epoch": 0.9476137737337763, + "grad_norm": 0.1475389450788498, + "learning_rate": 4.052393464932283e-06, + "loss": 0.8782, + "step": 130910 + }, + { + "epoch": 0.9476861603943625, + "grad_norm": 0.160760760307312, + "learning_rate": 4.0523210782716966e-06, + "loss": 0.8786, + "step": 130920 + }, + { + "epoch": 0.9477585470549487, + "grad_norm": 0.1502762883901596, + "learning_rate": 4.05224869161111e-06, + "loss": 0.8633, + "step": 130930 + }, + { + "epoch": 0.947830933715535, + "grad_norm": 0.14664162695407867, + "learning_rate": 4.052176304950524e-06, + "loss": 0.878, + "step": 130940 + }, + { + "epoch": 0.9479033203761211, + "grad_norm": 0.15366783738136292, + "learning_rate": 4.052103918289938e-06, + "loss": 0.8672, + "step": 130950 + }, + { + "epoch": 0.9479757070367073, + "grad_norm": 0.1406678408384323, + "learning_rate": 4.052031531629352e-06, + "loss": 0.8716, + "step": 130960 + }, + { + "epoch": 0.9480480936972935, + "grad_norm": 0.14894571900367737, + "learning_rate": 4.0519591449687655e-06, + "loss": 0.8601, + "step": 130970 + }, + { + "epoch": 0.9481204803578797, + "grad_norm": 0.14522773027420044, + "learning_rate": 4.051886758308179e-06, + "loss": 0.8797, + "step": 130980 + }, + { + "epoch": 0.9481928670184658, + "grad_norm": 0.1636369526386261, + "learning_rate": 4.051814371647594e-06, + "loss": 0.87, + "step": 130990 + }, + { + "epoch": 0.948265253679052, + "grad_norm": 0.19674119353294373, + "learning_rate": 4.051741984987007e-06, + "loss": 0.8732, + "step": 131000 + }, + { + "epoch": 0.9483376403396382, + "grad_norm": 0.16425974667072296, + "learning_rate": 4.051669598326421e-06, + "loss": 0.8701, + "step": 131010 + }, + { + "epoch": 0.9484100270002244, + "grad_norm": 0.15251362323760986, + "learning_rate": 4.0515972116658344e-06, + "loss": 0.8686, + "step": 131020 + }, + { + "epoch": 0.9484824136608105, + "grad_norm": 0.15562012791633606, + "learning_rate": 4.051524825005249e-06, + "loss": 0.8516, + "step": 131030 + }, + { + "epoch": 0.9485548003213967, + "grad_norm": 0.14685966074466705, + "learning_rate": 4.0514524383446625e-06, + "loss": 0.8677, + "step": 131040 + }, + { + "epoch": 0.948627186981983, + "grad_norm": 0.15165336430072784, + "learning_rate": 4.051380051684076e-06, + "loss": 0.8594, + "step": 131050 + }, + { + "epoch": 0.9486995736425692, + "grad_norm": 0.13826829195022583, + "learning_rate": 4.05130766502349e-06, + "loss": 0.8599, + "step": 131060 + }, + { + "epoch": 0.9487719603031554, + "grad_norm": 0.19004106521606445, + "learning_rate": 4.051235278362904e-06, + "loss": 0.8743, + "step": 131070 + }, + { + "epoch": 0.9488443469637415, + "grad_norm": 0.1457131803035736, + "learning_rate": 4.051162891702318e-06, + "loss": 0.8736, + "step": 131080 + }, + { + "epoch": 0.9489167336243277, + "grad_norm": 0.16379250586032867, + "learning_rate": 4.0510905050417314e-06, + "loss": 0.86, + "step": 131090 + }, + { + "epoch": 0.9489891202849139, + "grad_norm": 0.14875370264053345, + "learning_rate": 4.051018118381145e-06, + "loss": 0.8776, + "step": 131100 + }, + { + "epoch": 0.9490615069455001, + "grad_norm": 0.14659325778484344, + "learning_rate": 4.050945731720559e-06, + "loss": 0.8758, + "step": 131110 + }, + { + "epoch": 0.9491338936060862, + "grad_norm": 0.15121561288833618, + "learning_rate": 4.050873345059972e-06, + "loss": 0.8544, + "step": 131120 + }, + { + "epoch": 0.9492062802666724, + "grad_norm": 0.14305542409420013, + "learning_rate": 4.050800958399386e-06, + "loss": 0.8744, + "step": 131130 + }, + { + "epoch": 0.9492786669272586, + "grad_norm": 0.15504080057144165, + "learning_rate": 4.0507285717388e-06, + "loss": 0.8705, + "step": 131140 + }, + { + "epoch": 0.9493510535878449, + "grad_norm": 0.14514702558517456, + "learning_rate": 4.050656185078214e-06, + "loss": 0.8657, + "step": 131150 + }, + { + "epoch": 0.9494234402484311, + "grad_norm": 0.15616527199745178, + "learning_rate": 4.050583798417628e-06, + "loss": 0.8626, + "step": 131160 + }, + { + "epoch": 0.9494958269090172, + "grad_norm": 0.15663272142410278, + "learning_rate": 4.050511411757041e-06, + "loss": 0.8605, + "step": 131170 + }, + { + "epoch": 0.9495682135696034, + "grad_norm": 0.1529805064201355, + "learning_rate": 4.050439025096456e-06, + "loss": 0.8636, + "step": 131180 + }, + { + "epoch": 0.9496406002301896, + "grad_norm": 0.16288772225379944, + "learning_rate": 4.050366638435869e-06, + "loss": 0.8525, + "step": 131190 + }, + { + "epoch": 0.9497129868907758, + "grad_norm": 0.14723965525627136, + "learning_rate": 4.050294251775283e-06, + "loss": 0.8596, + "step": 131200 + }, + { + "epoch": 0.9497853735513619, + "grad_norm": 0.14269182085990906, + "learning_rate": 4.0502218651146965e-06, + "loss": 0.8667, + "step": 131210 + }, + { + "epoch": 0.9498577602119481, + "grad_norm": 0.16036245226860046, + "learning_rate": 4.050149478454111e-06, + "loss": 0.8724, + "step": 131220 + }, + { + "epoch": 0.9499301468725343, + "grad_norm": 0.15207916498184204, + "learning_rate": 4.050077091793525e-06, + "loss": 0.8688, + "step": 131230 + }, + { + "epoch": 0.9500025335331205, + "grad_norm": 0.1509644240140915, + "learning_rate": 4.050004705132938e-06, + "loss": 0.8553, + "step": 131240 + }, + { + "epoch": 0.9500749201937067, + "grad_norm": 0.14878825843334198, + "learning_rate": 4.049932318472352e-06, + "loss": 0.8579, + "step": 131250 + }, + { + "epoch": 0.9501473068542929, + "grad_norm": 0.1610831767320633, + "learning_rate": 4.049859931811766e-06, + "loss": 0.8732, + "step": 131260 + }, + { + "epoch": 0.9502196935148791, + "grad_norm": 0.1690327674150467, + "learning_rate": 4.04978754515118e-06, + "loss": 0.8738, + "step": 131270 + }, + { + "epoch": 0.9502920801754653, + "grad_norm": 0.16976772248744965, + "learning_rate": 4.0497151584905935e-06, + "loss": 0.8626, + "step": 131280 + }, + { + "epoch": 0.9503644668360515, + "grad_norm": 0.1506384015083313, + "learning_rate": 4.049642771830007e-06, + "loss": 0.8642, + "step": 131290 + }, + { + "epoch": 0.9504368534966376, + "grad_norm": 0.1462264209985733, + "learning_rate": 4.049570385169422e-06, + "loss": 0.8646, + "step": 131300 + }, + { + "epoch": 0.9505092401572238, + "grad_norm": 0.14484186470508575, + "learning_rate": 4.049497998508835e-06, + "loss": 0.8771, + "step": 131310 + }, + { + "epoch": 0.95058162681781, + "grad_norm": 0.15840958058834076, + "learning_rate": 4.049425611848249e-06, + "loss": 0.8607, + "step": 131320 + }, + { + "epoch": 0.9506540134783962, + "grad_norm": 0.15120358765125275, + "learning_rate": 4.0493532251876625e-06, + "loss": 0.8766, + "step": 131330 + }, + { + "epoch": 0.9507264001389824, + "grad_norm": 0.15043680369853973, + "learning_rate": 4.049280838527077e-06, + "loss": 0.871, + "step": 131340 + }, + { + "epoch": 0.9507987867995685, + "grad_norm": 0.15595075488090515, + "learning_rate": 4.0492084518664905e-06, + "loss": 0.8668, + "step": 131350 + }, + { + "epoch": 0.9508711734601548, + "grad_norm": 0.15385489165782928, + "learning_rate": 4.049136065205904e-06, + "loss": 0.8748, + "step": 131360 + }, + { + "epoch": 0.950943560120741, + "grad_norm": 0.19055695831775665, + "learning_rate": 4.049063678545318e-06, + "loss": 0.8791, + "step": 131370 + }, + { + "epoch": 0.9510159467813272, + "grad_norm": 0.1542825996875763, + "learning_rate": 4.048991291884732e-06, + "loss": 0.8667, + "step": 131380 + }, + { + "epoch": 0.9510883334419133, + "grad_norm": 0.16472730040550232, + "learning_rate": 4.048918905224146e-06, + "loss": 0.8755, + "step": 131390 + }, + { + "epoch": 0.9511607201024995, + "grad_norm": 0.17082463204860687, + "learning_rate": 4.0488465185635595e-06, + "loss": 0.8434, + "step": 131400 + }, + { + "epoch": 0.9512331067630857, + "grad_norm": 0.1629323959350586, + "learning_rate": 4.048774131902973e-06, + "loss": 0.8712, + "step": 131410 + }, + { + "epoch": 0.9513054934236719, + "grad_norm": 0.1467832624912262, + "learning_rate": 4.0487017452423875e-06, + "loss": 0.8609, + "step": 131420 + }, + { + "epoch": 0.951377880084258, + "grad_norm": 0.15191030502319336, + "learning_rate": 4.048629358581801e-06, + "loss": 0.8718, + "step": 131430 + }, + { + "epoch": 0.9514502667448442, + "grad_norm": 0.17958003282546997, + "learning_rate": 4.048556971921215e-06, + "loss": 0.8696, + "step": 131440 + }, + { + "epoch": 0.9515226534054304, + "grad_norm": 0.16682404279708862, + "learning_rate": 4.048484585260628e-06, + "loss": 0.8676, + "step": 131450 + }, + { + "epoch": 0.9515950400660166, + "grad_norm": 0.1475543975830078, + "learning_rate": 4.048412198600042e-06, + "loss": 0.87, + "step": 131460 + }, + { + "epoch": 0.9516674267266029, + "grad_norm": 0.15572024881839752, + "learning_rate": 4.0483398119394565e-06, + "loss": 0.8793, + "step": 131470 + }, + { + "epoch": 0.951739813387189, + "grad_norm": 0.1579056978225708, + "learning_rate": 4.04826742527887e-06, + "loss": 0.8578, + "step": 131480 + }, + { + "epoch": 0.9518122000477752, + "grad_norm": 0.1470668464899063, + "learning_rate": 4.048195038618284e-06, + "loss": 0.8634, + "step": 131490 + }, + { + "epoch": 0.9518845867083614, + "grad_norm": 0.1533782035112381, + "learning_rate": 4.048122651957697e-06, + "loss": 0.852, + "step": 131500 + }, + { + "epoch": 0.9519569733689476, + "grad_norm": 0.16424772143363953, + "learning_rate": 4.048050265297112e-06, + "loss": 0.8566, + "step": 131510 + }, + { + "epoch": 0.9520293600295338, + "grad_norm": 0.17163357138633728, + "learning_rate": 4.047977878636525e-06, + "loss": 0.8657, + "step": 131520 + }, + { + "epoch": 0.9521017466901199, + "grad_norm": 0.15258784592151642, + "learning_rate": 4.047905491975939e-06, + "loss": 0.8786, + "step": 131530 + }, + { + "epoch": 0.9521741333507061, + "grad_norm": 0.15149055421352386, + "learning_rate": 4.047833105315353e-06, + "loss": 0.8627, + "step": 131540 + }, + { + "epoch": 0.9522465200112923, + "grad_norm": 0.15331679582595825, + "learning_rate": 4.047760718654767e-06, + "loss": 0.8613, + "step": 131550 + }, + { + "epoch": 0.9523189066718785, + "grad_norm": 0.15409405529499054, + "learning_rate": 4.047688331994181e-06, + "loss": 0.8754, + "step": 131560 + }, + { + "epoch": 0.9523912933324646, + "grad_norm": 0.16300641000270844, + "learning_rate": 4.047615945333594e-06, + "loss": 0.8851, + "step": 131570 + }, + { + "epoch": 0.9524636799930509, + "grad_norm": 0.1475706696510315, + "learning_rate": 4.047543558673008e-06, + "loss": 0.8686, + "step": 131580 + }, + { + "epoch": 0.9525360666536371, + "grad_norm": 0.14005255699157715, + "learning_rate": 4.047471172012422e-06, + "loss": 0.8771, + "step": 131590 + }, + { + "epoch": 0.9526084533142233, + "grad_norm": 0.152145653963089, + "learning_rate": 4.047398785351836e-06, + "loss": 0.8652, + "step": 131600 + }, + { + "epoch": 0.9526808399748095, + "grad_norm": 0.15361985564231873, + "learning_rate": 4.04732639869125e-06, + "loss": 0.8609, + "step": 131610 + }, + { + "epoch": 0.9527532266353956, + "grad_norm": 0.15738271176815033, + "learning_rate": 4.047254012030663e-06, + "loss": 0.8731, + "step": 131620 + }, + { + "epoch": 0.9528256132959818, + "grad_norm": 0.15454953908920288, + "learning_rate": 4.047181625370077e-06, + "loss": 0.8549, + "step": 131630 + }, + { + "epoch": 0.952897999956568, + "grad_norm": 0.17715829610824585, + "learning_rate": 4.0471092387094905e-06, + "loss": 0.864, + "step": 131640 + }, + { + "epoch": 0.9529703866171542, + "grad_norm": 0.16041779518127441, + "learning_rate": 4.047036852048904e-06, + "loss": 0.8823, + "step": 131650 + }, + { + "epoch": 0.9530427732777403, + "grad_norm": 0.1558634638786316, + "learning_rate": 4.0469644653883186e-06, + "loss": 0.8816, + "step": 131660 + }, + { + "epoch": 0.9531151599383265, + "grad_norm": 0.16207058727741241, + "learning_rate": 4.046892078727732e-06, + "loss": 0.8568, + "step": 131670 + }, + { + "epoch": 0.9531875465989128, + "grad_norm": 0.16530075669288635, + "learning_rate": 4.046819692067146e-06, + "loss": 0.8695, + "step": 131680 + }, + { + "epoch": 0.953259933259499, + "grad_norm": 0.16175314784049988, + "learning_rate": 4.046747305406559e-06, + "loss": 0.8662, + "step": 131690 + }, + { + "epoch": 0.9533323199200852, + "grad_norm": 0.1511378288269043, + "learning_rate": 4.046674918745974e-06, + "loss": 0.8655, + "step": 131700 + }, + { + "epoch": 0.9534047065806713, + "grad_norm": 0.15488837659358978, + "learning_rate": 4.0466025320853875e-06, + "loss": 0.8634, + "step": 131710 + }, + { + "epoch": 0.9534770932412575, + "grad_norm": 0.15649256110191345, + "learning_rate": 4.046530145424801e-06, + "loss": 0.8821, + "step": 131720 + }, + { + "epoch": 0.9535494799018437, + "grad_norm": 0.14642643928527832, + "learning_rate": 4.046457758764215e-06, + "loss": 0.8704, + "step": 131730 + }, + { + "epoch": 0.9536218665624299, + "grad_norm": 0.15652616322040558, + "learning_rate": 4.046385372103629e-06, + "loss": 0.8604, + "step": 131740 + }, + { + "epoch": 0.953694253223016, + "grad_norm": 0.15622968971729279, + "learning_rate": 4.046312985443043e-06, + "loss": 0.8704, + "step": 131750 + }, + { + "epoch": 0.9537666398836022, + "grad_norm": 0.1728099137544632, + "learning_rate": 4.0462405987824564e-06, + "loss": 0.8708, + "step": 131760 + }, + { + "epoch": 0.9538390265441884, + "grad_norm": 0.15809345245361328, + "learning_rate": 4.04616821212187e-06, + "loss": 0.8614, + "step": 131770 + }, + { + "epoch": 0.9539114132047746, + "grad_norm": 0.1720665991306305, + "learning_rate": 4.0460958254612845e-06, + "loss": 0.861, + "step": 131780 + }, + { + "epoch": 0.9539837998653609, + "grad_norm": 0.15099675953388214, + "learning_rate": 4.046023438800698e-06, + "loss": 0.8784, + "step": 131790 + }, + { + "epoch": 0.954056186525947, + "grad_norm": 0.14246854186058044, + "learning_rate": 4.045951052140112e-06, + "loss": 0.8659, + "step": 131800 + }, + { + "epoch": 0.9541285731865332, + "grad_norm": 0.15169619023799896, + "learning_rate": 4.045878665479525e-06, + "loss": 0.8672, + "step": 131810 + }, + { + "epoch": 0.9542009598471194, + "grad_norm": 0.15939058363437653, + "learning_rate": 4.04580627881894e-06, + "loss": 0.8608, + "step": 131820 + }, + { + "epoch": 0.9542733465077056, + "grad_norm": 0.16501100361347198, + "learning_rate": 4.0457338921583534e-06, + "loss": 0.857, + "step": 131830 + }, + { + "epoch": 0.9543457331682917, + "grad_norm": 1.6813664436340332, + "learning_rate": 4.045661505497767e-06, + "loss": 0.8666, + "step": 131840 + }, + { + "epoch": 0.9544181198288779, + "grad_norm": 0.14207109808921814, + "learning_rate": 4.045589118837181e-06, + "loss": 0.8631, + "step": 131850 + }, + { + "epoch": 0.9544905064894641, + "grad_norm": 0.14771947264671326, + "learning_rate": 4.045516732176595e-06, + "loss": 0.8668, + "step": 131860 + }, + { + "epoch": 0.9545628931500503, + "grad_norm": 0.15695475041866302, + "learning_rate": 4.045444345516009e-06, + "loss": 0.8629, + "step": 131870 + }, + { + "epoch": 0.9546352798106365, + "grad_norm": 0.15353751182556152, + "learning_rate": 4.045371958855422e-06, + "loss": 0.858, + "step": 131880 + }, + { + "epoch": 0.9547076664712227, + "grad_norm": 0.15183933079242706, + "learning_rate": 4.045299572194836e-06, + "loss": 0.8694, + "step": 131890 + }, + { + "epoch": 0.9547800531318089, + "grad_norm": 0.16019755601882935, + "learning_rate": 4.0452271855342504e-06, + "loss": 0.8774, + "step": 131900 + }, + { + "epoch": 0.9548524397923951, + "grad_norm": 0.15118776261806488, + "learning_rate": 4.045154798873664e-06, + "loss": 0.8733, + "step": 131910 + }, + { + "epoch": 0.9549248264529813, + "grad_norm": 0.15187504887580872, + "learning_rate": 4.045082412213078e-06, + "loss": 0.8695, + "step": 131920 + }, + { + "epoch": 0.9549972131135674, + "grad_norm": 0.15506109595298767, + "learning_rate": 4.045010025552491e-06, + "loss": 0.8637, + "step": 131930 + }, + { + "epoch": 0.9550695997741536, + "grad_norm": 0.14858941733837128, + "learning_rate": 4.044937638891906e-06, + "loss": 0.8727, + "step": 131940 + }, + { + "epoch": 0.9551419864347398, + "grad_norm": 0.15059681236743927, + "learning_rate": 4.044865252231319e-06, + "loss": 0.8503, + "step": 131950 + }, + { + "epoch": 0.955214373095326, + "grad_norm": 0.14260925352573395, + "learning_rate": 4.044792865570733e-06, + "loss": 0.8507, + "step": 131960 + }, + { + "epoch": 0.9552867597559122, + "grad_norm": 0.16660186648368835, + "learning_rate": 4.044720478910147e-06, + "loss": 0.8809, + "step": 131970 + }, + { + "epoch": 0.9553591464164983, + "grad_norm": 0.1452503353357315, + "learning_rate": 4.044648092249561e-06, + "loss": 0.8595, + "step": 131980 + }, + { + "epoch": 0.9554315330770845, + "grad_norm": 0.14808453619480133, + "learning_rate": 4.044575705588975e-06, + "loss": 0.8595, + "step": 131990 + }, + { + "epoch": 0.9555039197376708, + "grad_norm": 0.14562194049358368, + "learning_rate": 4.044503318928388e-06, + "loss": 0.8679, + "step": 132000 + }, + { + "epoch": 0.955576306398257, + "grad_norm": 0.1467532068490982, + "learning_rate": 4.044430932267802e-06, + "loss": 0.8722, + "step": 132010 + }, + { + "epoch": 0.9556486930588431, + "grad_norm": 0.15089406073093414, + "learning_rate": 4.044358545607216e-06, + "loss": 0.8647, + "step": 132020 + }, + { + "epoch": 0.9557210797194293, + "grad_norm": 0.1548631340265274, + "learning_rate": 4.04428615894663e-06, + "loss": 0.8675, + "step": 132030 + }, + { + "epoch": 0.9557934663800155, + "grad_norm": 0.15700556337833405, + "learning_rate": 4.044213772286044e-06, + "loss": 0.8643, + "step": 132040 + }, + { + "epoch": 0.9558658530406017, + "grad_norm": 0.1600215882062912, + "learning_rate": 4.044141385625457e-06, + "loss": 0.8749, + "step": 132050 + }, + { + "epoch": 0.9559382397011879, + "grad_norm": 0.1461002379655838, + "learning_rate": 4.044068998964871e-06, + "loss": 0.8759, + "step": 132060 + }, + { + "epoch": 0.956010626361774, + "grad_norm": 0.1531788557767868, + "learning_rate": 4.043996612304285e-06, + "loss": 0.8736, + "step": 132070 + }, + { + "epoch": 0.9560830130223602, + "grad_norm": 0.15142254531383514, + "learning_rate": 4.043924225643699e-06, + "loss": 0.857, + "step": 132080 + }, + { + "epoch": 0.9561553996829464, + "grad_norm": 0.1594085544347763, + "learning_rate": 4.0438518389831125e-06, + "loss": 0.87, + "step": 132090 + }, + { + "epoch": 0.9562277863435326, + "grad_norm": 0.16034048795700073, + "learning_rate": 4.043779452322526e-06, + "loss": 0.8755, + "step": 132100 + }, + { + "epoch": 0.9563001730041188, + "grad_norm": 0.1539410948753357, + "learning_rate": 4.043707065661941e-06, + "loss": 0.874, + "step": 132110 + }, + { + "epoch": 0.956372559664705, + "grad_norm": 0.15013930201530457, + "learning_rate": 4.043634679001354e-06, + "loss": 0.8666, + "step": 132120 + }, + { + "epoch": 0.9564449463252912, + "grad_norm": 0.15976254642009735, + "learning_rate": 4.043562292340768e-06, + "loss": 0.8697, + "step": 132130 + }, + { + "epoch": 0.9565173329858774, + "grad_norm": 0.16351239383220673, + "learning_rate": 4.0434899056801815e-06, + "loss": 0.8731, + "step": 132140 + }, + { + "epoch": 0.9565897196464636, + "grad_norm": 0.15584561228752136, + "learning_rate": 4.043417519019596e-06, + "loss": 0.8833, + "step": 132150 + }, + { + "epoch": 0.9566621063070497, + "grad_norm": 0.1562098264694214, + "learning_rate": 4.043345132359009e-06, + "loss": 0.872, + "step": 132160 + }, + { + "epoch": 0.9567344929676359, + "grad_norm": 0.156274676322937, + "learning_rate": 4.043272745698423e-06, + "loss": 0.8708, + "step": 132170 + }, + { + "epoch": 0.9568068796282221, + "grad_norm": 0.22564736008644104, + "learning_rate": 4.043200359037837e-06, + "loss": 0.8636, + "step": 132180 + }, + { + "epoch": 0.9568792662888083, + "grad_norm": 0.1602523922920227, + "learning_rate": 4.04312797237725e-06, + "loss": 0.8775, + "step": 132190 + }, + { + "epoch": 0.9569516529493944, + "grad_norm": 0.15143248438835144, + "learning_rate": 4.043055585716664e-06, + "loss": 0.8635, + "step": 132200 + }, + { + "epoch": 0.9570240396099807, + "grad_norm": 0.14714032411575317, + "learning_rate": 4.0429831990560785e-06, + "loss": 0.8642, + "step": 132210 + }, + { + "epoch": 0.9570964262705669, + "grad_norm": 0.1613510251045227, + "learning_rate": 4.042910812395492e-06, + "loss": 0.8645, + "step": 132220 + }, + { + "epoch": 0.9571688129311531, + "grad_norm": 0.17830723524093628, + "learning_rate": 4.042838425734906e-06, + "loss": 0.8672, + "step": 132230 + }, + { + "epoch": 0.9572411995917393, + "grad_norm": 0.15344306826591492, + "learning_rate": 4.042766039074319e-06, + "loss": 0.8679, + "step": 132240 + }, + { + "epoch": 0.9573135862523254, + "grad_norm": 0.15479233860969543, + "learning_rate": 4.042693652413733e-06, + "loss": 0.8674, + "step": 132250 + }, + { + "epoch": 0.9573859729129116, + "grad_norm": 0.1529737114906311, + "learning_rate": 4.042621265753147e-06, + "loss": 0.8539, + "step": 132260 + }, + { + "epoch": 0.9574583595734978, + "grad_norm": 0.14475677907466888, + "learning_rate": 4.042548879092561e-06, + "loss": 0.8693, + "step": 132270 + }, + { + "epoch": 0.957530746234084, + "grad_norm": 0.167455792427063, + "learning_rate": 4.042476492431975e-06, + "loss": 0.8576, + "step": 132280 + }, + { + "epoch": 0.9576031328946701, + "grad_norm": 0.14674553275108337, + "learning_rate": 4.042404105771388e-06, + "loss": 0.8753, + "step": 132290 + }, + { + "epoch": 0.9576755195552563, + "grad_norm": 0.20534364879131317, + "learning_rate": 4.042331719110803e-06, + "loss": 0.8661, + "step": 132300 + }, + { + "epoch": 0.9577479062158425, + "grad_norm": 0.16672727465629578, + "learning_rate": 4.042259332450216e-06, + "loss": 0.8645, + "step": 132310 + }, + { + "epoch": 0.9578202928764288, + "grad_norm": 0.14698807895183563, + "learning_rate": 4.04218694578963e-06, + "loss": 0.8645, + "step": 132320 + }, + { + "epoch": 0.957892679537015, + "grad_norm": 0.15321475267410278, + "learning_rate": 4.0421145591290436e-06, + "loss": 0.8692, + "step": 132330 + }, + { + "epoch": 0.9579650661976011, + "grad_norm": 0.15178924798965454, + "learning_rate": 4.042042172468458e-06, + "loss": 0.8705, + "step": 132340 + }, + { + "epoch": 0.9580374528581873, + "grad_norm": 0.701418936252594, + "learning_rate": 4.041969785807872e-06, + "loss": 0.8685, + "step": 132350 + }, + { + "epoch": 0.9581098395187735, + "grad_norm": 0.15373194217681885, + "learning_rate": 4.041897399147285e-06, + "loss": 0.867, + "step": 132360 + }, + { + "epoch": 0.9581822261793597, + "grad_norm": 0.15666644275188446, + "learning_rate": 4.041825012486699e-06, + "loss": 0.8715, + "step": 132370 + }, + { + "epoch": 0.9582546128399458, + "grad_norm": 0.14554648101329803, + "learning_rate": 4.041752625826113e-06, + "loss": 0.8612, + "step": 132380 + }, + { + "epoch": 0.958326999500532, + "grad_norm": 0.15833979845046997, + "learning_rate": 4.041680239165527e-06, + "loss": 0.8694, + "step": 132390 + }, + { + "epoch": 0.9583993861611182, + "grad_norm": 0.1463756412267685, + "learning_rate": 4.0416078525049406e-06, + "loss": 0.8691, + "step": 132400 + }, + { + "epoch": 0.9584717728217044, + "grad_norm": 0.14751847088336945, + "learning_rate": 4.041535465844354e-06, + "loss": 0.8666, + "step": 132410 + }, + { + "epoch": 0.9585441594822907, + "grad_norm": 0.16007982194423676, + "learning_rate": 4.041463079183769e-06, + "loss": 0.8766, + "step": 132420 + }, + { + "epoch": 0.9586165461428768, + "grad_norm": 0.1559884399175644, + "learning_rate": 4.041390692523182e-06, + "loss": 0.857, + "step": 132430 + }, + { + "epoch": 0.958688932803463, + "grad_norm": 0.16903673112392426, + "learning_rate": 4.041318305862596e-06, + "loss": 0.8728, + "step": 132440 + }, + { + "epoch": 0.9587613194640492, + "grad_norm": 0.15044556558132172, + "learning_rate": 4.0412459192020095e-06, + "loss": 0.8705, + "step": 132450 + }, + { + "epoch": 0.9588337061246354, + "grad_norm": 0.1696968823671341, + "learning_rate": 4.041173532541424e-06, + "loss": 0.8593, + "step": 132460 + }, + { + "epoch": 0.9589060927852215, + "grad_norm": 0.1612185686826706, + "learning_rate": 4.0411011458808376e-06, + "loss": 0.8674, + "step": 132470 + }, + { + "epoch": 0.9589784794458077, + "grad_norm": 0.1500626504421234, + "learning_rate": 4.041028759220251e-06, + "loss": 0.873, + "step": 132480 + }, + { + "epoch": 0.9590508661063939, + "grad_norm": 0.145673006772995, + "learning_rate": 4.040956372559665e-06, + "loss": 0.8659, + "step": 132490 + }, + { + "epoch": 0.9591232527669801, + "grad_norm": 0.15620875358581543, + "learning_rate": 4.040883985899079e-06, + "loss": 0.8645, + "step": 132500 + }, + { + "epoch": 0.9591956394275662, + "grad_norm": 0.16961318254470825, + "learning_rate": 4.040811599238493e-06, + "loss": 0.8722, + "step": 132510 + }, + { + "epoch": 0.9592680260881524, + "grad_norm": 0.14998477697372437, + "learning_rate": 4.0407392125779065e-06, + "loss": 0.8499, + "step": 132520 + }, + { + "epoch": 0.9593404127487387, + "grad_norm": 0.1547980010509491, + "learning_rate": 4.04066682591732e-06, + "loss": 0.8705, + "step": 132530 + }, + { + "epoch": 0.9594127994093249, + "grad_norm": 0.14744049310684204, + "learning_rate": 4.040594439256735e-06, + "loss": 0.8626, + "step": 132540 + }, + { + "epoch": 0.9594851860699111, + "grad_norm": 0.1580532044172287, + "learning_rate": 4.040522052596148e-06, + "loss": 0.8651, + "step": 132550 + }, + { + "epoch": 0.9595575727304972, + "grad_norm": 0.14146533608436584, + "learning_rate": 4.040449665935562e-06, + "loss": 0.8675, + "step": 132560 + }, + { + "epoch": 0.9596299593910834, + "grad_norm": 0.15694855153560638, + "learning_rate": 4.0403772792749754e-06, + "loss": 0.8776, + "step": 132570 + }, + { + "epoch": 0.9597023460516696, + "grad_norm": 0.15668827295303345, + "learning_rate": 4.04030489261439e-06, + "loss": 0.8643, + "step": 132580 + }, + { + "epoch": 0.9597747327122558, + "grad_norm": 0.15416784584522247, + "learning_rate": 4.0402325059538035e-06, + "loss": 0.8595, + "step": 132590 + }, + { + "epoch": 0.959847119372842, + "grad_norm": 0.15903611481189728, + "learning_rate": 4.040160119293217e-06, + "loss": 0.8715, + "step": 132600 + }, + { + "epoch": 0.9599195060334281, + "grad_norm": 0.14831218123435974, + "learning_rate": 4.040087732632631e-06, + "loss": 0.8719, + "step": 132610 + }, + { + "epoch": 0.9599918926940143, + "grad_norm": 0.16342884302139282, + "learning_rate": 4.040015345972045e-06, + "loss": 0.8718, + "step": 132620 + }, + { + "epoch": 0.9600642793546005, + "grad_norm": 0.16396649181842804, + "learning_rate": 4.039942959311459e-06, + "loss": 0.8556, + "step": 132630 + }, + { + "epoch": 0.9601366660151868, + "grad_norm": 0.16489477455615997, + "learning_rate": 4.0398705726508724e-06, + "loss": 0.8738, + "step": 132640 + }, + { + "epoch": 0.960209052675773, + "grad_norm": 0.14648328721523285, + "learning_rate": 4.039798185990286e-06, + "loss": 0.882, + "step": 132650 + }, + { + "epoch": 0.9602814393363591, + "grad_norm": 0.15724149346351624, + "learning_rate": 4.0397257993297005e-06, + "loss": 0.8638, + "step": 132660 + }, + { + "epoch": 0.9603538259969453, + "grad_norm": 0.15346843004226685, + "learning_rate": 4.039653412669114e-06, + "loss": 0.8824, + "step": 132670 + }, + { + "epoch": 0.9604262126575315, + "grad_norm": 0.15470701456069946, + "learning_rate": 4.039581026008528e-06, + "loss": 0.8663, + "step": 132680 + }, + { + "epoch": 0.9604985993181177, + "grad_norm": 0.14876993000507355, + "learning_rate": 4.039508639347941e-06, + "loss": 0.8618, + "step": 132690 + }, + { + "epoch": 0.9605709859787038, + "grad_norm": 0.14748069643974304, + "learning_rate": 4.039436252687355e-06, + "loss": 0.8742, + "step": 132700 + }, + { + "epoch": 0.96064337263929, + "grad_norm": 0.14472724497318268, + "learning_rate": 4.039363866026769e-06, + "loss": 0.8591, + "step": 132710 + }, + { + "epoch": 0.9607157592998762, + "grad_norm": 0.16464345157146454, + "learning_rate": 4.039291479366182e-06, + "loss": 0.868, + "step": 132720 + }, + { + "epoch": 0.9607881459604624, + "grad_norm": 0.15630069375038147, + "learning_rate": 4.039219092705597e-06, + "loss": 0.8675, + "step": 132730 + }, + { + "epoch": 0.9608605326210486, + "grad_norm": 0.1542377769947052, + "learning_rate": 4.03914670604501e-06, + "loss": 0.8643, + "step": 132740 + }, + { + "epoch": 0.9609329192816348, + "grad_norm": 0.15142075717449188, + "learning_rate": 4.039074319384424e-06, + "loss": 0.8583, + "step": 132750 + }, + { + "epoch": 0.961005305942221, + "grad_norm": 0.14970579743385315, + "learning_rate": 4.0390019327238375e-06, + "loss": 0.8726, + "step": 132760 + }, + { + "epoch": 0.9610776926028072, + "grad_norm": 0.1593393087387085, + "learning_rate": 4.038929546063252e-06, + "loss": 0.8746, + "step": 132770 + }, + { + "epoch": 0.9611500792633934, + "grad_norm": 0.154205784201622, + "learning_rate": 4.038857159402666e-06, + "loss": 0.8675, + "step": 132780 + }, + { + "epoch": 0.9612224659239795, + "grad_norm": 0.15986141562461853, + "learning_rate": 4.038784772742079e-06, + "loss": 0.8697, + "step": 132790 + }, + { + "epoch": 0.9612948525845657, + "grad_norm": 0.1516096591949463, + "learning_rate": 4.038712386081493e-06, + "loss": 0.8603, + "step": 132800 + }, + { + "epoch": 0.9613672392451519, + "grad_norm": 0.14772486686706543, + "learning_rate": 4.038639999420907e-06, + "loss": 0.8557, + "step": 132810 + }, + { + "epoch": 0.9614396259057381, + "grad_norm": 0.152225062251091, + "learning_rate": 4.038567612760321e-06, + "loss": 0.8603, + "step": 132820 + }, + { + "epoch": 0.9615120125663242, + "grad_norm": 0.1596631109714508, + "learning_rate": 4.0384952260997345e-06, + "loss": 0.8656, + "step": 132830 + }, + { + "epoch": 0.9615843992269104, + "grad_norm": 0.15353646874427795, + "learning_rate": 4.038422839439148e-06, + "loss": 0.8606, + "step": 132840 + }, + { + "epoch": 0.9616567858874967, + "grad_norm": 0.15093198418617249, + "learning_rate": 4.038350452778563e-06, + "loss": 0.8651, + "step": 132850 + }, + { + "epoch": 0.9617291725480829, + "grad_norm": 0.15121009945869446, + "learning_rate": 4.038278066117976e-06, + "loss": 0.8747, + "step": 132860 + }, + { + "epoch": 0.961801559208669, + "grad_norm": 0.142518550157547, + "learning_rate": 4.03820567945739e-06, + "loss": 0.8627, + "step": 132870 + }, + { + "epoch": 0.9618739458692552, + "grad_norm": 0.14168381690979004, + "learning_rate": 4.0381332927968035e-06, + "loss": 0.872, + "step": 132880 + }, + { + "epoch": 0.9619463325298414, + "grad_norm": 0.16068200767040253, + "learning_rate": 4.038060906136217e-06, + "loss": 0.8666, + "step": 132890 + }, + { + "epoch": 0.9620187191904276, + "grad_norm": 0.15603065490722656, + "learning_rate": 4.0379885194756315e-06, + "loss": 0.8768, + "step": 132900 + }, + { + "epoch": 0.9620911058510138, + "grad_norm": 0.16330550611019135, + "learning_rate": 4.037916132815045e-06, + "loss": 0.8713, + "step": 132910 + }, + { + "epoch": 0.9621634925115999, + "grad_norm": 0.15388086438179016, + "learning_rate": 4.037843746154459e-06, + "loss": 0.8824, + "step": 132920 + }, + { + "epoch": 0.9622358791721861, + "grad_norm": 0.1486055701971054, + "learning_rate": 4.037771359493872e-06, + "loss": 0.8652, + "step": 132930 + }, + { + "epoch": 0.9623082658327723, + "grad_norm": 0.16068662703037262, + "learning_rate": 4.037698972833287e-06, + "loss": 0.8553, + "step": 132940 + }, + { + "epoch": 0.9623806524933586, + "grad_norm": 0.14844128489494324, + "learning_rate": 4.0376265861727005e-06, + "loss": 0.8692, + "step": 132950 + }, + { + "epoch": 0.9624530391539448, + "grad_norm": 0.1547292321920395, + "learning_rate": 4.037554199512114e-06, + "loss": 0.8598, + "step": 132960 + }, + { + "epoch": 0.9625254258145309, + "grad_norm": 0.14296914637088776, + "learning_rate": 4.037481812851528e-06, + "loss": 0.8745, + "step": 132970 + }, + { + "epoch": 0.9625978124751171, + "grad_norm": 0.1457308977842331, + "learning_rate": 4.037409426190942e-06, + "loss": 0.8685, + "step": 132980 + }, + { + "epoch": 0.9626701991357033, + "grad_norm": 0.16108804941177368, + "learning_rate": 4.037337039530356e-06, + "loss": 0.8675, + "step": 132990 + }, + { + "epoch": 0.9627425857962895, + "grad_norm": 0.1842719167470932, + "learning_rate": 4.037264652869769e-06, + "loss": 0.8723, + "step": 133000 + }, + { + "epoch": 0.9628149724568756, + "grad_norm": 0.1494869738817215, + "learning_rate": 4.037192266209183e-06, + "loss": 0.8629, + "step": 133010 + }, + { + "epoch": 0.9628873591174618, + "grad_norm": 0.1545685976743698, + "learning_rate": 4.0371198795485975e-06, + "loss": 0.8732, + "step": 133020 + }, + { + "epoch": 0.962959745778048, + "grad_norm": 0.1837606430053711, + "learning_rate": 4.037047492888011e-06, + "loss": 0.8715, + "step": 133030 + }, + { + "epoch": 0.9630321324386342, + "grad_norm": 0.15043866634368896, + "learning_rate": 4.036975106227425e-06, + "loss": 0.8642, + "step": 133040 + }, + { + "epoch": 0.9631045190992203, + "grad_norm": 0.1640947014093399, + "learning_rate": 4.036902719566838e-06, + "loss": 0.8633, + "step": 133050 + }, + { + "epoch": 0.9631769057598066, + "grad_norm": 0.16168645024299622, + "learning_rate": 4.036830332906253e-06, + "loss": 0.8725, + "step": 133060 + }, + { + "epoch": 0.9632492924203928, + "grad_norm": 0.16159774363040924, + "learning_rate": 4.036757946245666e-06, + "loss": 0.8642, + "step": 133070 + }, + { + "epoch": 0.963321679080979, + "grad_norm": 0.19764064252376556, + "learning_rate": 4.03668555958508e-06, + "loss": 0.8708, + "step": 133080 + }, + { + "epoch": 0.9633940657415652, + "grad_norm": 0.16055376827716827, + "learning_rate": 4.036613172924494e-06, + "loss": 0.8577, + "step": 133090 + }, + { + "epoch": 0.9634664524021513, + "grad_norm": 0.14640270173549652, + "learning_rate": 4.036540786263908e-06, + "loss": 0.8696, + "step": 133100 + }, + { + "epoch": 0.9635388390627375, + "grad_norm": 0.18850268423557281, + "learning_rate": 4.036468399603322e-06, + "loss": 0.8629, + "step": 133110 + }, + { + "epoch": 0.9636112257233237, + "grad_norm": 0.16357038915157318, + "learning_rate": 4.036396012942735e-06, + "loss": 0.8767, + "step": 133120 + }, + { + "epoch": 0.9636836123839099, + "grad_norm": 0.17244000732898712, + "learning_rate": 4.036323626282149e-06, + "loss": 0.8624, + "step": 133130 + }, + { + "epoch": 0.963755999044496, + "grad_norm": 0.15212216973304749, + "learning_rate": 4.036251239621563e-06, + "loss": 0.8561, + "step": 133140 + }, + { + "epoch": 0.9638283857050822, + "grad_norm": 0.1720954179763794, + "learning_rate": 4.036178852960977e-06, + "loss": 0.8683, + "step": 133150 + }, + { + "epoch": 0.9639007723656684, + "grad_norm": 0.21422089636325836, + "learning_rate": 4.036106466300391e-06, + "loss": 0.8845, + "step": 133160 + }, + { + "epoch": 0.9639731590262547, + "grad_norm": 0.15769261121749878, + "learning_rate": 4.036034079639804e-06, + "loss": 0.8543, + "step": 133170 + }, + { + "epoch": 0.9640455456868409, + "grad_norm": 0.15512368083000183, + "learning_rate": 4.035961692979219e-06, + "loss": 0.8571, + "step": 133180 + }, + { + "epoch": 0.964117932347427, + "grad_norm": 0.16007642447948456, + "learning_rate": 4.035889306318632e-06, + "loss": 0.8621, + "step": 133190 + }, + { + "epoch": 0.9641903190080132, + "grad_norm": 0.16909575462341309, + "learning_rate": 4.035816919658046e-06, + "loss": 0.8751, + "step": 133200 + }, + { + "epoch": 0.9642627056685994, + "grad_norm": 0.158931702375412, + "learning_rate": 4.0357445329974596e-06, + "loss": 0.8655, + "step": 133210 + }, + { + "epoch": 0.9643350923291856, + "grad_norm": 0.16062036156654358, + "learning_rate": 4.035672146336873e-06, + "loss": 0.8705, + "step": 133220 + }, + { + "epoch": 0.9644074789897717, + "grad_norm": 0.15879908204078674, + "learning_rate": 4.035599759676287e-06, + "loss": 0.8609, + "step": 133230 + }, + { + "epoch": 0.9644798656503579, + "grad_norm": 0.15936613082885742, + "learning_rate": 4.0355273730157e-06, + "loss": 0.8663, + "step": 133240 + }, + { + "epoch": 0.9645522523109441, + "grad_norm": 0.159668430685997, + "learning_rate": 4.035454986355115e-06, + "loss": 0.8778, + "step": 133250 + }, + { + "epoch": 0.9646246389715303, + "grad_norm": 0.15538303554058075, + "learning_rate": 4.0353825996945285e-06, + "loss": 0.8709, + "step": 133260 + }, + { + "epoch": 0.9646970256321166, + "grad_norm": 0.19383670389652252, + "learning_rate": 4.035310213033942e-06, + "loss": 0.8635, + "step": 133270 + }, + { + "epoch": 0.9647694122927027, + "grad_norm": 0.14985784888267517, + "learning_rate": 4.035237826373356e-06, + "loss": 0.8639, + "step": 133280 + }, + { + "epoch": 0.9648417989532889, + "grad_norm": 0.15540927648544312, + "learning_rate": 4.03516543971277e-06, + "loss": 0.8737, + "step": 133290 + }, + { + "epoch": 0.9649141856138751, + "grad_norm": 0.16484995186328888, + "learning_rate": 4.035093053052184e-06, + "loss": 0.8722, + "step": 133300 + }, + { + "epoch": 0.9649865722744613, + "grad_norm": 0.15824109315872192, + "learning_rate": 4.0350206663915974e-06, + "loss": 0.8558, + "step": 133310 + }, + { + "epoch": 0.9650589589350475, + "grad_norm": 0.16974005103111267, + "learning_rate": 4.034948279731011e-06, + "loss": 0.8753, + "step": 133320 + }, + { + "epoch": 0.9651313455956336, + "grad_norm": 0.1548565924167633, + "learning_rate": 4.0348758930704255e-06, + "loss": 0.872, + "step": 133330 + }, + { + "epoch": 0.9652037322562198, + "grad_norm": 0.16429255902767181, + "learning_rate": 4.034803506409839e-06, + "loss": 0.8556, + "step": 133340 + }, + { + "epoch": 0.965276118916806, + "grad_norm": 0.16265404224395752, + "learning_rate": 4.034731119749253e-06, + "loss": 0.8644, + "step": 133350 + }, + { + "epoch": 0.9653485055773922, + "grad_norm": 0.15244075655937195, + "learning_rate": 4.034658733088666e-06, + "loss": 0.8821, + "step": 133360 + }, + { + "epoch": 0.9654208922379783, + "grad_norm": 0.14360937476158142, + "learning_rate": 4.034586346428081e-06, + "loss": 0.8896, + "step": 133370 + }, + { + "epoch": 0.9654932788985646, + "grad_norm": 0.22526615858078003, + "learning_rate": 4.0345139597674944e-06, + "loss": 0.8681, + "step": 133380 + }, + { + "epoch": 0.9655656655591508, + "grad_norm": 0.14955149590969086, + "learning_rate": 4.034441573106908e-06, + "loss": 0.8433, + "step": 133390 + }, + { + "epoch": 0.965638052219737, + "grad_norm": 0.1530320793390274, + "learning_rate": 4.034369186446322e-06, + "loss": 0.8705, + "step": 133400 + }, + { + "epoch": 0.9657104388803232, + "grad_norm": 0.15504902601242065, + "learning_rate": 4.034296799785736e-06, + "loss": 0.8548, + "step": 133410 + }, + { + "epoch": 0.9657828255409093, + "grad_norm": 0.1458277404308319, + "learning_rate": 4.03422441312515e-06, + "loss": 0.8773, + "step": 133420 + }, + { + "epoch": 0.9658552122014955, + "grad_norm": 0.14981764554977417, + "learning_rate": 4.034152026464563e-06, + "loss": 0.8849, + "step": 133430 + }, + { + "epoch": 0.9659275988620817, + "grad_norm": 0.15874448418617249, + "learning_rate": 4.034079639803977e-06, + "loss": 0.8573, + "step": 133440 + }, + { + "epoch": 0.9659999855226679, + "grad_norm": 0.1638738363981247, + "learning_rate": 4.0340072531433914e-06, + "loss": 0.859, + "step": 133450 + }, + { + "epoch": 0.966072372183254, + "grad_norm": 0.15449586510658264, + "learning_rate": 4.033934866482805e-06, + "loss": 0.8728, + "step": 133460 + }, + { + "epoch": 0.9661447588438402, + "grad_norm": 0.148344486951828, + "learning_rate": 4.033862479822219e-06, + "loss": 0.8585, + "step": 133470 + }, + { + "epoch": 0.9662171455044264, + "grad_norm": 0.14319495856761932, + "learning_rate": 4.033790093161632e-06, + "loss": 0.8753, + "step": 133480 + }, + { + "epoch": 0.9662895321650127, + "grad_norm": 0.16567128896713257, + "learning_rate": 4.033717706501046e-06, + "loss": 0.8751, + "step": 133490 + }, + { + "epoch": 0.9663619188255989, + "grad_norm": 0.14397595822811127, + "learning_rate": 4.03364531984046e-06, + "loss": 0.8629, + "step": 133500 + }, + { + "epoch": 0.966434305486185, + "grad_norm": 0.15650872886180878, + "learning_rate": 4.033572933179874e-06, + "loss": 0.8647, + "step": 133510 + }, + { + "epoch": 0.9665066921467712, + "grad_norm": 0.15653972327709198, + "learning_rate": 4.033500546519288e-06, + "loss": 0.8722, + "step": 133520 + }, + { + "epoch": 0.9665790788073574, + "grad_norm": 0.15006305277347565, + "learning_rate": 4.033428159858701e-06, + "loss": 0.8495, + "step": 133530 + }, + { + "epoch": 0.9666514654679436, + "grad_norm": 0.14864414930343628, + "learning_rate": 4.033355773198116e-06, + "loss": 0.8673, + "step": 133540 + }, + { + "epoch": 0.9667238521285297, + "grad_norm": 0.14984916150569916, + "learning_rate": 4.033283386537529e-06, + "loss": 0.8704, + "step": 133550 + }, + { + "epoch": 0.9667962387891159, + "grad_norm": 0.14670242369174957, + "learning_rate": 4.033210999876943e-06, + "loss": 0.8673, + "step": 133560 + }, + { + "epoch": 0.9668686254497021, + "grad_norm": 0.1624784618616104, + "learning_rate": 4.0331386132163565e-06, + "loss": 0.863, + "step": 133570 + }, + { + "epoch": 0.9669410121102883, + "grad_norm": 0.1424523890018463, + "learning_rate": 4.033066226555771e-06, + "loss": 0.8602, + "step": 133580 + }, + { + "epoch": 0.9670133987708746, + "grad_norm": 0.17853686213493347, + "learning_rate": 4.032993839895185e-06, + "loss": 0.8608, + "step": 133590 + }, + { + "epoch": 0.9670857854314607, + "grad_norm": 0.15653735399246216, + "learning_rate": 4.032921453234598e-06, + "loss": 0.8705, + "step": 133600 + }, + { + "epoch": 0.9671581720920469, + "grad_norm": 0.14804360270500183, + "learning_rate": 4.032849066574012e-06, + "loss": 0.8701, + "step": 133610 + }, + { + "epoch": 0.9672305587526331, + "grad_norm": 0.1479026824235916, + "learning_rate": 4.032776679913426e-06, + "loss": 0.8601, + "step": 133620 + }, + { + "epoch": 0.9673029454132193, + "grad_norm": 0.15589579939842224, + "learning_rate": 4.03270429325284e-06, + "loss": 0.8698, + "step": 133630 + }, + { + "epoch": 0.9673753320738054, + "grad_norm": 0.14816348254680634, + "learning_rate": 4.0326319065922535e-06, + "loss": 0.8706, + "step": 133640 + }, + { + "epoch": 0.9674477187343916, + "grad_norm": 0.15198203921318054, + "learning_rate": 4.032559519931667e-06, + "loss": 0.8615, + "step": 133650 + }, + { + "epoch": 0.9675201053949778, + "grad_norm": 0.16135896742343903, + "learning_rate": 4.032487133271082e-06, + "loss": 0.8692, + "step": 133660 + }, + { + "epoch": 0.967592492055564, + "grad_norm": 0.1466723084449768, + "learning_rate": 4.032414746610495e-06, + "loss": 0.8766, + "step": 133670 + }, + { + "epoch": 0.9676648787161501, + "grad_norm": 0.1564931869506836, + "learning_rate": 4.032342359949909e-06, + "loss": 0.8609, + "step": 133680 + }, + { + "epoch": 0.9677372653767363, + "grad_norm": 0.18006184697151184, + "learning_rate": 4.0322699732893225e-06, + "loss": 0.8721, + "step": 133690 + }, + { + "epoch": 0.9678096520373226, + "grad_norm": 0.1543225795030594, + "learning_rate": 4.032197586628737e-06, + "loss": 0.8594, + "step": 133700 + }, + { + "epoch": 0.9678820386979088, + "grad_norm": 0.14942817389965057, + "learning_rate": 4.0321251999681505e-06, + "loss": 0.8686, + "step": 133710 + }, + { + "epoch": 0.967954425358495, + "grad_norm": 0.1509200930595398, + "learning_rate": 4.032052813307564e-06, + "loss": 0.8603, + "step": 133720 + }, + { + "epoch": 0.9680268120190811, + "grad_norm": 0.20642779767513275, + "learning_rate": 4.031980426646978e-06, + "loss": 0.8635, + "step": 133730 + }, + { + "epoch": 0.9680991986796673, + "grad_norm": 0.1582535356283188, + "learning_rate": 4.031908039986392e-06, + "loss": 0.8653, + "step": 133740 + }, + { + "epoch": 0.9681715853402535, + "grad_norm": 0.15433159470558167, + "learning_rate": 4.031835653325805e-06, + "loss": 0.866, + "step": 133750 + }, + { + "epoch": 0.9682439720008397, + "grad_norm": 0.14288122951984406, + "learning_rate": 4.031763266665219e-06, + "loss": 0.8631, + "step": 133760 + }, + { + "epoch": 0.9683163586614258, + "grad_norm": 0.14594586193561554, + "learning_rate": 4.031690880004633e-06, + "loss": 0.8562, + "step": 133770 + }, + { + "epoch": 0.968388745322012, + "grad_norm": 0.1446864902973175, + "learning_rate": 4.031618493344047e-06, + "loss": 0.8636, + "step": 133780 + }, + { + "epoch": 0.9684611319825982, + "grad_norm": 0.15992321074008942, + "learning_rate": 4.03154610668346e-06, + "loss": 0.8636, + "step": 133790 + }, + { + "epoch": 0.9685335186431845, + "grad_norm": 0.15092787146568298, + "learning_rate": 4.031473720022874e-06, + "loss": 0.8564, + "step": 133800 + }, + { + "epoch": 0.9686059053037707, + "grad_norm": 0.1624569296836853, + "learning_rate": 4.031401333362288e-06, + "loss": 0.8754, + "step": 133810 + }, + { + "epoch": 0.9686782919643568, + "grad_norm": 0.14972303807735443, + "learning_rate": 4.031328946701702e-06, + "loss": 0.852, + "step": 133820 + }, + { + "epoch": 0.968750678624943, + "grad_norm": 0.1559324413537979, + "learning_rate": 4.031256560041116e-06, + "loss": 0.8814, + "step": 133830 + }, + { + "epoch": 0.9688230652855292, + "grad_norm": 0.14922378957271576, + "learning_rate": 4.031184173380529e-06, + "loss": 0.8717, + "step": 133840 + }, + { + "epoch": 0.9688954519461154, + "grad_norm": 0.15362094342708588, + "learning_rate": 4.031111786719944e-06, + "loss": 0.8554, + "step": 133850 + }, + { + "epoch": 0.9689678386067015, + "grad_norm": 0.16382701694965363, + "learning_rate": 4.031039400059357e-06, + "loss": 0.8724, + "step": 133860 + }, + { + "epoch": 0.9690402252672877, + "grad_norm": 0.15398184955120087, + "learning_rate": 4.030967013398771e-06, + "loss": 0.8605, + "step": 133870 + }, + { + "epoch": 0.9691126119278739, + "grad_norm": 0.16252100467681885, + "learning_rate": 4.0308946267381846e-06, + "loss": 0.8688, + "step": 133880 + }, + { + "epoch": 0.9691849985884601, + "grad_norm": 0.15495038032531738, + "learning_rate": 4.030822240077599e-06, + "loss": 0.8642, + "step": 133890 + }, + { + "epoch": 0.9692573852490463, + "grad_norm": 0.15822090208530426, + "learning_rate": 4.030749853417013e-06, + "loss": 0.8567, + "step": 133900 + }, + { + "epoch": 0.9693297719096325, + "grad_norm": 0.1519637256860733, + "learning_rate": 4.030677466756426e-06, + "loss": 0.8769, + "step": 133910 + }, + { + "epoch": 0.9694021585702187, + "grad_norm": 0.1706445962190628, + "learning_rate": 4.03060508009584e-06, + "loss": 0.861, + "step": 133920 + }, + { + "epoch": 0.9694745452308049, + "grad_norm": 0.15888677537441254, + "learning_rate": 4.030532693435254e-06, + "loss": 0.8584, + "step": 133930 + }, + { + "epoch": 0.9695469318913911, + "grad_norm": 0.18062569200992584, + "learning_rate": 4.030460306774668e-06, + "loss": 0.8679, + "step": 133940 + }, + { + "epoch": 0.9696193185519772, + "grad_norm": 0.1512601226568222, + "learning_rate": 4.0303879201140816e-06, + "loss": 0.8515, + "step": 133950 + }, + { + "epoch": 0.9696917052125634, + "grad_norm": 0.1415674239397049, + "learning_rate": 4.030315533453495e-06, + "loss": 0.8646, + "step": 133960 + }, + { + "epoch": 0.9697640918731496, + "grad_norm": 0.1439589112997055, + "learning_rate": 4.03024314679291e-06, + "loss": 0.8688, + "step": 133970 + }, + { + "epoch": 0.9698364785337358, + "grad_norm": 0.17175428569316864, + "learning_rate": 4.030170760132323e-06, + "loss": 0.8702, + "step": 133980 + }, + { + "epoch": 0.969908865194322, + "grad_norm": 0.15791040658950806, + "learning_rate": 4.030098373471737e-06, + "loss": 0.8602, + "step": 133990 + }, + { + "epoch": 0.9699812518549081, + "grad_norm": 0.163300022482872, + "learning_rate": 4.0300259868111505e-06, + "loss": 0.8759, + "step": 134000 + }, + { + "epoch": 0.9700536385154943, + "grad_norm": 0.15517699718475342, + "learning_rate": 4.029953600150565e-06, + "loss": 0.8717, + "step": 134010 + }, + { + "epoch": 0.9701260251760806, + "grad_norm": 0.15258009731769562, + "learning_rate": 4.029881213489979e-06, + "loss": 0.8838, + "step": 134020 + }, + { + "epoch": 0.9701984118366668, + "grad_norm": 0.16164755821228027, + "learning_rate": 4.029808826829392e-06, + "loss": 0.8635, + "step": 134030 + }, + { + "epoch": 0.970270798497253, + "grad_norm": 0.1693074256181717, + "learning_rate": 4.029736440168806e-06, + "loss": 0.8551, + "step": 134040 + }, + { + "epoch": 0.9703431851578391, + "grad_norm": 0.15383243560791016, + "learning_rate": 4.02966405350822e-06, + "loss": 0.87, + "step": 134050 + }, + { + "epoch": 0.9704155718184253, + "grad_norm": 0.1397397667169571, + "learning_rate": 4.029591666847634e-06, + "loss": 0.8602, + "step": 134060 + }, + { + "epoch": 0.9704879584790115, + "grad_norm": 0.15033358335494995, + "learning_rate": 4.0295192801870475e-06, + "loss": 0.858, + "step": 134070 + }, + { + "epoch": 0.9705603451395977, + "grad_norm": 0.1515614092350006, + "learning_rate": 4.029446893526461e-06, + "loss": 0.8869, + "step": 134080 + }, + { + "epoch": 0.9706327318001838, + "grad_norm": 0.1568913608789444, + "learning_rate": 4.029374506865876e-06, + "loss": 0.8814, + "step": 134090 + }, + { + "epoch": 0.97070511846077, + "grad_norm": 0.1536082923412323, + "learning_rate": 4.029302120205289e-06, + "loss": 0.8603, + "step": 134100 + }, + { + "epoch": 0.9707775051213562, + "grad_norm": 0.16278186440467834, + "learning_rate": 4.029229733544703e-06, + "loss": 0.8659, + "step": 134110 + }, + { + "epoch": 0.9708498917819425, + "grad_norm": 0.15788668394088745, + "learning_rate": 4.0291573468841164e-06, + "loss": 0.8711, + "step": 134120 + }, + { + "epoch": 0.9709222784425287, + "grad_norm": 0.14995813369750977, + "learning_rate": 4.02908496022353e-06, + "loss": 0.8567, + "step": 134130 + }, + { + "epoch": 0.9709946651031148, + "grad_norm": 0.1554499715566635, + "learning_rate": 4.0290125735629445e-06, + "loss": 0.8612, + "step": 134140 + }, + { + "epoch": 0.971067051763701, + "grad_norm": 0.15231886506080627, + "learning_rate": 4.028940186902358e-06, + "loss": 0.8668, + "step": 134150 + }, + { + "epoch": 0.9711394384242872, + "grad_norm": 0.16602960228919983, + "learning_rate": 4.028867800241772e-06, + "loss": 0.8603, + "step": 134160 + }, + { + "epoch": 0.9712118250848734, + "grad_norm": 0.15559101104736328, + "learning_rate": 4.028795413581185e-06, + "loss": 0.8724, + "step": 134170 + }, + { + "epoch": 0.9712842117454595, + "grad_norm": 0.15899375081062317, + "learning_rate": 4.0287230269206e-06, + "loss": 0.8588, + "step": 134180 + }, + { + "epoch": 0.9713565984060457, + "grad_norm": 0.1541290581226349, + "learning_rate": 4.0286506402600134e-06, + "loss": 0.8554, + "step": 134190 + }, + { + "epoch": 0.9714289850666319, + "grad_norm": 0.16547095775604248, + "learning_rate": 4.028578253599427e-06, + "loss": 0.8729, + "step": 134200 + }, + { + "epoch": 0.9715013717272181, + "grad_norm": 0.14189139008522034, + "learning_rate": 4.028505866938841e-06, + "loss": 0.8666, + "step": 134210 + }, + { + "epoch": 0.9715737583878042, + "grad_norm": 0.16001036763191223, + "learning_rate": 4.028433480278255e-06, + "loss": 0.87, + "step": 134220 + }, + { + "epoch": 0.9716461450483905, + "grad_norm": 0.163381889462471, + "learning_rate": 4.028361093617669e-06, + "loss": 0.8479, + "step": 134230 + }, + { + "epoch": 0.9717185317089767, + "grad_norm": 0.15338028967380524, + "learning_rate": 4.028288706957082e-06, + "loss": 0.8646, + "step": 134240 + }, + { + "epoch": 0.9717909183695629, + "grad_norm": 0.152128666639328, + "learning_rate": 4.028216320296496e-06, + "loss": 0.8672, + "step": 134250 + }, + { + "epoch": 0.9718633050301491, + "grad_norm": 0.1537809669971466, + "learning_rate": 4.0281439336359105e-06, + "loss": 0.8555, + "step": 134260 + }, + { + "epoch": 0.9719356916907352, + "grad_norm": 0.14656396210193634, + "learning_rate": 4.028071546975324e-06, + "loss": 0.8636, + "step": 134270 + }, + { + "epoch": 0.9720080783513214, + "grad_norm": 0.15450994670391083, + "learning_rate": 4.027999160314737e-06, + "loss": 0.8658, + "step": 134280 + }, + { + "epoch": 0.9720804650119076, + "grad_norm": 0.15121851861476898, + "learning_rate": 4.027926773654151e-06, + "loss": 0.8681, + "step": 134290 + }, + { + "epoch": 0.9721528516724938, + "grad_norm": 0.1601051241159439, + "learning_rate": 4.027854386993565e-06, + "loss": 0.864, + "step": 134300 + }, + { + "epoch": 0.9722252383330799, + "grad_norm": 0.14622844755649567, + "learning_rate": 4.0277820003329785e-06, + "loss": 0.8713, + "step": 134310 + }, + { + "epoch": 0.9722976249936661, + "grad_norm": 0.14876750111579895, + "learning_rate": 4.027709613672392e-06, + "loss": 0.8719, + "step": 134320 + }, + { + "epoch": 0.9723700116542524, + "grad_norm": 0.17520910501480103, + "learning_rate": 4.027637227011807e-06, + "loss": 0.8661, + "step": 134330 + }, + { + "epoch": 0.9724423983148386, + "grad_norm": 0.1505105048418045, + "learning_rate": 4.02756484035122e-06, + "loss": 0.8659, + "step": 134340 + }, + { + "epoch": 0.9725147849754248, + "grad_norm": 0.18113350868225098, + "learning_rate": 4.027492453690634e-06, + "loss": 0.8705, + "step": 134350 + }, + { + "epoch": 0.9725871716360109, + "grad_norm": 0.14961007237434387, + "learning_rate": 4.0274200670300475e-06, + "loss": 0.8694, + "step": 134360 + }, + { + "epoch": 0.9726595582965971, + "grad_norm": 0.14745749533176422, + "learning_rate": 4.027347680369462e-06, + "loss": 0.8727, + "step": 134370 + }, + { + "epoch": 0.9727319449571833, + "grad_norm": 0.14691106975078583, + "learning_rate": 4.0272752937088755e-06, + "loss": 0.8673, + "step": 134380 + }, + { + "epoch": 0.9728043316177695, + "grad_norm": 0.145847350358963, + "learning_rate": 4.027202907048289e-06, + "loss": 0.8603, + "step": 134390 + }, + { + "epoch": 0.9728767182783556, + "grad_norm": 0.16000697016716003, + "learning_rate": 4.027130520387703e-06, + "loss": 0.8592, + "step": 134400 + }, + { + "epoch": 0.9729491049389418, + "grad_norm": 0.15306724607944489, + "learning_rate": 4.027058133727117e-06, + "loss": 0.8608, + "step": 134410 + }, + { + "epoch": 0.973021491599528, + "grad_norm": 0.1496717482805252, + "learning_rate": 4.026985747066531e-06, + "loss": 0.8751, + "step": 134420 + }, + { + "epoch": 0.9730938782601142, + "grad_norm": 0.1527402400970459, + "learning_rate": 4.0269133604059445e-06, + "loss": 0.8678, + "step": 134430 + }, + { + "epoch": 0.9731662649207005, + "grad_norm": 0.1510053426027298, + "learning_rate": 4.026840973745358e-06, + "loss": 0.8683, + "step": 134440 + }, + { + "epoch": 0.9732386515812866, + "grad_norm": 0.157288059592247, + "learning_rate": 4.0267685870847725e-06, + "loss": 0.8571, + "step": 134450 + }, + { + "epoch": 0.9733110382418728, + "grad_norm": 0.1452171355485916, + "learning_rate": 4.026696200424186e-06, + "loss": 0.8649, + "step": 134460 + }, + { + "epoch": 0.973383424902459, + "grad_norm": 0.15778832137584686, + "learning_rate": 4.0266238137636e-06, + "loss": 0.8636, + "step": 134470 + }, + { + "epoch": 0.9734558115630452, + "grad_norm": 0.14281177520751953, + "learning_rate": 4.026551427103013e-06, + "loss": 0.8588, + "step": 134480 + }, + { + "epoch": 0.9735281982236313, + "grad_norm": 0.14823725819587708, + "learning_rate": 4.026479040442428e-06, + "loss": 0.8644, + "step": 134490 + }, + { + "epoch": 0.9736005848842175, + "grad_norm": 0.14900419116020203, + "learning_rate": 4.0264066537818415e-06, + "loss": 0.8796, + "step": 134500 + }, + { + "epoch": 0.9736729715448037, + "grad_norm": 0.1535101979970932, + "learning_rate": 4.026334267121255e-06, + "loss": 0.8621, + "step": 134510 + }, + { + "epoch": 0.9737453582053899, + "grad_norm": 0.1498989462852478, + "learning_rate": 4.026261880460669e-06, + "loss": 0.8546, + "step": 134520 + }, + { + "epoch": 0.973817744865976, + "grad_norm": 0.16558587551116943, + "learning_rate": 4.026189493800083e-06, + "loss": 0.8622, + "step": 134530 + }, + { + "epoch": 0.9738901315265622, + "grad_norm": 0.15267030894756317, + "learning_rate": 4.026117107139497e-06, + "loss": 0.8666, + "step": 134540 + }, + { + "epoch": 0.9739625181871485, + "grad_norm": 0.15370555222034454, + "learning_rate": 4.02604472047891e-06, + "loss": 0.8474, + "step": 134550 + }, + { + "epoch": 0.9740349048477347, + "grad_norm": 0.1497911959886551, + "learning_rate": 4.025972333818324e-06, + "loss": 0.8697, + "step": 134560 + }, + { + "epoch": 0.9741072915083209, + "grad_norm": 0.15965795516967773, + "learning_rate": 4.0258999471577385e-06, + "loss": 0.869, + "step": 134570 + }, + { + "epoch": 0.974179678168907, + "grad_norm": 0.14840437471866608, + "learning_rate": 4.025827560497152e-06, + "loss": 0.8601, + "step": 134580 + }, + { + "epoch": 0.9742520648294932, + "grad_norm": 0.15610599517822266, + "learning_rate": 4.025755173836566e-06, + "loss": 0.8606, + "step": 134590 + }, + { + "epoch": 0.9743244514900794, + "grad_norm": 0.14686964452266693, + "learning_rate": 4.025682787175979e-06, + "loss": 0.8566, + "step": 134600 + }, + { + "epoch": 0.9743968381506656, + "grad_norm": 0.16029107570648193, + "learning_rate": 4.025610400515394e-06, + "loss": 0.8528, + "step": 134610 + }, + { + "epoch": 0.9744692248112518, + "grad_norm": 0.15175573527812958, + "learning_rate": 4.025538013854807e-06, + "loss": 0.8688, + "step": 134620 + }, + { + "epoch": 0.9745416114718379, + "grad_norm": 0.16867345571517944, + "learning_rate": 4.025465627194221e-06, + "loss": 0.8564, + "step": 134630 + }, + { + "epoch": 0.9746139981324241, + "grad_norm": 0.1488535851240158, + "learning_rate": 4.025393240533635e-06, + "loss": 0.8589, + "step": 134640 + }, + { + "epoch": 0.9746863847930104, + "grad_norm": 0.147738978266716, + "learning_rate": 4.025320853873049e-06, + "loss": 0.8754, + "step": 134650 + }, + { + "epoch": 0.9747587714535966, + "grad_norm": 0.14682996273040771, + "learning_rate": 4.025248467212463e-06, + "loss": 0.8585, + "step": 134660 + }, + { + "epoch": 0.9748311581141827, + "grad_norm": 0.17877760529518127, + "learning_rate": 4.025176080551876e-06, + "loss": 0.8617, + "step": 134670 + }, + { + "epoch": 0.9749035447747689, + "grad_norm": 0.3457520008087158, + "learning_rate": 4.02510369389129e-06, + "loss": 0.8692, + "step": 134680 + }, + { + "epoch": 0.9749759314353551, + "grad_norm": 0.1764483004808426, + "learning_rate": 4.025031307230704e-06, + "loss": 0.8658, + "step": 134690 + }, + { + "epoch": 0.9750483180959413, + "grad_norm": 0.17922911047935486, + "learning_rate": 4.024958920570118e-06, + "loss": 0.8704, + "step": 134700 + }, + { + "epoch": 0.9751207047565275, + "grad_norm": 0.1529940515756607, + "learning_rate": 4.024886533909532e-06, + "loss": 0.8758, + "step": 134710 + }, + { + "epoch": 0.9751930914171136, + "grad_norm": 0.1562705934047699, + "learning_rate": 4.024814147248945e-06, + "loss": 0.8639, + "step": 134720 + }, + { + "epoch": 0.9752654780776998, + "grad_norm": 0.15895576775074005, + "learning_rate": 4.024741760588359e-06, + "loss": 0.8625, + "step": 134730 + }, + { + "epoch": 0.975337864738286, + "grad_norm": 0.14802898466587067, + "learning_rate": 4.024669373927773e-06, + "loss": 0.8735, + "step": 134740 + }, + { + "epoch": 0.9754102513988722, + "grad_norm": 0.17037954926490784, + "learning_rate": 4.024596987267187e-06, + "loss": 0.873, + "step": 134750 + }, + { + "epoch": 0.9754826380594585, + "grad_norm": 0.16572614014148712, + "learning_rate": 4.024524600606601e-06, + "loss": 0.859, + "step": 134760 + }, + { + "epoch": 0.9755550247200446, + "grad_norm": 0.15422946214675903, + "learning_rate": 4.024452213946014e-06, + "loss": 0.8608, + "step": 134770 + }, + { + "epoch": 0.9756274113806308, + "grad_norm": 0.14978817105293274, + "learning_rate": 4.024379827285429e-06, + "loss": 0.8539, + "step": 134780 + }, + { + "epoch": 0.975699798041217, + "grad_norm": 0.15020768344402313, + "learning_rate": 4.024307440624842e-06, + "loss": 0.8647, + "step": 134790 + }, + { + "epoch": 0.9757721847018032, + "grad_norm": 0.1630668342113495, + "learning_rate": 4.024235053964256e-06, + "loss": 0.8748, + "step": 134800 + }, + { + "epoch": 0.9758445713623893, + "grad_norm": 0.1647442728281021, + "learning_rate": 4.0241626673036695e-06, + "loss": 0.8916, + "step": 134810 + }, + { + "epoch": 0.9759169580229755, + "grad_norm": 0.16220815479755402, + "learning_rate": 4.024090280643083e-06, + "loss": 0.8647, + "step": 134820 + }, + { + "epoch": 0.9759893446835617, + "grad_norm": 0.15508753061294556, + "learning_rate": 4.024017893982497e-06, + "loss": 0.8776, + "step": 134830 + }, + { + "epoch": 0.9760617313441479, + "grad_norm": 0.14630411565303802, + "learning_rate": 4.023945507321911e-06, + "loss": 0.862, + "step": 134840 + }, + { + "epoch": 0.976134118004734, + "grad_norm": 0.16463924944400787, + "learning_rate": 4.023873120661325e-06, + "loss": 0.8738, + "step": 134850 + }, + { + "epoch": 0.9762065046653203, + "grad_norm": 0.14505267143249512, + "learning_rate": 4.0238007340007384e-06, + "loss": 0.8578, + "step": 134860 + }, + { + "epoch": 0.9762788913259065, + "grad_norm": 0.15662142634391785, + "learning_rate": 4.023728347340152e-06, + "loss": 0.8646, + "step": 134870 + }, + { + "epoch": 0.9763512779864927, + "grad_norm": 0.3217943012714386, + "learning_rate": 4.0236559606795665e-06, + "loss": 0.87, + "step": 134880 + }, + { + "epoch": 0.9764236646470789, + "grad_norm": 0.1467258632183075, + "learning_rate": 4.02358357401898e-06, + "loss": 0.8735, + "step": 134890 + }, + { + "epoch": 0.976496051307665, + "grad_norm": 0.1595139056444168, + "learning_rate": 4.023511187358394e-06, + "loss": 0.8677, + "step": 134900 + }, + { + "epoch": 0.9765684379682512, + "grad_norm": 0.16887742280960083, + "learning_rate": 4.023438800697807e-06, + "loss": 0.8625, + "step": 134910 + }, + { + "epoch": 0.9766408246288374, + "grad_norm": 0.2070261687040329, + "learning_rate": 4.023366414037221e-06, + "loss": 0.8599, + "step": 134920 + }, + { + "epoch": 0.9767132112894236, + "grad_norm": 0.1530458927154541, + "learning_rate": 4.0232940273766354e-06, + "loss": 0.859, + "step": 134930 + }, + { + "epoch": 0.9767855979500097, + "grad_norm": 0.17413754761219025, + "learning_rate": 4.023221640716049e-06, + "loss": 0.8744, + "step": 134940 + }, + { + "epoch": 0.9768579846105959, + "grad_norm": 0.14189285039901733, + "learning_rate": 4.023149254055463e-06, + "loss": 0.8678, + "step": 134950 + }, + { + "epoch": 0.9769303712711821, + "grad_norm": 0.16996034979820251, + "learning_rate": 4.023076867394876e-06, + "loss": 0.8654, + "step": 134960 + }, + { + "epoch": 0.9770027579317684, + "grad_norm": 0.15220043063163757, + "learning_rate": 4.023004480734291e-06, + "loss": 0.8705, + "step": 134970 + }, + { + "epoch": 0.9770751445923546, + "grad_norm": 0.1522054523229599, + "learning_rate": 4.022932094073704e-06, + "loss": 0.8677, + "step": 134980 + }, + { + "epoch": 0.9771475312529407, + "grad_norm": 0.14856263995170593, + "learning_rate": 4.022859707413118e-06, + "loss": 0.8604, + "step": 134990 + }, + { + "epoch": 0.9772199179135269, + "grad_norm": 0.17470620572566986, + "learning_rate": 4.022787320752532e-06, + "loss": 0.8735, + "step": 135000 + }, + { + "epoch": 0.9772923045741131, + "grad_norm": 0.16149313747882843, + "learning_rate": 4.022714934091946e-06, + "loss": 0.8694, + "step": 135010 + }, + { + "epoch": 0.9773646912346993, + "grad_norm": 0.1480475217103958, + "learning_rate": 4.02264254743136e-06, + "loss": 0.8597, + "step": 135020 + }, + { + "epoch": 0.9774370778952854, + "grad_norm": 0.16102159023284912, + "learning_rate": 4.022570160770773e-06, + "loss": 0.8735, + "step": 135030 + }, + { + "epoch": 0.9775094645558716, + "grad_norm": 0.14369907975196838, + "learning_rate": 4.022497774110187e-06, + "loss": 0.8643, + "step": 135040 + }, + { + "epoch": 0.9775818512164578, + "grad_norm": 0.1809437870979309, + "learning_rate": 4.022425387449601e-06, + "loss": 0.8584, + "step": 135050 + }, + { + "epoch": 0.977654237877044, + "grad_norm": 0.15481232106685638, + "learning_rate": 4.022353000789015e-06, + "loss": 0.8688, + "step": 135060 + }, + { + "epoch": 0.9777266245376302, + "grad_norm": 0.1535213589668274, + "learning_rate": 4.022280614128429e-06, + "loss": 0.8592, + "step": 135070 + }, + { + "epoch": 0.9777990111982164, + "grad_norm": 0.14975209534168243, + "learning_rate": 4.022208227467842e-06, + "loss": 0.8657, + "step": 135080 + }, + { + "epoch": 0.9778713978588026, + "grad_norm": 0.15290719270706177, + "learning_rate": 4.022135840807257e-06, + "loss": 0.8633, + "step": 135090 + }, + { + "epoch": 0.9779437845193888, + "grad_norm": 0.15675273537635803, + "learning_rate": 4.02206345414667e-06, + "loss": 0.8642, + "step": 135100 + }, + { + "epoch": 0.978016171179975, + "grad_norm": 0.17015263438224792, + "learning_rate": 4.021991067486084e-06, + "loss": 0.8693, + "step": 135110 + }, + { + "epoch": 0.9780885578405611, + "grad_norm": 0.15289832651615143, + "learning_rate": 4.0219186808254975e-06, + "loss": 0.8696, + "step": 135120 + }, + { + "epoch": 0.9781609445011473, + "grad_norm": 0.15227100253105164, + "learning_rate": 4.021846294164912e-06, + "loss": 0.8625, + "step": 135130 + }, + { + "epoch": 0.9782333311617335, + "grad_norm": 0.19440871477127075, + "learning_rate": 4.021773907504326e-06, + "loss": 0.8697, + "step": 135140 + }, + { + "epoch": 0.9783057178223197, + "grad_norm": 0.15097714960575104, + "learning_rate": 4.021701520843739e-06, + "loss": 0.854, + "step": 135150 + }, + { + "epoch": 0.9783781044829059, + "grad_norm": 0.16234318912029266, + "learning_rate": 4.021629134183153e-06, + "loss": 0.8774, + "step": 135160 + }, + { + "epoch": 0.978450491143492, + "grad_norm": 0.19647642970085144, + "learning_rate": 4.021556747522567e-06, + "loss": 0.8644, + "step": 135170 + }, + { + "epoch": 0.9785228778040783, + "grad_norm": 0.14601005613803864, + "learning_rate": 4.021484360861981e-06, + "loss": 0.8577, + "step": 135180 + }, + { + "epoch": 0.9785952644646645, + "grad_norm": 0.15095117688179016, + "learning_rate": 4.0214119742013945e-06, + "loss": 0.8604, + "step": 135190 + }, + { + "epoch": 0.9786676511252507, + "grad_norm": 0.14409467577934265, + "learning_rate": 4.021339587540808e-06, + "loss": 0.8597, + "step": 135200 + }, + { + "epoch": 0.9787400377858368, + "grad_norm": 0.14906567335128784, + "learning_rate": 4.021267200880223e-06, + "loss": 0.8643, + "step": 135210 + }, + { + "epoch": 0.978812424446423, + "grad_norm": 0.15724803507328033, + "learning_rate": 4.021194814219636e-06, + "loss": 0.8458, + "step": 135220 + }, + { + "epoch": 0.9788848111070092, + "grad_norm": 0.14980719983577728, + "learning_rate": 4.02112242755905e-06, + "loss": 0.8623, + "step": 135230 + }, + { + "epoch": 0.9789571977675954, + "grad_norm": 0.14800138771533966, + "learning_rate": 4.0210500408984635e-06, + "loss": 0.8602, + "step": 135240 + }, + { + "epoch": 0.9790295844281816, + "grad_norm": 0.24829064309597015, + "learning_rate": 4.020977654237878e-06, + "loss": 0.8596, + "step": 135250 + }, + { + "epoch": 0.9791019710887677, + "grad_norm": 0.159278005361557, + "learning_rate": 4.0209052675772916e-06, + "loss": 0.8695, + "step": 135260 + }, + { + "epoch": 0.9791743577493539, + "grad_norm": 0.16504423320293427, + "learning_rate": 4.020832880916705e-06, + "loss": 0.8636, + "step": 135270 + }, + { + "epoch": 0.9792467444099401, + "grad_norm": 0.1613035798072815, + "learning_rate": 4.020760494256119e-06, + "loss": 0.8675, + "step": 135280 + }, + { + "epoch": 0.9793191310705264, + "grad_norm": 0.16693058609962463, + "learning_rate": 4.020688107595533e-06, + "loss": 0.8688, + "step": 135290 + }, + { + "epoch": 0.9793915177311125, + "grad_norm": 0.14621016383171082, + "learning_rate": 4.020615720934947e-06, + "loss": 0.8563, + "step": 135300 + }, + { + "epoch": 0.9794639043916987, + "grad_norm": 0.15493078529834747, + "learning_rate": 4.0205433342743605e-06, + "loss": 0.865, + "step": 135310 + }, + { + "epoch": 0.9795362910522849, + "grad_norm": 0.1841004192829132, + "learning_rate": 4.020470947613774e-06, + "loss": 0.8632, + "step": 135320 + }, + { + "epoch": 0.9796086777128711, + "grad_norm": 0.1623116284608841, + "learning_rate": 4.0203985609531886e-06, + "loss": 0.864, + "step": 135330 + }, + { + "epoch": 0.9796810643734573, + "grad_norm": 0.14910165965557098, + "learning_rate": 4.020326174292601e-06, + "loss": 0.8692, + "step": 135340 + }, + { + "epoch": 0.9797534510340434, + "grad_norm": 0.17828305065631866, + "learning_rate": 4.020253787632015e-06, + "loss": 0.8739, + "step": 135350 + }, + { + "epoch": 0.9798258376946296, + "grad_norm": 0.1424138993024826, + "learning_rate": 4.020181400971429e-06, + "loss": 0.8643, + "step": 135360 + }, + { + "epoch": 0.9798982243552158, + "grad_norm": 0.16113391518592834, + "learning_rate": 4.020109014310843e-06, + "loss": 0.8732, + "step": 135370 + }, + { + "epoch": 0.979970611015802, + "grad_norm": 0.17208503186702728, + "learning_rate": 4.020036627650257e-06, + "loss": 0.85, + "step": 135380 + }, + { + "epoch": 0.9800429976763883, + "grad_norm": 0.17494194209575653, + "learning_rate": 4.01996424098967e-06, + "loss": 0.8593, + "step": 135390 + }, + { + "epoch": 0.9801153843369744, + "grad_norm": 0.15494690835475922, + "learning_rate": 4.019891854329085e-06, + "loss": 0.8766, + "step": 135400 + }, + { + "epoch": 0.9801877709975606, + "grad_norm": 0.15408842265605927, + "learning_rate": 4.019819467668498e-06, + "loss": 0.8711, + "step": 135410 + }, + { + "epoch": 0.9802601576581468, + "grad_norm": 0.15154585242271423, + "learning_rate": 4.019747081007912e-06, + "loss": 0.8571, + "step": 135420 + }, + { + "epoch": 0.980332544318733, + "grad_norm": 0.16975507140159607, + "learning_rate": 4.0196746943473256e-06, + "loss": 0.8818, + "step": 135430 + }, + { + "epoch": 0.9804049309793191, + "grad_norm": 0.14920401573181152, + "learning_rate": 4.01960230768674e-06, + "loss": 0.869, + "step": 135440 + }, + { + "epoch": 0.9804773176399053, + "grad_norm": 0.15514759719371796, + "learning_rate": 4.019529921026154e-06, + "loss": 0.8593, + "step": 135450 + }, + { + "epoch": 0.9805497043004915, + "grad_norm": 0.16312751173973083, + "learning_rate": 4.019457534365567e-06, + "loss": 0.8585, + "step": 135460 + }, + { + "epoch": 0.9806220909610777, + "grad_norm": 0.14903192222118378, + "learning_rate": 4.019385147704981e-06, + "loss": 0.8557, + "step": 135470 + }, + { + "epoch": 0.9806944776216638, + "grad_norm": 0.15658678114414215, + "learning_rate": 4.019312761044395e-06, + "loss": 0.8683, + "step": 135480 + }, + { + "epoch": 0.98076686428225, + "grad_norm": 0.15279506146907806, + "learning_rate": 4.019240374383809e-06, + "loss": 0.8585, + "step": 135490 + }, + { + "epoch": 0.9808392509428363, + "grad_norm": 0.14755424857139587, + "learning_rate": 4.019167987723223e-06, + "loss": 0.8533, + "step": 135500 + }, + { + "epoch": 0.9809116376034225, + "grad_norm": 0.14603859186172485, + "learning_rate": 4.019095601062636e-06, + "loss": 0.8648, + "step": 135510 + }, + { + "epoch": 0.9809840242640087, + "grad_norm": 0.1589314043521881, + "learning_rate": 4.01902321440205e-06, + "loss": 0.8613, + "step": 135520 + }, + { + "epoch": 0.9810564109245948, + "grad_norm": 0.1501326709985733, + "learning_rate": 4.018950827741464e-06, + "loss": 0.8706, + "step": 135530 + }, + { + "epoch": 0.981128797585181, + "grad_norm": 0.15923535823822021, + "learning_rate": 4.018878441080878e-06, + "loss": 0.8739, + "step": 135540 + }, + { + "epoch": 0.9812011842457672, + "grad_norm": 0.15568740665912628, + "learning_rate": 4.0188060544202915e-06, + "loss": 0.8517, + "step": 135550 + }, + { + "epoch": 0.9812735709063534, + "grad_norm": 0.1511840522289276, + "learning_rate": 4.018733667759705e-06, + "loss": 0.8474, + "step": 135560 + }, + { + "epoch": 0.9813459575669395, + "grad_norm": 0.155585378408432, + "learning_rate": 4.01866128109912e-06, + "loss": 0.8598, + "step": 135570 + }, + { + "epoch": 0.9814183442275257, + "grad_norm": 0.15840795636177063, + "learning_rate": 4.018588894438533e-06, + "loss": 0.8683, + "step": 135580 + }, + { + "epoch": 0.9814907308881119, + "grad_norm": 0.1394014209508896, + "learning_rate": 4.018516507777947e-06, + "loss": 0.8745, + "step": 135590 + }, + { + "epoch": 0.9815631175486981, + "grad_norm": 0.15405994653701782, + "learning_rate": 4.0184441211173604e-06, + "loss": 0.876, + "step": 135600 + }, + { + "epoch": 0.9816355042092844, + "grad_norm": 0.15901432931423187, + "learning_rate": 4.018371734456775e-06, + "loss": 0.8676, + "step": 135610 + }, + { + "epoch": 0.9817078908698705, + "grad_norm": 0.150760680437088, + "learning_rate": 4.0182993477961885e-06, + "loss": 0.8553, + "step": 135620 + }, + { + "epoch": 0.9817802775304567, + "grad_norm": 0.22623537480831146, + "learning_rate": 4.018226961135602e-06, + "loss": 0.861, + "step": 135630 + }, + { + "epoch": 0.9818526641910429, + "grad_norm": 0.15437884628772736, + "learning_rate": 4.018154574475016e-06, + "loss": 0.8691, + "step": 135640 + }, + { + "epoch": 0.9819250508516291, + "grad_norm": 0.15233850479125977, + "learning_rate": 4.01808218781443e-06, + "loss": 0.8692, + "step": 135650 + }, + { + "epoch": 0.9819974375122152, + "grad_norm": 0.1725022792816162, + "learning_rate": 4.018009801153844e-06, + "loss": 0.8626, + "step": 135660 + }, + { + "epoch": 0.9820698241728014, + "grad_norm": 0.5819795727729797, + "learning_rate": 4.0179374144932574e-06, + "loss": 0.8739, + "step": 135670 + }, + { + "epoch": 0.9821422108333876, + "grad_norm": 0.20204482972621918, + "learning_rate": 4.017865027832671e-06, + "loss": 0.8681, + "step": 135680 + }, + { + "epoch": 0.9822145974939738, + "grad_norm": 0.15785406529903412, + "learning_rate": 4.0177926411720855e-06, + "loss": 0.8545, + "step": 135690 + }, + { + "epoch": 0.98228698415456, + "grad_norm": 0.17018094658851624, + "learning_rate": 4.017720254511499e-06, + "loss": 0.8569, + "step": 135700 + }, + { + "epoch": 0.9823593708151462, + "grad_norm": 0.1424306482076645, + "learning_rate": 4.017647867850913e-06, + "loss": 0.8755, + "step": 135710 + }, + { + "epoch": 0.9824317574757324, + "grad_norm": 0.146108016371727, + "learning_rate": 4.017575481190326e-06, + "loss": 0.8748, + "step": 135720 + }, + { + "epoch": 0.9825041441363186, + "grad_norm": 0.16462048888206482, + "learning_rate": 4.017503094529741e-06, + "loss": 0.8623, + "step": 135730 + }, + { + "epoch": 0.9825765307969048, + "grad_norm": 0.14882566034793854, + "learning_rate": 4.0174307078691544e-06, + "loss": 0.8583, + "step": 135740 + }, + { + "epoch": 0.982648917457491, + "grad_norm": 0.15076982975006104, + "learning_rate": 4.017358321208568e-06, + "loss": 0.8668, + "step": 135750 + }, + { + "epoch": 0.9827213041180771, + "grad_norm": 0.14368361234664917, + "learning_rate": 4.017285934547982e-06, + "loss": 0.8635, + "step": 135760 + }, + { + "epoch": 0.9827936907786633, + "grad_norm": 0.16168615221977234, + "learning_rate": 4.017213547887396e-06, + "loss": 0.8649, + "step": 135770 + }, + { + "epoch": 0.9828660774392495, + "grad_norm": 0.1403905153274536, + "learning_rate": 4.01714116122681e-06, + "loss": 0.8717, + "step": 135780 + }, + { + "epoch": 0.9829384640998357, + "grad_norm": 0.14840847253799438, + "learning_rate": 4.017068774566223e-06, + "loss": 0.8564, + "step": 135790 + }, + { + "epoch": 0.9830108507604218, + "grad_norm": 0.174706369638443, + "learning_rate": 4.016996387905637e-06, + "loss": 0.8632, + "step": 135800 + }, + { + "epoch": 0.983083237421008, + "grad_norm": 0.16106009483337402, + "learning_rate": 4.0169240012450515e-06, + "loss": 0.8746, + "step": 135810 + }, + { + "epoch": 0.9831556240815943, + "grad_norm": 0.15594612061977386, + "learning_rate": 4.016851614584465e-06, + "loss": 0.8661, + "step": 135820 + }, + { + "epoch": 0.9832280107421805, + "grad_norm": 0.14947769045829773, + "learning_rate": 4.016779227923879e-06, + "loss": 0.8688, + "step": 135830 + }, + { + "epoch": 0.9833003974027666, + "grad_norm": 0.15984922647476196, + "learning_rate": 4.016706841263292e-06, + "loss": 0.8712, + "step": 135840 + }, + { + "epoch": 0.9833727840633528, + "grad_norm": 0.1563669741153717, + "learning_rate": 4.016634454602707e-06, + "loss": 0.8738, + "step": 135850 + }, + { + "epoch": 0.983445170723939, + "grad_norm": 0.15901511907577515, + "learning_rate": 4.01656206794212e-06, + "loss": 0.8672, + "step": 135860 + }, + { + "epoch": 0.9835175573845252, + "grad_norm": 0.15088501572608948, + "learning_rate": 4.016489681281533e-06, + "loss": 0.8752, + "step": 135870 + }, + { + "epoch": 0.9835899440451114, + "grad_norm": 0.4412364065647125, + "learning_rate": 4.016417294620948e-06, + "loss": 0.8594, + "step": 135880 + }, + { + "epoch": 0.9836623307056975, + "grad_norm": 0.14504173398017883, + "learning_rate": 4.016344907960361e-06, + "loss": 0.8658, + "step": 135890 + }, + { + "epoch": 0.9837347173662837, + "grad_norm": 0.15376900136470795, + "learning_rate": 4.016272521299775e-06, + "loss": 0.8752, + "step": 135900 + }, + { + "epoch": 0.9838071040268699, + "grad_norm": 0.42392078042030334, + "learning_rate": 4.0162001346391885e-06, + "loss": 0.8586, + "step": 135910 + }, + { + "epoch": 0.9838794906874562, + "grad_norm": 0.17398519814014435, + "learning_rate": 4.016127747978603e-06, + "loss": 0.8659, + "step": 135920 + }, + { + "epoch": 0.9839518773480423, + "grad_norm": 0.15040136873722076, + "learning_rate": 4.0160553613180165e-06, + "loss": 0.863, + "step": 135930 + }, + { + "epoch": 0.9840242640086285, + "grad_norm": 0.15446005761623383, + "learning_rate": 4.01598297465743e-06, + "loss": 0.8706, + "step": 135940 + }, + { + "epoch": 0.9840966506692147, + "grad_norm": 0.14991377294063568, + "learning_rate": 4.015910587996844e-06, + "loss": 0.8666, + "step": 135950 + }, + { + "epoch": 0.9841690373298009, + "grad_norm": 0.15501756966114044, + "learning_rate": 4.015838201336258e-06, + "loss": 0.867, + "step": 135960 + }, + { + "epoch": 0.984241423990387, + "grad_norm": 0.1403217762708664, + "learning_rate": 4.015765814675672e-06, + "loss": 0.8682, + "step": 135970 + }, + { + "epoch": 0.9843138106509732, + "grad_norm": 0.1542348712682724, + "learning_rate": 4.0156934280150855e-06, + "loss": 0.8656, + "step": 135980 + }, + { + "epoch": 0.9843861973115594, + "grad_norm": 0.16103000938892365, + "learning_rate": 4.015621041354499e-06, + "loss": 0.8642, + "step": 135990 + }, + { + "epoch": 0.9844585839721456, + "grad_norm": 0.14834414422512054, + "learning_rate": 4.0155486546939136e-06, + "loss": 0.8572, + "step": 136000 + }, + { + "epoch": 0.9845309706327318, + "grad_norm": 0.16515208780765533, + "learning_rate": 4.015476268033327e-06, + "loss": 0.8589, + "step": 136010 + }, + { + "epoch": 0.9846033572933179, + "grad_norm": 0.16281743347644806, + "learning_rate": 4.015403881372741e-06, + "loss": 0.867, + "step": 136020 + }, + { + "epoch": 0.9846757439539042, + "grad_norm": 0.14419202506542206, + "learning_rate": 4.015331494712154e-06, + "loss": 0.8527, + "step": 136030 + }, + { + "epoch": 0.9847481306144904, + "grad_norm": 0.16212014853954315, + "learning_rate": 4.015259108051569e-06, + "loss": 0.8629, + "step": 136040 + }, + { + "epoch": 0.9848205172750766, + "grad_norm": 0.16474084556102753, + "learning_rate": 4.0151867213909825e-06, + "loss": 0.8652, + "step": 136050 + }, + { + "epoch": 0.9848929039356628, + "grad_norm": 0.19735056161880493, + "learning_rate": 4.015114334730396e-06, + "loss": 0.8733, + "step": 136060 + }, + { + "epoch": 0.9849652905962489, + "grad_norm": 0.164767324924469, + "learning_rate": 4.01504194806981e-06, + "loss": 0.8641, + "step": 136070 + }, + { + "epoch": 0.9850376772568351, + "grad_norm": 0.14573654532432556, + "learning_rate": 4.014969561409224e-06, + "loss": 0.8648, + "step": 136080 + }, + { + "epoch": 0.9851100639174213, + "grad_norm": 0.15756194293498993, + "learning_rate": 4.014897174748638e-06, + "loss": 0.8658, + "step": 136090 + }, + { + "epoch": 0.9851824505780075, + "grad_norm": 0.16832998394966125, + "learning_rate": 4.014824788088051e-06, + "loss": 0.8561, + "step": 136100 + }, + { + "epoch": 0.9852548372385936, + "grad_norm": 0.1867925524711609, + "learning_rate": 4.014752401427465e-06, + "loss": 0.8723, + "step": 136110 + }, + { + "epoch": 0.9853272238991798, + "grad_norm": 0.1441367268562317, + "learning_rate": 4.0146800147668795e-06, + "loss": 0.8627, + "step": 136120 + }, + { + "epoch": 0.985399610559766, + "grad_norm": 0.1538180559873581, + "learning_rate": 4.014607628106293e-06, + "loss": 0.8647, + "step": 136130 + }, + { + "epoch": 0.9854719972203523, + "grad_norm": 0.15871921181678772, + "learning_rate": 4.014535241445707e-06, + "loss": 0.8555, + "step": 136140 + }, + { + "epoch": 0.9855443838809385, + "grad_norm": 0.15353409945964813, + "learning_rate": 4.01446285478512e-06, + "loss": 0.8575, + "step": 136150 + }, + { + "epoch": 0.9856167705415246, + "grad_norm": 0.15181076526641846, + "learning_rate": 4.014390468124534e-06, + "loss": 0.8714, + "step": 136160 + }, + { + "epoch": 0.9856891572021108, + "grad_norm": 0.15089160203933716, + "learning_rate": 4.014318081463948e-06, + "loss": 0.8763, + "step": 136170 + }, + { + "epoch": 0.985761543862697, + "grad_norm": 0.15391786396503448, + "learning_rate": 4.014245694803362e-06, + "loss": 0.8725, + "step": 136180 + }, + { + "epoch": 0.9858339305232832, + "grad_norm": 0.14188186824321747, + "learning_rate": 4.014173308142776e-06, + "loss": 0.8615, + "step": 136190 + }, + { + "epoch": 0.9859063171838693, + "grad_norm": 0.16253648698329926, + "learning_rate": 4.014100921482189e-06, + "loss": 0.8683, + "step": 136200 + }, + { + "epoch": 0.9859787038444555, + "grad_norm": 0.154387429356575, + "learning_rate": 4.014028534821604e-06, + "loss": 0.859, + "step": 136210 + }, + { + "epoch": 0.9860510905050417, + "grad_norm": 0.15761414170265198, + "learning_rate": 4.013956148161017e-06, + "loss": 0.8724, + "step": 136220 + }, + { + "epoch": 0.9861234771656279, + "grad_norm": 0.14925530552864075, + "learning_rate": 4.013883761500431e-06, + "loss": 0.8658, + "step": 136230 + }, + { + "epoch": 0.9861958638262142, + "grad_norm": 0.15609656274318695, + "learning_rate": 4.013811374839845e-06, + "loss": 0.8684, + "step": 136240 + }, + { + "epoch": 0.9862682504868003, + "grad_norm": 0.1742173135280609, + "learning_rate": 4.013738988179259e-06, + "loss": 0.8502, + "step": 136250 + }, + { + "epoch": 0.9863406371473865, + "grad_norm": 0.15762127935886383, + "learning_rate": 4.013666601518673e-06, + "loss": 0.8603, + "step": 136260 + }, + { + "epoch": 0.9864130238079727, + "grad_norm": 0.178837388753891, + "learning_rate": 4.013594214858086e-06, + "loss": 0.8645, + "step": 136270 + }, + { + "epoch": 0.9864854104685589, + "grad_norm": 0.16362306475639343, + "learning_rate": 4.0135218281975e-06, + "loss": 0.8626, + "step": 136280 + }, + { + "epoch": 0.986557797129145, + "grad_norm": 0.14631548523902893, + "learning_rate": 4.013449441536914e-06, + "loss": 0.8687, + "step": 136290 + }, + { + "epoch": 0.9866301837897312, + "grad_norm": 0.14602495729923248, + "learning_rate": 4.013377054876328e-06, + "loss": 0.8654, + "step": 136300 + }, + { + "epoch": 0.9867025704503174, + "grad_norm": 0.15310421586036682, + "learning_rate": 4.013304668215742e-06, + "loss": 0.8742, + "step": 136310 + }, + { + "epoch": 0.9867749571109036, + "grad_norm": 0.14649897813796997, + "learning_rate": 4.013232281555155e-06, + "loss": 0.8571, + "step": 136320 + }, + { + "epoch": 0.9868473437714897, + "grad_norm": 0.15480268001556396, + "learning_rate": 4.01315989489457e-06, + "loss": 0.8737, + "step": 136330 + }, + { + "epoch": 0.9869197304320759, + "grad_norm": 0.161812886595726, + "learning_rate": 4.013087508233983e-06, + "loss": 0.8601, + "step": 136340 + }, + { + "epoch": 0.9869921170926622, + "grad_norm": 0.17308774590492249, + "learning_rate": 4.013015121573397e-06, + "loss": 0.8818, + "step": 136350 + }, + { + "epoch": 0.9870645037532484, + "grad_norm": 0.14882414042949677, + "learning_rate": 4.0129427349128105e-06, + "loss": 0.8744, + "step": 136360 + }, + { + "epoch": 0.9871368904138346, + "grad_norm": 0.14824624359607697, + "learning_rate": 4.012870348252225e-06, + "loss": 0.8686, + "step": 136370 + }, + { + "epoch": 0.9872092770744207, + "grad_norm": 0.16246159374713898, + "learning_rate": 4.012797961591639e-06, + "loss": 0.8634, + "step": 136380 + }, + { + "epoch": 0.9872816637350069, + "grad_norm": 0.17609475553035736, + "learning_rate": 4.012725574931052e-06, + "loss": 0.8657, + "step": 136390 + }, + { + "epoch": 0.9873540503955931, + "grad_norm": 0.14540891349315643, + "learning_rate": 4.012653188270466e-06, + "loss": 0.8443, + "step": 136400 + }, + { + "epoch": 0.9874264370561793, + "grad_norm": 0.15471509099006653, + "learning_rate": 4.0125808016098794e-06, + "loss": 0.8676, + "step": 136410 + }, + { + "epoch": 0.9874988237167655, + "grad_norm": 0.15584422647953033, + "learning_rate": 4.012508414949293e-06, + "loss": 0.8639, + "step": 136420 + }, + { + "epoch": 0.9875712103773516, + "grad_norm": 0.1576376110315323, + "learning_rate": 4.012436028288707e-06, + "loss": 0.8676, + "step": 136430 + }, + { + "epoch": 0.9876435970379378, + "grad_norm": 0.19695936143398285, + "learning_rate": 4.012363641628121e-06, + "loss": 0.8697, + "step": 136440 + }, + { + "epoch": 0.9877159836985241, + "grad_norm": 0.1736014187335968, + "learning_rate": 4.012291254967535e-06, + "loss": 0.8727, + "step": 136450 + }, + { + "epoch": 0.9877883703591103, + "grad_norm": 0.14788353443145752, + "learning_rate": 4.012218868306948e-06, + "loss": 0.8549, + "step": 136460 + }, + { + "epoch": 0.9878607570196964, + "grad_norm": 0.16188114881515503, + "learning_rate": 4.012146481646362e-06, + "loss": 0.8411, + "step": 136470 + }, + { + "epoch": 0.9879331436802826, + "grad_norm": 0.14878244698047638, + "learning_rate": 4.0120740949857764e-06, + "loss": 0.8626, + "step": 136480 + }, + { + "epoch": 0.9880055303408688, + "grad_norm": 0.16054673492908478, + "learning_rate": 4.01200170832519e-06, + "loss": 0.8722, + "step": 136490 + }, + { + "epoch": 0.988077917001455, + "grad_norm": 0.18446345627307892, + "learning_rate": 4.011929321664604e-06, + "loss": 0.8663, + "step": 136500 + }, + { + "epoch": 0.9881503036620412, + "grad_norm": 0.15131662786006927, + "learning_rate": 4.011856935004017e-06, + "loss": 0.8707, + "step": 136510 + }, + { + "epoch": 0.9882226903226273, + "grad_norm": 0.15739837288856506, + "learning_rate": 4.011784548343432e-06, + "loss": 0.8606, + "step": 136520 + }, + { + "epoch": 0.9882950769832135, + "grad_norm": 0.17093366384506226, + "learning_rate": 4.011712161682845e-06, + "loss": 0.8555, + "step": 136530 + }, + { + "epoch": 0.9883674636437997, + "grad_norm": 0.1519378423690796, + "learning_rate": 4.011639775022259e-06, + "loss": 0.8563, + "step": 136540 + }, + { + "epoch": 0.9884398503043859, + "grad_norm": 0.13930349051952362, + "learning_rate": 4.011567388361673e-06, + "loss": 0.8667, + "step": 136550 + }, + { + "epoch": 0.9885122369649721, + "grad_norm": 0.1486721783876419, + "learning_rate": 4.011495001701087e-06, + "loss": 0.8662, + "step": 136560 + }, + { + "epoch": 0.9885846236255583, + "grad_norm": 0.14712847769260406, + "learning_rate": 4.011422615040501e-06, + "loss": 0.8593, + "step": 136570 + }, + { + "epoch": 0.9886570102861445, + "grad_norm": 0.15622474253177643, + "learning_rate": 4.011350228379914e-06, + "loss": 0.8692, + "step": 136580 + }, + { + "epoch": 0.9887293969467307, + "grad_norm": 0.15203312039375305, + "learning_rate": 4.011277841719328e-06, + "loss": 0.8552, + "step": 136590 + }, + { + "epoch": 0.9888017836073169, + "grad_norm": 0.15190285444259644, + "learning_rate": 4.011205455058742e-06, + "loss": 0.8699, + "step": 136600 + }, + { + "epoch": 0.988874170267903, + "grad_norm": 0.14632107317447662, + "learning_rate": 4.011133068398156e-06, + "loss": 0.8657, + "step": 136610 + }, + { + "epoch": 0.9889465569284892, + "grad_norm": 0.16472133994102478, + "learning_rate": 4.01106068173757e-06, + "loss": 0.8669, + "step": 136620 + }, + { + "epoch": 0.9890189435890754, + "grad_norm": 0.17338450253009796, + "learning_rate": 4.010988295076983e-06, + "loss": 0.8675, + "step": 136630 + }, + { + "epoch": 0.9890913302496616, + "grad_norm": 0.1531408429145813, + "learning_rate": 4.010915908416398e-06, + "loss": 0.8706, + "step": 136640 + }, + { + "epoch": 0.9891637169102477, + "grad_norm": 0.2573656141757965, + "learning_rate": 4.010843521755811e-06, + "loss": 0.8592, + "step": 136650 + }, + { + "epoch": 0.9892361035708339, + "grad_norm": 0.17421439290046692, + "learning_rate": 4.010771135095225e-06, + "loss": 0.8501, + "step": 136660 + }, + { + "epoch": 0.9893084902314202, + "grad_norm": 0.1428343653678894, + "learning_rate": 4.0106987484346385e-06, + "loss": 0.8451, + "step": 136670 + }, + { + "epoch": 0.9893808768920064, + "grad_norm": 0.1828918159008026, + "learning_rate": 4.010626361774053e-06, + "loss": 0.8663, + "step": 136680 + }, + { + "epoch": 0.9894532635525926, + "grad_norm": 0.14895032346248627, + "learning_rate": 4.010553975113467e-06, + "loss": 0.8611, + "step": 136690 + }, + { + "epoch": 0.9895256502131787, + "grad_norm": 0.17357683181762695, + "learning_rate": 4.01048158845288e-06, + "loss": 0.8655, + "step": 136700 + }, + { + "epoch": 0.9895980368737649, + "grad_norm": 0.1530769318342209, + "learning_rate": 4.010409201792294e-06, + "loss": 0.852, + "step": 136710 + }, + { + "epoch": 0.9896704235343511, + "grad_norm": 0.14855614304542542, + "learning_rate": 4.010336815131708e-06, + "loss": 0.8639, + "step": 136720 + }, + { + "epoch": 0.9897428101949373, + "grad_norm": 0.15136641263961792, + "learning_rate": 4.010264428471122e-06, + "loss": 0.8603, + "step": 136730 + }, + { + "epoch": 0.9898151968555234, + "grad_norm": 0.15471212565898895, + "learning_rate": 4.0101920418105356e-06, + "loss": 0.8762, + "step": 136740 + }, + { + "epoch": 0.9898875835161096, + "grad_norm": 0.15268218517303467, + "learning_rate": 4.010119655149949e-06, + "loss": 0.8656, + "step": 136750 + }, + { + "epoch": 0.9899599701766958, + "grad_norm": 0.15985944867134094, + "learning_rate": 4.010047268489364e-06, + "loss": 0.8624, + "step": 136760 + }, + { + "epoch": 0.9900323568372821, + "grad_norm": 0.1638825535774231, + "learning_rate": 4.009974881828777e-06, + "loss": 0.8669, + "step": 136770 + }, + { + "epoch": 0.9901047434978683, + "grad_norm": 0.15115493535995483, + "learning_rate": 4.009902495168191e-06, + "loss": 0.8826, + "step": 136780 + }, + { + "epoch": 0.9901771301584544, + "grad_norm": 0.14795368909835815, + "learning_rate": 4.0098301085076045e-06, + "loss": 0.8596, + "step": 136790 + }, + { + "epoch": 0.9902495168190406, + "grad_norm": 0.17082315683364868, + "learning_rate": 4.009757721847018e-06, + "loss": 0.8785, + "step": 136800 + }, + { + "epoch": 0.9903219034796268, + "grad_norm": 0.1484622359275818, + "learning_rate": 4.0096853351864326e-06, + "loss": 0.8675, + "step": 136810 + }, + { + "epoch": 0.990394290140213, + "grad_norm": 0.16692779958248138, + "learning_rate": 4.009612948525846e-06, + "loss": 0.8716, + "step": 136820 + }, + { + "epoch": 0.9904666768007991, + "grad_norm": 0.16581696271896362, + "learning_rate": 4.00954056186526e-06, + "loss": 0.8664, + "step": 136830 + }, + { + "epoch": 0.9905390634613853, + "grad_norm": 0.15264694392681122, + "learning_rate": 4.009468175204673e-06, + "loss": 0.8654, + "step": 136840 + }, + { + "epoch": 0.9906114501219715, + "grad_norm": 0.17103955149650574, + "learning_rate": 4.009395788544088e-06, + "loss": 0.8827, + "step": 136850 + }, + { + "epoch": 0.9906838367825577, + "grad_norm": 0.15730604529380798, + "learning_rate": 4.0093234018835015e-06, + "loss": 0.8646, + "step": 136860 + }, + { + "epoch": 0.9907562234431438, + "grad_norm": 0.1455976516008377, + "learning_rate": 4.009251015222915e-06, + "loss": 0.8596, + "step": 136870 + }, + { + "epoch": 0.9908286101037301, + "grad_norm": 0.1492074877023697, + "learning_rate": 4.009178628562329e-06, + "loss": 0.8698, + "step": 136880 + }, + { + "epoch": 0.9909009967643163, + "grad_norm": 0.14965695142745972, + "learning_rate": 4.009106241901743e-06, + "loss": 0.867, + "step": 136890 + }, + { + "epoch": 0.9909733834249025, + "grad_norm": 0.19759173691272736, + "learning_rate": 4.009033855241157e-06, + "loss": 0.8611, + "step": 136900 + }, + { + "epoch": 0.9910457700854887, + "grad_norm": 0.17178118228912354, + "learning_rate": 4.00896146858057e-06, + "loss": 0.8834, + "step": 136910 + }, + { + "epoch": 0.9911181567460748, + "grad_norm": 0.1561564952135086, + "learning_rate": 4.008889081919984e-06, + "loss": 0.8574, + "step": 136920 + }, + { + "epoch": 0.991190543406661, + "grad_norm": 0.16888576745986938, + "learning_rate": 4.008816695259398e-06, + "loss": 0.8651, + "step": 136930 + }, + { + "epoch": 0.9912629300672472, + "grad_norm": 0.1438201516866684, + "learning_rate": 4.008744308598811e-06, + "loss": 0.8731, + "step": 136940 + }, + { + "epoch": 0.9913353167278334, + "grad_norm": 0.20436105132102966, + "learning_rate": 4.008671921938225e-06, + "loss": 0.8619, + "step": 136950 + }, + { + "epoch": 0.9914077033884195, + "grad_norm": 0.15767519176006317, + "learning_rate": 4.008599535277639e-06, + "loss": 0.8681, + "step": 136960 + }, + { + "epoch": 0.9914800900490057, + "grad_norm": 0.14873795211315155, + "learning_rate": 4.008527148617053e-06, + "loss": 0.8852, + "step": 136970 + }, + { + "epoch": 0.9915524767095919, + "grad_norm": 0.1454266458749771, + "learning_rate": 4.0084547619564666e-06, + "loss": 0.8646, + "step": 136980 + }, + { + "epoch": 0.9916248633701782, + "grad_norm": 0.14362385869026184, + "learning_rate": 4.00838237529588e-06, + "loss": 0.8638, + "step": 136990 + }, + { + "epoch": 0.9916972500307644, + "grad_norm": 0.2043345868587494, + "learning_rate": 4.008309988635295e-06, + "loss": 0.8635, + "step": 137000 + }, + { + "epoch": 0.9917696366913505, + "grad_norm": 0.15229232609272003, + "learning_rate": 4.008237601974708e-06, + "loss": 0.8684, + "step": 137010 + }, + { + "epoch": 0.9918420233519367, + "grad_norm": 0.17375898361206055, + "learning_rate": 4.008165215314122e-06, + "loss": 0.8571, + "step": 137020 + }, + { + "epoch": 0.9919144100125229, + "grad_norm": 0.16473175585269928, + "learning_rate": 4.0080928286535355e-06, + "loss": 0.8716, + "step": 137030 + }, + { + "epoch": 0.9919867966731091, + "grad_norm": 0.1511010229587555, + "learning_rate": 4.00802044199295e-06, + "loss": 0.8704, + "step": 137040 + }, + { + "epoch": 0.9920591833336952, + "grad_norm": 0.1537337750196457, + "learning_rate": 4.007948055332364e-06, + "loss": 0.8637, + "step": 137050 + }, + { + "epoch": 0.9921315699942814, + "grad_norm": 0.46637365221977234, + "learning_rate": 4.007875668671777e-06, + "loss": 0.8569, + "step": 137060 + }, + { + "epoch": 0.9922039566548676, + "grad_norm": 0.1638781875371933, + "learning_rate": 4.007803282011191e-06, + "loss": 0.8521, + "step": 137070 + }, + { + "epoch": 0.9922763433154538, + "grad_norm": 0.1593899428844452, + "learning_rate": 4.007730895350605e-06, + "loss": 0.8755, + "step": 137080 + }, + { + "epoch": 0.9923487299760401, + "grad_norm": 0.15153998136520386, + "learning_rate": 4.007658508690019e-06, + "loss": 0.864, + "step": 137090 + }, + { + "epoch": 0.9924211166366262, + "grad_norm": 0.15272100269794464, + "learning_rate": 4.0075861220294325e-06, + "loss": 0.8598, + "step": 137100 + }, + { + "epoch": 0.9924935032972124, + "grad_norm": 0.1559947431087494, + "learning_rate": 4.007513735368846e-06, + "loss": 0.8733, + "step": 137110 + }, + { + "epoch": 0.9925658899577986, + "grad_norm": 0.15066994726657867, + "learning_rate": 4.007441348708261e-06, + "loss": 0.8694, + "step": 137120 + }, + { + "epoch": 0.9926382766183848, + "grad_norm": 0.14346741139888763, + "learning_rate": 4.007368962047674e-06, + "loss": 0.8621, + "step": 137130 + }, + { + "epoch": 0.992710663278971, + "grad_norm": 0.14388960599899292, + "learning_rate": 4.007296575387088e-06, + "loss": 0.863, + "step": 137140 + }, + { + "epoch": 0.9927830499395571, + "grad_norm": 0.14946885406970978, + "learning_rate": 4.0072241887265014e-06, + "loss": 0.8577, + "step": 137150 + }, + { + "epoch": 0.9928554366001433, + "grad_norm": 0.15995468199253082, + "learning_rate": 4.007151802065916e-06, + "loss": 0.8613, + "step": 137160 + }, + { + "epoch": 0.9929278232607295, + "grad_norm": 0.15818224847316742, + "learning_rate": 4.0070794154053295e-06, + "loss": 0.871, + "step": 137170 + }, + { + "epoch": 0.9930002099213157, + "grad_norm": 0.15521134436130524, + "learning_rate": 4.007007028744743e-06, + "loss": 0.8568, + "step": 137180 + }, + { + "epoch": 0.9930725965819018, + "grad_norm": 0.15648885071277618, + "learning_rate": 4.006934642084157e-06, + "loss": 0.8688, + "step": 137190 + }, + { + "epoch": 0.9931449832424881, + "grad_norm": 0.15690802037715912, + "learning_rate": 4.006862255423571e-06, + "loss": 0.8696, + "step": 137200 + }, + { + "epoch": 0.9932173699030743, + "grad_norm": 0.17651623487472534, + "learning_rate": 4.006789868762985e-06, + "loss": 0.858, + "step": 137210 + }, + { + "epoch": 0.9932897565636605, + "grad_norm": 0.1453973650932312, + "learning_rate": 4.0067174821023984e-06, + "loss": 0.8623, + "step": 137220 + }, + { + "epoch": 0.9933621432242467, + "grad_norm": 0.16270014643669128, + "learning_rate": 4.006645095441812e-06, + "loss": 0.8702, + "step": 137230 + }, + { + "epoch": 0.9934345298848328, + "grad_norm": 0.2834886610507965, + "learning_rate": 4.0065727087812265e-06, + "loss": 0.8558, + "step": 137240 + }, + { + "epoch": 0.993506916545419, + "grad_norm": 0.1739642173051834, + "learning_rate": 4.00650032212064e-06, + "loss": 0.8631, + "step": 137250 + }, + { + "epoch": 0.9935793032060052, + "grad_norm": 0.16178597509860992, + "learning_rate": 4.006427935460054e-06, + "loss": 0.8616, + "step": 137260 + }, + { + "epoch": 0.9936516898665914, + "grad_norm": 0.1678241342306137, + "learning_rate": 4.006355548799467e-06, + "loss": 0.861, + "step": 137270 + }, + { + "epoch": 0.9937240765271775, + "grad_norm": 0.15866810083389282, + "learning_rate": 4.006283162138882e-06, + "loss": 0.8549, + "step": 137280 + }, + { + "epoch": 0.9937964631877637, + "grad_norm": 0.14100779592990875, + "learning_rate": 4.0062107754782955e-06, + "loss": 0.8714, + "step": 137290 + }, + { + "epoch": 0.99386884984835, + "grad_norm": 0.1390538066625595, + "learning_rate": 4.006138388817709e-06, + "loss": 0.8686, + "step": 137300 + }, + { + "epoch": 0.9939412365089362, + "grad_norm": 0.1549810916185379, + "learning_rate": 4.006066002157123e-06, + "loss": 0.8599, + "step": 137310 + }, + { + "epoch": 0.9940136231695224, + "grad_norm": 0.14956602454185486, + "learning_rate": 4.005993615496537e-06, + "loss": 0.8582, + "step": 137320 + }, + { + "epoch": 0.9940860098301085, + "grad_norm": 0.15790285170078278, + "learning_rate": 4.005921228835951e-06, + "loss": 0.8617, + "step": 137330 + }, + { + "epoch": 0.9941583964906947, + "grad_norm": 0.153904527425766, + "learning_rate": 4.005848842175364e-06, + "loss": 0.873, + "step": 137340 + }, + { + "epoch": 0.9942307831512809, + "grad_norm": 0.15151570737361908, + "learning_rate": 4.005776455514778e-06, + "loss": 0.8688, + "step": 137350 + }, + { + "epoch": 0.9943031698118671, + "grad_norm": 0.15569916367530823, + "learning_rate": 4.0057040688541925e-06, + "loss": 0.8518, + "step": 137360 + }, + { + "epoch": 0.9943755564724532, + "grad_norm": 0.14543747901916504, + "learning_rate": 4.005631682193606e-06, + "loss": 0.8739, + "step": 137370 + }, + { + "epoch": 0.9944479431330394, + "grad_norm": 0.16408121585845947, + "learning_rate": 4.00555929553302e-06, + "loss": 0.868, + "step": 137380 + }, + { + "epoch": 0.9945203297936256, + "grad_norm": 0.1823720484972, + "learning_rate": 4.005486908872433e-06, + "loss": 0.8547, + "step": 137390 + }, + { + "epoch": 0.9945927164542118, + "grad_norm": 0.15718266367912292, + "learning_rate": 4.005414522211847e-06, + "loss": 0.8677, + "step": 137400 + }, + { + "epoch": 0.994665103114798, + "grad_norm": 0.14774106442928314, + "learning_rate": 4.005342135551261e-06, + "loss": 0.8584, + "step": 137410 + }, + { + "epoch": 0.9947374897753842, + "grad_norm": 0.14286896586418152, + "learning_rate": 4.005269748890675e-06, + "loss": 0.8526, + "step": 137420 + }, + { + "epoch": 0.9948098764359704, + "grad_norm": 0.17574062943458557, + "learning_rate": 4.005197362230089e-06, + "loss": 0.8604, + "step": 137430 + }, + { + "epoch": 0.9948822630965566, + "grad_norm": 0.15135468542575836, + "learning_rate": 4.005124975569502e-06, + "loss": 0.8667, + "step": 137440 + }, + { + "epoch": 0.9949546497571428, + "grad_norm": 0.16362632811069489, + "learning_rate": 4.005052588908917e-06, + "loss": 0.8574, + "step": 137450 + }, + { + "epoch": 0.9950270364177289, + "grad_norm": 0.19329750537872314, + "learning_rate": 4.0049802022483295e-06, + "loss": 0.8783, + "step": 137460 + }, + { + "epoch": 0.9950994230783151, + "grad_norm": 0.1522989422082901, + "learning_rate": 4.004907815587744e-06, + "loss": 0.8557, + "step": 137470 + }, + { + "epoch": 0.9951718097389013, + "grad_norm": 0.17292699217796326, + "learning_rate": 4.0048354289271576e-06, + "loss": 0.8522, + "step": 137480 + }, + { + "epoch": 0.9952441963994875, + "grad_norm": 0.1753862351179123, + "learning_rate": 4.004763042266571e-06, + "loss": 0.8537, + "step": 137490 + }, + { + "epoch": 0.9953165830600736, + "grad_norm": 0.1441674530506134, + "learning_rate": 4.004690655605985e-06, + "loss": 0.8697, + "step": 137500 + }, + { + "epoch": 0.9953889697206598, + "grad_norm": 0.17849737405776978, + "learning_rate": 4.004618268945399e-06, + "loss": 0.8556, + "step": 137510 + }, + { + "epoch": 0.9954613563812461, + "grad_norm": 0.1520448625087738, + "learning_rate": 4.004545882284813e-06, + "loss": 0.8633, + "step": 137520 + }, + { + "epoch": 0.9955337430418323, + "grad_norm": 0.1547725945711136, + "learning_rate": 4.0044734956242265e-06, + "loss": 0.86, + "step": 137530 + }, + { + "epoch": 0.9956061297024185, + "grad_norm": 0.17297573387622833, + "learning_rate": 4.00440110896364e-06, + "loss": 0.8737, + "step": 137540 + }, + { + "epoch": 0.9956785163630046, + "grad_norm": 0.15290264785289764, + "learning_rate": 4.0043287223030546e-06, + "loss": 0.8615, + "step": 137550 + }, + { + "epoch": 0.9957509030235908, + "grad_norm": 0.15568116307258606, + "learning_rate": 4.004256335642468e-06, + "loss": 0.8603, + "step": 137560 + }, + { + "epoch": 0.995823289684177, + "grad_norm": 0.1544651985168457, + "learning_rate": 4.004183948981882e-06, + "loss": 0.8653, + "step": 137570 + }, + { + "epoch": 0.9958956763447632, + "grad_norm": 0.15817542374134064, + "learning_rate": 4.004111562321295e-06, + "loss": 0.8681, + "step": 137580 + }, + { + "epoch": 0.9959680630053493, + "grad_norm": 0.15345756709575653, + "learning_rate": 4.004039175660709e-06, + "loss": 0.8569, + "step": 137590 + }, + { + "epoch": 0.9960404496659355, + "grad_norm": 0.18895161151885986, + "learning_rate": 4.0039667890001235e-06, + "loss": 0.8733, + "step": 137600 + }, + { + "epoch": 0.9961128363265217, + "grad_norm": 0.1601308286190033, + "learning_rate": 4.003894402339537e-06, + "loss": 0.8627, + "step": 137610 + }, + { + "epoch": 0.996185222987108, + "grad_norm": 0.15279871225357056, + "learning_rate": 4.003822015678951e-06, + "loss": 0.8684, + "step": 137620 + }, + { + "epoch": 0.9962576096476942, + "grad_norm": 0.15466183423995972, + "learning_rate": 4.003749629018364e-06, + "loss": 0.8547, + "step": 137630 + }, + { + "epoch": 0.9963299963082803, + "grad_norm": 0.14806103706359863, + "learning_rate": 4.003677242357779e-06, + "loss": 0.8609, + "step": 137640 + }, + { + "epoch": 0.9964023829688665, + "grad_norm": 0.14731131494045258, + "learning_rate": 4.003604855697192e-06, + "loss": 0.8463, + "step": 137650 + }, + { + "epoch": 0.9964747696294527, + "grad_norm": 0.16127149760723114, + "learning_rate": 4.003532469036606e-06, + "loss": 0.8709, + "step": 137660 + }, + { + "epoch": 0.9965471562900389, + "grad_norm": 0.1602654606103897, + "learning_rate": 4.00346008237602e-06, + "loss": 0.8626, + "step": 137670 + }, + { + "epoch": 0.996619542950625, + "grad_norm": 0.18218478560447693, + "learning_rate": 4.003387695715434e-06, + "loss": 0.8643, + "step": 137680 + }, + { + "epoch": 0.9966919296112112, + "grad_norm": 0.19056251645088196, + "learning_rate": 4.003315309054848e-06, + "loss": 0.8618, + "step": 137690 + }, + { + "epoch": 0.9967643162717974, + "grad_norm": 0.15740570425987244, + "learning_rate": 4.003242922394261e-06, + "loss": 0.8707, + "step": 137700 + }, + { + "epoch": 0.9968367029323836, + "grad_norm": 0.15710806846618652, + "learning_rate": 4.003170535733675e-06, + "loss": 0.8691, + "step": 137710 + }, + { + "epoch": 0.9969090895929698, + "grad_norm": 0.16603271663188934, + "learning_rate": 4.003098149073089e-06, + "loss": 0.8569, + "step": 137720 + }, + { + "epoch": 0.996981476253556, + "grad_norm": 0.14341507852077484, + "learning_rate": 4.003025762412503e-06, + "loss": 0.8617, + "step": 137730 + }, + { + "epoch": 0.9970538629141422, + "grad_norm": 0.1590244024991989, + "learning_rate": 4.002953375751917e-06, + "loss": 0.8796, + "step": 137740 + }, + { + "epoch": 0.9971262495747284, + "grad_norm": 0.5351874232292175, + "learning_rate": 4.00288098909133e-06, + "loss": 0.8664, + "step": 137750 + }, + { + "epoch": 0.9971986362353146, + "grad_norm": 0.15063074231147766, + "learning_rate": 4.002808602430745e-06, + "loss": 0.862, + "step": 137760 + }, + { + "epoch": 0.9972710228959007, + "grad_norm": 0.18847909569740295, + "learning_rate": 4.002736215770158e-06, + "loss": 0.8713, + "step": 137770 + }, + { + "epoch": 0.9973434095564869, + "grad_norm": 0.1513339728116989, + "learning_rate": 4.002663829109572e-06, + "loss": 0.8714, + "step": 137780 + }, + { + "epoch": 0.9974157962170731, + "grad_norm": 0.14384783804416656, + "learning_rate": 4.002591442448986e-06, + "loss": 0.8723, + "step": 137790 + }, + { + "epoch": 0.9974881828776593, + "grad_norm": 0.1539144068956375, + "learning_rate": 4.0025190557884e-06, + "loss": 0.8618, + "step": 137800 + }, + { + "epoch": 0.9975605695382455, + "grad_norm": 0.1452489197254181, + "learning_rate": 4.002446669127814e-06, + "loss": 0.8714, + "step": 137810 + }, + { + "epoch": 0.9976329561988316, + "grad_norm": 0.15493297576904297, + "learning_rate": 4.002374282467227e-06, + "loss": 0.8562, + "step": 137820 + }, + { + "epoch": 0.9977053428594179, + "grad_norm": 0.154521182179451, + "learning_rate": 4.002301895806641e-06, + "loss": 0.8663, + "step": 137830 + }, + { + "epoch": 0.9977777295200041, + "grad_norm": 0.14628452062606812, + "learning_rate": 4.002229509146055e-06, + "loss": 0.8623, + "step": 137840 + }, + { + "epoch": 0.9978501161805903, + "grad_norm": 0.14919553697109222, + "learning_rate": 4.002157122485469e-06, + "loss": 0.8685, + "step": 137850 + }, + { + "epoch": 0.9979225028411765, + "grad_norm": 0.1762702614068985, + "learning_rate": 4.002084735824883e-06, + "loss": 0.8657, + "step": 137860 + }, + { + "epoch": 0.9979948895017626, + "grad_norm": 0.151996448636055, + "learning_rate": 4.002012349164296e-06, + "loss": 0.8568, + "step": 137870 + }, + { + "epoch": 0.9980672761623488, + "grad_norm": 0.15440037846565247, + "learning_rate": 4.001939962503711e-06, + "loss": 0.852, + "step": 137880 + }, + { + "epoch": 0.998139662822935, + "grad_norm": 0.16397789120674133, + "learning_rate": 4.001867575843124e-06, + "loss": 0.854, + "step": 137890 + }, + { + "epoch": 0.9982120494835212, + "grad_norm": 0.19279788434505463, + "learning_rate": 4.001795189182538e-06, + "loss": 0.8581, + "step": 137900 + }, + { + "epoch": 0.9982844361441073, + "grad_norm": 0.15897057950496674, + "learning_rate": 4.0017228025219515e-06, + "loss": 0.8711, + "step": 137910 + }, + { + "epoch": 0.9983568228046935, + "grad_norm": 0.1570604145526886, + "learning_rate": 4.001650415861366e-06, + "loss": 0.8645, + "step": 137920 + }, + { + "epoch": 0.9984292094652797, + "grad_norm": 0.16585461795330048, + "learning_rate": 4.00157802920078e-06, + "loss": 0.8796, + "step": 137930 + }, + { + "epoch": 0.998501596125866, + "grad_norm": 0.15717382729053497, + "learning_rate": 4.001505642540193e-06, + "loss": 0.8606, + "step": 137940 + }, + { + "epoch": 0.9985739827864522, + "grad_norm": 0.16818828880786896, + "learning_rate": 4.001433255879607e-06, + "loss": 0.8777, + "step": 137950 + }, + { + "epoch": 0.9986463694470383, + "grad_norm": 0.18249420821666718, + "learning_rate": 4.001360869219021e-06, + "loss": 0.866, + "step": 137960 + }, + { + "epoch": 0.9987187561076245, + "grad_norm": 0.16219259798526764, + "learning_rate": 4.001288482558435e-06, + "loss": 0.8638, + "step": 137970 + }, + { + "epoch": 0.9987911427682107, + "grad_norm": 0.15621773898601532, + "learning_rate": 4.0012160958978485e-06, + "loss": 0.8691, + "step": 137980 + }, + { + "epoch": 0.9988635294287969, + "grad_norm": 0.1662006676197052, + "learning_rate": 4.001143709237262e-06, + "loss": 0.8813, + "step": 137990 + }, + { + "epoch": 0.998935916089383, + "grad_norm": 0.13916003704071045, + "learning_rate": 4.001071322576676e-06, + "loss": 0.8568, + "step": 138000 + }, + { + "epoch": 0.9990083027499692, + "grad_norm": 0.15064559876918793, + "learning_rate": 4.000998935916089e-06, + "loss": 0.8619, + "step": 138010 + }, + { + "epoch": 0.9990806894105554, + "grad_norm": 0.16008462011814117, + "learning_rate": 4.000926549255503e-06, + "loss": 0.8738, + "step": 138020 + }, + { + "epoch": 0.9991530760711416, + "grad_norm": 0.1454089879989624, + "learning_rate": 4.0008541625949175e-06, + "loss": 0.8681, + "step": 138030 + }, + { + "epoch": 0.9992254627317277, + "grad_norm": 0.3634811341762543, + "learning_rate": 4.000781775934331e-06, + "loss": 0.8603, + "step": 138040 + }, + { + "epoch": 0.999297849392314, + "grad_norm": 0.16307184100151062, + "learning_rate": 4.000709389273745e-06, + "loss": 0.8636, + "step": 138050 + }, + { + "epoch": 0.9993702360529002, + "grad_norm": 0.15558481216430664, + "learning_rate": 4.000637002613158e-06, + "loss": 0.8703, + "step": 138060 + }, + { + "epoch": 0.9994426227134864, + "grad_norm": 0.1434854418039322, + "learning_rate": 4.000564615952573e-06, + "loss": 0.8678, + "step": 138070 + }, + { + "epoch": 0.9995150093740726, + "grad_norm": 0.13864096999168396, + "learning_rate": 4.000492229291986e-06, + "loss": 0.8635, + "step": 138080 + }, + { + "epoch": 0.9995873960346587, + "grad_norm": 0.1576741486787796, + "learning_rate": 4.0004198426314e-06, + "loss": 0.8626, + "step": 138090 + }, + { + "epoch": 0.9996597826952449, + "grad_norm": 0.14672447741031647, + "learning_rate": 4.000347455970814e-06, + "loss": 0.8483, + "step": 138100 + }, + { + "epoch": 0.9997321693558311, + "grad_norm": 0.16229544579982758, + "learning_rate": 4.000275069310228e-06, + "loss": 0.8708, + "step": 138110 + }, + { + "epoch": 0.9998045560164173, + "grad_norm": 0.14865851402282715, + "learning_rate": 4.000202682649642e-06, + "loss": 0.8589, + "step": 138120 + }, + { + "epoch": 0.9998769426770034, + "grad_norm": 0.14516963064670563, + "learning_rate": 4.000130295989055e-06, + "loss": 0.8713, + "step": 138130 + }, + { + "epoch": 0.9999493293375896, + "grad_norm": 0.14701320230960846, + "learning_rate": 4.000057909328469e-06, + "loss": 0.8514, + "step": 138140 + }, + { + "epoch": 1.000021715998176, + "grad_norm": 0.15110774338245392, + "learning_rate": 3.999985522667883e-06, + "loss": 0.8665, + "step": 138150 + }, + { + "epoch": 1.000094102658762, + "grad_norm": 0.14296278357505798, + "learning_rate": 3.999913136007297e-06, + "loss": 0.8619, + "step": 138160 + }, + { + "epoch": 1.0001664893193483, + "grad_norm": 0.14643356204032898, + "learning_rate": 3.999840749346711e-06, + "loss": 0.8675, + "step": 138170 + }, + { + "epoch": 1.0002388759799343, + "grad_norm": 0.16440226137638092, + "learning_rate": 3.999768362686124e-06, + "loss": 0.8456, + "step": 138180 + }, + { + "epoch": 1.0003112626405206, + "grad_norm": 0.14877723157405853, + "learning_rate": 3.999695976025538e-06, + "loss": 0.8591, + "step": 138190 + }, + { + "epoch": 1.0003836493011067, + "grad_norm": 0.150392085313797, + "learning_rate": 3.999623589364952e-06, + "loss": 0.8588, + "step": 138200 + }, + { + "epoch": 1.000456035961693, + "grad_norm": 0.1557486355304718, + "learning_rate": 3.999551202704366e-06, + "loss": 0.8715, + "step": 138210 + }, + { + "epoch": 1.0005284226222793, + "grad_norm": 0.16424834728240967, + "learning_rate": 3.9994788160437795e-06, + "loss": 0.8752, + "step": 138220 + }, + { + "epoch": 1.0006008092828653, + "grad_norm": 0.1615014523267746, + "learning_rate": 3.999406429383193e-06, + "loss": 0.8734, + "step": 138230 + }, + { + "epoch": 1.0006731959434516, + "grad_norm": 0.1606891006231308, + "learning_rate": 3.999334042722608e-06, + "loss": 0.8675, + "step": 138240 + }, + { + "epoch": 1.0007455826040377, + "grad_norm": 0.16052010655403137, + "learning_rate": 3.999261656062021e-06, + "loss": 0.8599, + "step": 138250 + }, + { + "epoch": 1.000817969264624, + "grad_norm": 0.154453843832016, + "learning_rate": 3.999189269401435e-06, + "loss": 0.8503, + "step": 138260 + }, + { + "epoch": 1.00089035592521, + "grad_norm": 0.1499967873096466, + "learning_rate": 3.9991168827408485e-06, + "loss": 0.8631, + "step": 138270 + }, + { + "epoch": 1.0009627425857963, + "grad_norm": 0.1510349065065384, + "learning_rate": 3.999044496080263e-06, + "loss": 0.8663, + "step": 138280 + }, + { + "epoch": 1.0010351292463824, + "grad_norm": 0.18189969658851624, + "learning_rate": 3.9989721094196766e-06, + "loss": 0.8725, + "step": 138290 + }, + { + "epoch": 1.0011075159069687, + "grad_norm": 0.1505780965089798, + "learning_rate": 3.99889972275909e-06, + "loss": 0.8589, + "step": 138300 + }, + { + "epoch": 1.001179902567555, + "grad_norm": 0.15750384330749512, + "learning_rate": 3.998827336098504e-06, + "loss": 0.8675, + "step": 138310 + }, + { + "epoch": 1.001252289228141, + "grad_norm": 0.14412176609039307, + "learning_rate": 3.998754949437918e-06, + "loss": 0.8613, + "step": 138320 + }, + { + "epoch": 1.0013246758887273, + "grad_norm": 0.1520223170518875, + "learning_rate": 3.998682562777332e-06, + "loss": 0.8639, + "step": 138330 + }, + { + "epoch": 1.0013970625493134, + "grad_norm": 0.15672039985656738, + "learning_rate": 3.9986101761167455e-06, + "loss": 0.8651, + "step": 138340 + }, + { + "epoch": 1.0014694492098997, + "grad_norm": 0.19693726301193237, + "learning_rate": 3.998537789456159e-06, + "loss": 0.8727, + "step": 138350 + }, + { + "epoch": 1.0015418358704857, + "grad_norm": 0.18891595304012299, + "learning_rate": 3.9984654027955736e-06, + "loss": 0.8756, + "step": 138360 + }, + { + "epoch": 1.001614222531072, + "grad_norm": 0.14918550848960876, + "learning_rate": 3.998393016134987e-06, + "loss": 0.8603, + "step": 138370 + }, + { + "epoch": 1.001686609191658, + "grad_norm": 0.16212646663188934, + "learning_rate": 3.998320629474401e-06, + "loss": 0.8653, + "step": 138380 + }, + { + "epoch": 1.0017589958522444, + "grad_norm": 0.14969107508659363, + "learning_rate": 3.998248242813814e-06, + "loss": 0.8777, + "step": 138390 + }, + { + "epoch": 1.0018313825128304, + "grad_norm": 0.14508505165576935, + "learning_rate": 3.998175856153229e-06, + "loss": 0.8573, + "step": 138400 + }, + { + "epoch": 1.0019037691734167, + "grad_norm": 0.1480925977230072, + "learning_rate": 3.9981034694926425e-06, + "loss": 0.872, + "step": 138410 + }, + { + "epoch": 1.001976155834003, + "grad_norm": 0.16480182111263275, + "learning_rate": 3.998031082832056e-06, + "loss": 0.881, + "step": 138420 + }, + { + "epoch": 1.002048542494589, + "grad_norm": 0.14416438341140747, + "learning_rate": 3.99795869617147e-06, + "loss": 0.867, + "step": 138430 + }, + { + "epoch": 1.0021209291551754, + "grad_norm": 0.14154788851737976, + "learning_rate": 3.997886309510884e-06, + "loss": 0.8568, + "step": 138440 + }, + { + "epoch": 1.0021933158157614, + "grad_norm": 0.1673395335674286, + "learning_rate": 3.997813922850298e-06, + "loss": 0.8553, + "step": 138450 + }, + { + "epoch": 1.0022657024763477, + "grad_norm": 0.16951638460159302, + "learning_rate": 3.997741536189711e-06, + "loss": 0.8569, + "step": 138460 + }, + { + "epoch": 1.0023380891369338, + "grad_norm": 0.16748495399951935, + "learning_rate": 3.997669149529125e-06, + "loss": 0.8657, + "step": 138470 + }, + { + "epoch": 1.00241047579752, + "grad_norm": 0.1609235256910324, + "learning_rate": 3.9975967628685395e-06, + "loss": 0.8493, + "step": 138480 + }, + { + "epoch": 1.0024828624581061, + "grad_norm": 0.15038661658763885, + "learning_rate": 3.997524376207953e-06, + "loss": 0.8577, + "step": 138490 + }, + { + "epoch": 1.0025552491186924, + "grad_norm": 0.15699024498462677, + "learning_rate": 3.997451989547367e-06, + "loss": 0.8584, + "step": 138500 + }, + { + "epoch": 1.0026276357792785, + "grad_norm": 0.153981551527977, + "learning_rate": 3.99737960288678e-06, + "loss": 0.8621, + "step": 138510 + }, + { + "epoch": 1.0027000224398648, + "grad_norm": 0.14797601103782654, + "learning_rate": 3.997307216226194e-06, + "loss": 0.8603, + "step": 138520 + }, + { + "epoch": 1.002772409100451, + "grad_norm": 0.18665072321891785, + "learning_rate": 3.997234829565608e-06, + "loss": 0.8615, + "step": 138530 + }, + { + "epoch": 1.0028447957610371, + "grad_norm": 0.18154151737689972, + "learning_rate": 3.997162442905021e-06, + "loss": 0.868, + "step": 138540 + }, + { + "epoch": 1.0029171824216234, + "grad_norm": 0.14707796275615692, + "learning_rate": 3.997090056244436e-06, + "loss": 0.8544, + "step": 138550 + }, + { + "epoch": 1.0029895690822095, + "grad_norm": 0.1541696935892105, + "learning_rate": 3.997017669583849e-06, + "loss": 0.8633, + "step": 138560 + }, + { + "epoch": 1.0030619557427958, + "grad_norm": 0.15165592730045319, + "learning_rate": 3.996945282923263e-06, + "loss": 0.8684, + "step": 138570 + }, + { + "epoch": 1.0031343424033818, + "grad_norm": 0.1640002727508545, + "learning_rate": 3.9968728962626765e-06, + "loss": 0.8738, + "step": 138580 + }, + { + "epoch": 1.0032067290639681, + "grad_norm": 0.14847826957702637, + "learning_rate": 3.996800509602091e-06, + "loss": 0.8608, + "step": 138590 + }, + { + "epoch": 1.0032791157245542, + "grad_norm": 0.16807755827903748, + "learning_rate": 3.996728122941505e-06, + "loss": 0.8656, + "step": 138600 + }, + { + "epoch": 1.0033515023851405, + "grad_norm": 0.175052210688591, + "learning_rate": 3.996655736280918e-06, + "loss": 0.8663, + "step": 138610 + }, + { + "epoch": 1.0034238890457265, + "grad_norm": 0.18418249487876892, + "learning_rate": 3.996583349620332e-06, + "loss": 0.8619, + "step": 138620 + }, + { + "epoch": 1.0034962757063128, + "grad_norm": 0.14789465069770813, + "learning_rate": 3.996510962959746e-06, + "loss": 0.8597, + "step": 138630 + }, + { + "epoch": 1.0035686623668991, + "grad_norm": 0.14474569261074066, + "learning_rate": 3.99643857629916e-06, + "loss": 0.8721, + "step": 138640 + }, + { + "epoch": 1.0036410490274852, + "grad_norm": 0.14751043915748596, + "learning_rate": 3.9963661896385735e-06, + "loss": 0.8922, + "step": 138650 + }, + { + "epoch": 1.0037134356880715, + "grad_norm": 0.14138489961624146, + "learning_rate": 3.996293802977987e-06, + "loss": 0.8643, + "step": 138660 + }, + { + "epoch": 1.0037858223486575, + "grad_norm": 0.14229081571102142, + "learning_rate": 3.996221416317402e-06, + "loss": 0.8595, + "step": 138670 + }, + { + "epoch": 1.0038582090092438, + "grad_norm": 0.24338965117931366, + "learning_rate": 3.996149029656815e-06, + "loss": 0.861, + "step": 138680 + }, + { + "epoch": 1.00393059566983, + "grad_norm": 0.2033909410238266, + "learning_rate": 3.996076642996229e-06, + "loss": 0.864, + "step": 138690 + }, + { + "epoch": 1.0040029823304162, + "grad_norm": 0.15376165509223938, + "learning_rate": 3.9960042563356424e-06, + "loss": 0.8662, + "step": 138700 + }, + { + "epoch": 1.0040753689910022, + "grad_norm": 0.14948670566082, + "learning_rate": 3.995931869675057e-06, + "loss": 0.8744, + "step": 138710 + }, + { + "epoch": 1.0041477556515885, + "grad_norm": 0.15459775924682617, + "learning_rate": 3.9958594830144705e-06, + "loss": 0.8763, + "step": 138720 + }, + { + "epoch": 1.0042201423121746, + "grad_norm": 0.16300074756145477, + "learning_rate": 3.995787096353884e-06, + "loss": 0.8532, + "step": 138730 + }, + { + "epoch": 1.004292528972761, + "grad_norm": 0.14748498797416687, + "learning_rate": 3.995714709693298e-06, + "loss": 0.8659, + "step": 138740 + }, + { + "epoch": 1.0043649156333472, + "grad_norm": 0.14953255653381348, + "learning_rate": 3.995642323032712e-06, + "loss": 0.8737, + "step": 138750 + }, + { + "epoch": 1.0044373022939332, + "grad_norm": 0.17030136287212372, + "learning_rate": 3.995569936372126e-06, + "loss": 0.8659, + "step": 138760 + }, + { + "epoch": 1.0045096889545195, + "grad_norm": 0.15466326475143433, + "learning_rate": 3.9954975497115395e-06, + "loss": 0.8597, + "step": 138770 + }, + { + "epoch": 1.0045820756151056, + "grad_norm": 0.2016264945268631, + "learning_rate": 3.995425163050953e-06, + "loss": 0.8675, + "step": 138780 + }, + { + "epoch": 1.0046544622756919, + "grad_norm": 0.2277584671974182, + "learning_rate": 3.9953527763903675e-06, + "loss": 0.8689, + "step": 138790 + }, + { + "epoch": 1.004726848936278, + "grad_norm": 0.14678841829299927, + "learning_rate": 3.995280389729781e-06, + "loss": 0.8658, + "step": 138800 + }, + { + "epoch": 1.0047992355968642, + "grad_norm": 0.18819843232631683, + "learning_rate": 3.995208003069195e-06, + "loss": 0.8627, + "step": 138810 + }, + { + "epoch": 1.0048716222574503, + "grad_norm": 0.14515884220600128, + "learning_rate": 3.995135616408608e-06, + "loss": 0.8478, + "step": 138820 + }, + { + "epoch": 1.0049440089180366, + "grad_norm": 0.15614381432533264, + "learning_rate": 3.995063229748022e-06, + "loss": 0.8636, + "step": 138830 + }, + { + "epoch": 1.0050163955786229, + "grad_norm": 0.14769770205020905, + "learning_rate": 3.9949908430874365e-06, + "loss": 0.8577, + "step": 138840 + }, + { + "epoch": 1.005088782239209, + "grad_norm": 0.15718652307987213, + "learning_rate": 3.99491845642685e-06, + "loss": 0.8645, + "step": 138850 + }, + { + "epoch": 1.0051611688997952, + "grad_norm": 0.14552608132362366, + "learning_rate": 3.994846069766264e-06, + "loss": 0.8609, + "step": 138860 + }, + { + "epoch": 1.0052335555603813, + "grad_norm": 0.1629728376865387, + "learning_rate": 3.994773683105677e-06, + "loss": 0.8725, + "step": 138870 + }, + { + "epoch": 1.0053059422209676, + "grad_norm": 0.1548013985157013, + "learning_rate": 3.994701296445092e-06, + "loss": 0.8592, + "step": 138880 + }, + { + "epoch": 1.0053783288815537, + "grad_norm": 0.1612953543663025, + "learning_rate": 3.994628909784505e-06, + "loss": 0.8649, + "step": 138890 + }, + { + "epoch": 1.00545071554214, + "grad_norm": 0.15613959729671478, + "learning_rate": 3.994556523123919e-06, + "loss": 0.852, + "step": 138900 + }, + { + "epoch": 1.005523102202726, + "grad_norm": 0.14688079059123993, + "learning_rate": 3.994484136463333e-06, + "loss": 0.8625, + "step": 138910 + }, + { + "epoch": 1.0055954888633123, + "grad_norm": 0.16856560111045837, + "learning_rate": 3.994411749802747e-06, + "loss": 0.866, + "step": 138920 + }, + { + "epoch": 1.0056678755238984, + "grad_norm": 0.14123956859111786, + "learning_rate": 3.994339363142161e-06, + "loss": 0.8595, + "step": 138930 + }, + { + "epoch": 1.0057402621844846, + "grad_norm": 0.15855082869529724, + "learning_rate": 3.994266976481574e-06, + "loss": 0.863, + "step": 138940 + }, + { + "epoch": 1.005812648845071, + "grad_norm": 0.16692784428596497, + "learning_rate": 3.994194589820988e-06, + "loss": 0.8534, + "step": 138950 + }, + { + "epoch": 1.005885035505657, + "grad_norm": 0.1530875861644745, + "learning_rate": 3.994122203160402e-06, + "loss": 0.855, + "step": 138960 + }, + { + "epoch": 1.0059574221662433, + "grad_norm": 0.16144618391990662, + "learning_rate": 3.994049816499816e-06, + "loss": 0.8496, + "step": 138970 + }, + { + "epoch": 1.0060298088268294, + "grad_norm": 0.15884657204151154, + "learning_rate": 3.99397742983923e-06, + "loss": 0.8641, + "step": 138980 + }, + { + "epoch": 1.0061021954874156, + "grad_norm": 0.1514037847518921, + "learning_rate": 3.993905043178643e-06, + "loss": 0.8562, + "step": 138990 + }, + { + "epoch": 1.0061745821480017, + "grad_norm": 0.1448083519935608, + "learning_rate": 3.993832656518058e-06, + "loss": 0.8545, + "step": 139000 + }, + { + "epoch": 1.006246968808588, + "grad_norm": 0.153413325548172, + "learning_rate": 3.993760269857471e-06, + "loss": 0.8668, + "step": 139010 + }, + { + "epoch": 1.006319355469174, + "grad_norm": 0.16846509277820587, + "learning_rate": 3.993687883196885e-06, + "loss": 0.8534, + "step": 139020 + }, + { + "epoch": 1.0063917421297603, + "grad_norm": 0.14701557159423828, + "learning_rate": 3.9936154965362986e-06, + "loss": 0.8597, + "step": 139030 + }, + { + "epoch": 1.0064641287903464, + "grad_norm": 0.15452063083648682, + "learning_rate": 3.993543109875713e-06, + "loss": 0.8734, + "step": 139040 + }, + { + "epoch": 1.0065365154509327, + "grad_norm": 0.14439797401428223, + "learning_rate": 3.993470723215126e-06, + "loss": 0.8762, + "step": 139050 + }, + { + "epoch": 1.006608902111519, + "grad_norm": 0.15302760899066925, + "learning_rate": 3.993398336554539e-06, + "loss": 0.8597, + "step": 139060 + }, + { + "epoch": 1.006681288772105, + "grad_norm": 0.1926603466272354, + "learning_rate": 3.993325949893954e-06, + "loss": 0.8524, + "step": 139070 + }, + { + "epoch": 1.0067536754326913, + "grad_norm": 0.15659856796264648, + "learning_rate": 3.9932535632333675e-06, + "loss": 0.8558, + "step": 139080 + }, + { + "epoch": 1.0068260620932774, + "grad_norm": 0.18837852776050568, + "learning_rate": 3.993181176572781e-06, + "loss": 0.8756, + "step": 139090 + }, + { + "epoch": 1.0068984487538637, + "grad_norm": 0.14618010818958282, + "learning_rate": 3.993108789912195e-06, + "loss": 0.8688, + "step": 139100 + }, + { + "epoch": 1.0069708354144498, + "grad_norm": 0.1539430171251297, + "learning_rate": 3.993036403251609e-06, + "loss": 0.8525, + "step": 139110 + }, + { + "epoch": 1.007043222075036, + "grad_norm": 0.1436515599489212, + "learning_rate": 3.992964016591023e-06, + "loss": 0.8657, + "step": 139120 + }, + { + "epoch": 1.0071156087356221, + "grad_norm": 0.15015138685703278, + "learning_rate": 3.992891629930436e-06, + "loss": 0.8606, + "step": 139130 + }, + { + "epoch": 1.0071879953962084, + "grad_norm": 0.15750154852867126, + "learning_rate": 3.99281924326985e-06, + "loss": 0.849, + "step": 139140 + }, + { + "epoch": 1.0072603820567945, + "grad_norm": 0.15625479817390442, + "learning_rate": 3.9927468566092645e-06, + "loss": 0.8614, + "step": 139150 + }, + { + "epoch": 1.0073327687173808, + "grad_norm": 0.1461600959300995, + "learning_rate": 3.992674469948678e-06, + "loss": 0.8485, + "step": 139160 + }, + { + "epoch": 1.007405155377967, + "grad_norm": 0.16277381777763367, + "learning_rate": 3.992602083288092e-06, + "loss": 0.8705, + "step": 139170 + }, + { + "epoch": 1.007477542038553, + "grad_norm": 0.3381327986717224, + "learning_rate": 3.992529696627505e-06, + "loss": 0.8686, + "step": 139180 + }, + { + "epoch": 1.0075499286991394, + "grad_norm": 0.14702801406383514, + "learning_rate": 3.99245730996692e-06, + "loss": 0.8526, + "step": 139190 + }, + { + "epoch": 1.0076223153597255, + "grad_norm": 0.15568077564239502, + "learning_rate": 3.992384923306333e-06, + "loss": 0.8497, + "step": 139200 + }, + { + "epoch": 1.0076947020203117, + "grad_norm": 0.16276511549949646, + "learning_rate": 3.992312536645747e-06, + "loss": 0.8634, + "step": 139210 + }, + { + "epoch": 1.0077670886808978, + "grad_norm": 0.1559554636478424, + "learning_rate": 3.992240149985161e-06, + "loss": 0.8703, + "step": 139220 + }, + { + "epoch": 1.007839475341484, + "grad_norm": 0.156097874045372, + "learning_rate": 3.992167763324575e-06, + "loss": 0.8721, + "step": 139230 + }, + { + "epoch": 1.0079118620020702, + "grad_norm": 0.1414407193660736, + "learning_rate": 3.992095376663989e-06, + "loss": 0.8661, + "step": 139240 + }, + { + "epoch": 1.0079842486626565, + "grad_norm": 0.15653790533542633, + "learning_rate": 3.992022990003402e-06, + "loss": 0.8547, + "step": 139250 + }, + { + "epoch": 1.0080566353232427, + "grad_norm": 0.15031273663043976, + "learning_rate": 3.991950603342816e-06, + "loss": 0.858, + "step": 139260 + }, + { + "epoch": 1.0081290219838288, + "grad_norm": 0.17239518463611603, + "learning_rate": 3.9918782166822304e-06, + "loss": 0.8548, + "step": 139270 + }, + { + "epoch": 1.008201408644415, + "grad_norm": 0.1503749042749405, + "learning_rate": 3.991805830021644e-06, + "loss": 0.8711, + "step": 139280 + }, + { + "epoch": 1.0082737953050012, + "grad_norm": 0.15691126883029938, + "learning_rate": 3.991733443361058e-06, + "loss": 0.8525, + "step": 139290 + }, + { + "epoch": 1.0083461819655875, + "grad_norm": 0.15124544501304626, + "learning_rate": 3.991661056700471e-06, + "loss": 0.8684, + "step": 139300 + }, + { + "epoch": 1.0084185686261735, + "grad_norm": 0.16640368103981018, + "learning_rate": 3.991588670039886e-06, + "loss": 0.8621, + "step": 139310 + }, + { + "epoch": 1.0084909552867598, + "grad_norm": 0.14733107388019562, + "learning_rate": 3.991516283379299e-06, + "loss": 0.8595, + "step": 139320 + }, + { + "epoch": 1.0085633419473459, + "grad_norm": 0.14905431866645813, + "learning_rate": 3.991443896718713e-06, + "loss": 0.8636, + "step": 139330 + }, + { + "epoch": 1.0086357286079322, + "grad_norm": 0.16022346913814545, + "learning_rate": 3.991371510058127e-06, + "loss": 0.8564, + "step": 139340 + }, + { + "epoch": 1.0087081152685182, + "grad_norm": 0.14993324875831604, + "learning_rate": 3.991299123397541e-06, + "loss": 0.8704, + "step": 139350 + }, + { + "epoch": 1.0087805019291045, + "grad_norm": 0.14608387649059296, + "learning_rate": 3.991226736736955e-06, + "loss": 0.848, + "step": 139360 + }, + { + "epoch": 1.0088528885896908, + "grad_norm": 0.14675495028495789, + "learning_rate": 3.991154350076368e-06, + "loss": 0.8736, + "step": 139370 + }, + { + "epoch": 1.0089252752502769, + "grad_norm": 0.13962878286838531, + "learning_rate": 3.991081963415782e-06, + "loss": 0.8634, + "step": 139380 + }, + { + "epoch": 1.0089976619108632, + "grad_norm": 0.1584140658378601, + "learning_rate": 3.991009576755196e-06, + "loss": 0.8708, + "step": 139390 + }, + { + "epoch": 1.0090700485714492, + "grad_norm": 0.1543683111667633, + "learning_rate": 3.99093719009461e-06, + "loss": 0.8651, + "step": 139400 + }, + { + "epoch": 1.0091424352320355, + "grad_norm": 0.1464136689901352, + "learning_rate": 3.990864803434024e-06, + "loss": 0.8693, + "step": 139410 + }, + { + "epoch": 1.0092148218926216, + "grad_norm": 0.1557295322418213, + "learning_rate": 3.990792416773437e-06, + "loss": 0.8609, + "step": 139420 + }, + { + "epoch": 1.0092872085532079, + "grad_norm": 0.14724186062812805, + "learning_rate": 3.990720030112851e-06, + "loss": 0.8753, + "step": 139430 + }, + { + "epoch": 1.009359595213794, + "grad_norm": 0.1482950747013092, + "learning_rate": 3.990647643452265e-06, + "loss": 0.8607, + "step": 139440 + }, + { + "epoch": 1.0094319818743802, + "grad_norm": 0.15687072277069092, + "learning_rate": 3.990575256791679e-06, + "loss": 0.857, + "step": 139450 + }, + { + "epoch": 1.0095043685349663, + "grad_norm": 0.15880662202835083, + "learning_rate": 3.9905028701310925e-06, + "loss": 0.8595, + "step": 139460 + }, + { + "epoch": 1.0095767551955526, + "grad_norm": 0.14912816882133484, + "learning_rate": 3.990430483470506e-06, + "loss": 0.8613, + "step": 139470 + }, + { + "epoch": 1.0096491418561389, + "grad_norm": 0.15200045704841614, + "learning_rate": 3.990358096809921e-06, + "loss": 0.8597, + "step": 139480 + }, + { + "epoch": 1.009721528516725, + "grad_norm": 0.1544422060251236, + "learning_rate": 3.990285710149334e-06, + "loss": 0.858, + "step": 139490 + }, + { + "epoch": 1.0097939151773112, + "grad_norm": 0.16103899478912354, + "learning_rate": 3.990213323488748e-06, + "loss": 0.8596, + "step": 139500 + }, + { + "epoch": 1.0098663018378973, + "grad_norm": 0.1505272537469864, + "learning_rate": 3.9901409368281615e-06, + "loss": 0.859, + "step": 139510 + }, + { + "epoch": 1.0099386884984836, + "grad_norm": 0.16239230334758759, + "learning_rate": 3.990068550167576e-06, + "loss": 0.8547, + "step": 139520 + }, + { + "epoch": 1.0100110751590696, + "grad_norm": 0.15705235302448273, + "learning_rate": 3.9899961635069895e-06, + "loss": 0.8604, + "step": 139530 + }, + { + "epoch": 1.010083461819656, + "grad_norm": 0.15264788269996643, + "learning_rate": 3.989923776846403e-06, + "loss": 0.8729, + "step": 139540 + }, + { + "epoch": 1.010155848480242, + "grad_norm": 0.15049117803573608, + "learning_rate": 3.989851390185817e-06, + "loss": 0.8613, + "step": 139550 + }, + { + "epoch": 1.0102282351408283, + "grad_norm": 0.17415769398212433, + "learning_rate": 3.989779003525231e-06, + "loss": 0.8613, + "step": 139560 + }, + { + "epoch": 1.0103006218014143, + "grad_norm": 0.151180699467659, + "learning_rate": 3.989706616864645e-06, + "loss": 0.8631, + "step": 139570 + }, + { + "epoch": 1.0103730084620006, + "grad_norm": 0.14194343984127045, + "learning_rate": 3.9896342302040585e-06, + "loss": 0.8597, + "step": 139580 + }, + { + "epoch": 1.010445395122587, + "grad_norm": 0.15992465615272522, + "learning_rate": 3.989561843543472e-06, + "loss": 0.8636, + "step": 139590 + }, + { + "epoch": 1.010517781783173, + "grad_norm": 0.15397891402244568, + "learning_rate": 3.989489456882886e-06, + "loss": 0.8599, + "step": 139600 + }, + { + "epoch": 1.0105901684437593, + "grad_norm": 0.16052521765232086, + "learning_rate": 3.989417070222299e-06, + "loss": 0.8576, + "step": 139610 + }, + { + "epoch": 1.0106625551043453, + "grad_norm": 0.15102127194404602, + "learning_rate": 3.989344683561713e-06, + "loss": 0.872, + "step": 139620 + }, + { + "epoch": 1.0107349417649316, + "grad_norm": 0.1572069674730301, + "learning_rate": 3.989272296901127e-06, + "loss": 0.8688, + "step": 139630 + }, + { + "epoch": 1.0108073284255177, + "grad_norm": 0.14729195833206177, + "learning_rate": 3.989199910240541e-06, + "loss": 0.8668, + "step": 139640 + }, + { + "epoch": 1.010879715086104, + "grad_norm": 0.16329136490821838, + "learning_rate": 3.989127523579955e-06, + "loss": 0.8643, + "step": 139650 + }, + { + "epoch": 1.01095210174669, + "grad_norm": 0.15864083170890808, + "learning_rate": 3.989055136919368e-06, + "loss": 0.8686, + "step": 139660 + }, + { + "epoch": 1.0110244884072763, + "grad_norm": 0.1520940214395523, + "learning_rate": 3.988982750258783e-06, + "loss": 0.8573, + "step": 139670 + }, + { + "epoch": 1.0110968750678624, + "grad_norm": 0.2237161546945572, + "learning_rate": 3.988910363598196e-06, + "loss": 0.8685, + "step": 139680 + }, + { + "epoch": 1.0111692617284487, + "grad_norm": 0.15291093289852142, + "learning_rate": 3.98883797693761e-06, + "loss": 0.8554, + "step": 139690 + }, + { + "epoch": 1.011241648389035, + "grad_norm": 0.1433836966753006, + "learning_rate": 3.9887655902770235e-06, + "loss": 0.8553, + "step": 139700 + }, + { + "epoch": 1.011314035049621, + "grad_norm": 0.15727286040782928, + "learning_rate": 3.988693203616438e-06, + "loss": 0.8692, + "step": 139710 + }, + { + "epoch": 1.0113864217102073, + "grad_norm": 0.14590954780578613, + "learning_rate": 3.988620816955852e-06, + "loss": 0.8654, + "step": 139720 + }, + { + "epoch": 1.0114588083707934, + "grad_norm": 0.15376925468444824, + "learning_rate": 3.988548430295265e-06, + "loss": 0.8614, + "step": 139730 + }, + { + "epoch": 1.0115311950313797, + "grad_norm": 0.14909863471984863, + "learning_rate": 3.988476043634679e-06, + "loss": 0.8432, + "step": 139740 + }, + { + "epoch": 1.0116035816919657, + "grad_norm": 0.15601171553134918, + "learning_rate": 3.988403656974093e-06, + "loss": 0.8543, + "step": 139750 + }, + { + "epoch": 1.011675968352552, + "grad_norm": 0.1624634563922882, + "learning_rate": 3.988331270313507e-06, + "loss": 0.8587, + "step": 139760 + }, + { + "epoch": 1.011748355013138, + "grad_norm": 0.15104293823242188, + "learning_rate": 3.9882588836529206e-06, + "loss": 0.8621, + "step": 139770 + }, + { + "epoch": 1.0118207416737244, + "grad_norm": 0.15875457227230072, + "learning_rate": 3.988186496992334e-06, + "loss": 0.8702, + "step": 139780 + }, + { + "epoch": 1.0118931283343104, + "grad_norm": 0.14369283616542816, + "learning_rate": 3.988114110331749e-06, + "loss": 0.8654, + "step": 139790 + }, + { + "epoch": 1.0119655149948967, + "grad_norm": 0.15194371342658997, + "learning_rate": 3.988041723671162e-06, + "loss": 0.8584, + "step": 139800 + }, + { + "epoch": 1.012037901655483, + "grad_norm": 0.18892666697502136, + "learning_rate": 3.987969337010576e-06, + "loss": 0.8606, + "step": 139810 + }, + { + "epoch": 1.012110288316069, + "grad_norm": 0.16798314452171326, + "learning_rate": 3.9878969503499895e-06, + "loss": 0.8621, + "step": 139820 + }, + { + "epoch": 1.0121826749766554, + "grad_norm": 0.15889109671115875, + "learning_rate": 3.987824563689404e-06, + "loss": 0.8675, + "step": 139830 + }, + { + "epoch": 1.0122550616372414, + "grad_norm": 0.14668938517570496, + "learning_rate": 3.9877521770288176e-06, + "loss": 0.8611, + "step": 139840 + }, + { + "epoch": 1.0123274482978277, + "grad_norm": 0.15897607803344727, + "learning_rate": 3.987679790368231e-06, + "loss": 0.8696, + "step": 139850 + }, + { + "epoch": 1.0123998349584138, + "grad_norm": 0.14037030935287476, + "learning_rate": 3.987607403707645e-06, + "loss": 0.8669, + "step": 139860 + }, + { + "epoch": 1.012472221619, + "grad_norm": 0.161557137966156, + "learning_rate": 3.987535017047059e-06, + "loss": 0.8397, + "step": 139870 + }, + { + "epoch": 1.0125446082795861, + "grad_norm": 0.14961223304271698, + "learning_rate": 3.987462630386473e-06, + "loss": 0.8637, + "step": 139880 + }, + { + "epoch": 1.0126169949401724, + "grad_norm": 0.16417111456394196, + "learning_rate": 3.9873902437258865e-06, + "loss": 0.8535, + "step": 139890 + }, + { + "epoch": 1.0126893816007587, + "grad_norm": 0.1531134694814682, + "learning_rate": 3.9873178570653e-06, + "loss": 0.8662, + "step": 139900 + }, + { + "epoch": 1.0127617682613448, + "grad_norm": 0.16314208507537842, + "learning_rate": 3.9872454704047146e-06, + "loss": 0.874, + "step": 139910 + }, + { + "epoch": 1.012834154921931, + "grad_norm": 0.1492464244365692, + "learning_rate": 3.987173083744128e-06, + "loss": 0.8594, + "step": 139920 + }, + { + "epoch": 1.0129065415825171, + "grad_norm": 0.1546127200126648, + "learning_rate": 3.987100697083542e-06, + "loss": 0.8683, + "step": 139930 + }, + { + "epoch": 1.0129789282431034, + "grad_norm": 0.23250380158424377, + "learning_rate": 3.987028310422955e-06, + "loss": 0.8602, + "step": 139940 + }, + { + "epoch": 1.0130513149036895, + "grad_norm": 0.15371747314929962, + "learning_rate": 3.98695592376237e-06, + "loss": 0.8577, + "step": 139950 + }, + { + "epoch": 1.0131237015642758, + "grad_norm": 0.15039215981960297, + "learning_rate": 3.9868835371017835e-06, + "loss": 0.8775, + "step": 139960 + }, + { + "epoch": 1.0131960882248618, + "grad_norm": 0.14372408390045166, + "learning_rate": 3.986811150441197e-06, + "loss": 0.8603, + "step": 139970 + }, + { + "epoch": 1.0132684748854481, + "grad_norm": 0.18086668848991394, + "learning_rate": 3.986738763780611e-06, + "loss": 0.8697, + "step": 139980 + }, + { + "epoch": 1.0133408615460342, + "grad_norm": 0.16421173512935638, + "learning_rate": 3.986666377120025e-06, + "loss": 0.8727, + "step": 139990 + }, + { + "epoch": 1.0134132482066205, + "grad_norm": 0.149836003780365, + "learning_rate": 3.986593990459439e-06, + "loss": 0.8633, + "step": 140000 + }, + { + "epoch": 1.0134856348672068, + "grad_norm": 0.15972940623760223, + "learning_rate": 3.9865216037988524e-06, + "loss": 0.8598, + "step": 140010 + }, + { + "epoch": 1.0135580215277928, + "grad_norm": 0.15127001702785492, + "learning_rate": 3.986449217138266e-06, + "loss": 0.8639, + "step": 140020 + }, + { + "epoch": 1.0136304081883791, + "grad_norm": 0.1499137580394745, + "learning_rate": 3.9863768304776805e-06, + "loss": 0.8712, + "step": 140030 + }, + { + "epoch": 1.0137027948489652, + "grad_norm": 0.1619751751422882, + "learning_rate": 3.986304443817094e-06, + "loss": 0.8751, + "step": 140040 + }, + { + "epoch": 1.0137751815095515, + "grad_norm": 0.15871897339820862, + "learning_rate": 3.986232057156508e-06, + "loss": 0.8627, + "step": 140050 + }, + { + "epoch": 1.0138475681701375, + "grad_norm": 0.15638139843940735, + "learning_rate": 3.986159670495921e-06, + "loss": 0.8587, + "step": 140060 + }, + { + "epoch": 1.0139199548307238, + "grad_norm": 0.1525679975748062, + "learning_rate": 3.986087283835335e-06, + "loss": 0.8644, + "step": 140070 + }, + { + "epoch": 1.01399234149131, + "grad_norm": 0.14840306341648102, + "learning_rate": 3.9860148971747494e-06, + "loss": 0.8653, + "step": 140080 + }, + { + "epoch": 1.0140647281518962, + "grad_norm": 0.15386714041233063, + "learning_rate": 3.985942510514163e-06, + "loss": 0.8709, + "step": 140090 + }, + { + "epoch": 1.0141371148124823, + "grad_norm": 0.17089219391345978, + "learning_rate": 3.985870123853577e-06, + "loss": 0.8653, + "step": 140100 + }, + { + "epoch": 1.0142095014730685, + "grad_norm": 0.16779905557632446, + "learning_rate": 3.98579773719299e-06, + "loss": 0.8503, + "step": 140110 + }, + { + "epoch": 1.0142818881336548, + "grad_norm": 0.1514463871717453, + "learning_rate": 3.985725350532404e-06, + "loss": 0.8604, + "step": 140120 + }, + { + "epoch": 1.014354274794241, + "grad_norm": 0.15929542481899261, + "learning_rate": 3.9856529638718175e-06, + "loss": 0.87, + "step": 140130 + }, + { + "epoch": 1.0144266614548272, + "grad_norm": 0.1467376947402954, + "learning_rate": 3.985580577211232e-06, + "loss": 0.8597, + "step": 140140 + }, + { + "epoch": 1.0144990481154132, + "grad_norm": 0.14556573331356049, + "learning_rate": 3.985508190550646e-06, + "loss": 0.8666, + "step": 140150 + }, + { + "epoch": 1.0145714347759995, + "grad_norm": 0.15759846568107605, + "learning_rate": 3.985435803890059e-06, + "loss": 0.8605, + "step": 140160 + }, + { + "epoch": 1.0146438214365856, + "grad_norm": 0.16892042756080627, + "learning_rate": 3.985363417229473e-06, + "loss": 0.8745, + "step": 140170 + }, + { + "epoch": 1.014716208097172, + "grad_norm": 0.16491712629795074, + "learning_rate": 3.985291030568887e-06, + "loss": 0.863, + "step": 140180 + }, + { + "epoch": 1.014788594757758, + "grad_norm": 0.14999713003635406, + "learning_rate": 3.985218643908301e-06, + "loss": 0.8698, + "step": 140190 + }, + { + "epoch": 1.0148609814183442, + "grad_norm": 0.15313737094402313, + "learning_rate": 3.9851462572477145e-06, + "loss": 0.8486, + "step": 140200 + }, + { + "epoch": 1.0149333680789303, + "grad_norm": 0.15523867309093475, + "learning_rate": 3.985073870587128e-06, + "loss": 0.8521, + "step": 140210 + }, + { + "epoch": 1.0150057547395166, + "grad_norm": 1.109740972518921, + "learning_rate": 3.985001483926543e-06, + "loss": 0.8812, + "step": 140220 + }, + { + "epoch": 1.0150781414001029, + "grad_norm": 0.15085531771183014, + "learning_rate": 3.984929097265956e-06, + "loss": 0.8549, + "step": 140230 + }, + { + "epoch": 1.015150528060689, + "grad_norm": 0.17226199805736542, + "learning_rate": 3.98485671060537e-06, + "loss": 0.8643, + "step": 140240 + }, + { + "epoch": 1.0152229147212752, + "grad_norm": 0.15006664395332336, + "learning_rate": 3.9847843239447835e-06, + "loss": 0.8721, + "step": 140250 + }, + { + "epoch": 1.0152953013818613, + "grad_norm": 0.1507822424173355, + "learning_rate": 3.984711937284197e-06, + "loss": 0.8708, + "step": 140260 + }, + { + "epoch": 1.0153676880424476, + "grad_norm": 0.1650809794664383, + "learning_rate": 3.9846395506236115e-06, + "loss": 0.8617, + "step": 140270 + }, + { + "epoch": 1.0154400747030337, + "grad_norm": 0.14646883308887482, + "learning_rate": 3.984567163963025e-06, + "loss": 0.8711, + "step": 140280 + }, + { + "epoch": 1.01551246136362, + "grad_norm": 0.16238850355148315, + "learning_rate": 3.984494777302439e-06, + "loss": 0.8666, + "step": 140290 + }, + { + "epoch": 1.015584848024206, + "grad_norm": 0.15635709464550018, + "learning_rate": 3.984422390641852e-06, + "loss": 0.8715, + "step": 140300 + }, + { + "epoch": 1.0156572346847923, + "grad_norm": 0.15769830346107483, + "learning_rate": 3.984350003981267e-06, + "loss": 0.8673, + "step": 140310 + }, + { + "epoch": 1.0157296213453786, + "grad_norm": 0.1550094187259674, + "learning_rate": 3.9842776173206805e-06, + "loss": 0.8631, + "step": 140320 + }, + { + "epoch": 1.0158020080059647, + "grad_norm": 0.16687199473381042, + "learning_rate": 3.984205230660094e-06, + "loss": 0.8557, + "step": 140330 + }, + { + "epoch": 1.015874394666551, + "grad_norm": 0.19312235713005066, + "learning_rate": 3.984132843999508e-06, + "loss": 0.868, + "step": 140340 + }, + { + "epoch": 1.015946781327137, + "grad_norm": 0.15332721173763275, + "learning_rate": 3.984060457338922e-06, + "loss": 0.8644, + "step": 140350 + }, + { + "epoch": 1.0160191679877233, + "grad_norm": 0.15439341962337494, + "learning_rate": 3.983988070678336e-06, + "loss": 0.8557, + "step": 140360 + }, + { + "epoch": 1.0160915546483094, + "grad_norm": 0.140192449092865, + "learning_rate": 3.983915684017749e-06, + "loss": 0.864, + "step": 140370 + }, + { + "epoch": 1.0161639413088956, + "grad_norm": 0.15483497083187103, + "learning_rate": 3.983843297357163e-06, + "loss": 0.8618, + "step": 140380 + }, + { + "epoch": 1.0162363279694817, + "grad_norm": 0.15083526074886322, + "learning_rate": 3.9837709106965775e-06, + "loss": 0.8493, + "step": 140390 + }, + { + "epoch": 1.016308714630068, + "grad_norm": 0.15921801328659058, + "learning_rate": 3.983698524035991e-06, + "loss": 0.8575, + "step": 140400 + }, + { + "epoch": 1.016381101290654, + "grad_norm": 0.15354293584823608, + "learning_rate": 3.983626137375405e-06, + "loss": 0.8575, + "step": 140410 + }, + { + "epoch": 1.0164534879512404, + "grad_norm": 0.159376859664917, + "learning_rate": 3.983553750714818e-06, + "loss": 0.8627, + "step": 140420 + }, + { + "epoch": 1.0165258746118266, + "grad_norm": 0.16422039270401, + "learning_rate": 3.983481364054233e-06, + "loss": 0.8659, + "step": 140430 + }, + { + "epoch": 1.0165982612724127, + "grad_norm": 0.16659992933273315, + "learning_rate": 3.983408977393646e-06, + "loss": 0.8698, + "step": 140440 + }, + { + "epoch": 1.016670647932999, + "grad_norm": 0.155796617269516, + "learning_rate": 3.98333659073306e-06, + "loss": 0.8626, + "step": 140450 + }, + { + "epoch": 1.016743034593585, + "grad_norm": 0.15610454976558685, + "learning_rate": 3.983264204072474e-06, + "loss": 0.8471, + "step": 140460 + }, + { + "epoch": 1.0168154212541713, + "grad_norm": 0.17221376299858093, + "learning_rate": 3.983191817411888e-06, + "loss": 0.8756, + "step": 140470 + }, + { + "epoch": 1.0168878079147574, + "grad_norm": 0.1467757523059845, + "learning_rate": 3.983119430751302e-06, + "loss": 0.8566, + "step": 140480 + }, + { + "epoch": 1.0169601945753437, + "grad_norm": 0.14400814473628998, + "learning_rate": 3.983047044090715e-06, + "loss": 0.8487, + "step": 140490 + }, + { + "epoch": 1.0170325812359298, + "grad_norm": 0.15027961134910583, + "learning_rate": 3.982974657430129e-06, + "loss": 0.8633, + "step": 140500 + }, + { + "epoch": 1.017104967896516, + "grad_norm": 0.14819438755512238, + "learning_rate": 3.982902270769543e-06, + "loss": 0.854, + "step": 140510 + }, + { + "epoch": 1.0171773545571021, + "grad_norm": 0.1465308964252472, + "learning_rate": 3.982829884108957e-06, + "loss": 0.8568, + "step": 140520 + }, + { + "epoch": 1.0172497412176884, + "grad_norm": 0.32729414105415344, + "learning_rate": 3.982757497448371e-06, + "loss": 0.8546, + "step": 140530 + }, + { + "epoch": 1.0173221278782747, + "grad_norm": 0.15239901840686798, + "learning_rate": 3.982685110787784e-06, + "loss": 0.8547, + "step": 140540 + }, + { + "epoch": 1.0173945145388608, + "grad_norm": 0.15983672440052032, + "learning_rate": 3.982612724127199e-06, + "loss": 0.8539, + "step": 140550 + }, + { + "epoch": 1.017466901199447, + "grad_norm": 0.15606176853179932, + "learning_rate": 3.982540337466612e-06, + "loss": 0.8574, + "step": 140560 + }, + { + "epoch": 1.0175392878600331, + "grad_norm": 0.1544933319091797, + "learning_rate": 3.982467950806026e-06, + "loss": 0.8641, + "step": 140570 + }, + { + "epoch": 1.0176116745206194, + "grad_norm": 0.14850009977817535, + "learning_rate": 3.9823955641454396e-06, + "loss": 0.8581, + "step": 140580 + }, + { + "epoch": 1.0176840611812055, + "grad_norm": 0.16443096101284027, + "learning_rate": 3.982323177484854e-06, + "loss": 0.853, + "step": 140590 + }, + { + "epoch": 1.0177564478417918, + "grad_norm": 0.15893018245697021, + "learning_rate": 3.982250790824268e-06, + "loss": 0.8707, + "step": 140600 + }, + { + "epoch": 1.0178288345023778, + "grad_norm": 0.16136851906776428, + "learning_rate": 3.982178404163681e-06, + "loss": 0.8528, + "step": 140610 + }, + { + "epoch": 1.017901221162964, + "grad_norm": 0.14998020231723785, + "learning_rate": 3.982106017503095e-06, + "loss": 0.8641, + "step": 140620 + }, + { + "epoch": 1.0179736078235502, + "grad_norm": 0.16709375381469727, + "learning_rate": 3.982033630842509e-06, + "loss": 0.8502, + "step": 140630 + }, + { + "epoch": 1.0180459944841365, + "grad_norm": 0.15164078772068024, + "learning_rate": 3.981961244181923e-06, + "loss": 0.8566, + "step": 140640 + }, + { + "epoch": 1.0181183811447228, + "grad_norm": 0.16210389137268066, + "learning_rate": 3.981888857521336e-06, + "loss": 0.8707, + "step": 140650 + }, + { + "epoch": 1.0181907678053088, + "grad_norm": 0.15444253385066986, + "learning_rate": 3.98181647086075e-06, + "loss": 0.8628, + "step": 140660 + }, + { + "epoch": 1.018263154465895, + "grad_norm": 0.15437939763069153, + "learning_rate": 3.981744084200164e-06, + "loss": 0.8504, + "step": 140670 + }, + { + "epoch": 1.0183355411264812, + "grad_norm": 0.15111126005649567, + "learning_rate": 3.981671697539577e-06, + "loss": 0.8609, + "step": 140680 + }, + { + "epoch": 1.0184079277870675, + "grad_norm": 0.1448373794555664, + "learning_rate": 3.981599310878991e-06, + "loss": 0.8588, + "step": 140690 + }, + { + "epoch": 1.0184803144476535, + "grad_norm": 0.14929819107055664, + "learning_rate": 3.9815269242184055e-06, + "loss": 0.868, + "step": 140700 + }, + { + "epoch": 1.0185527011082398, + "grad_norm": 0.1808697134256363, + "learning_rate": 3.981454537557819e-06, + "loss": 0.8635, + "step": 140710 + }, + { + "epoch": 1.0186250877688259, + "grad_norm": 0.1492968201637268, + "learning_rate": 3.981382150897233e-06, + "loss": 0.8852, + "step": 140720 + }, + { + "epoch": 1.0186974744294122, + "grad_norm": 0.15429550409317017, + "learning_rate": 3.981309764236646e-06, + "loss": 0.87, + "step": 140730 + }, + { + "epoch": 1.0187698610899982, + "grad_norm": 0.1998952180147171, + "learning_rate": 3.981237377576061e-06, + "loss": 0.8681, + "step": 140740 + }, + { + "epoch": 1.0188422477505845, + "grad_norm": 0.14868871867656708, + "learning_rate": 3.9811649909154744e-06, + "loss": 0.8709, + "step": 140750 + }, + { + "epoch": 1.0189146344111708, + "grad_norm": 0.16644510626792908, + "learning_rate": 3.981092604254888e-06, + "loss": 0.8687, + "step": 140760 + }, + { + "epoch": 1.0189870210717569, + "grad_norm": 0.14398843050003052, + "learning_rate": 3.981020217594302e-06, + "loss": 0.8643, + "step": 140770 + }, + { + "epoch": 1.0190594077323432, + "grad_norm": 0.14528527855873108, + "learning_rate": 3.980947830933716e-06, + "loss": 0.8783, + "step": 140780 + }, + { + "epoch": 1.0191317943929292, + "grad_norm": 0.15322700142860413, + "learning_rate": 3.98087544427313e-06, + "loss": 0.8527, + "step": 140790 + }, + { + "epoch": 1.0192041810535155, + "grad_norm": 0.1468924880027771, + "learning_rate": 3.980803057612543e-06, + "loss": 0.8566, + "step": 140800 + }, + { + "epoch": 1.0192765677141016, + "grad_norm": 0.1553269475698471, + "learning_rate": 3.980730670951957e-06, + "loss": 0.8583, + "step": 140810 + }, + { + "epoch": 1.0193489543746879, + "grad_norm": 0.14764392375946045, + "learning_rate": 3.9806582842913714e-06, + "loss": 0.8567, + "step": 140820 + }, + { + "epoch": 1.019421341035274, + "grad_norm": 0.16374526917934418, + "learning_rate": 3.980585897630785e-06, + "loss": 0.8495, + "step": 140830 + }, + { + "epoch": 1.0194937276958602, + "grad_norm": 0.1522345393896103, + "learning_rate": 3.980513510970199e-06, + "loss": 0.8623, + "step": 140840 + }, + { + "epoch": 1.0195661143564463, + "grad_norm": 0.16136972606182098, + "learning_rate": 3.980441124309612e-06, + "loss": 0.8658, + "step": 140850 + }, + { + "epoch": 1.0196385010170326, + "grad_norm": 0.15984009206295013, + "learning_rate": 3.980368737649026e-06, + "loss": 0.8635, + "step": 140860 + }, + { + "epoch": 1.0197108876776189, + "grad_norm": 0.14360426366329193, + "learning_rate": 3.98029635098844e-06, + "loss": 0.8678, + "step": 140870 + }, + { + "epoch": 1.019783274338205, + "grad_norm": 0.17367321252822876, + "learning_rate": 3.980223964327854e-06, + "loss": 0.869, + "step": 140880 + }, + { + "epoch": 1.0198556609987912, + "grad_norm": 0.14711663126945496, + "learning_rate": 3.980151577667268e-06, + "loss": 0.8577, + "step": 140890 + }, + { + "epoch": 1.0199280476593773, + "grad_norm": 0.15861208736896515, + "learning_rate": 3.980079191006681e-06, + "loss": 0.8562, + "step": 140900 + }, + { + "epoch": 1.0200004343199636, + "grad_norm": 0.15030620992183685, + "learning_rate": 3.980006804346096e-06, + "loss": 0.863, + "step": 140910 + }, + { + "epoch": 1.0200728209805496, + "grad_norm": 0.14951910078525543, + "learning_rate": 3.979934417685509e-06, + "loss": 0.8614, + "step": 140920 + }, + { + "epoch": 1.020145207641136, + "grad_norm": 0.1687590330839157, + "learning_rate": 3.979862031024923e-06, + "loss": 0.878, + "step": 140930 + }, + { + "epoch": 1.020217594301722, + "grad_norm": 0.17207682132720947, + "learning_rate": 3.9797896443643365e-06, + "loss": 0.8576, + "step": 140940 + }, + { + "epoch": 1.0202899809623083, + "grad_norm": 0.14084044098854065, + "learning_rate": 3.979717257703751e-06, + "loss": 0.8559, + "step": 140950 + }, + { + "epoch": 1.0203623676228946, + "grad_norm": 0.19523248076438904, + "learning_rate": 3.979644871043165e-06, + "loss": 0.8645, + "step": 140960 + }, + { + "epoch": 1.0204347542834806, + "grad_norm": 0.14832082390785217, + "learning_rate": 3.979572484382578e-06, + "loss": 0.8652, + "step": 140970 + }, + { + "epoch": 1.020507140944067, + "grad_norm": 0.21129783987998962, + "learning_rate": 3.979500097721992e-06, + "loss": 0.8643, + "step": 140980 + }, + { + "epoch": 1.020579527604653, + "grad_norm": 0.16113729774951935, + "learning_rate": 3.979427711061406e-06, + "loss": 0.8615, + "step": 140990 + }, + { + "epoch": 1.0206519142652393, + "grad_norm": 0.15041108429431915, + "learning_rate": 3.97935532440082e-06, + "loss": 0.8556, + "step": 141000 + }, + { + "epoch": 1.0207243009258253, + "grad_norm": 0.1588127315044403, + "learning_rate": 3.9792829377402335e-06, + "loss": 0.8617, + "step": 141010 + }, + { + "epoch": 1.0207966875864116, + "grad_norm": 0.1841687262058258, + "learning_rate": 3.979210551079647e-06, + "loss": 0.8581, + "step": 141020 + }, + { + "epoch": 1.0208690742469977, + "grad_norm": 0.1596136838197708, + "learning_rate": 3.979138164419062e-06, + "loss": 0.87, + "step": 141030 + }, + { + "epoch": 1.020941460907584, + "grad_norm": 0.15191321074962616, + "learning_rate": 3.979065777758475e-06, + "loss": 0.865, + "step": 141040 + }, + { + "epoch": 1.02101384756817, + "grad_norm": 0.14728322625160217, + "learning_rate": 3.978993391097889e-06, + "loss": 0.8643, + "step": 141050 + }, + { + "epoch": 1.0210862342287563, + "grad_norm": 0.15328173339366913, + "learning_rate": 3.9789210044373025e-06, + "loss": 0.8609, + "step": 141060 + }, + { + "epoch": 1.0211586208893426, + "grad_norm": 0.14939600229263306, + "learning_rate": 3.978848617776717e-06, + "loss": 0.8711, + "step": 141070 + }, + { + "epoch": 1.0212310075499287, + "grad_norm": 0.16299229860305786, + "learning_rate": 3.9787762311161305e-06, + "loss": 0.8558, + "step": 141080 + }, + { + "epoch": 1.021303394210515, + "grad_norm": 0.14965036511421204, + "learning_rate": 3.978703844455544e-06, + "loss": 0.8579, + "step": 141090 + }, + { + "epoch": 1.021375780871101, + "grad_norm": 0.1521276980638504, + "learning_rate": 3.978631457794958e-06, + "loss": 0.8616, + "step": 141100 + }, + { + "epoch": 1.0214481675316873, + "grad_norm": 0.17580242455005646, + "learning_rate": 3.978559071134372e-06, + "loss": 0.8586, + "step": 141110 + }, + { + "epoch": 1.0215205541922734, + "grad_norm": 0.15669845044612885, + "learning_rate": 3.978486684473786e-06, + "loss": 0.8737, + "step": 141120 + }, + { + "epoch": 1.0215929408528597, + "grad_norm": 0.1495697796344757, + "learning_rate": 3.9784142978131995e-06, + "loss": 0.8624, + "step": 141130 + }, + { + "epoch": 1.0216653275134457, + "grad_norm": 0.16285309195518494, + "learning_rate": 3.978341911152613e-06, + "loss": 0.8776, + "step": 141140 + }, + { + "epoch": 1.021737714174032, + "grad_norm": 0.14582541584968567, + "learning_rate": 3.9782695244920275e-06, + "loss": 0.8666, + "step": 141150 + }, + { + "epoch": 1.021810100834618, + "grad_norm": 0.15949222445487976, + "learning_rate": 3.978197137831441e-06, + "loss": 0.8504, + "step": 141160 + }, + { + "epoch": 1.0218824874952044, + "grad_norm": 0.1575903445482254, + "learning_rate": 3.978124751170855e-06, + "loss": 0.8695, + "step": 141170 + }, + { + "epoch": 1.0219548741557907, + "grad_norm": 0.17722275853157043, + "learning_rate": 3.978052364510268e-06, + "loss": 0.8657, + "step": 141180 + }, + { + "epoch": 1.0220272608163767, + "grad_norm": 0.14309902489185333, + "learning_rate": 3.977979977849682e-06, + "loss": 0.8619, + "step": 141190 + }, + { + "epoch": 1.022099647476963, + "grad_norm": 0.14356312155723572, + "learning_rate": 3.977907591189096e-06, + "loss": 0.8674, + "step": 141200 + }, + { + "epoch": 1.022172034137549, + "grad_norm": 0.15702299773693085, + "learning_rate": 3.977835204528509e-06, + "loss": 0.8666, + "step": 141210 + }, + { + "epoch": 1.0222444207981354, + "grad_norm": 0.16147883236408234, + "learning_rate": 3.977762817867924e-06, + "loss": 0.8675, + "step": 141220 + }, + { + "epoch": 1.0223168074587214, + "grad_norm": 0.1556180715560913, + "learning_rate": 3.977690431207337e-06, + "loss": 0.8506, + "step": 141230 + }, + { + "epoch": 1.0223891941193077, + "grad_norm": 0.17178305983543396, + "learning_rate": 3.977618044546751e-06, + "loss": 0.8649, + "step": 141240 + }, + { + "epoch": 1.0224615807798938, + "grad_norm": 0.16344556212425232, + "learning_rate": 3.9775456578861646e-06, + "loss": 0.8561, + "step": 141250 + }, + { + "epoch": 1.02253396744048, + "grad_norm": 0.20177888870239258, + "learning_rate": 3.977473271225579e-06, + "loss": 0.8556, + "step": 141260 + }, + { + "epoch": 1.0226063541010662, + "grad_norm": 0.16756558418273926, + "learning_rate": 3.977400884564993e-06, + "loss": 0.8527, + "step": 141270 + }, + { + "epoch": 1.0226787407616524, + "grad_norm": 0.14939232170581818, + "learning_rate": 3.977328497904406e-06, + "loss": 0.8596, + "step": 141280 + }, + { + "epoch": 1.0227511274222387, + "grad_norm": 0.14247731864452362, + "learning_rate": 3.97725611124382e-06, + "loss": 0.8656, + "step": 141290 + }, + { + "epoch": 1.0228235140828248, + "grad_norm": 0.18383385241031647, + "learning_rate": 3.977183724583234e-06, + "loss": 0.86, + "step": 141300 + }, + { + "epoch": 1.022895900743411, + "grad_norm": 0.16358555853366852, + "learning_rate": 3.977111337922648e-06, + "loss": 0.873, + "step": 141310 + }, + { + "epoch": 1.0229682874039971, + "grad_norm": 0.1494084596633911, + "learning_rate": 3.9770389512620616e-06, + "loss": 0.8498, + "step": 141320 + }, + { + "epoch": 1.0230406740645834, + "grad_norm": 0.14796346426010132, + "learning_rate": 3.976966564601475e-06, + "loss": 0.8665, + "step": 141330 + }, + { + "epoch": 1.0231130607251695, + "grad_norm": 0.2109985500574112, + "learning_rate": 3.97689417794089e-06, + "loss": 0.8533, + "step": 141340 + }, + { + "epoch": 1.0231854473857558, + "grad_norm": 0.14924803376197815, + "learning_rate": 3.976821791280303e-06, + "loss": 0.867, + "step": 141350 + }, + { + "epoch": 1.0232578340463419, + "grad_norm": 0.16223694384098053, + "learning_rate": 3.976749404619717e-06, + "loss": 0.87, + "step": 141360 + }, + { + "epoch": 1.0233302207069281, + "grad_norm": 0.14216773211956024, + "learning_rate": 3.9766770179591305e-06, + "loss": 0.8584, + "step": 141370 + }, + { + "epoch": 1.0234026073675142, + "grad_norm": 0.15058837831020355, + "learning_rate": 3.976604631298545e-06, + "loss": 0.8655, + "step": 141380 + }, + { + "epoch": 1.0234749940281005, + "grad_norm": 0.14986705780029297, + "learning_rate": 3.9765322446379586e-06, + "loss": 0.8506, + "step": 141390 + }, + { + "epoch": 1.0235473806886868, + "grad_norm": 0.1716388761997223, + "learning_rate": 3.976459857977372e-06, + "loss": 0.8623, + "step": 141400 + }, + { + "epoch": 1.0236197673492728, + "grad_norm": 0.1996438354253769, + "learning_rate": 3.976387471316786e-06, + "loss": 0.8684, + "step": 141410 + }, + { + "epoch": 1.0236921540098591, + "grad_norm": 0.14589093625545502, + "learning_rate": 3.9763150846562e-06, + "loss": 0.8659, + "step": 141420 + }, + { + "epoch": 1.0237645406704452, + "grad_norm": 0.1426560878753662, + "learning_rate": 3.976242697995614e-06, + "loss": 0.8633, + "step": 141430 + }, + { + "epoch": 1.0238369273310315, + "grad_norm": 0.21811816096305847, + "learning_rate": 3.9761703113350275e-06, + "loss": 0.857, + "step": 141440 + }, + { + "epoch": 1.0239093139916176, + "grad_norm": 0.13757973909378052, + "learning_rate": 3.976097924674441e-06, + "loss": 0.8659, + "step": 141450 + }, + { + "epoch": 1.0239817006522038, + "grad_norm": 0.15585637092590332, + "learning_rate": 3.9760255380138556e-06, + "loss": 0.8663, + "step": 141460 + }, + { + "epoch": 1.02405408731279, + "grad_norm": 0.1470966935157776, + "learning_rate": 3.975953151353269e-06, + "loss": 0.8585, + "step": 141470 + }, + { + "epoch": 1.0241264739733762, + "grad_norm": 0.15229181945323944, + "learning_rate": 3.975880764692683e-06, + "loss": 0.8514, + "step": 141480 + }, + { + "epoch": 1.0241988606339625, + "grad_norm": 0.1502394825220108, + "learning_rate": 3.9758083780320964e-06, + "loss": 0.8598, + "step": 141490 + }, + { + "epoch": 1.0242712472945485, + "grad_norm": 0.1439654380083084, + "learning_rate": 3.97573599137151e-06, + "loss": 0.8768, + "step": 141500 + }, + { + "epoch": 1.0243436339551348, + "grad_norm": 0.14857643842697144, + "learning_rate": 3.9756636047109245e-06, + "loss": 0.871, + "step": 141510 + }, + { + "epoch": 1.024416020615721, + "grad_norm": 0.1647205948829651, + "learning_rate": 3.975591218050338e-06, + "loss": 0.8624, + "step": 141520 + }, + { + "epoch": 1.0244884072763072, + "grad_norm": 0.16993491351604462, + "learning_rate": 3.975518831389752e-06, + "loss": 0.8779, + "step": 141530 + }, + { + "epoch": 1.0245607939368933, + "grad_norm": 0.16677454113960266, + "learning_rate": 3.975446444729165e-06, + "loss": 0.861, + "step": 141540 + }, + { + "epoch": 1.0246331805974795, + "grad_norm": 0.15071935951709747, + "learning_rate": 3.97537405806858e-06, + "loss": 0.87, + "step": 141550 + }, + { + "epoch": 1.0247055672580656, + "grad_norm": 0.15021325647830963, + "learning_rate": 3.9753016714079934e-06, + "loss": 0.8523, + "step": 141560 + }, + { + "epoch": 1.024777953918652, + "grad_norm": 0.14703767001628876, + "learning_rate": 3.975229284747407e-06, + "loss": 0.8572, + "step": 141570 + }, + { + "epoch": 1.024850340579238, + "grad_norm": 0.17009884119033813, + "learning_rate": 3.975156898086821e-06, + "loss": 0.8612, + "step": 141580 + }, + { + "epoch": 1.0249227272398242, + "grad_norm": 0.21612617373466492, + "learning_rate": 3.975084511426235e-06, + "loss": 0.8604, + "step": 141590 + }, + { + "epoch": 1.0249951139004105, + "grad_norm": 0.1644405573606491, + "learning_rate": 3.975012124765649e-06, + "loss": 0.858, + "step": 141600 + }, + { + "epoch": 1.0250675005609966, + "grad_norm": 0.1592147797346115, + "learning_rate": 3.974939738105062e-06, + "loss": 0.8701, + "step": 141610 + }, + { + "epoch": 1.025139887221583, + "grad_norm": 0.1543736755847931, + "learning_rate": 3.974867351444476e-06, + "loss": 0.851, + "step": 141620 + }, + { + "epoch": 1.025212273882169, + "grad_norm": 0.15851731598377228, + "learning_rate": 3.9747949647838904e-06, + "loss": 0.8625, + "step": 141630 + }, + { + "epoch": 1.0252846605427552, + "grad_norm": 0.1598898321390152, + "learning_rate": 3.974722578123304e-06, + "loss": 0.8513, + "step": 141640 + }, + { + "epoch": 1.0253570472033413, + "grad_norm": 0.1485353261232376, + "learning_rate": 3.974650191462718e-06, + "loss": 0.8666, + "step": 141650 + }, + { + "epoch": 1.0254294338639276, + "grad_norm": 0.15137670934200287, + "learning_rate": 3.974577804802131e-06, + "loss": 0.8551, + "step": 141660 + }, + { + "epoch": 1.0255018205245137, + "grad_norm": 0.14559021592140198, + "learning_rate": 3.974505418141546e-06, + "loss": 0.8602, + "step": 141670 + }, + { + "epoch": 1.0255742071851, + "grad_norm": 0.15725764632225037, + "learning_rate": 3.974433031480959e-06, + "loss": 0.8529, + "step": 141680 + }, + { + "epoch": 1.025646593845686, + "grad_norm": 0.15133067965507507, + "learning_rate": 3.974360644820373e-06, + "loss": 0.8677, + "step": 141690 + }, + { + "epoch": 1.0257189805062723, + "grad_norm": 0.16057194769382477, + "learning_rate": 3.974288258159787e-06, + "loss": 0.8644, + "step": 141700 + }, + { + "epoch": 1.0257913671668586, + "grad_norm": 0.14923806488513947, + "learning_rate": 3.9742158714992e-06, + "loss": 0.8539, + "step": 141710 + }, + { + "epoch": 1.0258637538274447, + "grad_norm": 0.1376243233680725, + "learning_rate": 3.974143484838614e-06, + "loss": 0.8581, + "step": 141720 + }, + { + "epoch": 1.025936140488031, + "grad_norm": 0.15476512908935547, + "learning_rate": 3.9740710981780274e-06, + "loss": 0.8533, + "step": 141730 + }, + { + "epoch": 1.026008527148617, + "grad_norm": 0.16103792190551758, + "learning_rate": 3.973998711517442e-06, + "loss": 0.8599, + "step": 141740 + }, + { + "epoch": 1.0260809138092033, + "grad_norm": 0.1493249237537384, + "learning_rate": 3.9739263248568555e-06, + "loss": 0.8548, + "step": 141750 + }, + { + "epoch": 1.0261533004697894, + "grad_norm": 0.1508176475763321, + "learning_rate": 3.973853938196269e-06, + "loss": 0.8681, + "step": 141760 + }, + { + "epoch": 1.0262256871303757, + "grad_norm": 0.16366364061832428, + "learning_rate": 3.973781551535683e-06, + "loss": 0.8723, + "step": 141770 + }, + { + "epoch": 1.0262980737909617, + "grad_norm": 0.1778716742992401, + "learning_rate": 3.973709164875097e-06, + "loss": 0.87, + "step": 141780 + }, + { + "epoch": 1.026370460451548, + "grad_norm": 0.1537426859140396, + "learning_rate": 3.973636778214511e-06, + "loss": 0.8588, + "step": 141790 + }, + { + "epoch": 1.026442847112134, + "grad_norm": 0.15936316549777985, + "learning_rate": 3.9735643915539245e-06, + "loss": 0.8524, + "step": 141800 + }, + { + "epoch": 1.0265152337727204, + "grad_norm": 0.15303193032741547, + "learning_rate": 3.973492004893338e-06, + "loss": 0.8655, + "step": 141810 + }, + { + "epoch": 1.0265876204333066, + "grad_norm": 0.1577739119529724, + "learning_rate": 3.9734196182327525e-06, + "loss": 0.8629, + "step": 141820 + }, + { + "epoch": 1.0266600070938927, + "grad_norm": 0.15587298572063446, + "learning_rate": 3.973347231572166e-06, + "loss": 0.8514, + "step": 141830 + }, + { + "epoch": 1.026732393754479, + "grad_norm": 0.15615636110305786, + "learning_rate": 3.97327484491158e-06, + "loss": 0.8699, + "step": 141840 + }, + { + "epoch": 1.026804780415065, + "grad_norm": 0.15278807282447815, + "learning_rate": 3.973202458250993e-06, + "loss": 0.8551, + "step": 141850 + }, + { + "epoch": 1.0268771670756514, + "grad_norm": 0.1699153482913971, + "learning_rate": 3.973130071590408e-06, + "loss": 0.8701, + "step": 141860 + }, + { + "epoch": 1.0269495537362374, + "grad_norm": 0.1508774608373642, + "learning_rate": 3.9730576849298215e-06, + "loss": 0.8502, + "step": 141870 + }, + { + "epoch": 1.0270219403968237, + "grad_norm": 0.1427360475063324, + "learning_rate": 3.972985298269235e-06, + "loss": 0.8529, + "step": 141880 + }, + { + "epoch": 1.0270943270574098, + "grad_norm": 0.15359556674957275, + "learning_rate": 3.972912911608649e-06, + "loss": 0.8513, + "step": 141890 + }, + { + "epoch": 1.027166713717996, + "grad_norm": 0.15233898162841797, + "learning_rate": 3.972840524948063e-06, + "loss": 0.8758, + "step": 141900 + }, + { + "epoch": 1.0272391003785821, + "grad_norm": 0.1512978971004486, + "learning_rate": 3.972768138287477e-06, + "loss": 0.8616, + "step": 141910 + }, + { + "epoch": 1.0273114870391684, + "grad_norm": 0.15592820942401886, + "learning_rate": 3.97269575162689e-06, + "loss": 0.8621, + "step": 141920 + }, + { + "epoch": 1.0273838736997547, + "grad_norm": 0.15350952744483948, + "learning_rate": 3.972623364966304e-06, + "loss": 0.8509, + "step": 141930 + }, + { + "epoch": 1.0274562603603408, + "grad_norm": 0.16110540926456451, + "learning_rate": 3.9725509783057185e-06, + "loss": 0.8634, + "step": 141940 + }, + { + "epoch": 1.027528647020927, + "grad_norm": 0.15037639439105988, + "learning_rate": 3.972478591645132e-06, + "loss": 0.8537, + "step": 141950 + }, + { + "epoch": 1.0276010336815131, + "grad_norm": 0.16510790586471558, + "learning_rate": 3.972406204984546e-06, + "loss": 0.8515, + "step": 141960 + }, + { + "epoch": 1.0276734203420994, + "grad_norm": 0.16185833513736725, + "learning_rate": 3.972333818323959e-06, + "loss": 0.8547, + "step": 141970 + }, + { + "epoch": 1.0277458070026855, + "grad_norm": 0.14903537929058075, + "learning_rate": 3.972261431663374e-06, + "loss": 0.8765, + "step": 141980 + }, + { + "epoch": 1.0278181936632718, + "grad_norm": 0.14744271337985992, + "learning_rate": 3.972189045002787e-06, + "loss": 0.8588, + "step": 141990 + }, + { + "epoch": 1.0278905803238578, + "grad_norm": 0.16065943241119385, + "learning_rate": 3.972116658342201e-06, + "loss": 0.8532, + "step": 142000 + }, + { + "epoch": 1.0279629669844441, + "grad_norm": 0.15768252313137054, + "learning_rate": 3.972044271681615e-06, + "loss": 0.8627, + "step": 142010 + }, + { + "epoch": 1.0280353536450304, + "grad_norm": 0.16931724548339844, + "learning_rate": 3.971971885021029e-06, + "loss": 0.8667, + "step": 142020 + }, + { + "epoch": 1.0281077403056165, + "grad_norm": 0.14971376955509186, + "learning_rate": 3.971899498360443e-06, + "loss": 0.8592, + "step": 142030 + }, + { + "epoch": 1.0281801269662028, + "grad_norm": 0.1479533463716507, + "learning_rate": 3.971827111699856e-06, + "loss": 0.8577, + "step": 142040 + }, + { + "epoch": 1.0282525136267888, + "grad_norm": 0.15381501615047455, + "learning_rate": 3.97175472503927e-06, + "loss": 0.8658, + "step": 142050 + }, + { + "epoch": 1.028324900287375, + "grad_norm": 0.1562289297580719, + "learning_rate": 3.971682338378684e-06, + "loss": 0.8571, + "step": 142060 + }, + { + "epoch": 1.0283972869479612, + "grad_norm": 0.15410132706165314, + "learning_rate": 3.971609951718098e-06, + "loss": 0.8519, + "step": 142070 + }, + { + "epoch": 1.0284696736085475, + "grad_norm": 0.19577772915363312, + "learning_rate": 3.971537565057512e-06, + "loss": 0.8558, + "step": 142080 + }, + { + "epoch": 1.0285420602691335, + "grad_norm": 0.15742763876914978, + "learning_rate": 3.971465178396925e-06, + "loss": 0.8577, + "step": 142090 + }, + { + "epoch": 1.0286144469297198, + "grad_norm": 0.16709017753601074, + "learning_rate": 3.971392791736339e-06, + "loss": 0.865, + "step": 142100 + }, + { + "epoch": 1.0286868335903059, + "grad_norm": 0.15220534801483154, + "learning_rate": 3.971320405075753e-06, + "loss": 0.8565, + "step": 142110 + }, + { + "epoch": 1.0287592202508922, + "grad_norm": 0.18524552881717682, + "learning_rate": 3.971248018415167e-06, + "loss": 0.8583, + "step": 142120 + }, + { + "epoch": 1.0288316069114785, + "grad_norm": 0.1528216153383255, + "learning_rate": 3.9711756317545806e-06, + "loss": 0.8781, + "step": 142130 + }, + { + "epoch": 1.0289039935720645, + "grad_norm": 0.15787044167518616, + "learning_rate": 3.971103245093994e-06, + "loss": 0.861, + "step": 142140 + }, + { + "epoch": 1.0289763802326508, + "grad_norm": 0.15298911929130554, + "learning_rate": 3.971030858433409e-06, + "loss": 0.8647, + "step": 142150 + }, + { + "epoch": 1.0290487668932369, + "grad_norm": 0.15209560096263885, + "learning_rate": 3.970958471772822e-06, + "loss": 0.8553, + "step": 142160 + }, + { + "epoch": 1.0291211535538232, + "grad_norm": 0.14856834709644318, + "learning_rate": 3.970886085112236e-06, + "loss": 0.8556, + "step": 142170 + }, + { + "epoch": 1.0291935402144092, + "grad_norm": 0.14934612810611725, + "learning_rate": 3.9708136984516495e-06, + "loss": 0.867, + "step": 142180 + }, + { + "epoch": 1.0292659268749955, + "grad_norm": 0.1754796802997589, + "learning_rate": 3.970741311791064e-06, + "loss": 0.8633, + "step": 142190 + }, + { + "epoch": 1.0293383135355816, + "grad_norm": 0.15860222280025482, + "learning_rate": 3.9706689251304776e-06, + "loss": 0.8635, + "step": 142200 + }, + { + "epoch": 1.0294107001961679, + "grad_norm": 0.16497546434402466, + "learning_rate": 3.970596538469891e-06, + "loss": 0.8575, + "step": 142210 + }, + { + "epoch": 1.029483086856754, + "grad_norm": 0.20866011083126068, + "learning_rate": 3.970524151809305e-06, + "loss": 0.8699, + "step": 142220 + }, + { + "epoch": 1.0295554735173402, + "grad_norm": 0.17648346722126007, + "learning_rate": 3.970451765148719e-06, + "loss": 0.8578, + "step": 142230 + }, + { + "epoch": 1.0296278601779265, + "grad_norm": 0.1537626087665558, + "learning_rate": 3.970379378488132e-06, + "loss": 0.8485, + "step": 142240 + }, + { + "epoch": 1.0297002468385126, + "grad_norm": 0.14427883923053741, + "learning_rate": 3.9703069918275465e-06, + "loss": 0.856, + "step": 142250 + }, + { + "epoch": 1.0297726334990989, + "grad_norm": 0.1432482898235321, + "learning_rate": 3.97023460516696e-06, + "loss": 0.8569, + "step": 142260 + }, + { + "epoch": 1.029845020159685, + "grad_norm": 0.14573949575424194, + "learning_rate": 3.970162218506374e-06, + "loss": 0.8622, + "step": 142270 + }, + { + "epoch": 1.0299174068202712, + "grad_norm": 0.1648380309343338, + "learning_rate": 3.970089831845787e-06, + "loss": 0.8654, + "step": 142280 + }, + { + "epoch": 1.0299897934808573, + "grad_norm": 0.16308258473873138, + "learning_rate": 3.970017445185201e-06, + "loss": 0.8474, + "step": 142290 + }, + { + "epoch": 1.0300621801414436, + "grad_norm": 0.1557963788509369, + "learning_rate": 3.9699450585246154e-06, + "loss": 0.8688, + "step": 142300 + }, + { + "epoch": 1.0301345668020296, + "grad_norm": 0.16031388938426971, + "learning_rate": 3.969872671864029e-06, + "loss": 0.8593, + "step": 142310 + }, + { + "epoch": 1.030206953462616, + "grad_norm": 0.17652627825737, + "learning_rate": 3.969800285203443e-06, + "loss": 0.8639, + "step": 142320 + }, + { + "epoch": 1.030279340123202, + "grad_norm": 0.1523296982049942, + "learning_rate": 3.969727898542856e-06, + "loss": 0.8444, + "step": 142330 + }, + { + "epoch": 1.0303517267837883, + "grad_norm": 0.16042089462280273, + "learning_rate": 3.969655511882271e-06, + "loss": 0.8583, + "step": 142340 + }, + { + "epoch": 1.0304241134443746, + "grad_norm": 0.15083900094032288, + "learning_rate": 3.969583125221684e-06, + "loss": 0.8611, + "step": 142350 + }, + { + "epoch": 1.0304965001049606, + "grad_norm": 0.16533105075359344, + "learning_rate": 3.969510738561098e-06, + "loss": 0.8657, + "step": 142360 + }, + { + "epoch": 1.030568886765547, + "grad_norm": 0.16816429793834686, + "learning_rate": 3.969438351900512e-06, + "loss": 0.8586, + "step": 142370 + }, + { + "epoch": 1.030641273426133, + "grad_norm": 0.15156440436840057, + "learning_rate": 3.969365965239926e-06, + "loss": 0.8609, + "step": 142380 + }, + { + "epoch": 1.0307136600867193, + "grad_norm": 0.15166370570659637, + "learning_rate": 3.96929357857934e-06, + "loss": 0.8651, + "step": 142390 + }, + { + "epoch": 1.0307860467473053, + "grad_norm": 0.15877971053123474, + "learning_rate": 3.969221191918753e-06, + "loss": 0.8612, + "step": 142400 + }, + { + "epoch": 1.0308584334078916, + "grad_norm": 0.15745185315608978, + "learning_rate": 3.969148805258167e-06, + "loss": 0.8582, + "step": 142410 + }, + { + "epoch": 1.0309308200684777, + "grad_norm": 0.16884928941726685, + "learning_rate": 3.969076418597581e-06, + "loss": 0.8673, + "step": 142420 + }, + { + "epoch": 1.031003206729064, + "grad_norm": 0.14777499437332153, + "learning_rate": 3.969004031936995e-06, + "loss": 0.8598, + "step": 142430 + }, + { + "epoch": 1.03107559338965, + "grad_norm": 0.17125903069972992, + "learning_rate": 3.968931645276409e-06, + "loss": 0.8579, + "step": 142440 + }, + { + "epoch": 1.0311479800502363, + "grad_norm": 0.18040338158607483, + "learning_rate": 3.968859258615822e-06, + "loss": 0.8556, + "step": 142450 + }, + { + "epoch": 1.0312203667108226, + "grad_norm": 0.14645922183990479, + "learning_rate": 3.968786871955237e-06, + "loss": 0.8515, + "step": 142460 + }, + { + "epoch": 1.0312927533714087, + "grad_norm": 0.17631933093070984, + "learning_rate": 3.96871448529465e-06, + "loss": 0.8559, + "step": 142470 + }, + { + "epoch": 1.031365140031995, + "grad_norm": 0.19517827033996582, + "learning_rate": 3.968642098634064e-06, + "loss": 0.8675, + "step": 142480 + }, + { + "epoch": 1.031437526692581, + "grad_norm": 0.5200027227401733, + "learning_rate": 3.9685697119734775e-06, + "loss": 0.8619, + "step": 142490 + }, + { + "epoch": 1.0315099133531673, + "grad_norm": 0.15519946813583374, + "learning_rate": 3.968497325312892e-06, + "loss": 0.8543, + "step": 142500 + }, + { + "epoch": 1.0315823000137534, + "grad_norm": 0.147262305021286, + "learning_rate": 3.968424938652306e-06, + "loss": 0.8527, + "step": 142510 + }, + { + "epoch": 1.0316546866743397, + "grad_norm": 0.15960846841335297, + "learning_rate": 3.968352551991719e-06, + "loss": 0.8642, + "step": 142520 + }, + { + "epoch": 1.0317270733349257, + "grad_norm": 0.16142988204956055, + "learning_rate": 3.968280165331133e-06, + "loss": 0.853, + "step": 142530 + }, + { + "epoch": 1.031799459995512, + "grad_norm": 0.14241138100624084, + "learning_rate": 3.968207778670547e-06, + "loss": 0.8441, + "step": 142540 + }, + { + "epoch": 1.0318718466560983, + "grad_norm": 0.15053856372833252, + "learning_rate": 3.968135392009961e-06, + "loss": 0.8477, + "step": 142550 + }, + { + "epoch": 1.0319442333166844, + "grad_norm": 0.14978879690170288, + "learning_rate": 3.9680630053493745e-06, + "loss": 0.852, + "step": 142560 + }, + { + "epoch": 1.0320166199772707, + "grad_norm": 0.15513886511325836, + "learning_rate": 3.967990618688788e-06, + "loss": 0.8575, + "step": 142570 + }, + { + "epoch": 1.0320890066378567, + "grad_norm": 0.14662916958332062, + "learning_rate": 3.967918232028203e-06, + "loss": 0.8626, + "step": 142580 + }, + { + "epoch": 1.032161393298443, + "grad_norm": 0.15396258234977722, + "learning_rate": 3.967845845367616e-06, + "loss": 0.8612, + "step": 142590 + }, + { + "epoch": 1.032233779959029, + "grad_norm": 0.1600462794303894, + "learning_rate": 3.96777345870703e-06, + "loss": 0.873, + "step": 142600 + }, + { + "epoch": 1.0323061666196154, + "grad_norm": 0.15220437943935394, + "learning_rate": 3.9677010720464435e-06, + "loss": 0.8591, + "step": 142610 + }, + { + "epoch": 1.0323785532802014, + "grad_norm": 0.14761093258857727, + "learning_rate": 3.967628685385858e-06, + "loss": 0.8484, + "step": 142620 + }, + { + "epoch": 1.0324509399407877, + "grad_norm": 0.15190240740776062, + "learning_rate": 3.9675562987252715e-06, + "loss": 0.8556, + "step": 142630 + }, + { + "epoch": 1.0325233266013738, + "grad_norm": 0.15413150191307068, + "learning_rate": 3.967483912064685e-06, + "loss": 0.8524, + "step": 142640 + }, + { + "epoch": 1.03259571326196, + "grad_norm": 0.16066095232963562, + "learning_rate": 3.967411525404099e-06, + "loss": 0.8588, + "step": 142650 + }, + { + "epoch": 1.0326680999225464, + "grad_norm": 0.1533208042383194, + "learning_rate": 3.967339138743513e-06, + "loss": 0.8539, + "step": 142660 + }, + { + "epoch": 1.0327404865831324, + "grad_norm": 0.1481548249721527, + "learning_rate": 3.967266752082927e-06, + "loss": 0.8609, + "step": 142670 + }, + { + "epoch": 1.0328128732437187, + "grad_norm": 0.16002528369426727, + "learning_rate": 3.9671943654223405e-06, + "loss": 0.8602, + "step": 142680 + }, + { + "epoch": 1.0328852599043048, + "grad_norm": 0.14188747107982635, + "learning_rate": 3.967121978761754e-06, + "loss": 0.8648, + "step": 142690 + }, + { + "epoch": 1.032957646564891, + "grad_norm": 0.1650628298521042, + "learning_rate": 3.9670495921011685e-06, + "loss": 0.8579, + "step": 142700 + }, + { + "epoch": 1.0330300332254772, + "grad_norm": 0.15831433236598969, + "learning_rate": 3.966977205440582e-06, + "loss": 0.8578, + "step": 142710 + }, + { + "epoch": 1.0331024198860634, + "grad_norm": 0.14619334042072296, + "learning_rate": 3.966904818779996e-06, + "loss": 0.8735, + "step": 142720 + }, + { + "epoch": 1.0331748065466495, + "grad_norm": 0.1563662737607956, + "learning_rate": 3.966832432119409e-06, + "loss": 0.8541, + "step": 142730 + }, + { + "epoch": 1.0332471932072358, + "grad_norm": 0.155498206615448, + "learning_rate": 3.966760045458823e-06, + "loss": 0.8615, + "step": 142740 + }, + { + "epoch": 1.0333195798678219, + "grad_norm": 0.1640671342611313, + "learning_rate": 3.9666876587982375e-06, + "loss": 0.8612, + "step": 142750 + }, + { + "epoch": 1.0333919665284081, + "grad_norm": 0.15346024930477142, + "learning_rate": 3.966615272137651e-06, + "loss": 0.8689, + "step": 142760 + }, + { + "epoch": 1.0334643531889944, + "grad_norm": 0.14893843233585358, + "learning_rate": 3.966542885477065e-06, + "loss": 0.8617, + "step": 142770 + }, + { + "epoch": 1.0335367398495805, + "grad_norm": 0.1711651086807251, + "learning_rate": 3.966470498816478e-06, + "loss": 0.8623, + "step": 142780 + }, + { + "epoch": 1.0336091265101668, + "grad_norm": 0.1499413400888443, + "learning_rate": 3.966398112155892e-06, + "loss": 0.8654, + "step": 142790 + }, + { + "epoch": 1.0336815131707529, + "grad_norm": 0.16550101339817047, + "learning_rate": 3.9663257254953056e-06, + "loss": 0.8597, + "step": 142800 + }, + { + "epoch": 1.0337538998313391, + "grad_norm": 0.15514644980430603, + "learning_rate": 3.96625333883472e-06, + "loss": 0.8546, + "step": 142810 + }, + { + "epoch": 1.0338262864919252, + "grad_norm": 0.15630175173282623, + "learning_rate": 3.966180952174134e-06, + "loss": 0.8613, + "step": 142820 + }, + { + "epoch": 1.0338986731525115, + "grad_norm": 0.15911681950092316, + "learning_rate": 3.966108565513547e-06, + "loss": 0.8679, + "step": 142830 + }, + { + "epoch": 1.0339710598130976, + "grad_norm": 0.14707563817501068, + "learning_rate": 3.966036178852961e-06, + "loss": 0.8566, + "step": 142840 + }, + { + "epoch": 1.0340434464736838, + "grad_norm": 0.15566620230674744, + "learning_rate": 3.965963792192375e-06, + "loss": 0.8664, + "step": 142850 + }, + { + "epoch": 1.03411583313427, + "grad_norm": 0.16189688444137573, + "learning_rate": 3.965891405531789e-06, + "loss": 0.8587, + "step": 142860 + }, + { + "epoch": 1.0341882197948562, + "grad_norm": 0.1611325591802597, + "learning_rate": 3.9658190188712026e-06, + "loss": 0.8551, + "step": 142870 + }, + { + "epoch": 1.0342606064554425, + "grad_norm": 0.15380004048347473, + "learning_rate": 3.965746632210616e-06, + "loss": 0.8503, + "step": 142880 + }, + { + "epoch": 1.0343329931160286, + "grad_norm": 0.15085004270076752, + "learning_rate": 3.96567424555003e-06, + "loss": 0.8579, + "step": 142890 + }, + { + "epoch": 1.0344053797766148, + "grad_norm": 0.15508107841014862, + "learning_rate": 3.965601858889444e-06, + "loss": 0.8644, + "step": 142900 + }, + { + "epoch": 1.034477766437201, + "grad_norm": 0.16797524690628052, + "learning_rate": 3.965529472228858e-06, + "loss": 0.8579, + "step": 142910 + }, + { + "epoch": 1.0345501530977872, + "grad_norm": 0.16412454843521118, + "learning_rate": 3.9654570855682715e-06, + "loss": 0.8653, + "step": 142920 + }, + { + "epoch": 1.0346225397583733, + "grad_norm": 0.1560967117547989, + "learning_rate": 3.965384698907685e-06, + "loss": 0.8617, + "step": 142930 + }, + { + "epoch": 1.0346949264189595, + "grad_norm": 0.1702379286289215, + "learning_rate": 3.9653123122470996e-06, + "loss": 0.8819, + "step": 142940 + }, + { + "epoch": 1.0347673130795456, + "grad_norm": 0.16101233661174774, + "learning_rate": 3.965239925586513e-06, + "loss": 0.8702, + "step": 142950 + }, + { + "epoch": 1.034839699740132, + "grad_norm": 0.1507912427186966, + "learning_rate": 3.965167538925927e-06, + "loss": 0.8644, + "step": 142960 + }, + { + "epoch": 1.034912086400718, + "grad_norm": 0.14601871371269226, + "learning_rate": 3.96509515226534e-06, + "loss": 0.8614, + "step": 142970 + }, + { + "epoch": 1.0349844730613043, + "grad_norm": 0.1494961827993393, + "learning_rate": 3.965022765604755e-06, + "loss": 0.8673, + "step": 142980 + }, + { + "epoch": 1.0350568597218905, + "grad_norm": 0.1469757854938507, + "learning_rate": 3.9649503789441685e-06, + "loss": 0.8586, + "step": 142990 + }, + { + "epoch": 1.0351292463824766, + "grad_norm": 0.1592247635126114, + "learning_rate": 3.964877992283582e-06, + "loss": 0.8657, + "step": 143000 + }, + { + "epoch": 1.035201633043063, + "grad_norm": 0.1555202305316925, + "learning_rate": 3.964805605622996e-06, + "loss": 0.8588, + "step": 143010 + }, + { + "epoch": 1.035274019703649, + "grad_norm": 0.15788942575454712, + "learning_rate": 3.96473321896241e-06, + "loss": 0.8612, + "step": 143020 + }, + { + "epoch": 1.0353464063642352, + "grad_norm": 0.15312904119491577, + "learning_rate": 3.964660832301824e-06, + "loss": 0.8609, + "step": 143030 + }, + { + "epoch": 1.0354187930248213, + "grad_norm": 0.1483820229768753, + "learning_rate": 3.9645884456412374e-06, + "loss": 0.8604, + "step": 143040 + }, + { + "epoch": 1.0354911796854076, + "grad_norm": 0.15752215683460236, + "learning_rate": 3.964516058980651e-06, + "loss": 0.8596, + "step": 143050 + }, + { + "epoch": 1.0355635663459937, + "grad_norm": 0.15568195283412933, + "learning_rate": 3.9644436723200655e-06, + "loss": 0.8665, + "step": 143060 + }, + { + "epoch": 1.03563595300658, + "grad_norm": 0.14673691987991333, + "learning_rate": 3.964371285659479e-06, + "loss": 0.8612, + "step": 143070 + }, + { + "epoch": 1.035708339667166, + "grad_norm": 0.15292707085609436, + "learning_rate": 3.964298898998893e-06, + "loss": 0.8511, + "step": 143080 + }, + { + "epoch": 1.0357807263277523, + "grad_norm": 0.1723155379295349, + "learning_rate": 3.964226512338306e-06, + "loss": 0.8634, + "step": 143090 + }, + { + "epoch": 1.0358531129883386, + "grad_norm": 0.15540607273578644, + "learning_rate": 3.964154125677721e-06, + "loss": 0.8704, + "step": 143100 + }, + { + "epoch": 1.0359254996489247, + "grad_norm": 0.14979644119739532, + "learning_rate": 3.9640817390171344e-06, + "loss": 0.8563, + "step": 143110 + }, + { + "epoch": 1.035997886309511, + "grad_norm": 0.1446203589439392, + "learning_rate": 3.964009352356548e-06, + "loss": 0.8557, + "step": 143120 + }, + { + "epoch": 1.036070272970097, + "grad_norm": 0.15356165170669556, + "learning_rate": 3.963936965695962e-06, + "loss": 0.8653, + "step": 143130 + }, + { + "epoch": 1.0361426596306833, + "grad_norm": 0.1651306450366974, + "learning_rate": 3.963864579035376e-06, + "loss": 0.857, + "step": 143140 + }, + { + "epoch": 1.0362150462912694, + "grad_norm": 0.16041286289691925, + "learning_rate": 3.96379219237479e-06, + "loss": 0.8544, + "step": 143150 + }, + { + "epoch": 1.0362874329518557, + "grad_norm": 0.14968006312847137, + "learning_rate": 3.963719805714203e-06, + "loss": 0.8596, + "step": 143160 + }, + { + "epoch": 1.0363598196124417, + "grad_norm": 0.1907356232404709, + "learning_rate": 3.963647419053617e-06, + "loss": 0.8688, + "step": 143170 + }, + { + "epoch": 1.036432206273028, + "grad_norm": 0.15427495539188385, + "learning_rate": 3.9635750323930314e-06, + "loss": 0.8584, + "step": 143180 + }, + { + "epoch": 1.0365045929336143, + "grad_norm": 0.15541379153728485, + "learning_rate": 3.963502645732445e-06, + "loss": 0.8563, + "step": 143190 + }, + { + "epoch": 1.0365769795942004, + "grad_norm": 0.14748956263065338, + "learning_rate": 3.963430259071859e-06, + "loss": 0.8564, + "step": 143200 + }, + { + "epoch": 1.0366493662547867, + "grad_norm": 0.15645940601825714, + "learning_rate": 3.963357872411272e-06, + "loss": 0.8467, + "step": 143210 + }, + { + "epoch": 1.0367217529153727, + "grad_norm": 0.21893535554409027, + "learning_rate": 3.963285485750687e-06, + "loss": 0.8673, + "step": 143220 + }, + { + "epoch": 1.036794139575959, + "grad_norm": 0.15868252515792847, + "learning_rate": 3.9632130990901e-06, + "loss": 0.8582, + "step": 143230 + }, + { + "epoch": 1.036866526236545, + "grad_norm": 0.1767452508211136, + "learning_rate": 3.963140712429514e-06, + "loss": 0.8549, + "step": 143240 + }, + { + "epoch": 1.0369389128971314, + "grad_norm": 0.14982333779335022, + "learning_rate": 3.963068325768928e-06, + "loss": 0.8538, + "step": 143250 + }, + { + "epoch": 1.0370112995577174, + "grad_norm": 0.1804773211479187, + "learning_rate": 3.962995939108342e-06, + "loss": 0.8644, + "step": 143260 + }, + { + "epoch": 1.0370836862183037, + "grad_norm": 0.15341947972774506, + "learning_rate": 3.962923552447756e-06, + "loss": 0.8605, + "step": 143270 + }, + { + "epoch": 1.0371560728788898, + "grad_norm": 0.15054786205291748, + "learning_rate": 3.962851165787169e-06, + "loss": 0.8607, + "step": 143280 + }, + { + "epoch": 1.037228459539476, + "grad_norm": 0.15896694362163544, + "learning_rate": 3.962778779126583e-06, + "loss": 0.845, + "step": 143290 + }, + { + "epoch": 1.0373008462000624, + "grad_norm": 0.1585065871477127, + "learning_rate": 3.9627063924659965e-06, + "loss": 0.8481, + "step": 143300 + }, + { + "epoch": 1.0373732328606484, + "grad_norm": 0.1433423012495041, + "learning_rate": 3.96263400580541e-06, + "loss": 0.8576, + "step": 143310 + }, + { + "epoch": 1.0374456195212347, + "grad_norm": 0.14972993731498718, + "learning_rate": 3.962561619144824e-06, + "loss": 0.8564, + "step": 143320 + }, + { + "epoch": 1.0375180061818208, + "grad_norm": 0.1547120362520218, + "learning_rate": 3.962489232484238e-06, + "loss": 0.8754, + "step": 143330 + }, + { + "epoch": 1.037590392842407, + "grad_norm": 0.15354642271995544, + "learning_rate": 3.962416845823652e-06, + "loss": 0.8516, + "step": 143340 + }, + { + "epoch": 1.0376627795029931, + "grad_norm": 0.15381920337677002, + "learning_rate": 3.9623444591630655e-06, + "loss": 0.8588, + "step": 143350 + }, + { + "epoch": 1.0377351661635794, + "grad_norm": 0.1736004650592804, + "learning_rate": 3.962272072502479e-06, + "loss": 0.863, + "step": 143360 + }, + { + "epoch": 1.0378075528241655, + "grad_norm": 0.1495305895805359, + "learning_rate": 3.9621996858418935e-06, + "loss": 0.861, + "step": 143370 + }, + { + "epoch": 1.0378799394847518, + "grad_norm": 0.16278128325939178, + "learning_rate": 3.962127299181307e-06, + "loss": 0.8673, + "step": 143380 + }, + { + "epoch": 1.0379523261453378, + "grad_norm": 0.16452762484550476, + "learning_rate": 3.962054912520721e-06, + "loss": 0.8692, + "step": 143390 + }, + { + "epoch": 1.0380247128059241, + "grad_norm": 0.15840129554271698, + "learning_rate": 3.961982525860134e-06, + "loss": 0.8589, + "step": 143400 + }, + { + "epoch": 1.0380970994665104, + "grad_norm": 0.14838318526744843, + "learning_rate": 3.961910139199549e-06, + "loss": 0.8547, + "step": 143410 + }, + { + "epoch": 1.0381694861270965, + "grad_norm": 0.16569004952907562, + "learning_rate": 3.9618377525389625e-06, + "loss": 0.847, + "step": 143420 + }, + { + "epoch": 1.0382418727876828, + "grad_norm": 0.1536937952041626, + "learning_rate": 3.961765365878376e-06, + "loss": 0.8547, + "step": 143430 + }, + { + "epoch": 1.0383142594482688, + "grad_norm": 0.16339300572872162, + "learning_rate": 3.96169297921779e-06, + "loss": 0.8637, + "step": 143440 + }, + { + "epoch": 1.0383866461088551, + "grad_norm": 0.1539309173822403, + "learning_rate": 3.961620592557204e-06, + "loss": 0.8667, + "step": 143450 + }, + { + "epoch": 1.0384590327694412, + "grad_norm": 0.16559149324893951, + "learning_rate": 3.961548205896618e-06, + "loss": 0.8595, + "step": 143460 + }, + { + "epoch": 1.0385314194300275, + "grad_norm": 0.1649179905653, + "learning_rate": 3.961475819236031e-06, + "loss": 0.8685, + "step": 143470 + }, + { + "epoch": 1.0386038060906135, + "grad_norm": 0.15256711840629578, + "learning_rate": 3.961403432575445e-06, + "loss": 0.856, + "step": 143480 + }, + { + "epoch": 1.0386761927511998, + "grad_norm": 0.1560206562280655, + "learning_rate": 3.9613310459148595e-06, + "loss": 0.8654, + "step": 143490 + }, + { + "epoch": 1.038748579411786, + "grad_norm": 0.16528300940990448, + "learning_rate": 3.961258659254273e-06, + "loss": 0.8678, + "step": 143500 + }, + { + "epoch": 1.0388209660723722, + "grad_norm": 0.1495116949081421, + "learning_rate": 3.961186272593687e-06, + "loss": 0.8661, + "step": 143510 + }, + { + "epoch": 1.0388933527329585, + "grad_norm": 0.16050077974796295, + "learning_rate": 3.9611138859331e-06, + "loss": 0.8632, + "step": 143520 + }, + { + "epoch": 1.0389657393935445, + "grad_norm": 0.14846397936344147, + "learning_rate": 3.961041499272514e-06, + "loss": 0.8513, + "step": 143530 + }, + { + "epoch": 1.0390381260541308, + "grad_norm": 0.15063372254371643, + "learning_rate": 3.960969112611928e-06, + "loss": 0.8696, + "step": 143540 + }, + { + "epoch": 1.0391105127147169, + "grad_norm": 0.15840241312980652, + "learning_rate": 3.960896725951342e-06, + "loss": 0.8648, + "step": 143550 + }, + { + "epoch": 1.0391828993753032, + "grad_norm": 0.22342956066131592, + "learning_rate": 3.960824339290756e-06, + "loss": 0.8598, + "step": 143560 + }, + { + "epoch": 1.0392552860358892, + "grad_norm": 0.15167798101902008, + "learning_rate": 3.960751952630169e-06, + "loss": 0.8531, + "step": 143570 + }, + { + "epoch": 1.0393276726964755, + "grad_norm": 0.158100888133049, + "learning_rate": 3.960679565969584e-06, + "loss": 0.8568, + "step": 143580 + }, + { + "epoch": 1.0394000593570616, + "grad_norm": 0.1609964668750763, + "learning_rate": 3.960607179308997e-06, + "loss": 0.8516, + "step": 143590 + }, + { + "epoch": 1.0394724460176479, + "grad_norm": 0.21195641160011292, + "learning_rate": 3.960534792648411e-06, + "loss": 0.8686, + "step": 143600 + }, + { + "epoch": 1.0395448326782342, + "grad_norm": 0.1418725848197937, + "learning_rate": 3.9604624059878246e-06, + "loss": 0.8644, + "step": 143610 + }, + { + "epoch": 1.0396172193388202, + "grad_norm": 0.15254880487918854, + "learning_rate": 3.960390019327239e-06, + "loss": 0.8557, + "step": 143620 + }, + { + "epoch": 1.0396896059994065, + "grad_norm": 0.1374930441379547, + "learning_rate": 3.960317632666653e-06, + "loss": 0.8588, + "step": 143630 + }, + { + "epoch": 1.0397619926599926, + "grad_norm": 0.1622588187456131, + "learning_rate": 3.960245246006066e-06, + "loss": 0.8537, + "step": 143640 + }, + { + "epoch": 1.0398343793205789, + "grad_norm": 0.1530616283416748, + "learning_rate": 3.96017285934548e-06, + "loss": 0.863, + "step": 143650 + }, + { + "epoch": 1.039906765981165, + "grad_norm": 0.16928766667842865, + "learning_rate": 3.960100472684894e-06, + "loss": 0.8646, + "step": 143660 + }, + { + "epoch": 1.0399791526417512, + "grad_norm": 0.15035808086395264, + "learning_rate": 3.960028086024308e-06, + "loss": 0.8539, + "step": 143670 + }, + { + "epoch": 1.0400515393023373, + "grad_norm": 0.1483323574066162, + "learning_rate": 3.9599556993637216e-06, + "loss": 0.8753, + "step": 143680 + }, + { + "epoch": 1.0401239259629236, + "grad_norm": 0.15935401618480682, + "learning_rate": 3.959883312703135e-06, + "loss": 0.8563, + "step": 143690 + }, + { + "epoch": 1.0401963126235096, + "grad_norm": 0.15291538834571838, + "learning_rate": 3.95981092604255e-06, + "loss": 0.8726, + "step": 143700 + }, + { + "epoch": 1.040268699284096, + "grad_norm": 0.14946019649505615, + "learning_rate": 3.959738539381963e-06, + "loss": 0.8623, + "step": 143710 + }, + { + "epoch": 1.0403410859446822, + "grad_norm": 0.13929761946201324, + "learning_rate": 3.959666152721377e-06, + "loss": 0.8612, + "step": 143720 + }, + { + "epoch": 1.0404134726052683, + "grad_norm": 0.1517852246761322, + "learning_rate": 3.9595937660607905e-06, + "loss": 0.8605, + "step": 143730 + }, + { + "epoch": 1.0404858592658546, + "grad_norm": 0.14445209503173828, + "learning_rate": 3.959521379400205e-06, + "loss": 0.8802, + "step": 143740 + }, + { + "epoch": 1.0405582459264406, + "grad_norm": 0.1581372618675232, + "learning_rate": 3.959448992739619e-06, + "loss": 0.8513, + "step": 143750 + }, + { + "epoch": 1.040630632587027, + "grad_norm": 0.16304130852222443, + "learning_rate": 3.959376606079032e-06, + "loss": 0.869, + "step": 143760 + }, + { + "epoch": 1.040703019247613, + "grad_norm": 0.17016209661960602, + "learning_rate": 3.959304219418446e-06, + "loss": 0.8516, + "step": 143770 + }, + { + "epoch": 1.0407754059081993, + "grad_norm": 0.1455002725124359, + "learning_rate": 3.95923183275786e-06, + "loss": 0.8541, + "step": 143780 + }, + { + "epoch": 1.0408477925687853, + "grad_norm": 0.3031005263328552, + "learning_rate": 3.959159446097274e-06, + "loss": 0.8615, + "step": 143790 + }, + { + "epoch": 1.0409201792293716, + "grad_norm": 0.15537060797214508, + "learning_rate": 3.9590870594366875e-06, + "loss": 0.8534, + "step": 143800 + }, + { + "epoch": 1.0409925658899577, + "grad_norm": 0.18739604949951172, + "learning_rate": 3.959014672776101e-06, + "loss": 0.8595, + "step": 143810 + }, + { + "epoch": 1.041064952550544, + "grad_norm": 0.15703223645687103, + "learning_rate": 3.958942286115516e-06, + "loss": 0.8672, + "step": 143820 + }, + { + "epoch": 1.0411373392111303, + "grad_norm": 0.15714220702648163, + "learning_rate": 3.958869899454928e-06, + "loss": 0.8801, + "step": 143830 + }, + { + "epoch": 1.0412097258717163, + "grad_norm": 0.14824450016021729, + "learning_rate": 3.958797512794342e-06, + "loss": 0.8673, + "step": 143840 + }, + { + "epoch": 1.0412821125323026, + "grad_norm": 0.14315825700759888, + "learning_rate": 3.9587251261337564e-06, + "loss": 0.8553, + "step": 143850 + }, + { + "epoch": 1.0413544991928887, + "grad_norm": 0.1702081710100174, + "learning_rate": 3.95865273947317e-06, + "loss": 0.8644, + "step": 143860 + }, + { + "epoch": 1.041426885853475, + "grad_norm": 0.18592719733715057, + "learning_rate": 3.958580352812584e-06, + "loss": 0.865, + "step": 143870 + }, + { + "epoch": 1.041499272514061, + "grad_norm": 0.15795159339904785, + "learning_rate": 3.958507966151997e-06, + "loss": 0.8566, + "step": 143880 + }, + { + "epoch": 1.0415716591746473, + "grad_norm": 0.16113890707492828, + "learning_rate": 3.958435579491412e-06, + "loss": 0.8577, + "step": 143890 + }, + { + "epoch": 1.0416440458352334, + "grad_norm": 0.29650846123695374, + "learning_rate": 3.958363192830825e-06, + "loss": 0.8594, + "step": 143900 + }, + { + "epoch": 1.0417164324958197, + "grad_norm": 0.14694488048553467, + "learning_rate": 3.958290806170239e-06, + "loss": 0.8693, + "step": 143910 + }, + { + "epoch": 1.0417888191564058, + "grad_norm": 0.15277814865112305, + "learning_rate": 3.958218419509653e-06, + "loss": 0.8697, + "step": 143920 + }, + { + "epoch": 1.041861205816992, + "grad_norm": 0.16288557648658752, + "learning_rate": 3.958146032849067e-06, + "loss": 0.8669, + "step": 143930 + }, + { + "epoch": 1.0419335924775783, + "grad_norm": 0.14526166021823883, + "learning_rate": 3.958073646188481e-06, + "loss": 0.8649, + "step": 143940 + }, + { + "epoch": 1.0420059791381644, + "grad_norm": 0.15091979503631592, + "learning_rate": 3.958001259527894e-06, + "loss": 0.859, + "step": 143950 + }, + { + "epoch": 1.0420783657987507, + "grad_norm": 0.14309091866016388, + "learning_rate": 3.957928872867308e-06, + "loss": 0.8614, + "step": 143960 + }, + { + "epoch": 1.0421507524593367, + "grad_norm": 0.15367577970027924, + "learning_rate": 3.957856486206722e-06, + "loss": 0.8606, + "step": 143970 + }, + { + "epoch": 1.042223139119923, + "grad_norm": 0.15176285803318024, + "learning_rate": 3.957784099546136e-06, + "loss": 0.8542, + "step": 143980 + }, + { + "epoch": 1.042295525780509, + "grad_norm": 0.1759510636329651, + "learning_rate": 3.95771171288555e-06, + "loss": 0.858, + "step": 143990 + }, + { + "epoch": 1.0423679124410954, + "grad_norm": 0.17628808319568634, + "learning_rate": 3.957639326224963e-06, + "loss": 0.8684, + "step": 144000 + }, + { + "epoch": 1.0424402991016815, + "grad_norm": 0.14174871146678925, + "learning_rate": 3.957566939564378e-06, + "loss": 0.8565, + "step": 144010 + }, + { + "epoch": 1.0425126857622677, + "grad_norm": 0.14700306951999664, + "learning_rate": 3.957494552903791e-06, + "loss": 0.8608, + "step": 144020 + }, + { + "epoch": 1.0425850724228538, + "grad_norm": 0.14982403814792633, + "learning_rate": 3.957422166243205e-06, + "loss": 0.8552, + "step": 144030 + }, + { + "epoch": 1.04265745908344, + "grad_norm": 0.1603231132030487, + "learning_rate": 3.9573497795826185e-06, + "loss": 0.8628, + "step": 144040 + }, + { + "epoch": 1.0427298457440264, + "grad_norm": 0.173030823469162, + "learning_rate": 3.957277392922033e-06, + "loss": 0.8695, + "step": 144050 + }, + { + "epoch": 1.0428022324046125, + "grad_norm": 0.18161053955554962, + "learning_rate": 3.957205006261447e-06, + "loss": 0.8633, + "step": 144060 + }, + { + "epoch": 1.0428746190651987, + "grad_norm": 0.15263399481773376, + "learning_rate": 3.95713261960086e-06, + "loss": 0.8574, + "step": 144070 + }, + { + "epoch": 1.0429470057257848, + "grad_norm": 0.15920861065387726, + "learning_rate": 3.957060232940274e-06, + "loss": 0.8629, + "step": 144080 + }, + { + "epoch": 1.043019392386371, + "grad_norm": 0.15555734932422638, + "learning_rate": 3.956987846279688e-06, + "loss": 0.8768, + "step": 144090 + }, + { + "epoch": 1.0430917790469572, + "grad_norm": 0.1550634801387787, + "learning_rate": 3.956915459619102e-06, + "loss": 0.8533, + "step": 144100 + }, + { + "epoch": 1.0431641657075434, + "grad_norm": 0.14551453292369843, + "learning_rate": 3.9568430729585155e-06, + "loss": 0.8579, + "step": 144110 + }, + { + "epoch": 1.0432365523681295, + "grad_norm": 0.14766032993793488, + "learning_rate": 3.956770686297929e-06, + "loss": 0.8678, + "step": 144120 + }, + { + "epoch": 1.0433089390287158, + "grad_norm": 0.15198858082294464, + "learning_rate": 3.956698299637344e-06, + "loss": 0.8638, + "step": 144130 + }, + { + "epoch": 1.0433813256893019, + "grad_norm": 0.16540324687957764, + "learning_rate": 3.956625912976757e-06, + "loss": 0.8662, + "step": 144140 + }, + { + "epoch": 1.0434537123498882, + "grad_norm": 0.14517223834991455, + "learning_rate": 3.956553526316171e-06, + "loss": 0.8454, + "step": 144150 + }, + { + "epoch": 1.0435260990104744, + "grad_norm": 0.15670832991600037, + "learning_rate": 3.9564811396555845e-06, + "loss": 0.8574, + "step": 144160 + }, + { + "epoch": 1.0435984856710605, + "grad_norm": 0.16372807323932648, + "learning_rate": 3.956408752994998e-06, + "loss": 0.8549, + "step": 144170 + }, + { + "epoch": 1.0436708723316468, + "grad_norm": 0.157616525888443, + "learning_rate": 3.9563363663344125e-06, + "loss": 0.844, + "step": 144180 + }, + { + "epoch": 1.0437432589922329, + "grad_norm": 0.14867444336414337, + "learning_rate": 3.956263979673826e-06, + "loss": 0.8581, + "step": 144190 + }, + { + "epoch": 1.0438156456528191, + "grad_norm": 0.15793468058109283, + "learning_rate": 3.95619159301324e-06, + "loss": 0.8503, + "step": 144200 + }, + { + "epoch": 1.0438880323134052, + "grad_norm": 0.14294303953647614, + "learning_rate": 3.956119206352653e-06, + "loss": 0.8643, + "step": 144210 + }, + { + "epoch": 1.0439604189739915, + "grad_norm": 0.15803156793117523, + "learning_rate": 3.956046819692068e-06, + "loss": 0.8626, + "step": 144220 + }, + { + "epoch": 1.0440328056345776, + "grad_norm": 0.16307860612869263, + "learning_rate": 3.9559744330314815e-06, + "loss": 0.8663, + "step": 144230 + }, + { + "epoch": 1.0441051922951639, + "grad_norm": 0.14012962579727173, + "learning_rate": 3.955902046370895e-06, + "loss": 0.8566, + "step": 144240 + }, + { + "epoch": 1.0441775789557501, + "grad_norm": 0.19442588090896606, + "learning_rate": 3.955829659710309e-06, + "loss": 0.8765, + "step": 144250 + }, + { + "epoch": 1.0442499656163362, + "grad_norm": 0.15497778356075287, + "learning_rate": 3.955757273049723e-06, + "loss": 0.8596, + "step": 144260 + }, + { + "epoch": 1.0443223522769225, + "grad_norm": 0.15223382413387299, + "learning_rate": 3.955684886389137e-06, + "loss": 0.8815, + "step": 144270 + }, + { + "epoch": 1.0443947389375086, + "grad_norm": 0.15136075019836426, + "learning_rate": 3.95561249972855e-06, + "loss": 0.8648, + "step": 144280 + }, + { + "epoch": 1.0444671255980948, + "grad_norm": 0.15024292469024658, + "learning_rate": 3.955540113067964e-06, + "loss": 0.86, + "step": 144290 + }, + { + "epoch": 1.044539512258681, + "grad_norm": 0.1588437706232071, + "learning_rate": 3.9554677264073785e-06, + "loss": 0.8607, + "step": 144300 + }, + { + "epoch": 1.0446118989192672, + "grad_norm": 0.15004202723503113, + "learning_rate": 3.955395339746792e-06, + "loss": 0.8609, + "step": 144310 + }, + { + "epoch": 1.0446842855798533, + "grad_norm": 0.14561429619789124, + "learning_rate": 3.955322953086206e-06, + "loss": 0.8673, + "step": 144320 + }, + { + "epoch": 1.0447566722404396, + "grad_norm": 0.1565464287996292, + "learning_rate": 3.955250566425619e-06, + "loss": 0.8591, + "step": 144330 + }, + { + "epoch": 1.0448290589010256, + "grad_norm": 0.1545923948287964, + "learning_rate": 3.955178179765034e-06, + "loss": 0.8445, + "step": 144340 + }, + { + "epoch": 1.044901445561612, + "grad_norm": 0.15572689473628998, + "learning_rate": 3.955105793104447e-06, + "loss": 0.8552, + "step": 144350 + }, + { + "epoch": 1.0449738322221982, + "grad_norm": 0.1447010338306427, + "learning_rate": 3.95503340644386e-06, + "loss": 0.8587, + "step": 144360 + }, + { + "epoch": 1.0450462188827843, + "grad_norm": 0.15390467643737793, + "learning_rate": 3.954961019783275e-06, + "loss": 0.8602, + "step": 144370 + }, + { + "epoch": 1.0451186055433705, + "grad_norm": 0.16179361939430237, + "learning_rate": 3.954888633122688e-06, + "loss": 0.8546, + "step": 144380 + }, + { + "epoch": 1.0451909922039566, + "grad_norm": 0.1796809881925583, + "learning_rate": 3.954816246462102e-06, + "loss": 0.8601, + "step": 144390 + }, + { + "epoch": 1.045263378864543, + "grad_norm": 0.16800828278064728, + "learning_rate": 3.9547438598015155e-06, + "loss": 0.8624, + "step": 144400 + }, + { + "epoch": 1.045335765525129, + "grad_norm": 0.14844734966754913, + "learning_rate": 3.95467147314093e-06, + "loss": 0.8666, + "step": 144410 + }, + { + "epoch": 1.0454081521857153, + "grad_norm": 0.1545286625623703, + "learning_rate": 3.9545990864803436e-06, + "loss": 0.8598, + "step": 144420 + }, + { + "epoch": 1.0454805388463013, + "grad_norm": 0.1570822298526764, + "learning_rate": 3.954526699819757e-06, + "loss": 0.8608, + "step": 144430 + }, + { + "epoch": 1.0455529255068876, + "grad_norm": 0.14228971302509308, + "learning_rate": 3.954454313159171e-06, + "loss": 0.8523, + "step": 144440 + }, + { + "epoch": 1.0456253121674737, + "grad_norm": 0.14609818160533905, + "learning_rate": 3.954381926498585e-06, + "loss": 0.8584, + "step": 144450 + }, + { + "epoch": 1.04569769882806, + "grad_norm": 0.1522809863090515, + "learning_rate": 3.954309539837999e-06, + "loss": 0.8569, + "step": 144460 + }, + { + "epoch": 1.0457700854886463, + "grad_norm": 0.2182849943637848, + "learning_rate": 3.9542371531774125e-06, + "loss": 0.8412, + "step": 144470 + }, + { + "epoch": 1.0458424721492323, + "grad_norm": 0.148259699344635, + "learning_rate": 3.954164766516826e-06, + "loss": 0.8521, + "step": 144480 + }, + { + "epoch": 1.0459148588098186, + "grad_norm": 0.14706644415855408, + "learning_rate": 3.954092379856241e-06, + "loss": 0.8622, + "step": 144490 + }, + { + "epoch": 1.0459872454704047, + "grad_norm": 0.15702740848064423, + "learning_rate": 3.954019993195654e-06, + "loss": 0.8563, + "step": 144500 + }, + { + "epoch": 1.046059632130991, + "grad_norm": 0.1426214873790741, + "learning_rate": 3.953947606535068e-06, + "loss": 0.8628, + "step": 144510 + }, + { + "epoch": 1.046132018791577, + "grad_norm": 0.14611200988292694, + "learning_rate": 3.9538752198744814e-06, + "loss": 0.8544, + "step": 144520 + }, + { + "epoch": 1.0462044054521633, + "grad_norm": 0.15756775438785553, + "learning_rate": 3.953802833213896e-06, + "loss": 0.8568, + "step": 144530 + }, + { + "epoch": 1.0462767921127494, + "grad_norm": 0.15759259462356567, + "learning_rate": 3.9537304465533095e-06, + "loss": 0.8634, + "step": 144540 + }, + { + "epoch": 1.0463491787733357, + "grad_norm": 0.15958790481090546, + "learning_rate": 3.953658059892723e-06, + "loss": 0.8763, + "step": 144550 + }, + { + "epoch": 1.0464215654339217, + "grad_norm": 0.15510474145412445, + "learning_rate": 3.953585673232137e-06, + "loss": 0.873, + "step": 144560 + }, + { + "epoch": 1.046493952094508, + "grad_norm": 0.1597568690776825, + "learning_rate": 3.953513286571551e-06, + "loss": 0.8488, + "step": 144570 + }, + { + "epoch": 1.0465663387550943, + "grad_norm": 0.14834840595722198, + "learning_rate": 3.953440899910965e-06, + "loss": 0.8545, + "step": 144580 + }, + { + "epoch": 1.0466387254156804, + "grad_norm": 0.13909775018692017, + "learning_rate": 3.9533685132503784e-06, + "loss": 0.8478, + "step": 144590 + }, + { + "epoch": 1.0467111120762667, + "grad_norm": 0.14474329352378845, + "learning_rate": 3.953296126589792e-06, + "loss": 0.8699, + "step": 144600 + }, + { + "epoch": 1.0467834987368527, + "grad_norm": 0.16412761807441711, + "learning_rate": 3.9532237399292065e-06, + "loss": 0.8616, + "step": 144610 + }, + { + "epoch": 1.046855885397439, + "grad_norm": 0.1485072374343872, + "learning_rate": 3.95315135326862e-06, + "loss": 0.8594, + "step": 144620 + }, + { + "epoch": 1.046928272058025, + "grad_norm": 0.1748332530260086, + "learning_rate": 3.953078966608034e-06, + "loss": 0.8552, + "step": 144630 + }, + { + "epoch": 1.0470006587186114, + "grad_norm": 0.15388593077659607, + "learning_rate": 3.953006579947447e-06, + "loss": 0.8633, + "step": 144640 + }, + { + "epoch": 1.0470730453791974, + "grad_norm": 0.1663622111082077, + "learning_rate": 3.952934193286862e-06, + "loss": 0.8675, + "step": 144650 + }, + { + "epoch": 1.0471454320397837, + "grad_norm": 0.1697302758693695, + "learning_rate": 3.9528618066262754e-06, + "loss": 0.8561, + "step": 144660 + }, + { + "epoch": 1.04721781870037, + "grad_norm": 0.15296570956707, + "learning_rate": 3.952789419965689e-06, + "loss": 0.8699, + "step": 144670 + }, + { + "epoch": 1.047290205360956, + "grad_norm": 0.1404290646314621, + "learning_rate": 3.952717033305103e-06, + "loss": 0.8566, + "step": 144680 + }, + { + "epoch": 1.0473625920215424, + "grad_norm": 0.16811597347259521, + "learning_rate": 3.952644646644517e-06, + "loss": 0.8771, + "step": 144690 + }, + { + "epoch": 1.0474349786821284, + "grad_norm": 0.15282821655273438, + "learning_rate": 3.952572259983931e-06, + "loss": 0.8612, + "step": 144700 + }, + { + "epoch": 1.0475073653427147, + "grad_norm": 0.16427479684352875, + "learning_rate": 3.952499873323344e-06, + "loss": 0.8633, + "step": 144710 + }, + { + "epoch": 1.0475797520033008, + "grad_norm": 0.16405102610588074, + "learning_rate": 3.952427486662758e-06, + "loss": 0.8652, + "step": 144720 + }, + { + "epoch": 1.047652138663887, + "grad_norm": 0.14191152155399323, + "learning_rate": 3.9523551000021724e-06, + "loss": 0.8628, + "step": 144730 + }, + { + "epoch": 1.0477245253244731, + "grad_norm": 0.15622809529304504, + "learning_rate": 3.952282713341586e-06, + "loss": 0.8697, + "step": 144740 + }, + { + "epoch": 1.0477969119850594, + "grad_norm": 0.15117841958999634, + "learning_rate": 3.952210326681e-06, + "loss": 0.8628, + "step": 144750 + }, + { + "epoch": 1.0478692986456455, + "grad_norm": 0.14459557831287384, + "learning_rate": 3.952137940020413e-06, + "loss": 0.861, + "step": 144760 + }, + { + "epoch": 1.0479416853062318, + "grad_norm": 0.14188896119594574, + "learning_rate": 3.952065553359827e-06, + "loss": 0.8772, + "step": 144770 + }, + { + "epoch": 1.048014071966818, + "grad_norm": 0.16736344993114471, + "learning_rate": 3.951993166699241e-06, + "loss": 0.8569, + "step": 144780 + }, + { + "epoch": 1.0480864586274041, + "grad_norm": 0.16879191994667053, + "learning_rate": 3.951920780038655e-06, + "loss": 0.8496, + "step": 144790 + }, + { + "epoch": 1.0481588452879904, + "grad_norm": 0.16716881096363068, + "learning_rate": 3.951848393378069e-06, + "loss": 0.8536, + "step": 144800 + }, + { + "epoch": 1.0482312319485765, + "grad_norm": 0.14509662985801697, + "learning_rate": 3.951776006717482e-06, + "loss": 0.8582, + "step": 144810 + }, + { + "epoch": 1.0483036186091628, + "grad_norm": 0.16360674798488617, + "learning_rate": 3.951703620056897e-06, + "loss": 0.8664, + "step": 144820 + }, + { + "epoch": 1.0483760052697488, + "grad_norm": 0.15307435393333435, + "learning_rate": 3.95163123339631e-06, + "loss": 0.8481, + "step": 144830 + }, + { + "epoch": 1.0484483919303351, + "grad_norm": 0.1502341479063034, + "learning_rate": 3.951558846735724e-06, + "loss": 0.8533, + "step": 144840 + }, + { + "epoch": 1.0485207785909212, + "grad_norm": 0.15502779185771942, + "learning_rate": 3.9514864600751375e-06, + "loss": 0.8588, + "step": 144850 + }, + { + "epoch": 1.0485931652515075, + "grad_norm": 0.15955467522144318, + "learning_rate": 3.951414073414552e-06, + "loss": 0.858, + "step": 144860 + }, + { + "epoch": 1.0486655519120935, + "grad_norm": 0.14335303008556366, + "learning_rate": 3.951341686753966e-06, + "loss": 0.8591, + "step": 144870 + }, + { + "epoch": 1.0487379385726798, + "grad_norm": 0.15716949105262756, + "learning_rate": 3.951269300093379e-06, + "loss": 0.8433, + "step": 144880 + }, + { + "epoch": 1.0488103252332661, + "grad_norm": 0.16508370637893677, + "learning_rate": 3.951196913432793e-06, + "loss": 0.8454, + "step": 144890 + }, + { + "epoch": 1.0488827118938522, + "grad_norm": 0.14553935825824738, + "learning_rate": 3.9511245267722065e-06, + "loss": 0.8657, + "step": 144900 + }, + { + "epoch": 1.0489550985544385, + "grad_norm": 0.16298197209835052, + "learning_rate": 3.95105214011162e-06, + "loss": 0.8496, + "step": 144910 + }, + { + "epoch": 1.0490274852150245, + "grad_norm": 0.15694205462932587, + "learning_rate": 3.9509797534510345e-06, + "loss": 0.8463, + "step": 144920 + }, + { + "epoch": 1.0490998718756108, + "grad_norm": 0.14872339367866516, + "learning_rate": 3.950907366790448e-06, + "loss": 0.8512, + "step": 144930 + }, + { + "epoch": 1.0491722585361969, + "grad_norm": 0.14613871276378632, + "learning_rate": 3.950834980129862e-06, + "loss": 0.8468, + "step": 144940 + }, + { + "epoch": 1.0492446451967832, + "grad_norm": 0.175027996301651, + "learning_rate": 3.950762593469275e-06, + "loss": 0.8601, + "step": 144950 + }, + { + "epoch": 1.0493170318573692, + "grad_norm": 0.15894457697868347, + "learning_rate": 3.950690206808689e-06, + "loss": 0.8597, + "step": 144960 + }, + { + "epoch": 1.0493894185179555, + "grad_norm": 0.14936257898807526, + "learning_rate": 3.9506178201481035e-06, + "loss": 0.8526, + "step": 144970 + }, + { + "epoch": 1.0494618051785416, + "grad_norm": 0.15866105258464813, + "learning_rate": 3.950545433487517e-06, + "loss": 0.8633, + "step": 144980 + }, + { + "epoch": 1.0495341918391279, + "grad_norm": 0.1673537641763687, + "learning_rate": 3.950473046826931e-06, + "loss": 0.8502, + "step": 144990 + }, + { + "epoch": 1.0496065784997142, + "grad_norm": 0.14420992136001587, + "learning_rate": 3.950400660166344e-06, + "loss": 0.8576, + "step": 145000 + }, + { + "epoch": 1.0496789651603002, + "grad_norm": 0.16589012742042542, + "learning_rate": 3.950328273505759e-06, + "loss": 0.8598, + "step": 145010 + }, + { + "epoch": 1.0497513518208865, + "grad_norm": 0.15447083115577698, + "learning_rate": 3.950255886845172e-06, + "loss": 0.8461, + "step": 145020 + }, + { + "epoch": 1.0498237384814726, + "grad_norm": 0.15255771577358246, + "learning_rate": 3.950183500184586e-06, + "loss": 0.8643, + "step": 145030 + }, + { + "epoch": 1.0498961251420589, + "grad_norm": 0.17621764540672302, + "learning_rate": 3.950111113524e-06, + "loss": 0.8766, + "step": 145040 + }, + { + "epoch": 1.049968511802645, + "grad_norm": 0.15463955700397491, + "learning_rate": 3.950038726863414e-06, + "loss": 0.8675, + "step": 145050 + }, + { + "epoch": 1.0500408984632312, + "grad_norm": 0.14184853434562683, + "learning_rate": 3.949966340202828e-06, + "loss": 0.8533, + "step": 145060 + }, + { + "epoch": 1.0501132851238173, + "grad_norm": 0.1818603277206421, + "learning_rate": 3.949893953542241e-06, + "loss": 0.8644, + "step": 145070 + }, + { + "epoch": 1.0501856717844036, + "grad_norm": 0.1516370177268982, + "learning_rate": 3.949821566881655e-06, + "loss": 0.8632, + "step": 145080 + }, + { + "epoch": 1.0502580584449897, + "grad_norm": 0.15722733736038208, + "learning_rate": 3.949749180221069e-06, + "loss": 0.8562, + "step": 145090 + }, + { + "epoch": 1.050330445105576, + "grad_norm": 0.1518261581659317, + "learning_rate": 3.949676793560483e-06, + "loss": 0.8694, + "step": 145100 + }, + { + "epoch": 1.0504028317661622, + "grad_norm": 0.15784966945648193, + "learning_rate": 3.949604406899897e-06, + "loss": 0.8699, + "step": 145110 + }, + { + "epoch": 1.0504752184267483, + "grad_norm": 0.15036171674728394, + "learning_rate": 3.94953202023931e-06, + "loss": 0.8759, + "step": 145120 + }, + { + "epoch": 1.0505476050873346, + "grad_norm": 0.1495043933391571, + "learning_rate": 3.949459633578725e-06, + "loss": 0.8592, + "step": 145130 + }, + { + "epoch": 1.0506199917479206, + "grad_norm": 0.16956430673599243, + "learning_rate": 3.949387246918138e-06, + "loss": 0.8543, + "step": 145140 + }, + { + "epoch": 1.050692378408507, + "grad_norm": 0.1504315882921219, + "learning_rate": 3.949314860257552e-06, + "loss": 0.8614, + "step": 145150 + }, + { + "epoch": 1.050764765069093, + "grad_norm": 0.16208255290985107, + "learning_rate": 3.9492424735969656e-06, + "loss": 0.8619, + "step": 145160 + }, + { + "epoch": 1.0508371517296793, + "grad_norm": 0.15502387285232544, + "learning_rate": 3.94917008693638e-06, + "loss": 0.8673, + "step": 145170 + }, + { + "epoch": 1.0509095383902654, + "grad_norm": 0.1422945111989975, + "learning_rate": 3.949097700275794e-06, + "loss": 0.8584, + "step": 145180 + }, + { + "epoch": 1.0509819250508516, + "grad_norm": 0.15001416206359863, + "learning_rate": 3.949025313615207e-06, + "loss": 0.8614, + "step": 145190 + }, + { + "epoch": 1.0510543117114377, + "grad_norm": 0.1533329039812088, + "learning_rate": 3.948952926954621e-06, + "loss": 0.8755, + "step": 145200 + }, + { + "epoch": 1.051126698372024, + "grad_norm": 0.16097886860370636, + "learning_rate": 3.948880540294035e-06, + "loss": 0.8498, + "step": 145210 + }, + { + "epoch": 1.0511990850326103, + "grad_norm": 0.16414912045001984, + "learning_rate": 3.948808153633449e-06, + "loss": 0.8579, + "step": 145220 + }, + { + "epoch": 1.0512714716931963, + "grad_norm": 0.1741316169500351, + "learning_rate": 3.948735766972863e-06, + "loss": 0.8486, + "step": 145230 + }, + { + "epoch": 1.0513438583537826, + "grad_norm": 0.15596316754817963, + "learning_rate": 3.948663380312276e-06, + "loss": 0.8546, + "step": 145240 + }, + { + "epoch": 1.0514162450143687, + "grad_norm": 0.15392540395259857, + "learning_rate": 3.948590993651691e-06, + "loss": 0.8443, + "step": 145250 + }, + { + "epoch": 1.051488631674955, + "grad_norm": 0.15028880536556244, + "learning_rate": 3.948518606991104e-06, + "loss": 0.8501, + "step": 145260 + }, + { + "epoch": 1.051561018335541, + "grad_norm": 0.14961551129817963, + "learning_rate": 3.948446220330518e-06, + "loss": 0.8549, + "step": 145270 + }, + { + "epoch": 1.0516334049961273, + "grad_norm": 0.16350728273391724, + "learning_rate": 3.9483738336699315e-06, + "loss": 0.8723, + "step": 145280 + }, + { + "epoch": 1.0517057916567134, + "grad_norm": 0.1536688506603241, + "learning_rate": 3.948301447009346e-06, + "loss": 0.8512, + "step": 145290 + }, + { + "epoch": 1.0517781783172997, + "grad_norm": 0.1536385864019394, + "learning_rate": 3.94822906034876e-06, + "loss": 0.8598, + "step": 145300 + }, + { + "epoch": 1.051850564977886, + "grad_norm": 0.1381438672542572, + "learning_rate": 3.948156673688173e-06, + "loss": 0.8638, + "step": 145310 + }, + { + "epoch": 1.051922951638472, + "grad_norm": 0.17076510190963745, + "learning_rate": 3.948084287027587e-06, + "loss": 0.8629, + "step": 145320 + }, + { + "epoch": 1.0519953382990583, + "grad_norm": 0.15944698452949524, + "learning_rate": 3.948011900367001e-06, + "loss": 0.8617, + "step": 145330 + }, + { + "epoch": 1.0520677249596444, + "grad_norm": 0.15273165702819824, + "learning_rate": 3.947939513706415e-06, + "loss": 0.8553, + "step": 145340 + }, + { + "epoch": 1.0521401116202307, + "grad_norm": 0.14267678558826447, + "learning_rate": 3.9478671270458285e-06, + "loss": 0.8529, + "step": 145350 + }, + { + "epoch": 1.0522124982808168, + "grad_norm": 0.1617887020111084, + "learning_rate": 3.947794740385242e-06, + "loss": 0.8506, + "step": 145360 + }, + { + "epoch": 1.052284884941403, + "grad_norm": 0.1612195074558258, + "learning_rate": 3.947722353724657e-06, + "loss": 0.8655, + "step": 145370 + }, + { + "epoch": 1.052357271601989, + "grad_norm": 0.16076426208019257, + "learning_rate": 3.94764996706407e-06, + "loss": 0.853, + "step": 145380 + }, + { + "epoch": 1.0524296582625754, + "grad_norm": 0.14427517354488373, + "learning_rate": 3.947577580403484e-06, + "loss": 0.8647, + "step": 145390 + }, + { + "epoch": 1.0525020449231615, + "grad_norm": 0.1446637511253357, + "learning_rate": 3.9475051937428974e-06, + "loss": 0.8605, + "step": 145400 + }, + { + "epoch": 1.0525744315837477, + "grad_norm": 0.15358710289001465, + "learning_rate": 3.947432807082311e-06, + "loss": 0.8512, + "step": 145410 + }, + { + "epoch": 1.052646818244334, + "grad_norm": 0.1505446434020996, + "learning_rate": 3.947360420421725e-06, + "loss": 0.8567, + "step": 145420 + }, + { + "epoch": 1.05271920490492, + "grad_norm": 0.1601179987192154, + "learning_rate": 3.947288033761138e-06, + "loss": 0.8596, + "step": 145430 + }, + { + "epoch": 1.0527915915655064, + "grad_norm": 0.1553214192390442, + "learning_rate": 3.947215647100553e-06, + "loss": 0.8566, + "step": 145440 + }, + { + "epoch": 1.0528639782260925, + "grad_norm": 0.1488734483718872, + "learning_rate": 3.947143260439966e-06, + "loss": 0.8725, + "step": 145450 + }, + { + "epoch": 1.0529363648866787, + "grad_norm": 0.15073513984680176, + "learning_rate": 3.94707087377938e-06, + "loss": 0.8653, + "step": 145460 + }, + { + "epoch": 1.0530087515472648, + "grad_norm": 0.14692334830760956, + "learning_rate": 3.946998487118794e-06, + "loss": 0.8713, + "step": 145470 + }, + { + "epoch": 1.053081138207851, + "grad_norm": 0.14102409780025482, + "learning_rate": 3.946926100458208e-06, + "loss": 0.866, + "step": 145480 + }, + { + "epoch": 1.0531535248684372, + "grad_norm": 0.15512901544570923, + "learning_rate": 3.946853713797622e-06, + "loss": 0.8619, + "step": 145490 + }, + { + "epoch": 1.0532259115290235, + "grad_norm": 0.15089280903339386, + "learning_rate": 3.946781327137035e-06, + "loss": 0.8563, + "step": 145500 + }, + { + "epoch": 1.0532982981896095, + "grad_norm": 0.14949414134025574, + "learning_rate": 3.946708940476449e-06, + "loss": 0.8751, + "step": 145510 + }, + { + "epoch": 1.0533706848501958, + "grad_norm": 0.18709976971149445, + "learning_rate": 3.946636553815863e-06, + "loss": 0.8556, + "step": 145520 + }, + { + "epoch": 1.053443071510782, + "grad_norm": 0.15413489937782288, + "learning_rate": 3.946564167155277e-06, + "loss": 0.8572, + "step": 145530 + }, + { + "epoch": 1.0535154581713682, + "grad_norm": 0.16273820400238037, + "learning_rate": 3.946491780494691e-06, + "loss": 0.8619, + "step": 145540 + }, + { + "epoch": 1.0535878448319544, + "grad_norm": 0.1684591919183731, + "learning_rate": 3.946419393834104e-06, + "loss": 0.8659, + "step": 145550 + }, + { + "epoch": 1.0536602314925405, + "grad_norm": 0.14503608644008636, + "learning_rate": 3.946347007173518e-06, + "loss": 0.8567, + "step": 145560 + }, + { + "epoch": 1.0537326181531268, + "grad_norm": 0.1562882363796234, + "learning_rate": 3.946274620512932e-06, + "loss": 0.8636, + "step": 145570 + }, + { + "epoch": 1.0538050048137129, + "grad_norm": 0.1774781495332718, + "learning_rate": 3.946202233852346e-06, + "loss": 0.8586, + "step": 145580 + }, + { + "epoch": 1.0538773914742992, + "grad_norm": 0.1477038562297821, + "learning_rate": 3.9461298471917595e-06, + "loss": 0.8533, + "step": 145590 + }, + { + "epoch": 1.0539497781348852, + "grad_norm": 0.15526357293128967, + "learning_rate": 3.946057460531173e-06, + "loss": 0.8632, + "step": 145600 + }, + { + "epoch": 1.0540221647954715, + "grad_norm": 0.15866219997406006, + "learning_rate": 3.945985073870588e-06, + "loss": 0.8617, + "step": 145610 + }, + { + "epoch": 1.0540945514560576, + "grad_norm": 0.1477918028831482, + "learning_rate": 3.945912687210001e-06, + "loss": 0.8589, + "step": 145620 + }, + { + "epoch": 1.0541669381166439, + "grad_norm": 0.15647591650485992, + "learning_rate": 3.945840300549415e-06, + "loss": 0.858, + "step": 145630 + }, + { + "epoch": 1.0542393247772301, + "grad_norm": 0.15782573819160461, + "learning_rate": 3.9457679138888285e-06, + "loss": 0.8497, + "step": 145640 + }, + { + "epoch": 1.0543117114378162, + "grad_norm": 0.15249016880989075, + "learning_rate": 3.945695527228243e-06, + "loss": 0.8575, + "step": 145650 + }, + { + "epoch": 1.0543840980984025, + "grad_norm": 0.15300941467285156, + "learning_rate": 3.9456231405676565e-06, + "loss": 0.8714, + "step": 145660 + }, + { + "epoch": 1.0544564847589886, + "grad_norm": 0.15666401386260986, + "learning_rate": 3.94555075390707e-06, + "loss": 0.8728, + "step": 145670 + }, + { + "epoch": 1.0545288714195749, + "grad_norm": 0.13935892283916473, + "learning_rate": 3.945478367246484e-06, + "loss": 0.8641, + "step": 145680 + }, + { + "epoch": 1.054601258080161, + "grad_norm": 0.1481926590204239, + "learning_rate": 3.945405980585898e-06, + "loss": 0.8426, + "step": 145690 + }, + { + "epoch": 1.0546736447407472, + "grad_norm": 0.14881548285484314, + "learning_rate": 3.945333593925312e-06, + "loss": 0.8627, + "step": 145700 + }, + { + "epoch": 1.0547460314013333, + "grad_norm": 0.15182042121887207, + "learning_rate": 3.9452612072647255e-06, + "loss": 0.8521, + "step": 145710 + }, + { + "epoch": 1.0548184180619196, + "grad_norm": 0.14741186797618866, + "learning_rate": 3.945188820604139e-06, + "loss": 0.8544, + "step": 145720 + }, + { + "epoch": 1.0548908047225058, + "grad_norm": 0.1624380648136139, + "learning_rate": 3.9451164339435536e-06, + "loss": 0.8632, + "step": 145730 + }, + { + "epoch": 1.054963191383092, + "grad_norm": 0.14663566648960114, + "learning_rate": 3.945044047282967e-06, + "loss": 0.8584, + "step": 145740 + }, + { + "epoch": 1.0550355780436782, + "grad_norm": 0.1530272662639618, + "learning_rate": 3.944971660622381e-06, + "loss": 0.8637, + "step": 145750 + }, + { + "epoch": 1.0551079647042643, + "grad_norm": 0.14073669910430908, + "learning_rate": 3.944899273961794e-06, + "loss": 0.8538, + "step": 145760 + }, + { + "epoch": 1.0551803513648506, + "grad_norm": 0.1432807892560959, + "learning_rate": 3.944826887301209e-06, + "loss": 0.8553, + "step": 145770 + }, + { + "epoch": 1.0552527380254366, + "grad_norm": 0.15058694779872894, + "learning_rate": 3.9447545006406225e-06, + "loss": 0.8688, + "step": 145780 + }, + { + "epoch": 1.055325124686023, + "grad_norm": 0.14936070144176483, + "learning_rate": 3.944682113980036e-06, + "loss": 0.8571, + "step": 145790 + }, + { + "epoch": 1.055397511346609, + "grad_norm": 0.1473982185125351, + "learning_rate": 3.94460972731945e-06, + "loss": 0.8464, + "step": 145800 + }, + { + "epoch": 1.0554698980071953, + "grad_norm": 0.14957574009895325, + "learning_rate": 3.944537340658864e-06, + "loss": 0.863, + "step": 145810 + }, + { + "epoch": 1.0555422846677813, + "grad_norm": 0.16294653713703156, + "learning_rate": 3.944464953998278e-06, + "loss": 0.8702, + "step": 145820 + }, + { + "epoch": 1.0556146713283676, + "grad_norm": 0.15721982717514038, + "learning_rate": 3.944392567337691e-06, + "loss": 0.8644, + "step": 145830 + }, + { + "epoch": 1.055687057988954, + "grad_norm": 0.15910634398460388, + "learning_rate": 3.944320180677105e-06, + "loss": 0.8531, + "step": 145840 + }, + { + "epoch": 1.05575944464954, + "grad_norm": 0.14400066435337067, + "learning_rate": 3.9442477940165195e-06, + "loss": 0.8654, + "step": 145850 + }, + { + "epoch": 1.0558318313101263, + "grad_norm": 0.15780583024024963, + "learning_rate": 3.944175407355933e-06, + "loss": 0.838, + "step": 145860 + }, + { + "epoch": 1.0559042179707123, + "grad_norm": 0.14663490653038025, + "learning_rate": 3.944103020695347e-06, + "loss": 0.8495, + "step": 145870 + }, + { + "epoch": 1.0559766046312986, + "grad_norm": 0.14983581006526947, + "learning_rate": 3.94403063403476e-06, + "loss": 0.8636, + "step": 145880 + }, + { + "epoch": 1.0560489912918847, + "grad_norm": 0.15457117557525635, + "learning_rate": 3.943958247374175e-06, + "loss": 0.8544, + "step": 145890 + }, + { + "epoch": 1.056121377952471, + "grad_norm": 0.15235184133052826, + "learning_rate": 3.943885860713588e-06, + "loss": 0.8751, + "step": 145900 + }, + { + "epoch": 1.056193764613057, + "grad_norm": 0.14950567483901978, + "learning_rate": 3.943813474053002e-06, + "loss": 0.8557, + "step": 145910 + }, + { + "epoch": 1.0562661512736433, + "grad_norm": 0.15957853198051453, + "learning_rate": 3.943741087392416e-06, + "loss": 0.8671, + "step": 145920 + }, + { + "epoch": 1.0563385379342294, + "grad_norm": 0.15900222957134247, + "learning_rate": 3.94366870073183e-06, + "loss": 0.8576, + "step": 145930 + }, + { + "epoch": 1.0564109245948157, + "grad_norm": 0.16637851297855377, + "learning_rate": 3.943596314071244e-06, + "loss": 0.862, + "step": 145940 + }, + { + "epoch": 1.056483311255402, + "grad_norm": 0.14471204578876495, + "learning_rate": 3.9435239274106565e-06, + "loss": 0.8473, + "step": 145950 + }, + { + "epoch": 1.056555697915988, + "grad_norm": 0.16126057505607605, + "learning_rate": 3.943451540750071e-06, + "loss": 0.8646, + "step": 145960 + }, + { + "epoch": 1.0566280845765743, + "grad_norm": 0.15724702179431915, + "learning_rate": 3.943379154089485e-06, + "loss": 0.8513, + "step": 145970 + }, + { + "epoch": 1.0567004712371604, + "grad_norm": 0.14262165129184723, + "learning_rate": 3.943306767428898e-06, + "loss": 0.8657, + "step": 145980 + }, + { + "epoch": 1.0567728578977467, + "grad_norm": 0.15832731127738953, + "learning_rate": 3.943234380768312e-06, + "loss": 0.8564, + "step": 145990 + }, + { + "epoch": 1.0568452445583327, + "grad_norm": 0.16884136199951172, + "learning_rate": 3.943161994107726e-06, + "loss": 0.8436, + "step": 146000 + }, + { + "epoch": 1.056917631218919, + "grad_norm": 0.16266973316669464, + "learning_rate": 3.94308960744714e-06, + "loss": 0.866, + "step": 146010 + }, + { + "epoch": 1.056990017879505, + "grad_norm": 0.15265819430351257, + "learning_rate": 3.9430172207865535e-06, + "loss": 0.8672, + "step": 146020 + }, + { + "epoch": 1.0570624045400914, + "grad_norm": 0.14945794641971588, + "learning_rate": 3.942944834125967e-06, + "loss": 0.852, + "step": 146030 + }, + { + "epoch": 1.0571347912006774, + "grad_norm": 0.15644454956054688, + "learning_rate": 3.942872447465382e-06, + "loss": 0.8661, + "step": 146040 + }, + { + "epoch": 1.0572071778612637, + "grad_norm": 0.21663767099380493, + "learning_rate": 3.942800060804795e-06, + "loss": 0.8755, + "step": 146050 + }, + { + "epoch": 1.05727956452185, + "grad_norm": 0.16206009685993195, + "learning_rate": 3.942727674144209e-06, + "loss": 0.8679, + "step": 146060 + }, + { + "epoch": 1.057351951182436, + "grad_norm": 0.15549540519714355, + "learning_rate": 3.9426552874836224e-06, + "loss": 0.8578, + "step": 146070 + }, + { + "epoch": 1.0574243378430224, + "grad_norm": 0.16803592443466187, + "learning_rate": 3.942582900823037e-06, + "loss": 0.8662, + "step": 146080 + }, + { + "epoch": 1.0574967245036084, + "grad_norm": 0.1614563912153244, + "learning_rate": 3.9425105141624505e-06, + "loss": 0.8658, + "step": 146090 + }, + { + "epoch": 1.0575691111641947, + "grad_norm": 0.14888714253902435, + "learning_rate": 3.942438127501864e-06, + "loss": 0.8591, + "step": 146100 + }, + { + "epoch": 1.0576414978247808, + "grad_norm": 0.16620272397994995, + "learning_rate": 3.942365740841278e-06, + "loss": 0.8625, + "step": 146110 + }, + { + "epoch": 1.057713884485367, + "grad_norm": 0.28313207626342773, + "learning_rate": 3.942293354180692e-06, + "loss": 0.8607, + "step": 146120 + }, + { + "epoch": 1.0577862711459531, + "grad_norm": 0.17238175868988037, + "learning_rate": 3.942220967520106e-06, + "loss": 0.8728, + "step": 146130 + }, + { + "epoch": 1.0578586578065394, + "grad_norm": 0.14445391297340393, + "learning_rate": 3.9421485808595194e-06, + "loss": 0.8665, + "step": 146140 + }, + { + "epoch": 1.0579310444671255, + "grad_norm": 0.15155573189258575, + "learning_rate": 3.942076194198933e-06, + "loss": 0.8549, + "step": 146150 + }, + { + "epoch": 1.0580034311277118, + "grad_norm": 0.16635553538799286, + "learning_rate": 3.9420038075383475e-06, + "loss": 0.8578, + "step": 146160 + }, + { + "epoch": 1.058075817788298, + "grad_norm": 0.19425910711288452, + "learning_rate": 3.941931420877761e-06, + "loss": 0.8583, + "step": 146170 + }, + { + "epoch": 1.0581482044488841, + "grad_norm": 0.14456264674663544, + "learning_rate": 3.941859034217175e-06, + "loss": 0.8481, + "step": 146180 + }, + { + "epoch": 1.0582205911094704, + "grad_norm": 0.15371955931186676, + "learning_rate": 3.941786647556588e-06, + "loss": 0.8503, + "step": 146190 + }, + { + "epoch": 1.0582929777700565, + "grad_norm": 0.14967955648899078, + "learning_rate": 3.941714260896002e-06, + "loss": 0.8658, + "step": 146200 + }, + { + "epoch": 1.0583653644306428, + "grad_norm": 0.14629121124744415, + "learning_rate": 3.9416418742354164e-06, + "loss": 0.8577, + "step": 146210 + }, + { + "epoch": 1.0584377510912288, + "grad_norm": 0.13868474960327148, + "learning_rate": 3.94156948757483e-06, + "loss": 0.8489, + "step": 146220 + }, + { + "epoch": 1.0585101377518151, + "grad_norm": 0.16160985827445984, + "learning_rate": 3.941497100914244e-06, + "loss": 0.8684, + "step": 146230 + }, + { + "epoch": 1.0585825244124012, + "grad_norm": 0.23618648946285248, + "learning_rate": 3.941424714253657e-06, + "loss": 0.8524, + "step": 146240 + }, + { + "epoch": 1.0586549110729875, + "grad_norm": 0.15129047632217407, + "learning_rate": 3.941352327593072e-06, + "loss": 0.8782, + "step": 146250 + }, + { + "epoch": 1.0587272977335735, + "grad_norm": 0.144764244556427, + "learning_rate": 3.941279940932485e-06, + "loss": 0.8546, + "step": 146260 + }, + { + "epoch": 1.0587996843941598, + "grad_norm": 0.14293217658996582, + "learning_rate": 3.941207554271899e-06, + "loss": 0.8508, + "step": 146270 + }, + { + "epoch": 1.0588720710547461, + "grad_norm": 0.1651974767446518, + "learning_rate": 3.941135167611313e-06, + "loss": 0.8733, + "step": 146280 + }, + { + "epoch": 1.0589444577153322, + "grad_norm": 0.15078642964363098, + "learning_rate": 3.941062780950727e-06, + "loss": 0.8563, + "step": 146290 + }, + { + "epoch": 1.0590168443759185, + "grad_norm": 0.1709497570991516, + "learning_rate": 3.940990394290141e-06, + "loss": 0.8564, + "step": 146300 + }, + { + "epoch": 1.0590892310365045, + "grad_norm": 0.15568095445632935, + "learning_rate": 3.940918007629554e-06, + "loss": 0.8716, + "step": 146310 + }, + { + "epoch": 1.0591616176970908, + "grad_norm": 0.15851420164108276, + "learning_rate": 3.940845620968968e-06, + "loss": 0.861, + "step": 146320 + }, + { + "epoch": 1.059234004357677, + "grad_norm": 0.15701240301132202, + "learning_rate": 3.940773234308382e-06, + "loss": 0.861, + "step": 146330 + }, + { + "epoch": 1.0593063910182632, + "grad_norm": 0.160984069108963, + "learning_rate": 3.940700847647796e-06, + "loss": 0.8659, + "step": 146340 + }, + { + "epoch": 1.0593787776788492, + "grad_norm": 0.15175053477287292, + "learning_rate": 3.94062846098721e-06, + "loss": 0.8628, + "step": 146350 + }, + { + "epoch": 1.0594511643394355, + "grad_norm": 0.15705060958862305, + "learning_rate": 3.940556074326623e-06, + "loss": 0.8726, + "step": 146360 + }, + { + "epoch": 1.0595235510000216, + "grad_norm": 0.1432766169309616, + "learning_rate": 3.940483687666038e-06, + "loss": 0.8457, + "step": 146370 + }, + { + "epoch": 1.059595937660608, + "grad_norm": 0.15701285004615784, + "learning_rate": 3.940411301005451e-06, + "loss": 0.8669, + "step": 146380 + }, + { + "epoch": 1.0596683243211942, + "grad_norm": 0.15151244401931763, + "learning_rate": 3.940338914344865e-06, + "loss": 0.8623, + "step": 146390 + }, + { + "epoch": 1.0597407109817802, + "grad_norm": 0.19881872832775116, + "learning_rate": 3.9402665276842785e-06, + "loss": 0.8554, + "step": 146400 + }, + { + "epoch": 1.0598130976423665, + "grad_norm": 0.14868730306625366, + "learning_rate": 3.940194141023693e-06, + "loss": 0.8686, + "step": 146410 + }, + { + "epoch": 1.0598854843029526, + "grad_norm": 0.14795683324337006, + "learning_rate": 3.940121754363107e-06, + "loss": 0.858, + "step": 146420 + }, + { + "epoch": 1.0599578709635389, + "grad_norm": 0.144784614443779, + "learning_rate": 3.94004936770252e-06, + "loss": 0.8613, + "step": 146430 + }, + { + "epoch": 1.060030257624125, + "grad_norm": 0.15263043344020844, + "learning_rate": 3.939976981041934e-06, + "loss": 0.8662, + "step": 146440 + }, + { + "epoch": 1.0601026442847112, + "grad_norm": 0.1521047055721283, + "learning_rate": 3.939904594381348e-06, + "loss": 0.8434, + "step": 146450 + }, + { + "epoch": 1.0601750309452973, + "grad_norm": 0.15608127415180206, + "learning_rate": 3.939832207720762e-06, + "loss": 0.855, + "step": 146460 + }, + { + "epoch": 1.0602474176058836, + "grad_norm": 0.15321072936058044, + "learning_rate": 3.9397598210601756e-06, + "loss": 0.8676, + "step": 146470 + }, + { + "epoch": 1.0603198042664699, + "grad_norm": 0.175469309091568, + "learning_rate": 3.939687434399589e-06, + "loss": 0.857, + "step": 146480 + }, + { + "epoch": 1.060392190927056, + "grad_norm": 0.15530693531036377, + "learning_rate": 3.939615047739003e-06, + "loss": 0.8568, + "step": 146490 + }, + { + "epoch": 1.0604645775876422, + "grad_norm": 0.14557047188282013, + "learning_rate": 3.939542661078416e-06, + "loss": 0.8633, + "step": 146500 + }, + { + "epoch": 1.0605369642482283, + "grad_norm": 0.1573035567998886, + "learning_rate": 3.93947027441783e-06, + "loss": 0.8631, + "step": 146510 + }, + { + "epoch": 1.0606093509088146, + "grad_norm": 0.15351547300815582, + "learning_rate": 3.9393978877572445e-06, + "loss": 0.8401, + "step": 146520 + }, + { + "epoch": 1.0606817375694007, + "grad_norm": 0.21780115365982056, + "learning_rate": 3.939325501096658e-06, + "loss": 0.8566, + "step": 146530 + }, + { + "epoch": 1.060754124229987, + "grad_norm": 0.19611738622188568, + "learning_rate": 3.939253114436072e-06, + "loss": 0.8488, + "step": 146540 + }, + { + "epoch": 1.060826510890573, + "grad_norm": 0.1587083786725998, + "learning_rate": 3.939180727775485e-06, + "loss": 0.8542, + "step": 146550 + }, + { + "epoch": 1.0608988975511593, + "grad_norm": 0.17451640963554382, + "learning_rate": 3.9391083411149e-06, + "loss": 0.8686, + "step": 146560 + }, + { + "epoch": 1.0609712842117454, + "grad_norm": 0.1856866180896759, + "learning_rate": 3.939035954454313e-06, + "loss": 0.8569, + "step": 146570 + }, + { + "epoch": 1.0610436708723316, + "grad_norm": 0.14287951588630676, + "learning_rate": 3.938963567793727e-06, + "loss": 0.8559, + "step": 146580 + }, + { + "epoch": 1.061116057532918, + "grad_norm": 0.17988088726997375, + "learning_rate": 3.938891181133141e-06, + "loss": 0.8711, + "step": 146590 + }, + { + "epoch": 1.061188444193504, + "grad_norm": 0.1705288589000702, + "learning_rate": 3.938818794472555e-06, + "loss": 0.8664, + "step": 146600 + }, + { + "epoch": 1.0612608308540903, + "grad_norm": 0.16595390439033508, + "learning_rate": 3.938746407811969e-06, + "loss": 0.8768, + "step": 146610 + }, + { + "epoch": 1.0613332175146764, + "grad_norm": 0.15512683987617493, + "learning_rate": 3.938674021151382e-06, + "loss": 0.86, + "step": 146620 + }, + { + "epoch": 1.0614056041752626, + "grad_norm": 0.1589556485414505, + "learning_rate": 3.938601634490796e-06, + "loss": 0.8608, + "step": 146630 + }, + { + "epoch": 1.0614779908358487, + "grad_norm": 0.1618974655866623, + "learning_rate": 3.93852924783021e-06, + "loss": 0.8557, + "step": 146640 + }, + { + "epoch": 1.061550377496435, + "grad_norm": 0.1717182695865631, + "learning_rate": 3.938456861169624e-06, + "loss": 0.8621, + "step": 146650 + }, + { + "epoch": 1.061622764157021, + "grad_norm": 0.1516030728816986, + "learning_rate": 3.938384474509038e-06, + "loss": 0.8814, + "step": 146660 + }, + { + "epoch": 1.0616951508176073, + "grad_norm": 0.14224623143672943, + "learning_rate": 3.938312087848451e-06, + "loss": 0.8649, + "step": 146670 + }, + { + "epoch": 1.0617675374781934, + "grad_norm": 0.1500004678964615, + "learning_rate": 3.938239701187866e-06, + "loss": 0.8508, + "step": 146680 + }, + { + "epoch": 1.0618399241387797, + "grad_norm": 0.14933015406131744, + "learning_rate": 3.938167314527279e-06, + "loss": 0.8643, + "step": 146690 + }, + { + "epoch": 1.061912310799366, + "grad_norm": 0.15696154534816742, + "learning_rate": 3.938094927866693e-06, + "loss": 0.8618, + "step": 146700 + }, + { + "epoch": 1.061984697459952, + "grad_norm": 0.1518896073102951, + "learning_rate": 3.9380225412061066e-06, + "loss": 0.8556, + "step": 146710 + }, + { + "epoch": 1.0620570841205383, + "grad_norm": 0.1495170295238495, + "learning_rate": 3.937950154545521e-06, + "loss": 0.855, + "step": 146720 + }, + { + "epoch": 1.0621294707811244, + "grad_norm": 0.15522539615631104, + "learning_rate": 3.937877767884935e-06, + "loss": 0.8592, + "step": 146730 + }, + { + "epoch": 1.0622018574417107, + "grad_norm": 0.14690908789634705, + "learning_rate": 3.937805381224348e-06, + "loss": 0.8489, + "step": 146740 + }, + { + "epoch": 1.0622742441022968, + "grad_norm": 0.17294001579284668, + "learning_rate": 3.937732994563762e-06, + "loss": 0.8548, + "step": 146750 + }, + { + "epoch": 1.062346630762883, + "grad_norm": 0.16013190150260925, + "learning_rate": 3.937660607903176e-06, + "loss": 0.8541, + "step": 146760 + }, + { + "epoch": 1.0624190174234691, + "grad_norm": 0.15690450370311737, + "learning_rate": 3.93758822124259e-06, + "loss": 0.8535, + "step": 146770 + }, + { + "epoch": 1.0624914040840554, + "grad_norm": 0.1489821821451187, + "learning_rate": 3.937515834582004e-06, + "loss": 0.8614, + "step": 146780 + }, + { + "epoch": 1.0625637907446417, + "grad_norm": 0.15481127798557281, + "learning_rate": 3.937443447921417e-06, + "loss": 0.8483, + "step": 146790 + }, + { + "epoch": 1.0626361774052278, + "grad_norm": 0.16512636840343475, + "learning_rate": 3.937371061260832e-06, + "loss": 0.8513, + "step": 146800 + }, + { + "epoch": 1.062708564065814, + "grad_norm": 0.1462734192609787, + "learning_rate": 3.937298674600245e-06, + "loss": 0.8618, + "step": 146810 + }, + { + "epoch": 1.0627809507264, + "grad_norm": 0.1540970355272293, + "learning_rate": 3.937226287939659e-06, + "loss": 0.8699, + "step": 146820 + }, + { + "epoch": 1.0628533373869864, + "grad_norm": 0.14309470355510712, + "learning_rate": 3.9371539012790725e-06, + "loss": 0.8544, + "step": 146830 + }, + { + "epoch": 1.0629257240475725, + "grad_norm": 0.14380641281604767, + "learning_rate": 3.937081514618486e-06, + "loss": 0.8579, + "step": 146840 + }, + { + "epoch": 1.0629981107081587, + "grad_norm": 0.15650497376918793, + "learning_rate": 3.937009127957901e-06, + "loss": 0.8614, + "step": 146850 + }, + { + "epoch": 1.0630704973687448, + "grad_norm": 0.15885330736637115, + "learning_rate": 3.936936741297314e-06, + "loss": 0.8604, + "step": 146860 + }, + { + "epoch": 1.063142884029331, + "grad_norm": 0.15795765817165375, + "learning_rate": 3.936864354636728e-06, + "loss": 0.8521, + "step": 146870 + }, + { + "epoch": 1.0632152706899172, + "grad_norm": 0.15613383054733276, + "learning_rate": 3.9367919679761414e-06, + "loss": 0.8605, + "step": 146880 + }, + { + "epoch": 1.0632876573505035, + "grad_norm": 0.14996646344661713, + "learning_rate": 3.936719581315556e-06, + "loss": 0.863, + "step": 146890 + }, + { + "epoch": 1.0633600440110897, + "grad_norm": 0.1522240936756134, + "learning_rate": 3.9366471946549695e-06, + "loss": 0.8565, + "step": 146900 + }, + { + "epoch": 1.0634324306716758, + "grad_norm": 0.1428409069776535, + "learning_rate": 3.936574807994383e-06, + "loss": 0.8535, + "step": 146910 + }, + { + "epoch": 1.063504817332262, + "grad_norm": 0.14927205443382263, + "learning_rate": 3.936502421333797e-06, + "loss": 0.8608, + "step": 146920 + }, + { + "epoch": 1.0635772039928482, + "grad_norm": 0.15207083523273468, + "learning_rate": 3.936430034673211e-06, + "loss": 0.8657, + "step": 146930 + }, + { + "epoch": 1.0636495906534345, + "grad_norm": 0.15743815898895264, + "learning_rate": 3.936357648012625e-06, + "loss": 0.8576, + "step": 146940 + }, + { + "epoch": 1.0637219773140205, + "grad_norm": 0.14594559371471405, + "learning_rate": 3.9362852613520384e-06, + "loss": 0.8709, + "step": 146950 + }, + { + "epoch": 1.0637943639746068, + "grad_norm": 0.1405821442604065, + "learning_rate": 3.936212874691452e-06, + "loss": 0.8572, + "step": 146960 + }, + { + "epoch": 1.0638667506351929, + "grad_norm": 0.15676797926425934, + "learning_rate": 3.9361404880308665e-06, + "loss": 0.8538, + "step": 146970 + }, + { + "epoch": 1.0639391372957792, + "grad_norm": 0.14403656125068665, + "learning_rate": 3.93606810137028e-06, + "loss": 0.8515, + "step": 146980 + }, + { + "epoch": 1.0640115239563652, + "grad_norm": 0.14301113784313202, + "learning_rate": 3.935995714709694e-06, + "loss": 0.8572, + "step": 146990 + }, + { + "epoch": 1.0640839106169515, + "grad_norm": 0.15309615433216095, + "learning_rate": 3.935923328049107e-06, + "loss": 0.861, + "step": 147000 + }, + { + "epoch": 1.0641562972775378, + "grad_norm": 0.3443004786968231, + "learning_rate": 3.935850941388521e-06, + "loss": 0.8656, + "step": 147010 + }, + { + "epoch": 1.0642286839381239, + "grad_norm": 0.13839933276176453, + "learning_rate": 3.935778554727935e-06, + "loss": 0.8555, + "step": 147020 + }, + { + "epoch": 1.0643010705987102, + "grad_norm": 0.14977069199085236, + "learning_rate": 3.935706168067348e-06, + "loss": 0.8623, + "step": 147030 + }, + { + "epoch": 1.0643734572592962, + "grad_norm": 0.166509747505188, + "learning_rate": 3.935633781406763e-06, + "loss": 0.8703, + "step": 147040 + }, + { + "epoch": 1.0644458439198825, + "grad_norm": 0.15191148221492767, + "learning_rate": 3.935561394746176e-06, + "loss": 0.8508, + "step": 147050 + }, + { + "epoch": 1.0645182305804686, + "grad_norm": 0.15983009338378906, + "learning_rate": 3.93548900808559e-06, + "loss": 0.8578, + "step": 147060 + }, + { + "epoch": 1.0645906172410549, + "grad_norm": 0.1482071876525879, + "learning_rate": 3.9354166214250035e-06, + "loss": 0.8419, + "step": 147070 + }, + { + "epoch": 1.064663003901641, + "grad_norm": 0.1566699892282486, + "learning_rate": 3.935344234764418e-06, + "loss": 0.8658, + "step": 147080 + }, + { + "epoch": 1.0647353905622272, + "grad_norm": 0.14756731688976288, + "learning_rate": 3.935271848103832e-06, + "loss": 0.8493, + "step": 147090 + }, + { + "epoch": 1.0648077772228133, + "grad_norm": 0.15499506890773773, + "learning_rate": 3.935199461443245e-06, + "loss": 0.8484, + "step": 147100 + }, + { + "epoch": 1.0648801638833996, + "grad_norm": 0.15901382267475128, + "learning_rate": 3.935127074782659e-06, + "loss": 0.8613, + "step": 147110 + }, + { + "epoch": 1.0649525505439859, + "grad_norm": 0.1550762802362442, + "learning_rate": 3.935054688122073e-06, + "loss": 0.8441, + "step": 147120 + }, + { + "epoch": 1.065024937204572, + "grad_norm": 0.17926537990570068, + "learning_rate": 3.934982301461487e-06, + "loss": 0.8558, + "step": 147130 + }, + { + "epoch": 1.0650973238651582, + "grad_norm": 0.31348535418510437, + "learning_rate": 3.9349099148009005e-06, + "loss": 0.8624, + "step": 147140 + }, + { + "epoch": 1.0651697105257443, + "grad_norm": 0.15891113877296448, + "learning_rate": 3.934837528140314e-06, + "loss": 0.8495, + "step": 147150 + }, + { + "epoch": 1.0652420971863306, + "grad_norm": 0.15291360020637512, + "learning_rate": 3.934765141479729e-06, + "loss": 0.8455, + "step": 147160 + }, + { + "epoch": 1.0653144838469166, + "grad_norm": 0.25229060649871826, + "learning_rate": 3.934692754819142e-06, + "loss": 0.8589, + "step": 147170 + }, + { + "epoch": 1.065386870507503, + "grad_norm": 0.17564287781715393, + "learning_rate": 3.934620368158556e-06, + "loss": 0.8675, + "step": 147180 + }, + { + "epoch": 1.065459257168089, + "grad_norm": 0.15342505276203156, + "learning_rate": 3.9345479814979695e-06, + "loss": 0.8534, + "step": 147190 + }, + { + "epoch": 1.0655316438286753, + "grad_norm": 0.15959271788597107, + "learning_rate": 3.934475594837384e-06, + "loss": 0.8614, + "step": 147200 + }, + { + "epoch": 1.0656040304892613, + "grad_norm": 0.15670059621334076, + "learning_rate": 3.9344032081767976e-06, + "loss": 0.8668, + "step": 147210 + }, + { + "epoch": 1.0656764171498476, + "grad_norm": 0.16998760402202606, + "learning_rate": 3.934330821516211e-06, + "loss": 0.8682, + "step": 147220 + }, + { + "epoch": 1.065748803810434, + "grad_norm": 0.1576838493347168, + "learning_rate": 3.934258434855625e-06, + "loss": 0.848, + "step": 147230 + }, + { + "epoch": 1.06582119047102, + "grad_norm": 0.13807885348796844, + "learning_rate": 3.934186048195039e-06, + "loss": 0.8532, + "step": 147240 + }, + { + "epoch": 1.0658935771316063, + "grad_norm": 0.15785497426986694, + "learning_rate": 3.934113661534453e-06, + "loss": 0.8594, + "step": 147250 + }, + { + "epoch": 1.0659659637921923, + "grad_norm": 0.16951316595077515, + "learning_rate": 3.9340412748738665e-06, + "loss": 0.8581, + "step": 147260 + }, + { + "epoch": 1.0660383504527786, + "grad_norm": 0.2050899863243103, + "learning_rate": 3.93396888821328e-06, + "loss": 0.8607, + "step": 147270 + }, + { + "epoch": 1.0661107371133647, + "grad_norm": 0.17105598747730255, + "learning_rate": 3.9338965015526946e-06, + "loss": 0.8594, + "step": 147280 + }, + { + "epoch": 1.066183123773951, + "grad_norm": 0.15434543788433075, + "learning_rate": 3.933824114892108e-06, + "loss": 0.8475, + "step": 147290 + }, + { + "epoch": 1.066255510434537, + "grad_norm": 0.1630607694387436, + "learning_rate": 3.933751728231522e-06, + "loss": 0.8562, + "step": 147300 + }, + { + "epoch": 1.0663278970951233, + "grad_norm": 0.15573401749134064, + "learning_rate": 3.933679341570935e-06, + "loss": 0.8537, + "step": 147310 + }, + { + "epoch": 1.0664002837557094, + "grad_norm": 0.15144288539886475, + "learning_rate": 3.93360695491035e-06, + "loss": 0.8598, + "step": 147320 + }, + { + "epoch": 1.0664726704162957, + "grad_norm": 0.14449411630630493, + "learning_rate": 3.9335345682497635e-06, + "loss": 0.8529, + "step": 147330 + }, + { + "epoch": 1.066545057076882, + "grad_norm": 0.15416362881660461, + "learning_rate": 3.933462181589177e-06, + "loss": 0.8507, + "step": 147340 + }, + { + "epoch": 1.066617443737468, + "grad_norm": 0.1476205438375473, + "learning_rate": 3.933389794928591e-06, + "loss": 0.8652, + "step": 147350 + }, + { + "epoch": 1.0666898303980543, + "grad_norm": 0.1484995037317276, + "learning_rate": 3.933317408268005e-06, + "loss": 0.8602, + "step": 147360 + }, + { + "epoch": 1.0667622170586404, + "grad_norm": 0.15167966485023499, + "learning_rate": 3.933245021607419e-06, + "loss": 0.841, + "step": 147370 + }, + { + "epoch": 1.0668346037192267, + "grad_norm": 0.15195885300636292, + "learning_rate": 3.933172634946832e-06, + "loss": 0.8577, + "step": 147380 + }, + { + "epoch": 1.0669069903798127, + "grad_norm": 0.15130667388439178, + "learning_rate": 3.933100248286246e-06, + "loss": 0.8491, + "step": 147390 + }, + { + "epoch": 1.066979377040399, + "grad_norm": 0.15865103900432587, + "learning_rate": 3.9330278616256605e-06, + "loss": 0.8707, + "step": 147400 + }, + { + "epoch": 1.067051763700985, + "grad_norm": 0.15017808973789215, + "learning_rate": 3.932955474965074e-06, + "loss": 0.8559, + "step": 147410 + }, + { + "epoch": 1.0671241503615714, + "grad_norm": 0.15235696732997894, + "learning_rate": 3.932883088304488e-06, + "loss": 0.8593, + "step": 147420 + }, + { + "epoch": 1.0671965370221574, + "grad_norm": 0.19113440811634064, + "learning_rate": 3.932810701643901e-06, + "loss": 0.8677, + "step": 147430 + }, + { + "epoch": 1.0672689236827437, + "grad_norm": 0.19408351182937622, + "learning_rate": 3.932738314983315e-06, + "loss": 0.854, + "step": 147440 + }, + { + "epoch": 1.06734131034333, + "grad_norm": 0.15390466153621674, + "learning_rate": 3.932665928322729e-06, + "loss": 0.8565, + "step": 147450 + }, + { + "epoch": 1.067413697003916, + "grad_norm": 0.1523483693599701, + "learning_rate": 3.932593541662143e-06, + "loss": 0.8531, + "step": 147460 + }, + { + "epoch": 1.0674860836645024, + "grad_norm": 0.17553357779979706, + "learning_rate": 3.932521155001557e-06, + "loss": 0.8569, + "step": 147470 + }, + { + "epoch": 1.0675584703250884, + "grad_norm": 0.19476906955242157, + "learning_rate": 3.93244876834097e-06, + "loss": 0.8691, + "step": 147480 + }, + { + "epoch": 1.0676308569856747, + "grad_norm": 0.2501210570335388, + "learning_rate": 3.932376381680385e-06, + "loss": 0.8737, + "step": 147490 + }, + { + "epoch": 1.0677032436462608, + "grad_norm": 0.17125388979911804, + "learning_rate": 3.932303995019798e-06, + "loss": 0.8577, + "step": 147500 + }, + { + "epoch": 1.067775630306847, + "grad_norm": 0.15461327135562897, + "learning_rate": 3.932231608359212e-06, + "loss": 0.8669, + "step": 147510 + }, + { + "epoch": 1.0678480169674331, + "grad_norm": 0.1496717482805252, + "learning_rate": 3.932159221698626e-06, + "loss": 0.864, + "step": 147520 + }, + { + "epoch": 1.0679204036280194, + "grad_norm": 0.14370813965797424, + "learning_rate": 3.93208683503804e-06, + "loss": 0.8496, + "step": 147530 + }, + { + "epoch": 1.0679927902886055, + "grad_norm": 0.16169150173664093, + "learning_rate": 3.932014448377453e-06, + "loss": 0.8587, + "step": 147540 + }, + { + "epoch": 1.0680651769491918, + "grad_norm": 0.15165531635284424, + "learning_rate": 3.931942061716867e-06, + "loss": 0.8602, + "step": 147550 + }, + { + "epoch": 1.068137563609778, + "grad_norm": 0.14875246584415436, + "learning_rate": 3.931869675056281e-06, + "loss": 0.8488, + "step": 147560 + }, + { + "epoch": 1.0682099502703641, + "grad_norm": 0.15369826555252075, + "learning_rate": 3.9317972883956945e-06, + "loss": 0.8615, + "step": 147570 + }, + { + "epoch": 1.0682823369309504, + "grad_norm": 0.15338794887065887, + "learning_rate": 3.931724901735108e-06, + "loss": 0.8682, + "step": 147580 + }, + { + "epoch": 1.0683547235915365, + "grad_norm": 0.17521625757217407, + "learning_rate": 3.931652515074523e-06, + "loss": 0.8585, + "step": 147590 + }, + { + "epoch": 1.0684271102521228, + "grad_norm": 0.15786945819854736, + "learning_rate": 3.931580128413936e-06, + "loss": 0.8668, + "step": 147600 + }, + { + "epoch": 1.0684994969127088, + "grad_norm": 0.16188044846057892, + "learning_rate": 3.93150774175335e-06, + "loss": 0.8529, + "step": 147610 + }, + { + "epoch": 1.0685718835732951, + "grad_norm": 0.15496951341629028, + "learning_rate": 3.9314353550927634e-06, + "loss": 0.8538, + "step": 147620 + }, + { + "epoch": 1.0686442702338812, + "grad_norm": 0.1538427770137787, + "learning_rate": 3.931362968432177e-06, + "loss": 0.871, + "step": 147630 + }, + { + "epoch": 1.0687166568944675, + "grad_norm": 0.14457300305366516, + "learning_rate": 3.9312905817715915e-06, + "loss": 0.8551, + "step": 147640 + }, + { + "epoch": 1.0687890435550538, + "grad_norm": 0.14885105192661285, + "learning_rate": 3.931218195111005e-06, + "loss": 0.8498, + "step": 147650 + }, + { + "epoch": 1.0688614302156398, + "grad_norm": 0.16504499316215515, + "learning_rate": 3.931145808450419e-06, + "loss": 0.853, + "step": 147660 + }, + { + "epoch": 1.0689338168762261, + "grad_norm": 0.15119002759456635, + "learning_rate": 3.931073421789832e-06, + "loss": 0.8574, + "step": 147670 + }, + { + "epoch": 1.0690062035368122, + "grad_norm": 0.15814948081970215, + "learning_rate": 3.931001035129247e-06, + "loss": 0.8593, + "step": 147680 + }, + { + "epoch": 1.0690785901973985, + "grad_norm": 0.14475545287132263, + "learning_rate": 3.9309286484686604e-06, + "loss": 0.8555, + "step": 147690 + }, + { + "epoch": 1.0691509768579845, + "grad_norm": 0.1500001847743988, + "learning_rate": 3.930856261808074e-06, + "loss": 0.8552, + "step": 147700 + }, + { + "epoch": 1.0692233635185708, + "grad_norm": 0.15069542825222015, + "learning_rate": 3.930783875147488e-06, + "loss": 0.8675, + "step": 147710 + }, + { + "epoch": 1.069295750179157, + "grad_norm": 0.14854957163333893, + "learning_rate": 3.930711488486902e-06, + "loss": 0.8658, + "step": 147720 + }, + { + "epoch": 1.0693681368397432, + "grad_norm": 0.17693032324314117, + "learning_rate": 3.930639101826316e-06, + "loss": 0.8453, + "step": 147730 + }, + { + "epoch": 1.0694405235003295, + "grad_norm": 0.15209737420082092, + "learning_rate": 3.930566715165729e-06, + "loss": 0.8566, + "step": 147740 + }, + { + "epoch": 1.0695129101609155, + "grad_norm": 0.1621839553117752, + "learning_rate": 3.930494328505143e-06, + "loss": 0.8516, + "step": 147750 + }, + { + "epoch": 1.0695852968215018, + "grad_norm": 0.14790567755699158, + "learning_rate": 3.9304219418445575e-06, + "loss": 0.8514, + "step": 147760 + }, + { + "epoch": 1.069657683482088, + "grad_norm": 0.14647607505321503, + "learning_rate": 3.930349555183971e-06, + "loss": 0.8419, + "step": 147770 + }, + { + "epoch": 1.0697300701426742, + "grad_norm": 0.16838887333869934, + "learning_rate": 3.930277168523385e-06, + "loss": 0.8465, + "step": 147780 + }, + { + "epoch": 1.0698024568032602, + "grad_norm": 0.1428205966949463, + "learning_rate": 3.930204781862798e-06, + "loss": 0.8642, + "step": 147790 + }, + { + "epoch": 1.0698748434638465, + "grad_norm": 0.14637254178524017, + "learning_rate": 3.930132395202213e-06, + "loss": 0.862, + "step": 147800 + }, + { + "epoch": 1.0699472301244326, + "grad_norm": 0.14838822185993195, + "learning_rate": 3.930060008541626e-06, + "loss": 0.8473, + "step": 147810 + }, + { + "epoch": 1.070019616785019, + "grad_norm": 0.15595309436321259, + "learning_rate": 3.92998762188104e-06, + "loss": 0.8575, + "step": 147820 + }, + { + "epoch": 1.070092003445605, + "grad_norm": 0.15181201696395874, + "learning_rate": 3.929915235220454e-06, + "loss": 0.8763, + "step": 147830 + }, + { + "epoch": 1.0701643901061912, + "grad_norm": 0.14543366432189941, + "learning_rate": 3.929842848559868e-06, + "loss": 0.8537, + "step": 147840 + }, + { + "epoch": 1.0702367767667775, + "grad_norm": 0.16064667701721191, + "learning_rate": 3.929770461899282e-06, + "loss": 0.8557, + "step": 147850 + }, + { + "epoch": 1.0703091634273636, + "grad_norm": 0.1469428986310959, + "learning_rate": 3.929698075238695e-06, + "loss": 0.8442, + "step": 147860 + }, + { + "epoch": 1.0703815500879499, + "grad_norm": 0.16561655700206757, + "learning_rate": 3.929625688578109e-06, + "loss": 0.849, + "step": 147870 + }, + { + "epoch": 1.070453936748536, + "grad_norm": 0.14810919761657715, + "learning_rate": 3.929553301917523e-06, + "loss": 0.8645, + "step": 147880 + }, + { + "epoch": 1.0705263234091222, + "grad_norm": 0.15601563453674316, + "learning_rate": 3.929480915256937e-06, + "loss": 0.8638, + "step": 147890 + }, + { + "epoch": 1.0705987100697083, + "grad_norm": 0.15862402319908142, + "learning_rate": 3.929408528596351e-06, + "loss": 0.8574, + "step": 147900 + }, + { + "epoch": 1.0706710967302946, + "grad_norm": 0.1481819897890091, + "learning_rate": 3.929336141935764e-06, + "loss": 0.8584, + "step": 147910 + }, + { + "epoch": 1.0707434833908807, + "grad_norm": 0.17046231031417847, + "learning_rate": 3.929263755275179e-06, + "loss": 0.8599, + "step": 147920 + }, + { + "epoch": 1.070815870051467, + "grad_norm": 0.155575230717659, + "learning_rate": 3.929191368614592e-06, + "loss": 0.8527, + "step": 147930 + }, + { + "epoch": 1.070888256712053, + "grad_norm": 0.1649925410747528, + "learning_rate": 3.929118981954006e-06, + "loss": 0.8621, + "step": 147940 + }, + { + "epoch": 1.0709606433726393, + "grad_norm": 0.14515741169452667, + "learning_rate": 3.9290465952934195e-06, + "loss": 0.8572, + "step": 147950 + }, + { + "epoch": 1.0710330300332256, + "grad_norm": 0.1484888792037964, + "learning_rate": 3.928974208632834e-06, + "loss": 0.8535, + "step": 147960 + }, + { + "epoch": 1.0711054166938117, + "grad_norm": 0.14969849586486816, + "learning_rate": 3.928901821972248e-06, + "loss": 0.8612, + "step": 147970 + }, + { + "epoch": 1.071177803354398, + "grad_norm": 0.1582787185907364, + "learning_rate": 3.928829435311661e-06, + "loss": 0.862, + "step": 147980 + }, + { + "epoch": 1.071250190014984, + "grad_norm": 0.14706555008888245, + "learning_rate": 3.928757048651075e-06, + "loss": 0.8579, + "step": 147990 + }, + { + "epoch": 1.0713225766755703, + "grad_norm": 0.1515503078699112, + "learning_rate": 3.928684661990489e-06, + "loss": 0.8742, + "step": 148000 + }, + { + "epoch": 1.0713949633361564, + "grad_norm": 0.16300860047340393, + "learning_rate": 3.928612275329903e-06, + "loss": 0.8495, + "step": 148010 + }, + { + "epoch": 1.0714673499967426, + "grad_norm": 0.15342256426811218, + "learning_rate": 3.9285398886693166e-06, + "loss": 0.864, + "step": 148020 + }, + { + "epoch": 1.0715397366573287, + "grad_norm": 0.1569945365190506, + "learning_rate": 3.92846750200873e-06, + "loss": 0.8606, + "step": 148030 + }, + { + "epoch": 1.071612123317915, + "grad_norm": 0.15621396899223328, + "learning_rate": 3.928395115348145e-06, + "loss": 0.8419, + "step": 148040 + }, + { + "epoch": 1.071684509978501, + "grad_norm": 0.14876516163349152, + "learning_rate": 3.928322728687558e-06, + "loss": 0.8576, + "step": 148050 + }, + { + "epoch": 1.0717568966390874, + "grad_norm": 0.15889860689640045, + "learning_rate": 3.928250342026972e-06, + "loss": 0.8586, + "step": 148060 + }, + { + "epoch": 1.0718292832996736, + "grad_norm": 0.15577679872512817, + "learning_rate": 3.9281779553663855e-06, + "loss": 0.8577, + "step": 148070 + }, + { + "epoch": 1.0719016699602597, + "grad_norm": 0.1523328423500061, + "learning_rate": 3.928105568705799e-06, + "loss": 0.8556, + "step": 148080 + }, + { + "epoch": 1.071974056620846, + "grad_norm": 0.15466256439685822, + "learning_rate": 3.928033182045213e-06, + "loss": 0.8548, + "step": 148090 + }, + { + "epoch": 1.072046443281432, + "grad_norm": 0.14170676469802856, + "learning_rate": 3.927960795384626e-06, + "loss": 0.8438, + "step": 148100 + }, + { + "epoch": 1.0721188299420183, + "grad_norm": 0.1515391618013382, + "learning_rate": 3.927888408724041e-06, + "loss": 0.8612, + "step": 148110 + }, + { + "epoch": 1.0721912166026044, + "grad_norm": 0.17052294313907623, + "learning_rate": 3.927816022063454e-06, + "loss": 0.846, + "step": 148120 + }, + { + "epoch": 1.0722636032631907, + "grad_norm": 0.16018043458461761, + "learning_rate": 3.927743635402868e-06, + "loss": 0.8622, + "step": 148130 + }, + { + "epoch": 1.0723359899237768, + "grad_norm": 0.15600261092185974, + "learning_rate": 3.927671248742282e-06, + "loss": 0.8633, + "step": 148140 + }, + { + "epoch": 1.072408376584363, + "grad_norm": 0.15749092400074005, + "learning_rate": 3.927598862081696e-06, + "loss": 0.843, + "step": 148150 + }, + { + "epoch": 1.0724807632449491, + "grad_norm": 0.15799428522586823, + "learning_rate": 3.92752647542111e-06, + "loss": 0.8613, + "step": 148160 + }, + { + "epoch": 1.0725531499055354, + "grad_norm": 0.16896884143352509, + "learning_rate": 3.927454088760523e-06, + "loss": 0.8452, + "step": 148170 + }, + { + "epoch": 1.0726255365661217, + "grad_norm": 0.1738024652004242, + "learning_rate": 3.927381702099937e-06, + "loss": 0.8582, + "step": 148180 + }, + { + "epoch": 1.0726979232267078, + "grad_norm": 0.1570395976305008, + "learning_rate": 3.927309315439351e-06, + "loss": 0.8687, + "step": 148190 + }, + { + "epoch": 1.072770309887294, + "grad_norm": 0.14952725172042847, + "learning_rate": 3.927236928778765e-06, + "loss": 0.8601, + "step": 148200 + }, + { + "epoch": 1.0728426965478801, + "grad_norm": 0.14488628506660461, + "learning_rate": 3.927164542118179e-06, + "loss": 0.847, + "step": 148210 + }, + { + "epoch": 1.0729150832084664, + "grad_norm": 0.16457195580005646, + "learning_rate": 3.927092155457592e-06, + "loss": 0.862, + "step": 148220 + }, + { + "epoch": 1.0729874698690525, + "grad_norm": 0.15635018050670624, + "learning_rate": 3.927019768797006e-06, + "loss": 0.85, + "step": 148230 + }, + { + "epoch": 1.0730598565296388, + "grad_norm": 0.15710517764091492, + "learning_rate": 3.92694738213642e-06, + "loss": 0.85, + "step": 148240 + }, + { + "epoch": 1.0731322431902248, + "grad_norm": 0.14973808825016022, + "learning_rate": 3.926874995475834e-06, + "loss": 0.8561, + "step": 148250 + }, + { + "epoch": 1.073204629850811, + "grad_norm": 0.18000011146068573, + "learning_rate": 3.926802608815248e-06, + "loss": 0.8432, + "step": 148260 + }, + { + "epoch": 1.0732770165113972, + "grad_norm": 0.14735475182533264, + "learning_rate": 3.926730222154661e-06, + "loss": 0.8719, + "step": 148270 + }, + { + "epoch": 1.0733494031719835, + "grad_norm": 0.1539383828639984, + "learning_rate": 3.926657835494076e-06, + "loss": 0.8637, + "step": 148280 + }, + { + "epoch": 1.0734217898325698, + "grad_norm": 0.26289451122283936, + "learning_rate": 3.926585448833489e-06, + "loss": 0.8712, + "step": 148290 + }, + { + "epoch": 1.0734941764931558, + "grad_norm": 0.16455796360969543, + "learning_rate": 3.926513062172903e-06, + "loss": 0.8612, + "step": 148300 + }, + { + "epoch": 1.073566563153742, + "grad_norm": 0.15031065046787262, + "learning_rate": 3.9264406755123165e-06, + "loss": 0.8556, + "step": 148310 + }, + { + "epoch": 1.0736389498143282, + "grad_norm": 0.14603126049041748, + "learning_rate": 3.926368288851731e-06, + "loss": 0.8603, + "step": 148320 + }, + { + "epoch": 1.0737113364749145, + "grad_norm": 0.1502143293619156, + "learning_rate": 3.926295902191145e-06, + "loss": 0.8621, + "step": 148330 + }, + { + "epoch": 1.0737837231355005, + "grad_norm": 0.15308043360710144, + "learning_rate": 3.926223515530558e-06, + "loss": 0.8633, + "step": 148340 + }, + { + "epoch": 1.0738561097960868, + "grad_norm": 0.15625415742397308, + "learning_rate": 3.926151128869972e-06, + "loss": 0.8654, + "step": 148350 + }, + { + "epoch": 1.0739284964566729, + "grad_norm": 0.16124477982521057, + "learning_rate": 3.926078742209386e-06, + "loss": 0.8583, + "step": 148360 + }, + { + "epoch": 1.0740008831172592, + "grad_norm": 0.14821426570415497, + "learning_rate": 3.9260063555488e-06, + "loss": 0.8497, + "step": 148370 + }, + { + "epoch": 1.0740732697778452, + "grad_norm": 0.14356663823127747, + "learning_rate": 3.9259339688882135e-06, + "loss": 0.8511, + "step": 148380 + }, + { + "epoch": 1.0741456564384315, + "grad_norm": 0.14811255037784576, + "learning_rate": 3.925861582227627e-06, + "loss": 0.8453, + "step": 148390 + }, + { + "epoch": 1.0742180430990178, + "grad_norm": 0.18157772719860077, + "learning_rate": 3.925789195567042e-06, + "loss": 0.8601, + "step": 148400 + }, + { + "epoch": 1.0742904297596039, + "grad_norm": 0.16508889198303223, + "learning_rate": 3.925716808906455e-06, + "loss": 0.8523, + "step": 148410 + }, + { + "epoch": 1.0743628164201902, + "grad_norm": 0.15497782826423645, + "learning_rate": 3.925644422245869e-06, + "loss": 0.8588, + "step": 148420 + }, + { + "epoch": 1.0744352030807762, + "grad_norm": 0.1482570469379425, + "learning_rate": 3.9255720355852824e-06, + "loss": 0.8657, + "step": 148430 + }, + { + "epoch": 1.0745075897413625, + "grad_norm": 0.15788806974887848, + "learning_rate": 3.925499648924697e-06, + "loss": 0.8588, + "step": 148440 + }, + { + "epoch": 1.0745799764019486, + "grad_norm": 0.1528482884168625, + "learning_rate": 3.9254272622641105e-06, + "loss": 0.8502, + "step": 148450 + }, + { + "epoch": 1.0746523630625349, + "grad_norm": 0.14317600429058075, + "learning_rate": 3.925354875603524e-06, + "loss": 0.8432, + "step": 148460 + }, + { + "epoch": 1.074724749723121, + "grad_norm": 0.15992312133312225, + "learning_rate": 3.925282488942938e-06, + "loss": 0.8555, + "step": 148470 + }, + { + "epoch": 1.0747971363837072, + "grad_norm": 0.16133040189743042, + "learning_rate": 3.925210102282352e-06, + "loss": 0.8568, + "step": 148480 + }, + { + "epoch": 1.0748695230442933, + "grad_norm": 0.1480935513973236, + "learning_rate": 3.925137715621766e-06, + "loss": 0.8637, + "step": 148490 + }, + { + "epoch": 1.0749419097048796, + "grad_norm": 0.16463685035705566, + "learning_rate": 3.9250653289611795e-06, + "loss": 0.8598, + "step": 148500 + }, + { + "epoch": 1.0750142963654659, + "grad_norm": 0.1424337923526764, + "learning_rate": 3.924992942300593e-06, + "loss": 0.855, + "step": 148510 + }, + { + "epoch": 1.075086683026052, + "grad_norm": 0.25730541348457336, + "learning_rate": 3.9249205556400075e-06, + "loss": 0.8715, + "step": 148520 + }, + { + "epoch": 1.0751590696866382, + "grad_norm": 0.15047651529312134, + "learning_rate": 3.924848168979421e-06, + "loss": 0.8611, + "step": 148530 + }, + { + "epoch": 1.0752314563472243, + "grad_norm": 0.15243446826934814, + "learning_rate": 3.924775782318835e-06, + "loss": 0.8515, + "step": 148540 + }, + { + "epoch": 1.0753038430078106, + "grad_norm": 0.14074011147022247, + "learning_rate": 3.924703395658248e-06, + "loss": 0.8418, + "step": 148550 + }, + { + "epoch": 1.0753762296683966, + "grad_norm": 0.14536842703819275, + "learning_rate": 3.924631008997663e-06, + "loss": 0.8604, + "step": 148560 + }, + { + "epoch": 1.075448616328983, + "grad_norm": 0.1618451476097107, + "learning_rate": 3.9245586223370765e-06, + "loss": 0.8389, + "step": 148570 + }, + { + "epoch": 1.075521002989569, + "grad_norm": 0.1468305140733719, + "learning_rate": 3.92448623567649e-06, + "loss": 0.8525, + "step": 148580 + }, + { + "epoch": 1.0755933896501553, + "grad_norm": 0.22342850267887115, + "learning_rate": 3.924413849015904e-06, + "loss": 0.8623, + "step": 148590 + }, + { + "epoch": 1.0756657763107413, + "grad_norm": 0.1797487586736679, + "learning_rate": 3.924341462355317e-06, + "loss": 0.8519, + "step": 148600 + }, + { + "epoch": 1.0757381629713276, + "grad_norm": 0.16016285121440887, + "learning_rate": 3.924269075694731e-06, + "loss": 0.8712, + "step": 148610 + }, + { + "epoch": 1.075810549631914, + "grad_norm": 0.16447670757770538, + "learning_rate": 3.9241966890341445e-06, + "loss": 0.8615, + "step": 148620 + }, + { + "epoch": 1.0758829362925, + "grad_norm": 0.18734236061573029, + "learning_rate": 3.924124302373559e-06, + "loss": 0.8522, + "step": 148630 + }, + { + "epoch": 1.0759553229530863, + "grad_norm": 0.14720183610916138, + "learning_rate": 3.924051915712973e-06, + "loss": 0.8525, + "step": 148640 + }, + { + "epoch": 1.0760277096136723, + "grad_norm": 0.1844429075717926, + "learning_rate": 3.923979529052386e-06, + "loss": 0.8465, + "step": 148650 + }, + { + "epoch": 1.0761000962742586, + "grad_norm": 0.15892651677131653, + "learning_rate": 3.9239071423918e-06, + "loss": 0.8523, + "step": 148660 + }, + { + "epoch": 1.0761724829348447, + "grad_norm": 0.15322613716125488, + "learning_rate": 3.923834755731214e-06, + "loss": 0.8584, + "step": 148670 + }, + { + "epoch": 1.076244869595431, + "grad_norm": 0.1480284184217453, + "learning_rate": 3.923762369070628e-06, + "loss": 0.8334, + "step": 148680 + }, + { + "epoch": 1.076317256256017, + "grad_norm": 0.16445012390613556, + "learning_rate": 3.9236899824100415e-06, + "loss": 0.8602, + "step": 148690 + }, + { + "epoch": 1.0763896429166033, + "grad_norm": 0.15210187435150146, + "learning_rate": 3.923617595749455e-06, + "loss": 0.8627, + "step": 148700 + }, + { + "epoch": 1.0764620295771896, + "grad_norm": 0.1529064029455185, + "learning_rate": 3.92354520908887e-06, + "loss": 0.8646, + "step": 148710 + }, + { + "epoch": 1.0765344162377757, + "grad_norm": 0.15188395977020264, + "learning_rate": 3.923472822428283e-06, + "loss": 0.8636, + "step": 148720 + }, + { + "epoch": 1.076606802898362, + "grad_norm": 0.14736254513263702, + "learning_rate": 3.923400435767697e-06, + "loss": 0.8703, + "step": 148730 + }, + { + "epoch": 1.076679189558948, + "grad_norm": 0.14823542535305023, + "learning_rate": 3.9233280491071105e-06, + "loss": 0.863, + "step": 148740 + }, + { + "epoch": 1.0767515762195343, + "grad_norm": 0.15504339337348938, + "learning_rate": 3.923255662446525e-06, + "loss": 0.845, + "step": 148750 + }, + { + "epoch": 1.0768239628801204, + "grad_norm": 0.1641693115234375, + "learning_rate": 3.9231832757859386e-06, + "loss": 0.8574, + "step": 148760 + }, + { + "epoch": 1.0768963495407067, + "grad_norm": 0.1393650621175766, + "learning_rate": 3.923110889125352e-06, + "loss": 0.8528, + "step": 148770 + }, + { + "epoch": 1.0769687362012927, + "grad_norm": 0.7887364625930786, + "learning_rate": 3.923038502464766e-06, + "loss": 0.8631, + "step": 148780 + }, + { + "epoch": 1.077041122861879, + "grad_norm": 0.14820659160614014, + "learning_rate": 3.92296611580418e-06, + "loss": 0.8757, + "step": 148790 + }, + { + "epoch": 1.0771135095224653, + "grad_norm": 0.1467932164669037, + "learning_rate": 3.922893729143594e-06, + "loss": 0.8715, + "step": 148800 + }, + { + "epoch": 1.0771858961830514, + "grad_norm": 0.1651478409767151, + "learning_rate": 3.9228213424830075e-06, + "loss": 0.8704, + "step": 148810 + }, + { + "epoch": 1.0772582828436377, + "grad_norm": 0.1475069671869278, + "learning_rate": 3.922748955822421e-06, + "loss": 0.8651, + "step": 148820 + }, + { + "epoch": 1.0773306695042237, + "grad_norm": 0.1452004313468933, + "learning_rate": 3.9226765691618356e-06, + "loss": 0.8628, + "step": 148830 + }, + { + "epoch": 1.07740305616481, + "grad_norm": 0.15325690805912018, + "learning_rate": 3.922604182501249e-06, + "loss": 0.8556, + "step": 148840 + }, + { + "epoch": 1.077475442825396, + "grad_norm": 0.16279549896717072, + "learning_rate": 3.922531795840663e-06, + "loss": 0.8468, + "step": 148850 + }, + { + "epoch": 1.0775478294859824, + "grad_norm": 0.1613786667585373, + "learning_rate": 3.922459409180076e-06, + "loss": 0.8632, + "step": 148860 + }, + { + "epoch": 1.0776202161465684, + "grad_norm": 0.15279968082904816, + "learning_rate": 3.92238702251949e-06, + "loss": 0.8663, + "step": 148870 + }, + { + "epoch": 1.0776926028071547, + "grad_norm": 0.16398665308952332, + "learning_rate": 3.9223146358589045e-06, + "loss": 0.8583, + "step": 148880 + }, + { + "epoch": 1.0777649894677408, + "grad_norm": 0.15407854318618774, + "learning_rate": 3.922242249198318e-06, + "loss": 0.8703, + "step": 148890 + }, + { + "epoch": 1.077837376128327, + "grad_norm": 0.16437429189682007, + "learning_rate": 3.922169862537732e-06, + "loss": 0.8619, + "step": 148900 + }, + { + "epoch": 1.0779097627889134, + "grad_norm": 0.14691048860549927, + "learning_rate": 3.922097475877145e-06, + "loss": 0.8478, + "step": 148910 + }, + { + "epoch": 1.0779821494494994, + "grad_norm": 0.14637692272663116, + "learning_rate": 3.92202508921656e-06, + "loss": 0.8654, + "step": 148920 + }, + { + "epoch": 1.0780545361100857, + "grad_norm": 0.1593993455171585, + "learning_rate": 3.921952702555973e-06, + "loss": 0.8506, + "step": 148930 + }, + { + "epoch": 1.0781269227706718, + "grad_norm": 0.14419616758823395, + "learning_rate": 3.921880315895387e-06, + "loss": 0.846, + "step": 148940 + }, + { + "epoch": 1.078199309431258, + "grad_norm": 0.15830476582050323, + "learning_rate": 3.921807929234801e-06, + "loss": 0.8575, + "step": 148950 + }, + { + "epoch": 1.0782716960918441, + "grad_norm": 0.16840828955173492, + "learning_rate": 3.921735542574215e-06, + "loss": 0.8483, + "step": 148960 + }, + { + "epoch": 1.0783440827524304, + "grad_norm": 0.15455256402492523, + "learning_rate": 3.921663155913629e-06, + "loss": 0.8433, + "step": 148970 + }, + { + "epoch": 1.0784164694130165, + "grad_norm": 0.1700802743434906, + "learning_rate": 3.921590769253042e-06, + "loss": 0.8655, + "step": 148980 + }, + { + "epoch": 1.0784888560736028, + "grad_norm": 0.1804065704345703, + "learning_rate": 3.921518382592456e-06, + "loss": 0.8648, + "step": 148990 + }, + { + "epoch": 1.0785612427341889, + "grad_norm": 0.16987882554531097, + "learning_rate": 3.9214459959318704e-06, + "loss": 0.8674, + "step": 149000 + }, + { + "epoch": 1.0786336293947751, + "grad_norm": 0.14747925102710724, + "learning_rate": 3.921373609271284e-06, + "loss": 0.858, + "step": 149010 + }, + { + "epoch": 1.0787060160553614, + "grad_norm": 0.1391853392124176, + "learning_rate": 3.921301222610698e-06, + "loss": 0.8458, + "step": 149020 + }, + { + "epoch": 1.0787784027159475, + "grad_norm": 0.15131983160972595, + "learning_rate": 3.921228835950111e-06, + "loss": 0.8572, + "step": 149030 + }, + { + "epoch": 1.0788507893765338, + "grad_norm": 0.14283746480941772, + "learning_rate": 3.921156449289526e-06, + "loss": 0.8434, + "step": 149040 + }, + { + "epoch": 1.0789231760371198, + "grad_norm": 0.1681165099143982, + "learning_rate": 3.921084062628939e-06, + "loss": 0.8539, + "step": 149050 + }, + { + "epoch": 1.0789955626977061, + "grad_norm": 0.15962868928909302, + "learning_rate": 3.921011675968353e-06, + "loss": 0.8488, + "step": 149060 + }, + { + "epoch": 1.0790679493582922, + "grad_norm": 0.14654859900474548, + "learning_rate": 3.920939289307767e-06, + "loss": 0.848, + "step": 149070 + }, + { + "epoch": 1.0791403360188785, + "grad_norm": 0.1605050265789032, + "learning_rate": 3.920866902647181e-06, + "loss": 0.8612, + "step": 149080 + }, + { + "epoch": 1.0792127226794646, + "grad_norm": 0.15435899794101715, + "learning_rate": 3.920794515986595e-06, + "loss": 0.8665, + "step": 149090 + }, + { + "epoch": 1.0792851093400508, + "grad_norm": 0.15466824173927307, + "learning_rate": 3.920722129326008e-06, + "loss": 0.8633, + "step": 149100 + }, + { + "epoch": 1.079357496000637, + "grad_norm": 0.1811055988073349, + "learning_rate": 3.920649742665422e-06, + "loss": 0.8626, + "step": 149110 + }, + { + "epoch": 1.0794298826612232, + "grad_norm": 0.15361252427101135, + "learning_rate": 3.920577356004836e-06, + "loss": 0.8588, + "step": 149120 + }, + { + "epoch": 1.0795022693218095, + "grad_norm": 0.15100868046283722, + "learning_rate": 3.920504969344249e-06, + "loss": 0.8744, + "step": 149130 + }, + { + "epoch": 1.0795746559823955, + "grad_norm": 0.16140015423297882, + "learning_rate": 3.920432582683663e-06, + "loss": 0.8592, + "step": 149140 + }, + { + "epoch": 1.0796470426429818, + "grad_norm": 0.1692638099193573, + "learning_rate": 3.920360196023077e-06, + "loss": 0.8445, + "step": 149150 + }, + { + "epoch": 1.079719429303568, + "grad_norm": 0.1461089849472046, + "learning_rate": 3.920287809362491e-06, + "loss": 0.8389, + "step": 149160 + }, + { + "epoch": 1.0797918159641542, + "grad_norm": 0.15460637211799622, + "learning_rate": 3.9202154227019044e-06, + "loss": 0.8514, + "step": 149170 + }, + { + "epoch": 1.0798642026247403, + "grad_norm": 0.15934206545352936, + "learning_rate": 3.920143036041318e-06, + "loss": 0.8502, + "step": 149180 + }, + { + "epoch": 1.0799365892853265, + "grad_norm": 0.17823490500450134, + "learning_rate": 3.9200706493807325e-06, + "loss": 0.8531, + "step": 149190 + }, + { + "epoch": 1.0800089759459126, + "grad_norm": 0.15246717631816864, + "learning_rate": 3.919998262720146e-06, + "loss": 0.8649, + "step": 149200 + }, + { + "epoch": 1.080081362606499, + "grad_norm": 0.1905028522014618, + "learning_rate": 3.91992587605956e-06, + "loss": 0.8662, + "step": 149210 + }, + { + "epoch": 1.080153749267085, + "grad_norm": 0.16161444783210754, + "learning_rate": 3.919853489398973e-06, + "loss": 0.8635, + "step": 149220 + }, + { + "epoch": 1.0802261359276712, + "grad_norm": 0.1495893895626068, + "learning_rate": 3.919781102738388e-06, + "loss": 0.8637, + "step": 149230 + }, + { + "epoch": 1.0802985225882575, + "grad_norm": 0.15014596283435822, + "learning_rate": 3.9197087160778015e-06, + "loss": 0.8642, + "step": 149240 + }, + { + "epoch": 1.0803709092488436, + "grad_norm": 0.13995687663555145, + "learning_rate": 3.919636329417215e-06, + "loss": 0.8641, + "step": 149250 + }, + { + "epoch": 1.08044329590943, + "grad_norm": 0.15207460522651672, + "learning_rate": 3.919563942756629e-06, + "loss": 0.852, + "step": 149260 + }, + { + "epoch": 1.080515682570016, + "grad_norm": 0.15077516436576843, + "learning_rate": 3.919491556096043e-06, + "loss": 0.8663, + "step": 149270 + }, + { + "epoch": 1.0805880692306022, + "grad_norm": 0.15563450753688812, + "learning_rate": 3.919419169435457e-06, + "loss": 0.8547, + "step": 149280 + }, + { + "epoch": 1.0806604558911883, + "grad_norm": 0.1566159725189209, + "learning_rate": 3.91934678277487e-06, + "loss": 0.8581, + "step": 149290 + }, + { + "epoch": 1.0807328425517746, + "grad_norm": 0.1530294120311737, + "learning_rate": 3.919274396114284e-06, + "loss": 0.8684, + "step": 149300 + }, + { + "epoch": 1.0808052292123607, + "grad_norm": 0.14563488960266113, + "learning_rate": 3.9192020094536985e-06, + "loss": 0.8668, + "step": 149310 + }, + { + "epoch": 1.080877615872947, + "grad_norm": 0.36675575375556946, + "learning_rate": 3.919129622793112e-06, + "loss": 0.8571, + "step": 149320 + }, + { + "epoch": 1.080950002533533, + "grad_norm": 0.17297199368476868, + "learning_rate": 3.919057236132526e-06, + "loss": 0.867, + "step": 149330 + }, + { + "epoch": 1.0810223891941193, + "grad_norm": 0.1599206179380417, + "learning_rate": 3.918984849471939e-06, + "loss": 0.8633, + "step": 149340 + }, + { + "epoch": 1.0810947758547056, + "grad_norm": 0.16135400533676147, + "learning_rate": 3.918912462811354e-06, + "loss": 0.8661, + "step": 149350 + }, + { + "epoch": 1.0811671625152917, + "grad_norm": 0.16339783370494843, + "learning_rate": 3.918840076150767e-06, + "loss": 0.8668, + "step": 149360 + }, + { + "epoch": 1.081239549175878, + "grad_norm": 0.14185838401317596, + "learning_rate": 3.918767689490181e-06, + "loss": 0.851, + "step": 149370 + }, + { + "epoch": 1.081311935836464, + "grad_norm": 0.14981046319007874, + "learning_rate": 3.918695302829595e-06, + "loss": 0.862, + "step": 149380 + }, + { + "epoch": 1.0813843224970503, + "grad_norm": 0.14530541002750397, + "learning_rate": 3.918622916169009e-06, + "loss": 0.8553, + "step": 149390 + }, + { + "epoch": 1.0814567091576364, + "grad_norm": 0.14957144856452942, + "learning_rate": 3.918550529508423e-06, + "loss": 0.8599, + "step": 149400 + }, + { + "epoch": 1.0815290958182227, + "grad_norm": 0.14457090198993683, + "learning_rate": 3.918478142847836e-06, + "loss": 0.8483, + "step": 149410 + }, + { + "epoch": 1.0816014824788087, + "grad_norm": 0.1516246795654297, + "learning_rate": 3.91840575618725e-06, + "loss": 0.8654, + "step": 149420 + }, + { + "epoch": 1.081673869139395, + "grad_norm": 0.15999293327331543, + "learning_rate": 3.918333369526664e-06, + "loss": 0.8624, + "step": 149430 + }, + { + "epoch": 1.081746255799981, + "grad_norm": 0.16079509258270264, + "learning_rate": 3.918260982866078e-06, + "loss": 0.8486, + "step": 149440 + }, + { + "epoch": 1.0818186424605674, + "grad_norm": 0.16731907427310944, + "learning_rate": 3.918188596205492e-06, + "loss": 0.8651, + "step": 149450 + }, + { + "epoch": 1.0818910291211536, + "grad_norm": 0.16714203357696533, + "learning_rate": 3.918116209544905e-06, + "loss": 0.8646, + "step": 149460 + }, + { + "epoch": 1.0819634157817397, + "grad_norm": 0.14884908497333527, + "learning_rate": 3.918043822884319e-06, + "loss": 0.8628, + "step": 149470 + }, + { + "epoch": 1.082035802442326, + "grad_norm": 0.1482784003019333, + "learning_rate": 3.917971436223733e-06, + "loss": 0.86, + "step": 149480 + }, + { + "epoch": 1.082108189102912, + "grad_norm": 0.17224667966365814, + "learning_rate": 3.917899049563147e-06, + "loss": 0.863, + "step": 149490 + }, + { + "epoch": 1.0821805757634984, + "grad_norm": 0.15392819046974182, + "learning_rate": 3.9178266629025606e-06, + "loss": 0.8601, + "step": 149500 + }, + { + "epoch": 1.0822529624240844, + "grad_norm": 0.15251944959163666, + "learning_rate": 3.917754276241974e-06, + "loss": 0.8585, + "step": 149510 + }, + { + "epoch": 1.0823253490846707, + "grad_norm": 0.14979909360408783, + "learning_rate": 3.917681889581389e-06, + "loss": 0.8553, + "step": 149520 + }, + { + "epoch": 1.0823977357452568, + "grad_norm": 0.1591353863477707, + "learning_rate": 3.917609502920802e-06, + "loss": 0.8604, + "step": 149530 + }, + { + "epoch": 1.082470122405843, + "grad_norm": 0.1540420949459076, + "learning_rate": 3.917537116260216e-06, + "loss": 0.8674, + "step": 149540 + }, + { + "epoch": 1.0825425090664291, + "grad_norm": 0.43315520882606506, + "learning_rate": 3.9174647295996295e-06, + "loss": 0.8565, + "step": 149550 + }, + { + "epoch": 1.0826148957270154, + "grad_norm": 0.14986060559749603, + "learning_rate": 3.917392342939044e-06, + "loss": 0.8639, + "step": 149560 + }, + { + "epoch": 1.0826872823876017, + "grad_norm": 0.1607617884874344, + "learning_rate": 3.9173199562784576e-06, + "loss": 0.8578, + "step": 149570 + }, + { + "epoch": 1.0827596690481878, + "grad_norm": 0.1520787626504898, + "learning_rate": 3.917247569617871e-06, + "loss": 0.8506, + "step": 149580 + }, + { + "epoch": 1.082832055708774, + "grad_norm": 0.15222980082035065, + "learning_rate": 3.917175182957285e-06, + "loss": 0.8564, + "step": 149590 + }, + { + "epoch": 1.0829044423693601, + "grad_norm": 0.14734259247779846, + "learning_rate": 3.917102796296699e-06, + "loss": 0.8571, + "step": 149600 + }, + { + "epoch": 1.0829768290299464, + "grad_norm": 0.15078797936439514, + "learning_rate": 3.917030409636113e-06, + "loss": 0.8483, + "step": 149610 + }, + { + "epoch": 1.0830492156905325, + "grad_norm": 0.15337948501110077, + "learning_rate": 3.9169580229755265e-06, + "loss": 0.8463, + "step": 149620 + }, + { + "epoch": 1.0831216023511188, + "grad_norm": 0.24916933476924896, + "learning_rate": 3.91688563631494e-06, + "loss": 0.8624, + "step": 149630 + }, + { + "epoch": 1.0831939890117048, + "grad_norm": 0.15374572575092316, + "learning_rate": 3.9168132496543546e-06, + "loss": 0.8475, + "step": 149640 + }, + { + "epoch": 1.0832663756722911, + "grad_norm": 0.17309990525245667, + "learning_rate": 3.916740862993768e-06, + "loss": 0.8458, + "step": 149650 + }, + { + "epoch": 1.0833387623328772, + "grad_norm": 0.16714857518672943, + "learning_rate": 3.916668476333181e-06, + "loss": 0.8569, + "step": 149660 + }, + { + "epoch": 1.0834111489934635, + "grad_norm": 0.15130509436130524, + "learning_rate": 3.916596089672595e-06, + "loss": 0.8534, + "step": 149670 + }, + { + "epoch": 1.0834835356540498, + "grad_norm": 0.1503904163837433, + "learning_rate": 3.916523703012009e-06, + "loss": 0.8673, + "step": 149680 + }, + { + "epoch": 1.0835559223146358, + "grad_norm": 0.15241362154483795, + "learning_rate": 3.916451316351423e-06, + "loss": 0.8575, + "step": 149690 + }, + { + "epoch": 1.083628308975222, + "grad_norm": 0.15771149098873138, + "learning_rate": 3.916378929690836e-06, + "loss": 0.8571, + "step": 149700 + }, + { + "epoch": 1.0837006956358082, + "grad_norm": 0.17620976269245148, + "learning_rate": 3.916306543030251e-06, + "loss": 0.8588, + "step": 149710 + }, + { + "epoch": 1.0837730822963945, + "grad_norm": 0.15407635271549225, + "learning_rate": 3.916234156369664e-06, + "loss": 0.8585, + "step": 149720 + }, + { + "epoch": 1.0838454689569805, + "grad_norm": 0.14716872572898865, + "learning_rate": 3.916161769709078e-06, + "loss": 0.8473, + "step": 149730 + }, + { + "epoch": 1.0839178556175668, + "grad_norm": 0.14605097472667694, + "learning_rate": 3.916089383048492e-06, + "loss": 0.8604, + "step": 149740 + }, + { + "epoch": 1.0839902422781529, + "grad_norm": 0.15472210943698883, + "learning_rate": 3.916016996387906e-06, + "loss": 0.8726, + "step": 149750 + }, + { + "epoch": 1.0840626289387392, + "grad_norm": 0.15628187358379364, + "learning_rate": 3.91594460972732e-06, + "loss": 0.8521, + "step": 149760 + }, + { + "epoch": 1.0841350155993255, + "grad_norm": 0.1554306596517563, + "learning_rate": 3.915872223066733e-06, + "loss": 0.8629, + "step": 149770 + }, + { + "epoch": 1.0842074022599115, + "grad_norm": 0.14699378609657288, + "learning_rate": 3.915799836406147e-06, + "loss": 0.8479, + "step": 149780 + }, + { + "epoch": 1.0842797889204978, + "grad_norm": 0.1461055874824524, + "learning_rate": 3.915727449745561e-06, + "loss": 0.8519, + "step": 149790 + }, + { + "epoch": 1.0843521755810839, + "grad_norm": 0.1457691639661789, + "learning_rate": 3.915655063084975e-06, + "loss": 0.8723, + "step": 149800 + }, + { + "epoch": 1.0844245622416702, + "grad_norm": 0.1460854858160019, + "learning_rate": 3.915582676424389e-06, + "loss": 0.8567, + "step": 149810 + }, + { + "epoch": 1.0844969489022562, + "grad_norm": 0.14653198421001434, + "learning_rate": 3.915510289763802e-06, + "loss": 0.8545, + "step": 149820 + }, + { + "epoch": 1.0845693355628425, + "grad_norm": 0.1542053073644638, + "learning_rate": 3.915437903103217e-06, + "loss": 0.8549, + "step": 149830 + }, + { + "epoch": 1.0846417222234286, + "grad_norm": 0.1533014178276062, + "learning_rate": 3.91536551644263e-06, + "loss": 0.8684, + "step": 149840 + }, + { + "epoch": 1.0847141088840149, + "grad_norm": 0.1554461568593979, + "learning_rate": 3.915293129782044e-06, + "loss": 0.8568, + "step": 149850 + }, + { + "epoch": 1.084786495544601, + "grad_norm": 0.15294988453388214, + "learning_rate": 3.9152207431214575e-06, + "loss": 0.8692, + "step": 149860 + }, + { + "epoch": 1.0848588822051872, + "grad_norm": 0.1588299423456192, + "learning_rate": 3.915148356460872e-06, + "loss": 0.8545, + "step": 149870 + }, + { + "epoch": 1.0849312688657735, + "grad_norm": 0.14999845623970032, + "learning_rate": 3.915075969800286e-06, + "loss": 0.8722, + "step": 149880 + }, + { + "epoch": 1.0850036555263596, + "grad_norm": 0.1531199961900711, + "learning_rate": 3.915003583139699e-06, + "loss": 0.8564, + "step": 149890 + }, + { + "epoch": 1.0850760421869459, + "grad_norm": 0.1617061197757721, + "learning_rate": 3.914931196479113e-06, + "loss": 0.8435, + "step": 149900 + }, + { + "epoch": 1.085148428847532, + "grad_norm": 0.1557346135377884, + "learning_rate": 3.914858809818527e-06, + "loss": 0.8582, + "step": 149910 + }, + { + "epoch": 1.0852208155081182, + "grad_norm": 0.14734655618667603, + "learning_rate": 3.914786423157941e-06, + "loss": 0.8694, + "step": 149920 + }, + { + "epoch": 1.0852932021687043, + "grad_norm": 0.1435697227716446, + "learning_rate": 3.9147140364973545e-06, + "loss": 0.8429, + "step": 149930 + }, + { + "epoch": 1.0853655888292906, + "grad_norm": 0.15137584507465363, + "learning_rate": 3.914641649836768e-06, + "loss": 0.866, + "step": 149940 + }, + { + "epoch": 1.0854379754898766, + "grad_norm": 0.1859600692987442, + "learning_rate": 3.914569263176183e-06, + "loss": 0.8518, + "step": 149950 + }, + { + "epoch": 1.085510362150463, + "grad_norm": 0.15186361968517303, + "learning_rate": 3.914496876515596e-06, + "loss": 0.8694, + "step": 149960 + }, + { + "epoch": 1.0855827488110492, + "grad_norm": 0.1560952514410019, + "learning_rate": 3.91442448985501e-06, + "loss": 0.8628, + "step": 149970 + }, + { + "epoch": 1.0856551354716353, + "grad_norm": 0.14067547023296356, + "learning_rate": 3.9143521031944234e-06, + "loss": 0.8585, + "step": 149980 + }, + { + "epoch": 1.0857275221322216, + "grad_norm": 0.15680783987045288, + "learning_rate": 3.914279716533838e-06, + "loss": 0.8498, + "step": 149990 + }, + { + "epoch": 1.0857999087928076, + "grad_norm": 0.15567612648010254, + "learning_rate": 3.9142073298732515e-06, + "loss": 0.8562, + "step": 150000 + }, + { + "epoch": 1.085872295453394, + "grad_norm": 0.1518632471561432, + "learning_rate": 3.914134943212665e-06, + "loss": 0.8667, + "step": 150010 + }, + { + "epoch": 1.08594468211398, + "grad_norm": 0.15622593462467194, + "learning_rate": 3.914062556552079e-06, + "loss": 0.8498, + "step": 150020 + }, + { + "epoch": 1.0860170687745663, + "grad_norm": 0.15988045930862427, + "learning_rate": 3.913990169891493e-06, + "loss": 0.8651, + "step": 150030 + }, + { + "epoch": 1.0860894554351523, + "grad_norm": 0.15659677982330322, + "learning_rate": 3.913917783230907e-06, + "loss": 0.8574, + "step": 150040 + }, + { + "epoch": 1.0861618420957386, + "grad_norm": 0.15888313949108124, + "learning_rate": 3.9138453965703205e-06, + "loss": 0.8573, + "step": 150050 + }, + { + "epoch": 1.0862342287563247, + "grad_norm": 0.16083279252052307, + "learning_rate": 3.913773009909734e-06, + "loss": 0.8572, + "step": 150060 + }, + { + "epoch": 1.086306615416911, + "grad_norm": 0.14704735577106476, + "learning_rate": 3.9137006232491485e-06, + "loss": 0.8524, + "step": 150070 + }, + { + "epoch": 1.0863790020774973, + "grad_norm": 0.15133285522460938, + "learning_rate": 3.913628236588562e-06, + "loss": 0.8759, + "step": 150080 + }, + { + "epoch": 1.0864513887380833, + "grad_norm": 0.14864712953567505, + "learning_rate": 3.913555849927976e-06, + "loss": 0.8561, + "step": 150090 + }, + { + "epoch": 1.0865237753986696, + "grad_norm": 0.14330703020095825, + "learning_rate": 3.913483463267389e-06, + "loss": 0.8613, + "step": 150100 + }, + { + "epoch": 1.0865961620592557, + "grad_norm": 0.1465221792459488, + "learning_rate": 3.913411076606803e-06, + "loss": 0.8581, + "step": 150110 + }, + { + "epoch": 1.086668548719842, + "grad_norm": 0.14701513946056366, + "learning_rate": 3.9133386899462175e-06, + "loss": 0.8542, + "step": 150120 + }, + { + "epoch": 1.086740935380428, + "grad_norm": 0.15257854759693146, + "learning_rate": 3.913266303285631e-06, + "loss": 0.868, + "step": 150130 + }, + { + "epoch": 1.0868133220410143, + "grad_norm": 0.15649712085723877, + "learning_rate": 3.913193916625045e-06, + "loss": 0.8602, + "step": 150140 + }, + { + "epoch": 1.0868857087016004, + "grad_norm": 0.14844316244125366, + "learning_rate": 3.913121529964458e-06, + "loss": 0.8488, + "step": 150150 + }, + { + "epoch": 1.0869580953621867, + "grad_norm": 0.14990560710430145, + "learning_rate": 3.913049143303873e-06, + "loss": 0.8656, + "step": 150160 + }, + { + "epoch": 1.0870304820227727, + "grad_norm": 0.17032568156719208, + "learning_rate": 3.912976756643286e-06, + "loss": 0.8592, + "step": 150170 + }, + { + "epoch": 1.087102868683359, + "grad_norm": 0.16417841613292694, + "learning_rate": 3.9129043699827e-06, + "loss": 0.8502, + "step": 150180 + }, + { + "epoch": 1.0871752553439453, + "grad_norm": 0.16040199995040894, + "learning_rate": 3.912831983322114e-06, + "loss": 0.8603, + "step": 150190 + }, + { + "epoch": 1.0872476420045314, + "grad_norm": 0.15688760578632355, + "learning_rate": 3.912759596661527e-06, + "loss": 0.8449, + "step": 150200 + }, + { + "epoch": 1.0873200286651177, + "grad_norm": 0.14727970957756042, + "learning_rate": 3.912687210000941e-06, + "loss": 0.8688, + "step": 150210 + }, + { + "epoch": 1.0873924153257037, + "grad_norm": 0.1582842767238617, + "learning_rate": 3.912614823340355e-06, + "loss": 0.8636, + "step": 150220 + }, + { + "epoch": 1.08746480198629, + "grad_norm": 0.15592309832572937, + "learning_rate": 3.912542436679769e-06, + "loss": 0.8656, + "step": 150230 + }, + { + "epoch": 1.087537188646876, + "grad_norm": 0.1545068770647049, + "learning_rate": 3.9124700500191826e-06, + "loss": 0.8603, + "step": 150240 + }, + { + "epoch": 1.0876095753074624, + "grad_norm": 0.15706755220890045, + "learning_rate": 3.912397663358596e-06, + "loss": 0.8549, + "step": 150250 + }, + { + "epoch": 1.0876819619680484, + "grad_norm": 0.15659025311470032, + "learning_rate": 3.912325276698011e-06, + "loss": 0.8617, + "step": 150260 + }, + { + "epoch": 1.0877543486286347, + "grad_norm": 0.18718166649341583, + "learning_rate": 3.912252890037424e-06, + "loss": 0.8653, + "step": 150270 + }, + { + "epoch": 1.0878267352892208, + "grad_norm": 0.15322886407375336, + "learning_rate": 3.912180503376838e-06, + "loss": 0.8603, + "step": 150280 + }, + { + "epoch": 1.087899121949807, + "grad_norm": 0.16585904359817505, + "learning_rate": 3.9121081167162515e-06, + "loss": 0.8705, + "step": 150290 + }, + { + "epoch": 1.0879715086103934, + "grad_norm": 0.13771013915538788, + "learning_rate": 3.912035730055665e-06, + "loss": 0.8482, + "step": 150300 + }, + { + "epoch": 1.0880438952709794, + "grad_norm": 0.16550597548484802, + "learning_rate": 3.9119633433950796e-06, + "loss": 0.8535, + "step": 150310 + }, + { + "epoch": 1.0881162819315657, + "grad_norm": 0.1450309306383133, + "learning_rate": 3.911890956734493e-06, + "loss": 0.8588, + "step": 150320 + }, + { + "epoch": 1.0881886685921518, + "grad_norm": 0.15524572134017944, + "learning_rate": 3.911818570073907e-06, + "loss": 0.8563, + "step": 150330 + }, + { + "epoch": 1.088261055252738, + "grad_norm": 0.1510109007358551, + "learning_rate": 3.91174618341332e-06, + "loss": 0.8731, + "step": 150340 + }, + { + "epoch": 1.0883334419133242, + "grad_norm": 0.15307964384555817, + "learning_rate": 3.911673796752735e-06, + "loss": 0.864, + "step": 150350 + }, + { + "epoch": 1.0884058285739104, + "grad_norm": 0.15317986905574799, + "learning_rate": 3.9116014100921485e-06, + "loss": 0.8565, + "step": 150360 + }, + { + "epoch": 1.0884782152344965, + "grad_norm": 0.15566273033618927, + "learning_rate": 3.911529023431562e-06, + "loss": 0.8512, + "step": 150370 + }, + { + "epoch": 1.0885506018950828, + "grad_norm": 0.14965330064296722, + "learning_rate": 3.911456636770976e-06, + "loss": 0.8651, + "step": 150380 + }, + { + "epoch": 1.0886229885556689, + "grad_norm": 0.1521342396736145, + "learning_rate": 3.91138425011039e-06, + "loss": 0.8568, + "step": 150390 + }, + { + "epoch": 1.0886953752162551, + "grad_norm": 0.1588059663772583, + "learning_rate": 3.911311863449804e-06, + "loss": 0.8595, + "step": 150400 + }, + { + "epoch": 1.0887677618768414, + "grad_norm": 0.145501509308815, + "learning_rate": 3.911239476789217e-06, + "loss": 0.8497, + "step": 150410 + }, + { + "epoch": 1.0888401485374275, + "grad_norm": 0.14711380004882812, + "learning_rate": 3.911167090128631e-06, + "loss": 0.849, + "step": 150420 + }, + { + "epoch": 1.0889125351980138, + "grad_norm": 0.1537601202726364, + "learning_rate": 3.9110947034680455e-06, + "loss": 0.8739, + "step": 150430 + }, + { + "epoch": 1.0889849218585999, + "grad_norm": 0.1484655886888504, + "learning_rate": 3.911022316807459e-06, + "loss": 0.8626, + "step": 150440 + }, + { + "epoch": 1.0890573085191861, + "grad_norm": 0.14920607209205627, + "learning_rate": 3.910949930146873e-06, + "loss": 0.8466, + "step": 150450 + }, + { + "epoch": 1.0891296951797722, + "grad_norm": 0.16342855989933014, + "learning_rate": 3.910877543486286e-06, + "loss": 0.858, + "step": 150460 + }, + { + "epoch": 1.0892020818403585, + "grad_norm": 0.15427295863628387, + "learning_rate": 3.910805156825701e-06, + "loss": 0.859, + "step": 150470 + }, + { + "epoch": 1.0892744685009446, + "grad_norm": 0.15797512233257294, + "learning_rate": 3.9107327701651144e-06, + "loss": 0.8466, + "step": 150480 + }, + { + "epoch": 1.0893468551615308, + "grad_norm": 0.15120099484920502, + "learning_rate": 3.910660383504528e-06, + "loss": 0.8429, + "step": 150490 + }, + { + "epoch": 1.089419241822117, + "grad_norm": 0.16131120920181274, + "learning_rate": 3.910587996843942e-06, + "loss": 0.8562, + "step": 150500 + }, + { + "epoch": 1.0894916284827032, + "grad_norm": 0.15284837782382965, + "learning_rate": 3.910515610183356e-06, + "loss": 0.8584, + "step": 150510 + }, + { + "epoch": 1.0895640151432895, + "grad_norm": 0.1720409095287323, + "learning_rate": 3.91044322352277e-06, + "loss": 0.8637, + "step": 150520 + }, + { + "epoch": 1.0896364018038756, + "grad_norm": 0.16043171286582947, + "learning_rate": 3.910370836862183e-06, + "loss": 0.8538, + "step": 150530 + }, + { + "epoch": 1.0897087884644618, + "grad_norm": 0.16862253844738007, + "learning_rate": 3.910298450201597e-06, + "loss": 0.8509, + "step": 150540 + }, + { + "epoch": 1.089781175125048, + "grad_norm": 0.1581002175807953, + "learning_rate": 3.9102260635410114e-06, + "loss": 0.851, + "step": 150550 + }, + { + "epoch": 1.0898535617856342, + "grad_norm": 0.15931819379329681, + "learning_rate": 3.910153676880425e-06, + "loss": 0.8528, + "step": 150560 + }, + { + "epoch": 1.0899259484462203, + "grad_norm": 0.1453382819890976, + "learning_rate": 3.910081290219839e-06, + "loss": 0.862, + "step": 150570 + }, + { + "epoch": 1.0899983351068065, + "grad_norm": 0.14687404036521912, + "learning_rate": 3.910008903559252e-06, + "loss": 0.8474, + "step": 150580 + }, + { + "epoch": 1.0900707217673926, + "grad_norm": 0.17053967714309692, + "learning_rate": 3.909936516898667e-06, + "loss": 0.8553, + "step": 150590 + }, + { + "epoch": 1.090143108427979, + "grad_norm": 0.1455235779285431, + "learning_rate": 3.90986413023808e-06, + "loss": 0.8503, + "step": 150600 + }, + { + "epoch": 1.090215495088565, + "grad_norm": 0.15172958374023438, + "learning_rate": 3.909791743577494e-06, + "loss": 0.8488, + "step": 150610 + }, + { + "epoch": 1.0902878817491513, + "grad_norm": 0.15800823271274567, + "learning_rate": 3.909719356916908e-06, + "loss": 0.8566, + "step": 150620 + }, + { + "epoch": 1.0903602684097375, + "grad_norm": 0.13901235163211823, + "learning_rate": 3.909646970256322e-06, + "loss": 0.8373, + "step": 150630 + }, + { + "epoch": 1.0904326550703236, + "grad_norm": 0.1460275948047638, + "learning_rate": 3.909574583595736e-06, + "loss": 0.8577, + "step": 150640 + }, + { + "epoch": 1.09050504173091, + "grad_norm": 0.1747177392244339, + "learning_rate": 3.909502196935149e-06, + "loss": 0.8456, + "step": 150650 + }, + { + "epoch": 1.090577428391496, + "grad_norm": 0.1587553173303604, + "learning_rate": 3.909429810274563e-06, + "loss": 0.8445, + "step": 150660 + }, + { + "epoch": 1.0906498150520822, + "grad_norm": 0.1453934758901596, + "learning_rate": 3.909357423613977e-06, + "loss": 0.8659, + "step": 150670 + }, + { + "epoch": 1.0907222017126683, + "grad_norm": 0.14467094838619232, + "learning_rate": 3.909285036953391e-06, + "loss": 0.8556, + "step": 150680 + }, + { + "epoch": 1.0907945883732546, + "grad_norm": 0.16606931388378143, + "learning_rate": 3.909212650292805e-06, + "loss": 0.8616, + "step": 150690 + }, + { + "epoch": 1.0908669750338407, + "grad_norm": 0.1615527719259262, + "learning_rate": 3.909140263632218e-06, + "loss": 0.866, + "step": 150700 + }, + { + "epoch": 1.090939361694427, + "grad_norm": 0.15768449008464813, + "learning_rate": 3.909067876971633e-06, + "loss": 0.8517, + "step": 150710 + }, + { + "epoch": 1.091011748355013, + "grad_norm": 0.15066152811050415, + "learning_rate": 3.9089954903110454e-06, + "loss": 0.8518, + "step": 150720 + }, + { + "epoch": 1.0910841350155993, + "grad_norm": 0.1462964415550232, + "learning_rate": 3.908923103650459e-06, + "loss": 0.8543, + "step": 150730 + }, + { + "epoch": 1.0911565216761856, + "grad_norm": 0.16555063426494598, + "learning_rate": 3.9088507169898735e-06, + "loss": 0.8591, + "step": 150740 + }, + { + "epoch": 1.0912289083367717, + "grad_norm": 0.14814066886901855, + "learning_rate": 3.908778330329287e-06, + "loss": 0.856, + "step": 150750 + }, + { + "epoch": 1.091301294997358, + "grad_norm": 0.16804130375385284, + "learning_rate": 3.908705943668701e-06, + "loss": 0.8561, + "step": 150760 + }, + { + "epoch": 1.091373681657944, + "grad_norm": 0.14669744670391083, + "learning_rate": 3.908633557008114e-06, + "loss": 0.8556, + "step": 150770 + }, + { + "epoch": 1.0914460683185303, + "grad_norm": 0.16342781484127045, + "learning_rate": 3.908561170347529e-06, + "loss": 0.8664, + "step": 150780 + }, + { + "epoch": 1.0915184549791164, + "grad_norm": 0.16906608641147614, + "learning_rate": 3.9084887836869425e-06, + "loss": 0.8687, + "step": 150790 + }, + { + "epoch": 1.0915908416397027, + "grad_norm": 0.15913209319114685, + "learning_rate": 3.908416397026356e-06, + "loss": 0.8517, + "step": 150800 + }, + { + "epoch": 1.0916632283002887, + "grad_norm": 0.15466493368148804, + "learning_rate": 3.90834401036577e-06, + "loss": 0.8546, + "step": 150810 + }, + { + "epoch": 1.091735614960875, + "grad_norm": 0.19748826324939728, + "learning_rate": 3.908271623705184e-06, + "loss": 0.866, + "step": 150820 + }, + { + "epoch": 1.0918080016214613, + "grad_norm": 0.15401212871074677, + "learning_rate": 3.908199237044598e-06, + "loss": 0.8585, + "step": 150830 + }, + { + "epoch": 1.0918803882820474, + "grad_norm": 0.1531715989112854, + "learning_rate": 3.908126850384011e-06, + "loss": 0.8631, + "step": 150840 + }, + { + "epoch": 1.0919527749426337, + "grad_norm": 0.14852984249591827, + "learning_rate": 3.908054463723425e-06, + "loss": 0.8497, + "step": 150850 + }, + { + "epoch": 1.0920251616032197, + "grad_norm": 0.1562282145023346, + "learning_rate": 3.9079820770628395e-06, + "loss": 0.8547, + "step": 150860 + }, + { + "epoch": 1.092097548263806, + "grad_norm": 0.14857418835163116, + "learning_rate": 3.907909690402253e-06, + "loss": 0.8633, + "step": 150870 + }, + { + "epoch": 1.092169934924392, + "grad_norm": 0.1435396671295166, + "learning_rate": 3.907837303741667e-06, + "loss": 0.8574, + "step": 150880 + }, + { + "epoch": 1.0922423215849784, + "grad_norm": 0.1506817787885666, + "learning_rate": 3.90776491708108e-06, + "loss": 0.8692, + "step": 150890 + }, + { + "epoch": 1.0923147082455644, + "grad_norm": 0.1591109335422516, + "learning_rate": 3.907692530420494e-06, + "loss": 0.8619, + "step": 150900 + }, + { + "epoch": 1.0923870949061507, + "grad_norm": 0.16759736835956573, + "learning_rate": 3.907620143759908e-06, + "loss": 0.8634, + "step": 150910 + }, + { + "epoch": 1.0924594815667368, + "grad_norm": 0.15088944137096405, + "learning_rate": 3.907547757099322e-06, + "loss": 0.8632, + "step": 150920 + }, + { + "epoch": 1.092531868227323, + "grad_norm": 0.16850528120994568, + "learning_rate": 3.907475370438736e-06, + "loss": 0.8562, + "step": 150930 + }, + { + "epoch": 1.0926042548879094, + "grad_norm": 0.15842682123184204, + "learning_rate": 3.907402983778149e-06, + "loss": 0.8653, + "step": 150940 + }, + { + "epoch": 1.0926766415484954, + "grad_norm": 0.30550140142440796, + "learning_rate": 3.907330597117564e-06, + "loss": 0.852, + "step": 150950 + }, + { + "epoch": 1.0927490282090817, + "grad_norm": 0.139174684882164, + "learning_rate": 3.907258210456977e-06, + "loss": 0.8543, + "step": 150960 + }, + { + "epoch": 1.0928214148696678, + "grad_norm": 0.15121084451675415, + "learning_rate": 3.907185823796391e-06, + "loss": 0.8517, + "step": 150970 + }, + { + "epoch": 1.092893801530254, + "grad_norm": 0.1478102058172226, + "learning_rate": 3.9071134371358046e-06, + "loss": 0.8651, + "step": 150980 + }, + { + "epoch": 1.0929661881908401, + "grad_norm": 0.1502637416124344, + "learning_rate": 3.907041050475219e-06, + "loss": 0.8538, + "step": 150990 + }, + { + "epoch": 1.0930385748514264, + "grad_norm": 0.14458067715168, + "learning_rate": 3.906968663814633e-06, + "loss": 0.8546, + "step": 151000 + }, + { + "epoch": 1.0931109615120125, + "grad_norm": 0.14992700517177582, + "learning_rate": 3.906896277154046e-06, + "loss": 0.8584, + "step": 151010 + }, + { + "epoch": 1.0931833481725988, + "grad_norm": 0.15242300927639008, + "learning_rate": 3.90682389049346e-06, + "loss": 0.8674, + "step": 151020 + }, + { + "epoch": 1.093255734833185, + "grad_norm": 0.14612126350402832, + "learning_rate": 3.906751503832874e-06, + "loss": 0.8515, + "step": 151030 + }, + { + "epoch": 1.0933281214937711, + "grad_norm": 0.1628275364637375, + "learning_rate": 3.906679117172288e-06, + "loss": 0.8642, + "step": 151040 + }, + { + "epoch": 1.0934005081543574, + "grad_norm": 0.1532110571861267, + "learning_rate": 3.9066067305117016e-06, + "loss": 0.8566, + "step": 151050 + }, + { + "epoch": 1.0934728948149435, + "grad_norm": 0.15354089438915253, + "learning_rate": 3.906534343851115e-06, + "loss": 0.8534, + "step": 151060 + }, + { + "epoch": 1.0935452814755298, + "grad_norm": 0.14480741322040558, + "learning_rate": 3.90646195719053e-06, + "loss": 0.8601, + "step": 151070 + }, + { + "epoch": 1.0936176681361158, + "grad_norm": 0.15542499721050262, + "learning_rate": 3.906389570529943e-06, + "loss": 0.8666, + "step": 151080 + }, + { + "epoch": 1.0936900547967021, + "grad_norm": 0.1690310835838318, + "learning_rate": 3.906317183869357e-06, + "loss": 0.8571, + "step": 151090 + }, + { + "epoch": 1.0937624414572882, + "grad_norm": 0.15053977072238922, + "learning_rate": 3.9062447972087705e-06, + "loss": 0.8588, + "step": 151100 + }, + { + "epoch": 1.0938348281178745, + "grad_norm": 0.15322209894657135, + "learning_rate": 3.906172410548185e-06, + "loss": 0.8564, + "step": 151110 + }, + { + "epoch": 1.0939072147784605, + "grad_norm": 0.15235215425491333, + "learning_rate": 3.9061000238875986e-06, + "loss": 0.8649, + "step": 151120 + }, + { + "epoch": 1.0939796014390468, + "grad_norm": 0.15845470130443573, + "learning_rate": 3.906027637227012e-06, + "loss": 0.8585, + "step": 151130 + }, + { + "epoch": 1.094051988099633, + "grad_norm": 0.1422303318977356, + "learning_rate": 3.905955250566426e-06, + "loss": 0.8602, + "step": 151140 + }, + { + "epoch": 1.0941243747602192, + "grad_norm": 0.16013941168785095, + "learning_rate": 3.90588286390584e-06, + "loss": 0.8649, + "step": 151150 + }, + { + "epoch": 1.0941967614208055, + "grad_norm": 0.16993825137615204, + "learning_rate": 3.905810477245254e-06, + "loss": 0.8522, + "step": 151160 + }, + { + "epoch": 1.0942691480813915, + "grad_norm": 0.16168084740638733, + "learning_rate": 3.9057380905846675e-06, + "loss": 0.8599, + "step": 151170 + }, + { + "epoch": 1.0943415347419778, + "grad_norm": 0.15797677636146545, + "learning_rate": 3.905665703924081e-06, + "loss": 0.8621, + "step": 151180 + }, + { + "epoch": 1.0944139214025639, + "grad_norm": 0.15897414088249207, + "learning_rate": 3.9055933172634956e-06, + "loss": 0.8508, + "step": 151190 + }, + { + "epoch": 1.0944863080631502, + "grad_norm": 0.16979162395000458, + "learning_rate": 3.905520930602909e-06, + "loss": 0.8518, + "step": 151200 + }, + { + "epoch": 1.0945586947237362, + "grad_norm": 0.1656196266412735, + "learning_rate": 3.905448543942323e-06, + "loss": 0.8492, + "step": 151210 + }, + { + "epoch": 1.0946310813843225, + "grad_norm": 0.15596655011177063, + "learning_rate": 3.905376157281736e-06, + "loss": 0.8483, + "step": 151220 + }, + { + "epoch": 1.0947034680449086, + "grad_norm": 0.15505443513393402, + "learning_rate": 3.905303770621151e-06, + "loss": 0.8654, + "step": 151230 + }, + { + "epoch": 1.0947758547054949, + "grad_norm": 0.1517280638217926, + "learning_rate": 3.9052313839605645e-06, + "loss": 0.8622, + "step": 151240 + }, + { + "epoch": 1.0948482413660812, + "grad_norm": 0.1682533472776413, + "learning_rate": 3.905158997299977e-06, + "loss": 0.8528, + "step": 151250 + }, + { + "epoch": 1.0949206280266672, + "grad_norm": 0.1618666797876358, + "learning_rate": 3.905086610639392e-06, + "loss": 0.8599, + "step": 151260 + }, + { + "epoch": 1.0949930146872535, + "grad_norm": 0.15311026573181152, + "learning_rate": 3.905014223978805e-06, + "loss": 0.8581, + "step": 151270 + }, + { + "epoch": 1.0950654013478396, + "grad_norm": 0.18201227486133575, + "learning_rate": 3.904941837318219e-06, + "loss": 0.8654, + "step": 151280 + }, + { + "epoch": 1.0951377880084259, + "grad_norm": 0.1799287348985672, + "learning_rate": 3.904869450657633e-06, + "loss": 0.8493, + "step": 151290 + }, + { + "epoch": 1.095210174669012, + "grad_norm": 0.13958740234375, + "learning_rate": 3.904797063997047e-06, + "loss": 0.8565, + "step": 151300 + }, + { + "epoch": 1.0952825613295982, + "grad_norm": 0.1535644382238388, + "learning_rate": 3.904724677336461e-06, + "loss": 0.8495, + "step": 151310 + }, + { + "epoch": 1.0953549479901843, + "grad_norm": 0.14460021257400513, + "learning_rate": 3.904652290675874e-06, + "loss": 0.8723, + "step": 151320 + }, + { + "epoch": 1.0954273346507706, + "grad_norm": 0.16038860380649567, + "learning_rate": 3.904579904015288e-06, + "loss": 0.8534, + "step": 151330 + }, + { + "epoch": 1.0954997213113566, + "grad_norm": 0.17048697173595428, + "learning_rate": 3.904507517354702e-06, + "loss": 0.8529, + "step": 151340 + }, + { + "epoch": 1.095572107971943, + "grad_norm": 0.15420754253864288, + "learning_rate": 3.904435130694116e-06, + "loss": 0.8602, + "step": 151350 + }, + { + "epoch": 1.0956444946325292, + "grad_norm": 0.14572323858737946, + "learning_rate": 3.90436274403353e-06, + "loss": 0.8527, + "step": 151360 + }, + { + "epoch": 1.0957168812931153, + "grad_norm": 0.16259993612766266, + "learning_rate": 3.904290357372943e-06, + "loss": 0.8697, + "step": 151370 + }, + { + "epoch": 1.0957892679537016, + "grad_norm": 0.15503832697868347, + "learning_rate": 3.904217970712358e-06, + "loss": 0.862, + "step": 151380 + }, + { + "epoch": 1.0958616546142876, + "grad_norm": 0.1643747240304947, + "learning_rate": 3.904145584051771e-06, + "loss": 0.8619, + "step": 151390 + }, + { + "epoch": 1.095934041274874, + "grad_norm": 0.1568668931722641, + "learning_rate": 3.904073197391185e-06, + "loss": 0.8584, + "step": 151400 + }, + { + "epoch": 1.09600642793546, + "grad_norm": 0.1458984911441803, + "learning_rate": 3.9040008107305985e-06, + "loss": 0.8597, + "step": 151410 + }, + { + "epoch": 1.0960788145960463, + "grad_norm": 0.15451470017433167, + "learning_rate": 3.903928424070013e-06, + "loss": 0.8573, + "step": 151420 + }, + { + "epoch": 1.0961512012566323, + "grad_norm": 0.15595468878746033, + "learning_rate": 3.903856037409427e-06, + "loss": 0.8785, + "step": 151430 + }, + { + "epoch": 1.0962235879172186, + "grad_norm": 0.15538018941879272, + "learning_rate": 3.90378365074884e-06, + "loss": 0.8642, + "step": 151440 + }, + { + "epoch": 1.0962959745778047, + "grad_norm": 0.15324239432811737, + "learning_rate": 3.903711264088254e-06, + "loss": 0.8584, + "step": 151450 + }, + { + "epoch": 1.096368361238391, + "grad_norm": 0.14837108552455902, + "learning_rate": 3.903638877427668e-06, + "loss": 0.8654, + "step": 151460 + }, + { + "epoch": 1.0964407478989773, + "grad_norm": 0.1704886555671692, + "learning_rate": 3.903566490767082e-06, + "loss": 0.8615, + "step": 151470 + }, + { + "epoch": 1.0965131345595633, + "grad_norm": 0.16142944991588593, + "learning_rate": 3.9034941041064955e-06, + "loss": 0.8701, + "step": 151480 + }, + { + "epoch": 1.0965855212201496, + "grad_norm": 0.16319426894187927, + "learning_rate": 3.903421717445909e-06, + "loss": 0.8603, + "step": 151490 + }, + { + "epoch": 1.0966579078807357, + "grad_norm": 0.18104782700538635, + "learning_rate": 3.903349330785324e-06, + "loss": 0.8598, + "step": 151500 + }, + { + "epoch": 1.096730294541322, + "grad_norm": 0.15671753883361816, + "learning_rate": 3.903276944124737e-06, + "loss": 0.8621, + "step": 151510 + }, + { + "epoch": 1.096802681201908, + "grad_norm": 0.1673654317855835, + "learning_rate": 3.903204557464151e-06, + "loss": 0.8553, + "step": 151520 + }, + { + "epoch": 1.0968750678624943, + "grad_norm": 0.14993630349636078, + "learning_rate": 3.9031321708035645e-06, + "loss": 0.846, + "step": 151530 + }, + { + "epoch": 1.0969474545230804, + "grad_norm": 0.16005301475524902, + "learning_rate": 3.903059784142978e-06, + "loss": 0.8595, + "step": 151540 + }, + { + "epoch": 1.0970198411836667, + "grad_norm": 0.1531304568052292, + "learning_rate": 3.9029873974823925e-06, + "loss": 0.8566, + "step": 151550 + }, + { + "epoch": 1.0970922278442528, + "grad_norm": 0.15316185355186462, + "learning_rate": 3.902915010821806e-06, + "loss": 0.848, + "step": 151560 + }, + { + "epoch": 1.097164614504839, + "grad_norm": 0.16039323806762695, + "learning_rate": 3.90284262416122e-06, + "loss": 0.8562, + "step": 151570 + }, + { + "epoch": 1.0972370011654253, + "grad_norm": 0.14785118401050568, + "learning_rate": 3.902770237500633e-06, + "loss": 0.8508, + "step": 151580 + }, + { + "epoch": 1.0973093878260114, + "grad_norm": 0.16044607758522034, + "learning_rate": 3.902697850840048e-06, + "loss": 0.8569, + "step": 151590 + }, + { + "epoch": 1.0973817744865977, + "grad_norm": 0.16360950469970703, + "learning_rate": 3.9026254641794615e-06, + "loss": 0.8555, + "step": 151600 + }, + { + "epoch": 1.0974541611471837, + "grad_norm": 0.15035872161388397, + "learning_rate": 3.902553077518875e-06, + "loss": 0.8524, + "step": 151610 + }, + { + "epoch": 1.09752654780777, + "grad_norm": 0.19104793667793274, + "learning_rate": 3.902480690858289e-06, + "loss": 0.8468, + "step": 151620 + }, + { + "epoch": 1.097598934468356, + "grad_norm": 0.1884557455778122, + "learning_rate": 3.902408304197703e-06, + "loss": 0.8692, + "step": 151630 + }, + { + "epoch": 1.0976713211289424, + "grad_norm": 0.15501312911510468, + "learning_rate": 3.902335917537117e-06, + "loss": 0.8693, + "step": 151640 + }, + { + "epoch": 1.0977437077895285, + "grad_norm": 0.1506832242012024, + "learning_rate": 3.90226353087653e-06, + "loss": 0.8568, + "step": 151650 + }, + { + "epoch": 1.0978160944501147, + "grad_norm": 0.1559952199459076, + "learning_rate": 3.902191144215944e-06, + "loss": 0.8556, + "step": 151660 + }, + { + "epoch": 1.0978884811107008, + "grad_norm": 0.17316295206546783, + "learning_rate": 3.9021187575553585e-06, + "loss": 0.8683, + "step": 151670 + }, + { + "epoch": 1.097960867771287, + "grad_norm": 0.1504284292459488, + "learning_rate": 3.902046370894772e-06, + "loss": 0.8607, + "step": 151680 + }, + { + "epoch": 1.0980332544318734, + "grad_norm": 0.15068010985851288, + "learning_rate": 3.901973984234186e-06, + "loss": 0.849, + "step": 151690 + }, + { + "epoch": 1.0981056410924595, + "grad_norm": 0.14526309072971344, + "learning_rate": 3.901901597573599e-06, + "loss": 0.8576, + "step": 151700 + }, + { + "epoch": 1.0981780277530457, + "grad_norm": 0.1651238054037094, + "learning_rate": 3.901829210913014e-06, + "loss": 0.8712, + "step": 151710 + }, + { + "epoch": 1.0982504144136318, + "grad_norm": 0.16406778991222382, + "learning_rate": 3.901756824252427e-06, + "loss": 0.8533, + "step": 151720 + }, + { + "epoch": 1.098322801074218, + "grad_norm": 0.15506918728351593, + "learning_rate": 3.901684437591841e-06, + "loss": 0.8644, + "step": 151730 + }, + { + "epoch": 1.0983951877348042, + "grad_norm": 0.1499270647764206, + "learning_rate": 3.901612050931255e-06, + "loss": 0.8435, + "step": 151740 + }, + { + "epoch": 1.0984675743953904, + "grad_norm": 0.15782611072063446, + "learning_rate": 3.901539664270669e-06, + "loss": 0.8602, + "step": 151750 + }, + { + "epoch": 1.0985399610559765, + "grad_norm": 0.14612479507923126, + "learning_rate": 3.901467277610083e-06, + "loss": 0.8569, + "step": 151760 + }, + { + "epoch": 1.0986123477165628, + "grad_norm": 0.15989379584789276, + "learning_rate": 3.901394890949496e-06, + "loss": 0.8473, + "step": 151770 + }, + { + "epoch": 1.0986847343771489, + "grad_norm": 0.14152558147907257, + "learning_rate": 3.90132250428891e-06, + "loss": 0.8588, + "step": 151780 + }, + { + "epoch": 1.0987571210377352, + "grad_norm": 0.14922699332237244, + "learning_rate": 3.9012501176283236e-06, + "loss": 0.8483, + "step": 151790 + }, + { + "epoch": 1.0988295076983214, + "grad_norm": 0.15178348124027252, + "learning_rate": 3.901177730967737e-06, + "loss": 0.8548, + "step": 151800 + }, + { + "epoch": 1.0989018943589075, + "grad_norm": 0.1589786410331726, + "learning_rate": 3.901105344307151e-06, + "loss": 0.8626, + "step": 151810 + }, + { + "epoch": 1.0989742810194938, + "grad_norm": 0.15005895495414734, + "learning_rate": 3.901032957646565e-06, + "loss": 0.8627, + "step": 151820 + }, + { + "epoch": 1.0990466676800799, + "grad_norm": 0.14310167729854584, + "learning_rate": 3.900960570985979e-06, + "loss": 0.8621, + "step": 151830 + }, + { + "epoch": 1.0991190543406661, + "grad_norm": 0.1532042920589447, + "learning_rate": 3.9008881843253925e-06, + "loss": 0.8571, + "step": 151840 + }, + { + "epoch": 1.0991914410012522, + "grad_norm": 0.15156692266464233, + "learning_rate": 3.900815797664806e-06, + "loss": 0.856, + "step": 151850 + }, + { + "epoch": 1.0992638276618385, + "grad_norm": 0.17657136917114258, + "learning_rate": 3.9007434110042206e-06, + "loss": 0.8521, + "step": 151860 + }, + { + "epoch": 1.0993362143224246, + "grad_norm": 0.17290574312210083, + "learning_rate": 3.900671024343634e-06, + "loss": 0.8681, + "step": 151870 + }, + { + "epoch": 1.0994086009830109, + "grad_norm": 0.14646093547344208, + "learning_rate": 3.900598637683048e-06, + "loss": 0.8464, + "step": 151880 + }, + { + "epoch": 1.0994809876435971, + "grad_norm": 0.14447811245918274, + "learning_rate": 3.900526251022461e-06, + "loss": 0.8395, + "step": 151890 + }, + { + "epoch": 1.0995533743041832, + "grad_norm": 0.16864459216594696, + "learning_rate": 3.900453864361876e-06, + "loss": 0.8542, + "step": 151900 + }, + { + "epoch": 1.0996257609647695, + "grad_norm": 0.1442280113697052, + "learning_rate": 3.9003814777012895e-06, + "loss": 0.853, + "step": 151910 + }, + { + "epoch": 1.0996981476253556, + "grad_norm": 0.15601858496665955, + "learning_rate": 3.900309091040703e-06, + "loss": 0.8549, + "step": 151920 + }, + { + "epoch": 1.0997705342859418, + "grad_norm": 0.14018070697784424, + "learning_rate": 3.900236704380117e-06, + "loss": 0.8535, + "step": 151930 + }, + { + "epoch": 1.099842920946528, + "grad_norm": 0.14601945877075195, + "learning_rate": 3.900164317719531e-06, + "loss": 0.8679, + "step": 151940 + }, + { + "epoch": 1.0999153076071142, + "grad_norm": 0.1592642217874527, + "learning_rate": 3.900091931058945e-06, + "loss": 0.8574, + "step": 151950 + }, + { + "epoch": 1.0999876942677003, + "grad_norm": 0.1683703064918518, + "learning_rate": 3.900019544398358e-06, + "loss": 0.8592, + "step": 151960 + }, + { + "epoch": 1.1000600809282866, + "grad_norm": 0.15897010266780853, + "learning_rate": 3.899947157737772e-06, + "loss": 0.8487, + "step": 151970 + }, + { + "epoch": 1.1001324675888726, + "grad_norm": 0.14617374539375305, + "learning_rate": 3.8998747710771865e-06, + "loss": 0.8556, + "step": 151980 + }, + { + "epoch": 1.100204854249459, + "grad_norm": 0.157510906457901, + "learning_rate": 3.8998023844166e-06, + "loss": 0.857, + "step": 151990 + }, + { + "epoch": 1.1002772409100452, + "grad_norm": 0.1601264476776123, + "learning_rate": 3.899729997756014e-06, + "loss": 0.8583, + "step": 152000 + }, + { + "epoch": 1.1003496275706313, + "grad_norm": 0.16230392456054688, + "learning_rate": 3.899657611095427e-06, + "loss": 0.8503, + "step": 152010 + }, + { + "epoch": 1.1004220142312175, + "grad_norm": 0.14697298407554626, + "learning_rate": 3.899585224434842e-06, + "loss": 0.8452, + "step": 152020 + }, + { + "epoch": 1.1004944008918036, + "grad_norm": 0.1654907613992691, + "learning_rate": 3.8995128377742554e-06, + "loss": 0.8531, + "step": 152030 + }, + { + "epoch": 1.10056678755239, + "grad_norm": 0.1564827561378479, + "learning_rate": 3.899440451113669e-06, + "loss": 0.8554, + "step": 152040 + }, + { + "epoch": 1.100639174212976, + "grad_norm": 0.14684629440307617, + "learning_rate": 3.899368064453083e-06, + "loss": 0.8562, + "step": 152050 + }, + { + "epoch": 1.1007115608735623, + "grad_norm": 0.15104876458644867, + "learning_rate": 3.899295677792497e-06, + "loss": 0.8607, + "step": 152060 + }, + { + "epoch": 1.1007839475341483, + "grad_norm": 0.15481539070606232, + "learning_rate": 3.899223291131911e-06, + "loss": 0.8587, + "step": 152070 + }, + { + "epoch": 1.1008563341947346, + "grad_norm": 0.15481428802013397, + "learning_rate": 3.899150904471324e-06, + "loss": 0.8526, + "step": 152080 + }, + { + "epoch": 1.100928720855321, + "grad_norm": 0.1640326827764511, + "learning_rate": 3.899078517810738e-06, + "loss": 0.8632, + "step": 152090 + }, + { + "epoch": 1.101001107515907, + "grad_norm": 0.1505155861377716, + "learning_rate": 3.8990061311501524e-06, + "loss": 0.8439, + "step": 152100 + }, + { + "epoch": 1.1010734941764933, + "grad_norm": 0.1723697930574417, + "learning_rate": 3.898933744489566e-06, + "loss": 0.8604, + "step": 152110 + }, + { + "epoch": 1.1011458808370793, + "grad_norm": 0.16168347001075745, + "learning_rate": 3.89886135782898e-06, + "loss": 0.8418, + "step": 152120 + }, + { + "epoch": 1.1012182674976656, + "grad_norm": 1.0787012577056885, + "learning_rate": 3.898788971168393e-06, + "loss": 0.8555, + "step": 152130 + }, + { + "epoch": 1.1012906541582517, + "grad_norm": 0.15610916912555695, + "learning_rate": 3.898716584507807e-06, + "loss": 0.8459, + "step": 152140 + }, + { + "epoch": 1.101363040818838, + "grad_norm": 0.1682717204093933, + "learning_rate": 3.898644197847221e-06, + "loss": 0.8496, + "step": 152150 + }, + { + "epoch": 1.101435427479424, + "grad_norm": 0.34548187255859375, + "learning_rate": 3.898571811186635e-06, + "loss": 0.8519, + "step": 152160 + }, + { + "epoch": 1.1015078141400103, + "grad_norm": 0.15088237822055817, + "learning_rate": 3.898499424526049e-06, + "loss": 0.8588, + "step": 152170 + }, + { + "epoch": 1.1015802008005964, + "grad_norm": 0.13609455525875092, + "learning_rate": 3.898427037865462e-06, + "loss": 0.847, + "step": 152180 + }, + { + "epoch": 1.1016525874611827, + "grad_norm": 0.14786964654922485, + "learning_rate": 3.898354651204877e-06, + "loss": 0.8687, + "step": 152190 + }, + { + "epoch": 1.101724974121769, + "grad_norm": 0.14596401154994965, + "learning_rate": 3.89828226454429e-06, + "loss": 0.8543, + "step": 152200 + }, + { + "epoch": 1.101797360782355, + "grad_norm": 0.1763083040714264, + "learning_rate": 3.898209877883704e-06, + "loss": 0.8499, + "step": 152210 + }, + { + "epoch": 1.1018697474429413, + "grad_norm": 0.19486874341964722, + "learning_rate": 3.8981374912231175e-06, + "loss": 0.857, + "step": 152220 + }, + { + "epoch": 1.1019421341035274, + "grad_norm": 0.15471166372299194, + "learning_rate": 3.898065104562532e-06, + "loss": 0.8454, + "step": 152230 + }, + { + "epoch": 1.1020145207641137, + "grad_norm": 0.14786270260810852, + "learning_rate": 3.897992717901946e-06, + "loss": 0.8673, + "step": 152240 + }, + { + "epoch": 1.1020869074246997, + "grad_norm": 0.14872819185256958, + "learning_rate": 3.897920331241359e-06, + "loss": 0.8543, + "step": 152250 + }, + { + "epoch": 1.102159294085286, + "grad_norm": 0.1608414351940155, + "learning_rate": 3.897847944580773e-06, + "loss": 0.8618, + "step": 152260 + }, + { + "epoch": 1.102231680745872, + "grad_norm": 0.1444377303123474, + "learning_rate": 3.897775557920187e-06, + "loss": 0.8486, + "step": 152270 + }, + { + "epoch": 1.1023040674064584, + "grad_norm": 0.16134855151176453, + "learning_rate": 3.897703171259601e-06, + "loss": 0.8642, + "step": 152280 + }, + { + "epoch": 1.1023764540670444, + "grad_norm": 0.1678602695465088, + "learning_rate": 3.8976307845990145e-06, + "loss": 0.8583, + "step": 152290 + }, + { + "epoch": 1.1024488407276307, + "grad_norm": 0.15565115213394165, + "learning_rate": 3.897558397938428e-06, + "loss": 0.8503, + "step": 152300 + }, + { + "epoch": 1.102521227388217, + "grad_norm": 0.16068263351917267, + "learning_rate": 3.897486011277842e-06, + "loss": 0.8734, + "step": 152310 + }, + { + "epoch": 1.102593614048803, + "grad_norm": 0.15741673111915588, + "learning_rate": 3.897413624617255e-06, + "loss": 0.8602, + "step": 152320 + }, + { + "epoch": 1.1026660007093894, + "grad_norm": 0.17430439591407776, + "learning_rate": 3.897341237956669e-06, + "loss": 0.8542, + "step": 152330 + }, + { + "epoch": 1.1027383873699754, + "grad_norm": 0.15103577077388763, + "learning_rate": 3.8972688512960835e-06, + "loss": 0.8624, + "step": 152340 + }, + { + "epoch": 1.1028107740305617, + "grad_norm": 0.15457139909267426, + "learning_rate": 3.897196464635497e-06, + "loss": 0.8546, + "step": 152350 + }, + { + "epoch": 1.1028831606911478, + "grad_norm": 0.1677873432636261, + "learning_rate": 3.897124077974911e-06, + "loss": 0.8774, + "step": 152360 + }, + { + "epoch": 1.102955547351734, + "grad_norm": 0.1446024477481842, + "learning_rate": 3.897051691314324e-06, + "loss": 0.8584, + "step": 152370 + }, + { + "epoch": 1.1030279340123201, + "grad_norm": 0.1553516536951065, + "learning_rate": 3.896979304653739e-06, + "loss": 0.855, + "step": 152380 + }, + { + "epoch": 1.1031003206729064, + "grad_norm": 0.15812979638576508, + "learning_rate": 3.896906917993152e-06, + "loss": 0.8538, + "step": 152390 + }, + { + "epoch": 1.1031727073334925, + "grad_norm": 0.16017301380634308, + "learning_rate": 3.896834531332566e-06, + "loss": 0.8485, + "step": 152400 + }, + { + "epoch": 1.1032450939940788, + "grad_norm": 0.15883536636829376, + "learning_rate": 3.89676214467198e-06, + "loss": 0.8547, + "step": 152410 + }, + { + "epoch": 1.103317480654665, + "grad_norm": 0.16042304039001465, + "learning_rate": 3.896689758011394e-06, + "loss": 0.8527, + "step": 152420 + }, + { + "epoch": 1.1033898673152511, + "grad_norm": 0.1522279679775238, + "learning_rate": 3.896617371350808e-06, + "loss": 0.8571, + "step": 152430 + }, + { + "epoch": 1.1034622539758374, + "grad_norm": 0.15726742148399353, + "learning_rate": 3.896544984690221e-06, + "loss": 0.8514, + "step": 152440 + }, + { + "epoch": 1.1035346406364235, + "grad_norm": 0.1631903052330017, + "learning_rate": 3.896472598029635e-06, + "loss": 0.8575, + "step": 152450 + }, + { + "epoch": 1.1036070272970098, + "grad_norm": 0.15091995894908905, + "learning_rate": 3.896400211369049e-06, + "loss": 0.8496, + "step": 152460 + }, + { + "epoch": 1.1036794139575958, + "grad_norm": 0.1674959510564804, + "learning_rate": 3.896327824708463e-06, + "loss": 0.863, + "step": 152470 + }, + { + "epoch": 1.1037518006181821, + "grad_norm": 0.15883536636829376, + "learning_rate": 3.896255438047877e-06, + "loss": 0.8545, + "step": 152480 + }, + { + "epoch": 1.1038241872787682, + "grad_norm": 0.17401011288166046, + "learning_rate": 3.89618305138729e-06, + "loss": 0.859, + "step": 152490 + }, + { + "epoch": 1.1038965739393545, + "grad_norm": 0.1531701236963272, + "learning_rate": 3.896110664726705e-06, + "loss": 0.8492, + "step": 152500 + }, + { + "epoch": 1.1039689605999405, + "grad_norm": 0.15053513646125793, + "learning_rate": 3.896038278066118e-06, + "loss": 0.8485, + "step": 152510 + }, + { + "epoch": 1.1040413472605268, + "grad_norm": 0.15643155574798584, + "learning_rate": 3.895965891405532e-06, + "loss": 0.8485, + "step": 152520 + }, + { + "epoch": 1.1041137339211131, + "grad_norm": 0.14518050849437714, + "learning_rate": 3.8958935047449456e-06, + "loss": 0.8522, + "step": 152530 + }, + { + "epoch": 1.1041861205816992, + "grad_norm": 0.16403423249721527, + "learning_rate": 3.89582111808436e-06, + "loss": 0.8595, + "step": 152540 + }, + { + "epoch": 1.1042585072422855, + "grad_norm": 0.18213549256324768, + "learning_rate": 3.895748731423774e-06, + "loss": 0.8591, + "step": 152550 + }, + { + "epoch": 1.1043308939028715, + "grad_norm": 0.15698887407779694, + "learning_rate": 3.895676344763187e-06, + "loss": 0.8547, + "step": 152560 + }, + { + "epoch": 1.1044032805634578, + "grad_norm": 0.1551319658756256, + "learning_rate": 3.895603958102601e-06, + "loss": 0.8563, + "step": 152570 + }, + { + "epoch": 1.1044756672240439, + "grad_norm": 0.1502751260995865, + "learning_rate": 3.895531571442015e-06, + "loss": 0.8594, + "step": 152580 + }, + { + "epoch": 1.1045480538846302, + "grad_norm": 0.15094530582427979, + "learning_rate": 3.895459184781429e-06, + "loss": 0.8501, + "step": 152590 + }, + { + "epoch": 1.1046204405452162, + "grad_norm": 0.14504896104335785, + "learning_rate": 3.8953867981208426e-06, + "loss": 0.8475, + "step": 152600 + }, + { + "epoch": 1.1046928272058025, + "grad_norm": 0.14708517491817474, + "learning_rate": 3.895314411460256e-06, + "loss": 0.8641, + "step": 152610 + }, + { + "epoch": 1.1047652138663886, + "grad_norm": 0.15217843651771545, + "learning_rate": 3.895242024799671e-06, + "loss": 0.8524, + "step": 152620 + }, + { + "epoch": 1.1048376005269749, + "grad_norm": 0.15076854825019836, + "learning_rate": 3.895169638139084e-06, + "loss": 0.8604, + "step": 152630 + }, + { + "epoch": 1.1049099871875612, + "grad_norm": 0.1549665480852127, + "learning_rate": 3.895097251478498e-06, + "loss": 0.8572, + "step": 152640 + }, + { + "epoch": 1.1049823738481472, + "grad_norm": 0.146447092294693, + "learning_rate": 3.8950248648179115e-06, + "loss": 0.8508, + "step": 152650 + }, + { + "epoch": 1.1050547605087335, + "grad_norm": 0.156124547123909, + "learning_rate": 3.894952478157326e-06, + "loss": 0.863, + "step": 152660 + }, + { + "epoch": 1.1051271471693196, + "grad_norm": 0.14300669729709625, + "learning_rate": 3.8948800914967396e-06, + "loss": 0.8673, + "step": 152670 + }, + { + "epoch": 1.1051995338299059, + "grad_norm": 0.1642896682024002, + "learning_rate": 3.894807704836153e-06, + "loss": 0.8495, + "step": 152680 + }, + { + "epoch": 1.105271920490492, + "grad_norm": 0.15257836878299713, + "learning_rate": 3.894735318175567e-06, + "loss": 0.8463, + "step": 152690 + }, + { + "epoch": 1.1053443071510782, + "grad_norm": 0.16251547634601593, + "learning_rate": 3.894662931514981e-06, + "loss": 0.8402, + "step": 152700 + }, + { + "epoch": 1.1054166938116643, + "grad_norm": 0.145891934633255, + "learning_rate": 3.894590544854395e-06, + "loss": 0.8492, + "step": 152710 + }, + { + "epoch": 1.1054890804722506, + "grad_norm": 0.16337795555591583, + "learning_rate": 3.8945181581938085e-06, + "loss": 0.8457, + "step": 152720 + }, + { + "epoch": 1.1055614671328367, + "grad_norm": 0.1507614105939865, + "learning_rate": 3.894445771533222e-06, + "loss": 0.8649, + "step": 152730 + }, + { + "epoch": 1.105633853793423, + "grad_norm": 0.16109922528266907, + "learning_rate": 3.894373384872637e-06, + "loss": 0.8611, + "step": 152740 + }, + { + "epoch": 1.1057062404540092, + "grad_norm": 0.14710114896297455, + "learning_rate": 3.89430099821205e-06, + "loss": 0.854, + "step": 152750 + }, + { + "epoch": 1.1057786271145953, + "grad_norm": 0.14651963114738464, + "learning_rate": 3.894228611551464e-06, + "loss": 0.8518, + "step": 152760 + }, + { + "epoch": 1.1058510137751816, + "grad_norm": 0.1510373204946518, + "learning_rate": 3.8941562248908774e-06, + "loss": 0.8726, + "step": 152770 + }, + { + "epoch": 1.1059234004357676, + "grad_norm": 0.14552970230579376, + "learning_rate": 3.894083838230291e-06, + "loss": 0.8645, + "step": 152780 + }, + { + "epoch": 1.105995787096354, + "grad_norm": 0.1477714329957962, + "learning_rate": 3.8940114515697055e-06, + "loss": 0.8663, + "step": 152790 + }, + { + "epoch": 1.10606817375694, + "grad_norm": 0.1511882096529007, + "learning_rate": 3.893939064909119e-06, + "loss": 0.8589, + "step": 152800 + }, + { + "epoch": 1.1061405604175263, + "grad_norm": 0.16555480659008026, + "learning_rate": 3.893866678248533e-06, + "loss": 0.8446, + "step": 152810 + }, + { + "epoch": 1.1062129470781124, + "grad_norm": 0.15422996878623962, + "learning_rate": 3.893794291587946e-06, + "loss": 0.8476, + "step": 152820 + }, + { + "epoch": 1.1062853337386986, + "grad_norm": 0.16590246558189392, + "learning_rate": 3.893721904927361e-06, + "loss": 0.8658, + "step": 152830 + }, + { + "epoch": 1.1063577203992847, + "grad_norm": 0.15505556762218475, + "learning_rate": 3.893649518266774e-06, + "loss": 0.8438, + "step": 152840 + }, + { + "epoch": 1.106430107059871, + "grad_norm": 0.15418249368667603, + "learning_rate": 3.893577131606188e-06, + "loss": 0.8507, + "step": 152850 + }, + { + "epoch": 1.1065024937204573, + "grad_norm": 0.15402624011039734, + "learning_rate": 3.893504744945602e-06, + "loss": 0.8656, + "step": 152860 + }, + { + "epoch": 1.1065748803810433, + "grad_norm": 0.15612943470478058, + "learning_rate": 3.893432358285015e-06, + "loss": 0.8594, + "step": 152870 + }, + { + "epoch": 1.1066472670416296, + "grad_norm": 0.1624891459941864, + "learning_rate": 3.893359971624429e-06, + "loss": 0.8577, + "step": 152880 + }, + { + "epoch": 1.1067196537022157, + "grad_norm": 0.18142132461071014, + "learning_rate": 3.893287584963843e-06, + "loss": 0.8541, + "step": 152890 + }, + { + "epoch": 1.106792040362802, + "grad_norm": 0.16291342675685883, + "learning_rate": 3.893215198303257e-06, + "loss": 0.8546, + "step": 152900 + }, + { + "epoch": 1.106864427023388, + "grad_norm": 0.15221039950847626, + "learning_rate": 3.893142811642671e-06, + "loss": 0.8548, + "step": 152910 + }, + { + "epoch": 1.1069368136839743, + "grad_norm": 0.15222559869289398, + "learning_rate": 3.893070424982084e-06, + "loss": 0.8612, + "step": 152920 + }, + { + "epoch": 1.1070092003445604, + "grad_norm": 0.15900181233882904, + "learning_rate": 3.892998038321498e-06, + "loss": 0.8725, + "step": 152930 + }, + { + "epoch": 1.1070815870051467, + "grad_norm": 0.1619795560836792, + "learning_rate": 3.892925651660912e-06, + "loss": 0.8511, + "step": 152940 + }, + { + "epoch": 1.107153973665733, + "grad_norm": 0.1571727842092514, + "learning_rate": 3.892853265000326e-06, + "loss": 0.8482, + "step": 152950 + }, + { + "epoch": 1.107226360326319, + "grad_norm": 0.13933265209197998, + "learning_rate": 3.8927808783397395e-06, + "loss": 0.8563, + "step": 152960 + }, + { + "epoch": 1.1072987469869053, + "grad_norm": 0.14725233614444733, + "learning_rate": 3.892708491679153e-06, + "loss": 0.8507, + "step": 152970 + }, + { + "epoch": 1.1073711336474914, + "grad_norm": 0.1523694097995758, + "learning_rate": 3.892636105018568e-06, + "loss": 0.8503, + "step": 152980 + }, + { + "epoch": 1.1074435203080777, + "grad_norm": 0.1705412119626999, + "learning_rate": 3.892563718357981e-06, + "loss": 0.8545, + "step": 152990 + }, + { + "epoch": 1.1075159069686638, + "grad_norm": 0.14305712282657623, + "learning_rate": 3.892491331697395e-06, + "loss": 0.8585, + "step": 153000 + }, + { + "epoch": 1.10758829362925, + "grad_norm": 0.1486334204673767, + "learning_rate": 3.8924189450368085e-06, + "loss": 0.847, + "step": 153010 + }, + { + "epoch": 1.107660680289836, + "grad_norm": 0.16062171757221222, + "learning_rate": 3.892346558376223e-06, + "loss": 0.8527, + "step": 153020 + }, + { + "epoch": 1.1077330669504224, + "grad_norm": 0.14781929552555084, + "learning_rate": 3.8922741717156365e-06, + "loss": 0.8677, + "step": 153030 + }, + { + "epoch": 1.1078054536110085, + "grad_norm": 0.14651741087436676, + "learning_rate": 3.89220178505505e-06, + "loss": 0.8535, + "step": 153040 + }, + { + "epoch": 1.1078778402715947, + "grad_norm": 0.14890103042125702, + "learning_rate": 3.892129398394464e-06, + "loss": 0.863, + "step": 153050 + }, + { + "epoch": 1.107950226932181, + "grad_norm": 0.14752887189388275, + "learning_rate": 3.892057011733878e-06, + "loss": 0.8494, + "step": 153060 + }, + { + "epoch": 1.108022613592767, + "grad_norm": 0.15425044298171997, + "learning_rate": 3.891984625073292e-06, + "loss": 0.8693, + "step": 153070 + }, + { + "epoch": 1.1080950002533534, + "grad_norm": 0.14572696387767792, + "learning_rate": 3.8919122384127055e-06, + "loss": 0.8467, + "step": 153080 + }, + { + "epoch": 1.1081673869139395, + "grad_norm": 0.16281406581401825, + "learning_rate": 3.891839851752119e-06, + "loss": 0.8794, + "step": 153090 + }, + { + "epoch": 1.1082397735745257, + "grad_norm": 0.152713343501091, + "learning_rate": 3.8917674650915335e-06, + "loss": 0.8605, + "step": 153100 + }, + { + "epoch": 1.1083121602351118, + "grad_norm": 0.14870685338974, + "learning_rate": 3.891695078430947e-06, + "loss": 0.8602, + "step": 153110 + }, + { + "epoch": 1.108384546895698, + "grad_norm": 0.15383371710777283, + "learning_rate": 3.891622691770361e-06, + "loss": 0.8538, + "step": 153120 + }, + { + "epoch": 1.1084569335562842, + "grad_norm": 0.1520891636610031, + "learning_rate": 3.891550305109774e-06, + "loss": 0.8539, + "step": 153130 + }, + { + "epoch": 1.1085293202168705, + "grad_norm": 0.15309153497219086, + "learning_rate": 3.891477918449189e-06, + "loss": 0.8459, + "step": 153140 + }, + { + "epoch": 1.1086017068774567, + "grad_norm": 0.16597382724285126, + "learning_rate": 3.8914055317886025e-06, + "loss": 0.8529, + "step": 153150 + }, + { + "epoch": 1.1086740935380428, + "grad_norm": 0.1510355919599533, + "learning_rate": 3.891333145128016e-06, + "loss": 0.8605, + "step": 153160 + }, + { + "epoch": 1.108746480198629, + "grad_norm": 0.14412082731723785, + "learning_rate": 3.89126075846743e-06, + "loss": 0.8532, + "step": 153170 + }, + { + "epoch": 1.1088188668592152, + "grad_norm": 0.1505080908536911, + "learning_rate": 3.891188371806844e-06, + "loss": 0.8613, + "step": 153180 + }, + { + "epoch": 1.1088912535198014, + "grad_norm": 0.14691261947155, + "learning_rate": 3.891115985146258e-06, + "loss": 0.8596, + "step": 153190 + }, + { + "epoch": 1.1089636401803875, + "grad_norm": 0.15509304404258728, + "learning_rate": 3.891043598485671e-06, + "loss": 0.8393, + "step": 153200 + }, + { + "epoch": 1.1090360268409738, + "grad_norm": 0.14733615517616272, + "learning_rate": 3.890971211825085e-06, + "loss": 0.8485, + "step": 153210 + }, + { + "epoch": 1.1091084135015599, + "grad_norm": 0.1337285041809082, + "learning_rate": 3.8908988251644995e-06, + "loss": 0.8556, + "step": 153220 + }, + { + "epoch": 1.1091808001621462, + "grad_norm": 0.14776895940303802, + "learning_rate": 3.890826438503913e-06, + "loss": 0.8796, + "step": 153230 + }, + { + "epoch": 1.1092531868227322, + "grad_norm": 0.1420595645904541, + "learning_rate": 3.890754051843327e-06, + "loss": 0.8565, + "step": 153240 + }, + { + "epoch": 1.1093255734833185, + "grad_norm": 0.16995348036289215, + "learning_rate": 3.89068166518274e-06, + "loss": 0.8469, + "step": 153250 + }, + { + "epoch": 1.1093979601439048, + "grad_norm": 0.14591839909553528, + "learning_rate": 3.890609278522155e-06, + "loss": 0.8588, + "step": 153260 + }, + { + "epoch": 1.1094703468044909, + "grad_norm": 0.15660324692726135, + "learning_rate": 3.890536891861568e-06, + "loss": 0.8571, + "step": 153270 + }, + { + "epoch": 1.1095427334650771, + "grad_norm": 0.16159191727638245, + "learning_rate": 3.890464505200982e-06, + "loss": 0.8689, + "step": 153280 + }, + { + "epoch": 1.1096151201256632, + "grad_norm": 0.15314868092536926, + "learning_rate": 3.890392118540396e-06, + "loss": 0.8534, + "step": 153290 + }, + { + "epoch": 1.1096875067862495, + "grad_norm": 0.1325388103723526, + "learning_rate": 3.89031973187981e-06, + "loss": 0.8674, + "step": 153300 + }, + { + "epoch": 1.1097598934468356, + "grad_norm": 0.15876524150371552, + "learning_rate": 3.890247345219224e-06, + "loss": 0.859, + "step": 153310 + }, + { + "epoch": 1.1098322801074219, + "grad_norm": 0.14967800676822662, + "learning_rate": 3.890174958558637e-06, + "loss": 0.8601, + "step": 153320 + }, + { + "epoch": 1.109904666768008, + "grad_norm": 0.14991550147533417, + "learning_rate": 3.890102571898051e-06, + "loss": 0.8454, + "step": 153330 + }, + { + "epoch": 1.1099770534285942, + "grad_norm": 0.15167036652565002, + "learning_rate": 3.890030185237465e-06, + "loss": 0.8558, + "step": 153340 + }, + { + "epoch": 1.1100494400891803, + "grad_norm": 0.15944884717464447, + "learning_rate": 3.889957798576879e-06, + "loss": 0.8634, + "step": 153350 + }, + { + "epoch": 1.1101218267497666, + "grad_norm": 0.14728944003582, + "learning_rate": 3.889885411916293e-06, + "loss": 0.8759, + "step": 153360 + }, + { + "epoch": 1.1101942134103528, + "grad_norm": 0.16900379955768585, + "learning_rate": 3.889813025255706e-06, + "loss": 0.8672, + "step": 153370 + }, + { + "epoch": 1.110266600070939, + "grad_norm": 0.14968164265155792, + "learning_rate": 3.88974063859512e-06, + "loss": 0.8545, + "step": 153380 + }, + { + "epoch": 1.1103389867315252, + "grad_norm": 0.16399233043193817, + "learning_rate": 3.8896682519345335e-06, + "loss": 0.864, + "step": 153390 + }, + { + "epoch": 1.1104113733921113, + "grad_norm": 0.1753818243741989, + "learning_rate": 3.889595865273947e-06, + "loss": 0.851, + "step": 153400 + }, + { + "epoch": 1.1104837600526976, + "grad_norm": 0.18223677575588226, + "learning_rate": 3.8895234786133616e-06, + "loss": 0.8468, + "step": 153410 + }, + { + "epoch": 1.1105561467132836, + "grad_norm": 0.1732460856437683, + "learning_rate": 3.889451091952775e-06, + "loss": 0.8565, + "step": 153420 + }, + { + "epoch": 1.11062853337387, + "grad_norm": 0.15468300879001617, + "learning_rate": 3.889378705292189e-06, + "loss": 0.8635, + "step": 153430 + }, + { + "epoch": 1.110700920034456, + "grad_norm": 0.15871159732341766, + "learning_rate": 3.889306318631602e-06, + "loss": 0.8611, + "step": 153440 + }, + { + "epoch": 1.1107733066950423, + "grad_norm": 0.14641880989074707, + "learning_rate": 3.889233931971017e-06, + "loss": 0.8447, + "step": 153450 + }, + { + "epoch": 1.1108456933556283, + "grad_norm": 0.1457509547472, + "learning_rate": 3.8891615453104305e-06, + "loss": 0.8662, + "step": 153460 + }, + { + "epoch": 1.1109180800162146, + "grad_norm": 0.1556580513715744, + "learning_rate": 3.889089158649844e-06, + "loss": 0.8524, + "step": 153470 + }, + { + "epoch": 1.110990466676801, + "grad_norm": 0.14962519705295563, + "learning_rate": 3.889016771989258e-06, + "loss": 0.8592, + "step": 153480 + }, + { + "epoch": 1.111062853337387, + "grad_norm": 0.1524748057126999, + "learning_rate": 3.888944385328672e-06, + "loss": 0.8583, + "step": 153490 + }, + { + "epoch": 1.1111352399979733, + "grad_norm": 0.17120623588562012, + "learning_rate": 3.888871998668086e-06, + "loss": 0.862, + "step": 153500 + }, + { + "epoch": 1.1112076266585593, + "grad_norm": 0.14351938664913177, + "learning_rate": 3.8887996120074994e-06, + "loss": 0.8434, + "step": 153510 + }, + { + "epoch": 1.1112800133191456, + "grad_norm": 0.14822779595851898, + "learning_rate": 3.888727225346913e-06, + "loss": 0.8371, + "step": 153520 + }, + { + "epoch": 1.1113523999797317, + "grad_norm": 0.20761913061141968, + "learning_rate": 3.8886548386863275e-06, + "loss": 0.8579, + "step": 153530 + }, + { + "epoch": 1.111424786640318, + "grad_norm": 0.15165117383003235, + "learning_rate": 3.888582452025741e-06, + "loss": 0.8596, + "step": 153540 + }, + { + "epoch": 1.111497173300904, + "grad_norm": 0.15417353808879852, + "learning_rate": 3.888510065365155e-06, + "loss": 0.8515, + "step": 153550 + }, + { + "epoch": 1.1115695599614903, + "grad_norm": 0.17663225531578064, + "learning_rate": 3.888437678704568e-06, + "loss": 0.8599, + "step": 153560 + }, + { + "epoch": 1.1116419466220764, + "grad_norm": 0.17057162523269653, + "learning_rate": 3.888365292043982e-06, + "loss": 0.8655, + "step": 153570 + }, + { + "epoch": 1.1117143332826627, + "grad_norm": 0.15689362585544586, + "learning_rate": 3.8882929053833964e-06, + "loss": 0.8619, + "step": 153580 + }, + { + "epoch": 1.111786719943249, + "grad_norm": 0.15272989869117737, + "learning_rate": 3.88822051872281e-06, + "loss": 0.8598, + "step": 153590 + }, + { + "epoch": 1.111859106603835, + "grad_norm": 0.15261130034923553, + "learning_rate": 3.888148132062224e-06, + "loss": 0.8659, + "step": 153600 + }, + { + "epoch": 1.1119314932644213, + "grad_norm": 0.15566353499889374, + "learning_rate": 3.888075745401637e-06, + "loss": 0.8488, + "step": 153610 + }, + { + "epoch": 1.1120038799250074, + "grad_norm": 0.155398890376091, + "learning_rate": 3.888003358741052e-06, + "loss": 0.8547, + "step": 153620 + }, + { + "epoch": 1.1120762665855937, + "grad_norm": 0.14617139101028442, + "learning_rate": 3.887930972080465e-06, + "loss": 0.8598, + "step": 153630 + }, + { + "epoch": 1.1121486532461797, + "grad_norm": 0.1497737020254135, + "learning_rate": 3.887858585419879e-06, + "loss": 0.8527, + "step": 153640 + }, + { + "epoch": 1.112221039906766, + "grad_norm": 0.143030047416687, + "learning_rate": 3.887786198759293e-06, + "loss": 0.8613, + "step": 153650 + }, + { + "epoch": 1.112293426567352, + "grad_norm": 0.15191875398159027, + "learning_rate": 3.887713812098707e-06, + "loss": 0.8623, + "step": 153660 + }, + { + "epoch": 1.1123658132279384, + "grad_norm": 0.14382189512252808, + "learning_rate": 3.887641425438121e-06, + "loss": 0.8587, + "step": 153670 + }, + { + "epoch": 1.1124381998885244, + "grad_norm": 0.15472808480262756, + "learning_rate": 3.887569038777534e-06, + "loss": 0.8605, + "step": 153680 + }, + { + "epoch": 1.1125105865491107, + "grad_norm": 0.15003816783428192, + "learning_rate": 3.887496652116948e-06, + "loss": 0.8393, + "step": 153690 + }, + { + "epoch": 1.112582973209697, + "grad_norm": 0.17156195640563965, + "learning_rate": 3.887424265456362e-06, + "loss": 0.8712, + "step": 153700 + }, + { + "epoch": 1.112655359870283, + "grad_norm": 0.13888807594776154, + "learning_rate": 3.887351878795776e-06, + "loss": 0.8557, + "step": 153710 + }, + { + "epoch": 1.1127277465308694, + "grad_norm": 0.13991951942443848, + "learning_rate": 3.88727949213519e-06, + "loss": 0.8459, + "step": 153720 + }, + { + "epoch": 1.1128001331914554, + "grad_norm": 0.16196198761463165, + "learning_rate": 3.887207105474603e-06, + "loss": 0.8508, + "step": 153730 + }, + { + "epoch": 1.1128725198520417, + "grad_norm": 0.14788612723350525, + "learning_rate": 3.887134718814018e-06, + "loss": 0.8425, + "step": 153740 + }, + { + "epoch": 1.1129449065126278, + "grad_norm": 0.1603209227323532, + "learning_rate": 3.887062332153431e-06, + "loss": 0.85, + "step": 153750 + }, + { + "epoch": 1.113017293173214, + "grad_norm": 0.16001610457897186, + "learning_rate": 3.886989945492845e-06, + "loss": 0.842, + "step": 153760 + }, + { + "epoch": 1.1130896798338001, + "grad_norm": 0.18758459389209747, + "learning_rate": 3.8869175588322585e-06, + "loss": 0.8503, + "step": 153770 + }, + { + "epoch": 1.1131620664943864, + "grad_norm": 0.1522257775068283, + "learning_rate": 3.886845172171673e-06, + "loss": 0.8627, + "step": 153780 + }, + { + "epoch": 1.1132344531549725, + "grad_norm": 0.15141525864601135, + "learning_rate": 3.886772785511087e-06, + "loss": 0.8585, + "step": 153790 + }, + { + "epoch": 1.1133068398155588, + "grad_norm": 0.22884730994701385, + "learning_rate": 3.8867003988505e-06, + "loss": 0.8604, + "step": 153800 + }, + { + "epoch": 1.113379226476145, + "grad_norm": 0.15862050652503967, + "learning_rate": 3.886628012189914e-06, + "loss": 0.8575, + "step": 153810 + }, + { + "epoch": 1.1134516131367311, + "grad_norm": 0.17081834375858307, + "learning_rate": 3.886555625529328e-06, + "loss": 0.8704, + "step": 153820 + }, + { + "epoch": 1.1135239997973174, + "grad_norm": 0.14992080628871918, + "learning_rate": 3.886483238868742e-06, + "loss": 0.864, + "step": 153830 + }, + { + "epoch": 1.1135963864579035, + "grad_norm": 0.16578203439712524, + "learning_rate": 3.8864108522081555e-06, + "loss": 0.8579, + "step": 153840 + }, + { + "epoch": 1.1136687731184898, + "grad_norm": 0.1445264369249344, + "learning_rate": 3.886338465547569e-06, + "loss": 0.8607, + "step": 153850 + }, + { + "epoch": 1.1137411597790758, + "grad_norm": 0.20468395948410034, + "learning_rate": 3.886266078886984e-06, + "loss": 0.8534, + "step": 153860 + }, + { + "epoch": 1.1138135464396621, + "grad_norm": 0.15679140388965607, + "learning_rate": 3.886193692226397e-06, + "loss": 0.8537, + "step": 153870 + }, + { + "epoch": 1.1138859331002482, + "grad_norm": 0.1447470486164093, + "learning_rate": 3.886121305565811e-06, + "loss": 0.8484, + "step": 153880 + }, + { + "epoch": 1.1139583197608345, + "grad_norm": 0.15149566531181335, + "learning_rate": 3.8860489189052245e-06, + "loss": 0.8667, + "step": 153890 + }, + { + "epoch": 1.1140307064214205, + "grad_norm": 0.15107187628746033, + "learning_rate": 3.885976532244638e-06, + "loss": 0.8702, + "step": 153900 + }, + { + "epoch": 1.1141030930820068, + "grad_norm": 0.15003450214862823, + "learning_rate": 3.885904145584052e-06, + "loss": 0.8469, + "step": 153910 + }, + { + "epoch": 1.1141754797425931, + "grad_norm": 0.1784355640411377, + "learning_rate": 3.885831758923465e-06, + "loss": 0.8632, + "step": 153920 + }, + { + "epoch": 1.1142478664031792, + "grad_norm": 0.16152457892894745, + "learning_rate": 3.88575937226288e-06, + "loss": 0.8586, + "step": 153930 + }, + { + "epoch": 1.1143202530637655, + "grad_norm": 0.15195733308792114, + "learning_rate": 3.885686985602293e-06, + "loss": 0.863, + "step": 153940 + }, + { + "epoch": 1.1143926397243515, + "grad_norm": 0.14799615740776062, + "learning_rate": 3.885614598941707e-06, + "loss": 0.8429, + "step": 153950 + }, + { + "epoch": 1.1144650263849378, + "grad_norm": 0.15094245970249176, + "learning_rate": 3.885542212281121e-06, + "loss": 0.8488, + "step": 153960 + }, + { + "epoch": 1.114537413045524, + "grad_norm": 0.15210039913654327, + "learning_rate": 3.885469825620535e-06, + "loss": 0.8584, + "step": 153970 + }, + { + "epoch": 1.1146097997061102, + "grad_norm": 0.16268359124660492, + "learning_rate": 3.885397438959949e-06, + "loss": 0.8587, + "step": 153980 + }, + { + "epoch": 1.1146821863666962, + "grad_norm": 0.15050064027309418, + "learning_rate": 3.885325052299362e-06, + "loss": 0.8557, + "step": 153990 + }, + { + "epoch": 1.1147545730272825, + "grad_norm": 0.15507516264915466, + "learning_rate": 3.885252665638776e-06, + "loss": 0.867, + "step": 154000 + }, + { + "epoch": 1.1148269596878686, + "grad_norm": 0.2021208256483078, + "learning_rate": 3.88518027897819e-06, + "loss": 0.8476, + "step": 154010 + }, + { + "epoch": 1.114899346348455, + "grad_norm": 0.15524426102638245, + "learning_rate": 3.885107892317604e-06, + "loss": 0.8567, + "step": 154020 + }, + { + "epoch": 1.1149717330090412, + "grad_norm": 0.14437155425548553, + "learning_rate": 3.885035505657018e-06, + "loss": 0.8575, + "step": 154030 + }, + { + "epoch": 1.1150441196696272, + "grad_norm": 0.15939441323280334, + "learning_rate": 3.884963118996431e-06, + "loss": 0.8591, + "step": 154040 + }, + { + "epoch": 1.1151165063302135, + "grad_norm": 0.15175363421440125, + "learning_rate": 3.884890732335846e-06, + "loss": 0.866, + "step": 154050 + }, + { + "epoch": 1.1151888929907996, + "grad_norm": 0.14421996474266052, + "learning_rate": 3.884818345675259e-06, + "loss": 0.8485, + "step": 154060 + }, + { + "epoch": 1.1152612796513859, + "grad_norm": 0.14754317700862885, + "learning_rate": 3.884745959014673e-06, + "loss": 0.8681, + "step": 154070 + }, + { + "epoch": 1.115333666311972, + "grad_norm": 0.16109803318977356, + "learning_rate": 3.8846735723540866e-06, + "loss": 0.8769, + "step": 154080 + }, + { + "epoch": 1.1154060529725582, + "grad_norm": 0.16321797668933868, + "learning_rate": 3.884601185693501e-06, + "loss": 0.8612, + "step": 154090 + }, + { + "epoch": 1.1154784396331443, + "grad_norm": 0.15551266074180603, + "learning_rate": 3.884528799032915e-06, + "loss": 0.865, + "step": 154100 + }, + { + "epoch": 1.1155508262937306, + "grad_norm": 0.15028153359889984, + "learning_rate": 3.884456412372328e-06, + "loss": 0.8584, + "step": 154110 + }, + { + "epoch": 1.1156232129543169, + "grad_norm": 0.14665335416793823, + "learning_rate": 3.884384025711742e-06, + "loss": 0.8349, + "step": 154120 + }, + { + "epoch": 1.115695599614903, + "grad_norm": 0.1615133285522461, + "learning_rate": 3.884311639051156e-06, + "loss": 0.8697, + "step": 154130 + }, + { + "epoch": 1.1157679862754892, + "grad_norm": 0.14545638859272003, + "learning_rate": 3.88423925239057e-06, + "loss": 0.8582, + "step": 154140 + }, + { + "epoch": 1.1158403729360753, + "grad_norm": 0.18917188048362732, + "learning_rate": 3.8841668657299836e-06, + "loss": 0.8593, + "step": 154150 + }, + { + "epoch": 1.1159127595966616, + "grad_norm": 0.1670539379119873, + "learning_rate": 3.884094479069397e-06, + "loss": 0.8695, + "step": 154160 + }, + { + "epoch": 1.1159851462572477, + "grad_norm": 0.1593600958585739, + "learning_rate": 3.884022092408812e-06, + "loss": 0.8738, + "step": 154170 + }, + { + "epoch": 1.116057532917834, + "grad_norm": 0.15494653582572937, + "learning_rate": 3.883949705748225e-06, + "loss": 0.8465, + "step": 154180 + }, + { + "epoch": 1.11612991957842, + "grad_norm": 0.15124863386154175, + "learning_rate": 3.883877319087639e-06, + "loss": 0.8538, + "step": 154190 + }, + { + "epoch": 1.1162023062390063, + "grad_norm": 0.1503598988056183, + "learning_rate": 3.8838049324270525e-06, + "loss": 0.8617, + "step": 154200 + }, + { + "epoch": 1.1162746928995926, + "grad_norm": 0.1482527107000351, + "learning_rate": 3.883732545766466e-06, + "loss": 0.8542, + "step": 154210 + }, + { + "epoch": 1.1163470795601786, + "grad_norm": 0.1634032428264618, + "learning_rate": 3.883660159105881e-06, + "loss": 0.8605, + "step": 154220 + }, + { + "epoch": 1.116419466220765, + "grad_norm": 0.14771081507205963, + "learning_rate": 3.883587772445294e-06, + "loss": 0.8564, + "step": 154230 + }, + { + "epoch": 1.116491852881351, + "grad_norm": 0.16932451725006104, + "learning_rate": 3.883515385784708e-06, + "loss": 0.8578, + "step": 154240 + }, + { + "epoch": 1.1165642395419373, + "grad_norm": 0.1720747947692871, + "learning_rate": 3.8834429991241214e-06, + "loss": 0.8435, + "step": 154250 + }, + { + "epoch": 1.1166366262025234, + "grad_norm": 0.15350791811943054, + "learning_rate": 3.883370612463536e-06, + "loss": 0.8628, + "step": 154260 + }, + { + "epoch": 1.1167090128631096, + "grad_norm": 0.162908136844635, + "learning_rate": 3.8832982258029495e-06, + "loss": 0.8472, + "step": 154270 + }, + { + "epoch": 1.1167813995236957, + "grad_norm": 0.1558998078107834, + "learning_rate": 3.883225839142363e-06, + "loss": 0.8482, + "step": 154280 + }, + { + "epoch": 1.116853786184282, + "grad_norm": 0.15881863236427307, + "learning_rate": 3.883153452481777e-06, + "loss": 0.8572, + "step": 154290 + }, + { + "epoch": 1.116926172844868, + "grad_norm": 0.15854284167289734, + "learning_rate": 3.883081065821191e-06, + "loss": 0.873, + "step": 154300 + }, + { + "epoch": 1.1169985595054543, + "grad_norm": 0.29879847168922424, + "learning_rate": 3.883008679160605e-06, + "loss": 0.8582, + "step": 154310 + }, + { + "epoch": 1.1170709461660406, + "grad_norm": 0.15802979469299316, + "learning_rate": 3.8829362925000184e-06, + "loss": 0.8642, + "step": 154320 + }, + { + "epoch": 1.1171433328266267, + "grad_norm": 0.1594230681657791, + "learning_rate": 3.882863905839432e-06, + "loss": 0.8585, + "step": 154330 + }, + { + "epoch": 1.117215719487213, + "grad_norm": 0.1390036940574646, + "learning_rate": 3.8827915191788465e-06, + "loss": 0.8615, + "step": 154340 + }, + { + "epoch": 1.117288106147799, + "grad_norm": 0.1467415988445282, + "learning_rate": 3.88271913251826e-06, + "loss": 0.8429, + "step": 154350 + }, + { + "epoch": 1.1173604928083853, + "grad_norm": 0.1545180082321167, + "learning_rate": 3.882646745857674e-06, + "loss": 0.857, + "step": 154360 + }, + { + "epoch": 1.1174328794689714, + "grad_norm": 0.1605067402124405, + "learning_rate": 3.882574359197087e-06, + "loss": 0.8628, + "step": 154370 + }, + { + "epoch": 1.1175052661295577, + "grad_norm": 0.1610914170742035, + "learning_rate": 3.882501972536502e-06, + "loss": 0.8567, + "step": 154380 + }, + { + "epoch": 1.1175776527901438, + "grad_norm": 0.19544139504432678, + "learning_rate": 3.8824295858759154e-06, + "loss": 0.851, + "step": 154390 + }, + { + "epoch": 1.11765003945073, + "grad_norm": 0.16312572360038757, + "learning_rate": 3.882357199215329e-06, + "loss": 0.8662, + "step": 154400 + }, + { + "epoch": 1.1177224261113161, + "grad_norm": 0.15379171073436737, + "learning_rate": 3.882284812554743e-06, + "loss": 0.8593, + "step": 154410 + }, + { + "epoch": 1.1177948127719024, + "grad_norm": 0.1567648947238922, + "learning_rate": 3.882212425894157e-06, + "loss": 0.8636, + "step": 154420 + }, + { + "epoch": 1.1178671994324887, + "grad_norm": 0.1432834267616272, + "learning_rate": 3.88214003923357e-06, + "loss": 0.8443, + "step": 154430 + }, + { + "epoch": 1.1179395860930748, + "grad_norm": 0.1487468183040619, + "learning_rate": 3.8820676525729835e-06, + "loss": 0.8509, + "step": 154440 + }, + { + "epoch": 1.118011972753661, + "grad_norm": 0.14194174110889435, + "learning_rate": 3.881995265912398e-06, + "loss": 0.8439, + "step": 154450 + }, + { + "epoch": 1.118084359414247, + "grad_norm": 0.15703342854976654, + "learning_rate": 3.881922879251812e-06, + "loss": 0.8675, + "step": 154460 + }, + { + "epoch": 1.1181567460748334, + "grad_norm": 0.15480118989944458, + "learning_rate": 3.881850492591225e-06, + "loss": 0.8605, + "step": 154470 + }, + { + "epoch": 1.1182291327354195, + "grad_norm": 0.14932669699192047, + "learning_rate": 3.881778105930639e-06, + "loss": 0.8424, + "step": 154480 + }, + { + "epoch": 1.1183015193960057, + "grad_norm": 0.16219106316566467, + "learning_rate": 3.881705719270053e-06, + "loss": 0.8539, + "step": 154490 + }, + { + "epoch": 1.1183739060565918, + "grad_norm": 0.1499941051006317, + "learning_rate": 3.881633332609467e-06, + "loss": 0.8615, + "step": 154500 + }, + { + "epoch": 1.118446292717178, + "grad_norm": 0.16468805074691772, + "learning_rate": 3.8815609459488805e-06, + "loss": 0.8536, + "step": 154510 + }, + { + "epoch": 1.1185186793777642, + "grad_norm": 0.1566213071346283, + "learning_rate": 3.881488559288294e-06, + "loss": 0.8603, + "step": 154520 + }, + { + "epoch": 1.1185910660383505, + "grad_norm": 0.14781454205513, + "learning_rate": 3.881416172627709e-06, + "loss": 0.8494, + "step": 154530 + }, + { + "epoch": 1.1186634526989367, + "grad_norm": 0.1491243541240692, + "learning_rate": 3.881343785967122e-06, + "loss": 0.8571, + "step": 154540 + }, + { + "epoch": 1.1187358393595228, + "grad_norm": 0.15416626632213593, + "learning_rate": 3.881271399306536e-06, + "loss": 0.8502, + "step": 154550 + }, + { + "epoch": 1.118808226020109, + "grad_norm": 0.1574944406747818, + "learning_rate": 3.8811990126459495e-06, + "loss": 0.8377, + "step": 154560 + }, + { + "epoch": 1.1188806126806952, + "grad_norm": 0.15383422374725342, + "learning_rate": 3.881126625985364e-06, + "loss": 0.8587, + "step": 154570 + }, + { + "epoch": 1.1189529993412815, + "grad_norm": 0.15116389095783234, + "learning_rate": 3.8810542393247775e-06, + "loss": 0.8461, + "step": 154580 + }, + { + "epoch": 1.1190253860018675, + "grad_norm": 0.15596124529838562, + "learning_rate": 3.880981852664191e-06, + "loss": 0.8434, + "step": 154590 + }, + { + "epoch": 1.1190977726624538, + "grad_norm": 0.15235568583011627, + "learning_rate": 3.880909466003605e-06, + "loss": 0.8597, + "step": 154600 + }, + { + "epoch": 1.1191701593230399, + "grad_norm": 0.1518736332654953, + "learning_rate": 3.880837079343019e-06, + "loss": 0.8583, + "step": 154610 + }, + { + "epoch": 1.1192425459836262, + "grad_norm": 0.14196760952472687, + "learning_rate": 3.880764692682433e-06, + "loss": 0.8663, + "step": 154620 + }, + { + "epoch": 1.1193149326442122, + "grad_norm": 0.15060287714004517, + "learning_rate": 3.8806923060218465e-06, + "loss": 0.8448, + "step": 154630 + }, + { + "epoch": 1.1193873193047985, + "grad_norm": 0.20425279438495636, + "learning_rate": 3.88061991936126e-06, + "loss": 0.8628, + "step": 154640 + }, + { + "epoch": 1.1194597059653848, + "grad_norm": 0.1478336602449417, + "learning_rate": 3.8805475327006745e-06, + "loss": 0.8626, + "step": 154650 + }, + { + "epoch": 1.1195320926259709, + "grad_norm": 0.14704637229442596, + "learning_rate": 3.880475146040088e-06, + "loss": 0.8552, + "step": 154660 + }, + { + "epoch": 1.1196044792865572, + "grad_norm": 0.2208547443151474, + "learning_rate": 3.880402759379502e-06, + "loss": 0.8501, + "step": 154670 + }, + { + "epoch": 1.1196768659471432, + "grad_norm": 0.15612949430942535, + "learning_rate": 3.880330372718915e-06, + "loss": 0.8516, + "step": 154680 + }, + { + "epoch": 1.1197492526077295, + "grad_norm": 0.15186789631843567, + "learning_rate": 3.88025798605833e-06, + "loss": 0.8542, + "step": 154690 + }, + { + "epoch": 1.1198216392683156, + "grad_norm": 0.16197703778743744, + "learning_rate": 3.8801855993977435e-06, + "loss": 0.8628, + "step": 154700 + }, + { + "epoch": 1.1198940259289019, + "grad_norm": 0.1460353136062622, + "learning_rate": 3.880113212737157e-06, + "loss": 0.871, + "step": 154710 + }, + { + "epoch": 1.119966412589488, + "grad_norm": 0.1506577581167221, + "learning_rate": 3.880040826076571e-06, + "loss": 0.8598, + "step": 154720 + }, + { + "epoch": 1.1200387992500742, + "grad_norm": 0.16504830121994019, + "learning_rate": 3.879968439415985e-06, + "loss": 0.8634, + "step": 154730 + }, + { + "epoch": 1.1201111859106603, + "grad_norm": 0.15591880679130554, + "learning_rate": 3.879896052755399e-06, + "loss": 0.8572, + "step": 154740 + }, + { + "epoch": 1.1201835725712466, + "grad_norm": 0.14666037261486053, + "learning_rate": 3.879823666094812e-06, + "loss": 0.8661, + "step": 154750 + }, + { + "epoch": 1.1202559592318329, + "grad_norm": 0.14609721302986145, + "learning_rate": 3.879751279434226e-06, + "loss": 0.8442, + "step": 154760 + }, + { + "epoch": 1.120328345892419, + "grad_norm": 0.15813161432743073, + "learning_rate": 3.8796788927736405e-06, + "loss": 0.8426, + "step": 154770 + }, + { + "epoch": 1.1204007325530052, + "grad_norm": 0.15691469609737396, + "learning_rate": 3.879606506113054e-06, + "loss": 0.8623, + "step": 154780 + }, + { + "epoch": 1.1204731192135913, + "grad_norm": 0.14903950691223145, + "learning_rate": 3.879534119452468e-06, + "loss": 0.8414, + "step": 154790 + }, + { + "epoch": 1.1205455058741776, + "grad_norm": 0.1798841953277588, + "learning_rate": 3.879461732791881e-06, + "loss": 0.8486, + "step": 154800 + }, + { + "epoch": 1.1206178925347636, + "grad_norm": 0.1467403918504715, + "learning_rate": 3.879389346131295e-06, + "loss": 0.8564, + "step": 154810 + }, + { + "epoch": 1.12069027919535, + "grad_norm": 0.14768868684768677, + "learning_rate": 3.879316959470709e-06, + "loss": 0.8532, + "step": 154820 + }, + { + "epoch": 1.120762665855936, + "grad_norm": 0.15052692592144012, + "learning_rate": 3.879244572810123e-06, + "loss": 0.8576, + "step": 154830 + }, + { + "epoch": 1.1208350525165223, + "grad_norm": 0.15779510140419006, + "learning_rate": 3.879172186149537e-06, + "loss": 0.8538, + "step": 154840 + }, + { + "epoch": 1.1209074391771083, + "grad_norm": 0.16950878500938416, + "learning_rate": 3.87909979948895e-06, + "loss": 0.852, + "step": 154850 + }, + { + "epoch": 1.1209798258376946, + "grad_norm": 0.16230851411819458, + "learning_rate": 3.879027412828365e-06, + "loss": 0.8514, + "step": 154860 + }, + { + "epoch": 1.121052212498281, + "grad_norm": 0.16090090572834015, + "learning_rate": 3.878955026167778e-06, + "loss": 0.8537, + "step": 154870 + }, + { + "epoch": 1.121124599158867, + "grad_norm": 0.15824168920516968, + "learning_rate": 3.878882639507192e-06, + "loss": 0.8446, + "step": 154880 + }, + { + "epoch": 1.1211969858194533, + "grad_norm": 0.145647332072258, + "learning_rate": 3.8788102528466056e-06, + "loss": 0.854, + "step": 154890 + }, + { + "epoch": 1.1212693724800393, + "grad_norm": 0.15013916790485382, + "learning_rate": 3.87873786618602e-06, + "loss": 0.8671, + "step": 154900 + }, + { + "epoch": 1.1213417591406256, + "grad_norm": 0.16897442936897278, + "learning_rate": 3.878665479525434e-06, + "loss": 0.8543, + "step": 154910 + }, + { + "epoch": 1.1214141458012117, + "grad_norm": 0.14996156096458435, + "learning_rate": 3.878593092864847e-06, + "loss": 0.8535, + "step": 154920 + }, + { + "epoch": 1.121486532461798, + "grad_norm": 0.16847410798072815, + "learning_rate": 3.878520706204261e-06, + "loss": 0.8508, + "step": 154930 + }, + { + "epoch": 1.121558919122384, + "grad_norm": 0.1546623855829239, + "learning_rate": 3.878448319543675e-06, + "loss": 0.8584, + "step": 154940 + }, + { + "epoch": 1.1216313057829703, + "grad_norm": 0.1652345508337021, + "learning_rate": 3.878375932883089e-06, + "loss": 0.8633, + "step": 154950 + }, + { + "epoch": 1.1217036924435564, + "grad_norm": 0.20880626142024994, + "learning_rate": 3.878303546222503e-06, + "loss": 0.8632, + "step": 154960 + }, + { + "epoch": 1.1217760791041427, + "grad_norm": 0.16059084236621857, + "learning_rate": 3.878231159561916e-06, + "loss": 0.8494, + "step": 154970 + }, + { + "epoch": 1.121848465764729, + "grad_norm": 0.15037423372268677, + "learning_rate": 3.87815877290133e-06, + "loss": 0.8516, + "step": 154980 + }, + { + "epoch": 1.121920852425315, + "grad_norm": 0.14622123539447784, + "learning_rate": 3.8780863862407434e-06, + "loss": 0.8621, + "step": 154990 + }, + { + "epoch": 1.1219932390859013, + "grad_norm": 0.1608671396970749, + "learning_rate": 3.878013999580157e-06, + "loss": 0.8603, + "step": 155000 + } + ], + "logging_steps": 10, + "max_steps": 690735, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 5000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.131881219057621e+19, + "train_batch_size": 80, + "trial_name": null, + "trial_params": null +}