{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 61000, "global_step": 130310, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00015348016268897246, "grad_norm": 8.691984176635742, "learning_rate": 1.5348016268897248e-08, "loss": 0.5718, "step": 10 }, { "epoch": 0.0003069603253779449, "grad_norm": 6.651424407958984, "learning_rate": 3.0696032537794495e-08, "loss": 0.5378, "step": 20 }, { "epoch": 0.00046044048806691734, "grad_norm": 6.30117654800415, "learning_rate": 4.604404880669174e-08, "loss": 0.5234, "step": 30 }, { "epoch": 0.0006139206507558898, "grad_norm": 7.479533672332764, "learning_rate": 6.139206507558899e-08, "loss": 0.5421, "step": 40 }, { "epoch": 0.0007674008134448623, "grad_norm": 7.3245086669921875, "learning_rate": 7.674008134448624e-08, "loss": 0.5615, "step": 50 }, { "epoch": 0.0009208809761338347, "grad_norm": 6.031955242156982, "learning_rate": 9.208809761338348e-08, "loss": 0.5298, "step": 60 }, { "epoch": 0.0010743611388228072, "grad_norm": 5.596158981323242, "learning_rate": 1.0743611388228072e-07, "loss": 0.5217, "step": 70 }, { "epoch": 0.0012278413015117797, "grad_norm": 6.613324165344238, "learning_rate": 1.2278413015117798e-07, "loss": 0.5532, "step": 80 }, { "epoch": 0.001381321464200752, "grad_norm": 6.804640293121338, "learning_rate": 1.3813214642007523e-07, "loss": 0.5404, "step": 90 }, { "epoch": 0.0015348016268897245, "grad_norm": 7.249266624450684, "learning_rate": 1.5348016268897247e-07, "loss": 0.5468, "step": 100 }, { "epoch": 0.001688281789578697, "grad_norm": 5.58872127532959, "learning_rate": 1.6882817895786971e-07, "loss": 0.5253, "step": 110 }, { "epoch": 0.0018417619522676694, "grad_norm": 4.902602672576904, "learning_rate": 1.8417619522676696e-07, "loss": 0.4629, "step": 120 }, { "epoch": 0.001995242114956642, "grad_norm": 4.893105506896973, "learning_rate": 1.995242114956642e-07, "loss": 0.4629, "step": 130 }, { "epoch": 0.0021487222776456144, "grad_norm": 4.370516300201416, "learning_rate": 2.1487222776456145e-07, "loss": 0.4661, "step": 140 }, { "epoch": 0.0023022024403345867, "grad_norm": 2.8548529148101807, "learning_rate": 2.3022024403345872e-07, "loss": 0.4225, "step": 150 }, { "epoch": 0.0024556826030235593, "grad_norm": 2.5361413955688477, "learning_rate": 2.4556826030235596e-07, "loss": 0.4024, "step": 160 }, { "epoch": 0.0026091627657125315, "grad_norm": 2.4152297973632812, "learning_rate": 2.609162765712532e-07, "loss": 0.41, "step": 170 }, { "epoch": 0.002762642928401504, "grad_norm": 2.253089666366577, "learning_rate": 2.7626429284015045e-07, "loss": 0.3881, "step": 180 }, { "epoch": 0.0029161230910904764, "grad_norm": 2.060492515563965, "learning_rate": 2.9161230910904767e-07, "loss": 0.3622, "step": 190 }, { "epoch": 0.003069603253779449, "grad_norm": 1.9946439266204834, "learning_rate": 3.0696032537794494e-07, "loss": 0.3506, "step": 200 }, { "epoch": 0.0032230834164684217, "grad_norm": 1.982726812362671, "learning_rate": 3.223083416468422e-07, "loss": 0.3067, "step": 210 }, { "epoch": 0.003376563579157394, "grad_norm": 1.8291927576065063, "learning_rate": 3.3765635791573943e-07, "loss": 0.3295, "step": 220 }, { "epoch": 0.0035300437418463665, "grad_norm": 1.590438961982727, "learning_rate": 3.5300437418463665e-07, "loss": 0.2949, "step": 230 }, { "epoch": 0.0036835239045353387, "grad_norm": 1.4990016222000122, "learning_rate": 3.683523904535339e-07, "loss": 0.3185, "step": 240 }, { "epoch": 0.0038370040672243114, "grad_norm": 1.797933578491211, "learning_rate": 3.8370040672243114e-07, "loss": 0.2722, "step": 250 }, { "epoch": 0.003990484229913284, "grad_norm": 1.8688817024230957, "learning_rate": 3.990484229913284e-07, "loss": 0.267, "step": 260 }, { "epoch": 0.004143964392602256, "grad_norm": 1.681073784828186, "learning_rate": 4.143964392602257e-07, "loss": 0.2654, "step": 270 }, { "epoch": 0.004297444555291229, "grad_norm": 1.691127061843872, "learning_rate": 4.297444555291229e-07, "loss": 0.2655, "step": 280 }, { "epoch": 0.004450924717980201, "grad_norm": 1.666329026222229, "learning_rate": 4.450924717980201e-07, "loss": 0.2797, "step": 290 }, { "epoch": 0.004604404880669173, "grad_norm": 1.4390430450439453, "learning_rate": 4.6044048806691744e-07, "loss": 0.2486, "step": 300 }, { "epoch": 0.004757885043358146, "grad_norm": 1.6849851608276367, "learning_rate": 4.7578850433581466e-07, "loss": 0.2836, "step": 310 }, { "epoch": 0.004911365206047119, "grad_norm": 1.5425904989242554, "learning_rate": 4.911365206047119e-07, "loss": 0.2541, "step": 320 }, { "epoch": 0.005064845368736091, "grad_norm": 1.6779205799102783, "learning_rate": 5.064845368736091e-07, "loss": 0.2345, "step": 330 }, { "epoch": 0.005218325531425063, "grad_norm": 1.7978744506835938, "learning_rate": 5.218325531425064e-07, "loss": 0.2341, "step": 340 }, { "epoch": 0.005371805694114036, "grad_norm": 1.5622626543045044, "learning_rate": 5.371805694114036e-07, "loss": 0.2241, "step": 350 }, { "epoch": 0.005525285856803008, "grad_norm": 1.7138158082962036, "learning_rate": 5.525285856803009e-07, "loss": 0.237, "step": 360 }, { "epoch": 0.005678766019491981, "grad_norm": 1.7302638292312622, "learning_rate": 5.678766019491981e-07, "loss": 0.2319, "step": 370 }, { "epoch": 0.005832246182180953, "grad_norm": 1.6608039140701294, "learning_rate": 5.832246182180953e-07, "loss": 0.2413, "step": 380 }, { "epoch": 0.005985726344869925, "grad_norm": 1.7030130624771118, "learning_rate": 5.985726344869926e-07, "loss": 0.2197, "step": 390 }, { "epoch": 0.006139206507558898, "grad_norm": 1.6259022951126099, "learning_rate": 6.139206507558899e-07, "loss": 0.2347, "step": 400 }, { "epoch": 0.006292686670247871, "grad_norm": 1.690609097480774, "learning_rate": 6.29268667024787e-07, "loss": 0.2252, "step": 410 }, { "epoch": 0.006446166832936843, "grad_norm": 1.916849970817566, "learning_rate": 6.446166832936844e-07, "loss": 0.2116, "step": 420 }, { "epoch": 0.006599646995625815, "grad_norm": 1.6760339736938477, "learning_rate": 6.599646995625816e-07, "loss": 0.247, "step": 430 }, { "epoch": 0.006753127158314788, "grad_norm": 1.7676575183868408, "learning_rate": 6.753127158314789e-07, "loss": 0.1854, "step": 440 }, { "epoch": 0.00690660732100376, "grad_norm": 1.5871663093566895, "learning_rate": 6.906607321003761e-07, "loss": 0.2212, "step": 450 }, { "epoch": 0.007060087483692733, "grad_norm": 1.826324462890625, "learning_rate": 7.060087483692733e-07, "loss": 0.1998, "step": 460 }, { "epoch": 0.007213567646381705, "grad_norm": 1.9471397399902344, "learning_rate": 7.213567646381706e-07, "loss": 0.2004, "step": 470 }, { "epoch": 0.0073670478090706775, "grad_norm": 1.7790535688400269, "learning_rate": 7.367047809070678e-07, "loss": 0.2063, "step": 480 }, { "epoch": 0.00752052797175965, "grad_norm": 1.7089639902114868, "learning_rate": 7.52052797175965e-07, "loss": 0.223, "step": 490 }, { "epoch": 0.007674008134448623, "grad_norm": 1.6562690734863281, "learning_rate": 7.674008134448623e-07, "loss": 0.1986, "step": 500 }, { "epoch": 0.007827488297137595, "grad_norm": 1.7514325380325317, "learning_rate": 7.827488297137597e-07, "loss": 0.196, "step": 510 }, { "epoch": 0.007980968459826567, "grad_norm": 1.8707454204559326, "learning_rate": 7.980968459826568e-07, "loss": 0.1848, "step": 520 }, { "epoch": 0.00813444862251554, "grad_norm": 1.8045165538787842, "learning_rate": 8.134448622515541e-07, "loss": 0.2093, "step": 530 }, { "epoch": 0.008287928785204512, "grad_norm": 1.84476637840271, "learning_rate": 8.287928785204514e-07, "loss": 0.2005, "step": 540 }, { "epoch": 0.008441408947893485, "grad_norm": 1.929309368133545, "learning_rate": 8.441408947893485e-07, "loss": 0.2242, "step": 550 }, { "epoch": 0.008594889110582458, "grad_norm": 1.5836937427520752, "learning_rate": 8.594889110582458e-07, "loss": 0.1896, "step": 560 }, { "epoch": 0.00874836927327143, "grad_norm": 1.470706582069397, "learning_rate": 8.74836927327143e-07, "loss": 0.1965, "step": 570 }, { "epoch": 0.008901849435960401, "grad_norm": 1.9242572784423828, "learning_rate": 8.901849435960402e-07, "loss": 0.1979, "step": 580 }, { "epoch": 0.009055329598649374, "grad_norm": 1.6284960508346558, "learning_rate": 9.055329598649375e-07, "loss": 0.1853, "step": 590 }, { "epoch": 0.009208809761338347, "grad_norm": 2.0550427436828613, "learning_rate": 9.208809761338349e-07, "loss": 0.198, "step": 600 }, { "epoch": 0.00936228992402732, "grad_norm": 1.747909426689148, "learning_rate": 9.36228992402732e-07, "loss": 0.1914, "step": 610 }, { "epoch": 0.009515770086716292, "grad_norm": 1.7831988334655762, "learning_rate": 9.515770086716293e-07, "loss": 0.1987, "step": 620 }, { "epoch": 0.009669250249405265, "grad_norm": 2.271171808242798, "learning_rate": 9.669250249405265e-07, "loss": 0.2113, "step": 630 }, { "epoch": 0.009822730412094237, "grad_norm": 1.9148186445236206, "learning_rate": 9.822730412094239e-07, "loss": 0.1972, "step": 640 }, { "epoch": 0.00997621057478321, "grad_norm": 1.7907600402832031, "learning_rate": 9.97621057478321e-07, "loss": 0.1844, "step": 650 }, { "epoch": 0.010129690737472183, "grad_norm": 1.747023105621338, "learning_rate": 1.0129690737472182e-06, "loss": 0.1739, "step": 660 }, { "epoch": 0.010283170900161153, "grad_norm": 1.9749536514282227, "learning_rate": 1.0283170900161156e-06, "loss": 0.1919, "step": 670 }, { "epoch": 0.010436651062850126, "grad_norm": 1.6480817794799805, "learning_rate": 1.0436651062850127e-06, "loss": 0.1822, "step": 680 }, { "epoch": 0.010590131225539099, "grad_norm": 1.958425760269165, "learning_rate": 1.05901312255391e-06, "loss": 0.1605, "step": 690 }, { "epoch": 0.010743611388228071, "grad_norm": 1.9732699394226074, "learning_rate": 1.0743611388228073e-06, "loss": 0.1771, "step": 700 }, { "epoch": 0.010897091550917044, "grad_norm": 1.804375410079956, "learning_rate": 1.0897091550917044e-06, "loss": 0.1859, "step": 710 }, { "epoch": 0.011050571713606017, "grad_norm": 1.890048861503601, "learning_rate": 1.1050571713606018e-06, "loss": 0.1554, "step": 720 }, { "epoch": 0.01120405187629499, "grad_norm": 1.7689001560211182, "learning_rate": 1.120405187629499e-06, "loss": 0.177, "step": 730 }, { "epoch": 0.011357532038983962, "grad_norm": 1.967545747756958, "learning_rate": 1.1357532038983961e-06, "loss": 0.1926, "step": 740 }, { "epoch": 0.011511012201672935, "grad_norm": 1.9637516736984253, "learning_rate": 1.1511012201672935e-06, "loss": 0.1734, "step": 750 }, { "epoch": 0.011664492364361905, "grad_norm": 1.8092924356460571, "learning_rate": 1.1664492364361907e-06, "loss": 0.182, "step": 760 }, { "epoch": 0.011817972527050878, "grad_norm": 1.9382688999176025, "learning_rate": 1.1817972527050878e-06, "loss": 0.1578, "step": 770 }, { "epoch": 0.01197145268973985, "grad_norm": 1.9681086540222168, "learning_rate": 1.1971452689739852e-06, "loss": 0.1692, "step": 780 }, { "epoch": 0.012124932852428823, "grad_norm": 2.1349213123321533, "learning_rate": 1.2124932852428826e-06, "loss": 0.162, "step": 790 }, { "epoch": 0.012278413015117796, "grad_norm": 1.7333012819290161, "learning_rate": 1.2278413015117798e-06, "loss": 0.1799, "step": 800 }, { "epoch": 0.012431893177806769, "grad_norm": 2.147516965866089, "learning_rate": 1.243189317780677e-06, "loss": 0.1355, "step": 810 }, { "epoch": 0.012585373340495741, "grad_norm": 1.4284454584121704, "learning_rate": 1.258537334049574e-06, "loss": 0.1659, "step": 820 }, { "epoch": 0.012738853503184714, "grad_norm": 2.2094242572784424, "learning_rate": 1.2738853503184715e-06, "loss": 0.1756, "step": 830 }, { "epoch": 0.012892333665873687, "grad_norm": 1.4826165437698364, "learning_rate": 1.2892333665873688e-06, "loss": 0.1676, "step": 840 }, { "epoch": 0.013045813828562658, "grad_norm": 1.8731261491775513, "learning_rate": 1.3045813828562658e-06, "loss": 0.1797, "step": 850 }, { "epoch": 0.01319929399125163, "grad_norm": 1.8527837991714478, "learning_rate": 1.3199293991251632e-06, "loss": 0.1516, "step": 860 }, { "epoch": 0.013352774153940603, "grad_norm": 1.8093218803405762, "learning_rate": 1.3352774153940603e-06, "loss": 0.1745, "step": 870 }, { "epoch": 0.013506254316629576, "grad_norm": 2.1442675590515137, "learning_rate": 1.3506254316629577e-06, "loss": 0.1426, "step": 880 }, { "epoch": 0.013659734479318548, "grad_norm": 1.770102620124817, "learning_rate": 1.3659734479318549e-06, "loss": 0.1625, "step": 890 }, { "epoch": 0.01381321464200752, "grad_norm": 1.7199360132217407, "learning_rate": 1.3813214642007523e-06, "loss": 0.1656, "step": 900 }, { "epoch": 0.013966694804696493, "grad_norm": 1.8848552703857422, "learning_rate": 1.3966694804696492e-06, "loss": 0.1499, "step": 910 }, { "epoch": 0.014120174967385466, "grad_norm": 1.6305171251296997, "learning_rate": 1.4120174967385466e-06, "loss": 0.1481, "step": 920 }, { "epoch": 0.014273655130074439, "grad_norm": 1.774765968322754, "learning_rate": 1.427365513007444e-06, "loss": 0.1534, "step": 930 }, { "epoch": 0.01442713529276341, "grad_norm": 1.583726167678833, "learning_rate": 1.4427135292763411e-06, "loss": 0.1517, "step": 940 }, { "epoch": 0.014580615455452382, "grad_norm": 2.141779661178589, "learning_rate": 1.4580615455452385e-06, "loss": 0.1729, "step": 950 }, { "epoch": 0.014734095618141355, "grad_norm": 1.6508026123046875, "learning_rate": 1.4734095618141357e-06, "loss": 0.1447, "step": 960 }, { "epoch": 0.014887575780830328, "grad_norm": 2.000558614730835, "learning_rate": 1.488757578083033e-06, "loss": 0.1449, "step": 970 }, { "epoch": 0.0150410559435193, "grad_norm": 1.7961822748184204, "learning_rate": 1.50410559435193e-06, "loss": 0.1676, "step": 980 }, { "epoch": 0.015194536106208273, "grad_norm": 1.557207703590393, "learning_rate": 1.5194536106208274e-06, "loss": 0.1519, "step": 990 }, { "epoch": 0.015348016268897246, "grad_norm": 2.0625758171081543, "learning_rate": 1.5348016268897245e-06, "loss": 0.1778, "step": 1000 }, { "epoch": 0.015501496431586218, "grad_norm": 1.9931068420410156, "learning_rate": 1.550149643158622e-06, "loss": 0.157, "step": 1010 }, { "epoch": 0.01565497659427519, "grad_norm": 1.8583245277404785, "learning_rate": 1.5654976594275193e-06, "loss": 0.18, "step": 1020 }, { "epoch": 0.015808456756964163, "grad_norm": 2.0031728744506836, "learning_rate": 1.5808456756964163e-06, "loss": 0.1532, "step": 1030 }, { "epoch": 0.015961936919653134, "grad_norm": 1.7851054668426514, "learning_rate": 1.5961936919653136e-06, "loss": 0.1604, "step": 1040 }, { "epoch": 0.01611541708234211, "grad_norm": 1.886083722114563, "learning_rate": 1.6115417082342108e-06, "loss": 0.1681, "step": 1050 }, { "epoch": 0.01626889724503108, "grad_norm": 1.931001901626587, "learning_rate": 1.6268897245031082e-06, "loss": 0.1517, "step": 1060 }, { "epoch": 0.01642237740772005, "grad_norm": 1.739009976387024, "learning_rate": 1.6422377407720053e-06, "loss": 0.1636, "step": 1070 }, { "epoch": 0.016575857570409025, "grad_norm": 1.76314115524292, "learning_rate": 1.6575857570409027e-06, "loss": 0.156, "step": 1080 }, { "epoch": 0.016729337733097996, "grad_norm": 1.7624787092208862, "learning_rate": 1.6729337733097997e-06, "loss": 0.1494, "step": 1090 }, { "epoch": 0.01688281789578697, "grad_norm": 1.8607791662216187, "learning_rate": 1.688281789578697e-06, "loss": 0.1697, "step": 1100 }, { "epoch": 0.01703629805847594, "grad_norm": 1.698325753211975, "learning_rate": 1.7036298058475944e-06, "loss": 0.1342, "step": 1110 }, { "epoch": 0.017189778221164916, "grad_norm": 1.841417908668518, "learning_rate": 1.7189778221164916e-06, "loss": 0.1215, "step": 1120 }, { "epoch": 0.017343258383853886, "grad_norm": 1.5874903202056885, "learning_rate": 1.734325838385389e-06, "loss": 0.1317, "step": 1130 }, { "epoch": 0.01749673854654286, "grad_norm": 1.558768630027771, "learning_rate": 1.749673854654286e-06, "loss": 0.1796, "step": 1140 }, { "epoch": 0.017650218709231832, "grad_norm": 1.692504644393921, "learning_rate": 1.7650218709231833e-06, "loss": 0.1371, "step": 1150 }, { "epoch": 0.017803698871920803, "grad_norm": 1.7285799980163574, "learning_rate": 1.7803698871920805e-06, "loss": 0.1778, "step": 1160 }, { "epoch": 0.017957179034609777, "grad_norm": 1.8717280626296997, "learning_rate": 1.7957179034609778e-06, "loss": 0.1366, "step": 1170 }, { "epoch": 0.018110659197298748, "grad_norm": 1.7939692735671997, "learning_rate": 1.811065919729875e-06, "loss": 0.1526, "step": 1180 }, { "epoch": 0.018264139359987722, "grad_norm": 2.048234224319458, "learning_rate": 1.8264139359987724e-06, "loss": 0.1356, "step": 1190 }, { "epoch": 0.018417619522676693, "grad_norm": 1.7364144325256348, "learning_rate": 1.8417619522676698e-06, "loss": 0.1434, "step": 1200 }, { "epoch": 0.018571099685365668, "grad_norm": 1.8054438829421997, "learning_rate": 1.8571099685365667e-06, "loss": 0.147, "step": 1210 }, { "epoch": 0.01872457984805464, "grad_norm": 1.7152273654937744, "learning_rate": 1.872457984805464e-06, "loss": 0.1589, "step": 1220 }, { "epoch": 0.018878060010743613, "grad_norm": 1.6838147640228271, "learning_rate": 1.8878060010743612e-06, "loss": 0.1367, "step": 1230 }, { "epoch": 0.019031540173432584, "grad_norm": 1.7368314266204834, "learning_rate": 1.9031540173432586e-06, "loss": 0.1466, "step": 1240 }, { "epoch": 0.019185020336121555, "grad_norm": 1.8172662258148193, "learning_rate": 1.918502033612156e-06, "loss": 0.1205, "step": 1250 }, { "epoch": 0.01933850049881053, "grad_norm": 1.7172917127609253, "learning_rate": 1.933850049881053e-06, "loss": 0.1432, "step": 1260 }, { "epoch": 0.0194919806614995, "grad_norm": 2.018237829208374, "learning_rate": 1.94919806614995e-06, "loss": 0.1454, "step": 1270 }, { "epoch": 0.019645460824188474, "grad_norm": 1.6191505193710327, "learning_rate": 1.9645460824188477e-06, "loss": 0.1406, "step": 1280 }, { "epoch": 0.019798940986877445, "grad_norm": 1.8721007108688354, "learning_rate": 1.979894098687745e-06, "loss": 0.1456, "step": 1290 }, { "epoch": 0.01995242114956642, "grad_norm": 1.8405985832214355, "learning_rate": 1.995242114956642e-06, "loss": 0.1525, "step": 1300 }, { "epoch": 0.02010590131225539, "grad_norm": 1.7736660242080688, "learning_rate": 2.010590131225539e-06, "loss": 0.1232, "step": 1310 }, { "epoch": 0.020259381474944365, "grad_norm": 1.8236079216003418, "learning_rate": 2.0259381474944364e-06, "loss": 0.1544, "step": 1320 }, { "epoch": 0.020412861637633336, "grad_norm": 1.898787260055542, "learning_rate": 2.041286163763334e-06, "loss": 0.158, "step": 1330 }, { "epoch": 0.020566341800322307, "grad_norm": 1.6456942558288574, "learning_rate": 2.056634180032231e-06, "loss": 0.1436, "step": 1340 }, { "epoch": 0.02071982196301128, "grad_norm": 1.556369423866272, "learning_rate": 2.0719821963011283e-06, "loss": 0.1431, "step": 1350 }, { "epoch": 0.020873302125700252, "grad_norm": 1.56626558303833, "learning_rate": 2.0873302125700255e-06, "loss": 0.1264, "step": 1360 }, { "epoch": 0.021026782288389226, "grad_norm": 1.784366250038147, "learning_rate": 2.1026782288389226e-06, "loss": 0.1517, "step": 1370 }, { "epoch": 0.021180262451078197, "grad_norm": 2.0411901473999023, "learning_rate": 2.11802624510782e-06, "loss": 0.1332, "step": 1380 }, { "epoch": 0.021333742613767172, "grad_norm": 1.5264734029769897, "learning_rate": 2.1333742613767174e-06, "loss": 0.1324, "step": 1390 }, { "epoch": 0.021487222776456143, "grad_norm": 2.014726161956787, "learning_rate": 2.1487222776456145e-06, "loss": 0.1226, "step": 1400 }, { "epoch": 0.021640702939145117, "grad_norm": 1.5493303537368774, "learning_rate": 2.1640702939145117e-06, "loss": 0.1651, "step": 1410 }, { "epoch": 0.021794183101834088, "grad_norm": 1.6850491762161255, "learning_rate": 2.179418310183409e-06, "loss": 0.1479, "step": 1420 }, { "epoch": 0.02194766326452306, "grad_norm": 1.6816450357437134, "learning_rate": 2.194766326452306e-06, "loss": 0.1437, "step": 1430 }, { "epoch": 0.022101143427212033, "grad_norm": 1.7871087789535522, "learning_rate": 2.2101143427212036e-06, "loss": 0.1502, "step": 1440 }, { "epoch": 0.022254623589901004, "grad_norm": 1.6719350814819336, "learning_rate": 2.2254623589901008e-06, "loss": 0.1258, "step": 1450 }, { "epoch": 0.02240810375258998, "grad_norm": 1.793506383895874, "learning_rate": 2.240810375258998e-06, "loss": 0.1213, "step": 1460 }, { "epoch": 0.02256158391527895, "grad_norm": 2.0980639457702637, "learning_rate": 2.2561583915278955e-06, "loss": 0.1267, "step": 1470 }, { "epoch": 0.022715064077967924, "grad_norm": 1.5368170738220215, "learning_rate": 2.2715064077967923e-06, "loss": 0.1303, "step": 1480 }, { "epoch": 0.022868544240656895, "grad_norm": 1.5718752145767212, "learning_rate": 2.28685442406569e-06, "loss": 0.1332, "step": 1490 }, { "epoch": 0.02302202440334587, "grad_norm": 1.7710720300674438, "learning_rate": 2.302202440334587e-06, "loss": 0.1482, "step": 1500 }, { "epoch": 0.02317550456603484, "grad_norm": 2.1393415927886963, "learning_rate": 2.317550456603484e-06, "loss": 0.1187, "step": 1510 }, { "epoch": 0.02332898472872381, "grad_norm": 1.7672243118286133, "learning_rate": 2.3328984728723814e-06, "loss": 0.1539, "step": 1520 }, { "epoch": 0.023482464891412785, "grad_norm": 1.716456651687622, "learning_rate": 2.3482464891412785e-06, "loss": 0.131, "step": 1530 }, { "epoch": 0.023635945054101756, "grad_norm": 1.9931210279464722, "learning_rate": 2.3635945054101757e-06, "loss": 0.1166, "step": 1540 }, { "epoch": 0.02378942521679073, "grad_norm": 1.6028945446014404, "learning_rate": 2.3789425216790733e-06, "loss": 0.1092, "step": 1550 }, { "epoch": 0.0239429053794797, "grad_norm": 1.974164605140686, "learning_rate": 2.3942905379479704e-06, "loss": 0.1383, "step": 1560 }, { "epoch": 0.024096385542168676, "grad_norm": 1.5515499114990234, "learning_rate": 2.4096385542168676e-06, "loss": 0.1257, "step": 1570 }, { "epoch": 0.024249865704857647, "grad_norm": 1.8173274993896484, "learning_rate": 2.424986570485765e-06, "loss": 0.1248, "step": 1580 }, { "epoch": 0.02440334586754662, "grad_norm": 1.7206352949142456, "learning_rate": 2.440334586754662e-06, "loss": 0.1418, "step": 1590 }, { "epoch": 0.024556826030235592, "grad_norm": 1.457000970840454, "learning_rate": 2.4556826030235595e-06, "loss": 0.1375, "step": 1600 }, { "epoch": 0.024710306192924563, "grad_norm": 1.629765510559082, "learning_rate": 2.4710306192924567e-06, "loss": 0.1251, "step": 1610 }, { "epoch": 0.024863786355613537, "grad_norm": 1.5692909955978394, "learning_rate": 2.486378635561354e-06, "loss": 0.1329, "step": 1620 }, { "epoch": 0.02501726651830251, "grad_norm": 2.053501605987549, "learning_rate": 2.5017266518302514e-06, "loss": 0.1312, "step": 1630 }, { "epoch": 0.025170746680991483, "grad_norm": 2.0697193145751953, "learning_rate": 2.517074668099148e-06, "loss": 0.16, "step": 1640 }, { "epoch": 0.025324226843680454, "grad_norm": 2.013000011444092, "learning_rate": 2.5324226843680454e-06, "loss": 0.1229, "step": 1650 }, { "epoch": 0.025477707006369428, "grad_norm": 1.717501163482666, "learning_rate": 2.547770700636943e-06, "loss": 0.1236, "step": 1660 }, { "epoch": 0.0256311871690584, "grad_norm": 1.6534180641174316, "learning_rate": 2.56311871690584e-06, "loss": 0.1248, "step": 1670 }, { "epoch": 0.025784667331747373, "grad_norm": 1.7379837036132812, "learning_rate": 2.5784667331747377e-06, "loss": 0.13, "step": 1680 }, { "epoch": 0.025938147494436344, "grad_norm": 1.7390923500061035, "learning_rate": 2.5938147494436344e-06, "loss": 0.1257, "step": 1690 }, { "epoch": 0.026091627657125315, "grad_norm": 1.7326284646987915, "learning_rate": 2.6091627657125316e-06, "loss": 0.1417, "step": 1700 }, { "epoch": 0.02624510781981429, "grad_norm": 1.8948469161987305, "learning_rate": 2.624510781981429e-06, "loss": 0.1458, "step": 1710 }, { "epoch": 0.02639858798250326, "grad_norm": 1.7587562799453735, "learning_rate": 2.6398587982503264e-06, "loss": 0.1348, "step": 1720 }, { "epoch": 0.026552068145192235, "grad_norm": 1.5430735349655151, "learning_rate": 2.6552068145192235e-06, "loss": 0.1263, "step": 1730 }, { "epoch": 0.026705548307881206, "grad_norm": 1.7571176290512085, "learning_rate": 2.6705548307881207e-06, "loss": 0.1259, "step": 1740 }, { "epoch": 0.02685902847057018, "grad_norm": 1.7568180561065674, "learning_rate": 2.6859028470570183e-06, "loss": 0.1041, "step": 1750 }, { "epoch": 0.02701250863325915, "grad_norm": 1.549731731414795, "learning_rate": 2.7012508633259154e-06, "loss": 0.1305, "step": 1760 }, { "epoch": 0.027165988795948125, "grad_norm": 1.7060325145721436, "learning_rate": 2.716598879594813e-06, "loss": 0.1247, "step": 1770 }, { "epoch": 0.027319468958637096, "grad_norm": 1.4246872663497925, "learning_rate": 2.7319468958637098e-06, "loss": 0.1326, "step": 1780 }, { "epoch": 0.027472949121326067, "grad_norm": 1.4144030809402466, "learning_rate": 2.747294912132607e-06, "loss": 0.1209, "step": 1790 }, { "epoch": 0.02762642928401504, "grad_norm": 1.667770266532898, "learning_rate": 2.7626429284015045e-06, "loss": 0.1194, "step": 1800 }, { "epoch": 0.027779909446704013, "grad_norm": 1.489606499671936, "learning_rate": 2.7779909446704017e-06, "loss": 0.111, "step": 1810 }, { "epoch": 0.027933389609392987, "grad_norm": 1.3510854244232178, "learning_rate": 2.7933389609392984e-06, "loss": 0.1194, "step": 1820 }, { "epoch": 0.028086869772081958, "grad_norm": 1.6378982067108154, "learning_rate": 2.808686977208196e-06, "loss": 0.1405, "step": 1830 }, { "epoch": 0.028240349934770932, "grad_norm": 1.6992261409759521, "learning_rate": 2.824034993477093e-06, "loss": 0.1348, "step": 1840 }, { "epoch": 0.028393830097459903, "grad_norm": 1.7998887300491333, "learning_rate": 2.8393830097459908e-06, "loss": 0.1473, "step": 1850 }, { "epoch": 0.028547310260148877, "grad_norm": 1.3996528387069702, "learning_rate": 2.854731026014888e-06, "loss": 0.1151, "step": 1860 }, { "epoch": 0.02870079042283785, "grad_norm": 1.410032033920288, "learning_rate": 2.8700790422837847e-06, "loss": 0.1275, "step": 1870 }, { "epoch": 0.02885427058552682, "grad_norm": 1.895951747894287, "learning_rate": 2.8854270585526823e-06, "loss": 0.1191, "step": 1880 }, { "epoch": 0.029007750748215794, "grad_norm": 1.3705050945281982, "learning_rate": 2.9007750748215794e-06, "loss": 0.1287, "step": 1890 }, { "epoch": 0.029161230910904765, "grad_norm": 1.9778037071228027, "learning_rate": 2.916123091090477e-06, "loss": 0.14, "step": 1900 }, { "epoch": 0.02931471107359374, "grad_norm": 1.6928542852401733, "learning_rate": 2.9314711073593738e-06, "loss": 0.1406, "step": 1910 }, { "epoch": 0.02946819123628271, "grad_norm": 1.8941030502319336, "learning_rate": 2.9468191236282714e-06, "loss": 0.1278, "step": 1920 }, { "epoch": 0.029621671398971684, "grad_norm": 1.3083969354629517, "learning_rate": 2.9621671398971685e-06, "loss": 0.1066, "step": 1930 }, { "epoch": 0.029775151561660655, "grad_norm": 1.3786259889602661, "learning_rate": 2.977515156166066e-06, "loss": 0.1358, "step": 1940 }, { "epoch": 0.029928631724349626, "grad_norm": 1.6872793436050415, "learning_rate": 2.9928631724349633e-06, "loss": 0.1223, "step": 1950 }, { "epoch": 0.0300821118870386, "grad_norm": 1.722474217414856, "learning_rate": 3.00821118870386e-06, "loss": 0.1149, "step": 1960 }, { "epoch": 0.03023559204972757, "grad_norm": 1.6543012857437134, "learning_rate": 3.0235592049727576e-06, "loss": 0.1287, "step": 1970 }, { "epoch": 0.030389072212416546, "grad_norm": 2.1337907314300537, "learning_rate": 3.0389072212416548e-06, "loss": 0.119, "step": 1980 }, { "epoch": 0.030542552375105517, "grad_norm": 1.3736518621444702, "learning_rate": 3.0542552375105524e-06, "loss": 0.1356, "step": 1990 }, { "epoch": 0.03069603253779449, "grad_norm": 1.9878664016723633, "learning_rate": 3.069603253779449e-06, "loss": 0.1369, "step": 2000 }, { "epoch": 0.030849512700483462, "grad_norm": 1.3858002424240112, "learning_rate": 3.0849512700483463e-06, "loss": 0.1348, "step": 2010 }, { "epoch": 0.031002992863172436, "grad_norm": 1.502766728401184, "learning_rate": 3.100299286317244e-06, "loss": 0.1393, "step": 2020 }, { "epoch": 0.031156473025861407, "grad_norm": 1.4729043245315552, "learning_rate": 3.115647302586141e-06, "loss": 0.1151, "step": 2030 }, { "epoch": 0.03130995318855038, "grad_norm": 1.8312392234802246, "learning_rate": 3.1309953188550386e-06, "loss": 0.1266, "step": 2040 }, { "epoch": 0.03146343335123935, "grad_norm": 1.5275013446807861, "learning_rate": 3.1463433351239353e-06, "loss": 0.1253, "step": 2050 }, { "epoch": 0.03161691351392833, "grad_norm": 1.4817765951156616, "learning_rate": 3.1616913513928325e-06, "loss": 0.123, "step": 2060 }, { "epoch": 0.0317703936766173, "grad_norm": 1.7660614252090454, "learning_rate": 3.17703936766173e-06, "loss": 0.1009, "step": 2070 }, { "epoch": 0.03192387383930627, "grad_norm": 1.4559096097946167, "learning_rate": 3.1923873839306273e-06, "loss": 0.1363, "step": 2080 }, { "epoch": 0.03207735400199524, "grad_norm": 1.4523926973342896, "learning_rate": 3.2077354001995244e-06, "loss": 0.1044, "step": 2090 }, { "epoch": 0.03223083416468422, "grad_norm": 1.935306429862976, "learning_rate": 3.2230834164684216e-06, "loss": 0.1084, "step": 2100 }, { "epoch": 0.03238431432737319, "grad_norm": 1.7230093479156494, "learning_rate": 3.238431432737319e-06, "loss": 0.1327, "step": 2110 }, { "epoch": 0.03253779449006216, "grad_norm": 1.6554601192474365, "learning_rate": 3.2537794490062163e-06, "loss": 0.1249, "step": 2120 }, { "epoch": 0.03269127465275113, "grad_norm": 1.6776564121246338, "learning_rate": 3.2691274652751135e-06, "loss": 0.1185, "step": 2130 }, { "epoch": 0.0328447548154401, "grad_norm": 1.4888091087341309, "learning_rate": 3.2844754815440107e-06, "loss": 0.1237, "step": 2140 }, { "epoch": 0.03299823497812908, "grad_norm": 1.9441604614257812, "learning_rate": 3.299823497812908e-06, "loss": 0.1262, "step": 2150 }, { "epoch": 0.03315171514081805, "grad_norm": 1.5613691806793213, "learning_rate": 3.3151715140818054e-06, "loss": 0.1203, "step": 2160 }, { "epoch": 0.03330519530350702, "grad_norm": 1.9751735925674438, "learning_rate": 3.3305195303507026e-06, "loss": 0.1305, "step": 2170 }, { "epoch": 0.03345867546619599, "grad_norm": 1.5188331604003906, "learning_rate": 3.3458675466195993e-06, "loss": 0.1215, "step": 2180 }, { "epoch": 0.03361215562888497, "grad_norm": 1.5673705339431763, "learning_rate": 3.361215562888497e-06, "loss": 0.1196, "step": 2190 }, { "epoch": 0.03376563579157394, "grad_norm": 1.5137544870376587, "learning_rate": 3.376563579157394e-06, "loss": 0.1195, "step": 2200 }, { "epoch": 0.03391911595426291, "grad_norm": 1.7304657697677612, "learning_rate": 3.3919115954262917e-06, "loss": 0.1343, "step": 2210 }, { "epoch": 0.03407259611695188, "grad_norm": 1.3763418197631836, "learning_rate": 3.407259611695189e-06, "loss": 0.1079, "step": 2220 }, { "epoch": 0.03422607627964085, "grad_norm": 1.6788028478622437, "learning_rate": 3.4226076279640856e-06, "loss": 0.1159, "step": 2230 }, { "epoch": 0.03437955644232983, "grad_norm": 1.5287078619003296, "learning_rate": 3.437955644232983e-06, "loss": 0.1203, "step": 2240 }, { "epoch": 0.0345330366050188, "grad_norm": 1.5208035707473755, "learning_rate": 3.4533036605018803e-06, "loss": 0.1323, "step": 2250 }, { "epoch": 0.03468651676770777, "grad_norm": 1.9130128622055054, "learning_rate": 3.468651676770778e-06, "loss": 0.0986, "step": 2260 }, { "epoch": 0.034839996930396744, "grad_norm": 1.4821105003356934, "learning_rate": 3.4839996930396747e-06, "loss": 0.1091, "step": 2270 }, { "epoch": 0.03499347709308572, "grad_norm": 1.4875725507736206, "learning_rate": 3.499347709308572e-06, "loss": 0.1069, "step": 2280 }, { "epoch": 0.03514695725577469, "grad_norm": 1.5984584093093872, "learning_rate": 3.5146957255774694e-06, "loss": 0.1262, "step": 2290 }, { "epoch": 0.035300437418463664, "grad_norm": 1.4602051973342896, "learning_rate": 3.5300437418463666e-06, "loss": 0.1043, "step": 2300 }, { "epoch": 0.035453917581152634, "grad_norm": 1.5681740045547485, "learning_rate": 3.545391758115264e-06, "loss": 0.1, "step": 2310 }, { "epoch": 0.035607397743841605, "grad_norm": 1.5449696779251099, "learning_rate": 3.560739774384161e-06, "loss": 0.1184, "step": 2320 }, { "epoch": 0.03576087790653058, "grad_norm": 1.1301732063293457, "learning_rate": 3.5760877906530585e-06, "loss": 0.1176, "step": 2330 }, { "epoch": 0.035914358069219554, "grad_norm": 1.8073254823684692, "learning_rate": 3.5914358069219557e-06, "loss": 0.1302, "step": 2340 }, { "epoch": 0.036067838231908525, "grad_norm": 1.3277987241744995, "learning_rate": 3.6067838231908533e-06, "loss": 0.1328, "step": 2350 }, { "epoch": 0.036221318394597496, "grad_norm": 1.4289427995681763, "learning_rate": 3.62213183945975e-06, "loss": 0.1198, "step": 2360 }, { "epoch": 0.036374798557286474, "grad_norm": 1.1730632781982422, "learning_rate": 3.637479855728647e-06, "loss": 0.1103, "step": 2370 }, { "epoch": 0.036528278719975445, "grad_norm": 1.3937855958938599, "learning_rate": 3.6528278719975448e-06, "loss": 0.1051, "step": 2380 }, { "epoch": 0.036681758882664416, "grad_norm": 1.543083667755127, "learning_rate": 3.668175888266442e-06, "loss": 0.1166, "step": 2390 }, { "epoch": 0.036835239045353387, "grad_norm": 1.4505294561386108, "learning_rate": 3.6835239045353395e-06, "loss": 0.1174, "step": 2400 }, { "epoch": 0.03698871920804236, "grad_norm": 1.5883643627166748, "learning_rate": 3.6988719208042362e-06, "loss": 0.1307, "step": 2410 }, { "epoch": 0.037142199370731335, "grad_norm": 1.3739732503890991, "learning_rate": 3.7142199370731334e-06, "loss": 0.1115, "step": 2420 }, { "epoch": 0.037295679533420306, "grad_norm": 1.8778141736984253, "learning_rate": 3.729567953342031e-06, "loss": 0.113, "step": 2430 }, { "epoch": 0.03744915969610928, "grad_norm": 1.3567522764205933, "learning_rate": 3.744915969610928e-06, "loss": 0.1011, "step": 2440 }, { "epoch": 0.03760263985879825, "grad_norm": 1.5818160772323608, "learning_rate": 3.760263985879825e-06, "loss": 0.1158, "step": 2450 }, { "epoch": 0.037756120021487226, "grad_norm": 1.6237316131591797, "learning_rate": 3.7756120021487225e-06, "loss": 0.1308, "step": 2460 }, { "epoch": 0.0379096001841762, "grad_norm": 1.399431824684143, "learning_rate": 3.7909600184176197e-06, "loss": 0.1245, "step": 2470 }, { "epoch": 0.03806308034686517, "grad_norm": 1.5182310342788696, "learning_rate": 3.8063080346865172e-06, "loss": 0.0979, "step": 2480 }, { "epoch": 0.03821656050955414, "grad_norm": 1.3382327556610107, "learning_rate": 3.821656050955415e-06, "loss": 0.1082, "step": 2490 }, { "epoch": 0.03837004067224311, "grad_norm": 1.9344550371170044, "learning_rate": 3.837004067224312e-06, "loss": 0.1208, "step": 2500 }, { "epoch": 0.03852352083493209, "grad_norm": 1.5033589601516724, "learning_rate": 3.852352083493209e-06, "loss": 0.101, "step": 2510 }, { "epoch": 0.03867700099762106, "grad_norm": 1.5397685766220093, "learning_rate": 3.867700099762106e-06, "loss": 0.1099, "step": 2520 }, { "epoch": 0.03883048116031003, "grad_norm": 1.5747337341308594, "learning_rate": 3.8830481160310035e-06, "loss": 0.1166, "step": 2530 }, { "epoch": 0.038983961322999, "grad_norm": 1.674035906791687, "learning_rate": 3.8983961322999e-06, "loss": 0.1228, "step": 2540 }, { "epoch": 0.03913744148568798, "grad_norm": 1.4994454383850098, "learning_rate": 3.913744148568798e-06, "loss": 0.1128, "step": 2550 }, { "epoch": 0.03929092164837695, "grad_norm": 1.3889212608337402, "learning_rate": 3.929092164837695e-06, "loss": 0.1065, "step": 2560 }, { "epoch": 0.03944440181106592, "grad_norm": 1.3057225942611694, "learning_rate": 3.944440181106592e-06, "loss": 0.0925, "step": 2570 }, { "epoch": 0.03959788197375489, "grad_norm": 1.6561838388442993, "learning_rate": 3.95978819737549e-06, "loss": 0.1199, "step": 2580 }, { "epoch": 0.03975136213644386, "grad_norm": 1.765676736831665, "learning_rate": 3.9751362136443865e-06, "loss": 0.1083, "step": 2590 }, { "epoch": 0.03990484229913284, "grad_norm": 1.5218415260314941, "learning_rate": 3.990484229913284e-06, "loss": 0.1123, "step": 2600 }, { "epoch": 0.04005832246182181, "grad_norm": 1.4662688970565796, "learning_rate": 4.005832246182182e-06, "loss": 0.1111, "step": 2610 }, { "epoch": 0.04021180262451078, "grad_norm": 1.273779034614563, "learning_rate": 4.021180262451078e-06, "loss": 0.1096, "step": 2620 }, { "epoch": 0.04036528278719975, "grad_norm": 1.7429243326187134, "learning_rate": 4.036528278719975e-06, "loss": 0.1091, "step": 2630 }, { "epoch": 0.04051876294988873, "grad_norm": 1.6785987615585327, "learning_rate": 4.051876294988873e-06, "loss": 0.1233, "step": 2640 }, { "epoch": 0.0406722431125777, "grad_norm": 1.5601191520690918, "learning_rate": 4.06722431125777e-06, "loss": 0.1182, "step": 2650 }, { "epoch": 0.04082572327526667, "grad_norm": 1.5083750486373901, "learning_rate": 4.082572327526668e-06, "loss": 0.1037, "step": 2660 }, { "epoch": 0.04097920343795564, "grad_norm": 1.629745364189148, "learning_rate": 4.097920343795565e-06, "loss": 0.1099, "step": 2670 }, { "epoch": 0.041132683600644614, "grad_norm": 1.4354757070541382, "learning_rate": 4.113268360064462e-06, "loss": 0.1118, "step": 2680 }, { "epoch": 0.04128616376333359, "grad_norm": 1.4371083974838257, "learning_rate": 4.128616376333359e-06, "loss": 0.1053, "step": 2690 }, { "epoch": 0.04143964392602256, "grad_norm": 1.6558717489242554, "learning_rate": 4.1439643926022566e-06, "loss": 0.1029, "step": 2700 }, { "epoch": 0.04159312408871153, "grad_norm": 1.6087628602981567, "learning_rate": 4.159312408871154e-06, "loss": 0.107, "step": 2710 }, { "epoch": 0.041746604251400504, "grad_norm": 1.8166104555130005, "learning_rate": 4.174660425140051e-06, "loss": 0.1225, "step": 2720 }, { "epoch": 0.04190008441408948, "grad_norm": 1.2814282178878784, "learning_rate": 4.1900084414089485e-06, "loss": 0.1181, "step": 2730 }, { "epoch": 0.04205356457677845, "grad_norm": 1.2270615100860596, "learning_rate": 4.205356457677845e-06, "loss": 0.1005, "step": 2740 }, { "epoch": 0.042207044739467424, "grad_norm": 1.2145520448684692, "learning_rate": 4.220704473946743e-06, "loss": 0.1054, "step": 2750 }, { "epoch": 0.042360524902156395, "grad_norm": 1.3399711847305298, "learning_rate": 4.23605249021564e-06, "loss": 0.11, "step": 2760 }, { "epoch": 0.042514005064845366, "grad_norm": 1.5539604425430298, "learning_rate": 4.251400506484537e-06, "loss": 0.0981, "step": 2770 }, { "epoch": 0.042667485227534344, "grad_norm": 1.3822135925292969, "learning_rate": 4.266748522753435e-06, "loss": 0.089, "step": 2780 }, { "epoch": 0.042820965390223314, "grad_norm": 1.4996179342269897, "learning_rate": 4.2820965390223315e-06, "loss": 0.1154, "step": 2790 }, { "epoch": 0.042974445552912285, "grad_norm": 1.2074458599090576, "learning_rate": 4.297444555291229e-06, "loss": 0.1089, "step": 2800 }, { "epoch": 0.043127925715601256, "grad_norm": 1.542603611946106, "learning_rate": 4.312792571560126e-06, "loss": 0.1043, "step": 2810 }, { "epoch": 0.043281405878290234, "grad_norm": 1.3661984205245972, "learning_rate": 4.328140587829023e-06, "loss": 0.1073, "step": 2820 }, { "epoch": 0.043434886040979205, "grad_norm": 1.408433437347412, "learning_rate": 4.343488604097921e-06, "loss": 0.1067, "step": 2830 }, { "epoch": 0.043588366203668176, "grad_norm": 1.2440776824951172, "learning_rate": 4.358836620366818e-06, "loss": 0.1127, "step": 2840 }, { "epoch": 0.04374184636635715, "grad_norm": 1.5103574991226196, "learning_rate": 4.374184636635715e-06, "loss": 0.0952, "step": 2850 }, { "epoch": 0.04389532652904612, "grad_norm": 1.3752745389938354, "learning_rate": 4.389532652904612e-06, "loss": 0.1062, "step": 2860 }, { "epoch": 0.044048806691735096, "grad_norm": 1.4197977781295776, "learning_rate": 4.40488066917351e-06, "loss": 0.1162, "step": 2870 }, { "epoch": 0.04420228685442407, "grad_norm": 1.0755301713943481, "learning_rate": 4.420228685442407e-06, "loss": 0.0996, "step": 2880 }, { "epoch": 0.04435576701711304, "grad_norm": 1.7035467624664307, "learning_rate": 4.435576701711304e-06, "loss": 0.0939, "step": 2890 }, { "epoch": 0.04450924717980201, "grad_norm": 1.24080228805542, "learning_rate": 4.4509247179802016e-06, "loss": 0.1106, "step": 2900 }, { "epoch": 0.044662727342490986, "grad_norm": 1.6132925748825073, "learning_rate": 4.466272734249098e-06, "loss": 0.1034, "step": 2910 }, { "epoch": 0.04481620750517996, "grad_norm": 1.5817972421646118, "learning_rate": 4.481620750517996e-06, "loss": 0.1006, "step": 2920 }, { "epoch": 0.04496968766786893, "grad_norm": 1.2066608667373657, "learning_rate": 4.4969687667868935e-06, "loss": 0.1053, "step": 2930 }, { "epoch": 0.0451231678305579, "grad_norm": 1.3628545999526978, "learning_rate": 4.512316783055791e-06, "loss": 0.1142, "step": 2940 }, { "epoch": 0.04527664799324687, "grad_norm": 1.4034658670425415, "learning_rate": 4.527664799324688e-06, "loss": 0.1013, "step": 2950 }, { "epoch": 0.04543012815593585, "grad_norm": 1.3910009860992432, "learning_rate": 4.5430128155935846e-06, "loss": 0.0962, "step": 2960 }, { "epoch": 0.04558360831862482, "grad_norm": 1.6070396900177002, "learning_rate": 4.558360831862482e-06, "loss": 0.1191, "step": 2970 }, { "epoch": 0.04573708848131379, "grad_norm": 1.1031993627548218, "learning_rate": 4.57370884813138e-06, "loss": 0.1107, "step": 2980 }, { "epoch": 0.04589056864400276, "grad_norm": 1.3174023628234863, "learning_rate": 4.5890568644002765e-06, "loss": 0.1113, "step": 2990 }, { "epoch": 0.04604404880669174, "grad_norm": 1.588921308517456, "learning_rate": 4.604404880669174e-06, "loss": 0.1198, "step": 3000 }, { "epoch": 0.04619752896938071, "grad_norm": 1.1247618198394775, "learning_rate": 4.619752896938071e-06, "loss": 0.0937, "step": 3010 }, { "epoch": 0.04635100913206968, "grad_norm": 1.4722557067871094, "learning_rate": 4.635100913206968e-06, "loss": 0.1167, "step": 3020 }, { "epoch": 0.04650448929475865, "grad_norm": 1.5412864685058594, "learning_rate": 4.650448929475865e-06, "loss": 0.0937, "step": 3030 }, { "epoch": 0.04665796945744762, "grad_norm": 1.404645562171936, "learning_rate": 4.665796945744763e-06, "loss": 0.1241, "step": 3040 }, { "epoch": 0.0468114496201366, "grad_norm": 1.3067424297332764, "learning_rate": 4.68114496201366e-06, "loss": 0.1098, "step": 3050 }, { "epoch": 0.04696492978282557, "grad_norm": 1.5430288314819336, "learning_rate": 4.696492978282557e-06, "loss": 0.1, "step": 3060 }, { "epoch": 0.04711840994551454, "grad_norm": 1.5778440237045288, "learning_rate": 4.711840994551455e-06, "loss": 0.0987, "step": 3070 }, { "epoch": 0.04727189010820351, "grad_norm": 1.2576299905776978, "learning_rate": 4.727189010820351e-06, "loss": 0.1163, "step": 3080 }, { "epoch": 0.04742537027089249, "grad_norm": 1.466583490371704, "learning_rate": 4.742537027089249e-06, "loss": 0.0969, "step": 3090 }, { "epoch": 0.04757885043358146, "grad_norm": 1.4930421113967896, "learning_rate": 4.7578850433581466e-06, "loss": 0.1, "step": 3100 }, { "epoch": 0.04773233059627043, "grad_norm": 1.6283856630325317, "learning_rate": 4.773233059627044e-06, "loss": 0.1115, "step": 3110 }, { "epoch": 0.0478858107589594, "grad_norm": 1.8014568090438843, "learning_rate": 4.788581075895941e-06, "loss": 0.1133, "step": 3120 }, { "epoch": 0.048039290921648374, "grad_norm": 1.1836624145507812, "learning_rate": 4.803929092164838e-06, "loss": 0.1059, "step": 3130 }, { "epoch": 0.04819277108433735, "grad_norm": 1.6000874042510986, "learning_rate": 4.819277108433735e-06, "loss": 0.121, "step": 3140 }, { "epoch": 0.04834625124702632, "grad_norm": 1.4007055759429932, "learning_rate": 4.834625124702633e-06, "loss": 0.1011, "step": 3150 }, { "epoch": 0.048499731409715294, "grad_norm": 1.1124664545059204, "learning_rate": 4.84997314097153e-06, "loss": 0.0855, "step": 3160 }, { "epoch": 0.048653211572404265, "grad_norm": 1.2686457633972168, "learning_rate": 4.865321157240427e-06, "loss": 0.1283, "step": 3170 }, { "epoch": 0.04880669173509324, "grad_norm": 1.7616074085235596, "learning_rate": 4.880669173509324e-06, "loss": 0.0948, "step": 3180 }, { "epoch": 0.04896017189778221, "grad_norm": 1.3830022811889648, "learning_rate": 4.8960171897782215e-06, "loss": 0.1024, "step": 3190 }, { "epoch": 0.049113652060471184, "grad_norm": 1.6697444915771484, "learning_rate": 4.911365206047119e-06, "loss": 0.1169, "step": 3200 }, { "epoch": 0.049267132223160155, "grad_norm": 1.166772484779358, "learning_rate": 4.926713222316016e-06, "loss": 0.0872, "step": 3210 }, { "epoch": 0.049420612385849126, "grad_norm": 1.394077181816101, "learning_rate": 4.942061238584913e-06, "loss": 0.095, "step": 3220 }, { "epoch": 0.049574092548538104, "grad_norm": 1.3372758626937866, "learning_rate": 4.95740925485381e-06, "loss": 0.105, "step": 3230 }, { "epoch": 0.049727572711227075, "grad_norm": 1.596899151802063, "learning_rate": 4.972757271122708e-06, "loss": 0.1113, "step": 3240 }, { "epoch": 0.049881052873916046, "grad_norm": 1.3485198020935059, "learning_rate": 4.988105287391605e-06, "loss": 0.0855, "step": 3250 }, { "epoch": 0.05003453303660502, "grad_norm": 1.7290126085281372, "learning_rate": 5.003453303660503e-06, "loss": 0.1101, "step": 3260 }, { "epoch": 0.050188013199293995, "grad_norm": 1.300766110420227, "learning_rate": 5.0188013199294e-06, "loss": 0.1051, "step": 3270 }, { "epoch": 0.050341493361982965, "grad_norm": 1.4078818559646606, "learning_rate": 5.034149336198296e-06, "loss": 0.1081, "step": 3280 }, { "epoch": 0.050494973524671936, "grad_norm": 1.2099568843841553, "learning_rate": 5.049497352467194e-06, "loss": 0.1023, "step": 3290 }, { "epoch": 0.05064845368736091, "grad_norm": 1.1274701356887817, "learning_rate": 5.064845368736091e-06, "loss": 0.0976, "step": 3300 }, { "epoch": 0.05080193385004988, "grad_norm": 1.7840906381607056, "learning_rate": 5.080193385004989e-06, "loss": 0.1004, "step": 3310 }, { "epoch": 0.050955414012738856, "grad_norm": 1.4561687707901, "learning_rate": 5.095541401273886e-06, "loss": 0.1036, "step": 3320 }, { "epoch": 0.05110889417542783, "grad_norm": 1.7384532690048218, "learning_rate": 5.110889417542783e-06, "loss": 0.1019, "step": 3330 }, { "epoch": 0.0512623743381168, "grad_norm": 1.4783638715744019, "learning_rate": 5.12623743381168e-06, "loss": 0.1175, "step": 3340 }, { "epoch": 0.05141585450080577, "grad_norm": 1.0973371267318726, "learning_rate": 5.141585450080577e-06, "loss": 0.104, "step": 3350 }, { "epoch": 0.05156933466349475, "grad_norm": 1.246592402458191, "learning_rate": 5.156933466349475e-06, "loss": 0.1068, "step": 3360 }, { "epoch": 0.05172281482618372, "grad_norm": 1.356152892112732, "learning_rate": 5.172281482618372e-06, "loss": 0.0975, "step": 3370 }, { "epoch": 0.05187629498887269, "grad_norm": 1.2589141130447388, "learning_rate": 5.187629498887269e-06, "loss": 0.1016, "step": 3380 }, { "epoch": 0.05202977515156166, "grad_norm": 1.4307256937026978, "learning_rate": 5.2029775151561665e-06, "loss": 0.1061, "step": 3390 }, { "epoch": 0.05218325531425063, "grad_norm": 1.2398899793624878, "learning_rate": 5.218325531425063e-06, "loss": 0.0892, "step": 3400 }, { "epoch": 0.05233673547693961, "grad_norm": 1.30036199092865, "learning_rate": 5.233673547693962e-06, "loss": 0.1014, "step": 3410 }, { "epoch": 0.05249021563962858, "grad_norm": 1.3176579475402832, "learning_rate": 5.249021563962858e-06, "loss": 0.1068, "step": 3420 }, { "epoch": 0.05264369580231755, "grad_norm": 1.1503736972808838, "learning_rate": 5.264369580231755e-06, "loss": 0.1196, "step": 3430 }, { "epoch": 0.05279717596500652, "grad_norm": 1.3621073961257935, "learning_rate": 5.279717596500653e-06, "loss": 0.0953, "step": 3440 }, { "epoch": 0.0529506561276955, "grad_norm": 1.3448199033737183, "learning_rate": 5.29506561276955e-06, "loss": 0.0922, "step": 3450 }, { "epoch": 0.05310413629038447, "grad_norm": 1.3844655752182007, "learning_rate": 5.310413629038447e-06, "loss": 0.1117, "step": 3460 }, { "epoch": 0.05325761645307344, "grad_norm": 1.3042716979980469, "learning_rate": 5.325761645307345e-06, "loss": 0.0811, "step": 3470 }, { "epoch": 0.05341109661576241, "grad_norm": 1.222896933555603, "learning_rate": 5.341109661576241e-06, "loss": 0.1012, "step": 3480 }, { "epoch": 0.05356457677845138, "grad_norm": 1.3437981605529785, "learning_rate": 5.356457677845139e-06, "loss": 0.0764, "step": 3490 }, { "epoch": 0.05371805694114036, "grad_norm": 1.5330802202224731, "learning_rate": 5.3718056941140365e-06, "loss": 0.0965, "step": 3500 }, { "epoch": 0.05387153710382933, "grad_norm": 1.3183045387268066, "learning_rate": 5.387153710382933e-06, "loss": 0.1074, "step": 3510 }, { "epoch": 0.0540250172665183, "grad_norm": 1.4618918895721436, "learning_rate": 5.402501726651831e-06, "loss": 0.0842, "step": 3520 }, { "epoch": 0.05417849742920727, "grad_norm": 1.5246522426605225, "learning_rate": 5.417849742920728e-06, "loss": 0.1051, "step": 3530 }, { "epoch": 0.05433197759189625, "grad_norm": 1.1890748739242554, "learning_rate": 5.433197759189626e-06, "loss": 0.1152, "step": 3540 }, { "epoch": 0.05448545775458522, "grad_norm": 1.4442641735076904, "learning_rate": 5.448545775458523e-06, "loss": 0.0925, "step": 3550 }, { "epoch": 0.05463893791727419, "grad_norm": 1.4008320569992065, "learning_rate": 5.4638937917274195e-06, "loss": 0.0872, "step": 3560 }, { "epoch": 0.054792418079963164, "grad_norm": 1.332016944885254, "learning_rate": 5.479241807996317e-06, "loss": 0.0786, "step": 3570 }, { "epoch": 0.054945898242652134, "grad_norm": 1.2290513515472412, "learning_rate": 5.494589824265214e-06, "loss": 0.0869, "step": 3580 }, { "epoch": 0.05509937840534111, "grad_norm": 1.5645036697387695, "learning_rate": 5.509937840534112e-06, "loss": 0.0999, "step": 3590 }, { "epoch": 0.05525285856803008, "grad_norm": 1.159070611000061, "learning_rate": 5.525285856803009e-06, "loss": 0.0851, "step": 3600 }, { "epoch": 0.055406338730719054, "grad_norm": 1.3804007768630981, "learning_rate": 5.540633873071906e-06, "loss": 0.1076, "step": 3610 }, { "epoch": 0.055559818893408025, "grad_norm": 1.5531482696533203, "learning_rate": 5.555981889340803e-06, "loss": 0.0935, "step": 3620 }, { "epoch": 0.055713299056097, "grad_norm": 1.233854055404663, "learning_rate": 5.5713299056097e-06, "loss": 0.09, "step": 3630 }, { "epoch": 0.055866779218785974, "grad_norm": 1.3333230018615723, "learning_rate": 5.586677921878597e-06, "loss": 0.0962, "step": 3640 }, { "epoch": 0.056020259381474945, "grad_norm": 1.2933285236358643, "learning_rate": 5.602025938147495e-06, "loss": 0.102, "step": 3650 }, { "epoch": 0.056173739544163916, "grad_norm": 1.1321861743927002, "learning_rate": 5.617373954416392e-06, "loss": 0.0947, "step": 3660 }, { "epoch": 0.05632721970685289, "grad_norm": 1.2400970458984375, "learning_rate": 5.63272197068529e-06, "loss": 0.0843, "step": 3670 }, { "epoch": 0.056480699869541864, "grad_norm": 1.325560212135315, "learning_rate": 5.648069986954186e-06, "loss": 0.0848, "step": 3680 }, { "epoch": 0.056634180032230835, "grad_norm": 1.226412296295166, "learning_rate": 5.663418003223083e-06, "loss": 0.0926, "step": 3690 }, { "epoch": 0.056787660194919806, "grad_norm": 1.3813420534133911, "learning_rate": 5.6787660194919815e-06, "loss": 0.1163, "step": 3700 }, { "epoch": 0.05694114035760878, "grad_norm": 1.6675556898117065, "learning_rate": 5.694114035760878e-06, "loss": 0.1159, "step": 3710 }, { "epoch": 0.057094620520297755, "grad_norm": 1.4199024438858032, "learning_rate": 5.709462052029776e-06, "loss": 0.1101, "step": 3720 }, { "epoch": 0.057248100682986726, "grad_norm": 1.402928113937378, "learning_rate": 5.724810068298673e-06, "loss": 0.0834, "step": 3730 }, { "epoch": 0.0574015808456757, "grad_norm": 1.1918388605117798, "learning_rate": 5.740158084567569e-06, "loss": 0.0942, "step": 3740 }, { "epoch": 0.05755506100836467, "grad_norm": 1.3349943161010742, "learning_rate": 5.755506100836468e-06, "loss": 0.0943, "step": 3750 }, { "epoch": 0.05770854117105364, "grad_norm": 1.2541173696517944, "learning_rate": 5.7708541171053645e-06, "loss": 0.0961, "step": 3760 }, { "epoch": 0.057862021333742616, "grad_norm": 1.4450844526290894, "learning_rate": 5.786202133374262e-06, "loss": 0.0924, "step": 3770 }, { "epoch": 0.05801550149643159, "grad_norm": 1.206703782081604, "learning_rate": 5.801550149643159e-06, "loss": 0.096, "step": 3780 }, { "epoch": 0.05816898165912056, "grad_norm": 1.2251291275024414, "learning_rate": 5.816898165912056e-06, "loss": 0.102, "step": 3790 }, { "epoch": 0.05832246182180953, "grad_norm": 1.2256014347076416, "learning_rate": 5.832246182180954e-06, "loss": 0.0951, "step": 3800 }, { "epoch": 0.0584759419844985, "grad_norm": 1.1975849866867065, "learning_rate": 5.847594198449851e-06, "loss": 0.1027, "step": 3810 }, { "epoch": 0.05862942214718748, "grad_norm": 1.5359638929367065, "learning_rate": 5.8629422147187475e-06, "loss": 0.0846, "step": 3820 }, { "epoch": 0.05878290230987645, "grad_norm": 1.299963116645813, "learning_rate": 5.878290230987645e-06, "loss": 0.081, "step": 3830 }, { "epoch": 0.05893638247256542, "grad_norm": 1.3146288394927979, "learning_rate": 5.893638247256543e-06, "loss": 0.0939, "step": 3840 }, { "epoch": 0.05908986263525439, "grad_norm": 1.1559491157531738, "learning_rate": 5.90898626352544e-06, "loss": 0.091, "step": 3850 }, { "epoch": 0.05924334279794337, "grad_norm": 1.1306601762771606, "learning_rate": 5.924334279794337e-06, "loss": 0.1039, "step": 3860 }, { "epoch": 0.05939682296063234, "grad_norm": 1.3322181701660156, "learning_rate": 5.939682296063234e-06, "loss": 0.1062, "step": 3870 }, { "epoch": 0.05955030312332131, "grad_norm": 1.2355868816375732, "learning_rate": 5.955030312332132e-06, "loss": 0.0956, "step": 3880 }, { "epoch": 0.05970378328601028, "grad_norm": 1.0314501523971558, "learning_rate": 5.970378328601029e-06, "loss": 0.0765, "step": 3890 }, { "epoch": 0.05985726344869925, "grad_norm": 1.304997444152832, "learning_rate": 5.9857263448699265e-06, "loss": 0.0986, "step": 3900 }, { "epoch": 0.06001074361138823, "grad_norm": 1.1701855659484863, "learning_rate": 6.001074361138823e-06, "loss": 0.1096, "step": 3910 }, { "epoch": 0.0601642237740772, "grad_norm": 1.0359749794006348, "learning_rate": 6.01642237740772e-06, "loss": 0.0759, "step": 3920 }, { "epoch": 0.06031770393676617, "grad_norm": 1.0481810569763184, "learning_rate": 6.0317703936766185e-06, "loss": 0.0954, "step": 3930 }, { "epoch": 0.06047118409945514, "grad_norm": 1.332384705543518, "learning_rate": 6.047118409945515e-06, "loss": 0.1074, "step": 3940 }, { "epoch": 0.06062466426214412, "grad_norm": 1.09083092212677, "learning_rate": 6.062466426214413e-06, "loss": 0.091, "step": 3950 }, { "epoch": 0.06077814442483309, "grad_norm": 0.8927128314971924, "learning_rate": 6.0778144424833095e-06, "loss": 0.0994, "step": 3960 }, { "epoch": 0.06093162458752206, "grad_norm": 1.6136213541030884, "learning_rate": 6.093162458752206e-06, "loss": 0.0928, "step": 3970 }, { "epoch": 0.06108510475021103, "grad_norm": 1.103286623954773, "learning_rate": 6.108510475021105e-06, "loss": 0.0935, "step": 3980 }, { "epoch": 0.061238584912900004, "grad_norm": 1.1777034997940063, "learning_rate": 6.1238584912900014e-06, "loss": 0.0927, "step": 3990 }, { "epoch": 0.06139206507558898, "grad_norm": 1.1728825569152832, "learning_rate": 6.139206507558898e-06, "loss": 0.0999, "step": 4000 }, { "epoch": 0.06154554523827795, "grad_norm": 1.2538630962371826, "learning_rate": 6.154554523827796e-06, "loss": 0.0906, "step": 4010 }, { "epoch": 0.061699025400966924, "grad_norm": 1.1084039211273193, "learning_rate": 6.1699025400966925e-06, "loss": 0.0851, "step": 4020 }, { "epoch": 0.061852505563655895, "grad_norm": 1.2340023517608643, "learning_rate": 6.185250556365591e-06, "loss": 0.099, "step": 4030 }, { "epoch": 0.06200598572634487, "grad_norm": 1.6674846410751343, "learning_rate": 6.200598572634488e-06, "loss": 0.1127, "step": 4040 }, { "epoch": 0.062159465889033844, "grad_norm": 1.4097322225570679, "learning_rate": 6.2159465889033844e-06, "loss": 0.0955, "step": 4050 }, { "epoch": 0.062312946051722815, "grad_norm": 1.1126710176467896, "learning_rate": 6.231294605172282e-06, "loss": 0.1065, "step": 4060 }, { "epoch": 0.062466426214411785, "grad_norm": 1.269852876663208, "learning_rate": 6.246642621441179e-06, "loss": 0.0887, "step": 4070 }, { "epoch": 0.06261990637710076, "grad_norm": 1.2234703302383423, "learning_rate": 6.261990637710077e-06, "loss": 0.0945, "step": 4080 }, { "epoch": 0.06277338653978973, "grad_norm": 1.0888903141021729, "learning_rate": 6.277338653978974e-06, "loss": 0.0892, "step": 4090 }, { "epoch": 0.0629268667024787, "grad_norm": 1.1346328258514404, "learning_rate": 6.292686670247871e-06, "loss": 0.0954, "step": 4100 }, { "epoch": 0.06308034686516768, "grad_norm": 1.335428237915039, "learning_rate": 6.308034686516768e-06, "loss": 0.0993, "step": 4110 }, { "epoch": 0.06323382702785665, "grad_norm": 1.3386598825454712, "learning_rate": 6.323382702785665e-06, "loss": 0.0936, "step": 4120 }, { "epoch": 0.06338730719054562, "grad_norm": 1.5010406970977783, "learning_rate": 6.338730719054562e-06, "loss": 0.1108, "step": 4130 }, { "epoch": 0.0635407873532346, "grad_norm": 1.233337640762329, "learning_rate": 6.35407873532346e-06, "loss": 0.0875, "step": 4140 }, { "epoch": 0.06369426751592357, "grad_norm": 1.20348060131073, "learning_rate": 6.369426751592357e-06, "loss": 0.0926, "step": 4150 }, { "epoch": 0.06384774767861254, "grad_norm": 1.2667292356491089, "learning_rate": 6.3847747678612545e-06, "loss": 0.0929, "step": 4160 }, { "epoch": 0.06400122784130151, "grad_norm": 1.3237130641937256, "learning_rate": 6.400122784130151e-06, "loss": 0.1037, "step": 4170 }, { "epoch": 0.06415470800399048, "grad_norm": 1.2389373779296875, "learning_rate": 6.415470800399049e-06, "loss": 0.109, "step": 4180 }, { "epoch": 0.06430818816667945, "grad_norm": 1.0440704822540283, "learning_rate": 6.4308188166679464e-06, "loss": 0.0967, "step": 4190 }, { "epoch": 0.06446166832936844, "grad_norm": 1.1985282897949219, "learning_rate": 6.446166832936843e-06, "loss": 0.0987, "step": 4200 }, { "epoch": 0.0646151484920574, "grad_norm": 1.2560558319091797, "learning_rate": 6.461514849205741e-06, "loss": 0.09, "step": 4210 }, { "epoch": 0.06476862865474638, "grad_norm": 0.9937943816184998, "learning_rate": 6.476862865474638e-06, "loss": 0.0958, "step": 4220 }, { "epoch": 0.06492210881743535, "grad_norm": 1.4060838222503662, "learning_rate": 6.492210881743535e-06, "loss": 0.103, "step": 4230 }, { "epoch": 0.06507558898012432, "grad_norm": 1.1648201942443848, "learning_rate": 6.507558898012433e-06, "loss": 0.0768, "step": 4240 }, { "epoch": 0.06522906914281329, "grad_norm": 1.2303587198257446, "learning_rate": 6.5229069142813294e-06, "loss": 0.0887, "step": 4250 }, { "epoch": 0.06538254930550226, "grad_norm": 1.628182291984558, "learning_rate": 6.538254930550227e-06, "loss": 0.0973, "step": 4260 }, { "epoch": 0.06553602946819123, "grad_norm": 1.1348681449890137, "learning_rate": 6.553602946819125e-06, "loss": 0.086, "step": 4270 }, { "epoch": 0.0656895096308802, "grad_norm": 1.22709059715271, "learning_rate": 6.568950963088021e-06, "loss": 0.1027, "step": 4280 }, { "epoch": 0.06584298979356919, "grad_norm": 1.1905454397201538, "learning_rate": 6.584298979356919e-06, "loss": 0.0862, "step": 4290 }, { "epoch": 0.06599646995625816, "grad_norm": 0.9288424849510193, "learning_rate": 6.599646995625816e-06, "loss": 0.0797, "step": 4300 }, { "epoch": 0.06614995011894713, "grad_norm": 0.9959090948104858, "learning_rate": 6.614995011894712e-06, "loss": 0.0836, "step": 4310 }, { "epoch": 0.0663034302816361, "grad_norm": 1.5068689584732056, "learning_rate": 6.630343028163611e-06, "loss": 0.0884, "step": 4320 }, { "epoch": 0.06645691044432507, "grad_norm": 1.5301973819732666, "learning_rate": 6.645691044432508e-06, "loss": 0.1119, "step": 4330 }, { "epoch": 0.06661039060701404, "grad_norm": 1.120393991470337, "learning_rate": 6.661039060701405e-06, "loss": 0.09, "step": 4340 }, { "epoch": 0.06676387076970301, "grad_norm": 0.9342043995857239, "learning_rate": 6.676387076970302e-06, "loss": 0.0843, "step": 4350 }, { "epoch": 0.06691735093239198, "grad_norm": 1.16162109375, "learning_rate": 6.691735093239199e-06, "loss": 0.0855, "step": 4360 }, { "epoch": 0.06707083109508095, "grad_norm": 1.0303308963775635, "learning_rate": 6.707083109508097e-06, "loss": 0.0944, "step": 4370 }, { "epoch": 0.06722431125776994, "grad_norm": 1.4596513509750366, "learning_rate": 6.722431125776994e-06, "loss": 0.0874, "step": 4380 }, { "epoch": 0.06737779142045891, "grad_norm": 1.2600041627883911, "learning_rate": 6.7377791420458914e-06, "loss": 0.0909, "step": 4390 }, { "epoch": 0.06753127158314788, "grad_norm": 1.1741758584976196, "learning_rate": 6.753127158314788e-06, "loss": 0.0749, "step": 4400 }, { "epoch": 0.06768475174583685, "grad_norm": 1.4038090705871582, "learning_rate": 6.768475174583685e-06, "loss": 0.0957, "step": 4410 }, { "epoch": 0.06783823190852582, "grad_norm": 1.3298591375350952, "learning_rate": 6.783823190852583e-06, "loss": 0.0951, "step": 4420 }, { "epoch": 0.0679917120712148, "grad_norm": 1.1891769170761108, "learning_rate": 6.79917120712148e-06, "loss": 0.074, "step": 4430 }, { "epoch": 0.06814519223390376, "grad_norm": 1.2658945322036743, "learning_rate": 6.814519223390378e-06, "loss": 0.0828, "step": 4440 }, { "epoch": 0.06829867239659274, "grad_norm": 1.2014132738113403, "learning_rate": 6.8298672396592744e-06, "loss": 0.0796, "step": 4450 }, { "epoch": 0.0684521525592817, "grad_norm": 1.1691696643829346, "learning_rate": 6.845215255928171e-06, "loss": 0.092, "step": 4460 }, { "epoch": 0.06860563272197069, "grad_norm": 1.1197625398635864, "learning_rate": 6.86056327219707e-06, "loss": 0.0845, "step": 4470 }, { "epoch": 0.06875911288465966, "grad_norm": 1.3163182735443115, "learning_rate": 6.875911288465966e-06, "loss": 0.0873, "step": 4480 }, { "epoch": 0.06891259304734863, "grad_norm": 1.3578722476959229, "learning_rate": 6.891259304734863e-06, "loss": 0.1097, "step": 4490 }, { "epoch": 0.0690660732100376, "grad_norm": 1.1517294645309448, "learning_rate": 6.906607321003761e-06, "loss": 0.0982, "step": 4500 }, { "epoch": 0.06921955337272657, "grad_norm": 1.165242314338684, "learning_rate": 6.921955337272657e-06, "loss": 0.0885, "step": 4510 }, { "epoch": 0.06937303353541555, "grad_norm": 1.0456702709197998, "learning_rate": 6.937303353541556e-06, "loss": 0.0914, "step": 4520 }, { "epoch": 0.06952651369810452, "grad_norm": 1.0618247985839844, "learning_rate": 6.952651369810453e-06, "loss": 0.093, "step": 4530 }, { "epoch": 0.06967999386079349, "grad_norm": 1.031679391860962, "learning_rate": 6.967999386079349e-06, "loss": 0.0827, "step": 4540 }, { "epoch": 0.06983347402348246, "grad_norm": 1.4767476320266724, "learning_rate": 6.983347402348247e-06, "loss": 0.1033, "step": 4550 }, { "epoch": 0.06998695418617144, "grad_norm": 0.9844877123832703, "learning_rate": 6.998695418617144e-06, "loss": 0.0871, "step": 4560 }, { "epoch": 0.07014043434886041, "grad_norm": 1.0365450382232666, "learning_rate": 7.014043434886042e-06, "loss": 0.0893, "step": 4570 }, { "epoch": 0.07029391451154939, "grad_norm": 1.1837743520736694, "learning_rate": 7.029391451154939e-06, "loss": 0.0911, "step": 4580 }, { "epoch": 0.07044739467423836, "grad_norm": 0.8970546722412109, "learning_rate": 7.044739467423836e-06, "loss": 0.0924, "step": 4590 }, { "epoch": 0.07060087483692733, "grad_norm": 1.315337896347046, "learning_rate": 7.060087483692733e-06, "loss": 0.091, "step": 4600 }, { "epoch": 0.0707543549996163, "grad_norm": 1.1001358032226562, "learning_rate": 7.075435499961631e-06, "loss": 0.0908, "step": 4610 }, { "epoch": 0.07090783516230527, "grad_norm": 0.9866149425506592, "learning_rate": 7.090783516230528e-06, "loss": 0.0935, "step": 4620 }, { "epoch": 0.07106131532499424, "grad_norm": 1.3416001796722412, "learning_rate": 7.106131532499425e-06, "loss": 0.0806, "step": 4630 }, { "epoch": 0.07121479548768321, "grad_norm": 1.0118743181228638, "learning_rate": 7.121479548768322e-06, "loss": 0.0884, "step": 4640 }, { "epoch": 0.0713682756503722, "grad_norm": 0.9058448672294617, "learning_rate": 7.13682756503722e-06, "loss": 0.0961, "step": 4650 }, { "epoch": 0.07152175581306117, "grad_norm": 1.0034527778625488, "learning_rate": 7.152175581306117e-06, "loss": 0.1071, "step": 4660 }, { "epoch": 0.07167523597575014, "grad_norm": 1.465087890625, "learning_rate": 7.167523597575014e-06, "loss": 0.1002, "step": 4670 }, { "epoch": 0.07182871613843911, "grad_norm": 1.330031394958496, "learning_rate": 7.182871613843911e-06, "loss": 0.0904, "step": 4680 }, { "epoch": 0.07198219630112808, "grad_norm": 1.0354045629501343, "learning_rate": 7.198219630112808e-06, "loss": 0.0955, "step": 4690 }, { "epoch": 0.07213567646381705, "grad_norm": 1.0015677213668823, "learning_rate": 7.2135676463817065e-06, "loss": 0.0946, "step": 4700 }, { "epoch": 0.07228915662650602, "grad_norm": 0.9778575301170349, "learning_rate": 7.228915662650603e-06, "loss": 0.089, "step": 4710 }, { "epoch": 0.07244263678919499, "grad_norm": 1.190567970275879, "learning_rate": 7.2442636789195e-06, "loss": 0.0748, "step": 4720 }, { "epoch": 0.07259611695188396, "grad_norm": 0.8895498514175415, "learning_rate": 7.259611695188398e-06, "loss": 0.1034, "step": 4730 }, { "epoch": 0.07274959711457295, "grad_norm": 1.0148252248764038, "learning_rate": 7.274959711457294e-06, "loss": 0.091, "step": 4740 }, { "epoch": 0.07290307727726192, "grad_norm": 1.1185104846954346, "learning_rate": 7.290307727726193e-06, "loss": 0.0921, "step": 4750 }, { "epoch": 0.07305655743995089, "grad_norm": 1.1140360832214355, "learning_rate": 7.3056557439950895e-06, "loss": 0.0826, "step": 4760 }, { "epoch": 0.07321003760263986, "grad_norm": 1.247681736946106, "learning_rate": 7.321003760263986e-06, "loss": 0.0833, "step": 4770 }, { "epoch": 0.07336351776532883, "grad_norm": 1.358451008796692, "learning_rate": 7.336351776532884e-06, "loss": 0.1099, "step": 4780 }, { "epoch": 0.0735169979280178, "grad_norm": 1.4065901041030884, "learning_rate": 7.3516997928017806e-06, "loss": 0.0932, "step": 4790 }, { "epoch": 0.07367047809070677, "grad_norm": 1.428105354309082, "learning_rate": 7.367047809070679e-06, "loss": 0.0882, "step": 4800 }, { "epoch": 0.07382395825339574, "grad_norm": 0.8263379335403442, "learning_rate": 7.382395825339576e-06, "loss": 0.072, "step": 4810 }, { "epoch": 0.07397743841608471, "grad_norm": 1.1097097396850586, "learning_rate": 7.3977438416084725e-06, "loss": 0.084, "step": 4820 }, { "epoch": 0.0741309185787737, "grad_norm": 1.1234264373779297, "learning_rate": 7.41309185787737e-06, "loss": 0.0869, "step": 4830 }, { "epoch": 0.07428439874146267, "grad_norm": 1.3422046899795532, "learning_rate": 7.428439874146267e-06, "loss": 0.0804, "step": 4840 }, { "epoch": 0.07443787890415164, "grad_norm": 1.1448479890823364, "learning_rate": 7.4437878904151636e-06, "loss": 0.108, "step": 4850 }, { "epoch": 0.07459135906684061, "grad_norm": 1.1355334520339966, "learning_rate": 7.459135906684062e-06, "loss": 0.0778, "step": 4860 }, { "epoch": 0.07474483922952958, "grad_norm": 0.9865667819976807, "learning_rate": 7.474483922952959e-06, "loss": 0.0876, "step": 4870 }, { "epoch": 0.07489831939221855, "grad_norm": 1.1662262678146362, "learning_rate": 7.489831939221856e-06, "loss": 0.0851, "step": 4880 }, { "epoch": 0.07505179955490753, "grad_norm": 1.1472481489181519, "learning_rate": 7.505179955490753e-06, "loss": 0.0998, "step": 4890 }, { "epoch": 0.0752052797175965, "grad_norm": 1.1507887840270996, "learning_rate": 7.52052797175965e-06, "loss": 0.0702, "step": 4900 }, { "epoch": 0.07535875988028547, "grad_norm": 1.020796537399292, "learning_rate": 7.535875988028548e-06, "loss": 0.0813, "step": 4910 }, { "epoch": 0.07551224004297445, "grad_norm": 1.137057900428772, "learning_rate": 7.551224004297445e-06, "loss": 0.0846, "step": 4920 }, { "epoch": 0.07566572020566342, "grad_norm": 1.2756565809249878, "learning_rate": 7.566572020566343e-06, "loss": 0.0943, "step": 4930 }, { "epoch": 0.0758192003683524, "grad_norm": 1.1099493503570557, "learning_rate": 7.581920036835239e-06, "loss": 0.0852, "step": 4940 }, { "epoch": 0.07597268053104136, "grad_norm": 0.8876002430915833, "learning_rate": 7.597268053104137e-06, "loss": 0.0768, "step": 4950 }, { "epoch": 0.07612616069373034, "grad_norm": 0.9270637035369873, "learning_rate": 7.6126160693730345e-06, "loss": 0.0907, "step": 4960 }, { "epoch": 0.0762796408564193, "grad_norm": 0.9573376774787903, "learning_rate": 7.627964085641931e-06, "loss": 0.0859, "step": 4970 }, { "epoch": 0.07643312101910828, "grad_norm": 1.067784070968628, "learning_rate": 7.64331210191083e-06, "loss": 0.0768, "step": 4980 }, { "epoch": 0.07658660118179725, "grad_norm": 1.1576902866363525, "learning_rate": 7.658660118179726e-06, "loss": 0.0807, "step": 4990 }, { "epoch": 0.07674008134448622, "grad_norm": 0.9975094199180603, "learning_rate": 7.674008134448623e-06, "loss": 0.0784, "step": 5000 }, { "epoch": 0.0768935615071752, "grad_norm": 1.0330522060394287, "learning_rate": 7.68935615071752e-06, "loss": 0.0971, "step": 5010 }, { "epoch": 0.07704704166986417, "grad_norm": 1.3341906070709229, "learning_rate": 7.704704166986418e-06, "loss": 0.1079, "step": 5020 }, { "epoch": 0.07720052183255315, "grad_norm": 1.2068113088607788, "learning_rate": 7.720052183255315e-06, "loss": 0.086, "step": 5030 }, { "epoch": 0.07735400199524212, "grad_norm": 0.9557150602340698, "learning_rate": 7.735400199524212e-06, "loss": 0.0977, "step": 5040 }, { "epoch": 0.07750748215793109, "grad_norm": 0.9405173659324646, "learning_rate": 7.750748215793109e-06, "loss": 0.0831, "step": 5050 }, { "epoch": 0.07766096232062006, "grad_norm": 0.992763340473175, "learning_rate": 7.766096232062007e-06, "loss": 0.0898, "step": 5060 }, { "epoch": 0.07781444248330903, "grad_norm": 1.0196233987808228, "learning_rate": 7.781444248330904e-06, "loss": 0.0809, "step": 5070 }, { "epoch": 0.077967922645998, "grad_norm": 1.174096941947937, "learning_rate": 7.7967922645998e-06, "loss": 0.093, "step": 5080 }, { "epoch": 0.07812140280868697, "grad_norm": 1.1572060585021973, "learning_rate": 7.812140280868699e-06, "loss": 0.0942, "step": 5090 }, { "epoch": 0.07827488297137596, "grad_norm": 1.0891337394714355, "learning_rate": 7.827488297137596e-06, "loss": 0.0882, "step": 5100 }, { "epoch": 0.07842836313406493, "grad_norm": 1.1296318769454956, "learning_rate": 7.842836313406494e-06, "loss": 0.1021, "step": 5110 }, { "epoch": 0.0785818432967539, "grad_norm": 1.4641802310943604, "learning_rate": 7.85818432967539e-06, "loss": 0.0915, "step": 5120 }, { "epoch": 0.07873532345944287, "grad_norm": 0.9676700234413147, "learning_rate": 7.873532345944288e-06, "loss": 0.085, "step": 5130 }, { "epoch": 0.07888880362213184, "grad_norm": 1.134684681892395, "learning_rate": 7.888880362213184e-06, "loss": 0.0684, "step": 5140 }, { "epoch": 0.07904228378482081, "grad_norm": 0.7936806082725525, "learning_rate": 7.904228378482081e-06, "loss": 0.0865, "step": 5150 }, { "epoch": 0.07919576394750978, "grad_norm": 1.4251799583435059, "learning_rate": 7.91957639475098e-06, "loss": 0.0968, "step": 5160 }, { "epoch": 0.07934924411019875, "grad_norm": 1.3397012948989868, "learning_rate": 7.934924411019876e-06, "loss": 0.0869, "step": 5170 }, { "epoch": 0.07950272427288772, "grad_norm": 1.3125320672988892, "learning_rate": 7.950272427288773e-06, "loss": 0.0855, "step": 5180 }, { "epoch": 0.07965620443557671, "grad_norm": 1.0710276365280151, "learning_rate": 7.965620443557671e-06, "loss": 0.0876, "step": 5190 }, { "epoch": 0.07980968459826568, "grad_norm": 1.523543119430542, "learning_rate": 7.980968459826568e-06, "loss": 0.0916, "step": 5200 }, { "epoch": 0.07996316476095465, "grad_norm": 1.5700201988220215, "learning_rate": 7.996316476095465e-06, "loss": 0.0793, "step": 5210 }, { "epoch": 0.08011664492364362, "grad_norm": 1.2153816223144531, "learning_rate": 8.011664492364363e-06, "loss": 0.0598, "step": 5220 }, { "epoch": 0.08027012508633259, "grad_norm": 0.9479465484619141, "learning_rate": 8.02701250863326e-06, "loss": 0.0894, "step": 5230 }, { "epoch": 0.08042360524902156, "grad_norm": 1.1202898025512695, "learning_rate": 8.042360524902157e-06, "loss": 0.0862, "step": 5240 }, { "epoch": 0.08057708541171053, "grad_norm": 1.2105728387832642, "learning_rate": 8.057708541171054e-06, "loss": 0.0828, "step": 5250 }, { "epoch": 0.0807305655743995, "grad_norm": 0.9344853162765503, "learning_rate": 8.07305655743995e-06, "loss": 0.0882, "step": 5260 }, { "epoch": 0.08088404573708848, "grad_norm": 1.0036439895629883, "learning_rate": 8.088404573708849e-06, "loss": 0.0821, "step": 5270 }, { "epoch": 0.08103752589977746, "grad_norm": 1.0508109331130981, "learning_rate": 8.103752589977745e-06, "loss": 0.0958, "step": 5280 }, { "epoch": 0.08119100606246643, "grad_norm": 0.9089701771736145, "learning_rate": 8.119100606246644e-06, "loss": 0.0748, "step": 5290 }, { "epoch": 0.0813444862251554, "grad_norm": 1.0854467153549194, "learning_rate": 8.13444862251554e-06, "loss": 0.0896, "step": 5300 }, { "epoch": 0.08149796638784437, "grad_norm": 1.2402088642120361, "learning_rate": 8.149796638784437e-06, "loss": 0.0754, "step": 5310 }, { "epoch": 0.08165144655053334, "grad_norm": 1.1106442213058472, "learning_rate": 8.165144655053336e-06, "loss": 0.0865, "step": 5320 }, { "epoch": 0.08180492671322231, "grad_norm": 0.7158452272415161, "learning_rate": 8.180492671322233e-06, "loss": 0.085, "step": 5330 }, { "epoch": 0.08195840687591129, "grad_norm": 0.9951727986335754, "learning_rate": 8.19584068759113e-06, "loss": 0.0808, "step": 5340 }, { "epoch": 0.08211188703860026, "grad_norm": 0.920162558555603, "learning_rate": 8.211188703860026e-06, "loss": 0.0953, "step": 5350 }, { "epoch": 0.08226536720128923, "grad_norm": 1.6000343561172485, "learning_rate": 8.226536720128924e-06, "loss": 0.0735, "step": 5360 }, { "epoch": 0.08241884736397821, "grad_norm": 1.0663669109344482, "learning_rate": 8.241884736397821e-06, "loss": 0.0766, "step": 5370 }, { "epoch": 0.08257232752666718, "grad_norm": 1.150314450263977, "learning_rate": 8.257232752666718e-06, "loss": 0.0756, "step": 5380 }, { "epoch": 0.08272580768935615, "grad_norm": 1.0212706327438354, "learning_rate": 8.272580768935615e-06, "loss": 0.0854, "step": 5390 }, { "epoch": 0.08287928785204512, "grad_norm": 1.0229880809783936, "learning_rate": 8.287928785204513e-06, "loss": 0.0848, "step": 5400 }, { "epoch": 0.0830327680147341, "grad_norm": 1.3364893198013306, "learning_rate": 8.30327680147341e-06, "loss": 0.088, "step": 5410 }, { "epoch": 0.08318624817742307, "grad_norm": 0.8345897197723389, "learning_rate": 8.318624817742308e-06, "loss": 0.0724, "step": 5420 }, { "epoch": 0.08333972834011204, "grad_norm": 0.8147444725036621, "learning_rate": 8.333972834011205e-06, "loss": 0.0763, "step": 5430 }, { "epoch": 0.08349320850280101, "grad_norm": 1.1298997402191162, "learning_rate": 8.349320850280102e-06, "loss": 0.0871, "step": 5440 }, { "epoch": 0.08364668866548998, "grad_norm": 0.780101478099823, "learning_rate": 8.364668866549e-06, "loss": 0.0915, "step": 5450 }, { "epoch": 0.08380016882817896, "grad_norm": 1.0907055139541626, "learning_rate": 8.380016882817897e-06, "loss": 0.0814, "step": 5460 }, { "epoch": 0.08395364899086794, "grad_norm": 1.0227279663085938, "learning_rate": 8.395364899086794e-06, "loss": 0.0782, "step": 5470 }, { "epoch": 0.0841071291535569, "grad_norm": 1.1925398111343384, "learning_rate": 8.41071291535569e-06, "loss": 0.0908, "step": 5480 }, { "epoch": 0.08426060931624588, "grad_norm": 0.775211751461029, "learning_rate": 8.426060931624587e-06, "loss": 0.0778, "step": 5490 }, { "epoch": 0.08441408947893485, "grad_norm": 1.1386404037475586, "learning_rate": 8.441408947893486e-06, "loss": 0.081, "step": 5500 }, { "epoch": 0.08456756964162382, "grad_norm": 1.0260391235351562, "learning_rate": 8.456756964162382e-06, "loss": 0.105, "step": 5510 }, { "epoch": 0.08472104980431279, "grad_norm": 1.3268481492996216, "learning_rate": 8.47210498043128e-06, "loss": 0.093, "step": 5520 }, { "epoch": 0.08487452996700176, "grad_norm": 0.897160530090332, "learning_rate": 8.487452996700178e-06, "loss": 0.0773, "step": 5530 }, { "epoch": 0.08502801012969073, "grad_norm": 0.9914987683296204, "learning_rate": 8.502801012969074e-06, "loss": 0.0832, "step": 5540 }, { "epoch": 0.08518149029237972, "grad_norm": 1.153226613998413, "learning_rate": 8.518149029237973e-06, "loss": 0.0814, "step": 5550 }, { "epoch": 0.08533497045506869, "grad_norm": 0.7398780584335327, "learning_rate": 8.53349704550687e-06, "loss": 0.0699, "step": 5560 }, { "epoch": 0.08548845061775766, "grad_norm": 1.2880337238311768, "learning_rate": 8.548845061775766e-06, "loss": 0.0808, "step": 5570 }, { "epoch": 0.08564193078044663, "grad_norm": 0.8100732564926147, "learning_rate": 8.564193078044663e-06, "loss": 0.0783, "step": 5580 }, { "epoch": 0.0857954109431356, "grad_norm": 1.1180980205535889, "learning_rate": 8.57954109431356e-06, "loss": 0.0727, "step": 5590 }, { "epoch": 0.08594889110582457, "grad_norm": 1.3006776571273804, "learning_rate": 8.594889110582458e-06, "loss": 0.0778, "step": 5600 }, { "epoch": 0.08610237126851354, "grad_norm": 1.113943099975586, "learning_rate": 8.610237126851355e-06, "loss": 0.0765, "step": 5610 }, { "epoch": 0.08625585143120251, "grad_norm": 0.9968248009681702, "learning_rate": 8.625585143120252e-06, "loss": 0.0995, "step": 5620 }, { "epoch": 0.08640933159389148, "grad_norm": 1.2055003643035889, "learning_rate": 8.64093315938915e-06, "loss": 0.076, "step": 5630 }, { "epoch": 0.08656281175658047, "grad_norm": 1.6962981224060059, "learning_rate": 8.656281175658047e-06, "loss": 0.0926, "step": 5640 }, { "epoch": 0.08671629191926944, "grad_norm": 1.1858810186386108, "learning_rate": 8.671629191926945e-06, "loss": 0.084, "step": 5650 }, { "epoch": 0.08686977208195841, "grad_norm": 1.1668776273727417, "learning_rate": 8.686977208195842e-06, "loss": 0.0839, "step": 5660 }, { "epoch": 0.08702325224464738, "grad_norm": 1.1951349973678589, "learning_rate": 8.702325224464739e-06, "loss": 0.0889, "step": 5670 }, { "epoch": 0.08717673240733635, "grad_norm": 0.8158345818519592, "learning_rate": 8.717673240733635e-06, "loss": 0.0786, "step": 5680 }, { "epoch": 0.08733021257002532, "grad_norm": 1.0713223218917847, "learning_rate": 8.733021257002532e-06, "loss": 0.0787, "step": 5690 }, { "epoch": 0.0874836927327143, "grad_norm": 1.3191574811935425, "learning_rate": 8.74836927327143e-06, "loss": 0.0968, "step": 5700 }, { "epoch": 0.08763717289540326, "grad_norm": 1.097015142440796, "learning_rate": 8.763717289540327e-06, "loss": 0.0779, "step": 5710 }, { "epoch": 0.08779065305809224, "grad_norm": 1.2582488059997559, "learning_rate": 8.779065305809224e-06, "loss": 0.0933, "step": 5720 }, { "epoch": 0.08794413322078122, "grad_norm": 1.2119826078414917, "learning_rate": 8.794413322078123e-06, "loss": 0.0807, "step": 5730 }, { "epoch": 0.08809761338347019, "grad_norm": 1.4127001762390137, "learning_rate": 8.80976133834702e-06, "loss": 0.0893, "step": 5740 }, { "epoch": 0.08825109354615916, "grad_norm": 1.2266443967819214, "learning_rate": 8.825109354615916e-06, "loss": 0.097, "step": 5750 }, { "epoch": 0.08840457370884813, "grad_norm": 1.2878806591033936, "learning_rate": 8.840457370884814e-06, "loss": 0.0812, "step": 5760 }, { "epoch": 0.0885580538715371, "grad_norm": 0.8695161938667297, "learning_rate": 8.855805387153711e-06, "loss": 0.0866, "step": 5770 }, { "epoch": 0.08871153403422607, "grad_norm": 1.1012868881225586, "learning_rate": 8.871153403422608e-06, "loss": 0.0795, "step": 5780 }, { "epoch": 0.08886501419691505, "grad_norm": 1.3578765392303467, "learning_rate": 8.886501419691506e-06, "loss": 0.0899, "step": 5790 }, { "epoch": 0.08901849435960402, "grad_norm": 1.3999619483947754, "learning_rate": 8.901849435960403e-06, "loss": 0.0939, "step": 5800 }, { "epoch": 0.08917197452229299, "grad_norm": 1.3902592658996582, "learning_rate": 8.9171974522293e-06, "loss": 0.0916, "step": 5810 }, { "epoch": 0.08932545468498197, "grad_norm": 1.4189187288284302, "learning_rate": 8.932545468498197e-06, "loss": 0.0845, "step": 5820 }, { "epoch": 0.08947893484767094, "grad_norm": 1.0488817691802979, "learning_rate": 8.947893484767095e-06, "loss": 0.0857, "step": 5830 }, { "epoch": 0.08963241501035991, "grad_norm": 1.2832555770874023, "learning_rate": 8.963241501035992e-06, "loss": 0.0893, "step": 5840 }, { "epoch": 0.08978589517304889, "grad_norm": 0.8592072129249573, "learning_rate": 8.978589517304889e-06, "loss": 0.0668, "step": 5850 }, { "epoch": 0.08993937533573786, "grad_norm": 0.9430562853813171, "learning_rate": 8.993937533573787e-06, "loss": 0.0834, "step": 5860 }, { "epoch": 0.09009285549842683, "grad_norm": 1.1253242492675781, "learning_rate": 9.009285549842684e-06, "loss": 0.0914, "step": 5870 }, { "epoch": 0.0902463356611158, "grad_norm": 1.1564209461212158, "learning_rate": 9.024633566111582e-06, "loss": 0.0761, "step": 5880 }, { "epoch": 0.09039981582380477, "grad_norm": 0.886978268623352, "learning_rate": 9.039981582380479e-06, "loss": 0.0723, "step": 5890 }, { "epoch": 0.09055329598649374, "grad_norm": 0.8420110940933228, "learning_rate": 9.055329598649376e-06, "loss": 0.0711, "step": 5900 }, { "epoch": 0.09070677614918272, "grad_norm": 0.956099808216095, "learning_rate": 9.070677614918272e-06, "loss": 0.075, "step": 5910 }, { "epoch": 0.0908602563118717, "grad_norm": 1.0810353755950928, "learning_rate": 9.086025631187169e-06, "loss": 0.0801, "step": 5920 }, { "epoch": 0.09101373647456067, "grad_norm": 0.9583218693733215, "learning_rate": 9.101373647456066e-06, "loss": 0.0699, "step": 5930 }, { "epoch": 0.09116721663724964, "grad_norm": 1.1360455751419067, "learning_rate": 9.116721663724964e-06, "loss": 0.0875, "step": 5940 }, { "epoch": 0.09132069679993861, "grad_norm": 1.1639158725738525, "learning_rate": 9.132069679993861e-06, "loss": 0.0901, "step": 5950 }, { "epoch": 0.09147417696262758, "grad_norm": 0.7401775121688843, "learning_rate": 9.14741769626276e-06, "loss": 0.0803, "step": 5960 }, { "epoch": 0.09162765712531655, "grad_norm": 0.855141818523407, "learning_rate": 9.162765712531656e-06, "loss": 0.0751, "step": 5970 }, { "epoch": 0.09178113728800552, "grad_norm": 1.1354702711105347, "learning_rate": 9.178113728800553e-06, "loss": 0.0935, "step": 5980 }, { "epoch": 0.09193461745069449, "grad_norm": 1.0801841020584106, "learning_rate": 9.193461745069451e-06, "loss": 0.0837, "step": 5990 }, { "epoch": 0.09208809761338348, "grad_norm": 1.064200520515442, "learning_rate": 9.208809761338348e-06, "loss": 0.0713, "step": 6000 }, { "epoch": 0.09224157777607245, "grad_norm": 1.2896076440811157, "learning_rate": 9.224157777607245e-06, "loss": 0.096, "step": 6010 }, { "epoch": 0.09239505793876142, "grad_norm": 1.1549102067947388, "learning_rate": 9.239505793876142e-06, "loss": 0.0848, "step": 6020 }, { "epoch": 0.09254853810145039, "grad_norm": 0.8834503889083862, "learning_rate": 9.254853810145038e-06, "loss": 0.0897, "step": 6030 }, { "epoch": 0.09270201826413936, "grad_norm": 1.040113091468811, "learning_rate": 9.270201826413937e-06, "loss": 0.0854, "step": 6040 }, { "epoch": 0.09285549842682833, "grad_norm": 1.186586618423462, "learning_rate": 9.285549842682834e-06, "loss": 0.0894, "step": 6050 }, { "epoch": 0.0930089785895173, "grad_norm": 0.8028474450111389, "learning_rate": 9.30089785895173e-06, "loss": 0.0817, "step": 6060 }, { "epoch": 0.09316245875220627, "grad_norm": 1.096925973892212, "learning_rate": 9.316245875220629e-06, "loss": 0.074, "step": 6070 }, { "epoch": 0.09331593891489524, "grad_norm": 1.1151872873306274, "learning_rate": 9.331593891489525e-06, "loss": 0.086, "step": 6080 }, { "epoch": 0.09346941907758423, "grad_norm": 0.9791191220283508, "learning_rate": 9.346941907758424e-06, "loss": 0.0868, "step": 6090 }, { "epoch": 0.0936228992402732, "grad_norm": 0.8215442895889282, "learning_rate": 9.36228992402732e-06, "loss": 0.0752, "step": 6100 }, { "epoch": 0.09377637940296217, "grad_norm": 0.876791775226593, "learning_rate": 9.377637940296217e-06, "loss": 0.0769, "step": 6110 }, { "epoch": 0.09392985956565114, "grad_norm": 0.7858976125717163, "learning_rate": 9.392985956565114e-06, "loss": 0.0784, "step": 6120 }, { "epoch": 0.09408333972834011, "grad_norm": 1.3369766473770142, "learning_rate": 9.408333972834013e-06, "loss": 0.0859, "step": 6130 }, { "epoch": 0.09423681989102908, "grad_norm": 1.1611775159835815, "learning_rate": 9.42368198910291e-06, "loss": 0.0859, "step": 6140 }, { "epoch": 0.09439030005371805, "grad_norm": 1.3163360357284546, "learning_rate": 9.439030005371806e-06, "loss": 0.0863, "step": 6150 }, { "epoch": 0.09454378021640703, "grad_norm": 1.1270910501480103, "learning_rate": 9.454378021640703e-06, "loss": 0.0869, "step": 6160 }, { "epoch": 0.094697260379096, "grad_norm": 1.210292935371399, "learning_rate": 9.469726037909601e-06, "loss": 0.0858, "step": 6170 }, { "epoch": 0.09485074054178498, "grad_norm": 1.1022086143493652, "learning_rate": 9.485074054178498e-06, "loss": 0.0879, "step": 6180 }, { "epoch": 0.09500422070447395, "grad_norm": 1.0294594764709473, "learning_rate": 9.500422070447396e-06, "loss": 0.0898, "step": 6190 }, { "epoch": 0.09515770086716292, "grad_norm": 0.9314491748809814, "learning_rate": 9.515770086716293e-06, "loss": 0.0768, "step": 6200 }, { "epoch": 0.0953111810298519, "grad_norm": 1.1369656324386597, "learning_rate": 9.53111810298519e-06, "loss": 0.073, "step": 6210 }, { "epoch": 0.09546466119254086, "grad_norm": 0.9877233505249023, "learning_rate": 9.546466119254088e-06, "loss": 0.0796, "step": 6220 }, { "epoch": 0.09561814135522984, "grad_norm": 0.9015820026397705, "learning_rate": 9.561814135522985e-06, "loss": 0.086, "step": 6230 }, { "epoch": 0.0957716215179188, "grad_norm": 1.054566502571106, "learning_rate": 9.577162151791882e-06, "loss": 0.0881, "step": 6240 }, { "epoch": 0.09592510168060778, "grad_norm": 1.0594143867492676, "learning_rate": 9.592510168060779e-06, "loss": 0.0735, "step": 6250 }, { "epoch": 0.09607858184329675, "grad_norm": 1.0548663139343262, "learning_rate": 9.607858184329675e-06, "loss": 0.091, "step": 6260 }, { "epoch": 0.09623206200598573, "grad_norm": 1.1138807535171509, "learning_rate": 9.623206200598574e-06, "loss": 0.0758, "step": 6270 }, { "epoch": 0.0963855421686747, "grad_norm": 0.8302594423294067, "learning_rate": 9.63855421686747e-06, "loss": 0.0875, "step": 6280 }, { "epoch": 0.09653902233136367, "grad_norm": 0.6653923392295837, "learning_rate": 9.653902233136367e-06, "loss": 0.0735, "step": 6290 }, { "epoch": 0.09669250249405265, "grad_norm": 0.8282012343406677, "learning_rate": 9.669250249405266e-06, "loss": 0.077, "step": 6300 }, { "epoch": 0.09684598265674162, "grad_norm": 0.9259793162345886, "learning_rate": 9.684598265674162e-06, "loss": 0.078, "step": 6310 }, { "epoch": 0.09699946281943059, "grad_norm": 1.4402546882629395, "learning_rate": 9.69994628194306e-06, "loss": 0.0684, "step": 6320 }, { "epoch": 0.09715294298211956, "grad_norm": 1.0487451553344727, "learning_rate": 9.715294298211958e-06, "loss": 0.0793, "step": 6330 }, { "epoch": 0.09730642314480853, "grad_norm": 1.0916541814804077, "learning_rate": 9.730642314480854e-06, "loss": 0.0975, "step": 6340 }, { "epoch": 0.0974599033074975, "grad_norm": 1.3338851928710938, "learning_rate": 9.745990330749751e-06, "loss": 0.0755, "step": 6350 }, { "epoch": 0.09761338347018648, "grad_norm": 0.7155436277389526, "learning_rate": 9.761338347018648e-06, "loss": 0.0782, "step": 6360 }, { "epoch": 0.09776686363287546, "grad_norm": 1.3812159299850464, "learning_rate": 9.776686363287546e-06, "loss": 0.0916, "step": 6370 }, { "epoch": 0.09792034379556443, "grad_norm": 1.2953428030014038, "learning_rate": 9.792034379556443e-06, "loss": 0.0786, "step": 6380 }, { "epoch": 0.0980738239582534, "grad_norm": 0.8840582370758057, "learning_rate": 9.80738239582534e-06, "loss": 0.0867, "step": 6390 }, { "epoch": 0.09822730412094237, "grad_norm": 1.1668522357940674, "learning_rate": 9.822730412094238e-06, "loss": 0.0791, "step": 6400 }, { "epoch": 0.09838078428363134, "grad_norm": 0.9066705107688904, "learning_rate": 9.838078428363135e-06, "loss": 0.0824, "step": 6410 }, { "epoch": 0.09853426444632031, "grad_norm": 0.9702092409133911, "learning_rate": 9.853426444632032e-06, "loss": 0.0695, "step": 6420 }, { "epoch": 0.09868774460900928, "grad_norm": 0.9688230752944946, "learning_rate": 9.86877446090093e-06, "loss": 0.0709, "step": 6430 }, { "epoch": 0.09884122477169825, "grad_norm": 1.2928184270858765, "learning_rate": 9.884122477169827e-06, "loss": 0.0757, "step": 6440 }, { "epoch": 0.09899470493438724, "grad_norm": 1.3396764993667603, "learning_rate": 9.899470493438724e-06, "loss": 0.0972, "step": 6450 }, { "epoch": 0.09914818509707621, "grad_norm": 0.7583783864974976, "learning_rate": 9.91481850970762e-06, "loss": 0.073, "step": 6460 }, { "epoch": 0.09930166525976518, "grad_norm": 0.8398375511169434, "learning_rate": 9.930166525976519e-06, "loss": 0.0868, "step": 6470 }, { "epoch": 0.09945514542245415, "grad_norm": 0.9329052567481995, "learning_rate": 9.945514542245415e-06, "loss": 0.0844, "step": 6480 }, { "epoch": 0.09960862558514312, "grad_norm": 1.0186718702316284, "learning_rate": 9.960862558514312e-06, "loss": 0.0638, "step": 6490 }, { "epoch": 0.09976210574783209, "grad_norm": 1.3519141674041748, "learning_rate": 9.97621057478321e-06, "loss": 0.0838, "step": 6500 }, { "epoch": 0.09991558591052106, "grad_norm": 0.5548356175422668, "learning_rate": 9.991558591052107e-06, "loss": 0.0831, "step": 6510 }, { "epoch": 0.10006906607321003, "grad_norm": 0.807719349861145, "learning_rate": 1.0006906607321006e-05, "loss": 0.0882, "step": 6520 }, { "epoch": 0.100222546235899, "grad_norm": 0.948028028011322, "learning_rate": 1.0022254623589903e-05, "loss": 0.0804, "step": 6530 }, { "epoch": 0.10037602639858799, "grad_norm": 1.1676459312438965, "learning_rate": 1.00376026398588e-05, "loss": 0.0831, "step": 6540 }, { "epoch": 0.10052950656127696, "grad_norm": 0.7746410369873047, "learning_rate": 1.0052950656127696e-05, "loss": 0.0806, "step": 6550 }, { "epoch": 0.10068298672396593, "grad_norm": 0.8997694253921509, "learning_rate": 1.0068298672396593e-05, "loss": 0.0748, "step": 6560 }, { "epoch": 0.1008364668866549, "grad_norm": 1.034250020980835, "learning_rate": 1.0083646688665491e-05, "loss": 0.0791, "step": 6570 }, { "epoch": 0.10098994704934387, "grad_norm": 1.0795247554779053, "learning_rate": 1.0098994704934388e-05, "loss": 0.0774, "step": 6580 }, { "epoch": 0.10114342721203284, "grad_norm": 0.9715172648429871, "learning_rate": 1.0114342721203285e-05, "loss": 0.0761, "step": 6590 }, { "epoch": 0.10129690737472181, "grad_norm": 0.8782834410667419, "learning_rate": 1.0129690737472181e-05, "loss": 0.0792, "step": 6600 }, { "epoch": 0.10145038753741079, "grad_norm": 0.9008817672729492, "learning_rate": 1.0145038753741078e-05, "loss": 0.0868, "step": 6610 }, { "epoch": 0.10160386770009976, "grad_norm": 0.8957838416099548, "learning_rate": 1.0160386770009978e-05, "loss": 0.0762, "step": 6620 }, { "epoch": 0.10175734786278874, "grad_norm": 0.8579633235931396, "learning_rate": 1.0175734786278875e-05, "loss": 0.0732, "step": 6630 }, { "epoch": 0.10191082802547771, "grad_norm": 1.1616649627685547, "learning_rate": 1.0191082802547772e-05, "loss": 0.0724, "step": 6640 }, { "epoch": 0.10206430818816668, "grad_norm": 0.8640633225440979, "learning_rate": 1.0206430818816669e-05, "loss": 0.0771, "step": 6650 }, { "epoch": 0.10221778835085565, "grad_norm": 0.771166980266571, "learning_rate": 1.0221778835085565e-05, "loss": 0.0901, "step": 6660 }, { "epoch": 0.10237126851354462, "grad_norm": 1.36296808719635, "learning_rate": 1.0237126851354464e-05, "loss": 0.08, "step": 6670 }, { "epoch": 0.1025247486762336, "grad_norm": 0.7982727289199829, "learning_rate": 1.025247486762336e-05, "loss": 0.0841, "step": 6680 }, { "epoch": 0.10267822883892257, "grad_norm": 0.9521018862724304, "learning_rate": 1.0267822883892257e-05, "loss": 0.0732, "step": 6690 }, { "epoch": 0.10283170900161154, "grad_norm": 1.1441330909729004, "learning_rate": 1.0283170900161154e-05, "loss": 0.0801, "step": 6700 }, { "epoch": 0.10298518916430051, "grad_norm": 0.7831258177757263, "learning_rate": 1.029851891643005e-05, "loss": 0.0728, "step": 6710 }, { "epoch": 0.1031386693269895, "grad_norm": 0.9594160318374634, "learning_rate": 1.031386693269895e-05, "loss": 0.0811, "step": 6720 }, { "epoch": 0.10329214948967846, "grad_norm": 1.350459337234497, "learning_rate": 1.0329214948967848e-05, "loss": 0.0962, "step": 6730 }, { "epoch": 0.10344562965236744, "grad_norm": 0.9105786681175232, "learning_rate": 1.0344562965236744e-05, "loss": 0.0669, "step": 6740 }, { "epoch": 0.1035991098150564, "grad_norm": 1.0724916458129883, "learning_rate": 1.0359910981505641e-05, "loss": 0.0824, "step": 6750 }, { "epoch": 0.10375258997774538, "grad_norm": 0.9183483719825745, "learning_rate": 1.0375258997774538e-05, "loss": 0.0816, "step": 6760 }, { "epoch": 0.10390607014043435, "grad_norm": 1.172400712966919, "learning_rate": 1.0390607014043436e-05, "loss": 0.0892, "step": 6770 }, { "epoch": 0.10405955030312332, "grad_norm": 1.1495692729949951, "learning_rate": 1.0405955030312333e-05, "loss": 0.0649, "step": 6780 }, { "epoch": 0.10421303046581229, "grad_norm": 0.8838276267051697, "learning_rate": 1.042130304658123e-05, "loss": 0.0766, "step": 6790 }, { "epoch": 0.10436651062850126, "grad_norm": 0.6142831444740295, "learning_rate": 1.0436651062850126e-05, "loss": 0.0777, "step": 6800 }, { "epoch": 0.10451999079119025, "grad_norm": 1.3141515254974365, "learning_rate": 1.0451999079119025e-05, "loss": 0.0781, "step": 6810 }, { "epoch": 0.10467347095387922, "grad_norm": 1.197521448135376, "learning_rate": 1.0467347095387923e-05, "loss": 0.0749, "step": 6820 }, { "epoch": 0.10482695111656819, "grad_norm": 0.9428767561912537, "learning_rate": 1.048269511165682e-05, "loss": 0.0937, "step": 6830 }, { "epoch": 0.10498043127925716, "grad_norm": 1.0319017171859741, "learning_rate": 1.0498043127925717e-05, "loss": 0.0959, "step": 6840 }, { "epoch": 0.10513391144194613, "grad_norm": 0.9993795156478882, "learning_rate": 1.0513391144194614e-05, "loss": 0.0764, "step": 6850 }, { "epoch": 0.1052873916046351, "grad_norm": 1.020858883857727, "learning_rate": 1.052873916046351e-05, "loss": 0.0917, "step": 6860 }, { "epoch": 0.10544087176732407, "grad_norm": 1.0313503742218018, "learning_rate": 1.0544087176732407e-05, "loss": 0.0918, "step": 6870 }, { "epoch": 0.10559435193001304, "grad_norm": 0.9963934421539307, "learning_rate": 1.0559435193001305e-05, "loss": 0.0775, "step": 6880 }, { "epoch": 0.10574783209270201, "grad_norm": 1.0902518033981323, "learning_rate": 1.0574783209270202e-05, "loss": 0.08, "step": 6890 }, { "epoch": 0.105901312255391, "grad_norm": 1.2796008586883545, "learning_rate": 1.05901312255391e-05, "loss": 0.0901, "step": 6900 }, { "epoch": 0.10605479241807997, "grad_norm": 0.9293486475944519, "learning_rate": 1.0605479241807997e-05, "loss": 0.0683, "step": 6910 }, { "epoch": 0.10620827258076894, "grad_norm": 1.1560328006744385, "learning_rate": 1.0620827258076894e-05, "loss": 0.0847, "step": 6920 }, { "epoch": 0.10636175274345791, "grad_norm": 0.9879751801490784, "learning_rate": 1.0636175274345793e-05, "loss": 0.085, "step": 6930 }, { "epoch": 0.10651523290614688, "grad_norm": 0.8664377927780151, "learning_rate": 1.065152329061469e-05, "loss": 0.0708, "step": 6940 }, { "epoch": 0.10666871306883585, "grad_norm": 1.0449903011322021, "learning_rate": 1.0666871306883586e-05, "loss": 0.0894, "step": 6950 }, { "epoch": 0.10682219323152482, "grad_norm": 0.7020130157470703, "learning_rate": 1.0682219323152483e-05, "loss": 0.0873, "step": 6960 }, { "epoch": 0.1069756733942138, "grad_norm": 1.110740065574646, "learning_rate": 1.069756733942138e-05, "loss": 0.0727, "step": 6970 }, { "epoch": 0.10712915355690276, "grad_norm": 0.8716080784797668, "learning_rate": 1.0712915355690278e-05, "loss": 0.0849, "step": 6980 }, { "epoch": 0.10728263371959175, "grad_norm": 0.7980071902275085, "learning_rate": 1.0728263371959176e-05, "loss": 0.0814, "step": 6990 }, { "epoch": 0.10743611388228072, "grad_norm": 0.9923562407493591, "learning_rate": 1.0743611388228073e-05, "loss": 0.0795, "step": 7000 }, { "epoch": 0.10758959404496969, "grad_norm": 0.9270330667495728, "learning_rate": 1.075895940449697e-05, "loss": 0.0865, "step": 7010 }, { "epoch": 0.10774307420765866, "grad_norm": 0.897240161895752, "learning_rate": 1.0774307420765867e-05, "loss": 0.078, "step": 7020 }, { "epoch": 0.10789655437034763, "grad_norm": 0.8505697250366211, "learning_rate": 1.0789655437034765e-05, "loss": 0.0673, "step": 7030 }, { "epoch": 0.1080500345330366, "grad_norm": 0.9496402144432068, "learning_rate": 1.0805003453303662e-05, "loss": 0.0776, "step": 7040 }, { "epoch": 0.10820351469572558, "grad_norm": 0.8003101348876953, "learning_rate": 1.0820351469572559e-05, "loss": 0.0786, "step": 7050 }, { "epoch": 0.10835699485841455, "grad_norm": 0.8710169196128845, "learning_rate": 1.0835699485841455e-05, "loss": 0.0709, "step": 7060 }, { "epoch": 0.10851047502110352, "grad_norm": 1.0021322965621948, "learning_rate": 1.0851047502110352e-05, "loss": 0.0819, "step": 7070 }, { "epoch": 0.1086639551837925, "grad_norm": 0.7422587871551514, "learning_rate": 1.0866395518379252e-05, "loss": 0.0854, "step": 7080 }, { "epoch": 0.10881743534648147, "grad_norm": 1.0030944347381592, "learning_rate": 1.0881743534648149e-05, "loss": 0.0749, "step": 7090 }, { "epoch": 0.10897091550917044, "grad_norm": 1.0156457424163818, "learning_rate": 1.0897091550917046e-05, "loss": 0.0752, "step": 7100 }, { "epoch": 0.10912439567185941, "grad_norm": 0.8428593277931213, "learning_rate": 1.0912439567185942e-05, "loss": 0.0953, "step": 7110 }, { "epoch": 0.10927787583454839, "grad_norm": 0.987329363822937, "learning_rate": 1.0927787583454839e-05, "loss": 0.1034, "step": 7120 }, { "epoch": 0.10943135599723736, "grad_norm": 1.0745749473571777, "learning_rate": 1.0943135599723738e-05, "loss": 0.0822, "step": 7130 }, { "epoch": 0.10958483615992633, "grad_norm": 1.1566061973571777, "learning_rate": 1.0958483615992634e-05, "loss": 0.0893, "step": 7140 }, { "epoch": 0.1097383163226153, "grad_norm": 1.0102194547653198, "learning_rate": 1.0973831632261531e-05, "loss": 0.0788, "step": 7150 }, { "epoch": 0.10989179648530427, "grad_norm": 1.0560591220855713, "learning_rate": 1.0989179648530428e-05, "loss": 0.0717, "step": 7160 }, { "epoch": 0.11004527664799325, "grad_norm": 1.3635210990905762, "learning_rate": 1.1004527664799324e-05, "loss": 0.0855, "step": 7170 }, { "epoch": 0.11019875681068222, "grad_norm": 1.0359492301940918, "learning_rate": 1.1019875681068225e-05, "loss": 0.0854, "step": 7180 }, { "epoch": 0.1103522369733712, "grad_norm": 1.0500699281692505, "learning_rate": 1.1035223697337121e-05, "loss": 0.0892, "step": 7190 }, { "epoch": 0.11050571713606017, "grad_norm": 0.9632185101509094, "learning_rate": 1.1050571713606018e-05, "loss": 0.0682, "step": 7200 }, { "epoch": 0.11065919729874914, "grad_norm": 0.9848228693008423, "learning_rate": 1.1065919729874915e-05, "loss": 0.0985, "step": 7210 }, { "epoch": 0.11081267746143811, "grad_norm": 1.1506972312927246, "learning_rate": 1.1081267746143812e-05, "loss": 0.0613, "step": 7220 }, { "epoch": 0.11096615762412708, "grad_norm": 1.0171711444854736, "learning_rate": 1.1096615762412708e-05, "loss": 0.0861, "step": 7230 }, { "epoch": 0.11111963778681605, "grad_norm": 1.1050505638122559, "learning_rate": 1.1111963778681607e-05, "loss": 0.0734, "step": 7240 }, { "epoch": 0.11127311794950502, "grad_norm": 0.7892197370529175, "learning_rate": 1.1127311794950503e-05, "loss": 0.073, "step": 7250 }, { "epoch": 0.111426598112194, "grad_norm": 0.7784057855606079, "learning_rate": 1.11426598112194e-05, "loss": 0.0601, "step": 7260 }, { "epoch": 0.11158007827488298, "grad_norm": 1.2113091945648193, "learning_rate": 1.1158007827488297e-05, "loss": 0.0615, "step": 7270 }, { "epoch": 0.11173355843757195, "grad_norm": 0.8243152499198914, "learning_rate": 1.1173355843757194e-05, "loss": 0.0799, "step": 7280 }, { "epoch": 0.11188703860026092, "grad_norm": 0.8881662487983704, "learning_rate": 1.1188703860026094e-05, "loss": 0.0676, "step": 7290 }, { "epoch": 0.11204051876294989, "grad_norm": 1.153313398361206, "learning_rate": 1.120405187629499e-05, "loss": 0.0892, "step": 7300 }, { "epoch": 0.11219399892563886, "grad_norm": 0.7097883224487305, "learning_rate": 1.1219399892563887e-05, "loss": 0.0762, "step": 7310 }, { "epoch": 0.11234747908832783, "grad_norm": 0.7689018845558167, "learning_rate": 1.1234747908832784e-05, "loss": 0.0787, "step": 7320 }, { "epoch": 0.1125009592510168, "grad_norm": 1.0849533081054688, "learning_rate": 1.125009592510168e-05, "loss": 0.0876, "step": 7330 }, { "epoch": 0.11265443941370577, "grad_norm": 0.9652071595191956, "learning_rate": 1.126544394137058e-05, "loss": 0.0886, "step": 7340 }, { "epoch": 0.11280791957639476, "grad_norm": 0.9077328443527222, "learning_rate": 1.1280791957639476e-05, "loss": 0.0843, "step": 7350 }, { "epoch": 0.11296139973908373, "grad_norm": 1.038831353187561, "learning_rate": 1.1296139973908373e-05, "loss": 0.0776, "step": 7360 }, { "epoch": 0.1131148799017727, "grad_norm": 1.0259711742401123, "learning_rate": 1.131148799017727e-05, "loss": 0.0878, "step": 7370 }, { "epoch": 0.11326836006446167, "grad_norm": 1.0377516746520996, "learning_rate": 1.1326836006446166e-05, "loss": 0.085, "step": 7380 }, { "epoch": 0.11342184022715064, "grad_norm": 1.7910473346710205, "learning_rate": 1.1342184022715066e-05, "loss": 0.0677, "step": 7390 }, { "epoch": 0.11357532038983961, "grad_norm": 1.0436664819717407, "learning_rate": 1.1357532038983963e-05, "loss": 0.0697, "step": 7400 }, { "epoch": 0.11372880055252858, "grad_norm": 0.8054331541061401, "learning_rate": 1.137288005525286e-05, "loss": 0.0708, "step": 7410 }, { "epoch": 0.11388228071521755, "grad_norm": 0.9409052133560181, "learning_rate": 1.1388228071521757e-05, "loss": 0.0745, "step": 7420 }, { "epoch": 0.11403576087790653, "grad_norm": 1.0173065662384033, "learning_rate": 1.1403576087790653e-05, "loss": 0.0838, "step": 7430 }, { "epoch": 0.11418924104059551, "grad_norm": 1.1005812883377075, "learning_rate": 1.1418924104059552e-05, "loss": 0.0769, "step": 7440 }, { "epoch": 0.11434272120328448, "grad_norm": 0.8716233372688293, "learning_rate": 1.1434272120328448e-05, "loss": 0.0704, "step": 7450 }, { "epoch": 0.11449620136597345, "grad_norm": 0.9281632304191589, "learning_rate": 1.1449620136597345e-05, "loss": 0.0685, "step": 7460 }, { "epoch": 0.11464968152866242, "grad_norm": 1.0315016508102417, "learning_rate": 1.1464968152866242e-05, "loss": 0.0768, "step": 7470 }, { "epoch": 0.1148031616913514, "grad_norm": 0.6771824955940247, "learning_rate": 1.1480316169135139e-05, "loss": 0.0739, "step": 7480 }, { "epoch": 0.11495664185404036, "grad_norm": 1.3154263496398926, "learning_rate": 1.1495664185404039e-05, "loss": 0.0829, "step": 7490 }, { "epoch": 0.11511012201672934, "grad_norm": 1.0994634628295898, "learning_rate": 1.1511012201672936e-05, "loss": 0.0718, "step": 7500 }, { "epoch": 0.1152636021794183, "grad_norm": 0.9881306290626526, "learning_rate": 1.1526360217941832e-05, "loss": 0.0654, "step": 7510 }, { "epoch": 0.11541708234210728, "grad_norm": 0.7254807949066162, "learning_rate": 1.1541708234210729e-05, "loss": 0.0753, "step": 7520 }, { "epoch": 0.11557056250479625, "grad_norm": 1.083019495010376, "learning_rate": 1.1557056250479626e-05, "loss": 0.0716, "step": 7530 }, { "epoch": 0.11572404266748523, "grad_norm": 0.9019991755485535, "learning_rate": 1.1572404266748524e-05, "loss": 0.0725, "step": 7540 }, { "epoch": 0.1158775228301742, "grad_norm": 0.9303022623062134, "learning_rate": 1.1587752283017421e-05, "loss": 0.0675, "step": 7550 }, { "epoch": 0.11603100299286317, "grad_norm": 1.127816915512085, "learning_rate": 1.1603100299286318e-05, "loss": 0.081, "step": 7560 }, { "epoch": 0.11618448315555215, "grad_norm": 0.7207096815109253, "learning_rate": 1.1618448315555214e-05, "loss": 0.0705, "step": 7570 }, { "epoch": 0.11633796331824112, "grad_norm": 0.944913387298584, "learning_rate": 1.1633796331824111e-05, "loss": 0.0827, "step": 7580 }, { "epoch": 0.11649144348093009, "grad_norm": 1.2718020677566528, "learning_rate": 1.164914434809301e-05, "loss": 0.0788, "step": 7590 }, { "epoch": 0.11664492364361906, "grad_norm": 0.9744902849197388, "learning_rate": 1.1664492364361908e-05, "loss": 0.0689, "step": 7600 }, { "epoch": 0.11679840380630803, "grad_norm": 0.7635206580162048, "learning_rate": 1.1679840380630805e-05, "loss": 0.0767, "step": 7610 }, { "epoch": 0.116951883968997, "grad_norm": 0.8637260794639587, "learning_rate": 1.1695188396899702e-05, "loss": 0.0588, "step": 7620 }, { "epoch": 0.11710536413168599, "grad_norm": 0.7053728699684143, "learning_rate": 1.1710536413168598e-05, "loss": 0.0581, "step": 7630 }, { "epoch": 0.11725884429437496, "grad_norm": 0.7108251452445984, "learning_rate": 1.1725884429437495e-05, "loss": 0.0619, "step": 7640 }, { "epoch": 0.11741232445706393, "grad_norm": 1.1613339185714722, "learning_rate": 1.1741232445706393e-05, "loss": 0.0756, "step": 7650 }, { "epoch": 0.1175658046197529, "grad_norm": 1.050368309020996, "learning_rate": 1.175658046197529e-05, "loss": 0.0686, "step": 7660 }, { "epoch": 0.11771928478244187, "grad_norm": 0.8437767624855042, "learning_rate": 1.1771928478244189e-05, "loss": 0.0736, "step": 7670 }, { "epoch": 0.11787276494513084, "grad_norm": 2.704633951187134, "learning_rate": 1.1787276494513085e-05, "loss": 0.0802, "step": 7680 }, { "epoch": 0.11802624510781981, "grad_norm": 1.000658392906189, "learning_rate": 1.1802624510781982e-05, "loss": 0.0786, "step": 7690 }, { "epoch": 0.11817972527050878, "grad_norm": 1.044574499130249, "learning_rate": 1.181797252705088e-05, "loss": 0.0886, "step": 7700 }, { "epoch": 0.11833320543319775, "grad_norm": 0.9999223947525024, "learning_rate": 1.1833320543319777e-05, "loss": 0.0901, "step": 7710 }, { "epoch": 0.11848668559588674, "grad_norm": 0.9119364023208618, "learning_rate": 1.1848668559588674e-05, "loss": 0.0869, "step": 7720 }, { "epoch": 0.11864016575857571, "grad_norm": 0.6754350662231445, "learning_rate": 1.186401657585757e-05, "loss": 0.0671, "step": 7730 }, { "epoch": 0.11879364592126468, "grad_norm": 0.9961066842079163, "learning_rate": 1.1879364592126468e-05, "loss": 0.0661, "step": 7740 }, { "epoch": 0.11894712608395365, "grad_norm": 0.8874518871307373, "learning_rate": 1.1894712608395366e-05, "loss": 0.0859, "step": 7750 }, { "epoch": 0.11910060624664262, "grad_norm": 1.0058770179748535, "learning_rate": 1.1910060624664264e-05, "loss": 0.074, "step": 7760 }, { "epoch": 0.11925408640933159, "grad_norm": 0.8766112923622131, "learning_rate": 1.1925408640933161e-05, "loss": 0.0848, "step": 7770 }, { "epoch": 0.11940756657202056, "grad_norm": 1.0808203220367432, "learning_rate": 1.1940756657202058e-05, "loss": 0.072, "step": 7780 }, { "epoch": 0.11956104673470953, "grad_norm": 0.7406820058822632, "learning_rate": 1.1956104673470955e-05, "loss": 0.0775, "step": 7790 }, { "epoch": 0.1197145268973985, "grad_norm": 0.9809466004371643, "learning_rate": 1.1971452689739853e-05, "loss": 0.0708, "step": 7800 }, { "epoch": 0.11986800706008749, "grad_norm": 0.8709692358970642, "learning_rate": 1.198680070600875e-05, "loss": 0.069, "step": 7810 }, { "epoch": 0.12002148722277646, "grad_norm": 0.8464816808700562, "learning_rate": 1.2002148722277647e-05, "loss": 0.0785, "step": 7820 }, { "epoch": 0.12017496738546543, "grad_norm": 0.9566229581832886, "learning_rate": 1.2017496738546543e-05, "loss": 0.0805, "step": 7830 }, { "epoch": 0.1203284475481544, "grad_norm": 1.0958064794540405, "learning_rate": 1.203284475481544e-05, "loss": 0.0743, "step": 7840 }, { "epoch": 0.12048192771084337, "grad_norm": 0.6959251165390015, "learning_rate": 1.204819277108434e-05, "loss": 0.0869, "step": 7850 }, { "epoch": 0.12063540787353234, "grad_norm": 0.7556108236312866, "learning_rate": 1.2063540787353237e-05, "loss": 0.0777, "step": 7860 }, { "epoch": 0.12078888803622131, "grad_norm": 0.849077045917511, "learning_rate": 1.2078888803622134e-05, "loss": 0.075, "step": 7870 }, { "epoch": 0.12094236819891029, "grad_norm": 1.1529361009597778, "learning_rate": 1.209423681989103e-05, "loss": 0.0843, "step": 7880 }, { "epoch": 0.12109584836159926, "grad_norm": 1.189692735671997, "learning_rate": 1.2109584836159927e-05, "loss": 0.0811, "step": 7890 }, { "epoch": 0.12124932852428824, "grad_norm": 1.0408203601837158, "learning_rate": 1.2124932852428826e-05, "loss": 0.0776, "step": 7900 }, { "epoch": 0.12140280868697721, "grad_norm": 0.983040988445282, "learning_rate": 1.2140280868697722e-05, "loss": 0.0895, "step": 7910 }, { "epoch": 0.12155628884966618, "grad_norm": 0.7970344424247742, "learning_rate": 1.2155628884966619e-05, "loss": 0.065, "step": 7920 }, { "epoch": 0.12170976901235515, "grad_norm": 0.8397560715675354, "learning_rate": 1.2170976901235516e-05, "loss": 0.0725, "step": 7930 }, { "epoch": 0.12186324917504412, "grad_norm": 0.9584901332855225, "learning_rate": 1.2186324917504413e-05, "loss": 0.0828, "step": 7940 }, { "epoch": 0.1220167293377331, "grad_norm": 0.8608944416046143, "learning_rate": 1.220167293377331e-05, "loss": 0.0785, "step": 7950 }, { "epoch": 0.12217020950042207, "grad_norm": 0.9996734261512756, "learning_rate": 1.221702095004221e-05, "loss": 0.0697, "step": 7960 }, { "epoch": 0.12232368966311104, "grad_norm": 0.9597033858299255, "learning_rate": 1.2232368966311106e-05, "loss": 0.0753, "step": 7970 }, { "epoch": 0.12247716982580001, "grad_norm": 0.745855450630188, "learning_rate": 1.2247716982580003e-05, "loss": 0.0868, "step": 7980 }, { "epoch": 0.122630649988489, "grad_norm": 1.095660924911499, "learning_rate": 1.22630649988489e-05, "loss": 0.0746, "step": 7990 }, { "epoch": 0.12278413015117796, "grad_norm": 0.7249221801757812, "learning_rate": 1.2278413015117796e-05, "loss": 0.0724, "step": 8000 }, { "epoch": 0.12293761031386694, "grad_norm": 0.8479332327842712, "learning_rate": 1.2293761031386695e-05, "loss": 0.0915, "step": 8010 }, { "epoch": 0.1230910904765559, "grad_norm": 1.0679175853729248, "learning_rate": 1.2309109047655592e-05, "loss": 0.0669, "step": 8020 }, { "epoch": 0.12324457063924488, "grad_norm": 0.8384817242622375, "learning_rate": 1.2324457063924488e-05, "loss": 0.0752, "step": 8030 }, { "epoch": 0.12339805080193385, "grad_norm": 0.7733489274978638, "learning_rate": 1.2339805080193385e-05, "loss": 0.0608, "step": 8040 }, { "epoch": 0.12355153096462282, "grad_norm": 0.7633740901947021, "learning_rate": 1.2355153096462282e-05, "loss": 0.0667, "step": 8050 }, { "epoch": 0.12370501112731179, "grad_norm": 0.9215208292007446, "learning_rate": 1.2370501112731182e-05, "loss": 0.0756, "step": 8060 }, { "epoch": 0.12385849129000076, "grad_norm": 1.0949082374572754, "learning_rate": 1.2385849129000079e-05, "loss": 0.0767, "step": 8070 }, { "epoch": 0.12401197145268975, "grad_norm": 0.8237854838371277, "learning_rate": 1.2401197145268975e-05, "loss": 0.0655, "step": 8080 }, { "epoch": 0.12416545161537872, "grad_norm": 0.7676175832748413, "learning_rate": 1.2416545161537872e-05, "loss": 0.0704, "step": 8090 }, { "epoch": 0.12431893177806769, "grad_norm": 1.007051706314087, "learning_rate": 1.2431893177806769e-05, "loss": 0.084, "step": 8100 }, { "epoch": 0.12447241194075666, "grad_norm": 0.7349948883056641, "learning_rate": 1.2447241194075667e-05, "loss": 0.065, "step": 8110 }, { "epoch": 0.12462589210344563, "grad_norm": 1.1482772827148438, "learning_rate": 1.2462589210344564e-05, "loss": 0.064, "step": 8120 }, { "epoch": 0.1247793722661346, "grad_norm": 1.1048192977905273, "learning_rate": 1.247793722661346e-05, "loss": 0.0744, "step": 8130 }, { "epoch": 0.12493285242882357, "grad_norm": 0.6711339950561523, "learning_rate": 1.2493285242882358e-05, "loss": 0.0746, "step": 8140 }, { "epoch": 0.12508633259151256, "grad_norm": 0.9678426384925842, "learning_rate": 1.2508633259151254e-05, "loss": 0.0742, "step": 8150 }, { "epoch": 0.1252398127542015, "grad_norm": 1.0892693996429443, "learning_rate": 1.2523981275420154e-05, "loss": 0.0905, "step": 8160 }, { "epoch": 0.1253932929168905, "grad_norm": 0.6485267877578735, "learning_rate": 1.2539329291689051e-05, "loss": 0.0737, "step": 8170 }, { "epoch": 0.12554677307957945, "grad_norm": 1.1613949537277222, "learning_rate": 1.2554677307957948e-05, "loss": 0.0962, "step": 8180 }, { "epoch": 0.12570025324226844, "grad_norm": 0.7281707525253296, "learning_rate": 1.2570025324226845e-05, "loss": 0.0664, "step": 8190 }, { "epoch": 0.1258537334049574, "grad_norm": 0.9147013425827026, "learning_rate": 1.2585373340495741e-05, "loss": 0.0732, "step": 8200 }, { "epoch": 0.12600721356764638, "grad_norm": 1.0158731937408447, "learning_rate": 1.260072135676464e-05, "loss": 0.0747, "step": 8210 }, { "epoch": 0.12616069373033537, "grad_norm": 0.7424861192703247, "learning_rate": 1.2616069373033537e-05, "loss": 0.0857, "step": 8220 }, { "epoch": 0.12631417389302432, "grad_norm": 1.018804907798767, "learning_rate": 1.2631417389302433e-05, "loss": 0.0824, "step": 8230 }, { "epoch": 0.1264676540557133, "grad_norm": 0.7847954034805298, "learning_rate": 1.264676540557133e-05, "loss": 0.1002, "step": 8240 }, { "epoch": 0.12662113421840226, "grad_norm": 0.9244616031646729, "learning_rate": 1.2662113421840227e-05, "loss": 0.0799, "step": 8250 }, { "epoch": 0.12677461438109125, "grad_norm": 0.9859228730201721, "learning_rate": 1.2677461438109124e-05, "loss": 0.0836, "step": 8260 }, { "epoch": 0.1269280945437802, "grad_norm": 0.9237849712371826, "learning_rate": 1.2692809454378024e-05, "loss": 0.0847, "step": 8270 }, { "epoch": 0.1270815747064692, "grad_norm": 0.9397337436676025, "learning_rate": 1.270815747064692e-05, "loss": 0.0971, "step": 8280 }, { "epoch": 0.12723505486915815, "grad_norm": 0.9874135851860046, "learning_rate": 1.2723505486915817e-05, "loss": 0.0806, "step": 8290 }, { "epoch": 0.12738853503184713, "grad_norm": 0.910225510597229, "learning_rate": 1.2738853503184714e-05, "loss": 0.0918, "step": 8300 }, { "epoch": 0.12754201519453612, "grad_norm": 1.05225670337677, "learning_rate": 1.275420151945361e-05, "loss": 0.0822, "step": 8310 }, { "epoch": 0.12769549535722508, "grad_norm": 1.0020745992660522, "learning_rate": 1.2769549535722509e-05, "loss": 0.0775, "step": 8320 }, { "epoch": 0.12784897551991406, "grad_norm": 0.9729218482971191, "learning_rate": 1.2784897551991406e-05, "loss": 0.08, "step": 8330 }, { "epoch": 0.12800245568260302, "grad_norm": 1.0673584938049316, "learning_rate": 1.2800245568260303e-05, "loss": 0.082, "step": 8340 }, { "epoch": 0.128155935845292, "grad_norm": 0.9594094753265381, "learning_rate": 1.28155935845292e-05, "loss": 0.0742, "step": 8350 }, { "epoch": 0.12830941600798096, "grad_norm": 0.9644810557365417, "learning_rate": 1.2830941600798098e-05, "loss": 0.0717, "step": 8360 }, { "epoch": 0.12846289617066994, "grad_norm": 0.6419963836669922, "learning_rate": 1.2846289617066996e-05, "loss": 0.0721, "step": 8370 }, { "epoch": 0.1286163763333589, "grad_norm": 0.7883622646331787, "learning_rate": 1.2861637633335893e-05, "loss": 0.0675, "step": 8380 }, { "epoch": 0.12876985649604789, "grad_norm": 1.0309301614761353, "learning_rate": 1.287698564960479e-05, "loss": 0.0833, "step": 8390 }, { "epoch": 0.12892333665873687, "grad_norm": 0.9648522138595581, "learning_rate": 1.2892333665873686e-05, "loss": 0.0709, "step": 8400 }, { "epoch": 0.12907681682142583, "grad_norm": 1.161414384841919, "learning_rate": 1.2907681682142583e-05, "loss": 0.0794, "step": 8410 }, { "epoch": 0.1292302969841148, "grad_norm": 0.7943896055221558, "learning_rate": 1.2923029698411482e-05, "loss": 0.0672, "step": 8420 }, { "epoch": 0.12938377714680377, "grad_norm": 0.6653356552124023, "learning_rate": 1.2938377714680378e-05, "loss": 0.0651, "step": 8430 }, { "epoch": 0.12953725730949275, "grad_norm": 0.7629586458206177, "learning_rate": 1.2953725730949277e-05, "loss": 0.0656, "step": 8440 }, { "epoch": 0.1296907374721817, "grad_norm": 1.1561644077301025, "learning_rate": 1.2969073747218173e-05, "loss": 0.0745, "step": 8450 }, { "epoch": 0.1298442176348707, "grad_norm": 0.9852141737937927, "learning_rate": 1.298442176348707e-05, "loss": 0.0779, "step": 8460 }, { "epoch": 0.12999769779755965, "grad_norm": 0.8312051892280579, "learning_rate": 1.2999769779755969e-05, "loss": 0.0611, "step": 8470 }, { "epoch": 0.13015117796024864, "grad_norm": 0.7721074819564819, "learning_rate": 1.3015117796024865e-05, "loss": 0.0764, "step": 8480 }, { "epoch": 0.13030465812293762, "grad_norm": 0.7562073469161987, "learning_rate": 1.3030465812293762e-05, "loss": 0.0807, "step": 8490 }, { "epoch": 0.13045813828562658, "grad_norm": 0.9863452315330505, "learning_rate": 1.3045813828562659e-05, "loss": 0.074, "step": 8500 }, { "epoch": 0.13061161844831556, "grad_norm": 0.7129237651824951, "learning_rate": 1.3061161844831556e-05, "loss": 0.0727, "step": 8510 }, { "epoch": 0.13076509861100452, "grad_norm": 0.736729621887207, "learning_rate": 1.3076509861100454e-05, "loss": 0.0705, "step": 8520 }, { "epoch": 0.1309185787736935, "grad_norm": 0.7619386911392212, "learning_rate": 1.3091857877369352e-05, "loss": 0.0718, "step": 8530 }, { "epoch": 0.13107205893638246, "grad_norm": 1.1673225164413452, "learning_rate": 1.310720589363825e-05, "loss": 0.076, "step": 8540 }, { "epoch": 0.13122553909907145, "grad_norm": 1.0113354921340942, "learning_rate": 1.3122553909907146e-05, "loss": 0.0793, "step": 8550 }, { "epoch": 0.1313790192617604, "grad_norm": 0.9689770936965942, "learning_rate": 1.3137901926176043e-05, "loss": 0.0703, "step": 8560 }, { "epoch": 0.1315324994244494, "grad_norm": 0.7167981266975403, "learning_rate": 1.3153249942444941e-05, "loss": 0.0614, "step": 8570 }, { "epoch": 0.13168597958713837, "grad_norm": 1.0352401733398438, "learning_rate": 1.3168597958713838e-05, "loss": 0.0735, "step": 8580 }, { "epoch": 0.13183945974982733, "grad_norm": 0.7351460456848145, "learning_rate": 1.3183945974982735e-05, "loss": 0.075, "step": 8590 }, { "epoch": 0.13199293991251632, "grad_norm": 0.9075281620025635, "learning_rate": 1.3199293991251631e-05, "loss": 0.0797, "step": 8600 }, { "epoch": 0.13214642007520527, "grad_norm": 0.7940113544464111, "learning_rate": 1.3214642007520528e-05, "loss": 0.0693, "step": 8610 }, { "epoch": 0.13229990023789426, "grad_norm": 0.7201026082038879, "learning_rate": 1.3229990023789425e-05, "loss": 0.0653, "step": 8620 }, { "epoch": 0.13245338040058321, "grad_norm": 0.8316521644592285, "learning_rate": 1.3245338040058325e-05, "loss": 0.0789, "step": 8630 }, { "epoch": 0.1326068605632722, "grad_norm": 0.8020749688148499, "learning_rate": 1.3260686056327222e-05, "loss": 0.0745, "step": 8640 }, { "epoch": 0.13276034072596116, "grad_norm": 0.8213643431663513, "learning_rate": 1.3276034072596118e-05, "loss": 0.0866, "step": 8650 }, { "epoch": 0.13291382088865014, "grad_norm": 0.9187143445014954, "learning_rate": 1.3291382088865015e-05, "loss": 0.0729, "step": 8660 }, { "epoch": 0.13306730105133913, "grad_norm": 0.9661180973052979, "learning_rate": 1.3306730105133912e-05, "loss": 0.0602, "step": 8670 }, { "epoch": 0.13322078121402808, "grad_norm": 0.943894624710083, "learning_rate": 1.332207812140281e-05, "loss": 0.076, "step": 8680 }, { "epoch": 0.13337426137671707, "grad_norm": 0.8922330737113953, "learning_rate": 1.3337426137671707e-05, "loss": 0.0713, "step": 8690 }, { "epoch": 0.13352774153940603, "grad_norm": 1.0296200513839722, "learning_rate": 1.3352774153940604e-05, "loss": 0.0644, "step": 8700 }, { "epoch": 0.133681221702095, "grad_norm": 0.9445199966430664, "learning_rate": 1.33681221702095e-05, "loss": 0.0743, "step": 8710 }, { "epoch": 0.13383470186478397, "grad_norm": 0.955337643623352, "learning_rate": 1.3383470186478397e-05, "loss": 0.0711, "step": 8720 }, { "epoch": 0.13398818202747295, "grad_norm": 0.7305927276611328, "learning_rate": 1.3398818202747297e-05, "loss": 0.0759, "step": 8730 }, { "epoch": 0.1341416621901619, "grad_norm": 0.6827653050422668, "learning_rate": 1.3414166219016194e-05, "loss": 0.071, "step": 8740 }, { "epoch": 0.1342951423528509, "grad_norm": 0.8450679183006287, "learning_rate": 1.3429514235285091e-05, "loss": 0.0708, "step": 8750 }, { "epoch": 0.13444862251553988, "grad_norm": 0.7340912222862244, "learning_rate": 1.3444862251553988e-05, "loss": 0.0708, "step": 8760 }, { "epoch": 0.13460210267822884, "grad_norm": 0.9038487672805786, "learning_rate": 1.3460210267822884e-05, "loss": 0.0794, "step": 8770 }, { "epoch": 0.13475558284091782, "grad_norm": 0.6749180555343628, "learning_rate": 1.3475558284091783e-05, "loss": 0.081, "step": 8780 }, { "epoch": 0.13490906300360678, "grad_norm": 1.0051074028015137, "learning_rate": 1.349090630036068e-05, "loss": 0.0865, "step": 8790 }, { "epoch": 0.13506254316629576, "grad_norm": 0.7123197317123413, "learning_rate": 1.3506254316629576e-05, "loss": 0.0716, "step": 8800 }, { "epoch": 0.13521602332898472, "grad_norm": 0.8814381957054138, "learning_rate": 1.3521602332898473e-05, "loss": 0.0674, "step": 8810 }, { "epoch": 0.1353695034916737, "grad_norm": 0.8166098594665527, "learning_rate": 1.353695034916737e-05, "loss": 0.0686, "step": 8820 }, { "epoch": 0.13552298365436266, "grad_norm": 1.0707637071609497, "learning_rate": 1.355229836543627e-05, "loss": 0.0672, "step": 8830 }, { "epoch": 0.13567646381705165, "grad_norm": 0.9806626439094543, "learning_rate": 1.3567646381705167e-05, "loss": 0.0778, "step": 8840 }, { "epoch": 0.13582994397974063, "grad_norm": 0.9683653116226196, "learning_rate": 1.3582994397974063e-05, "loss": 0.0837, "step": 8850 }, { "epoch": 0.1359834241424296, "grad_norm": 1.2198923826217651, "learning_rate": 1.359834241424296e-05, "loss": 0.0731, "step": 8860 }, { "epoch": 0.13613690430511857, "grad_norm": 0.7405942678451538, "learning_rate": 1.3613690430511857e-05, "loss": 0.0743, "step": 8870 }, { "epoch": 0.13629038446780753, "grad_norm": 0.8288973569869995, "learning_rate": 1.3629038446780755e-05, "loss": 0.0683, "step": 8880 }, { "epoch": 0.13644386463049651, "grad_norm": 0.8849179148674011, "learning_rate": 1.3644386463049652e-05, "loss": 0.0674, "step": 8890 }, { "epoch": 0.13659734479318547, "grad_norm": 0.6514289379119873, "learning_rate": 1.3659734479318549e-05, "loss": 0.0751, "step": 8900 }, { "epoch": 0.13675082495587446, "grad_norm": 0.7330586910247803, "learning_rate": 1.3675082495587446e-05, "loss": 0.0713, "step": 8910 }, { "epoch": 0.1369043051185634, "grad_norm": 0.823684573173523, "learning_rate": 1.3690430511856342e-05, "loss": 0.0688, "step": 8920 }, { "epoch": 0.1370577852812524, "grad_norm": 0.9942905902862549, "learning_rate": 1.3705778528125242e-05, "loss": 0.0863, "step": 8930 }, { "epoch": 0.13721126544394138, "grad_norm": 0.6891562342643738, "learning_rate": 1.372112654439414e-05, "loss": 0.0715, "step": 8940 }, { "epoch": 0.13736474560663034, "grad_norm": 1.0694348812103271, "learning_rate": 1.3736474560663036e-05, "loss": 0.0843, "step": 8950 }, { "epoch": 0.13751822576931932, "grad_norm": 0.8295879364013672, "learning_rate": 1.3751822576931933e-05, "loss": 0.0617, "step": 8960 }, { "epoch": 0.13767170593200828, "grad_norm": 1.0236127376556396, "learning_rate": 1.376717059320083e-05, "loss": 0.0632, "step": 8970 }, { "epoch": 0.13782518609469727, "grad_norm": 0.6968839168548584, "learning_rate": 1.3782518609469726e-05, "loss": 0.0804, "step": 8980 }, { "epoch": 0.13797866625738622, "grad_norm": 1.211485505104065, "learning_rate": 1.3797866625738625e-05, "loss": 0.0844, "step": 8990 }, { "epoch": 0.1381321464200752, "grad_norm": 0.8835970163345337, "learning_rate": 1.3813214642007521e-05, "loss": 0.0805, "step": 9000 }, { "epoch": 0.13828562658276417, "grad_norm": 0.8092471957206726, "learning_rate": 1.3828562658276418e-05, "loss": 0.0747, "step": 9010 }, { "epoch": 0.13843910674545315, "grad_norm": 1.0095593929290771, "learning_rate": 1.3843910674545315e-05, "loss": 0.0683, "step": 9020 }, { "epoch": 0.13859258690814213, "grad_norm": 0.9622795581817627, "learning_rate": 1.3859258690814212e-05, "loss": 0.0765, "step": 9030 }, { "epoch": 0.1387460670708311, "grad_norm": 1.1101163625717163, "learning_rate": 1.3874606707083112e-05, "loss": 0.074, "step": 9040 }, { "epoch": 0.13889954723352008, "grad_norm": 0.6737242937088013, "learning_rate": 1.3889954723352008e-05, "loss": 0.0646, "step": 9050 }, { "epoch": 0.13905302739620903, "grad_norm": 0.6745513081550598, "learning_rate": 1.3905302739620905e-05, "loss": 0.0682, "step": 9060 }, { "epoch": 0.13920650755889802, "grad_norm": 1.1528232097625732, "learning_rate": 1.3920650755889802e-05, "loss": 0.0815, "step": 9070 }, { "epoch": 0.13935998772158698, "grad_norm": 1.0649316310882568, "learning_rate": 1.3935998772158699e-05, "loss": 0.0734, "step": 9080 }, { "epoch": 0.13951346788427596, "grad_norm": 1.0768581628799438, "learning_rate": 1.3951346788427597e-05, "loss": 0.0746, "step": 9090 }, { "epoch": 0.13966694804696492, "grad_norm": 0.7377780079841614, "learning_rate": 1.3966694804696494e-05, "loss": 0.0689, "step": 9100 }, { "epoch": 0.1398204282096539, "grad_norm": 0.7324938774108887, "learning_rate": 1.398204282096539e-05, "loss": 0.0657, "step": 9110 }, { "epoch": 0.1399739083723429, "grad_norm": 1.0395759344100952, "learning_rate": 1.3997390837234287e-05, "loss": 0.0724, "step": 9120 }, { "epoch": 0.14012738853503184, "grad_norm": 0.9968676567077637, "learning_rate": 1.4012738853503186e-05, "loss": 0.0709, "step": 9130 }, { "epoch": 0.14028086869772083, "grad_norm": 0.9920607805252075, "learning_rate": 1.4028086869772084e-05, "loss": 0.0774, "step": 9140 }, { "epoch": 0.14043434886040979, "grad_norm": 0.763800323009491, "learning_rate": 1.4043434886040981e-05, "loss": 0.059, "step": 9150 }, { "epoch": 0.14058782902309877, "grad_norm": 0.8215949535369873, "learning_rate": 1.4058782902309878e-05, "loss": 0.0676, "step": 9160 }, { "epoch": 0.14074130918578773, "grad_norm": 0.8778296709060669, "learning_rate": 1.4074130918578774e-05, "loss": 0.0743, "step": 9170 }, { "epoch": 0.1408947893484767, "grad_norm": 0.994555652141571, "learning_rate": 1.4089478934847671e-05, "loss": 0.0651, "step": 9180 }, { "epoch": 0.14104826951116567, "grad_norm": 1.1641438007354736, "learning_rate": 1.410482695111657e-05, "loss": 0.0739, "step": 9190 }, { "epoch": 0.14120174967385465, "grad_norm": 0.7916626334190369, "learning_rate": 1.4120174967385466e-05, "loss": 0.0636, "step": 9200 }, { "epoch": 0.14135522983654364, "grad_norm": 0.8288775086402893, "learning_rate": 1.4135522983654363e-05, "loss": 0.075, "step": 9210 }, { "epoch": 0.1415087099992326, "grad_norm": 0.77887362241745, "learning_rate": 1.4150870999923262e-05, "loss": 0.0926, "step": 9220 }, { "epoch": 0.14166219016192158, "grad_norm": 1.212809443473816, "learning_rate": 1.4166219016192158e-05, "loss": 0.0837, "step": 9230 }, { "epoch": 0.14181567032461054, "grad_norm": 0.9279800057411194, "learning_rate": 1.4181567032461057e-05, "loss": 0.0785, "step": 9240 }, { "epoch": 0.14196915048729952, "grad_norm": 0.9966262578964233, "learning_rate": 1.4196915048729953e-05, "loss": 0.0649, "step": 9250 }, { "epoch": 0.14212263064998848, "grad_norm": 0.6907628774642944, "learning_rate": 1.421226306499885e-05, "loss": 0.0692, "step": 9260 }, { "epoch": 0.14227611081267746, "grad_norm": 0.8354591131210327, "learning_rate": 1.4227611081267747e-05, "loss": 0.0795, "step": 9270 }, { "epoch": 0.14242959097536642, "grad_norm": 1.0157980918884277, "learning_rate": 1.4242959097536644e-05, "loss": 0.0674, "step": 9280 }, { "epoch": 0.1425830711380554, "grad_norm": 1.0991742610931396, "learning_rate": 1.4258307113805542e-05, "loss": 0.0762, "step": 9290 }, { "epoch": 0.1427365513007444, "grad_norm": 0.7433490753173828, "learning_rate": 1.427365513007444e-05, "loss": 0.0812, "step": 9300 }, { "epoch": 0.14289003146343335, "grad_norm": 0.9134533405303955, "learning_rate": 1.4289003146343337e-05, "loss": 0.0713, "step": 9310 }, { "epoch": 0.14304351162612233, "grad_norm": 0.9323055744171143, "learning_rate": 1.4304351162612234e-05, "loss": 0.083, "step": 9320 }, { "epoch": 0.1431969917888113, "grad_norm": 0.6641127467155457, "learning_rate": 1.431969917888113e-05, "loss": 0.0751, "step": 9330 }, { "epoch": 0.14335047195150027, "grad_norm": 0.93982994556427, "learning_rate": 1.4335047195150027e-05, "loss": 0.0687, "step": 9340 }, { "epoch": 0.14350395211418923, "grad_norm": 0.8692604899406433, "learning_rate": 1.4350395211418926e-05, "loss": 0.0614, "step": 9350 }, { "epoch": 0.14365743227687822, "grad_norm": 0.95781409740448, "learning_rate": 1.4365743227687823e-05, "loss": 0.0749, "step": 9360 }, { "epoch": 0.14381091243956717, "grad_norm": 0.6507485508918762, "learning_rate": 1.438109124395672e-05, "loss": 0.0608, "step": 9370 }, { "epoch": 0.14396439260225616, "grad_norm": 0.8570566177368164, "learning_rate": 1.4396439260225616e-05, "loss": 0.0538, "step": 9380 }, { "epoch": 0.14411787276494514, "grad_norm": 0.6459655165672302, "learning_rate": 1.4411787276494513e-05, "loss": 0.0667, "step": 9390 }, { "epoch": 0.1442713529276341, "grad_norm": 0.6576370596885681, "learning_rate": 1.4427135292763413e-05, "loss": 0.072, "step": 9400 }, { "epoch": 0.14442483309032308, "grad_norm": 0.8128161430358887, "learning_rate": 1.444248330903231e-05, "loss": 0.0744, "step": 9410 }, { "epoch": 0.14457831325301204, "grad_norm": 0.7073536515235901, "learning_rate": 1.4457831325301207e-05, "loss": 0.0637, "step": 9420 }, { "epoch": 0.14473179341570103, "grad_norm": 1.0756934881210327, "learning_rate": 1.4473179341570103e-05, "loss": 0.0672, "step": 9430 }, { "epoch": 0.14488527357838998, "grad_norm": 0.9229929447174072, "learning_rate": 1.4488527357839e-05, "loss": 0.0622, "step": 9440 }, { "epoch": 0.14503875374107897, "grad_norm": 0.6785399317741394, "learning_rate": 1.4503875374107898e-05, "loss": 0.0803, "step": 9450 }, { "epoch": 0.14519223390376793, "grad_norm": 0.6143994927406311, "learning_rate": 1.4519223390376795e-05, "loss": 0.0651, "step": 9460 }, { "epoch": 0.1453457140664569, "grad_norm": 0.5801910161972046, "learning_rate": 1.4534571406645692e-05, "loss": 0.0833, "step": 9470 }, { "epoch": 0.1454991942291459, "grad_norm": 1.100695252418518, "learning_rate": 1.4549919422914589e-05, "loss": 0.0665, "step": 9480 }, { "epoch": 0.14565267439183485, "grad_norm": 0.8606224656105042, "learning_rate": 1.4565267439183485e-05, "loss": 0.0737, "step": 9490 }, { "epoch": 0.14580615455452384, "grad_norm": 0.8941163420677185, "learning_rate": 1.4580615455452386e-05, "loss": 0.0811, "step": 9500 }, { "epoch": 0.1459596347172128, "grad_norm": 0.8916667699813843, "learning_rate": 1.4595963471721282e-05, "loss": 0.0845, "step": 9510 }, { "epoch": 0.14611311487990178, "grad_norm": 0.8328119516372681, "learning_rate": 1.4611311487990179e-05, "loss": 0.0825, "step": 9520 }, { "epoch": 0.14626659504259074, "grad_norm": 0.8816018104553223, "learning_rate": 1.4626659504259076e-05, "loss": 0.0738, "step": 9530 }, { "epoch": 0.14642007520527972, "grad_norm": 0.9127072691917419, "learning_rate": 1.4642007520527972e-05, "loss": 0.0668, "step": 9540 }, { "epoch": 0.14657355536796868, "grad_norm": 0.9408010840415955, "learning_rate": 1.4657355536796871e-05, "loss": 0.0682, "step": 9550 }, { "epoch": 0.14672703553065766, "grad_norm": 0.7310327291488647, "learning_rate": 1.4672703553065768e-05, "loss": 0.0729, "step": 9560 }, { "epoch": 0.14688051569334665, "grad_norm": 1.3990275859832764, "learning_rate": 1.4688051569334664e-05, "loss": 0.0711, "step": 9570 }, { "epoch": 0.1470339958560356, "grad_norm": 0.633263885974884, "learning_rate": 1.4703399585603561e-05, "loss": 0.0698, "step": 9580 }, { "epoch": 0.1471874760187246, "grad_norm": 0.7900577187538147, "learning_rate": 1.4718747601872458e-05, "loss": 0.0703, "step": 9590 }, { "epoch": 0.14734095618141355, "grad_norm": 0.8591098785400391, "learning_rate": 1.4734095618141358e-05, "loss": 0.0603, "step": 9600 }, { "epoch": 0.14749443634410253, "grad_norm": 0.8891927599906921, "learning_rate": 1.4749443634410255e-05, "loss": 0.075, "step": 9610 }, { "epoch": 0.1476479165067915, "grad_norm": 1.1708016395568848, "learning_rate": 1.4764791650679152e-05, "loss": 0.0701, "step": 9620 }, { "epoch": 0.14780139666948047, "grad_norm": 0.7553046941757202, "learning_rate": 1.4780139666948048e-05, "loss": 0.073, "step": 9630 }, { "epoch": 0.14795487683216943, "grad_norm": 0.633119523525238, "learning_rate": 1.4795487683216945e-05, "loss": 0.072, "step": 9640 }, { "epoch": 0.14810835699485841, "grad_norm": 0.6398165822029114, "learning_rate": 1.4810835699485843e-05, "loss": 0.0708, "step": 9650 }, { "epoch": 0.1482618371575474, "grad_norm": 0.8399470448493958, "learning_rate": 1.482618371575474e-05, "loss": 0.0782, "step": 9660 }, { "epoch": 0.14841531732023636, "grad_norm": 0.748447060585022, "learning_rate": 1.4841531732023637e-05, "loss": 0.0687, "step": 9670 }, { "epoch": 0.14856879748292534, "grad_norm": 0.7461723685264587, "learning_rate": 1.4856879748292534e-05, "loss": 0.0499, "step": 9680 }, { "epoch": 0.1487222776456143, "grad_norm": 0.821020245552063, "learning_rate": 1.487222776456143e-05, "loss": 0.0651, "step": 9690 }, { "epoch": 0.14887575780830328, "grad_norm": 0.5839148163795471, "learning_rate": 1.4887575780830327e-05, "loss": 0.0642, "step": 9700 }, { "epoch": 0.14902923797099224, "grad_norm": 1.1201354265213013, "learning_rate": 1.4902923797099227e-05, "loss": 0.0771, "step": 9710 }, { "epoch": 0.14918271813368122, "grad_norm": 0.8255123496055603, "learning_rate": 1.4918271813368124e-05, "loss": 0.07, "step": 9720 }, { "epoch": 0.14933619829637018, "grad_norm": 0.6958693265914917, "learning_rate": 1.493361982963702e-05, "loss": 0.0565, "step": 9730 }, { "epoch": 0.14948967845905917, "grad_norm": 0.6420494914054871, "learning_rate": 1.4948967845905917e-05, "loss": 0.0607, "step": 9740 }, { "epoch": 0.14964315862174815, "grad_norm": 0.8813372850418091, "learning_rate": 1.4964315862174814e-05, "loss": 0.0708, "step": 9750 }, { "epoch": 0.1497966387844371, "grad_norm": 0.8785001039505005, "learning_rate": 1.4979663878443713e-05, "loss": 0.0818, "step": 9760 }, { "epoch": 0.1499501189471261, "grad_norm": 0.6715682744979858, "learning_rate": 1.499501189471261e-05, "loss": 0.069, "step": 9770 }, { "epoch": 0.15010359910981505, "grad_norm": 0.8162952065467834, "learning_rate": 1.5010359910981506e-05, "loss": 0.0752, "step": 9780 }, { "epoch": 0.15025707927250403, "grad_norm": 0.6377097964286804, "learning_rate": 1.5025707927250403e-05, "loss": 0.0656, "step": 9790 }, { "epoch": 0.150410559435193, "grad_norm": 0.8148257732391357, "learning_rate": 1.50410559435193e-05, "loss": 0.0633, "step": 9800 }, { "epoch": 0.15056403959788198, "grad_norm": 0.979729950428009, "learning_rate": 1.50564039597882e-05, "loss": 0.0659, "step": 9810 }, { "epoch": 0.15071751976057093, "grad_norm": 0.8030733466148376, "learning_rate": 1.5071751976057097e-05, "loss": 0.0753, "step": 9820 }, { "epoch": 0.15087099992325992, "grad_norm": 0.7258292436599731, "learning_rate": 1.5087099992325993e-05, "loss": 0.0691, "step": 9830 }, { "epoch": 0.1510244800859489, "grad_norm": 1.1503902673721313, "learning_rate": 1.510244800859489e-05, "loss": 0.068, "step": 9840 }, { "epoch": 0.15117796024863786, "grad_norm": 0.6410447955131531, "learning_rate": 1.5117796024863787e-05, "loss": 0.0798, "step": 9850 }, { "epoch": 0.15133144041132685, "grad_norm": 0.601487934589386, "learning_rate": 1.5133144041132685e-05, "loss": 0.0671, "step": 9860 }, { "epoch": 0.1514849205740158, "grad_norm": 0.8570122718811035, "learning_rate": 1.5148492057401582e-05, "loss": 0.0587, "step": 9870 }, { "epoch": 0.1516384007367048, "grad_norm": 1.0331116914749146, "learning_rate": 1.5163840073670479e-05, "loss": 0.0673, "step": 9880 }, { "epoch": 0.15179188089939374, "grad_norm": 0.9244276881217957, "learning_rate": 1.5179188089939375e-05, "loss": 0.0801, "step": 9890 }, { "epoch": 0.15194536106208273, "grad_norm": 1.0353810787200928, "learning_rate": 1.5194536106208274e-05, "loss": 0.065, "step": 9900 }, { "epoch": 0.15209884122477169, "grad_norm": 0.7660245895385742, "learning_rate": 1.5209884122477172e-05, "loss": 0.0763, "step": 9910 }, { "epoch": 0.15225232138746067, "grad_norm": 0.9795351624488831, "learning_rate": 1.5225232138746069e-05, "loss": 0.0577, "step": 9920 }, { "epoch": 0.15240580155014966, "grad_norm": 0.8257235884666443, "learning_rate": 1.5240580155014966e-05, "loss": 0.0609, "step": 9930 }, { "epoch": 0.1525592817128386, "grad_norm": 0.7147971987724304, "learning_rate": 1.5255928171283862e-05, "loss": 0.0747, "step": 9940 }, { "epoch": 0.1527127618755276, "grad_norm": 0.7329027056694031, "learning_rate": 1.527127618755276e-05, "loss": 0.0653, "step": 9950 }, { "epoch": 0.15286624203821655, "grad_norm": 0.7883205413818359, "learning_rate": 1.528662420382166e-05, "loss": 0.0593, "step": 9960 }, { "epoch": 0.15301972220090554, "grad_norm": 0.7974170446395874, "learning_rate": 1.5301972220090554e-05, "loss": 0.0609, "step": 9970 }, { "epoch": 0.1531732023635945, "grad_norm": 0.8648584485054016, "learning_rate": 1.5317320236359453e-05, "loss": 0.0682, "step": 9980 }, { "epoch": 0.15332668252628348, "grad_norm": 0.8426690697669983, "learning_rate": 1.5332668252628348e-05, "loss": 0.0773, "step": 9990 }, { "epoch": 0.15348016268897244, "grad_norm": 0.6755056977272034, "learning_rate": 1.5348016268897246e-05, "loss": 0.0702, "step": 10000 }, { "epoch": 0.15363364285166142, "grad_norm": 0.9354260563850403, "learning_rate": 1.5363364285166145e-05, "loss": 0.0664, "step": 10010 }, { "epoch": 0.1537871230143504, "grad_norm": 0.8755561709403992, "learning_rate": 1.537871230143504e-05, "loss": 0.0639, "step": 10020 }, { "epoch": 0.15394060317703936, "grad_norm": 0.6561378836631775, "learning_rate": 1.5394060317703938e-05, "loss": 0.0599, "step": 10030 }, { "epoch": 0.15409408333972835, "grad_norm": 0.7621110081672668, "learning_rate": 1.5409408333972837e-05, "loss": 0.0607, "step": 10040 }, { "epoch": 0.1542475635024173, "grad_norm": 0.9901028275489807, "learning_rate": 1.5424756350241732e-05, "loss": 0.072, "step": 10050 }, { "epoch": 0.1544010436651063, "grad_norm": 0.7525966763496399, "learning_rate": 1.544010436651063e-05, "loss": 0.0629, "step": 10060 }, { "epoch": 0.15455452382779525, "grad_norm": 0.9788570404052734, "learning_rate": 1.545545238277953e-05, "loss": 0.0617, "step": 10070 }, { "epoch": 0.15470800399048423, "grad_norm": 0.8180756568908691, "learning_rate": 1.5470800399048424e-05, "loss": 0.0685, "step": 10080 }, { "epoch": 0.1548614841531732, "grad_norm": 0.9133009314537048, "learning_rate": 1.5486148415317322e-05, "loss": 0.0755, "step": 10090 }, { "epoch": 0.15501496431586217, "grad_norm": 0.7870527505874634, "learning_rate": 1.5501496431586217e-05, "loss": 0.0753, "step": 10100 }, { "epoch": 0.15516844447855116, "grad_norm": 0.770147979259491, "learning_rate": 1.5516844447855116e-05, "loss": 0.0784, "step": 10110 }, { "epoch": 0.15532192464124012, "grad_norm": 0.9513058662414551, "learning_rate": 1.5532192464124014e-05, "loss": 0.0687, "step": 10120 }, { "epoch": 0.1554754048039291, "grad_norm": 0.8180469870567322, "learning_rate": 1.5547540480392912e-05, "loss": 0.0623, "step": 10130 }, { "epoch": 0.15562888496661806, "grad_norm": 1.0955568552017212, "learning_rate": 1.5562888496661807e-05, "loss": 0.0657, "step": 10140 }, { "epoch": 0.15578236512930704, "grad_norm": 0.6876780390739441, "learning_rate": 1.5578236512930706e-05, "loss": 0.0571, "step": 10150 }, { "epoch": 0.155935845291996, "grad_norm": 1.0172568559646606, "learning_rate": 1.55935845291996e-05, "loss": 0.0621, "step": 10160 }, { "epoch": 0.15608932545468499, "grad_norm": 0.863236665725708, "learning_rate": 1.56089325454685e-05, "loss": 0.0658, "step": 10170 }, { "epoch": 0.15624280561737394, "grad_norm": 0.8859457969665527, "learning_rate": 1.5624280561737398e-05, "loss": 0.0601, "step": 10180 }, { "epoch": 0.15639628578006293, "grad_norm": 0.6902763843536377, "learning_rate": 1.5639628578006293e-05, "loss": 0.0647, "step": 10190 }, { "epoch": 0.1565497659427519, "grad_norm": 0.8248108625411987, "learning_rate": 1.565497659427519e-05, "loss": 0.0787, "step": 10200 }, { "epoch": 0.15670324610544087, "grad_norm": 0.7894905209541321, "learning_rate": 1.5670324610544086e-05, "loss": 0.0703, "step": 10210 }, { "epoch": 0.15685672626812985, "grad_norm": 0.965881884098053, "learning_rate": 1.5685672626812988e-05, "loss": 0.0698, "step": 10220 }, { "epoch": 0.1570102064308188, "grad_norm": 0.7693638205528259, "learning_rate": 1.5701020643081883e-05, "loss": 0.0617, "step": 10230 }, { "epoch": 0.1571636865935078, "grad_norm": 0.8769267797470093, "learning_rate": 1.571636865935078e-05, "loss": 0.0695, "step": 10240 }, { "epoch": 0.15731716675619675, "grad_norm": 1.0445702075958252, "learning_rate": 1.5731716675619677e-05, "loss": 0.078, "step": 10250 }, { "epoch": 0.15747064691888574, "grad_norm": 0.7525782585144043, "learning_rate": 1.5747064691888575e-05, "loss": 0.0738, "step": 10260 }, { "epoch": 0.1576241270815747, "grad_norm": 0.7700458765029907, "learning_rate": 1.5762412708157474e-05, "loss": 0.074, "step": 10270 }, { "epoch": 0.15777760724426368, "grad_norm": 0.6774086952209473, "learning_rate": 1.577776072442637e-05, "loss": 0.0761, "step": 10280 }, { "epoch": 0.15793108740695266, "grad_norm": 0.748344361782074, "learning_rate": 1.5793108740695267e-05, "loss": 0.0756, "step": 10290 }, { "epoch": 0.15808456756964162, "grad_norm": 0.7626968026161194, "learning_rate": 1.5808456756964162e-05, "loss": 0.0612, "step": 10300 }, { "epoch": 0.1582380477323306, "grad_norm": 0.5746101140975952, "learning_rate": 1.582380477323306e-05, "loss": 0.0649, "step": 10310 }, { "epoch": 0.15839152789501956, "grad_norm": 0.6439284086227417, "learning_rate": 1.583915278950196e-05, "loss": 0.0609, "step": 10320 }, { "epoch": 0.15854500805770855, "grad_norm": 0.8079944849014282, "learning_rate": 1.5854500805770857e-05, "loss": 0.0683, "step": 10330 }, { "epoch": 0.1586984882203975, "grad_norm": 0.6421492695808411, "learning_rate": 1.5869848822039752e-05, "loss": 0.0719, "step": 10340 }, { "epoch": 0.1588519683830865, "grad_norm": 1.134055256843567, "learning_rate": 1.588519683830865e-05, "loss": 0.055, "step": 10350 }, { "epoch": 0.15900544854577545, "grad_norm": 0.6874112486839294, "learning_rate": 1.5900544854577546e-05, "loss": 0.0757, "step": 10360 }, { "epoch": 0.15915892870846443, "grad_norm": 0.8464698195457458, "learning_rate": 1.5915892870846444e-05, "loss": 0.0776, "step": 10370 }, { "epoch": 0.15931240887115342, "grad_norm": 0.7095886468887329, "learning_rate": 1.5931240887115343e-05, "loss": 0.0824, "step": 10380 }, { "epoch": 0.15946588903384237, "grad_norm": 0.8529601097106934, "learning_rate": 1.5946588903384238e-05, "loss": 0.0792, "step": 10390 }, { "epoch": 0.15961936919653136, "grad_norm": 0.6679795980453491, "learning_rate": 1.5961936919653136e-05, "loss": 0.0655, "step": 10400 }, { "epoch": 0.15977284935922031, "grad_norm": 0.7846323251724243, "learning_rate": 1.597728493592203e-05, "loss": 0.0805, "step": 10410 }, { "epoch": 0.1599263295219093, "grad_norm": 0.8888245820999146, "learning_rate": 1.599263295219093e-05, "loss": 0.069, "step": 10420 }, { "epoch": 0.16007980968459826, "grad_norm": 0.7031064629554749, "learning_rate": 1.6007980968459828e-05, "loss": 0.0664, "step": 10430 }, { "epoch": 0.16023328984728724, "grad_norm": 0.8344544768333435, "learning_rate": 1.6023328984728727e-05, "loss": 0.0674, "step": 10440 }, { "epoch": 0.1603867700099762, "grad_norm": 1.1667182445526123, "learning_rate": 1.6038677000997622e-05, "loss": 0.0605, "step": 10450 }, { "epoch": 0.16054025017266518, "grad_norm": 1.0628312826156616, "learning_rate": 1.605402501726652e-05, "loss": 0.0709, "step": 10460 }, { "epoch": 0.16069373033535417, "grad_norm": 1.1457771062850952, "learning_rate": 1.6069373033535415e-05, "loss": 0.0736, "step": 10470 }, { "epoch": 0.16084721049804312, "grad_norm": 0.8113125562667847, "learning_rate": 1.6084721049804314e-05, "loss": 0.0664, "step": 10480 }, { "epoch": 0.1610006906607321, "grad_norm": 0.752362072467804, "learning_rate": 1.6100069066073212e-05, "loss": 0.0824, "step": 10490 }, { "epoch": 0.16115417082342107, "grad_norm": 0.7355091571807861, "learning_rate": 1.6115417082342107e-05, "loss": 0.062, "step": 10500 }, { "epoch": 0.16130765098611005, "grad_norm": 1.0835914611816406, "learning_rate": 1.6130765098611006e-05, "loss": 0.061, "step": 10510 }, { "epoch": 0.161461131148799, "grad_norm": 0.9875301718711853, "learning_rate": 1.61461131148799e-05, "loss": 0.0652, "step": 10520 }, { "epoch": 0.161614611311488, "grad_norm": 0.8189069032669067, "learning_rate": 1.6161461131148802e-05, "loss": 0.0838, "step": 10530 }, { "epoch": 0.16176809147417695, "grad_norm": 0.9190618991851807, "learning_rate": 1.6176809147417697e-05, "loss": 0.0577, "step": 10540 }, { "epoch": 0.16192157163686594, "grad_norm": 0.9094547033309937, "learning_rate": 1.6192157163686596e-05, "loss": 0.0747, "step": 10550 }, { "epoch": 0.16207505179955492, "grad_norm": 1.0342857837677002, "learning_rate": 1.620750517995549e-05, "loss": 0.0721, "step": 10560 }, { "epoch": 0.16222853196224388, "grad_norm": 0.8167394399642944, "learning_rate": 1.622285319622439e-05, "loss": 0.0723, "step": 10570 }, { "epoch": 0.16238201212493286, "grad_norm": 0.6034983992576599, "learning_rate": 1.6238201212493288e-05, "loss": 0.0764, "step": 10580 }, { "epoch": 0.16253549228762182, "grad_norm": 1.02525794506073, "learning_rate": 1.6253549228762183e-05, "loss": 0.0583, "step": 10590 }, { "epoch": 0.1626889724503108, "grad_norm": 0.7272692322731018, "learning_rate": 1.626889724503108e-05, "loss": 0.0639, "step": 10600 }, { "epoch": 0.16284245261299976, "grad_norm": 0.673098087310791, "learning_rate": 1.6284245261299976e-05, "loss": 0.0656, "step": 10610 }, { "epoch": 0.16299593277568875, "grad_norm": 0.7286852598190308, "learning_rate": 1.6299593277568875e-05, "loss": 0.074, "step": 10620 }, { "epoch": 0.1631494129383777, "grad_norm": 0.5608854293823242, "learning_rate": 1.6314941293837773e-05, "loss": 0.067, "step": 10630 }, { "epoch": 0.1633028931010667, "grad_norm": 0.7887623906135559, "learning_rate": 1.633028931010667e-05, "loss": 0.0547, "step": 10640 }, { "epoch": 0.16345637326375567, "grad_norm": 0.7759367823600769, "learning_rate": 1.6345637326375567e-05, "loss": 0.0794, "step": 10650 }, { "epoch": 0.16360985342644463, "grad_norm": 1.1278526782989502, "learning_rate": 1.6360985342644465e-05, "loss": 0.0721, "step": 10660 }, { "epoch": 0.16376333358913361, "grad_norm": 0.8176709413528442, "learning_rate": 1.637633335891336e-05, "loss": 0.0654, "step": 10670 }, { "epoch": 0.16391681375182257, "grad_norm": 0.9475258588790894, "learning_rate": 1.639168137518226e-05, "loss": 0.0611, "step": 10680 }, { "epoch": 0.16407029391451156, "grad_norm": 0.8014469146728516, "learning_rate": 1.6407029391451157e-05, "loss": 0.06, "step": 10690 }, { "epoch": 0.1642237740772005, "grad_norm": 0.7795228958129883, "learning_rate": 1.6422377407720052e-05, "loss": 0.0759, "step": 10700 }, { "epoch": 0.1643772542398895, "grad_norm": 0.8646900057792664, "learning_rate": 1.643772542398895e-05, "loss": 0.0612, "step": 10710 }, { "epoch": 0.16453073440257845, "grad_norm": 0.7538985013961792, "learning_rate": 1.645307344025785e-05, "loss": 0.0696, "step": 10720 }, { "epoch": 0.16468421456526744, "grad_norm": 0.7674204707145691, "learning_rate": 1.6468421456526744e-05, "loss": 0.0582, "step": 10730 }, { "epoch": 0.16483769472795642, "grad_norm": 0.8768881559371948, "learning_rate": 1.6483769472795642e-05, "loss": 0.0733, "step": 10740 }, { "epoch": 0.16499117489064538, "grad_norm": 0.7888249158859253, "learning_rate": 1.649911748906454e-05, "loss": 0.071, "step": 10750 }, { "epoch": 0.16514465505333437, "grad_norm": 0.5682582855224609, "learning_rate": 1.6514465505333436e-05, "loss": 0.0653, "step": 10760 }, { "epoch": 0.16529813521602332, "grad_norm": 0.7690806984901428, "learning_rate": 1.6529813521602334e-05, "loss": 0.0566, "step": 10770 }, { "epoch": 0.1654516153787123, "grad_norm": 0.7236483097076416, "learning_rate": 1.654516153787123e-05, "loss": 0.0633, "step": 10780 }, { "epoch": 0.16560509554140126, "grad_norm": 0.6716984510421753, "learning_rate": 1.6560509554140128e-05, "loss": 0.0519, "step": 10790 }, { "epoch": 0.16575857570409025, "grad_norm": 0.7954491376876831, "learning_rate": 1.6575857570409026e-05, "loss": 0.0755, "step": 10800 }, { "epoch": 0.1659120558667792, "grad_norm": 0.7027291059494019, "learning_rate": 1.6591205586677925e-05, "loss": 0.0674, "step": 10810 }, { "epoch": 0.1660655360294682, "grad_norm": 1.084392786026001, "learning_rate": 1.660655360294682e-05, "loss": 0.0683, "step": 10820 }, { "epoch": 0.16621901619215718, "grad_norm": 0.7356905341148376, "learning_rate": 1.6621901619215718e-05, "loss": 0.0667, "step": 10830 }, { "epoch": 0.16637249635484613, "grad_norm": 0.8506235480308533, "learning_rate": 1.6637249635484617e-05, "loss": 0.0799, "step": 10840 }, { "epoch": 0.16652597651753512, "grad_norm": 0.8893174529075623, "learning_rate": 1.6652597651753512e-05, "loss": 0.08, "step": 10850 }, { "epoch": 0.16667945668022408, "grad_norm": 0.6865788698196411, "learning_rate": 1.666794566802241e-05, "loss": 0.0672, "step": 10860 }, { "epoch": 0.16683293684291306, "grad_norm": 0.6985292434692383, "learning_rate": 1.6683293684291305e-05, "loss": 0.0646, "step": 10870 }, { "epoch": 0.16698641700560202, "grad_norm": 0.8241227269172668, "learning_rate": 1.6698641700560204e-05, "loss": 0.0704, "step": 10880 }, { "epoch": 0.167139897168291, "grad_norm": 0.5884559154510498, "learning_rate": 1.6713989716829102e-05, "loss": 0.0629, "step": 10890 }, { "epoch": 0.16729337733097996, "grad_norm": 0.6643840074539185, "learning_rate": 1.6729337733098e-05, "loss": 0.0676, "step": 10900 }, { "epoch": 0.16744685749366894, "grad_norm": 0.6347523927688599, "learning_rate": 1.6744685749366896e-05, "loss": 0.0549, "step": 10910 }, { "epoch": 0.16760033765635793, "grad_norm": 0.868026077747345, "learning_rate": 1.6760033765635794e-05, "loss": 0.0697, "step": 10920 }, { "epoch": 0.16775381781904689, "grad_norm": 0.6077331900596619, "learning_rate": 1.677538178190469e-05, "loss": 0.0627, "step": 10930 }, { "epoch": 0.16790729798173587, "grad_norm": 0.6172699332237244, "learning_rate": 1.6790729798173587e-05, "loss": 0.0617, "step": 10940 }, { "epoch": 0.16806077814442483, "grad_norm": 0.8728526830673218, "learning_rate": 1.6806077814442486e-05, "loss": 0.0588, "step": 10950 }, { "epoch": 0.1682142583071138, "grad_norm": 0.9965080618858337, "learning_rate": 1.682142583071138e-05, "loss": 0.0682, "step": 10960 }, { "epoch": 0.16836773846980277, "grad_norm": 0.7480833530426025, "learning_rate": 1.683677384698028e-05, "loss": 0.0584, "step": 10970 }, { "epoch": 0.16852121863249175, "grad_norm": 0.8056139945983887, "learning_rate": 1.6852121863249174e-05, "loss": 0.0803, "step": 10980 }, { "epoch": 0.1686746987951807, "grad_norm": 0.7262210249900818, "learning_rate": 1.6867469879518076e-05, "loss": 0.0653, "step": 10990 }, { "epoch": 0.1688281789578697, "grad_norm": 0.8514172434806824, "learning_rate": 1.688281789578697e-05, "loss": 0.0732, "step": 11000 }, { "epoch": 0.16898165912055868, "grad_norm": 0.9183686375617981, "learning_rate": 1.689816591205587e-05, "loss": 0.0575, "step": 11010 }, { "epoch": 0.16913513928324764, "grad_norm": 0.6401649117469788, "learning_rate": 1.6913513928324765e-05, "loss": 0.0539, "step": 11020 }, { "epoch": 0.16928861944593662, "grad_norm": 0.7407079935073853, "learning_rate": 1.6928861944593663e-05, "loss": 0.0646, "step": 11030 }, { "epoch": 0.16944209960862558, "grad_norm": 1.4057286977767944, "learning_rate": 1.694420996086256e-05, "loss": 0.0709, "step": 11040 }, { "epoch": 0.16959557977131456, "grad_norm": 0.9989942312240601, "learning_rate": 1.6959557977131457e-05, "loss": 0.0641, "step": 11050 }, { "epoch": 0.16974905993400352, "grad_norm": 0.8725926280021667, "learning_rate": 1.6974905993400355e-05, "loss": 0.0632, "step": 11060 }, { "epoch": 0.1699025400966925, "grad_norm": 0.7163800597190857, "learning_rate": 1.699025400966925e-05, "loss": 0.0735, "step": 11070 }, { "epoch": 0.17005602025938146, "grad_norm": 0.8681125640869141, "learning_rate": 1.700560202593815e-05, "loss": 0.0601, "step": 11080 }, { "epoch": 0.17020950042207045, "grad_norm": 0.6923156380653381, "learning_rate": 1.7020950042207044e-05, "loss": 0.0676, "step": 11090 }, { "epoch": 0.17036298058475943, "grad_norm": 0.833810567855835, "learning_rate": 1.7036298058475945e-05, "loss": 0.0542, "step": 11100 }, { "epoch": 0.1705164607474484, "grad_norm": 1.0967565774917603, "learning_rate": 1.705164607474484e-05, "loss": 0.054, "step": 11110 }, { "epoch": 0.17066994091013737, "grad_norm": 0.6699127554893494, "learning_rate": 1.706699409101374e-05, "loss": 0.0795, "step": 11120 }, { "epoch": 0.17082342107282633, "grad_norm": 0.711043655872345, "learning_rate": 1.7082342107282634e-05, "loss": 0.0574, "step": 11130 }, { "epoch": 0.17097690123551532, "grad_norm": 0.6823534369468689, "learning_rate": 1.7097690123551532e-05, "loss": 0.0563, "step": 11140 }, { "epoch": 0.17113038139820427, "grad_norm": 0.768930971622467, "learning_rate": 1.711303813982043e-05, "loss": 0.0703, "step": 11150 }, { "epoch": 0.17128386156089326, "grad_norm": 1.0174180269241333, "learning_rate": 1.7128386156089326e-05, "loss": 0.0654, "step": 11160 }, { "epoch": 0.17143734172358222, "grad_norm": 0.9678444862365723, "learning_rate": 1.7143734172358224e-05, "loss": 0.0674, "step": 11170 }, { "epoch": 0.1715908218862712, "grad_norm": 0.9873467683792114, "learning_rate": 1.715908218862712e-05, "loss": 0.0735, "step": 11180 }, { "epoch": 0.17174430204896018, "grad_norm": 0.5742461085319519, "learning_rate": 1.7174430204896018e-05, "loss": 0.0579, "step": 11190 }, { "epoch": 0.17189778221164914, "grad_norm": 0.6938338875770569, "learning_rate": 1.7189778221164916e-05, "loss": 0.0733, "step": 11200 }, { "epoch": 0.17205126237433813, "grad_norm": 0.835345447063446, "learning_rate": 1.7205126237433815e-05, "loss": 0.0702, "step": 11210 }, { "epoch": 0.17220474253702708, "grad_norm": 0.7423105239868164, "learning_rate": 1.722047425370271e-05, "loss": 0.0642, "step": 11220 }, { "epoch": 0.17235822269971607, "grad_norm": 0.6995028257369995, "learning_rate": 1.7235822269971608e-05, "loss": 0.0658, "step": 11230 }, { "epoch": 0.17251170286240503, "grad_norm": 0.6336666345596313, "learning_rate": 1.7251170286240503e-05, "loss": 0.0795, "step": 11240 }, { "epoch": 0.172665183025094, "grad_norm": 0.6643989086151123, "learning_rate": 1.72665183025094e-05, "loss": 0.0636, "step": 11250 }, { "epoch": 0.17281866318778297, "grad_norm": 0.8571460843086243, "learning_rate": 1.72818663187783e-05, "loss": 0.0724, "step": 11260 }, { "epoch": 0.17297214335047195, "grad_norm": 0.7989628314971924, "learning_rate": 1.7297214335047195e-05, "loss": 0.0679, "step": 11270 }, { "epoch": 0.17312562351316094, "grad_norm": 0.6785359382629395, "learning_rate": 1.7312562351316094e-05, "loss": 0.0717, "step": 11280 }, { "epoch": 0.1732791036758499, "grad_norm": 0.8444017767906189, "learning_rate": 1.732791036758499e-05, "loss": 0.0492, "step": 11290 }, { "epoch": 0.17343258383853888, "grad_norm": 0.8645681142807007, "learning_rate": 1.734325838385389e-05, "loss": 0.0704, "step": 11300 }, { "epoch": 0.17358606400122784, "grad_norm": 0.902280867099762, "learning_rate": 1.7358606400122786e-05, "loss": 0.0734, "step": 11310 }, { "epoch": 0.17373954416391682, "grad_norm": 0.8465242981910706, "learning_rate": 1.7373954416391684e-05, "loss": 0.0756, "step": 11320 }, { "epoch": 0.17389302432660578, "grad_norm": 0.6082513928413391, "learning_rate": 1.738930243266058e-05, "loss": 0.0671, "step": 11330 }, { "epoch": 0.17404650448929476, "grad_norm": 1.1060670614242554, "learning_rate": 1.7404650448929477e-05, "loss": 0.0749, "step": 11340 }, { "epoch": 0.17419998465198372, "grad_norm": 0.630929708480835, "learning_rate": 1.7419998465198376e-05, "loss": 0.0607, "step": 11350 }, { "epoch": 0.1743534648146727, "grad_norm": 0.6848860383033752, "learning_rate": 1.743534648146727e-05, "loss": 0.0633, "step": 11360 }, { "epoch": 0.1745069449773617, "grad_norm": 1.012864112854004, "learning_rate": 1.745069449773617e-05, "loss": 0.0681, "step": 11370 }, { "epoch": 0.17466042514005065, "grad_norm": 0.7190771102905273, "learning_rate": 1.7466042514005064e-05, "loss": 0.078, "step": 11380 }, { "epoch": 0.17481390530273963, "grad_norm": 0.6872773170471191, "learning_rate": 1.7481390530273963e-05, "loss": 0.0483, "step": 11390 }, { "epoch": 0.1749673854654286, "grad_norm": 0.7020688056945801, "learning_rate": 1.749673854654286e-05, "loss": 0.0575, "step": 11400 }, { "epoch": 0.17512086562811757, "grad_norm": 0.954170823097229, "learning_rate": 1.751208656281176e-05, "loss": 0.0815, "step": 11410 }, { "epoch": 0.17527434579080653, "grad_norm": 0.5907428860664368, "learning_rate": 1.7527434579080655e-05, "loss": 0.0577, "step": 11420 }, { "epoch": 0.17542782595349551, "grad_norm": 0.5969783663749695, "learning_rate": 1.7542782595349553e-05, "loss": 0.0635, "step": 11430 }, { "epoch": 0.17558130611618447, "grad_norm": 0.7588759660720825, "learning_rate": 1.7558130611618448e-05, "loss": 0.0644, "step": 11440 }, { "epoch": 0.17573478627887346, "grad_norm": 0.9240906834602356, "learning_rate": 1.7573478627887347e-05, "loss": 0.077, "step": 11450 }, { "epoch": 0.17588826644156244, "grad_norm": 0.8317656517028809, "learning_rate": 1.7588826644156245e-05, "loss": 0.0588, "step": 11460 }, { "epoch": 0.1760417466042514, "grad_norm": 0.873019814491272, "learning_rate": 1.760417466042514e-05, "loss": 0.0643, "step": 11470 }, { "epoch": 0.17619522676694038, "grad_norm": 0.9746819138526917, "learning_rate": 1.761952267669404e-05, "loss": 0.0609, "step": 11480 }, { "epoch": 0.17634870692962934, "grad_norm": 0.9384780526161194, "learning_rate": 1.7634870692962937e-05, "loss": 0.0651, "step": 11490 }, { "epoch": 0.17650218709231832, "grad_norm": 0.7844185829162598, "learning_rate": 1.7650218709231832e-05, "loss": 0.0583, "step": 11500 }, { "epoch": 0.17665566725500728, "grad_norm": 0.8346754312515259, "learning_rate": 1.766556672550073e-05, "loss": 0.0543, "step": 11510 }, { "epoch": 0.17680914741769627, "grad_norm": 0.8150117993354797, "learning_rate": 1.768091474176963e-05, "loss": 0.0675, "step": 11520 }, { "epoch": 0.17696262758038522, "grad_norm": 0.7055215835571289, "learning_rate": 1.7696262758038524e-05, "loss": 0.0614, "step": 11530 }, { "epoch": 0.1771161077430742, "grad_norm": 0.8603428602218628, "learning_rate": 1.7711610774307422e-05, "loss": 0.0806, "step": 11540 }, { "epoch": 0.1772695879057632, "grad_norm": 0.7444821000099182, "learning_rate": 1.7726958790576317e-05, "loss": 0.067, "step": 11550 }, { "epoch": 0.17742306806845215, "grad_norm": 0.5026836395263672, "learning_rate": 1.7742306806845216e-05, "loss": 0.0617, "step": 11560 }, { "epoch": 0.17757654823114113, "grad_norm": 0.5525770783424377, "learning_rate": 1.7757654823114114e-05, "loss": 0.0716, "step": 11570 }, { "epoch": 0.1777300283938301, "grad_norm": 0.6145413517951965, "learning_rate": 1.7773002839383013e-05, "loss": 0.0584, "step": 11580 }, { "epoch": 0.17788350855651908, "grad_norm": 1.1588796377182007, "learning_rate": 1.7788350855651908e-05, "loss": 0.0671, "step": 11590 }, { "epoch": 0.17803698871920803, "grad_norm": 0.7287824749946594, "learning_rate": 1.7803698871920806e-05, "loss": 0.0716, "step": 11600 }, { "epoch": 0.17819046888189702, "grad_norm": 0.8124311566352844, "learning_rate": 1.7819046888189705e-05, "loss": 0.0659, "step": 11610 }, { "epoch": 0.17834394904458598, "grad_norm": 0.8205480575561523, "learning_rate": 1.78343949044586e-05, "loss": 0.0678, "step": 11620 }, { "epoch": 0.17849742920727496, "grad_norm": 0.6097784638404846, "learning_rate": 1.7849742920727498e-05, "loss": 0.0688, "step": 11630 }, { "epoch": 0.17865090936996394, "grad_norm": 0.6910631060600281, "learning_rate": 1.7865090936996393e-05, "loss": 0.0616, "step": 11640 }, { "epoch": 0.1788043895326529, "grad_norm": 0.5784913301467896, "learning_rate": 1.788043895326529e-05, "loss": 0.0732, "step": 11650 }, { "epoch": 0.1789578696953419, "grad_norm": 0.7131727337837219, "learning_rate": 1.789578696953419e-05, "loss": 0.0616, "step": 11660 }, { "epoch": 0.17911134985803084, "grad_norm": 0.7575778961181641, "learning_rate": 1.791113498580309e-05, "loss": 0.0536, "step": 11670 }, { "epoch": 0.17926483002071983, "grad_norm": 0.6507121324539185, "learning_rate": 1.7926483002071984e-05, "loss": 0.0616, "step": 11680 }, { "epoch": 0.17941831018340879, "grad_norm": 0.7711879014968872, "learning_rate": 1.7941831018340882e-05, "loss": 0.0682, "step": 11690 }, { "epoch": 0.17957179034609777, "grad_norm": 0.571688711643219, "learning_rate": 1.7957179034609777e-05, "loss": 0.0589, "step": 11700 }, { "epoch": 0.17972527050878673, "grad_norm": 0.7376192808151245, "learning_rate": 1.7972527050878676e-05, "loss": 0.0583, "step": 11710 }, { "epoch": 0.1798787506714757, "grad_norm": 0.8309144377708435, "learning_rate": 1.7987875067147574e-05, "loss": 0.0672, "step": 11720 }, { "epoch": 0.1800322308341647, "grad_norm": 0.8255230784416199, "learning_rate": 1.800322308341647e-05, "loss": 0.0675, "step": 11730 }, { "epoch": 0.18018571099685365, "grad_norm": 0.8909797668457031, "learning_rate": 1.8018571099685367e-05, "loss": 0.0615, "step": 11740 }, { "epoch": 0.18033919115954264, "grad_norm": 0.7269678711891174, "learning_rate": 1.8033919115954262e-05, "loss": 0.0589, "step": 11750 }, { "epoch": 0.1804926713222316, "grad_norm": 0.8601005673408508, "learning_rate": 1.8049267132223164e-05, "loss": 0.0789, "step": 11760 }, { "epoch": 0.18064615148492058, "grad_norm": 0.8146107196807861, "learning_rate": 1.806461514849206e-05, "loss": 0.0582, "step": 11770 }, { "epoch": 0.18079963164760954, "grad_norm": 0.6879366636276245, "learning_rate": 1.8079963164760958e-05, "loss": 0.0603, "step": 11780 }, { "epoch": 0.18095311181029852, "grad_norm": 0.6930825710296631, "learning_rate": 1.8095311181029853e-05, "loss": 0.0667, "step": 11790 }, { "epoch": 0.18110659197298748, "grad_norm": 0.7386738657951355, "learning_rate": 1.811065919729875e-05, "loss": 0.0769, "step": 11800 }, { "epoch": 0.18126007213567646, "grad_norm": 0.7534636855125427, "learning_rate": 1.8126007213567646e-05, "loss": 0.0657, "step": 11810 }, { "epoch": 0.18141355229836545, "grad_norm": 0.8276269435882568, "learning_rate": 1.8141355229836545e-05, "loss": 0.0633, "step": 11820 }, { "epoch": 0.1815670324610544, "grad_norm": 0.5566281080245972, "learning_rate": 1.8156703246105443e-05, "loss": 0.0645, "step": 11830 }, { "epoch": 0.1817205126237434, "grad_norm": 0.4743839502334595, "learning_rate": 1.8172051262374338e-05, "loss": 0.0606, "step": 11840 }, { "epoch": 0.18187399278643235, "grad_norm": 0.8834963440895081, "learning_rate": 1.8187399278643237e-05, "loss": 0.0821, "step": 11850 }, { "epoch": 0.18202747294912133, "grad_norm": 0.6938126087188721, "learning_rate": 1.8202747294912132e-05, "loss": 0.0729, "step": 11860 }, { "epoch": 0.1821809531118103, "grad_norm": 0.7032080888748169, "learning_rate": 1.8218095311181034e-05, "loss": 0.0678, "step": 11870 }, { "epoch": 0.18233443327449927, "grad_norm": 0.9632622003555298, "learning_rate": 1.823344332744993e-05, "loss": 0.0743, "step": 11880 }, { "epoch": 0.18248791343718823, "grad_norm": 0.4464687705039978, "learning_rate": 1.8248791343718827e-05, "loss": 0.0643, "step": 11890 }, { "epoch": 0.18264139359987722, "grad_norm": 1.1910723447799683, "learning_rate": 1.8264139359987722e-05, "loss": 0.089, "step": 11900 }, { "epoch": 0.1827948737625662, "grad_norm": 1.0810158252716064, "learning_rate": 1.827948737625662e-05, "loss": 0.0537, "step": 11910 }, { "epoch": 0.18294835392525516, "grad_norm": 0.8576998114585876, "learning_rate": 1.829483539252552e-05, "loss": 0.059, "step": 11920 }, { "epoch": 0.18310183408794414, "grad_norm": 0.6611126065254211, "learning_rate": 1.8310183408794414e-05, "loss": 0.0624, "step": 11930 }, { "epoch": 0.1832553142506331, "grad_norm": 0.7985232472419739, "learning_rate": 1.8325531425063312e-05, "loss": 0.0864, "step": 11940 }, { "epoch": 0.18340879441332208, "grad_norm": 1.103540062904358, "learning_rate": 1.8340879441332207e-05, "loss": 0.0727, "step": 11950 }, { "epoch": 0.18356227457601104, "grad_norm": 0.9201864004135132, "learning_rate": 1.8356227457601106e-05, "loss": 0.0611, "step": 11960 }, { "epoch": 0.18371575473870003, "grad_norm": 0.5089479088783264, "learning_rate": 1.8371575473870004e-05, "loss": 0.0661, "step": 11970 }, { "epoch": 0.18386923490138898, "grad_norm": 1.0365164279937744, "learning_rate": 1.8386923490138903e-05, "loss": 0.0746, "step": 11980 }, { "epoch": 0.18402271506407797, "grad_norm": 0.7015755772590637, "learning_rate": 1.8402271506407798e-05, "loss": 0.0652, "step": 11990 }, { "epoch": 0.18417619522676695, "grad_norm": 0.7303932905197144, "learning_rate": 1.8417619522676696e-05, "loss": 0.0657, "step": 12000 }, { "epoch": 0.1843296753894559, "grad_norm": 0.7820771336555481, "learning_rate": 1.843296753894559e-05, "loss": 0.0675, "step": 12010 }, { "epoch": 0.1844831555521449, "grad_norm": 0.5823015570640564, "learning_rate": 1.844831555521449e-05, "loss": 0.0641, "step": 12020 }, { "epoch": 0.18463663571483385, "grad_norm": 0.7975799441337585, "learning_rate": 1.8463663571483388e-05, "loss": 0.0535, "step": 12030 }, { "epoch": 0.18479011587752284, "grad_norm": 0.5790008902549744, "learning_rate": 1.8479011587752283e-05, "loss": 0.0672, "step": 12040 }, { "epoch": 0.1849435960402118, "grad_norm": 0.689095675945282, "learning_rate": 1.849435960402118e-05, "loss": 0.0556, "step": 12050 }, { "epoch": 0.18509707620290078, "grad_norm": 0.6957814693450928, "learning_rate": 1.8509707620290077e-05, "loss": 0.0745, "step": 12060 }, { "epoch": 0.18525055636558974, "grad_norm": 0.6626825332641602, "learning_rate": 1.852505563655898e-05, "loss": 0.0693, "step": 12070 }, { "epoch": 0.18540403652827872, "grad_norm": 0.7926549315452576, "learning_rate": 1.8540403652827874e-05, "loss": 0.0648, "step": 12080 }, { "epoch": 0.1855575166909677, "grad_norm": 0.6231748461723328, "learning_rate": 1.8555751669096772e-05, "loss": 0.057, "step": 12090 }, { "epoch": 0.18571099685365666, "grad_norm": 1.0976769924163818, "learning_rate": 1.8571099685365667e-05, "loss": 0.0676, "step": 12100 }, { "epoch": 0.18586447701634565, "grad_norm": 0.7713425159454346, "learning_rate": 1.8586447701634565e-05, "loss": 0.0637, "step": 12110 }, { "epoch": 0.1860179571790346, "grad_norm": 0.859754741191864, "learning_rate": 1.860179571790346e-05, "loss": 0.0549, "step": 12120 }, { "epoch": 0.1861714373417236, "grad_norm": 0.7797519564628601, "learning_rate": 1.861714373417236e-05, "loss": 0.0753, "step": 12130 }, { "epoch": 0.18632491750441255, "grad_norm": 0.637409508228302, "learning_rate": 1.8632491750441257e-05, "loss": 0.0815, "step": 12140 }, { "epoch": 0.18647839766710153, "grad_norm": 0.7697461843490601, "learning_rate": 1.8647839766710152e-05, "loss": 0.0634, "step": 12150 }, { "epoch": 0.1866318778297905, "grad_norm": 0.6754788756370544, "learning_rate": 1.866318778297905e-05, "loss": 0.057, "step": 12160 }, { "epoch": 0.18678535799247947, "grad_norm": 0.9814466238021851, "learning_rate": 1.867853579924795e-05, "loss": 0.0617, "step": 12170 }, { "epoch": 0.18693883815516846, "grad_norm": 0.7076228260993958, "learning_rate": 1.8693883815516848e-05, "loss": 0.0579, "step": 12180 }, { "epoch": 0.18709231831785741, "grad_norm": 1.0213817358016968, "learning_rate": 1.8709231831785743e-05, "loss": 0.0716, "step": 12190 }, { "epoch": 0.1872457984805464, "grad_norm": 0.6171581149101257, "learning_rate": 1.872457984805464e-05, "loss": 0.0726, "step": 12200 }, { "epoch": 0.18739927864323536, "grad_norm": 0.885704755783081, "learning_rate": 1.8739927864323536e-05, "loss": 0.0633, "step": 12210 }, { "epoch": 0.18755275880592434, "grad_norm": 0.7217461466789246, "learning_rate": 1.8755275880592435e-05, "loss": 0.0595, "step": 12220 }, { "epoch": 0.1877062389686133, "grad_norm": 0.5932738780975342, "learning_rate": 1.8770623896861333e-05, "loss": 0.0604, "step": 12230 }, { "epoch": 0.18785971913130228, "grad_norm": 0.5862298607826233, "learning_rate": 1.8785971913130228e-05, "loss": 0.0491, "step": 12240 }, { "epoch": 0.18801319929399124, "grad_norm": 0.6844924688339233, "learning_rate": 1.8801319929399127e-05, "loss": 0.0581, "step": 12250 }, { "epoch": 0.18816667945668022, "grad_norm": 0.5601293444633484, "learning_rate": 1.8816667945668025e-05, "loss": 0.0582, "step": 12260 }, { "epoch": 0.1883201596193692, "grad_norm": 0.6230742335319519, "learning_rate": 1.883201596193692e-05, "loss": 0.0658, "step": 12270 }, { "epoch": 0.18847363978205817, "grad_norm": 0.5551549792289734, "learning_rate": 1.884736397820582e-05, "loss": 0.074, "step": 12280 }, { "epoch": 0.18862711994474715, "grad_norm": 0.463066041469574, "learning_rate": 1.8862711994474717e-05, "loss": 0.0678, "step": 12290 }, { "epoch": 0.1887806001074361, "grad_norm": 0.7678158283233643, "learning_rate": 1.8878060010743612e-05, "loss": 0.0608, "step": 12300 }, { "epoch": 0.1889340802701251, "grad_norm": 0.6203338503837585, "learning_rate": 1.889340802701251e-05, "loss": 0.0755, "step": 12310 }, { "epoch": 0.18908756043281405, "grad_norm": 0.6550191044807434, "learning_rate": 1.8908756043281406e-05, "loss": 0.063, "step": 12320 }, { "epoch": 0.18924104059550304, "grad_norm": 0.5240475535392761, "learning_rate": 1.8924104059550304e-05, "loss": 0.0695, "step": 12330 }, { "epoch": 0.189394520758192, "grad_norm": 0.9065999984741211, "learning_rate": 1.8939452075819202e-05, "loss": 0.0535, "step": 12340 }, { "epoch": 0.18954800092088098, "grad_norm": 0.9432088136672974, "learning_rate": 1.89548000920881e-05, "loss": 0.0665, "step": 12350 }, { "epoch": 0.18970148108356996, "grad_norm": 0.7040534019470215, "learning_rate": 1.8970148108356996e-05, "loss": 0.0855, "step": 12360 }, { "epoch": 0.18985496124625892, "grad_norm": 0.8054395914077759, "learning_rate": 1.8985496124625894e-05, "loss": 0.0666, "step": 12370 }, { "epoch": 0.1900084414089479, "grad_norm": 0.8377683162689209, "learning_rate": 1.9000844140894793e-05, "loss": 0.0654, "step": 12380 }, { "epoch": 0.19016192157163686, "grad_norm": 0.6859936714172363, "learning_rate": 1.9016192157163688e-05, "loss": 0.0839, "step": 12390 }, { "epoch": 0.19031540173432585, "grad_norm": 0.8058871030807495, "learning_rate": 1.9031540173432586e-05, "loss": 0.0554, "step": 12400 }, { "epoch": 0.1904688818970148, "grad_norm": 0.8303741812705994, "learning_rate": 1.904688818970148e-05, "loss": 0.0588, "step": 12410 }, { "epoch": 0.1906223620597038, "grad_norm": 0.6311078667640686, "learning_rate": 1.906223620597038e-05, "loss": 0.0723, "step": 12420 }, { "epoch": 0.19077584222239274, "grad_norm": 0.5978125929832458, "learning_rate": 1.9077584222239278e-05, "loss": 0.0671, "step": 12430 }, { "epoch": 0.19092932238508173, "grad_norm": 0.5184743404388428, "learning_rate": 1.9092932238508177e-05, "loss": 0.0792, "step": 12440 }, { "epoch": 0.1910828025477707, "grad_norm": 0.9139849543571472, "learning_rate": 1.910828025477707e-05, "loss": 0.0798, "step": 12450 }, { "epoch": 0.19123628271045967, "grad_norm": 0.5101162791252136, "learning_rate": 1.912362827104597e-05, "loss": 0.06, "step": 12460 }, { "epoch": 0.19138976287314866, "grad_norm": 0.6192538738250732, "learning_rate": 1.9138976287314865e-05, "loss": 0.0679, "step": 12470 }, { "epoch": 0.1915432430358376, "grad_norm": 0.8553406000137329, "learning_rate": 1.9154324303583764e-05, "loss": 0.0643, "step": 12480 }, { "epoch": 0.1916967231985266, "grad_norm": 0.8497071862220764, "learning_rate": 1.9169672319852662e-05, "loss": 0.0656, "step": 12490 }, { "epoch": 0.19185020336121555, "grad_norm": 0.591977059841156, "learning_rate": 1.9185020336121557e-05, "loss": 0.054, "step": 12500 }, { "epoch": 0.19200368352390454, "grad_norm": 0.7200287580490112, "learning_rate": 1.9200368352390455e-05, "loss": 0.0645, "step": 12510 }, { "epoch": 0.1921571636865935, "grad_norm": 0.9465169310569763, "learning_rate": 1.921571636865935e-05, "loss": 0.0673, "step": 12520 }, { "epoch": 0.19231064384928248, "grad_norm": 0.6177148818969727, "learning_rate": 1.923106438492825e-05, "loss": 0.0682, "step": 12530 }, { "epoch": 0.19246412401197147, "grad_norm": 0.6269131898880005, "learning_rate": 1.9246412401197147e-05, "loss": 0.0728, "step": 12540 }, { "epoch": 0.19261760417466042, "grad_norm": 0.6277081966400146, "learning_rate": 1.9261760417466046e-05, "loss": 0.0707, "step": 12550 }, { "epoch": 0.1927710843373494, "grad_norm": 0.9158781170845032, "learning_rate": 1.927710843373494e-05, "loss": 0.0662, "step": 12560 }, { "epoch": 0.19292456450003836, "grad_norm": 0.8857723474502563, "learning_rate": 1.929245645000384e-05, "loss": 0.0729, "step": 12570 }, { "epoch": 0.19307804466272735, "grad_norm": 0.5275251269340515, "learning_rate": 1.9307804466272734e-05, "loss": 0.0683, "step": 12580 }, { "epoch": 0.1932315248254163, "grad_norm": 0.9038318991661072, "learning_rate": 1.9323152482541633e-05, "loss": 0.0788, "step": 12590 }, { "epoch": 0.1933850049881053, "grad_norm": 0.7815020680427551, "learning_rate": 1.933850049881053e-05, "loss": 0.065, "step": 12600 }, { "epoch": 0.19353848515079425, "grad_norm": 0.48504024744033813, "learning_rate": 1.9353848515079426e-05, "loss": 0.0563, "step": 12610 }, { "epoch": 0.19369196531348323, "grad_norm": 0.708026647567749, "learning_rate": 1.9369196531348325e-05, "loss": 0.066, "step": 12620 }, { "epoch": 0.19384544547617222, "grad_norm": 0.6764810681343079, "learning_rate": 1.938454454761722e-05, "loss": 0.0694, "step": 12630 }, { "epoch": 0.19399892563886117, "grad_norm": 0.705482006072998, "learning_rate": 1.939989256388612e-05, "loss": 0.0684, "step": 12640 }, { "epoch": 0.19415240580155016, "grad_norm": 0.5616893768310547, "learning_rate": 1.9415240580155017e-05, "loss": 0.0703, "step": 12650 }, { "epoch": 0.19430588596423912, "grad_norm": 0.5999292731285095, "learning_rate": 1.9430588596423915e-05, "loss": 0.0651, "step": 12660 }, { "epoch": 0.1944593661269281, "grad_norm": 0.6732624769210815, "learning_rate": 1.944593661269281e-05, "loss": 0.0586, "step": 12670 }, { "epoch": 0.19461284628961706, "grad_norm": 0.6589756011962891, "learning_rate": 1.946128462896171e-05, "loss": 0.0765, "step": 12680 }, { "epoch": 0.19476632645230604, "grad_norm": 0.6400941014289856, "learning_rate": 1.9476632645230607e-05, "loss": 0.0621, "step": 12690 }, { "epoch": 0.194919806614995, "grad_norm": 0.6608678102493286, "learning_rate": 1.9491980661499502e-05, "loss": 0.0599, "step": 12700 }, { "epoch": 0.19507328677768399, "grad_norm": 0.7832278609275818, "learning_rate": 1.95073286777684e-05, "loss": 0.0657, "step": 12710 }, { "epoch": 0.19522676694037297, "grad_norm": 0.4451389014720917, "learning_rate": 1.9522676694037296e-05, "loss": 0.0581, "step": 12720 }, { "epoch": 0.19538024710306193, "grad_norm": 0.6997516751289368, "learning_rate": 1.9538024710306194e-05, "loss": 0.0701, "step": 12730 }, { "epoch": 0.1955337272657509, "grad_norm": 0.6851418614387512, "learning_rate": 1.9553372726575092e-05, "loss": 0.0774, "step": 12740 }, { "epoch": 0.19568720742843987, "grad_norm": 0.6942652463912964, "learning_rate": 1.956872074284399e-05, "loss": 0.0592, "step": 12750 }, { "epoch": 0.19584068759112885, "grad_norm": 0.5750807523727417, "learning_rate": 1.9584068759112886e-05, "loss": 0.0625, "step": 12760 }, { "epoch": 0.1959941677538178, "grad_norm": 0.9568031430244446, "learning_rate": 1.9599416775381784e-05, "loss": 0.0582, "step": 12770 }, { "epoch": 0.1961476479165068, "grad_norm": 0.6728317737579346, "learning_rate": 1.961476479165068e-05, "loss": 0.0717, "step": 12780 }, { "epoch": 0.19630112807919575, "grad_norm": 0.6904451251029968, "learning_rate": 1.9630112807919578e-05, "loss": 0.0777, "step": 12790 }, { "epoch": 0.19645460824188474, "grad_norm": 0.661492109298706, "learning_rate": 1.9645460824188476e-05, "loss": 0.0616, "step": 12800 }, { "epoch": 0.19660808840457372, "grad_norm": 0.6100581288337708, "learning_rate": 1.966080884045737e-05, "loss": 0.0571, "step": 12810 }, { "epoch": 0.19676156856726268, "grad_norm": 0.7944172620773315, "learning_rate": 1.967615685672627e-05, "loss": 0.0597, "step": 12820 }, { "epoch": 0.19691504872995166, "grad_norm": 0.7134007811546326, "learning_rate": 1.9691504872995165e-05, "loss": 0.0586, "step": 12830 }, { "epoch": 0.19706852889264062, "grad_norm": 0.6830191016197205, "learning_rate": 1.9706852889264063e-05, "loss": 0.066, "step": 12840 }, { "epoch": 0.1972220090553296, "grad_norm": 0.7282140254974365, "learning_rate": 1.972220090553296e-05, "loss": 0.0674, "step": 12850 }, { "epoch": 0.19737548921801856, "grad_norm": 0.5723252892494202, "learning_rate": 1.973754892180186e-05, "loss": 0.0738, "step": 12860 }, { "epoch": 0.19752896938070755, "grad_norm": 0.6706973314285278, "learning_rate": 1.9752896938070755e-05, "loss": 0.0771, "step": 12870 }, { "epoch": 0.1976824495433965, "grad_norm": 0.62874835729599, "learning_rate": 1.9768244954339654e-05, "loss": 0.0652, "step": 12880 }, { "epoch": 0.1978359297060855, "grad_norm": 0.6495690941810608, "learning_rate": 1.978359297060855e-05, "loss": 0.0743, "step": 12890 }, { "epoch": 0.19798940986877447, "grad_norm": 0.8438411355018616, "learning_rate": 1.9798940986877447e-05, "loss": 0.0664, "step": 12900 }, { "epoch": 0.19814289003146343, "grad_norm": 0.5143527984619141, "learning_rate": 1.9814289003146345e-05, "loss": 0.0631, "step": 12910 }, { "epoch": 0.19829637019415242, "grad_norm": 0.5348201990127563, "learning_rate": 1.982963701941524e-05, "loss": 0.0576, "step": 12920 }, { "epoch": 0.19844985035684137, "grad_norm": 0.7659285068511963, "learning_rate": 1.984498503568414e-05, "loss": 0.0595, "step": 12930 }, { "epoch": 0.19860333051953036, "grad_norm": 0.8028452396392822, "learning_rate": 1.9860333051953037e-05, "loss": 0.0651, "step": 12940 }, { "epoch": 0.19875681068221931, "grad_norm": 0.7165825366973877, "learning_rate": 1.9875681068221936e-05, "loss": 0.0594, "step": 12950 }, { "epoch": 0.1989102908449083, "grad_norm": 0.5045925378799438, "learning_rate": 1.989102908449083e-05, "loss": 0.0609, "step": 12960 }, { "epoch": 0.19906377100759726, "grad_norm": 0.6834500432014465, "learning_rate": 1.990637710075973e-05, "loss": 0.0588, "step": 12970 }, { "epoch": 0.19921725117028624, "grad_norm": 0.7934616804122925, "learning_rate": 1.9921725117028624e-05, "loss": 0.0887, "step": 12980 }, { "epoch": 0.19937073133297523, "grad_norm": 0.6319302916526794, "learning_rate": 1.9937073133297523e-05, "loss": 0.0666, "step": 12990 }, { "epoch": 0.19952421149566418, "grad_norm": 0.654288649559021, "learning_rate": 1.995242114956642e-05, "loss": 0.0639, "step": 13000 }, { "epoch": 0.19967769165835317, "grad_norm": 0.6927042007446289, "learning_rate": 1.9967769165835316e-05, "loss": 0.0559, "step": 13010 }, { "epoch": 0.19983117182104213, "grad_norm": 0.6205968856811523, "learning_rate": 1.9983117182104215e-05, "loss": 0.0857, "step": 13020 }, { "epoch": 0.1999846519837311, "grad_norm": 0.6934603452682495, "learning_rate": 1.9998465198373113e-05, "loss": 0.0571, "step": 13030 }, { "epoch": 0.20013813214642007, "grad_norm": 0.6964521408081055, "learning_rate": 1.9999999709387524e-05, "loss": 0.0534, "step": 13040 }, { "epoch": 0.20029161230910905, "grad_norm": 0.6503127813339233, "learning_rate": 1.99999987048012e-05, "loss": 0.0609, "step": 13050 }, { "epoch": 0.200445092471798, "grad_norm": 0.9333605766296387, "learning_rate": 1.9999996982653297e-05, "loss": 0.0661, "step": 13060 }, { "epoch": 0.200598572634487, "grad_norm": 0.7162254452705383, "learning_rate": 1.9999994542943933e-05, "loss": 0.0628, "step": 13070 }, { "epoch": 0.20075205279717598, "grad_norm": 0.8940792083740234, "learning_rate": 1.999999138567329e-05, "loss": 0.0637, "step": 13080 }, { "epoch": 0.20090553295986494, "grad_norm": 0.8556881546974182, "learning_rate": 1.9999987510841592e-05, "loss": 0.0585, "step": 13090 }, { "epoch": 0.20105901312255392, "grad_norm": 0.7288274765014648, "learning_rate": 1.9999982918449113e-05, "loss": 0.0652, "step": 13100 }, { "epoch": 0.20121249328524288, "grad_norm": 0.7055719494819641, "learning_rate": 1.999997760849619e-05, "loss": 0.0777, "step": 13110 }, { "epoch": 0.20136597344793186, "grad_norm": 0.6826000213623047, "learning_rate": 1.9999971580983194e-05, "loss": 0.0669, "step": 13120 }, { "epoch": 0.20151945361062082, "grad_norm": 0.7216922640800476, "learning_rate": 1.9999964835910564e-05, "loss": 0.0667, "step": 13130 }, { "epoch": 0.2016729337733098, "grad_norm": 0.5128002166748047, "learning_rate": 1.9999957373278788e-05, "loss": 0.0588, "step": 13140 }, { "epoch": 0.20182641393599876, "grad_norm": 0.7821229696273804, "learning_rate": 1.9999949193088396e-05, "loss": 0.0548, "step": 13150 }, { "epoch": 0.20197989409868775, "grad_norm": 0.7446670532226562, "learning_rate": 1.9999940295339973e-05, "loss": 0.0722, "step": 13160 }, { "epoch": 0.20213337426137673, "grad_norm": 0.9423218965530396, "learning_rate": 1.9999930680034164e-05, "loss": 0.0642, "step": 13170 }, { "epoch": 0.2022868544240657, "grad_norm": 0.5769614577293396, "learning_rate": 1.9999920347171652e-05, "loss": 0.0731, "step": 13180 }, { "epoch": 0.20244033458675467, "grad_norm": 0.6629437804222107, "learning_rate": 1.999990929675318e-05, "loss": 0.0625, "step": 13190 }, { "epoch": 0.20259381474944363, "grad_norm": 0.6479567289352417, "learning_rate": 1.9999897528779545e-05, "loss": 0.072, "step": 13200 }, { "epoch": 0.20274729491213261, "grad_norm": 0.607630729675293, "learning_rate": 1.9999885043251586e-05, "loss": 0.0851, "step": 13210 }, { "epoch": 0.20290077507482157, "grad_norm": 0.6807599663734436, "learning_rate": 1.9999871840170207e-05, "loss": 0.0518, "step": 13220 }, { "epoch": 0.20305425523751056, "grad_norm": 1.5426812171936035, "learning_rate": 1.999985791953635e-05, "loss": 0.0619, "step": 13230 }, { "epoch": 0.2032077354001995, "grad_norm": 0.5873217582702637, "learning_rate": 1.999984328135101e-05, "loss": 0.0526, "step": 13240 }, { "epoch": 0.2033612155628885, "grad_norm": 0.6618098616600037, "learning_rate": 1.999982792561524e-05, "loss": 0.0676, "step": 13250 }, { "epoch": 0.20351469572557748, "grad_norm": 0.7322198748588562, "learning_rate": 1.999981185233015e-05, "loss": 0.0687, "step": 13260 }, { "epoch": 0.20366817588826644, "grad_norm": 0.7395220398902893, "learning_rate": 1.9999795061496883e-05, "loss": 0.0629, "step": 13270 }, { "epoch": 0.20382165605095542, "grad_norm": 0.5966060757637024, "learning_rate": 1.999977755311665e-05, "loss": 0.05, "step": 13280 }, { "epoch": 0.20397513621364438, "grad_norm": 0.670139491558075, "learning_rate": 1.9999759327190703e-05, "loss": 0.0694, "step": 13290 }, { "epoch": 0.20412861637633337, "grad_norm": 0.6642162799835205, "learning_rate": 1.9999740383720352e-05, "loss": 0.064, "step": 13300 }, { "epoch": 0.20428209653902232, "grad_norm": 0.7884939312934875, "learning_rate": 1.9999720722706955e-05, "loss": 0.07, "step": 13310 }, { "epoch": 0.2044355767017113, "grad_norm": 0.7253804802894592, "learning_rate": 1.9999700344151924e-05, "loss": 0.0676, "step": 13320 }, { "epoch": 0.20458905686440026, "grad_norm": 0.6466392278671265, "learning_rate": 1.9999679248056726e-05, "loss": 0.0591, "step": 13330 }, { "epoch": 0.20474253702708925, "grad_norm": 0.6399741768836975, "learning_rate": 1.9999657434422866e-05, "loss": 0.0553, "step": 13340 }, { "epoch": 0.20489601718977823, "grad_norm": 1.013816475868225, "learning_rate": 1.9999634903251914e-05, "loss": 0.0549, "step": 13350 }, { "epoch": 0.2050494973524672, "grad_norm": 0.7668477892875671, "learning_rate": 1.9999611654545483e-05, "loss": 0.0617, "step": 13360 }, { "epoch": 0.20520297751515618, "grad_norm": 0.6576216220855713, "learning_rate": 1.9999587688305246e-05, "loss": 0.0656, "step": 13370 }, { "epoch": 0.20535645767784513, "grad_norm": 0.5269632339477539, "learning_rate": 1.9999563004532923e-05, "loss": 0.0754, "step": 13380 }, { "epoch": 0.20550993784053412, "grad_norm": 1.0441865921020508, "learning_rate": 1.9999537603230284e-05, "loss": 0.0717, "step": 13390 }, { "epoch": 0.20566341800322308, "grad_norm": 0.5437101125717163, "learning_rate": 1.9999511484399146e-05, "loss": 0.0678, "step": 13400 }, { "epoch": 0.20581689816591206, "grad_norm": 0.7759417295455933, "learning_rate": 1.999948464804139e-05, "loss": 0.0684, "step": 13410 }, { "epoch": 0.20597037832860102, "grad_norm": 0.7570949196815491, "learning_rate": 1.999945709415894e-05, "loss": 0.0684, "step": 13420 }, { "epoch": 0.20612385849129, "grad_norm": 0.6813003420829773, "learning_rate": 1.9999428822753775e-05, "loss": 0.0705, "step": 13430 }, { "epoch": 0.206277338653979, "grad_norm": 0.9257316589355469, "learning_rate": 1.999939983382792e-05, "loss": 0.0573, "step": 13440 }, { "epoch": 0.20643081881666794, "grad_norm": 0.732395589351654, "learning_rate": 1.9999370127383457e-05, "loss": 0.0653, "step": 13450 }, { "epoch": 0.20658429897935693, "grad_norm": 0.6097633242607117, "learning_rate": 1.9999339703422516e-05, "loss": 0.0627, "step": 13460 }, { "epoch": 0.20673777914204589, "grad_norm": 0.7773666977882385, "learning_rate": 1.999930856194728e-05, "loss": 0.0671, "step": 13470 }, { "epoch": 0.20689125930473487, "grad_norm": 0.7587971687316895, "learning_rate": 1.999927670295999e-05, "loss": 0.0714, "step": 13480 }, { "epoch": 0.20704473946742383, "grad_norm": 0.5588840842247009, "learning_rate": 1.9999244126462924e-05, "loss": 0.059, "step": 13490 }, { "epoch": 0.2071982196301128, "grad_norm": 0.645067036151886, "learning_rate": 1.999921083245842e-05, "loss": 0.0641, "step": 13500 }, { "epoch": 0.20735169979280177, "grad_norm": 0.8130320906639099, "learning_rate": 1.9999176820948877e-05, "loss": 0.0706, "step": 13510 }, { "epoch": 0.20750517995549075, "grad_norm": 0.6976729035377502, "learning_rate": 1.9999142091936724e-05, "loss": 0.067, "step": 13520 }, { "epoch": 0.20765866011817974, "grad_norm": 0.7058287858963013, "learning_rate": 1.9999106645424455e-05, "loss": 0.0709, "step": 13530 }, { "epoch": 0.2078121402808687, "grad_norm": 0.6071164011955261, "learning_rate": 1.999907048141462e-05, "loss": 0.0585, "step": 13540 }, { "epoch": 0.20796562044355768, "grad_norm": 0.6723839640617371, "learning_rate": 1.9999033599909804e-05, "loss": 0.0742, "step": 13550 }, { "epoch": 0.20811910060624664, "grad_norm": 0.6160528063774109, "learning_rate": 1.9998996000912663e-05, "loss": 0.0595, "step": 13560 }, { "epoch": 0.20827258076893562, "grad_norm": 0.5186132788658142, "learning_rate": 1.9998957684425893e-05, "loss": 0.0579, "step": 13570 }, { "epoch": 0.20842606093162458, "grad_norm": 0.9784539341926575, "learning_rate": 1.9998918650452238e-05, "loss": 0.0637, "step": 13580 }, { "epoch": 0.20857954109431356, "grad_norm": 0.6462875008583069, "learning_rate": 1.9998878898994504e-05, "loss": 0.0645, "step": 13590 }, { "epoch": 0.20873302125700252, "grad_norm": 0.466707319021225, "learning_rate": 1.999883843005554e-05, "loss": 0.0647, "step": 13600 }, { "epoch": 0.2088865014196915, "grad_norm": 0.6719377040863037, "learning_rate": 1.9998797243638254e-05, "loss": 0.0581, "step": 13610 }, { "epoch": 0.2090399815823805, "grad_norm": 0.5114620923995972, "learning_rate": 1.9998755339745602e-05, "loss": 0.0771, "step": 13620 }, { "epoch": 0.20919346174506945, "grad_norm": 0.723310112953186, "learning_rate": 1.999871271838058e-05, "loss": 0.0733, "step": 13630 }, { "epoch": 0.20934694190775843, "grad_norm": 0.6084756255149841, "learning_rate": 1.9998669379546262e-05, "loss": 0.0517, "step": 13640 }, { "epoch": 0.2095004220704474, "grad_norm": 0.5691758394241333, "learning_rate": 1.9998625323245747e-05, "loss": 0.0616, "step": 13650 }, { "epoch": 0.20965390223313637, "grad_norm": 0.740288496017456, "learning_rate": 1.99985805494822e-05, "loss": 0.0622, "step": 13660 }, { "epoch": 0.20980738239582533, "grad_norm": 0.745668351650238, "learning_rate": 1.9998535058258835e-05, "loss": 0.0666, "step": 13670 }, { "epoch": 0.20996086255851432, "grad_norm": 0.7651581168174744, "learning_rate": 1.9998488849578914e-05, "loss": 0.074, "step": 13680 }, { "epoch": 0.21011434272120327, "grad_norm": 0.8329300880432129, "learning_rate": 1.999844192344575e-05, "loss": 0.063, "step": 13690 }, { "epoch": 0.21026782288389226, "grad_norm": 0.6591222286224365, "learning_rate": 1.999839427986272e-05, "loss": 0.0582, "step": 13700 }, { "epoch": 0.21042130304658124, "grad_norm": 0.6916355490684509, "learning_rate": 1.999834591883323e-05, "loss": 0.0671, "step": 13710 }, { "epoch": 0.2105747832092702, "grad_norm": 0.7089632153511047, "learning_rate": 1.999829684036076e-05, "loss": 0.0662, "step": 13720 }, { "epoch": 0.21072826337195918, "grad_norm": 0.7900611162185669, "learning_rate": 1.999824704444883e-05, "loss": 0.0661, "step": 13730 }, { "epoch": 0.21088174353464814, "grad_norm": 0.6333492398262024, "learning_rate": 1.9998196531101005e-05, "loss": 0.0691, "step": 13740 }, { "epoch": 0.21103522369733713, "grad_norm": 0.7585776448249817, "learning_rate": 1.9998145300320923e-05, "loss": 0.057, "step": 13750 }, { "epoch": 0.21118870386002608, "grad_norm": 0.7555041313171387, "learning_rate": 1.999809335211225e-05, "loss": 0.076, "step": 13760 }, { "epoch": 0.21134218402271507, "grad_norm": 0.8029013276100159, "learning_rate": 1.9998040686478716e-05, "loss": 0.0634, "step": 13770 }, { "epoch": 0.21149566418540403, "grad_norm": 0.5221790671348572, "learning_rate": 1.99979873034241e-05, "loss": 0.0665, "step": 13780 }, { "epoch": 0.211649144348093, "grad_norm": 0.615469217300415, "learning_rate": 1.999793320295224e-05, "loss": 0.054, "step": 13790 }, { "epoch": 0.211802624510782, "grad_norm": 0.7157528400421143, "learning_rate": 1.9997878385067002e-05, "loss": 0.0714, "step": 13800 }, { "epoch": 0.21195610467347095, "grad_norm": 0.8783881068229675, "learning_rate": 1.9997822849772334e-05, "loss": 0.0671, "step": 13810 }, { "epoch": 0.21210958483615994, "grad_norm": 0.5948585867881775, "learning_rate": 1.9997766597072217e-05, "loss": 0.0719, "step": 13820 }, { "epoch": 0.2122630649988489, "grad_norm": 1.006502628326416, "learning_rate": 1.9997709626970685e-05, "loss": 0.0649, "step": 13830 }, { "epoch": 0.21241654516153788, "grad_norm": 0.681703507900238, "learning_rate": 1.9997651939471827e-05, "loss": 0.0629, "step": 13840 }, { "epoch": 0.21257002532422684, "grad_norm": 0.6660040020942688, "learning_rate": 1.9997593534579783e-05, "loss": 0.0708, "step": 13850 }, { "epoch": 0.21272350548691582, "grad_norm": 1.1494653224945068, "learning_rate": 1.9997534412298745e-05, "loss": 0.0708, "step": 13860 }, { "epoch": 0.21287698564960478, "grad_norm": 0.5948536992073059, "learning_rate": 1.9997474572632953e-05, "loss": 0.0569, "step": 13870 }, { "epoch": 0.21303046581229376, "grad_norm": 0.6452885866165161, "learning_rate": 1.99974140155867e-05, "loss": 0.0655, "step": 13880 }, { "epoch": 0.21318394597498275, "grad_norm": 0.5750848054885864, "learning_rate": 1.9997352741164337e-05, "loss": 0.0624, "step": 13890 }, { "epoch": 0.2133374261376717, "grad_norm": 0.7493662238121033, "learning_rate": 1.9997290749370257e-05, "loss": 0.0601, "step": 13900 }, { "epoch": 0.2134909063003607, "grad_norm": 0.6051182150840759, "learning_rate": 1.9997228040208907e-05, "loss": 0.0606, "step": 13910 }, { "epoch": 0.21364438646304965, "grad_norm": 0.6750280857086182, "learning_rate": 1.999716461368479e-05, "loss": 0.0463, "step": 13920 }, { "epoch": 0.21379786662573863, "grad_norm": 1.0354009866714478, "learning_rate": 1.9997100469802452e-05, "loss": 0.0722, "step": 13930 }, { "epoch": 0.2139513467884276, "grad_norm": 0.7142915725708008, "learning_rate": 1.9997035608566502e-05, "loss": 0.056, "step": 13940 }, { "epoch": 0.21410482695111657, "grad_norm": 0.5635131597518921, "learning_rate": 1.999697002998159e-05, "loss": 0.0667, "step": 13950 }, { "epoch": 0.21425830711380553, "grad_norm": 0.7849467396736145, "learning_rate": 1.9996903734052423e-05, "loss": 0.0492, "step": 13960 }, { "epoch": 0.21441178727649451, "grad_norm": 0.6850298643112183, "learning_rate": 1.999683672078376e-05, "loss": 0.0713, "step": 13970 }, { "epoch": 0.2145652674391835, "grad_norm": 0.7830144166946411, "learning_rate": 1.9996768990180408e-05, "loss": 0.0556, "step": 13980 }, { "epoch": 0.21471874760187246, "grad_norm": 0.7876488566398621, "learning_rate": 1.9996700542247224e-05, "loss": 0.0653, "step": 13990 }, { "epoch": 0.21487222776456144, "grad_norm": 0.5766248106956482, "learning_rate": 1.9996631376989126e-05, "loss": 0.0609, "step": 14000 }, { "epoch": 0.2150257079272504, "grad_norm": 0.5497604012489319, "learning_rate": 1.9996561494411072e-05, "loss": 0.0653, "step": 14010 }, { "epoch": 0.21517918808993938, "grad_norm": 0.5332120656967163, "learning_rate": 1.999649089451808e-05, "loss": 0.0603, "step": 14020 }, { "epoch": 0.21533266825262834, "grad_norm": 0.7422909736633301, "learning_rate": 1.999641957731521e-05, "loss": 0.0757, "step": 14030 }, { "epoch": 0.21548614841531732, "grad_norm": 0.5370504260063171, "learning_rate": 1.9996347542807585e-05, "loss": 0.0509, "step": 14040 }, { "epoch": 0.21563962857800628, "grad_norm": 0.6069746613502502, "learning_rate": 1.999627479100037e-05, "loss": 0.051, "step": 14050 }, { "epoch": 0.21579310874069527, "grad_norm": 0.7174535989761353, "learning_rate": 1.999620132189879e-05, "loss": 0.0626, "step": 14060 }, { "epoch": 0.21594658890338425, "grad_norm": 0.5124052166938782, "learning_rate": 1.9996127135508118e-05, "loss": 0.062, "step": 14070 }, { "epoch": 0.2161000690660732, "grad_norm": 0.7746309638023376, "learning_rate": 1.999605223183367e-05, "loss": 0.0697, "step": 14080 }, { "epoch": 0.2162535492287622, "grad_norm": 0.6080886125564575, "learning_rate": 1.9995976610880826e-05, "loss": 0.0695, "step": 14090 }, { "epoch": 0.21640702939145115, "grad_norm": 0.6350159049034119, "learning_rate": 1.9995900272655013e-05, "loss": 0.0612, "step": 14100 }, { "epoch": 0.21656050955414013, "grad_norm": 0.5308709144592285, "learning_rate": 1.99958232171617e-05, "loss": 0.0555, "step": 14110 }, { "epoch": 0.2167139897168291, "grad_norm": 0.5902043581008911, "learning_rate": 1.999574544440643e-05, "loss": 0.0571, "step": 14120 }, { "epoch": 0.21686746987951808, "grad_norm": 0.5111143589019775, "learning_rate": 1.9995666954394772e-05, "loss": 0.0603, "step": 14130 }, { "epoch": 0.21702095004220703, "grad_norm": 0.49742385745048523, "learning_rate": 1.9995587747132367e-05, "loss": 0.0558, "step": 14140 }, { "epoch": 0.21717443020489602, "grad_norm": 0.6165192127227783, "learning_rate": 1.9995507822624888e-05, "loss": 0.0702, "step": 14150 }, { "epoch": 0.217327910367585, "grad_norm": 0.8965000510215759, "learning_rate": 1.9995427180878084e-05, "loss": 0.0572, "step": 14160 }, { "epoch": 0.21748139053027396, "grad_norm": 0.6690358519554138, "learning_rate": 1.999534582189773e-05, "loss": 0.0616, "step": 14170 }, { "epoch": 0.21763487069296295, "grad_norm": 0.494899719953537, "learning_rate": 1.9995263745689667e-05, "loss": 0.0741, "step": 14180 }, { "epoch": 0.2177883508556519, "grad_norm": 0.6490617394447327, "learning_rate": 1.999518095225979e-05, "loss": 0.0586, "step": 14190 }, { "epoch": 0.2179418310183409, "grad_norm": 0.5596715211868286, "learning_rate": 1.999509744161403e-05, "loss": 0.0652, "step": 14200 }, { "epoch": 0.21809531118102984, "grad_norm": 0.5207169055938721, "learning_rate": 1.999501321375839e-05, "loss": 0.0625, "step": 14210 }, { "epoch": 0.21824879134371883, "grad_norm": 0.7203729748725891, "learning_rate": 1.9994928268698906e-05, "loss": 0.0682, "step": 14220 }, { "epoch": 0.21840227150640779, "grad_norm": 0.6636756062507629, "learning_rate": 1.9994842606441677e-05, "loss": 0.0624, "step": 14230 }, { "epoch": 0.21855575166909677, "grad_norm": 0.4477170705795288, "learning_rate": 1.9994756226992846e-05, "loss": 0.0535, "step": 14240 }, { "epoch": 0.21870923183178576, "grad_norm": 0.6385126113891602, "learning_rate": 1.9994669130358617e-05, "loss": 0.0567, "step": 14250 }, { "epoch": 0.2188627119944747, "grad_norm": 0.8653728365898132, "learning_rate": 1.9994581316545233e-05, "loss": 0.0684, "step": 14260 }, { "epoch": 0.2190161921571637, "grad_norm": 0.5834375023841858, "learning_rate": 1.9994492785559004e-05, "loss": 0.0587, "step": 14270 }, { "epoch": 0.21916967231985265, "grad_norm": 0.5621783137321472, "learning_rate": 1.9994403537406275e-05, "loss": 0.0505, "step": 14280 }, { "epoch": 0.21932315248254164, "grad_norm": 0.6301398873329163, "learning_rate": 1.9994313572093454e-05, "loss": 0.0597, "step": 14290 }, { "epoch": 0.2194766326452306, "grad_norm": 0.6706529855728149, "learning_rate": 1.9994222889626994e-05, "loss": 0.0564, "step": 14300 }, { "epoch": 0.21963011280791958, "grad_norm": 0.6746609807014465, "learning_rate": 1.9994131490013405e-05, "loss": 0.0519, "step": 14310 }, { "epoch": 0.21978359297060854, "grad_norm": 0.4850047528743744, "learning_rate": 1.9994039373259243e-05, "loss": 0.0575, "step": 14320 }, { "epoch": 0.21993707313329752, "grad_norm": 0.8112771511077881, "learning_rate": 1.9993946539371117e-05, "loss": 0.0652, "step": 14330 }, { "epoch": 0.2200905532959865, "grad_norm": 0.6124503016471863, "learning_rate": 1.9993852988355692e-05, "loss": 0.0629, "step": 14340 }, { "epoch": 0.22024403345867546, "grad_norm": 0.5871568918228149, "learning_rate": 1.9993758720219677e-05, "loss": 0.0601, "step": 14350 }, { "epoch": 0.22039751362136445, "grad_norm": 0.6196720600128174, "learning_rate": 1.999366373496984e-05, "loss": 0.0586, "step": 14360 }, { "epoch": 0.2205509937840534, "grad_norm": 0.602465808391571, "learning_rate": 1.9993568032612995e-05, "loss": 0.0681, "step": 14370 }, { "epoch": 0.2207044739467424, "grad_norm": 0.5668675899505615, "learning_rate": 1.999347161315601e-05, "loss": 0.0542, "step": 14380 }, { "epoch": 0.22085795410943135, "grad_norm": 0.6315658092498779, "learning_rate": 1.9993374476605805e-05, "loss": 0.0626, "step": 14390 }, { "epoch": 0.22101143427212033, "grad_norm": 0.7300616502761841, "learning_rate": 1.9993276622969343e-05, "loss": 0.0637, "step": 14400 }, { "epoch": 0.2211649144348093, "grad_norm": 0.6223890781402588, "learning_rate": 1.9993178052253656e-05, "loss": 0.0582, "step": 14410 }, { "epoch": 0.22131839459749827, "grad_norm": 0.7580128312110901, "learning_rate": 1.999307876446581e-05, "loss": 0.073, "step": 14420 }, { "epoch": 0.22147187476018726, "grad_norm": 0.6177161931991577, "learning_rate": 1.999297875961293e-05, "loss": 0.0691, "step": 14430 }, { "epoch": 0.22162535492287622, "grad_norm": 0.5607873201370239, "learning_rate": 1.999287803770219e-05, "loss": 0.0577, "step": 14440 }, { "epoch": 0.2217788350855652, "grad_norm": 0.5764316320419312, "learning_rate": 1.9992776598740828e-05, "loss": 0.0643, "step": 14450 }, { "epoch": 0.22193231524825416, "grad_norm": 0.6295424103736877, "learning_rate": 1.9992674442736113e-05, "loss": 0.0596, "step": 14460 }, { "epoch": 0.22208579541094314, "grad_norm": 0.5990732908248901, "learning_rate": 1.9992571569695376e-05, "loss": 0.0563, "step": 14470 }, { "epoch": 0.2222392755736321, "grad_norm": 0.5506800413131714, "learning_rate": 1.9992467979626006e-05, "loss": 0.0549, "step": 14480 }, { "epoch": 0.22239275573632108, "grad_norm": 0.7446104288101196, "learning_rate": 1.9992363672535423e-05, "loss": 0.0608, "step": 14490 }, { "epoch": 0.22254623589901004, "grad_norm": 0.6586883664131165, "learning_rate": 1.9992258648431124e-05, "loss": 0.0511, "step": 14500 }, { "epoch": 0.22269971606169903, "grad_norm": 0.6080570816993713, "learning_rate": 1.999215290732064e-05, "loss": 0.0513, "step": 14510 }, { "epoch": 0.222853196224388, "grad_norm": 0.7946155667304993, "learning_rate": 1.999204644921156e-05, "loss": 0.0621, "step": 14520 }, { "epoch": 0.22300667638707697, "grad_norm": 0.7228635549545288, "learning_rate": 1.9991939274111525e-05, "loss": 0.0519, "step": 14530 }, { "epoch": 0.22316015654976595, "grad_norm": 0.871181309223175, "learning_rate": 1.9991831382028216e-05, "loss": 0.0585, "step": 14540 }, { "epoch": 0.2233136367124549, "grad_norm": 0.8156861066818237, "learning_rate": 1.999172277296939e-05, "loss": 0.0497, "step": 14550 }, { "epoch": 0.2234671168751439, "grad_norm": 0.5428505539894104, "learning_rate": 1.9991613446942826e-05, "loss": 0.0688, "step": 14560 }, { "epoch": 0.22362059703783285, "grad_norm": 0.6282780170440674, "learning_rate": 1.9991503403956377e-05, "loss": 0.0572, "step": 14570 }, { "epoch": 0.22377407720052184, "grad_norm": 0.5526107549667358, "learning_rate": 1.9991392644017938e-05, "loss": 0.0553, "step": 14580 }, { "epoch": 0.2239275573632108, "grad_norm": 0.7188931703567505, "learning_rate": 1.9991281167135454e-05, "loss": 0.0616, "step": 14590 }, { "epoch": 0.22408103752589978, "grad_norm": 0.639098048210144, "learning_rate": 1.999116897331693e-05, "loss": 0.0466, "step": 14600 }, { "epoch": 0.22423451768858876, "grad_norm": 0.9053958654403687, "learning_rate": 1.9991056062570407e-05, "loss": 0.0553, "step": 14610 }, { "epoch": 0.22438799785127772, "grad_norm": 0.5362327098846436, "learning_rate": 1.9990942434903995e-05, "loss": 0.0624, "step": 14620 }, { "epoch": 0.2245414780139667, "grad_norm": 0.3606760799884796, "learning_rate": 1.9990828090325847e-05, "loss": 0.064, "step": 14630 }, { "epoch": 0.22469495817665566, "grad_norm": 0.5543673634529114, "learning_rate": 1.9990713028844166e-05, "loss": 0.0719, "step": 14640 }, { "epoch": 0.22484843833934465, "grad_norm": 0.7441783547401428, "learning_rate": 1.9990597250467205e-05, "loss": 0.0644, "step": 14650 }, { "epoch": 0.2250019185020336, "grad_norm": 0.6776754856109619, "learning_rate": 1.9990480755203276e-05, "loss": 0.0652, "step": 14660 }, { "epoch": 0.2251553986647226, "grad_norm": 0.47906970977783203, "learning_rate": 1.999036354306074e-05, "loss": 0.0581, "step": 14670 }, { "epoch": 0.22530887882741155, "grad_norm": 0.5478805303573608, "learning_rate": 1.9990245614048004e-05, "loss": 0.0533, "step": 14680 }, { "epoch": 0.22546235899010053, "grad_norm": 0.7845261096954346, "learning_rate": 1.9990126968173534e-05, "loss": 0.0656, "step": 14690 }, { "epoch": 0.22561583915278952, "grad_norm": 0.7383500933647156, "learning_rate": 1.9990007605445835e-05, "loss": 0.078, "step": 14700 }, { "epoch": 0.22576931931547847, "grad_norm": 0.7300102114677429, "learning_rate": 1.9989887525873484e-05, "loss": 0.0761, "step": 14710 }, { "epoch": 0.22592279947816746, "grad_norm": 0.5957512855529785, "learning_rate": 1.998976672946509e-05, "loss": 0.068, "step": 14720 }, { "epoch": 0.22607627964085641, "grad_norm": 0.5391073822975159, "learning_rate": 1.998964521622932e-05, "loss": 0.0557, "step": 14730 }, { "epoch": 0.2262297598035454, "grad_norm": 0.6271214485168457, "learning_rate": 1.9989522986174896e-05, "loss": 0.0565, "step": 14740 }, { "epoch": 0.22638323996623436, "grad_norm": 0.6075308322906494, "learning_rate": 1.998940003931059e-05, "loss": 0.0524, "step": 14750 }, { "epoch": 0.22653672012892334, "grad_norm": 0.7088937163352966, "learning_rate": 1.9989276375645224e-05, "loss": 0.0594, "step": 14760 }, { "epoch": 0.2266902002916123, "grad_norm": 0.917729377746582, "learning_rate": 1.9989151995187667e-05, "loss": 0.0615, "step": 14770 }, { "epoch": 0.22684368045430128, "grad_norm": 0.5751436352729797, "learning_rate": 1.998902689794685e-05, "loss": 0.0698, "step": 14780 }, { "epoch": 0.22699716061699027, "grad_norm": 0.6851555705070496, "learning_rate": 1.9988901083931743e-05, "loss": 0.0609, "step": 14790 }, { "epoch": 0.22715064077967922, "grad_norm": 0.9790156483650208, "learning_rate": 1.998877455315138e-05, "loss": 0.0623, "step": 14800 }, { "epoch": 0.2273041209423682, "grad_norm": 0.45404157042503357, "learning_rate": 1.998864730561484e-05, "loss": 0.0661, "step": 14810 }, { "epoch": 0.22745760110505717, "grad_norm": 0.7999679446220398, "learning_rate": 1.998851934133125e-05, "loss": 0.0581, "step": 14820 }, { "epoch": 0.22761108126774615, "grad_norm": 0.590615451335907, "learning_rate": 1.998839066030979e-05, "loss": 0.0572, "step": 14830 }, { "epoch": 0.2277645614304351, "grad_norm": 0.8488551378250122, "learning_rate": 1.9988261262559705e-05, "loss": 0.0609, "step": 14840 }, { "epoch": 0.2279180415931241, "grad_norm": 0.5207691192626953, "learning_rate": 1.998813114809027e-05, "loss": 0.0482, "step": 14850 }, { "epoch": 0.22807152175581305, "grad_norm": 0.5534146428108215, "learning_rate": 1.9988000316910824e-05, "loss": 0.0567, "step": 14860 }, { "epoch": 0.22822500191850204, "grad_norm": 0.7389765977859497, "learning_rate": 1.9987868769030755e-05, "loss": 0.0491, "step": 14870 }, { "epoch": 0.22837848208119102, "grad_norm": 0.7593105435371399, "learning_rate": 1.99877365044595e-05, "loss": 0.062, "step": 14880 }, { "epoch": 0.22853196224387998, "grad_norm": 0.815538763999939, "learning_rate": 1.998760352320656e-05, "loss": 0.0624, "step": 14890 }, { "epoch": 0.22868544240656896, "grad_norm": 0.6417046785354614, "learning_rate": 1.9987469825281463e-05, "loss": 0.0712, "step": 14900 }, { "epoch": 0.22883892256925792, "grad_norm": 0.6083815097808838, "learning_rate": 1.998733541069381e-05, "loss": 0.0649, "step": 14910 }, { "epoch": 0.2289924027319469, "grad_norm": 0.6058230400085449, "learning_rate": 1.998720027945325e-05, "loss": 0.0599, "step": 14920 }, { "epoch": 0.22914588289463586, "grad_norm": 0.7221171855926514, "learning_rate": 1.9987064431569472e-05, "loss": 0.064, "step": 14930 }, { "epoch": 0.22929936305732485, "grad_norm": 0.5528369545936584, "learning_rate": 1.9986927867052226e-05, "loss": 0.0537, "step": 14940 }, { "epoch": 0.2294528432200138, "grad_norm": 0.8177332878112793, "learning_rate": 1.9986790585911313e-05, "loss": 0.0678, "step": 14950 }, { "epoch": 0.2296063233827028, "grad_norm": 0.6784903407096863, "learning_rate": 1.9986652588156583e-05, "loss": 0.0615, "step": 14960 }, { "epoch": 0.22975980354539177, "grad_norm": 0.7587006688117981, "learning_rate": 1.9986513873797936e-05, "loss": 0.0701, "step": 14970 }, { "epoch": 0.22991328370808073, "grad_norm": 0.6042486429214478, "learning_rate": 1.998637444284533e-05, "loss": 0.0553, "step": 14980 }, { "epoch": 0.2300667638707697, "grad_norm": 0.7217245101928711, "learning_rate": 1.998623429530877e-05, "loss": 0.0579, "step": 14990 }, { "epoch": 0.23022024403345867, "grad_norm": 0.654309093952179, "learning_rate": 1.9986093431198306e-05, "loss": 0.0638, "step": 15000 }, { "epoch": 0.23037372419614766, "grad_norm": 0.5845738649368286, "learning_rate": 1.9985951850524052e-05, "loss": 0.0595, "step": 15010 }, { "epoch": 0.2305272043588366, "grad_norm": 0.6496204733848572, "learning_rate": 1.9985809553296164e-05, "loss": 0.0478, "step": 15020 }, { "epoch": 0.2306806845215256, "grad_norm": 0.618815541267395, "learning_rate": 1.9985666539524857e-05, "loss": 0.0659, "step": 15030 }, { "epoch": 0.23083416468421455, "grad_norm": 0.6674293279647827, "learning_rate": 1.9985522809220387e-05, "loss": 0.0605, "step": 15040 }, { "epoch": 0.23098764484690354, "grad_norm": 0.4488937556743622, "learning_rate": 1.9985378362393075e-05, "loss": 0.0566, "step": 15050 }, { "epoch": 0.2311411250095925, "grad_norm": 0.7185242176055908, "learning_rate": 1.9985233199053278e-05, "loss": 0.0492, "step": 15060 }, { "epoch": 0.23129460517228148, "grad_norm": 0.6373649835586548, "learning_rate": 1.9985087319211417e-05, "loss": 0.0585, "step": 15070 }, { "epoch": 0.23144808533497047, "grad_norm": 0.835216224193573, "learning_rate": 1.998494072287796e-05, "loss": 0.0537, "step": 15080 }, { "epoch": 0.23160156549765942, "grad_norm": 0.4702787697315216, "learning_rate": 1.998479341006342e-05, "loss": 0.0521, "step": 15090 }, { "epoch": 0.2317550456603484, "grad_norm": 0.4959578812122345, "learning_rate": 1.998464538077838e-05, "loss": 0.0616, "step": 15100 }, { "epoch": 0.23190852582303736, "grad_norm": 0.5908384323120117, "learning_rate": 1.9984496635033452e-05, "loss": 0.0714, "step": 15110 }, { "epoch": 0.23206200598572635, "grad_norm": 0.670657217502594, "learning_rate": 1.9984347172839312e-05, "loss": 0.0651, "step": 15120 }, { "epoch": 0.2322154861484153, "grad_norm": 0.692574679851532, "learning_rate": 1.9984196994206684e-05, "loss": 0.0652, "step": 15130 }, { "epoch": 0.2323689663111043, "grad_norm": 0.4876822531223297, "learning_rate": 1.9984046099146344e-05, "loss": 0.055, "step": 15140 }, { "epoch": 0.23252244647379325, "grad_norm": 0.5166864991188049, "learning_rate": 1.9983894487669123e-05, "loss": 0.0623, "step": 15150 }, { "epoch": 0.23267592663648223, "grad_norm": 0.630760133266449, "learning_rate": 1.99837421597859e-05, "loss": 0.068, "step": 15160 }, { "epoch": 0.23282940679917122, "grad_norm": 0.6000109314918518, "learning_rate": 1.99835891155076e-05, "loss": 0.0665, "step": 15170 }, { "epoch": 0.23298288696186017, "grad_norm": 0.4360281825065613, "learning_rate": 1.9983435354845207e-05, "loss": 0.0508, "step": 15180 }, { "epoch": 0.23313636712454916, "grad_norm": 0.6805937886238098, "learning_rate": 1.9983280877809758e-05, "loss": 0.0617, "step": 15190 }, { "epoch": 0.23328984728723812, "grad_norm": 0.6484940648078918, "learning_rate": 1.9983125684412335e-05, "loss": 0.0527, "step": 15200 }, { "epoch": 0.2334433274499271, "grad_norm": 0.7717311978340149, "learning_rate": 1.9982969774664075e-05, "loss": 0.0675, "step": 15210 }, { "epoch": 0.23359680761261606, "grad_norm": 0.659991443157196, "learning_rate": 1.9982813148576162e-05, "loss": 0.057, "step": 15220 }, { "epoch": 0.23375028777530504, "grad_norm": 0.8836497068405151, "learning_rate": 1.9982655806159838e-05, "loss": 0.0603, "step": 15230 }, { "epoch": 0.233903767937994, "grad_norm": 0.9348165392875671, "learning_rate": 1.9982497747426395e-05, "loss": 0.0728, "step": 15240 }, { "epoch": 0.23405724810068299, "grad_norm": 0.7517918944358826, "learning_rate": 1.9982338972387173e-05, "loss": 0.0723, "step": 15250 }, { "epoch": 0.23421072826337197, "grad_norm": 0.6677565574645996, "learning_rate": 1.998217948105356e-05, "loss": 0.0613, "step": 15260 }, { "epoch": 0.23436420842606093, "grad_norm": 0.5324375629425049, "learning_rate": 1.998201927343701e-05, "loss": 0.0646, "step": 15270 }, { "epoch": 0.2345176885887499, "grad_norm": 0.6642065048217773, "learning_rate": 1.9981858349549014e-05, "loss": 0.0604, "step": 15280 }, { "epoch": 0.23467116875143887, "grad_norm": 0.6062251329421997, "learning_rate": 1.9981696709401116e-05, "loss": 0.0609, "step": 15290 }, { "epoch": 0.23482464891412785, "grad_norm": 0.5816442370414734, "learning_rate": 1.9981534353004923e-05, "loss": 0.0638, "step": 15300 }, { "epoch": 0.2349781290768168, "grad_norm": 0.8123746514320374, "learning_rate": 1.9981371280372075e-05, "loss": 0.061, "step": 15310 }, { "epoch": 0.2351316092395058, "grad_norm": 0.4085613489151001, "learning_rate": 1.998120749151428e-05, "loss": 0.0581, "step": 15320 }, { "epoch": 0.23528508940219475, "grad_norm": 0.6184921860694885, "learning_rate": 1.998104298644329e-05, "loss": 0.0598, "step": 15330 }, { "epoch": 0.23543856956488374, "grad_norm": 0.6159772276878357, "learning_rate": 1.9980877765170907e-05, "loss": 0.0538, "step": 15340 }, { "epoch": 0.23559204972757272, "grad_norm": 0.5771443843841553, "learning_rate": 1.998071182770899e-05, "loss": 0.0703, "step": 15350 }, { "epoch": 0.23574552989026168, "grad_norm": 0.6226953864097595, "learning_rate": 1.9980545174069444e-05, "loss": 0.0552, "step": 15360 }, { "epoch": 0.23589901005295066, "grad_norm": 0.7343754768371582, "learning_rate": 1.9980377804264227e-05, "loss": 0.0678, "step": 15370 }, { "epoch": 0.23605249021563962, "grad_norm": 0.7094153761863708, "learning_rate": 1.9980209718305352e-05, "loss": 0.0723, "step": 15380 }, { "epoch": 0.2362059703783286, "grad_norm": 0.5385090112686157, "learning_rate": 1.9980040916204878e-05, "loss": 0.0498, "step": 15390 }, { "epoch": 0.23635945054101756, "grad_norm": 0.9128662347793579, "learning_rate": 1.9979871397974916e-05, "loss": 0.0547, "step": 15400 }, { "epoch": 0.23651293070370655, "grad_norm": 0.4793127477169037, "learning_rate": 1.9979701163627633e-05, "loss": 0.0669, "step": 15410 }, { "epoch": 0.2366664108663955, "grad_norm": 0.45918160676956177, "learning_rate": 1.9979530213175238e-05, "loss": 0.0556, "step": 15420 }, { "epoch": 0.2368198910290845, "grad_norm": 0.7890644073486328, "learning_rate": 1.9979358546630004e-05, "loss": 0.0666, "step": 15430 }, { "epoch": 0.23697337119177347, "grad_norm": 0.6319273114204407, "learning_rate": 1.997918616400425e-05, "loss": 0.0567, "step": 15440 }, { "epoch": 0.23712685135446243, "grad_norm": 0.8440466523170471, "learning_rate": 1.9979013065310342e-05, "loss": 0.0706, "step": 15450 }, { "epoch": 0.23728033151715142, "grad_norm": 0.6174749135971069, "learning_rate": 1.9978839250560704e-05, "loss": 0.0571, "step": 15460 }, { "epoch": 0.23743381167984037, "grad_norm": 0.5734860301017761, "learning_rate": 1.9978664719767803e-05, "loss": 0.0627, "step": 15470 }, { "epoch": 0.23758729184252936, "grad_norm": 0.8001341819763184, "learning_rate": 1.9978489472944168e-05, "loss": 0.0532, "step": 15480 }, { "epoch": 0.23774077200521831, "grad_norm": 0.5481273531913757, "learning_rate": 1.997831351010237e-05, "loss": 0.0626, "step": 15490 }, { "epoch": 0.2378942521679073, "grad_norm": 0.7094454765319824, "learning_rate": 1.997813683125504e-05, "loss": 0.0578, "step": 15500 }, { "epoch": 0.23804773233059626, "grad_norm": 1.337729573249817, "learning_rate": 1.9977959436414853e-05, "loss": 0.0811, "step": 15510 }, { "epoch": 0.23820121249328524, "grad_norm": 0.7395841479301453, "learning_rate": 1.9977781325594537e-05, "loss": 0.0682, "step": 15520 }, { "epoch": 0.23835469265597423, "grad_norm": 0.5390929579734802, "learning_rate": 1.9977602498806873e-05, "loss": 0.0606, "step": 15530 }, { "epoch": 0.23850817281866318, "grad_norm": 0.6256608366966248, "learning_rate": 1.9977422956064698e-05, "loss": 0.0547, "step": 15540 }, { "epoch": 0.23866165298135217, "grad_norm": 0.6143786311149597, "learning_rate": 1.9977242697380888e-05, "loss": 0.0566, "step": 15550 }, { "epoch": 0.23881513314404113, "grad_norm": 0.7356338500976562, "learning_rate": 1.997706172276838e-05, "loss": 0.0581, "step": 15560 }, { "epoch": 0.2389686133067301, "grad_norm": 0.5745862126350403, "learning_rate": 1.9976880032240165e-05, "loss": 0.0686, "step": 15570 }, { "epoch": 0.23912209346941907, "grad_norm": 0.622582197189331, "learning_rate": 1.9976697625809273e-05, "loss": 0.049, "step": 15580 }, { "epoch": 0.23927557363210805, "grad_norm": 0.5801891684532166, "learning_rate": 1.9976514503488797e-05, "loss": 0.0536, "step": 15590 }, { "epoch": 0.239429053794797, "grad_norm": 0.4257759749889374, "learning_rate": 1.9976330665291877e-05, "loss": 0.0569, "step": 15600 }, { "epoch": 0.239582533957486, "grad_norm": 0.7763046622276306, "learning_rate": 1.9976146111231702e-05, "loss": 0.0628, "step": 15610 }, { "epoch": 0.23973601412017498, "grad_norm": 0.6312134861946106, "learning_rate": 1.9975960841321516e-05, "loss": 0.0661, "step": 15620 }, { "epoch": 0.23988949428286394, "grad_norm": 0.6053335666656494, "learning_rate": 1.9975774855574615e-05, "loss": 0.0628, "step": 15630 }, { "epoch": 0.24004297444555292, "grad_norm": 0.4914205074310303, "learning_rate": 1.9975588154004345e-05, "loss": 0.0546, "step": 15640 }, { "epoch": 0.24019645460824188, "grad_norm": 0.6961408853530884, "learning_rate": 1.99754007366241e-05, "loss": 0.066, "step": 15650 }, { "epoch": 0.24034993477093086, "grad_norm": 0.6752996444702148, "learning_rate": 1.997521260344733e-05, "loss": 0.0612, "step": 15660 }, { "epoch": 0.24050341493361982, "grad_norm": 0.743779718875885, "learning_rate": 1.9975023754487538e-05, "loss": 0.0593, "step": 15670 }, { "epoch": 0.2406568950963088, "grad_norm": 0.6362804174423218, "learning_rate": 1.9974834189758267e-05, "loss": 0.0677, "step": 15680 }, { "epoch": 0.24081037525899776, "grad_norm": 0.6353877782821655, "learning_rate": 1.9974643909273126e-05, "loss": 0.0632, "step": 15690 }, { "epoch": 0.24096385542168675, "grad_norm": 0.5322431325912476, "learning_rate": 1.9974452913045767e-05, "loss": 0.0508, "step": 15700 }, { "epoch": 0.24111733558437573, "grad_norm": 0.8206204771995544, "learning_rate": 1.9974261201089894e-05, "loss": 0.0695, "step": 15710 }, { "epoch": 0.2412708157470647, "grad_norm": 0.630565345287323, "learning_rate": 1.9974068773419267e-05, "loss": 0.0567, "step": 15720 }, { "epoch": 0.24142429590975367, "grad_norm": 0.6915889978408813, "learning_rate": 1.997387563004769e-05, "loss": 0.0677, "step": 15730 }, { "epoch": 0.24157777607244263, "grad_norm": 0.38701170682907104, "learning_rate": 1.9973681770989022e-05, "loss": 0.0615, "step": 15740 }, { "epoch": 0.24173125623513161, "grad_norm": 0.534241795539856, "learning_rate": 1.9973487196257177e-05, "loss": 0.0576, "step": 15750 }, { "epoch": 0.24188473639782057, "grad_norm": 0.6182935237884521, "learning_rate": 1.9973291905866117e-05, "loss": 0.0553, "step": 15760 }, { "epoch": 0.24203821656050956, "grad_norm": 0.5270177721977234, "learning_rate": 1.997309589982985e-05, "loss": 0.0492, "step": 15770 }, { "epoch": 0.2421916967231985, "grad_norm": 0.6028861999511719, "learning_rate": 1.9972899178162447e-05, "loss": 0.0524, "step": 15780 }, { "epoch": 0.2423451768858875, "grad_norm": 0.8850976228713989, "learning_rate": 1.997270174087802e-05, "loss": 0.0571, "step": 15790 }, { "epoch": 0.24249865704857648, "grad_norm": 0.5224472284317017, "learning_rate": 1.9972503587990737e-05, "loss": 0.0619, "step": 15800 }, { "epoch": 0.24265213721126544, "grad_norm": 0.5752559900283813, "learning_rate": 1.9972304719514817e-05, "loss": 0.0666, "step": 15810 }, { "epoch": 0.24280561737395442, "grad_norm": 0.7958666682243347, "learning_rate": 1.997210513546453e-05, "loss": 0.0567, "step": 15820 }, { "epoch": 0.24295909753664338, "grad_norm": 0.6944963335990906, "learning_rate": 1.9971904835854198e-05, "loss": 0.0578, "step": 15830 }, { "epoch": 0.24311257769933237, "grad_norm": 0.6562560796737671, "learning_rate": 1.9971703820698197e-05, "loss": 0.0556, "step": 15840 }, { "epoch": 0.24326605786202132, "grad_norm": 0.5977939367294312, "learning_rate": 1.9971502090010944e-05, "loss": 0.0719, "step": 15850 }, { "epoch": 0.2434195380247103, "grad_norm": 0.49116694927215576, "learning_rate": 1.9971299643806918e-05, "loss": 0.0554, "step": 15860 }, { "epoch": 0.24357301818739927, "grad_norm": 0.4952727258205414, "learning_rate": 1.9971096482100645e-05, "loss": 0.0505, "step": 15870 }, { "epoch": 0.24372649835008825, "grad_norm": 0.6234854459762573, "learning_rate": 1.9970892604906703e-05, "loss": 0.0483, "step": 15880 }, { "epoch": 0.24387997851277723, "grad_norm": 0.5275224447250366, "learning_rate": 1.9970688012239723e-05, "loss": 0.0505, "step": 15890 }, { "epoch": 0.2440334586754662, "grad_norm": 0.5542510747909546, "learning_rate": 1.9970482704114388e-05, "loss": 0.0631, "step": 15900 }, { "epoch": 0.24418693883815518, "grad_norm": 0.6129814386367798, "learning_rate": 1.997027668054542e-05, "loss": 0.0637, "step": 15910 }, { "epoch": 0.24434041900084413, "grad_norm": 0.5965235233306885, "learning_rate": 1.9970069941547617e-05, "loss": 0.0612, "step": 15920 }, { "epoch": 0.24449389916353312, "grad_norm": 0.4932247996330261, "learning_rate": 1.9969862487135803e-05, "loss": 0.0526, "step": 15930 }, { "epoch": 0.24464737932622208, "grad_norm": 0.6552684307098389, "learning_rate": 1.9969654317324868e-05, "loss": 0.057, "step": 15940 }, { "epoch": 0.24480085948891106, "grad_norm": 0.5942704677581787, "learning_rate": 1.9969445432129747e-05, "loss": 0.0637, "step": 15950 }, { "epoch": 0.24495433965160002, "grad_norm": 0.8126421570777893, "learning_rate": 1.9969235831565435e-05, "loss": 0.0492, "step": 15960 }, { "epoch": 0.245107819814289, "grad_norm": 0.7141304016113281, "learning_rate": 1.9969025515646963e-05, "loss": 0.0622, "step": 15970 }, { "epoch": 0.245261299976978, "grad_norm": 0.7302089929580688, "learning_rate": 1.996881448438943e-05, "loss": 0.065, "step": 15980 }, { "epoch": 0.24541478013966694, "grad_norm": 0.7705302238464355, "learning_rate": 1.9968602737807977e-05, "loss": 0.0639, "step": 15990 }, { "epoch": 0.24556826030235593, "grad_norm": 0.6932687163352966, "learning_rate": 1.9968390275917794e-05, "loss": 0.0624, "step": 16000 }, { "epoch": 0.24572174046504489, "grad_norm": 0.6504892110824585, "learning_rate": 1.996817709873413e-05, "loss": 0.0672, "step": 16010 }, { "epoch": 0.24587522062773387, "grad_norm": 0.7078189849853516, "learning_rate": 1.9967963206272284e-05, "loss": 0.053, "step": 16020 }, { "epoch": 0.24602870079042283, "grad_norm": 0.5801723599433899, "learning_rate": 1.9967748598547602e-05, "loss": 0.0558, "step": 16030 }, { "epoch": 0.2461821809531118, "grad_norm": 0.49740904569625854, "learning_rate": 1.9967533275575483e-05, "loss": 0.0466, "step": 16040 }, { "epoch": 0.24633566111580077, "grad_norm": 0.5121061205863953, "learning_rate": 1.9967317237371376e-05, "loss": 0.0592, "step": 16050 }, { "epoch": 0.24648914127848975, "grad_norm": 0.49589163064956665, "learning_rate": 1.9967100483950784e-05, "loss": 0.0605, "step": 16060 }, { "epoch": 0.24664262144117874, "grad_norm": 0.7104998230934143, "learning_rate": 1.996688301532926e-05, "loss": 0.0545, "step": 16070 }, { "epoch": 0.2467961016038677, "grad_norm": 0.7285618782043457, "learning_rate": 1.9966664831522417e-05, "loss": 0.0692, "step": 16080 }, { "epoch": 0.24694958176655668, "grad_norm": 0.6806405782699585, "learning_rate": 1.9966445932545896e-05, "loss": 0.0596, "step": 16090 }, { "epoch": 0.24710306192924564, "grad_norm": 0.5734701156616211, "learning_rate": 1.996622631841542e-05, "loss": 0.0562, "step": 16100 }, { "epoch": 0.24725654209193462, "grad_norm": 0.690426230430603, "learning_rate": 1.9966005989146734e-05, "loss": 0.0639, "step": 16110 }, { "epoch": 0.24741002225462358, "grad_norm": 0.5872116088867188, "learning_rate": 1.9965784944755654e-05, "loss": 0.064, "step": 16120 }, { "epoch": 0.24756350241731256, "grad_norm": 0.5925194025039673, "learning_rate": 1.9965563185258045e-05, "loss": 0.0425, "step": 16130 }, { "epoch": 0.24771698258000152, "grad_norm": 0.6132639646530151, "learning_rate": 1.996534071066981e-05, "loss": 0.0446, "step": 16140 }, { "epoch": 0.2478704627426905, "grad_norm": 0.5689213871955872, "learning_rate": 1.9965117521006926e-05, "loss": 0.0567, "step": 16150 }, { "epoch": 0.2480239429053795, "grad_norm": 0.5109143853187561, "learning_rate": 1.9964893616285397e-05, "loss": 0.0504, "step": 16160 }, { "epoch": 0.24817742306806845, "grad_norm": 0.7691954970359802, "learning_rate": 1.9964668996521294e-05, "loss": 0.0626, "step": 16170 }, { "epoch": 0.24833090323075743, "grad_norm": 0.805292010307312, "learning_rate": 1.9964443661730733e-05, "loss": 0.0608, "step": 16180 }, { "epoch": 0.2484843833934464, "grad_norm": 0.6602947115898132, "learning_rate": 1.9964217611929887e-05, "loss": 0.0629, "step": 16190 }, { "epoch": 0.24863786355613537, "grad_norm": 0.5386625528335571, "learning_rate": 1.9963990847134974e-05, "loss": 0.064, "step": 16200 }, { "epoch": 0.24879134371882433, "grad_norm": 1.0194251537322998, "learning_rate": 1.9963763367362264e-05, "loss": 0.061, "step": 16210 }, { "epoch": 0.24894482388151332, "grad_norm": 0.5618758797645569, "learning_rate": 1.9963535172628085e-05, "loss": 0.061, "step": 16220 }, { "epoch": 0.24909830404420227, "grad_norm": 0.8818707466125488, "learning_rate": 1.9963306262948804e-05, "loss": 0.0513, "step": 16230 }, { "epoch": 0.24925178420689126, "grad_norm": 0.7570414543151855, "learning_rate": 1.9963076638340854e-05, "loss": 0.0511, "step": 16240 }, { "epoch": 0.24940526436958024, "grad_norm": 0.524884045124054, "learning_rate": 1.996284629882071e-05, "loss": 0.0507, "step": 16250 }, { "epoch": 0.2495587445322692, "grad_norm": 0.539867103099823, "learning_rate": 1.9962615244404894e-05, "loss": 0.0576, "step": 16260 }, { "epoch": 0.24971222469495818, "grad_norm": 0.46810466051101685, "learning_rate": 1.9962383475109997e-05, "loss": 0.0629, "step": 16270 }, { "epoch": 0.24986570485764714, "grad_norm": 0.6328993439674377, "learning_rate": 1.996215099095264e-05, "loss": 0.0598, "step": 16280 }, { "epoch": 0.2500191850203361, "grad_norm": 0.6169353127479553, "learning_rate": 1.996191779194951e-05, "loss": 0.0566, "step": 16290 }, { "epoch": 0.2501726651830251, "grad_norm": 0.6910533905029297, "learning_rate": 1.9961683878117338e-05, "loss": 0.0487, "step": 16300 }, { "epoch": 0.25032614534571407, "grad_norm": 0.6535077691078186, "learning_rate": 1.996144924947291e-05, "loss": 0.0619, "step": 16310 }, { "epoch": 0.250479625508403, "grad_norm": 0.5769293308258057, "learning_rate": 1.9961213906033065e-05, "loss": 0.0564, "step": 16320 }, { "epoch": 0.25063310567109204, "grad_norm": 0.6339013576507568, "learning_rate": 1.9960977847814685e-05, "loss": 0.0778, "step": 16330 }, { "epoch": 0.250786585833781, "grad_norm": 0.772063672542572, "learning_rate": 1.9960741074834713e-05, "loss": 0.0635, "step": 16340 }, { "epoch": 0.25094006599646995, "grad_norm": 0.554348349571228, "learning_rate": 1.9960503587110135e-05, "loss": 0.0632, "step": 16350 }, { "epoch": 0.2510935461591589, "grad_norm": 0.6697093844413757, "learning_rate": 1.9960265384657994e-05, "loss": 0.0717, "step": 16360 }, { "epoch": 0.2512470263218479, "grad_norm": 0.7311115860939026, "learning_rate": 1.9960026467495386e-05, "loss": 0.0587, "step": 16370 }, { "epoch": 0.2514005064845369, "grad_norm": 0.5088156461715698, "learning_rate": 1.995978683563945e-05, "loss": 0.0504, "step": 16380 }, { "epoch": 0.25155398664722584, "grad_norm": 0.8545660972595215, "learning_rate": 1.995954648910738e-05, "loss": 0.0518, "step": 16390 }, { "epoch": 0.2517074668099148, "grad_norm": 0.5819445252418518, "learning_rate": 1.9959305427916428e-05, "loss": 0.0556, "step": 16400 }, { "epoch": 0.2518609469726038, "grad_norm": 0.7484441995620728, "learning_rate": 1.995906365208389e-05, "loss": 0.0548, "step": 16410 }, { "epoch": 0.25201442713529276, "grad_norm": 0.6420010924339294, "learning_rate": 1.995882116162711e-05, "loss": 0.0637, "step": 16420 }, { "epoch": 0.2521679072979817, "grad_norm": 0.4836702346801758, "learning_rate": 1.9958577956563492e-05, "loss": 0.0484, "step": 16430 }, { "epoch": 0.25232138746067073, "grad_norm": 0.5415528416633606, "learning_rate": 1.995833403691049e-05, "loss": 0.0453, "step": 16440 }, { "epoch": 0.2524748676233597, "grad_norm": 0.521331787109375, "learning_rate": 1.99580894026856e-05, "loss": 0.0602, "step": 16450 }, { "epoch": 0.25262834778604865, "grad_norm": 0.5374608039855957, "learning_rate": 1.995784405390638e-05, "loss": 0.0527, "step": 16460 }, { "epoch": 0.2527818279487376, "grad_norm": 0.6850093603134155, "learning_rate": 1.9957597990590438e-05, "loss": 0.0634, "step": 16470 }, { "epoch": 0.2529353081114266, "grad_norm": 0.4682241380214691, "learning_rate": 1.9957351212755424e-05, "loss": 0.0638, "step": 16480 }, { "epoch": 0.2530887882741156, "grad_norm": 0.645840585231781, "learning_rate": 1.9957103720419052e-05, "loss": 0.0466, "step": 16490 }, { "epoch": 0.25324226843680453, "grad_norm": 0.7479425668716431, "learning_rate": 1.995685551359908e-05, "loss": 0.0603, "step": 16500 }, { "epoch": 0.25339574859949354, "grad_norm": 0.4352776110172272, "learning_rate": 1.9956606592313316e-05, "loss": 0.0522, "step": 16510 }, { "epoch": 0.2535492287621825, "grad_norm": 0.38905972242355347, "learning_rate": 1.9956356956579624e-05, "loss": 0.0485, "step": 16520 }, { "epoch": 0.25370270892487146, "grad_norm": 0.7980170845985413, "learning_rate": 1.995610660641591e-05, "loss": 0.0651, "step": 16530 }, { "epoch": 0.2538561890875604, "grad_norm": 0.46143394708633423, "learning_rate": 1.995585554184015e-05, "loss": 0.0623, "step": 16540 }, { "epoch": 0.2540096692502494, "grad_norm": 0.4717766344547272, "learning_rate": 1.995560376287035e-05, "loss": 0.0573, "step": 16550 }, { "epoch": 0.2541631494129384, "grad_norm": 0.48673465847969055, "learning_rate": 1.9955351269524584e-05, "loss": 0.0684, "step": 16560 }, { "epoch": 0.25431662957562734, "grad_norm": 0.9165570139884949, "learning_rate": 1.995509806182096e-05, "loss": 0.0666, "step": 16570 }, { "epoch": 0.2544701097383163, "grad_norm": 0.6778225898742676, "learning_rate": 1.9954844139777656e-05, "loss": 0.0633, "step": 16580 }, { "epoch": 0.2546235899010053, "grad_norm": 0.5646042823791504, "learning_rate": 1.995458950341289e-05, "loss": 0.0579, "step": 16590 }, { "epoch": 0.25477707006369427, "grad_norm": 0.6401301026344299, "learning_rate": 1.995433415274493e-05, "loss": 0.0623, "step": 16600 }, { "epoch": 0.2549305502263832, "grad_norm": 0.5574002861976624, "learning_rate": 1.9954078087792104e-05, "loss": 0.0656, "step": 16610 }, { "epoch": 0.25508403038907224, "grad_norm": 0.6800645589828491, "learning_rate": 1.9953821308572787e-05, "loss": 0.0599, "step": 16620 }, { "epoch": 0.2552375105517612, "grad_norm": 0.5408307313919067, "learning_rate": 1.99535638151054e-05, "loss": 0.0602, "step": 16630 }, { "epoch": 0.25539099071445015, "grad_norm": 0.5185867547988892, "learning_rate": 1.995330560740842e-05, "loss": 0.0527, "step": 16640 }, { "epoch": 0.2555444708771391, "grad_norm": 0.6583004593849182, "learning_rate": 1.995304668550038e-05, "loss": 0.0561, "step": 16650 }, { "epoch": 0.2556979510398281, "grad_norm": 0.5320488214492798, "learning_rate": 1.9952787049399853e-05, "loss": 0.0593, "step": 16660 }, { "epoch": 0.2558514312025171, "grad_norm": 0.7131128311157227, "learning_rate": 1.9952526699125474e-05, "loss": 0.0687, "step": 16670 }, { "epoch": 0.25600491136520603, "grad_norm": 0.6757879257202148, "learning_rate": 1.9952265634695922e-05, "loss": 0.046, "step": 16680 }, { "epoch": 0.25615839152789505, "grad_norm": 0.7038201689720154, "learning_rate": 1.9952003856129935e-05, "loss": 0.0581, "step": 16690 }, { "epoch": 0.256311871690584, "grad_norm": 0.6197981834411621, "learning_rate": 1.9951741363446286e-05, "loss": 0.068, "step": 16700 }, { "epoch": 0.25646535185327296, "grad_norm": 0.7095482349395752, "learning_rate": 1.9951478156663823e-05, "loss": 0.0704, "step": 16710 }, { "epoch": 0.2566188320159619, "grad_norm": 0.6939765810966492, "learning_rate": 1.9951214235801428e-05, "loss": 0.0641, "step": 16720 }, { "epoch": 0.25677231217865093, "grad_norm": 0.506122887134552, "learning_rate": 1.9950949600878037e-05, "loss": 0.0546, "step": 16730 }, { "epoch": 0.2569257923413399, "grad_norm": 0.5472978353500366, "learning_rate": 1.9950684251912638e-05, "loss": 0.0439, "step": 16740 }, { "epoch": 0.25707927250402884, "grad_norm": 0.5151472091674805, "learning_rate": 1.995041818892428e-05, "loss": 0.0625, "step": 16750 }, { "epoch": 0.2572327526667178, "grad_norm": 0.5450022220611572, "learning_rate": 1.9950151411932045e-05, "loss": 0.061, "step": 16760 }, { "epoch": 0.2573862328294068, "grad_norm": 1.133263111114502, "learning_rate": 1.9949883920955078e-05, "loss": 0.055, "step": 16770 }, { "epoch": 0.25753971299209577, "grad_norm": 1.072533130645752, "learning_rate": 1.994961571601258e-05, "loss": 0.0637, "step": 16780 }, { "epoch": 0.2576931931547847, "grad_norm": 0.528658390045166, "learning_rate": 1.994934679712379e-05, "loss": 0.0484, "step": 16790 }, { "epoch": 0.25784667331747374, "grad_norm": 0.5089109539985657, "learning_rate": 1.9949077164308002e-05, "loss": 0.0646, "step": 16800 }, { "epoch": 0.2580001534801627, "grad_norm": 0.6068509221076965, "learning_rate": 1.994880681758457e-05, "loss": 0.0596, "step": 16810 }, { "epoch": 0.25815363364285165, "grad_norm": 0.46313604712486267, "learning_rate": 1.994853575697289e-05, "loss": 0.0557, "step": 16820 }, { "epoch": 0.2583071138055406, "grad_norm": 0.6760433316230774, "learning_rate": 1.9948263982492413e-05, "loss": 0.0582, "step": 16830 }, { "epoch": 0.2584605939682296, "grad_norm": 0.7461320161819458, "learning_rate": 1.9947991494162638e-05, "loss": 0.058, "step": 16840 }, { "epoch": 0.2586140741309186, "grad_norm": 0.8569831252098083, "learning_rate": 1.9947718292003128e-05, "loss": 0.0522, "step": 16850 }, { "epoch": 0.25876755429360754, "grad_norm": 0.676324725151062, "learning_rate": 1.994744437603347e-05, "loss": 0.0503, "step": 16860 }, { "epoch": 0.25892103445629655, "grad_norm": 0.6947622299194336, "learning_rate": 1.9947169746273333e-05, "loss": 0.0559, "step": 16870 }, { "epoch": 0.2590745146189855, "grad_norm": 0.6806246042251587, "learning_rate": 1.994689440274242e-05, "loss": 0.0546, "step": 16880 }, { "epoch": 0.25922799478167446, "grad_norm": 0.5672016739845276, "learning_rate": 1.9946618345460482e-05, "loss": 0.0548, "step": 16890 }, { "epoch": 0.2593814749443634, "grad_norm": 0.5777589678764343, "learning_rate": 1.9946341574447334e-05, "loss": 0.0512, "step": 16900 }, { "epoch": 0.25953495510705243, "grad_norm": 0.6472449898719788, "learning_rate": 1.994606408972284e-05, "loss": 0.0628, "step": 16910 }, { "epoch": 0.2596884352697414, "grad_norm": 0.552078366279602, "learning_rate": 1.99457858913069e-05, "loss": 0.0458, "step": 16920 }, { "epoch": 0.25984191543243035, "grad_norm": 0.3471072018146515, "learning_rate": 1.9945506979219486e-05, "loss": 0.0485, "step": 16930 }, { "epoch": 0.2599953955951193, "grad_norm": 0.6280314326286316, "learning_rate": 1.994522735348061e-05, "loss": 0.0585, "step": 16940 }, { "epoch": 0.2601488757578083, "grad_norm": 0.45091211795806885, "learning_rate": 1.994494701411033e-05, "loss": 0.0479, "step": 16950 }, { "epoch": 0.2603023559204973, "grad_norm": 0.5338189005851746, "learning_rate": 1.994466596112877e-05, "loss": 0.0578, "step": 16960 }, { "epoch": 0.26045583608318623, "grad_norm": 0.38086119294166565, "learning_rate": 1.9944384194556093e-05, "loss": 0.0616, "step": 16970 }, { "epoch": 0.26060931624587524, "grad_norm": 0.5906543135643005, "learning_rate": 1.994410171441252e-05, "loss": 0.0518, "step": 16980 }, { "epoch": 0.2607627964085642, "grad_norm": 0.8759128451347351, "learning_rate": 1.9943818520718322e-05, "loss": 0.0567, "step": 16990 }, { "epoch": 0.26091627657125316, "grad_norm": 0.33309993147850037, "learning_rate": 1.9943534613493817e-05, "loss": 0.0534, "step": 17000 }, { "epoch": 0.2610697567339421, "grad_norm": 0.6971983313560486, "learning_rate": 1.994324999275938e-05, "loss": 0.0723, "step": 17010 }, { "epoch": 0.26122323689663113, "grad_norm": 0.6488213539123535, "learning_rate": 1.9942964658535428e-05, "loss": 0.0632, "step": 17020 }, { "epoch": 0.2613767170593201, "grad_norm": 0.5919948816299438, "learning_rate": 1.994267861084244e-05, "loss": 0.0655, "step": 17030 }, { "epoch": 0.26153019722200904, "grad_norm": 0.9309395551681519, "learning_rate": 1.994239184970094e-05, "loss": 0.0617, "step": 17040 }, { "epoch": 0.26168367738469805, "grad_norm": 0.515339732170105, "learning_rate": 1.994210437513151e-05, "loss": 0.0538, "step": 17050 }, { "epoch": 0.261837157547387, "grad_norm": 0.7552432417869568, "learning_rate": 1.9941816187154772e-05, "loss": 0.0662, "step": 17060 }, { "epoch": 0.26199063771007597, "grad_norm": 0.5219020247459412, "learning_rate": 1.994152728579141e-05, "loss": 0.0454, "step": 17070 }, { "epoch": 0.2621441178727649, "grad_norm": 0.7316927313804626, "learning_rate": 1.9941237671062147e-05, "loss": 0.0669, "step": 17080 }, { "epoch": 0.26229759803545394, "grad_norm": 0.5827465057373047, "learning_rate": 1.9940947342987773e-05, "loss": 0.0639, "step": 17090 }, { "epoch": 0.2624510781981429, "grad_norm": 0.5427932143211365, "learning_rate": 1.9940656301589117e-05, "loss": 0.0487, "step": 17100 }, { "epoch": 0.26260455836083185, "grad_norm": 0.5932332277297974, "learning_rate": 1.9940364546887064e-05, "loss": 0.0579, "step": 17110 }, { "epoch": 0.2627580385235208, "grad_norm": 0.664691686630249, "learning_rate": 1.9940072078902545e-05, "loss": 0.0609, "step": 17120 }, { "epoch": 0.2629115186862098, "grad_norm": 0.7124509811401367, "learning_rate": 1.993977889765655e-05, "loss": 0.055, "step": 17130 }, { "epoch": 0.2630649988488988, "grad_norm": 0.6500306129455566, "learning_rate": 1.9939485003170123e-05, "loss": 0.0529, "step": 17140 }, { "epoch": 0.26321847901158774, "grad_norm": 0.5669024586677551, "learning_rate": 1.993919039546434e-05, "loss": 0.0685, "step": 17150 }, { "epoch": 0.26337195917427675, "grad_norm": 0.5719018578529358, "learning_rate": 1.9938895074560352e-05, "loss": 0.0521, "step": 17160 }, { "epoch": 0.2635254393369657, "grad_norm": 0.7322549819946289, "learning_rate": 1.9938599040479342e-05, "loss": 0.054, "step": 17170 }, { "epoch": 0.26367891949965466, "grad_norm": 0.6158004403114319, "learning_rate": 1.9938302293242556e-05, "loss": 0.0618, "step": 17180 }, { "epoch": 0.2638323996623436, "grad_norm": 0.36046725511550903, "learning_rate": 1.993800483287129e-05, "loss": 0.0705, "step": 17190 }, { "epoch": 0.26398587982503263, "grad_norm": 0.4881291687488556, "learning_rate": 1.993770665938688e-05, "loss": 0.0597, "step": 17200 }, { "epoch": 0.2641393599877216, "grad_norm": 0.4262724220752716, "learning_rate": 1.9937407772810732e-05, "loss": 0.06, "step": 17210 }, { "epoch": 0.26429284015041055, "grad_norm": 0.7843614220619202, "learning_rate": 1.9937108173164286e-05, "loss": 0.0702, "step": 17220 }, { "epoch": 0.26444632031309956, "grad_norm": 0.5102298259735107, "learning_rate": 1.9936807860469042e-05, "loss": 0.0505, "step": 17230 }, { "epoch": 0.2645998004757885, "grad_norm": 0.6006859540939331, "learning_rate": 1.9936506834746552e-05, "loss": 0.0573, "step": 17240 }, { "epoch": 0.2647532806384775, "grad_norm": 0.5691327452659607, "learning_rate": 1.9936205096018412e-05, "loss": 0.0705, "step": 17250 }, { "epoch": 0.26490676080116643, "grad_norm": 0.5173113346099854, "learning_rate": 1.993590264430628e-05, "loss": 0.0606, "step": 17260 }, { "epoch": 0.26506024096385544, "grad_norm": 0.6122280955314636, "learning_rate": 1.993559947963185e-05, "loss": 0.0523, "step": 17270 }, { "epoch": 0.2652137211265444, "grad_norm": 0.6800746917724609, "learning_rate": 1.9935295602016885e-05, "loss": 0.0549, "step": 17280 }, { "epoch": 0.26536720128923336, "grad_norm": 0.5456201434135437, "learning_rate": 1.9934991011483182e-05, "loss": 0.056, "step": 17290 }, { "epoch": 0.2655206814519223, "grad_norm": 0.5853371024131775, "learning_rate": 1.99346857080526e-05, "loss": 0.0499, "step": 17300 }, { "epoch": 0.2656741616146113, "grad_norm": 0.5816302299499512, "learning_rate": 1.993437969174705e-05, "loss": 0.0492, "step": 17310 }, { "epoch": 0.2658276417773003, "grad_norm": 0.7429758310317993, "learning_rate": 1.9934072962588488e-05, "loss": 0.0583, "step": 17320 }, { "epoch": 0.26598112193998924, "grad_norm": 0.3888881206512451, "learning_rate": 1.993376552059892e-05, "loss": 0.0487, "step": 17330 }, { "epoch": 0.26613460210267825, "grad_norm": 0.5249912738800049, "learning_rate": 1.9933457365800415e-05, "loss": 0.056, "step": 17340 }, { "epoch": 0.2662880822653672, "grad_norm": 0.5874505043029785, "learning_rate": 1.9933148498215078e-05, "loss": 0.0601, "step": 17350 }, { "epoch": 0.26644156242805617, "grad_norm": 0.546979546546936, "learning_rate": 1.9932838917865075e-05, "loss": 0.0425, "step": 17360 }, { "epoch": 0.2665950425907451, "grad_norm": 0.6487125754356384, "learning_rate": 1.993252862477262e-05, "loss": 0.0623, "step": 17370 }, { "epoch": 0.26674852275343414, "grad_norm": 0.5102443099021912, "learning_rate": 1.9932217618959977e-05, "loss": 0.0553, "step": 17380 }, { "epoch": 0.2669020029161231, "grad_norm": 0.41881120204925537, "learning_rate": 1.9931905900449466e-05, "loss": 0.0521, "step": 17390 }, { "epoch": 0.26705548307881205, "grad_norm": 0.4140091836452484, "learning_rate": 1.993159346926345e-05, "loss": 0.054, "step": 17400 }, { "epoch": 0.26720896324150106, "grad_norm": 0.649787962436676, "learning_rate": 1.9931280325424355e-05, "loss": 0.0604, "step": 17410 }, { "epoch": 0.26736244340419, "grad_norm": 0.466367244720459, "learning_rate": 1.9930966468954644e-05, "loss": 0.0518, "step": 17420 }, { "epoch": 0.267515923566879, "grad_norm": 0.5844457745552063, "learning_rate": 1.993065189987684e-05, "loss": 0.0561, "step": 17430 }, { "epoch": 0.26766940372956793, "grad_norm": 0.5661511421203613, "learning_rate": 1.993033661821352e-05, "loss": 0.0528, "step": 17440 }, { "epoch": 0.26782288389225695, "grad_norm": 0.7046656012535095, "learning_rate": 1.9930020623987297e-05, "loss": 0.0555, "step": 17450 }, { "epoch": 0.2679763640549459, "grad_norm": 0.8353971242904663, "learning_rate": 1.9929703917220853e-05, "loss": 0.0615, "step": 17460 }, { "epoch": 0.26812984421763486, "grad_norm": 0.6019846796989441, "learning_rate": 1.9929386497936915e-05, "loss": 0.0689, "step": 17470 }, { "epoch": 0.2682833243803238, "grad_norm": 0.41746413707733154, "learning_rate": 1.9929068366158256e-05, "loss": 0.0542, "step": 17480 }, { "epoch": 0.26843680454301283, "grad_norm": 0.50107342004776, "learning_rate": 1.9928749521907704e-05, "loss": 0.0603, "step": 17490 }, { "epoch": 0.2685902847057018, "grad_norm": 0.6067327260971069, "learning_rate": 1.992842996520814e-05, "loss": 0.0526, "step": 17500 }, { "epoch": 0.26874376486839074, "grad_norm": 0.5728759765625, "learning_rate": 1.9928109696082495e-05, "loss": 0.0544, "step": 17510 }, { "epoch": 0.26889724503107976, "grad_norm": 0.504878580570221, "learning_rate": 1.992778871455375e-05, "loss": 0.0519, "step": 17520 }, { "epoch": 0.2690507251937687, "grad_norm": 0.5762083530426025, "learning_rate": 1.9927467020644932e-05, "loss": 0.0693, "step": 17530 }, { "epoch": 0.26920420535645767, "grad_norm": 0.577182948589325, "learning_rate": 1.9927144614379134e-05, "loss": 0.0613, "step": 17540 }, { "epoch": 0.26935768551914663, "grad_norm": 0.4712943434715271, "learning_rate": 1.992682149577948e-05, "loss": 0.0582, "step": 17550 }, { "epoch": 0.26951116568183564, "grad_norm": 0.5846714377403259, "learning_rate": 1.9926497664869166e-05, "loss": 0.0482, "step": 17560 }, { "epoch": 0.2696646458445246, "grad_norm": 0.5534276962280273, "learning_rate": 1.9926173121671416e-05, "loss": 0.055, "step": 17570 }, { "epoch": 0.26981812600721355, "grad_norm": 0.5168551802635193, "learning_rate": 1.9925847866209533e-05, "loss": 0.0668, "step": 17580 }, { "epoch": 0.26997160616990257, "grad_norm": 0.5518394708633423, "learning_rate": 1.9925521898506846e-05, "loss": 0.0613, "step": 17590 }, { "epoch": 0.2701250863325915, "grad_norm": 0.5161780714988708, "learning_rate": 1.992519521858675e-05, "loss": 0.0462, "step": 17600 }, { "epoch": 0.2702785664952805, "grad_norm": 0.678541362285614, "learning_rate": 1.9924867826472685e-05, "loss": 0.0598, "step": 17610 }, { "epoch": 0.27043204665796944, "grad_norm": 0.6186378002166748, "learning_rate": 1.992453972218814e-05, "loss": 0.0521, "step": 17620 }, { "epoch": 0.27058552682065845, "grad_norm": 0.4884982705116272, "learning_rate": 1.9924210905756663e-05, "loss": 0.0565, "step": 17630 }, { "epoch": 0.2707390069833474, "grad_norm": 0.4951339066028595, "learning_rate": 1.9923881377201846e-05, "loss": 0.0518, "step": 17640 }, { "epoch": 0.27089248714603636, "grad_norm": 0.49602967500686646, "learning_rate": 1.9923551136547338e-05, "loss": 0.0478, "step": 17650 }, { "epoch": 0.2710459673087253, "grad_norm": 0.6483648419380188, "learning_rate": 1.9923220183816833e-05, "loss": 0.0647, "step": 17660 }, { "epoch": 0.27119944747141433, "grad_norm": 0.6897918581962585, "learning_rate": 1.9922888519034074e-05, "loss": 0.0594, "step": 17670 }, { "epoch": 0.2713529276341033, "grad_norm": 0.4817480146884918, "learning_rate": 1.992255614222287e-05, "loss": 0.0578, "step": 17680 }, { "epoch": 0.27150640779679225, "grad_norm": 0.596951961517334, "learning_rate": 1.9922223053407067e-05, "loss": 0.06, "step": 17690 }, { "epoch": 0.27165988795948126, "grad_norm": 0.474657267332077, "learning_rate": 1.9921889252610564e-05, "loss": 0.0643, "step": 17700 }, { "epoch": 0.2718133681221702, "grad_norm": 0.678950846195221, "learning_rate": 1.9921554739857316e-05, "loss": 0.0523, "step": 17710 }, { "epoch": 0.2719668482848592, "grad_norm": 0.6127526760101318, "learning_rate": 1.9921219515171324e-05, "loss": 0.062, "step": 17720 }, { "epoch": 0.27212032844754813, "grad_norm": 0.6097144484519958, "learning_rate": 1.9920883578576644e-05, "loss": 0.053, "step": 17730 }, { "epoch": 0.27227380861023714, "grad_norm": 0.5242661833763123, "learning_rate": 1.992054693009738e-05, "loss": 0.053, "step": 17740 }, { "epoch": 0.2724272887729261, "grad_norm": 0.6768051981925964, "learning_rate": 1.9920209569757695e-05, "loss": 0.0577, "step": 17750 }, { "epoch": 0.27258076893561506, "grad_norm": 0.9155818223953247, "learning_rate": 1.991987149758179e-05, "loss": 0.051, "step": 17760 }, { "epoch": 0.27273424909830407, "grad_norm": 0.7093937993049622, "learning_rate": 1.991953271359392e-05, "loss": 0.0653, "step": 17770 }, { "epoch": 0.27288772926099303, "grad_norm": 0.5527374744415283, "learning_rate": 1.9919193217818403e-05, "loss": 0.0556, "step": 17780 }, { "epoch": 0.273041209423682, "grad_norm": 0.4461464285850525, "learning_rate": 1.99188530102796e-05, "loss": 0.0498, "step": 17790 }, { "epoch": 0.27319468958637094, "grad_norm": 0.5675284266471863, "learning_rate": 1.9918512091001915e-05, "loss": 0.0579, "step": 17800 }, { "epoch": 0.27334816974905995, "grad_norm": 0.8775984048843384, "learning_rate": 1.991817046000982e-05, "loss": 0.0714, "step": 17810 }, { "epoch": 0.2735016499117489, "grad_norm": 0.6837757229804993, "learning_rate": 1.9917828117327823e-05, "loss": 0.0493, "step": 17820 }, { "epoch": 0.27365513007443787, "grad_norm": 0.6623613834381104, "learning_rate": 1.9917485062980494e-05, "loss": 0.0517, "step": 17830 }, { "epoch": 0.2738086102371268, "grad_norm": 0.7672053575515747, "learning_rate": 1.9917141296992443e-05, "loss": 0.0638, "step": 17840 }, { "epoch": 0.27396209039981584, "grad_norm": 0.7183113694190979, "learning_rate": 1.991679681938834e-05, "loss": 0.066, "step": 17850 }, { "epoch": 0.2741155705625048, "grad_norm": 0.716980516910553, "learning_rate": 1.991645163019291e-05, "loss": 0.0545, "step": 17860 }, { "epoch": 0.27426905072519375, "grad_norm": 0.7561107873916626, "learning_rate": 1.9916105729430914e-05, "loss": 0.0482, "step": 17870 }, { "epoch": 0.27442253088788277, "grad_norm": 0.4977521598339081, "learning_rate": 1.9915759117127175e-05, "loss": 0.0469, "step": 17880 }, { "epoch": 0.2745760110505717, "grad_norm": 0.4748601019382477, "learning_rate": 1.9915411793306565e-05, "loss": 0.0508, "step": 17890 }, { "epoch": 0.2747294912132607, "grad_norm": 0.5073931813240051, "learning_rate": 1.991506375799401e-05, "loss": 0.0575, "step": 17900 }, { "epoch": 0.27488297137594964, "grad_norm": 0.45444774627685547, "learning_rate": 1.9914715011214474e-05, "loss": 0.0554, "step": 17910 }, { "epoch": 0.27503645153863865, "grad_norm": 0.585658848285675, "learning_rate": 1.9914365552992993e-05, "loss": 0.0524, "step": 17920 }, { "epoch": 0.2751899317013276, "grad_norm": 0.5707097053527832, "learning_rate": 1.9914015383354634e-05, "loss": 0.0605, "step": 17930 }, { "epoch": 0.27534341186401656, "grad_norm": 0.5282866954803467, "learning_rate": 1.991366450232453e-05, "loss": 0.0575, "step": 17940 }, { "epoch": 0.2754968920267056, "grad_norm": 0.5813442468643188, "learning_rate": 1.9913312909927855e-05, "loss": 0.0478, "step": 17950 }, { "epoch": 0.27565037218939453, "grad_norm": 0.6073573231697083, "learning_rate": 1.991296060618984e-05, "loss": 0.0511, "step": 17960 }, { "epoch": 0.2758038523520835, "grad_norm": 0.7223449349403381, "learning_rate": 1.9912607591135765e-05, "loss": 0.0539, "step": 17970 }, { "epoch": 0.27595733251477245, "grad_norm": 0.6439905762672424, "learning_rate": 1.991225386479096e-05, "loss": 0.0641, "step": 17980 }, { "epoch": 0.27611081267746146, "grad_norm": 0.5944869518280029, "learning_rate": 1.9911899427180808e-05, "loss": 0.054, "step": 17990 }, { "epoch": 0.2762642928401504, "grad_norm": 0.4382035732269287, "learning_rate": 1.9911544278330738e-05, "loss": 0.0553, "step": 18000 }, { "epoch": 0.2764177730028394, "grad_norm": 0.5948370695114136, "learning_rate": 1.9911188418266242e-05, "loss": 0.0517, "step": 18010 }, { "epoch": 0.27657125316552833, "grad_norm": 0.6575189232826233, "learning_rate": 1.991083184701285e-05, "loss": 0.0597, "step": 18020 }, { "epoch": 0.27672473332821734, "grad_norm": 0.6772049069404602, "learning_rate": 1.9910474564596148e-05, "loss": 0.0531, "step": 18030 }, { "epoch": 0.2768782134909063, "grad_norm": 0.5445865988731384, "learning_rate": 1.9910116571041774e-05, "loss": 0.0463, "step": 18040 }, { "epoch": 0.27703169365359526, "grad_norm": 0.8573126792907715, "learning_rate": 1.9909757866375415e-05, "loss": 0.0565, "step": 18050 }, { "epoch": 0.27718517381628427, "grad_norm": 0.640453577041626, "learning_rate": 1.9909398450622816e-05, "loss": 0.0481, "step": 18060 }, { "epoch": 0.2773386539789732, "grad_norm": 0.8108947277069092, "learning_rate": 1.9909038323809758e-05, "loss": 0.0493, "step": 18070 }, { "epoch": 0.2774921341416622, "grad_norm": 0.6723916530609131, "learning_rate": 1.9908677485962092e-05, "loss": 0.055, "step": 18080 }, { "epoch": 0.27764561430435114, "grad_norm": 0.535087525844574, "learning_rate": 1.99083159371057e-05, "loss": 0.0641, "step": 18090 }, { "epoch": 0.27779909446704015, "grad_norm": 0.4893547594547272, "learning_rate": 1.9907953677266536e-05, "loss": 0.0456, "step": 18100 }, { "epoch": 0.2779525746297291, "grad_norm": 0.7199770212173462, "learning_rate": 1.990759070647059e-05, "loss": 0.0599, "step": 18110 }, { "epoch": 0.27810605479241807, "grad_norm": 0.6659570932388306, "learning_rate": 1.99072270247439e-05, "loss": 0.0542, "step": 18120 }, { "epoch": 0.2782595349551071, "grad_norm": 0.44311046600341797, "learning_rate": 1.9906862632112577e-05, "loss": 0.0483, "step": 18130 }, { "epoch": 0.27841301511779604, "grad_norm": 0.6272259950637817, "learning_rate": 1.9906497528602754e-05, "loss": 0.0627, "step": 18140 }, { "epoch": 0.278566495280485, "grad_norm": 0.7795793414115906, "learning_rate": 1.990613171424064e-05, "loss": 0.0702, "step": 18150 }, { "epoch": 0.27871997544317395, "grad_norm": 0.46317967772483826, "learning_rate": 1.990576518905248e-05, "loss": 0.0536, "step": 18160 }, { "epoch": 0.27887345560586296, "grad_norm": 0.4992486238479614, "learning_rate": 1.9905397953064575e-05, "loss": 0.0471, "step": 18170 }, { "epoch": 0.2790269357685519, "grad_norm": 0.4863639175891876, "learning_rate": 1.9905030006303276e-05, "loss": 0.0569, "step": 18180 }, { "epoch": 0.2791804159312409, "grad_norm": 0.5670425891876221, "learning_rate": 1.9904661348794987e-05, "loss": 0.0554, "step": 18190 }, { "epoch": 0.27933389609392983, "grad_norm": 0.6345893144607544, "learning_rate": 1.990429198056616e-05, "loss": 0.0607, "step": 18200 }, { "epoch": 0.27948737625661885, "grad_norm": 0.7823130488395691, "learning_rate": 1.9903921901643298e-05, "loss": 0.0568, "step": 18210 }, { "epoch": 0.2796408564193078, "grad_norm": 0.7905495762825012, "learning_rate": 1.9903551112052958e-05, "loss": 0.0622, "step": 18220 }, { "epoch": 0.27979433658199676, "grad_norm": 0.37720486521720886, "learning_rate": 1.990317961182175e-05, "loss": 0.0433, "step": 18230 }, { "epoch": 0.2799478167446858, "grad_norm": 0.5390685796737671, "learning_rate": 1.990280740097632e-05, "loss": 0.0494, "step": 18240 }, { "epoch": 0.28010129690737473, "grad_norm": 0.6468330025672913, "learning_rate": 1.9902434479543394e-05, "loss": 0.0461, "step": 18250 }, { "epoch": 0.2802547770700637, "grad_norm": 0.5145399570465088, "learning_rate": 1.9902060847549716e-05, "loss": 0.0554, "step": 18260 }, { "epoch": 0.28040825723275264, "grad_norm": 0.675727903842926, "learning_rate": 1.9901686505022105e-05, "loss": 0.0488, "step": 18270 }, { "epoch": 0.28056173739544166, "grad_norm": 0.6553292870521545, "learning_rate": 1.9901311451987418e-05, "loss": 0.0633, "step": 18280 }, { "epoch": 0.2807152175581306, "grad_norm": 0.5218939781188965, "learning_rate": 1.9900935688472567e-05, "loss": 0.0522, "step": 18290 }, { "epoch": 0.28086869772081957, "grad_norm": 0.7091104984283447, "learning_rate": 1.9900559214504523e-05, "loss": 0.0558, "step": 18300 }, { "epoch": 0.2810221778835086, "grad_norm": 0.6413140296936035, "learning_rate": 1.990018203011029e-05, "loss": 0.0604, "step": 18310 }, { "epoch": 0.28117565804619754, "grad_norm": 0.6393271684646606, "learning_rate": 1.9899804135316942e-05, "loss": 0.0516, "step": 18320 }, { "epoch": 0.2813291382088865, "grad_norm": 0.46324652433395386, "learning_rate": 1.989942553015159e-05, "loss": 0.0572, "step": 18330 }, { "epoch": 0.28148261837157545, "grad_norm": 0.7219741344451904, "learning_rate": 1.98990462146414e-05, "loss": 0.0599, "step": 18340 }, { "epoch": 0.28163609853426447, "grad_norm": 0.5957145094871521, "learning_rate": 1.9898666188813595e-05, "loss": 0.0601, "step": 18350 }, { "epoch": 0.2817895786969534, "grad_norm": 0.6593832969665527, "learning_rate": 1.9898285452695442e-05, "loss": 0.0592, "step": 18360 }, { "epoch": 0.2819430588596424, "grad_norm": 0.6704214215278625, "learning_rate": 1.9897904006314257e-05, "loss": 0.0586, "step": 18370 }, { "epoch": 0.28209653902233134, "grad_norm": 0.6826335191726685, "learning_rate": 1.989752184969742e-05, "loss": 0.0639, "step": 18380 }, { "epoch": 0.28225001918502035, "grad_norm": 0.5620444416999817, "learning_rate": 1.9897138982872346e-05, "loss": 0.0549, "step": 18390 }, { "epoch": 0.2824034993477093, "grad_norm": 0.5164963603019714, "learning_rate": 1.9896755405866512e-05, "loss": 0.0553, "step": 18400 }, { "epoch": 0.28255697951039827, "grad_norm": 0.5423986315727234, "learning_rate": 1.989637111870744e-05, "loss": 0.059, "step": 18410 }, { "epoch": 0.2827104596730873, "grad_norm": 0.5765137672424316, "learning_rate": 1.9895986121422704e-05, "loss": 0.055, "step": 18420 }, { "epoch": 0.28286393983577623, "grad_norm": 0.618346631526947, "learning_rate": 1.9895600414039933e-05, "loss": 0.0548, "step": 18430 }, { "epoch": 0.2830174199984652, "grad_norm": 0.5773553848266602, "learning_rate": 1.9895213996586803e-05, "loss": 0.0479, "step": 18440 }, { "epoch": 0.28317090016115415, "grad_norm": 0.6427943110466003, "learning_rate": 1.9894826869091036e-05, "loss": 0.0574, "step": 18450 }, { "epoch": 0.28332438032384316, "grad_norm": 0.6201515197753906, "learning_rate": 1.989443903158042e-05, "loss": 0.0547, "step": 18460 }, { "epoch": 0.2834778604865321, "grad_norm": 0.4898355305194855, "learning_rate": 1.989405048408278e-05, "loss": 0.0521, "step": 18470 }, { "epoch": 0.2836313406492211, "grad_norm": 0.5767629146575928, "learning_rate": 1.9893661226626e-05, "loss": 0.0555, "step": 18480 }, { "epoch": 0.2837848208119101, "grad_norm": 0.5550879240036011, "learning_rate": 1.9893271259238007e-05, "loss": 0.0559, "step": 18490 }, { "epoch": 0.28393830097459904, "grad_norm": 0.8479639887809753, "learning_rate": 1.9892880581946782e-05, "loss": 0.0439, "step": 18500 }, { "epoch": 0.284091781137288, "grad_norm": 0.3834512233734131, "learning_rate": 1.9892489194780366e-05, "loss": 0.0628, "step": 18510 }, { "epoch": 0.28424526129997696, "grad_norm": 0.5536749958992004, "learning_rate": 1.9892097097766837e-05, "loss": 0.0594, "step": 18520 }, { "epoch": 0.28439874146266597, "grad_norm": 0.5509463548660278, "learning_rate": 1.9891704290934332e-05, "loss": 0.0445, "step": 18530 }, { "epoch": 0.28455222162535493, "grad_norm": 0.48444369435310364, "learning_rate": 1.989131077431104e-05, "loss": 0.0436, "step": 18540 }, { "epoch": 0.2847057017880439, "grad_norm": 0.4081836938858032, "learning_rate": 1.9890916547925197e-05, "loss": 0.0548, "step": 18550 }, { "epoch": 0.28485918195073284, "grad_norm": 0.6652548313140869, "learning_rate": 1.989052161180509e-05, "loss": 0.0659, "step": 18560 }, { "epoch": 0.28501266211342186, "grad_norm": 0.5921288132667542, "learning_rate": 1.9890125965979056e-05, "loss": 0.0579, "step": 18570 }, { "epoch": 0.2851661422761108, "grad_norm": 0.36409199237823486, "learning_rate": 1.9889729610475493e-05, "loss": 0.0412, "step": 18580 }, { "epoch": 0.28531962243879977, "grad_norm": 0.5059319138526917, "learning_rate": 1.9889332545322834e-05, "loss": 0.06, "step": 18590 }, { "epoch": 0.2854731026014888, "grad_norm": 0.9100921750068665, "learning_rate": 1.988893477054957e-05, "loss": 0.0651, "step": 18600 }, { "epoch": 0.28562658276417774, "grad_norm": 0.3875839114189148, "learning_rate": 1.988853628618425e-05, "loss": 0.0431, "step": 18610 }, { "epoch": 0.2857800629268667, "grad_norm": 0.5078288316726685, "learning_rate": 1.9888137092255466e-05, "loss": 0.0612, "step": 18620 }, { "epoch": 0.28593354308955565, "grad_norm": 0.6847828030586243, "learning_rate": 1.9887737188791857e-05, "loss": 0.0598, "step": 18630 }, { "epoch": 0.28608702325224467, "grad_norm": 0.5078270435333252, "learning_rate": 1.9887336575822125e-05, "loss": 0.0505, "step": 18640 }, { "epoch": 0.2862405034149336, "grad_norm": 0.5716697573661804, "learning_rate": 1.988693525337502e-05, "loss": 0.0505, "step": 18650 }, { "epoch": 0.2863939835776226, "grad_norm": 0.49328941106796265, "learning_rate": 1.988653322147933e-05, "loss": 0.0428, "step": 18660 }, { "epoch": 0.2865474637403116, "grad_norm": 0.47129538655281067, "learning_rate": 1.9886130480163904e-05, "loss": 0.0472, "step": 18670 }, { "epoch": 0.28670094390300055, "grad_norm": 0.8205468058586121, "learning_rate": 1.988572702945765e-05, "loss": 0.056, "step": 18680 }, { "epoch": 0.2868544240656895, "grad_norm": 0.5795788764953613, "learning_rate": 1.988532286938951e-05, "loss": 0.0586, "step": 18690 }, { "epoch": 0.28700790422837846, "grad_norm": 0.6895644068717957, "learning_rate": 1.9884917999988486e-05, "loss": 0.0617, "step": 18700 }, { "epoch": 0.2871613843910675, "grad_norm": 0.704652726650238, "learning_rate": 1.9884512421283632e-05, "loss": 0.0584, "step": 18710 }, { "epoch": 0.28731486455375643, "grad_norm": 0.7341421842575073, "learning_rate": 1.9884106133304054e-05, "loss": 0.053, "step": 18720 }, { "epoch": 0.2874683447164454, "grad_norm": 0.7637909650802612, "learning_rate": 1.9883699136078897e-05, "loss": 0.0601, "step": 18730 }, { "epoch": 0.28762182487913435, "grad_norm": 0.6047495603561401, "learning_rate": 1.9883291429637373e-05, "loss": 0.0598, "step": 18740 }, { "epoch": 0.28777530504182336, "grad_norm": 0.5538708567619324, "learning_rate": 1.988288301400873e-05, "loss": 0.0522, "step": 18750 }, { "epoch": 0.2879287852045123, "grad_norm": 0.6394161581993103, "learning_rate": 1.9882473889222284e-05, "loss": 0.0472, "step": 18760 }, { "epoch": 0.2880822653672013, "grad_norm": 0.5173266530036926, "learning_rate": 1.9882064055307388e-05, "loss": 0.0534, "step": 18770 }, { "epoch": 0.2882357455298903, "grad_norm": 0.5420504808425903, "learning_rate": 1.9881653512293446e-05, "loss": 0.0589, "step": 18780 }, { "epoch": 0.28838922569257924, "grad_norm": 0.7009912729263306, "learning_rate": 1.9881242260209924e-05, "loss": 0.0481, "step": 18790 }, { "epoch": 0.2885427058552682, "grad_norm": 0.7038000822067261, "learning_rate": 1.9880830299086324e-05, "loss": 0.0622, "step": 18800 }, { "epoch": 0.28869618601795716, "grad_norm": 0.7375309467315674, "learning_rate": 1.9880417628952213e-05, "loss": 0.0532, "step": 18810 }, { "epoch": 0.28884966618064617, "grad_norm": 0.682995080947876, "learning_rate": 1.98800042498372e-05, "loss": 0.0547, "step": 18820 }, { "epoch": 0.2890031463433351, "grad_norm": 0.7178454399108887, "learning_rate": 1.987959016177095e-05, "loss": 0.0554, "step": 18830 }, { "epoch": 0.2891566265060241, "grad_norm": 0.5534664392471313, "learning_rate": 1.9879175364783174e-05, "loss": 0.0572, "step": 18840 }, { "epoch": 0.2893101066687131, "grad_norm": 0.6096992492675781, "learning_rate": 1.9878759858903634e-05, "loss": 0.056, "step": 18850 }, { "epoch": 0.28946358683140205, "grad_norm": 0.49772870540618896, "learning_rate": 1.987834364416215e-05, "loss": 0.055, "step": 18860 }, { "epoch": 0.289617066994091, "grad_norm": 0.6634455323219299, "learning_rate": 1.9877926720588586e-05, "loss": 0.053, "step": 18870 }, { "epoch": 0.28977054715677997, "grad_norm": 0.6539319753646851, "learning_rate": 1.987750908821286e-05, "loss": 0.0556, "step": 18880 }, { "epoch": 0.289924027319469, "grad_norm": 0.6749168038368225, "learning_rate": 1.987709074706494e-05, "loss": 0.0509, "step": 18890 }, { "epoch": 0.29007750748215794, "grad_norm": 0.4254681468009949, "learning_rate": 1.9876671697174838e-05, "loss": 0.0569, "step": 18900 }, { "epoch": 0.2902309876448469, "grad_norm": 0.6501044034957886, "learning_rate": 1.987625193857263e-05, "loss": 0.0627, "step": 18910 }, { "epoch": 0.29038446780753585, "grad_norm": 0.41264092922210693, "learning_rate": 1.9875831471288432e-05, "loss": 0.0682, "step": 18920 }, { "epoch": 0.29053794797022486, "grad_norm": 0.6498766541481018, "learning_rate": 1.9875410295352423e-05, "loss": 0.0613, "step": 18930 }, { "epoch": 0.2906914281329138, "grad_norm": 0.5683800578117371, "learning_rate": 1.9874988410794817e-05, "loss": 0.0551, "step": 18940 }, { "epoch": 0.2908449082956028, "grad_norm": 0.6582192182540894, "learning_rate": 1.987456581764589e-05, "loss": 0.065, "step": 18950 }, { "epoch": 0.2909983884582918, "grad_norm": 0.4616911709308624, "learning_rate": 1.9874142515935966e-05, "loss": 0.0518, "step": 18960 }, { "epoch": 0.29115186862098075, "grad_norm": 0.5733194351196289, "learning_rate": 1.987371850569542e-05, "loss": 0.0552, "step": 18970 }, { "epoch": 0.2913053487836697, "grad_norm": 0.5627292394638062, "learning_rate": 1.987329378695467e-05, "loss": 0.0571, "step": 18980 }, { "epoch": 0.29145882894635866, "grad_norm": 0.4470226466655731, "learning_rate": 1.9872868359744207e-05, "loss": 0.0525, "step": 18990 }, { "epoch": 0.2916123091090477, "grad_norm": 0.8023382425308228, "learning_rate": 1.9872442224094544e-05, "loss": 0.0562, "step": 19000 }, { "epoch": 0.29176578927173663, "grad_norm": 0.599997878074646, "learning_rate": 1.9872015380036263e-05, "loss": 0.0593, "step": 19010 }, { "epoch": 0.2919192694344256, "grad_norm": 0.6036457419395447, "learning_rate": 1.9871587827599998e-05, "loss": 0.0518, "step": 19020 }, { "epoch": 0.2920727495971146, "grad_norm": 0.49140337109565735, "learning_rate": 1.987115956681642e-05, "loss": 0.0574, "step": 19030 }, { "epoch": 0.29222622975980356, "grad_norm": 0.5020847320556641, "learning_rate": 1.9870730597716265e-05, "loss": 0.0552, "step": 19040 }, { "epoch": 0.2923797099224925, "grad_norm": 0.6440312266349792, "learning_rate": 1.9870300920330315e-05, "loss": 0.056, "step": 19050 }, { "epoch": 0.29253319008518147, "grad_norm": 0.5670533180236816, "learning_rate": 1.9869870534689398e-05, "loss": 0.0703, "step": 19060 }, { "epoch": 0.2926866702478705, "grad_norm": 0.7433170080184937, "learning_rate": 1.98694394408244e-05, "loss": 0.0558, "step": 19070 }, { "epoch": 0.29284015041055944, "grad_norm": 0.4423912465572357, "learning_rate": 1.9869007638766253e-05, "loss": 0.0563, "step": 19080 }, { "epoch": 0.2929936305732484, "grad_norm": 0.49966925382614136, "learning_rate": 1.986857512854594e-05, "loss": 0.0525, "step": 19090 }, { "epoch": 0.29314711073593736, "grad_norm": 0.501374363899231, "learning_rate": 1.98681419101945e-05, "loss": 0.0653, "step": 19100 }, { "epoch": 0.29330059089862637, "grad_norm": 0.5075693130493164, "learning_rate": 1.986770798374302e-05, "loss": 0.0553, "step": 19110 }, { "epoch": 0.2934540710613153, "grad_norm": 0.4900131821632385, "learning_rate": 1.9867273349222633e-05, "loss": 0.0515, "step": 19120 }, { "epoch": 0.2936075512240043, "grad_norm": 0.6161296963691711, "learning_rate": 1.986683800666452e-05, "loss": 0.0663, "step": 19130 }, { "epoch": 0.2937610313866933, "grad_norm": 0.6170608997344971, "learning_rate": 1.9866401956099938e-05, "loss": 0.0551, "step": 19140 }, { "epoch": 0.29391451154938225, "grad_norm": 0.5344338417053223, "learning_rate": 1.986596519756016e-05, "loss": 0.0484, "step": 19150 }, { "epoch": 0.2940679917120712, "grad_norm": 0.4432353973388672, "learning_rate": 1.9865527731076534e-05, "loss": 0.0611, "step": 19160 }, { "epoch": 0.29422147187476017, "grad_norm": 0.47650569677352905, "learning_rate": 1.9865089556680445e-05, "loss": 0.0512, "step": 19170 }, { "epoch": 0.2943749520374492, "grad_norm": 0.5585644841194153, "learning_rate": 1.986465067440334e-05, "loss": 0.0572, "step": 19180 }, { "epoch": 0.29452843220013813, "grad_norm": 0.5804152488708496, "learning_rate": 1.986421108427671e-05, "loss": 0.0522, "step": 19190 }, { "epoch": 0.2946819123628271, "grad_norm": 0.4563980996608734, "learning_rate": 1.98637707863321e-05, "loss": 0.0492, "step": 19200 }, { "epoch": 0.2948353925255161, "grad_norm": 0.5011278390884399, "learning_rate": 1.9863329780601098e-05, "loss": 0.0552, "step": 19210 }, { "epoch": 0.29498887268820506, "grad_norm": 0.5124266743659973, "learning_rate": 1.9862888067115357e-05, "loss": 0.0545, "step": 19220 }, { "epoch": 0.295142352850894, "grad_norm": 0.45936131477355957, "learning_rate": 1.986244564590657e-05, "loss": 0.0502, "step": 19230 }, { "epoch": 0.295295833013583, "grad_norm": 0.6030067801475525, "learning_rate": 1.986200251700648e-05, "loss": 0.0519, "step": 19240 }, { "epoch": 0.295449313176272, "grad_norm": 0.5274835824966431, "learning_rate": 1.9861558680446888e-05, "loss": 0.0521, "step": 19250 }, { "epoch": 0.29560279333896095, "grad_norm": 0.6231362223625183, "learning_rate": 1.986111413625964e-05, "loss": 0.0478, "step": 19260 }, { "epoch": 0.2957562735016499, "grad_norm": 0.7571545839309692, "learning_rate": 1.9860668884476632e-05, "loss": 0.0467, "step": 19270 }, { "epoch": 0.29590975366433886, "grad_norm": 0.4834781289100647, "learning_rate": 1.9860222925129822e-05, "loss": 0.0624, "step": 19280 }, { "epoch": 0.29606323382702787, "grad_norm": 0.5063542127609253, "learning_rate": 1.98597762582512e-05, "loss": 0.0526, "step": 19290 }, { "epoch": 0.29621671398971683, "grad_norm": 0.4234156608581543, "learning_rate": 1.9859328883872827e-05, "loss": 0.0465, "step": 19300 }, { "epoch": 0.2963701941524058, "grad_norm": 0.5842573642730713, "learning_rate": 1.98588808020268e-05, "loss": 0.0503, "step": 19310 }, { "epoch": 0.2965236743150948, "grad_norm": 0.5776521563529968, "learning_rate": 1.9858432012745268e-05, "loss": 0.0486, "step": 19320 }, { "epoch": 0.29667715447778376, "grad_norm": 0.5140674114227295, "learning_rate": 1.985798251606044e-05, "loss": 0.051, "step": 19330 }, { "epoch": 0.2968306346404727, "grad_norm": 0.737553060054779, "learning_rate": 1.9857532312004568e-05, "loss": 0.0425, "step": 19340 }, { "epoch": 0.29698411480316167, "grad_norm": 0.5439649224281311, "learning_rate": 1.985708140060996e-05, "loss": 0.0506, "step": 19350 }, { "epoch": 0.2971375949658507, "grad_norm": 0.5240995287895203, "learning_rate": 1.9856629781908966e-05, "loss": 0.0445, "step": 19360 }, { "epoch": 0.29729107512853964, "grad_norm": 0.4723389744758606, "learning_rate": 1.9856177455933995e-05, "loss": 0.0576, "step": 19370 }, { "epoch": 0.2974445552912286, "grad_norm": 0.6340433359146118, "learning_rate": 1.9855724422717507e-05, "loss": 0.046, "step": 19380 }, { "epoch": 0.2975980354539176, "grad_norm": 0.5345759987831116, "learning_rate": 1.9855270682292006e-05, "loss": 0.0517, "step": 19390 }, { "epoch": 0.29775151561660657, "grad_norm": 0.6467471122741699, "learning_rate": 1.9854816234690053e-05, "loss": 0.0616, "step": 19400 }, { "epoch": 0.2979049957792955, "grad_norm": 0.809816837310791, "learning_rate": 1.9854361079944255e-05, "loss": 0.0735, "step": 19410 }, { "epoch": 0.2980584759419845, "grad_norm": 0.496262788772583, "learning_rate": 1.9853905218087277e-05, "loss": 0.0484, "step": 19420 }, { "epoch": 0.2982119561046735, "grad_norm": 0.5066131353378296, "learning_rate": 1.9853448649151824e-05, "loss": 0.0564, "step": 19430 }, { "epoch": 0.29836543626736245, "grad_norm": 0.40502333641052246, "learning_rate": 1.985299137317066e-05, "loss": 0.0501, "step": 19440 }, { "epoch": 0.2985189164300514, "grad_norm": 0.5330043435096741, "learning_rate": 1.98525333901766e-05, "loss": 0.064, "step": 19450 }, { "epoch": 0.29867239659274036, "grad_norm": 0.42885637283325195, "learning_rate": 1.9852074700202505e-05, "loss": 0.0515, "step": 19460 }, { "epoch": 0.2988258767554294, "grad_norm": 0.7644667029380798, "learning_rate": 1.9851615303281284e-05, "loss": 0.0638, "step": 19470 }, { "epoch": 0.29897935691811833, "grad_norm": 0.565868616104126, "learning_rate": 1.985115519944591e-05, "loss": 0.0599, "step": 19480 }, { "epoch": 0.2991328370808073, "grad_norm": 0.4597488343715668, "learning_rate": 1.9850694388729396e-05, "loss": 0.0535, "step": 19490 }, { "epoch": 0.2992863172434963, "grad_norm": 0.4241028130054474, "learning_rate": 1.9850232871164803e-05, "loss": 0.0518, "step": 19500 }, { "epoch": 0.29943979740618526, "grad_norm": 0.5286077857017517, "learning_rate": 1.984977064678525e-05, "loss": 0.0547, "step": 19510 }, { "epoch": 0.2995932775688742, "grad_norm": 0.587270975112915, "learning_rate": 1.984930771562391e-05, "loss": 0.0495, "step": 19520 }, { "epoch": 0.2997467577315632, "grad_norm": 0.6303067803382874, "learning_rate": 1.9848844077713995e-05, "loss": 0.0546, "step": 19530 }, { "epoch": 0.2999002378942522, "grad_norm": 0.3557683229446411, "learning_rate": 1.9848379733088775e-05, "loss": 0.0493, "step": 19540 }, { "epoch": 0.30005371805694114, "grad_norm": 0.8971455693244934, "learning_rate": 1.984791468178157e-05, "loss": 0.0531, "step": 19550 }, { "epoch": 0.3002071982196301, "grad_norm": 0.5302455425262451, "learning_rate": 1.984744892382575e-05, "loss": 0.0429, "step": 19560 }, { "epoch": 0.3003606783823191, "grad_norm": 0.4542834460735321, "learning_rate": 1.984698245925474e-05, "loss": 0.0575, "step": 19570 }, { "epoch": 0.30051415854500807, "grad_norm": 0.5609133243560791, "learning_rate": 1.9846515288102006e-05, "loss": 0.0543, "step": 19580 }, { "epoch": 0.300667638707697, "grad_norm": 0.5123578906059265, "learning_rate": 1.9846047410401074e-05, "loss": 0.0538, "step": 19590 }, { "epoch": 0.300821118870386, "grad_norm": 0.511851966381073, "learning_rate": 1.9845578826185513e-05, "loss": 0.0474, "step": 19600 }, { "epoch": 0.300974599033075, "grad_norm": 0.5631884932518005, "learning_rate": 1.9845109535488953e-05, "loss": 0.0567, "step": 19610 }, { "epoch": 0.30112807919576395, "grad_norm": 0.5887303352355957, "learning_rate": 1.9844639538345064e-05, "loss": 0.054, "step": 19620 }, { "epoch": 0.3012815593584529, "grad_norm": 0.4543742835521698, "learning_rate": 1.9844168834787573e-05, "loss": 0.0622, "step": 19630 }, { "epoch": 0.30143503952114187, "grad_norm": 0.5248111486434937, "learning_rate": 1.9843697424850252e-05, "loss": 0.0511, "step": 19640 }, { "epoch": 0.3015885196838309, "grad_norm": 0.5120658874511719, "learning_rate": 1.9843225308566935e-05, "loss": 0.0532, "step": 19650 }, { "epoch": 0.30174199984651984, "grad_norm": 0.41106465458869934, "learning_rate": 1.9842752485971494e-05, "loss": 0.0559, "step": 19660 }, { "epoch": 0.3018954800092088, "grad_norm": 0.6301864981651306, "learning_rate": 1.9842278957097856e-05, "loss": 0.0459, "step": 19670 }, { "epoch": 0.3020489601718978, "grad_norm": 0.5636878609657288, "learning_rate": 1.9841804721980003e-05, "loss": 0.062, "step": 19680 }, { "epoch": 0.30220244033458676, "grad_norm": 0.6641325950622559, "learning_rate": 1.984132978065196e-05, "loss": 0.0557, "step": 19690 }, { "epoch": 0.3023559204972757, "grad_norm": 0.5761093497276306, "learning_rate": 1.9840854133147815e-05, "loss": 0.0515, "step": 19700 }, { "epoch": 0.3025094006599647, "grad_norm": 0.6060268878936768, "learning_rate": 1.9840377779501688e-05, "loss": 0.0452, "step": 19710 }, { "epoch": 0.3026628808226537, "grad_norm": 0.5908516049385071, "learning_rate": 1.9839900719747768e-05, "loss": 0.0478, "step": 19720 }, { "epoch": 0.30281636098534265, "grad_norm": 0.6822997331619263, "learning_rate": 1.9839422953920288e-05, "loss": 0.0535, "step": 19730 }, { "epoch": 0.3029698411480316, "grad_norm": 0.4244077205657959, "learning_rate": 1.9838944482053522e-05, "loss": 0.0501, "step": 19740 }, { "epoch": 0.3031233213107206, "grad_norm": 0.8907994031906128, "learning_rate": 1.9838465304181814e-05, "loss": 0.0651, "step": 19750 }, { "epoch": 0.3032768014734096, "grad_norm": 0.40833622217178345, "learning_rate": 1.9837985420339542e-05, "loss": 0.0583, "step": 19760 }, { "epoch": 0.30343028163609853, "grad_norm": 0.4434032142162323, "learning_rate": 1.983750483056114e-05, "loss": 0.0436, "step": 19770 }, { "epoch": 0.3035837617987875, "grad_norm": 0.44432899355888367, "learning_rate": 1.9837023534881092e-05, "loss": 0.0602, "step": 19780 }, { "epoch": 0.3037372419614765, "grad_norm": 0.6277226805686951, "learning_rate": 1.983654153333394e-05, "loss": 0.0551, "step": 19790 }, { "epoch": 0.30389072212416546, "grad_norm": 0.521695077419281, "learning_rate": 1.9836058825954265e-05, "loss": 0.0512, "step": 19800 }, { "epoch": 0.3040442022868544, "grad_norm": 0.9382975697517395, "learning_rate": 1.9835575412776708e-05, "loss": 0.0464, "step": 19810 }, { "epoch": 0.30419768244954337, "grad_norm": 0.6268026828765869, "learning_rate": 1.9835091293835954e-05, "loss": 0.0615, "step": 19820 }, { "epoch": 0.3043511626122324, "grad_norm": 0.35005220770835876, "learning_rate": 1.9834606469166743e-05, "loss": 0.0471, "step": 19830 }, { "epoch": 0.30450464277492134, "grad_norm": 0.4553036093711853, "learning_rate": 1.9834120938803866e-05, "loss": 0.0536, "step": 19840 }, { "epoch": 0.3046581229376103, "grad_norm": 0.40309974551200867, "learning_rate": 1.9833634702782158e-05, "loss": 0.0537, "step": 19850 }, { "epoch": 0.3048116031002993, "grad_norm": 0.5194103717803955, "learning_rate": 1.9833147761136512e-05, "loss": 0.0515, "step": 19860 }, { "epoch": 0.30496508326298827, "grad_norm": 0.5878709554672241, "learning_rate": 1.983266011390187e-05, "loss": 0.055, "step": 19870 }, { "epoch": 0.3051185634256772, "grad_norm": 0.4077643156051636, "learning_rate": 1.9832171761113222e-05, "loss": 0.0524, "step": 19880 }, { "epoch": 0.3052720435883662, "grad_norm": 0.6514273285865784, "learning_rate": 1.983168270280561e-05, "loss": 0.0577, "step": 19890 }, { "epoch": 0.3054255237510552, "grad_norm": 0.49051687121391296, "learning_rate": 1.9831192939014134e-05, "loss": 0.0622, "step": 19900 }, { "epoch": 0.30557900391374415, "grad_norm": 0.5320793986320496, "learning_rate": 1.983070246977393e-05, "loss": 0.0555, "step": 19910 }, { "epoch": 0.3057324840764331, "grad_norm": 0.5156873464584351, "learning_rate": 1.983021129512019e-05, "loss": 0.0554, "step": 19920 }, { "epoch": 0.3058859642391221, "grad_norm": 0.5378425121307373, "learning_rate": 1.9829719415088167e-05, "loss": 0.0488, "step": 19930 }, { "epoch": 0.3060394444018111, "grad_norm": 0.582619845867157, "learning_rate": 1.982922682971315e-05, "loss": 0.0528, "step": 19940 }, { "epoch": 0.30619292456450004, "grad_norm": 0.4581582844257355, "learning_rate": 1.9828733539030486e-05, "loss": 0.0613, "step": 19950 }, { "epoch": 0.306346404727189, "grad_norm": 0.6218756437301636, "learning_rate": 1.9828239543075575e-05, "loss": 0.0659, "step": 19960 }, { "epoch": 0.306499884889878, "grad_norm": 0.6767091751098633, "learning_rate": 1.982774484188386e-05, "loss": 0.057, "step": 19970 }, { "epoch": 0.30665336505256696, "grad_norm": 0.9632564187049866, "learning_rate": 1.9827249435490846e-05, "loss": 0.0571, "step": 19980 }, { "epoch": 0.3068068452152559, "grad_norm": 0.7765918374061584, "learning_rate": 1.9826753323932073e-05, "loss": 0.0511, "step": 19990 }, { "epoch": 0.3069603253779449, "grad_norm": 0.6393683552742004, "learning_rate": 1.9826256507243143e-05, "loss": 0.0606, "step": 20000 }, { "epoch": 0.3071138055406339, "grad_norm": 0.5083634853363037, "learning_rate": 1.9825758985459704e-05, "loss": 0.0514, "step": 20010 }, { "epoch": 0.30726728570332285, "grad_norm": 0.6973241567611694, "learning_rate": 1.9825260758617465e-05, "loss": 0.0472, "step": 20020 }, { "epoch": 0.3074207658660118, "grad_norm": 0.5454500317573547, "learning_rate": 1.9824761826752167e-05, "loss": 0.0584, "step": 20030 }, { "epoch": 0.3075742460287008, "grad_norm": 0.6201151609420776, "learning_rate": 1.9824262189899612e-05, "loss": 0.06, "step": 20040 }, { "epoch": 0.30772772619138977, "grad_norm": 0.515701413154602, "learning_rate": 1.9823761848095657e-05, "loss": 0.0518, "step": 20050 }, { "epoch": 0.30788120635407873, "grad_norm": 0.6175621151924133, "learning_rate": 1.9823260801376202e-05, "loss": 0.0541, "step": 20060 }, { "epoch": 0.3080346865167677, "grad_norm": 0.5373969674110413, "learning_rate": 1.98227590497772e-05, "loss": 0.0589, "step": 20070 }, { "epoch": 0.3081881666794567, "grad_norm": 0.5772056579589844, "learning_rate": 1.9822256593334658e-05, "loss": 0.0531, "step": 20080 }, { "epoch": 0.30834164684214566, "grad_norm": 0.6265556216239929, "learning_rate": 1.9821753432084624e-05, "loss": 0.0555, "step": 20090 }, { "epoch": 0.3084951270048346, "grad_norm": 0.5538582801818848, "learning_rate": 1.9821249566063208e-05, "loss": 0.052, "step": 20100 }, { "epoch": 0.30864860716752357, "grad_norm": 0.46793925762176514, "learning_rate": 1.9820744995306565e-05, "loss": 0.0488, "step": 20110 }, { "epoch": 0.3088020873302126, "grad_norm": 0.562666118144989, "learning_rate": 1.98202397198509e-05, "loss": 0.047, "step": 20120 }, { "epoch": 0.30895556749290154, "grad_norm": 0.6785548329353333, "learning_rate": 1.981973373973247e-05, "loss": 0.0545, "step": 20130 }, { "epoch": 0.3091090476555905, "grad_norm": 0.5225386023521423, "learning_rate": 1.9819227054987582e-05, "loss": 0.0554, "step": 20140 }, { "epoch": 0.3092625278182795, "grad_norm": 0.7849443554878235, "learning_rate": 1.9818719665652594e-05, "loss": 0.0543, "step": 20150 }, { "epoch": 0.30941600798096847, "grad_norm": 0.5386089086532593, "learning_rate": 1.9818211571763912e-05, "loss": 0.0567, "step": 20160 }, { "epoch": 0.3095694881436574, "grad_norm": 0.4285756051540375, "learning_rate": 1.9817702773357998e-05, "loss": 0.0482, "step": 20170 }, { "epoch": 0.3097229683063464, "grad_norm": 0.6550852060317993, "learning_rate": 1.981719327047136e-05, "loss": 0.0478, "step": 20180 }, { "epoch": 0.3098764484690354, "grad_norm": 0.6142092347145081, "learning_rate": 1.981668306314056e-05, "loss": 0.0588, "step": 20190 }, { "epoch": 0.31002992863172435, "grad_norm": 0.6308094263076782, "learning_rate": 1.9816172151402205e-05, "loss": 0.062, "step": 20200 }, { "epoch": 0.3101834087944133, "grad_norm": 0.40493783354759216, "learning_rate": 1.981566053529296e-05, "loss": 0.0478, "step": 20210 }, { "epoch": 0.3103368889571023, "grad_norm": 0.4341984689235687, "learning_rate": 1.9815148214849532e-05, "loss": 0.0561, "step": 20220 }, { "epoch": 0.3104903691197913, "grad_norm": 1.0765036344528198, "learning_rate": 1.9814635190108685e-05, "loss": 0.0466, "step": 20230 }, { "epoch": 0.31064384928248023, "grad_norm": 0.43846502900123596, "learning_rate": 1.9814121461107235e-05, "loss": 0.0496, "step": 20240 }, { "epoch": 0.3107973294451692, "grad_norm": 0.5348806977272034, "learning_rate": 1.9813607027882043e-05, "loss": 0.054, "step": 20250 }, { "epoch": 0.3109508096078582, "grad_norm": 0.48983779549598694, "learning_rate": 1.981309189047002e-05, "loss": 0.049, "step": 20260 }, { "epoch": 0.31110428977054716, "grad_norm": 0.45001667737960815, "learning_rate": 1.9812576048908135e-05, "loss": 0.0522, "step": 20270 }, { "epoch": 0.3112577699332361, "grad_norm": 0.7818464040756226, "learning_rate": 1.9812059503233398e-05, "loss": 0.057, "step": 20280 }, { "epoch": 0.3114112500959251, "grad_norm": 0.5588098764419556, "learning_rate": 1.981154225348288e-05, "loss": 0.051, "step": 20290 }, { "epoch": 0.3115647302586141, "grad_norm": 0.5643105506896973, "learning_rate": 1.981102429969369e-05, "loss": 0.0501, "step": 20300 }, { "epoch": 0.31171821042130304, "grad_norm": 0.5042148232460022, "learning_rate": 1.9810505641903e-05, "loss": 0.0479, "step": 20310 }, { "epoch": 0.311871690583992, "grad_norm": 0.3209056854248047, "learning_rate": 1.9809986280148023e-05, "loss": 0.0534, "step": 20320 }, { "epoch": 0.312025170746681, "grad_norm": 0.7002902626991272, "learning_rate": 1.980946621446603e-05, "loss": 0.0613, "step": 20330 }, { "epoch": 0.31217865090936997, "grad_norm": 0.5681454539299011, "learning_rate": 1.9808945444894337e-05, "loss": 0.053, "step": 20340 }, { "epoch": 0.3123321310720589, "grad_norm": 0.5109831094741821, "learning_rate": 1.9808423971470315e-05, "loss": 0.0712, "step": 20350 }, { "epoch": 0.3124856112347479, "grad_norm": 0.5010906457901001, "learning_rate": 1.9807901794231376e-05, "loss": 0.0595, "step": 20360 }, { "epoch": 0.3126390913974369, "grad_norm": 0.5762009620666504, "learning_rate": 1.9807378913215e-05, "loss": 0.0561, "step": 20370 }, { "epoch": 0.31279257156012585, "grad_norm": 0.853592574596405, "learning_rate": 1.9806855328458698e-05, "loss": 0.0484, "step": 20380 }, { "epoch": 0.3129460517228148, "grad_norm": 0.6443658471107483, "learning_rate": 1.980633104000004e-05, "loss": 0.0502, "step": 20390 }, { "epoch": 0.3130995318855038, "grad_norm": 0.6249514818191528, "learning_rate": 1.9805806047876653e-05, "loss": 0.0527, "step": 20400 }, { "epoch": 0.3132530120481928, "grad_norm": 0.6228529810905457, "learning_rate": 1.9805280352126206e-05, "loss": 0.0461, "step": 20410 }, { "epoch": 0.31340649221088174, "grad_norm": 0.6879099011421204, "learning_rate": 1.980475395278642e-05, "loss": 0.0504, "step": 20420 }, { "epoch": 0.3135599723735707, "grad_norm": 0.5814946889877319, "learning_rate": 1.980422684989507e-05, "loss": 0.0501, "step": 20430 }, { "epoch": 0.3137134525362597, "grad_norm": 0.5216183066368103, "learning_rate": 1.9803699043489974e-05, "loss": 0.0661, "step": 20440 }, { "epoch": 0.31386693269894866, "grad_norm": 0.6101706624031067, "learning_rate": 1.980317053360901e-05, "loss": 0.0477, "step": 20450 }, { "epoch": 0.3140204128616376, "grad_norm": 0.4982011020183563, "learning_rate": 1.98026413202901e-05, "loss": 0.0451, "step": 20460 }, { "epoch": 0.3141738930243266, "grad_norm": 0.5714846849441528, "learning_rate": 1.9802111403571217e-05, "loss": 0.0556, "step": 20470 }, { "epoch": 0.3143273731870156, "grad_norm": 0.40946510434150696, "learning_rate": 1.980158078349039e-05, "loss": 0.0563, "step": 20480 }, { "epoch": 0.31448085334970455, "grad_norm": 0.6827487349510193, "learning_rate": 1.9801049460085694e-05, "loss": 0.0554, "step": 20490 }, { "epoch": 0.3146343335123935, "grad_norm": 0.5622829794883728, "learning_rate": 1.9800517433395247e-05, "loss": 0.0533, "step": 20500 }, { "epoch": 0.3147878136750825, "grad_norm": 0.718572199344635, "learning_rate": 1.979998470345723e-05, "loss": 0.0579, "step": 20510 }, { "epoch": 0.3149412938377715, "grad_norm": 0.6794270277023315, "learning_rate": 1.9799451270309876e-05, "loss": 0.0475, "step": 20520 }, { "epoch": 0.31509477400046043, "grad_norm": 0.6405255794525146, "learning_rate": 1.9798917133991452e-05, "loss": 0.0485, "step": 20530 }, { "epoch": 0.3152482541631494, "grad_norm": 0.40788736939430237, "learning_rate": 1.979838229454029e-05, "loss": 0.0504, "step": 20540 }, { "epoch": 0.3154017343258384, "grad_norm": 0.5923780202865601, "learning_rate": 1.9797846751994768e-05, "loss": 0.0594, "step": 20550 }, { "epoch": 0.31555521448852736, "grad_norm": 0.4977628290653229, "learning_rate": 1.9797310506393316e-05, "loss": 0.0486, "step": 20560 }, { "epoch": 0.3157086946512163, "grad_norm": 0.7251647114753723, "learning_rate": 1.9796773557774407e-05, "loss": 0.0537, "step": 20570 }, { "epoch": 0.3158621748139053, "grad_norm": 0.5903784036636353, "learning_rate": 1.9796235906176577e-05, "loss": 0.0519, "step": 20580 }, { "epoch": 0.3160156549765943, "grad_norm": 0.6865090131759644, "learning_rate": 1.9795697551638406e-05, "loss": 0.0527, "step": 20590 }, { "epoch": 0.31616913513928324, "grad_norm": 0.6573770046234131, "learning_rate": 1.9795158494198522e-05, "loss": 0.0478, "step": 20600 }, { "epoch": 0.3163226153019722, "grad_norm": 0.4906485080718994, "learning_rate": 1.97946187338956e-05, "loss": 0.0528, "step": 20610 }, { "epoch": 0.3164760954646612, "grad_norm": 0.4349863529205322, "learning_rate": 1.979407827076838e-05, "loss": 0.0636, "step": 20620 }, { "epoch": 0.31662957562735017, "grad_norm": 0.5073959827423096, "learning_rate": 1.979353710485564e-05, "loss": 0.0443, "step": 20630 }, { "epoch": 0.3167830557900391, "grad_norm": 0.5936321020126343, "learning_rate": 1.9792995236196213e-05, "loss": 0.0603, "step": 20640 }, { "epoch": 0.3169365359527281, "grad_norm": 0.5928264260292053, "learning_rate": 1.9792452664828977e-05, "loss": 0.0581, "step": 20650 }, { "epoch": 0.3170900161154171, "grad_norm": 0.602182924747467, "learning_rate": 1.9791909390792874e-05, "loss": 0.054, "step": 20660 }, { "epoch": 0.31724349627810605, "grad_norm": 0.4387931823730469, "learning_rate": 1.979136541412688e-05, "loss": 0.0548, "step": 20670 }, { "epoch": 0.317396976440795, "grad_norm": 0.6383752226829529, "learning_rate": 1.9790820734870025e-05, "loss": 0.0592, "step": 20680 }, { "epoch": 0.317550456603484, "grad_norm": 0.5106366276741028, "learning_rate": 1.9790275353061402e-05, "loss": 0.0528, "step": 20690 }, { "epoch": 0.317703936766173, "grad_norm": 0.4628506004810333, "learning_rate": 1.9789729268740143e-05, "loss": 0.0432, "step": 20700 }, { "epoch": 0.31785741692886194, "grad_norm": 0.3836751878261566, "learning_rate": 1.978918248194543e-05, "loss": 0.0491, "step": 20710 }, { "epoch": 0.3180108970915509, "grad_norm": 0.4865717887878418, "learning_rate": 1.97886349927165e-05, "loss": 0.0509, "step": 20720 }, { "epoch": 0.3181643772542399, "grad_norm": 0.4671115279197693, "learning_rate": 1.978808680109264e-05, "loss": 0.0526, "step": 20730 }, { "epoch": 0.31831785741692886, "grad_norm": 0.5293611288070679, "learning_rate": 1.9787537907113185e-05, "loss": 0.0582, "step": 20740 }, { "epoch": 0.3184713375796178, "grad_norm": 0.5089760422706604, "learning_rate": 1.9786988310817523e-05, "loss": 0.0583, "step": 20750 }, { "epoch": 0.31862481774230683, "grad_norm": 0.6514373421669006, "learning_rate": 1.9786438012245086e-05, "loss": 0.0588, "step": 20760 }, { "epoch": 0.3187782979049958, "grad_norm": 0.5565792322158813, "learning_rate": 1.978588701143537e-05, "loss": 0.0471, "step": 20770 }, { "epoch": 0.31893177806768475, "grad_norm": 0.7983899712562561, "learning_rate": 1.9785335308427902e-05, "loss": 0.0667, "step": 20780 }, { "epoch": 0.3190852582303737, "grad_norm": 0.41967707872390747, "learning_rate": 1.978478290326228e-05, "loss": 0.0569, "step": 20790 }, { "epoch": 0.3192387383930627, "grad_norm": 0.6547767519950867, "learning_rate": 1.9784229795978133e-05, "loss": 0.0515, "step": 20800 }, { "epoch": 0.3193922185557517, "grad_norm": 0.6219730973243713, "learning_rate": 1.9783675986615158e-05, "loss": 0.0637, "step": 20810 }, { "epoch": 0.31954569871844063, "grad_norm": 0.7514408826828003, "learning_rate": 1.978312147521309e-05, "loss": 0.0603, "step": 20820 }, { "epoch": 0.3196991788811296, "grad_norm": 0.48035773634910583, "learning_rate": 1.978256626181172e-05, "loss": 0.0577, "step": 20830 }, { "epoch": 0.3198526590438186, "grad_norm": 0.5464568734169006, "learning_rate": 1.978201034645089e-05, "loss": 0.059, "step": 20840 }, { "epoch": 0.32000613920650756, "grad_norm": 0.5760999321937561, "learning_rate": 1.9781453729170486e-05, "loss": 0.0434, "step": 20850 }, { "epoch": 0.3201596193691965, "grad_norm": 0.43358534574508667, "learning_rate": 1.9780896410010453e-05, "loss": 0.0532, "step": 20860 }, { "epoch": 0.3203130995318855, "grad_norm": 0.3502909243106842, "learning_rate": 1.9780338389010776e-05, "loss": 0.036, "step": 20870 }, { "epoch": 0.3204665796945745, "grad_norm": 0.48173823952674866, "learning_rate": 1.9779779666211504e-05, "loss": 0.0475, "step": 20880 }, { "epoch": 0.32062005985726344, "grad_norm": 0.4877852201461792, "learning_rate": 1.9779220241652723e-05, "loss": 0.0595, "step": 20890 }, { "epoch": 0.3207735400199524, "grad_norm": 0.5152785181999207, "learning_rate": 1.977866011537458e-05, "loss": 0.0535, "step": 20900 }, { "epoch": 0.3209270201826414, "grad_norm": 0.4456159770488739, "learning_rate": 1.977809928741726e-05, "loss": 0.0402, "step": 20910 }, { "epoch": 0.32108050034533037, "grad_norm": 0.5783889293670654, "learning_rate": 1.9777537757821018e-05, "loss": 0.0583, "step": 20920 }, { "epoch": 0.3212339805080193, "grad_norm": 0.5832173824310303, "learning_rate": 1.9776975526626132e-05, "loss": 0.0617, "step": 20930 }, { "epoch": 0.32138746067070834, "grad_norm": 0.48150792717933655, "learning_rate": 1.977641259387296e-05, "loss": 0.0527, "step": 20940 }, { "epoch": 0.3215409408333973, "grad_norm": 0.6940484642982483, "learning_rate": 1.9775848959601884e-05, "loss": 0.0533, "step": 20950 }, { "epoch": 0.32169442099608625, "grad_norm": 0.6436746120452881, "learning_rate": 1.9775284623853355e-05, "loss": 0.059, "step": 20960 }, { "epoch": 0.3218479011587752, "grad_norm": 0.39783337712287903, "learning_rate": 1.977471958666787e-05, "loss": 0.0544, "step": 20970 }, { "epoch": 0.3220013813214642, "grad_norm": 0.5001217722892761, "learning_rate": 1.9774153848085968e-05, "loss": 0.0506, "step": 20980 }, { "epoch": 0.3221548614841532, "grad_norm": 0.658488929271698, "learning_rate": 1.9773587408148244e-05, "loss": 0.064, "step": 20990 }, { "epoch": 0.32230834164684213, "grad_norm": 0.560320258140564, "learning_rate": 1.9773020266895345e-05, "loss": 0.061, "step": 21000 }, { "epoch": 0.3224618218095311, "grad_norm": 0.40608200430870056, "learning_rate": 1.9772452424367975e-05, "loss": 0.0528, "step": 21010 }, { "epoch": 0.3226153019722201, "grad_norm": 0.476129949092865, "learning_rate": 1.9771883880606866e-05, "loss": 0.0577, "step": 21020 }, { "epoch": 0.32276878213490906, "grad_norm": 0.6394253969192505, "learning_rate": 1.9771314635652822e-05, "loss": 0.0501, "step": 21030 }, { "epoch": 0.322922262297598, "grad_norm": 0.5072411298751831, "learning_rate": 1.9770744689546694e-05, "loss": 0.0575, "step": 21040 }, { "epoch": 0.32307574246028703, "grad_norm": 0.4605277478694916, "learning_rate": 1.9770174042329372e-05, "loss": 0.0526, "step": 21050 }, { "epoch": 0.323229222622976, "grad_norm": 0.6488914489746094, "learning_rate": 1.9769602694041802e-05, "loss": 0.0534, "step": 21060 }, { "epoch": 0.32338270278566494, "grad_norm": 0.4967397153377533, "learning_rate": 1.976903064472499e-05, "loss": 0.0499, "step": 21070 }, { "epoch": 0.3235361829483539, "grad_norm": 0.8433883786201477, "learning_rate": 1.976845789441998e-05, "loss": 0.048, "step": 21080 }, { "epoch": 0.3236896631110429, "grad_norm": 0.7062321901321411, "learning_rate": 1.976788444316787e-05, "loss": 0.0549, "step": 21090 }, { "epoch": 0.32384314327373187, "grad_norm": 0.31965819001197815, "learning_rate": 1.9767310291009804e-05, "loss": 0.0411, "step": 21100 }, { "epoch": 0.3239966234364208, "grad_norm": 0.5055667757987976, "learning_rate": 1.976673543798699e-05, "loss": 0.0531, "step": 21110 }, { "epoch": 0.32415010359910984, "grad_norm": 0.5800830721855164, "learning_rate": 1.9766159884140673e-05, "loss": 0.0523, "step": 21120 }, { "epoch": 0.3243035837617988, "grad_norm": 0.6060787439346313, "learning_rate": 1.976558362951215e-05, "loss": 0.0601, "step": 21130 }, { "epoch": 0.32445706392448775, "grad_norm": 0.5518065690994263, "learning_rate": 1.976500667414278e-05, "loss": 0.0484, "step": 21140 }, { "epoch": 0.3246105440871767, "grad_norm": 0.4557102620601654, "learning_rate": 1.976442901807395e-05, "loss": 0.0504, "step": 21150 }, { "epoch": 0.3247640242498657, "grad_norm": 0.6747861504554749, "learning_rate": 1.976385066134712e-05, "loss": 0.0529, "step": 21160 }, { "epoch": 0.3249175044125547, "grad_norm": 0.5133000016212463, "learning_rate": 1.9763271604003784e-05, "loss": 0.0498, "step": 21170 }, { "epoch": 0.32507098457524364, "grad_norm": 0.5945244431495667, "learning_rate": 1.97626918460855e-05, "loss": 0.059, "step": 21180 }, { "epoch": 0.3252244647379326, "grad_norm": 0.6775190830230713, "learning_rate": 1.9762111387633866e-05, "loss": 0.0536, "step": 21190 }, { "epoch": 0.3253779449006216, "grad_norm": 0.6070829629898071, "learning_rate": 1.976153022869053e-05, "loss": 0.0446, "step": 21200 }, { "epoch": 0.32553142506331056, "grad_norm": 0.69745272397995, "learning_rate": 1.9760948369297197e-05, "loss": 0.0496, "step": 21210 }, { "epoch": 0.3256849052259995, "grad_norm": 0.44137051701545715, "learning_rate": 1.976036580949562e-05, "loss": 0.0471, "step": 21220 }, { "epoch": 0.32583838538868853, "grad_norm": 0.41766157746315, "learning_rate": 1.9759782549327603e-05, "loss": 0.0532, "step": 21230 }, { "epoch": 0.3259918655513775, "grad_norm": 0.6506628394126892, "learning_rate": 1.975919858883499e-05, "loss": 0.047, "step": 21240 }, { "epoch": 0.32614534571406645, "grad_norm": 0.517788827419281, "learning_rate": 1.9758613928059692e-05, "loss": 0.0575, "step": 21250 }, { "epoch": 0.3262988258767554, "grad_norm": 0.6626763939857483, "learning_rate": 1.975802856704366e-05, "loss": 0.0489, "step": 21260 }, { "epoch": 0.3264523060394444, "grad_norm": 0.9006084203720093, "learning_rate": 1.97574425058289e-05, "loss": 0.0538, "step": 21270 }, { "epoch": 0.3266057862021334, "grad_norm": 0.7156775593757629, "learning_rate": 1.9756855744457458e-05, "loss": 0.051, "step": 21280 }, { "epoch": 0.32675926636482233, "grad_norm": 0.5664573907852173, "learning_rate": 1.975626828297144e-05, "loss": 0.0484, "step": 21290 }, { "epoch": 0.32691274652751134, "grad_norm": 0.7255904078483582, "learning_rate": 1.9755680121413005e-05, "loss": 0.0525, "step": 21300 }, { "epoch": 0.3270662266902003, "grad_norm": 0.5580692887306213, "learning_rate": 1.9755091259824353e-05, "loss": 0.0513, "step": 21310 }, { "epoch": 0.32721970685288926, "grad_norm": 0.8317140340805054, "learning_rate": 1.9754501698247742e-05, "loss": 0.061, "step": 21320 }, { "epoch": 0.3273731870155782, "grad_norm": 0.6018806099891663, "learning_rate": 1.9753911436725472e-05, "loss": 0.0611, "step": 21330 }, { "epoch": 0.32752666717826723, "grad_norm": 0.6034223437309265, "learning_rate": 1.97533204752999e-05, "loss": 0.0617, "step": 21340 }, { "epoch": 0.3276801473409562, "grad_norm": 0.503838300704956, "learning_rate": 1.9752728814013433e-05, "loss": 0.0579, "step": 21350 }, { "epoch": 0.32783362750364514, "grad_norm": 0.3818899989128113, "learning_rate": 1.9752136452908522e-05, "loss": 0.0511, "step": 21360 }, { "epoch": 0.3279871076663341, "grad_norm": 0.5495842695236206, "learning_rate": 1.9751543392027678e-05, "loss": 0.0513, "step": 21370 }, { "epoch": 0.3281405878290231, "grad_norm": 0.6194201707839966, "learning_rate": 1.9750949631413455e-05, "loss": 0.0663, "step": 21380 }, { "epoch": 0.32829406799171207, "grad_norm": 0.4493919909000397, "learning_rate": 1.9750355171108455e-05, "loss": 0.0464, "step": 21390 }, { "epoch": 0.328447548154401, "grad_norm": 0.5088135004043579, "learning_rate": 1.974976001115534e-05, "loss": 0.0694, "step": 21400 }, { "epoch": 0.32860102831709004, "grad_norm": 0.47493427991867065, "learning_rate": 1.974916415159681e-05, "loss": 0.0451, "step": 21410 }, { "epoch": 0.328754508479779, "grad_norm": 0.6787830591201782, "learning_rate": 1.9748567592475625e-05, "loss": 0.0521, "step": 21420 }, { "epoch": 0.32890798864246795, "grad_norm": 0.6124855875968933, "learning_rate": 1.9747970333834593e-05, "loss": 0.0655, "step": 21430 }, { "epoch": 0.3290614688051569, "grad_norm": 0.4626416265964508, "learning_rate": 1.974737237571657e-05, "loss": 0.0579, "step": 21440 }, { "epoch": 0.3292149489678459, "grad_norm": 0.3635717034339905, "learning_rate": 1.9746773718164465e-05, "loss": 0.0473, "step": 21450 }, { "epoch": 0.3293684291305349, "grad_norm": 0.48863789439201355, "learning_rate": 1.9746174361221232e-05, "loss": 0.051, "step": 21460 }, { "epoch": 0.32952190929322384, "grad_norm": 0.5625522136688232, "learning_rate": 1.974557430492988e-05, "loss": 0.0515, "step": 21470 }, { "epoch": 0.32967538945591285, "grad_norm": 0.48836201429367065, "learning_rate": 1.9744973549333468e-05, "loss": 0.0602, "step": 21480 }, { "epoch": 0.3298288696186018, "grad_norm": 0.5535566210746765, "learning_rate": 1.97443720944751e-05, "loss": 0.0551, "step": 21490 }, { "epoch": 0.32998234978129076, "grad_norm": 0.5227598547935486, "learning_rate": 1.974376994039794e-05, "loss": 0.0506, "step": 21500 }, { "epoch": 0.3301358299439797, "grad_norm": 0.4731796383857727, "learning_rate": 1.9743167087145193e-05, "loss": 0.0569, "step": 21510 }, { "epoch": 0.33028931010666873, "grad_norm": 0.4179776906967163, "learning_rate": 1.9742563534760114e-05, "loss": 0.0415, "step": 21520 }, { "epoch": 0.3304427902693577, "grad_norm": 0.5096960067749023, "learning_rate": 1.9741959283286018e-05, "loss": 0.049, "step": 21530 }, { "epoch": 0.33059627043204665, "grad_norm": 0.42588508129119873, "learning_rate": 1.974135433276626e-05, "loss": 0.0402, "step": 21540 }, { "epoch": 0.3307497505947356, "grad_norm": 0.5280384421348572, "learning_rate": 1.974074868324425e-05, "loss": 0.0453, "step": 21550 }, { "epoch": 0.3309032307574246, "grad_norm": 0.5126922130584717, "learning_rate": 1.974014233476345e-05, "loss": 0.0565, "step": 21560 }, { "epoch": 0.3310567109201136, "grad_norm": 0.5345509052276611, "learning_rate": 1.973953528736736e-05, "loss": 0.0536, "step": 21570 }, { "epoch": 0.33121019108280253, "grad_norm": 0.48278847336769104, "learning_rate": 1.973892754109955e-05, "loss": 0.0557, "step": 21580 }, { "epoch": 0.33136367124549154, "grad_norm": 0.35123926401138306, "learning_rate": 1.9738319096003625e-05, "loss": 0.0589, "step": 21590 }, { "epoch": 0.3315171514081805, "grad_norm": 0.5929670333862305, "learning_rate": 1.9737709952123245e-05, "loss": 0.0511, "step": 21600 }, { "epoch": 0.33167063157086946, "grad_norm": 0.5853722095489502, "learning_rate": 1.9737100109502117e-05, "loss": 0.0533, "step": 21610 }, { "epoch": 0.3318241117335584, "grad_norm": 0.6268092393875122, "learning_rate": 1.9736489568184002e-05, "loss": 0.0559, "step": 21620 }, { "epoch": 0.3319775918962474, "grad_norm": 0.3996371924877167, "learning_rate": 1.9735878328212716e-05, "loss": 0.0373, "step": 21630 }, { "epoch": 0.3321310720589364, "grad_norm": 0.462505042552948, "learning_rate": 1.9735266389632114e-05, "loss": 0.0539, "step": 21640 }, { "epoch": 0.33228455222162534, "grad_norm": 0.5379521250724792, "learning_rate": 1.9734653752486107e-05, "loss": 0.0526, "step": 21650 }, { "epoch": 0.33243803238431435, "grad_norm": 0.6377729177474976, "learning_rate": 1.9734040416818653e-05, "loss": 0.0512, "step": 21660 }, { "epoch": 0.3325915125470033, "grad_norm": 0.49154967069625854, "learning_rate": 1.973342638267377e-05, "loss": 0.0486, "step": 21670 }, { "epoch": 0.33274499270969227, "grad_norm": 0.35744860768318176, "learning_rate": 1.9732811650095512e-05, "loss": 0.0402, "step": 21680 }, { "epoch": 0.3328984728723812, "grad_norm": 0.5883060693740845, "learning_rate": 1.973219621912799e-05, "loss": 0.0518, "step": 21690 }, { "epoch": 0.33305195303507024, "grad_norm": 0.625545859336853, "learning_rate": 1.973158008981537e-05, "loss": 0.0579, "step": 21700 }, { "epoch": 0.3332054331977592, "grad_norm": 0.5455380082130432, "learning_rate": 1.9730963262201858e-05, "loss": 0.0492, "step": 21710 }, { "epoch": 0.33335891336044815, "grad_norm": 0.6872968673706055, "learning_rate": 1.973034573633172e-05, "loss": 0.0474, "step": 21720 }, { "epoch": 0.3335123935231371, "grad_norm": 0.6314665079116821, "learning_rate": 1.9729727512249262e-05, "loss": 0.0536, "step": 21730 }, { "epoch": 0.3336658736858261, "grad_norm": 0.44385233521461487, "learning_rate": 1.972910858999885e-05, "loss": 0.0509, "step": 21740 }, { "epoch": 0.3338193538485151, "grad_norm": 0.48189419507980347, "learning_rate": 1.9728488969624893e-05, "loss": 0.0511, "step": 21750 }, { "epoch": 0.33397283401120403, "grad_norm": 0.3886387050151825, "learning_rate": 1.9727868651171852e-05, "loss": 0.0504, "step": 21760 }, { "epoch": 0.33412631417389305, "grad_norm": 0.40484562516212463, "learning_rate": 1.9727247634684235e-05, "loss": 0.056, "step": 21770 }, { "epoch": 0.334279794336582, "grad_norm": 0.4038989245891571, "learning_rate": 1.9726625920206616e-05, "loss": 0.0523, "step": 21780 }, { "epoch": 0.33443327449927096, "grad_norm": 0.5538970232009888, "learning_rate": 1.97260035077836e-05, "loss": 0.0533, "step": 21790 }, { "epoch": 0.3345867546619599, "grad_norm": 0.4747280180454254, "learning_rate": 1.972538039745984e-05, "loss": 0.0506, "step": 21800 }, { "epoch": 0.33474023482464893, "grad_norm": 0.5140469074249268, "learning_rate": 1.9724756589280063e-05, "loss": 0.0434, "step": 21810 }, { "epoch": 0.3348937149873379, "grad_norm": 0.5408921241760254, "learning_rate": 1.972413208328902e-05, "loss": 0.0512, "step": 21820 }, { "epoch": 0.33504719515002684, "grad_norm": 0.47714850306510925, "learning_rate": 1.972350687953153e-05, "loss": 0.0497, "step": 21830 }, { "epoch": 0.33520067531271586, "grad_norm": 0.5908259153366089, "learning_rate": 1.9722880978052453e-05, "loss": 0.0519, "step": 21840 }, { "epoch": 0.3353541554754048, "grad_norm": 0.4716876149177551, "learning_rate": 1.97222543788967e-05, "loss": 0.0502, "step": 21850 }, { "epoch": 0.33550763563809377, "grad_norm": 0.4825022518634796, "learning_rate": 1.9721627082109234e-05, "loss": 0.0436, "step": 21860 }, { "epoch": 0.33566111580078273, "grad_norm": 0.43353742361068726, "learning_rate": 1.972099908773507e-05, "loss": 0.0443, "step": 21870 }, { "epoch": 0.33581459596347174, "grad_norm": 0.3959461450576782, "learning_rate": 1.9720370395819265e-05, "loss": 0.0462, "step": 21880 }, { "epoch": 0.3359680761261607, "grad_norm": 0.4961733818054199, "learning_rate": 1.9719741006406936e-05, "loss": 0.046, "step": 21890 }, { "epoch": 0.33612155628884965, "grad_norm": 0.6893739104270935, "learning_rate": 1.9719110919543246e-05, "loss": 0.0592, "step": 21900 }, { "epoch": 0.3362750364515386, "grad_norm": 0.34341099858283997, "learning_rate": 1.9718480135273404e-05, "loss": 0.0486, "step": 21910 }, { "epoch": 0.3364285166142276, "grad_norm": 0.7372750639915466, "learning_rate": 1.9717848653642675e-05, "loss": 0.0535, "step": 21920 }, { "epoch": 0.3365819967769166, "grad_norm": 0.7046477198600769, "learning_rate": 1.971721647469637e-05, "loss": 0.0652, "step": 21930 }, { "epoch": 0.33673547693960554, "grad_norm": 0.4887336492538452, "learning_rate": 1.9716583598479854e-05, "loss": 0.0524, "step": 21940 }, { "epoch": 0.33688895710229455, "grad_norm": 0.5182104110717773, "learning_rate": 1.971595002503854e-05, "loss": 0.0561, "step": 21950 }, { "epoch": 0.3370424372649835, "grad_norm": 0.7876673936843872, "learning_rate": 1.971531575441789e-05, "loss": 0.0491, "step": 21960 }, { "epoch": 0.33719591742767246, "grad_norm": 0.6497624516487122, "learning_rate": 1.9714680786663413e-05, "loss": 0.0484, "step": 21970 }, { "epoch": 0.3373493975903614, "grad_norm": 0.3976857662200928, "learning_rate": 1.9714045121820676e-05, "loss": 0.0591, "step": 21980 }, { "epoch": 0.33750287775305043, "grad_norm": 0.5297741889953613, "learning_rate": 1.9713408759935296e-05, "loss": 0.0462, "step": 21990 }, { "epoch": 0.3376563579157394, "grad_norm": 0.47390812635421753, "learning_rate": 1.9712771701052927e-05, "loss": 0.0412, "step": 22000 }, { "epoch": 0.33780983807842835, "grad_norm": 0.537269115447998, "learning_rate": 1.9712133945219286e-05, "loss": 0.0555, "step": 22010 }, { "epoch": 0.33796331824111736, "grad_norm": 0.4297369718551636, "learning_rate": 1.9711495492480137e-05, "loss": 0.0511, "step": 22020 }, { "epoch": 0.3381167984038063, "grad_norm": 0.6128020882606506, "learning_rate": 1.971085634288129e-05, "loss": 0.0564, "step": 22030 }, { "epoch": 0.3382702785664953, "grad_norm": 0.5522467494010925, "learning_rate": 1.971021649646861e-05, "loss": 0.0624, "step": 22040 }, { "epoch": 0.33842375872918423, "grad_norm": 0.6772356629371643, "learning_rate": 1.970957595328801e-05, "loss": 0.0533, "step": 22050 }, { "epoch": 0.33857723889187324, "grad_norm": 0.6998868584632874, "learning_rate": 1.9708934713385456e-05, "loss": 0.0462, "step": 22060 }, { "epoch": 0.3387307190545622, "grad_norm": 0.43450263142585754, "learning_rate": 1.970829277680695e-05, "loss": 0.0437, "step": 22070 }, { "epoch": 0.33888419921725116, "grad_norm": 0.44709011912345886, "learning_rate": 1.970765014359857e-05, "loss": 0.0465, "step": 22080 }, { "epoch": 0.3390376793799401, "grad_norm": 0.3525497019290924, "learning_rate": 1.970700681380642e-05, "loss": 0.0424, "step": 22090 }, { "epoch": 0.33919115954262913, "grad_norm": 0.5524181723594666, "learning_rate": 1.970636278747666e-05, "loss": 0.055, "step": 22100 }, { "epoch": 0.3393446397053181, "grad_norm": 0.45504453778266907, "learning_rate": 1.9705718064655508e-05, "loss": 0.0436, "step": 22110 }, { "epoch": 0.33949811986800704, "grad_norm": 0.49887794256210327, "learning_rate": 1.9705072645389228e-05, "loss": 0.0537, "step": 22120 }, { "epoch": 0.33965160003069605, "grad_norm": 0.5094966888427734, "learning_rate": 1.970442652972413e-05, "loss": 0.0608, "step": 22130 }, { "epoch": 0.339805080193385, "grad_norm": 0.4599212110042572, "learning_rate": 1.9703779717706576e-05, "loss": 0.0443, "step": 22140 }, { "epoch": 0.33995856035607397, "grad_norm": 0.4217885136604309, "learning_rate": 1.970313220938298e-05, "loss": 0.059, "step": 22150 }, { "epoch": 0.3401120405187629, "grad_norm": 0.4737890064716339, "learning_rate": 1.9702484004799806e-05, "loss": 0.0503, "step": 22160 }, { "epoch": 0.34026552068145194, "grad_norm": 0.39463257789611816, "learning_rate": 1.9701835104003568e-05, "loss": 0.0469, "step": 22170 }, { "epoch": 0.3404190008441409, "grad_norm": 0.4889739751815796, "learning_rate": 1.9701185507040823e-05, "loss": 0.0509, "step": 22180 }, { "epoch": 0.34057248100682985, "grad_norm": 0.4265521466732025, "learning_rate": 1.9700535213958186e-05, "loss": 0.0441, "step": 22190 }, { "epoch": 0.34072596116951887, "grad_norm": 0.5506619215011597, "learning_rate": 1.9699884224802324e-05, "loss": 0.0652, "step": 22200 }, { "epoch": 0.3408794413322078, "grad_norm": 0.4364302158355713, "learning_rate": 1.9699232539619944e-05, "loss": 0.0512, "step": 22210 }, { "epoch": 0.3410329214948968, "grad_norm": 0.45697665214538574, "learning_rate": 1.969858015845781e-05, "loss": 0.0605, "step": 22220 }, { "epoch": 0.34118640165758574, "grad_norm": 0.5813705325126648, "learning_rate": 1.9697927081362736e-05, "loss": 0.0515, "step": 22230 }, { "epoch": 0.34133988182027475, "grad_norm": 0.4584566652774811, "learning_rate": 1.969727330838158e-05, "loss": 0.0503, "step": 22240 }, { "epoch": 0.3414933619829637, "grad_norm": 0.46070224046707153, "learning_rate": 1.9696618839561263e-05, "loss": 0.0611, "step": 22250 }, { "epoch": 0.34164684214565266, "grad_norm": 0.4997563064098358, "learning_rate": 1.9695963674948735e-05, "loss": 0.0427, "step": 22260 }, { "epoch": 0.3418003223083416, "grad_norm": 0.5679104328155518, "learning_rate": 1.969530781459102e-05, "loss": 0.0464, "step": 22270 }, { "epoch": 0.34195380247103063, "grad_norm": 0.530456006526947, "learning_rate": 1.969465125853517e-05, "loss": 0.0489, "step": 22280 }, { "epoch": 0.3421072826337196, "grad_norm": 0.5117681622505188, "learning_rate": 1.9693994006828304e-05, "loss": 0.0427, "step": 22290 }, { "epoch": 0.34226076279640855, "grad_norm": 0.5320906043052673, "learning_rate": 1.9693336059517578e-05, "loss": 0.0521, "step": 22300 }, { "epoch": 0.34241424295909756, "grad_norm": 0.44272276759147644, "learning_rate": 1.969267741665021e-05, "loss": 0.042, "step": 22310 }, { "epoch": 0.3425677231217865, "grad_norm": 0.547259509563446, "learning_rate": 1.9692018078273457e-05, "loss": 0.0593, "step": 22320 }, { "epoch": 0.3427212032844755, "grad_norm": 0.5212626457214355, "learning_rate": 1.969135804443463e-05, "loss": 0.0432, "step": 22330 }, { "epoch": 0.34287468344716443, "grad_norm": 0.5761920213699341, "learning_rate": 1.96906973151811e-05, "loss": 0.0501, "step": 22340 }, { "epoch": 0.34302816360985344, "grad_norm": 0.440202534198761, "learning_rate": 1.9690035890560265e-05, "loss": 0.0494, "step": 22350 }, { "epoch": 0.3431816437725424, "grad_norm": 0.4819585978984833, "learning_rate": 1.96893737706196e-05, "loss": 0.0474, "step": 22360 }, { "epoch": 0.34333512393523136, "grad_norm": 0.9174756407737732, "learning_rate": 1.96887109554066e-05, "loss": 0.0476, "step": 22370 }, { "epoch": 0.34348860409792037, "grad_norm": 0.5967981219291687, "learning_rate": 1.9688047444968837e-05, "loss": 0.0455, "step": 22380 }, { "epoch": 0.3436420842606093, "grad_norm": 0.4231306314468384, "learning_rate": 1.9687383239353924e-05, "loss": 0.0496, "step": 22390 }, { "epoch": 0.3437955644232983, "grad_norm": 0.6368273496627808, "learning_rate": 1.9686718338609517e-05, "loss": 0.056, "step": 22400 }, { "epoch": 0.34394904458598724, "grad_norm": 0.6411020755767822, "learning_rate": 1.9686052742783324e-05, "loss": 0.0499, "step": 22410 }, { "epoch": 0.34410252474867625, "grad_norm": 0.4039333462715149, "learning_rate": 1.968538645192311e-05, "loss": 0.0591, "step": 22420 }, { "epoch": 0.3442560049113652, "grad_norm": 0.583142876625061, "learning_rate": 1.9684719466076684e-05, "loss": 0.0484, "step": 22430 }, { "epoch": 0.34440948507405417, "grad_norm": 0.5382466316223145, "learning_rate": 1.968405178529191e-05, "loss": 0.056, "step": 22440 }, { "epoch": 0.3445629652367431, "grad_norm": 0.48831266164779663, "learning_rate": 1.9683383409616692e-05, "loss": 0.0497, "step": 22450 }, { "epoch": 0.34471644539943214, "grad_norm": 0.4239193797111511, "learning_rate": 1.9682714339098994e-05, "loss": 0.0593, "step": 22460 }, { "epoch": 0.3448699255621211, "grad_norm": 0.5473684668540955, "learning_rate": 1.9682044573786826e-05, "loss": 0.0458, "step": 22470 }, { "epoch": 0.34502340572481005, "grad_norm": 0.6231057047843933, "learning_rate": 1.9681374113728244e-05, "loss": 0.045, "step": 22480 }, { "epoch": 0.34517688588749906, "grad_norm": 0.60584557056427, "learning_rate": 1.968070295897136e-05, "loss": 0.0576, "step": 22490 }, { "epoch": 0.345330366050188, "grad_norm": 0.6619982719421387, "learning_rate": 1.9680031109564338e-05, "loss": 0.0502, "step": 22500 }, { "epoch": 0.345483846212877, "grad_norm": 0.39454224705696106, "learning_rate": 1.9679358565555383e-05, "loss": 0.0342, "step": 22510 }, { "epoch": 0.34563732637556593, "grad_norm": 0.6331418752670288, "learning_rate": 1.9678685326992754e-05, "loss": 0.0487, "step": 22520 }, { "epoch": 0.34579080653825495, "grad_norm": 0.49991342425346375, "learning_rate": 1.9678011393924758e-05, "loss": 0.0533, "step": 22530 }, { "epoch": 0.3459442867009439, "grad_norm": 0.5275688767433167, "learning_rate": 1.9677336766399758e-05, "loss": 0.053, "step": 22540 }, { "epoch": 0.34609776686363286, "grad_norm": 0.7824323773384094, "learning_rate": 1.967666144446616e-05, "loss": 0.0519, "step": 22550 }, { "epoch": 0.3462512470263219, "grad_norm": 0.5626983046531677, "learning_rate": 1.9675985428172422e-05, "loss": 0.0453, "step": 22560 }, { "epoch": 0.34640472718901083, "grad_norm": 0.6077854037284851, "learning_rate": 1.9675308717567058e-05, "loss": 0.0475, "step": 22570 }, { "epoch": 0.3465582073516998, "grad_norm": 0.44776207208633423, "learning_rate": 1.967463131269862e-05, "loss": 0.0426, "step": 22580 }, { "epoch": 0.34671168751438874, "grad_norm": 0.47049376368522644, "learning_rate": 1.967395321361572e-05, "loss": 0.0503, "step": 22590 }, { "epoch": 0.34686516767707776, "grad_norm": 0.8908557295799255, "learning_rate": 1.967327442036701e-05, "loss": 0.0504, "step": 22600 }, { "epoch": 0.3470186478397667, "grad_norm": 0.5515998005867004, "learning_rate": 1.9672594933001203e-05, "loss": 0.0607, "step": 22610 }, { "epoch": 0.34717212800245567, "grad_norm": 0.5673224329948425, "learning_rate": 1.9671914751567058e-05, "loss": 0.0515, "step": 22620 }, { "epoch": 0.34732560816514463, "grad_norm": 0.7329588532447815, "learning_rate": 1.9671233876113373e-05, "loss": 0.0537, "step": 22630 }, { "epoch": 0.34747908832783364, "grad_norm": 0.4509914517402649, "learning_rate": 1.9670552306689017e-05, "loss": 0.0488, "step": 22640 }, { "epoch": 0.3476325684905226, "grad_norm": 0.6909794807434082, "learning_rate": 1.9669870043342887e-05, "loss": 0.0523, "step": 22650 }, { "epoch": 0.34778604865321155, "grad_norm": 0.4931316077709198, "learning_rate": 1.9669187086123944e-05, "loss": 0.0582, "step": 22660 }, { "epoch": 0.34793952881590057, "grad_norm": 0.5844268798828125, "learning_rate": 1.9668503435081195e-05, "loss": 0.0566, "step": 22670 }, { "epoch": 0.3480930089785895, "grad_norm": 0.48825812339782715, "learning_rate": 1.9667819090263697e-05, "loss": 0.0484, "step": 22680 }, { "epoch": 0.3482464891412785, "grad_norm": 0.4101269543170929, "learning_rate": 1.966713405172055e-05, "loss": 0.0616, "step": 22690 }, { "epoch": 0.34839996930396744, "grad_norm": 0.5733222961425781, "learning_rate": 1.9666448319500916e-05, "loss": 0.0398, "step": 22700 }, { "epoch": 0.34855344946665645, "grad_norm": 0.7358404994010925, "learning_rate": 1.9665761893653997e-05, "loss": 0.0512, "step": 22710 }, { "epoch": 0.3487069296293454, "grad_norm": 0.5359091758728027, "learning_rate": 1.966507477422905e-05, "loss": 0.0531, "step": 22720 }, { "epoch": 0.34886040979203436, "grad_norm": 0.5678215026855469, "learning_rate": 1.966438696127538e-05, "loss": 0.06, "step": 22730 }, { "epoch": 0.3490138899547234, "grad_norm": 0.6959419250488281, "learning_rate": 1.966369845484234e-05, "loss": 0.0571, "step": 22740 }, { "epoch": 0.34916737011741233, "grad_norm": 0.8387916684150696, "learning_rate": 1.966300925497934e-05, "loss": 0.0459, "step": 22750 }, { "epoch": 0.3493208502801013, "grad_norm": 0.5017713308334351, "learning_rate": 1.9662319361735827e-05, "loss": 0.039, "step": 22760 }, { "epoch": 0.34947433044279025, "grad_norm": 0.4588607847690582, "learning_rate": 1.966162877516131e-05, "loss": 0.0458, "step": 22770 }, { "epoch": 0.34962781060547926, "grad_norm": 0.5891209840774536, "learning_rate": 1.966093749530534e-05, "loss": 0.0499, "step": 22780 }, { "epoch": 0.3497812907681682, "grad_norm": 0.6025794744491577, "learning_rate": 1.966024552221752e-05, "loss": 0.0502, "step": 22790 }, { "epoch": 0.3499347709308572, "grad_norm": 0.5923450589179993, "learning_rate": 1.9659552855947508e-05, "loss": 0.0589, "step": 22800 }, { "epoch": 0.35008825109354613, "grad_norm": 0.6060760617256165, "learning_rate": 1.9658859496545005e-05, "loss": 0.0409, "step": 22810 }, { "epoch": 0.35024173125623514, "grad_norm": 0.5784064531326294, "learning_rate": 1.9658165444059762e-05, "loss": 0.0558, "step": 22820 }, { "epoch": 0.3503952114189241, "grad_norm": 0.4140236973762512, "learning_rate": 1.965747069854158e-05, "loss": 0.0439, "step": 22830 }, { "epoch": 0.35054869158161306, "grad_norm": 0.4776034355163574, "learning_rate": 1.965677526004032e-05, "loss": 0.0414, "step": 22840 }, { "epoch": 0.35070217174430207, "grad_norm": 0.5588324069976807, "learning_rate": 1.9656079128605872e-05, "loss": 0.0487, "step": 22850 }, { "epoch": 0.35085565190699103, "grad_norm": 0.41752567887306213, "learning_rate": 1.9655382304288196e-05, "loss": 0.0519, "step": 22860 }, { "epoch": 0.35100913206968, "grad_norm": 0.5594543218612671, "learning_rate": 1.9654684787137292e-05, "loss": 0.0515, "step": 22870 }, { "epoch": 0.35116261223236894, "grad_norm": 0.49117568135261536, "learning_rate": 1.965398657720321e-05, "loss": 0.0535, "step": 22880 }, { "epoch": 0.35131609239505796, "grad_norm": 0.566739559173584, "learning_rate": 1.965328767453605e-05, "loss": 0.0533, "step": 22890 }, { "epoch": 0.3514695725577469, "grad_norm": 0.47757548093795776, "learning_rate": 1.9652588079185962e-05, "loss": 0.0525, "step": 22900 }, { "epoch": 0.35162305272043587, "grad_norm": 0.5066211223602295, "learning_rate": 1.965188779120315e-05, "loss": 0.051, "step": 22910 }, { "epoch": 0.3517765328831249, "grad_norm": 0.6065613031387329, "learning_rate": 1.9651186810637863e-05, "loss": 0.0498, "step": 22920 }, { "epoch": 0.35193001304581384, "grad_norm": 0.3990791440010071, "learning_rate": 1.9650485137540397e-05, "loss": 0.0575, "step": 22930 }, { "epoch": 0.3520834932085028, "grad_norm": 0.5112279057502747, "learning_rate": 1.964978277196111e-05, "loss": 0.0544, "step": 22940 }, { "epoch": 0.35223697337119175, "grad_norm": 0.7477778792381287, "learning_rate": 1.964907971395039e-05, "loss": 0.0566, "step": 22950 }, { "epoch": 0.35239045353388077, "grad_norm": 0.6492113471031189, "learning_rate": 1.964837596355869e-05, "loss": 0.0504, "step": 22960 }, { "epoch": 0.3525439336965697, "grad_norm": 0.5431203842163086, "learning_rate": 1.9647671520836513e-05, "loss": 0.0594, "step": 22970 }, { "epoch": 0.3526974138592587, "grad_norm": 0.39352288842201233, "learning_rate": 1.96469663858344e-05, "loss": 0.0488, "step": 22980 }, { "epoch": 0.35285089402194764, "grad_norm": 0.46411439776420593, "learning_rate": 1.964626055860295e-05, "loss": 0.0516, "step": 22990 }, { "epoch": 0.35300437418463665, "grad_norm": 0.4345919191837311, "learning_rate": 1.964555403919282e-05, "loss": 0.0515, "step": 23000 }, { "epoch": 0.3531578543473256, "grad_norm": 0.5660200119018555, "learning_rate": 1.9644846827654695e-05, "loss": 0.0489, "step": 23010 }, { "epoch": 0.35331133451001456, "grad_norm": 0.4058748483657837, "learning_rate": 1.9644138924039326e-05, "loss": 0.0537, "step": 23020 }, { "epoch": 0.3534648146727036, "grad_norm": 0.5852761268615723, "learning_rate": 1.9643430328397514e-05, "loss": 0.0558, "step": 23030 }, { "epoch": 0.35361829483539253, "grad_norm": 0.5761720538139343, "learning_rate": 1.9642721040780098e-05, "loss": 0.0523, "step": 23040 }, { "epoch": 0.3537717749980815, "grad_norm": 0.44351550936698914, "learning_rate": 1.9642011061237974e-05, "loss": 0.0547, "step": 23050 }, { "epoch": 0.35392525516077045, "grad_norm": 0.6654773950576782, "learning_rate": 1.9641300389822097e-05, "loss": 0.0496, "step": 23060 }, { "epoch": 0.35407873532345946, "grad_norm": 0.6520901322364807, "learning_rate": 1.964058902658345e-05, "loss": 0.0584, "step": 23070 }, { "epoch": 0.3542322154861484, "grad_norm": 0.49000734090805054, "learning_rate": 1.9639876971573085e-05, "loss": 0.0567, "step": 23080 }, { "epoch": 0.3543856956488374, "grad_norm": 0.5197575092315674, "learning_rate": 1.9639164224842092e-05, "loss": 0.0712, "step": 23090 }, { "epoch": 0.3545391758115264, "grad_norm": 0.48912298679351807, "learning_rate": 1.963845078644162e-05, "loss": 0.0441, "step": 23100 }, { "epoch": 0.35469265597421534, "grad_norm": 0.4088694453239441, "learning_rate": 1.9637736656422862e-05, "loss": 0.0458, "step": 23110 }, { "epoch": 0.3548461361369043, "grad_norm": 0.5161069631576538, "learning_rate": 1.9637021834837054e-05, "loss": 0.0534, "step": 23120 }, { "epoch": 0.35499961629959326, "grad_norm": 0.6021214723587036, "learning_rate": 1.9636306321735497e-05, "loss": 0.0543, "step": 23130 }, { "epoch": 0.35515309646228227, "grad_norm": 0.4810900390148163, "learning_rate": 1.963559011716953e-05, "loss": 0.0519, "step": 23140 }, { "epoch": 0.3553065766249712, "grad_norm": 0.5988573431968689, "learning_rate": 1.963487322119055e-05, "loss": 0.0531, "step": 23150 }, { "epoch": 0.3554600567876602, "grad_norm": 0.5058398842811584, "learning_rate": 1.9634155633849987e-05, "loss": 0.0644, "step": 23160 }, { "epoch": 0.35561353695034914, "grad_norm": 0.6303020119667053, "learning_rate": 1.963343735519934e-05, "loss": 0.0575, "step": 23170 }, { "epoch": 0.35576701711303815, "grad_norm": 0.4625372886657715, "learning_rate": 1.9632718385290156e-05, "loss": 0.0467, "step": 23180 }, { "epoch": 0.3559204972757271, "grad_norm": 0.42394283413887024, "learning_rate": 1.9631998724174012e-05, "loss": 0.0426, "step": 23190 }, { "epoch": 0.35607397743841607, "grad_norm": 0.7450016140937805, "learning_rate": 1.963127837190256e-05, "loss": 0.0491, "step": 23200 }, { "epoch": 0.3562274576011051, "grad_norm": 0.5268837809562683, "learning_rate": 1.9630557328527482e-05, "loss": 0.0541, "step": 23210 }, { "epoch": 0.35638093776379404, "grad_norm": 0.347356915473938, "learning_rate": 1.9629835594100522e-05, "loss": 0.0541, "step": 23220 }, { "epoch": 0.356534417926483, "grad_norm": 0.6689332127571106, "learning_rate": 1.962911316867347e-05, "loss": 0.0609, "step": 23230 }, { "epoch": 0.35668789808917195, "grad_norm": 0.678989827632904, "learning_rate": 1.9628390052298155e-05, "loss": 0.0566, "step": 23240 }, { "epoch": 0.35684137825186096, "grad_norm": 0.3303671181201935, "learning_rate": 1.9627666245026474e-05, "loss": 0.0413, "step": 23250 }, { "epoch": 0.3569948584145499, "grad_norm": 0.5154855251312256, "learning_rate": 1.9626941746910365e-05, "loss": 0.0485, "step": 23260 }, { "epoch": 0.3571483385772389, "grad_norm": 0.5561647415161133, "learning_rate": 1.9626216558001808e-05, "loss": 0.0439, "step": 23270 }, { "epoch": 0.3573018187399279, "grad_norm": 0.47501111030578613, "learning_rate": 1.9625490678352847e-05, "loss": 0.053, "step": 23280 }, { "epoch": 0.35745529890261685, "grad_norm": 0.4835786819458008, "learning_rate": 1.9624764108015566e-05, "loss": 0.0497, "step": 23290 }, { "epoch": 0.3576087790653058, "grad_norm": 0.5603644847869873, "learning_rate": 1.96240368470421e-05, "loss": 0.0494, "step": 23300 }, { "epoch": 0.35776225922799476, "grad_norm": 0.46110692620277405, "learning_rate": 1.9623308895484635e-05, "loss": 0.0447, "step": 23310 }, { "epoch": 0.3579157393906838, "grad_norm": 0.5510023236274719, "learning_rate": 1.9622580253395408e-05, "loss": 0.0427, "step": 23320 }, { "epoch": 0.35806921955337273, "grad_norm": 0.5411784648895264, "learning_rate": 1.9621850920826697e-05, "loss": 0.0696, "step": 23330 }, { "epoch": 0.3582226997160617, "grad_norm": 0.5900686979293823, "learning_rate": 1.9621120897830844e-05, "loss": 0.052, "step": 23340 }, { "epoch": 0.35837617987875064, "grad_norm": 0.4058760106563568, "learning_rate": 1.9620390184460227e-05, "loss": 0.0505, "step": 23350 }, { "epoch": 0.35852966004143966, "grad_norm": 0.5109518766403198, "learning_rate": 1.9619658780767283e-05, "loss": 0.0438, "step": 23360 }, { "epoch": 0.3586831402041286, "grad_norm": 0.351979523897171, "learning_rate": 1.9618926686804492e-05, "loss": 0.0456, "step": 23370 }, { "epoch": 0.35883662036681757, "grad_norm": 0.5207395553588867, "learning_rate": 1.9618193902624393e-05, "loss": 0.0446, "step": 23380 }, { "epoch": 0.3589901005295066, "grad_norm": 0.4742363691329956, "learning_rate": 1.9617460428279557e-05, "loss": 0.0472, "step": 23390 }, { "epoch": 0.35914358069219554, "grad_norm": 0.35818907618522644, "learning_rate": 1.9616726263822617e-05, "loss": 0.0478, "step": 23400 }, { "epoch": 0.3592970608548845, "grad_norm": 0.7319839000701904, "learning_rate": 1.9615991409306265e-05, "loss": 0.0575, "step": 23410 }, { "epoch": 0.35945054101757345, "grad_norm": 0.6273929476737976, "learning_rate": 1.961525586478322e-05, "loss": 0.0522, "step": 23420 }, { "epoch": 0.35960402118026247, "grad_norm": 0.6631022095680237, "learning_rate": 1.9614519630306268e-05, "loss": 0.0526, "step": 23430 }, { "epoch": 0.3597575013429514, "grad_norm": 0.6804252862930298, "learning_rate": 1.961378270592823e-05, "loss": 0.06, "step": 23440 }, { "epoch": 0.3599109815056404, "grad_norm": 0.5273194909095764, "learning_rate": 1.9613045091701996e-05, "loss": 0.0506, "step": 23450 }, { "epoch": 0.3600644616683294, "grad_norm": 0.46915891766548157, "learning_rate": 1.961230678768049e-05, "loss": 0.0395, "step": 23460 }, { "epoch": 0.36021794183101835, "grad_norm": 0.5913196206092834, "learning_rate": 1.9611567793916687e-05, "loss": 0.0562, "step": 23470 }, { "epoch": 0.3603714219937073, "grad_norm": 0.5686236619949341, "learning_rate": 1.9610828110463615e-05, "loss": 0.0562, "step": 23480 }, { "epoch": 0.36052490215639627, "grad_norm": 0.6907619833946228, "learning_rate": 1.9610087737374356e-05, "loss": 0.044, "step": 23490 }, { "epoch": 0.3606783823190853, "grad_norm": 0.5654131174087524, "learning_rate": 1.9609346674702032e-05, "loss": 0.0505, "step": 23500 }, { "epoch": 0.36083186248177423, "grad_norm": 0.42021897435188293, "learning_rate": 1.9608604922499817e-05, "loss": 0.0446, "step": 23510 }, { "epoch": 0.3609853426444632, "grad_norm": 0.3239342272281647, "learning_rate": 1.9607862480820938e-05, "loss": 0.0489, "step": 23520 }, { "epoch": 0.36113882280715215, "grad_norm": 0.5151088833808899, "learning_rate": 1.9607119349718677e-05, "loss": 0.0521, "step": 23530 }, { "epoch": 0.36129230296984116, "grad_norm": 0.5321199893951416, "learning_rate": 1.9606375529246346e-05, "loss": 0.0471, "step": 23540 }, { "epoch": 0.3614457831325301, "grad_norm": 0.6635783314704895, "learning_rate": 1.9605631019457325e-05, "loss": 0.0414, "step": 23550 }, { "epoch": 0.3615992632952191, "grad_norm": 0.5875768661499023, "learning_rate": 1.960488582040504e-05, "loss": 0.0581, "step": 23560 }, { "epoch": 0.3617527434579081, "grad_norm": 0.4553163945674896, "learning_rate": 1.9604139932142954e-05, "loss": 0.0447, "step": 23570 }, { "epoch": 0.36190622362059705, "grad_norm": 0.6118851900100708, "learning_rate": 1.9603393354724598e-05, "loss": 0.0511, "step": 23580 }, { "epoch": 0.362059703783286, "grad_norm": 0.48711779713630676, "learning_rate": 1.9602646088203543e-05, "loss": 0.0456, "step": 23590 }, { "epoch": 0.36221318394597496, "grad_norm": 0.4315935969352722, "learning_rate": 1.9601898132633407e-05, "loss": 0.053, "step": 23600 }, { "epoch": 0.36236666410866397, "grad_norm": 0.5081061124801636, "learning_rate": 1.960114948806786e-05, "loss": 0.0572, "step": 23610 }, { "epoch": 0.36252014427135293, "grad_norm": 0.7081735134124756, "learning_rate": 1.9600400154560622e-05, "loss": 0.0551, "step": 23620 }, { "epoch": 0.3626736244340419, "grad_norm": 0.7888824343681335, "learning_rate": 1.9599650132165466e-05, "loss": 0.0471, "step": 23630 }, { "epoch": 0.3628271045967309, "grad_norm": 0.33638879656791687, "learning_rate": 1.9598899420936205e-05, "loss": 0.0492, "step": 23640 }, { "epoch": 0.36298058475941986, "grad_norm": 0.5089905858039856, "learning_rate": 1.9598148020926714e-05, "loss": 0.0469, "step": 23650 }, { "epoch": 0.3631340649221088, "grad_norm": 0.6773430109024048, "learning_rate": 1.959739593219091e-05, "loss": 0.0515, "step": 23660 }, { "epoch": 0.36328754508479777, "grad_norm": 0.7150158882141113, "learning_rate": 1.9596643154782748e-05, "loss": 0.0597, "step": 23670 }, { "epoch": 0.3634410252474868, "grad_norm": 0.5473431348800659, "learning_rate": 1.959588968875626e-05, "loss": 0.0431, "step": 23680 }, { "epoch": 0.36359450541017574, "grad_norm": 0.3925130367279053, "learning_rate": 1.9595135534165503e-05, "loss": 0.0566, "step": 23690 }, { "epoch": 0.3637479855728647, "grad_norm": 0.5542489886283875, "learning_rate": 1.9594380691064595e-05, "loss": 0.0653, "step": 23700 }, { "epoch": 0.36390146573555365, "grad_norm": 0.532598078250885, "learning_rate": 1.9593625159507698e-05, "loss": 0.0556, "step": 23710 }, { "epoch": 0.36405494589824267, "grad_norm": 0.48330676555633545, "learning_rate": 1.9592868939549033e-05, "loss": 0.054, "step": 23720 }, { "epoch": 0.3642084260609316, "grad_norm": 0.5304890871047974, "learning_rate": 1.9592112031242852e-05, "loss": 0.0562, "step": 23730 }, { "epoch": 0.3643619062236206, "grad_norm": 0.39718037843704224, "learning_rate": 1.959135443464348e-05, "loss": 0.0506, "step": 23740 }, { "epoch": 0.3645153863863096, "grad_norm": 0.3618604242801666, "learning_rate": 1.959059614980527e-05, "loss": 0.045, "step": 23750 }, { "epoch": 0.36466886654899855, "grad_norm": 0.511479914188385, "learning_rate": 1.9589837176782642e-05, "loss": 0.0361, "step": 23760 }, { "epoch": 0.3648223467116875, "grad_norm": 0.6504064202308655, "learning_rate": 1.9589077515630045e-05, "loss": 0.0555, "step": 23770 }, { "epoch": 0.36497582687437646, "grad_norm": 0.5079190731048584, "learning_rate": 1.9588317166402e-05, "loss": 0.0534, "step": 23780 }, { "epoch": 0.3651293070370655, "grad_norm": 0.6281335949897766, "learning_rate": 1.9587556129153065e-05, "loss": 0.0542, "step": 23790 }, { "epoch": 0.36528278719975443, "grad_norm": 0.672465443611145, "learning_rate": 1.9586794403937846e-05, "loss": 0.0462, "step": 23800 }, { "epoch": 0.3654362673624434, "grad_norm": 0.39089149236679077, "learning_rate": 1.9586031990811003e-05, "loss": 0.0469, "step": 23810 }, { "epoch": 0.3655897475251324, "grad_norm": 0.5704272985458374, "learning_rate": 1.9585268889827243e-05, "loss": 0.0525, "step": 23820 }, { "epoch": 0.36574322768782136, "grad_norm": 0.3445926904678345, "learning_rate": 1.9584505101041326e-05, "loss": 0.046, "step": 23830 }, { "epoch": 0.3658967078505103, "grad_norm": 0.41946062445640564, "learning_rate": 1.9583740624508057e-05, "loss": 0.0372, "step": 23840 }, { "epoch": 0.3660501880131993, "grad_norm": 0.4512186348438263, "learning_rate": 1.9582975460282288e-05, "loss": 0.0531, "step": 23850 }, { "epoch": 0.3662036681758883, "grad_norm": 0.43640637397766113, "learning_rate": 1.9582209608418933e-05, "loss": 0.0576, "step": 23860 }, { "epoch": 0.36635714833857724, "grad_norm": 0.58955317735672, "learning_rate": 1.9581443068972934e-05, "loss": 0.0451, "step": 23870 }, { "epoch": 0.3665106285012662, "grad_norm": 0.4514896869659424, "learning_rate": 1.9580675841999307e-05, "loss": 0.0553, "step": 23880 }, { "epoch": 0.36666410866395516, "grad_norm": 0.5938939452171326, "learning_rate": 1.95799079275531e-05, "loss": 0.0466, "step": 23890 }, { "epoch": 0.36681758882664417, "grad_norm": 0.45433899760246277, "learning_rate": 1.9579139325689417e-05, "loss": 0.0484, "step": 23900 }, { "epoch": 0.3669710689893331, "grad_norm": 0.3631526231765747, "learning_rate": 1.9578370036463406e-05, "loss": 0.0455, "step": 23910 }, { "epoch": 0.3671245491520221, "grad_norm": 0.43058279156684875, "learning_rate": 1.9577600059930275e-05, "loss": 0.0479, "step": 23920 }, { "epoch": 0.3672780293147111, "grad_norm": 0.6349043250083923, "learning_rate": 1.957682939614527e-05, "loss": 0.05, "step": 23930 }, { "epoch": 0.36743150947740005, "grad_norm": 0.5856439471244812, "learning_rate": 1.9576058045163687e-05, "loss": 0.0582, "step": 23940 }, { "epoch": 0.367584989640089, "grad_norm": 0.43985944986343384, "learning_rate": 1.9575286007040887e-05, "loss": 0.0511, "step": 23950 }, { "epoch": 0.36773846980277797, "grad_norm": 0.5420786738395691, "learning_rate": 1.9574513281832255e-05, "loss": 0.0673, "step": 23960 }, { "epoch": 0.367891949965467, "grad_norm": 0.5920679569244385, "learning_rate": 1.9573739869593252e-05, "loss": 0.0461, "step": 23970 }, { "epoch": 0.36804543012815594, "grad_norm": 0.5746629238128662, "learning_rate": 1.957296577037936e-05, "loss": 0.0509, "step": 23980 }, { "epoch": 0.3681989102908449, "grad_norm": 0.8046333193778992, "learning_rate": 1.9572190984246143e-05, "loss": 0.0446, "step": 23990 }, { "epoch": 0.3683523904535339, "grad_norm": 0.3519909083843231, "learning_rate": 1.957141551124918e-05, "loss": 0.0478, "step": 24000 }, { "epoch": 0.36850587061622286, "grad_norm": 0.49573904275894165, "learning_rate": 1.9570639351444126e-05, "loss": 0.0475, "step": 24010 }, { "epoch": 0.3686593507789118, "grad_norm": 0.4759378433227539, "learning_rate": 1.9569862504886673e-05, "loss": 0.0474, "step": 24020 }, { "epoch": 0.3688128309416008, "grad_norm": 0.5464677810668945, "learning_rate": 1.9569084971632563e-05, "loss": 0.0551, "step": 24030 }, { "epoch": 0.3689663111042898, "grad_norm": 0.4822022318840027, "learning_rate": 1.9568306751737594e-05, "loss": 0.0509, "step": 24040 }, { "epoch": 0.36911979126697875, "grad_norm": 0.6575996279716492, "learning_rate": 1.95675278452576e-05, "loss": 0.0429, "step": 24050 }, { "epoch": 0.3692732714296677, "grad_norm": 0.6496216058731079, "learning_rate": 1.9566748252248477e-05, "loss": 0.0462, "step": 24060 }, { "epoch": 0.36942675159235666, "grad_norm": 0.4717913866043091, "learning_rate": 1.9565967972766164e-05, "loss": 0.0529, "step": 24070 }, { "epoch": 0.3695802317550457, "grad_norm": 0.3033473789691925, "learning_rate": 1.9565187006866654e-05, "loss": 0.0436, "step": 24080 }, { "epoch": 0.36973371191773463, "grad_norm": 0.43402099609375, "learning_rate": 1.9564405354605982e-05, "loss": 0.0607, "step": 24090 }, { "epoch": 0.3698871920804236, "grad_norm": 0.18956860899925232, "learning_rate": 1.9563623016040242e-05, "loss": 0.0419, "step": 24100 }, { "epoch": 0.3700406722431126, "grad_norm": 0.5496340990066528, "learning_rate": 1.9562839991225563e-05, "loss": 0.0548, "step": 24110 }, { "epoch": 0.37019415240580156, "grad_norm": 0.6460297703742981, "learning_rate": 1.956205628021814e-05, "loss": 0.0484, "step": 24120 }, { "epoch": 0.3703476325684905, "grad_norm": 0.6473960280418396, "learning_rate": 1.9561271883074205e-05, "loss": 0.0541, "step": 24130 }, { "epoch": 0.37050111273117947, "grad_norm": 0.48035988211631775, "learning_rate": 1.9560486799850043e-05, "loss": 0.0469, "step": 24140 }, { "epoch": 0.3706545928938685, "grad_norm": 0.6936374306678772, "learning_rate": 1.955970103060199e-05, "loss": 0.0513, "step": 24150 }, { "epoch": 0.37080807305655744, "grad_norm": 0.6696738600730896, "learning_rate": 1.955891457538643e-05, "loss": 0.0505, "step": 24160 }, { "epoch": 0.3709615532192464, "grad_norm": 0.7284805774688721, "learning_rate": 1.9558127434259798e-05, "loss": 0.0666, "step": 24170 }, { "epoch": 0.3711150333819354, "grad_norm": 0.5094659924507141, "learning_rate": 1.955733960727857e-05, "loss": 0.0478, "step": 24180 }, { "epoch": 0.37126851354462437, "grad_norm": 0.47582632303237915, "learning_rate": 1.9556551094499286e-05, "loss": 0.0567, "step": 24190 }, { "epoch": 0.3714219937073133, "grad_norm": 0.4952121078968048, "learning_rate": 1.955576189597852e-05, "loss": 0.0521, "step": 24200 }, { "epoch": 0.3715754738700023, "grad_norm": 0.5120697617530823, "learning_rate": 1.95549720117729e-05, "loss": 0.0443, "step": 24210 }, { "epoch": 0.3717289540326913, "grad_norm": 0.5980885624885559, "learning_rate": 1.955418144193911e-05, "loss": 0.0571, "step": 24220 }, { "epoch": 0.37188243419538025, "grad_norm": 0.46724656224250793, "learning_rate": 1.955339018653388e-05, "loss": 0.059, "step": 24230 }, { "epoch": 0.3720359143580692, "grad_norm": 0.559043288230896, "learning_rate": 1.9552598245613982e-05, "loss": 0.0438, "step": 24240 }, { "epoch": 0.37218939452075817, "grad_norm": 0.47373226284980774, "learning_rate": 1.9551805619236252e-05, "loss": 0.0483, "step": 24250 }, { "epoch": 0.3723428746834472, "grad_norm": 0.6016732454299927, "learning_rate": 1.955101230745755e-05, "loss": 0.0463, "step": 24260 }, { "epoch": 0.37249635484613614, "grad_norm": 0.42813098430633545, "learning_rate": 1.9550218310334814e-05, "loss": 0.0389, "step": 24270 }, { "epoch": 0.3726498350088251, "grad_norm": 0.4000905454158783, "learning_rate": 1.9549423627925015e-05, "loss": 0.0565, "step": 24280 }, { "epoch": 0.3728033151715141, "grad_norm": 0.38102856278419495, "learning_rate": 1.9548628260285176e-05, "loss": 0.0496, "step": 24290 }, { "epoch": 0.37295679533420306, "grad_norm": 0.5344043970108032, "learning_rate": 1.9547832207472366e-05, "loss": 0.0359, "step": 24300 }, { "epoch": 0.373110275496892, "grad_norm": 0.4971364140510559, "learning_rate": 1.9547035469543712e-05, "loss": 0.0554, "step": 24310 }, { "epoch": 0.373263755659581, "grad_norm": 0.36862799525260925, "learning_rate": 1.954623804655638e-05, "loss": 0.0569, "step": 24320 }, { "epoch": 0.37341723582227, "grad_norm": 0.5256595015525818, "learning_rate": 1.9545439938567598e-05, "loss": 0.0473, "step": 24330 }, { "epoch": 0.37357071598495895, "grad_norm": 0.44157686829566956, "learning_rate": 1.9544641145634625e-05, "loss": 0.0508, "step": 24340 }, { "epoch": 0.3737241961476479, "grad_norm": 0.4436924457550049, "learning_rate": 1.954384166781479e-05, "loss": 0.0592, "step": 24350 }, { "epoch": 0.3738776763103369, "grad_norm": 0.41807445883750916, "learning_rate": 1.9543041505165447e-05, "loss": 0.0442, "step": 24360 }, { "epoch": 0.37403115647302587, "grad_norm": 0.3642350435256958, "learning_rate": 1.9542240657744025e-05, "loss": 0.0659, "step": 24370 }, { "epoch": 0.37418463663571483, "grad_norm": 0.3955611288547516, "learning_rate": 1.9541439125607985e-05, "loss": 0.0455, "step": 24380 }, { "epoch": 0.3743381167984038, "grad_norm": 0.3574214279651642, "learning_rate": 1.954063690881484e-05, "loss": 0.0514, "step": 24390 }, { "epoch": 0.3744915969610928, "grad_norm": 0.5076133608818054, "learning_rate": 1.9539834007422155e-05, "loss": 0.0509, "step": 24400 }, { "epoch": 0.37464507712378176, "grad_norm": 0.35475075244903564, "learning_rate": 1.9539030421487548e-05, "loss": 0.055, "step": 24410 }, { "epoch": 0.3747985572864707, "grad_norm": 0.4181745946407318, "learning_rate": 1.9538226151068675e-05, "loss": 0.0588, "step": 24420 }, { "epoch": 0.37495203744915967, "grad_norm": 0.9636420607566833, "learning_rate": 1.953742119622325e-05, "loss": 0.0506, "step": 24430 }, { "epoch": 0.3751055176118487, "grad_norm": 0.5684666037559509, "learning_rate": 1.9536615557009032e-05, "loss": 0.0488, "step": 24440 }, { "epoch": 0.37525899777453764, "grad_norm": 0.5945050120353699, "learning_rate": 1.9535809233483834e-05, "loss": 0.0534, "step": 24450 }, { "epoch": 0.3754124779372266, "grad_norm": 0.5939933061599731, "learning_rate": 1.953500222570551e-05, "loss": 0.0411, "step": 24460 }, { "epoch": 0.3755659580999156, "grad_norm": 0.43570366501808167, "learning_rate": 1.953419453373197e-05, "loss": 0.0511, "step": 24470 }, { "epoch": 0.37571943826260457, "grad_norm": 0.46178746223449707, "learning_rate": 1.9533386157621176e-05, "loss": 0.0493, "step": 24480 }, { "epoch": 0.3758729184252935, "grad_norm": 0.4142175316810608, "learning_rate": 1.9532577097431124e-05, "loss": 0.0458, "step": 24490 }, { "epoch": 0.3760263985879825, "grad_norm": 0.7205913662910461, "learning_rate": 1.9531767353219876e-05, "loss": 0.0574, "step": 24500 }, { "epoch": 0.3761798787506715, "grad_norm": 0.5299874544143677, "learning_rate": 1.953095692504553e-05, "loss": 0.0494, "step": 24510 }, { "epoch": 0.37633335891336045, "grad_norm": 0.47634318470954895, "learning_rate": 1.953014581296625e-05, "loss": 0.0476, "step": 24520 }, { "epoch": 0.3764868390760494, "grad_norm": 0.6495774984359741, "learning_rate": 1.952933401704023e-05, "loss": 0.0385, "step": 24530 }, { "epoch": 0.3766403192387384, "grad_norm": 0.6930984854698181, "learning_rate": 1.9528521537325723e-05, "loss": 0.045, "step": 24540 }, { "epoch": 0.3767937994014274, "grad_norm": 0.7579116225242615, "learning_rate": 1.9527708373881025e-05, "loss": 0.0501, "step": 24550 }, { "epoch": 0.37694727956411633, "grad_norm": 0.49391651153564453, "learning_rate": 1.9526894526764496e-05, "loss": 0.04, "step": 24560 }, { "epoch": 0.3771007597268053, "grad_norm": 0.5695183873176575, "learning_rate": 1.9526079996034525e-05, "loss": 0.0562, "step": 24570 }, { "epoch": 0.3772542398894943, "grad_norm": 0.44123968482017517, "learning_rate": 1.9525264781749566e-05, "loss": 0.0547, "step": 24580 }, { "epoch": 0.37740772005218326, "grad_norm": 0.7248222827911377, "learning_rate": 1.9524448883968107e-05, "loss": 0.0427, "step": 24590 }, { "epoch": 0.3775612002148722, "grad_norm": 0.5475986003875732, "learning_rate": 1.9523632302748706e-05, "loss": 0.0509, "step": 24600 }, { "epoch": 0.3777146803775612, "grad_norm": 0.3346813917160034, "learning_rate": 1.952281503814995e-05, "loss": 0.0509, "step": 24610 }, { "epoch": 0.3778681605402502, "grad_norm": 0.5422468781471252, "learning_rate": 1.9521997090230482e-05, "loss": 0.0595, "step": 24620 }, { "epoch": 0.37802164070293914, "grad_norm": 0.8979112505912781, "learning_rate": 1.9521178459048996e-05, "loss": 0.0556, "step": 24630 }, { "epoch": 0.3781751208656281, "grad_norm": 0.4659560024738312, "learning_rate": 1.9520359144664237e-05, "loss": 0.0461, "step": 24640 }, { "epoch": 0.3783286010283171, "grad_norm": 0.46097150444984436, "learning_rate": 1.9519539147134995e-05, "loss": 0.0566, "step": 24650 }, { "epoch": 0.37848208119100607, "grad_norm": 0.4464870095252991, "learning_rate": 1.9518718466520105e-05, "loss": 0.0571, "step": 24660 }, { "epoch": 0.378635561353695, "grad_norm": 0.5327655673027039, "learning_rate": 1.951789710287846e-05, "loss": 0.0553, "step": 24670 }, { "epoch": 0.378789041516384, "grad_norm": 0.47714513540267944, "learning_rate": 1.9517075056269e-05, "loss": 0.0377, "step": 24680 }, { "epoch": 0.378942521679073, "grad_norm": 0.29803332686424255, "learning_rate": 1.9516252326750705e-05, "loss": 0.0387, "step": 24690 }, { "epoch": 0.37909600184176195, "grad_norm": 0.4870759844779968, "learning_rate": 1.9515428914382617e-05, "loss": 0.0483, "step": 24700 }, { "epoch": 0.3792494820044509, "grad_norm": 0.4793190360069275, "learning_rate": 1.9514604819223818e-05, "loss": 0.0492, "step": 24710 }, { "epoch": 0.3794029621671399, "grad_norm": 0.7104774117469788, "learning_rate": 1.9513780041333444e-05, "loss": 0.0465, "step": 24720 }, { "epoch": 0.3795564423298289, "grad_norm": 0.43449607491493225, "learning_rate": 1.9512954580770677e-05, "loss": 0.0419, "step": 24730 }, { "epoch": 0.37970992249251784, "grad_norm": 0.5637012124061584, "learning_rate": 1.951212843759475e-05, "loss": 0.0461, "step": 24740 }, { "epoch": 0.3798634026552068, "grad_norm": 0.7462929487228394, "learning_rate": 1.9511301611864942e-05, "loss": 0.057, "step": 24750 }, { "epoch": 0.3800168828178958, "grad_norm": 0.4119717478752136, "learning_rate": 1.951047410364058e-05, "loss": 0.0507, "step": 24760 }, { "epoch": 0.38017036298058476, "grad_norm": 0.6267496943473816, "learning_rate": 1.950964591298105e-05, "loss": 0.0488, "step": 24770 }, { "epoch": 0.3803238431432737, "grad_norm": 0.43495887517929077, "learning_rate": 1.9508817039945774e-05, "loss": 0.0492, "step": 24780 }, { "epoch": 0.3804773233059627, "grad_norm": 0.4314129650592804, "learning_rate": 1.9507987484594232e-05, "loss": 0.0502, "step": 24790 }, { "epoch": 0.3806308034686517, "grad_norm": 0.5047745108604431, "learning_rate": 1.9507157246985945e-05, "loss": 0.0515, "step": 24800 }, { "epoch": 0.38078428363134065, "grad_norm": 0.4687258303165436, "learning_rate": 1.9506326327180494e-05, "loss": 0.0366, "step": 24810 }, { "epoch": 0.3809377637940296, "grad_norm": 0.47080934047698975, "learning_rate": 1.9505494725237502e-05, "loss": 0.0628, "step": 24820 }, { "epoch": 0.3810912439567186, "grad_norm": 0.41157767176628113, "learning_rate": 1.9504662441216636e-05, "loss": 0.0494, "step": 24830 }, { "epoch": 0.3812447241194076, "grad_norm": 0.38662049174308777, "learning_rate": 1.950382947517762e-05, "loss": 0.046, "step": 24840 }, { "epoch": 0.38139820428209653, "grad_norm": 0.42895710468292236, "learning_rate": 1.9502995827180225e-05, "loss": 0.0489, "step": 24850 }, { "epoch": 0.3815516844447855, "grad_norm": 0.5802217125892639, "learning_rate": 1.9502161497284276e-05, "loss": 0.0512, "step": 24860 }, { "epoch": 0.3817051646074745, "grad_norm": 0.5011193156242371, "learning_rate": 1.9501326485549633e-05, "loss": 0.0463, "step": 24870 }, { "epoch": 0.38185864477016346, "grad_norm": 0.36708465218544006, "learning_rate": 1.9500490792036216e-05, "loss": 0.0529, "step": 24880 }, { "epoch": 0.3820121249328524, "grad_norm": 0.6426084041595459, "learning_rate": 1.9499654416803988e-05, "loss": 0.0512, "step": 24890 }, { "epoch": 0.3821656050955414, "grad_norm": 0.7111472487449646, "learning_rate": 1.9498817359912973e-05, "loss": 0.0643, "step": 24900 }, { "epoch": 0.3823190852582304, "grad_norm": 0.4197068512439728, "learning_rate": 1.9497979621423226e-05, "loss": 0.044, "step": 24910 }, { "epoch": 0.38247256542091934, "grad_norm": 0.37213218212127686, "learning_rate": 1.9497141201394865e-05, "loss": 0.0536, "step": 24920 }, { "epoch": 0.3826260455836083, "grad_norm": 0.5336489081382751, "learning_rate": 1.9496302099888048e-05, "loss": 0.0508, "step": 24930 }, { "epoch": 0.3827795257462973, "grad_norm": 0.6571310758590698, "learning_rate": 1.949546231696299e-05, "loss": 0.0529, "step": 24940 }, { "epoch": 0.38293300590898627, "grad_norm": 0.815001904964447, "learning_rate": 1.949462185267995e-05, "loss": 0.0473, "step": 24950 }, { "epoch": 0.3830864860716752, "grad_norm": 0.5224036574363708, "learning_rate": 1.9493780707099233e-05, "loss": 0.0459, "step": 24960 }, { "epoch": 0.3832399662343642, "grad_norm": 0.4555412828922272, "learning_rate": 1.9492938880281194e-05, "loss": 0.0491, "step": 24970 }, { "epoch": 0.3833934463970532, "grad_norm": 0.5929392576217651, "learning_rate": 1.949209637228625e-05, "loss": 0.0406, "step": 24980 }, { "epoch": 0.38354692655974215, "grad_norm": 0.5536690354347229, "learning_rate": 1.9491253183174848e-05, "loss": 0.0554, "step": 24990 }, { "epoch": 0.3837004067224311, "grad_norm": 0.47814005613327026, "learning_rate": 1.9490409313007494e-05, "loss": 0.0389, "step": 25000 }, { "epoch": 0.3838538868851201, "grad_norm": 0.5829505920410156, "learning_rate": 1.948956476184474e-05, "loss": 0.0617, "step": 25010 }, { "epoch": 0.3840073670478091, "grad_norm": 0.3963032066822052, "learning_rate": 1.948871952974719e-05, "loss": 0.0528, "step": 25020 }, { "epoch": 0.38416084721049804, "grad_norm": 0.5363067388534546, "learning_rate": 1.9487873616775487e-05, "loss": 0.0585, "step": 25030 }, { "epoch": 0.384314327373187, "grad_norm": 0.8469104766845703, "learning_rate": 1.9487027022990343e-05, "loss": 0.0661, "step": 25040 }, { "epoch": 0.384467807535876, "grad_norm": 0.4669818878173828, "learning_rate": 1.9486179748452497e-05, "loss": 0.044, "step": 25050 }, { "epoch": 0.38462128769856496, "grad_norm": 0.7222546935081482, "learning_rate": 1.9485331793222753e-05, "loss": 0.0509, "step": 25060 }, { "epoch": 0.3847747678612539, "grad_norm": 0.4258444011211395, "learning_rate": 1.948448315736195e-05, "loss": 0.0602, "step": 25070 }, { "epoch": 0.38492824802394293, "grad_norm": 0.660617470741272, "learning_rate": 1.9483633840930984e-05, "loss": 0.0492, "step": 25080 }, { "epoch": 0.3850817281866319, "grad_norm": 0.40105053782463074, "learning_rate": 1.94827838439908e-05, "loss": 0.0483, "step": 25090 }, { "epoch": 0.38523520834932085, "grad_norm": 0.29934969544410706, "learning_rate": 1.9481933166602396e-05, "loss": 0.0424, "step": 25100 }, { "epoch": 0.3853886885120098, "grad_norm": 0.4079023003578186, "learning_rate": 1.9481081808826805e-05, "loss": 0.0401, "step": 25110 }, { "epoch": 0.3855421686746988, "grad_norm": 0.4647606611251831, "learning_rate": 1.9480229770725124e-05, "loss": 0.0484, "step": 25120 }, { "epoch": 0.3856956488373878, "grad_norm": 0.5474405884742737, "learning_rate": 1.9479377052358487e-05, "loss": 0.0615, "step": 25130 }, { "epoch": 0.38584912900007673, "grad_norm": 0.5057555437088013, "learning_rate": 1.947852365378808e-05, "loss": 0.0382, "step": 25140 }, { "epoch": 0.3860026091627657, "grad_norm": 0.4496133625507355, "learning_rate": 1.9477669575075147e-05, "loss": 0.0455, "step": 25150 }, { "epoch": 0.3861560893254547, "grad_norm": 0.5936809182167053, "learning_rate": 1.947681481628097e-05, "loss": 0.0436, "step": 25160 }, { "epoch": 0.38630956948814366, "grad_norm": 0.8184897303581238, "learning_rate": 1.9475959377466876e-05, "loss": 0.0445, "step": 25170 }, { "epoch": 0.3864630496508326, "grad_norm": 0.527947187423706, "learning_rate": 1.947510325869426e-05, "loss": 0.044, "step": 25180 }, { "epoch": 0.3866165298135216, "grad_norm": 0.5049175024032593, "learning_rate": 1.9474246460024552e-05, "loss": 0.051, "step": 25190 }, { "epoch": 0.3867700099762106, "grad_norm": 0.5665743350982666, "learning_rate": 1.947338898151922e-05, "loss": 0.0543, "step": 25200 }, { "epoch": 0.38692349013889954, "grad_norm": 0.5935093760490417, "learning_rate": 1.947253082323981e-05, "loss": 0.047, "step": 25210 }, { "epoch": 0.3870769703015885, "grad_norm": 0.5884120464324951, "learning_rate": 1.947167198524789e-05, "loss": 0.0499, "step": 25220 }, { "epoch": 0.3872304504642775, "grad_norm": 0.4948827624320984, "learning_rate": 1.947081246760509e-05, "loss": 0.0568, "step": 25230 }, { "epoch": 0.38738393062696647, "grad_norm": 0.42407646775245667, "learning_rate": 1.9469952270373083e-05, "loss": 0.0454, "step": 25240 }, { "epoch": 0.3875374107896554, "grad_norm": 0.3901045024394989, "learning_rate": 1.9469091393613597e-05, "loss": 0.0474, "step": 25250 }, { "epoch": 0.38769089095234444, "grad_norm": 0.6414143443107605, "learning_rate": 1.9468229837388407e-05, "loss": 0.0637, "step": 25260 }, { "epoch": 0.3878443711150334, "grad_norm": 0.48661068081855774, "learning_rate": 1.9467367601759327e-05, "loss": 0.0522, "step": 25270 }, { "epoch": 0.38799785127772235, "grad_norm": 0.5268753170967102, "learning_rate": 1.9466504686788234e-05, "loss": 0.0574, "step": 25280 }, { "epoch": 0.3881513314404113, "grad_norm": 0.31805336475372314, "learning_rate": 1.946564109253705e-05, "loss": 0.0546, "step": 25290 }, { "epoch": 0.3883048116031003, "grad_norm": 0.4120784401893616, "learning_rate": 1.9464776819067732e-05, "loss": 0.0475, "step": 25300 }, { "epoch": 0.3884582917657893, "grad_norm": 0.41205549240112305, "learning_rate": 1.946391186644231e-05, "loss": 0.0494, "step": 25310 }, { "epoch": 0.38861177192847823, "grad_norm": 0.48270514607429504, "learning_rate": 1.946304623472284e-05, "loss": 0.0489, "step": 25320 }, { "epoch": 0.3887652520911672, "grad_norm": 0.4510549306869507, "learning_rate": 1.9462179923971444e-05, "loss": 0.0446, "step": 25330 }, { "epoch": 0.3889187322538562, "grad_norm": 0.4142148494720459, "learning_rate": 1.946131293425028e-05, "loss": 0.0464, "step": 25340 }, { "epoch": 0.38907221241654516, "grad_norm": 0.3433108627796173, "learning_rate": 1.946044526562156e-05, "loss": 0.0403, "step": 25350 }, { "epoch": 0.3892256925792341, "grad_norm": 0.6291942000389099, "learning_rate": 1.945957691814755e-05, "loss": 0.0551, "step": 25360 }, { "epoch": 0.38937917274192313, "grad_norm": 0.46018505096435547, "learning_rate": 1.9458707891890548e-05, "loss": 0.0477, "step": 25370 }, { "epoch": 0.3895326529046121, "grad_norm": 0.6320154666900635, "learning_rate": 1.9457838186912927e-05, "loss": 0.0455, "step": 25380 }, { "epoch": 0.38968613306730104, "grad_norm": 0.47007548809051514, "learning_rate": 1.9456967803277082e-05, "loss": 0.0462, "step": 25390 }, { "epoch": 0.38983961322999, "grad_norm": 0.5229553580284119, "learning_rate": 1.945609674104547e-05, "loss": 0.0496, "step": 25400 }, { "epoch": 0.389993093392679, "grad_norm": 0.4633926749229431, "learning_rate": 1.94552250002806e-05, "loss": 0.046, "step": 25410 }, { "epoch": 0.39014657355536797, "grad_norm": 0.7111127972602844, "learning_rate": 1.9454352581045022e-05, "loss": 0.0595, "step": 25420 }, { "epoch": 0.3903000537180569, "grad_norm": 0.7359415292739868, "learning_rate": 1.9453479483401336e-05, "loss": 0.0429, "step": 25430 }, { "epoch": 0.39045353388074594, "grad_norm": 0.3209552466869354, "learning_rate": 1.9452605707412195e-05, "loss": 0.0536, "step": 25440 }, { "epoch": 0.3906070140434349, "grad_norm": 0.42724117636680603, "learning_rate": 1.9451731253140296e-05, "loss": 0.0499, "step": 25450 }, { "epoch": 0.39076049420612385, "grad_norm": 0.5141250491142273, "learning_rate": 1.9450856120648388e-05, "loss": 0.0506, "step": 25460 }, { "epoch": 0.3909139743688128, "grad_norm": 0.5405067205429077, "learning_rate": 1.9449980309999267e-05, "loss": 0.0424, "step": 25470 }, { "epoch": 0.3910674545315018, "grad_norm": 0.5611537098884583, "learning_rate": 1.9449103821255778e-05, "loss": 0.0379, "step": 25480 }, { "epoch": 0.3912209346941908, "grad_norm": 0.5749300122261047, "learning_rate": 1.944822665448081e-05, "loss": 0.0496, "step": 25490 }, { "epoch": 0.39137441485687974, "grad_norm": 0.44082263112068176, "learning_rate": 1.944734880973731e-05, "loss": 0.0512, "step": 25500 }, { "epoch": 0.3915278950195687, "grad_norm": 0.5325303077697754, "learning_rate": 1.9446470287088264e-05, "loss": 0.0371, "step": 25510 }, { "epoch": 0.3916813751822577, "grad_norm": 0.4326745271682739, "learning_rate": 1.944559108659672e-05, "loss": 0.0515, "step": 25520 }, { "epoch": 0.39183485534494666, "grad_norm": 0.44063642621040344, "learning_rate": 1.9444711208325758e-05, "loss": 0.0483, "step": 25530 }, { "epoch": 0.3919883355076356, "grad_norm": 0.3958291709423065, "learning_rate": 1.9443830652338518e-05, "loss": 0.0443, "step": 25540 }, { "epoch": 0.39214181567032463, "grad_norm": 0.36382007598876953, "learning_rate": 1.9442949418698185e-05, "loss": 0.0441, "step": 25550 }, { "epoch": 0.3922952958330136, "grad_norm": 0.4487064778804779, "learning_rate": 1.9442067507467994e-05, "loss": 0.0458, "step": 25560 }, { "epoch": 0.39244877599570255, "grad_norm": 0.40073445439338684, "learning_rate": 1.9441184918711226e-05, "loss": 0.0446, "step": 25570 }, { "epoch": 0.3926022561583915, "grad_norm": 0.4234387278556824, "learning_rate": 1.9440301652491208e-05, "loss": 0.0429, "step": 25580 }, { "epoch": 0.3927557363210805, "grad_norm": 0.5361803770065308, "learning_rate": 1.9439417708871332e-05, "loss": 0.0496, "step": 25590 }, { "epoch": 0.3929092164837695, "grad_norm": 0.49671387672424316, "learning_rate": 1.9438533087915014e-05, "loss": 0.0473, "step": 25600 }, { "epoch": 0.39306269664645843, "grad_norm": 0.5904961228370667, "learning_rate": 1.9437647789685737e-05, "loss": 0.0591, "step": 25610 }, { "epoch": 0.39321617680914744, "grad_norm": 0.4570513963699341, "learning_rate": 1.9436761814247026e-05, "loss": 0.0468, "step": 25620 }, { "epoch": 0.3933696569718364, "grad_norm": 0.4987599551677704, "learning_rate": 1.9435875161662448e-05, "loss": 0.0387, "step": 25630 }, { "epoch": 0.39352313713452536, "grad_norm": 0.5384908318519592, "learning_rate": 1.943498783199564e-05, "loss": 0.0567, "step": 25640 }, { "epoch": 0.3936766172972143, "grad_norm": 0.49574360251426697, "learning_rate": 1.9434099825310264e-05, "loss": 0.0418, "step": 25650 }, { "epoch": 0.3938300974599033, "grad_norm": 0.336246132850647, "learning_rate": 1.943321114167004e-05, "loss": 0.0411, "step": 25660 }, { "epoch": 0.3939835776225923, "grad_norm": 0.5948551297187805, "learning_rate": 1.9432321781138742e-05, "loss": 0.0573, "step": 25670 }, { "epoch": 0.39413705778528124, "grad_norm": 0.507610559463501, "learning_rate": 1.943143174378018e-05, "loss": 0.062, "step": 25680 }, { "epoch": 0.3942905379479702, "grad_norm": 0.3388473689556122, "learning_rate": 1.9430541029658227e-05, "loss": 0.0631, "step": 25690 }, { "epoch": 0.3944440181106592, "grad_norm": 0.38083386421203613, "learning_rate": 1.9429649638836787e-05, "loss": 0.0471, "step": 25700 }, { "epoch": 0.39459749827334817, "grad_norm": 0.5394788384437561, "learning_rate": 1.9428757571379836e-05, "loss": 0.0455, "step": 25710 }, { "epoch": 0.3947509784360371, "grad_norm": 0.5191327929496765, "learning_rate": 1.9427864827351373e-05, "loss": 0.047, "step": 25720 }, { "epoch": 0.39490445859872614, "grad_norm": 0.511528491973877, "learning_rate": 1.9426971406815464e-05, "loss": 0.0374, "step": 25730 }, { "epoch": 0.3950579387614151, "grad_norm": 0.6904944181442261, "learning_rate": 1.9426077309836216e-05, "loss": 0.048, "step": 25740 }, { "epoch": 0.39521141892410405, "grad_norm": 0.5488818883895874, "learning_rate": 1.9425182536477793e-05, "loss": 0.0382, "step": 25750 }, { "epoch": 0.395364899086793, "grad_norm": 0.4556718170642853, "learning_rate": 1.9424287086804384e-05, "loss": 0.044, "step": 25760 }, { "epoch": 0.395518379249482, "grad_norm": 0.4508383274078369, "learning_rate": 1.942339096088026e-05, "loss": 0.0382, "step": 25770 }, { "epoch": 0.395671859412171, "grad_norm": 0.5836442112922668, "learning_rate": 1.9422494158769715e-05, "loss": 0.0523, "step": 25780 }, { "epoch": 0.39582533957485994, "grad_norm": 0.3560740649700165, "learning_rate": 1.9421596680537105e-05, "loss": 0.0533, "step": 25790 }, { "epoch": 0.39597881973754895, "grad_norm": 0.5412015318870544, "learning_rate": 1.942069852624682e-05, "loss": 0.0539, "step": 25800 }, { "epoch": 0.3961322999002379, "grad_norm": 0.4342454969882965, "learning_rate": 1.941979969596332e-05, "loss": 0.051, "step": 25810 }, { "epoch": 0.39628578006292686, "grad_norm": 0.5803071856498718, "learning_rate": 1.9418900189751096e-05, "loss": 0.0762, "step": 25820 }, { "epoch": 0.3964392602256158, "grad_norm": 0.4806313216686249, "learning_rate": 1.941800000767469e-05, "loss": 0.0617, "step": 25830 }, { "epoch": 0.39659274038830483, "grad_norm": 0.44289320707321167, "learning_rate": 1.9417099149798703e-05, "loss": 0.0427, "step": 25840 }, { "epoch": 0.3967462205509938, "grad_norm": 0.5378814935684204, "learning_rate": 1.941619761618777e-05, "loss": 0.0435, "step": 25850 }, { "epoch": 0.39689970071368275, "grad_norm": 0.5536177158355713, "learning_rate": 1.941529540690658e-05, "loss": 0.0609, "step": 25860 }, { "epoch": 0.3970531808763717, "grad_norm": 0.8525741696357727, "learning_rate": 1.9414392522019886e-05, "loss": 0.0621, "step": 25870 }, { "epoch": 0.3972066610390607, "grad_norm": 0.5515909790992737, "learning_rate": 1.9413488961592463e-05, "loss": 0.0427, "step": 25880 }, { "epoch": 0.3973601412017497, "grad_norm": 0.6931993961334229, "learning_rate": 1.941258472568915e-05, "loss": 0.0551, "step": 25890 }, { "epoch": 0.39751362136443863, "grad_norm": 0.3504290282726288, "learning_rate": 1.941167981437483e-05, "loss": 0.0571, "step": 25900 }, { "epoch": 0.39766710152712764, "grad_norm": 0.5606536269187927, "learning_rate": 1.9410774227714436e-05, "loss": 0.0473, "step": 25910 }, { "epoch": 0.3978205816898166, "grad_norm": 0.5295652747154236, "learning_rate": 1.9409867965772958e-05, "loss": 0.0389, "step": 25920 }, { "epoch": 0.39797406185250556, "grad_norm": 0.6083959341049194, "learning_rate": 1.9408961028615412e-05, "loss": 0.0399, "step": 25930 }, { "epoch": 0.3981275420151945, "grad_norm": 0.49848583340644836, "learning_rate": 1.9408053416306888e-05, "loss": 0.0572, "step": 25940 }, { "epoch": 0.3982810221778835, "grad_norm": 0.5243436098098755, "learning_rate": 1.9407145128912508e-05, "loss": 0.0442, "step": 25950 }, { "epoch": 0.3984345023405725, "grad_norm": 0.5922545194625854, "learning_rate": 1.9406236166497447e-05, "loss": 0.0419, "step": 25960 }, { "epoch": 0.39858798250326144, "grad_norm": 0.41957977414131165, "learning_rate": 1.9405326529126927e-05, "loss": 0.0507, "step": 25970 }, { "epoch": 0.39874146266595045, "grad_norm": 0.39346563816070557, "learning_rate": 1.9404416216866225e-05, "loss": 0.037, "step": 25980 }, { "epoch": 0.3988949428286394, "grad_norm": 0.5538202524185181, "learning_rate": 1.9403505229780654e-05, "loss": 0.0484, "step": 25990 }, { "epoch": 0.39904842299132837, "grad_norm": 0.612786591053009, "learning_rate": 1.940259356793559e-05, "loss": 0.0532, "step": 26000 }, { "epoch": 0.3992019031540173, "grad_norm": 0.41452622413635254, "learning_rate": 1.940168123139645e-05, "loss": 0.0581, "step": 26010 }, { "epoch": 0.39935538331670634, "grad_norm": 0.5530696511268616, "learning_rate": 1.94007682202287e-05, "loss": 0.0516, "step": 26020 }, { "epoch": 0.3995088634793953, "grad_norm": 0.8137233257293701, "learning_rate": 1.9399854534497845e-05, "loss": 0.0593, "step": 26030 }, { "epoch": 0.39966234364208425, "grad_norm": 0.4992435574531555, "learning_rate": 1.939894017426946e-05, "loss": 0.0566, "step": 26040 }, { "epoch": 0.3998158238047732, "grad_norm": 0.7227239012718201, "learning_rate": 1.9398025139609147e-05, "loss": 0.0501, "step": 26050 }, { "epoch": 0.3999693039674622, "grad_norm": 0.3912697732448578, "learning_rate": 1.9397109430582574e-05, "loss": 0.0372, "step": 26060 }, { "epoch": 0.4001227841301512, "grad_norm": 0.673250138759613, "learning_rate": 1.9396193047255436e-05, "loss": 0.0455, "step": 26070 }, { "epoch": 0.40027626429284013, "grad_norm": 0.5617305636405945, "learning_rate": 1.93952759896935e-05, "loss": 0.0378, "step": 26080 }, { "epoch": 0.40042974445552915, "grad_norm": 0.5251043438911438, "learning_rate": 1.939435825796257e-05, "loss": 0.0421, "step": 26090 }, { "epoch": 0.4005832246182181, "grad_norm": 0.8218551874160767, "learning_rate": 1.9393439852128494e-05, "loss": 0.0501, "step": 26100 }, { "epoch": 0.40073670478090706, "grad_norm": 0.4340316355228424, "learning_rate": 1.9392520772257176e-05, "loss": 0.0484, "step": 26110 }, { "epoch": 0.400890184943596, "grad_norm": 0.835940420627594, "learning_rate": 1.9391601018414566e-05, "loss": 0.0638, "step": 26120 }, { "epoch": 0.40104366510628503, "grad_norm": 0.4138988256454468, "learning_rate": 1.9390680590666658e-05, "loss": 0.0484, "step": 26130 }, { "epoch": 0.401197145268974, "grad_norm": 0.9434389472007751, "learning_rate": 1.9389759489079507e-05, "loss": 0.0547, "step": 26140 }, { "epoch": 0.40135062543166294, "grad_norm": 0.5832945704460144, "learning_rate": 1.93888377137192e-05, "loss": 0.0548, "step": 26150 }, { "epoch": 0.40150410559435196, "grad_norm": 0.440277099609375, "learning_rate": 1.938791526465188e-05, "loss": 0.0426, "step": 26160 }, { "epoch": 0.4016575857570409, "grad_norm": 0.5691930055618286, "learning_rate": 1.9386992141943742e-05, "loss": 0.0538, "step": 26170 }, { "epoch": 0.40181106591972987, "grad_norm": 0.4848288595676422, "learning_rate": 1.9386068345661023e-05, "loss": 0.0455, "step": 26180 }, { "epoch": 0.4019645460824188, "grad_norm": 0.4983239471912384, "learning_rate": 1.9385143875870017e-05, "loss": 0.0525, "step": 26190 }, { "epoch": 0.40211802624510784, "grad_norm": 0.5261874794960022, "learning_rate": 1.9384218732637054e-05, "loss": 0.0412, "step": 26200 }, { "epoch": 0.4022715064077968, "grad_norm": 0.5045468807220459, "learning_rate": 1.9383292916028517e-05, "loss": 0.05, "step": 26210 }, { "epoch": 0.40242498657048575, "grad_norm": 0.4141380786895752, "learning_rate": 1.9382366426110847e-05, "loss": 0.044, "step": 26220 }, { "epoch": 0.4025784667331747, "grad_norm": 0.8047770261764526, "learning_rate": 1.938143926295052e-05, "loss": 0.0513, "step": 26230 }, { "epoch": 0.4027319468958637, "grad_norm": 0.5123628973960876, "learning_rate": 1.938051142661407e-05, "loss": 0.0489, "step": 26240 }, { "epoch": 0.4028854270585527, "grad_norm": 0.6470583081245422, "learning_rate": 1.937958291716807e-05, "loss": 0.0554, "step": 26250 }, { "epoch": 0.40303890722124164, "grad_norm": 0.4443073868751526, "learning_rate": 1.9378653734679146e-05, "loss": 0.0523, "step": 26260 }, { "epoch": 0.40319238738393065, "grad_norm": 0.5359684228897095, "learning_rate": 1.937772387921397e-05, "loss": 0.0558, "step": 26270 }, { "epoch": 0.4033458675466196, "grad_norm": 0.6556307673454285, "learning_rate": 1.937679335083928e-05, "loss": 0.0491, "step": 26280 }, { "epoch": 0.40349934770930856, "grad_norm": 0.5096419453620911, "learning_rate": 1.937586214962183e-05, "loss": 0.0391, "step": 26290 }, { "epoch": 0.4036528278719975, "grad_norm": 0.4985276162624359, "learning_rate": 1.9374930275628446e-05, "loss": 0.0528, "step": 26300 }, { "epoch": 0.40380630803468653, "grad_norm": 0.5960076451301575, "learning_rate": 1.9373997728926e-05, "loss": 0.0507, "step": 26310 }, { "epoch": 0.4039597881973755, "grad_norm": 0.5539548993110657, "learning_rate": 1.93730645095814e-05, "loss": 0.0426, "step": 26320 }, { "epoch": 0.40411326836006445, "grad_norm": 0.3571273684501648, "learning_rate": 1.9372130617661615e-05, "loss": 0.0433, "step": 26330 }, { "epoch": 0.40426674852275346, "grad_norm": 0.32907453179359436, "learning_rate": 1.937119605323366e-05, "loss": 0.049, "step": 26340 }, { "epoch": 0.4044202286854424, "grad_norm": 0.726996898651123, "learning_rate": 1.937026081636459e-05, "loss": 0.052, "step": 26350 }, { "epoch": 0.4045737088481314, "grad_norm": 0.6294204592704773, "learning_rate": 1.9369324907121512e-05, "loss": 0.0527, "step": 26360 }, { "epoch": 0.40472718901082033, "grad_norm": 0.46463000774383545, "learning_rate": 1.9368388325571594e-05, "loss": 0.0614, "step": 26370 }, { "epoch": 0.40488066917350934, "grad_norm": 0.4138377606868744, "learning_rate": 1.936745107178203e-05, "loss": 0.0469, "step": 26380 }, { "epoch": 0.4050341493361983, "grad_norm": 0.6215517520904541, "learning_rate": 1.936651314582008e-05, "loss": 0.048, "step": 26390 }, { "epoch": 0.40518762949888726, "grad_norm": 0.5855164527893066, "learning_rate": 1.9365574547753048e-05, "loss": 0.0456, "step": 26400 }, { "epoch": 0.4053411096615762, "grad_norm": 0.3902471959590912, "learning_rate": 1.936463527764828e-05, "loss": 0.0476, "step": 26410 }, { "epoch": 0.40549458982426523, "grad_norm": 0.24595481157302856, "learning_rate": 1.936369533557317e-05, "loss": 0.0407, "step": 26420 }, { "epoch": 0.4056480699869542, "grad_norm": 0.5991965532302856, "learning_rate": 1.9362754721595175e-05, "loss": 0.0475, "step": 26430 }, { "epoch": 0.40580155014964314, "grad_norm": 0.5250510573387146, "learning_rate": 1.9361813435781786e-05, "loss": 0.048, "step": 26440 }, { "epoch": 0.40595503031233215, "grad_norm": 0.36763298511505127, "learning_rate": 1.936087147820054e-05, "loss": 0.0479, "step": 26450 }, { "epoch": 0.4061085104750211, "grad_norm": 0.5006511211395264, "learning_rate": 1.935992884891904e-05, "loss": 0.0574, "step": 26460 }, { "epoch": 0.40626199063771007, "grad_norm": 0.6555877327919006, "learning_rate": 1.9358985548004914e-05, "loss": 0.0487, "step": 26470 }, { "epoch": 0.406415470800399, "grad_norm": 0.3219817876815796, "learning_rate": 1.9358041575525856e-05, "loss": 0.0522, "step": 26480 }, { "epoch": 0.40656895096308804, "grad_norm": 0.5061848163604736, "learning_rate": 1.93570969315496e-05, "loss": 0.0504, "step": 26490 }, { "epoch": 0.406722431125777, "grad_norm": 0.5058839321136475, "learning_rate": 1.935615161614393e-05, "loss": 0.0521, "step": 26500 }, { "epoch": 0.40687591128846595, "grad_norm": 0.339141309261322, "learning_rate": 1.9355205629376675e-05, "loss": 0.0528, "step": 26510 }, { "epoch": 0.40702939145115496, "grad_norm": 0.6298207640647888, "learning_rate": 1.9354258971315724e-05, "loss": 0.0472, "step": 26520 }, { "epoch": 0.4071828716138439, "grad_norm": 0.643375813961029, "learning_rate": 1.9353311642029e-05, "loss": 0.046, "step": 26530 }, { "epoch": 0.4073363517765329, "grad_norm": 0.5161901712417603, "learning_rate": 1.9352363641584476e-05, "loss": 0.049, "step": 26540 }, { "epoch": 0.40748983193922184, "grad_norm": 0.527613639831543, "learning_rate": 1.9351414970050186e-05, "loss": 0.0518, "step": 26550 }, { "epoch": 0.40764331210191085, "grad_norm": 0.481237530708313, "learning_rate": 1.9350465627494196e-05, "loss": 0.0524, "step": 26560 }, { "epoch": 0.4077967922645998, "grad_norm": 0.4577370882034302, "learning_rate": 1.9349515613984628e-05, "loss": 0.0417, "step": 26570 }, { "epoch": 0.40795027242728876, "grad_norm": 0.5879689455032349, "learning_rate": 1.934856492958965e-05, "loss": 0.0393, "step": 26580 }, { "epoch": 0.4081037525899777, "grad_norm": 0.6516128182411194, "learning_rate": 1.934761357437749e-05, "loss": 0.0565, "step": 26590 }, { "epoch": 0.40825723275266673, "grad_norm": 0.4862172603607178, "learning_rate": 1.93466615484164e-05, "loss": 0.0407, "step": 26600 }, { "epoch": 0.4084107129153557, "grad_norm": 0.5035387873649597, "learning_rate": 1.93457088517747e-05, "loss": 0.0532, "step": 26610 }, { "epoch": 0.40856419307804465, "grad_norm": 0.36948636174201965, "learning_rate": 1.934475548452075e-05, "loss": 0.0455, "step": 26620 }, { "epoch": 0.40871767324073366, "grad_norm": 0.5009413957595825, "learning_rate": 1.9343801446722966e-05, "loss": 0.0429, "step": 26630 }, { "epoch": 0.4088711534034226, "grad_norm": 0.3889690041542053, "learning_rate": 1.9342846738449798e-05, "loss": 0.0476, "step": 26640 }, { "epoch": 0.4090246335661116, "grad_norm": 0.38345304131507874, "learning_rate": 1.9341891359769754e-05, "loss": 0.049, "step": 26650 }, { "epoch": 0.40917811372880053, "grad_norm": 0.544123113155365, "learning_rate": 1.934093531075139e-05, "loss": 0.0424, "step": 26660 }, { "epoch": 0.40933159389148954, "grad_norm": 0.45691099762916565, "learning_rate": 1.933997859146331e-05, "loss": 0.0444, "step": 26670 }, { "epoch": 0.4094850740541785, "grad_norm": 0.4373791515827179, "learning_rate": 1.9339021201974166e-05, "loss": 0.0476, "step": 26680 }, { "epoch": 0.40963855421686746, "grad_norm": 0.4586421847343445, "learning_rate": 1.9338063142352644e-05, "loss": 0.0465, "step": 26690 }, { "epoch": 0.40979203437955647, "grad_norm": 0.46014031767845154, "learning_rate": 1.933710441266751e-05, "loss": 0.0434, "step": 26700 }, { "epoch": 0.4099455145422454, "grad_norm": 0.4456298351287842, "learning_rate": 1.9336145012987542e-05, "loss": 0.0549, "step": 26710 }, { "epoch": 0.4100989947049344, "grad_norm": 0.5678703188896179, "learning_rate": 1.9335184943381594e-05, "loss": 0.0535, "step": 26720 }, { "epoch": 0.41025247486762334, "grad_norm": 0.6022910475730896, "learning_rate": 1.9334224203918548e-05, "loss": 0.0547, "step": 26730 }, { "epoch": 0.41040595503031235, "grad_norm": 0.6937448382377625, "learning_rate": 1.933326279466735e-05, "loss": 0.0553, "step": 26740 }, { "epoch": 0.4105594351930013, "grad_norm": 0.4231529235839844, "learning_rate": 1.9332300715696983e-05, "loss": 0.0442, "step": 26750 }, { "epoch": 0.41071291535569027, "grad_norm": 0.43823790550231934, "learning_rate": 1.9331337967076486e-05, "loss": 0.0435, "step": 26760 }, { "epoch": 0.4108663955183792, "grad_norm": 0.6613631844520569, "learning_rate": 1.9330374548874937e-05, "loss": 0.0473, "step": 26770 }, { "epoch": 0.41101987568106824, "grad_norm": 0.5487679839134216, "learning_rate": 1.9329410461161476e-05, "loss": 0.0485, "step": 26780 }, { "epoch": 0.4111733558437572, "grad_norm": 0.4990983009338379, "learning_rate": 1.932844570400527e-05, "loss": 0.0399, "step": 26790 }, { "epoch": 0.41132683600644615, "grad_norm": 0.5497710108757019, "learning_rate": 1.9327480277475555e-05, "loss": 0.053, "step": 26800 }, { "epoch": 0.41148031616913516, "grad_norm": 0.5163520574569702, "learning_rate": 1.93265141816416e-05, "loss": 0.0464, "step": 26810 }, { "epoch": 0.4116337963318241, "grad_norm": 0.562279462814331, "learning_rate": 1.932554741657274e-05, "loss": 0.0518, "step": 26820 }, { "epoch": 0.4117872764945131, "grad_norm": 0.4968518316745758, "learning_rate": 1.9324579982338333e-05, "loss": 0.0351, "step": 26830 }, { "epoch": 0.41194075665720203, "grad_norm": 0.4550221562385559, "learning_rate": 1.9323611879007803e-05, "loss": 0.0507, "step": 26840 }, { "epoch": 0.41209423681989105, "grad_norm": 0.47908610105514526, "learning_rate": 1.932264310665062e-05, "loss": 0.0509, "step": 26850 }, { "epoch": 0.41224771698258, "grad_norm": 0.4658854901790619, "learning_rate": 1.93216736653363e-05, "loss": 0.0465, "step": 26860 }, { "epoch": 0.41240119714526896, "grad_norm": 0.4567418098449707, "learning_rate": 1.93207035551344e-05, "loss": 0.0426, "step": 26870 }, { "epoch": 0.412554677307958, "grad_norm": 0.3630359470844269, "learning_rate": 1.931973277611454e-05, "loss": 0.0514, "step": 26880 }, { "epoch": 0.41270815747064693, "grad_norm": 0.6758246421813965, "learning_rate": 1.931876132834637e-05, "loss": 0.044, "step": 26890 }, { "epoch": 0.4128616376333359, "grad_norm": 0.5181974768638611, "learning_rate": 1.9317789211899603e-05, "loss": 0.047, "step": 26900 }, { "epoch": 0.41301511779602484, "grad_norm": 0.4854281544685364, "learning_rate": 1.9316816426843997e-05, "loss": 0.045, "step": 26910 }, { "epoch": 0.41316859795871386, "grad_norm": 0.6325050592422485, "learning_rate": 1.931584297324935e-05, "loss": 0.0471, "step": 26920 }, { "epoch": 0.4133220781214028, "grad_norm": 0.539024829864502, "learning_rate": 1.9314868851185516e-05, "loss": 0.0492, "step": 26930 }, { "epoch": 0.41347555828409177, "grad_norm": 0.4713425636291504, "learning_rate": 1.931389406072239e-05, "loss": 0.0462, "step": 26940 }, { "epoch": 0.41362903844678073, "grad_norm": 0.525455117225647, "learning_rate": 1.9312918601929926e-05, "loss": 0.0503, "step": 26950 }, { "epoch": 0.41378251860946974, "grad_norm": 0.45062586665153503, "learning_rate": 1.9311942474878114e-05, "loss": 0.0479, "step": 26960 }, { "epoch": 0.4139359987721587, "grad_norm": 0.9524551033973694, "learning_rate": 1.9310965679637e-05, "loss": 0.0507, "step": 26970 }, { "epoch": 0.41408947893484765, "grad_norm": 0.442085325717926, "learning_rate": 1.9309988216276677e-05, "loss": 0.0543, "step": 26980 }, { "epoch": 0.41424295909753667, "grad_norm": 0.6132255792617798, "learning_rate": 1.9309010084867275e-05, "loss": 0.0423, "step": 26990 }, { "epoch": 0.4143964392602256, "grad_norm": 0.40836241841316223, "learning_rate": 1.930803128547899e-05, "loss": 0.047, "step": 27000 }, { "epoch": 0.4145499194229146, "grad_norm": 0.7818101644515991, "learning_rate": 1.9307051818182056e-05, "loss": 0.0501, "step": 27010 }, { "epoch": 0.41470339958560354, "grad_norm": 0.3003341257572174, "learning_rate": 1.930607168304675e-05, "loss": 0.0406, "step": 27020 }, { "epoch": 0.41485687974829255, "grad_norm": 0.37745869159698486, "learning_rate": 1.9305090880143413e-05, "loss": 0.0531, "step": 27030 }, { "epoch": 0.4150103599109815, "grad_norm": 0.582542896270752, "learning_rate": 1.930410940954241e-05, "loss": 0.0535, "step": 27040 }, { "epoch": 0.41516384007367046, "grad_norm": 0.4457186460494995, "learning_rate": 1.9303127271314176e-05, "loss": 0.0459, "step": 27050 }, { "epoch": 0.4153173202363595, "grad_norm": 0.5564910769462585, "learning_rate": 1.9302144465529186e-05, "loss": 0.0504, "step": 27060 }, { "epoch": 0.41547080039904843, "grad_norm": 0.5561356544494629, "learning_rate": 1.930116099225796e-05, "loss": 0.0486, "step": 27070 }, { "epoch": 0.4156242805617374, "grad_norm": 0.3491499423980713, "learning_rate": 1.930017685157107e-05, "loss": 0.0562, "step": 27080 }, { "epoch": 0.41577776072442635, "grad_norm": 0.6426780819892883, "learning_rate": 1.929919204353913e-05, "loss": 0.0598, "step": 27090 }, { "epoch": 0.41593124088711536, "grad_norm": 0.500889003276825, "learning_rate": 1.9298206568232812e-05, "loss": 0.0579, "step": 27100 }, { "epoch": 0.4160847210498043, "grad_norm": 0.409443199634552, "learning_rate": 1.9297220425722825e-05, "loss": 0.0434, "step": 27110 }, { "epoch": 0.4162382012124933, "grad_norm": 0.5415923595428467, "learning_rate": 1.9296233616079934e-05, "loss": 0.0403, "step": 27120 }, { "epoch": 0.41639168137518223, "grad_norm": 0.35864511132240295, "learning_rate": 1.929524613937495e-05, "loss": 0.0418, "step": 27130 }, { "epoch": 0.41654516153787124, "grad_norm": 0.5606052875518799, "learning_rate": 1.9294257995678724e-05, "loss": 0.0457, "step": 27140 }, { "epoch": 0.4166986417005602, "grad_norm": 0.37427327036857605, "learning_rate": 1.9293269185062166e-05, "loss": 0.0485, "step": 27150 }, { "epoch": 0.41685212186324916, "grad_norm": 0.4998699128627777, "learning_rate": 1.929227970759623e-05, "loss": 0.0513, "step": 27160 }, { "epoch": 0.41700560202593817, "grad_norm": 0.4688933491706848, "learning_rate": 1.9291289563351916e-05, "loss": 0.0379, "step": 27170 }, { "epoch": 0.41715908218862713, "grad_norm": 0.560530960559845, "learning_rate": 1.929029875240027e-05, "loss": 0.0531, "step": 27180 }, { "epoch": 0.4173125623513161, "grad_norm": 0.7750366926193237, "learning_rate": 1.9289307274812397e-05, "loss": 0.0636, "step": 27190 }, { "epoch": 0.41746604251400504, "grad_norm": 0.5075268745422363, "learning_rate": 1.928831513065943e-05, "loss": 0.0459, "step": 27200 }, { "epoch": 0.41761952267669405, "grad_norm": 0.5582090020179749, "learning_rate": 1.9287322320012572e-05, "loss": 0.0486, "step": 27210 }, { "epoch": 0.417773002839383, "grad_norm": 0.5126310586929321, "learning_rate": 1.9286328842943057e-05, "loss": 0.0505, "step": 27220 }, { "epoch": 0.41792648300207197, "grad_norm": 0.5694646239280701, "learning_rate": 1.9285334699522176e-05, "loss": 0.0545, "step": 27230 }, { "epoch": 0.418079963164761, "grad_norm": 0.4846322536468506, "learning_rate": 1.928433988982126e-05, "loss": 0.0405, "step": 27240 }, { "epoch": 0.41823344332744994, "grad_norm": 0.4640311300754547, "learning_rate": 1.9283344413911705e-05, "loss": 0.0466, "step": 27250 }, { "epoch": 0.4183869234901389, "grad_norm": 0.2499108612537384, "learning_rate": 1.9282348271864926e-05, "loss": 0.0335, "step": 27260 }, { "epoch": 0.41854040365282785, "grad_norm": 0.6697507500648499, "learning_rate": 1.9281351463752416e-05, "loss": 0.0486, "step": 27270 }, { "epoch": 0.41869388381551687, "grad_norm": 0.6529773473739624, "learning_rate": 1.9280353989645693e-05, "loss": 0.0624, "step": 27280 }, { "epoch": 0.4188473639782058, "grad_norm": 0.42819613218307495, "learning_rate": 1.9279355849616337e-05, "loss": 0.0424, "step": 27290 }, { "epoch": 0.4190008441408948, "grad_norm": 0.42156514525413513, "learning_rate": 1.9278357043735968e-05, "loss": 0.0505, "step": 27300 }, { "epoch": 0.41915432430358374, "grad_norm": 0.5823037624359131, "learning_rate": 1.9277357572076263e-05, "loss": 0.0577, "step": 27310 }, { "epoch": 0.41930780446627275, "grad_norm": 0.6169046759605408, "learning_rate": 1.927635743470893e-05, "loss": 0.0467, "step": 27320 }, { "epoch": 0.4194612846289617, "grad_norm": 0.8857683539390564, "learning_rate": 1.9275356631705742e-05, "loss": 0.0381, "step": 27330 }, { "epoch": 0.41961476479165066, "grad_norm": 0.4444213807582855, "learning_rate": 1.9274355163138508e-05, "loss": 0.0459, "step": 27340 }, { "epoch": 0.4197682449543397, "grad_norm": 0.4942897856235504, "learning_rate": 1.9273353029079097e-05, "loss": 0.0582, "step": 27350 }, { "epoch": 0.41992172511702863, "grad_norm": 0.5041247606277466, "learning_rate": 1.9272350229599412e-05, "loss": 0.0477, "step": 27360 }, { "epoch": 0.4200752052797176, "grad_norm": 0.6046010851860046, "learning_rate": 1.927134676477141e-05, "loss": 0.0469, "step": 27370 }, { "epoch": 0.42022868544240655, "grad_norm": 0.44783294200897217, "learning_rate": 1.92703426346671e-05, "loss": 0.0518, "step": 27380 }, { "epoch": 0.42038216560509556, "grad_norm": 0.3868907690048218, "learning_rate": 1.926933783935853e-05, "loss": 0.0408, "step": 27390 }, { "epoch": 0.4205356457677845, "grad_norm": 0.3915683925151825, "learning_rate": 1.9268332378917804e-05, "loss": 0.051, "step": 27400 }, { "epoch": 0.4206891259304735, "grad_norm": 0.4074188768863678, "learning_rate": 1.9267326253417068e-05, "loss": 0.0474, "step": 27410 }, { "epoch": 0.4208426060931625, "grad_norm": 0.43133455514907837, "learning_rate": 1.9266319462928518e-05, "loss": 0.0498, "step": 27420 }, { "epoch": 0.42099608625585144, "grad_norm": 0.5495785474777222, "learning_rate": 1.9265312007524396e-05, "loss": 0.0556, "step": 27430 }, { "epoch": 0.4211495664185404, "grad_norm": 0.3153461813926697, "learning_rate": 1.9264303887276996e-05, "loss": 0.0431, "step": 27440 }, { "epoch": 0.42130304658122936, "grad_norm": 0.5447821021080017, "learning_rate": 1.9263295102258654e-05, "loss": 0.0386, "step": 27450 }, { "epoch": 0.42145652674391837, "grad_norm": 0.45423105359077454, "learning_rate": 1.926228565254176e-05, "loss": 0.0477, "step": 27460 }, { "epoch": 0.4216100069066073, "grad_norm": 0.40073853731155396, "learning_rate": 1.9261275538198748e-05, "loss": 0.0596, "step": 27470 }, { "epoch": 0.4217634870692963, "grad_norm": 0.7647970914840698, "learning_rate": 1.9260264759302093e-05, "loss": 0.0481, "step": 27480 }, { "epoch": 0.42191696723198524, "grad_norm": 0.46180498600006104, "learning_rate": 1.925925331592433e-05, "loss": 0.046, "step": 27490 }, { "epoch": 0.42207044739467425, "grad_norm": 0.32108721137046814, "learning_rate": 1.925824120813804e-05, "loss": 0.0482, "step": 27500 }, { "epoch": 0.4222239275573632, "grad_norm": 0.35719114542007446, "learning_rate": 1.9257228436015843e-05, "loss": 0.0431, "step": 27510 }, { "epoch": 0.42237740772005217, "grad_norm": 0.5167911052703857, "learning_rate": 1.9256214999630414e-05, "loss": 0.0433, "step": 27520 }, { "epoch": 0.4225308878827412, "grad_norm": 0.5736404657363892, "learning_rate": 1.925520089905447e-05, "loss": 0.0485, "step": 27530 }, { "epoch": 0.42268436804543014, "grad_norm": 0.6614263653755188, "learning_rate": 1.9254186134360782e-05, "loss": 0.0466, "step": 27540 }, { "epoch": 0.4228378482081191, "grad_norm": 0.5237690210342407, "learning_rate": 1.925317070562216e-05, "loss": 0.0456, "step": 27550 }, { "epoch": 0.42299132837080805, "grad_norm": 0.4869053363800049, "learning_rate": 1.9252154612911478e-05, "loss": 0.0575, "step": 27560 }, { "epoch": 0.42314480853349706, "grad_norm": 0.48206964135169983, "learning_rate": 1.925113785630164e-05, "loss": 0.0499, "step": 27570 }, { "epoch": 0.423298288696186, "grad_norm": 0.8066364526748657, "learning_rate": 1.9250120435865603e-05, "loss": 0.0436, "step": 27580 }, { "epoch": 0.423451768858875, "grad_norm": 0.5202724933624268, "learning_rate": 1.9249102351676378e-05, "loss": 0.0382, "step": 27590 }, { "epoch": 0.423605249021564, "grad_norm": 0.25434306263923645, "learning_rate": 1.9248083603807013e-05, "loss": 0.0452, "step": 27600 }, { "epoch": 0.42375872918425295, "grad_norm": 0.39071962237358093, "learning_rate": 1.9247064192330617e-05, "loss": 0.0378, "step": 27610 }, { "epoch": 0.4239122093469419, "grad_norm": 0.5585325360298157, "learning_rate": 1.924604411732033e-05, "loss": 0.0434, "step": 27620 }, { "epoch": 0.42406568950963086, "grad_norm": 0.30331951379776, "learning_rate": 1.9245023378849356e-05, "loss": 0.0526, "step": 27630 }, { "epoch": 0.4242191696723199, "grad_norm": 0.4428534209728241, "learning_rate": 1.924400197699094e-05, "loss": 0.0425, "step": 27640 }, { "epoch": 0.42437264983500883, "grad_norm": 0.5119346380233765, "learning_rate": 1.9242979911818365e-05, "loss": 0.0413, "step": 27650 }, { "epoch": 0.4245261299976978, "grad_norm": 0.46153274178504944, "learning_rate": 1.924195718340498e-05, "loss": 0.0424, "step": 27660 }, { "epoch": 0.42467961016038674, "grad_norm": 0.5041608810424805, "learning_rate": 1.9240933791824167e-05, "loss": 0.0487, "step": 27670 }, { "epoch": 0.42483309032307576, "grad_norm": 0.5952807068824768, "learning_rate": 1.9239909737149362e-05, "loss": 0.0549, "step": 27680 }, { "epoch": 0.4249865704857647, "grad_norm": 0.35189783573150635, "learning_rate": 1.923888501945405e-05, "loss": 0.0457, "step": 27690 }, { "epoch": 0.42514005064845367, "grad_norm": 0.3911120593547821, "learning_rate": 1.9237859638811753e-05, "loss": 0.0579, "step": 27700 }, { "epoch": 0.4252935308111427, "grad_norm": 0.5100461840629578, "learning_rate": 1.9236833595296055e-05, "loss": 0.0556, "step": 27710 }, { "epoch": 0.42544701097383164, "grad_norm": 0.5456752777099609, "learning_rate": 1.9235806888980583e-05, "loss": 0.0652, "step": 27720 }, { "epoch": 0.4256004911365206, "grad_norm": 0.6066398024559021, "learning_rate": 1.9234779519939e-05, "loss": 0.0575, "step": 27730 }, { "epoch": 0.42575397129920955, "grad_norm": 0.44334787130355835, "learning_rate": 1.9233751488245035e-05, "loss": 0.0399, "step": 27740 }, { "epoch": 0.42590745146189857, "grad_norm": 0.5353498458862305, "learning_rate": 1.9232722793972455e-05, "loss": 0.055, "step": 27750 }, { "epoch": 0.4260609316245875, "grad_norm": 0.4764995574951172, "learning_rate": 1.9231693437195068e-05, "loss": 0.0542, "step": 27760 }, { "epoch": 0.4262144117872765, "grad_norm": 0.5469278693199158, "learning_rate": 1.9230663417986742e-05, "loss": 0.0497, "step": 27770 }, { "epoch": 0.4263678919499655, "grad_norm": 0.4181423783302307, "learning_rate": 1.9229632736421392e-05, "loss": 0.0595, "step": 27780 }, { "epoch": 0.42652137211265445, "grad_norm": 0.422627717256546, "learning_rate": 1.9228601392572964e-05, "loss": 0.0397, "step": 27790 }, { "epoch": 0.4266748522753434, "grad_norm": 0.5977815985679626, "learning_rate": 1.9227569386515473e-05, "loss": 0.0518, "step": 27800 }, { "epoch": 0.42682833243803237, "grad_norm": 0.8387439250946045, "learning_rate": 1.9226536718322965e-05, "loss": 0.0502, "step": 27810 }, { "epoch": 0.4269818126007214, "grad_norm": 0.3633549213409424, "learning_rate": 1.922550338806955e-05, "loss": 0.0494, "step": 27820 }, { "epoch": 0.42713529276341033, "grad_norm": 0.47285500168800354, "learning_rate": 1.9224469395829363e-05, "loss": 0.0462, "step": 27830 }, { "epoch": 0.4272887729260993, "grad_norm": 0.5225227475166321, "learning_rate": 1.922343474167661e-05, "loss": 0.0437, "step": 27840 }, { "epoch": 0.42744225308878825, "grad_norm": 0.5126924514770508, "learning_rate": 1.9222399425685527e-05, "loss": 0.0381, "step": 27850 }, { "epoch": 0.42759573325147726, "grad_norm": 0.5341145992279053, "learning_rate": 1.9221363447930407e-05, "loss": 0.0435, "step": 27860 }, { "epoch": 0.4277492134141662, "grad_norm": 0.49457111954689026, "learning_rate": 1.9220326808485593e-05, "loss": 0.0494, "step": 27870 }, { "epoch": 0.4279026935768552, "grad_norm": 0.5238566398620605, "learning_rate": 1.921928950742546e-05, "loss": 0.0511, "step": 27880 }, { "epoch": 0.4280561737395442, "grad_norm": 0.45684999227523804, "learning_rate": 1.9218251544824444e-05, "loss": 0.05, "step": 27890 }, { "epoch": 0.42820965390223314, "grad_norm": 0.3596891760826111, "learning_rate": 1.921721292075703e-05, "loss": 0.0565, "step": 27900 }, { "epoch": 0.4283631340649221, "grad_norm": 0.46910566091537476, "learning_rate": 1.9216173635297743e-05, "loss": 0.0499, "step": 27910 }, { "epoch": 0.42851661422761106, "grad_norm": 0.4352415204048157, "learning_rate": 1.921513368852116e-05, "loss": 0.0428, "step": 27920 }, { "epoch": 0.42867009439030007, "grad_norm": 0.5408806204795837, "learning_rate": 1.92140930805019e-05, "loss": 0.0517, "step": 27930 }, { "epoch": 0.42882357455298903, "grad_norm": 0.42667943239212036, "learning_rate": 1.9213051811314632e-05, "loss": 0.052, "step": 27940 }, { "epoch": 0.428977054715678, "grad_norm": 0.49060818552970886, "learning_rate": 1.9212009881034076e-05, "loss": 0.0457, "step": 27950 }, { "epoch": 0.429130534878367, "grad_norm": 0.5634194016456604, "learning_rate": 1.9210967289735e-05, "loss": 0.0438, "step": 27960 }, { "epoch": 0.42928401504105596, "grad_norm": 0.4058414101600647, "learning_rate": 1.9209924037492212e-05, "loss": 0.0389, "step": 27970 }, { "epoch": 0.4294374952037449, "grad_norm": 0.5212612152099609, "learning_rate": 1.9208880124380575e-05, "loss": 0.0397, "step": 27980 }, { "epoch": 0.42959097536643387, "grad_norm": 0.30108442902565, "learning_rate": 1.9207835550474988e-05, "loss": 0.0344, "step": 27990 }, { "epoch": 0.4297444555291229, "grad_norm": 0.3053460717201233, "learning_rate": 1.9206790315850417e-05, "loss": 0.0518, "step": 28000 }, { "epoch": 0.42989793569181184, "grad_norm": 0.42354658246040344, "learning_rate": 1.920574442058186e-05, "loss": 0.0467, "step": 28010 }, { "epoch": 0.4300514158545008, "grad_norm": 0.6323308348655701, "learning_rate": 1.9204697864744363e-05, "loss": 0.0448, "step": 28020 }, { "epoch": 0.43020489601718975, "grad_norm": 0.45695045590400696, "learning_rate": 1.9203650648413025e-05, "loss": 0.0485, "step": 28030 }, { "epoch": 0.43035837617987877, "grad_norm": 0.36434078216552734, "learning_rate": 1.9202602771662994e-05, "loss": 0.0357, "step": 28040 }, { "epoch": 0.4305118563425677, "grad_norm": 0.43468689918518066, "learning_rate": 1.920155423456945e-05, "loss": 0.0519, "step": 28050 }, { "epoch": 0.4306653365052567, "grad_norm": 0.3890943229198456, "learning_rate": 1.9200505037207647e-05, "loss": 0.046, "step": 28060 }, { "epoch": 0.4308188166679457, "grad_norm": 0.617179811000824, "learning_rate": 1.919945517965286e-05, "loss": 0.0497, "step": 28070 }, { "epoch": 0.43097229683063465, "grad_norm": 0.6199304461479187, "learning_rate": 1.9198404661980433e-05, "loss": 0.0597, "step": 28080 }, { "epoch": 0.4311257769933236, "grad_norm": 0.4380687475204468, "learning_rate": 1.9197353484265736e-05, "loss": 0.0481, "step": 28090 }, { "epoch": 0.43127925715601256, "grad_norm": 0.45790934562683105, "learning_rate": 1.9196301646584207e-05, "loss": 0.0417, "step": 28100 }, { "epoch": 0.4314327373187016, "grad_norm": 0.43675705790519714, "learning_rate": 1.9195249149011316e-05, "loss": 0.0441, "step": 28110 }, { "epoch": 0.43158621748139053, "grad_norm": 0.5673602819442749, "learning_rate": 1.9194195991622582e-05, "loss": 0.0651, "step": 28120 }, { "epoch": 0.4317396976440795, "grad_norm": 0.5943882465362549, "learning_rate": 1.9193142174493587e-05, "loss": 0.0526, "step": 28130 }, { "epoch": 0.4318931778067685, "grad_norm": 0.5144250988960266, "learning_rate": 1.9192087697699944e-05, "loss": 0.0481, "step": 28140 }, { "epoch": 0.43204665796945746, "grad_norm": 0.44296079874038696, "learning_rate": 1.9191032561317317e-05, "loss": 0.0548, "step": 28150 }, { "epoch": 0.4322001381321464, "grad_norm": 0.5356835722923279, "learning_rate": 1.9189976765421416e-05, "loss": 0.0567, "step": 28160 }, { "epoch": 0.4323536182948354, "grad_norm": 0.41525158286094666, "learning_rate": 1.9188920310088005e-05, "loss": 0.0487, "step": 28170 }, { "epoch": 0.4325070984575244, "grad_norm": 0.36057743430137634, "learning_rate": 1.918786319539289e-05, "loss": 0.0381, "step": 28180 }, { "epoch": 0.43266057862021334, "grad_norm": 0.6586805582046509, "learning_rate": 1.918680542141193e-05, "loss": 0.0517, "step": 28190 }, { "epoch": 0.4328140587829023, "grad_norm": 0.5154150128364563, "learning_rate": 1.9185746988221018e-05, "loss": 0.0467, "step": 28200 }, { "epoch": 0.43296753894559126, "grad_norm": 0.39066967368125916, "learning_rate": 1.918468789589611e-05, "loss": 0.0457, "step": 28210 }, { "epoch": 0.43312101910828027, "grad_norm": 0.4139029085636139, "learning_rate": 1.9183628144513196e-05, "loss": 0.0485, "step": 28220 }, { "epoch": 0.4332744992709692, "grad_norm": 0.46393895149230957, "learning_rate": 1.918256773414833e-05, "loss": 0.0525, "step": 28230 }, { "epoch": 0.4334279794336582, "grad_norm": 0.5089852213859558, "learning_rate": 1.918150666487759e-05, "loss": 0.0461, "step": 28240 }, { "epoch": 0.4335814595963472, "grad_norm": 0.5027654767036438, "learning_rate": 1.9180444936777122e-05, "loss": 0.0427, "step": 28250 }, { "epoch": 0.43373493975903615, "grad_norm": 0.9591531753540039, "learning_rate": 1.9179382549923112e-05, "loss": 0.0518, "step": 28260 }, { "epoch": 0.4338884199217251, "grad_norm": 0.45817846059799194, "learning_rate": 1.917831950439179e-05, "loss": 0.0385, "step": 28270 }, { "epoch": 0.43404190008441407, "grad_norm": 0.4764541983604431, "learning_rate": 1.917725580025944e-05, "loss": 0.0496, "step": 28280 }, { "epoch": 0.4341953802471031, "grad_norm": 0.5142108201980591, "learning_rate": 1.9176191437602384e-05, "loss": 0.0531, "step": 28290 }, { "epoch": 0.43434886040979204, "grad_norm": 0.5464670658111572, "learning_rate": 1.9175126416496996e-05, "loss": 0.0392, "step": 28300 }, { "epoch": 0.434502340572481, "grad_norm": 0.5654213428497314, "learning_rate": 1.9174060737019706e-05, "loss": 0.0489, "step": 28310 }, { "epoch": 0.43465582073517, "grad_norm": 0.39050620794296265, "learning_rate": 1.9172994399246975e-05, "loss": 0.0422, "step": 28320 }, { "epoch": 0.43480930089785896, "grad_norm": 0.2624513804912567, "learning_rate": 1.9171927403255326e-05, "loss": 0.0448, "step": 28330 }, { "epoch": 0.4349627810605479, "grad_norm": 0.8435528874397278, "learning_rate": 1.9170859749121316e-05, "loss": 0.0649, "step": 28340 }, { "epoch": 0.4351162612232369, "grad_norm": 0.44186943769454956, "learning_rate": 1.916979143692156e-05, "loss": 0.0459, "step": 28350 }, { "epoch": 0.4352697413859259, "grad_norm": 0.47888147830963135, "learning_rate": 1.9168722466732713e-05, "loss": 0.0479, "step": 28360 }, { "epoch": 0.43542322154861485, "grad_norm": 0.45092251896858215, "learning_rate": 1.9167652838631483e-05, "loss": 0.0441, "step": 28370 }, { "epoch": 0.4355767017113038, "grad_norm": 0.4506010115146637, "learning_rate": 1.916658255269462e-05, "loss": 0.045, "step": 28380 }, { "epoch": 0.43573018187399276, "grad_norm": 0.6463885307312012, "learning_rate": 1.9165511608998927e-05, "loss": 0.0457, "step": 28390 }, { "epoch": 0.4358836620366818, "grad_norm": 0.5238053202629089, "learning_rate": 1.916444000762125e-05, "loss": 0.0491, "step": 28400 }, { "epoch": 0.43603714219937073, "grad_norm": 0.3443332314491272, "learning_rate": 1.9163367748638476e-05, "loss": 0.0406, "step": 28410 }, { "epoch": 0.4361906223620597, "grad_norm": 0.4782334268093109, "learning_rate": 1.9162294832127557e-05, "loss": 0.0567, "step": 28420 }, { "epoch": 0.4363441025247487, "grad_norm": 0.6368808150291443, "learning_rate": 1.9161221258165477e-05, "loss": 0.0495, "step": 28430 }, { "epoch": 0.43649758268743766, "grad_norm": 0.699799120426178, "learning_rate": 1.916014702682927e-05, "loss": 0.0491, "step": 28440 }, { "epoch": 0.4366510628501266, "grad_norm": 0.5153094530105591, "learning_rate": 1.9159072138196015e-05, "loss": 0.0509, "step": 28450 }, { "epoch": 0.43680454301281557, "grad_norm": 0.6275997161865234, "learning_rate": 1.915799659234285e-05, "loss": 0.0603, "step": 28460 }, { "epoch": 0.4369580231755046, "grad_norm": 0.4244128167629242, "learning_rate": 1.9156920389346945e-05, "loss": 0.0488, "step": 28470 }, { "epoch": 0.43711150333819354, "grad_norm": 0.5092693567276001, "learning_rate": 1.9155843529285533e-05, "loss": 0.0515, "step": 28480 }, { "epoch": 0.4372649835008825, "grad_norm": 0.6869122385978699, "learning_rate": 1.915476601223588e-05, "loss": 0.0413, "step": 28490 }, { "epoch": 0.4374184636635715, "grad_norm": 0.3932001292705536, "learning_rate": 1.91536878382753e-05, "loss": 0.0619, "step": 28500 }, { "epoch": 0.43757194382626047, "grad_norm": 0.48910731077194214, "learning_rate": 1.9152609007481166e-05, "loss": 0.0438, "step": 28510 }, { "epoch": 0.4377254239889494, "grad_norm": 0.4092288613319397, "learning_rate": 1.915152951993089e-05, "loss": 0.0445, "step": 28520 }, { "epoch": 0.4378789041516384, "grad_norm": 0.43945637345314026, "learning_rate": 1.9150449375701924e-05, "loss": 0.0438, "step": 28530 }, { "epoch": 0.4380323843143274, "grad_norm": 0.6588780283927917, "learning_rate": 1.9149368574871784e-05, "loss": 0.0608, "step": 28540 }, { "epoch": 0.43818586447701635, "grad_norm": 0.47592049837112427, "learning_rate": 1.914828711751802e-05, "loss": 0.0424, "step": 28550 }, { "epoch": 0.4383393446397053, "grad_norm": 0.5092447996139526, "learning_rate": 1.9147205003718236e-05, "loss": 0.0516, "step": 28560 }, { "epoch": 0.43849282480239427, "grad_norm": 0.5853741765022278, "learning_rate": 1.914612223355008e-05, "loss": 0.046, "step": 28570 }, { "epoch": 0.4386463049650833, "grad_norm": 0.5687437653541565, "learning_rate": 1.9145038807091242e-05, "loss": 0.0512, "step": 28580 }, { "epoch": 0.43879978512777223, "grad_norm": 0.5601189136505127, "learning_rate": 1.9143954724419472e-05, "loss": 0.049, "step": 28590 }, { "epoch": 0.4389532652904612, "grad_norm": 0.5357370376586914, "learning_rate": 1.9142869985612553e-05, "loss": 0.0381, "step": 28600 }, { "epoch": 0.4391067454531502, "grad_norm": 0.5265185236930847, "learning_rate": 1.9141784590748326e-05, "loss": 0.0405, "step": 28610 }, { "epoch": 0.43926022561583916, "grad_norm": 0.43398258090019226, "learning_rate": 1.9140698539904675e-05, "loss": 0.0447, "step": 28620 }, { "epoch": 0.4394137057785281, "grad_norm": 0.5951694250106812, "learning_rate": 1.913961183315953e-05, "loss": 0.0507, "step": 28630 }, { "epoch": 0.4395671859412171, "grad_norm": 0.630787193775177, "learning_rate": 1.9138524470590867e-05, "loss": 0.043, "step": 28640 }, { "epoch": 0.4397206661039061, "grad_norm": 0.3917539417743683, "learning_rate": 1.913743645227671e-05, "loss": 0.0471, "step": 28650 }, { "epoch": 0.43987414626659505, "grad_norm": 0.4557769000530243, "learning_rate": 1.9136347778295136e-05, "loss": 0.0416, "step": 28660 }, { "epoch": 0.440027626429284, "grad_norm": 0.972230076789856, "learning_rate": 1.9135258448724264e-05, "loss": 0.044, "step": 28670 }, { "epoch": 0.440181106591973, "grad_norm": 0.5291154384613037, "learning_rate": 1.9134168463642254e-05, "loss": 0.0489, "step": 28680 }, { "epoch": 0.44033458675466197, "grad_norm": 0.38511955738067627, "learning_rate": 1.913307782312732e-05, "loss": 0.0455, "step": 28690 }, { "epoch": 0.44048806691735093, "grad_norm": 0.6622788310050964, "learning_rate": 1.913198652725773e-05, "loss": 0.0567, "step": 28700 }, { "epoch": 0.4406415470800399, "grad_norm": 0.4259209632873535, "learning_rate": 1.9130894576111784e-05, "loss": 0.0419, "step": 28710 }, { "epoch": 0.4407950272427289, "grad_norm": 0.4475433826446533, "learning_rate": 1.912980196976784e-05, "loss": 0.0597, "step": 28720 }, { "epoch": 0.44094850740541786, "grad_norm": 0.4976860582828522, "learning_rate": 1.9128708708304296e-05, "loss": 0.04, "step": 28730 }, { "epoch": 0.4411019875681068, "grad_norm": 0.3695831000804901, "learning_rate": 1.91276147917996e-05, "loss": 0.0537, "step": 28740 }, { "epoch": 0.44125546773079577, "grad_norm": 0.37412717938423157, "learning_rate": 1.912652022033225e-05, "loss": 0.058, "step": 28750 }, { "epoch": 0.4414089478934848, "grad_norm": 0.4127403497695923, "learning_rate": 1.9125424993980787e-05, "loss": 0.037, "step": 28760 }, { "epoch": 0.44156242805617374, "grad_norm": 0.5159814953804016, "learning_rate": 1.9124329112823804e-05, "loss": 0.0454, "step": 28770 }, { "epoch": 0.4417159082188627, "grad_norm": 0.23189321160316467, "learning_rate": 1.9123232576939932e-05, "loss": 0.0474, "step": 28780 }, { "epoch": 0.4418693883815517, "grad_norm": 0.5648969411849976, "learning_rate": 1.9122135386407855e-05, "loss": 0.0468, "step": 28790 }, { "epoch": 0.44202286854424067, "grad_norm": 0.6027526259422302, "learning_rate": 1.9121037541306305e-05, "loss": 0.0458, "step": 28800 }, { "epoch": 0.4421763487069296, "grad_norm": 0.587631344795227, "learning_rate": 1.9119939041714058e-05, "loss": 0.0498, "step": 28810 }, { "epoch": 0.4423298288696186, "grad_norm": 0.531453549861908, "learning_rate": 1.9118839887709937e-05, "loss": 0.0375, "step": 28820 }, { "epoch": 0.4424833090323076, "grad_norm": 0.4141082465648651, "learning_rate": 1.9117740079372816e-05, "loss": 0.0487, "step": 28830 }, { "epoch": 0.44263678919499655, "grad_norm": 0.4022989273071289, "learning_rate": 1.9116639616781612e-05, "loss": 0.0401, "step": 28840 }, { "epoch": 0.4427902693576855, "grad_norm": 0.3609255850315094, "learning_rate": 1.911553850001529e-05, "loss": 0.0531, "step": 28850 }, { "epoch": 0.4429437495203745, "grad_norm": 0.5132826566696167, "learning_rate": 1.9114436729152862e-05, "loss": 0.0497, "step": 28860 }, { "epoch": 0.4430972296830635, "grad_norm": 0.512032687664032, "learning_rate": 1.9113334304273383e-05, "loss": 0.049, "step": 28870 }, { "epoch": 0.44325070984575243, "grad_norm": 0.40804266929626465, "learning_rate": 1.9112231225455967e-05, "loss": 0.0445, "step": 28880 }, { "epoch": 0.4434041900084414, "grad_norm": 0.5794721245765686, "learning_rate": 1.9111127492779758e-05, "loss": 0.0495, "step": 28890 }, { "epoch": 0.4435576701711304, "grad_norm": 0.4840678572654724, "learning_rate": 1.911002310632396e-05, "loss": 0.0555, "step": 28900 }, { "epoch": 0.44371115033381936, "grad_norm": 0.45948198437690735, "learning_rate": 1.910891806616782e-05, "loss": 0.0614, "step": 28910 }, { "epoch": 0.4438646304965083, "grad_norm": 0.3750803470611572, "learning_rate": 1.910781237239063e-05, "loss": 0.0394, "step": 28920 }, { "epoch": 0.4440181106591973, "grad_norm": 0.5593377351760864, "learning_rate": 1.910670602507173e-05, "loss": 0.038, "step": 28930 }, { "epoch": 0.4441715908218863, "grad_norm": 0.7201852798461914, "learning_rate": 1.910559902429051e-05, "loss": 0.0545, "step": 28940 }, { "epoch": 0.44432507098457524, "grad_norm": 0.386459618806839, "learning_rate": 1.91044913701264e-05, "loss": 0.0486, "step": 28950 }, { "epoch": 0.4444785511472642, "grad_norm": 0.5907604694366455, "learning_rate": 1.9103383062658884e-05, "loss": 0.0419, "step": 28960 }, { "epoch": 0.4446320313099532, "grad_norm": 0.6277404427528381, "learning_rate": 1.910227410196749e-05, "loss": 0.0463, "step": 28970 }, { "epoch": 0.44478551147264217, "grad_norm": 0.608779788017273, "learning_rate": 1.910116448813179e-05, "loss": 0.0477, "step": 28980 }, { "epoch": 0.4449389916353311, "grad_norm": 0.5512873530387878, "learning_rate": 1.910005422123141e-05, "loss": 0.0542, "step": 28990 }, { "epoch": 0.4450924717980201, "grad_norm": 0.7278991341590881, "learning_rate": 1.9098943301346013e-05, "loss": 0.0447, "step": 29000 }, { "epoch": 0.4452459519607091, "grad_norm": 0.4819819927215576, "learning_rate": 1.909783172855532e-05, "loss": 0.0438, "step": 29010 }, { "epoch": 0.44539943212339805, "grad_norm": 0.6974143981933594, "learning_rate": 1.909671950293909e-05, "loss": 0.0423, "step": 29020 }, { "epoch": 0.445552912286087, "grad_norm": 0.4513016939163208, "learning_rate": 1.9095606624577132e-05, "loss": 0.0577, "step": 29030 }, { "epoch": 0.445706392448776, "grad_norm": 0.613933265209198, "learning_rate": 1.9094493093549303e-05, "loss": 0.0554, "step": 29040 }, { "epoch": 0.445859872611465, "grad_norm": 0.46682068705558777, "learning_rate": 1.9093378909935503e-05, "loss": 0.0575, "step": 29050 }, { "epoch": 0.44601335277415394, "grad_norm": 0.48264652490615845, "learning_rate": 1.9092264073815684e-05, "loss": 0.0517, "step": 29060 }, { "epoch": 0.4461668329368429, "grad_norm": 0.405179500579834, "learning_rate": 1.9091148585269845e-05, "loss": 0.0452, "step": 29070 }, { "epoch": 0.4463203130995319, "grad_norm": 0.5112320780754089, "learning_rate": 1.9090032444378024e-05, "loss": 0.055, "step": 29080 }, { "epoch": 0.44647379326222086, "grad_norm": 0.620301365852356, "learning_rate": 1.9088915651220316e-05, "loss": 0.0399, "step": 29090 }, { "epoch": 0.4466272734249098, "grad_norm": 0.430224746465683, "learning_rate": 1.908779820587685e-05, "loss": 0.0419, "step": 29100 }, { "epoch": 0.4467807535875988, "grad_norm": 0.5766242146492004, "learning_rate": 1.908668010842782e-05, "loss": 0.045, "step": 29110 }, { "epoch": 0.4469342337502878, "grad_norm": 0.3102065920829773, "learning_rate": 1.908556135895345e-05, "loss": 0.0494, "step": 29120 }, { "epoch": 0.44708771391297675, "grad_norm": 0.3701312839984894, "learning_rate": 1.9084441957534018e-05, "loss": 0.0445, "step": 29130 }, { "epoch": 0.4472411940756657, "grad_norm": 0.7113192081451416, "learning_rate": 1.9083321904249845e-05, "loss": 0.0438, "step": 29140 }, { "epoch": 0.4473946742383547, "grad_norm": 0.5252092480659485, "learning_rate": 1.9082201199181306e-05, "loss": 0.047, "step": 29150 }, { "epoch": 0.4475481544010437, "grad_norm": 0.48488351702690125, "learning_rate": 1.908107984240882e-05, "loss": 0.0525, "step": 29160 }, { "epoch": 0.44770163456373263, "grad_norm": 0.43398091197013855, "learning_rate": 1.9079957834012847e-05, "loss": 0.0459, "step": 29170 }, { "epoch": 0.4478551147264216, "grad_norm": 0.4857255220413208, "learning_rate": 1.90788351740739e-05, "loss": 0.047, "step": 29180 }, { "epoch": 0.4480085948891106, "grad_norm": 0.4389459490776062, "learning_rate": 1.9077711862672535e-05, "loss": 0.052, "step": 29190 }, { "epoch": 0.44816207505179956, "grad_norm": 0.6077932119369507, "learning_rate": 1.9076587899889362e-05, "loss": 0.0424, "step": 29200 }, { "epoch": 0.4483155552144885, "grad_norm": 0.4378766417503357, "learning_rate": 1.9075463285805026e-05, "loss": 0.0425, "step": 29210 }, { "epoch": 0.4484690353771775, "grad_norm": 0.2980617880821228, "learning_rate": 1.907433802050023e-05, "loss": 0.0356, "step": 29220 }, { "epoch": 0.4486225155398665, "grad_norm": 0.6587315201759338, "learning_rate": 1.907321210405571e-05, "loss": 0.0449, "step": 29230 }, { "epoch": 0.44877599570255544, "grad_norm": 0.49394330382347107, "learning_rate": 1.9072085536552267e-05, "loss": 0.0598, "step": 29240 }, { "epoch": 0.4489294758652444, "grad_norm": 0.4542545974254608, "learning_rate": 1.9070958318070737e-05, "loss": 0.0566, "step": 29250 }, { "epoch": 0.4490829560279334, "grad_norm": 0.5142747759819031, "learning_rate": 1.9069830448692e-05, "loss": 0.044, "step": 29260 }, { "epoch": 0.44923643619062237, "grad_norm": 0.5703189969062805, "learning_rate": 1.906870192849699e-05, "loss": 0.0551, "step": 29270 }, { "epoch": 0.4493899163533113, "grad_norm": 0.43792524933815, "learning_rate": 1.906757275756669e-05, "loss": 0.0545, "step": 29280 }, { "epoch": 0.4495433965160003, "grad_norm": 0.5122897028923035, "learning_rate": 1.9066442935982123e-05, "loss": 0.0414, "step": 29290 }, { "epoch": 0.4496968766786893, "grad_norm": 0.47914156317710876, "learning_rate": 1.9065312463824354e-05, "loss": 0.0476, "step": 29300 }, { "epoch": 0.44985035684137825, "grad_norm": 0.6705445051193237, "learning_rate": 1.906418134117451e-05, "loss": 0.0515, "step": 29310 }, { "epoch": 0.4500038370040672, "grad_norm": 0.5288784503936768, "learning_rate": 1.906304956811375e-05, "loss": 0.0453, "step": 29320 }, { "epoch": 0.4501573171667562, "grad_norm": 0.6892178654670715, "learning_rate": 1.9061917144723288e-05, "loss": 0.0519, "step": 29330 }, { "epoch": 0.4503107973294452, "grad_norm": 0.6689967513084412, "learning_rate": 1.9060784071084386e-05, "loss": 0.0422, "step": 29340 }, { "epoch": 0.45046427749213414, "grad_norm": 0.2918134927749634, "learning_rate": 1.905965034727834e-05, "loss": 0.0494, "step": 29350 }, { "epoch": 0.4506177576548231, "grad_norm": 0.27367103099823, "learning_rate": 1.9058515973386514e-05, "loss": 0.0465, "step": 29360 }, { "epoch": 0.4507712378175121, "grad_norm": 0.4541763663291931, "learning_rate": 1.9057380949490293e-05, "loss": 0.0363, "step": 29370 }, { "epoch": 0.45092471798020106, "grad_norm": 0.42194849252700806, "learning_rate": 1.9056245275671134e-05, "loss": 0.0427, "step": 29380 }, { "epoch": 0.45107819814289, "grad_norm": 0.5273065567016602, "learning_rate": 1.9055108952010516e-05, "loss": 0.0506, "step": 29390 }, { "epoch": 0.45123167830557903, "grad_norm": 0.5279853343963623, "learning_rate": 1.905397197858999e-05, "loss": 0.0484, "step": 29400 }, { "epoch": 0.451385158468268, "grad_norm": 0.4185332953929901, "learning_rate": 1.9052834355491134e-05, "loss": 0.0529, "step": 29410 }, { "epoch": 0.45153863863095695, "grad_norm": 0.5071894526481628, "learning_rate": 1.9051696082795578e-05, "loss": 0.0403, "step": 29420 }, { "epoch": 0.4516921187936459, "grad_norm": 0.31496182084083557, "learning_rate": 1.9050557160585005e-05, "loss": 0.0513, "step": 29430 }, { "epoch": 0.4518455989563349, "grad_norm": 0.5359275341033936, "learning_rate": 1.9049417588941138e-05, "loss": 0.0559, "step": 29440 }, { "epoch": 0.45199907911902387, "grad_norm": 0.7824566960334778, "learning_rate": 1.9048277367945748e-05, "loss": 0.0567, "step": 29450 }, { "epoch": 0.45215255928171283, "grad_norm": 0.3561597764492035, "learning_rate": 1.904713649768065e-05, "loss": 0.0369, "step": 29460 }, { "epoch": 0.4523060394444018, "grad_norm": 0.4508640766143799, "learning_rate": 1.9045994978227713e-05, "loss": 0.0377, "step": 29470 }, { "epoch": 0.4524595196070908, "grad_norm": 0.4350992441177368, "learning_rate": 1.9044852809668843e-05, "loss": 0.0621, "step": 29480 }, { "epoch": 0.45261299976977976, "grad_norm": 0.6092047691345215, "learning_rate": 1.9043709992086004e-05, "loss": 0.0467, "step": 29490 }, { "epoch": 0.4527664799324687, "grad_norm": 0.47497713565826416, "learning_rate": 1.9042566525561196e-05, "loss": 0.0422, "step": 29500 }, { "epoch": 0.4529199600951577, "grad_norm": 0.4008376896381378, "learning_rate": 1.904142241017647e-05, "loss": 0.0443, "step": 29510 }, { "epoch": 0.4530734402578467, "grad_norm": 0.49661099910736084, "learning_rate": 1.9040277646013928e-05, "loss": 0.0433, "step": 29520 }, { "epoch": 0.45322692042053564, "grad_norm": 0.5665442943572998, "learning_rate": 1.9039132233155705e-05, "loss": 0.0494, "step": 29530 }, { "epoch": 0.4533804005832246, "grad_norm": 0.5540515184402466, "learning_rate": 1.9037986171683998e-05, "loss": 0.0461, "step": 29540 }, { "epoch": 0.4535338807459136, "grad_norm": 0.6131424307823181, "learning_rate": 1.903683946168104e-05, "loss": 0.0527, "step": 29550 }, { "epoch": 0.45368736090860257, "grad_norm": 0.38820680975914, "learning_rate": 1.903569210322912e-05, "loss": 0.054, "step": 29560 }, { "epoch": 0.4538408410712915, "grad_norm": 0.5101001858711243, "learning_rate": 1.9034544096410563e-05, "loss": 0.0531, "step": 29570 }, { "epoch": 0.45399432123398054, "grad_norm": 0.39812740683555603, "learning_rate": 1.903339544130775e-05, "loss": 0.0404, "step": 29580 }, { "epoch": 0.4541478013966695, "grad_norm": 0.5723540186882019, "learning_rate": 1.90322461380031e-05, "loss": 0.0407, "step": 29590 }, { "epoch": 0.45430128155935845, "grad_norm": 0.4745054841041565, "learning_rate": 1.9031096186579082e-05, "loss": 0.0411, "step": 29600 }, { "epoch": 0.4544547617220474, "grad_norm": 0.6675377488136292, "learning_rate": 1.902994558711822e-05, "loss": 0.0369, "step": 29610 }, { "epoch": 0.4546082418847364, "grad_norm": 0.45909664034843445, "learning_rate": 1.9028794339703065e-05, "loss": 0.0521, "step": 29620 }, { "epoch": 0.4547617220474254, "grad_norm": 0.5158364772796631, "learning_rate": 1.9027642444416234e-05, "loss": 0.0417, "step": 29630 }, { "epoch": 0.45491520221011433, "grad_norm": 0.6145325303077698, "learning_rate": 1.902648990134038e-05, "loss": 0.0463, "step": 29640 }, { "epoch": 0.4550686823728033, "grad_norm": 0.49596479535102844, "learning_rate": 1.902533671055821e-05, "loss": 0.0509, "step": 29650 }, { "epoch": 0.4552221625354923, "grad_norm": 0.515636682510376, "learning_rate": 1.9024182872152462e-05, "loss": 0.0497, "step": 29660 }, { "epoch": 0.45537564269818126, "grad_norm": 0.47215554118156433, "learning_rate": 1.902302838620594e-05, "loss": 0.0404, "step": 29670 }, { "epoch": 0.4555291228608702, "grad_norm": 0.4028223156929016, "learning_rate": 1.9021873252801484e-05, "loss": 0.046, "step": 29680 }, { "epoch": 0.45568260302355923, "grad_norm": 0.34708473086357117, "learning_rate": 1.902071747202198e-05, "loss": 0.0434, "step": 29690 }, { "epoch": 0.4558360831862482, "grad_norm": 0.6138525605201721, "learning_rate": 1.901956104395036e-05, "loss": 0.0449, "step": 29700 }, { "epoch": 0.45598956334893714, "grad_norm": 0.5674062967300415, "learning_rate": 1.901840396866961e-05, "loss": 0.0509, "step": 29710 }, { "epoch": 0.4561430435116261, "grad_norm": 0.49423879384994507, "learning_rate": 1.9017246246262755e-05, "loss": 0.0475, "step": 29720 }, { "epoch": 0.4562965236743151, "grad_norm": 0.5078328847885132, "learning_rate": 1.901608787681287e-05, "loss": 0.0488, "step": 29730 }, { "epoch": 0.45645000383700407, "grad_norm": 0.4304361641407013, "learning_rate": 1.901492886040307e-05, "loss": 0.0359, "step": 29740 }, { "epoch": 0.456603483999693, "grad_norm": 0.331033855676651, "learning_rate": 1.9013769197116533e-05, "loss": 0.0509, "step": 29750 }, { "epoch": 0.45675696416238204, "grad_norm": 0.4517119526863098, "learning_rate": 1.901260888703646e-05, "loss": 0.0459, "step": 29760 }, { "epoch": 0.456910444325071, "grad_norm": 0.44563692808151245, "learning_rate": 1.9011447930246115e-05, "loss": 0.0517, "step": 29770 }, { "epoch": 0.45706392448775995, "grad_norm": 0.4976869225502014, "learning_rate": 1.9010286326828803e-05, "loss": 0.0506, "step": 29780 }, { "epoch": 0.4572174046504489, "grad_norm": 0.5859265923500061, "learning_rate": 1.900912407686788e-05, "loss": 0.0596, "step": 29790 }, { "epoch": 0.4573708848131379, "grad_norm": 0.38617977499961853, "learning_rate": 1.900796118044674e-05, "loss": 0.0465, "step": 29800 }, { "epoch": 0.4575243649758269, "grad_norm": 0.44060084223747253, "learning_rate": 1.900679763764883e-05, "loss": 0.0356, "step": 29810 }, { "epoch": 0.45767784513851584, "grad_norm": 0.6055809855461121, "learning_rate": 1.900563344855764e-05, "loss": 0.048, "step": 29820 }, { "epoch": 0.4578313253012048, "grad_norm": 0.690523624420166, "learning_rate": 1.900446861325671e-05, "loss": 0.0525, "step": 29830 }, { "epoch": 0.4579848054638938, "grad_norm": 0.5737436413764954, "learning_rate": 1.9003303131829622e-05, "loss": 0.0448, "step": 29840 }, { "epoch": 0.45813828562658276, "grad_norm": 0.38476723432540894, "learning_rate": 1.9002137004360007e-05, "loss": 0.054, "step": 29850 }, { "epoch": 0.4582917657892717, "grad_norm": 0.5803274512290955, "learning_rate": 1.9000970230931544e-05, "loss": 0.0517, "step": 29860 }, { "epoch": 0.45844524595196073, "grad_norm": 0.31390658020973206, "learning_rate": 1.8999802811627952e-05, "loss": 0.0451, "step": 29870 }, { "epoch": 0.4585987261146497, "grad_norm": 0.44973140954971313, "learning_rate": 1.8998634746533004e-05, "loss": 0.0439, "step": 29880 }, { "epoch": 0.45875220627733865, "grad_norm": 0.6716129183769226, "learning_rate": 1.8997466035730515e-05, "loss": 0.0416, "step": 29890 }, { "epoch": 0.4589056864400276, "grad_norm": 0.5979340076446533, "learning_rate": 1.8996296679304346e-05, "loss": 0.0481, "step": 29900 }, { "epoch": 0.4590591666027166, "grad_norm": 0.43601521849632263, "learning_rate": 1.899512667733841e-05, "loss": 0.0399, "step": 29910 }, { "epoch": 0.4592126467654056, "grad_norm": 0.3727093040943146, "learning_rate": 1.8993956029916653e-05, "loss": 0.0506, "step": 29920 }, { "epoch": 0.45936612692809453, "grad_norm": 0.20047356188297272, "learning_rate": 1.8992784737123085e-05, "loss": 0.0435, "step": 29930 }, { "epoch": 0.45951960709078354, "grad_norm": 0.5066564083099365, "learning_rate": 1.899161279904175e-05, "loss": 0.0441, "step": 29940 }, { "epoch": 0.4596730872534725, "grad_norm": 0.5722052454948425, "learning_rate": 1.8990440215756744e-05, "loss": 0.0356, "step": 29950 }, { "epoch": 0.45982656741616146, "grad_norm": 0.43479838967323303, "learning_rate": 1.89892669873522e-05, "loss": 0.0423, "step": 29960 }, { "epoch": 0.4599800475788504, "grad_norm": 0.5057786703109741, "learning_rate": 1.898809311391231e-05, "loss": 0.0605, "step": 29970 }, { "epoch": 0.4601335277415394, "grad_norm": 0.3763933479785919, "learning_rate": 1.8986918595521308e-05, "loss": 0.0457, "step": 29980 }, { "epoch": 0.4602870079042284, "grad_norm": 0.710395336151123, "learning_rate": 1.898574343226347e-05, "loss": 0.0467, "step": 29990 }, { "epoch": 0.46044048806691734, "grad_norm": 0.5027486681938171, "learning_rate": 1.8984567624223123e-05, "loss": 0.0442, "step": 30000 }, { "epoch": 0.4605939682296063, "grad_norm": 0.3894475996494293, "learning_rate": 1.8983391171484636e-05, "loss": 0.0527, "step": 30010 }, { "epoch": 0.4607474483922953, "grad_norm": 0.624396800994873, "learning_rate": 1.898221407413243e-05, "loss": 0.058, "step": 30020 }, { "epoch": 0.46090092855498427, "grad_norm": 0.4645821750164032, "learning_rate": 1.898103633225097e-05, "loss": 0.0441, "step": 30030 }, { "epoch": 0.4610544087176732, "grad_norm": 0.43653377890586853, "learning_rate": 1.897985794592476e-05, "loss": 0.0392, "step": 30040 }, { "epoch": 0.46120788888036224, "grad_norm": 0.4133630096912384, "learning_rate": 1.8978678915238356e-05, "loss": 0.036, "step": 30050 }, { "epoch": 0.4613613690430512, "grad_norm": 0.5175964832305908, "learning_rate": 1.897749924027637e-05, "loss": 0.0461, "step": 30060 }, { "epoch": 0.46151484920574015, "grad_norm": 0.35783305764198303, "learning_rate": 1.8976318921123443e-05, "loss": 0.0464, "step": 30070 }, { "epoch": 0.4616683293684291, "grad_norm": 0.4929831922054291, "learning_rate": 1.8975137957864276e-05, "loss": 0.0463, "step": 30080 }, { "epoch": 0.4618218095311181, "grad_norm": 0.587911069393158, "learning_rate": 1.8973956350583608e-05, "loss": 0.0413, "step": 30090 }, { "epoch": 0.4619752896938071, "grad_norm": 0.4045490026473999, "learning_rate": 1.897277409936622e-05, "loss": 0.0633, "step": 30100 }, { "epoch": 0.46212876985649604, "grad_norm": 0.4300389289855957, "learning_rate": 1.8971591204296956e-05, "loss": 0.045, "step": 30110 }, { "epoch": 0.462282250019185, "grad_norm": 0.4015170633792877, "learning_rate": 1.897040766546069e-05, "loss": 0.0522, "step": 30120 }, { "epoch": 0.462435730181874, "grad_norm": 0.487481027841568, "learning_rate": 1.8969223482942354e-05, "loss": 0.0419, "step": 30130 }, { "epoch": 0.46258921034456296, "grad_norm": 0.38795554637908936, "learning_rate": 1.8968038656826914e-05, "loss": 0.0388, "step": 30140 }, { "epoch": 0.4627426905072519, "grad_norm": 0.5586264133453369, "learning_rate": 1.896685318719939e-05, "loss": 0.0572, "step": 30150 }, { "epoch": 0.46289617066994093, "grad_norm": 0.5464595556259155, "learning_rate": 1.8965667074144846e-05, "loss": 0.0511, "step": 30160 }, { "epoch": 0.4630496508326299, "grad_norm": 0.520212709903717, "learning_rate": 1.8964480317748398e-05, "loss": 0.0403, "step": 30170 }, { "epoch": 0.46320313099531885, "grad_norm": 0.46601203083992004, "learning_rate": 1.8963292918095196e-05, "loss": 0.0403, "step": 30180 }, { "epoch": 0.4633566111580078, "grad_norm": 0.4529013931751251, "learning_rate": 1.896210487527045e-05, "loss": 0.0465, "step": 30190 }, { "epoch": 0.4635100913206968, "grad_norm": 0.4186548888683319, "learning_rate": 1.8960916189359406e-05, "loss": 0.0504, "step": 30200 }, { "epoch": 0.4636635714833858, "grad_norm": 0.708375096321106, "learning_rate": 1.895972686044736e-05, "loss": 0.0432, "step": 30210 }, { "epoch": 0.46381705164607473, "grad_norm": 0.5873250961303711, "learning_rate": 1.8958536888619653e-05, "loss": 0.0527, "step": 30220 }, { "epoch": 0.46397053180876374, "grad_norm": 0.571479320526123, "learning_rate": 1.895734627396167e-05, "loss": 0.045, "step": 30230 }, { "epoch": 0.4641240119714527, "grad_norm": 0.5038644075393677, "learning_rate": 1.8956155016558857e-05, "loss": 0.0508, "step": 30240 }, { "epoch": 0.46427749213414166, "grad_norm": 0.46690937876701355, "learning_rate": 1.895496311649668e-05, "loss": 0.043, "step": 30250 }, { "epoch": 0.4644309722968306, "grad_norm": 0.39159542322158813, "learning_rate": 1.895377057386067e-05, "loss": 0.0472, "step": 30260 }, { "epoch": 0.4645844524595196, "grad_norm": 0.684330940246582, "learning_rate": 1.89525773887364e-05, "loss": 0.0473, "step": 30270 }, { "epoch": 0.4647379326222086, "grad_norm": 0.49244868755340576, "learning_rate": 1.895138356120949e-05, "loss": 0.045, "step": 30280 }, { "epoch": 0.46489141278489754, "grad_norm": 0.3801514804363251, "learning_rate": 1.8950189091365602e-05, "loss": 0.0471, "step": 30290 }, { "epoch": 0.4650448929475865, "grad_norm": 0.32270872592926025, "learning_rate": 1.8948993979290446e-05, "loss": 0.0375, "step": 30300 }, { "epoch": 0.4651983731102755, "grad_norm": 0.8144887089729309, "learning_rate": 1.894779822506978e-05, "loss": 0.0595, "step": 30310 }, { "epoch": 0.46535185327296447, "grad_norm": 0.3569713234901428, "learning_rate": 1.8946601828789408e-05, "loss": 0.0415, "step": 30320 }, { "epoch": 0.4655053334356534, "grad_norm": 0.5522640347480774, "learning_rate": 1.8945404790535176e-05, "loss": 0.0462, "step": 30330 }, { "epoch": 0.46565881359834244, "grad_norm": 0.6025185585021973, "learning_rate": 1.8944207110392984e-05, "loss": 0.0544, "step": 30340 }, { "epoch": 0.4658122937610314, "grad_norm": 0.44226300716400146, "learning_rate": 1.8943008788448767e-05, "loss": 0.0492, "step": 30350 }, { "epoch": 0.46596577392372035, "grad_norm": 0.369476318359375, "learning_rate": 1.8941809824788514e-05, "loss": 0.0478, "step": 30360 }, { "epoch": 0.4661192540864093, "grad_norm": 0.4925704002380371, "learning_rate": 1.8940610219498258e-05, "loss": 0.0471, "step": 30370 }, { "epoch": 0.4662727342490983, "grad_norm": 0.5232747197151184, "learning_rate": 1.893940997266408e-05, "loss": 0.0383, "step": 30380 }, { "epoch": 0.4664262144117873, "grad_norm": 0.5315121412277222, "learning_rate": 1.8938209084372104e-05, "loss": 0.0512, "step": 30390 }, { "epoch": 0.46657969457447623, "grad_norm": 0.45225727558135986, "learning_rate": 1.89370075547085e-05, "loss": 0.0448, "step": 30400 }, { "epoch": 0.46673317473716525, "grad_norm": 0.46386972069740295, "learning_rate": 1.8935805383759484e-05, "loss": 0.0388, "step": 30410 }, { "epoch": 0.4668866548998542, "grad_norm": 0.6541358232498169, "learning_rate": 1.8934602571611323e-05, "loss": 0.0469, "step": 30420 }, { "epoch": 0.46704013506254316, "grad_norm": 0.48064497113227844, "learning_rate": 1.893339911835032e-05, "loss": 0.0506, "step": 30430 }, { "epoch": 0.4671936152252321, "grad_norm": 0.4005785882472992, "learning_rate": 1.8932195024062842e-05, "loss": 0.0475, "step": 30440 }, { "epoch": 0.46734709538792113, "grad_norm": 0.5445104241371155, "learning_rate": 1.893099028883528e-05, "loss": 0.0479, "step": 30450 }, { "epoch": 0.4675005755506101, "grad_norm": 0.47972139716148376, "learning_rate": 1.892978491275408e-05, "loss": 0.0449, "step": 30460 }, { "epoch": 0.46765405571329904, "grad_norm": 0.3562178611755371, "learning_rate": 1.892857889590574e-05, "loss": 0.0394, "step": 30470 }, { "epoch": 0.467807535875988, "grad_norm": 0.5899629592895508, "learning_rate": 1.89273722383768e-05, "loss": 0.043, "step": 30480 }, { "epoch": 0.467961016038677, "grad_norm": 0.39747828245162964, "learning_rate": 1.8926164940253846e-05, "loss": 0.0447, "step": 30490 }, { "epoch": 0.46811449620136597, "grad_norm": 0.5846978425979614, "learning_rate": 1.8924957001623503e-05, "loss": 0.0436, "step": 30500 }, { "epoch": 0.4682679763640549, "grad_norm": 0.5294001698493958, "learning_rate": 1.8923748422572453e-05, "loss": 0.0546, "step": 30510 }, { "epoch": 0.46842145652674394, "grad_norm": 0.4156314730644226, "learning_rate": 1.8922539203187412e-05, "loss": 0.0385, "step": 30520 }, { "epoch": 0.4685749366894329, "grad_norm": 0.48257243633270264, "learning_rate": 1.8921329343555158e-05, "loss": 0.0497, "step": 30530 }, { "epoch": 0.46872841685212185, "grad_norm": 0.35665932297706604, "learning_rate": 1.8920118843762503e-05, "loss": 0.0349, "step": 30540 }, { "epoch": 0.4688818970148108, "grad_norm": 0.35993698239326477, "learning_rate": 1.891890770389631e-05, "loss": 0.0503, "step": 30550 }, { "epoch": 0.4690353771774998, "grad_norm": 0.4453459084033966, "learning_rate": 1.891769592404348e-05, "loss": 0.0427, "step": 30560 }, { "epoch": 0.4691888573401888, "grad_norm": 0.4977068305015564, "learning_rate": 1.8916483504290966e-05, "loss": 0.0504, "step": 30570 }, { "epoch": 0.46934233750287774, "grad_norm": 0.3730800151824951, "learning_rate": 1.891527044472577e-05, "loss": 0.0452, "step": 30580 }, { "epoch": 0.46949581766556675, "grad_norm": 0.4284704327583313, "learning_rate": 1.891405674543494e-05, "loss": 0.0459, "step": 30590 }, { "epoch": 0.4696492978282557, "grad_norm": 0.5024580955505371, "learning_rate": 1.8912842406505556e-05, "loss": 0.0424, "step": 30600 }, { "epoch": 0.46980277799094466, "grad_norm": 0.4755384624004364, "learning_rate": 1.8911627428024763e-05, "loss": 0.0627, "step": 30610 }, { "epoch": 0.4699562581536336, "grad_norm": 0.420002281665802, "learning_rate": 1.891041181007974e-05, "loss": 0.0391, "step": 30620 }, { "epoch": 0.47010973831632263, "grad_norm": 0.6431061029434204, "learning_rate": 1.8909195552757714e-05, "loss": 0.0418, "step": 30630 }, { "epoch": 0.4702632184790116, "grad_norm": 0.3991106450557709, "learning_rate": 1.8907978656145967e-05, "loss": 0.0412, "step": 30640 }, { "epoch": 0.47041669864170055, "grad_norm": 0.42811286449432373, "learning_rate": 1.8906761120331808e-05, "loss": 0.0534, "step": 30650 }, { "epoch": 0.4705701788043895, "grad_norm": 0.7655143141746521, "learning_rate": 1.8905542945402606e-05, "loss": 0.0431, "step": 30660 }, { "epoch": 0.4707236589670785, "grad_norm": 0.46485981345176697, "learning_rate": 1.8904324131445775e-05, "loss": 0.0495, "step": 30670 }, { "epoch": 0.4708771391297675, "grad_norm": 0.43819189071655273, "learning_rate": 1.890310467854877e-05, "loss": 0.0518, "step": 30680 }, { "epoch": 0.47103061929245643, "grad_norm": 0.6875366568565369, "learning_rate": 1.8901884586799095e-05, "loss": 0.056, "step": 30690 }, { "epoch": 0.47118409945514544, "grad_norm": 0.4159853160381317, "learning_rate": 1.89006638562843e-05, "loss": 0.043, "step": 30700 }, { "epoch": 0.4713375796178344, "grad_norm": 0.39797189831733704, "learning_rate": 1.889944248709198e-05, "loss": 0.0332, "step": 30710 }, { "epoch": 0.47149105978052336, "grad_norm": 0.3239367604255676, "learning_rate": 1.8898220479309778e-05, "loss": 0.0365, "step": 30720 }, { "epoch": 0.4716445399432123, "grad_norm": 0.49593475461006165, "learning_rate": 1.8896997833025374e-05, "loss": 0.0415, "step": 30730 }, { "epoch": 0.47179802010590133, "grad_norm": 0.4117472469806671, "learning_rate": 1.8895774548326506e-05, "loss": 0.0419, "step": 30740 }, { "epoch": 0.4719515002685903, "grad_norm": 0.5849118232727051, "learning_rate": 1.889455062530095e-05, "loss": 0.0482, "step": 30750 }, { "epoch": 0.47210498043127924, "grad_norm": 0.34214088320732117, "learning_rate": 1.8893326064036526e-05, "loss": 0.0445, "step": 30760 }, { "epoch": 0.47225846059396825, "grad_norm": 0.5152453780174255, "learning_rate": 1.8892100864621114e-05, "loss": 0.0488, "step": 30770 }, { "epoch": 0.4724119407566572, "grad_norm": 0.318810373544693, "learning_rate": 1.8890875027142623e-05, "loss": 0.0503, "step": 30780 }, { "epoch": 0.47256542091934617, "grad_norm": 0.4418718218803406, "learning_rate": 1.8889648551689012e-05, "loss": 0.0441, "step": 30790 }, { "epoch": 0.4727189010820351, "grad_norm": 0.5294760465621948, "learning_rate": 1.8888421438348296e-05, "loss": 0.0527, "step": 30800 }, { "epoch": 0.47287238124472414, "grad_norm": 0.33170244097709656, "learning_rate": 1.8887193687208518e-05, "loss": 0.0348, "step": 30810 }, { "epoch": 0.4730258614074131, "grad_norm": 0.5450940132141113, "learning_rate": 1.8885965298357787e-05, "loss": 0.0537, "step": 30820 }, { "epoch": 0.47317934157010205, "grad_norm": 0.38680312037467957, "learning_rate": 1.8884736271884238e-05, "loss": 0.0447, "step": 30830 }, { "epoch": 0.473332821732791, "grad_norm": 0.350376695394516, "learning_rate": 1.888350660787607e-05, "loss": 0.0431, "step": 30840 }, { "epoch": 0.47348630189548, "grad_norm": 0.4900617003440857, "learning_rate": 1.8882276306421516e-05, "loss": 0.0483, "step": 30850 }, { "epoch": 0.473639782058169, "grad_norm": 0.406699001789093, "learning_rate": 1.888104536760885e-05, "loss": 0.0394, "step": 30860 }, { "epoch": 0.47379326222085794, "grad_norm": 0.45345523953437805, "learning_rate": 1.887981379152641e-05, "loss": 0.0542, "step": 30870 }, { "epoch": 0.47394674238354695, "grad_norm": 0.3966761827468872, "learning_rate": 1.8878581578262567e-05, "loss": 0.0449, "step": 30880 }, { "epoch": 0.4741002225462359, "grad_norm": 0.5177494883537292, "learning_rate": 1.8877348727905736e-05, "loss": 0.0465, "step": 30890 }, { "epoch": 0.47425370270892486, "grad_norm": 0.4614032208919525, "learning_rate": 1.8876115240544385e-05, "loss": 0.0668, "step": 30900 }, { "epoch": 0.4744071828716138, "grad_norm": 0.5287589430809021, "learning_rate": 1.8874881116267018e-05, "loss": 0.0464, "step": 30910 }, { "epoch": 0.47456066303430283, "grad_norm": 0.6124269366264343, "learning_rate": 1.88736463551622e-05, "loss": 0.0531, "step": 30920 }, { "epoch": 0.4747141431969918, "grad_norm": 0.5065978765487671, "learning_rate": 1.8872410957318527e-05, "loss": 0.0503, "step": 30930 }, { "epoch": 0.47486762335968075, "grad_norm": 0.41334471106529236, "learning_rate": 1.887117492282465e-05, "loss": 0.0362, "step": 30940 }, { "epoch": 0.47502110352236976, "grad_norm": 0.49284934997558594, "learning_rate": 1.886993825176926e-05, "loss": 0.0428, "step": 30950 }, { "epoch": 0.4751745836850587, "grad_norm": 0.49278852343559265, "learning_rate": 1.8868700944241096e-05, "loss": 0.051, "step": 30960 }, { "epoch": 0.4753280638477477, "grad_norm": 0.4803861379623413, "learning_rate": 1.8867463000328944e-05, "loss": 0.0391, "step": 30970 }, { "epoch": 0.47548154401043663, "grad_norm": 0.4133554995059967, "learning_rate": 1.8866224420121628e-05, "loss": 0.0483, "step": 30980 }, { "epoch": 0.47563502417312564, "grad_norm": 0.3087526559829712, "learning_rate": 1.886498520370803e-05, "loss": 0.0383, "step": 30990 }, { "epoch": 0.4757885043358146, "grad_norm": 0.6226566433906555, "learning_rate": 1.886374535117707e-05, "loss": 0.0566, "step": 31000 }, { "epoch": 0.47594198449850356, "grad_norm": 0.4849223494529724, "learning_rate": 1.886250486261772e-05, "loss": 0.0349, "step": 31010 }, { "epoch": 0.4760954646611925, "grad_norm": 0.49144649505615234, "learning_rate": 1.886126373811898e-05, "loss": 0.0361, "step": 31020 }, { "epoch": 0.4762489448238815, "grad_norm": 0.517780601978302, "learning_rate": 1.8860021977769918e-05, "loss": 0.0451, "step": 31030 }, { "epoch": 0.4764024249865705, "grad_norm": 0.4011479318141937, "learning_rate": 1.8858779581659638e-05, "loss": 0.0423, "step": 31040 }, { "epoch": 0.47655590514925944, "grad_norm": 0.36878177523612976, "learning_rate": 1.8857536549877285e-05, "loss": 0.0417, "step": 31050 }, { "epoch": 0.47670938531194845, "grad_norm": 0.4800419807434082, "learning_rate": 1.885629288251206e-05, "loss": 0.0411, "step": 31060 }, { "epoch": 0.4768628654746374, "grad_norm": 0.4420831799507141, "learning_rate": 1.8855048579653197e-05, "loss": 0.0466, "step": 31070 }, { "epoch": 0.47701634563732637, "grad_norm": 0.3916960060596466, "learning_rate": 1.8853803641389987e-05, "loss": 0.0353, "step": 31080 }, { "epoch": 0.4771698258000153, "grad_norm": 0.5362246632575989, "learning_rate": 1.8852558067811762e-05, "loss": 0.0562, "step": 31090 }, { "epoch": 0.47732330596270434, "grad_norm": 0.5228669047355652, "learning_rate": 1.8851311859007896e-05, "loss": 0.0432, "step": 31100 }, { "epoch": 0.4774767861253933, "grad_norm": 0.5368010997772217, "learning_rate": 1.8850065015067817e-05, "loss": 0.044, "step": 31110 }, { "epoch": 0.47763026628808225, "grad_norm": 0.34335801005363464, "learning_rate": 1.8848817536080987e-05, "loss": 0.0463, "step": 31120 }, { "epoch": 0.47778374645077126, "grad_norm": 0.4726921617984772, "learning_rate": 1.884756942213693e-05, "loss": 0.0438, "step": 31130 }, { "epoch": 0.4779372266134602, "grad_norm": 0.5032730102539062, "learning_rate": 1.8846320673325196e-05, "loss": 0.0522, "step": 31140 }, { "epoch": 0.4780907067761492, "grad_norm": 0.5125794410705566, "learning_rate": 1.8845071289735396e-05, "loss": 0.04, "step": 31150 }, { "epoch": 0.47824418693883813, "grad_norm": 0.39613622426986694, "learning_rate": 1.8843821271457185e-05, "loss": 0.0403, "step": 31160 }, { "epoch": 0.47839766710152715, "grad_norm": 0.4813574552536011, "learning_rate": 1.884257061858025e-05, "loss": 0.0468, "step": 31170 }, { "epoch": 0.4785511472642161, "grad_norm": 0.48313093185424805, "learning_rate": 1.8841319331194335e-05, "loss": 0.0442, "step": 31180 }, { "epoch": 0.47870462742690506, "grad_norm": 0.5976888537406921, "learning_rate": 1.8840067409389237e-05, "loss": 0.044, "step": 31190 }, { "epoch": 0.478858107589594, "grad_norm": 0.4331175982952118, "learning_rate": 1.8838814853254775e-05, "loss": 0.0366, "step": 31200 }, { "epoch": 0.47901158775228303, "grad_norm": 0.434250146150589, "learning_rate": 1.883756166288084e-05, "loss": 0.0387, "step": 31210 }, { "epoch": 0.479165067914972, "grad_norm": 0.35718783736228943, "learning_rate": 1.8836307838357348e-05, "loss": 0.0374, "step": 31220 }, { "epoch": 0.47931854807766094, "grad_norm": 0.6125292181968689, "learning_rate": 1.883505337977427e-05, "loss": 0.0423, "step": 31230 }, { "epoch": 0.47947202824034996, "grad_norm": 0.6139647364616394, "learning_rate": 1.883379828722163e-05, "loss": 0.047, "step": 31240 }, { "epoch": 0.4796255084030389, "grad_norm": 0.41693443059921265, "learning_rate": 1.8832542560789474e-05, "loss": 0.039, "step": 31250 }, { "epoch": 0.47977898856572787, "grad_norm": 0.5812502503395081, "learning_rate": 1.8831286200567914e-05, "loss": 0.0494, "step": 31260 }, { "epoch": 0.47993246872841683, "grad_norm": 0.5475037097930908, "learning_rate": 1.883002920664711e-05, "loss": 0.0381, "step": 31270 }, { "epoch": 0.48008594889110584, "grad_norm": 0.3568776547908783, "learning_rate": 1.882877157911725e-05, "loss": 0.0478, "step": 31280 }, { "epoch": 0.4802394290537948, "grad_norm": 0.6064499616622925, "learning_rate": 1.8827513318068574e-05, "loss": 0.0485, "step": 31290 }, { "epoch": 0.48039290921648375, "grad_norm": 0.4893823564052582, "learning_rate": 1.8826254423591382e-05, "loss": 0.0436, "step": 31300 }, { "epoch": 0.48054638937917277, "grad_norm": 0.5098892450332642, "learning_rate": 1.8824994895775997e-05, "loss": 0.0515, "step": 31310 }, { "epoch": 0.4806998695418617, "grad_norm": 0.4062361419200897, "learning_rate": 1.88237347347128e-05, "loss": 0.0329, "step": 31320 }, { "epoch": 0.4808533497045507, "grad_norm": 0.49622705578804016, "learning_rate": 1.8822473940492216e-05, "loss": 0.0502, "step": 31330 }, { "epoch": 0.48100682986723964, "grad_norm": 0.4682675004005432, "learning_rate": 1.8821212513204718e-05, "loss": 0.043, "step": 31340 }, { "epoch": 0.48116031002992865, "grad_norm": 0.4662316143512726, "learning_rate": 1.8819950452940815e-05, "loss": 0.0441, "step": 31350 }, { "epoch": 0.4813137901926176, "grad_norm": 0.4876142144203186, "learning_rate": 1.8818687759791072e-05, "loss": 0.0435, "step": 31360 }, { "epoch": 0.48146727035530656, "grad_norm": 0.30994951725006104, "learning_rate": 1.881742443384609e-05, "loss": 0.0498, "step": 31370 }, { "epoch": 0.4816207505179955, "grad_norm": 0.3735625147819519, "learning_rate": 1.881616047519653e-05, "loss": 0.0415, "step": 31380 }, { "epoch": 0.48177423068068453, "grad_norm": 0.4118516445159912, "learning_rate": 1.8814895883933082e-05, "loss": 0.044, "step": 31390 }, { "epoch": 0.4819277108433735, "grad_norm": 0.35604381561279297, "learning_rate": 1.881363066014649e-05, "loss": 0.0358, "step": 31400 }, { "epoch": 0.48208119100606245, "grad_norm": 0.5656758546829224, "learning_rate": 1.881236480392754e-05, "loss": 0.0448, "step": 31410 }, { "epoch": 0.48223467116875146, "grad_norm": 0.31817910075187683, "learning_rate": 1.8811098315367068e-05, "loss": 0.0343, "step": 31420 }, { "epoch": 0.4823881513314404, "grad_norm": 0.3971094787120819, "learning_rate": 1.8809831194555945e-05, "loss": 0.0448, "step": 31430 }, { "epoch": 0.4825416314941294, "grad_norm": 0.4989999532699585, "learning_rate": 1.8808563441585103e-05, "loss": 0.0587, "step": 31440 }, { "epoch": 0.48269511165681833, "grad_norm": 0.4087037146091461, "learning_rate": 1.880729505654551e-05, "loss": 0.0393, "step": 31450 }, { "epoch": 0.48284859181950734, "grad_norm": 0.34493520855903625, "learning_rate": 1.8806026039528175e-05, "loss": 0.054, "step": 31460 }, { "epoch": 0.4830020719821963, "grad_norm": 0.33485662937164307, "learning_rate": 1.8804756390624165e-05, "loss": 0.049, "step": 31470 }, { "epoch": 0.48315555214488526, "grad_norm": 0.6714707612991333, "learning_rate": 1.8803486109924577e-05, "loss": 0.0443, "step": 31480 }, { "epoch": 0.48330903230757427, "grad_norm": 0.5031631588935852, "learning_rate": 1.8802215197520574e-05, "loss": 0.0529, "step": 31490 }, { "epoch": 0.48346251247026323, "grad_norm": 0.4286509156227112, "learning_rate": 1.8800943653503336e-05, "loss": 0.0565, "step": 31500 }, { "epoch": 0.4836159926329522, "grad_norm": 0.4183908700942993, "learning_rate": 1.8799671477964114e-05, "loss": 0.0403, "step": 31510 }, { "epoch": 0.48376947279564114, "grad_norm": 0.5248700380325317, "learning_rate": 1.8798398670994192e-05, "loss": 0.0423, "step": 31520 }, { "epoch": 0.48392295295833015, "grad_norm": 0.44125619530677795, "learning_rate": 1.8797125232684906e-05, "loss": 0.0391, "step": 31530 }, { "epoch": 0.4840764331210191, "grad_norm": 0.38139405846595764, "learning_rate": 1.8795851163127626e-05, "loss": 0.0342, "step": 31540 }, { "epoch": 0.48422991328370807, "grad_norm": 0.6972782611846924, "learning_rate": 1.8794576462413777e-05, "loss": 0.0496, "step": 31550 }, { "epoch": 0.484383393446397, "grad_norm": 0.4615042209625244, "learning_rate": 1.879330113063483e-05, "loss": 0.0405, "step": 31560 }, { "epoch": 0.48453687360908604, "grad_norm": 0.5630578994750977, "learning_rate": 1.879202516788229e-05, "loss": 0.0495, "step": 31570 }, { "epoch": 0.484690353771775, "grad_norm": 0.37294384837150574, "learning_rate": 1.8790748574247726e-05, "loss": 0.0345, "step": 31580 }, { "epoch": 0.48484383393446395, "grad_norm": 0.522304892539978, "learning_rate": 1.878947134982273e-05, "loss": 0.0449, "step": 31590 }, { "epoch": 0.48499731409715297, "grad_norm": 0.39141038060188293, "learning_rate": 1.8788193494698963e-05, "loss": 0.0379, "step": 31600 }, { "epoch": 0.4851507942598419, "grad_norm": 0.42703118920326233, "learning_rate": 1.8786915008968106e-05, "loss": 0.0378, "step": 31610 }, { "epoch": 0.4853042744225309, "grad_norm": 0.3782655596733093, "learning_rate": 1.878563589272191e-05, "loss": 0.047, "step": 31620 }, { "epoch": 0.48545775458521984, "grad_norm": 0.3687340021133423, "learning_rate": 1.8784356146052155e-05, "loss": 0.042, "step": 31630 }, { "epoch": 0.48561123474790885, "grad_norm": 0.4953387975692749, "learning_rate": 1.8783075769050664e-05, "loss": 0.0425, "step": 31640 }, { "epoch": 0.4857647149105978, "grad_norm": 0.3616780936717987, "learning_rate": 1.878179476180932e-05, "loss": 0.0432, "step": 31650 }, { "epoch": 0.48591819507328676, "grad_norm": 0.43557459115982056, "learning_rate": 1.8780513124420046e-05, "loss": 0.0466, "step": 31660 }, { "epoch": 0.4860716752359758, "grad_norm": 0.3752641975879669, "learning_rate": 1.8779230856974797e-05, "loss": 0.039, "step": 31670 }, { "epoch": 0.48622515539866473, "grad_norm": 0.3134772777557373, "learning_rate": 1.877794795956559e-05, "loss": 0.0535, "step": 31680 }, { "epoch": 0.4863786355613537, "grad_norm": 0.3832976520061493, "learning_rate": 1.877666443228448e-05, "loss": 0.0305, "step": 31690 }, { "epoch": 0.48653211572404265, "grad_norm": 0.46982505917549133, "learning_rate": 1.8775380275223568e-05, "loss": 0.0395, "step": 31700 }, { "epoch": 0.48668559588673166, "grad_norm": 0.47471076250076294, "learning_rate": 1.8774095488475e-05, "loss": 0.0392, "step": 31710 }, { "epoch": 0.4868390760494206, "grad_norm": 0.4882543087005615, "learning_rate": 1.877281007213097e-05, "loss": 0.0457, "step": 31720 }, { "epoch": 0.4869925562121096, "grad_norm": 0.3498709797859192, "learning_rate": 1.8771524026283707e-05, "loss": 0.0404, "step": 31730 }, { "epoch": 0.48714603637479853, "grad_norm": 0.3982790410518646, "learning_rate": 1.87702373510255e-05, "loss": 0.0424, "step": 31740 }, { "epoch": 0.48729951653748754, "grad_norm": 0.3133179545402527, "learning_rate": 1.8768950046448674e-05, "loss": 0.0358, "step": 31750 }, { "epoch": 0.4874529967001765, "grad_norm": 0.46207770705223083, "learning_rate": 1.87676621126456e-05, "loss": 0.0384, "step": 31760 }, { "epoch": 0.48760647686286546, "grad_norm": 0.5804077386856079, "learning_rate": 1.8766373549708696e-05, "loss": 0.0507, "step": 31770 }, { "epoch": 0.48775995702555447, "grad_norm": 0.42658981680870056, "learning_rate": 1.8765084357730427e-05, "loss": 0.051, "step": 31780 }, { "epoch": 0.4879134371882434, "grad_norm": 0.47004029154777527, "learning_rate": 1.876379453680329e-05, "loss": 0.0487, "step": 31790 }, { "epoch": 0.4880669173509324, "grad_norm": 0.4231826961040497, "learning_rate": 1.876250408701985e-05, "loss": 0.0468, "step": 31800 }, { "epoch": 0.48822039751362134, "grad_norm": 0.44877347350120544, "learning_rate": 1.87612130084727e-05, "loss": 0.0425, "step": 31810 }, { "epoch": 0.48837387767631035, "grad_norm": 0.5026544332504272, "learning_rate": 1.8759921301254485e-05, "loss": 0.0442, "step": 31820 }, { "epoch": 0.4885273578389993, "grad_norm": 0.3092997074127197, "learning_rate": 1.8758628965457884e-05, "loss": 0.04, "step": 31830 }, { "epoch": 0.48868083800168827, "grad_norm": 0.4457075595855713, "learning_rate": 1.8757336001175644e-05, "loss": 0.0455, "step": 31840 }, { "epoch": 0.4888343181643773, "grad_norm": 0.6996881365776062, "learning_rate": 1.875604240850053e-05, "loss": 0.051, "step": 31850 }, { "epoch": 0.48898779832706624, "grad_norm": 0.46153053641319275, "learning_rate": 1.8754748187525376e-05, "loss": 0.0478, "step": 31860 }, { "epoch": 0.4891412784897552, "grad_norm": 0.5521715879440308, "learning_rate": 1.8753453338343046e-05, "loss": 0.0452, "step": 31870 }, { "epoch": 0.48929475865244415, "grad_norm": 0.3979230225086212, "learning_rate": 1.875215786104645e-05, "loss": 0.0415, "step": 31880 }, { "epoch": 0.48944823881513316, "grad_norm": 0.35504910349845886, "learning_rate": 1.875086175572855e-05, "loss": 0.042, "step": 31890 }, { "epoch": 0.4896017189778221, "grad_norm": 0.5824669003486633, "learning_rate": 1.8749565022482347e-05, "loss": 0.0392, "step": 31900 }, { "epoch": 0.4897551991405111, "grad_norm": 0.4214737117290497, "learning_rate": 1.8748267661400894e-05, "loss": 0.0407, "step": 31910 }, { "epoch": 0.48990867930320003, "grad_norm": 0.5083222985267639, "learning_rate": 1.8746969672577283e-05, "loss": 0.0381, "step": 31920 }, { "epoch": 0.49006215946588905, "grad_norm": 0.6556828618049622, "learning_rate": 1.874567105610465e-05, "loss": 0.0526, "step": 31930 }, { "epoch": 0.490215639628578, "grad_norm": 0.46330469846725464, "learning_rate": 1.8744371812076182e-05, "loss": 0.0491, "step": 31940 }, { "epoch": 0.49036911979126696, "grad_norm": 0.5466348528862, "learning_rate": 1.8743071940585108e-05, "loss": 0.0397, "step": 31950 }, { "epoch": 0.490522599953956, "grad_norm": 0.47857043147087097, "learning_rate": 1.8741771441724695e-05, "loss": 0.0424, "step": 31960 }, { "epoch": 0.49067608011664493, "grad_norm": 0.520172655582428, "learning_rate": 1.8740470315588273e-05, "loss": 0.0445, "step": 31970 }, { "epoch": 0.4908295602793339, "grad_norm": 0.35542696714401245, "learning_rate": 1.8739168562269196e-05, "loss": 0.054, "step": 31980 }, { "epoch": 0.49098304044202284, "grad_norm": 0.7937743663787842, "learning_rate": 1.8737866181860877e-05, "loss": 0.0419, "step": 31990 }, { "epoch": 0.49113652060471186, "grad_norm": 0.5571171641349792, "learning_rate": 1.873656317445677e-05, "loss": 0.0407, "step": 32000 }, { "epoch": 0.4912900007674008, "grad_norm": 0.387001097202301, "learning_rate": 1.8735259540150377e-05, "loss": 0.0375, "step": 32010 }, { "epoch": 0.49144348093008977, "grad_norm": 0.4992322027683258, "learning_rate": 1.8733955279035236e-05, "loss": 0.0448, "step": 32020 }, { "epoch": 0.4915969610927788, "grad_norm": 0.47434064745903015, "learning_rate": 1.8732650391204936e-05, "loss": 0.0447, "step": 32030 }, { "epoch": 0.49175044125546774, "grad_norm": 0.42814990878105164, "learning_rate": 1.8731344876753113e-05, "loss": 0.0498, "step": 32040 }, { "epoch": 0.4919039214181567, "grad_norm": 0.5885627865791321, "learning_rate": 1.8730038735773444e-05, "loss": 0.0407, "step": 32050 }, { "epoch": 0.49205740158084565, "grad_norm": 0.4009067416191101, "learning_rate": 1.872873196835966e-05, "loss": 0.0472, "step": 32060 }, { "epoch": 0.49221088174353467, "grad_norm": 0.3926878273487091, "learning_rate": 1.872742457460552e-05, "loss": 0.0449, "step": 32070 }, { "epoch": 0.4923643619062236, "grad_norm": 0.44970348477363586, "learning_rate": 1.872611655460484e-05, "loss": 0.0492, "step": 32080 }, { "epoch": 0.4925178420689126, "grad_norm": 0.4894280433654785, "learning_rate": 1.8724807908451482e-05, "loss": 0.0426, "step": 32090 }, { "epoch": 0.49267132223160154, "grad_norm": 0.4103005528450012, "learning_rate": 1.8723498636239345e-05, "loss": 0.039, "step": 32100 }, { "epoch": 0.49282480239429055, "grad_norm": 0.4243747889995575, "learning_rate": 1.8722188738062384e-05, "loss": 0.053, "step": 32110 }, { "epoch": 0.4929782825569795, "grad_norm": 0.5033011436462402, "learning_rate": 1.8720878214014582e-05, "loss": 0.0519, "step": 32120 }, { "epoch": 0.49313176271966846, "grad_norm": 0.35397323966026306, "learning_rate": 1.871956706418999e-05, "loss": 0.0469, "step": 32130 }, { "epoch": 0.4932852428823575, "grad_norm": 0.5871561169624329, "learning_rate": 1.871825528868268e-05, "loss": 0.0538, "step": 32140 }, { "epoch": 0.49343872304504643, "grad_norm": 0.8202776312828064, "learning_rate": 1.8716942887586783e-05, "loss": 0.0513, "step": 32150 }, { "epoch": 0.4935922032077354, "grad_norm": 0.4223484396934509, "learning_rate": 1.8715629860996475e-05, "loss": 0.0489, "step": 32160 }, { "epoch": 0.49374568337042435, "grad_norm": 0.5186775922775269, "learning_rate": 1.8714316209005968e-05, "loss": 0.0481, "step": 32170 }, { "epoch": 0.49389916353311336, "grad_norm": 0.44200292229652405, "learning_rate": 1.871300193170953e-05, "loss": 0.0334, "step": 32180 }, { "epoch": 0.4940526436958023, "grad_norm": 0.4417136609554291, "learning_rate": 1.871168702920147e-05, "loss": 0.0534, "step": 32190 }, { "epoch": 0.4942061238584913, "grad_norm": 0.4061639904975891, "learning_rate": 1.8710371501576133e-05, "loss": 0.0377, "step": 32200 }, { "epoch": 0.4943596040211803, "grad_norm": 0.4937624931335449, "learning_rate": 1.8709055348927925e-05, "loss": 0.0482, "step": 32210 }, { "epoch": 0.49451308418386924, "grad_norm": 0.42047521471977234, "learning_rate": 1.8707738571351283e-05, "loss": 0.0521, "step": 32220 }, { "epoch": 0.4946665643465582, "grad_norm": 0.4869193136692047, "learning_rate": 1.870642116894069e-05, "loss": 0.0481, "step": 32230 }, { "epoch": 0.49482004450924716, "grad_norm": 0.36817121505737305, "learning_rate": 1.8705103141790686e-05, "loss": 0.0535, "step": 32240 }, { "epoch": 0.49497352467193617, "grad_norm": 0.527324378490448, "learning_rate": 1.870378448999584e-05, "loss": 0.0543, "step": 32250 }, { "epoch": 0.49512700483462513, "grad_norm": 0.5067764520645142, "learning_rate": 1.8702465213650783e-05, "loss": 0.0499, "step": 32260 }, { "epoch": 0.4952804849973141, "grad_norm": 0.4229653775691986, "learning_rate": 1.870114531285017e-05, "loss": 0.0456, "step": 32270 }, { "epoch": 0.49543396516000304, "grad_norm": 0.47961774468421936, "learning_rate": 1.869982478768872e-05, "loss": 0.0457, "step": 32280 }, { "epoch": 0.49558744532269206, "grad_norm": 0.5111884474754333, "learning_rate": 1.8698503638261185e-05, "loss": 0.0485, "step": 32290 }, { "epoch": 0.495740925485381, "grad_norm": 0.6042202711105347, "learning_rate": 1.8697181864662365e-05, "loss": 0.0464, "step": 32300 }, { "epoch": 0.49589440564806997, "grad_norm": 0.4930535852909088, "learning_rate": 1.8695859466987115e-05, "loss": 0.0559, "step": 32310 }, { "epoch": 0.496047885810759, "grad_norm": 0.5632262825965881, "learning_rate": 1.869453644533031e-05, "loss": 0.0438, "step": 32320 }, { "epoch": 0.49620136597344794, "grad_norm": 0.4896872043609619, "learning_rate": 1.8693212799786895e-05, "loss": 0.0488, "step": 32330 }, { "epoch": 0.4963548461361369, "grad_norm": 0.33549806475639343, "learning_rate": 1.8691888530451846e-05, "loss": 0.0355, "step": 32340 }, { "epoch": 0.49650832629882585, "grad_norm": 0.3123911917209625, "learning_rate": 1.869056363742019e-05, "loss": 0.0395, "step": 32350 }, { "epoch": 0.49666180646151487, "grad_norm": 0.6445783376693726, "learning_rate": 1.868923812078699e-05, "loss": 0.0516, "step": 32360 }, { "epoch": 0.4968152866242038, "grad_norm": 0.3861691653728485, "learning_rate": 1.8687911980647375e-05, "loss": 0.042, "step": 32370 }, { "epoch": 0.4969687667868928, "grad_norm": 0.5850500464439392, "learning_rate": 1.8686585217096483e-05, "loss": 0.0499, "step": 32380 }, { "epoch": 0.4971222469495818, "grad_norm": 0.4514615535736084, "learning_rate": 1.8685257830229536e-05, "loss": 0.0445, "step": 32390 }, { "epoch": 0.49727572711227075, "grad_norm": 0.5263655185699463, "learning_rate": 1.8683929820141767e-05, "loss": 0.0405, "step": 32400 }, { "epoch": 0.4974292072749597, "grad_norm": 0.42575618624687195, "learning_rate": 1.8682601186928485e-05, "loss": 0.0339, "step": 32410 }, { "epoch": 0.49758268743764866, "grad_norm": 0.47224217653274536, "learning_rate": 1.868127193068501e-05, "loss": 0.0379, "step": 32420 }, { "epoch": 0.4977361676003377, "grad_norm": 0.5761165022850037, "learning_rate": 1.8679942051506737e-05, "loss": 0.0455, "step": 32430 }, { "epoch": 0.49788964776302663, "grad_norm": 0.4466749131679535, "learning_rate": 1.8678611549489092e-05, "loss": 0.0383, "step": 32440 }, { "epoch": 0.4980431279257156, "grad_norm": 0.48444652557373047, "learning_rate": 1.867728042472754e-05, "loss": 0.0392, "step": 32450 }, { "epoch": 0.49819660808840455, "grad_norm": 0.6045377850532532, "learning_rate": 1.8675948677317604e-05, "loss": 0.043, "step": 32460 }, { "epoch": 0.49835008825109356, "grad_norm": 0.4284573793411255, "learning_rate": 1.867461630735484e-05, "loss": 0.0545, "step": 32470 }, { "epoch": 0.4985035684137825, "grad_norm": 0.3838101923465729, "learning_rate": 1.867328331493486e-05, "loss": 0.0412, "step": 32480 }, { "epoch": 0.4986570485764715, "grad_norm": 0.5126412510871887, "learning_rate": 1.8671949700153305e-05, "loss": 0.0426, "step": 32490 }, { "epoch": 0.4988105287391605, "grad_norm": 0.5474240779876709, "learning_rate": 1.8670615463105877e-05, "loss": 0.0474, "step": 32500 }, { "epoch": 0.49896400890184944, "grad_norm": 0.42463037371635437, "learning_rate": 1.8669280603888314e-05, "loss": 0.0482, "step": 32510 }, { "epoch": 0.4991174890645384, "grad_norm": 0.3774053752422333, "learning_rate": 1.8667945122596402e-05, "loss": 0.0454, "step": 32520 }, { "epoch": 0.49927096922722736, "grad_norm": 0.5233913660049438, "learning_rate": 1.866660901932597e-05, "loss": 0.057, "step": 32530 }, { "epoch": 0.49942444938991637, "grad_norm": 0.4378274083137512, "learning_rate": 1.866527229417289e-05, "loss": 0.0448, "step": 32540 }, { "epoch": 0.4995779295526053, "grad_norm": 0.5095233917236328, "learning_rate": 1.866393494723308e-05, "loss": 0.0436, "step": 32550 }, { "epoch": 0.4997314097152943, "grad_norm": 0.3905951976776123, "learning_rate": 1.8662596978602506e-05, "loss": 0.0459, "step": 32560 }, { "epoch": 0.4998848898779833, "grad_norm": 0.41344761848449707, "learning_rate": 1.866125838837717e-05, "loss": 0.0442, "step": 32570 }, { "epoch": 0.5000383700406722, "grad_norm": 0.2840031087398529, "learning_rate": 1.865991917665313e-05, "loss": 0.039, "step": 32580 }, { "epoch": 0.5001918502033612, "grad_norm": 0.5393927097320557, "learning_rate": 1.8658579343526474e-05, "loss": 0.0402, "step": 32590 }, { "epoch": 0.5003453303660502, "grad_norm": 0.40634018182754517, "learning_rate": 1.8657238889093356e-05, "loss": 0.0425, "step": 32600 }, { "epoch": 0.5004988105287391, "grad_norm": 0.41780751943588257, "learning_rate": 1.865589781344995e-05, "loss": 0.0439, "step": 32610 }, { "epoch": 0.5006522906914281, "grad_norm": 0.6574164032936096, "learning_rate": 1.8654556116692496e-05, "loss": 0.0459, "step": 32620 }, { "epoch": 0.5008057708541171, "grad_norm": 0.41457924246788025, "learning_rate": 1.8653213798917263e-05, "loss": 0.0446, "step": 32630 }, { "epoch": 0.500959251016806, "grad_norm": 0.4763413965702057, "learning_rate": 1.8651870860220574e-05, "loss": 0.0456, "step": 32640 }, { "epoch": 0.5011127311794951, "grad_norm": 0.397633820772171, "learning_rate": 1.8650527300698788e-05, "loss": 0.0496, "step": 32650 }, { "epoch": 0.5012662113421841, "grad_norm": 0.4686315357685089, "learning_rate": 1.8649183120448317e-05, "loss": 0.042, "step": 32660 }, { "epoch": 0.501419691504873, "grad_norm": 0.4736368656158447, "learning_rate": 1.8647838319565614e-05, "loss": 0.0367, "step": 32670 }, { "epoch": 0.501573171667562, "grad_norm": 0.5645484328269958, "learning_rate": 1.8646492898147177e-05, "loss": 0.0479, "step": 32680 }, { "epoch": 0.5017266518302509, "grad_norm": 0.35838940739631653, "learning_rate": 1.864514685628955e-05, "loss": 0.0448, "step": 32690 }, { "epoch": 0.5018801319929399, "grad_norm": 0.6346972584724426, "learning_rate": 1.8643800194089315e-05, "loss": 0.0501, "step": 32700 }, { "epoch": 0.5020336121556289, "grad_norm": 0.38038408756256104, "learning_rate": 1.864245291164311e-05, "loss": 0.033, "step": 32710 }, { "epoch": 0.5021870923183178, "grad_norm": 0.515079140663147, "learning_rate": 1.8641105009047607e-05, "loss": 0.0453, "step": 32720 }, { "epoch": 0.5023405724810068, "grad_norm": 0.3906264007091522, "learning_rate": 1.8639756486399526e-05, "loss": 0.0463, "step": 32730 }, { "epoch": 0.5024940526436958, "grad_norm": 0.5798229575157166, "learning_rate": 1.8638407343795633e-05, "loss": 0.0447, "step": 32740 }, { "epoch": 0.5026475328063847, "grad_norm": 0.6403605937957764, "learning_rate": 1.8637057581332732e-05, "loss": 0.0403, "step": 32750 }, { "epoch": 0.5028010129690738, "grad_norm": 0.5892131328582764, "learning_rate": 1.8635707199107686e-05, "loss": 0.0406, "step": 32760 }, { "epoch": 0.5029544931317628, "grad_norm": 0.6195287108421326, "learning_rate": 1.8634356197217383e-05, "loss": 0.0503, "step": 32770 }, { "epoch": 0.5031079732944517, "grad_norm": 0.3856659233570099, "learning_rate": 1.863300457575878e-05, "loss": 0.0432, "step": 32780 }, { "epoch": 0.5032614534571407, "grad_norm": 0.49343642592430115, "learning_rate": 1.8631652334828853e-05, "loss": 0.0579, "step": 32790 }, { "epoch": 0.5034149336198296, "grad_norm": 0.5581701397895813, "learning_rate": 1.8630299474524632e-05, "loss": 0.0608, "step": 32800 }, { "epoch": 0.5035684137825186, "grad_norm": 0.3947276473045349, "learning_rate": 1.86289459949432e-05, "loss": 0.0436, "step": 32810 }, { "epoch": 0.5037218939452076, "grad_norm": 0.23193354904651642, "learning_rate": 1.862759189618167e-05, "loss": 0.0446, "step": 32820 }, { "epoch": 0.5038753741078965, "grad_norm": 0.4577118158340454, "learning_rate": 1.8626237178337216e-05, "loss": 0.0358, "step": 32830 }, { "epoch": 0.5040288542705855, "grad_norm": 0.34144166111946106, "learning_rate": 1.8624881841507046e-05, "loss": 0.0335, "step": 32840 }, { "epoch": 0.5041823344332745, "grad_norm": 0.8520560264587402, "learning_rate": 1.8623525885788405e-05, "loss": 0.0514, "step": 32850 }, { "epoch": 0.5043358145959634, "grad_norm": 0.7668495774269104, "learning_rate": 1.86221693112786e-05, "loss": 0.0507, "step": 32860 }, { "epoch": 0.5044892947586525, "grad_norm": 0.30715224146842957, "learning_rate": 1.8620812118074972e-05, "loss": 0.0352, "step": 32870 }, { "epoch": 0.5046427749213415, "grad_norm": 0.4845987856388092, "learning_rate": 1.86194543062749e-05, "loss": 0.0374, "step": 32880 }, { "epoch": 0.5047962550840304, "grad_norm": 0.6946525573730469, "learning_rate": 1.861809587597583e-05, "loss": 0.051, "step": 32890 }, { "epoch": 0.5049497352467194, "grad_norm": 0.5188230276107788, "learning_rate": 1.861673682727523e-05, "loss": 0.045, "step": 32900 }, { "epoch": 0.5051032154094083, "grad_norm": 0.44128429889678955, "learning_rate": 1.8615377160270612e-05, "loss": 0.0479, "step": 32910 }, { "epoch": 0.5052566955720973, "grad_norm": 0.26919424533843994, "learning_rate": 1.8614016875059552e-05, "loss": 0.0513, "step": 32920 }, { "epoch": 0.5054101757347863, "grad_norm": 0.3720189332962036, "learning_rate": 1.8612655971739656e-05, "loss": 0.0323, "step": 32930 }, { "epoch": 0.5055636558974752, "grad_norm": 0.35061004757881165, "learning_rate": 1.8611294450408576e-05, "loss": 0.0348, "step": 32940 }, { "epoch": 0.5057171360601642, "grad_norm": 0.3789161443710327, "learning_rate": 1.8609932311164008e-05, "loss": 0.0381, "step": 32950 }, { "epoch": 0.5058706162228532, "grad_norm": 0.5716487169265747, "learning_rate": 1.86085695541037e-05, "loss": 0.0393, "step": 32960 }, { "epoch": 0.5060240963855421, "grad_norm": 0.35540783405303955, "learning_rate": 1.8607206179325433e-05, "loss": 0.0483, "step": 32970 }, { "epoch": 0.5061775765482311, "grad_norm": 0.43751147389411926, "learning_rate": 1.8605842186927035e-05, "loss": 0.0363, "step": 32980 }, { "epoch": 0.5063310567109202, "grad_norm": 0.41357865929603577, "learning_rate": 1.8604477577006387e-05, "loss": 0.0524, "step": 32990 }, { "epoch": 0.5064845368736091, "grad_norm": 0.38910919427871704, "learning_rate": 1.860311234966141e-05, "loss": 0.0411, "step": 33000 }, { "epoch": 0.5066380170362981, "grad_norm": 0.4556829035282135, "learning_rate": 1.8601746504990056e-05, "loss": 0.043, "step": 33010 }, { "epoch": 0.5067914971989871, "grad_norm": 1.1493371725082397, "learning_rate": 1.860038004309034e-05, "loss": 0.0491, "step": 33020 }, { "epoch": 0.506944977361676, "grad_norm": 0.4680449068546295, "learning_rate": 1.8599012964060316e-05, "loss": 0.0436, "step": 33030 }, { "epoch": 0.507098457524365, "grad_norm": 0.48642733693122864, "learning_rate": 1.859764526799808e-05, "loss": 0.0441, "step": 33040 }, { "epoch": 0.5072519376870539, "grad_norm": 0.4099145829677582, "learning_rate": 1.8596276955001768e-05, "loss": 0.0431, "step": 33050 }, { "epoch": 0.5074054178497429, "grad_norm": 0.49321866035461426, "learning_rate": 1.8594908025169573e-05, "loss": 0.043, "step": 33060 }, { "epoch": 0.5075588980124319, "grad_norm": 0.42507681250572205, "learning_rate": 1.8593538478599712e-05, "loss": 0.0553, "step": 33070 }, { "epoch": 0.5077123781751208, "grad_norm": 0.46638911962509155, "learning_rate": 1.8592168315390472e-05, "loss": 0.0485, "step": 33080 }, { "epoch": 0.5078658583378098, "grad_norm": 0.3483223021030426, "learning_rate": 1.859079753564016e-05, "loss": 0.0515, "step": 33090 }, { "epoch": 0.5080193385004989, "grad_norm": 0.47800135612487793, "learning_rate": 1.8589426139447142e-05, "loss": 0.0389, "step": 33100 }, { "epoch": 0.5081728186631878, "grad_norm": 0.5776951909065247, "learning_rate": 1.858805412690982e-05, "loss": 0.0567, "step": 33110 }, { "epoch": 0.5083262988258768, "grad_norm": 0.5552274584770203, "learning_rate": 1.8586681498126657e-05, "loss": 0.0442, "step": 33120 }, { "epoch": 0.5084797789885658, "grad_norm": 0.46728065609931946, "learning_rate": 1.8585308253196137e-05, "loss": 0.0396, "step": 33130 }, { "epoch": 0.5086332591512547, "grad_norm": 0.3328321576118469, "learning_rate": 1.8583934392216797e-05, "loss": 0.0386, "step": 33140 }, { "epoch": 0.5087867393139437, "grad_norm": 0.5205628275871277, "learning_rate": 1.8582559915287226e-05, "loss": 0.0445, "step": 33150 }, { "epoch": 0.5089402194766326, "grad_norm": 0.47075557708740234, "learning_rate": 1.858118482250605e-05, "loss": 0.0562, "step": 33160 }, { "epoch": 0.5090936996393216, "grad_norm": 0.4800010621547699, "learning_rate": 1.8579809113971938e-05, "loss": 0.0461, "step": 33170 }, { "epoch": 0.5092471798020106, "grad_norm": 0.3183010518550873, "learning_rate": 1.857843278978361e-05, "loss": 0.0552, "step": 33180 }, { "epoch": 0.5094006599646995, "grad_norm": 0.4866288900375366, "learning_rate": 1.857705585003982e-05, "loss": 0.0537, "step": 33190 }, { "epoch": 0.5095541401273885, "grad_norm": 0.5210946202278137, "learning_rate": 1.857567829483937e-05, "loss": 0.05, "step": 33200 }, { "epoch": 0.5097076202900775, "grad_norm": 0.4601939618587494, "learning_rate": 1.857430012428112e-05, "loss": 0.0408, "step": 33210 }, { "epoch": 0.5098611004527664, "grad_norm": 0.3875747323036194, "learning_rate": 1.8572921338463954e-05, "loss": 0.0408, "step": 33220 }, { "epoch": 0.5100145806154555, "grad_norm": 0.7096023559570312, "learning_rate": 1.857154193748681e-05, "loss": 0.0549, "step": 33230 }, { "epoch": 0.5101680607781445, "grad_norm": 0.5916392207145691, "learning_rate": 1.8570161921448665e-05, "loss": 0.038, "step": 33240 }, { "epoch": 0.5103215409408334, "grad_norm": 0.45400846004486084, "learning_rate": 1.8568781290448545e-05, "loss": 0.04, "step": 33250 }, { "epoch": 0.5104750211035224, "grad_norm": 0.7040917277336121, "learning_rate": 1.8567400044585525e-05, "loss": 0.043, "step": 33260 }, { "epoch": 0.5106285012662113, "grad_norm": 0.5643277168273926, "learning_rate": 1.856601818395871e-05, "loss": 0.0384, "step": 33270 }, { "epoch": 0.5107819814289003, "grad_norm": 0.48723331093788147, "learning_rate": 1.856463570866726e-05, "loss": 0.0443, "step": 33280 }, { "epoch": 0.5109354615915893, "grad_norm": 0.4393030107021332, "learning_rate": 1.856325261881038e-05, "loss": 0.0557, "step": 33290 }, { "epoch": 0.5110889417542782, "grad_norm": 0.27366402745246887, "learning_rate": 1.8561868914487305e-05, "loss": 0.0421, "step": 33300 }, { "epoch": 0.5112424219169672, "grad_norm": 0.48646268248558044, "learning_rate": 1.856048459579734e-05, "loss": 0.0345, "step": 33310 }, { "epoch": 0.5113959020796562, "grad_norm": 0.526968240737915, "learning_rate": 1.8559099662839802e-05, "loss": 0.0463, "step": 33320 }, { "epoch": 0.5115493822423451, "grad_norm": 0.3750708997249603, "learning_rate": 1.8557714115714077e-05, "loss": 0.055, "step": 33330 }, { "epoch": 0.5117028624050342, "grad_norm": 0.4399351477622986, "learning_rate": 1.855632795451959e-05, "loss": 0.0441, "step": 33340 }, { "epoch": 0.5118563425677232, "grad_norm": 0.3942532539367676, "learning_rate": 1.8554941179355797e-05, "loss": 0.0423, "step": 33350 }, { "epoch": 0.5120098227304121, "grad_norm": 0.40734031796455383, "learning_rate": 1.8553553790322214e-05, "loss": 0.0403, "step": 33360 }, { "epoch": 0.5121633028931011, "grad_norm": 0.6114932894706726, "learning_rate": 1.8552165787518396e-05, "loss": 0.0482, "step": 33370 }, { "epoch": 0.5123167830557901, "grad_norm": 0.42437243461608887, "learning_rate": 1.8550777171043934e-05, "loss": 0.0523, "step": 33380 }, { "epoch": 0.512470263218479, "grad_norm": 0.43741440773010254, "learning_rate": 1.8549387940998476e-05, "loss": 0.0464, "step": 33390 }, { "epoch": 0.512623743381168, "grad_norm": 0.464388906955719, "learning_rate": 1.854799809748171e-05, "loss": 0.0437, "step": 33400 }, { "epoch": 0.5127772235438569, "grad_norm": 0.6441669464111328, "learning_rate": 1.854660764059335e-05, "loss": 0.0558, "step": 33410 }, { "epoch": 0.5129307037065459, "grad_norm": 0.4166402220726013, "learning_rate": 1.854521657043319e-05, "loss": 0.0503, "step": 33420 }, { "epoch": 0.5130841838692349, "grad_norm": 0.4114845395088196, "learning_rate": 1.854382488710104e-05, "loss": 0.0409, "step": 33430 }, { "epoch": 0.5132376640319238, "grad_norm": 0.3608035445213318, "learning_rate": 1.854243259069676e-05, "loss": 0.0477, "step": 33440 }, { "epoch": 0.5133911441946128, "grad_norm": 0.40072089433670044, "learning_rate": 1.8541039681320256e-05, "loss": 0.0387, "step": 33450 }, { "epoch": 0.5135446243573019, "grad_norm": 0.4198954403400421, "learning_rate": 1.853964615907148e-05, "loss": 0.0444, "step": 33460 }, { "epoch": 0.5136981045199908, "grad_norm": 0.35955125093460083, "learning_rate": 1.8538252024050424e-05, "loss": 0.0382, "step": 33470 }, { "epoch": 0.5138515846826798, "grad_norm": 0.39918801188468933, "learning_rate": 1.8536857276357124e-05, "loss": 0.048, "step": 33480 }, { "epoch": 0.5140050648453688, "grad_norm": 0.47695043683052063, "learning_rate": 1.8535461916091668e-05, "loss": 0.0466, "step": 33490 }, { "epoch": 0.5141585450080577, "grad_norm": 0.7579275369644165, "learning_rate": 1.8534065943354177e-05, "loss": 0.0555, "step": 33500 }, { "epoch": 0.5143120251707467, "grad_norm": 0.48861685395240784, "learning_rate": 1.853266935824482e-05, "loss": 0.0397, "step": 33510 }, { "epoch": 0.5144655053334356, "grad_norm": 0.4530434012413025, "learning_rate": 1.8531272160863815e-05, "loss": 0.0409, "step": 33520 }, { "epoch": 0.5146189854961246, "grad_norm": 0.4037337005138397, "learning_rate": 1.8529874351311413e-05, "loss": 0.04, "step": 33530 }, { "epoch": 0.5147724656588136, "grad_norm": 0.6456338167190552, "learning_rate": 1.8528475929687922e-05, "loss": 0.047, "step": 33540 }, { "epoch": 0.5149259458215025, "grad_norm": 0.5916973948478699, "learning_rate": 1.8527076896093687e-05, "loss": 0.0399, "step": 33550 }, { "epoch": 0.5150794259841915, "grad_norm": 0.33626699447631836, "learning_rate": 1.8525677250629092e-05, "loss": 0.048, "step": 33560 }, { "epoch": 0.5152329061468806, "grad_norm": 0.5249722599983215, "learning_rate": 1.852427699339457e-05, "loss": 0.0447, "step": 33570 }, { "epoch": 0.5153863863095695, "grad_norm": 0.4052063822746277, "learning_rate": 1.8522876124490604e-05, "loss": 0.0395, "step": 33580 }, { "epoch": 0.5155398664722585, "grad_norm": 0.5238139629364014, "learning_rate": 1.852147464401771e-05, "loss": 0.0583, "step": 33590 }, { "epoch": 0.5156933466349475, "grad_norm": 0.5147346258163452, "learning_rate": 1.8520072552076457e-05, "loss": 0.0462, "step": 33600 }, { "epoch": 0.5158468267976364, "grad_norm": 0.3347148299217224, "learning_rate": 1.8518669848767455e-05, "loss": 0.0517, "step": 33610 }, { "epoch": 0.5160003069603254, "grad_norm": 0.3242601752281189, "learning_rate": 1.8517266534191347e-05, "loss": 0.0415, "step": 33620 }, { "epoch": 0.5161537871230143, "grad_norm": 0.4317902624607086, "learning_rate": 1.851586260844884e-05, "loss": 0.0465, "step": 33630 }, { "epoch": 0.5163072672857033, "grad_norm": 0.5740068554878235, "learning_rate": 1.851445807164067e-05, "loss": 0.0518, "step": 33640 }, { "epoch": 0.5164607474483923, "grad_norm": 0.37728869915008545, "learning_rate": 1.851305292386762e-05, "loss": 0.047, "step": 33650 }, { "epoch": 0.5166142276110812, "grad_norm": 0.3600757122039795, "learning_rate": 1.851164716523052e-05, "loss": 0.0356, "step": 33660 }, { "epoch": 0.5167677077737702, "grad_norm": 0.48840782046318054, "learning_rate": 1.851024079583024e-05, "loss": 0.0497, "step": 33670 }, { "epoch": 0.5169211879364592, "grad_norm": 0.3230718970298767, "learning_rate": 1.8508833815767696e-05, "loss": 0.0418, "step": 33680 }, { "epoch": 0.5170746680991481, "grad_norm": 0.6337504982948303, "learning_rate": 1.850742622514385e-05, "loss": 0.0448, "step": 33690 }, { "epoch": 0.5172281482618372, "grad_norm": 0.47041231393814087, "learning_rate": 1.8506018024059704e-05, "loss": 0.0489, "step": 33700 }, { "epoch": 0.5173816284245262, "grad_norm": 0.28010180592536926, "learning_rate": 1.8504609212616305e-05, "loss": 0.039, "step": 33710 }, { "epoch": 0.5175351085872151, "grad_norm": 0.5072258710861206, "learning_rate": 1.8503199790914744e-05, "loss": 0.0403, "step": 33720 }, { "epoch": 0.5176885887499041, "grad_norm": 0.39009496569633484, "learning_rate": 1.850178975905615e-05, "loss": 0.0484, "step": 33730 }, { "epoch": 0.5178420689125931, "grad_norm": 0.41065022349357605, "learning_rate": 1.8500379117141715e-05, "loss": 0.0442, "step": 33740 }, { "epoch": 0.517995549075282, "grad_norm": 0.9084530472755432, "learning_rate": 1.849896786527265e-05, "loss": 0.0447, "step": 33750 }, { "epoch": 0.518149029237971, "grad_norm": 0.4146178364753723, "learning_rate": 1.8497556003550224e-05, "loss": 0.0425, "step": 33760 }, { "epoch": 0.5183025094006599, "grad_norm": 0.35194483399391174, "learning_rate": 1.8496143532075748e-05, "loss": 0.0372, "step": 33770 }, { "epoch": 0.5184559895633489, "grad_norm": 0.415193647146225, "learning_rate": 1.849473045095057e-05, "loss": 0.0515, "step": 33780 }, { "epoch": 0.5186094697260379, "grad_norm": 0.4430404305458069, "learning_rate": 1.8493316760276097e-05, "loss": 0.0515, "step": 33790 }, { "epoch": 0.5187629498887268, "grad_norm": 0.46180233359336853, "learning_rate": 1.8491902460153763e-05, "loss": 0.0579, "step": 33800 }, { "epoch": 0.5189164300514159, "grad_norm": 0.4981617331504822, "learning_rate": 1.8490487550685053e-05, "loss": 0.0407, "step": 33810 }, { "epoch": 0.5190699102141049, "grad_norm": 0.40343400835990906, "learning_rate": 1.84890720319715e-05, "loss": 0.0491, "step": 33820 }, { "epoch": 0.5192233903767938, "grad_norm": 0.40330269932746887, "learning_rate": 1.8487655904114673e-05, "loss": 0.0387, "step": 33830 }, { "epoch": 0.5193768705394828, "grad_norm": 0.6164826154708862, "learning_rate": 1.8486239167216185e-05, "loss": 0.0477, "step": 33840 }, { "epoch": 0.5195303507021718, "grad_norm": 0.5116811990737915, "learning_rate": 1.8484821821377702e-05, "loss": 0.0474, "step": 33850 }, { "epoch": 0.5196838308648607, "grad_norm": 0.3767741620540619, "learning_rate": 1.8483403866700918e-05, "loss": 0.0415, "step": 33860 }, { "epoch": 0.5198373110275497, "grad_norm": 0.433582603931427, "learning_rate": 1.8481985303287593e-05, "loss": 0.0407, "step": 33870 }, { "epoch": 0.5199907911902386, "grad_norm": 0.6471046805381775, "learning_rate": 1.848056613123951e-05, "loss": 0.0449, "step": 33880 }, { "epoch": 0.5201442713529276, "grad_norm": 0.6105167865753174, "learning_rate": 1.84791463506585e-05, "loss": 0.0476, "step": 33890 }, { "epoch": 0.5202977515156166, "grad_norm": 0.37888991832733154, "learning_rate": 1.847772596164645e-05, "loss": 0.0377, "step": 33900 }, { "epoch": 0.5204512316783055, "grad_norm": 0.5081467628479004, "learning_rate": 1.847630496430527e-05, "loss": 0.0451, "step": 33910 }, { "epoch": 0.5206047118409945, "grad_norm": 0.5449358820915222, "learning_rate": 1.8474883358736937e-05, "loss": 0.0422, "step": 33920 }, { "epoch": 0.5207581920036836, "grad_norm": 0.32120218873023987, "learning_rate": 1.8473461145043457e-05, "loss": 0.0471, "step": 33930 }, { "epoch": 0.5209116721663725, "grad_norm": 0.3467376232147217, "learning_rate": 1.8472038323326878e-05, "loss": 0.0402, "step": 33940 }, { "epoch": 0.5210651523290615, "grad_norm": 0.3613039255142212, "learning_rate": 1.8470614893689298e-05, "loss": 0.0455, "step": 33950 }, { "epoch": 0.5212186324917505, "grad_norm": 0.5623016357421875, "learning_rate": 1.846919085623286e-05, "loss": 0.0335, "step": 33960 }, { "epoch": 0.5213721126544394, "grad_norm": 0.508509635925293, "learning_rate": 1.846776621105974e-05, "loss": 0.0379, "step": 33970 }, { "epoch": 0.5215255928171284, "grad_norm": 0.6162812113761902, "learning_rate": 1.8466340958272177e-05, "loss": 0.053, "step": 33980 }, { "epoch": 0.5216790729798173, "grad_norm": 0.6235093474388123, "learning_rate": 1.8464915097972433e-05, "loss": 0.0467, "step": 33990 }, { "epoch": 0.5218325531425063, "grad_norm": 0.41367998719215393, "learning_rate": 1.846348863026282e-05, "loss": 0.0368, "step": 34000 }, { "epoch": 0.5219860333051953, "grad_norm": 0.4979706108570099, "learning_rate": 1.8462061555245703e-05, "loss": 0.0386, "step": 34010 }, { "epoch": 0.5221395134678842, "grad_norm": 0.46578359603881836, "learning_rate": 1.846063387302348e-05, "loss": 0.0507, "step": 34020 }, { "epoch": 0.5222929936305732, "grad_norm": 0.3950074315071106, "learning_rate": 1.8459205583698598e-05, "loss": 0.0506, "step": 34030 }, { "epoch": 0.5224464737932623, "grad_norm": 0.30982786417007446, "learning_rate": 1.8457776687373544e-05, "loss": 0.0401, "step": 34040 }, { "epoch": 0.5225999539559512, "grad_norm": 0.5350876450538635, "learning_rate": 1.845634718415085e-05, "loss": 0.0495, "step": 34050 }, { "epoch": 0.5227534341186402, "grad_norm": 0.5149698257446289, "learning_rate": 1.8454917074133094e-05, "loss": 0.0436, "step": 34060 }, { "epoch": 0.5229069142813292, "grad_norm": 0.6369085311889648, "learning_rate": 1.845348635742289e-05, "loss": 0.0412, "step": 34070 }, { "epoch": 0.5230603944440181, "grad_norm": 0.4548455774784088, "learning_rate": 1.8452055034122905e-05, "loss": 0.0466, "step": 34080 }, { "epoch": 0.5232138746067071, "grad_norm": 0.4239148795604706, "learning_rate": 1.8450623104335843e-05, "loss": 0.0394, "step": 34090 }, { "epoch": 0.5233673547693961, "grad_norm": 0.29881417751312256, "learning_rate": 1.8449190568164458e-05, "loss": 0.042, "step": 34100 }, { "epoch": 0.523520834932085, "grad_norm": 0.6047583818435669, "learning_rate": 1.8447757425711535e-05, "loss": 0.0482, "step": 34110 }, { "epoch": 0.523674315094774, "grad_norm": 0.407671719789505, "learning_rate": 1.844632367707992e-05, "loss": 0.0464, "step": 34120 }, { "epoch": 0.5238277952574629, "grad_norm": 0.4609729051589966, "learning_rate": 1.8444889322372486e-05, "loss": 0.0427, "step": 34130 }, { "epoch": 0.5239812754201519, "grad_norm": 0.5797867178916931, "learning_rate": 1.8443454361692167e-05, "loss": 0.0366, "step": 34140 }, { "epoch": 0.524134755582841, "grad_norm": 0.5474380850791931, "learning_rate": 1.8442018795141916e-05, "loss": 0.0499, "step": 34150 }, { "epoch": 0.5242882357455299, "grad_norm": 0.37628114223480225, "learning_rate": 1.8440582622824754e-05, "loss": 0.0407, "step": 34160 }, { "epoch": 0.5244417159082189, "grad_norm": 0.39439478516578674, "learning_rate": 1.8439145844843734e-05, "loss": 0.0374, "step": 34170 }, { "epoch": 0.5245951960709079, "grad_norm": 0.5733376145362854, "learning_rate": 1.843770846130195e-05, "loss": 0.0539, "step": 34180 }, { "epoch": 0.5247486762335968, "grad_norm": 0.611029863357544, "learning_rate": 1.8436270472302545e-05, "loss": 0.0424, "step": 34190 }, { "epoch": 0.5249021563962858, "grad_norm": 0.673647403717041, "learning_rate": 1.8434831877948704e-05, "loss": 0.0423, "step": 34200 }, { "epoch": 0.5250556365589748, "grad_norm": 0.35793837904930115, "learning_rate": 1.8433392678343656e-05, "loss": 0.0394, "step": 34210 }, { "epoch": 0.5252091167216637, "grad_norm": 0.6930642127990723, "learning_rate": 1.843195287359067e-05, "loss": 0.0425, "step": 34220 }, { "epoch": 0.5253625968843527, "grad_norm": 0.40377572178840637, "learning_rate": 1.843051246379306e-05, "loss": 0.0418, "step": 34230 }, { "epoch": 0.5255160770470416, "grad_norm": 0.5128865242004395, "learning_rate": 1.8429071449054192e-05, "loss": 0.0539, "step": 34240 }, { "epoch": 0.5256695572097306, "grad_norm": 0.6038863062858582, "learning_rate": 1.8427629829477456e-05, "loss": 0.0495, "step": 34250 }, { "epoch": 0.5258230373724196, "grad_norm": 0.38198322057724, "learning_rate": 1.842618760516631e-05, "loss": 0.0437, "step": 34260 }, { "epoch": 0.5259765175351085, "grad_norm": 0.5281840562820435, "learning_rate": 1.842474477622423e-05, "loss": 0.0493, "step": 34270 }, { "epoch": 0.5261299976977976, "grad_norm": 0.3500162959098816, "learning_rate": 1.8423301342754755e-05, "loss": 0.0439, "step": 34280 }, { "epoch": 0.5262834778604866, "grad_norm": 0.3903065621852875, "learning_rate": 1.8421857304861462e-05, "loss": 0.0419, "step": 34290 }, { "epoch": 0.5264369580231755, "grad_norm": 0.5646812915802002, "learning_rate": 1.8420412662647963e-05, "loss": 0.0468, "step": 34300 }, { "epoch": 0.5265904381858645, "grad_norm": 0.571731686592102, "learning_rate": 1.8418967416217924e-05, "loss": 0.0452, "step": 34310 }, { "epoch": 0.5267439183485535, "grad_norm": 0.41868436336517334, "learning_rate": 1.841752156567505e-05, "loss": 0.0457, "step": 34320 }, { "epoch": 0.5268973985112424, "grad_norm": 0.3601611256599426, "learning_rate": 1.841607511112309e-05, "loss": 0.0384, "step": 34330 }, { "epoch": 0.5270508786739314, "grad_norm": 0.44337591528892517, "learning_rate": 1.8414628052665833e-05, "loss": 0.042, "step": 34340 }, { "epoch": 0.5272043588366203, "grad_norm": 0.4059304893016815, "learning_rate": 1.8413180390407118e-05, "loss": 0.0412, "step": 34350 }, { "epoch": 0.5273578389993093, "grad_norm": 0.5934967994689941, "learning_rate": 1.8411732124450827e-05, "loss": 0.0461, "step": 34360 }, { "epoch": 0.5275113191619983, "grad_norm": 0.42043057084083557, "learning_rate": 1.8410283254900873e-05, "loss": 0.0462, "step": 34370 }, { "epoch": 0.5276647993246872, "grad_norm": 0.627748966217041, "learning_rate": 1.8408833781861225e-05, "loss": 0.0454, "step": 34380 }, { "epoch": 0.5278182794873763, "grad_norm": 0.5950840711593628, "learning_rate": 1.8407383705435896e-05, "loss": 0.0432, "step": 34390 }, { "epoch": 0.5279717596500653, "grad_norm": 0.37607598304748535, "learning_rate": 1.8405933025728934e-05, "loss": 0.0482, "step": 34400 }, { "epoch": 0.5281252398127542, "grad_norm": 0.457539826631546, "learning_rate": 1.8404481742844433e-05, "loss": 0.0428, "step": 34410 }, { "epoch": 0.5282787199754432, "grad_norm": 0.6821067333221436, "learning_rate": 1.8403029856886535e-05, "loss": 0.0397, "step": 34420 }, { "epoch": 0.5284322001381322, "grad_norm": 0.27168527245521545, "learning_rate": 1.8401577367959417e-05, "loss": 0.0489, "step": 34430 }, { "epoch": 0.5285856803008211, "grad_norm": 0.5693329572677612, "learning_rate": 1.840012427616731e-05, "loss": 0.0433, "step": 34440 }, { "epoch": 0.5287391604635101, "grad_norm": 0.4405961036682129, "learning_rate": 1.8398670581614477e-05, "loss": 0.0464, "step": 34450 }, { "epoch": 0.5288926406261991, "grad_norm": 0.4157332479953766, "learning_rate": 1.839721628440523e-05, "loss": 0.0383, "step": 34460 }, { "epoch": 0.529046120788888, "grad_norm": 0.2778935730457306, "learning_rate": 1.839576138464393e-05, "loss": 0.045, "step": 34470 }, { "epoch": 0.529199600951577, "grad_norm": 0.4969881474971771, "learning_rate": 1.8394305882434972e-05, "loss": 0.0418, "step": 34480 }, { "epoch": 0.5293530811142659, "grad_norm": 0.4941237270832062, "learning_rate": 1.8392849777882792e-05, "loss": 0.0513, "step": 34490 }, { "epoch": 0.529506561276955, "grad_norm": 0.5229565501213074, "learning_rate": 1.8391393071091877e-05, "loss": 0.0497, "step": 34500 }, { "epoch": 0.529660041439644, "grad_norm": 0.47173187136650085, "learning_rate": 1.8389935762166762e-05, "loss": 0.0385, "step": 34510 }, { "epoch": 0.5298135216023329, "grad_norm": 0.3470028042793274, "learning_rate": 1.8388477851212007e-05, "loss": 0.0408, "step": 34520 }, { "epoch": 0.5299670017650219, "grad_norm": 0.5085234642028809, "learning_rate": 1.8387019338332234e-05, "loss": 0.0414, "step": 34530 }, { "epoch": 0.5301204819277109, "grad_norm": 0.5079159140586853, "learning_rate": 1.8385560223632097e-05, "loss": 0.0444, "step": 34540 }, { "epoch": 0.5302739620903998, "grad_norm": 0.3678014576435089, "learning_rate": 1.8384100507216294e-05, "loss": 0.0406, "step": 34550 }, { "epoch": 0.5304274422530888, "grad_norm": 0.5037451982498169, "learning_rate": 1.8382640189189574e-05, "loss": 0.0478, "step": 34560 }, { "epoch": 0.5305809224157778, "grad_norm": 0.6249066591262817, "learning_rate": 1.838117926965672e-05, "loss": 0.0466, "step": 34570 }, { "epoch": 0.5307344025784667, "grad_norm": 0.5049318671226501, "learning_rate": 1.8379717748722566e-05, "loss": 0.0494, "step": 34580 }, { "epoch": 0.5308878827411557, "grad_norm": 0.43638914823532104, "learning_rate": 1.8378255626491982e-05, "loss": 0.0412, "step": 34590 }, { "epoch": 0.5310413629038446, "grad_norm": 0.3524591326713562, "learning_rate": 1.837679290306988e-05, "loss": 0.046, "step": 34600 }, { "epoch": 0.5311948430665336, "grad_norm": 0.46183356642723083, "learning_rate": 1.837532957856123e-05, "loss": 0.0411, "step": 34610 }, { "epoch": 0.5313483232292227, "grad_norm": 0.31978970766067505, "learning_rate": 1.8373865653071026e-05, "loss": 0.0338, "step": 34620 }, { "epoch": 0.5315018033919116, "grad_norm": 0.41704410314559937, "learning_rate": 1.8372401126704314e-05, "loss": 0.0426, "step": 34630 }, { "epoch": 0.5316552835546006, "grad_norm": 0.40442442893981934, "learning_rate": 1.837093599956619e-05, "loss": 0.0373, "step": 34640 }, { "epoch": 0.5318087637172896, "grad_norm": 0.7383964657783508, "learning_rate": 1.836947027176178e-05, "loss": 0.0426, "step": 34650 }, { "epoch": 0.5319622438799785, "grad_norm": 0.4229338467121124, "learning_rate": 1.836800394339626e-05, "loss": 0.045, "step": 34660 }, { "epoch": 0.5321157240426675, "grad_norm": 0.49592843651771545, "learning_rate": 1.8366537014574847e-05, "loss": 0.0454, "step": 34670 }, { "epoch": 0.5322692042053565, "grad_norm": 0.39360061287879944, "learning_rate": 1.8365069485402805e-05, "loss": 0.0482, "step": 34680 }, { "epoch": 0.5324226843680454, "grad_norm": 0.35176873207092285, "learning_rate": 1.8363601355985438e-05, "loss": 0.0534, "step": 34690 }, { "epoch": 0.5325761645307344, "grad_norm": 0.465644508600235, "learning_rate": 1.8362132626428087e-05, "loss": 0.0405, "step": 34700 }, { "epoch": 0.5327296446934233, "grad_norm": 0.4352405369281769, "learning_rate": 1.836066329683615e-05, "loss": 0.047, "step": 34710 }, { "epoch": 0.5328831248561123, "grad_norm": 0.5220405459403992, "learning_rate": 1.835919336731506e-05, "loss": 0.038, "step": 34720 }, { "epoch": 0.5330366050188013, "grad_norm": 0.46636828780174255, "learning_rate": 1.835772283797029e-05, "loss": 0.0419, "step": 34730 }, { "epoch": 0.5331900851814902, "grad_norm": 0.4257700443267822, "learning_rate": 1.8356251708907357e-05, "loss": 0.0434, "step": 34740 }, { "epoch": 0.5333435653441793, "grad_norm": 0.4295860230922699, "learning_rate": 1.8354779980231834e-05, "loss": 0.0415, "step": 34750 }, { "epoch": 0.5334970455068683, "grad_norm": 0.33030593395233154, "learning_rate": 1.8353307652049315e-05, "loss": 0.0339, "step": 34760 }, { "epoch": 0.5336505256695572, "grad_norm": 0.3654598593711853, "learning_rate": 1.835183472446546e-05, "loss": 0.0467, "step": 34770 }, { "epoch": 0.5338040058322462, "grad_norm": 0.4548940658569336, "learning_rate": 1.835036119758595e-05, "loss": 0.0365, "step": 34780 }, { "epoch": 0.5339574859949352, "grad_norm": 0.46183285117149353, "learning_rate": 1.8348887071516524e-05, "loss": 0.0389, "step": 34790 }, { "epoch": 0.5341109661576241, "grad_norm": 0.6060793399810791, "learning_rate": 1.834741234636296e-05, "loss": 0.0509, "step": 34800 }, { "epoch": 0.5342644463203131, "grad_norm": 0.44400647282600403, "learning_rate": 1.834593702223108e-05, "loss": 0.0462, "step": 34810 }, { "epoch": 0.5344179264830021, "grad_norm": 0.544955313205719, "learning_rate": 1.8344461099226746e-05, "loss": 0.0372, "step": 34820 }, { "epoch": 0.534571406645691, "grad_norm": 0.591293215751648, "learning_rate": 1.834298457745586e-05, "loss": 0.0439, "step": 34830 }, { "epoch": 0.53472488680838, "grad_norm": 0.3874850273132324, "learning_rate": 1.8341507457024382e-05, "loss": 0.0575, "step": 34840 }, { "epoch": 0.5348783669710689, "grad_norm": 0.3741329312324524, "learning_rate": 1.8340029738038296e-05, "loss": 0.0446, "step": 34850 }, { "epoch": 0.535031847133758, "grad_norm": 0.4814055562019348, "learning_rate": 1.833855142060364e-05, "loss": 0.0503, "step": 34860 }, { "epoch": 0.535185327296447, "grad_norm": 0.5026240944862366, "learning_rate": 1.833707250482649e-05, "loss": 0.0474, "step": 34870 }, { "epoch": 0.5353388074591359, "grad_norm": 0.4921536445617676, "learning_rate": 1.8335592990812974e-05, "loss": 0.0475, "step": 34880 }, { "epoch": 0.5354922876218249, "grad_norm": 0.37328529357910156, "learning_rate": 1.833411287866925e-05, "loss": 0.0364, "step": 34890 }, { "epoch": 0.5356457677845139, "grad_norm": 0.3680925965309143, "learning_rate": 1.8332632168501527e-05, "loss": 0.0471, "step": 34900 }, { "epoch": 0.5357992479472028, "grad_norm": 0.49161264300346375, "learning_rate": 1.833115086041606e-05, "loss": 0.0463, "step": 34910 }, { "epoch": 0.5359527281098918, "grad_norm": 0.5109128952026367, "learning_rate": 1.8329668954519132e-05, "loss": 0.0369, "step": 34920 }, { "epoch": 0.5361062082725808, "grad_norm": 0.5047782063484192, "learning_rate": 1.832818645091708e-05, "loss": 0.0447, "step": 34930 }, { "epoch": 0.5362596884352697, "grad_norm": 0.3950522243976593, "learning_rate": 1.8326703349716296e-05, "loss": 0.0458, "step": 34940 }, { "epoch": 0.5364131685979587, "grad_norm": 0.47063541412353516, "learning_rate": 1.8325219651023188e-05, "loss": 0.0434, "step": 34950 }, { "epoch": 0.5365666487606476, "grad_norm": 0.32803666591644287, "learning_rate": 1.8323735354944225e-05, "loss": 0.0456, "step": 34960 }, { "epoch": 0.5367201289233366, "grad_norm": 0.393889456987381, "learning_rate": 1.8322250461585914e-05, "loss": 0.0441, "step": 34970 }, { "epoch": 0.5368736090860257, "grad_norm": 0.5426432490348816, "learning_rate": 1.8320764971054807e-05, "loss": 0.0393, "step": 34980 }, { "epoch": 0.5370270892487146, "grad_norm": 0.5191671848297119, "learning_rate": 1.8319278883457498e-05, "loss": 0.0406, "step": 34990 }, { "epoch": 0.5371805694114036, "grad_norm": 0.5578767657279968, "learning_rate": 1.8317792198900616e-05, "loss": 0.0417, "step": 35000 }, { "epoch": 0.5373340495740926, "grad_norm": 0.5326303839683533, "learning_rate": 1.831630491749085e-05, "loss": 0.0416, "step": 35010 }, { "epoch": 0.5374875297367815, "grad_norm": 0.3646209239959717, "learning_rate": 1.8314817039334912e-05, "loss": 0.0344, "step": 35020 }, { "epoch": 0.5376410098994705, "grad_norm": 0.45843422412872314, "learning_rate": 1.831332856453957e-05, "loss": 0.0454, "step": 35030 }, { "epoch": 0.5377944900621595, "grad_norm": 0.4691675305366516, "learning_rate": 1.8311839493211635e-05, "loss": 0.0571, "step": 35040 }, { "epoch": 0.5379479702248484, "grad_norm": 0.6175574064254761, "learning_rate": 1.8310349825457955e-05, "loss": 0.0508, "step": 35050 }, { "epoch": 0.5381014503875374, "grad_norm": 0.33192867040634155, "learning_rate": 1.830885956138542e-05, "loss": 0.0368, "step": 35060 }, { "epoch": 0.5382549305502263, "grad_norm": 0.3643374741077423, "learning_rate": 1.8307368701100964e-05, "loss": 0.0429, "step": 35070 }, { "epoch": 0.5384084107129153, "grad_norm": 0.706572949886322, "learning_rate": 1.8305877244711573e-05, "loss": 0.0427, "step": 35080 }, { "epoch": 0.5385618908756044, "grad_norm": 0.4046885669231415, "learning_rate": 1.8304385192324265e-05, "loss": 0.0366, "step": 35090 }, { "epoch": 0.5387153710382933, "grad_norm": 0.6639824509620667, "learning_rate": 1.83028925440461e-05, "loss": 0.0464, "step": 35100 }, { "epoch": 0.5388688512009823, "grad_norm": 0.5063901543617249, "learning_rate": 1.8301399299984187e-05, "loss": 0.0444, "step": 35110 }, { "epoch": 0.5390223313636713, "grad_norm": 0.47596225142478943, "learning_rate": 1.829990546024568e-05, "loss": 0.0395, "step": 35120 }, { "epoch": 0.5391758115263602, "grad_norm": 0.3749367594718933, "learning_rate": 1.829841102493777e-05, "loss": 0.0414, "step": 35130 }, { "epoch": 0.5393292916890492, "grad_norm": 0.2993697226047516, "learning_rate": 1.829691599416768e-05, "loss": 0.0415, "step": 35140 }, { "epoch": 0.5394827718517382, "grad_norm": 0.3702605366706848, "learning_rate": 1.8295420368042706e-05, "loss": 0.0447, "step": 35150 }, { "epoch": 0.5396362520144271, "grad_norm": 0.42248430848121643, "learning_rate": 1.8293924146670152e-05, "loss": 0.0423, "step": 35160 }, { "epoch": 0.5397897321771161, "grad_norm": 0.4335796535015106, "learning_rate": 1.829242733015739e-05, "loss": 0.0422, "step": 35170 }, { "epoch": 0.5399432123398051, "grad_norm": 0.3822646737098694, "learning_rate": 1.829092991861183e-05, "loss": 0.0438, "step": 35180 }, { "epoch": 0.540096692502494, "grad_norm": 0.4806510806083679, "learning_rate": 1.828943191214091e-05, "loss": 0.0459, "step": 35190 }, { "epoch": 0.540250172665183, "grad_norm": 0.35968881845474243, "learning_rate": 1.828793331085213e-05, "loss": 0.0454, "step": 35200 }, { "epoch": 0.540403652827872, "grad_norm": 0.6676110029220581, "learning_rate": 1.8286434114853017e-05, "loss": 0.0527, "step": 35210 }, { "epoch": 0.540557132990561, "grad_norm": 0.5583232641220093, "learning_rate": 1.828493432425115e-05, "loss": 0.052, "step": 35220 }, { "epoch": 0.54071061315325, "grad_norm": 0.5600141286849976, "learning_rate": 1.828343393915415e-05, "loss": 0.0557, "step": 35230 }, { "epoch": 0.5408640933159389, "grad_norm": 0.48151493072509766, "learning_rate": 1.8281932959669674e-05, "loss": 0.0367, "step": 35240 }, { "epoch": 0.5410175734786279, "grad_norm": 0.4244171977043152, "learning_rate": 1.8280431385905433e-05, "loss": 0.0385, "step": 35250 }, { "epoch": 0.5411710536413169, "grad_norm": 0.46808379888534546, "learning_rate": 1.8278929217969172e-05, "loss": 0.0596, "step": 35260 }, { "epoch": 0.5413245338040058, "grad_norm": 0.43112924695014954, "learning_rate": 1.827742645596868e-05, "loss": 0.0459, "step": 35270 }, { "epoch": 0.5414780139666948, "grad_norm": 0.5195664167404175, "learning_rate": 1.827592310001179e-05, "loss": 0.0399, "step": 35280 }, { "epoch": 0.5416314941293838, "grad_norm": 0.5199666619300842, "learning_rate": 1.8274419150206376e-05, "loss": 0.0393, "step": 35290 }, { "epoch": 0.5417849742920727, "grad_norm": 0.3979943096637726, "learning_rate": 1.8272914606660356e-05, "loss": 0.0332, "step": 35300 }, { "epoch": 0.5419384544547617, "grad_norm": 0.43926578760147095, "learning_rate": 1.8271409469481693e-05, "loss": 0.0402, "step": 35310 }, { "epoch": 0.5420919346174506, "grad_norm": 0.37750643491744995, "learning_rate": 1.8269903738778383e-05, "loss": 0.0357, "step": 35320 }, { "epoch": 0.5422454147801397, "grad_norm": 0.42464902997016907, "learning_rate": 1.8268397414658482e-05, "loss": 0.0509, "step": 35330 }, { "epoch": 0.5423988949428287, "grad_norm": 0.500552237033844, "learning_rate": 1.8266890497230067e-05, "loss": 0.0367, "step": 35340 }, { "epoch": 0.5425523751055176, "grad_norm": 0.34259214997291565, "learning_rate": 1.8265382986601277e-05, "loss": 0.0391, "step": 35350 }, { "epoch": 0.5427058552682066, "grad_norm": 0.3773353397846222, "learning_rate": 1.8263874882880278e-05, "loss": 0.0497, "step": 35360 }, { "epoch": 0.5428593354308956, "grad_norm": 0.3491387665271759, "learning_rate": 1.826236618617529e-05, "loss": 0.0415, "step": 35370 }, { "epoch": 0.5430128155935845, "grad_norm": 0.4654329717159271, "learning_rate": 1.8260856896594576e-05, "loss": 0.048, "step": 35380 }, { "epoch": 0.5431662957562735, "grad_norm": 0.6055399179458618, "learning_rate": 1.825934701424643e-05, "loss": 0.0376, "step": 35390 }, { "epoch": 0.5433197759189625, "grad_norm": 0.8041864037513733, "learning_rate": 1.825783653923919e-05, "loss": 0.0498, "step": 35400 }, { "epoch": 0.5434732560816514, "grad_norm": 0.40268009901046753, "learning_rate": 1.825632547168126e-05, "loss": 0.0479, "step": 35410 }, { "epoch": 0.5436267362443404, "grad_norm": 0.31258442997932434, "learning_rate": 1.8254813811681047e-05, "loss": 0.041, "step": 35420 }, { "epoch": 0.5437802164070293, "grad_norm": 0.4558827579021454, "learning_rate": 1.825330155934704e-05, "loss": 0.0406, "step": 35430 }, { "epoch": 0.5439336965697184, "grad_norm": 0.47218507528305054, "learning_rate": 1.825178871478774e-05, "loss": 0.0417, "step": 35440 }, { "epoch": 0.5440871767324074, "grad_norm": 0.3454698324203491, "learning_rate": 1.825027527811171e-05, "loss": 0.0353, "step": 35450 }, { "epoch": 0.5442406568950963, "grad_norm": 0.4821479916572571, "learning_rate": 1.824876124942754e-05, "loss": 0.0495, "step": 35460 }, { "epoch": 0.5443941370577853, "grad_norm": 0.39373624324798584, "learning_rate": 1.8247246628843887e-05, "loss": 0.0371, "step": 35470 }, { "epoch": 0.5445476172204743, "grad_norm": 0.5192659497261047, "learning_rate": 1.824573141646942e-05, "loss": 0.0473, "step": 35480 }, { "epoch": 0.5447010973831632, "grad_norm": 0.4820541739463806, "learning_rate": 1.824421561241287e-05, "loss": 0.0565, "step": 35490 }, { "epoch": 0.5448545775458522, "grad_norm": 0.585239589214325, "learning_rate": 1.8242699216783004e-05, "loss": 0.0446, "step": 35500 }, { "epoch": 0.5450080577085412, "grad_norm": 0.5263137221336365, "learning_rate": 1.8241182229688628e-05, "loss": 0.0542, "step": 35510 }, { "epoch": 0.5451615378712301, "grad_norm": 0.617027759552002, "learning_rate": 1.8239664651238607e-05, "loss": 0.0474, "step": 35520 }, { "epoch": 0.5453150180339191, "grad_norm": 0.2739311158657074, "learning_rate": 1.823814648154183e-05, "loss": 0.048, "step": 35530 }, { "epoch": 0.5454684981966081, "grad_norm": 0.3805514872074127, "learning_rate": 1.823662772070723e-05, "loss": 0.0422, "step": 35540 }, { "epoch": 0.545621978359297, "grad_norm": 0.5683437585830688, "learning_rate": 1.8235108368843793e-05, "loss": 0.0407, "step": 35550 }, { "epoch": 0.5457754585219861, "grad_norm": 0.5324431657791138, "learning_rate": 1.8233588426060545e-05, "loss": 0.0479, "step": 35560 }, { "epoch": 0.545928938684675, "grad_norm": 0.6017602682113647, "learning_rate": 1.823206789246655e-05, "loss": 0.0575, "step": 35570 }, { "epoch": 0.546082418847364, "grad_norm": 0.788719892501831, "learning_rate": 1.823054676817091e-05, "loss": 0.0473, "step": 35580 }, { "epoch": 0.546235899010053, "grad_norm": 0.4107734262943268, "learning_rate": 1.8229025053282776e-05, "loss": 0.0412, "step": 35590 }, { "epoch": 0.5463893791727419, "grad_norm": 0.5349747538566589, "learning_rate": 1.8227502747911346e-05, "loss": 0.041, "step": 35600 }, { "epoch": 0.5465428593354309, "grad_norm": 0.37744569778442383, "learning_rate": 1.822597985216585e-05, "loss": 0.0312, "step": 35610 }, { "epoch": 0.5466963394981199, "grad_norm": 0.3755771219730377, "learning_rate": 1.8224456366155566e-05, "loss": 0.0479, "step": 35620 }, { "epoch": 0.5468498196608088, "grad_norm": 0.7841432094573975, "learning_rate": 1.8222932289989816e-05, "loss": 0.0372, "step": 35630 }, { "epoch": 0.5470032998234978, "grad_norm": 0.5129373073577881, "learning_rate": 1.8221407623777957e-05, "loss": 0.0404, "step": 35640 }, { "epoch": 0.5471567799861868, "grad_norm": 0.4765455424785614, "learning_rate": 1.82198823676294e-05, "loss": 0.0463, "step": 35650 }, { "epoch": 0.5473102601488757, "grad_norm": 0.4846649765968323, "learning_rate": 1.8218356521653587e-05, "loss": 0.0387, "step": 35660 }, { "epoch": 0.5474637403115648, "grad_norm": 0.49485674500465393, "learning_rate": 1.8216830085960007e-05, "loss": 0.0361, "step": 35670 }, { "epoch": 0.5476172204742537, "grad_norm": 0.4781188666820526, "learning_rate": 1.821530306065819e-05, "loss": 0.0498, "step": 35680 }, { "epoch": 0.5477707006369427, "grad_norm": 0.5014233589172363, "learning_rate": 1.8213775445857716e-05, "loss": 0.048, "step": 35690 }, { "epoch": 0.5479241807996317, "grad_norm": 0.5729696750640869, "learning_rate": 1.8212247241668193e-05, "loss": 0.0385, "step": 35700 }, { "epoch": 0.5480776609623206, "grad_norm": 0.5076062679290771, "learning_rate": 1.8210718448199282e-05, "loss": 0.0377, "step": 35710 }, { "epoch": 0.5482311411250096, "grad_norm": 0.4321064054965973, "learning_rate": 1.8209189065560687e-05, "loss": 0.0378, "step": 35720 }, { "epoch": 0.5483846212876986, "grad_norm": 0.687508761882782, "learning_rate": 1.8207659093862143e-05, "loss": 0.0368, "step": 35730 }, { "epoch": 0.5485381014503875, "grad_norm": 0.6888679265975952, "learning_rate": 1.8206128533213442e-05, "loss": 0.054, "step": 35740 }, { "epoch": 0.5486915816130765, "grad_norm": 0.5395651459693909, "learning_rate": 1.8204597383724408e-05, "loss": 0.0438, "step": 35750 }, { "epoch": 0.5488450617757655, "grad_norm": 0.7415543794631958, "learning_rate": 1.820306564550491e-05, "loss": 0.0441, "step": 35760 }, { "epoch": 0.5489985419384544, "grad_norm": 0.39380496740341187, "learning_rate": 1.820153331866486e-05, "loss": 0.047, "step": 35770 }, { "epoch": 0.5491520221011434, "grad_norm": 0.3733890950679779, "learning_rate": 1.8200000403314215e-05, "loss": 0.0389, "step": 35780 }, { "epoch": 0.5493055022638323, "grad_norm": 0.46455541253089905, "learning_rate": 1.819846689956296e-05, "loss": 0.0452, "step": 35790 }, { "epoch": 0.5494589824265214, "grad_norm": 0.3636070489883423, "learning_rate": 1.819693280752115e-05, "loss": 0.0489, "step": 35800 }, { "epoch": 0.5496124625892104, "grad_norm": 0.4903065860271454, "learning_rate": 1.8195398127298857e-05, "loss": 0.0505, "step": 35810 }, { "epoch": 0.5497659427518993, "grad_norm": 0.43911176919937134, "learning_rate": 1.81938628590062e-05, "loss": 0.0378, "step": 35820 }, { "epoch": 0.5499194229145883, "grad_norm": 0.41352248191833496, "learning_rate": 1.819232700275335e-05, "loss": 0.0551, "step": 35830 }, { "epoch": 0.5500729030772773, "grad_norm": 0.4506373703479767, "learning_rate": 1.8190790558650512e-05, "loss": 0.0373, "step": 35840 }, { "epoch": 0.5502263832399662, "grad_norm": 0.5587015748023987, "learning_rate": 1.8189253526807937e-05, "loss": 0.0477, "step": 35850 }, { "epoch": 0.5503798634026552, "grad_norm": 0.43454447388648987, "learning_rate": 1.8187715907335916e-05, "loss": 0.0359, "step": 35860 }, { "epoch": 0.5505333435653442, "grad_norm": 0.41078394651412964, "learning_rate": 1.8186177700344777e-05, "loss": 0.0449, "step": 35870 }, { "epoch": 0.5506868237280331, "grad_norm": 0.43617793917655945, "learning_rate": 1.8184638905944905e-05, "loss": 0.0428, "step": 35880 }, { "epoch": 0.5508403038907221, "grad_norm": 0.32857823371887207, "learning_rate": 1.818309952424671e-05, "loss": 0.048, "step": 35890 }, { "epoch": 0.5509937840534112, "grad_norm": 0.3407353162765503, "learning_rate": 1.818155955536066e-05, "loss": 0.047, "step": 35900 }, { "epoch": 0.5511472642161, "grad_norm": 0.35759761929512024, "learning_rate": 1.818001899939725e-05, "loss": 0.0467, "step": 35910 }, { "epoch": 0.5513007443787891, "grad_norm": 0.2556246817111969, "learning_rate": 1.8178477856467027e-05, "loss": 0.0336, "step": 35920 }, { "epoch": 0.551454224541478, "grad_norm": 0.27738580107688904, "learning_rate": 1.817693612668058e-05, "loss": 0.0358, "step": 35930 }, { "epoch": 0.551607704704167, "grad_norm": 0.43962109088897705, "learning_rate": 1.8175393810148535e-05, "loss": 0.0395, "step": 35940 }, { "epoch": 0.551761184866856, "grad_norm": 0.3326188921928406, "learning_rate": 1.817385090698156e-05, "loss": 0.0392, "step": 35950 }, { "epoch": 0.5519146650295449, "grad_norm": 0.3619239926338196, "learning_rate": 1.8172307417290376e-05, "loss": 0.0406, "step": 35960 }, { "epoch": 0.5520681451922339, "grad_norm": 0.3755476772785187, "learning_rate": 1.817076334118573e-05, "loss": 0.0496, "step": 35970 }, { "epoch": 0.5522216253549229, "grad_norm": 0.2569466829299927, "learning_rate": 1.8169218678778426e-05, "loss": 0.0456, "step": 35980 }, { "epoch": 0.5523751055176118, "grad_norm": 0.48843398690223694, "learning_rate": 1.8167673430179294e-05, "loss": 0.0322, "step": 35990 }, { "epoch": 0.5525285856803008, "grad_norm": 0.3979971408843994, "learning_rate": 1.8166127595499218e-05, "loss": 0.0428, "step": 36000 }, { "epoch": 0.5526820658429898, "grad_norm": 0.5675497055053711, "learning_rate": 1.816458117484913e-05, "loss": 0.0383, "step": 36010 }, { "epoch": 0.5528355460056787, "grad_norm": 0.40899187326431274, "learning_rate": 1.8163034168339987e-05, "loss": 0.0471, "step": 36020 }, { "epoch": 0.5529890261683678, "grad_norm": 0.35561299324035645, "learning_rate": 1.8161486576082795e-05, "loss": 0.0366, "step": 36030 }, { "epoch": 0.5531425063310567, "grad_norm": 0.41203832626342773, "learning_rate": 1.8159938398188606e-05, "loss": 0.0486, "step": 36040 }, { "epoch": 0.5532959864937457, "grad_norm": 0.3305829167366028, "learning_rate": 1.8158389634768512e-05, "loss": 0.0327, "step": 36050 }, { "epoch": 0.5534494666564347, "grad_norm": 0.41304871439933777, "learning_rate": 1.815684028593365e-05, "loss": 0.0438, "step": 36060 }, { "epoch": 0.5536029468191236, "grad_norm": 0.4320225417613983, "learning_rate": 1.8155290351795183e-05, "loss": 0.0475, "step": 36070 }, { "epoch": 0.5537564269818126, "grad_norm": 0.449564665555954, "learning_rate": 1.8153739832464342e-05, "loss": 0.0294, "step": 36080 }, { "epoch": 0.5539099071445016, "grad_norm": 0.4456407427787781, "learning_rate": 1.8152188728052377e-05, "loss": 0.0373, "step": 36090 }, { "epoch": 0.5540633873071905, "grad_norm": 0.3890218436717987, "learning_rate": 1.8150637038670598e-05, "loss": 0.0397, "step": 36100 }, { "epoch": 0.5542168674698795, "grad_norm": 0.3012431263923645, "learning_rate": 1.814908476443034e-05, "loss": 0.0355, "step": 36110 }, { "epoch": 0.5543703476325685, "grad_norm": 0.40969955921173096, "learning_rate": 1.814753190544299e-05, "loss": 0.0464, "step": 36120 }, { "epoch": 0.5545238277952574, "grad_norm": 0.4920709729194641, "learning_rate": 1.814597846181998e-05, "loss": 0.0396, "step": 36130 }, { "epoch": 0.5546773079579465, "grad_norm": 0.46134212613105774, "learning_rate": 1.8144424433672776e-05, "loss": 0.035, "step": 36140 }, { "epoch": 0.5548307881206354, "grad_norm": 0.39137002825737, "learning_rate": 1.8142869821112886e-05, "loss": 0.0393, "step": 36150 }, { "epoch": 0.5549842682833244, "grad_norm": 0.6885603070259094, "learning_rate": 1.8141314624251868e-05, "loss": 0.0642, "step": 36160 }, { "epoch": 0.5551377484460134, "grad_norm": 0.4908393323421478, "learning_rate": 1.8139758843201316e-05, "loss": 0.0493, "step": 36170 }, { "epoch": 0.5552912286087023, "grad_norm": 0.5566482543945312, "learning_rate": 1.8138202478072867e-05, "loss": 0.0467, "step": 36180 }, { "epoch": 0.5554447087713913, "grad_norm": 0.4980156123638153, "learning_rate": 1.8136645528978193e-05, "loss": 0.0347, "step": 36190 }, { "epoch": 0.5555981889340803, "grad_norm": 0.8054441809654236, "learning_rate": 1.8135087996029024e-05, "loss": 0.0467, "step": 36200 }, { "epoch": 0.5557516690967692, "grad_norm": 0.35514965653419495, "learning_rate": 1.8133529879337118e-05, "loss": 0.0378, "step": 36210 }, { "epoch": 0.5559051492594582, "grad_norm": 0.5331484079360962, "learning_rate": 1.813197117901428e-05, "loss": 0.0442, "step": 36220 }, { "epoch": 0.5560586294221472, "grad_norm": 0.45212408900260925, "learning_rate": 1.813041189517236e-05, "loss": 0.0475, "step": 36230 }, { "epoch": 0.5562121095848361, "grad_norm": 0.41103804111480713, "learning_rate": 1.812885202792324e-05, "loss": 0.0488, "step": 36240 }, { "epoch": 0.5563655897475251, "grad_norm": 0.37902864813804626, "learning_rate": 1.8127291577378857e-05, "loss": 0.0412, "step": 36250 }, { "epoch": 0.5565190699102142, "grad_norm": 0.5582243800163269, "learning_rate": 1.8125730543651174e-05, "loss": 0.0354, "step": 36260 }, { "epoch": 0.5566725500729031, "grad_norm": 0.5622771382331848, "learning_rate": 1.8124168926852214e-05, "loss": 0.0413, "step": 36270 }, { "epoch": 0.5568260302355921, "grad_norm": 0.4500199556350708, "learning_rate": 1.8122606727094025e-05, "loss": 0.0499, "step": 36280 }, { "epoch": 0.556979510398281, "grad_norm": 0.4609386622905731, "learning_rate": 1.812104394448871e-05, "loss": 0.0345, "step": 36290 }, { "epoch": 0.55713299056097, "grad_norm": 0.3971070647239685, "learning_rate": 1.8119480579148407e-05, "loss": 0.0419, "step": 36300 }, { "epoch": 0.557286470723659, "grad_norm": 0.343391090631485, "learning_rate": 1.8117916631185295e-05, "loss": 0.0347, "step": 36310 }, { "epoch": 0.5574399508863479, "grad_norm": 0.5738260746002197, "learning_rate": 1.8116352100711598e-05, "loss": 0.0434, "step": 36320 }, { "epoch": 0.5575934310490369, "grad_norm": 0.40211692452430725, "learning_rate": 1.811478698783958e-05, "loss": 0.0408, "step": 36330 }, { "epoch": 0.5577469112117259, "grad_norm": 0.49873027205467224, "learning_rate": 1.811322129268155e-05, "loss": 0.0504, "step": 36340 }, { "epoch": 0.5579003913744148, "grad_norm": 0.31260791420936584, "learning_rate": 1.8111655015349855e-05, "loss": 0.0356, "step": 36350 }, { "epoch": 0.5580538715371038, "grad_norm": 0.47723352909088135, "learning_rate": 1.8110088155956887e-05, "loss": 0.0331, "step": 36360 }, { "epoch": 0.5582073516997929, "grad_norm": 0.34700632095336914, "learning_rate": 1.8108520714615073e-05, "loss": 0.0413, "step": 36370 }, { "epoch": 0.5583608318624818, "grad_norm": 0.459831565618515, "learning_rate": 1.8106952691436888e-05, "loss": 0.0392, "step": 36380 }, { "epoch": 0.5585143120251708, "grad_norm": 0.3124781548976898, "learning_rate": 1.8105384086534852e-05, "loss": 0.0451, "step": 36390 }, { "epoch": 0.5586677921878597, "grad_norm": 0.5561027526855469, "learning_rate": 1.8103814900021515e-05, "loss": 0.0499, "step": 36400 }, { "epoch": 0.5588212723505487, "grad_norm": 0.31677860021591187, "learning_rate": 1.8102245132009482e-05, "loss": 0.0324, "step": 36410 }, { "epoch": 0.5589747525132377, "grad_norm": 0.3475092351436615, "learning_rate": 1.8100674782611392e-05, "loss": 0.0365, "step": 36420 }, { "epoch": 0.5591282326759266, "grad_norm": 0.4076044261455536, "learning_rate": 1.8099103851939923e-05, "loss": 0.0381, "step": 36430 }, { "epoch": 0.5592817128386156, "grad_norm": 0.5526824593544006, "learning_rate": 1.8097532340107803e-05, "loss": 0.0363, "step": 36440 }, { "epoch": 0.5594351930013046, "grad_norm": 0.39859312772750854, "learning_rate": 1.80959602472278e-05, "loss": 0.0461, "step": 36450 }, { "epoch": 0.5595886731639935, "grad_norm": 0.4137330949306488, "learning_rate": 1.8094387573412718e-05, "loss": 0.0387, "step": 36460 }, { "epoch": 0.5597421533266825, "grad_norm": 0.568088710308075, "learning_rate": 1.80928143187754e-05, "loss": 0.043, "step": 36470 }, { "epoch": 0.5598956334893715, "grad_norm": 0.35473719239234924, "learning_rate": 1.809124048342875e-05, "loss": 0.0406, "step": 36480 }, { "epoch": 0.5600491136520604, "grad_norm": 0.31305214762687683, "learning_rate": 1.808966606748569e-05, "loss": 0.0372, "step": 36490 }, { "epoch": 0.5602025938147495, "grad_norm": 0.4855826199054718, "learning_rate": 1.80880910710592e-05, "loss": 0.0464, "step": 36500 }, { "epoch": 0.5603560739774384, "grad_norm": 0.40804943442344666, "learning_rate": 1.808651549426229e-05, "loss": 0.0373, "step": 36510 }, { "epoch": 0.5605095541401274, "grad_norm": 0.41995424032211304, "learning_rate": 1.808493933720802e-05, "loss": 0.0436, "step": 36520 }, { "epoch": 0.5606630343028164, "grad_norm": 0.41806551814079285, "learning_rate": 1.8083362600009496e-05, "loss": 0.0383, "step": 36530 }, { "epoch": 0.5608165144655053, "grad_norm": 0.29031234979629517, "learning_rate": 1.8081785282779848e-05, "loss": 0.049, "step": 36540 }, { "epoch": 0.5609699946281943, "grad_norm": 0.5223950743675232, "learning_rate": 1.808020738563226e-05, "loss": 0.0455, "step": 36550 }, { "epoch": 0.5611234747908833, "grad_norm": 0.37213411927223206, "learning_rate": 1.807862890867996e-05, "loss": 0.0404, "step": 36560 }, { "epoch": 0.5612769549535722, "grad_norm": 0.39316263794898987, "learning_rate": 1.8077049852036212e-05, "loss": 0.049, "step": 36570 }, { "epoch": 0.5614304351162612, "grad_norm": 0.4939298927783966, "learning_rate": 1.8075470215814323e-05, "loss": 0.0529, "step": 36580 }, { "epoch": 0.5615839152789502, "grad_norm": 0.6444617509841919, "learning_rate": 1.8073890000127644e-05, "loss": 0.0409, "step": 36590 }, { "epoch": 0.5617373954416391, "grad_norm": 0.45280006527900696, "learning_rate": 1.8072309205089558e-05, "loss": 0.038, "step": 36600 }, { "epoch": 0.5618908756043282, "grad_norm": 0.43052101135253906, "learning_rate": 1.8070727830813503e-05, "loss": 0.0389, "step": 36610 }, { "epoch": 0.5620443557670172, "grad_norm": 0.27372944355010986, "learning_rate": 1.8069145877412952e-05, "loss": 0.0397, "step": 36620 }, { "epoch": 0.5621978359297061, "grad_norm": 0.4360068738460541, "learning_rate": 1.8067563345001417e-05, "loss": 0.0461, "step": 36630 }, { "epoch": 0.5623513160923951, "grad_norm": 0.45602428913116455, "learning_rate": 1.8065980233692455e-05, "loss": 0.0492, "step": 36640 }, { "epoch": 0.562504796255084, "grad_norm": 0.3685436546802521, "learning_rate": 1.806439654359967e-05, "loss": 0.0404, "step": 36650 }, { "epoch": 0.562658276417773, "grad_norm": 0.4714842736721039, "learning_rate": 1.806281227483669e-05, "loss": 0.0446, "step": 36660 }, { "epoch": 0.562811756580462, "grad_norm": 0.486915647983551, "learning_rate": 1.806122742751721e-05, "loss": 0.0429, "step": 36670 }, { "epoch": 0.5629652367431509, "grad_norm": 0.46010881662368774, "learning_rate": 1.805964200175494e-05, "loss": 0.0514, "step": 36680 }, { "epoch": 0.5631187169058399, "grad_norm": 0.44593149423599243, "learning_rate": 1.805805599766365e-05, "loss": 0.0392, "step": 36690 }, { "epoch": 0.5632721970685289, "grad_norm": 0.45579618215560913, "learning_rate": 1.805646941535715e-05, "loss": 0.0472, "step": 36700 }, { "epoch": 0.5634256772312178, "grad_norm": 0.40003615617752075, "learning_rate": 1.8054882254949277e-05, "loss": 0.0415, "step": 36710 }, { "epoch": 0.5635791573939068, "grad_norm": 0.4932726323604584, "learning_rate": 1.8053294516553924e-05, "loss": 0.0345, "step": 36720 }, { "epoch": 0.5637326375565959, "grad_norm": 0.5982106924057007, "learning_rate": 1.8051706200285025e-05, "loss": 0.0441, "step": 36730 }, { "epoch": 0.5638861177192848, "grad_norm": 0.5221298933029175, "learning_rate": 1.8050117306256544e-05, "loss": 0.0413, "step": 36740 }, { "epoch": 0.5640395978819738, "grad_norm": 0.5786097645759583, "learning_rate": 1.80485278345825e-05, "loss": 0.0463, "step": 36750 }, { "epoch": 0.5641930780446627, "grad_norm": 0.4673146605491638, "learning_rate": 1.8046937785376944e-05, "loss": 0.0447, "step": 36760 }, { "epoch": 0.5643465582073517, "grad_norm": 0.5066752433776855, "learning_rate": 1.8045347158753978e-05, "loss": 0.0452, "step": 36770 }, { "epoch": 0.5645000383700407, "grad_norm": 0.44356679916381836, "learning_rate": 1.8043755954827727e-05, "loss": 0.0454, "step": 36780 }, { "epoch": 0.5646535185327296, "grad_norm": 0.48287034034729004, "learning_rate": 1.8042164173712383e-05, "loss": 0.0362, "step": 36790 }, { "epoch": 0.5648069986954186, "grad_norm": 0.3515676259994507, "learning_rate": 1.8040571815522158e-05, "loss": 0.0462, "step": 36800 }, { "epoch": 0.5649604788581076, "grad_norm": 0.3954462707042694, "learning_rate": 1.8038978880371318e-05, "loss": 0.0369, "step": 36810 }, { "epoch": 0.5651139590207965, "grad_norm": 0.47414031624794006, "learning_rate": 1.803738536837416e-05, "loss": 0.0406, "step": 36820 }, { "epoch": 0.5652674391834855, "grad_norm": 0.4596673548221588, "learning_rate": 1.8035791279645037e-05, "loss": 0.0412, "step": 36830 }, { "epoch": 0.5654209193461746, "grad_norm": 0.5553268194198608, "learning_rate": 1.8034196614298326e-05, "loss": 0.0408, "step": 36840 }, { "epoch": 0.5655743995088635, "grad_norm": 0.5434901714324951, "learning_rate": 1.803260137244846e-05, "loss": 0.0556, "step": 36850 }, { "epoch": 0.5657278796715525, "grad_norm": 0.6038424968719482, "learning_rate": 1.8031005554209904e-05, "loss": 0.0447, "step": 36860 }, { "epoch": 0.5658813598342414, "grad_norm": 0.5617937445640564, "learning_rate": 1.802940915969717e-05, "loss": 0.0474, "step": 36870 }, { "epoch": 0.5660348399969304, "grad_norm": 0.4476713240146637, "learning_rate": 1.8027812189024808e-05, "loss": 0.0397, "step": 36880 }, { "epoch": 0.5661883201596194, "grad_norm": 0.3564968407154083, "learning_rate": 1.802621464230741e-05, "loss": 0.0375, "step": 36890 }, { "epoch": 0.5663418003223083, "grad_norm": 0.42205050587654114, "learning_rate": 1.8024616519659613e-05, "loss": 0.0468, "step": 36900 }, { "epoch": 0.5664952804849973, "grad_norm": 0.580046534538269, "learning_rate": 1.8023017821196088e-05, "loss": 0.0493, "step": 36910 }, { "epoch": 0.5666487606476863, "grad_norm": 0.5057944655418396, "learning_rate": 1.8021418547031552e-05, "loss": 0.0421, "step": 36920 }, { "epoch": 0.5668022408103752, "grad_norm": 0.5046705603599548, "learning_rate": 1.8019818697280764e-05, "loss": 0.0421, "step": 36930 }, { "epoch": 0.5669557209730642, "grad_norm": 0.3030637204647064, "learning_rate": 1.8018218272058525e-05, "loss": 0.0413, "step": 36940 }, { "epoch": 0.5671092011357532, "grad_norm": 0.6930097341537476, "learning_rate": 1.8016617271479673e-05, "loss": 0.0432, "step": 36950 }, { "epoch": 0.5672626812984422, "grad_norm": 0.6426254510879517, "learning_rate": 1.8015015695659087e-05, "loss": 0.0522, "step": 36960 }, { "epoch": 0.5674161614611312, "grad_norm": 0.6435261964797974, "learning_rate": 1.8013413544711698e-05, "loss": 0.0561, "step": 36970 }, { "epoch": 0.5675696416238202, "grad_norm": 0.6104083061218262, "learning_rate": 1.8011810818752465e-05, "loss": 0.0439, "step": 36980 }, { "epoch": 0.5677231217865091, "grad_norm": 0.5614802837371826, "learning_rate": 1.801020751789639e-05, "loss": 0.0581, "step": 36990 }, { "epoch": 0.5678766019491981, "grad_norm": 0.4120694696903229, "learning_rate": 1.8008603642258525e-05, "loss": 0.0536, "step": 37000 }, { "epoch": 0.568030082111887, "grad_norm": 0.46043846011161804, "learning_rate": 1.800699919195396e-05, "loss": 0.0488, "step": 37010 }, { "epoch": 0.568183562274576, "grad_norm": 0.32835111021995544, "learning_rate": 1.8005394167097814e-05, "loss": 0.0355, "step": 37020 }, { "epoch": 0.568337042437265, "grad_norm": 0.5420152544975281, "learning_rate": 1.8003788567805266e-05, "loss": 0.0376, "step": 37030 }, { "epoch": 0.5684905225999539, "grad_norm": 0.3327960669994354, "learning_rate": 1.8002182394191528e-05, "loss": 0.0457, "step": 37040 }, { "epoch": 0.5686440027626429, "grad_norm": 0.35897842049598694, "learning_rate": 1.800057564637185e-05, "loss": 0.0389, "step": 37050 }, { "epoch": 0.5687974829253319, "grad_norm": 0.5613123774528503, "learning_rate": 1.7998968324461524e-05, "loss": 0.0427, "step": 37060 }, { "epoch": 0.5689509630880208, "grad_norm": 0.489184707403183, "learning_rate": 1.799736042857589e-05, "loss": 0.053, "step": 37070 }, { "epoch": 0.5691044432507099, "grad_norm": 0.346587598323822, "learning_rate": 1.7995751958830326e-05, "loss": 0.0359, "step": 37080 }, { "epoch": 0.5692579234133989, "grad_norm": 0.4589284360408783, "learning_rate": 1.799414291534024e-05, "loss": 0.0477, "step": 37090 }, { "epoch": 0.5694114035760878, "grad_norm": 0.6294649839401245, "learning_rate": 1.79925332982211e-05, "loss": 0.0502, "step": 37100 }, { "epoch": 0.5695648837387768, "grad_norm": 0.36937373876571655, "learning_rate": 1.79909231075884e-05, "loss": 0.0366, "step": 37110 }, { "epoch": 0.5697183639014657, "grad_norm": 0.39120587706565857, "learning_rate": 1.798931234355769e-05, "loss": 0.0393, "step": 37120 }, { "epoch": 0.5698718440641547, "grad_norm": 0.669928789138794, "learning_rate": 1.798770100624454e-05, "loss": 0.0466, "step": 37130 }, { "epoch": 0.5700253242268437, "grad_norm": 0.32697805762290955, "learning_rate": 1.798608909576458e-05, "loss": 0.0337, "step": 37140 }, { "epoch": 0.5701788043895326, "grad_norm": 0.5022000670433044, "learning_rate": 1.7984476612233478e-05, "loss": 0.0368, "step": 37150 }, { "epoch": 0.5703322845522216, "grad_norm": 0.3993085026741028, "learning_rate": 1.7982863555766932e-05, "loss": 0.039, "step": 37160 }, { "epoch": 0.5704857647149106, "grad_norm": 0.3425785005092621, "learning_rate": 1.798124992648069e-05, "loss": 0.051, "step": 37170 }, { "epoch": 0.5706392448775995, "grad_norm": 0.4352269768714905, "learning_rate": 1.797963572449055e-05, "loss": 0.0368, "step": 37180 }, { "epoch": 0.5707927250402886, "grad_norm": 0.49355435371398926, "learning_rate": 1.7978020949912327e-05, "loss": 0.0505, "step": 37190 }, { "epoch": 0.5709462052029776, "grad_norm": 0.2685209810733795, "learning_rate": 1.79764056028619e-05, "loss": 0.0349, "step": 37200 }, { "epoch": 0.5710996853656665, "grad_norm": 0.4121328294277191, "learning_rate": 1.7974789683455177e-05, "loss": 0.0405, "step": 37210 }, { "epoch": 0.5712531655283555, "grad_norm": 0.5890661478042603, "learning_rate": 1.797317319180811e-05, "loss": 0.041, "step": 37220 }, { "epoch": 0.5714066456910444, "grad_norm": 0.45462465286254883, "learning_rate": 1.7971556128036693e-05, "loss": 0.0403, "step": 37230 }, { "epoch": 0.5715601258537334, "grad_norm": 0.2845461368560791, "learning_rate": 1.7969938492256958e-05, "loss": 0.037, "step": 37240 }, { "epoch": 0.5717136060164224, "grad_norm": 0.46260693669319153, "learning_rate": 1.7968320284584985e-05, "loss": 0.0491, "step": 37250 }, { "epoch": 0.5718670861791113, "grad_norm": 0.5565980076789856, "learning_rate": 1.7966701505136888e-05, "loss": 0.0437, "step": 37260 }, { "epoch": 0.5720205663418003, "grad_norm": 0.28107383847236633, "learning_rate": 1.796508215402882e-05, "loss": 0.0433, "step": 37270 }, { "epoch": 0.5721740465044893, "grad_norm": 0.5599108338356018, "learning_rate": 1.7963462231376988e-05, "loss": 0.0403, "step": 37280 }, { "epoch": 0.5723275266671782, "grad_norm": 0.4405064284801483, "learning_rate": 1.7961841737297627e-05, "loss": 0.0439, "step": 37290 }, { "epoch": 0.5724810068298672, "grad_norm": 0.548443078994751, "learning_rate": 1.7960220671907018e-05, "loss": 0.044, "step": 37300 }, { "epoch": 0.5726344869925563, "grad_norm": 0.5528575778007507, "learning_rate": 1.795859903532148e-05, "loss": 0.0373, "step": 37310 }, { "epoch": 0.5727879671552452, "grad_norm": 0.4312061667442322, "learning_rate": 1.7956976827657376e-05, "loss": 0.0392, "step": 37320 }, { "epoch": 0.5729414473179342, "grad_norm": 0.3518524765968323, "learning_rate": 1.7955354049031114e-05, "loss": 0.036, "step": 37330 }, { "epoch": 0.5730949274806232, "grad_norm": 0.553979754447937, "learning_rate": 1.7953730699559133e-05, "loss": 0.0564, "step": 37340 }, { "epoch": 0.5732484076433121, "grad_norm": 0.3967128098011017, "learning_rate": 1.7952106779357922e-05, "loss": 0.0411, "step": 37350 }, { "epoch": 0.5734018878060011, "grad_norm": 0.5068401098251343, "learning_rate": 1.7950482288544004e-05, "loss": 0.0496, "step": 37360 }, { "epoch": 0.57355536796869, "grad_norm": 0.4359878599643707, "learning_rate": 1.7948857227233948e-05, "loss": 0.0438, "step": 37370 }, { "epoch": 0.573708848131379, "grad_norm": 0.5159381031990051, "learning_rate": 1.7947231595544364e-05, "loss": 0.0407, "step": 37380 }, { "epoch": 0.573862328294068, "grad_norm": 0.4520273804664612, "learning_rate": 1.7945605393591897e-05, "loss": 0.0478, "step": 37390 }, { "epoch": 0.5740158084567569, "grad_norm": 0.4455021619796753, "learning_rate": 1.794397862149324e-05, "loss": 0.0374, "step": 37400 }, { "epoch": 0.5741692886194459, "grad_norm": 0.4058179259300232, "learning_rate": 1.7942351279365126e-05, "loss": 0.0479, "step": 37410 }, { "epoch": 0.574322768782135, "grad_norm": 0.5143257975578308, "learning_rate": 1.7940723367324324e-05, "loss": 0.0371, "step": 37420 }, { "epoch": 0.5744762489448239, "grad_norm": 0.3357616066932678, "learning_rate": 1.7939094885487646e-05, "loss": 0.0348, "step": 37430 }, { "epoch": 0.5746297291075129, "grad_norm": 0.48222652077674866, "learning_rate": 1.7937465833971943e-05, "loss": 0.0374, "step": 37440 }, { "epoch": 0.5747832092702019, "grad_norm": 0.7503275275230408, "learning_rate": 1.7935836212894115e-05, "loss": 0.0514, "step": 37450 }, { "epoch": 0.5749366894328908, "grad_norm": 0.3540997803211212, "learning_rate": 1.7934206022371094e-05, "loss": 0.0453, "step": 37460 }, { "epoch": 0.5750901695955798, "grad_norm": 0.3812716007232666, "learning_rate": 1.793257526251986e-05, "loss": 0.0508, "step": 37470 }, { "epoch": 0.5752436497582687, "grad_norm": 0.5025743246078491, "learning_rate": 1.793094393345743e-05, "loss": 0.0454, "step": 37480 }, { "epoch": 0.5753971299209577, "grad_norm": 0.41673940420150757, "learning_rate": 1.792931203530085e-05, "loss": 0.0383, "step": 37490 }, { "epoch": 0.5755506100836467, "grad_norm": 0.3983667492866516, "learning_rate": 1.7927679568167238e-05, "loss": 0.0355, "step": 37500 }, { "epoch": 0.5757040902463356, "grad_norm": 0.7426276803016663, "learning_rate": 1.792604653217372e-05, "loss": 0.045, "step": 37510 }, { "epoch": 0.5758575704090246, "grad_norm": 0.6490141153335571, "learning_rate": 1.792441292743748e-05, "loss": 0.0408, "step": 37520 }, { "epoch": 0.5760110505717136, "grad_norm": 0.36165520548820496, "learning_rate": 1.792277875407574e-05, "loss": 0.0399, "step": 37530 }, { "epoch": 0.5761645307344025, "grad_norm": 0.39968931674957275, "learning_rate": 1.7921144012205762e-05, "loss": 0.038, "step": 37540 }, { "epoch": 0.5763180108970916, "grad_norm": 0.46865278482437134, "learning_rate": 1.791950870194485e-05, "loss": 0.0451, "step": 37550 }, { "epoch": 0.5764714910597806, "grad_norm": 0.5506157279014587, "learning_rate": 1.791787282341034e-05, "loss": 0.0451, "step": 37560 }, { "epoch": 0.5766249712224695, "grad_norm": 0.61184161901474, "learning_rate": 1.7916236376719626e-05, "loss": 0.0424, "step": 37570 }, { "epoch": 0.5767784513851585, "grad_norm": 0.4588552415370941, "learning_rate": 1.7914599361990128e-05, "loss": 0.0449, "step": 37580 }, { "epoch": 0.5769319315478474, "grad_norm": 0.3642082214355469, "learning_rate": 1.7912961779339317e-05, "loss": 0.0483, "step": 37590 }, { "epoch": 0.5770854117105364, "grad_norm": 0.5139023065567017, "learning_rate": 1.7911323628884693e-05, "loss": 0.0412, "step": 37600 }, { "epoch": 0.5772388918732254, "grad_norm": 0.33268511295318604, "learning_rate": 1.7909684910743806e-05, "loss": 0.0331, "step": 37610 }, { "epoch": 0.5773923720359143, "grad_norm": 0.46877220273017883, "learning_rate": 1.7908045625034247e-05, "loss": 0.0409, "step": 37620 }, { "epoch": 0.5775458521986033, "grad_norm": 0.7794274687767029, "learning_rate": 1.790640577187364e-05, "loss": 0.0457, "step": 37630 }, { "epoch": 0.5776993323612923, "grad_norm": 0.6089743971824646, "learning_rate": 1.7904765351379657e-05, "loss": 0.0421, "step": 37640 }, { "epoch": 0.5778528125239812, "grad_norm": 0.6676805019378662, "learning_rate": 1.790312436367001e-05, "loss": 0.0525, "step": 37650 }, { "epoch": 0.5780062926866703, "grad_norm": 0.5874137878417969, "learning_rate": 1.7901482808862446e-05, "loss": 0.0349, "step": 37660 }, { "epoch": 0.5781597728493593, "grad_norm": 0.7659218907356262, "learning_rate": 1.789984068707476e-05, "loss": 0.0449, "step": 37670 }, { "epoch": 0.5783132530120482, "grad_norm": 0.48265334963798523, "learning_rate": 1.789819799842478e-05, "loss": 0.0373, "step": 37680 }, { "epoch": 0.5784667331747372, "grad_norm": 0.5080066323280334, "learning_rate": 1.7896554743030388e-05, "loss": 0.0504, "step": 37690 }, { "epoch": 0.5786202133374262, "grad_norm": 0.47426825761795044, "learning_rate": 1.789491092100949e-05, "loss": 0.0342, "step": 37700 }, { "epoch": 0.5787736935001151, "grad_norm": 0.39079076051712036, "learning_rate": 1.7893266532480043e-05, "loss": 0.0425, "step": 37710 }, { "epoch": 0.5789271736628041, "grad_norm": 0.43220996856689453, "learning_rate": 1.7891621577560038e-05, "loss": 0.0441, "step": 37720 }, { "epoch": 0.579080653825493, "grad_norm": 0.4803939759731293, "learning_rate": 1.7889976056367516e-05, "loss": 0.044, "step": 37730 }, { "epoch": 0.579234133988182, "grad_norm": 0.5488793849945068, "learning_rate": 1.7888329969020554e-05, "loss": 0.0356, "step": 37740 }, { "epoch": 0.579387614150871, "grad_norm": 0.28948479890823364, "learning_rate": 1.7886683315637263e-05, "loss": 0.0348, "step": 37750 }, { "epoch": 0.5795410943135599, "grad_norm": 0.27534544467926025, "learning_rate": 1.7885036096335806e-05, "loss": 0.0403, "step": 37760 }, { "epoch": 0.579694574476249, "grad_norm": 0.32647132873535156, "learning_rate": 1.788338831123438e-05, "loss": 0.0326, "step": 37770 }, { "epoch": 0.579848054638938, "grad_norm": 0.5232931971549988, "learning_rate": 1.7881739960451217e-05, "loss": 0.0415, "step": 37780 }, { "epoch": 0.5800015348016269, "grad_norm": 0.32160091400146484, "learning_rate": 1.7880091044104606e-05, "loss": 0.0483, "step": 37790 }, { "epoch": 0.5801550149643159, "grad_norm": 0.31081679463386536, "learning_rate": 1.787844156231286e-05, "loss": 0.0519, "step": 37800 }, { "epoch": 0.5803084951270049, "grad_norm": 0.6329803466796875, "learning_rate": 1.7876791515194346e-05, "loss": 0.0338, "step": 37810 }, { "epoch": 0.5804619752896938, "grad_norm": 0.3397987186908722, "learning_rate": 1.787514090286746e-05, "loss": 0.0387, "step": 37820 }, { "epoch": 0.5806154554523828, "grad_norm": 0.4094519019126892, "learning_rate": 1.7873489725450642e-05, "loss": 0.0434, "step": 37830 }, { "epoch": 0.5807689356150717, "grad_norm": 0.40572643280029297, "learning_rate": 1.7871837983062385e-05, "loss": 0.0397, "step": 37840 }, { "epoch": 0.5809224157777607, "grad_norm": 0.42187121510505676, "learning_rate": 1.7870185675821198e-05, "loss": 0.0428, "step": 37850 }, { "epoch": 0.5810758959404497, "grad_norm": 0.2993626892566681, "learning_rate": 1.7868532803845652e-05, "loss": 0.0325, "step": 37860 }, { "epoch": 0.5812293761031386, "grad_norm": 0.2669340670108795, "learning_rate": 1.786687936725435e-05, "loss": 0.0356, "step": 37870 }, { "epoch": 0.5813828562658276, "grad_norm": 0.432591050863266, "learning_rate": 1.7865225366165934e-05, "loss": 0.0518, "step": 37880 }, { "epoch": 0.5815363364285167, "grad_norm": 0.45545271039009094, "learning_rate": 1.7863570800699087e-05, "loss": 0.0376, "step": 37890 }, { "epoch": 0.5816898165912056, "grad_norm": 0.40325334668159485, "learning_rate": 1.786191567097254e-05, "loss": 0.0429, "step": 37900 }, { "epoch": 0.5818432967538946, "grad_norm": 0.4323957860469818, "learning_rate": 1.7860259977105058e-05, "loss": 0.0368, "step": 37910 }, { "epoch": 0.5819967769165836, "grad_norm": 0.6132642030715942, "learning_rate": 1.7858603719215445e-05, "loss": 0.0416, "step": 37920 }, { "epoch": 0.5821502570792725, "grad_norm": 0.29664674401283264, "learning_rate": 1.7856946897422545e-05, "loss": 0.0411, "step": 37930 }, { "epoch": 0.5823037372419615, "grad_norm": 0.4030279219150543, "learning_rate": 1.7855289511845247e-05, "loss": 0.0392, "step": 37940 }, { "epoch": 0.5824572174046504, "grad_norm": 0.3226017653942108, "learning_rate": 1.7853631562602488e-05, "loss": 0.0546, "step": 37950 }, { "epoch": 0.5826106975673394, "grad_norm": 0.37476176023483276, "learning_rate": 1.785197304981322e-05, "loss": 0.0471, "step": 37960 }, { "epoch": 0.5827641777300284, "grad_norm": 0.42046406865119934, "learning_rate": 1.785031397359646e-05, "loss": 0.0508, "step": 37970 }, { "epoch": 0.5829176578927173, "grad_norm": 0.4044402539730072, "learning_rate": 1.784865433407126e-05, "loss": 0.037, "step": 37980 }, { "epoch": 0.5830711380554063, "grad_norm": 0.38511306047439575, "learning_rate": 1.7846994131356705e-05, "loss": 0.0343, "step": 37990 }, { "epoch": 0.5832246182180953, "grad_norm": 1.0003167390823364, "learning_rate": 1.7845333365571922e-05, "loss": 0.0593, "step": 38000 }, { "epoch": 0.5833780983807842, "grad_norm": 0.4069834053516388, "learning_rate": 1.784367203683609e-05, "loss": 0.0289, "step": 38010 }, { "epoch": 0.5835315785434733, "grad_norm": 0.4822530150413513, "learning_rate": 1.784201014526841e-05, "loss": 0.0521, "step": 38020 }, { "epoch": 0.5836850587061623, "grad_norm": 0.5932601094245911, "learning_rate": 1.784034769098814e-05, "loss": 0.0485, "step": 38030 }, { "epoch": 0.5838385388688512, "grad_norm": 0.4457421600818634, "learning_rate": 1.7838684674114568e-05, "loss": 0.039, "step": 38040 }, { "epoch": 0.5839920190315402, "grad_norm": 0.6489425301551819, "learning_rate": 1.7837021094767025e-05, "loss": 0.048, "step": 38050 }, { "epoch": 0.5841454991942292, "grad_norm": 0.4245728850364685, "learning_rate": 1.7835356953064886e-05, "loss": 0.04, "step": 38060 }, { "epoch": 0.5842989793569181, "grad_norm": 0.5900495052337646, "learning_rate": 1.783369224912756e-05, "loss": 0.04, "step": 38070 }, { "epoch": 0.5844524595196071, "grad_norm": 0.35709765553474426, "learning_rate": 1.7832026983074505e-05, "loss": 0.0433, "step": 38080 }, { "epoch": 0.584605939682296, "grad_norm": 0.25171446800231934, "learning_rate": 1.783036115502521e-05, "loss": 0.0408, "step": 38090 }, { "epoch": 0.584759419844985, "grad_norm": 0.38953256607055664, "learning_rate": 1.7828694765099208e-05, "loss": 0.0409, "step": 38100 }, { "epoch": 0.584912900007674, "grad_norm": 0.44900187849998474, "learning_rate": 1.7827027813416074e-05, "loss": 0.0467, "step": 38110 }, { "epoch": 0.5850663801703629, "grad_norm": 0.45213231444358826, "learning_rate": 1.782536030009542e-05, "loss": 0.0332, "step": 38120 }, { "epoch": 0.585219860333052, "grad_norm": 0.48176082968711853, "learning_rate": 1.7823692225256904e-05, "loss": 0.0407, "step": 38130 }, { "epoch": 0.585373340495741, "grad_norm": 0.4655252993106842, "learning_rate": 1.782202358902022e-05, "loss": 0.0441, "step": 38140 }, { "epoch": 0.5855268206584299, "grad_norm": 0.3001289963722229, "learning_rate": 1.78203543915051e-05, "loss": 0.0444, "step": 38150 }, { "epoch": 0.5856803008211189, "grad_norm": 0.6953039765357971, "learning_rate": 1.7818684632831324e-05, "loss": 0.0386, "step": 38160 }, { "epoch": 0.5858337809838079, "grad_norm": 0.3685106337070465, "learning_rate": 1.7817014313118703e-05, "loss": 0.0479, "step": 38170 }, { "epoch": 0.5859872611464968, "grad_norm": 0.6739587187767029, "learning_rate": 1.7815343432487094e-05, "loss": 0.0448, "step": 38180 }, { "epoch": 0.5861407413091858, "grad_norm": 1.3124676942825317, "learning_rate": 1.7813671991056395e-05, "loss": 0.0415, "step": 38190 }, { "epoch": 0.5862942214718747, "grad_norm": 0.4875018298625946, "learning_rate": 1.7811999988946537e-05, "loss": 0.0379, "step": 38200 }, { "epoch": 0.5864477016345637, "grad_norm": 0.46108201146125793, "learning_rate": 1.7810327426277505e-05, "loss": 0.0425, "step": 38210 }, { "epoch": 0.5866011817972527, "grad_norm": 0.6076374650001526, "learning_rate": 1.780865430316931e-05, "loss": 0.0443, "step": 38220 }, { "epoch": 0.5867546619599416, "grad_norm": 0.4982433617115021, "learning_rate": 1.7806980619742006e-05, "loss": 0.0354, "step": 38230 }, { "epoch": 0.5869081421226306, "grad_norm": 0.4737371802330017, "learning_rate": 1.7805306376115697e-05, "loss": 0.0475, "step": 38240 }, { "epoch": 0.5870616222853197, "grad_norm": 0.6345263123512268, "learning_rate": 1.7803631572410515e-05, "loss": 0.0448, "step": 38250 }, { "epoch": 0.5872151024480086, "grad_norm": 0.6478474736213684, "learning_rate": 1.780195620874664e-05, "loss": 0.0446, "step": 38260 }, { "epoch": 0.5873685826106976, "grad_norm": 0.4791342318058014, "learning_rate": 1.780028028524429e-05, "loss": 0.0382, "step": 38270 }, { "epoch": 0.5875220627733866, "grad_norm": 0.26437100768089294, "learning_rate": 1.779860380202372e-05, "loss": 0.0395, "step": 38280 }, { "epoch": 0.5876755429360755, "grad_norm": 0.5404361486434937, "learning_rate": 1.7796926759205236e-05, "loss": 0.042, "step": 38290 }, { "epoch": 0.5878290230987645, "grad_norm": 0.4786456525325775, "learning_rate": 1.7795249156909163e-05, "loss": 0.0408, "step": 38300 }, { "epoch": 0.5879825032614534, "grad_norm": 0.27616146206855774, "learning_rate": 1.7793570995255893e-05, "loss": 0.0451, "step": 38310 }, { "epoch": 0.5881359834241424, "grad_norm": 0.4340970814228058, "learning_rate": 1.7791892274365833e-05, "loss": 0.0397, "step": 38320 }, { "epoch": 0.5882894635868314, "grad_norm": 0.4050920605659485, "learning_rate": 1.779021299435945e-05, "loss": 0.039, "step": 38330 }, { "epoch": 0.5884429437495203, "grad_norm": 0.5008623003959656, "learning_rate": 1.7788533155357238e-05, "loss": 0.0431, "step": 38340 }, { "epoch": 0.5885964239122093, "grad_norm": 0.46972596645355225, "learning_rate": 1.7786852757479738e-05, "loss": 0.0392, "step": 38350 }, { "epoch": 0.5887499040748984, "grad_norm": 0.4700809121131897, "learning_rate": 1.7785171800847525e-05, "loss": 0.0413, "step": 38360 }, { "epoch": 0.5889033842375873, "grad_norm": 0.4625977873802185, "learning_rate": 1.7783490285581223e-05, "loss": 0.0431, "step": 38370 }, { "epoch": 0.5890568644002763, "grad_norm": 0.4152967631816864, "learning_rate": 1.778180821180149e-05, "loss": 0.0364, "step": 38380 }, { "epoch": 0.5892103445629653, "grad_norm": 0.3425879180431366, "learning_rate": 1.7780125579629024e-05, "loss": 0.0394, "step": 38390 }, { "epoch": 0.5893638247256542, "grad_norm": 0.5037339329719543, "learning_rate": 1.7778442389184562e-05, "loss": 0.0406, "step": 38400 }, { "epoch": 0.5895173048883432, "grad_norm": 0.6193504333496094, "learning_rate": 1.777675864058889e-05, "loss": 0.0486, "step": 38410 }, { "epoch": 0.5896707850510322, "grad_norm": 0.48017948865890503, "learning_rate": 1.7775074333962822e-05, "loss": 0.0376, "step": 38420 }, { "epoch": 0.5898242652137211, "grad_norm": 0.2976153790950775, "learning_rate": 1.7773389469427216e-05, "loss": 0.0357, "step": 38430 }, { "epoch": 0.5899777453764101, "grad_norm": 0.5069386959075928, "learning_rate": 1.777170404710298e-05, "loss": 0.0468, "step": 38440 }, { "epoch": 0.590131225539099, "grad_norm": 0.4648423492908478, "learning_rate": 1.777001806711104e-05, "loss": 0.0434, "step": 38450 }, { "epoch": 0.590284705701788, "grad_norm": 0.5237005949020386, "learning_rate": 1.7768331529572383e-05, "loss": 0.0534, "step": 38460 }, { "epoch": 0.590438185864477, "grad_norm": 0.4883895814418793, "learning_rate": 1.7766644434608035e-05, "loss": 0.0372, "step": 38470 }, { "epoch": 0.590591666027166, "grad_norm": 0.31031468510627747, "learning_rate": 1.7764956782339044e-05, "loss": 0.0357, "step": 38480 }, { "epoch": 0.590745146189855, "grad_norm": 0.3802495300769806, "learning_rate": 1.7763268572886515e-05, "loss": 0.0443, "step": 38490 }, { "epoch": 0.590898626352544, "grad_norm": 0.5037605166435242, "learning_rate": 1.7761579806371587e-05, "loss": 0.0367, "step": 38500 }, { "epoch": 0.5910521065152329, "grad_norm": 0.363386332988739, "learning_rate": 1.7759890482915444e-05, "loss": 0.0444, "step": 38510 }, { "epoch": 0.5912055866779219, "grad_norm": 0.4734555780887604, "learning_rate": 1.7758200602639296e-05, "loss": 0.0382, "step": 38520 }, { "epoch": 0.5913590668406109, "grad_norm": 0.24769997596740723, "learning_rate": 1.7756510165664404e-05, "loss": 0.0296, "step": 38530 }, { "epoch": 0.5915125470032998, "grad_norm": 0.4586979150772095, "learning_rate": 1.7754819172112074e-05, "loss": 0.0335, "step": 38540 }, { "epoch": 0.5916660271659888, "grad_norm": 0.2916174530982971, "learning_rate": 1.7753127622103638e-05, "loss": 0.0421, "step": 38550 }, { "epoch": 0.5918195073286777, "grad_norm": 0.33044010400772095, "learning_rate": 1.7751435515760482e-05, "loss": 0.0431, "step": 38560 }, { "epoch": 0.5919729874913667, "grad_norm": 0.560779869556427, "learning_rate": 1.7749742853204018e-05, "loss": 0.0521, "step": 38570 }, { "epoch": 0.5921264676540557, "grad_norm": 0.39424389600753784, "learning_rate": 1.7748049634555712e-05, "loss": 0.0463, "step": 38580 }, { "epoch": 0.5922799478167446, "grad_norm": 0.3461337089538574, "learning_rate": 1.774635585993706e-05, "loss": 0.0422, "step": 38590 }, { "epoch": 0.5924334279794337, "grad_norm": 0.5794724822044373, "learning_rate": 1.77446615294696e-05, "loss": 0.0419, "step": 38600 }, { "epoch": 0.5925869081421227, "grad_norm": 0.4147323668003082, "learning_rate": 1.774296664327491e-05, "loss": 0.0384, "step": 38610 }, { "epoch": 0.5927403883048116, "grad_norm": 0.3275068998336792, "learning_rate": 1.7741271201474606e-05, "loss": 0.0448, "step": 38620 }, { "epoch": 0.5928938684675006, "grad_norm": 0.43533211946487427, "learning_rate": 1.7739575204190354e-05, "loss": 0.0376, "step": 38630 }, { "epoch": 0.5930473486301896, "grad_norm": 0.42567598819732666, "learning_rate": 1.7737878651543847e-05, "loss": 0.0394, "step": 38640 }, { "epoch": 0.5932008287928785, "grad_norm": 0.5808573961257935, "learning_rate": 1.7736181543656824e-05, "loss": 0.0451, "step": 38650 }, { "epoch": 0.5933543089555675, "grad_norm": 0.4477894902229309, "learning_rate": 1.7734483880651063e-05, "loss": 0.034, "step": 38660 }, { "epoch": 0.5935077891182564, "grad_norm": 0.5152050256729126, "learning_rate": 1.7732785662648382e-05, "loss": 0.0341, "step": 38670 }, { "epoch": 0.5936612692809454, "grad_norm": 0.4415578544139862, "learning_rate": 1.7731086889770637e-05, "loss": 0.0391, "step": 38680 }, { "epoch": 0.5938147494436344, "grad_norm": 0.5899091958999634, "learning_rate": 1.772938756213973e-05, "loss": 0.0569, "step": 38690 }, { "epoch": 0.5939682296063233, "grad_norm": 0.6086992025375366, "learning_rate": 1.7727687679877593e-05, "loss": 0.0409, "step": 38700 }, { "epoch": 0.5941217097690124, "grad_norm": 0.32779133319854736, "learning_rate": 1.7725987243106205e-05, "loss": 0.0372, "step": 38710 }, { "epoch": 0.5942751899317014, "grad_norm": 0.5494503378868103, "learning_rate": 1.7724286251947586e-05, "loss": 0.0477, "step": 38720 }, { "epoch": 0.5944286700943903, "grad_norm": 0.5523680448532104, "learning_rate": 1.772258470652379e-05, "loss": 0.0456, "step": 38730 }, { "epoch": 0.5945821502570793, "grad_norm": 0.4819537401199341, "learning_rate": 1.7720882606956907e-05, "loss": 0.051, "step": 38740 }, { "epoch": 0.5947356304197683, "grad_norm": 0.37903761863708496, "learning_rate": 1.7719179953369085e-05, "loss": 0.0452, "step": 38750 }, { "epoch": 0.5948891105824572, "grad_norm": 0.4178241789340973, "learning_rate": 1.771747674588249e-05, "loss": 0.0423, "step": 38760 }, { "epoch": 0.5950425907451462, "grad_norm": 0.47412046790122986, "learning_rate": 1.7715772984619346e-05, "loss": 0.0482, "step": 38770 }, { "epoch": 0.5951960709078352, "grad_norm": 0.33240923285484314, "learning_rate": 1.7714068669701903e-05, "loss": 0.0504, "step": 38780 }, { "epoch": 0.5953495510705241, "grad_norm": 0.7087287306785583, "learning_rate": 1.7712363801252455e-05, "loss": 0.0351, "step": 38790 }, { "epoch": 0.5955030312332131, "grad_norm": 0.2718099057674408, "learning_rate": 1.771065837939334e-05, "loss": 0.043, "step": 38800 }, { "epoch": 0.595656511395902, "grad_norm": 0.3176419138908386, "learning_rate": 1.770895240424693e-05, "loss": 0.0445, "step": 38810 }, { "epoch": 0.595809991558591, "grad_norm": 0.37480980157852173, "learning_rate": 1.7707245875935645e-05, "loss": 0.046, "step": 38820 }, { "epoch": 0.5959634717212801, "grad_norm": 0.35465821623802185, "learning_rate": 1.7705538794581932e-05, "loss": 0.0364, "step": 38830 }, { "epoch": 0.596116951883969, "grad_norm": 0.3559640944004059, "learning_rate": 1.770383116030829e-05, "loss": 0.0409, "step": 38840 }, { "epoch": 0.596270432046658, "grad_norm": 0.5016882419586182, "learning_rate": 1.7702122973237246e-05, "loss": 0.0408, "step": 38850 }, { "epoch": 0.596423912209347, "grad_norm": 0.46309366822242737, "learning_rate": 1.770041423349138e-05, "loss": 0.0368, "step": 38860 }, { "epoch": 0.5965773923720359, "grad_norm": 0.46281757950782776, "learning_rate": 1.7698704941193295e-05, "loss": 0.0361, "step": 38870 }, { "epoch": 0.5967308725347249, "grad_norm": 0.44593027234077454, "learning_rate": 1.7696995096465658e-05, "loss": 0.0407, "step": 38880 }, { "epoch": 0.5968843526974139, "grad_norm": 0.32783082127571106, "learning_rate": 1.7695284699431147e-05, "loss": 0.0518, "step": 38890 }, { "epoch": 0.5970378328601028, "grad_norm": 0.37567606568336487, "learning_rate": 1.76935737502125e-05, "loss": 0.0407, "step": 38900 }, { "epoch": 0.5971913130227918, "grad_norm": 0.41337326169013977, "learning_rate": 1.7691862248932485e-05, "loss": 0.0411, "step": 38910 }, { "epoch": 0.5973447931854807, "grad_norm": 0.4278438687324524, "learning_rate": 1.769015019571392e-05, "loss": 0.0387, "step": 38920 }, { "epoch": 0.5974982733481697, "grad_norm": 0.4824163317680359, "learning_rate": 1.7688437590679645e-05, "loss": 0.0334, "step": 38930 }, { "epoch": 0.5976517535108588, "grad_norm": 0.3880116641521454, "learning_rate": 1.7686724433952557e-05, "loss": 0.0376, "step": 38940 }, { "epoch": 0.5978052336735477, "grad_norm": 0.38739073276519775, "learning_rate": 1.7685010725655587e-05, "loss": 0.0392, "step": 38950 }, { "epoch": 0.5979587138362367, "grad_norm": 0.3169502317905426, "learning_rate": 1.7683296465911695e-05, "loss": 0.0418, "step": 38960 }, { "epoch": 0.5981121939989257, "grad_norm": 0.3074110746383667, "learning_rate": 1.7681581654843904e-05, "loss": 0.0332, "step": 38970 }, { "epoch": 0.5982656741616146, "grad_norm": 0.5019174814224243, "learning_rate": 1.7679866292575246e-05, "loss": 0.0383, "step": 38980 }, { "epoch": 0.5984191543243036, "grad_norm": 0.3209587633609772, "learning_rate": 1.767815037922882e-05, "loss": 0.0332, "step": 38990 }, { "epoch": 0.5985726344869926, "grad_norm": 0.3680361211299896, "learning_rate": 1.767643391492775e-05, "loss": 0.0451, "step": 39000 }, { "epoch": 0.5987261146496815, "grad_norm": 0.4938565194606781, "learning_rate": 1.7674716899795205e-05, "loss": 0.0466, "step": 39010 }, { "epoch": 0.5988795948123705, "grad_norm": 0.38012343645095825, "learning_rate": 1.7672999333954387e-05, "loss": 0.0411, "step": 39020 }, { "epoch": 0.5990330749750594, "grad_norm": 0.6060382723808289, "learning_rate": 1.7671281217528543e-05, "loss": 0.0367, "step": 39030 }, { "epoch": 0.5991865551377484, "grad_norm": 0.3525732159614563, "learning_rate": 1.766956255064096e-05, "loss": 0.0495, "step": 39040 }, { "epoch": 0.5993400353004374, "grad_norm": 0.5956245064735413, "learning_rate": 1.7667843333414968e-05, "loss": 0.0466, "step": 39050 }, { "epoch": 0.5994935154631263, "grad_norm": 0.41227248311042786, "learning_rate": 1.766612356597392e-05, "loss": 0.0344, "step": 39060 }, { "epoch": 0.5996469956258154, "grad_norm": 0.329360693693161, "learning_rate": 1.766440324844123e-05, "loss": 0.0458, "step": 39070 }, { "epoch": 0.5998004757885044, "grad_norm": 0.39940646290779114, "learning_rate": 1.7662682380940336e-05, "loss": 0.0374, "step": 39080 }, { "epoch": 0.5999539559511933, "grad_norm": 0.6282939910888672, "learning_rate": 1.7660960963594723e-05, "loss": 0.048, "step": 39090 }, { "epoch": 0.6001074361138823, "grad_norm": 0.3703835904598236, "learning_rate": 1.765923899652791e-05, "loss": 0.0306, "step": 39100 }, { "epoch": 0.6002609162765713, "grad_norm": 0.4716401696205139, "learning_rate": 1.7657516479863462e-05, "loss": 0.0465, "step": 39110 }, { "epoch": 0.6004143964392602, "grad_norm": 0.38524505496025085, "learning_rate": 1.765579341372498e-05, "loss": 0.036, "step": 39120 }, { "epoch": 0.6005678766019492, "grad_norm": 0.33016276359558105, "learning_rate": 1.7654069798236107e-05, "loss": 0.0374, "step": 39130 }, { "epoch": 0.6007213567646382, "grad_norm": 0.4461848735809326, "learning_rate": 1.765234563352052e-05, "loss": 0.0406, "step": 39140 }, { "epoch": 0.6008748369273271, "grad_norm": 0.48444613814353943, "learning_rate": 1.7650620919701938e-05, "loss": 0.0568, "step": 39150 }, { "epoch": 0.6010283170900161, "grad_norm": 0.284997820854187, "learning_rate": 1.7648895656904116e-05, "loss": 0.0344, "step": 39160 }, { "epoch": 0.601181797252705, "grad_norm": 0.4362579882144928, "learning_rate": 1.7647169845250866e-05, "loss": 0.0413, "step": 39170 }, { "epoch": 0.601335277415394, "grad_norm": 0.6063703894615173, "learning_rate": 1.764544348486601e-05, "loss": 0.0452, "step": 39180 }, { "epoch": 0.6014887575780831, "grad_norm": 0.3897307515144348, "learning_rate": 1.764371657587343e-05, "loss": 0.037, "step": 39190 }, { "epoch": 0.601642237740772, "grad_norm": 0.38463300466537476, "learning_rate": 1.7641989118397047e-05, "loss": 0.032, "step": 39200 }, { "epoch": 0.601795717903461, "grad_norm": 0.7809932827949524, "learning_rate": 1.7640261112560814e-05, "loss": 0.0469, "step": 39210 }, { "epoch": 0.60194919806615, "grad_norm": 0.5127362608909607, "learning_rate": 1.7638532558488726e-05, "loss": 0.049, "step": 39220 }, { "epoch": 0.6021026782288389, "grad_norm": 0.5601686239242554, "learning_rate": 1.7636803456304816e-05, "loss": 0.0491, "step": 39230 }, { "epoch": 0.6022561583915279, "grad_norm": 0.5025057792663574, "learning_rate": 1.7635073806133158e-05, "loss": 0.0536, "step": 39240 }, { "epoch": 0.6024096385542169, "grad_norm": 0.413723886013031, "learning_rate": 1.7633343608097865e-05, "loss": 0.0362, "step": 39250 }, { "epoch": 0.6025631187169058, "grad_norm": 0.33228617906570435, "learning_rate": 1.7631612862323093e-05, "loss": 0.0487, "step": 39260 }, { "epoch": 0.6027165988795948, "grad_norm": 0.3872368633747101, "learning_rate": 1.762988156893303e-05, "loss": 0.0495, "step": 39270 }, { "epoch": 0.6028700790422837, "grad_norm": 0.36343565583229065, "learning_rate": 1.762814972805191e-05, "loss": 0.0503, "step": 39280 }, { "epoch": 0.6030235592049727, "grad_norm": 0.44725024700164795, "learning_rate": 1.7626417339803995e-05, "loss": 0.0318, "step": 39290 }, { "epoch": 0.6031770393676618, "grad_norm": 0.5055866241455078, "learning_rate": 1.7624684404313607e-05, "loss": 0.0476, "step": 39300 }, { "epoch": 0.6033305195303507, "grad_norm": 0.4977017045021057, "learning_rate": 1.7622950921705085e-05, "loss": 0.0429, "step": 39310 }, { "epoch": 0.6034839996930397, "grad_norm": 0.5949932336807251, "learning_rate": 1.7621216892102825e-05, "loss": 0.0515, "step": 39320 }, { "epoch": 0.6036374798557287, "grad_norm": 0.6628060936927795, "learning_rate": 1.7619482315631243e-05, "loss": 0.0437, "step": 39330 }, { "epoch": 0.6037909600184176, "grad_norm": 0.4400663375854492, "learning_rate": 1.7617747192414817e-05, "loss": 0.0421, "step": 39340 }, { "epoch": 0.6039444401811066, "grad_norm": 0.4196866750717163, "learning_rate": 1.7616011522578046e-05, "loss": 0.0401, "step": 39350 }, { "epoch": 0.6040979203437956, "grad_norm": 0.6097036600112915, "learning_rate": 1.761427530624548e-05, "loss": 0.0368, "step": 39360 }, { "epoch": 0.6042514005064845, "grad_norm": 0.41789889335632324, "learning_rate": 1.7612538543541698e-05, "loss": 0.0379, "step": 39370 }, { "epoch": 0.6044048806691735, "grad_norm": 0.43160369992256165, "learning_rate": 1.761080123459133e-05, "loss": 0.053, "step": 39380 }, { "epoch": 0.6045583608318624, "grad_norm": 0.5876190066337585, "learning_rate": 1.760906337951903e-05, "loss": 0.0372, "step": 39390 }, { "epoch": 0.6047118409945514, "grad_norm": 0.5335355401039124, "learning_rate": 1.7607324978449508e-05, "loss": 0.0526, "step": 39400 }, { "epoch": 0.6048653211572405, "grad_norm": 0.6194691061973572, "learning_rate": 1.76055860315075e-05, "loss": 0.0408, "step": 39410 }, { "epoch": 0.6050188013199294, "grad_norm": 0.35123148560523987, "learning_rate": 1.7603846538817785e-05, "loss": 0.0327, "step": 39420 }, { "epoch": 0.6051722814826184, "grad_norm": 0.9839013814926147, "learning_rate": 1.7602106500505187e-05, "loss": 0.0528, "step": 39430 }, { "epoch": 0.6053257616453074, "grad_norm": 0.4602562487125397, "learning_rate": 1.7600365916694562e-05, "loss": 0.0371, "step": 39440 }, { "epoch": 0.6054792418079963, "grad_norm": 0.4109751582145691, "learning_rate": 1.759862478751081e-05, "loss": 0.0411, "step": 39450 }, { "epoch": 0.6056327219706853, "grad_norm": 0.42336681485176086, "learning_rate": 1.7596883113078866e-05, "loss": 0.0377, "step": 39460 }, { "epoch": 0.6057862021333743, "grad_norm": 0.3324354887008667, "learning_rate": 1.7595140893523702e-05, "loss": 0.0456, "step": 39470 }, { "epoch": 0.6059396822960632, "grad_norm": 0.3360876739025116, "learning_rate": 1.759339812897034e-05, "loss": 0.0337, "step": 39480 }, { "epoch": 0.6060931624587522, "grad_norm": 0.5581541657447815, "learning_rate": 1.7591654819543825e-05, "loss": 0.039, "step": 39490 }, { "epoch": 0.6062466426214412, "grad_norm": 0.5278341174125671, "learning_rate": 1.758991096536926e-05, "loss": 0.0372, "step": 39500 }, { "epoch": 0.6064001227841301, "grad_norm": 0.31290099024772644, "learning_rate": 1.7588166566571776e-05, "loss": 0.0391, "step": 39510 }, { "epoch": 0.6065536029468191, "grad_norm": 0.46223554015159607, "learning_rate": 1.7586421623276542e-05, "loss": 0.0419, "step": 39520 }, { "epoch": 0.606707083109508, "grad_norm": 0.6173921823501587, "learning_rate": 1.7584676135608764e-05, "loss": 0.0434, "step": 39530 }, { "epoch": 0.6068605632721971, "grad_norm": 0.24783176183700562, "learning_rate": 1.7582930103693695e-05, "loss": 0.0321, "step": 39540 }, { "epoch": 0.6070140434348861, "grad_norm": 0.322041392326355, "learning_rate": 1.7581183527656627e-05, "loss": 0.0545, "step": 39550 }, { "epoch": 0.607167523597575, "grad_norm": 0.365784227848053, "learning_rate": 1.7579436407622885e-05, "loss": 0.0322, "step": 39560 }, { "epoch": 0.607321003760264, "grad_norm": 0.4868828356266022, "learning_rate": 1.757768874371783e-05, "loss": 0.0375, "step": 39570 }, { "epoch": 0.607474483922953, "grad_norm": 0.35593685507774353, "learning_rate": 1.757594053606688e-05, "loss": 0.0402, "step": 39580 }, { "epoch": 0.6076279640856419, "grad_norm": 0.5516595840454102, "learning_rate": 1.757419178479547e-05, "loss": 0.0405, "step": 39590 }, { "epoch": 0.6077814442483309, "grad_norm": 0.39520853757858276, "learning_rate": 1.7572442490029085e-05, "loss": 0.0315, "step": 39600 }, { "epoch": 0.6079349244110199, "grad_norm": 0.34805068373680115, "learning_rate": 1.7570692651893252e-05, "loss": 0.0519, "step": 39610 }, { "epoch": 0.6080884045737088, "grad_norm": 0.6191904544830322, "learning_rate": 1.7568942270513526e-05, "loss": 0.0519, "step": 39620 }, { "epoch": 0.6082418847363978, "grad_norm": 0.5899694561958313, "learning_rate": 1.7567191346015512e-05, "loss": 0.0463, "step": 39630 }, { "epoch": 0.6083953648990867, "grad_norm": 0.5793687701225281, "learning_rate": 1.756543987852485e-05, "loss": 0.0544, "step": 39640 }, { "epoch": 0.6085488450617758, "grad_norm": 0.4509589672088623, "learning_rate": 1.756368786816722e-05, "loss": 0.0467, "step": 39650 }, { "epoch": 0.6087023252244648, "grad_norm": 0.41967064142227173, "learning_rate": 1.7561935315068336e-05, "loss": 0.0418, "step": 39660 }, { "epoch": 0.6088558053871537, "grad_norm": 0.4276578426361084, "learning_rate": 1.7560182219353953e-05, "loss": 0.0362, "step": 39670 }, { "epoch": 0.6090092855498427, "grad_norm": 0.36141204833984375, "learning_rate": 1.7558428581149872e-05, "loss": 0.0375, "step": 39680 }, { "epoch": 0.6091627657125317, "grad_norm": 0.3784090280532837, "learning_rate": 1.7556674400581925e-05, "loss": 0.038, "step": 39690 }, { "epoch": 0.6093162458752206, "grad_norm": 0.4013544023036957, "learning_rate": 1.7554919677775983e-05, "loss": 0.0327, "step": 39700 }, { "epoch": 0.6094697260379096, "grad_norm": 0.3786754310131073, "learning_rate": 1.7553164412857958e-05, "loss": 0.0326, "step": 39710 }, { "epoch": 0.6096232062005986, "grad_norm": 0.4568442404270172, "learning_rate": 1.755140860595381e-05, "loss": 0.0413, "step": 39720 }, { "epoch": 0.6097766863632875, "grad_norm": 0.4943276047706604, "learning_rate": 1.7549652257189514e-05, "loss": 0.0452, "step": 39730 }, { "epoch": 0.6099301665259765, "grad_norm": 0.6805182695388794, "learning_rate": 1.754789536669111e-05, "loss": 0.0491, "step": 39740 }, { "epoch": 0.6100836466886654, "grad_norm": 0.4657455086708069, "learning_rate": 1.754613793458466e-05, "loss": 0.0418, "step": 39750 }, { "epoch": 0.6102371268513544, "grad_norm": 0.6322004795074463, "learning_rate": 1.7544379960996277e-05, "loss": 0.0443, "step": 39760 }, { "epoch": 0.6103906070140435, "grad_norm": 0.45861107110977173, "learning_rate": 1.7542621446052105e-05, "loss": 0.0321, "step": 39770 }, { "epoch": 0.6105440871767324, "grad_norm": 0.35273441672325134, "learning_rate": 1.754086238987832e-05, "loss": 0.051, "step": 39780 }, { "epoch": 0.6106975673394214, "grad_norm": 0.28114140033721924, "learning_rate": 1.7539102792601155e-05, "loss": 0.0301, "step": 39790 }, { "epoch": 0.6108510475021104, "grad_norm": 0.38055598735809326, "learning_rate": 1.7537342654346864e-05, "loss": 0.0439, "step": 39800 }, { "epoch": 0.6110045276647993, "grad_norm": 0.4712681472301483, "learning_rate": 1.7535581975241755e-05, "loss": 0.0411, "step": 39810 }, { "epoch": 0.6111580078274883, "grad_norm": 0.5416804552078247, "learning_rate": 1.7533820755412164e-05, "loss": 0.0377, "step": 39820 }, { "epoch": 0.6113114879901773, "grad_norm": 0.505106508731842, "learning_rate": 1.753205899498447e-05, "loss": 0.0481, "step": 39830 }, { "epoch": 0.6114649681528662, "grad_norm": 0.2694770395755768, "learning_rate": 1.753029669408509e-05, "loss": 0.0456, "step": 39840 }, { "epoch": 0.6116184483155552, "grad_norm": 0.5118029117584229, "learning_rate": 1.7528533852840476e-05, "loss": 0.039, "step": 39850 }, { "epoch": 0.6117719284782442, "grad_norm": 0.43549954891204834, "learning_rate": 1.7526770471377133e-05, "loss": 0.0399, "step": 39860 }, { "epoch": 0.6119254086409331, "grad_norm": 0.7624979019165039, "learning_rate": 1.7525006549821584e-05, "loss": 0.0485, "step": 39870 }, { "epoch": 0.6120788888036222, "grad_norm": 0.3580288887023926, "learning_rate": 1.7523242088300407e-05, "loss": 0.0348, "step": 39880 }, { "epoch": 0.6122323689663111, "grad_norm": 0.5715909004211426, "learning_rate": 1.752147708694021e-05, "loss": 0.0367, "step": 39890 }, { "epoch": 0.6123858491290001, "grad_norm": 0.45387372374534607, "learning_rate": 1.7519711545867643e-05, "loss": 0.036, "step": 39900 }, { "epoch": 0.6125393292916891, "grad_norm": 0.508645236492157, "learning_rate": 1.7517945465209397e-05, "loss": 0.0421, "step": 39910 }, { "epoch": 0.612692809454378, "grad_norm": 0.43791458010673523, "learning_rate": 1.7516178845092198e-05, "loss": 0.0378, "step": 39920 }, { "epoch": 0.612846289617067, "grad_norm": 0.34003788232803345, "learning_rate": 1.7514411685642813e-05, "loss": 0.0364, "step": 39930 }, { "epoch": 0.612999769779756, "grad_norm": 0.7766522169113159, "learning_rate": 1.7512643986988042e-05, "loss": 0.0327, "step": 39940 }, { "epoch": 0.6131532499424449, "grad_norm": 0.47654733061790466, "learning_rate": 1.7510875749254734e-05, "loss": 0.0443, "step": 39950 }, { "epoch": 0.6133067301051339, "grad_norm": 0.40339258313179016, "learning_rate": 1.7509106972569764e-05, "loss": 0.0398, "step": 39960 }, { "epoch": 0.6134602102678229, "grad_norm": 0.54220050573349, "learning_rate": 1.750733765706006e-05, "loss": 0.041, "step": 39970 }, { "epoch": 0.6136136904305118, "grad_norm": 0.48700085282325745, "learning_rate": 1.7505567802852583e-05, "loss": 0.0369, "step": 39980 }, { "epoch": 0.6137671705932009, "grad_norm": 0.7093506455421448, "learning_rate": 1.750379741007432e-05, "loss": 0.0368, "step": 39990 }, { "epoch": 0.6139206507558898, "grad_norm": 0.42164576053619385, "learning_rate": 1.750202647885232e-05, "loss": 0.0355, "step": 40000 }, { "epoch": 0.6140741309185788, "grad_norm": 0.3159472644329071, "learning_rate": 1.7500255009313645e-05, "loss": 0.0345, "step": 40010 }, { "epoch": 0.6142276110812678, "grad_norm": 0.35220304131507874, "learning_rate": 1.749848300158542e-05, "loss": 0.0423, "step": 40020 }, { "epoch": 0.6143810912439567, "grad_norm": 0.2552098333835602, "learning_rate": 1.7496710455794794e-05, "loss": 0.0337, "step": 40030 }, { "epoch": 0.6145345714066457, "grad_norm": 0.4381023943424225, "learning_rate": 1.7494937372068955e-05, "loss": 0.0436, "step": 40040 }, { "epoch": 0.6146880515693347, "grad_norm": 0.4714812934398651, "learning_rate": 1.7493163750535138e-05, "loss": 0.0322, "step": 40050 }, { "epoch": 0.6148415317320236, "grad_norm": 0.44102734327316284, "learning_rate": 1.749138959132061e-05, "loss": 0.0378, "step": 40060 }, { "epoch": 0.6149950118947126, "grad_norm": 0.5538018345832825, "learning_rate": 1.7489614894552672e-05, "loss": 0.0407, "step": 40070 }, { "epoch": 0.6151484920574016, "grad_norm": 0.405129611492157, "learning_rate": 1.7487839660358675e-05, "loss": 0.0521, "step": 40080 }, { "epoch": 0.6153019722200905, "grad_norm": 0.42006105184555054, "learning_rate": 1.7486063888866004e-05, "loss": 0.0394, "step": 40090 }, { "epoch": 0.6154554523827795, "grad_norm": 0.502534031867981, "learning_rate": 1.7484287580202077e-05, "loss": 0.0445, "step": 40100 }, { "epoch": 0.6156089325454684, "grad_norm": 0.3837631344795227, "learning_rate": 1.748251073449436e-05, "loss": 0.0325, "step": 40110 }, { "epoch": 0.6157624127081575, "grad_norm": 0.5322748422622681, "learning_rate": 1.7480733351870348e-05, "loss": 0.0471, "step": 40120 }, { "epoch": 0.6159158928708465, "grad_norm": 0.5316237807273865, "learning_rate": 1.7478955432457584e-05, "loss": 0.0468, "step": 40130 }, { "epoch": 0.6160693730335354, "grad_norm": 0.39482995867729187, "learning_rate": 1.7477176976383637e-05, "loss": 0.0467, "step": 40140 }, { "epoch": 0.6162228531962244, "grad_norm": 0.37655702233314514, "learning_rate": 1.747539798377613e-05, "loss": 0.0403, "step": 40150 }, { "epoch": 0.6163763333589134, "grad_norm": 0.475249707698822, "learning_rate": 1.7473618454762717e-05, "loss": 0.0412, "step": 40160 }, { "epoch": 0.6165298135216023, "grad_norm": 0.7852820754051208, "learning_rate": 1.7471838389471084e-05, "loss": 0.0422, "step": 40170 }, { "epoch": 0.6166832936842913, "grad_norm": 0.43008872866630554, "learning_rate": 1.7470057788028967e-05, "loss": 0.0389, "step": 40180 }, { "epoch": 0.6168367738469803, "grad_norm": 0.5865294933319092, "learning_rate": 1.7468276650564127e-05, "loss": 0.0356, "step": 40190 }, { "epoch": 0.6169902540096692, "grad_norm": 0.44351449608802795, "learning_rate": 1.7466494977204384e-05, "loss": 0.0472, "step": 40200 }, { "epoch": 0.6171437341723582, "grad_norm": 0.6314511895179749, "learning_rate": 1.7464712768077576e-05, "loss": 0.0405, "step": 40210 }, { "epoch": 0.6172972143350471, "grad_norm": 0.4717505872249603, "learning_rate": 1.7462930023311587e-05, "loss": 0.0446, "step": 40220 }, { "epoch": 0.6174506944977362, "grad_norm": 0.5620492696762085, "learning_rate": 1.7461146743034344e-05, "loss": 0.0418, "step": 40230 }, { "epoch": 0.6176041746604252, "grad_norm": 0.42453819513320923, "learning_rate": 1.7459362927373806e-05, "loss": 0.0321, "step": 40240 }, { "epoch": 0.6177576548231141, "grad_norm": 0.41134387254714966, "learning_rate": 1.7457578576457973e-05, "loss": 0.0349, "step": 40250 }, { "epoch": 0.6179111349858031, "grad_norm": 0.5788951516151428, "learning_rate": 1.7455793690414882e-05, "loss": 0.0433, "step": 40260 }, { "epoch": 0.6180646151484921, "grad_norm": 0.48373979330062866, "learning_rate": 1.7454008269372613e-05, "loss": 0.0439, "step": 40270 }, { "epoch": 0.618218095311181, "grad_norm": 0.4640970528125763, "learning_rate": 1.745222231345928e-05, "loss": 0.0433, "step": 40280 }, { "epoch": 0.61837157547387, "grad_norm": 0.507531464099884, "learning_rate": 1.745043582280303e-05, "loss": 0.0519, "step": 40290 }, { "epoch": 0.618525055636559, "grad_norm": 0.46986547112464905, "learning_rate": 1.7448648797532063e-05, "loss": 0.0401, "step": 40300 }, { "epoch": 0.6186785357992479, "grad_norm": 0.6358267068862915, "learning_rate": 1.7446861237774607e-05, "loss": 0.0417, "step": 40310 }, { "epoch": 0.6188320159619369, "grad_norm": 0.42817866802215576, "learning_rate": 1.744507314365893e-05, "loss": 0.0451, "step": 40320 }, { "epoch": 0.618985496124626, "grad_norm": 0.4084489643573761, "learning_rate": 1.7443284515313333e-05, "loss": 0.0487, "step": 40330 }, { "epoch": 0.6191389762873148, "grad_norm": 0.3541419804096222, "learning_rate": 1.7441495352866172e-05, "loss": 0.05, "step": 40340 }, { "epoch": 0.6192924564500039, "grad_norm": 0.3972551226615906, "learning_rate": 1.743970565644582e-05, "loss": 0.0367, "step": 40350 }, { "epoch": 0.6194459366126928, "grad_norm": 0.48450255393981934, "learning_rate": 1.743791542618071e-05, "loss": 0.0452, "step": 40360 }, { "epoch": 0.6195994167753818, "grad_norm": 0.33432185649871826, "learning_rate": 1.743612466219929e-05, "loss": 0.0337, "step": 40370 }, { "epoch": 0.6197528969380708, "grad_norm": 0.4885261058807373, "learning_rate": 1.7434333364630066e-05, "loss": 0.0648, "step": 40380 }, { "epoch": 0.6199063771007597, "grad_norm": 0.5901548266410828, "learning_rate": 1.7432541533601573e-05, "loss": 0.0437, "step": 40390 }, { "epoch": 0.6200598572634487, "grad_norm": 0.5080252289772034, "learning_rate": 1.7430749169242383e-05, "loss": 0.0476, "step": 40400 }, { "epoch": 0.6202133374261377, "grad_norm": 0.3603161573410034, "learning_rate": 1.7428956271681113e-05, "loss": 0.0368, "step": 40410 }, { "epoch": 0.6203668175888266, "grad_norm": 0.3073952794075012, "learning_rate": 1.7427162841046414e-05, "loss": 0.0341, "step": 40420 }, { "epoch": 0.6205202977515156, "grad_norm": 0.3390504717826843, "learning_rate": 1.7425368877466977e-05, "loss": 0.0334, "step": 40430 }, { "epoch": 0.6206737779142046, "grad_norm": 0.3451354205608368, "learning_rate": 1.7423574381071526e-05, "loss": 0.0399, "step": 40440 }, { "epoch": 0.6208272580768935, "grad_norm": 0.4077194035053253, "learning_rate": 1.7421779351988826e-05, "loss": 0.039, "step": 40450 }, { "epoch": 0.6209807382395826, "grad_norm": 0.5308136940002441, "learning_rate": 1.741998379034769e-05, "loss": 0.0393, "step": 40460 }, { "epoch": 0.6211342184022715, "grad_norm": 0.44610053300857544, "learning_rate": 1.741818769627695e-05, "loss": 0.0457, "step": 40470 }, { "epoch": 0.6212876985649605, "grad_norm": 0.3223002552986145, "learning_rate": 1.7416391069905494e-05, "loss": 0.0415, "step": 40480 }, { "epoch": 0.6214411787276495, "grad_norm": 0.3578481078147888, "learning_rate": 1.741459391136224e-05, "loss": 0.0415, "step": 40490 }, { "epoch": 0.6215946588903384, "grad_norm": 0.7143754363059998, "learning_rate": 1.7412796220776142e-05, "loss": 0.0411, "step": 40500 }, { "epoch": 0.6217481390530274, "grad_norm": 0.29378095269203186, "learning_rate": 1.7410997998276198e-05, "loss": 0.0493, "step": 40510 }, { "epoch": 0.6219016192157164, "grad_norm": 0.8608745336532593, "learning_rate": 1.7409199243991445e-05, "loss": 0.0368, "step": 40520 }, { "epoch": 0.6220550993784053, "grad_norm": 0.43341919779777527, "learning_rate": 1.7407399958050946e-05, "loss": 0.0356, "step": 40530 }, { "epoch": 0.6222085795410943, "grad_norm": 1.2405757904052734, "learning_rate": 1.7405600140583815e-05, "loss": 0.0428, "step": 40540 }, { "epoch": 0.6223620597037833, "grad_norm": 0.30158671736717224, "learning_rate": 1.7403799791719204e-05, "loss": 0.0384, "step": 40550 }, { "epoch": 0.6225155398664722, "grad_norm": 0.4494445323944092, "learning_rate": 1.7401998911586292e-05, "loss": 0.0391, "step": 40560 }, { "epoch": 0.6226690200291612, "grad_norm": 0.5560293793678284, "learning_rate": 1.740019750031431e-05, "loss": 0.0466, "step": 40570 }, { "epoch": 0.6228225001918501, "grad_norm": 0.326537549495697, "learning_rate": 1.7398395558032518e-05, "loss": 0.0413, "step": 40580 }, { "epoch": 0.6229759803545392, "grad_norm": 0.31168046593666077, "learning_rate": 1.7396593084870214e-05, "loss": 0.044, "step": 40590 }, { "epoch": 0.6231294605172282, "grad_norm": 0.4307752251625061, "learning_rate": 1.7394790080956737e-05, "loss": 0.0412, "step": 40600 }, { "epoch": 0.6232829406799171, "grad_norm": 0.3958381414413452, "learning_rate": 1.7392986546421472e-05, "loss": 0.0405, "step": 40610 }, { "epoch": 0.6234364208426061, "grad_norm": 0.5203395485877991, "learning_rate": 1.739118248139382e-05, "loss": 0.0454, "step": 40620 }, { "epoch": 0.6235899010052951, "grad_norm": 0.5881877541542053, "learning_rate": 1.7389377886003243e-05, "loss": 0.0377, "step": 40630 }, { "epoch": 0.623743381167984, "grad_norm": 0.48636895418167114, "learning_rate": 1.738757276037923e-05, "loss": 0.0383, "step": 40640 }, { "epoch": 0.623896861330673, "grad_norm": 0.36250296235084534, "learning_rate": 1.738576710465131e-05, "loss": 0.0358, "step": 40650 }, { "epoch": 0.624050341493362, "grad_norm": 0.2482564002275467, "learning_rate": 1.7383960918949045e-05, "loss": 0.0377, "step": 40660 }, { "epoch": 0.6242038216560509, "grad_norm": 0.4865724444389343, "learning_rate": 1.738215420340205e-05, "loss": 0.0375, "step": 40670 }, { "epoch": 0.6243573018187399, "grad_norm": 0.4913530647754669, "learning_rate": 1.738034695813996e-05, "loss": 0.0409, "step": 40680 }, { "epoch": 0.624510781981429, "grad_norm": 0.3540717363357544, "learning_rate": 1.7378539183292454e-05, "loss": 0.0353, "step": 40690 }, { "epoch": 0.6246642621441179, "grad_norm": 0.5891221761703491, "learning_rate": 1.7376730878989262e-05, "loss": 0.0314, "step": 40700 }, { "epoch": 0.6248177423068069, "grad_norm": 0.3969140946865082, "learning_rate": 1.7374922045360128e-05, "loss": 0.0366, "step": 40710 }, { "epoch": 0.6249712224694958, "grad_norm": 0.4390484392642975, "learning_rate": 1.7373112682534854e-05, "loss": 0.0421, "step": 40720 }, { "epoch": 0.6251247026321848, "grad_norm": 0.5203665494918823, "learning_rate": 1.7371302790643277e-05, "loss": 0.0398, "step": 40730 }, { "epoch": 0.6252781827948738, "grad_norm": 0.5091445446014404, "learning_rate": 1.736949236981526e-05, "loss": 0.0517, "step": 40740 }, { "epoch": 0.6254316629575627, "grad_norm": 0.6283219456672668, "learning_rate": 1.7367681420180717e-05, "loss": 0.0523, "step": 40750 }, { "epoch": 0.6255851431202517, "grad_norm": 0.37291309237480164, "learning_rate": 1.736586994186959e-05, "loss": 0.0417, "step": 40760 }, { "epoch": 0.6257386232829407, "grad_norm": 0.4243389070034027, "learning_rate": 1.7364057935011865e-05, "loss": 0.0365, "step": 40770 }, { "epoch": 0.6258921034456296, "grad_norm": 0.3748776614665985, "learning_rate": 1.736224539973757e-05, "loss": 0.0311, "step": 40780 }, { "epoch": 0.6260455836083186, "grad_norm": 0.4031694829463959, "learning_rate": 1.736043233617676e-05, "loss": 0.0321, "step": 40790 }, { "epoch": 0.6261990637710076, "grad_norm": 0.6249174475669861, "learning_rate": 1.7358618744459534e-05, "loss": 0.0332, "step": 40800 }, { "epoch": 0.6263525439336965, "grad_norm": 0.5883401036262512, "learning_rate": 1.7356804624716032e-05, "loss": 0.0503, "step": 40810 }, { "epoch": 0.6265060240963856, "grad_norm": 0.5052503943443298, "learning_rate": 1.735498997707642e-05, "loss": 0.0397, "step": 40820 }, { "epoch": 0.6266595042590745, "grad_norm": 0.40384185314178467, "learning_rate": 1.7353174801670925e-05, "loss": 0.0476, "step": 40830 }, { "epoch": 0.6268129844217635, "grad_norm": 0.6366856098175049, "learning_rate": 1.735135909862978e-05, "loss": 0.0432, "step": 40840 }, { "epoch": 0.6269664645844525, "grad_norm": 0.5043630599975586, "learning_rate": 1.7349542868083287e-05, "loss": 0.0519, "step": 40850 }, { "epoch": 0.6271199447471414, "grad_norm": 0.43499472737312317, "learning_rate": 1.7347726110161766e-05, "loss": 0.045, "step": 40860 }, { "epoch": 0.6272734249098304, "grad_norm": 0.4913117587566376, "learning_rate": 1.7345908824995574e-05, "loss": 0.056, "step": 40870 }, { "epoch": 0.6274269050725194, "grad_norm": 0.5067717432975769, "learning_rate": 1.7344091012715124e-05, "loss": 0.0343, "step": 40880 }, { "epoch": 0.6275803852352083, "grad_norm": 0.4047285318374634, "learning_rate": 1.7342272673450846e-05, "loss": 0.0454, "step": 40890 }, { "epoch": 0.6277338653978973, "grad_norm": 0.6526632308959961, "learning_rate": 1.7340453807333224e-05, "loss": 0.0474, "step": 40900 }, { "epoch": 0.6278873455605863, "grad_norm": 0.5445119738578796, "learning_rate": 1.7338634414492767e-05, "loss": 0.0535, "step": 40910 }, { "epoch": 0.6280408257232752, "grad_norm": 0.30126655101776123, "learning_rate": 1.7336814495060036e-05, "loss": 0.0335, "step": 40920 }, { "epoch": 0.6281943058859643, "grad_norm": 0.3096742630004883, "learning_rate": 1.733499404916561e-05, "loss": 0.0447, "step": 40930 }, { "epoch": 0.6283477860486532, "grad_norm": 0.4015313386917114, "learning_rate": 1.7333173076940127e-05, "loss": 0.0633, "step": 40940 }, { "epoch": 0.6285012662113422, "grad_norm": 0.4185710847377777, "learning_rate": 1.7331351578514247e-05, "loss": 0.0355, "step": 40950 }, { "epoch": 0.6286547463740312, "grad_norm": 0.405855268239975, "learning_rate": 1.7329529554018677e-05, "loss": 0.0397, "step": 40960 }, { "epoch": 0.6288082265367201, "grad_norm": 0.38243991136550903, "learning_rate": 1.7327707003584158e-05, "loss": 0.0416, "step": 40970 }, { "epoch": 0.6289617066994091, "grad_norm": 0.3731559216976166, "learning_rate": 1.7325883927341465e-05, "loss": 0.0347, "step": 40980 }, { "epoch": 0.6291151868620981, "grad_norm": 0.3665103614330292, "learning_rate": 1.732406032542142e-05, "loss": 0.038, "step": 40990 }, { "epoch": 0.629268667024787, "grad_norm": 0.37113097310066223, "learning_rate": 1.732223619795488e-05, "loss": 0.042, "step": 41000 }, { "epoch": 0.629422147187476, "grad_norm": 0.3378503918647766, "learning_rate": 1.732041154507273e-05, "loss": 0.0405, "step": 41010 }, { "epoch": 0.629575627350165, "grad_norm": 0.5356717109680176, "learning_rate": 1.7318586366905906e-05, "loss": 0.0453, "step": 41020 }, { "epoch": 0.6297291075128539, "grad_norm": 0.43041133880615234, "learning_rate": 1.731676066358537e-05, "loss": 0.0563, "step": 41030 }, { "epoch": 0.629882587675543, "grad_norm": 0.5152373909950256, "learning_rate": 1.7314934435242136e-05, "loss": 0.0418, "step": 41040 }, { "epoch": 0.630036067838232, "grad_norm": 0.26690077781677246, "learning_rate": 1.7313107682007237e-05, "loss": 0.0383, "step": 41050 }, { "epoch": 0.6301895480009209, "grad_norm": 0.3674459755420685, "learning_rate": 1.7311280404011765e-05, "loss": 0.0396, "step": 41060 }, { "epoch": 0.6303430281636099, "grad_norm": 0.42829430103302, "learning_rate": 1.7309452601386826e-05, "loss": 0.0417, "step": 41070 }, { "epoch": 0.6304965083262988, "grad_norm": 0.4019445776939392, "learning_rate": 1.7307624274263585e-05, "loss": 0.0394, "step": 41080 }, { "epoch": 0.6306499884889878, "grad_norm": 0.49258485436439514, "learning_rate": 1.7305795422773235e-05, "loss": 0.0342, "step": 41090 }, { "epoch": 0.6308034686516768, "grad_norm": 0.3415142893791199, "learning_rate": 1.7303966047047004e-05, "loss": 0.0301, "step": 41100 }, { "epoch": 0.6309569488143657, "grad_norm": 0.3380577862262726, "learning_rate": 1.7302136147216163e-05, "loss": 0.0416, "step": 41110 }, { "epoch": 0.6311104289770547, "grad_norm": 0.37838080525398254, "learning_rate": 1.7300305723412017e-05, "loss": 0.0342, "step": 41120 }, { "epoch": 0.6312639091397437, "grad_norm": 0.3468453884124756, "learning_rate": 1.7298474775765915e-05, "loss": 0.0413, "step": 41130 }, { "epoch": 0.6314173893024326, "grad_norm": 0.38301417231559753, "learning_rate": 1.7296643304409232e-05, "loss": 0.0394, "step": 41140 }, { "epoch": 0.6315708694651216, "grad_norm": 0.46107402443885803, "learning_rate": 1.729481130947339e-05, "loss": 0.036, "step": 41150 }, { "epoch": 0.6317243496278107, "grad_norm": 0.43106210231781006, "learning_rate": 1.729297879108985e-05, "loss": 0.0363, "step": 41160 }, { "epoch": 0.6318778297904996, "grad_norm": 0.6700749397277832, "learning_rate": 1.7291145749390104e-05, "loss": 0.0469, "step": 41170 }, { "epoch": 0.6320313099531886, "grad_norm": 0.40786418318748474, "learning_rate": 1.7289312184505677e-05, "loss": 0.0449, "step": 41180 }, { "epoch": 0.6321847901158775, "grad_norm": 0.4419967532157898, "learning_rate": 1.728747809656815e-05, "loss": 0.0402, "step": 41190 }, { "epoch": 0.6323382702785665, "grad_norm": 0.3073713779449463, "learning_rate": 1.7285643485709123e-05, "loss": 0.0403, "step": 41200 }, { "epoch": 0.6324917504412555, "grad_norm": 0.3967975378036499, "learning_rate": 1.728380835206024e-05, "loss": 0.0503, "step": 41210 }, { "epoch": 0.6326452306039444, "grad_norm": 0.4196012616157532, "learning_rate": 1.728197269575319e-05, "loss": 0.0437, "step": 41220 }, { "epoch": 0.6327987107666334, "grad_norm": 0.4325277805328369, "learning_rate": 1.7280136516919687e-05, "loss": 0.0357, "step": 41230 }, { "epoch": 0.6329521909293224, "grad_norm": 0.32160165905952454, "learning_rate": 1.727829981569149e-05, "loss": 0.0326, "step": 41240 }, { "epoch": 0.6331056710920113, "grad_norm": 0.5097662210464478, "learning_rate": 1.727646259220039e-05, "loss": 0.0384, "step": 41250 }, { "epoch": 0.6332591512547003, "grad_norm": 0.5160728096961975, "learning_rate": 1.7274624846578224e-05, "loss": 0.048, "step": 41260 }, { "epoch": 0.6334126314173893, "grad_norm": 0.5239773392677307, "learning_rate": 1.727278657895686e-05, "loss": 0.0414, "step": 41270 }, { "epoch": 0.6335661115800783, "grad_norm": 0.487049400806427, "learning_rate": 1.7270947789468205e-05, "loss": 0.0311, "step": 41280 }, { "epoch": 0.6337195917427673, "grad_norm": 0.5602114796638489, "learning_rate": 1.7269108478244203e-05, "loss": 0.0491, "step": 41290 }, { "epoch": 0.6338730719054562, "grad_norm": 0.41686901450157166, "learning_rate": 1.7267268645416835e-05, "loss": 0.0458, "step": 41300 }, { "epoch": 0.6340265520681452, "grad_norm": 0.29520344734191895, "learning_rate": 1.726542829111812e-05, "loss": 0.0427, "step": 41310 }, { "epoch": 0.6341800322308342, "grad_norm": 0.4584874212741852, "learning_rate": 1.726358741548012e-05, "loss": 0.044, "step": 41320 }, { "epoch": 0.6343335123935231, "grad_norm": 0.4225022494792938, "learning_rate": 1.7261746018634925e-05, "loss": 0.0392, "step": 41330 }, { "epoch": 0.6344869925562121, "grad_norm": 0.3352949619293213, "learning_rate": 1.7259904100714665e-05, "loss": 0.0395, "step": 41340 }, { "epoch": 0.6346404727189011, "grad_norm": 0.5275317430496216, "learning_rate": 1.7258061661851514e-05, "loss": 0.0372, "step": 41350 }, { "epoch": 0.63479395288159, "grad_norm": 0.35919082164764404, "learning_rate": 1.7256218702177674e-05, "loss": 0.0367, "step": 41360 }, { "epoch": 0.634947433044279, "grad_norm": 0.4430061876773834, "learning_rate": 1.725437522182539e-05, "loss": 0.0396, "step": 41370 }, { "epoch": 0.635100913206968, "grad_norm": 0.3819167912006378, "learning_rate": 1.7252531220926944e-05, "loss": 0.0432, "step": 41380 }, { "epoch": 0.6352543933696569, "grad_norm": 0.4309462010860443, "learning_rate": 1.7250686699614655e-05, "loss": 0.0416, "step": 41390 }, { "epoch": 0.635407873532346, "grad_norm": 0.3742564916610718, "learning_rate": 1.7248841658020874e-05, "loss": 0.0371, "step": 41400 }, { "epoch": 0.635561353695035, "grad_norm": 0.5456785559654236, "learning_rate": 1.7246996096278e-05, "loss": 0.043, "step": 41410 }, { "epoch": 0.6357148338577239, "grad_norm": 0.3529497981071472, "learning_rate": 1.724515001451846e-05, "loss": 0.0364, "step": 41420 }, { "epoch": 0.6358683140204129, "grad_norm": 0.42882657051086426, "learning_rate": 1.7243303412874724e-05, "loss": 0.0297, "step": 41430 }, { "epoch": 0.6360217941831018, "grad_norm": 0.49957776069641113, "learning_rate": 1.724145629147929e-05, "loss": 0.0446, "step": 41440 }, { "epoch": 0.6361752743457908, "grad_norm": 0.29406267404556274, "learning_rate": 1.7239608650464714e-05, "loss": 0.0454, "step": 41450 }, { "epoch": 0.6363287545084798, "grad_norm": 0.4240902066230774, "learning_rate": 1.7237760489963566e-05, "loss": 0.0401, "step": 41460 }, { "epoch": 0.6364822346711687, "grad_norm": 0.34708741307258606, "learning_rate": 1.7235911810108462e-05, "loss": 0.0367, "step": 41470 }, { "epoch": 0.6366357148338577, "grad_norm": 0.5006195306777954, "learning_rate": 1.723406261103206e-05, "loss": 0.0526, "step": 41480 }, { "epoch": 0.6367891949965467, "grad_norm": 0.3376553952693939, "learning_rate": 1.7232212892867052e-05, "loss": 0.0401, "step": 41490 }, { "epoch": 0.6369426751592356, "grad_norm": 0.5707849860191345, "learning_rate": 1.723036265574616e-05, "loss": 0.05, "step": 41500 }, { "epoch": 0.6370961553219247, "grad_norm": 0.4324966371059418, "learning_rate": 1.722851189980216e-05, "loss": 0.0361, "step": 41510 }, { "epoch": 0.6372496354846137, "grad_norm": 0.4304289221763611, "learning_rate": 1.7226660625167845e-05, "loss": 0.0478, "step": 41520 }, { "epoch": 0.6374031156473026, "grad_norm": 0.418127179145813, "learning_rate": 1.7224808831976066e-05, "loss": 0.0352, "step": 41530 }, { "epoch": 0.6375565958099916, "grad_norm": 0.423286497592926, "learning_rate": 1.7222956520359692e-05, "loss": 0.0376, "step": 41540 }, { "epoch": 0.6377100759726805, "grad_norm": 0.3771677315235138, "learning_rate": 1.7221103690451645e-05, "loss": 0.0491, "step": 41550 }, { "epoch": 0.6378635561353695, "grad_norm": 0.524652898311615, "learning_rate": 1.721925034238487e-05, "loss": 0.0473, "step": 41560 }, { "epoch": 0.6380170362980585, "grad_norm": 0.5406127572059631, "learning_rate": 1.7217396476292357e-05, "loss": 0.0465, "step": 41570 }, { "epoch": 0.6381705164607474, "grad_norm": 0.4293443560600281, "learning_rate": 1.7215542092307136e-05, "loss": 0.0426, "step": 41580 }, { "epoch": 0.6383239966234364, "grad_norm": 0.43845459818840027, "learning_rate": 1.7213687190562266e-05, "loss": 0.0458, "step": 41590 }, { "epoch": 0.6384774767861254, "grad_norm": 0.5435536503791809, "learning_rate": 1.7211831771190855e-05, "loss": 0.0401, "step": 41600 }, { "epoch": 0.6386309569488143, "grad_norm": 0.5848208665847778, "learning_rate": 1.7209975834326035e-05, "loss": 0.033, "step": 41610 }, { "epoch": 0.6387844371115033, "grad_norm": 0.439401775598526, "learning_rate": 1.720811938010098e-05, "loss": 0.0392, "step": 41620 }, { "epoch": 0.6389379172741924, "grad_norm": 0.38489070534706116, "learning_rate": 1.7206262408648905e-05, "loss": 0.0393, "step": 41630 }, { "epoch": 0.6390913974368813, "grad_norm": 0.3436737060546875, "learning_rate": 1.720440492010306e-05, "loss": 0.0345, "step": 41640 }, { "epoch": 0.6392448775995703, "grad_norm": 0.4794924855232239, "learning_rate": 1.720254691459673e-05, "loss": 0.0445, "step": 41650 }, { "epoch": 0.6393983577622592, "grad_norm": 0.44489791989326477, "learning_rate": 1.7200688392263235e-05, "loss": 0.043, "step": 41660 }, { "epoch": 0.6395518379249482, "grad_norm": 0.49902597069740295, "learning_rate": 1.719882935323594e-05, "loss": 0.0366, "step": 41670 }, { "epoch": 0.6397053180876372, "grad_norm": 0.6003847718238831, "learning_rate": 1.719696979764824e-05, "loss": 0.0349, "step": 41680 }, { "epoch": 0.6398587982503261, "grad_norm": 0.5480045080184937, "learning_rate": 1.7195109725633568e-05, "loss": 0.0541, "step": 41690 }, { "epoch": 0.6400122784130151, "grad_norm": 0.4095483124256134, "learning_rate": 1.7193249137325404e-05, "loss": 0.0378, "step": 41700 }, { "epoch": 0.6401657585757041, "grad_norm": 0.23359939455986023, "learning_rate": 1.7191388032857248e-05, "loss": 0.0322, "step": 41710 }, { "epoch": 0.640319238738393, "grad_norm": 0.44659343361854553, "learning_rate": 1.718952641236265e-05, "loss": 0.0434, "step": 41720 }, { "epoch": 0.640472718901082, "grad_norm": 0.5053739547729492, "learning_rate": 1.7187664275975186e-05, "loss": 0.0506, "step": 41730 }, { "epoch": 0.640626199063771, "grad_norm": 0.26863768696784973, "learning_rate": 1.7185801623828483e-05, "loss": 0.0398, "step": 41740 }, { "epoch": 0.64077967922646, "grad_norm": 0.3838299512863159, "learning_rate": 1.71839384560562e-05, "loss": 0.0397, "step": 41750 }, { "epoch": 0.640933159389149, "grad_norm": 0.4246322512626648, "learning_rate": 1.7182074772792026e-05, "loss": 0.028, "step": 41760 }, { "epoch": 0.641086639551838, "grad_norm": 0.44497358798980713, "learning_rate": 1.718021057416969e-05, "loss": 0.0379, "step": 41770 }, { "epoch": 0.6412401197145269, "grad_norm": 0.5055258274078369, "learning_rate": 1.717834586032296e-05, "loss": 0.0362, "step": 41780 }, { "epoch": 0.6413935998772159, "grad_norm": 0.5112568140029907, "learning_rate": 1.7176480631385646e-05, "loss": 0.0458, "step": 41790 }, { "epoch": 0.6415470800399048, "grad_norm": 0.49251407384872437, "learning_rate": 1.7174614887491586e-05, "loss": 0.0357, "step": 41800 }, { "epoch": 0.6417005602025938, "grad_norm": 0.6195774674415588, "learning_rate": 1.7172748628774658e-05, "loss": 0.0425, "step": 41810 }, { "epoch": 0.6418540403652828, "grad_norm": 0.4579692780971527, "learning_rate": 1.7170881855368778e-05, "loss": 0.0408, "step": 41820 }, { "epoch": 0.6420075205279717, "grad_norm": 0.35283222794532776, "learning_rate": 1.7169014567407903e-05, "loss": 0.0416, "step": 41830 }, { "epoch": 0.6421610006906607, "grad_norm": 0.3976176083087921, "learning_rate": 1.7167146765026015e-05, "loss": 0.0353, "step": 41840 }, { "epoch": 0.6423144808533497, "grad_norm": 0.41599443554878235, "learning_rate": 1.7165278448357144e-05, "loss": 0.0376, "step": 41850 }, { "epoch": 0.6424679610160386, "grad_norm": 0.5708218812942505, "learning_rate": 1.7163409617535357e-05, "loss": 0.0371, "step": 41860 }, { "epoch": 0.6426214411787277, "grad_norm": 0.40304338932037354, "learning_rate": 1.7161540272694747e-05, "loss": 0.0408, "step": 41870 }, { "epoch": 0.6427749213414167, "grad_norm": 0.433562308549881, "learning_rate": 1.7159670413969456e-05, "loss": 0.0344, "step": 41880 }, { "epoch": 0.6429284015041056, "grad_norm": 0.47029414772987366, "learning_rate": 1.7157800041493654e-05, "loss": 0.0391, "step": 41890 }, { "epoch": 0.6430818816667946, "grad_norm": 0.33089151978492737, "learning_rate": 1.715592915540156e-05, "loss": 0.0302, "step": 41900 }, { "epoch": 0.6432353618294835, "grad_norm": 0.49434542655944824, "learning_rate": 1.715405775582741e-05, "loss": 0.0415, "step": 41910 }, { "epoch": 0.6433888419921725, "grad_norm": 0.3099076747894287, "learning_rate": 1.7152185842905492e-05, "loss": 0.0355, "step": 41920 }, { "epoch": 0.6435423221548615, "grad_norm": 0.5176795125007629, "learning_rate": 1.7150313416770134e-05, "loss": 0.0385, "step": 41930 }, { "epoch": 0.6436958023175504, "grad_norm": 0.5139212012290955, "learning_rate": 1.7148440477555688e-05, "loss": 0.0388, "step": 41940 }, { "epoch": 0.6438492824802394, "grad_norm": 0.3286180794239044, "learning_rate": 1.714656702539655e-05, "loss": 0.0363, "step": 41950 }, { "epoch": 0.6440027626429284, "grad_norm": 0.4639885127544403, "learning_rate": 1.7144693060427157e-05, "loss": 0.0526, "step": 41960 }, { "epoch": 0.6441562428056173, "grad_norm": 0.3359004259109497, "learning_rate": 1.7142818582781965e-05, "loss": 0.0403, "step": 41970 }, { "epoch": 0.6443097229683064, "grad_norm": 0.4494737684726715, "learning_rate": 1.714094359259549e-05, "loss": 0.031, "step": 41980 }, { "epoch": 0.6444632031309954, "grad_norm": 0.3853474259376526, "learning_rate": 1.7139068090002274e-05, "loss": 0.0455, "step": 41990 }, { "epoch": 0.6446166832936843, "grad_norm": 0.4299226403236389, "learning_rate": 1.7137192075136892e-05, "loss": 0.0406, "step": 42000 }, { "epoch": 0.6447701634563733, "grad_norm": 0.32871443033218384, "learning_rate": 1.713531554813396e-05, "loss": 0.0419, "step": 42010 }, { "epoch": 0.6449236436190622, "grad_norm": 0.2866152822971344, "learning_rate": 1.7133438509128128e-05, "loss": 0.0369, "step": 42020 }, { "epoch": 0.6450771237817512, "grad_norm": 0.4696997106075287, "learning_rate": 1.7131560958254093e-05, "loss": 0.0428, "step": 42030 }, { "epoch": 0.6452306039444402, "grad_norm": 0.36905255913734436, "learning_rate": 1.7129682895646574e-05, "loss": 0.0351, "step": 42040 }, { "epoch": 0.6453840841071291, "grad_norm": 0.3121408224105835, "learning_rate": 1.7127804321440336e-05, "loss": 0.0521, "step": 42050 }, { "epoch": 0.6455375642698181, "grad_norm": 0.37134554982185364, "learning_rate": 1.7125925235770177e-05, "loss": 0.0437, "step": 42060 }, { "epoch": 0.6456910444325071, "grad_norm": 0.5088488459587097, "learning_rate": 1.7124045638770934e-05, "loss": 0.042, "step": 42070 }, { "epoch": 0.645844524595196, "grad_norm": 0.39590519666671753, "learning_rate": 1.7122165530577482e-05, "loss": 0.0409, "step": 42080 }, { "epoch": 0.645998004757885, "grad_norm": 0.3651058077812195, "learning_rate": 1.7120284911324726e-05, "loss": 0.0441, "step": 42090 }, { "epoch": 0.6461514849205741, "grad_norm": 0.41754159331321716, "learning_rate": 1.7118403781147614e-05, "loss": 0.0458, "step": 42100 }, { "epoch": 0.646304965083263, "grad_norm": 0.4168139398097992, "learning_rate": 1.711652214018113e-05, "loss": 0.0448, "step": 42110 }, { "epoch": 0.646458445245952, "grad_norm": 0.44013741612434387, "learning_rate": 1.7114639988560288e-05, "loss": 0.0344, "step": 42120 }, { "epoch": 0.646611925408641, "grad_norm": 0.6142598986625671, "learning_rate": 1.7112757326420154e-05, "loss": 0.0533, "step": 42130 }, { "epoch": 0.6467654055713299, "grad_norm": 0.341875821352005, "learning_rate": 1.711087415389581e-05, "loss": 0.036, "step": 42140 }, { "epoch": 0.6469188857340189, "grad_norm": 0.38575562834739685, "learning_rate": 1.710899047112239e-05, "loss": 0.0378, "step": 42150 }, { "epoch": 0.6470723658967078, "grad_norm": 0.3588857352733612, "learning_rate": 1.7107106278235064e-05, "loss": 0.039, "step": 42160 }, { "epoch": 0.6472258460593968, "grad_norm": 0.45133548974990845, "learning_rate": 1.7105221575369028e-05, "loss": 0.0303, "step": 42170 }, { "epoch": 0.6473793262220858, "grad_norm": 0.4469810128211975, "learning_rate": 1.7103336362659522e-05, "loss": 0.0458, "step": 42180 }, { "epoch": 0.6475328063847747, "grad_norm": 0.5493893027305603, "learning_rate": 1.7101450640241822e-05, "loss": 0.0499, "step": 42190 }, { "epoch": 0.6476862865474637, "grad_norm": 0.6309503316879272, "learning_rate": 1.7099564408251245e-05, "loss": 0.0482, "step": 42200 }, { "epoch": 0.6478397667101528, "grad_norm": 0.5192887187004089, "learning_rate": 1.7097677666823135e-05, "loss": 0.0284, "step": 42210 }, { "epoch": 0.6479932468728417, "grad_norm": 0.4500158429145813, "learning_rate": 1.7095790416092876e-05, "loss": 0.0453, "step": 42220 }, { "epoch": 0.6481467270355307, "grad_norm": 0.4568954110145569, "learning_rate": 1.709390265619589e-05, "loss": 0.037, "step": 42230 }, { "epoch": 0.6483002071982197, "grad_norm": 0.3053257167339325, "learning_rate": 1.709201438726764e-05, "loss": 0.0411, "step": 42240 }, { "epoch": 0.6484536873609086, "grad_norm": 0.450092613697052, "learning_rate": 1.7090125609443623e-05, "loss": 0.0433, "step": 42250 }, { "epoch": 0.6486071675235976, "grad_norm": 0.33823657035827637, "learning_rate": 1.708823632285936e-05, "loss": 0.0358, "step": 42260 }, { "epoch": 0.6487606476862865, "grad_norm": 0.37371626496315, "learning_rate": 1.7086346527650425e-05, "loss": 0.0434, "step": 42270 }, { "epoch": 0.6489141278489755, "grad_norm": 0.4199438691139221, "learning_rate": 1.7084456223952423e-05, "loss": 0.0357, "step": 42280 }, { "epoch": 0.6490676080116645, "grad_norm": 0.3477391004562378, "learning_rate": 1.7082565411900998e-05, "loss": 0.0508, "step": 42290 }, { "epoch": 0.6492210881743534, "grad_norm": 0.4046880602836609, "learning_rate": 1.708067409163182e-05, "loss": 0.0471, "step": 42300 }, { "epoch": 0.6493745683370424, "grad_norm": 0.37709566950798035, "learning_rate": 1.707878226328061e-05, "loss": 0.0448, "step": 42310 }, { "epoch": 0.6495280484997314, "grad_norm": 0.37920814752578735, "learning_rate": 1.707688992698311e-05, "loss": 0.0472, "step": 42320 }, { "epoch": 0.6496815286624203, "grad_norm": 0.4367770850658417, "learning_rate": 1.7074997082875113e-05, "loss": 0.0441, "step": 42330 }, { "epoch": 0.6498350088251094, "grad_norm": 0.39342349767684937, "learning_rate": 1.7073103731092445e-05, "loss": 0.0488, "step": 42340 }, { "epoch": 0.6499884889877984, "grad_norm": 0.4149223864078522, "learning_rate": 1.707120987177096e-05, "loss": 0.0448, "step": 42350 }, { "epoch": 0.6501419691504873, "grad_norm": 0.3373572826385498, "learning_rate": 1.7069315505046553e-05, "loss": 0.0462, "step": 42360 }, { "epoch": 0.6502954493131763, "grad_norm": 0.4773731827735901, "learning_rate": 1.7067420631055158e-05, "loss": 0.0382, "step": 42370 }, { "epoch": 0.6504489294758652, "grad_norm": 0.510850191116333, "learning_rate": 1.706552524993275e-05, "loss": 0.0526, "step": 42380 }, { "epoch": 0.6506024096385542, "grad_norm": 0.45475268363952637, "learning_rate": 1.7063629361815327e-05, "loss": 0.0421, "step": 42390 }, { "epoch": 0.6507558898012432, "grad_norm": 0.4278429448604584, "learning_rate": 1.7061732966838934e-05, "loss": 0.0417, "step": 42400 }, { "epoch": 0.6509093699639321, "grad_norm": 0.5092355608940125, "learning_rate": 1.7059836065139646e-05, "loss": 0.0498, "step": 42410 }, { "epoch": 0.6510628501266211, "grad_norm": 0.4042315185070038, "learning_rate": 1.705793865685358e-05, "loss": 0.0389, "step": 42420 }, { "epoch": 0.6512163302893101, "grad_norm": 0.4752404987812042, "learning_rate": 1.7056040742116892e-05, "loss": 0.0325, "step": 42430 }, { "epoch": 0.651369810451999, "grad_norm": 0.482583224773407, "learning_rate": 1.7054142321065756e-05, "loss": 0.0427, "step": 42440 }, { "epoch": 0.651523290614688, "grad_norm": 0.44152504205703735, "learning_rate": 1.705224339383641e-05, "loss": 0.0364, "step": 42450 }, { "epoch": 0.6516767707773771, "grad_norm": 0.25938576459884644, "learning_rate": 1.70503439605651e-05, "loss": 0.0327, "step": 42460 }, { "epoch": 0.651830250940066, "grad_norm": 0.40980443358421326, "learning_rate": 1.7048444021388132e-05, "loss": 0.042, "step": 42470 }, { "epoch": 0.651983731102755, "grad_norm": 0.4804301857948303, "learning_rate": 1.704654357644183e-05, "loss": 0.0466, "step": 42480 }, { "epoch": 0.652137211265444, "grad_norm": 0.32348155975341797, "learning_rate": 1.704464262586257e-05, "loss": 0.0335, "step": 42490 }, { "epoch": 0.6522906914281329, "grad_norm": 0.39971163868904114, "learning_rate": 1.7042741169786756e-05, "loss": 0.0463, "step": 42500 }, { "epoch": 0.6524441715908219, "grad_norm": 0.4233042299747467, "learning_rate": 1.704083920835083e-05, "loss": 0.0403, "step": 42510 }, { "epoch": 0.6525976517535108, "grad_norm": 0.31414180994033813, "learning_rate": 1.7038936741691263e-05, "loss": 0.0374, "step": 42520 }, { "epoch": 0.6527511319161998, "grad_norm": 0.43512338399887085, "learning_rate": 1.7037033769944577e-05, "loss": 0.0441, "step": 42530 }, { "epoch": 0.6529046120788888, "grad_norm": 0.5399850606918335, "learning_rate": 1.7035130293247314e-05, "loss": 0.0423, "step": 42540 }, { "epoch": 0.6530580922415777, "grad_norm": 0.3459080457687378, "learning_rate": 1.7033226311736066e-05, "loss": 0.0295, "step": 42550 }, { "epoch": 0.6532115724042667, "grad_norm": 0.3933076858520508, "learning_rate": 1.703132182554745e-05, "loss": 0.0302, "step": 42560 }, { "epoch": 0.6533650525669558, "grad_norm": 0.522007167339325, "learning_rate": 1.702941683481813e-05, "loss": 0.0423, "step": 42570 }, { "epoch": 0.6535185327296447, "grad_norm": 0.4740239977836609, "learning_rate": 1.7027511339684802e-05, "loss": 0.0382, "step": 42580 }, { "epoch": 0.6536720128923337, "grad_norm": 0.3665684759616852, "learning_rate": 1.702560534028419e-05, "loss": 0.0294, "step": 42590 }, { "epoch": 0.6538254930550227, "grad_norm": 0.4396916329860687, "learning_rate": 1.702369883675307e-05, "loss": 0.0427, "step": 42600 }, { "epoch": 0.6539789732177116, "grad_norm": 0.6514809727668762, "learning_rate": 1.7021791829228232e-05, "loss": 0.0507, "step": 42610 }, { "epoch": 0.6541324533804006, "grad_norm": 0.3997219502925873, "learning_rate": 1.701988431784653e-05, "loss": 0.0385, "step": 42620 }, { "epoch": 0.6542859335430895, "grad_norm": 0.47819820046424866, "learning_rate": 1.7017976302744835e-05, "loss": 0.0312, "step": 42630 }, { "epoch": 0.6544394137057785, "grad_norm": 0.5204542875289917, "learning_rate": 1.7016067784060057e-05, "loss": 0.0531, "step": 42640 }, { "epoch": 0.6545928938684675, "grad_norm": 0.3475320637226105, "learning_rate": 1.7014158761929144e-05, "loss": 0.0332, "step": 42650 }, { "epoch": 0.6547463740311564, "grad_norm": 0.4734717905521393, "learning_rate": 1.701224923648908e-05, "loss": 0.0366, "step": 42660 }, { "epoch": 0.6548998541938454, "grad_norm": 0.5150703191757202, "learning_rate": 1.7010339207876883e-05, "loss": 0.0391, "step": 42670 }, { "epoch": 0.6550533343565345, "grad_norm": 0.3891390562057495, "learning_rate": 1.700842867622962e-05, "loss": 0.0393, "step": 42680 }, { "epoch": 0.6552068145192234, "grad_norm": 0.35131892561912537, "learning_rate": 1.700651764168437e-05, "loss": 0.0383, "step": 42690 }, { "epoch": 0.6553602946819124, "grad_norm": 0.45305466651916504, "learning_rate": 1.7004606104378267e-05, "loss": 0.0408, "step": 42700 }, { "epoch": 0.6555137748446014, "grad_norm": 0.3171847462654114, "learning_rate": 1.7002694064448476e-05, "loss": 0.0402, "step": 42710 }, { "epoch": 0.6556672550072903, "grad_norm": 0.5281545519828796, "learning_rate": 1.7000781522032195e-05, "loss": 0.0365, "step": 42720 }, { "epoch": 0.6558207351699793, "grad_norm": 0.43696579337120056, "learning_rate": 1.699886847726667e-05, "loss": 0.0379, "step": 42730 }, { "epoch": 0.6559742153326682, "grad_norm": 0.44926130771636963, "learning_rate": 1.699695493028916e-05, "loss": 0.0414, "step": 42740 }, { "epoch": 0.6561276954953572, "grad_norm": 0.5162954330444336, "learning_rate": 1.6995040881236987e-05, "loss": 0.036, "step": 42750 }, { "epoch": 0.6562811756580462, "grad_norm": 0.42009350657463074, "learning_rate": 1.6993126330247487e-05, "loss": 0.0503, "step": 42760 }, { "epoch": 0.6564346558207351, "grad_norm": 0.34529909491539, "learning_rate": 1.6991211277458043e-05, "loss": 0.036, "step": 42770 }, { "epoch": 0.6565881359834241, "grad_norm": 0.39210283756256104, "learning_rate": 1.6989295723006073e-05, "loss": 0.0311, "step": 42780 }, { "epoch": 0.6567416161461131, "grad_norm": 0.40480953454971313, "learning_rate": 1.698737966702903e-05, "loss": 0.042, "step": 42790 }, { "epoch": 0.656895096308802, "grad_norm": 0.3300614356994629, "learning_rate": 1.69854631096644e-05, "loss": 0.0395, "step": 42800 }, { "epoch": 0.6570485764714911, "grad_norm": 0.5953159332275391, "learning_rate": 1.6983546051049714e-05, "loss": 0.046, "step": 42810 }, { "epoch": 0.6572020566341801, "grad_norm": 0.2883179187774658, "learning_rate": 1.698162849132252e-05, "loss": 0.0465, "step": 42820 }, { "epoch": 0.657355536796869, "grad_norm": 0.36464500427246094, "learning_rate": 1.697971043062043e-05, "loss": 0.0434, "step": 42830 }, { "epoch": 0.657509016959558, "grad_norm": 0.4577070474624634, "learning_rate": 1.697779186908107e-05, "loss": 0.0347, "step": 42840 }, { "epoch": 0.657662497122247, "grad_norm": 0.4224275052547455, "learning_rate": 1.6975872806842106e-05, "loss": 0.0461, "step": 42850 }, { "epoch": 0.6578159772849359, "grad_norm": 0.6385140419006348, "learning_rate": 1.697395324404125e-05, "loss": 0.0504, "step": 42860 }, { "epoch": 0.6579694574476249, "grad_norm": 0.5517114400863647, "learning_rate": 1.697203318081623e-05, "loss": 0.0523, "step": 42870 }, { "epoch": 0.6581229376103138, "grad_norm": 0.3844960629940033, "learning_rate": 1.6970112617304833e-05, "loss": 0.0537, "step": 42880 }, { "epoch": 0.6582764177730028, "grad_norm": 0.29011738300323486, "learning_rate": 1.6968191553644868e-05, "loss": 0.0378, "step": 42890 }, { "epoch": 0.6584298979356918, "grad_norm": 0.45581328868865967, "learning_rate": 1.6966269989974184e-05, "loss": 0.0554, "step": 42900 }, { "epoch": 0.6585833780983807, "grad_norm": 0.48174014687538147, "learning_rate": 1.6964347926430666e-05, "loss": 0.0423, "step": 42910 }, { "epoch": 0.6587368582610698, "grad_norm": 0.28378644585609436, "learning_rate": 1.696242536315223e-05, "loss": 0.0402, "step": 42920 }, { "epoch": 0.6588903384237588, "grad_norm": 0.4561298191547394, "learning_rate": 1.696050230027684e-05, "loss": 0.0352, "step": 42930 }, { "epoch": 0.6590438185864477, "grad_norm": 0.4196615219116211, "learning_rate": 1.6958578737942473e-05, "loss": 0.0436, "step": 42940 }, { "epoch": 0.6591972987491367, "grad_norm": 0.4353877604007721, "learning_rate": 1.6956654676287172e-05, "loss": 0.0456, "step": 42950 }, { "epoch": 0.6593507789118257, "grad_norm": 0.4622829258441925, "learning_rate": 1.695473011544899e-05, "loss": 0.0355, "step": 42960 }, { "epoch": 0.6595042590745146, "grad_norm": 0.30889183282852173, "learning_rate": 1.6952805055566027e-05, "loss": 0.0362, "step": 42970 }, { "epoch": 0.6596577392372036, "grad_norm": 0.37480878829956055, "learning_rate": 1.6950879496776428e-05, "loss": 0.0385, "step": 42980 }, { "epoch": 0.6598112193998925, "grad_norm": 0.398802787065506, "learning_rate": 1.694895343921835e-05, "loss": 0.0435, "step": 42990 }, { "epoch": 0.6599646995625815, "grad_norm": 0.4356752634048462, "learning_rate": 1.6947026883030008e-05, "loss": 0.0393, "step": 43000 }, { "epoch": 0.6601181797252705, "grad_norm": 0.4102120101451874, "learning_rate": 1.6945099828349642e-05, "loss": 0.0349, "step": 43010 }, { "epoch": 0.6602716598879594, "grad_norm": 0.413568913936615, "learning_rate": 1.694317227531553e-05, "loss": 0.0408, "step": 43020 }, { "epoch": 0.6604251400506485, "grad_norm": 0.5194500088691711, "learning_rate": 1.6941244224065988e-05, "loss": 0.0394, "step": 43030 }, { "epoch": 0.6605786202133375, "grad_norm": 0.41645315289497375, "learning_rate": 1.693931567473936e-05, "loss": 0.0496, "step": 43040 }, { "epoch": 0.6607321003760264, "grad_norm": 0.45612961053848267, "learning_rate": 1.6937386627474038e-05, "loss": 0.0423, "step": 43050 }, { "epoch": 0.6608855805387154, "grad_norm": 0.4488135576248169, "learning_rate": 1.6935457082408438e-05, "loss": 0.0328, "step": 43060 }, { "epoch": 0.6610390607014044, "grad_norm": 0.3291586935520172, "learning_rate": 1.693352703968102e-05, "loss": 0.0394, "step": 43070 }, { "epoch": 0.6611925408640933, "grad_norm": 0.40500661730766296, "learning_rate": 1.6931596499430275e-05, "loss": 0.0408, "step": 43080 }, { "epoch": 0.6613460210267823, "grad_norm": 0.34201958775520325, "learning_rate": 1.6929665461794732e-05, "loss": 0.0352, "step": 43090 }, { "epoch": 0.6614995011894712, "grad_norm": 0.45958778262138367, "learning_rate": 1.692773392691295e-05, "loss": 0.0399, "step": 43100 }, { "epoch": 0.6616529813521602, "grad_norm": 0.4947746694087982, "learning_rate": 1.6925801894923535e-05, "loss": 0.0467, "step": 43110 }, { "epoch": 0.6618064615148492, "grad_norm": 0.373757541179657, "learning_rate": 1.692386936596512e-05, "loss": 0.0346, "step": 43120 }, { "epoch": 0.6619599416775381, "grad_norm": 0.3791181445121765, "learning_rate": 1.6921936340176377e-05, "loss": 0.0396, "step": 43130 }, { "epoch": 0.6621134218402271, "grad_norm": 0.40496277809143066, "learning_rate": 1.6920002817696008e-05, "loss": 0.0394, "step": 43140 }, { "epoch": 0.6622669020029162, "grad_norm": 0.3645361661911011, "learning_rate": 1.6918068798662764e-05, "loss": 0.044, "step": 43150 }, { "epoch": 0.6624203821656051, "grad_norm": 0.5864940285682678, "learning_rate": 1.6916134283215412e-05, "loss": 0.0463, "step": 43160 }, { "epoch": 0.6625738623282941, "grad_norm": 0.290906697511673, "learning_rate": 1.6914199271492774e-05, "loss": 0.0353, "step": 43170 }, { "epoch": 0.6627273424909831, "grad_norm": 0.45396536588668823, "learning_rate": 1.6912263763633693e-05, "loss": 0.0361, "step": 43180 }, { "epoch": 0.662880822653672, "grad_norm": 0.41532278060913086, "learning_rate": 1.6910327759777058e-05, "loss": 0.0409, "step": 43190 }, { "epoch": 0.663034302816361, "grad_norm": 0.43270954489707947, "learning_rate": 1.6908391260061786e-05, "loss": 0.0336, "step": 43200 }, { "epoch": 0.66318778297905, "grad_norm": 0.5286371111869812, "learning_rate": 1.6906454264626834e-05, "loss": 0.0333, "step": 43210 }, { "epoch": 0.6633412631417389, "grad_norm": 0.675990879535675, "learning_rate": 1.6904516773611197e-05, "loss": 0.0492, "step": 43220 }, { "epoch": 0.6634947433044279, "grad_norm": 0.6344227194786072, "learning_rate": 1.6902578787153897e-05, "loss": 0.0403, "step": 43230 }, { "epoch": 0.6636482234671168, "grad_norm": 0.3818468749523163, "learning_rate": 1.6900640305393997e-05, "loss": 0.042, "step": 43240 }, { "epoch": 0.6638017036298058, "grad_norm": 0.3664064109325409, "learning_rate": 1.6898701328470596e-05, "loss": 0.0475, "step": 43250 }, { "epoch": 0.6639551837924949, "grad_norm": 0.32784023880958557, "learning_rate": 1.689676185652283e-05, "loss": 0.0473, "step": 43260 }, { "epoch": 0.6641086639551838, "grad_norm": 0.4298108220100403, "learning_rate": 1.6894821889689862e-05, "loss": 0.0419, "step": 43270 }, { "epoch": 0.6642621441178728, "grad_norm": 0.4543575346469879, "learning_rate": 1.6892881428110902e-05, "loss": 0.036, "step": 43280 }, { "epoch": 0.6644156242805618, "grad_norm": 0.45679354667663574, "learning_rate": 1.6890940471925186e-05, "loss": 0.0442, "step": 43290 }, { "epoch": 0.6645691044432507, "grad_norm": 0.39562225341796875, "learning_rate": 1.6888999021272e-05, "loss": 0.0329, "step": 43300 }, { "epoch": 0.6647225846059397, "grad_norm": 0.2940720319747925, "learning_rate": 1.688705707629064e-05, "loss": 0.0387, "step": 43310 }, { "epoch": 0.6648760647686287, "grad_norm": 0.3545334041118622, "learning_rate": 1.688511463712046e-05, "loss": 0.0305, "step": 43320 }, { "epoch": 0.6650295449313176, "grad_norm": 0.37457844614982605, "learning_rate": 1.6883171703900844e-05, "loss": 0.045, "step": 43330 }, { "epoch": 0.6651830250940066, "grad_norm": 0.5420060157775879, "learning_rate": 1.6881228276771207e-05, "loss": 0.0412, "step": 43340 }, { "epoch": 0.6653365052566955, "grad_norm": 0.42581576108932495, "learning_rate": 1.6879284355871002e-05, "loss": 0.0447, "step": 43350 }, { "epoch": 0.6654899854193845, "grad_norm": 0.4311508536338806, "learning_rate": 1.6877339941339714e-05, "loss": 0.0356, "step": 43360 }, { "epoch": 0.6656434655820735, "grad_norm": 0.5812095403671265, "learning_rate": 1.6875395033316873e-05, "loss": 0.028, "step": 43370 }, { "epoch": 0.6657969457447624, "grad_norm": 0.37267258763313293, "learning_rate": 1.6873449631942033e-05, "loss": 0.0397, "step": 43380 }, { "epoch": 0.6659504259074515, "grad_norm": 0.3880821764469147, "learning_rate": 1.6871503737354792e-05, "loss": 0.0386, "step": 43390 }, { "epoch": 0.6661039060701405, "grad_norm": 0.4414460361003876, "learning_rate": 1.686955734969478e-05, "loss": 0.0445, "step": 43400 }, { "epoch": 0.6662573862328294, "grad_norm": 0.5201975107192993, "learning_rate": 1.6867610469101657e-05, "loss": 0.0451, "step": 43410 }, { "epoch": 0.6664108663955184, "grad_norm": 0.4879363775253296, "learning_rate": 1.686566309571513e-05, "loss": 0.0368, "step": 43420 }, { "epoch": 0.6665643465582074, "grad_norm": 0.4393157660961151, "learning_rate": 1.6863715229674932e-05, "loss": 0.0433, "step": 43430 }, { "epoch": 0.6667178267208963, "grad_norm": 0.4937094748020172, "learning_rate": 1.6861766871120835e-05, "loss": 0.0387, "step": 43440 }, { "epoch": 0.6668713068835853, "grad_norm": 0.3815596401691437, "learning_rate": 1.6859818020192645e-05, "loss": 0.0351, "step": 43450 }, { "epoch": 0.6670247870462742, "grad_norm": 0.3480779230594635, "learning_rate": 1.6857868677030205e-05, "loss": 0.0481, "step": 43460 }, { "epoch": 0.6671782672089632, "grad_norm": 0.583756685256958, "learning_rate": 1.6855918841773393e-05, "loss": 0.0496, "step": 43470 }, { "epoch": 0.6673317473716522, "grad_norm": 0.4915943443775177, "learning_rate": 1.685396851456212e-05, "loss": 0.0311, "step": 43480 }, { "epoch": 0.6674852275343411, "grad_norm": 0.5041594505310059, "learning_rate": 1.6852017695536338e-05, "loss": 0.0502, "step": 43490 }, { "epoch": 0.6676387076970302, "grad_norm": 0.4311579465866089, "learning_rate": 1.6850066384836026e-05, "loss": 0.0327, "step": 43500 }, { "epoch": 0.6677921878597192, "grad_norm": 0.45746874809265137, "learning_rate": 1.68481145826012e-05, "loss": 0.0449, "step": 43510 }, { "epoch": 0.6679456680224081, "grad_norm": 0.3602045178413391, "learning_rate": 1.684616228897192e-05, "loss": 0.0434, "step": 43520 }, { "epoch": 0.6680991481850971, "grad_norm": 0.48549768328666687, "learning_rate": 1.6844209504088275e-05, "loss": 0.0431, "step": 43530 }, { "epoch": 0.6682526283477861, "grad_norm": 0.4236634075641632, "learning_rate": 1.6842256228090387e-05, "loss": 0.038, "step": 43540 }, { "epoch": 0.668406108510475, "grad_norm": 0.27687087655067444, "learning_rate": 1.6840302461118415e-05, "loss": 0.0339, "step": 43550 }, { "epoch": 0.668559588673164, "grad_norm": 0.5409709215164185, "learning_rate": 1.6838348203312555e-05, "loss": 0.0407, "step": 43560 }, { "epoch": 0.668713068835853, "grad_norm": 0.6699483394622803, "learning_rate": 1.683639345481303e-05, "loss": 0.0312, "step": 43570 }, { "epoch": 0.6688665489985419, "grad_norm": 0.3835989534854889, "learning_rate": 1.6834438215760122e-05, "loss": 0.0363, "step": 43580 }, { "epoch": 0.6690200291612309, "grad_norm": 0.35425737500190735, "learning_rate": 1.6832482486294117e-05, "loss": 0.0406, "step": 43590 }, { "epoch": 0.6691735093239198, "grad_norm": 0.4302813410758972, "learning_rate": 1.6830526266555355e-05, "loss": 0.0399, "step": 43600 }, { "epoch": 0.6693269894866088, "grad_norm": 0.5580951571464539, "learning_rate": 1.6828569556684204e-05, "loss": 0.0394, "step": 43610 }, { "epoch": 0.6694804696492979, "grad_norm": 0.47235602140426636, "learning_rate": 1.6826612356821074e-05, "loss": 0.0413, "step": 43620 }, { "epoch": 0.6696339498119868, "grad_norm": 0.49271610379219055, "learning_rate": 1.6824654667106404e-05, "loss": 0.0489, "step": 43630 }, { "epoch": 0.6697874299746758, "grad_norm": 0.36174046993255615, "learning_rate": 1.6822696487680676e-05, "loss": 0.0372, "step": 43640 }, { "epoch": 0.6699409101373648, "grad_norm": 0.5541732311248779, "learning_rate": 1.6820737818684395e-05, "loss": 0.0416, "step": 43650 }, { "epoch": 0.6700943903000537, "grad_norm": 0.3295765817165375, "learning_rate": 1.681877866025811e-05, "loss": 0.0377, "step": 43660 }, { "epoch": 0.6702478704627427, "grad_norm": 0.42803406715393066, "learning_rate": 1.68168190125424e-05, "loss": 0.0385, "step": 43670 }, { "epoch": 0.6704013506254317, "grad_norm": 0.4523228704929352, "learning_rate": 1.6814858875677883e-05, "loss": 0.0346, "step": 43680 }, { "epoch": 0.6705548307881206, "grad_norm": 0.36489635705947876, "learning_rate": 1.6812898249805213e-05, "loss": 0.0358, "step": 43690 }, { "epoch": 0.6707083109508096, "grad_norm": 0.4474121034145355, "learning_rate": 1.6810937135065075e-05, "loss": 0.0356, "step": 43700 }, { "epoch": 0.6708617911134985, "grad_norm": 0.5156313180923462, "learning_rate": 1.6808975531598192e-05, "loss": 0.0433, "step": 43710 }, { "epoch": 0.6710152712761875, "grad_norm": 0.41968655586242676, "learning_rate": 1.680701343954532e-05, "loss": 0.0328, "step": 43720 }, { "epoch": 0.6711687514388766, "grad_norm": 0.5127241611480713, "learning_rate": 1.680505085904726e-05, "loss": 0.0407, "step": 43730 }, { "epoch": 0.6713222316015655, "grad_norm": 0.37235820293426514, "learning_rate": 1.6803087790244822e-05, "loss": 0.0575, "step": 43740 }, { "epoch": 0.6714757117642545, "grad_norm": 0.4171707034111023, "learning_rate": 1.6801124233278884e-05, "loss": 0.0474, "step": 43750 }, { "epoch": 0.6716291919269435, "grad_norm": 0.4291827976703644, "learning_rate": 1.6799160188290332e-05, "loss": 0.0414, "step": 43760 }, { "epoch": 0.6717826720896324, "grad_norm": 0.36598852276802063, "learning_rate": 1.6797195655420104e-05, "loss": 0.0479, "step": 43770 }, { "epoch": 0.6719361522523214, "grad_norm": 0.4505510628223419, "learning_rate": 1.6795230634809168e-05, "loss": 0.0357, "step": 43780 }, { "epoch": 0.6720896324150104, "grad_norm": 0.36764097213745117, "learning_rate": 1.6793265126598525e-05, "loss": 0.0377, "step": 43790 }, { "epoch": 0.6722431125776993, "grad_norm": 0.3864659070968628, "learning_rate": 1.6791299130929213e-05, "loss": 0.0404, "step": 43800 }, { "epoch": 0.6723965927403883, "grad_norm": 0.5151364207267761, "learning_rate": 1.67893326479423e-05, "loss": 0.0456, "step": 43810 }, { "epoch": 0.6725500729030772, "grad_norm": 0.44492483139038086, "learning_rate": 1.6787365677778904e-05, "loss": 0.039, "step": 43820 }, { "epoch": 0.6727035530657662, "grad_norm": 0.49426916241645813, "learning_rate": 1.6785398220580154e-05, "loss": 0.0411, "step": 43830 }, { "epoch": 0.6728570332284552, "grad_norm": 0.23597663640975952, "learning_rate": 1.678343027648724e-05, "loss": 0.0403, "step": 43840 }, { "epoch": 0.6730105133911441, "grad_norm": 0.36357614398002625, "learning_rate": 1.678146184564136e-05, "loss": 0.0378, "step": 43850 }, { "epoch": 0.6731639935538332, "grad_norm": 0.28981682658195496, "learning_rate": 1.677949292818377e-05, "loss": 0.0384, "step": 43860 }, { "epoch": 0.6733174737165222, "grad_norm": 0.5049893856048584, "learning_rate": 1.6777523524255755e-05, "loss": 0.0449, "step": 43870 }, { "epoch": 0.6734709538792111, "grad_norm": 0.4048236906528473, "learning_rate": 1.6775553633998625e-05, "loss": 0.0383, "step": 43880 }, { "epoch": 0.6736244340419001, "grad_norm": 0.43588873744010925, "learning_rate": 1.6773583257553735e-05, "loss": 0.0388, "step": 43890 }, { "epoch": 0.6737779142045891, "grad_norm": 0.5769848227500916, "learning_rate": 1.6771612395062472e-05, "loss": 0.0349, "step": 43900 }, { "epoch": 0.673931394367278, "grad_norm": 0.4418486952781677, "learning_rate": 1.6769641046666252e-05, "loss": 0.0386, "step": 43910 }, { "epoch": 0.674084874529967, "grad_norm": 0.3978218734264374, "learning_rate": 1.676766921250654e-05, "loss": 0.0387, "step": 43920 }, { "epoch": 0.674238354692656, "grad_norm": 0.49625077843666077, "learning_rate": 1.6765696892724822e-05, "loss": 0.0416, "step": 43930 }, { "epoch": 0.6743918348553449, "grad_norm": 0.36527079343795776, "learning_rate": 1.6763724087462622e-05, "loss": 0.0374, "step": 43940 }, { "epoch": 0.6745453150180339, "grad_norm": 0.57912278175354, "learning_rate": 1.6761750796861508e-05, "loss": 0.0342, "step": 43950 }, { "epoch": 0.6746987951807228, "grad_norm": 0.41953474283218384, "learning_rate": 1.6759777021063072e-05, "loss": 0.047, "step": 43960 }, { "epoch": 0.6748522753434119, "grad_norm": 0.44535914063453674, "learning_rate": 1.6757802760208942e-05, "loss": 0.039, "step": 43970 }, { "epoch": 0.6750057555061009, "grad_norm": 0.39625677466392517, "learning_rate": 1.6755828014440787e-05, "loss": 0.0399, "step": 43980 }, { "epoch": 0.6751592356687898, "grad_norm": 0.49270907044410706, "learning_rate": 1.6753852783900306e-05, "loss": 0.0418, "step": 43990 }, { "epoch": 0.6753127158314788, "grad_norm": 0.28598347306251526, "learning_rate": 1.6751877068729238e-05, "loss": 0.0394, "step": 44000 }, { "epoch": 0.6754661959941678, "grad_norm": 0.4506230354309082, "learning_rate": 1.6749900869069343e-05, "loss": 0.0327, "step": 44010 }, { "epoch": 0.6756196761568567, "grad_norm": 0.3039160668849945, "learning_rate": 1.6747924185062433e-05, "loss": 0.0354, "step": 44020 }, { "epoch": 0.6757731563195457, "grad_norm": 0.4197687804698944, "learning_rate": 1.674594701685035e-05, "loss": 0.0328, "step": 44030 }, { "epoch": 0.6759266364822347, "grad_norm": 0.6072009205818176, "learning_rate": 1.6743969364574957e-05, "loss": 0.0356, "step": 44040 }, { "epoch": 0.6760801166449236, "grad_norm": 0.44521719217300415, "learning_rate": 1.674199122837817e-05, "loss": 0.0421, "step": 44050 }, { "epoch": 0.6762335968076126, "grad_norm": 0.47297203540802, "learning_rate": 1.6740012608401938e-05, "loss": 0.043, "step": 44060 }, { "epoch": 0.6763870769703015, "grad_norm": 0.5194669365882874, "learning_rate": 1.6738033504788226e-05, "loss": 0.0385, "step": 44070 }, { "epoch": 0.6765405571329905, "grad_norm": 0.39703643321990967, "learning_rate": 1.6736053917679058e-05, "loss": 0.0389, "step": 44080 }, { "epoch": 0.6766940372956796, "grad_norm": 0.38060835003852844, "learning_rate": 1.6734073847216478e-05, "loss": 0.0362, "step": 44090 }, { "epoch": 0.6768475174583685, "grad_norm": 0.5420107841491699, "learning_rate": 1.6732093293542564e-05, "loss": 0.0427, "step": 44100 }, { "epoch": 0.6770009976210575, "grad_norm": 0.41184234619140625, "learning_rate": 1.673011225679944e-05, "loss": 0.0369, "step": 44110 }, { "epoch": 0.6771544777837465, "grad_norm": 0.6764559149742126, "learning_rate": 1.6728130737129252e-05, "loss": 0.0539, "step": 44120 }, { "epoch": 0.6773079579464354, "grad_norm": 0.40763112902641296, "learning_rate": 1.672614873467419e-05, "loss": 0.0373, "step": 44130 }, { "epoch": 0.6774614381091244, "grad_norm": 0.45891234278678894, "learning_rate": 1.672416624957647e-05, "loss": 0.0382, "step": 44140 }, { "epoch": 0.6776149182718134, "grad_norm": 0.44713565707206726, "learning_rate": 1.6722183281978356e-05, "loss": 0.0362, "step": 44150 }, { "epoch": 0.6777683984345023, "grad_norm": 0.3553842008113861, "learning_rate": 1.672019983202213e-05, "loss": 0.0425, "step": 44160 }, { "epoch": 0.6779218785971913, "grad_norm": 0.3334140181541443, "learning_rate": 1.6718215899850127e-05, "loss": 0.0401, "step": 44170 }, { "epoch": 0.6780753587598802, "grad_norm": 0.2947387993335724, "learning_rate": 1.6716231485604692e-05, "loss": 0.0338, "step": 44180 }, { "epoch": 0.6782288389225692, "grad_norm": 0.627781331539154, "learning_rate": 1.6714246589428228e-05, "loss": 0.037, "step": 44190 }, { "epoch": 0.6783823190852583, "grad_norm": 0.5946485996246338, "learning_rate": 1.671226121146316e-05, "loss": 0.0334, "step": 44200 }, { "epoch": 0.6785357992479472, "grad_norm": 0.43518340587615967, "learning_rate": 1.6710275351851953e-05, "loss": 0.0461, "step": 44210 }, { "epoch": 0.6786892794106362, "grad_norm": 0.688674807548523, "learning_rate": 1.670828901073711e-05, "loss": 0.046, "step": 44220 }, { "epoch": 0.6788427595733252, "grad_norm": 0.27524876594543457, "learning_rate": 1.670630218826115e-05, "loss": 0.0329, "step": 44230 }, { "epoch": 0.6789962397360141, "grad_norm": 0.5700685977935791, "learning_rate": 1.670431488456665e-05, "loss": 0.04, "step": 44240 }, { "epoch": 0.6791497198987031, "grad_norm": 0.45515429973602295, "learning_rate": 1.6702327099796214e-05, "loss": 0.032, "step": 44250 }, { "epoch": 0.6793032000613921, "grad_norm": 0.5612783432006836, "learning_rate": 1.670033883409247e-05, "loss": 0.0504, "step": 44260 }, { "epoch": 0.679456680224081, "grad_norm": 0.39384496212005615, "learning_rate": 1.669835008759809e-05, "loss": 0.0322, "step": 44270 }, { "epoch": 0.67961016038677, "grad_norm": 0.33020269870758057, "learning_rate": 1.669636086045578e-05, "loss": 0.0339, "step": 44280 }, { "epoch": 0.679763640549459, "grad_norm": 0.4072957932949066, "learning_rate": 1.669437115280828e-05, "loss": 0.0452, "step": 44290 }, { "epoch": 0.6799171207121479, "grad_norm": 0.666612446308136, "learning_rate": 1.669238096479836e-05, "loss": 0.0405, "step": 44300 }, { "epoch": 0.680070600874837, "grad_norm": 0.34686118364334106, "learning_rate": 1.669039029656883e-05, "loss": 0.0431, "step": 44310 }, { "epoch": 0.6802240810375259, "grad_norm": 0.39191433787345886, "learning_rate": 1.668839914826254e-05, "loss": 0.0352, "step": 44320 }, { "epoch": 0.6803775612002149, "grad_norm": 0.5885943174362183, "learning_rate": 1.6686407520022355e-05, "loss": 0.0423, "step": 44330 }, { "epoch": 0.6805310413629039, "grad_norm": 0.5191898345947266, "learning_rate": 1.6684415411991198e-05, "loss": 0.0348, "step": 44340 }, { "epoch": 0.6806845215255928, "grad_norm": 0.5520237684249878, "learning_rate": 1.668242282431201e-05, "loss": 0.0367, "step": 44350 }, { "epoch": 0.6808380016882818, "grad_norm": 0.47078630328178406, "learning_rate": 1.668042975712777e-05, "loss": 0.0387, "step": 44360 }, { "epoch": 0.6809914818509708, "grad_norm": 0.4751673936843872, "learning_rate": 1.667843621058149e-05, "loss": 0.0457, "step": 44370 }, { "epoch": 0.6811449620136597, "grad_norm": 0.5083388686180115, "learning_rate": 1.6676442184816223e-05, "loss": 0.0361, "step": 44380 }, { "epoch": 0.6812984421763487, "grad_norm": 0.5175982117652893, "learning_rate": 1.6674447679975058e-05, "loss": 0.0499, "step": 44390 }, { "epoch": 0.6814519223390377, "grad_norm": 0.3976042568683624, "learning_rate": 1.6672452696201108e-05, "loss": 0.0507, "step": 44400 }, { "epoch": 0.6816054025017266, "grad_norm": 0.4635325074195862, "learning_rate": 1.6670457233637523e-05, "loss": 0.0446, "step": 44410 }, { "epoch": 0.6817588826644156, "grad_norm": 0.5131575465202332, "learning_rate": 1.666846129242749e-05, "loss": 0.0338, "step": 44420 }, { "epoch": 0.6819123628271045, "grad_norm": 0.35238075256347656, "learning_rate": 1.666646487271424e-05, "loss": 0.0428, "step": 44430 }, { "epoch": 0.6820658429897936, "grad_norm": 0.5881467461585999, "learning_rate": 1.666446797464101e-05, "loss": 0.0407, "step": 44440 }, { "epoch": 0.6822193231524826, "grad_norm": 0.3335917592048645, "learning_rate": 1.666247059835111e-05, "loss": 0.0468, "step": 44450 }, { "epoch": 0.6823728033151715, "grad_norm": 0.6299095153808594, "learning_rate": 1.666047274398785e-05, "loss": 0.0381, "step": 44460 }, { "epoch": 0.6825262834778605, "grad_norm": 0.4754902124404907, "learning_rate": 1.6658474411694593e-05, "loss": 0.0451, "step": 44470 }, { "epoch": 0.6826797636405495, "grad_norm": 0.42317095398902893, "learning_rate": 1.6656475601614733e-05, "loss": 0.0452, "step": 44480 }, { "epoch": 0.6828332438032384, "grad_norm": 0.28799471259117126, "learning_rate": 1.6654476313891693e-05, "loss": 0.0335, "step": 44490 }, { "epoch": 0.6829867239659274, "grad_norm": 0.30190175771713257, "learning_rate": 1.665247654866894e-05, "loss": 0.0595, "step": 44500 }, { "epoch": 0.6831402041286164, "grad_norm": 0.371098130941391, "learning_rate": 1.6650476306089962e-05, "loss": 0.0334, "step": 44510 }, { "epoch": 0.6832936842913053, "grad_norm": 0.3428143560886383, "learning_rate": 1.6648475586298296e-05, "loss": 0.0386, "step": 44520 }, { "epoch": 0.6834471644539943, "grad_norm": 0.4389808177947998, "learning_rate": 1.66464743894375e-05, "loss": 0.0446, "step": 44530 }, { "epoch": 0.6836006446166832, "grad_norm": 0.41709208488464355, "learning_rate": 1.6644472715651178e-05, "loss": 0.0305, "step": 44540 }, { "epoch": 0.6837541247793723, "grad_norm": 0.3457297086715698, "learning_rate": 1.664247056508296e-05, "loss": 0.0467, "step": 44550 }, { "epoch": 0.6839076049420613, "grad_norm": 0.7486692667007446, "learning_rate": 1.6640467937876507e-05, "loss": 0.0422, "step": 44560 }, { "epoch": 0.6840610851047502, "grad_norm": 0.5149848461151123, "learning_rate": 1.663846483417553e-05, "loss": 0.0432, "step": 44570 }, { "epoch": 0.6842145652674392, "grad_norm": 0.40121912956237793, "learning_rate": 1.6636461254123758e-05, "loss": 0.04, "step": 44580 }, { "epoch": 0.6843680454301282, "grad_norm": 0.3853830099105835, "learning_rate": 1.6634457197864962e-05, "loss": 0.0494, "step": 44590 }, { "epoch": 0.6845215255928171, "grad_norm": 0.43230804800987244, "learning_rate": 1.6632452665542944e-05, "loss": 0.0531, "step": 44600 }, { "epoch": 0.6846750057555061, "grad_norm": 0.5793358087539673, "learning_rate": 1.6630447657301542e-05, "loss": 0.0394, "step": 44610 }, { "epoch": 0.6848284859181951, "grad_norm": 0.49980419874191284, "learning_rate": 1.662844217328463e-05, "loss": 0.0369, "step": 44620 }, { "epoch": 0.684981966080884, "grad_norm": 0.4014466404914856, "learning_rate": 1.662643621363611e-05, "loss": 0.0462, "step": 44630 }, { "epoch": 0.685135446243573, "grad_norm": 0.3714095652103424, "learning_rate": 1.662442977849992e-05, "loss": 0.0355, "step": 44640 }, { "epoch": 0.685288926406262, "grad_norm": 0.4700325131416321, "learning_rate": 1.6622422868020044e-05, "loss": 0.0368, "step": 44650 }, { "epoch": 0.685442406568951, "grad_norm": 0.4381849765777588, "learning_rate": 1.6620415482340485e-05, "loss": 0.0429, "step": 44660 }, { "epoch": 0.68559588673164, "grad_norm": 0.44626331329345703, "learning_rate": 1.661840762160528e-05, "loss": 0.0436, "step": 44670 }, { "epoch": 0.6857493668943289, "grad_norm": 0.587874710559845, "learning_rate": 1.6616399285958512e-05, "loss": 0.0369, "step": 44680 }, { "epoch": 0.6859028470570179, "grad_norm": 0.4049529433250427, "learning_rate": 1.6614390475544287e-05, "loss": 0.0449, "step": 44690 }, { "epoch": 0.6860563272197069, "grad_norm": 0.615972638130188, "learning_rate": 1.661238119050675e-05, "loss": 0.0418, "step": 44700 }, { "epoch": 0.6862098073823958, "grad_norm": 0.4573531150817871, "learning_rate": 1.6610371430990086e-05, "loss": 0.0404, "step": 44710 }, { "epoch": 0.6863632875450848, "grad_norm": 0.38976243138313293, "learning_rate": 1.6608361197138504e-05, "loss": 0.0421, "step": 44720 }, { "epoch": 0.6865167677077738, "grad_norm": 0.42834874987602234, "learning_rate": 1.6606350489096245e-05, "loss": 0.04, "step": 44730 }, { "epoch": 0.6866702478704627, "grad_norm": 0.3221938908100128, "learning_rate": 1.6604339307007596e-05, "loss": 0.0354, "step": 44740 }, { "epoch": 0.6868237280331517, "grad_norm": 0.43064001202583313, "learning_rate": 1.6602327651016874e-05, "loss": 0.0423, "step": 44750 }, { "epoch": 0.6869772081958407, "grad_norm": 0.37248027324676514, "learning_rate": 1.660031552126842e-05, "loss": 0.0366, "step": 44760 }, { "epoch": 0.6871306883585296, "grad_norm": 0.4365268647670746, "learning_rate": 1.6598302917906622e-05, "loss": 0.0513, "step": 44770 }, { "epoch": 0.6872841685212187, "grad_norm": 0.5761393904685974, "learning_rate": 1.6596289841075896e-05, "loss": 0.0423, "step": 44780 }, { "epoch": 0.6874376486839076, "grad_norm": 0.36978453397750854, "learning_rate": 1.6594276290920688e-05, "loss": 0.0348, "step": 44790 }, { "epoch": 0.6875911288465966, "grad_norm": 0.33359330892562866, "learning_rate": 1.659226226758549e-05, "loss": 0.039, "step": 44800 }, { "epoch": 0.6877446090092856, "grad_norm": 0.31670770049095154, "learning_rate": 1.6590247771214816e-05, "loss": 0.0345, "step": 44810 }, { "epoch": 0.6878980891719745, "grad_norm": 0.4444960951805115, "learning_rate": 1.6588232801953223e-05, "loss": 0.0379, "step": 44820 }, { "epoch": 0.6880515693346635, "grad_norm": 0.44299519062042236, "learning_rate": 1.6586217359945293e-05, "loss": 0.0317, "step": 44830 }, { "epoch": 0.6882050494973525, "grad_norm": 0.4344033896923065, "learning_rate": 1.6584201445335645e-05, "loss": 0.0384, "step": 44840 }, { "epoch": 0.6883585296600414, "grad_norm": 0.37517228722572327, "learning_rate": 1.6582185058268936e-05, "loss": 0.0443, "step": 44850 }, { "epoch": 0.6885120098227304, "grad_norm": 0.3780937194824219, "learning_rate": 1.658016819888985e-05, "loss": 0.0296, "step": 44860 }, { "epoch": 0.6886654899854194, "grad_norm": 0.37605616450309753, "learning_rate": 1.6578150867343123e-05, "loss": 0.0347, "step": 44870 }, { "epoch": 0.6888189701481083, "grad_norm": 0.3842392563819885, "learning_rate": 1.6576133063773496e-05, "loss": 0.0313, "step": 44880 }, { "epoch": 0.6889724503107973, "grad_norm": 0.42453739047050476, "learning_rate": 1.6574114788325767e-05, "loss": 0.0415, "step": 44890 }, { "epoch": 0.6891259304734862, "grad_norm": 0.4996246099472046, "learning_rate": 1.6572096041144753e-05, "loss": 0.0379, "step": 44900 }, { "epoch": 0.6892794106361753, "grad_norm": 0.501021683216095, "learning_rate": 1.6570076822375317e-05, "loss": 0.043, "step": 44910 }, { "epoch": 0.6894328907988643, "grad_norm": 0.4408095180988312, "learning_rate": 1.6568057132162348e-05, "loss": 0.0473, "step": 44920 }, { "epoch": 0.6895863709615532, "grad_norm": 0.2861311435699463, "learning_rate": 1.656603697065077e-05, "loss": 0.0358, "step": 44930 }, { "epoch": 0.6897398511242422, "grad_norm": 0.3998424708843231, "learning_rate": 1.6564016337985546e-05, "loss": 0.0326, "step": 44940 }, { "epoch": 0.6898933312869312, "grad_norm": 0.45682284235954285, "learning_rate": 1.656199523431167e-05, "loss": 0.0385, "step": 44950 }, { "epoch": 0.6900468114496201, "grad_norm": 0.5777379870414734, "learning_rate": 1.6559973659774163e-05, "loss": 0.0414, "step": 44960 }, { "epoch": 0.6902002916123091, "grad_norm": 0.4823199510574341, "learning_rate": 1.655795161451809e-05, "loss": 0.0355, "step": 44970 }, { "epoch": 0.6903537717749981, "grad_norm": 0.4897819757461548, "learning_rate": 1.655592909868854e-05, "loss": 0.0456, "step": 44980 }, { "epoch": 0.690507251937687, "grad_norm": 0.31522807478904724, "learning_rate": 1.655390611243064e-05, "loss": 0.0292, "step": 44990 }, { "epoch": 0.690660732100376, "grad_norm": 0.35029321908950806, "learning_rate": 1.6551882655889565e-05, "loss": 0.0515, "step": 45000 }, { "epoch": 0.690814212263065, "grad_norm": 0.6349129676818848, "learning_rate": 1.65498587292105e-05, "loss": 0.0434, "step": 45010 }, { "epoch": 0.690967692425754, "grad_norm": 0.29258978366851807, "learning_rate": 1.6547834332538668e-05, "loss": 0.0368, "step": 45020 }, { "epoch": 0.691121172588443, "grad_norm": 0.4176761209964752, "learning_rate": 1.6545809466019345e-05, "loss": 0.048, "step": 45030 }, { "epoch": 0.6912746527511319, "grad_norm": 0.36657044291496277, "learning_rate": 1.654378412979782e-05, "loss": 0.0362, "step": 45040 }, { "epoch": 0.6914281329138209, "grad_norm": 0.3934519588947296, "learning_rate": 1.654175832401943e-05, "loss": 0.033, "step": 45050 }, { "epoch": 0.6915816130765099, "grad_norm": 0.4823852777481079, "learning_rate": 1.653973204882953e-05, "loss": 0.0447, "step": 45060 }, { "epoch": 0.6917350932391988, "grad_norm": 0.3813953697681427, "learning_rate": 1.6537705304373527e-05, "loss": 0.038, "step": 45070 }, { "epoch": 0.6918885734018878, "grad_norm": 0.4356282353401184, "learning_rate": 1.6535678090796844e-05, "loss": 0.0316, "step": 45080 }, { "epoch": 0.6920420535645768, "grad_norm": 0.651024580001831, "learning_rate": 1.653365040824495e-05, "loss": 0.0432, "step": 45090 }, { "epoch": 0.6921955337272657, "grad_norm": 0.526043176651001, "learning_rate": 1.6531622256863344e-05, "loss": 0.0309, "step": 45100 }, { "epoch": 0.6923490138899547, "grad_norm": 0.2942568361759186, "learning_rate": 1.652959363679756e-05, "loss": 0.0446, "step": 45110 }, { "epoch": 0.6925024940526437, "grad_norm": 0.40194353461265564, "learning_rate": 1.652756454819316e-05, "loss": 0.0304, "step": 45120 }, { "epoch": 0.6926559742153326, "grad_norm": 0.35432934761047363, "learning_rate": 1.6525534991195745e-05, "loss": 0.0357, "step": 45130 }, { "epoch": 0.6928094543780217, "grad_norm": 0.42622190713882446, "learning_rate": 1.652350496595095e-05, "loss": 0.0418, "step": 45140 }, { "epoch": 0.6929629345407106, "grad_norm": 0.36904776096343994, "learning_rate": 1.6521474472604442e-05, "loss": 0.0373, "step": 45150 }, { "epoch": 0.6931164147033996, "grad_norm": 0.7053045034408569, "learning_rate": 1.6519443511301915e-05, "loss": 0.0335, "step": 45160 }, { "epoch": 0.6932698948660886, "grad_norm": 0.3387088477611542, "learning_rate": 1.6517412082189116e-05, "loss": 0.0362, "step": 45170 }, { "epoch": 0.6934233750287775, "grad_norm": 0.4013192057609558, "learning_rate": 1.65153801854118e-05, "loss": 0.0377, "step": 45180 }, { "epoch": 0.6935768551914665, "grad_norm": 0.3101034462451935, "learning_rate": 1.651334782111577e-05, "loss": 0.0356, "step": 45190 }, { "epoch": 0.6937303353541555, "grad_norm": 0.38988152146339417, "learning_rate": 1.651131498944687e-05, "loss": 0.0421, "step": 45200 }, { "epoch": 0.6938838155168444, "grad_norm": 0.4588204324245453, "learning_rate": 1.6509281690550955e-05, "loss": 0.0432, "step": 45210 }, { "epoch": 0.6940372956795334, "grad_norm": 0.3928549587726593, "learning_rate": 1.650724792457394e-05, "loss": 0.0373, "step": 45220 }, { "epoch": 0.6941907758422224, "grad_norm": 0.4640406370162964, "learning_rate": 1.650521369166175e-05, "loss": 0.0371, "step": 45230 }, { "epoch": 0.6943442560049113, "grad_norm": 0.6521461606025696, "learning_rate": 1.6503178991960354e-05, "loss": 0.0449, "step": 45240 }, { "epoch": 0.6944977361676004, "grad_norm": 0.2903396189212799, "learning_rate": 1.650114382561576e-05, "loss": 0.037, "step": 45250 }, { "epoch": 0.6946512163302893, "grad_norm": 0.3860075771808624, "learning_rate": 1.6499108192774004e-05, "loss": 0.038, "step": 45260 }, { "epoch": 0.6948046964929783, "grad_norm": 0.5227579474449158, "learning_rate": 1.6497072093581154e-05, "loss": 0.0369, "step": 45270 }, { "epoch": 0.6949581766556673, "grad_norm": 0.24483995139598846, "learning_rate": 1.6495035528183304e-05, "loss": 0.0429, "step": 45280 }, { "epoch": 0.6951116568183562, "grad_norm": 0.41829249262809753, "learning_rate": 1.6492998496726606e-05, "loss": 0.035, "step": 45290 }, { "epoch": 0.6952651369810452, "grad_norm": 0.44071903824806213, "learning_rate": 1.6490960999357216e-05, "loss": 0.0376, "step": 45300 }, { "epoch": 0.6954186171437342, "grad_norm": 0.33310186862945557, "learning_rate": 1.648892303622134e-05, "loss": 0.0403, "step": 45310 }, { "epoch": 0.6955720973064231, "grad_norm": 0.40235769748687744, "learning_rate": 1.6486884607465222e-05, "loss": 0.0404, "step": 45320 }, { "epoch": 0.6957255774691121, "grad_norm": 0.36601895093917847, "learning_rate": 1.6484845713235123e-05, "loss": 0.0401, "step": 45330 }, { "epoch": 0.6958790576318011, "grad_norm": 0.41242411732673645, "learning_rate": 1.6482806353677347e-05, "loss": 0.0348, "step": 45340 }, { "epoch": 0.69603253779449, "grad_norm": 0.5248878598213196, "learning_rate": 1.648076652893824e-05, "loss": 0.042, "step": 45350 }, { "epoch": 0.696186017957179, "grad_norm": 0.5575467348098755, "learning_rate": 1.647872623916416e-05, "loss": 0.0489, "step": 45360 }, { "epoch": 0.6963394981198681, "grad_norm": 0.4254952073097229, "learning_rate": 1.6476685484501514e-05, "loss": 0.0394, "step": 45370 }, { "epoch": 0.696492978282557, "grad_norm": 0.5431193709373474, "learning_rate": 1.6474644265096745e-05, "loss": 0.0385, "step": 45380 }, { "epoch": 0.696646458445246, "grad_norm": 0.4557267129421234, "learning_rate": 1.6472602581096316e-05, "loss": 0.0582, "step": 45390 }, { "epoch": 0.6967999386079349, "grad_norm": 0.4909428656101227, "learning_rate": 1.647056043264673e-05, "loss": 0.0498, "step": 45400 }, { "epoch": 0.6969534187706239, "grad_norm": 0.35891640186309814, "learning_rate": 1.6468517819894533e-05, "loss": 0.0384, "step": 45410 }, { "epoch": 0.6971068989333129, "grad_norm": 0.41081857681274414, "learning_rate": 1.6466474742986284e-05, "loss": 0.0424, "step": 45420 }, { "epoch": 0.6972603790960018, "grad_norm": 0.4369806945323944, "learning_rate": 1.646443120206859e-05, "loss": 0.0389, "step": 45430 }, { "epoch": 0.6974138592586908, "grad_norm": 0.3909912407398224, "learning_rate": 1.646238719728809e-05, "loss": 0.0403, "step": 45440 }, { "epoch": 0.6975673394213798, "grad_norm": 0.3129705488681793, "learning_rate": 1.6460342728791455e-05, "loss": 0.0382, "step": 45450 }, { "epoch": 0.6977208195840687, "grad_norm": 0.36705586314201355, "learning_rate": 1.6458297796725383e-05, "loss": 0.0521, "step": 45460 }, { "epoch": 0.6978742997467577, "grad_norm": 0.3758176863193512, "learning_rate": 1.645625240123661e-05, "loss": 0.0332, "step": 45470 }, { "epoch": 0.6980277799094468, "grad_norm": 0.4201083779335022, "learning_rate": 1.6454206542471913e-05, "loss": 0.041, "step": 45480 }, { "epoch": 0.6981812600721357, "grad_norm": 0.3371908962726593, "learning_rate": 1.645216022057809e-05, "loss": 0.0408, "step": 45490 }, { "epoch": 0.6983347402348247, "grad_norm": 0.39380040764808655, "learning_rate": 1.645011343570198e-05, "loss": 0.0415, "step": 45500 }, { "epoch": 0.6984882203975136, "grad_norm": 0.5509623289108276, "learning_rate": 1.6448066187990448e-05, "loss": 0.0469, "step": 45510 }, { "epoch": 0.6986417005602026, "grad_norm": 0.6574108600616455, "learning_rate": 1.64460184775904e-05, "loss": 0.0336, "step": 45520 }, { "epoch": 0.6987951807228916, "grad_norm": 0.24138577282428741, "learning_rate": 1.644397030464877e-05, "loss": 0.035, "step": 45530 }, { "epoch": 0.6989486608855805, "grad_norm": 0.3079555332660675, "learning_rate": 1.644192166931253e-05, "loss": 0.0487, "step": 45540 }, { "epoch": 0.6991021410482695, "grad_norm": 0.3971554934978485, "learning_rate": 1.643987257172868e-05, "loss": 0.0394, "step": 45550 }, { "epoch": 0.6992556212109585, "grad_norm": 0.42205923795700073, "learning_rate": 1.643782301204426e-05, "loss": 0.0413, "step": 45560 }, { "epoch": 0.6994091013736474, "grad_norm": 0.49822181463241577, "learning_rate": 1.6435772990406328e-05, "loss": 0.0388, "step": 45570 }, { "epoch": 0.6995625815363364, "grad_norm": 0.4575061500072479, "learning_rate": 1.6433722506961994e-05, "loss": 0.032, "step": 45580 }, { "epoch": 0.6997160616990254, "grad_norm": 0.3795034885406494, "learning_rate": 1.643167156185839e-05, "loss": 0.039, "step": 45590 }, { "epoch": 0.6998695418617144, "grad_norm": 0.44749435782432556, "learning_rate": 1.6429620155242685e-05, "loss": 0.0429, "step": 45600 }, { "epoch": 0.7000230220244034, "grad_norm": 0.4166125953197479, "learning_rate": 1.642756828726208e-05, "loss": 0.033, "step": 45610 }, { "epoch": 0.7001765021870923, "grad_norm": 0.47038912773132324, "learning_rate": 1.642551595806381e-05, "loss": 0.0393, "step": 45620 }, { "epoch": 0.7003299823497813, "grad_norm": 0.4939087927341461, "learning_rate": 1.6423463167795138e-05, "loss": 0.039, "step": 45630 }, { "epoch": 0.7004834625124703, "grad_norm": 0.4768620729446411, "learning_rate": 1.642140991660337e-05, "loss": 0.0333, "step": 45640 }, { "epoch": 0.7006369426751592, "grad_norm": 0.5722091794013977, "learning_rate": 1.641935620463584e-05, "loss": 0.035, "step": 45650 }, { "epoch": 0.7007904228378482, "grad_norm": 0.5477108955383301, "learning_rate": 1.6417302032039907e-05, "loss": 0.0392, "step": 45660 }, { "epoch": 0.7009439030005372, "grad_norm": 0.5127326846122742, "learning_rate": 1.6415247398962978e-05, "loss": 0.039, "step": 45670 }, { "epoch": 0.7010973831632261, "grad_norm": 0.532807469367981, "learning_rate": 1.641319230555248e-05, "loss": 0.04, "step": 45680 }, { "epoch": 0.7012508633259151, "grad_norm": 0.34739455580711365, "learning_rate": 1.6411136751955882e-05, "loss": 0.0357, "step": 45690 }, { "epoch": 0.7014043434886041, "grad_norm": 0.47190508246421814, "learning_rate": 1.6409080738320687e-05, "loss": 0.0392, "step": 45700 }, { "epoch": 0.701557823651293, "grad_norm": 0.38486728072166443, "learning_rate": 1.640702426479442e-05, "loss": 0.0396, "step": 45710 }, { "epoch": 0.7017113038139821, "grad_norm": 0.3471156358718872, "learning_rate": 1.6404967331524643e-05, "loss": 0.0371, "step": 45720 }, { "epoch": 0.7018647839766711, "grad_norm": 0.3123101592063904, "learning_rate": 1.6402909938658957e-05, "loss": 0.0339, "step": 45730 }, { "epoch": 0.70201826413936, "grad_norm": 0.4412630796432495, "learning_rate": 1.6400852086345e-05, "loss": 0.0302, "step": 45740 }, { "epoch": 0.702171744302049, "grad_norm": 0.7164685726165771, "learning_rate": 1.6398793774730425e-05, "loss": 0.04, "step": 45750 }, { "epoch": 0.7023252244647379, "grad_norm": 0.3159075081348419, "learning_rate": 1.6396735003962933e-05, "loss": 0.0401, "step": 45760 }, { "epoch": 0.7024787046274269, "grad_norm": 0.6463215351104736, "learning_rate": 1.6394675774190254e-05, "loss": 0.0385, "step": 45770 }, { "epoch": 0.7026321847901159, "grad_norm": 0.5210551023483276, "learning_rate": 1.639261608556015e-05, "loss": 0.0311, "step": 45780 }, { "epoch": 0.7027856649528048, "grad_norm": 0.4418541491031647, "learning_rate": 1.6390555938220417e-05, "loss": 0.0361, "step": 45790 }, { "epoch": 0.7029391451154938, "grad_norm": 0.5235570669174194, "learning_rate": 1.6388495332318883e-05, "loss": 0.0494, "step": 45800 }, { "epoch": 0.7030926252781828, "grad_norm": 0.3374834954738617, "learning_rate": 1.6386434268003406e-05, "loss": 0.0347, "step": 45810 }, { "epoch": 0.7032461054408717, "grad_norm": 0.42935898900032043, "learning_rate": 1.6384372745421884e-05, "loss": 0.0408, "step": 45820 }, { "epoch": 0.7033995856035608, "grad_norm": 0.304554283618927, "learning_rate": 1.638231076472224e-05, "loss": 0.0321, "step": 45830 }, { "epoch": 0.7035530657662498, "grad_norm": 0.5634416937828064, "learning_rate": 1.638024832605244e-05, "loss": 0.0402, "step": 45840 }, { "epoch": 0.7037065459289387, "grad_norm": 0.3925313651561737, "learning_rate": 1.637818542956047e-05, "loss": 0.0411, "step": 45850 }, { "epoch": 0.7038600260916277, "grad_norm": 0.40415915846824646, "learning_rate": 1.637612207539436e-05, "loss": 0.0346, "step": 45860 }, { "epoch": 0.7040135062543166, "grad_norm": 0.49363628029823303, "learning_rate": 1.6374058263702164e-05, "loss": 0.0468, "step": 45870 }, { "epoch": 0.7041669864170056, "grad_norm": 0.5064993500709534, "learning_rate": 1.637199399463198e-05, "loss": 0.0408, "step": 45880 }, { "epoch": 0.7043204665796946, "grad_norm": 0.3963421583175659, "learning_rate": 1.6369929268331926e-05, "loss": 0.0358, "step": 45890 }, { "epoch": 0.7044739467423835, "grad_norm": 0.34696856141090393, "learning_rate": 1.6367864084950165e-05, "loss": 0.032, "step": 45900 }, { "epoch": 0.7046274269050725, "grad_norm": 0.42817673087120056, "learning_rate": 1.6365798444634878e-05, "loss": 0.041, "step": 45910 }, { "epoch": 0.7047809070677615, "grad_norm": 0.5227847695350647, "learning_rate": 1.63637323475343e-05, "loss": 0.0438, "step": 45920 }, { "epoch": 0.7049343872304504, "grad_norm": 0.3726543188095093, "learning_rate": 1.636166579379667e-05, "loss": 0.0457, "step": 45930 }, { "epoch": 0.7050878673931394, "grad_norm": 0.4405941069126129, "learning_rate": 1.6359598783570287e-05, "loss": 0.0377, "step": 45940 }, { "epoch": 0.7052413475558285, "grad_norm": 0.3479788303375244, "learning_rate": 1.635753131700347e-05, "loss": 0.0306, "step": 45950 }, { "epoch": 0.7053948277185174, "grad_norm": 0.49553385376930237, "learning_rate": 1.6355463394244572e-05, "loss": 0.0373, "step": 45960 }, { "epoch": 0.7055483078812064, "grad_norm": 0.5716503262519836, "learning_rate": 1.6353395015441978e-05, "loss": 0.0397, "step": 45970 }, { "epoch": 0.7057017880438953, "grad_norm": 0.441648930311203, "learning_rate": 1.6351326180744107e-05, "loss": 0.034, "step": 45980 }, { "epoch": 0.7058552682065843, "grad_norm": 0.48417526483535767, "learning_rate": 1.6349256890299413e-05, "loss": 0.0476, "step": 45990 }, { "epoch": 0.7060087483692733, "grad_norm": 0.29361942410469055, "learning_rate": 1.6347187144256378e-05, "loss": 0.0394, "step": 46000 }, { "epoch": 0.7061622285319622, "grad_norm": 0.45785605907440186, "learning_rate": 1.6345116942763517e-05, "loss": 0.0409, "step": 46010 }, { "epoch": 0.7063157086946512, "grad_norm": 0.337406188249588, "learning_rate": 1.6343046285969387e-05, "loss": 0.0415, "step": 46020 }, { "epoch": 0.7064691888573402, "grad_norm": 0.39987125992774963, "learning_rate": 1.6340975174022563e-05, "loss": 0.0364, "step": 46030 }, { "epoch": 0.7066226690200291, "grad_norm": 0.3116639256477356, "learning_rate": 1.6338903607071666e-05, "loss": 0.0366, "step": 46040 }, { "epoch": 0.7067761491827181, "grad_norm": 0.5580320358276367, "learning_rate": 1.6336831585265337e-05, "loss": 0.0414, "step": 46050 }, { "epoch": 0.7069296293454072, "grad_norm": 0.628587007522583, "learning_rate": 1.633475910875226e-05, "loss": 0.0363, "step": 46060 }, { "epoch": 0.707083109508096, "grad_norm": 0.6285319328308105, "learning_rate": 1.633268617768115e-05, "loss": 0.0392, "step": 46070 }, { "epoch": 0.7072365896707851, "grad_norm": 0.47332796454429626, "learning_rate": 1.633061279220075e-05, "loss": 0.0441, "step": 46080 }, { "epoch": 0.7073900698334741, "grad_norm": 0.3744036853313446, "learning_rate": 1.632853895245984e-05, "loss": 0.0361, "step": 46090 }, { "epoch": 0.707543549996163, "grad_norm": 0.47776612639427185, "learning_rate": 1.6326464658607228e-05, "loss": 0.0411, "step": 46100 }, { "epoch": 0.707697030158852, "grad_norm": 0.4528729319572449, "learning_rate": 1.632438991079176e-05, "loss": 0.0432, "step": 46110 }, { "epoch": 0.7078505103215409, "grad_norm": 0.44113031029701233, "learning_rate": 1.632231470916231e-05, "loss": 0.0439, "step": 46120 }, { "epoch": 0.7080039904842299, "grad_norm": 0.6000691056251526, "learning_rate": 1.6320239053867785e-05, "loss": 0.0449, "step": 46130 }, { "epoch": 0.7081574706469189, "grad_norm": 0.3912647068500519, "learning_rate": 1.6318162945057135e-05, "loss": 0.0338, "step": 46140 }, { "epoch": 0.7083109508096078, "grad_norm": 0.40230658650398254, "learning_rate": 1.631608638287932e-05, "loss": 0.0352, "step": 46150 }, { "epoch": 0.7084644309722968, "grad_norm": 0.34485727548599243, "learning_rate": 1.6314009367483358e-05, "loss": 0.0279, "step": 46160 }, { "epoch": 0.7086179111349858, "grad_norm": 0.48651519417762756, "learning_rate": 1.631193189901828e-05, "loss": 0.0474, "step": 46170 }, { "epoch": 0.7087713912976747, "grad_norm": 0.528540313243866, "learning_rate": 1.6309853977633162e-05, "loss": 0.0367, "step": 46180 }, { "epoch": 0.7089248714603638, "grad_norm": 0.6708813309669495, "learning_rate": 1.6307775603477105e-05, "loss": 0.0496, "step": 46190 }, { "epoch": 0.7090783516230528, "grad_norm": 0.3766978681087494, "learning_rate": 1.6305696776699246e-05, "loss": 0.0458, "step": 46200 }, { "epoch": 0.7092318317857417, "grad_norm": 0.43703895807266235, "learning_rate": 1.6303617497448752e-05, "loss": 0.0454, "step": 46210 }, { "epoch": 0.7093853119484307, "grad_norm": 0.3808724582195282, "learning_rate": 1.630153776587483e-05, "loss": 0.0388, "step": 46220 }, { "epoch": 0.7095387921111196, "grad_norm": 0.473228394985199, "learning_rate": 1.6299457582126704e-05, "loss": 0.038, "step": 46230 }, { "epoch": 0.7096922722738086, "grad_norm": 0.4431702494621277, "learning_rate": 1.629737694635365e-05, "loss": 0.0337, "step": 46240 }, { "epoch": 0.7098457524364976, "grad_norm": 0.5886850357055664, "learning_rate": 1.6295295858704956e-05, "loss": 0.0341, "step": 46250 }, { "epoch": 0.7099992325991865, "grad_norm": 0.4094930589199066, "learning_rate": 1.629321431932996e-05, "loss": 0.0392, "step": 46260 }, { "epoch": 0.7101527127618755, "grad_norm": 0.521419107913971, "learning_rate": 1.629113232837803e-05, "loss": 0.0471, "step": 46270 }, { "epoch": 0.7103061929245645, "grad_norm": 0.32038015127182007, "learning_rate": 1.628904988599855e-05, "loss": 0.0391, "step": 46280 }, { "epoch": 0.7104596730872534, "grad_norm": 0.3574281632900238, "learning_rate": 1.6286966992340953e-05, "loss": 0.034, "step": 46290 }, { "epoch": 0.7106131532499425, "grad_norm": 0.4030381441116333, "learning_rate": 1.6284883647554703e-05, "loss": 0.0392, "step": 46300 }, { "epoch": 0.7107666334126315, "grad_norm": 0.2931695580482483, "learning_rate": 1.6282799851789288e-05, "loss": 0.0344, "step": 46310 }, { "epoch": 0.7109201135753204, "grad_norm": 0.450716108083725, "learning_rate": 1.628071560519424e-05, "loss": 0.0491, "step": 46320 }, { "epoch": 0.7110735937380094, "grad_norm": 0.36641424894332886, "learning_rate": 1.6278630907919104e-05, "loss": 0.0354, "step": 46330 }, { "epoch": 0.7112270739006983, "grad_norm": 0.4581364095211029, "learning_rate": 1.627654576011348e-05, "loss": 0.0386, "step": 46340 }, { "epoch": 0.7113805540633873, "grad_norm": 0.5405148267745972, "learning_rate": 1.627446016192699e-05, "loss": 0.0412, "step": 46350 }, { "epoch": 0.7115340342260763, "grad_norm": 0.39557868242263794, "learning_rate": 1.6272374113509285e-05, "loss": 0.0381, "step": 46360 }, { "epoch": 0.7116875143887652, "grad_norm": 0.4482981562614441, "learning_rate": 1.6270287615010054e-05, "loss": 0.0398, "step": 46370 }, { "epoch": 0.7118409945514542, "grad_norm": 0.33534878492355347, "learning_rate": 1.6268200666579017e-05, "loss": 0.0455, "step": 46380 }, { "epoch": 0.7119944747141432, "grad_norm": 0.6088858842849731, "learning_rate": 1.626611326836592e-05, "loss": 0.045, "step": 46390 }, { "epoch": 0.7121479548768321, "grad_norm": 0.5318165421485901, "learning_rate": 1.626402542052055e-05, "loss": 0.0341, "step": 46400 }, { "epoch": 0.7123014350395211, "grad_norm": 0.3409813642501831, "learning_rate": 1.6261937123192725e-05, "loss": 0.0313, "step": 46410 }, { "epoch": 0.7124549152022102, "grad_norm": 0.4644860029220581, "learning_rate": 1.6259848376532292e-05, "loss": 0.0338, "step": 46420 }, { "epoch": 0.7126083953648991, "grad_norm": 0.41908738017082214, "learning_rate": 1.625775918068913e-05, "loss": 0.036, "step": 46430 }, { "epoch": 0.7127618755275881, "grad_norm": 0.41866567730903625, "learning_rate": 1.6255669535813154e-05, "loss": 0.0427, "step": 46440 }, { "epoch": 0.7129153556902771, "grad_norm": 0.3197390139102936, "learning_rate": 1.6253579442054307e-05, "loss": 0.0435, "step": 46450 }, { "epoch": 0.713068835852966, "grad_norm": 0.28778836131095886, "learning_rate": 1.6251488899562565e-05, "loss": 0.0323, "step": 46460 }, { "epoch": 0.713222316015655, "grad_norm": 0.6082656383514404, "learning_rate": 1.624939790848794e-05, "loss": 0.0426, "step": 46470 }, { "epoch": 0.7133757961783439, "grad_norm": 0.36540278792381287, "learning_rate": 1.6247306468980475e-05, "loss": 0.0404, "step": 46480 }, { "epoch": 0.7135292763410329, "grad_norm": 0.49277520179748535, "learning_rate": 1.6245214581190237e-05, "loss": 0.0318, "step": 46490 }, { "epoch": 0.7136827565037219, "grad_norm": 0.4259417653083801, "learning_rate": 1.624312224526734e-05, "loss": 0.0388, "step": 46500 }, { "epoch": 0.7138362366664108, "grad_norm": 0.31813520193099976, "learning_rate": 1.6241029461361918e-05, "loss": 0.0508, "step": 46510 }, { "epoch": 0.7139897168290998, "grad_norm": 0.3729797303676605, "learning_rate": 1.623893622962414e-05, "loss": 0.0391, "step": 46520 }, { "epoch": 0.7141431969917889, "grad_norm": 0.4501309394836426, "learning_rate": 1.623684255020421e-05, "loss": 0.0355, "step": 46530 }, { "epoch": 0.7142966771544778, "grad_norm": 0.5886253714561462, "learning_rate": 1.623474842325236e-05, "loss": 0.0475, "step": 46540 }, { "epoch": 0.7144501573171668, "grad_norm": 0.36355888843536377, "learning_rate": 1.623265384891886e-05, "loss": 0.0322, "step": 46550 }, { "epoch": 0.7146036374798558, "grad_norm": 0.3119552731513977, "learning_rate": 1.6230558827354005e-05, "loss": 0.0356, "step": 46560 }, { "epoch": 0.7147571176425447, "grad_norm": 0.37665578722953796, "learning_rate": 1.622846335870813e-05, "loss": 0.0292, "step": 46570 }, { "epoch": 0.7149105978052337, "grad_norm": 0.6252984404563904, "learning_rate": 1.6226367443131596e-05, "loss": 0.0377, "step": 46580 }, { "epoch": 0.7150640779679226, "grad_norm": 0.4335126578807831, "learning_rate": 1.6224271080774797e-05, "loss": 0.0389, "step": 46590 }, { "epoch": 0.7152175581306116, "grad_norm": 0.45092394948005676, "learning_rate": 1.622217427178816e-05, "loss": 0.0459, "step": 46600 }, { "epoch": 0.7153710382933006, "grad_norm": 0.41972774267196655, "learning_rate": 1.6220077016322147e-05, "loss": 0.0515, "step": 46610 }, { "epoch": 0.7155245184559895, "grad_norm": 0.4308277368545532, "learning_rate": 1.6217979314527242e-05, "loss": 0.0423, "step": 46620 }, { "epoch": 0.7156779986186785, "grad_norm": 0.49713873863220215, "learning_rate": 1.6215881166553975e-05, "loss": 0.0305, "step": 46630 }, { "epoch": 0.7158314787813675, "grad_norm": 0.7802619934082031, "learning_rate": 1.6213782572552898e-05, "loss": 0.039, "step": 46640 }, { "epoch": 0.7159849589440564, "grad_norm": 0.21521058678627014, "learning_rate": 1.62116835326746e-05, "loss": 0.0415, "step": 46650 }, { "epoch": 0.7161384391067455, "grad_norm": 0.4865254759788513, "learning_rate": 1.6209584047069696e-05, "loss": 0.0382, "step": 46660 }, { "epoch": 0.7162919192694345, "grad_norm": 0.2548217475414276, "learning_rate": 1.620748411588884e-05, "loss": 0.0407, "step": 46670 }, { "epoch": 0.7164453994321234, "grad_norm": 0.37597528100013733, "learning_rate": 1.6205383739282715e-05, "loss": 0.036, "step": 46680 }, { "epoch": 0.7165988795948124, "grad_norm": 0.4796905219554901, "learning_rate": 1.6203282917402038e-05, "loss": 0.0454, "step": 46690 }, { "epoch": 0.7167523597575013, "grad_norm": 0.36320602893829346, "learning_rate": 1.620118165039755e-05, "loss": 0.0366, "step": 46700 }, { "epoch": 0.7169058399201903, "grad_norm": 0.33655020594596863, "learning_rate": 1.6199079938420035e-05, "loss": 0.0302, "step": 46710 }, { "epoch": 0.7170593200828793, "grad_norm": 0.42279115319252014, "learning_rate": 1.6196977781620304e-05, "loss": 0.0304, "step": 46720 }, { "epoch": 0.7172128002455682, "grad_norm": 0.4156150221824646, "learning_rate": 1.6194875180149195e-05, "loss": 0.045, "step": 46730 }, { "epoch": 0.7173662804082572, "grad_norm": 0.4692081809043884, "learning_rate": 1.619277213415759e-05, "loss": 0.0402, "step": 46740 }, { "epoch": 0.7175197605709462, "grad_norm": 0.5663306713104248, "learning_rate": 1.6190668643796383e-05, "loss": 0.0422, "step": 46750 }, { "epoch": 0.7176732407336351, "grad_norm": 0.5222018361091614, "learning_rate": 1.6188564709216527e-05, "loss": 0.0408, "step": 46760 }, { "epoch": 0.7178267208963242, "grad_norm": 0.5845357179641724, "learning_rate": 1.6186460330568986e-05, "loss": 0.0529, "step": 46770 }, { "epoch": 0.7179802010590132, "grad_norm": 0.40272271633148193, "learning_rate": 1.6184355508004758e-05, "loss": 0.044, "step": 46780 }, { "epoch": 0.7181336812217021, "grad_norm": 0.37230467796325684, "learning_rate": 1.6182250241674884e-05, "loss": 0.0326, "step": 46790 }, { "epoch": 0.7182871613843911, "grad_norm": 0.32212305068969727, "learning_rate": 1.6180144531730423e-05, "loss": 0.0322, "step": 46800 }, { "epoch": 0.7184406415470801, "grad_norm": 0.34444522857666016, "learning_rate": 1.617803837832248e-05, "loss": 0.0473, "step": 46810 }, { "epoch": 0.718594121709769, "grad_norm": 0.38013312220573425, "learning_rate": 1.617593178160218e-05, "loss": 0.0311, "step": 46820 }, { "epoch": 0.718747601872458, "grad_norm": 0.46091389656066895, "learning_rate": 1.6173824741720683e-05, "loss": 0.0347, "step": 46830 }, { "epoch": 0.7189010820351469, "grad_norm": 0.5014074444770813, "learning_rate": 1.6171717258829192e-05, "loss": 0.0473, "step": 46840 }, { "epoch": 0.7190545621978359, "grad_norm": 0.4005396366119385, "learning_rate": 1.6169609333078918e-05, "loss": 0.0299, "step": 46850 }, { "epoch": 0.7192080423605249, "grad_norm": 0.5318815112113953, "learning_rate": 1.6167500964621125e-05, "loss": 0.0416, "step": 46860 }, { "epoch": 0.7193615225232138, "grad_norm": 0.5365328788757324, "learning_rate": 1.61653921536071e-05, "loss": 0.0436, "step": 46870 }, { "epoch": 0.7195150026859028, "grad_norm": 0.37498852610588074, "learning_rate": 1.6163282900188163e-05, "loss": 0.0382, "step": 46880 }, { "epoch": 0.7196684828485919, "grad_norm": 0.4853670299053192, "learning_rate": 1.6161173204515668e-05, "loss": 0.0375, "step": 46890 }, { "epoch": 0.7198219630112808, "grad_norm": 0.425707072019577, "learning_rate": 1.6159063066741e-05, "loss": 0.0443, "step": 46900 }, { "epoch": 0.7199754431739698, "grad_norm": 0.40106889605522156, "learning_rate": 1.6156952487015566e-05, "loss": 0.0365, "step": 46910 }, { "epoch": 0.7201289233366588, "grad_norm": 0.435621440410614, "learning_rate": 1.615484146549082e-05, "loss": 0.0426, "step": 46920 }, { "epoch": 0.7202824034993477, "grad_norm": 0.5048223733901978, "learning_rate": 1.615273000231824e-05, "loss": 0.0356, "step": 46930 }, { "epoch": 0.7204358836620367, "grad_norm": 0.5299933552742004, "learning_rate": 1.6150618097649336e-05, "loss": 0.0323, "step": 46940 }, { "epoch": 0.7205893638247256, "grad_norm": 0.4137808084487915, "learning_rate": 1.614850575163565e-05, "loss": 0.0388, "step": 46950 }, { "epoch": 0.7207428439874146, "grad_norm": 0.27896562218666077, "learning_rate": 1.6146392964428756e-05, "loss": 0.0426, "step": 46960 }, { "epoch": 0.7208963241501036, "grad_norm": 0.5139961242675781, "learning_rate": 1.6144279736180265e-05, "loss": 0.0382, "step": 46970 }, { "epoch": 0.7210498043127925, "grad_norm": 0.43854886293411255, "learning_rate": 1.6142166067041804e-05, "loss": 0.0376, "step": 46980 }, { "epoch": 0.7212032844754815, "grad_norm": 0.38468456268310547, "learning_rate": 1.6140051957165043e-05, "loss": 0.0406, "step": 46990 }, { "epoch": 0.7213567646381706, "grad_norm": 0.3232068121433258, "learning_rate": 1.6137937406701692e-05, "loss": 0.0441, "step": 47000 }, { "epoch": 0.7215102448008595, "grad_norm": 0.4197283685207367, "learning_rate": 1.6135822415803472e-05, "loss": 0.0488, "step": 47010 }, { "epoch": 0.7216637249635485, "grad_norm": 0.301935076713562, "learning_rate": 1.6133706984622154e-05, "loss": 0.0384, "step": 47020 }, { "epoch": 0.7218172051262375, "grad_norm": 0.5482348799705505, "learning_rate": 1.613159111330953e-05, "loss": 0.0396, "step": 47030 }, { "epoch": 0.7219706852889264, "grad_norm": 0.2787516415119171, "learning_rate": 1.612947480201743e-05, "loss": 0.0332, "step": 47040 }, { "epoch": 0.7221241654516154, "grad_norm": 0.459357887506485, "learning_rate": 1.6127358050897708e-05, "loss": 0.0403, "step": 47050 }, { "epoch": 0.7222776456143043, "grad_norm": 0.36302152276039124, "learning_rate": 1.6125240860102254e-05, "loss": 0.0452, "step": 47060 }, { "epoch": 0.7224311257769933, "grad_norm": 0.544544517993927, "learning_rate": 1.6123123229782997e-05, "loss": 0.0431, "step": 47070 }, { "epoch": 0.7225846059396823, "grad_norm": 0.4239489436149597, "learning_rate": 1.612100516009188e-05, "loss": 0.038, "step": 47080 }, { "epoch": 0.7227380861023712, "grad_norm": 0.28121811151504517, "learning_rate": 1.6118886651180896e-05, "loss": 0.0289, "step": 47090 }, { "epoch": 0.7228915662650602, "grad_norm": 0.4245966970920563, "learning_rate": 1.6116767703202054e-05, "loss": 0.0538, "step": 47100 }, { "epoch": 0.7230450464277492, "grad_norm": 0.4322238862514496, "learning_rate": 1.6114648316307406e-05, "loss": 0.0387, "step": 47110 }, { "epoch": 0.7231985265904382, "grad_norm": 0.2893117368221283, "learning_rate": 1.6112528490649027e-05, "loss": 0.0336, "step": 47120 }, { "epoch": 0.7233520067531272, "grad_norm": 0.4313196539878845, "learning_rate": 1.6110408226379034e-05, "loss": 0.0332, "step": 47130 }, { "epoch": 0.7235054869158162, "grad_norm": 0.4976483881473541, "learning_rate": 1.6108287523649565e-05, "loss": 0.0451, "step": 47140 }, { "epoch": 0.7236589670785051, "grad_norm": 0.4108542501926422, "learning_rate": 1.6106166382612794e-05, "loss": 0.0374, "step": 47150 }, { "epoch": 0.7238124472411941, "grad_norm": 0.42170608043670654, "learning_rate": 1.6104044803420926e-05, "loss": 0.0304, "step": 47160 }, { "epoch": 0.7239659274038831, "grad_norm": 0.35435858368873596, "learning_rate": 1.6101922786226193e-05, "loss": 0.045, "step": 47170 }, { "epoch": 0.724119407566572, "grad_norm": 0.5987370014190674, "learning_rate": 1.609980033118087e-05, "loss": 0.0433, "step": 47180 }, { "epoch": 0.724272887729261, "grad_norm": 0.5003364682197571, "learning_rate": 1.6097677438437254e-05, "loss": 0.0345, "step": 47190 }, { "epoch": 0.7244263678919499, "grad_norm": 0.4057934284210205, "learning_rate": 1.609555410814768e-05, "loss": 0.0364, "step": 47200 }, { "epoch": 0.7245798480546389, "grad_norm": 0.33760032057762146, "learning_rate": 1.6093430340464496e-05, "loss": 0.0352, "step": 47210 }, { "epoch": 0.7247333282173279, "grad_norm": 0.5009250640869141, "learning_rate": 1.609130613554011e-05, "loss": 0.0468, "step": 47220 }, { "epoch": 0.7248868083800168, "grad_norm": 0.2935386896133423, "learning_rate": 1.6089181493526938e-05, "loss": 0.038, "step": 47230 }, { "epoch": 0.7250402885427059, "grad_norm": 0.3279637098312378, "learning_rate": 1.6087056414577444e-05, "loss": 0.0354, "step": 47240 }, { "epoch": 0.7251937687053949, "grad_norm": 0.5440467596054077, "learning_rate": 1.6084930898844104e-05, "loss": 0.0419, "step": 47250 }, { "epoch": 0.7253472488680838, "grad_norm": 0.381585031747818, "learning_rate": 1.608280494647945e-05, "loss": 0.0361, "step": 47260 }, { "epoch": 0.7255007290307728, "grad_norm": 0.37649184465408325, "learning_rate": 1.6080678557636026e-05, "loss": 0.0398, "step": 47270 }, { "epoch": 0.7256542091934618, "grad_norm": 0.3792988061904907, "learning_rate": 1.607855173246641e-05, "loss": 0.0308, "step": 47280 }, { "epoch": 0.7258076893561507, "grad_norm": 0.34490928053855896, "learning_rate": 1.6076424471123218e-05, "loss": 0.0369, "step": 47290 }, { "epoch": 0.7259611695188397, "grad_norm": 0.3041943311691284, "learning_rate": 1.6074296773759097e-05, "loss": 0.0387, "step": 47300 }, { "epoch": 0.7261146496815286, "grad_norm": 0.26165151596069336, "learning_rate": 1.607216864052672e-05, "loss": 0.0487, "step": 47310 }, { "epoch": 0.7262681298442176, "grad_norm": 0.42406710982322693, "learning_rate": 1.6070040071578793e-05, "loss": 0.043, "step": 47320 }, { "epoch": 0.7264216100069066, "grad_norm": 0.4240325391292572, "learning_rate": 1.6067911067068054e-05, "loss": 0.04, "step": 47330 }, { "epoch": 0.7265750901695955, "grad_norm": 0.4178861081600189, "learning_rate": 1.6065781627147276e-05, "loss": 0.0366, "step": 47340 }, { "epoch": 0.7267285703322846, "grad_norm": 0.559566855430603, "learning_rate": 1.606365175196925e-05, "loss": 0.0332, "step": 47350 }, { "epoch": 0.7268820504949736, "grad_norm": 0.32359227538108826, "learning_rate": 1.6061521441686817e-05, "loss": 0.0462, "step": 47360 }, { "epoch": 0.7270355306576625, "grad_norm": 0.452961802482605, "learning_rate": 1.6059390696452837e-05, "loss": 0.0458, "step": 47370 }, { "epoch": 0.7271890108203515, "grad_norm": 0.42930954694747925, "learning_rate": 1.6057259516420203e-05, "loss": 0.0338, "step": 47380 }, { "epoch": 0.7273424909830405, "grad_norm": 0.4470440149307251, "learning_rate": 1.6055127901741842e-05, "loss": 0.0342, "step": 47390 }, { "epoch": 0.7274959711457294, "grad_norm": 0.37851482629776, "learning_rate": 1.605299585257071e-05, "loss": 0.0398, "step": 47400 }, { "epoch": 0.7276494513084184, "grad_norm": 0.4247013330459595, "learning_rate": 1.605086336905979e-05, "loss": 0.0457, "step": 47410 }, { "epoch": 0.7278029314711073, "grad_norm": 0.36851465702056885, "learning_rate": 1.6048730451362112e-05, "loss": 0.0452, "step": 47420 }, { "epoch": 0.7279564116337963, "grad_norm": 0.3120374083518982, "learning_rate": 1.6046597099630717e-05, "loss": 0.0362, "step": 47430 }, { "epoch": 0.7281098917964853, "grad_norm": 0.3979223668575287, "learning_rate": 1.604446331401869e-05, "loss": 0.0394, "step": 47440 }, { "epoch": 0.7282633719591742, "grad_norm": 0.2863331437110901, "learning_rate": 1.6042329094679137e-05, "loss": 0.034, "step": 47450 }, { "epoch": 0.7284168521218632, "grad_norm": 0.4228396415710449, "learning_rate": 1.604019444176521e-05, "loss": 0.0414, "step": 47460 }, { "epoch": 0.7285703322845523, "grad_norm": 0.44190356135368347, "learning_rate": 1.6038059355430074e-05, "loss": 0.0465, "step": 47470 }, { "epoch": 0.7287238124472412, "grad_norm": 0.2795962989330292, "learning_rate": 1.6035923835826945e-05, "loss": 0.0365, "step": 47480 }, { "epoch": 0.7288772926099302, "grad_norm": 0.4642277657985687, "learning_rate": 1.6033787883109056e-05, "loss": 0.0376, "step": 47490 }, { "epoch": 0.7290307727726192, "grad_norm": 0.3176283836364746, "learning_rate": 1.6031651497429667e-05, "loss": 0.0479, "step": 47500 }, { "epoch": 0.7291842529353081, "grad_norm": 0.41339775919914246, "learning_rate": 1.602951467894209e-05, "loss": 0.0366, "step": 47510 }, { "epoch": 0.7293377330979971, "grad_norm": 0.45999887585639954, "learning_rate": 1.6027377427799647e-05, "loss": 0.0352, "step": 47520 }, { "epoch": 0.7294912132606861, "grad_norm": 0.4270203113555908, "learning_rate": 1.6025239744155697e-05, "loss": 0.0342, "step": 47530 }, { "epoch": 0.729644693423375, "grad_norm": 0.5543727278709412, "learning_rate": 1.602310162816364e-05, "loss": 0.0401, "step": 47540 }, { "epoch": 0.729798173586064, "grad_norm": 0.29183128476142883, "learning_rate": 1.602096307997689e-05, "loss": 0.0329, "step": 47550 }, { "epoch": 0.7299516537487529, "grad_norm": 0.44629257917404175, "learning_rate": 1.601882409974891e-05, "loss": 0.0328, "step": 47560 }, { "epoch": 0.7301051339114419, "grad_norm": 0.4009837806224823, "learning_rate": 1.6016684687633176e-05, "loss": 0.0389, "step": 47570 }, { "epoch": 0.730258614074131, "grad_norm": 0.49941280484199524, "learning_rate": 1.601454484378321e-05, "loss": 0.0409, "step": 47580 }, { "epoch": 0.7304120942368199, "grad_norm": 0.43374449014663696, "learning_rate": 1.601240456835256e-05, "loss": 0.035, "step": 47590 }, { "epoch": 0.7305655743995089, "grad_norm": 0.48078465461730957, "learning_rate": 1.60102638614948e-05, "loss": 0.0421, "step": 47600 }, { "epoch": 0.7307190545621979, "grad_norm": 0.47812020778656006, "learning_rate": 1.600812272336354e-05, "loss": 0.0345, "step": 47610 }, { "epoch": 0.7308725347248868, "grad_norm": 0.6950194239616394, "learning_rate": 1.6005981154112417e-05, "loss": 0.0412, "step": 47620 }, { "epoch": 0.7310260148875758, "grad_norm": 0.38729149103164673, "learning_rate": 1.6003839153895106e-05, "loss": 0.0323, "step": 47630 }, { "epoch": 0.7311794950502648, "grad_norm": 0.5029705762863159, "learning_rate": 1.6001696722865312e-05, "loss": 0.0521, "step": 47640 }, { "epoch": 0.7313329752129537, "grad_norm": 0.39957132935523987, "learning_rate": 1.5999553861176757e-05, "loss": 0.042, "step": 47650 }, { "epoch": 0.7314864553756427, "grad_norm": 0.4619283676147461, "learning_rate": 1.5997410568983216e-05, "loss": 0.0382, "step": 47660 }, { "epoch": 0.7316399355383316, "grad_norm": 0.2121576964855194, "learning_rate": 1.5995266846438475e-05, "loss": 0.0317, "step": 47670 }, { "epoch": 0.7317934157010206, "grad_norm": 0.2826670706272125, "learning_rate": 1.5993122693696363e-05, "loss": 0.0303, "step": 47680 }, { "epoch": 0.7319468958637096, "grad_norm": 1.057289958000183, "learning_rate": 1.599097811091074e-05, "loss": 0.0366, "step": 47690 }, { "epoch": 0.7321003760263985, "grad_norm": 0.6640204191207886, "learning_rate": 1.5988833098235485e-05, "loss": 0.0472, "step": 47700 }, { "epoch": 0.7322538561890876, "grad_norm": 0.3692752718925476, "learning_rate": 1.598668765582452e-05, "loss": 0.0357, "step": 47710 }, { "epoch": 0.7324073363517766, "grad_norm": 0.5211824774742126, "learning_rate": 1.5984541783831792e-05, "loss": 0.0388, "step": 47720 }, { "epoch": 0.7325608165144655, "grad_norm": 0.3899395763874054, "learning_rate": 1.5982395482411285e-05, "loss": 0.0389, "step": 47730 }, { "epoch": 0.7327142966771545, "grad_norm": 0.29859867691993713, "learning_rate": 1.5980248751717005e-05, "loss": 0.0313, "step": 47740 }, { "epoch": 0.7328677768398435, "grad_norm": 0.36352065205574036, "learning_rate": 1.5978101591902993e-05, "loss": 0.0344, "step": 47750 }, { "epoch": 0.7330212570025324, "grad_norm": 0.4275447726249695, "learning_rate": 1.5975954003123324e-05, "loss": 0.0362, "step": 47760 }, { "epoch": 0.7331747371652214, "grad_norm": 0.570555567741394, "learning_rate": 1.59738059855321e-05, "loss": 0.0389, "step": 47770 }, { "epoch": 0.7333282173279103, "grad_norm": 0.337456077337265, "learning_rate": 1.5971657539283452e-05, "loss": 0.0376, "step": 47780 }, { "epoch": 0.7334816974905993, "grad_norm": 0.38698869943618774, "learning_rate": 1.596950866453155e-05, "loss": 0.0302, "step": 47790 }, { "epoch": 0.7336351776532883, "grad_norm": 0.5005072951316833, "learning_rate": 1.5967359361430578e-05, "loss": 0.038, "step": 47800 }, { "epoch": 0.7337886578159772, "grad_norm": 0.5583388209342957, "learning_rate": 1.5965209630134774e-05, "loss": 0.0379, "step": 47810 }, { "epoch": 0.7339421379786663, "grad_norm": 0.3798491358757019, "learning_rate": 1.596305947079839e-05, "loss": 0.0327, "step": 47820 }, { "epoch": 0.7340956181413553, "grad_norm": 0.34880438446998596, "learning_rate": 1.5960908883575704e-05, "loss": 0.0336, "step": 47830 }, { "epoch": 0.7342490983040442, "grad_norm": 0.31876567006111145, "learning_rate": 1.5958757868621052e-05, "loss": 0.0365, "step": 47840 }, { "epoch": 0.7344025784667332, "grad_norm": 0.40286779403686523, "learning_rate": 1.595660642608877e-05, "loss": 0.0366, "step": 47850 }, { "epoch": 0.7345560586294222, "grad_norm": 0.7569051384925842, "learning_rate": 1.595445455613324e-05, "loss": 0.0438, "step": 47860 }, { "epoch": 0.7347095387921111, "grad_norm": 0.5403481125831604, "learning_rate": 1.595230225890887e-05, "loss": 0.043, "step": 47870 }, { "epoch": 0.7348630189548001, "grad_norm": 0.42236796021461487, "learning_rate": 1.5950149534570102e-05, "loss": 0.0415, "step": 47880 }, { "epoch": 0.7350164991174891, "grad_norm": 0.4742829203605652, "learning_rate": 1.594799638327141e-05, "loss": 0.0354, "step": 47890 }, { "epoch": 0.735169979280178, "grad_norm": 0.25506308674812317, "learning_rate": 1.5945842805167298e-05, "loss": 0.0344, "step": 47900 }, { "epoch": 0.735323459442867, "grad_norm": 0.5621371269226074, "learning_rate": 1.594368880041229e-05, "loss": 0.0422, "step": 47910 }, { "epoch": 0.7354769396055559, "grad_norm": 0.5868197083473206, "learning_rate": 1.5941534369160953e-05, "loss": 0.039, "step": 47920 }, { "epoch": 0.735630419768245, "grad_norm": 0.3145886957645416, "learning_rate": 1.593937951156788e-05, "loss": 0.0395, "step": 47930 }, { "epoch": 0.735783899930934, "grad_norm": 0.4230249524116516, "learning_rate": 1.59372242277877e-05, "loss": 0.0345, "step": 47940 }, { "epoch": 0.7359373800936229, "grad_norm": 0.39212483167648315, "learning_rate": 1.5935068517975066e-05, "loss": 0.0368, "step": 47950 }, { "epoch": 0.7360908602563119, "grad_norm": 0.41854098439216614, "learning_rate": 1.593291238228466e-05, "loss": 0.0334, "step": 47960 }, { "epoch": 0.7362443404190009, "grad_norm": 0.42934051156044006, "learning_rate": 1.5930755820871197e-05, "loss": 0.0462, "step": 47970 }, { "epoch": 0.7363978205816898, "grad_norm": 0.3972248136997223, "learning_rate": 1.592859883388943e-05, "loss": 0.043, "step": 47980 }, { "epoch": 0.7365513007443788, "grad_norm": 0.4585546851158142, "learning_rate": 1.592644142149413e-05, "loss": 0.0419, "step": 47990 }, { "epoch": 0.7367047809070678, "grad_norm": 0.3844599723815918, "learning_rate": 1.5924283583840108e-05, "loss": 0.0377, "step": 48000 }, { "epoch": 0.7368582610697567, "grad_norm": 0.2856101989746094, "learning_rate": 1.5922125321082204e-05, "loss": 0.0401, "step": 48010 }, { "epoch": 0.7370117412324457, "grad_norm": 0.24958397448062897, "learning_rate": 1.591996663337528e-05, "loss": 0.0331, "step": 48020 }, { "epoch": 0.7371652213951346, "grad_norm": 0.32175591588020325, "learning_rate": 1.591780752087424e-05, "loss": 0.041, "step": 48030 }, { "epoch": 0.7373187015578236, "grad_norm": 0.4827011227607727, "learning_rate": 1.5915647983734015e-05, "loss": 0.0516, "step": 48040 }, { "epoch": 0.7374721817205127, "grad_norm": 0.4631567895412445, "learning_rate": 1.5913488022109562e-05, "loss": 0.03, "step": 48050 }, { "epoch": 0.7376256618832016, "grad_norm": 0.4717487692832947, "learning_rate": 1.5911327636155874e-05, "loss": 0.044, "step": 48060 }, { "epoch": 0.7377791420458906, "grad_norm": 0.3512330949306488, "learning_rate": 1.5909166826027967e-05, "loss": 0.0356, "step": 48070 }, { "epoch": 0.7379326222085796, "grad_norm": 0.4493286609649658, "learning_rate": 1.59070055918809e-05, "loss": 0.0296, "step": 48080 }, { "epoch": 0.7380861023712685, "grad_norm": 0.27787846326828003, "learning_rate": 1.590484393386975e-05, "loss": 0.0349, "step": 48090 }, { "epoch": 0.7382395825339575, "grad_norm": 0.3519729673862457, "learning_rate": 1.590268185214963e-05, "loss": 0.0342, "step": 48100 }, { "epoch": 0.7383930626966465, "grad_norm": 0.5291364789009094, "learning_rate": 1.5900519346875682e-05, "loss": 0.0343, "step": 48110 }, { "epoch": 0.7385465428593354, "grad_norm": 0.45346540212631226, "learning_rate": 1.589835641820308e-05, "loss": 0.0385, "step": 48120 }, { "epoch": 0.7387000230220244, "grad_norm": 0.3063989281654358, "learning_rate": 1.589619306628703e-05, "loss": 0.0294, "step": 48130 }, { "epoch": 0.7388535031847133, "grad_norm": 0.21209755539894104, "learning_rate": 1.589402929128276e-05, "loss": 0.0308, "step": 48140 }, { "epoch": 0.7390069833474023, "grad_norm": 0.42367643117904663, "learning_rate": 1.589186509334554e-05, "loss": 0.0419, "step": 48150 }, { "epoch": 0.7391604635100913, "grad_norm": 0.43445461988449097, "learning_rate": 1.5889700472630664e-05, "loss": 0.0359, "step": 48160 }, { "epoch": 0.7393139436727802, "grad_norm": 0.3702901601791382, "learning_rate": 1.5887535429293447e-05, "loss": 0.0282, "step": 48170 }, { "epoch": 0.7394674238354693, "grad_norm": 0.46462345123291016, "learning_rate": 1.5885369963489258e-05, "loss": 0.0396, "step": 48180 }, { "epoch": 0.7396209039981583, "grad_norm": 0.3476668894290924, "learning_rate": 1.5883204075373476e-05, "loss": 0.0374, "step": 48190 }, { "epoch": 0.7397743841608472, "grad_norm": 0.3724519610404968, "learning_rate": 1.5881037765101517e-05, "loss": 0.0479, "step": 48200 }, { "epoch": 0.7399278643235362, "grad_norm": 0.3166947066783905, "learning_rate": 1.587887103282883e-05, "loss": 0.0409, "step": 48210 }, { "epoch": 0.7400813444862252, "grad_norm": 0.3677802085876465, "learning_rate": 1.5876703878710887e-05, "loss": 0.0389, "step": 48220 }, { "epoch": 0.7402348246489141, "grad_norm": 0.514579176902771, "learning_rate": 1.5874536302903197e-05, "loss": 0.0444, "step": 48230 }, { "epoch": 0.7403883048116031, "grad_norm": 0.34649819135665894, "learning_rate": 1.5872368305561298e-05, "loss": 0.0417, "step": 48240 }, { "epoch": 0.7405417849742921, "grad_norm": 0.6134201288223267, "learning_rate": 1.5870199886840755e-05, "loss": 0.0437, "step": 48250 }, { "epoch": 0.740695265136981, "grad_norm": 0.38954687118530273, "learning_rate": 1.5868031046897164e-05, "loss": 0.0368, "step": 48260 }, { "epoch": 0.74084874529967, "grad_norm": 0.5490109324455261, "learning_rate": 1.5865861785886157e-05, "loss": 0.0443, "step": 48270 }, { "epoch": 0.7410022254623589, "grad_norm": 0.3178309202194214, "learning_rate": 1.5863692103963387e-05, "loss": 0.0446, "step": 48280 }, { "epoch": 0.741155705625048, "grad_norm": 0.4191255271434784, "learning_rate": 1.586152200128455e-05, "loss": 0.0464, "step": 48290 }, { "epoch": 0.741309185787737, "grad_norm": 0.4176768362522125, "learning_rate": 1.5859351478005356e-05, "loss": 0.0406, "step": 48300 }, { "epoch": 0.7414626659504259, "grad_norm": 0.35075679421424866, "learning_rate": 1.585718053428156e-05, "loss": 0.046, "step": 48310 }, { "epoch": 0.7416161461131149, "grad_norm": 0.6654664874076843, "learning_rate": 1.5855009170268928e-05, "loss": 0.0302, "step": 48320 }, { "epoch": 0.7417696262758039, "grad_norm": 0.2828686535358429, "learning_rate": 1.5852837386123286e-05, "loss": 0.0318, "step": 48330 }, { "epoch": 0.7419231064384928, "grad_norm": 0.3586984872817993, "learning_rate": 1.5850665182000463e-05, "loss": 0.0409, "step": 48340 }, { "epoch": 0.7420765866011818, "grad_norm": 0.26554080843925476, "learning_rate": 1.584849255805633e-05, "loss": 0.0429, "step": 48350 }, { "epoch": 0.7422300667638708, "grad_norm": 0.5194066166877747, "learning_rate": 1.5846319514446785e-05, "loss": 0.0456, "step": 48360 }, { "epoch": 0.7423835469265597, "grad_norm": 0.553964376449585, "learning_rate": 1.584414605132776e-05, "loss": 0.033, "step": 48370 }, { "epoch": 0.7425370270892487, "grad_norm": 0.5334008932113647, "learning_rate": 1.584197216885521e-05, "loss": 0.0427, "step": 48380 }, { "epoch": 0.7426905072519376, "grad_norm": 0.3247799873352051, "learning_rate": 1.583979786718513e-05, "loss": 0.0357, "step": 48390 }, { "epoch": 0.7428439874146266, "grad_norm": 0.5558258891105652, "learning_rate": 1.5837623146473533e-05, "loss": 0.0393, "step": 48400 }, { "epoch": 0.7429974675773157, "grad_norm": 0.3900856375694275, "learning_rate": 1.5835448006876477e-05, "loss": 0.0409, "step": 48410 }, { "epoch": 0.7431509477400046, "grad_norm": 0.3988463878631592, "learning_rate": 1.583327244855003e-05, "loss": 0.0369, "step": 48420 }, { "epoch": 0.7433044279026936, "grad_norm": 0.34430286288261414, "learning_rate": 1.5831096471650314e-05, "loss": 0.0428, "step": 48430 }, { "epoch": 0.7434579080653826, "grad_norm": 0.36996638774871826, "learning_rate": 1.5828920076333462e-05, "loss": 0.0448, "step": 48440 }, { "epoch": 0.7436113882280715, "grad_norm": 0.44108474254608154, "learning_rate": 1.5826743262755647e-05, "loss": 0.0448, "step": 48450 }, { "epoch": 0.7437648683907605, "grad_norm": 0.3182702362537384, "learning_rate": 1.5824566031073066e-05, "loss": 0.0325, "step": 48460 }, { "epoch": 0.7439183485534495, "grad_norm": 0.34313347935676575, "learning_rate": 1.582238838144195e-05, "loss": 0.032, "step": 48470 }, { "epoch": 0.7440718287161384, "grad_norm": 0.45765799283981323, "learning_rate": 1.5820210314018558e-05, "loss": 0.0372, "step": 48480 }, { "epoch": 0.7442253088788274, "grad_norm": 0.5561820864677429, "learning_rate": 1.5818031828959183e-05, "loss": 0.0401, "step": 48490 }, { "epoch": 0.7443787890415163, "grad_norm": 0.5946050882339478, "learning_rate": 1.581585292642014e-05, "loss": 0.0376, "step": 48500 }, { "epoch": 0.7445322692042053, "grad_norm": 0.39505329728126526, "learning_rate": 1.581367360655778e-05, "loss": 0.0428, "step": 48510 }, { "epoch": 0.7446857493668944, "grad_norm": 0.4496069550514221, "learning_rate": 1.5811493869528486e-05, "loss": 0.0389, "step": 48520 }, { "epoch": 0.7448392295295833, "grad_norm": 0.3284708559513092, "learning_rate": 1.580931371548866e-05, "loss": 0.0476, "step": 48530 }, { "epoch": 0.7449927096922723, "grad_norm": 0.3506487011909485, "learning_rate": 1.580713314459475e-05, "loss": 0.0359, "step": 48540 }, { "epoch": 0.7451461898549613, "grad_norm": 0.35053449869155884, "learning_rate": 1.5804952157003222e-05, "loss": 0.038, "step": 48550 }, { "epoch": 0.7452996700176502, "grad_norm": 0.5732356309890747, "learning_rate": 1.5802770752870577e-05, "loss": 0.0464, "step": 48560 }, { "epoch": 0.7454531501803392, "grad_norm": 0.34283119440078735, "learning_rate": 1.580058893235334e-05, "loss": 0.0384, "step": 48570 }, { "epoch": 0.7456066303430282, "grad_norm": 0.3915785849094391, "learning_rate": 1.5798406695608072e-05, "loss": 0.0416, "step": 48580 }, { "epoch": 0.7457601105057171, "grad_norm": 0.5174780488014221, "learning_rate": 1.5796224042791364e-05, "loss": 0.0399, "step": 48590 }, { "epoch": 0.7459135906684061, "grad_norm": 0.36701977252960205, "learning_rate": 1.5794040974059833e-05, "loss": 0.0389, "step": 48600 }, { "epoch": 0.7460670708310951, "grad_norm": 0.5488424897193909, "learning_rate": 1.5791857489570132e-05, "loss": 0.0373, "step": 48610 }, { "epoch": 0.746220550993784, "grad_norm": 0.4204743504524231, "learning_rate": 1.5789673589478927e-05, "loss": 0.0365, "step": 48620 }, { "epoch": 0.746374031156473, "grad_norm": 0.3655015826225281, "learning_rate": 1.578748927394294e-05, "loss": 0.0424, "step": 48630 }, { "epoch": 0.746527511319162, "grad_norm": 0.5021856427192688, "learning_rate": 1.5785304543118905e-05, "loss": 0.0394, "step": 48640 }, { "epoch": 0.746680991481851, "grad_norm": 0.44391515851020813, "learning_rate": 1.5783119397163588e-05, "loss": 0.0391, "step": 48650 }, { "epoch": 0.74683447164454, "grad_norm": 0.39133092761039734, "learning_rate": 1.5780933836233786e-05, "loss": 0.0369, "step": 48660 }, { "epoch": 0.7469879518072289, "grad_norm": 0.6701064705848694, "learning_rate": 1.577874786048633e-05, "loss": 0.0293, "step": 48670 }, { "epoch": 0.7471414319699179, "grad_norm": 0.31069597601890564, "learning_rate": 1.5776561470078072e-05, "loss": 0.0334, "step": 48680 }, { "epoch": 0.7472949121326069, "grad_norm": 0.3631712794303894, "learning_rate": 1.5774374665165905e-05, "loss": 0.0347, "step": 48690 }, { "epoch": 0.7474483922952958, "grad_norm": 0.5265395641326904, "learning_rate": 1.5772187445906738e-05, "loss": 0.0419, "step": 48700 }, { "epoch": 0.7476018724579848, "grad_norm": 0.33334800601005554, "learning_rate": 1.576999981245753e-05, "loss": 0.0426, "step": 48710 }, { "epoch": 0.7477553526206738, "grad_norm": 0.40968987345695496, "learning_rate": 1.5767811764975243e-05, "loss": 0.0322, "step": 48720 }, { "epoch": 0.7479088327833627, "grad_norm": 0.3720463514328003, "learning_rate": 1.5765623303616893e-05, "loss": 0.038, "step": 48730 }, { "epoch": 0.7480623129460517, "grad_norm": 0.4459514319896698, "learning_rate": 1.576343442853951e-05, "loss": 0.0401, "step": 48740 }, { "epoch": 0.7482157931087406, "grad_norm": 0.46468695998191833, "learning_rate": 1.5761245139900163e-05, "loss": 0.036, "step": 48750 }, { "epoch": 0.7483692732714297, "grad_norm": 0.5489006042480469, "learning_rate": 1.5759055437855945e-05, "loss": 0.0417, "step": 48760 }, { "epoch": 0.7485227534341187, "grad_norm": 0.3843017518520355, "learning_rate": 1.5756865322563983e-05, "loss": 0.0366, "step": 48770 }, { "epoch": 0.7486762335968076, "grad_norm": 0.3910592794418335, "learning_rate": 1.5754674794181424e-05, "loss": 0.0424, "step": 48780 }, { "epoch": 0.7488297137594966, "grad_norm": 0.3911300301551819, "learning_rate": 1.5752483852865463e-05, "loss": 0.0401, "step": 48790 }, { "epoch": 0.7489831939221856, "grad_norm": 0.3567267954349518, "learning_rate": 1.5750292498773303e-05, "loss": 0.0354, "step": 48800 }, { "epoch": 0.7491366740848745, "grad_norm": 0.3939967751502991, "learning_rate": 1.5748100732062193e-05, "loss": 0.0431, "step": 48810 }, { "epoch": 0.7492901542475635, "grad_norm": 0.4535684883594513, "learning_rate": 1.574590855288941e-05, "loss": 0.0428, "step": 48820 }, { "epoch": 0.7494436344102525, "grad_norm": 0.3856799006462097, "learning_rate": 1.5743715961412242e-05, "loss": 0.0272, "step": 48830 }, { "epoch": 0.7495971145729414, "grad_norm": 0.41054198145866394, "learning_rate": 1.5741522957788036e-05, "loss": 0.0297, "step": 48840 }, { "epoch": 0.7497505947356304, "grad_norm": 0.38392144441604614, "learning_rate": 1.5739329542174147e-05, "loss": 0.0334, "step": 48850 }, { "epoch": 0.7499040748983193, "grad_norm": 0.36848288774490356, "learning_rate": 1.5737135714727964e-05, "loss": 0.0425, "step": 48860 }, { "epoch": 0.7500575550610084, "grad_norm": 0.5960812568664551, "learning_rate": 1.5734941475606912e-05, "loss": 0.0393, "step": 48870 }, { "epoch": 0.7502110352236974, "grad_norm": 0.5428705215454102, "learning_rate": 1.5732746824968442e-05, "loss": 0.046, "step": 48880 }, { "epoch": 0.7503645153863863, "grad_norm": 0.3539987802505493, "learning_rate": 1.5730551762970027e-05, "loss": 0.0396, "step": 48890 }, { "epoch": 0.7505179955490753, "grad_norm": 0.5697197914123535, "learning_rate": 1.572835628976918e-05, "loss": 0.0379, "step": 48900 }, { "epoch": 0.7506714757117643, "grad_norm": 0.5967084765434265, "learning_rate": 1.5726160405523447e-05, "loss": 0.0379, "step": 48910 }, { "epoch": 0.7508249558744532, "grad_norm": 0.27991074323654175, "learning_rate": 1.572396411039038e-05, "loss": 0.041, "step": 48920 }, { "epoch": 0.7509784360371422, "grad_norm": 0.38531818985939026, "learning_rate": 1.5721767404527593e-05, "loss": 0.0356, "step": 48930 }, { "epoch": 0.7511319161998312, "grad_norm": 0.3197552263736725, "learning_rate": 1.5719570288092706e-05, "loss": 0.0337, "step": 48940 }, { "epoch": 0.7512853963625201, "grad_norm": 0.4033254086971283, "learning_rate": 1.571737276124337e-05, "loss": 0.036, "step": 48950 }, { "epoch": 0.7514388765252091, "grad_norm": 0.5082380175590515, "learning_rate": 1.571517482413728e-05, "loss": 0.0298, "step": 48960 }, { "epoch": 0.7515923566878981, "grad_norm": 0.3668585419654846, "learning_rate": 1.571297647693215e-05, "loss": 0.0409, "step": 48970 }, { "epoch": 0.751745836850587, "grad_norm": 0.5247107148170471, "learning_rate": 1.5710777719785724e-05, "loss": 0.0461, "step": 48980 }, { "epoch": 0.7518993170132761, "grad_norm": 0.4225078225135803, "learning_rate": 1.570857855285578e-05, "loss": 0.0462, "step": 48990 }, { "epoch": 0.752052797175965, "grad_norm": 0.5295465588569641, "learning_rate": 1.5706378976300108e-05, "loss": 0.0418, "step": 49000 }, { "epoch": 0.752206277338654, "grad_norm": 0.2490677833557129, "learning_rate": 1.5704178990276557e-05, "loss": 0.0392, "step": 49010 }, { "epoch": 0.752359757501343, "grad_norm": 0.7945915460586548, "learning_rate": 1.5701978594942982e-05, "loss": 0.0398, "step": 49020 }, { "epoch": 0.7525132376640319, "grad_norm": 0.4472762942314148, "learning_rate": 1.5699777790457278e-05, "loss": 0.0417, "step": 49030 }, { "epoch": 0.7526667178267209, "grad_norm": 0.4806850552558899, "learning_rate": 1.5697576576977364e-05, "loss": 0.0442, "step": 49040 }, { "epoch": 0.7528201979894099, "grad_norm": 0.28622230887413025, "learning_rate": 1.569537495466119e-05, "loss": 0.0327, "step": 49050 }, { "epoch": 0.7529736781520988, "grad_norm": 0.5521493554115295, "learning_rate": 1.569317292366674e-05, "loss": 0.0388, "step": 49060 }, { "epoch": 0.7531271583147878, "grad_norm": 0.22853831946849823, "learning_rate": 1.569097048415202e-05, "loss": 0.043, "step": 49070 }, { "epoch": 0.7532806384774768, "grad_norm": 0.30517059564590454, "learning_rate": 1.5688767636275068e-05, "loss": 0.028, "step": 49080 }, { "epoch": 0.7534341186401657, "grad_norm": 0.2967287302017212, "learning_rate": 1.5686564380193955e-05, "loss": 0.0378, "step": 49090 }, { "epoch": 0.7535875988028548, "grad_norm": 0.6837950944900513, "learning_rate": 1.5684360716066772e-05, "loss": 0.0378, "step": 49100 }, { "epoch": 0.7537410789655437, "grad_norm": 0.4685656428337097, "learning_rate": 1.5682156644051652e-05, "loss": 0.0303, "step": 49110 }, { "epoch": 0.7538945591282327, "grad_norm": 0.3620809316635132, "learning_rate": 1.567995216430675e-05, "loss": 0.042, "step": 49120 }, { "epoch": 0.7540480392909217, "grad_norm": 0.297309935092926, "learning_rate": 1.5677747276990252e-05, "loss": 0.0398, "step": 49130 }, { "epoch": 0.7542015194536106, "grad_norm": 0.3524666130542755, "learning_rate": 1.5675541982260366e-05, "loss": 0.0345, "step": 49140 }, { "epoch": 0.7543549996162996, "grad_norm": 0.45051246881484985, "learning_rate": 1.5673336280275338e-05, "loss": 0.0404, "step": 49150 }, { "epoch": 0.7545084797789886, "grad_norm": 0.3813839554786682, "learning_rate": 1.5671130171193448e-05, "loss": 0.0325, "step": 49160 }, { "epoch": 0.7546619599416775, "grad_norm": 0.259768009185791, "learning_rate": 1.566892365517299e-05, "loss": 0.0327, "step": 49170 }, { "epoch": 0.7548154401043665, "grad_norm": 0.3472288250923157, "learning_rate": 1.5666716732372297e-05, "loss": 0.0326, "step": 49180 }, { "epoch": 0.7549689202670555, "grad_norm": 0.41325289011001587, "learning_rate": 1.5664509402949724e-05, "loss": 0.0362, "step": 49190 }, { "epoch": 0.7551224004297444, "grad_norm": 0.47487252950668335, "learning_rate": 1.5662301667063674e-05, "loss": 0.0458, "step": 49200 }, { "epoch": 0.7552758805924334, "grad_norm": 0.505574107170105, "learning_rate": 1.5660093524872555e-05, "loss": 0.0486, "step": 49210 }, { "epoch": 0.7554293607551223, "grad_norm": 0.5131657719612122, "learning_rate": 1.5657884976534816e-05, "loss": 0.0338, "step": 49220 }, { "epoch": 0.7555828409178114, "grad_norm": 0.43912285566329956, "learning_rate": 1.5655676022208937e-05, "loss": 0.0518, "step": 49230 }, { "epoch": 0.7557363210805004, "grad_norm": 0.355377733707428, "learning_rate": 1.5653466662053422e-05, "loss": 0.0307, "step": 49240 }, { "epoch": 0.7558898012431893, "grad_norm": 0.39170968532562256, "learning_rate": 1.5651256896226805e-05, "loss": 0.039, "step": 49250 }, { "epoch": 0.7560432814058783, "grad_norm": 0.4600805640220642, "learning_rate": 1.5649046724887657e-05, "loss": 0.0341, "step": 49260 }, { "epoch": 0.7561967615685673, "grad_norm": 0.32253915071487427, "learning_rate": 1.5646836148194563e-05, "loss": 0.0383, "step": 49270 }, { "epoch": 0.7563502417312562, "grad_norm": 0.5439344048500061, "learning_rate": 1.5644625166306152e-05, "loss": 0.0378, "step": 49280 }, { "epoch": 0.7565037218939452, "grad_norm": 0.6156522035598755, "learning_rate": 1.564241377938107e-05, "loss": 0.0405, "step": 49290 }, { "epoch": 0.7566572020566342, "grad_norm": 0.3334760367870331, "learning_rate": 1.5640201987578e-05, "loss": 0.0312, "step": 49300 }, { "epoch": 0.7568106822193231, "grad_norm": 0.6005296111106873, "learning_rate": 1.5637989791055655e-05, "loss": 0.0348, "step": 49310 }, { "epoch": 0.7569641623820121, "grad_norm": 0.6854735612869263, "learning_rate": 1.5635777189972764e-05, "loss": 0.0384, "step": 49320 }, { "epoch": 0.7571176425447012, "grad_norm": 0.5089435577392578, "learning_rate": 1.563356418448811e-05, "loss": 0.0474, "step": 49330 }, { "epoch": 0.75727112270739, "grad_norm": 0.440680593252182, "learning_rate": 1.5631350774760475e-05, "loss": 0.0437, "step": 49340 }, { "epoch": 0.7574246028700791, "grad_norm": 0.3204026520252228, "learning_rate": 1.5629136960948695e-05, "loss": 0.0371, "step": 49350 }, { "epoch": 0.757578083032768, "grad_norm": 0.37739983201026917, "learning_rate": 1.5626922743211622e-05, "loss": 0.0343, "step": 49360 }, { "epoch": 0.757731563195457, "grad_norm": 0.5094757676124573, "learning_rate": 1.5624708121708133e-05, "loss": 0.0359, "step": 49370 }, { "epoch": 0.757885043358146, "grad_norm": 0.5893456935882568, "learning_rate": 1.5622493096597154e-05, "loss": 0.0383, "step": 49380 }, { "epoch": 0.7580385235208349, "grad_norm": 0.21446682512760162, "learning_rate": 1.562027766803761e-05, "loss": 0.0347, "step": 49390 }, { "epoch": 0.7581920036835239, "grad_norm": 0.3421814441680908, "learning_rate": 1.5618061836188488e-05, "loss": 0.0371, "step": 49400 }, { "epoch": 0.7583454838462129, "grad_norm": 0.2885233163833618, "learning_rate": 1.561584560120878e-05, "loss": 0.0329, "step": 49410 }, { "epoch": 0.7584989640089018, "grad_norm": 0.255343496799469, "learning_rate": 1.561362896325751e-05, "loss": 0.0321, "step": 49420 }, { "epoch": 0.7586524441715908, "grad_norm": 0.39292973279953003, "learning_rate": 1.5611411922493745e-05, "loss": 0.0352, "step": 49430 }, { "epoch": 0.7588059243342798, "grad_norm": 0.39806318283081055, "learning_rate": 1.5609194479076566e-05, "loss": 0.051, "step": 49440 }, { "epoch": 0.7589594044969687, "grad_norm": 0.4562711715698242, "learning_rate": 1.5606976633165086e-05, "loss": 0.0356, "step": 49450 }, { "epoch": 0.7591128846596578, "grad_norm": 0.2651684284210205, "learning_rate": 1.5604758384918458e-05, "loss": 0.0363, "step": 49460 }, { "epoch": 0.7592663648223467, "grad_norm": 0.5529171228408813, "learning_rate": 1.5602539734495847e-05, "loss": 0.0348, "step": 49470 }, { "epoch": 0.7594198449850357, "grad_norm": 0.5769456624984741, "learning_rate": 1.5600320682056454e-05, "loss": 0.0489, "step": 49480 }, { "epoch": 0.7595733251477247, "grad_norm": 0.3520210385322571, "learning_rate": 1.5598101227759514e-05, "loss": 0.0341, "step": 49490 }, { "epoch": 0.7597268053104136, "grad_norm": 0.49998846650123596, "learning_rate": 1.5595881371764284e-05, "loss": 0.0346, "step": 49500 }, { "epoch": 0.7598802854731026, "grad_norm": 0.36515405774116516, "learning_rate": 1.5593661114230056e-05, "loss": 0.0387, "step": 49510 }, { "epoch": 0.7600337656357916, "grad_norm": 0.22915254533290863, "learning_rate": 1.5591440455316144e-05, "loss": 0.0346, "step": 49520 }, { "epoch": 0.7601872457984805, "grad_norm": 0.3700423836708069, "learning_rate": 1.5589219395181895e-05, "loss": 0.0402, "step": 49530 }, { "epoch": 0.7603407259611695, "grad_norm": 0.3663983643054962, "learning_rate": 1.5586997933986683e-05, "loss": 0.044, "step": 49540 }, { "epoch": 0.7604942061238585, "grad_norm": 0.5649487376213074, "learning_rate": 1.558477607188991e-05, "loss": 0.0386, "step": 49550 }, { "epoch": 0.7606476862865474, "grad_norm": 0.3720106780529022, "learning_rate": 1.5582553809051014e-05, "loss": 0.0319, "step": 49560 }, { "epoch": 0.7608011664492365, "grad_norm": 0.3744312524795532, "learning_rate": 1.5580331145629452e-05, "loss": 0.0398, "step": 49570 }, { "epoch": 0.7609546466119254, "grad_norm": 0.6665613055229187, "learning_rate": 1.557810808178471e-05, "loss": 0.0406, "step": 49580 }, { "epoch": 0.7611081267746144, "grad_norm": 0.554280698299408, "learning_rate": 1.5575884617676314e-05, "loss": 0.0377, "step": 49590 }, { "epoch": 0.7612616069373034, "grad_norm": 0.5019139647483826, "learning_rate": 1.557366075346381e-05, "loss": 0.0394, "step": 49600 }, { "epoch": 0.7614150870999923, "grad_norm": 0.3679843842983246, "learning_rate": 1.557143648930677e-05, "loss": 0.0316, "step": 49610 }, { "epoch": 0.7615685672626813, "grad_norm": 0.49756696820259094, "learning_rate": 1.5569211825364798e-05, "loss": 0.0344, "step": 49620 }, { "epoch": 0.7617220474253703, "grad_norm": 0.356887549161911, "learning_rate": 1.5566986761797533e-05, "loss": 0.0334, "step": 49630 }, { "epoch": 0.7618755275880592, "grad_norm": 0.2890283763408661, "learning_rate": 1.556476129876463e-05, "loss": 0.0412, "step": 49640 }, { "epoch": 0.7620290077507482, "grad_norm": 0.40808141231536865, "learning_rate": 1.556253543642579e-05, "loss": 0.0337, "step": 49650 }, { "epoch": 0.7621824879134372, "grad_norm": 0.31512826681137085, "learning_rate": 1.5560309174940722e-05, "loss": 0.0406, "step": 49660 }, { "epoch": 0.7623359680761261, "grad_norm": 0.3805740177631378, "learning_rate": 1.555808251446918e-05, "loss": 0.0346, "step": 49670 }, { "epoch": 0.7624894482388151, "grad_norm": 0.6812325716018677, "learning_rate": 1.5555855455170937e-05, "loss": 0.0428, "step": 49680 }, { "epoch": 0.7626429284015042, "grad_norm": 0.49974682927131653, "learning_rate": 1.55536279972058e-05, "loss": 0.0329, "step": 49690 }, { "epoch": 0.7627964085641931, "grad_norm": 0.5435808300971985, "learning_rate": 1.5551400140733604e-05, "loss": 0.0444, "step": 49700 }, { "epoch": 0.7629498887268821, "grad_norm": 0.38916265964508057, "learning_rate": 1.554917188591421e-05, "loss": 0.0391, "step": 49710 }, { "epoch": 0.763103368889571, "grad_norm": 0.46289485692977905, "learning_rate": 1.5546943232907507e-05, "loss": 0.0378, "step": 49720 }, { "epoch": 0.76325684905226, "grad_norm": 0.34662970900535583, "learning_rate": 1.5544714181873422e-05, "loss": 0.0281, "step": 49730 }, { "epoch": 0.763410329214949, "grad_norm": 0.3961634337902069, "learning_rate": 1.5542484732971896e-05, "loss": 0.0276, "step": 49740 }, { "epoch": 0.7635638093776379, "grad_norm": 0.28704872727394104, "learning_rate": 1.5540254886362905e-05, "loss": 0.0403, "step": 49750 }, { "epoch": 0.7637172895403269, "grad_norm": 0.44686567783355713, "learning_rate": 1.5538024642206457e-05, "loss": 0.0397, "step": 49760 }, { "epoch": 0.7638707697030159, "grad_norm": 0.43453076481819153, "learning_rate": 1.5535794000662588e-05, "loss": 0.0367, "step": 49770 }, { "epoch": 0.7640242498657048, "grad_norm": 0.30760660767555237, "learning_rate": 1.5533562961891356e-05, "loss": 0.0272, "step": 49780 }, { "epoch": 0.7641777300283938, "grad_norm": 0.4607076942920685, "learning_rate": 1.5531331526052854e-05, "loss": 0.0388, "step": 49790 }, { "epoch": 0.7643312101910829, "grad_norm": 0.378974974155426, "learning_rate": 1.5529099693307205e-05, "loss": 0.0348, "step": 49800 }, { "epoch": 0.7644846903537718, "grad_norm": 0.45248183608055115, "learning_rate": 1.552686746381455e-05, "loss": 0.0442, "step": 49810 }, { "epoch": 0.7646381705164608, "grad_norm": 0.451587438583374, "learning_rate": 1.5524634837735067e-05, "loss": 0.0415, "step": 49820 }, { "epoch": 0.7647916506791497, "grad_norm": 0.650070309638977, "learning_rate": 1.5522401815228962e-05, "loss": 0.0414, "step": 49830 }, { "epoch": 0.7649451308418387, "grad_norm": 0.41442519426345825, "learning_rate": 1.552016839645647e-05, "loss": 0.0395, "step": 49840 }, { "epoch": 0.7650986110045277, "grad_norm": 0.45607632398605347, "learning_rate": 1.551793458157785e-05, "loss": 0.0487, "step": 49850 }, { "epoch": 0.7652520911672166, "grad_norm": 0.43185558915138245, "learning_rate": 1.551570037075339e-05, "loss": 0.0341, "step": 49860 }, { "epoch": 0.7654055713299056, "grad_norm": 0.5090250372886658, "learning_rate": 1.551346576414341e-05, "loss": 0.0507, "step": 49870 }, { "epoch": 0.7655590514925946, "grad_norm": 0.5103257894515991, "learning_rate": 1.5511230761908264e-05, "loss": 0.0356, "step": 49880 }, { "epoch": 0.7657125316552835, "grad_norm": 0.39661189913749695, "learning_rate": 1.5508995364208317e-05, "loss": 0.0364, "step": 49890 }, { "epoch": 0.7658660118179725, "grad_norm": 0.42640331387519836, "learning_rate": 1.5506759571203976e-05, "loss": 0.0355, "step": 49900 }, { "epoch": 0.7660194919806615, "grad_norm": 0.37264522910118103, "learning_rate": 1.550452338305567e-05, "loss": 0.0516, "step": 49910 }, { "epoch": 0.7661729721433505, "grad_norm": 0.4243369698524475, "learning_rate": 1.550228679992387e-05, "loss": 0.042, "step": 49920 }, { "epoch": 0.7663264523060395, "grad_norm": 0.3835618495941162, "learning_rate": 1.5500049821969054e-05, "loss": 0.0376, "step": 49930 }, { "epoch": 0.7664799324687284, "grad_norm": 0.41634035110473633, "learning_rate": 1.549781244935174e-05, "loss": 0.0361, "step": 49940 }, { "epoch": 0.7666334126314174, "grad_norm": 0.6088942885398865, "learning_rate": 1.549557468223248e-05, "loss": 0.0423, "step": 49950 }, { "epoch": 0.7667868927941064, "grad_norm": 0.5453212261199951, "learning_rate": 1.5493336520771843e-05, "loss": 0.0408, "step": 49960 }, { "epoch": 0.7669403729567953, "grad_norm": 0.49399131536483765, "learning_rate": 1.5491097965130428e-05, "loss": 0.0467, "step": 49970 }, { "epoch": 0.7670938531194843, "grad_norm": 0.623124361038208, "learning_rate": 1.548885901546887e-05, "loss": 0.0361, "step": 49980 }, { "epoch": 0.7672473332821733, "grad_norm": 0.4644119441509247, "learning_rate": 1.5486619671947822e-05, "loss": 0.0394, "step": 49990 }, { "epoch": 0.7674008134448622, "grad_norm": 0.5317888259887695, "learning_rate": 1.548437993472798e-05, "loss": 0.0423, "step": 50000 }, { "epoch": 0.7675542936075512, "grad_norm": 0.26870572566986084, "learning_rate": 1.5482139803970052e-05, "loss": 0.0371, "step": 50010 }, { "epoch": 0.7677077737702402, "grad_norm": 0.3625873625278473, "learning_rate": 1.5479899279834777e-05, "loss": 0.035, "step": 50020 }, { "epoch": 0.7678612539329291, "grad_norm": 0.3351014256477356, "learning_rate": 1.547765836248294e-05, "loss": 0.0399, "step": 50030 }, { "epoch": 0.7680147340956182, "grad_norm": 0.33548033237457275, "learning_rate": 1.5475417052075325e-05, "loss": 0.037, "step": 50040 }, { "epoch": 0.7681682142583072, "grad_norm": 0.41695478558540344, "learning_rate": 1.5473175348772773e-05, "loss": 0.037, "step": 50050 }, { "epoch": 0.7683216944209961, "grad_norm": 0.23241670429706573, "learning_rate": 1.5470933252736134e-05, "loss": 0.0204, "step": 50060 }, { "epoch": 0.7684751745836851, "grad_norm": 0.5014275908470154, "learning_rate": 1.546869076412629e-05, "loss": 0.0316, "step": 50070 }, { "epoch": 0.768628654746374, "grad_norm": 0.4313358664512634, "learning_rate": 1.5466447883104157e-05, "loss": 0.0388, "step": 50080 }, { "epoch": 0.768782134909063, "grad_norm": 0.40174147486686707, "learning_rate": 1.5464204609830675e-05, "loss": 0.0317, "step": 50090 }, { "epoch": 0.768935615071752, "grad_norm": 0.5901543498039246, "learning_rate": 1.546196094446681e-05, "loss": 0.0369, "step": 50100 }, { "epoch": 0.7690890952344409, "grad_norm": 0.2527622878551483, "learning_rate": 1.5459716887173565e-05, "loss": 0.0381, "step": 50110 }, { "epoch": 0.7692425753971299, "grad_norm": 0.3218749761581421, "learning_rate": 1.545747243811196e-05, "loss": 0.0338, "step": 50120 }, { "epoch": 0.7693960555598189, "grad_norm": 0.26354867219924927, "learning_rate": 1.545522759744305e-05, "loss": 0.0379, "step": 50130 }, { "epoch": 0.7695495357225078, "grad_norm": 0.39617207646369934, "learning_rate": 1.5452982365327916e-05, "loss": 0.0443, "step": 50140 }, { "epoch": 0.7697030158851969, "grad_norm": 0.46734488010406494, "learning_rate": 1.545073674192766e-05, "loss": 0.0342, "step": 50150 }, { "epoch": 0.7698564960478859, "grad_norm": 0.4373033940792084, "learning_rate": 1.5448490727403434e-05, "loss": 0.0363, "step": 50160 }, { "epoch": 0.7700099762105748, "grad_norm": 0.5383514761924744, "learning_rate": 1.544624432191639e-05, "loss": 0.0375, "step": 50170 }, { "epoch": 0.7701634563732638, "grad_norm": 0.32941243052482605, "learning_rate": 1.544399752562773e-05, "loss": 0.0363, "step": 50180 }, { "epoch": 0.7703169365359527, "grad_norm": 0.4664914608001709, "learning_rate": 1.5441750338698672e-05, "loss": 0.0335, "step": 50190 }, { "epoch": 0.7704704166986417, "grad_norm": 0.2553960382938385, "learning_rate": 1.543950276129046e-05, "loss": 0.0307, "step": 50200 }, { "epoch": 0.7706238968613307, "grad_norm": 0.45815691351890564, "learning_rate": 1.543725479356439e-05, "loss": 0.0434, "step": 50210 }, { "epoch": 0.7707773770240196, "grad_norm": 0.5186212658882141, "learning_rate": 1.5435006435681744e-05, "loss": 0.038, "step": 50220 }, { "epoch": 0.7709308571867086, "grad_norm": 0.4435155391693115, "learning_rate": 1.543275768780387e-05, "loss": 0.0371, "step": 50230 }, { "epoch": 0.7710843373493976, "grad_norm": 0.26607298851013184, "learning_rate": 1.5430508550092123e-05, "loss": 0.0289, "step": 50240 }, { "epoch": 0.7712378175120865, "grad_norm": 0.3823274075984955, "learning_rate": 1.54282590227079e-05, "loss": 0.0397, "step": 50250 }, { "epoch": 0.7713912976747755, "grad_norm": 0.24855643510818481, "learning_rate": 1.542600910581261e-05, "loss": 0.0342, "step": 50260 }, { "epoch": 0.7715447778374646, "grad_norm": 0.32403308153152466, "learning_rate": 1.5423758799567706e-05, "loss": 0.0388, "step": 50270 }, { "epoch": 0.7716982580001535, "grad_norm": 0.3764135539531708, "learning_rate": 1.5421508104134654e-05, "loss": 0.0282, "step": 50280 }, { "epoch": 0.7718517381628425, "grad_norm": 0.4688526391983032, "learning_rate": 1.5419257019674958e-05, "loss": 0.0397, "step": 50290 }, { "epoch": 0.7720052183255314, "grad_norm": 0.3905620574951172, "learning_rate": 1.541700554635015e-05, "loss": 0.0388, "step": 50300 }, { "epoch": 0.7721586984882204, "grad_norm": 0.3836398124694824, "learning_rate": 1.541475368432179e-05, "loss": 0.0369, "step": 50310 }, { "epoch": 0.7723121786509094, "grad_norm": 0.4615512192249298, "learning_rate": 1.5412501433751452e-05, "loss": 0.045, "step": 50320 }, { "epoch": 0.7724656588135983, "grad_norm": 0.49039846658706665, "learning_rate": 1.5410248794800757e-05, "loss": 0.0421, "step": 50330 }, { "epoch": 0.7726191389762873, "grad_norm": 0.3244301378726959, "learning_rate": 1.5407995767631345e-05, "loss": 0.0359, "step": 50340 }, { "epoch": 0.7727726191389763, "grad_norm": 0.46594709157943726, "learning_rate": 1.540574235240488e-05, "loss": 0.0301, "step": 50350 }, { "epoch": 0.7729260993016652, "grad_norm": 0.36455896496772766, "learning_rate": 1.5403488549283065e-05, "loss": 0.0285, "step": 50360 }, { "epoch": 0.7730795794643542, "grad_norm": 0.40056487917900085, "learning_rate": 1.540123435842762e-05, "loss": 0.0307, "step": 50370 }, { "epoch": 0.7732330596270433, "grad_norm": 0.48066678643226624, "learning_rate": 1.53989797800003e-05, "loss": 0.0405, "step": 50380 }, { "epoch": 0.7733865397897322, "grad_norm": 0.309856116771698, "learning_rate": 1.5396724814162882e-05, "loss": 0.0389, "step": 50390 }, { "epoch": 0.7735400199524212, "grad_norm": 1.0468860864639282, "learning_rate": 1.5394469461077172e-05, "loss": 0.0369, "step": 50400 }, { "epoch": 0.7736935001151101, "grad_norm": 0.4034149944782257, "learning_rate": 1.5392213720905015e-05, "loss": 0.031, "step": 50410 }, { "epoch": 0.7738469802777991, "grad_norm": 0.28308406472206116, "learning_rate": 1.5389957593808262e-05, "loss": 0.0313, "step": 50420 }, { "epoch": 0.7740004604404881, "grad_norm": 0.5846574902534485, "learning_rate": 1.5387701079948812e-05, "loss": 0.0318, "step": 50430 }, { "epoch": 0.774153940603177, "grad_norm": 0.5287496447563171, "learning_rate": 1.538544417948858e-05, "loss": 0.0399, "step": 50440 }, { "epoch": 0.774307420765866, "grad_norm": 0.4149992763996124, "learning_rate": 1.5383186892589514e-05, "loss": 0.0361, "step": 50450 }, { "epoch": 0.774460900928555, "grad_norm": 0.3486495316028595, "learning_rate": 1.538092921941359e-05, "loss": 0.0394, "step": 50460 }, { "epoch": 0.7746143810912439, "grad_norm": 0.49230024218559265, "learning_rate": 1.5378671160122806e-05, "loss": 0.0409, "step": 50470 }, { "epoch": 0.7747678612539329, "grad_norm": 0.522205650806427, "learning_rate": 1.5376412714879196e-05, "loss": 0.034, "step": 50480 }, { "epoch": 0.774921341416622, "grad_norm": 0.715796172618866, "learning_rate": 1.5374153883844814e-05, "loss": 0.0343, "step": 50490 }, { "epoch": 0.7750748215793108, "grad_norm": 0.38663947582244873, "learning_rate": 1.5371894667181745e-05, "loss": 0.0442, "step": 50500 }, { "epoch": 0.7752283017419999, "grad_norm": 0.4416380822658539, "learning_rate": 1.5369635065052105e-05, "loss": 0.0356, "step": 50510 }, { "epoch": 0.7753817819046889, "grad_norm": 0.4434192180633545, "learning_rate": 1.536737507761803e-05, "loss": 0.0431, "step": 50520 }, { "epoch": 0.7755352620673778, "grad_norm": 0.33964699506759644, "learning_rate": 1.5365114705041695e-05, "loss": 0.036, "step": 50530 }, { "epoch": 0.7756887422300668, "grad_norm": 0.4002453684806824, "learning_rate": 1.5362853947485288e-05, "loss": 0.0425, "step": 50540 }, { "epoch": 0.7758422223927557, "grad_norm": 0.4493350088596344, "learning_rate": 1.5360592805111035e-05, "loss": 0.0427, "step": 50550 }, { "epoch": 0.7759957025554447, "grad_norm": 0.2808111906051636, "learning_rate": 1.5358331278081187e-05, "loss": 0.0343, "step": 50560 }, { "epoch": 0.7761491827181337, "grad_norm": 0.4204861521720886, "learning_rate": 1.5356069366558023e-05, "loss": 0.0388, "step": 50570 }, { "epoch": 0.7763026628808226, "grad_norm": 0.4240487217903137, "learning_rate": 1.5353807070703854e-05, "loss": 0.0392, "step": 50580 }, { "epoch": 0.7764561430435116, "grad_norm": 0.5226961374282837, "learning_rate": 1.5351544390681005e-05, "loss": 0.0374, "step": 50590 }, { "epoch": 0.7766096232062006, "grad_norm": 0.2928394377231598, "learning_rate": 1.534928132665184e-05, "loss": 0.0361, "step": 50600 }, { "epoch": 0.7767631033688895, "grad_norm": 0.39819616079330444, "learning_rate": 1.5347017878778746e-05, "loss": 0.0398, "step": 50610 }, { "epoch": 0.7769165835315786, "grad_norm": 0.29668062925338745, "learning_rate": 1.5344754047224147e-05, "loss": 0.0325, "step": 50620 }, { "epoch": 0.7770700636942676, "grad_norm": 0.46305081248283386, "learning_rate": 1.534248983215048e-05, "loss": 0.0364, "step": 50630 }, { "epoch": 0.7772235438569565, "grad_norm": 0.48422613739967346, "learning_rate": 1.5340225233720216e-05, "loss": 0.0425, "step": 50640 }, { "epoch": 0.7773770240196455, "grad_norm": 0.35570451617240906, "learning_rate": 1.5337960252095862e-05, "loss": 0.0336, "step": 50650 }, { "epoch": 0.7775305041823344, "grad_norm": 0.29535654187202454, "learning_rate": 1.5335694887439932e-05, "loss": 0.0302, "step": 50660 }, { "epoch": 0.7776839843450234, "grad_norm": 0.47521016001701355, "learning_rate": 1.533342913991499e-05, "loss": 0.038, "step": 50670 }, { "epoch": 0.7778374645077124, "grad_norm": 0.5544255971908569, "learning_rate": 1.5331163009683615e-05, "loss": 0.0405, "step": 50680 }, { "epoch": 0.7779909446704013, "grad_norm": 0.4226493239402771, "learning_rate": 1.5328896496908413e-05, "loss": 0.0412, "step": 50690 }, { "epoch": 0.7781444248330903, "grad_norm": 0.34124746918678284, "learning_rate": 1.5326629601752022e-05, "loss": 0.0308, "step": 50700 }, { "epoch": 0.7782979049957793, "grad_norm": 0.4936629831790924, "learning_rate": 1.5324362324377106e-05, "loss": 0.0379, "step": 50710 }, { "epoch": 0.7784513851584682, "grad_norm": 0.3377097547054291, "learning_rate": 1.5322094664946353e-05, "loss": 0.0249, "step": 50720 }, { "epoch": 0.7786048653211572, "grad_norm": 0.44668224453926086, "learning_rate": 1.5319826623622488e-05, "loss": 0.0388, "step": 50730 }, { "epoch": 0.7787583454838463, "grad_norm": 0.43183255195617676, "learning_rate": 1.5317558200568253e-05, "loss": 0.0343, "step": 50740 }, { "epoch": 0.7789118256465352, "grad_norm": 0.4178345501422882, "learning_rate": 1.531528939594642e-05, "loss": 0.0425, "step": 50750 }, { "epoch": 0.7790653058092242, "grad_norm": 0.40911078453063965, "learning_rate": 1.531302020991979e-05, "loss": 0.0385, "step": 50760 }, { "epoch": 0.7792187859719131, "grad_norm": 0.24568568170070648, "learning_rate": 1.5310750642651194e-05, "loss": 0.0358, "step": 50770 }, { "epoch": 0.7793722661346021, "grad_norm": 0.5292656421661377, "learning_rate": 1.530848069430349e-05, "loss": 0.0362, "step": 50780 }, { "epoch": 0.7795257462972911, "grad_norm": 0.3254563510417938, "learning_rate": 1.530621036503955e-05, "loss": 0.0304, "step": 50790 }, { "epoch": 0.77967922645998, "grad_norm": 0.3470207452774048, "learning_rate": 1.5303939655022295e-05, "loss": 0.034, "step": 50800 }, { "epoch": 0.779832706622669, "grad_norm": 0.38152962923049927, "learning_rate": 1.5301668564414657e-05, "loss": 0.0311, "step": 50810 }, { "epoch": 0.779986186785358, "grad_norm": 0.2914086580276489, "learning_rate": 1.52993970933796e-05, "loss": 0.0329, "step": 50820 }, { "epoch": 0.7801396669480469, "grad_norm": 0.4297751486301422, "learning_rate": 1.529712524208012e-05, "loss": 0.0312, "step": 50830 }, { "epoch": 0.7802931471107359, "grad_norm": 0.5761513113975525, "learning_rate": 1.5294853010679235e-05, "loss": 0.0397, "step": 50840 }, { "epoch": 0.780446627273425, "grad_norm": 0.40127694606781006, "learning_rate": 1.529258039933999e-05, "loss": 0.0361, "step": 50850 }, { "epoch": 0.7806001074361139, "grad_norm": 0.619202196598053, "learning_rate": 1.5290307408225463e-05, "loss": 0.0404, "step": 50860 }, { "epoch": 0.7807535875988029, "grad_norm": 0.4490020275115967, "learning_rate": 1.5288034037498748e-05, "loss": 0.0343, "step": 50870 }, { "epoch": 0.7809070677614919, "grad_norm": 0.3578501045703888, "learning_rate": 1.528576028732298e-05, "loss": 0.0457, "step": 50880 }, { "epoch": 0.7810605479241808, "grad_norm": 0.55464106798172, "learning_rate": 1.528348615786131e-05, "loss": 0.0435, "step": 50890 }, { "epoch": 0.7812140280868698, "grad_norm": 0.48198917508125305, "learning_rate": 1.5281211649276923e-05, "loss": 0.0418, "step": 50900 }, { "epoch": 0.7813675082495587, "grad_norm": 0.5252379775047302, "learning_rate": 1.527893676173303e-05, "loss": 0.0457, "step": 50910 }, { "epoch": 0.7815209884122477, "grad_norm": 0.5038540959358215, "learning_rate": 1.5276661495392866e-05, "loss": 0.0358, "step": 50920 }, { "epoch": 0.7816744685749367, "grad_norm": 0.3119997978210449, "learning_rate": 1.5274385850419696e-05, "loss": 0.0367, "step": 50930 }, { "epoch": 0.7818279487376256, "grad_norm": 0.352620005607605, "learning_rate": 1.5272109826976817e-05, "loss": 0.0481, "step": 50940 }, { "epoch": 0.7819814289003146, "grad_norm": 0.3705753684043884, "learning_rate": 1.526983342522754e-05, "loss": 0.0321, "step": 50950 }, { "epoch": 0.7821349090630036, "grad_norm": 0.3974072337150574, "learning_rate": 1.5267556645335208e-05, "loss": 0.0327, "step": 50960 }, { "epoch": 0.7822883892256925, "grad_norm": 0.3602399230003357, "learning_rate": 1.5265279487463206e-05, "loss": 0.0308, "step": 50970 }, { "epoch": 0.7824418693883816, "grad_norm": 0.6578912138938904, "learning_rate": 1.5263001951774926e-05, "loss": 0.0359, "step": 50980 }, { "epoch": 0.7825953495510706, "grad_norm": 0.33345261216163635, "learning_rate": 1.5260724038433795e-05, "loss": 0.037, "step": 50990 }, { "epoch": 0.7827488297137595, "grad_norm": 0.46572145819664, "learning_rate": 1.5258445747603272e-05, "loss": 0.0308, "step": 51000 }, { "epoch": 0.7829023098764485, "grad_norm": 0.4529598653316498, "learning_rate": 1.5256167079446833e-05, "loss": 0.0431, "step": 51010 }, { "epoch": 0.7830557900391374, "grad_norm": 0.44196921586990356, "learning_rate": 1.5253888034127991e-05, "loss": 0.0301, "step": 51020 }, { "epoch": 0.7832092702018264, "grad_norm": 0.3884883522987366, "learning_rate": 1.525160861181028e-05, "loss": 0.0393, "step": 51030 }, { "epoch": 0.7833627503645154, "grad_norm": 0.5601595044136047, "learning_rate": 1.524932881265726e-05, "loss": 0.0335, "step": 51040 }, { "epoch": 0.7835162305272043, "grad_norm": 0.44320419430732727, "learning_rate": 1.5247048636832527e-05, "loss": 0.0322, "step": 51050 }, { "epoch": 0.7836697106898933, "grad_norm": 0.34313520789146423, "learning_rate": 1.5244768084499691e-05, "loss": 0.0296, "step": 51060 }, { "epoch": 0.7838231908525823, "grad_norm": 0.44977498054504395, "learning_rate": 1.5242487155822398e-05, "loss": 0.0338, "step": 51070 }, { "epoch": 0.7839766710152712, "grad_norm": 0.5092975497245789, "learning_rate": 1.524020585096432e-05, "loss": 0.0386, "step": 51080 }, { "epoch": 0.7841301511779603, "grad_norm": 0.47194942831993103, "learning_rate": 1.5237924170089153e-05, "loss": 0.0359, "step": 51090 }, { "epoch": 0.7842836313406493, "grad_norm": 0.3052125573158264, "learning_rate": 1.5235642113360625e-05, "loss": 0.0317, "step": 51100 }, { "epoch": 0.7844371115033382, "grad_norm": 0.4333975613117218, "learning_rate": 1.5233359680942483e-05, "loss": 0.0376, "step": 51110 }, { "epoch": 0.7845905916660272, "grad_norm": 0.34400415420532227, "learning_rate": 1.5231076872998507e-05, "loss": 0.035, "step": 51120 }, { "epoch": 0.7847440718287161, "grad_norm": 0.4625355303287506, "learning_rate": 1.5228793689692505e-05, "loss": 0.0385, "step": 51130 }, { "epoch": 0.7848975519914051, "grad_norm": 0.3944125175476074, "learning_rate": 1.5226510131188308e-05, "loss": 0.0378, "step": 51140 }, { "epoch": 0.7850510321540941, "grad_norm": 0.2874702215194702, "learning_rate": 1.5224226197649773e-05, "loss": 0.0403, "step": 51150 }, { "epoch": 0.785204512316783, "grad_norm": 0.22936367988586426, "learning_rate": 1.522194188924079e-05, "loss": 0.0326, "step": 51160 }, { "epoch": 0.785357992479472, "grad_norm": 0.3368948698043823, "learning_rate": 1.521965720612527e-05, "loss": 0.0264, "step": 51170 }, { "epoch": 0.785511472642161, "grad_norm": 0.4390360116958618, "learning_rate": 1.5217372148467153e-05, "loss": 0.0437, "step": 51180 }, { "epoch": 0.7856649528048499, "grad_norm": 0.36266422271728516, "learning_rate": 1.5215086716430407e-05, "loss": 0.0378, "step": 51190 }, { "epoch": 0.785818432967539, "grad_norm": 0.5055236220359802, "learning_rate": 1.5212800910179026e-05, "loss": 0.036, "step": 51200 }, { "epoch": 0.785971913130228, "grad_norm": 0.4888516962528229, "learning_rate": 1.5210514729877031e-05, "loss": 0.0333, "step": 51210 }, { "epoch": 0.7861253932929169, "grad_norm": 0.461039274930954, "learning_rate": 1.5208228175688468e-05, "loss": 0.0395, "step": 51220 }, { "epoch": 0.7862788734556059, "grad_norm": 0.43818578124046326, "learning_rate": 1.5205941247777413e-05, "loss": 0.0462, "step": 51230 }, { "epoch": 0.7864323536182949, "grad_norm": 0.3864065408706665, "learning_rate": 1.5203653946307966e-05, "loss": 0.0379, "step": 51240 }, { "epoch": 0.7865858337809838, "grad_norm": 0.46976232528686523, "learning_rate": 1.5201366271444258e-05, "loss": 0.0327, "step": 51250 }, { "epoch": 0.7867393139436728, "grad_norm": 0.35125279426574707, "learning_rate": 1.5199078223350436e-05, "loss": 0.0419, "step": 51260 }, { "epoch": 0.7868927941063617, "grad_norm": 0.3223479986190796, "learning_rate": 1.519678980219069e-05, "loss": 0.033, "step": 51270 }, { "epoch": 0.7870462742690507, "grad_norm": 0.4569989740848541, "learning_rate": 1.5194501008129224e-05, "loss": 0.0431, "step": 51280 }, { "epoch": 0.7871997544317397, "grad_norm": 0.3123166561126709, "learning_rate": 1.5192211841330271e-05, "loss": 0.0344, "step": 51290 }, { "epoch": 0.7873532345944286, "grad_norm": 0.30436384677886963, "learning_rate": 1.51899223019581e-05, "loss": 0.0351, "step": 51300 }, { "epoch": 0.7875067147571176, "grad_norm": 0.3304674029350281, "learning_rate": 1.5187632390176994e-05, "loss": 0.0424, "step": 51310 }, { "epoch": 0.7876601949198067, "grad_norm": 0.316853404045105, "learning_rate": 1.518534210615127e-05, "loss": 0.0381, "step": 51320 }, { "epoch": 0.7878136750824956, "grad_norm": 0.4109441637992859, "learning_rate": 1.5183051450045274e-05, "loss": 0.0449, "step": 51330 }, { "epoch": 0.7879671552451846, "grad_norm": 0.5680601000785828, "learning_rate": 1.5180760422023366e-05, "loss": 0.0355, "step": 51340 }, { "epoch": 0.7881206354078736, "grad_norm": 0.252139687538147, "learning_rate": 1.5178469022249943e-05, "loss": 0.0287, "step": 51350 }, { "epoch": 0.7882741155705625, "grad_norm": 0.36856886744499207, "learning_rate": 1.5176177250889437e-05, "loss": 0.0326, "step": 51360 }, { "epoch": 0.7884275957332515, "grad_norm": 0.3928258419036865, "learning_rate": 1.5173885108106286e-05, "loss": 0.0407, "step": 51370 }, { "epoch": 0.7885810758959404, "grad_norm": 0.3872471749782562, "learning_rate": 1.5171592594064972e-05, "loss": 0.0313, "step": 51380 }, { "epoch": 0.7887345560586294, "grad_norm": 0.42802751064300537, "learning_rate": 1.5169299708929988e-05, "loss": 0.0338, "step": 51390 }, { "epoch": 0.7888880362213184, "grad_norm": 0.33289045095443726, "learning_rate": 1.5167006452865873e-05, "loss": 0.0337, "step": 51400 }, { "epoch": 0.7890415163840073, "grad_norm": 0.7255563139915466, "learning_rate": 1.5164712826037181e-05, "loss": 0.031, "step": 51410 }, { "epoch": 0.7891949965466963, "grad_norm": 0.422040194272995, "learning_rate": 1.5162418828608485e-05, "loss": 0.0314, "step": 51420 }, { "epoch": 0.7893484767093853, "grad_norm": 0.5626665353775024, "learning_rate": 1.5160124460744401e-05, "loss": 0.0383, "step": 51430 }, { "epoch": 0.7895019568720743, "grad_norm": 0.37505170702934265, "learning_rate": 1.5157829722609562e-05, "loss": 0.0334, "step": 51440 }, { "epoch": 0.7896554370347633, "grad_norm": 0.3731289505958557, "learning_rate": 1.5155534614368632e-05, "loss": 0.032, "step": 51450 }, { "epoch": 0.7898089171974523, "grad_norm": 0.5213406085968018, "learning_rate": 1.5153239136186297e-05, "loss": 0.0378, "step": 51460 }, { "epoch": 0.7899623973601412, "grad_norm": 0.3170721232891083, "learning_rate": 1.5150943288227269e-05, "loss": 0.0316, "step": 51470 }, { "epoch": 0.7901158775228302, "grad_norm": 0.31087741255760193, "learning_rate": 1.5148647070656295e-05, "loss": 0.0409, "step": 51480 }, { "epoch": 0.7902693576855191, "grad_norm": 0.5212446451187134, "learning_rate": 1.5146350483638141e-05, "loss": 0.0455, "step": 51490 }, { "epoch": 0.7904228378482081, "grad_norm": 0.5858598351478577, "learning_rate": 1.51440535273376e-05, "loss": 0.0331, "step": 51500 }, { "epoch": 0.7905763180108971, "grad_norm": 0.3221515715122223, "learning_rate": 1.5141756201919491e-05, "loss": 0.0318, "step": 51510 }, { "epoch": 0.790729798173586, "grad_norm": 0.39477643370628357, "learning_rate": 1.5139458507548664e-05, "loss": 0.04, "step": 51520 }, { "epoch": 0.790883278336275, "grad_norm": 0.5153036713600159, "learning_rate": 1.5137160444389988e-05, "loss": 0.0389, "step": 51530 }, { "epoch": 0.791036758498964, "grad_norm": 0.40450790524482727, "learning_rate": 1.5134862012608374e-05, "loss": 0.0357, "step": 51540 }, { "epoch": 0.791190238661653, "grad_norm": 0.3085100054740906, "learning_rate": 1.5132563212368738e-05, "loss": 0.0379, "step": 51550 }, { "epoch": 0.791343718824342, "grad_norm": 0.4590626657009125, "learning_rate": 1.513026404383604e-05, "loss": 0.0334, "step": 51560 }, { "epoch": 0.791497198987031, "grad_norm": 0.33997052907943726, "learning_rate": 1.5127964507175253e-05, "loss": 0.0331, "step": 51570 }, { "epoch": 0.7916506791497199, "grad_norm": 0.4197515845298767, "learning_rate": 1.5125664602551386e-05, "loss": 0.0385, "step": 51580 }, { "epoch": 0.7918041593124089, "grad_norm": 0.41074588894844055, "learning_rate": 1.5123364330129475e-05, "loss": 0.0303, "step": 51590 }, { "epoch": 0.7919576394750979, "grad_norm": 0.3624779284000397, "learning_rate": 1.5121063690074573e-05, "loss": 0.034, "step": 51600 }, { "epoch": 0.7921111196377868, "grad_norm": 0.39315569400787354, "learning_rate": 1.5118762682551769e-05, "loss": 0.0418, "step": 51610 }, { "epoch": 0.7922645998004758, "grad_norm": 0.3484378457069397, "learning_rate": 1.5116461307726172e-05, "loss": 0.0413, "step": 51620 }, { "epoch": 0.7924180799631647, "grad_norm": 0.3815751075744629, "learning_rate": 1.5114159565762922e-05, "loss": 0.0422, "step": 51630 }, { "epoch": 0.7925715601258537, "grad_norm": 0.39176610112190247, "learning_rate": 1.5111857456827182e-05, "loss": 0.0417, "step": 51640 }, { "epoch": 0.7927250402885427, "grad_norm": 0.4258411228656769, "learning_rate": 1.5109554981084142e-05, "loss": 0.0379, "step": 51650 }, { "epoch": 0.7928785204512316, "grad_norm": 0.5458104014396667, "learning_rate": 1.510725213869902e-05, "loss": 0.0421, "step": 51660 }, { "epoch": 0.7930320006139207, "grad_norm": 0.4484604299068451, "learning_rate": 1.5104948929837058e-05, "loss": 0.0338, "step": 51670 }, { "epoch": 0.7931854807766097, "grad_norm": 0.4073517620563507, "learning_rate": 1.5102645354663528e-05, "loss": 0.0292, "step": 51680 }, { "epoch": 0.7933389609392986, "grad_norm": 0.3786752223968506, "learning_rate": 1.5100341413343722e-05, "loss": 0.037, "step": 51690 }, { "epoch": 0.7934924411019876, "grad_norm": 0.3982575535774231, "learning_rate": 1.5098037106042965e-05, "loss": 0.0307, "step": 51700 }, { "epoch": 0.7936459212646766, "grad_norm": 0.41274750232696533, "learning_rate": 1.5095732432926599e-05, "loss": 0.0349, "step": 51710 }, { "epoch": 0.7937994014273655, "grad_norm": 0.35575711727142334, "learning_rate": 1.5093427394160012e-05, "loss": 0.044, "step": 51720 }, { "epoch": 0.7939528815900545, "grad_norm": 0.4233075678348541, "learning_rate": 1.509112198990859e-05, "loss": 0.0362, "step": 51730 }, { "epoch": 0.7941063617527434, "grad_norm": 0.46843579411506653, "learning_rate": 1.5088816220337768e-05, "loss": 0.0415, "step": 51740 }, { "epoch": 0.7942598419154324, "grad_norm": 0.5099686980247498, "learning_rate": 1.5086510085612997e-05, "loss": 0.043, "step": 51750 }, { "epoch": 0.7944133220781214, "grad_norm": 0.5957148671150208, "learning_rate": 1.5084203585899757e-05, "loss": 0.0428, "step": 51760 }, { "epoch": 0.7945668022408103, "grad_norm": 0.33569276332855225, "learning_rate": 1.5081896721363554e-05, "loss": 0.0349, "step": 51770 }, { "epoch": 0.7947202824034993, "grad_norm": 0.5691990256309509, "learning_rate": 1.507958949216992e-05, "loss": 0.0423, "step": 51780 }, { "epoch": 0.7948737625661884, "grad_norm": 0.4453582763671875, "learning_rate": 1.5077281898484409e-05, "loss": 0.0387, "step": 51790 }, { "epoch": 0.7950272427288773, "grad_norm": 0.5317229628562927, "learning_rate": 1.5074973940472614e-05, "loss": 0.0336, "step": 51800 }, { "epoch": 0.7951807228915663, "grad_norm": 0.44631317257881165, "learning_rate": 1.5072665618300133e-05, "loss": 0.0315, "step": 51810 }, { "epoch": 0.7953342030542553, "grad_norm": 0.725145161151886, "learning_rate": 1.5070356932132613e-05, "loss": 0.0398, "step": 51820 }, { "epoch": 0.7954876832169442, "grad_norm": 0.3415868878364563, "learning_rate": 1.506804788213571e-05, "loss": 0.0324, "step": 51830 }, { "epoch": 0.7956411633796332, "grad_norm": 0.3557879626750946, "learning_rate": 1.5065738468475113e-05, "loss": 0.0317, "step": 51840 }, { "epoch": 0.7957946435423221, "grad_norm": 0.3573818802833557, "learning_rate": 1.5063428691316543e-05, "loss": 0.0349, "step": 51850 }, { "epoch": 0.7959481237050111, "grad_norm": 0.32966509461402893, "learning_rate": 1.5061118550825735e-05, "loss": 0.0351, "step": 51860 }, { "epoch": 0.7961016038677001, "grad_norm": 0.4271329343318939, "learning_rate": 1.5058808047168453e-05, "loss": 0.0367, "step": 51870 }, { "epoch": 0.796255084030389, "grad_norm": 0.5017150044441223, "learning_rate": 1.50564971805105e-05, "loss": 0.036, "step": 51880 }, { "epoch": 0.796408564193078, "grad_norm": 0.4069422781467438, "learning_rate": 1.5054185951017686e-05, "loss": 0.0358, "step": 51890 }, { "epoch": 0.796562044355767, "grad_norm": 0.6548840999603271, "learning_rate": 1.5051874358855858e-05, "loss": 0.0476, "step": 51900 }, { "epoch": 0.796715524518456, "grad_norm": 0.2947961091995239, "learning_rate": 1.5049562404190887e-05, "loss": 0.0363, "step": 51910 }, { "epoch": 0.796869004681145, "grad_norm": 0.417265385389328, "learning_rate": 1.5047250087188671e-05, "loss": 0.0414, "step": 51920 }, { "epoch": 0.797022484843834, "grad_norm": 0.3693264424800873, "learning_rate": 1.5044937408015137e-05, "loss": 0.0384, "step": 51930 }, { "epoch": 0.7971759650065229, "grad_norm": 0.3473675549030304, "learning_rate": 1.5042624366836227e-05, "loss": 0.0361, "step": 51940 }, { "epoch": 0.7973294451692119, "grad_norm": 0.5488151907920837, "learning_rate": 1.504031096381792e-05, "loss": 0.0383, "step": 51950 }, { "epoch": 0.7974829253319009, "grad_norm": 0.2711021602153778, "learning_rate": 1.5037997199126215e-05, "loss": 0.0369, "step": 51960 }, { "epoch": 0.7976364054945898, "grad_norm": 0.4060070514678955, "learning_rate": 1.5035683072927137e-05, "loss": 0.0424, "step": 51970 }, { "epoch": 0.7977898856572788, "grad_norm": 0.31167420744895935, "learning_rate": 1.5033368585386745e-05, "loss": 0.0286, "step": 51980 }, { "epoch": 0.7979433658199677, "grad_norm": 0.6262791156768799, "learning_rate": 1.5031053736671113e-05, "loss": 0.0357, "step": 51990 }, { "epoch": 0.7980968459826567, "grad_norm": 0.48657387495040894, "learning_rate": 1.5028738526946347e-05, "loss": 0.0334, "step": 52000 }, { "epoch": 0.7982503261453457, "grad_norm": 0.23643995821475983, "learning_rate": 1.5026422956378578e-05, "loss": 0.0395, "step": 52010 }, { "epoch": 0.7984038063080346, "grad_norm": 0.41090306639671326, "learning_rate": 1.5024107025133958e-05, "loss": 0.0311, "step": 52020 }, { "epoch": 0.7985572864707237, "grad_norm": 0.2594304382801056, "learning_rate": 1.5021790733378678e-05, "loss": 0.0271, "step": 52030 }, { "epoch": 0.7987107666334127, "grad_norm": 0.3826199769973755, "learning_rate": 1.5019474081278942e-05, "loss": 0.0396, "step": 52040 }, { "epoch": 0.7988642467961016, "grad_norm": 0.3778764009475708, "learning_rate": 1.5017157069000981e-05, "loss": 0.0424, "step": 52050 }, { "epoch": 0.7990177269587906, "grad_norm": 0.5494931936264038, "learning_rate": 1.501483969671106e-05, "loss": 0.0365, "step": 52060 }, { "epoch": 0.7991712071214796, "grad_norm": 0.6136178374290466, "learning_rate": 1.501252196457546e-05, "loss": 0.0425, "step": 52070 }, { "epoch": 0.7993246872841685, "grad_norm": 0.3184964656829834, "learning_rate": 1.5010203872760495e-05, "loss": 0.0364, "step": 52080 }, { "epoch": 0.7994781674468575, "grad_norm": 0.30311810970306396, "learning_rate": 1.5007885421432501e-05, "loss": 0.0473, "step": 52090 }, { "epoch": 0.7996316476095464, "grad_norm": 0.2678956091403961, "learning_rate": 1.5005566610757844e-05, "loss": 0.0389, "step": 52100 }, { "epoch": 0.7997851277722354, "grad_norm": 0.4999981224536896, "learning_rate": 1.500324744090291e-05, "loss": 0.0487, "step": 52110 }, { "epoch": 0.7999386079349244, "grad_norm": 0.3351810574531555, "learning_rate": 1.500092791203412e-05, "loss": 0.0369, "step": 52120 }, { "epoch": 0.8000920880976133, "grad_norm": 0.3024708926677704, "learning_rate": 1.4998608024317905e-05, "loss": 0.0517, "step": 52130 }, { "epoch": 0.8002455682603024, "grad_norm": 0.36113405227661133, "learning_rate": 1.4996287777920736e-05, "loss": 0.0435, "step": 52140 }, { "epoch": 0.8003990484229914, "grad_norm": 0.3473772704601288, "learning_rate": 1.4993967173009107e-05, "loss": 0.0352, "step": 52150 }, { "epoch": 0.8005525285856803, "grad_norm": 0.6721522212028503, "learning_rate": 1.499164620974953e-05, "loss": 0.0465, "step": 52160 }, { "epoch": 0.8007060087483693, "grad_norm": 0.3606327176094055, "learning_rate": 1.4989324888308556e-05, "loss": 0.0336, "step": 52170 }, { "epoch": 0.8008594889110583, "grad_norm": 0.3160223960876465, "learning_rate": 1.4987003208852748e-05, "loss": 0.036, "step": 52180 }, { "epoch": 0.8010129690737472, "grad_norm": 0.4772816002368927, "learning_rate": 1.4984681171548702e-05, "loss": 0.0366, "step": 52190 }, { "epoch": 0.8011664492364362, "grad_norm": 0.3022661507129669, "learning_rate": 1.4982358776563043e-05, "loss": 0.0279, "step": 52200 }, { "epoch": 0.8013199293991251, "grad_norm": 0.44324514269828796, "learning_rate": 1.498003602406241e-05, "loss": 0.0374, "step": 52210 }, { "epoch": 0.8014734095618141, "grad_norm": 0.3610244393348694, "learning_rate": 1.4977712914213482e-05, "loss": 0.0348, "step": 52220 }, { "epoch": 0.8016268897245031, "grad_norm": 0.5022326111793518, "learning_rate": 1.4975389447182949e-05, "loss": 0.0283, "step": 52230 }, { "epoch": 0.801780369887192, "grad_norm": 0.40979668498039246, "learning_rate": 1.4973065623137536e-05, "loss": 0.0356, "step": 52240 }, { "epoch": 0.801933850049881, "grad_norm": 0.3706318140029907, "learning_rate": 1.4970741442244e-05, "loss": 0.0418, "step": 52250 }, { "epoch": 0.8020873302125701, "grad_norm": 0.46718600392341614, "learning_rate": 1.4968416904669105e-05, "loss": 0.0329, "step": 52260 }, { "epoch": 0.802240810375259, "grad_norm": 0.22719332575798035, "learning_rate": 1.4966092010579659e-05, "loss": 0.0255, "step": 52270 }, { "epoch": 0.802394290537948, "grad_norm": 0.5524169206619263, "learning_rate": 1.4963766760142479e-05, "loss": 0.0367, "step": 52280 }, { "epoch": 0.802547770700637, "grad_norm": 0.6358686685562134, "learning_rate": 1.4961441153524421e-05, "loss": 0.0384, "step": 52290 }, { "epoch": 0.8027012508633259, "grad_norm": 0.37543055415153503, "learning_rate": 1.4959115190892364e-05, "loss": 0.035, "step": 52300 }, { "epoch": 0.8028547310260149, "grad_norm": 0.3643619418144226, "learning_rate": 1.4956788872413203e-05, "loss": 0.0362, "step": 52310 }, { "epoch": 0.8030082111887039, "grad_norm": 0.3428332805633545, "learning_rate": 1.4954462198253874e-05, "loss": 0.0337, "step": 52320 }, { "epoch": 0.8031616913513928, "grad_norm": 0.305621474981308, "learning_rate": 1.4952135168581325e-05, "loss": 0.0298, "step": 52330 }, { "epoch": 0.8033151715140818, "grad_norm": 0.4799586534500122, "learning_rate": 1.4949807783562534e-05, "loss": 0.0383, "step": 52340 }, { "epoch": 0.8034686516767707, "grad_norm": 0.37998828291893005, "learning_rate": 1.4947480043364508e-05, "loss": 0.0379, "step": 52350 }, { "epoch": 0.8036221318394597, "grad_norm": 0.3617064356803894, "learning_rate": 1.4945151948154278e-05, "loss": 0.0313, "step": 52360 }, { "epoch": 0.8037756120021488, "grad_norm": 0.3840053975582123, "learning_rate": 1.4942823498098893e-05, "loss": 0.0413, "step": 52370 }, { "epoch": 0.8039290921648377, "grad_norm": 0.37478262186050415, "learning_rate": 1.4940494693365442e-05, "loss": 0.0387, "step": 52380 }, { "epoch": 0.8040825723275267, "grad_norm": 0.4184842109680176, "learning_rate": 1.4938165534121022e-05, "loss": 0.0305, "step": 52390 }, { "epoch": 0.8042360524902157, "grad_norm": 0.3461475074291229, "learning_rate": 1.4935836020532772e-05, "loss": 0.0386, "step": 52400 }, { "epoch": 0.8043895326529046, "grad_norm": 0.3622933328151703, "learning_rate": 1.4933506152767847e-05, "loss": 0.0308, "step": 52410 }, { "epoch": 0.8045430128155936, "grad_norm": 0.3906075358390808, "learning_rate": 1.4931175930993428e-05, "loss": 0.0508, "step": 52420 }, { "epoch": 0.8046964929782826, "grad_norm": 0.4917702376842499, "learning_rate": 1.4928845355376722e-05, "loss": 0.0357, "step": 52430 }, { "epoch": 0.8048499731409715, "grad_norm": 0.4310714900493622, "learning_rate": 1.4926514426084966e-05, "loss": 0.0355, "step": 52440 }, { "epoch": 0.8050034533036605, "grad_norm": 0.3707126975059509, "learning_rate": 1.4924183143285413e-05, "loss": 0.0269, "step": 52450 }, { "epoch": 0.8051569334663494, "grad_norm": 0.4564307630062103, "learning_rate": 1.4921851507145352e-05, "loss": 0.0395, "step": 52460 }, { "epoch": 0.8053104136290384, "grad_norm": 0.295119047164917, "learning_rate": 1.4919519517832093e-05, "loss": 0.0285, "step": 52470 }, { "epoch": 0.8054638937917274, "grad_norm": 0.36668968200683594, "learning_rate": 1.4917187175512963e-05, "loss": 0.0316, "step": 52480 }, { "epoch": 0.8056173739544163, "grad_norm": 0.2544209063053131, "learning_rate": 1.491485448035533e-05, "loss": 0.0305, "step": 52490 }, { "epoch": 0.8057708541171054, "grad_norm": 0.3219051659107208, "learning_rate": 1.4912521432526577e-05, "loss": 0.0347, "step": 52500 }, { "epoch": 0.8059243342797944, "grad_norm": 0.47266456484794617, "learning_rate": 1.491018803219411e-05, "loss": 0.0466, "step": 52510 }, { "epoch": 0.8060778144424833, "grad_norm": 0.5519198179244995, "learning_rate": 1.4907854279525373e-05, "loss": 0.0379, "step": 52520 }, { "epoch": 0.8062312946051723, "grad_norm": 0.33867427706718445, "learning_rate": 1.4905520174687817e-05, "loss": 0.0367, "step": 52530 }, { "epoch": 0.8063847747678613, "grad_norm": 0.48092207312583923, "learning_rate": 1.4903185717848941e-05, "loss": 0.0388, "step": 52540 }, { "epoch": 0.8065382549305502, "grad_norm": 0.4814128577709198, "learning_rate": 1.4900850909176246e-05, "loss": 0.0349, "step": 52550 }, { "epoch": 0.8066917350932392, "grad_norm": 0.39801838994026184, "learning_rate": 1.4898515748837272e-05, "loss": 0.044, "step": 52560 }, { "epoch": 0.8068452152559281, "grad_norm": 0.28759855031967163, "learning_rate": 1.4896180236999587e-05, "loss": 0.0294, "step": 52570 }, { "epoch": 0.8069986954186171, "grad_norm": 0.5257435441017151, "learning_rate": 1.4893844373830766e-05, "loss": 0.0389, "step": 52580 }, { "epoch": 0.8071521755813061, "grad_norm": 0.570925235748291, "learning_rate": 1.4891508159498433e-05, "loss": 0.0354, "step": 52590 }, { "epoch": 0.807305655743995, "grad_norm": 0.32822945713996887, "learning_rate": 1.4889171594170221e-05, "loss": 0.035, "step": 52600 }, { "epoch": 0.8074591359066841, "grad_norm": 0.4261333644390106, "learning_rate": 1.4886834678013795e-05, "loss": 0.0363, "step": 52610 }, { "epoch": 0.8076126160693731, "grad_norm": 0.3707699775695801, "learning_rate": 1.4884497411196842e-05, "loss": 0.0466, "step": 52620 }, { "epoch": 0.807766096232062, "grad_norm": 0.6748382449150085, "learning_rate": 1.4882159793887072e-05, "loss": 0.0387, "step": 52630 }, { "epoch": 0.807919576394751, "grad_norm": 0.37476950883865356, "learning_rate": 1.4879821826252227e-05, "loss": 0.0388, "step": 52640 }, { "epoch": 0.80807305655744, "grad_norm": 0.3671753704547882, "learning_rate": 1.4877483508460073e-05, "loss": 0.0338, "step": 52650 }, { "epoch": 0.8082265367201289, "grad_norm": 0.47655051946640015, "learning_rate": 1.4875144840678392e-05, "loss": 0.0343, "step": 52660 }, { "epoch": 0.8083800168828179, "grad_norm": 0.3345956802368164, "learning_rate": 1.4872805823075008e-05, "loss": 0.0318, "step": 52670 }, { "epoch": 0.8085334970455069, "grad_norm": 0.5480309724807739, "learning_rate": 1.4870466455817748e-05, "loss": 0.0464, "step": 52680 }, { "epoch": 0.8086869772081958, "grad_norm": 0.25598281621932983, "learning_rate": 1.4868126739074484e-05, "loss": 0.0392, "step": 52690 }, { "epoch": 0.8088404573708848, "grad_norm": 0.3653700351715088, "learning_rate": 1.4865786673013102e-05, "loss": 0.027, "step": 52700 }, { "epoch": 0.8089939375335737, "grad_norm": 0.5863469839096069, "learning_rate": 1.4863446257801518e-05, "loss": 0.0419, "step": 52710 }, { "epoch": 0.8091474176962627, "grad_norm": 0.5284124612808228, "learning_rate": 1.486110549360767e-05, "loss": 0.0424, "step": 52720 }, { "epoch": 0.8093008978589518, "grad_norm": 0.40651020407676697, "learning_rate": 1.4858764380599522e-05, "loss": 0.0356, "step": 52730 }, { "epoch": 0.8094543780216407, "grad_norm": 0.4304462969303131, "learning_rate": 1.4856422918945064e-05, "loss": 0.0337, "step": 52740 }, { "epoch": 0.8096078581843297, "grad_norm": 0.33329257369041443, "learning_rate": 1.485408110881231e-05, "loss": 0.0361, "step": 52750 }, { "epoch": 0.8097613383470187, "grad_norm": 0.3404890298843384, "learning_rate": 1.4851738950369296e-05, "loss": 0.0311, "step": 52760 }, { "epoch": 0.8099148185097076, "grad_norm": 0.3165093958377838, "learning_rate": 1.4849396443784096e-05, "loss": 0.0268, "step": 52770 }, { "epoch": 0.8100682986723966, "grad_norm": 0.30088484287261963, "learning_rate": 1.4847053589224789e-05, "loss": 0.0337, "step": 52780 }, { "epoch": 0.8102217788350856, "grad_norm": 0.4758003354072571, "learning_rate": 1.4844710386859493e-05, "loss": 0.0373, "step": 52790 }, { "epoch": 0.8103752589977745, "grad_norm": 0.6419268846511841, "learning_rate": 1.484236683685635e-05, "loss": 0.0369, "step": 52800 }, { "epoch": 0.8105287391604635, "grad_norm": 0.47299349308013916, "learning_rate": 1.4840022939383518e-05, "loss": 0.0285, "step": 52810 }, { "epoch": 0.8106822193231524, "grad_norm": 0.5065985918045044, "learning_rate": 1.4837678694609194e-05, "loss": 0.038, "step": 52820 }, { "epoch": 0.8108356994858414, "grad_norm": 0.501968502998352, "learning_rate": 1.4835334102701586e-05, "loss": 0.0353, "step": 52830 }, { "epoch": 0.8109891796485305, "grad_norm": 0.3035987317562103, "learning_rate": 1.4832989163828932e-05, "loss": 0.0373, "step": 52840 }, { "epoch": 0.8111426598112194, "grad_norm": 0.5392135381698608, "learning_rate": 1.4830643878159505e-05, "loss": 0.0348, "step": 52850 }, { "epoch": 0.8112961399739084, "grad_norm": 0.47978147864341736, "learning_rate": 1.482829824586158e-05, "loss": 0.0363, "step": 52860 }, { "epoch": 0.8114496201365974, "grad_norm": 0.44911178946495056, "learning_rate": 1.4825952267103483e-05, "loss": 0.0391, "step": 52870 }, { "epoch": 0.8116031002992863, "grad_norm": 0.4449094235897064, "learning_rate": 1.4823605942053547e-05, "loss": 0.0346, "step": 52880 }, { "epoch": 0.8117565804619753, "grad_norm": 0.2950444221496582, "learning_rate": 1.4821259270880131e-05, "loss": 0.0396, "step": 52890 }, { "epoch": 0.8119100606246643, "grad_norm": 0.36688315868377686, "learning_rate": 1.4818912253751634e-05, "loss": 0.0355, "step": 52900 }, { "epoch": 0.8120635407873532, "grad_norm": 0.4320170283317566, "learning_rate": 1.4816564890836458e-05, "loss": 0.0417, "step": 52910 }, { "epoch": 0.8122170209500422, "grad_norm": 0.41940075159072876, "learning_rate": 1.4814217182303048e-05, "loss": 0.0478, "step": 52920 }, { "epoch": 0.8123705011127311, "grad_norm": 0.24899040162563324, "learning_rate": 1.4811869128319862e-05, "loss": 0.0398, "step": 52930 }, { "epoch": 0.8125239812754201, "grad_norm": 0.32072052359580994, "learning_rate": 1.4809520729055389e-05, "loss": 0.0345, "step": 52940 }, { "epoch": 0.8126774614381091, "grad_norm": 0.47365278005599976, "learning_rate": 1.480717198467814e-05, "loss": 0.0367, "step": 52950 }, { "epoch": 0.812830941600798, "grad_norm": 0.717549204826355, "learning_rate": 1.480482289535666e-05, "loss": 0.0463, "step": 52960 }, { "epoch": 0.8129844217634871, "grad_norm": 0.42525020241737366, "learning_rate": 1.4802473461259498e-05, "loss": 0.043, "step": 52970 }, { "epoch": 0.8131379019261761, "grad_norm": 0.4261469841003418, "learning_rate": 1.480012368255525e-05, "loss": 0.0402, "step": 52980 }, { "epoch": 0.813291382088865, "grad_norm": 0.36407971382141113, "learning_rate": 1.479777355941252e-05, "loss": 0.0448, "step": 52990 }, { "epoch": 0.813444862251554, "grad_norm": 0.5323433876037598, "learning_rate": 1.479542309199995e-05, "loss": 0.047, "step": 53000 }, { "epoch": 0.813598342414243, "grad_norm": 0.3178997039794922, "learning_rate": 1.4793072280486198e-05, "loss": 0.0362, "step": 53010 }, { "epoch": 0.8137518225769319, "grad_norm": 0.3264099657535553, "learning_rate": 1.4790721125039949e-05, "loss": 0.0354, "step": 53020 }, { "epoch": 0.8139053027396209, "grad_norm": 0.38030874729156494, "learning_rate": 1.4788369625829913e-05, "loss": 0.0323, "step": 53030 }, { "epoch": 0.8140587829023099, "grad_norm": 0.6883519887924194, "learning_rate": 1.4786017783024823e-05, "loss": 0.0439, "step": 53040 }, { "epoch": 0.8142122630649988, "grad_norm": 0.4659055769443512, "learning_rate": 1.478366559679344e-05, "loss": 0.0312, "step": 53050 }, { "epoch": 0.8143657432276878, "grad_norm": 0.5503897666931152, "learning_rate": 1.4781313067304548e-05, "loss": 0.0305, "step": 53060 }, { "epoch": 0.8145192233903767, "grad_norm": 0.30351734161376953, "learning_rate": 1.4778960194726955e-05, "loss": 0.0357, "step": 53070 }, { "epoch": 0.8146727035530658, "grad_norm": 0.3666735291481018, "learning_rate": 1.4776606979229496e-05, "loss": 0.0346, "step": 53080 }, { "epoch": 0.8148261837157548, "grad_norm": 0.3838391602039337, "learning_rate": 1.4774253420981024e-05, "loss": 0.0364, "step": 53090 }, { "epoch": 0.8149796638784437, "grad_norm": 0.5725019574165344, "learning_rate": 1.4771899520150426e-05, "loss": 0.0427, "step": 53100 }, { "epoch": 0.8151331440411327, "grad_norm": 0.5012485384941101, "learning_rate": 1.4769545276906607e-05, "loss": 0.0363, "step": 53110 }, { "epoch": 0.8152866242038217, "grad_norm": 0.26037850975990295, "learning_rate": 1.47671906914185e-05, "loss": 0.0334, "step": 53120 }, { "epoch": 0.8154401043665106, "grad_norm": 0.5235545635223389, "learning_rate": 1.4764835763855056e-05, "loss": 0.028, "step": 53130 }, { "epoch": 0.8155935845291996, "grad_norm": 0.48215973377227783, "learning_rate": 1.4762480494385266e-05, "loss": 0.0282, "step": 53140 }, { "epoch": 0.8157470646918886, "grad_norm": 0.4725555181503296, "learning_rate": 1.4760124883178123e-05, "loss": 0.0338, "step": 53150 }, { "epoch": 0.8159005448545775, "grad_norm": 0.39401957392692566, "learning_rate": 1.4757768930402665e-05, "loss": 0.0378, "step": 53160 }, { "epoch": 0.8160540250172665, "grad_norm": 0.27448341250419617, "learning_rate": 1.4755412636227942e-05, "loss": 0.0336, "step": 53170 }, { "epoch": 0.8162075051799554, "grad_norm": 0.30791929364204407, "learning_rate": 1.4753056000823032e-05, "loss": 0.0332, "step": 53180 }, { "epoch": 0.8163609853426445, "grad_norm": 0.38142892718315125, "learning_rate": 1.475069902435704e-05, "loss": 0.0366, "step": 53190 }, { "epoch": 0.8165144655053335, "grad_norm": 0.395394891500473, "learning_rate": 1.4748341706999097e-05, "loss": 0.035, "step": 53200 }, { "epoch": 0.8166679456680224, "grad_norm": 0.3734396994113922, "learning_rate": 1.4745984048918347e-05, "loss": 0.04, "step": 53210 }, { "epoch": 0.8168214258307114, "grad_norm": 0.3717643618583679, "learning_rate": 1.4743626050283977e-05, "loss": 0.0385, "step": 53220 }, { "epoch": 0.8169749059934004, "grad_norm": 0.2630196511745453, "learning_rate": 1.4741267711265177e-05, "loss": 0.0358, "step": 53230 }, { "epoch": 0.8171283861560893, "grad_norm": 0.3719271421432495, "learning_rate": 1.4738909032031181e-05, "loss": 0.0328, "step": 53240 }, { "epoch": 0.8172818663187783, "grad_norm": 0.46752163767814636, "learning_rate": 1.4736550012751231e-05, "loss": 0.0387, "step": 53250 }, { "epoch": 0.8174353464814673, "grad_norm": 0.4359040856361389, "learning_rate": 1.473419065359461e-05, "loss": 0.0343, "step": 53260 }, { "epoch": 0.8175888266441562, "grad_norm": 0.28790584206581116, "learning_rate": 1.473183095473061e-05, "loss": 0.0472, "step": 53270 }, { "epoch": 0.8177423068068452, "grad_norm": 0.5163463950157166, "learning_rate": 1.4729470916328555e-05, "loss": 0.0448, "step": 53280 }, { "epoch": 0.8178957869695341, "grad_norm": 0.4446287453174591, "learning_rate": 1.4727110538557794e-05, "loss": 0.0499, "step": 53290 }, { "epoch": 0.8180492671322231, "grad_norm": 0.3417535126209259, "learning_rate": 1.4724749821587699e-05, "loss": 0.033, "step": 53300 }, { "epoch": 0.8182027472949122, "grad_norm": 0.4981723725795746, "learning_rate": 1.472238876558766e-05, "loss": 0.0348, "step": 53310 }, { "epoch": 0.8183562274576011, "grad_norm": 0.4306705594062805, "learning_rate": 1.472002737072711e-05, "loss": 0.0422, "step": 53320 }, { "epoch": 0.8185097076202901, "grad_norm": 0.7118698954582214, "learning_rate": 1.471766563717548e-05, "loss": 0.0395, "step": 53330 }, { "epoch": 0.8186631877829791, "grad_norm": 0.43214893341064453, "learning_rate": 1.4715303565102245e-05, "loss": 0.039, "step": 53340 }, { "epoch": 0.818816667945668, "grad_norm": 0.46606290340423584, "learning_rate": 1.4712941154676903e-05, "loss": 0.0313, "step": 53350 }, { "epoch": 0.818970148108357, "grad_norm": 0.2737847566604614, "learning_rate": 1.4710578406068963e-05, "loss": 0.0328, "step": 53360 }, { "epoch": 0.819123628271046, "grad_norm": 0.40799716114997864, "learning_rate": 1.4708215319447971e-05, "loss": 0.0348, "step": 53370 }, { "epoch": 0.8192771084337349, "grad_norm": 0.3588652014732361, "learning_rate": 1.4705851894983491e-05, "loss": 0.0339, "step": 53380 }, { "epoch": 0.8194305885964239, "grad_norm": 0.44343438744544983, "learning_rate": 1.4703488132845113e-05, "loss": 0.0304, "step": 53390 }, { "epoch": 0.8195840687591129, "grad_norm": 0.3470207452774048, "learning_rate": 1.4701124033202461e-05, "loss": 0.0337, "step": 53400 }, { "epoch": 0.8197375489218018, "grad_norm": 0.5296696424484253, "learning_rate": 1.4698759596225158e-05, "loss": 0.0422, "step": 53410 }, { "epoch": 0.8198910290844909, "grad_norm": 0.5996800065040588, "learning_rate": 1.469639482208288e-05, "loss": 0.0389, "step": 53420 }, { "epoch": 0.8200445092471798, "grad_norm": 0.4954473674297333, "learning_rate": 1.4694029710945306e-05, "loss": 0.0337, "step": 53430 }, { "epoch": 0.8201979894098688, "grad_norm": 0.35407865047454834, "learning_rate": 1.4691664262982151e-05, "loss": 0.037, "step": 53440 }, { "epoch": 0.8203514695725578, "grad_norm": 0.40925124287605286, "learning_rate": 1.4689298478363153e-05, "loss": 0.0413, "step": 53450 }, { "epoch": 0.8205049497352467, "grad_norm": 0.34854790568351746, "learning_rate": 1.468693235725806e-05, "loss": 0.0341, "step": 53460 }, { "epoch": 0.8206584298979357, "grad_norm": 0.46279317140579224, "learning_rate": 1.4684565899836672e-05, "loss": 0.038, "step": 53470 }, { "epoch": 0.8208119100606247, "grad_norm": 0.49875739216804504, "learning_rate": 1.4682199106268786e-05, "loss": 0.0349, "step": 53480 }, { "epoch": 0.8209653902233136, "grad_norm": 0.42012593150138855, "learning_rate": 1.4679831976724236e-05, "loss": 0.0408, "step": 53490 }, { "epoch": 0.8211188703860026, "grad_norm": 0.37066325545310974, "learning_rate": 1.4677464511372884e-05, "loss": 0.0379, "step": 53500 }, { "epoch": 0.8212723505486916, "grad_norm": 0.39877548813819885, "learning_rate": 1.4675096710384602e-05, "loss": 0.0359, "step": 53510 }, { "epoch": 0.8214258307113805, "grad_norm": 0.5024042725563049, "learning_rate": 1.4672728573929297e-05, "loss": 0.0349, "step": 53520 }, { "epoch": 0.8215793108740695, "grad_norm": 0.4575779139995575, "learning_rate": 1.4670360102176902e-05, "loss": 0.0383, "step": 53530 }, { "epoch": 0.8217327910367584, "grad_norm": 0.3875540792942047, "learning_rate": 1.466799129529736e-05, "loss": 0.0386, "step": 53540 }, { "epoch": 0.8218862711994475, "grad_norm": 0.5594115257263184, "learning_rate": 1.4665622153460656e-05, "loss": 0.0468, "step": 53550 }, { "epoch": 0.8220397513621365, "grad_norm": 0.295417457818985, "learning_rate": 1.4663252676836792e-05, "loss": 0.0363, "step": 53560 }, { "epoch": 0.8221932315248254, "grad_norm": 0.3975898325443268, "learning_rate": 1.4660882865595784e-05, "loss": 0.0391, "step": 53570 }, { "epoch": 0.8223467116875144, "grad_norm": 0.41999340057373047, "learning_rate": 1.4658512719907684e-05, "loss": 0.0332, "step": 53580 }, { "epoch": 0.8225001918502034, "grad_norm": 0.3807755410671234, "learning_rate": 1.4656142239942569e-05, "loss": 0.0403, "step": 53590 }, { "epoch": 0.8226536720128923, "grad_norm": 0.40444672107696533, "learning_rate": 1.4653771425870528e-05, "loss": 0.0417, "step": 53600 }, { "epoch": 0.8228071521755813, "grad_norm": 0.49558016657829285, "learning_rate": 1.465140027786169e-05, "loss": 0.0324, "step": 53610 }, { "epoch": 0.8229606323382703, "grad_norm": 0.4454789459705353, "learning_rate": 1.464902879608619e-05, "loss": 0.0397, "step": 53620 }, { "epoch": 0.8231141125009592, "grad_norm": 0.4444628357887268, "learning_rate": 1.4646656980714204e-05, "loss": 0.0368, "step": 53630 }, { "epoch": 0.8232675926636482, "grad_norm": 0.3595706820487976, "learning_rate": 1.4644284831915922e-05, "loss": 0.0356, "step": 53640 }, { "epoch": 0.8234210728263371, "grad_norm": 0.43085619807243347, "learning_rate": 1.464191234986156e-05, "loss": 0.0356, "step": 53650 }, { "epoch": 0.8235745529890262, "grad_norm": 0.32207661867141724, "learning_rate": 1.463953953472136e-05, "loss": 0.0421, "step": 53660 }, { "epoch": 0.8237280331517152, "grad_norm": 0.38129884004592896, "learning_rate": 1.4637166386665582e-05, "loss": 0.0349, "step": 53670 }, { "epoch": 0.8238815133144041, "grad_norm": 0.25794127583503723, "learning_rate": 1.4634792905864517e-05, "loss": 0.0318, "step": 53680 }, { "epoch": 0.8240349934770931, "grad_norm": 0.5172629356384277, "learning_rate": 1.4632419092488479e-05, "loss": 0.0348, "step": 53690 }, { "epoch": 0.8241884736397821, "grad_norm": 0.7648114562034607, "learning_rate": 1.4630044946707798e-05, "loss": 0.0428, "step": 53700 }, { "epoch": 0.824341953802471, "grad_norm": 0.502221941947937, "learning_rate": 1.4627670468692839e-05, "loss": 0.0396, "step": 53710 }, { "epoch": 0.82449543396516, "grad_norm": 0.2820497751235962, "learning_rate": 1.462529565861398e-05, "loss": 0.0408, "step": 53720 }, { "epoch": 0.824648914127849, "grad_norm": 0.3875774145126343, "learning_rate": 1.4622920516641632e-05, "loss": 0.0439, "step": 53730 }, { "epoch": 0.8248023942905379, "grad_norm": 0.33401039242744446, "learning_rate": 1.462054504294623e-05, "loss": 0.0413, "step": 53740 }, { "epoch": 0.8249558744532269, "grad_norm": 0.4206398129463196, "learning_rate": 1.4618169237698218e-05, "loss": 0.0384, "step": 53750 }, { "epoch": 0.825109354615916, "grad_norm": 0.3372204005718231, "learning_rate": 1.4615793101068086e-05, "loss": 0.0285, "step": 53760 }, { "epoch": 0.8252628347786048, "grad_norm": 0.40202680230140686, "learning_rate": 1.461341663322633e-05, "loss": 0.0312, "step": 53770 }, { "epoch": 0.8254163149412939, "grad_norm": 0.3684007525444031, "learning_rate": 1.4611039834343478e-05, "loss": 0.0513, "step": 53780 }, { "epoch": 0.8255697951039828, "grad_norm": 0.31874680519104004, "learning_rate": 1.460866270459008e-05, "loss": 0.0364, "step": 53790 }, { "epoch": 0.8257232752666718, "grad_norm": 0.8010402917861938, "learning_rate": 1.4606285244136708e-05, "loss": 0.0369, "step": 53800 }, { "epoch": 0.8258767554293608, "grad_norm": 0.4358749985694885, "learning_rate": 1.4603907453153963e-05, "loss": 0.0466, "step": 53810 }, { "epoch": 0.8260302355920497, "grad_norm": 0.37354668974876404, "learning_rate": 1.4601529331812465e-05, "loss": 0.0393, "step": 53820 }, { "epoch": 0.8261837157547387, "grad_norm": 0.5464887022972107, "learning_rate": 1.4599150880282854e-05, "loss": 0.0381, "step": 53830 }, { "epoch": 0.8263371959174277, "grad_norm": 0.39446285367012024, "learning_rate": 1.4596772098735806e-05, "loss": 0.0389, "step": 53840 }, { "epoch": 0.8264906760801166, "grad_norm": 0.33787503838539124, "learning_rate": 1.4594392987342007e-05, "loss": 0.0301, "step": 53850 }, { "epoch": 0.8266441562428056, "grad_norm": 0.44960251450538635, "learning_rate": 1.4592013546272177e-05, "loss": 0.0378, "step": 53860 }, { "epoch": 0.8267976364054946, "grad_norm": 0.5380621552467346, "learning_rate": 1.4589633775697056e-05, "loss": 0.0342, "step": 53870 }, { "epoch": 0.8269511165681835, "grad_norm": 0.47426238656044006, "learning_rate": 1.4587253675787403e-05, "loss": 0.0469, "step": 53880 }, { "epoch": 0.8271045967308726, "grad_norm": 0.3667210638523102, "learning_rate": 1.4584873246714009e-05, "loss": 0.0338, "step": 53890 }, { "epoch": 0.8272580768935615, "grad_norm": 0.3770350515842438, "learning_rate": 1.4582492488647684e-05, "loss": 0.0381, "step": 53900 }, { "epoch": 0.8274115570562505, "grad_norm": 0.3730744421482086, "learning_rate": 1.4580111401759256e-05, "loss": 0.0359, "step": 53910 }, { "epoch": 0.8275650372189395, "grad_norm": 0.6556518077850342, "learning_rate": 1.457772998621959e-05, "loss": 0.0391, "step": 53920 }, { "epoch": 0.8277185173816284, "grad_norm": 0.2865848243236542, "learning_rate": 1.4575348242199568e-05, "loss": 0.0256, "step": 53930 }, { "epoch": 0.8278719975443174, "grad_norm": 0.37479937076568604, "learning_rate": 1.4572966169870089e-05, "loss": 0.0358, "step": 53940 }, { "epoch": 0.8280254777070064, "grad_norm": 0.44870230555534363, "learning_rate": 1.4570583769402085e-05, "loss": 0.0362, "step": 53950 }, { "epoch": 0.8281789578696953, "grad_norm": 0.37598463892936707, "learning_rate": 1.4568201040966503e-05, "loss": 0.0328, "step": 53960 }, { "epoch": 0.8283324380323843, "grad_norm": 0.40108200907707214, "learning_rate": 1.4565817984734328e-05, "loss": 0.0443, "step": 53970 }, { "epoch": 0.8284859181950733, "grad_norm": 0.4968045949935913, "learning_rate": 1.456343460087655e-05, "loss": 0.0352, "step": 53980 }, { "epoch": 0.8286393983577622, "grad_norm": 0.3433496654033661, "learning_rate": 1.4561050889564196e-05, "loss": 0.0347, "step": 53990 }, { "epoch": 0.8287928785204512, "grad_norm": 0.39037248492240906, "learning_rate": 1.455866685096831e-05, "loss": 0.0396, "step": 54000 }, { "epoch": 0.8289463586831401, "grad_norm": 0.5445319414138794, "learning_rate": 1.4556282485259963e-05, "loss": 0.0418, "step": 54010 }, { "epoch": 0.8290998388458292, "grad_norm": 0.35048016905784607, "learning_rate": 1.4553897792610248e-05, "loss": 0.0317, "step": 54020 }, { "epoch": 0.8292533190085182, "grad_norm": 0.49721089005470276, "learning_rate": 1.455151277319028e-05, "loss": 0.0344, "step": 54030 }, { "epoch": 0.8294067991712071, "grad_norm": 0.36824148893356323, "learning_rate": 1.4549127427171199e-05, "loss": 0.0369, "step": 54040 }, { "epoch": 0.8295602793338961, "grad_norm": 0.48939621448516846, "learning_rate": 1.4546741754724167e-05, "loss": 0.0378, "step": 54050 }, { "epoch": 0.8297137594965851, "grad_norm": 0.334009051322937, "learning_rate": 1.4544355756020376e-05, "loss": 0.0443, "step": 54060 }, { "epoch": 0.829867239659274, "grad_norm": 0.2654307782649994, "learning_rate": 1.4541969431231034e-05, "loss": 0.0425, "step": 54070 }, { "epoch": 0.830020719821963, "grad_norm": 0.32859328389167786, "learning_rate": 1.453958278052737e-05, "loss": 0.0405, "step": 54080 }, { "epoch": 0.830174199984652, "grad_norm": 0.40708065032958984, "learning_rate": 1.4537195804080643e-05, "loss": 0.0302, "step": 54090 }, { "epoch": 0.8303276801473409, "grad_norm": 0.33230456709861755, "learning_rate": 1.4534808502062137e-05, "loss": 0.0392, "step": 54100 }, { "epoch": 0.8304811603100299, "grad_norm": 0.2595469355583191, "learning_rate": 1.4532420874643151e-05, "loss": 0.0379, "step": 54110 }, { "epoch": 0.830634640472719, "grad_norm": 0.39230597019195557, "learning_rate": 1.4530032921995016e-05, "loss": 0.0389, "step": 54120 }, { "epoch": 0.8307881206354079, "grad_norm": 0.5019692778587341, "learning_rate": 1.452764464428908e-05, "loss": 0.0434, "step": 54130 }, { "epoch": 0.8309416007980969, "grad_norm": 0.29118818044662476, "learning_rate": 1.4525256041696717e-05, "loss": 0.0359, "step": 54140 }, { "epoch": 0.8310950809607858, "grad_norm": 0.5830568075180054, "learning_rate": 1.4522867114389324e-05, "loss": 0.039, "step": 54150 }, { "epoch": 0.8312485611234748, "grad_norm": 0.40604352951049805, "learning_rate": 1.4520477862538319e-05, "loss": 0.0392, "step": 54160 }, { "epoch": 0.8314020412861638, "grad_norm": 0.44603878259658813, "learning_rate": 1.4518088286315149e-05, "loss": 0.0258, "step": 54170 }, { "epoch": 0.8315555214488527, "grad_norm": 0.5411913990974426, "learning_rate": 1.4515698385891279e-05, "loss": 0.0306, "step": 54180 }, { "epoch": 0.8317090016115417, "grad_norm": 0.3451124429702759, "learning_rate": 1.4513308161438202e-05, "loss": 0.0325, "step": 54190 }, { "epoch": 0.8318624817742307, "grad_norm": 0.621669590473175, "learning_rate": 1.4510917613127426e-05, "loss": 0.0328, "step": 54200 }, { "epoch": 0.8320159619369196, "grad_norm": 0.44007834792137146, "learning_rate": 1.4508526741130493e-05, "loss": 0.0323, "step": 54210 }, { "epoch": 0.8321694420996086, "grad_norm": 0.4270886778831482, "learning_rate": 1.4506135545618962e-05, "loss": 0.0382, "step": 54220 }, { "epoch": 0.8323229222622976, "grad_norm": 0.2065163105726242, "learning_rate": 1.450374402676441e-05, "loss": 0.0322, "step": 54230 }, { "epoch": 0.8324764024249866, "grad_norm": 0.4892269968986511, "learning_rate": 1.4501352184738453e-05, "loss": 0.0428, "step": 54240 }, { "epoch": 0.8326298825876756, "grad_norm": 0.5841101408004761, "learning_rate": 1.4498960019712712e-05, "loss": 0.0498, "step": 54250 }, { "epoch": 0.8327833627503645, "grad_norm": 0.52951979637146, "learning_rate": 1.4496567531858841e-05, "loss": 0.036, "step": 54260 }, { "epoch": 0.8329368429130535, "grad_norm": 0.3508703112602234, "learning_rate": 1.4494174721348521e-05, "loss": 0.033, "step": 54270 }, { "epoch": 0.8330903230757425, "grad_norm": 0.5554521679878235, "learning_rate": 1.4491781588353444e-05, "loss": 0.0291, "step": 54280 }, { "epoch": 0.8332438032384314, "grad_norm": 0.3581850528717041, "learning_rate": 1.4489388133045336e-05, "loss": 0.0327, "step": 54290 }, { "epoch": 0.8333972834011204, "grad_norm": 0.42189446091651917, "learning_rate": 1.4486994355595945e-05, "loss": 0.0309, "step": 54300 }, { "epoch": 0.8335507635638094, "grad_norm": 0.5095492005348206, "learning_rate": 1.4484600256177033e-05, "loss": 0.0389, "step": 54310 }, { "epoch": 0.8337042437264983, "grad_norm": 0.483806312084198, "learning_rate": 1.4482205834960397e-05, "loss": 0.0362, "step": 54320 }, { "epoch": 0.8338577238891873, "grad_norm": 0.49210649728775024, "learning_rate": 1.4479811092117847e-05, "loss": 0.039, "step": 54330 }, { "epoch": 0.8340112040518763, "grad_norm": 0.3406253457069397, "learning_rate": 1.447741602782122e-05, "loss": 0.0356, "step": 54340 }, { "epoch": 0.8341646842145652, "grad_norm": 0.5211800336837769, "learning_rate": 1.447502064224238e-05, "loss": 0.0368, "step": 54350 }, { "epoch": 0.8343181643772543, "grad_norm": 0.28290435671806335, "learning_rate": 1.4472624935553213e-05, "loss": 0.0384, "step": 54360 }, { "epoch": 0.8344716445399432, "grad_norm": 0.35745367407798767, "learning_rate": 1.4470228907925621e-05, "loss": 0.0308, "step": 54370 }, { "epoch": 0.8346251247026322, "grad_norm": 0.49381789565086365, "learning_rate": 1.4467832559531533e-05, "loss": 0.039, "step": 54380 }, { "epoch": 0.8347786048653212, "grad_norm": 0.5861896276473999, "learning_rate": 1.4465435890542905e-05, "loss": 0.0432, "step": 54390 }, { "epoch": 0.8349320850280101, "grad_norm": 0.4526192545890808, "learning_rate": 1.4463038901131714e-05, "loss": 0.0382, "step": 54400 }, { "epoch": 0.8350855651906991, "grad_norm": 0.37045982480049133, "learning_rate": 1.4460641591469953e-05, "loss": 0.0326, "step": 54410 }, { "epoch": 0.8352390453533881, "grad_norm": 0.3311033546924591, "learning_rate": 1.445824396172965e-05, "loss": 0.0319, "step": 54420 }, { "epoch": 0.835392525516077, "grad_norm": 0.4675993025302887, "learning_rate": 1.4455846012082846e-05, "loss": 0.043, "step": 54430 }, { "epoch": 0.835546005678766, "grad_norm": 0.40453144907951355, "learning_rate": 1.445344774270161e-05, "loss": 0.0431, "step": 54440 }, { "epoch": 0.835699485841455, "grad_norm": 0.28738605976104736, "learning_rate": 1.4451049153758032e-05, "loss": 0.0377, "step": 54450 }, { "epoch": 0.8358529660041439, "grad_norm": 0.284973680973053, "learning_rate": 1.4448650245424225e-05, "loss": 0.0399, "step": 54460 }, { "epoch": 0.836006446166833, "grad_norm": 0.6111074686050415, "learning_rate": 1.4446251017872328e-05, "loss": 0.0362, "step": 54470 }, { "epoch": 0.836159926329522, "grad_norm": 0.2962700426578522, "learning_rate": 1.4443851471274497e-05, "loss": 0.0344, "step": 54480 }, { "epoch": 0.8363134064922109, "grad_norm": 0.4373109042644501, "learning_rate": 1.4441451605802917e-05, "loss": 0.0367, "step": 54490 }, { "epoch": 0.8364668866548999, "grad_norm": 0.4587388336658478, "learning_rate": 1.4439051421629792e-05, "loss": 0.0327, "step": 54500 }, { "epoch": 0.8366203668175888, "grad_norm": 0.3096887171268463, "learning_rate": 1.4436650918927349e-05, "loss": 0.0338, "step": 54510 }, { "epoch": 0.8367738469802778, "grad_norm": 0.34453335404396057, "learning_rate": 1.4434250097867842e-05, "loss": 0.0389, "step": 54520 }, { "epoch": 0.8369273271429668, "grad_norm": 0.31696993112564087, "learning_rate": 1.4431848958623542e-05, "loss": 0.0456, "step": 54530 }, { "epoch": 0.8370808073056557, "grad_norm": 0.5069515705108643, "learning_rate": 1.4429447501366745e-05, "loss": 0.0413, "step": 54540 }, { "epoch": 0.8372342874683447, "grad_norm": 0.6689621806144714, "learning_rate": 1.442704572626977e-05, "loss": 0.0382, "step": 54550 }, { "epoch": 0.8373877676310337, "grad_norm": 0.6148549914360046, "learning_rate": 1.4424643633504964e-05, "loss": 0.0375, "step": 54560 }, { "epoch": 0.8375412477937226, "grad_norm": 0.49653470516204834, "learning_rate": 1.4422241223244689e-05, "loss": 0.0447, "step": 54570 }, { "epoch": 0.8376947279564116, "grad_norm": 0.22631767392158508, "learning_rate": 1.4419838495661332e-05, "loss": 0.0314, "step": 54580 }, { "epoch": 0.8378482081191007, "grad_norm": 0.5230301022529602, "learning_rate": 1.4417435450927304e-05, "loss": 0.0422, "step": 54590 }, { "epoch": 0.8380016882817896, "grad_norm": 0.3730737566947937, "learning_rate": 1.4415032089215037e-05, "loss": 0.0486, "step": 54600 }, { "epoch": 0.8381551684444786, "grad_norm": 0.4573509395122528, "learning_rate": 1.4412628410696989e-05, "loss": 0.0408, "step": 54610 }, { "epoch": 0.8383086486071675, "grad_norm": 0.4090527594089508, "learning_rate": 1.441022441554564e-05, "loss": 0.0382, "step": 54620 }, { "epoch": 0.8384621287698565, "grad_norm": 0.32930317521095276, "learning_rate": 1.4407820103933488e-05, "loss": 0.0343, "step": 54630 }, { "epoch": 0.8386156089325455, "grad_norm": 0.4663470387458801, "learning_rate": 1.4405415476033057e-05, "loss": 0.0413, "step": 54640 }, { "epoch": 0.8387690890952344, "grad_norm": 0.5873574614524841, "learning_rate": 1.4403010532016896e-05, "loss": 0.0389, "step": 54650 }, { "epoch": 0.8389225692579234, "grad_norm": 0.4045751690864563, "learning_rate": 1.4400605272057577e-05, "loss": 0.0304, "step": 54660 }, { "epoch": 0.8390760494206124, "grad_norm": 0.38318338990211487, "learning_rate": 1.4398199696327688e-05, "loss": 0.039, "step": 54670 }, { "epoch": 0.8392295295833013, "grad_norm": 0.3697221875190735, "learning_rate": 1.4395793804999846e-05, "loss": 0.0365, "step": 54680 }, { "epoch": 0.8393830097459903, "grad_norm": 0.34554603695869446, "learning_rate": 1.4393387598246688e-05, "loss": 0.0273, "step": 54690 }, { "epoch": 0.8395364899086794, "grad_norm": 0.3130033612251282, "learning_rate": 1.4390981076240871e-05, "loss": 0.0259, "step": 54700 }, { "epoch": 0.8396899700713683, "grad_norm": 0.21443504095077515, "learning_rate": 1.4388574239155084e-05, "loss": 0.0331, "step": 54710 }, { "epoch": 0.8398434502340573, "grad_norm": 0.4717191755771637, "learning_rate": 1.4386167087162029e-05, "loss": 0.035, "step": 54720 }, { "epoch": 0.8399969303967462, "grad_norm": 0.4957069456577301, "learning_rate": 1.4383759620434433e-05, "loss": 0.0367, "step": 54730 }, { "epoch": 0.8401504105594352, "grad_norm": 0.36540424823760986, "learning_rate": 1.438135183914505e-05, "loss": 0.0363, "step": 54740 }, { "epoch": 0.8403038907221242, "grad_norm": 0.42763426899909973, "learning_rate": 1.4378943743466649e-05, "loss": 0.0337, "step": 54750 }, { "epoch": 0.8404573708848131, "grad_norm": 0.4577034115791321, "learning_rate": 1.4376535333572028e-05, "loss": 0.0337, "step": 54760 }, { "epoch": 0.8406108510475021, "grad_norm": 0.3090803921222687, "learning_rate": 1.4374126609634005e-05, "loss": 0.0353, "step": 54770 }, { "epoch": 0.8407643312101911, "grad_norm": 0.4400486946105957, "learning_rate": 1.437171757182542e-05, "loss": 0.0349, "step": 54780 }, { "epoch": 0.84091781137288, "grad_norm": 0.3574541211128235, "learning_rate": 1.436930822031914e-05, "loss": 0.046, "step": 54790 }, { "epoch": 0.841071291535569, "grad_norm": 0.36400625109672546, "learning_rate": 1.4366898555288045e-05, "loss": 0.0332, "step": 54800 }, { "epoch": 0.841224771698258, "grad_norm": 0.4336187541484833, "learning_rate": 1.4364488576905048e-05, "loss": 0.0298, "step": 54810 }, { "epoch": 0.841378251860947, "grad_norm": 0.4895140528678894, "learning_rate": 1.4362078285343078e-05, "loss": 0.0356, "step": 54820 }, { "epoch": 0.841531732023636, "grad_norm": 0.4126829206943512, "learning_rate": 1.4359667680775086e-05, "loss": 0.0301, "step": 54830 }, { "epoch": 0.841685212186325, "grad_norm": 0.420787513256073, "learning_rate": 1.4357256763374053e-05, "loss": 0.04, "step": 54840 }, { "epoch": 0.8418386923490139, "grad_norm": 0.4656091332435608, "learning_rate": 1.4354845533312974e-05, "loss": 0.0373, "step": 54850 }, { "epoch": 0.8419921725117029, "grad_norm": 0.3988814353942871, "learning_rate": 1.4352433990764867e-05, "loss": 0.0287, "step": 54860 }, { "epoch": 0.8421456526743918, "grad_norm": 0.4739437997341156, "learning_rate": 1.435002213590278e-05, "loss": 0.0447, "step": 54870 }, { "epoch": 0.8422991328370808, "grad_norm": 0.3627662658691406, "learning_rate": 1.4347609968899776e-05, "loss": 0.0367, "step": 54880 }, { "epoch": 0.8424526129997698, "grad_norm": 0.34903332591056824, "learning_rate": 1.4345197489928945e-05, "loss": 0.0345, "step": 54890 }, { "epoch": 0.8426060931624587, "grad_norm": 0.5077811479568481, "learning_rate": 1.4342784699163394e-05, "loss": 0.0352, "step": 54900 }, { "epoch": 0.8427595733251477, "grad_norm": 0.4533279836177826, "learning_rate": 1.4340371596776256e-05, "loss": 0.0375, "step": 54910 }, { "epoch": 0.8429130534878367, "grad_norm": 0.4143918752670288, "learning_rate": 1.4337958182940693e-05, "loss": 0.0301, "step": 54920 }, { "epoch": 0.8430665336505256, "grad_norm": 0.2928121089935303, "learning_rate": 1.433554445782987e-05, "loss": 0.0398, "step": 54930 }, { "epoch": 0.8432200138132147, "grad_norm": 0.3435191810131073, "learning_rate": 1.4333130421616996e-05, "loss": 0.0379, "step": 54940 }, { "epoch": 0.8433734939759037, "grad_norm": 0.42893505096435547, "learning_rate": 1.4330716074475287e-05, "loss": 0.0362, "step": 54950 }, { "epoch": 0.8435269741385926, "grad_norm": 0.3886258602142334, "learning_rate": 1.4328301416577994e-05, "loss": 0.0343, "step": 54960 }, { "epoch": 0.8436804543012816, "grad_norm": 0.19632963836193085, "learning_rate": 1.4325886448098379e-05, "loss": 0.0406, "step": 54970 }, { "epoch": 0.8438339344639705, "grad_norm": 0.42402219772338867, "learning_rate": 1.432347116920973e-05, "loss": 0.0354, "step": 54980 }, { "epoch": 0.8439874146266595, "grad_norm": 0.38558095693588257, "learning_rate": 1.4321055580085363e-05, "loss": 0.0293, "step": 54990 }, { "epoch": 0.8441408947893485, "grad_norm": 0.39922910928726196, "learning_rate": 1.4318639680898607e-05, "loss": 0.0401, "step": 55000 }, { "epoch": 0.8442943749520374, "grad_norm": 0.40304839611053467, "learning_rate": 1.4316223471822818e-05, "loss": 0.0381, "step": 55010 }, { "epoch": 0.8444478551147264, "grad_norm": 0.45609617233276367, "learning_rate": 1.4313806953031377e-05, "loss": 0.0385, "step": 55020 }, { "epoch": 0.8446013352774154, "grad_norm": 0.3309035003185272, "learning_rate": 1.4311390124697684e-05, "loss": 0.0289, "step": 55030 }, { "epoch": 0.8447548154401043, "grad_norm": 0.3706333637237549, "learning_rate": 1.4308972986995155e-05, "loss": 0.0407, "step": 55040 }, { "epoch": 0.8449082956027933, "grad_norm": 0.43489858508110046, "learning_rate": 1.4306555540097242e-05, "loss": 0.0282, "step": 55050 }, { "epoch": 0.8450617757654824, "grad_norm": 0.4213172197341919, "learning_rate": 1.4304137784177407e-05, "loss": 0.0305, "step": 55060 }, { "epoch": 0.8452152559281713, "grad_norm": 0.5220983624458313, "learning_rate": 1.4301719719409142e-05, "loss": 0.0419, "step": 55070 }, { "epoch": 0.8453687360908603, "grad_norm": 0.28332990407943726, "learning_rate": 1.4299301345965957e-05, "loss": 0.0407, "step": 55080 }, { "epoch": 0.8455222162535492, "grad_norm": 0.35276514291763306, "learning_rate": 1.4296882664021383e-05, "loss": 0.0298, "step": 55090 }, { "epoch": 0.8456756964162382, "grad_norm": 0.5187957882881165, "learning_rate": 1.4294463673748977e-05, "loss": 0.0334, "step": 55100 }, { "epoch": 0.8458291765789272, "grad_norm": 0.48519688844680786, "learning_rate": 1.4292044375322318e-05, "loss": 0.0366, "step": 55110 }, { "epoch": 0.8459826567416161, "grad_norm": 0.44496726989746094, "learning_rate": 1.4289624768915007e-05, "loss": 0.0437, "step": 55120 }, { "epoch": 0.8461361369043051, "grad_norm": 0.22308282554149628, "learning_rate": 1.428720485470066e-05, "loss": 0.0357, "step": 55130 }, { "epoch": 0.8462896170669941, "grad_norm": 0.44772669672966003, "learning_rate": 1.4284784632852927e-05, "loss": 0.0378, "step": 55140 }, { "epoch": 0.846443097229683, "grad_norm": 0.3199670910835266, "learning_rate": 1.4282364103545465e-05, "loss": 0.0478, "step": 55150 }, { "epoch": 0.846596577392372, "grad_norm": 0.40510523319244385, "learning_rate": 1.427994326695197e-05, "loss": 0.0395, "step": 55160 }, { "epoch": 0.846750057555061, "grad_norm": 0.4229350686073303, "learning_rate": 1.427752212324615e-05, "loss": 0.0402, "step": 55170 }, { "epoch": 0.84690353771775, "grad_norm": 0.3439427316188812, "learning_rate": 1.4275100672601738e-05, "loss": 0.0389, "step": 55180 }, { "epoch": 0.847057017880439, "grad_norm": 0.36733514070510864, "learning_rate": 1.4272678915192484e-05, "loss": 0.0345, "step": 55190 }, { "epoch": 0.847210498043128, "grad_norm": 0.6089484095573425, "learning_rate": 1.4270256851192166e-05, "loss": 0.052, "step": 55200 }, { "epoch": 0.8473639782058169, "grad_norm": 0.41580793261528015, "learning_rate": 1.4267834480774585e-05, "loss": 0.0406, "step": 55210 }, { "epoch": 0.8475174583685059, "grad_norm": 0.28035467863082886, "learning_rate": 1.4265411804113558e-05, "loss": 0.03, "step": 55220 }, { "epoch": 0.8476709385311948, "grad_norm": 0.31638652086257935, "learning_rate": 1.4262988821382925e-05, "loss": 0.0367, "step": 55230 }, { "epoch": 0.8478244186938838, "grad_norm": 0.3620470464229584, "learning_rate": 1.4260565532756559e-05, "loss": 0.0325, "step": 55240 }, { "epoch": 0.8479778988565728, "grad_norm": 0.5405986309051514, "learning_rate": 1.4258141938408333e-05, "loss": 0.0277, "step": 55250 }, { "epoch": 0.8481313790192617, "grad_norm": 0.44879502058029175, "learning_rate": 1.4255718038512163e-05, "loss": 0.0433, "step": 55260 }, { "epoch": 0.8482848591819507, "grad_norm": 0.50870680809021, "learning_rate": 1.4253293833241979e-05, "loss": 0.0387, "step": 55270 }, { "epoch": 0.8484383393446397, "grad_norm": 0.5028737187385559, "learning_rate": 1.4250869322771728e-05, "loss": 0.0437, "step": 55280 }, { "epoch": 0.8485918195073286, "grad_norm": 0.4434404969215393, "learning_rate": 1.4248444507275387e-05, "loss": 0.0366, "step": 55290 }, { "epoch": 0.8487452996700177, "grad_norm": 0.40962594747543335, "learning_rate": 1.424601938692695e-05, "loss": 0.047, "step": 55300 }, { "epoch": 0.8488987798327067, "grad_norm": 0.5502113103866577, "learning_rate": 1.4243593961900437e-05, "loss": 0.0405, "step": 55310 }, { "epoch": 0.8490522599953956, "grad_norm": 0.36284011602401733, "learning_rate": 1.4241168232369887e-05, "loss": 0.033, "step": 55320 }, { "epoch": 0.8492057401580846, "grad_norm": 0.39141184091567993, "learning_rate": 1.4238742198509354e-05, "loss": 0.0424, "step": 55330 }, { "epoch": 0.8493592203207735, "grad_norm": 0.3880392611026764, "learning_rate": 1.4236315860492932e-05, "loss": 0.0385, "step": 55340 }, { "epoch": 0.8495127004834625, "grad_norm": 0.31888461112976074, "learning_rate": 1.4233889218494716e-05, "loss": 0.0312, "step": 55350 }, { "epoch": 0.8496661806461515, "grad_norm": 0.3169408440589905, "learning_rate": 1.4231462272688839e-05, "loss": 0.027, "step": 55360 }, { "epoch": 0.8498196608088404, "grad_norm": 0.27037495374679565, "learning_rate": 1.4229035023249445e-05, "loss": 0.0317, "step": 55370 }, { "epoch": 0.8499731409715294, "grad_norm": 0.5393558144569397, "learning_rate": 1.4226607470350703e-05, "loss": 0.0373, "step": 55380 }, { "epoch": 0.8501266211342184, "grad_norm": 0.4407259225845337, "learning_rate": 1.4224179614166813e-05, "loss": 0.0324, "step": 55390 }, { "epoch": 0.8502801012969073, "grad_norm": 0.4950522184371948, "learning_rate": 1.422175145487198e-05, "loss": 0.0355, "step": 55400 }, { "epoch": 0.8504335814595964, "grad_norm": 0.3985517621040344, "learning_rate": 1.4219322992640447e-05, "loss": 0.0388, "step": 55410 }, { "epoch": 0.8505870616222854, "grad_norm": 0.308747798204422, "learning_rate": 1.4216894227646465e-05, "loss": 0.0312, "step": 55420 }, { "epoch": 0.8507405417849743, "grad_norm": 0.33387264609336853, "learning_rate": 1.4214465160064314e-05, "loss": 0.0357, "step": 55430 }, { "epoch": 0.8508940219476633, "grad_norm": 0.29054126143455505, "learning_rate": 1.4212035790068295e-05, "loss": 0.035, "step": 55440 }, { "epoch": 0.8510475021103522, "grad_norm": 0.4559817612171173, "learning_rate": 1.4209606117832732e-05, "loss": 0.0358, "step": 55450 }, { "epoch": 0.8512009822730412, "grad_norm": 0.5773892998695374, "learning_rate": 1.4207176143531969e-05, "loss": 0.0404, "step": 55460 }, { "epoch": 0.8513544624357302, "grad_norm": 0.4218066930770874, "learning_rate": 1.420474586734037e-05, "loss": 0.0368, "step": 55470 }, { "epoch": 0.8515079425984191, "grad_norm": 0.26534122228622437, "learning_rate": 1.420231528943232e-05, "loss": 0.0357, "step": 55480 }, { "epoch": 0.8516614227611081, "grad_norm": 0.46062231063842773, "learning_rate": 1.4199884409982235e-05, "loss": 0.0396, "step": 55490 }, { "epoch": 0.8518149029237971, "grad_norm": 0.40901339054107666, "learning_rate": 1.4197453229164537e-05, "loss": 0.0274, "step": 55500 }, { "epoch": 0.851968383086486, "grad_norm": 0.32224708795547485, "learning_rate": 1.4195021747153686e-05, "loss": 0.0268, "step": 55510 }, { "epoch": 0.852121863249175, "grad_norm": 0.5032036304473877, "learning_rate": 1.4192589964124146e-05, "loss": 0.0383, "step": 55520 }, { "epoch": 0.8522753434118641, "grad_norm": 0.3981972932815552, "learning_rate": 1.4190157880250424e-05, "loss": 0.0407, "step": 55530 }, { "epoch": 0.852428823574553, "grad_norm": 0.45309922099113464, "learning_rate": 1.4187725495707031e-05, "loss": 0.0319, "step": 55540 }, { "epoch": 0.852582303737242, "grad_norm": 1.1260898113250732, "learning_rate": 1.4185292810668508e-05, "loss": 0.0369, "step": 55550 }, { "epoch": 0.852735783899931, "grad_norm": 0.3938755691051483, "learning_rate": 1.4182859825309412e-05, "loss": 0.0372, "step": 55560 }, { "epoch": 0.8528892640626199, "grad_norm": 0.4221401512622833, "learning_rate": 1.4180426539804324e-05, "loss": 0.0411, "step": 55570 }, { "epoch": 0.8530427442253089, "grad_norm": 0.39363938570022583, "learning_rate": 1.4177992954327851e-05, "loss": 0.0438, "step": 55580 }, { "epoch": 0.8531962243879978, "grad_norm": 0.28391972184181213, "learning_rate": 1.4175559069054617e-05, "loss": 0.043, "step": 55590 }, { "epoch": 0.8533497045506868, "grad_norm": 0.4383658468723297, "learning_rate": 1.4173124884159266e-05, "loss": 0.038, "step": 55600 }, { "epoch": 0.8535031847133758, "grad_norm": 0.27064454555511475, "learning_rate": 1.4170690399816469e-05, "loss": 0.0285, "step": 55610 }, { "epoch": 0.8536566648760647, "grad_norm": 0.5702966451644897, "learning_rate": 1.4168255616200912e-05, "loss": 0.0369, "step": 55620 }, { "epoch": 0.8538101450387537, "grad_norm": 0.35272926092147827, "learning_rate": 1.4165820533487307e-05, "loss": 0.0321, "step": 55630 }, { "epoch": 0.8539636252014428, "grad_norm": 0.3187040686607361, "learning_rate": 1.4163385151850388e-05, "loss": 0.0311, "step": 55640 }, { "epoch": 0.8541171053641317, "grad_norm": 0.3966316282749176, "learning_rate": 1.4160949471464904e-05, "loss": 0.0275, "step": 55650 }, { "epoch": 0.8542705855268207, "grad_norm": 0.42014896869659424, "learning_rate": 1.4158513492505639e-05, "loss": 0.032, "step": 55660 }, { "epoch": 0.8544240656895097, "grad_norm": 0.42096415162086487, "learning_rate": 1.4156077215147379e-05, "loss": 0.041, "step": 55670 }, { "epoch": 0.8545775458521986, "grad_norm": 0.4108004570007324, "learning_rate": 1.4153640639564946e-05, "loss": 0.0396, "step": 55680 }, { "epoch": 0.8547310260148876, "grad_norm": 0.3496652841567993, "learning_rate": 1.4151203765933184e-05, "loss": 0.0267, "step": 55690 }, { "epoch": 0.8548845061775765, "grad_norm": 0.520676851272583, "learning_rate": 1.4148766594426946e-05, "loss": 0.0353, "step": 55700 }, { "epoch": 0.8550379863402655, "grad_norm": 0.32666870951652527, "learning_rate": 1.414632912522112e-05, "loss": 0.0355, "step": 55710 }, { "epoch": 0.8551914665029545, "grad_norm": 0.3351089060306549, "learning_rate": 1.4143891358490607e-05, "loss": 0.0363, "step": 55720 }, { "epoch": 0.8553449466656434, "grad_norm": 0.4572024643421173, "learning_rate": 1.4141453294410333e-05, "loss": 0.0464, "step": 55730 }, { "epoch": 0.8554984268283324, "grad_norm": 0.6314920783042908, "learning_rate": 1.4139014933155242e-05, "loss": 0.0361, "step": 55740 }, { "epoch": 0.8556519069910214, "grad_norm": 0.41731202602386475, "learning_rate": 1.41365762749003e-05, "loss": 0.0417, "step": 55750 }, { "epoch": 0.8558053871537104, "grad_norm": 0.4093460142612457, "learning_rate": 1.4134137319820502e-05, "loss": 0.0405, "step": 55760 }, { "epoch": 0.8559588673163994, "grad_norm": 0.4474910795688629, "learning_rate": 1.4131698068090851e-05, "loss": 0.0412, "step": 55770 }, { "epoch": 0.8561123474790884, "grad_norm": 0.43553540110588074, "learning_rate": 1.4129258519886384e-05, "loss": 0.0305, "step": 55780 }, { "epoch": 0.8562658276417773, "grad_norm": 0.4414271414279938, "learning_rate": 1.4126818675382152e-05, "loss": 0.0446, "step": 55790 }, { "epoch": 0.8564193078044663, "grad_norm": 0.31439751386642456, "learning_rate": 1.4124378534753228e-05, "loss": 0.0322, "step": 55800 }, { "epoch": 0.8565727879671552, "grad_norm": 0.2968916893005371, "learning_rate": 1.4121938098174705e-05, "loss": 0.0464, "step": 55810 }, { "epoch": 0.8567262681298442, "grad_norm": 0.8482601642608643, "learning_rate": 1.4119497365821706e-05, "loss": 0.0339, "step": 55820 }, { "epoch": 0.8568797482925332, "grad_norm": 0.5115651488304138, "learning_rate": 1.411705633786936e-05, "loss": 0.0455, "step": 55830 }, { "epoch": 0.8570332284552221, "grad_norm": 0.39704662561416626, "learning_rate": 1.4114615014492837e-05, "loss": 0.045, "step": 55840 }, { "epoch": 0.8571867086179111, "grad_norm": 0.49095407128334045, "learning_rate": 1.4112173395867302e-05, "loss": 0.0361, "step": 55850 }, { "epoch": 0.8573401887806001, "grad_norm": 0.4011947810649872, "learning_rate": 1.410973148216797e-05, "loss": 0.034, "step": 55860 }, { "epoch": 0.857493668943289, "grad_norm": 0.29864197969436646, "learning_rate": 1.4107289273570056e-05, "loss": 0.0427, "step": 55870 }, { "epoch": 0.8576471491059781, "grad_norm": 0.3628314435482025, "learning_rate": 1.4104846770248804e-05, "loss": 0.0327, "step": 55880 }, { "epoch": 0.8578006292686671, "grad_norm": 0.3334677219390869, "learning_rate": 1.4102403972379484e-05, "loss": 0.0434, "step": 55890 }, { "epoch": 0.857954109431356, "grad_norm": 0.4032452404499054, "learning_rate": 1.4099960880137373e-05, "loss": 0.0368, "step": 55900 }, { "epoch": 0.858107589594045, "grad_norm": 0.35721808671951294, "learning_rate": 1.4097517493697785e-05, "loss": 0.0258, "step": 55910 }, { "epoch": 0.858261069756734, "grad_norm": 0.5422850847244263, "learning_rate": 1.4095073813236046e-05, "loss": 0.0415, "step": 55920 }, { "epoch": 0.8584145499194229, "grad_norm": 0.4108878970146179, "learning_rate": 1.4092629838927503e-05, "loss": 0.0337, "step": 55930 }, { "epoch": 0.8585680300821119, "grad_norm": 0.5603610277175903, "learning_rate": 1.409018557094753e-05, "loss": 0.0434, "step": 55940 }, { "epoch": 0.8587215102448008, "grad_norm": 0.35826876759529114, "learning_rate": 1.4087741009471518e-05, "loss": 0.0288, "step": 55950 }, { "epoch": 0.8588749904074898, "grad_norm": 0.3888494074344635, "learning_rate": 1.4085296154674877e-05, "loss": 0.0425, "step": 55960 }, { "epoch": 0.8590284705701788, "grad_norm": 0.329542875289917, "learning_rate": 1.408285100673304e-05, "loss": 0.038, "step": 55970 }, { "epoch": 0.8591819507328677, "grad_norm": 0.3552868366241455, "learning_rate": 1.4080405565821462e-05, "loss": 0.0299, "step": 55980 }, { "epoch": 0.8593354308955568, "grad_norm": 0.2826455235481262, "learning_rate": 1.4077959832115622e-05, "loss": 0.0357, "step": 55990 }, { "epoch": 0.8594889110582458, "grad_norm": 0.3174575865268707, "learning_rate": 1.407551380579101e-05, "loss": 0.0376, "step": 56000 }, { "epoch": 0.8596423912209347, "grad_norm": 0.4045010805130005, "learning_rate": 1.407306748702315e-05, "loss": 0.0305, "step": 56010 }, { "epoch": 0.8597958713836237, "grad_norm": 0.3565555214881897, "learning_rate": 1.407062087598758e-05, "loss": 0.0359, "step": 56020 }, { "epoch": 0.8599493515463127, "grad_norm": 0.21709655225276947, "learning_rate": 1.406817397285985e-05, "loss": 0.0311, "step": 56030 }, { "epoch": 0.8601028317090016, "grad_norm": 0.43678489327430725, "learning_rate": 1.4065726777815552e-05, "loss": 0.0367, "step": 56040 }, { "epoch": 0.8602563118716906, "grad_norm": 0.32948893308639526, "learning_rate": 1.4063279291030282e-05, "loss": 0.0367, "step": 56050 }, { "epoch": 0.8604097920343795, "grad_norm": 0.42836523056030273, "learning_rate": 1.4060831512679665e-05, "loss": 0.0341, "step": 56060 }, { "epoch": 0.8605632721970685, "grad_norm": 0.4295753240585327, "learning_rate": 1.4058383442939336e-05, "loss": 0.0392, "step": 56070 }, { "epoch": 0.8607167523597575, "grad_norm": 0.40234461426734924, "learning_rate": 1.405593508198497e-05, "loss": 0.0392, "step": 56080 }, { "epoch": 0.8608702325224464, "grad_norm": 0.3805285394191742, "learning_rate": 1.4053486429992249e-05, "loss": 0.0365, "step": 56090 }, { "epoch": 0.8610237126851354, "grad_norm": 0.47464990615844727, "learning_rate": 1.4051037487136875e-05, "loss": 0.0361, "step": 56100 }, { "epoch": 0.8611771928478245, "grad_norm": 0.31547197699546814, "learning_rate": 1.4048588253594577e-05, "loss": 0.0338, "step": 56110 }, { "epoch": 0.8613306730105134, "grad_norm": 0.3791522979736328, "learning_rate": 1.4046138729541102e-05, "loss": 0.0377, "step": 56120 }, { "epoch": 0.8614841531732024, "grad_norm": 0.6064994931221008, "learning_rate": 1.4043688915152219e-05, "loss": 0.0375, "step": 56130 }, { "epoch": 0.8616376333358914, "grad_norm": 0.3335563838481903, "learning_rate": 1.404123881060372e-05, "loss": 0.0319, "step": 56140 }, { "epoch": 0.8617911134985803, "grad_norm": 0.38770791888237, "learning_rate": 1.4038788416071409e-05, "loss": 0.0351, "step": 56150 }, { "epoch": 0.8619445936612693, "grad_norm": 0.510144829750061, "learning_rate": 1.4036337731731122e-05, "loss": 0.0385, "step": 56160 }, { "epoch": 0.8620980738239582, "grad_norm": 0.49807092547416687, "learning_rate": 1.4033886757758705e-05, "loss": 0.0359, "step": 56170 }, { "epoch": 0.8622515539866472, "grad_norm": 0.3436600863933563, "learning_rate": 1.403143549433004e-05, "loss": 0.0323, "step": 56180 }, { "epoch": 0.8624050341493362, "grad_norm": 0.4680962860584259, "learning_rate": 1.4028983941621012e-05, "loss": 0.0378, "step": 56190 }, { "epoch": 0.8625585143120251, "grad_norm": 0.393037885427475, "learning_rate": 1.402653209980754e-05, "loss": 0.0364, "step": 56200 }, { "epoch": 0.8627119944747141, "grad_norm": 0.5141786932945251, "learning_rate": 1.4024079969065558e-05, "loss": 0.0375, "step": 56210 }, { "epoch": 0.8628654746374032, "grad_norm": 0.4754156172275543, "learning_rate": 1.4021627549571016e-05, "loss": 0.0359, "step": 56220 }, { "epoch": 0.863018954800092, "grad_norm": 0.41442954540252686, "learning_rate": 1.4019174841499896e-05, "loss": 0.04, "step": 56230 }, { "epoch": 0.8631724349627811, "grad_norm": 0.42906492948532104, "learning_rate": 1.4016721845028195e-05, "loss": 0.035, "step": 56240 }, { "epoch": 0.8633259151254701, "grad_norm": 0.27193689346313477, "learning_rate": 1.4014268560331924e-05, "loss": 0.0286, "step": 56250 }, { "epoch": 0.863479395288159, "grad_norm": 0.3572612702846527, "learning_rate": 1.4011814987587132e-05, "loss": 0.0325, "step": 56260 }, { "epoch": 0.863632875450848, "grad_norm": 0.5678472518920898, "learning_rate": 1.4009361126969868e-05, "loss": 0.0377, "step": 56270 }, { "epoch": 0.863786355613537, "grad_norm": 0.3665163815021515, "learning_rate": 1.4006906978656217e-05, "loss": 0.0368, "step": 56280 }, { "epoch": 0.8639398357762259, "grad_norm": 0.39775794744491577, "learning_rate": 1.4004452542822277e-05, "loss": 0.0378, "step": 56290 }, { "epoch": 0.8640933159389149, "grad_norm": 0.43330317735671997, "learning_rate": 1.400199781964417e-05, "loss": 0.0281, "step": 56300 }, { "epoch": 0.8642467961016038, "grad_norm": 0.36534959077835083, "learning_rate": 1.3999542809298043e-05, "loss": 0.0444, "step": 56310 }, { "epoch": 0.8644002762642928, "grad_norm": 0.3846520483493805, "learning_rate": 1.3997087511960045e-05, "loss": 0.0274, "step": 56320 }, { "epoch": 0.8645537564269818, "grad_norm": 0.3401738703250885, "learning_rate": 1.399463192780637e-05, "loss": 0.03, "step": 56330 }, { "epoch": 0.8647072365896707, "grad_norm": 0.3661091923713684, "learning_rate": 1.399217605701322e-05, "loss": 0.0386, "step": 56340 }, { "epoch": 0.8648607167523598, "grad_norm": 0.4149923324584961, "learning_rate": 1.3989719899756812e-05, "loss": 0.0311, "step": 56350 }, { "epoch": 0.8650141969150488, "grad_norm": 0.3800472021102905, "learning_rate": 1.3987263456213398e-05, "loss": 0.0308, "step": 56360 }, { "epoch": 0.8651676770777377, "grad_norm": 0.384740948677063, "learning_rate": 1.3984806726559241e-05, "loss": 0.0319, "step": 56370 }, { "epoch": 0.8653211572404267, "grad_norm": 0.3971541225910187, "learning_rate": 1.3982349710970622e-05, "loss": 0.0442, "step": 56380 }, { "epoch": 0.8654746374031157, "grad_norm": 0.3074119985103607, "learning_rate": 1.3979892409623854e-05, "loss": 0.0416, "step": 56390 }, { "epoch": 0.8656281175658046, "grad_norm": 0.4379608631134033, "learning_rate": 1.3977434822695257e-05, "loss": 0.0328, "step": 56400 }, { "epoch": 0.8657815977284936, "grad_norm": 0.5026319622993469, "learning_rate": 1.3974976950361184e-05, "loss": 0.0338, "step": 56410 }, { "epoch": 0.8659350778911825, "grad_norm": 0.4821086525917053, "learning_rate": 1.3972518792797998e-05, "loss": 0.0452, "step": 56420 }, { "epoch": 0.8660885580538715, "grad_norm": 0.2482600212097168, "learning_rate": 1.3970060350182086e-05, "loss": 0.0379, "step": 56430 }, { "epoch": 0.8662420382165605, "grad_norm": 0.7373565435409546, "learning_rate": 1.3967601622689864e-05, "loss": 0.032, "step": 56440 }, { "epoch": 0.8663955183792494, "grad_norm": 0.407272070646286, "learning_rate": 1.3965142610497751e-05, "loss": 0.0365, "step": 56450 }, { "epoch": 0.8665489985419385, "grad_norm": 0.34796634316444397, "learning_rate": 1.3962683313782205e-05, "loss": 0.0358, "step": 56460 }, { "epoch": 0.8667024787046275, "grad_norm": 0.4899144768714905, "learning_rate": 1.3960223732719688e-05, "loss": 0.0435, "step": 56470 }, { "epoch": 0.8668559588673164, "grad_norm": 0.4770222008228302, "learning_rate": 1.3957763867486695e-05, "loss": 0.0402, "step": 56480 }, { "epoch": 0.8670094390300054, "grad_norm": 0.48651108145713806, "learning_rate": 1.3955303718259738e-05, "loss": 0.0356, "step": 56490 }, { "epoch": 0.8671629191926944, "grad_norm": 0.363285630941391, "learning_rate": 1.3952843285215344e-05, "loss": 0.0332, "step": 56500 }, { "epoch": 0.8673163993553833, "grad_norm": 0.2008650153875351, "learning_rate": 1.3950382568530064e-05, "loss": 0.024, "step": 56510 }, { "epoch": 0.8674698795180723, "grad_norm": 0.521186351776123, "learning_rate": 1.3947921568380473e-05, "loss": 0.0339, "step": 56520 }, { "epoch": 0.8676233596807612, "grad_norm": 2.303558349609375, "learning_rate": 1.394546028494316e-05, "loss": 0.0361, "step": 56530 }, { "epoch": 0.8677768398434502, "grad_norm": 0.3925258219242096, "learning_rate": 1.3942998718394738e-05, "loss": 0.0388, "step": 56540 }, { "epoch": 0.8679303200061392, "grad_norm": 0.3105592131614685, "learning_rate": 1.3940536868911842e-05, "loss": 0.0448, "step": 56550 }, { "epoch": 0.8680838001688281, "grad_norm": 0.3781154751777649, "learning_rate": 1.393807473667112e-05, "loss": 0.0379, "step": 56560 }, { "epoch": 0.8682372803315171, "grad_norm": 0.485785573720932, "learning_rate": 1.3935612321849252e-05, "loss": 0.0307, "step": 56570 }, { "epoch": 0.8683907604942062, "grad_norm": 0.393086314201355, "learning_rate": 1.3933149624622923e-05, "loss": 0.0403, "step": 56580 }, { "epoch": 0.8685442406568951, "grad_norm": 0.28321021795272827, "learning_rate": 1.3930686645168854e-05, "loss": 0.031, "step": 56590 }, { "epoch": 0.8686977208195841, "grad_norm": 0.4359864294528961, "learning_rate": 1.3928223383663775e-05, "loss": 0.0355, "step": 56600 }, { "epoch": 0.8688512009822731, "grad_norm": 0.3867986500263214, "learning_rate": 1.392575984028444e-05, "loss": 0.0347, "step": 56610 }, { "epoch": 0.869004681144962, "grad_norm": 0.3656373620033264, "learning_rate": 1.3923296015207626e-05, "loss": 0.0379, "step": 56620 }, { "epoch": 0.869158161307651, "grad_norm": 0.5107429027557373, "learning_rate": 1.3920831908610129e-05, "loss": 0.0327, "step": 56630 }, { "epoch": 0.86931164147034, "grad_norm": 0.4459047019481659, "learning_rate": 1.3918367520668756e-05, "loss": 0.0302, "step": 56640 }, { "epoch": 0.8694651216330289, "grad_norm": 0.3307001292705536, "learning_rate": 1.3915902851560352e-05, "loss": 0.0317, "step": 56650 }, { "epoch": 0.8696186017957179, "grad_norm": 0.25337478518486023, "learning_rate": 1.3913437901461766e-05, "loss": 0.0337, "step": 56660 }, { "epoch": 0.8697720819584068, "grad_norm": 0.34762096405029297, "learning_rate": 1.3910972670549873e-05, "loss": 0.0424, "step": 56670 }, { "epoch": 0.8699255621210958, "grad_norm": 0.3806169629096985, "learning_rate": 1.3908507159001572e-05, "loss": 0.0389, "step": 56680 }, { "epoch": 0.8700790422837849, "grad_norm": 0.3747349977493286, "learning_rate": 1.3906041366993776e-05, "loss": 0.0446, "step": 56690 }, { "epoch": 0.8702325224464738, "grad_norm": 0.33344393968582153, "learning_rate": 1.3903575294703425e-05, "loss": 0.038, "step": 56700 }, { "epoch": 0.8703860026091628, "grad_norm": 0.4792720377445221, "learning_rate": 1.3901108942307469e-05, "loss": 0.0359, "step": 56710 }, { "epoch": 0.8705394827718518, "grad_norm": 0.28093793988227844, "learning_rate": 1.3898642309982885e-05, "loss": 0.0331, "step": 56720 }, { "epoch": 0.8706929629345407, "grad_norm": 0.4786868095397949, "learning_rate": 1.3896175397906672e-05, "loss": 0.0395, "step": 56730 }, { "epoch": 0.8708464430972297, "grad_norm": 0.35869622230529785, "learning_rate": 1.3893708206255847e-05, "loss": 0.0319, "step": 56740 }, { "epoch": 0.8709999232599187, "grad_norm": 0.41859546303749084, "learning_rate": 1.389124073520744e-05, "loss": 0.0332, "step": 56750 }, { "epoch": 0.8711534034226076, "grad_norm": 0.526498556137085, "learning_rate": 1.3888772984938519e-05, "loss": 0.0301, "step": 56760 }, { "epoch": 0.8713068835852966, "grad_norm": 0.41496604681015015, "learning_rate": 1.3886304955626146e-05, "loss": 0.034, "step": 56770 }, { "epoch": 0.8714603637479855, "grad_norm": 0.44647130370140076, "learning_rate": 1.3883836647447427e-05, "loss": 0.0363, "step": 56780 }, { "epoch": 0.8716138439106745, "grad_norm": 0.6196275949478149, "learning_rate": 1.3881368060579474e-05, "loss": 0.0293, "step": 56790 }, { "epoch": 0.8717673240733635, "grad_norm": 0.45569106936454773, "learning_rate": 1.3878899195199425e-05, "loss": 0.0338, "step": 56800 }, { "epoch": 0.8719208042360524, "grad_norm": 0.3171067237854004, "learning_rate": 1.3876430051484438e-05, "loss": 0.0332, "step": 56810 }, { "epoch": 0.8720742843987415, "grad_norm": 0.3835042119026184, "learning_rate": 1.3873960629611682e-05, "loss": 0.0415, "step": 56820 }, { "epoch": 0.8722277645614305, "grad_norm": 0.6285680532455444, "learning_rate": 1.3871490929758364e-05, "loss": 0.0446, "step": 56830 }, { "epoch": 0.8723812447241194, "grad_norm": 0.46557188034057617, "learning_rate": 1.3869020952101693e-05, "loss": 0.0372, "step": 56840 }, { "epoch": 0.8725347248868084, "grad_norm": 0.5660942196846008, "learning_rate": 1.3866550696818905e-05, "loss": 0.0441, "step": 56850 }, { "epoch": 0.8726882050494974, "grad_norm": 0.44118863344192505, "learning_rate": 1.3864080164087263e-05, "loss": 0.0301, "step": 56860 }, { "epoch": 0.8728416852121863, "grad_norm": 0.43476277589797974, "learning_rate": 1.3861609354084034e-05, "loss": 0.037, "step": 56870 }, { "epoch": 0.8729951653748753, "grad_norm": 0.4331261217594147, "learning_rate": 1.3859138266986518e-05, "loss": 0.0336, "step": 56880 }, { "epoch": 0.8731486455375642, "grad_norm": 0.27630364894866943, "learning_rate": 1.385666690297203e-05, "loss": 0.0428, "step": 56890 }, { "epoch": 0.8733021257002532, "grad_norm": 0.41720592975616455, "learning_rate": 1.3854195262217907e-05, "loss": 0.0394, "step": 56900 }, { "epoch": 0.8734556058629422, "grad_norm": 0.5122259259223938, "learning_rate": 1.3851723344901502e-05, "loss": 0.0323, "step": 56910 }, { "epoch": 0.8736090860256311, "grad_norm": 0.3805297017097473, "learning_rate": 1.3849251151200196e-05, "loss": 0.0338, "step": 56920 }, { "epoch": 0.8737625661883202, "grad_norm": 0.49707844853401184, "learning_rate": 1.3846778681291377e-05, "loss": 0.0386, "step": 56930 }, { "epoch": 0.8739160463510092, "grad_norm": 0.3368205726146698, "learning_rate": 1.3844305935352464e-05, "loss": 0.0416, "step": 56940 }, { "epoch": 0.8740695265136981, "grad_norm": 0.5167819857597351, "learning_rate": 1.3841832913560888e-05, "loss": 0.0407, "step": 56950 }, { "epoch": 0.8742230066763871, "grad_norm": 0.3464813828468323, "learning_rate": 1.383935961609411e-05, "loss": 0.03, "step": 56960 }, { "epoch": 0.8743764868390761, "grad_norm": 0.31768134236335754, "learning_rate": 1.3836886043129601e-05, "loss": 0.0301, "step": 56970 }, { "epoch": 0.874529967001765, "grad_norm": 0.3079151213169098, "learning_rate": 1.3834412194844853e-05, "loss": 0.0337, "step": 56980 }, { "epoch": 0.874683447164454, "grad_norm": 0.4250461161136627, "learning_rate": 1.383193807141738e-05, "loss": 0.0283, "step": 56990 }, { "epoch": 0.874836927327143, "grad_norm": 0.4391665756702423, "learning_rate": 1.3829463673024718e-05, "loss": 0.0342, "step": 57000 }, { "epoch": 0.8749904074898319, "grad_norm": 0.3629966676235199, "learning_rate": 1.3826988999844422e-05, "loss": 0.0349, "step": 57010 }, { "epoch": 0.8751438876525209, "grad_norm": 0.404492050409317, "learning_rate": 1.3824514052054064e-05, "loss": 0.0407, "step": 57020 }, { "epoch": 0.8752973678152098, "grad_norm": 0.29658254981040955, "learning_rate": 1.3822038829831233e-05, "loss": 0.0292, "step": 57030 }, { "epoch": 0.8754508479778988, "grad_norm": 0.37890729308128357, "learning_rate": 1.3819563333353541e-05, "loss": 0.0405, "step": 57040 }, { "epoch": 0.8756043281405879, "grad_norm": 0.33694615960121155, "learning_rate": 1.3817087562798626e-05, "loss": 0.0354, "step": 57050 }, { "epoch": 0.8757578083032768, "grad_norm": 0.33736512064933777, "learning_rate": 1.3814611518344138e-05, "loss": 0.0323, "step": 57060 }, { "epoch": 0.8759112884659658, "grad_norm": 0.4374540448188782, "learning_rate": 1.3812135200167746e-05, "loss": 0.041, "step": 57070 }, { "epoch": 0.8760647686286548, "grad_norm": 0.30895036458969116, "learning_rate": 1.3809658608447143e-05, "loss": 0.0312, "step": 57080 }, { "epoch": 0.8762182487913437, "grad_norm": 0.4217454195022583, "learning_rate": 1.3807181743360038e-05, "loss": 0.0361, "step": 57090 }, { "epoch": 0.8763717289540327, "grad_norm": 0.33956098556518555, "learning_rate": 1.3804704605084162e-05, "loss": 0.0399, "step": 57100 }, { "epoch": 0.8765252091167217, "grad_norm": 0.32916125655174255, "learning_rate": 1.3802227193797268e-05, "loss": 0.0321, "step": 57110 }, { "epoch": 0.8766786892794106, "grad_norm": 0.3121756315231323, "learning_rate": 1.3799749509677122e-05, "loss": 0.0381, "step": 57120 }, { "epoch": 0.8768321694420996, "grad_norm": 0.396575927734375, "learning_rate": 1.3797271552901515e-05, "loss": 0.0353, "step": 57130 }, { "epoch": 0.8769856496047885, "grad_norm": 0.27948591113090515, "learning_rate": 1.3794793323648253e-05, "loss": 0.0328, "step": 57140 }, { "epoch": 0.8771391297674775, "grad_norm": 0.29325515031814575, "learning_rate": 1.3792314822095167e-05, "loss": 0.0341, "step": 57150 }, { "epoch": 0.8772926099301666, "grad_norm": 0.28314870595932007, "learning_rate": 1.3789836048420105e-05, "loss": 0.0302, "step": 57160 }, { "epoch": 0.8774460900928555, "grad_norm": 0.2733677923679352, "learning_rate": 1.378735700280093e-05, "loss": 0.0288, "step": 57170 }, { "epoch": 0.8775995702555445, "grad_norm": 0.3563377857208252, "learning_rate": 1.3784877685415533e-05, "loss": 0.0325, "step": 57180 }, { "epoch": 0.8777530504182335, "grad_norm": 0.3823702335357666, "learning_rate": 1.378239809644182e-05, "loss": 0.0468, "step": 57190 }, { "epoch": 0.8779065305809224, "grad_norm": 0.40852251648902893, "learning_rate": 1.3779918236057717e-05, "loss": 0.0384, "step": 57200 }, { "epoch": 0.8780600107436114, "grad_norm": 0.5263674259185791, "learning_rate": 1.3777438104441165e-05, "loss": 0.0331, "step": 57210 }, { "epoch": 0.8782134909063004, "grad_norm": 0.32182034850120544, "learning_rate": 1.3774957701770136e-05, "loss": 0.0354, "step": 57220 }, { "epoch": 0.8783669710689893, "grad_norm": 0.37996095418930054, "learning_rate": 1.3772477028222609e-05, "loss": 0.0378, "step": 57230 }, { "epoch": 0.8785204512316783, "grad_norm": 0.44265061616897583, "learning_rate": 1.3769996083976587e-05, "loss": 0.0333, "step": 57240 }, { "epoch": 0.8786739313943672, "grad_norm": 0.47915130853652954, "learning_rate": 1.3767514869210098e-05, "loss": 0.0401, "step": 57250 }, { "epoch": 0.8788274115570562, "grad_norm": 0.42948758602142334, "learning_rate": 1.376503338410118e-05, "loss": 0.031, "step": 57260 }, { "epoch": 0.8789808917197452, "grad_norm": 0.5631344318389893, "learning_rate": 1.3762551628827892e-05, "loss": 0.0367, "step": 57270 }, { "epoch": 0.8791343718824342, "grad_norm": 0.38468414545059204, "learning_rate": 1.3760069603568325e-05, "loss": 0.0308, "step": 57280 }, { "epoch": 0.8792878520451232, "grad_norm": 0.5126992464065552, "learning_rate": 1.3757587308500573e-05, "loss": 0.0361, "step": 57290 }, { "epoch": 0.8794413322078122, "grad_norm": 0.5595798492431641, "learning_rate": 1.3755104743802754e-05, "loss": 0.0364, "step": 57300 }, { "epoch": 0.8795948123705011, "grad_norm": 0.6685703992843628, "learning_rate": 1.3752621909653018e-05, "loss": 0.0411, "step": 57310 }, { "epoch": 0.8797482925331901, "grad_norm": 0.35923653841018677, "learning_rate": 1.3750138806229506e-05, "loss": 0.0311, "step": 57320 }, { "epoch": 0.8799017726958791, "grad_norm": 0.47053247690200806, "learning_rate": 1.3747655433710413e-05, "loss": 0.031, "step": 57330 }, { "epoch": 0.880055252858568, "grad_norm": 0.30830830335617065, "learning_rate": 1.3745171792273927e-05, "loss": 0.037, "step": 57340 }, { "epoch": 0.880208733021257, "grad_norm": 0.42164257168769836, "learning_rate": 1.3742687882098267e-05, "loss": 0.0368, "step": 57350 }, { "epoch": 0.880362213183946, "grad_norm": 0.6595871448516846, "learning_rate": 1.374020370336167e-05, "loss": 0.0418, "step": 57360 }, { "epoch": 0.8805156933466349, "grad_norm": 0.4300297498703003, "learning_rate": 1.3737719256242387e-05, "loss": 0.0368, "step": 57370 }, { "epoch": 0.8806691735093239, "grad_norm": 0.3484916388988495, "learning_rate": 1.3735234540918698e-05, "loss": 0.0343, "step": 57380 }, { "epoch": 0.8808226536720128, "grad_norm": 0.4918699264526367, "learning_rate": 1.3732749557568894e-05, "loss": 0.0376, "step": 57390 }, { "epoch": 0.8809761338347019, "grad_norm": 0.2574087977409363, "learning_rate": 1.3730264306371286e-05, "loss": 0.0322, "step": 57400 }, { "epoch": 0.8811296139973909, "grad_norm": 0.44383522868156433, "learning_rate": 1.3727778787504211e-05, "loss": 0.0389, "step": 57410 }, { "epoch": 0.8812830941600798, "grad_norm": 0.42433199286460876, "learning_rate": 1.3725293001146017e-05, "loss": 0.0398, "step": 57420 }, { "epoch": 0.8814365743227688, "grad_norm": 0.3885781764984131, "learning_rate": 1.3722806947475071e-05, "loss": 0.0373, "step": 57430 }, { "epoch": 0.8815900544854578, "grad_norm": 0.5657851099967957, "learning_rate": 1.372032062666977e-05, "loss": 0.0407, "step": 57440 }, { "epoch": 0.8817435346481467, "grad_norm": 0.46863579750061035, "learning_rate": 1.3717834038908517e-05, "loss": 0.043, "step": 57450 }, { "epoch": 0.8818970148108357, "grad_norm": 0.5346007347106934, "learning_rate": 1.3715347184369746e-05, "loss": 0.0314, "step": 57460 }, { "epoch": 0.8820504949735247, "grad_norm": 0.24827183783054352, "learning_rate": 1.3712860063231897e-05, "loss": 0.0357, "step": 57470 }, { "epoch": 0.8822039751362136, "grad_norm": 0.5094324350357056, "learning_rate": 1.3710372675673442e-05, "loss": 0.0356, "step": 57480 }, { "epoch": 0.8823574552989026, "grad_norm": 0.38585540652275085, "learning_rate": 1.3707885021872863e-05, "loss": 0.0332, "step": 57490 }, { "epoch": 0.8825109354615915, "grad_norm": 0.3300098478794098, "learning_rate": 1.3705397102008663e-05, "loss": 0.0369, "step": 57500 }, { "epoch": 0.8826644156242806, "grad_norm": 0.31625375151634216, "learning_rate": 1.3702908916259374e-05, "loss": 0.0388, "step": 57510 }, { "epoch": 0.8828178957869696, "grad_norm": 0.3905313313007355, "learning_rate": 1.370042046480353e-05, "loss": 0.0373, "step": 57520 }, { "epoch": 0.8829713759496585, "grad_norm": 0.4536200761795044, "learning_rate": 1.3697931747819695e-05, "loss": 0.0426, "step": 57530 }, { "epoch": 0.8831248561123475, "grad_norm": 0.4572881758213043, "learning_rate": 1.369544276548645e-05, "loss": 0.046, "step": 57540 }, { "epoch": 0.8832783362750365, "grad_norm": 0.43849867582321167, "learning_rate": 1.3692953517982396e-05, "loss": 0.0312, "step": 57550 }, { "epoch": 0.8834318164377254, "grad_norm": 0.3049415647983551, "learning_rate": 1.3690464005486153e-05, "loss": 0.034, "step": 57560 }, { "epoch": 0.8835852966004144, "grad_norm": 0.5198186635971069, "learning_rate": 1.3687974228176357e-05, "loss": 0.0269, "step": 57570 }, { "epoch": 0.8837387767631034, "grad_norm": 0.3317174017429352, "learning_rate": 1.3685484186231663e-05, "loss": 0.0396, "step": 57580 }, { "epoch": 0.8838922569257923, "grad_norm": 0.5529343485832214, "learning_rate": 1.3682993879830748e-05, "loss": 0.0356, "step": 57590 }, { "epoch": 0.8840457370884813, "grad_norm": 0.29069218039512634, "learning_rate": 1.3680503309152312e-05, "loss": 0.0282, "step": 57600 }, { "epoch": 0.8841992172511702, "grad_norm": 0.35335344076156616, "learning_rate": 1.367801247437506e-05, "loss": 0.0364, "step": 57610 }, { "epoch": 0.8843526974138592, "grad_norm": 0.546406090259552, "learning_rate": 1.3675521375677732e-05, "loss": 0.0293, "step": 57620 }, { "epoch": 0.8845061775765483, "grad_norm": 0.4726790487766266, "learning_rate": 1.3673030013239074e-05, "loss": 0.0354, "step": 57630 }, { "epoch": 0.8846596577392372, "grad_norm": 0.2912309467792511, "learning_rate": 1.3670538387237862e-05, "loss": 0.0318, "step": 57640 }, { "epoch": 0.8848131379019262, "grad_norm": 0.3166801929473877, "learning_rate": 1.3668046497852882e-05, "loss": 0.0365, "step": 57650 }, { "epoch": 0.8849666180646152, "grad_norm": 0.3758684992790222, "learning_rate": 1.3665554345262944e-05, "loss": 0.0306, "step": 57660 }, { "epoch": 0.8851200982273041, "grad_norm": 0.5376856327056885, "learning_rate": 1.3663061929646872e-05, "loss": 0.0305, "step": 57670 }, { "epoch": 0.8852735783899931, "grad_norm": 0.38275226950645447, "learning_rate": 1.3660569251183522e-05, "loss": 0.04, "step": 57680 }, { "epoch": 0.8854270585526821, "grad_norm": 0.2851822078227997, "learning_rate": 1.3658076310051745e-05, "loss": 0.0311, "step": 57690 }, { "epoch": 0.885580538715371, "grad_norm": 0.3841819167137146, "learning_rate": 1.3655583106430434e-05, "loss": 0.0271, "step": 57700 }, { "epoch": 0.88573401887806, "grad_norm": 0.22554965317249298, "learning_rate": 1.3653089640498491e-05, "loss": 0.0413, "step": 57710 }, { "epoch": 0.885887499040749, "grad_norm": 0.42615577578544617, "learning_rate": 1.3650595912434833e-05, "loss": 0.0365, "step": 57720 }, { "epoch": 0.8860409792034379, "grad_norm": 0.4284624755382538, "learning_rate": 1.364810192241841e-05, "loss": 0.0283, "step": 57730 }, { "epoch": 0.886194459366127, "grad_norm": 0.3244706988334656, "learning_rate": 1.364560767062817e-05, "loss": 0.0316, "step": 57740 }, { "epoch": 0.8863479395288159, "grad_norm": 0.44892290234565735, "learning_rate": 1.3643113157243097e-05, "loss": 0.038, "step": 57750 }, { "epoch": 0.8865014196915049, "grad_norm": 0.3293410837650299, "learning_rate": 1.3640618382442186e-05, "loss": 0.0308, "step": 57760 }, { "epoch": 0.8866548998541939, "grad_norm": 0.368099570274353, "learning_rate": 1.3638123346404451e-05, "loss": 0.0366, "step": 57770 }, { "epoch": 0.8868083800168828, "grad_norm": 0.3773038685321808, "learning_rate": 1.3635628049308934e-05, "loss": 0.0337, "step": 57780 }, { "epoch": 0.8869618601795718, "grad_norm": 0.3319489657878876, "learning_rate": 1.3633132491334676e-05, "loss": 0.0371, "step": 57790 }, { "epoch": 0.8871153403422608, "grad_norm": 0.6270768642425537, "learning_rate": 1.3630636672660757e-05, "loss": 0.0364, "step": 57800 }, { "epoch": 0.8872688205049497, "grad_norm": 0.25710174441337585, "learning_rate": 1.3628140593466266e-05, "loss": 0.0381, "step": 57810 }, { "epoch": 0.8874223006676387, "grad_norm": 0.3497161269187927, "learning_rate": 1.3625644253930307e-05, "loss": 0.036, "step": 57820 }, { "epoch": 0.8875757808303277, "grad_norm": 0.3428027331829071, "learning_rate": 1.3623147654232016e-05, "loss": 0.034, "step": 57830 }, { "epoch": 0.8877292609930166, "grad_norm": 0.37215641140937805, "learning_rate": 1.3620650794550537e-05, "loss": 0.0338, "step": 57840 }, { "epoch": 0.8878827411557056, "grad_norm": 0.4596189558506012, "learning_rate": 1.361815367506503e-05, "loss": 0.0321, "step": 57850 }, { "epoch": 0.8880362213183945, "grad_norm": 0.423833429813385, "learning_rate": 1.3615656295954684e-05, "loss": 0.0298, "step": 57860 }, { "epoch": 0.8881897014810836, "grad_norm": 0.23243185877799988, "learning_rate": 1.3613158657398697e-05, "loss": 0.0299, "step": 57870 }, { "epoch": 0.8883431816437726, "grad_norm": 0.488779217004776, "learning_rate": 1.3610660759576295e-05, "loss": 0.036, "step": 57880 }, { "epoch": 0.8884966618064615, "grad_norm": 0.5184056758880615, "learning_rate": 1.3608162602666717e-05, "loss": 0.0389, "step": 57890 }, { "epoch": 0.8886501419691505, "grad_norm": 0.5223016142845154, "learning_rate": 1.3605664186849214e-05, "loss": 0.0396, "step": 57900 }, { "epoch": 0.8888036221318395, "grad_norm": 0.3381076455116272, "learning_rate": 1.3603165512303073e-05, "loss": 0.0271, "step": 57910 }, { "epoch": 0.8889571022945284, "grad_norm": 0.29337725043296814, "learning_rate": 1.3600666579207582e-05, "loss": 0.034, "step": 57920 }, { "epoch": 0.8891105824572174, "grad_norm": 0.37468206882476807, "learning_rate": 1.3598167387742059e-05, "loss": 0.0383, "step": 57930 }, { "epoch": 0.8892640626199064, "grad_norm": 0.46713486313819885, "learning_rate": 1.3595667938085832e-05, "loss": 0.0389, "step": 57940 }, { "epoch": 0.8894175427825953, "grad_norm": 0.543820321559906, "learning_rate": 1.3593168230418254e-05, "loss": 0.0371, "step": 57950 }, { "epoch": 0.8895710229452843, "grad_norm": 0.5606473684310913, "learning_rate": 1.3590668264918698e-05, "loss": 0.0342, "step": 57960 }, { "epoch": 0.8897245031079732, "grad_norm": 0.29539912939071655, "learning_rate": 1.3588168041766548e-05, "loss": 0.0347, "step": 57970 }, { "epoch": 0.8898779832706623, "grad_norm": 0.4438919723033905, "learning_rate": 1.3585667561141212e-05, "loss": 0.0458, "step": 57980 }, { "epoch": 0.8900314634333513, "grad_norm": 0.4647980332374573, "learning_rate": 1.3583166823222113e-05, "loss": 0.0467, "step": 57990 }, { "epoch": 0.8901849435960402, "grad_norm": 0.3987463712692261, "learning_rate": 1.3580665828188695e-05, "loss": 0.0311, "step": 58000 }, { "epoch": 0.8903384237587292, "grad_norm": 0.34970608353614807, "learning_rate": 1.357816457622042e-05, "loss": 0.0301, "step": 58010 }, { "epoch": 0.8904919039214182, "grad_norm": 0.5907384753227234, "learning_rate": 1.357566306749677e-05, "loss": 0.0366, "step": 58020 }, { "epoch": 0.8906453840841071, "grad_norm": 0.38955947756767273, "learning_rate": 1.3573161302197245e-05, "loss": 0.0355, "step": 58030 }, { "epoch": 0.8907988642467961, "grad_norm": 0.500407874584198, "learning_rate": 1.3570659280501357e-05, "loss": 0.0378, "step": 58040 }, { "epoch": 0.8909523444094851, "grad_norm": 0.39207834005355835, "learning_rate": 1.3568157002588642e-05, "loss": 0.0354, "step": 58050 }, { "epoch": 0.891105824572174, "grad_norm": 0.27566808462142944, "learning_rate": 1.3565654468638658e-05, "loss": 0.028, "step": 58060 }, { "epoch": 0.891259304734863, "grad_norm": 0.5693351030349731, "learning_rate": 1.3563151678830974e-05, "loss": 0.0367, "step": 58070 }, { "epoch": 0.891412784897552, "grad_norm": 0.3410176634788513, "learning_rate": 1.3560648633345185e-05, "loss": 0.0365, "step": 58080 }, { "epoch": 0.891566265060241, "grad_norm": 0.3730611205101013, "learning_rate": 1.3558145332360893e-05, "loss": 0.036, "step": 58090 }, { "epoch": 0.89171974522293, "grad_norm": 0.45886579155921936, "learning_rate": 1.3555641776057729e-05, "loss": 0.0368, "step": 58100 }, { "epoch": 0.8918732253856189, "grad_norm": 0.2672784626483917, "learning_rate": 1.3553137964615339e-05, "loss": 0.0358, "step": 58110 }, { "epoch": 0.8920267055483079, "grad_norm": 0.5086467266082764, "learning_rate": 1.3550633898213387e-05, "loss": 0.0336, "step": 58120 }, { "epoch": 0.8921801857109969, "grad_norm": 0.4796787202358246, "learning_rate": 1.3548129577031555e-05, "loss": 0.0359, "step": 58130 }, { "epoch": 0.8923336658736858, "grad_norm": 0.4886437654495239, "learning_rate": 1.3545625001249541e-05, "loss": 0.0375, "step": 58140 }, { "epoch": 0.8924871460363748, "grad_norm": 0.2784007787704468, "learning_rate": 1.3543120171047067e-05, "loss": 0.0286, "step": 58150 }, { "epoch": 0.8926406261990638, "grad_norm": 0.26922473311424255, "learning_rate": 1.354061508660387e-05, "loss": 0.0297, "step": 58160 }, { "epoch": 0.8927941063617527, "grad_norm": 0.30565932393074036, "learning_rate": 1.3538109748099705e-05, "loss": 0.0314, "step": 58170 }, { "epoch": 0.8929475865244417, "grad_norm": 0.45007482171058655, "learning_rate": 1.3535604155714343e-05, "loss": 0.0361, "step": 58180 }, { "epoch": 0.8931010666871307, "grad_norm": 0.3527618646621704, "learning_rate": 1.3533098309627577e-05, "loss": 0.0387, "step": 58190 }, { "epoch": 0.8932545468498196, "grad_norm": 0.29414162039756775, "learning_rate": 1.3530592210019217e-05, "loss": 0.0328, "step": 58200 }, { "epoch": 0.8934080270125087, "grad_norm": 0.3425356447696686, "learning_rate": 1.3528085857069092e-05, "loss": 0.0388, "step": 58210 }, { "epoch": 0.8935615071751976, "grad_norm": 0.4173620045185089, "learning_rate": 1.3525579250957049e-05, "loss": 0.0377, "step": 58220 }, { "epoch": 0.8937149873378866, "grad_norm": 0.6300813555717468, "learning_rate": 1.352307239186295e-05, "loss": 0.0334, "step": 58230 }, { "epoch": 0.8938684675005756, "grad_norm": 0.4087260663509369, "learning_rate": 1.352056527996668e-05, "loss": 0.0308, "step": 58240 }, { "epoch": 0.8940219476632645, "grad_norm": 0.3768622875213623, "learning_rate": 1.3518057915448135e-05, "loss": 0.0321, "step": 58250 }, { "epoch": 0.8941754278259535, "grad_norm": 0.34924453496932983, "learning_rate": 1.3515550298487241e-05, "loss": 0.038, "step": 58260 }, { "epoch": 0.8943289079886425, "grad_norm": 0.6237875819206238, "learning_rate": 1.3513042429263927e-05, "loss": 0.0376, "step": 58270 }, { "epoch": 0.8944823881513314, "grad_norm": 0.32304051518440247, "learning_rate": 1.3510534307958159e-05, "loss": 0.0341, "step": 58280 }, { "epoch": 0.8946358683140204, "grad_norm": 0.3313066065311432, "learning_rate": 1.3508025934749897e-05, "loss": 0.0426, "step": 58290 }, { "epoch": 0.8947893484767094, "grad_norm": 0.2804919481277466, "learning_rate": 1.3505517309819142e-05, "loss": 0.041, "step": 58300 }, { "epoch": 0.8949428286393983, "grad_norm": 0.41468948125839233, "learning_rate": 1.3503008433345901e-05, "loss": 0.038, "step": 58310 }, { "epoch": 0.8950963088020873, "grad_norm": 0.3476642668247223, "learning_rate": 1.3500499305510198e-05, "loss": 0.0278, "step": 58320 }, { "epoch": 0.8952497889647762, "grad_norm": 0.5263018012046814, "learning_rate": 1.3497989926492083e-05, "loss": 0.0369, "step": 58330 }, { "epoch": 0.8954032691274653, "grad_norm": 0.41095221042633057, "learning_rate": 1.3495480296471615e-05, "loss": 0.0288, "step": 58340 }, { "epoch": 0.8955567492901543, "grad_norm": 0.4988766014575958, "learning_rate": 1.3492970415628876e-05, "loss": 0.0412, "step": 58350 }, { "epoch": 0.8957102294528432, "grad_norm": 0.4015735685825348, "learning_rate": 1.3490460284143973e-05, "loss": 0.0343, "step": 58360 }, { "epoch": 0.8958637096155322, "grad_norm": 0.4395192265510559, "learning_rate": 1.3487949902197012e-05, "loss": 0.0419, "step": 58370 }, { "epoch": 0.8960171897782212, "grad_norm": 0.39522409439086914, "learning_rate": 1.348543926996814e-05, "loss": 0.035, "step": 58380 }, { "epoch": 0.8961706699409101, "grad_norm": 0.4525642991065979, "learning_rate": 1.3482928387637499e-05, "loss": 0.0381, "step": 58390 }, { "epoch": 0.8963241501035991, "grad_norm": 0.20716607570648193, "learning_rate": 1.3480417255385267e-05, "loss": 0.0314, "step": 58400 }, { "epoch": 0.8964776302662881, "grad_norm": 0.315104603767395, "learning_rate": 1.3477905873391634e-05, "loss": 0.0282, "step": 58410 }, { "epoch": 0.896631110428977, "grad_norm": 0.34203341603279114, "learning_rate": 1.34753942418368e-05, "loss": 0.0286, "step": 58420 }, { "epoch": 0.896784590591666, "grad_norm": 0.40073326230049133, "learning_rate": 1.3472882360900998e-05, "loss": 0.0355, "step": 58430 }, { "epoch": 0.896938070754355, "grad_norm": 0.39453014731407166, "learning_rate": 1.3470370230764469e-05, "loss": 0.0397, "step": 58440 }, { "epoch": 0.897091550917044, "grad_norm": 0.4193643629550934, "learning_rate": 1.3467857851607473e-05, "loss": 0.0341, "step": 58450 }, { "epoch": 0.897245031079733, "grad_norm": 0.540147066116333, "learning_rate": 1.3465345223610289e-05, "loss": 0.0363, "step": 58460 }, { "epoch": 0.8973985112424219, "grad_norm": 0.5972879528999329, "learning_rate": 1.346283234695321e-05, "loss": 0.0455, "step": 58470 }, { "epoch": 0.8975519914051109, "grad_norm": 0.5257357358932495, "learning_rate": 1.3460319221816556e-05, "loss": 0.0376, "step": 58480 }, { "epoch": 0.8977054715677999, "grad_norm": 0.45741233229637146, "learning_rate": 1.3457805848380653e-05, "loss": 0.0387, "step": 58490 }, { "epoch": 0.8978589517304888, "grad_norm": 0.5085516571998596, "learning_rate": 1.3455292226825858e-05, "loss": 0.0349, "step": 58500 }, { "epoch": 0.8980124318931778, "grad_norm": 0.3300316035747528, "learning_rate": 1.3452778357332534e-05, "loss": 0.0276, "step": 58510 }, { "epoch": 0.8981659120558668, "grad_norm": 0.3717324137687683, "learning_rate": 1.3450264240081071e-05, "loss": 0.0394, "step": 58520 }, { "epoch": 0.8983193922185557, "grad_norm": 0.20092475414276123, "learning_rate": 1.3447749875251868e-05, "loss": 0.033, "step": 58530 }, { "epoch": 0.8984728723812447, "grad_norm": 0.41082823276519775, "learning_rate": 1.3445235263025347e-05, "loss": 0.0331, "step": 58540 }, { "epoch": 0.8986263525439337, "grad_norm": 0.4276998043060303, "learning_rate": 1.3442720403581949e-05, "loss": 0.0281, "step": 58550 }, { "epoch": 0.8987798327066227, "grad_norm": 0.4166800081729889, "learning_rate": 1.3440205297102126e-05, "loss": 0.0355, "step": 58560 }, { "epoch": 0.8989333128693117, "grad_norm": 0.3804011642932892, "learning_rate": 1.3437689943766361e-05, "loss": 0.0358, "step": 58570 }, { "epoch": 0.8990867930320006, "grad_norm": 0.3225368559360504, "learning_rate": 1.3435174343755137e-05, "loss": 0.0296, "step": 58580 }, { "epoch": 0.8992402731946896, "grad_norm": 0.35793349146842957, "learning_rate": 1.343265849724897e-05, "loss": 0.038, "step": 58590 }, { "epoch": 0.8993937533573786, "grad_norm": 0.499803751707077, "learning_rate": 1.3430142404428385e-05, "loss": 0.0424, "step": 58600 }, { "epoch": 0.8995472335200675, "grad_norm": 0.5264678001403809, "learning_rate": 1.3427626065473925e-05, "loss": 0.0323, "step": 58610 }, { "epoch": 0.8997007136827565, "grad_norm": 0.3089614808559418, "learning_rate": 1.3425109480566158e-05, "loss": 0.037, "step": 58620 }, { "epoch": 0.8998541938454455, "grad_norm": 0.34378781914711, "learning_rate": 1.3422592649885662e-05, "loss": 0.0271, "step": 58630 }, { "epoch": 0.9000076740081344, "grad_norm": 0.4358571171760559, "learning_rate": 1.3420075573613031e-05, "loss": 0.0437, "step": 58640 }, { "epoch": 0.9001611541708234, "grad_norm": 0.4723658263683319, "learning_rate": 1.341755825192889e-05, "loss": 0.0377, "step": 58650 }, { "epoch": 0.9003146343335124, "grad_norm": 0.32124578952789307, "learning_rate": 1.3415040685013863e-05, "loss": 0.0348, "step": 58660 }, { "epoch": 0.9004681144962013, "grad_norm": 0.3603222072124481, "learning_rate": 1.3412522873048608e-05, "loss": 0.0308, "step": 58670 }, { "epoch": 0.9006215946588904, "grad_norm": 0.4461402893066406, "learning_rate": 1.3410004816213789e-05, "loss": 0.0331, "step": 58680 }, { "epoch": 0.9007750748215793, "grad_norm": 0.2888104021549225, "learning_rate": 1.340748651469009e-05, "loss": 0.0391, "step": 58690 }, { "epoch": 0.9009285549842683, "grad_norm": 0.5986515283584595, "learning_rate": 1.3404967968658224e-05, "loss": 0.0518, "step": 58700 }, { "epoch": 0.9010820351469573, "grad_norm": 0.4042091369628906, "learning_rate": 1.3402449178298904e-05, "loss": 0.0364, "step": 58710 }, { "epoch": 0.9012355153096462, "grad_norm": 0.7372307777404785, "learning_rate": 1.3399930143792871e-05, "loss": 0.0438, "step": 58720 }, { "epoch": 0.9013889954723352, "grad_norm": 0.44767504930496216, "learning_rate": 1.3397410865320884e-05, "loss": 0.0406, "step": 58730 }, { "epoch": 0.9015424756350242, "grad_norm": 0.33139246702194214, "learning_rate": 1.339489134306371e-05, "loss": 0.0318, "step": 58740 }, { "epoch": 0.9016959557977131, "grad_norm": 0.5395535826683044, "learning_rate": 1.3392371577202148e-05, "loss": 0.0415, "step": 58750 }, { "epoch": 0.9018494359604021, "grad_norm": 0.2666223347187042, "learning_rate": 1.3389851567917e-05, "loss": 0.0328, "step": 58760 }, { "epoch": 0.9020029161230911, "grad_norm": 0.703851044178009, "learning_rate": 1.3387331315389096e-05, "loss": 0.0468, "step": 58770 }, { "epoch": 0.90215639628578, "grad_norm": 0.4684508144855499, "learning_rate": 1.3384810819799283e-05, "loss": 0.0348, "step": 58780 }, { "epoch": 0.902309876448469, "grad_norm": 0.2598232924938202, "learning_rate": 1.3382290081328412e-05, "loss": 0.0419, "step": 58790 }, { "epoch": 0.9024633566111581, "grad_norm": 0.3991275727748871, "learning_rate": 1.3379769100157374e-05, "loss": 0.0315, "step": 58800 }, { "epoch": 0.902616836773847, "grad_norm": 0.482785165309906, "learning_rate": 1.3377247876467054e-05, "loss": 0.0274, "step": 58810 }, { "epoch": 0.902770316936536, "grad_norm": 0.26617714762687683, "learning_rate": 1.3374726410438372e-05, "loss": 0.0335, "step": 58820 }, { "epoch": 0.9029237970992249, "grad_norm": 0.39671704173088074, "learning_rate": 1.3372204702252258e-05, "loss": 0.0365, "step": 58830 }, { "epoch": 0.9030772772619139, "grad_norm": 0.345858097076416, "learning_rate": 1.3369682752089657e-05, "loss": 0.0364, "step": 58840 }, { "epoch": 0.9032307574246029, "grad_norm": 0.560796856880188, "learning_rate": 1.3367160560131538e-05, "loss": 0.0444, "step": 58850 }, { "epoch": 0.9033842375872918, "grad_norm": 0.5922504663467407, "learning_rate": 1.3364638126558882e-05, "loss": 0.039, "step": 58860 }, { "epoch": 0.9035377177499808, "grad_norm": 0.48907339572906494, "learning_rate": 1.3362115451552686e-05, "loss": 0.0416, "step": 58870 }, { "epoch": 0.9036911979126698, "grad_norm": 0.39926037192344666, "learning_rate": 1.3359592535293975e-05, "loss": 0.0313, "step": 58880 }, { "epoch": 0.9038446780753587, "grad_norm": 0.42530810832977295, "learning_rate": 1.3357069377963777e-05, "loss": 0.0388, "step": 58890 }, { "epoch": 0.9039981582380477, "grad_norm": 0.2998526096343994, "learning_rate": 1.335454597974315e-05, "loss": 0.0297, "step": 58900 }, { "epoch": 0.9041516384007368, "grad_norm": 0.4394433796405792, "learning_rate": 1.3352022340813158e-05, "loss": 0.0354, "step": 58910 }, { "epoch": 0.9043051185634257, "grad_norm": 0.3724853992462158, "learning_rate": 1.3349498461354888e-05, "loss": 0.0369, "step": 58920 }, { "epoch": 0.9044585987261147, "grad_norm": 0.5264756679534912, "learning_rate": 1.3346974341549448e-05, "loss": 0.0391, "step": 58930 }, { "epoch": 0.9046120788888036, "grad_norm": 0.26341021060943604, "learning_rate": 1.3344449981577956e-05, "loss": 0.0434, "step": 58940 }, { "epoch": 0.9047655590514926, "grad_norm": 0.2891751229763031, "learning_rate": 1.3341925381621552e-05, "loss": 0.0398, "step": 58950 }, { "epoch": 0.9049190392141816, "grad_norm": 0.4106176197528839, "learning_rate": 1.3339400541861391e-05, "loss": 0.0335, "step": 58960 }, { "epoch": 0.9050725193768705, "grad_norm": 0.3304380774497986, "learning_rate": 1.3336875462478642e-05, "loss": 0.0357, "step": 58970 }, { "epoch": 0.9052259995395595, "grad_norm": 0.5005921125411987, "learning_rate": 1.3334350143654505e-05, "loss": 0.0252, "step": 58980 }, { "epoch": 0.9053794797022485, "grad_norm": 0.4732997417449951, "learning_rate": 1.3331824585570177e-05, "loss": 0.0339, "step": 58990 }, { "epoch": 0.9055329598649374, "grad_norm": 0.319125771522522, "learning_rate": 1.3329298788406887e-05, "loss": 0.0324, "step": 59000 }, { "epoch": 0.9056864400276264, "grad_norm": 0.468764990568161, "learning_rate": 1.3326772752345878e-05, "loss": 0.0401, "step": 59010 }, { "epoch": 0.9058399201903155, "grad_norm": 0.5911544561386108, "learning_rate": 1.3324246477568403e-05, "loss": 0.0355, "step": 59020 }, { "epoch": 0.9059934003530044, "grad_norm": 0.4459605813026428, "learning_rate": 1.3321719964255745e-05, "loss": 0.0347, "step": 59030 }, { "epoch": 0.9061468805156934, "grad_norm": 0.5194128751754761, "learning_rate": 1.3319193212589188e-05, "loss": 0.0293, "step": 59040 }, { "epoch": 0.9063003606783823, "grad_norm": 0.3635381758213043, "learning_rate": 1.3316666222750052e-05, "loss": 0.0439, "step": 59050 }, { "epoch": 0.9064538408410713, "grad_norm": 0.3267028331756592, "learning_rate": 1.3314138994919654e-05, "loss": 0.04, "step": 59060 }, { "epoch": 0.9066073210037603, "grad_norm": 0.32624635100364685, "learning_rate": 1.3311611529279348e-05, "loss": 0.0343, "step": 59070 }, { "epoch": 0.9067608011664492, "grad_norm": 0.3633507788181305, "learning_rate": 1.3309083826010492e-05, "loss": 0.0376, "step": 59080 }, { "epoch": 0.9069142813291382, "grad_norm": 0.3306790590286255, "learning_rate": 1.3306555885294461e-05, "loss": 0.0314, "step": 59090 }, { "epoch": 0.9070677614918272, "grad_norm": 0.4287186563014984, "learning_rate": 1.3304027707312652e-05, "loss": 0.0437, "step": 59100 }, { "epoch": 0.9072212416545161, "grad_norm": 0.346003919839859, "learning_rate": 1.3301499292246476e-05, "loss": 0.0318, "step": 59110 }, { "epoch": 0.9073747218172051, "grad_norm": 0.34740540385246277, "learning_rate": 1.3298970640277367e-05, "loss": 0.0351, "step": 59120 }, { "epoch": 0.9075282019798941, "grad_norm": 0.24933932721614838, "learning_rate": 1.3296441751586768e-05, "loss": 0.0348, "step": 59130 }, { "epoch": 0.907681682142583, "grad_norm": 0.4558980166912079, "learning_rate": 1.3293912626356142e-05, "loss": 0.0339, "step": 59140 }, { "epoch": 0.9078351623052721, "grad_norm": 0.4081173241138458, "learning_rate": 1.3291383264766973e-05, "loss": 0.0331, "step": 59150 }, { "epoch": 0.9079886424679611, "grad_norm": 0.33359646797180176, "learning_rate": 1.328885366700075e-05, "loss": 0.0307, "step": 59160 }, { "epoch": 0.90814212263065, "grad_norm": 0.44421643018722534, "learning_rate": 1.3286323833238997e-05, "loss": 0.0362, "step": 59170 }, { "epoch": 0.908295602793339, "grad_norm": 0.566389262676239, "learning_rate": 1.328379376366324e-05, "loss": 0.0535, "step": 59180 }, { "epoch": 0.9084490829560279, "grad_norm": 0.4811188876628876, "learning_rate": 1.3281263458455027e-05, "loss": 0.0408, "step": 59190 }, { "epoch": 0.9086025631187169, "grad_norm": 0.34848544001579285, "learning_rate": 1.3278732917795928e-05, "loss": 0.0372, "step": 59200 }, { "epoch": 0.9087560432814059, "grad_norm": 0.39238518476486206, "learning_rate": 1.3276202141867516e-05, "loss": 0.0469, "step": 59210 }, { "epoch": 0.9089095234440948, "grad_norm": 0.4690118730068207, "learning_rate": 1.3273671130851395e-05, "loss": 0.0458, "step": 59220 }, { "epoch": 0.9090630036067838, "grad_norm": 0.6046480536460876, "learning_rate": 1.3271139884929183e-05, "loss": 0.0289, "step": 59230 }, { "epoch": 0.9092164837694728, "grad_norm": 0.33477458357810974, "learning_rate": 1.3268608404282506e-05, "loss": 0.0426, "step": 59240 }, { "epoch": 0.9093699639321617, "grad_norm": 0.5441018342971802, "learning_rate": 1.3266076689093023e-05, "loss": 0.0338, "step": 59250 }, { "epoch": 0.9095234440948508, "grad_norm": 0.5190504789352417, "learning_rate": 1.3263544739542389e-05, "loss": 0.0298, "step": 59260 }, { "epoch": 0.9096769242575398, "grad_norm": 0.4152124524116516, "learning_rate": 1.3261012555812297e-05, "loss": 0.0377, "step": 59270 }, { "epoch": 0.9098304044202287, "grad_norm": 0.32267531752586365, "learning_rate": 1.325848013808444e-05, "loss": 0.03, "step": 59280 }, { "epoch": 0.9099838845829177, "grad_norm": 0.3621916174888611, "learning_rate": 1.3255947486540532e-05, "loss": 0.0349, "step": 59290 }, { "epoch": 0.9101373647456066, "grad_norm": 0.4034360647201538, "learning_rate": 1.3253414601362319e-05, "loss": 0.035, "step": 59300 }, { "epoch": 0.9102908449082956, "grad_norm": 0.4134330153465271, "learning_rate": 1.325088148273154e-05, "loss": 0.042, "step": 59310 }, { "epoch": 0.9104443250709846, "grad_norm": 0.4115663766860962, "learning_rate": 1.3248348130829962e-05, "loss": 0.0302, "step": 59320 }, { "epoch": 0.9105978052336735, "grad_norm": 0.3901185691356659, "learning_rate": 1.3245814545839377e-05, "loss": 0.0353, "step": 59330 }, { "epoch": 0.9107512853963625, "grad_norm": 0.38920971751213074, "learning_rate": 1.3243280727941576e-05, "loss": 0.0388, "step": 59340 }, { "epoch": 0.9109047655590515, "grad_norm": 0.28182223439216614, "learning_rate": 1.3240746677318383e-05, "loss": 0.0376, "step": 59350 }, { "epoch": 0.9110582457217404, "grad_norm": 0.3792369067668915, "learning_rate": 1.3238212394151628e-05, "loss": 0.0392, "step": 59360 }, { "epoch": 0.9112117258844294, "grad_norm": 0.3260458707809448, "learning_rate": 1.3235677878623161e-05, "loss": 0.0359, "step": 59370 }, { "epoch": 0.9113652060471185, "grad_norm": 0.4854401648044586, "learning_rate": 1.3233143130914854e-05, "loss": 0.0351, "step": 59380 }, { "epoch": 0.9115186862098074, "grad_norm": 0.26687633991241455, "learning_rate": 1.3230608151208583e-05, "loss": 0.035, "step": 59390 }, { "epoch": 0.9116721663724964, "grad_norm": 0.2392185777425766, "learning_rate": 1.3228072939686256e-05, "loss": 0.0316, "step": 59400 }, { "epoch": 0.9118256465351853, "grad_norm": 0.3592475652694702, "learning_rate": 1.3225537496529786e-05, "loss": 0.0339, "step": 59410 }, { "epoch": 0.9119791266978743, "grad_norm": 0.4948042631149292, "learning_rate": 1.3223001821921103e-05, "loss": 0.0353, "step": 59420 }, { "epoch": 0.9121326068605633, "grad_norm": 0.6236250996589661, "learning_rate": 1.3220465916042169e-05, "loss": 0.0287, "step": 59430 }, { "epoch": 0.9122860870232522, "grad_norm": 0.46751007437705994, "learning_rate": 1.321792977907494e-05, "loss": 0.0537, "step": 59440 }, { "epoch": 0.9124395671859412, "grad_norm": 0.26760831475257874, "learning_rate": 1.3215393411201402e-05, "loss": 0.0321, "step": 59450 }, { "epoch": 0.9125930473486302, "grad_norm": 0.3188903331756592, "learning_rate": 1.3212856812603558e-05, "loss": 0.0337, "step": 59460 }, { "epoch": 0.9127465275113191, "grad_norm": 0.4036185145378113, "learning_rate": 1.3210319983463422e-05, "loss": 0.0354, "step": 59470 }, { "epoch": 0.9129000076740081, "grad_norm": 0.46916496753692627, "learning_rate": 1.3207782923963029e-05, "loss": 0.0344, "step": 59480 }, { "epoch": 0.9130534878366972, "grad_norm": 0.41940802335739136, "learning_rate": 1.3205245634284427e-05, "loss": 0.0332, "step": 59490 }, { "epoch": 0.913206967999386, "grad_norm": 0.33697229623794556, "learning_rate": 1.3202708114609684e-05, "loss": 0.0301, "step": 59500 }, { "epoch": 0.9133604481620751, "grad_norm": 0.4750324785709381, "learning_rate": 1.320017036512088e-05, "loss": 0.0327, "step": 59510 }, { "epoch": 0.9135139283247641, "grad_norm": 0.4309529960155487, "learning_rate": 1.3197632386000112e-05, "loss": 0.0311, "step": 59520 }, { "epoch": 0.913667408487453, "grad_norm": 0.44299575686454773, "learning_rate": 1.3195094177429505e-05, "loss": 0.0422, "step": 59530 }, { "epoch": 0.913820888650142, "grad_norm": 0.3902227580547333, "learning_rate": 1.3192555739591182e-05, "loss": 0.0486, "step": 59540 }, { "epoch": 0.9139743688128309, "grad_norm": 0.45522192120552063, "learning_rate": 1.3190017072667298e-05, "loss": 0.0356, "step": 59550 }, { "epoch": 0.9141278489755199, "grad_norm": 0.3868124186992645, "learning_rate": 1.3187478176840012e-05, "loss": 0.0282, "step": 59560 }, { "epoch": 0.9142813291382089, "grad_norm": 0.3337736427783966, "learning_rate": 1.3184939052291508e-05, "loss": 0.025, "step": 59570 }, { "epoch": 0.9144348093008978, "grad_norm": 0.43628036975860596, "learning_rate": 1.3182399699203985e-05, "loss": 0.041, "step": 59580 }, { "epoch": 0.9145882894635868, "grad_norm": 0.3115597367286682, "learning_rate": 1.3179860117759658e-05, "loss": 0.0293, "step": 59590 }, { "epoch": 0.9147417696262758, "grad_norm": 0.33417773246765137, "learning_rate": 1.3177320308140755e-05, "loss": 0.0333, "step": 59600 }, { "epoch": 0.9148952497889647, "grad_norm": 0.5109925270080566, "learning_rate": 1.3174780270529523e-05, "loss": 0.0413, "step": 59610 }, { "epoch": 0.9150487299516538, "grad_norm": 0.4897007644176483, "learning_rate": 1.3172240005108227e-05, "loss": 0.0487, "step": 59620 }, { "epoch": 0.9152022101143428, "grad_norm": 0.3135022819042206, "learning_rate": 1.316969951205915e-05, "loss": 0.0291, "step": 59630 }, { "epoch": 0.9153556902770317, "grad_norm": 0.5286251306533813, "learning_rate": 1.316715879156458e-05, "loss": 0.0352, "step": 59640 }, { "epoch": 0.9155091704397207, "grad_norm": 0.37437406182289124, "learning_rate": 1.3164617843806835e-05, "loss": 0.0326, "step": 59650 }, { "epoch": 0.9156626506024096, "grad_norm": 0.5217564702033997, "learning_rate": 1.316207666896824e-05, "loss": 0.0341, "step": 59660 }, { "epoch": 0.9158161307650986, "grad_norm": 0.33656245470046997, "learning_rate": 1.3159535267231145e-05, "loss": 0.0298, "step": 59670 }, { "epoch": 0.9159696109277876, "grad_norm": 0.5138997435569763, "learning_rate": 1.3156993638777907e-05, "loss": 0.0386, "step": 59680 }, { "epoch": 0.9161230910904765, "grad_norm": 0.3865245580673218, "learning_rate": 1.3154451783790906e-05, "loss": 0.0304, "step": 59690 }, { "epoch": 0.9162765712531655, "grad_norm": 0.3815128803253174, "learning_rate": 1.3151909702452534e-05, "loss": 0.0311, "step": 59700 }, { "epoch": 0.9164300514158545, "grad_norm": 0.46396633982658386, "learning_rate": 1.3149367394945203e-05, "loss": 0.0298, "step": 59710 }, { "epoch": 0.9165835315785434, "grad_norm": 0.4508172869682312, "learning_rate": 1.3146824861451335e-05, "loss": 0.0347, "step": 59720 }, { "epoch": 0.9167370117412325, "grad_norm": 0.3117154836654663, "learning_rate": 1.314428210215338e-05, "loss": 0.0288, "step": 59730 }, { "epoch": 0.9168904919039215, "grad_norm": 0.3893009424209595, "learning_rate": 1.3141739117233786e-05, "loss": 0.0278, "step": 59740 }, { "epoch": 0.9170439720666104, "grad_norm": 0.38029104471206665, "learning_rate": 1.3139195906875041e-05, "loss": 0.0407, "step": 59750 }, { "epoch": 0.9171974522292994, "grad_norm": 0.32481899857521057, "learning_rate": 1.3136652471259624e-05, "loss": 0.0343, "step": 59760 }, { "epoch": 0.9173509323919883, "grad_norm": 0.3710475564002991, "learning_rate": 1.313410881057005e-05, "loss": 0.0278, "step": 59770 }, { "epoch": 0.9175044125546773, "grad_norm": 0.4456966817378998, "learning_rate": 1.3131564924988838e-05, "loss": 0.0333, "step": 59780 }, { "epoch": 0.9176578927173663, "grad_norm": 0.5460444092750549, "learning_rate": 1.3129020814698527e-05, "loss": 0.0358, "step": 59790 }, { "epoch": 0.9178113728800552, "grad_norm": 0.3311873972415924, "learning_rate": 1.312647647988168e-05, "loss": 0.0334, "step": 59800 }, { "epoch": 0.9179648530427442, "grad_norm": 0.24370914697647095, "learning_rate": 1.3123931920720861e-05, "loss": 0.0367, "step": 59810 }, { "epoch": 0.9181183332054332, "grad_norm": 0.35895979404449463, "learning_rate": 1.312138713739866e-05, "loss": 0.0383, "step": 59820 }, { "epoch": 0.9182718133681221, "grad_norm": 0.5034216046333313, "learning_rate": 1.311884213009768e-05, "loss": 0.0312, "step": 59830 }, { "epoch": 0.9184252935308111, "grad_norm": 0.354068398475647, "learning_rate": 1.311629689900054e-05, "loss": 0.0344, "step": 59840 }, { "epoch": 0.9185787736935002, "grad_norm": 0.6354578137397766, "learning_rate": 1.3113751444289884e-05, "loss": 0.0429, "step": 59850 }, { "epoch": 0.9187322538561891, "grad_norm": 0.4670525789260864, "learning_rate": 1.3111205766148354e-05, "loss": 0.0319, "step": 59860 }, { "epoch": 0.9188857340188781, "grad_norm": 0.4413506090641022, "learning_rate": 1.3108659864758621e-05, "loss": 0.0358, "step": 59870 }, { "epoch": 0.9190392141815671, "grad_norm": 0.4407128393650055, "learning_rate": 1.3106113740303374e-05, "loss": 0.0339, "step": 59880 }, { "epoch": 0.919192694344256, "grad_norm": 0.4161911904811859, "learning_rate": 1.3103567392965309e-05, "loss": 0.0392, "step": 59890 }, { "epoch": 0.919346174506945, "grad_norm": 0.3830898404121399, "learning_rate": 1.310102082292714e-05, "loss": 0.0415, "step": 59900 }, { "epoch": 0.9194996546696339, "grad_norm": 0.26359739899635315, "learning_rate": 1.3098474030371604e-05, "loss": 0.0406, "step": 59910 }, { "epoch": 0.9196531348323229, "grad_norm": 0.42038923501968384, "learning_rate": 1.3095927015481445e-05, "loss": 0.0392, "step": 59920 }, { "epoch": 0.9198066149950119, "grad_norm": 0.5030753016471863, "learning_rate": 1.3093379778439433e-05, "loss": 0.0402, "step": 59930 }, { "epoch": 0.9199600951577008, "grad_norm": 0.3089680075645447, "learning_rate": 1.3090832319428338e-05, "loss": 0.0311, "step": 59940 }, { "epoch": 0.9201135753203898, "grad_norm": 0.4135497212409973, "learning_rate": 1.3088284638630966e-05, "loss": 0.0321, "step": 59950 }, { "epoch": 0.9202670554830789, "grad_norm": 0.3903934061527252, "learning_rate": 1.3085736736230122e-05, "loss": 0.0371, "step": 59960 }, { "epoch": 0.9204205356457678, "grad_norm": 0.4468834102153778, "learning_rate": 1.3083188612408635e-05, "loss": 0.0338, "step": 59970 }, { "epoch": 0.9205740158084568, "grad_norm": 0.6951948404312134, "learning_rate": 1.3080640267349356e-05, "loss": 0.0415, "step": 59980 }, { "epoch": 0.9207274959711458, "grad_norm": 0.3923472762107849, "learning_rate": 1.3078091701235131e-05, "loss": 0.0336, "step": 59990 }, { "epoch": 0.9208809761338347, "grad_norm": 0.44010889530181885, "learning_rate": 1.3075542914248847e-05, "loss": 0.0387, "step": 60000 }, { "epoch": 0.9210344562965237, "grad_norm": 0.3204006552696228, "learning_rate": 1.307299390657339e-05, "loss": 0.0337, "step": 60010 }, { "epoch": 0.9211879364592126, "grad_norm": 0.5304081439971924, "learning_rate": 1.3070444678391669e-05, "loss": 0.0435, "step": 60020 }, { "epoch": 0.9213414166219016, "grad_norm": 0.38237887620925903, "learning_rate": 1.3067895229886602e-05, "loss": 0.0344, "step": 60030 }, { "epoch": 0.9214948967845906, "grad_norm": 0.3029469847679138, "learning_rate": 1.3065345561241134e-05, "loss": 0.0374, "step": 60040 }, { "epoch": 0.9216483769472795, "grad_norm": 0.36677730083465576, "learning_rate": 1.3062795672638217e-05, "loss": 0.0358, "step": 60050 }, { "epoch": 0.9218018571099685, "grad_norm": 0.34324315190315247, "learning_rate": 1.306024556426082e-05, "loss": 0.0324, "step": 60060 }, { "epoch": 0.9219553372726575, "grad_norm": 0.3590920567512512, "learning_rate": 1.3057695236291931e-05, "loss": 0.0306, "step": 60070 }, { "epoch": 0.9221088174353465, "grad_norm": 0.348197877407074, "learning_rate": 1.305514468891455e-05, "loss": 0.0339, "step": 60080 }, { "epoch": 0.9222622975980355, "grad_norm": 0.4648938775062561, "learning_rate": 1.3052593922311695e-05, "loss": 0.0337, "step": 60090 }, { "epoch": 0.9224157777607245, "grad_norm": 0.5032097101211548, "learning_rate": 1.3050042936666401e-05, "loss": 0.0367, "step": 60100 }, { "epoch": 0.9225692579234134, "grad_norm": 0.25494539737701416, "learning_rate": 1.3047491732161714e-05, "loss": 0.0376, "step": 60110 }, { "epoch": 0.9227227380861024, "grad_norm": 0.3379794955253601, "learning_rate": 1.3044940308980701e-05, "loss": 0.0355, "step": 60120 }, { "epoch": 0.9228762182487913, "grad_norm": 0.4546125829219818, "learning_rate": 1.304238866730644e-05, "loss": 0.0352, "step": 60130 }, { "epoch": 0.9230296984114803, "grad_norm": 0.4633415937423706, "learning_rate": 1.3039836807322029e-05, "loss": 0.0293, "step": 60140 }, { "epoch": 0.9231831785741693, "grad_norm": 0.40495777130126953, "learning_rate": 1.303728472921058e-05, "loss": 0.0451, "step": 60150 }, { "epoch": 0.9233366587368582, "grad_norm": 0.375275194644928, "learning_rate": 1.3034732433155217e-05, "loss": 0.0305, "step": 60160 }, { "epoch": 0.9234901388995472, "grad_norm": 0.38276124000549316, "learning_rate": 1.3032179919339091e-05, "loss": 0.033, "step": 60170 }, { "epoch": 0.9236436190622362, "grad_norm": 0.2728043496608734, "learning_rate": 1.302962718794535e-05, "loss": 0.0365, "step": 60180 }, { "epoch": 0.9237970992249251, "grad_norm": 0.26678651571273804, "learning_rate": 1.3027074239157176e-05, "loss": 0.0372, "step": 60190 }, { "epoch": 0.9239505793876142, "grad_norm": 0.367906779050827, "learning_rate": 1.3024521073157756e-05, "loss": 0.0328, "step": 60200 }, { "epoch": 0.9241040595503032, "grad_norm": 0.42948684096336365, "learning_rate": 1.3021967690130293e-05, "loss": 0.0339, "step": 60210 }, { "epoch": 0.9242575397129921, "grad_norm": 0.5055098533630371, "learning_rate": 1.3019414090258016e-05, "loss": 0.0357, "step": 60220 }, { "epoch": 0.9244110198756811, "grad_norm": 0.35734274983406067, "learning_rate": 1.301686027372415e-05, "loss": 0.03, "step": 60230 }, { "epoch": 0.92456450003837, "grad_norm": 0.3575472831726074, "learning_rate": 1.3014306240711956e-05, "loss": 0.0292, "step": 60240 }, { "epoch": 0.924717980201059, "grad_norm": 0.5483100414276123, "learning_rate": 1.30117519914047e-05, "loss": 0.0417, "step": 60250 }, { "epoch": 0.924871460363748, "grad_norm": 0.25106367468833923, "learning_rate": 1.3009197525985662e-05, "loss": 0.0326, "step": 60260 }, { "epoch": 0.9250249405264369, "grad_norm": 0.3489813506603241, "learning_rate": 1.3006642844638143e-05, "loss": 0.0349, "step": 60270 }, { "epoch": 0.9251784206891259, "grad_norm": 0.37295231223106384, "learning_rate": 1.3004087947545455e-05, "loss": 0.0335, "step": 60280 }, { "epoch": 0.9253319008518149, "grad_norm": 0.3577422797679901, "learning_rate": 1.3001532834890932e-05, "loss": 0.0327, "step": 60290 }, { "epoch": 0.9254853810145038, "grad_norm": 0.3125772476196289, "learning_rate": 1.2998977506857916e-05, "loss": 0.032, "step": 60300 }, { "epoch": 0.9256388611771929, "grad_norm": 0.4677307903766632, "learning_rate": 1.2996421963629769e-05, "loss": 0.039, "step": 60310 }, { "epoch": 0.9257923413398819, "grad_norm": 0.48267120122909546, "learning_rate": 1.2993866205389865e-05, "loss": 0.0385, "step": 60320 }, { "epoch": 0.9259458215025708, "grad_norm": 0.331800252199173, "learning_rate": 1.2991310232321594e-05, "loss": 0.0371, "step": 60330 }, { "epoch": 0.9260993016652598, "grad_norm": 0.40008744597435, "learning_rate": 1.2988754044608368e-05, "loss": 0.036, "step": 60340 }, { "epoch": 0.9262527818279488, "grad_norm": 0.44565021991729736, "learning_rate": 1.2986197642433607e-05, "loss": 0.0359, "step": 60350 }, { "epoch": 0.9264062619906377, "grad_norm": 0.39935582876205444, "learning_rate": 1.2983641025980746e-05, "loss": 0.0322, "step": 60360 }, { "epoch": 0.9265597421533267, "grad_norm": 1.0512182712554932, "learning_rate": 1.2981084195433242e-05, "loss": 0.0416, "step": 60370 }, { "epoch": 0.9267132223160156, "grad_norm": 0.4791205823421478, "learning_rate": 1.297852715097456e-05, "loss": 0.0435, "step": 60380 }, { "epoch": 0.9268667024787046, "grad_norm": 0.5192086100578308, "learning_rate": 1.2975969892788186e-05, "loss": 0.0301, "step": 60390 }, { "epoch": 0.9270201826413936, "grad_norm": 0.2709321677684784, "learning_rate": 1.2973412421057616e-05, "loss": 0.0292, "step": 60400 }, { "epoch": 0.9271736628040825, "grad_norm": 0.5809021592140198, "learning_rate": 1.297085473596637e-05, "loss": 0.0363, "step": 60410 }, { "epoch": 0.9273271429667715, "grad_norm": 0.40554267168045044, "learning_rate": 1.2968296837697973e-05, "loss": 0.0323, "step": 60420 }, { "epoch": 0.9274806231294606, "grad_norm": 0.445139616727829, "learning_rate": 1.296573872643597e-05, "loss": 0.0439, "step": 60430 }, { "epoch": 0.9276341032921495, "grad_norm": 0.4832255244255066, "learning_rate": 1.2963180402363925e-05, "loss": 0.0388, "step": 60440 }, { "epoch": 0.9277875834548385, "grad_norm": 0.6928964853286743, "learning_rate": 1.2960621865665409e-05, "loss": 0.0379, "step": 60450 }, { "epoch": 0.9279410636175275, "grad_norm": 0.6242285966873169, "learning_rate": 1.2958063116524016e-05, "loss": 0.0469, "step": 60460 }, { "epoch": 0.9280945437802164, "grad_norm": 0.5434939861297607, "learning_rate": 1.2955504155123348e-05, "loss": 0.0369, "step": 60470 }, { "epoch": 0.9282480239429054, "grad_norm": 0.38919878005981445, "learning_rate": 1.2952944981647033e-05, "loss": 0.0398, "step": 60480 }, { "epoch": 0.9284015041055943, "grad_norm": 0.3083818256855011, "learning_rate": 1.2950385596278699e-05, "loss": 0.0289, "step": 60490 }, { "epoch": 0.9285549842682833, "grad_norm": 0.35378560423851013, "learning_rate": 1.2947825999202006e-05, "loss": 0.0514, "step": 60500 }, { "epoch": 0.9287084644309723, "grad_norm": 0.6216208934783936, "learning_rate": 1.2945266190600616e-05, "loss": 0.0433, "step": 60510 }, { "epoch": 0.9288619445936612, "grad_norm": 0.24462716281414032, "learning_rate": 1.2942706170658213e-05, "loss": 0.04, "step": 60520 }, { "epoch": 0.9290154247563502, "grad_norm": 0.5530585646629333, "learning_rate": 1.2940145939558493e-05, "loss": 0.0359, "step": 60530 }, { "epoch": 0.9291689049190393, "grad_norm": 0.41422271728515625, "learning_rate": 1.2937585497485168e-05, "loss": 0.0334, "step": 60540 }, { "epoch": 0.9293223850817282, "grad_norm": 0.31997543573379517, "learning_rate": 1.2935024844621966e-05, "loss": 0.0361, "step": 60550 }, { "epoch": 0.9294758652444172, "grad_norm": 0.4382784962654114, "learning_rate": 1.2932463981152633e-05, "loss": 0.0328, "step": 60560 }, { "epoch": 0.9296293454071062, "grad_norm": 0.33816128969192505, "learning_rate": 1.292990290726092e-05, "loss": 0.0436, "step": 60570 }, { "epoch": 0.9297828255697951, "grad_norm": 0.6545005440711975, "learning_rate": 1.2927341623130605e-05, "loss": 0.0407, "step": 60580 }, { "epoch": 0.9299363057324841, "grad_norm": 0.281298965215683, "learning_rate": 1.2924780128945473e-05, "loss": 0.0358, "step": 60590 }, { "epoch": 0.930089785895173, "grad_norm": 0.8046485781669617, "learning_rate": 1.2922218424889332e-05, "loss": 0.0442, "step": 60600 }, { "epoch": 0.930243266057862, "grad_norm": 0.41096770763397217, "learning_rate": 1.2919656511145995e-05, "loss": 0.0319, "step": 60610 }, { "epoch": 0.930396746220551, "grad_norm": 0.4089379608631134, "learning_rate": 1.2917094387899296e-05, "loss": 0.0324, "step": 60620 }, { "epoch": 0.9305502263832399, "grad_norm": 0.3340871036052704, "learning_rate": 1.2914532055333082e-05, "loss": 0.022, "step": 60630 }, { "epoch": 0.9307037065459289, "grad_norm": 0.38481801748275757, "learning_rate": 1.2911969513631223e-05, "loss": 0.0332, "step": 60640 }, { "epoch": 0.930857186708618, "grad_norm": 0.32638686895370483, "learning_rate": 1.2909406762977587e-05, "loss": 0.0381, "step": 60650 }, { "epoch": 0.9310106668713068, "grad_norm": 0.2874877154827118, "learning_rate": 1.2906843803556073e-05, "loss": 0.0448, "step": 60660 }, { "epoch": 0.9311641470339959, "grad_norm": 0.27322787046432495, "learning_rate": 1.2904280635550592e-05, "loss": 0.0336, "step": 60670 }, { "epoch": 0.9313176271966849, "grad_norm": 0.5143307447433472, "learning_rate": 1.2901717259145058e-05, "loss": 0.0376, "step": 60680 }, { "epoch": 0.9314711073593738, "grad_norm": 0.3496617078781128, "learning_rate": 1.2899153674523422e-05, "loss": 0.0362, "step": 60690 }, { "epoch": 0.9316245875220628, "grad_norm": 0.3492899537086487, "learning_rate": 1.2896589881869623e-05, "loss": 0.03, "step": 60700 }, { "epoch": 0.9317780676847518, "grad_norm": 0.3644510805606842, "learning_rate": 1.2894025881367638e-05, "loss": 0.0349, "step": 60710 }, { "epoch": 0.9319315478474407, "grad_norm": 0.37360814213752747, "learning_rate": 1.2891461673201451e-05, "loss": 0.0327, "step": 60720 }, { "epoch": 0.9320850280101297, "grad_norm": 0.3871225118637085, "learning_rate": 1.2888897257555048e-05, "loss": 0.0365, "step": 60730 }, { "epoch": 0.9322385081728186, "grad_norm": 0.327072411775589, "learning_rate": 1.2886332634612458e-05, "loss": 0.037, "step": 60740 }, { "epoch": 0.9323919883355076, "grad_norm": 0.33725160360336304, "learning_rate": 1.2883767804557696e-05, "loss": 0.0289, "step": 60750 }, { "epoch": 0.9325454684981966, "grad_norm": 0.5451302528381348, "learning_rate": 1.2881202767574808e-05, "loss": 0.0431, "step": 60760 }, { "epoch": 0.9326989486608855, "grad_norm": 0.49603691697120667, "learning_rate": 1.2878637523847855e-05, "loss": 0.0343, "step": 60770 }, { "epoch": 0.9328524288235746, "grad_norm": 0.35147589445114136, "learning_rate": 1.2876072073560904e-05, "loss": 0.0323, "step": 60780 }, { "epoch": 0.9330059089862636, "grad_norm": 0.495137482881546, "learning_rate": 1.287350641689804e-05, "loss": 0.0452, "step": 60790 }, { "epoch": 0.9331593891489525, "grad_norm": 0.41166403889656067, "learning_rate": 1.2870940554043377e-05, "loss": 0.0418, "step": 60800 }, { "epoch": 0.9333128693116415, "grad_norm": 0.39532142877578735, "learning_rate": 1.2868374485181017e-05, "loss": 0.0396, "step": 60810 }, { "epoch": 0.9334663494743305, "grad_norm": 0.35858985781669617, "learning_rate": 1.2865808210495099e-05, "loss": 0.0333, "step": 60820 }, { "epoch": 0.9336198296370194, "grad_norm": 0.3926067650318146, "learning_rate": 1.286324173016977e-05, "loss": 0.0337, "step": 60830 }, { "epoch": 0.9337733097997084, "grad_norm": 0.4400257468223572, "learning_rate": 1.2860675044389182e-05, "loss": 0.038, "step": 60840 }, { "epoch": 0.9339267899623973, "grad_norm": 0.41178226470947266, "learning_rate": 1.2858108153337522e-05, "loss": 0.0317, "step": 60850 }, { "epoch": 0.9340802701250863, "grad_norm": 0.3866123557090759, "learning_rate": 1.2855541057198972e-05, "loss": 0.0349, "step": 60860 }, { "epoch": 0.9342337502877753, "grad_norm": 0.31023985147476196, "learning_rate": 1.285297375615774e-05, "loss": 0.0376, "step": 60870 }, { "epoch": 0.9343872304504642, "grad_norm": 0.4549044370651245, "learning_rate": 1.2850406250398048e-05, "loss": 0.032, "step": 60880 }, { "epoch": 0.9345407106131532, "grad_norm": 0.5545321702957153, "learning_rate": 1.2847838540104125e-05, "loss": 0.0336, "step": 60890 }, { "epoch": 0.9346941907758423, "grad_norm": 0.3164960741996765, "learning_rate": 1.2845270625460226e-05, "loss": 0.0337, "step": 60900 }, { "epoch": 0.9348476709385312, "grad_norm": 0.38443395495414734, "learning_rate": 1.2842702506650611e-05, "loss": 0.0322, "step": 60910 }, { "epoch": 0.9350011511012202, "grad_norm": 0.4002346098423004, "learning_rate": 1.2840134183859556e-05, "loss": 0.0297, "step": 60920 }, { "epoch": 0.9351546312639092, "grad_norm": 0.4168824851512909, "learning_rate": 1.283756565727136e-05, "loss": 0.0397, "step": 60930 }, { "epoch": 0.9353081114265981, "grad_norm": 0.4151267409324646, "learning_rate": 1.2834996927070326e-05, "loss": 0.0422, "step": 60940 }, { "epoch": 0.9354615915892871, "grad_norm": 0.33667150139808655, "learning_rate": 1.283242799344078e-05, "loss": 0.0429, "step": 60950 }, { "epoch": 0.935615071751976, "grad_norm": 0.34375643730163574, "learning_rate": 1.2829858856567054e-05, "loss": 0.035, "step": 60960 }, { "epoch": 0.935768551914665, "grad_norm": 0.5933964848518372, "learning_rate": 1.2827289516633506e-05, "loss": 0.0414, "step": 60970 }, { "epoch": 0.935922032077354, "grad_norm": 0.5386691093444824, "learning_rate": 1.2824719973824496e-05, "loss": 0.0361, "step": 60980 }, { "epoch": 0.9360755122400429, "grad_norm": 0.2862400412559509, "learning_rate": 1.2822150228324406e-05, "loss": 0.0261, "step": 60990 }, { "epoch": 0.9362289924027319, "grad_norm": 0.35921695828437805, "learning_rate": 1.2819580280317631e-05, "loss": 0.0336, "step": 61000 }, { "epoch": 0.9362289924027319, "eval_loss": 0.023720689117908478, "eval_runtime": 4.8147, "eval_samples_per_second": 41.539, "eval_steps_per_second": 20.77, "step": 61000 }, { "epoch": 0.936382472565421, "grad_norm": 0.4193841814994812, "learning_rate": 1.2817010129988585e-05, "loss": 0.0406, "step": 61010 }, { "epoch": 0.9365359527281099, "grad_norm": 0.29001790285110474, "learning_rate": 1.2814439777521685e-05, "loss": 0.026, "step": 61020 }, { "epoch": 0.9366894328907989, "grad_norm": 0.40122905373573303, "learning_rate": 1.2811869223101377e-05, "loss": 0.0395, "step": 61030 }, { "epoch": 0.9368429130534879, "grad_norm": 0.297353595495224, "learning_rate": 1.2809298466912105e-05, "loss": 0.0332, "step": 61040 }, { "epoch": 0.9369963932161768, "grad_norm": 0.3378528654575348, "learning_rate": 1.2806727509138346e-05, "loss": 0.0324, "step": 61050 }, { "epoch": 0.9371498733788658, "grad_norm": 0.32521799206733704, "learning_rate": 1.2804156349964578e-05, "loss": 0.0358, "step": 61060 }, { "epoch": 0.9373033535415548, "grad_norm": 0.4352814257144928, "learning_rate": 1.2801584989575298e-05, "loss": 0.034, "step": 61070 }, { "epoch": 0.9374568337042437, "grad_norm": 0.4009550213813782, "learning_rate": 1.2799013428155012e-05, "loss": 0.0359, "step": 61080 }, { "epoch": 0.9376103138669327, "grad_norm": 0.5055169463157654, "learning_rate": 1.2796441665888254e-05, "loss": 0.034, "step": 61090 }, { "epoch": 0.9377637940296216, "grad_norm": 0.2957785427570343, "learning_rate": 1.2793869702959558e-05, "loss": 0.0348, "step": 61100 }, { "epoch": 0.9379172741923106, "grad_norm": 0.3072485029697418, "learning_rate": 1.2791297539553483e-05, "loss": 0.0328, "step": 61110 }, { "epoch": 0.9380707543549996, "grad_norm": 0.3650927245616913, "learning_rate": 1.278872517585459e-05, "loss": 0.0365, "step": 61120 }, { "epoch": 0.9382242345176885, "grad_norm": 0.3447363078594208, "learning_rate": 1.2786152612047466e-05, "loss": 0.0283, "step": 61130 }, { "epoch": 0.9383777146803776, "grad_norm": 0.3852880895137787, "learning_rate": 1.2783579848316714e-05, "loss": 0.032, "step": 61140 }, { "epoch": 0.9385311948430666, "grad_norm": 0.5975598096847534, "learning_rate": 1.2781006884846939e-05, "loss": 0.0379, "step": 61150 }, { "epoch": 0.9386846750057555, "grad_norm": 0.29935726523399353, "learning_rate": 1.2778433721822766e-05, "loss": 0.0421, "step": 61160 }, { "epoch": 0.9388381551684445, "grad_norm": 0.4048680365085602, "learning_rate": 1.277586035942884e-05, "loss": 0.0347, "step": 61170 }, { "epoch": 0.9389916353311335, "grad_norm": 0.2899278700351715, "learning_rate": 1.2773286797849812e-05, "loss": 0.029, "step": 61180 }, { "epoch": 0.9391451154938224, "grad_norm": 0.49587830901145935, "learning_rate": 1.2770713037270351e-05, "loss": 0.0388, "step": 61190 }, { "epoch": 0.9392985956565114, "grad_norm": 0.41033998131752014, "learning_rate": 1.2768139077875144e-05, "loss": 0.0375, "step": 61200 }, { "epoch": 0.9394520758192003, "grad_norm": 0.3517768979072571, "learning_rate": 1.2765564919848883e-05, "loss": 0.0329, "step": 61210 }, { "epoch": 0.9396055559818893, "grad_norm": 0.45392918586730957, "learning_rate": 1.2762990563376288e-05, "loss": 0.0343, "step": 61220 }, { "epoch": 0.9397590361445783, "grad_norm": 0.3807833194732666, "learning_rate": 1.2760416008642073e-05, "loss": 0.0387, "step": 61230 }, { "epoch": 0.9399125163072672, "grad_norm": 0.4615933299064636, "learning_rate": 1.275784125583099e-05, "loss": 0.0357, "step": 61240 }, { "epoch": 0.9400659964699563, "grad_norm": 0.2787884473800659, "learning_rate": 1.2755266305127787e-05, "loss": 0.0343, "step": 61250 }, { "epoch": 0.9402194766326453, "grad_norm": 0.4098021388053894, "learning_rate": 1.2752691156717232e-05, "loss": 0.0389, "step": 61260 }, { "epoch": 0.9403729567953342, "grad_norm": 0.29374757409095764, "learning_rate": 1.2750115810784112e-05, "loss": 0.0333, "step": 61270 }, { "epoch": 0.9405264369580232, "grad_norm": 0.2933919131755829, "learning_rate": 1.2747540267513219e-05, "loss": 0.0409, "step": 61280 }, { "epoch": 0.9406799171207122, "grad_norm": 0.3077133595943451, "learning_rate": 1.2744964527089368e-05, "loss": 0.0338, "step": 61290 }, { "epoch": 0.9408333972834011, "grad_norm": 0.7778107523918152, "learning_rate": 1.2742388589697383e-05, "loss": 0.0341, "step": 61300 }, { "epoch": 0.9409868774460901, "grad_norm": 0.48261016607284546, "learning_rate": 1.2739812455522102e-05, "loss": 0.0422, "step": 61310 }, { "epoch": 0.941140357608779, "grad_norm": 0.33849942684173584, "learning_rate": 1.2737236124748384e-05, "loss": 0.0396, "step": 61320 }, { "epoch": 0.941293837771468, "grad_norm": 0.39642035961151123, "learning_rate": 1.2734659597561088e-05, "loss": 0.0368, "step": 61330 }, { "epoch": 0.941447317934157, "grad_norm": 0.33522242307662964, "learning_rate": 1.27320828741451e-05, "loss": 0.027, "step": 61340 }, { "epoch": 0.9416007980968459, "grad_norm": 0.4395402669906616, "learning_rate": 1.2729505954685317e-05, "loss": 0.035, "step": 61350 }, { "epoch": 0.941754278259535, "grad_norm": 0.3165808618068695, "learning_rate": 1.2726928839366649e-05, "loss": 0.0326, "step": 61360 }, { "epoch": 0.941907758422224, "grad_norm": 0.3413073718547821, "learning_rate": 1.2724351528374017e-05, "loss": 0.0278, "step": 61370 }, { "epoch": 0.9420612385849129, "grad_norm": 0.36554163694381714, "learning_rate": 1.2721774021892364e-05, "loss": 0.0355, "step": 61380 }, { "epoch": 0.9422147187476019, "grad_norm": 0.4504958391189575, "learning_rate": 1.2719196320106633e-05, "loss": 0.0325, "step": 61390 }, { "epoch": 0.9423681989102909, "grad_norm": 0.412327378988266, "learning_rate": 1.2716618423201801e-05, "loss": 0.0399, "step": 61400 }, { "epoch": 0.9425216790729798, "grad_norm": 0.46201539039611816, "learning_rate": 1.271404033136284e-05, "loss": 0.0403, "step": 61410 }, { "epoch": 0.9426751592356688, "grad_norm": 0.6046605110168457, "learning_rate": 1.2711462044774747e-05, "loss": 0.0407, "step": 61420 }, { "epoch": 0.9428286393983578, "grad_norm": 0.2991393804550171, "learning_rate": 1.270888356362253e-05, "loss": 0.0313, "step": 61430 }, { "epoch": 0.9429821195610467, "grad_norm": 0.4227268397808075, "learning_rate": 1.2706304888091209e-05, "loss": 0.0359, "step": 61440 }, { "epoch": 0.9431355997237357, "grad_norm": 0.3478432595729828, "learning_rate": 1.2703726018365825e-05, "loss": 0.027, "step": 61450 }, { "epoch": 0.9432890798864246, "grad_norm": 0.3928478956222534, "learning_rate": 1.270114695463142e-05, "loss": 0.0339, "step": 61460 }, { "epoch": 0.9434425600491136, "grad_norm": 0.337910532951355, "learning_rate": 1.2698567697073066e-05, "loss": 0.0281, "step": 61470 }, { "epoch": 0.9435960402118027, "grad_norm": 0.33848804235458374, "learning_rate": 1.2695988245875835e-05, "loss": 0.0315, "step": 61480 }, { "epoch": 0.9437495203744916, "grad_norm": 0.42834287881851196, "learning_rate": 1.2693408601224817e-05, "loss": 0.0395, "step": 61490 }, { "epoch": 0.9439030005371806, "grad_norm": 0.3954629898071289, "learning_rate": 1.2690828763305123e-05, "loss": 0.0325, "step": 61500 }, { "epoch": 0.9440564806998696, "grad_norm": 0.39981526136398315, "learning_rate": 1.2688248732301872e-05, "loss": 0.031, "step": 61510 }, { "epoch": 0.9442099608625585, "grad_norm": 0.4030080735683441, "learning_rate": 1.2685668508400193e-05, "loss": 0.0289, "step": 61520 }, { "epoch": 0.9443634410252475, "grad_norm": 0.4127417802810669, "learning_rate": 1.2683088091785237e-05, "loss": 0.0301, "step": 61530 }, { "epoch": 0.9445169211879365, "grad_norm": 0.40100210905075073, "learning_rate": 1.2680507482642157e-05, "loss": 0.0424, "step": 61540 }, { "epoch": 0.9446704013506254, "grad_norm": 0.43726128339767456, "learning_rate": 1.267792668115614e-05, "loss": 0.0421, "step": 61550 }, { "epoch": 0.9448238815133144, "grad_norm": 0.2959340512752533, "learning_rate": 1.2675345687512364e-05, "loss": 0.0328, "step": 61560 }, { "epoch": 0.9449773616760033, "grad_norm": 0.4465321898460388, "learning_rate": 1.2672764501896035e-05, "loss": 0.0347, "step": 61570 }, { "epoch": 0.9451308418386923, "grad_norm": 0.39160582423210144, "learning_rate": 1.267018312449237e-05, "loss": 0.0379, "step": 61580 }, { "epoch": 0.9452843220013813, "grad_norm": 0.413323312997818, "learning_rate": 1.2667601555486599e-05, "loss": 0.036, "step": 61590 }, { "epoch": 0.9454378021640703, "grad_norm": 0.414630651473999, "learning_rate": 1.2665019795063963e-05, "loss": 0.0288, "step": 61600 }, { "epoch": 0.9455912823267593, "grad_norm": 0.4150051772594452, "learning_rate": 1.266243784340972e-05, "loss": 0.0324, "step": 61610 }, { "epoch": 0.9457447624894483, "grad_norm": 0.4599013924598694, "learning_rate": 1.2659855700709143e-05, "loss": 0.0408, "step": 61620 }, { "epoch": 0.9458982426521372, "grad_norm": 0.38049837946891785, "learning_rate": 1.2657273367147511e-05, "loss": 0.0321, "step": 61630 }, { "epoch": 0.9460517228148262, "grad_norm": 0.46542292833328247, "learning_rate": 1.2654690842910132e-05, "loss": 0.0329, "step": 61640 }, { "epoch": 0.9462052029775152, "grad_norm": 0.43397462368011475, "learning_rate": 1.2652108128182308e-05, "loss": 0.0433, "step": 61650 }, { "epoch": 0.9463586831402041, "grad_norm": 0.308613657951355, "learning_rate": 1.2649525223149373e-05, "loss": 0.0309, "step": 61660 }, { "epoch": 0.9465121633028931, "grad_norm": 0.4628499448299408, "learning_rate": 1.264694212799666e-05, "loss": 0.0346, "step": 61670 }, { "epoch": 0.946665643465582, "grad_norm": 0.3179755210876465, "learning_rate": 1.2644358842909524e-05, "loss": 0.0301, "step": 61680 }, { "epoch": 0.946819123628271, "grad_norm": 0.3464799225330353, "learning_rate": 1.2641775368073334e-05, "loss": 0.04, "step": 61690 }, { "epoch": 0.94697260379096, "grad_norm": 0.46717724204063416, "learning_rate": 1.2639191703673468e-05, "loss": 0.0339, "step": 61700 }, { "epoch": 0.947126083953649, "grad_norm": 0.36154791712760925, "learning_rate": 1.263660784989532e-05, "loss": 0.0367, "step": 61710 }, { "epoch": 0.947279564116338, "grad_norm": 0.525736927986145, "learning_rate": 1.2634023806924298e-05, "loss": 0.0363, "step": 61720 }, { "epoch": 0.947433044279027, "grad_norm": 0.3502610921859741, "learning_rate": 1.2631439574945821e-05, "loss": 0.0323, "step": 61730 }, { "epoch": 0.9475865244417159, "grad_norm": 0.32954487204551697, "learning_rate": 1.262885515414533e-05, "loss": 0.0342, "step": 61740 }, { "epoch": 0.9477400046044049, "grad_norm": 0.46511122584342957, "learning_rate": 1.2626270544708263e-05, "loss": 0.0285, "step": 61750 }, { "epoch": 0.9478934847670939, "grad_norm": 0.38023409247398376, "learning_rate": 1.262368574682009e-05, "loss": 0.0277, "step": 61760 }, { "epoch": 0.9480469649297828, "grad_norm": 0.3214097023010254, "learning_rate": 1.2621100760666285e-05, "loss": 0.0271, "step": 61770 }, { "epoch": 0.9482004450924718, "grad_norm": 0.5518763065338135, "learning_rate": 1.2618515586432332e-05, "loss": 0.0352, "step": 61780 }, { "epoch": 0.9483539252551608, "grad_norm": 0.269785612821579, "learning_rate": 1.2615930224303736e-05, "loss": 0.0357, "step": 61790 }, { "epoch": 0.9485074054178497, "grad_norm": 0.4468439817428589, "learning_rate": 1.2613344674466016e-05, "loss": 0.0363, "step": 61800 }, { "epoch": 0.9486608855805387, "grad_norm": 0.38381001353263855, "learning_rate": 1.2610758937104694e-05, "loss": 0.0301, "step": 61810 }, { "epoch": 0.9488143657432276, "grad_norm": 0.30855047702789307, "learning_rate": 1.2608173012405323e-05, "loss": 0.0531, "step": 61820 }, { "epoch": 0.9489678459059167, "grad_norm": 0.40818825364112854, "learning_rate": 1.2605586900553446e-05, "loss": 0.0346, "step": 61830 }, { "epoch": 0.9491213260686057, "grad_norm": 0.44596266746520996, "learning_rate": 1.2603000601734643e-05, "loss": 0.03, "step": 61840 }, { "epoch": 0.9492748062312946, "grad_norm": 0.36909806728363037, "learning_rate": 1.2600414116134492e-05, "loss": 0.0385, "step": 61850 }, { "epoch": 0.9494282863939836, "grad_norm": 0.5261135697364807, "learning_rate": 1.259782744393859e-05, "loss": 0.0508, "step": 61860 }, { "epoch": 0.9495817665566726, "grad_norm": 0.44602903723716736, "learning_rate": 1.2595240585332549e-05, "loss": 0.0395, "step": 61870 }, { "epoch": 0.9497352467193615, "grad_norm": 0.36004582047462463, "learning_rate": 1.2592653540501987e-05, "loss": 0.0346, "step": 61880 }, { "epoch": 0.9498887268820505, "grad_norm": 0.42389926314353943, "learning_rate": 1.2590066309632545e-05, "loss": 0.0332, "step": 61890 }, { "epoch": 0.9500422070447395, "grad_norm": 0.4749727249145508, "learning_rate": 1.2587478892909872e-05, "loss": 0.0347, "step": 61900 }, { "epoch": 0.9501956872074284, "grad_norm": 0.36463603377342224, "learning_rate": 1.258489129051963e-05, "loss": 0.0312, "step": 61910 }, { "epoch": 0.9503491673701174, "grad_norm": 0.3695419132709503, "learning_rate": 1.2582303502647496e-05, "loss": 0.0367, "step": 61920 }, { "epoch": 0.9505026475328063, "grad_norm": 0.41080421209335327, "learning_rate": 1.2579715529479159e-05, "loss": 0.0372, "step": 61930 }, { "epoch": 0.9506561276954953, "grad_norm": 0.28579312562942505, "learning_rate": 1.2577127371200325e-05, "loss": 0.0363, "step": 61940 }, { "epoch": 0.9508096078581844, "grad_norm": 0.4097406566143036, "learning_rate": 1.2574539027996707e-05, "loss": 0.0303, "step": 61950 }, { "epoch": 0.9509630880208733, "grad_norm": 0.421274334192276, "learning_rate": 1.2571950500054031e-05, "loss": 0.0385, "step": 61960 }, { "epoch": 0.9511165681835623, "grad_norm": 0.43778303265571594, "learning_rate": 1.2569361787558048e-05, "loss": 0.0322, "step": 61970 }, { "epoch": 0.9512700483462513, "grad_norm": 0.40409526228904724, "learning_rate": 1.256677289069451e-05, "loss": 0.0307, "step": 61980 }, { "epoch": 0.9514235285089402, "grad_norm": 0.5063966512680054, "learning_rate": 1.2564183809649188e-05, "loss": 0.0305, "step": 61990 }, { "epoch": 0.9515770086716292, "grad_norm": 0.508875846862793, "learning_rate": 1.2561594544607862e-05, "loss": 0.0395, "step": 62000 }, { "epoch": 0.9517304888343182, "grad_norm": 0.25818559527397156, "learning_rate": 1.255900509575633e-05, "loss": 0.026, "step": 62010 }, { "epoch": 0.9518839689970071, "grad_norm": 0.31925168633461, "learning_rate": 1.25564154632804e-05, "loss": 0.0406, "step": 62020 }, { "epoch": 0.9520374491596961, "grad_norm": 0.31873247027397156, "learning_rate": 1.2553825647365894e-05, "loss": 0.037, "step": 62030 }, { "epoch": 0.952190929322385, "grad_norm": 0.39665696024894714, "learning_rate": 1.2551235648198648e-05, "loss": 0.0307, "step": 62040 }, { "epoch": 0.952344409485074, "grad_norm": 0.4799191951751709, "learning_rate": 1.2548645465964507e-05, "loss": 0.0356, "step": 62050 }, { "epoch": 0.952497889647763, "grad_norm": 0.39459410309791565, "learning_rate": 1.2546055100849337e-05, "loss": 0.0333, "step": 62060 }, { "epoch": 0.952651369810452, "grad_norm": 0.5343741774559021, "learning_rate": 1.2543464553039012e-05, "loss": 0.0345, "step": 62070 }, { "epoch": 0.952804849973141, "grad_norm": 0.38878652453422546, "learning_rate": 1.2540873822719417e-05, "loss": 0.0339, "step": 62080 }, { "epoch": 0.95295833013583, "grad_norm": 0.2486916482448578, "learning_rate": 1.2538282910076457e-05, "loss": 0.031, "step": 62090 }, { "epoch": 0.9531118102985189, "grad_norm": 0.26284077763557434, "learning_rate": 1.2535691815296039e-05, "loss": 0.0319, "step": 62100 }, { "epoch": 0.9532652904612079, "grad_norm": 0.37091803550720215, "learning_rate": 1.2533100538564097e-05, "loss": 0.0376, "step": 62110 }, { "epoch": 0.9534187706238969, "grad_norm": 0.3635055422782898, "learning_rate": 1.2530509080066571e-05, "loss": 0.0321, "step": 62120 }, { "epoch": 0.9535722507865858, "grad_norm": 0.34863537549972534, "learning_rate": 1.252791743998941e-05, "loss": 0.0307, "step": 62130 }, { "epoch": 0.9537257309492748, "grad_norm": 0.34886932373046875, "learning_rate": 1.2525325618518585e-05, "loss": 0.0368, "step": 62140 }, { "epoch": 0.9538792111119638, "grad_norm": 0.5987924933433533, "learning_rate": 1.2522733615840066e-05, "loss": 0.0407, "step": 62150 }, { "epoch": 0.9540326912746527, "grad_norm": 0.3605840504169464, "learning_rate": 1.2520141432139856e-05, "loss": 0.0336, "step": 62160 }, { "epoch": 0.9541861714373417, "grad_norm": 0.5857957601547241, "learning_rate": 1.2517549067603953e-05, "loss": 0.044, "step": 62170 }, { "epoch": 0.9543396516000306, "grad_norm": 0.4255589246749878, "learning_rate": 1.2514956522418378e-05, "loss": 0.035, "step": 62180 }, { "epoch": 0.9544931317627197, "grad_norm": 0.4539766013622284, "learning_rate": 1.2512363796769166e-05, "loss": 0.0361, "step": 62190 }, { "epoch": 0.9546466119254087, "grad_norm": 0.4717664122581482, "learning_rate": 1.2509770890842352e-05, "loss": 0.0348, "step": 62200 }, { "epoch": 0.9548000920880976, "grad_norm": 0.3234517574310303, "learning_rate": 1.2507177804824e-05, "loss": 0.036, "step": 62210 }, { "epoch": 0.9549535722507866, "grad_norm": 0.4445953667163849, "learning_rate": 1.2504584538900178e-05, "loss": 0.0411, "step": 62220 }, { "epoch": 0.9551070524134756, "grad_norm": 0.3452427089214325, "learning_rate": 1.2501991093256964e-05, "loss": 0.0342, "step": 62230 }, { "epoch": 0.9552605325761645, "grad_norm": 0.27433452010154724, "learning_rate": 1.2499397468080465e-05, "loss": 0.0288, "step": 62240 }, { "epoch": 0.9554140127388535, "grad_norm": 0.390648752450943, "learning_rate": 1.249680366355678e-05, "loss": 0.0293, "step": 62250 }, { "epoch": 0.9555674929015425, "grad_norm": 0.4828967750072479, "learning_rate": 1.2494209679872033e-05, "loss": 0.0303, "step": 62260 }, { "epoch": 0.9557209730642314, "grad_norm": 0.37038958072662354, "learning_rate": 1.2491615517212358e-05, "loss": 0.0386, "step": 62270 }, { "epoch": 0.9558744532269204, "grad_norm": 0.4209998846054077, "learning_rate": 1.2489021175763903e-05, "loss": 0.0296, "step": 62280 }, { "epoch": 0.9560279333896093, "grad_norm": 0.43360835313796997, "learning_rate": 1.2486426655712832e-05, "loss": 0.0349, "step": 62290 }, { "epoch": 0.9561814135522984, "grad_norm": 0.4738960266113281, "learning_rate": 1.2483831957245309e-05, "loss": 0.0354, "step": 62300 }, { "epoch": 0.9563348937149874, "grad_norm": 0.3661768138408661, "learning_rate": 1.2481237080547524e-05, "loss": 0.0333, "step": 62310 }, { "epoch": 0.9564883738776763, "grad_norm": 0.33828577399253845, "learning_rate": 1.2478642025805678e-05, "loss": 0.0322, "step": 62320 }, { "epoch": 0.9566418540403653, "grad_norm": 0.28558439016342163, "learning_rate": 1.2476046793205978e-05, "loss": 0.0328, "step": 62330 }, { "epoch": 0.9567953342030543, "grad_norm": 0.4197463393211365, "learning_rate": 1.2473451382934652e-05, "loss": 0.047, "step": 62340 }, { "epoch": 0.9569488143657432, "grad_norm": 0.5262964367866516, "learning_rate": 1.2470855795177934e-05, "loss": 0.0377, "step": 62350 }, { "epoch": 0.9571022945284322, "grad_norm": 0.2758423984050751, "learning_rate": 1.2468260030122072e-05, "loss": 0.0289, "step": 62360 }, { "epoch": 0.9572557746911212, "grad_norm": 0.49969682097435, "learning_rate": 1.2465664087953335e-05, "loss": 0.0392, "step": 62370 }, { "epoch": 0.9574092548538101, "grad_norm": 0.379917711019516, "learning_rate": 1.2463067968857987e-05, "loss": 0.0351, "step": 62380 }, { "epoch": 0.9575627350164991, "grad_norm": 0.4040036201477051, "learning_rate": 1.2460471673022325e-05, "loss": 0.029, "step": 62390 }, { "epoch": 0.957716215179188, "grad_norm": 0.6348454356193542, "learning_rate": 1.2457875200632645e-05, "loss": 0.0324, "step": 62400 }, { "epoch": 0.957869695341877, "grad_norm": 0.4152028262615204, "learning_rate": 1.2455278551875258e-05, "loss": 0.0297, "step": 62410 }, { "epoch": 0.9580231755045661, "grad_norm": 0.5220814347267151, "learning_rate": 1.24526817269365e-05, "loss": 0.0409, "step": 62420 }, { "epoch": 0.958176655667255, "grad_norm": 0.42086881399154663, "learning_rate": 1.2450084726002694e-05, "loss": 0.041, "step": 62430 }, { "epoch": 0.958330135829944, "grad_norm": 0.2990889251232147, "learning_rate": 1.24474875492602e-05, "loss": 0.0351, "step": 62440 }, { "epoch": 0.958483615992633, "grad_norm": 0.2489490658044815, "learning_rate": 1.2444890196895381e-05, "loss": 0.0258, "step": 62450 }, { "epoch": 0.9586370961553219, "grad_norm": 0.3991223871707916, "learning_rate": 1.244229266909461e-05, "loss": 0.0356, "step": 62460 }, { "epoch": 0.9587905763180109, "grad_norm": 0.36725160479545593, "learning_rate": 1.2439694966044277e-05, "loss": 0.0394, "step": 62470 }, { "epoch": 0.9589440564806999, "grad_norm": 0.42517155408859253, "learning_rate": 1.2437097087930785e-05, "loss": 0.0356, "step": 62480 }, { "epoch": 0.9590975366433888, "grad_norm": 0.2885141372680664, "learning_rate": 1.2434499034940548e-05, "loss": 0.0356, "step": 62490 }, { "epoch": 0.9592510168060778, "grad_norm": 0.39534518122673035, "learning_rate": 1.2431900807259989e-05, "loss": 0.0288, "step": 62500 }, { "epoch": 0.9594044969687668, "grad_norm": 0.4329024851322174, "learning_rate": 1.2429302405075547e-05, "loss": 0.0282, "step": 62510 }, { "epoch": 0.9595579771314557, "grad_norm": 0.43325385451316833, "learning_rate": 1.2426703828573676e-05, "loss": 0.0319, "step": 62520 }, { "epoch": 0.9597114572941448, "grad_norm": 0.4028525650501251, "learning_rate": 1.2424105077940842e-05, "loss": 0.0275, "step": 62530 }, { "epoch": 0.9598649374568337, "grad_norm": 0.2863802909851074, "learning_rate": 1.2421506153363515e-05, "loss": 0.0241, "step": 62540 }, { "epoch": 0.9600184176195227, "grad_norm": 0.39903223514556885, "learning_rate": 1.2418907055028188e-05, "loss": 0.0381, "step": 62550 }, { "epoch": 0.9601718977822117, "grad_norm": 0.3595415949821472, "learning_rate": 1.241630778312136e-05, "loss": 0.0347, "step": 62560 }, { "epoch": 0.9603253779449006, "grad_norm": 0.2853461503982544, "learning_rate": 1.2413708337829548e-05, "loss": 0.0316, "step": 62570 }, { "epoch": 0.9604788581075896, "grad_norm": 0.48002466559410095, "learning_rate": 1.2411108719339277e-05, "loss": 0.0413, "step": 62580 }, { "epoch": 0.9606323382702786, "grad_norm": 0.36235418915748596, "learning_rate": 1.2408508927837083e-05, "loss": 0.0337, "step": 62590 }, { "epoch": 0.9607858184329675, "grad_norm": 0.3104422390460968, "learning_rate": 1.2405908963509521e-05, "loss": 0.0352, "step": 62600 }, { "epoch": 0.9609392985956565, "grad_norm": 0.45825567841529846, "learning_rate": 1.2403308826543153e-05, "loss": 0.0408, "step": 62610 }, { "epoch": 0.9610927787583455, "grad_norm": 0.4365758001804352, "learning_rate": 1.2400708517124551e-05, "loss": 0.0318, "step": 62620 }, { "epoch": 0.9612462589210344, "grad_norm": 0.2974421977996826, "learning_rate": 1.239810803544031e-05, "loss": 0.0303, "step": 62630 }, { "epoch": 0.9613997390837234, "grad_norm": 0.2867141664028168, "learning_rate": 1.2395507381677028e-05, "loss": 0.0323, "step": 62640 }, { "epoch": 0.9615532192464123, "grad_norm": 0.6566246151924133, "learning_rate": 1.2392906556021313e-05, "loss": 0.0399, "step": 62650 }, { "epoch": 0.9617066994091014, "grad_norm": 0.35511553287506104, "learning_rate": 1.2390305558659799e-05, "loss": 0.0342, "step": 62660 }, { "epoch": 0.9618601795717904, "grad_norm": 0.3732791244983673, "learning_rate": 1.2387704389779118e-05, "loss": 0.0279, "step": 62670 }, { "epoch": 0.9620136597344793, "grad_norm": 0.4550198018550873, "learning_rate": 1.2385103049565923e-05, "loss": 0.0385, "step": 62680 }, { "epoch": 0.9621671398971683, "grad_norm": 0.40641891956329346, "learning_rate": 1.2382501538206872e-05, "loss": 0.0299, "step": 62690 }, { "epoch": 0.9623206200598573, "grad_norm": 0.41157063841819763, "learning_rate": 1.237989985588864e-05, "loss": 0.0388, "step": 62700 }, { "epoch": 0.9624741002225462, "grad_norm": 0.32088685035705566, "learning_rate": 1.2377298002797918e-05, "loss": 0.0333, "step": 62710 }, { "epoch": 0.9626275803852352, "grad_norm": 0.2695613503456116, "learning_rate": 1.2374695979121403e-05, "loss": 0.0264, "step": 62720 }, { "epoch": 0.9627810605479242, "grad_norm": 0.36121436953544617, "learning_rate": 1.2372093785045803e-05, "loss": 0.0272, "step": 62730 }, { "epoch": 0.9629345407106131, "grad_norm": 0.34997379779815674, "learning_rate": 1.236949142075785e-05, "loss": 0.0304, "step": 62740 }, { "epoch": 0.9630880208733021, "grad_norm": 0.3916020691394806, "learning_rate": 1.2366888886444268e-05, "loss": 0.0373, "step": 62750 }, { "epoch": 0.963241501035991, "grad_norm": 0.5740580558776855, "learning_rate": 1.2364286182291813e-05, "loss": 0.0342, "step": 62760 }, { "epoch": 0.9633949811986801, "grad_norm": 0.3525732159614563, "learning_rate": 1.2361683308487242e-05, "loss": 0.0434, "step": 62770 }, { "epoch": 0.9635484613613691, "grad_norm": 0.3953039050102234, "learning_rate": 1.2359080265217329e-05, "loss": 0.0323, "step": 62780 }, { "epoch": 0.963701941524058, "grad_norm": 0.5729644894599915, "learning_rate": 1.235647705266886e-05, "loss": 0.0482, "step": 62790 }, { "epoch": 0.963855421686747, "grad_norm": 0.4627145230770111, "learning_rate": 1.2353873671028625e-05, "loss": 0.0338, "step": 62800 }, { "epoch": 0.964008901849436, "grad_norm": 0.3741639256477356, "learning_rate": 1.2351270120483438e-05, "loss": 0.0403, "step": 62810 }, { "epoch": 0.9641623820121249, "grad_norm": 0.363278865814209, "learning_rate": 1.2348666401220121e-05, "loss": 0.0406, "step": 62820 }, { "epoch": 0.9643158621748139, "grad_norm": 0.42296653985977173, "learning_rate": 1.23460625134255e-05, "loss": 0.049, "step": 62830 }, { "epoch": 0.9644693423375029, "grad_norm": 0.25866347551345825, "learning_rate": 1.234345845728643e-05, "loss": 0.0318, "step": 62840 }, { "epoch": 0.9646228225001918, "grad_norm": 0.27550008893013, "learning_rate": 1.2340854232989761e-05, "loss": 0.0298, "step": 62850 }, { "epoch": 0.9647763026628808, "grad_norm": 0.8316839933395386, "learning_rate": 1.2338249840722361e-05, "loss": 0.0294, "step": 62860 }, { "epoch": 0.9649297828255698, "grad_norm": 0.3383598327636719, "learning_rate": 1.2335645280671119e-05, "loss": 0.0299, "step": 62870 }, { "epoch": 0.9650832629882587, "grad_norm": 0.45439329743385315, "learning_rate": 1.2333040553022917e-05, "loss": 0.0346, "step": 62880 }, { "epoch": 0.9652367431509478, "grad_norm": 0.302951842546463, "learning_rate": 1.233043565796467e-05, "loss": 0.034, "step": 62890 }, { "epoch": 0.9653902233136367, "grad_norm": 0.3932969570159912, "learning_rate": 1.2327830595683292e-05, "loss": 0.0375, "step": 62900 }, { "epoch": 0.9655437034763257, "grad_norm": 0.32246291637420654, "learning_rate": 1.2325225366365709e-05, "loss": 0.0382, "step": 62910 }, { "epoch": 0.9656971836390147, "grad_norm": 0.37453988194465637, "learning_rate": 1.2322619970198873e-05, "loss": 0.0382, "step": 62920 }, { "epoch": 0.9658506638017036, "grad_norm": 0.3334099054336548, "learning_rate": 1.2320014407369722e-05, "loss": 0.0374, "step": 62930 }, { "epoch": 0.9660041439643926, "grad_norm": 0.2699354290962219, "learning_rate": 1.2317408678065233e-05, "loss": 0.0277, "step": 62940 }, { "epoch": 0.9661576241270816, "grad_norm": 0.6640338897705078, "learning_rate": 1.2314802782472377e-05, "loss": 0.04, "step": 62950 }, { "epoch": 0.9663111042897705, "grad_norm": 0.3365885317325592, "learning_rate": 1.2312196720778142e-05, "loss": 0.0377, "step": 62960 }, { "epoch": 0.9664645844524595, "grad_norm": 0.309357613325119, "learning_rate": 1.2309590493169537e-05, "loss": 0.0348, "step": 62970 }, { "epoch": 0.9666180646151485, "grad_norm": 0.31380972266197205, "learning_rate": 1.230698409983357e-05, "loss": 0.0359, "step": 62980 }, { "epoch": 0.9667715447778374, "grad_norm": 0.39676257967948914, "learning_rate": 1.2304377540957264e-05, "loss": 0.0339, "step": 62990 }, { "epoch": 0.9669250249405265, "grad_norm": 0.2736608684062958, "learning_rate": 1.2301770816727658e-05, "loss": 0.0313, "step": 63000 }, { "epoch": 0.9670785051032154, "grad_norm": 0.18900816142559052, "learning_rate": 1.2299163927331799e-05, "loss": 0.0313, "step": 63010 }, { "epoch": 0.9672319852659044, "grad_norm": 0.2803417444229126, "learning_rate": 1.2296556872956748e-05, "loss": 0.0334, "step": 63020 }, { "epoch": 0.9673854654285934, "grad_norm": 0.30255326628685, "learning_rate": 1.2293949653789578e-05, "loss": 0.0366, "step": 63030 }, { "epoch": 0.9675389455912823, "grad_norm": 0.3803277611732483, "learning_rate": 1.2291342270017375e-05, "loss": 0.0256, "step": 63040 }, { "epoch": 0.9676924257539713, "grad_norm": 0.3001861870288849, "learning_rate": 1.2288734721827232e-05, "loss": 0.0318, "step": 63050 }, { "epoch": 0.9678459059166603, "grad_norm": 0.31057676672935486, "learning_rate": 1.2286127009406255e-05, "loss": 0.0314, "step": 63060 }, { "epoch": 0.9679993860793492, "grad_norm": 0.3878125250339508, "learning_rate": 1.2283519132941566e-05, "loss": 0.0269, "step": 63070 }, { "epoch": 0.9681528662420382, "grad_norm": 0.3342824876308441, "learning_rate": 1.2280911092620298e-05, "loss": 0.025, "step": 63080 }, { "epoch": 0.9683063464047272, "grad_norm": 0.2999347150325775, "learning_rate": 1.227830288862959e-05, "loss": 0.0322, "step": 63090 }, { "epoch": 0.9684598265674161, "grad_norm": 0.40165847539901733, "learning_rate": 1.2275694521156598e-05, "loss": 0.0471, "step": 63100 }, { "epoch": 0.9686133067301052, "grad_norm": 0.42368343472480774, "learning_rate": 1.227308599038849e-05, "loss": 0.0306, "step": 63110 }, { "epoch": 0.968766786892794, "grad_norm": 0.3867558240890503, "learning_rate": 1.2270477296512443e-05, "loss": 0.0436, "step": 63120 }, { "epoch": 0.9689202670554831, "grad_norm": 0.2975963056087494, "learning_rate": 1.2267868439715647e-05, "loss": 0.03, "step": 63130 }, { "epoch": 0.9690737472181721, "grad_norm": 0.45397132635116577, "learning_rate": 1.2265259420185305e-05, "loss": 0.0361, "step": 63140 }, { "epoch": 0.969227227380861, "grad_norm": 0.3166915476322174, "learning_rate": 1.2262650238108624e-05, "loss": 0.031, "step": 63150 }, { "epoch": 0.96938070754355, "grad_norm": 0.3591855764389038, "learning_rate": 1.226004089367284e-05, "loss": 0.0294, "step": 63160 }, { "epoch": 0.969534187706239, "grad_norm": 0.3668001592159271, "learning_rate": 1.225743138706518e-05, "loss": 0.0324, "step": 63170 }, { "epoch": 0.9696876678689279, "grad_norm": 0.4550517201423645, "learning_rate": 1.2254821718472896e-05, "loss": 0.0323, "step": 63180 }, { "epoch": 0.9698411480316169, "grad_norm": 0.43863633275032043, "learning_rate": 1.2252211888083248e-05, "loss": 0.0439, "step": 63190 }, { "epoch": 0.9699946281943059, "grad_norm": 0.35076746344566345, "learning_rate": 1.2249601896083504e-05, "loss": 0.0406, "step": 63200 }, { "epoch": 0.9701481083569948, "grad_norm": 0.4816280007362366, "learning_rate": 1.2246991742660955e-05, "loss": 0.0399, "step": 63210 }, { "epoch": 0.9703015885196838, "grad_norm": 0.23247641324996948, "learning_rate": 1.2244381428002884e-05, "loss": 0.027, "step": 63220 }, { "epoch": 0.9704550686823729, "grad_norm": 0.36940762400627136, "learning_rate": 1.2241770952296608e-05, "loss": 0.0291, "step": 63230 }, { "epoch": 0.9706085488450618, "grad_norm": 0.45194870233535767, "learning_rate": 1.2239160315729439e-05, "loss": 0.0374, "step": 63240 }, { "epoch": 0.9707620290077508, "grad_norm": 0.395291805267334, "learning_rate": 1.2236549518488706e-05, "loss": 0.0388, "step": 63250 }, { "epoch": 0.9709155091704397, "grad_norm": 0.636228621006012, "learning_rate": 1.2233938560761753e-05, "loss": 0.0468, "step": 63260 }, { "epoch": 0.9710689893331287, "grad_norm": 0.34409090876579285, "learning_rate": 1.2231327442735932e-05, "loss": 0.0307, "step": 63270 }, { "epoch": 0.9712224694958177, "grad_norm": 0.29901596903800964, "learning_rate": 1.2228716164598603e-05, "loss": 0.0318, "step": 63280 }, { "epoch": 0.9713759496585066, "grad_norm": 0.48197317123413086, "learning_rate": 1.2226104726537146e-05, "loss": 0.0357, "step": 63290 }, { "epoch": 0.9715294298211956, "grad_norm": 0.48279109597206116, "learning_rate": 1.2223493128738943e-05, "loss": 0.0331, "step": 63300 }, { "epoch": 0.9716829099838846, "grad_norm": 0.30174437165260315, "learning_rate": 1.2220881371391397e-05, "loss": 0.0328, "step": 63310 }, { "epoch": 0.9718363901465735, "grad_norm": 0.4575918912887573, "learning_rate": 1.2218269454681914e-05, "loss": 0.0301, "step": 63320 }, { "epoch": 0.9719898703092625, "grad_norm": 0.36676478385925293, "learning_rate": 1.2215657378797918e-05, "loss": 0.0292, "step": 63330 }, { "epoch": 0.9721433504719516, "grad_norm": 0.3669513761997223, "learning_rate": 1.2213045143926844e-05, "loss": 0.0354, "step": 63340 }, { "epoch": 0.9722968306346405, "grad_norm": 0.5453954935073853, "learning_rate": 1.2210432750256125e-05, "loss": 0.042, "step": 63350 }, { "epoch": 0.9724503107973295, "grad_norm": 0.4249415397644043, "learning_rate": 1.2207820197973226e-05, "loss": 0.0364, "step": 63360 }, { "epoch": 0.9726037909600184, "grad_norm": 0.2920854687690735, "learning_rate": 1.2205207487265614e-05, "loss": 0.027, "step": 63370 }, { "epoch": 0.9727572711227074, "grad_norm": 0.43011313676834106, "learning_rate": 1.220259461832076e-05, "loss": 0.0333, "step": 63380 }, { "epoch": 0.9729107512853964, "grad_norm": 0.4336376488208771, "learning_rate": 1.219998159132616e-05, "loss": 0.0343, "step": 63390 }, { "epoch": 0.9730642314480853, "grad_norm": 0.5779755711555481, "learning_rate": 1.2197368406469314e-05, "loss": 0.0372, "step": 63400 }, { "epoch": 0.9732177116107743, "grad_norm": 0.3794606328010559, "learning_rate": 1.219475506393773e-05, "loss": 0.0275, "step": 63410 }, { "epoch": 0.9733711917734633, "grad_norm": 0.48900166153907776, "learning_rate": 1.2192141563918937e-05, "loss": 0.0349, "step": 63420 }, { "epoch": 0.9735246719361522, "grad_norm": 0.49840861558914185, "learning_rate": 1.2189527906600464e-05, "loss": 0.0376, "step": 63430 }, { "epoch": 0.9736781520988412, "grad_norm": 0.4266591966152191, "learning_rate": 1.2186914092169863e-05, "loss": 0.0455, "step": 63440 }, { "epoch": 0.9738316322615302, "grad_norm": 0.4632716774940491, "learning_rate": 1.2184300120814687e-05, "loss": 0.0312, "step": 63450 }, { "epoch": 0.9739851124242191, "grad_norm": 0.33615440130233765, "learning_rate": 1.2181685992722507e-05, "loss": 0.0347, "step": 63460 }, { "epoch": 0.9741385925869082, "grad_norm": 0.4068128764629364, "learning_rate": 1.2179071708080901e-05, "loss": 0.0312, "step": 63470 }, { "epoch": 0.9742920727495971, "grad_norm": 0.36023858189582825, "learning_rate": 1.2176457267077459e-05, "loss": 0.0332, "step": 63480 }, { "epoch": 0.9744455529122861, "grad_norm": 0.3678889274597168, "learning_rate": 1.2173842669899788e-05, "loss": 0.0378, "step": 63490 }, { "epoch": 0.9745990330749751, "grad_norm": 0.45711591839790344, "learning_rate": 1.2171227916735497e-05, "loss": 0.0313, "step": 63500 }, { "epoch": 0.974752513237664, "grad_norm": 0.39876165986061096, "learning_rate": 1.2168613007772211e-05, "loss": 0.0348, "step": 63510 }, { "epoch": 0.974905993400353, "grad_norm": 0.2043687403202057, "learning_rate": 1.2165997943197567e-05, "loss": 0.0356, "step": 63520 }, { "epoch": 0.975059473563042, "grad_norm": 0.43798181414604187, "learning_rate": 1.2163382723199216e-05, "loss": 0.0318, "step": 63530 }, { "epoch": 0.9752129537257309, "grad_norm": 0.48466333746910095, "learning_rate": 1.2160767347964808e-05, "loss": 0.0348, "step": 63540 }, { "epoch": 0.9753664338884199, "grad_norm": 0.44991129636764526, "learning_rate": 1.2158151817682018e-05, "loss": 0.0337, "step": 63550 }, { "epoch": 0.9755199140511089, "grad_norm": 0.29875466227531433, "learning_rate": 1.2155536132538526e-05, "loss": 0.036, "step": 63560 }, { "epoch": 0.9756733942137978, "grad_norm": 0.3276005983352661, "learning_rate": 1.215292029272202e-05, "loss": 0.0322, "step": 63570 }, { "epoch": 0.9758268743764869, "grad_norm": 0.44937577843666077, "learning_rate": 1.2150304298420206e-05, "loss": 0.0315, "step": 63580 }, { "epoch": 0.9759803545391759, "grad_norm": 0.34567025303840637, "learning_rate": 1.2147688149820799e-05, "loss": 0.0299, "step": 63590 }, { "epoch": 0.9761338347018648, "grad_norm": 0.44283610582351685, "learning_rate": 1.2145071847111519e-05, "loss": 0.0415, "step": 63600 }, { "epoch": 0.9762873148645538, "grad_norm": 0.33943474292755127, "learning_rate": 1.2142455390480106e-05, "loss": 0.0343, "step": 63610 }, { "epoch": 0.9764407950272427, "grad_norm": 0.3647826910018921, "learning_rate": 1.2139838780114301e-05, "loss": 0.0289, "step": 63620 }, { "epoch": 0.9765942751899317, "grad_norm": 0.34875261783599854, "learning_rate": 1.213722201620187e-05, "loss": 0.0326, "step": 63630 }, { "epoch": 0.9767477553526207, "grad_norm": 0.2524961233139038, "learning_rate": 1.2134605098930578e-05, "loss": 0.0362, "step": 63640 }, { "epoch": 0.9769012355153096, "grad_norm": 0.42109552025794983, "learning_rate": 1.2131988028488204e-05, "loss": 0.0336, "step": 63650 }, { "epoch": 0.9770547156779986, "grad_norm": 0.2945682108402252, "learning_rate": 1.2129370805062543e-05, "loss": 0.0428, "step": 63660 }, { "epoch": 0.9772081958406876, "grad_norm": 0.5323272347450256, "learning_rate": 1.2126753428841394e-05, "loss": 0.037, "step": 63670 }, { "epoch": 0.9773616760033765, "grad_norm": 0.3595292270183563, "learning_rate": 1.2124135900012568e-05, "loss": 0.0301, "step": 63680 }, { "epoch": 0.9775151561660655, "grad_norm": 0.6728954911231995, "learning_rate": 1.212151821876389e-05, "loss": 0.0317, "step": 63690 }, { "epoch": 0.9776686363287546, "grad_norm": 0.321889191865921, "learning_rate": 1.2118900385283197e-05, "loss": 0.0322, "step": 63700 }, { "epoch": 0.9778221164914435, "grad_norm": 0.447134792804718, "learning_rate": 1.2116282399758335e-05, "loss": 0.047, "step": 63710 }, { "epoch": 0.9779755966541325, "grad_norm": 0.34248897433280945, "learning_rate": 1.2113664262377153e-05, "loss": 0.0621, "step": 63720 }, { "epoch": 0.9781290768168214, "grad_norm": 0.4796154499053955, "learning_rate": 1.2111045973327531e-05, "loss": 0.03, "step": 63730 }, { "epoch": 0.9782825569795104, "grad_norm": 0.39389851689338684, "learning_rate": 1.2108427532797337e-05, "loss": 0.026, "step": 63740 }, { "epoch": 0.9784360371421994, "grad_norm": 0.4207788109779358, "learning_rate": 1.2105808940974463e-05, "loss": 0.037, "step": 63750 }, { "epoch": 0.9785895173048883, "grad_norm": 0.3266981840133667, "learning_rate": 1.2103190198046815e-05, "loss": 0.035, "step": 63760 }, { "epoch": 0.9787429974675773, "grad_norm": 0.3511342704296112, "learning_rate": 1.2100571304202296e-05, "loss": 0.0294, "step": 63770 }, { "epoch": 0.9788964776302663, "grad_norm": 0.4566374123096466, "learning_rate": 1.209795225962883e-05, "loss": 0.0338, "step": 63780 }, { "epoch": 0.9790499577929552, "grad_norm": 0.713053286075592, "learning_rate": 1.2095333064514352e-05, "loss": 0.0372, "step": 63790 }, { "epoch": 0.9792034379556442, "grad_norm": 0.45650482177734375, "learning_rate": 1.2092713719046802e-05, "loss": 0.0331, "step": 63800 }, { "epoch": 0.9793569181183333, "grad_norm": 0.33165237307548523, "learning_rate": 1.2090094223414137e-05, "loss": 0.0334, "step": 63810 }, { "epoch": 0.9795103982810222, "grad_norm": 0.6566394567489624, "learning_rate": 1.2087474577804324e-05, "loss": 0.0289, "step": 63820 }, { "epoch": 0.9796638784437112, "grad_norm": 0.32727107405662537, "learning_rate": 1.208485478240533e-05, "loss": 0.0282, "step": 63830 }, { "epoch": 0.9798173586064001, "grad_norm": 0.46745043992996216, "learning_rate": 1.2082234837405153e-05, "loss": 0.0366, "step": 63840 }, { "epoch": 0.9799708387690891, "grad_norm": 0.23834837973117828, "learning_rate": 1.207961474299178e-05, "loss": 0.0303, "step": 63850 }, { "epoch": 0.9801243189317781, "grad_norm": 0.2775716483592987, "learning_rate": 1.2076994499353227e-05, "loss": 0.0324, "step": 63860 }, { "epoch": 0.980277799094467, "grad_norm": 0.24093003571033478, "learning_rate": 1.2074374106677506e-05, "loss": 0.0344, "step": 63870 }, { "epoch": 0.980431279257156, "grad_norm": 0.43986842036247253, "learning_rate": 1.207175356515265e-05, "loss": 0.0341, "step": 63880 }, { "epoch": 0.980584759419845, "grad_norm": 0.35279208421707153, "learning_rate": 1.2069132874966703e-05, "loss": 0.0303, "step": 63890 }, { "epoch": 0.9807382395825339, "grad_norm": 0.5367894768714905, "learning_rate": 1.2066512036307705e-05, "loss": 0.0334, "step": 63900 }, { "epoch": 0.9808917197452229, "grad_norm": 0.3752955496311188, "learning_rate": 1.2063891049363725e-05, "loss": 0.0262, "step": 63910 }, { "epoch": 0.981045199907912, "grad_norm": 0.34594833850860596, "learning_rate": 1.2061269914322835e-05, "loss": 0.0405, "step": 63920 }, { "epoch": 0.9811986800706008, "grad_norm": 0.3702255189418793, "learning_rate": 1.2058648631373114e-05, "loss": 0.037, "step": 63930 }, { "epoch": 0.9813521602332899, "grad_norm": 0.3544745445251465, "learning_rate": 1.2056027200702658e-05, "loss": 0.0364, "step": 63940 }, { "epoch": 0.9815056403959789, "grad_norm": 0.4423636496067047, "learning_rate": 1.205340562249957e-05, "loss": 0.0364, "step": 63950 }, { "epoch": 0.9816591205586678, "grad_norm": 0.5653408765792847, "learning_rate": 1.2050783896951965e-05, "loss": 0.0299, "step": 63960 }, { "epoch": 0.9818126007213568, "grad_norm": 0.26409783959388733, "learning_rate": 1.2048162024247967e-05, "loss": 0.03, "step": 63970 }, { "epoch": 0.9819660808840457, "grad_norm": 0.3469657301902771, "learning_rate": 1.204554000457571e-05, "loss": 0.0356, "step": 63980 }, { "epoch": 0.9821195610467347, "grad_norm": 0.4198402762413025, "learning_rate": 1.2042917838123344e-05, "loss": 0.0276, "step": 63990 }, { "epoch": 0.9822730412094237, "grad_norm": 0.3027660846710205, "learning_rate": 1.2040295525079024e-05, "loss": 0.0335, "step": 64000 }, { "epoch": 0.9824265213721126, "grad_norm": 0.44587230682373047, "learning_rate": 1.2037673065630918e-05, "loss": 0.0252, "step": 64010 }, { "epoch": 0.9825800015348016, "grad_norm": 0.30365118384361267, "learning_rate": 1.2035050459967199e-05, "loss": 0.0274, "step": 64020 }, { "epoch": 0.9827334816974906, "grad_norm": 0.34391921758651733, "learning_rate": 1.203242770827606e-05, "loss": 0.0361, "step": 64030 }, { "epoch": 0.9828869618601795, "grad_norm": 0.6239853501319885, "learning_rate": 1.2029804810745697e-05, "loss": 0.0399, "step": 64040 }, { "epoch": 0.9830404420228686, "grad_norm": 0.3692863881587982, "learning_rate": 1.2027181767564322e-05, "loss": 0.0382, "step": 64050 }, { "epoch": 0.9831939221855576, "grad_norm": 0.3118760585784912, "learning_rate": 1.2024558578920153e-05, "loss": 0.0307, "step": 64060 }, { "epoch": 0.9833474023482465, "grad_norm": 0.4416901469230652, "learning_rate": 1.2021935245001419e-05, "loss": 0.0277, "step": 64070 }, { "epoch": 0.9835008825109355, "grad_norm": 0.6361757516860962, "learning_rate": 1.201931176599636e-05, "loss": 0.0358, "step": 64080 }, { "epoch": 0.9836543626736244, "grad_norm": 0.3578389883041382, "learning_rate": 1.201668814209323e-05, "loss": 0.0395, "step": 64090 }, { "epoch": 0.9838078428363134, "grad_norm": 0.3836573660373688, "learning_rate": 1.2014064373480288e-05, "loss": 0.038, "step": 64100 }, { "epoch": 0.9839613229990024, "grad_norm": 0.34081050753593445, "learning_rate": 1.2011440460345804e-05, "loss": 0.0286, "step": 64110 }, { "epoch": 0.9841148031616913, "grad_norm": 0.38440603017807007, "learning_rate": 1.200881640287806e-05, "loss": 0.0306, "step": 64120 }, { "epoch": 0.9842682833243803, "grad_norm": 0.36708176136016846, "learning_rate": 1.2006192201265354e-05, "loss": 0.0289, "step": 64130 }, { "epoch": 0.9844217634870693, "grad_norm": 0.5111413598060608, "learning_rate": 1.2003567855695983e-05, "loss": 0.0384, "step": 64140 }, { "epoch": 0.9845752436497582, "grad_norm": 0.3759816586971283, "learning_rate": 1.2000943366358262e-05, "loss": 0.0376, "step": 64150 }, { "epoch": 0.9847287238124472, "grad_norm": 0.4447847604751587, "learning_rate": 1.1998318733440516e-05, "loss": 0.0326, "step": 64160 }, { "epoch": 0.9848822039751363, "grad_norm": 0.4708523750305176, "learning_rate": 1.199569395713107e-05, "loss": 0.0331, "step": 64170 }, { "epoch": 0.9850356841378252, "grad_norm": 0.6096638441085815, "learning_rate": 1.1993069037618279e-05, "loss": 0.0432, "step": 64180 }, { "epoch": 0.9851891643005142, "grad_norm": 0.45255935192108154, "learning_rate": 1.199044397509049e-05, "loss": 0.0308, "step": 64190 }, { "epoch": 0.9853426444632031, "grad_norm": 0.30984264612197876, "learning_rate": 1.1987818769736071e-05, "loss": 0.0352, "step": 64200 }, { "epoch": 0.9854961246258921, "grad_norm": 0.6647009253501892, "learning_rate": 1.1985193421743399e-05, "loss": 0.0473, "step": 64210 }, { "epoch": 0.9856496047885811, "grad_norm": 0.4881986677646637, "learning_rate": 1.1982567931300851e-05, "loss": 0.0276, "step": 64220 }, { "epoch": 0.98580308495127, "grad_norm": 0.35771405696868896, "learning_rate": 1.1979942298596829e-05, "loss": 0.0317, "step": 64230 }, { "epoch": 0.985956565113959, "grad_norm": 0.429647296667099, "learning_rate": 1.1977316523819735e-05, "loss": 0.0416, "step": 64240 }, { "epoch": 0.986110045276648, "grad_norm": 0.2508072555065155, "learning_rate": 1.1974690607157984e-05, "loss": 0.0314, "step": 64250 }, { "epoch": 0.9862635254393369, "grad_norm": 0.3536906838417053, "learning_rate": 1.1972064548800009e-05, "loss": 0.0326, "step": 64260 }, { "epoch": 0.9864170056020259, "grad_norm": 0.63199782371521, "learning_rate": 1.1969438348934233e-05, "loss": 0.0444, "step": 64270 }, { "epoch": 0.986570485764715, "grad_norm": 0.34352636337280273, "learning_rate": 1.1966812007749113e-05, "loss": 0.0307, "step": 64280 }, { "epoch": 0.9867239659274039, "grad_norm": 0.5111299157142639, "learning_rate": 1.1964185525433101e-05, "loss": 0.0378, "step": 64290 }, { "epoch": 0.9868774460900929, "grad_norm": 0.3680226504802704, "learning_rate": 1.1961558902174661e-05, "loss": 0.0264, "step": 64300 }, { "epoch": 0.9870309262527819, "grad_norm": 0.4538756012916565, "learning_rate": 1.1958932138162277e-05, "loss": 0.0235, "step": 64310 }, { "epoch": 0.9871844064154708, "grad_norm": 0.3989942967891693, "learning_rate": 1.1956305233584427e-05, "loss": 0.0312, "step": 64320 }, { "epoch": 0.9873378865781598, "grad_norm": 0.34883856773376465, "learning_rate": 1.195367818862961e-05, "loss": 0.0325, "step": 64330 }, { "epoch": 0.9874913667408487, "grad_norm": 0.49710267782211304, "learning_rate": 1.1951051003486338e-05, "loss": 0.0389, "step": 64340 }, { "epoch": 0.9876448469035377, "grad_norm": 0.4112275540828705, "learning_rate": 1.1948423678343119e-05, "loss": 0.0409, "step": 64350 }, { "epoch": 0.9877983270662267, "grad_norm": 0.40876203775405884, "learning_rate": 1.194579621338849e-05, "loss": 0.0308, "step": 64360 }, { "epoch": 0.9879518072289156, "grad_norm": 0.3561055362224579, "learning_rate": 1.1943168608810977e-05, "loss": 0.0323, "step": 64370 }, { "epoch": 0.9881052873916046, "grad_norm": 0.4624534547328949, "learning_rate": 1.1940540864799133e-05, "loss": 0.0411, "step": 64380 }, { "epoch": 0.9882587675542936, "grad_norm": 0.39855441451072693, "learning_rate": 1.1937912981541518e-05, "loss": 0.0297, "step": 64390 }, { "epoch": 0.9884122477169826, "grad_norm": 0.29756960272789, "learning_rate": 1.193528495922669e-05, "loss": 0.0356, "step": 64400 }, { "epoch": 0.9885657278796716, "grad_norm": 0.37571749091148376, "learning_rate": 1.1932656798043235e-05, "loss": 0.0434, "step": 64410 }, { "epoch": 0.9887192080423606, "grad_norm": 0.3457525074481964, "learning_rate": 1.193002849817973e-05, "loss": 0.0387, "step": 64420 }, { "epoch": 0.9888726882050495, "grad_norm": 0.3243035674095154, "learning_rate": 1.1927400059824779e-05, "loss": 0.027, "step": 64430 }, { "epoch": 0.9890261683677385, "grad_norm": 0.36276888847351074, "learning_rate": 1.1924771483166989e-05, "loss": 0.0402, "step": 64440 }, { "epoch": 0.9891796485304274, "grad_norm": 0.4687274396419525, "learning_rate": 1.1922142768394971e-05, "loss": 0.0298, "step": 64450 }, { "epoch": 0.9893331286931164, "grad_norm": 0.30146658420562744, "learning_rate": 1.1919513915697357e-05, "loss": 0.0323, "step": 64460 }, { "epoch": 0.9894866088558054, "grad_norm": 0.40178540349006653, "learning_rate": 1.191688492526278e-05, "loss": 0.045, "step": 64470 }, { "epoch": 0.9896400890184943, "grad_norm": 0.8165110349655151, "learning_rate": 1.1914255797279886e-05, "loss": 0.0278, "step": 64480 }, { "epoch": 0.9897935691811833, "grad_norm": 0.3352088928222656, "learning_rate": 1.1911626531937335e-05, "loss": 0.0312, "step": 64490 }, { "epoch": 0.9899470493438723, "grad_norm": 0.29565659165382385, "learning_rate": 1.1908997129423789e-05, "loss": 0.0224, "step": 64500 }, { "epoch": 0.9901005295065612, "grad_norm": 0.33047109842300415, "learning_rate": 1.1906367589927927e-05, "loss": 0.0319, "step": 64510 }, { "epoch": 0.9902540096692503, "grad_norm": 0.2783496081829071, "learning_rate": 1.1903737913638433e-05, "loss": 0.0314, "step": 64520 }, { "epoch": 0.9904074898319393, "grad_norm": 0.3607606291770935, "learning_rate": 1.1901108100744e-05, "loss": 0.0309, "step": 64530 }, { "epoch": 0.9905609699946282, "grad_norm": 0.3807573616504669, "learning_rate": 1.189847815143334e-05, "loss": 0.0412, "step": 64540 }, { "epoch": 0.9907144501573172, "grad_norm": 0.30813339352607727, "learning_rate": 1.1895848065895162e-05, "loss": 0.028, "step": 64550 }, { "epoch": 0.9908679303200061, "grad_norm": 0.37287721037864685, "learning_rate": 1.1893217844318193e-05, "loss": 0.0335, "step": 64560 }, { "epoch": 0.9910214104826951, "grad_norm": 0.3722834289073944, "learning_rate": 1.1890587486891167e-05, "loss": 0.0314, "step": 64570 }, { "epoch": 0.9911748906453841, "grad_norm": 0.4461025893688202, "learning_rate": 1.1887956993802827e-05, "loss": 0.0301, "step": 64580 }, { "epoch": 0.991328370808073, "grad_norm": 0.23976421356201172, "learning_rate": 1.1885326365241932e-05, "loss": 0.0297, "step": 64590 }, { "epoch": 0.991481850970762, "grad_norm": 0.38944879174232483, "learning_rate": 1.188269560139724e-05, "loss": 0.0337, "step": 64600 }, { "epoch": 0.991635331133451, "grad_norm": 0.42422088980674744, "learning_rate": 1.188006470245753e-05, "loss": 0.0377, "step": 64610 }, { "epoch": 0.9917888112961399, "grad_norm": 0.6274380087852478, "learning_rate": 1.187743366861158e-05, "loss": 0.0389, "step": 64620 }, { "epoch": 0.991942291458829, "grad_norm": 0.38129299879074097, "learning_rate": 1.1874802500048188e-05, "loss": 0.0354, "step": 64630 }, { "epoch": 0.992095771621518, "grad_norm": 0.39415523409843445, "learning_rate": 1.1872171196956153e-05, "loss": 0.0316, "step": 64640 }, { "epoch": 0.9922492517842069, "grad_norm": 0.5304239392280579, "learning_rate": 1.1869539759524288e-05, "loss": 0.0388, "step": 64650 }, { "epoch": 0.9924027319468959, "grad_norm": 0.3621053993701935, "learning_rate": 1.1866908187941415e-05, "loss": 0.0329, "step": 64660 }, { "epoch": 0.9925562121095849, "grad_norm": 0.3441444933414459, "learning_rate": 1.1864276482396365e-05, "loss": 0.0407, "step": 64670 }, { "epoch": 0.9927096922722738, "grad_norm": 0.40416330099105835, "learning_rate": 1.1861644643077978e-05, "loss": 0.0396, "step": 64680 }, { "epoch": 0.9928631724349628, "grad_norm": 0.2885284423828125, "learning_rate": 1.1859012670175111e-05, "loss": 0.0316, "step": 64690 }, { "epoch": 0.9930166525976517, "grad_norm": 0.34121769666671753, "learning_rate": 1.1856380563876617e-05, "loss": 0.0368, "step": 64700 }, { "epoch": 0.9931701327603407, "grad_norm": 0.7751076221466064, "learning_rate": 1.185374832437137e-05, "loss": 0.0351, "step": 64710 }, { "epoch": 0.9933236129230297, "grad_norm": 0.26464319229125977, "learning_rate": 1.1851115951848245e-05, "loss": 0.029, "step": 64720 }, { "epoch": 0.9934770930857186, "grad_norm": 0.5301808714866638, "learning_rate": 1.1848483446496137e-05, "loss": 0.0323, "step": 64730 }, { "epoch": 0.9936305732484076, "grad_norm": 0.34560370445251465, "learning_rate": 1.1845850808503939e-05, "loss": 0.0369, "step": 64740 }, { "epoch": 0.9937840534110967, "grad_norm": 0.27971309423446655, "learning_rate": 1.184321803806056e-05, "loss": 0.0277, "step": 64750 }, { "epoch": 0.9939375335737856, "grad_norm": 0.35379669070243835, "learning_rate": 1.1840585135354923e-05, "loss": 0.0283, "step": 64760 }, { "epoch": 0.9940910137364746, "grad_norm": 0.3328567445278168, "learning_rate": 1.183795210057595e-05, "loss": 0.036, "step": 64770 }, { "epoch": 0.9942444938991636, "grad_norm": 0.3484698534011841, "learning_rate": 1.1835318933912575e-05, "loss": 0.0295, "step": 64780 }, { "epoch": 0.9943979740618525, "grad_norm": 0.34622377157211304, "learning_rate": 1.1832685635553752e-05, "loss": 0.0455, "step": 64790 }, { "epoch": 0.9945514542245415, "grad_norm": 0.3615139424800873, "learning_rate": 1.1830052205688427e-05, "loss": 0.0334, "step": 64800 }, { "epoch": 0.9947049343872304, "grad_norm": 0.33567818999290466, "learning_rate": 1.1827418644505574e-05, "loss": 0.0372, "step": 64810 }, { "epoch": 0.9948584145499194, "grad_norm": 0.43269649147987366, "learning_rate": 1.182478495219416e-05, "loss": 0.035, "step": 64820 }, { "epoch": 0.9950118947126084, "grad_norm": 0.37098172307014465, "learning_rate": 1.1822151128943173e-05, "loss": 0.0409, "step": 64830 }, { "epoch": 0.9951653748752973, "grad_norm": 0.4762234091758728, "learning_rate": 1.1819517174941603e-05, "loss": 0.0296, "step": 64840 }, { "epoch": 0.9953188550379863, "grad_norm": 0.7008214592933655, "learning_rate": 1.1816883090378455e-05, "loss": 0.0358, "step": 64850 }, { "epoch": 0.9954723352006754, "grad_norm": 0.4582580029964447, "learning_rate": 1.1814248875442741e-05, "loss": 0.0389, "step": 64860 }, { "epoch": 0.9956258153633643, "grad_norm": 0.2870240807533264, "learning_rate": 1.1811614530323482e-05, "loss": 0.0344, "step": 64870 }, { "epoch": 0.9957792955260533, "grad_norm": 0.3413914144039154, "learning_rate": 1.1808980055209704e-05, "loss": 0.0348, "step": 64880 }, { "epoch": 0.9959327756887423, "grad_norm": 0.30615234375, "learning_rate": 1.1806345450290452e-05, "loss": 0.0319, "step": 64890 }, { "epoch": 0.9960862558514312, "grad_norm": 0.2339893877506256, "learning_rate": 1.1803710715754773e-05, "loss": 0.0241, "step": 64900 }, { "epoch": 0.9962397360141202, "grad_norm": 0.4014759063720703, "learning_rate": 1.1801075851791725e-05, "loss": 0.0316, "step": 64910 }, { "epoch": 0.9963932161768091, "grad_norm": 0.33750680088996887, "learning_rate": 1.1798440858590378e-05, "loss": 0.0423, "step": 64920 }, { "epoch": 0.9965466963394981, "grad_norm": 0.5127067565917969, "learning_rate": 1.1795805736339809e-05, "loss": 0.031, "step": 64930 }, { "epoch": 0.9967001765021871, "grad_norm": 0.38182228803634644, "learning_rate": 1.1793170485229102e-05, "loss": 0.0458, "step": 64940 }, { "epoch": 0.996853656664876, "grad_norm": 0.3882350027561188, "learning_rate": 1.1790535105447351e-05, "loss": 0.0393, "step": 64950 }, { "epoch": 0.997007136827565, "grad_norm": 0.32278913259506226, "learning_rate": 1.1787899597183666e-05, "loss": 0.0276, "step": 64960 }, { "epoch": 0.997160616990254, "grad_norm": 0.3760725259780884, "learning_rate": 1.1785263960627158e-05, "loss": 0.0359, "step": 64970 }, { "epoch": 0.997314097152943, "grad_norm": 0.32687073945999146, "learning_rate": 1.178262819596695e-05, "loss": 0.0383, "step": 64980 }, { "epoch": 0.997467577315632, "grad_norm": 0.33725929260253906, "learning_rate": 1.1779992303392176e-05, "loss": 0.0319, "step": 64990 }, { "epoch": 0.997621057478321, "grad_norm": 0.40483009815216064, "learning_rate": 1.1777356283091972e-05, "loss": 0.0268, "step": 65000 }, { "epoch": 0.9977745376410099, "grad_norm": 0.4450477957725525, "learning_rate": 1.1774720135255497e-05, "loss": 0.0343, "step": 65010 }, { "epoch": 0.9979280178036989, "grad_norm": 0.3582281470298767, "learning_rate": 1.1772083860071905e-05, "loss": 0.0265, "step": 65020 }, { "epoch": 0.9980814979663879, "grad_norm": 0.379863440990448, "learning_rate": 1.176944745773037e-05, "loss": 0.0271, "step": 65030 }, { "epoch": 0.9982349781290768, "grad_norm": 0.37636762857437134, "learning_rate": 1.1766810928420062e-05, "loss": 0.0341, "step": 65040 }, { "epoch": 0.9983884582917658, "grad_norm": 0.3430424630641937, "learning_rate": 1.1764174272330178e-05, "loss": 0.0321, "step": 65050 }, { "epoch": 0.9985419384544547, "grad_norm": 0.34370338916778564, "learning_rate": 1.1761537489649908e-05, "loss": 0.041, "step": 65060 }, { "epoch": 0.9986954186171437, "grad_norm": 0.5327957272529602, "learning_rate": 1.175890058056846e-05, "loss": 0.0376, "step": 65070 }, { "epoch": 0.9988488987798327, "grad_norm": 0.4546772837638855, "learning_rate": 1.1756263545275046e-05, "loss": 0.0286, "step": 65080 }, { "epoch": 0.9990023789425216, "grad_norm": 0.41030481457710266, "learning_rate": 1.175362638395889e-05, "loss": 0.0328, "step": 65090 }, { "epoch": 0.9991558591052107, "grad_norm": 0.4399203360080719, "learning_rate": 1.1750989096809227e-05, "loss": 0.0344, "step": 65100 }, { "epoch": 0.9993093392678997, "grad_norm": 0.3708217740058899, "learning_rate": 1.1748351684015297e-05, "loss": 0.0281, "step": 65110 }, { "epoch": 0.9994628194305886, "grad_norm": 0.3872683048248291, "learning_rate": 1.1745714145766352e-05, "loss": 0.0298, "step": 65120 }, { "epoch": 0.9996162995932776, "grad_norm": 0.2930818796157837, "learning_rate": 1.174307648225165e-05, "loss": 0.036, "step": 65130 }, { "epoch": 0.9997697797559666, "grad_norm": 0.310050368309021, "learning_rate": 1.1740438693660457e-05, "loss": 0.0357, "step": 65140 }, { "epoch": 0.9999232599186555, "grad_norm": 0.5952459573745728, "learning_rate": 1.1737800780182057e-05, "loss": 0.0324, "step": 65150 }, { "epoch": 1.0000767400813444, "grad_norm": 0.40113314986228943, "learning_rate": 1.1735162742005734e-05, "loss": 0.0311, "step": 65160 }, { "epoch": 1.0002302202440334, "grad_norm": 0.40269339084625244, "learning_rate": 1.1732524579320778e-05, "loss": 0.0399, "step": 65170 }, { "epoch": 1.0003837004067224, "grad_norm": 0.36585304141044617, "learning_rate": 1.1729886292316506e-05, "loss": 0.0287, "step": 65180 }, { "epoch": 1.0005371805694114, "grad_norm": 0.3706117570400238, "learning_rate": 1.1727247881182218e-05, "loss": 0.033, "step": 65190 }, { "epoch": 1.0006906607321004, "grad_norm": 0.397151917219162, "learning_rate": 1.1724609346107243e-05, "loss": 0.0392, "step": 65200 }, { "epoch": 1.0008441408947895, "grad_norm": 0.346771240234375, "learning_rate": 1.1721970687280912e-05, "loss": 0.0403, "step": 65210 }, { "epoch": 1.0009976210574782, "grad_norm": 0.37811315059661865, "learning_rate": 1.1719331904892563e-05, "loss": 0.0314, "step": 65220 }, { "epoch": 1.0011511012201673, "grad_norm": 0.38425227999687195, "learning_rate": 1.1716692999131549e-05, "loss": 0.0326, "step": 65230 }, { "epoch": 1.0013045813828563, "grad_norm": 0.32281336188316345, "learning_rate": 1.171405397018722e-05, "loss": 0.0261, "step": 65240 }, { "epoch": 1.0014580615455453, "grad_norm": 0.2938534617424011, "learning_rate": 1.1711414818248951e-05, "loss": 0.0301, "step": 65250 }, { "epoch": 1.0016115417082343, "grad_norm": 0.3296862244606018, "learning_rate": 1.1708775543506113e-05, "loss": 0.0299, "step": 65260 }, { "epoch": 1.001765021870923, "grad_norm": 0.4591067433357239, "learning_rate": 1.1706136146148088e-05, "loss": 0.029, "step": 65270 }, { "epoch": 1.001918502033612, "grad_norm": 0.5283892154693604, "learning_rate": 1.170349662636428e-05, "loss": 0.0353, "step": 65280 }, { "epoch": 1.0020719821963011, "grad_norm": 0.40202653408050537, "learning_rate": 1.1700856984344076e-05, "loss": 0.0357, "step": 65290 }, { "epoch": 1.0022254623589901, "grad_norm": 0.37562406063079834, "learning_rate": 1.169821722027689e-05, "loss": 0.0345, "step": 65300 }, { "epoch": 1.0023789425216791, "grad_norm": 0.3648335039615631, "learning_rate": 1.1695577334352155e-05, "loss": 0.0264, "step": 65310 }, { "epoch": 1.0025324226843682, "grad_norm": 0.5058571100234985, "learning_rate": 1.1692937326759282e-05, "loss": 0.0337, "step": 65320 }, { "epoch": 1.002685902847057, "grad_norm": 0.5514948964118958, "learning_rate": 1.1690297197687717e-05, "loss": 0.0325, "step": 65330 }, { "epoch": 1.002839383009746, "grad_norm": 0.4784839153289795, "learning_rate": 1.1687656947326904e-05, "loss": 0.0303, "step": 65340 }, { "epoch": 1.002992863172435, "grad_norm": 0.4404675364494324, "learning_rate": 1.1685016575866294e-05, "loss": 0.0292, "step": 65350 }, { "epoch": 1.003146343335124, "grad_norm": 0.38893193006515503, "learning_rate": 1.1682376083495357e-05, "loss": 0.0351, "step": 65360 }, { "epoch": 1.003299823497813, "grad_norm": 0.33908337354660034, "learning_rate": 1.1679735470403556e-05, "loss": 0.0282, "step": 65370 }, { "epoch": 1.0034533036605018, "grad_norm": 0.31361865997314453, "learning_rate": 1.1677094736780376e-05, "loss": 0.034, "step": 65380 }, { "epoch": 1.0036067838231908, "grad_norm": 0.3945293128490448, "learning_rate": 1.1674453882815306e-05, "loss": 0.0242, "step": 65390 }, { "epoch": 1.0037602639858798, "grad_norm": 0.302304744720459, "learning_rate": 1.1671812908697842e-05, "loss": 0.0279, "step": 65400 }, { "epoch": 1.0039137441485688, "grad_norm": 0.32742956280708313, "learning_rate": 1.1669171814617495e-05, "loss": 0.0298, "step": 65410 }, { "epoch": 1.0040672243112578, "grad_norm": 0.34078189730644226, "learning_rate": 1.1666530600763772e-05, "loss": 0.0318, "step": 65420 }, { "epoch": 1.0042207044739468, "grad_norm": 0.32015591859817505, "learning_rate": 1.16638892673262e-05, "loss": 0.0279, "step": 65430 }, { "epoch": 1.0043741846366356, "grad_norm": 0.34563490748405457, "learning_rate": 1.1661247814494313e-05, "loss": 0.0332, "step": 65440 }, { "epoch": 1.0045276647993246, "grad_norm": 0.4175247251987457, "learning_rate": 1.1658606242457649e-05, "loss": 0.028, "step": 65450 }, { "epoch": 1.0046811449620137, "grad_norm": 0.34186238050460815, "learning_rate": 1.1655964551405758e-05, "loss": 0.0277, "step": 65460 }, { "epoch": 1.0048346251247027, "grad_norm": 0.3746342658996582, "learning_rate": 1.1653322741528196e-05, "loss": 0.0257, "step": 65470 }, { "epoch": 1.0049881052873917, "grad_norm": 0.5823001265525818, "learning_rate": 1.1650680813014534e-05, "loss": 0.0343, "step": 65480 }, { "epoch": 1.0051415854500805, "grad_norm": 0.41396766901016235, "learning_rate": 1.1648038766054341e-05, "loss": 0.0361, "step": 65490 }, { "epoch": 1.0052950656127695, "grad_norm": 0.30552196502685547, "learning_rate": 1.1645396600837201e-05, "loss": 0.0297, "step": 65500 }, { "epoch": 1.0054485457754585, "grad_norm": 0.30258920788764954, "learning_rate": 1.1642754317552708e-05, "loss": 0.0285, "step": 65510 }, { "epoch": 1.0056020259381475, "grad_norm": 0.4669666588306427, "learning_rate": 1.1640111916390464e-05, "loss": 0.0271, "step": 65520 }, { "epoch": 1.0057555061008365, "grad_norm": 0.3625909686088562, "learning_rate": 1.1637469397540074e-05, "loss": 0.0333, "step": 65530 }, { "epoch": 1.0059089862635255, "grad_norm": 0.2816084921360016, "learning_rate": 1.1634826761191154e-05, "loss": 0.0297, "step": 65540 }, { "epoch": 1.0060624664262143, "grad_norm": 0.33437514305114746, "learning_rate": 1.1632184007533331e-05, "loss": 0.0306, "step": 65550 }, { "epoch": 1.0062159465889033, "grad_norm": 0.48801928758621216, "learning_rate": 1.1629541136756239e-05, "loss": 0.032, "step": 65560 }, { "epoch": 1.0063694267515924, "grad_norm": 0.4794134199619293, "learning_rate": 1.1626898149049523e-05, "loss": 0.0384, "step": 65570 }, { "epoch": 1.0065229069142814, "grad_norm": 0.3557785451412201, "learning_rate": 1.162425504460283e-05, "loss": 0.0264, "step": 65580 }, { "epoch": 1.0066763870769704, "grad_norm": 0.4539794623851776, "learning_rate": 1.162161182360582e-05, "loss": 0.0306, "step": 65590 }, { "epoch": 1.0068298672396592, "grad_norm": 0.44384336471557617, "learning_rate": 1.1618968486248158e-05, "loss": 0.0277, "step": 65600 }, { "epoch": 1.0069833474023482, "grad_norm": 0.272520512342453, "learning_rate": 1.1616325032719525e-05, "loss": 0.0294, "step": 65610 }, { "epoch": 1.0071368275650372, "grad_norm": 0.3154827654361725, "learning_rate": 1.1613681463209603e-05, "loss": 0.0265, "step": 65620 }, { "epoch": 1.0072903077277262, "grad_norm": 0.6016177535057068, "learning_rate": 1.1611037777908083e-05, "loss": 0.0419, "step": 65630 }, { "epoch": 1.0074437878904152, "grad_norm": 0.47612836956977844, "learning_rate": 1.1608393977004664e-05, "loss": 0.0318, "step": 65640 }, { "epoch": 1.0075972680531042, "grad_norm": 0.44470685720443726, "learning_rate": 1.160575006068906e-05, "loss": 0.0273, "step": 65650 }, { "epoch": 1.007750748215793, "grad_norm": 0.2890675961971283, "learning_rate": 1.1603106029150985e-05, "loss": 0.0305, "step": 65660 }, { "epoch": 1.007904228378482, "grad_norm": 0.3646937906742096, "learning_rate": 1.1600461882580167e-05, "loss": 0.0267, "step": 65670 }, { "epoch": 1.008057708541171, "grad_norm": 0.49594148993492126, "learning_rate": 1.1597817621166336e-05, "loss": 0.0373, "step": 65680 }, { "epoch": 1.00821118870386, "grad_norm": 0.38770005106925964, "learning_rate": 1.1595173245099236e-05, "loss": 0.0413, "step": 65690 }, { "epoch": 1.008364668866549, "grad_norm": 0.39180296659469604, "learning_rate": 1.1592528754568621e-05, "loss": 0.0281, "step": 65700 }, { "epoch": 1.0085181490292379, "grad_norm": 0.42164477705955505, "learning_rate": 1.1589884149764243e-05, "loss": 0.0263, "step": 65710 }, { "epoch": 1.0086716291919269, "grad_norm": 0.21534933149814606, "learning_rate": 1.1587239430875872e-05, "loss": 0.031, "step": 65720 }, { "epoch": 1.008825109354616, "grad_norm": 0.4493781328201294, "learning_rate": 1.1584594598093286e-05, "loss": 0.0359, "step": 65730 }, { "epoch": 1.008978589517305, "grad_norm": 0.35563889145851135, "learning_rate": 1.1581949651606262e-05, "loss": 0.0314, "step": 65740 }, { "epoch": 1.009132069679994, "grad_norm": 0.3096918761730194, "learning_rate": 1.1579304591604594e-05, "loss": 0.0295, "step": 65750 }, { "epoch": 1.009285549842683, "grad_norm": 0.5189176797866821, "learning_rate": 1.1576659418278083e-05, "loss": 0.0446, "step": 65760 }, { "epoch": 1.0094390300053717, "grad_norm": 0.3453415632247925, "learning_rate": 1.157401413181653e-05, "loss": 0.0341, "step": 65770 }, { "epoch": 1.0095925101680607, "grad_norm": 0.3142472207546234, "learning_rate": 1.1571368732409762e-05, "loss": 0.0263, "step": 65780 }, { "epoch": 1.0097459903307497, "grad_norm": 0.4763789176940918, "learning_rate": 1.1568723220247593e-05, "loss": 0.0419, "step": 65790 }, { "epoch": 1.0098994704934388, "grad_norm": 0.36753928661346436, "learning_rate": 1.1566077595519857e-05, "loss": 0.0337, "step": 65800 }, { "epoch": 1.0100529506561278, "grad_norm": 0.49882328510284424, "learning_rate": 1.1563431858416399e-05, "loss": 0.0327, "step": 65810 }, { "epoch": 1.0102064308188166, "grad_norm": 0.34378355741500854, "learning_rate": 1.1560786009127056e-05, "loss": 0.0318, "step": 65820 }, { "epoch": 1.0103599109815056, "grad_norm": 0.5055686831474304, "learning_rate": 1.1558140047841698e-05, "loss": 0.0255, "step": 65830 }, { "epoch": 1.0105133911441946, "grad_norm": 0.344497412443161, "learning_rate": 1.1555493974750178e-05, "loss": 0.0226, "step": 65840 }, { "epoch": 1.0106668713068836, "grad_norm": 0.21307867765426636, "learning_rate": 1.1552847790042372e-05, "loss": 0.0361, "step": 65850 }, { "epoch": 1.0108203514695726, "grad_norm": 0.35380226373672485, "learning_rate": 1.1550201493908162e-05, "loss": 0.0288, "step": 65860 }, { "epoch": 1.0109738316322616, "grad_norm": 0.520535945892334, "learning_rate": 1.1547555086537432e-05, "loss": 0.0307, "step": 65870 }, { "epoch": 1.0111273117949504, "grad_norm": 0.3752802908420563, "learning_rate": 1.154490856812008e-05, "loss": 0.0295, "step": 65880 }, { "epoch": 1.0112807919576394, "grad_norm": 0.3539625108242035, "learning_rate": 1.1542261938846012e-05, "loss": 0.0435, "step": 65890 }, { "epoch": 1.0114342721203284, "grad_norm": 0.4068939983844757, "learning_rate": 1.1539615198905137e-05, "loss": 0.0353, "step": 65900 }, { "epoch": 1.0115877522830174, "grad_norm": 0.4293906092643738, "learning_rate": 1.1536968348487378e-05, "loss": 0.0243, "step": 65910 }, { "epoch": 1.0117412324457065, "grad_norm": 0.2983022630214691, "learning_rate": 1.1534321387782659e-05, "loss": 0.0327, "step": 65920 }, { "epoch": 1.0118947126083955, "grad_norm": 0.36425429582595825, "learning_rate": 1.1531674316980919e-05, "loss": 0.0324, "step": 65930 }, { "epoch": 1.0120481927710843, "grad_norm": 0.3590450584888458, "learning_rate": 1.15290271362721e-05, "loss": 0.0258, "step": 65940 }, { "epoch": 1.0122016729337733, "grad_norm": 0.3515225350856781, "learning_rate": 1.152637984584615e-05, "loss": 0.0218, "step": 65950 }, { "epoch": 1.0123551530964623, "grad_norm": 0.35385099053382874, "learning_rate": 1.1523732445893036e-05, "loss": 0.0321, "step": 65960 }, { "epoch": 1.0125086332591513, "grad_norm": 0.47244733572006226, "learning_rate": 1.152108493660272e-05, "loss": 0.0314, "step": 65970 }, { "epoch": 1.0126621134218403, "grad_norm": 0.32517895102500916, "learning_rate": 1.1518437318165181e-05, "loss": 0.0373, "step": 65980 }, { "epoch": 1.012815593584529, "grad_norm": 0.40354686975479126, "learning_rate": 1.1515789590770399e-05, "loss": 0.0272, "step": 65990 }, { "epoch": 1.0129690737472181, "grad_norm": 0.39653733372688293, "learning_rate": 1.1513141754608363e-05, "loss": 0.029, "step": 66000 }, { "epoch": 1.0131225539099071, "grad_norm": 0.539462149143219, "learning_rate": 1.1510493809869077e-05, "loss": 0.0379, "step": 66010 }, { "epoch": 1.0132760340725961, "grad_norm": 0.2703416049480438, "learning_rate": 1.1507845756742542e-05, "loss": 0.0295, "step": 66020 }, { "epoch": 1.0134295142352852, "grad_norm": 0.39799764752388, "learning_rate": 1.1505197595418776e-05, "loss": 0.0273, "step": 66030 }, { "epoch": 1.0135829943979742, "grad_norm": 0.36963245272636414, "learning_rate": 1.15025493260878e-05, "loss": 0.0246, "step": 66040 }, { "epoch": 1.013736474560663, "grad_norm": 0.32035958766937256, "learning_rate": 1.149990094893964e-05, "loss": 0.0287, "step": 66050 }, { "epoch": 1.013889954723352, "grad_norm": 0.4466750919818878, "learning_rate": 1.149725246416434e-05, "loss": 0.0337, "step": 66060 }, { "epoch": 1.014043434886041, "grad_norm": 0.521011471748352, "learning_rate": 1.149460387195194e-05, "loss": 0.0332, "step": 66070 }, { "epoch": 1.01419691504873, "grad_norm": 0.38980862498283386, "learning_rate": 1.1491955172492493e-05, "loss": 0.0344, "step": 66080 }, { "epoch": 1.014350395211419, "grad_norm": 0.5156631469726562, "learning_rate": 1.148930636597606e-05, "loss": 0.0292, "step": 66090 }, { "epoch": 1.0145038753741078, "grad_norm": 0.24737681448459625, "learning_rate": 1.1486657452592713e-05, "loss": 0.0298, "step": 66100 }, { "epoch": 1.0146573555367968, "grad_norm": 0.42470693588256836, "learning_rate": 1.1484008432532525e-05, "loss": 0.0277, "step": 66110 }, { "epoch": 1.0148108356994858, "grad_norm": 0.29510176181793213, "learning_rate": 1.1481359305985578e-05, "loss": 0.0357, "step": 66120 }, { "epoch": 1.0149643158621748, "grad_norm": 0.41170427203178406, "learning_rate": 1.1478710073141967e-05, "loss": 0.0356, "step": 66130 }, { "epoch": 1.0151177960248638, "grad_norm": 0.3801164925098419, "learning_rate": 1.1476060734191785e-05, "loss": 0.0401, "step": 66140 }, { "epoch": 1.0152712761875529, "grad_norm": 0.38755854964256287, "learning_rate": 1.1473411289325145e-05, "loss": 0.034, "step": 66150 }, { "epoch": 1.0154247563502417, "grad_norm": 0.4137529134750366, "learning_rate": 1.1470761738732157e-05, "loss": 0.0243, "step": 66160 }, { "epoch": 1.0155782365129307, "grad_norm": 0.308627188205719, "learning_rate": 1.1468112082602942e-05, "loss": 0.0284, "step": 66170 }, { "epoch": 1.0157317166756197, "grad_norm": 0.43901678919792175, "learning_rate": 1.1465462321127632e-05, "loss": 0.0349, "step": 66180 }, { "epoch": 1.0158851968383087, "grad_norm": 0.5995374917984009, "learning_rate": 1.146281245449636e-05, "loss": 0.0351, "step": 66190 }, { "epoch": 1.0160386770009977, "grad_norm": 0.3719010055065155, "learning_rate": 1.1460162482899275e-05, "loss": 0.0304, "step": 66200 }, { "epoch": 1.0161921571636865, "grad_norm": 0.32395029067993164, "learning_rate": 1.1457512406526527e-05, "loss": 0.026, "step": 66210 }, { "epoch": 1.0163456373263755, "grad_norm": 0.5014082789421082, "learning_rate": 1.1454862225568274e-05, "loss": 0.0319, "step": 66220 }, { "epoch": 1.0164991174890645, "grad_norm": 0.3533635437488556, "learning_rate": 1.1452211940214683e-05, "loss": 0.0274, "step": 66230 }, { "epoch": 1.0166525976517535, "grad_norm": 0.35890820622444153, "learning_rate": 1.144956155065593e-05, "loss": 0.0286, "step": 66240 }, { "epoch": 1.0168060778144425, "grad_norm": 0.4799714684486389, "learning_rate": 1.1446911057082195e-05, "loss": 0.0379, "step": 66250 }, { "epoch": 1.0169595579771316, "grad_norm": 0.36077573895454407, "learning_rate": 1.144426045968367e-05, "loss": 0.0361, "step": 66260 }, { "epoch": 1.0171130381398203, "grad_norm": 0.21111108362674713, "learning_rate": 1.1441609758650545e-05, "loss": 0.0252, "step": 66270 }, { "epoch": 1.0172665183025094, "grad_norm": 0.5575491189956665, "learning_rate": 1.1438958954173034e-05, "loss": 0.0351, "step": 66280 }, { "epoch": 1.0174199984651984, "grad_norm": 0.3553962707519531, "learning_rate": 1.143630804644134e-05, "loss": 0.0354, "step": 66290 }, { "epoch": 1.0175734786278874, "grad_norm": 0.43420273065567017, "learning_rate": 1.1433657035645687e-05, "loss": 0.0283, "step": 66300 }, { "epoch": 1.0177269587905764, "grad_norm": 0.38383379578590393, "learning_rate": 1.14310059219763e-05, "loss": 0.0311, "step": 66310 }, { "epoch": 1.0178804389532652, "grad_norm": 0.4337182343006134, "learning_rate": 1.1428354705623409e-05, "loss": 0.0339, "step": 66320 }, { "epoch": 1.0180339191159542, "grad_norm": 0.27845364809036255, "learning_rate": 1.1425703386777263e-05, "loss": 0.035, "step": 66330 }, { "epoch": 1.0181873992786432, "grad_norm": 0.3219497799873352, "learning_rate": 1.1423051965628102e-05, "loss": 0.0297, "step": 66340 }, { "epoch": 1.0183408794413322, "grad_norm": 0.34767985343933105, "learning_rate": 1.142040044236619e-05, "loss": 0.0304, "step": 66350 }, { "epoch": 1.0184943596040212, "grad_norm": 0.36748236417770386, "learning_rate": 1.1417748817181784e-05, "loss": 0.0289, "step": 66360 }, { "epoch": 1.0186478397667103, "grad_norm": 0.31193476915359497, "learning_rate": 1.1415097090265157e-05, "loss": 0.0298, "step": 66370 }, { "epoch": 1.018801319929399, "grad_norm": 0.44214504957199097, "learning_rate": 1.1412445261806585e-05, "loss": 0.0375, "step": 66380 }, { "epoch": 1.018954800092088, "grad_norm": 0.4664725661277771, "learning_rate": 1.1409793331996357e-05, "loss": 0.0391, "step": 66390 }, { "epoch": 1.019108280254777, "grad_norm": 0.40430688858032227, "learning_rate": 1.1407141301024762e-05, "loss": 0.0218, "step": 66400 }, { "epoch": 1.019261760417466, "grad_norm": 0.4714328646659851, "learning_rate": 1.14044891690821e-05, "loss": 0.0281, "step": 66410 }, { "epoch": 1.019415240580155, "grad_norm": 0.41958603262901306, "learning_rate": 1.1401836936358677e-05, "loss": 0.0359, "step": 66420 }, { "epoch": 1.0195687207428439, "grad_norm": 0.4920600652694702, "learning_rate": 1.1399184603044809e-05, "loss": 0.0426, "step": 66430 }, { "epoch": 1.019722200905533, "grad_norm": 0.33819758892059326, "learning_rate": 1.1396532169330817e-05, "loss": 0.0276, "step": 66440 }, { "epoch": 1.019875681068222, "grad_norm": 0.4190744459629059, "learning_rate": 1.1393879635407028e-05, "loss": 0.0285, "step": 66450 }, { "epoch": 1.020029161230911, "grad_norm": 0.3964846134185791, "learning_rate": 1.1391227001463779e-05, "loss": 0.0292, "step": 66460 }, { "epoch": 1.0201826413936, "grad_norm": 0.406089186668396, "learning_rate": 1.138857426769141e-05, "loss": 0.0282, "step": 66470 }, { "epoch": 1.020336121556289, "grad_norm": 0.489487886428833, "learning_rate": 1.1385921434280277e-05, "loss": 0.0377, "step": 66480 }, { "epoch": 1.0204896017189777, "grad_norm": 0.3963990807533264, "learning_rate": 1.1383268501420732e-05, "loss": 0.0322, "step": 66490 }, { "epoch": 1.0206430818816667, "grad_norm": 0.2202557921409607, "learning_rate": 1.138061546930314e-05, "loss": 0.0235, "step": 66500 }, { "epoch": 1.0207965620443558, "grad_norm": 0.511060357093811, "learning_rate": 1.1377962338117875e-05, "loss": 0.0306, "step": 66510 }, { "epoch": 1.0209500422070448, "grad_norm": 0.416495144367218, "learning_rate": 1.1375309108055313e-05, "loss": 0.0368, "step": 66520 }, { "epoch": 1.0211035223697338, "grad_norm": 0.33491215109825134, "learning_rate": 1.1372655779305838e-05, "loss": 0.0262, "step": 66530 }, { "epoch": 1.0212570025324226, "grad_norm": 0.42121320962905884, "learning_rate": 1.1370002352059851e-05, "loss": 0.0281, "step": 66540 }, { "epoch": 1.0214104826951116, "grad_norm": 0.2747475802898407, "learning_rate": 1.1367348826507743e-05, "loss": 0.0264, "step": 66550 }, { "epoch": 1.0215639628578006, "grad_norm": 0.3568742275238037, "learning_rate": 1.136469520283992e-05, "loss": 0.0297, "step": 66560 }, { "epoch": 1.0217174430204896, "grad_norm": 0.5276102423667908, "learning_rate": 1.1362041481246803e-05, "loss": 0.0317, "step": 66570 }, { "epoch": 1.0218709231831786, "grad_norm": 0.4194699823856354, "learning_rate": 1.135938766191881e-05, "loss": 0.0329, "step": 66580 }, { "epoch": 1.0220244033458676, "grad_norm": 0.30965280532836914, "learning_rate": 1.1356733745046368e-05, "loss": 0.0245, "step": 66590 }, { "epoch": 1.0221778835085564, "grad_norm": 0.4176255166530609, "learning_rate": 1.1354079730819911e-05, "loss": 0.0274, "step": 66600 }, { "epoch": 1.0223313636712454, "grad_norm": 0.5487449765205383, "learning_rate": 1.1351425619429883e-05, "loss": 0.0339, "step": 66610 }, { "epoch": 1.0224848438339345, "grad_norm": 0.34932032227516174, "learning_rate": 1.1348771411066732e-05, "loss": 0.0334, "step": 66620 }, { "epoch": 1.0226383239966235, "grad_norm": 0.3929770290851593, "learning_rate": 1.1346117105920916e-05, "loss": 0.0319, "step": 66630 }, { "epoch": 1.0227918041593125, "grad_norm": 0.4246385395526886, "learning_rate": 1.134346270418289e-05, "loss": 0.0264, "step": 66640 }, { "epoch": 1.0229452843220015, "grad_norm": 0.4111543297767639, "learning_rate": 1.1340808206043135e-05, "loss": 0.0357, "step": 66650 }, { "epoch": 1.0230987644846903, "grad_norm": 0.49531328678131104, "learning_rate": 1.1338153611692118e-05, "loss": 0.0324, "step": 66660 }, { "epoch": 1.0232522446473793, "grad_norm": 0.35722753405570984, "learning_rate": 1.133549892132033e-05, "loss": 0.0375, "step": 66670 }, { "epoch": 1.0234057248100683, "grad_norm": 0.4352210760116577, "learning_rate": 1.1332844135118255e-05, "loss": 0.0347, "step": 66680 }, { "epoch": 1.0235592049727573, "grad_norm": 0.2894243896007538, "learning_rate": 1.1330189253276394e-05, "loss": 0.0226, "step": 66690 }, { "epoch": 1.0237126851354463, "grad_norm": 0.46440941095352173, "learning_rate": 1.1327534275985253e-05, "loss": 0.0241, "step": 66700 }, { "epoch": 1.0238661652981351, "grad_norm": 0.39731496572494507, "learning_rate": 1.1324879203435335e-05, "loss": 0.037, "step": 66710 }, { "epoch": 1.0240196454608241, "grad_norm": 0.3998114764690399, "learning_rate": 1.1322224035817166e-05, "loss": 0.0287, "step": 66720 }, { "epoch": 1.0241731256235131, "grad_norm": 0.3861686587333679, "learning_rate": 1.1319568773321267e-05, "loss": 0.0344, "step": 66730 }, { "epoch": 1.0243266057862022, "grad_norm": 0.3796514570713043, "learning_rate": 1.1316913416138172e-05, "loss": 0.032, "step": 66740 }, { "epoch": 1.0244800859488912, "grad_norm": 0.3573037087917328, "learning_rate": 1.1314257964458417e-05, "loss": 0.0272, "step": 66750 }, { "epoch": 1.0246335661115802, "grad_norm": 0.3344758152961731, "learning_rate": 1.1311602418472546e-05, "loss": 0.0294, "step": 66760 }, { "epoch": 1.024787046274269, "grad_norm": 0.4182240068912506, "learning_rate": 1.130894677837111e-05, "loss": 0.0388, "step": 66770 }, { "epoch": 1.024940526436958, "grad_norm": 0.2819080650806427, "learning_rate": 1.1306291044344674e-05, "loss": 0.0297, "step": 66780 }, { "epoch": 1.025094006599647, "grad_norm": 0.46053966879844666, "learning_rate": 1.1303635216583797e-05, "loss": 0.0398, "step": 66790 }, { "epoch": 1.025247486762336, "grad_norm": 0.34664520621299744, "learning_rate": 1.1300979295279055e-05, "loss": 0.0388, "step": 66800 }, { "epoch": 1.025400966925025, "grad_norm": 0.4084101915359497, "learning_rate": 1.1298323280621024e-05, "loss": 0.03, "step": 66810 }, { "epoch": 1.0255544470877138, "grad_norm": 0.36366915702819824, "learning_rate": 1.1295667172800289e-05, "loss": 0.0345, "step": 66820 }, { "epoch": 1.0257079272504028, "grad_norm": 0.331560879945755, "learning_rate": 1.1293010972007449e-05, "loss": 0.0253, "step": 66830 }, { "epoch": 1.0258614074130918, "grad_norm": 0.2997637987136841, "learning_rate": 1.1290354678433091e-05, "loss": 0.0315, "step": 66840 }, { "epoch": 1.0260148875757809, "grad_norm": 0.2951458990573883, "learning_rate": 1.128769829226783e-05, "loss": 0.0274, "step": 66850 }, { "epoch": 1.0261683677384699, "grad_norm": 0.2257109135389328, "learning_rate": 1.1285041813702274e-05, "loss": 0.0277, "step": 66860 }, { "epoch": 1.0263218479011589, "grad_norm": 0.3953758180141449, "learning_rate": 1.1282385242927038e-05, "loss": 0.0281, "step": 66870 }, { "epoch": 1.0264753280638477, "grad_norm": 0.44774511456489563, "learning_rate": 1.127972858013276e-05, "loss": 0.0329, "step": 66880 }, { "epoch": 1.0266288082265367, "grad_norm": 0.22031496465206146, "learning_rate": 1.1277071825510057e-05, "loss": 0.0252, "step": 66890 }, { "epoch": 1.0267822883892257, "grad_norm": 0.443094402551651, "learning_rate": 1.127441497924958e-05, "loss": 0.0345, "step": 66900 }, { "epoch": 1.0269357685519147, "grad_norm": 0.4659706950187683, "learning_rate": 1.1271758041541965e-05, "loss": 0.0291, "step": 66910 }, { "epoch": 1.0270892487146037, "grad_norm": 0.3732234239578247, "learning_rate": 1.1269101012577866e-05, "loss": 0.0253, "step": 66920 }, { "epoch": 1.0272427288772925, "grad_norm": 0.24528439342975616, "learning_rate": 1.1266443892547945e-05, "loss": 0.0255, "step": 66930 }, { "epoch": 1.0273962090399815, "grad_norm": 0.3617894649505615, "learning_rate": 1.1263786681642861e-05, "loss": 0.0309, "step": 66940 }, { "epoch": 1.0275496892026705, "grad_norm": 0.4005504250526428, "learning_rate": 1.1261129380053291e-05, "loss": 0.0391, "step": 66950 }, { "epoch": 1.0277031693653595, "grad_norm": 0.44426673650741577, "learning_rate": 1.125847198796991e-05, "loss": 0.0289, "step": 66960 }, { "epoch": 1.0278566495280486, "grad_norm": 0.3927651047706604, "learning_rate": 1.12558145055834e-05, "loss": 0.0355, "step": 66970 }, { "epoch": 1.0280101296907376, "grad_norm": 0.5729421973228455, "learning_rate": 1.1253156933084456e-05, "loss": 0.0371, "step": 66980 }, { "epoch": 1.0281636098534264, "grad_norm": 0.3860541582107544, "learning_rate": 1.1250499270663775e-05, "loss": 0.0268, "step": 66990 }, { "epoch": 1.0283170900161154, "grad_norm": 0.36830154061317444, "learning_rate": 1.1247841518512057e-05, "loss": 0.0227, "step": 67000 }, { "epoch": 1.0284705701788044, "grad_norm": 0.31704285740852356, "learning_rate": 1.1245183676820015e-05, "loss": 0.037, "step": 67010 }, { "epoch": 1.0286240503414934, "grad_norm": 0.22495275735855103, "learning_rate": 1.124252574577836e-05, "loss": 0.0262, "step": 67020 }, { "epoch": 1.0287775305041824, "grad_norm": 0.3927800953388214, "learning_rate": 1.1239867725577827e-05, "loss": 0.0235, "step": 67030 }, { "epoch": 1.0289310106668712, "grad_norm": 0.5067737698554993, "learning_rate": 1.1237209616409133e-05, "loss": 0.0353, "step": 67040 }, { "epoch": 1.0290844908295602, "grad_norm": 0.40716779232025146, "learning_rate": 1.1234551418463023e-05, "loss": 0.0335, "step": 67050 }, { "epoch": 1.0292379709922492, "grad_norm": 0.374306857585907, "learning_rate": 1.1231893131930229e-05, "loss": 0.0375, "step": 67060 }, { "epoch": 1.0293914511549382, "grad_norm": 0.4208559989929199, "learning_rate": 1.1229234757001507e-05, "loss": 0.0294, "step": 67070 }, { "epoch": 1.0295449313176273, "grad_norm": 0.3018961250782013, "learning_rate": 1.1226576293867613e-05, "loss": 0.0274, "step": 67080 }, { "epoch": 1.0296984114803163, "grad_norm": 0.500801146030426, "learning_rate": 1.1223917742719304e-05, "loss": 0.0275, "step": 67090 }, { "epoch": 1.029851891643005, "grad_norm": 0.4030153453350067, "learning_rate": 1.1221259103747348e-05, "loss": 0.0292, "step": 67100 }, { "epoch": 1.030005371805694, "grad_norm": 0.37705904245376587, "learning_rate": 1.1218600377142517e-05, "loss": 0.0244, "step": 67110 }, { "epoch": 1.030158851968383, "grad_norm": 0.3232978284358978, "learning_rate": 1.1215941563095597e-05, "loss": 0.0263, "step": 67120 }, { "epoch": 1.030312332131072, "grad_norm": 0.33806300163269043, "learning_rate": 1.121328266179737e-05, "loss": 0.0306, "step": 67130 }, { "epoch": 1.030465812293761, "grad_norm": 0.3381823003292084, "learning_rate": 1.1210623673438627e-05, "loss": 0.0279, "step": 67140 }, { "epoch": 1.03061929245645, "grad_norm": 0.3632696270942688, "learning_rate": 1.120796459821017e-05, "loss": 0.0215, "step": 67150 }, { "epoch": 1.030772772619139, "grad_norm": 0.2504579722881317, "learning_rate": 1.1205305436302803e-05, "loss": 0.0295, "step": 67160 }, { "epoch": 1.030926252781828, "grad_norm": 0.44249871373176575, "learning_rate": 1.1202646187907338e-05, "loss": 0.0237, "step": 67170 }, { "epoch": 1.031079732944517, "grad_norm": 0.5037722587585449, "learning_rate": 1.119998685321459e-05, "loss": 0.0296, "step": 67180 }, { "epoch": 1.031233213107206, "grad_norm": 0.36600613594055176, "learning_rate": 1.1197327432415386e-05, "loss": 0.0338, "step": 67190 }, { "epoch": 1.031386693269895, "grad_norm": 0.3758997917175293, "learning_rate": 1.1194667925700557e-05, "loss": 0.0284, "step": 67200 }, { "epoch": 1.0315401734325838, "grad_norm": 0.319683313369751, "learning_rate": 1.1192008333260933e-05, "loss": 0.0372, "step": 67210 }, { "epoch": 1.0316936535952728, "grad_norm": 0.35821622610092163, "learning_rate": 1.1189348655287359e-05, "loss": 0.0359, "step": 67220 }, { "epoch": 1.0318471337579618, "grad_norm": 0.31606656312942505, "learning_rate": 1.1186688891970686e-05, "loss": 0.0262, "step": 67230 }, { "epoch": 1.0320006139206508, "grad_norm": 0.29760006070137024, "learning_rate": 1.1184029043501763e-05, "loss": 0.0341, "step": 67240 }, { "epoch": 1.0321540940833398, "grad_norm": 0.3095099925994873, "learning_rate": 1.118136911007146e-05, "loss": 0.0298, "step": 67250 }, { "epoch": 1.0323075742460288, "grad_norm": 0.3559892475605011, "learning_rate": 1.1178709091870632e-05, "loss": 0.0447, "step": 67260 }, { "epoch": 1.0324610544087176, "grad_norm": 0.5100675821304321, "learning_rate": 1.1176048989090158e-05, "loss": 0.0362, "step": 67270 }, { "epoch": 1.0326145345714066, "grad_norm": 0.3208209276199341, "learning_rate": 1.1173388801920917e-05, "loss": 0.0281, "step": 67280 }, { "epoch": 1.0327680147340956, "grad_norm": 0.3281417191028595, "learning_rate": 1.1170728530553789e-05, "loss": 0.0365, "step": 67290 }, { "epoch": 1.0329214948967846, "grad_norm": 0.2241702675819397, "learning_rate": 1.1168068175179676e-05, "loss": 0.0256, "step": 67300 }, { "epoch": 1.0330749750594737, "grad_norm": 0.3843107521533966, "learning_rate": 1.1165407735989461e-05, "loss": 0.0335, "step": 67310 }, { "epoch": 1.0332284552221624, "grad_norm": 0.39061298966407776, "learning_rate": 1.1162747213174055e-05, "loss": 0.0323, "step": 67320 }, { "epoch": 1.0333819353848515, "grad_norm": 0.5723792314529419, "learning_rate": 1.1160086606924367e-05, "loss": 0.03, "step": 67330 }, { "epoch": 1.0335354155475405, "grad_norm": 0.262324720621109, "learning_rate": 1.1157425917431308e-05, "loss": 0.0362, "step": 67340 }, { "epoch": 1.0336888957102295, "grad_norm": 0.42352089285850525, "learning_rate": 1.1154765144885802e-05, "loss": 0.0284, "step": 67350 }, { "epoch": 1.0338423758729185, "grad_norm": 0.7338852286338806, "learning_rate": 1.1152104289478775e-05, "loss": 0.0274, "step": 67360 }, { "epoch": 1.0339958560356073, "grad_norm": 0.3014529347419739, "learning_rate": 1.1149443351401159e-05, "loss": 0.028, "step": 67370 }, { "epoch": 1.0341493361982963, "grad_norm": 0.36380892992019653, "learning_rate": 1.1146782330843897e-05, "loss": 0.0281, "step": 67380 }, { "epoch": 1.0343028163609853, "grad_norm": 0.4022427797317505, "learning_rate": 1.1144121227997927e-05, "loss": 0.0335, "step": 67390 }, { "epoch": 1.0344562965236743, "grad_norm": 0.3227323293685913, "learning_rate": 1.1141460043054202e-05, "loss": 0.028, "step": 67400 }, { "epoch": 1.0346097766863633, "grad_norm": 0.5022153258323669, "learning_rate": 1.1138798776203682e-05, "loss": 0.0275, "step": 67410 }, { "epoch": 1.0347632568490523, "grad_norm": 0.4274621307849884, "learning_rate": 1.1136137427637324e-05, "loss": 0.0302, "step": 67420 }, { "epoch": 1.0349167370117411, "grad_norm": 0.39631620049476624, "learning_rate": 1.1133475997546099e-05, "loss": 0.0344, "step": 67430 }, { "epoch": 1.0350702171744302, "grad_norm": 0.30875080823898315, "learning_rate": 1.113081448612098e-05, "loss": 0.0215, "step": 67440 }, { "epoch": 1.0352236973371192, "grad_norm": 0.48494014143943787, "learning_rate": 1.1128152893552949e-05, "loss": 0.0328, "step": 67450 }, { "epoch": 1.0353771774998082, "grad_norm": 0.4372751712799072, "learning_rate": 1.1125491220032989e-05, "loss": 0.0334, "step": 67460 }, { "epoch": 1.0355306576624972, "grad_norm": 0.5091769695281982, "learning_rate": 1.1122829465752092e-05, "loss": 0.0398, "step": 67470 }, { "epoch": 1.0356841378251862, "grad_norm": 0.520053505897522, "learning_rate": 1.1120167630901258e-05, "loss": 0.0301, "step": 67480 }, { "epoch": 1.035837617987875, "grad_norm": 0.3458981215953827, "learning_rate": 1.1117505715671487e-05, "loss": 0.0294, "step": 67490 }, { "epoch": 1.035991098150564, "grad_norm": 0.40389370918273926, "learning_rate": 1.1114843720253789e-05, "loss": 0.0306, "step": 67500 }, { "epoch": 1.036144578313253, "grad_norm": 0.27268925309181213, "learning_rate": 1.1112181644839178e-05, "loss": 0.023, "step": 67510 }, { "epoch": 1.036298058475942, "grad_norm": 0.33135029673576355, "learning_rate": 1.1109519489618672e-05, "loss": 0.0311, "step": 67520 }, { "epoch": 1.036451538638631, "grad_norm": 0.32788464426994324, "learning_rate": 1.1106857254783304e-05, "loss": 0.0256, "step": 67530 }, { "epoch": 1.0366050188013198, "grad_norm": 0.3536458909511566, "learning_rate": 1.11041949405241e-05, "loss": 0.0334, "step": 67540 }, { "epoch": 1.0367584989640088, "grad_norm": 0.3480774164199829, "learning_rate": 1.1101532547032098e-05, "loss": 0.0281, "step": 67550 }, { "epoch": 1.0369119791266979, "grad_norm": 0.3919546604156494, "learning_rate": 1.1098870074498344e-05, "loss": 0.0352, "step": 67560 }, { "epoch": 1.0370654592893869, "grad_norm": 0.40173932909965515, "learning_rate": 1.1096207523113884e-05, "loss": 0.0285, "step": 67570 }, { "epoch": 1.0372189394520759, "grad_norm": 0.29592904448509216, "learning_rate": 1.1093544893069774e-05, "loss": 0.0408, "step": 67580 }, { "epoch": 1.037372419614765, "grad_norm": 0.31149765849113464, "learning_rate": 1.1090882184557075e-05, "loss": 0.0206, "step": 67590 }, { "epoch": 1.0375258997774537, "grad_norm": 0.24379447102546692, "learning_rate": 1.1088219397766849e-05, "loss": 0.0249, "step": 67600 }, { "epoch": 1.0376793799401427, "grad_norm": 0.3671051561832428, "learning_rate": 1.108555653289017e-05, "loss": 0.0304, "step": 67610 }, { "epoch": 1.0378328601028317, "grad_norm": 0.38246893882751465, "learning_rate": 1.1082893590118113e-05, "loss": 0.034, "step": 67620 }, { "epoch": 1.0379863402655207, "grad_norm": 0.46244367957115173, "learning_rate": 1.1080230569641766e-05, "loss": 0.0325, "step": 67630 }, { "epoch": 1.0381398204282097, "grad_norm": 0.39219406247138977, "learning_rate": 1.1077567471652211e-05, "loss": 0.0336, "step": 67640 }, { "epoch": 1.0382933005908985, "grad_norm": 0.32700297236442566, "learning_rate": 1.1074904296340545e-05, "loss": 0.0283, "step": 67650 }, { "epoch": 1.0384467807535875, "grad_norm": 0.2786141335964203, "learning_rate": 1.1072241043897866e-05, "loss": 0.0347, "step": 67660 }, { "epoch": 1.0386002609162766, "grad_norm": 0.39272356033325195, "learning_rate": 1.106957771451528e-05, "loss": 0.0318, "step": 67670 }, { "epoch": 1.0387537410789656, "grad_norm": 0.44168081879615784, "learning_rate": 1.1066914308383895e-05, "loss": 0.0303, "step": 67680 }, { "epoch": 1.0389072212416546, "grad_norm": 0.35080960392951965, "learning_rate": 1.106425082569483e-05, "loss": 0.0246, "step": 67690 }, { "epoch": 1.0390607014043436, "grad_norm": 0.45248714089393616, "learning_rate": 1.1061587266639204e-05, "loss": 0.0294, "step": 67700 }, { "epoch": 1.0392141815670324, "grad_norm": 0.4079968333244324, "learning_rate": 1.1058923631408142e-05, "loss": 0.0296, "step": 67710 }, { "epoch": 1.0393676617297214, "grad_norm": 0.34295496344566345, "learning_rate": 1.1056259920192783e-05, "loss": 0.0361, "step": 67720 }, { "epoch": 1.0395211418924104, "grad_norm": 0.510033905506134, "learning_rate": 1.1053596133184258e-05, "loss": 0.0358, "step": 67730 }, { "epoch": 1.0396746220550994, "grad_norm": 0.35285428166389465, "learning_rate": 1.1050932270573711e-05, "loss": 0.0356, "step": 67740 }, { "epoch": 1.0398281022177884, "grad_norm": 0.2614591717720032, "learning_rate": 1.1048268332552297e-05, "loss": 0.0232, "step": 67750 }, { "epoch": 1.0399815823804772, "grad_norm": 0.37647101283073425, "learning_rate": 1.1045604319311159e-05, "loss": 0.0294, "step": 67760 }, { "epoch": 1.0401350625431662, "grad_norm": 0.38601744174957275, "learning_rate": 1.1042940231041465e-05, "loss": 0.0224, "step": 67770 }, { "epoch": 1.0402885427058552, "grad_norm": 0.33128032088279724, "learning_rate": 1.104027606793438e-05, "loss": 0.0305, "step": 67780 }, { "epoch": 1.0404420228685443, "grad_norm": 0.5595617890357971, "learning_rate": 1.1037611830181065e-05, "loss": 0.0338, "step": 67790 }, { "epoch": 1.0405955030312333, "grad_norm": 0.401523232460022, "learning_rate": 1.103494751797271e-05, "loss": 0.0304, "step": 67800 }, { "epoch": 1.0407489831939223, "grad_norm": 0.5436943769454956, "learning_rate": 1.103228313150048e-05, "loss": 0.0273, "step": 67810 }, { "epoch": 1.040902463356611, "grad_norm": 0.4102357029914856, "learning_rate": 1.1029618670955573e-05, "loss": 0.0306, "step": 67820 }, { "epoch": 1.0410559435193, "grad_norm": 0.4916558563709259, "learning_rate": 1.1026954136529175e-05, "loss": 0.0226, "step": 67830 }, { "epoch": 1.041209423681989, "grad_norm": 0.474084734916687, "learning_rate": 1.1024289528412484e-05, "loss": 0.0281, "step": 67840 }, { "epoch": 1.0413629038446781, "grad_norm": 0.33163005113601685, "learning_rate": 1.1021624846796703e-05, "loss": 0.039, "step": 67850 }, { "epoch": 1.0415163840073671, "grad_norm": 0.4214474558830261, "learning_rate": 1.1018960091873039e-05, "loss": 0.0336, "step": 67860 }, { "epoch": 1.041669864170056, "grad_norm": 0.4329037666320801, "learning_rate": 1.1016295263832699e-05, "loss": 0.0235, "step": 67870 }, { "epoch": 1.041823344332745, "grad_norm": 0.487415075302124, "learning_rate": 1.1013630362866913e-05, "loss": 0.0296, "step": 67880 }, { "epoch": 1.041976824495434, "grad_norm": 0.45736613869667053, "learning_rate": 1.1010965389166892e-05, "loss": 0.0283, "step": 67890 }, { "epoch": 1.042130304658123, "grad_norm": 0.4522298276424408, "learning_rate": 1.1008300342923871e-05, "loss": 0.0253, "step": 67900 }, { "epoch": 1.042283784820812, "grad_norm": 0.3854687511920929, "learning_rate": 1.1005635224329082e-05, "loss": 0.0365, "step": 67910 }, { "epoch": 1.042437264983501, "grad_norm": 0.26578038930892944, "learning_rate": 1.1002970033573766e-05, "loss": 0.0396, "step": 67920 }, { "epoch": 1.0425907451461898, "grad_norm": 0.24122941493988037, "learning_rate": 1.1000304770849164e-05, "loss": 0.0233, "step": 67930 }, { "epoch": 1.0427442253088788, "grad_norm": 0.3657318353652954, "learning_rate": 1.0997639436346522e-05, "loss": 0.0325, "step": 67940 }, { "epoch": 1.0428977054715678, "grad_norm": 0.5192129611968994, "learning_rate": 1.0994974030257097e-05, "loss": 0.033, "step": 67950 }, { "epoch": 1.0430511856342568, "grad_norm": 0.26929667592048645, "learning_rate": 1.0992308552772153e-05, "loss": 0.0288, "step": 67960 }, { "epoch": 1.0432046657969458, "grad_norm": 0.21828299760818481, "learning_rate": 1.0989643004082948e-05, "loss": 0.0306, "step": 67970 }, { "epoch": 1.0433581459596346, "grad_norm": 0.28129395842552185, "learning_rate": 1.0986977384380755e-05, "loss": 0.0253, "step": 67980 }, { "epoch": 1.0435116261223236, "grad_norm": 0.31575995683670044, "learning_rate": 1.0984311693856843e-05, "loss": 0.0305, "step": 67990 }, { "epoch": 1.0436651062850126, "grad_norm": 0.3322039842605591, "learning_rate": 1.0981645932702503e-05, "loss": 0.0261, "step": 68000 }, { "epoch": 1.0438185864477016, "grad_norm": 0.29598233103752136, "learning_rate": 1.097898010110901e-05, "loss": 0.0236, "step": 68010 }, { "epoch": 1.0439720666103907, "grad_norm": 0.4413970112800598, "learning_rate": 1.0976314199267657e-05, "loss": 0.0291, "step": 68020 }, { "epoch": 1.0441255467730797, "grad_norm": 0.5509024858474731, "learning_rate": 1.0973648227369737e-05, "loss": 0.036, "step": 68030 }, { "epoch": 1.0442790269357685, "grad_norm": 0.3661731779575348, "learning_rate": 1.0970982185606555e-05, "loss": 0.0322, "step": 68040 }, { "epoch": 1.0444325070984575, "grad_norm": 0.26728519797325134, "learning_rate": 1.0968316074169414e-05, "loss": 0.0317, "step": 68050 }, { "epoch": 1.0445859872611465, "grad_norm": 0.6238507032394409, "learning_rate": 1.0965649893249619e-05, "loss": 0.0331, "step": 68060 }, { "epoch": 1.0447394674238355, "grad_norm": 0.4204256534576416, "learning_rate": 1.0962983643038493e-05, "loss": 0.0307, "step": 68070 }, { "epoch": 1.0448929475865245, "grad_norm": 0.4252377450466156, "learning_rate": 1.0960317323727345e-05, "loss": 0.0285, "step": 68080 }, { "epoch": 1.0450464277492135, "grad_norm": 0.45809584856033325, "learning_rate": 1.095765093550751e-05, "loss": 0.0276, "step": 68090 }, { "epoch": 1.0451999079119023, "grad_norm": 0.39446499943733215, "learning_rate": 1.0954984478570316e-05, "loss": 0.0296, "step": 68100 }, { "epoch": 1.0453533880745913, "grad_norm": 0.4995425045490265, "learning_rate": 1.0952317953107095e-05, "loss": 0.0303, "step": 68110 }, { "epoch": 1.0455068682372803, "grad_norm": 0.5070648193359375, "learning_rate": 1.0949651359309186e-05, "loss": 0.0271, "step": 68120 }, { "epoch": 1.0456603483999694, "grad_norm": 0.4156457483768463, "learning_rate": 1.0946984697367933e-05, "loss": 0.0307, "step": 68130 }, { "epoch": 1.0458138285626584, "grad_norm": 0.4469476342201233, "learning_rate": 1.0944317967474691e-05, "loss": 0.0253, "step": 68140 }, { "epoch": 1.0459673087253472, "grad_norm": 0.36629748344421387, "learning_rate": 1.094165116982081e-05, "loss": 0.0439, "step": 68150 }, { "epoch": 1.0461207888880362, "grad_norm": 0.38295286893844604, "learning_rate": 1.0938984304597648e-05, "loss": 0.0303, "step": 68160 }, { "epoch": 1.0462742690507252, "grad_norm": 0.4879415035247803, "learning_rate": 1.0936317371996576e-05, "loss": 0.0399, "step": 68170 }, { "epoch": 1.0464277492134142, "grad_norm": 0.35897204279899597, "learning_rate": 1.0933650372208952e-05, "loss": 0.034, "step": 68180 }, { "epoch": 1.0465812293761032, "grad_norm": 0.36630943417549133, "learning_rate": 1.0930983305426162e-05, "loss": 0.0322, "step": 68190 }, { "epoch": 1.0467347095387922, "grad_norm": 0.4241332411766052, "learning_rate": 1.0928316171839575e-05, "loss": 0.034, "step": 68200 }, { "epoch": 1.046888189701481, "grad_norm": 0.3290940225124359, "learning_rate": 1.0925648971640575e-05, "loss": 0.0257, "step": 68210 }, { "epoch": 1.04704166986417, "grad_norm": 0.5059393644332886, "learning_rate": 1.0922981705020557e-05, "loss": 0.0391, "step": 68220 }, { "epoch": 1.047195150026859, "grad_norm": 0.6108510494232178, "learning_rate": 1.0920314372170904e-05, "loss": 0.0358, "step": 68230 }, { "epoch": 1.047348630189548, "grad_norm": 0.4577643871307373, "learning_rate": 1.0917646973283023e-05, "loss": 0.0371, "step": 68240 }, { "epoch": 1.047502110352237, "grad_norm": 0.3410022556781769, "learning_rate": 1.0914979508548311e-05, "loss": 0.0286, "step": 68250 }, { "epoch": 1.0476555905149258, "grad_norm": 0.5413411855697632, "learning_rate": 1.0912311978158175e-05, "loss": 0.0271, "step": 68260 }, { "epoch": 1.0478090706776149, "grad_norm": 0.44109585881233215, "learning_rate": 1.090964438230403e-05, "loss": 0.0378, "step": 68270 }, { "epoch": 1.0479625508403039, "grad_norm": 0.40454331040382385, "learning_rate": 1.0906976721177289e-05, "loss": 0.0316, "step": 68280 }, { "epoch": 1.0481160310029929, "grad_norm": 0.20865824818611145, "learning_rate": 1.0904308994969373e-05, "loss": 0.0309, "step": 68290 }, { "epoch": 1.048269511165682, "grad_norm": 0.35532012581825256, "learning_rate": 1.0901641203871714e-05, "loss": 0.0325, "step": 68300 }, { "epoch": 1.048422991328371, "grad_norm": 0.6310229301452637, "learning_rate": 1.0898973348075735e-05, "loss": 0.0315, "step": 68310 }, { "epoch": 1.0485764714910597, "grad_norm": 0.3116258680820465, "learning_rate": 1.0896305427772873e-05, "loss": 0.021, "step": 68320 }, { "epoch": 1.0487299516537487, "grad_norm": 0.4262663722038269, "learning_rate": 1.089363744315457e-05, "loss": 0.0332, "step": 68330 }, { "epoch": 1.0488834318164377, "grad_norm": 0.37248536944389343, "learning_rate": 1.0890969394412267e-05, "loss": 0.0332, "step": 68340 }, { "epoch": 1.0490369119791267, "grad_norm": 0.47582709789276123, "learning_rate": 1.0888301281737419e-05, "loss": 0.0322, "step": 68350 }, { "epoch": 1.0491903921418158, "grad_norm": 0.3451847434043884, "learning_rate": 1.0885633105321474e-05, "loss": 0.0288, "step": 68360 }, { "epoch": 1.0493438723045045, "grad_norm": 0.6020116209983826, "learning_rate": 1.088296486535589e-05, "loss": 0.034, "step": 68370 }, { "epoch": 1.0494973524671936, "grad_norm": 0.3936903774738312, "learning_rate": 1.0880296562032133e-05, "loss": 0.0295, "step": 68380 }, { "epoch": 1.0496508326298826, "grad_norm": 0.4112843871116638, "learning_rate": 1.0877628195541665e-05, "loss": 0.023, "step": 68390 }, { "epoch": 1.0498043127925716, "grad_norm": 0.42187342047691345, "learning_rate": 1.0874959766075967e-05, "loss": 0.0401, "step": 68400 }, { "epoch": 1.0499577929552606, "grad_norm": 0.41925206780433655, "learning_rate": 1.0872291273826505e-05, "loss": 0.0303, "step": 68410 }, { "epoch": 1.0501112731179496, "grad_norm": 0.4265116751194, "learning_rate": 1.0869622718984766e-05, "loss": 0.0348, "step": 68420 }, { "epoch": 1.0502647532806384, "grad_norm": 0.5345647931098938, "learning_rate": 1.0866954101742235e-05, "loss": 0.0303, "step": 68430 }, { "epoch": 1.0504182334433274, "grad_norm": 0.3903912603855133, "learning_rate": 1.0864285422290398e-05, "loss": 0.0257, "step": 68440 }, { "epoch": 1.0505717136060164, "grad_norm": 0.31991642713546753, "learning_rate": 1.0861616680820753e-05, "loss": 0.0358, "step": 68450 }, { "epoch": 1.0507251937687054, "grad_norm": 0.46554380655288696, "learning_rate": 1.0858947877524797e-05, "loss": 0.0336, "step": 68460 }, { "epoch": 1.0508786739313944, "grad_norm": 0.5279620289802551, "learning_rate": 1.0856279012594034e-05, "loss": 0.0315, "step": 68470 }, { "epoch": 1.0510321540940832, "grad_norm": 0.38243547081947327, "learning_rate": 1.085361008621997e-05, "loss": 0.0293, "step": 68480 }, { "epoch": 1.0511856342567722, "grad_norm": 0.4018777310848236, "learning_rate": 1.0850941098594117e-05, "loss": 0.0348, "step": 68490 }, { "epoch": 1.0513391144194613, "grad_norm": 0.4155927300453186, "learning_rate": 1.0848272049907994e-05, "loss": 0.0343, "step": 68500 }, { "epoch": 1.0514925945821503, "grad_norm": 0.5267933011054993, "learning_rate": 1.0845602940353117e-05, "loss": 0.0347, "step": 68510 }, { "epoch": 1.0516460747448393, "grad_norm": 0.3892870247364044, "learning_rate": 1.0842933770121015e-05, "loss": 0.0371, "step": 68520 }, { "epoch": 1.0517995549075283, "grad_norm": 0.3672889471054077, "learning_rate": 1.0840264539403219e-05, "loss": 0.0305, "step": 68530 }, { "epoch": 1.051953035070217, "grad_norm": 0.38667333126068115, "learning_rate": 1.0837595248391257e-05, "loss": 0.0269, "step": 68540 }, { "epoch": 1.052106515232906, "grad_norm": 0.3075660765171051, "learning_rate": 1.0834925897276672e-05, "loss": 0.0248, "step": 68550 }, { "epoch": 1.0522599953955951, "grad_norm": 0.43686944246292114, "learning_rate": 1.0832256486251002e-05, "loss": 0.0344, "step": 68560 }, { "epoch": 1.0524134755582841, "grad_norm": 0.31121259927749634, "learning_rate": 1.0829587015505799e-05, "loss": 0.0302, "step": 68570 }, { "epoch": 1.0525669557209731, "grad_norm": 0.4777407646179199, "learning_rate": 1.0826917485232608e-05, "loss": 0.0276, "step": 68580 }, { "epoch": 1.052720435883662, "grad_norm": 0.42358121275901794, "learning_rate": 1.082424789562299e-05, "loss": 0.0271, "step": 68590 }, { "epoch": 1.052873916046351, "grad_norm": 0.3603442907333374, "learning_rate": 1.0821578246868503e-05, "loss": 0.0307, "step": 68600 }, { "epoch": 1.05302739620904, "grad_norm": 0.4020305871963501, "learning_rate": 1.0818908539160709e-05, "loss": 0.03, "step": 68610 }, { "epoch": 1.053180876371729, "grad_norm": 0.27574947476387024, "learning_rate": 1.0816238772691175e-05, "loss": 0.0318, "step": 68620 }, { "epoch": 1.053334356534418, "grad_norm": 0.4420788884162903, "learning_rate": 1.0813568947651474e-05, "loss": 0.0347, "step": 68630 }, { "epoch": 1.053487836697107, "grad_norm": 0.34153929352760315, "learning_rate": 1.0810899064233187e-05, "loss": 0.0321, "step": 68640 }, { "epoch": 1.0536413168597958, "grad_norm": 0.5010422468185425, "learning_rate": 1.0808229122627889e-05, "loss": 0.0347, "step": 68650 }, { "epoch": 1.0537947970224848, "grad_norm": 0.29059329628944397, "learning_rate": 1.080555912302717e-05, "loss": 0.0308, "step": 68660 }, { "epoch": 1.0539482771851738, "grad_norm": 0.25076404213905334, "learning_rate": 1.0802889065622613e-05, "loss": 0.0344, "step": 68670 }, { "epoch": 1.0541017573478628, "grad_norm": 0.41988733410835266, "learning_rate": 1.0800218950605811e-05, "loss": 0.0317, "step": 68680 }, { "epoch": 1.0542552375105518, "grad_norm": 0.46211883425712585, "learning_rate": 1.0797548778168368e-05, "loss": 0.033, "step": 68690 }, { "epoch": 1.0544087176732408, "grad_norm": 0.31022992730140686, "learning_rate": 1.079487854850188e-05, "loss": 0.0309, "step": 68700 }, { "epoch": 1.0545621978359296, "grad_norm": 0.3920014798641205, "learning_rate": 1.0792208261797952e-05, "loss": 0.0352, "step": 68710 }, { "epoch": 1.0547156779986187, "grad_norm": 0.3365005850791931, "learning_rate": 1.0789537918248201e-05, "loss": 0.0261, "step": 68720 }, { "epoch": 1.0548691581613077, "grad_norm": 0.42084944248199463, "learning_rate": 1.0786867518044229e-05, "loss": 0.03, "step": 68730 }, { "epoch": 1.0550226383239967, "grad_norm": 0.3973854184150696, "learning_rate": 1.0784197061377662e-05, "loss": 0.0331, "step": 68740 }, { "epoch": 1.0551761184866857, "grad_norm": 0.4111166298389435, "learning_rate": 1.078152654844012e-05, "loss": 0.0335, "step": 68750 }, { "epoch": 1.0553295986493745, "grad_norm": 0.38928577303886414, "learning_rate": 1.0778855979423226e-05, "loss": 0.0326, "step": 68760 }, { "epoch": 1.0554830788120635, "grad_norm": 0.3389630615711212, "learning_rate": 1.0776185354518616e-05, "loss": 0.0366, "step": 68770 }, { "epoch": 1.0556365589747525, "grad_norm": 0.47385576367378235, "learning_rate": 1.0773514673917915e-05, "loss": 0.0312, "step": 68780 }, { "epoch": 1.0557900391374415, "grad_norm": 0.3611661195755005, "learning_rate": 1.077084393781277e-05, "loss": 0.0335, "step": 68790 }, { "epoch": 1.0559435193001305, "grad_norm": 0.40534475445747375, "learning_rate": 1.0768173146394816e-05, "loss": 0.0349, "step": 68800 }, { "epoch": 1.0560969994628193, "grad_norm": 0.3650763928890228, "learning_rate": 1.0765502299855699e-05, "loss": 0.0249, "step": 68810 }, { "epoch": 1.0562504796255083, "grad_norm": 0.3683253526687622, "learning_rate": 1.0762831398387074e-05, "loss": 0.0263, "step": 68820 }, { "epoch": 1.0564039597881973, "grad_norm": 0.37528181076049805, "learning_rate": 1.0760160442180592e-05, "loss": 0.0296, "step": 68830 }, { "epoch": 1.0565574399508864, "grad_norm": 0.33835625648498535, "learning_rate": 1.0757489431427905e-05, "loss": 0.0281, "step": 68840 }, { "epoch": 1.0567109201135754, "grad_norm": 0.5642163753509521, "learning_rate": 1.0754818366320687e-05, "loss": 0.0327, "step": 68850 }, { "epoch": 1.0568644002762644, "grad_norm": 0.448912113904953, "learning_rate": 1.075214724705059e-05, "loss": 0.0295, "step": 68860 }, { "epoch": 1.0570178804389532, "grad_norm": 0.3581378161907196, "learning_rate": 1.0749476073809293e-05, "loss": 0.0234, "step": 68870 }, { "epoch": 1.0571713606016422, "grad_norm": 0.3553033173084259, "learning_rate": 1.0746804846788464e-05, "loss": 0.027, "step": 68880 }, { "epoch": 1.0573248407643312, "grad_norm": 0.36260345578193665, "learning_rate": 1.074413356617978e-05, "loss": 0.028, "step": 68890 }, { "epoch": 1.0574783209270202, "grad_norm": 0.4353562295436859, "learning_rate": 1.0741462232174926e-05, "loss": 0.0285, "step": 68900 }, { "epoch": 1.0576318010897092, "grad_norm": 0.39580443501472473, "learning_rate": 1.0738790844965581e-05, "loss": 0.033, "step": 68910 }, { "epoch": 1.0577852812523982, "grad_norm": 0.285158634185791, "learning_rate": 1.073611940474344e-05, "loss": 0.0272, "step": 68920 }, { "epoch": 1.057938761415087, "grad_norm": 0.25624212622642517, "learning_rate": 1.073344791170019e-05, "loss": 0.0264, "step": 68930 }, { "epoch": 1.058092241577776, "grad_norm": 0.3804348409175873, "learning_rate": 1.0730776366027526e-05, "loss": 0.035, "step": 68940 }, { "epoch": 1.058245721740465, "grad_norm": 0.33064427971839905, "learning_rate": 1.0728104767917154e-05, "loss": 0.0323, "step": 68950 }, { "epoch": 1.058399201903154, "grad_norm": 0.4949459433555603, "learning_rate": 1.0725433117560773e-05, "loss": 0.0266, "step": 68960 }, { "epoch": 1.058552682065843, "grad_norm": 0.4540189802646637, "learning_rate": 1.0722761415150094e-05, "loss": 0.0305, "step": 68970 }, { "epoch": 1.0587061622285319, "grad_norm": 0.5020765066146851, "learning_rate": 1.0720089660876824e-05, "loss": 0.0268, "step": 68980 }, { "epoch": 1.0588596423912209, "grad_norm": 0.3934943974018097, "learning_rate": 1.071741785493268e-05, "loss": 0.0395, "step": 68990 }, { "epoch": 1.05901312255391, "grad_norm": 0.4365004599094391, "learning_rate": 1.071474599750938e-05, "loss": 0.0363, "step": 69000 }, { "epoch": 1.059166602716599, "grad_norm": 0.3430125117301941, "learning_rate": 1.0712074088798647e-05, "loss": 0.0293, "step": 69010 }, { "epoch": 1.059320082879288, "grad_norm": 0.40487098693847656, "learning_rate": 1.0709402128992208e-05, "loss": 0.0312, "step": 69020 }, { "epoch": 1.059473563041977, "grad_norm": 0.6307082772254944, "learning_rate": 1.0706730118281789e-05, "loss": 0.0409, "step": 69030 }, { "epoch": 1.0596270432046657, "grad_norm": 0.5058222413063049, "learning_rate": 1.0704058056859126e-05, "loss": 0.0348, "step": 69040 }, { "epoch": 1.0597805233673547, "grad_norm": 0.3813762068748474, "learning_rate": 1.0701385944915954e-05, "loss": 0.04, "step": 69050 }, { "epoch": 1.0599340035300437, "grad_norm": 0.4618665874004364, "learning_rate": 1.0698713782644017e-05, "loss": 0.0383, "step": 69060 }, { "epoch": 1.0600874836927328, "grad_norm": 0.5121057033538818, "learning_rate": 1.0696041570235056e-05, "loss": 0.0333, "step": 69070 }, { "epoch": 1.0602409638554218, "grad_norm": 0.33516430854797363, "learning_rate": 1.0693369307880817e-05, "loss": 0.0285, "step": 69080 }, { "epoch": 1.0603944440181106, "grad_norm": 0.5526232123374939, "learning_rate": 1.0690696995773058e-05, "loss": 0.0234, "step": 69090 }, { "epoch": 1.0605479241807996, "grad_norm": 0.4623967111110687, "learning_rate": 1.0688024634103527e-05, "loss": 0.0307, "step": 69100 }, { "epoch": 1.0607014043434886, "grad_norm": 0.292483925819397, "learning_rate": 1.0685352223063987e-05, "loss": 0.0343, "step": 69110 }, { "epoch": 1.0608548845061776, "grad_norm": 0.32120078802108765, "learning_rate": 1.0682679762846198e-05, "loss": 0.0379, "step": 69120 }, { "epoch": 1.0610083646688666, "grad_norm": 0.30017662048339844, "learning_rate": 1.0680007253641922e-05, "loss": 0.0334, "step": 69130 }, { "epoch": 1.0611618448315556, "grad_norm": 0.36830052733421326, "learning_rate": 1.0677334695642937e-05, "loss": 0.0321, "step": 69140 }, { "epoch": 1.0613153249942444, "grad_norm": 0.29910731315612793, "learning_rate": 1.0674662089041009e-05, "loss": 0.0322, "step": 69150 }, { "epoch": 1.0614688051569334, "grad_norm": 0.35866305232048035, "learning_rate": 1.0671989434027912e-05, "loss": 0.0369, "step": 69160 }, { "epoch": 1.0616222853196224, "grad_norm": 0.47759467363357544, "learning_rate": 1.066931673079543e-05, "loss": 0.0344, "step": 69170 }, { "epoch": 1.0617757654823115, "grad_norm": 0.3991800546646118, "learning_rate": 1.0666643979535344e-05, "loss": 0.0244, "step": 69180 }, { "epoch": 1.0619292456450005, "grad_norm": 0.4849860370159149, "learning_rate": 1.0663971180439443e-05, "loss": 0.0391, "step": 69190 }, { "epoch": 1.0620827258076893, "grad_norm": 0.4793962240219116, "learning_rate": 1.0661298333699514e-05, "loss": 0.0399, "step": 69200 }, { "epoch": 1.0622362059703783, "grad_norm": 0.29289376735687256, "learning_rate": 1.0658625439507351e-05, "loss": 0.0294, "step": 69210 }, { "epoch": 1.0623896861330673, "grad_norm": 0.4236217439174652, "learning_rate": 1.0655952498054754e-05, "loss": 0.0222, "step": 69220 }, { "epoch": 1.0625431662957563, "grad_norm": 0.7032498121261597, "learning_rate": 1.0653279509533514e-05, "loss": 0.0385, "step": 69230 }, { "epoch": 1.0626966464584453, "grad_norm": 0.5818939805030823, "learning_rate": 1.0650606474135443e-05, "loss": 0.0351, "step": 69240 }, { "epoch": 1.0628501266211343, "grad_norm": 0.22330336272716522, "learning_rate": 1.0647933392052346e-05, "loss": 0.0253, "step": 69250 }, { "epoch": 1.063003606783823, "grad_norm": 0.3315805494785309, "learning_rate": 1.0645260263476029e-05, "loss": 0.0297, "step": 69260 }, { "epoch": 1.0631570869465121, "grad_norm": 0.3370247185230255, "learning_rate": 1.0642587088598313e-05, "loss": 0.0315, "step": 69270 }, { "epoch": 1.0633105671092011, "grad_norm": 0.3209240138530731, "learning_rate": 1.0639913867611006e-05, "loss": 0.0289, "step": 69280 }, { "epoch": 1.0634640472718901, "grad_norm": 0.3728889524936676, "learning_rate": 1.0637240600705934e-05, "loss": 0.0303, "step": 69290 }, { "epoch": 1.0636175274345792, "grad_norm": 0.3641647398471832, "learning_rate": 1.0634567288074919e-05, "loss": 0.0222, "step": 69300 }, { "epoch": 1.0637710075972682, "grad_norm": 0.307584673166275, "learning_rate": 1.0631893929909786e-05, "loss": 0.0335, "step": 69310 }, { "epoch": 1.063924487759957, "grad_norm": 0.4718204140663147, "learning_rate": 1.062922052640237e-05, "loss": 0.0274, "step": 69320 }, { "epoch": 1.064077967922646, "grad_norm": 0.3128567337989807, "learning_rate": 1.0626547077744498e-05, "loss": 0.0277, "step": 69330 }, { "epoch": 1.064231448085335, "grad_norm": 0.3344399929046631, "learning_rate": 1.0623873584128006e-05, "loss": 0.0302, "step": 69340 }, { "epoch": 1.064384928248024, "grad_norm": 0.27125614881515503, "learning_rate": 1.0621200045744744e-05, "loss": 0.0272, "step": 69350 }, { "epoch": 1.064538408410713, "grad_norm": 0.4155939221382141, "learning_rate": 1.061852646278654e-05, "loss": 0.0371, "step": 69360 }, { "epoch": 1.0646918885734018, "grad_norm": 0.3687344789505005, "learning_rate": 1.0615852835445253e-05, "loss": 0.0326, "step": 69370 }, { "epoch": 1.0648453687360908, "grad_norm": 0.38313624262809753, "learning_rate": 1.0613179163912723e-05, "loss": 0.0256, "step": 69380 }, { "epoch": 1.0649988488987798, "grad_norm": 0.6378171443939209, "learning_rate": 1.0610505448380807e-05, "loss": 0.0328, "step": 69390 }, { "epoch": 1.0651523290614688, "grad_norm": 0.4896961450576782, "learning_rate": 1.0607831689041364e-05, "loss": 0.0317, "step": 69400 }, { "epoch": 1.0653058092241579, "grad_norm": 0.42178747057914734, "learning_rate": 1.0605157886086244e-05, "loss": 0.0291, "step": 69410 }, { "epoch": 1.0654592893868466, "grad_norm": 0.2984624207019806, "learning_rate": 1.0602484039707317e-05, "loss": 0.0298, "step": 69420 }, { "epoch": 1.0656127695495357, "grad_norm": 0.3683774471282959, "learning_rate": 1.0599810150096441e-05, "loss": 0.0316, "step": 69430 }, { "epoch": 1.0657662497122247, "grad_norm": 0.44521743059158325, "learning_rate": 1.0597136217445488e-05, "loss": 0.0358, "step": 69440 }, { "epoch": 1.0659197298749137, "grad_norm": 0.36038798093795776, "learning_rate": 1.0594462241946333e-05, "loss": 0.0278, "step": 69450 }, { "epoch": 1.0660732100376027, "grad_norm": 0.3814501166343689, "learning_rate": 1.059178822379084e-05, "loss": 0.0251, "step": 69460 }, { "epoch": 1.0662266902002917, "grad_norm": 0.2882457375526428, "learning_rate": 1.0589114163170895e-05, "loss": 0.0302, "step": 69470 }, { "epoch": 1.0663801703629805, "grad_norm": 0.29546383023262024, "learning_rate": 1.0586440060278375e-05, "loss": 0.028, "step": 69480 }, { "epoch": 1.0665336505256695, "grad_norm": 0.4899384081363678, "learning_rate": 1.0583765915305164e-05, "loss": 0.0271, "step": 69490 }, { "epoch": 1.0666871306883585, "grad_norm": 0.4149687886238098, "learning_rate": 1.0581091728443147e-05, "loss": 0.033, "step": 69500 }, { "epoch": 1.0668406108510475, "grad_norm": 0.34393447637557983, "learning_rate": 1.0578417499884215e-05, "loss": 0.0308, "step": 69510 }, { "epoch": 1.0669940910137365, "grad_norm": 0.35261985659599304, "learning_rate": 1.0575743229820261e-05, "loss": 0.0277, "step": 69520 }, { "epoch": 1.0671475711764256, "grad_norm": 0.43591177463531494, "learning_rate": 1.0573068918443181e-05, "loss": 0.0298, "step": 69530 }, { "epoch": 1.0673010513391143, "grad_norm": 0.49369508028030396, "learning_rate": 1.057039456594487e-05, "loss": 0.0339, "step": 69540 }, { "epoch": 1.0674545315018034, "grad_norm": 0.3581610918045044, "learning_rate": 1.0567720172517228e-05, "loss": 0.0277, "step": 69550 }, { "epoch": 1.0676080116644924, "grad_norm": 0.32083940505981445, "learning_rate": 1.0565045738352167e-05, "loss": 0.0293, "step": 69560 }, { "epoch": 1.0677614918271814, "grad_norm": 0.30025458335876465, "learning_rate": 1.0562371263641588e-05, "loss": 0.0314, "step": 69570 }, { "epoch": 1.0679149719898704, "grad_norm": 0.3289816975593567, "learning_rate": 1.0559696748577404e-05, "loss": 0.0328, "step": 69580 }, { "epoch": 1.0680684521525592, "grad_norm": 0.33719614148139954, "learning_rate": 1.0557022193351526e-05, "loss": 0.0313, "step": 69590 }, { "epoch": 1.0682219323152482, "grad_norm": 0.43421006202697754, "learning_rate": 1.0554347598155865e-05, "loss": 0.0338, "step": 69600 }, { "epoch": 1.0683754124779372, "grad_norm": 0.4000418186187744, "learning_rate": 1.055167296318235e-05, "loss": 0.0326, "step": 69610 }, { "epoch": 1.0685288926406262, "grad_norm": 0.3261450231075287, "learning_rate": 1.0548998288622895e-05, "loss": 0.0246, "step": 69620 }, { "epoch": 1.0686823728033152, "grad_norm": 0.39266300201416016, "learning_rate": 1.0546323574669427e-05, "loss": 0.0304, "step": 69630 }, { "epoch": 1.068835852966004, "grad_norm": 0.4222174882888794, "learning_rate": 1.0543648821513877e-05, "loss": 0.0328, "step": 69640 }, { "epoch": 1.068989333128693, "grad_norm": 0.42888057231903076, "learning_rate": 1.0540974029348167e-05, "loss": 0.0252, "step": 69650 }, { "epoch": 1.069142813291382, "grad_norm": 0.340469628572464, "learning_rate": 1.0538299198364236e-05, "loss": 0.0247, "step": 69660 }, { "epoch": 1.069296293454071, "grad_norm": 0.34227657318115234, "learning_rate": 1.0535624328754017e-05, "loss": 0.0253, "step": 69670 }, { "epoch": 1.06944977361676, "grad_norm": 0.6576254367828369, "learning_rate": 1.0532949420709448e-05, "loss": 0.0351, "step": 69680 }, { "epoch": 1.069603253779449, "grad_norm": 0.39471057057380676, "learning_rate": 1.0530274474422475e-05, "loss": 0.0301, "step": 69690 }, { "epoch": 1.0697567339421379, "grad_norm": 0.38915619254112244, "learning_rate": 1.0527599490085035e-05, "loss": 0.0348, "step": 69700 }, { "epoch": 1.069910214104827, "grad_norm": 0.5017032027244568, "learning_rate": 1.0524924467889078e-05, "loss": 0.0331, "step": 69710 }, { "epoch": 1.070063694267516, "grad_norm": 0.2224217802286148, "learning_rate": 1.0522249408026553e-05, "loss": 0.0282, "step": 69720 }, { "epoch": 1.070217174430205, "grad_norm": 0.40883034467697144, "learning_rate": 1.0519574310689409e-05, "loss": 0.0312, "step": 69730 }, { "epoch": 1.070370654592894, "grad_norm": 0.4976084530353546, "learning_rate": 1.051689917606961e-05, "loss": 0.0384, "step": 69740 }, { "epoch": 1.070524134755583, "grad_norm": 0.35581228137016296, "learning_rate": 1.0514224004359103e-05, "loss": 0.0302, "step": 69750 }, { "epoch": 1.0706776149182717, "grad_norm": 0.4587084949016571, "learning_rate": 1.0511548795749851e-05, "loss": 0.0314, "step": 69760 }, { "epoch": 1.0708310950809607, "grad_norm": 0.4328395128250122, "learning_rate": 1.0508873550433822e-05, "loss": 0.0282, "step": 69770 }, { "epoch": 1.0709845752436498, "grad_norm": 0.29508036375045776, "learning_rate": 1.0506198268602974e-05, "loss": 0.0232, "step": 69780 }, { "epoch": 1.0711380554063388, "grad_norm": 0.28165802359580994, "learning_rate": 1.050352295044928e-05, "loss": 0.0325, "step": 69790 }, { "epoch": 1.0712915355690278, "grad_norm": 0.38192427158355713, "learning_rate": 1.0500847596164712e-05, "loss": 0.0304, "step": 69800 }, { "epoch": 1.0714450157317166, "grad_norm": 0.36464500427246094, "learning_rate": 1.0498172205941234e-05, "loss": 0.0266, "step": 69810 }, { "epoch": 1.0715984958944056, "grad_norm": 0.46837687492370605, "learning_rate": 1.0495496779970834e-05, "loss": 0.0307, "step": 69820 }, { "epoch": 1.0717519760570946, "grad_norm": 0.3773486912250519, "learning_rate": 1.0492821318445479e-05, "loss": 0.0358, "step": 69830 }, { "epoch": 1.0719054562197836, "grad_norm": 0.4798901081085205, "learning_rate": 1.0490145821557157e-05, "loss": 0.0277, "step": 69840 }, { "epoch": 1.0720589363824726, "grad_norm": 0.2527330815792084, "learning_rate": 1.048747028949785e-05, "loss": 0.028, "step": 69850 }, { "epoch": 1.0722124165451616, "grad_norm": 0.328416109085083, "learning_rate": 1.048479472245954e-05, "loss": 0.0256, "step": 69860 }, { "epoch": 1.0723658967078504, "grad_norm": 0.33128947019577026, "learning_rate": 1.0482119120634222e-05, "loss": 0.036, "step": 69870 }, { "epoch": 1.0725193768705394, "grad_norm": 0.3920679986476898, "learning_rate": 1.0479443484213883e-05, "loss": 0.0334, "step": 69880 }, { "epoch": 1.0726728570332285, "grad_norm": 0.3626120388507843, "learning_rate": 1.0476767813390515e-05, "loss": 0.0262, "step": 69890 }, { "epoch": 1.0728263371959175, "grad_norm": 0.5261233448982239, "learning_rate": 1.047409210835612e-05, "loss": 0.0308, "step": 69900 }, { "epoch": 1.0729798173586065, "grad_norm": 0.3897087872028351, "learning_rate": 1.0471416369302689e-05, "loss": 0.0244, "step": 69910 }, { "epoch": 1.0731332975212953, "grad_norm": 0.4817069172859192, "learning_rate": 1.0468740596422227e-05, "loss": 0.0243, "step": 69920 }, { "epoch": 1.0732867776839843, "grad_norm": 0.24458378553390503, "learning_rate": 1.0466064789906737e-05, "loss": 0.0262, "step": 69930 }, { "epoch": 1.0734402578466733, "grad_norm": 0.34959837794303894, "learning_rate": 1.0463388949948224e-05, "loss": 0.0267, "step": 69940 }, { "epoch": 1.0735937380093623, "grad_norm": 0.2075577825307846, "learning_rate": 1.0460713076738695e-05, "loss": 0.0318, "step": 69950 }, { "epoch": 1.0737472181720513, "grad_norm": 0.40683186054229736, "learning_rate": 1.0458037170470161e-05, "loss": 0.0214, "step": 69960 }, { "epoch": 1.0739006983347403, "grad_norm": 0.3858768343925476, "learning_rate": 1.0455361231334637e-05, "loss": 0.0286, "step": 69970 }, { "epoch": 1.0740541784974291, "grad_norm": 0.3002776503562927, "learning_rate": 1.0452685259524135e-05, "loss": 0.0327, "step": 69980 }, { "epoch": 1.0742076586601181, "grad_norm": 0.39134684205055237, "learning_rate": 1.0450009255230676e-05, "loss": 0.0311, "step": 69990 }, { "epoch": 1.0743611388228071, "grad_norm": 0.3158460557460785, "learning_rate": 1.0447333218646274e-05, "loss": 0.0286, "step": 70000 }, { "epoch": 1.0745146189854962, "grad_norm": 0.5595977902412415, "learning_rate": 1.0444657149962956e-05, "loss": 0.0347, "step": 70010 }, { "epoch": 1.0746680991481852, "grad_norm": 0.36127620935440063, "learning_rate": 1.0441981049372745e-05, "loss": 0.0331, "step": 70020 }, { "epoch": 1.074821579310874, "grad_norm": 0.3310118317604065, "learning_rate": 1.0439304917067667e-05, "loss": 0.0296, "step": 70030 }, { "epoch": 1.074975059473563, "grad_norm": 0.30487388372421265, "learning_rate": 1.0436628753239755e-05, "loss": 0.0313, "step": 70040 }, { "epoch": 1.075128539636252, "grad_norm": 0.5182196497917175, "learning_rate": 1.0433952558081033e-05, "loss": 0.0344, "step": 70050 }, { "epoch": 1.075282019798941, "grad_norm": 0.3471643328666687, "learning_rate": 1.043127633178354e-05, "loss": 0.0261, "step": 70060 }, { "epoch": 1.07543549996163, "grad_norm": 0.3484376072883606, "learning_rate": 1.0428600074539313e-05, "loss": 0.0363, "step": 70070 }, { "epoch": 1.075588980124319, "grad_norm": 0.5024443864822388, "learning_rate": 1.0425923786540384e-05, "loss": 0.0329, "step": 70080 }, { "epoch": 1.0757424602870078, "grad_norm": 0.3078747093677521, "learning_rate": 1.0423247467978799e-05, "loss": 0.0328, "step": 70090 }, { "epoch": 1.0758959404496968, "grad_norm": 0.21782295405864716, "learning_rate": 1.0420571119046593e-05, "loss": 0.0347, "step": 70100 }, { "epoch": 1.0760494206123858, "grad_norm": 0.49216729402542114, "learning_rate": 1.0417894739935822e-05, "loss": 0.0269, "step": 70110 }, { "epoch": 1.0762029007750749, "grad_norm": 0.3805989623069763, "learning_rate": 1.0415218330838524e-05, "loss": 0.0286, "step": 70120 }, { "epoch": 1.0763563809377639, "grad_norm": 0.44896209239959717, "learning_rate": 1.0412541891946748e-05, "loss": 0.0321, "step": 70130 }, { "epoch": 1.0765098611004529, "grad_norm": 0.3477303981781006, "learning_rate": 1.0409865423452547e-05, "loss": 0.026, "step": 70140 }, { "epoch": 1.0766633412631417, "grad_norm": 0.2895677983760834, "learning_rate": 1.0407188925547975e-05, "loss": 0.0333, "step": 70150 }, { "epoch": 1.0768168214258307, "grad_norm": 0.3180059492588043, "learning_rate": 1.0404512398425086e-05, "loss": 0.0293, "step": 70160 }, { "epoch": 1.0769703015885197, "grad_norm": 0.41515207290649414, "learning_rate": 1.0401835842275937e-05, "loss": 0.0263, "step": 70170 }, { "epoch": 1.0771237817512087, "grad_norm": 0.20987088978290558, "learning_rate": 1.0399159257292588e-05, "loss": 0.0249, "step": 70180 }, { "epoch": 1.0772772619138977, "grad_norm": 0.28237617015838623, "learning_rate": 1.0396482643667105e-05, "loss": 0.0245, "step": 70190 }, { "epoch": 1.0774307420765865, "grad_norm": 0.36227965354919434, "learning_rate": 1.0393806001591543e-05, "loss": 0.0223, "step": 70200 }, { "epoch": 1.0775842222392755, "grad_norm": 0.4151863157749176, "learning_rate": 1.0391129331257972e-05, "loss": 0.0302, "step": 70210 }, { "epoch": 1.0777377024019645, "grad_norm": 0.5267464518547058, "learning_rate": 1.0388452632858458e-05, "loss": 0.0385, "step": 70220 }, { "epoch": 1.0778911825646535, "grad_norm": 0.3479532301425934, "learning_rate": 1.0385775906585074e-05, "loss": 0.0311, "step": 70230 }, { "epoch": 1.0780446627273426, "grad_norm": 0.4309501051902771, "learning_rate": 1.038309915262989e-05, "loss": 0.0278, "step": 70240 }, { "epoch": 1.0781981428900314, "grad_norm": 0.3540056645870209, "learning_rate": 1.0380422371184977e-05, "loss": 0.0292, "step": 70250 }, { "epoch": 1.0783516230527204, "grad_norm": 0.2623140513896942, "learning_rate": 1.0377745562442413e-05, "loss": 0.0292, "step": 70260 }, { "epoch": 1.0785051032154094, "grad_norm": 0.4071733057498932, "learning_rate": 1.0375068726594275e-05, "loss": 0.0362, "step": 70270 }, { "epoch": 1.0786585833780984, "grad_norm": 0.4543304741382599, "learning_rate": 1.0372391863832643e-05, "loss": 0.0291, "step": 70280 }, { "epoch": 1.0788120635407874, "grad_norm": 0.35891878604888916, "learning_rate": 1.03697149743496e-05, "loss": 0.0218, "step": 70290 }, { "epoch": 1.0789655437034764, "grad_norm": 0.4432147145271301, "learning_rate": 1.0367038058337224e-05, "loss": 0.0355, "step": 70300 }, { "epoch": 1.0791190238661652, "grad_norm": 0.3474404513835907, "learning_rate": 1.0364361115987605e-05, "loss": 0.0382, "step": 70310 }, { "epoch": 1.0792725040288542, "grad_norm": 0.28373557329177856, "learning_rate": 1.0361684147492831e-05, "loss": 0.0344, "step": 70320 }, { "epoch": 1.0794259841915432, "grad_norm": 0.31604814529418945, "learning_rate": 1.0359007153044987e-05, "loss": 0.0318, "step": 70330 }, { "epoch": 1.0795794643542322, "grad_norm": 0.4494902193546295, "learning_rate": 1.0356330132836165e-05, "loss": 0.0383, "step": 70340 }, { "epoch": 1.0797329445169213, "grad_norm": 0.5117530822753906, "learning_rate": 1.035365308705846e-05, "loss": 0.0272, "step": 70350 }, { "epoch": 1.0798864246796103, "grad_norm": 0.4667731821537018, "learning_rate": 1.0350976015903962e-05, "loss": 0.0263, "step": 70360 }, { "epoch": 1.080039904842299, "grad_norm": 0.3644067645072937, "learning_rate": 1.0348298919564775e-05, "loss": 0.029, "step": 70370 }, { "epoch": 1.080193385004988, "grad_norm": 0.513512909412384, "learning_rate": 1.0345621798232986e-05, "loss": 0.0275, "step": 70380 }, { "epoch": 1.080346865167677, "grad_norm": 0.4258959889411926, "learning_rate": 1.0342944652100706e-05, "loss": 0.0371, "step": 70390 }, { "epoch": 1.080500345330366, "grad_norm": 0.36411479115486145, "learning_rate": 1.0340267481360031e-05, "loss": 0.0326, "step": 70400 }, { "epoch": 1.080653825493055, "grad_norm": 0.4465479254722595, "learning_rate": 1.0337590286203063e-05, "loss": 0.0331, "step": 70410 }, { "epoch": 1.080807305655744, "grad_norm": 0.34856918454170227, "learning_rate": 1.0334913066821916e-05, "loss": 0.0291, "step": 70420 }, { "epoch": 1.080960785818433, "grad_norm": 0.33071717619895935, "learning_rate": 1.0332235823408685e-05, "loss": 0.0346, "step": 70430 }, { "epoch": 1.081114265981122, "grad_norm": 0.496346116065979, "learning_rate": 1.0329558556155487e-05, "loss": 0.0264, "step": 70440 }, { "epoch": 1.081267746143811, "grad_norm": 0.28511446714401245, "learning_rate": 1.0326881265254431e-05, "loss": 0.0259, "step": 70450 }, { "epoch": 1.0814212263065, "grad_norm": 0.4381392002105713, "learning_rate": 1.0324203950897627e-05, "loss": 0.0272, "step": 70460 }, { "epoch": 1.0815747064691887, "grad_norm": 0.3643568754196167, "learning_rate": 1.032152661327719e-05, "loss": 0.0313, "step": 70470 }, { "epoch": 1.0817281866318778, "grad_norm": 0.2808644771575928, "learning_rate": 1.0318849252585238e-05, "loss": 0.0257, "step": 70480 }, { "epoch": 1.0818816667945668, "grad_norm": 0.33704861998558044, "learning_rate": 1.0316171869013883e-05, "loss": 0.026, "step": 70490 }, { "epoch": 1.0820351469572558, "grad_norm": 0.4213992953300476, "learning_rate": 1.0313494462755247e-05, "loss": 0.031, "step": 70500 }, { "epoch": 1.0821886271199448, "grad_norm": 0.2778583765029907, "learning_rate": 1.0310817034001449e-05, "loss": 0.0245, "step": 70510 }, { "epoch": 1.0823421072826338, "grad_norm": 0.39102739095687866, "learning_rate": 1.0308139582944614e-05, "loss": 0.0317, "step": 70520 }, { "epoch": 1.0824955874453226, "grad_norm": 0.32046234607696533, "learning_rate": 1.0305462109776863e-05, "loss": 0.0239, "step": 70530 }, { "epoch": 1.0826490676080116, "grad_norm": 0.4449886083602905, "learning_rate": 1.030278461469032e-05, "loss": 0.0246, "step": 70540 }, { "epoch": 1.0828025477707006, "grad_norm": 0.41163504123687744, "learning_rate": 1.0300107097877114e-05, "loss": 0.033, "step": 70550 }, { "epoch": 1.0829560279333896, "grad_norm": 0.2358422428369522, "learning_rate": 1.0297429559529372e-05, "loss": 0.0488, "step": 70560 }, { "epoch": 1.0831095080960786, "grad_norm": 0.33229711651802063, "learning_rate": 1.0294751999839227e-05, "loss": 0.0315, "step": 70570 }, { "epoch": 1.0832629882587677, "grad_norm": 0.22183014452457428, "learning_rate": 1.0292074418998808e-05, "loss": 0.0314, "step": 70580 }, { "epoch": 1.0834164684214564, "grad_norm": 0.41865819692611694, "learning_rate": 1.0289396817200248e-05, "loss": 0.031, "step": 70590 }, { "epoch": 1.0835699485841455, "grad_norm": 0.42891016602516174, "learning_rate": 1.0286719194635679e-05, "loss": 0.0273, "step": 70600 }, { "epoch": 1.0837234287468345, "grad_norm": 0.4165152907371521, "learning_rate": 1.0284041551497242e-05, "loss": 0.0354, "step": 70610 }, { "epoch": 1.0838769089095235, "grad_norm": 0.4418681859970093, "learning_rate": 1.0281363887977071e-05, "loss": 0.0346, "step": 70620 }, { "epoch": 1.0840303890722125, "grad_norm": 0.5238558650016785, "learning_rate": 1.0278686204267308e-05, "loss": 0.029, "step": 70630 }, { "epoch": 1.0841838692349013, "grad_norm": 0.40994375944137573, "learning_rate": 1.0276008500560088e-05, "loss": 0.029, "step": 70640 }, { "epoch": 1.0843373493975903, "grad_norm": 0.45755845308303833, "learning_rate": 1.0273330777047557e-05, "loss": 0.0283, "step": 70650 }, { "epoch": 1.0844908295602793, "grad_norm": 0.25920364260673523, "learning_rate": 1.0270653033921856e-05, "loss": 0.0236, "step": 70660 }, { "epoch": 1.0846443097229683, "grad_norm": 0.3364567756652832, "learning_rate": 1.0267975271375132e-05, "loss": 0.0299, "step": 70670 }, { "epoch": 1.0847977898856573, "grad_norm": 0.42311719059944153, "learning_rate": 1.0265297489599531e-05, "loss": 0.0357, "step": 70680 }, { "epoch": 1.0849512700483464, "grad_norm": 0.5254595279693604, "learning_rate": 1.0262619688787197e-05, "loss": 0.0214, "step": 70690 }, { "epoch": 1.0851047502110351, "grad_norm": 0.38357436656951904, "learning_rate": 1.0259941869130279e-05, "loss": 0.0282, "step": 70700 }, { "epoch": 1.0852582303737242, "grad_norm": 0.25621122121810913, "learning_rate": 1.0257264030820931e-05, "loss": 0.0258, "step": 70710 }, { "epoch": 1.0854117105364132, "grad_norm": 0.3449902832508087, "learning_rate": 1.0254586174051301e-05, "loss": 0.0242, "step": 70720 }, { "epoch": 1.0855651906991022, "grad_norm": 0.32206234335899353, "learning_rate": 1.0251908299013541e-05, "loss": 0.0268, "step": 70730 }, { "epoch": 1.0857186708617912, "grad_norm": 0.3436506390571594, "learning_rate": 1.0249230405899813e-05, "loss": 0.0352, "step": 70740 }, { "epoch": 1.0858721510244802, "grad_norm": 0.35828521847724915, "learning_rate": 1.024655249490226e-05, "loss": 0.0327, "step": 70750 }, { "epoch": 1.086025631187169, "grad_norm": 0.4198165833950043, "learning_rate": 1.0243874566213047e-05, "loss": 0.0358, "step": 70760 }, { "epoch": 1.086179111349858, "grad_norm": 0.42491453886032104, "learning_rate": 1.0241196620024331e-05, "loss": 0.0322, "step": 70770 }, { "epoch": 1.086332591512547, "grad_norm": 0.440173864364624, "learning_rate": 1.023851865652827e-05, "loss": 0.0287, "step": 70780 }, { "epoch": 1.086486071675236, "grad_norm": 0.21384809911251068, "learning_rate": 1.0235840675917026e-05, "loss": 0.0259, "step": 70790 }, { "epoch": 1.086639551837925, "grad_norm": 0.44280147552490234, "learning_rate": 1.0233162678382753e-05, "loss": 0.0247, "step": 70800 }, { "epoch": 1.0867930320006138, "grad_norm": 0.4760541319847107, "learning_rate": 1.0230484664117623e-05, "loss": 0.0351, "step": 70810 }, { "epoch": 1.0869465121633028, "grad_norm": 0.3983308672904968, "learning_rate": 1.0227806633313799e-05, "loss": 0.0231, "step": 70820 }, { "epoch": 1.0870999923259919, "grad_norm": 0.6084511876106262, "learning_rate": 1.0225128586163442e-05, "loss": 0.0317, "step": 70830 }, { "epoch": 1.0872534724886809, "grad_norm": 0.44220563769340515, "learning_rate": 1.0222450522858723e-05, "loss": 0.0257, "step": 70840 }, { "epoch": 1.0874069526513699, "grad_norm": 0.3810437023639679, "learning_rate": 1.0219772443591802e-05, "loss": 0.0308, "step": 70850 }, { "epoch": 1.0875604328140587, "grad_norm": 0.39688682556152344, "learning_rate": 1.0217094348554855e-05, "loss": 0.0234, "step": 70860 }, { "epoch": 1.0877139129767477, "grad_norm": 0.4412108361721039, "learning_rate": 1.0214416237940053e-05, "loss": 0.0253, "step": 70870 }, { "epoch": 1.0878673931394367, "grad_norm": 0.2729331851005554, "learning_rate": 1.0211738111939558e-05, "loss": 0.0353, "step": 70880 }, { "epoch": 1.0880208733021257, "grad_norm": 0.2121928483247757, "learning_rate": 1.0209059970745551e-05, "loss": 0.0226, "step": 70890 }, { "epoch": 1.0881743534648147, "grad_norm": 0.4466598629951477, "learning_rate": 1.0206381814550202e-05, "loss": 0.0391, "step": 70900 }, { "epoch": 1.0883278336275037, "grad_norm": 0.4341365098953247, "learning_rate": 1.0203703643545682e-05, "loss": 0.0346, "step": 70910 }, { "epoch": 1.0884813137901925, "grad_norm": 0.36678948998451233, "learning_rate": 1.0201025457924175e-05, "loss": 0.0257, "step": 70920 }, { "epoch": 1.0886347939528815, "grad_norm": 0.3295556306838989, "learning_rate": 1.0198347257877845e-05, "loss": 0.0253, "step": 70930 }, { "epoch": 1.0887882741155706, "grad_norm": 0.28295862674713135, "learning_rate": 1.019566904359888e-05, "loss": 0.0275, "step": 70940 }, { "epoch": 1.0889417542782596, "grad_norm": 0.32865893840789795, "learning_rate": 1.0192990815279454e-05, "loss": 0.0269, "step": 70950 }, { "epoch": 1.0890952344409486, "grad_norm": 0.4412704408168793, "learning_rate": 1.0190312573111744e-05, "loss": 0.0282, "step": 70960 }, { "epoch": 1.0892487146036376, "grad_norm": 0.6760697364807129, "learning_rate": 1.0187634317287937e-05, "loss": 0.035, "step": 70970 }, { "epoch": 1.0894021947663264, "grad_norm": 0.3442355990409851, "learning_rate": 1.0184956048000208e-05, "loss": 0.0309, "step": 70980 }, { "epoch": 1.0895556749290154, "grad_norm": 0.293568879365921, "learning_rate": 1.0182277765440743e-05, "loss": 0.0293, "step": 70990 }, { "epoch": 1.0897091550917044, "grad_norm": 0.1851864606142044, "learning_rate": 1.0179599469801724e-05, "loss": 0.0265, "step": 71000 }, { "epoch": 1.0898626352543934, "grad_norm": 0.3536299765110016, "learning_rate": 1.0176921161275332e-05, "loss": 0.0242, "step": 71010 }, { "epoch": 1.0900161154170824, "grad_norm": 0.3451089560985565, "learning_rate": 1.0174242840053762e-05, "loss": 0.0338, "step": 71020 }, { "epoch": 1.0901695955797712, "grad_norm": 0.28656840324401855, "learning_rate": 1.0171564506329188e-05, "loss": 0.031, "step": 71030 }, { "epoch": 1.0903230757424602, "grad_norm": 0.3648214340209961, "learning_rate": 1.0168886160293804e-05, "loss": 0.0336, "step": 71040 }, { "epoch": 1.0904765559051492, "grad_norm": 0.473890483379364, "learning_rate": 1.01662078021398e-05, "loss": 0.0278, "step": 71050 }, { "epoch": 1.0906300360678383, "grad_norm": 0.5408678650856018, "learning_rate": 1.0163529432059358e-05, "loss": 0.0354, "step": 71060 }, { "epoch": 1.0907835162305273, "grad_norm": 0.3269411623477936, "learning_rate": 1.0160851050244669e-05, "loss": 0.0294, "step": 71070 }, { "epoch": 1.090936996393216, "grad_norm": 0.4380223751068115, "learning_rate": 1.0158172656887927e-05, "loss": 0.0292, "step": 71080 }, { "epoch": 1.091090476555905, "grad_norm": 0.3540831506252289, "learning_rate": 1.0155494252181322e-05, "loss": 0.0294, "step": 71090 }, { "epoch": 1.091243956718594, "grad_norm": 0.2744330167770386, "learning_rate": 1.0152815836317047e-05, "loss": 0.0242, "step": 71100 }, { "epoch": 1.091397436881283, "grad_norm": 0.4256948232650757, "learning_rate": 1.0150137409487291e-05, "loss": 0.0278, "step": 71110 }, { "epoch": 1.0915509170439721, "grad_norm": 0.34195682406425476, "learning_rate": 1.0147458971884248e-05, "loss": 0.0317, "step": 71120 }, { "epoch": 1.0917043972066611, "grad_norm": 0.4264967143535614, "learning_rate": 1.0144780523700117e-05, "loss": 0.0319, "step": 71130 }, { "epoch": 1.09185787736935, "grad_norm": 0.35671770572662354, "learning_rate": 1.014210206512709e-05, "loss": 0.0282, "step": 71140 }, { "epoch": 1.092011357532039, "grad_norm": 0.4489297568798065, "learning_rate": 1.0139423596357361e-05, "loss": 0.0289, "step": 71150 }, { "epoch": 1.092164837694728, "grad_norm": 0.3888952434062958, "learning_rate": 1.0136745117583136e-05, "loss": 0.0303, "step": 71160 }, { "epoch": 1.092318317857417, "grad_norm": 0.3644949793815613, "learning_rate": 1.0134066628996598e-05, "loss": 0.036, "step": 71170 }, { "epoch": 1.092471798020106, "grad_norm": 0.3678458034992218, "learning_rate": 1.0131388130789956e-05, "loss": 0.0297, "step": 71180 }, { "epoch": 1.092625278182795, "grad_norm": 0.46086543798446655, "learning_rate": 1.0128709623155403e-05, "loss": 0.0287, "step": 71190 }, { "epoch": 1.0927787583454838, "grad_norm": 0.47108131647109985, "learning_rate": 1.0126031106285141e-05, "loss": 0.0377, "step": 71200 }, { "epoch": 1.0929322385081728, "grad_norm": 0.3242071270942688, "learning_rate": 1.0123352580371376e-05, "loss": 0.0291, "step": 71210 }, { "epoch": 1.0930857186708618, "grad_norm": 0.23753614723682404, "learning_rate": 1.0120674045606295e-05, "loss": 0.0279, "step": 71220 }, { "epoch": 1.0932391988335508, "grad_norm": 0.3638492822647095, "learning_rate": 1.0117995502182109e-05, "loss": 0.031, "step": 71230 }, { "epoch": 1.0933926789962398, "grad_norm": 0.38039639592170715, "learning_rate": 1.011531695029102e-05, "loss": 0.0273, "step": 71240 }, { "epoch": 1.0935461591589286, "grad_norm": 0.4385015070438385, "learning_rate": 1.0112638390125226e-05, "loss": 0.0355, "step": 71250 }, { "epoch": 1.0936996393216176, "grad_norm": 0.310846209526062, "learning_rate": 1.0109959821876935e-05, "loss": 0.0234, "step": 71260 }, { "epoch": 1.0938531194843066, "grad_norm": 0.45333436131477356, "learning_rate": 1.0107281245738348e-05, "loss": 0.0276, "step": 71270 }, { "epoch": 1.0940065996469956, "grad_norm": 0.326189249753952, "learning_rate": 1.0104602661901667e-05, "loss": 0.028, "step": 71280 }, { "epoch": 1.0941600798096847, "grad_norm": 0.40739428997039795, "learning_rate": 1.0101924070559106e-05, "loss": 0.029, "step": 71290 }, { "epoch": 1.0943135599723737, "grad_norm": 0.2037872076034546, "learning_rate": 1.0099245471902861e-05, "loss": 0.0235, "step": 71300 }, { "epoch": 1.0944670401350625, "grad_norm": 0.35215210914611816, "learning_rate": 1.0096566866125142e-05, "loss": 0.0243, "step": 71310 }, { "epoch": 1.0946205202977515, "grad_norm": 0.3585691452026367, "learning_rate": 1.0093888253418154e-05, "loss": 0.0335, "step": 71320 }, { "epoch": 1.0947740004604405, "grad_norm": 0.3028140962123871, "learning_rate": 1.0091209633974102e-05, "loss": 0.0305, "step": 71330 }, { "epoch": 1.0949274806231295, "grad_norm": 0.19370919466018677, "learning_rate": 1.0088531007985203e-05, "loss": 0.0287, "step": 71340 }, { "epoch": 1.0950809607858185, "grad_norm": 0.41544851660728455, "learning_rate": 1.0085852375643652e-05, "loss": 0.0299, "step": 71350 }, { "epoch": 1.0952344409485073, "grad_norm": 0.3207491934299469, "learning_rate": 1.0083173737141667e-05, "loss": 0.0348, "step": 71360 }, { "epoch": 1.0953879211111963, "grad_norm": 0.41720065474510193, "learning_rate": 1.0080495092671451e-05, "loss": 0.0269, "step": 71370 }, { "epoch": 1.0955414012738853, "grad_norm": 0.43402159214019775, "learning_rate": 1.0077816442425216e-05, "loss": 0.0287, "step": 71380 }, { "epoch": 1.0956948814365743, "grad_norm": 0.33365142345428467, "learning_rate": 1.0075137786595175e-05, "loss": 0.0251, "step": 71390 }, { "epoch": 1.0958483615992634, "grad_norm": 0.4064629375934601, "learning_rate": 1.0072459125373531e-05, "loss": 0.0317, "step": 71400 }, { "epoch": 1.0960018417619524, "grad_norm": 0.32121118903160095, "learning_rate": 1.0069780458952498e-05, "loss": 0.0303, "step": 71410 }, { "epoch": 1.0961553219246412, "grad_norm": 0.4174277186393738, "learning_rate": 1.0067101787524287e-05, "loss": 0.028, "step": 71420 }, { "epoch": 1.0963088020873302, "grad_norm": 0.348422646522522, "learning_rate": 1.0064423111281108e-05, "loss": 0.0235, "step": 71430 }, { "epoch": 1.0964622822500192, "grad_norm": 0.3524676263332367, "learning_rate": 1.0061744430415175e-05, "loss": 0.0266, "step": 71440 }, { "epoch": 1.0966157624127082, "grad_norm": 0.41166314482688904, "learning_rate": 1.00590657451187e-05, "loss": 0.0238, "step": 71450 }, { "epoch": 1.0967692425753972, "grad_norm": 0.5346678495407104, "learning_rate": 1.005638705558389e-05, "loss": 0.0302, "step": 71460 }, { "epoch": 1.096922722738086, "grad_norm": 0.5818247199058533, "learning_rate": 1.0053708362002963e-05, "loss": 0.0289, "step": 71470 }, { "epoch": 1.097076202900775, "grad_norm": 0.6527828574180603, "learning_rate": 1.0051029664568129e-05, "loss": 0.0494, "step": 71480 }, { "epoch": 1.097229683063464, "grad_norm": 0.4351154863834381, "learning_rate": 1.00483509634716e-05, "loss": 0.0273, "step": 71490 }, { "epoch": 1.097383163226153, "grad_norm": 0.4402543306350708, "learning_rate": 1.0045672258905594e-05, "loss": 0.0285, "step": 71500 }, { "epoch": 1.097536643388842, "grad_norm": 0.5376880764961243, "learning_rate": 1.0042993551062322e-05, "loss": 0.0357, "step": 71510 }, { "epoch": 1.097690123551531, "grad_norm": 0.3018859326839447, "learning_rate": 1.0040314840133997e-05, "loss": 0.0271, "step": 71520 }, { "epoch": 1.0978436037142199, "grad_norm": 0.3374105393886566, "learning_rate": 1.0037636126312832e-05, "loss": 0.0283, "step": 71530 }, { "epoch": 1.0979970838769089, "grad_norm": 0.39212945103645325, "learning_rate": 1.0034957409791044e-05, "loss": 0.0369, "step": 71540 }, { "epoch": 1.0981505640395979, "grad_norm": 0.33100903034210205, "learning_rate": 1.0032278690760846e-05, "loss": 0.0279, "step": 71550 }, { "epoch": 1.0983040442022869, "grad_norm": 0.3930639922618866, "learning_rate": 1.0029599969414455e-05, "loss": 0.0328, "step": 71560 }, { "epoch": 1.098457524364976, "grad_norm": 0.3344021737575531, "learning_rate": 1.002692124594408e-05, "loss": 0.0279, "step": 71570 }, { "epoch": 1.098611004527665, "grad_norm": 0.36309561133384705, "learning_rate": 1.002424252054194e-05, "loss": 0.036, "step": 71580 }, { "epoch": 1.0987644846903537, "grad_norm": 0.353078693151474, "learning_rate": 1.002156379340025e-05, "loss": 0.0247, "step": 71590 }, { "epoch": 1.0989179648530427, "grad_norm": 0.8461511731147766, "learning_rate": 1.0018885064711227e-05, "loss": 0.0314, "step": 71600 }, { "epoch": 1.0990714450157317, "grad_norm": 0.2579898238182068, "learning_rate": 1.0016206334667085e-05, "loss": 0.0319, "step": 71610 }, { "epoch": 1.0992249251784207, "grad_norm": 0.2896692156791687, "learning_rate": 1.0013527603460033e-05, "loss": 0.024, "step": 71620 }, { "epoch": 1.0993784053411098, "grad_norm": 0.4063953161239624, "learning_rate": 1.0010848871282296e-05, "loss": 0.0288, "step": 71630 }, { "epoch": 1.0995318855037985, "grad_norm": 0.6315638422966003, "learning_rate": 1.0008170138326086e-05, "loss": 0.0405, "step": 71640 }, { "epoch": 1.0996853656664876, "grad_norm": 0.4148447513580322, "learning_rate": 1.0005491404783618e-05, "loss": 0.0304, "step": 71650 }, { "epoch": 1.0998388458291766, "grad_norm": 0.3733144998550415, "learning_rate": 1.0002812670847106e-05, "loss": 0.0264, "step": 71660 }, { "epoch": 1.0999923259918656, "grad_norm": 0.27258333563804626, "learning_rate": 1.0000133936708767e-05, "loss": 0.0303, "step": 71670 }, { "epoch": 1.1001458061545546, "grad_norm": 0.3695237338542938, "learning_rate": 9.99745520256082e-06, "loss": 0.0283, "step": 71680 }, { "epoch": 1.1002992863172434, "grad_norm": 0.4315249025821686, "learning_rate": 9.994776468595478e-06, "loss": 0.0249, "step": 71690 }, { "epoch": 1.1004527664799324, "grad_norm": 0.37518078088760376, "learning_rate": 9.992097735004953e-06, "loss": 0.0302, "step": 71700 }, { "epoch": 1.1006062466426214, "grad_norm": 0.2967282235622406, "learning_rate": 9.989419001981466e-06, "loss": 0.0331, "step": 71710 }, { "epoch": 1.1007597268053104, "grad_norm": 0.2912886440753937, "learning_rate": 9.98674026971723e-06, "loss": 0.0326, "step": 71720 }, { "epoch": 1.1009132069679994, "grad_norm": 0.40390250086784363, "learning_rate": 9.984061538404467e-06, "loss": 0.0336, "step": 71730 }, { "epoch": 1.1010666871306884, "grad_norm": 0.4036281406879425, "learning_rate": 9.981382808235382e-06, "loss": 0.0272, "step": 71740 }, { "epoch": 1.1012201672933772, "grad_norm": 0.44275692105293274, "learning_rate": 9.978704079402194e-06, "loss": 0.0423, "step": 71750 }, { "epoch": 1.1013736474560663, "grad_norm": 0.23875077068805695, "learning_rate": 9.976025352097121e-06, "loss": 0.0296, "step": 71760 }, { "epoch": 1.1015271276187553, "grad_norm": 0.486775279045105, "learning_rate": 9.973346626512377e-06, "loss": 0.0361, "step": 71770 }, { "epoch": 1.1016806077814443, "grad_norm": 0.5239401459693909, "learning_rate": 9.97066790284018e-06, "loss": 0.0293, "step": 71780 }, { "epoch": 1.1018340879441333, "grad_norm": 0.5488316416740417, "learning_rate": 9.96798918127274e-06, "loss": 0.0288, "step": 71790 }, { "epoch": 1.1019875681068223, "grad_norm": 0.3634966015815735, "learning_rate": 9.965310462002271e-06, "loss": 0.0306, "step": 71800 }, { "epoch": 1.102141048269511, "grad_norm": 0.37302088737487793, "learning_rate": 9.962631745220993e-06, "loss": 0.0281, "step": 71810 }, { "epoch": 1.1022945284322, "grad_norm": 0.3356874883174896, "learning_rate": 9.959953031121115e-06, "loss": 0.0273, "step": 71820 }, { "epoch": 1.1024480085948891, "grad_norm": 0.4395877718925476, "learning_rate": 9.957274319894858e-06, "loss": 0.0415, "step": 71830 }, { "epoch": 1.1026014887575781, "grad_norm": 0.42484328150749207, "learning_rate": 9.95459561173443e-06, "loss": 0.0265, "step": 71840 }, { "epoch": 1.1027549689202671, "grad_norm": 0.24177588522434235, "learning_rate": 9.951916906832046e-06, "loss": 0.0335, "step": 71850 }, { "epoch": 1.102908449082956, "grad_norm": 0.30316248536109924, "learning_rate": 9.949238205379921e-06, "loss": 0.0225, "step": 71860 }, { "epoch": 1.103061929245645, "grad_norm": 0.41697973012924194, "learning_rate": 9.946559507570272e-06, "loss": 0.0264, "step": 71870 }, { "epoch": 1.103215409408334, "grad_norm": 0.24309051036834717, "learning_rate": 9.943880813595302e-06, "loss": 0.031, "step": 71880 }, { "epoch": 1.103368889571023, "grad_norm": 0.3910534977912903, "learning_rate": 9.941202123647233e-06, "loss": 0.03, "step": 71890 }, { "epoch": 1.103522369733712, "grad_norm": 0.47611114382743835, "learning_rate": 9.938523437918271e-06, "loss": 0.025, "step": 71900 }, { "epoch": 1.1036758498964008, "grad_norm": 0.4628455340862274, "learning_rate": 9.935844756600634e-06, "loss": 0.0266, "step": 71910 }, { "epoch": 1.1038293300590898, "grad_norm": 0.31140196323394775, "learning_rate": 9.933166079886536e-06, "loss": 0.0278, "step": 71920 }, { "epoch": 1.1039828102217788, "grad_norm": 0.37537503242492676, "learning_rate": 9.930487407968176e-06, "loss": 0.0406, "step": 71930 }, { "epoch": 1.1041362903844678, "grad_norm": 0.3182486295700073, "learning_rate": 9.927808741037774e-06, "loss": 0.0315, "step": 71940 }, { "epoch": 1.1042897705471568, "grad_norm": 0.4043887257575989, "learning_rate": 9.925130079287542e-06, "loss": 0.0357, "step": 71950 }, { "epoch": 1.1044432507098458, "grad_norm": 0.2879675626754761, "learning_rate": 9.922451422909688e-06, "loss": 0.0307, "step": 71960 }, { "epoch": 1.1045967308725346, "grad_norm": 0.5293338298797607, "learning_rate": 9.919772772096426e-06, "loss": 0.0258, "step": 71970 }, { "epoch": 1.1047502110352236, "grad_norm": 0.34759730100631714, "learning_rate": 9.91709412703996e-06, "loss": 0.0221, "step": 71980 }, { "epoch": 1.1049036911979127, "grad_norm": 0.346708208322525, "learning_rate": 9.9144154879325e-06, "loss": 0.0294, "step": 71990 }, { "epoch": 1.1050571713606017, "grad_norm": 0.4447742700576782, "learning_rate": 9.911736854966258e-06, "loss": 0.0252, "step": 72000 }, { "epoch": 1.1052106515232907, "grad_norm": 0.36728763580322266, "learning_rate": 9.90905822833344e-06, "loss": 0.026, "step": 72010 }, { "epoch": 1.1053641316859797, "grad_norm": 0.3304627537727356, "learning_rate": 9.90637960822626e-06, "loss": 0.0286, "step": 72020 }, { "epoch": 1.1055176118486685, "grad_norm": 0.3661380410194397, "learning_rate": 9.903700994836917e-06, "loss": 0.0317, "step": 72030 }, { "epoch": 1.1056710920113575, "grad_norm": 0.34298717975616455, "learning_rate": 9.901022388357619e-06, "loss": 0.0322, "step": 72040 }, { "epoch": 1.1058245721740465, "grad_norm": 0.5034670233726501, "learning_rate": 9.898343788980578e-06, "loss": 0.0307, "step": 72050 }, { "epoch": 1.1059780523367355, "grad_norm": 0.2136794924736023, "learning_rate": 9.895665196897994e-06, "loss": 0.0286, "step": 72060 }, { "epoch": 1.1061315324994245, "grad_norm": 0.3895726799964905, "learning_rate": 9.892986612302081e-06, "loss": 0.0376, "step": 72070 }, { "epoch": 1.1062850126621133, "grad_norm": 0.3156774342060089, "learning_rate": 9.890308035385036e-06, "loss": 0.03, "step": 72080 }, { "epoch": 1.1064384928248023, "grad_norm": 0.2806016504764557, "learning_rate": 9.887629466339064e-06, "loss": 0.0246, "step": 72090 }, { "epoch": 1.1065919729874913, "grad_norm": 0.19518303871154785, "learning_rate": 9.884950905356372e-06, "loss": 0.0271, "step": 72100 }, { "epoch": 1.1067454531501804, "grad_norm": 0.4149473309516907, "learning_rate": 9.88227235262916e-06, "loss": 0.039, "step": 72110 }, { "epoch": 1.1068989333128694, "grad_norm": 0.31306391954421997, "learning_rate": 9.879593808349639e-06, "loss": 0.0317, "step": 72120 }, { "epoch": 1.1070524134755584, "grad_norm": 0.40964633226394653, "learning_rate": 9.876915272710002e-06, "loss": 0.0413, "step": 72130 }, { "epoch": 1.1072058936382472, "grad_norm": 0.4010774791240692, "learning_rate": 9.874236745902451e-06, "loss": 0.0366, "step": 72140 }, { "epoch": 1.1073593738009362, "grad_norm": 0.43032440543174744, "learning_rate": 9.87155822811919e-06, "loss": 0.0268, "step": 72150 }, { "epoch": 1.1075128539636252, "grad_norm": 0.4540480077266693, "learning_rate": 9.868879719552417e-06, "loss": 0.0282, "step": 72160 }, { "epoch": 1.1076663341263142, "grad_norm": 0.32423123717308044, "learning_rate": 9.866201220394337e-06, "loss": 0.0249, "step": 72170 }, { "epoch": 1.1078198142890032, "grad_norm": 0.3125394582748413, "learning_rate": 9.863522730837144e-06, "loss": 0.0332, "step": 72180 }, { "epoch": 1.1079732944516922, "grad_norm": 0.34420686960220337, "learning_rate": 9.860844251073031e-06, "loss": 0.0263, "step": 72190 }, { "epoch": 1.108126774614381, "grad_norm": 0.4206451177597046, "learning_rate": 9.858165781294204e-06, "loss": 0.0303, "step": 72200 }, { "epoch": 1.10828025477707, "grad_norm": 0.4067613184452057, "learning_rate": 9.85548732169286e-06, "loss": 0.0295, "step": 72210 }, { "epoch": 1.108433734939759, "grad_norm": 0.2691929042339325, "learning_rate": 9.852808872461192e-06, "loss": 0.0259, "step": 72220 }, { "epoch": 1.108587215102448, "grad_norm": 0.37756240367889404, "learning_rate": 9.850130433791391e-06, "loss": 0.0236, "step": 72230 }, { "epoch": 1.108740695265137, "grad_norm": 0.42948585748672485, "learning_rate": 9.847452005875658e-06, "loss": 0.0309, "step": 72240 }, { "epoch": 1.1088941754278259, "grad_norm": 0.3785998225212097, "learning_rate": 9.844773588906184e-06, "loss": 0.0273, "step": 72250 }, { "epoch": 1.1090476555905149, "grad_norm": 0.40952667593955994, "learning_rate": 9.842095183075162e-06, "loss": 0.0282, "step": 72260 }, { "epoch": 1.109201135753204, "grad_norm": 0.2402651160955429, "learning_rate": 9.839416788574789e-06, "loss": 0.0303, "step": 72270 }, { "epoch": 1.109354615915893, "grad_norm": 0.4001975655555725, "learning_rate": 9.836738405597245e-06, "loss": 0.035, "step": 72280 }, { "epoch": 1.109508096078582, "grad_norm": 0.24978652596473694, "learning_rate": 9.83406003433473e-06, "loss": 0.0293, "step": 72290 }, { "epoch": 1.1096615762412707, "grad_norm": 0.453930526971817, "learning_rate": 9.83138167497943e-06, "loss": 0.0299, "step": 72300 }, { "epoch": 1.1098150564039597, "grad_norm": 0.40402689576148987, "learning_rate": 9.828703327723537e-06, "loss": 0.0387, "step": 72310 }, { "epoch": 1.1099685365666487, "grad_norm": 0.3721274733543396, "learning_rate": 9.826024992759238e-06, "loss": 0.0275, "step": 72320 }, { "epoch": 1.1101220167293377, "grad_norm": 0.39210277795791626, "learning_rate": 9.823346670278713e-06, "loss": 0.0334, "step": 72330 }, { "epoch": 1.1102754968920268, "grad_norm": 0.44013914465904236, "learning_rate": 9.820668360474159e-06, "loss": 0.0306, "step": 72340 }, { "epoch": 1.1104289770547158, "grad_norm": 0.41705989837646484, "learning_rate": 9.817990063537751e-06, "loss": 0.0296, "step": 72350 }, { "epoch": 1.1105824572174046, "grad_norm": 0.3724568784236908, "learning_rate": 9.815311779661684e-06, "loss": 0.0366, "step": 72360 }, { "epoch": 1.1107359373800936, "grad_norm": 0.30719977617263794, "learning_rate": 9.812633509038136e-06, "loss": 0.029, "step": 72370 }, { "epoch": 1.1108894175427826, "grad_norm": 0.3513979911804199, "learning_rate": 9.809955251859285e-06, "loss": 0.0268, "step": 72380 }, { "epoch": 1.1110428977054716, "grad_norm": 0.36915451288223267, "learning_rate": 9.80727700831732e-06, "loss": 0.033, "step": 72390 }, { "epoch": 1.1111963778681606, "grad_norm": 0.3921810984611511, "learning_rate": 9.804598778604415e-06, "loss": 0.0275, "step": 72400 }, { "epoch": 1.1113498580308496, "grad_norm": 0.4891621172428131, "learning_rate": 9.801920562912755e-06, "loss": 0.027, "step": 72410 }, { "epoch": 1.1115033381935384, "grad_norm": 0.45323580503463745, "learning_rate": 9.79924236143452e-06, "loss": 0.0237, "step": 72420 }, { "epoch": 1.1116568183562274, "grad_norm": 0.43952134251594543, "learning_rate": 9.796564174361876e-06, "loss": 0.0382, "step": 72430 }, { "epoch": 1.1118102985189164, "grad_norm": 0.41698288917541504, "learning_rate": 9.793886001887011e-06, "loss": 0.0326, "step": 72440 }, { "epoch": 1.1119637786816055, "grad_norm": 0.43319010734558105, "learning_rate": 9.791207844202097e-06, "loss": 0.0313, "step": 72450 }, { "epoch": 1.1121172588442945, "grad_norm": 0.41743698716163635, "learning_rate": 9.788529701499304e-06, "loss": 0.028, "step": 72460 }, { "epoch": 1.1122707390069833, "grad_norm": 0.3274487257003784, "learning_rate": 9.785851573970818e-06, "loss": 0.0318, "step": 72470 }, { "epoch": 1.1124242191696723, "grad_norm": 0.19675539433956146, "learning_rate": 9.783173461808794e-06, "loss": 0.0287, "step": 72480 }, { "epoch": 1.1125776993323613, "grad_norm": 0.4072601795196533, "learning_rate": 9.780495365205413e-06, "loss": 0.0326, "step": 72490 }, { "epoch": 1.1127311794950503, "grad_norm": 0.4702044427394867, "learning_rate": 9.777817284352845e-06, "loss": 0.0358, "step": 72500 }, { "epoch": 1.1128846596577393, "grad_norm": 0.4429963231086731, "learning_rate": 9.775139219443256e-06, "loss": 0.028, "step": 72510 }, { "epoch": 1.113038139820428, "grad_norm": 0.16090014576911926, "learning_rate": 9.772461170668816e-06, "loss": 0.0368, "step": 72520 }, { "epoch": 1.113191619983117, "grad_norm": 0.47723257541656494, "learning_rate": 9.769783138221692e-06, "loss": 0.0389, "step": 72530 }, { "epoch": 1.1133451001458061, "grad_norm": 0.4429357051849365, "learning_rate": 9.767105122294043e-06, "loss": 0.0382, "step": 72540 }, { "epoch": 1.1134985803084951, "grad_norm": 0.4150412976741791, "learning_rate": 9.76442712307804e-06, "loss": 0.0306, "step": 72550 }, { "epoch": 1.1136520604711841, "grad_norm": 0.28000515699386597, "learning_rate": 9.761749140765843e-06, "loss": 0.0277, "step": 72560 }, { "epoch": 1.1138055406338732, "grad_norm": 0.41022875905036926, "learning_rate": 9.759071175549618e-06, "loss": 0.034, "step": 72570 }, { "epoch": 1.113959020796562, "grad_norm": 0.33262157440185547, "learning_rate": 9.75639322762152e-06, "loss": 0.0214, "step": 72580 }, { "epoch": 1.114112500959251, "grad_norm": 0.33725953102111816, "learning_rate": 9.753715297173709e-06, "loss": 0.0313, "step": 72590 }, { "epoch": 1.11426598112194, "grad_norm": 0.2800136208534241, "learning_rate": 9.751037384398346e-06, "loss": 0.0323, "step": 72600 }, { "epoch": 1.114419461284629, "grad_norm": 0.48110824823379517, "learning_rate": 9.748359489487583e-06, "loss": 0.0258, "step": 72610 }, { "epoch": 1.114572941447318, "grad_norm": 0.26883772015571594, "learning_rate": 9.745681612633584e-06, "loss": 0.0273, "step": 72620 }, { "epoch": 1.114726421610007, "grad_norm": 0.3884360194206238, "learning_rate": 9.743003754028495e-06, "loss": 0.0275, "step": 72630 }, { "epoch": 1.1148799017726958, "grad_norm": 0.4280259609222412, "learning_rate": 9.74032591386447e-06, "loss": 0.0229, "step": 72640 }, { "epoch": 1.1150333819353848, "grad_norm": 0.3831208348274231, "learning_rate": 9.737648092333663e-06, "loss": 0.03, "step": 72650 }, { "epoch": 1.1151868620980738, "grad_norm": 0.5701686143875122, "learning_rate": 9.734970289628222e-06, "loss": 0.0275, "step": 72660 }, { "epoch": 1.1153403422607628, "grad_norm": 0.41257765889167786, "learning_rate": 9.732292505940302e-06, "loss": 0.0248, "step": 72670 }, { "epoch": 1.1154938224234519, "grad_norm": 0.5062206983566284, "learning_rate": 9.729614741462045e-06, "loss": 0.036, "step": 72680 }, { "epoch": 1.1156473025861406, "grad_norm": 0.21189957857131958, "learning_rate": 9.726936996385595e-06, "loss": 0.028, "step": 72690 }, { "epoch": 1.1158007827488297, "grad_norm": 0.38143521547317505, "learning_rate": 9.7242592709031e-06, "loss": 0.0278, "step": 72700 }, { "epoch": 1.1159542629115187, "grad_norm": 0.3108629882335663, "learning_rate": 9.721581565206702e-06, "loss": 0.0316, "step": 72710 }, { "epoch": 1.1161077430742077, "grad_norm": 0.29536163806915283, "learning_rate": 9.718903879488547e-06, "loss": 0.0315, "step": 72720 }, { "epoch": 1.1162612232368967, "grad_norm": 0.22969788312911987, "learning_rate": 9.716226213940771e-06, "loss": 0.0344, "step": 72730 }, { "epoch": 1.1164147033995857, "grad_norm": 0.4629386067390442, "learning_rate": 9.713548568755513e-06, "loss": 0.0321, "step": 72740 }, { "epoch": 1.1165681835622745, "grad_norm": 0.502463698387146, "learning_rate": 9.71087094412491e-06, "loss": 0.0327, "step": 72750 }, { "epoch": 1.1167216637249635, "grad_norm": 0.36934420466423035, "learning_rate": 9.708193340241102e-06, "loss": 0.0387, "step": 72760 }, { "epoch": 1.1168751438876525, "grad_norm": 0.3380437195301056, "learning_rate": 9.705515757296227e-06, "loss": 0.0263, "step": 72770 }, { "epoch": 1.1170286240503415, "grad_norm": 0.23001307249069214, "learning_rate": 9.702838195482406e-06, "loss": 0.0296, "step": 72780 }, { "epoch": 1.1171821042130305, "grad_norm": 0.39412447810173035, "learning_rate": 9.700160654991779e-06, "loss": 0.0248, "step": 72790 }, { "epoch": 1.1173355843757193, "grad_norm": 0.4486425817012787, "learning_rate": 9.697483136016473e-06, "loss": 0.0344, "step": 72800 }, { "epoch": 1.1174890645384083, "grad_norm": 0.42909717559814453, "learning_rate": 9.694805638748619e-06, "loss": 0.0288, "step": 72810 }, { "epoch": 1.1176425447010974, "grad_norm": 0.3787404000759125, "learning_rate": 9.692128163380346e-06, "loss": 0.0362, "step": 72820 }, { "epoch": 1.1177960248637864, "grad_norm": 0.36184796690940857, "learning_rate": 9.689450710103769e-06, "loss": 0.0338, "step": 72830 }, { "epoch": 1.1179495050264754, "grad_norm": 0.39621880650520325, "learning_rate": 9.686773279111023e-06, "loss": 0.0306, "step": 72840 }, { "epoch": 1.1181029851891644, "grad_norm": 0.33745700120925903, "learning_rate": 9.684095870594225e-06, "loss": 0.0295, "step": 72850 }, { "epoch": 1.1182564653518532, "grad_norm": 0.4117249846458435, "learning_rate": 9.681418484745496e-06, "loss": 0.0355, "step": 72860 }, { "epoch": 1.1184099455145422, "grad_norm": 0.3412497341632843, "learning_rate": 9.678741121756961e-06, "loss": 0.0257, "step": 72870 }, { "epoch": 1.1185634256772312, "grad_norm": 0.1973268687725067, "learning_rate": 9.676063781820725e-06, "loss": 0.0274, "step": 72880 }, { "epoch": 1.1187169058399202, "grad_norm": 0.2951533794403076, "learning_rate": 9.673386465128913e-06, "loss": 0.0333, "step": 72890 }, { "epoch": 1.1188703860026092, "grad_norm": 0.3202642500400543, "learning_rate": 9.670709171873636e-06, "loss": 0.0335, "step": 72900 }, { "epoch": 1.119023866165298, "grad_norm": 0.41757437586784363, "learning_rate": 9.668031902247008e-06, "loss": 0.0326, "step": 72910 }, { "epoch": 1.119177346327987, "grad_norm": 0.4007437825202942, "learning_rate": 9.665354656441139e-06, "loss": 0.0305, "step": 72920 }, { "epoch": 1.119330826490676, "grad_norm": 0.3677760362625122, "learning_rate": 9.662677434648135e-06, "loss": 0.034, "step": 72930 }, { "epoch": 1.119484306653365, "grad_norm": 0.5153604745864868, "learning_rate": 9.660000237060106e-06, "loss": 0.0366, "step": 72940 }, { "epoch": 1.119637786816054, "grad_norm": 0.58102947473526, "learning_rate": 9.657323063869157e-06, "loss": 0.0267, "step": 72950 }, { "epoch": 1.119791266978743, "grad_norm": 0.3122849464416504, "learning_rate": 9.654645915267391e-06, "loss": 0.0319, "step": 72960 }, { "epoch": 1.1199447471414319, "grad_norm": 0.37745675444602966, "learning_rate": 9.651968791446914e-06, "loss": 0.0248, "step": 72970 }, { "epoch": 1.120098227304121, "grad_norm": 0.34848833084106445, "learning_rate": 9.649291692599818e-06, "loss": 0.0242, "step": 72980 }, { "epoch": 1.12025170746681, "grad_norm": 0.345695823431015, "learning_rate": 9.646614618918208e-06, "loss": 0.0368, "step": 72990 }, { "epoch": 1.120405187629499, "grad_norm": 0.2521228492259979, "learning_rate": 9.643937570594177e-06, "loss": 0.0272, "step": 73000 }, { "epoch": 1.120558667792188, "grad_norm": 0.2972865104675293, "learning_rate": 9.641260547819822e-06, "loss": 0.0237, "step": 73010 }, { "epoch": 1.120712147954877, "grad_norm": 0.3314930200576782, "learning_rate": 9.638583550787241e-06, "loss": 0.0277, "step": 73020 }, { "epoch": 1.1208656281175657, "grad_norm": 0.29841679334640503, "learning_rate": 9.635906579688512e-06, "loss": 0.0231, "step": 73030 }, { "epoch": 1.1210191082802548, "grad_norm": 0.3586423397064209, "learning_rate": 9.633229634715734e-06, "loss": 0.033, "step": 73040 }, { "epoch": 1.1211725884429438, "grad_norm": 0.2787691354751587, "learning_rate": 9.63055271606099e-06, "loss": 0.0211, "step": 73050 }, { "epoch": 1.1213260686056328, "grad_norm": 0.338329017162323, "learning_rate": 9.627875823916368e-06, "loss": 0.0237, "step": 73060 }, { "epoch": 1.1214795487683218, "grad_norm": 0.5821204781532288, "learning_rate": 9.625198958473953e-06, "loss": 0.0371, "step": 73070 }, { "epoch": 1.1216330289310106, "grad_norm": 0.2698540687561035, "learning_rate": 9.622522119925823e-06, "loss": 0.0259, "step": 73080 }, { "epoch": 1.1217865090936996, "grad_norm": 0.3186262547969818, "learning_rate": 9.619845308464058e-06, "loss": 0.0253, "step": 73090 }, { "epoch": 1.1219399892563886, "grad_norm": 0.4721614718437195, "learning_rate": 9.617168524280737e-06, "loss": 0.0379, "step": 73100 }, { "epoch": 1.1220934694190776, "grad_norm": 0.4631726145744324, "learning_rate": 9.614491767567934e-06, "loss": 0.0377, "step": 73110 }, { "epoch": 1.1222469495817666, "grad_norm": 0.38447535037994385, "learning_rate": 9.611815038517728e-06, "loss": 0.0349, "step": 73120 }, { "epoch": 1.1224004297444554, "grad_norm": 0.4452369511127472, "learning_rate": 9.609138337322186e-06, "loss": 0.0366, "step": 73130 }, { "epoch": 1.1225539099071444, "grad_norm": 0.36879652738571167, "learning_rate": 9.606461664173376e-06, "loss": 0.0262, "step": 73140 }, { "epoch": 1.1227073900698334, "grad_norm": 0.42047372460365295, "learning_rate": 9.60378501926337e-06, "loss": 0.0245, "step": 73150 }, { "epoch": 1.1228608702325225, "grad_norm": 0.47538793087005615, "learning_rate": 9.601108402784231e-06, "loss": 0.0293, "step": 73160 }, { "epoch": 1.1230143503952115, "grad_norm": 0.391291081905365, "learning_rate": 9.59843181492803e-06, "loss": 0.0301, "step": 73170 }, { "epoch": 1.1231678305579005, "grad_norm": 0.4685848355293274, "learning_rate": 9.59575525588682e-06, "loss": 0.0297, "step": 73180 }, { "epoch": 1.1233213107205893, "grad_norm": 0.4478357434272766, "learning_rate": 9.593078725852661e-06, "loss": 0.0365, "step": 73190 }, { "epoch": 1.1234747908832783, "grad_norm": 0.4100147485733032, "learning_rate": 9.590402225017615e-06, "loss": 0.0268, "step": 73200 }, { "epoch": 1.1236282710459673, "grad_norm": 0.6570672392845154, "learning_rate": 9.587725753573734e-06, "loss": 0.0288, "step": 73210 }, { "epoch": 1.1237817512086563, "grad_norm": 0.2838291525840759, "learning_rate": 9.585049311713076e-06, "loss": 0.0341, "step": 73220 }, { "epoch": 1.1239352313713453, "grad_norm": 0.3356608748435974, "learning_rate": 9.582372899627688e-06, "loss": 0.0347, "step": 73230 }, { "epoch": 1.1240887115340343, "grad_norm": 0.3588975667953491, "learning_rate": 9.579696517509617e-06, "loss": 0.0306, "step": 73240 }, { "epoch": 1.1242421916967231, "grad_norm": 0.3723451793193817, "learning_rate": 9.577020165550917e-06, "loss": 0.027, "step": 73250 }, { "epoch": 1.1243956718594121, "grad_norm": 0.4199298024177551, "learning_rate": 9.574343843943625e-06, "loss": 0.0317, "step": 73260 }, { "epoch": 1.1245491520221012, "grad_norm": 0.6217533946037292, "learning_rate": 9.571667552879792e-06, "loss": 0.0448, "step": 73270 }, { "epoch": 1.1247026321847902, "grad_norm": 0.38095805048942566, "learning_rate": 9.568991292551451e-06, "loss": 0.0346, "step": 73280 }, { "epoch": 1.1248561123474792, "grad_norm": 0.2768439054489136, "learning_rate": 9.566315063150643e-06, "loss": 0.033, "step": 73290 }, { "epoch": 1.125009592510168, "grad_norm": 0.3183964192867279, "learning_rate": 9.563638864869403e-06, "loss": 0.0358, "step": 73300 }, { "epoch": 1.125163072672857, "grad_norm": 0.2618885636329651, "learning_rate": 9.560962697899766e-06, "loss": 0.0317, "step": 73310 }, { "epoch": 1.125316552835546, "grad_norm": 0.35265833139419556, "learning_rate": 9.558286562433768e-06, "loss": 0.0304, "step": 73320 }, { "epoch": 1.125470032998235, "grad_norm": 0.2692253589630127, "learning_rate": 9.555610458663428e-06, "loss": 0.0314, "step": 73330 }, { "epoch": 1.125623513160924, "grad_norm": 0.3917578160762787, "learning_rate": 9.55293438678078e-06, "loss": 0.0299, "step": 73340 }, { "epoch": 1.1257769933236128, "grad_norm": 0.301291823387146, "learning_rate": 9.550258346977844e-06, "loss": 0.0293, "step": 73350 }, { "epoch": 1.1259304734863018, "grad_norm": 0.3813604414463043, "learning_rate": 9.547582339446648e-06, "loss": 0.0255, "step": 73360 }, { "epoch": 1.1260839536489908, "grad_norm": 0.37906089425086975, "learning_rate": 9.544906364379212e-06, "loss": 0.0306, "step": 73370 }, { "epoch": 1.1262374338116798, "grad_norm": 0.31166085600852966, "learning_rate": 9.542230421967547e-06, "loss": 0.0282, "step": 73380 }, { "epoch": 1.1263909139743689, "grad_norm": 0.5442651510238647, "learning_rate": 9.539554512403674e-06, "loss": 0.0243, "step": 73390 }, { "epoch": 1.1265443941370579, "grad_norm": 0.27567675709724426, "learning_rate": 9.536878635879602e-06, "loss": 0.0251, "step": 73400 }, { "epoch": 1.1266978742997469, "grad_norm": 0.32812073826789856, "learning_rate": 9.534202792587348e-06, "loss": 0.0303, "step": 73410 }, { "epoch": 1.1268513544624357, "grad_norm": 0.5050378441810608, "learning_rate": 9.531526982718917e-06, "loss": 0.027, "step": 73420 }, { "epoch": 1.1270048346251247, "grad_norm": 0.4603118896484375, "learning_rate": 9.528851206466313e-06, "loss": 0.0285, "step": 73430 }, { "epoch": 1.1271583147878137, "grad_norm": 0.2855187654495239, "learning_rate": 9.526175464021539e-06, "loss": 0.0338, "step": 73440 }, { "epoch": 1.1273117949505027, "grad_norm": 0.3944202959537506, "learning_rate": 9.523499755576596e-06, "loss": 0.0335, "step": 73450 }, { "epoch": 1.1274652751131917, "grad_norm": 0.3125561773777008, "learning_rate": 9.520824081323487e-06, "loss": 0.0277, "step": 73460 }, { "epoch": 1.1276187552758805, "grad_norm": 0.37297576665878296, "learning_rate": 9.51814844145421e-06, "loss": 0.0392, "step": 73470 }, { "epoch": 1.1277722354385695, "grad_norm": 0.3163074851036072, "learning_rate": 9.515472836160746e-06, "loss": 0.0228, "step": 73480 }, { "epoch": 1.1279257156012585, "grad_norm": 0.4994089901447296, "learning_rate": 9.512797265635099e-06, "loss": 0.0327, "step": 73490 }, { "epoch": 1.1280791957639476, "grad_norm": 0.3816872239112854, "learning_rate": 9.51012173006925e-06, "loss": 0.0289, "step": 73500 }, { "epoch": 1.1282326759266366, "grad_norm": 0.29141440987586975, "learning_rate": 9.50744622965519e-06, "loss": 0.0342, "step": 73510 }, { "epoch": 1.1283861560893254, "grad_norm": 0.44735902547836304, "learning_rate": 9.504770764584904e-06, "loss": 0.0287, "step": 73520 }, { "epoch": 1.1285396362520144, "grad_norm": 0.2525363266468048, "learning_rate": 9.502095335050363e-06, "loss": 0.0248, "step": 73530 }, { "epoch": 1.1286931164147034, "grad_norm": 0.2768990099430084, "learning_rate": 9.499419941243558e-06, "loss": 0.0301, "step": 73540 }, { "epoch": 1.1288465965773924, "grad_norm": 0.43560925126075745, "learning_rate": 9.496744583356455e-06, "loss": 0.0304, "step": 73550 }, { "epoch": 1.1290000767400814, "grad_norm": 0.3919423520565033, "learning_rate": 9.494069261581033e-06, "loss": 0.0237, "step": 73560 }, { "epoch": 1.1291535569027702, "grad_norm": 0.25878724455833435, "learning_rate": 9.491393976109269e-06, "loss": 0.0383, "step": 73570 }, { "epoch": 1.1293070370654592, "grad_norm": 0.25432106852531433, "learning_rate": 9.488718727133116e-06, "loss": 0.0241, "step": 73580 }, { "epoch": 1.1294605172281482, "grad_norm": 0.43820813298225403, "learning_rate": 9.486043514844548e-06, "loss": 0.0321, "step": 73590 }, { "epoch": 1.1296139973908372, "grad_norm": 0.5283547639846802, "learning_rate": 9.48336833943553e-06, "loss": 0.0284, "step": 73600 }, { "epoch": 1.1297674775535262, "grad_norm": 0.5576061010360718, "learning_rate": 9.480693201098019e-06, "loss": 0.0327, "step": 73610 }, { "epoch": 1.1299209577162153, "grad_norm": 0.3063471019268036, "learning_rate": 9.478018100023975e-06, "loss": 0.0301, "step": 73620 }, { "epoch": 1.1300744378789043, "grad_norm": 0.2764250636100769, "learning_rate": 9.47534303640535e-06, "loss": 0.0255, "step": 73630 }, { "epoch": 1.130227918041593, "grad_norm": 0.3395726680755615, "learning_rate": 9.472668010434097e-06, "loss": 0.0289, "step": 73640 }, { "epoch": 1.130381398204282, "grad_norm": 0.3666744828224182, "learning_rate": 9.469993022302169e-06, "loss": 0.0318, "step": 73650 }, { "epoch": 1.130534878366971, "grad_norm": 0.5123857259750366, "learning_rate": 9.467318072201508e-06, "loss": 0.0327, "step": 73660 }, { "epoch": 1.13068835852966, "grad_norm": 0.4350740611553192, "learning_rate": 9.464643160324064e-06, "loss": 0.0314, "step": 73670 }, { "epoch": 1.130841838692349, "grad_norm": 0.37169474363327026, "learning_rate": 9.461968286861773e-06, "loss": 0.0275, "step": 73680 }, { "epoch": 1.130995318855038, "grad_norm": 0.31719326972961426, "learning_rate": 9.459293452006574e-06, "loss": 0.0352, "step": 73690 }, { "epoch": 1.131148799017727, "grad_norm": 0.5283253788948059, "learning_rate": 9.456618655950406e-06, "loss": 0.0376, "step": 73700 }, { "epoch": 1.131302279180416, "grad_norm": 0.5564208626747131, "learning_rate": 9.453943898885201e-06, "loss": 0.0406, "step": 73710 }, { "epoch": 1.131455759343105, "grad_norm": 0.39728525280952454, "learning_rate": 9.451269181002891e-06, "loss": 0.0213, "step": 73720 }, { "epoch": 1.131609239505794, "grad_norm": 0.5180972814559937, "learning_rate": 9.4485945024954e-06, "loss": 0.0282, "step": 73730 }, { "epoch": 1.1317627196684827, "grad_norm": 0.33836400508880615, "learning_rate": 9.445919863554651e-06, "loss": 0.0265, "step": 73740 }, { "epoch": 1.1319161998311718, "grad_norm": 0.3000565767288208, "learning_rate": 9.443245264372572e-06, "loss": 0.0189, "step": 73750 }, { "epoch": 1.1320696799938608, "grad_norm": 0.37004396319389343, "learning_rate": 9.440570705141077e-06, "loss": 0.0295, "step": 73760 }, { "epoch": 1.1322231601565498, "grad_norm": 0.2598308324813843, "learning_rate": 9.437896186052089e-06, "loss": 0.0289, "step": 73770 }, { "epoch": 1.1323766403192388, "grad_norm": 0.3459686040878296, "learning_rate": 9.435221707297514e-06, "loss": 0.0284, "step": 73780 }, { "epoch": 1.1325301204819278, "grad_norm": 0.2635383605957031, "learning_rate": 9.43254726906926e-06, "loss": 0.0272, "step": 73790 }, { "epoch": 1.1326836006446166, "grad_norm": 0.32609185576438904, "learning_rate": 9.429872871559245e-06, "loss": 0.0338, "step": 73800 }, { "epoch": 1.1328370808073056, "grad_norm": 0.3906146287918091, "learning_rate": 9.427198514959364e-06, "loss": 0.03, "step": 73810 }, { "epoch": 1.1329905609699946, "grad_norm": 0.38222867250442505, "learning_rate": 9.424524199461526e-06, "loss": 0.0271, "step": 73820 }, { "epoch": 1.1331440411326836, "grad_norm": 0.43892717361450195, "learning_rate": 9.421849925257625e-06, "loss": 0.0323, "step": 73830 }, { "epoch": 1.1332975212953726, "grad_norm": 0.5428193211555481, "learning_rate": 9.419175692539553e-06, "loss": 0.029, "step": 73840 }, { "epoch": 1.1334510014580617, "grad_norm": 0.27272024750709534, "learning_rate": 9.416501501499209e-06, "loss": 0.0306, "step": 73850 }, { "epoch": 1.1336044816207504, "grad_norm": 0.38335829973220825, "learning_rate": 9.413827352328482e-06, "loss": 0.0254, "step": 73860 }, { "epoch": 1.1337579617834395, "grad_norm": 0.3915354013442993, "learning_rate": 9.411153245219262e-06, "loss": 0.0324, "step": 73870 }, { "epoch": 1.1339114419461285, "grad_norm": 0.3001112937927246, "learning_rate": 9.408479180363422e-06, "loss": 0.0254, "step": 73880 }, { "epoch": 1.1340649221088175, "grad_norm": 0.2641538977622986, "learning_rate": 9.405805157952852e-06, "loss": 0.0229, "step": 73890 }, { "epoch": 1.1342184022715065, "grad_norm": 0.3456905484199524, "learning_rate": 9.403131178179424e-06, "loss": 0.0283, "step": 73900 }, { "epoch": 1.1343718824341953, "grad_norm": 0.38318485021591187, "learning_rate": 9.400457241235019e-06, "loss": 0.0268, "step": 73910 }, { "epoch": 1.1345253625968843, "grad_norm": 0.48084205389022827, "learning_rate": 9.397783347311505e-06, "loss": 0.036, "step": 73920 }, { "epoch": 1.1346788427595733, "grad_norm": 0.4194946885108948, "learning_rate": 9.395109496600745e-06, "loss": 0.0299, "step": 73930 }, { "epoch": 1.1348323229222623, "grad_norm": 0.38436296582221985, "learning_rate": 9.392435689294612e-06, "loss": 0.0261, "step": 73940 }, { "epoch": 1.1349858030849513, "grad_norm": 0.4007842242717743, "learning_rate": 9.389761925584964e-06, "loss": 0.0301, "step": 73950 }, { "epoch": 1.1351392832476401, "grad_norm": 0.20704030990600586, "learning_rate": 9.387088205663663e-06, "loss": 0.0339, "step": 73960 }, { "epoch": 1.1352927634103291, "grad_norm": 0.4174993634223938, "learning_rate": 9.384414529722565e-06, "loss": 0.0318, "step": 73970 }, { "epoch": 1.1354462435730182, "grad_norm": 0.403835654258728, "learning_rate": 9.381740897953517e-06, "loss": 0.03, "step": 73980 }, { "epoch": 1.1355997237357072, "grad_norm": 0.3945741653442383, "learning_rate": 9.379067310548375e-06, "loss": 0.0304, "step": 73990 }, { "epoch": 1.1357532038983962, "grad_norm": 0.26625609397888184, "learning_rate": 9.37639376769898e-06, "loss": 0.0253, "step": 74000 }, { "epoch": 1.1359066840610852, "grad_norm": 0.3862162232398987, "learning_rate": 9.373720269597181e-06, "loss": 0.0286, "step": 74010 }, { "epoch": 1.136060164223774, "grad_norm": 0.27109187841415405, "learning_rate": 9.371046816434819e-06, "loss": 0.0251, "step": 74020 }, { "epoch": 1.136213644386463, "grad_norm": 0.35311663150787354, "learning_rate": 9.36837340840372e-06, "loss": 0.0354, "step": 74030 }, { "epoch": 1.136367124549152, "grad_norm": 0.41007527709007263, "learning_rate": 9.365700045695727e-06, "loss": 0.0294, "step": 74040 }, { "epoch": 1.136520604711841, "grad_norm": 0.2998146414756775, "learning_rate": 9.363026728502666e-06, "loss": 0.0374, "step": 74050 }, { "epoch": 1.13667408487453, "grad_norm": 0.34885990619659424, "learning_rate": 9.360353457016368e-06, "loss": 0.0296, "step": 74060 }, { "epoch": 1.136827565037219, "grad_norm": 0.3056555986404419, "learning_rate": 9.357680231428658e-06, "loss": 0.0282, "step": 74070 }, { "epoch": 1.1369810451999078, "grad_norm": 0.5013808608055115, "learning_rate": 9.355007051931346e-06, "loss": 0.0279, "step": 74080 }, { "epoch": 1.1371345253625968, "grad_norm": 0.2985043227672577, "learning_rate": 9.35233391871626e-06, "loss": 0.0268, "step": 74090 }, { "epoch": 1.1372880055252859, "grad_norm": 0.3901989459991455, "learning_rate": 9.349660831975207e-06, "loss": 0.0287, "step": 74100 }, { "epoch": 1.1374414856879749, "grad_norm": 0.3188246786594391, "learning_rate": 9.3469877919e-06, "loss": 0.0367, "step": 74110 }, { "epoch": 1.1375949658506639, "grad_norm": 0.36361104249954224, "learning_rate": 9.34431479868245e-06, "loss": 0.025, "step": 74120 }, { "epoch": 1.1377484460133527, "grad_norm": 0.41112884879112244, "learning_rate": 9.341641852514356e-06, "loss": 0.0377, "step": 74130 }, { "epoch": 1.1379019261760417, "grad_norm": 0.46832475066185, "learning_rate": 9.338968953587518e-06, "loss": 0.0316, "step": 74140 }, { "epoch": 1.1380554063387307, "grad_norm": 0.33175382018089294, "learning_rate": 9.336296102093733e-06, "loss": 0.0302, "step": 74150 }, { "epoch": 1.1382088865014197, "grad_norm": 0.2255270928144455, "learning_rate": 9.333623298224798e-06, "loss": 0.0308, "step": 74160 }, { "epoch": 1.1383623666641087, "grad_norm": 0.3063189387321472, "learning_rate": 9.330950542172504e-06, "loss": 0.0267, "step": 74170 }, { "epoch": 1.1385158468267975, "grad_norm": 0.27017951011657715, "learning_rate": 9.328277834128631e-06, "loss": 0.03, "step": 74180 }, { "epoch": 1.1386693269894865, "grad_norm": 0.2821466326713562, "learning_rate": 9.325605174284967e-06, "loss": 0.0227, "step": 74190 }, { "epoch": 1.1388228071521755, "grad_norm": 0.3415762782096863, "learning_rate": 9.322932562833291e-06, "loss": 0.0383, "step": 74200 }, { "epoch": 1.1389762873148646, "grad_norm": 0.46356064081192017, "learning_rate": 9.320259999965377e-06, "loss": 0.0356, "step": 74210 }, { "epoch": 1.1391297674775536, "grad_norm": 0.22038839757442474, "learning_rate": 9.317587485873006e-06, "loss": 0.0327, "step": 74220 }, { "epoch": 1.1392832476402426, "grad_norm": 0.38902509212493896, "learning_rate": 9.314915020747939e-06, "loss": 0.0264, "step": 74230 }, { "epoch": 1.1394367278029316, "grad_norm": 0.37442365288734436, "learning_rate": 9.312242604781943e-06, "loss": 0.0296, "step": 74240 }, { "epoch": 1.1395902079656204, "grad_norm": 0.2679210305213928, "learning_rate": 9.309570238166782e-06, "loss": 0.03, "step": 74250 }, { "epoch": 1.1397436881283094, "grad_norm": 0.44203850626945496, "learning_rate": 9.306897921094214e-06, "loss": 0.0286, "step": 74260 }, { "epoch": 1.1398971682909984, "grad_norm": 0.3661623001098633, "learning_rate": 9.304225653755998e-06, "loss": 0.0272, "step": 74270 }, { "epoch": 1.1400506484536874, "grad_norm": 0.37632328271865845, "learning_rate": 9.301553436343882e-06, "loss": 0.0278, "step": 74280 }, { "epoch": 1.1402041286163764, "grad_norm": 0.2443421334028244, "learning_rate": 9.29888126904961e-06, "loss": 0.0294, "step": 74290 }, { "epoch": 1.1403576087790652, "grad_norm": 0.49072226881980896, "learning_rate": 9.296209152064935e-06, "loss": 0.0276, "step": 74300 }, { "epoch": 1.1405110889417542, "grad_norm": 0.6794799566268921, "learning_rate": 9.293537085581591e-06, "loss": 0.0316, "step": 74310 }, { "epoch": 1.1406645691044432, "grad_norm": 0.6712286472320557, "learning_rate": 9.290865069791322e-06, "loss": 0.0341, "step": 74320 }, { "epoch": 1.1408180492671323, "grad_norm": 0.4523443877696991, "learning_rate": 9.288193104885858e-06, "loss": 0.0385, "step": 74330 }, { "epoch": 1.1409715294298213, "grad_norm": 0.30192774534225464, "learning_rate": 9.285521191056925e-06, "loss": 0.0238, "step": 74340 }, { "epoch": 1.14112500959251, "grad_norm": 0.43434906005859375, "learning_rate": 9.282849328496254e-06, "loss": 0.0267, "step": 74350 }, { "epoch": 1.141278489755199, "grad_norm": 0.2903830111026764, "learning_rate": 9.280177517395566e-06, "loss": 0.0326, "step": 74360 }, { "epoch": 1.141431969917888, "grad_norm": 0.4794815182685852, "learning_rate": 9.277505757946585e-06, "loss": 0.0377, "step": 74370 }, { "epoch": 1.141585450080577, "grad_norm": 0.4358263611793518, "learning_rate": 9.274834050341016e-06, "loss": 0.0255, "step": 74380 }, { "epoch": 1.1417389302432661, "grad_norm": 0.2926446497440338, "learning_rate": 9.272162394770578e-06, "loss": 0.027, "step": 74390 }, { "epoch": 1.1418924104059551, "grad_norm": 0.3099323511123657, "learning_rate": 9.269490791426977e-06, "loss": 0.0291, "step": 74400 }, { "epoch": 1.142045890568644, "grad_norm": 0.4428026080131531, "learning_rate": 9.266819240501918e-06, "loss": 0.0281, "step": 74410 }, { "epoch": 1.142199370731333, "grad_norm": 0.44558462500572205, "learning_rate": 9.264147742187103e-06, "loss": 0.0338, "step": 74420 }, { "epoch": 1.142352850894022, "grad_norm": 0.35447606444358826, "learning_rate": 9.261476296674222e-06, "loss": 0.029, "step": 74430 }, { "epoch": 1.142506331056711, "grad_norm": 0.28103527426719666, "learning_rate": 9.258804904154972e-06, "loss": 0.0284, "step": 74440 }, { "epoch": 1.1426598112194, "grad_norm": 0.27729517221450806, "learning_rate": 9.25613356482104e-06, "loss": 0.0333, "step": 74450 }, { "epoch": 1.142813291382089, "grad_norm": 0.24944934248924255, "learning_rate": 9.253462278864115e-06, "loss": 0.0239, "step": 74460 }, { "epoch": 1.1429667715447778, "grad_norm": 0.3888784945011139, "learning_rate": 9.250791046475878e-06, "loss": 0.0299, "step": 74470 }, { "epoch": 1.1431202517074668, "grad_norm": 0.39429229497909546, "learning_rate": 9.248119867848e-06, "loss": 0.0283, "step": 74480 }, { "epoch": 1.1432737318701558, "grad_norm": 0.38702550530433655, "learning_rate": 9.24544874317216e-06, "loss": 0.0344, "step": 74490 }, { "epoch": 1.1434272120328448, "grad_norm": 0.33090752363204956, "learning_rate": 9.242777672640025e-06, "loss": 0.0349, "step": 74500 }, { "epoch": 1.1435806921955338, "grad_norm": 0.5526117086410522, "learning_rate": 9.240106656443261e-06, "loss": 0.0236, "step": 74510 }, { "epoch": 1.1437341723582226, "grad_norm": 0.32784655690193176, "learning_rate": 9.237435694773537e-06, "loss": 0.0258, "step": 74520 }, { "epoch": 1.1438876525209116, "grad_norm": 0.30623453855514526, "learning_rate": 9.234764787822498e-06, "loss": 0.0232, "step": 74530 }, { "epoch": 1.1440411326836006, "grad_norm": 0.5058662295341492, "learning_rate": 9.23209393578181e-06, "loss": 0.0314, "step": 74540 }, { "epoch": 1.1441946128462896, "grad_norm": 0.5973299145698547, "learning_rate": 9.229423138843115e-06, "loss": 0.0314, "step": 74550 }, { "epoch": 1.1443480930089787, "grad_norm": 0.384752482175827, "learning_rate": 9.226752397198062e-06, "loss": 0.0281, "step": 74560 }, { "epoch": 1.1445015731716675, "grad_norm": 0.5335838198661804, "learning_rate": 9.224081711038297e-06, "loss": 0.0389, "step": 74570 }, { "epoch": 1.1446550533343565, "grad_norm": 0.29850056767463684, "learning_rate": 9.221411080555452e-06, "loss": 0.0337, "step": 74580 }, { "epoch": 1.1448085334970455, "grad_norm": 0.36720502376556396, "learning_rate": 9.218740505941163e-06, "loss": 0.0241, "step": 74590 }, { "epoch": 1.1449620136597345, "grad_norm": 0.5153264403343201, "learning_rate": 9.216069987387062e-06, "loss": 0.0287, "step": 74600 }, { "epoch": 1.1451154938224235, "grad_norm": 0.64336097240448, "learning_rate": 9.213399525084775e-06, "loss": 0.0279, "step": 74610 }, { "epoch": 1.1452689739851125, "grad_norm": 0.29311028122901917, "learning_rate": 9.210729119225926e-06, "loss": 0.0223, "step": 74620 }, { "epoch": 1.1454224541478013, "grad_norm": 0.5171262621879578, "learning_rate": 9.208058770002124e-06, "loss": 0.0255, "step": 74630 }, { "epoch": 1.1455759343104903, "grad_norm": 0.4351862967014313, "learning_rate": 9.205388477604995e-06, "loss": 0.0284, "step": 74640 }, { "epoch": 1.1457294144731793, "grad_norm": 0.30559027194976807, "learning_rate": 9.202718242226137e-06, "loss": 0.0259, "step": 74650 }, { "epoch": 1.1458828946358683, "grad_norm": 0.4500131607055664, "learning_rate": 9.200048064057166e-06, "loss": 0.0308, "step": 74660 }, { "epoch": 1.1460363747985574, "grad_norm": 0.4031173288822174, "learning_rate": 9.197377943289683e-06, "loss": 0.0299, "step": 74670 }, { "epoch": 1.1461898549612464, "grad_norm": 0.3726949095726013, "learning_rate": 9.19470788011528e-06, "loss": 0.0356, "step": 74680 }, { "epoch": 1.1463433351239352, "grad_norm": 0.2890267074108124, "learning_rate": 9.192037874725552e-06, "loss": 0.0345, "step": 74690 }, { "epoch": 1.1464968152866242, "grad_norm": 0.38369038701057434, "learning_rate": 9.18936792731209e-06, "loss": 0.0326, "step": 74700 }, { "epoch": 1.1466502954493132, "grad_norm": 0.47938185930252075, "learning_rate": 9.186698038066478e-06, "loss": 0.034, "step": 74710 }, { "epoch": 1.1468037756120022, "grad_norm": 0.2940155267715454, "learning_rate": 9.184028207180302e-06, "loss": 0.0264, "step": 74720 }, { "epoch": 1.1469572557746912, "grad_norm": 0.2236396074295044, "learning_rate": 9.181358434845133e-06, "loss": 0.0215, "step": 74730 }, { "epoch": 1.14711073593738, "grad_norm": 0.412727415561676, "learning_rate": 9.17868872125254e-06, "loss": 0.0331, "step": 74740 }, { "epoch": 1.147264216100069, "grad_norm": 0.2864064872264862, "learning_rate": 9.176019066594103e-06, "loss": 0.0263, "step": 74750 }, { "epoch": 1.147417696262758, "grad_norm": 0.28282901644706726, "learning_rate": 9.173349471061375e-06, "loss": 0.0273, "step": 74760 }, { "epoch": 1.147571176425447, "grad_norm": 0.4456724524497986, "learning_rate": 9.170679934845925e-06, "loss": 0.0326, "step": 74770 }, { "epoch": 1.147724656588136, "grad_norm": 0.2891733944416046, "learning_rate": 9.168010458139304e-06, "loss": 0.0478, "step": 74780 }, { "epoch": 1.1478781367508248, "grad_norm": 0.27030524611473083, "learning_rate": 9.16534104113306e-06, "loss": 0.024, "step": 74790 }, { "epoch": 1.1480316169135139, "grad_norm": 0.4559406340122223, "learning_rate": 9.162671684018746e-06, "loss": 0.0335, "step": 74800 }, { "epoch": 1.1481850970762029, "grad_norm": 0.2173275351524353, "learning_rate": 9.160002386987903e-06, "loss": 0.0185, "step": 74810 }, { "epoch": 1.1483385772388919, "grad_norm": 0.5076711177825928, "learning_rate": 9.15733315023207e-06, "loss": 0.0309, "step": 74820 }, { "epoch": 1.148492057401581, "grad_norm": 0.3674907982349396, "learning_rate": 9.154663973942782e-06, "loss": 0.0258, "step": 74830 }, { "epoch": 1.14864553756427, "grad_norm": 0.3487178683280945, "learning_rate": 9.151994858311564e-06, "loss": 0.0303, "step": 74840 }, { "epoch": 1.148799017726959, "grad_norm": 0.3716578185558319, "learning_rate": 9.149325803529946e-06, "loss": 0.0352, "step": 74850 }, { "epoch": 1.1489524978896477, "grad_norm": 0.2075238972902298, "learning_rate": 9.146656809789445e-06, "loss": 0.0246, "step": 74860 }, { "epoch": 1.1491059780523367, "grad_norm": 0.29512351751327515, "learning_rate": 9.143987877281588e-06, "loss": 0.0298, "step": 74870 }, { "epoch": 1.1492594582150257, "grad_norm": 0.4359791874885559, "learning_rate": 9.141319006197876e-06, "loss": 0.0372, "step": 74880 }, { "epoch": 1.1494129383777147, "grad_norm": 0.3230442404747009, "learning_rate": 9.13865019672982e-06, "loss": 0.027, "step": 74890 }, { "epoch": 1.1495664185404038, "grad_norm": 0.6691511273384094, "learning_rate": 9.135981449068927e-06, "loss": 0.0368, "step": 74900 }, { "epoch": 1.1497198987030925, "grad_norm": 0.36131560802459717, "learning_rate": 9.133312763406691e-06, "loss": 0.0263, "step": 74910 }, { "epoch": 1.1498733788657816, "grad_norm": 0.30817925930023193, "learning_rate": 9.130644139934614e-06, "loss": 0.0377, "step": 74920 }, { "epoch": 1.1500268590284706, "grad_norm": 0.343239963054657, "learning_rate": 9.127975578844177e-06, "loss": 0.0302, "step": 74930 }, { "epoch": 1.1501803391911596, "grad_norm": 0.5238693356513977, "learning_rate": 9.125307080326873e-06, "loss": 0.0384, "step": 74940 }, { "epoch": 1.1503338193538486, "grad_norm": 0.30962425470352173, "learning_rate": 9.122638644574177e-06, "loss": 0.0329, "step": 74950 }, { "epoch": 1.1504872995165374, "grad_norm": 0.33841216564178467, "learning_rate": 9.119970271777572e-06, "loss": 0.0293, "step": 74960 }, { "epoch": 1.1506407796792264, "grad_norm": 0.5466331839561462, "learning_rate": 9.11730196212853e-06, "loss": 0.0299, "step": 74970 }, { "epoch": 1.1507942598419154, "grad_norm": 0.26431506872177124, "learning_rate": 9.114633715818512e-06, "loss": 0.0276, "step": 74980 }, { "epoch": 1.1509477400046044, "grad_norm": 0.4170367121696472, "learning_rate": 9.111965533038988e-06, "loss": 0.0293, "step": 74990 }, { "epoch": 1.1511012201672934, "grad_norm": 0.4278716444969177, "learning_rate": 9.109297413981411e-06, "loss": 0.0381, "step": 75000 }, { "epoch": 1.1512547003299822, "grad_norm": 0.4129503667354584, "learning_rate": 9.106629358837244e-06, "loss": 0.0265, "step": 75010 }, { "epoch": 1.1514081804926712, "grad_norm": 0.2480185627937317, "learning_rate": 9.103961367797926e-06, "loss": 0.0332, "step": 75020 }, { "epoch": 1.1515616606553603, "grad_norm": 0.3373395800590515, "learning_rate": 9.101293441054904e-06, "loss": 0.0286, "step": 75030 }, { "epoch": 1.1517151408180493, "grad_norm": 0.30202630162239075, "learning_rate": 9.098625578799622e-06, "loss": 0.034, "step": 75040 }, { "epoch": 1.1518686209807383, "grad_norm": 0.3425472378730774, "learning_rate": 9.095957781223513e-06, "loss": 0.0315, "step": 75050 }, { "epoch": 1.1520221011434273, "grad_norm": 0.37786930799484253, "learning_rate": 9.093290048518012e-06, "loss": 0.0308, "step": 75060 }, { "epoch": 1.1521755813061163, "grad_norm": 0.2706800699234009, "learning_rate": 9.09062238087454e-06, "loss": 0.0256, "step": 75070 }, { "epoch": 1.152329061468805, "grad_norm": 0.5323017835617065, "learning_rate": 9.08795477848452e-06, "loss": 0.0322, "step": 75080 }, { "epoch": 1.152482541631494, "grad_norm": 0.4377790689468384, "learning_rate": 9.085287241539372e-06, "loss": 0.0212, "step": 75090 }, { "epoch": 1.1526360217941831, "grad_norm": 0.5507537722587585, "learning_rate": 9.0826197702305e-06, "loss": 0.0255, "step": 75100 }, { "epoch": 1.1527895019568721, "grad_norm": 0.479478120803833, "learning_rate": 9.079952364749326e-06, "loss": 0.0377, "step": 75110 }, { "epoch": 1.1529429821195611, "grad_norm": 0.45690011978149414, "learning_rate": 9.07728502528724e-06, "loss": 0.0341, "step": 75120 }, { "epoch": 1.15309646228225, "grad_norm": 0.6394461393356323, "learning_rate": 9.074617752035642e-06, "loss": 0.0357, "step": 75130 }, { "epoch": 1.153249942444939, "grad_norm": 0.4107765555381775, "learning_rate": 9.07195054518593e-06, "loss": 0.0319, "step": 75140 }, { "epoch": 1.153403422607628, "grad_norm": 0.42043718695640564, "learning_rate": 9.069283404929486e-06, "loss": 0.0346, "step": 75150 }, { "epoch": 1.153556902770317, "grad_norm": 0.3855248987674713, "learning_rate": 9.066616331457705e-06, "loss": 0.0266, "step": 75160 }, { "epoch": 1.153710382933006, "grad_norm": 0.3788760006427765, "learning_rate": 9.063949324961954e-06, "loss": 0.0296, "step": 75170 }, { "epoch": 1.1538638630956948, "grad_norm": 0.41441577672958374, "learning_rate": 9.061282385633611e-06, "loss": 0.0354, "step": 75180 }, { "epoch": 1.1540173432583838, "grad_norm": 0.6575712561607361, "learning_rate": 9.058615513664048e-06, "loss": 0.0289, "step": 75190 }, { "epoch": 1.1541708234210728, "grad_norm": 0.4727346897125244, "learning_rate": 9.055948709244628e-06, "loss": 0.0341, "step": 75200 }, { "epoch": 1.1543243035837618, "grad_norm": 0.41375577449798584, "learning_rate": 9.053281972566711e-06, "loss": 0.0288, "step": 75210 }, { "epoch": 1.1544777837464508, "grad_norm": 0.3901623487472534, "learning_rate": 9.05061530382165e-06, "loss": 0.0289, "step": 75220 }, { "epoch": 1.1546312639091398, "grad_norm": 0.40758827328681946, "learning_rate": 9.047948703200798e-06, "loss": 0.0306, "step": 75230 }, { "epoch": 1.1547847440718286, "grad_norm": 0.3504333198070526, "learning_rate": 9.045282170895496e-06, "loss": 0.0334, "step": 75240 }, { "epoch": 1.1549382242345176, "grad_norm": 0.44286683201789856, "learning_rate": 9.042615707097087e-06, "loss": 0.0354, "step": 75250 }, { "epoch": 1.1550917043972067, "grad_norm": 0.34933650493621826, "learning_rate": 9.039949311996909e-06, "loss": 0.0296, "step": 75260 }, { "epoch": 1.1552451845598957, "grad_norm": 0.35331252217292786, "learning_rate": 9.037282985786283e-06, "loss": 0.0337, "step": 75270 }, { "epoch": 1.1553986647225847, "grad_norm": 0.4211731553077698, "learning_rate": 9.034616728656544e-06, "loss": 0.0373, "step": 75280 }, { "epoch": 1.1555521448852737, "grad_norm": 0.5178981423377991, "learning_rate": 9.031950540799006e-06, "loss": 0.0346, "step": 75290 }, { "epoch": 1.1557056250479625, "grad_norm": 0.42011725902557373, "learning_rate": 9.02928442240499e-06, "loss": 0.0335, "step": 75300 }, { "epoch": 1.1558591052106515, "grad_norm": 0.3231221139431, "learning_rate": 9.026618373665803e-06, "loss": 0.0274, "step": 75310 }, { "epoch": 1.1560125853733405, "grad_norm": 0.37429097294807434, "learning_rate": 9.023952394772748e-06, "loss": 0.0306, "step": 75320 }, { "epoch": 1.1561660655360295, "grad_norm": 0.34029287099838257, "learning_rate": 9.021286485917131e-06, "loss": 0.0316, "step": 75330 }, { "epoch": 1.1563195456987185, "grad_norm": 0.29364171624183655, "learning_rate": 9.018620647290242e-06, "loss": 0.0203, "step": 75340 }, { "epoch": 1.1564730258614073, "grad_norm": 0.47479262948036194, "learning_rate": 9.015954879083377e-06, "loss": 0.0282, "step": 75350 }, { "epoch": 1.1566265060240963, "grad_norm": 0.29395103454589844, "learning_rate": 9.013289181487821e-06, "loss": 0.0264, "step": 75360 }, { "epoch": 1.1567799861867853, "grad_norm": 0.387546569108963, "learning_rate": 9.010623554694848e-06, "loss": 0.0293, "step": 75370 }, { "epoch": 1.1569334663494744, "grad_norm": 0.5139026045799255, "learning_rate": 9.007957998895736e-06, "loss": 0.0369, "step": 75380 }, { "epoch": 1.1570869465121634, "grad_norm": 0.4392985701560974, "learning_rate": 9.005292514281756e-06, "loss": 0.0246, "step": 75390 }, { "epoch": 1.1572404266748522, "grad_norm": 0.3813653886318207, "learning_rate": 9.002627101044174e-06, "loss": 0.0271, "step": 75400 }, { "epoch": 1.1573939068375412, "grad_norm": 0.2949799597263336, "learning_rate": 8.999961759374251e-06, "loss": 0.0283, "step": 75410 }, { "epoch": 1.1575473870002302, "grad_norm": 0.45572784543037415, "learning_rate": 8.997296489463237e-06, "loss": 0.0321, "step": 75420 }, { "epoch": 1.1577008671629192, "grad_norm": 0.3950442969799042, "learning_rate": 8.994631291502384e-06, "loss": 0.0286, "step": 75430 }, { "epoch": 1.1578543473256082, "grad_norm": 0.34110239148139954, "learning_rate": 8.991966165682934e-06, "loss": 0.035, "step": 75440 }, { "epoch": 1.1580078274882972, "grad_norm": 0.3061131238937378, "learning_rate": 8.989301112196129e-06, "loss": 0.0258, "step": 75450 }, { "epoch": 1.158161307650986, "grad_norm": 0.3124973773956299, "learning_rate": 8.98663613123321e-06, "loss": 0.0237, "step": 75460 }, { "epoch": 1.158314787813675, "grad_norm": 0.41105958819389343, "learning_rate": 8.98397122298539e-06, "loss": 0.0277, "step": 75470 }, { "epoch": 1.158468267976364, "grad_norm": 0.3843025267124176, "learning_rate": 8.981306387643903e-06, "loss": 0.0283, "step": 75480 }, { "epoch": 1.158621748139053, "grad_norm": 0.2428053319454193, "learning_rate": 8.978641625399966e-06, "loss": 0.0276, "step": 75490 }, { "epoch": 1.158775228301742, "grad_norm": 0.3958868682384491, "learning_rate": 8.975976936444791e-06, "loss": 0.0302, "step": 75500 }, { "epoch": 1.158928708464431, "grad_norm": 0.36142227053642273, "learning_rate": 8.97331232096959e-06, "loss": 0.0261, "step": 75510 }, { "epoch": 1.1590821886271199, "grad_norm": 0.40676021575927734, "learning_rate": 8.97064777916556e-06, "loss": 0.0407, "step": 75520 }, { "epoch": 1.1592356687898089, "grad_norm": 0.44846734404563904, "learning_rate": 8.967983311223898e-06, "loss": 0.0356, "step": 75530 }, { "epoch": 1.159389148952498, "grad_norm": 0.2926744520664215, "learning_rate": 8.965318917335797e-06, "loss": 0.0267, "step": 75540 }, { "epoch": 1.159542629115187, "grad_norm": 0.500924289226532, "learning_rate": 8.962654597692447e-06, "loss": 0.0378, "step": 75550 }, { "epoch": 1.159696109277876, "grad_norm": 0.23642010986804962, "learning_rate": 8.959990352485031e-06, "loss": 0.0238, "step": 75560 }, { "epoch": 1.1598495894405647, "grad_norm": 0.45408543944358826, "learning_rate": 8.957326181904719e-06, "loss": 0.0273, "step": 75570 }, { "epoch": 1.1600030696032537, "grad_norm": 0.4310537278652191, "learning_rate": 8.954662086142682e-06, "loss": 0.0298, "step": 75580 }, { "epoch": 1.1601565497659427, "grad_norm": 0.31055760383605957, "learning_rate": 8.95199806539009e-06, "loss": 0.0249, "step": 75590 }, { "epoch": 1.1603100299286317, "grad_norm": 0.5384587645530701, "learning_rate": 8.949334119838095e-06, "loss": 0.0293, "step": 75600 }, { "epoch": 1.1604635100913208, "grad_norm": 0.47031107544898987, "learning_rate": 8.946670249677864e-06, "loss": 0.0373, "step": 75610 }, { "epoch": 1.1606169902540096, "grad_norm": 0.3724829852581024, "learning_rate": 8.944006455100536e-06, "loss": 0.0316, "step": 75620 }, { "epoch": 1.1607704704166986, "grad_norm": 0.2509116232395172, "learning_rate": 8.941342736297255e-06, "loss": 0.0318, "step": 75630 }, { "epoch": 1.1609239505793876, "grad_norm": 0.3633476793766022, "learning_rate": 8.938679093459165e-06, "loss": 0.0254, "step": 75640 }, { "epoch": 1.1610774307420766, "grad_norm": 0.43408674001693726, "learning_rate": 8.936015526777393e-06, "loss": 0.0283, "step": 75650 }, { "epoch": 1.1612309109047656, "grad_norm": 0.470915824174881, "learning_rate": 8.933352036443073e-06, "loss": 0.0358, "step": 75660 }, { "epoch": 1.1613843910674546, "grad_norm": 0.3973499536514282, "learning_rate": 8.93068862264732e-06, "loss": 0.0238, "step": 75670 }, { "epoch": 1.1615378712301436, "grad_norm": 0.39193230867385864, "learning_rate": 8.928025285581253e-06, "loss": 0.0235, "step": 75680 }, { "epoch": 1.1616913513928324, "grad_norm": 0.4577527344226837, "learning_rate": 8.925362025435985e-06, "loss": 0.0223, "step": 75690 }, { "epoch": 1.1618448315555214, "grad_norm": 0.45695897936820984, "learning_rate": 8.922698842402616e-06, "loss": 0.0285, "step": 75700 }, { "epoch": 1.1619983117182104, "grad_norm": 0.30518481135368347, "learning_rate": 8.920035736672252e-06, "loss": 0.036, "step": 75710 }, { "epoch": 1.1621517918808995, "grad_norm": 0.26482048630714417, "learning_rate": 8.917372708435986e-06, "loss": 0.0263, "step": 75720 }, { "epoch": 1.1623052720435885, "grad_norm": 0.35996919870376587, "learning_rate": 8.914709757884899e-06, "loss": 0.0347, "step": 75730 }, { "epoch": 1.1624587522062773, "grad_norm": 0.30191347002983093, "learning_rate": 8.912046885210084e-06, "loss": 0.0304, "step": 75740 }, { "epoch": 1.1626122323689663, "grad_norm": 0.4063597619533539, "learning_rate": 8.909384090602616e-06, "loss": 0.0275, "step": 75750 }, { "epoch": 1.1627657125316553, "grad_norm": 0.3635651767253876, "learning_rate": 8.906721374253566e-06, "loss": 0.0299, "step": 75760 }, { "epoch": 1.1629191926943443, "grad_norm": 0.3355743885040283, "learning_rate": 8.904058736353998e-06, "loss": 0.0253, "step": 75770 }, { "epoch": 1.1630726728570333, "grad_norm": 0.3065219819545746, "learning_rate": 8.901396177094975e-06, "loss": 0.024, "step": 75780 }, { "epoch": 1.163226153019722, "grad_norm": 0.25536298751831055, "learning_rate": 8.89873369666755e-06, "loss": 0.0296, "step": 75790 }, { "epoch": 1.163379633182411, "grad_norm": 0.47268247604370117, "learning_rate": 8.896071295262778e-06, "loss": 0.0253, "step": 75800 }, { "epoch": 1.1635331133451001, "grad_norm": 0.388538658618927, "learning_rate": 8.8934089730717e-06, "loss": 0.0369, "step": 75810 }, { "epoch": 1.1636865935077891, "grad_norm": 0.3398614823818207, "learning_rate": 8.89074673028535e-06, "loss": 0.032, "step": 75820 }, { "epoch": 1.1638400736704781, "grad_norm": 0.2156645506620407, "learning_rate": 8.888084567094764e-06, "loss": 0.0221, "step": 75830 }, { "epoch": 1.163993553833167, "grad_norm": 0.2680366039276123, "learning_rate": 8.885422483690965e-06, "loss": 0.0248, "step": 75840 }, { "epoch": 1.164147033995856, "grad_norm": 0.33090725541114807, "learning_rate": 8.88276048026498e-06, "loss": 0.0261, "step": 75850 }, { "epoch": 1.164300514158545, "grad_norm": 0.6916898488998413, "learning_rate": 8.880098557007824e-06, "loss": 0.023, "step": 75860 }, { "epoch": 1.164453994321234, "grad_norm": 0.462747186422348, "learning_rate": 8.877436714110497e-06, "loss": 0.0291, "step": 75870 }, { "epoch": 1.164607474483923, "grad_norm": 0.455254465341568, "learning_rate": 8.874774951764013e-06, "loss": 0.034, "step": 75880 }, { "epoch": 1.164760954646612, "grad_norm": 0.3997974395751953, "learning_rate": 8.872113270159364e-06, "loss": 0.0301, "step": 75890 }, { "epoch": 1.164914434809301, "grad_norm": 0.37625521421432495, "learning_rate": 8.869451669487545e-06, "loss": 0.0275, "step": 75900 }, { "epoch": 1.1650679149719898, "grad_norm": 0.33640339970588684, "learning_rate": 8.866790149939543e-06, "loss": 0.0273, "step": 75910 }, { "epoch": 1.1652213951346788, "grad_norm": 0.40559083223342896, "learning_rate": 8.864128711706334e-06, "loss": 0.0251, "step": 75920 }, { "epoch": 1.1653748752973678, "grad_norm": 0.43447890877723694, "learning_rate": 8.861467354978897e-06, "loss": 0.0222, "step": 75930 }, { "epoch": 1.1655283554600568, "grad_norm": 0.47348034381866455, "learning_rate": 8.858806079948196e-06, "loss": 0.0306, "step": 75940 }, { "epoch": 1.1656818356227459, "grad_norm": 0.3385111689567566, "learning_rate": 8.8561448868052e-06, "loss": 0.0257, "step": 75950 }, { "epoch": 1.1658353157854346, "grad_norm": 0.439026415348053, "learning_rate": 8.853483775740865e-06, "loss": 0.0325, "step": 75960 }, { "epoch": 1.1659887959481237, "grad_norm": 0.3103896677494049, "learning_rate": 8.850822746946135e-06, "loss": 0.0403, "step": 75970 }, { "epoch": 1.1661422761108127, "grad_norm": 0.31193459033966064, "learning_rate": 8.848161800611963e-06, "loss": 0.0412, "step": 75980 }, { "epoch": 1.1662957562735017, "grad_norm": 0.41226494312286377, "learning_rate": 8.845500936929284e-06, "loss": 0.0348, "step": 75990 }, { "epoch": 1.1664492364361907, "grad_norm": 0.3245631456375122, "learning_rate": 8.842840156089033e-06, "loss": 0.0233, "step": 76000 }, { "epoch": 1.1666027165988795, "grad_norm": 0.2969507873058319, "learning_rate": 8.840179458282143e-06, "loss": 0.0278, "step": 76010 }, { "epoch": 1.1667561967615685, "grad_norm": 0.3841850757598877, "learning_rate": 8.837518843699524e-06, "loss": 0.027, "step": 76020 }, { "epoch": 1.1669096769242575, "grad_norm": 0.27342915534973145, "learning_rate": 8.834858312532097e-06, "loss": 0.0284, "step": 76030 }, { "epoch": 1.1670631570869465, "grad_norm": 0.3121521770954132, "learning_rate": 8.832197864970776e-06, "loss": 0.0258, "step": 76040 }, { "epoch": 1.1672166372496355, "grad_norm": 0.33913180232048035, "learning_rate": 8.829537501206456e-06, "loss": 0.0365, "step": 76050 }, { "epoch": 1.1673701174123245, "grad_norm": 0.5147576928138733, "learning_rate": 8.826877221430044e-06, "loss": 0.0332, "step": 76060 }, { "epoch": 1.1675235975750133, "grad_norm": 0.3355438709259033, "learning_rate": 8.824217025832425e-06, "loss": 0.0264, "step": 76070 }, { "epoch": 1.1676770777377024, "grad_norm": 0.44634997844696045, "learning_rate": 8.821556914604484e-06, "loss": 0.0297, "step": 76080 }, { "epoch": 1.1678305579003914, "grad_norm": 0.32948553562164307, "learning_rate": 8.818896887937104e-06, "loss": 0.0347, "step": 76090 }, { "epoch": 1.1679840380630804, "grad_norm": 0.367549866437912, "learning_rate": 8.816236946021153e-06, "loss": 0.0236, "step": 76100 }, { "epoch": 1.1681375182257694, "grad_norm": 0.4407302141189575, "learning_rate": 8.813577089047508e-06, "loss": 0.026, "step": 76110 }, { "epoch": 1.1682909983884584, "grad_norm": 0.4188770651817322, "learning_rate": 8.810917317207022e-06, "loss": 0.0255, "step": 76120 }, { "epoch": 1.1684444785511472, "grad_norm": 0.5385478734970093, "learning_rate": 8.808257630690549e-06, "loss": 0.0251, "step": 76130 }, { "epoch": 1.1685979587138362, "grad_norm": 0.3991734981536865, "learning_rate": 8.805598029688944e-06, "loss": 0.0286, "step": 76140 }, { "epoch": 1.1687514388765252, "grad_norm": 0.420641154050827, "learning_rate": 8.802938514393042e-06, "loss": 0.0331, "step": 76150 }, { "epoch": 1.1689049190392142, "grad_norm": 0.33653852343559265, "learning_rate": 8.800279084993692e-06, "loss": 0.0257, "step": 76160 }, { "epoch": 1.1690583992019032, "grad_norm": 0.6832795143127441, "learning_rate": 8.797619741681712e-06, "loss": 0.0333, "step": 76170 }, { "epoch": 1.169211879364592, "grad_norm": 0.39508071541786194, "learning_rate": 8.79496048464793e-06, "loss": 0.0249, "step": 76180 }, { "epoch": 1.169365359527281, "grad_norm": 0.3158876895904541, "learning_rate": 8.792301314083168e-06, "loss": 0.0203, "step": 76190 }, { "epoch": 1.16951883968997, "grad_norm": 0.399586021900177, "learning_rate": 8.789642230178233e-06, "loss": 0.0231, "step": 76200 }, { "epoch": 1.169672319852659, "grad_norm": 0.3273504972457886, "learning_rate": 8.786983233123936e-06, "loss": 0.0314, "step": 76210 }, { "epoch": 1.169825800015348, "grad_norm": 0.31329795718193054, "learning_rate": 8.78432432311107e-06, "loss": 0.024, "step": 76220 }, { "epoch": 1.1699792801780369, "grad_norm": 0.3482803702354431, "learning_rate": 8.781665500330433e-06, "loss": 0.0276, "step": 76230 }, { "epoch": 1.1701327603407259, "grad_norm": 0.46154916286468506, "learning_rate": 8.779006764972809e-06, "loss": 0.0273, "step": 76240 }, { "epoch": 1.170286240503415, "grad_norm": 0.28319236636161804, "learning_rate": 8.776348117228978e-06, "loss": 0.0323, "step": 76250 }, { "epoch": 1.170439720666104, "grad_norm": 0.4386870861053467, "learning_rate": 8.773689557289722e-06, "loss": 0.0287, "step": 76260 }, { "epoch": 1.170593200828793, "grad_norm": 0.25947555899620056, "learning_rate": 8.7710310853458e-06, "loss": 0.0311, "step": 76270 }, { "epoch": 1.170746680991482, "grad_norm": 0.37171944975852966, "learning_rate": 8.768372701587975e-06, "loss": 0.0313, "step": 76280 }, { "epoch": 1.1709001611541707, "grad_norm": 0.37837350368499756, "learning_rate": 8.765714406207006e-06, "loss": 0.0301, "step": 76290 }, { "epoch": 1.1710536413168597, "grad_norm": 0.3373764753341675, "learning_rate": 8.763056199393642e-06, "loss": 0.0265, "step": 76300 }, { "epoch": 1.1712071214795488, "grad_norm": 0.3628682792186737, "learning_rate": 8.760398081338629e-06, "loss": 0.0284, "step": 76310 }, { "epoch": 1.1713606016422378, "grad_norm": 0.3600509762763977, "learning_rate": 8.75774005223269e-06, "loss": 0.0224, "step": 76320 }, { "epoch": 1.1715140818049268, "grad_norm": 0.27133581042289734, "learning_rate": 8.755082112266568e-06, "loss": 0.0286, "step": 76330 }, { "epoch": 1.1716675619676158, "grad_norm": 0.33071058988571167, "learning_rate": 8.752424261630981e-06, "loss": 0.0284, "step": 76340 }, { "epoch": 1.1718210421303046, "grad_norm": 0.39413905143737793, "learning_rate": 8.74976650051665e-06, "loss": 0.0276, "step": 76350 }, { "epoch": 1.1719745222929936, "grad_norm": 0.3573242723941803, "learning_rate": 8.747108829114284e-06, "loss": 0.0284, "step": 76360 }, { "epoch": 1.1721280024556826, "grad_norm": 0.4478421211242676, "learning_rate": 8.744451247614583e-06, "loss": 0.0277, "step": 76370 }, { "epoch": 1.1722814826183716, "grad_norm": 0.28333738446235657, "learning_rate": 8.74179375620825e-06, "loss": 0.0397, "step": 76380 }, { "epoch": 1.1724349627810606, "grad_norm": 0.4852474629878998, "learning_rate": 8.739136355085974e-06, "loss": 0.0328, "step": 76390 }, { "epoch": 1.1725884429437494, "grad_norm": 0.4090975522994995, "learning_rate": 8.736479044438441e-06, "loss": 0.0282, "step": 76400 }, { "epoch": 1.1727419231064384, "grad_norm": 0.4256402254104614, "learning_rate": 8.733821824456333e-06, "loss": 0.0271, "step": 76410 }, { "epoch": 1.1728954032691274, "grad_norm": 0.4415140450000763, "learning_rate": 8.731164695330312e-06, "loss": 0.0307, "step": 76420 }, { "epoch": 1.1730488834318165, "grad_norm": 0.41925325989723206, "learning_rate": 8.728507657251053e-06, "loss": 0.0296, "step": 76430 }, { "epoch": 1.1732023635945055, "grad_norm": 0.4326484799385071, "learning_rate": 8.72585071040921e-06, "loss": 0.0271, "step": 76440 }, { "epoch": 1.1733558437571943, "grad_norm": 0.46519747376441956, "learning_rate": 8.723193854995437e-06, "loss": 0.032, "step": 76450 }, { "epoch": 1.1735093239198833, "grad_norm": 0.4902990758419037, "learning_rate": 8.720537091200383e-06, "loss": 0.028, "step": 76460 }, { "epoch": 1.1736628040825723, "grad_norm": 0.4511031210422516, "learning_rate": 8.71788041921468e-06, "loss": 0.0293, "step": 76470 }, { "epoch": 1.1738162842452613, "grad_norm": 0.41545379161834717, "learning_rate": 8.715223839228964e-06, "loss": 0.031, "step": 76480 }, { "epoch": 1.1739697644079503, "grad_norm": 0.315703809261322, "learning_rate": 8.71256735143386e-06, "loss": 0.0325, "step": 76490 }, { "epoch": 1.1741232445706393, "grad_norm": 0.39245596528053284, "learning_rate": 8.70991095601999e-06, "loss": 0.0275, "step": 76500 }, { "epoch": 1.1742767247333283, "grad_norm": 0.4322725832462311, "learning_rate": 8.70725465317797e-06, "loss": 0.0291, "step": 76510 }, { "epoch": 1.1744302048960171, "grad_norm": 0.39725446701049805, "learning_rate": 8.704598443098397e-06, "loss": 0.0256, "step": 76520 }, { "epoch": 1.1745836850587061, "grad_norm": 0.3954593241214752, "learning_rate": 8.701942325971875e-06, "loss": 0.029, "step": 76530 }, { "epoch": 1.1747371652213952, "grad_norm": 0.7062051296234131, "learning_rate": 8.699286301988996e-06, "loss": 0.0306, "step": 76540 }, { "epoch": 1.1748906453840842, "grad_norm": 0.23895934224128723, "learning_rate": 8.696630371340346e-06, "loss": 0.0243, "step": 76550 }, { "epoch": 1.1750441255467732, "grad_norm": 0.35922110080718994, "learning_rate": 8.693974534216513e-06, "loss": 0.0318, "step": 76560 }, { "epoch": 1.175197605709462, "grad_norm": 0.3508399724960327, "learning_rate": 8.691318790808054e-06, "loss": 0.0271, "step": 76570 }, { "epoch": 1.175351085872151, "grad_norm": 0.524699866771698, "learning_rate": 8.688663141305544e-06, "loss": 0.0252, "step": 76580 }, { "epoch": 1.17550456603484, "grad_norm": 0.6130597591400146, "learning_rate": 8.686007585899542e-06, "loss": 0.0341, "step": 76590 }, { "epoch": 1.175658046197529, "grad_norm": 0.2785404622554779, "learning_rate": 8.6833521247806e-06, "loss": 0.0245, "step": 76600 }, { "epoch": 1.175811526360218, "grad_norm": 0.4162721633911133, "learning_rate": 8.680696758139266e-06, "loss": 0.0354, "step": 76610 }, { "epoch": 1.1759650065229068, "grad_norm": 0.3402421176433563, "learning_rate": 8.678041486166074e-06, "loss": 0.0271, "step": 76620 }, { "epoch": 1.1761184866855958, "grad_norm": 0.5197330713272095, "learning_rate": 8.675386309051557e-06, "loss": 0.0276, "step": 76630 }, { "epoch": 1.1762719668482848, "grad_norm": 0.45088282227516174, "learning_rate": 8.672731226986242e-06, "loss": 0.0287, "step": 76640 }, { "epoch": 1.1764254470109738, "grad_norm": 0.5044585466384888, "learning_rate": 8.670076240160647e-06, "loss": 0.0357, "step": 76650 }, { "epoch": 1.1765789271736629, "grad_norm": 0.26412785053253174, "learning_rate": 8.667421348765288e-06, "loss": 0.0225, "step": 76660 }, { "epoch": 1.1767324073363519, "grad_norm": 0.3539689779281616, "learning_rate": 8.664766552990663e-06, "loss": 0.0314, "step": 76670 }, { "epoch": 1.1768858874990407, "grad_norm": 0.3306961953639984, "learning_rate": 8.66211185302727e-06, "loss": 0.0273, "step": 76680 }, { "epoch": 1.1770393676617297, "grad_norm": 0.43799635767936707, "learning_rate": 8.659457249065606e-06, "loss": 0.0295, "step": 76690 }, { "epoch": 1.1771928478244187, "grad_norm": 0.42428743839263916, "learning_rate": 8.65680274129615e-06, "loss": 0.0364, "step": 76700 }, { "epoch": 1.1773463279871077, "grad_norm": 0.37313440442085266, "learning_rate": 8.654148329909386e-06, "loss": 0.0284, "step": 76710 }, { "epoch": 1.1774998081497967, "grad_norm": 0.3300449848175049, "learning_rate": 8.651494015095776e-06, "loss": 0.0245, "step": 76720 }, { "epoch": 1.1776532883124857, "grad_norm": 0.2975548207759857, "learning_rate": 8.648839797045787e-06, "loss": 0.0257, "step": 76730 }, { "epoch": 1.1778067684751745, "grad_norm": 0.5004605054855347, "learning_rate": 8.646185675949876e-06, "loss": 0.0293, "step": 76740 }, { "epoch": 1.1779602486378635, "grad_norm": 0.4610145390033722, "learning_rate": 8.643531651998491e-06, "loss": 0.0255, "step": 76750 }, { "epoch": 1.1781137288005525, "grad_norm": 0.5074883103370667, "learning_rate": 8.64087772538208e-06, "loss": 0.0352, "step": 76760 }, { "epoch": 1.1782672089632416, "grad_norm": 0.42758139967918396, "learning_rate": 8.638223896291072e-06, "loss": 0.0332, "step": 76770 }, { "epoch": 1.1784206891259306, "grad_norm": 0.4023101031780243, "learning_rate": 8.635570164915897e-06, "loss": 0.0295, "step": 76780 }, { "epoch": 1.1785741692886194, "grad_norm": 0.3809380531311035, "learning_rate": 8.63291653144698e-06, "loss": 0.0284, "step": 76790 }, { "epoch": 1.1787276494513084, "grad_norm": 0.2951186001300812, "learning_rate": 8.63026299607473e-06, "loss": 0.0242, "step": 76800 }, { "epoch": 1.1788811296139974, "grad_norm": 0.43648046255111694, "learning_rate": 8.627609558989561e-06, "loss": 0.0321, "step": 76810 }, { "epoch": 1.1790346097766864, "grad_norm": 0.42367613315582275, "learning_rate": 8.62495622038187e-06, "loss": 0.0343, "step": 76820 }, { "epoch": 1.1791880899393754, "grad_norm": 0.2837218940258026, "learning_rate": 8.622302980442049e-06, "loss": 0.023, "step": 76830 }, { "epoch": 1.1793415701020642, "grad_norm": 0.6557644605636597, "learning_rate": 8.619649839360482e-06, "loss": 0.0247, "step": 76840 }, { "epoch": 1.1794950502647532, "grad_norm": 0.4300554394721985, "learning_rate": 8.616996797327558e-06, "loss": 0.0292, "step": 76850 }, { "epoch": 1.1796485304274422, "grad_norm": 0.32030189037323, "learning_rate": 8.614343854533645e-06, "loss": 0.0221, "step": 76860 }, { "epoch": 1.1798020105901312, "grad_norm": 0.4502222239971161, "learning_rate": 8.611691011169101e-06, "loss": 0.0308, "step": 76870 }, { "epoch": 1.1799554907528202, "grad_norm": 0.4410858452320099, "learning_rate": 8.609038267424292e-06, "loss": 0.0305, "step": 76880 }, { "epoch": 1.1801089709155093, "grad_norm": 0.299655556678772, "learning_rate": 8.606385623489563e-06, "loss": 0.025, "step": 76890 }, { "epoch": 1.180262451078198, "grad_norm": 0.3654332458972931, "learning_rate": 8.603733079555265e-06, "loss": 0.0303, "step": 76900 }, { "epoch": 1.180415931240887, "grad_norm": 0.3883744180202484, "learning_rate": 8.60108063581173e-06, "loss": 0.0229, "step": 76910 }, { "epoch": 1.180569411403576, "grad_norm": 0.40734073519706726, "learning_rate": 8.598428292449283e-06, "loss": 0.0236, "step": 76920 }, { "epoch": 1.180722891566265, "grad_norm": 0.34531283378601074, "learning_rate": 8.595776049658253e-06, "loss": 0.0248, "step": 76930 }, { "epoch": 1.180876371728954, "grad_norm": 0.28025057911872864, "learning_rate": 8.593123907628948e-06, "loss": 0.0265, "step": 76940 }, { "epoch": 1.1810298518916431, "grad_norm": 0.2939395606517792, "learning_rate": 8.590471866551682e-06, "loss": 0.0353, "step": 76950 }, { "epoch": 1.181183332054332, "grad_norm": 0.3187776207923889, "learning_rate": 8.587819926616756e-06, "loss": 0.028, "step": 76960 }, { "epoch": 1.181336812217021, "grad_norm": 0.22460567951202393, "learning_rate": 8.585168088014454e-06, "loss": 0.0261, "step": 76970 }, { "epoch": 1.18149029237971, "grad_norm": 0.3855138421058655, "learning_rate": 8.582516350935073e-06, "loss": 0.034, "step": 76980 }, { "epoch": 1.181643772542399, "grad_norm": 0.3696000874042511, "learning_rate": 8.579864715568879e-06, "loss": 0.0285, "step": 76990 }, { "epoch": 1.181797252705088, "grad_norm": 0.2960915267467499, "learning_rate": 8.577213182106155e-06, "loss": 0.0245, "step": 77000 }, { "epoch": 1.1819507328677767, "grad_norm": 0.31072208285331726, "learning_rate": 8.574561750737162e-06, "loss": 0.0249, "step": 77010 }, { "epoch": 1.1821042130304658, "grad_norm": 0.3402506113052368, "learning_rate": 8.571910421652151e-06, "loss": 0.0281, "step": 77020 }, { "epoch": 1.1822576931931548, "grad_norm": 0.2890252470970154, "learning_rate": 8.569259195041376e-06, "loss": 0.0259, "step": 77030 }, { "epoch": 1.1824111733558438, "grad_norm": 0.4284852147102356, "learning_rate": 8.566608071095076e-06, "loss": 0.0333, "step": 77040 }, { "epoch": 1.1825646535185328, "grad_norm": 0.4373025894165039, "learning_rate": 8.56395705000349e-06, "loss": 0.0312, "step": 77050 }, { "epoch": 1.1827181336812216, "grad_norm": 0.38443025946617126, "learning_rate": 8.561306131956842e-06, "loss": 0.0273, "step": 77060 }, { "epoch": 1.1828716138439106, "grad_norm": 0.2360374927520752, "learning_rate": 8.55865531714535e-06, "loss": 0.0269, "step": 77070 }, { "epoch": 1.1830250940065996, "grad_norm": 0.398088276386261, "learning_rate": 8.556004605759229e-06, "loss": 0.0314, "step": 77080 }, { "epoch": 1.1831785741692886, "grad_norm": 0.43606260418891907, "learning_rate": 8.553353997988682e-06, "loss": 0.0238, "step": 77090 }, { "epoch": 1.1833320543319776, "grad_norm": 0.30644723773002625, "learning_rate": 8.550703494023907e-06, "loss": 0.0255, "step": 77100 }, { "epoch": 1.1834855344946666, "grad_norm": 0.48629841208457947, "learning_rate": 8.5480530940551e-06, "loss": 0.0167, "step": 77110 }, { "epoch": 1.1836390146573557, "grad_norm": 0.3829282224178314, "learning_rate": 8.545402798272437e-06, "loss": 0.0307, "step": 77120 }, { "epoch": 1.1837924948200444, "grad_norm": 0.38426679372787476, "learning_rate": 8.542752606866089e-06, "loss": 0.0263, "step": 77130 }, { "epoch": 1.1839459749827335, "grad_norm": 0.35423269867897034, "learning_rate": 8.540102520026234e-06, "loss": 0.0416, "step": 77140 }, { "epoch": 1.1840994551454225, "grad_norm": 0.4262358546257019, "learning_rate": 8.537452537943023e-06, "loss": 0.0338, "step": 77150 }, { "epoch": 1.1842529353081115, "grad_norm": 0.35297200083732605, "learning_rate": 8.534802660806618e-06, "loss": 0.0233, "step": 77160 }, { "epoch": 1.1844064154708005, "grad_norm": 0.34546929597854614, "learning_rate": 8.532152888807155e-06, "loss": 0.0328, "step": 77170 }, { "epoch": 1.1845598956334893, "grad_norm": 0.34438395500183105, "learning_rate": 8.529503222134773e-06, "loss": 0.0244, "step": 77180 }, { "epoch": 1.1847133757961783, "grad_norm": 0.4141579270362854, "learning_rate": 8.526853660979609e-06, "loss": 0.0314, "step": 77190 }, { "epoch": 1.1848668559588673, "grad_norm": 0.39889904856681824, "learning_rate": 8.524204205531775e-06, "loss": 0.0257, "step": 77200 }, { "epoch": 1.1850203361215563, "grad_norm": 0.29464077949523926, "learning_rate": 8.521554855981397e-06, "loss": 0.0231, "step": 77210 }, { "epoch": 1.1851738162842453, "grad_norm": 0.3497016727924347, "learning_rate": 8.518905612518573e-06, "loss": 0.0246, "step": 77220 }, { "epoch": 1.1853272964469341, "grad_norm": 0.532545268535614, "learning_rate": 8.516256475333404e-06, "loss": 0.0253, "step": 77230 }, { "epoch": 1.1854807766096231, "grad_norm": 0.3097689747810364, "learning_rate": 8.513607444615985e-06, "loss": 0.0281, "step": 77240 }, { "epoch": 1.1856342567723122, "grad_norm": 0.37851351499557495, "learning_rate": 8.510958520556399e-06, "loss": 0.0326, "step": 77250 }, { "epoch": 1.1857877369350012, "grad_norm": 0.4619382619857788, "learning_rate": 8.508309703344725e-06, "loss": 0.0314, "step": 77260 }, { "epoch": 1.1859412170976902, "grad_norm": 0.3000585436820984, "learning_rate": 8.50566099317103e-06, "loss": 0.0227, "step": 77270 }, { "epoch": 1.186094697260379, "grad_norm": 0.5137686729431152, "learning_rate": 8.503012390225371e-06, "loss": 0.0291, "step": 77280 }, { "epoch": 1.186248177423068, "grad_norm": 0.5091051459312439, "learning_rate": 8.500363894697807e-06, "loss": 0.0226, "step": 77290 }, { "epoch": 1.186401657585757, "grad_norm": 0.4739089906215668, "learning_rate": 8.497715506778382e-06, "loss": 0.0302, "step": 77300 }, { "epoch": 1.186555137748446, "grad_norm": 0.47933030128479004, "learning_rate": 8.495067226657137e-06, "loss": 0.0359, "step": 77310 }, { "epoch": 1.186708617911135, "grad_norm": 0.41367822885513306, "learning_rate": 8.492419054524097e-06, "loss": 0.0217, "step": 77320 }, { "epoch": 1.186862098073824, "grad_norm": 0.39134496450424194, "learning_rate": 8.489770990569288e-06, "loss": 0.0275, "step": 77330 }, { "epoch": 1.187015578236513, "grad_norm": 0.3682393431663513, "learning_rate": 8.487123034982727e-06, "loss": 0.0295, "step": 77340 }, { "epoch": 1.1871690583992018, "grad_norm": 0.47096705436706543, "learning_rate": 8.484475187954415e-06, "loss": 0.0227, "step": 77350 }, { "epoch": 1.1873225385618909, "grad_norm": 0.3041932284832001, "learning_rate": 8.48182744967436e-06, "loss": 0.0234, "step": 77360 }, { "epoch": 1.1874760187245799, "grad_norm": 0.3076659142971039, "learning_rate": 8.479179820332544e-06, "loss": 0.029, "step": 77370 }, { "epoch": 1.1876294988872689, "grad_norm": 0.32918310165405273, "learning_rate": 8.476532300118959e-06, "loss": 0.0311, "step": 77380 }, { "epoch": 1.1877829790499579, "grad_norm": 0.32406434416770935, "learning_rate": 8.473884889223574e-06, "loss": 0.0211, "step": 77390 }, { "epoch": 1.1879364592126467, "grad_norm": 0.3369094431400299, "learning_rate": 8.471237587836362e-06, "loss": 0.0243, "step": 77400 }, { "epoch": 1.1880899393753357, "grad_norm": 0.6456558704376221, "learning_rate": 8.468590396147284e-06, "loss": 0.033, "step": 77410 }, { "epoch": 1.1882434195380247, "grad_norm": 0.44859060645103455, "learning_rate": 8.465943314346286e-06, "loss": 0.0303, "step": 77420 }, { "epoch": 1.1883968997007137, "grad_norm": 0.379311740398407, "learning_rate": 8.463296342623318e-06, "loss": 0.0226, "step": 77430 }, { "epoch": 1.1885503798634027, "grad_norm": 0.29593151807785034, "learning_rate": 8.460649481168313e-06, "loss": 0.036, "step": 77440 }, { "epoch": 1.1887038600260915, "grad_norm": 0.386051744222641, "learning_rate": 8.458002730171205e-06, "loss": 0.0275, "step": 77450 }, { "epoch": 1.1888573401887805, "grad_norm": 0.28543218970298767, "learning_rate": 8.455356089821911e-06, "loss": 0.0298, "step": 77460 }, { "epoch": 1.1890108203514695, "grad_norm": 0.4119817912578583, "learning_rate": 8.452709560310342e-06, "loss": 0.0328, "step": 77470 }, { "epoch": 1.1891643005141586, "grad_norm": 0.4245769679546356, "learning_rate": 8.450063141826405e-06, "loss": 0.023, "step": 77480 }, { "epoch": 1.1893177806768476, "grad_norm": 0.4062556326389313, "learning_rate": 8.447416834559994e-06, "loss": 0.0271, "step": 77490 }, { "epoch": 1.1894712608395366, "grad_norm": 0.37280356884002686, "learning_rate": 8.444770638701005e-06, "loss": 0.0367, "step": 77500 }, { "epoch": 1.1896247410022254, "grad_norm": 0.5095924139022827, "learning_rate": 8.442124554439314e-06, "loss": 0.0284, "step": 77510 }, { "epoch": 1.1897782211649144, "grad_norm": 0.33508825302124023, "learning_rate": 8.439478581964792e-06, "loss": 0.0329, "step": 77520 }, { "epoch": 1.1899317013276034, "grad_norm": 0.3984353244304657, "learning_rate": 8.436832721467309e-06, "loss": 0.0262, "step": 77530 }, { "epoch": 1.1900851814902924, "grad_norm": 0.36736029386520386, "learning_rate": 8.434186973136715e-06, "loss": 0.0312, "step": 77540 }, { "epoch": 1.1902386616529814, "grad_norm": 0.4139922857284546, "learning_rate": 8.431541337162865e-06, "loss": 0.0254, "step": 77550 }, { "epoch": 1.1903921418156704, "grad_norm": 0.264842689037323, "learning_rate": 8.428895813735599e-06, "loss": 0.0355, "step": 77560 }, { "epoch": 1.1905456219783592, "grad_norm": 0.3826909363269806, "learning_rate": 8.426250403044745e-06, "loss": 0.0266, "step": 77570 }, { "epoch": 1.1906991021410482, "grad_norm": 0.4015151858329773, "learning_rate": 8.423605105280131e-06, "loss": 0.0453, "step": 77580 }, { "epoch": 1.1908525823037373, "grad_norm": 0.37690070271492004, "learning_rate": 8.420959920631573e-06, "loss": 0.0303, "step": 77590 }, { "epoch": 1.1910060624664263, "grad_norm": 0.3928833603858948, "learning_rate": 8.41831484928888e-06, "loss": 0.0324, "step": 77600 }, { "epoch": 1.1911595426291153, "grad_norm": 0.32391858100891113, "learning_rate": 8.415669891441853e-06, "loss": 0.0248, "step": 77610 }, { "epoch": 1.191313022791804, "grad_norm": 0.2725520133972168, "learning_rate": 8.41302504728028e-06, "loss": 0.0356, "step": 77620 }, { "epoch": 1.191466502954493, "grad_norm": 0.6003682613372803, "learning_rate": 8.410380316993948e-06, "loss": 0.0337, "step": 77630 }, { "epoch": 1.191619983117182, "grad_norm": 0.4515671133995056, "learning_rate": 8.40773570077263e-06, "loss": 0.0278, "step": 77640 }, { "epoch": 1.191773463279871, "grad_norm": 0.5924997329711914, "learning_rate": 8.405091198806097e-06, "loss": 0.0353, "step": 77650 }, { "epoch": 1.1919269434425601, "grad_norm": 0.39455538988113403, "learning_rate": 8.40244681128411e-06, "loss": 0.0264, "step": 77660 }, { "epoch": 1.192080423605249, "grad_norm": 0.2981894910335541, "learning_rate": 8.399802538396416e-06, "loss": 0.0258, "step": 77670 }, { "epoch": 1.192233903767938, "grad_norm": 0.4434385299682617, "learning_rate": 8.397158380332755e-06, "loss": 0.0315, "step": 77680 }, { "epoch": 1.192387383930627, "grad_norm": 0.21007207036018372, "learning_rate": 8.394514337282869e-06, "loss": 0.0304, "step": 77690 }, { "epoch": 1.192540864093316, "grad_norm": 0.3467977046966553, "learning_rate": 8.391870409436478e-06, "loss": 0.0326, "step": 77700 }, { "epoch": 1.192694344256005, "grad_norm": 0.29137152433395386, "learning_rate": 8.389226596983309e-06, "loss": 0.0298, "step": 77710 }, { "epoch": 1.192847824418694, "grad_norm": 0.48196592926979065, "learning_rate": 8.386582900113062e-06, "loss": 0.0304, "step": 77720 }, { "epoch": 1.1930013045813828, "grad_norm": 0.3173297941684723, "learning_rate": 8.383939319015442e-06, "loss": 0.0291, "step": 77730 }, { "epoch": 1.1931547847440718, "grad_norm": 0.25678810477256775, "learning_rate": 8.381295853880143e-06, "loss": 0.0244, "step": 77740 }, { "epoch": 1.1933082649067608, "grad_norm": 0.3685513138771057, "learning_rate": 8.37865250489685e-06, "loss": 0.0275, "step": 77750 }, { "epoch": 1.1934617450694498, "grad_norm": 0.4342815577983856, "learning_rate": 8.376009272255241e-06, "loss": 0.0316, "step": 77760 }, { "epoch": 1.1936152252321388, "grad_norm": 0.299193799495697, "learning_rate": 8.373366156144981e-06, "loss": 0.0345, "step": 77770 }, { "epoch": 1.1937687053948278, "grad_norm": 0.33581534028053284, "learning_rate": 8.37072315675573e-06, "loss": 0.0284, "step": 77780 }, { "epoch": 1.1939221855575166, "grad_norm": 0.5894938707351685, "learning_rate": 8.368080274277143e-06, "loss": 0.0321, "step": 77790 }, { "epoch": 1.1940756657202056, "grad_norm": 0.3272929787635803, "learning_rate": 8.365437508898857e-06, "loss": 0.0204, "step": 77800 }, { "epoch": 1.1942291458828946, "grad_norm": 0.30007630586624146, "learning_rate": 8.362794860810517e-06, "loss": 0.0212, "step": 77810 }, { "epoch": 1.1943826260455837, "grad_norm": 0.480278342962265, "learning_rate": 8.360152330201741e-06, "loss": 0.0355, "step": 77820 }, { "epoch": 1.1945361062082727, "grad_norm": 0.32367992401123047, "learning_rate": 8.357509917262147e-06, "loss": 0.0329, "step": 77830 }, { "epoch": 1.1946895863709615, "grad_norm": 0.28591492772102356, "learning_rate": 8.354867622181346e-06, "loss": 0.0375, "step": 77840 }, { "epoch": 1.1948430665336505, "grad_norm": 0.3458956778049469, "learning_rate": 8.352225445148939e-06, "loss": 0.0225, "step": 77850 }, { "epoch": 1.1949965466963395, "grad_norm": 0.3641754686832428, "learning_rate": 8.349583386354523e-06, "loss": 0.0287, "step": 77860 }, { "epoch": 1.1951500268590285, "grad_norm": 0.35188132524490356, "learning_rate": 8.346941445987677e-06, "loss": 0.0262, "step": 77870 }, { "epoch": 1.1953035070217175, "grad_norm": 0.37611356377601624, "learning_rate": 8.344299624237972e-06, "loss": 0.0324, "step": 77880 }, { "epoch": 1.1954569871844063, "grad_norm": 0.4168667495250702, "learning_rate": 8.341657921294986e-06, "loss": 0.0294, "step": 77890 }, { "epoch": 1.1956104673470953, "grad_norm": 0.4495440125465393, "learning_rate": 8.339016337348267e-06, "loss": 0.0321, "step": 77900 }, { "epoch": 1.1957639475097843, "grad_norm": 0.2856307923793793, "learning_rate": 8.336374872587375e-06, "loss": 0.0324, "step": 77910 }, { "epoch": 1.1959174276724733, "grad_norm": 0.32273367047309875, "learning_rate": 8.333733527201841e-06, "loss": 0.0247, "step": 77920 }, { "epoch": 1.1960709078351623, "grad_norm": 0.34940770268440247, "learning_rate": 8.331092301381206e-06, "loss": 0.0268, "step": 77930 }, { "epoch": 1.1962243879978514, "grad_norm": 0.33207792043685913, "learning_rate": 8.328451195314989e-06, "loss": 0.0279, "step": 77940 }, { "epoch": 1.1963778681605404, "grad_norm": 0.474071741104126, "learning_rate": 8.325810209192711e-06, "loss": 0.0282, "step": 77950 }, { "epoch": 1.1965313483232292, "grad_norm": 0.31512948870658875, "learning_rate": 8.323169343203876e-06, "loss": 0.0335, "step": 77960 }, { "epoch": 1.1966848284859182, "grad_norm": 0.42005181312561035, "learning_rate": 8.320528597537979e-06, "loss": 0.0269, "step": 77970 }, { "epoch": 1.1968383086486072, "grad_norm": 0.3887518644332886, "learning_rate": 8.317887972384514e-06, "loss": 0.0293, "step": 77980 }, { "epoch": 1.1969917888112962, "grad_norm": 0.42782559990882874, "learning_rate": 8.31524746793296e-06, "loss": 0.035, "step": 77990 }, { "epoch": 1.1971452689739852, "grad_norm": 0.2546781599521637, "learning_rate": 8.312607084372794e-06, "loss": 0.0333, "step": 78000 }, { "epoch": 1.197298749136674, "grad_norm": 0.2622191309928894, "learning_rate": 8.309966821893477e-06, "loss": 0.0268, "step": 78010 }, { "epoch": 1.197452229299363, "grad_norm": 0.44690218567848206, "learning_rate": 8.30732668068446e-06, "loss": 0.0235, "step": 78020 }, { "epoch": 1.197605709462052, "grad_norm": 0.33866196870803833, "learning_rate": 8.304686660935196e-06, "loss": 0.0255, "step": 78030 }, { "epoch": 1.197759189624741, "grad_norm": 0.35658353567123413, "learning_rate": 8.302046762835118e-06, "loss": 0.0269, "step": 78040 }, { "epoch": 1.19791266978743, "grad_norm": 0.2343120574951172, "learning_rate": 8.299406986573657e-06, "loss": 0.0401, "step": 78050 }, { "epoch": 1.1980661499501188, "grad_norm": 0.40870460867881775, "learning_rate": 8.296767332340236e-06, "loss": 0.0294, "step": 78060 }, { "epoch": 1.1982196301128079, "grad_norm": 0.25385016202926636, "learning_rate": 8.294127800324259e-06, "loss": 0.0234, "step": 78070 }, { "epoch": 1.1983731102754969, "grad_norm": 0.3718556761741638, "learning_rate": 8.291488390715135e-06, "loss": 0.0339, "step": 78080 }, { "epoch": 1.1985265904381859, "grad_norm": 0.4212234914302826, "learning_rate": 8.288849103702254e-06, "loss": 0.0286, "step": 78090 }, { "epoch": 1.198680070600875, "grad_norm": 0.3441188931465149, "learning_rate": 8.286209939475004e-06, "loss": 0.0221, "step": 78100 }, { "epoch": 1.198833550763564, "grad_norm": 0.2969362735748291, "learning_rate": 8.283570898222766e-06, "loss": 0.0331, "step": 78110 }, { "epoch": 1.1989870309262527, "grad_norm": 0.4717599153518677, "learning_rate": 8.280931980134895e-06, "loss": 0.0266, "step": 78120 }, { "epoch": 1.1991405110889417, "grad_norm": 0.370784193277359, "learning_rate": 8.27829318540076e-06, "loss": 0.0242, "step": 78130 }, { "epoch": 1.1992939912516307, "grad_norm": 0.38458359241485596, "learning_rate": 8.275654514209704e-06, "loss": 0.0346, "step": 78140 }, { "epoch": 1.1994474714143197, "grad_norm": 0.6126011610031128, "learning_rate": 8.273015966751074e-06, "loss": 0.0319, "step": 78150 }, { "epoch": 1.1996009515770087, "grad_norm": 0.3567821979522705, "learning_rate": 8.270377543214203e-06, "loss": 0.0233, "step": 78160 }, { "epoch": 1.1997544317396978, "grad_norm": 0.39271247386932373, "learning_rate": 8.267739243788404e-06, "loss": 0.0273, "step": 78170 }, { "epoch": 1.1999079119023865, "grad_norm": 0.3300018906593323, "learning_rate": 8.265101068662999e-06, "loss": 0.0256, "step": 78180 }, { "epoch": 1.2000613920650756, "grad_norm": 0.42516160011291504, "learning_rate": 8.262463018027293e-06, "loss": 0.0279, "step": 78190 }, { "epoch": 1.2002148722277646, "grad_norm": 0.3462461531162262, "learning_rate": 8.259825092070586e-06, "loss": 0.0254, "step": 78200 }, { "epoch": 1.2003683523904536, "grad_norm": 0.5233986973762512, "learning_rate": 8.257187290982156e-06, "loss": 0.0348, "step": 78210 }, { "epoch": 1.2005218325531426, "grad_norm": 0.2925083339214325, "learning_rate": 8.254549614951288e-06, "loss": 0.0299, "step": 78220 }, { "epoch": 1.2006753127158314, "grad_norm": 0.3010175824165344, "learning_rate": 8.251912064167246e-06, "loss": 0.0281, "step": 78230 }, { "epoch": 1.2008287928785204, "grad_norm": 0.415828675031662, "learning_rate": 8.249274638819299e-06, "loss": 0.0308, "step": 78240 }, { "epoch": 1.2009822730412094, "grad_norm": 0.4848349392414093, "learning_rate": 8.246637339096696e-06, "loss": 0.0301, "step": 78250 }, { "epoch": 1.2011357532038984, "grad_norm": 0.3747584819793701, "learning_rate": 8.244000165188673e-06, "loss": 0.0262, "step": 78260 }, { "epoch": 1.2012892333665874, "grad_norm": 0.3670019209384918, "learning_rate": 8.24136311728447e-06, "loss": 0.023, "step": 78270 }, { "epoch": 1.2014427135292762, "grad_norm": 0.5833579301834106, "learning_rate": 8.238726195573308e-06, "loss": 0.0274, "step": 78280 }, { "epoch": 1.2015961936919652, "grad_norm": 0.21450772881507874, "learning_rate": 8.236089400244407e-06, "loss": 0.022, "step": 78290 }, { "epoch": 1.2017496738546543, "grad_norm": 0.40797367691993713, "learning_rate": 8.233452731486971e-06, "loss": 0.0234, "step": 78300 }, { "epoch": 1.2019031540173433, "grad_norm": 0.444044291973114, "learning_rate": 8.230816189490191e-06, "loss": 0.0255, "step": 78310 }, { "epoch": 1.2020566341800323, "grad_norm": 0.4500991702079773, "learning_rate": 8.228179774443265e-06, "loss": 0.0338, "step": 78320 }, { "epoch": 1.2022101143427213, "grad_norm": 0.43744462728500366, "learning_rate": 8.225543486535366e-06, "loss": 0.0318, "step": 78330 }, { "epoch": 1.20236359450541, "grad_norm": 0.20966815948486328, "learning_rate": 8.222907325955666e-06, "loss": 0.0294, "step": 78340 }, { "epoch": 1.202517074668099, "grad_norm": 0.37557995319366455, "learning_rate": 8.220271292893328e-06, "loss": 0.0228, "step": 78350 }, { "epoch": 1.202670554830788, "grad_norm": 0.5071738362312317, "learning_rate": 8.217635387537497e-06, "loss": 0.0424, "step": 78360 }, { "epoch": 1.2028240349934771, "grad_norm": 0.37261587381362915, "learning_rate": 8.21499961007732e-06, "loss": 0.0306, "step": 78370 }, { "epoch": 1.2029775151561661, "grad_norm": 0.3227494955062866, "learning_rate": 8.21236396070193e-06, "loss": 0.034, "step": 78380 }, { "epoch": 1.2031309953188551, "grad_norm": 0.2708671987056732, "learning_rate": 8.209728439600452e-06, "loss": 0.0267, "step": 78390 }, { "epoch": 1.203284475481544, "grad_norm": 0.4601857662200928, "learning_rate": 8.207093046962003e-06, "loss": 0.0275, "step": 78400 }, { "epoch": 1.203437955644233, "grad_norm": 0.31651216745376587, "learning_rate": 8.20445778297568e-06, "loss": 0.0262, "step": 78410 }, { "epoch": 1.203591435806922, "grad_norm": 0.372829407453537, "learning_rate": 8.201822647830587e-06, "loss": 0.0241, "step": 78420 }, { "epoch": 1.203744915969611, "grad_norm": 0.43521931767463684, "learning_rate": 8.199187641715809e-06, "loss": 0.0259, "step": 78430 }, { "epoch": 1.2038983961323, "grad_norm": 0.367744117975235, "learning_rate": 8.19655276482042e-06, "loss": 0.0206, "step": 78440 }, { "epoch": 1.2040518762949888, "grad_norm": 0.3108411133289337, "learning_rate": 8.193918017333501e-06, "loss": 0.0315, "step": 78450 }, { "epoch": 1.2042053564576778, "grad_norm": 0.4466855227947235, "learning_rate": 8.191283399444098e-06, "loss": 0.0364, "step": 78460 }, { "epoch": 1.2043588366203668, "grad_norm": 0.47577857971191406, "learning_rate": 8.188648911341265e-06, "loss": 0.0346, "step": 78470 }, { "epoch": 1.2045123167830558, "grad_norm": 0.5429377555847168, "learning_rate": 8.186014553214046e-06, "loss": 0.0276, "step": 78480 }, { "epoch": 1.2046657969457448, "grad_norm": 0.29111433029174805, "learning_rate": 8.183380325251469e-06, "loss": 0.0306, "step": 78490 }, { "epoch": 1.2048192771084336, "grad_norm": 0.4110618531703949, "learning_rate": 8.180746227642561e-06, "loss": 0.027, "step": 78500 }, { "epoch": 1.2049727572711226, "grad_norm": 0.45598042011260986, "learning_rate": 8.17811226057633e-06, "loss": 0.0283, "step": 78510 }, { "epoch": 1.2051262374338116, "grad_norm": 0.41462478041648865, "learning_rate": 8.17547842424178e-06, "loss": 0.0202, "step": 78520 }, { "epoch": 1.2052797175965007, "grad_norm": 0.416867196559906, "learning_rate": 8.172844718827903e-06, "loss": 0.0343, "step": 78530 }, { "epoch": 1.2054331977591897, "grad_norm": 0.26814666390419006, "learning_rate": 8.170211144523687e-06, "loss": 0.0224, "step": 78540 }, { "epoch": 1.2055866779218787, "grad_norm": 0.5305391550064087, "learning_rate": 8.16757770151811e-06, "loss": 0.029, "step": 78550 }, { "epoch": 1.2057401580845677, "grad_norm": 0.5782811045646667, "learning_rate": 8.164944390000133e-06, "loss": 0.0332, "step": 78560 }, { "epoch": 1.2058936382472565, "grad_norm": 0.3163825273513794, "learning_rate": 8.162311210158712e-06, "loss": 0.0263, "step": 78570 }, { "epoch": 1.2060471184099455, "grad_norm": 0.3792639672756195, "learning_rate": 8.159678162182795e-06, "loss": 0.0315, "step": 78580 }, { "epoch": 1.2062005985726345, "grad_norm": 0.34046539664268494, "learning_rate": 8.157045246261319e-06, "loss": 0.0337, "step": 78590 }, { "epoch": 1.2063540787353235, "grad_norm": 0.5165178179740906, "learning_rate": 8.154412462583216e-06, "loss": 0.0252, "step": 78600 }, { "epoch": 1.2065075588980125, "grad_norm": 0.289382666349411, "learning_rate": 8.151779811337401e-06, "loss": 0.0205, "step": 78610 }, { "epoch": 1.2066610390607013, "grad_norm": 0.6527504920959473, "learning_rate": 8.149147292712782e-06, "loss": 0.0302, "step": 78620 }, { "epoch": 1.2068145192233903, "grad_norm": 0.33540359139442444, "learning_rate": 8.14651490689826e-06, "loss": 0.0287, "step": 78630 }, { "epoch": 1.2069679993860793, "grad_norm": 0.32060420513153076, "learning_rate": 8.143882654082722e-06, "loss": 0.0251, "step": 78640 }, { "epoch": 1.2071214795487684, "grad_norm": 0.595125675201416, "learning_rate": 8.141250534455056e-06, "loss": 0.032, "step": 78650 }, { "epoch": 1.2072749597114574, "grad_norm": 0.3066188097000122, "learning_rate": 8.138618548204126e-06, "loss": 0.0401, "step": 78660 }, { "epoch": 1.2074284398741462, "grad_norm": 0.5236742496490479, "learning_rate": 8.135986695518792e-06, "loss": 0.0294, "step": 78670 }, { "epoch": 1.2075819200368352, "grad_norm": 0.35963842272758484, "learning_rate": 8.133354976587912e-06, "loss": 0.0262, "step": 78680 }, { "epoch": 1.2077354001995242, "grad_norm": 0.4347037076950073, "learning_rate": 8.13072339160032e-06, "loss": 0.0273, "step": 78690 }, { "epoch": 1.2078888803622132, "grad_norm": 0.4400594234466553, "learning_rate": 8.12809194074486e-06, "loss": 0.0325, "step": 78700 }, { "epoch": 1.2080423605249022, "grad_norm": 0.3613346517086029, "learning_rate": 8.125460624210347e-06, "loss": 0.0281, "step": 78710 }, { "epoch": 1.208195840687591, "grad_norm": 0.318651020526886, "learning_rate": 8.122829442185592e-06, "loss": 0.0309, "step": 78720 }, { "epoch": 1.20834932085028, "grad_norm": 0.28448033332824707, "learning_rate": 8.1201983948594e-06, "loss": 0.0259, "step": 78730 }, { "epoch": 1.208502801012969, "grad_norm": 0.40855151414871216, "learning_rate": 8.11756748242057e-06, "loss": 0.0319, "step": 78740 }, { "epoch": 1.208656281175658, "grad_norm": 0.4579380750656128, "learning_rate": 8.114936705057885e-06, "loss": 0.0346, "step": 78750 }, { "epoch": 1.208809761338347, "grad_norm": 0.27205464243888855, "learning_rate": 8.112306062960113e-06, "loss": 0.0255, "step": 78760 }, { "epoch": 1.208963241501036, "grad_norm": 0.38250187039375305, "learning_rate": 8.109675556316025e-06, "loss": 0.0304, "step": 78770 }, { "epoch": 1.209116721663725, "grad_norm": 0.34883180260658264, "learning_rate": 8.107045185314373e-06, "loss": 0.0245, "step": 78780 }, { "epoch": 1.2092702018264139, "grad_norm": 0.35934266448020935, "learning_rate": 8.104414950143906e-06, "loss": 0.0349, "step": 78790 }, { "epoch": 1.2094236819891029, "grad_norm": 0.34928080439567566, "learning_rate": 8.101784850993359e-06, "loss": 0.0257, "step": 78800 }, { "epoch": 1.209577162151792, "grad_norm": 0.4420081079006195, "learning_rate": 8.09915488805145e-06, "loss": 0.0302, "step": 78810 }, { "epoch": 1.209730642314481, "grad_norm": 0.3991457223892212, "learning_rate": 8.096525061506904e-06, "loss": 0.0275, "step": 78820 }, { "epoch": 1.20988412247717, "grad_norm": 0.37488284707069397, "learning_rate": 8.093895371548424e-06, "loss": 0.0278, "step": 78830 }, { "epoch": 1.2100376026398587, "grad_norm": 0.41422706842422485, "learning_rate": 8.091265818364706e-06, "loss": 0.0267, "step": 78840 }, { "epoch": 1.2101910828025477, "grad_norm": 0.289008229970932, "learning_rate": 8.088636402144442e-06, "loss": 0.0309, "step": 78850 }, { "epoch": 1.2103445629652367, "grad_norm": 0.31499385833740234, "learning_rate": 8.086007123076299e-06, "loss": 0.0215, "step": 78860 }, { "epoch": 1.2104980431279257, "grad_norm": 0.370561808347702, "learning_rate": 8.083377981348952e-06, "loss": 0.0287, "step": 78870 }, { "epoch": 1.2106515232906148, "grad_norm": 0.3696964979171753, "learning_rate": 8.080748977151054e-06, "loss": 0.0285, "step": 78880 }, { "epoch": 1.2108050034533036, "grad_norm": 0.27775806188583374, "learning_rate": 8.078120110671253e-06, "loss": 0.029, "step": 78890 }, { "epoch": 1.2109584836159926, "grad_norm": 0.25337812304496765, "learning_rate": 8.07549138209819e-06, "loss": 0.0288, "step": 78900 }, { "epoch": 1.2111119637786816, "grad_norm": 0.4206101596355438, "learning_rate": 8.072862791620487e-06, "loss": 0.0258, "step": 78910 }, { "epoch": 1.2112654439413706, "grad_norm": 0.5995404124259949, "learning_rate": 8.070234339426765e-06, "loss": 0.0283, "step": 78920 }, { "epoch": 1.2114189241040596, "grad_norm": 0.3710654079914093, "learning_rate": 8.067606025705629e-06, "loss": 0.0249, "step": 78930 }, { "epoch": 1.2115724042667486, "grad_norm": 0.364259272813797, "learning_rate": 8.06497785064568e-06, "loss": 0.0368, "step": 78940 }, { "epoch": 1.2117258844294374, "grad_norm": 0.4739886224269867, "learning_rate": 8.062349814435509e-06, "loss": 0.0218, "step": 78950 }, { "epoch": 1.2118793645921264, "grad_norm": 0.41438817977905273, "learning_rate": 8.05972191726368e-06, "loss": 0.0273, "step": 78960 }, { "epoch": 1.2120328447548154, "grad_norm": 0.40482601523399353, "learning_rate": 8.057094159318777e-06, "loss": 0.0239, "step": 78970 }, { "epoch": 1.2121863249175044, "grad_norm": 0.41142478585243225, "learning_rate": 8.054466540789345e-06, "loss": 0.0246, "step": 78980 }, { "epoch": 1.2123398050801935, "grad_norm": 0.4009745419025421, "learning_rate": 8.05183906186394e-06, "loss": 0.0283, "step": 78990 }, { "epoch": 1.2124932852428825, "grad_norm": 0.5362784266471863, "learning_rate": 8.049211722731104e-06, "loss": 0.033, "step": 79000 }, { "epoch": 1.2126467654055713, "grad_norm": 0.3504202365875244, "learning_rate": 8.046584523579349e-06, "loss": 0.0231, "step": 79010 }, { "epoch": 1.2128002455682603, "grad_norm": 0.28904199600219727, "learning_rate": 8.043957464597203e-06, "loss": 0.0306, "step": 79020 }, { "epoch": 1.2129537257309493, "grad_norm": 0.35717588663101196, "learning_rate": 8.041330545973176e-06, "loss": 0.0297, "step": 79030 }, { "epoch": 1.2131072058936383, "grad_norm": 0.32763954997062683, "learning_rate": 8.03870376789576e-06, "loss": 0.023, "step": 79040 }, { "epoch": 1.2132606860563273, "grad_norm": 0.3806358277797699, "learning_rate": 8.036077130553449e-06, "loss": 0.0349, "step": 79050 }, { "epoch": 1.213414166219016, "grad_norm": 0.37479642033576965, "learning_rate": 8.033450634134713e-06, "loss": 0.0377, "step": 79060 }, { "epoch": 1.2135676463817051, "grad_norm": 0.3960908055305481, "learning_rate": 8.03082427882802e-06, "loss": 0.0298, "step": 79070 }, { "epoch": 1.2137211265443941, "grad_norm": 0.39466091990470886, "learning_rate": 8.028198064821832e-06, "loss": 0.0272, "step": 79080 }, { "epoch": 1.2138746067070831, "grad_norm": 0.31064826250076294, "learning_rate": 8.025571992304592e-06, "loss": 0.0232, "step": 79090 }, { "epoch": 1.2140280868697721, "grad_norm": 0.33053284883499146, "learning_rate": 8.022946061464742e-06, "loss": 0.0217, "step": 79100 }, { "epoch": 1.214181567032461, "grad_norm": 0.41717004776000977, "learning_rate": 8.020320272490706e-06, "loss": 0.0308, "step": 79110 }, { "epoch": 1.21433504719515, "grad_norm": 0.35798919200897217, "learning_rate": 8.017694625570896e-06, "loss": 0.025, "step": 79120 }, { "epoch": 1.214488527357839, "grad_norm": 0.35463231801986694, "learning_rate": 8.015069120893723e-06, "loss": 0.0308, "step": 79130 }, { "epoch": 1.214642007520528, "grad_norm": 0.5323959589004517, "learning_rate": 8.012443758647582e-06, "loss": 0.0339, "step": 79140 }, { "epoch": 1.214795487683217, "grad_norm": 0.4323956370353699, "learning_rate": 8.009818539020865e-06, "loss": 0.0332, "step": 79150 }, { "epoch": 1.214948967845906, "grad_norm": 0.4609217047691345, "learning_rate": 8.007193462201938e-06, "loss": 0.0297, "step": 79160 }, { "epoch": 1.2151024480085948, "grad_norm": 0.3506280779838562, "learning_rate": 8.004568528379168e-06, "loss": 0.0286, "step": 79170 }, { "epoch": 1.2152559281712838, "grad_norm": 0.412555456161499, "learning_rate": 8.001943737740916e-06, "loss": 0.0353, "step": 79180 }, { "epoch": 1.2154094083339728, "grad_norm": 0.3379044234752655, "learning_rate": 7.99931909047552e-06, "loss": 0.0287, "step": 79190 }, { "epoch": 1.2155628884966618, "grad_norm": 0.29814833402633667, "learning_rate": 7.996694586771325e-06, "loss": 0.0246, "step": 79200 }, { "epoch": 1.2157163686593508, "grad_norm": 0.36978766322135925, "learning_rate": 7.994070226816644e-06, "loss": 0.0255, "step": 79210 }, { "epoch": 1.2158698488220399, "grad_norm": 0.46712303161621094, "learning_rate": 7.991446010799796e-06, "loss": 0.0256, "step": 79220 }, { "epoch": 1.2160233289847286, "grad_norm": 0.3001457154750824, "learning_rate": 7.988821938909084e-06, "loss": 0.032, "step": 79230 }, { "epoch": 1.2161768091474177, "grad_norm": 0.3516816198825836, "learning_rate": 7.9861980113328e-06, "loss": 0.0355, "step": 79240 }, { "epoch": 1.2163302893101067, "grad_norm": 0.3073880970478058, "learning_rate": 7.983574228259233e-06, "loss": 0.0188, "step": 79250 }, { "epoch": 1.2164837694727957, "grad_norm": 0.4594161808490753, "learning_rate": 7.98095058987665e-06, "loss": 0.0258, "step": 79260 }, { "epoch": 1.2166372496354847, "grad_norm": 0.4787629544734955, "learning_rate": 7.97832709637331e-06, "loss": 0.0304, "step": 79270 }, { "epoch": 1.2167907297981735, "grad_norm": 0.3300345838069916, "learning_rate": 7.975703747937472e-06, "loss": 0.0297, "step": 79280 }, { "epoch": 1.2169442099608625, "grad_norm": 0.2657020688056946, "learning_rate": 7.973080544757375e-06, "loss": 0.0263, "step": 79290 }, { "epoch": 1.2170976901235515, "grad_norm": 0.3606344163417816, "learning_rate": 7.970457487021254e-06, "loss": 0.0353, "step": 79300 }, { "epoch": 1.2172511702862405, "grad_norm": 0.21812018752098083, "learning_rate": 7.96783457491732e-06, "loss": 0.0334, "step": 79310 }, { "epoch": 1.2174046504489295, "grad_norm": 0.3600102365016937, "learning_rate": 7.965211808633789e-06, "loss": 0.0277, "step": 79320 }, { "epoch": 1.2175581306116183, "grad_norm": 0.4061656892299652, "learning_rate": 7.962589188358859e-06, "loss": 0.0222, "step": 79330 }, { "epoch": 1.2177116107743073, "grad_norm": 0.5429428219795227, "learning_rate": 7.959966714280723e-06, "loss": 0.0296, "step": 79340 }, { "epoch": 1.2178650909369964, "grad_norm": 0.2732747197151184, "learning_rate": 7.95734438658756e-06, "loss": 0.0248, "step": 79350 }, { "epoch": 1.2180185710996854, "grad_norm": 0.4257550835609436, "learning_rate": 7.954722205467529e-06, "loss": 0.0295, "step": 79360 }, { "epoch": 1.2181720512623744, "grad_norm": 0.562633216381073, "learning_rate": 7.952100171108795e-06, "loss": 0.0265, "step": 79370 }, { "epoch": 1.2183255314250634, "grad_norm": 0.49115219712257385, "learning_rate": 7.949478283699503e-06, "loss": 0.0297, "step": 79380 }, { "epoch": 1.2184790115877524, "grad_norm": 0.29325681924819946, "learning_rate": 7.946856543427794e-06, "loss": 0.0375, "step": 79390 }, { "epoch": 1.2186324917504412, "grad_norm": 0.559812068939209, "learning_rate": 7.94423495048179e-06, "loss": 0.0293, "step": 79400 }, { "epoch": 1.2187859719131302, "grad_norm": 0.40863773226737976, "learning_rate": 7.941613505049606e-06, "loss": 0.0225, "step": 79410 }, { "epoch": 1.2189394520758192, "grad_norm": 0.23834140598773956, "learning_rate": 7.938992207319347e-06, "loss": 0.0298, "step": 79420 }, { "epoch": 1.2190929322385082, "grad_norm": 0.3400771915912628, "learning_rate": 7.936371057479108e-06, "loss": 0.0339, "step": 79430 }, { "epoch": 1.2192464124011972, "grad_norm": 0.3076317608356476, "learning_rate": 7.933750055716975e-06, "loss": 0.0241, "step": 79440 }, { "epoch": 1.219399892563886, "grad_norm": 0.37331950664520264, "learning_rate": 7.931129202221019e-06, "loss": 0.031, "step": 79450 }, { "epoch": 1.219553372726575, "grad_norm": 0.36664679646492004, "learning_rate": 7.928508497179301e-06, "loss": 0.0293, "step": 79460 }, { "epoch": 1.219706852889264, "grad_norm": 0.4352037310600281, "learning_rate": 7.925887940779873e-06, "loss": 0.0237, "step": 79470 }, { "epoch": 1.219860333051953, "grad_norm": 0.3371019661426544, "learning_rate": 7.923267533210778e-06, "loss": 0.026, "step": 79480 }, { "epoch": 1.220013813214642, "grad_norm": 0.3404976427555084, "learning_rate": 7.920647274660046e-06, "loss": 0.0319, "step": 79490 }, { "epoch": 1.2201672933773309, "grad_norm": 0.3034110963344574, "learning_rate": 7.918027165315698e-06, "loss": 0.0298, "step": 79500 }, { "epoch": 1.2203207735400199, "grad_norm": 0.33681848645210266, "learning_rate": 7.915407205365738e-06, "loss": 0.0273, "step": 79510 }, { "epoch": 1.220474253702709, "grad_norm": 0.31239911913871765, "learning_rate": 7.91278739499817e-06, "loss": 0.0297, "step": 79520 }, { "epoch": 1.220627733865398, "grad_norm": 0.45026397705078125, "learning_rate": 7.910167734400975e-06, "loss": 0.0346, "step": 79530 }, { "epoch": 1.220781214028087, "grad_norm": 0.34149619936943054, "learning_rate": 7.907548223762137e-06, "loss": 0.0252, "step": 79540 }, { "epoch": 1.220934694190776, "grad_norm": 0.3493511378765106, "learning_rate": 7.90492886326962e-06, "loss": 0.0319, "step": 79550 }, { "epoch": 1.2210881743534647, "grad_norm": 0.4254482388496399, "learning_rate": 7.902309653111378e-06, "loss": 0.0272, "step": 79560 }, { "epoch": 1.2212416545161537, "grad_norm": 0.550876259803772, "learning_rate": 7.899690593475352e-06, "loss": 0.0275, "step": 79570 }, { "epoch": 1.2213951346788428, "grad_norm": 0.4689382016658783, "learning_rate": 7.897071684549482e-06, "loss": 0.0304, "step": 79580 }, { "epoch": 1.2215486148415318, "grad_norm": 0.5314178466796875, "learning_rate": 7.894452926521686e-06, "loss": 0.0277, "step": 79590 }, { "epoch": 1.2217020950042208, "grad_norm": 0.3876945674419403, "learning_rate": 7.891834319579882e-06, "loss": 0.0292, "step": 79600 }, { "epoch": 1.2218555751669098, "grad_norm": 0.2788732647895813, "learning_rate": 7.889215863911968e-06, "loss": 0.0249, "step": 79610 }, { "epoch": 1.2220090553295986, "grad_norm": 0.832990825176239, "learning_rate": 7.88659755970583e-06, "loss": 0.0458, "step": 79620 }, { "epoch": 1.2221625354922876, "grad_norm": 0.4399161636829376, "learning_rate": 7.883979407149353e-06, "loss": 0.0276, "step": 79630 }, { "epoch": 1.2223160156549766, "grad_norm": 0.390198677778244, "learning_rate": 7.881361406430402e-06, "loss": 0.0253, "step": 79640 }, { "epoch": 1.2224694958176656, "grad_norm": 0.46017584204673767, "learning_rate": 7.87874355773684e-06, "loss": 0.0381, "step": 79650 }, { "epoch": 1.2226229759803546, "grad_norm": 0.2786122262477875, "learning_rate": 7.87612586125651e-06, "loss": 0.0307, "step": 79660 }, { "epoch": 1.2227764561430434, "grad_norm": 0.3064393699169159, "learning_rate": 7.873508317177246e-06, "loss": 0.032, "step": 79670 }, { "epoch": 1.2229299363057324, "grad_norm": 0.26937368512153625, "learning_rate": 7.870890925686875e-06, "loss": 0.0231, "step": 79680 }, { "epoch": 1.2230834164684214, "grad_norm": 0.3281351327896118, "learning_rate": 7.86827368697321e-06, "loss": 0.0275, "step": 79690 }, { "epoch": 1.2232368966311105, "grad_norm": 0.3297717869281769, "learning_rate": 7.865656601224062e-06, "loss": 0.0356, "step": 79700 }, { "epoch": 1.2233903767937995, "grad_norm": 0.329010933637619, "learning_rate": 7.863039668627213e-06, "loss": 0.0277, "step": 79710 }, { "epoch": 1.2235438569564883, "grad_norm": 0.3324439525604248, "learning_rate": 7.860422889370443e-06, "loss": 0.0281, "step": 79720 }, { "epoch": 1.2236973371191773, "grad_norm": 0.2120041847229004, "learning_rate": 7.85780626364153e-06, "loss": 0.0278, "step": 79730 }, { "epoch": 1.2238508172818663, "grad_norm": 0.48724669218063354, "learning_rate": 7.855189791628228e-06, "loss": 0.0367, "step": 79740 }, { "epoch": 1.2240042974445553, "grad_norm": 0.28198692202568054, "learning_rate": 7.852573473518288e-06, "loss": 0.0225, "step": 79750 }, { "epoch": 1.2241577776072443, "grad_norm": 0.45170658826828003, "learning_rate": 7.849957309499446e-06, "loss": 0.0252, "step": 79760 }, { "epoch": 1.2243112577699333, "grad_norm": 0.2992008626461029, "learning_rate": 7.847341299759423e-06, "loss": 0.034, "step": 79770 }, { "epoch": 1.2244647379326221, "grad_norm": 0.38602814078330994, "learning_rate": 7.844725444485941e-06, "loss": 0.0278, "step": 79780 }, { "epoch": 1.2246182180953111, "grad_norm": 0.4815373718738556, "learning_rate": 7.842109743866698e-06, "loss": 0.0285, "step": 79790 }, { "epoch": 1.2247716982580001, "grad_norm": 0.43550267815589905, "learning_rate": 7.839494198089395e-06, "loss": 0.032, "step": 79800 }, { "epoch": 1.2249251784206892, "grad_norm": 0.41789135336875916, "learning_rate": 7.836878807341704e-06, "loss": 0.0259, "step": 79810 }, { "epoch": 1.2250786585833782, "grad_norm": 0.3938159942626953, "learning_rate": 7.8342635718113e-06, "loss": 0.0244, "step": 79820 }, { "epoch": 1.2252321387460672, "grad_norm": 0.36663538217544556, "learning_rate": 7.831648491685839e-06, "loss": 0.0262, "step": 79830 }, { "epoch": 1.225385618908756, "grad_norm": 0.4095219075679779, "learning_rate": 7.829033567152974e-06, "loss": 0.0254, "step": 79840 }, { "epoch": 1.225539099071445, "grad_norm": 0.29817867279052734, "learning_rate": 7.826418798400343e-06, "loss": 0.0259, "step": 79850 }, { "epoch": 1.225692579234134, "grad_norm": 0.43066027760505676, "learning_rate": 7.823804185615565e-06, "loss": 0.0341, "step": 79860 }, { "epoch": 1.225846059396823, "grad_norm": 0.28320810198783875, "learning_rate": 7.821189728986259e-06, "loss": 0.0335, "step": 79870 }, { "epoch": 1.225999539559512, "grad_norm": 0.4054478704929352, "learning_rate": 7.818575428700026e-06, "loss": 0.0327, "step": 79880 }, { "epoch": 1.2261530197222008, "grad_norm": 0.39352548122406006, "learning_rate": 7.815961284944463e-06, "loss": 0.0311, "step": 79890 }, { "epoch": 1.2263064998848898, "grad_norm": 0.37028080224990845, "learning_rate": 7.813347297907148e-06, "loss": 0.0304, "step": 79900 }, { "epoch": 1.2264599800475788, "grad_norm": 0.30504852533340454, "learning_rate": 7.810733467775646e-06, "loss": 0.0297, "step": 79910 }, { "epoch": 1.2266134602102678, "grad_norm": 0.42282143235206604, "learning_rate": 7.808119794737524e-06, "loss": 0.0276, "step": 79920 }, { "epoch": 1.2267669403729569, "grad_norm": 0.4443841576576233, "learning_rate": 7.80550627898032e-06, "loss": 0.0368, "step": 79930 }, { "epoch": 1.2269204205356457, "grad_norm": 0.30254441499710083, "learning_rate": 7.802892920691579e-06, "loss": 0.0312, "step": 79940 }, { "epoch": 1.2270739006983347, "grad_norm": 0.41224491596221924, "learning_rate": 7.800279720058822e-06, "loss": 0.0273, "step": 79950 }, { "epoch": 1.2272273808610237, "grad_norm": 0.4026307165622711, "learning_rate": 7.797666677269557e-06, "loss": 0.0237, "step": 79960 }, { "epoch": 1.2273808610237127, "grad_norm": 0.5727041363716125, "learning_rate": 7.795053792511294e-06, "loss": 0.0345, "step": 79970 }, { "epoch": 1.2275343411864017, "grad_norm": 0.6174870729446411, "learning_rate": 7.792441065971517e-06, "loss": 0.026, "step": 79980 }, { "epoch": 1.2276878213490907, "grad_norm": 0.33420947194099426, "learning_rate": 7.789828497837712e-06, "loss": 0.0252, "step": 79990 }, { "epoch": 1.2278413015117797, "grad_norm": 0.4169330894947052, "learning_rate": 7.787216088297345e-06, "loss": 0.0323, "step": 80000 }, { "epoch": 1.2279947816744685, "grad_norm": 0.4142647981643677, "learning_rate": 7.784603837537867e-06, "loss": 0.0259, "step": 80010 }, { "epoch": 1.2281482618371575, "grad_norm": 0.5934310555458069, "learning_rate": 7.781991745746728e-06, "loss": 0.0314, "step": 80020 }, { "epoch": 1.2283017419998465, "grad_norm": 0.29126837849617004, "learning_rate": 7.779379813111358e-06, "loss": 0.0373, "step": 80030 }, { "epoch": 1.2284552221625356, "grad_norm": 0.332134872674942, "learning_rate": 7.776768039819185e-06, "loss": 0.0318, "step": 80040 }, { "epoch": 1.2286087023252246, "grad_norm": 0.3074207007884979, "learning_rate": 7.774156426057619e-06, "loss": 0.0427, "step": 80050 }, { "epoch": 1.2287621824879134, "grad_norm": 0.3725697696208954, "learning_rate": 7.771544972014052e-06, "loss": 0.0344, "step": 80060 }, { "epoch": 1.2289156626506024, "grad_norm": 0.4716984033584595, "learning_rate": 7.76893367787588e-06, "loss": 0.0295, "step": 80070 }, { "epoch": 1.2290691428132914, "grad_norm": 0.49551764130592346, "learning_rate": 7.766322543830474e-06, "loss": 0.0318, "step": 80080 }, { "epoch": 1.2292226229759804, "grad_norm": 0.3625153601169586, "learning_rate": 7.7637115700652e-06, "loss": 0.036, "step": 80090 }, { "epoch": 1.2293761031386694, "grad_norm": 0.5067086815834045, "learning_rate": 7.761100756767419e-06, "loss": 0.0305, "step": 80100 }, { "epoch": 1.2295295833013582, "grad_norm": 0.3906097710132599, "learning_rate": 7.758490104124464e-06, "loss": 0.0277, "step": 80110 }, { "epoch": 1.2296830634640472, "grad_norm": 0.37189945578575134, "learning_rate": 7.755879612323667e-06, "loss": 0.0291, "step": 80120 }, { "epoch": 1.2298365436267362, "grad_norm": 0.4839622974395752, "learning_rate": 7.75326928155235e-06, "loss": 0.0349, "step": 80130 }, { "epoch": 1.2299900237894252, "grad_norm": 0.45572248101234436, "learning_rate": 7.750659111997816e-06, "loss": 0.0303, "step": 80140 }, { "epoch": 1.2301435039521142, "grad_norm": 0.24221140146255493, "learning_rate": 7.748049103847368e-06, "loss": 0.0233, "step": 80150 }, { "epoch": 1.230296984114803, "grad_norm": 0.46655091643333435, "learning_rate": 7.745439257288283e-06, "loss": 0.028, "step": 80160 }, { "epoch": 1.230450464277492, "grad_norm": 0.46549659967422485, "learning_rate": 7.742829572507833e-06, "loss": 0.04, "step": 80170 }, { "epoch": 1.230603944440181, "grad_norm": 0.5428808927536011, "learning_rate": 7.740220049693286e-06, "loss": 0.0312, "step": 80180 }, { "epoch": 1.23075742460287, "grad_norm": 0.469547301530838, "learning_rate": 7.737610689031884e-06, "loss": 0.03, "step": 80190 }, { "epoch": 1.230910904765559, "grad_norm": 0.22078809142112732, "learning_rate": 7.735001490710871e-06, "loss": 0.023, "step": 80200 }, { "epoch": 1.231064384928248, "grad_norm": 0.4318990707397461, "learning_rate": 7.732392454917471e-06, "loss": 0.0262, "step": 80210 }, { "epoch": 1.2312178650909371, "grad_norm": 0.492689311504364, "learning_rate": 7.729783581838895e-06, "loss": 0.0279, "step": 80220 }, { "epoch": 1.231371345253626, "grad_norm": 0.33072683215141296, "learning_rate": 7.72717487166235e-06, "loss": 0.0256, "step": 80230 }, { "epoch": 1.231524825416315, "grad_norm": 0.3405160903930664, "learning_rate": 7.724566324575022e-06, "loss": 0.0315, "step": 80240 }, { "epoch": 1.231678305579004, "grad_norm": 0.58525550365448, "learning_rate": 7.7219579407641e-06, "loss": 0.0303, "step": 80250 }, { "epoch": 1.231831785741693, "grad_norm": 0.2264716625213623, "learning_rate": 7.719349720416741e-06, "loss": 0.0346, "step": 80260 }, { "epoch": 1.231985265904382, "grad_norm": 0.3375800848007202, "learning_rate": 7.716741663720104e-06, "loss": 0.0321, "step": 80270 }, { "epoch": 1.2321387460670707, "grad_norm": 0.34224843978881836, "learning_rate": 7.714133770861338e-06, "loss": 0.0286, "step": 80280 }, { "epoch": 1.2322922262297598, "grad_norm": 0.5646222233772278, "learning_rate": 7.711526042027568e-06, "loss": 0.0315, "step": 80290 }, { "epoch": 1.2324457063924488, "grad_norm": 0.38323238492012024, "learning_rate": 7.708918477405922e-06, "loss": 0.0349, "step": 80300 }, { "epoch": 1.2325991865551378, "grad_norm": 0.3284662067890167, "learning_rate": 7.706311077183505e-06, "loss": 0.0256, "step": 80310 }, { "epoch": 1.2327526667178268, "grad_norm": 0.4415644705295563, "learning_rate": 7.703703841547411e-06, "loss": 0.0309, "step": 80320 }, { "epoch": 1.2329061468805156, "grad_norm": 0.3022841513156891, "learning_rate": 7.701096770684732e-06, "loss": 0.0307, "step": 80330 }, { "epoch": 1.2330596270432046, "grad_norm": 0.36472561955451965, "learning_rate": 7.698489864782535e-06, "loss": 0.0361, "step": 80340 }, { "epoch": 1.2332131072058936, "grad_norm": 0.32299456000328064, "learning_rate": 7.695883124027887e-06, "loss": 0.0218, "step": 80350 }, { "epoch": 1.2333665873685826, "grad_norm": 0.2503712475299835, "learning_rate": 7.693276548607835e-06, "loss": 0.0262, "step": 80360 }, { "epoch": 1.2335200675312716, "grad_norm": 0.3526383638381958, "learning_rate": 7.690670138709414e-06, "loss": 0.0354, "step": 80370 }, { "epoch": 1.2336735476939606, "grad_norm": 0.31365543603897095, "learning_rate": 7.688063894519655e-06, "loss": 0.0237, "step": 80380 }, { "epoch": 1.2338270278566494, "grad_norm": 0.40926533937454224, "learning_rate": 7.685457816225571e-06, "loss": 0.0263, "step": 80390 }, { "epoch": 1.2339805080193385, "grad_norm": 0.3155735731124878, "learning_rate": 7.682851904014166e-06, "loss": 0.0295, "step": 80400 }, { "epoch": 1.2341339881820275, "grad_norm": 0.4524790644645691, "learning_rate": 7.680246158072424e-06, "loss": 0.0234, "step": 80410 }, { "epoch": 1.2342874683447165, "grad_norm": 0.35700738430023193, "learning_rate": 7.677640578587327e-06, "loss": 0.0238, "step": 80420 }, { "epoch": 1.2344409485074055, "grad_norm": 0.4507409930229187, "learning_rate": 7.675035165745841e-06, "loss": 0.0271, "step": 80430 }, { "epoch": 1.2345944286700945, "grad_norm": 0.2628761827945709, "learning_rate": 7.672429919734922e-06, "loss": 0.0274, "step": 80440 }, { "epoch": 1.2347479088327833, "grad_norm": 0.33401817083358765, "learning_rate": 7.669824840741515e-06, "loss": 0.0236, "step": 80450 }, { "epoch": 1.2349013889954723, "grad_norm": 0.41442006826400757, "learning_rate": 7.667219928952541e-06, "loss": 0.0318, "step": 80460 }, { "epoch": 1.2350548691581613, "grad_norm": 0.32107171416282654, "learning_rate": 7.664615184554927e-06, "loss": 0.0278, "step": 80470 }, { "epoch": 1.2352083493208503, "grad_norm": 0.4892846643924713, "learning_rate": 7.662010607735575e-06, "loss": 0.0304, "step": 80480 }, { "epoch": 1.2353618294835393, "grad_norm": 0.43279722332954407, "learning_rate": 7.659406198681383e-06, "loss": 0.035, "step": 80490 }, { "epoch": 1.2355153096462281, "grad_norm": 0.4188676178455353, "learning_rate": 7.656801957579233e-06, "loss": 0.0281, "step": 80500 }, { "epoch": 1.2356687898089171, "grad_norm": 0.28657180070877075, "learning_rate": 7.654197884615991e-06, "loss": 0.03, "step": 80510 }, { "epoch": 1.2358222699716062, "grad_norm": 0.34239187836647034, "learning_rate": 7.65159397997852e-06, "loss": 0.0267, "step": 80520 }, { "epoch": 1.2359757501342952, "grad_norm": 0.3425801694393158, "learning_rate": 7.64899024385366e-06, "loss": 0.0221, "step": 80530 }, { "epoch": 1.2361292302969842, "grad_norm": 0.27566730976104736, "learning_rate": 7.646386676428255e-06, "loss": 0.0347, "step": 80540 }, { "epoch": 1.236282710459673, "grad_norm": 0.36819860339164734, "learning_rate": 7.643783277889123e-06, "loss": 0.0282, "step": 80550 }, { "epoch": 1.236436190622362, "grad_norm": 0.29370251297950745, "learning_rate": 7.641180048423067e-06, "loss": 0.0202, "step": 80560 }, { "epoch": 1.236589670785051, "grad_norm": 0.46306735277175903, "learning_rate": 7.638576988216893e-06, "loss": 0.0364, "step": 80570 }, { "epoch": 1.23674315094774, "grad_norm": 0.27522963285446167, "learning_rate": 7.635974097457383e-06, "loss": 0.0243, "step": 80580 }, { "epoch": 1.236896631110429, "grad_norm": 0.3726018965244293, "learning_rate": 7.633371376331312e-06, "loss": 0.0324, "step": 80590 }, { "epoch": 1.237050111273118, "grad_norm": 0.34557807445526123, "learning_rate": 7.630768825025443e-06, "loss": 0.0269, "step": 80600 }, { "epoch": 1.2372035914358068, "grad_norm": 0.4439564347267151, "learning_rate": 7.628166443726519e-06, "loss": 0.0283, "step": 80610 }, { "epoch": 1.2373570715984958, "grad_norm": 0.37054383754730225, "learning_rate": 7.625564232621281e-06, "loss": 0.0314, "step": 80620 }, { "epoch": 1.2375105517611849, "grad_norm": 0.3822784125804901, "learning_rate": 7.622962191896455e-06, "loss": 0.0371, "step": 80630 }, { "epoch": 1.2376640319238739, "grad_norm": 0.3652185797691345, "learning_rate": 7.62036032173875e-06, "loss": 0.0266, "step": 80640 }, { "epoch": 1.2378175120865629, "grad_norm": 0.4794926047325134, "learning_rate": 7.617758622334872e-06, "loss": 0.0268, "step": 80650 }, { "epoch": 1.237970992249252, "grad_norm": 0.3386289179325104, "learning_rate": 7.615157093871503e-06, "loss": 0.0278, "step": 80660 }, { "epoch": 1.2381244724119407, "grad_norm": 0.47104519605636597, "learning_rate": 7.61255573653532e-06, "loss": 0.0306, "step": 80670 }, { "epoch": 1.2382779525746297, "grad_norm": 0.5259228348731995, "learning_rate": 7.609954550512988e-06, "loss": 0.0345, "step": 80680 }, { "epoch": 1.2384314327373187, "grad_norm": 0.22461804747581482, "learning_rate": 7.6073535359911555e-06, "loss": 0.0258, "step": 80690 }, { "epoch": 1.2385849129000077, "grad_norm": 0.27920418977737427, "learning_rate": 7.604752693156467e-06, "loss": 0.0313, "step": 80700 }, { "epoch": 1.2387383930626967, "grad_norm": 0.607594907283783, "learning_rate": 7.602152022195544e-06, "loss": 0.0291, "step": 80710 }, { "epoch": 1.2388918732253855, "grad_norm": 0.3775784969329834, "learning_rate": 7.599551523294999e-06, "loss": 0.0365, "step": 80720 }, { "epoch": 1.2390453533880745, "grad_norm": 0.4232945442199707, "learning_rate": 7.596951196641439e-06, "loss": 0.0336, "step": 80730 }, { "epoch": 1.2391988335507635, "grad_norm": 0.3241710662841797, "learning_rate": 7.594351042421449e-06, "loss": 0.0318, "step": 80740 }, { "epoch": 1.2393523137134526, "grad_norm": 0.35623612999916077, "learning_rate": 7.5917510608216125e-06, "loss": 0.0252, "step": 80750 }, { "epoch": 1.2395057938761416, "grad_norm": 0.416321337223053, "learning_rate": 7.589151252028488e-06, "loss": 0.0343, "step": 80760 }, { "epoch": 1.2396592740388304, "grad_norm": 0.3999592661857605, "learning_rate": 7.586551616228628e-06, "loss": 0.0235, "step": 80770 }, { "epoch": 1.2398127542015194, "grad_norm": 0.22119905054569244, "learning_rate": 7.5839521536085755e-06, "loss": 0.0248, "step": 80780 }, { "epoch": 1.2399662343642084, "grad_norm": 0.525628924369812, "learning_rate": 7.581352864354856e-06, "loss": 0.0346, "step": 80790 }, { "epoch": 1.2401197145268974, "grad_norm": 0.39113950729370117, "learning_rate": 7.578753748653989e-06, "loss": 0.0323, "step": 80800 }, { "epoch": 1.2402731946895864, "grad_norm": 0.5760345458984375, "learning_rate": 7.576154806692471e-06, "loss": 0.0377, "step": 80810 }, { "epoch": 1.2404266748522754, "grad_norm": 0.44234171509742737, "learning_rate": 7.573556038656791e-06, "loss": 0.0287, "step": 80820 }, { "epoch": 1.2405801550149644, "grad_norm": 0.41650980710983276, "learning_rate": 7.570957444733433e-06, "loss": 0.0257, "step": 80830 }, { "epoch": 1.2407336351776532, "grad_norm": 0.3397846817970276, "learning_rate": 7.568359025108858e-06, "loss": 0.034, "step": 80840 }, { "epoch": 1.2408871153403422, "grad_norm": 0.2709822952747345, "learning_rate": 7.565760779969525e-06, "loss": 0.0256, "step": 80850 }, { "epoch": 1.2410405955030313, "grad_norm": 0.458394318819046, "learning_rate": 7.5631627095018645e-06, "loss": 0.0264, "step": 80860 }, { "epoch": 1.2411940756657203, "grad_norm": 0.34320807456970215, "learning_rate": 7.560564813892307e-06, "loss": 0.023, "step": 80870 }, { "epoch": 1.2413475558284093, "grad_norm": 0.44337987899780273, "learning_rate": 7.55796709332727e-06, "loss": 0.0277, "step": 80880 }, { "epoch": 1.241501035991098, "grad_norm": 0.33008936047554016, "learning_rate": 7.555369547993156e-06, "loss": 0.0244, "step": 80890 }, { "epoch": 1.241654516153787, "grad_norm": 0.3533703088760376, "learning_rate": 7.552772178076356e-06, "loss": 0.0315, "step": 80900 }, { "epoch": 1.241807996316476, "grad_norm": 0.3914872705936432, "learning_rate": 7.5501749837632425e-06, "loss": 0.0233, "step": 80910 }, { "epoch": 1.241961476479165, "grad_norm": 0.2740006744861603, "learning_rate": 7.547577965240184e-06, "loss": 0.0236, "step": 80920 }, { "epoch": 1.2421149566418541, "grad_norm": 0.3660562038421631, "learning_rate": 7.544981122693529e-06, "loss": 0.0307, "step": 80930 }, { "epoch": 1.242268436804543, "grad_norm": 0.20830456912517548, "learning_rate": 7.542384456309623e-06, "loss": 0.0224, "step": 80940 }, { "epoch": 1.242421916967232, "grad_norm": 0.43029066920280457, "learning_rate": 7.539787966274789e-06, "loss": 0.0347, "step": 80950 }, { "epoch": 1.242575397129921, "grad_norm": 0.44869962334632874, "learning_rate": 7.5371916527753395e-06, "loss": 0.0255, "step": 80960 }, { "epoch": 1.24272887729261, "grad_norm": 0.37447863817214966, "learning_rate": 7.53459551599758e-06, "loss": 0.0326, "step": 80970 }, { "epoch": 1.242882357455299, "grad_norm": 0.4520285129547119, "learning_rate": 7.531999556127796e-06, "loss": 0.0234, "step": 80980 }, { "epoch": 1.243035837617988, "grad_norm": 0.49471187591552734, "learning_rate": 7.529403773352265e-06, "loss": 0.0369, "step": 80990 }, { "epoch": 1.2431893177806768, "grad_norm": 0.6224773526191711, "learning_rate": 7.526808167857253e-06, "loss": 0.0299, "step": 81000 }, { "epoch": 1.2433427979433658, "grad_norm": 0.48810046911239624, "learning_rate": 7.524212739829005e-06, "loss": 0.0335, "step": 81010 }, { "epoch": 1.2434962781060548, "grad_norm": 0.2643020749092102, "learning_rate": 7.521617489453764e-06, "loss": 0.0312, "step": 81020 }, { "epoch": 1.2436497582687438, "grad_norm": 0.6047942042350769, "learning_rate": 7.51902241691775e-06, "loss": 0.0275, "step": 81030 }, { "epoch": 1.2438032384314328, "grad_norm": 0.4705179035663605, "learning_rate": 7.5164275224071825e-06, "loss": 0.0309, "step": 81040 }, { "epoch": 1.2439567185941218, "grad_norm": 0.5355333089828491, "learning_rate": 7.513832806108259e-06, "loss": 0.0312, "step": 81050 }, { "epoch": 1.2441101987568106, "grad_norm": 0.4427514970302582, "learning_rate": 7.511238268207159e-06, "loss": 0.0244, "step": 81060 }, { "epoch": 1.2442636789194996, "grad_norm": 0.35523557662963867, "learning_rate": 7.5086439088900665e-06, "loss": 0.0313, "step": 81070 }, { "epoch": 1.2444171590821886, "grad_norm": 0.3190138339996338, "learning_rate": 7.506049728343136e-06, "loss": 0.0308, "step": 81080 }, { "epoch": 1.2445706392448777, "grad_norm": 0.4501810371875763, "learning_rate": 7.503455726752521e-06, "loss": 0.0258, "step": 81090 }, { "epoch": 1.2447241194075667, "grad_norm": 0.436014860868454, "learning_rate": 7.500861904304356e-06, "loss": 0.0245, "step": 81100 }, { "epoch": 1.2448775995702555, "grad_norm": 0.24571093916893005, "learning_rate": 7.4982682611847606e-06, "loss": 0.0248, "step": 81110 }, { "epoch": 1.2450310797329445, "grad_norm": 0.3457184433937073, "learning_rate": 7.495674797579846e-06, "loss": 0.0259, "step": 81120 }, { "epoch": 1.2451845598956335, "grad_norm": 0.3181585371494293, "learning_rate": 7.493081513675708e-06, "loss": 0.0247, "step": 81130 }, { "epoch": 1.2453380400583225, "grad_norm": 0.37527844309806824, "learning_rate": 7.490488409658436e-06, "loss": 0.0276, "step": 81140 }, { "epoch": 1.2454915202210115, "grad_norm": 0.40139442682266235, "learning_rate": 7.487895485714099e-06, "loss": 0.0309, "step": 81150 }, { "epoch": 1.2456450003837003, "grad_norm": 0.35715946555137634, "learning_rate": 7.48530274202875e-06, "loss": 0.0282, "step": 81160 }, { "epoch": 1.2457984805463893, "grad_norm": 0.34646227955818176, "learning_rate": 7.482710178788439e-06, "loss": 0.0297, "step": 81170 }, { "epoch": 1.2459519607090783, "grad_norm": 0.4144384264945984, "learning_rate": 7.4801177961792e-06, "loss": 0.0261, "step": 81180 }, { "epoch": 1.2461054408717673, "grad_norm": 0.2897599935531616, "learning_rate": 7.477525594387047e-06, "loss": 0.0316, "step": 81190 }, { "epoch": 1.2462589210344563, "grad_norm": 0.46601372957229614, "learning_rate": 7.474933573597996e-06, "loss": 0.0266, "step": 81200 }, { "epoch": 1.2464124011971454, "grad_norm": 0.31106194853782654, "learning_rate": 7.472341733998029e-06, "loss": 0.0315, "step": 81210 }, { "epoch": 1.2465658813598341, "grad_norm": 0.31972387433052063, "learning_rate": 7.469750075773132e-06, "loss": 0.0287, "step": 81220 }, { "epoch": 1.2467193615225232, "grad_norm": 0.4658429026603699, "learning_rate": 7.467158599109273e-06, "loss": 0.0308, "step": 81230 }, { "epoch": 1.2468728416852122, "grad_norm": 0.4138886630535126, "learning_rate": 7.464567304192404e-06, "loss": 0.0284, "step": 81240 }, { "epoch": 1.2470263218479012, "grad_norm": 0.3698940873146057, "learning_rate": 7.461976191208472e-06, "loss": 0.0268, "step": 81250 }, { "epoch": 1.2471798020105902, "grad_norm": 0.374688982963562, "learning_rate": 7.4593852603433994e-06, "loss": 0.0321, "step": 81260 }, { "epoch": 1.2473332821732792, "grad_norm": 0.3553828001022339, "learning_rate": 7.456794511783101e-06, "loss": 0.0306, "step": 81270 }, { "epoch": 1.247486762335968, "grad_norm": 0.4626254141330719, "learning_rate": 7.454203945713483e-06, "loss": 0.024, "step": 81280 }, { "epoch": 1.247640242498657, "grad_norm": 0.47389060258865356, "learning_rate": 7.451613562320432e-06, "loss": 0.0268, "step": 81290 }, { "epoch": 1.247793722661346, "grad_norm": 0.4359661936759949, "learning_rate": 7.449023361789827e-06, "loss": 0.0264, "step": 81300 }, { "epoch": 1.247947202824035, "grad_norm": 0.2604329288005829, "learning_rate": 7.446433344307529e-06, "loss": 0.0318, "step": 81310 }, { "epoch": 1.248100682986724, "grad_norm": 0.37876588106155396, "learning_rate": 7.443843510059383e-06, "loss": 0.0245, "step": 81320 }, { "epoch": 1.2482541631494128, "grad_norm": 0.45697832107543945, "learning_rate": 7.441253859231235e-06, "loss": 0.0327, "step": 81330 }, { "epoch": 1.2484076433121019, "grad_norm": 0.5682631134986877, "learning_rate": 7.438664392008903e-06, "loss": 0.0251, "step": 81340 }, { "epoch": 1.2485611234747909, "grad_norm": 0.38517358899116516, "learning_rate": 7.436075108578194e-06, "loss": 0.0275, "step": 81350 }, { "epoch": 1.2487146036374799, "grad_norm": 0.32133784890174866, "learning_rate": 7.433486009124911e-06, "loss": 0.0271, "step": 81360 }, { "epoch": 1.248868083800169, "grad_norm": 0.34853479266166687, "learning_rate": 7.430897093834832e-06, "loss": 0.0322, "step": 81370 }, { "epoch": 1.2490215639628577, "grad_norm": 0.5156629681587219, "learning_rate": 7.428308362893735e-06, "loss": 0.0324, "step": 81380 }, { "epoch": 1.2491750441255467, "grad_norm": 0.48077449202537537, "learning_rate": 7.425719816487376e-06, "loss": 0.0315, "step": 81390 }, { "epoch": 1.2493285242882357, "grad_norm": 0.30909037590026855, "learning_rate": 7.423131454801491e-06, "loss": 0.0236, "step": 81400 }, { "epoch": 1.2494820044509247, "grad_norm": 0.38733622431755066, "learning_rate": 7.420543278021821e-06, "loss": 0.0331, "step": 81410 }, { "epoch": 1.2496354846136137, "grad_norm": 0.39274469017982483, "learning_rate": 7.4179552863340755e-06, "loss": 0.0288, "step": 81420 }, { "epoch": 1.2497889647763027, "grad_norm": 0.43132445216178894, "learning_rate": 7.415367479923963e-06, "loss": 0.0241, "step": 81430 }, { "epoch": 1.2499424449389918, "grad_norm": 0.35335028171539307, "learning_rate": 7.412779858977181e-06, "loss": 0.0299, "step": 81440 }, { "epoch": 1.2500959251016805, "grad_norm": 0.32961615920066833, "learning_rate": 7.410192423679394e-06, "loss": 0.032, "step": 81450 }, { "epoch": 1.2502494052643696, "grad_norm": 0.3624826967716217, "learning_rate": 7.4076051742162724e-06, "loss": 0.0247, "step": 81460 }, { "epoch": 1.2504028854270586, "grad_norm": 0.35024333000183105, "learning_rate": 7.405018110773471e-06, "loss": 0.029, "step": 81470 }, { "epoch": 1.2505563655897476, "grad_norm": 0.43511801958084106, "learning_rate": 7.402431233536622e-06, "loss": 0.0299, "step": 81480 }, { "epoch": 1.2507098457524366, "grad_norm": 0.3502195179462433, "learning_rate": 7.399844542691356e-06, "loss": 0.0251, "step": 81490 }, { "epoch": 1.2508633259151254, "grad_norm": 0.30943238735198975, "learning_rate": 7.397258038423278e-06, "loss": 0.0231, "step": 81500 }, { "epoch": 1.2510168060778144, "grad_norm": 0.3368825614452362, "learning_rate": 7.394671720917986e-06, "loss": 0.031, "step": 81510 }, { "epoch": 1.2511702862405034, "grad_norm": 0.34298890829086304, "learning_rate": 7.3920855903610685e-06, "loss": 0.0221, "step": 81520 }, { "epoch": 1.2513237664031924, "grad_norm": 0.3800831735134125, "learning_rate": 7.389499646938092e-06, "loss": 0.0267, "step": 81530 }, { "epoch": 1.2514772465658814, "grad_norm": 0.42540571093559265, "learning_rate": 7.386913890834619e-06, "loss": 0.0316, "step": 81540 }, { "epoch": 1.2516307267285702, "grad_norm": 0.29589274525642395, "learning_rate": 7.38432832223619e-06, "loss": 0.0266, "step": 81550 }, { "epoch": 1.2517842068912592, "grad_norm": 0.4830819368362427, "learning_rate": 7.381742941328333e-06, "loss": 0.023, "step": 81560 }, { "epoch": 1.2519376870539483, "grad_norm": 0.46408167481422424, "learning_rate": 7.379157748296568e-06, "loss": 0.0271, "step": 81570 }, { "epoch": 1.2520911672166373, "grad_norm": 0.5050332546234131, "learning_rate": 7.376572743326398e-06, "loss": 0.0359, "step": 81580 }, { "epoch": 1.2522446473793263, "grad_norm": 0.3581923544406891, "learning_rate": 7.373987926603318e-06, "loss": 0.0309, "step": 81590 }, { "epoch": 1.252398127542015, "grad_norm": 0.6988945603370667, "learning_rate": 7.371403298312797e-06, "loss": 0.0401, "step": 81600 }, { "epoch": 1.252551607704704, "grad_norm": 0.3825167119503021, "learning_rate": 7.3688188586402985e-06, "loss": 0.0244, "step": 81610 }, { "epoch": 1.252705087867393, "grad_norm": 0.36430659890174866, "learning_rate": 7.3662346077712774e-06, "loss": 0.0287, "step": 81620 }, { "epoch": 1.252858568030082, "grad_norm": 0.32858070731163025, "learning_rate": 7.363650545891164e-06, "loss": 0.0296, "step": 81630 }, { "epoch": 1.2530120481927711, "grad_norm": 0.30921027064323425, "learning_rate": 7.361066673185389e-06, "loss": 0.0262, "step": 81640 }, { "epoch": 1.2531655283554601, "grad_norm": 0.3651205003261566, "learning_rate": 7.35848298983935e-06, "loss": 0.0337, "step": 81650 }, { "epoch": 1.2533190085181491, "grad_norm": 0.3359117805957794, "learning_rate": 7.3558994960384485e-06, "loss": 0.0278, "step": 81660 }, { "epoch": 1.253472488680838, "grad_norm": 0.38883328437805176, "learning_rate": 7.3533161919680675e-06, "loss": 0.0267, "step": 81670 }, { "epoch": 1.253625968843527, "grad_norm": 0.5611386299133301, "learning_rate": 7.35073307781357e-06, "loss": 0.0329, "step": 81680 }, { "epoch": 1.253779449006216, "grad_norm": 0.3709181249141693, "learning_rate": 7.348150153760317e-06, "loss": 0.0407, "step": 81690 }, { "epoch": 1.253932929168905, "grad_norm": 0.3755699098110199, "learning_rate": 7.345567419993644e-06, "loss": 0.0278, "step": 81700 }, { "epoch": 1.254086409331594, "grad_norm": 0.2463887631893158, "learning_rate": 7.342984876698878e-06, "loss": 0.0274, "step": 81710 }, { "epoch": 1.2542398894942828, "grad_norm": 0.3312302529811859, "learning_rate": 7.340402524061333e-06, "loss": 0.0389, "step": 81720 }, { "epoch": 1.2543933696569718, "grad_norm": 0.2252374142408371, "learning_rate": 7.3378203622663125e-06, "loss": 0.0329, "step": 81730 }, { "epoch": 1.2545468498196608, "grad_norm": 0.3842664957046509, "learning_rate": 7.335238391499101e-06, "loss": 0.0257, "step": 81740 }, { "epoch": 1.2547003299823498, "grad_norm": 0.22646549344062805, "learning_rate": 7.3326566119449664e-06, "loss": 0.0264, "step": 81750 }, { "epoch": 1.2548538101450388, "grad_norm": 0.3724307417869568, "learning_rate": 7.330075023789171e-06, "loss": 0.0319, "step": 81760 }, { "epoch": 1.2550072903077276, "grad_norm": 0.37979933619499207, "learning_rate": 7.327493627216959e-06, "loss": 0.027, "step": 81770 }, { "epoch": 1.2551607704704166, "grad_norm": 0.4261808395385742, "learning_rate": 7.324912422413562e-06, "loss": 0.0278, "step": 81780 }, { "epoch": 1.2553142506331056, "grad_norm": 0.5430133938789368, "learning_rate": 7.3223314095642e-06, "loss": 0.03, "step": 81790 }, { "epoch": 1.2554677307957947, "grad_norm": 0.33634987473487854, "learning_rate": 7.319750588854069e-06, "loss": 0.0299, "step": 81800 }, { "epoch": 1.2556212109584837, "grad_norm": 0.37795954942703247, "learning_rate": 7.317169960468366e-06, "loss": 0.0273, "step": 81810 }, { "epoch": 1.2557746911211725, "grad_norm": 0.39711105823516846, "learning_rate": 7.3145895245922615e-06, "loss": 0.028, "step": 81820 }, { "epoch": 1.2559281712838617, "grad_norm": 0.5053344368934631, "learning_rate": 7.312009281410923e-06, "loss": 0.0319, "step": 81830 }, { "epoch": 1.2560816514465505, "grad_norm": 0.3858911395072937, "learning_rate": 7.309429231109499e-06, "loss": 0.0397, "step": 81840 }, { "epoch": 1.2562351316092395, "grad_norm": 0.26347360014915466, "learning_rate": 7.306849373873117e-06, "loss": 0.0232, "step": 81850 }, { "epoch": 1.2563886117719285, "grad_norm": 0.3624526262283325, "learning_rate": 7.304269709886905e-06, "loss": 0.0315, "step": 81860 }, { "epoch": 1.2565420919346175, "grad_norm": 0.6143501996994019, "learning_rate": 7.301690239335964e-06, "loss": 0.0361, "step": 81870 }, { "epoch": 1.2566955720973065, "grad_norm": 0.3569210171699524, "learning_rate": 7.299110962405392e-06, "loss": 0.0265, "step": 81880 }, { "epoch": 1.2568490522599953, "grad_norm": 0.5687936544418335, "learning_rate": 7.2965318792802684e-06, "loss": 0.0231, "step": 81890 }, { "epoch": 1.2570025324226843, "grad_norm": 0.4439684748649597, "learning_rate": 7.2939529901456514e-06, "loss": 0.0232, "step": 81900 }, { "epoch": 1.2571560125853734, "grad_norm": 0.3585933744907379, "learning_rate": 7.2913742951865995e-06, "loss": 0.0263, "step": 81910 }, { "epoch": 1.2573094927480624, "grad_norm": 0.3394708037376404, "learning_rate": 7.2887957945881435e-06, "loss": 0.0245, "step": 81920 }, { "epoch": 1.2574629729107514, "grad_norm": 0.36057597398757935, "learning_rate": 7.286217488535314e-06, "loss": 0.0298, "step": 81930 }, { "epoch": 1.2576164530734402, "grad_norm": 0.39732271432876587, "learning_rate": 7.283639377213118e-06, "loss": 0.0233, "step": 81940 }, { "epoch": 1.2577699332361292, "grad_norm": 0.40851983428001404, "learning_rate": 7.281061460806545e-06, "loss": 0.0287, "step": 81950 }, { "epoch": 1.2579234133988182, "grad_norm": 0.43617793917655945, "learning_rate": 7.278483739500584e-06, "loss": 0.0321, "step": 81960 }, { "epoch": 1.2580768935615072, "grad_norm": 0.2652588188648224, "learning_rate": 7.275906213480195e-06, "loss": 0.0216, "step": 81970 }, { "epoch": 1.2582303737241962, "grad_norm": 0.42828401923179626, "learning_rate": 7.2733288829303385e-06, "loss": 0.03, "step": 81980 }, { "epoch": 1.258383853886885, "grad_norm": 0.33176952600479126, "learning_rate": 7.2707517480359555e-06, "loss": 0.0348, "step": 81990 }, { "epoch": 1.258537334049574, "grad_norm": 0.37896862626075745, "learning_rate": 7.268174808981963e-06, "loss": 0.027, "step": 82000 }, { "epoch": 1.258690814212263, "grad_norm": 0.3576151430606842, "learning_rate": 7.265598065953273e-06, "loss": 0.0275, "step": 82010 }, { "epoch": 1.258844294374952, "grad_norm": 0.22383226454257965, "learning_rate": 7.26302151913479e-06, "loss": 0.0245, "step": 82020 }, { "epoch": 1.258997774537641, "grad_norm": 0.46341586112976074, "learning_rate": 7.2604451687113894e-06, "loss": 0.0249, "step": 82030 }, { "epoch": 1.2591512547003298, "grad_norm": 0.3761034905910492, "learning_rate": 7.257869014867951e-06, "loss": 0.0242, "step": 82040 }, { "epoch": 1.259304734863019, "grad_norm": 0.32332026958465576, "learning_rate": 7.255293057789319e-06, "loss": 0.0233, "step": 82050 }, { "epoch": 1.2594582150257079, "grad_norm": 0.5240457653999329, "learning_rate": 7.252717297660335e-06, "loss": 0.0321, "step": 82060 }, { "epoch": 1.2596116951883969, "grad_norm": 0.31906452775001526, "learning_rate": 7.25014173466583e-06, "loss": 0.0368, "step": 82070 }, { "epoch": 1.259765175351086, "grad_norm": 0.2816266119480133, "learning_rate": 7.247566368990615e-06, "loss": 0.0275, "step": 82080 }, { "epoch": 1.259918655513775, "grad_norm": 0.45475804805755615, "learning_rate": 7.244991200819491e-06, "loss": 0.0277, "step": 82090 }, { "epoch": 1.260072135676464, "grad_norm": 0.4133869409561157, "learning_rate": 7.242416230337237e-06, "loss": 0.0249, "step": 82100 }, { "epoch": 1.2602256158391527, "grad_norm": 0.6678616404533386, "learning_rate": 7.239841457728625e-06, "loss": 0.0301, "step": 82110 }, { "epoch": 1.2603790960018417, "grad_norm": 0.3485233783721924, "learning_rate": 7.2372668831784135e-06, "loss": 0.0313, "step": 82120 }, { "epoch": 1.2605325761645307, "grad_norm": 0.40765249729156494, "learning_rate": 7.2346925068713395e-06, "loss": 0.0354, "step": 82130 }, { "epoch": 1.2606860563272198, "grad_norm": 0.5371513366699219, "learning_rate": 7.232118328992136e-06, "loss": 0.0323, "step": 82140 }, { "epoch": 1.2608395364899088, "grad_norm": 0.38383591175079346, "learning_rate": 7.229544349725512e-06, "loss": 0.0269, "step": 82150 }, { "epoch": 1.2609930166525976, "grad_norm": 0.24938060343265533, "learning_rate": 7.2269705692561655e-06, "loss": 0.023, "step": 82160 }, { "epoch": 1.2611464968152866, "grad_norm": 0.4331270456314087, "learning_rate": 7.224396987768785e-06, "loss": 0.0291, "step": 82170 }, { "epoch": 1.2612999769779756, "grad_norm": 0.4126230776309967, "learning_rate": 7.221823605448035e-06, "loss": 0.0307, "step": 82180 }, { "epoch": 1.2614534571406646, "grad_norm": 0.3712690770626068, "learning_rate": 7.219250422478582e-06, "loss": 0.0255, "step": 82190 }, { "epoch": 1.2616069373033536, "grad_norm": 0.40220907330513, "learning_rate": 7.216677439045059e-06, "loss": 0.0269, "step": 82200 }, { "epoch": 1.2617604174660424, "grad_norm": 0.4300629794597626, "learning_rate": 7.2141046553320925e-06, "loss": 0.0309, "step": 82210 }, { "epoch": 1.2619138976287314, "grad_norm": 0.29883483052253723, "learning_rate": 7.211532071524301e-06, "loss": 0.0254, "step": 82220 }, { "epoch": 1.2620673777914204, "grad_norm": 0.36742544174194336, "learning_rate": 7.208959687806279e-06, "loss": 0.0352, "step": 82230 }, { "epoch": 1.2622208579541094, "grad_norm": 0.3400191068649292, "learning_rate": 7.206387504362616e-06, "loss": 0.0239, "step": 82240 }, { "epoch": 1.2623743381167984, "grad_norm": 0.39858806133270264, "learning_rate": 7.203815521377879e-06, "loss": 0.0331, "step": 82250 }, { "epoch": 1.2625278182794875, "grad_norm": 0.37828323245048523, "learning_rate": 7.201243739036618e-06, "loss": 0.03, "step": 82260 }, { "epoch": 1.2626812984421765, "grad_norm": 0.30328604578971863, "learning_rate": 7.198672157523383e-06, "loss": 0.0281, "step": 82270 }, { "epoch": 1.2628347786048653, "grad_norm": 0.43571755290031433, "learning_rate": 7.196100777022699e-06, "loss": 0.0386, "step": 82280 }, { "epoch": 1.2629882587675543, "grad_norm": 0.25012484192848206, "learning_rate": 7.193529597719078e-06, "loss": 0.0246, "step": 82290 }, { "epoch": 1.2631417389302433, "grad_norm": 0.4225461184978485, "learning_rate": 7.190958619797014e-06, "loss": 0.0246, "step": 82300 }, { "epoch": 1.2632952190929323, "grad_norm": 0.20912401378154755, "learning_rate": 7.188387843440995e-06, "loss": 0.026, "step": 82310 }, { "epoch": 1.2634486992556213, "grad_norm": 0.3904713988304138, "learning_rate": 7.1858172688354865e-06, "loss": 0.0277, "step": 82320 }, { "epoch": 1.26360217941831, "grad_norm": 0.41559648513793945, "learning_rate": 7.183246896164947e-06, "loss": 0.0332, "step": 82330 }, { "epoch": 1.2637556595809991, "grad_norm": 0.34292545914649963, "learning_rate": 7.1806767256138175e-06, "loss": 0.0244, "step": 82340 }, { "epoch": 1.2639091397436881, "grad_norm": 0.34150487184524536, "learning_rate": 7.178106757366516e-06, "loss": 0.0261, "step": 82350 }, { "epoch": 1.2640626199063771, "grad_norm": 0.3717138171195984, "learning_rate": 7.175536991607461e-06, "loss": 0.0217, "step": 82360 }, { "epoch": 1.2642161000690662, "grad_norm": 0.3280750811100006, "learning_rate": 7.172967428521044e-06, "loss": 0.0229, "step": 82370 }, { "epoch": 1.264369580231755, "grad_norm": 0.35216179490089417, "learning_rate": 7.1703980682916506e-06, "loss": 0.0304, "step": 82380 }, { "epoch": 1.264523060394444, "grad_norm": 0.4833369851112366, "learning_rate": 7.167828911103651e-06, "loss": 0.0318, "step": 82390 }, { "epoch": 1.264676540557133, "grad_norm": 0.29441431164741516, "learning_rate": 7.165259957141389e-06, "loss": 0.0257, "step": 82400 }, { "epoch": 1.264830020719822, "grad_norm": 0.3305191695690155, "learning_rate": 7.162691206589209e-06, "loss": 0.029, "step": 82410 }, { "epoch": 1.264983500882511, "grad_norm": 0.3531130850315094, "learning_rate": 7.160122659631435e-06, "loss": 0.0288, "step": 82420 }, { "epoch": 1.2651369810451998, "grad_norm": 0.4357326030731201, "learning_rate": 7.157554316452373e-06, "loss": 0.0315, "step": 82430 }, { "epoch": 1.265290461207889, "grad_norm": 0.35100051760673523, "learning_rate": 7.154986177236325e-06, "loss": 0.0231, "step": 82440 }, { "epoch": 1.2654439413705778, "grad_norm": 0.2973662316799164, "learning_rate": 7.152418242167558e-06, "loss": 0.0321, "step": 82450 }, { "epoch": 1.2655974215332668, "grad_norm": 0.4341290295124054, "learning_rate": 7.1498505114303475e-06, "loss": 0.029, "step": 82460 }, { "epoch": 1.2657509016959558, "grad_norm": 0.3787698745727539, "learning_rate": 7.147282985208939e-06, "loss": 0.0282, "step": 82470 }, { "epoch": 1.2659043818586448, "grad_norm": 0.3828136920928955, "learning_rate": 7.1447156636875715e-06, "loss": 0.0262, "step": 82480 }, { "epoch": 1.2660578620213339, "grad_norm": 0.37101179361343384, "learning_rate": 7.142148547050467e-06, "loss": 0.0312, "step": 82490 }, { "epoch": 1.2662113421840226, "grad_norm": 0.4296289086341858, "learning_rate": 7.139581635481827e-06, "loss": 0.031, "step": 82500 }, { "epoch": 1.2663648223467117, "grad_norm": 0.3593124747276306, "learning_rate": 7.137014929165846e-06, "loss": 0.0325, "step": 82510 }, { "epoch": 1.2665183025094007, "grad_norm": 0.39818859100341797, "learning_rate": 7.1344484282867e-06, "loss": 0.0326, "step": 82520 }, { "epoch": 1.2666717826720897, "grad_norm": 0.27796629071235657, "learning_rate": 7.131882133028552e-06, "loss": 0.024, "step": 82530 }, { "epoch": 1.2668252628347787, "grad_norm": 0.3941769301891327, "learning_rate": 7.129316043575555e-06, "loss": 0.0295, "step": 82540 }, { "epoch": 1.2669787429974675, "grad_norm": 0.2776694893836975, "learning_rate": 7.126750160111833e-06, "loss": 0.0337, "step": 82550 }, { "epoch": 1.2671322231601565, "grad_norm": 0.2368156760931015, "learning_rate": 7.124184482821506e-06, "loss": 0.0287, "step": 82560 }, { "epoch": 1.2672857033228455, "grad_norm": 0.4224318563938141, "learning_rate": 7.121619011888681e-06, "loss": 0.0272, "step": 82570 }, { "epoch": 1.2674391834855345, "grad_norm": 0.5152448415756226, "learning_rate": 7.119053747497442e-06, "loss": 0.0249, "step": 82580 }, { "epoch": 1.2675926636482235, "grad_norm": 0.3821704685688019, "learning_rate": 7.116488689831869e-06, "loss": 0.0285, "step": 82590 }, { "epoch": 1.2677461438109123, "grad_norm": 0.5506729483604431, "learning_rate": 7.113923839076012e-06, "loss": 0.0302, "step": 82600 }, { "epoch": 1.2678996239736013, "grad_norm": 0.3015291392803192, "learning_rate": 7.1113591954139184e-06, "loss": 0.0252, "step": 82610 }, { "epoch": 1.2680531041362904, "grad_norm": 0.5706896185874939, "learning_rate": 7.1087947590296205e-06, "loss": 0.031, "step": 82620 }, { "epoch": 1.2682065842989794, "grad_norm": 0.42006754875183105, "learning_rate": 7.106230530107128e-06, "loss": 0.0303, "step": 82630 }, { "epoch": 1.2683600644616684, "grad_norm": 0.3894338309764862, "learning_rate": 7.103666508830445e-06, "loss": 0.0209, "step": 82640 }, { "epoch": 1.2685135446243572, "grad_norm": 0.3796398937702179, "learning_rate": 7.1011026953835516e-06, "loss": 0.0267, "step": 82650 }, { "epoch": 1.2686670247870464, "grad_norm": 0.32156848907470703, "learning_rate": 7.0985390899504156e-06, "loss": 0.0308, "step": 82660 }, { "epoch": 1.2688205049497352, "grad_norm": 0.49567729234695435, "learning_rate": 7.095975692714998e-06, "loss": 0.0254, "step": 82670 }, { "epoch": 1.2689739851124242, "grad_norm": 0.3040471076965332, "learning_rate": 7.093412503861231e-06, "loss": 0.0247, "step": 82680 }, { "epoch": 1.2691274652751132, "grad_norm": 0.4742763340473175, "learning_rate": 7.0908495235730476e-06, "loss": 0.0311, "step": 82690 }, { "epoch": 1.2692809454378022, "grad_norm": 0.3017885982990265, "learning_rate": 7.08828675203435e-06, "loss": 0.0286, "step": 82700 }, { "epoch": 1.2694344256004912, "grad_norm": 0.3984721004962921, "learning_rate": 7.085724189429035e-06, "loss": 0.026, "step": 82710 }, { "epoch": 1.26958790576318, "grad_norm": 0.3413560390472412, "learning_rate": 7.083161835940984e-06, "loss": 0.03, "step": 82720 }, { "epoch": 1.269741385925869, "grad_norm": 0.3988744616508484, "learning_rate": 7.0805996917540595e-06, "loss": 0.0274, "step": 82730 }, { "epoch": 1.269894866088558, "grad_norm": 0.4523310661315918, "learning_rate": 7.078037757052115e-06, "loss": 0.0264, "step": 82740 }, { "epoch": 1.270048346251247, "grad_norm": 0.3131689131259918, "learning_rate": 7.075476032018982e-06, "loss": 0.0296, "step": 82750 }, { "epoch": 1.270201826413936, "grad_norm": 0.3689829707145691, "learning_rate": 7.0729145168384764e-06, "loss": 0.0258, "step": 82760 }, { "epoch": 1.2703553065766249, "grad_norm": 0.28668659925460815, "learning_rate": 7.070353211694409e-06, "loss": 0.0229, "step": 82770 }, { "epoch": 1.2705087867393139, "grad_norm": 0.2748714089393616, "learning_rate": 7.067792116770566e-06, "loss": 0.0247, "step": 82780 }, { "epoch": 1.270662266902003, "grad_norm": 0.46794942021369934, "learning_rate": 7.065231232250726e-06, "loss": 0.042, "step": 82790 }, { "epoch": 1.270815747064692, "grad_norm": 0.6030420064926147, "learning_rate": 7.062670558318641e-06, "loss": 0.0337, "step": 82800 }, { "epoch": 1.270969227227381, "grad_norm": 0.32326263189315796, "learning_rate": 7.060110095158062e-06, "loss": 0.0304, "step": 82810 }, { "epoch": 1.2711227073900697, "grad_norm": 0.354412704706192, "learning_rate": 7.057549842952712e-06, "loss": 0.0305, "step": 82820 }, { "epoch": 1.2712761875527587, "grad_norm": 0.4285905063152313, "learning_rate": 7.0549898018863115e-06, "loss": 0.0342, "step": 82830 }, { "epoch": 1.2714296677154477, "grad_norm": 0.5272032618522644, "learning_rate": 7.052429972142556e-06, "loss": 0.0292, "step": 82840 }, { "epoch": 1.2715831478781368, "grad_norm": 0.21646791696548462, "learning_rate": 7.049870353905126e-06, "loss": 0.0223, "step": 82850 }, { "epoch": 1.2717366280408258, "grad_norm": 0.3712503910064697, "learning_rate": 7.047310947357695e-06, "loss": 0.0247, "step": 82860 }, { "epoch": 1.2718901082035146, "grad_norm": 0.283618688583374, "learning_rate": 7.044751752683911e-06, "loss": 0.0282, "step": 82870 }, { "epoch": 1.2720435883662038, "grad_norm": 0.31087297201156616, "learning_rate": 7.042192770067416e-06, "loss": 0.0308, "step": 82880 }, { "epoch": 1.2721970685288926, "grad_norm": 0.34087836742401123, "learning_rate": 7.039633999691835e-06, "loss": 0.036, "step": 82890 }, { "epoch": 1.2723505486915816, "grad_norm": 0.4956536293029785, "learning_rate": 7.0370754417407685e-06, "loss": 0.028, "step": 82900 }, { "epoch": 1.2725040288542706, "grad_norm": 0.34819984436035156, "learning_rate": 7.034517096397815e-06, "loss": 0.0317, "step": 82910 }, { "epoch": 1.2726575090169596, "grad_norm": 0.40739259123802185, "learning_rate": 7.031958963846546e-06, "loss": 0.0293, "step": 82920 }, { "epoch": 1.2728109891796486, "grad_norm": 0.5497153401374817, "learning_rate": 7.029401044270529e-06, "loss": 0.032, "step": 82930 }, { "epoch": 1.2729644693423374, "grad_norm": 0.4017466902732849, "learning_rate": 7.02684333785331e-06, "loss": 0.0262, "step": 82940 }, { "epoch": 1.2731179495050264, "grad_norm": 0.2981545329093933, "learning_rate": 7.024285844778414e-06, "loss": 0.0236, "step": 82950 }, { "epoch": 1.2732714296677154, "grad_norm": 0.3130412697792053, "learning_rate": 7.021728565229362e-06, "loss": 0.0242, "step": 82960 }, { "epoch": 1.2734249098304045, "grad_norm": 0.34463733434677124, "learning_rate": 7.0191714993896535e-06, "loss": 0.0226, "step": 82970 }, { "epoch": 1.2735783899930935, "grad_norm": 0.37383389472961426, "learning_rate": 7.016614647442774e-06, "loss": 0.0389, "step": 82980 }, { "epoch": 1.2737318701557823, "grad_norm": 0.37265053391456604, "learning_rate": 7.014058009572197e-06, "loss": 0.0302, "step": 82990 }, { "epoch": 1.2738853503184713, "grad_norm": 0.40133753418922424, "learning_rate": 7.011501585961369e-06, "loss": 0.0301, "step": 83000 }, { "epoch": 1.2740388304811603, "grad_norm": 0.4339500665664673, "learning_rate": 7.0089453767937345e-06, "loss": 0.0241, "step": 83010 }, { "epoch": 1.2741923106438493, "grad_norm": 0.3371294438838959, "learning_rate": 7.006389382252714e-06, "loss": 0.0296, "step": 83020 }, { "epoch": 1.2743457908065383, "grad_norm": 0.4032759666442871, "learning_rate": 7.003833602521721e-06, "loss": 0.0258, "step": 83030 }, { "epoch": 1.274499270969227, "grad_norm": 0.4447038173675537, "learning_rate": 7.0012780377841476e-06, "loss": 0.0299, "step": 83040 }, { "epoch": 1.2746527511319161, "grad_norm": 0.6063554286956787, "learning_rate": 6.998722688223365e-06, "loss": 0.0316, "step": 83050 }, { "epoch": 1.2748062312946051, "grad_norm": 0.5128828287124634, "learning_rate": 6.996167554022741e-06, "loss": 0.0276, "step": 83060 }, { "epoch": 1.2749597114572941, "grad_norm": 0.2916688919067383, "learning_rate": 6.9936126353656205e-06, "loss": 0.0323, "step": 83070 }, { "epoch": 1.2751131916199832, "grad_norm": 0.35346564650535583, "learning_rate": 6.9910579324353344e-06, "loss": 0.0281, "step": 83080 }, { "epoch": 1.2752666717826722, "grad_norm": 0.3817979395389557, "learning_rate": 6.988503445415203e-06, "loss": 0.0231, "step": 83090 }, { "epoch": 1.2754201519453612, "grad_norm": 0.313520610332489, "learning_rate": 6.98594917448852e-06, "loss": 0.0203, "step": 83100 }, { "epoch": 1.27557363210805, "grad_norm": 0.4640328586101532, "learning_rate": 6.98339511983857e-06, "loss": 0.027, "step": 83110 }, { "epoch": 1.275727112270739, "grad_norm": 0.28714799880981445, "learning_rate": 6.980841281648627e-06, "loss": 0.0288, "step": 83120 }, { "epoch": 1.275880592433428, "grad_norm": 0.3925086259841919, "learning_rate": 6.9782876601019416e-06, "loss": 0.0356, "step": 83130 }, { "epoch": 1.276034072596117, "grad_norm": 0.30554571747779846, "learning_rate": 6.9757342553817555e-06, "loss": 0.0258, "step": 83140 }, { "epoch": 1.276187552758806, "grad_norm": 0.29079824686050415, "learning_rate": 6.973181067671286e-06, "loss": 0.0271, "step": 83150 }, { "epoch": 1.2763410329214948, "grad_norm": 0.4161977171897888, "learning_rate": 6.970628097153742e-06, "loss": 0.0316, "step": 83160 }, { "epoch": 1.2764945130841838, "grad_norm": 0.43611934781074524, "learning_rate": 6.968075344012317e-06, "loss": 0.0256, "step": 83170 }, { "epoch": 1.2766479932468728, "grad_norm": 0.4465044140815735, "learning_rate": 6.965522808430183e-06, "loss": 0.0242, "step": 83180 }, { "epoch": 1.2768014734095618, "grad_norm": 0.43945491313934326, "learning_rate": 6.962970490590506e-06, "loss": 0.0301, "step": 83190 }, { "epoch": 1.2769549535722509, "grad_norm": 0.3210313022136688, "learning_rate": 6.960418390676425e-06, "loss": 0.0272, "step": 83200 }, { "epoch": 1.2771084337349397, "grad_norm": 0.39387038350105286, "learning_rate": 6.957866508871068e-06, "loss": 0.0248, "step": 83210 }, { "epoch": 1.2772619138976287, "grad_norm": 0.3385421931743622, "learning_rate": 6.955314845357555e-06, "loss": 0.0217, "step": 83220 }, { "epoch": 1.2774153940603177, "grad_norm": 0.40187525749206543, "learning_rate": 6.952763400318976e-06, "loss": 0.0335, "step": 83230 }, { "epoch": 1.2775688742230067, "grad_norm": 0.39094650745391846, "learning_rate": 6.95021217393842e-06, "loss": 0.0291, "step": 83240 }, { "epoch": 1.2777223543856957, "grad_norm": 0.3307245373725891, "learning_rate": 6.947661166398949e-06, "loss": 0.0336, "step": 83250 }, { "epoch": 1.2778758345483845, "grad_norm": 0.34923243522644043, "learning_rate": 6.9451103778836125e-06, "loss": 0.0261, "step": 83260 }, { "epoch": 1.2780293147110737, "grad_norm": 0.6740001440048218, "learning_rate": 6.942559808575449e-06, "loss": 0.0341, "step": 83270 }, { "epoch": 1.2781827948737625, "grad_norm": 0.44595226645469666, "learning_rate": 6.940009458657473e-06, "loss": 0.0339, "step": 83280 }, { "epoch": 1.2783362750364515, "grad_norm": 0.37964630126953125, "learning_rate": 6.937459328312695e-06, "loss": 0.0277, "step": 83290 }, { "epoch": 1.2784897551991405, "grad_norm": 0.57953941822052, "learning_rate": 6.934909417724096e-06, "loss": 0.0268, "step": 83300 }, { "epoch": 1.2786432353618296, "grad_norm": 0.34863314032554626, "learning_rate": 6.9323597270746465e-06, "loss": 0.0217, "step": 83310 }, { "epoch": 1.2787967155245186, "grad_norm": 0.3230600953102112, "learning_rate": 6.92981025654731e-06, "loss": 0.0213, "step": 83320 }, { "epoch": 1.2789501956872074, "grad_norm": 0.5246408581733704, "learning_rate": 6.92726100632502e-06, "loss": 0.0283, "step": 83330 }, { "epoch": 1.2791036758498964, "grad_norm": 0.29993462562561035, "learning_rate": 6.9247119765907074e-06, "loss": 0.028, "step": 83340 }, { "epoch": 1.2792571560125854, "grad_norm": 0.279821515083313, "learning_rate": 6.922163167527274e-06, "loss": 0.0325, "step": 83350 }, { "epoch": 1.2794106361752744, "grad_norm": 0.33303767442703247, "learning_rate": 6.919614579317615e-06, "loss": 0.0313, "step": 83360 }, { "epoch": 1.2795641163379634, "grad_norm": 0.47043102979660034, "learning_rate": 6.917066212144609e-06, "loss": 0.0253, "step": 83370 }, { "epoch": 1.2797175965006522, "grad_norm": 0.3576599061489105, "learning_rate": 6.914518066191116e-06, "loss": 0.0255, "step": 83380 }, { "epoch": 1.2798710766633412, "grad_norm": 0.3084922432899475, "learning_rate": 6.9119701416399834e-06, "loss": 0.0326, "step": 83390 }, { "epoch": 1.2800245568260302, "grad_norm": 0.29743802547454834, "learning_rate": 6.909422438674035e-06, "loss": 0.0326, "step": 83400 }, { "epoch": 1.2801780369887192, "grad_norm": 0.33302029967308044, "learning_rate": 6.906874957476089e-06, "loss": 0.0321, "step": 83410 }, { "epoch": 1.2803315171514082, "grad_norm": 0.37289294600486755, "learning_rate": 6.904327698228939e-06, "loss": 0.0366, "step": 83420 }, { "epoch": 1.280484997314097, "grad_norm": 0.542851448059082, "learning_rate": 6.901780661115372e-06, "loss": 0.0235, "step": 83430 }, { "epoch": 1.280638477476786, "grad_norm": 0.3470197319984436, "learning_rate": 6.899233846318152e-06, "loss": 0.0273, "step": 83440 }, { "epoch": 1.280791957639475, "grad_norm": 0.3489304780960083, "learning_rate": 6.896687254020022e-06, "loss": 0.0251, "step": 83450 }, { "epoch": 1.280945437802164, "grad_norm": 0.20198507606983185, "learning_rate": 6.894140884403724e-06, "loss": 0.0279, "step": 83460 }, { "epoch": 1.281098917964853, "grad_norm": 0.28032246232032776, "learning_rate": 6.89159473765197e-06, "loss": 0.0226, "step": 83470 }, { "epoch": 1.2812523981275419, "grad_norm": 0.45541033148765564, "learning_rate": 6.889048813947467e-06, "loss": 0.0268, "step": 83480 }, { "epoch": 1.2814058782902311, "grad_norm": 0.3952544331550598, "learning_rate": 6.8865031134729e-06, "loss": 0.0323, "step": 83490 }, { "epoch": 1.28155935845292, "grad_norm": 0.3894345462322235, "learning_rate": 6.883957636410932e-06, "loss": 0.0247, "step": 83500 }, { "epoch": 1.281712838615609, "grad_norm": 0.3741258978843689, "learning_rate": 6.881412382944223e-06, "loss": 0.0359, "step": 83510 }, { "epoch": 1.281866318778298, "grad_norm": 0.4451104998588562, "learning_rate": 6.878867353255409e-06, "loss": 0.0263, "step": 83520 }, { "epoch": 1.282019798940987, "grad_norm": 0.3555721640586853, "learning_rate": 6.876322547527112e-06, "loss": 0.0291, "step": 83530 }, { "epoch": 1.282173279103676, "grad_norm": 0.2788316011428833, "learning_rate": 6.873777965941941e-06, "loss": 0.0326, "step": 83540 }, { "epoch": 1.2823267592663647, "grad_norm": 0.3494073450565338, "learning_rate": 6.871233608682477e-06, "loss": 0.0278, "step": 83550 }, { "epoch": 1.2824802394290538, "grad_norm": 0.36639177799224854, "learning_rate": 6.868689475931298e-06, "loss": 0.0242, "step": 83560 }, { "epoch": 1.2826337195917428, "grad_norm": 0.291145920753479, "learning_rate": 6.866145567870961e-06, "loss": 0.0253, "step": 83570 }, { "epoch": 1.2827871997544318, "grad_norm": 0.34963321685791016, "learning_rate": 6.86360188468401e-06, "loss": 0.0317, "step": 83580 }, { "epoch": 1.2829406799171208, "grad_norm": 0.40078508853912354, "learning_rate": 6.861058426552968e-06, "loss": 0.0299, "step": 83590 }, { "epoch": 1.2830941600798096, "grad_norm": 0.389395534992218, "learning_rate": 6.858515193660338e-06, "loss": 0.0293, "step": 83600 }, { "epoch": 1.2832476402424986, "grad_norm": 0.22956448793411255, "learning_rate": 6.85597218618862e-06, "loss": 0.0202, "step": 83610 }, { "epoch": 1.2834011204051876, "grad_norm": 0.3349585235118866, "learning_rate": 6.8534294043202876e-06, "loss": 0.0285, "step": 83620 }, { "epoch": 1.2835546005678766, "grad_norm": 0.2527417242527008, "learning_rate": 6.850886848237801e-06, "loss": 0.0242, "step": 83630 }, { "epoch": 1.2837080807305656, "grad_norm": 0.32385098934173584, "learning_rate": 6.8483445181236084e-06, "loss": 0.032, "step": 83640 }, { "epoch": 1.2838615608932544, "grad_norm": 0.38474419713020325, "learning_rate": 6.845802414160132e-06, "loss": 0.0232, "step": 83650 }, { "epoch": 1.2840150410559434, "grad_norm": 0.45450806617736816, "learning_rate": 6.8432605365297846e-06, "loss": 0.025, "step": 83660 }, { "epoch": 1.2841685212186325, "grad_norm": 0.31362026929855347, "learning_rate": 6.840718885414963e-06, "loss": 0.0293, "step": 83670 }, { "epoch": 1.2843220013813215, "grad_norm": 0.42027369141578674, "learning_rate": 6.838177460998045e-06, "loss": 0.0296, "step": 83680 }, { "epoch": 1.2844754815440105, "grad_norm": 0.35464173555374146, "learning_rate": 6.8356362634613985e-06, "loss": 0.0301, "step": 83690 }, { "epoch": 1.2846289617066995, "grad_norm": 0.45071569085121155, "learning_rate": 6.8330952929873636e-06, "loss": 0.0309, "step": 83700 }, { "epoch": 1.2847824418693885, "grad_norm": 0.38208723068237305, "learning_rate": 6.830554549758271e-06, "loss": 0.0304, "step": 83710 }, { "epoch": 1.2849359220320773, "grad_norm": 0.457278847694397, "learning_rate": 6.828014033956439e-06, "loss": 0.0269, "step": 83720 }, { "epoch": 1.2850894021947663, "grad_norm": 0.3751294016838074, "learning_rate": 6.825473745764161e-06, "loss": 0.028, "step": 83730 }, { "epoch": 1.2852428823574553, "grad_norm": 0.4273888170719147, "learning_rate": 6.822933685363724e-06, "loss": 0.0319, "step": 83740 }, { "epoch": 1.2853963625201443, "grad_norm": 0.32109880447387695, "learning_rate": 6.820393852937387e-06, "loss": 0.0286, "step": 83750 }, { "epoch": 1.2855498426828333, "grad_norm": 0.4099518060684204, "learning_rate": 6.817854248667399e-06, "loss": 0.0268, "step": 83760 }, { "epoch": 1.2857033228455221, "grad_norm": 0.27316704392433167, "learning_rate": 6.815314872735997e-06, "loss": 0.0201, "step": 83770 }, { "epoch": 1.2858568030082111, "grad_norm": 0.3257780075073242, "learning_rate": 6.812775725325391e-06, "loss": 0.028, "step": 83780 }, { "epoch": 1.2860102831709002, "grad_norm": 0.31870830059051514, "learning_rate": 6.810236806617789e-06, "loss": 0.0222, "step": 83790 }, { "epoch": 1.2861637633335892, "grad_norm": 0.395234614610672, "learning_rate": 6.807698116795365e-06, "loss": 0.025, "step": 83800 }, { "epoch": 1.2863172434962782, "grad_norm": 0.40991246700286865, "learning_rate": 6.805159656040288e-06, "loss": 0.0285, "step": 83810 }, { "epoch": 1.286470723658967, "grad_norm": 0.45877107977867126, "learning_rate": 6.802621424534712e-06, "loss": 0.0307, "step": 83820 }, { "epoch": 1.286624203821656, "grad_norm": 0.49807414412498474, "learning_rate": 6.800083422460766e-06, "loss": 0.027, "step": 83830 }, { "epoch": 1.286777683984345, "grad_norm": 0.42980799078941345, "learning_rate": 6.797545650000573e-06, "loss": 0.0343, "step": 83840 }, { "epoch": 1.286931164147034, "grad_norm": 0.4701879918575287, "learning_rate": 6.79500810733623e-06, "loss": 0.0319, "step": 83850 }, { "epoch": 1.287084644309723, "grad_norm": 0.31097641587257385, "learning_rate": 6.792470794649818e-06, "loss": 0.0281, "step": 83860 }, { "epoch": 1.2872381244724118, "grad_norm": 0.4222452938556671, "learning_rate": 6.789933712123409e-06, "loss": 0.0215, "step": 83870 }, { "epoch": 1.287391604635101, "grad_norm": 0.3292378783226013, "learning_rate": 6.787396859939057e-06, "loss": 0.0318, "step": 83880 }, { "epoch": 1.2875450847977898, "grad_norm": 0.31659308075904846, "learning_rate": 6.784860238278794e-06, "loss": 0.0294, "step": 83890 }, { "epoch": 1.2876985649604789, "grad_norm": 0.3840588927268982, "learning_rate": 6.782323847324635e-06, "loss": 0.0347, "step": 83900 }, { "epoch": 1.2878520451231679, "grad_norm": 0.4423116147518158, "learning_rate": 6.779787687258586e-06, "loss": 0.0236, "step": 83910 }, { "epoch": 1.2880055252858569, "grad_norm": 0.2746477723121643, "learning_rate": 6.777251758262629e-06, "loss": 0.025, "step": 83920 }, { "epoch": 1.288159005448546, "grad_norm": 0.32622769474983215, "learning_rate": 6.774716060518738e-06, "loss": 0.0287, "step": 83930 }, { "epoch": 1.2883124856112347, "grad_norm": 0.4111975133419037, "learning_rate": 6.772180594208861e-06, "loss": 0.0201, "step": 83940 }, { "epoch": 1.2884659657739237, "grad_norm": 0.5633955001831055, "learning_rate": 6.769645359514931e-06, "loss": 0.0361, "step": 83950 }, { "epoch": 1.2886194459366127, "grad_norm": 0.4531387984752655, "learning_rate": 6.767110356618871e-06, "loss": 0.0266, "step": 83960 }, { "epoch": 1.2887729260993017, "grad_norm": 0.2996106743812561, "learning_rate": 6.7645755857025785e-06, "loss": 0.0275, "step": 83970 }, { "epoch": 1.2889264062619907, "grad_norm": 0.30801475048065186, "learning_rate": 6.762041046947945e-06, "loss": 0.0255, "step": 83980 }, { "epoch": 1.2890798864246795, "grad_norm": 0.2936535179615021, "learning_rate": 6.759506740536837e-06, "loss": 0.0192, "step": 83990 }, { "epoch": 1.2892333665873685, "grad_norm": 0.5648152232170105, "learning_rate": 6.756972666651103e-06, "loss": 0.0285, "step": 84000 }, { "epoch": 1.2893868467500575, "grad_norm": 0.2998200058937073, "learning_rate": 6.754438825472582e-06, "loss": 0.0394, "step": 84010 }, { "epoch": 1.2895403269127466, "grad_norm": 0.5187774896621704, "learning_rate": 6.7519052171830915e-06, "loss": 0.0349, "step": 84020 }, { "epoch": 1.2896938070754356, "grad_norm": 0.27877211570739746, "learning_rate": 6.749371841964434e-06, "loss": 0.0251, "step": 84030 }, { "epoch": 1.2898472872381244, "grad_norm": 0.5444676280021667, "learning_rate": 6.7468386999984e-06, "loss": 0.027, "step": 84040 }, { "epoch": 1.2900007674008134, "grad_norm": 0.3287384808063507, "learning_rate": 6.744305791466745e-06, "loss": 0.0303, "step": 84050 }, { "epoch": 1.2901542475635024, "grad_norm": 0.3657063841819763, "learning_rate": 6.741773116551232e-06, "loss": 0.0253, "step": 84060 }, { "epoch": 1.2903077277261914, "grad_norm": 0.21923793852329254, "learning_rate": 6.739240675433591e-06, "loss": 0.0218, "step": 84070 }, { "epoch": 1.2904612078888804, "grad_norm": 0.4299047589302063, "learning_rate": 6.736708468295544e-06, "loss": 0.0289, "step": 84080 }, { "epoch": 1.2906146880515692, "grad_norm": 0.40590953826904297, "learning_rate": 6.734176495318792e-06, "loss": 0.0232, "step": 84090 }, { "epoch": 1.2907681682142584, "grad_norm": 0.3208829164505005, "learning_rate": 6.731644756685014e-06, "loss": 0.0326, "step": 84100 }, { "epoch": 1.2909216483769472, "grad_norm": 0.4471072554588318, "learning_rate": 6.729113252575885e-06, "loss": 0.0382, "step": 84110 }, { "epoch": 1.2910751285396362, "grad_norm": 0.4523414075374603, "learning_rate": 6.72658198317305e-06, "loss": 0.0288, "step": 84120 }, { "epoch": 1.2912286087023253, "grad_norm": 0.4817972779273987, "learning_rate": 6.724050948658147e-06, "loss": 0.0343, "step": 84130 }, { "epoch": 1.2913820888650143, "grad_norm": 0.5129828453063965, "learning_rate": 6.721520149212798e-06, "loss": 0.0356, "step": 84140 }, { "epoch": 1.2915355690277033, "grad_norm": 0.47050750255584717, "learning_rate": 6.718989585018592e-06, "loss": 0.0307, "step": 84150 }, { "epoch": 1.291689049190392, "grad_norm": 0.49171119928359985, "learning_rate": 6.716459256257117e-06, "loss": 0.0385, "step": 84160 }, { "epoch": 1.291842529353081, "grad_norm": 0.211181640625, "learning_rate": 6.713929163109946e-06, "loss": 0.0259, "step": 84170 }, { "epoch": 1.29199600951577, "grad_norm": 0.5362522006034851, "learning_rate": 6.7113993057586215e-06, "loss": 0.0303, "step": 84180 }, { "epoch": 1.292149489678459, "grad_norm": 0.42101016640663147, "learning_rate": 6.708869684384681e-06, "loss": 0.0272, "step": 84190 }, { "epoch": 1.2923029698411481, "grad_norm": 0.35809338092803955, "learning_rate": 6.706340299169638e-06, "loss": 0.0302, "step": 84200 }, { "epoch": 1.292456450003837, "grad_norm": 0.33626145124435425, "learning_rate": 6.7038111502949895e-06, "loss": 0.0212, "step": 84210 }, { "epoch": 1.292609930166526, "grad_norm": 0.44382765889167786, "learning_rate": 6.701282237942221e-06, "loss": 0.0281, "step": 84220 }, { "epoch": 1.292763410329215, "grad_norm": 0.31703707575798035, "learning_rate": 6.698753562292796e-06, "loss": 0.0279, "step": 84230 }, { "epoch": 1.292916890491904, "grad_norm": 0.5556284189224243, "learning_rate": 6.696225123528166e-06, "loss": 0.0289, "step": 84240 }, { "epoch": 1.293070370654593, "grad_norm": 0.4176824688911438, "learning_rate": 6.693696921829758e-06, "loss": 0.0252, "step": 84250 }, { "epoch": 1.2932238508172818, "grad_norm": 0.5126659870147705, "learning_rate": 6.691168957378984e-06, "loss": 0.024, "step": 84260 }, { "epoch": 1.2933773309799708, "grad_norm": 0.3021480441093445, "learning_rate": 6.688641230357247e-06, "loss": 0.0273, "step": 84270 }, { "epoch": 1.2935308111426598, "grad_norm": 0.6080146431922913, "learning_rate": 6.686113740945921e-06, "loss": 0.0326, "step": 84280 }, { "epoch": 1.2936842913053488, "grad_norm": 0.39323538541793823, "learning_rate": 6.6835864893263765e-06, "loss": 0.029, "step": 84290 }, { "epoch": 1.2938377714680378, "grad_norm": 0.36213934421539307, "learning_rate": 6.681059475679954e-06, "loss": 0.0305, "step": 84300 }, { "epoch": 1.2939912516307266, "grad_norm": 0.3102864921092987, "learning_rate": 6.6785327001879805e-06, "loss": 0.0252, "step": 84310 }, { "epoch": 1.2941447317934158, "grad_norm": 0.2944985628128052, "learning_rate": 6.6760061630317745e-06, "loss": 0.0237, "step": 84320 }, { "epoch": 1.2942982119561046, "grad_norm": 0.365214079618454, "learning_rate": 6.673479864392622e-06, "loss": 0.0294, "step": 84330 }, { "epoch": 1.2944516921187936, "grad_norm": 0.283592164516449, "learning_rate": 6.67095380445181e-06, "loss": 0.0241, "step": 84340 }, { "epoch": 1.2946051722814826, "grad_norm": 0.19317537546157837, "learning_rate": 6.668427983390593e-06, "loss": 0.0281, "step": 84350 }, { "epoch": 1.2947586524441717, "grad_norm": 0.21060769259929657, "learning_rate": 6.665902401390212e-06, "loss": 0.0288, "step": 84360 }, { "epoch": 1.2949121326068607, "grad_norm": 0.4040389955043793, "learning_rate": 6.663377058631899e-06, "loss": 0.0324, "step": 84370 }, { "epoch": 1.2950656127695495, "grad_norm": 0.31030839681625366, "learning_rate": 6.660851955296858e-06, "loss": 0.0249, "step": 84380 }, { "epoch": 1.2952190929322385, "grad_norm": 0.4383092224597931, "learning_rate": 6.658327091566288e-06, "loss": 0.0241, "step": 84390 }, { "epoch": 1.2953725730949275, "grad_norm": 0.4601169228553772, "learning_rate": 6.655802467621356e-06, "loss": 0.0246, "step": 84400 }, { "epoch": 1.2955260532576165, "grad_norm": 0.3947846293449402, "learning_rate": 6.6532780836432196e-06, "loss": 0.0253, "step": 84410 }, { "epoch": 1.2956795334203055, "grad_norm": 0.354208379983902, "learning_rate": 6.650753939813022e-06, "loss": 0.0268, "step": 84420 }, { "epoch": 1.2958330135829943, "grad_norm": 0.3868870437145233, "learning_rate": 6.648230036311887e-06, "loss": 0.0301, "step": 84430 }, { "epoch": 1.2959864937456833, "grad_norm": 0.43645647168159485, "learning_rate": 6.645706373320921e-06, "loss": 0.0304, "step": 84440 }, { "epoch": 1.2961399739083723, "grad_norm": 0.33336755633354187, "learning_rate": 6.643182951021205e-06, "loss": 0.0263, "step": 84450 }, { "epoch": 1.2962934540710613, "grad_norm": 0.33014312386512756, "learning_rate": 6.640659769593818e-06, "loss": 0.0262, "step": 84460 }, { "epoch": 1.2964469342337503, "grad_norm": 0.36436644196510315, "learning_rate": 6.638136829219808e-06, "loss": 0.0286, "step": 84470 }, { "epoch": 1.2966004143964391, "grad_norm": 0.43487587571144104, "learning_rate": 6.635614130080219e-06, "loss": 0.0279, "step": 84480 }, { "epoch": 1.2967538945591282, "grad_norm": 0.41019633412361145, "learning_rate": 6.633091672356064e-06, "loss": 0.0292, "step": 84490 }, { "epoch": 1.2969073747218172, "grad_norm": 0.24444620311260223, "learning_rate": 6.630569456228344e-06, "loss": 0.0339, "step": 84500 }, { "epoch": 1.2970608548845062, "grad_norm": 0.3390071392059326, "learning_rate": 6.628047481878048e-06, "loss": 0.028, "step": 84510 }, { "epoch": 1.2972143350471952, "grad_norm": 0.35972538590431213, "learning_rate": 6.6255257494861415e-06, "loss": 0.0273, "step": 84520 }, { "epoch": 1.2973678152098842, "grad_norm": 0.2952640950679779, "learning_rate": 6.623004259233576e-06, "loss": 0.024, "step": 84530 }, { "epoch": 1.2975212953725732, "grad_norm": 0.2590095102787018, "learning_rate": 6.620483011301279e-06, "loss": 0.0325, "step": 84540 }, { "epoch": 1.297674775535262, "grad_norm": 0.3306874930858612, "learning_rate": 6.617962005870169e-06, "loss": 0.0266, "step": 84550 }, { "epoch": 1.297828255697951, "grad_norm": 0.35416582226753235, "learning_rate": 6.615441243121146e-06, "loss": 0.0311, "step": 84560 }, { "epoch": 1.29798173586064, "grad_norm": 0.2935742437839508, "learning_rate": 6.6129207232350835e-06, "loss": 0.0268, "step": 84570 }, { "epoch": 1.298135216023329, "grad_norm": 0.3653823137283325, "learning_rate": 6.610400446392854e-06, "loss": 0.0348, "step": 84580 }, { "epoch": 1.298288696186018, "grad_norm": 0.3803237974643707, "learning_rate": 6.607880412775295e-06, "loss": 0.0268, "step": 84590 }, { "epoch": 1.2984421763487068, "grad_norm": 0.32608452439308167, "learning_rate": 6.6053606225632345e-06, "loss": 0.0217, "step": 84600 }, { "epoch": 1.2985956565113959, "grad_norm": 0.33123424649238586, "learning_rate": 6.602841075937488e-06, "loss": 0.025, "step": 84610 }, { "epoch": 1.2987491366740849, "grad_norm": 0.27816712856292725, "learning_rate": 6.600321773078844e-06, "loss": 0.0367, "step": 84620 }, { "epoch": 1.2989026168367739, "grad_norm": 0.40005216002464294, "learning_rate": 6.5978027141680844e-06, "loss": 0.0253, "step": 84630 }, { "epoch": 1.299056096999463, "grad_norm": 0.528114378452301, "learning_rate": 6.59528389938596e-06, "loss": 0.0333, "step": 84640 }, { "epoch": 1.2992095771621517, "grad_norm": 0.3563142418861389, "learning_rate": 6.592765328913212e-06, "loss": 0.0294, "step": 84650 }, { "epoch": 1.2993630573248407, "grad_norm": 0.2948121130466461, "learning_rate": 6.590247002930567e-06, "loss": 0.0248, "step": 84660 }, { "epoch": 1.2995165374875297, "grad_norm": 0.23710867762565613, "learning_rate": 6.587728921618728e-06, "loss": 0.0264, "step": 84670 }, { "epoch": 1.2996700176502187, "grad_norm": 0.496268093585968, "learning_rate": 6.585211085158387e-06, "loss": 0.0309, "step": 84680 }, { "epoch": 1.2998234978129077, "grad_norm": 0.3350389897823334, "learning_rate": 6.58269349373021e-06, "loss": 0.0268, "step": 84690 }, { "epoch": 1.2999769779755965, "grad_norm": 0.6833000183105469, "learning_rate": 6.580176147514847e-06, "loss": 0.0331, "step": 84700 }, { "epoch": 1.3001304581382858, "grad_norm": 0.49398133158683777, "learning_rate": 6.577659046692937e-06, "loss": 0.0294, "step": 84710 }, { "epoch": 1.3002839383009746, "grad_norm": 0.3385235667228699, "learning_rate": 6.575142191445101e-06, "loss": 0.0302, "step": 84720 }, { "epoch": 1.3004374184636636, "grad_norm": 0.4753018021583557, "learning_rate": 6.572625581951936e-06, "loss": 0.0271, "step": 84730 }, { "epoch": 1.3005908986263526, "grad_norm": 0.3870942294597626, "learning_rate": 6.57010921839402e-06, "loss": 0.0368, "step": 84740 }, { "epoch": 1.3007443787890416, "grad_norm": 0.4830029606819153, "learning_rate": 6.567593100951922e-06, "loss": 0.0314, "step": 84750 }, { "epoch": 1.3008978589517306, "grad_norm": 0.2871630787849426, "learning_rate": 6.565077229806187e-06, "loss": 0.0294, "step": 84760 }, { "epoch": 1.3010513391144194, "grad_norm": 0.4857342839241028, "learning_rate": 6.562561605137347e-06, "loss": 0.0231, "step": 84770 }, { "epoch": 1.3012048192771084, "grad_norm": 0.5388533473014832, "learning_rate": 6.560046227125914e-06, "loss": 0.0412, "step": 84780 }, { "epoch": 1.3013582994397974, "grad_norm": 0.3539995849132538, "learning_rate": 6.557531095952376e-06, "loss": 0.0293, "step": 84790 }, { "epoch": 1.3015117796024864, "grad_norm": 0.4525367021560669, "learning_rate": 6.555016211797215e-06, "loss": 0.0289, "step": 84800 }, { "epoch": 1.3016652597651754, "grad_norm": 0.36674392223358154, "learning_rate": 6.552501574840885e-06, "loss": 0.0227, "step": 84810 }, { "epoch": 1.3018187399278642, "grad_norm": 0.405341774225235, "learning_rate": 6.549987185263831e-06, "loss": 0.0274, "step": 84820 }, { "epoch": 1.3019722200905532, "grad_norm": 0.22003935277462006, "learning_rate": 6.5474730432464766e-06, "loss": 0.0314, "step": 84830 }, { "epoch": 1.3021257002532423, "grad_norm": 0.5387740135192871, "learning_rate": 6.544959148969219e-06, "loss": 0.0293, "step": 84840 }, { "epoch": 1.3022791804159313, "grad_norm": 0.5106784105300903, "learning_rate": 6.542445502612454e-06, "loss": 0.0239, "step": 84850 }, { "epoch": 1.3024326605786203, "grad_norm": 0.33247774839401245, "learning_rate": 6.539932104356548e-06, "loss": 0.0249, "step": 84860 }, { "epoch": 1.302586140741309, "grad_norm": 0.5579949021339417, "learning_rate": 6.537418954381854e-06, "loss": 0.0332, "step": 84870 }, { "epoch": 1.302739620903998, "grad_norm": 0.4084395170211792, "learning_rate": 6.534906052868706e-06, "loss": 0.0276, "step": 84880 }, { "epoch": 1.302893101066687, "grad_norm": 0.3435167670249939, "learning_rate": 6.532393399997416e-06, "loss": 0.0281, "step": 84890 }, { "epoch": 1.303046581229376, "grad_norm": 0.5868661999702454, "learning_rate": 6.529880995948287e-06, "loss": 0.0327, "step": 84900 }, { "epoch": 1.3032000613920651, "grad_norm": 0.2858729660511017, "learning_rate": 6.527368840901597e-06, "loss": 0.0295, "step": 84910 }, { "epoch": 1.303353541554754, "grad_norm": 0.4948175251483917, "learning_rate": 6.524856935037611e-06, "loss": 0.0387, "step": 84920 }, { "epoch": 1.3035070217174431, "grad_norm": 0.5029679536819458, "learning_rate": 6.5223452785365745e-06, "loss": 0.0297, "step": 84930 }, { "epoch": 1.303660501880132, "grad_norm": 0.3099406957626343, "learning_rate": 6.519833871578709e-06, "loss": 0.0299, "step": 84940 }, { "epoch": 1.303813982042821, "grad_norm": 0.40518566966056824, "learning_rate": 6.517322714344229e-06, "loss": 0.0306, "step": 84950 }, { "epoch": 1.30396746220551, "grad_norm": 0.4040553569793701, "learning_rate": 6.514811807013321e-06, "loss": 0.022, "step": 84960 }, { "epoch": 1.304120942368199, "grad_norm": 0.32455238699913025, "learning_rate": 6.51230114976616e-06, "loss": 0.0238, "step": 84970 }, { "epoch": 1.304274422530888, "grad_norm": 0.3537676930427551, "learning_rate": 6.509790742782909e-06, "loss": 0.0261, "step": 84980 }, { "epoch": 1.3044279026935768, "grad_norm": 0.32927024364471436, "learning_rate": 6.50728058624369e-06, "loss": 0.0257, "step": 84990 }, { "epoch": 1.3045813828562658, "grad_norm": 0.3290751874446869, "learning_rate": 6.5047706803286305e-06, "loss": 0.0391, "step": 85000 }, { "epoch": 1.3047348630189548, "grad_norm": 0.3684453070163727, "learning_rate": 6.502261025217833e-06, "loss": 0.0282, "step": 85010 }, { "epoch": 1.3048883431816438, "grad_norm": 0.5066929459571838, "learning_rate": 6.499751621091377e-06, "loss": 0.0293, "step": 85020 }, { "epoch": 1.3050418233443328, "grad_norm": 0.324508935213089, "learning_rate": 6.497242468129335e-06, "loss": 0.0247, "step": 85030 }, { "epoch": 1.3051953035070216, "grad_norm": 0.2613449692726135, "learning_rate": 6.494733566511744e-06, "loss": 0.0241, "step": 85040 }, { "epoch": 1.3053487836697106, "grad_norm": 0.29794320464134216, "learning_rate": 6.492224916418638e-06, "loss": 0.0271, "step": 85050 }, { "epoch": 1.3055022638323996, "grad_norm": 0.3332502841949463, "learning_rate": 6.489716518030029e-06, "loss": 0.0274, "step": 85060 }, { "epoch": 1.3056557439950887, "grad_norm": 0.553389847278595, "learning_rate": 6.487208371525908e-06, "loss": 0.0267, "step": 85070 }, { "epoch": 1.3058092241577777, "grad_norm": 0.3595902919769287, "learning_rate": 6.484700477086254e-06, "loss": 0.0244, "step": 85080 }, { "epoch": 1.3059627043204665, "grad_norm": 0.36437347531318665, "learning_rate": 6.4821928348910215e-06, "loss": 0.0278, "step": 85090 }, { "epoch": 1.3061161844831555, "grad_norm": 0.35886287689208984, "learning_rate": 6.4796854451201444e-06, "loss": 0.028, "step": 85100 }, { "epoch": 1.3062696646458445, "grad_norm": 0.462152361869812, "learning_rate": 6.47717830795355e-06, "loss": 0.0304, "step": 85110 }, { "epoch": 1.3064231448085335, "grad_norm": 0.2904425859451294, "learning_rate": 6.474671423571138e-06, "loss": 0.0283, "step": 85120 }, { "epoch": 1.3065766249712225, "grad_norm": 0.27537691593170166, "learning_rate": 6.472164792152795e-06, "loss": 0.0258, "step": 85130 }, { "epoch": 1.3067301051339115, "grad_norm": 0.5365629196166992, "learning_rate": 6.469658413878386e-06, "loss": 0.0303, "step": 85140 }, { "epoch": 1.3068835852966005, "grad_norm": 0.3305548429489136, "learning_rate": 6.467152288927756e-06, "loss": 0.0261, "step": 85150 }, { "epoch": 1.3070370654592893, "grad_norm": 0.39767423272132874, "learning_rate": 6.46464641748074e-06, "loss": 0.0211, "step": 85160 }, { "epoch": 1.3071905456219783, "grad_norm": 0.6174197793006897, "learning_rate": 6.4621407997171445e-06, "loss": 0.0313, "step": 85170 }, { "epoch": 1.3073440257846674, "grad_norm": 0.3217359185218811, "learning_rate": 6.459635435816771e-06, "loss": 0.0243, "step": 85180 }, { "epoch": 1.3074975059473564, "grad_norm": 0.26014450192451477, "learning_rate": 6.457130325959389e-06, "loss": 0.0318, "step": 85190 }, { "epoch": 1.3076509861100454, "grad_norm": 0.32450735569000244, "learning_rate": 6.454625470324753e-06, "loss": 0.0321, "step": 85200 }, { "epoch": 1.3078044662727342, "grad_norm": 0.4522975981235504, "learning_rate": 6.452120869092606e-06, "loss": 0.0346, "step": 85210 }, { "epoch": 1.3079579464354232, "grad_norm": 0.3614056706428528, "learning_rate": 6.4496165224426675e-06, "loss": 0.0294, "step": 85220 }, { "epoch": 1.3081114265981122, "grad_norm": 0.3521830439567566, "learning_rate": 6.447112430554644e-06, "loss": 0.0259, "step": 85230 }, { "epoch": 1.3082649067608012, "grad_norm": 0.4002465605735779, "learning_rate": 6.444608593608214e-06, "loss": 0.0268, "step": 85240 }, { "epoch": 1.3084183869234902, "grad_norm": 0.39970502257347107, "learning_rate": 6.442105011783042e-06, "loss": 0.0293, "step": 85250 }, { "epoch": 1.308571867086179, "grad_norm": 0.3408580422401428, "learning_rate": 6.439601685258778e-06, "loss": 0.0321, "step": 85260 }, { "epoch": 1.308725347248868, "grad_norm": 0.2671176493167877, "learning_rate": 6.437098614215055e-06, "loss": 0.0242, "step": 85270 }, { "epoch": 1.308878827411557, "grad_norm": 0.3659156858921051, "learning_rate": 6.434595798831481e-06, "loss": 0.0263, "step": 85280 }, { "epoch": 1.309032307574246, "grad_norm": 0.38558229804039, "learning_rate": 6.432093239287644e-06, "loss": 0.0261, "step": 85290 }, { "epoch": 1.309185787736935, "grad_norm": 0.37992632389068604, "learning_rate": 6.429590935763123e-06, "loss": 0.0308, "step": 85300 }, { "epoch": 1.3093392678996238, "grad_norm": 0.34400856494903564, "learning_rate": 6.4270888884374715e-06, "loss": 0.0293, "step": 85310 }, { "epoch": 1.309492748062313, "grad_norm": 0.35915517807006836, "learning_rate": 6.424587097490232e-06, "loss": 0.0222, "step": 85320 }, { "epoch": 1.3096462282250019, "grad_norm": 0.3449830710887909, "learning_rate": 6.422085563100917e-06, "loss": 0.0349, "step": 85330 }, { "epoch": 1.3097997083876909, "grad_norm": 0.2865857183933258, "learning_rate": 6.419584285449027e-06, "loss": 0.0264, "step": 85340 }, { "epoch": 1.30995318855038, "grad_norm": 0.4648551642894745, "learning_rate": 6.417083264714049e-06, "loss": 0.0277, "step": 85350 }, { "epoch": 1.310106668713069, "grad_norm": 0.3822779357433319, "learning_rate": 6.414582501075441e-06, "loss": 0.0202, "step": 85360 }, { "epoch": 1.310260148875758, "grad_norm": 0.29519015550613403, "learning_rate": 6.412081994712654e-06, "loss": 0.0223, "step": 85370 }, { "epoch": 1.3104136290384467, "grad_norm": 0.3134832978248596, "learning_rate": 6.409581745805115e-06, "loss": 0.0292, "step": 85380 }, { "epoch": 1.3105671092011357, "grad_norm": 0.2281198650598526, "learning_rate": 6.407081754532223e-06, "loss": 0.0217, "step": 85390 }, { "epoch": 1.3107205893638247, "grad_norm": 0.36954760551452637, "learning_rate": 6.404582021073378e-06, "loss": 0.0282, "step": 85400 }, { "epoch": 1.3108740695265138, "grad_norm": 0.42460858821868896, "learning_rate": 6.402082545607944e-06, "loss": 0.0298, "step": 85410 }, { "epoch": 1.3110275496892028, "grad_norm": 0.27457502484321594, "learning_rate": 6.399583328315279e-06, "loss": 0.0256, "step": 85420 }, { "epoch": 1.3111810298518916, "grad_norm": 0.6010476350784302, "learning_rate": 6.397084369374719e-06, "loss": 0.0256, "step": 85430 }, { "epoch": 1.3113345100145806, "grad_norm": 0.36341845989227295, "learning_rate": 6.394585668965572e-06, "loss": 0.0299, "step": 85440 }, { "epoch": 1.3114879901772696, "grad_norm": 0.35189855098724365, "learning_rate": 6.3920872272671395e-06, "loss": 0.0229, "step": 85450 }, { "epoch": 1.3116414703399586, "grad_norm": 0.2846338748931885, "learning_rate": 6.389589044458699e-06, "loss": 0.0293, "step": 85460 }, { "epoch": 1.3117949505026476, "grad_norm": 0.2822568118572235, "learning_rate": 6.387091120719513e-06, "loss": 0.0331, "step": 85470 }, { "epoch": 1.3119484306653364, "grad_norm": 0.31336700916290283, "learning_rate": 6.384593456228824e-06, "loss": 0.0275, "step": 85480 }, { "epoch": 1.3121019108280254, "grad_norm": 0.3263188302516937, "learning_rate": 6.382096051165847e-06, "loss": 0.0238, "step": 85490 }, { "epoch": 1.3122553909907144, "grad_norm": 0.39789023995399475, "learning_rate": 6.379598905709794e-06, "loss": 0.0237, "step": 85500 }, { "epoch": 1.3124088711534034, "grad_norm": 0.3683182895183563, "learning_rate": 6.377102020039846e-06, "loss": 0.0236, "step": 85510 }, { "epoch": 1.3125623513160924, "grad_norm": 0.3975193500518799, "learning_rate": 6.374605394335172e-06, "loss": 0.0264, "step": 85520 }, { "epoch": 1.3127158314787812, "grad_norm": 0.3264297842979431, "learning_rate": 6.372109028774925e-06, "loss": 0.0294, "step": 85530 }, { "epoch": 1.3128693116414705, "grad_norm": 0.3894682228565216, "learning_rate": 6.369612923538226e-06, "loss": 0.0257, "step": 85540 }, { "epoch": 1.3130227918041593, "grad_norm": 0.42016005516052246, "learning_rate": 6.367117078804188e-06, "loss": 0.0252, "step": 85550 }, { "epoch": 1.3131762719668483, "grad_norm": 0.4426501393318176, "learning_rate": 6.364621494751908e-06, "loss": 0.0274, "step": 85560 }, { "epoch": 1.3133297521295373, "grad_norm": 0.32913628220558167, "learning_rate": 6.362126171560454e-06, "loss": 0.0327, "step": 85570 }, { "epoch": 1.3134832322922263, "grad_norm": 0.5335405468940735, "learning_rate": 6.359631109408888e-06, "loss": 0.0327, "step": 85580 }, { "epoch": 1.3136367124549153, "grad_norm": 0.5272404551506042, "learning_rate": 6.35713630847624e-06, "loss": 0.025, "step": 85590 }, { "epoch": 1.313790192617604, "grad_norm": 0.43320655822753906, "learning_rate": 6.354641768941525e-06, "loss": 0.0258, "step": 85600 }, { "epoch": 1.3139436727802931, "grad_norm": 0.22143231332302094, "learning_rate": 6.352147490983749e-06, "loss": 0.0236, "step": 85610 }, { "epoch": 1.3140971529429821, "grad_norm": 0.5292280316352844, "learning_rate": 6.349653474781886e-06, "loss": 0.0368, "step": 85620 }, { "epoch": 1.3142506331056711, "grad_norm": 0.3589153587818146, "learning_rate": 6.347159720514904e-06, "loss": 0.0264, "step": 85630 }, { "epoch": 1.3144041132683602, "grad_norm": 0.41653671860694885, "learning_rate": 6.344666228361738e-06, "loss": 0.0333, "step": 85640 }, { "epoch": 1.314557593431049, "grad_norm": 0.526569128036499, "learning_rate": 6.34217299850131e-06, "loss": 0.0289, "step": 85650 }, { "epoch": 1.314711073593738, "grad_norm": 0.24681007862091064, "learning_rate": 6.339680031112533e-06, "loss": 0.0276, "step": 85660 }, { "epoch": 1.314864553756427, "grad_norm": 0.41604354977607727, "learning_rate": 6.337187326374285e-06, "loss": 0.0285, "step": 85670 }, { "epoch": 1.315018033919116, "grad_norm": 0.3963349461555481, "learning_rate": 6.334694884465442e-06, "loss": 0.0366, "step": 85680 }, { "epoch": 1.315171514081805, "grad_norm": 0.3780421018600464, "learning_rate": 6.332202705564842e-06, "loss": 0.038, "step": 85690 }, { "epoch": 1.3153249942444938, "grad_norm": 0.44257786870002747, "learning_rate": 6.3297107898513196e-06, "loss": 0.0277, "step": 85700 }, { "epoch": 1.3154784744071828, "grad_norm": 0.3103559613227844, "learning_rate": 6.327219137503684e-06, "loss": 0.0277, "step": 85710 }, { "epoch": 1.3156319545698718, "grad_norm": 0.4959155023097992, "learning_rate": 6.324727748700725e-06, "loss": 0.0265, "step": 85720 }, { "epoch": 1.3157854347325608, "grad_norm": 0.3037033677101135, "learning_rate": 6.322236623621223e-06, "loss": 0.0333, "step": 85730 }, { "epoch": 1.3159389148952498, "grad_norm": 0.3666764199733734, "learning_rate": 6.319745762443921e-06, "loss": 0.0247, "step": 85740 }, { "epoch": 1.3160923950579386, "grad_norm": 0.26612186431884766, "learning_rate": 6.3172551653475575e-06, "loss": 0.0297, "step": 85750 }, { "epoch": 1.3162458752206279, "grad_norm": 0.42608052492141724, "learning_rate": 6.31476483251085e-06, "loss": 0.0277, "step": 85760 }, { "epoch": 1.3163993553833166, "grad_norm": 0.3917756676673889, "learning_rate": 6.312274764112491e-06, "loss": 0.0316, "step": 85770 }, { "epoch": 1.3165528355460057, "grad_norm": 0.33476752042770386, "learning_rate": 6.309784960331168e-06, "loss": 0.0365, "step": 85780 }, { "epoch": 1.3167063157086947, "grad_norm": 0.3166705071926117, "learning_rate": 6.3072954213455276e-06, "loss": 0.0219, "step": 85790 }, { "epoch": 1.3168597958713837, "grad_norm": 0.49059659242630005, "learning_rate": 6.304806147334217e-06, "loss": 0.0346, "step": 85800 }, { "epoch": 1.3170132760340727, "grad_norm": 0.3948754370212555, "learning_rate": 6.3023171384758545e-06, "loss": 0.0291, "step": 85810 }, { "epoch": 1.3171667561967615, "grad_norm": 0.39233317971229553, "learning_rate": 6.299828394949043e-06, "loss": 0.0324, "step": 85820 }, { "epoch": 1.3173202363594505, "grad_norm": 0.2918206751346588, "learning_rate": 6.297339916932367e-06, "loss": 0.0234, "step": 85830 }, { "epoch": 1.3174737165221395, "grad_norm": 0.3145185708999634, "learning_rate": 6.294851704604383e-06, "loss": 0.0268, "step": 85840 }, { "epoch": 1.3176271966848285, "grad_norm": 0.4568954408168793, "learning_rate": 6.2923637581436425e-06, "loss": 0.0295, "step": 85850 }, { "epoch": 1.3177806768475175, "grad_norm": 0.206974595785141, "learning_rate": 6.289876077728667e-06, "loss": 0.0265, "step": 85860 }, { "epoch": 1.3179341570102063, "grad_norm": 0.45822712779045105, "learning_rate": 6.287388663537968e-06, "loss": 0.0288, "step": 85870 }, { "epoch": 1.3180876371728953, "grad_norm": 0.3633142411708832, "learning_rate": 6.28490151575003e-06, "loss": 0.0243, "step": 85880 }, { "epoch": 1.3182411173355844, "grad_norm": 0.48094654083251953, "learning_rate": 6.282414634543317e-06, "loss": 0.0262, "step": 85890 }, { "epoch": 1.3183945974982734, "grad_norm": 0.43206098675727844, "learning_rate": 6.279928020096284e-06, "loss": 0.0247, "step": 85900 }, { "epoch": 1.3185480776609624, "grad_norm": 0.42752155661582947, "learning_rate": 6.277441672587357e-06, "loss": 0.0388, "step": 85910 }, { "epoch": 1.3187015578236512, "grad_norm": 0.3372863233089447, "learning_rate": 6.274955592194951e-06, "loss": 0.0327, "step": 85920 }, { "epoch": 1.3188550379863402, "grad_norm": 0.38852325081825256, "learning_rate": 6.2724697790974565e-06, "loss": 0.0226, "step": 85930 }, { "epoch": 1.3190085181490292, "grad_norm": 0.24854615330696106, "learning_rate": 6.26998423347324e-06, "loss": 0.0271, "step": 85940 }, { "epoch": 1.3191619983117182, "grad_norm": 0.3822893798351288, "learning_rate": 6.267498955500662e-06, "loss": 0.0349, "step": 85950 }, { "epoch": 1.3193154784744072, "grad_norm": 0.27142199873924255, "learning_rate": 6.26501394535805e-06, "loss": 0.027, "step": 85960 }, { "epoch": 1.3194689586370962, "grad_norm": 0.47511935234069824, "learning_rate": 6.262529203223726e-06, "loss": 0.0237, "step": 85970 }, { "epoch": 1.3196224387997852, "grad_norm": 0.4534223675727844, "learning_rate": 6.260044729275985e-06, "loss": 0.0335, "step": 85980 }, { "epoch": 1.319775918962474, "grad_norm": 0.5348135828971863, "learning_rate": 6.257560523693095e-06, "loss": 0.0336, "step": 85990 }, { "epoch": 1.319929399125163, "grad_norm": 0.376230925321579, "learning_rate": 6.255076586653322e-06, "loss": 0.0252, "step": 86000 }, { "epoch": 1.320082879287852, "grad_norm": 0.4784122705459595, "learning_rate": 6.2525929183348965e-06, "loss": 0.0283, "step": 86010 }, { "epoch": 1.320236359450541, "grad_norm": 0.46751266717910767, "learning_rate": 6.250109518916043e-06, "loss": 0.0247, "step": 86020 }, { "epoch": 1.32038983961323, "grad_norm": 0.21575568616390228, "learning_rate": 6.2476263885749614e-06, "loss": 0.0225, "step": 86030 }, { "epoch": 1.3205433197759189, "grad_norm": 0.4348372519016266, "learning_rate": 6.245143527489825e-06, "loss": 0.0298, "step": 86040 }, { "epoch": 1.320696799938608, "grad_norm": 0.2813442349433899, "learning_rate": 6.242660935838798e-06, "loss": 0.0246, "step": 86050 }, { "epoch": 1.320850280101297, "grad_norm": 0.4060543477535248, "learning_rate": 6.240178613800023e-06, "loss": 0.032, "step": 86060 }, { "epoch": 1.321003760263986, "grad_norm": 0.43853360414505005, "learning_rate": 6.23769656155162e-06, "loss": 0.0244, "step": 86070 }, { "epoch": 1.321157240426675, "grad_norm": 0.4921225607395172, "learning_rate": 6.2352147792716955e-06, "loss": 0.0266, "step": 86080 }, { "epoch": 1.3213107205893637, "grad_norm": 0.3845236301422119, "learning_rate": 6.232733267138328e-06, "loss": 0.0258, "step": 86090 }, { "epoch": 1.3214642007520527, "grad_norm": 0.37264978885650635, "learning_rate": 6.230252025329581e-06, "loss": 0.0306, "step": 86100 }, { "epoch": 1.3216176809147417, "grad_norm": 0.49906057119369507, "learning_rate": 6.227771054023501e-06, "loss": 0.0318, "step": 86110 }, { "epoch": 1.3217711610774308, "grad_norm": 0.512341320514679, "learning_rate": 6.225290353398113e-06, "loss": 0.0295, "step": 86120 }, { "epoch": 1.3219246412401198, "grad_norm": 0.48116323351860046, "learning_rate": 6.222809923631425e-06, "loss": 0.0261, "step": 86130 }, { "epoch": 1.3220781214028086, "grad_norm": 0.5350013971328735, "learning_rate": 6.220329764901418e-06, "loss": 0.0323, "step": 86140 }, { "epoch": 1.3222316015654978, "grad_norm": 0.3508281409740448, "learning_rate": 6.217849877386061e-06, "loss": 0.0209, "step": 86150 }, { "epoch": 1.3223850817281866, "grad_norm": 0.28217148780822754, "learning_rate": 6.215370261263303e-06, "loss": 0.0268, "step": 86160 }, { "epoch": 1.3225385618908756, "grad_norm": 0.4535939395427704, "learning_rate": 6.212890916711066e-06, "loss": 0.034, "step": 86170 }, { "epoch": 1.3226920420535646, "grad_norm": 0.32996100187301636, "learning_rate": 6.210411843907267e-06, "loss": 0.0252, "step": 86180 }, { "epoch": 1.3228455222162536, "grad_norm": 0.30439668893814087, "learning_rate": 6.20793304302979e-06, "loss": 0.0231, "step": 86190 }, { "epoch": 1.3229990023789426, "grad_norm": 0.3339299261569977, "learning_rate": 6.2054545142565e-06, "loss": 0.0255, "step": 86200 }, { "epoch": 1.3231524825416314, "grad_norm": 0.5551919937133789, "learning_rate": 6.202976257765254e-06, "loss": 0.0275, "step": 86210 }, { "epoch": 1.3233059627043204, "grad_norm": 0.34730619192123413, "learning_rate": 6.200498273733877e-06, "loss": 0.0265, "step": 86220 }, { "epoch": 1.3234594428670095, "grad_norm": 0.43284064531326294, "learning_rate": 6.198020562340187e-06, "loss": 0.0262, "step": 86230 }, { "epoch": 1.3236129230296985, "grad_norm": 0.499962717294693, "learning_rate": 6.195543123761966e-06, "loss": 0.0259, "step": 86240 }, { "epoch": 1.3237664031923875, "grad_norm": 0.2805246412754059, "learning_rate": 6.193065958176989e-06, "loss": 0.0237, "step": 86250 }, { "epoch": 1.3239198833550763, "grad_norm": 0.592346727848053, "learning_rate": 6.190589065763011e-06, "loss": 0.0238, "step": 86260 }, { "epoch": 1.3240733635177653, "grad_norm": 0.33498212695121765, "learning_rate": 6.188112446697758e-06, "loss": 0.0245, "step": 86270 }, { "epoch": 1.3242268436804543, "grad_norm": 0.26417428255081177, "learning_rate": 6.185636101158952e-06, "loss": 0.0266, "step": 86280 }, { "epoch": 1.3243803238431433, "grad_norm": 0.349211722612381, "learning_rate": 6.1831600293242765e-06, "loss": 0.0352, "step": 86290 }, { "epoch": 1.3245338040058323, "grad_norm": 0.3124637007713318, "learning_rate": 6.180684231371408e-06, "loss": 0.0256, "step": 86300 }, { "epoch": 1.324687284168521, "grad_norm": 0.502000093460083, "learning_rate": 6.178208707478003e-06, "loss": 0.0325, "step": 86310 }, { "epoch": 1.3248407643312101, "grad_norm": 0.23944947123527527, "learning_rate": 6.175733457821691e-06, "loss": 0.0253, "step": 86320 }, { "epoch": 1.3249942444938991, "grad_norm": 0.48349931836128235, "learning_rate": 6.1732584825800945e-06, "loss": 0.033, "step": 86330 }, { "epoch": 1.3251477246565881, "grad_norm": 0.3499644696712494, "learning_rate": 6.170783781930798e-06, "loss": 0.0232, "step": 86340 }, { "epoch": 1.3253012048192772, "grad_norm": 0.6063762903213501, "learning_rate": 6.168309356051384e-06, "loss": 0.0373, "step": 86350 }, { "epoch": 1.325454684981966, "grad_norm": 0.3673337399959564, "learning_rate": 6.165835205119404e-06, "loss": 0.0318, "step": 86360 }, { "epoch": 1.3256081651446552, "grad_norm": 0.279191792011261, "learning_rate": 6.163361329312395e-06, "loss": 0.027, "step": 86370 }, { "epoch": 1.325761645307344, "grad_norm": 0.30301380157470703, "learning_rate": 6.1608877288078764e-06, "loss": 0.0327, "step": 86380 }, { "epoch": 1.325915125470033, "grad_norm": 0.3608494997024536, "learning_rate": 6.158414403783335e-06, "loss": 0.0213, "step": 86390 }, { "epoch": 1.326068605632722, "grad_norm": 0.3847831189632416, "learning_rate": 6.155941354416255e-06, "loss": 0.0255, "step": 86400 }, { "epoch": 1.326222085795411, "grad_norm": 0.43752074241638184, "learning_rate": 6.15346858088409e-06, "loss": 0.0339, "step": 86410 }, { "epoch": 1.3263755659581, "grad_norm": 0.31915581226348877, "learning_rate": 6.150996083364278e-06, "loss": 0.027, "step": 86420 }, { "epoch": 1.3265290461207888, "grad_norm": 0.3552444279193878, "learning_rate": 6.148523862034237e-06, "loss": 0.0266, "step": 86430 }, { "epoch": 1.3266825262834778, "grad_norm": 0.3427118957042694, "learning_rate": 6.14605191707136e-06, "loss": 0.0296, "step": 86440 }, { "epoch": 1.3268360064461668, "grad_norm": 0.47714659571647644, "learning_rate": 6.143580248653027e-06, "loss": 0.0271, "step": 86450 }, { "epoch": 1.3269894866088559, "grad_norm": 0.38297128677368164, "learning_rate": 6.1411088569565944e-06, "loss": 0.0275, "step": 86460 }, { "epoch": 1.3271429667715449, "grad_norm": 0.44085636734962463, "learning_rate": 6.138637742159404e-06, "loss": 0.0262, "step": 86470 }, { "epoch": 1.3272964469342337, "grad_norm": 0.33571311831474304, "learning_rate": 6.13616690443877e-06, "loss": 0.0262, "step": 86480 }, { "epoch": 1.3274499270969227, "grad_norm": 0.2885722815990448, "learning_rate": 6.133696343971987e-06, "loss": 0.0252, "step": 86490 }, { "epoch": 1.3276034072596117, "grad_norm": 0.25645217299461365, "learning_rate": 6.13122606093634e-06, "loss": 0.0231, "step": 86500 }, { "epoch": 1.3277568874223007, "grad_norm": 0.3586161136627197, "learning_rate": 6.128756055509079e-06, "loss": 0.0298, "step": 86510 }, { "epoch": 1.3279103675849897, "grad_norm": 0.3744494616985321, "learning_rate": 6.126286327867449e-06, "loss": 0.0266, "step": 86520 }, { "epoch": 1.3280638477476785, "grad_norm": 0.27274438738822937, "learning_rate": 6.123816878188668e-06, "loss": 0.0228, "step": 86530 }, { "epoch": 1.3282173279103675, "grad_norm": 0.38421231508255005, "learning_rate": 6.121347706649927e-06, "loss": 0.033, "step": 86540 }, { "epoch": 1.3283708080730565, "grad_norm": 0.40954655408859253, "learning_rate": 6.118878813428412e-06, "loss": 0.0229, "step": 86550 }, { "epoch": 1.3285242882357455, "grad_norm": 0.25593364238739014, "learning_rate": 6.116410198701278e-06, "loss": 0.0261, "step": 86560 }, { "epoch": 1.3286777683984345, "grad_norm": 0.46385201811790466, "learning_rate": 6.113941862645665e-06, "loss": 0.0279, "step": 86570 }, { "epoch": 1.3288312485611236, "grad_norm": 0.3974671959877014, "learning_rate": 6.1114738054386915e-06, "loss": 0.0326, "step": 86580 }, { "epoch": 1.3289847287238126, "grad_norm": 0.5479927062988281, "learning_rate": 6.109006027257452e-06, "loss": 0.0248, "step": 86590 }, { "epoch": 1.3291382088865014, "grad_norm": 0.3357169032096863, "learning_rate": 6.106538528279024e-06, "loss": 0.0336, "step": 86600 }, { "epoch": 1.3292916890491904, "grad_norm": 0.34232792258262634, "learning_rate": 6.104071308680475e-06, "loss": 0.0265, "step": 86610 }, { "epoch": 1.3294451692118794, "grad_norm": 0.4960636794567108, "learning_rate": 6.101604368638834e-06, "loss": 0.0296, "step": 86620 }, { "epoch": 1.3295986493745684, "grad_norm": 0.4822627604007721, "learning_rate": 6.099137708331125e-06, "loss": 0.031, "step": 86630 }, { "epoch": 1.3297521295372574, "grad_norm": 0.33264005184173584, "learning_rate": 6.096671327934343e-06, "loss": 0.0257, "step": 86640 }, { "epoch": 1.3299056096999462, "grad_norm": 0.3319410979747772, "learning_rate": 6.094205227625465e-06, "loss": 0.0226, "step": 86650 }, { "epoch": 1.3300590898626352, "grad_norm": 0.6664083003997803, "learning_rate": 6.091739407581452e-06, "loss": 0.0339, "step": 86660 }, { "epoch": 1.3302125700253242, "grad_norm": 0.3154394328594208, "learning_rate": 6.0892738679792375e-06, "loss": 0.0293, "step": 86670 }, { "epoch": 1.3303660501880132, "grad_norm": 0.2960143983364105, "learning_rate": 6.086808608995747e-06, "loss": 0.0275, "step": 86680 }, { "epoch": 1.3305195303507023, "grad_norm": 0.25268977880477905, "learning_rate": 6.084343630807871e-06, "loss": 0.0227, "step": 86690 }, { "epoch": 1.330673010513391, "grad_norm": 0.37784236669540405, "learning_rate": 6.081878933592487e-06, "loss": 0.0313, "step": 86700 }, { "epoch": 1.33082649067608, "grad_norm": 0.3572799563407898, "learning_rate": 6.079414517526455e-06, "loss": 0.0214, "step": 86710 }, { "epoch": 1.330979970838769, "grad_norm": 0.5132676959037781, "learning_rate": 6.076950382786611e-06, "loss": 0.0301, "step": 86720 }, { "epoch": 1.331133451001458, "grad_norm": 0.4293893277645111, "learning_rate": 6.074486529549775e-06, "loss": 0.0264, "step": 86730 }, { "epoch": 1.331286931164147, "grad_norm": 0.3537499010562897, "learning_rate": 6.0720229579927385e-06, "loss": 0.0312, "step": 86740 }, { "epoch": 1.3314404113268359, "grad_norm": 0.37197020649909973, "learning_rate": 6.069559668292277e-06, "loss": 0.0234, "step": 86750 }, { "epoch": 1.3315938914895251, "grad_norm": 0.3968474566936493, "learning_rate": 6.067096660625154e-06, "loss": 0.0219, "step": 86760 }, { "epoch": 1.331747371652214, "grad_norm": 0.4890451431274414, "learning_rate": 6.064633935168098e-06, "loss": 0.0359, "step": 86770 }, { "epoch": 1.331900851814903, "grad_norm": 0.29145321249961853, "learning_rate": 6.062171492097833e-06, "loss": 0.0272, "step": 86780 }, { "epoch": 1.332054331977592, "grad_norm": 0.34175828099250793, "learning_rate": 6.059709331591046e-06, "loss": 0.0246, "step": 86790 }, { "epoch": 1.332207812140281, "grad_norm": 0.3570821285247803, "learning_rate": 6.057247453824415e-06, "loss": 0.0285, "step": 86800 }, { "epoch": 1.33236129230297, "grad_norm": 0.6012244820594788, "learning_rate": 6.054785858974599e-06, "loss": 0.0284, "step": 86810 }, { "epoch": 1.3325147724656587, "grad_norm": 0.30258288979530334, "learning_rate": 6.052324547218226e-06, "loss": 0.0283, "step": 86820 }, { "epoch": 1.3326682526283478, "grad_norm": 0.3917612135410309, "learning_rate": 6.049863518731918e-06, "loss": 0.0272, "step": 86830 }, { "epoch": 1.3328217327910368, "grad_norm": 0.42182421684265137, "learning_rate": 6.047402773692261e-06, "loss": 0.0283, "step": 86840 }, { "epoch": 1.3329752129537258, "grad_norm": 0.4777458608150482, "learning_rate": 6.0449423122758326e-06, "loss": 0.0241, "step": 86850 }, { "epoch": 1.3331286931164148, "grad_norm": 0.2718315124511719, "learning_rate": 6.042482134659185e-06, "loss": 0.0233, "step": 86860 }, { "epoch": 1.3332821732791036, "grad_norm": 0.43323853611946106, "learning_rate": 6.040022241018853e-06, "loss": 0.0309, "step": 86870 }, { "epoch": 1.3334356534417926, "grad_norm": 0.3766447901725769, "learning_rate": 6.03756263153135e-06, "loss": 0.0356, "step": 86880 }, { "epoch": 1.3335891336044816, "grad_norm": 0.34990444779396057, "learning_rate": 6.035103306373162e-06, "loss": 0.0336, "step": 86890 }, { "epoch": 1.3337426137671706, "grad_norm": 0.33887940645217896, "learning_rate": 6.032644265720767e-06, "loss": 0.029, "step": 86900 }, { "epoch": 1.3338960939298596, "grad_norm": 0.38455843925476074, "learning_rate": 6.030185509750612e-06, "loss": 0.0248, "step": 86910 }, { "epoch": 1.3340495740925484, "grad_norm": 0.46454116702079773, "learning_rate": 6.027727038639134e-06, "loss": 0.0278, "step": 86920 }, { "epoch": 1.3342030542552374, "grad_norm": 0.294422447681427, "learning_rate": 6.02526885256274e-06, "loss": 0.0264, "step": 86930 }, { "epoch": 1.3343565344179265, "grad_norm": 0.42979156970977783, "learning_rate": 6.022810951697815e-06, "loss": 0.0285, "step": 86940 }, { "epoch": 1.3345100145806155, "grad_norm": 0.3961469531059265, "learning_rate": 6.020353336220737e-06, "loss": 0.0335, "step": 86950 }, { "epoch": 1.3346634947433045, "grad_norm": 0.3464560806751251, "learning_rate": 6.017896006307847e-06, "loss": 0.0214, "step": 86960 }, { "epoch": 1.3348169749059933, "grad_norm": 0.4553970396518707, "learning_rate": 6.015438962135482e-06, "loss": 0.0311, "step": 86970 }, { "epoch": 1.3349704550686825, "grad_norm": 0.465468168258667, "learning_rate": 6.012982203879947e-06, "loss": 0.0308, "step": 86980 }, { "epoch": 1.3351239352313713, "grad_norm": 0.414063036441803, "learning_rate": 6.010525731717522e-06, "loss": 0.02, "step": 86990 }, { "epoch": 1.3352774153940603, "grad_norm": 0.3969663083553314, "learning_rate": 6.008069545824487e-06, "loss": 0.0235, "step": 87000 }, { "epoch": 1.3354308955567493, "grad_norm": 0.4832218885421753, "learning_rate": 6.005613646377076e-06, "loss": 0.03, "step": 87010 }, { "epoch": 1.3355843757194383, "grad_norm": 0.5387745499610901, "learning_rate": 6.0031580335515236e-06, "loss": 0.0244, "step": 87020 }, { "epoch": 1.3357378558821273, "grad_norm": 0.31949087977409363, "learning_rate": 6.000702707524035e-06, "loss": 0.0245, "step": 87030 }, { "epoch": 1.3358913360448161, "grad_norm": 0.38255739212036133, "learning_rate": 5.9982476684707895e-06, "loss": 0.0368, "step": 87040 }, { "epoch": 1.3360448162075051, "grad_norm": 0.26662373542785645, "learning_rate": 5.9957929165679555e-06, "loss": 0.027, "step": 87050 }, { "epoch": 1.3361982963701942, "grad_norm": 0.3527360260486603, "learning_rate": 5.993338451991673e-06, "loss": 0.0219, "step": 87060 }, { "epoch": 1.3363517765328832, "grad_norm": 0.41262122988700867, "learning_rate": 5.990884274918068e-06, "loss": 0.0266, "step": 87070 }, { "epoch": 1.3365052566955722, "grad_norm": 0.3622097373008728, "learning_rate": 5.988430385523245e-06, "loss": 0.0291, "step": 87080 }, { "epoch": 1.336658736858261, "grad_norm": 0.3576083183288574, "learning_rate": 5.985976783983278e-06, "loss": 0.026, "step": 87090 }, { "epoch": 1.33681221702095, "grad_norm": 0.3350394666194916, "learning_rate": 5.983523470474237e-06, "loss": 0.0258, "step": 87100 }, { "epoch": 1.336965697183639, "grad_norm": 0.34273532032966614, "learning_rate": 5.981070445172155e-06, "loss": 0.0233, "step": 87110 }, { "epoch": 1.337119177346328, "grad_norm": 0.5259072780609131, "learning_rate": 5.978617708253054e-06, "loss": 0.0272, "step": 87120 }, { "epoch": 1.337272657509017, "grad_norm": 0.25592726469039917, "learning_rate": 5.976165259892941e-06, "loss": 0.0272, "step": 87130 }, { "epoch": 1.3374261376717058, "grad_norm": 0.2175486832857132, "learning_rate": 5.973713100267782e-06, "loss": 0.0212, "step": 87140 }, { "epoch": 1.3375796178343948, "grad_norm": 0.31753551959991455, "learning_rate": 5.9712612295535375e-06, "loss": 0.0318, "step": 87150 }, { "epoch": 1.3377330979970838, "grad_norm": 0.37252408266067505, "learning_rate": 5.96880964792615e-06, "loss": 0.0259, "step": 87160 }, { "epoch": 1.3378865781597729, "grad_norm": 0.3549451529979706, "learning_rate": 5.96635835556153e-06, "loss": 0.027, "step": 87170 }, { "epoch": 1.3380400583224619, "grad_norm": 0.34384551644325256, "learning_rate": 5.96390735263558e-06, "loss": 0.028, "step": 87180 }, { "epoch": 1.3381935384851507, "grad_norm": 0.4494522213935852, "learning_rate": 5.961456639324166e-06, "loss": 0.0286, "step": 87190 }, { "epoch": 1.33834701864784, "grad_norm": 0.3832969665527344, "learning_rate": 5.959006215803145e-06, "loss": 0.0306, "step": 87200 }, { "epoch": 1.3385004988105287, "grad_norm": 0.42650294303894043, "learning_rate": 5.9565560822483505e-06, "loss": 0.0255, "step": 87210 }, { "epoch": 1.3386539789732177, "grad_norm": 0.24652770161628723, "learning_rate": 5.954106238835594e-06, "loss": 0.0222, "step": 87220 }, { "epoch": 1.3388074591359067, "grad_norm": 0.40801697969436646, "learning_rate": 5.951656685740671e-06, "loss": 0.0236, "step": 87230 }, { "epoch": 1.3389609392985957, "grad_norm": 0.4100348949432373, "learning_rate": 5.949207423139347e-06, "loss": 0.0286, "step": 87240 }, { "epoch": 1.3391144194612847, "grad_norm": 0.3178150951862335, "learning_rate": 5.94675845120737e-06, "loss": 0.0249, "step": 87250 }, { "epoch": 1.3392678996239735, "grad_norm": 0.42771369218826294, "learning_rate": 5.944309770120476e-06, "loss": 0.0292, "step": 87260 }, { "epoch": 1.3394213797866625, "grad_norm": 0.3031638264656067, "learning_rate": 5.941861380054366e-06, "loss": 0.0249, "step": 87270 }, { "epoch": 1.3395748599493515, "grad_norm": 0.5971754789352417, "learning_rate": 5.939413281184734e-06, "loss": 0.0298, "step": 87280 }, { "epoch": 1.3397283401120406, "grad_norm": 0.5272775888442993, "learning_rate": 5.936965473687241e-06, "loss": 0.03, "step": 87290 }, { "epoch": 1.3398818202747296, "grad_norm": 0.25682079792022705, "learning_rate": 5.934517957737531e-06, "loss": 0.0301, "step": 87300 }, { "epoch": 1.3400353004374184, "grad_norm": 0.2801879644393921, "learning_rate": 5.932070733511233e-06, "loss": 0.0217, "step": 87310 }, { "epoch": 1.3401887806001074, "grad_norm": 0.36381796002388, "learning_rate": 5.929623801183946e-06, "loss": 0.0265, "step": 87320 }, { "epoch": 1.3403422607627964, "grad_norm": 0.47171515226364136, "learning_rate": 5.92717716093126e-06, "loss": 0.0307, "step": 87330 }, { "epoch": 1.3404957409254854, "grad_norm": 0.43177682161331177, "learning_rate": 5.924730812928728e-06, "loss": 0.0317, "step": 87340 }, { "epoch": 1.3406492210881744, "grad_norm": 0.3832899034023285, "learning_rate": 5.922284757351893e-06, "loss": 0.0276, "step": 87350 }, { "epoch": 1.3408027012508632, "grad_norm": 0.42094069719314575, "learning_rate": 5.919838994376278e-06, "loss": 0.0278, "step": 87360 }, { "epoch": 1.3409561814135522, "grad_norm": 0.39930158853530884, "learning_rate": 5.917393524177376e-06, "loss": 0.0285, "step": 87370 }, { "epoch": 1.3411096615762412, "grad_norm": 0.4548403322696686, "learning_rate": 5.914948346930672e-06, "loss": 0.0294, "step": 87380 }, { "epoch": 1.3412631417389302, "grad_norm": 0.32725200057029724, "learning_rate": 5.912503462811617e-06, "loss": 0.0299, "step": 87390 }, { "epoch": 1.3414166219016193, "grad_norm": 0.3507908880710602, "learning_rate": 5.910058871995646e-06, "loss": 0.0284, "step": 87400 }, { "epoch": 1.3415701020643083, "grad_norm": 0.3272308111190796, "learning_rate": 5.907614574658174e-06, "loss": 0.031, "step": 87410 }, { "epoch": 1.3417235822269973, "grad_norm": 0.3590519428253174, "learning_rate": 5.9051705709745965e-06, "loss": 0.027, "step": 87420 }, { "epoch": 1.341877062389686, "grad_norm": 0.21810698509216309, "learning_rate": 5.902726861120289e-06, "loss": 0.0277, "step": 87430 }, { "epoch": 1.342030542552375, "grad_norm": 0.4749332666397095, "learning_rate": 5.900283445270594e-06, "loss": 0.0227, "step": 87440 }, { "epoch": 1.342184022715064, "grad_norm": 0.49197033047676086, "learning_rate": 5.89784032360085e-06, "loss": 0.0255, "step": 87450 }, { "epoch": 1.342337502877753, "grad_norm": 0.23079992830753326, "learning_rate": 5.895397496286358e-06, "loss": 0.0256, "step": 87460 }, { "epoch": 1.3424909830404421, "grad_norm": 0.40923094749450684, "learning_rate": 5.892954963502413e-06, "loss": 0.0282, "step": 87470 }, { "epoch": 1.342644463203131, "grad_norm": 0.28714489936828613, "learning_rate": 5.890512725424282e-06, "loss": 0.0304, "step": 87480 }, { "epoch": 1.34279794336582, "grad_norm": 0.42717015743255615, "learning_rate": 5.8880707822272036e-06, "loss": 0.0232, "step": 87490 }, { "epoch": 1.342951423528509, "grad_norm": 0.4133216142654419, "learning_rate": 5.885629134086408e-06, "loss": 0.0222, "step": 87500 }, { "epoch": 1.343104903691198, "grad_norm": 0.33331114053726196, "learning_rate": 5.883187781177095e-06, "loss": 0.0276, "step": 87510 }, { "epoch": 1.343258383853887, "grad_norm": 0.3960614800453186, "learning_rate": 5.8807467236744505e-06, "loss": 0.0296, "step": 87520 }, { "epoch": 1.3434118640165758, "grad_norm": 0.30959343910217285, "learning_rate": 5.878305961753637e-06, "loss": 0.0251, "step": 87530 }, { "epoch": 1.3435653441792648, "grad_norm": 0.3890429437160492, "learning_rate": 5.8758654955897855e-06, "loss": 0.0261, "step": 87540 }, { "epoch": 1.3437188243419538, "grad_norm": 0.39522191882133484, "learning_rate": 5.873425325358022e-06, "loss": 0.0299, "step": 87550 }, { "epoch": 1.3438723045046428, "grad_norm": 0.24817641079425812, "learning_rate": 5.87098545123344e-06, "loss": 0.0279, "step": 87560 }, { "epoch": 1.3440257846673318, "grad_norm": 0.6775332093238831, "learning_rate": 5.868545873391118e-06, "loss": 0.0312, "step": 87570 }, { "epoch": 1.3441792648300206, "grad_norm": 0.4557536244392395, "learning_rate": 5.866106592006114e-06, "loss": 0.0208, "step": 87580 }, { "epoch": 1.3443327449927098, "grad_norm": 0.3450053334236145, "learning_rate": 5.863667607253452e-06, "loss": 0.0314, "step": 87590 }, { "epoch": 1.3444862251553986, "grad_norm": 0.4674481153488159, "learning_rate": 5.861228919308152e-06, "loss": 0.0332, "step": 87600 }, { "epoch": 1.3446397053180876, "grad_norm": 0.2605952024459839, "learning_rate": 5.8587905283452e-06, "loss": 0.0262, "step": 87610 }, { "epoch": 1.3447931854807766, "grad_norm": 0.37740179896354675, "learning_rate": 5.85635243453957e-06, "loss": 0.0264, "step": 87620 }, { "epoch": 1.3449466656434657, "grad_norm": 0.2760409414768219, "learning_rate": 5.853914638066212e-06, "loss": 0.0328, "step": 87630 }, { "epoch": 1.3451001458061547, "grad_norm": 0.3977777361869812, "learning_rate": 5.851477139100044e-06, "loss": 0.0269, "step": 87640 }, { "epoch": 1.3452536259688435, "grad_norm": 0.4486676752567291, "learning_rate": 5.849039937815978e-06, "loss": 0.0266, "step": 87650 }, { "epoch": 1.3454071061315325, "grad_norm": 0.3150281310081482, "learning_rate": 5.8466030343888955e-06, "loss": 0.0239, "step": 87660 }, { "epoch": 1.3455605862942215, "grad_norm": 0.4518086314201355, "learning_rate": 5.844166428993665e-06, "loss": 0.0311, "step": 87670 }, { "epoch": 1.3457140664569105, "grad_norm": 0.40841588377952576, "learning_rate": 5.841730121805122e-06, "loss": 0.0187, "step": 87680 }, { "epoch": 1.3458675466195995, "grad_norm": 0.3350902199745178, "learning_rate": 5.8392941129980885e-06, "loss": 0.0327, "step": 87690 }, { "epoch": 1.3460210267822883, "grad_norm": 0.3692232370376587, "learning_rate": 5.8368584027473605e-06, "loss": 0.0284, "step": 87700 }, { "epoch": 1.3461745069449773, "grad_norm": 0.554621160030365, "learning_rate": 5.83442299122772e-06, "loss": 0.0332, "step": 87710 }, { "epoch": 1.3463279871076663, "grad_norm": 0.3025486171245575, "learning_rate": 5.831987878613922e-06, "loss": 0.0228, "step": 87720 }, { "epoch": 1.3464814672703553, "grad_norm": 0.18986083567142487, "learning_rate": 5.8295530650807e-06, "loss": 0.025, "step": 87730 }, { "epoch": 1.3466349474330443, "grad_norm": 0.40779027342796326, "learning_rate": 5.827118550802764e-06, "loss": 0.0224, "step": 87740 }, { "epoch": 1.3467884275957331, "grad_norm": 0.3487820029258728, "learning_rate": 5.8246843359548085e-06, "loss": 0.0343, "step": 87750 }, { "epoch": 1.3469419077584222, "grad_norm": 0.34106138348579407, "learning_rate": 5.8222504207114995e-06, "loss": 0.0256, "step": 87760 }, { "epoch": 1.3470953879211112, "grad_norm": 0.327217698097229, "learning_rate": 5.819816805247497e-06, "loss": 0.0249, "step": 87770 }, { "epoch": 1.3472488680838002, "grad_norm": 0.3037319779396057, "learning_rate": 5.817383489737415e-06, "loss": 0.0242, "step": 87780 }, { "epoch": 1.3474023482464892, "grad_norm": 0.5483748316764832, "learning_rate": 5.81495047435586e-06, "loss": 0.0352, "step": 87790 }, { "epoch": 1.347555828409178, "grad_norm": 0.2942506968975067, "learning_rate": 5.8125177592774215e-06, "loss": 0.0314, "step": 87800 }, { "epoch": 1.3477093085718672, "grad_norm": 0.31947463750839233, "learning_rate": 5.8100853446766614e-06, "loss": 0.0287, "step": 87810 }, { "epoch": 1.347862788734556, "grad_norm": 0.3291581869125366, "learning_rate": 5.807653230728118e-06, "loss": 0.0267, "step": 87820 }, { "epoch": 1.348016268897245, "grad_norm": 0.3912237286567688, "learning_rate": 5.805221417606311e-06, "loss": 0.0263, "step": 87830 }, { "epoch": 1.348169749059934, "grad_norm": 0.2590848207473755, "learning_rate": 5.802789905485739e-06, "loss": 0.0216, "step": 87840 }, { "epoch": 1.348323229222623, "grad_norm": 0.35186460614204407, "learning_rate": 5.800358694540878e-06, "loss": 0.0267, "step": 87850 }, { "epoch": 1.348476709385312, "grad_norm": 0.47050735354423523, "learning_rate": 5.797927784946175e-06, "loss": 0.0254, "step": 87860 }, { "epoch": 1.3486301895480008, "grad_norm": 0.5364821553230286, "learning_rate": 5.795497176876078e-06, "loss": 0.034, "step": 87870 }, { "epoch": 1.3487836697106899, "grad_norm": 0.3834889233112335, "learning_rate": 5.793066870504987e-06, "loss": 0.0273, "step": 87880 }, { "epoch": 1.3489371498733789, "grad_norm": 0.47962838411331177, "learning_rate": 5.790636866007288e-06, "loss": 0.0229, "step": 87890 }, { "epoch": 1.3490906300360679, "grad_norm": 0.3662002980709076, "learning_rate": 5.788207163557359e-06, "loss": 0.0239, "step": 87900 }, { "epoch": 1.349244110198757, "grad_norm": 0.3137710392475128, "learning_rate": 5.785777763329542e-06, "loss": 0.0268, "step": 87910 }, { "epoch": 1.3493975903614457, "grad_norm": 0.4200621247291565, "learning_rate": 5.78334866549816e-06, "loss": 0.0289, "step": 87920 }, { "epoch": 1.3495510705241347, "grad_norm": 0.5573728084564209, "learning_rate": 5.78091987023752e-06, "loss": 0.0286, "step": 87930 }, { "epoch": 1.3497045506868237, "grad_norm": 0.5265967845916748, "learning_rate": 5.778491377721898e-06, "loss": 0.037, "step": 87940 }, { "epoch": 1.3498580308495127, "grad_norm": 0.4543044865131378, "learning_rate": 5.776063188125553e-06, "loss": 0.0243, "step": 87950 }, { "epoch": 1.3500115110122017, "grad_norm": 0.3183438181877136, "learning_rate": 5.773635301622726e-06, "loss": 0.0275, "step": 87960 }, { "epoch": 1.3501649911748905, "grad_norm": 0.303112655878067, "learning_rate": 5.771207718387639e-06, "loss": 0.0203, "step": 87970 }, { "epoch": 1.3503184713375795, "grad_norm": 0.39721009135246277, "learning_rate": 5.768780438594469e-06, "loss": 0.027, "step": 87980 }, { "epoch": 1.3504719515002686, "grad_norm": 0.299481064081192, "learning_rate": 5.766353462417402e-06, "loss": 0.0206, "step": 87990 }, { "epoch": 1.3506254316629576, "grad_norm": 0.3330938518047333, "learning_rate": 5.7639267900305855e-06, "loss": 0.0332, "step": 88000 }, { "epoch": 1.3507789118256466, "grad_norm": 0.5016001462936401, "learning_rate": 5.761500421608148e-06, "loss": 0.0192, "step": 88010 }, { "epoch": 1.3509323919883356, "grad_norm": 0.3362727463245392, "learning_rate": 5.759074357324195e-06, "loss": 0.0208, "step": 88020 }, { "epoch": 1.3510858721510246, "grad_norm": 0.4504571259021759, "learning_rate": 5.756648597352812e-06, "loss": 0.0318, "step": 88030 }, { "epoch": 1.3512393523137134, "grad_norm": 0.2509891986846924, "learning_rate": 5.754223141868063e-06, "loss": 0.024, "step": 88040 }, { "epoch": 1.3513928324764024, "grad_norm": 0.4053315222263336, "learning_rate": 5.751797991043984e-06, "loss": 0.0274, "step": 88050 }, { "epoch": 1.3515463126390914, "grad_norm": 0.34064820408821106, "learning_rate": 5.749373145054604e-06, "loss": 0.0268, "step": 88060 }, { "epoch": 1.3516997928017804, "grad_norm": 0.3062223494052887, "learning_rate": 5.746948604073921e-06, "loss": 0.025, "step": 88070 }, { "epoch": 1.3518532729644694, "grad_norm": 0.45540085434913635, "learning_rate": 5.7445243682758965e-06, "loss": 0.0278, "step": 88080 }, { "epoch": 1.3520067531271582, "grad_norm": 0.30777499079704285, "learning_rate": 5.742100437834499e-06, "loss": 0.0261, "step": 88090 }, { "epoch": 1.3521602332898472, "grad_norm": 0.28549808263778687, "learning_rate": 5.7396768129236535e-06, "loss": 0.0221, "step": 88100 }, { "epoch": 1.3523137134525363, "grad_norm": 0.3351326882839203, "learning_rate": 5.737253493717274e-06, "loss": 0.0217, "step": 88110 }, { "epoch": 1.3524671936152253, "grad_norm": 0.4674816131591797, "learning_rate": 5.734830480389245e-06, "loss": 0.0304, "step": 88120 }, { "epoch": 1.3526206737779143, "grad_norm": 0.37955155968666077, "learning_rate": 5.732407773113435e-06, "loss": 0.0255, "step": 88130 }, { "epoch": 1.352774153940603, "grad_norm": 0.4187636077404022, "learning_rate": 5.729985372063687e-06, "loss": 0.0227, "step": 88140 }, { "epoch": 1.352927634103292, "grad_norm": 0.4556178152561188, "learning_rate": 5.727563277413819e-06, "loss": 0.0308, "step": 88150 }, { "epoch": 1.353081114265981, "grad_norm": 0.3008926212787628, "learning_rate": 5.725141489337639e-06, "loss": 0.0275, "step": 88160 }, { "epoch": 1.3532345944286701, "grad_norm": 0.4120533764362335, "learning_rate": 5.722720008008926e-06, "loss": 0.0249, "step": 88170 }, { "epoch": 1.3533880745913591, "grad_norm": 0.41001930832862854, "learning_rate": 5.720298833601425e-06, "loss": 0.0293, "step": 88180 }, { "epoch": 1.353541554754048, "grad_norm": 0.3245674669742584, "learning_rate": 5.71787796628888e-06, "loss": 0.0304, "step": 88190 }, { "epoch": 1.3536950349167371, "grad_norm": 0.25718384981155396, "learning_rate": 5.715457406245003e-06, "loss": 0.0239, "step": 88200 }, { "epoch": 1.353848515079426, "grad_norm": 0.3482201099395752, "learning_rate": 5.713037153643476e-06, "loss": 0.0261, "step": 88210 }, { "epoch": 1.354001995242115, "grad_norm": 0.40449127554893494, "learning_rate": 5.71061720865798e-06, "loss": 0.0324, "step": 88220 }, { "epoch": 1.354155475404804, "grad_norm": 0.40873974561691284, "learning_rate": 5.708197571462151e-06, "loss": 0.03, "step": 88230 }, { "epoch": 1.354308955567493, "grad_norm": 0.34870657324790955, "learning_rate": 5.7057782422296104e-06, "loss": 0.0232, "step": 88240 }, { "epoch": 1.354462435730182, "grad_norm": 0.38512808084487915, "learning_rate": 5.70335922113397e-06, "loss": 0.0264, "step": 88250 }, { "epoch": 1.3546159158928708, "grad_norm": 0.21298620104789734, "learning_rate": 5.700940508348805e-06, "loss": 0.0204, "step": 88260 }, { "epoch": 1.3547693960555598, "grad_norm": 0.3642464876174927, "learning_rate": 5.6985221040476705e-06, "loss": 0.0225, "step": 88270 }, { "epoch": 1.3549228762182488, "grad_norm": 0.4010462760925293, "learning_rate": 5.696104008404104e-06, "loss": 0.0329, "step": 88280 }, { "epoch": 1.3550763563809378, "grad_norm": 0.39377570152282715, "learning_rate": 5.693686221591621e-06, "loss": 0.0306, "step": 88290 }, { "epoch": 1.3552298365436268, "grad_norm": 0.25212669372558594, "learning_rate": 5.691268743783709e-06, "loss": 0.0239, "step": 88300 }, { "epoch": 1.3553833167063156, "grad_norm": 0.45592254400253296, "learning_rate": 5.688851575153834e-06, "loss": 0.0287, "step": 88310 }, { "epoch": 1.3555367968690046, "grad_norm": 0.37368324398994446, "learning_rate": 5.686434715875455e-06, "loss": 0.0326, "step": 88320 }, { "epoch": 1.3556902770316936, "grad_norm": 0.6565023064613342, "learning_rate": 5.684018166121985e-06, "loss": 0.0288, "step": 88330 }, { "epoch": 1.3558437571943827, "grad_norm": 0.41492268443107605, "learning_rate": 5.681601926066826e-06, "loss": 0.0243, "step": 88340 }, { "epoch": 1.3559972373570717, "grad_norm": 0.43600958585739136, "learning_rate": 5.679185995883367e-06, "loss": 0.0294, "step": 88350 }, { "epoch": 1.3561507175197605, "grad_norm": 0.47970420122146606, "learning_rate": 5.67677037574496e-06, "loss": 0.0292, "step": 88360 }, { "epoch": 1.3563041976824495, "grad_norm": 0.44436052441596985, "learning_rate": 5.674355065824942e-06, "loss": 0.0209, "step": 88370 }, { "epoch": 1.3564576778451385, "grad_norm": 0.3565708100795746, "learning_rate": 5.671940066296625e-06, "loss": 0.0273, "step": 88380 }, { "epoch": 1.3566111580078275, "grad_norm": 0.4917445480823517, "learning_rate": 5.669525377333301e-06, "loss": 0.0271, "step": 88390 }, { "epoch": 1.3567646381705165, "grad_norm": 0.4765937328338623, "learning_rate": 5.667110999108241e-06, "loss": 0.0202, "step": 88400 }, { "epoch": 1.3569181183332053, "grad_norm": 0.3914404511451721, "learning_rate": 5.664696931794684e-06, "loss": 0.0259, "step": 88410 }, { "epoch": 1.3570715984958945, "grad_norm": 0.3638537526130676, "learning_rate": 5.6622831755658684e-06, "loss": 0.0342, "step": 88420 }, { "epoch": 1.3572250786585833, "grad_norm": 0.37714096903800964, "learning_rate": 5.659869730594982e-06, "loss": 0.0272, "step": 88430 }, { "epoch": 1.3573785588212723, "grad_norm": 0.3923507630825043, "learning_rate": 5.657456597055207e-06, "loss": 0.0333, "step": 88440 }, { "epoch": 1.3575320389839614, "grad_norm": 0.2150650918483734, "learning_rate": 5.655043775119706e-06, "loss": 0.0221, "step": 88450 }, { "epoch": 1.3576855191466504, "grad_norm": 0.35069236159324646, "learning_rate": 5.652631264961613e-06, "loss": 0.0262, "step": 88460 }, { "epoch": 1.3578389993093394, "grad_norm": 0.3337412476539612, "learning_rate": 5.650219066754037e-06, "loss": 0.0281, "step": 88470 }, { "epoch": 1.3579924794720282, "grad_norm": 0.3841231167316437, "learning_rate": 5.6478071806700706e-06, "loss": 0.0373, "step": 88480 }, { "epoch": 1.3581459596347172, "grad_norm": 0.30376574397087097, "learning_rate": 5.64539560688278e-06, "loss": 0.0381, "step": 88490 }, { "epoch": 1.3582994397974062, "grad_norm": 0.503595232963562, "learning_rate": 5.642984345565208e-06, "loss": 0.0318, "step": 88500 }, { "epoch": 1.3584529199600952, "grad_norm": 0.2979937195777893, "learning_rate": 5.640573396890385e-06, "loss": 0.0272, "step": 88510 }, { "epoch": 1.3586064001227842, "grad_norm": 0.3126510977745056, "learning_rate": 5.638162761031309e-06, "loss": 0.0276, "step": 88520 }, { "epoch": 1.358759880285473, "grad_norm": 0.3726426661014557, "learning_rate": 5.635752438160951e-06, "loss": 0.027, "step": 88530 }, { "epoch": 1.358913360448162, "grad_norm": 0.3409600257873535, "learning_rate": 5.633342428452274e-06, "loss": 0.0211, "step": 88540 }, { "epoch": 1.359066840610851, "grad_norm": 0.5274688005447388, "learning_rate": 5.630932732078209e-06, "loss": 0.0252, "step": 88550 }, { "epoch": 1.35922032077354, "grad_norm": 0.3790157735347748, "learning_rate": 5.628523349211665e-06, "loss": 0.0334, "step": 88560 }, { "epoch": 1.359373800936229, "grad_norm": 0.45111769437789917, "learning_rate": 5.626114280025534e-06, "loss": 0.0265, "step": 88570 }, { "epoch": 1.3595272810989179, "grad_norm": 0.4198604226112366, "learning_rate": 5.6237055246926775e-06, "loss": 0.035, "step": 88580 }, { "epoch": 1.3596807612616069, "grad_norm": 0.44319942593574524, "learning_rate": 5.621297083385939e-06, "loss": 0.0369, "step": 88590 }, { "epoch": 1.3598342414242959, "grad_norm": 0.3471888601779938, "learning_rate": 5.618888956278137e-06, "loss": 0.0223, "step": 88600 }, { "epoch": 1.3599877215869849, "grad_norm": 0.38049906492233276, "learning_rate": 5.616481143542076e-06, "loss": 0.0242, "step": 88610 }, { "epoch": 1.360141201749674, "grad_norm": 0.3962514102458954, "learning_rate": 5.614073645350533e-06, "loss": 0.0178, "step": 88620 }, { "epoch": 1.3602946819123627, "grad_norm": 0.34649690985679626, "learning_rate": 5.6116664618762465e-06, "loss": 0.0279, "step": 88630 }, { "epoch": 1.360448162075052, "grad_norm": 0.39146119356155396, "learning_rate": 5.6092595932919605e-06, "loss": 0.0269, "step": 88640 }, { "epoch": 1.3606016422377407, "grad_norm": 0.35895857214927673, "learning_rate": 5.6068530397703765e-06, "loss": 0.0248, "step": 88650 }, { "epoch": 1.3607551224004297, "grad_norm": 0.4192361533641815, "learning_rate": 5.604446801484183e-06, "loss": 0.0312, "step": 88660 }, { "epoch": 1.3609086025631187, "grad_norm": 0.3836901783943176, "learning_rate": 5.6020408786060395e-06, "loss": 0.0285, "step": 88670 }, { "epoch": 1.3610620827258078, "grad_norm": 0.3679443597793579, "learning_rate": 5.599635271308588e-06, "loss": 0.0235, "step": 88680 }, { "epoch": 1.3612155628884968, "grad_norm": 0.43247660994529724, "learning_rate": 5.597229979764445e-06, "loss": 0.029, "step": 88690 }, { "epoch": 1.3613690430511856, "grad_norm": 0.36021265387535095, "learning_rate": 5.594825004146199e-06, "loss": 0.0302, "step": 88700 }, { "epoch": 1.3615225232138746, "grad_norm": 0.32013484835624695, "learning_rate": 5.592420344626433e-06, "loss": 0.0295, "step": 88710 }, { "epoch": 1.3616760033765636, "grad_norm": 0.2367064356803894, "learning_rate": 5.590016001377692e-06, "loss": 0.0297, "step": 88720 }, { "epoch": 1.3618294835392526, "grad_norm": 0.3605469763278961, "learning_rate": 5.587611974572495e-06, "loss": 0.0323, "step": 88730 }, { "epoch": 1.3619829637019416, "grad_norm": 0.44015800952911377, "learning_rate": 5.585208264383355e-06, "loss": 0.0244, "step": 88740 }, { "epoch": 1.3621364438646304, "grad_norm": 0.43046289682388306, "learning_rate": 5.582804870982751e-06, "loss": 0.0247, "step": 88750 }, { "epoch": 1.3622899240273194, "grad_norm": 0.3163486123085022, "learning_rate": 5.580401794543136e-06, "loss": 0.0325, "step": 88760 }, { "epoch": 1.3624434041900084, "grad_norm": 0.7151488065719604, "learning_rate": 5.577999035236956e-06, "loss": 0.0312, "step": 88770 }, { "epoch": 1.3625968843526974, "grad_norm": 0.44195082783699036, "learning_rate": 5.575596593236614e-06, "loss": 0.0283, "step": 88780 }, { "epoch": 1.3627503645153864, "grad_norm": 0.48193004727363586, "learning_rate": 5.5731944687145e-06, "loss": 0.0263, "step": 88790 }, { "epoch": 1.3629038446780752, "grad_norm": 0.500131368637085, "learning_rate": 5.57079266184299e-06, "loss": 0.0303, "step": 88800 }, { "epoch": 1.3630573248407643, "grad_norm": 0.5929569602012634, "learning_rate": 5.568391172794421e-06, "loss": 0.0381, "step": 88810 }, { "epoch": 1.3632108050034533, "grad_norm": 0.3354494869709015, "learning_rate": 5.565990001741118e-06, "loss": 0.0317, "step": 88820 }, { "epoch": 1.3633642851661423, "grad_norm": 0.32281041145324707, "learning_rate": 5.563589148855378e-06, "loss": 0.0267, "step": 88830 }, { "epoch": 1.3635177653288313, "grad_norm": 0.2752213180065155, "learning_rate": 5.5611886143094785e-06, "loss": 0.0337, "step": 88840 }, { "epoch": 1.3636712454915203, "grad_norm": 0.4213394522666931, "learning_rate": 5.558788398275672e-06, "loss": 0.0235, "step": 88850 }, { "epoch": 1.3638247256542093, "grad_norm": 0.4131079316139221, "learning_rate": 5.556388500926183e-06, "loss": 0.0254, "step": 88860 }, { "epoch": 1.363978205816898, "grad_norm": 0.618237316608429, "learning_rate": 5.5539889224332336e-06, "loss": 0.0309, "step": 88870 }, { "epoch": 1.3641316859795871, "grad_norm": 0.5213946104049683, "learning_rate": 5.551589662968995e-06, "loss": 0.0323, "step": 88880 }, { "epoch": 1.3642851661422761, "grad_norm": 0.4554915726184845, "learning_rate": 5.549190722705631e-06, "loss": 0.0294, "step": 88890 }, { "epoch": 1.3644386463049651, "grad_norm": 0.47325167059898376, "learning_rate": 5.546792101815284e-06, "loss": 0.0228, "step": 88900 }, { "epoch": 1.3645921264676542, "grad_norm": 0.2674314081668854, "learning_rate": 5.54439380047007e-06, "loss": 0.0254, "step": 88910 }, { "epoch": 1.364745606630343, "grad_norm": 0.5732851028442383, "learning_rate": 5.541995818842079e-06, "loss": 0.0265, "step": 88920 }, { "epoch": 1.364899086793032, "grad_norm": 0.3660457730293274, "learning_rate": 5.539598157103383e-06, "loss": 0.0276, "step": 88930 }, { "epoch": 1.365052566955721, "grad_norm": 0.4116060137748718, "learning_rate": 5.537200815426029e-06, "loss": 0.0282, "step": 88940 }, { "epoch": 1.36520604711841, "grad_norm": 0.2130792886018753, "learning_rate": 5.5348037939820394e-06, "loss": 0.0211, "step": 88950 }, { "epoch": 1.365359527281099, "grad_norm": 0.45327824354171753, "learning_rate": 5.532407092943412e-06, "loss": 0.0282, "step": 88960 }, { "epoch": 1.3655130074437878, "grad_norm": 0.31867122650146484, "learning_rate": 5.530010712482138e-06, "loss": 0.0275, "step": 88970 }, { "epoch": 1.3656664876064768, "grad_norm": 0.4645293354988098, "learning_rate": 5.527614652770159e-06, "loss": 0.0279, "step": 88980 }, { "epoch": 1.3658199677691658, "grad_norm": 0.5092137455940247, "learning_rate": 5.5252189139794085e-06, "loss": 0.0309, "step": 88990 }, { "epoch": 1.3659734479318548, "grad_norm": 0.2908690869808197, "learning_rate": 5.5228234962818015e-06, "loss": 0.0329, "step": 89000 }, { "epoch": 1.3661269280945438, "grad_norm": 0.3463688790798187, "learning_rate": 5.520428399849222e-06, "loss": 0.0286, "step": 89010 }, { "epoch": 1.3662804082572326, "grad_norm": 0.2620845139026642, "learning_rate": 5.518033624853532e-06, "loss": 0.0309, "step": 89020 }, { "epoch": 1.3664338884199219, "grad_norm": 0.3708989918231964, "learning_rate": 5.515639171466571e-06, "loss": 0.0275, "step": 89030 }, { "epoch": 1.3665873685826107, "grad_norm": 0.35172873735427856, "learning_rate": 5.513245039860156e-06, "loss": 0.0232, "step": 89040 }, { "epoch": 1.3667408487452997, "grad_norm": 0.3140229880809784, "learning_rate": 5.5108512302060776e-06, "loss": 0.0309, "step": 89050 }, { "epoch": 1.3668943289079887, "grad_norm": 0.2655426859855652, "learning_rate": 5.5084577426761145e-06, "loss": 0.0199, "step": 89060 }, { "epoch": 1.3670478090706777, "grad_norm": 0.32430577278137207, "learning_rate": 5.506064577442012e-06, "loss": 0.028, "step": 89070 }, { "epoch": 1.3672012892333667, "grad_norm": 0.40516138076782227, "learning_rate": 5.503671734675484e-06, "loss": 0.0188, "step": 89080 }, { "epoch": 1.3673547693960555, "grad_norm": 0.32530882954597473, "learning_rate": 5.501279214548245e-06, "loss": 0.031, "step": 89090 }, { "epoch": 1.3675082495587445, "grad_norm": 0.3619536757469177, "learning_rate": 5.4988870172319665e-06, "loss": 0.0226, "step": 89100 }, { "epoch": 1.3676617297214335, "grad_norm": 0.4507516920566559, "learning_rate": 5.496495142898306e-06, "loss": 0.0274, "step": 89110 }, { "epoch": 1.3678152098841225, "grad_norm": 0.2711454927921295, "learning_rate": 5.4941035917188934e-06, "loss": 0.0216, "step": 89120 }, { "epoch": 1.3679686900468115, "grad_norm": 0.4507875442504883, "learning_rate": 5.491712363865337e-06, "loss": 0.0235, "step": 89130 }, { "epoch": 1.3681221702095003, "grad_norm": 0.42268022894859314, "learning_rate": 5.489321459509223e-06, "loss": 0.0307, "step": 89140 }, { "epoch": 1.3682756503721893, "grad_norm": 0.6325443387031555, "learning_rate": 5.48693087882211e-06, "loss": 0.025, "step": 89150 }, { "epoch": 1.3684291305348784, "grad_norm": 0.23816636204719543, "learning_rate": 5.484540621975544e-06, "loss": 0.0281, "step": 89160 }, { "epoch": 1.3685826106975674, "grad_norm": 0.4021874666213989, "learning_rate": 5.482150689141041e-06, "loss": 0.0237, "step": 89170 }, { "epoch": 1.3687360908602564, "grad_norm": 0.3404490351676941, "learning_rate": 5.479761080490082e-06, "loss": 0.0306, "step": 89180 }, { "epoch": 1.3688895710229452, "grad_norm": 0.43286144733428955, "learning_rate": 5.477371796194147e-06, "loss": 0.0206, "step": 89190 }, { "epoch": 1.3690430511856342, "grad_norm": 0.4170428216457367, "learning_rate": 5.474982836424678e-06, "loss": 0.024, "step": 89200 }, { "epoch": 1.3691965313483232, "grad_norm": 0.37925949692726135, "learning_rate": 5.472594201353099e-06, "loss": 0.0229, "step": 89210 }, { "epoch": 1.3693500115110122, "grad_norm": 0.3950158655643463, "learning_rate": 5.470205891150808e-06, "loss": 0.0312, "step": 89220 }, { "epoch": 1.3695034916737012, "grad_norm": 0.5303067564964294, "learning_rate": 5.467817905989181e-06, "loss": 0.0277, "step": 89230 }, { "epoch": 1.36965697183639, "grad_norm": 0.42526617646217346, "learning_rate": 5.465430246039572e-06, "loss": 0.0303, "step": 89240 }, { "epoch": 1.3698104519990792, "grad_norm": 0.3978705108165741, "learning_rate": 5.463042911473304e-06, "loss": 0.0193, "step": 89250 }, { "epoch": 1.369963932161768, "grad_norm": 0.43584010004997253, "learning_rate": 5.4606559024616935e-06, "loss": 0.0241, "step": 89260 }, { "epoch": 1.370117412324457, "grad_norm": 0.5146141052246094, "learning_rate": 5.458269219176021e-06, "loss": 0.0273, "step": 89270 }, { "epoch": 1.370270892487146, "grad_norm": 0.5249451398849487, "learning_rate": 5.455882861787536e-06, "loss": 0.0416, "step": 89280 }, { "epoch": 1.370424372649835, "grad_norm": 0.40791282057762146, "learning_rate": 5.453496830467484e-06, "loss": 0.0209, "step": 89290 }, { "epoch": 1.370577852812524, "grad_norm": 0.23055624961853027, "learning_rate": 5.451111125387074e-06, "loss": 0.022, "step": 89300 }, { "epoch": 1.3707313329752129, "grad_norm": 0.3901681900024414, "learning_rate": 5.448725746717493e-06, "loss": 0.0245, "step": 89310 }, { "epoch": 1.370884813137902, "grad_norm": 0.3580596446990967, "learning_rate": 5.446340694629916e-06, "loss": 0.0358, "step": 89320 }, { "epoch": 1.371038293300591, "grad_norm": 0.4561007022857666, "learning_rate": 5.4439559692954745e-06, "loss": 0.0231, "step": 89330 }, { "epoch": 1.37119177346328, "grad_norm": 0.40334272384643555, "learning_rate": 5.441571570885288e-06, "loss": 0.0228, "step": 89340 }, { "epoch": 1.371345253625969, "grad_norm": 0.3904869556427002, "learning_rate": 5.439187499570457e-06, "loss": 0.0283, "step": 89350 }, { "epoch": 1.3714987337886577, "grad_norm": 0.382823646068573, "learning_rate": 5.436803755522053e-06, "loss": 0.0257, "step": 89360 }, { "epoch": 1.3716522139513467, "grad_norm": 0.29375413060188293, "learning_rate": 5.434420338911123e-06, "loss": 0.0229, "step": 89370 }, { "epoch": 1.3718056941140357, "grad_norm": 0.4676699936389923, "learning_rate": 5.432037249908691e-06, "loss": 0.0231, "step": 89380 }, { "epoch": 1.3719591742767248, "grad_norm": 0.5687477588653564, "learning_rate": 5.429654488685759e-06, "loss": 0.0307, "step": 89390 }, { "epoch": 1.3721126544394138, "grad_norm": 0.5858947038650513, "learning_rate": 5.427272055413302e-06, "loss": 0.0266, "step": 89400 }, { "epoch": 1.3722661346021026, "grad_norm": 0.3458300828933716, "learning_rate": 5.424889950262275e-06, "loss": 0.029, "step": 89410 }, { "epoch": 1.3724196147647916, "grad_norm": 0.3363209068775177, "learning_rate": 5.422508173403618e-06, "loss": 0.0268, "step": 89420 }, { "epoch": 1.3725730949274806, "grad_norm": 0.43490663170814514, "learning_rate": 5.4201267250082275e-06, "loss": 0.0286, "step": 89430 }, { "epoch": 1.3727265750901696, "grad_norm": 0.38988596200942993, "learning_rate": 5.417745605246987e-06, "loss": 0.0211, "step": 89440 }, { "epoch": 1.3728800552528586, "grad_norm": 0.2736121118068695, "learning_rate": 5.415364814290762e-06, "loss": 0.0241, "step": 89450 }, { "epoch": 1.3730335354155476, "grad_norm": 0.3454473912715912, "learning_rate": 5.412984352310389e-06, "loss": 0.0294, "step": 89460 }, { "epoch": 1.3731870155782366, "grad_norm": 0.30630001425743103, "learning_rate": 5.410604219476676e-06, "loss": 0.0286, "step": 89470 }, { "epoch": 1.3733404957409254, "grad_norm": 0.2430882602930069, "learning_rate": 5.408224415960417e-06, "loss": 0.0223, "step": 89480 }, { "epoch": 1.3734939759036144, "grad_norm": 0.3378355801105499, "learning_rate": 5.405844941932374e-06, "loss": 0.0243, "step": 89490 }, { "epoch": 1.3736474560663035, "grad_norm": 0.3174639344215393, "learning_rate": 5.403465797563292e-06, "loss": 0.0294, "step": 89500 }, { "epoch": 1.3738009362289925, "grad_norm": 0.2843548655509949, "learning_rate": 5.401086983023882e-06, "loss": 0.0226, "step": 89510 }, { "epoch": 1.3739544163916815, "grad_norm": 0.39359250664711, "learning_rate": 5.3987084984848524e-06, "loss": 0.0247, "step": 89520 }, { "epoch": 1.3741078965543703, "grad_norm": 0.33309227228164673, "learning_rate": 5.396330344116864e-06, "loss": 0.0252, "step": 89530 }, { "epoch": 1.3742613767170593, "grad_norm": 0.43628937005996704, "learning_rate": 5.39395252009056e-06, "loss": 0.0314, "step": 89540 }, { "epoch": 1.3744148568797483, "grad_norm": 0.2381209284067154, "learning_rate": 5.3915750265765745e-06, "loss": 0.0227, "step": 89550 }, { "epoch": 1.3745683370424373, "grad_norm": 0.39601847529411316, "learning_rate": 5.389197863745504e-06, "loss": 0.0288, "step": 89560 }, { "epoch": 1.3747218172051263, "grad_norm": 0.43927374482154846, "learning_rate": 5.386821031767921e-06, "loss": 0.0265, "step": 89570 }, { "epoch": 1.374875297367815, "grad_norm": 0.27223721146583557, "learning_rate": 5.384444530814383e-06, "loss": 0.0333, "step": 89580 }, { "epoch": 1.3750287775305041, "grad_norm": 0.5753538608551025, "learning_rate": 5.382068361055415e-06, "loss": 0.0298, "step": 89590 }, { "epoch": 1.3751822576931931, "grad_norm": 0.5808812975883484, "learning_rate": 5.3796925226615184e-06, "loss": 0.0371, "step": 89600 }, { "epoch": 1.3753357378558821, "grad_norm": 0.17799578607082367, "learning_rate": 5.377317015803183e-06, "loss": 0.026, "step": 89610 }, { "epoch": 1.3754892180185712, "grad_norm": 0.37465500831604004, "learning_rate": 5.3749418406508645e-06, "loss": 0.0301, "step": 89620 }, { "epoch": 1.37564269818126, "grad_norm": 0.3569856882095337, "learning_rate": 5.372566997374986e-06, "loss": 0.0264, "step": 89630 }, { "epoch": 1.3757961783439492, "grad_norm": 0.34775039553642273, "learning_rate": 5.370192486145968e-06, "loss": 0.025, "step": 89640 }, { "epoch": 1.375949658506638, "grad_norm": 0.39658060669898987, "learning_rate": 5.367818307134195e-06, "loss": 0.0256, "step": 89650 }, { "epoch": 1.376103138669327, "grad_norm": 0.6309138536453247, "learning_rate": 5.365444460510025e-06, "loss": 0.0233, "step": 89660 }, { "epoch": 1.376256618832016, "grad_norm": 0.3319934010505676, "learning_rate": 5.363070946443799e-06, "loss": 0.0265, "step": 89670 }, { "epoch": 1.376410098994705, "grad_norm": 0.42648473381996155, "learning_rate": 5.360697765105831e-06, "loss": 0.0242, "step": 89680 }, { "epoch": 1.376563579157394, "grad_norm": 0.4658387303352356, "learning_rate": 5.35832491666641e-06, "loss": 0.0294, "step": 89690 }, { "epoch": 1.3767170593200828, "grad_norm": 0.20371834933757782, "learning_rate": 5.355952401295799e-06, "loss": 0.0244, "step": 89700 }, { "epoch": 1.3768705394827718, "grad_norm": 0.3992646038532257, "learning_rate": 5.353580219164249e-06, "loss": 0.025, "step": 89710 }, { "epoch": 1.3770240196454608, "grad_norm": 0.35473331809043884, "learning_rate": 5.351208370441978e-06, "loss": 0.0249, "step": 89720 }, { "epoch": 1.3771774998081499, "grad_norm": 0.38089507818222046, "learning_rate": 5.348836855299171e-06, "loss": 0.0263, "step": 89730 }, { "epoch": 1.3773309799708389, "grad_norm": 0.48270347714424133, "learning_rate": 5.346465673906008e-06, "loss": 0.034, "step": 89740 }, { "epoch": 1.3774844601335277, "grad_norm": 0.26765087246894836, "learning_rate": 5.344094826432633e-06, "loss": 0.0294, "step": 89750 }, { "epoch": 1.3776379402962167, "grad_norm": 0.30056998133659363, "learning_rate": 5.34172431304917e-06, "loss": 0.0199, "step": 89760 }, { "epoch": 1.3777914204589057, "grad_norm": 0.30417078733444214, "learning_rate": 5.339354133925716e-06, "loss": 0.0276, "step": 89770 }, { "epoch": 1.3779449006215947, "grad_norm": 0.3496399223804474, "learning_rate": 5.336984289232348e-06, "loss": 0.0244, "step": 89780 }, { "epoch": 1.3780983807842837, "grad_norm": 0.3204765319824219, "learning_rate": 5.334614779139116e-06, "loss": 0.0183, "step": 89790 }, { "epoch": 1.3782518609469725, "grad_norm": 0.5801852345466614, "learning_rate": 5.332245603816042e-06, "loss": 0.0275, "step": 89800 }, { "epoch": 1.3784053411096615, "grad_norm": 0.2529492974281311, "learning_rate": 5.329876763433137e-06, "loss": 0.0278, "step": 89810 }, { "epoch": 1.3785588212723505, "grad_norm": 0.3709559440612793, "learning_rate": 5.327508258160382e-06, "loss": 0.0299, "step": 89820 }, { "epoch": 1.3787123014350395, "grad_norm": 0.4735355079174042, "learning_rate": 5.325140088167718e-06, "loss": 0.0234, "step": 89830 }, { "epoch": 1.3788657815977285, "grad_norm": 0.3660115897655487, "learning_rate": 5.322772253625089e-06, "loss": 0.0239, "step": 89840 }, { "epoch": 1.3790192617604173, "grad_norm": 0.2213214635848999, "learning_rate": 5.320404754702397e-06, "loss": 0.0201, "step": 89850 }, { "epoch": 1.3791727419231066, "grad_norm": 0.36289581656455994, "learning_rate": 5.318037591569521e-06, "loss": 0.0218, "step": 89860 }, { "epoch": 1.3793262220857954, "grad_norm": 0.36439836025238037, "learning_rate": 5.31567076439633e-06, "loss": 0.0286, "step": 89870 }, { "epoch": 1.3794797022484844, "grad_norm": 0.2884625196456909, "learning_rate": 5.313304273352649e-06, "loss": 0.0293, "step": 89880 }, { "epoch": 1.3796331824111734, "grad_norm": 0.39668434858322144, "learning_rate": 5.310938118608286e-06, "loss": 0.0264, "step": 89890 }, { "epoch": 1.3797866625738624, "grad_norm": 0.39098989963531494, "learning_rate": 5.308572300333038e-06, "loss": 0.0314, "step": 89900 }, { "epoch": 1.3799401427365514, "grad_norm": 0.3209354877471924, "learning_rate": 5.30620681869666e-06, "loss": 0.0265, "step": 89910 }, { "epoch": 1.3800936228992402, "grad_norm": 0.2948376536369324, "learning_rate": 5.303841673868892e-06, "loss": 0.0309, "step": 89920 }, { "epoch": 1.3802471030619292, "grad_norm": 0.6032705903053284, "learning_rate": 5.301476866019446e-06, "loss": 0.0279, "step": 89930 }, { "epoch": 1.3804005832246182, "grad_norm": 0.39326298236846924, "learning_rate": 5.299112395318013e-06, "loss": 0.0337, "step": 89940 }, { "epoch": 1.3805540633873072, "grad_norm": 0.46756526827812195, "learning_rate": 5.296748261934259e-06, "loss": 0.0357, "step": 89950 }, { "epoch": 1.3807075435499963, "grad_norm": 0.33600229024887085, "learning_rate": 5.294384466037818e-06, "loss": 0.0324, "step": 89960 }, { "epoch": 1.380861023712685, "grad_norm": 0.36765575408935547, "learning_rate": 5.292021007798323e-06, "loss": 0.0239, "step": 89970 }, { "epoch": 1.381014503875374, "grad_norm": 0.33901533484458923, "learning_rate": 5.289657887385351e-06, "loss": 0.0327, "step": 89980 }, { "epoch": 1.381167984038063, "grad_norm": 0.31112316250801086, "learning_rate": 5.287295104968473e-06, "loss": 0.031, "step": 89990 }, { "epoch": 1.381321464200752, "grad_norm": 0.4052959382534027, "learning_rate": 5.28493266071724e-06, "loss": 0.0345, "step": 90000 }, { "epoch": 1.381474944363441, "grad_norm": 0.2933428883552551, "learning_rate": 5.282570554801168e-06, "loss": 0.0284, "step": 90010 }, { "epoch": 1.3816284245261299, "grad_norm": 0.32250717282295227, "learning_rate": 5.280208787389753e-06, "loss": 0.0253, "step": 90020 }, { "epoch": 1.381781904688819, "grad_norm": 0.3172401785850525, "learning_rate": 5.277847358652468e-06, "loss": 0.0237, "step": 90030 }, { "epoch": 1.381935384851508, "grad_norm": 0.35713592171669006, "learning_rate": 5.275486268758756e-06, "loss": 0.0299, "step": 90040 }, { "epoch": 1.382088865014197, "grad_norm": 0.41261881589889526, "learning_rate": 5.273125517878044e-06, "loss": 0.0292, "step": 90050 }, { "epoch": 1.382242345176886, "grad_norm": 0.3047337830066681, "learning_rate": 5.270765106179724e-06, "loss": 0.0263, "step": 90060 }, { "epoch": 1.3823958253395747, "grad_norm": 0.32180848717689514, "learning_rate": 5.268405033833182e-06, "loss": 0.0214, "step": 90070 }, { "epoch": 1.382549305502264, "grad_norm": 0.4703652858734131, "learning_rate": 5.266045301007757e-06, "loss": 0.0246, "step": 90080 }, { "epoch": 1.3827027856649527, "grad_norm": 0.45562225580215454, "learning_rate": 5.263685907872774e-06, "loss": 0.0255, "step": 90090 }, { "epoch": 1.3828562658276418, "grad_norm": 0.4089960753917694, "learning_rate": 5.261326854597541e-06, "loss": 0.0303, "step": 90100 }, { "epoch": 1.3830097459903308, "grad_norm": 0.7336492538452148, "learning_rate": 5.258968141351333e-06, "loss": 0.035, "step": 90110 }, { "epoch": 1.3831632261530198, "grad_norm": 0.3931660056114197, "learning_rate": 5.2566097683034e-06, "loss": 0.0293, "step": 90120 }, { "epoch": 1.3833167063157088, "grad_norm": 0.26684150099754333, "learning_rate": 5.2542517356229705e-06, "loss": 0.0266, "step": 90130 }, { "epoch": 1.3834701864783976, "grad_norm": 0.3714754283428192, "learning_rate": 5.251894043479246e-06, "loss": 0.024, "step": 90140 }, { "epoch": 1.3836236666410866, "grad_norm": 0.33771514892578125, "learning_rate": 5.2495366920414055e-06, "loss": 0.0296, "step": 90150 }, { "epoch": 1.3837771468037756, "grad_norm": 0.22505056858062744, "learning_rate": 5.247179681478609e-06, "loss": 0.0214, "step": 90160 }, { "epoch": 1.3839306269664646, "grad_norm": 0.3382042646408081, "learning_rate": 5.244823011959987e-06, "loss": 0.0198, "step": 90170 }, { "epoch": 1.3840841071291536, "grad_norm": 0.3497920036315918, "learning_rate": 5.2424666836546325e-06, "loss": 0.0231, "step": 90180 }, { "epoch": 1.3842375872918424, "grad_norm": 0.4396324157714844, "learning_rate": 5.24011069673164e-06, "loss": 0.0286, "step": 90190 }, { "epoch": 1.3843910674545314, "grad_norm": 0.32439348101615906, "learning_rate": 5.237755051360061e-06, "loss": 0.0257, "step": 90200 }, { "epoch": 1.3845445476172205, "grad_norm": 0.34950682520866394, "learning_rate": 5.235399747708926e-06, "loss": 0.0166, "step": 90210 }, { "epoch": 1.3846980277799095, "grad_norm": 0.34189802408218384, "learning_rate": 5.233044785947245e-06, "loss": 0.0291, "step": 90220 }, { "epoch": 1.3848515079425985, "grad_norm": 0.3462778329849243, "learning_rate": 5.230690166244001e-06, "loss": 0.0324, "step": 90230 }, { "epoch": 1.3850049881052873, "grad_norm": 0.4241170883178711, "learning_rate": 5.2283358887681515e-06, "loss": 0.0381, "step": 90240 }, { "epoch": 1.3851584682679763, "grad_norm": 0.39463406801223755, "learning_rate": 5.225981953688626e-06, "loss": 0.0313, "step": 90250 }, { "epoch": 1.3853119484306653, "grad_norm": 0.434193879365921, "learning_rate": 5.223628361174344e-06, "loss": 0.0277, "step": 90260 }, { "epoch": 1.3854654285933543, "grad_norm": 0.47958725690841675, "learning_rate": 5.221275111394188e-06, "loss": 0.0282, "step": 90270 }, { "epoch": 1.3856189087560433, "grad_norm": 0.2863827049732208, "learning_rate": 5.2189222045170075e-06, "loss": 0.0225, "step": 90280 }, { "epoch": 1.3857723889187323, "grad_norm": 0.5655280351638794, "learning_rate": 5.2165696407116484e-06, "loss": 0.0286, "step": 90290 }, { "epoch": 1.3859258690814213, "grad_norm": 0.47256964445114136, "learning_rate": 5.2142174201469205e-06, "loss": 0.0256, "step": 90300 }, { "epoch": 1.3860793492441101, "grad_norm": 0.4484594166278839, "learning_rate": 5.211865542991608e-06, "loss": 0.0259, "step": 90310 }, { "epoch": 1.3862328294067991, "grad_norm": 0.3041714131832123, "learning_rate": 5.209514009414473e-06, "loss": 0.0297, "step": 90320 }, { "epoch": 1.3863863095694882, "grad_norm": 0.23489132523536682, "learning_rate": 5.207162819584253e-06, "loss": 0.0235, "step": 90330 }, { "epoch": 1.3865397897321772, "grad_norm": 0.36226585507392883, "learning_rate": 5.204811973669659e-06, "loss": 0.026, "step": 90340 }, { "epoch": 1.3866932698948662, "grad_norm": 0.42062604427337646, "learning_rate": 5.202461471839376e-06, "loss": 0.0259, "step": 90350 }, { "epoch": 1.386846750057555, "grad_norm": 0.20723293721675873, "learning_rate": 5.200111314262075e-06, "loss": 0.0205, "step": 90360 }, { "epoch": 1.387000230220244, "grad_norm": 0.37749621272087097, "learning_rate": 5.1977615011063946e-06, "loss": 0.0356, "step": 90370 }, { "epoch": 1.387153710382933, "grad_norm": 0.36853042244911194, "learning_rate": 5.195412032540934e-06, "loss": 0.0423, "step": 90380 }, { "epoch": 1.387307190545622, "grad_norm": 0.3717348873615265, "learning_rate": 5.193062908734299e-06, "loss": 0.0264, "step": 90390 }, { "epoch": 1.387460670708311, "grad_norm": 0.3019425570964813, "learning_rate": 5.190714129855043e-06, "loss": 0.0258, "step": 90400 }, { "epoch": 1.3876141508709998, "grad_norm": 0.41836628317832947, "learning_rate": 5.188365696071707e-06, "loss": 0.0242, "step": 90410 }, { "epoch": 1.3877676310336888, "grad_norm": 0.36900871992111206, "learning_rate": 5.186017607552816e-06, "loss": 0.0322, "step": 90420 }, { "epoch": 1.3879211111963778, "grad_norm": 0.3289032578468323, "learning_rate": 5.1836698644668456e-06, "loss": 0.0257, "step": 90430 }, { "epoch": 1.3880745913590669, "grad_norm": 0.3227125406265259, "learning_rate": 5.181322466982264e-06, "loss": 0.0259, "step": 90440 }, { "epoch": 1.3882280715217559, "grad_norm": 0.320452481508255, "learning_rate": 5.178975415267518e-06, "loss": 0.0267, "step": 90450 }, { "epoch": 1.3883815516844447, "grad_norm": 0.29647958278656006, "learning_rate": 5.176628709491019e-06, "loss": 0.0267, "step": 90460 }, { "epoch": 1.388535031847134, "grad_norm": 0.26078131794929504, "learning_rate": 5.174282349821158e-06, "loss": 0.0188, "step": 90470 }, { "epoch": 1.3886885120098227, "grad_norm": 0.30127668380737305, "learning_rate": 5.1719363364263e-06, "loss": 0.0274, "step": 90480 }, { "epoch": 1.3888419921725117, "grad_norm": 0.3509054183959961, "learning_rate": 5.169590669474787e-06, "loss": 0.0242, "step": 90490 }, { "epoch": 1.3889954723352007, "grad_norm": 0.4880271255970001, "learning_rate": 5.167245349134933e-06, "loss": 0.0305, "step": 90500 }, { "epoch": 1.3891489524978897, "grad_norm": 0.19672949612140656, "learning_rate": 5.164900375575028e-06, "loss": 0.0259, "step": 90510 }, { "epoch": 1.3893024326605787, "grad_norm": 0.27958449721336365, "learning_rate": 5.1625557489633495e-06, "loss": 0.0278, "step": 90520 }, { "epoch": 1.3894559128232675, "grad_norm": 0.31538474559783936, "learning_rate": 5.160211469468126e-06, "loss": 0.0248, "step": 90530 }, { "epoch": 1.3896093929859565, "grad_norm": 0.23419661819934845, "learning_rate": 5.157867537257574e-06, "loss": 0.0267, "step": 90540 }, { "epoch": 1.3897628731486456, "grad_norm": 0.44742551445961, "learning_rate": 5.155523952499893e-06, "loss": 0.0302, "step": 90550 }, { "epoch": 1.3899163533113346, "grad_norm": 0.4387819766998291, "learning_rate": 5.153180715363247e-06, "loss": 0.0315, "step": 90560 }, { "epoch": 1.3900698334740236, "grad_norm": 0.5303804874420166, "learning_rate": 5.150837826015777e-06, "loss": 0.0301, "step": 90570 }, { "epoch": 1.3902233136367124, "grad_norm": 0.39488592743873596, "learning_rate": 5.1484952846256e-06, "loss": 0.0304, "step": 90580 }, { "epoch": 1.3903767937994014, "grad_norm": 0.29026108980178833, "learning_rate": 5.146153091360807e-06, "loss": 0.0217, "step": 90590 }, { "epoch": 1.3905302739620904, "grad_norm": 0.4035273492336273, "learning_rate": 5.143811246389465e-06, "loss": 0.0277, "step": 90600 }, { "epoch": 1.3906837541247794, "grad_norm": 0.49103429913520813, "learning_rate": 5.141469749879613e-06, "loss": 0.0307, "step": 90610 }, { "epoch": 1.3908372342874684, "grad_norm": 0.4677177369594574, "learning_rate": 5.139128601999278e-06, "loss": 0.0315, "step": 90620 }, { "epoch": 1.3909907144501572, "grad_norm": 0.195918008685112, "learning_rate": 5.136787802916441e-06, "loss": 0.0283, "step": 90630 }, { "epoch": 1.3911441946128462, "grad_norm": 0.5606734156608582, "learning_rate": 5.134447352799069e-06, "loss": 0.032, "step": 90640 }, { "epoch": 1.3912976747755352, "grad_norm": 0.6866191029548645, "learning_rate": 5.1321072518151105e-06, "loss": 0.0392, "step": 90650 }, { "epoch": 1.3914511549382242, "grad_norm": 0.3434235155582428, "learning_rate": 5.12976750013248e-06, "loss": 0.0209, "step": 90660 }, { "epoch": 1.3916046351009133, "grad_norm": 0.38948917388916016, "learning_rate": 5.127428097919067e-06, "loss": 0.0282, "step": 90670 }, { "epoch": 1.391758115263602, "grad_norm": 0.3127029836177826, "learning_rate": 5.125089045342739e-06, "loss": 0.027, "step": 90680 }, { "epoch": 1.3919115954262913, "grad_norm": 0.3502272963523865, "learning_rate": 5.122750342571339e-06, "loss": 0.0275, "step": 90690 }, { "epoch": 1.39206507558898, "grad_norm": 0.3407180905342102, "learning_rate": 5.120411989772676e-06, "loss": 0.028, "step": 90700 }, { "epoch": 1.392218555751669, "grad_norm": 0.5048041343688965, "learning_rate": 5.118073987114553e-06, "loss": 0.0254, "step": 90710 }, { "epoch": 1.392372035914358, "grad_norm": 0.3080536425113678, "learning_rate": 5.115736334764732e-06, "loss": 0.0228, "step": 90720 }, { "epoch": 1.392525516077047, "grad_norm": 0.4376019537448883, "learning_rate": 5.113399032890946e-06, "loss": 0.0245, "step": 90730 }, { "epoch": 1.3926789962397361, "grad_norm": 0.3040928840637207, "learning_rate": 5.11106208166092e-06, "loss": 0.0269, "step": 90740 }, { "epoch": 1.392832476402425, "grad_norm": 0.33609506487846375, "learning_rate": 5.1087254812423415e-06, "loss": 0.0236, "step": 90750 }, { "epoch": 1.392985956565114, "grad_norm": 0.4487485885620117, "learning_rate": 5.106389231802877e-06, "loss": 0.0229, "step": 90760 }, { "epoch": 1.393139436727803, "grad_norm": 0.583465039730072, "learning_rate": 5.1040533335101645e-06, "loss": 0.0308, "step": 90770 }, { "epoch": 1.393292916890492, "grad_norm": 0.32530146837234497, "learning_rate": 5.101717786531821e-06, "loss": 0.037, "step": 90780 }, { "epoch": 1.393446397053181, "grad_norm": 0.30641573667526245, "learning_rate": 5.099382591035438e-06, "loss": 0.0246, "step": 90790 }, { "epoch": 1.3935998772158698, "grad_norm": 0.2922701835632324, "learning_rate": 5.0970477471885726e-06, "loss": 0.0219, "step": 90800 }, { "epoch": 1.3937533573785588, "grad_norm": 0.45295074582099915, "learning_rate": 5.094713255158776e-06, "loss": 0.0265, "step": 90810 }, { "epoch": 1.3939068375412478, "grad_norm": 0.45268580317497253, "learning_rate": 5.092379115113555e-06, "loss": 0.0266, "step": 90820 }, { "epoch": 1.3940603177039368, "grad_norm": 0.29481056332588196, "learning_rate": 5.090045327220394e-06, "loss": 0.0229, "step": 90830 }, { "epoch": 1.3942137978666258, "grad_norm": 0.30884161591529846, "learning_rate": 5.0877118916467686e-06, "loss": 0.0228, "step": 90840 }, { "epoch": 1.3943672780293146, "grad_norm": 0.3482392728328705, "learning_rate": 5.085378808560109e-06, "loss": 0.0282, "step": 90850 }, { "epoch": 1.3945207581920036, "grad_norm": 0.36892077326774597, "learning_rate": 5.083046078127832e-06, "loss": 0.0269, "step": 90860 }, { "epoch": 1.3946742383546926, "grad_norm": 0.5657491683959961, "learning_rate": 5.080713700517322e-06, "loss": 0.0251, "step": 90870 }, { "epoch": 1.3948277185173816, "grad_norm": 0.3841194212436676, "learning_rate": 5.078381675895942e-06, "loss": 0.0229, "step": 90880 }, { "epoch": 1.3949811986800706, "grad_norm": 0.2968811094760895, "learning_rate": 5.076050004431034e-06, "loss": 0.0242, "step": 90890 }, { "epoch": 1.3951346788427594, "grad_norm": 0.47693076729774475, "learning_rate": 5.073718686289899e-06, "loss": 0.0383, "step": 90900 }, { "epoch": 1.3952881590054487, "grad_norm": 0.3252164125442505, "learning_rate": 5.07138772163984e-06, "loss": 0.0286, "step": 90910 }, { "epoch": 1.3954416391681375, "grad_norm": 0.487953245639801, "learning_rate": 5.069057110648105e-06, "loss": 0.023, "step": 90920 }, { "epoch": 1.3955951193308265, "grad_norm": 0.3131556808948517, "learning_rate": 5.06672685348193e-06, "loss": 0.0258, "step": 90930 }, { "epoch": 1.3957485994935155, "grad_norm": 0.41023537516593933, "learning_rate": 5.0643969503085295e-06, "loss": 0.0289, "step": 90940 }, { "epoch": 1.3959020796562045, "grad_norm": 0.39981576800346375, "learning_rate": 5.06206740129509e-06, "loss": 0.024, "step": 90950 }, { "epoch": 1.3960555598188935, "grad_norm": 0.36609697341918945, "learning_rate": 5.059738206608768e-06, "loss": 0.0289, "step": 90960 }, { "epoch": 1.3962090399815823, "grad_norm": 0.3733054995536804, "learning_rate": 5.057409366416699e-06, "loss": 0.0252, "step": 90970 }, { "epoch": 1.3963625201442713, "grad_norm": 0.31104445457458496, "learning_rate": 5.055080880885991e-06, "loss": 0.0334, "step": 90980 }, { "epoch": 1.3965160003069603, "grad_norm": 0.43239378929138184, "learning_rate": 5.052752750183722e-06, "loss": 0.0331, "step": 90990 }, { "epoch": 1.3966694804696493, "grad_norm": 0.3979281783103943, "learning_rate": 5.050424974476959e-06, "loss": 0.0225, "step": 91000 }, { "epoch": 1.3968229606323384, "grad_norm": 0.472807377576828, "learning_rate": 5.048097553932735e-06, "loss": 0.0274, "step": 91010 }, { "epoch": 1.3969764407950271, "grad_norm": 0.5825197696685791, "learning_rate": 5.045770488718044e-06, "loss": 0.0264, "step": 91020 }, { "epoch": 1.3971299209577162, "grad_norm": 0.3356363773345947, "learning_rate": 5.043443778999878e-06, "loss": 0.0285, "step": 91030 }, { "epoch": 1.3972834011204052, "grad_norm": 0.3169238865375519, "learning_rate": 5.04111742494519e-06, "loss": 0.0299, "step": 91040 }, { "epoch": 1.3974368812830942, "grad_norm": 0.33948782086372375, "learning_rate": 5.0387914267209106e-06, "loss": 0.0209, "step": 91050 }, { "epoch": 1.3975903614457832, "grad_norm": 0.3518749475479126, "learning_rate": 5.036465784493944e-06, "loss": 0.0257, "step": 91060 }, { "epoch": 1.397743841608472, "grad_norm": 0.34029221534729004, "learning_rate": 5.034140498431169e-06, "loss": 0.0256, "step": 91070 }, { "epoch": 1.397897321771161, "grad_norm": 0.3296909034252167, "learning_rate": 5.03181556869944e-06, "loss": 0.0316, "step": 91080 }, { "epoch": 1.39805080193385, "grad_norm": 0.3107759952545166, "learning_rate": 5.029490995465581e-06, "loss": 0.0232, "step": 91090 }, { "epoch": 1.398204282096539, "grad_norm": 0.3973942697048187, "learning_rate": 5.027166778896402e-06, "loss": 0.0309, "step": 91100 }, { "epoch": 1.398357762259228, "grad_norm": 0.3540274500846863, "learning_rate": 5.02484291915868e-06, "loss": 0.0294, "step": 91110 }, { "epoch": 1.398511242421917, "grad_norm": 0.3852299451828003, "learning_rate": 5.022519416419154e-06, "loss": 0.0224, "step": 91120 }, { "epoch": 1.398664722584606, "grad_norm": 0.3954993188381195, "learning_rate": 5.020196270844563e-06, "loss": 0.0238, "step": 91130 }, { "epoch": 1.3988182027472948, "grad_norm": 0.32485318183898926, "learning_rate": 5.017873482601601e-06, "loss": 0.0264, "step": 91140 }, { "epoch": 1.3989716829099839, "grad_norm": 0.3372601568698883, "learning_rate": 5.0155510518569435e-06, "loss": 0.0256, "step": 91150 }, { "epoch": 1.3991251630726729, "grad_norm": 0.5048671960830688, "learning_rate": 5.0132289787772396e-06, "loss": 0.032, "step": 91160 }, { "epoch": 1.3992786432353619, "grad_norm": 0.5310992002487183, "learning_rate": 5.010907263529111e-06, "loss": 0.0281, "step": 91170 }, { "epoch": 1.399432123398051, "grad_norm": 0.4686654210090637, "learning_rate": 5.008585906279157e-06, "loss": 0.0202, "step": 91180 }, { "epoch": 1.3995856035607397, "grad_norm": 0.6345394253730774, "learning_rate": 5.006264907193943e-06, "loss": 0.0247, "step": 91190 }, { "epoch": 1.3997390837234287, "grad_norm": 0.2919403612613678, "learning_rate": 5.003944266440025e-06, "loss": 0.0196, "step": 91200 }, { "epoch": 1.3998925638861177, "grad_norm": 0.32970863580703735, "learning_rate": 5.001623984183923e-06, "loss": 0.024, "step": 91210 }, { "epoch": 1.4000460440488067, "grad_norm": 0.41708123683929443, "learning_rate": 4.999304060592118e-06, "loss": 0.0225, "step": 91220 }, { "epoch": 1.4001995242114957, "grad_norm": 0.49638593196868896, "learning_rate": 4.9969844958310934e-06, "loss": 0.0281, "step": 91230 }, { "epoch": 1.4003530043741845, "grad_norm": 0.3655067980289459, "learning_rate": 4.994665290067286e-06, "loss": 0.0328, "step": 91240 }, { "epoch": 1.4005064845368735, "grad_norm": 0.3010859191417694, "learning_rate": 4.992346443467109e-06, "loss": 0.0303, "step": 91250 }, { "epoch": 1.4006599646995626, "grad_norm": 0.3542807698249817, "learning_rate": 4.990027956196968e-06, "loss": 0.0248, "step": 91260 }, { "epoch": 1.4008134448622516, "grad_norm": 0.3480987846851349, "learning_rate": 4.987709828423215e-06, "loss": 0.0251, "step": 91270 }, { "epoch": 1.4009669250249406, "grad_norm": 0.4403047263622284, "learning_rate": 4.985392060312191e-06, "loss": 0.0266, "step": 91280 }, { "epoch": 1.4011204051876294, "grad_norm": 0.36799201369285583, "learning_rate": 4.983074652030218e-06, "loss": 0.029, "step": 91290 }, { "epoch": 1.4012738853503186, "grad_norm": 0.38967591524124146, "learning_rate": 4.98075760374358e-06, "loss": 0.0314, "step": 91300 }, { "epoch": 1.4014273655130074, "grad_norm": 0.3155238628387451, "learning_rate": 4.978440915618539e-06, "loss": 0.0235, "step": 91310 }, { "epoch": 1.4015808456756964, "grad_norm": 0.3802756071090698, "learning_rate": 4.976124587821332e-06, "loss": 0.0354, "step": 91320 }, { "epoch": 1.4017343258383854, "grad_norm": 0.3019067049026489, "learning_rate": 4.973808620518172e-06, "loss": 0.0228, "step": 91330 }, { "epoch": 1.4018878060010744, "grad_norm": 0.5241510272026062, "learning_rate": 4.97149301387524e-06, "loss": 0.034, "step": 91340 }, { "epoch": 1.4020412861637634, "grad_norm": 0.3320956528186798, "learning_rate": 4.969177768058694e-06, "loss": 0.0256, "step": 91350 }, { "epoch": 1.4021947663264522, "grad_norm": 0.3287354111671448, "learning_rate": 4.966862883234678e-06, "loss": 0.0193, "step": 91360 }, { "epoch": 1.4023482464891412, "grad_norm": 0.4215399920940399, "learning_rate": 4.964548359569287e-06, "loss": 0.0246, "step": 91370 }, { "epoch": 1.4025017266518303, "grad_norm": 0.4428294599056244, "learning_rate": 4.962234197228604e-06, "loss": 0.0311, "step": 91380 }, { "epoch": 1.4026552068145193, "grad_norm": 0.2647292912006378, "learning_rate": 4.95992039637869e-06, "loss": 0.024, "step": 91390 }, { "epoch": 1.4028086869772083, "grad_norm": 0.5005216002464294, "learning_rate": 4.9576069571855725e-06, "loss": 0.0335, "step": 91400 }, { "epoch": 1.402962167139897, "grad_norm": 0.47925445437431335, "learning_rate": 4.955293879815252e-06, "loss": 0.027, "step": 91410 }, { "epoch": 1.403115647302586, "grad_norm": 0.323850154876709, "learning_rate": 4.952981164433711e-06, "loss": 0.0248, "step": 91420 }, { "epoch": 1.403269127465275, "grad_norm": 0.4824916422367096, "learning_rate": 4.950668811206898e-06, "loss": 0.0255, "step": 91430 }, { "epoch": 1.4034226076279641, "grad_norm": 0.48046183586120605, "learning_rate": 4.9483568203007395e-06, "loss": 0.0326, "step": 91440 }, { "epoch": 1.4035760877906531, "grad_norm": 0.34713417291641235, "learning_rate": 4.946045191881129e-06, "loss": 0.0278, "step": 91450 }, { "epoch": 1.403729567953342, "grad_norm": 0.388753741979599, "learning_rate": 4.943733926113955e-06, "loss": 0.0313, "step": 91460 }, { "epoch": 1.403883048116031, "grad_norm": 0.37787482142448425, "learning_rate": 4.941423023165052e-06, "loss": 0.0242, "step": 91470 }, { "epoch": 1.40403652827872, "grad_norm": 0.4684358537197113, "learning_rate": 4.93911248320024e-06, "loss": 0.0317, "step": 91480 }, { "epoch": 1.404190008441409, "grad_norm": 0.302223265171051, "learning_rate": 4.936802306385324e-06, "loss": 0.0301, "step": 91490 }, { "epoch": 1.404343488604098, "grad_norm": 0.34887930750846863, "learning_rate": 4.93449249288607e-06, "loss": 0.0278, "step": 91500 }, { "epoch": 1.4044969687667868, "grad_norm": 0.3333304822444916, "learning_rate": 4.932183042868221e-06, "loss": 0.0202, "step": 91510 }, { "epoch": 1.404650448929476, "grad_norm": 0.29352450370788574, "learning_rate": 4.929873956497493e-06, "loss": 0.0242, "step": 91520 }, { "epoch": 1.4048039290921648, "grad_norm": 0.3581608831882477, "learning_rate": 4.9275652339395785e-06, "loss": 0.0281, "step": 91530 }, { "epoch": 1.4049574092548538, "grad_norm": 0.6182976365089417, "learning_rate": 4.9252568753601385e-06, "loss": 0.03, "step": 91540 }, { "epoch": 1.4051108894175428, "grad_norm": 0.3363160789012909, "learning_rate": 4.9229488809248196e-06, "loss": 0.0266, "step": 91550 }, { "epoch": 1.4052643695802318, "grad_norm": 0.394642174243927, "learning_rate": 4.920641250799234e-06, "loss": 0.0271, "step": 91560 }, { "epoch": 1.4054178497429208, "grad_norm": 0.49067750573158264, "learning_rate": 4.9183339851489574e-06, "loss": 0.0238, "step": 91570 }, { "epoch": 1.4055713299056096, "grad_norm": 0.40158402919769287, "learning_rate": 4.916027084139562e-06, "loss": 0.0229, "step": 91580 }, { "epoch": 1.4057248100682986, "grad_norm": 0.3879950940608978, "learning_rate": 4.913720547936578e-06, "loss": 0.0273, "step": 91590 }, { "epoch": 1.4058782902309876, "grad_norm": 0.44001543521881104, "learning_rate": 4.911414376705515e-06, "loss": 0.0361, "step": 91600 }, { "epoch": 1.4060317703936767, "grad_norm": 0.35094135999679565, "learning_rate": 4.909108570611852e-06, "loss": 0.0188, "step": 91610 }, { "epoch": 1.4061852505563657, "grad_norm": 0.3428800702095032, "learning_rate": 4.906803129821047e-06, "loss": 0.0229, "step": 91620 }, { "epoch": 1.4063387307190545, "grad_norm": 0.4020021855831146, "learning_rate": 4.90449805449853e-06, "loss": 0.0247, "step": 91630 }, { "epoch": 1.4064922108817435, "grad_norm": 0.4720468521118164, "learning_rate": 4.902193344809699e-06, "loss": 0.0283, "step": 91640 }, { "epoch": 1.4066456910444325, "grad_norm": 0.3618486225605011, "learning_rate": 4.899889000919941e-06, "loss": 0.0303, "step": 91650 }, { "epoch": 1.4067991712071215, "grad_norm": 0.4330616891384125, "learning_rate": 4.897585022994604e-06, "loss": 0.0233, "step": 91660 }, { "epoch": 1.4069526513698105, "grad_norm": 0.315652996301651, "learning_rate": 4.895281411199002e-06, "loss": 0.0253, "step": 91670 }, { "epoch": 1.4071061315324993, "grad_norm": 0.44161298871040344, "learning_rate": 4.892978165698446e-06, "loss": 0.0272, "step": 91680 }, { "epoch": 1.4072596116951883, "grad_norm": 0.333640456199646, "learning_rate": 4.8906752866582055e-06, "loss": 0.026, "step": 91690 }, { "epoch": 1.4074130918578773, "grad_norm": 0.3607243299484253, "learning_rate": 4.888372774243521e-06, "loss": 0.0275, "step": 91700 }, { "epoch": 1.4075665720205663, "grad_norm": 0.4139453172683716, "learning_rate": 4.886070628619618e-06, "loss": 0.0312, "step": 91710 }, { "epoch": 1.4077200521832554, "grad_norm": 0.4212114214897156, "learning_rate": 4.883768849951687e-06, "loss": 0.0252, "step": 91720 }, { "epoch": 1.4078735323459444, "grad_norm": 0.3171423375606537, "learning_rate": 4.881467438404893e-06, "loss": 0.0206, "step": 91730 }, { "epoch": 1.4080270125086334, "grad_norm": 0.38140493631362915, "learning_rate": 4.8791663941443764e-06, "loss": 0.0296, "step": 91740 }, { "epoch": 1.4081804926713222, "grad_norm": 0.30975326895713806, "learning_rate": 4.876865717335256e-06, "loss": 0.025, "step": 91750 }, { "epoch": 1.4083339728340112, "grad_norm": 0.31433311104774475, "learning_rate": 4.874565408142621e-06, "loss": 0.0255, "step": 91760 }, { "epoch": 1.4084874529967002, "grad_norm": 0.6103115081787109, "learning_rate": 4.87226546673152e-06, "loss": 0.0231, "step": 91770 }, { "epoch": 1.4086409331593892, "grad_norm": 0.537093460559845, "learning_rate": 4.8699658932670015e-06, "loss": 0.0296, "step": 91780 }, { "epoch": 1.4087944133220782, "grad_norm": 0.4501380920410156, "learning_rate": 4.867666687914069e-06, "loss": 0.029, "step": 91790 }, { "epoch": 1.408947893484767, "grad_norm": 0.33805954456329346, "learning_rate": 4.8653678508377004e-06, "loss": 0.021, "step": 91800 }, { "epoch": 1.409101373647456, "grad_norm": 0.48221707344055176, "learning_rate": 4.863069382202864e-06, "loss": 0.0259, "step": 91810 }, { "epoch": 1.409254853810145, "grad_norm": 0.4151323139667511, "learning_rate": 4.860771282174477e-06, "loss": 0.0278, "step": 91820 }, { "epoch": 1.409408333972834, "grad_norm": 0.3963356912136078, "learning_rate": 4.858473550917442e-06, "loss": 0.021, "step": 91830 }, { "epoch": 1.409561814135523, "grad_norm": 0.3664650321006775, "learning_rate": 4.856176188596643e-06, "loss": 0.025, "step": 91840 }, { "epoch": 1.4097152942982119, "grad_norm": 0.4521973133087158, "learning_rate": 4.853879195376927e-06, "loss": 0.0243, "step": 91850 }, { "epoch": 1.4098687744609009, "grad_norm": 0.32710108160972595, "learning_rate": 4.8515825714231165e-06, "loss": 0.0248, "step": 91860 }, { "epoch": 1.4100222546235899, "grad_norm": 0.3641218841075897, "learning_rate": 4.84928631690001e-06, "loss": 0.0199, "step": 91870 }, { "epoch": 1.410175734786279, "grad_norm": 0.28429460525512695, "learning_rate": 4.846990431972376e-06, "loss": 0.0321, "step": 91880 }, { "epoch": 1.410329214948968, "grad_norm": 0.33838221430778503, "learning_rate": 4.844694916804959e-06, "loss": 0.0296, "step": 91890 }, { "epoch": 1.4104826951116567, "grad_norm": 0.37811407446861267, "learning_rate": 4.842399771562473e-06, "loss": 0.0269, "step": 91900 }, { "epoch": 1.410636175274346, "grad_norm": 0.2587730884552002, "learning_rate": 4.8401049964096204e-06, "loss": 0.0276, "step": 91910 }, { "epoch": 1.4107896554370347, "grad_norm": 0.5561270713806152, "learning_rate": 4.837810591511053e-06, "loss": 0.0304, "step": 91920 }, { "epoch": 1.4109431355997237, "grad_norm": 0.49582070112228394, "learning_rate": 4.835516557031409e-06, "loss": 0.0313, "step": 91930 }, { "epoch": 1.4110966157624127, "grad_norm": 0.35859209299087524, "learning_rate": 4.833222893135307e-06, "loss": 0.029, "step": 91940 }, { "epoch": 1.4112500959251018, "grad_norm": 0.3159336745738983, "learning_rate": 4.830929599987329e-06, "loss": 0.0243, "step": 91950 }, { "epoch": 1.4114035760877908, "grad_norm": 0.3483024835586548, "learning_rate": 4.828636677752031e-06, "loss": 0.0242, "step": 91960 }, { "epoch": 1.4115570562504796, "grad_norm": 0.22397686541080475, "learning_rate": 4.826344126593945e-06, "loss": 0.0305, "step": 91970 }, { "epoch": 1.4117105364131686, "grad_norm": 0.2459166795015335, "learning_rate": 4.824051946677577e-06, "loss": 0.0263, "step": 91980 }, { "epoch": 1.4118640165758576, "grad_norm": 0.30500033497810364, "learning_rate": 4.821760138167403e-06, "loss": 0.0224, "step": 91990 }, { "epoch": 1.4120174967385466, "grad_norm": 0.2612284719944, "learning_rate": 4.819468701227871e-06, "loss": 0.0234, "step": 92000 }, { "epoch": 1.4121709769012356, "grad_norm": 0.3142153322696686, "learning_rate": 4.8171776360234205e-06, "loss": 0.0323, "step": 92010 }, { "epoch": 1.4123244570639244, "grad_norm": 0.2562502324581146, "learning_rate": 4.814886942718433e-06, "loss": 0.0257, "step": 92020 }, { "epoch": 1.4124779372266134, "grad_norm": 0.25671976804733276, "learning_rate": 4.8125966214772825e-06, "loss": 0.0224, "step": 92030 }, { "epoch": 1.4126314173893024, "grad_norm": 0.47874805331230164, "learning_rate": 4.810306672464321e-06, "loss": 0.0263, "step": 92040 }, { "epoch": 1.4127848975519914, "grad_norm": 0.4479101300239563, "learning_rate": 4.808017095843864e-06, "loss": 0.0302, "step": 92050 }, { "epoch": 1.4129383777146804, "grad_norm": 0.2587141990661621, "learning_rate": 4.805727891780199e-06, "loss": 0.0269, "step": 92060 }, { "epoch": 1.4130918578773692, "grad_norm": 0.46892213821411133, "learning_rate": 4.803439060437594e-06, "loss": 0.0249, "step": 92070 }, { "epoch": 1.4132453380400583, "grad_norm": 0.33233070373535156, "learning_rate": 4.801150601980285e-06, "loss": 0.026, "step": 92080 }, { "epoch": 1.4133988182027473, "grad_norm": 0.43024203181266785, "learning_rate": 4.7988625165724815e-06, "loss": 0.0221, "step": 92090 }, { "epoch": 1.4135522983654363, "grad_norm": 0.5169573426246643, "learning_rate": 4.796574804378373e-06, "loss": 0.0271, "step": 92100 }, { "epoch": 1.4137057785281253, "grad_norm": 0.3442826271057129, "learning_rate": 4.794287465562117e-06, "loss": 0.0287, "step": 92110 }, { "epoch": 1.413859258690814, "grad_norm": 0.47368526458740234, "learning_rate": 4.792000500287834e-06, "loss": 0.0318, "step": 92120 }, { "epoch": 1.4140127388535033, "grad_norm": 0.5281981229782104, "learning_rate": 4.7897139087196385e-06, "loss": 0.0356, "step": 92130 }, { "epoch": 1.414166219016192, "grad_norm": 0.3224412500858307, "learning_rate": 4.787427691021604e-06, "loss": 0.0246, "step": 92140 }, { "epoch": 1.4143196991788811, "grad_norm": 0.2920377552509308, "learning_rate": 4.785141847357782e-06, "loss": 0.0251, "step": 92150 }, { "epoch": 1.4144731793415701, "grad_norm": 0.31687796115875244, "learning_rate": 4.7828563778921925e-06, "loss": 0.0275, "step": 92160 }, { "epoch": 1.4146266595042591, "grad_norm": 0.32018712162971497, "learning_rate": 4.780571282788835e-06, "loss": 0.0364, "step": 92170 }, { "epoch": 1.4147801396669482, "grad_norm": 0.5229467749595642, "learning_rate": 4.778286562211678e-06, "loss": 0.0329, "step": 92180 }, { "epoch": 1.414933619829637, "grad_norm": 0.35626131296157837, "learning_rate": 4.776002216324661e-06, "loss": 0.0301, "step": 92190 }, { "epoch": 1.415087099992326, "grad_norm": 0.31792551279067993, "learning_rate": 4.773718245291708e-06, "loss": 0.0213, "step": 92200 }, { "epoch": 1.415240580155015, "grad_norm": 0.3846854865550995, "learning_rate": 4.771434649276705e-06, "loss": 0.028, "step": 92210 }, { "epoch": 1.415394060317704, "grad_norm": 0.3288409411907196, "learning_rate": 4.769151428443507e-06, "loss": 0.0228, "step": 92220 }, { "epoch": 1.415547540480393, "grad_norm": 0.3978468179702759, "learning_rate": 4.766868582955958e-06, "loss": 0.0237, "step": 92230 }, { "epoch": 1.4157010206430818, "grad_norm": 0.2859979569911957, "learning_rate": 4.764586112977863e-06, "loss": 0.0207, "step": 92240 }, { "epoch": 1.4158545008057708, "grad_norm": 0.42506295442581177, "learning_rate": 4.762304018673002e-06, "loss": 0.0264, "step": 92250 }, { "epoch": 1.4160079809684598, "grad_norm": 0.42955276370048523, "learning_rate": 4.760022300205131e-06, "loss": 0.028, "step": 92260 }, { "epoch": 1.4161614611311488, "grad_norm": 0.34934210777282715, "learning_rate": 4.757740957737979e-06, "loss": 0.0315, "step": 92270 }, { "epoch": 1.4163149412938378, "grad_norm": 0.45683223009109497, "learning_rate": 4.755459991435243e-06, "loss": 0.029, "step": 92280 }, { "epoch": 1.4164684214565266, "grad_norm": 0.2912633717060089, "learning_rate": 4.7531794014605934e-06, "loss": 0.0296, "step": 92290 }, { "epoch": 1.4166219016192156, "grad_norm": 0.39066290855407715, "learning_rate": 4.7508991879776855e-06, "loss": 0.0291, "step": 92300 }, { "epoch": 1.4167753817819047, "grad_norm": 0.3103289306163788, "learning_rate": 4.748619351150134e-06, "loss": 0.0267, "step": 92310 }, { "epoch": 1.4169288619445937, "grad_norm": 0.5187187790870667, "learning_rate": 4.746339891141531e-06, "loss": 0.032, "step": 92320 }, { "epoch": 1.4170823421072827, "grad_norm": 0.31088724732398987, "learning_rate": 4.744060808115444e-06, "loss": 0.0266, "step": 92330 }, { "epoch": 1.4172358222699715, "grad_norm": 0.3697698712348938, "learning_rate": 4.741782102235407e-06, "loss": 0.0265, "step": 92340 }, { "epoch": 1.4173893024326607, "grad_norm": 0.38466107845306396, "learning_rate": 4.739503773664932e-06, "loss": 0.0232, "step": 92350 }, { "epoch": 1.4175427825953495, "grad_norm": 0.3512497544288635, "learning_rate": 4.73722582256751e-06, "loss": 0.024, "step": 92360 }, { "epoch": 1.4176962627580385, "grad_norm": 0.3410455286502838, "learning_rate": 4.734948249106591e-06, "loss": 0.0307, "step": 92370 }, { "epoch": 1.4178497429207275, "grad_norm": 0.4172651469707489, "learning_rate": 4.732671053445603e-06, "loss": 0.0229, "step": 92380 }, { "epoch": 1.4180032230834165, "grad_norm": 0.5534937977790833, "learning_rate": 4.730394235747956e-06, "loss": 0.0232, "step": 92390 }, { "epoch": 1.4181567032461055, "grad_norm": 0.21374846994876862, "learning_rate": 4.728117796177022e-06, "loss": 0.0235, "step": 92400 }, { "epoch": 1.4183101834087943, "grad_norm": 0.3069536089897156, "learning_rate": 4.72584173489615e-06, "loss": 0.0261, "step": 92410 }, { "epoch": 1.4184636635714833, "grad_norm": 0.3631099462509155, "learning_rate": 4.723566052068661e-06, "loss": 0.0228, "step": 92420 }, { "epoch": 1.4186171437341724, "grad_norm": 0.40771833062171936, "learning_rate": 4.721290747857851e-06, "loss": 0.0288, "step": 92430 }, { "epoch": 1.4187706238968614, "grad_norm": 0.36398744583129883, "learning_rate": 4.719015822426984e-06, "loss": 0.0295, "step": 92440 }, { "epoch": 1.4189241040595504, "grad_norm": 0.39528822898864746, "learning_rate": 4.716741275939299e-06, "loss": 0.0333, "step": 92450 }, { "epoch": 1.4190775842222392, "grad_norm": 0.4511311948299408, "learning_rate": 4.7144671085580186e-06, "loss": 0.0269, "step": 92460 }, { "epoch": 1.4192310643849282, "grad_norm": 0.29397472739219666, "learning_rate": 4.712193320446318e-06, "loss": 0.0268, "step": 92470 }, { "epoch": 1.4193845445476172, "grad_norm": 0.3156988024711609, "learning_rate": 4.709919911767355e-06, "loss": 0.0224, "step": 92480 }, { "epoch": 1.4195380247103062, "grad_norm": 0.33107277750968933, "learning_rate": 4.707646882684267e-06, "loss": 0.0266, "step": 92490 }, { "epoch": 1.4196915048729952, "grad_norm": 0.38576000928878784, "learning_rate": 4.705374233360155e-06, "loss": 0.0326, "step": 92500 }, { "epoch": 1.419844985035684, "grad_norm": 0.3319009244441986, "learning_rate": 4.703101963958096e-06, "loss": 0.0282, "step": 92510 }, { "epoch": 1.419998465198373, "grad_norm": 0.3882185220718384, "learning_rate": 4.700830074641141e-06, "loss": 0.0233, "step": 92520 }, { "epoch": 1.420151945361062, "grad_norm": 0.37294456362724304, "learning_rate": 4.698558565572308e-06, "loss": 0.027, "step": 92530 }, { "epoch": 1.420305425523751, "grad_norm": 0.2893548607826233, "learning_rate": 4.696287436914595e-06, "loss": 0.0253, "step": 92540 }, { "epoch": 1.42045890568644, "grad_norm": 0.438892662525177, "learning_rate": 4.6940166888309646e-06, "loss": 0.0271, "step": 92550 }, { "epoch": 1.420612385849129, "grad_norm": 0.5327551960945129, "learning_rate": 4.691746321484368e-06, "loss": 0.028, "step": 92560 }, { "epoch": 1.420765866011818, "grad_norm": 0.34838107228279114, "learning_rate": 4.689476335037708e-06, "loss": 0.0296, "step": 92570 }, { "epoch": 1.4209193461745069, "grad_norm": 0.3725984990596771, "learning_rate": 4.687206729653868e-06, "loss": 0.0254, "step": 92580 }, { "epoch": 1.421072826337196, "grad_norm": 0.5582603812217712, "learning_rate": 4.6849375054957165e-06, "loss": 0.0272, "step": 92590 }, { "epoch": 1.421226306499885, "grad_norm": 0.3119427263736725, "learning_rate": 4.6826686627260784e-06, "loss": 0.0281, "step": 92600 }, { "epoch": 1.421379786662574, "grad_norm": 0.32897457480430603, "learning_rate": 4.680400201507758e-06, "loss": 0.0273, "step": 92610 }, { "epoch": 1.421533266825263, "grad_norm": 0.4469063878059387, "learning_rate": 4.678132122003532e-06, "loss": 0.0258, "step": 92620 }, { "epoch": 1.4216867469879517, "grad_norm": 0.33014050126075745, "learning_rate": 4.675864424376146e-06, "loss": 0.028, "step": 92630 }, { "epoch": 1.4218402271506407, "grad_norm": 0.2997905910015106, "learning_rate": 4.673597108788321e-06, "loss": 0.0237, "step": 92640 }, { "epoch": 1.4219937073133297, "grad_norm": 0.43407002091407776, "learning_rate": 4.671330175402759e-06, "loss": 0.0266, "step": 92650 }, { "epoch": 1.4221471874760188, "grad_norm": 0.42643898725509644, "learning_rate": 4.669063624382123e-06, "loss": 0.0286, "step": 92660 }, { "epoch": 1.4223006676387078, "grad_norm": 0.3754884600639343, "learning_rate": 4.666797455889043e-06, "loss": 0.0286, "step": 92670 }, { "epoch": 1.4224541478013966, "grad_norm": 0.26390716433525085, "learning_rate": 4.66453167008614e-06, "loss": 0.022, "step": 92680 }, { "epoch": 1.4226076279640856, "grad_norm": 0.28912511467933655, "learning_rate": 4.662266267135996e-06, "loss": 0.0228, "step": 92690 }, { "epoch": 1.4227611081267746, "grad_norm": 0.49445199966430664, "learning_rate": 4.660001247201168e-06, "loss": 0.0347, "step": 92700 }, { "epoch": 1.4229145882894636, "grad_norm": 0.43425342440605164, "learning_rate": 4.657736610444185e-06, "loss": 0.0301, "step": 92710 }, { "epoch": 1.4230680684521526, "grad_norm": 0.2868424355983734, "learning_rate": 4.655472357027547e-06, "loss": 0.0246, "step": 92720 }, { "epoch": 1.4232215486148414, "grad_norm": 0.35682550072669983, "learning_rate": 4.653208487113729e-06, "loss": 0.0266, "step": 92730 }, { "epoch": 1.4233750287775306, "grad_norm": 0.45401135087013245, "learning_rate": 4.650945000865174e-06, "loss": 0.0356, "step": 92740 }, { "epoch": 1.4235285089402194, "grad_norm": 0.4182145595550537, "learning_rate": 4.6486818984443105e-06, "loss": 0.0265, "step": 92750 }, { "epoch": 1.4236819891029084, "grad_norm": 0.24472837150096893, "learning_rate": 4.646419180013527e-06, "loss": 0.0284, "step": 92760 }, { "epoch": 1.4238354692655975, "grad_norm": 0.4932940900325775, "learning_rate": 4.644156845735177e-06, "loss": 0.0261, "step": 92770 }, { "epoch": 1.4239889494282865, "grad_norm": 0.3441027104854584, "learning_rate": 4.641894895771609e-06, "loss": 0.0311, "step": 92780 }, { "epoch": 1.4241424295909755, "grad_norm": 0.4858127534389496, "learning_rate": 4.639633330285128e-06, "loss": 0.0263, "step": 92790 }, { "epoch": 1.4242959097536643, "grad_norm": 0.48578551411628723, "learning_rate": 4.6373721494380155e-06, "loss": 0.0265, "step": 92800 }, { "epoch": 1.4244493899163533, "grad_norm": 0.43131059408187866, "learning_rate": 4.635111353392524e-06, "loss": 0.0284, "step": 92810 }, { "epoch": 1.4246028700790423, "grad_norm": 0.47274479269981384, "learning_rate": 4.63285094231088e-06, "loss": 0.0333, "step": 92820 }, { "epoch": 1.4247563502417313, "grad_norm": 0.2889518141746521, "learning_rate": 4.630590916355283e-06, "loss": 0.0261, "step": 92830 }, { "epoch": 1.4249098304044203, "grad_norm": 0.2394586205482483, "learning_rate": 4.6283312756878995e-06, "loss": 0.0288, "step": 92840 }, { "epoch": 1.425063310567109, "grad_norm": 0.45765721797943115, "learning_rate": 4.6260720204708785e-06, "loss": 0.0254, "step": 92850 }, { "epoch": 1.4252167907297981, "grad_norm": 0.2845931053161621, "learning_rate": 4.6238131508663344e-06, "loss": 0.031, "step": 92860 }, { "epoch": 1.4253702708924871, "grad_norm": 0.3188953697681427, "learning_rate": 4.6215546670363546e-06, "loss": 0.0217, "step": 92870 }, { "epoch": 1.4255237510551761, "grad_norm": 0.37380778789520264, "learning_rate": 4.619296569142997e-06, "loss": 0.0355, "step": 92880 }, { "epoch": 1.4256772312178652, "grad_norm": 0.497604638338089, "learning_rate": 4.617038857348295e-06, "loss": 0.0261, "step": 92890 }, { "epoch": 1.425830711380554, "grad_norm": 0.3207893967628479, "learning_rate": 4.6147815318142505e-06, "loss": 0.0234, "step": 92900 }, { "epoch": 1.425984191543243, "grad_norm": 0.47949498891830444, "learning_rate": 4.612524592702852e-06, "loss": 0.0271, "step": 92910 }, { "epoch": 1.426137671705932, "grad_norm": 0.7186115980148315, "learning_rate": 4.610268040176037e-06, "loss": 0.0303, "step": 92920 }, { "epoch": 1.426291151868621, "grad_norm": 0.3712631165981293, "learning_rate": 4.608011874395727e-06, "loss": 0.0259, "step": 92930 }, { "epoch": 1.42644463203131, "grad_norm": 0.3242323696613312, "learning_rate": 4.6057560955238235e-06, "loss": 0.0202, "step": 92940 }, { "epoch": 1.4265981121939988, "grad_norm": 0.3753153383731842, "learning_rate": 4.603500703722188e-06, "loss": 0.0209, "step": 92950 }, { "epoch": 1.426751592356688, "grad_norm": 0.3627890348434448, "learning_rate": 4.601245699152659e-06, "loss": 0.0207, "step": 92960 }, { "epoch": 1.4269050725193768, "grad_norm": 0.24151287972927094, "learning_rate": 4.598991081977048e-06, "loss": 0.0205, "step": 92970 }, { "epoch": 1.4270585526820658, "grad_norm": 0.4561198651790619, "learning_rate": 4.596736852357137e-06, "loss": 0.0311, "step": 92980 }, { "epoch": 1.4272120328447548, "grad_norm": 0.48998212814331055, "learning_rate": 4.59448301045468e-06, "loss": 0.0324, "step": 92990 }, { "epoch": 1.4273655130074439, "grad_norm": 0.23971864581108093, "learning_rate": 4.592229556431402e-06, "loss": 0.0269, "step": 93000 }, { "epoch": 1.4275189931701329, "grad_norm": 0.3233088254928589, "learning_rate": 4.589976490449013e-06, "loss": 0.0234, "step": 93010 }, { "epoch": 1.4276724733328217, "grad_norm": 0.3102257251739502, "learning_rate": 4.587723812669173e-06, "loss": 0.0184, "step": 93020 }, { "epoch": 1.4278259534955107, "grad_norm": 0.40222421288490295, "learning_rate": 4.585471523253524e-06, "loss": 0.0272, "step": 93030 }, { "epoch": 1.4279794336581997, "grad_norm": 0.35945266485214233, "learning_rate": 4.583219622363691e-06, "loss": 0.0245, "step": 93040 }, { "epoch": 1.4281329138208887, "grad_norm": 0.4410850703716278, "learning_rate": 4.580968110161258e-06, "loss": 0.0253, "step": 93050 }, { "epoch": 1.4282863939835777, "grad_norm": 0.3879673480987549, "learning_rate": 4.578716986807784e-06, "loss": 0.0279, "step": 93060 }, { "epoch": 1.4284398741462665, "grad_norm": 0.27714166045188904, "learning_rate": 4.576466252464802e-06, "loss": 0.0254, "step": 93070 }, { "epoch": 1.4285933543089555, "grad_norm": 0.40623271465301514, "learning_rate": 4.574215907293815e-06, "loss": 0.032, "step": 93080 }, { "epoch": 1.4287468344716445, "grad_norm": 0.32372626662254333, "learning_rate": 4.5719659514562995e-06, "loss": 0.0261, "step": 93090 }, { "epoch": 1.4289003146343335, "grad_norm": 0.3389006555080414, "learning_rate": 4.569716385113701e-06, "loss": 0.0312, "step": 93100 }, { "epoch": 1.4290537947970225, "grad_norm": 0.5689564347267151, "learning_rate": 4.56746720842745e-06, "loss": 0.0288, "step": 93110 }, { "epoch": 1.4292072749597113, "grad_norm": 0.3989051878452301, "learning_rate": 4.565218421558922e-06, "loss": 0.0241, "step": 93120 }, { "epoch": 1.4293607551224004, "grad_norm": 0.41979995369911194, "learning_rate": 4.562970024669496e-06, "loss": 0.0313, "step": 93130 }, { "epoch": 1.4295142352850894, "grad_norm": 0.4755047559738159, "learning_rate": 4.560722017920503e-06, "loss": 0.029, "step": 93140 }, { "epoch": 1.4296677154477784, "grad_norm": 0.2952399253845215, "learning_rate": 4.558474401473251e-06, "loss": 0.0223, "step": 93150 }, { "epoch": 1.4298211956104674, "grad_norm": 0.39125457406044006, "learning_rate": 4.556227175489021e-06, "loss": 0.0313, "step": 93160 }, { "epoch": 1.4299746757731564, "grad_norm": 0.30963411927223206, "learning_rate": 4.553980340129065e-06, "loss": 0.0247, "step": 93170 }, { "epoch": 1.4301281559358454, "grad_norm": 0.3852275311946869, "learning_rate": 4.551733895554607e-06, "loss": 0.0245, "step": 93180 }, { "epoch": 1.4302816360985342, "grad_norm": 0.3773943781852722, "learning_rate": 4.549487841926841e-06, "loss": 0.0228, "step": 93190 }, { "epoch": 1.4304351162612232, "grad_norm": 0.42058753967285156, "learning_rate": 4.54724217940694e-06, "loss": 0.0265, "step": 93200 }, { "epoch": 1.4305885964239122, "grad_norm": 0.30683955550193787, "learning_rate": 4.544996908156046e-06, "loss": 0.0242, "step": 93210 }, { "epoch": 1.4307420765866012, "grad_norm": 0.3402666449546814, "learning_rate": 4.542752028335261e-06, "loss": 0.0249, "step": 93220 }, { "epoch": 1.4308955567492903, "grad_norm": 0.20433813333511353, "learning_rate": 4.540507540105677e-06, "loss": 0.0265, "step": 93230 }, { "epoch": 1.431049036911979, "grad_norm": 0.3295063078403473, "learning_rate": 4.538263443628349e-06, "loss": 0.0239, "step": 93240 }, { "epoch": 1.431202517074668, "grad_norm": 0.32486477494239807, "learning_rate": 4.536019739064303e-06, "loss": 0.0276, "step": 93250 }, { "epoch": 1.431355997237357, "grad_norm": 0.24363236129283905, "learning_rate": 4.53377642657454e-06, "loss": 0.0294, "step": 93260 }, { "epoch": 1.431509477400046, "grad_norm": 0.38343358039855957, "learning_rate": 4.531533506320029e-06, "loss": 0.026, "step": 93270 }, { "epoch": 1.431662957562735, "grad_norm": 0.42894020676612854, "learning_rate": 4.529290978461717e-06, "loss": 0.0342, "step": 93280 }, { "epoch": 1.4318164377254239, "grad_norm": 0.4511043429374695, "learning_rate": 4.527048843160513e-06, "loss": 0.0275, "step": 93290 }, { "epoch": 1.431969917888113, "grad_norm": 0.30978086590766907, "learning_rate": 4.524807100577312e-06, "loss": 0.0258, "step": 93300 }, { "epoch": 1.432123398050802, "grad_norm": 0.49802064895629883, "learning_rate": 4.522565750872973e-06, "loss": 0.0267, "step": 93310 }, { "epoch": 1.432276878213491, "grad_norm": 0.3135194778442383, "learning_rate": 4.520324794208315e-06, "loss": 0.0332, "step": 93320 }, { "epoch": 1.43243035837618, "grad_norm": 0.2849484980106354, "learning_rate": 4.518084230744152e-06, "loss": 0.0279, "step": 93330 }, { "epoch": 1.4325838385388687, "grad_norm": 0.42897483706474304, "learning_rate": 4.515844060641256e-06, "loss": 0.0218, "step": 93340 }, { "epoch": 1.432737318701558, "grad_norm": 0.4823782444000244, "learning_rate": 4.51360428406037e-06, "loss": 0.0265, "step": 93350 }, { "epoch": 1.4328907988642468, "grad_norm": 0.4736344814300537, "learning_rate": 4.511364901162214e-06, "loss": 0.0263, "step": 93360 }, { "epoch": 1.4330442790269358, "grad_norm": 0.4280276894569397, "learning_rate": 4.509125912107477e-06, "loss": 0.027, "step": 93370 }, { "epoch": 1.4331977591896248, "grad_norm": 0.47632479667663574, "learning_rate": 4.506887317056816e-06, "loss": 0.0318, "step": 93380 }, { "epoch": 1.4333512393523138, "grad_norm": 0.2149643748998642, "learning_rate": 4.504649116170873e-06, "loss": 0.0265, "step": 93390 }, { "epoch": 1.4335047195150028, "grad_norm": 0.3238106369972229, "learning_rate": 4.502411309610247e-06, "loss": 0.023, "step": 93400 }, { "epoch": 1.4336581996776916, "grad_norm": 0.3315191864967346, "learning_rate": 4.500173897535516e-06, "loss": 0.0189, "step": 93410 }, { "epoch": 1.4338116798403806, "grad_norm": 0.3186599016189575, "learning_rate": 4.4979368801072264e-06, "loss": 0.024, "step": 93420 }, { "epoch": 1.4339651600030696, "grad_norm": 0.29735684394836426, "learning_rate": 4.4957002574859e-06, "loss": 0.0314, "step": 93430 }, { "epoch": 1.4341186401657586, "grad_norm": 0.3425590991973877, "learning_rate": 4.493464029832028e-06, "loss": 0.0191, "step": 93440 }, { "epoch": 1.4342721203284476, "grad_norm": 0.6059077978134155, "learning_rate": 4.491228197306068e-06, "loss": 0.0336, "step": 93450 }, { "epoch": 1.4344256004911364, "grad_norm": 0.4093394875526428, "learning_rate": 4.488992760068467e-06, "loss": 0.0236, "step": 93460 }, { "epoch": 1.4345790806538254, "grad_norm": 0.23880210518836975, "learning_rate": 4.486757718279621e-06, "loss": 0.0226, "step": 93470 }, { "epoch": 1.4347325608165145, "grad_norm": 0.551936686038971, "learning_rate": 4.4845230720999065e-06, "loss": 0.0222, "step": 93480 }, { "epoch": 1.4348860409792035, "grad_norm": 0.32626673579216003, "learning_rate": 4.4822888216896824e-06, "loss": 0.0222, "step": 93490 }, { "epoch": 1.4350395211418925, "grad_norm": 0.481771320104599, "learning_rate": 4.480054967209264e-06, "loss": 0.0285, "step": 93500 }, { "epoch": 1.4351930013045813, "grad_norm": 0.6649215817451477, "learning_rate": 4.4778215088189456e-06, "loss": 0.0324, "step": 93510 }, { "epoch": 1.4353464814672703, "grad_norm": 0.5061909556388855, "learning_rate": 4.475588446678992e-06, "loss": 0.0303, "step": 93520 }, { "epoch": 1.4354999616299593, "grad_norm": 0.3755013942718506, "learning_rate": 4.473355780949639e-06, "loss": 0.0219, "step": 93530 }, { "epoch": 1.4356534417926483, "grad_norm": 0.3804945647716522, "learning_rate": 4.471123511791094e-06, "loss": 0.032, "step": 93540 }, { "epoch": 1.4358069219553373, "grad_norm": 0.28746262192726135, "learning_rate": 4.468891639363531e-06, "loss": 0.0249, "step": 93550 }, { "epoch": 1.4359604021180261, "grad_norm": 0.4659945070743561, "learning_rate": 4.466660163827112e-06, "loss": 0.0279, "step": 93560 }, { "epoch": 1.4361138822807153, "grad_norm": 0.4216717779636383, "learning_rate": 4.46442908534195e-06, "loss": 0.0251, "step": 93570 }, { "epoch": 1.4362673624434041, "grad_norm": 0.23291945457458496, "learning_rate": 4.462198404068138e-06, "loss": 0.0242, "step": 93580 }, { "epoch": 1.4364208426060932, "grad_norm": 0.3291264474391937, "learning_rate": 4.4599681201657475e-06, "loss": 0.0281, "step": 93590 }, { "epoch": 1.4365743227687822, "grad_norm": 0.49657872319221497, "learning_rate": 4.4577382337948115e-06, "loss": 0.0322, "step": 93600 }, { "epoch": 1.4367278029314712, "grad_norm": 0.2727392911911011, "learning_rate": 4.455508745115339e-06, "loss": 0.0243, "step": 93610 }, { "epoch": 1.4368812830941602, "grad_norm": 0.28711315989494324, "learning_rate": 4.453279654287309e-06, "loss": 0.0282, "step": 93620 }, { "epoch": 1.437034763256849, "grad_norm": 0.24782301485538483, "learning_rate": 4.451050961470673e-06, "loss": 0.0253, "step": 93630 }, { "epoch": 1.437188243419538, "grad_norm": 0.30515292286872864, "learning_rate": 4.4488226668253535e-06, "loss": 0.0306, "step": 93640 }, { "epoch": 1.437341723582227, "grad_norm": 0.4614463150501251, "learning_rate": 4.446594770511239e-06, "loss": 0.0274, "step": 93650 }, { "epoch": 1.437495203744916, "grad_norm": 0.3015119433403015, "learning_rate": 4.444367272688208e-06, "loss": 0.035, "step": 93660 }, { "epoch": 1.437648683907605, "grad_norm": 0.38060712814331055, "learning_rate": 4.44214017351608e-06, "loss": 0.0237, "step": 93670 }, { "epoch": 1.4378021640702938, "grad_norm": 0.35792845487594604, "learning_rate": 4.439913473154678e-06, "loss": 0.0272, "step": 93680 }, { "epoch": 1.4379556442329828, "grad_norm": 0.3370083272457123, "learning_rate": 4.437687171763775e-06, "loss": 0.0303, "step": 93690 }, { "epoch": 1.4381091243956718, "grad_norm": 0.4661017954349518, "learning_rate": 4.435461269503122e-06, "loss": 0.0331, "step": 93700 }, { "epoch": 1.4382626045583609, "grad_norm": 0.45279717445373535, "learning_rate": 4.433235766532442e-06, "loss": 0.0202, "step": 93710 }, { "epoch": 1.4384160847210499, "grad_norm": 0.4158872365951538, "learning_rate": 4.431010663011428e-06, "loss": 0.0238, "step": 93720 }, { "epoch": 1.4385695648837387, "grad_norm": 0.332120805978775, "learning_rate": 4.428785959099745e-06, "loss": 0.0195, "step": 93730 }, { "epoch": 1.4387230450464277, "grad_norm": 0.4077184796333313, "learning_rate": 4.426561654957025e-06, "loss": 0.0294, "step": 93740 }, { "epoch": 1.4388765252091167, "grad_norm": 0.445034384727478, "learning_rate": 4.424337750742884e-06, "loss": 0.0241, "step": 93750 }, { "epoch": 1.4390300053718057, "grad_norm": 0.4211241900920868, "learning_rate": 4.422114246616901e-06, "loss": 0.0291, "step": 93760 }, { "epoch": 1.4391834855344947, "grad_norm": 0.3968910872936249, "learning_rate": 4.419891142738614e-06, "loss": 0.0275, "step": 93770 }, { "epoch": 1.4393369656971835, "grad_norm": 0.3950593173503876, "learning_rate": 4.417668439267556e-06, "loss": 0.0277, "step": 93780 }, { "epoch": 1.4394904458598727, "grad_norm": 0.2614557147026062, "learning_rate": 4.415446136363216e-06, "loss": 0.0229, "step": 93790 }, { "epoch": 1.4396439260225615, "grad_norm": 0.33643868565559387, "learning_rate": 4.413224234185057e-06, "loss": 0.0307, "step": 93800 }, { "epoch": 1.4397974061852505, "grad_norm": 0.33281344175338745, "learning_rate": 4.411002732892517e-06, "loss": 0.0283, "step": 93810 }, { "epoch": 1.4399508863479396, "grad_norm": 0.334867000579834, "learning_rate": 4.408781632644998e-06, "loss": 0.0258, "step": 93820 }, { "epoch": 1.4401043665106286, "grad_norm": 0.42719346284866333, "learning_rate": 4.406560933601882e-06, "loss": 0.0287, "step": 93830 }, { "epoch": 1.4402578466733176, "grad_norm": 0.35330596566200256, "learning_rate": 4.404340635922512e-06, "loss": 0.0273, "step": 93840 }, { "epoch": 1.4404113268360064, "grad_norm": 0.31319671869277954, "learning_rate": 4.4021207397662155e-06, "loss": 0.0238, "step": 93850 }, { "epoch": 1.4405648069986954, "grad_norm": 0.452260285615921, "learning_rate": 4.399901245292284e-06, "loss": 0.0281, "step": 93860 }, { "epoch": 1.4407182871613844, "grad_norm": 0.32469266653060913, "learning_rate": 4.397682152659969e-06, "loss": 0.0276, "step": 93870 }, { "epoch": 1.4408717673240734, "grad_norm": 0.29337242245674133, "learning_rate": 4.395463462028514e-06, "loss": 0.0249, "step": 93880 }, { "epoch": 1.4410252474867624, "grad_norm": 0.3841347098350525, "learning_rate": 4.3932451735571215e-06, "loss": 0.0249, "step": 93890 }, { "epoch": 1.4411787276494512, "grad_norm": 0.3323168158531189, "learning_rate": 4.391027287404968e-06, "loss": 0.0237, "step": 93900 }, { "epoch": 1.4413322078121402, "grad_norm": 0.19745980203151703, "learning_rate": 4.388809803731197e-06, "loss": 0.0286, "step": 93910 }, { "epoch": 1.4414856879748292, "grad_norm": 0.416829913854599, "learning_rate": 4.386592722694931e-06, "loss": 0.0324, "step": 93920 }, { "epoch": 1.4416391681375182, "grad_norm": 0.24739457666873932, "learning_rate": 4.384376044455253e-06, "loss": 0.0228, "step": 93930 }, { "epoch": 1.4417926483002073, "grad_norm": 0.38027793169021606, "learning_rate": 4.382159769171231e-06, "loss": 0.0244, "step": 93940 }, { "epoch": 1.441946128462896, "grad_norm": 0.4227604866027832, "learning_rate": 4.379943897001894e-06, "loss": 0.0328, "step": 93950 }, { "epoch": 1.442099608625585, "grad_norm": 0.3863832950592041, "learning_rate": 4.377728428106242e-06, "loss": 0.0215, "step": 93960 }, { "epoch": 1.442253088788274, "grad_norm": 0.6158703565597534, "learning_rate": 4.375513362643251e-06, "loss": 0.0286, "step": 93970 }, { "epoch": 1.442406568950963, "grad_norm": 0.710709273815155, "learning_rate": 4.373298700771864e-06, "loss": 0.0273, "step": 93980 }, { "epoch": 1.442560049113652, "grad_norm": 0.38637545704841614, "learning_rate": 4.371084442650998e-06, "loss": 0.024, "step": 93990 }, { "epoch": 1.4427135292763411, "grad_norm": 0.32710206508636475, "learning_rate": 4.368870588439539e-06, "loss": 0.0238, "step": 94000 }, { "epoch": 1.4428670094390301, "grad_norm": 0.261521577835083, "learning_rate": 4.366657138296345e-06, "loss": 0.0336, "step": 94010 }, { "epoch": 1.443020489601719, "grad_norm": 0.2860495150089264, "learning_rate": 4.364444092380244e-06, "loss": 0.0246, "step": 94020 }, { "epoch": 1.443173969764408, "grad_norm": 0.4509503245353699, "learning_rate": 4.362231450850032e-06, "loss": 0.0288, "step": 94030 }, { "epoch": 1.443327449927097, "grad_norm": 0.39965569972991943, "learning_rate": 4.3600192138644885e-06, "loss": 0.028, "step": 94040 }, { "epoch": 1.443480930089786, "grad_norm": 0.5504980087280273, "learning_rate": 4.357807381582353e-06, "loss": 0.0251, "step": 94050 }, { "epoch": 1.443634410252475, "grad_norm": 0.4408370852470398, "learning_rate": 4.3555959541623286e-06, "loss": 0.0293, "step": 94060 }, { "epoch": 1.4437878904151638, "grad_norm": 0.5987091064453125, "learning_rate": 4.353384931763108e-06, "loss": 0.0315, "step": 94070 }, { "epoch": 1.4439413705778528, "grad_norm": 0.3262176215648651, "learning_rate": 4.351174314543345e-06, "loss": 0.0268, "step": 94080 }, { "epoch": 1.4440948507405418, "grad_norm": 0.2072286456823349, "learning_rate": 4.348964102661664e-06, "loss": 0.0232, "step": 94090 }, { "epoch": 1.4442483309032308, "grad_norm": 0.44755125045776367, "learning_rate": 4.3467542962766594e-06, "loss": 0.022, "step": 94100 }, { "epoch": 1.4444018110659198, "grad_norm": 0.37982177734375, "learning_rate": 4.344544895546901e-06, "loss": 0.03, "step": 94110 }, { "epoch": 1.4445552912286086, "grad_norm": 0.3205682933330536, "learning_rate": 4.342335900630925e-06, "loss": 0.0197, "step": 94120 }, { "epoch": 1.4447087713912976, "grad_norm": 0.48247864842414856, "learning_rate": 4.3401273116872375e-06, "loss": 0.0302, "step": 94130 }, { "epoch": 1.4448622515539866, "grad_norm": 0.5338312983512878, "learning_rate": 4.337919128874326e-06, "loss": 0.0296, "step": 94140 }, { "epoch": 1.4450157317166756, "grad_norm": 0.47482621669769287, "learning_rate": 4.3357113523506415e-06, "loss": 0.0316, "step": 94150 }, { "epoch": 1.4451692118793646, "grad_norm": 0.4240528643131256, "learning_rate": 4.333503982274594e-06, "loss": 0.0246, "step": 94160 }, { "epoch": 1.4453226920420534, "grad_norm": 0.3540607690811157, "learning_rate": 4.331297018804586e-06, "loss": 0.0261, "step": 94170 }, { "epoch": 1.4454761722047427, "grad_norm": 0.32795366644859314, "learning_rate": 4.3290904620989785e-06, "loss": 0.028, "step": 94180 }, { "epoch": 1.4456296523674315, "grad_norm": 0.3540031313896179, "learning_rate": 4.3268843123161016e-06, "loss": 0.029, "step": 94190 }, { "epoch": 1.4457831325301205, "grad_norm": 0.5846923589706421, "learning_rate": 4.324678569614272e-06, "loss": 0.0258, "step": 94200 }, { "epoch": 1.4459366126928095, "grad_norm": 0.3578791916370392, "learning_rate": 4.322473234151751e-06, "loss": 0.0278, "step": 94210 }, { "epoch": 1.4460900928554985, "grad_norm": 0.3083571791648865, "learning_rate": 4.320268306086789e-06, "loss": 0.0275, "step": 94220 }, { "epoch": 1.4462435730181875, "grad_norm": 0.4230840504169464, "learning_rate": 4.3180637855776075e-06, "loss": 0.0281, "step": 94230 }, { "epoch": 1.4463970531808763, "grad_norm": 0.2399006485939026, "learning_rate": 4.315859672782393e-06, "loss": 0.0279, "step": 94240 }, { "epoch": 1.4465505333435653, "grad_norm": 0.3218979835510254, "learning_rate": 4.313655967859304e-06, "loss": 0.0317, "step": 94250 }, { "epoch": 1.4467040135062543, "grad_norm": 0.35509398579597473, "learning_rate": 4.311452670966469e-06, "loss": 0.022, "step": 94260 }, { "epoch": 1.4468574936689433, "grad_norm": 0.32639145851135254, "learning_rate": 4.309249782261987e-06, "loss": 0.025, "step": 94270 }, { "epoch": 1.4470109738316324, "grad_norm": 0.34878793358802795, "learning_rate": 4.3070473019039315e-06, "loss": 0.028, "step": 94280 }, { "epoch": 1.4471644539943211, "grad_norm": 0.3730049431324005, "learning_rate": 4.304845230050338e-06, "loss": 0.0241, "step": 94290 }, { "epoch": 1.4473179341570102, "grad_norm": 0.38881197571754456, "learning_rate": 4.302643566859233e-06, "loss": 0.0228, "step": 94300 }, { "epoch": 1.4474714143196992, "grad_norm": 0.37937989830970764, "learning_rate": 4.300442312488584e-06, "loss": 0.0329, "step": 94310 }, { "epoch": 1.4476248944823882, "grad_norm": 0.23331744968891144, "learning_rate": 4.298241467096348e-06, "loss": 0.0189, "step": 94320 }, { "epoch": 1.4477783746450772, "grad_norm": 0.5112202167510986, "learning_rate": 4.296041030840456e-06, "loss": 0.0247, "step": 94330 }, { "epoch": 1.447931854807766, "grad_norm": 0.44612789154052734, "learning_rate": 4.293841003878797e-06, "loss": 0.0316, "step": 94340 }, { "epoch": 1.448085334970455, "grad_norm": 0.3745991289615631, "learning_rate": 4.29164138636924e-06, "loss": 0.0231, "step": 94350 }, { "epoch": 1.448238815133144, "grad_norm": 0.28830716013908386, "learning_rate": 4.289442178469617e-06, "loss": 0.0213, "step": 94360 }, { "epoch": 1.448392295295833, "grad_norm": 0.32391420006752014, "learning_rate": 4.287243380337739e-06, "loss": 0.0328, "step": 94370 }, { "epoch": 1.448545775458522, "grad_norm": 0.38134467601776123, "learning_rate": 4.28504499213138e-06, "loss": 0.0268, "step": 94380 }, { "epoch": 1.4486992556212108, "grad_norm": 0.2730870246887207, "learning_rate": 4.282847014008285e-06, "loss": 0.0238, "step": 94390 }, { "epoch": 1.4488527357839, "grad_norm": 0.283469557762146, "learning_rate": 4.280649446126186e-06, "loss": 0.025, "step": 94400 }, { "epoch": 1.4490062159465888, "grad_norm": 0.38036802411079407, "learning_rate": 4.278452288642759e-06, "loss": 0.037, "step": 94410 }, { "epoch": 1.4491596961092779, "grad_norm": 0.2316645234823227, "learning_rate": 4.276255541715661e-06, "loss": 0.0242, "step": 94420 }, { "epoch": 1.4493131762719669, "grad_norm": 0.25907695293426514, "learning_rate": 4.274059205502534e-06, "loss": 0.0197, "step": 94430 }, { "epoch": 1.4494666564346559, "grad_norm": 0.39878350496292114, "learning_rate": 4.271863280160974e-06, "loss": 0.0282, "step": 94440 }, { "epoch": 1.449620136597345, "grad_norm": 0.37574613094329834, "learning_rate": 4.269667765848551e-06, "loss": 0.0239, "step": 94450 }, { "epoch": 1.4497736167600337, "grad_norm": 0.3223436176776886, "learning_rate": 4.267472662722806e-06, "loss": 0.0239, "step": 94460 }, { "epoch": 1.4499270969227227, "grad_norm": 0.3537680208683014, "learning_rate": 4.265277970941253e-06, "loss": 0.025, "step": 94470 }, { "epoch": 1.4500805770854117, "grad_norm": 0.31671419739723206, "learning_rate": 4.263083690661369e-06, "loss": 0.0244, "step": 94480 }, { "epoch": 1.4502340572481007, "grad_norm": 0.45018014311790466, "learning_rate": 4.260889822040617e-06, "loss": 0.0208, "step": 94490 }, { "epoch": 1.4503875374107897, "grad_norm": 0.3229235112667084, "learning_rate": 4.2586963652364186e-06, "loss": 0.0264, "step": 94500 }, { "epoch": 1.4505410175734785, "grad_norm": 0.4943406283855438, "learning_rate": 4.256503320406158e-06, "loss": 0.0285, "step": 94510 }, { "epoch": 1.4506944977361675, "grad_norm": 0.3547275960445404, "learning_rate": 4.25431068770721e-06, "loss": 0.0238, "step": 94520 }, { "epoch": 1.4508479778988566, "grad_norm": 0.3179625868797302, "learning_rate": 4.252118467296906e-06, "loss": 0.0314, "step": 94530 }, { "epoch": 1.4510014580615456, "grad_norm": 0.47049611806869507, "learning_rate": 4.249926659332552e-06, "loss": 0.0281, "step": 94540 }, { "epoch": 1.4511549382242346, "grad_norm": 0.27437442541122437, "learning_rate": 4.247735263971423e-06, "loss": 0.0206, "step": 94550 }, { "epoch": 1.4513084183869234, "grad_norm": 0.3313382863998413, "learning_rate": 4.245544281370764e-06, "loss": 0.022, "step": 94560 }, { "epoch": 1.4514618985496124, "grad_norm": 0.3543776571750641, "learning_rate": 4.243353711687794e-06, "loss": 0.0235, "step": 94570 }, { "epoch": 1.4516153787123014, "grad_norm": 0.31056398153305054, "learning_rate": 4.241163555079696e-06, "loss": 0.029, "step": 94580 }, { "epoch": 1.4517688588749904, "grad_norm": 0.7093503475189209, "learning_rate": 4.238973811703632e-06, "loss": 0.0317, "step": 94590 }, { "epoch": 1.4519223390376794, "grad_norm": 0.24852176010608673, "learning_rate": 4.236784481716733e-06, "loss": 0.0231, "step": 94600 }, { "epoch": 1.4520758192003684, "grad_norm": 0.4079241454601288, "learning_rate": 4.234595565276082e-06, "loss": 0.0231, "step": 94610 }, { "epoch": 1.4522292993630574, "grad_norm": 0.3467811346054077, "learning_rate": 4.232407062538761e-06, "loss": 0.0209, "step": 94620 }, { "epoch": 1.4523827795257462, "grad_norm": 0.4207746982574463, "learning_rate": 4.2302189736618045e-06, "loss": 0.0266, "step": 94630 }, { "epoch": 1.4525362596884352, "grad_norm": 0.3473546802997589, "learning_rate": 4.228031298802222e-06, "loss": 0.0328, "step": 94640 }, { "epoch": 1.4526897398511243, "grad_norm": 0.29304736852645874, "learning_rate": 4.225844038116991e-06, "loss": 0.0234, "step": 94650 }, { "epoch": 1.4528432200138133, "grad_norm": 0.5280283093452454, "learning_rate": 4.223657191763062e-06, "loss": 0.0234, "step": 94660 }, { "epoch": 1.4529967001765023, "grad_norm": 0.2727784812450409, "learning_rate": 4.221470759897354e-06, "loss": 0.0195, "step": 94670 }, { "epoch": 1.453150180339191, "grad_norm": 0.3352435529232025, "learning_rate": 4.219284742676755e-06, "loss": 0.0266, "step": 94680 }, { "epoch": 1.45330366050188, "grad_norm": 0.42736151814460754, "learning_rate": 4.2170991402581296e-06, "loss": 0.0297, "step": 94690 }, { "epoch": 1.453457140664569, "grad_norm": 0.28956690430641174, "learning_rate": 4.214913952798311e-06, "loss": 0.0317, "step": 94700 }, { "epoch": 1.4536106208272581, "grad_norm": 0.3497812747955322, "learning_rate": 4.212729180454088e-06, "loss": 0.0245, "step": 94710 }, { "epoch": 1.4537641009899471, "grad_norm": 0.41191619634628296, "learning_rate": 4.2105448233822425e-06, "loss": 0.0233, "step": 94720 }, { "epoch": 1.453917581152636, "grad_norm": 0.5199651718139648, "learning_rate": 4.208360881739511e-06, "loss": 0.0325, "step": 94730 }, { "epoch": 1.454071061315325, "grad_norm": 0.39432698488235474, "learning_rate": 4.2061773556826024e-06, "loss": 0.0265, "step": 94740 }, { "epoch": 1.454224541478014, "grad_norm": 0.4850054681301117, "learning_rate": 4.203994245368208e-06, "loss": 0.0253, "step": 94750 }, { "epoch": 1.454378021640703, "grad_norm": 0.3667140007019043, "learning_rate": 4.20181155095297e-06, "loss": 0.0308, "step": 94760 }, { "epoch": 1.454531501803392, "grad_norm": 0.37186169624328613, "learning_rate": 4.199629272593509e-06, "loss": 0.0223, "step": 94770 }, { "epoch": 1.4546849819660808, "grad_norm": 0.26812058687210083, "learning_rate": 4.197447410446425e-06, "loss": 0.0344, "step": 94780 }, { "epoch": 1.45483846212877, "grad_norm": 0.560927152633667, "learning_rate": 4.195265964668274e-06, "loss": 0.0264, "step": 94790 }, { "epoch": 1.4549919422914588, "grad_norm": 0.5080040693283081, "learning_rate": 4.193084935415593e-06, "loss": 0.0308, "step": 94800 }, { "epoch": 1.4551454224541478, "grad_norm": 0.3861078917980194, "learning_rate": 4.19090432284488e-06, "loss": 0.0328, "step": 94810 }, { "epoch": 1.4552989026168368, "grad_norm": 0.3885495662689209, "learning_rate": 4.188724127112609e-06, "loss": 0.0225, "step": 94820 }, { "epoch": 1.4554523827795258, "grad_norm": 0.34655770659446716, "learning_rate": 4.186544348375222e-06, "loss": 0.0199, "step": 94830 }, { "epoch": 1.4556058629422148, "grad_norm": 0.2872280478477478, "learning_rate": 4.184364986789128e-06, "loss": 0.0225, "step": 94840 }, { "epoch": 1.4557593431049036, "grad_norm": 0.46933862566947937, "learning_rate": 4.182186042510722e-06, "loss": 0.0271, "step": 94850 }, { "epoch": 1.4559128232675926, "grad_norm": 0.3876994252204895, "learning_rate": 4.180007515696344e-06, "loss": 0.0216, "step": 94860 }, { "epoch": 1.4560663034302816, "grad_norm": 0.2817677855491638, "learning_rate": 4.177829406502316e-06, "loss": 0.025, "step": 94870 }, { "epoch": 1.4562197835929707, "grad_norm": 0.32775577902793884, "learning_rate": 4.1756517150849404e-06, "loss": 0.0283, "step": 94880 }, { "epoch": 1.4563732637556597, "grad_norm": 0.26885664463043213, "learning_rate": 4.173474441600476e-06, "loss": 0.0273, "step": 94890 }, { "epoch": 1.4565267439183485, "grad_norm": 0.233524352312088, "learning_rate": 4.1712975862051545e-06, "loss": 0.02, "step": 94900 }, { "epoch": 1.4566802240810375, "grad_norm": 0.36437171697616577, "learning_rate": 4.169121149055178e-06, "loss": 0.0261, "step": 94910 }, { "epoch": 1.4568337042437265, "grad_norm": 0.5620560050010681, "learning_rate": 4.166945130306722e-06, "loss": 0.0306, "step": 94920 }, { "epoch": 1.4569871844064155, "grad_norm": 0.3403604030609131, "learning_rate": 4.164769530115927e-06, "loss": 0.028, "step": 94930 }, { "epoch": 1.4571406645691045, "grad_norm": 0.3786468505859375, "learning_rate": 4.162594348638903e-06, "loss": 0.0223, "step": 94940 }, { "epoch": 1.4572941447317933, "grad_norm": 0.30377626419067383, "learning_rate": 4.160419586031743e-06, "loss": 0.032, "step": 94950 }, { "epoch": 1.4574476248944823, "grad_norm": 0.4721396267414093, "learning_rate": 4.158245242450489e-06, "loss": 0.0296, "step": 94960 }, { "epoch": 1.4576011050571713, "grad_norm": 0.5057615637779236, "learning_rate": 4.156071318051164e-06, "loss": 0.0317, "step": 94970 }, { "epoch": 1.4577545852198603, "grad_norm": 0.4178367853164673, "learning_rate": 4.153897812989767e-06, "loss": 0.0311, "step": 94980 }, { "epoch": 1.4579080653825494, "grad_norm": 0.45095375180244446, "learning_rate": 4.151724727422257e-06, "loss": 0.0278, "step": 94990 }, { "epoch": 1.4580615455452381, "grad_norm": 0.33069267868995667, "learning_rate": 4.149552061504567e-06, "loss": 0.0195, "step": 95000 }, { "epoch": 1.4582150257079274, "grad_norm": 0.414438933134079, "learning_rate": 4.1473798153925985e-06, "loss": 0.0274, "step": 95010 }, { "epoch": 1.4583685058706162, "grad_norm": 0.3702337443828583, "learning_rate": 4.145207989242223e-06, "loss": 0.0224, "step": 95020 }, { "epoch": 1.4585219860333052, "grad_norm": 0.3627934455871582, "learning_rate": 4.14303658320928e-06, "loss": 0.027, "step": 95030 }, { "epoch": 1.4586754661959942, "grad_norm": 0.35509929060935974, "learning_rate": 4.140865597449588e-06, "loss": 0.0242, "step": 95040 }, { "epoch": 1.4588289463586832, "grad_norm": 0.3601243793964386, "learning_rate": 4.138695032118929e-06, "loss": 0.0229, "step": 95050 }, { "epoch": 1.4589824265213722, "grad_norm": 0.4028567671775818, "learning_rate": 4.136524887373042e-06, "loss": 0.0277, "step": 95060 }, { "epoch": 1.459135906684061, "grad_norm": 0.43263736367225647, "learning_rate": 4.134355163367661e-06, "loss": 0.0267, "step": 95070 }, { "epoch": 1.45928938684675, "grad_norm": 0.3986871838569641, "learning_rate": 4.132185860258474e-06, "loss": 0.031, "step": 95080 }, { "epoch": 1.459442867009439, "grad_norm": 0.48512837290763855, "learning_rate": 4.130016978201139e-06, "loss": 0.0306, "step": 95090 }, { "epoch": 1.459596347172128, "grad_norm": 0.453756719827652, "learning_rate": 4.127848517351289e-06, "loss": 0.0261, "step": 95100 }, { "epoch": 1.459749827334817, "grad_norm": 0.28741252422332764, "learning_rate": 4.1256804778645246e-06, "loss": 0.0284, "step": 95110 }, { "epoch": 1.4599033074975059, "grad_norm": 0.376807302236557, "learning_rate": 4.123512859896414e-06, "loss": 0.0221, "step": 95120 }, { "epoch": 1.4600567876601949, "grad_norm": 0.22754567861557007, "learning_rate": 4.121345663602495e-06, "loss": 0.0327, "step": 95130 }, { "epoch": 1.4602102678228839, "grad_norm": 0.44602757692337036, "learning_rate": 4.119178889138284e-06, "loss": 0.0227, "step": 95140 }, { "epoch": 1.460363747985573, "grad_norm": 0.24872852861881256, "learning_rate": 4.1170125366592604e-06, "loss": 0.0225, "step": 95150 }, { "epoch": 1.460517228148262, "grad_norm": 0.5333297252655029, "learning_rate": 4.1148466063208635e-06, "loss": 0.0306, "step": 95160 }, { "epoch": 1.4606707083109507, "grad_norm": 0.31800347566604614, "learning_rate": 4.1126810982785225e-06, "loss": 0.0227, "step": 95170 }, { "epoch": 1.4608241884736397, "grad_norm": 0.41265738010406494, "learning_rate": 4.110516012687622e-06, "loss": 0.0277, "step": 95180 }, { "epoch": 1.4609776686363287, "grad_norm": 0.3404484987258911, "learning_rate": 4.10835134970352e-06, "loss": 0.0253, "step": 95190 }, { "epoch": 1.4611311487990177, "grad_norm": 0.664247453212738, "learning_rate": 4.1061871094815445e-06, "loss": 0.0261, "step": 95200 }, { "epoch": 1.4612846289617067, "grad_norm": 0.4511740207672119, "learning_rate": 4.104023292176994e-06, "loss": 0.0218, "step": 95210 }, { "epoch": 1.4614381091243955, "grad_norm": 0.28216883540153503, "learning_rate": 4.101859897945134e-06, "loss": 0.0331, "step": 95220 }, { "epoch": 1.4615915892870848, "grad_norm": 0.7288159728050232, "learning_rate": 4.0996969269412e-06, "loss": 0.0374, "step": 95230 }, { "epoch": 1.4617450694497736, "grad_norm": 0.21237032115459442, "learning_rate": 4.097534379320405e-06, "loss": 0.023, "step": 95240 }, { "epoch": 1.4618985496124626, "grad_norm": 0.2605668306350708, "learning_rate": 4.0953722552379236e-06, "loss": 0.0203, "step": 95250 }, { "epoch": 1.4620520297751516, "grad_norm": 0.4463762938976288, "learning_rate": 4.093210554848892e-06, "loss": 0.0326, "step": 95260 }, { "epoch": 1.4622055099378406, "grad_norm": 0.24830785393714905, "learning_rate": 4.091049278308437e-06, "loss": 0.0241, "step": 95270 }, { "epoch": 1.4623589901005296, "grad_norm": 0.3179181218147278, "learning_rate": 4.088888425771639e-06, "loss": 0.0251, "step": 95280 }, { "epoch": 1.4625124702632184, "grad_norm": 0.4209763705730438, "learning_rate": 4.0867279973935484e-06, "loss": 0.0259, "step": 95290 }, { "epoch": 1.4626659504259074, "grad_norm": 0.3756391704082489, "learning_rate": 4.0845679933292e-06, "loss": 0.0314, "step": 95300 }, { "epoch": 1.4628194305885964, "grad_norm": 0.2571677565574646, "learning_rate": 4.08240841373358e-06, "loss": 0.0205, "step": 95310 }, { "epoch": 1.4629729107512854, "grad_norm": 0.40009796619415283, "learning_rate": 4.080249258761646e-06, "loss": 0.0261, "step": 95320 }, { "epoch": 1.4631263909139745, "grad_norm": 0.21581631898880005, "learning_rate": 4.0780905285683425e-06, "loss": 0.0278, "step": 95330 }, { "epoch": 1.4632798710766632, "grad_norm": 0.30598360300064087, "learning_rate": 4.0759322233085655e-06, "loss": 0.0277, "step": 95340 }, { "epoch": 1.4634333512393523, "grad_norm": 0.6331873536109924, "learning_rate": 4.073774343137188e-06, "loss": 0.0314, "step": 95350 }, { "epoch": 1.4635868314020413, "grad_norm": 0.255513072013855, "learning_rate": 4.07161688820905e-06, "loss": 0.0206, "step": 95360 }, { "epoch": 1.4637403115647303, "grad_norm": 0.3933066725730896, "learning_rate": 4.0694598586789634e-06, "loss": 0.0357, "step": 95370 }, { "epoch": 1.4638937917274193, "grad_norm": 0.2701416313648224, "learning_rate": 4.067303254701708e-06, "loss": 0.0219, "step": 95380 }, { "epoch": 1.464047271890108, "grad_norm": 0.3523969054222107, "learning_rate": 4.06514707643203e-06, "loss": 0.0322, "step": 95390 }, { "epoch": 1.464200752052797, "grad_norm": 0.5902135372161865, "learning_rate": 4.062991324024659e-06, "loss": 0.0245, "step": 95400 }, { "epoch": 1.464354232215486, "grad_norm": 0.3383818566799164, "learning_rate": 4.060835997634273e-06, "loss": 0.0275, "step": 95410 }, { "epoch": 1.4645077123781751, "grad_norm": 0.511039137840271, "learning_rate": 4.058681097415529e-06, "loss": 0.0253, "step": 95420 }, { "epoch": 1.4646611925408641, "grad_norm": 0.5564889907836914, "learning_rate": 4.056526623523063e-06, "loss": 0.0316, "step": 95430 }, { "epoch": 1.4648146727035531, "grad_norm": 0.3438189625740051, "learning_rate": 4.054372576111466e-06, "loss": 0.0209, "step": 95440 }, { "epoch": 1.4649681528662422, "grad_norm": 0.4285798668861389, "learning_rate": 4.0522189553353075e-06, "loss": 0.0257, "step": 95450 }, { "epoch": 1.465121633028931, "grad_norm": 0.3231779932975769, "learning_rate": 4.05006576134912e-06, "loss": 0.0315, "step": 95460 }, { "epoch": 1.46527511319162, "grad_norm": 0.35437697172164917, "learning_rate": 4.047912994307411e-06, "loss": 0.0227, "step": 95470 }, { "epoch": 1.465428593354309, "grad_norm": 0.3044379949569702, "learning_rate": 4.045760654364653e-06, "loss": 0.027, "step": 95480 }, { "epoch": 1.465582073516998, "grad_norm": 0.15278229117393494, "learning_rate": 4.043608741675287e-06, "loss": 0.024, "step": 95490 }, { "epoch": 1.465735553679687, "grad_norm": 0.3408207297325134, "learning_rate": 4.041457256393735e-06, "loss": 0.0287, "step": 95500 }, { "epoch": 1.4658890338423758, "grad_norm": 0.32000619173049927, "learning_rate": 4.039306198674371e-06, "loss": 0.0276, "step": 95510 }, { "epoch": 1.4660425140050648, "grad_norm": 0.3625611662864685, "learning_rate": 4.037155568671546e-06, "loss": 0.0292, "step": 95520 }, { "epoch": 1.4661959941677538, "grad_norm": 0.32540470361709595, "learning_rate": 4.035005366539587e-06, "loss": 0.021, "step": 95530 }, { "epoch": 1.4663494743304428, "grad_norm": 0.5013212561607361, "learning_rate": 4.032855592432782e-06, "loss": 0.0321, "step": 95540 }, { "epoch": 1.4665029544931318, "grad_norm": 0.2331312596797943, "learning_rate": 4.030706246505389e-06, "loss": 0.0232, "step": 95550 }, { "epoch": 1.4666564346558206, "grad_norm": 0.4889649748802185, "learning_rate": 4.028557328911639e-06, "loss": 0.0323, "step": 95560 }, { "epoch": 1.4668099148185096, "grad_norm": 0.6060714721679688, "learning_rate": 4.026408839805729e-06, "loss": 0.0287, "step": 95570 }, { "epoch": 1.4669633949811987, "grad_norm": 0.45641592144966125, "learning_rate": 4.024260779341822e-06, "loss": 0.0331, "step": 95580 }, { "epoch": 1.4671168751438877, "grad_norm": 0.3527994453907013, "learning_rate": 4.0221131476740616e-06, "loss": 0.0263, "step": 95590 }, { "epoch": 1.4672703553065767, "grad_norm": 0.25043076276779175, "learning_rate": 4.019965944956556e-06, "loss": 0.0251, "step": 95600 }, { "epoch": 1.4674238354692655, "grad_norm": 0.332177072763443, "learning_rate": 4.017819171343367e-06, "loss": 0.0295, "step": 95610 }, { "epoch": 1.4675773156319547, "grad_norm": 0.328651487827301, "learning_rate": 4.01567282698855e-06, "loss": 0.0223, "step": 95620 }, { "epoch": 1.4677307957946435, "grad_norm": 0.34088441729545593, "learning_rate": 4.0135269120461175e-06, "loss": 0.0247, "step": 95630 }, { "epoch": 1.4678842759573325, "grad_norm": 0.41544824838638306, "learning_rate": 4.0113814266700475e-06, "loss": 0.0241, "step": 95640 }, { "epoch": 1.4680377561200215, "grad_norm": 0.45347803831100464, "learning_rate": 4.009236371014297e-06, "loss": 0.0307, "step": 95650 }, { "epoch": 1.4681912362827105, "grad_norm": 0.4711955785751343, "learning_rate": 4.007091745232782e-06, "loss": 0.0238, "step": 95660 }, { "epoch": 1.4683447164453995, "grad_norm": 0.3660258650779724, "learning_rate": 4.004947549479395e-06, "loss": 0.0248, "step": 95670 }, { "epoch": 1.4684981966080883, "grad_norm": 0.29417943954467773, "learning_rate": 4.002803783907994e-06, "loss": 0.0179, "step": 95680 }, { "epoch": 1.4686516767707773, "grad_norm": 0.2826808989048004, "learning_rate": 4.00066044867241e-06, "loss": 0.024, "step": 95690 }, { "epoch": 1.4688051569334664, "grad_norm": 0.39229774475097656, "learning_rate": 3.998517543926442e-06, "loss": 0.032, "step": 95700 }, { "epoch": 1.4689586370961554, "grad_norm": 0.43346402049064636, "learning_rate": 3.996375069823849e-06, "loss": 0.0334, "step": 95710 }, { "epoch": 1.4691121172588444, "grad_norm": 0.48091891407966614, "learning_rate": 3.994233026518374e-06, "loss": 0.0344, "step": 95720 }, { "epoch": 1.4692655974215332, "grad_norm": 0.2741861045360565, "learning_rate": 3.9920914141637195e-06, "loss": 0.0278, "step": 95730 }, { "epoch": 1.4694190775842222, "grad_norm": 0.3964294195175171, "learning_rate": 3.989950232913558e-06, "loss": 0.0315, "step": 95740 }, { "epoch": 1.4695725577469112, "grad_norm": 0.270384818315506, "learning_rate": 3.987809482921534e-06, "loss": 0.0267, "step": 95750 }, { "epoch": 1.4697260379096002, "grad_norm": 0.458617627620697, "learning_rate": 3.9856691643412604e-06, "loss": 0.0327, "step": 95760 }, { "epoch": 1.4698795180722892, "grad_norm": 0.2914741039276123, "learning_rate": 3.9835292773263155e-06, "loss": 0.0274, "step": 95770 }, { "epoch": 1.470032998234978, "grad_norm": 0.3881351351737976, "learning_rate": 3.981389822030249e-06, "loss": 0.0427, "step": 95780 }, { "epoch": 1.470186478397667, "grad_norm": 0.4608166515827179, "learning_rate": 3.979250798606585e-06, "loss": 0.0276, "step": 95790 }, { "epoch": 1.470339958560356, "grad_norm": 0.35215696692466736, "learning_rate": 3.977112207208811e-06, "loss": 0.0331, "step": 95800 }, { "epoch": 1.470493438723045, "grad_norm": 0.3653700351715088, "learning_rate": 3.974974047990375e-06, "loss": 0.0328, "step": 95810 }, { "epoch": 1.470646918885734, "grad_norm": 0.4179282784461975, "learning_rate": 3.972836321104714e-06, "loss": 0.026, "step": 95820 }, { "epoch": 1.4708003990484229, "grad_norm": 0.3990766108036041, "learning_rate": 3.970699026705218e-06, "loss": 0.0302, "step": 95830 }, { "epoch": 1.470953879211112, "grad_norm": 0.4133155643939972, "learning_rate": 3.968562164945249e-06, "loss": 0.0303, "step": 95840 }, { "epoch": 1.4711073593738009, "grad_norm": 0.37896957993507385, "learning_rate": 3.96642573597815e-06, "loss": 0.0277, "step": 95850 }, { "epoch": 1.47126083953649, "grad_norm": 0.2864632308483124, "learning_rate": 3.964289739957212e-06, "loss": 0.0219, "step": 95860 }, { "epoch": 1.471414319699179, "grad_norm": 0.4178611636161804, "learning_rate": 3.962154177035707e-06, "loss": 0.0301, "step": 95870 }, { "epoch": 1.471567799861868, "grad_norm": 0.2494952380657196, "learning_rate": 3.96001904736688e-06, "loss": 0.0229, "step": 95880 }, { "epoch": 1.471721280024557, "grad_norm": 0.42078056931495667, "learning_rate": 3.957884351103938e-06, "loss": 0.0238, "step": 95890 }, { "epoch": 1.4718747601872457, "grad_norm": 0.4904268682003021, "learning_rate": 3.9557500884000576e-06, "loss": 0.0285, "step": 95900 }, { "epoch": 1.4720282403499347, "grad_norm": 0.4398101270198822, "learning_rate": 3.953616259408386e-06, "loss": 0.0239, "step": 95910 }, { "epoch": 1.4721817205126237, "grad_norm": 0.3093396723270416, "learning_rate": 3.951482864282038e-06, "loss": 0.0349, "step": 95920 }, { "epoch": 1.4723352006753128, "grad_norm": 0.39094141125679016, "learning_rate": 3.949349903174098e-06, "loss": 0.0233, "step": 95930 }, { "epoch": 1.4724886808380018, "grad_norm": 0.347539484500885, "learning_rate": 3.947217376237616e-06, "loss": 0.0247, "step": 95940 }, { "epoch": 1.4726421610006906, "grad_norm": 0.4800645112991333, "learning_rate": 3.945085283625625e-06, "loss": 0.0284, "step": 95950 }, { "epoch": 1.4727956411633796, "grad_norm": 0.4310096204280853, "learning_rate": 3.942953625491103e-06, "loss": 0.0279, "step": 95960 }, { "epoch": 1.4729491213260686, "grad_norm": 0.4027342200279236, "learning_rate": 3.940822401987011e-06, "loss": 0.0232, "step": 95970 }, { "epoch": 1.4731026014887576, "grad_norm": 0.16349951922893524, "learning_rate": 3.938691613266285e-06, "loss": 0.0262, "step": 95980 }, { "epoch": 1.4732560816514466, "grad_norm": 0.36161008477211, "learning_rate": 3.936561259481819e-06, "loss": 0.0364, "step": 95990 }, { "epoch": 1.4734095618141354, "grad_norm": 0.4446753263473511, "learning_rate": 3.9344313407864774e-06, "loss": 0.0239, "step": 96000 }, { "epoch": 1.4735630419768244, "grad_norm": 0.3331998586654663, "learning_rate": 3.9323018573330964e-06, "loss": 0.0232, "step": 96010 }, { "epoch": 1.4737165221395134, "grad_norm": 0.5407664775848389, "learning_rate": 3.9301728092744785e-06, "loss": 0.0349, "step": 96020 }, { "epoch": 1.4738700023022024, "grad_norm": 0.40443184971809387, "learning_rate": 3.928044196763396e-06, "loss": 0.0207, "step": 96030 }, { "epoch": 1.4740234824648915, "grad_norm": 0.4146654009819031, "learning_rate": 3.925916019952588e-06, "loss": 0.0297, "step": 96040 }, { "epoch": 1.4741769626275805, "grad_norm": 0.3542630970478058, "learning_rate": 3.923788278994775e-06, "loss": 0.0257, "step": 96050 }, { "epoch": 1.4743304427902695, "grad_norm": 0.4833475351333618, "learning_rate": 3.921660974042621e-06, "loss": 0.0339, "step": 96060 }, { "epoch": 1.4744839229529583, "grad_norm": 0.3227700889110565, "learning_rate": 3.919534105248778e-06, "loss": 0.0221, "step": 96070 }, { "epoch": 1.4746374031156473, "grad_norm": 0.37724435329437256, "learning_rate": 3.9174076727658676e-06, "loss": 0.0253, "step": 96080 }, { "epoch": 1.4747908832783363, "grad_norm": 0.2994006872177124, "learning_rate": 3.915281676746469e-06, "loss": 0.0236, "step": 96090 }, { "epoch": 1.4749443634410253, "grad_norm": 0.4312734603881836, "learning_rate": 3.913156117343137e-06, "loss": 0.0284, "step": 96100 }, { "epoch": 1.4750978436037143, "grad_norm": 0.5015597939491272, "learning_rate": 3.911030994708395e-06, "loss": 0.0288, "step": 96110 }, { "epoch": 1.475251323766403, "grad_norm": 0.38029107451438904, "learning_rate": 3.908906308994731e-06, "loss": 0.032, "step": 96120 }, { "epoch": 1.4754048039290921, "grad_norm": 0.40569576621055603, "learning_rate": 3.906782060354603e-06, "loss": 0.0263, "step": 96130 }, { "epoch": 1.4755582840917811, "grad_norm": 0.2906959354877472, "learning_rate": 3.904658248940444e-06, "loss": 0.0231, "step": 96140 }, { "epoch": 1.4757117642544701, "grad_norm": 0.21740089356899261, "learning_rate": 3.902534874904652e-06, "loss": 0.0284, "step": 96150 }, { "epoch": 1.4758652444171592, "grad_norm": 0.43404850363731384, "learning_rate": 3.900411938399581e-06, "loss": 0.0287, "step": 96160 }, { "epoch": 1.476018724579848, "grad_norm": 0.2577831745147705, "learning_rate": 3.898289439577575e-06, "loss": 0.0249, "step": 96170 }, { "epoch": 1.476172204742537, "grad_norm": 0.3047882318496704, "learning_rate": 3.896167378590935e-06, "loss": 0.0223, "step": 96180 }, { "epoch": 1.476325684905226, "grad_norm": 0.6151255369186401, "learning_rate": 3.894045755591929e-06, "loss": 0.0353, "step": 96190 }, { "epoch": 1.476479165067915, "grad_norm": 0.431338369846344, "learning_rate": 3.891924570732798e-06, "loss": 0.0285, "step": 96200 }, { "epoch": 1.476632645230604, "grad_norm": 0.23162825405597687, "learning_rate": 3.88980382416575e-06, "loss": 0.0361, "step": 96210 }, { "epoch": 1.4767861253932928, "grad_norm": 0.36132532358169556, "learning_rate": 3.8876835160429615e-06, "loss": 0.0263, "step": 96220 }, { "epoch": 1.476939605555982, "grad_norm": 0.34917572140693665, "learning_rate": 3.885563646516575e-06, "loss": 0.0256, "step": 96230 }, { "epoch": 1.4770930857186708, "grad_norm": 0.3213818371295929, "learning_rate": 3.88344421573871e-06, "loss": 0.0232, "step": 96240 }, { "epoch": 1.4772465658813598, "grad_norm": 0.47029930353164673, "learning_rate": 3.88132522386145e-06, "loss": 0.0313, "step": 96250 }, { "epoch": 1.4774000460440488, "grad_norm": 0.4233125150203705, "learning_rate": 3.879206671036833e-06, "loss": 0.0235, "step": 96260 }, { "epoch": 1.4775535262067379, "grad_norm": 0.4080733358860016, "learning_rate": 3.877088557416891e-06, "loss": 0.0218, "step": 96270 }, { "epoch": 1.4777070063694269, "grad_norm": 0.4213360548019409, "learning_rate": 3.874970883153607e-06, "loss": 0.0314, "step": 96280 }, { "epoch": 1.4778604865321157, "grad_norm": 0.31116029620170593, "learning_rate": 3.872853648398938e-06, "loss": 0.0314, "step": 96290 }, { "epoch": 1.4780139666948047, "grad_norm": 0.2763354480266571, "learning_rate": 3.870736853304808e-06, "loss": 0.0311, "step": 96300 }, { "epoch": 1.4781674468574937, "grad_norm": 0.2989732623100281, "learning_rate": 3.8686204980231114e-06, "loss": 0.035, "step": 96310 }, { "epoch": 1.4783209270201827, "grad_norm": 0.2992692291736603, "learning_rate": 3.866504582705708e-06, "loss": 0.0282, "step": 96320 }, { "epoch": 1.4784744071828717, "grad_norm": 0.2611199617385864, "learning_rate": 3.864389107504425e-06, "loss": 0.026, "step": 96330 }, { "epoch": 1.4786278873455605, "grad_norm": 0.4621647894382477, "learning_rate": 3.862274072571068e-06, "loss": 0.0276, "step": 96340 }, { "epoch": 1.4787813675082495, "grad_norm": 0.6851842403411865, "learning_rate": 3.860159478057404e-06, "loss": 0.0271, "step": 96350 }, { "epoch": 1.4789348476709385, "grad_norm": 0.41791442036628723, "learning_rate": 3.8580453241151565e-06, "loss": 0.0238, "step": 96360 }, { "epoch": 1.4790883278336275, "grad_norm": 0.3902296721935272, "learning_rate": 3.85593161089604e-06, "loss": 0.0267, "step": 96370 }, { "epoch": 1.4792418079963165, "grad_norm": 0.35989636182785034, "learning_rate": 3.853818338551724e-06, "loss": 0.0214, "step": 96380 }, { "epoch": 1.4793952881590053, "grad_norm": 0.21793986856937408, "learning_rate": 3.8517055072338434e-06, "loss": 0.0184, "step": 96390 }, { "epoch": 1.4795487683216944, "grad_norm": 0.36768507957458496, "learning_rate": 3.8495931170940205e-06, "loss": 0.0311, "step": 96400 }, { "epoch": 1.4797022484843834, "grad_norm": 0.3784051239490509, "learning_rate": 3.84748116828382e-06, "loss": 0.031, "step": 96410 }, { "epoch": 1.4798557286470724, "grad_norm": 0.4120658338069916, "learning_rate": 3.845369660954785e-06, "loss": 0.0239, "step": 96420 }, { "epoch": 1.4800092088097614, "grad_norm": 0.35521194338798523, "learning_rate": 3.84325859525844e-06, "loss": 0.0255, "step": 96430 }, { "epoch": 1.4801626889724502, "grad_norm": 0.2306724637746811, "learning_rate": 3.841147971346262e-06, "loss": 0.0238, "step": 96440 }, { "epoch": 1.4803161691351394, "grad_norm": 0.5131732225418091, "learning_rate": 3.839037789369701e-06, "loss": 0.0357, "step": 96450 }, { "epoch": 1.4804696492978282, "grad_norm": 0.5414533615112305, "learning_rate": 3.836928049480175e-06, "loss": 0.036, "step": 96460 }, { "epoch": 1.4806231294605172, "grad_norm": 0.3499533534049988, "learning_rate": 3.8348187518290725e-06, "loss": 0.0217, "step": 96470 }, { "epoch": 1.4807766096232062, "grad_norm": 0.40723639726638794, "learning_rate": 3.832709896567747e-06, "loss": 0.0312, "step": 96480 }, { "epoch": 1.4809300897858952, "grad_norm": 0.38342106342315674, "learning_rate": 3.830601483847519e-06, "loss": 0.0249, "step": 96490 }, { "epoch": 1.4810835699485843, "grad_norm": 0.3690912127494812, "learning_rate": 3.828493513819691e-06, "loss": 0.0301, "step": 96500 }, { "epoch": 1.481237050111273, "grad_norm": 0.25543954968452454, "learning_rate": 3.826385986635511e-06, "loss": 0.0361, "step": 96510 }, { "epoch": 1.481390530273962, "grad_norm": 0.5405575037002563, "learning_rate": 3.82427890244621e-06, "loss": 0.0257, "step": 96520 }, { "epoch": 1.481544010436651, "grad_norm": 0.3982170522212982, "learning_rate": 3.8221722614029874e-06, "loss": 0.0272, "step": 96530 }, { "epoch": 1.48169749059934, "grad_norm": 0.35121431946754456, "learning_rate": 3.820066063657007e-06, "loss": 0.0338, "step": 96540 }, { "epoch": 1.481850970762029, "grad_norm": 0.5723122954368591, "learning_rate": 3.817960309359402e-06, "loss": 0.0288, "step": 96550 }, { "epoch": 1.4820044509247179, "grad_norm": 0.3474843502044678, "learning_rate": 3.81585499866127e-06, "loss": 0.0283, "step": 96560 }, { "epoch": 1.482157931087407, "grad_norm": 0.3299333155155182, "learning_rate": 3.8137501317136827e-06, "loss": 0.0245, "step": 96570 }, { "epoch": 1.482311411250096, "grad_norm": 0.46930599212646484, "learning_rate": 3.811645708667676e-06, "loss": 0.0242, "step": 96580 }, { "epoch": 1.482464891412785, "grad_norm": 0.40946701169013977, "learning_rate": 3.8095417296742532e-06, "loss": 0.0259, "step": 96590 }, { "epoch": 1.482618371575474, "grad_norm": 0.42032620310783386, "learning_rate": 3.8074381948843964e-06, "loss": 0.0317, "step": 96600 }, { "epoch": 1.4827718517381627, "grad_norm": 0.6797640919685364, "learning_rate": 3.8053351044490385e-06, "loss": 0.0245, "step": 96610 }, { "epoch": 1.4829253319008517, "grad_norm": 0.2469414621591568, "learning_rate": 3.8032324585190873e-06, "loss": 0.0198, "step": 96620 }, { "epoch": 1.4830788120635408, "grad_norm": 0.436333566904068, "learning_rate": 3.801130257245429e-06, "loss": 0.0295, "step": 96630 }, { "epoch": 1.4832322922262298, "grad_norm": 0.3842358887195587, "learning_rate": 3.7990285007789053e-06, "loss": 0.0209, "step": 96640 }, { "epoch": 1.4833857723889188, "grad_norm": 0.37083086371421814, "learning_rate": 3.796927189270331e-06, "loss": 0.0304, "step": 96650 }, { "epoch": 1.4835392525516076, "grad_norm": 0.294315904378891, "learning_rate": 3.7948263228704873e-06, "loss": 0.0268, "step": 96660 }, { "epoch": 1.4836927327142968, "grad_norm": 0.3327304422855377, "learning_rate": 3.7927259017301254e-06, "loss": 0.0302, "step": 96670 }, { "epoch": 1.4838462128769856, "grad_norm": 0.4409524202346802, "learning_rate": 3.790625925999959e-06, "loss": 0.0297, "step": 96680 }, { "epoch": 1.4839996930396746, "grad_norm": 0.3613837957382202, "learning_rate": 3.7885263958306818e-06, "loss": 0.0188, "step": 96690 }, { "epoch": 1.4841531732023636, "grad_norm": 0.44526153802871704, "learning_rate": 3.7864273113729464e-06, "loss": 0.0238, "step": 96700 }, { "epoch": 1.4843066533650526, "grad_norm": 0.3678479790687561, "learning_rate": 3.7843286727773676e-06, "loss": 0.0235, "step": 96710 }, { "epoch": 1.4844601335277416, "grad_norm": 0.35945338010787964, "learning_rate": 3.782230480194544e-06, "loss": 0.0268, "step": 96720 }, { "epoch": 1.4846136136904304, "grad_norm": 0.3345644176006317, "learning_rate": 3.7801327337750306e-06, "loss": 0.0267, "step": 96730 }, { "epoch": 1.4847670938531194, "grad_norm": 0.30056893825531006, "learning_rate": 3.778035433669355e-06, "loss": 0.0245, "step": 96740 }, { "epoch": 1.4849205740158085, "grad_norm": 0.33075299859046936, "learning_rate": 3.775938580028009e-06, "loss": 0.0263, "step": 96750 }, { "epoch": 1.4850740541784975, "grad_norm": 0.375466525554657, "learning_rate": 3.7738421730014562e-06, "loss": 0.0252, "step": 96760 }, { "epoch": 1.4852275343411865, "grad_norm": 0.5298712849617004, "learning_rate": 3.7717462127401274e-06, "loss": 0.028, "step": 96770 }, { "epoch": 1.4853810145038753, "grad_norm": 0.40976041555404663, "learning_rate": 3.769650699394416e-06, "loss": 0.0242, "step": 96780 }, { "epoch": 1.4855344946665643, "grad_norm": 0.40260469913482666, "learning_rate": 3.7675556331146955e-06, "loss": 0.0251, "step": 96790 }, { "epoch": 1.4856879748292533, "grad_norm": 0.3501703143119812, "learning_rate": 3.7654610140513006e-06, "loss": 0.0207, "step": 96800 }, { "epoch": 1.4858414549919423, "grad_norm": 0.28343087434768677, "learning_rate": 3.763366842354521e-06, "loss": 0.0286, "step": 96810 }, { "epoch": 1.4859949351546313, "grad_norm": 0.22466649115085602, "learning_rate": 3.7612731181746374e-06, "loss": 0.0267, "step": 96820 }, { "epoch": 1.4861484153173201, "grad_norm": 0.6265796422958374, "learning_rate": 3.759179841661885e-06, "loss": 0.0316, "step": 96830 }, { "epoch": 1.4863018954800091, "grad_norm": 0.34030357003211975, "learning_rate": 3.7570870129664683e-06, "loss": 0.0264, "step": 96840 }, { "epoch": 1.4864553756426981, "grad_norm": 0.3088202476501465, "learning_rate": 3.754994632238561e-06, "loss": 0.0276, "step": 96850 }, { "epoch": 1.4866088558053872, "grad_norm": 0.27171722054481506, "learning_rate": 3.7529026996283047e-06, "loss": 0.0286, "step": 96860 }, { "epoch": 1.4867623359680762, "grad_norm": 0.40752318501472473, "learning_rate": 3.7508112152858066e-06, "loss": 0.025, "step": 96870 }, { "epoch": 1.4869158161307652, "grad_norm": 0.440464049577713, "learning_rate": 3.748720179361143e-06, "loss": 0.0309, "step": 96880 }, { "epoch": 1.4870692962934542, "grad_norm": 0.3557467460632324, "learning_rate": 3.746629592004364e-06, "loss": 0.0275, "step": 96890 }, { "epoch": 1.487222776456143, "grad_norm": 0.31955111026763916, "learning_rate": 3.744539453365482e-06, "loss": 0.0246, "step": 96900 }, { "epoch": 1.487376256618832, "grad_norm": 0.3408445119857788, "learning_rate": 3.742449763594467e-06, "loss": 0.0292, "step": 96910 }, { "epoch": 1.487529736781521, "grad_norm": 0.5062082409858704, "learning_rate": 3.7403605228412777e-06, "loss": 0.0272, "step": 96920 }, { "epoch": 1.48768321694421, "grad_norm": 0.27950188517570496, "learning_rate": 3.738271731255827e-06, "loss": 0.0275, "step": 96930 }, { "epoch": 1.487836697106899, "grad_norm": 0.47050344944000244, "learning_rate": 3.736183388987994e-06, "loss": 0.0379, "step": 96940 }, { "epoch": 1.4879901772695878, "grad_norm": 0.29722949862480164, "learning_rate": 3.734095496187642e-06, "loss": 0.0219, "step": 96950 }, { "epoch": 1.4881436574322768, "grad_norm": 0.315775066614151, "learning_rate": 3.7320080530045777e-06, "loss": 0.0244, "step": 96960 }, { "epoch": 1.4882971375949658, "grad_norm": 0.42439860105514526, "learning_rate": 3.7299210595885904e-06, "loss": 0.0244, "step": 96970 }, { "epoch": 1.4884506177576549, "grad_norm": 0.23504994809627533, "learning_rate": 3.7278345160894403e-06, "loss": 0.0268, "step": 96980 }, { "epoch": 1.4886040979203439, "grad_norm": 0.36797478795051575, "learning_rate": 3.7257484226568476e-06, "loss": 0.0252, "step": 96990 }, { "epoch": 1.4887575780830327, "grad_norm": 0.4380282759666443, "learning_rate": 3.7236627794405e-06, "loss": 0.0264, "step": 97000 }, { "epoch": 1.4889110582457217, "grad_norm": 0.39057305455207825, "learning_rate": 3.721577586590057e-06, "loss": 0.0256, "step": 97010 }, { "epoch": 1.4890645384084107, "grad_norm": 0.3853476643562317, "learning_rate": 3.7194928442551437e-06, "loss": 0.0239, "step": 97020 }, { "epoch": 1.4892180185710997, "grad_norm": 0.2707424461841583, "learning_rate": 3.717408552585353e-06, "loss": 0.0266, "step": 97030 }, { "epoch": 1.4893714987337887, "grad_norm": 0.36692652106285095, "learning_rate": 3.7153247117302426e-06, "loss": 0.0298, "step": 97040 }, { "epoch": 1.4895249788964775, "grad_norm": 0.34142741560935974, "learning_rate": 3.7132413218393514e-06, "loss": 0.0222, "step": 97050 }, { "epoch": 1.4896784590591667, "grad_norm": 0.2843204736709595, "learning_rate": 3.7111583830621647e-06, "loss": 0.0313, "step": 97060 }, { "epoch": 1.4898319392218555, "grad_norm": 0.2811107039451599, "learning_rate": 3.7090758955481465e-06, "loss": 0.0255, "step": 97070 }, { "epoch": 1.4899854193845445, "grad_norm": 0.31705576181411743, "learning_rate": 3.7069938594467346e-06, "loss": 0.0217, "step": 97080 }, { "epoch": 1.4901388995472336, "grad_norm": 0.3659546971321106, "learning_rate": 3.7049122749073253e-06, "loss": 0.0276, "step": 97090 }, { "epoch": 1.4902923797099226, "grad_norm": 0.3260783553123474, "learning_rate": 3.702831142079284e-06, "loss": 0.0238, "step": 97100 }, { "epoch": 1.4904458598726116, "grad_norm": 0.40961405634880066, "learning_rate": 3.700750461111945e-06, "loss": 0.0246, "step": 97110 }, { "epoch": 1.4905993400353004, "grad_norm": 0.4073597490787506, "learning_rate": 3.698670232154611e-06, "loss": 0.029, "step": 97120 }, { "epoch": 1.4907528201979894, "grad_norm": 0.32760387659072876, "learning_rate": 3.6965904553565514e-06, "loss": 0.0256, "step": 97130 }, { "epoch": 1.4909063003606784, "grad_norm": 0.316034197807312, "learning_rate": 3.694511130867001e-06, "loss": 0.0231, "step": 97140 }, { "epoch": 1.4910597805233674, "grad_norm": 0.35269272327423096, "learning_rate": 3.692432258835166e-06, "loss": 0.0336, "step": 97150 }, { "epoch": 1.4912132606860564, "grad_norm": 0.3350866734981537, "learning_rate": 3.6903538394102166e-06, "loss": 0.0253, "step": 97160 }, { "epoch": 1.4913667408487452, "grad_norm": 0.45669975876808167, "learning_rate": 3.6882758727412915e-06, "loss": 0.0259, "step": 97170 }, { "epoch": 1.4915202210114342, "grad_norm": 0.26285091042518616, "learning_rate": 3.686198358977502e-06, "loss": 0.0292, "step": 97180 }, { "epoch": 1.4916737011741232, "grad_norm": 0.34479397535324097, "learning_rate": 3.6841212982679233e-06, "loss": 0.0352, "step": 97190 }, { "epoch": 1.4918271813368122, "grad_norm": 0.41116273403167725, "learning_rate": 3.682044690761587e-06, "loss": 0.0301, "step": 97200 }, { "epoch": 1.4919806614995013, "grad_norm": 0.3626532554626465, "learning_rate": 3.6799685366075133e-06, "loss": 0.0287, "step": 97210 }, { "epoch": 1.49213414166219, "grad_norm": 0.22067095339298248, "learning_rate": 3.6778928359546764e-06, "loss": 0.0216, "step": 97220 }, { "epoch": 1.492287621824879, "grad_norm": 0.26746347546577454, "learning_rate": 3.675817588952014e-06, "loss": 0.0337, "step": 97230 }, { "epoch": 1.492441101987568, "grad_norm": 0.4001575708389282, "learning_rate": 3.6737427957484506e-06, "loss": 0.023, "step": 97240 }, { "epoch": 1.492594582150257, "grad_norm": 0.28910598158836365, "learning_rate": 3.671668456492856e-06, "loss": 0.026, "step": 97250 }, { "epoch": 1.492748062312946, "grad_norm": 0.42525333166122437, "learning_rate": 3.6695945713340743e-06, "loss": 0.0264, "step": 97260 }, { "epoch": 1.492901542475635, "grad_norm": 0.4153394401073456, "learning_rate": 3.6675211404209276e-06, "loss": 0.0224, "step": 97270 }, { "epoch": 1.4930550226383241, "grad_norm": 0.35102298855781555, "learning_rate": 3.6654481639021955e-06, "loss": 0.0268, "step": 97280 }, { "epoch": 1.493208502801013, "grad_norm": 0.2271663397550583, "learning_rate": 3.6633756419266244e-06, "loss": 0.0241, "step": 97290 }, { "epoch": 1.493361982963702, "grad_norm": 0.4584832787513733, "learning_rate": 3.6613035746429302e-06, "loss": 0.0314, "step": 97300 }, { "epoch": 1.493515463126391, "grad_norm": 0.24111656844615936, "learning_rate": 3.6592319621997996e-06, "loss": 0.0199, "step": 97310 }, { "epoch": 1.49366894328908, "grad_norm": 0.3614368438720703, "learning_rate": 3.6571608047458807e-06, "loss": 0.0254, "step": 97320 }, { "epoch": 1.493822423451769, "grad_norm": 0.39962050318717957, "learning_rate": 3.6550901024297903e-06, "loss": 0.0256, "step": 97330 }, { "epoch": 1.4939759036144578, "grad_norm": 0.2962552607059479, "learning_rate": 3.653019855400123e-06, "loss": 0.0256, "step": 97340 }, { "epoch": 1.4941293837771468, "grad_norm": 0.365833044052124, "learning_rate": 3.6509500638054217e-06, "loss": 0.0225, "step": 97350 }, { "epoch": 1.4942828639398358, "grad_norm": 0.32414689660072327, "learning_rate": 3.6488807277942073e-06, "loss": 0.0214, "step": 97360 }, { "epoch": 1.4944363441025248, "grad_norm": 0.25520721077919006, "learning_rate": 3.646811847514974e-06, "loss": 0.0278, "step": 97370 }, { "epoch": 1.4945898242652138, "grad_norm": 0.3120793402194977, "learning_rate": 3.6447434231161728e-06, "loss": 0.0263, "step": 97380 }, { "epoch": 1.4947433044279026, "grad_norm": 0.26855847239494324, "learning_rate": 3.642675454746227e-06, "loss": 0.022, "step": 97390 }, { "epoch": 1.4948967845905916, "grad_norm": 0.4819706082344055, "learning_rate": 3.6406079425535247e-06, "loss": 0.0297, "step": 97400 }, { "epoch": 1.4950502647532806, "grad_norm": 0.3057165741920471, "learning_rate": 3.6385408866864248e-06, "loss": 0.0233, "step": 97410 }, { "epoch": 1.4952037449159696, "grad_norm": 0.3718501627445221, "learning_rate": 3.636474287293249e-06, "loss": 0.0269, "step": 97420 }, { "epoch": 1.4953572250786586, "grad_norm": 0.29076650738716125, "learning_rate": 3.634408144522286e-06, "loss": 0.0288, "step": 97430 }, { "epoch": 1.4955107052413474, "grad_norm": 0.3417263925075531, "learning_rate": 3.632342458521805e-06, "loss": 0.0213, "step": 97440 }, { "epoch": 1.4956641854040365, "grad_norm": 0.2654634118080139, "learning_rate": 3.63027722944002e-06, "loss": 0.0258, "step": 97450 }, { "epoch": 1.4958176655667255, "grad_norm": 0.3590376079082489, "learning_rate": 3.6282124574251267e-06, "loss": 0.025, "step": 97460 }, { "epoch": 1.4959711457294145, "grad_norm": 0.3997679054737091, "learning_rate": 3.626148142625289e-06, "loss": 0.0241, "step": 97470 }, { "epoch": 1.4961246258921035, "grad_norm": 0.43111559748649597, "learning_rate": 3.6240842851886328e-06, "loss": 0.0236, "step": 97480 }, { "epoch": 1.4962781060547925, "grad_norm": 0.22530630230903625, "learning_rate": 3.622020885263251e-06, "loss": 0.0178, "step": 97490 }, { "epoch": 1.4964315862174815, "grad_norm": 0.4171251058578491, "learning_rate": 3.6199579429972066e-06, "loss": 0.0291, "step": 97500 }, { "epoch": 1.4965850663801703, "grad_norm": 0.39066073298454285, "learning_rate": 3.6178954585385283e-06, "loss": 0.0332, "step": 97510 }, { "epoch": 1.4967385465428593, "grad_norm": 0.3311730921268463, "learning_rate": 3.6158334320352085e-06, "loss": 0.0229, "step": 97520 }, { "epoch": 1.4968920267055483, "grad_norm": 0.32202112674713135, "learning_rate": 3.6137718636352172e-06, "loss": 0.0234, "step": 97530 }, { "epoch": 1.4970455068682373, "grad_norm": 0.2884955108165741, "learning_rate": 3.611710753486485e-06, "loss": 0.024, "step": 97540 }, { "epoch": 1.4971989870309264, "grad_norm": 0.3289060592651367, "learning_rate": 3.6096501017368967e-06, "loss": 0.0276, "step": 97550 }, { "epoch": 1.4973524671936151, "grad_norm": 0.2704569399356842, "learning_rate": 3.60758990853433e-06, "loss": 0.0243, "step": 97560 }, { "epoch": 1.4975059473563042, "grad_norm": 0.4933082163333893, "learning_rate": 3.6055301740266125e-06, "loss": 0.0375, "step": 97570 }, { "epoch": 1.4976594275189932, "grad_norm": 0.35769712924957275, "learning_rate": 3.6034708983615408e-06, "loss": 0.0261, "step": 97580 }, { "epoch": 1.4978129076816822, "grad_norm": 0.5133745670318604, "learning_rate": 3.6014120816868836e-06, "loss": 0.0273, "step": 97590 }, { "epoch": 1.4979663878443712, "grad_norm": 0.546527087688446, "learning_rate": 3.599353724150371e-06, "loss": 0.0265, "step": 97600 }, { "epoch": 1.49811986800706, "grad_norm": 0.4224511682987213, "learning_rate": 3.5972958258997047e-06, "loss": 0.0192, "step": 97610 }, { "epoch": 1.498273348169749, "grad_norm": 0.41679078340530396, "learning_rate": 3.595238387082547e-06, "loss": 0.0246, "step": 97620 }, { "epoch": 1.498426828332438, "grad_norm": 0.31814274191856384, "learning_rate": 3.5931814078465397e-06, "loss": 0.0225, "step": 97630 }, { "epoch": 1.498580308495127, "grad_norm": 0.28798404335975647, "learning_rate": 3.5911248883392823e-06, "loss": 0.0265, "step": 97640 }, { "epoch": 1.498733788657816, "grad_norm": 0.37636134028434753, "learning_rate": 3.5890688287083343e-06, "loss": 0.0268, "step": 97650 }, { "epoch": 1.4988872688205048, "grad_norm": 0.32234638929367065, "learning_rate": 3.5870132291012392e-06, "loss": 0.0237, "step": 97660 }, { "epoch": 1.499040748983194, "grad_norm": 0.22526425123214722, "learning_rate": 3.5849580896654967e-06, "loss": 0.032, "step": 97670 }, { "epoch": 1.4991942291458829, "grad_norm": 0.2866661846637726, "learning_rate": 3.582903410548576e-06, "loss": 0.0239, "step": 97680 }, { "epoch": 1.4993477093085719, "grad_norm": 0.34242576360702515, "learning_rate": 3.5808491918979116e-06, "loss": 0.0158, "step": 97690 }, { "epoch": 1.4995011894712609, "grad_norm": 0.35113629698753357, "learning_rate": 3.5787954338609076e-06, "loss": 0.0259, "step": 97700 }, { "epoch": 1.4996546696339499, "grad_norm": 0.39777863025665283, "learning_rate": 3.576742136584933e-06, "loss": 0.0223, "step": 97710 }, { "epoch": 1.499808149796639, "grad_norm": 0.3560381233692169, "learning_rate": 3.5746893002173224e-06, "loss": 0.0281, "step": 97720 }, { "epoch": 1.4999616299593277, "grad_norm": 0.4345904290676117, "learning_rate": 3.5726369249053838e-06, "loss": 0.0285, "step": 97730 }, { "epoch": 1.5001151101220167, "grad_norm": 0.5549525618553162, "learning_rate": 3.5705850107963903e-06, "loss": 0.0287, "step": 97740 }, { "epoch": 1.5002685902847057, "grad_norm": 0.30386367440223694, "learning_rate": 3.568533558037568e-06, "loss": 0.0291, "step": 97750 }, { "epoch": 1.5004220704473947, "grad_norm": 0.311432421207428, "learning_rate": 3.5664825667761318e-06, "loss": 0.0277, "step": 97760 }, { "epoch": 1.5005755506100837, "grad_norm": 0.4542803466320038, "learning_rate": 3.5644320371592487e-06, "loss": 0.0282, "step": 97770 }, { "epoch": 1.5007290307727725, "grad_norm": 0.2257368266582489, "learning_rate": 3.5623819693340555e-06, "loss": 0.0241, "step": 97780 }, { "epoch": 1.5008825109354615, "grad_norm": 0.3743918836116791, "learning_rate": 3.560332363447666e-06, "loss": 0.0206, "step": 97790 }, { "epoch": 1.5010359910981506, "grad_norm": 0.47416767477989197, "learning_rate": 3.5582832196471417e-06, "loss": 0.028, "step": 97800 }, { "epoch": 1.5011894712608396, "grad_norm": 0.5455893278121948, "learning_rate": 3.5562345380795216e-06, "loss": 0.0247, "step": 97810 }, { "epoch": 1.5013429514235286, "grad_norm": 0.29992368817329407, "learning_rate": 3.5541863188918168e-06, "loss": 0.0258, "step": 97820 }, { "epoch": 1.5014964315862174, "grad_norm": 0.3141776919364929, "learning_rate": 3.5521385622309988e-06, "loss": 0.0225, "step": 97830 }, { "epoch": 1.5016499117489066, "grad_norm": 0.4440822899341583, "learning_rate": 3.550091268244006e-06, "loss": 0.0303, "step": 97840 }, { "epoch": 1.5018033919115954, "grad_norm": 0.3677803575992584, "learning_rate": 3.5480444370777435e-06, "loss": 0.0238, "step": 97850 }, { "epoch": 1.5019568720742844, "grad_norm": 0.4027760922908783, "learning_rate": 3.545998068879084e-06, "loss": 0.0245, "step": 97860 }, { "epoch": 1.5021103522369734, "grad_norm": 0.35826602578163147, "learning_rate": 3.5439521637948672e-06, "loss": 0.0228, "step": 97870 }, { "epoch": 1.5022638323996622, "grad_norm": 0.32821837067604065, "learning_rate": 3.5419067219718962e-06, "loss": 0.0284, "step": 97880 }, { "epoch": 1.5024173125623514, "grad_norm": 0.2864466607570648, "learning_rate": 3.539861743556955e-06, "loss": 0.0348, "step": 97890 }, { "epoch": 1.5025707927250402, "grad_norm": 0.3986973762512207, "learning_rate": 3.5378172286967728e-06, "loss": 0.0248, "step": 97900 }, { "epoch": 1.5027242728877293, "grad_norm": 0.25174981355667114, "learning_rate": 3.5357731775380545e-06, "loss": 0.03, "step": 97910 }, { "epoch": 1.5028777530504183, "grad_norm": 0.35552018880844116, "learning_rate": 3.5337295902274827e-06, "loss": 0.0244, "step": 97920 }, { "epoch": 1.503031233213107, "grad_norm": 0.4216479957103729, "learning_rate": 3.5316864669116923e-06, "loss": 0.0247, "step": 97930 }, { "epoch": 1.5031847133757963, "grad_norm": 0.3327513337135315, "learning_rate": 3.5296438077372908e-06, "loss": 0.0249, "step": 97940 }, { "epoch": 1.503338193538485, "grad_norm": 0.3106215298175812, "learning_rate": 3.5276016128508516e-06, "loss": 0.0217, "step": 97950 }, { "epoch": 1.503491673701174, "grad_norm": 0.47211402654647827, "learning_rate": 3.5255598823989145e-06, "loss": 0.0268, "step": 97960 }, { "epoch": 1.503645153863863, "grad_norm": 0.32934123277664185, "learning_rate": 3.5235186165279857e-06, "loss": 0.0245, "step": 97970 }, { "epoch": 1.5037986340265521, "grad_norm": 0.27707529067993164, "learning_rate": 3.521477815384536e-06, "loss": 0.03, "step": 97980 }, { "epoch": 1.5039521141892411, "grad_norm": 0.3243464529514313, "learning_rate": 3.5194374791150155e-06, "loss": 0.0319, "step": 97990 }, { "epoch": 1.50410559435193, "grad_norm": 0.31478849053382874, "learning_rate": 3.5173976078658213e-06, "loss": 0.0237, "step": 98000 }, { "epoch": 1.504259074514619, "grad_norm": 0.41302749514579773, "learning_rate": 3.5153582017833256e-06, "loss": 0.0292, "step": 98010 }, { "epoch": 1.504412554677308, "grad_norm": 0.6448264718055725, "learning_rate": 3.5133192610138754e-06, "loss": 0.0274, "step": 98020 }, { "epoch": 1.504566034839997, "grad_norm": 0.3742625117301941, "learning_rate": 3.5112807857037745e-06, "loss": 0.0254, "step": 98030 }, { "epoch": 1.504719515002686, "grad_norm": 0.3682464063167572, "learning_rate": 3.5092427759992952e-06, "loss": 0.0228, "step": 98040 }, { "epoch": 1.5048729951653748, "grad_norm": 0.28690671920776367, "learning_rate": 3.5072052320466787e-06, "loss": 0.0242, "step": 98050 }, { "epoch": 1.505026475328064, "grad_norm": 0.2848213016986847, "learning_rate": 3.50516815399213e-06, "loss": 0.0329, "step": 98060 }, { "epoch": 1.5051799554907528, "grad_norm": 0.4615907073020935, "learning_rate": 3.503131541981819e-06, "loss": 0.0243, "step": 98070 }, { "epoch": 1.5053334356534418, "grad_norm": 0.4256250560283661, "learning_rate": 3.501095396161892e-06, "loss": 0.0255, "step": 98080 }, { "epoch": 1.5054869158161308, "grad_norm": 0.42067351937294006, "learning_rate": 3.4990597166784557e-06, "loss": 0.0242, "step": 98090 }, { "epoch": 1.5056403959788196, "grad_norm": 0.4543212950229645, "learning_rate": 3.4970245036775707e-06, "loss": 0.0326, "step": 98100 }, { "epoch": 1.5057938761415088, "grad_norm": 0.30284416675567627, "learning_rate": 3.494989757305288e-06, "loss": 0.0291, "step": 98110 }, { "epoch": 1.5059473563041976, "grad_norm": 0.578809380531311, "learning_rate": 3.4929554777076092e-06, "loss": 0.0376, "step": 98120 }, { "epoch": 1.5061008364668866, "grad_norm": 0.2972169518470764, "learning_rate": 3.4909216650305068e-06, "loss": 0.0303, "step": 98130 }, { "epoch": 1.5062543166295757, "grad_norm": 0.3416050970554352, "learning_rate": 3.4888883194199185e-06, "loss": 0.0241, "step": 98140 }, { "epoch": 1.5064077967922644, "grad_norm": 0.35588470101356506, "learning_rate": 3.4868554410217493e-06, "loss": 0.0291, "step": 98150 }, { "epoch": 1.5065612769549537, "grad_norm": 0.5386831164360046, "learning_rate": 3.4848230299818718e-06, "loss": 0.0348, "step": 98160 }, { "epoch": 1.5067147571176425, "grad_norm": 0.46105507016181946, "learning_rate": 3.4827910864461212e-06, "loss": 0.0252, "step": 98170 }, { "epoch": 1.5068682372803315, "grad_norm": 0.31699368357658386, "learning_rate": 3.480759610560306e-06, "loss": 0.024, "step": 98180 }, { "epoch": 1.5070217174430205, "grad_norm": 0.35010403394699097, "learning_rate": 3.4787286024702003e-06, "loss": 0.0306, "step": 98190 }, { "epoch": 1.5071751976057095, "grad_norm": 0.3001311719417572, "learning_rate": 3.4766980623215295e-06, "loss": 0.0203, "step": 98200 }, { "epoch": 1.5073286777683985, "grad_norm": 0.24264629185199738, "learning_rate": 3.474667990260008e-06, "loss": 0.0257, "step": 98210 }, { "epoch": 1.5074821579310873, "grad_norm": 0.25425446033477783, "learning_rate": 3.4726383864313017e-06, "loss": 0.0274, "step": 98220 }, { "epoch": 1.5076356380937765, "grad_norm": 0.2832367718219757, "learning_rate": 3.4706092509810485e-06, "loss": 0.0237, "step": 98230 }, { "epoch": 1.5077891182564653, "grad_norm": 0.27645206451416016, "learning_rate": 3.4685805840548505e-06, "loss": 0.0282, "step": 98240 }, { "epoch": 1.5079425984191543, "grad_norm": 0.544297456741333, "learning_rate": 3.4665523857982785e-06, "loss": 0.035, "step": 98250 }, { "epoch": 1.5080960785818434, "grad_norm": 0.34616056084632874, "learning_rate": 3.4645246563568657e-06, "loss": 0.0222, "step": 98260 }, { "epoch": 1.5082495587445321, "grad_norm": 0.29824110865592957, "learning_rate": 3.462497395876113e-06, "loss": 0.0232, "step": 98270 }, { "epoch": 1.5084030389072214, "grad_norm": 0.2705300450325012, "learning_rate": 3.460470604501496e-06, "loss": 0.0186, "step": 98280 }, { "epoch": 1.5085565190699102, "grad_norm": 0.3997994065284729, "learning_rate": 3.458444282378444e-06, "loss": 0.0318, "step": 98290 }, { "epoch": 1.5087099992325992, "grad_norm": 0.2941860258579254, "learning_rate": 3.4564184296523584e-06, "loss": 0.0237, "step": 98300 }, { "epoch": 1.5088634793952882, "grad_norm": 0.27851602435112, "learning_rate": 3.454393046468608e-06, "loss": 0.027, "step": 98310 }, { "epoch": 1.509016959557977, "grad_norm": 0.39087578654289246, "learning_rate": 3.4523681329725266e-06, "loss": 0.0353, "step": 98320 }, { "epoch": 1.5091704397206662, "grad_norm": 0.32900911569595337, "learning_rate": 3.45034368930941e-06, "loss": 0.0211, "step": 98330 }, { "epoch": 1.509323919883355, "grad_norm": 0.266175776720047, "learning_rate": 3.448319715624534e-06, "loss": 0.0242, "step": 98340 }, { "epoch": 1.509477400046044, "grad_norm": 0.3016948699951172, "learning_rate": 3.4462962120631215e-06, "loss": 0.0239, "step": 98350 }, { "epoch": 1.509630880208733, "grad_norm": 0.3083822727203369, "learning_rate": 3.444273178770372e-06, "loss": 0.0233, "step": 98360 }, { "epoch": 1.5097843603714218, "grad_norm": 0.4194982349872589, "learning_rate": 3.4422506158914583e-06, "loss": 0.0207, "step": 98370 }, { "epoch": 1.509937840534111, "grad_norm": 0.3551073670387268, "learning_rate": 3.440228523571506e-06, "loss": 0.0285, "step": 98380 }, { "epoch": 1.5100913206967999, "grad_norm": 0.3233261704444885, "learning_rate": 3.438206901955614e-06, "loss": 0.023, "step": 98390 }, { "epoch": 1.5102448008594889, "grad_norm": 0.30795589089393616, "learning_rate": 3.4361857511888463e-06, "loss": 0.0263, "step": 98400 }, { "epoch": 1.5103982810221779, "grad_norm": 0.3619994819164276, "learning_rate": 3.434165071416232e-06, "loss": 0.0272, "step": 98410 }, { "epoch": 1.510551761184867, "grad_norm": 0.3483274281024933, "learning_rate": 3.432144862782768e-06, "loss": 0.0241, "step": 98420 }, { "epoch": 1.510705241347556, "grad_norm": 0.35405874252319336, "learning_rate": 3.430125125433413e-06, "loss": 0.025, "step": 98430 }, { "epoch": 1.5108587215102447, "grad_norm": 0.45217326283454895, "learning_rate": 3.4281058595131067e-06, "loss": 0.0329, "step": 98440 }, { "epoch": 1.511012201672934, "grad_norm": 0.40183091163635254, "learning_rate": 3.4260870651667334e-06, "loss": 0.0311, "step": 98450 }, { "epoch": 1.5111656818356227, "grad_norm": 0.4595767557621002, "learning_rate": 3.424068742539153e-06, "loss": 0.0284, "step": 98460 }, { "epoch": 1.5113191619983117, "grad_norm": 0.3124080002307892, "learning_rate": 3.4220508917752004e-06, "loss": 0.0243, "step": 98470 }, { "epoch": 1.5114726421610007, "grad_norm": 0.32698309421539307, "learning_rate": 3.420033513019665e-06, "loss": 0.0243, "step": 98480 }, { "epoch": 1.5116261223236895, "grad_norm": 0.3965686857700348, "learning_rate": 3.418016606417307e-06, "loss": 0.022, "step": 98490 }, { "epoch": 1.5117796024863788, "grad_norm": 0.4412602484226227, "learning_rate": 3.41600017211285e-06, "loss": 0.0187, "step": 98500 }, { "epoch": 1.5119330826490676, "grad_norm": 0.38205021619796753, "learning_rate": 3.413984210250988e-06, "loss": 0.0201, "step": 98510 }, { "epoch": 1.5120865628117566, "grad_norm": 0.3919168710708618, "learning_rate": 3.4119687209763763e-06, "loss": 0.0215, "step": 98520 }, { "epoch": 1.5122400429744456, "grad_norm": 0.7505297660827637, "learning_rate": 3.4099537044336374e-06, "loss": 0.0206, "step": 98530 }, { "epoch": 1.5123935231371344, "grad_norm": 0.41550639271736145, "learning_rate": 3.4079391607673697e-06, "loss": 0.0292, "step": 98540 }, { "epoch": 1.5125470032998236, "grad_norm": 0.3475075960159302, "learning_rate": 3.4059250901221164e-06, "loss": 0.0236, "step": 98550 }, { "epoch": 1.5127004834625124, "grad_norm": 0.38618186116218567, "learning_rate": 3.40391149264241e-06, "loss": 0.0262, "step": 98560 }, { "epoch": 1.5128539636252014, "grad_norm": 0.413149356842041, "learning_rate": 3.4018983684727346e-06, "loss": 0.0222, "step": 98570 }, { "epoch": 1.5130074437878904, "grad_norm": 0.2734084725379944, "learning_rate": 3.399885717757544e-06, "loss": 0.0235, "step": 98580 }, { "epoch": 1.5131609239505794, "grad_norm": 0.45186781883239746, "learning_rate": 3.397873540641259e-06, "loss": 0.0302, "step": 98590 }, { "epoch": 1.5133144041132685, "grad_norm": 0.3956187665462494, "learning_rate": 3.395861837268265e-06, "loss": 0.0325, "step": 98600 }, { "epoch": 1.5134678842759572, "grad_norm": 0.2807961106300354, "learning_rate": 3.3938506077829146e-06, "loss": 0.0226, "step": 98610 }, { "epoch": 1.5136213644386463, "grad_norm": 0.32499441504478455, "learning_rate": 3.3918398523295238e-06, "loss": 0.0264, "step": 98620 }, { "epoch": 1.5137748446013353, "grad_norm": 0.37816548347473145, "learning_rate": 3.38982957105238e-06, "loss": 0.0214, "step": 98630 }, { "epoch": 1.5139283247640243, "grad_norm": 0.37922564148902893, "learning_rate": 3.387819764095737e-06, "loss": 0.0252, "step": 98640 }, { "epoch": 1.5140818049267133, "grad_norm": 0.2872602939605713, "learning_rate": 3.3858104316037986e-06, "loss": 0.0324, "step": 98650 }, { "epoch": 1.514235285089402, "grad_norm": 0.33587491512298584, "learning_rate": 3.3838015737207575e-06, "loss": 0.0225, "step": 98660 }, { "epoch": 1.5143887652520913, "grad_norm": 0.4312332570552826, "learning_rate": 3.3817931905907577e-06, "loss": 0.0247, "step": 98670 }, { "epoch": 1.51454224541478, "grad_norm": 0.37556737661361694, "learning_rate": 3.3797852823579138e-06, "loss": 0.0268, "step": 98680 }, { "epoch": 1.5146957255774691, "grad_norm": 0.2808096408843994, "learning_rate": 3.377777849166305e-06, "loss": 0.0276, "step": 98690 }, { "epoch": 1.5148492057401581, "grad_norm": 0.29427245259284973, "learning_rate": 3.3757708911599786e-06, "loss": 0.023, "step": 98700 }, { "epoch": 1.515002685902847, "grad_norm": 0.36720752716064453, "learning_rate": 3.3737644084829433e-06, "loss": 0.0222, "step": 98710 }, { "epoch": 1.5151561660655362, "grad_norm": 0.411300927400589, "learning_rate": 3.3717584012791772e-06, "loss": 0.0213, "step": 98720 }, { "epoch": 1.515309646228225, "grad_norm": 0.4554099440574646, "learning_rate": 3.3697528696926264e-06, "loss": 0.0268, "step": 98730 }, { "epoch": 1.515463126390914, "grad_norm": 0.3629314601421356, "learning_rate": 3.3677478138672026e-06, "loss": 0.0287, "step": 98740 }, { "epoch": 1.515616606553603, "grad_norm": 0.2607816457748413, "learning_rate": 3.36574323394677e-06, "loss": 0.023, "step": 98750 }, { "epoch": 1.5157700867162918, "grad_norm": 0.2594608962535858, "learning_rate": 3.3637391300751796e-06, "loss": 0.027, "step": 98760 }, { "epoch": 1.515923566878981, "grad_norm": 0.2760268747806549, "learning_rate": 3.361735502396235e-06, "loss": 0.0243, "step": 98770 }, { "epoch": 1.5160770470416698, "grad_norm": 0.4824506342411041, "learning_rate": 3.3597323510537094e-06, "loss": 0.0227, "step": 98780 }, { "epoch": 1.5162305272043588, "grad_norm": 0.5768527388572693, "learning_rate": 3.3577296761913402e-06, "loss": 0.036, "step": 98790 }, { "epoch": 1.5163840073670478, "grad_norm": 0.45842045545578003, "learning_rate": 3.3557274779528324e-06, "loss": 0.0256, "step": 98800 }, { "epoch": 1.5165374875297368, "grad_norm": 0.4200878143310547, "learning_rate": 3.353725756481857e-06, "loss": 0.0226, "step": 98810 }, { "epoch": 1.5166909676924258, "grad_norm": 0.37896543741226196, "learning_rate": 3.351724511922044e-06, "loss": 0.0334, "step": 98820 }, { "epoch": 1.5168444478551146, "grad_norm": 0.36236390471458435, "learning_rate": 3.3497237444170037e-06, "loss": 0.0252, "step": 98830 }, { "epoch": 1.5169979280178039, "grad_norm": 0.43975162506103516, "learning_rate": 3.3477234541102997e-06, "loss": 0.033, "step": 98840 }, { "epoch": 1.5171514081804927, "grad_norm": 0.3227860629558563, "learning_rate": 3.3457236411454653e-06, "loss": 0.0202, "step": 98850 }, { "epoch": 1.5173048883431817, "grad_norm": 0.3503158986568451, "learning_rate": 3.3437243056659985e-06, "loss": 0.0207, "step": 98860 }, { "epoch": 1.5174583685058707, "grad_norm": 0.4164165258407593, "learning_rate": 3.341725447815366e-06, "loss": 0.0314, "step": 98870 }, { "epoch": 1.5176118486685595, "grad_norm": 0.4046911895275116, "learning_rate": 3.339727067736992e-06, "loss": 0.0242, "step": 98880 }, { "epoch": 1.5177653288312487, "grad_norm": 0.37384289503097534, "learning_rate": 3.337729165574285e-06, "loss": 0.0255, "step": 98890 }, { "epoch": 1.5179188089939375, "grad_norm": 0.2872951626777649, "learning_rate": 3.3357317414705958e-06, "loss": 0.0293, "step": 98900 }, { "epoch": 1.5180722891566265, "grad_norm": 0.20198923349380493, "learning_rate": 3.333734795569251e-06, "loss": 0.0205, "step": 98910 }, { "epoch": 1.5182257693193155, "grad_norm": 0.38332656025886536, "learning_rate": 3.331738328013553e-06, "loss": 0.0295, "step": 98920 }, { "epoch": 1.5183792494820043, "grad_norm": 0.3426017761230469, "learning_rate": 3.3297423389467555e-06, "loss": 0.0252, "step": 98930 }, { "epoch": 1.5185327296446935, "grad_norm": 0.32556653022766113, "learning_rate": 3.327746828512083e-06, "loss": 0.018, "step": 98940 }, { "epoch": 1.5186862098073823, "grad_norm": 0.3384241461753845, "learning_rate": 3.3257517968527255e-06, "loss": 0.0271, "step": 98950 }, { "epoch": 1.5188396899700713, "grad_norm": 0.38908201456069946, "learning_rate": 3.3237572441118394e-06, "loss": 0.0227, "step": 98960 }, { "epoch": 1.5189931701327604, "grad_norm": 0.33290567994117737, "learning_rate": 3.321763170432546e-06, "loss": 0.0344, "step": 98970 }, { "epoch": 1.5191466502954492, "grad_norm": 0.341787725687027, "learning_rate": 3.3197695759579297e-06, "loss": 0.0213, "step": 98980 }, { "epoch": 1.5193001304581384, "grad_norm": 0.3087049722671509, "learning_rate": 3.3177764608310524e-06, "loss": 0.0249, "step": 98990 }, { "epoch": 1.5194536106208272, "grad_norm": 0.289950430393219, "learning_rate": 3.3157838251949228e-06, "loss": 0.0245, "step": 99000 }, { "epoch": 1.5196070907835162, "grad_norm": 0.6071499586105347, "learning_rate": 3.3137916691925243e-06, "loss": 0.031, "step": 99010 }, { "epoch": 1.5197605709462052, "grad_norm": 0.25277361273765564, "learning_rate": 3.3117999929668133e-06, "loss": 0.0302, "step": 99020 }, { "epoch": 1.5199140511088942, "grad_norm": 0.3601873517036438, "learning_rate": 3.3098087966607016e-06, "loss": 0.0239, "step": 99030 }, { "epoch": 1.5200675312715832, "grad_norm": 0.41999122500419617, "learning_rate": 3.30781808041707e-06, "loss": 0.0232, "step": 99040 }, { "epoch": 1.520221011434272, "grad_norm": 0.28797829151153564, "learning_rate": 3.305827844378765e-06, "loss": 0.0334, "step": 99050 }, { "epoch": 1.5203744915969613, "grad_norm": 0.28994014859199524, "learning_rate": 3.303838088688597e-06, "loss": 0.028, "step": 99060 }, { "epoch": 1.52052797175965, "grad_norm": 0.5296593904495239, "learning_rate": 3.3018488134893443e-06, "loss": 0.0237, "step": 99070 }, { "epoch": 1.520681451922339, "grad_norm": 0.4372471272945404, "learning_rate": 3.299860018923746e-06, "loss": 0.0297, "step": 99080 }, { "epoch": 1.520834932085028, "grad_norm": 0.2794446349143982, "learning_rate": 3.29787170513452e-06, "loss": 0.0264, "step": 99090 }, { "epoch": 1.5209884122477169, "grad_norm": 0.3490310311317444, "learning_rate": 3.295883872264328e-06, "loss": 0.0231, "step": 99100 }, { "epoch": 1.521141892410406, "grad_norm": 0.37467190623283386, "learning_rate": 3.293896520455818e-06, "loss": 0.0251, "step": 99110 }, { "epoch": 1.5212953725730949, "grad_norm": 0.26054298877716064, "learning_rate": 3.2919096498515925e-06, "loss": 0.0309, "step": 99120 }, { "epoch": 1.521448852735784, "grad_norm": 0.33558976650238037, "learning_rate": 3.28992326059422e-06, "loss": 0.0244, "step": 99130 }, { "epoch": 1.521602332898473, "grad_norm": 0.4519727826118469, "learning_rate": 3.287937352826238e-06, "loss": 0.025, "step": 99140 }, { "epoch": 1.5217558130611617, "grad_norm": 0.3826119899749756, "learning_rate": 3.285951926690146e-06, "loss": 0.0271, "step": 99150 }, { "epoch": 1.521909293223851, "grad_norm": 0.6006998419761658, "learning_rate": 3.2839669823284126e-06, "loss": 0.0304, "step": 99160 }, { "epoch": 1.5220627733865397, "grad_norm": 0.5299480557441711, "learning_rate": 3.281982519883464e-06, "loss": 0.0255, "step": 99170 }, { "epoch": 1.5222162535492287, "grad_norm": 0.31787431240081787, "learning_rate": 3.2799985394977075e-06, "loss": 0.0268, "step": 99180 }, { "epoch": 1.5223697337119177, "grad_norm": 0.40418457984924316, "learning_rate": 3.2780150413135026e-06, "loss": 0.0268, "step": 99190 }, { "epoch": 1.5225232138746068, "grad_norm": 0.30822432041168213, "learning_rate": 3.276032025473169e-06, "loss": 0.023, "step": 99200 }, { "epoch": 1.5226766940372958, "grad_norm": 0.3059926927089691, "learning_rate": 3.27404949211901e-06, "loss": 0.0287, "step": 99210 }, { "epoch": 1.5228301741999846, "grad_norm": 0.5556276440620422, "learning_rate": 3.272067441393283e-06, "loss": 0.0273, "step": 99220 }, { "epoch": 1.5229836543626736, "grad_norm": 0.7246328592300415, "learning_rate": 3.27008587343821e-06, "loss": 0.0292, "step": 99230 }, { "epoch": 1.5231371345253626, "grad_norm": 0.3271137773990631, "learning_rate": 3.2681047883959817e-06, "loss": 0.0248, "step": 99240 }, { "epoch": 1.5232906146880516, "grad_norm": 0.2662600576877594, "learning_rate": 3.2661241864087533e-06, "loss": 0.0273, "step": 99250 }, { "epoch": 1.5234440948507406, "grad_norm": 0.4506712853908539, "learning_rate": 3.264144067618644e-06, "loss": 0.0289, "step": 99260 }, { "epoch": 1.5235975750134294, "grad_norm": 0.384246289730072, "learning_rate": 3.262164432167738e-06, "loss": 0.0321, "step": 99270 }, { "epoch": 1.5237510551761186, "grad_norm": 0.30004239082336426, "learning_rate": 3.2601852801980927e-06, "loss": 0.0313, "step": 99280 }, { "epoch": 1.5239045353388074, "grad_norm": 0.2949051856994629, "learning_rate": 3.258206611851723e-06, "loss": 0.0305, "step": 99290 }, { "epoch": 1.5240580155014964, "grad_norm": 0.36695781350135803, "learning_rate": 3.256228427270602e-06, "loss": 0.0206, "step": 99300 }, { "epoch": 1.5242114956641855, "grad_norm": 0.2740754783153534, "learning_rate": 3.254250726596687e-06, "loss": 0.0233, "step": 99310 }, { "epoch": 1.5243649758268742, "grad_norm": 0.29204225540161133, "learning_rate": 3.252273509971885e-06, "loss": 0.0246, "step": 99320 }, { "epoch": 1.5245184559895635, "grad_norm": 0.421841025352478, "learning_rate": 3.2502967775380744e-06, "loss": 0.0295, "step": 99330 }, { "epoch": 1.5246719361522523, "grad_norm": 0.36111706495285034, "learning_rate": 3.2483205294370988e-06, "loss": 0.0269, "step": 99340 }, { "epoch": 1.5248254163149413, "grad_norm": 0.4123325049877167, "learning_rate": 3.246344765810766e-06, "loss": 0.0349, "step": 99350 }, { "epoch": 1.5249788964776303, "grad_norm": 0.22621747851371765, "learning_rate": 3.2443694868008445e-06, "loss": 0.0229, "step": 99360 }, { "epoch": 1.525132376640319, "grad_norm": 0.6131038069725037, "learning_rate": 3.24239469254908e-06, "loss": 0.0267, "step": 99370 }, { "epoch": 1.5252858568030083, "grad_norm": 0.6040142774581909, "learning_rate": 3.240420383197174e-06, "loss": 0.0299, "step": 99380 }, { "epoch": 1.5254393369656971, "grad_norm": 0.2712421119213104, "learning_rate": 3.2384465588867943e-06, "loss": 0.0221, "step": 99390 }, { "epoch": 1.5255928171283861, "grad_norm": 0.3251030445098877, "learning_rate": 3.236473219759575e-06, "loss": 0.0272, "step": 99400 }, { "epoch": 1.5257462972910751, "grad_norm": 0.3797972798347473, "learning_rate": 3.234500365957115e-06, "loss": 0.0304, "step": 99410 }, { "epoch": 1.5258997774537642, "grad_norm": 0.3573225438594818, "learning_rate": 3.23252799762098e-06, "loss": 0.0363, "step": 99420 }, { "epoch": 1.5260532576164532, "grad_norm": 0.3213481307029724, "learning_rate": 3.2305561148926955e-06, "loss": 0.0279, "step": 99430 }, { "epoch": 1.526206737779142, "grad_norm": 0.3991149365901947, "learning_rate": 3.2285847179137663e-06, "loss": 0.0268, "step": 99440 }, { "epoch": 1.526360217941831, "grad_norm": 0.344694584608078, "learning_rate": 3.2266138068256416e-06, "loss": 0.0278, "step": 99450 }, { "epoch": 1.52651369810452, "grad_norm": 0.45774245262145996, "learning_rate": 3.2246433817697477e-06, "loss": 0.0323, "step": 99460 }, { "epoch": 1.526667178267209, "grad_norm": 0.2986493706703186, "learning_rate": 3.2226734428874806e-06, "loss": 0.0242, "step": 99470 }, { "epoch": 1.526820658429898, "grad_norm": 0.5243718028068542, "learning_rate": 3.220703990320193e-06, "loss": 0.0236, "step": 99480 }, { "epoch": 1.5269741385925868, "grad_norm": 0.26680925488471985, "learning_rate": 3.218735024209204e-06, "loss": 0.021, "step": 99490 }, { "epoch": 1.527127618755276, "grad_norm": 0.326686829328537, "learning_rate": 3.2167665446957995e-06, "loss": 0.0251, "step": 99500 }, { "epoch": 1.5272810989179648, "grad_norm": 0.5695681571960449, "learning_rate": 3.214798551921231e-06, "loss": 0.0334, "step": 99510 }, { "epoch": 1.5274345790806538, "grad_norm": 0.35844388604164124, "learning_rate": 3.212831046026713e-06, "loss": 0.0217, "step": 99520 }, { "epoch": 1.5275880592433428, "grad_norm": 0.4076713025569916, "learning_rate": 3.210864027153424e-06, "loss": 0.0318, "step": 99530 }, { "epoch": 1.5277415394060316, "grad_norm": 0.42748746275901794, "learning_rate": 3.2088974954425178e-06, "loss": 0.0295, "step": 99540 }, { "epoch": 1.5278950195687209, "grad_norm": 0.3035948574542999, "learning_rate": 3.206931451035098e-06, "loss": 0.0271, "step": 99550 }, { "epoch": 1.5280484997314097, "grad_norm": 0.33396559953689575, "learning_rate": 3.204965894072237e-06, "loss": 0.022, "step": 99560 }, { "epoch": 1.5282019798940987, "grad_norm": 0.6036608219146729, "learning_rate": 3.203000824694985e-06, "loss": 0.0246, "step": 99570 }, { "epoch": 1.5283554600567877, "grad_norm": 0.3696453273296356, "learning_rate": 3.201036243044344e-06, "loss": 0.0303, "step": 99580 }, { "epoch": 1.5285089402194765, "grad_norm": 0.2855280637741089, "learning_rate": 3.1990721492612843e-06, "loss": 0.0261, "step": 99590 }, { "epoch": 1.5286624203821657, "grad_norm": 0.32725170254707336, "learning_rate": 3.197108543486741e-06, "loss": 0.0261, "step": 99600 }, { "epoch": 1.5288159005448545, "grad_norm": 0.3112826943397522, "learning_rate": 3.1951454258616168e-06, "loss": 0.0265, "step": 99610 }, { "epoch": 1.5289693807075435, "grad_norm": 0.3674866557121277, "learning_rate": 3.1931827965267724e-06, "loss": 0.0253, "step": 99620 }, { "epoch": 1.5291228608702325, "grad_norm": 0.35463187098503113, "learning_rate": 3.1912206556230475e-06, "loss": 0.0295, "step": 99630 }, { "epoch": 1.5292763410329215, "grad_norm": 0.38880184292793274, "learning_rate": 3.189259003291235e-06, "loss": 0.0282, "step": 99640 }, { "epoch": 1.5294298211956106, "grad_norm": 0.3808135688304901, "learning_rate": 3.187297839672088e-06, "loss": 0.0289, "step": 99650 }, { "epoch": 1.5295833013582993, "grad_norm": 0.37444955110549927, "learning_rate": 3.185337164906339e-06, "loss": 0.0276, "step": 99660 }, { "epoch": 1.5297367815209886, "grad_norm": 0.4834437370300293, "learning_rate": 3.183376979134679e-06, "loss": 0.0267, "step": 99670 }, { "epoch": 1.5298902616836774, "grad_norm": 0.2910485863685608, "learning_rate": 3.1814172824977606e-06, "loss": 0.023, "step": 99680 }, { "epoch": 1.5300437418463664, "grad_norm": 0.43114739656448364, "learning_rate": 3.1794580751362057e-06, "loss": 0.0236, "step": 99690 }, { "epoch": 1.5301972220090554, "grad_norm": 0.31441730260849, "learning_rate": 3.1774993571905987e-06, "loss": 0.024, "step": 99700 }, { "epoch": 1.5303507021717442, "grad_norm": 0.4689217209815979, "learning_rate": 3.1755411288014893e-06, "loss": 0.0237, "step": 99710 }, { "epoch": 1.5305041823344334, "grad_norm": 0.4717184603214264, "learning_rate": 3.1735833901093906e-06, "loss": 0.0315, "step": 99720 }, { "epoch": 1.5306576624971222, "grad_norm": 0.3455996513366699, "learning_rate": 3.1716261412547876e-06, "loss": 0.0283, "step": 99730 }, { "epoch": 1.5308111426598112, "grad_norm": 0.2809511721134186, "learning_rate": 3.1696693823781257e-06, "loss": 0.0255, "step": 99740 }, { "epoch": 1.5309646228225002, "grad_norm": 0.2953700125217438, "learning_rate": 3.1677131136198047e-06, "loss": 0.0236, "step": 99750 }, { "epoch": 1.531118102985189, "grad_norm": 0.6383498907089233, "learning_rate": 3.1657573351202077e-06, "loss": 0.0308, "step": 99760 }, { "epoch": 1.5312715831478783, "grad_norm": 0.4268086552619934, "learning_rate": 3.1638020470196728e-06, "loss": 0.0264, "step": 99770 }, { "epoch": 1.531425063310567, "grad_norm": 0.2881907522678375, "learning_rate": 3.1618472494585016e-06, "loss": 0.0192, "step": 99780 }, { "epoch": 1.531578543473256, "grad_norm": 0.3981165885925293, "learning_rate": 3.1598929425769633e-06, "loss": 0.0312, "step": 99790 }, { "epoch": 1.531732023635945, "grad_norm": 0.507500410079956, "learning_rate": 3.1579391265152926e-06, "loss": 0.0276, "step": 99800 }, { "epoch": 1.5318855037986339, "grad_norm": 0.3936607241630554, "learning_rate": 3.155985801413688e-06, "loss": 0.0226, "step": 99810 }, { "epoch": 1.532038983961323, "grad_norm": 0.2785334289073944, "learning_rate": 3.154032967412308e-06, "loss": 0.03, "step": 99820 }, { "epoch": 1.5321924641240119, "grad_norm": 0.3697778880596161, "learning_rate": 3.152080624651287e-06, "loss": 0.0252, "step": 99830 }, { "epoch": 1.532345944286701, "grad_norm": 0.47603312134742737, "learning_rate": 3.1501287732707196e-06, "loss": 0.0335, "step": 99840 }, { "epoch": 1.53249942444939, "grad_norm": 0.35012078285217285, "learning_rate": 3.148177413410651e-06, "loss": 0.0204, "step": 99850 }, { "epoch": 1.532652904612079, "grad_norm": 0.353177934885025, "learning_rate": 3.146226545211115e-06, "loss": 0.0201, "step": 99860 }, { "epoch": 1.532806384774768, "grad_norm": 0.3652825951576233, "learning_rate": 3.1442761688120937e-06, "loss": 0.0243, "step": 99870 }, { "epoch": 1.5329598649374567, "grad_norm": 0.5521572232246399, "learning_rate": 3.1423262843535363e-06, "loss": 0.0274, "step": 99880 }, { "epoch": 1.533113345100146, "grad_norm": 0.3535051643848419, "learning_rate": 3.1403768919753684e-06, "loss": 0.0265, "step": 99890 }, { "epoch": 1.5332668252628348, "grad_norm": 0.25254109501838684, "learning_rate": 3.138427991817461e-06, "loss": 0.0215, "step": 99900 }, { "epoch": 1.5334203054255238, "grad_norm": 0.3848615288734436, "learning_rate": 3.1364795840196605e-06, "loss": 0.0209, "step": 99910 }, { "epoch": 1.5335737855882128, "grad_norm": 0.31981712579727173, "learning_rate": 3.134531668721782e-06, "loss": 0.0285, "step": 99920 }, { "epoch": 1.5337272657509016, "grad_norm": 0.382910817861557, "learning_rate": 3.132584246063599e-06, "loss": 0.0223, "step": 99930 }, { "epoch": 1.5338807459135908, "grad_norm": 0.39721548557281494, "learning_rate": 3.1306373161848514e-06, "loss": 0.0347, "step": 99940 }, { "epoch": 1.5340342260762796, "grad_norm": 0.4899502098560333, "learning_rate": 3.128690879225241e-06, "loss": 0.0351, "step": 99950 }, { "epoch": 1.5341877062389686, "grad_norm": 0.41392338275909424, "learning_rate": 3.1267449353244396e-06, "loss": 0.0236, "step": 99960 }, { "epoch": 1.5343411864016576, "grad_norm": 0.3386240303516388, "learning_rate": 3.1247994846220775e-06, "loss": 0.0263, "step": 99970 }, { "epoch": 1.5344946665643464, "grad_norm": 0.46975892782211304, "learning_rate": 3.1228545272577527e-06, "loss": 0.0252, "step": 99980 }, { "epoch": 1.5346481467270356, "grad_norm": 0.3016376495361328, "learning_rate": 3.1209100633710354e-06, "loss": 0.0229, "step": 99990 }, { "epoch": 1.5348016268897244, "grad_norm": 0.3580571413040161, "learning_rate": 3.1189660931014443e-06, "loss": 0.0208, "step": 100000 }, { "epoch": 1.5349551070524134, "grad_norm": 0.3261251449584961, "learning_rate": 3.1170226165884696e-06, "loss": 0.0207, "step": 100010 }, { "epoch": 1.5351085872151025, "grad_norm": 0.3772117495536804, "learning_rate": 3.115079633971577e-06, "loss": 0.0181, "step": 100020 }, { "epoch": 1.5352620673777915, "grad_norm": 0.3175329566001892, "learning_rate": 3.1131371453901813e-06, "loss": 0.0234, "step": 100030 }, { "epoch": 1.5354155475404805, "grad_norm": 0.4430975317955017, "learning_rate": 3.111195150983671e-06, "loss": 0.0257, "step": 100040 }, { "epoch": 1.5355690277031693, "grad_norm": 0.5409932732582092, "learning_rate": 3.109253650891394e-06, "loss": 0.0325, "step": 100050 }, { "epoch": 1.5357225078658583, "grad_norm": 0.32678720355033875, "learning_rate": 3.1073126452526647e-06, "loss": 0.0242, "step": 100060 }, { "epoch": 1.5358759880285473, "grad_norm": 0.6271957755088806, "learning_rate": 3.1053721342067644e-06, "loss": 0.032, "step": 100070 }, { "epoch": 1.5360294681912363, "grad_norm": 0.34672850370407104, "learning_rate": 3.1034321178929304e-06, "loss": 0.0286, "step": 100080 }, { "epoch": 1.5361829483539253, "grad_norm": 0.29833629727363586, "learning_rate": 3.101492596450384e-06, "loss": 0.0232, "step": 100090 }, { "epoch": 1.5363364285166141, "grad_norm": 0.44787195324897766, "learning_rate": 3.0995535700182856e-06, "loss": 0.0247, "step": 100100 }, { "epoch": 1.5364899086793034, "grad_norm": 0.40948787331581116, "learning_rate": 3.097615038735773e-06, "loss": 0.0243, "step": 100110 }, { "epoch": 1.5366433888419921, "grad_norm": 0.30821385979652405, "learning_rate": 3.0956770027419548e-06, "loss": 0.0232, "step": 100120 }, { "epoch": 1.5367968690046812, "grad_norm": 0.29290542006492615, "learning_rate": 3.093739462175893e-06, "loss": 0.0213, "step": 100130 }, { "epoch": 1.5369503491673702, "grad_norm": 0.3706592917442322, "learning_rate": 3.091802417176618e-06, "loss": 0.0267, "step": 100140 }, { "epoch": 1.537103829330059, "grad_norm": 0.3526155948638916, "learning_rate": 3.089865867883126e-06, "loss": 0.0312, "step": 100150 }, { "epoch": 1.5372573094927482, "grad_norm": 0.3332057595252991, "learning_rate": 3.087929814434375e-06, "loss": 0.0247, "step": 100160 }, { "epoch": 1.537410789655437, "grad_norm": 0.3525347411632538, "learning_rate": 3.085994256969287e-06, "loss": 0.0246, "step": 100170 }, { "epoch": 1.537564269818126, "grad_norm": 0.5718719363212585, "learning_rate": 3.084059195626754e-06, "loss": 0.0302, "step": 100180 }, { "epoch": 1.537717749980815, "grad_norm": 0.478575199842453, "learning_rate": 3.0821246305456322e-06, "loss": 0.0226, "step": 100190 }, { "epoch": 1.5378712301435038, "grad_norm": 0.3353622853755951, "learning_rate": 3.0801905618647255e-06, "loss": 0.0206, "step": 100200 }, { "epoch": 1.538024710306193, "grad_norm": 0.34505876898765564, "learning_rate": 3.078256989722828e-06, "loss": 0.0307, "step": 100210 }, { "epoch": 1.5381781904688818, "grad_norm": 0.3114185333251953, "learning_rate": 3.0763239142586786e-06, "loss": 0.0345, "step": 100220 }, { "epoch": 1.5383316706315708, "grad_norm": 0.3652539849281311, "learning_rate": 3.07439133561099e-06, "loss": 0.0334, "step": 100230 }, { "epoch": 1.5384851507942598, "grad_norm": 0.4171869456768036, "learning_rate": 3.072459253918437e-06, "loss": 0.0279, "step": 100240 }, { "epoch": 1.5386386309569489, "grad_norm": 0.2763155400753021, "learning_rate": 3.0705276693196562e-06, "loss": 0.0223, "step": 100250 }, { "epoch": 1.5387921111196379, "grad_norm": 0.4550222158432007, "learning_rate": 3.0685965819532525e-06, "loss": 0.025, "step": 100260 }, { "epoch": 1.5389455912823267, "grad_norm": 0.2489916831254959, "learning_rate": 3.0666659919577903e-06, "loss": 0.0252, "step": 100270 }, { "epoch": 1.5390990714450157, "grad_norm": 0.516703188419342, "learning_rate": 3.064735899471809e-06, "loss": 0.03, "step": 100280 }, { "epoch": 1.5392525516077047, "grad_norm": 0.2843596041202545, "learning_rate": 3.0628063046337976e-06, "loss": 0.0242, "step": 100290 }, { "epoch": 1.5394060317703937, "grad_norm": 0.4281593859195709, "learning_rate": 3.0608772075822134e-06, "loss": 0.0258, "step": 100300 }, { "epoch": 1.5395595119330827, "grad_norm": 0.3562901020050049, "learning_rate": 3.058948608455491e-06, "loss": 0.0278, "step": 100310 }, { "epoch": 1.5397129920957715, "grad_norm": 0.3287760317325592, "learning_rate": 3.0570205073920124e-06, "loss": 0.0219, "step": 100320 }, { "epoch": 1.5398664722584607, "grad_norm": 0.3073590099811554, "learning_rate": 3.0550929045301336e-06, "loss": 0.0354, "step": 100330 }, { "epoch": 1.5400199524211495, "grad_norm": 0.45320624113082886, "learning_rate": 3.05316580000817e-06, "loss": 0.0263, "step": 100340 }, { "epoch": 1.5401734325838385, "grad_norm": 0.34514015913009644, "learning_rate": 3.051239193964405e-06, "loss": 0.0263, "step": 100350 }, { "epoch": 1.5403269127465276, "grad_norm": 0.37634211778640747, "learning_rate": 3.0493130865370833e-06, "loss": 0.0197, "step": 100360 }, { "epoch": 1.5404803929092163, "grad_norm": 0.38155779242515564, "learning_rate": 3.047387477864412e-06, "loss": 0.0235, "step": 100370 }, { "epoch": 1.5406338730719056, "grad_norm": 0.4922143518924713, "learning_rate": 3.045462368084575e-06, "loss": 0.0237, "step": 100380 }, { "epoch": 1.5407873532345944, "grad_norm": 0.37265580892562866, "learning_rate": 3.0435377573357006e-06, "loss": 0.026, "step": 100390 }, { "epoch": 1.5409408333972834, "grad_norm": 0.39185217022895813, "learning_rate": 3.041613645755893e-06, "loss": 0.0235, "step": 100400 }, { "epoch": 1.5410943135599724, "grad_norm": 0.4959617555141449, "learning_rate": 3.0396900334832247e-06, "loss": 0.0326, "step": 100410 }, { "epoch": 1.5412477937226612, "grad_norm": 0.27968278527259827, "learning_rate": 3.0377669206557236e-06, "loss": 0.0334, "step": 100420 }, { "epoch": 1.5414012738853504, "grad_norm": 0.4196673035621643, "learning_rate": 3.035844307411384e-06, "loss": 0.0264, "step": 100430 }, { "epoch": 1.5415547540480392, "grad_norm": 0.389504998922348, "learning_rate": 3.0339221938881668e-06, "loss": 0.0197, "step": 100440 }, { "epoch": 1.5417082342107282, "grad_norm": 0.41475751996040344, "learning_rate": 3.032000580223995e-06, "loss": 0.0218, "step": 100450 }, { "epoch": 1.5418617143734172, "grad_norm": 0.39460378885269165, "learning_rate": 3.0300794665567525e-06, "loss": 0.0203, "step": 100460 }, { "epoch": 1.5420151945361062, "grad_norm": 0.3938986659049988, "learning_rate": 3.0281588530242978e-06, "loss": 0.0307, "step": 100470 }, { "epoch": 1.5421686746987953, "grad_norm": 0.48792338371276855, "learning_rate": 3.0262387397644468e-06, "loss": 0.0222, "step": 100480 }, { "epoch": 1.542322154861484, "grad_norm": 0.4545400142669678, "learning_rate": 3.0243191269149685e-06, "loss": 0.0244, "step": 100490 }, { "epoch": 1.5424756350241733, "grad_norm": 0.1791687160730362, "learning_rate": 3.022400014613619e-06, "loss": 0.0254, "step": 100500 }, { "epoch": 1.542629115186862, "grad_norm": 0.34811219573020935, "learning_rate": 3.0204814029981023e-06, "loss": 0.0309, "step": 100510 }, { "epoch": 1.542782595349551, "grad_norm": 0.4161131978034973, "learning_rate": 3.018563292206089e-06, "loss": 0.0355, "step": 100520 }, { "epoch": 1.54293607551224, "grad_norm": 0.5283134579658508, "learning_rate": 3.0166456823752178e-06, "loss": 0.0286, "step": 100530 }, { "epoch": 1.543089555674929, "grad_norm": 0.4646880030632019, "learning_rate": 3.0147285736430885e-06, "loss": 0.0311, "step": 100540 }, { "epoch": 1.5432430358376181, "grad_norm": 0.5266969203948975, "learning_rate": 3.0128119661472645e-06, "loss": 0.0234, "step": 100550 }, { "epoch": 1.543396516000307, "grad_norm": 0.3010402023792267, "learning_rate": 3.0108958600252713e-06, "loss": 0.0229, "step": 100560 }, { "epoch": 1.543549996162996, "grad_norm": 0.42513740062713623, "learning_rate": 3.0089802554146073e-06, "loss": 0.025, "step": 100570 }, { "epoch": 1.543703476325685, "grad_norm": 0.42137011885643005, "learning_rate": 3.0070651524527304e-06, "loss": 0.0243, "step": 100580 }, { "epoch": 1.5438569564883737, "grad_norm": 0.38089579343795776, "learning_rate": 3.005150551277051e-06, "loss": 0.0234, "step": 100590 }, { "epoch": 1.544010436651063, "grad_norm": 0.2376769632101059, "learning_rate": 3.0032364520249625e-06, "loss": 0.0219, "step": 100600 }, { "epoch": 1.5441639168137518, "grad_norm": 0.40726643800735474, "learning_rate": 3.0013228548338104e-06, "loss": 0.0336, "step": 100610 }, { "epoch": 1.5443173969764408, "grad_norm": 0.36849647760391235, "learning_rate": 2.9994097598409067e-06, "loss": 0.0264, "step": 100620 }, { "epoch": 1.5444708771391298, "grad_norm": 0.3381209969520569, "learning_rate": 2.9974971671835284e-06, "loss": 0.0299, "step": 100630 }, { "epoch": 1.5446243573018186, "grad_norm": 0.35833847522735596, "learning_rate": 2.9955850769989147e-06, "loss": 0.0204, "step": 100640 }, { "epoch": 1.5447778374645078, "grad_norm": 0.34968364238739014, "learning_rate": 2.9936734894242713e-06, "loss": 0.0274, "step": 100650 }, { "epoch": 1.5449313176271966, "grad_norm": 0.3761938810348511, "learning_rate": 2.991762404596763e-06, "loss": 0.0275, "step": 100660 }, { "epoch": 1.5450847977898856, "grad_norm": 0.3120526373386383, "learning_rate": 2.9898518226535277e-06, "loss": 0.0241, "step": 100670 }, { "epoch": 1.5452382779525746, "grad_norm": 0.399457722902298, "learning_rate": 2.9879417437316617e-06, "loss": 0.0267, "step": 100680 }, { "epoch": 1.5453917581152636, "grad_norm": 0.39757925271987915, "learning_rate": 2.986032167968216e-06, "loss": 0.0285, "step": 100690 }, { "epoch": 1.5455452382779526, "grad_norm": 0.31068161129951477, "learning_rate": 2.984123095500222e-06, "loss": 0.0247, "step": 100700 }, { "epoch": 1.5456987184406414, "grad_norm": 0.4041605591773987, "learning_rate": 2.982214526464666e-06, "loss": 0.0273, "step": 100710 }, { "epoch": 1.5458521986033307, "grad_norm": 0.4112880229949951, "learning_rate": 2.9803064609984966e-06, "loss": 0.0253, "step": 100720 }, { "epoch": 1.5460056787660195, "grad_norm": 0.4614012539386749, "learning_rate": 2.978398899238638e-06, "loss": 0.0327, "step": 100730 }, { "epoch": 1.5461591589287085, "grad_norm": 0.24599531292915344, "learning_rate": 2.97649184132196e-06, "loss": 0.0251, "step": 100740 }, { "epoch": 1.5463126390913975, "grad_norm": 0.5177100896835327, "learning_rate": 2.9745852873853064e-06, "loss": 0.0299, "step": 100750 }, { "epoch": 1.5464661192540863, "grad_norm": 0.5366819500923157, "learning_rate": 2.97267923756549e-06, "loss": 0.0361, "step": 100760 }, { "epoch": 1.5466195994167755, "grad_norm": 0.36728236079216003, "learning_rate": 2.970773691999278e-06, "loss": 0.0289, "step": 100770 }, { "epoch": 1.5467730795794643, "grad_norm": 0.32639020681381226, "learning_rate": 2.9688686508234067e-06, "loss": 0.0222, "step": 100780 }, { "epoch": 1.5469265597421533, "grad_norm": 0.5339457392692566, "learning_rate": 2.9669641141745733e-06, "loss": 0.0278, "step": 100790 }, { "epoch": 1.5470800399048423, "grad_norm": 0.3282911479473114, "learning_rate": 2.9650600821894394e-06, "loss": 0.0335, "step": 100800 }, { "epoch": 1.5472335200675311, "grad_norm": 0.3284131586551666, "learning_rate": 2.9631565550046315e-06, "loss": 0.0266, "step": 100810 }, { "epoch": 1.5473870002302204, "grad_norm": 0.43600213527679443, "learning_rate": 2.9612535327567383e-06, "loss": 0.0302, "step": 100820 }, { "epoch": 1.5475404803929091, "grad_norm": 0.6901062726974487, "learning_rate": 2.95935101558232e-06, "loss": 0.0311, "step": 100830 }, { "epoch": 1.5476939605555982, "grad_norm": 0.41644036769866943, "learning_rate": 2.957449003617886e-06, "loss": 0.0218, "step": 100840 }, { "epoch": 1.5478474407182872, "grad_norm": 0.41175925731658936, "learning_rate": 2.9555474969999163e-06, "loss": 0.0257, "step": 100850 }, { "epoch": 1.5480009208809762, "grad_norm": 0.36048561334609985, "learning_rate": 2.9536464958648636e-06, "loss": 0.0234, "step": 100860 }, { "epoch": 1.5481544010436652, "grad_norm": 0.2716650366783142, "learning_rate": 2.9517460003491326e-06, "loss": 0.0216, "step": 100870 }, { "epoch": 1.548307881206354, "grad_norm": 0.20333972573280334, "learning_rate": 2.949846010589095e-06, "loss": 0.0199, "step": 100880 }, { "epoch": 1.548461361369043, "grad_norm": 0.4360888600349426, "learning_rate": 2.947946526721087e-06, "loss": 0.0314, "step": 100890 }, { "epoch": 1.548614841531732, "grad_norm": 0.4814832806587219, "learning_rate": 2.946047548881409e-06, "loss": 0.0304, "step": 100900 }, { "epoch": 1.548768321694421, "grad_norm": 0.4024922847747803, "learning_rate": 2.944149077206324e-06, "loss": 0.0295, "step": 100910 }, { "epoch": 1.54892180185711, "grad_norm": 0.35184815526008606, "learning_rate": 2.9422511118320553e-06, "loss": 0.0235, "step": 100920 }, { "epoch": 1.5490752820197988, "grad_norm": 0.33172959089279175, "learning_rate": 2.940353652894804e-06, "loss": 0.0279, "step": 100930 }, { "epoch": 1.549228762182488, "grad_norm": 0.3241089880466461, "learning_rate": 2.938456700530714e-06, "loss": 0.0292, "step": 100940 }, { "epoch": 1.5493822423451769, "grad_norm": 0.29858237504959106, "learning_rate": 2.936560254875904e-06, "loss": 0.0233, "step": 100950 }, { "epoch": 1.5495357225078659, "grad_norm": 0.465768426656723, "learning_rate": 2.934664316066462e-06, "loss": 0.0298, "step": 100960 }, { "epoch": 1.5496892026705549, "grad_norm": 0.47467002272605896, "learning_rate": 2.9327688842384303e-06, "loss": 0.0326, "step": 100970 }, { "epoch": 1.5498426828332437, "grad_norm": 0.2629810869693756, "learning_rate": 2.930873959527818e-06, "loss": 0.0244, "step": 100980 }, { "epoch": 1.549996162995933, "grad_norm": 0.4206952154636383, "learning_rate": 2.9289795420705967e-06, "loss": 0.0247, "step": 100990 }, { "epoch": 1.5501496431586217, "grad_norm": 0.416863352060318, "learning_rate": 2.927085632002703e-06, "loss": 0.0268, "step": 101000 }, { "epoch": 1.5503031233213107, "grad_norm": 0.32236120104789734, "learning_rate": 2.9251922294600342e-06, "loss": 0.026, "step": 101010 }, { "epoch": 1.5504566034839997, "grad_norm": 0.2825310230255127, "learning_rate": 2.9232993345784586e-06, "loss": 0.0306, "step": 101020 }, { "epoch": 1.5506100836466885, "grad_norm": 0.5074397325515747, "learning_rate": 2.9214069474938033e-06, "loss": 0.0202, "step": 101030 }, { "epoch": 1.5507635638093777, "grad_norm": 0.26123350858688354, "learning_rate": 2.9195150683418504e-06, "loss": 0.0243, "step": 101040 }, { "epoch": 1.5509170439720665, "grad_norm": 0.3696519136428833, "learning_rate": 2.9176236972583625e-06, "loss": 0.0271, "step": 101050 }, { "epoch": 1.5510705241347555, "grad_norm": 0.2978805601596832, "learning_rate": 2.915732834379054e-06, "loss": 0.022, "step": 101060 }, { "epoch": 1.5512240042974446, "grad_norm": 0.3576718866825104, "learning_rate": 2.9138424798396057e-06, "loss": 0.0248, "step": 101070 }, { "epoch": 1.5513774844601336, "grad_norm": 0.4629751145839691, "learning_rate": 2.911952633775663e-06, "loss": 0.0294, "step": 101080 }, { "epoch": 1.5515309646228226, "grad_norm": 0.27630728483200073, "learning_rate": 2.910063296322835e-06, "loss": 0.0237, "step": 101090 }, { "epoch": 1.5516844447855114, "grad_norm": 0.4823661744594574, "learning_rate": 2.90817446761669e-06, "loss": 0.029, "step": 101100 }, { "epoch": 1.5518379249482006, "grad_norm": 0.3805781304836273, "learning_rate": 2.906286147792763e-06, "loss": 0.0253, "step": 101110 }, { "epoch": 1.5519914051108894, "grad_norm": 0.3567237854003906, "learning_rate": 2.904398336986557e-06, "loss": 0.0199, "step": 101120 }, { "epoch": 1.5521448852735784, "grad_norm": 0.28613975644111633, "learning_rate": 2.902511035333535e-06, "loss": 0.0259, "step": 101130 }, { "epoch": 1.5522983654362674, "grad_norm": 0.2723238468170166, "learning_rate": 2.9006242429691136e-06, "loss": 0.0193, "step": 101140 }, { "epoch": 1.5524518455989562, "grad_norm": 0.2572352886199951, "learning_rate": 2.89873796002869e-06, "loss": 0.0244, "step": 101150 }, { "epoch": 1.5526053257616454, "grad_norm": 0.30171388387680054, "learning_rate": 2.896852186647614e-06, "loss": 0.022, "step": 101160 }, { "epoch": 1.5527588059243342, "grad_norm": 0.3981272578239441, "learning_rate": 2.894966922961202e-06, "loss": 0.0314, "step": 101170 }, { "epoch": 1.5529122860870233, "grad_norm": 0.4495629668235779, "learning_rate": 2.893082169104733e-06, "loss": 0.0276, "step": 101180 }, { "epoch": 1.5530657662497123, "grad_norm": 0.32167232036590576, "learning_rate": 2.891197925213448e-06, "loss": 0.0217, "step": 101190 }, { "epoch": 1.553219246412401, "grad_norm": 0.3305893540382385, "learning_rate": 2.8893141914225563e-06, "loss": 0.0244, "step": 101200 }, { "epoch": 1.5533727265750903, "grad_norm": 0.22596073150634766, "learning_rate": 2.887430967867222e-06, "loss": 0.0242, "step": 101210 }, { "epoch": 1.553526206737779, "grad_norm": 0.3956058621406555, "learning_rate": 2.8855482546825854e-06, "loss": 0.0314, "step": 101220 }, { "epoch": 1.553679686900468, "grad_norm": 0.43368062376976013, "learning_rate": 2.883666052003742e-06, "loss": 0.0267, "step": 101230 }, { "epoch": 1.553833167063157, "grad_norm": 0.3529849350452423, "learning_rate": 2.8817843599657423e-06, "loss": 0.0258, "step": 101240 }, { "epoch": 1.553986647225846, "grad_norm": 0.27855467796325684, "learning_rate": 2.8799031787036193e-06, "loss": 0.0192, "step": 101250 }, { "epoch": 1.5541401273885351, "grad_norm": 0.3414759933948517, "learning_rate": 2.878022508352355e-06, "loss": 0.0292, "step": 101260 }, { "epoch": 1.554293607551224, "grad_norm": 0.2792603671550751, "learning_rate": 2.8761423490468965e-06, "loss": 0.0265, "step": 101270 }, { "epoch": 1.554447087713913, "grad_norm": 0.419429749250412, "learning_rate": 2.874262700922167e-06, "loss": 0.0223, "step": 101280 }, { "epoch": 1.554600567876602, "grad_norm": 0.3261839747428894, "learning_rate": 2.8723835641130314e-06, "loss": 0.0274, "step": 101290 }, { "epoch": 1.554754048039291, "grad_norm": 0.36594319343566895, "learning_rate": 2.8705049387543326e-06, "loss": 0.0162, "step": 101300 }, { "epoch": 1.55490752820198, "grad_norm": 0.38888898491859436, "learning_rate": 2.8686268249808767e-06, "loss": 0.0244, "step": 101310 }, { "epoch": 1.5550610083646688, "grad_norm": 0.39439141750335693, "learning_rate": 2.8667492229274296e-06, "loss": 0.0216, "step": 101320 }, { "epoch": 1.555214488527358, "grad_norm": 0.4978175759315491, "learning_rate": 2.8648721327287177e-06, "loss": 0.0256, "step": 101330 }, { "epoch": 1.5553679686900468, "grad_norm": 0.3314140737056732, "learning_rate": 2.8629955545194367e-06, "loss": 0.0271, "step": 101340 }, { "epoch": 1.5555214488527358, "grad_norm": 0.34539565443992615, "learning_rate": 2.8611194884342407e-06, "loss": 0.0284, "step": 101350 }, { "epoch": 1.5556749290154248, "grad_norm": 0.31418484449386597, "learning_rate": 2.8592439346077504e-06, "loss": 0.0272, "step": 101360 }, { "epoch": 1.5558284091781136, "grad_norm": 0.44860658049583435, "learning_rate": 2.8573688931745435e-06, "loss": 0.0274, "step": 101370 }, { "epoch": 1.5559818893408028, "grad_norm": 0.387777715921402, "learning_rate": 2.855494364269177e-06, "loss": 0.0237, "step": 101380 }, { "epoch": 1.5561353695034916, "grad_norm": 0.4108276963233948, "learning_rate": 2.8536203480261494e-06, "loss": 0.0237, "step": 101390 }, { "epoch": 1.5562888496661806, "grad_norm": 0.38401728868484497, "learning_rate": 2.8517468445799336e-06, "loss": 0.0237, "step": 101400 }, { "epoch": 1.5564423298288697, "grad_norm": 0.4180392026901245, "learning_rate": 2.8498738540649706e-06, "loss": 0.0332, "step": 101410 }, { "epoch": 1.5565958099915584, "grad_norm": 0.325299471616745, "learning_rate": 2.8480013766156566e-06, "loss": 0.0305, "step": 101420 }, { "epoch": 1.5567492901542477, "grad_norm": 0.34282180666923523, "learning_rate": 2.846129412366352e-06, "loss": 0.0243, "step": 101430 }, { "epoch": 1.5569027703169365, "grad_norm": 0.3649156093597412, "learning_rate": 2.844257961451384e-06, "loss": 0.0269, "step": 101440 }, { "epoch": 1.5570562504796255, "grad_norm": 0.28830376267433167, "learning_rate": 2.842387024005039e-06, "loss": 0.0243, "step": 101450 }, { "epoch": 1.5572097306423145, "grad_norm": 0.41646328568458557, "learning_rate": 2.8405166001615692e-06, "loss": 0.0317, "step": 101460 }, { "epoch": 1.5573632108050035, "grad_norm": 0.3647473156452179, "learning_rate": 2.838646690055186e-06, "loss": 0.0266, "step": 101470 }, { "epoch": 1.5575166909676925, "grad_norm": 0.3388034701347351, "learning_rate": 2.8367772938200742e-06, "loss": 0.0164, "step": 101480 }, { "epoch": 1.5576701711303813, "grad_norm": 0.3569198250770569, "learning_rate": 2.834908411590368e-06, "loss": 0.0253, "step": 101490 }, { "epoch": 1.5578236512930703, "grad_norm": 0.49524420499801636, "learning_rate": 2.8330400435001703e-06, "loss": 0.0333, "step": 101500 }, { "epoch": 1.5579771314557593, "grad_norm": 0.463407963514328, "learning_rate": 2.8311721896835542e-06, "loss": 0.0323, "step": 101510 }, { "epoch": 1.5581306116184483, "grad_norm": 0.33001917600631714, "learning_rate": 2.8293048502745456e-06, "loss": 0.025, "step": 101520 }, { "epoch": 1.5582840917811374, "grad_norm": 0.41340371966362, "learning_rate": 2.82743802540714e-06, "loss": 0.0277, "step": 101530 }, { "epoch": 1.5584375719438261, "grad_norm": 0.2933785319328308, "learning_rate": 2.8255717152152905e-06, "loss": 0.0198, "step": 101540 }, { "epoch": 1.5585910521065154, "grad_norm": 0.4372055232524872, "learning_rate": 2.8237059198329186e-06, "loss": 0.0281, "step": 101550 }, { "epoch": 1.5587445322692042, "grad_norm": 0.37414395809173584, "learning_rate": 2.8218406393939033e-06, "loss": 0.0365, "step": 101560 }, { "epoch": 1.5588980124318932, "grad_norm": 0.32734498381614685, "learning_rate": 2.819975874032096e-06, "loss": 0.0206, "step": 101570 }, { "epoch": 1.5590514925945822, "grad_norm": 0.4781518280506134, "learning_rate": 2.8181116238813044e-06, "loss": 0.0263, "step": 101580 }, { "epoch": 1.559204972757271, "grad_norm": 0.413077712059021, "learning_rate": 2.816247889075291e-06, "loss": 0.0226, "step": 101590 }, { "epoch": 1.5593584529199602, "grad_norm": 0.37364450097084045, "learning_rate": 2.8143846697477995e-06, "loss": 0.0224, "step": 101600 }, { "epoch": 1.559511933082649, "grad_norm": 0.45210710167884827, "learning_rate": 2.8125219660325252e-06, "loss": 0.0327, "step": 101610 }, { "epoch": 1.559665413245338, "grad_norm": 0.292298287153244, "learning_rate": 2.8106597780631273e-06, "loss": 0.0196, "step": 101620 }, { "epoch": 1.559818893408027, "grad_norm": 0.265654593706131, "learning_rate": 2.8087981059732293e-06, "loss": 0.0219, "step": 101630 }, { "epoch": 1.5599723735707158, "grad_norm": 0.44907915592193604, "learning_rate": 2.8069369498964195e-06, "loss": 0.0306, "step": 101640 }, { "epoch": 1.560125853733405, "grad_norm": 0.5741976499557495, "learning_rate": 2.805076309966246e-06, "loss": 0.0242, "step": 101650 }, { "epoch": 1.5602793338960939, "grad_norm": 0.34756535291671753, "learning_rate": 2.8032161863162166e-06, "loss": 0.0293, "step": 101660 }, { "epoch": 1.5604328140587829, "grad_norm": 0.3183381259441376, "learning_rate": 2.801356579079816e-06, "loss": 0.0291, "step": 101670 }, { "epoch": 1.5605862942214719, "grad_norm": 0.3248264491558075, "learning_rate": 2.7994974883904803e-06, "loss": 0.0274, "step": 101680 }, { "epoch": 1.560739774384161, "grad_norm": 0.2357424646615982, "learning_rate": 2.7976389143816007e-06, "loss": 0.0248, "step": 101690 }, { "epoch": 1.56089325454685, "grad_norm": 0.41178929805755615, "learning_rate": 2.795780857186553e-06, "loss": 0.0244, "step": 101700 }, { "epoch": 1.5610467347095387, "grad_norm": 0.328008234500885, "learning_rate": 2.7939233169386602e-06, "loss": 0.0252, "step": 101710 }, { "epoch": 1.5612002148722277, "grad_norm": 0.3476281464099884, "learning_rate": 2.792066293771213e-06, "loss": 0.025, "step": 101720 }, { "epoch": 1.5613536950349167, "grad_norm": 0.41844823956489563, "learning_rate": 2.7902097878174618e-06, "loss": 0.0295, "step": 101730 }, { "epoch": 1.5615071751976057, "grad_norm": 0.43723514676094055, "learning_rate": 2.7883537992106246e-06, "loss": 0.024, "step": 101740 }, { "epoch": 1.5616606553602947, "grad_norm": 0.2884714901447296, "learning_rate": 2.7864983280838798e-06, "loss": 0.0286, "step": 101750 }, { "epoch": 1.5618141355229835, "grad_norm": 0.3173867464065552, "learning_rate": 2.784643374570365e-06, "loss": 0.0221, "step": 101760 }, { "epoch": 1.5619676156856728, "grad_norm": 0.3266035318374634, "learning_rate": 2.7827889388031903e-06, "loss": 0.0304, "step": 101770 }, { "epoch": 1.5621210958483616, "grad_norm": 0.43448057770729065, "learning_rate": 2.7809350209154253e-06, "loss": 0.0259, "step": 101780 }, { "epoch": 1.5622745760110506, "grad_norm": 0.3657588064670563, "learning_rate": 2.7790816210400883e-06, "loss": 0.0285, "step": 101790 }, { "epoch": 1.5624280561737396, "grad_norm": 0.3040694296360016, "learning_rate": 2.7772287393101814e-06, "loss": 0.0246, "step": 101800 }, { "epoch": 1.5625815363364284, "grad_norm": 0.47164633870124817, "learning_rate": 2.7753763758586594e-06, "loss": 0.0237, "step": 101810 }, { "epoch": 1.5627350164991176, "grad_norm": 0.4783710241317749, "learning_rate": 2.7735245308184367e-06, "loss": 0.0289, "step": 101820 }, { "epoch": 1.5628884966618064, "grad_norm": 0.44770845770835876, "learning_rate": 2.771673204322404e-06, "loss": 0.0236, "step": 101830 }, { "epoch": 1.5630419768244954, "grad_norm": 0.3810548186302185, "learning_rate": 2.7698223965033943e-06, "loss": 0.0273, "step": 101840 }, { "epoch": 1.5631954569871844, "grad_norm": 0.31956738233566284, "learning_rate": 2.7679721074942168e-06, "loss": 0.0204, "step": 101850 }, { "epoch": 1.5633489371498732, "grad_norm": 0.31418246030807495, "learning_rate": 2.766122337427646e-06, "loss": 0.023, "step": 101860 }, { "epoch": 1.5635024173125625, "grad_norm": 0.4494083821773529, "learning_rate": 2.764273086436412e-06, "loss": 0.0255, "step": 101870 }, { "epoch": 1.5636558974752512, "grad_norm": 0.2682803273200989, "learning_rate": 2.7624243546532094e-06, "loss": 0.0199, "step": 101880 }, { "epoch": 1.5638093776379403, "grad_norm": 0.4636569917201996, "learning_rate": 2.760576142210697e-06, "loss": 0.0298, "step": 101890 }, { "epoch": 1.5639628578006293, "grad_norm": 0.44475600123405457, "learning_rate": 2.7587284492414935e-06, "loss": 0.0229, "step": 101900 }, { "epoch": 1.5641163379633183, "grad_norm": 0.41727182269096375, "learning_rate": 2.756881275878185e-06, "loss": 0.0239, "step": 101910 }, { "epoch": 1.5642698181260073, "grad_norm": 0.4516729712486267, "learning_rate": 2.755034622253312e-06, "loss": 0.0194, "step": 101920 }, { "epoch": 1.564423298288696, "grad_norm": 0.3426453471183777, "learning_rate": 2.7531884884993944e-06, "loss": 0.0288, "step": 101930 }, { "epoch": 1.5645767784513853, "grad_norm": 0.3439956307411194, "learning_rate": 2.7513428747488933e-06, "loss": 0.0202, "step": 101940 }, { "epoch": 1.564730258614074, "grad_norm": 0.3244186043739319, "learning_rate": 2.749497781134243e-06, "loss": 0.0229, "step": 101950 }, { "epoch": 1.5648837387767631, "grad_norm": 0.33403974771499634, "learning_rate": 2.747653207787847e-06, "loss": 0.0232, "step": 101960 }, { "epoch": 1.5650372189394521, "grad_norm": 0.3045397102832794, "learning_rate": 2.745809154842061e-06, "loss": 0.024, "step": 101970 }, { "epoch": 1.565190699102141, "grad_norm": 0.45751985907554626, "learning_rate": 2.7439656224292076e-06, "loss": 0.0312, "step": 101980 }, { "epoch": 1.5653441792648302, "grad_norm": 0.34886422753334045, "learning_rate": 2.7421226106815723e-06, "loss": 0.0258, "step": 101990 }, { "epoch": 1.565497659427519, "grad_norm": 0.5948907732963562, "learning_rate": 2.740280119731402e-06, "loss": 0.0276, "step": 102000 }, { "epoch": 1.565651139590208, "grad_norm": 0.5584484338760376, "learning_rate": 2.738438149710906e-06, "loss": 0.0301, "step": 102010 }, { "epoch": 1.565804619752897, "grad_norm": 0.29467910528182983, "learning_rate": 2.7365967007522555e-06, "loss": 0.026, "step": 102020 }, { "epoch": 1.5659580999155858, "grad_norm": 0.3707737326622009, "learning_rate": 2.7347557729875927e-06, "loss": 0.0238, "step": 102030 }, { "epoch": 1.566111580078275, "grad_norm": 0.5249854326248169, "learning_rate": 2.7329153665490084e-06, "loss": 0.0322, "step": 102040 }, { "epoch": 1.5662650602409638, "grad_norm": 0.4245108664035797, "learning_rate": 2.7310754815685627e-06, "loss": 0.0243, "step": 102050 }, { "epoch": 1.5664185404036528, "grad_norm": 0.49215853214263916, "learning_rate": 2.7292361181782843e-06, "loss": 0.0236, "step": 102060 }, { "epoch": 1.5665720205663418, "grad_norm": 0.3815061151981354, "learning_rate": 2.7273972765101563e-06, "loss": 0.024, "step": 102070 }, { "epoch": 1.5667255007290306, "grad_norm": 0.35637104511260986, "learning_rate": 2.7255589566961272e-06, "loss": 0.0217, "step": 102080 }, { "epoch": 1.5668789808917198, "grad_norm": 0.2687082886695862, "learning_rate": 2.723721158868107e-06, "loss": 0.0287, "step": 102090 }, { "epoch": 1.5670324610544086, "grad_norm": 0.44297581911087036, "learning_rate": 2.7218838831579698e-06, "loss": 0.0189, "step": 102100 }, { "epoch": 1.5671859412170976, "grad_norm": 0.34454643726348877, "learning_rate": 2.7200471296975473e-06, "loss": 0.022, "step": 102110 }, { "epoch": 1.5673394213797867, "grad_norm": 0.32244566082954407, "learning_rate": 2.718210898618644e-06, "loss": 0.0213, "step": 102120 }, { "epoch": 1.5674929015424757, "grad_norm": 0.3820238411426544, "learning_rate": 2.716375190053022e-06, "loss": 0.026, "step": 102130 }, { "epoch": 1.5676463817051647, "grad_norm": 0.396880179643631, "learning_rate": 2.7145400041323953e-06, "loss": 0.0275, "step": 102140 }, { "epoch": 1.5677998618678535, "grad_norm": 0.4210452437400818, "learning_rate": 2.7127053409884584e-06, "loss": 0.0245, "step": 102150 }, { "epoch": 1.5679533420305427, "grad_norm": 0.3747473359107971, "learning_rate": 2.710871200752858e-06, "loss": 0.0254, "step": 102160 }, { "epoch": 1.5681068221932315, "grad_norm": 0.3581834137439728, "learning_rate": 2.7090375835572023e-06, "loss": 0.0224, "step": 102170 }, { "epoch": 1.5682603023559205, "grad_norm": 0.4601666033267975, "learning_rate": 2.7072044895330672e-06, "loss": 0.0244, "step": 102180 }, { "epoch": 1.5684137825186095, "grad_norm": 0.32780563831329346, "learning_rate": 2.7053719188119878e-06, "loss": 0.0256, "step": 102190 }, { "epoch": 1.5685672626812983, "grad_norm": 0.45481565594673157, "learning_rate": 2.7035398715254615e-06, "loss": 0.0216, "step": 102200 }, { "epoch": 1.5687207428439875, "grad_norm": 0.3001669943332672, "learning_rate": 2.7017083478049466e-06, "loss": 0.0315, "step": 102210 }, { "epoch": 1.5688742230066763, "grad_norm": 0.42854708433151245, "learning_rate": 2.699877347781872e-06, "loss": 0.0292, "step": 102220 }, { "epoch": 1.5690277031693654, "grad_norm": 0.45391932129859924, "learning_rate": 2.698046871587624e-06, "loss": 0.0222, "step": 102230 }, { "epoch": 1.5691811833320544, "grad_norm": 0.4391266107559204, "learning_rate": 2.6962169193535403e-06, "loss": 0.0267, "step": 102240 }, { "epoch": 1.5693346634947432, "grad_norm": 0.2729214131832123, "learning_rate": 2.6943874912109413e-06, "loss": 0.0214, "step": 102250 }, { "epoch": 1.5694881436574324, "grad_norm": 0.2912343144416809, "learning_rate": 2.6925585872910965e-06, "loss": 0.0245, "step": 102260 }, { "epoch": 1.5696416238201212, "grad_norm": 0.37221553921699524, "learning_rate": 2.6907302077252405e-06, "loss": 0.0312, "step": 102270 }, { "epoch": 1.5697951039828102, "grad_norm": 0.4591432809829712, "learning_rate": 2.6889023526445724e-06, "loss": 0.0272, "step": 102280 }, { "epoch": 1.5699485841454992, "grad_norm": 0.469034343957901, "learning_rate": 2.6870750221802498e-06, "loss": 0.0238, "step": 102290 }, { "epoch": 1.5701020643081882, "grad_norm": 0.5176246166229248, "learning_rate": 2.685248216463396e-06, "loss": 0.0235, "step": 102300 }, { "epoch": 1.5702555444708772, "grad_norm": 0.4260653257369995, "learning_rate": 2.6834219356250934e-06, "loss": 0.0319, "step": 102310 }, { "epoch": 1.570409024633566, "grad_norm": 0.3344203233718872, "learning_rate": 2.681596179796394e-06, "loss": 0.0284, "step": 102320 }, { "epoch": 1.570562504796255, "grad_norm": 0.31475773453712463, "learning_rate": 2.679770949108308e-06, "loss": 0.0301, "step": 102330 }, { "epoch": 1.570715984958944, "grad_norm": 0.32830503582954407, "learning_rate": 2.677946243691796e-06, "loss": 0.0256, "step": 102340 }, { "epoch": 1.570869465121633, "grad_norm": 0.16132718324661255, "learning_rate": 2.676122063677802e-06, "loss": 0.0193, "step": 102350 }, { "epoch": 1.571022945284322, "grad_norm": 0.38550034165382385, "learning_rate": 2.674298409197219e-06, "loss": 0.0192, "step": 102360 }, { "epoch": 1.5711764254470109, "grad_norm": 0.4232825040817261, "learning_rate": 2.672475280380904e-06, "loss": 0.031, "step": 102370 }, { "epoch": 1.5713299056097, "grad_norm": 0.5922051668167114, "learning_rate": 2.670652677359684e-06, "loss": 0.0299, "step": 102380 }, { "epoch": 1.5714833857723889, "grad_norm": 0.3620088994503021, "learning_rate": 2.668830600264335e-06, "loss": 0.0278, "step": 102390 }, { "epoch": 1.571636865935078, "grad_norm": 0.489290326833725, "learning_rate": 2.667009049225602e-06, "loss": 0.0303, "step": 102400 }, { "epoch": 1.571790346097767, "grad_norm": 0.3036799728870392, "learning_rate": 2.665188024374197e-06, "loss": 0.0265, "step": 102410 }, { "epoch": 1.5719438262604557, "grad_norm": 0.31034204363822937, "learning_rate": 2.6633675258407886e-06, "loss": 0.021, "step": 102420 }, { "epoch": 1.572097306423145, "grad_norm": 0.5816525220870972, "learning_rate": 2.6615475537560085e-06, "loss": 0.0237, "step": 102430 }, { "epoch": 1.5722507865858337, "grad_norm": 0.3380580246448517, "learning_rate": 2.65972810825045e-06, "loss": 0.0208, "step": 102440 }, { "epoch": 1.5724042667485227, "grad_norm": 0.6770549416542053, "learning_rate": 2.6579091894546705e-06, "loss": 0.0263, "step": 102450 }, { "epoch": 1.5725577469112118, "grad_norm": 0.3641820549964905, "learning_rate": 2.6560907974991877e-06, "loss": 0.0225, "step": 102460 }, { "epoch": 1.5727112270739005, "grad_norm": 0.3112834095954895, "learning_rate": 2.6542729325144813e-06, "loss": 0.0273, "step": 102470 }, { "epoch": 1.5728647072365898, "grad_norm": 0.527249276638031, "learning_rate": 2.6524555946310002e-06, "loss": 0.0264, "step": 102480 }, { "epoch": 1.5730181873992786, "grad_norm": 0.31525540351867676, "learning_rate": 2.6506387839791437e-06, "loss": 0.0263, "step": 102490 }, { "epoch": 1.5731716675619676, "grad_norm": 0.35255753993988037, "learning_rate": 2.6488225006892775e-06, "loss": 0.0276, "step": 102500 }, { "epoch": 1.5733251477246566, "grad_norm": 0.6162087321281433, "learning_rate": 2.647006744891737e-06, "loss": 0.0234, "step": 102510 }, { "epoch": 1.5734786278873456, "grad_norm": 0.393661230802536, "learning_rate": 2.645191516716812e-06, "loss": 0.0234, "step": 102520 }, { "epoch": 1.5736321080500346, "grad_norm": 0.27232152223587036, "learning_rate": 2.6433768162947558e-06, "loss": 0.0303, "step": 102530 }, { "epoch": 1.5737855882127234, "grad_norm": 0.3506678342819214, "learning_rate": 2.641562643755784e-06, "loss": 0.0215, "step": 102540 }, { "epoch": 1.5739390683754126, "grad_norm": 0.2779332101345062, "learning_rate": 2.639748999230075e-06, "loss": 0.0199, "step": 102550 }, { "epoch": 1.5740925485381014, "grad_norm": 0.4028235375881195, "learning_rate": 2.6379358828477687e-06, "loss": 0.0213, "step": 102560 }, { "epoch": 1.5742460287007904, "grad_norm": 0.40716424584388733, "learning_rate": 2.6361232947389647e-06, "loss": 0.0308, "step": 102570 }, { "epoch": 1.5743995088634795, "grad_norm": 0.5422351360321045, "learning_rate": 2.634311235033736e-06, "loss": 0.0324, "step": 102580 }, { "epoch": 1.5745529890261682, "grad_norm": 0.45600494742393494, "learning_rate": 2.6324997038621005e-06, "loss": 0.0233, "step": 102590 }, { "epoch": 1.5747064691888575, "grad_norm": 0.390231728553772, "learning_rate": 2.630688701354047e-06, "loss": 0.0178, "step": 102600 }, { "epoch": 1.5748599493515463, "grad_norm": 0.40038931369781494, "learning_rate": 2.628878227639531e-06, "loss": 0.0218, "step": 102610 }, { "epoch": 1.5750134295142353, "grad_norm": 0.3273693323135376, "learning_rate": 2.627068282848463e-06, "loss": 0.0232, "step": 102620 }, { "epoch": 1.5751669096769243, "grad_norm": 0.3065985441207886, "learning_rate": 2.625258867110717e-06, "loss": 0.0303, "step": 102630 }, { "epoch": 1.575320389839613, "grad_norm": 0.3900243043899536, "learning_rate": 2.6234499805561307e-06, "loss": 0.0223, "step": 102640 }, { "epoch": 1.5754738700023023, "grad_norm": 0.3996151089668274, "learning_rate": 2.6216416233145024e-06, "loss": 0.0276, "step": 102650 }, { "epoch": 1.5756273501649911, "grad_norm": 0.39164066314697266, "learning_rate": 2.6198337955155904e-06, "loss": 0.0272, "step": 102660 }, { "epoch": 1.5757808303276801, "grad_norm": 0.4743138253688812, "learning_rate": 2.6180264972891225e-06, "loss": 0.0222, "step": 102670 }, { "epoch": 1.5759343104903691, "grad_norm": 0.3766603171825409, "learning_rate": 2.616219728764784e-06, "loss": 0.0288, "step": 102680 }, { "epoch": 1.576087790653058, "grad_norm": 0.5118144154548645, "learning_rate": 2.614413490072213e-06, "loss": 0.0293, "step": 102690 }, { "epoch": 1.5762412708157472, "grad_norm": 0.3250892162322998, "learning_rate": 2.612607781341027e-06, "loss": 0.0321, "step": 102700 }, { "epoch": 1.576394750978436, "grad_norm": 0.339707612991333, "learning_rate": 2.6108026027007927e-06, "loss": 0.0209, "step": 102710 }, { "epoch": 1.576548231141125, "grad_norm": 0.30419236421585083, "learning_rate": 2.608997954281044e-06, "loss": 0.027, "step": 102720 }, { "epoch": 1.576701711303814, "grad_norm": 0.5766764879226685, "learning_rate": 2.6071938362112757e-06, "loss": 0.0359, "step": 102730 }, { "epoch": 1.576855191466503, "grad_norm": 0.4977894723415375, "learning_rate": 2.605390248620945e-06, "loss": 0.024, "step": 102740 }, { "epoch": 1.577008671629192, "grad_norm": 0.3501862585544586, "learning_rate": 2.6035871916394673e-06, "loss": 0.0271, "step": 102750 }, { "epoch": 1.5771621517918808, "grad_norm": 0.3195010721683502, "learning_rate": 2.601784665396224e-06, "loss": 0.0293, "step": 102760 }, { "epoch": 1.57731563195457, "grad_norm": 0.39205867052078247, "learning_rate": 2.599982670020561e-06, "loss": 0.0257, "step": 102770 }, { "epoch": 1.5774691121172588, "grad_norm": 0.3524559736251831, "learning_rate": 2.598181205641783e-06, "loss": 0.0265, "step": 102780 }, { "epoch": 1.5776225922799478, "grad_norm": 0.29311537742614746, "learning_rate": 2.5963802723891475e-06, "loss": 0.029, "step": 102790 }, { "epoch": 1.5777760724426368, "grad_norm": 0.2898404002189636, "learning_rate": 2.5945798703918922e-06, "loss": 0.0244, "step": 102800 }, { "epoch": 1.5779295526053256, "grad_norm": 0.35201025009155273, "learning_rate": 2.5927799997792025e-06, "loss": 0.0278, "step": 102810 }, { "epoch": 1.5780830327680149, "grad_norm": 0.2780212461948395, "learning_rate": 2.5909806606802314e-06, "loss": 0.0224, "step": 102820 }, { "epoch": 1.5782365129307037, "grad_norm": 0.4229152798652649, "learning_rate": 2.589181853224093e-06, "loss": 0.0217, "step": 102830 }, { "epoch": 1.5783899930933927, "grad_norm": 0.48548948764801025, "learning_rate": 2.5873835775398616e-06, "loss": 0.0221, "step": 102840 }, { "epoch": 1.5785434732560817, "grad_norm": 0.3726917505264282, "learning_rate": 2.585585833756574e-06, "loss": 0.0286, "step": 102850 }, { "epoch": 1.5786969534187705, "grad_norm": 0.4024043381214142, "learning_rate": 2.583788622003228e-06, "loss": 0.0237, "step": 102860 }, { "epoch": 1.5788504335814597, "grad_norm": 0.16345056891441345, "learning_rate": 2.5819919424087914e-06, "loss": 0.0197, "step": 102870 }, { "epoch": 1.5790039137441485, "grad_norm": 0.23735485970973969, "learning_rate": 2.580195795102185e-06, "loss": 0.0213, "step": 102880 }, { "epoch": 1.5791573939068375, "grad_norm": 0.5295129418373108, "learning_rate": 2.578400180212285e-06, "loss": 0.0247, "step": 102890 }, { "epoch": 1.5793108740695265, "grad_norm": 0.4329724609851837, "learning_rate": 2.576605097867947e-06, "loss": 0.0254, "step": 102900 }, { "epoch": 1.5794643542322155, "grad_norm": 0.34842023253440857, "learning_rate": 2.5748105481979758e-06, "loss": 0.0262, "step": 102910 }, { "epoch": 1.5796178343949046, "grad_norm": 0.28435125946998596, "learning_rate": 2.5730165313311396e-06, "loss": 0.0179, "step": 102920 }, { "epoch": 1.5797713145575933, "grad_norm": 0.271339476108551, "learning_rate": 2.5712230473961775e-06, "loss": 0.0232, "step": 102930 }, { "epoch": 1.5799247947202824, "grad_norm": 0.40140947699546814, "learning_rate": 2.5694300965217755e-06, "loss": 0.0248, "step": 102940 }, { "epoch": 1.5800782748829714, "grad_norm": 0.49758824706077576, "learning_rate": 2.5676376788365885e-06, "loss": 0.0317, "step": 102950 }, { "epoch": 1.5802317550456604, "grad_norm": 0.3171665370464325, "learning_rate": 2.5658457944692393e-06, "loss": 0.0259, "step": 102960 }, { "epoch": 1.5803852352083494, "grad_norm": 0.34849318861961365, "learning_rate": 2.564054443548304e-06, "loss": 0.0244, "step": 102970 }, { "epoch": 1.5805387153710382, "grad_norm": 0.2999260723590851, "learning_rate": 2.5622636262023214e-06, "loss": 0.0263, "step": 102980 }, { "epoch": 1.5806921955337274, "grad_norm": 0.422868013381958, "learning_rate": 2.5604733425597972e-06, "loss": 0.0274, "step": 102990 }, { "epoch": 1.5808456756964162, "grad_norm": 0.42899438738822937, "learning_rate": 2.5586835927491917e-06, "loss": 0.0247, "step": 103000 }, { "epoch": 1.5809991558591052, "grad_norm": 0.49126675724983215, "learning_rate": 2.5568943768989327e-06, "loss": 0.0259, "step": 103010 }, { "epoch": 1.5811526360217942, "grad_norm": 0.2732314467430115, "learning_rate": 2.555105695137403e-06, "loss": 0.0222, "step": 103020 }, { "epoch": 1.581306116184483, "grad_norm": 0.20901824533939362, "learning_rate": 2.5533175475929616e-06, "loss": 0.0183, "step": 103030 }, { "epoch": 1.5814595963471723, "grad_norm": 0.38505813479423523, "learning_rate": 2.5515299343939106e-06, "loss": 0.0309, "step": 103040 }, { "epoch": 1.581613076509861, "grad_norm": 0.34475213289260864, "learning_rate": 2.5497428556685212e-06, "loss": 0.0183, "step": 103050 }, { "epoch": 1.58176655667255, "grad_norm": 0.49765896797180176, "learning_rate": 2.5479563115450323e-06, "loss": 0.0314, "step": 103060 }, { "epoch": 1.581920036835239, "grad_norm": 0.5698543787002563, "learning_rate": 2.546170302151639e-06, "loss": 0.02, "step": 103070 }, { "epoch": 1.5820735169979279, "grad_norm": 0.2814015746116638, "learning_rate": 2.544384827616497e-06, "loss": 0.022, "step": 103080 }, { "epoch": 1.582226997160617, "grad_norm": 0.3907982409000397, "learning_rate": 2.542599888067725e-06, "loss": 0.0193, "step": 103090 }, { "epoch": 1.582380477323306, "grad_norm": 0.243606835603714, "learning_rate": 2.5408154836334043e-06, "loss": 0.0187, "step": 103100 }, { "epoch": 1.582533957485995, "grad_norm": 0.31861773133277893, "learning_rate": 2.539031614441576e-06, "loss": 0.0237, "step": 103110 }, { "epoch": 1.582687437648684, "grad_norm": 0.3949772119522095, "learning_rate": 2.5372482806202414e-06, "loss": 0.0307, "step": 103120 }, { "epoch": 1.582840917811373, "grad_norm": 0.4080914556980133, "learning_rate": 2.5354654822973747e-06, "loss": 0.0231, "step": 103130 }, { "epoch": 1.582994397974062, "grad_norm": 0.25628846883773804, "learning_rate": 2.533683219600892e-06, "loss": 0.0205, "step": 103140 }, { "epoch": 1.5831478781367507, "grad_norm": 0.3137514889240265, "learning_rate": 2.5319014926586848e-06, "loss": 0.0266, "step": 103150 }, { "epoch": 1.5833013582994397, "grad_norm": 0.37313175201416016, "learning_rate": 2.5301203015986066e-06, "loss": 0.0233, "step": 103160 }, { "epoch": 1.5834548384621288, "grad_norm": 0.31386899948120117, "learning_rate": 2.5283396465484657e-06, "loss": 0.0223, "step": 103170 }, { "epoch": 1.5836083186248178, "grad_norm": 0.34970447421073914, "learning_rate": 2.5265595276360365e-06, "loss": 0.0287, "step": 103180 }, { "epoch": 1.5837617987875068, "grad_norm": 0.4751550853252411, "learning_rate": 2.5247799449890533e-06, "loss": 0.0223, "step": 103190 }, { "epoch": 1.5839152789501956, "grad_norm": 0.3516205847263336, "learning_rate": 2.5230008987352105e-06, "loss": 0.0209, "step": 103200 }, { "epoch": 1.5840687591128848, "grad_norm": 0.5119054317474365, "learning_rate": 2.521222389002165e-06, "loss": 0.0332, "step": 103210 }, { "epoch": 1.5842222392755736, "grad_norm": 0.3864050507545471, "learning_rate": 2.5194444159175402e-06, "loss": 0.0252, "step": 103220 }, { "epoch": 1.5843757194382626, "grad_norm": 0.3685016930103302, "learning_rate": 2.517666979608917e-06, "loss": 0.0329, "step": 103230 }, { "epoch": 1.5845291996009516, "grad_norm": 0.35771551728248596, "learning_rate": 2.5158900802038287e-06, "loss": 0.0258, "step": 103240 }, { "epoch": 1.5846826797636404, "grad_norm": 0.4210904538631439, "learning_rate": 2.514113717829787e-06, "loss": 0.0243, "step": 103250 }, { "epoch": 1.5848361599263296, "grad_norm": 0.3358565866947174, "learning_rate": 2.512337892614255e-06, "loss": 0.0224, "step": 103260 }, { "epoch": 1.5849896400890184, "grad_norm": 0.3745913803577423, "learning_rate": 2.510562604684659e-06, "loss": 0.0268, "step": 103270 }, { "epoch": 1.5851431202517074, "grad_norm": 0.34979552030563354, "learning_rate": 2.508787854168386e-06, "loss": 0.0261, "step": 103280 }, { "epoch": 1.5852966004143965, "grad_norm": 0.4052082896232605, "learning_rate": 2.507013641192786e-06, "loss": 0.0247, "step": 103290 }, { "epoch": 1.5854500805770853, "grad_norm": 0.355122447013855, "learning_rate": 2.505239965885169e-06, "loss": 0.0335, "step": 103300 }, { "epoch": 1.5856035607397745, "grad_norm": 0.3567034602165222, "learning_rate": 2.503466828372805e-06, "loss": 0.0193, "step": 103310 }, { "epoch": 1.5857570409024633, "grad_norm": 0.3278462588787079, "learning_rate": 2.501694228782933e-06, "loss": 0.0174, "step": 103320 }, { "epoch": 1.5859105210651523, "grad_norm": 0.3587205708026886, "learning_rate": 2.4999221672427487e-06, "loss": 0.0243, "step": 103330 }, { "epoch": 1.5860640012278413, "grad_norm": 0.3554786145687103, "learning_rate": 2.498150643879398e-06, "loss": 0.0267, "step": 103340 }, { "epoch": 1.5862174813905303, "grad_norm": 0.34205490350723267, "learning_rate": 2.4963796588200084e-06, "loss": 0.0211, "step": 103350 }, { "epoch": 1.5863709615532193, "grad_norm": 0.4391220510005951, "learning_rate": 2.4946092121916566e-06, "loss": 0.0268, "step": 103360 }, { "epoch": 1.5865244417159081, "grad_norm": 0.34381765127182007, "learning_rate": 2.4928393041213827e-06, "loss": 0.0212, "step": 103370 }, { "epoch": 1.5866779218785974, "grad_norm": 0.46240299940109253, "learning_rate": 2.491069934736188e-06, "loss": 0.0346, "step": 103380 }, { "epoch": 1.5868314020412861, "grad_norm": 0.46512630581855774, "learning_rate": 2.4893011041630354e-06, "loss": 0.0255, "step": 103390 }, { "epoch": 1.5869848822039752, "grad_norm": 0.2821812033653259, "learning_rate": 2.4875328125288513e-06, "loss": 0.0238, "step": 103400 }, { "epoch": 1.5871383623666642, "grad_norm": 0.4707694351673126, "learning_rate": 2.4857650599605167e-06, "loss": 0.0285, "step": 103410 }, { "epoch": 1.587291842529353, "grad_norm": 0.31680065393447876, "learning_rate": 2.483997846584886e-06, "loss": 0.0227, "step": 103420 }, { "epoch": 1.5874453226920422, "grad_norm": 0.43713921308517456, "learning_rate": 2.482231172528765e-06, "loss": 0.0292, "step": 103430 }, { "epoch": 1.587598802854731, "grad_norm": 0.4011920392513275, "learning_rate": 2.480465037918919e-06, "loss": 0.0246, "step": 103440 }, { "epoch": 1.58775228301742, "grad_norm": 0.38776424527168274, "learning_rate": 2.478699442882083e-06, "loss": 0.028, "step": 103450 }, { "epoch": 1.587905763180109, "grad_norm": 0.3987165689468384, "learning_rate": 2.4769343875449504e-06, "loss": 0.0307, "step": 103460 }, { "epoch": 1.5880592433427978, "grad_norm": 0.3902452290058136, "learning_rate": 2.475169872034173e-06, "loss": 0.0221, "step": 103470 }, { "epoch": 1.588212723505487, "grad_norm": 0.3291149437427521, "learning_rate": 2.4734058964763664e-06, "loss": 0.0223, "step": 103480 }, { "epoch": 1.5883662036681758, "grad_norm": 0.35239148139953613, "learning_rate": 2.4716424609981047e-06, "loss": 0.027, "step": 103490 }, { "epoch": 1.5885196838308648, "grad_norm": 0.3951854705810547, "learning_rate": 2.4698795657259243e-06, "loss": 0.0239, "step": 103500 }, { "epoch": 1.5886731639935538, "grad_norm": 0.38989439606666565, "learning_rate": 2.4681172107863304e-06, "loss": 0.0222, "step": 103510 }, { "epoch": 1.5888266441562426, "grad_norm": 0.27085259556770325, "learning_rate": 2.466355396305781e-06, "loss": 0.0258, "step": 103520 }, { "epoch": 1.5889801243189319, "grad_norm": 0.3695546090602875, "learning_rate": 2.4645941224106883e-06, "loss": 0.0245, "step": 103530 }, { "epoch": 1.5891336044816207, "grad_norm": 0.3015522360801697, "learning_rate": 2.4628333892274447e-06, "loss": 0.0224, "step": 103540 }, { "epoch": 1.5892870846443097, "grad_norm": 0.5356687903404236, "learning_rate": 2.461073196882391e-06, "loss": 0.0245, "step": 103550 }, { "epoch": 1.5894405648069987, "grad_norm": 0.29252225160598755, "learning_rate": 2.459313545501829e-06, "loss": 0.025, "step": 103560 }, { "epoch": 1.5895940449696877, "grad_norm": 0.47610872983932495, "learning_rate": 2.4575544352120284e-06, "loss": 0.0301, "step": 103570 }, { "epoch": 1.5897475251323767, "grad_norm": 0.32780107855796814, "learning_rate": 2.4557958661392136e-06, "loss": 0.0198, "step": 103580 }, { "epoch": 1.5899010052950655, "grad_norm": 0.47115081548690796, "learning_rate": 2.454037838409573e-06, "loss": 0.0227, "step": 103590 }, { "epoch": 1.5900544854577547, "grad_norm": 0.6701793074607849, "learning_rate": 2.4522803521492545e-06, "loss": 0.0356, "step": 103600 }, { "epoch": 1.5902079656204435, "grad_norm": 0.286543607711792, "learning_rate": 2.4505234074843732e-06, "loss": 0.0277, "step": 103610 }, { "epoch": 1.5903614457831325, "grad_norm": 0.33960971236228943, "learning_rate": 2.4487670045410007e-06, "loss": 0.0316, "step": 103620 }, { "epoch": 1.5905149259458216, "grad_norm": 0.42967841029167175, "learning_rate": 2.4470111434451606e-06, "loss": 0.0257, "step": 103630 }, { "epoch": 1.5906684061085103, "grad_norm": 0.42934659123420715, "learning_rate": 2.4452558243228563e-06, "loss": 0.0252, "step": 103640 }, { "epoch": 1.5908218862711996, "grad_norm": 0.29335838556289673, "learning_rate": 2.4435010473000407e-06, "loss": 0.0227, "step": 103650 }, { "epoch": 1.5909753664338884, "grad_norm": 0.2330877035856247, "learning_rate": 2.441746812502628e-06, "loss": 0.0271, "step": 103660 }, { "epoch": 1.5911288465965774, "grad_norm": 0.43896758556365967, "learning_rate": 2.4399931200564962e-06, "loss": 0.0218, "step": 103670 }, { "epoch": 1.5912823267592664, "grad_norm": 0.43645143508911133, "learning_rate": 2.4382399700874847e-06, "loss": 0.0246, "step": 103680 }, { "epoch": 1.5914358069219552, "grad_norm": 0.2641679048538208, "learning_rate": 2.4364873627213904e-06, "loss": 0.0228, "step": 103690 }, { "epoch": 1.5915892870846444, "grad_norm": 0.36449429392814636, "learning_rate": 2.4347352980839733e-06, "loss": 0.0219, "step": 103700 }, { "epoch": 1.5917427672473332, "grad_norm": 0.320578396320343, "learning_rate": 2.432983776300959e-06, "loss": 0.0255, "step": 103710 }, { "epoch": 1.5918962474100222, "grad_norm": 0.319395512342453, "learning_rate": 2.4312327974980265e-06, "loss": 0.0272, "step": 103720 }, { "epoch": 1.5920497275727112, "grad_norm": 0.3475053608417511, "learning_rate": 2.429482361800821e-06, "loss": 0.0249, "step": 103730 }, { "epoch": 1.5922032077354002, "grad_norm": 0.42563146352767944, "learning_rate": 2.427732469334947e-06, "loss": 0.0299, "step": 103740 }, { "epoch": 1.5923566878980893, "grad_norm": 0.36182907223701477, "learning_rate": 2.4259831202259697e-06, "loss": 0.0197, "step": 103750 }, { "epoch": 1.592510168060778, "grad_norm": 0.6718066930770874, "learning_rate": 2.424234314599412e-06, "loss": 0.0256, "step": 103760 }, { "epoch": 1.592663648223467, "grad_norm": 0.3372461497783661, "learning_rate": 2.42248605258077e-06, "loss": 0.0317, "step": 103770 }, { "epoch": 1.592817128386156, "grad_norm": 0.31588026881217957, "learning_rate": 2.420738334295486e-06, "loss": 0.0241, "step": 103780 }, { "epoch": 1.592970608548845, "grad_norm": 0.34001681208610535, "learning_rate": 2.4189911598689663e-06, "loss": 0.0279, "step": 103790 }, { "epoch": 1.593124088711534, "grad_norm": 0.36026954650878906, "learning_rate": 2.41724452942659e-06, "loss": 0.0191, "step": 103800 }, { "epoch": 1.593277568874223, "grad_norm": 0.3067636787891388, "learning_rate": 2.4154984430936843e-06, "loss": 0.0234, "step": 103810 }, { "epoch": 1.5934310490369121, "grad_norm": 0.31392958760261536, "learning_rate": 2.413752900995542e-06, "loss": 0.0288, "step": 103820 }, { "epoch": 1.593584529199601, "grad_norm": 0.32864081859588623, "learning_rate": 2.412007903257416e-06, "loss": 0.0236, "step": 103830 }, { "epoch": 1.59373800936229, "grad_norm": 0.30107709765434265, "learning_rate": 2.4102634500045217e-06, "loss": 0.0265, "step": 103840 }, { "epoch": 1.593891489524979, "grad_norm": 0.3757467269897461, "learning_rate": 2.4085195413620333e-06, "loss": 0.0298, "step": 103850 }, { "epoch": 1.5940449696876677, "grad_norm": 0.3267833888530731, "learning_rate": 2.4067761774550857e-06, "loss": 0.0213, "step": 103860 }, { "epoch": 1.594198449850357, "grad_norm": 0.2804464101791382, "learning_rate": 2.4050333584087827e-06, "loss": 0.0263, "step": 103870 }, { "epoch": 1.5943519300130458, "grad_norm": 0.32859522104263306, "learning_rate": 2.403291084348174e-06, "loss": 0.0232, "step": 103880 }, { "epoch": 1.5945054101757348, "grad_norm": 0.4406239688396454, "learning_rate": 2.4015493553982793e-06, "loss": 0.0243, "step": 103890 }, { "epoch": 1.5946588903384238, "grad_norm": 0.27083852887153625, "learning_rate": 2.399808171684085e-06, "loss": 0.0148, "step": 103900 }, { "epoch": 1.5948123705011126, "grad_norm": 0.5751903653144836, "learning_rate": 2.398067533330526e-06, "loss": 0.0287, "step": 103910 }, { "epoch": 1.5949658506638018, "grad_norm": 0.356580525636673, "learning_rate": 2.396327440462507e-06, "loss": 0.0245, "step": 103920 }, { "epoch": 1.5951193308264906, "grad_norm": 0.3287281394004822, "learning_rate": 2.3945878932048883e-06, "loss": 0.0238, "step": 103930 }, { "epoch": 1.5952728109891796, "grad_norm": 0.33382725715637207, "learning_rate": 2.3928488916824945e-06, "loss": 0.0341, "step": 103940 }, { "epoch": 1.5954262911518686, "grad_norm": 0.3279665410518646, "learning_rate": 2.3911104360201077e-06, "loss": 0.0302, "step": 103950 }, { "epoch": 1.5955797713145576, "grad_norm": 0.3526941239833832, "learning_rate": 2.389372526342473e-06, "loss": 0.0249, "step": 103960 }, { "epoch": 1.5957332514772467, "grad_norm": 0.4532032310962677, "learning_rate": 2.387635162774302e-06, "loss": 0.0269, "step": 103970 }, { "epoch": 1.5958867316399354, "grad_norm": 0.45062366127967834, "learning_rate": 2.385898345440254e-06, "loss": 0.0248, "step": 103980 }, { "epoch": 1.5960402118026247, "grad_norm": 0.25845399498939514, "learning_rate": 2.3841620744649553e-06, "loss": 0.0261, "step": 103990 }, { "epoch": 1.5961936919653135, "grad_norm": 0.4180476665496826, "learning_rate": 2.3824263499730016e-06, "loss": 0.0282, "step": 104000 }, { "epoch": 1.5963471721280025, "grad_norm": 0.4405237138271332, "learning_rate": 2.380691172088937e-06, "loss": 0.0266, "step": 104010 }, { "epoch": 1.5965006522906915, "grad_norm": 0.3117101788520813, "learning_rate": 2.3789565409372728e-06, "loss": 0.0179, "step": 104020 }, { "epoch": 1.5966541324533803, "grad_norm": 0.2946341633796692, "learning_rate": 2.3772224566424783e-06, "loss": 0.0244, "step": 104030 }, { "epoch": 1.5968076126160695, "grad_norm": 0.38221412897109985, "learning_rate": 2.3754889193289856e-06, "loss": 0.0232, "step": 104040 }, { "epoch": 1.5969610927787583, "grad_norm": 0.4715086817741394, "learning_rate": 2.373755929121183e-06, "loss": 0.0242, "step": 104050 }, { "epoch": 1.5971145729414473, "grad_norm": 0.48548218607902527, "learning_rate": 2.37202348614343e-06, "loss": 0.0287, "step": 104060 }, { "epoch": 1.5972680531041363, "grad_norm": 0.37007659673690796, "learning_rate": 2.3702915905200395e-06, "loss": 0.0274, "step": 104070 }, { "epoch": 1.5974215332668251, "grad_norm": 0.4671444296836853, "learning_rate": 2.3685602423752763e-06, "loss": 0.0288, "step": 104080 }, { "epoch": 1.5975750134295144, "grad_norm": 0.31732064485549927, "learning_rate": 2.366829441833386e-06, "loss": 0.0234, "step": 104090 }, { "epoch": 1.5977284935922031, "grad_norm": 0.3844725787639618, "learning_rate": 2.3650991890185593e-06, "loss": 0.0234, "step": 104100 }, { "epoch": 1.5978819737548922, "grad_norm": 0.36755552887916565, "learning_rate": 2.3633694840549525e-06, "loss": 0.0286, "step": 104110 }, { "epoch": 1.5980354539175812, "grad_norm": 0.344041109085083, "learning_rate": 2.361640327066684e-06, "loss": 0.0264, "step": 104120 }, { "epoch": 1.59818893408027, "grad_norm": 0.33589228987693787, "learning_rate": 2.3599117181778308e-06, "loss": 0.0229, "step": 104130 }, { "epoch": 1.5983424142429592, "grad_norm": 0.32112255692481995, "learning_rate": 2.3581836575124317e-06, "loss": 0.0276, "step": 104140 }, { "epoch": 1.598495894405648, "grad_norm": 0.2948768436908722, "learning_rate": 2.3564561451944824e-06, "loss": 0.025, "step": 104150 }, { "epoch": 1.598649374568337, "grad_norm": 0.5163176655769348, "learning_rate": 2.354729181347948e-06, "loss": 0.0251, "step": 104160 }, { "epoch": 1.598802854731026, "grad_norm": 0.2708115577697754, "learning_rate": 2.353002766096748e-06, "loss": 0.0241, "step": 104170 }, { "epoch": 1.598956334893715, "grad_norm": 0.5601593852043152, "learning_rate": 2.351276899564757e-06, "loss": 0.0281, "step": 104180 }, { "epoch": 1.599109815056404, "grad_norm": 0.4715636968612671, "learning_rate": 2.349551581875824e-06, "loss": 0.0262, "step": 104190 }, { "epoch": 1.5992632952190928, "grad_norm": 0.31715476512908936, "learning_rate": 2.3478268131537483e-06, "loss": 0.0247, "step": 104200 }, { "epoch": 1.599416775381782, "grad_norm": 0.32989081740379333, "learning_rate": 2.3461025935222935e-06, "loss": 0.0238, "step": 104210 }, { "epoch": 1.5995702555444709, "grad_norm": 0.2877991497516632, "learning_rate": 2.344378923105182e-06, "loss": 0.0282, "step": 104220 }, { "epoch": 1.5997237357071599, "grad_norm": 0.3263464868068695, "learning_rate": 2.342655802026097e-06, "loss": 0.0304, "step": 104230 }, { "epoch": 1.5998772158698489, "grad_norm": 0.35840916633605957, "learning_rate": 2.340933230408684e-06, "loss": 0.026, "step": 104240 }, { "epoch": 1.6000306960325377, "grad_norm": 0.3539210855960846, "learning_rate": 2.339211208376546e-06, "loss": 0.0227, "step": 104250 }, { "epoch": 1.600184176195227, "grad_norm": 0.3418569266796112, "learning_rate": 2.3374897360532543e-06, "loss": 0.0275, "step": 104260 }, { "epoch": 1.6003376563579157, "grad_norm": 0.34780532121658325, "learning_rate": 2.335768813562331e-06, "loss": 0.0255, "step": 104270 }, { "epoch": 1.6004911365206047, "grad_norm": 0.27243274450302124, "learning_rate": 2.3340484410272633e-06, "loss": 0.0306, "step": 104280 }, { "epoch": 1.6006446166832937, "grad_norm": 0.2847500443458557, "learning_rate": 2.3323286185715e-06, "loss": 0.0278, "step": 104290 }, { "epoch": 1.6007980968459825, "grad_norm": 0.39911508560180664, "learning_rate": 2.3306093463184475e-06, "loss": 0.0288, "step": 104300 }, { "epoch": 1.6009515770086717, "grad_norm": 0.3042389154434204, "learning_rate": 2.3288906243914712e-06, "loss": 0.0228, "step": 104310 }, { "epoch": 1.6011050571713605, "grad_norm": 0.30839046835899353, "learning_rate": 2.3271724529139095e-06, "loss": 0.027, "step": 104320 }, { "epoch": 1.6012585373340495, "grad_norm": 0.3886939287185669, "learning_rate": 2.325454832009043e-06, "loss": 0.0229, "step": 104330 }, { "epoch": 1.6014120174967386, "grad_norm": 0.3238111734390259, "learning_rate": 2.3237377618001213e-06, "loss": 0.0246, "step": 104340 }, { "epoch": 1.6015654976594276, "grad_norm": 0.42475560307502747, "learning_rate": 2.3220212424103605e-06, "loss": 0.0228, "step": 104350 }, { "epoch": 1.6017189778221166, "grad_norm": 0.3681512176990509, "learning_rate": 2.3203052739629282e-06, "loss": 0.0216, "step": 104360 }, { "epoch": 1.6018724579848054, "grad_norm": 0.3183867931365967, "learning_rate": 2.318589856580957e-06, "loss": 0.0244, "step": 104370 }, { "epoch": 1.6020259381474944, "grad_norm": 0.35812902450561523, "learning_rate": 2.316874990387538e-06, "loss": 0.0296, "step": 104380 }, { "epoch": 1.6021794183101834, "grad_norm": 0.3094618022441864, "learning_rate": 2.315160675505723e-06, "loss": 0.0268, "step": 104390 }, { "epoch": 1.6023328984728724, "grad_norm": 0.5499431490898132, "learning_rate": 2.3134469120585246e-06, "loss": 0.0303, "step": 104400 }, { "epoch": 1.6024863786355614, "grad_norm": 0.5010577440261841, "learning_rate": 2.3117337001689146e-06, "loss": 0.0196, "step": 104410 }, { "epoch": 1.6026398587982502, "grad_norm": 0.4344290494918823, "learning_rate": 2.310021039959832e-06, "loss": 0.0266, "step": 104420 }, { "epoch": 1.6027933389609395, "grad_norm": 0.2337225079536438, "learning_rate": 2.3083089315541653e-06, "loss": 0.0221, "step": 104430 }, { "epoch": 1.6029468191236282, "grad_norm": 0.3751087486743927, "learning_rate": 2.3065973750747684e-06, "loss": 0.0234, "step": 104440 }, { "epoch": 1.6031002992863173, "grad_norm": 0.40027496218681335, "learning_rate": 2.304886370644459e-06, "loss": 0.028, "step": 104450 }, { "epoch": 1.6032537794490063, "grad_norm": 0.3125742971897125, "learning_rate": 2.3031759183860123e-06, "loss": 0.0195, "step": 104460 }, { "epoch": 1.603407259611695, "grad_norm": 0.3084947466850281, "learning_rate": 2.301466018422164e-06, "loss": 0.0226, "step": 104470 }, { "epoch": 1.6035607397743843, "grad_norm": 0.40467700362205505, "learning_rate": 2.299756670875607e-06, "loss": 0.0227, "step": 104480 }, { "epoch": 1.603714219937073, "grad_norm": 0.42877891659736633, "learning_rate": 2.2980478758689995e-06, "loss": 0.0259, "step": 104490 }, { "epoch": 1.603867700099762, "grad_norm": 0.3316337466239929, "learning_rate": 2.296339633524959e-06, "loss": 0.0231, "step": 104500 }, { "epoch": 1.604021180262451, "grad_norm": 0.4992147982120514, "learning_rate": 2.294631943966058e-06, "loss": 0.0255, "step": 104510 }, { "epoch": 1.60417466042514, "grad_norm": 0.4300643503665924, "learning_rate": 2.2929248073148426e-06, "loss": 0.0272, "step": 104520 }, { "epoch": 1.6043281405878291, "grad_norm": 0.4398733973503113, "learning_rate": 2.2912182236937987e-06, "loss": 0.0293, "step": 104530 }, { "epoch": 1.604481620750518, "grad_norm": 0.2779773771762848, "learning_rate": 2.2895121932253937e-06, "loss": 0.0301, "step": 104540 }, { "epoch": 1.604635100913207, "grad_norm": 0.48272034525871277, "learning_rate": 2.2878067160320426e-06, "loss": 0.0362, "step": 104550 }, { "epoch": 1.604788581075896, "grad_norm": 0.3252134323120117, "learning_rate": 2.286101792236124e-06, "loss": 0.0195, "step": 104560 }, { "epoch": 1.604942061238585, "grad_norm": 0.2685796320438385, "learning_rate": 2.2843974219599763e-06, "loss": 0.0232, "step": 104570 }, { "epoch": 1.605095541401274, "grad_norm": 0.4596734046936035, "learning_rate": 2.2826936053258986e-06, "loss": 0.0252, "step": 104580 }, { "epoch": 1.6052490215639628, "grad_norm": 0.43754497170448303, "learning_rate": 2.28099034245615e-06, "loss": 0.0285, "step": 104590 }, { "epoch": 1.6054025017266518, "grad_norm": 0.3697301149368286, "learning_rate": 2.2792876334729487e-06, "loss": 0.0222, "step": 104600 }, { "epoch": 1.6055559818893408, "grad_norm": 0.36001941561698914, "learning_rate": 2.2775854784984773e-06, "loss": 0.0304, "step": 104610 }, { "epoch": 1.6057094620520298, "grad_norm": 0.5273405909538269, "learning_rate": 2.2758838776548797e-06, "loss": 0.0268, "step": 104620 }, { "epoch": 1.6058629422147188, "grad_norm": 0.34066471457481384, "learning_rate": 2.2741828310642456e-06, "loss": 0.0248, "step": 104630 }, { "epoch": 1.6060164223774076, "grad_norm": 0.28954795002937317, "learning_rate": 2.272482338848644e-06, "loss": 0.0295, "step": 104640 }, { "epoch": 1.6061699025400968, "grad_norm": 0.4431508779525757, "learning_rate": 2.270782401130094e-06, "loss": 0.0203, "step": 104650 }, { "epoch": 1.6063233827027856, "grad_norm": 0.42903372645378113, "learning_rate": 2.2690830180305745e-06, "loss": 0.0309, "step": 104660 }, { "epoch": 1.6064768628654746, "grad_norm": 0.4292815625667572, "learning_rate": 2.2673841896720294e-06, "loss": 0.0283, "step": 104670 }, { "epoch": 1.6066303430281637, "grad_norm": 0.47193482518196106, "learning_rate": 2.2656859161763577e-06, "loss": 0.0245, "step": 104680 }, { "epoch": 1.6067838231908524, "grad_norm": 0.2900678217411041, "learning_rate": 2.2639881976654233e-06, "loss": 0.0195, "step": 104690 }, { "epoch": 1.6069373033535417, "grad_norm": 0.2996598780155182, "learning_rate": 2.2622910342610426e-06, "loss": 0.0232, "step": 104700 }, { "epoch": 1.6070907835162305, "grad_norm": 0.4043394923210144, "learning_rate": 2.2605944260850056e-06, "loss": 0.0275, "step": 104710 }, { "epoch": 1.6072442636789195, "grad_norm": 0.4165303707122803, "learning_rate": 2.258898373259053e-06, "loss": 0.0231, "step": 104720 }, { "epoch": 1.6073977438416085, "grad_norm": 0.40964406728744507, "learning_rate": 2.2572028759048782e-06, "loss": 0.027, "step": 104730 }, { "epoch": 1.6075512240042973, "grad_norm": 0.33412617444992065, "learning_rate": 2.255507934144153e-06, "loss": 0.0171, "step": 104740 }, { "epoch": 1.6077047041669865, "grad_norm": 0.39579200744628906, "learning_rate": 2.253813548098498e-06, "loss": 0.0267, "step": 104750 }, { "epoch": 1.6078581843296753, "grad_norm": 0.35962975025177, "learning_rate": 2.2521197178894937e-06, "loss": 0.028, "step": 104760 }, { "epoch": 1.6080116644923643, "grad_norm": 0.5046136379241943, "learning_rate": 2.250426443638685e-06, "loss": 0.0263, "step": 104770 }, { "epoch": 1.6081651446550533, "grad_norm": 0.3608301877975464, "learning_rate": 2.2487337254675724e-06, "loss": 0.0203, "step": 104780 }, { "epoch": 1.6083186248177423, "grad_norm": 0.4258396327495575, "learning_rate": 2.2470415634976182e-06, "loss": 0.0232, "step": 104790 }, { "epoch": 1.6084721049804314, "grad_norm": 0.30102846026420593, "learning_rate": 2.2453499578502493e-06, "loss": 0.0296, "step": 104800 }, { "epoch": 1.6086255851431202, "grad_norm": 0.44456347823143005, "learning_rate": 2.2436589086468475e-06, "loss": 0.0287, "step": 104810 }, { "epoch": 1.6087790653058094, "grad_norm": 0.3711351752281189, "learning_rate": 2.241968416008755e-06, "loss": 0.0269, "step": 104820 }, { "epoch": 1.6089325454684982, "grad_norm": 0.2752252221107483, "learning_rate": 2.240278480057276e-06, "loss": 0.0248, "step": 104830 }, { "epoch": 1.6090860256311872, "grad_norm": 0.416072279214859, "learning_rate": 2.238589100913674e-06, "loss": 0.0262, "step": 104840 }, { "epoch": 1.6092395057938762, "grad_norm": 0.4361945390701294, "learning_rate": 2.23690027869917e-06, "loss": 0.0284, "step": 104850 }, { "epoch": 1.609392985956565, "grad_norm": 0.3333415389060974, "learning_rate": 2.2352120135349475e-06, "loss": 0.024, "step": 104860 }, { "epoch": 1.6095464661192542, "grad_norm": 0.3262641429901123, "learning_rate": 2.2335243055421573e-06, "loss": 0.028, "step": 104870 }, { "epoch": 1.609699946281943, "grad_norm": 0.3273775577545166, "learning_rate": 2.231837154841894e-06, "loss": 0.0215, "step": 104880 }, { "epoch": 1.609853426444632, "grad_norm": 0.41842249035835266, "learning_rate": 2.230150561555221e-06, "loss": 0.0258, "step": 104890 }, { "epoch": 1.610006906607321, "grad_norm": 0.4180397093296051, "learning_rate": 2.228464525803168e-06, "loss": 0.0244, "step": 104900 }, { "epoch": 1.6101603867700098, "grad_norm": 0.4011812210083008, "learning_rate": 2.226779047706715e-06, "loss": 0.0207, "step": 104910 }, { "epoch": 1.610313866932699, "grad_norm": 0.26342451572418213, "learning_rate": 2.2250941273868066e-06, "loss": 0.0226, "step": 104920 }, { "epoch": 1.6104673470953879, "grad_norm": 0.4238601326942444, "learning_rate": 2.223409764964346e-06, "loss": 0.0245, "step": 104930 }, { "epoch": 1.6106208272580769, "grad_norm": 0.47341790795326233, "learning_rate": 2.221725960560195e-06, "loss": 0.0279, "step": 104940 }, { "epoch": 1.6107743074207659, "grad_norm": 0.34362712502479553, "learning_rate": 2.2200427142951788e-06, "loss": 0.0369, "step": 104950 }, { "epoch": 1.6109277875834547, "grad_norm": 0.43487268686294556, "learning_rate": 2.218360026290076e-06, "loss": 0.0238, "step": 104960 }, { "epoch": 1.611081267746144, "grad_norm": 0.3550567328929901, "learning_rate": 2.2166778966656397e-06, "loss": 0.0268, "step": 104970 }, { "epoch": 1.6112347479088327, "grad_norm": 0.45913779735565186, "learning_rate": 2.2149963255425645e-06, "loss": 0.025, "step": 104980 }, { "epoch": 1.6113882280715217, "grad_norm": 0.313339501619339, "learning_rate": 2.213315313041514e-06, "loss": 0.0257, "step": 104990 }, { "epoch": 1.6115417082342107, "grad_norm": 0.3825761377811432, "learning_rate": 2.211634859283116e-06, "loss": 0.0296, "step": 105000 }, { "epoch": 1.6116951883968997, "grad_norm": 0.3056372106075287, "learning_rate": 2.20995496438795e-06, "loss": 0.024, "step": 105010 }, { "epoch": 1.6118486685595887, "grad_norm": 0.3663638234138489, "learning_rate": 2.2082756284765604e-06, "loss": 0.0232, "step": 105020 }, { "epoch": 1.6120021487222775, "grad_norm": 0.35533151030540466, "learning_rate": 2.206596851669448e-06, "loss": 0.0263, "step": 105030 }, { "epoch": 1.6121556288849668, "grad_norm": 0.3049818277359009, "learning_rate": 2.2049186340870774e-06, "loss": 0.021, "step": 105040 }, { "epoch": 1.6123091090476556, "grad_norm": 0.5186548829078674, "learning_rate": 2.2032409758498676e-06, "loss": 0.019, "step": 105050 }, { "epoch": 1.6124625892103446, "grad_norm": 0.3318844735622406, "learning_rate": 2.2015638770782057e-06, "loss": 0.0252, "step": 105060 }, { "epoch": 1.6126160693730336, "grad_norm": 0.36023229360580444, "learning_rate": 2.1998873378924348e-06, "loss": 0.0298, "step": 105070 }, { "epoch": 1.6127695495357224, "grad_norm": 0.4036886990070343, "learning_rate": 2.1982113584128484e-06, "loss": 0.0201, "step": 105080 }, { "epoch": 1.6129230296984116, "grad_norm": 0.3858018219470978, "learning_rate": 2.1965359387597163e-06, "loss": 0.0263, "step": 105090 }, { "epoch": 1.6130765098611004, "grad_norm": 0.32092127203941345, "learning_rate": 2.1948610790532575e-06, "loss": 0.023, "step": 105100 }, { "epoch": 1.6132299900237894, "grad_norm": 0.2854168713092804, "learning_rate": 2.193186779413653e-06, "loss": 0.0249, "step": 105110 }, { "epoch": 1.6133834701864784, "grad_norm": 0.4828411340713501, "learning_rate": 2.191513039961046e-06, "loss": 0.0285, "step": 105120 }, { "epoch": 1.6135369503491672, "grad_norm": 0.3765040338039398, "learning_rate": 2.189839860815536e-06, "loss": 0.0268, "step": 105130 }, { "epoch": 1.6136904305118565, "grad_norm": 0.49415796995162964, "learning_rate": 2.1881672420971844e-06, "loss": 0.0309, "step": 105140 }, { "epoch": 1.6138439106745452, "grad_norm": 0.36211997270584106, "learning_rate": 2.1864951839260083e-06, "loss": 0.0227, "step": 105150 }, { "epoch": 1.6139973908372343, "grad_norm": 0.27536478638648987, "learning_rate": 2.1848236864219964e-06, "loss": 0.0218, "step": 105160 }, { "epoch": 1.6141508709999233, "grad_norm": 0.3409138023853302, "learning_rate": 2.183152749705085e-06, "loss": 0.0279, "step": 105170 }, { "epoch": 1.6143043511626123, "grad_norm": 0.31377747654914856, "learning_rate": 2.1814823738951687e-06, "loss": 0.0348, "step": 105180 }, { "epoch": 1.6144578313253013, "grad_norm": 0.36357811093330383, "learning_rate": 2.1798125591121143e-06, "loss": 0.0308, "step": 105190 }, { "epoch": 1.61461131148799, "grad_norm": 0.4036098122596741, "learning_rate": 2.1781433054757405e-06, "loss": 0.0258, "step": 105200 }, { "epoch": 1.614764791650679, "grad_norm": 0.3101959824562073, "learning_rate": 2.1764746131058236e-06, "loss": 0.0207, "step": 105210 }, { "epoch": 1.6149182718133681, "grad_norm": 0.3483159840106964, "learning_rate": 2.174806482122104e-06, "loss": 0.0226, "step": 105220 }, { "epoch": 1.6150717519760571, "grad_norm": 0.32041922211647034, "learning_rate": 2.1731389126442813e-06, "loss": 0.0247, "step": 105230 }, { "epoch": 1.6152252321387461, "grad_norm": 0.4760606288909912, "learning_rate": 2.171471904792012e-06, "loss": 0.0237, "step": 105240 }, { "epoch": 1.615378712301435, "grad_norm": 0.32324445247650146, "learning_rate": 2.1698054586849126e-06, "loss": 0.0275, "step": 105250 }, { "epoch": 1.6155321924641242, "grad_norm": 0.33369648456573486, "learning_rate": 2.168139574442567e-06, "loss": 0.024, "step": 105260 }, { "epoch": 1.615685672626813, "grad_norm": 0.3747680187225342, "learning_rate": 2.1664742521845106e-06, "loss": 0.0222, "step": 105270 }, { "epoch": 1.615839152789502, "grad_norm": 0.5095317363739014, "learning_rate": 2.164809492030234e-06, "loss": 0.0237, "step": 105280 }, { "epoch": 1.615992632952191, "grad_norm": 0.4922860562801361, "learning_rate": 2.1631452940992027e-06, "loss": 0.0234, "step": 105290 }, { "epoch": 1.6161461131148798, "grad_norm": 0.3499109447002411, "learning_rate": 2.1614816585108288e-06, "loss": 0.0274, "step": 105300 }, { "epoch": 1.616299593277569, "grad_norm": 0.3897660970687866, "learning_rate": 2.15981858538449e-06, "loss": 0.0257, "step": 105310 }, { "epoch": 1.6164530734402578, "grad_norm": 0.25984153151512146, "learning_rate": 2.1581560748395202e-06, "loss": 0.022, "step": 105320 }, { "epoch": 1.6166065536029468, "grad_norm": 0.3692835867404938, "learning_rate": 2.1564941269952155e-06, "loss": 0.0282, "step": 105330 }, { "epoch": 1.6167600337656358, "grad_norm": 0.4147687554359436, "learning_rate": 2.154832741970829e-06, "loss": 0.026, "step": 105340 }, { "epoch": 1.6169135139283246, "grad_norm": 0.24720869958400726, "learning_rate": 2.1531719198855804e-06, "loss": 0.0243, "step": 105350 }, { "epoch": 1.6170669940910138, "grad_norm": 0.45006975531578064, "learning_rate": 2.1515116608586407e-06, "loss": 0.0278, "step": 105360 }, { "epoch": 1.6172204742537026, "grad_norm": 0.1795983463525772, "learning_rate": 2.1498519650091442e-06, "loss": 0.0209, "step": 105370 }, { "epoch": 1.6173739544163916, "grad_norm": 0.35180550813674927, "learning_rate": 2.148192832456185e-06, "loss": 0.0274, "step": 105380 }, { "epoch": 1.6175274345790807, "grad_norm": 0.3758814036846161, "learning_rate": 2.1465342633188147e-06, "loss": 0.0244, "step": 105390 }, { "epoch": 1.6176809147417697, "grad_norm": 0.4012690484523773, "learning_rate": 2.1448762577160467e-06, "loss": 0.0218, "step": 105400 }, { "epoch": 1.6178343949044587, "grad_norm": 0.3122492730617523, "learning_rate": 2.143218815766849e-06, "loss": 0.0233, "step": 105410 }, { "epoch": 1.6179878750671475, "grad_norm": 0.2832890748977661, "learning_rate": 2.141561937590163e-06, "loss": 0.0234, "step": 105420 }, { "epoch": 1.6181413552298367, "grad_norm": 0.38322344422340393, "learning_rate": 2.139905623304872e-06, "loss": 0.0266, "step": 105430 }, { "epoch": 1.6182948353925255, "grad_norm": 0.4459144175052643, "learning_rate": 2.1382498730298262e-06, "loss": 0.0248, "step": 105440 }, { "epoch": 1.6184483155552145, "grad_norm": 0.3132663369178772, "learning_rate": 2.1365946868838403e-06, "loss": 0.027, "step": 105450 }, { "epoch": 1.6186017957179035, "grad_norm": 0.29722046852111816, "learning_rate": 2.134940064985683e-06, "loss": 0.0224, "step": 105460 }, { "epoch": 1.6187552758805923, "grad_norm": 0.27670830488204956, "learning_rate": 2.133286007454083e-06, "loss": 0.022, "step": 105470 }, { "epoch": 1.6189087560432815, "grad_norm": 0.3474886119365692, "learning_rate": 2.1316325144077287e-06, "loss": 0.0234, "step": 105480 }, { "epoch": 1.6190622362059703, "grad_norm": 0.36836400628089905, "learning_rate": 2.1299795859652693e-06, "loss": 0.0186, "step": 105490 }, { "epoch": 1.6192157163686594, "grad_norm": 0.33710989356040955, "learning_rate": 2.128327222245312e-06, "loss": 0.0228, "step": 105500 }, { "epoch": 1.6193691965313484, "grad_norm": 0.4006875455379486, "learning_rate": 2.126675423366422e-06, "loss": 0.0304, "step": 105510 }, { "epoch": 1.6195226766940372, "grad_norm": 0.4165284335613251, "learning_rate": 2.1250241894471334e-06, "loss": 0.0326, "step": 105520 }, { "epoch": 1.6196761568567264, "grad_norm": 0.37096402049064636, "learning_rate": 2.1233735206059245e-06, "loss": 0.0234, "step": 105530 }, { "epoch": 1.6198296370194152, "grad_norm": 0.34976932406425476, "learning_rate": 2.121723416961241e-06, "loss": 0.0254, "step": 105540 }, { "epoch": 1.6199831171821042, "grad_norm": 0.26499709486961365, "learning_rate": 2.1200738786314933e-06, "loss": 0.0202, "step": 105550 }, { "epoch": 1.6201365973447932, "grad_norm": 0.4110753536224365, "learning_rate": 2.1184249057350437e-06, "loss": 0.0227, "step": 105560 }, { "epoch": 1.620290077507482, "grad_norm": 0.3221534192562103, "learning_rate": 2.116776498390216e-06, "loss": 0.0275, "step": 105570 }, { "epoch": 1.6204435576701712, "grad_norm": 0.32182836532592773, "learning_rate": 2.1151286567152927e-06, "loss": 0.0231, "step": 105580 }, { "epoch": 1.62059703783286, "grad_norm": 0.41956627368927, "learning_rate": 2.113481380828518e-06, "loss": 0.024, "step": 105590 }, { "epoch": 1.620750517995549, "grad_norm": 0.2799855172634125, "learning_rate": 2.1118346708480918e-06, "loss": 0.0287, "step": 105600 }, { "epoch": 1.620903998158238, "grad_norm": 0.40339627861976624, "learning_rate": 2.1101885268921783e-06, "loss": 0.0256, "step": 105610 }, { "epoch": 1.621057478320927, "grad_norm": 0.5140336751937866, "learning_rate": 2.108542949078902e-06, "loss": 0.0256, "step": 105620 }, { "epoch": 1.621210958483616, "grad_norm": 0.3706863224506378, "learning_rate": 2.106897937526332e-06, "loss": 0.0207, "step": 105630 }, { "epoch": 1.6213644386463049, "grad_norm": 0.3442029356956482, "learning_rate": 2.1052534923525193e-06, "loss": 0.021, "step": 105640 }, { "epoch": 1.621517918808994, "grad_norm": 0.3647070825099945, "learning_rate": 2.103609613675457e-06, "loss": 0.0198, "step": 105650 }, { "epoch": 1.6216713989716829, "grad_norm": 0.30895721912384033, "learning_rate": 2.1019663016131053e-06, "loss": 0.0309, "step": 105660 }, { "epoch": 1.621824879134372, "grad_norm": 0.321910560131073, "learning_rate": 2.1003235562833824e-06, "loss": 0.0201, "step": 105670 }, { "epoch": 1.621978359297061, "grad_norm": 0.38306036591529846, "learning_rate": 2.0986813778041638e-06, "loss": 0.0314, "step": 105680 }, { "epoch": 1.6221318394597497, "grad_norm": 0.407487154006958, "learning_rate": 2.0970397662932873e-06, "loss": 0.0236, "step": 105690 }, { "epoch": 1.622285319622439, "grad_norm": 0.36659616231918335, "learning_rate": 2.095398721868547e-06, "loss": 0.0214, "step": 105700 }, { "epoch": 1.6224387997851277, "grad_norm": 0.5236667394638062, "learning_rate": 2.0937582446477e-06, "loss": 0.0298, "step": 105710 }, { "epoch": 1.6225922799478167, "grad_norm": 0.37578532099723816, "learning_rate": 2.092118334748463e-06, "loss": 0.0239, "step": 105720 }, { "epoch": 1.6227457601105058, "grad_norm": 0.33528485894203186, "learning_rate": 2.0904789922885017e-06, "loss": 0.0162, "step": 105730 }, { "epoch": 1.6228992402731945, "grad_norm": 0.2402729094028473, "learning_rate": 2.0888402173854562e-06, "loss": 0.019, "step": 105740 }, { "epoch": 1.6230527204358838, "grad_norm": 0.41327109932899475, "learning_rate": 2.0872020101569167e-06, "loss": 0.0269, "step": 105750 }, { "epoch": 1.6232062005985726, "grad_norm": 0.24966809153556824, "learning_rate": 2.085564370720433e-06, "loss": 0.0238, "step": 105760 }, { "epoch": 1.6233596807612616, "grad_norm": 0.36464887857437134, "learning_rate": 2.0839272991935188e-06, "loss": 0.0205, "step": 105770 }, { "epoch": 1.6235131609239506, "grad_norm": 0.2799210548400879, "learning_rate": 2.082290795693642e-06, "loss": 0.0297, "step": 105780 }, { "epoch": 1.6236666410866396, "grad_norm": 0.4117884933948517, "learning_rate": 2.0806548603382317e-06, "loss": 0.0371, "step": 105790 }, { "epoch": 1.6238201212493286, "grad_norm": 0.3051745891571045, "learning_rate": 2.079019493244675e-06, "loss": 0.0222, "step": 105800 }, { "epoch": 1.6239736014120174, "grad_norm": 0.3792682886123657, "learning_rate": 2.0773846945303235e-06, "loss": 0.0253, "step": 105810 }, { "epoch": 1.6241270815747064, "grad_norm": 0.34301096200942993, "learning_rate": 2.075750464312485e-06, "loss": 0.0177, "step": 105820 }, { "epoch": 1.6242805617373954, "grad_norm": 0.3662112355232239, "learning_rate": 2.074116802708418e-06, "loss": 0.0259, "step": 105830 }, { "epoch": 1.6244340419000844, "grad_norm": 0.41351911425590515, "learning_rate": 2.0724837098353546e-06, "loss": 0.0214, "step": 105840 }, { "epoch": 1.6245875220627735, "grad_norm": 0.2809215486049652, "learning_rate": 2.0708511858104765e-06, "loss": 0.0227, "step": 105850 }, { "epoch": 1.6247410022254622, "grad_norm": 0.31317004561424255, "learning_rate": 2.069219230750926e-06, "loss": 0.026, "step": 105860 }, { "epoch": 1.6248944823881515, "grad_norm": 0.38619279861450195, "learning_rate": 2.0675878447738127e-06, "loss": 0.0245, "step": 105870 }, { "epoch": 1.6250479625508403, "grad_norm": 0.30836448073387146, "learning_rate": 2.0659570279961928e-06, "loss": 0.0278, "step": 105880 }, { "epoch": 1.6252014427135293, "grad_norm": 0.26151764392852783, "learning_rate": 2.0643267805350843e-06, "loss": 0.0239, "step": 105890 }, { "epoch": 1.6253549228762183, "grad_norm": 0.24709609150886536, "learning_rate": 2.062697102507475e-06, "loss": 0.0226, "step": 105900 }, { "epoch": 1.625508403038907, "grad_norm": 0.3464001715183258, "learning_rate": 2.0610679940303003e-06, "loss": 0.0273, "step": 105910 }, { "epoch": 1.6256618832015963, "grad_norm": 0.37978243827819824, "learning_rate": 2.059439455220461e-06, "loss": 0.0262, "step": 105920 }, { "epoch": 1.6258153633642851, "grad_norm": 0.42000383138656616, "learning_rate": 2.0578114861948116e-06, "loss": 0.0366, "step": 105930 }, { "epoch": 1.6259688435269741, "grad_norm": 0.4308673143386841, "learning_rate": 2.0561840870701723e-06, "loss": 0.0229, "step": 105940 }, { "epoch": 1.6261223236896631, "grad_norm": 0.3104948103427887, "learning_rate": 2.0545572579633166e-06, "loss": 0.019, "step": 105950 }, { "epoch": 1.626275803852352, "grad_norm": 0.3510902225971222, "learning_rate": 2.0529309989909784e-06, "loss": 0.037, "step": 105960 }, { "epoch": 1.6264292840150412, "grad_norm": 0.3898927569389343, "learning_rate": 2.051305310269859e-06, "loss": 0.0203, "step": 105970 }, { "epoch": 1.62658276417773, "grad_norm": 0.25135812163352966, "learning_rate": 2.049680191916603e-06, "loss": 0.0215, "step": 105980 }, { "epoch": 1.626736244340419, "grad_norm": 0.2797325849533081, "learning_rate": 2.0480556440478226e-06, "loss": 0.0231, "step": 105990 }, { "epoch": 1.626889724503108, "grad_norm": 0.3854358494281769, "learning_rate": 2.0464316667800965e-06, "loss": 0.0296, "step": 106000 }, { "epoch": 1.627043204665797, "grad_norm": 0.3658623993396759, "learning_rate": 2.0448082602299514e-06, "loss": 0.0278, "step": 106010 }, { "epoch": 1.627196684828486, "grad_norm": 0.397842139005661, "learning_rate": 2.043185424513876e-06, "loss": 0.0245, "step": 106020 }, { "epoch": 1.6273501649911748, "grad_norm": 0.44079431891441345, "learning_rate": 2.041563159748319e-06, "loss": 0.0186, "step": 106030 }, { "epoch": 1.6275036451538638, "grad_norm": 0.4280637800693512, "learning_rate": 2.0399414660496874e-06, "loss": 0.0285, "step": 106040 }, { "epoch": 1.6276571253165528, "grad_norm": 0.34057196974754333, "learning_rate": 2.03832034353435e-06, "loss": 0.0231, "step": 106050 }, { "epoch": 1.6278106054792418, "grad_norm": 0.3851441740989685, "learning_rate": 2.036699792318627e-06, "loss": 0.0264, "step": 106060 }, { "epoch": 1.6279640856419308, "grad_norm": 0.3587940037250519, "learning_rate": 2.035079812518812e-06, "loss": 0.024, "step": 106070 }, { "epoch": 1.6281175658046196, "grad_norm": 0.4036043584346771, "learning_rate": 2.033460404251142e-06, "loss": 0.0243, "step": 106080 }, { "epoch": 1.6282710459673089, "grad_norm": 0.4553154408931732, "learning_rate": 2.031841567631817e-06, "loss": 0.0266, "step": 106090 }, { "epoch": 1.6284245261299977, "grad_norm": 0.40027275681495667, "learning_rate": 2.0302233027770046e-06, "loss": 0.0304, "step": 106100 }, { "epoch": 1.6285780062926867, "grad_norm": 0.25112056732177734, "learning_rate": 2.028605609802824e-06, "loss": 0.0283, "step": 106110 }, { "epoch": 1.6287314864553757, "grad_norm": 0.3791087567806244, "learning_rate": 2.026988488825353e-06, "loss": 0.0259, "step": 106120 }, { "epoch": 1.6288849666180645, "grad_norm": 0.28224363923072815, "learning_rate": 2.0253719399606307e-06, "loss": 0.0188, "step": 106130 }, { "epoch": 1.6290384467807537, "grad_norm": 0.3022988736629486, "learning_rate": 2.023755963324655e-06, "loss": 0.0206, "step": 106140 }, { "epoch": 1.6291919269434425, "grad_norm": 0.47862669825553894, "learning_rate": 2.0221405590333785e-06, "loss": 0.0341, "step": 106150 }, { "epoch": 1.6293454071061315, "grad_norm": 0.5001541376113892, "learning_rate": 2.0205257272027223e-06, "loss": 0.0213, "step": 106160 }, { "epoch": 1.6294988872688205, "grad_norm": 0.40080350637435913, "learning_rate": 2.01891146794856e-06, "loss": 0.0212, "step": 106170 }, { "epoch": 1.6296523674315093, "grad_norm": 0.36946502327919006, "learning_rate": 2.017297781386718e-06, "loss": 0.022, "step": 106180 }, { "epoch": 1.6298058475941986, "grad_norm": 0.349904328584671, "learning_rate": 2.0156846676329945e-06, "loss": 0.025, "step": 106190 }, { "epoch": 1.6299593277568873, "grad_norm": 0.27443811297416687, "learning_rate": 2.0140721268031382e-06, "loss": 0.0175, "step": 106200 }, { "epoch": 1.6301128079195764, "grad_norm": 0.38778984546661377, "learning_rate": 2.01246015901286e-06, "loss": 0.027, "step": 106210 }, { "epoch": 1.6302662880822654, "grad_norm": 0.4518294930458069, "learning_rate": 2.0108487643778273e-06, "loss": 0.0261, "step": 106220 }, { "epoch": 1.6304197682449544, "grad_norm": 0.406787246465683, "learning_rate": 2.0092379430136676e-06, "loss": 0.0164, "step": 106230 }, { "epoch": 1.6305732484076434, "grad_norm": 0.33830970525741577, "learning_rate": 2.0076276950359687e-06, "loss": 0.0226, "step": 106240 }, { "epoch": 1.6307267285703322, "grad_norm": 0.27554044127464294, "learning_rate": 2.006018020560272e-06, "loss": 0.0264, "step": 106250 }, { "epoch": 1.6308802087330214, "grad_norm": 0.3619532883167267, "learning_rate": 2.0044089197020866e-06, "loss": 0.0224, "step": 106260 }, { "epoch": 1.6310336888957102, "grad_norm": 0.2457503080368042, "learning_rate": 2.002800392576876e-06, "loss": 0.0187, "step": 106270 }, { "epoch": 1.6311871690583992, "grad_norm": 0.4179069399833679, "learning_rate": 2.0011924393000536e-06, "loss": 0.0241, "step": 106280 }, { "epoch": 1.6313406492210882, "grad_norm": 0.20606118440628052, "learning_rate": 1.999585059987008e-06, "loss": 0.0233, "step": 106290 }, { "epoch": 1.631494129383777, "grad_norm": 0.5785287022590637, "learning_rate": 1.997978254753077e-06, "loss": 0.0277, "step": 106300 }, { "epoch": 1.6316476095464663, "grad_norm": 0.431242972612381, "learning_rate": 1.9963720237135574e-06, "loss": 0.0249, "step": 106310 }, { "epoch": 1.631801089709155, "grad_norm": 0.4242497980594635, "learning_rate": 1.9947663669837056e-06, "loss": 0.0201, "step": 106320 }, { "epoch": 1.631954569871844, "grad_norm": 0.5670700073242188, "learning_rate": 1.9931612846787398e-06, "loss": 0.0235, "step": 106330 }, { "epoch": 1.632108050034533, "grad_norm": 0.45250168442726135, "learning_rate": 1.9915567769138334e-06, "loss": 0.0258, "step": 106340 }, { "epoch": 1.6322615301972219, "grad_norm": 0.3653561472892761, "learning_rate": 1.989952843804116e-06, "loss": 0.027, "step": 106350 }, { "epoch": 1.632415010359911, "grad_norm": 0.43703997135162354, "learning_rate": 1.9883494854646856e-06, "loss": 0.03, "step": 106360 }, { "epoch": 1.6325684905226, "grad_norm": 0.2534317076206207, "learning_rate": 1.9867467020105936e-06, "loss": 0.0187, "step": 106370 }, { "epoch": 1.632721970685289, "grad_norm": 0.4157889783382416, "learning_rate": 1.985144493556842e-06, "loss": 0.0272, "step": 106380 }, { "epoch": 1.632875450847978, "grad_norm": 0.35975801944732666, "learning_rate": 1.983542860218406e-06, "loss": 0.0354, "step": 106390 }, { "epoch": 1.6330289310106667, "grad_norm": 0.41366055607795715, "learning_rate": 1.98194180211021e-06, "loss": 0.0214, "step": 106400 }, { "epoch": 1.633182411173356, "grad_norm": 0.4302283823490143, "learning_rate": 1.980341319347138e-06, "loss": 0.0365, "step": 106410 }, { "epoch": 1.6333358913360447, "grad_norm": 0.30600613355636597, "learning_rate": 1.978741412044042e-06, "loss": 0.0241, "step": 106420 }, { "epoch": 1.6334893714987337, "grad_norm": 0.44233137369155884, "learning_rate": 1.977142080315717e-06, "loss": 0.0268, "step": 106430 }, { "epoch": 1.6336428516614228, "grad_norm": 0.4252545237541199, "learning_rate": 1.975543324276926e-06, "loss": 0.0308, "step": 106440 }, { "epoch": 1.6337963318241118, "grad_norm": 0.41296401619911194, "learning_rate": 1.973945144042394e-06, "loss": 0.0269, "step": 106450 }, { "epoch": 1.6339498119868008, "grad_norm": 0.26213234663009644, "learning_rate": 1.9723475397267965e-06, "loss": 0.0244, "step": 106460 }, { "epoch": 1.6341032921494896, "grad_norm": 0.3206694424152374, "learning_rate": 1.9707505114447744e-06, "loss": 0.0236, "step": 106470 }, { "epoch": 1.6342567723121788, "grad_norm": 0.3445385694503784, "learning_rate": 1.9691540593109217e-06, "loss": 0.0222, "step": 106480 }, { "epoch": 1.6344102524748676, "grad_norm": 0.5509150624275208, "learning_rate": 1.967558183439795e-06, "loss": 0.0324, "step": 106490 }, { "epoch": 1.6345637326375566, "grad_norm": 0.2903452217578888, "learning_rate": 1.9659628839459076e-06, "loss": 0.0253, "step": 106500 }, { "epoch": 1.6347172128002456, "grad_norm": 0.43741053342819214, "learning_rate": 1.9643681609437305e-06, "loss": 0.0227, "step": 106510 }, { "epoch": 1.6348706929629344, "grad_norm": 0.3559347689151764, "learning_rate": 1.9627740145477013e-06, "loss": 0.028, "step": 106520 }, { "epoch": 1.6350241731256236, "grad_norm": 0.5625346302986145, "learning_rate": 1.9611804448722027e-06, "loss": 0.0289, "step": 106530 }, { "epoch": 1.6351776532883124, "grad_norm": 0.36228156089782715, "learning_rate": 1.9595874520315825e-06, "loss": 0.022, "step": 106540 }, { "epoch": 1.6353311334510015, "grad_norm": 0.3756512403488159, "learning_rate": 1.9579950361401555e-06, "loss": 0.0282, "step": 106550 }, { "epoch": 1.6354846136136905, "grad_norm": 0.6010466814041138, "learning_rate": 1.9564031973121813e-06, "loss": 0.0325, "step": 106560 }, { "epoch": 1.6356380937763793, "grad_norm": 0.44146454334259033, "learning_rate": 1.9548119356618865e-06, "loss": 0.0252, "step": 106570 }, { "epoch": 1.6357915739390685, "grad_norm": 0.2268381118774414, "learning_rate": 1.9532212513034533e-06, "loss": 0.0195, "step": 106580 }, { "epoch": 1.6359450541017573, "grad_norm": 0.4251086711883545, "learning_rate": 1.9516311443510227e-06, "loss": 0.0249, "step": 106590 }, { "epoch": 1.6360985342644463, "grad_norm": 0.3869298994541168, "learning_rate": 1.9500416149186953e-06, "loss": 0.0227, "step": 106600 }, { "epoch": 1.6362520144271353, "grad_norm": 0.41295814514160156, "learning_rate": 1.948452663120528e-06, "loss": 0.0343, "step": 106610 }, { "epoch": 1.6364054945898243, "grad_norm": 0.5139352679252625, "learning_rate": 1.9468642890705404e-06, "loss": 0.022, "step": 106620 }, { "epoch": 1.6365589747525133, "grad_norm": 0.33191734552383423, "learning_rate": 1.945276492882707e-06, "loss": 0.0314, "step": 106630 }, { "epoch": 1.6367124549152021, "grad_norm": 0.4070596396923065, "learning_rate": 1.943689274670959e-06, "loss": 0.0354, "step": 106640 }, { "epoch": 1.6368659350778911, "grad_norm": 0.40859130024909973, "learning_rate": 1.9421026345491946e-06, "loss": 0.025, "step": 106650 }, { "epoch": 1.6370194152405801, "grad_norm": 0.3260469138622284, "learning_rate": 1.9405165726312647e-06, "loss": 0.0231, "step": 106660 }, { "epoch": 1.6371728954032692, "grad_norm": 0.4520599842071533, "learning_rate": 1.938931089030972e-06, "loss": 0.0287, "step": 106670 }, { "epoch": 1.6373263755659582, "grad_norm": 0.42213594913482666, "learning_rate": 1.9373461838620923e-06, "loss": 0.028, "step": 106680 }, { "epoch": 1.637479855728647, "grad_norm": 0.34173107147216797, "learning_rate": 1.9357618572383485e-06, "loss": 0.0251, "step": 106690 }, { "epoch": 1.6376333358913362, "grad_norm": 0.29478201270103455, "learning_rate": 1.9341781092734258e-06, "loss": 0.0268, "step": 106700 }, { "epoch": 1.637786816054025, "grad_norm": 0.34597963094711304, "learning_rate": 1.9325949400809727e-06, "loss": 0.0182, "step": 106710 }, { "epoch": 1.637940296216714, "grad_norm": 0.2490665167570114, "learning_rate": 1.931012349774586e-06, "loss": 0.0212, "step": 106720 }, { "epoch": 1.638093776379403, "grad_norm": 0.4150906801223755, "learning_rate": 1.9294303384678238e-06, "loss": 0.0236, "step": 106730 }, { "epoch": 1.6382472565420918, "grad_norm": 0.41846874356269836, "learning_rate": 1.9278489062742122e-06, "loss": 0.0249, "step": 106740 }, { "epoch": 1.638400736704781, "grad_norm": 0.3587222695350647, "learning_rate": 1.9262680533072266e-06, "loss": 0.0201, "step": 106750 }, { "epoch": 1.6385542168674698, "grad_norm": 0.31743451952934265, "learning_rate": 1.924687779680302e-06, "loss": 0.0253, "step": 106760 }, { "epoch": 1.6387076970301588, "grad_norm": 0.36725351214408875, "learning_rate": 1.9231080855068318e-06, "loss": 0.0188, "step": 106770 }, { "epoch": 1.6388611771928479, "grad_norm": 0.7513608336448669, "learning_rate": 1.9215289709001716e-06, "loss": 0.0387, "step": 106780 }, { "epoch": 1.6390146573555366, "grad_norm": 0.267650842666626, "learning_rate": 1.9199504359736298e-06, "loss": 0.0273, "step": 106790 }, { "epoch": 1.6391681375182259, "grad_norm": 0.6048822402954102, "learning_rate": 1.9183724808404734e-06, "loss": 0.0235, "step": 106800 }, { "epoch": 1.6393216176809147, "grad_norm": 0.4629482924938202, "learning_rate": 1.9167951056139413e-06, "loss": 0.0216, "step": 106810 }, { "epoch": 1.6394750978436037, "grad_norm": 0.25193825364112854, "learning_rate": 1.9152183104072086e-06, "loss": 0.0262, "step": 106820 }, { "epoch": 1.6396285780062927, "grad_norm": 0.3878280818462372, "learning_rate": 1.9136420953334223e-06, "loss": 0.0203, "step": 106830 }, { "epoch": 1.6397820581689817, "grad_norm": 0.46827176213264465, "learning_rate": 1.9120664605056903e-06, "loss": 0.0254, "step": 106840 }, { "epoch": 1.6399355383316707, "grad_norm": 0.39598751068115234, "learning_rate": 1.9104914060370705e-06, "loss": 0.0241, "step": 106850 }, { "epoch": 1.6400890184943595, "grad_norm": 0.3283708393573761, "learning_rate": 1.9089169320405844e-06, "loss": 0.0259, "step": 106860 }, { "epoch": 1.6402424986570487, "grad_norm": 0.3706662654876709, "learning_rate": 1.907343038629209e-06, "loss": 0.0259, "step": 106870 }, { "epoch": 1.6403959788197375, "grad_norm": 0.3029160797595978, "learning_rate": 1.9057697259158815e-06, "loss": 0.0245, "step": 106880 }, { "epoch": 1.6405494589824265, "grad_norm": 0.3634820878505707, "learning_rate": 1.9041969940134952e-06, "loss": 0.0248, "step": 106890 }, { "epoch": 1.6407029391451156, "grad_norm": 0.35113823413848877, "learning_rate": 1.9026248430349026e-06, "loss": 0.0224, "step": 106900 }, { "epoch": 1.6408564193078043, "grad_norm": 0.24759063124656677, "learning_rate": 1.9010532730929232e-06, "loss": 0.0269, "step": 106910 }, { "epoch": 1.6410098994704936, "grad_norm": 0.36631879210472107, "learning_rate": 1.899482284300318e-06, "loss": 0.0213, "step": 106920 }, { "epoch": 1.6411633796331824, "grad_norm": 0.2473556101322174, "learning_rate": 1.8979118767698146e-06, "loss": 0.0247, "step": 106930 }, { "epoch": 1.6413168597958714, "grad_norm": 0.35499271750450134, "learning_rate": 1.8963420506141061e-06, "loss": 0.0289, "step": 106940 }, { "epoch": 1.6414703399585604, "grad_norm": 0.3455429673194885, "learning_rate": 1.8947728059458336e-06, "loss": 0.0226, "step": 106950 }, { "epoch": 1.6416238201212492, "grad_norm": 0.32516783475875854, "learning_rate": 1.8932041428776017e-06, "loss": 0.0251, "step": 106960 }, { "epoch": 1.6417773002839384, "grad_norm": 0.44050973653793335, "learning_rate": 1.8916360615219687e-06, "loss": 0.023, "step": 106970 }, { "epoch": 1.6419307804466272, "grad_norm": 0.3994441628456116, "learning_rate": 1.890068561991456e-06, "loss": 0.0243, "step": 106980 }, { "epoch": 1.6420842606093162, "grad_norm": 0.3119370639324188, "learning_rate": 1.8885016443985394e-06, "loss": 0.0276, "step": 106990 }, { "epoch": 1.6422377407720052, "grad_norm": 0.4318019151687622, "learning_rate": 1.8869353088556586e-06, "loss": 0.0287, "step": 107000 }, { "epoch": 1.642391220934694, "grad_norm": 0.16875921189785004, "learning_rate": 1.8853695554752094e-06, "loss": 0.0267, "step": 107010 }, { "epoch": 1.6425447010973833, "grad_norm": 0.32795578241348267, "learning_rate": 1.883804384369534e-06, "loss": 0.0289, "step": 107020 }, { "epoch": 1.642698181260072, "grad_norm": 0.33340996503829956, "learning_rate": 1.8822397956509541e-06, "loss": 0.0264, "step": 107030 }, { "epoch": 1.642851661422761, "grad_norm": 0.301371306180954, "learning_rate": 1.880675789431733e-06, "loss": 0.0279, "step": 107040 }, { "epoch": 1.64300514158545, "grad_norm": 0.30050283670425415, "learning_rate": 1.8791123658240995e-06, "loss": 0.0255, "step": 107050 }, { "epoch": 1.643158621748139, "grad_norm": 0.4302595257759094, "learning_rate": 1.8775495249402386e-06, "loss": 0.0246, "step": 107060 }, { "epoch": 1.643312101910828, "grad_norm": 0.5285462141036987, "learning_rate": 1.875987266892294e-06, "loss": 0.0278, "step": 107070 }, { "epoch": 1.643465582073517, "grad_norm": 0.3025965392589569, "learning_rate": 1.8744255917923658e-06, "loss": 0.0245, "step": 107080 }, { "epoch": 1.6436190622362061, "grad_norm": 0.2565116286277771, "learning_rate": 1.8728644997525136e-06, "loss": 0.0257, "step": 107090 }, { "epoch": 1.643772542398895, "grad_norm": 0.40459972620010376, "learning_rate": 1.8713039908847596e-06, "loss": 0.021, "step": 107100 }, { "epoch": 1.643926022561584, "grad_norm": 0.30177372694015503, "learning_rate": 1.869744065301079e-06, "loss": 0.0231, "step": 107110 }, { "epoch": 1.644079502724273, "grad_norm": 0.43088313937187195, "learning_rate": 1.8681847231133988e-06, "loss": 0.0205, "step": 107120 }, { "epoch": 1.6442329828869617, "grad_norm": 0.30714789032936096, "learning_rate": 1.8666259644336205e-06, "loss": 0.0244, "step": 107130 }, { "epoch": 1.644386463049651, "grad_norm": 0.38250768184661865, "learning_rate": 1.8650677893735903e-06, "loss": 0.0232, "step": 107140 }, { "epoch": 1.6445399432123398, "grad_norm": 0.35948073863983154, "learning_rate": 1.8635101980451176e-06, "loss": 0.0223, "step": 107150 }, { "epoch": 1.6446934233750288, "grad_norm": 0.2986114025115967, "learning_rate": 1.8619531905599686e-06, "loss": 0.0231, "step": 107160 }, { "epoch": 1.6448469035377178, "grad_norm": 0.3265507221221924, "learning_rate": 1.8603967670298696e-06, "loss": 0.0222, "step": 107170 }, { "epoch": 1.6450003837004066, "grad_norm": 0.3426632583141327, "learning_rate": 1.858840927566502e-06, "loss": 0.0289, "step": 107180 }, { "epoch": 1.6451538638630958, "grad_norm": 0.3866425156593323, "learning_rate": 1.8572856722815058e-06, "loss": 0.0235, "step": 107190 }, { "epoch": 1.6453073440257846, "grad_norm": 0.4326457977294922, "learning_rate": 1.8557310012864837e-06, "loss": 0.0263, "step": 107200 }, { "epoch": 1.6454608241884736, "grad_norm": 0.37397119402885437, "learning_rate": 1.8541769146929944e-06, "loss": 0.0217, "step": 107210 }, { "epoch": 1.6456143043511626, "grad_norm": 0.5917571187019348, "learning_rate": 1.8526234126125453e-06, "loss": 0.0301, "step": 107220 }, { "epoch": 1.6457677845138516, "grad_norm": 0.4035884141921997, "learning_rate": 1.8510704951566161e-06, "loss": 0.0221, "step": 107230 }, { "epoch": 1.6459212646765407, "grad_norm": 0.2482423037290573, "learning_rate": 1.8495181624366364e-06, "loss": 0.0245, "step": 107240 }, { "epoch": 1.6460747448392294, "grad_norm": 0.3220460116863251, "learning_rate": 1.8479664145639942e-06, "loss": 0.0182, "step": 107250 }, { "epoch": 1.6462282250019185, "grad_norm": 0.3473144471645355, "learning_rate": 1.8464152516500433e-06, "loss": 0.0332, "step": 107260 }, { "epoch": 1.6463817051646075, "grad_norm": 0.3696308135986328, "learning_rate": 1.8448646738060816e-06, "loss": 0.0305, "step": 107270 }, { "epoch": 1.6465351853272965, "grad_norm": 0.4500039517879486, "learning_rate": 1.8433146811433732e-06, "loss": 0.0266, "step": 107280 }, { "epoch": 1.6466886654899855, "grad_norm": 0.3326631188392639, "learning_rate": 1.841765273773145e-06, "loss": 0.0236, "step": 107290 }, { "epoch": 1.6468421456526743, "grad_norm": 0.4153326749801636, "learning_rate": 1.8402164518065723e-06, "loss": 0.0304, "step": 107300 }, { "epoch": 1.6469956258153635, "grad_norm": 0.2762555480003357, "learning_rate": 1.8386682153547953e-06, "loss": 0.0193, "step": 107310 }, { "epoch": 1.6471491059780523, "grad_norm": 0.3453510105609894, "learning_rate": 1.8371205645289069e-06, "loss": 0.0253, "step": 107320 }, { "epoch": 1.6473025861407413, "grad_norm": 0.303497850894928, "learning_rate": 1.8355734994399622e-06, "loss": 0.0262, "step": 107330 }, { "epoch": 1.6474560663034303, "grad_norm": 0.3673863708972931, "learning_rate": 1.8340270201989718e-06, "loss": 0.0289, "step": 107340 }, { "epoch": 1.6476095464661191, "grad_norm": 0.2935637831687927, "learning_rate": 1.832481126916903e-06, "loss": 0.0205, "step": 107350 }, { "epoch": 1.6477630266288084, "grad_norm": 0.23090532422065735, "learning_rate": 1.8309358197046911e-06, "loss": 0.0203, "step": 107360 }, { "epoch": 1.6479165067914971, "grad_norm": 0.3716611862182617, "learning_rate": 1.8293910986732132e-06, "loss": 0.0243, "step": 107370 }, { "epoch": 1.6480699869541862, "grad_norm": 0.330752968788147, "learning_rate": 1.8278469639333129e-06, "loss": 0.0248, "step": 107380 }, { "epoch": 1.6482234671168752, "grad_norm": 0.4211586117744446, "learning_rate": 1.826303415595796e-06, "loss": 0.0246, "step": 107390 }, { "epoch": 1.648376947279564, "grad_norm": 0.3648420572280884, "learning_rate": 1.8247604537714193e-06, "loss": 0.0324, "step": 107400 }, { "epoch": 1.6485304274422532, "grad_norm": 0.24269267916679382, "learning_rate": 1.8232180785709007e-06, "loss": 0.0213, "step": 107410 }, { "epoch": 1.648683907604942, "grad_norm": 0.37443798780441284, "learning_rate": 1.8216762901049146e-06, "loss": 0.0218, "step": 107420 }, { "epoch": 1.648837387767631, "grad_norm": 0.36055365204811096, "learning_rate": 1.8201350884840928e-06, "loss": 0.0254, "step": 107430 }, { "epoch": 1.64899086793032, "grad_norm": 0.31922921538352966, "learning_rate": 1.8185944738190275e-06, "loss": 0.0184, "step": 107440 }, { "epoch": 1.649144348093009, "grad_norm": 0.3048839569091797, "learning_rate": 1.8170544462202644e-06, "loss": 0.0215, "step": 107450 }, { "epoch": 1.649297828255698, "grad_norm": 0.5098547339439392, "learning_rate": 1.8155150057983162e-06, "loss": 0.0275, "step": 107460 }, { "epoch": 1.6494513084183868, "grad_norm": 0.45039838552474976, "learning_rate": 1.8139761526636412e-06, "loss": 0.0274, "step": 107470 }, { "epoch": 1.6496047885810758, "grad_norm": 0.5118077397346497, "learning_rate": 1.8124378869266623e-06, "loss": 0.0211, "step": 107480 }, { "epoch": 1.6497582687437649, "grad_norm": 0.3182307779788971, "learning_rate": 1.8109002086977622e-06, "loss": 0.0213, "step": 107490 }, { "epoch": 1.6499117489064539, "grad_norm": 0.37783586978912354, "learning_rate": 1.8093631180872783e-06, "loss": 0.0254, "step": 107500 }, { "epoch": 1.6500652290691429, "grad_norm": 0.48936769366264343, "learning_rate": 1.8078266152055046e-06, "loss": 0.0262, "step": 107510 }, { "epoch": 1.6502187092318317, "grad_norm": 0.30357152223587036, "learning_rate": 1.806290700162696e-06, "loss": 0.0265, "step": 107520 }, { "epoch": 1.650372189394521, "grad_norm": 0.33059170842170715, "learning_rate": 1.8047553730690648e-06, "loss": 0.0201, "step": 107530 }, { "epoch": 1.6505256695572097, "grad_norm": 0.4251563251018524, "learning_rate": 1.8032206340347746e-06, "loss": 0.0232, "step": 107540 }, { "epoch": 1.6506791497198987, "grad_norm": 0.2855939567089081, "learning_rate": 1.80168648316996e-06, "loss": 0.0208, "step": 107550 }, { "epoch": 1.6508326298825877, "grad_norm": 0.4520625174045563, "learning_rate": 1.8001529205847056e-06, "loss": 0.0259, "step": 107560 }, { "epoch": 1.6509861100452765, "grad_norm": 0.3446594178676605, "learning_rate": 1.7986199463890452e-06, "loss": 0.0252, "step": 107570 }, { "epoch": 1.6511395902079657, "grad_norm": 0.37325671315193176, "learning_rate": 1.7970875606929872e-06, "loss": 0.0177, "step": 107580 }, { "epoch": 1.6512930703706545, "grad_norm": 0.38957175612449646, "learning_rate": 1.7955557636064868e-06, "loss": 0.0333, "step": 107590 }, { "epoch": 1.6514465505333435, "grad_norm": 0.3929961025714874, "learning_rate": 1.7940245552394619e-06, "loss": 0.0197, "step": 107600 }, { "epoch": 1.6516000306960326, "grad_norm": 0.28093841671943665, "learning_rate": 1.7924939357017834e-06, "loss": 0.02, "step": 107610 }, { "epoch": 1.6517535108587214, "grad_norm": 0.4049507677555084, "learning_rate": 1.7909639051032846e-06, "loss": 0.0213, "step": 107620 }, { "epoch": 1.6519069910214106, "grad_norm": 0.3568515181541443, "learning_rate": 1.789434463553754e-06, "loss": 0.026, "step": 107630 }, { "epoch": 1.6520604711840994, "grad_norm": 0.2551038861274719, "learning_rate": 1.7879056111629357e-06, "loss": 0.0198, "step": 107640 }, { "epoch": 1.6522139513467884, "grad_norm": 0.4261110723018646, "learning_rate": 1.786377348040539e-06, "loss": 0.0301, "step": 107650 }, { "epoch": 1.6523674315094774, "grad_norm": 0.2546996474266052, "learning_rate": 1.7848496742962274e-06, "loss": 0.0302, "step": 107660 }, { "epoch": 1.6525209116721664, "grad_norm": 0.39712318778038025, "learning_rate": 1.7833225900396133e-06, "loss": 0.0356, "step": 107670 }, { "epoch": 1.6526743918348554, "grad_norm": 0.4220854341983795, "learning_rate": 1.7817960953802805e-06, "loss": 0.023, "step": 107680 }, { "epoch": 1.6528278719975442, "grad_norm": 0.2589690387248993, "learning_rate": 1.780270190427763e-06, "loss": 0.0279, "step": 107690 }, { "epoch": 1.6529813521602335, "grad_norm": 0.3846116065979004, "learning_rate": 1.7787448752915537e-06, "loss": 0.0251, "step": 107700 }, { "epoch": 1.6531348323229222, "grad_norm": 0.3720710277557373, "learning_rate": 1.7772201500811038e-06, "loss": 0.0248, "step": 107710 }, { "epoch": 1.6532883124856113, "grad_norm": 0.27434176206588745, "learning_rate": 1.77569601490582e-06, "loss": 0.0232, "step": 107720 }, { "epoch": 1.6534417926483003, "grad_norm": 0.27760663628578186, "learning_rate": 1.7741724698750707e-06, "loss": 0.0357, "step": 107730 }, { "epoch": 1.653595272810989, "grad_norm": 0.40751293301582336, "learning_rate": 1.772649515098176e-06, "loss": 0.0267, "step": 107740 }, { "epoch": 1.6537487529736783, "grad_norm": 0.3922475278377533, "learning_rate": 1.7711271506844218e-06, "loss": 0.0257, "step": 107750 }, { "epoch": 1.653902233136367, "grad_norm": 0.46010902523994446, "learning_rate": 1.7696053767430488e-06, "loss": 0.0248, "step": 107760 }, { "epoch": 1.654055713299056, "grad_norm": 0.35597869753837585, "learning_rate": 1.768084193383245e-06, "loss": 0.023, "step": 107770 }, { "epoch": 1.654209193461745, "grad_norm": 0.4630354940891266, "learning_rate": 1.7665636007141719e-06, "loss": 0.0317, "step": 107780 }, { "epoch": 1.654362673624434, "grad_norm": 0.5258059501647949, "learning_rate": 1.7650435988449389e-06, "loss": 0.0198, "step": 107790 }, { "epoch": 1.6545161537871231, "grad_norm": 0.3785555362701416, "learning_rate": 1.7635241878846144e-06, "loss": 0.0201, "step": 107800 }, { "epoch": 1.654669633949812, "grad_norm": 0.2986926734447479, "learning_rate": 1.762005367942231e-06, "loss": 0.0286, "step": 107810 }, { "epoch": 1.654823114112501, "grad_norm": 0.5004895329475403, "learning_rate": 1.7604871391267675e-06, "loss": 0.0279, "step": 107820 }, { "epoch": 1.65497659427519, "grad_norm": 0.4052251875400543, "learning_rate": 1.7589695015471653e-06, "loss": 0.0251, "step": 107830 }, { "epoch": 1.6551300744378787, "grad_norm": 0.35904064774513245, "learning_rate": 1.7574524553123306e-06, "loss": 0.0189, "step": 107840 }, { "epoch": 1.655283554600568, "grad_norm": 0.23380498588085175, "learning_rate": 1.7559360005311165e-06, "loss": 0.0301, "step": 107850 }, { "epoch": 1.6554370347632568, "grad_norm": 0.386162668466568, "learning_rate": 1.754420137312338e-06, "loss": 0.0227, "step": 107860 }, { "epoch": 1.6555905149259458, "grad_norm": 0.3644328713417053, "learning_rate": 1.7529048657647697e-06, "loss": 0.0226, "step": 107870 }, { "epoch": 1.6557439950886348, "grad_norm": 0.5014153718948364, "learning_rate": 1.7513901859971405e-06, "loss": 0.0257, "step": 107880 }, { "epoch": 1.6558974752513238, "grad_norm": 0.33719751238822937, "learning_rate": 1.7498760981181373e-06, "loss": 0.0255, "step": 107890 }, { "epoch": 1.6560509554140128, "grad_norm": 0.42550110816955566, "learning_rate": 1.748362602236403e-06, "loss": 0.0204, "step": 107900 }, { "epoch": 1.6562044355767016, "grad_norm": 0.25705665349960327, "learning_rate": 1.7468496984605488e-06, "loss": 0.028, "step": 107910 }, { "epoch": 1.6563579157393908, "grad_norm": 0.35927313566207886, "learning_rate": 1.7453373868991252e-06, "loss": 0.0248, "step": 107920 }, { "epoch": 1.6565113959020796, "grad_norm": 0.3152009844779968, "learning_rate": 1.7438256676606535e-06, "loss": 0.0238, "step": 107930 }, { "epoch": 1.6566648760647686, "grad_norm": 0.2806810736656189, "learning_rate": 1.7423145408536102e-06, "loss": 0.0185, "step": 107940 }, { "epoch": 1.6568183562274577, "grad_norm": 0.28822293877601624, "learning_rate": 1.7408040065864274e-06, "loss": 0.0256, "step": 107950 }, { "epoch": 1.6569718363901464, "grad_norm": 0.3653124272823334, "learning_rate": 1.7392940649674949e-06, "loss": 0.0237, "step": 107960 }, { "epoch": 1.6571253165528357, "grad_norm": 0.4033604562282562, "learning_rate": 1.7377847161051608e-06, "loss": 0.0245, "step": 107970 }, { "epoch": 1.6572787967155245, "grad_norm": 0.29421254992485046, "learning_rate": 1.7362759601077295e-06, "loss": 0.0194, "step": 107980 }, { "epoch": 1.6574322768782135, "grad_norm": 0.3548316955566406, "learning_rate": 1.734767797083463e-06, "loss": 0.0265, "step": 107990 }, { "epoch": 1.6575857570409025, "grad_norm": 0.2548113167285919, "learning_rate": 1.7332602271405808e-06, "loss": 0.0253, "step": 108000 }, { "epoch": 1.6577392372035913, "grad_norm": 0.449895441532135, "learning_rate": 1.731753250387266e-06, "loss": 0.0276, "step": 108010 }, { "epoch": 1.6578927173662805, "grad_norm": 0.33265477418899536, "learning_rate": 1.7302468669316453e-06, "loss": 0.0244, "step": 108020 }, { "epoch": 1.6580461975289693, "grad_norm": 0.5888316035270691, "learning_rate": 1.7287410768818136e-06, "loss": 0.027, "step": 108030 }, { "epoch": 1.6581996776916583, "grad_norm": 0.32837778329849243, "learning_rate": 1.7272358803458244e-06, "loss": 0.024, "step": 108040 }, { "epoch": 1.6583531578543473, "grad_norm": 0.3166445195674896, "learning_rate": 1.7257312774316814e-06, "loss": 0.0176, "step": 108050 }, { "epoch": 1.6585066380170363, "grad_norm": 0.3050139248371124, "learning_rate": 1.7242272682473505e-06, "loss": 0.0254, "step": 108060 }, { "epoch": 1.6586601181797254, "grad_norm": 0.3009578287601471, "learning_rate": 1.7227238529007528e-06, "loss": 0.0252, "step": 108070 }, { "epoch": 1.6588135983424142, "grad_norm": 0.396990567445755, "learning_rate": 1.721221031499768e-06, "loss": 0.0238, "step": 108080 }, { "epoch": 1.6589670785051032, "grad_norm": 0.2103431522846222, "learning_rate": 1.7197188041522294e-06, "loss": 0.0316, "step": 108090 }, { "epoch": 1.6591205586677922, "grad_norm": 0.5410295128822327, "learning_rate": 1.7182171709659379e-06, "loss": 0.0251, "step": 108100 }, { "epoch": 1.6592740388304812, "grad_norm": 0.4014199376106262, "learning_rate": 1.7167161320486437e-06, "loss": 0.0263, "step": 108110 }, { "epoch": 1.6594275189931702, "grad_norm": 0.29069867730140686, "learning_rate": 1.7152156875080484e-06, "loss": 0.0266, "step": 108120 }, { "epoch": 1.659580999155859, "grad_norm": 0.31416264176368713, "learning_rate": 1.713715837451826e-06, "loss": 0.0292, "step": 108130 }, { "epoch": 1.6597344793185482, "grad_norm": 0.3668708801269531, "learning_rate": 1.7122165819875957e-06, "loss": 0.0215, "step": 108140 }, { "epoch": 1.659887959481237, "grad_norm": 0.35765987634658813, "learning_rate": 1.7107179212229409e-06, "loss": 0.0255, "step": 108150 }, { "epoch": 1.660041439643926, "grad_norm": 0.45333513617515564, "learning_rate": 1.7092198552653983e-06, "loss": 0.0262, "step": 108160 }, { "epoch": 1.660194919806615, "grad_norm": 0.3981154263019562, "learning_rate": 1.7077223842224633e-06, "loss": 0.0279, "step": 108170 }, { "epoch": 1.6603483999693038, "grad_norm": 0.34766584634780884, "learning_rate": 1.7062255082015888e-06, "loss": 0.0254, "step": 108180 }, { "epoch": 1.660501880131993, "grad_norm": 0.2752488851547241, "learning_rate": 1.7047292273101835e-06, "loss": 0.0227, "step": 108190 }, { "epoch": 1.6606553602946819, "grad_norm": 0.46217674016952515, "learning_rate": 1.7032335416556178e-06, "loss": 0.0259, "step": 108200 }, { "epoch": 1.6608088404573709, "grad_norm": 0.28690001368522644, "learning_rate": 1.7017384513452184e-06, "loss": 0.0216, "step": 108210 }, { "epoch": 1.6609623206200599, "grad_norm": 0.40055033564567566, "learning_rate": 1.7002439564862584e-06, "loss": 0.0331, "step": 108220 }, { "epoch": 1.6611158007827487, "grad_norm": 0.30545344948768616, "learning_rate": 1.6987500571859849e-06, "loss": 0.0249, "step": 108230 }, { "epoch": 1.661269280945438, "grad_norm": 0.2979290187358856, "learning_rate": 1.6972567535515916e-06, "loss": 0.0243, "step": 108240 }, { "epoch": 1.6614227611081267, "grad_norm": 0.3800571858882904, "learning_rate": 1.6957640456902324e-06, "loss": 0.0256, "step": 108250 }, { "epoch": 1.6615762412708157, "grad_norm": 0.44283461570739746, "learning_rate": 1.6942719337090197e-06, "loss": 0.0271, "step": 108260 }, { "epoch": 1.6617297214335047, "grad_norm": 0.367459774017334, "learning_rate": 1.6927804177150198e-06, "loss": 0.0221, "step": 108270 }, { "epoch": 1.6618832015961937, "grad_norm": 0.3936406373977661, "learning_rate": 1.6912894978152583e-06, "loss": 0.0248, "step": 108280 }, { "epoch": 1.6620366817588828, "grad_norm": 0.376162052154541, "learning_rate": 1.6897991741167163e-06, "loss": 0.0275, "step": 108290 }, { "epoch": 1.6621901619215715, "grad_norm": 0.45166918635368347, "learning_rate": 1.6883094467263383e-06, "loss": 0.0297, "step": 108300 }, { "epoch": 1.6623436420842608, "grad_norm": 0.2577438950538635, "learning_rate": 1.686820315751021e-06, "loss": 0.0286, "step": 108310 }, { "epoch": 1.6624971222469496, "grad_norm": 0.3428652882575989, "learning_rate": 1.6853317812976122e-06, "loss": 0.023, "step": 108320 }, { "epoch": 1.6626506024096386, "grad_norm": 0.32371386885643005, "learning_rate": 1.6838438434729309e-06, "loss": 0.0242, "step": 108330 }, { "epoch": 1.6628040825723276, "grad_norm": 0.47489142417907715, "learning_rate": 1.682356502383743e-06, "loss": 0.0272, "step": 108340 }, { "epoch": 1.6629575627350164, "grad_norm": 0.3161742091178894, "learning_rate": 1.6808697581367705e-06, "loss": 0.0318, "step": 108350 }, { "epoch": 1.6631110428977056, "grad_norm": 0.22713357210159302, "learning_rate": 1.6793836108387062e-06, "loss": 0.0254, "step": 108360 }, { "epoch": 1.6632645230603944, "grad_norm": 0.44740718603134155, "learning_rate": 1.6778980605961814e-06, "loss": 0.0245, "step": 108370 }, { "epoch": 1.6634180032230834, "grad_norm": 0.2882162928581238, "learning_rate": 1.676413107515794e-06, "loss": 0.0229, "step": 108380 }, { "epoch": 1.6635714833857724, "grad_norm": 0.499071329832077, "learning_rate": 1.6749287517041036e-06, "loss": 0.0276, "step": 108390 }, { "epoch": 1.6637249635484612, "grad_norm": 0.4164299964904785, "learning_rate": 1.67344499326762e-06, "loss": 0.025, "step": 108400 }, { "epoch": 1.6638784437111505, "grad_norm": 0.4290817379951477, "learning_rate": 1.6719618323128106e-06, "loss": 0.0215, "step": 108410 }, { "epoch": 1.6640319238738392, "grad_norm": 0.3073110282421112, "learning_rate": 1.6704792689461014e-06, "loss": 0.0247, "step": 108420 }, { "epoch": 1.6641854040365283, "grad_norm": 0.408153772354126, "learning_rate": 1.6689973032738772e-06, "loss": 0.0247, "step": 108430 }, { "epoch": 1.6643388841992173, "grad_norm": 0.2516186535358429, "learning_rate": 1.6675159354024773e-06, "loss": 0.0307, "step": 108440 }, { "epoch": 1.664492364361906, "grad_norm": 0.33736780285835266, "learning_rate": 1.6660351654381958e-06, "loss": 0.0204, "step": 108450 }, { "epoch": 1.6646458445245953, "grad_norm": 0.5301443934440613, "learning_rate": 1.664554993487294e-06, "loss": 0.033, "step": 108460 }, { "epoch": 1.664799324687284, "grad_norm": 0.46601375937461853, "learning_rate": 1.6630754196559784e-06, "loss": 0.022, "step": 108470 }, { "epoch": 1.664952804849973, "grad_norm": 0.36897745728492737, "learning_rate": 1.661596444050415e-06, "loss": 0.0225, "step": 108480 }, { "epoch": 1.6651062850126621, "grad_norm": 0.45351678133010864, "learning_rate": 1.6601180667767359e-06, "loss": 0.0267, "step": 108490 }, { "epoch": 1.6652597651753511, "grad_norm": 0.32798516750335693, "learning_rate": 1.6586402879410212e-06, "loss": 0.023, "step": 108500 }, { "epoch": 1.6654132453380401, "grad_norm": 0.3564244210720062, "learning_rate": 1.65716310764931e-06, "loss": 0.0283, "step": 108510 }, { "epoch": 1.665566725500729, "grad_norm": 0.4315984547138214, "learning_rate": 1.6556865260075993e-06, "loss": 0.0244, "step": 108520 }, { "epoch": 1.6657202056634182, "grad_norm": 0.3297055661678314, "learning_rate": 1.6542105431218436e-06, "loss": 0.0278, "step": 108530 }, { "epoch": 1.665873685826107, "grad_norm": 0.31874579191207886, "learning_rate": 1.6527351590979524e-06, "loss": 0.0289, "step": 108540 }, { "epoch": 1.666027165988796, "grad_norm": 0.3238072693347931, "learning_rate": 1.651260374041792e-06, "loss": 0.0223, "step": 108550 }, { "epoch": 1.666180646151485, "grad_norm": 0.4471262991428375, "learning_rate": 1.649786188059196e-06, "loss": 0.0234, "step": 108560 }, { "epoch": 1.6663341263141738, "grad_norm": 0.5191412568092346, "learning_rate": 1.648312601255936e-06, "loss": 0.0242, "step": 108570 }, { "epoch": 1.666487606476863, "grad_norm": 0.33341315388679504, "learning_rate": 1.6468396137377529e-06, "loss": 0.0288, "step": 108580 }, { "epoch": 1.6666410866395518, "grad_norm": 0.3255692720413208, "learning_rate": 1.6453672256103482e-06, "loss": 0.0219, "step": 108590 }, { "epoch": 1.6667945668022408, "grad_norm": 0.3408708870410919, "learning_rate": 1.6438954369793702e-06, "loss": 0.0212, "step": 108600 }, { "epoch": 1.6669480469649298, "grad_norm": 0.4062584936618805, "learning_rate": 1.6424242479504314e-06, "loss": 0.0208, "step": 108610 }, { "epoch": 1.6671015271276186, "grad_norm": 0.47823068499565125, "learning_rate": 1.6409536586290963e-06, "loss": 0.017, "step": 108620 }, { "epoch": 1.6672550072903078, "grad_norm": 0.32671427726745605, "learning_rate": 1.6394836691208893e-06, "loss": 0.0215, "step": 108630 }, { "epoch": 1.6674084874529966, "grad_norm": 0.47324249148368835, "learning_rate": 1.6380142795312902e-06, "loss": 0.026, "step": 108640 }, { "epoch": 1.6675619676156856, "grad_norm": 0.36334115266799927, "learning_rate": 1.6365454899657407e-06, "loss": 0.0196, "step": 108650 }, { "epoch": 1.6677154477783747, "grad_norm": 0.42834004759788513, "learning_rate": 1.635077300529635e-06, "loss": 0.0304, "step": 108660 }, { "epoch": 1.6678689279410637, "grad_norm": 0.3606434762477875, "learning_rate": 1.633609711328318e-06, "loss": 0.0223, "step": 108670 }, { "epoch": 1.6680224081037527, "grad_norm": 0.36507025361061096, "learning_rate": 1.632142722467105e-06, "loss": 0.0301, "step": 108680 }, { "epoch": 1.6681758882664415, "grad_norm": 0.3249981105327606, "learning_rate": 1.63067633405126e-06, "loss": 0.0221, "step": 108690 }, { "epoch": 1.6683293684291305, "grad_norm": 0.6155100464820862, "learning_rate": 1.6292105461860052e-06, "loss": 0.0268, "step": 108700 }, { "epoch": 1.6684828485918195, "grad_norm": 0.296440988779068, "learning_rate": 1.6277453589765192e-06, "loss": 0.0321, "step": 108710 }, { "epoch": 1.6686363287545085, "grad_norm": 0.4255634546279907, "learning_rate": 1.6262807725279395e-06, "loss": 0.0237, "step": 108720 }, { "epoch": 1.6687898089171975, "grad_norm": 0.4072265923023224, "learning_rate": 1.624816786945358e-06, "loss": 0.021, "step": 108730 }, { "epoch": 1.6689432890798863, "grad_norm": 0.5051757097244263, "learning_rate": 1.6233534023338226e-06, "loss": 0.0311, "step": 108740 }, { "epoch": 1.6690967692425756, "grad_norm": 0.47473540902137756, "learning_rate": 1.6218906187983452e-06, "loss": 0.0292, "step": 108750 }, { "epoch": 1.6692502494052643, "grad_norm": 0.39381060004234314, "learning_rate": 1.6204284364438893e-06, "loss": 0.0217, "step": 108760 }, { "epoch": 1.6694037295679534, "grad_norm": 0.3759111762046814, "learning_rate": 1.618966855375368e-06, "loss": 0.0262, "step": 108770 }, { "epoch": 1.6695572097306424, "grad_norm": 0.29153111577033997, "learning_rate": 1.617505875697667e-06, "loss": 0.0234, "step": 108780 }, { "epoch": 1.6697106898933312, "grad_norm": 0.4988488554954529, "learning_rate": 1.616045497515617e-06, "loss": 0.0293, "step": 108790 }, { "epoch": 1.6698641700560204, "grad_norm": 0.5379105806350708, "learning_rate": 1.61458572093401e-06, "loss": 0.0264, "step": 108800 }, { "epoch": 1.6700176502187092, "grad_norm": 0.2565925121307373, "learning_rate": 1.613126546057594e-06, "loss": 0.0238, "step": 108810 }, { "epoch": 1.6701711303813982, "grad_norm": 0.37524935603141785, "learning_rate": 1.6116679729910733e-06, "loss": 0.0292, "step": 108820 }, { "epoch": 1.6703246105440872, "grad_norm": 0.43349316716194153, "learning_rate": 1.6102100018391098e-06, "loss": 0.0316, "step": 108830 }, { "epoch": 1.670478090706776, "grad_norm": 0.38114410638809204, "learning_rate": 1.6087526327063197e-06, "loss": 0.0271, "step": 108840 }, { "epoch": 1.6706315708694652, "grad_norm": 0.4617238938808441, "learning_rate": 1.6072958656972826e-06, "loss": 0.0205, "step": 108850 }, { "epoch": 1.670785051032154, "grad_norm": 0.33249858021736145, "learning_rate": 1.6058397009165305e-06, "loss": 0.0301, "step": 108860 }, { "epoch": 1.670938531194843, "grad_norm": 0.3313247859477997, "learning_rate": 1.6043841384685444e-06, "loss": 0.0236, "step": 108870 }, { "epoch": 1.671092011357532, "grad_norm": 0.32690051198005676, "learning_rate": 1.6029291784577795e-06, "loss": 0.0258, "step": 108880 }, { "epoch": 1.671245491520221, "grad_norm": 0.24446025490760803, "learning_rate": 1.6014748209886333e-06, "loss": 0.0226, "step": 108890 }, { "epoch": 1.67139897168291, "grad_norm": 0.3889677822589874, "learning_rate": 1.6000210661654625e-06, "loss": 0.0269, "step": 108900 }, { "epoch": 1.6715524518455989, "grad_norm": 0.3065396845340729, "learning_rate": 1.5985679140925925e-06, "loss": 0.0216, "step": 108910 }, { "epoch": 1.6717059320082879, "grad_norm": 0.2897833287715912, "learning_rate": 1.5971153648742853e-06, "loss": 0.0191, "step": 108920 }, { "epoch": 1.6718594121709769, "grad_norm": 0.41860127449035645, "learning_rate": 1.5956634186147745e-06, "loss": 0.0305, "step": 108930 }, { "epoch": 1.672012892333666, "grad_norm": 0.34486332535743713, "learning_rate": 1.5942120754182466e-06, "loss": 0.0284, "step": 108940 }, { "epoch": 1.672166372496355, "grad_norm": 0.38872265815734863, "learning_rate": 1.592761335388846e-06, "loss": 0.0253, "step": 108950 }, { "epoch": 1.6723198526590437, "grad_norm": 0.32588911056518555, "learning_rate": 1.5913111986306695e-06, "loss": 0.0257, "step": 108960 }, { "epoch": 1.672473332821733, "grad_norm": 0.40126001834869385, "learning_rate": 1.5898616652477738e-06, "loss": 0.0292, "step": 108970 }, { "epoch": 1.6726268129844217, "grad_norm": 0.26602891087532043, "learning_rate": 1.5884127353441726e-06, "loss": 0.0261, "step": 108980 }, { "epoch": 1.6727802931471107, "grad_norm": 0.45737341046333313, "learning_rate": 1.5869644090238357e-06, "loss": 0.0279, "step": 108990 }, { "epoch": 1.6729337733097998, "grad_norm": 0.28782370686531067, "learning_rate": 1.5855166863906858e-06, "loss": 0.0234, "step": 109000 }, { "epoch": 1.6730872534724885, "grad_norm": 0.3747733533382416, "learning_rate": 1.584069567548614e-06, "loss": 0.0258, "step": 109010 }, { "epoch": 1.6732407336351778, "grad_norm": 0.45470231771469116, "learning_rate": 1.582623052601453e-06, "loss": 0.0276, "step": 109020 }, { "epoch": 1.6733942137978666, "grad_norm": 0.29049745202064514, "learning_rate": 1.5811771416529986e-06, "loss": 0.0212, "step": 109030 }, { "epoch": 1.6735476939605556, "grad_norm": 0.17811943590641022, "learning_rate": 1.579731834807009e-06, "loss": 0.0281, "step": 109040 }, { "epoch": 1.6737011741232446, "grad_norm": 0.26898297667503357, "learning_rate": 1.5782871321671921e-06, "loss": 0.0224, "step": 109050 }, { "epoch": 1.6738546542859334, "grad_norm": 0.3556258976459503, "learning_rate": 1.576843033837213e-06, "loss": 0.027, "step": 109060 }, { "epoch": 1.6740081344486226, "grad_norm": 0.460254430770874, "learning_rate": 1.5753995399206944e-06, "loss": 0.0269, "step": 109070 }, { "epoch": 1.6741616146113114, "grad_norm": 0.49079039692878723, "learning_rate": 1.5739566505212179e-06, "loss": 0.0207, "step": 109080 }, { "epoch": 1.6743150947740004, "grad_norm": 0.4050644636154175, "learning_rate": 1.5725143657423182e-06, "loss": 0.0185, "step": 109090 }, { "epoch": 1.6744685749366894, "grad_norm": 0.2510051727294922, "learning_rate": 1.5710726856874858e-06, "loss": 0.0253, "step": 109100 }, { "epoch": 1.6746220550993784, "grad_norm": 0.4454234540462494, "learning_rate": 1.5696316104601772e-06, "loss": 0.0246, "step": 109110 }, { "epoch": 1.6747755352620675, "grad_norm": 0.3562236428260803, "learning_rate": 1.5681911401637917e-06, "loss": 0.0199, "step": 109120 }, { "epoch": 1.6749290154247563, "grad_norm": 0.31189289689064026, "learning_rate": 1.5667512749016922e-06, "loss": 0.0225, "step": 109130 }, { "epoch": 1.6750824955874455, "grad_norm": 0.20161280035972595, "learning_rate": 1.5653120147772017e-06, "loss": 0.0174, "step": 109140 }, { "epoch": 1.6752359757501343, "grad_norm": 0.2469026744365692, "learning_rate": 1.5638733598935952e-06, "loss": 0.0269, "step": 109150 }, { "epoch": 1.6753894559128233, "grad_norm": 0.2692593038082123, "learning_rate": 1.5624353103541034e-06, "loss": 0.0226, "step": 109160 }, { "epoch": 1.6755429360755123, "grad_norm": 0.35899055004119873, "learning_rate": 1.5609978662619162e-06, "loss": 0.0231, "step": 109170 }, { "epoch": 1.675696416238201, "grad_norm": 0.38160449266433716, "learning_rate": 1.5595610277201777e-06, "loss": 0.0262, "step": 109180 }, { "epoch": 1.6758498964008903, "grad_norm": 0.3830433189868927, "learning_rate": 1.55812479483199e-06, "loss": 0.0186, "step": 109190 }, { "epoch": 1.6760033765635791, "grad_norm": 0.3298882842063904, "learning_rate": 1.5566891677004148e-06, "loss": 0.0251, "step": 109200 }, { "epoch": 1.6761568567262681, "grad_norm": 0.4766876697540283, "learning_rate": 1.5552541464284666e-06, "loss": 0.0274, "step": 109210 }, { "epoch": 1.6763103368889571, "grad_norm": 0.3710707724094391, "learning_rate": 1.5538197311191116e-06, "loss": 0.0304, "step": 109220 }, { "epoch": 1.676463817051646, "grad_norm": 0.33221447467803955, "learning_rate": 1.5523859218752834e-06, "loss": 0.0324, "step": 109230 }, { "epoch": 1.6766172972143352, "grad_norm": 0.33917906880378723, "learning_rate": 1.5509527187998651e-06, "loss": 0.0273, "step": 109240 }, { "epoch": 1.676770777377024, "grad_norm": 0.2944498360157013, "learning_rate": 1.549520121995699e-06, "loss": 0.0237, "step": 109250 }, { "epoch": 1.676924257539713, "grad_norm": 0.2319294661283493, "learning_rate": 1.5480881315655805e-06, "loss": 0.0264, "step": 109260 }, { "epoch": 1.677077737702402, "grad_norm": 0.34805822372436523, "learning_rate": 1.5466567476122651e-06, "loss": 0.0252, "step": 109270 }, { "epoch": 1.6772312178650908, "grad_norm": 0.20302116870880127, "learning_rate": 1.5452259702384641e-06, "loss": 0.0202, "step": 109280 }, { "epoch": 1.67738469802778, "grad_norm": 0.35471871495246887, "learning_rate": 1.54379579954684e-06, "loss": 0.0262, "step": 109290 }, { "epoch": 1.6775381781904688, "grad_norm": 0.3049379587173462, "learning_rate": 1.5423662356400237e-06, "loss": 0.0202, "step": 109300 }, { "epoch": 1.6776916583531578, "grad_norm": 0.3497775197029114, "learning_rate": 1.5409372786205923e-06, "loss": 0.0239, "step": 109310 }, { "epoch": 1.6778451385158468, "grad_norm": 0.4350874125957489, "learning_rate": 1.539508928591078e-06, "loss": 0.0193, "step": 109320 }, { "epoch": 1.6779986186785358, "grad_norm": 0.7429943680763245, "learning_rate": 1.538081185653979e-06, "loss": 0.029, "step": 109330 }, { "epoch": 1.6781520988412248, "grad_norm": 0.3539084196090698, "learning_rate": 1.5366540499117432e-06, "loss": 0.0264, "step": 109340 }, { "epoch": 1.6783055790039136, "grad_norm": 0.40653175115585327, "learning_rate": 1.5352275214667767e-06, "loss": 0.0251, "step": 109350 }, { "epoch": 1.6784590591666029, "grad_norm": 0.4822320342063904, "learning_rate": 1.5338016004214407e-06, "loss": 0.0243, "step": 109360 }, { "epoch": 1.6786125393292917, "grad_norm": 0.47634756565093994, "learning_rate": 1.5323762868780546e-06, "loss": 0.0268, "step": 109370 }, { "epoch": 1.6787660194919807, "grad_norm": 0.4362415671348572, "learning_rate": 1.530951580938892e-06, "loss": 0.0219, "step": 109380 }, { "epoch": 1.6789194996546697, "grad_norm": 0.4400516450405121, "learning_rate": 1.5295274827061846e-06, "loss": 0.0316, "step": 109390 }, { "epoch": 1.6790729798173585, "grad_norm": 0.44453999400138855, "learning_rate": 1.5281039922821229e-06, "loss": 0.023, "step": 109400 }, { "epoch": 1.6792264599800477, "grad_norm": 0.2904839515686035, "learning_rate": 1.5266811097688504e-06, "loss": 0.0278, "step": 109410 }, { "epoch": 1.6793799401427365, "grad_norm": 0.5565603971481323, "learning_rate": 1.5252588352684638e-06, "loss": 0.034, "step": 109420 }, { "epoch": 1.6795334203054255, "grad_norm": 0.5272695422172546, "learning_rate": 1.5238371688830245e-06, "loss": 0.0372, "step": 109430 }, { "epoch": 1.6796869004681145, "grad_norm": 0.3414779603481293, "learning_rate": 1.522416110714543e-06, "loss": 0.0215, "step": 109440 }, { "epoch": 1.6798403806308033, "grad_norm": 0.2898896038532257, "learning_rate": 1.520995660864989e-06, "loss": 0.0275, "step": 109450 }, { "epoch": 1.6799938607934926, "grad_norm": 0.27977409958839417, "learning_rate": 1.5195758194362943e-06, "loss": 0.0216, "step": 109460 }, { "epoch": 1.6801473409561813, "grad_norm": 0.30184414982795715, "learning_rate": 1.518156586530335e-06, "loss": 0.0223, "step": 109470 }, { "epoch": 1.6803008211188704, "grad_norm": 0.3955806493759155, "learning_rate": 1.5167379622489475e-06, "loss": 0.0259, "step": 109480 }, { "epoch": 1.6804543012815594, "grad_norm": 0.5279735326766968, "learning_rate": 1.515319946693935e-06, "loss": 0.0255, "step": 109490 }, { "epoch": 1.6806077814442484, "grad_norm": 0.41206714510917664, "learning_rate": 1.5139025399670438e-06, "loss": 0.0298, "step": 109500 }, { "epoch": 1.6807612616069374, "grad_norm": 0.2939909100532532, "learning_rate": 1.5124857421699824e-06, "loss": 0.0256, "step": 109510 }, { "epoch": 1.6809147417696262, "grad_norm": 0.2912394106388092, "learning_rate": 1.511069553404415e-06, "loss": 0.0313, "step": 109520 }, { "epoch": 1.6810682219323152, "grad_norm": 0.3355747163295746, "learning_rate": 1.5096539737719618e-06, "loss": 0.0259, "step": 109530 }, { "epoch": 1.6812217020950042, "grad_norm": 0.4163733422756195, "learning_rate": 1.508239003374199e-06, "loss": 0.0206, "step": 109540 }, { "epoch": 1.6813751822576932, "grad_norm": 0.31297823786735535, "learning_rate": 1.5068246423126576e-06, "loss": 0.0215, "step": 109550 }, { "epoch": 1.6815286624203822, "grad_norm": 0.3706381618976593, "learning_rate": 1.5054108906888342e-06, "loss": 0.0221, "step": 109560 }, { "epoch": 1.681682142583071, "grad_norm": 0.5165526866912842, "learning_rate": 1.5039977486041657e-06, "loss": 0.0286, "step": 109570 }, { "epoch": 1.6818356227457603, "grad_norm": 0.28651171922683716, "learning_rate": 1.502585216160055e-06, "loss": 0.0291, "step": 109580 }, { "epoch": 1.681989102908449, "grad_norm": 0.4461756944656372, "learning_rate": 1.5011732934578638e-06, "loss": 0.0299, "step": 109590 }, { "epoch": 1.682142583071138, "grad_norm": 0.30617380142211914, "learning_rate": 1.499761980598905e-06, "loss": 0.0254, "step": 109600 }, { "epoch": 1.682296063233827, "grad_norm": 0.2707558870315552, "learning_rate": 1.4983512776844489e-06, "loss": 0.0276, "step": 109610 }, { "epoch": 1.6824495433965159, "grad_norm": 0.36190685629844666, "learning_rate": 1.4969411848157212e-06, "loss": 0.0272, "step": 109620 }, { "epoch": 1.682603023559205, "grad_norm": 0.34669533371925354, "learning_rate": 1.4955317020939041e-06, "loss": 0.0238, "step": 109630 }, { "epoch": 1.682756503721894, "grad_norm": 0.5291652679443359, "learning_rate": 1.4941228296201394e-06, "loss": 0.0258, "step": 109640 }, { "epoch": 1.682909983884583, "grad_norm": 0.350070595741272, "learning_rate": 1.492714567495518e-06, "loss": 0.0244, "step": 109650 }, { "epoch": 1.683063464047272, "grad_norm": 0.3600931167602539, "learning_rate": 1.491306915821098e-06, "loss": 0.0281, "step": 109660 }, { "epoch": 1.6832169442099607, "grad_norm": 0.4326532781124115, "learning_rate": 1.4898998746978822e-06, "loss": 0.0238, "step": 109670 }, { "epoch": 1.68337042437265, "grad_norm": 0.31387779116630554, "learning_rate": 1.4884934442268328e-06, "loss": 0.0247, "step": 109680 }, { "epoch": 1.6835239045353387, "grad_norm": 0.486742228269577, "learning_rate": 1.487087624508875e-06, "loss": 0.0259, "step": 109690 }, { "epoch": 1.6836773846980277, "grad_norm": 0.28289687633514404, "learning_rate": 1.4856824156448823e-06, "loss": 0.0212, "step": 109700 }, { "epoch": 1.6838308648607168, "grad_norm": 0.4581383168697357, "learning_rate": 1.4842778177356875e-06, "loss": 0.0235, "step": 109710 }, { "epoch": 1.6839843450234058, "grad_norm": 0.34929102659225464, "learning_rate": 1.482873830882079e-06, "loss": 0.0253, "step": 109720 }, { "epoch": 1.6841378251860948, "grad_norm": 0.46080031991004944, "learning_rate": 1.4814704551848025e-06, "loss": 0.0294, "step": 109730 }, { "epoch": 1.6842913053487836, "grad_norm": 0.4974386990070343, "learning_rate": 1.4800676907445544e-06, "loss": 0.0256, "step": 109740 }, { "epoch": 1.6844447855114728, "grad_norm": 0.3428201377391815, "learning_rate": 1.4786655376619985e-06, "loss": 0.0262, "step": 109750 }, { "epoch": 1.6845982656741616, "grad_norm": 0.5564276576042175, "learning_rate": 1.4772639960377466e-06, "loss": 0.0243, "step": 109760 }, { "epoch": 1.6847517458368506, "grad_norm": 0.34663692116737366, "learning_rate": 1.4758630659723617e-06, "loss": 0.0228, "step": 109770 }, { "epoch": 1.6849052259995396, "grad_norm": 0.2503977417945862, "learning_rate": 1.4744627475663752e-06, "loss": 0.0189, "step": 109780 }, { "epoch": 1.6850587061622284, "grad_norm": 0.4683374762535095, "learning_rate": 1.4730630409202684e-06, "loss": 0.0242, "step": 109790 }, { "epoch": 1.6852121863249176, "grad_norm": 0.31692975759506226, "learning_rate": 1.4716639461344761e-06, "loss": 0.0181, "step": 109800 }, { "epoch": 1.6853656664876064, "grad_norm": 0.301074743270874, "learning_rate": 1.4702654633093937e-06, "loss": 0.0189, "step": 109810 }, { "epoch": 1.6855191466502955, "grad_norm": 0.4080883860588074, "learning_rate": 1.468867592545371e-06, "loss": 0.0212, "step": 109820 }, { "epoch": 1.6856726268129845, "grad_norm": 0.36018598079681396, "learning_rate": 1.467470333942712e-06, "loss": 0.0263, "step": 109830 }, { "epoch": 1.6858261069756733, "grad_norm": 0.2519809901714325, "learning_rate": 1.466073687601679e-06, "loss": 0.0228, "step": 109840 }, { "epoch": 1.6859795871383625, "grad_norm": 0.32103124260902405, "learning_rate": 1.464677653622496e-06, "loss": 0.0274, "step": 109850 }, { "epoch": 1.6861330673010513, "grad_norm": 0.32803255319595337, "learning_rate": 1.4632822321053285e-06, "loss": 0.0261, "step": 109860 }, { "epoch": 1.6862865474637403, "grad_norm": 0.38435888290405273, "learning_rate": 1.461887423150309e-06, "loss": 0.0304, "step": 109870 }, { "epoch": 1.6864400276264293, "grad_norm": 0.31619492173194885, "learning_rate": 1.4604932268575267e-06, "loss": 0.0226, "step": 109880 }, { "epoch": 1.686593507789118, "grad_norm": 0.3195808529853821, "learning_rate": 1.459099643327021e-06, "loss": 0.0216, "step": 109890 }, { "epoch": 1.6867469879518073, "grad_norm": 0.39254093170166016, "learning_rate": 1.4577066726587918e-06, "loss": 0.0192, "step": 109900 }, { "epoch": 1.6869004681144961, "grad_norm": 0.32532355189323425, "learning_rate": 1.4563143149527926e-06, "loss": 0.0249, "step": 109910 }, { "epoch": 1.6870539482771851, "grad_norm": 0.48640426993370056, "learning_rate": 1.4549225703089343e-06, "loss": 0.0221, "step": 109920 }, { "epoch": 1.6872074284398741, "grad_norm": 0.37468552589416504, "learning_rate": 1.4535314388270816e-06, "loss": 0.0308, "step": 109930 }, { "epoch": 1.6873609086025632, "grad_norm": 0.3005560636520386, "learning_rate": 1.4521409206070548e-06, "loss": 0.0303, "step": 109940 }, { "epoch": 1.6875143887652522, "grad_norm": 0.3138616681098938, "learning_rate": 1.4507510157486403e-06, "loss": 0.0291, "step": 109950 }, { "epoch": 1.687667868927941, "grad_norm": 0.3844030201435089, "learning_rate": 1.4493617243515622e-06, "loss": 0.0257, "step": 109960 }, { "epoch": 1.6878213490906302, "grad_norm": 0.5725147128105164, "learning_rate": 1.4479730465155184e-06, "loss": 0.0214, "step": 109970 }, { "epoch": 1.687974829253319, "grad_norm": 0.4734945595264435, "learning_rate": 1.4465849823401523e-06, "loss": 0.0231, "step": 109980 }, { "epoch": 1.688128309416008, "grad_norm": 0.3021191954612732, "learning_rate": 1.4451975319250654e-06, "loss": 0.0242, "step": 109990 }, { "epoch": 1.688281789578697, "grad_norm": 0.4559647738933563, "learning_rate": 1.443810695369816e-06, "loss": 0.0263, "step": 110000 }, { "epoch": 1.6884352697413858, "grad_norm": 0.9418560862541199, "learning_rate": 1.44242447277392e-06, "loss": 0.0387, "step": 110010 }, { "epoch": 1.688588749904075, "grad_norm": 0.2466694861650467, "learning_rate": 1.4410388642368444e-06, "loss": 0.018, "step": 110020 }, { "epoch": 1.6887422300667638, "grad_norm": 0.42001062631607056, "learning_rate": 1.4396538698580154e-06, "loss": 0.0289, "step": 110030 }, { "epoch": 1.6888957102294528, "grad_norm": 0.38779473304748535, "learning_rate": 1.4382694897368187e-06, "loss": 0.0223, "step": 110040 }, { "epoch": 1.6890491903921419, "grad_norm": 0.34441933035850525, "learning_rate": 1.436885723972592e-06, "loss": 0.0245, "step": 110050 }, { "epoch": 1.6892026705548306, "grad_norm": 0.6799321174621582, "learning_rate": 1.4355025726646221e-06, "loss": 0.0289, "step": 110060 }, { "epoch": 1.6893561507175199, "grad_norm": 0.25325649976730347, "learning_rate": 1.4341200359121654e-06, "loss": 0.0276, "step": 110070 }, { "epoch": 1.6895096308802087, "grad_norm": 0.39509961009025574, "learning_rate": 1.432738113814426e-06, "loss": 0.0245, "step": 110080 }, { "epoch": 1.6896631110428977, "grad_norm": 0.3993609845638275, "learning_rate": 1.431356806470563e-06, "loss": 0.0224, "step": 110090 }, { "epoch": 1.6898165912055867, "grad_norm": 0.41609981656074524, "learning_rate": 1.4299761139796964e-06, "loss": 0.0249, "step": 110100 }, { "epoch": 1.6899700713682757, "grad_norm": 0.4695713222026825, "learning_rate": 1.428596036440899e-06, "loss": 0.0243, "step": 110110 }, { "epoch": 1.6901235515309647, "grad_norm": 0.30183520913124084, "learning_rate": 1.4272165739531985e-06, "loss": 0.0222, "step": 110120 }, { "epoch": 1.6902770316936535, "grad_norm": 0.489186555147171, "learning_rate": 1.4258377266155776e-06, "loss": 0.033, "step": 110130 }, { "epoch": 1.6904305118563425, "grad_norm": 0.2766572833061218, "learning_rate": 1.424459494526983e-06, "loss": 0.0234, "step": 110140 }, { "epoch": 1.6905839920190315, "grad_norm": 0.3505306541919708, "learning_rate": 1.4230818777863108e-06, "loss": 0.028, "step": 110150 }, { "epoch": 1.6907374721817205, "grad_norm": 0.18072795867919922, "learning_rate": 1.421704876492407e-06, "loss": 0.0258, "step": 110160 }, { "epoch": 1.6908909523444096, "grad_norm": 0.28837451338768005, "learning_rate": 1.4203284907440862e-06, "loss": 0.0223, "step": 110170 }, { "epoch": 1.6910444325070983, "grad_norm": 0.3752061128616333, "learning_rate": 1.41895272064011e-06, "loss": 0.0248, "step": 110180 }, { "epoch": 1.6911979126697876, "grad_norm": 0.24329549074172974, "learning_rate": 1.4175775662791991e-06, "loss": 0.0235, "step": 110190 }, { "epoch": 1.6913513928324764, "grad_norm": 0.44891592860221863, "learning_rate": 1.416203027760028e-06, "loss": 0.0254, "step": 110200 }, { "epoch": 1.6915048729951654, "grad_norm": 0.4262787699699402, "learning_rate": 1.4148291051812301e-06, "loss": 0.0224, "step": 110210 }, { "epoch": 1.6916583531578544, "grad_norm": 0.34588751196861267, "learning_rate": 1.41345579864139e-06, "loss": 0.0195, "step": 110220 }, { "epoch": 1.6918118333205432, "grad_norm": 0.36635610461235046, "learning_rate": 1.412083108239055e-06, "loss": 0.0224, "step": 110230 }, { "epoch": 1.6919653134832324, "grad_norm": 0.3501182496547699, "learning_rate": 1.4107110340727225e-06, "loss": 0.0284, "step": 110240 }, { "epoch": 1.6921187936459212, "grad_norm": 0.28744035959243774, "learning_rate": 1.4093395762408468e-06, "loss": 0.0225, "step": 110250 }, { "epoch": 1.6922722738086102, "grad_norm": 0.33758825063705444, "learning_rate": 1.407968734841838e-06, "loss": 0.0266, "step": 110260 }, { "epoch": 1.6924257539712992, "grad_norm": 0.30360761284828186, "learning_rate": 1.4065985099740631e-06, "loss": 0.026, "step": 110270 }, { "epoch": 1.692579234133988, "grad_norm": 0.24316978454589844, "learning_rate": 1.4052289017358444e-06, "loss": 0.0217, "step": 110280 }, { "epoch": 1.6927327142966773, "grad_norm": 0.3657447397708893, "learning_rate": 1.403859910225457e-06, "loss": 0.0229, "step": 110290 }, { "epoch": 1.692886194459366, "grad_norm": 0.2129330039024353, "learning_rate": 1.4024915355411406e-06, "loss": 0.0262, "step": 110300 }, { "epoch": 1.693039674622055, "grad_norm": 0.43705469369888306, "learning_rate": 1.4011237777810793e-06, "loss": 0.023, "step": 110310 }, { "epoch": 1.693193154784744, "grad_norm": 0.2844933271408081, "learning_rate": 1.3997566370434169e-06, "loss": 0.0244, "step": 110320 }, { "epoch": 1.693346634947433, "grad_norm": 0.5336759090423584, "learning_rate": 1.39839011342626e-06, "loss": 0.0245, "step": 110330 }, { "epoch": 1.693500115110122, "grad_norm": 0.4142025113105774, "learning_rate": 1.3970242070276607e-06, "loss": 0.0292, "step": 110340 }, { "epoch": 1.693653595272811, "grad_norm": 0.3208700120449066, "learning_rate": 1.3956589179456337e-06, "loss": 0.0214, "step": 110350 }, { "epoch": 1.6938070754355, "grad_norm": 0.3380430340766907, "learning_rate": 1.394294246278145e-06, "loss": 0.0158, "step": 110360 }, { "epoch": 1.693960555598189, "grad_norm": 0.44938570261001587, "learning_rate": 1.3929301921231197e-06, "loss": 0.0189, "step": 110370 }, { "epoch": 1.694114035760878, "grad_norm": 0.304344117641449, "learning_rate": 1.3915667555784351e-06, "loss": 0.0273, "step": 110380 }, { "epoch": 1.694267515923567, "grad_norm": 0.32057467103004456, "learning_rate": 1.3902039367419262e-06, "loss": 0.0256, "step": 110390 }, { "epoch": 1.6944209960862557, "grad_norm": 0.3214670419692993, "learning_rate": 1.3888417357113893e-06, "loss": 0.028, "step": 110400 }, { "epoch": 1.694574476248945, "grad_norm": 0.29487180709838867, "learning_rate": 1.3874801525845638e-06, "loss": 0.0299, "step": 110410 }, { "epoch": 1.6947279564116338, "grad_norm": 0.3804648220539093, "learning_rate": 1.3861191874591518e-06, "loss": 0.0247, "step": 110420 }, { "epoch": 1.6948814365743228, "grad_norm": 0.3942685127258301, "learning_rate": 1.384758840432815e-06, "loss": 0.0275, "step": 110430 }, { "epoch": 1.6950349167370118, "grad_norm": 0.2400916963815689, "learning_rate": 1.3833991116031665e-06, "loss": 0.0276, "step": 110440 }, { "epoch": 1.6951883968997006, "grad_norm": 0.3941073715686798, "learning_rate": 1.3820400010677726e-06, "loss": 0.0269, "step": 110450 }, { "epoch": 1.6953418770623898, "grad_norm": 0.233547180891037, "learning_rate": 1.3806815089241587e-06, "loss": 0.019, "step": 110460 }, { "epoch": 1.6954953572250786, "grad_norm": 0.47711753845214844, "learning_rate": 1.3793236352698058e-06, "loss": 0.0399, "step": 110470 }, { "epoch": 1.6956488373877676, "grad_norm": 0.4465146064758301, "learning_rate": 1.3779663802021503e-06, "loss": 0.0265, "step": 110480 }, { "epoch": 1.6958023175504566, "grad_norm": 0.37253838777542114, "learning_rate": 1.3766097438185787e-06, "loss": 0.0271, "step": 110490 }, { "epoch": 1.6959557977131454, "grad_norm": 0.25215038657188416, "learning_rate": 1.3752537262164466e-06, "loss": 0.0181, "step": 110500 }, { "epoch": 1.6961092778758347, "grad_norm": 0.43189749121665955, "learning_rate": 1.3738983274930484e-06, "loss": 0.0235, "step": 110510 }, { "epoch": 1.6962627580385234, "grad_norm": 0.5347896218299866, "learning_rate": 1.3725435477456472e-06, "loss": 0.0241, "step": 110520 }, { "epoch": 1.6964162382012125, "grad_norm": 0.3210979700088501, "learning_rate": 1.3711893870714555e-06, "loss": 0.0271, "step": 110530 }, { "epoch": 1.6965697183639015, "grad_norm": 0.24454756081104279, "learning_rate": 1.369835845567642e-06, "loss": 0.0183, "step": 110540 }, { "epoch": 1.6967231985265905, "grad_norm": 0.3782392740249634, "learning_rate": 1.3684829233313336e-06, "loss": 0.0216, "step": 110550 }, { "epoch": 1.6968766786892795, "grad_norm": 0.3886632025241852, "learning_rate": 1.367130620459608e-06, "loss": 0.0321, "step": 110560 }, { "epoch": 1.6970301588519683, "grad_norm": 0.336393803358078, "learning_rate": 1.3657789370495034e-06, "loss": 0.0226, "step": 110570 }, { "epoch": 1.6971836390146575, "grad_norm": 0.46994075179100037, "learning_rate": 1.3644278731980088e-06, "loss": 0.0204, "step": 110580 }, { "epoch": 1.6973371191773463, "grad_norm": 0.32199445366859436, "learning_rate": 1.3630774290020753e-06, "loss": 0.0197, "step": 110590 }, { "epoch": 1.6974905993400353, "grad_norm": 0.3373853862285614, "learning_rate": 1.361727604558606e-06, "loss": 0.0224, "step": 110600 }, { "epoch": 1.6976440795027243, "grad_norm": 0.34726303815841675, "learning_rate": 1.3603783999644514e-06, "loss": 0.0264, "step": 110610 }, { "epoch": 1.6977975596654131, "grad_norm": 0.37121570110321045, "learning_rate": 1.3590298153164338e-06, "loss": 0.0256, "step": 110620 }, { "epoch": 1.6979510398281024, "grad_norm": 0.287265419960022, "learning_rate": 1.3576818507113187e-06, "loss": 0.0193, "step": 110630 }, { "epoch": 1.6981045199907912, "grad_norm": 0.25645914673805237, "learning_rate": 1.356334506245831e-06, "loss": 0.02, "step": 110640 }, { "epoch": 1.6982580001534802, "grad_norm": 0.39697206020355225, "learning_rate": 1.3549877820166523e-06, "loss": 0.0228, "step": 110650 }, { "epoch": 1.6984114803161692, "grad_norm": 0.42132633924484253, "learning_rate": 1.3536416781204166e-06, "loss": 0.0223, "step": 110660 }, { "epoch": 1.698564960478858, "grad_norm": 0.25053122639656067, "learning_rate": 1.3522961946537173e-06, "loss": 0.0254, "step": 110670 }, { "epoch": 1.6987184406415472, "grad_norm": 0.40513890981674194, "learning_rate": 1.3509513317130963e-06, "loss": 0.0219, "step": 110680 }, { "epoch": 1.698871920804236, "grad_norm": 0.2589646875858307, "learning_rate": 1.3496070893950618e-06, "loss": 0.0237, "step": 110690 }, { "epoch": 1.699025400966925, "grad_norm": 0.29414668679237366, "learning_rate": 1.3482634677960714e-06, "loss": 0.0259, "step": 110700 }, { "epoch": 1.699178881129614, "grad_norm": 0.3701910674571991, "learning_rate": 1.3469204670125325e-06, "loss": 0.0186, "step": 110710 }, { "epoch": 1.6993323612923028, "grad_norm": 0.46472883224487305, "learning_rate": 1.3455780871408174e-06, "loss": 0.0252, "step": 110720 }, { "epoch": 1.699485841454992, "grad_norm": 0.3879184126853943, "learning_rate": 1.344236328277252e-06, "loss": 0.0222, "step": 110730 }, { "epoch": 1.6996393216176808, "grad_norm": 0.39968228340148926, "learning_rate": 1.342895190518112e-06, "loss": 0.0296, "step": 110740 }, { "epoch": 1.6997928017803698, "grad_norm": 0.3764503002166748, "learning_rate": 1.3415546739596341e-06, "loss": 0.0337, "step": 110750 }, { "epoch": 1.6999462819430589, "grad_norm": 0.3266313374042511, "learning_rate": 1.340214778698008e-06, "loss": 0.0237, "step": 110760 }, { "epoch": 1.7000997621057479, "grad_norm": 0.2731549143791199, "learning_rate": 1.338875504829379e-06, "loss": 0.031, "step": 110770 }, { "epoch": 1.7002532422684369, "grad_norm": 0.4399143159389496, "learning_rate": 1.33753685244985e-06, "loss": 0.0298, "step": 110780 }, { "epoch": 1.7004067224311257, "grad_norm": 0.38728657364845276, "learning_rate": 1.3361988216554778e-06, "loss": 0.0272, "step": 110790 }, { "epoch": 1.700560202593815, "grad_norm": 0.31076109409332275, "learning_rate": 1.334861412542272e-06, "loss": 0.023, "step": 110800 }, { "epoch": 1.7007136827565037, "grad_norm": 0.45573312044143677, "learning_rate": 1.3335246252062018e-06, "loss": 0.029, "step": 110810 }, { "epoch": 1.7008671629191927, "grad_norm": 0.3246832489967346, "learning_rate": 1.33218845974319e-06, "loss": 0.029, "step": 110820 }, { "epoch": 1.7010206430818817, "grad_norm": 0.3026238679885864, "learning_rate": 1.3308529162491136e-06, "loss": 0.0241, "step": 110830 }, { "epoch": 1.7011741232445705, "grad_norm": 0.5219113230705261, "learning_rate": 1.3295179948198034e-06, "loss": 0.0321, "step": 110840 }, { "epoch": 1.7013276034072597, "grad_norm": 0.4183046817779541, "learning_rate": 1.3281836955510562e-06, "loss": 0.0203, "step": 110850 }, { "epoch": 1.7014810835699485, "grad_norm": 0.23067571222782135, "learning_rate": 1.3268500185386102e-06, "loss": 0.0222, "step": 110860 }, { "epoch": 1.7016345637326376, "grad_norm": 0.3285585343837738, "learning_rate": 1.325516963878163e-06, "loss": 0.0178, "step": 110870 }, { "epoch": 1.7017880438953266, "grad_norm": 0.43693917989730835, "learning_rate": 1.3241845316653745e-06, "loss": 0.0174, "step": 110880 }, { "epoch": 1.7019415240580154, "grad_norm": 0.39459213614463806, "learning_rate": 1.3228527219958543e-06, "loss": 0.0286, "step": 110890 }, { "epoch": 1.7020950042207046, "grad_norm": 0.2881200313568115, "learning_rate": 1.3215215349651656e-06, "loss": 0.02, "step": 110900 }, { "epoch": 1.7022484843833934, "grad_norm": 0.4674323499202728, "learning_rate": 1.320190970668831e-06, "loss": 0.0255, "step": 110910 }, { "epoch": 1.7024019645460824, "grad_norm": 0.44078534841537476, "learning_rate": 1.3188610292023263e-06, "loss": 0.0214, "step": 110920 }, { "epoch": 1.7025554447087714, "grad_norm": 0.4359641373157501, "learning_rate": 1.3175317106610818e-06, "loss": 0.0297, "step": 110930 }, { "epoch": 1.7027089248714604, "grad_norm": 0.3898446261882782, "learning_rate": 1.3162030151404836e-06, "loss": 0.0296, "step": 110940 }, { "epoch": 1.7028624050341494, "grad_norm": 0.4363858699798584, "learning_rate": 1.3148749427358798e-06, "loss": 0.0312, "step": 110950 }, { "epoch": 1.7030158851968382, "grad_norm": 0.35440751910209656, "learning_rate": 1.3135474935425619e-06, "loss": 0.029, "step": 110960 }, { "epoch": 1.7031693653595272, "grad_norm": 0.36225593090057373, "learning_rate": 1.3122206676557814e-06, "loss": 0.021, "step": 110970 }, { "epoch": 1.7033228455222162, "grad_norm": 0.29790472984313965, "learning_rate": 1.3108944651707512e-06, "loss": 0.0165, "step": 110980 }, { "epoch": 1.7034763256849053, "grad_norm": 0.30761265754699707, "learning_rate": 1.3095688861826317e-06, "loss": 0.0226, "step": 110990 }, { "epoch": 1.7036298058475943, "grad_norm": 0.3621354103088379, "learning_rate": 1.3082439307865424e-06, "loss": 0.0237, "step": 111000 }, { "epoch": 1.703783286010283, "grad_norm": 0.3907930254936218, "learning_rate": 1.3069195990775575e-06, "loss": 0.0243, "step": 111010 }, { "epoch": 1.7039367661729723, "grad_norm": 0.2878836691379547, "learning_rate": 1.3055958911507037e-06, "loss": 0.0224, "step": 111020 }, { "epoch": 1.704090246335661, "grad_norm": 0.4960981011390686, "learning_rate": 1.3042728071009647e-06, "loss": 0.0293, "step": 111030 }, { "epoch": 1.70424372649835, "grad_norm": 0.4955556392669678, "learning_rate": 1.3029503470232841e-06, "loss": 0.0287, "step": 111040 }, { "epoch": 1.704397206661039, "grad_norm": 0.39461395144462585, "learning_rate": 1.301628511012557e-06, "loss": 0.0284, "step": 111050 }, { "epoch": 1.704550686823728, "grad_norm": 0.3631274402141571, "learning_rate": 1.3003072991636256e-06, "loss": 0.0256, "step": 111060 }, { "epoch": 1.7047041669864171, "grad_norm": 0.2829838693141937, "learning_rate": 1.2989867115713016e-06, "loss": 0.021, "step": 111070 }, { "epoch": 1.704857647149106, "grad_norm": 0.5065677165985107, "learning_rate": 1.2976667483303451e-06, "loss": 0.0267, "step": 111080 }, { "epoch": 1.705011127311795, "grad_norm": 0.43239399790763855, "learning_rate": 1.296347409535469e-06, "loss": 0.0232, "step": 111090 }, { "epoch": 1.705164607474484, "grad_norm": 0.39245399832725525, "learning_rate": 1.295028695281345e-06, "loss": 0.0265, "step": 111100 }, { "epoch": 1.7053180876371727, "grad_norm": 0.27685490250587463, "learning_rate": 1.2937106056625992e-06, "loss": 0.0229, "step": 111110 }, { "epoch": 1.705471567799862, "grad_norm": 0.34402555227279663, "learning_rate": 1.2923931407738133e-06, "loss": 0.0245, "step": 111120 }, { "epoch": 1.7056250479625508, "grad_norm": 0.5339286923408508, "learning_rate": 1.2910763007095206e-06, "loss": 0.0296, "step": 111130 }, { "epoch": 1.7057785281252398, "grad_norm": 0.3390246033668518, "learning_rate": 1.2897600855642168e-06, "loss": 0.0268, "step": 111140 }, { "epoch": 1.7059320082879288, "grad_norm": 0.38464874029159546, "learning_rate": 1.2884444954323482e-06, "loss": 0.026, "step": 111150 }, { "epoch": 1.7060854884506178, "grad_norm": 0.3443169593811035, "learning_rate": 1.2871295304083098e-06, "loss": 0.0245, "step": 111160 }, { "epoch": 1.7062389686133068, "grad_norm": 0.4159214496612549, "learning_rate": 1.2858151905864668e-06, "loss": 0.0257, "step": 111170 }, { "epoch": 1.7063924487759956, "grad_norm": 0.45912468433380127, "learning_rate": 1.2845014760611263e-06, "loss": 0.0354, "step": 111180 }, { "epoch": 1.7065459289386846, "grad_norm": 0.17846497893333435, "learning_rate": 1.2831883869265582e-06, "loss": 0.0249, "step": 111190 }, { "epoch": 1.7066994091013736, "grad_norm": 0.3050483465194702, "learning_rate": 1.281875923276983e-06, "loss": 0.0269, "step": 111200 }, { "epoch": 1.7068528892640626, "grad_norm": 0.4357322156429291, "learning_rate": 1.280564085206577e-06, "loss": 0.0246, "step": 111210 }, { "epoch": 1.7070063694267517, "grad_norm": 0.32211846113204956, "learning_rate": 1.2792528728094756e-06, "loss": 0.0278, "step": 111220 }, { "epoch": 1.7071598495894404, "grad_norm": 0.31157761812210083, "learning_rate": 1.277942286179763e-06, "loss": 0.0279, "step": 111230 }, { "epoch": 1.7073133297521297, "grad_norm": 0.30879050493240356, "learning_rate": 1.2766323254114842e-06, "loss": 0.0276, "step": 111240 }, { "epoch": 1.7074668099148185, "grad_norm": 0.40638336539268494, "learning_rate": 1.2753229905986408e-06, "loss": 0.0268, "step": 111250 }, { "epoch": 1.7076202900775075, "grad_norm": 0.4670267105102539, "learning_rate": 1.2740142818351754e-06, "loss": 0.0283, "step": 111260 }, { "epoch": 1.7077737702401965, "grad_norm": 0.43423885107040405, "learning_rate": 1.2727061992150048e-06, "loss": 0.0251, "step": 111270 }, { "epoch": 1.7079272504028853, "grad_norm": 0.1868283897638321, "learning_rate": 1.2713987428319885e-06, "loss": 0.0155, "step": 111280 }, { "epoch": 1.7080807305655745, "grad_norm": 0.345978707075119, "learning_rate": 1.2700919127799437e-06, "loss": 0.0209, "step": 111290 }, { "epoch": 1.7082342107282633, "grad_norm": 0.2458522617816925, "learning_rate": 1.268785709152649e-06, "loss": 0.024, "step": 111300 }, { "epoch": 1.7083876908909523, "grad_norm": 0.4526260793209076, "learning_rate": 1.2674801320438268e-06, "loss": 0.0286, "step": 111310 }, { "epoch": 1.7085411710536413, "grad_norm": 0.2820890545845032, "learning_rate": 1.266175181547159e-06, "loss": 0.021, "step": 111320 }, { "epoch": 1.7086946512163301, "grad_norm": 0.3756527006626129, "learning_rate": 1.2648708577562897e-06, "loss": 0.0225, "step": 111330 }, { "epoch": 1.7088481313790194, "grad_norm": 0.3063057065010071, "learning_rate": 1.2635671607648093e-06, "loss": 0.0287, "step": 111340 }, { "epoch": 1.7090016115417082, "grad_norm": 0.4037475287914276, "learning_rate": 1.2622640906662665e-06, "loss": 0.0266, "step": 111350 }, { "epoch": 1.7091550917043972, "grad_norm": 0.3310839533805847, "learning_rate": 1.260961647554163e-06, "loss": 0.0283, "step": 111360 }, { "epoch": 1.7093085718670862, "grad_norm": 0.3685798645019531, "learning_rate": 1.25965983152196e-06, "loss": 0.03, "step": 111370 }, { "epoch": 1.7094620520297752, "grad_norm": 0.35299110412597656, "learning_rate": 1.2583586426630678e-06, "loss": 0.027, "step": 111380 }, { "epoch": 1.7096155321924642, "grad_norm": 0.3244454264640808, "learning_rate": 1.2570580810708544e-06, "loss": 0.0241, "step": 111390 }, { "epoch": 1.709769012355153, "grad_norm": 0.31779569387435913, "learning_rate": 1.2557581468386493e-06, "loss": 0.0256, "step": 111400 }, { "epoch": 1.7099224925178422, "grad_norm": 0.436204731464386, "learning_rate": 1.2544588400597247e-06, "loss": 0.0232, "step": 111410 }, { "epoch": 1.710075972680531, "grad_norm": 0.30088067054748535, "learning_rate": 1.2531601608273136e-06, "loss": 0.0266, "step": 111420 }, { "epoch": 1.71022945284322, "grad_norm": 0.3753618896007538, "learning_rate": 1.2518621092346073e-06, "loss": 0.0268, "step": 111430 }, { "epoch": 1.710382933005909, "grad_norm": 0.22780536115169525, "learning_rate": 1.2505646853747488e-06, "loss": 0.0227, "step": 111440 }, { "epoch": 1.7105364131685978, "grad_norm": 0.37033435702323914, "learning_rate": 1.249267889340835e-06, "loss": 0.0204, "step": 111450 }, { "epoch": 1.710689893331287, "grad_norm": 0.41529741883277893, "learning_rate": 1.2479717212259191e-06, "loss": 0.0234, "step": 111460 }, { "epoch": 1.7108433734939759, "grad_norm": 0.27955469489097595, "learning_rate": 1.2466761811230099e-06, "loss": 0.0247, "step": 111470 }, { "epoch": 1.7109968536566649, "grad_norm": 0.4505535662174225, "learning_rate": 1.2453812691250688e-06, "loss": 0.0221, "step": 111480 }, { "epoch": 1.7111503338193539, "grad_norm": 0.32987016439437866, "learning_rate": 1.2440869853250137e-06, "loss": 0.0219, "step": 111490 }, { "epoch": 1.7113038139820427, "grad_norm": 0.39462754130363464, "learning_rate": 1.2427933298157235e-06, "loss": 0.0289, "step": 111500 }, { "epoch": 1.711457294144732, "grad_norm": 0.4408751428127289, "learning_rate": 1.2415003026900175e-06, "loss": 0.0316, "step": 111510 }, { "epoch": 1.7116107743074207, "grad_norm": 0.3897675573825836, "learning_rate": 1.2402079040406801e-06, "loss": 0.0215, "step": 111520 }, { "epoch": 1.7117642544701097, "grad_norm": 0.2721061110496521, "learning_rate": 1.238916133960454e-06, "loss": 0.0311, "step": 111530 }, { "epoch": 1.7119177346327987, "grad_norm": 0.3972831964492798, "learning_rate": 1.237624992542027e-06, "loss": 0.026, "step": 111540 }, { "epoch": 1.7120712147954875, "grad_norm": 0.3472435772418976, "learning_rate": 1.2363344798780485e-06, "loss": 0.0212, "step": 111550 }, { "epoch": 1.7122246949581768, "grad_norm": 0.4445433020591736, "learning_rate": 1.2350445960611202e-06, "loss": 0.0242, "step": 111560 }, { "epoch": 1.7123781751208655, "grad_norm": 0.3347983658313751, "learning_rate": 1.2337553411837988e-06, "loss": 0.0253, "step": 111570 }, { "epoch": 1.7125316552835546, "grad_norm": 0.34541836380958557, "learning_rate": 1.232466715338594e-06, "loss": 0.0249, "step": 111580 }, { "epoch": 1.7126851354462436, "grad_norm": 0.303280234336853, "learning_rate": 1.231178718617978e-06, "loss": 0.0193, "step": 111590 }, { "epoch": 1.7128386156089326, "grad_norm": 0.3690550625324249, "learning_rate": 1.2298913511143718e-06, "loss": 0.0196, "step": 111600 }, { "epoch": 1.7129920957716216, "grad_norm": 0.3103812634944916, "learning_rate": 1.2286046129201456e-06, "loss": 0.0289, "step": 111610 }, { "epoch": 1.7131455759343104, "grad_norm": 0.32611286640167236, "learning_rate": 1.227318504127637e-06, "loss": 0.0262, "step": 111620 }, { "epoch": 1.7132990560969996, "grad_norm": 0.40343257784843445, "learning_rate": 1.2260330248291308e-06, "loss": 0.0174, "step": 111630 }, { "epoch": 1.7134525362596884, "grad_norm": 0.36103391647338867, "learning_rate": 1.2247481751168667e-06, "loss": 0.0246, "step": 111640 }, { "epoch": 1.7136060164223774, "grad_norm": 0.4901244044303894, "learning_rate": 1.2234639550830419e-06, "loss": 0.0298, "step": 111650 }, { "epoch": 1.7137594965850664, "grad_norm": 0.5827303528785706, "learning_rate": 1.2221803648198072e-06, "loss": 0.0252, "step": 111660 }, { "epoch": 1.7139129767477552, "grad_norm": 0.4806049168109894, "learning_rate": 1.220897404419268e-06, "loss": 0.0233, "step": 111670 }, { "epoch": 1.7140664569104445, "grad_norm": 0.3601720631122589, "learning_rate": 1.2196150739734813e-06, "loss": 0.0191, "step": 111680 }, { "epoch": 1.7142199370731332, "grad_norm": 0.3523508310317993, "learning_rate": 1.2183333735744674e-06, "loss": 0.0276, "step": 111690 }, { "epoch": 1.7143734172358223, "grad_norm": 0.39759892225265503, "learning_rate": 1.2170523033141957e-06, "loss": 0.0266, "step": 111700 }, { "epoch": 1.7145268973985113, "grad_norm": 0.4940986931324005, "learning_rate": 1.215771863284585e-06, "loss": 0.0287, "step": 111710 }, { "epoch": 1.7146803775612, "grad_norm": 0.38418957591056824, "learning_rate": 1.2144920535775217e-06, "loss": 0.0238, "step": 111720 }, { "epoch": 1.7148338577238893, "grad_norm": 0.26434701681137085, "learning_rate": 1.213212874284837e-06, "loss": 0.0247, "step": 111730 }, { "epoch": 1.714987337886578, "grad_norm": 0.43769630789756775, "learning_rate": 1.2119343254983195e-06, "loss": 0.0218, "step": 111740 }, { "epoch": 1.715140818049267, "grad_norm": 0.35325461626052856, "learning_rate": 1.210656407309715e-06, "loss": 0.0238, "step": 111750 }, { "epoch": 1.7152942982119561, "grad_norm": 0.42926129698753357, "learning_rate": 1.2093791198107196e-06, "loss": 0.0237, "step": 111760 }, { "epoch": 1.7154477783746451, "grad_norm": 0.31299105286598206, "learning_rate": 1.2081024630929871e-06, "loss": 0.0209, "step": 111770 }, { "epoch": 1.7156012585373341, "grad_norm": 0.40088337659835815, "learning_rate": 1.2068264372481253e-06, "loss": 0.023, "step": 111780 }, { "epoch": 1.715754738700023, "grad_norm": 0.3310086131095886, "learning_rate": 1.2055510423676985e-06, "loss": 0.0232, "step": 111790 }, { "epoch": 1.715908218862712, "grad_norm": 0.3906058073043823, "learning_rate": 1.2042762785432261e-06, "loss": 0.0238, "step": 111800 }, { "epoch": 1.716061699025401, "grad_norm": 0.30563777685165405, "learning_rate": 1.2030021458661734e-06, "loss": 0.0254, "step": 111810 }, { "epoch": 1.71621517918809, "grad_norm": 0.33749154210090637, "learning_rate": 1.201728644427973e-06, "loss": 0.0177, "step": 111820 }, { "epoch": 1.716368659350779, "grad_norm": 0.2875853478908539, "learning_rate": 1.200455774320005e-06, "loss": 0.0229, "step": 111830 }, { "epoch": 1.7165221395134678, "grad_norm": 0.3999749422073364, "learning_rate": 1.1991835356336035e-06, "loss": 0.0234, "step": 111840 }, { "epoch": 1.716675619676157, "grad_norm": 0.4040774405002594, "learning_rate": 1.197911928460065e-06, "loss": 0.0279, "step": 111850 }, { "epoch": 1.7168290998388458, "grad_norm": 0.38429439067840576, "learning_rate": 1.1966409528906297e-06, "loss": 0.0246, "step": 111860 }, { "epoch": 1.7169825800015348, "grad_norm": 0.3747395873069763, "learning_rate": 1.1953706090164985e-06, "loss": 0.0303, "step": 111870 }, { "epoch": 1.7171360601642238, "grad_norm": 0.3289603888988495, "learning_rate": 1.1941008969288292e-06, "loss": 0.0261, "step": 111880 }, { "epoch": 1.7172895403269126, "grad_norm": 0.33604225516319275, "learning_rate": 1.192831816718729e-06, "loss": 0.029, "step": 111890 }, { "epoch": 1.7174430204896018, "grad_norm": 0.35842329263687134, "learning_rate": 1.1915633684772654e-06, "loss": 0.0216, "step": 111900 }, { "epoch": 1.7175965006522906, "grad_norm": 0.3331829309463501, "learning_rate": 1.190295552295454e-06, "loss": 0.0227, "step": 111910 }, { "epoch": 1.7177499808149796, "grad_norm": 0.2752619981765747, "learning_rate": 1.18902836826427e-06, "loss": 0.0245, "step": 111920 }, { "epoch": 1.7179034609776687, "grad_norm": 0.26642048358917236, "learning_rate": 1.1877618164746407e-06, "loss": 0.0191, "step": 111930 }, { "epoch": 1.7180569411403575, "grad_norm": 0.40676581859588623, "learning_rate": 1.1864958970174479e-06, "loss": 0.0182, "step": 111940 }, { "epoch": 1.7182104213030467, "grad_norm": 0.29800868034362793, "learning_rate": 1.1852306099835353e-06, "loss": 0.0249, "step": 111950 }, { "epoch": 1.7183639014657355, "grad_norm": 0.3219582438468933, "learning_rate": 1.1839659554636884e-06, "loss": 0.0261, "step": 111960 }, { "epoch": 1.7185173816284245, "grad_norm": 0.2729221284389496, "learning_rate": 1.1827019335486534e-06, "loss": 0.0212, "step": 111970 }, { "epoch": 1.7186708617911135, "grad_norm": 0.21732933819293976, "learning_rate": 1.1814385443291376e-06, "loss": 0.0188, "step": 111980 }, { "epoch": 1.7188243419538025, "grad_norm": 0.3305705487728119, "learning_rate": 1.1801757878957931e-06, "loss": 0.0244, "step": 111990 }, { "epoch": 1.7189778221164915, "grad_norm": 0.2646484375, "learning_rate": 1.178913664339232e-06, "loss": 0.024, "step": 112000 }, { "epoch": 1.7191313022791803, "grad_norm": 0.3548619747161865, "learning_rate": 1.177652173750018e-06, "loss": 0.026, "step": 112010 }, { "epoch": 1.7192847824418696, "grad_norm": 0.25001075863838196, "learning_rate": 1.1763913162186712e-06, "loss": 0.0301, "step": 112020 }, { "epoch": 1.7194382626045583, "grad_norm": 0.416501522064209, "learning_rate": 1.1751310918356662e-06, "loss": 0.0234, "step": 112030 }, { "epoch": 1.7195917427672474, "grad_norm": 0.298982709646225, "learning_rate": 1.1738715006914303e-06, "loss": 0.0214, "step": 112040 }, { "epoch": 1.7197452229299364, "grad_norm": 0.46938300132751465, "learning_rate": 1.1726125428763523e-06, "loss": 0.0273, "step": 112050 }, { "epoch": 1.7198987030926252, "grad_norm": 0.3671005964279175, "learning_rate": 1.1713542184807647e-06, "loss": 0.0308, "step": 112060 }, { "epoch": 1.7200521832553144, "grad_norm": 0.37320584058761597, "learning_rate": 1.1700965275949594e-06, "loss": 0.0274, "step": 112070 }, { "epoch": 1.7202056634180032, "grad_norm": 0.4317037761211395, "learning_rate": 1.1688394703091877e-06, "loss": 0.0302, "step": 112080 }, { "epoch": 1.7203591435806922, "grad_norm": 0.3780519664287567, "learning_rate": 1.1675830467136485e-06, "loss": 0.0231, "step": 112090 }, { "epoch": 1.7205126237433812, "grad_norm": 0.31720736622810364, "learning_rate": 1.1663272568984997e-06, "loss": 0.0216, "step": 112100 }, { "epoch": 1.72066610390607, "grad_norm": 0.27252572774887085, "learning_rate": 1.1650721009538502e-06, "loss": 0.0262, "step": 112110 }, { "epoch": 1.7208195840687592, "grad_norm": 0.42535755038261414, "learning_rate": 1.163817578969767e-06, "loss": 0.0258, "step": 112120 }, { "epoch": 1.720973064231448, "grad_norm": 0.31934839487075806, "learning_rate": 1.162563691036266e-06, "loss": 0.0178, "step": 112130 }, { "epoch": 1.721126544394137, "grad_norm": 0.44388073682785034, "learning_rate": 1.161310437243326e-06, "loss": 0.0209, "step": 112140 }, { "epoch": 1.721280024556826, "grad_norm": 0.32662978768348694, "learning_rate": 1.1600578176808763e-06, "loss": 0.0243, "step": 112150 }, { "epoch": 1.7214335047195148, "grad_norm": 0.3115454614162445, "learning_rate": 1.158805832438794e-06, "loss": 0.0256, "step": 112160 }, { "epoch": 1.721586984882204, "grad_norm": 0.40421977639198303, "learning_rate": 1.1575544816069216e-06, "loss": 0.0271, "step": 112170 }, { "epoch": 1.7217404650448929, "grad_norm": 0.434574156999588, "learning_rate": 1.156303765275051e-06, "loss": 0.0224, "step": 112180 }, { "epoch": 1.7218939452075819, "grad_norm": 0.3444693386554718, "learning_rate": 1.1550536835329262e-06, "loss": 0.0202, "step": 112190 }, { "epoch": 1.722047425370271, "grad_norm": 0.4783186614513397, "learning_rate": 1.1538042364702507e-06, "loss": 0.0225, "step": 112200 }, { "epoch": 1.72220090553296, "grad_norm": 0.39052173495292664, "learning_rate": 1.1525554241766801e-06, "loss": 0.0218, "step": 112210 }, { "epoch": 1.722354385695649, "grad_norm": 0.27537521719932556, "learning_rate": 1.151307246741823e-06, "loss": 0.0234, "step": 112220 }, { "epoch": 1.7225078658583377, "grad_norm": 0.37788236141204834, "learning_rate": 1.1500597042552419e-06, "loss": 0.0264, "step": 112230 }, { "epoch": 1.722661346021027, "grad_norm": 0.30311524868011475, "learning_rate": 1.14881279680646e-06, "loss": 0.0282, "step": 112240 }, { "epoch": 1.7228148261837157, "grad_norm": 0.5233538746833801, "learning_rate": 1.147566524484951e-06, "loss": 0.0227, "step": 112250 }, { "epoch": 1.7229683063464047, "grad_norm": 0.25149744749069214, "learning_rate": 1.1463208873801357e-06, "loss": 0.0251, "step": 112260 }, { "epoch": 1.7231217865090938, "grad_norm": 0.45672670006752014, "learning_rate": 1.1450758855814038e-06, "loss": 0.032, "step": 112270 }, { "epoch": 1.7232752666717825, "grad_norm": 0.38490867614746094, "learning_rate": 1.1438315191780892e-06, "loss": 0.0314, "step": 112280 }, { "epoch": 1.7234287468344718, "grad_norm": 0.30756276845932007, "learning_rate": 1.1425877882594816e-06, "loss": 0.0265, "step": 112290 }, { "epoch": 1.7235822269971606, "grad_norm": 0.36097946763038635, "learning_rate": 1.1413446929148275e-06, "loss": 0.036, "step": 112300 }, { "epoch": 1.7237357071598496, "grad_norm": 0.5938231348991394, "learning_rate": 1.1401022332333278e-06, "loss": 0.0266, "step": 112310 }, { "epoch": 1.7238891873225386, "grad_norm": 0.30242905020713806, "learning_rate": 1.1388604093041345e-06, "loss": 0.0291, "step": 112320 }, { "epoch": 1.7240426674852274, "grad_norm": 0.44642817974090576, "learning_rate": 1.1376192212163539e-06, "loss": 0.0239, "step": 112330 }, { "epoch": 1.7241961476479166, "grad_norm": 0.24421992897987366, "learning_rate": 1.136378669059055e-06, "loss": 0.0235, "step": 112340 }, { "epoch": 1.7243496278106054, "grad_norm": 0.5034466981887817, "learning_rate": 1.1351387529212543e-06, "loss": 0.0247, "step": 112350 }, { "epoch": 1.7245031079732944, "grad_norm": 0.21691733598709106, "learning_rate": 1.1338994728919173e-06, "loss": 0.0199, "step": 112360 }, { "epoch": 1.7246565881359834, "grad_norm": 0.42845138907432556, "learning_rate": 1.132660829059975e-06, "loss": 0.0279, "step": 112370 }, { "epoch": 1.7248100682986724, "grad_norm": 0.3048499524593353, "learning_rate": 1.1314228215143076e-06, "loss": 0.0205, "step": 112380 }, { "epoch": 1.7249635484613615, "grad_norm": 0.26487430930137634, "learning_rate": 1.130185450343746e-06, "loss": 0.0314, "step": 112390 }, { "epoch": 1.7251170286240503, "grad_norm": 0.4534793198108673, "learning_rate": 1.128948715637087e-06, "loss": 0.0306, "step": 112400 }, { "epoch": 1.7252705087867393, "grad_norm": 0.47111135721206665, "learning_rate": 1.1277126174830666e-06, "loss": 0.0294, "step": 112410 }, { "epoch": 1.7254239889494283, "grad_norm": 0.27773401141166687, "learning_rate": 1.1264771559703825e-06, "loss": 0.0206, "step": 112420 }, { "epoch": 1.7255774691121173, "grad_norm": 0.37590163946151733, "learning_rate": 1.1252423311876904e-06, "loss": 0.0235, "step": 112430 }, { "epoch": 1.7257309492748063, "grad_norm": 0.3501344323158264, "learning_rate": 1.1240081432235961e-06, "loss": 0.0232, "step": 112440 }, { "epoch": 1.725884429437495, "grad_norm": 0.4193558692932129, "learning_rate": 1.1227745921666588e-06, "loss": 0.0216, "step": 112450 }, { "epoch": 1.7260379096001843, "grad_norm": 0.5274909138679504, "learning_rate": 1.1215416781053933e-06, "loss": 0.0321, "step": 112460 }, { "epoch": 1.7261913897628731, "grad_norm": 0.4578648507595062, "learning_rate": 1.120309401128271e-06, "loss": 0.0355, "step": 112470 }, { "epoch": 1.7263448699255621, "grad_norm": 0.2357519567012787, "learning_rate": 1.1190777613237124e-06, "loss": 0.0252, "step": 112480 }, { "epoch": 1.7264983500882511, "grad_norm": 0.4877197742462158, "learning_rate": 1.1178467587800957e-06, "loss": 0.0215, "step": 112490 }, { "epoch": 1.72665183025094, "grad_norm": 0.3234021067619324, "learning_rate": 1.1166163935857566e-06, "loss": 0.0245, "step": 112500 }, { "epoch": 1.7268053104136292, "grad_norm": 0.5109652876853943, "learning_rate": 1.115386665828977e-06, "loss": 0.0271, "step": 112510 }, { "epoch": 1.726958790576318, "grad_norm": 0.34054261445999146, "learning_rate": 1.1141575755979972e-06, "loss": 0.0179, "step": 112520 }, { "epoch": 1.727112270739007, "grad_norm": 0.35131046175956726, "learning_rate": 1.1129291229810157e-06, "loss": 0.0228, "step": 112530 }, { "epoch": 1.727265750901696, "grad_norm": 0.24803060293197632, "learning_rate": 1.111701308066181e-06, "loss": 0.0259, "step": 112540 }, { "epoch": 1.7274192310643848, "grad_norm": 0.45942118763923645, "learning_rate": 1.1104741309415933e-06, "loss": 0.0279, "step": 112550 }, { "epoch": 1.727572711227074, "grad_norm": 0.4030701518058777, "learning_rate": 1.1092475916953127e-06, "loss": 0.024, "step": 112560 }, { "epoch": 1.7277261913897628, "grad_norm": 0.39848777651786804, "learning_rate": 1.1080216904153506e-06, "loss": 0.027, "step": 112570 }, { "epoch": 1.7278796715524518, "grad_norm": 0.41193363070487976, "learning_rate": 1.1067964271896714e-06, "loss": 0.0288, "step": 112580 }, { "epoch": 1.7280331517151408, "grad_norm": 0.3622035086154938, "learning_rate": 1.1055718021061955e-06, "loss": 0.0258, "step": 112590 }, { "epoch": 1.7281866318778298, "grad_norm": 0.3597266376018524, "learning_rate": 1.104347815252802e-06, "loss": 0.0264, "step": 112600 }, { "epoch": 1.7283401120405189, "grad_norm": 0.2232247292995453, "learning_rate": 1.1031244667173136e-06, "loss": 0.024, "step": 112610 }, { "epoch": 1.7284935922032076, "grad_norm": 0.4982910454273224, "learning_rate": 1.1019017565875135e-06, "loss": 0.0298, "step": 112620 }, { "epoch": 1.7286470723658967, "grad_norm": 0.3414183557033539, "learning_rate": 1.1006796849511425e-06, "loss": 0.0239, "step": 112630 }, { "epoch": 1.7288005525285857, "grad_norm": 0.265122652053833, "learning_rate": 1.0994582518958897e-06, "loss": 0.0232, "step": 112640 }, { "epoch": 1.7289540326912747, "grad_norm": 0.3167279362678528, "learning_rate": 1.0982374575094e-06, "loss": 0.0256, "step": 112650 }, { "epoch": 1.7291075128539637, "grad_norm": 0.4022950530052185, "learning_rate": 1.097017301879274e-06, "loss": 0.0251, "step": 112660 }, { "epoch": 1.7292609930166525, "grad_norm": 0.39048704504966736, "learning_rate": 1.0957977850930646e-06, "loss": 0.0288, "step": 112670 }, { "epoch": 1.7294144731793417, "grad_norm": 0.34405753016471863, "learning_rate": 1.0945789072382773e-06, "loss": 0.0263, "step": 112680 }, { "epoch": 1.7295679533420305, "grad_norm": 0.2907141149044037, "learning_rate": 1.09336066840238e-06, "loss": 0.0273, "step": 112690 }, { "epoch": 1.7297214335047195, "grad_norm": 0.305698424577713, "learning_rate": 1.0921430686727862e-06, "loss": 0.0285, "step": 112700 }, { "epoch": 1.7298749136674085, "grad_norm": 0.46292844414711, "learning_rate": 1.0909261081368628e-06, "loss": 0.026, "step": 112710 }, { "epoch": 1.7300283938300973, "grad_norm": 0.24877473711967468, "learning_rate": 1.0897097868819384e-06, "loss": 0.022, "step": 112720 }, { "epoch": 1.7301818739927866, "grad_norm": 0.3027844727039337, "learning_rate": 1.0884941049952892e-06, "loss": 0.0233, "step": 112730 }, { "epoch": 1.7303353541554753, "grad_norm": 0.4071662127971649, "learning_rate": 1.0872790625641494e-06, "loss": 0.0234, "step": 112740 }, { "epoch": 1.7304888343181644, "grad_norm": 0.3430473208427429, "learning_rate": 1.0860646596757052e-06, "loss": 0.0185, "step": 112750 }, { "epoch": 1.7306423144808534, "grad_norm": 0.41281935572624207, "learning_rate": 1.0848508964170978e-06, "loss": 0.0293, "step": 112760 }, { "epoch": 1.7307957946435422, "grad_norm": 0.484773725271225, "learning_rate": 1.0836377728754211e-06, "loss": 0.0228, "step": 112770 }, { "epoch": 1.7309492748062314, "grad_norm": 0.5060684680938721, "learning_rate": 1.0824252891377241e-06, "loss": 0.027, "step": 112780 }, { "epoch": 1.7311027549689202, "grad_norm": 0.3755287528038025, "learning_rate": 1.0812134452910116e-06, "loss": 0.0241, "step": 112790 }, { "epoch": 1.7312562351316092, "grad_norm": 0.3901301920413971, "learning_rate": 1.0800022414222421e-06, "loss": 0.0219, "step": 112800 }, { "epoch": 1.7314097152942982, "grad_norm": 0.31368109583854675, "learning_rate": 1.0787916776183217e-06, "loss": 0.0188, "step": 112810 }, { "epoch": 1.7315631954569872, "grad_norm": 0.49170008301734924, "learning_rate": 1.0775817539661203e-06, "loss": 0.0289, "step": 112820 }, { "epoch": 1.7317166756196762, "grad_norm": 0.316101610660553, "learning_rate": 1.076372470552457e-06, "loss": 0.0219, "step": 112830 }, { "epoch": 1.731870155782365, "grad_norm": 0.42219698429107666, "learning_rate": 1.075163827464104e-06, "loss": 0.0199, "step": 112840 }, { "epoch": 1.7320236359450543, "grad_norm": 0.22398260235786438, "learning_rate": 1.07395582478779e-06, "loss": 0.0305, "step": 112850 }, { "epoch": 1.732177116107743, "grad_norm": 0.450924277305603, "learning_rate": 1.0727484626101946e-06, "loss": 0.0285, "step": 112860 }, { "epoch": 1.732330596270432, "grad_norm": 0.5412424206733704, "learning_rate": 1.0715417410179562e-06, "loss": 0.0294, "step": 112870 }, { "epoch": 1.732484076433121, "grad_norm": 0.38767656683921814, "learning_rate": 1.0703356600976611e-06, "loss": 0.0324, "step": 112880 }, { "epoch": 1.7326375565958099, "grad_norm": 0.3073710799217224, "learning_rate": 1.0691302199358567e-06, "loss": 0.023, "step": 112890 }, { "epoch": 1.732791036758499, "grad_norm": 0.42200198769569397, "learning_rate": 1.0679254206190414e-06, "loss": 0.027, "step": 112900 }, { "epoch": 1.732944516921188, "grad_norm": 0.2823970317840576, "learning_rate": 1.066721262233661e-06, "loss": 0.02, "step": 112910 }, { "epoch": 1.733097997083877, "grad_norm": 0.4482854902744293, "learning_rate": 1.065517744866127e-06, "loss": 0.0304, "step": 112920 }, { "epoch": 1.733251477246566, "grad_norm": 0.3706025183200836, "learning_rate": 1.064314868602797e-06, "loss": 0.0232, "step": 112930 }, { "epoch": 1.7334049574092547, "grad_norm": 0.3550638258457184, "learning_rate": 1.0631126335299858e-06, "loss": 0.0219, "step": 112940 }, { "epoch": 1.733558437571944, "grad_norm": 0.3590514659881592, "learning_rate": 1.0619110397339594e-06, "loss": 0.0291, "step": 112950 }, { "epoch": 1.7337119177346327, "grad_norm": 0.42562511563301086, "learning_rate": 1.0607100873009412e-06, "loss": 0.0302, "step": 112960 }, { "epoch": 1.7338653978973217, "grad_norm": 0.36545464396476746, "learning_rate": 1.0595097763171046e-06, "loss": 0.022, "step": 112970 }, { "epoch": 1.7340188780600108, "grad_norm": 0.5456299185752869, "learning_rate": 1.058310106868583e-06, "loss": 0.0277, "step": 112980 }, { "epoch": 1.7341723582226996, "grad_norm": 0.43304216861724854, "learning_rate": 1.0571110790414596e-06, "loss": 0.0229, "step": 112990 }, { "epoch": 1.7343258383853888, "grad_norm": 0.2301902174949646, "learning_rate": 1.0559126929217677e-06, "loss": 0.0261, "step": 113000 }, { "epoch": 1.7344793185480776, "grad_norm": 0.4236028492450714, "learning_rate": 1.0547149485955032e-06, "loss": 0.0247, "step": 113010 }, { "epoch": 1.7346327987107666, "grad_norm": 0.30747827887535095, "learning_rate": 1.0535178461486105e-06, "loss": 0.0234, "step": 113020 }, { "epoch": 1.7347862788734556, "grad_norm": 0.4319405257701874, "learning_rate": 1.0523213856669879e-06, "loss": 0.0266, "step": 113030 }, { "epoch": 1.7349397590361446, "grad_norm": 0.30379220843315125, "learning_rate": 1.0511255672364906e-06, "loss": 0.0197, "step": 113040 }, { "epoch": 1.7350932391988336, "grad_norm": 0.29686564207077026, "learning_rate": 1.049930390942926e-06, "loss": 0.017, "step": 113050 }, { "epoch": 1.7352467193615224, "grad_norm": 0.18965399265289307, "learning_rate": 1.0487358568720529e-06, "loss": 0.0215, "step": 113060 }, { "epoch": 1.7354001995242117, "grad_norm": 0.31055325269699097, "learning_rate": 1.0475419651095864e-06, "loss": 0.0305, "step": 113070 }, { "epoch": 1.7355536796869004, "grad_norm": 0.31967344880104065, "learning_rate": 1.0463487157412e-06, "loss": 0.02, "step": 113080 }, { "epoch": 1.7357071598495895, "grad_norm": 0.33684250712394714, "learning_rate": 1.0451561088525154e-06, "loss": 0.0197, "step": 113090 }, { "epoch": 1.7358606400122785, "grad_norm": 0.33273592591285706, "learning_rate": 1.0439641445291038e-06, "loss": 0.0254, "step": 113100 }, { "epoch": 1.7360141201749673, "grad_norm": 0.4693593978881836, "learning_rate": 1.0427728228565026e-06, "loss": 0.0202, "step": 113110 }, { "epoch": 1.7361676003376565, "grad_norm": 0.2986154556274414, "learning_rate": 1.0415821439201946e-06, "loss": 0.0294, "step": 113120 }, { "epoch": 1.7363210805003453, "grad_norm": 0.21245570480823517, "learning_rate": 1.0403921078056167e-06, "loss": 0.0275, "step": 113130 }, { "epoch": 1.7364745606630343, "grad_norm": 0.4356690049171448, "learning_rate": 1.039202714598163e-06, "loss": 0.0218, "step": 113140 }, { "epoch": 1.7366280408257233, "grad_norm": 0.36097851395606995, "learning_rate": 1.03801396438318e-06, "loss": 0.0311, "step": 113150 }, { "epoch": 1.736781520988412, "grad_norm": 0.2392282634973526, "learning_rate": 1.0368258572459666e-06, "loss": 0.0175, "step": 113160 }, { "epoch": 1.7369350011511013, "grad_norm": 0.4301499128341675, "learning_rate": 1.0356383932717762e-06, "loss": 0.0237, "step": 113170 }, { "epoch": 1.7370884813137901, "grad_norm": 0.32131707668304443, "learning_rate": 1.0344515725458182e-06, "loss": 0.0266, "step": 113180 }, { "epoch": 1.7372419614764791, "grad_norm": 0.2756764888763428, "learning_rate": 1.0332653951532578e-06, "loss": 0.0259, "step": 113190 }, { "epoch": 1.7373954416391681, "grad_norm": 0.46338918805122375, "learning_rate": 1.0320798611792016e-06, "loss": 0.0235, "step": 113200 }, { "epoch": 1.7375489218018572, "grad_norm": 0.4306579828262329, "learning_rate": 1.0308949707087257e-06, "loss": 0.0252, "step": 113210 }, { "epoch": 1.7377024019645462, "grad_norm": 0.21930748224258423, "learning_rate": 1.0297107238268533e-06, "loss": 0.026, "step": 113220 }, { "epoch": 1.737855882127235, "grad_norm": 0.37697187066078186, "learning_rate": 1.0285271206185565e-06, "loss": 0.0291, "step": 113230 }, { "epoch": 1.738009362289924, "grad_norm": 0.41209328174591064, "learning_rate": 1.027344161168774e-06, "loss": 0.0252, "step": 113240 }, { "epoch": 1.738162842452613, "grad_norm": 0.27702268958091736, "learning_rate": 1.0261618455623835e-06, "loss": 0.0194, "step": 113250 }, { "epoch": 1.738316322615302, "grad_norm": 0.2644251883029938, "learning_rate": 1.0249801738842235e-06, "loss": 0.0243, "step": 113260 }, { "epoch": 1.738469802777991, "grad_norm": 0.3338158130645752, "learning_rate": 1.0237991462190911e-06, "loss": 0.0284, "step": 113270 }, { "epoch": 1.7386232829406798, "grad_norm": 0.35270601511001587, "learning_rate": 1.0226187626517292e-06, "loss": 0.0322, "step": 113280 }, { "epoch": 1.738776763103369, "grad_norm": 0.33252567052841187, "learning_rate": 1.0214390232668392e-06, "loss": 0.0221, "step": 113290 }, { "epoch": 1.7389302432660578, "grad_norm": 0.29688429832458496, "learning_rate": 1.0202599281490722e-06, "loss": 0.0304, "step": 113300 }, { "epoch": 1.7390837234287468, "grad_norm": 0.3472723960876465, "learning_rate": 1.0190814773830382e-06, "loss": 0.0275, "step": 113310 }, { "epoch": 1.7392372035914359, "grad_norm": 0.3961465358734131, "learning_rate": 1.0179036710532963e-06, "loss": 0.0268, "step": 113320 }, { "epoch": 1.7393906837541246, "grad_norm": 0.3717910349369049, "learning_rate": 1.0167265092443602e-06, "loss": 0.0306, "step": 113330 }, { "epoch": 1.7395441639168139, "grad_norm": 0.392136812210083, "learning_rate": 1.0155499920407052e-06, "loss": 0.0248, "step": 113340 }, { "epoch": 1.7396976440795027, "grad_norm": 0.42675963044166565, "learning_rate": 1.0143741195267465e-06, "loss": 0.0292, "step": 113350 }, { "epoch": 1.7398511242421917, "grad_norm": 0.34542712569236755, "learning_rate": 1.0131988917868608e-06, "loss": 0.0242, "step": 113360 }, { "epoch": 1.7400046044048807, "grad_norm": 0.24849192798137665, "learning_rate": 1.0120243089053805e-06, "loss": 0.0167, "step": 113370 }, { "epoch": 1.7401580845675695, "grad_norm": 0.4387006163597107, "learning_rate": 1.0108503709665896e-06, "loss": 0.0342, "step": 113380 }, { "epoch": 1.7403115647302587, "grad_norm": 0.4032183587551117, "learning_rate": 1.009677078054725e-06, "loss": 0.0256, "step": 113390 }, { "epoch": 1.7404650448929475, "grad_norm": 0.49499666690826416, "learning_rate": 1.008504430253976e-06, "loss": 0.0262, "step": 113400 }, { "epoch": 1.7406185250556365, "grad_norm": 0.4156091809272766, "learning_rate": 1.0073324276484874e-06, "loss": 0.0289, "step": 113410 }, { "epoch": 1.7407720052183255, "grad_norm": 0.33471450209617615, "learning_rate": 1.00616107032236e-06, "loss": 0.0214, "step": 113420 }, { "epoch": 1.7409254853810145, "grad_norm": 0.3867938816547394, "learning_rate": 1.004990358359642e-06, "loss": 0.0231, "step": 113430 }, { "epoch": 1.7410789655437036, "grad_norm": 0.30353331565856934, "learning_rate": 1.0038202918443451e-06, "loss": 0.0276, "step": 113440 }, { "epoch": 1.7412324457063924, "grad_norm": 0.3443225920200348, "learning_rate": 1.0026508708604233e-06, "loss": 0.0272, "step": 113450 }, { "epoch": 1.7413859258690816, "grad_norm": 0.39682987332344055, "learning_rate": 1.0014820954917903e-06, "loss": 0.0196, "step": 113460 }, { "epoch": 1.7415394060317704, "grad_norm": 0.33399155735969543, "learning_rate": 1.000313965822316e-06, "loss": 0.0311, "step": 113470 }, { "epoch": 1.7416928861944594, "grad_norm": 0.2760373055934906, "learning_rate": 9.991464819358188e-07, "loss": 0.022, "step": 113480 }, { "epoch": 1.7418463663571484, "grad_norm": 0.24576029181480408, "learning_rate": 9.979796439160737e-07, "loss": 0.0203, "step": 113490 }, { "epoch": 1.7419998465198372, "grad_norm": 0.4529472887516022, "learning_rate": 9.968134518468086e-07, "loss": 0.0265, "step": 113500 }, { "epoch": 1.7421533266825264, "grad_norm": 0.28121984004974365, "learning_rate": 9.956479058117041e-07, "loss": 0.0267, "step": 113510 }, { "epoch": 1.7423068068452152, "grad_norm": 0.35651737451553345, "learning_rate": 9.944830058943944e-07, "loss": 0.0236, "step": 113520 }, { "epoch": 1.7424602870079042, "grad_norm": 0.3680671751499176, "learning_rate": 9.933187521784715e-07, "loss": 0.0242, "step": 113530 }, { "epoch": 1.7426137671705932, "grad_norm": 0.4387471675872803, "learning_rate": 9.921551447474775e-07, "loss": 0.027, "step": 113540 }, { "epoch": 1.742767247333282, "grad_norm": 0.31547245383262634, "learning_rate": 9.909921836849024e-07, "loss": 0.0309, "step": 113550 }, { "epoch": 1.7429207274959713, "grad_norm": 0.28051018714904785, "learning_rate": 9.898298690742035e-07, "loss": 0.0229, "step": 113560 }, { "epoch": 1.74307420765866, "grad_norm": 0.531019926071167, "learning_rate": 9.886682009987791e-07, "loss": 0.0252, "step": 113570 }, { "epoch": 1.743227687821349, "grad_norm": 0.35673201084136963, "learning_rate": 9.875071795419888e-07, "loss": 0.0271, "step": 113580 }, { "epoch": 1.743381167984038, "grad_norm": 0.4297962486743927, "learning_rate": 9.863468047871428e-07, "loss": 0.0251, "step": 113590 }, { "epoch": 1.7435346481467269, "grad_norm": 0.3607693910598755, "learning_rate": 9.85187076817503e-07, "loss": 0.0296, "step": 113600 }, { "epoch": 1.743688128309416, "grad_norm": 0.3754030764102936, "learning_rate": 9.84027995716289e-07, "loss": 0.0228, "step": 113610 }, { "epoch": 1.743841608472105, "grad_norm": 0.29846444725990295, "learning_rate": 9.828695615666684e-07, "loss": 0.033, "step": 113620 }, { "epoch": 1.743995088634794, "grad_norm": 0.3595734238624573, "learning_rate": 9.817117744517724e-07, "loss": 0.0195, "step": 113630 }, { "epoch": 1.744148568797483, "grad_norm": 0.41570043563842773, "learning_rate": 9.805546344546769e-07, "loss": 0.024, "step": 113640 }, { "epoch": 1.744302048960172, "grad_norm": 0.5174650549888611, "learning_rate": 9.793981416584097e-07, "loss": 0.0281, "step": 113650 }, { "epoch": 1.744455529122861, "grad_norm": 0.40907758474349976, "learning_rate": 9.782422961459604e-07, "loss": 0.0299, "step": 113660 }, { "epoch": 1.7446090092855497, "grad_norm": 0.2934947609901428, "learning_rate": 9.770870980002678e-07, "loss": 0.0245, "step": 113670 }, { "epoch": 1.744762489448239, "grad_norm": 0.49407729506492615, "learning_rate": 9.759325473042236e-07, "loss": 0.028, "step": 113680 }, { "epoch": 1.7449159696109278, "grad_norm": 0.2393897920846939, "learning_rate": 9.747786441406748e-07, "loss": 0.0168, "step": 113690 }, { "epoch": 1.7450694497736168, "grad_norm": 0.28252944350242615, "learning_rate": 9.736253885924196e-07, "loss": 0.0224, "step": 113700 }, { "epoch": 1.7452229299363058, "grad_norm": 0.41389399766921997, "learning_rate": 9.72472780742212e-07, "loss": 0.021, "step": 113710 }, { "epoch": 1.7453764100989946, "grad_norm": 0.4399464726448059, "learning_rate": 9.713208206727564e-07, "loss": 0.02, "step": 113720 }, { "epoch": 1.7455298902616838, "grad_norm": 0.2800973355770111, "learning_rate": 9.701695084667185e-07, "loss": 0.0254, "step": 113730 }, { "epoch": 1.7456833704243726, "grad_norm": 0.4288235902786255, "learning_rate": 9.690188442067105e-07, "loss": 0.0222, "step": 113740 }, { "epoch": 1.7458368505870616, "grad_norm": 0.39856117963790894, "learning_rate": 9.678688279752934e-07, "loss": 0.023, "step": 113750 }, { "epoch": 1.7459903307497506, "grad_norm": 0.4568045437335968, "learning_rate": 9.667194598549957e-07, "loss": 0.0256, "step": 113760 }, { "epoch": 1.7461438109124394, "grad_norm": 0.4358869194984436, "learning_rate": 9.655707399282877e-07, "loss": 0.0201, "step": 113770 }, { "epoch": 1.7462972910751287, "grad_norm": 0.27877870202064514, "learning_rate": 9.644226682775959e-07, "loss": 0.0254, "step": 113780 }, { "epoch": 1.7464507712378174, "grad_norm": 0.34755954146385193, "learning_rate": 9.632752449853089e-07, "loss": 0.0224, "step": 113790 }, { "epoch": 1.7466042514005065, "grad_norm": 0.5337843298912048, "learning_rate": 9.62128470133753e-07, "loss": 0.0266, "step": 113800 }, { "epoch": 1.7467577315631955, "grad_norm": 0.3254973888397217, "learning_rate": 9.609823438052179e-07, "loss": 0.0266, "step": 113810 }, { "epoch": 1.7469112117258845, "grad_norm": 0.4294431805610657, "learning_rate": 9.59836866081949e-07, "loss": 0.0224, "step": 113820 }, { "epoch": 1.7470646918885735, "grad_norm": 0.4156818091869354, "learning_rate": 9.586920370461384e-07, "loss": 0.033, "step": 113830 }, { "epoch": 1.7472181720512623, "grad_norm": 0.29964643716812134, "learning_rate": 9.575478567799358e-07, "loss": 0.0227, "step": 113840 }, { "epoch": 1.7473716522139513, "grad_norm": 0.29747021198272705, "learning_rate": 9.564043253654432e-07, "loss": 0.0232, "step": 113850 }, { "epoch": 1.7475251323766403, "grad_norm": 0.3899697959423065, "learning_rate": 9.552614428847163e-07, "loss": 0.0198, "step": 113860 }, { "epoch": 1.7476786125393293, "grad_norm": 0.3325483202934265, "learning_rate": 9.541192094197626e-07, "loss": 0.0244, "step": 113870 }, { "epoch": 1.7478320927020183, "grad_norm": 0.3444085121154785, "learning_rate": 9.529776250525425e-07, "loss": 0.0225, "step": 113880 }, { "epoch": 1.7479855728647071, "grad_norm": 0.42736902832984924, "learning_rate": 9.518366898649789e-07, "loss": 0.031, "step": 113890 }, { "epoch": 1.7481390530273964, "grad_norm": 0.2678825855255127, "learning_rate": 9.506964039389354e-07, "loss": 0.023, "step": 113900 }, { "epoch": 1.7482925331900852, "grad_norm": 0.37962937355041504, "learning_rate": 9.495567673562323e-07, "loss": 0.024, "step": 113910 }, { "epoch": 1.7484460133527742, "grad_norm": 0.36030906438827515, "learning_rate": 9.484177801986516e-07, "loss": 0.023, "step": 113920 }, { "epoch": 1.7485994935154632, "grad_norm": 0.3236348032951355, "learning_rate": 9.472794425479192e-07, "loss": 0.0207, "step": 113930 }, { "epoch": 1.748752973678152, "grad_norm": 0.35952043533325195, "learning_rate": 9.461417544857187e-07, "loss": 0.0296, "step": 113940 }, { "epoch": 1.7489064538408412, "grad_norm": 0.5079110264778137, "learning_rate": 9.45004716093687e-07, "loss": 0.024, "step": 113950 }, { "epoch": 1.74905993400353, "grad_norm": 0.26229390501976013, "learning_rate": 9.438683274534111e-07, "loss": 0.0207, "step": 113960 }, { "epoch": 1.749213414166219, "grad_norm": 0.3988468050956726, "learning_rate": 9.427325886464366e-07, "loss": 0.0238, "step": 113970 }, { "epoch": 1.749366894328908, "grad_norm": 0.43156084418296814, "learning_rate": 9.415974997542565e-07, "loss": 0.0245, "step": 113980 }, { "epoch": 1.7495203744915968, "grad_norm": 0.28826552629470825, "learning_rate": 9.404630608583265e-07, "loss": 0.027, "step": 113990 }, { "epoch": 1.749673854654286, "grad_norm": 0.3472306728363037, "learning_rate": 9.393292720400427e-07, "loss": 0.0218, "step": 114000 }, { "epoch": 1.7498273348169748, "grad_norm": 0.5276104211807251, "learning_rate": 9.381961333807633e-07, "loss": 0.0284, "step": 114010 }, { "epoch": 1.7499808149796638, "grad_norm": 0.36560866236686707, "learning_rate": 9.370636449618009e-07, "loss": 0.0248, "step": 114020 }, { "epoch": 1.7501342951423529, "grad_norm": 0.31701353192329407, "learning_rate": 9.359318068644163e-07, "loss": 0.0256, "step": 114030 }, { "epoch": 1.7502877753050419, "grad_norm": 0.3849540650844574, "learning_rate": 9.348006191698267e-07, "loss": 0.0235, "step": 114040 }, { "epoch": 1.7504412554677309, "grad_norm": 0.39104655385017395, "learning_rate": 9.336700819592015e-07, "loss": 0.0202, "step": 114050 }, { "epoch": 1.7505947356304197, "grad_norm": 0.42745184898376465, "learning_rate": 9.325401953136637e-07, "loss": 0.0282, "step": 114060 }, { "epoch": 1.7507482157931087, "grad_norm": 0.4347682595252991, "learning_rate": 9.314109593142873e-07, "loss": 0.0238, "step": 114070 }, { "epoch": 1.7509016959557977, "grad_norm": 0.3675927221775055, "learning_rate": 9.302823740421064e-07, "loss": 0.0216, "step": 114080 }, { "epoch": 1.7510551761184867, "grad_norm": 0.3662028908729553, "learning_rate": 9.291544395781027e-07, "loss": 0.023, "step": 114090 }, { "epoch": 1.7512086562811757, "grad_norm": 0.32454368472099304, "learning_rate": 9.280271560032083e-07, "loss": 0.0285, "step": 114100 }, { "epoch": 1.7513621364438645, "grad_norm": 0.3142722547054291, "learning_rate": 9.269005233983175e-07, "loss": 0.0219, "step": 114110 }, { "epoch": 1.7515156166065537, "grad_norm": 0.48539042472839355, "learning_rate": 9.257745418442721e-07, "loss": 0.0261, "step": 114120 }, { "epoch": 1.7516690967692425, "grad_norm": 0.2982621490955353, "learning_rate": 9.246492114218675e-07, "loss": 0.0169, "step": 114130 }, { "epoch": 1.7518225769319316, "grad_norm": 0.2862091064453125, "learning_rate": 9.235245322118524e-07, "loss": 0.0248, "step": 114140 }, { "epoch": 1.7519760570946206, "grad_norm": 0.3175675868988037, "learning_rate": 9.224005042949313e-07, "loss": 0.0225, "step": 114150 }, { "epoch": 1.7521295372573094, "grad_norm": 0.37119415402412415, "learning_rate": 9.212771277517585e-07, "loss": 0.0225, "step": 114160 }, { "epoch": 1.7522830174199986, "grad_norm": 0.36032819747924805, "learning_rate": 9.201544026629428e-07, "loss": 0.0234, "step": 114170 }, { "epoch": 1.7524364975826874, "grad_norm": 0.3907232880592346, "learning_rate": 9.190323291090497e-07, "loss": 0.0304, "step": 114180 }, { "epoch": 1.7525899777453764, "grad_norm": 0.38845187425613403, "learning_rate": 9.179109071705938e-07, "loss": 0.0285, "step": 114190 }, { "epoch": 1.7527434579080654, "grad_norm": 0.3607634902000427, "learning_rate": 9.167901369280396e-07, "loss": 0.0259, "step": 114200 }, { "epoch": 1.7528969380707542, "grad_norm": 0.29915812611579895, "learning_rate": 9.15670018461815e-07, "loss": 0.0275, "step": 114210 }, { "epoch": 1.7530504182334434, "grad_norm": 0.4179559051990509, "learning_rate": 9.145505518522934e-07, "loss": 0.0238, "step": 114220 }, { "epoch": 1.7532038983961322, "grad_norm": 0.29853400588035583, "learning_rate": 9.134317371798029e-07, "loss": 0.0188, "step": 114230 }, { "epoch": 1.7533573785588212, "grad_norm": 0.3667398989200592, "learning_rate": 9.123135745246259e-07, "loss": 0.0265, "step": 114240 }, { "epoch": 1.7535108587215102, "grad_norm": 0.5738543272018433, "learning_rate": 9.111960639669981e-07, "loss": 0.031, "step": 114250 }, { "epoch": 1.7536643388841993, "grad_norm": 0.3413912355899811, "learning_rate": 9.100792055871055e-07, "loss": 0.0218, "step": 114260 }, { "epoch": 1.7538178190468883, "grad_norm": 0.40442949533462524, "learning_rate": 9.089629994650906e-07, "loss": 0.0248, "step": 114270 }, { "epoch": 1.753971299209577, "grad_norm": 0.48559924960136414, "learning_rate": 9.078474456810493e-07, "loss": 0.0208, "step": 114280 }, { "epoch": 1.7541247793722663, "grad_norm": 0.38807982206344604, "learning_rate": 9.06732544315031e-07, "loss": 0.0311, "step": 114290 }, { "epoch": 1.754278259534955, "grad_norm": 0.41170716285705566, "learning_rate": 9.056182954470305e-07, "loss": 0.0269, "step": 114300 }, { "epoch": 1.754431739697644, "grad_norm": 0.3523755967617035, "learning_rate": 9.045046991570083e-07, "loss": 0.0261, "step": 114310 }, { "epoch": 1.7545852198603331, "grad_norm": 0.43227821588516235, "learning_rate": 9.033917555248684e-07, "loss": 0.0235, "step": 114320 }, { "epoch": 1.754738700023022, "grad_norm": 0.2618171274662018, "learning_rate": 9.022794646304712e-07, "loss": 0.0264, "step": 114330 }, { "epoch": 1.7548921801857111, "grad_norm": 0.36914917826652527, "learning_rate": 9.011678265536361e-07, "loss": 0.0224, "step": 114340 }, { "epoch": 1.7550456603484, "grad_norm": 0.4849275052547455, "learning_rate": 9.000568413741228e-07, "loss": 0.0298, "step": 114350 }, { "epoch": 1.755199140511089, "grad_norm": 0.27517709136009216, "learning_rate": 8.98946509171652e-07, "loss": 0.029, "step": 114360 }, { "epoch": 1.755352620673778, "grad_norm": 0.32746925950050354, "learning_rate": 8.978368300259021e-07, "loss": 0.0235, "step": 114370 }, { "epoch": 1.7555061008364667, "grad_norm": 0.4986775815486908, "learning_rate": 8.967278040164951e-07, "loss": 0.0295, "step": 114380 }, { "epoch": 1.755659580999156, "grad_norm": 0.3873104751110077, "learning_rate": 8.956194312230115e-07, "loss": 0.0234, "step": 114390 }, { "epoch": 1.7558130611618448, "grad_norm": 0.350922167301178, "learning_rate": 8.945117117249846e-07, "loss": 0.0216, "step": 114400 }, { "epoch": 1.7559665413245338, "grad_norm": 0.34214141964912415, "learning_rate": 8.934046456018996e-07, "loss": 0.0307, "step": 114410 }, { "epoch": 1.7561200214872228, "grad_norm": 0.472034215927124, "learning_rate": 8.922982329331953e-07, "loss": 0.0257, "step": 114420 }, { "epoch": 1.7562735016499116, "grad_norm": 0.33185216784477234, "learning_rate": 8.911924737982614e-07, "loss": 0.027, "step": 114430 }, { "epoch": 1.7564269818126008, "grad_norm": 0.4090185761451721, "learning_rate": 8.900873682764499e-07, "loss": 0.027, "step": 114440 }, { "epoch": 1.7565804619752896, "grad_norm": 0.3205241560935974, "learning_rate": 8.889829164470509e-07, "loss": 0.0226, "step": 114450 }, { "epoch": 1.7567339421379786, "grad_norm": 0.4054884612560272, "learning_rate": 8.878791183893176e-07, "loss": 0.028, "step": 114460 }, { "epoch": 1.7568874223006676, "grad_norm": 0.353730708360672, "learning_rate": 8.867759741824578e-07, "loss": 0.0247, "step": 114470 }, { "epoch": 1.7570409024633566, "grad_norm": 0.34343862533569336, "learning_rate": 8.856734839056258e-07, "loss": 0.0264, "step": 114480 }, { "epoch": 1.7571943826260457, "grad_norm": 0.31784817576408386, "learning_rate": 8.84571647637934e-07, "loss": 0.0276, "step": 114490 }, { "epoch": 1.7573478627887344, "grad_norm": 0.3119076192378998, "learning_rate": 8.834704654584447e-07, "loss": 0.0236, "step": 114500 }, { "epoch": 1.7575013429514237, "grad_norm": 0.3292590379714966, "learning_rate": 8.823699374461747e-07, "loss": 0.0248, "step": 114510 }, { "epoch": 1.7576548231141125, "grad_norm": 0.36171990633010864, "learning_rate": 8.812700636800931e-07, "loss": 0.0216, "step": 114520 }, { "epoch": 1.7578083032768015, "grad_norm": 0.32507431507110596, "learning_rate": 8.801708442391221e-07, "loss": 0.0258, "step": 114530 }, { "epoch": 1.7579617834394905, "grad_norm": 0.3548523187637329, "learning_rate": 8.790722792021422e-07, "loss": 0.0254, "step": 114540 }, { "epoch": 1.7581152636021793, "grad_norm": 0.5018185973167419, "learning_rate": 8.779743686479769e-07, "loss": 0.0326, "step": 114550 }, { "epoch": 1.7582687437648685, "grad_norm": 0.3013935983181, "learning_rate": 8.768771126554076e-07, "loss": 0.0264, "step": 114560 }, { "epoch": 1.7584222239275573, "grad_norm": 0.32030192017555237, "learning_rate": 8.757805113031736e-07, "loss": 0.0261, "step": 114570 }, { "epoch": 1.7585757040902463, "grad_norm": 0.2703314423561096, "learning_rate": 8.746845646699609e-07, "loss": 0.0211, "step": 114580 }, { "epoch": 1.7587291842529353, "grad_norm": 0.37039533257484436, "learning_rate": 8.735892728344098e-07, "loss": 0.0264, "step": 114590 }, { "epoch": 1.7588826644156241, "grad_norm": 0.3007342517375946, "learning_rate": 8.72494635875114e-07, "loss": 0.0284, "step": 114600 }, { "epoch": 1.7590361445783134, "grad_norm": 0.25974157452583313, "learning_rate": 8.71400653870621e-07, "loss": 0.0246, "step": 114610 }, { "epoch": 1.7591896247410022, "grad_norm": 0.3488106429576874, "learning_rate": 8.703073268994299e-07, "loss": 0.0226, "step": 114620 }, { "epoch": 1.7593431049036912, "grad_norm": 0.46791303157806396, "learning_rate": 8.69214655039996e-07, "loss": 0.0274, "step": 114630 }, { "epoch": 1.7594965850663802, "grad_norm": 0.3107255697250366, "learning_rate": 8.681226383707253e-07, "loss": 0.022, "step": 114640 }, { "epoch": 1.7596500652290692, "grad_norm": 0.3490760624408722, "learning_rate": 8.670312769699718e-07, "loss": 0.0288, "step": 114650 }, { "epoch": 1.7598035453917582, "grad_norm": 0.526707649230957, "learning_rate": 8.659405709160518e-07, "loss": 0.027, "step": 114660 }, { "epoch": 1.759957025554447, "grad_norm": 0.5768239498138428, "learning_rate": 8.648505202872304e-07, "loss": 0.0273, "step": 114670 }, { "epoch": 1.760110505717136, "grad_norm": 0.48998939990997314, "learning_rate": 8.637611251617239e-07, "loss": 0.0231, "step": 114680 }, { "epoch": 1.760263985879825, "grad_norm": 0.39132440090179443, "learning_rate": 8.626723856177032e-07, "loss": 0.026, "step": 114690 }, { "epoch": 1.760417466042514, "grad_norm": 0.33117473125457764, "learning_rate": 8.615843017332926e-07, "loss": 0.0211, "step": 114700 }, { "epoch": 1.760570946205203, "grad_norm": 0.19530729949474335, "learning_rate": 8.604968735865681e-07, "loss": 0.0237, "step": 114710 }, { "epoch": 1.7607244263678918, "grad_norm": 0.4619505703449249, "learning_rate": 8.594101012555589e-07, "loss": 0.026, "step": 114720 }, { "epoch": 1.760877906530581, "grad_norm": 0.35224539041519165, "learning_rate": 8.583239848182501e-07, "loss": 0.0264, "step": 114730 }, { "epoch": 1.7610313866932699, "grad_norm": 0.44167935848236084, "learning_rate": 8.572385243525772e-07, "loss": 0.0254, "step": 114740 }, { "epoch": 1.7611848668559589, "grad_norm": 0.2700085937976837, "learning_rate": 8.561537199364234e-07, "loss": 0.024, "step": 114750 }, { "epoch": 1.7613383470186479, "grad_norm": 0.31586962938308716, "learning_rate": 8.550695716476364e-07, "loss": 0.0265, "step": 114760 }, { "epoch": 1.7614918271813367, "grad_norm": 0.4918757677078247, "learning_rate": 8.539860795640076e-07, "loss": 0.0242, "step": 114770 }, { "epoch": 1.761645307344026, "grad_norm": 0.32652172446250916, "learning_rate": 8.529032437632856e-07, "loss": 0.0206, "step": 114780 }, { "epoch": 1.7617987875067147, "grad_norm": 0.3372389078140259, "learning_rate": 8.518210643231684e-07, "loss": 0.0186, "step": 114790 }, { "epoch": 1.7619522676694037, "grad_norm": 0.5381664633750916, "learning_rate": 8.507395413213104e-07, "loss": 0.0238, "step": 114800 }, { "epoch": 1.7621057478320927, "grad_norm": 0.38597172498703003, "learning_rate": 8.496586748353186e-07, "loss": 0.0276, "step": 114810 }, { "epoch": 1.7622592279947815, "grad_norm": 0.453964501619339, "learning_rate": 8.485784649427475e-07, "loss": 0.0211, "step": 114820 }, { "epoch": 1.7624127081574708, "grad_norm": 0.4522492289543152, "learning_rate": 8.474989117211141e-07, "loss": 0.0236, "step": 114830 }, { "epoch": 1.7625661883201595, "grad_norm": 0.3315264880657196, "learning_rate": 8.46420015247883e-07, "loss": 0.0224, "step": 114840 }, { "epoch": 1.7627196684828486, "grad_norm": 0.31217774748802185, "learning_rate": 8.453417756004667e-07, "loss": 0.0243, "step": 114850 }, { "epoch": 1.7628731486455376, "grad_norm": 0.43616366386413574, "learning_rate": 8.44264192856239e-07, "loss": 0.0349, "step": 114860 }, { "epoch": 1.7630266288082266, "grad_norm": 0.40688323974609375, "learning_rate": 8.431872670925223e-07, "loss": 0.021, "step": 114870 }, { "epoch": 1.7631801089709156, "grad_norm": 0.5669885873794556, "learning_rate": 8.421109983865928e-07, "loss": 0.0208, "step": 114880 }, { "epoch": 1.7633335891336044, "grad_norm": 0.31058281660079956, "learning_rate": 8.410353868156817e-07, "loss": 0.0233, "step": 114890 }, { "epoch": 1.7634870692962936, "grad_norm": 0.29634666442871094, "learning_rate": 8.399604324569666e-07, "loss": 0.0279, "step": 114900 }, { "epoch": 1.7636405494589824, "grad_norm": 0.40498635172843933, "learning_rate": 8.388861353875832e-07, "loss": 0.0255, "step": 114910 }, { "epoch": 1.7637940296216714, "grad_norm": 0.33007368445396423, "learning_rate": 8.378124956846212e-07, "loss": 0.0206, "step": 114920 }, { "epoch": 1.7639475097843604, "grad_norm": 0.34821847081184387, "learning_rate": 8.367395134251188e-07, "loss": 0.0235, "step": 114930 }, { "epoch": 1.7641009899470492, "grad_norm": 0.2833506464958191, "learning_rate": 8.356671886860701e-07, "loss": 0.021, "step": 114940 }, { "epoch": 1.7642544701097385, "grad_norm": 0.46696722507476807, "learning_rate": 8.345955215444213e-07, "loss": 0.0235, "step": 114950 }, { "epoch": 1.7644079502724273, "grad_norm": 0.4377575218677521, "learning_rate": 8.335245120770707e-07, "loss": 0.0248, "step": 114960 }, { "epoch": 1.7645614304351163, "grad_norm": 0.25831568241119385, "learning_rate": 8.324541603608682e-07, "loss": 0.0283, "step": 114970 }, { "epoch": 1.7647149105978053, "grad_norm": 0.24181123077869415, "learning_rate": 8.313844664726189e-07, "loss": 0.0247, "step": 114980 }, { "epoch": 1.764868390760494, "grad_norm": 0.49606600403785706, "learning_rate": 8.303154304890848e-07, "loss": 0.0209, "step": 114990 }, { "epoch": 1.7650218709231833, "grad_norm": 0.2869401276111603, "learning_rate": 8.292470524869689e-07, "loss": 0.0253, "step": 115000 }, { "epoch": 1.765175351085872, "grad_norm": 0.2567899823188782, "learning_rate": 8.281793325429344e-07, "loss": 0.0188, "step": 115010 }, { "epoch": 1.765328831248561, "grad_norm": 0.40042194724082947, "learning_rate": 8.271122707336021e-07, "loss": 0.0302, "step": 115020 }, { "epoch": 1.7654823114112501, "grad_norm": 0.3454708755016327, "learning_rate": 8.260458671355364e-07, "loss": 0.0234, "step": 115030 }, { "epoch": 1.765635791573939, "grad_norm": 0.37362411618232727, "learning_rate": 8.249801218252596e-07, "loss": 0.0229, "step": 115040 }, { "epoch": 1.7657892717366281, "grad_norm": 0.2679016888141632, "learning_rate": 8.239150348792446e-07, "loss": 0.0325, "step": 115050 }, { "epoch": 1.765942751899317, "grad_norm": 0.4099493622779846, "learning_rate": 8.228506063739183e-07, "loss": 0.0339, "step": 115060 }, { "epoch": 1.766096232062006, "grad_norm": 0.2945781648159027, "learning_rate": 8.217868363856607e-07, "loss": 0.0239, "step": 115070 }, { "epoch": 1.766249712224695, "grad_norm": 0.36718878149986267, "learning_rate": 8.207237249908007e-07, "loss": 0.0241, "step": 115080 }, { "epoch": 1.766403192387384, "grad_norm": 0.25143128633499146, "learning_rate": 8.196612722656306e-07, "loss": 0.025, "step": 115090 }, { "epoch": 1.766556672550073, "grad_norm": 0.37368112802505493, "learning_rate": 8.185994782863793e-07, "loss": 0.0165, "step": 115100 }, { "epoch": 1.7667101527127618, "grad_norm": 0.6398659348487854, "learning_rate": 8.175383431292405e-07, "loss": 0.0264, "step": 115110 }, { "epoch": 1.766863632875451, "grad_norm": 0.46554359793663025, "learning_rate": 8.164778668703588e-07, "loss": 0.0212, "step": 115120 }, { "epoch": 1.7670171130381398, "grad_norm": 0.3215292692184448, "learning_rate": 8.154180495858288e-07, "loss": 0.0184, "step": 115130 }, { "epoch": 1.7671705932008288, "grad_norm": 0.3629295527935028, "learning_rate": 8.143588913516976e-07, "loss": 0.0248, "step": 115140 }, { "epoch": 1.7673240733635178, "grad_norm": 0.3622027635574341, "learning_rate": 8.133003922439686e-07, "loss": 0.0245, "step": 115150 }, { "epoch": 1.7674775535262066, "grad_norm": 0.3100573718547821, "learning_rate": 8.122425523385947e-07, "loss": 0.0281, "step": 115160 }, { "epoch": 1.7676310336888958, "grad_norm": 0.37711265683174133, "learning_rate": 8.111853717114792e-07, "loss": 0.0236, "step": 115170 }, { "epoch": 1.7677845138515846, "grad_norm": 0.37262991070747375, "learning_rate": 8.101288504384874e-07, "loss": 0.0223, "step": 115180 }, { "epoch": 1.7679379940142737, "grad_norm": 0.43255892395973206, "learning_rate": 8.090729885954284e-07, "loss": 0.0284, "step": 115190 }, { "epoch": 1.7680914741769627, "grad_norm": 0.48462754487991333, "learning_rate": 8.08017786258064e-07, "loss": 0.0273, "step": 115200 }, { "epoch": 1.7682449543396515, "grad_norm": 0.5144026279449463, "learning_rate": 8.069632435021158e-07, "loss": 0.0413, "step": 115210 }, { "epoch": 1.7683984345023407, "grad_norm": 0.4425736665725708, "learning_rate": 8.05909360403252e-07, "loss": 0.0231, "step": 115220 }, { "epoch": 1.7685519146650295, "grad_norm": 0.3114392161369324, "learning_rate": 8.048561370370955e-07, "loss": 0.0268, "step": 115230 }, { "epoch": 1.7687053948277185, "grad_norm": 0.433551549911499, "learning_rate": 8.038035734792204e-07, "loss": 0.0305, "step": 115240 }, { "epoch": 1.7688588749904075, "grad_norm": 0.3845413327217102, "learning_rate": 8.027516698051563e-07, "loss": 0.0284, "step": 115250 }, { "epoch": 1.7690123551530965, "grad_norm": 0.31125152111053467, "learning_rate": 8.017004260903827e-07, "loss": 0.0213, "step": 115260 }, { "epoch": 1.7691658353157855, "grad_norm": 0.4465450942516327, "learning_rate": 8.006498424103315e-07, "loss": 0.0248, "step": 115270 }, { "epoch": 1.7693193154784743, "grad_norm": 0.38584020733833313, "learning_rate": 7.995999188403913e-07, "loss": 0.0311, "step": 115280 }, { "epoch": 1.7694727956411633, "grad_norm": 0.38596269488334656, "learning_rate": 7.985506554559019e-07, "loss": 0.0212, "step": 115290 }, { "epoch": 1.7696262758038523, "grad_norm": 0.6624442338943481, "learning_rate": 7.975020523321486e-07, "loss": 0.031, "step": 115300 }, { "epoch": 1.7697797559665414, "grad_norm": 0.43653053045272827, "learning_rate": 7.96454109544379e-07, "loss": 0.0206, "step": 115310 }, { "epoch": 1.7699332361292304, "grad_norm": 0.4034491181373596, "learning_rate": 7.954068271677906e-07, "loss": 0.0205, "step": 115320 }, { "epoch": 1.7700867162919192, "grad_norm": 0.40292543172836304, "learning_rate": 7.94360205277529e-07, "loss": 0.0194, "step": 115330 }, { "epoch": 1.7702401964546084, "grad_norm": 0.21154452860355377, "learning_rate": 7.933142439486985e-07, "loss": 0.0197, "step": 115340 }, { "epoch": 1.7703936766172972, "grad_norm": 0.273966521024704, "learning_rate": 7.922689432563524e-07, "loss": 0.0198, "step": 115350 }, { "epoch": 1.7705471567799862, "grad_norm": 0.26425573229789734, "learning_rate": 7.912243032754974e-07, "loss": 0.021, "step": 115360 }, { "epoch": 1.7707006369426752, "grad_norm": 0.40208008885383606, "learning_rate": 7.901803240810901e-07, "loss": 0.0292, "step": 115370 }, { "epoch": 1.770854117105364, "grad_norm": 0.315384179353714, "learning_rate": 7.891370057480474e-07, "loss": 0.0199, "step": 115380 }, { "epoch": 1.7710075972680532, "grad_norm": 0.33847305178642273, "learning_rate": 7.880943483512326e-07, "loss": 0.0322, "step": 115390 }, { "epoch": 1.771161077430742, "grad_norm": 0.3046483099460602, "learning_rate": 7.870523519654604e-07, "loss": 0.0259, "step": 115400 }, { "epoch": 1.771314557593431, "grad_norm": 0.5361121892929077, "learning_rate": 7.860110166655022e-07, "loss": 0.0205, "step": 115410 }, { "epoch": 1.77146803775612, "grad_norm": 0.4220659136772156, "learning_rate": 7.849703425260802e-07, "loss": 0.0233, "step": 115420 }, { "epoch": 1.7716215179188088, "grad_norm": 0.31852245330810547, "learning_rate": 7.83930329621867e-07, "loss": 0.0279, "step": 115430 }, { "epoch": 1.771774998081498, "grad_norm": 0.45256507396698, "learning_rate": 7.828909780274952e-07, "loss": 0.0301, "step": 115440 }, { "epoch": 1.7719284782441869, "grad_norm": 0.2704036235809326, "learning_rate": 7.818522878175394e-07, "loss": 0.0237, "step": 115450 }, { "epoch": 1.7720819584068759, "grad_norm": 0.49208053946495056, "learning_rate": 7.808142590665324e-07, "loss": 0.0282, "step": 115460 }, { "epoch": 1.772235438569565, "grad_norm": 0.36143237352371216, "learning_rate": 7.797768918489623e-07, "loss": 0.0217, "step": 115470 }, { "epoch": 1.772388918732254, "grad_norm": 0.3899565637111664, "learning_rate": 7.787401862392651e-07, "loss": 0.0267, "step": 115480 }, { "epoch": 1.772542398894943, "grad_norm": 0.3992176353931427, "learning_rate": 7.777041423118314e-07, "loss": 0.0239, "step": 115490 }, { "epoch": 1.7726958790576317, "grad_norm": 0.3521862328052521, "learning_rate": 7.766687601410016e-07, "loss": 0.0192, "step": 115500 }, { "epoch": 1.7728493592203207, "grad_norm": 0.3795531094074249, "learning_rate": 7.756340398010731e-07, "loss": 0.0256, "step": 115510 }, { "epoch": 1.7730028393830097, "grad_norm": 0.33096805214881897, "learning_rate": 7.74599981366293e-07, "loss": 0.0252, "step": 115520 }, { "epoch": 1.7731563195456987, "grad_norm": 0.4130086302757263, "learning_rate": 7.735665849108587e-07, "loss": 0.0226, "step": 115530 }, { "epoch": 1.7733097997083878, "grad_norm": 0.32425522804260254, "learning_rate": 7.725338505089286e-07, "loss": 0.0224, "step": 115540 }, { "epoch": 1.7734632798710765, "grad_norm": 0.31190764904022217, "learning_rate": 7.715017782346013e-07, "loss": 0.0214, "step": 115550 }, { "epoch": 1.7736167600337658, "grad_norm": 0.29473304748535156, "learning_rate": 7.704703681619374e-07, "loss": 0.0205, "step": 115560 }, { "epoch": 1.7737702401964546, "grad_norm": 0.45427629351615906, "learning_rate": 7.694396203649468e-07, "loss": 0.0288, "step": 115570 }, { "epoch": 1.7739237203591436, "grad_norm": 0.29358768463134766, "learning_rate": 7.684095349175924e-07, "loss": 0.0229, "step": 115580 }, { "epoch": 1.7740772005218326, "grad_norm": 0.25962284207344055, "learning_rate": 7.673801118937896e-07, "loss": 0.0257, "step": 115590 }, { "epoch": 1.7742306806845214, "grad_norm": 0.25775131583213806, "learning_rate": 7.663513513674048e-07, "loss": 0.0278, "step": 115600 }, { "epoch": 1.7743841608472106, "grad_norm": 0.36464765667915344, "learning_rate": 7.653232534122567e-07, "loss": 0.0281, "step": 115610 }, { "epoch": 1.7745376410098994, "grad_norm": 0.4916927218437195, "learning_rate": 7.642958181021209e-07, "loss": 0.0312, "step": 115620 }, { "epoch": 1.7746911211725884, "grad_norm": 0.5144627094268799, "learning_rate": 7.632690455107173e-07, "loss": 0.0258, "step": 115630 }, { "epoch": 1.7748446013352774, "grad_norm": 0.3816063106060028, "learning_rate": 7.622429357117312e-07, "loss": 0.0291, "step": 115640 }, { "epoch": 1.7749980814979662, "grad_norm": 0.3339635133743286, "learning_rate": 7.612174887787837e-07, "loss": 0.0305, "step": 115650 }, { "epoch": 1.7751515616606555, "grad_norm": 0.291843980550766, "learning_rate": 7.601927047854607e-07, "loss": 0.0191, "step": 115660 }, { "epoch": 1.7753050418233443, "grad_norm": 0.37860143184661865, "learning_rate": 7.591685838052976e-07, "loss": 0.0243, "step": 115670 }, { "epoch": 1.7754585219860333, "grad_norm": 0.49811217188835144, "learning_rate": 7.5814512591178e-07, "loss": 0.0342, "step": 115680 }, { "epoch": 1.7756120021487223, "grad_norm": 0.4585478901863098, "learning_rate": 7.571223311783482e-07, "loss": 0.0233, "step": 115690 }, { "epoch": 1.7757654823114113, "grad_norm": 0.349556028842926, "learning_rate": 7.561001996783945e-07, "loss": 0.0234, "step": 115700 }, { "epoch": 1.7759189624741003, "grad_norm": 0.3920888900756836, "learning_rate": 7.550787314852615e-07, "loss": 0.0215, "step": 115710 }, { "epoch": 1.776072442636789, "grad_norm": 0.29782944917678833, "learning_rate": 7.540579266722448e-07, "loss": 0.0242, "step": 115720 }, { "epoch": 1.7762259227994783, "grad_norm": 0.37447822093963623, "learning_rate": 7.53037785312597e-07, "loss": 0.0333, "step": 115730 }, { "epoch": 1.7763794029621671, "grad_norm": 0.45221570134162903, "learning_rate": 7.520183074795195e-07, "loss": 0.0261, "step": 115740 }, { "epoch": 1.7765328831248561, "grad_norm": 0.48734167218208313, "learning_rate": 7.509994932461606e-07, "loss": 0.0226, "step": 115750 }, { "epoch": 1.7766863632875451, "grad_norm": 0.28330522775650024, "learning_rate": 7.499813426856328e-07, "loss": 0.0311, "step": 115760 }, { "epoch": 1.776839843450234, "grad_norm": 0.4933253228664398, "learning_rate": 7.489638558709921e-07, "loss": 0.0285, "step": 115770 }, { "epoch": 1.7769933236129232, "grad_norm": 0.408760666847229, "learning_rate": 7.479470328752492e-07, "loss": 0.0222, "step": 115780 }, { "epoch": 1.777146803775612, "grad_norm": 0.26125192642211914, "learning_rate": 7.469308737713677e-07, "loss": 0.0211, "step": 115790 }, { "epoch": 1.777300283938301, "grad_norm": 0.38834455609321594, "learning_rate": 7.459153786322637e-07, "loss": 0.0274, "step": 115800 }, { "epoch": 1.77745376410099, "grad_norm": 0.4296885132789612, "learning_rate": 7.449005475308047e-07, "loss": 0.0305, "step": 115810 }, { "epoch": 1.7776072442636788, "grad_norm": 0.2711113393306732, "learning_rate": 7.4388638053981e-07, "loss": 0.0186, "step": 115820 }, { "epoch": 1.777760724426368, "grad_norm": 0.43728819489479065, "learning_rate": 7.428728777320549e-07, "loss": 0.0324, "step": 115830 }, { "epoch": 1.7779142045890568, "grad_norm": 0.2121163308620453, "learning_rate": 7.418600391802644e-07, "loss": 0.0263, "step": 115840 }, { "epoch": 1.7780676847517458, "grad_norm": 0.3999566435813904, "learning_rate": 7.408478649571127e-07, "loss": 0.0229, "step": 115850 }, { "epoch": 1.7782211649144348, "grad_norm": 0.4352774918079376, "learning_rate": 7.398363551352316e-07, "loss": 0.0262, "step": 115860 }, { "epoch": 1.7783746450771236, "grad_norm": 0.34253960847854614, "learning_rate": 7.388255097872044e-07, "loss": 0.0246, "step": 115870 }, { "epoch": 1.7785281252398129, "grad_norm": 0.4213806092739105, "learning_rate": 7.378153289855639e-07, "loss": 0.0228, "step": 115880 }, { "epoch": 1.7786816054025016, "grad_norm": 0.2605774700641632, "learning_rate": 7.368058128027966e-07, "loss": 0.0165, "step": 115890 }, { "epoch": 1.7788350855651907, "grad_norm": 0.3688752353191376, "learning_rate": 7.357969613113425e-07, "loss": 0.025, "step": 115900 }, { "epoch": 1.7789885657278797, "grad_norm": 0.305070698261261, "learning_rate": 7.347887745835924e-07, "loss": 0.0229, "step": 115910 }, { "epoch": 1.7791420458905687, "grad_norm": 0.39344775676727295, "learning_rate": 7.337812526918886e-07, "loss": 0.0205, "step": 115920 }, { "epoch": 1.7792955260532577, "grad_norm": 0.3504753112792969, "learning_rate": 7.327743957085287e-07, "loss": 0.0322, "step": 115930 }, { "epoch": 1.7794490062159465, "grad_norm": 0.3024454712867737, "learning_rate": 7.317682037057616e-07, "loss": 0.0263, "step": 115940 }, { "epoch": 1.7796024863786357, "grad_norm": 0.42397329211235046, "learning_rate": 7.307626767557874e-07, "loss": 0.0224, "step": 115950 }, { "epoch": 1.7797559665413245, "grad_norm": 0.33685365319252014, "learning_rate": 7.297578149307571e-07, "loss": 0.0184, "step": 115960 }, { "epoch": 1.7799094467040135, "grad_norm": 0.36812126636505127, "learning_rate": 7.287536183027777e-07, "loss": 0.0221, "step": 115970 }, { "epoch": 1.7800629268667025, "grad_norm": 0.37868157029151917, "learning_rate": 7.277500869439025e-07, "loss": 0.0243, "step": 115980 }, { "epoch": 1.7802164070293913, "grad_norm": 0.28510189056396484, "learning_rate": 7.267472209261495e-07, "loss": 0.0209, "step": 115990 }, { "epoch": 1.7803698871920806, "grad_norm": 0.24620124697685242, "learning_rate": 7.257450203214722e-07, "loss": 0.0215, "step": 116000 }, { "epoch": 1.7805233673547693, "grad_norm": 0.41872546076774597, "learning_rate": 7.247434852017865e-07, "loss": 0.0256, "step": 116010 }, { "epoch": 1.7806768475174584, "grad_norm": 0.3437638580799103, "learning_rate": 7.237426156389616e-07, "loss": 0.0324, "step": 116020 }, { "epoch": 1.7808303276801474, "grad_norm": 0.30240872502326965, "learning_rate": 7.227424117048143e-07, "loss": 0.0204, "step": 116030 }, { "epoch": 1.7809838078428362, "grad_norm": 0.31341591477394104, "learning_rate": 7.217428734711152e-07, "loss": 0.0246, "step": 116040 }, { "epoch": 1.7811372880055254, "grad_norm": 0.31168892979621887, "learning_rate": 7.207440010095879e-07, "loss": 0.0249, "step": 116050 }, { "epoch": 1.7812907681682142, "grad_norm": 0.4459346830844879, "learning_rate": 7.197457943919073e-07, "loss": 0.0259, "step": 116060 }, { "epoch": 1.7814442483309032, "grad_norm": 0.3995806574821472, "learning_rate": 7.187482536897006e-07, "loss": 0.0264, "step": 116070 }, { "epoch": 1.7815977284935922, "grad_norm": 0.22922980785369873, "learning_rate": 7.177513789745471e-07, "loss": 0.0216, "step": 116080 }, { "epoch": 1.7817512086562812, "grad_norm": 0.29139894247055054, "learning_rate": 7.167551703179821e-07, "loss": 0.0187, "step": 116090 }, { "epoch": 1.7819046888189702, "grad_norm": 0.38162028789520264, "learning_rate": 7.157596277914858e-07, "loss": 0.0227, "step": 116100 }, { "epoch": 1.782058168981659, "grad_norm": 0.3456236720085144, "learning_rate": 7.147647514664935e-07, "loss": 0.0289, "step": 116110 }, { "epoch": 1.782211649144348, "grad_norm": 0.4055733382701874, "learning_rate": 7.137705414143981e-07, "loss": 0.0261, "step": 116120 }, { "epoch": 1.782365129307037, "grad_norm": 0.34999358654022217, "learning_rate": 7.127769977065402e-07, "loss": 0.0249, "step": 116130 }, { "epoch": 1.782518609469726, "grad_norm": 0.444804310798645, "learning_rate": 7.117841204142073e-07, "loss": 0.0245, "step": 116140 }, { "epoch": 1.782672089632415, "grad_norm": 0.42882081866264343, "learning_rate": 7.107919096086491e-07, "loss": 0.0271, "step": 116150 }, { "epoch": 1.7828255697951039, "grad_norm": 0.5311205983161926, "learning_rate": 7.098003653610619e-07, "loss": 0.0187, "step": 116160 }, { "epoch": 1.782979049957793, "grad_norm": 0.45999571681022644, "learning_rate": 7.088094877425944e-07, "loss": 0.0242, "step": 116170 }, { "epoch": 1.783132530120482, "grad_norm": 0.4214900732040405, "learning_rate": 7.078192768243486e-07, "loss": 0.0246, "step": 116180 }, { "epoch": 1.783286010283171, "grad_norm": 0.34560537338256836, "learning_rate": 7.068297326773787e-07, "loss": 0.0218, "step": 116190 }, { "epoch": 1.78343949044586, "grad_norm": 0.39082297682762146, "learning_rate": 7.058408553726881e-07, "loss": 0.0222, "step": 116200 }, { "epoch": 1.7835929706085487, "grad_norm": 0.41497135162353516, "learning_rate": 7.048526449812399e-07, "loss": 0.0241, "step": 116210 }, { "epoch": 1.783746450771238, "grad_norm": 0.38481518626213074, "learning_rate": 7.038651015739396e-07, "loss": 0.0243, "step": 116220 }, { "epoch": 1.7838999309339267, "grad_norm": 0.3413398861885071, "learning_rate": 7.028782252216514e-07, "loss": 0.0216, "step": 116230 }, { "epoch": 1.7840534110966157, "grad_norm": 0.26938897371292114, "learning_rate": 7.018920159951903e-07, "loss": 0.0255, "step": 116240 }, { "epoch": 1.7842068912593048, "grad_norm": 0.30332356691360474, "learning_rate": 7.009064739653226e-07, "loss": 0.0192, "step": 116250 }, { "epoch": 1.7843603714219936, "grad_norm": 0.40577223896980286, "learning_rate": 6.999215992027652e-07, "loss": 0.0237, "step": 116260 }, { "epoch": 1.7845138515846828, "grad_norm": 0.27159759402275085, "learning_rate": 6.989373917781894e-07, "loss": 0.024, "step": 116270 }, { "epoch": 1.7846673317473716, "grad_norm": 0.3098919093608856, "learning_rate": 6.97953851762222e-07, "loss": 0.0261, "step": 116280 }, { "epoch": 1.7848208119100606, "grad_norm": 0.46160274744033813, "learning_rate": 6.969709792254342e-07, "loss": 0.0235, "step": 116290 }, { "epoch": 1.7849742920727496, "grad_norm": 0.3372844457626343, "learning_rate": 6.95988774238352e-07, "loss": 0.0275, "step": 116300 }, { "epoch": 1.7851277722354386, "grad_norm": 0.3105148375034332, "learning_rate": 6.950072368714588e-07, "loss": 0.0203, "step": 116310 }, { "epoch": 1.7852812523981276, "grad_norm": 0.44044625759124756, "learning_rate": 6.94026367195183e-07, "loss": 0.0274, "step": 116320 }, { "epoch": 1.7854347325608164, "grad_norm": 0.5042412877082825, "learning_rate": 6.930461652799103e-07, "loss": 0.0284, "step": 116330 }, { "epoch": 1.7855882127235057, "grad_norm": 0.3051230013370514, "learning_rate": 6.920666311959745e-07, "loss": 0.0235, "step": 116340 }, { "epoch": 1.7857416928861944, "grad_norm": 0.4708715081214905, "learning_rate": 6.910877650136627e-07, "loss": 0.0236, "step": 116350 }, { "epoch": 1.7858951730488835, "grad_norm": 0.23533643782138824, "learning_rate": 6.901095668032165e-07, "loss": 0.0258, "step": 116360 }, { "epoch": 1.7860486532115725, "grad_norm": 0.3911712169647217, "learning_rate": 6.891320366348241e-07, "loss": 0.0225, "step": 116370 }, { "epoch": 1.7862021333742613, "grad_norm": 0.36305826902389526, "learning_rate": 6.881551745786363e-07, "loss": 0.0291, "step": 116380 }, { "epoch": 1.7863556135369505, "grad_norm": 0.3881266117095947, "learning_rate": 6.871789807047425e-07, "loss": 0.0235, "step": 116390 }, { "epoch": 1.7865090936996393, "grad_norm": 0.45624658465385437, "learning_rate": 6.862034550831908e-07, "loss": 0.0231, "step": 116400 }, { "epoch": 1.7866625738623283, "grad_norm": 0.6258121728897095, "learning_rate": 6.852285977839846e-07, "loss": 0.0243, "step": 116410 }, { "epoch": 1.7868160540250173, "grad_norm": 0.2439531534910202, "learning_rate": 6.842544088770753e-07, "loss": 0.0242, "step": 116420 }, { "epoch": 1.786969534187706, "grad_norm": 0.48185986280441284, "learning_rate": 6.832808884323649e-07, "loss": 0.0285, "step": 116430 }, { "epoch": 1.7871230143503953, "grad_norm": 0.2866286635398865, "learning_rate": 6.82308036519711e-07, "loss": 0.0189, "step": 116440 }, { "epoch": 1.7872764945130841, "grad_norm": 0.3739016354084015, "learning_rate": 6.81335853208922e-07, "loss": 0.0298, "step": 116450 }, { "epoch": 1.7874299746757731, "grad_norm": 0.31843802332878113, "learning_rate": 6.803643385697555e-07, "loss": 0.0218, "step": 116460 }, { "epoch": 1.7875834548384621, "grad_norm": 0.31005018949508667, "learning_rate": 6.79393492671927e-07, "loss": 0.0276, "step": 116470 }, { "epoch": 1.787736935001151, "grad_norm": 0.21666811406612396, "learning_rate": 6.784233155851005e-07, "loss": 0.0237, "step": 116480 }, { "epoch": 1.7878904151638402, "grad_norm": 0.32350197434425354, "learning_rate": 6.774538073788883e-07, "loss": 0.0231, "step": 116490 }, { "epoch": 1.788043895326529, "grad_norm": 0.3507130444049835, "learning_rate": 6.764849681228624e-07, "loss": 0.0231, "step": 116500 }, { "epoch": 1.788197375489218, "grad_norm": 0.3766390085220337, "learning_rate": 6.755167978865429e-07, "loss": 0.0246, "step": 116510 }, { "epoch": 1.788350855651907, "grad_norm": 0.3500533401966095, "learning_rate": 6.745492967394007e-07, "loss": 0.0274, "step": 116520 }, { "epoch": 1.788504335814596, "grad_norm": 0.3377363681793213, "learning_rate": 6.735824647508594e-07, "loss": 0.0225, "step": 116530 }, { "epoch": 1.788657815977285, "grad_norm": 0.3620826303958893, "learning_rate": 6.726163019902965e-07, "loss": 0.0291, "step": 116540 }, { "epoch": 1.7888112961399738, "grad_norm": 0.33768507838249207, "learning_rate": 6.71650808527039e-07, "loss": 0.0278, "step": 116550 }, { "epoch": 1.788964776302663, "grad_norm": 0.3884657919406891, "learning_rate": 6.70685984430367e-07, "loss": 0.0229, "step": 116560 }, { "epoch": 1.7891182564653518, "grad_norm": 0.3003748953342438, "learning_rate": 6.697218297695141e-07, "loss": 0.0249, "step": 116570 }, { "epoch": 1.7892717366280408, "grad_norm": 0.3357473909854889, "learning_rate": 6.687583446136647e-07, "loss": 0.0309, "step": 116580 }, { "epoch": 1.7894252167907299, "grad_norm": 0.32162725925445557, "learning_rate": 6.677955290319515e-07, "loss": 0.0212, "step": 116590 }, { "epoch": 1.7895786969534186, "grad_norm": 0.4190404713153839, "learning_rate": 6.668333830934648e-07, "loss": 0.0274, "step": 116600 }, { "epoch": 1.7897321771161079, "grad_norm": 0.33958959579467773, "learning_rate": 6.658719068672447e-07, "loss": 0.021, "step": 116610 }, { "epoch": 1.7898856572787967, "grad_norm": 0.20874488353729248, "learning_rate": 6.64911100422283e-07, "loss": 0.0269, "step": 116620 }, { "epoch": 1.7900391374414857, "grad_norm": 0.5569067001342773, "learning_rate": 6.639509638275232e-07, "loss": 0.0269, "step": 116630 }, { "epoch": 1.7901926176041747, "grad_norm": 0.2776302397251129, "learning_rate": 6.629914971518603e-07, "loss": 0.0217, "step": 116640 }, { "epoch": 1.7903460977668635, "grad_norm": 0.30283620953559875, "learning_rate": 6.620327004641425e-07, "loss": 0.0258, "step": 116650 }, { "epoch": 1.7904995779295527, "grad_norm": 0.32046040892601013, "learning_rate": 6.610745738331681e-07, "loss": 0.0184, "step": 116660 }, { "epoch": 1.7906530580922415, "grad_norm": 0.3652600944042206, "learning_rate": 6.60117117327691e-07, "loss": 0.0271, "step": 116670 }, { "epoch": 1.7908065382549305, "grad_norm": 0.3899650573730469, "learning_rate": 6.591603310164151e-07, "loss": 0.0254, "step": 116680 }, { "epoch": 1.7909600184176195, "grad_norm": 0.14104829728603363, "learning_rate": 6.582042149679912e-07, "loss": 0.0213, "step": 116690 }, { "epoch": 1.7911134985803085, "grad_norm": 0.28711721301078796, "learning_rate": 6.572487692510309e-07, "loss": 0.0278, "step": 116700 }, { "epoch": 1.7912669787429976, "grad_norm": 0.536133348941803, "learning_rate": 6.562939939340929e-07, "loss": 0.0243, "step": 116710 }, { "epoch": 1.7914204589056864, "grad_norm": 0.2749120593070984, "learning_rate": 6.553398890856843e-07, "loss": 0.0227, "step": 116720 }, { "epoch": 1.7915739390683754, "grad_norm": 0.40134379267692566, "learning_rate": 6.54386454774274e-07, "loss": 0.0375, "step": 116730 }, { "epoch": 1.7917274192310644, "grad_norm": 0.30185848474502563, "learning_rate": 6.534336910682725e-07, "loss": 0.0248, "step": 116740 }, { "epoch": 1.7918808993937534, "grad_norm": 0.3890056312084198, "learning_rate": 6.524815980360466e-07, "loss": 0.0222, "step": 116750 }, { "epoch": 1.7920343795564424, "grad_norm": 0.5917120575904846, "learning_rate": 6.515301757459169e-07, "loss": 0.0266, "step": 116760 }, { "epoch": 1.7921878597191312, "grad_norm": 0.31503763794898987, "learning_rate": 6.505794242661523e-07, "loss": 0.0298, "step": 116770 }, { "epoch": 1.7923413398818204, "grad_norm": 0.5723220705986023, "learning_rate": 6.49629343664977e-07, "loss": 0.0336, "step": 116780 }, { "epoch": 1.7924948200445092, "grad_norm": 0.2198256105184555, "learning_rate": 6.486799340105621e-07, "loss": 0.0195, "step": 116790 }, { "epoch": 1.7926483002071982, "grad_norm": 0.22646351158618927, "learning_rate": 6.477311953710364e-07, "loss": 0.0227, "step": 116800 }, { "epoch": 1.7928017803698872, "grad_norm": 0.2653084695339203, "learning_rate": 6.467831278144765e-07, "loss": 0.0211, "step": 116810 }, { "epoch": 1.792955260532576, "grad_norm": 0.428623229265213, "learning_rate": 6.458357314089103e-07, "loss": 0.0261, "step": 116820 }, { "epoch": 1.7931087406952653, "grad_norm": 0.6278844475746155, "learning_rate": 6.448890062223234e-07, "loss": 0.0327, "step": 116830 }, { "epoch": 1.793262220857954, "grad_norm": 0.37069472670555115, "learning_rate": 6.439429523226471e-07, "loss": 0.0268, "step": 116840 }, { "epoch": 1.793415701020643, "grad_norm": 0.5077194571495056, "learning_rate": 6.429975697777635e-07, "loss": 0.0311, "step": 116850 }, { "epoch": 1.793569181183332, "grad_norm": 0.37822967767715454, "learning_rate": 6.420528586555153e-07, "loss": 0.0251, "step": 116860 }, { "epoch": 1.7937226613460209, "grad_norm": 0.41666412353515625, "learning_rate": 6.41108819023688e-07, "loss": 0.0233, "step": 116870 }, { "epoch": 1.79387614150871, "grad_norm": 0.2802579402923584, "learning_rate": 6.401654509500232e-07, "loss": 0.0268, "step": 116880 }, { "epoch": 1.794029621671399, "grad_norm": 0.32334959506988525, "learning_rate": 6.392227545022134e-07, "loss": 0.0257, "step": 116890 }, { "epoch": 1.794183101834088, "grad_norm": 0.7745110392570496, "learning_rate": 6.382807297479021e-07, "loss": 0.024, "step": 116900 }, { "epoch": 1.794336581996777, "grad_norm": 0.5011505484580994, "learning_rate": 6.373393767546865e-07, "loss": 0.0262, "step": 116910 }, { "epoch": 1.794490062159466, "grad_norm": 0.3421163856983185, "learning_rate": 6.363986955901113e-07, "loss": 0.0294, "step": 116920 }, { "epoch": 1.794643542322155, "grad_norm": 0.3371242880821228, "learning_rate": 6.354586863216827e-07, "loss": 0.0237, "step": 116930 }, { "epoch": 1.7947970224848437, "grad_norm": 0.4661252200603485, "learning_rate": 6.345193490168467e-07, "loss": 0.0325, "step": 116940 }, { "epoch": 1.7949505026475328, "grad_norm": 0.37945815920829773, "learning_rate": 6.335806837430058e-07, "loss": 0.0251, "step": 116950 }, { "epoch": 1.7951039828102218, "grad_norm": 0.47109487652778625, "learning_rate": 6.326426905675187e-07, "loss": 0.0237, "step": 116960 }, { "epoch": 1.7952574629729108, "grad_norm": 0.3205859661102295, "learning_rate": 6.317053695576914e-07, "loss": 0.023, "step": 116970 }, { "epoch": 1.7954109431355998, "grad_norm": 0.40633872151374817, "learning_rate": 6.307687207807811e-07, "loss": 0.0344, "step": 116980 }, { "epoch": 1.7955644232982886, "grad_norm": 0.392251580953598, "learning_rate": 6.298327443039998e-07, "loss": 0.0237, "step": 116990 }, { "epoch": 1.7957179034609778, "grad_norm": 0.2746320068836212, "learning_rate": 6.28897440194508e-07, "loss": 0.02, "step": 117000 }, { "epoch": 1.7958713836236666, "grad_norm": 0.3382672667503357, "learning_rate": 6.27962808519419e-07, "loss": 0.0227, "step": 117010 }, { "epoch": 1.7960248637863556, "grad_norm": 0.3526454567909241, "learning_rate": 6.27028849345801e-07, "loss": 0.0235, "step": 117020 }, { "epoch": 1.7961783439490446, "grad_norm": 0.2675512731075287, "learning_rate": 6.260955627406706e-07, "loss": 0.0262, "step": 117030 }, { "epoch": 1.7963318241117334, "grad_norm": 0.3226448893547058, "learning_rate": 6.251629487709943e-07, "loss": 0.0204, "step": 117040 }, { "epoch": 1.7964853042744227, "grad_norm": 0.29966798424720764, "learning_rate": 6.242310075036961e-07, "loss": 0.0283, "step": 117050 }, { "epoch": 1.7966387844371114, "grad_norm": 0.2206328809261322, "learning_rate": 6.232997390056472e-07, "loss": 0.0234, "step": 117060 }, { "epoch": 1.7967922645998005, "grad_norm": 0.5049750804901123, "learning_rate": 6.223691433436707e-07, "loss": 0.0277, "step": 117070 }, { "epoch": 1.7969457447624895, "grad_norm": 0.4301566481590271, "learning_rate": 6.214392205845454e-07, "loss": 0.0323, "step": 117080 }, { "epoch": 1.7970992249251783, "grad_norm": 0.2575201094150543, "learning_rate": 6.205099707949958e-07, "loss": 0.0224, "step": 117090 }, { "epoch": 1.7972527050878675, "grad_norm": 0.2121102213859558, "learning_rate": 6.195813940417039e-07, "loss": 0.0228, "step": 117100 }, { "epoch": 1.7974061852505563, "grad_norm": 0.4191237688064575, "learning_rate": 6.186534903912977e-07, "loss": 0.0224, "step": 117110 }, { "epoch": 1.7975596654132453, "grad_norm": 0.5459864139556885, "learning_rate": 6.177262599103639e-07, "loss": 0.0247, "step": 117120 }, { "epoch": 1.7977131455759343, "grad_norm": 0.3417057991027832, "learning_rate": 6.16799702665436e-07, "loss": 0.021, "step": 117130 }, { "epoch": 1.7978666257386233, "grad_norm": 0.287946879863739, "learning_rate": 6.158738187229974e-07, "loss": 0.0218, "step": 117140 }, { "epoch": 1.7980201059013123, "grad_norm": 0.37693771719932556, "learning_rate": 6.149486081494893e-07, "loss": 0.0173, "step": 117150 }, { "epoch": 1.7981735860640011, "grad_norm": 0.3485463857650757, "learning_rate": 6.140240710112988e-07, "loss": 0.0274, "step": 117160 }, { "epoch": 1.7983270662266904, "grad_norm": 0.3006444573402405, "learning_rate": 6.131002073747694e-07, "loss": 0.0301, "step": 117170 }, { "epoch": 1.7984805463893792, "grad_norm": 0.47070249915122986, "learning_rate": 6.121770173061925e-07, "loss": 0.0292, "step": 117180 }, { "epoch": 1.7986340265520682, "grad_norm": 0.41195544600486755, "learning_rate": 6.112545008718129e-07, "loss": 0.0201, "step": 117190 }, { "epoch": 1.7987875067147572, "grad_norm": 0.4552505314350128, "learning_rate": 6.103326581378267e-07, "loss": 0.031, "step": 117200 }, { "epoch": 1.798940986877446, "grad_norm": 0.37669745087623596, "learning_rate": 6.094114891703806e-07, "loss": 0.0209, "step": 117210 }, { "epoch": 1.7990944670401352, "grad_norm": 0.4021371603012085, "learning_rate": 6.084909940355776e-07, "loss": 0.0242, "step": 117220 }, { "epoch": 1.799247947202824, "grad_norm": 0.2833136320114136, "learning_rate": 6.075711727994682e-07, "loss": 0.0217, "step": 117230 }, { "epoch": 1.799401427365513, "grad_norm": 0.5213570594787598, "learning_rate": 6.066520255280506e-07, "loss": 0.0291, "step": 117240 }, { "epoch": 1.799554907528202, "grad_norm": 0.2519209384918213, "learning_rate": 6.057335522872853e-07, "loss": 0.0191, "step": 117250 }, { "epoch": 1.7997083876908908, "grad_norm": 0.3160223364830017, "learning_rate": 6.048157531430743e-07, "loss": 0.0195, "step": 117260 }, { "epoch": 1.79986186785358, "grad_norm": 0.40435120463371277, "learning_rate": 6.038986281612768e-07, "loss": 0.0241, "step": 117270 }, { "epoch": 1.8000153480162688, "grad_norm": 0.41953539848327637, "learning_rate": 6.029821774077038e-07, "loss": 0.0287, "step": 117280 }, { "epoch": 1.8001688281789578, "grad_norm": 0.4584865868091583, "learning_rate": 6.020664009481148e-07, "loss": 0.0222, "step": 117290 }, { "epoch": 1.8003223083416469, "grad_norm": 0.4224565029144287, "learning_rate": 6.011512988482193e-07, "loss": 0.0296, "step": 117300 }, { "epoch": 1.8004757885043357, "grad_norm": 0.2503001391887665, "learning_rate": 6.002368711736872e-07, "loss": 0.0151, "step": 117310 }, { "epoch": 1.8006292686670249, "grad_norm": 0.30735287070274353, "learning_rate": 5.993231179901304e-07, "loss": 0.0289, "step": 117320 }, { "epoch": 1.8007827488297137, "grad_norm": 0.30608516931533813, "learning_rate": 5.984100393631187e-07, "loss": 0.0297, "step": 117330 }, { "epoch": 1.8009362289924027, "grad_norm": 0.4487093687057495, "learning_rate": 5.974976353581697e-07, "loss": 0.0246, "step": 117340 }, { "epoch": 1.8010897091550917, "grad_norm": 0.343465119600296, "learning_rate": 5.965859060407542e-07, "loss": 0.0234, "step": 117350 }, { "epoch": 1.8012431893177807, "grad_norm": 0.3922221064567566, "learning_rate": 5.956748514762945e-07, "loss": 0.0267, "step": 117360 }, { "epoch": 1.8013966694804697, "grad_norm": 0.3811892867088318, "learning_rate": 5.947644717301638e-07, "loss": 0.0255, "step": 117370 }, { "epoch": 1.8015501496431585, "grad_norm": 0.4572645425796509, "learning_rate": 5.938547668676897e-07, "loss": 0.0267, "step": 117380 }, { "epoch": 1.8017036298058478, "grad_norm": 0.23656481504440308, "learning_rate": 5.929457369541469e-07, "loss": 0.0241, "step": 117390 }, { "epoch": 1.8018571099685365, "grad_norm": 0.3142581582069397, "learning_rate": 5.92037382054762e-07, "loss": 0.0281, "step": 117400 }, { "epoch": 1.8020105901312256, "grad_norm": 0.44456443190574646, "learning_rate": 5.911297022347195e-07, "loss": 0.0225, "step": 117410 }, { "epoch": 1.8021640702939146, "grad_norm": 0.3464118540287018, "learning_rate": 5.902226975591497e-07, "loss": 0.0251, "step": 117420 }, { "epoch": 1.8023175504566034, "grad_norm": 0.48641157150268555, "learning_rate": 5.893163680931347e-07, "loss": 0.0288, "step": 117430 }, { "epoch": 1.8024710306192926, "grad_norm": 0.5298342704772949, "learning_rate": 5.884107139017104e-07, "loss": 0.0252, "step": 117440 }, { "epoch": 1.8026245107819814, "grad_norm": 0.38823992013931274, "learning_rate": 5.875057350498614e-07, "loss": 0.0267, "step": 117450 }, { "epoch": 1.8027779909446704, "grad_norm": 0.24081355333328247, "learning_rate": 5.866014316025271e-07, "loss": 0.0237, "step": 117460 }, { "epoch": 1.8029314711073594, "grad_norm": 0.4877554476261139, "learning_rate": 5.856978036245942e-07, "loss": 0.0269, "step": 117470 }, { "epoch": 1.8030849512700482, "grad_norm": 0.382808655500412, "learning_rate": 5.847948511809099e-07, "loss": 0.0237, "step": 117480 }, { "epoch": 1.8032384314327374, "grad_norm": 0.35287630558013916, "learning_rate": 5.838925743362589e-07, "loss": 0.023, "step": 117490 }, { "epoch": 1.8033919115954262, "grad_norm": 0.35041362047195435, "learning_rate": 5.829909731553884e-07, "loss": 0.0242, "step": 117500 }, { "epoch": 1.8035453917581152, "grad_norm": 0.3473363518714905, "learning_rate": 5.820900477029956e-07, "loss": 0.0299, "step": 117510 }, { "epoch": 1.8036988719208042, "grad_norm": 0.3048231601715088, "learning_rate": 5.811897980437253e-07, "loss": 0.0253, "step": 117520 }, { "epoch": 1.8038523520834933, "grad_norm": 0.36218252778053284, "learning_rate": 5.802902242421759e-07, "loss": 0.0247, "step": 117530 }, { "epoch": 1.8040058322461823, "grad_norm": 0.42716601490974426, "learning_rate": 5.793913263628992e-07, "loss": 0.0284, "step": 117540 }, { "epoch": 1.804159312408871, "grad_norm": 0.2841051518917084, "learning_rate": 5.784931044703956e-07, "loss": 0.0236, "step": 117550 }, { "epoch": 1.80431279257156, "grad_norm": 0.4259047508239746, "learning_rate": 5.775955586291148e-07, "loss": 0.022, "step": 117560 }, { "epoch": 1.804466272734249, "grad_norm": 0.25461193919181824, "learning_rate": 5.766986889034676e-07, "loss": 0.0224, "step": 117570 }, { "epoch": 1.804619752896938, "grad_norm": 0.4464721977710724, "learning_rate": 5.758024953578079e-07, "loss": 0.0241, "step": 117580 }, { "epoch": 1.8047732330596271, "grad_norm": 0.4631315767765045, "learning_rate": 5.749069780564376e-07, "loss": 0.0251, "step": 117590 }, { "epoch": 1.804926713222316, "grad_norm": 0.3775266408920288, "learning_rate": 5.740121370636231e-07, "loss": 0.0302, "step": 117600 }, { "epoch": 1.8050801933850051, "grad_norm": 0.5728793740272522, "learning_rate": 5.73117972443571e-07, "loss": 0.021, "step": 117610 }, { "epoch": 1.805233673547694, "grad_norm": 0.2624339461326599, "learning_rate": 5.722244842604429e-07, "loss": 0.0227, "step": 117620 }, { "epoch": 1.805387153710383, "grad_norm": 0.35625603795051575, "learning_rate": 5.713316725783535e-07, "loss": 0.0215, "step": 117630 }, { "epoch": 1.805540633873072, "grad_norm": 0.3353302478790283, "learning_rate": 5.704395374613669e-07, "loss": 0.0199, "step": 117640 }, { "epoch": 1.8056941140357607, "grad_norm": 0.5261114239692688, "learning_rate": 5.695480789734998e-07, "loss": 0.0289, "step": 117650 }, { "epoch": 1.80584759419845, "grad_norm": 0.4598756730556488, "learning_rate": 5.686572971787174e-07, "loss": 0.0231, "step": 117660 }, { "epoch": 1.8060010743611388, "grad_norm": 0.3070235252380371, "learning_rate": 5.677671921409422e-07, "loss": 0.0255, "step": 117670 }, { "epoch": 1.8061545545238278, "grad_norm": 0.44792723655700684, "learning_rate": 5.668777639240452e-07, "loss": 0.0268, "step": 117680 }, { "epoch": 1.8063080346865168, "grad_norm": 0.4233737885951996, "learning_rate": 5.659890125918422e-07, "loss": 0.0261, "step": 117690 }, { "epoch": 1.8064615148492056, "grad_norm": 0.44895729422569275, "learning_rate": 5.651009382081141e-07, "loss": 0.0289, "step": 117700 }, { "epoch": 1.8066149950118948, "grad_norm": 0.3793582618236542, "learning_rate": 5.642135408365812e-07, "loss": 0.0285, "step": 117710 }, { "epoch": 1.8067684751745836, "grad_norm": 0.27706649899482727, "learning_rate": 5.633268205409214e-07, "loss": 0.0287, "step": 117720 }, { "epoch": 1.8069219553372726, "grad_norm": 0.2966340482234955, "learning_rate": 5.624407773847618e-07, "loss": 0.0221, "step": 117730 }, { "epoch": 1.8070754354999616, "grad_norm": 0.3787805736064911, "learning_rate": 5.615554114316812e-07, "loss": 0.0255, "step": 117740 }, { "epoch": 1.8072289156626506, "grad_norm": 0.4746447801589966, "learning_rate": 5.606707227452113e-07, "loss": 0.0263, "step": 117750 }, { "epoch": 1.8073823958253397, "grad_norm": 0.27094191312789917, "learning_rate": 5.597867113888311e-07, "loss": 0.0227, "step": 117760 }, { "epoch": 1.8075358759880285, "grad_norm": 0.41655078530311584, "learning_rate": 5.589033774259778e-07, "loss": 0.0225, "step": 117770 }, { "epoch": 1.8076893561507177, "grad_norm": 0.2987808287143707, "learning_rate": 5.580207209200362e-07, "loss": 0.0259, "step": 117780 }, { "epoch": 1.8078428363134065, "grad_norm": 0.35914650559425354, "learning_rate": 5.571387419343355e-07, "loss": 0.0235, "step": 117790 }, { "epoch": 1.8079963164760955, "grad_norm": 0.2667323350906372, "learning_rate": 5.562574405321707e-07, "loss": 0.0175, "step": 117800 }, { "epoch": 1.8081497966387845, "grad_norm": 0.41515052318573, "learning_rate": 5.553768167767781e-07, "loss": 0.0242, "step": 117810 }, { "epoch": 1.8083032768014733, "grad_norm": 0.21057675778865814, "learning_rate": 5.544968707313458e-07, "loss": 0.0241, "step": 117820 }, { "epoch": 1.8084567569641625, "grad_norm": 0.48217713832855225, "learning_rate": 5.536176024590201e-07, "loss": 0.0198, "step": 117830 }, { "epoch": 1.8086102371268513, "grad_norm": 0.2972068190574646, "learning_rate": 5.527390120228892e-07, "loss": 0.0229, "step": 117840 }, { "epoch": 1.8087637172895403, "grad_norm": 0.3149869740009308, "learning_rate": 5.518610994859974e-07, "loss": 0.0202, "step": 117850 }, { "epoch": 1.8089171974522293, "grad_norm": 0.30047807097435, "learning_rate": 5.509838649113442e-07, "loss": 0.028, "step": 117860 }, { "epoch": 1.8090706776149181, "grad_norm": 0.2855299115180969, "learning_rate": 5.501073083618747e-07, "loss": 0.0253, "step": 117870 }, { "epoch": 1.8092241577776074, "grad_norm": 0.45614662766456604, "learning_rate": 5.492314299004876e-07, "loss": 0.0236, "step": 117880 }, { "epoch": 1.8093776379402962, "grad_norm": 0.4565292298793793, "learning_rate": 5.483562295900302e-07, "loss": 0.0204, "step": 117890 }, { "epoch": 1.8095311181029852, "grad_norm": 0.3696654438972473, "learning_rate": 5.47481707493307e-07, "loss": 0.0181, "step": 117900 }, { "epoch": 1.8096845982656742, "grad_norm": 0.3226172924041748, "learning_rate": 5.466078636730677e-07, "loss": 0.0239, "step": 117910 }, { "epoch": 1.809838078428363, "grad_norm": 0.2744421362876892, "learning_rate": 5.457346981920164e-07, "loss": 0.022, "step": 117920 }, { "epoch": 1.8099915585910522, "grad_norm": 0.37079915404319763, "learning_rate": 5.44862211112811e-07, "loss": 0.027, "step": 117930 }, { "epoch": 1.810145038753741, "grad_norm": 0.38897305727005005, "learning_rate": 5.439904024980547e-07, "loss": 0.0241, "step": 117940 }, { "epoch": 1.81029851891643, "grad_norm": 0.28415629267692566, "learning_rate": 5.43119272410304e-07, "loss": 0.0219, "step": 117950 }, { "epoch": 1.810451999079119, "grad_norm": 0.372711181640625, "learning_rate": 5.422488209120713e-07, "loss": 0.0279, "step": 117960 }, { "epoch": 1.810605479241808, "grad_norm": 0.40265440940856934, "learning_rate": 5.413790480658154e-07, "loss": 0.0263, "step": 117970 }, { "epoch": 1.810758959404497, "grad_norm": 0.40751153230667114, "learning_rate": 5.405099539339486e-07, "loss": 0.0254, "step": 117980 }, { "epoch": 1.8109124395671858, "grad_norm": 0.39667192101478577, "learning_rate": 5.396415385788322e-07, "loss": 0.0309, "step": 117990 }, { "epoch": 1.811065919729875, "grad_norm": 0.3599092662334442, "learning_rate": 5.387738020627809e-07, "loss": 0.0201, "step": 118000 }, { "epoch": 1.8112193998925639, "grad_norm": 0.32569989562034607, "learning_rate": 5.379067444480612e-07, "loss": 0.0244, "step": 118010 }, { "epoch": 1.8113728800552529, "grad_norm": 0.32617026567459106, "learning_rate": 5.370403657968882e-07, "loss": 0.0235, "step": 118020 }, { "epoch": 1.8115263602179419, "grad_norm": 0.3101808428764343, "learning_rate": 5.361746661714318e-07, "loss": 0.0307, "step": 118030 }, { "epoch": 1.8116798403806307, "grad_norm": 0.4433128833770752, "learning_rate": 5.353096456338102e-07, "loss": 0.0198, "step": 118040 }, { "epoch": 1.81183332054332, "grad_norm": 0.2721743583679199, "learning_rate": 5.344453042460929e-07, "loss": 0.0197, "step": 118050 }, { "epoch": 1.8119868007060087, "grad_norm": 0.24842022359371185, "learning_rate": 5.335816420703033e-07, "loss": 0.0256, "step": 118060 }, { "epoch": 1.8121402808686977, "grad_norm": 0.40259918570518494, "learning_rate": 5.327186591684141e-07, "loss": 0.0263, "step": 118070 }, { "epoch": 1.8122937610313867, "grad_norm": 0.33110636472702026, "learning_rate": 5.318563556023503e-07, "loss": 0.0212, "step": 118080 }, { "epoch": 1.8124472411940755, "grad_norm": 0.37119606137275696, "learning_rate": 5.309947314339858e-07, "loss": 0.0243, "step": 118090 }, { "epoch": 1.8126007213567648, "grad_norm": 0.43508800864219666, "learning_rate": 5.301337867251488e-07, "loss": 0.0187, "step": 118100 }, { "epoch": 1.8127542015194535, "grad_norm": 0.29836443066596985, "learning_rate": 5.292735215376166e-07, "loss": 0.0229, "step": 118110 }, { "epoch": 1.8129076816821426, "grad_norm": 0.296888142824173, "learning_rate": 5.284139359331186e-07, "loss": 0.0291, "step": 118120 }, { "epoch": 1.8130611618448316, "grad_norm": 0.36229196190834045, "learning_rate": 5.27555029973339e-07, "loss": 0.0254, "step": 118130 }, { "epoch": 1.8132146420075206, "grad_norm": 0.33475935459136963, "learning_rate": 5.266968037199027e-07, "loss": 0.0287, "step": 118140 }, { "epoch": 1.8133681221702096, "grad_norm": 0.21704155206680298, "learning_rate": 5.258392572343973e-07, "loss": 0.0197, "step": 118150 }, { "epoch": 1.8135216023328984, "grad_norm": 0.33527931571006775, "learning_rate": 5.249823905783558e-07, "loss": 0.0327, "step": 118160 }, { "epoch": 1.8136750824955874, "grad_norm": 0.3879428505897522, "learning_rate": 5.241262038132655e-07, "loss": 0.0225, "step": 118170 }, { "epoch": 1.8138285626582764, "grad_norm": 0.25390154123306274, "learning_rate": 5.232706970005608e-07, "loss": 0.0271, "step": 118180 }, { "epoch": 1.8139820428209654, "grad_norm": 0.24942085146903992, "learning_rate": 5.224158702016291e-07, "loss": 0.0218, "step": 118190 }, { "epoch": 1.8141355229836544, "grad_norm": 0.3606094717979431, "learning_rate": 5.215617234778125e-07, "loss": 0.0253, "step": 118200 }, { "epoch": 1.8142890031463432, "grad_norm": 0.2537388801574707, "learning_rate": 5.207082568903976e-07, "loss": 0.0266, "step": 118210 }, { "epoch": 1.8144424833090325, "grad_norm": 0.3921404480934143, "learning_rate": 5.198554705006287e-07, "loss": 0.0292, "step": 118220 }, { "epoch": 1.8145959634717213, "grad_norm": 0.5163798928260803, "learning_rate": 5.19003364369699e-07, "loss": 0.0272, "step": 118230 }, { "epoch": 1.8147494436344103, "grad_norm": 0.42381808161735535, "learning_rate": 5.181519385587486e-07, "loss": 0.0224, "step": 118240 }, { "epoch": 1.8149029237970993, "grad_norm": 0.4392620921134949, "learning_rate": 5.173011931288763e-07, "loss": 0.0249, "step": 118250 }, { "epoch": 1.815056403959788, "grad_norm": 0.3711041212081909, "learning_rate": 5.164511281411278e-07, "loss": 0.0248, "step": 118260 }, { "epoch": 1.8152098841224773, "grad_norm": 0.2575001120567322, "learning_rate": 5.156017436564987e-07, "loss": 0.0291, "step": 118270 }, { "epoch": 1.815363364285166, "grad_norm": 0.3387235403060913, "learning_rate": 5.147530397359379e-07, "loss": 0.0197, "step": 118280 }, { "epoch": 1.815516844447855, "grad_norm": 0.35678911209106445, "learning_rate": 5.13905016440347e-07, "loss": 0.0248, "step": 118290 }, { "epoch": 1.8156703246105441, "grad_norm": 0.3413730561733246, "learning_rate": 5.130576738305748e-07, "loss": 0.0223, "step": 118300 }, { "epoch": 1.815823804773233, "grad_norm": 0.47184911370277405, "learning_rate": 5.122110119674239e-07, "loss": 0.0251, "step": 118310 }, { "epoch": 1.8159772849359221, "grad_norm": 0.3048798739910126, "learning_rate": 5.11365030911648e-07, "loss": 0.0242, "step": 118320 }, { "epoch": 1.816130765098611, "grad_norm": 0.27286219596862793, "learning_rate": 5.105197307239529e-07, "loss": 0.025, "step": 118330 }, { "epoch": 1.8162842452613, "grad_norm": 0.34125277400016785, "learning_rate": 5.096751114649901e-07, "loss": 0.0279, "step": 118340 }, { "epoch": 1.816437725423989, "grad_norm": 0.3572021722793579, "learning_rate": 5.08831173195371e-07, "loss": 0.0323, "step": 118350 }, { "epoch": 1.816591205586678, "grad_norm": 0.45486482977867126, "learning_rate": 5.079879159756495e-07, "loss": 0.0258, "step": 118360 }, { "epoch": 1.816744685749367, "grad_norm": 0.3582109808921814, "learning_rate": 5.07145339866335e-07, "loss": 0.0232, "step": 118370 }, { "epoch": 1.8168981659120558, "grad_norm": 0.4252733886241913, "learning_rate": 5.063034449278914e-07, "loss": 0.0278, "step": 118380 }, { "epoch": 1.8170516460747448, "grad_norm": 0.4770204424858093, "learning_rate": 5.054622312207258e-07, "loss": 0.0241, "step": 118390 }, { "epoch": 1.8172051262374338, "grad_norm": 0.3974839746952057, "learning_rate": 5.046216988052011e-07, "loss": 0.0248, "step": 118400 }, { "epoch": 1.8173586064001228, "grad_norm": 0.3913921117782593, "learning_rate": 5.037818477416312e-07, "loss": 0.0256, "step": 118410 }, { "epoch": 1.8175120865628118, "grad_norm": 0.2749858498573303, "learning_rate": 5.029426780902813e-07, "loss": 0.0262, "step": 118420 }, { "epoch": 1.8176655667255006, "grad_norm": 0.36551594734191895, "learning_rate": 5.021041899113666e-07, "loss": 0.0288, "step": 118430 }, { "epoch": 1.8178190468881898, "grad_norm": 0.3623458743095398, "learning_rate": 5.012663832650532e-07, "loss": 0.0339, "step": 118440 }, { "epoch": 1.8179725270508786, "grad_norm": 0.3285742998123169, "learning_rate": 5.00429258211459e-07, "loss": 0.0272, "step": 118450 }, { "epoch": 1.8181260072135677, "grad_norm": 0.514445424079895, "learning_rate": 4.995928148106544e-07, "loss": 0.0238, "step": 118460 }, { "epoch": 1.8182794873762567, "grad_norm": 0.44278913736343384, "learning_rate": 4.987570531226549e-07, "loss": 0.0233, "step": 118470 }, { "epoch": 1.8184329675389455, "grad_norm": 0.3094031810760498, "learning_rate": 4.979219732074392e-07, "loss": 0.0252, "step": 118480 }, { "epoch": 1.8185864477016347, "grad_norm": 0.5408527255058289, "learning_rate": 4.970875751249226e-07, "loss": 0.0319, "step": 118490 }, { "epoch": 1.8187399278643235, "grad_norm": 0.33554062247276306, "learning_rate": 4.962538589349797e-07, "loss": 0.0201, "step": 118500 }, { "epoch": 1.8188934080270125, "grad_norm": 0.5070085525512695, "learning_rate": 4.954208246974368e-07, "loss": 0.0276, "step": 118510 }, { "epoch": 1.8190468881897015, "grad_norm": 0.5738588571548462, "learning_rate": 4.945884724720684e-07, "loss": 0.0335, "step": 118520 }, { "epoch": 1.8192003683523903, "grad_norm": 0.1777035892009735, "learning_rate": 4.937568023186012e-07, "loss": 0.0204, "step": 118530 }, { "epoch": 1.8193538485150795, "grad_norm": 0.26839739084243774, "learning_rate": 4.92925814296712e-07, "loss": 0.0245, "step": 118540 }, { "epoch": 1.8195073286777683, "grad_norm": 0.4461161494255066, "learning_rate": 4.920955084660295e-07, "loss": 0.0264, "step": 118550 }, { "epoch": 1.8196608088404573, "grad_norm": 0.3643576502799988, "learning_rate": 4.912658848861329e-07, "loss": 0.0262, "step": 118560 }, { "epoch": 1.8198142890031463, "grad_norm": 0.30022937059402466, "learning_rate": 4.904369436165524e-07, "loss": 0.0201, "step": 118570 }, { "epoch": 1.8199677691658354, "grad_norm": 0.44040247797966003, "learning_rate": 4.896086847167725e-07, "loss": 0.0257, "step": 118580 }, { "epoch": 1.8201212493285244, "grad_norm": 0.27969568967819214, "learning_rate": 4.887811082462235e-07, "loss": 0.0183, "step": 118590 }, { "epoch": 1.8202747294912132, "grad_norm": 0.2524052858352661, "learning_rate": 4.879542142642868e-07, "loss": 0.0234, "step": 118600 }, { "epoch": 1.8204282096539024, "grad_norm": 0.4945981204509735, "learning_rate": 4.871280028303027e-07, "loss": 0.0314, "step": 118610 }, { "epoch": 1.8205816898165912, "grad_norm": 0.4364873170852661, "learning_rate": 4.863024740035526e-07, "loss": 0.0214, "step": 118620 }, { "epoch": 1.8207351699792802, "grad_norm": 0.46750637888908386, "learning_rate": 4.854776278432749e-07, "loss": 0.0229, "step": 118630 }, { "epoch": 1.8208886501419692, "grad_norm": 0.35853609442710876, "learning_rate": 4.846534644086576e-07, "loss": 0.0244, "step": 118640 }, { "epoch": 1.821042130304658, "grad_norm": 0.281362920999527, "learning_rate": 4.8382998375884e-07, "loss": 0.0175, "step": 118650 }, { "epoch": 1.8211956104673472, "grad_norm": 0.5068240165710449, "learning_rate": 4.830071859529085e-07, "loss": 0.027, "step": 118660 }, { "epoch": 1.821349090630036, "grad_norm": 0.41100287437438965, "learning_rate": 4.821850710499087e-07, "loss": 0.0339, "step": 118670 }, { "epoch": 1.821502570792725, "grad_norm": 0.3316740393638611, "learning_rate": 4.813636391088306e-07, "loss": 0.0267, "step": 118680 }, { "epoch": 1.821656050955414, "grad_norm": 0.28051719069480896, "learning_rate": 4.805428901886145e-07, "loss": 0.0179, "step": 118690 }, { "epoch": 1.8218095311181028, "grad_norm": 0.4005293548107147, "learning_rate": 4.797228243481578e-07, "loss": 0.0231, "step": 118700 }, { "epoch": 1.821963011280792, "grad_norm": 0.22955743968486786, "learning_rate": 4.789034416463035e-07, "loss": 0.0209, "step": 118710 }, { "epoch": 1.8221164914434809, "grad_norm": 0.30486181378364563, "learning_rate": 4.780847421418466e-07, "loss": 0.0202, "step": 118720 }, { "epoch": 1.8222699716061699, "grad_norm": 0.529467761516571, "learning_rate": 4.77266725893536e-07, "loss": 0.0346, "step": 118730 }, { "epoch": 1.822423451768859, "grad_norm": 0.28279611468315125, "learning_rate": 4.764493929600689e-07, "loss": 0.0244, "step": 118740 }, { "epoch": 1.8225769319315477, "grad_norm": 0.30114424228668213, "learning_rate": 4.756327434000918e-07, "loss": 0.0279, "step": 118750 }, { "epoch": 1.822730412094237, "grad_norm": 0.2833966612815857, "learning_rate": 4.7481677727220565e-07, "loss": 0.0203, "step": 118760 }, { "epoch": 1.8228838922569257, "grad_norm": 0.36370378732681274, "learning_rate": 4.740014946349625e-07, "loss": 0.0196, "step": 118770 }, { "epoch": 1.8230373724196147, "grad_norm": 0.3696259558200836, "learning_rate": 4.7318689554686435e-07, "loss": 0.0247, "step": 118780 }, { "epoch": 1.8231908525823037, "grad_norm": 0.7002537250518799, "learning_rate": 4.72372980066359e-07, "loss": 0.0292, "step": 118790 }, { "epoch": 1.8233443327449927, "grad_norm": 0.4063304662704468, "learning_rate": 4.7155974825185414e-07, "loss": 0.0275, "step": 118800 }, { "epoch": 1.8234978129076818, "grad_norm": 0.17480026185512543, "learning_rate": 4.7074720016170305e-07, "loss": 0.0211, "step": 118810 }, { "epoch": 1.8236512930703705, "grad_norm": 0.4498649537563324, "learning_rate": 4.6993533585421134e-07, "loss": 0.0226, "step": 118820 }, { "epoch": 1.8238047732330598, "grad_norm": 0.39980894327163696, "learning_rate": 4.6912415538763467e-07, "loss": 0.0213, "step": 118830 }, { "epoch": 1.8239582533957486, "grad_norm": 0.3375336527824402, "learning_rate": 4.6831365882018086e-07, "loss": 0.0224, "step": 118840 }, { "epoch": 1.8241117335584376, "grad_norm": 0.35872119665145874, "learning_rate": 4.675038462100068e-07, "loss": 0.0214, "step": 118850 }, { "epoch": 1.8242652137211266, "grad_norm": 0.383771151304245, "learning_rate": 4.6669471761522255e-07, "loss": 0.0203, "step": 118860 }, { "epoch": 1.8244186938838154, "grad_norm": 0.36539578437805176, "learning_rate": 4.6588627309388845e-07, "loss": 0.0268, "step": 118870 }, { "epoch": 1.8245721740465046, "grad_norm": 0.41589006781578064, "learning_rate": 4.650785127040169e-07, "loss": 0.027, "step": 118880 }, { "epoch": 1.8247256542091934, "grad_norm": 0.39989525079727173, "learning_rate": 4.642714365035639e-07, "loss": 0.024, "step": 118890 }, { "epoch": 1.8248791343718824, "grad_norm": 0.6348893046379089, "learning_rate": 4.634650445504485e-07, "loss": 0.0342, "step": 118900 }, { "epoch": 1.8250326145345714, "grad_norm": 0.25407856702804565, "learning_rate": 4.626593369025312e-07, "loss": 0.025, "step": 118910 }, { "epoch": 1.8251860946972602, "grad_norm": 0.44251877069473267, "learning_rate": 4.6185431361762567e-07, "loss": 0.0259, "step": 118920 }, { "epoch": 1.8253395748599495, "grad_norm": 0.3465478718280792, "learning_rate": 4.610499747535013e-07, "loss": 0.02, "step": 118930 }, { "epoch": 1.8254930550226383, "grad_norm": 0.39653459191322327, "learning_rate": 4.6024632036786974e-07, "loss": 0.0249, "step": 118940 }, { "epoch": 1.8256465351853273, "grad_norm": 0.3394966125488281, "learning_rate": 4.594433505184004e-07, "loss": 0.0287, "step": 118950 }, { "epoch": 1.8258000153480163, "grad_norm": 0.3422360122203827, "learning_rate": 4.586410652627116e-07, "loss": 0.0273, "step": 118960 }, { "epoch": 1.8259534955107053, "grad_norm": 0.4105263650417328, "learning_rate": 4.578394646583706e-07, "loss": 0.0278, "step": 118970 }, { "epoch": 1.8261069756733943, "grad_norm": 0.42905572056770325, "learning_rate": 4.5703854876290034e-07, "loss": 0.0181, "step": 118980 }, { "epoch": 1.826260455836083, "grad_norm": 0.4824787378311157, "learning_rate": 4.5623831763376815e-07, "loss": 0.0268, "step": 118990 }, { "epoch": 1.826413935998772, "grad_norm": 0.35227611660957336, "learning_rate": 4.55438771328397e-07, "loss": 0.0203, "step": 119000 }, { "epoch": 1.8265674161614611, "grad_norm": 0.29337772727012634, "learning_rate": 4.5463990990415874e-07, "loss": 0.0251, "step": 119010 }, { "epoch": 1.8267208963241501, "grad_norm": 0.398093044757843, "learning_rate": 4.5384173341837645e-07, "loss": 0.0267, "step": 119020 }, { "epoch": 1.8268743764868391, "grad_norm": 0.5095872282981873, "learning_rate": 4.5304424192832653e-07, "loss": 0.0191, "step": 119030 }, { "epoch": 1.827027856649528, "grad_norm": 0.2929364740848541, "learning_rate": 4.5224743549123205e-07, "loss": 0.0225, "step": 119040 }, { "epoch": 1.8271813368122172, "grad_norm": 0.4873514175415039, "learning_rate": 4.5145131416426625e-07, "loss": 0.0281, "step": 119050 }, { "epoch": 1.827334816974906, "grad_norm": 0.3172917068004608, "learning_rate": 4.5065587800456e-07, "loss": 0.0217, "step": 119060 }, { "epoch": 1.827488297137595, "grad_norm": 0.4059722125530243, "learning_rate": 4.4986112706918993e-07, "loss": 0.0223, "step": 119070 }, { "epoch": 1.827641777300284, "grad_norm": 0.4429406225681305, "learning_rate": 4.4906706141518264e-07, "loss": 0.0235, "step": 119080 }, { "epoch": 1.8277952574629728, "grad_norm": 0.3569411635398865, "learning_rate": 4.482736810995181e-07, "loss": 0.0246, "step": 119090 }, { "epoch": 1.827948737625662, "grad_norm": 0.3537546992301941, "learning_rate": 4.4748098617912625e-07, "loss": 0.0224, "step": 119100 }, { "epoch": 1.8281022177883508, "grad_norm": 0.38955044746398926, "learning_rate": 4.466889767108884e-07, "loss": 0.0237, "step": 119110 }, { "epoch": 1.8282556979510398, "grad_norm": 0.3666055202484131, "learning_rate": 4.458976527516334e-07, "loss": 0.0224, "step": 119120 }, { "epoch": 1.8284091781137288, "grad_norm": 0.5689214468002319, "learning_rate": 4.451070143581493e-07, "loss": 0.0289, "step": 119130 }, { "epoch": 1.8285626582764176, "grad_norm": 0.4554978013038635, "learning_rate": 4.443170615871639e-07, "loss": 0.0209, "step": 119140 }, { "epoch": 1.8287161384391069, "grad_norm": 0.3583739101886749, "learning_rate": 4.4352779449536085e-07, "loss": 0.0304, "step": 119150 }, { "epoch": 1.8288696186017956, "grad_norm": 0.41083580255508423, "learning_rate": 4.427392131393793e-07, "loss": 0.0204, "step": 119160 }, { "epoch": 1.8290230987644847, "grad_norm": 0.2766466438770294, "learning_rate": 4.419513175758028e-07, "loss": 0.0275, "step": 119170 }, { "epoch": 1.8291765789271737, "grad_norm": 0.2603377401828766, "learning_rate": 4.4116410786116726e-07, "loss": 0.0205, "step": 119180 }, { "epoch": 1.8293300590898627, "grad_norm": 0.39691537618637085, "learning_rate": 4.4037758405196087e-07, "loss": 0.022, "step": 119190 }, { "epoch": 1.8294835392525517, "grad_norm": 0.36412256956100464, "learning_rate": 4.395917462046195e-07, "loss": 0.0244, "step": 119200 }, { "epoch": 1.8296370194152405, "grad_norm": 0.4613054692745209, "learning_rate": 4.388065943755326e-07, "loss": 0.023, "step": 119210 }, { "epoch": 1.8297904995779297, "grad_norm": 0.3199596405029297, "learning_rate": 4.3802212862104174e-07, "loss": 0.023, "step": 119220 }, { "epoch": 1.8299439797406185, "grad_norm": 0.3905993402004242, "learning_rate": 4.3723834899743636e-07, "loss": 0.0246, "step": 119230 }, { "epoch": 1.8300974599033075, "grad_norm": 0.24517948925495148, "learning_rate": 4.364552555609547e-07, "loss": 0.0205, "step": 119240 }, { "epoch": 1.8302509400659965, "grad_norm": 0.2171134501695633, "learning_rate": 4.356728483677908e-07, "loss": 0.0226, "step": 119250 }, { "epoch": 1.8304044202286853, "grad_norm": 0.3761599063873291, "learning_rate": 4.348911274740886e-07, "loss": 0.0222, "step": 119260 }, { "epoch": 1.8305579003913746, "grad_norm": 0.3167036771774292, "learning_rate": 4.3411009293593877e-07, "loss": 0.0179, "step": 119270 }, { "epoch": 1.8307113805540633, "grad_norm": 0.3032349646091461, "learning_rate": 4.333297448093865e-07, "loss": 0.0263, "step": 119280 }, { "epoch": 1.8308648607167524, "grad_norm": 0.38791394233703613, "learning_rate": 4.32550083150427e-07, "loss": 0.0239, "step": 119290 }, { "epoch": 1.8310183408794414, "grad_norm": 0.27960485219955444, "learning_rate": 4.3177110801500556e-07, "loss": 0.0192, "step": 119300 }, { "epoch": 1.8311718210421302, "grad_norm": 0.40910181403160095, "learning_rate": 4.309928194590163e-07, "loss": 0.0274, "step": 119310 }, { "epoch": 1.8313253012048194, "grad_norm": 0.45599666237831116, "learning_rate": 4.302152175383101e-07, "loss": 0.0212, "step": 119320 }, { "epoch": 1.8314787813675082, "grad_norm": 0.4233802258968353, "learning_rate": 4.294383023086823e-07, "loss": 0.0252, "step": 119330 }, { "epoch": 1.8316322615301972, "grad_norm": 0.28379902243614197, "learning_rate": 4.2866207382588066e-07, "loss": 0.0322, "step": 119340 }, { "epoch": 1.8317857416928862, "grad_norm": 0.3516146242618561, "learning_rate": 4.2788653214560604e-07, "loss": 0.0249, "step": 119350 }, { "epoch": 1.831939221855575, "grad_norm": 0.22189384698867798, "learning_rate": 4.2711167732350736e-07, "loss": 0.0217, "step": 119360 }, { "epoch": 1.8320927020182642, "grad_norm": 0.3936132788658142, "learning_rate": 4.263375094151867e-07, "loss": 0.0228, "step": 119370 }, { "epoch": 1.832246182180953, "grad_norm": 0.43828096985816956, "learning_rate": 4.2556402847619415e-07, "loss": 0.0235, "step": 119380 }, { "epoch": 1.832399662343642, "grad_norm": 0.3865898847579956, "learning_rate": 4.247912345620309e-07, "loss": 0.0226, "step": 119390 }, { "epoch": 1.832553142506331, "grad_norm": 0.4211479723453522, "learning_rate": 4.2401912772815247e-07, "loss": 0.0205, "step": 119400 }, { "epoch": 1.83270662266902, "grad_norm": 0.370793879032135, "learning_rate": 4.2324770802995795e-07, "loss": 0.024, "step": 119410 }, { "epoch": 1.832860102831709, "grad_norm": 0.15757855772972107, "learning_rate": 4.224769755228064e-07, "loss": 0.02, "step": 119420 }, { "epoch": 1.8330135829943979, "grad_norm": 0.3158162534236908, "learning_rate": 4.2170693026199917e-07, "loss": 0.0234, "step": 119430 }, { "epoch": 1.833167063157087, "grad_norm": 0.31802308559417725, "learning_rate": 4.20937572302792e-07, "loss": 0.02, "step": 119440 }, { "epoch": 1.833320543319776, "grad_norm": 0.2858075499534607, "learning_rate": 4.2016890170039293e-07, "loss": 0.02, "step": 119450 }, { "epoch": 1.833474023482465, "grad_norm": 0.4330871105194092, "learning_rate": 4.194009185099579e-07, "loss": 0.0215, "step": 119460 }, { "epoch": 1.833627503645154, "grad_norm": 0.36218246817588806, "learning_rate": 4.186336227865939e-07, "loss": 0.0295, "step": 119470 }, { "epoch": 1.8337809838078427, "grad_norm": 0.40783777832984924, "learning_rate": 4.1786701458536027e-07, "loss": 0.0242, "step": 119480 }, { "epoch": 1.833934463970532, "grad_norm": 0.3581700921058655, "learning_rate": 4.1710109396126407e-07, "loss": 0.0301, "step": 119490 }, { "epoch": 1.8340879441332207, "grad_norm": 0.4196436107158661, "learning_rate": 4.1633586096926583e-07, "loss": 0.0233, "step": 119500 }, { "epoch": 1.8342414242959098, "grad_norm": 0.31835320591926575, "learning_rate": 4.15571315664276e-07, "loss": 0.0194, "step": 119510 }, { "epoch": 1.8343949044585988, "grad_norm": 0.4534927010536194, "learning_rate": 4.148074581011574e-07, "loss": 0.023, "step": 119520 }, { "epoch": 1.8345483846212876, "grad_norm": 0.3211909532546997, "learning_rate": 4.140442883347162e-07, "loss": 0.0168, "step": 119530 }, { "epoch": 1.8347018647839768, "grad_norm": 0.3931044340133667, "learning_rate": 4.132818064197197e-07, "loss": 0.0266, "step": 119540 }, { "epoch": 1.8348553449466656, "grad_norm": 0.21734298765659332, "learning_rate": 4.1252001241087745e-07, "loss": 0.0288, "step": 119550 }, { "epoch": 1.8350088251093546, "grad_norm": 0.4386036992073059, "learning_rate": 4.1175890636285576e-07, "loss": 0.0258, "step": 119560 }, { "epoch": 1.8351623052720436, "grad_norm": 0.45797261595726013, "learning_rate": 4.1099848833026533e-07, "loss": 0.0255, "step": 119570 }, { "epoch": 1.8353157854347326, "grad_norm": 0.3847275972366333, "learning_rate": 4.102387583676737e-07, "loss": 0.0269, "step": 119580 }, { "epoch": 1.8354692655974216, "grad_norm": 0.4070453643798828, "learning_rate": 4.09479716529595e-07, "loss": 0.0244, "step": 119590 }, { "epoch": 1.8356227457601104, "grad_norm": 0.28762492537498474, "learning_rate": 4.0872136287049336e-07, "loss": 0.0224, "step": 119600 }, { "epoch": 1.8357762259227994, "grad_norm": 0.3704020082950592, "learning_rate": 4.0796369744478867e-07, "loss": 0.024, "step": 119610 }, { "epoch": 1.8359297060854884, "grad_norm": 0.3071637749671936, "learning_rate": 4.072067203068475e-07, "loss": 0.023, "step": 119620 }, { "epoch": 1.8360831862481775, "grad_norm": 0.5092028975486755, "learning_rate": 4.064504315109852e-07, "loss": 0.0211, "step": 119630 }, { "epoch": 1.8362366664108665, "grad_norm": 0.4303053617477417, "learning_rate": 4.056948311114717e-07, "loss": 0.027, "step": 119640 }, { "epoch": 1.8363901465735553, "grad_norm": 0.2314383089542389, "learning_rate": 4.0493991916252693e-07, "loss": 0.0169, "step": 119650 }, { "epoch": 1.8365436267362445, "grad_norm": 0.3635312616825104, "learning_rate": 4.041856957183188e-07, "loss": 0.0263, "step": 119660 }, { "epoch": 1.8366971068989333, "grad_norm": 0.30709606409072876, "learning_rate": 4.0343216083296834e-07, "loss": 0.0249, "step": 119670 }, { "epoch": 1.8368505870616223, "grad_norm": 0.36662396788597107, "learning_rate": 4.0267931456054677e-07, "loss": 0.0274, "step": 119680 }, { "epoch": 1.8370040672243113, "grad_norm": 0.4106817841529846, "learning_rate": 4.0192715695507423e-07, "loss": 0.0259, "step": 119690 }, { "epoch": 1.837157547387, "grad_norm": 0.25971147418022156, "learning_rate": 4.01175688070522e-07, "loss": 0.0271, "step": 119700 }, { "epoch": 1.8373110275496893, "grad_norm": 0.3048384189605713, "learning_rate": 4.004249079608158e-07, "loss": 0.0247, "step": 119710 }, { "epoch": 1.8374645077123781, "grad_norm": 0.4239481985569, "learning_rate": 3.99674816679827e-07, "loss": 0.0345, "step": 119720 }, { "epoch": 1.8376179878750671, "grad_norm": 0.292510062456131, "learning_rate": 3.98925414281377e-07, "loss": 0.0182, "step": 119730 }, { "epoch": 1.8377714680377562, "grad_norm": 0.2851991653442383, "learning_rate": 3.981767008192439e-07, "loss": 0.0193, "step": 119740 }, { "epoch": 1.837924948200445, "grad_norm": 0.33926311135292053, "learning_rate": 3.974286763471502e-07, "loss": 0.0331, "step": 119750 }, { "epoch": 1.8380784283631342, "grad_norm": 0.298989862203598, "learning_rate": 3.966813409187709e-07, "loss": 0.0271, "step": 119760 }, { "epoch": 1.838231908525823, "grad_norm": 0.39380308985710144, "learning_rate": 3.9593469458773626e-07, "loss": 0.0254, "step": 119770 }, { "epoch": 1.838385388688512, "grad_norm": 0.4519099295139313, "learning_rate": 3.951887374076169e-07, "loss": 0.0271, "step": 119780 }, { "epoch": 1.838538868851201, "grad_norm": 0.36824655532836914, "learning_rate": 3.94443469431941e-07, "loss": 0.0234, "step": 119790 }, { "epoch": 1.83869234901389, "grad_norm": 0.32614049315452576, "learning_rate": 3.936988907141892e-07, "loss": 0.0234, "step": 119800 }, { "epoch": 1.838845829176579, "grad_norm": 0.5331850647926331, "learning_rate": 3.9295500130778765e-07, "loss": 0.0256, "step": 119810 }, { "epoch": 1.8389993093392678, "grad_norm": 0.4599120616912842, "learning_rate": 3.922118012661158e-07, "loss": 0.0167, "step": 119820 }, { "epoch": 1.8391527895019568, "grad_norm": 0.42951661348342896, "learning_rate": 3.9146929064250103e-07, "loss": 0.0245, "step": 119830 }, { "epoch": 1.8393062696646458, "grad_norm": 0.4146495759487152, "learning_rate": 3.907274694902252e-07, "loss": 0.0242, "step": 119840 }, { "epoch": 1.8394597498273348, "grad_norm": 0.343266099691391, "learning_rate": 3.8998633786251794e-07, "loss": 0.0273, "step": 119850 }, { "epoch": 1.8396132299900239, "grad_norm": 0.31156694889068604, "learning_rate": 3.8924589581255776e-07, "loss": 0.0224, "step": 119860 }, { "epoch": 1.8397667101527126, "grad_norm": 0.3322487473487854, "learning_rate": 3.8850614339348e-07, "loss": 0.0256, "step": 119870 }, { "epoch": 1.8399201903154019, "grad_norm": 0.4006403386592865, "learning_rate": 3.8776708065836444e-07, "loss": 0.0343, "step": 119880 }, { "epoch": 1.8400736704780907, "grad_norm": 0.28019049763679504, "learning_rate": 3.870287076602408e-07, "loss": 0.0252, "step": 119890 }, { "epoch": 1.8402271506407797, "grad_norm": 0.35292986035346985, "learning_rate": 3.8629102445209675e-07, "loss": 0.023, "step": 119900 }, { "epoch": 1.8403806308034687, "grad_norm": 0.447213351726532, "learning_rate": 3.855540310868622e-07, "loss": 0.0211, "step": 119910 }, { "epoch": 1.8405341109661575, "grad_norm": 0.41578683257102966, "learning_rate": 3.848177276174225e-07, "loss": 0.0235, "step": 119920 }, { "epoch": 1.8406875911288467, "grad_norm": 0.40739384293556213, "learning_rate": 3.840821140966111e-07, "loss": 0.0207, "step": 119930 }, { "epoch": 1.8408410712915355, "grad_norm": 0.36159318685531616, "learning_rate": 3.833471905772135e-07, "loss": 0.0238, "step": 119940 }, { "epoch": 1.8409945514542245, "grad_norm": 0.3160742521286011, "learning_rate": 3.826129571119652e-07, "loss": 0.0231, "step": 119950 }, { "epoch": 1.8411480316169135, "grad_norm": 0.39981770515441895, "learning_rate": 3.8187941375354975e-07, "loss": 0.0275, "step": 119960 }, { "epoch": 1.8413015117796023, "grad_norm": 0.5112229585647583, "learning_rate": 3.8114656055460717e-07, "loss": 0.0282, "step": 119970 }, { "epoch": 1.8414549919422916, "grad_norm": 0.3808874189853668, "learning_rate": 3.8041439756772105e-07, "loss": 0.0266, "step": 119980 }, { "epoch": 1.8416084721049804, "grad_norm": 0.3570481240749359, "learning_rate": 3.7968292484542924e-07, "loss": 0.0293, "step": 119990 }, { "epoch": 1.8417619522676694, "grad_norm": 0.3153580129146576, "learning_rate": 3.7895214244022095e-07, "loss": 0.028, "step": 120000 }, { "epoch": 1.8419154324303584, "grad_norm": 0.49141135811805725, "learning_rate": 3.7822205040453196e-07, "loss": 0.03, "step": 120010 }, { "epoch": 1.8420689125930474, "grad_norm": 0.39690494537353516, "learning_rate": 3.7749264879075375e-07, "loss": 0.0209, "step": 120020 }, { "epoch": 1.8422223927557364, "grad_norm": 0.470857709646225, "learning_rate": 3.767639376512233e-07, "loss": 0.0268, "step": 120030 }, { "epoch": 1.8423758729184252, "grad_norm": 0.5235544443130493, "learning_rate": 3.7603591703822997e-07, "loss": 0.0252, "step": 120040 }, { "epoch": 1.8425293530811144, "grad_norm": 0.36083438992500305, "learning_rate": 3.753085870040141e-07, "loss": 0.0283, "step": 120050 }, { "epoch": 1.8426828332438032, "grad_norm": 0.34154441952705383, "learning_rate": 3.7458194760076725e-07, "loss": 0.0213, "step": 120060 }, { "epoch": 1.8428363134064922, "grad_norm": 0.40325480699539185, "learning_rate": 3.7385599888063006e-07, "loss": 0.0221, "step": 120070 }, { "epoch": 1.8429897935691812, "grad_norm": 0.38680195808410645, "learning_rate": 3.731307408956919e-07, "loss": 0.0223, "step": 120080 }, { "epoch": 1.84314327373187, "grad_norm": 0.5241842865943909, "learning_rate": 3.724061736979967e-07, "loss": 0.0256, "step": 120090 }, { "epoch": 1.8432967538945593, "grad_norm": 0.4735962152481079, "learning_rate": 3.716822973395351e-07, "loss": 0.0302, "step": 120100 }, { "epoch": 1.843450234057248, "grad_norm": 0.39881017804145813, "learning_rate": 3.7095911187225106e-07, "loss": 0.0241, "step": 120110 }, { "epoch": 1.843603714219937, "grad_norm": 0.4216504693031311, "learning_rate": 3.702366173480365e-07, "loss": 0.0299, "step": 120120 }, { "epoch": 1.843757194382626, "grad_norm": 0.31940630078315735, "learning_rate": 3.6951481381873545e-07, "loss": 0.0305, "step": 120130 }, { "epoch": 1.8439106745453149, "grad_norm": 0.4326378107070923, "learning_rate": 3.687937013361409e-07, "loss": 0.0281, "step": 120140 }, { "epoch": 1.844064154708004, "grad_norm": 0.19630107283592224, "learning_rate": 3.6807327995199703e-07, "loss": 0.0198, "step": 120150 }, { "epoch": 1.844217634870693, "grad_norm": 0.5149984955787659, "learning_rate": 3.673535497180003e-07, "loss": 0.0251, "step": 120160 }, { "epoch": 1.844371115033382, "grad_norm": 0.37685638666152954, "learning_rate": 3.6663451068579604e-07, "loss": 0.0246, "step": 120170 }, { "epoch": 1.844524595196071, "grad_norm": 0.5174450278282166, "learning_rate": 3.659161629069763e-07, "loss": 0.0223, "step": 120180 }, { "epoch": 1.8446780753587597, "grad_norm": 0.2454511672258377, "learning_rate": 3.6519850643308985e-07, "loss": 0.0212, "step": 120190 }, { "epoch": 1.844831555521449, "grad_norm": 0.5277053117752075, "learning_rate": 3.644815413156322e-07, "loss": 0.0264, "step": 120200 }, { "epoch": 1.8449850356841377, "grad_norm": 0.3023400604724884, "learning_rate": 3.637652676060499e-07, "loss": 0.0217, "step": 120210 }, { "epoch": 1.8451385158468268, "grad_norm": 0.30258068442344666, "learning_rate": 3.630496853557408e-07, "loss": 0.0206, "step": 120220 }, { "epoch": 1.8452919960095158, "grad_norm": 0.2983379065990448, "learning_rate": 3.6233479461605047e-07, "loss": 0.0238, "step": 120230 }, { "epoch": 1.8454454761722048, "grad_norm": 0.5266883373260498, "learning_rate": 3.61620595438279e-07, "loss": 0.0234, "step": 120240 }, { "epoch": 1.8455989563348938, "grad_norm": 0.39070379734039307, "learning_rate": 3.60907087873672e-07, "loss": 0.0303, "step": 120250 }, { "epoch": 1.8457524364975826, "grad_norm": 0.24232301115989685, "learning_rate": 3.6019427197343084e-07, "loss": 0.018, "step": 120260 }, { "epoch": 1.8459059166602718, "grad_norm": 0.323236882686615, "learning_rate": 3.594821477887045e-07, "loss": 0.0227, "step": 120270 }, { "epoch": 1.8460593968229606, "grad_norm": 0.5022054314613342, "learning_rate": 3.5877071537058774e-07, "loss": 0.0309, "step": 120280 }, { "epoch": 1.8462128769856496, "grad_norm": 0.37815436720848083, "learning_rate": 3.580599747701352e-07, "loss": 0.0231, "step": 120290 }, { "epoch": 1.8463663571483386, "grad_norm": 0.32459554076194763, "learning_rate": 3.573499260383451e-07, "loss": 0.0317, "step": 120300 }, { "epoch": 1.8465198373110274, "grad_norm": 0.4420190751552582, "learning_rate": 3.5664056922616653e-07, "loss": 0.0256, "step": 120310 }, { "epoch": 1.8466733174737167, "grad_norm": 0.32225388288497925, "learning_rate": 3.559319043845044e-07, "loss": 0.024, "step": 120320 }, { "epoch": 1.8468267976364054, "grad_norm": 0.37493249773979187, "learning_rate": 3.552239315642059e-07, "loss": 0.0226, "step": 120330 }, { "epoch": 1.8469802777990945, "grad_norm": 0.3364843428134918, "learning_rate": 3.5451665081607135e-07, "loss": 0.0215, "step": 120340 }, { "epoch": 1.8471337579617835, "grad_norm": 0.3484959900379181, "learning_rate": 3.538100621908569e-07, "loss": 0.0212, "step": 120350 }, { "epoch": 1.8472872381244723, "grad_norm": 0.7851141095161438, "learning_rate": 3.53104165739262e-07, "loss": 0.0238, "step": 120360 }, { "epoch": 1.8474407182871615, "grad_norm": 0.3746209144592285, "learning_rate": 3.523989615119383e-07, "loss": 0.0284, "step": 120370 }, { "epoch": 1.8475941984498503, "grad_norm": 0.3306204080581665, "learning_rate": 3.516944495594909e-07, "loss": 0.0252, "step": 120380 }, { "epoch": 1.8477476786125393, "grad_norm": 0.3740187883377075, "learning_rate": 3.509906299324717e-07, "loss": 0.0246, "step": 120390 }, { "epoch": 1.8479011587752283, "grad_norm": 0.3507987856864929, "learning_rate": 3.5028750268138346e-07, "loss": 0.0219, "step": 120400 }, { "epoch": 1.8480546389379173, "grad_norm": 0.28298670053482056, "learning_rate": 3.495850678566792e-07, "loss": 0.032, "step": 120410 }, { "epoch": 1.8482081191006063, "grad_norm": 0.3534417748451233, "learning_rate": 3.4888332550876756e-07, "loss": 0.0273, "step": 120420 }, { "epoch": 1.8483615992632951, "grad_norm": 0.35784682631492615, "learning_rate": 3.4818227568799714e-07, "loss": 0.0184, "step": 120430 }, { "epoch": 1.8485150794259841, "grad_norm": 0.33819541335105896, "learning_rate": 3.4748191844467536e-07, "loss": 0.0311, "step": 120440 }, { "epoch": 1.8486685595886732, "grad_norm": 0.4234996736049652, "learning_rate": 3.4678225382905664e-07, "loss": 0.0258, "step": 120450 }, { "epoch": 1.8488220397513622, "grad_norm": 0.4526856243610382, "learning_rate": 3.460832818913473e-07, "loss": 0.0266, "step": 120460 }, { "epoch": 1.8489755199140512, "grad_norm": 0.3082897365093231, "learning_rate": 3.4538500268170183e-07, "loss": 0.0174, "step": 120470 }, { "epoch": 1.84912900007674, "grad_norm": 0.4029982388019562, "learning_rate": 3.446874162502256e-07, "loss": 0.028, "step": 120480 }, { "epoch": 1.8492824802394292, "grad_norm": 0.42009273171424866, "learning_rate": 3.439905226469764e-07, "loss": 0.028, "step": 120490 }, { "epoch": 1.849435960402118, "grad_norm": 0.4862423241138458, "learning_rate": 3.432943219219587e-07, "loss": 0.022, "step": 120500 }, { "epoch": 1.849589440564807, "grad_norm": 0.42494887113571167, "learning_rate": 3.4259881412513016e-07, "loss": 0.0272, "step": 120510 }, { "epoch": 1.849742920727496, "grad_norm": 0.3416232764720917, "learning_rate": 3.4190399930639883e-07, "loss": 0.023, "step": 120520 }, { "epoch": 1.8498964008901848, "grad_norm": 0.2749723494052887, "learning_rate": 3.412098775156203e-07, "loss": 0.0193, "step": 120530 }, { "epoch": 1.850049881052874, "grad_norm": 0.41652411222457886, "learning_rate": 3.4051644880260137e-07, "loss": 0.0288, "step": 120540 }, { "epoch": 1.8502033612155628, "grad_norm": 0.36498701572418213, "learning_rate": 3.398237132171023e-07, "loss": 0.026, "step": 120550 }, { "epoch": 1.8503568413782518, "grad_norm": 0.32232433557510376, "learning_rate": 3.391316708088299e-07, "loss": 0.0285, "step": 120560 }, { "epoch": 1.8505103215409409, "grad_norm": 0.3358502984046936, "learning_rate": 3.3844032162744236e-07, "loss": 0.0186, "step": 120570 }, { "epoch": 1.8506638017036297, "grad_norm": 0.2758195400238037, "learning_rate": 3.3774966572254877e-07, "loss": 0.0218, "step": 120580 }, { "epoch": 1.8508172818663189, "grad_norm": 0.26181432604789734, "learning_rate": 3.370597031437073e-07, "loss": 0.0257, "step": 120590 }, { "epoch": 1.8509707620290077, "grad_norm": 0.49438056349754333, "learning_rate": 3.3637043394042725e-07, "loss": 0.0242, "step": 120600 }, { "epoch": 1.8511242421916967, "grad_norm": 0.46328648924827576, "learning_rate": 3.356818581621679e-07, "loss": 0.0288, "step": 120610 }, { "epoch": 1.8512777223543857, "grad_norm": 0.37571045756340027, "learning_rate": 3.349939758583398e-07, "loss": 0.0291, "step": 120620 }, { "epoch": 1.8514312025170747, "grad_norm": 0.4180167615413666, "learning_rate": 3.343067870783001e-07, "loss": 0.0275, "step": 120630 }, { "epoch": 1.8515846826797637, "grad_norm": 0.3900856077671051, "learning_rate": 3.336202918713616e-07, "loss": 0.0257, "step": 120640 }, { "epoch": 1.8517381628424525, "grad_norm": 0.29177039861679077, "learning_rate": 3.329344902867837e-07, "loss": 0.0192, "step": 120650 }, { "epoch": 1.8518916430051415, "grad_norm": 0.36914166808128357, "learning_rate": 3.322493823737771e-07, "loss": 0.0361, "step": 120660 }, { "epoch": 1.8520451231678305, "grad_norm": 0.3309440314769745, "learning_rate": 3.3156496818150143e-07, "loss": 0.0211, "step": 120670 }, { "epoch": 1.8521986033305196, "grad_norm": 0.2668670117855072, "learning_rate": 3.308812477590695e-07, "loss": 0.0188, "step": 120680 }, { "epoch": 1.8523520834932086, "grad_norm": 0.3400498032569885, "learning_rate": 3.301982211555399e-07, "loss": 0.0232, "step": 120690 }, { "epoch": 1.8525055636558974, "grad_norm": 0.38532981276512146, "learning_rate": 3.2951588841992565e-07, "loss": 0.0224, "step": 120700 }, { "epoch": 1.8526590438185866, "grad_norm": 0.4796217083930969, "learning_rate": 3.288342496011887e-07, "loss": 0.0307, "step": 120710 }, { "epoch": 1.8528125239812754, "grad_norm": 0.34801235795021057, "learning_rate": 3.28153304748241e-07, "loss": 0.0303, "step": 120720 }, { "epoch": 1.8529660041439644, "grad_norm": 0.4145975112915039, "learning_rate": 3.274730539099413e-07, "loss": 0.0337, "step": 120730 }, { "epoch": 1.8531194843066534, "grad_norm": 0.2930455207824707, "learning_rate": 3.267934971351061e-07, "loss": 0.0273, "step": 120740 }, { "epoch": 1.8532729644693422, "grad_norm": 0.27583223581314087, "learning_rate": 3.2611463447249634e-07, "loss": 0.0194, "step": 120750 }, { "epoch": 1.8534264446320314, "grad_norm": 0.2556779980659485, "learning_rate": 3.2543646597082313e-07, "loss": 0.0217, "step": 120760 }, { "epoch": 1.8535799247947202, "grad_norm": 0.3829590380191803, "learning_rate": 3.247589916787508e-07, "loss": 0.024, "step": 120770 }, { "epoch": 1.8537334049574092, "grad_norm": 0.442108690738678, "learning_rate": 3.240822116448927e-07, "loss": 0.0304, "step": 120780 }, { "epoch": 1.8538868851200982, "grad_norm": 0.4449041485786438, "learning_rate": 3.2340612591781005e-07, "loss": 0.032, "step": 120790 }, { "epoch": 1.854040365282787, "grad_norm": 0.30547311902046204, "learning_rate": 3.227307345460162e-07, "loss": 0.0237, "step": 120800 }, { "epoch": 1.8541938454454763, "grad_norm": 0.4709734618663788, "learning_rate": 3.22056037577978e-07, "loss": 0.0278, "step": 120810 }, { "epoch": 1.854347325608165, "grad_norm": 0.33036211133003235, "learning_rate": 3.2138203506210663e-07, "loss": 0.0249, "step": 120820 }, { "epoch": 1.854500805770854, "grad_norm": 0.3623247444629669, "learning_rate": 3.207087270467646e-07, "loss": 0.0259, "step": 120830 }, { "epoch": 1.854654285933543, "grad_norm": 0.269233763217926, "learning_rate": 3.2003611358026763e-07, "loss": 0.0266, "step": 120840 }, { "epoch": 1.854807766096232, "grad_norm": 0.3878914415836334, "learning_rate": 3.1936419471088054e-07, "loss": 0.0248, "step": 120850 }, { "epoch": 1.8549612462589211, "grad_norm": 0.3892621397972107, "learning_rate": 3.186929704868158e-07, "loss": 0.0283, "step": 120860 }, { "epoch": 1.85511472642161, "grad_norm": 0.3271894156932831, "learning_rate": 3.180224409562405e-07, "loss": 0.022, "step": 120870 }, { "epoch": 1.8552682065842991, "grad_norm": 0.34810805320739746, "learning_rate": 3.1735260616726714e-07, "loss": 0.0288, "step": 120880 }, { "epoch": 1.855421686746988, "grad_norm": 0.4546721279621124, "learning_rate": 3.166834661679596e-07, "loss": 0.0278, "step": 120890 }, { "epoch": 1.855575166909677, "grad_norm": 0.20283004641532898, "learning_rate": 3.160150210063351e-07, "loss": 0.0265, "step": 120900 }, { "epoch": 1.855728647072366, "grad_norm": 0.2899991571903229, "learning_rate": 3.153472707303584e-07, "loss": 0.0249, "step": 120910 }, { "epoch": 1.8558821272350547, "grad_norm": 0.29784876108169556, "learning_rate": 3.1468021538794357e-07, "loss": 0.0185, "step": 120920 }, { "epoch": 1.856035607397744, "grad_norm": 0.3279317617416382, "learning_rate": 3.140138550269567e-07, "loss": 0.0218, "step": 120930 }, { "epoch": 1.8561890875604328, "grad_norm": 0.4419126808643341, "learning_rate": 3.1334818969521286e-07, "loss": 0.0296, "step": 120940 }, { "epoch": 1.8563425677231218, "grad_norm": 0.3241164982318878, "learning_rate": 3.126832194404783e-07, "loss": 0.0235, "step": 120950 }, { "epoch": 1.8564960478858108, "grad_norm": 0.4722975492477417, "learning_rate": 3.120189443104671e-07, "loss": 0.0256, "step": 120960 }, { "epoch": 1.8566495280484996, "grad_norm": 0.4320305287837982, "learning_rate": 3.1135536435284774e-07, "loss": 0.0269, "step": 120970 }, { "epoch": 1.8568030082111888, "grad_norm": 0.32546210289001465, "learning_rate": 3.106924796152344e-07, "loss": 0.0208, "step": 120980 }, { "epoch": 1.8569564883738776, "grad_norm": 0.34252095222473145, "learning_rate": 3.100302901451924e-07, "loss": 0.0301, "step": 120990 }, { "epoch": 1.8571099685365666, "grad_norm": 0.3505563735961914, "learning_rate": 3.0936879599023916e-07, "loss": 0.0271, "step": 121000 }, { "epoch": 1.8572634486992556, "grad_norm": 0.20489206910133362, "learning_rate": 3.087079971978413e-07, "loss": 0.0264, "step": 121010 }, { "epoch": 1.8574169288619444, "grad_norm": 0.38861432671546936, "learning_rate": 3.080478938154152e-07, "loss": 0.0283, "step": 121020 }, { "epoch": 1.8575704090246337, "grad_norm": 0.5486217141151428, "learning_rate": 3.0738848589032645e-07, "loss": 0.0234, "step": 121030 }, { "epoch": 1.8577238891873225, "grad_norm": 0.28879764676094055, "learning_rate": 3.0672977346989155e-07, "loss": 0.0185, "step": 121040 }, { "epoch": 1.8578773693500115, "grad_norm": 0.3615281879901886, "learning_rate": 3.060717566013782e-07, "loss": 0.0256, "step": 121050 }, { "epoch": 1.8580308495127005, "grad_norm": 0.6022252440452576, "learning_rate": 3.05414435332001e-07, "loss": 0.0299, "step": 121060 }, { "epoch": 1.8581843296753895, "grad_norm": 0.3894750773906708, "learning_rate": 3.04757809708931e-07, "loss": 0.0248, "step": 121070 }, { "epoch": 1.8583378098380785, "grad_norm": 0.39195746183395386, "learning_rate": 3.041018797792816e-07, "loss": 0.0246, "step": 121080 }, { "epoch": 1.8584912900007673, "grad_norm": 0.6393106579780579, "learning_rate": 3.0344664559011974e-07, "loss": 0.0234, "step": 121090 }, { "epoch": 1.8586447701634565, "grad_norm": 0.2844548523426056, "learning_rate": 3.027921071884643e-07, "loss": 0.0264, "step": 121100 }, { "epoch": 1.8587982503261453, "grad_norm": 0.3738735616207123, "learning_rate": 3.021382646212812e-07, "loss": 0.0264, "step": 121110 }, { "epoch": 1.8589517304888343, "grad_norm": 0.3065941035747528, "learning_rate": 3.014851179354894e-07, "loss": 0.0284, "step": 121120 }, { "epoch": 1.8591052106515233, "grad_norm": 0.2819153368473053, "learning_rate": 3.008326671779538e-07, "loss": 0.023, "step": 121130 }, { "epoch": 1.8592586908142121, "grad_norm": 0.525878369808197, "learning_rate": 3.001809123954935e-07, "loss": 0.0231, "step": 121140 }, { "epoch": 1.8594121709769014, "grad_norm": 0.42878687381744385, "learning_rate": 2.995298536348745e-07, "loss": 0.0267, "step": 121150 }, { "epoch": 1.8595656511395902, "grad_norm": 0.40039947628974915, "learning_rate": 2.988794909428161e-07, "loss": 0.0289, "step": 121160 }, { "epoch": 1.8597191313022792, "grad_norm": 0.3204995393753052, "learning_rate": 2.9822982436598535e-07, "loss": 0.0227, "step": 121170 }, { "epoch": 1.8598726114649682, "grad_norm": 0.44383227825164795, "learning_rate": 2.975808539509983e-07, "loss": 0.0254, "step": 121180 }, { "epoch": 1.860026091627657, "grad_norm": 0.5647377967834473, "learning_rate": 2.9693257974442336e-07, "loss": 0.0224, "step": 121190 }, { "epoch": 1.8601795717903462, "grad_norm": 0.34959009289741516, "learning_rate": 2.9628500179277984e-07, "loss": 0.0214, "step": 121200 }, { "epoch": 1.860333051953035, "grad_norm": 0.2667807638645172, "learning_rate": 2.956381201425329e-07, "loss": 0.0202, "step": 121210 }, { "epoch": 1.860486532115724, "grad_norm": 0.45797258615493774, "learning_rate": 2.9499193484010204e-07, "loss": 0.0261, "step": 121220 }, { "epoch": 1.860640012278413, "grad_norm": 0.2579539716243744, "learning_rate": 2.9434644593185457e-07, "loss": 0.0171, "step": 121230 }, { "epoch": 1.860793492441102, "grad_norm": 0.21445909142494202, "learning_rate": 2.9370165346410905e-07, "loss": 0.0242, "step": 121240 }, { "epoch": 1.860946972603791, "grad_norm": 0.44483280181884766, "learning_rate": 2.930575574831307e-07, "loss": 0.0238, "step": 121250 }, { "epoch": 1.8611004527664798, "grad_norm": 0.38207539916038513, "learning_rate": 2.9241415803513917e-07, "loss": 0.0185, "step": 121260 }, { "epoch": 1.8612539329291689, "grad_norm": 0.38729241490364075, "learning_rate": 2.9177145516630423e-07, "loss": 0.024, "step": 121270 }, { "epoch": 1.8614074130918579, "grad_norm": 0.37846991419792175, "learning_rate": 2.911294489227401e-07, "loss": 0.0254, "step": 121280 }, { "epoch": 1.8615608932545469, "grad_norm": 0.21769219636917114, "learning_rate": 2.9048813935051767e-07, "loss": 0.0252, "step": 121290 }, { "epoch": 1.861714373417236, "grad_norm": 0.333744615316391, "learning_rate": 2.898475264956535e-07, "loss": 0.0287, "step": 121300 }, { "epoch": 1.8618678535799247, "grad_norm": 0.28635483980178833, "learning_rate": 2.892076104041153e-07, "loss": 0.0276, "step": 121310 }, { "epoch": 1.862021333742614, "grad_norm": 0.2817990481853485, "learning_rate": 2.885683911218207e-07, "loss": 0.0169, "step": 121320 }, { "epoch": 1.8621748139053027, "grad_norm": 0.314687043428421, "learning_rate": 2.8792986869463966e-07, "loss": 0.0202, "step": 121330 }, { "epoch": 1.8623282940679917, "grad_norm": 0.3393439054489136, "learning_rate": 2.872920431683879e-07, "loss": 0.0297, "step": 121340 }, { "epoch": 1.8624817742306807, "grad_norm": 0.3507830500602722, "learning_rate": 2.866549145888331e-07, "loss": 0.0331, "step": 121350 }, { "epoch": 1.8626352543933695, "grad_norm": 0.28618764877319336, "learning_rate": 2.8601848300169546e-07, "loss": 0.021, "step": 121360 }, { "epoch": 1.8627887345560588, "grad_norm": 0.36183974146842957, "learning_rate": 2.853827484526417e-07, "loss": 0.0259, "step": 121370 }, { "epoch": 1.8629422147187475, "grad_norm": 0.40983229875564575, "learning_rate": 2.847477109872898e-07, "loss": 0.025, "step": 121380 }, { "epoch": 1.8630956948814366, "grad_norm": 0.3471207022666931, "learning_rate": 2.841133706512078e-07, "loss": 0.0229, "step": 121390 }, { "epoch": 1.8632491750441256, "grad_norm": 0.5103759765625, "learning_rate": 2.834797274899126e-07, "loss": 0.0235, "step": 121400 }, { "epoch": 1.8634026552068144, "grad_norm": 0.3902779817581177, "learning_rate": 2.8284678154887115e-07, "loss": 0.0364, "step": 121410 }, { "epoch": 1.8635561353695036, "grad_norm": 0.39583802223205566, "learning_rate": 2.8221453287350486e-07, "loss": 0.0281, "step": 121420 }, { "epoch": 1.8637096155321924, "grad_norm": 0.3971980810165405, "learning_rate": 2.8158298150917863e-07, "loss": 0.0201, "step": 121430 }, { "epoch": 1.8638630956948814, "grad_norm": 0.35261762142181396, "learning_rate": 2.809521275012095e-07, "loss": 0.0253, "step": 121440 }, { "epoch": 1.8640165758575704, "grad_norm": 0.26063427329063416, "learning_rate": 2.803219708948679e-07, "loss": 0.0286, "step": 121450 }, { "epoch": 1.8641700560202594, "grad_norm": 0.3441762328147888, "learning_rate": 2.796925117353699e-07, "loss": 0.0215, "step": 121460 }, { "epoch": 1.8643235361829484, "grad_norm": 0.37047824263572693, "learning_rate": 2.7906375006788277e-07, "loss": 0.0209, "step": 121470 }, { "epoch": 1.8644770163456372, "grad_norm": 0.4224502444267273, "learning_rate": 2.784356859375248e-07, "loss": 0.0261, "step": 121480 }, { "epoch": 1.8646304965083265, "grad_norm": 0.32762834429740906, "learning_rate": 2.7780831938936327e-07, "loss": 0.0256, "step": 121490 }, { "epoch": 1.8647839766710153, "grad_norm": 0.37821489572525024, "learning_rate": 2.771816504684155e-07, "loss": 0.0234, "step": 121500 }, { "epoch": 1.8649374568337043, "grad_norm": 0.2650701105594635, "learning_rate": 2.7655567921964775e-07, "loss": 0.0237, "step": 121510 }, { "epoch": 1.8650909369963933, "grad_norm": 0.3699846565723419, "learning_rate": 2.759304056879797e-07, "loss": 0.0233, "step": 121520 }, { "epoch": 1.865244417159082, "grad_norm": 0.5640255808830261, "learning_rate": 2.7530582991827647e-07, "loss": 0.0316, "step": 121530 }, { "epoch": 1.8653978973217713, "grad_norm": 0.3186202943325043, "learning_rate": 2.7468195195535565e-07, "loss": 0.0212, "step": 121540 }, { "epoch": 1.86555137748446, "grad_norm": 0.4749365448951721, "learning_rate": 2.740587718439847e-07, "loss": 0.0274, "step": 121550 }, { "epoch": 1.865704857647149, "grad_norm": 0.3428754210472107, "learning_rate": 2.734362896288811e-07, "loss": 0.0179, "step": 121560 }, { "epoch": 1.8658583378098381, "grad_norm": 0.26464369893074036, "learning_rate": 2.7281450535471157e-07, "loss": 0.018, "step": 121570 }, { "epoch": 1.866011817972527, "grad_norm": 0.4115942418575287, "learning_rate": 2.721934190660924e-07, "loss": 0.0222, "step": 121580 }, { "epoch": 1.8661652981352161, "grad_norm": 0.3592391312122345, "learning_rate": 2.7157303080759035e-07, "loss": 0.023, "step": 121590 }, { "epoch": 1.866318778297905, "grad_norm": 0.4295918345451355, "learning_rate": 2.70953340623723e-07, "loss": 0.0299, "step": 121600 }, { "epoch": 1.866472258460594, "grad_norm": 0.4102898836135864, "learning_rate": 2.703343485589549e-07, "loss": 0.0283, "step": 121610 }, { "epoch": 1.866625738623283, "grad_norm": 0.35395076870918274, "learning_rate": 2.69716054657706e-07, "loss": 0.0268, "step": 121620 }, { "epoch": 1.8667792187859718, "grad_norm": 0.28940773010253906, "learning_rate": 2.690984589643386e-07, "loss": 0.0216, "step": 121630 }, { "epoch": 1.866932698948661, "grad_norm": 0.3661419451236725, "learning_rate": 2.6848156152317284e-07, "loss": 0.0293, "step": 121640 }, { "epoch": 1.8670861791113498, "grad_norm": 0.30883267521858215, "learning_rate": 2.6786536237847215e-07, "loss": 0.0257, "step": 121650 }, { "epoch": 1.8672396592740388, "grad_norm": 0.25150516629219055, "learning_rate": 2.6724986157445455e-07, "loss": 0.0244, "step": 121660 }, { "epoch": 1.8673931394367278, "grad_norm": 0.3058697581291199, "learning_rate": 2.666350591552846e-07, "loss": 0.0206, "step": 121670 }, { "epoch": 1.8675466195994168, "grad_norm": 0.3971410095691681, "learning_rate": 2.660209551650783e-07, "loss": 0.0267, "step": 121680 }, { "epoch": 1.8677000997621058, "grad_norm": 0.4658638536930084, "learning_rate": 2.654075496479025e-07, "loss": 0.0258, "step": 121690 }, { "epoch": 1.8678535799247946, "grad_norm": 0.4363316297531128, "learning_rate": 2.647948426477709e-07, "loss": 0.0247, "step": 121700 }, { "epoch": 1.8680070600874839, "grad_norm": 0.3266245424747467, "learning_rate": 2.641828342086517e-07, "loss": 0.0221, "step": 121710 }, { "epoch": 1.8681605402501726, "grad_norm": 0.3161647617816925, "learning_rate": 2.6357152437445854e-07, "loss": 0.0206, "step": 121720 }, { "epoch": 1.8683140204128617, "grad_norm": 0.22727647423744202, "learning_rate": 2.6296091318905537e-07, "loss": 0.0236, "step": 121730 }, { "epoch": 1.8684675005755507, "grad_norm": 0.4244134724140167, "learning_rate": 2.623510006962604e-07, "loss": 0.021, "step": 121740 }, { "epoch": 1.8686209807382395, "grad_norm": 0.3015722632408142, "learning_rate": 2.6174178693983645e-07, "loss": 0.0319, "step": 121750 }, { "epoch": 1.8687744609009287, "grad_norm": 0.4890076220035553, "learning_rate": 2.6113327196349957e-07, "loss": 0.0234, "step": 121760 }, { "epoch": 1.8689279410636175, "grad_norm": 0.39530014991760254, "learning_rate": 2.605254558109138e-07, "loss": 0.0147, "step": 121770 }, { "epoch": 1.8690814212263065, "grad_norm": 0.23496828973293304, "learning_rate": 2.5991833852569424e-07, "loss": 0.0292, "step": 121780 }, { "epoch": 1.8692349013889955, "grad_norm": 0.35287126898765564, "learning_rate": 2.5931192015140384e-07, "loss": 0.0391, "step": 121790 }, { "epoch": 1.8693883815516843, "grad_norm": 0.43156561255455017, "learning_rate": 2.587062007315566e-07, "loss": 0.0238, "step": 121800 }, { "epoch": 1.8695418617143735, "grad_norm": 0.270257830619812, "learning_rate": 2.5810118030962004e-07, "loss": 0.02, "step": 121810 }, { "epoch": 1.8696953418770623, "grad_norm": 0.39446476101875305, "learning_rate": 2.574968589290061e-07, "loss": 0.028, "step": 121820 }, { "epoch": 1.8698488220397513, "grad_norm": 0.3163944482803345, "learning_rate": 2.5689323663307677e-07, "loss": 0.0232, "step": 121830 }, { "epoch": 1.8700023022024403, "grad_norm": 0.3458971083164215, "learning_rate": 2.5629031346514846e-07, "loss": 0.0293, "step": 121840 }, { "epoch": 1.8701557823651294, "grad_norm": 0.35876697301864624, "learning_rate": 2.556880894684832e-07, "loss": 0.0222, "step": 121850 }, { "epoch": 1.8703092625278184, "grad_norm": 0.4683779776096344, "learning_rate": 2.5508656468629544e-07, "loss": 0.0238, "step": 121860 }, { "epoch": 1.8704627426905072, "grad_norm": 0.43630725145339966, "learning_rate": 2.5448573916174613e-07, "loss": 0.0221, "step": 121870 }, { "epoch": 1.8706162228531962, "grad_norm": 0.35781434178352356, "learning_rate": 2.538856129379497e-07, "loss": 0.0253, "step": 121880 }, { "epoch": 1.8707697030158852, "grad_norm": 0.2532518208026886, "learning_rate": 2.532861860579683e-07, "loss": 0.028, "step": 121890 }, { "epoch": 1.8709231831785742, "grad_norm": 0.4789019525051117, "learning_rate": 2.5268745856481535e-07, "loss": 0.027, "step": 121900 }, { "epoch": 1.8710766633412632, "grad_norm": 0.4000762701034546, "learning_rate": 2.5208943050145317e-07, "loss": 0.0293, "step": 121910 }, { "epoch": 1.871230143503952, "grad_norm": 0.42469972372055054, "learning_rate": 2.5149210191079296e-07, "loss": 0.0251, "step": 121920 }, { "epoch": 1.8713836236666412, "grad_norm": 0.40164151787757874, "learning_rate": 2.5089547283569826e-07, "loss": 0.0225, "step": 121930 }, { "epoch": 1.87153710382933, "grad_norm": 0.3161861300468445, "learning_rate": 2.502995433189792e-07, "loss": 0.0241, "step": 121940 }, { "epoch": 1.871690583992019, "grad_norm": 0.43784406781196594, "learning_rate": 2.4970431340339827e-07, "loss": 0.0251, "step": 121950 }, { "epoch": 1.871844064154708, "grad_norm": 0.39736685156822205, "learning_rate": 2.4910978313166687e-07, "loss": 0.0294, "step": 121960 }, { "epoch": 1.8719975443173968, "grad_norm": 0.2585800290107727, "learning_rate": 2.4851595254644757e-07, "loss": 0.0259, "step": 121970 }, { "epoch": 1.872151024480086, "grad_norm": 0.28984200954437256, "learning_rate": 2.4792282169034845e-07, "loss": 0.0217, "step": 121980 }, { "epoch": 1.8723045046427749, "grad_norm": 0.2484370619058609, "learning_rate": 2.47330390605931e-07, "loss": 0.022, "step": 121990 }, { "epoch": 1.8724579848054639, "grad_norm": 0.25562769174575806, "learning_rate": 2.4673865933570685e-07, "loss": 0.0206, "step": 122000 }, { "epoch": 1.8724579848054639, "eval_loss": 0.01716664433479309, "eval_runtime": 4.8265, "eval_samples_per_second": 41.438, "eval_steps_per_second": 20.719, "step": 122000 }, { "epoch": 1.872611464968153, "grad_norm": 0.32120731472969055, "learning_rate": 2.4614762792213754e-07, "loss": 0.0243, "step": 122010 }, { "epoch": 1.8727649451308417, "grad_norm": 0.2509273588657379, "learning_rate": 2.455572964076314e-07, "loss": 0.022, "step": 122020 }, { "epoch": 1.872918425293531, "grad_norm": 0.429333359003067, "learning_rate": 2.4496766483454784e-07, "loss": 0.0238, "step": 122030 }, { "epoch": 1.8730719054562197, "grad_norm": 0.3596811592578888, "learning_rate": 2.4437873324519855e-07, "loss": 0.0221, "step": 122040 }, { "epoch": 1.8732253856189087, "grad_norm": 0.34472033381462097, "learning_rate": 2.4379050168184093e-07, "loss": 0.0235, "step": 122050 }, { "epoch": 1.8733788657815977, "grad_norm": 0.3309188485145569, "learning_rate": 2.432029701866856e-07, "loss": 0.0232, "step": 122060 }, { "epoch": 1.8735323459442867, "grad_norm": 0.44132789969444275, "learning_rate": 2.426161388018922e-07, "loss": 0.0312, "step": 122070 }, { "epoch": 1.8736858261069758, "grad_norm": 0.4319400489330292, "learning_rate": 2.4203000756956806e-07, "loss": 0.0245, "step": 122080 }, { "epoch": 1.8738393062696646, "grad_norm": 0.46348220109939575, "learning_rate": 2.414445765317708e-07, "loss": 0.0269, "step": 122090 }, { "epoch": 1.8739927864323536, "grad_norm": 0.3394063413143158, "learning_rate": 2.4085984573051115e-07, "loss": 0.0221, "step": 122100 }, { "epoch": 1.8741462665950426, "grad_norm": 0.38861945271492004, "learning_rate": 2.4027581520774557e-07, "loss": 0.0234, "step": 122110 }, { "epoch": 1.8742997467577316, "grad_norm": 0.44036203622817993, "learning_rate": 2.396924850053828e-07, "loss": 0.0328, "step": 122120 }, { "epoch": 1.8744532269204206, "grad_norm": 0.4014936089515686, "learning_rate": 2.391098551652793e-07, "loss": 0.0204, "step": 122130 }, { "epoch": 1.8746067070831094, "grad_norm": 0.30678534507751465, "learning_rate": 2.385279257292439e-07, "loss": 0.0334, "step": 122140 }, { "epoch": 1.8747601872457986, "grad_norm": 0.6082811951637268, "learning_rate": 2.3794669673903203e-07, "loss": 0.0211, "step": 122150 }, { "epoch": 1.8749136674084874, "grad_norm": 0.3567162752151489, "learning_rate": 2.373661682363504e-07, "loss": 0.0282, "step": 122160 }, { "epoch": 1.8750671475711764, "grad_norm": 0.45835012197494507, "learning_rate": 2.3678634026285897e-07, "loss": 0.0243, "step": 122170 }, { "epoch": 1.8752206277338654, "grad_norm": 0.45736995339393616, "learning_rate": 2.362072128601578e-07, "loss": 0.0266, "step": 122180 }, { "epoch": 1.8753741078965542, "grad_norm": 0.434772253036499, "learning_rate": 2.356287860698081e-07, "loss": 0.0261, "step": 122190 }, { "epoch": 1.8755275880592435, "grad_norm": 0.39697375893592834, "learning_rate": 2.3505105993331446e-07, "loss": 0.0253, "step": 122200 }, { "epoch": 1.8756810682219323, "grad_norm": 0.5177055597305298, "learning_rate": 2.3447403449213035e-07, "loss": 0.0336, "step": 122210 }, { "epoch": 1.8758345483846213, "grad_norm": 0.31414198875427246, "learning_rate": 2.3389770978766269e-07, "loss": 0.0322, "step": 122220 }, { "epoch": 1.8759880285473103, "grad_norm": 0.44594964385032654, "learning_rate": 2.3332208586126503e-07, "loss": 0.027, "step": 122230 }, { "epoch": 1.876141508709999, "grad_norm": 0.30179738998413086, "learning_rate": 2.3274716275424437e-07, "loss": 0.0217, "step": 122240 }, { "epoch": 1.8762949888726883, "grad_norm": 0.27390459179878235, "learning_rate": 2.32172940507851e-07, "loss": 0.0197, "step": 122250 }, { "epoch": 1.876448469035377, "grad_norm": 0.49134987592697144, "learning_rate": 2.3159941916329198e-07, "loss": 0.0218, "step": 122260 }, { "epoch": 1.876601949198066, "grad_norm": 0.3962762653827667, "learning_rate": 2.3102659876172216e-07, "loss": 0.0228, "step": 122270 }, { "epoch": 1.8767554293607551, "grad_norm": 0.38932445645332336, "learning_rate": 2.3045447934423983e-07, "loss": 0.0261, "step": 122280 }, { "epoch": 1.8769089095234441, "grad_norm": 0.21603381633758545, "learning_rate": 2.2988306095190317e-07, "loss": 0.0225, "step": 122290 }, { "epoch": 1.8770623896861331, "grad_norm": 0.37786078453063965, "learning_rate": 2.2931234362571275e-07, "loss": 0.0238, "step": 122300 }, { "epoch": 1.877215869848822, "grad_norm": 0.4600902199745178, "learning_rate": 2.287423274066225e-07, "loss": 0.0329, "step": 122310 }, { "epoch": 1.8773693500115112, "grad_norm": 0.482052743434906, "learning_rate": 2.281730123355319e-07, "loss": 0.0249, "step": 122320 }, { "epoch": 1.8775228301742, "grad_norm": 0.6588420271873474, "learning_rate": 2.2760439845329497e-07, "loss": 0.0267, "step": 122330 }, { "epoch": 1.877676310336889, "grad_norm": 0.49434542655944824, "learning_rate": 2.2703648580071235e-07, "loss": 0.0265, "step": 122340 }, { "epoch": 1.877829790499578, "grad_norm": 0.2799489200115204, "learning_rate": 2.2646927441853595e-07, "loss": 0.0275, "step": 122350 }, { "epoch": 1.8779832706622668, "grad_norm": 0.4223164916038513, "learning_rate": 2.259027643474665e-07, "loss": 0.0259, "step": 122360 }, { "epoch": 1.878136750824956, "grad_norm": 0.32199084758758545, "learning_rate": 2.2533695562815484e-07, "loss": 0.0185, "step": 122370 }, { "epoch": 1.8782902309876448, "grad_norm": 0.3856390714645386, "learning_rate": 2.2477184830119957e-07, "loss": 0.0275, "step": 122380 }, { "epoch": 1.8784437111503338, "grad_norm": 0.25334301590919495, "learning_rate": 2.2420744240715276e-07, "loss": 0.0158, "step": 122390 }, { "epoch": 1.8785971913130228, "grad_norm": 0.26037538051605225, "learning_rate": 2.2364373798651305e-07, "loss": 0.0248, "step": 122400 }, { "epoch": 1.8787506714757116, "grad_norm": 0.28633683919906616, "learning_rate": 2.2308073507972928e-07, "loss": 0.0273, "step": 122410 }, { "epoch": 1.8789041516384009, "grad_norm": 0.4309004843235016, "learning_rate": 2.2251843372720128e-07, "loss": 0.0211, "step": 122420 }, { "epoch": 1.8790576318010896, "grad_norm": 0.26919692754745483, "learning_rate": 2.2195683396927792e-07, "loss": 0.017, "step": 122430 }, { "epoch": 1.8792111119637787, "grad_norm": 0.8685842752456665, "learning_rate": 2.2139593584625474e-07, "loss": 0.0295, "step": 122440 }, { "epoch": 1.8793645921264677, "grad_norm": 0.4703359603881836, "learning_rate": 2.2083573939838399e-07, "loss": 0.0266, "step": 122450 }, { "epoch": 1.8795180722891565, "grad_norm": 0.49400070309638977, "learning_rate": 2.2027624466586016e-07, "loss": 0.0212, "step": 122460 }, { "epoch": 1.8796715524518457, "grad_norm": 0.4150626063346863, "learning_rate": 2.1971745168883118e-07, "loss": 0.02, "step": 122470 }, { "epoch": 1.8798250326145345, "grad_norm": 0.3579172194004059, "learning_rate": 2.1915936050739382e-07, "loss": 0.0221, "step": 122480 }, { "epoch": 1.8799785127772235, "grad_norm": 0.3205524981021881, "learning_rate": 2.1860197116159498e-07, "loss": 0.0277, "step": 122490 }, { "epoch": 1.8801319929399125, "grad_norm": 0.3681524991989136, "learning_rate": 2.180452836914293e-07, "loss": 0.0223, "step": 122500 }, { "epoch": 1.8802854731026015, "grad_norm": 0.42999354004859924, "learning_rate": 2.1748929813684483e-07, "loss": 0.0302, "step": 122510 }, { "epoch": 1.8804389532652905, "grad_norm": 0.6420230269432068, "learning_rate": 2.1693401453773633e-07, "loss": 0.0255, "step": 122520 }, { "epoch": 1.8805924334279793, "grad_norm": 0.3225679099559784, "learning_rate": 2.163794329339475e-07, "loss": 0.0293, "step": 122530 }, { "epoch": 1.8807459135906686, "grad_norm": 0.28248193860054016, "learning_rate": 2.1582555336527312e-07, "loss": 0.0254, "step": 122540 }, { "epoch": 1.8808993937533574, "grad_norm": 0.4870430529117584, "learning_rate": 2.1527237587145921e-07, "loss": 0.0219, "step": 122550 }, { "epoch": 1.8810528739160464, "grad_norm": 0.3572693467140198, "learning_rate": 2.1471990049219958e-07, "loss": 0.0408, "step": 122560 }, { "epoch": 1.8812063540787354, "grad_norm": 0.41111651062965393, "learning_rate": 2.1416812726713476e-07, "loss": 0.0282, "step": 122570 }, { "epoch": 1.8813598342414242, "grad_norm": 0.39232832193374634, "learning_rate": 2.1361705623586082e-07, "loss": 0.0226, "step": 122580 }, { "epoch": 1.8815133144041134, "grad_norm": 0.43465936183929443, "learning_rate": 2.1306668743791946e-07, "loss": 0.0323, "step": 122590 }, { "epoch": 1.8816667945668022, "grad_norm": 0.4091433584690094, "learning_rate": 2.1251702091280356e-07, "loss": 0.0249, "step": 122600 }, { "epoch": 1.8818202747294912, "grad_norm": 0.5681187510490417, "learning_rate": 2.119680566999538e-07, "loss": 0.0262, "step": 122610 }, { "epoch": 1.8819737548921802, "grad_norm": 0.35571250319480896, "learning_rate": 2.1141979483876306e-07, "loss": 0.0252, "step": 122620 }, { "epoch": 1.882127235054869, "grad_norm": 0.29096904397010803, "learning_rate": 2.1087223536857327e-07, "loss": 0.031, "step": 122630 }, { "epoch": 1.8822807152175582, "grad_norm": 0.4515691101551056, "learning_rate": 2.1032537832867183e-07, "loss": 0.0243, "step": 122640 }, { "epoch": 1.882434195380247, "grad_norm": 0.27488189935684204, "learning_rate": 2.0977922375830294e-07, "loss": 0.028, "step": 122650 }, { "epoch": 1.882587675542936, "grad_norm": 0.3423326015472412, "learning_rate": 2.0923377169665527e-07, "loss": 0.018, "step": 122660 }, { "epoch": 1.882741155705625, "grad_norm": 0.37001997232437134, "learning_rate": 2.0868902218286636e-07, "loss": 0.0288, "step": 122670 }, { "epoch": 1.882894635868314, "grad_norm": 0.5460296273231506, "learning_rate": 2.0814497525602829e-07, "loss": 0.0244, "step": 122680 }, { "epoch": 1.883048116031003, "grad_norm": 0.33748310804367065, "learning_rate": 2.076016309551776e-07, "loss": 0.0243, "step": 122690 }, { "epoch": 1.8832015961936919, "grad_norm": 0.422212690114975, "learning_rate": 2.0705898931930314e-07, "loss": 0.0271, "step": 122700 }, { "epoch": 1.8833550763563809, "grad_norm": 0.40074995160102844, "learning_rate": 2.0651705038734482e-07, "loss": 0.0313, "step": 122710 }, { "epoch": 1.88350855651907, "grad_norm": 0.45249703526496887, "learning_rate": 2.0597581419818824e-07, "loss": 0.0324, "step": 122720 }, { "epoch": 1.883662036681759, "grad_norm": 0.31075751781463623, "learning_rate": 2.05435280790669e-07, "loss": 0.022, "step": 122730 }, { "epoch": 1.883815516844448, "grad_norm": 0.33985835313796997, "learning_rate": 2.0489545020357603e-07, "loss": 0.0209, "step": 122740 }, { "epoch": 1.8839689970071367, "grad_norm": 0.6052733063697815, "learning_rate": 2.0435632247564506e-07, "loss": 0.0283, "step": 122750 }, { "epoch": 1.884122477169826, "grad_norm": 0.17382241785526276, "learning_rate": 2.0381789764556182e-07, "loss": 0.0312, "step": 122760 }, { "epoch": 1.8842759573325147, "grad_norm": 0.34116753935813904, "learning_rate": 2.0328017575196091e-07, "loss": 0.0159, "step": 122770 }, { "epoch": 1.8844294374952038, "grad_norm": 0.32305365800857544, "learning_rate": 2.0274315683342816e-07, "loss": 0.0302, "step": 122780 }, { "epoch": 1.8845829176578928, "grad_norm": 0.4752151370048523, "learning_rate": 2.022068409284972e-07, "loss": 0.0213, "step": 122790 }, { "epoch": 1.8847363978205816, "grad_norm": 0.2613511085510254, "learning_rate": 2.0167122807565165e-07, "loss": 0.0195, "step": 122800 }, { "epoch": 1.8848898779832708, "grad_norm": 0.3583933413028717, "learning_rate": 2.0113631831332746e-07, "loss": 0.0224, "step": 122810 }, { "epoch": 1.8850433581459596, "grad_norm": 0.34003445506095886, "learning_rate": 2.0060211167990505e-07, "loss": 0.0299, "step": 122820 }, { "epoch": 1.8851968383086486, "grad_norm": 0.5869508385658264, "learning_rate": 2.00068608213716e-07, "loss": 0.0325, "step": 122830 }, { "epoch": 1.8853503184713376, "grad_norm": 0.3614225387573242, "learning_rate": 1.9953580795304628e-07, "loss": 0.0213, "step": 122840 }, { "epoch": 1.8855037986340264, "grad_norm": 0.6430782079696655, "learning_rate": 1.9900371093612536e-07, "loss": 0.0396, "step": 122850 }, { "epoch": 1.8856572787967156, "grad_norm": 0.4138953387737274, "learning_rate": 1.984723172011349e-07, "loss": 0.0189, "step": 122860 }, { "epoch": 1.8858107589594044, "grad_norm": 0.3707183003425598, "learning_rate": 1.9794162678620553e-07, "loss": 0.0274, "step": 122870 }, { "epoch": 1.8859642391220934, "grad_norm": 0.35962632298469543, "learning_rate": 1.974116397294179e-07, "loss": 0.0263, "step": 122880 }, { "epoch": 1.8861177192847824, "grad_norm": 0.4696175456047058, "learning_rate": 1.9688235606880157e-07, "loss": 0.0292, "step": 122890 }, { "epoch": 1.8862711994474715, "grad_norm": 0.4174690544605255, "learning_rate": 1.9635377584233505e-07, "loss": 0.0223, "step": 122900 }, { "epoch": 1.8864246796101605, "grad_norm": 0.3520238995552063, "learning_rate": 1.9582589908794913e-07, "loss": 0.0294, "step": 122910 }, { "epoch": 1.8865781597728493, "grad_norm": 0.3517850339412689, "learning_rate": 1.9529872584352127e-07, "loss": 0.0273, "step": 122920 }, { "epoch": 1.8867316399355385, "grad_norm": 0.396240770816803, "learning_rate": 1.947722561468779e-07, "loss": 0.0297, "step": 122930 }, { "epoch": 1.8868851200982273, "grad_norm": 0.3045932650566101, "learning_rate": 1.9424649003579877e-07, "loss": 0.0245, "step": 122940 }, { "epoch": 1.8870386002609163, "grad_norm": 0.48971137404441833, "learning_rate": 1.9372142754800928e-07, "loss": 0.0328, "step": 122950 }, { "epoch": 1.8871920804236053, "grad_norm": 0.3146792948246002, "learning_rate": 1.9319706872118705e-07, "loss": 0.023, "step": 122960 }, { "epoch": 1.887345560586294, "grad_norm": 0.23732516169548035, "learning_rate": 1.9267341359295865e-07, "loss": 0.0321, "step": 122970 }, { "epoch": 1.8874990407489833, "grad_norm": 0.3246147036552429, "learning_rate": 1.921504622008974e-07, "loss": 0.0214, "step": 122980 }, { "epoch": 1.8876525209116721, "grad_norm": 0.3690878748893738, "learning_rate": 1.9162821458252879e-07, "loss": 0.0247, "step": 122990 }, { "epoch": 1.8878060010743611, "grad_norm": 0.28850480914115906, "learning_rate": 1.9110667077532842e-07, "loss": 0.016, "step": 123000 }, { "epoch": 1.8879594812370502, "grad_norm": 0.32286372780799866, "learning_rate": 1.9058583081672076e-07, "loss": 0.0239, "step": 123010 }, { "epoch": 1.888112961399739, "grad_norm": 0.3594130277633667, "learning_rate": 1.900656947440771e-07, "loss": 0.0293, "step": 123020 }, { "epoch": 1.8882664415624282, "grad_norm": 0.43298864364624023, "learning_rate": 1.8954626259472197e-07, "loss": 0.0219, "step": 123030 }, { "epoch": 1.888419921725117, "grad_norm": 0.3203187584877014, "learning_rate": 1.890275344059278e-07, "loss": 0.0266, "step": 123040 }, { "epoch": 1.888573401887806, "grad_norm": 0.4028090834617615, "learning_rate": 1.8850951021491593e-07, "loss": 0.0267, "step": 123050 }, { "epoch": 1.888726882050495, "grad_norm": 0.43142518401145935, "learning_rate": 1.879921900588577e-07, "loss": 0.0234, "step": 123060 }, { "epoch": 1.8888803622131838, "grad_norm": 0.29933133721351624, "learning_rate": 1.874755739748746e-07, "loss": 0.0273, "step": 123070 }, { "epoch": 1.889033842375873, "grad_norm": 0.34257829189300537, "learning_rate": 1.869596620000369e-07, "loss": 0.0267, "step": 123080 }, { "epoch": 1.8891873225385618, "grad_norm": 0.3300600051879883, "learning_rate": 1.8644445417136393e-07, "loss": 0.0225, "step": 123090 }, { "epoch": 1.8893408027012508, "grad_norm": 0.3141782879829407, "learning_rate": 1.8592995052582606e-07, "loss": 0.0291, "step": 123100 }, { "epoch": 1.8894942828639398, "grad_norm": 0.23690161108970642, "learning_rate": 1.854161511003416e-07, "loss": 0.0235, "step": 123110 }, { "epoch": 1.8896477630266288, "grad_norm": 0.4057036340236664, "learning_rate": 1.849030559317777e-07, "loss": 0.0278, "step": 123120 }, { "epoch": 1.8898012431893179, "grad_norm": 0.3404492735862732, "learning_rate": 1.843906650569538e-07, "loss": 0.0251, "step": 123130 }, { "epoch": 1.8899547233520066, "grad_norm": 0.2690121829509735, "learning_rate": 1.8387897851263715e-07, "loss": 0.0288, "step": 123140 }, { "epoch": 1.8901082035146959, "grad_norm": 0.43782487511634827, "learning_rate": 1.833679963355428e-07, "loss": 0.0233, "step": 123150 }, { "epoch": 1.8902616836773847, "grad_norm": 0.4171521067619324, "learning_rate": 1.8285771856233815e-07, "loss": 0.0244, "step": 123160 }, { "epoch": 1.8904151638400737, "grad_norm": 0.42186304926872253, "learning_rate": 1.823481452296383e-07, "loss": 0.0289, "step": 123170 }, { "epoch": 1.8905686440027627, "grad_norm": 0.6250588893890381, "learning_rate": 1.8183927637400844e-07, "loss": 0.0194, "step": 123180 }, { "epoch": 1.8907221241654515, "grad_norm": 0.2876954972743988, "learning_rate": 1.8133111203196274e-07, "loss": 0.0203, "step": 123190 }, { "epoch": 1.8908756043281407, "grad_norm": 0.4113311469554901, "learning_rate": 1.8082365223996535e-07, "loss": 0.0257, "step": 123200 }, { "epoch": 1.8910290844908295, "grad_norm": 0.38357242941856384, "learning_rate": 1.8031689703443044e-07, "loss": 0.0264, "step": 123210 }, { "epoch": 1.8911825646535185, "grad_norm": 0.4248015582561493, "learning_rate": 1.7981084645171898e-07, "loss": 0.0234, "step": 123220 }, { "epoch": 1.8913360448162075, "grad_norm": 0.42653048038482666, "learning_rate": 1.793055005281452e-07, "loss": 0.0241, "step": 123230 }, { "epoch": 1.8914895249788963, "grad_norm": 0.28709402680397034, "learning_rate": 1.7880085929996905e-07, "loss": 0.0191, "step": 123240 }, { "epoch": 1.8916430051415856, "grad_norm": 0.4334156811237335, "learning_rate": 1.782969228034026e-07, "loss": 0.0257, "step": 123250 }, { "epoch": 1.8917964853042744, "grad_norm": 0.3972399830818176, "learning_rate": 1.7779369107460698e-07, "loss": 0.0308, "step": 123260 }, { "epoch": 1.8919499654669634, "grad_norm": 0.30051374435424805, "learning_rate": 1.7729116414969106e-07, "loss": 0.0268, "step": 123270 }, { "epoch": 1.8921034456296524, "grad_norm": 0.4264620542526245, "learning_rate": 1.7678934206471377e-07, "loss": 0.0318, "step": 123280 }, { "epoch": 1.8922569257923414, "grad_norm": 0.29510757327079773, "learning_rate": 1.762882248556863e-07, "loss": 0.0304, "step": 123290 }, { "epoch": 1.8924104059550304, "grad_norm": 0.31632792949676514, "learning_rate": 1.7578781255856435e-07, "loss": 0.0186, "step": 123300 }, { "epoch": 1.8925638861177192, "grad_norm": 0.40611666440963745, "learning_rate": 1.7528810520925699e-07, "loss": 0.0235, "step": 123310 }, { "epoch": 1.8927173662804082, "grad_norm": 0.34683364629745483, "learning_rate": 1.7478910284362106e-07, "loss": 0.0235, "step": 123320 }, { "epoch": 1.8928708464430972, "grad_norm": 0.27724123001098633, "learning_rate": 1.7429080549746347e-07, "loss": 0.0253, "step": 123330 }, { "epoch": 1.8930243266057862, "grad_norm": 0.36565127968788147, "learning_rate": 1.7379321320653896e-07, "loss": 0.0308, "step": 123340 }, { "epoch": 1.8931778067684752, "grad_norm": 0.2441468983888626, "learning_rate": 1.7329632600655345e-07, "loss": 0.0198, "step": 123350 }, { "epoch": 1.893331286931164, "grad_norm": 0.38575857877731323, "learning_rate": 1.728001439331628e-07, "loss": 0.0239, "step": 123360 }, { "epoch": 1.8934847670938533, "grad_norm": 0.2696378827095032, "learning_rate": 1.723046670219697e-07, "loss": 0.0209, "step": 123370 }, { "epoch": 1.893638247256542, "grad_norm": 0.30985933542251587, "learning_rate": 1.7180989530852677e-07, "loss": 0.0249, "step": 123380 }, { "epoch": 1.893791727419231, "grad_norm": 0.4662318527698517, "learning_rate": 1.7131582882833896e-07, "loss": 0.0305, "step": 123390 }, { "epoch": 1.89394520758192, "grad_norm": 0.4263102412223816, "learning_rate": 1.7082246761685796e-07, "loss": 0.0271, "step": 123400 }, { "epoch": 1.8940986877446089, "grad_norm": 0.36096981167793274, "learning_rate": 1.7032981170948538e-07, "loss": 0.024, "step": 123410 }, { "epoch": 1.8942521679072981, "grad_norm": 0.34047234058380127, "learning_rate": 1.6983786114157186e-07, "loss": 0.0278, "step": 123420 }, { "epoch": 1.894405648069987, "grad_norm": 0.29143989086151123, "learning_rate": 1.6934661594841916e-07, "loss": 0.0243, "step": 123430 }, { "epoch": 1.894559128232676, "grad_norm": 0.3083045184612274, "learning_rate": 1.6885607616527577e-07, "loss": 0.0201, "step": 123440 }, { "epoch": 1.894712608395365, "grad_norm": 0.3008401691913605, "learning_rate": 1.6836624182734019e-07, "loss": 0.0276, "step": 123450 }, { "epoch": 1.8948660885580537, "grad_norm": 0.24549823999404907, "learning_rate": 1.678771129697654e-07, "loss": 0.0224, "step": 123460 }, { "epoch": 1.895019568720743, "grad_norm": 0.2980325520038605, "learning_rate": 1.6738868962764443e-07, "loss": 0.0252, "step": 123470 }, { "epoch": 1.8951730488834317, "grad_norm": 0.3586727976799011, "learning_rate": 1.6690097183602593e-07, "loss": 0.0289, "step": 123480 }, { "epoch": 1.8953265290461208, "grad_norm": 0.3523592948913574, "learning_rate": 1.6641395962990858e-07, "loss": 0.0346, "step": 123490 }, { "epoch": 1.8954800092088098, "grad_norm": 0.2776118218898773, "learning_rate": 1.6592765304423663e-07, "loss": 0.0294, "step": 123500 }, { "epoch": 1.8956334893714988, "grad_norm": 0.33445465564727783, "learning_rate": 1.6544205211390662e-07, "loss": 0.0295, "step": 123510 }, { "epoch": 1.8957869695341878, "grad_norm": 0.38761019706726074, "learning_rate": 1.649571568737629e-07, "loss": 0.0194, "step": 123520 }, { "epoch": 1.8959404496968766, "grad_norm": 0.3181195557117462, "learning_rate": 1.6447296735859874e-07, "loss": 0.0234, "step": 123530 }, { "epoch": 1.8960939298595656, "grad_norm": 0.44062843918800354, "learning_rate": 1.639894836031597e-07, "loss": 0.0232, "step": 123540 }, { "epoch": 1.8962474100222546, "grad_norm": 0.33008846640586853, "learning_rate": 1.63506705642138e-07, "loss": 0.0303, "step": 123550 }, { "epoch": 1.8964008901849436, "grad_norm": 0.23737430572509766, "learning_rate": 1.6302463351017595e-07, "loss": 0.0176, "step": 123560 }, { "epoch": 1.8965543703476326, "grad_norm": 0.29834863543510437, "learning_rate": 1.6254326724186366e-07, "loss": 0.0219, "step": 123570 }, { "epoch": 1.8967078505103214, "grad_norm": 0.36792680621147156, "learning_rate": 1.6206260687174456e-07, "loss": 0.0241, "step": 123580 }, { "epoch": 1.8968613306730107, "grad_norm": 0.1855546087026596, "learning_rate": 1.615826524343078e-07, "loss": 0.0211, "step": 123590 }, { "epoch": 1.8970148108356994, "grad_norm": 0.35465970635414124, "learning_rate": 1.611034039639925e-07, "loss": 0.0195, "step": 123600 }, { "epoch": 1.8971682909983885, "grad_norm": 0.3901382386684418, "learning_rate": 1.6062486149518885e-07, "loss": 0.0225, "step": 123610 }, { "epoch": 1.8973217711610775, "grad_norm": 0.4550628960132599, "learning_rate": 1.60147025062235e-07, "loss": 0.023, "step": 123620 }, { "epoch": 1.8974752513237663, "grad_norm": 0.30670225620269775, "learning_rate": 1.5966989469941796e-07, "loss": 0.023, "step": 123630 }, { "epoch": 1.8976287314864555, "grad_norm": 0.23975254595279694, "learning_rate": 1.5919347044097477e-07, "loss": 0.018, "step": 123640 }, { "epoch": 1.8977822116491443, "grad_norm": 0.3107439875602722, "learning_rate": 1.5871775232109254e-07, "loss": 0.0236, "step": 123650 }, { "epoch": 1.8979356918118333, "grad_norm": 0.3410751521587372, "learning_rate": 1.582427403739073e-07, "loss": 0.0234, "step": 123660 }, { "epoch": 1.8980891719745223, "grad_norm": 0.39736345410346985, "learning_rate": 1.5776843463350288e-07, "loss": 0.0248, "step": 123670 }, { "epoch": 1.898242652137211, "grad_norm": 0.38193273544311523, "learning_rate": 1.572948351339143e-07, "loss": 0.0268, "step": 123680 }, { "epoch": 1.8983961322999003, "grad_norm": 0.24525552988052368, "learning_rate": 1.5682194190912546e-07, "loss": 0.0245, "step": 123690 }, { "epoch": 1.8985496124625891, "grad_norm": 0.38737550377845764, "learning_rate": 1.5634975499306926e-07, "loss": 0.0227, "step": 123700 }, { "epoch": 1.8987030926252781, "grad_norm": 0.29860812425613403, "learning_rate": 1.5587827441962743e-07, "loss": 0.0214, "step": 123710 }, { "epoch": 1.8988565727879672, "grad_norm": 0.4125438332557678, "learning_rate": 1.5540750022263183e-07, "loss": 0.0224, "step": 123720 }, { "epoch": 1.8990100529506562, "grad_norm": 0.24599355459213257, "learning_rate": 1.5493743243586323e-07, "loss": 0.0217, "step": 123730 }, { "epoch": 1.8991635331133452, "grad_norm": 0.2088795006275177, "learning_rate": 1.544680710930524e-07, "loss": 0.024, "step": 123740 }, { "epoch": 1.899317013276034, "grad_norm": 0.37599265575408936, "learning_rate": 1.5399941622787905e-07, "loss": 0.0201, "step": 123750 }, { "epoch": 1.8994704934387232, "grad_norm": 0.42314374446868896, "learning_rate": 1.53531467873973e-07, "loss": 0.0224, "step": 123760 }, { "epoch": 1.899623973601412, "grad_norm": 0.3614158034324646, "learning_rate": 1.5306422606490956e-07, "loss": 0.0267, "step": 123770 }, { "epoch": 1.899777453764101, "grad_norm": 0.3313369154930115, "learning_rate": 1.525976908342186e-07, "loss": 0.0223, "step": 123780 }, { "epoch": 1.89993093392679, "grad_norm": 0.29706019163131714, "learning_rate": 1.5213186221537557e-07, "loss": 0.0222, "step": 123790 }, { "epoch": 1.9000844140894788, "grad_norm": 0.7893766164779663, "learning_rate": 1.5166674024180706e-07, "loss": 0.022, "step": 123800 }, { "epoch": 1.900237894252168, "grad_norm": 0.3137873709201813, "learning_rate": 1.5120232494688968e-07, "loss": 0.0239, "step": 123810 }, { "epoch": 1.9003913744148568, "grad_norm": 0.3970996141433716, "learning_rate": 1.507386163639457e-07, "loss": 0.032, "step": 123820 }, { "epoch": 1.9005448545775459, "grad_norm": 0.38222625851631165, "learning_rate": 1.5027561452625074e-07, "loss": 0.0226, "step": 123830 }, { "epoch": 1.9006983347402349, "grad_norm": 0.3179643750190735, "learning_rate": 1.4981331946702703e-07, "loss": 0.0192, "step": 123840 }, { "epoch": 1.9008518149029237, "grad_norm": 0.4395809471607208, "learning_rate": 1.4935173121944925e-07, "loss": 0.0252, "step": 123850 }, { "epoch": 1.9010052950656129, "grad_norm": 0.4388952851295471, "learning_rate": 1.4889084981663636e-07, "loss": 0.0227, "step": 123860 }, { "epoch": 1.9011587752283017, "grad_norm": 0.2455756664276123, "learning_rate": 1.484306752916609e-07, "loss": 0.0219, "step": 123870 }, { "epoch": 1.9013122553909907, "grad_norm": 0.2913293242454529, "learning_rate": 1.4797120767754304e-07, "loss": 0.0249, "step": 123880 }, { "epoch": 1.9014657355536797, "grad_norm": 0.3334351181983948, "learning_rate": 1.47512447007252e-07, "loss": 0.0237, "step": 123890 }, { "epoch": 1.9016192157163685, "grad_norm": 0.3982160985469818, "learning_rate": 1.4705439331370697e-07, "loss": 0.0329, "step": 123900 }, { "epoch": 1.9017726958790577, "grad_norm": 0.2967924177646637, "learning_rate": 1.4659704662977837e-07, "loss": 0.0227, "step": 123910 }, { "epoch": 1.9019261760417465, "grad_norm": 0.33037203550338745, "learning_rate": 1.4614040698827993e-07, "loss": 0.0275, "step": 123920 }, { "epoch": 1.9020796562044355, "grad_norm": 0.39713019132614136, "learning_rate": 1.4568447442197876e-07, "loss": 0.0254, "step": 123930 }, { "epoch": 1.9022331363671245, "grad_norm": 0.2939053177833557, "learning_rate": 1.4522924896359313e-07, "loss": 0.0254, "step": 123940 }, { "epoch": 1.9023866165298136, "grad_norm": 0.34729841351509094, "learning_rate": 1.4477473064578805e-07, "loss": 0.0236, "step": 123950 }, { "epoch": 1.9025400966925026, "grad_norm": 0.41996100544929504, "learning_rate": 1.4432091950117633e-07, "loss": 0.0233, "step": 123960 }, { "epoch": 1.9026935768551914, "grad_norm": 0.32697567343711853, "learning_rate": 1.4386781556232187e-07, "loss": 0.0248, "step": 123970 }, { "epoch": 1.9028470570178806, "grad_norm": 0.25919637084007263, "learning_rate": 1.4341541886173982e-07, "loss": 0.0244, "step": 123980 }, { "epoch": 1.9030005371805694, "grad_norm": 0.246768981218338, "learning_rate": 1.4296372943188974e-07, "loss": 0.0202, "step": 123990 }, { "epoch": 1.9031540173432584, "grad_norm": 0.38684162497520447, "learning_rate": 1.425127473051835e-07, "loss": 0.0277, "step": 124000 }, { "epoch": 1.9033074975059474, "grad_norm": 0.32701408863067627, "learning_rate": 1.420624725139852e-07, "loss": 0.0195, "step": 124010 }, { "epoch": 1.9034609776686362, "grad_norm": 0.2706657648086548, "learning_rate": 1.4161290509060122e-07, "loss": 0.0166, "step": 124020 }, { "epoch": 1.9036144578313254, "grad_norm": 0.526533842086792, "learning_rate": 1.4116404506729021e-07, "loss": 0.0305, "step": 124030 }, { "epoch": 1.9037679379940142, "grad_norm": 0.44144290685653687, "learning_rate": 1.4071589247626416e-07, "loss": 0.0252, "step": 124040 }, { "epoch": 1.9039214181567032, "grad_norm": 0.3614506125450134, "learning_rate": 1.4026844734967847e-07, "loss": 0.0261, "step": 124050 }, { "epoch": 1.9040748983193923, "grad_norm": 0.556408166885376, "learning_rate": 1.3982170971964083e-07, "loss": 0.0178, "step": 124060 }, { "epoch": 1.904228378482081, "grad_norm": 0.30293795466423035, "learning_rate": 1.3937567961820664e-07, "loss": 0.0229, "step": 124070 }, { "epoch": 1.9043818586447703, "grad_norm": 0.3482738137245178, "learning_rate": 1.3893035707738256e-07, "loss": 0.0235, "step": 124080 }, { "epoch": 1.904535338807459, "grad_norm": 0.4332679510116577, "learning_rate": 1.384857421291208e-07, "loss": 0.0211, "step": 124090 }, { "epoch": 1.904688818970148, "grad_norm": 0.49036601185798645, "learning_rate": 1.3804183480532806e-07, "loss": 0.0292, "step": 124100 }, { "epoch": 1.904842299132837, "grad_norm": 0.41756099462509155, "learning_rate": 1.3759863513785775e-07, "loss": 0.0307, "step": 124110 }, { "epoch": 1.904995779295526, "grad_norm": 0.4329031705856323, "learning_rate": 1.371561431585089e-07, "loss": 0.0236, "step": 124120 }, { "epoch": 1.9051492594582151, "grad_norm": 0.3008805811405182, "learning_rate": 1.3671435889903494e-07, "loss": 0.0206, "step": 124130 }, { "epoch": 1.905302739620904, "grad_norm": 0.3573925793170929, "learning_rate": 1.3627328239113725e-07, "loss": 0.0265, "step": 124140 }, { "epoch": 1.905456219783593, "grad_norm": 0.3801079988479614, "learning_rate": 1.3583291366646379e-07, "loss": 0.0187, "step": 124150 }, { "epoch": 1.905609699946282, "grad_norm": 0.3564003109931946, "learning_rate": 1.3539325275661598e-07, "loss": 0.026, "step": 124160 }, { "epoch": 1.905763180108971, "grad_norm": 0.3842869699001312, "learning_rate": 1.349542996931419e-07, "loss": 0.0332, "step": 124170 }, { "epoch": 1.90591666027166, "grad_norm": 0.18810375034809113, "learning_rate": 1.3451605450753745e-07, "loss": 0.0226, "step": 124180 }, { "epoch": 1.9060701404343487, "grad_norm": 0.33918747305870056, "learning_rate": 1.3407851723124975e-07, "loss": 0.0234, "step": 124190 }, { "epoch": 1.906223620597038, "grad_norm": 0.3772743046283722, "learning_rate": 1.33641687895677e-07, "loss": 0.0225, "step": 124200 }, { "epoch": 1.9063771007597268, "grad_norm": 0.24193336069583893, "learning_rate": 1.3320556653216298e-07, "loss": 0.0233, "step": 124210 }, { "epoch": 1.9065305809224158, "grad_norm": 0.42404434084892273, "learning_rate": 1.3277015317200049e-07, "loss": 0.0302, "step": 124220 }, { "epoch": 1.9066840610851048, "grad_norm": 0.28926923871040344, "learning_rate": 1.3233544784643558e-07, "loss": 0.0238, "step": 124230 }, { "epoch": 1.9068375412477936, "grad_norm": 0.4359023869037628, "learning_rate": 1.3190145058666005e-07, "loss": 0.0241, "step": 124240 }, { "epoch": 1.9069910214104828, "grad_norm": 0.4283484220504761, "learning_rate": 1.3146816142381558e-07, "loss": 0.0219, "step": 124250 }, { "epoch": 1.9071445015731716, "grad_norm": 0.3071739673614502, "learning_rate": 1.31035580388994e-07, "loss": 0.0254, "step": 124260 }, { "epoch": 1.9072979817358606, "grad_norm": 0.376162052154541, "learning_rate": 1.306037075132349e-07, "loss": 0.0257, "step": 124270 }, { "epoch": 1.9074514618985496, "grad_norm": 0.3268781900405884, "learning_rate": 1.301725428275291e-07, "loss": 0.0238, "step": 124280 }, { "epoch": 1.9076049420612384, "grad_norm": 0.34972766041755676, "learning_rate": 1.2974208636281292e-07, "loss": 0.0241, "step": 124290 }, { "epoch": 1.9077584222239277, "grad_norm": 0.373557984828949, "learning_rate": 1.2931233814997724e-07, "loss": 0.0297, "step": 124300 }, { "epoch": 1.9079119023866165, "grad_norm": 0.281118243932724, "learning_rate": 1.2888329821985846e-07, "loss": 0.0206, "step": 124310 }, { "epoch": 1.9080653825493055, "grad_norm": 0.3438396453857422, "learning_rate": 1.2845496660323974e-07, "loss": 0.0232, "step": 124320 }, { "epoch": 1.9082188627119945, "grad_norm": 0.37966081500053406, "learning_rate": 1.2802734333086098e-07, "loss": 0.022, "step": 124330 }, { "epoch": 1.9083723428746835, "grad_norm": 0.5433283448219299, "learning_rate": 1.2760042843340314e-07, "loss": 0.0266, "step": 124340 }, { "epoch": 1.9085258230373725, "grad_norm": 0.2775411605834961, "learning_rate": 1.2717422194150174e-07, "loss": 0.0238, "step": 124350 }, { "epoch": 1.9086793032000613, "grad_norm": 0.40099096298217773, "learning_rate": 1.267487238857412e-07, "loss": 0.0211, "step": 124360 }, { "epoch": 1.9088327833627505, "grad_norm": 0.28778958320617676, "learning_rate": 1.2632393429665157e-07, "loss": 0.0262, "step": 124370 }, { "epoch": 1.9089862635254393, "grad_norm": 0.44400349259376526, "learning_rate": 1.2589985320471289e-07, "loss": 0.022, "step": 124380 }, { "epoch": 1.9091397436881283, "grad_norm": 0.30029791593551636, "learning_rate": 1.2547648064035855e-07, "loss": 0.0256, "step": 124390 }, { "epoch": 1.9092932238508173, "grad_norm": 0.3364683985710144, "learning_rate": 1.2505381663396654e-07, "loss": 0.0229, "step": 124400 }, { "epoch": 1.9094467040135061, "grad_norm": 0.29898855090141296, "learning_rate": 1.246318612158659e-07, "loss": 0.0292, "step": 124410 }, { "epoch": 1.9096001841761954, "grad_norm": 0.3697163760662079, "learning_rate": 1.2421061441633464e-07, "loss": 0.0272, "step": 124420 }, { "epoch": 1.9097536643388842, "grad_norm": 0.23121294379234314, "learning_rate": 1.2379007626559968e-07, "loss": 0.0265, "step": 124430 }, { "epoch": 1.9099071445015732, "grad_norm": 0.4092789590358734, "learning_rate": 1.2337024679383691e-07, "loss": 0.0279, "step": 124440 }, { "epoch": 1.9100606246642622, "grad_norm": 0.3570510745048523, "learning_rate": 1.229511260311722e-07, "loss": 0.0225, "step": 124450 }, { "epoch": 1.910214104826951, "grad_norm": 0.23209233582019806, "learning_rate": 1.2253271400768042e-07, "loss": 0.0191, "step": 124460 }, { "epoch": 1.9103675849896402, "grad_norm": 0.3620466887950897, "learning_rate": 1.2211501075338418e-07, "loss": 0.0212, "step": 124470 }, { "epoch": 1.910521065152329, "grad_norm": 0.2876073718070984, "learning_rate": 1.2169801629825617e-07, "loss": 0.0203, "step": 124480 }, { "epoch": 1.910674545315018, "grad_norm": 0.44231465458869934, "learning_rate": 1.2128173067221915e-07, "loss": 0.0216, "step": 124490 }, { "epoch": 1.910828025477707, "grad_norm": 0.2397557497024536, "learning_rate": 1.2086615390514477e-07, "loss": 0.0186, "step": 124500 }, { "epoch": 1.9109815056403958, "grad_norm": 0.31512051820755005, "learning_rate": 1.2045128602685142e-07, "loss": 0.0221, "step": 124510 }, { "epoch": 1.911134985803085, "grad_norm": 0.39519113302230835, "learning_rate": 1.2003712706711078e-07, "loss": 0.0194, "step": 124520 }, { "epoch": 1.9112884659657738, "grad_norm": 0.3659255802631378, "learning_rate": 1.1962367705563916e-07, "loss": 0.0273, "step": 124530 }, { "epoch": 1.9114419461284629, "grad_norm": 0.22935201227664948, "learning_rate": 1.1921093602210388e-07, "loss": 0.029, "step": 124540 }, { "epoch": 1.9115954262911519, "grad_norm": 0.2597735822200775, "learning_rate": 1.1879890399612349e-07, "loss": 0.0182, "step": 124550 }, { "epoch": 1.9117489064538409, "grad_norm": 0.34522855281829834, "learning_rate": 1.1838758100726322e-07, "loss": 0.025, "step": 124560 }, { "epoch": 1.91190238661653, "grad_norm": 0.4662873148918152, "learning_rate": 1.1797696708503836e-07, "loss": 0.0211, "step": 124570 }, { "epoch": 1.9120558667792187, "grad_norm": 0.2641022503376007, "learning_rate": 1.175670622589109e-07, "loss": 0.0313, "step": 124580 }, { "epoch": 1.912209346941908, "grad_norm": 0.49786388874053955, "learning_rate": 1.1715786655829731e-07, "loss": 0.0293, "step": 124590 }, { "epoch": 1.9123628271045967, "grad_norm": 0.3565880358219147, "learning_rate": 1.1674938001255742e-07, "loss": 0.0266, "step": 124600 }, { "epoch": 1.9125163072672857, "grad_norm": 0.5018468499183655, "learning_rate": 1.1634160265100335e-07, "loss": 0.0312, "step": 124610 }, { "epoch": 1.9126697874299747, "grad_norm": 0.3038540184497833, "learning_rate": 1.1593453450289504e-07, "loss": 0.0188, "step": 124620 }, { "epoch": 1.9128232675926635, "grad_norm": 0.3382759690284729, "learning_rate": 1.1552817559744467e-07, "loss": 0.0169, "step": 124630 }, { "epoch": 1.9129767477553528, "grad_norm": 0.35336729884147644, "learning_rate": 1.1512252596380668e-07, "loss": 0.0254, "step": 124640 }, { "epoch": 1.9131302279180415, "grad_norm": 0.3430202007293701, "learning_rate": 1.1471758563109337e-07, "loss": 0.0236, "step": 124650 }, { "epoch": 1.9132837080807306, "grad_norm": 0.34082719683647156, "learning_rate": 1.1431335462835924e-07, "loss": 0.0279, "step": 124660 }, { "epoch": 1.9134371882434196, "grad_norm": 0.37721315026283264, "learning_rate": 1.1390983298461111e-07, "loss": 0.0279, "step": 124670 }, { "epoch": 1.9135906684061084, "grad_norm": 0.45181915163993835, "learning_rate": 1.1350702072880359e-07, "loss": 0.0294, "step": 124680 }, { "epoch": 1.9137441485687976, "grad_norm": 0.44222086668014526, "learning_rate": 1.1310491788984135e-07, "loss": 0.0235, "step": 124690 }, { "epoch": 1.9138976287314864, "grad_norm": 0.3318100869655609, "learning_rate": 1.1270352449657685e-07, "loss": 0.0211, "step": 124700 }, { "epoch": 1.9140511088941754, "grad_norm": 0.41956931352615356, "learning_rate": 1.1230284057781483e-07, "loss": 0.0288, "step": 124710 }, { "epoch": 1.9142045890568644, "grad_norm": 0.5429844260215759, "learning_rate": 1.119028661623045e-07, "loss": 0.0301, "step": 124720 }, { "epoch": 1.9143580692195534, "grad_norm": 0.4371183514595032, "learning_rate": 1.1150360127874737e-07, "loss": 0.0231, "step": 124730 }, { "epoch": 1.9145115493822424, "grad_norm": 0.40006572008132935, "learning_rate": 1.1110504595579275e-07, "loss": 0.0249, "step": 124740 }, { "epoch": 1.9146650295449312, "grad_norm": 0.2543032765388489, "learning_rate": 1.1070720022203996e-07, "loss": 0.0241, "step": 124750 }, { "epoch": 1.9148185097076202, "grad_norm": 0.2495075762271881, "learning_rate": 1.1031006410603728e-07, "loss": 0.0232, "step": 124760 }, { "epoch": 1.9149719898703093, "grad_norm": 0.4415033161640167, "learning_rate": 1.0991363763628082e-07, "loss": 0.0268, "step": 124770 }, { "epoch": 1.9151254700329983, "grad_norm": 0.2595331072807312, "learning_rate": 1.095179208412167e-07, "loss": 0.0215, "step": 124780 }, { "epoch": 1.9152789501956873, "grad_norm": 0.3821946680545807, "learning_rate": 1.0912291374924111e-07, "loss": 0.0215, "step": 124790 }, { "epoch": 1.915432430358376, "grad_norm": 0.3554643392562866, "learning_rate": 1.0872861638869691e-07, "loss": 0.0245, "step": 124800 }, { "epoch": 1.9155859105210653, "grad_norm": 0.4098465144634247, "learning_rate": 1.0833502878787816e-07, "loss": 0.027, "step": 124810 }, { "epoch": 1.915739390683754, "grad_norm": 0.42439937591552734, "learning_rate": 1.079421509750267e-07, "loss": 0.0231, "step": 124820 }, { "epoch": 1.915892870846443, "grad_norm": 0.30044952034950256, "learning_rate": 1.075499829783333e-07, "loss": 0.0213, "step": 124830 }, { "epoch": 1.9160463510091321, "grad_norm": 0.49264103174209595, "learning_rate": 1.0715852482593992e-07, "loss": 0.029, "step": 124840 }, { "epoch": 1.916199831171821, "grad_norm": 0.26656270027160645, "learning_rate": 1.0676777654593628e-07, "loss": 0.0237, "step": 124850 }, { "epoch": 1.9163533113345101, "grad_norm": 0.22868561744689941, "learning_rate": 1.0637773816635999e-07, "loss": 0.0282, "step": 124860 }, { "epoch": 1.916506791497199, "grad_norm": 0.25992071628570557, "learning_rate": 1.0598840971519752e-07, "loss": 0.0254, "step": 124870 }, { "epoch": 1.916660271659888, "grad_norm": 0.309478759765625, "learning_rate": 1.0559979122038767e-07, "loss": 0.0248, "step": 124880 }, { "epoch": 1.916813751822577, "grad_norm": 0.3653138279914856, "learning_rate": 1.0521188270981586e-07, "loss": 0.0326, "step": 124890 }, { "epoch": 1.9169672319852658, "grad_norm": 0.24658513069152832, "learning_rate": 1.0482468421131542e-07, "loss": 0.0228, "step": 124900 }, { "epoch": 1.917120712147955, "grad_norm": 0.28571799397468567, "learning_rate": 1.04438195752673e-07, "loss": 0.0239, "step": 124910 }, { "epoch": 1.9172741923106438, "grad_norm": 0.3772113025188446, "learning_rate": 1.0405241736161975e-07, "loss": 0.0249, "step": 124920 }, { "epoch": 1.9174276724733328, "grad_norm": 0.39119940996170044, "learning_rate": 1.0366734906583575e-07, "loss": 0.0198, "step": 124930 }, { "epoch": 1.9175811526360218, "grad_norm": 0.36596769094467163, "learning_rate": 1.0328299089295557e-07, "loss": 0.0225, "step": 124940 }, { "epoch": 1.9177346327987108, "grad_norm": 0.3669349253177643, "learning_rate": 1.0289934287055825e-07, "loss": 0.0277, "step": 124950 }, { "epoch": 1.9178881129613998, "grad_norm": 0.3703598380088806, "learning_rate": 1.0251640502617177e-07, "loss": 0.0313, "step": 124960 }, { "epoch": 1.9180415931240886, "grad_norm": 0.2653179168701172, "learning_rate": 1.0213417738727527e-07, "loss": 0.0243, "step": 124970 }, { "epoch": 1.9181950732867776, "grad_norm": 0.30966615676879883, "learning_rate": 1.0175265998129568e-07, "loss": 0.022, "step": 124980 }, { "epoch": 1.9183485534494666, "grad_norm": 0.274251252412796, "learning_rate": 1.013718528356089e-07, "loss": 0.0259, "step": 124990 }, { "epoch": 1.9185020336121557, "grad_norm": 0.3277386724948883, "learning_rate": 1.0099175597753974e-07, "loss": 0.0156, "step": 125000 }, { "epoch": 1.9186555137748447, "grad_norm": 0.3605925142765045, "learning_rate": 1.0061236943436415e-07, "loss": 0.0225, "step": 125010 }, { "epoch": 1.9188089939375335, "grad_norm": 0.33761170506477356, "learning_rate": 1.0023369323330368e-07, "loss": 0.0249, "step": 125020 }, { "epoch": 1.9189624741002227, "grad_norm": 0.30928200483322144, "learning_rate": 9.985572740153215e-08, "loss": 0.0288, "step": 125030 }, { "epoch": 1.9191159542629115, "grad_norm": 0.3987363278865814, "learning_rate": 9.94784719661701e-08, "loss": 0.0258, "step": 125040 }, { "epoch": 1.9192694344256005, "grad_norm": 0.3020690679550171, "learning_rate": 9.91019269542881e-08, "loss": 0.0219, "step": 125050 }, { "epoch": 1.9194229145882895, "grad_norm": 0.35167989134788513, "learning_rate": 9.872609239290565e-08, "loss": 0.0249, "step": 125060 }, { "epoch": 1.9195763947509783, "grad_norm": 0.45403963327407837, "learning_rate": 9.835096830899116e-08, "loss": 0.0268, "step": 125070 }, { "epoch": 1.9197298749136675, "grad_norm": 0.2852649986743927, "learning_rate": 9.7976554729462e-08, "loss": 0.0232, "step": 125080 }, { "epoch": 1.9198833550763563, "grad_norm": 0.3177233636379242, "learning_rate": 9.760285168118556e-08, "loss": 0.0269, "step": 125090 }, { "epoch": 1.9200368352390453, "grad_norm": 0.33117419481277466, "learning_rate": 9.722985919097482e-08, "loss": 0.02, "step": 125100 }, { "epoch": 1.9201903154017343, "grad_norm": 0.4788026511669159, "learning_rate": 9.685757728559731e-08, "loss": 0.0221, "step": 125110 }, { "epoch": 1.9203437955644231, "grad_norm": 0.42302390933036804, "learning_rate": 9.648600599176494e-08, "loss": 0.0256, "step": 125120 }, { "epoch": 1.9204972757271124, "grad_norm": 0.2874342203140259, "learning_rate": 9.611514533614086e-08, "loss": 0.0157, "step": 125130 }, { "epoch": 1.9206507558898012, "grad_norm": 0.3262382745742798, "learning_rate": 9.574499534533599e-08, "loss": 0.0253, "step": 125140 }, { "epoch": 1.9208042360524902, "grad_norm": 0.44783151149749756, "learning_rate": 9.53755560459102e-08, "loss": 0.0239, "step": 125150 }, { "epoch": 1.9209577162151792, "grad_norm": 0.3278818130493164, "learning_rate": 9.500682746437562e-08, "loss": 0.0245, "step": 125160 }, { "epoch": 1.9211111963778682, "grad_norm": 0.4025818407535553, "learning_rate": 9.463880962718885e-08, "loss": 0.022, "step": 125170 }, { "epoch": 1.9212646765405572, "grad_norm": 0.31866711378097534, "learning_rate": 9.427150256075657e-08, "loss": 0.0351, "step": 125180 }, { "epoch": 1.921418156703246, "grad_norm": 0.20183129608631134, "learning_rate": 9.390490629143767e-08, "loss": 0.0251, "step": 125190 }, { "epoch": 1.9215716368659352, "grad_norm": 0.263709157705307, "learning_rate": 9.353902084553557e-08, "loss": 0.0271, "step": 125200 }, { "epoch": 1.921725117028624, "grad_norm": 0.3808521330356598, "learning_rate": 9.317384624930703e-08, "loss": 0.0233, "step": 125210 }, { "epoch": 1.921878597191313, "grad_norm": 0.4287232458591461, "learning_rate": 9.280938252895333e-08, "loss": 0.0229, "step": 125220 }, { "epoch": 1.922032077354002, "grad_norm": 0.4392538368701935, "learning_rate": 9.244562971062909e-08, "loss": 0.0229, "step": 125230 }, { "epoch": 1.9221855575166908, "grad_norm": 0.34856072068214417, "learning_rate": 9.208258782043455e-08, "loss": 0.0328, "step": 125240 }, { "epoch": 1.92233903767938, "grad_norm": 0.40811797976493835, "learning_rate": 9.172025688442e-08, "loss": 0.0259, "step": 125250 }, { "epoch": 1.9224925178420689, "grad_norm": 0.36699312925338745, "learning_rate": 9.13586369285846e-08, "loss": 0.0245, "step": 125260 }, { "epoch": 1.9226459980047579, "grad_norm": 0.38642382621765137, "learning_rate": 9.099772797887874e-08, "loss": 0.0213, "step": 125270 }, { "epoch": 1.922799478167447, "grad_norm": 0.4413800835609436, "learning_rate": 9.063753006119835e-08, "loss": 0.0253, "step": 125280 }, { "epoch": 1.9229529583301357, "grad_norm": 0.3006323575973511, "learning_rate": 9.027804320138945e-08, "loss": 0.0267, "step": 125290 }, { "epoch": 1.923106438492825, "grad_norm": 0.2170177400112152, "learning_rate": 8.991926742524804e-08, "loss": 0.0171, "step": 125300 }, { "epoch": 1.9232599186555137, "grad_norm": 0.3202025294303894, "learning_rate": 8.956120275852021e-08, "loss": 0.0246, "step": 125310 }, { "epoch": 1.9234133988182027, "grad_norm": 0.3973084092140198, "learning_rate": 8.92038492268954e-08, "loss": 0.0195, "step": 125320 }, { "epoch": 1.9235668789808917, "grad_norm": 0.3110712170600891, "learning_rate": 8.884720685601978e-08, "loss": 0.0239, "step": 125330 }, { "epoch": 1.9237203591435805, "grad_norm": 0.23827265202999115, "learning_rate": 8.849127567148285e-08, "loss": 0.0194, "step": 125340 }, { "epoch": 1.9238738393062698, "grad_norm": 0.48496687412261963, "learning_rate": 8.813605569882533e-08, "loss": 0.024, "step": 125350 }, { "epoch": 1.9240273194689586, "grad_norm": 0.24720081686973572, "learning_rate": 8.778154696353569e-08, "loss": 0.0235, "step": 125360 }, { "epoch": 1.9241807996316476, "grad_norm": 0.35633358359336853, "learning_rate": 8.742774949105248e-08, "loss": 0.0213, "step": 125370 }, { "epoch": 1.9243342797943366, "grad_norm": 0.3798258304595947, "learning_rate": 8.707466330676428e-08, "loss": 0.0308, "step": 125380 }, { "epoch": 1.9244877599570256, "grad_norm": 0.4012235701084137, "learning_rate": 8.672228843600416e-08, "loss": 0.0195, "step": 125390 }, { "epoch": 1.9246412401197146, "grad_norm": 0.44007495045661926, "learning_rate": 8.637062490405968e-08, "loss": 0.0337, "step": 125400 }, { "epoch": 1.9247947202824034, "grad_norm": 0.5056999325752258, "learning_rate": 8.601967273616507e-08, "loss": 0.0275, "step": 125410 }, { "epoch": 1.9249482004450926, "grad_norm": 0.2848358452320099, "learning_rate": 8.566943195750133e-08, "loss": 0.0239, "step": 125420 }, { "epoch": 1.9251016806077814, "grad_norm": 0.3554970324039459, "learning_rate": 8.531990259320166e-08, "loss": 0.0214, "step": 125430 }, { "epoch": 1.9252551607704704, "grad_norm": 0.2558789551258087, "learning_rate": 8.497108466834603e-08, "loss": 0.0201, "step": 125440 }, { "epoch": 1.9254086409331594, "grad_norm": 0.4839095175266266, "learning_rate": 8.46229782079655e-08, "loss": 0.0208, "step": 125450 }, { "epoch": 1.9255621210958482, "grad_norm": 0.27990034222602844, "learning_rate": 8.4275583237039e-08, "loss": 0.0218, "step": 125460 }, { "epoch": 1.9257156012585375, "grad_norm": 0.2573661506175995, "learning_rate": 8.392889978049323e-08, "loss": 0.0209, "step": 125470 }, { "epoch": 1.9258690814212263, "grad_norm": 0.38667386770248413, "learning_rate": 8.358292786320499e-08, "loss": 0.0251, "step": 125480 }, { "epoch": 1.9260225615839153, "grad_norm": 0.3844069242477417, "learning_rate": 8.323766751000107e-08, "loss": 0.0239, "step": 125490 }, { "epoch": 1.9261760417466043, "grad_norm": 0.34540387988090515, "learning_rate": 8.289311874565387e-08, "loss": 0.0231, "step": 125500 }, { "epoch": 1.926329521909293, "grad_norm": 0.35110384225845337, "learning_rate": 8.254928159488918e-08, "loss": 0.0241, "step": 125510 }, { "epoch": 1.9264830020719823, "grad_norm": 0.35635703802108765, "learning_rate": 8.220615608237725e-08, "loss": 0.025, "step": 125520 }, { "epoch": 1.926636482234671, "grad_norm": 0.32317137718200684, "learning_rate": 8.186374223274173e-08, "loss": 0.0192, "step": 125530 }, { "epoch": 1.9267899623973601, "grad_norm": 0.2754032611846924, "learning_rate": 8.152204007055076e-08, "loss": 0.018, "step": 125540 }, { "epoch": 1.9269434425600491, "grad_norm": 0.2902849316596985, "learning_rate": 8.11810496203247e-08, "loss": 0.0192, "step": 125550 }, { "epoch": 1.9270969227227381, "grad_norm": 0.37625908851623535, "learning_rate": 8.084077090653286e-08, "loss": 0.0263, "step": 125560 }, { "epoch": 1.9272504028854271, "grad_norm": 0.31887081265449524, "learning_rate": 8.050120395359018e-08, "loss": 0.0191, "step": 125570 }, { "epoch": 1.927403883048116, "grad_norm": 0.41851192712783813, "learning_rate": 8.016234878586382e-08, "loss": 0.0207, "step": 125580 }, { "epoch": 1.927557363210805, "grad_norm": 0.38771066069602966, "learning_rate": 7.982420542766767e-08, "loss": 0.0321, "step": 125590 }, { "epoch": 1.927710843373494, "grad_norm": 0.3532368242740631, "learning_rate": 7.948677390326786e-08, "loss": 0.0296, "step": 125600 }, { "epoch": 1.927864323536183, "grad_norm": 0.41367635130882263, "learning_rate": 7.915005423687505e-08, "loss": 0.0298, "step": 125610 }, { "epoch": 1.928017803698872, "grad_norm": 0.43388330936431885, "learning_rate": 7.881404645265101e-08, "loss": 0.0176, "step": 125620 }, { "epoch": 1.9281712838615608, "grad_norm": 0.2939906418323517, "learning_rate": 7.847875057470756e-08, "loss": 0.0227, "step": 125630 }, { "epoch": 1.92832476402425, "grad_norm": 0.3530113101005554, "learning_rate": 7.814416662710433e-08, "loss": 0.0206, "step": 125640 }, { "epoch": 1.9284782441869388, "grad_norm": 0.48210784792900085, "learning_rate": 7.781029463384881e-08, "loss": 0.0274, "step": 125650 }, { "epoch": 1.9286317243496278, "grad_norm": 0.4723561704158783, "learning_rate": 7.74771346188985e-08, "loss": 0.0254, "step": 125660 }, { "epoch": 1.9287852045123168, "grad_norm": 0.3682475984096527, "learning_rate": 7.714468660615981e-08, "loss": 0.021, "step": 125670 }, { "epoch": 1.9289386846750056, "grad_norm": 0.2993721067905426, "learning_rate": 7.681295061948813e-08, "loss": 0.0192, "step": 125680 }, { "epoch": 1.9290921648376949, "grad_norm": 0.428314208984375, "learning_rate": 7.648192668268773e-08, "loss": 0.0252, "step": 125690 }, { "epoch": 1.9292456450003836, "grad_norm": 0.38319677114486694, "learning_rate": 7.615161481951184e-08, "loss": 0.0217, "step": 125700 }, { "epoch": 1.9293991251630727, "grad_norm": 0.32353970408439636, "learning_rate": 7.582201505366037e-08, "loss": 0.0198, "step": 125710 }, { "epoch": 1.9295526053257617, "grad_norm": 0.40368586778640747, "learning_rate": 7.549312740878667e-08, "loss": 0.019, "step": 125720 }, { "epoch": 1.9297060854884505, "grad_norm": 0.32552018761634827, "learning_rate": 7.51649519084896e-08, "loss": 0.024, "step": 125730 }, { "epoch": 1.9298595656511397, "grad_norm": 0.3702879846096039, "learning_rate": 7.4837488576317e-08, "loss": 0.0179, "step": 125740 }, { "epoch": 1.9300130458138285, "grad_norm": 0.3978080153465271, "learning_rate": 7.451073743576786e-08, "loss": 0.0255, "step": 125750 }, { "epoch": 1.9301665259765175, "grad_norm": 0.298443466424942, "learning_rate": 7.418469851028676e-08, "loss": 0.0275, "step": 125760 }, { "epoch": 1.9303200061392065, "grad_norm": 0.38100045919418335, "learning_rate": 7.385937182327052e-08, "loss": 0.0248, "step": 125770 }, { "epoch": 1.9304734863018955, "grad_norm": 0.48574575781822205, "learning_rate": 7.35347573980627e-08, "loss": 0.0207, "step": 125780 }, { "epoch": 1.9306269664645845, "grad_norm": 0.3402758836746216, "learning_rate": 7.32108552579558e-08, "loss": 0.0194, "step": 125790 }, { "epoch": 1.9307804466272733, "grad_norm": 0.33749493956565857, "learning_rate": 7.288766542619342e-08, "loss": 0.0242, "step": 125800 }, { "epoch": 1.9309339267899626, "grad_norm": 0.4131222367286682, "learning_rate": 7.256518792596479e-08, "loss": 0.0242, "step": 125810 }, { "epoch": 1.9310874069526514, "grad_norm": 0.6136962175369263, "learning_rate": 7.224342278041141e-08, "loss": 0.021, "step": 125820 }, { "epoch": 1.9312408871153404, "grad_norm": 0.344979852437973, "learning_rate": 7.192237001262037e-08, "loss": 0.0245, "step": 125830 }, { "epoch": 1.9313943672780294, "grad_norm": 0.4313060939311981, "learning_rate": 7.16020296456288e-08, "loss": 0.021, "step": 125840 }, { "epoch": 1.9315478474407182, "grad_norm": 0.4160812795162201, "learning_rate": 7.128240170242496e-08, "loss": 0.0236, "step": 125850 }, { "epoch": 1.9317013276034074, "grad_norm": 0.40485766530036926, "learning_rate": 7.096348620594385e-08, "loss": 0.0272, "step": 125860 }, { "epoch": 1.9318548077660962, "grad_norm": 0.39992836117744446, "learning_rate": 7.064528317906716e-08, "loss": 0.0229, "step": 125870 }, { "epoch": 1.9320082879287852, "grad_norm": 0.38701578974723816, "learning_rate": 7.032779264462997e-08, "loss": 0.0261, "step": 125880 }, { "epoch": 1.9321617680914742, "grad_norm": 0.2723395824432373, "learning_rate": 7.001101462541516e-08, "loss": 0.0163, "step": 125890 }, { "epoch": 1.932315248254163, "grad_norm": 0.37168967723846436, "learning_rate": 6.969494914415121e-08, "loss": 0.0217, "step": 125900 }, { "epoch": 1.9324687284168522, "grad_norm": 0.3566652536392212, "learning_rate": 6.937959622351886e-08, "loss": 0.0293, "step": 125910 }, { "epoch": 1.932622208579541, "grad_norm": 0.3221128582954407, "learning_rate": 6.90649558861467e-08, "loss": 0.0327, "step": 125920 }, { "epoch": 1.93277568874223, "grad_norm": 0.23802384734153748, "learning_rate": 6.87510281546122e-08, "loss": 0.0258, "step": 125930 }, { "epoch": 1.932929168904919, "grad_norm": 0.404514342546463, "learning_rate": 6.84378130514407e-08, "loss": 0.0279, "step": 125940 }, { "epoch": 1.9330826490676078, "grad_norm": 0.34719768166542053, "learning_rate": 6.812531059910865e-08, "loss": 0.024, "step": 125950 }, { "epoch": 1.933236129230297, "grad_norm": 0.3425462543964386, "learning_rate": 6.781352082003922e-08, "loss": 0.0195, "step": 125960 }, { "epoch": 1.9333896093929859, "grad_norm": 0.28574663400650024, "learning_rate": 6.750244373660564e-08, "loss": 0.0188, "step": 125970 }, { "epoch": 1.9335430895556749, "grad_norm": 0.33448147773742676, "learning_rate": 6.719207937112892e-08, "loss": 0.0253, "step": 125980 }, { "epoch": 1.933696569718364, "grad_norm": 0.3491259217262268, "learning_rate": 6.688242774588016e-08, "loss": 0.0265, "step": 125990 }, { "epoch": 1.933850049881053, "grad_norm": 0.4623439311981201, "learning_rate": 6.657348888307935e-08, "loss": 0.0266, "step": 126000 }, { "epoch": 1.934003530043742, "grad_norm": 0.3286382853984833, "learning_rate": 6.62652628048932e-08, "loss": 0.022, "step": 126010 }, { "epoch": 1.9341570102064307, "grad_norm": 0.37370622158050537, "learning_rate": 6.595774953343958e-08, "loss": 0.0243, "step": 126020 }, { "epoch": 1.93431049036912, "grad_norm": 0.27656009793281555, "learning_rate": 6.565094909078529e-08, "loss": 0.0213, "step": 126030 }, { "epoch": 1.9344639705318087, "grad_norm": 0.3597441017627716, "learning_rate": 6.534486149894381e-08, "loss": 0.0213, "step": 126040 }, { "epoch": 1.9346174506944978, "grad_norm": 0.29664602875709534, "learning_rate": 6.503948677988092e-08, "loss": 0.0228, "step": 126050 }, { "epoch": 1.9347709308571868, "grad_norm": 0.38464951515197754, "learning_rate": 6.473482495550576e-08, "loss": 0.0263, "step": 126060 }, { "epoch": 1.9349244110198756, "grad_norm": 0.4098140299320221, "learning_rate": 6.443087604768306e-08, "loss": 0.033, "step": 126070 }, { "epoch": 1.9350778911825648, "grad_norm": 0.437875896692276, "learning_rate": 6.412764007822092e-08, "loss": 0.0278, "step": 126080 }, { "epoch": 1.9352313713452536, "grad_norm": 0.29502832889556885, "learning_rate": 6.382511706887973e-08, "loss": 0.0251, "step": 126090 }, { "epoch": 1.9353848515079426, "grad_norm": 0.4997405409812927, "learning_rate": 6.352330704136544e-08, "loss": 0.024, "step": 126100 }, { "epoch": 1.9355383316706316, "grad_norm": 0.3948541283607483, "learning_rate": 6.322221001733742e-08, "loss": 0.0234, "step": 126110 }, { "epoch": 1.9356918118333204, "grad_norm": 0.4633505940437317, "learning_rate": 6.292182601839946e-08, "loss": 0.0227, "step": 126120 }, { "epoch": 1.9358452919960096, "grad_norm": 0.3894202411174774, "learning_rate": 6.262215506610548e-08, "loss": 0.0253, "step": 126130 }, { "epoch": 1.9359987721586984, "grad_norm": 0.388338178396225, "learning_rate": 6.232319718196045e-08, "loss": 0.0213, "step": 126140 }, { "epoch": 1.9361522523213874, "grad_norm": 0.37357133626937866, "learning_rate": 6.202495238741613e-08, "loss": 0.03, "step": 126150 }, { "epoch": 1.9363057324840764, "grad_norm": 0.26017114520072937, "learning_rate": 6.172742070387205e-08, "loss": 0.02, "step": 126160 }, { "epoch": 1.9364592126467655, "grad_norm": 0.4165627658367157, "learning_rate": 6.143060215267893e-08, "loss": 0.0269, "step": 126170 }, { "epoch": 1.9366126928094545, "grad_norm": 0.5210922360420227, "learning_rate": 6.113449675513528e-08, "loss": 0.0236, "step": 126180 }, { "epoch": 1.9367661729721433, "grad_norm": 0.38625141978263855, "learning_rate": 6.083910453248853e-08, "loss": 0.0211, "step": 126190 }, { "epoch": 1.9369196531348323, "grad_norm": 0.31602975726127625, "learning_rate": 6.054442550593509e-08, "loss": 0.0164, "step": 126200 }, { "epoch": 1.9370731332975213, "grad_norm": 0.25273555517196655, "learning_rate": 6.025045969661914e-08, "loss": 0.0261, "step": 126210 }, { "epoch": 1.9372266134602103, "grad_norm": 0.38344287872314453, "learning_rate": 5.995720712563602e-08, "loss": 0.0324, "step": 126220 }, { "epoch": 1.9373800936228993, "grad_norm": 0.34534430503845215, "learning_rate": 5.966466781402558e-08, "loss": 0.0267, "step": 126230 }, { "epoch": 1.937533573785588, "grad_norm": 0.3020974397659302, "learning_rate": 5.9372841782782134e-08, "loss": 0.0257, "step": 126240 }, { "epoch": 1.9376870539482773, "grad_norm": 0.2581920027732849, "learning_rate": 5.9081729052846705e-08, "loss": 0.0164, "step": 126250 }, { "epoch": 1.9378405341109661, "grad_norm": 0.4302273690700531, "learning_rate": 5.879132964510481e-08, "loss": 0.0216, "step": 126260 }, { "epoch": 1.9379940142736551, "grad_norm": 0.3194984495639801, "learning_rate": 5.850164358039645e-08, "loss": 0.0205, "step": 126270 }, { "epoch": 1.9381474944363442, "grad_norm": 0.39420753717422485, "learning_rate": 5.8212670879509435e-08, "loss": 0.0238, "step": 126280 }, { "epoch": 1.938300974599033, "grad_norm": 0.34701088070869446, "learning_rate": 5.7924411563177183e-08, "loss": 0.0253, "step": 126290 }, { "epoch": 1.9384544547617222, "grad_norm": 0.39065152406692505, "learning_rate": 5.763686565208648e-08, "loss": 0.0271, "step": 126300 }, { "epoch": 1.938607934924411, "grad_norm": 0.35027527809143066, "learning_rate": 5.735003316686749e-08, "loss": 0.0243, "step": 126310 }, { "epoch": 1.9387614150871, "grad_norm": 0.37813428044319153, "learning_rate": 5.706391412810486e-08, "loss": 0.0239, "step": 126320 }, { "epoch": 1.938914895249789, "grad_norm": 0.41683754324913025, "learning_rate": 5.6778508556327716e-08, "loss": 0.026, "step": 126330 }, { "epoch": 1.9390683754124778, "grad_norm": 0.3012596666812897, "learning_rate": 5.649381647201746e-08, "loss": 0.028, "step": 126340 }, { "epoch": 1.939221855575167, "grad_norm": 0.39049288630485535, "learning_rate": 5.6209837895601085e-08, "loss": 0.0196, "step": 126350 }, { "epoch": 1.9393753357378558, "grad_norm": 0.24877925217151642, "learning_rate": 5.592657284745562e-08, "loss": 0.0321, "step": 126360 }, { "epoch": 1.9395288159005448, "grad_norm": 0.3347386121749878, "learning_rate": 5.564402134790814e-08, "loss": 0.0262, "step": 126370 }, { "epoch": 1.9396822960632338, "grad_norm": 0.40060585737228394, "learning_rate": 5.536218341723243e-08, "loss": 0.0247, "step": 126380 }, { "epoch": 1.9398357762259228, "grad_norm": 0.4179554581642151, "learning_rate": 5.508105907565231e-08, "loss": 0.0226, "step": 126390 }, { "epoch": 1.9399892563886119, "grad_norm": 0.33431702852249146, "learning_rate": 5.4800648343341644e-08, "loss": 0.0252, "step": 126400 }, { "epoch": 1.9401427365513007, "grad_norm": 0.39366415143013, "learning_rate": 5.4520951240419896e-08, "loss": 0.0232, "step": 126410 }, { "epoch": 1.9402962167139897, "grad_norm": 0.3090096414089203, "learning_rate": 5.424196778695656e-08, "loss": 0.0253, "step": 126420 }, { "epoch": 1.9404496968766787, "grad_norm": 0.45576074719429016, "learning_rate": 5.39636980029723e-08, "loss": 0.0219, "step": 126430 }, { "epoch": 1.9406031770393677, "grad_norm": 0.35182154178619385, "learning_rate": 5.368614190843446e-08, "loss": 0.0246, "step": 126440 }, { "epoch": 1.9407566572020567, "grad_norm": 0.43580251932144165, "learning_rate": 5.340929952325824e-08, "loss": 0.0352, "step": 126450 }, { "epoch": 1.9409101373647455, "grad_norm": 0.39203163981437683, "learning_rate": 5.313317086730885e-08, "loss": 0.0237, "step": 126460 }, { "epoch": 1.9410636175274347, "grad_norm": 0.3770490288734436, "learning_rate": 5.2857755960401545e-08, "loss": 0.0251, "step": 126470 }, { "epoch": 1.9412170976901235, "grad_norm": 0.41692104935646057, "learning_rate": 5.258305482229831e-08, "loss": 0.0242, "step": 126480 }, { "epoch": 1.9413705778528125, "grad_norm": 0.3638004660606384, "learning_rate": 5.230906747271003e-08, "loss": 0.022, "step": 126490 }, { "epoch": 1.9415240580155015, "grad_norm": 0.38433900475502014, "learning_rate": 5.203579393129765e-08, "loss": 0.0272, "step": 126500 }, { "epoch": 1.9416775381781903, "grad_norm": 0.3206619620323181, "learning_rate": 5.176323421766993e-08, "loss": 0.0199, "step": 126510 }, { "epoch": 1.9418310183408796, "grad_norm": 0.3776750862598419, "learning_rate": 5.149138835138456e-08, "loss": 0.0273, "step": 126520 }, { "epoch": 1.9419844985035684, "grad_norm": 0.42957931756973267, "learning_rate": 5.122025635194927e-08, "loss": 0.025, "step": 126530 }, { "epoch": 1.9421379786662574, "grad_norm": 0.34162959456443787, "learning_rate": 5.0949838238818495e-08, "loss": 0.0179, "step": 126540 }, { "epoch": 1.9422914588289464, "grad_norm": 0.48017778992652893, "learning_rate": 5.068013403139671e-08, "loss": 0.0318, "step": 126550 }, { "epoch": 1.9424449389916352, "grad_norm": 0.5317312479019165, "learning_rate": 5.0411143749036215e-08, "loss": 0.0234, "step": 126560 }, { "epoch": 1.9425984191543244, "grad_norm": 0.42653948068618774, "learning_rate": 5.0142867411039356e-08, "loss": 0.0278, "step": 126570 }, { "epoch": 1.9427518993170132, "grad_norm": 0.3555249273777008, "learning_rate": 4.9875305036656273e-08, "loss": 0.0227, "step": 126580 }, { "epoch": 1.9429053794797022, "grad_norm": 0.2079322338104248, "learning_rate": 4.960845664508718e-08, "loss": 0.0196, "step": 126590 }, { "epoch": 1.9430588596423912, "grad_norm": 0.32557663321495056, "learning_rate": 4.934232225547897e-08, "loss": 0.0182, "step": 126600 }, { "epoch": 1.9432123398050802, "grad_norm": 0.2500770390033722, "learning_rate": 4.90769018869286e-08, "loss": 0.0227, "step": 126610 }, { "epoch": 1.9433658199677692, "grad_norm": 0.31765010952949524, "learning_rate": 4.8812195558480824e-08, "loss": 0.0314, "step": 126620 }, { "epoch": 1.943519300130458, "grad_norm": 0.4059734046459198, "learning_rate": 4.854820328913268e-08, "loss": 0.0212, "step": 126630 }, { "epoch": 1.9436727802931473, "grad_norm": 0.29092836380004883, "learning_rate": 4.828492509782457e-08, "loss": 0.0178, "step": 126640 }, { "epoch": 1.943826260455836, "grad_norm": 0.3206508159637451, "learning_rate": 4.802236100344915e-08, "loss": 0.0272, "step": 126650 }, { "epoch": 1.943979740618525, "grad_norm": 0.4847303330898285, "learning_rate": 4.776051102484802e-08, "loss": 0.0295, "step": 126660 }, { "epoch": 1.944133220781214, "grad_norm": 0.38361266255378723, "learning_rate": 4.749937518080949e-08, "loss": 0.0343, "step": 126670 }, { "epoch": 1.9442867009439029, "grad_norm": 0.4070318937301636, "learning_rate": 4.7238953490070796e-08, "loss": 0.0225, "step": 126680 }, { "epoch": 1.9444401811065921, "grad_norm": 0.46180275082588196, "learning_rate": 4.697924597132031e-08, "loss": 0.0343, "step": 126690 }, { "epoch": 1.944593661269281, "grad_norm": 0.281688392162323, "learning_rate": 4.672025264319313e-08, "loss": 0.0203, "step": 126700 }, { "epoch": 1.94474714143197, "grad_norm": 0.3358409106731415, "learning_rate": 4.6461973524273283e-08, "loss": 0.0208, "step": 126710 }, { "epoch": 1.944900621594659, "grad_norm": 0.4125702679157257, "learning_rate": 4.620440863309483e-08, "loss": 0.0237, "step": 126720 }, { "epoch": 1.9450541017573477, "grad_norm": 0.5574564337730408, "learning_rate": 4.594755798813966e-08, "loss": 0.0335, "step": 126730 }, { "epoch": 1.945207581920037, "grad_norm": 0.30315306782722473, "learning_rate": 4.569142160783746e-08, "loss": 0.019, "step": 126740 }, { "epoch": 1.9453610620827257, "grad_norm": 0.282734215259552, "learning_rate": 4.543599951056688e-08, "loss": 0.0218, "step": 126750 }, { "epoch": 1.9455145422454148, "grad_norm": 0.44280028343200684, "learning_rate": 4.518129171465768e-08, "loss": 0.0229, "step": 126760 }, { "epoch": 1.9456680224081038, "grad_norm": 0.3644430637359619, "learning_rate": 4.492729823838638e-08, "loss": 0.0245, "step": 126770 }, { "epoch": 1.9458215025707926, "grad_norm": 0.42024824023246765, "learning_rate": 4.467401909997837e-08, "loss": 0.0276, "step": 126780 }, { "epoch": 1.9459749827334818, "grad_norm": 0.42423734068870544, "learning_rate": 4.442145431760692e-08, "loss": 0.0275, "step": 126790 }, { "epoch": 1.9461284628961706, "grad_norm": 0.3104996681213379, "learning_rate": 4.416960390939751e-08, "loss": 0.0207, "step": 126800 }, { "epoch": 1.9462819430588596, "grad_norm": 0.45765748620033264, "learning_rate": 4.3918467893420134e-08, "loss": 0.0223, "step": 126810 }, { "epoch": 1.9464354232215486, "grad_norm": 0.2962738871574402, "learning_rate": 4.366804628769483e-08, "loss": 0.0243, "step": 126820 }, { "epoch": 1.9465889033842376, "grad_norm": 0.34981781244277954, "learning_rate": 4.341833911019278e-08, "loss": 0.0182, "step": 126830 }, { "epoch": 1.9467423835469266, "grad_norm": 0.3763815462589264, "learning_rate": 4.3169346378830746e-08, "loss": 0.019, "step": 126840 }, { "epoch": 1.9468958637096154, "grad_norm": 0.3926343023777008, "learning_rate": 4.292106811147667e-08, "loss": 0.0236, "step": 126850 }, { "epoch": 1.9470493438723047, "grad_norm": 0.3744727373123169, "learning_rate": 4.2673504325944084e-08, "loss": 0.0237, "step": 126860 }, { "epoch": 1.9472028240349935, "grad_norm": 0.5448341965675354, "learning_rate": 4.242665503999877e-08, "loss": 0.0316, "step": 126870 }, { "epoch": 1.9473563041976825, "grad_norm": 0.23069864511489868, "learning_rate": 4.218052027135322e-08, "loss": 0.0199, "step": 126880 }, { "epoch": 1.9475097843603715, "grad_norm": 0.3942139446735382, "learning_rate": 4.1935100037668875e-08, "loss": 0.0244, "step": 126890 }, { "epoch": 1.9476632645230603, "grad_norm": 0.2093285173177719, "learning_rate": 4.1690394356557194e-08, "loss": 0.0254, "step": 126900 }, { "epoch": 1.9478167446857495, "grad_norm": 0.34179452061653137, "learning_rate": 4.1446403245576364e-08, "loss": 0.0203, "step": 126910 }, { "epoch": 1.9479702248484383, "grad_norm": 0.4475820064544678, "learning_rate": 4.120312672223459e-08, "loss": 0.0238, "step": 126920 }, { "epoch": 1.9481237050111273, "grad_norm": 0.5084077715873718, "learning_rate": 4.096056480398791e-08, "loss": 0.0227, "step": 126930 }, { "epoch": 1.9482771851738163, "grad_norm": 0.4063122272491455, "learning_rate": 4.07187175082413e-08, "loss": 0.0245, "step": 126940 }, { "epoch": 1.948430665336505, "grad_norm": 0.2812960147857666, "learning_rate": 4.047758485235087e-08, "loss": 0.0261, "step": 126950 }, { "epoch": 1.9485841454991943, "grad_norm": 0.4373510181903839, "learning_rate": 4.0237166853618335e-08, "loss": 0.0283, "step": 126960 }, { "epoch": 1.9487376256618831, "grad_norm": 0.3693028390407562, "learning_rate": 3.9997463529293236e-08, "loss": 0.0219, "step": 126970 }, { "epoch": 1.9488911058245721, "grad_norm": 0.6166362762451172, "learning_rate": 3.975847489657847e-08, "loss": 0.0258, "step": 126980 }, { "epoch": 1.9490445859872612, "grad_norm": 0.32666918635368347, "learning_rate": 3.9520200972621434e-08, "loss": 0.0285, "step": 126990 }, { "epoch": 1.9491980661499502, "grad_norm": 0.5006371736526489, "learning_rate": 3.928264177452068e-08, "loss": 0.0223, "step": 127000 }, { "epoch": 1.9493515463126392, "grad_norm": 0.4547572135925293, "learning_rate": 3.904579731932146e-08, "loss": 0.025, "step": 127010 }, { "epoch": 1.949505026475328, "grad_norm": 0.38897785544395447, "learning_rate": 3.880966762402016e-08, "loss": 0.0272, "step": 127020 }, { "epoch": 1.949658506638017, "grad_norm": 0.29095956683158875, "learning_rate": 3.857425270555992e-08, "loss": 0.0209, "step": 127030 }, { "epoch": 1.949811986800706, "grad_norm": 0.5040608644485474, "learning_rate": 3.833955258083166e-08, "loss": 0.0255, "step": 127040 }, { "epoch": 1.949965466963395, "grad_norm": 0.349757581949234, "learning_rate": 3.810556726667969e-08, "loss": 0.027, "step": 127050 }, { "epoch": 1.950118947126084, "grad_norm": 0.39699068665504456, "learning_rate": 3.787229677989168e-08, "loss": 0.0179, "step": 127060 }, { "epoch": 1.9502724272887728, "grad_norm": 0.30965492129325867, "learning_rate": 3.763974113720648e-08, "loss": 0.0214, "step": 127070 }, { "epoch": 1.950425907451462, "grad_norm": 0.26576125621795654, "learning_rate": 3.740790035531183e-08, "loss": 0.0191, "step": 127080 }, { "epoch": 1.9505793876141508, "grad_norm": 0.419324666261673, "learning_rate": 3.7176774450843335e-08, "loss": 0.0207, "step": 127090 }, { "epoch": 1.9507328677768399, "grad_norm": 0.19939109683036804, "learning_rate": 3.69463634403866e-08, "loss": 0.0208, "step": 127100 }, { "epoch": 1.9508863479395289, "grad_norm": 0.3533880412578583, "learning_rate": 3.6716667340473966e-08, "loss": 0.0205, "step": 127110 }, { "epoch": 1.9510398281022177, "grad_norm": 0.4645238220691681, "learning_rate": 3.64876861675878e-08, "loss": 0.0243, "step": 127120 }, { "epoch": 1.951193308264907, "grad_norm": 0.3010464608669281, "learning_rate": 3.6259419938159404e-08, "loss": 0.0272, "step": 127130 }, { "epoch": 1.9513467884275957, "grad_norm": 0.39540034532546997, "learning_rate": 3.6031868668567895e-08, "loss": 0.0194, "step": 127140 }, { "epoch": 1.9515002685902847, "grad_norm": 0.392439067363739, "learning_rate": 3.5805032375142434e-08, "loss": 0.0227, "step": 127150 }, { "epoch": 1.9516537487529737, "grad_norm": 0.31893429160118103, "learning_rate": 3.5578911074157786e-08, "loss": 0.0159, "step": 127160 }, { "epoch": 1.9518072289156625, "grad_norm": 0.21751584112644196, "learning_rate": 3.5353504781840965e-08, "loss": 0.0217, "step": 127170 }, { "epoch": 1.9519607090783517, "grad_norm": 0.3222677707672119, "learning_rate": 3.51288135143657e-08, "loss": 0.0227, "step": 127180 }, { "epoch": 1.9521141892410405, "grad_norm": 0.3468133807182312, "learning_rate": 3.490483728785687e-08, "loss": 0.0221, "step": 127190 }, { "epoch": 1.9522676694037295, "grad_norm": 0.46460554003715515, "learning_rate": 3.4681576118382744e-08, "loss": 0.0205, "step": 127200 }, { "epoch": 1.9524211495664185, "grad_norm": 0.38500073552131653, "learning_rate": 3.445903002196715e-08, "loss": 0.0297, "step": 127210 }, { "epoch": 1.9525746297291076, "grad_norm": 0.3247276842594147, "learning_rate": 3.4237199014577336e-08, "loss": 0.024, "step": 127220 }, { "epoch": 1.9527281098917966, "grad_norm": 0.42680659890174866, "learning_rate": 3.401608311213056e-08, "loss": 0.028, "step": 127230 }, { "epoch": 1.9528815900544854, "grad_norm": 0.4030935764312744, "learning_rate": 3.379568233049413e-08, "loss": 0.0236, "step": 127240 }, { "epoch": 1.9530350702171746, "grad_norm": 0.40408089756965637, "learning_rate": 3.357599668548428e-08, "loss": 0.0294, "step": 127250 }, { "epoch": 1.9531885503798634, "grad_norm": 0.3371133506298065, "learning_rate": 3.335702619286174e-08, "loss": 0.0241, "step": 127260 }, { "epoch": 1.9533420305425524, "grad_norm": 0.4539135992527008, "learning_rate": 3.313877086834172e-08, "loss": 0.0252, "step": 127270 }, { "epoch": 1.9534955107052414, "grad_norm": 0.34766241908073425, "learning_rate": 3.292123072758502e-08, "loss": 0.0258, "step": 127280 }, { "epoch": 1.9536489908679302, "grad_norm": 0.4004790186882019, "learning_rate": 3.2704405786200265e-08, "loss": 0.0261, "step": 127290 }, { "epoch": 1.9538024710306194, "grad_norm": 0.3632368743419647, "learning_rate": 3.248829605974724e-08, "loss": 0.0236, "step": 127300 }, { "epoch": 1.9539559511933082, "grad_norm": 0.36228328943252563, "learning_rate": 3.227290156373131e-08, "loss": 0.0278, "step": 127310 }, { "epoch": 1.9541094313559972, "grad_norm": 0.42157331109046936, "learning_rate": 3.205822231361122e-08, "loss": 0.0211, "step": 127320 }, { "epoch": 1.9542629115186863, "grad_norm": 0.3381246328353882, "learning_rate": 3.1844258324787994e-08, "loss": 0.0285, "step": 127330 }, { "epoch": 1.954416391681375, "grad_norm": 0.38280317187309265, "learning_rate": 3.163100961261823e-08, "loss": 0.0263, "step": 127340 }, { "epoch": 1.9545698718440643, "grad_norm": 0.20697765052318573, "learning_rate": 3.14184761924019e-08, "loss": 0.0222, "step": 127350 }, { "epoch": 1.954723352006753, "grad_norm": 0.317810982465744, "learning_rate": 3.120665807939127e-08, "loss": 0.0177, "step": 127360 }, { "epoch": 1.954876832169442, "grad_norm": 0.3137691020965576, "learning_rate": 3.099555528878306e-08, "loss": 0.0266, "step": 127370 }, { "epoch": 1.955030312332131, "grad_norm": 0.26313579082489014, "learning_rate": 3.078516783572738e-08, "loss": 0.0189, "step": 127380 }, { "epoch": 1.9551837924948199, "grad_norm": 0.4659796357154846, "learning_rate": 3.0575495735318814e-08, "loss": 0.0219, "step": 127390 }, { "epoch": 1.9553372726575091, "grad_norm": 0.3559480309486389, "learning_rate": 3.036653900260533e-08, "loss": 0.0224, "step": 127400 }, { "epoch": 1.955490752820198, "grad_norm": 0.29530462622642517, "learning_rate": 3.015829765257827e-08, "loss": 0.0235, "step": 127410 }, { "epoch": 1.955644232982887, "grad_norm": 0.2811121642589569, "learning_rate": 2.9950771700181235e-08, "loss": 0.0225, "step": 127420 }, { "epoch": 1.955797713145576, "grad_norm": 0.3667221963405609, "learning_rate": 2.9743961160305646e-08, "loss": 0.0192, "step": 127430 }, { "epoch": 1.955951193308265, "grad_norm": 0.7855426669120789, "learning_rate": 2.9537866047791852e-08, "loss": 0.0315, "step": 127440 }, { "epoch": 1.956104673470954, "grad_norm": 0.3303907811641693, "learning_rate": 2.9332486377428028e-08, "loss": 0.0282, "step": 127450 }, { "epoch": 1.9562581536336427, "grad_norm": 0.5299587845802307, "learning_rate": 2.9127822163951268e-08, "loss": 0.026, "step": 127460 }, { "epoch": 1.956411633796332, "grad_norm": 0.4480117857456207, "learning_rate": 2.892387342204761e-08, "loss": 0.0243, "step": 127470 }, { "epoch": 1.9565651139590208, "grad_norm": 0.307323694229126, "learning_rate": 2.872064016635201e-08, "loss": 0.0277, "step": 127480 }, { "epoch": 1.9567185941217098, "grad_norm": 0.4405696988105774, "learning_rate": 2.8518122411446135e-08, "loss": 0.023, "step": 127490 }, { "epoch": 1.9568720742843988, "grad_norm": 0.38271984457969666, "learning_rate": 2.831632017186392e-08, "loss": 0.0297, "step": 127500 }, { "epoch": 1.9570255544470876, "grad_norm": 0.33807820081710815, "learning_rate": 2.811523346208489e-08, "loss": 0.0355, "step": 127510 }, { "epoch": 1.9571790346097768, "grad_norm": 0.45030665397644043, "learning_rate": 2.7914862296538613e-08, "loss": 0.0302, "step": 127520 }, { "epoch": 1.9573325147724656, "grad_norm": 0.30841827392578125, "learning_rate": 2.771520668960248e-08, "loss": 0.0216, "step": 127530 }, { "epoch": 1.9574859949351546, "grad_norm": 0.30969569087028503, "learning_rate": 2.7516266655602808e-08, "loss": 0.0251, "step": 127540 }, { "epoch": 1.9576394750978436, "grad_norm": 0.2615455389022827, "learning_rate": 2.7318042208815952e-08, "loss": 0.0219, "step": 127550 }, { "epoch": 1.9577929552605324, "grad_norm": 0.41292986273765564, "learning_rate": 2.7120533363464986e-08, "loss": 0.0294, "step": 127560 }, { "epoch": 1.9579464354232217, "grad_norm": 0.3581884801387787, "learning_rate": 2.6923740133723007e-08, "loss": 0.0225, "step": 127570 }, { "epoch": 1.9580999155859105, "grad_norm": 0.3616316616535187, "learning_rate": 2.6727662533709843e-08, "loss": 0.0223, "step": 127580 }, { "epoch": 1.9582533957485995, "grad_norm": 0.3679856061935425, "learning_rate": 2.653230057749645e-08, "loss": 0.0185, "step": 127590 }, { "epoch": 1.9584068759112885, "grad_norm": 0.239900603890419, "learning_rate": 2.6337654279100512e-08, "loss": 0.0268, "step": 127600 }, { "epoch": 1.9585603560739775, "grad_norm": 0.32709768414497375, "learning_rate": 2.6143723652488627e-08, "loss": 0.0189, "step": 127610 }, { "epoch": 1.9587138362366665, "grad_norm": 0.3750886917114258, "learning_rate": 2.595050871157856e-08, "loss": 0.0209, "step": 127620 }, { "epoch": 1.9588673163993553, "grad_norm": 0.30735403299331665, "learning_rate": 2.575800947023255e-08, "loss": 0.0251, "step": 127630 }, { "epoch": 1.9590207965620443, "grad_norm": 0.36701053380966187, "learning_rate": 2.55662259422651e-08, "loss": 0.0217, "step": 127640 }, { "epoch": 1.9591742767247333, "grad_norm": 0.3962028920650482, "learning_rate": 2.5375158141437427e-08, "loss": 0.0215, "step": 127650 }, { "epoch": 1.9593277568874223, "grad_norm": 0.4019494950771332, "learning_rate": 2.5184806081458567e-08, "loss": 0.0261, "step": 127660 }, { "epoch": 1.9594812370501113, "grad_norm": 0.34567132592201233, "learning_rate": 2.4995169775989813e-08, "loss": 0.0287, "step": 127670 }, { "epoch": 1.9596347172128001, "grad_norm": 0.3106740713119507, "learning_rate": 2.4806249238635837e-08, "loss": 0.0204, "step": 127680 }, { "epoch": 1.9597881973754894, "grad_norm": 0.37704703211784363, "learning_rate": 2.4618044482955793e-08, "loss": 0.0243, "step": 127690 }, { "epoch": 1.9599416775381782, "grad_norm": 0.25617942214012146, "learning_rate": 2.4430555522453326e-08, "loss": 0.0226, "step": 127700 }, { "epoch": 1.9600951577008672, "grad_norm": 0.2845228910446167, "learning_rate": 2.4243782370581003e-08, "loss": 0.0235, "step": 127710 }, { "epoch": 1.9602486378635562, "grad_norm": 0.32181382179260254, "learning_rate": 2.4057725040741442e-08, "loss": 0.0272, "step": 127720 }, { "epoch": 1.960402118026245, "grad_norm": 0.2852831780910492, "learning_rate": 2.3872383546287292e-08, "loss": 0.0181, "step": 127730 }, { "epoch": 1.9605555981889342, "grad_norm": 0.28515467047691345, "learning_rate": 2.3687757900514586e-08, "loss": 0.0261, "step": 127740 }, { "epoch": 1.960709078351623, "grad_norm": 0.3426326513290405, "learning_rate": 2.3503848116673833e-08, "loss": 0.0198, "step": 127750 }, { "epoch": 1.960862558514312, "grad_norm": 0.24643240869045258, "learning_rate": 2.3320654207961147e-08, "loss": 0.0231, "step": 127760 }, { "epoch": 1.961016038677001, "grad_norm": 0.33129453659057617, "learning_rate": 2.3138176187521565e-08, "loss": 0.0269, "step": 127770 }, { "epoch": 1.9611695188396898, "grad_norm": 0.36124205589294434, "learning_rate": 2.2956414068449064e-08, "loss": 0.0263, "step": 127780 }, { "epoch": 1.961322999002379, "grad_norm": 0.47612082958221436, "learning_rate": 2.2775367863786536e-08, "loss": 0.0223, "step": 127790 }, { "epoch": 1.9614764791650678, "grad_norm": 0.29883283376693726, "learning_rate": 2.2595037586524705e-08, "loss": 0.023, "step": 127800 }, { "epoch": 1.9616299593277569, "grad_norm": 0.528609573841095, "learning_rate": 2.2415423249603217e-08, "loss": 0.0275, "step": 127810 }, { "epoch": 1.9617834394904459, "grad_norm": 0.4039551317691803, "learning_rate": 2.2236524865910658e-08, "loss": 0.0189, "step": 127820 }, { "epoch": 1.9619369196531349, "grad_norm": 0.2095688283443451, "learning_rate": 2.2058342448284532e-08, "loss": 0.0175, "step": 127830 }, { "epoch": 1.962090399815824, "grad_norm": 0.4243898093700409, "learning_rate": 2.1880876009510164e-08, "loss": 0.0251, "step": 127840 }, { "epoch": 1.9622438799785127, "grad_norm": 0.2863519489765167, "learning_rate": 2.170412556232293e-08, "loss": 0.0219, "step": 127850 }, { "epoch": 1.9623973601412017, "grad_norm": 0.29856181144714355, "learning_rate": 2.1528091119403795e-08, "loss": 0.0301, "step": 127860 }, { "epoch": 1.9625508403038907, "grad_norm": 0.2837302088737488, "learning_rate": 2.1352772693384873e-08, "loss": 0.0241, "step": 127870 }, { "epoch": 1.9627043204665797, "grad_norm": 0.22865638136863708, "learning_rate": 2.117817029684721e-08, "loss": 0.0217, "step": 127880 }, { "epoch": 1.9628578006292687, "grad_norm": 0.29863929748535156, "learning_rate": 2.1004283942319682e-08, "loss": 0.0242, "step": 127890 }, { "epoch": 1.9630112807919575, "grad_norm": 0.2938097417354584, "learning_rate": 2.0831113642278968e-08, "loss": 0.0204, "step": 127900 }, { "epoch": 1.9631647609546468, "grad_norm": 0.3895772695541382, "learning_rate": 2.0658659409151794e-08, "loss": 0.0244, "step": 127910 }, { "epoch": 1.9633182411173355, "grad_norm": 0.3668905198574066, "learning_rate": 2.0486921255311597e-08, "loss": 0.025, "step": 127920 }, { "epoch": 1.9634717212800246, "grad_norm": 0.5531815886497498, "learning_rate": 2.0315899193082966e-08, "loss": 0.0235, "step": 127930 }, { "epoch": 1.9636252014427136, "grad_norm": 0.2430998980998993, "learning_rate": 2.0145593234736083e-08, "loss": 0.0235, "step": 127940 }, { "epoch": 1.9637786816054024, "grad_norm": 0.35351279377937317, "learning_rate": 1.99760033924945e-08, "loss": 0.0218, "step": 127950 }, { "epoch": 1.9639321617680916, "grad_norm": 0.20206467807292938, "learning_rate": 1.9807129678524052e-08, "loss": 0.0207, "step": 127960 }, { "epoch": 1.9640856419307804, "grad_norm": 0.2881046235561371, "learning_rate": 1.963897210494392e-08, "loss": 0.0226, "step": 127970 }, { "epoch": 1.9642391220934694, "grad_norm": 0.2635261118412018, "learning_rate": 1.9471530683821126e-08, "loss": 0.0184, "step": 127980 }, { "epoch": 1.9643926022561584, "grad_norm": 0.30156782269477844, "learning_rate": 1.9304805427169393e-08, "loss": 0.0252, "step": 127990 }, { "epoch": 1.9645460824188472, "grad_norm": 0.37147843837738037, "learning_rate": 1.9138796346952482e-08, "loss": 0.0219, "step": 128000 }, { "epoch": 1.9646995625815364, "grad_norm": 0.2911180853843689, "learning_rate": 1.8973503455083087e-08, "loss": 0.0195, "step": 128010 }, { "epoch": 1.9648530427442252, "grad_norm": 0.3942515552043915, "learning_rate": 1.880892676342172e-08, "loss": 0.0241, "step": 128020 }, { "epoch": 1.9650065229069142, "grad_norm": 0.29767417907714844, "learning_rate": 1.8645066283777826e-08, "loss": 0.0201, "step": 128030 }, { "epoch": 1.9651600030696033, "grad_norm": 0.3893143832683563, "learning_rate": 1.848192202790977e-08, "loss": 0.0232, "step": 128040 }, { "epoch": 1.9653134832322923, "grad_norm": 0.4029473066329956, "learning_rate": 1.8319494007523752e-08, "loss": 0.0283, "step": 128050 }, { "epoch": 1.9654669633949813, "grad_norm": 0.33402353525161743, "learning_rate": 1.815778223427489e-08, "loss": 0.02, "step": 128060 }, { "epoch": 1.96562044355767, "grad_norm": 0.3980516493320465, "learning_rate": 1.7996786719767233e-08, "loss": 0.0245, "step": 128070 }, { "epoch": 1.9657739237203593, "grad_norm": 0.3987390697002411, "learning_rate": 1.7836507475552657e-08, "loss": 0.026, "step": 128080 }, { "epoch": 1.965927403883048, "grad_norm": 0.3682404160499573, "learning_rate": 1.7676944513133064e-08, "loss": 0.0254, "step": 128090 }, { "epoch": 1.966080884045737, "grad_norm": 0.3180505335330963, "learning_rate": 1.7518097843957082e-08, "loss": 0.0234, "step": 128100 }, { "epoch": 1.9662343642084261, "grad_norm": 0.34125855565071106, "learning_rate": 1.7359967479424477e-08, "loss": 0.0192, "step": 128110 }, { "epoch": 1.966387844371115, "grad_norm": 0.551708459854126, "learning_rate": 1.720255343088062e-08, "loss": 0.0263, "step": 128120 }, { "epoch": 1.9665413245338041, "grad_norm": 0.40079689025878906, "learning_rate": 1.704585570962092e-08, "loss": 0.0269, "step": 128130 }, { "epoch": 1.966694804696493, "grad_norm": 0.30104705691337585, "learning_rate": 1.6889874326889712e-08, "loss": 0.0179, "step": 128140 }, { "epoch": 1.966848284859182, "grad_norm": 0.3165094554424286, "learning_rate": 1.6734609293881375e-08, "loss": 0.0236, "step": 128150 }, { "epoch": 1.967001765021871, "grad_norm": 1.0968831777572632, "learning_rate": 1.6580060621733674e-08, "loss": 0.0349, "step": 128160 }, { "epoch": 1.9671552451845598, "grad_norm": 0.35643839836120605, "learning_rate": 1.642622832153995e-08, "loss": 0.0247, "step": 128170 }, { "epoch": 1.967308725347249, "grad_norm": 0.3919713795185089, "learning_rate": 1.6273112404335822e-08, "loss": 0.0241, "step": 128180 }, { "epoch": 1.9674622055099378, "grad_norm": 0.493102103471756, "learning_rate": 1.6120712881110277e-08, "loss": 0.0278, "step": 128190 }, { "epoch": 1.9676156856726268, "grad_norm": 0.41314443945884705, "learning_rate": 1.5969029762797904e-08, "loss": 0.0228, "step": 128200 }, { "epoch": 1.9677691658353158, "grad_norm": 0.608109712600708, "learning_rate": 1.5818063060283328e-08, "loss": 0.0314, "step": 128210 }, { "epoch": 1.9679226459980046, "grad_norm": 0.41450244188308716, "learning_rate": 1.5667812784398995e-08, "loss": 0.0324, "step": 128220 }, { "epoch": 1.9680761261606938, "grad_norm": 0.3654809892177582, "learning_rate": 1.551827894592628e-08, "loss": 0.0234, "step": 128230 }, { "epoch": 1.9682296063233826, "grad_norm": 0.2893281579017639, "learning_rate": 1.5369461555595487e-08, "loss": 0.0236, "step": 128240 }, { "epoch": 1.9683830864860716, "grad_norm": 0.2838125228881836, "learning_rate": 1.5221360624084747e-08, "loss": 0.0191, "step": 128250 }, { "epoch": 1.9685365666487606, "grad_norm": 0.415044903755188, "learning_rate": 1.507397616202111e-08, "loss": 0.0333, "step": 128260 }, { "epoch": 1.9686900468114497, "grad_norm": 0.3800065815448761, "learning_rate": 1.492730817998167e-08, "loss": 0.027, "step": 128270 }, { "epoch": 1.9688435269741387, "grad_norm": 0.36480090022087097, "learning_rate": 1.4781356688489123e-08, "loss": 0.019, "step": 128280 }, { "epoch": 1.9689970071368275, "grad_norm": 0.26042139530181885, "learning_rate": 1.4636121698017314e-08, "loss": 0.0227, "step": 128290 }, { "epoch": 1.9691504872995167, "grad_norm": 0.5283668637275696, "learning_rate": 1.4491603218986794e-08, "loss": 0.0208, "step": 128300 }, { "epoch": 1.9693039674622055, "grad_norm": 0.45773664116859436, "learning_rate": 1.4347801261769267e-08, "loss": 0.0189, "step": 128310 }, { "epoch": 1.9694574476248945, "grad_norm": 0.3355473279953003, "learning_rate": 1.4204715836682037e-08, "loss": 0.024, "step": 128320 }, { "epoch": 1.9696109277875835, "grad_norm": 0.47070515155792236, "learning_rate": 1.4062346953992445e-08, "loss": 0.0263, "step": 128330 }, { "epoch": 1.9697644079502723, "grad_norm": 0.29259565472602844, "learning_rate": 1.3920694623917874e-08, "loss": 0.021, "step": 128340 }, { "epoch": 1.9699178881129615, "grad_norm": 0.3618641197681427, "learning_rate": 1.3779758856620196e-08, "loss": 0.0233, "step": 128350 }, { "epoch": 1.9700713682756503, "grad_norm": 0.3247147500514984, "learning_rate": 1.3639539662214652e-08, "loss": 0.0217, "step": 128360 }, { "epoch": 1.9702248484383393, "grad_norm": 0.16812030971050262, "learning_rate": 1.3500037050762083e-08, "loss": 0.0186, "step": 128370 }, { "epoch": 1.9703783286010284, "grad_norm": 0.271707147359848, "learning_rate": 1.336125103227226e-08, "loss": 0.0234, "step": 128380 }, { "epoch": 1.9705318087637171, "grad_norm": 0.3075472414493561, "learning_rate": 1.3223181616703884e-08, "loss": 0.0225, "step": 128390 }, { "epoch": 1.9706852889264064, "grad_norm": 0.36548876762390137, "learning_rate": 1.3085828813964584e-08, "loss": 0.024, "step": 128400 }, { "epoch": 1.9708387690890952, "grad_norm": 0.2667342722415924, "learning_rate": 1.2949192633910924e-08, "loss": 0.0279, "step": 128410 }, { "epoch": 1.9709922492517842, "grad_norm": 0.33945730328559875, "learning_rate": 1.2813273086346168e-08, "loss": 0.0184, "step": 128420 }, { "epoch": 1.9711457294144732, "grad_norm": 0.29988980293273926, "learning_rate": 1.2678070181024737e-08, "loss": 0.0304, "step": 128430 }, { "epoch": 1.9712992095771622, "grad_norm": 0.3479413390159607, "learning_rate": 1.2543583927647762e-08, "loss": 0.0294, "step": 128440 }, { "epoch": 1.9714526897398512, "grad_norm": 0.328164279460907, "learning_rate": 1.240981433586419e-08, "loss": 0.0269, "step": 128450 }, { "epoch": 1.97160616990254, "grad_norm": 0.45231524109840393, "learning_rate": 1.2276761415274118e-08, "loss": 0.0338, "step": 128460 }, { "epoch": 1.971759650065229, "grad_norm": 0.2920663356781006, "learning_rate": 1.2144425175425468e-08, "loss": 0.0234, "step": 128470 }, { "epoch": 1.971913130227918, "grad_norm": 0.26669469475746155, "learning_rate": 1.2012805625812862e-08, "loss": 0.0217, "step": 128480 }, { "epoch": 1.972066610390607, "grad_norm": 0.27653196454048157, "learning_rate": 1.188190277588097e-08, "loss": 0.0222, "step": 128490 }, { "epoch": 1.972220090553296, "grad_norm": 0.3344608545303345, "learning_rate": 1.1751716635023392e-08, "loss": 0.0257, "step": 128500 }, { "epoch": 1.9723735707159848, "grad_norm": 0.2832534909248352, "learning_rate": 1.1622247212582649e-08, "loss": 0.0239, "step": 128510 }, { "epoch": 1.972527050878674, "grad_norm": 0.2807643711566925, "learning_rate": 1.149349451784687e-08, "loss": 0.0291, "step": 128520 }, { "epoch": 1.9726805310413629, "grad_norm": 0.4378737211227417, "learning_rate": 1.1365458560055331e-08, "loss": 0.0283, "step": 128530 }, { "epoch": 1.9728340112040519, "grad_norm": 0.4658259153366089, "learning_rate": 1.1238139348397348e-08, "loss": 0.0284, "step": 128540 }, { "epoch": 1.972987491366741, "grad_norm": 0.30645301938056946, "learning_rate": 1.1111536892006724e-08, "loss": 0.0307, "step": 128550 }, { "epoch": 1.9731409715294297, "grad_norm": 0.30807435512542725, "learning_rate": 1.0985651199969526e-08, "loss": 0.0238, "step": 128560 }, { "epoch": 1.973294451692119, "grad_norm": 0.3071085214614868, "learning_rate": 1.0860482281317419e-08, "loss": 0.018, "step": 128570 }, { "epoch": 1.9734479318548077, "grad_norm": 0.4184426963329315, "learning_rate": 1.0736030145033215e-08, "loss": 0.0227, "step": 128580 }, { "epoch": 1.9736014120174967, "grad_norm": 0.2544543147087097, "learning_rate": 1.061229480004533e-08, "loss": 0.0244, "step": 128590 }, { "epoch": 1.9737548921801857, "grad_norm": 0.3044353425502777, "learning_rate": 1.0489276255235547e-08, "loss": 0.024, "step": 128600 }, { "epoch": 1.9739083723428745, "grad_norm": 0.3480781316757202, "learning_rate": 1.036697451942903e-08, "loss": 0.0191, "step": 128610 }, { "epoch": 1.9740618525055638, "grad_norm": 0.4723365008831024, "learning_rate": 1.024538960140098e-08, "loss": 0.0291, "step": 128620 }, { "epoch": 1.9742153326682526, "grad_norm": 0.3784266412258148, "learning_rate": 1.012452150987775e-08, "loss": 0.0207, "step": 128630 }, { "epoch": 1.9743688128309416, "grad_norm": 0.5000633597373962, "learning_rate": 1.0004370253531293e-08, "loss": 0.0229, "step": 128640 }, { "epoch": 1.9745222929936306, "grad_norm": 0.41098541021347046, "learning_rate": 9.884935840984711e-09, "loss": 0.0286, "step": 128650 }, { "epoch": 1.9746757731563196, "grad_norm": 0.4667467474937439, "learning_rate": 9.766218280805595e-09, "loss": 0.0236, "step": 128660 }, { "epoch": 1.9748292533190086, "grad_norm": 0.3829849660396576, "learning_rate": 9.648217581514907e-09, "loss": 0.0283, "step": 128670 }, { "epoch": 1.9749827334816974, "grad_norm": 0.31696271896362305, "learning_rate": 9.530933751579208e-09, "loss": 0.0253, "step": 128680 }, { "epoch": 1.9751362136443866, "grad_norm": 0.32935670018196106, "learning_rate": 9.414366799412877e-09, "loss": 0.0239, "step": 128690 }, { "epoch": 1.9752896938070754, "grad_norm": 0.416879266500473, "learning_rate": 9.298516733382557e-09, "loss": 0.0276, "step": 128700 }, { "epoch": 1.9754431739697644, "grad_norm": 0.3356242775917053, "learning_rate": 9.183383561800485e-09, "loss": 0.0279, "step": 128710 }, { "epoch": 1.9755966541324534, "grad_norm": 0.4672963321208954, "learning_rate": 9.068967292927832e-09, "loss": 0.0205, "step": 128720 }, { "epoch": 1.9757501342951422, "grad_norm": 0.2304805964231491, "learning_rate": 8.9552679349747e-09, "loss": 0.0216, "step": 128730 }, { "epoch": 1.9759036144578315, "grad_norm": 0.28940820693969727, "learning_rate": 8.842285496100112e-09, "loss": 0.0187, "step": 128740 }, { "epoch": 1.9760570946205203, "grad_norm": 0.3733677566051483, "learning_rate": 8.73001998441092e-09, "loss": 0.0189, "step": 128750 }, { "epoch": 1.9762105747832093, "grad_norm": 0.3064607083797455, "learning_rate": 8.618471407961793e-09, "loss": 0.0292, "step": 128760 }, { "epoch": 1.9763640549458983, "grad_norm": 0.4222278296947479, "learning_rate": 8.507639774759657e-09, "loss": 0.0284, "step": 128770 }, { "epoch": 1.976517535108587, "grad_norm": 0.2199041098356247, "learning_rate": 8.397525092754822e-09, "loss": 0.0201, "step": 128780 }, { "epoch": 1.9766710152712763, "grad_norm": 0.3326055109500885, "learning_rate": 8.288127369850963e-09, "loss": 0.0275, "step": 128790 }, { "epoch": 1.976824495433965, "grad_norm": 0.32578253746032715, "learning_rate": 8.179446613895137e-09, "loss": 0.0298, "step": 128800 }, { "epoch": 1.9769779755966541, "grad_norm": 0.4505046606063843, "learning_rate": 8.071482832688882e-09, "loss": 0.0225, "step": 128810 }, { "epoch": 1.9771314557593431, "grad_norm": 0.3390182554721832, "learning_rate": 7.964236033977112e-09, "loss": 0.0242, "step": 128820 }, { "epoch": 1.977284935922032, "grad_norm": 0.6282224059104919, "learning_rate": 7.857706225457007e-09, "loss": 0.0245, "step": 128830 }, { "epoch": 1.9774384160847212, "grad_norm": 0.4940771162509918, "learning_rate": 7.75189341477134e-09, "loss": 0.0235, "step": 128840 }, { "epoch": 1.97759189624741, "grad_norm": 0.3584875762462616, "learning_rate": 7.646797609514033e-09, "loss": 0.0283, "step": 128850 }, { "epoch": 1.977745376410099, "grad_norm": 0.33824872970581055, "learning_rate": 7.542418817225727e-09, "loss": 0.0183, "step": 128860 }, { "epoch": 1.977898856572788, "grad_norm": 0.3189050555229187, "learning_rate": 7.438757045395984e-09, "loss": 0.0239, "step": 128870 }, { "epoch": 1.978052336735477, "grad_norm": 0.26127198338508606, "learning_rate": 7.335812301463297e-09, "loss": 0.0163, "step": 128880 }, { "epoch": 1.978205816898166, "grad_norm": 0.37250006198883057, "learning_rate": 7.2335845928150906e-09, "loss": 0.0282, "step": 128890 }, { "epoch": 1.9783592970608548, "grad_norm": 0.30744481086730957, "learning_rate": 7.132073926786609e-09, "loss": 0.0233, "step": 128900 }, { "epoch": 1.978512777223544, "grad_norm": 0.27010494470596313, "learning_rate": 7.031280310662025e-09, "loss": 0.026, "step": 128910 }, { "epoch": 1.9786662573862328, "grad_norm": 0.47309592366218567, "learning_rate": 6.931203751673332e-09, "loss": 0.0266, "step": 128920 }, { "epoch": 1.9788197375489218, "grad_norm": 0.4175543189048767, "learning_rate": 6.831844257001452e-09, "loss": 0.0296, "step": 128930 }, { "epoch": 1.9789732177116108, "grad_norm": 0.47078606486320496, "learning_rate": 6.7332018337773475e-09, "loss": 0.0292, "step": 128940 }, { "epoch": 1.9791266978742996, "grad_norm": 0.3534873127937317, "learning_rate": 6.6352764890786905e-09, "loss": 0.0299, "step": 128950 }, { "epoch": 1.9792801780369889, "grad_norm": 0.5333490967750549, "learning_rate": 6.538068229930972e-09, "loss": 0.0329, "step": 128960 }, { "epoch": 1.9794336581996776, "grad_norm": 0.25876912474632263, "learning_rate": 6.441577063310833e-09, "loss": 0.0191, "step": 128970 }, { "epoch": 1.9795871383623667, "grad_norm": 0.38382282853126526, "learning_rate": 6.3458029961427355e-09, "loss": 0.0234, "step": 128980 }, { "epoch": 1.9797406185250557, "grad_norm": 0.42445072531700134, "learning_rate": 6.25074603529785e-09, "loss": 0.026, "step": 128990 }, { "epoch": 1.9798940986877445, "grad_norm": 0.3273056149482727, "learning_rate": 6.156406187596276e-09, "loss": 0.0227, "step": 129000 }, { "epoch": 1.9800475788504337, "grad_norm": 0.2857319116592407, "learning_rate": 6.062783459809263e-09, "loss": 0.0172, "step": 129010 }, { "epoch": 1.9802010590131225, "grad_norm": 0.5264992117881775, "learning_rate": 5.969877858653661e-09, "loss": 0.0275, "step": 129020 }, { "epoch": 1.9803545391758115, "grad_norm": 0.3600061237812042, "learning_rate": 5.877689390797469e-09, "loss": 0.0248, "step": 129030 }, { "epoch": 1.9805080193385005, "grad_norm": 0.32735902070999146, "learning_rate": 5.786218062854287e-09, "loss": 0.0248, "step": 129040 }, { "epoch": 1.9806614995011895, "grad_norm": 0.3745036721229553, "learning_rate": 5.695463881387753e-09, "loss": 0.0234, "step": 129050 }, { "epoch": 1.9808149796638785, "grad_norm": 0.39193975925445557, "learning_rate": 5.605426852910434e-09, "loss": 0.0243, "step": 129060 }, { "epoch": 1.9809684598265673, "grad_norm": 0.39395326375961304, "learning_rate": 5.516106983883829e-09, "loss": 0.0257, "step": 129070 }, { "epoch": 1.9811219399892563, "grad_norm": 0.3732548654079437, "learning_rate": 5.427504280716145e-09, "loss": 0.0288, "step": 129080 }, { "epoch": 1.9812754201519454, "grad_norm": 0.43808814883232117, "learning_rate": 5.339618749765629e-09, "loss": 0.0318, "step": 129090 }, { "epoch": 1.9814289003146344, "grad_norm": 0.412129282951355, "learning_rate": 5.252450397338349e-09, "loss": 0.0226, "step": 129100 }, { "epoch": 1.9815823804773234, "grad_norm": 0.35844069719314575, "learning_rate": 5.16599922969041e-09, "loss": 0.0214, "step": 129110 }, { "epoch": 1.9817358606400122, "grad_norm": 0.28616413474082947, "learning_rate": 5.08026525302241e-09, "loss": 0.0232, "step": 129120 }, { "epoch": 1.9818893408027014, "grad_norm": 0.5438333749771118, "learning_rate": 4.995248473489422e-09, "loss": 0.0274, "step": 129130 }, { "epoch": 1.9820428209653902, "grad_norm": 0.5606100559234619, "learning_rate": 4.910948897189904e-09, "loss": 0.0267, "step": 129140 }, { "epoch": 1.9821963011280792, "grad_norm": 0.41076788306236267, "learning_rate": 4.82736653017346e-09, "loss": 0.026, "step": 129150 }, { "epoch": 1.9823497812907682, "grad_norm": 0.3502900302410126, "learning_rate": 4.744501378437516e-09, "loss": 0.0271, "step": 129160 }, { "epoch": 1.982503261453457, "grad_norm": 0.2692495882511139, "learning_rate": 4.662353447928425e-09, "loss": 0.0203, "step": 129170 }, { "epoch": 1.9826567416161462, "grad_norm": 0.30058878660202026, "learning_rate": 4.5809227445403614e-09, "loss": 0.0284, "step": 129180 }, { "epoch": 1.982810221778835, "grad_norm": 0.32777833938598633, "learning_rate": 4.50020927411754e-09, "loss": 0.0199, "step": 129190 }, { "epoch": 1.982963701941524, "grad_norm": 0.4872741103172302, "learning_rate": 4.420213042449773e-09, "loss": 0.0266, "step": 129200 }, { "epoch": 1.983117182104213, "grad_norm": 0.3229926526546478, "learning_rate": 4.340934055279133e-09, "loss": 0.0212, "step": 129210 }, { "epoch": 1.9832706622669019, "grad_norm": 0.42198851704597473, "learning_rate": 4.262372318294405e-09, "loss": 0.0256, "step": 129220 }, { "epoch": 1.983424142429591, "grad_norm": 0.3226548433303833, "learning_rate": 4.1845278371310805e-09, "loss": 0.0205, "step": 129230 }, { "epoch": 1.9835776225922799, "grad_norm": 0.32387033104896545, "learning_rate": 4.107400617376911e-09, "loss": 0.027, "step": 129240 }, { "epoch": 1.983731102754969, "grad_norm": 0.29717281460762024, "learning_rate": 4.030990664564139e-09, "loss": 0.0225, "step": 129250 }, { "epoch": 1.983884582917658, "grad_norm": 0.36099299788475037, "learning_rate": 3.9552979841783745e-09, "loss": 0.0258, "step": 129260 }, { "epoch": 1.984038063080347, "grad_norm": 0.3359839618206024, "learning_rate": 3.88032258164861e-09, "loss": 0.0262, "step": 129270 }, { "epoch": 1.984191543243036, "grad_norm": 0.5632138848304749, "learning_rate": 3.806064462356096e-09, "loss": 0.0217, "step": 129280 }, { "epoch": 1.9843450234057247, "grad_norm": 0.32780757546424866, "learning_rate": 3.732523631628792e-09, "loss": 0.0219, "step": 129290 }, { "epoch": 1.9844985035684137, "grad_norm": 0.33121979236602783, "learning_rate": 3.65970009474359e-09, "loss": 0.0203, "step": 129300 }, { "epoch": 1.9846519837311027, "grad_norm": 0.4852220118045807, "learning_rate": 3.587593856926308e-09, "loss": 0.0211, "step": 129310 }, { "epoch": 1.9848054638937918, "grad_norm": 0.3597370982170105, "learning_rate": 3.516204923351696e-09, "loss": 0.0211, "step": 129320 }, { "epoch": 1.9849589440564808, "grad_norm": 0.4615146219730377, "learning_rate": 3.445533299141213e-09, "loss": 0.0256, "step": 129330 }, { "epoch": 1.9851124242191696, "grad_norm": 0.8662866353988647, "learning_rate": 3.375578989366357e-09, "loss": 0.0236, "step": 129340 }, { "epoch": 1.9852659043818588, "grad_norm": 0.2940504252910614, "learning_rate": 3.3063419990464475e-09, "loss": 0.0185, "step": 129350 }, { "epoch": 1.9854193845445476, "grad_norm": 0.3838654160499573, "learning_rate": 3.237822333150842e-09, "loss": 0.0233, "step": 129360 }, { "epoch": 1.9855728647072366, "grad_norm": 0.4503154158592224, "learning_rate": 3.170019996595608e-09, "loss": 0.0231, "step": 129370 }, { "epoch": 1.9857263448699256, "grad_norm": 0.25598445534706116, "learning_rate": 3.102934994244633e-09, "loss": 0.0253, "step": 129380 }, { "epoch": 1.9858798250326144, "grad_norm": 0.3171481490135193, "learning_rate": 3.0365673309140642e-09, "loss": 0.0253, "step": 129390 }, { "epoch": 1.9860333051953036, "grad_norm": 0.35003235936164856, "learning_rate": 2.970917011364538e-09, "loss": 0.0238, "step": 129400 }, { "epoch": 1.9861867853579924, "grad_norm": 0.3309192359447479, "learning_rate": 2.9059840403078408e-09, "loss": 0.023, "step": 129410 }, { "epoch": 1.9863402655206814, "grad_norm": 0.39141878485679626, "learning_rate": 2.841768422402469e-09, "loss": 0.0274, "step": 129420 }, { "epoch": 1.9864937456833704, "grad_norm": 0.3270370066165924, "learning_rate": 2.7782701622569574e-09, "loss": 0.0219, "step": 129430 }, { "epoch": 1.9866472258460592, "grad_norm": 0.3774773180484772, "learning_rate": 2.7154892644265517e-09, "loss": 0.0234, "step": 129440 }, { "epoch": 1.9868007060087485, "grad_norm": 0.26792511343955994, "learning_rate": 2.653425733417647e-09, "loss": 0.0248, "step": 129450 }, { "epoch": 1.9869541861714373, "grad_norm": 0.2590532898902893, "learning_rate": 2.5920795736833484e-09, "loss": 0.024, "step": 129460 }, { "epoch": 1.9871076663341263, "grad_norm": 0.28425535559654236, "learning_rate": 2.5314507896245787e-09, "loss": 0.0308, "step": 129470 }, { "epoch": 1.9872611464968153, "grad_norm": 0.2562273144721985, "learning_rate": 2.471539385592303e-09, "loss": 0.0285, "step": 129480 }, { "epoch": 1.9874146266595043, "grad_norm": 0.3694550395011902, "learning_rate": 2.4123453658864148e-09, "loss": 0.0227, "step": 129490 }, { "epoch": 1.9875681068221933, "grad_norm": 0.3506389558315277, "learning_rate": 2.3538687347546273e-09, "loss": 0.0317, "step": 129500 }, { "epoch": 1.987721586984882, "grad_norm": 0.5279538631439209, "learning_rate": 2.2961094963913635e-09, "loss": 0.0193, "step": 129510 }, { "epoch": 1.9878750671475713, "grad_norm": 0.2948639690876007, "learning_rate": 2.239067654942195e-09, "loss": 0.0237, "step": 129520 }, { "epoch": 1.9880285473102601, "grad_norm": 0.4043794572353363, "learning_rate": 2.1827432144994054e-09, "loss": 0.0294, "step": 129530 }, { "epoch": 1.9881820274729491, "grad_norm": 0.35869500041007996, "learning_rate": 2.127136179106426e-09, "loss": 0.0223, "step": 129540 }, { "epoch": 1.9883355076356382, "grad_norm": 0.5081074237823486, "learning_rate": 2.072246552751178e-09, "loss": 0.0261, "step": 129550 }, { "epoch": 1.988488987798327, "grad_norm": 0.43973392248153687, "learning_rate": 2.0180743393738434e-09, "loss": 0.0288, "step": 129560 }, { "epoch": 1.9886424679610162, "grad_norm": 0.3901899456977844, "learning_rate": 1.9646195428613122e-09, "loss": 0.0237, "step": 129570 }, { "epoch": 1.988795948123705, "grad_norm": 0.29245510697364807, "learning_rate": 1.911882167049406e-09, "loss": 0.0223, "step": 129580 }, { "epoch": 1.988949428286394, "grad_norm": 0.3660264015197754, "learning_rate": 1.8598622157217638e-09, "loss": 0.0237, "step": 129590 }, { "epoch": 1.989102908449083, "grad_norm": 0.3257026672363281, "learning_rate": 1.8085596926120663e-09, "loss": 0.0322, "step": 129600 }, { "epoch": 1.9892563886117718, "grad_norm": 0.5067374110221863, "learning_rate": 1.7579746014007027e-09, "loss": 0.0273, "step": 129610 }, { "epoch": 1.989409868774461, "grad_norm": 0.26612550020217896, "learning_rate": 1.7081069457169918e-09, "loss": 0.019, "step": 129620 }, { "epoch": 1.9895633489371498, "grad_norm": 0.4992109537124634, "learning_rate": 1.6589567291414032e-09, "loss": 0.0257, "step": 129630 }, { "epoch": 1.9897168290998388, "grad_norm": 0.3502905070781708, "learning_rate": 1.6105239551988948e-09, "loss": 0.0176, "step": 129640 }, { "epoch": 1.9898703092625278, "grad_norm": 0.3391066789627075, "learning_rate": 1.5628086273644648e-09, "loss": 0.0282, "step": 129650 }, { "epoch": 1.9900237894252166, "grad_norm": 0.32742971181869507, "learning_rate": 1.5158107490631514e-09, "loss": 0.0218, "step": 129660 }, { "epoch": 1.9901772695879059, "grad_norm": 0.31700846552848816, "learning_rate": 1.4695303236678117e-09, "loss": 0.0232, "step": 129670 }, { "epoch": 1.9903307497505947, "grad_norm": 0.29011112451553345, "learning_rate": 1.4239673544980125e-09, "loss": 0.0189, "step": 129680 }, { "epoch": 1.9904842299132837, "grad_norm": 0.5744863152503967, "learning_rate": 1.3791218448244714e-09, "loss": 0.0232, "step": 129690 }, { "epoch": 1.9906377100759727, "grad_norm": 0.25169795751571655, "learning_rate": 1.334993797863504e-09, "loss": 0.0263, "step": 129700 }, { "epoch": 1.9907911902386617, "grad_norm": 0.2865278422832489, "learning_rate": 1.291583216782577e-09, "loss": 0.0222, "step": 129710 }, { "epoch": 1.9909446704013507, "grad_norm": 0.22122928500175476, "learning_rate": 1.248890104696976e-09, "loss": 0.0257, "step": 129720 }, { "epoch": 1.9910981505640395, "grad_norm": 0.34335729479789734, "learning_rate": 1.2069144646698061e-09, "loss": 0.0234, "step": 129730 }, { "epoch": 1.9912516307267287, "grad_norm": 0.35718318819999695, "learning_rate": 1.1656562997131027e-09, "loss": 0.0214, "step": 129740 }, { "epoch": 1.9914051108894175, "grad_norm": 0.4015505313873291, "learning_rate": 1.1251156127867202e-09, "loss": 0.0301, "step": 129750 }, { "epoch": 1.9915585910521065, "grad_norm": 0.4053427577018738, "learning_rate": 1.0852924068005533e-09, "loss": 0.0252, "step": 129760 }, { "epoch": 1.9917120712147955, "grad_norm": 0.48530617356300354, "learning_rate": 1.0461866846123159e-09, "loss": 0.0241, "step": 129770 }, { "epoch": 1.9918655513774843, "grad_norm": 0.3516625165939331, "learning_rate": 1.0077984490264315e-09, "loss": 0.0225, "step": 129780 }, { "epoch": 1.9920190315401736, "grad_norm": 0.26688045263290405, "learning_rate": 9.701277027995836e-10, "loss": 0.0236, "step": 129790 }, { "epoch": 1.9921725117028624, "grad_norm": 0.3752768933773041, "learning_rate": 9.331744486351658e-10, "loss": 0.0273, "step": 129800 }, { "epoch": 1.9923259918655514, "grad_norm": 0.2825721502304077, "learning_rate": 8.969386891821696e-10, "loss": 0.0277, "step": 129810 }, { "epoch": 1.9924794720282404, "grad_norm": 0.3434578478336334, "learning_rate": 8.614204270429582e-10, "loss": 0.0256, "step": 129820 }, { "epoch": 1.9926329521909292, "grad_norm": 0.22879508137702942, "learning_rate": 8.266196647654934e-10, "loss": 0.0207, "step": 129830 }, { "epoch": 1.9927864323536184, "grad_norm": 0.3449459969997406, "learning_rate": 7.925364048466666e-10, "loss": 0.0294, "step": 129840 }, { "epoch": 1.9929399125163072, "grad_norm": 0.45853838324546814, "learning_rate": 7.591706497322993e-10, "loss": 0.023, "step": 129850 }, { "epoch": 1.9930933926789962, "grad_norm": 0.4303109645843506, "learning_rate": 7.265224018171424e-10, "loss": 0.0252, "step": 129860 }, { "epoch": 1.9932468728416852, "grad_norm": 0.3717004060745239, "learning_rate": 6.945916634426564e-10, "loss": 0.0282, "step": 129870 }, { "epoch": 1.9934003530043742, "grad_norm": 0.4020913541316986, "learning_rate": 6.633784369014518e-10, "loss": 0.0276, "step": 129880 }, { "epoch": 1.9935538331670632, "grad_norm": 0.3275836706161499, "learning_rate": 6.328827244328484e-10, "loss": 0.029, "step": 129890 }, { "epoch": 1.993707313329752, "grad_norm": 0.372345894575119, "learning_rate": 6.031045282250958e-10, "loss": 0.0257, "step": 129900 }, { "epoch": 1.993860793492441, "grad_norm": 0.2815609276294708, "learning_rate": 5.740438504153734e-10, "loss": 0.0225, "step": 129910 }, { "epoch": 1.99401427365513, "grad_norm": 0.3628309369087219, "learning_rate": 5.457006930875697e-10, "loss": 0.0246, "step": 129920 }, { "epoch": 1.994167753817819, "grad_norm": 0.2613752484321594, "learning_rate": 5.180750582756133e-10, "loss": 0.0268, "step": 129930 }, { "epoch": 1.994321233980508, "grad_norm": 0.25722089409828186, "learning_rate": 4.911669479634729e-10, "loss": 0.0214, "step": 129940 }, { "epoch": 1.9944747141431969, "grad_norm": 0.3994995653629303, "learning_rate": 4.6497636408182613e-10, "loss": 0.0274, "step": 129950 }, { "epoch": 1.9946281943058861, "grad_norm": 0.2866731882095337, "learning_rate": 4.3950330850806023e-10, "loss": 0.0208, "step": 129960 }, { "epoch": 1.994781674468575, "grad_norm": 0.4276840388774872, "learning_rate": 4.147477830718227e-10, "loss": 0.0269, "step": 129970 }, { "epoch": 1.994935154631264, "grad_norm": 0.3465040922164917, "learning_rate": 3.9070978954836025e-10, "loss": 0.0211, "step": 129980 }, { "epoch": 1.995088634793953, "grad_norm": 0.35283398628234863, "learning_rate": 3.673893296629594e-10, "loss": 0.0293, "step": 129990 }, { "epoch": 1.9952421149566417, "grad_norm": 0.44387784600257874, "learning_rate": 3.447864050898364e-10, "loss": 0.0235, "step": 130000 }, { "epoch": 1.995395595119331, "grad_norm": 0.2284272015094757, "learning_rate": 3.2290101744880675e-10, "loss": 0.0231, "step": 130010 }, { "epoch": 1.9955490752820197, "grad_norm": 0.3610498309135437, "learning_rate": 3.0173316831305645e-10, "loss": 0.0262, "step": 130020 }, { "epoch": 1.9957025554447088, "grad_norm": 0.3276050388813019, "learning_rate": 2.812828591991501e-10, "loss": 0.0315, "step": 130030 }, { "epoch": 1.9958560356073978, "grad_norm": 0.7428007125854492, "learning_rate": 2.6155009157591284e-10, "loss": 0.0284, "step": 130040 }, { "epoch": 1.9960095157700866, "grad_norm": 0.3870335519313812, "learning_rate": 2.425348668577687e-10, "loss": 0.0352, "step": 130050 }, { "epoch": 1.9961629959327758, "grad_norm": 0.3287801444530487, "learning_rate": 2.2423718641140235e-10, "loss": 0.0181, "step": 130060 }, { "epoch": 1.9963164760954646, "grad_norm": 0.28428781032562256, "learning_rate": 2.066570515479871e-10, "loss": 0.0204, "step": 130070 }, { "epoch": 1.9964699562581536, "grad_norm": 0.3882981538772583, "learning_rate": 1.8979446352984653e-10, "loss": 0.0221, "step": 130080 }, { "epoch": 1.9966234364208426, "grad_norm": 0.313465416431427, "learning_rate": 1.7364942356712376e-10, "loss": 0.0258, "step": 130090 }, { "epoch": 1.9967769165835316, "grad_norm": 0.4006885588169098, "learning_rate": 1.582219328177814e-10, "loss": 0.0326, "step": 130100 }, { "epoch": 1.9969303967462206, "grad_norm": 0.26091575622558594, "learning_rate": 1.4351199238871183e-10, "loss": 0.0221, "step": 130110 }, { "epoch": 1.9970838769089094, "grad_norm": 0.4735684096813202, "learning_rate": 1.295196033357371e-10, "loss": 0.0267, "step": 130120 }, { "epoch": 1.9972373570715987, "grad_norm": 0.19130878150463104, "learning_rate": 1.1624476666249884e-10, "loss": 0.0237, "step": 130130 }, { "epoch": 1.9973908372342875, "grad_norm": 0.4311712682247162, "learning_rate": 1.0368748332267864e-10, "loss": 0.0269, "step": 130140 }, { "epoch": 1.9975443173969765, "grad_norm": 0.4574798047542572, "learning_rate": 9.184775421666737e-11, "loss": 0.0237, "step": 130150 }, { "epoch": 1.9976977975596655, "grad_norm": 0.35597094893455505, "learning_rate": 8.072558019378563e-11, "loss": 0.0308, "step": 130160 }, { "epoch": 1.9978512777223543, "grad_norm": 0.3669886291027069, "learning_rate": 7.032096205117356e-11, "loss": 0.0301, "step": 130170 }, { "epoch": 1.9980047578850435, "grad_norm": 0.251632422208786, "learning_rate": 6.063390053823171e-11, "loss": 0.0219, "step": 130180 }, { "epoch": 1.9981582380477323, "grad_norm": 0.30543646216392517, "learning_rate": 5.166439634773923e-11, "loss": 0.022, "step": 130190 }, { "epoch": 1.9983117182104213, "grad_norm": 0.3739526867866516, "learning_rate": 4.341245012362549e-11, "loss": 0.0267, "step": 130200 }, { "epoch": 1.9984651983731103, "grad_norm": 0.3087455630302429, "learning_rate": 3.587806245874959e-11, "loss": 0.0252, "step": 130210 }, { "epoch": 1.998618678535799, "grad_norm": 0.2547456920146942, "learning_rate": 2.906123389379012e-11, "loss": 0.0222, "step": 130220 }, { "epoch": 1.9987721586984883, "grad_norm": 0.4561668038368225, "learning_rate": 2.2961964916135005e-11, "loss": 0.0216, "step": 130230 }, { "epoch": 1.9989256388611771, "grad_norm": 0.2519993484020233, "learning_rate": 1.758025596654278e-11, "loss": 0.0197, "step": 130240 }, { "epoch": 1.9990791190238661, "grad_norm": 0.3474481701850891, "learning_rate": 1.2916107428040391e-11, "loss": 0.0218, "step": 130250 }, { "epoch": 1.9992325991865552, "grad_norm": 0.4284724295139313, "learning_rate": 8.969519637025415e-12, "loss": 0.027, "step": 130260 }, { "epoch": 1.999386079349244, "grad_norm": 0.4485073983669281, "learning_rate": 5.7404928766047194e-12, "loss": 0.026, "step": 130270 }, { "epoch": 1.9995395595119332, "grad_norm": 0.2979697585105896, "learning_rate": 3.2290273788149195e-12, "loss": 0.0357, "step": 130280 }, { "epoch": 1.999693039674622, "grad_norm": 0.47274136543273926, "learning_rate": 1.4351233224019212e-12, "loss": 0.0253, "step": 130290 }, { "epoch": 1.999846519837311, "grad_norm": 0.2670418322086334, "learning_rate": 3.5878083726181847e-13, "loss": 0.0276, "step": 130300 }, { "epoch": 2.0, "grad_norm": 0.3894950747489929, "learning_rate": 0.0, "loss": 0.0168, "step": 130310 }, { "epoch": 2.0, "step": 130310, "total_flos": 1.1290772041004745e+19, "train_loss": 0.04083466301337325, "train_runtime": 266334.2035, "train_samples_per_second": 7.828, "train_steps_per_second": 0.489 } ], "logging_steps": 10, "max_steps": 130310, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 62000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1290772041004745e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }