{ "best_global_step": 227000, "best_metric": 0.36603018641471863, "best_model_checkpoint": "path_to_checkpoint", "epoch": 1.0, "eval_steps": 1000, "global_step": 232926, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.293209002000636e-05, "grad_norm": 7.446129322052002, "learning_rate": 9e-07, "loss": 2.1290369033813477, "step": 10 }, { "epoch": 8.586418004001271e-05, "grad_norm": 3.0518405437469482, "learning_rate": 1.9e-06, "loss": 2.087772560119629, "step": 20 }, { "epoch": 0.00012879627006001906, "grad_norm": 7.433437824249268, "learning_rate": 2.9e-06, "loss": 2.093190383911133, "step": 30 }, { "epoch": 0.00017172836008002543, "grad_norm": 9.070439338684082, "learning_rate": 3.9e-06, "loss": 2.0560066223144533, "step": 40 }, { "epoch": 0.00021466045010003177, "grad_norm": 6.858762264251709, "learning_rate": 4.9000000000000005e-06, "loss": 2.1954065322875977, "step": 50 }, { "epoch": 0.0002575925401200381, "grad_norm": 13.182660102844238, "learning_rate": 5.9e-06, "loss": 1.8956178665161132, "step": 60 }, { "epoch": 0.00030052463014004446, "grad_norm": 2.741999387741089, "learning_rate": 6.900000000000001e-06, "loss": 1.8288171768188477, "step": 70 }, { "epoch": 0.00034345672016005086, "grad_norm": 9.149059295654297, "learning_rate": 7.9e-06, "loss": 1.4354008674621581, "step": 80 }, { "epoch": 0.0003863888101800572, "grad_norm": 1.0485624074935913, "learning_rate": 8.9e-06, "loss": 0.9927331924438476, "step": 90 }, { "epoch": 0.00042932090020006354, "grad_norm": 4.739808082580566, "learning_rate": 9.900000000000002e-06, "loss": 0.8089075088500977, "step": 100 }, { "epoch": 0.0004722529902200699, "grad_norm": 7.053487777709961, "learning_rate": 1.09e-05, "loss": 0.7783641815185547, "step": 110 }, { "epoch": 0.0005151850802400762, "grad_norm": 3.860586404800415, "learning_rate": 1.19e-05, "loss": 0.5170607089996337, "step": 120 }, { "epoch": 0.0005581171702600826, "grad_norm": 4.848254680633545, "learning_rate": 1.29e-05, "loss": 0.7371084213256835, "step": 130 }, { "epoch": 0.0006010492602800889, "grad_norm": 1.8437591791152954, "learning_rate": 1.3900000000000002e-05, "loss": 0.8458614349365234, "step": 140 }, { "epoch": 0.0006439813503000953, "grad_norm": 0.7110176682472229, "learning_rate": 1.49e-05, "loss": 0.7368865489959717, "step": 150 }, { "epoch": 0.0006869134403201017, "grad_norm": 6.293887615203857, "learning_rate": 1.59e-05, "loss": 0.46790356636047364, "step": 160 }, { "epoch": 0.000729845530340108, "grad_norm": 2.7968358993530273, "learning_rate": 1.69e-05, "loss": 0.37965471744537355, "step": 170 }, { "epoch": 0.0007727776203601144, "grad_norm": 4.619741916656494, "learning_rate": 1.79e-05, "loss": 0.33707737922668457, "step": 180 }, { "epoch": 0.0008157097103801207, "grad_norm": 13.043880462646484, "learning_rate": 1.8900000000000002e-05, "loss": 0.21272482872009277, "step": 190 }, { "epoch": 0.0008586418004001271, "grad_norm": 2.6656389236450195, "learning_rate": 1.9900000000000003e-05, "loss": 0.5739723205566406, "step": 200 }, { "epoch": 0.0009015738904201335, "grad_norm": 0.2941681146621704, "learning_rate": 2.09e-05, "loss": 0.3597684860229492, "step": 210 }, { "epoch": 0.0009445059804401398, "grad_norm": 0.010510088875889778, "learning_rate": 2.19e-05, "loss": 0.4325720310211182, "step": 220 }, { "epoch": 0.0009874380704601462, "grad_norm": 2.2361772060394287, "learning_rate": 2.29e-05, "loss": 0.4555656433105469, "step": 230 }, { "epoch": 0.0010303701604801525, "grad_norm": 1.4009690284729004, "learning_rate": 2.39e-05, "loss": 0.535375452041626, "step": 240 }, { "epoch": 0.0010733022505001587, "grad_norm": 3.090742826461792, "learning_rate": 2.4900000000000002e-05, "loss": 0.4737332820892334, "step": 250 }, { "epoch": 0.0011162343405201653, "grad_norm": 1.8690292835235596, "learning_rate": 2.5900000000000003e-05, "loss": 0.3506669521331787, "step": 260 }, { "epoch": 0.0011591664305401715, "grad_norm": 2.552485942840576, "learning_rate": 2.6900000000000003e-05, "loss": 0.3364131212234497, "step": 270 }, { "epoch": 0.0012020985205601778, "grad_norm": 0.12322066724300385, "learning_rate": 2.7900000000000004e-05, "loss": 0.5483479976654053, "step": 280 }, { "epoch": 0.0012450306105801843, "grad_norm": 0.7322730422019958, "learning_rate": 2.8899999999999998e-05, "loss": 0.48665218353271483, "step": 290 }, { "epoch": 0.0012879627006001906, "grad_norm": 1.7751740217208862, "learning_rate": 2.9900000000000002e-05, "loss": 0.40238499641418457, "step": 300 }, { "epoch": 0.001330894790620197, "grad_norm": 0.5181493759155273, "learning_rate": 3.09e-05, "loss": 0.35978949069976807, "step": 310 }, { "epoch": 0.0013738268806402034, "grad_norm": 1.502162218093872, "learning_rate": 3.19e-05, "loss": 0.32965424060821535, "step": 320 }, { "epoch": 0.0014167589706602097, "grad_norm": 2.794450283050537, "learning_rate": 3.29e-05, "loss": 0.3894787311553955, "step": 330 }, { "epoch": 0.001459691060680216, "grad_norm": 2.374486207962036, "learning_rate": 3.3900000000000004e-05, "loss": 0.40500693321228026, "step": 340 }, { "epoch": 0.0015026231507002223, "grad_norm": 0.6215456128120422, "learning_rate": 3.49e-05, "loss": 0.26663985252380373, "step": 350 }, { "epoch": 0.0015455552407202288, "grad_norm": 18.548954010009766, "learning_rate": 3.59e-05, "loss": 0.4955165386199951, "step": 360 }, { "epoch": 0.001588487330740235, "grad_norm": 0.000909608555957675, "learning_rate": 3.69e-05, "loss": 0.45831050872802737, "step": 370 }, { "epoch": 0.0016314194207602414, "grad_norm": 0.026904495432972908, "learning_rate": 3.79e-05, "loss": 0.43085713386535646, "step": 380 }, { "epoch": 0.0016743515107802479, "grad_norm": 0.03139101713895798, "learning_rate": 3.8900000000000004e-05, "loss": 0.2645646810531616, "step": 390 }, { "epoch": 0.0017172836008002542, "grad_norm": 2.005051374435425, "learning_rate": 3.99e-05, "loss": 0.4847880363464355, "step": 400 }, { "epoch": 0.0017602156908202605, "grad_norm": 2.000047445297241, "learning_rate": 4.09e-05, "loss": 0.4927666664123535, "step": 410 }, { "epoch": 0.001803147780840267, "grad_norm": 2.906313896179199, "learning_rate": 4.19e-05, "loss": 0.4311686038970947, "step": 420 }, { "epoch": 0.0018460798708602733, "grad_norm": 0.0084048667922616, "learning_rate": 4.29e-05, "loss": 0.3550480127334595, "step": 430 }, { "epoch": 0.0018890119608802795, "grad_norm": 0.01702202670276165, "learning_rate": 4.39e-05, "loss": 0.20395941734313966, "step": 440 }, { "epoch": 0.0019319440509002858, "grad_norm": 2.0575366020202637, "learning_rate": 4.49e-05, "loss": 0.44746909141540525, "step": 450 }, { "epoch": 0.0019748761409202923, "grad_norm": 12.149165153503418, "learning_rate": 4.5900000000000004e-05, "loss": 0.3470444917678833, "step": 460 }, { "epoch": 0.002017808230940299, "grad_norm": 6.969229698181152, "learning_rate": 4.69e-05, "loss": 0.15598635673522948, "step": 470 }, { "epoch": 0.002060740320960305, "grad_norm": 0.15074250102043152, "learning_rate": 4.79e-05, "loss": 0.25433228015899656, "step": 480 }, { "epoch": 0.0021036724109803114, "grad_norm": 0.2592933773994446, "learning_rate": 4.89e-05, "loss": 0.30282485485076904, "step": 490 }, { "epoch": 0.0021466045010003175, "grad_norm": 8.743586540222168, "learning_rate": 4.99e-05, "loss": 0.31165275573730467, "step": 500 }, { "epoch": 0.002189536591020324, "grad_norm": 0.11677571386098862, "learning_rate": 5.0900000000000004e-05, "loss": 0.5364396095275878, "step": 510 }, { "epoch": 0.0022324686810403305, "grad_norm": 0.05715586617588997, "learning_rate": 5.19e-05, "loss": 0.3230873107910156, "step": 520 }, { "epoch": 0.0022754007710603366, "grad_norm": 2.341602325439453, "learning_rate": 5.2900000000000005e-05, "loss": 0.3851492166519165, "step": 530 }, { "epoch": 0.002318332861080343, "grad_norm": 8.533411979675293, "learning_rate": 5.390000000000001e-05, "loss": 0.8554889678955078, "step": 540 }, { "epoch": 0.0023612649511003496, "grad_norm": 0.472126305103302, "learning_rate": 5.4900000000000006e-05, "loss": 0.24252398014068605, "step": 550 }, { "epoch": 0.0024041970411203557, "grad_norm": 6.999728679656982, "learning_rate": 5.590000000000001e-05, "loss": 0.3368447065353394, "step": 560 }, { "epoch": 0.002447129131140362, "grad_norm": 0.036653582006692886, "learning_rate": 5.69e-05, "loss": 0.2939608573913574, "step": 570 }, { "epoch": 0.0024900612211603687, "grad_norm": 0.19750595092773438, "learning_rate": 5.79e-05, "loss": 0.2020362138748169, "step": 580 }, { "epoch": 0.0025329933111803747, "grad_norm": 1.7139503955841064, "learning_rate": 5.89e-05, "loss": 0.2586077690124512, "step": 590 }, { "epoch": 0.0025759254012003813, "grad_norm": 0.024881673976778984, "learning_rate": 5.99e-05, "loss": 0.1620743155479431, "step": 600 }, { "epoch": 0.0026188574912203878, "grad_norm": 1.9613510370254517, "learning_rate": 6.09e-05, "loss": 0.4541365623474121, "step": 610 }, { "epoch": 0.002661789581240394, "grad_norm": 0.010247381404042244, "learning_rate": 6.19e-05, "loss": 0.3532832384109497, "step": 620 }, { "epoch": 0.0027047216712604003, "grad_norm": 1.9921550750732422, "learning_rate": 6.29e-05, "loss": 0.48864259719848635, "step": 630 }, { "epoch": 0.002747653761280407, "grad_norm": 0.03684716299176216, "learning_rate": 6.390000000000001e-05, "loss": 0.2768329858779907, "step": 640 }, { "epoch": 0.002790585851300413, "grad_norm": 0.04419621825218201, "learning_rate": 6.49e-05, "loss": 0.29871292114257814, "step": 650 }, { "epoch": 0.0028335179413204194, "grad_norm": 0.4213247299194336, "learning_rate": 6.59e-05, "loss": 0.32069456577301025, "step": 660 }, { "epoch": 0.002876450031340426, "grad_norm": 1.8658652305603027, "learning_rate": 6.690000000000001e-05, "loss": 0.49007110595703124, "step": 670 }, { "epoch": 0.002919382121360432, "grad_norm": 0.02149110846221447, "learning_rate": 6.790000000000001e-05, "loss": 0.5062370300292969, "step": 680 }, { "epoch": 0.0029623142113804385, "grad_norm": 1.4124505519866943, "learning_rate": 6.89e-05, "loss": 0.38771054744720457, "step": 690 }, { "epoch": 0.0030052463014004446, "grad_norm": 0.020706655457615852, "learning_rate": 6.99e-05, "loss": 0.25068256855010984, "step": 700 }, { "epoch": 0.003048178391420451, "grad_norm": 2.2183375358581543, "learning_rate": 7.09e-05, "loss": 0.49508442878723147, "step": 710 }, { "epoch": 0.0030911104814404576, "grad_norm": 6.63649320602417, "learning_rate": 7.19e-05, "loss": 0.478483772277832, "step": 720 }, { "epoch": 0.0031340425714604637, "grad_norm": 1.0420591831207275, "learning_rate": 7.29e-05, "loss": 0.497357177734375, "step": 730 }, { "epoch": 0.00317697466148047, "grad_norm": 3.652012348175049, "learning_rate": 7.390000000000001e-05, "loss": 0.2723304033279419, "step": 740 }, { "epoch": 0.0032199067515004767, "grad_norm": 2.3289570808410645, "learning_rate": 7.49e-05, "loss": 0.6125946521759034, "step": 750 }, { "epoch": 0.0032628388415204827, "grad_norm": 1.3651496171951294, "learning_rate": 7.59e-05, "loss": 0.3251293659210205, "step": 760 }, { "epoch": 0.0033057709315404893, "grad_norm": 1.72046959400177, "learning_rate": 7.69e-05, "loss": 0.32114553451538086, "step": 770 }, { "epoch": 0.0033487030215604958, "grad_norm": 1.1918679475784302, "learning_rate": 7.790000000000001e-05, "loss": 0.41199021339416503, "step": 780 }, { "epoch": 0.003391635111580502, "grad_norm": 0.03947390615940094, "learning_rate": 7.890000000000001e-05, "loss": 0.03135415315628052, "step": 790 }, { "epoch": 0.0034345672016005083, "grad_norm": 1.5788650512695312, "learning_rate": 7.99e-05, "loss": 0.5670211315155029, "step": 800 }, { "epoch": 0.003477499291620515, "grad_norm": 0.019160764291882515, "learning_rate": 8.090000000000001e-05, "loss": 0.2667243242263794, "step": 810 }, { "epoch": 0.003520431381640521, "grad_norm": 5.5093231201171875, "learning_rate": 8.19e-05, "loss": 0.4863880157470703, "step": 820 }, { "epoch": 0.0035633634716605274, "grad_norm": 1.7203266620635986, "learning_rate": 8.29e-05, "loss": 0.23645930290222167, "step": 830 }, { "epoch": 0.003606295561680534, "grad_norm": 1.8550962209701538, "learning_rate": 8.39e-05, "loss": 0.32827005386352537, "step": 840 }, { "epoch": 0.00364922765170054, "grad_norm": 0.002791638718917966, "learning_rate": 8.49e-05, "loss": 0.23109066486358643, "step": 850 }, { "epoch": 0.0036921597417205465, "grad_norm": 0.0013374650152400136, "learning_rate": 8.59e-05, "loss": 0.310694146156311, "step": 860 }, { "epoch": 0.0037350918317405526, "grad_norm": 2.3354532718658447, "learning_rate": 8.69e-05, "loss": 0.48372364044189453, "step": 870 }, { "epoch": 0.003778023921760559, "grad_norm": 5.989701747894287, "learning_rate": 8.790000000000001e-05, "loss": 0.3159054756164551, "step": 880 }, { "epoch": 0.0038209560117805656, "grad_norm": 1.735064148902893, "learning_rate": 8.89e-05, "loss": 0.34026777744293213, "step": 890 }, { "epoch": 0.0038638881018005717, "grad_norm": 0.031852539628744125, "learning_rate": 8.99e-05, "loss": 0.08108786344528199, "step": 900 }, { "epoch": 0.003906820191820579, "grad_norm": 0.03357387334108353, "learning_rate": 9.090000000000001e-05, "loss": 0.46551074981689455, "step": 910 }, { "epoch": 0.003949752281840585, "grad_norm": 0.010642527602612972, "learning_rate": 9.190000000000001e-05, "loss": 0.4507905006408691, "step": 920 }, { "epoch": 0.003992684371860591, "grad_norm": 0.9609713554382324, "learning_rate": 9.290000000000001e-05, "loss": 0.202797269821167, "step": 930 }, { "epoch": 0.004035616461880598, "grad_norm": 2.6735310554504395, "learning_rate": 9.39e-05, "loss": 0.33949248790740966, "step": 940 }, { "epoch": 0.004078548551900604, "grad_norm": 0.14202536642551422, "learning_rate": 9.49e-05, "loss": 0.4102811813354492, "step": 950 }, { "epoch": 0.00412148064192061, "grad_norm": 0.8708382248878479, "learning_rate": 9.59e-05, "loss": 0.40193929672241213, "step": 960 }, { "epoch": 0.004164412731940616, "grad_norm": 0.4100968539714813, "learning_rate": 9.69e-05, "loss": 0.6347273826599121, "step": 970 }, { "epoch": 0.004207344821960623, "grad_norm": 4.046662330627441, "learning_rate": 9.790000000000001e-05, "loss": 0.3081040859222412, "step": 980 }, { "epoch": 0.004250276911980629, "grad_norm": 0.8989287614822388, "learning_rate": 9.89e-05, "loss": 0.5247183799743652, "step": 990 }, { "epoch": 0.004293209002000635, "grad_norm": 4.966592788696289, "learning_rate": 9.99e-05, "loss": 0.3147383213043213, "step": 1000 }, { "epoch": 0.004293209002000635, "eval_loss": 0.6870803236961365, "eval_runtime": 27.4913, "eval_samples_per_second": 3.638, "eval_steps_per_second": 3.638, "step": 1000 }, { "epoch": 0.004336141092020642, "grad_norm": 0.0117591992020607, "learning_rate": 9.999611945189415e-05, "loss": 0.2696777582168579, "step": 1010 }, { "epoch": 0.004379073182040648, "grad_norm": 3.5806615352630615, "learning_rate": 9.999180773177652e-05, "loss": 0.34646382331848147, "step": 1020 }, { "epoch": 0.004422005272060654, "grad_norm": 0.02200966142117977, "learning_rate": 9.99874960116589e-05, "loss": 0.16257576942443847, "step": 1030 }, { "epoch": 0.004464937362080661, "grad_norm": 0.028981667011976242, "learning_rate": 9.998318429154128e-05, "loss": 0.3506030559539795, "step": 1040 }, { "epoch": 0.004507869452100667, "grad_norm": 0.016286378726363182, "learning_rate": 9.997887257142366e-05, "loss": 0.34904468059539795, "step": 1050 }, { "epoch": 0.004550801542120673, "grad_norm": 0.004303790628910065, "learning_rate": 9.997456085130602e-05, "loss": 0.37080433368682864, "step": 1060 }, { "epoch": 0.00459373363214068, "grad_norm": 1.1950221061706543, "learning_rate": 9.99702491311884e-05, "loss": 0.24393827915191652, "step": 1070 }, { "epoch": 0.004636665722160686, "grad_norm": 0.008893678896129131, "learning_rate": 9.996593741107077e-05, "loss": 0.5550142288208008, "step": 1080 }, { "epoch": 0.004679597812180692, "grad_norm": 3.304513454437256, "learning_rate": 9.996162569095315e-05, "loss": 0.2666121244430542, "step": 1090 }, { "epoch": 0.004722529902200699, "grad_norm": 0.027765685692429543, "learning_rate": 9.995731397083553e-05, "loss": 0.16644638776779175, "step": 1100 }, { "epoch": 0.004765461992220705, "grad_norm": 0.42218098044395447, "learning_rate": 9.99530022507179e-05, "loss": 0.3481473922729492, "step": 1110 }, { "epoch": 0.004808394082240711, "grad_norm": 1.156235694885254, "learning_rate": 9.994869053060028e-05, "loss": 0.372220778465271, "step": 1120 }, { "epoch": 0.004851326172260718, "grad_norm": 0.052121300250291824, "learning_rate": 9.994437881048266e-05, "loss": 0.3564959764480591, "step": 1130 }, { "epoch": 0.004894258262280724, "grad_norm": 0.002628577407449484, "learning_rate": 9.994006709036502e-05, "loss": 0.3152308464050293, "step": 1140 }, { "epoch": 0.00493719035230073, "grad_norm": 2.0352439880371094, "learning_rate": 9.99357553702474e-05, "loss": 0.3882049560546875, "step": 1150 }, { "epoch": 0.004980122442320737, "grad_norm": 7.649345397949219, "learning_rate": 9.993144365012979e-05, "loss": 0.40246758460998533, "step": 1160 }, { "epoch": 0.005023054532340743, "grad_norm": 2.1263649463653564, "learning_rate": 9.992713193001217e-05, "loss": 0.14638826847076417, "step": 1170 }, { "epoch": 0.0050659866223607495, "grad_norm": 0.10847385972738266, "learning_rate": 9.992282020989455e-05, "loss": 0.3140259265899658, "step": 1180 }, { "epoch": 0.0051089187123807564, "grad_norm": 0.08196664601564407, "learning_rate": 9.991850848977692e-05, "loss": 0.17124234437942504, "step": 1190 }, { "epoch": 0.0051518508024007625, "grad_norm": 1.618770718574524, "learning_rate": 9.99141967696593e-05, "loss": 0.5490658283233643, "step": 1200 }, { "epoch": 0.005194782892420769, "grad_norm": 0.06352327018976212, "learning_rate": 9.990988504954168e-05, "loss": 0.4400003910064697, "step": 1210 }, { "epoch": 0.0052377149824407755, "grad_norm": 0.04779497906565666, "learning_rate": 9.990557332942404e-05, "loss": 0.19397462606430055, "step": 1220 }, { "epoch": 0.005280647072460782, "grad_norm": 0.008033322170376778, "learning_rate": 9.990126160930642e-05, "loss": 0.5248307228088379, "step": 1230 }, { "epoch": 0.005323579162480788, "grad_norm": 1.4066425561904907, "learning_rate": 9.98969498891888e-05, "loss": 0.4247109889984131, "step": 1240 }, { "epoch": 0.005366511252500795, "grad_norm": 7.272195816040039, "learning_rate": 9.989263816907117e-05, "loss": 0.5679349899291992, "step": 1250 }, { "epoch": 0.005409443342520801, "grad_norm": 0.02575252577662468, "learning_rate": 9.988832644895355e-05, "loss": 0.5577791213989258, "step": 1260 }, { "epoch": 0.005452375432540807, "grad_norm": 2.0221657752990723, "learning_rate": 9.988401472883593e-05, "loss": 0.21536378860473632, "step": 1270 }, { "epoch": 0.005495307522560814, "grad_norm": 2.7715706825256348, "learning_rate": 9.987970300871831e-05, "loss": 0.5313220024108887, "step": 1280 }, { "epoch": 0.00553823961258082, "grad_norm": 0.05551741644740105, "learning_rate": 9.987539128860068e-05, "loss": 0.3612039089202881, "step": 1290 }, { "epoch": 0.005581171702600826, "grad_norm": 0.2868395149707794, "learning_rate": 9.987107956848305e-05, "loss": 0.1773497700691223, "step": 1300 }, { "epoch": 0.005624103792620833, "grad_norm": 0.028904501348733902, "learning_rate": 9.986676784836543e-05, "loss": 0.37716073989868165, "step": 1310 }, { "epoch": 0.005667035882640839, "grad_norm": 5.851991176605225, "learning_rate": 9.98624561282478e-05, "loss": 0.28295397758483887, "step": 1320 }, { "epoch": 0.005709967972660845, "grad_norm": 0.9384163618087769, "learning_rate": 9.985814440813018e-05, "loss": 0.6173577785491944, "step": 1330 }, { "epoch": 0.005752900062680852, "grad_norm": 1.63738214969635, "learning_rate": 9.985383268801256e-05, "loss": 0.02494920641183853, "step": 1340 }, { "epoch": 0.005795832152700858, "grad_norm": 0.03405584394931793, "learning_rate": 9.984952096789493e-05, "loss": 0.23200345039367676, "step": 1350 }, { "epoch": 0.005838764242720864, "grad_norm": 10.848572731018066, "learning_rate": 9.984520924777731e-05, "loss": 0.3212714672088623, "step": 1360 }, { "epoch": 0.00588169633274087, "grad_norm": 8.821533203125, "learning_rate": 9.984089752765969e-05, "loss": 0.2774125814437866, "step": 1370 }, { "epoch": 0.005924628422760877, "grad_norm": 5.936229228973389, "learning_rate": 9.983658580754207e-05, "loss": 0.3172731399536133, "step": 1380 }, { "epoch": 0.005967560512780883, "grad_norm": 0.028739016503095627, "learning_rate": 9.983227408742444e-05, "loss": 0.19269169569015504, "step": 1390 }, { "epoch": 0.006010492602800889, "grad_norm": 0.06869284063577652, "learning_rate": 9.982796236730682e-05, "loss": 0.24538626670837402, "step": 1400 }, { "epoch": 0.006053424692820896, "grad_norm": 0.08283459395170212, "learning_rate": 9.98236506471892e-05, "loss": 0.30588588714599607, "step": 1410 }, { "epoch": 0.006096356782840902, "grad_norm": 0.07285956293344498, "learning_rate": 9.981933892707158e-05, "loss": 0.4131472587585449, "step": 1420 }, { "epoch": 0.006139288872860908, "grad_norm": 0.0415986068546772, "learning_rate": 9.981502720695395e-05, "loss": 0.2583375692367554, "step": 1430 }, { "epoch": 0.006182220962880915, "grad_norm": 0.42376863956451416, "learning_rate": 9.981071548683633e-05, "loss": 0.17857149839401246, "step": 1440 }, { "epoch": 0.006225153052900921, "grad_norm": 0.17374563217163086, "learning_rate": 9.980640376671871e-05, "loss": 0.6782264709472656, "step": 1450 }, { "epoch": 0.006268085142920927, "grad_norm": 0.39217206835746765, "learning_rate": 9.980209204660109e-05, "loss": 0.3628535270690918, "step": 1460 }, { "epoch": 0.006311017232940934, "grad_norm": 0.2118167132139206, "learning_rate": 9.979778032648345e-05, "loss": 0.14380197525024413, "step": 1470 }, { "epoch": 0.00635394932296094, "grad_norm": 2.725713014602661, "learning_rate": 9.979346860636583e-05, "loss": 0.2942746639251709, "step": 1480 }, { "epoch": 0.006396881412980946, "grad_norm": 0.11453196406364441, "learning_rate": 9.97891568862482e-05, "loss": 0.28425648212432864, "step": 1490 }, { "epoch": 0.006439813503000953, "grad_norm": 2.679959535598755, "learning_rate": 9.978484516613058e-05, "loss": 0.23248529434204102, "step": 1500 }, { "epoch": 0.006482745593020959, "grad_norm": 3.3989484310150146, "learning_rate": 9.978053344601296e-05, "loss": 0.6053690433502197, "step": 1510 }, { "epoch": 0.0065256776830409655, "grad_norm": 3.2708628177642822, "learning_rate": 9.977622172589534e-05, "loss": 0.08650413751602173, "step": 1520 }, { "epoch": 0.0065686097730609724, "grad_norm": 2.154971122741699, "learning_rate": 9.977191000577771e-05, "loss": 0.49035396575927737, "step": 1530 }, { "epoch": 0.0066115418630809785, "grad_norm": 0.013774173334240913, "learning_rate": 9.976759828566009e-05, "loss": 0.4272459983825684, "step": 1540 }, { "epoch": 0.006654473953100985, "grad_norm": 1.9301711320877075, "learning_rate": 9.976328656554245e-05, "loss": 0.2707101821899414, "step": 1550 }, { "epoch": 0.0066974060431209915, "grad_norm": 1.391924500465393, "learning_rate": 9.975897484542483e-05, "loss": 0.4632604122161865, "step": 1560 }, { "epoch": 0.006740338133140998, "grad_norm": 4.163330078125, "learning_rate": 9.975466312530721e-05, "loss": 0.26462554931640625, "step": 1570 }, { "epoch": 0.006783270223161004, "grad_norm": 0.18783286213874817, "learning_rate": 9.975035140518959e-05, "loss": 0.3839559078216553, "step": 1580 }, { "epoch": 0.006826202313181011, "grad_norm": 0.347711980342865, "learning_rate": 9.974603968507196e-05, "loss": 0.19243018627166747, "step": 1590 }, { "epoch": 0.006869134403201017, "grad_norm": 5.590195655822754, "learning_rate": 9.974172796495434e-05, "loss": 0.5789153099060058, "step": 1600 }, { "epoch": 0.006912066493221023, "grad_norm": 1.6785224676132202, "learning_rate": 9.973741624483672e-05, "loss": 0.30777108669281006, "step": 1610 }, { "epoch": 0.00695499858324103, "grad_norm": 3.9783945083618164, "learning_rate": 9.97331045247191e-05, "loss": 0.38220555782318116, "step": 1620 }, { "epoch": 0.006997930673261036, "grad_norm": 3.0387625694274902, "learning_rate": 9.972879280460147e-05, "loss": 0.22734384536743163, "step": 1630 }, { "epoch": 0.007040862763281042, "grad_norm": 5.566250801086426, "learning_rate": 9.972448108448385e-05, "loss": 0.49216341972351074, "step": 1640 }, { "epoch": 0.007083794853301049, "grad_norm": 0.014136805199086666, "learning_rate": 9.972016936436623e-05, "loss": 0.3727003812789917, "step": 1650 }, { "epoch": 0.007126726943321055, "grad_norm": 2.337724447250366, "learning_rate": 9.97158576442486e-05, "loss": 0.18289399147033691, "step": 1660 }, { "epoch": 0.007169659033341061, "grad_norm": 0.00662636524066329, "learning_rate": 9.971154592413098e-05, "loss": 0.19337474107742308, "step": 1670 }, { "epoch": 0.007212591123361068, "grad_norm": 0.12630727887153625, "learning_rate": 9.970723420401336e-05, "loss": 0.3763300657272339, "step": 1680 }, { "epoch": 0.007255523213381074, "grad_norm": 0.0527946762740612, "learning_rate": 9.970292248389574e-05, "loss": 0.2878370761871338, "step": 1690 }, { "epoch": 0.00729845530340108, "grad_norm": 2.522291421890259, "learning_rate": 9.969861076377811e-05, "loss": 0.29763169288635255, "step": 1700 }, { "epoch": 0.007341387393421087, "grad_norm": 0.09618072956800461, "learning_rate": 9.969429904366048e-05, "loss": 0.27122509479522705, "step": 1710 }, { "epoch": 0.007384319483441093, "grad_norm": 0.042337898164987564, "learning_rate": 9.968998732354286e-05, "loss": 0.1773484468460083, "step": 1720 }, { "epoch": 0.007427251573461099, "grad_norm": 0.012770925648510456, "learning_rate": 9.968567560342523e-05, "loss": 0.25236375331878663, "step": 1730 }, { "epoch": 0.007470183663481105, "grad_norm": 1.6886173486709595, "learning_rate": 9.968136388330761e-05, "loss": 0.3894350051879883, "step": 1740 }, { "epoch": 0.007513115753501112, "grad_norm": 2.3420915603637695, "learning_rate": 9.967705216318999e-05, "loss": 0.21277191638946533, "step": 1750 }, { "epoch": 0.007556047843521118, "grad_norm": 1.1795216798782349, "learning_rate": 9.967274044307237e-05, "loss": 0.3945819139480591, "step": 1760 }, { "epoch": 0.007598979933541124, "grad_norm": 0.27975350618362427, "learning_rate": 9.966842872295474e-05, "loss": 0.29100914001464845, "step": 1770 }, { "epoch": 0.007641912023561131, "grad_norm": 1.951181173324585, "learning_rate": 9.966411700283712e-05, "loss": 0.23559587001800536, "step": 1780 }, { "epoch": 0.007684844113581137, "grad_norm": 1.1079846620559692, "learning_rate": 9.96598052827195e-05, "loss": 0.3525418758392334, "step": 1790 }, { "epoch": 0.007727776203601143, "grad_norm": 0.04059452936053276, "learning_rate": 9.965549356260186e-05, "loss": 0.16814641952514647, "step": 1800 }, { "epoch": 0.00777070829362115, "grad_norm": 0.5009684562683105, "learning_rate": 9.965118184248424e-05, "loss": 0.5324047088623047, "step": 1810 }, { "epoch": 0.007813640383641157, "grad_norm": 1.176175832748413, "learning_rate": 9.964687012236662e-05, "loss": 0.38814170360565187, "step": 1820 }, { "epoch": 0.007856572473661163, "grad_norm": 0.12649580836296082, "learning_rate": 9.964255840224899e-05, "loss": 0.34246914386749266, "step": 1830 }, { "epoch": 0.00789950456368117, "grad_norm": 4.079379081726074, "learning_rate": 9.963824668213137e-05, "loss": 0.4702861785888672, "step": 1840 }, { "epoch": 0.007942436653701175, "grad_norm": 0.17662937939167023, "learning_rate": 9.963393496201375e-05, "loss": 0.14358367919921874, "step": 1850 }, { "epoch": 0.007985368743721181, "grad_norm": 0.11334045231342316, "learning_rate": 9.962962324189613e-05, "loss": 0.011836948990821838, "step": 1860 }, { "epoch": 0.008028300833741188, "grad_norm": 1.63093101978302, "learning_rate": 9.96253115217785e-05, "loss": 0.3222597599029541, "step": 1870 }, { "epoch": 0.008071232923761195, "grad_norm": 0.038519952446222305, "learning_rate": 9.962099980166088e-05, "loss": 0.21120266914367675, "step": 1880 }, { "epoch": 0.008114165013781201, "grad_norm": 0.0032395331654697657, "learning_rate": 9.961668808154326e-05, "loss": 0.11140183210372925, "step": 1890 }, { "epoch": 0.008157097103801208, "grad_norm": 0.12331575155258179, "learning_rate": 9.961237636142563e-05, "loss": 0.4628122329711914, "step": 1900 }, { "epoch": 0.008200029193821214, "grad_norm": 0.2696305215358734, "learning_rate": 9.960806464130801e-05, "loss": 0.2922004461288452, "step": 1910 }, { "epoch": 0.00824296128384122, "grad_norm": 0.04623638093471527, "learning_rate": 9.960375292119039e-05, "loss": 0.3664160013198853, "step": 1920 }, { "epoch": 0.008285893373861226, "grad_norm": 1.505217432975769, "learning_rate": 9.959944120107277e-05, "loss": 0.32481415271759034, "step": 1930 }, { "epoch": 0.008328825463881232, "grad_norm": 6.866296291351318, "learning_rate": 9.959512948095514e-05, "loss": 0.32975733280181885, "step": 1940 }, { "epoch": 0.00837175755390124, "grad_norm": 0.13486604392528534, "learning_rate": 9.959081776083752e-05, "loss": 0.30583069324493406, "step": 1950 }, { "epoch": 0.008414689643921246, "grad_norm": 1.8435735702514648, "learning_rate": 9.958650604071988e-05, "loss": 0.22341222763061525, "step": 1960 }, { "epoch": 0.008457621733941252, "grad_norm": 6.405146598815918, "learning_rate": 9.958219432060226e-05, "loss": 0.24852497577667237, "step": 1970 }, { "epoch": 0.008500553823961258, "grad_norm": 2.324030876159668, "learning_rate": 9.957788260048464e-05, "loss": 0.4419265270233154, "step": 1980 }, { "epoch": 0.008543485913981264, "grad_norm": 0.13140447437763214, "learning_rate": 9.957357088036702e-05, "loss": 0.2700542688369751, "step": 1990 }, { "epoch": 0.00858641800400127, "grad_norm": 0.13265693187713623, "learning_rate": 9.95692591602494e-05, "loss": 0.14963483810424805, "step": 2000 }, { "epoch": 0.00858641800400127, "eval_loss": 0.5978419780731201, "eval_runtime": 27.4428, "eval_samples_per_second": 3.644, "eval_steps_per_second": 3.644, "step": 2000 }, { "epoch": 0.008629350094021278, "grad_norm": 2.834585666656494, "learning_rate": 9.956494744013177e-05, "loss": 0.3619921922683716, "step": 2010 }, { "epoch": 0.008672282184041284, "grad_norm": 1.4783027172088623, "learning_rate": 9.956063572001415e-05, "loss": 0.3987175941467285, "step": 2020 }, { "epoch": 0.00871521427406129, "grad_norm": 0.1267155259847641, "learning_rate": 9.955632399989653e-05, "loss": 0.031463423371315004, "step": 2030 }, { "epoch": 0.008758146364081296, "grad_norm": 0.013138389214873314, "learning_rate": 9.955201227977889e-05, "loss": 0.33531138896942136, "step": 2040 }, { "epoch": 0.008801078454101302, "grad_norm": 0.03606367111206055, "learning_rate": 9.954770055966127e-05, "loss": 0.3791128873825073, "step": 2050 }, { "epoch": 0.008844010544121308, "grad_norm": 0.011809916235506535, "learning_rate": 9.954338883954364e-05, "loss": 0.22674219608306884, "step": 2060 }, { "epoch": 0.008886942634141316, "grad_norm": 0.028264962136745453, "learning_rate": 9.953907711942602e-05, "loss": 0.4846948623657227, "step": 2070 }, { "epoch": 0.008929874724161322, "grad_norm": 2.4816222190856934, "learning_rate": 9.95347653993084e-05, "loss": 0.28535277843475343, "step": 2080 }, { "epoch": 0.008972806814181328, "grad_norm": 0.15443404018878937, "learning_rate": 9.953045367919078e-05, "loss": 0.3292974948883057, "step": 2090 }, { "epoch": 0.009015738904201334, "grad_norm": 0.2226814329624176, "learning_rate": 9.952614195907315e-05, "loss": 0.1713486909866333, "step": 2100 }, { "epoch": 0.00905867099422134, "grad_norm": 0.12499313801527023, "learning_rate": 9.952183023895553e-05, "loss": 0.27860250473022463, "step": 2110 }, { "epoch": 0.009101603084241346, "grad_norm": 0.37484830617904663, "learning_rate": 9.951751851883791e-05, "loss": 0.16570254564285278, "step": 2120 }, { "epoch": 0.009144535174261354, "grad_norm": 1.3329097032546997, "learning_rate": 9.951320679872029e-05, "loss": 0.4097251892089844, "step": 2130 }, { "epoch": 0.00918746726428136, "grad_norm": 0.030536966398358345, "learning_rate": 9.950889507860266e-05, "loss": 0.254646372795105, "step": 2140 }, { "epoch": 0.009230399354301366, "grad_norm": 2.485957622528076, "learning_rate": 9.950458335848504e-05, "loss": 0.36496973037719727, "step": 2150 }, { "epoch": 0.009273331444321372, "grad_norm": 0.028866570442914963, "learning_rate": 9.950027163836742e-05, "loss": 0.2507104158401489, "step": 2160 }, { "epoch": 0.009316263534341378, "grad_norm": 0.027181223034858704, "learning_rate": 9.94959599182498e-05, "loss": 0.2541772127151489, "step": 2170 }, { "epoch": 0.009359195624361384, "grad_norm": 0.04736591503024101, "learning_rate": 9.949164819813217e-05, "loss": 0.3932987451553345, "step": 2180 }, { "epoch": 0.009402127714381392, "grad_norm": 0.8770988583564758, "learning_rate": 9.948733647801455e-05, "loss": 0.3800723314285278, "step": 2190 }, { "epoch": 0.009445059804401398, "grad_norm": 0.6233557462692261, "learning_rate": 9.948302475789693e-05, "loss": 0.27814540863037107, "step": 2200 }, { "epoch": 0.009487991894421404, "grad_norm": 3.1134743690490723, "learning_rate": 9.947871303777929e-05, "loss": 0.2906489372253418, "step": 2210 }, { "epoch": 0.00953092398444141, "grad_norm": 1.6282368898391724, "learning_rate": 9.947440131766167e-05, "loss": 0.40398077964782714, "step": 2220 }, { "epoch": 0.009573856074461417, "grad_norm": 2.197567939758301, "learning_rate": 9.947008959754405e-05, "loss": 0.24783599376678467, "step": 2230 }, { "epoch": 0.009616788164481423, "grad_norm": 0.3791491985321045, "learning_rate": 9.946577787742642e-05, "loss": 0.3287055253982544, "step": 2240 }, { "epoch": 0.00965972025450143, "grad_norm": 1.3653607368469238, "learning_rate": 9.94614661573088e-05, "loss": 0.3267251253128052, "step": 2250 }, { "epoch": 0.009702652344521437, "grad_norm": 0.005248530767858028, "learning_rate": 9.945715443719118e-05, "loss": 0.42192516326904295, "step": 2260 }, { "epoch": 0.009745584434541443, "grad_norm": 1.4583594799041748, "learning_rate": 9.945284271707356e-05, "loss": 0.17951282262802123, "step": 2270 }, { "epoch": 0.009788516524561449, "grad_norm": 1.393897533416748, "learning_rate": 9.944853099695593e-05, "loss": 0.38530232906341555, "step": 2280 }, { "epoch": 0.009831448614581455, "grad_norm": 2.1110920906066895, "learning_rate": 9.94442192768383e-05, "loss": 0.23096773624420167, "step": 2290 }, { "epoch": 0.00987438070460146, "grad_norm": 2.964541435241699, "learning_rate": 9.943990755672067e-05, "loss": 0.5022055625915527, "step": 2300 }, { "epoch": 0.009917312794621469, "grad_norm": 0.00699465861544013, "learning_rate": 9.943559583660305e-05, "loss": 0.21364190578460693, "step": 2310 }, { "epoch": 0.009960244884641475, "grad_norm": 7.6037092208862305, "learning_rate": 9.943128411648543e-05, "loss": 0.404204797744751, "step": 2320 }, { "epoch": 0.01000317697466148, "grad_norm": 0.004018974490463734, "learning_rate": 9.94269723963678e-05, "loss": 0.3444742441177368, "step": 2330 }, { "epoch": 0.010046109064681487, "grad_norm": 0.2844197154045105, "learning_rate": 9.942266067625018e-05, "loss": 0.28289504051208497, "step": 2340 }, { "epoch": 0.010089041154701493, "grad_norm": 0.0012115357676520944, "learning_rate": 9.941834895613257e-05, "loss": 0.24548511505126952, "step": 2350 }, { "epoch": 0.010131973244721499, "grad_norm": 2.089751958847046, "learning_rate": 9.941403723601495e-05, "loss": 0.44277057647705076, "step": 2360 }, { "epoch": 0.010174905334741505, "grad_norm": 0.24774102866649628, "learning_rate": 9.940972551589732e-05, "loss": 0.3112922191619873, "step": 2370 }, { "epoch": 0.010217837424761513, "grad_norm": 1.9152735471725464, "learning_rate": 9.940541379577969e-05, "loss": 0.3603362560272217, "step": 2380 }, { "epoch": 0.010260769514781519, "grad_norm": 1.628706693649292, "learning_rate": 9.940110207566207e-05, "loss": 0.21727685928344725, "step": 2390 }, { "epoch": 0.010303701604801525, "grad_norm": 2.09602689743042, "learning_rate": 9.939679035554445e-05, "loss": 0.4794466495513916, "step": 2400 }, { "epoch": 0.010346633694821531, "grad_norm": 0.26855140924453735, "learning_rate": 9.939247863542682e-05, "loss": 0.13301979303359984, "step": 2410 }, { "epoch": 0.010389565784841537, "grad_norm": 0.026117384433746338, "learning_rate": 9.93881669153092e-05, "loss": 0.2615302562713623, "step": 2420 }, { "epoch": 0.010432497874861543, "grad_norm": 0.09771610796451569, "learning_rate": 9.938385519519158e-05, "loss": 0.28199918270111085, "step": 2430 }, { "epoch": 0.010475429964881551, "grad_norm": 0.025545388460159302, "learning_rate": 9.937954347507396e-05, "loss": 0.10328556299209594, "step": 2440 }, { "epoch": 0.010518362054901557, "grad_norm": 0.9250702261924744, "learning_rate": 9.937523175495632e-05, "loss": 0.42665858268737794, "step": 2450 }, { "epoch": 0.010561294144921563, "grad_norm": 4.911713600158691, "learning_rate": 9.93709200348387e-05, "loss": 0.36646499633789065, "step": 2460 }, { "epoch": 0.01060422623494157, "grad_norm": 0.04883352667093277, "learning_rate": 9.936660831472108e-05, "loss": 0.2213623046875, "step": 2470 }, { "epoch": 0.010647158324961575, "grad_norm": 4.471435070037842, "learning_rate": 9.936229659460345e-05, "loss": 0.1262168049812317, "step": 2480 }, { "epoch": 0.010690090414981581, "grad_norm": 4.575348377227783, "learning_rate": 9.935798487448583e-05, "loss": 0.346947979927063, "step": 2490 }, { "epoch": 0.01073302250500159, "grad_norm": 0.4783248007297516, "learning_rate": 9.935367315436821e-05, "loss": 0.48537659645080566, "step": 2500 }, { "epoch": 0.010775954595021595, "grad_norm": 0.6317282915115356, "learning_rate": 9.934936143425058e-05, "loss": 0.2175139904022217, "step": 2510 }, { "epoch": 0.010818886685041601, "grad_norm": 0.1488918513059616, "learning_rate": 9.934504971413296e-05, "loss": 0.21612226963043213, "step": 2520 }, { "epoch": 0.010861818775061607, "grad_norm": 0.04579382389783859, "learning_rate": 9.934073799401534e-05, "loss": 0.1966726303100586, "step": 2530 }, { "epoch": 0.010904750865081613, "grad_norm": 2.0830743312835693, "learning_rate": 9.93364262738977e-05, "loss": 0.3072007417678833, "step": 2540 }, { "epoch": 0.01094768295510162, "grad_norm": 0.0915113240480423, "learning_rate": 9.933211455378008e-05, "loss": 0.20240769386291504, "step": 2550 }, { "epoch": 0.010990615045121627, "grad_norm": 7.322422981262207, "learning_rate": 9.932780283366246e-05, "loss": 0.3262624263763428, "step": 2560 }, { "epoch": 0.011033547135141633, "grad_norm": 0.7302671670913696, "learning_rate": 9.932349111354485e-05, "loss": 0.3114789962768555, "step": 2570 }, { "epoch": 0.01107647922516164, "grad_norm": 0.4549922049045563, "learning_rate": 9.931917939342723e-05, "loss": 0.2467747926712036, "step": 2580 }, { "epoch": 0.011119411315181646, "grad_norm": 6.378932476043701, "learning_rate": 9.93148676733096e-05, "loss": 0.24566287994384767, "step": 2590 }, { "epoch": 0.011162343405201652, "grad_norm": 5.444248199462891, "learning_rate": 9.931055595319198e-05, "loss": 0.3524611473083496, "step": 2600 }, { "epoch": 0.011205275495221658, "grad_norm": 0.018262803554534912, "learning_rate": 9.930624423307436e-05, "loss": 0.35527334213256834, "step": 2610 }, { "epoch": 0.011248207585241666, "grad_norm": 0.04499860107898712, "learning_rate": 9.930193251295672e-05, "loss": 0.2583042621612549, "step": 2620 }, { "epoch": 0.011291139675261672, "grad_norm": 1.3830759525299072, "learning_rate": 9.92976207928391e-05, "loss": 0.44364104270935056, "step": 2630 }, { "epoch": 0.011334071765281678, "grad_norm": 0.05449075996875763, "learning_rate": 9.929330907272148e-05, "loss": 0.1979648232460022, "step": 2640 }, { "epoch": 0.011377003855301684, "grad_norm": 0.08771967887878418, "learning_rate": 9.928899735260385e-05, "loss": 0.3086463451385498, "step": 2650 }, { "epoch": 0.01141993594532169, "grad_norm": 2.2221310138702393, "learning_rate": 9.928468563248623e-05, "loss": 0.34687089920043945, "step": 2660 }, { "epoch": 0.011462868035341696, "grad_norm": 0.020478779450058937, "learning_rate": 9.928037391236861e-05, "loss": 0.17319800853729247, "step": 2670 }, { "epoch": 0.011505800125361704, "grad_norm": 0.24883264303207397, "learning_rate": 9.927606219225099e-05, "loss": 0.3666259288787842, "step": 2680 }, { "epoch": 0.01154873221538171, "grad_norm": 0.15961961448192596, "learning_rate": 9.927175047213336e-05, "loss": 0.2250300645828247, "step": 2690 }, { "epoch": 0.011591664305401716, "grad_norm": 0.4793941080570221, "learning_rate": 9.926743875201573e-05, "loss": 0.15139001607894897, "step": 2700 }, { "epoch": 0.011634596395421722, "grad_norm": 4.141111373901367, "learning_rate": 9.92631270318981e-05, "loss": 0.3816200256347656, "step": 2710 }, { "epoch": 0.011677528485441728, "grad_norm": 0.0375005304813385, "learning_rate": 9.925881531178048e-05, "loss": 0.1840839147567749, "step": 2720 }, { "epoch": 0.011720460575461734, "grad_norm": 1.4425864219665527, "learning_rate": 9.925450359166286e-05, "loss": 0.1900520920753479, "step": 2730 }, { "epoch": 0.01176339266548174, "grad_norm": 2.4802801609039307, "learning_rate": 9.925019187154524e-05, "loss": 0.2789858341217041, "step": 2740 }, { "epoch": 0.011806324755501748, "grad_norm": 0.07315325736999512, "learning_rate": 9.924588015142761e-05, "loss": 0.5190474510192871, "step": 2750 }, { "epoch": 0.011849256845521754, "grad_norm": 0.02303297631442547, "learning_rate": 9.924156843130999e-05, "loss": 0.3321236610412598, "step": 2760 }, { "epoch": 0.01189218893554176, "grad_norm": 0.8053700923919678, "learning_rate": 9.923725671119237e-05, "loss": 0.2221919059753418, "step": 2770 }, { "epoch": 0.011935121025561766, "grad_norm": 3.6358132362365723, "learning_rate": 9.923294499107473e-05, "loss": 0.18539355993270873, "step": 2780 }, { "epoch": 0.011978053115581772, "grad_norm": 0.11861757934093475, "learning_rate": 9.922863327095712e-05, "loss": 0.38046112060546877, "step": 2790 }, { "epoch": 0.012020985205601778, "grad_norm": 0.29847654700279236, "learning_rate": 9.92243215508395e-05, "loss": 0.14161444902420045, "step": 2800 }, { "epoch": 0.012063917295621786, "grad_norm": 0.0315970703959465, "learning_rate": 9.922000983072188e-05, "loss": 0.2748135805130005, "step": 2810 }, { "epoch": 0.012106849385641792, "grad_norm": 2.0554513931274414, "learning_rate": 9.921569811060426e-05, "loss": 0.11751353740692139, "step": 2820 }, { "epoch": 0.012149781475661798, "grad_norm": 7.321534633636475, "learning_rate": 9.921138639048663e-05, "loss": 0.5105506896972656, "step": 2830 }, { "epoch": 0.012192713565681804, "grad_norm": 0.8315069079399109, "learning_rate": 9.920707467036901e-05, "loss": 0.2693990707397461, "step": 2840 }, { "epoch": 0.01223564565570181, "grad_norm": 0.07569185644388199, "learning_rate": 9.920276295025139e-05, "loss": 0.2765660285949707, "step": 2850 }, { "epoch": 0.012278577745721816, "grad_norm": 1.9169636964797974, "learning_rate": 9.919845123013376e-05, "loss": 0.33867225646972654, "step": 2860 }, { "epoch": 0.012321509835741824, "grad_norm": 0.7454385161399841, "learning_rate": 9.919413951001613e-05, "loss": 0.23863754272460938, "step": 2870 }, { "epoch": 0.01236444192576183, "grad_norm": 0.4546082615852356, "learning_rate": 9.91898277898985e-05, "loss": 0.182702374458313, "step": 2880 }, { "epoch": 0.012407374015781836, "grad_norm": 5.7228169441223145, "learning_rate": 9.918551606978088e-05, "loss": 0.2921399354934692, "step": 2890 }, { "epoch": 0.012450306105801843, "grad_norm": 0.23378795385360718, "learning_rate": 9.918120434966326e-05, "loss": 0.37920374870300294, "step": 2900 }, { "epoch": 0.012493238195821849, "grad_norm": 0.06861526519060135, "learning_rate": 9.917689262954564e-05, "loss": 0.07848119139671325, "step": 2910 }, { "epoch": 0.012536170285841855, "grad_norm": 1.5398024320602417, "learning_rate": 9.917258090942801e-05, "loss": 0.34091403484344485, "step": 2920 }, { "epoch": 0.012579102375861862, "grad_norm": 0.05262840539216995, "learning_rate": 9.916826918931039e-05, "loss": 0.12453473806381225, "step": 2930 }, { "epoch": 0.012622034465881869, "grad_norm": 3.1975438594818115, "learning_rate": 9.916395746919277e-05, "loss": 0.3661008834838867, "step": 2940 }, { "epoch": 0.012664966555901875, "grad_norm": 2.7964837551116943, "learning_rate": 9.915964574907513e-05, "loss": 0.19229893684387206, "step": 2950 }, { "epoch": 0.01270789864592188, "grad_norm": 0.1857406049966812, "learning_rate": 9.915533402895751e-05, "loss": 0.25612127780914307, "step": 2960 }, { "epoch": 0.012750830735941887, "grad_norm": 0.023666221648454666, "learning_rate": 9.915102230883989e-05, "loss": 0.5594746112823487, "step": 2970 }, { "epoch": 0.012793762825961893, "grad_norm": 1.8557242155075073, "learning_rate": 9.914671058872227e-05, "loss": 0.1518352746963501, "step": 2980 }, { "epoch": 0.0128366949159819, "grad_norm": 1.4806312322616577, "learning_rate": 9.914239886860464e-05, "loss": 0.431013822555542, "step": 2990 }, { "epoch": 0.012879627006001907, "grad_norm": 0.06178463250398636, "learning_rate": 9.913808714848702e-05, "loss": 0.27926638126373293, "step": 3000 }, { "epoch": 0.012879627006001907, "eval_loss": 0.5856176614761353, "eval_runtime": 27.4381, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 3000 }, { "epoch": 0.012922559096021913, "grad_norm": 1.1055095195770264, "learning_rate": 9.91337754283694e-05, "loss": 0.24324522018432618, "step": 3010 }, { "epoch": 0.012965491186041919, "grad_norm": 0.41986164450645447, "learning_rate": 9.912946370825177e-05, "loss": 0.5372348308563233, "step": 3020 }, { "epoch": 0.013008423276061925, "grad_norm": 8.023941040039062, "learning_rate": 9.912515198813415e-05, "loss": 0.48131527900695803, "step": 3030 }, { "epoch": 0.013051355366081931, "grad_norm": 0.047052349895238876, "learning_rate": 9.912084026801653e-05, "loss": 0.17677942514419556, "step": 3040 }, { "epoch": 0.013094287456101939, "grad_norm": 0.6833871603012085, "learning_rate": 9.911652854789891e-05, "loss": 0.09258891344070434, "step": 3050 }, { "epoch": 0.013137219546121945, "grad_norm": 0.14966531097888947, "learning_rate": 9.911221682778128e-05, "loss": 0.18587244749069215, "step": 3060 }, { "epoch": 0.013180151636141951, "grad_norm": 0.06690580397844315, "learning_rate": 9.910790510766366e-05, "loss": 0.22195734977722167, "step": 3070 }, { "epoch": 0.013223083726161957, "grad_norm": 2.9302849769592285, "learning_rate": 9.910359338754604e-05, "loss": 0.3047139644622803, "step": 3080 }, { "epoch": 0.013266015816181963, "grad_norm": 1.5165706872940063, "learning_rate": 9.909928166742842e-05, "loss": 0.4636370182037354, "step": 3090 }, { "epoch": 0.01330894790620197, "grad_norm": 2.706740617752075, "learning_rate": 9.90949699473108e-05, "loss": 0.3813605546951294, "step": 3100 }, { "epoch": 0.013351879996221975, "grad_norm": 0.03528573364019394, "learning_rate": 9.909065822719316e-05, "loss": 0.15370768308639526, "step": 3110 }, { "epoch": 0.013394812086241983, "grad_norm": 0.06504935771226883, "learning_rate": 9.908634650707553e-05, "loss": 0.32649986743927, "step": 3120 }, { "epoch": 0.013437744176261989, "grad_norm": 0.01829485222697258, "learning_rate": 9.908203478695791e-05, "loss": 0.20302400588989258, "step": 3130 }, { "epoch": 0.013480676266281995, "grad_norm": 0.7548179626464844, "learning_rate": 9.907772306684029e-05, "loss": 0.39861307144165037, "step": 3140 }, { "epoch": 0.013523608356302001, "grad_norm": 0.1664927899837494, "learning_rate": 9.907341134672267e-05, "loss": 0.3844759464263916, "step": 3150 }, { "epoch": 0.013566540446322007, "grad_norm": 1.1451348066329956, "learning_rate": 9.906909962660504e-05, "loss": 0.24213221073150634, "step": 3160 }, { "epoch": 0.013609472536342013, "grad_norm": 4.82208776473999, "learning_rate": 9.906478790648742e-05, "loss": 0.4157630443572998, "step": 3170 }, { "epoch": 0.013652404626362021, "grad_norm": 3.070974588394165, "learning_rate": 9.90604761863698e-05, "loss": 0.29048967361450195, "step": 3180 }, { "epoch": 0.013695336716382027, "grad_norm": 5.502252101898193, "learning_rate": 9.905616446625216e-05, "loss": 0.45506629943847654, "step": 3190 }, { "epoch": 0.013738268806402033, "grad_norm": 4.653741836547852, "learning_rate": 9.905185274613454e-05, "loss": 0.58369460105896, "step": 3200 }, { "epoch": 0.01378120089642204, "grad_norm": 0.18484069406986237, "learning_rate": 9.904754102601692e-05, "loss": 0.3674613952636719, "step": 3210 }, { "epoch": 0.013824132986442045, "grad_norm": 0.15102382004261017, "learning_rate": 9.90432293058993e-05, "loss": 0.3542445659637451, "step": 3220 }, { "epoch": 0.013867065076462052, "grad_norm": 0.25356340408325195, "learning_rate": 9.903891758578167e-05, "loss": 0.1704845428466797, "step": 3230 }, { "epoch": 0.01390999716648206, "grad_norm": 5.379843711853027, "learning_rate": 9.903460586566405e-05, "loss": 0.4127946853637695, "step": 3240 }, { "epoch": 0.013952929256502065, "grad_norm": 0.08451557159423828, "learning_rate": 9.903029414554643e-05, "loss": 0.38672802448272703, "step": 3250 }, { "epoch": 0.013995861346522072, "grad_norm": 0.12616507709026337, "learning_rate": 9.90259824254288e-05, "loss": 0.23725695610046388, "step": 3260 }, { "epoch": 0.014038793436542078, "grad_norm": 0.07903803884983063, "learning_rate": 9.902167070531118e-05, "loss": 0.14826754331588746, "step": 3270 }, { "epoch": 0.014081725526562084, "grad_norm": 1.9865450859069824, "learning_rate": 9.901735898519356e-05, "loss": 0.4523752212524414, "step": 3280 }, { "epoch": 0.01412465761658209, "grad_norm": 0.05560386925935745, "learning_rate": 9.901304726507594e-05, "loss": 0.2219390869140625, "step": 3290 }, { "epoch": 0.014167589706602098, "grad_norm": 0.18131622672080994, "learning_rate": 9.900873554495831e-05, "loss": 0.42689967155456543, "step": 3300 }, { "epoch": 0.014210521796622104, "grad_norm": 0.03924006223678589, "learning_rate": 9.900442382484069e-05, "loss": 0.19628711938858032, "step": 3310 }, { "epoch": 0.01425345388664211, "grad_norm": 0.02912324294447899, "learning_rate": 9.900011210472307e-05, "loss": 0.33134822845458983, "step": 3320 }, { "epoch": 0.014296385976662116, "grad_norm": 1.1800485849380493, "learning_rate": 9.899580038460545e-05, "loss": 0.23386192321777344, "step": 3330 }, { "epoch": 0.014339318066682122, "grad_norm": 0.015988456085324287, "learning_rate": 9.899148866448782e-05, "loss": 0.20085477828979492, "step": 3340 }, { "epoch": 0.014382250156702128, "grad_norm": 0.9935466051101685, "learning_rate": 9.89871769443702e-05, "loss": 0.45007762908935545, "step": 3350 }, { "epoch": 0.014425182246722136, "grad_norm": 0.023739760741591454, "learning_rate": 9.898286522425256e-05, "loss": 0.3121100902557373, "step": 3360 }, { "epoch": 0.014468114336742142, "grad_norm": 0.0075697763822972775, "learning_rate": 9.897855350413494e-05, "loss": 0.26780340671539304, "step": 3370 }, { "epoch": 0.014511046426762148, "grad_norm": 0.8707277774810791, "learning_rate": 9.897424178401732e-05, "loss": 0.40902557373046877, "step": 3380 }, { "epoch": 0.014553978516782154, "grad_norm": 0.09020499140024185, "learning_rate": 9.89699300638997e-05, "loss": 0.09538947939872741, "step": 3390 }, { "epoch": 0.01459691060680216, "grad_norm": 15.655129432678223, "learning_rate": 9.896561834378207e-05, "loss": 0.27243244647979736, "step": 3400 }, { "epoch": 0.014639842696822166, "grad_norm": 0.09583932906389236, "learning_rate": 9.896130662366445e-05, "loss": 0.27807865142822263, "step": 3410 }, { "epoch": 0.014682774786842174, "grad_norm": 0.3359926640987396, "learning_rate": 9.895699490354683e-05, "loss": 0.5408889293670655, "step": 3420 }, { "epoch": 0.01472570687686218, "grad_norm": 1.483046293258667, "learning_rate": 9.89526831834292e-05, "loss": 0.470717191696167, "step": 3430 }, { "epoch": 0.014768638966882186, "grad_norm": 0.0958387553691864, "learning_rate": 9.894837146331157e-05, "loss": 0.20184338092803955, "step": 3440 }, { "epoch": 0.014811571056902192, "grad_norm": 2.537693738937378, "learning_rate": 9.894405974319395e-05, "loss": 0.5307780265808105, "step": 3450 }, { "epoch": 0.014854503146922198, "grad_norm": 1.3038251399993896, "learning_rate": 9.893974802307632e-05, "loss": 0.2795207977294922, "step": 3460 }, { "epoch": 0.014897435236942204, "grad_norm": 0.29755839705467224, "learning_rate": 9.89354363029587e-05, "loss": 0.16141926050186156, "step": 3470 }, { "epoch": 0.01494036732696221, "grad_norm": 0.008788419887423515, "learning_rate": 9.893112458284108e-05, "loss": 0.37876596450805666, "step": 3480 }, { "epoch": 0.014983299416982218, "grad_norm": 0.2352016270160675, "learning_rate": 9.892681286272346e-05, "loss": 0.31247496604919434, "step": 3490 }, { "epoch": 0.015026231507002224, "grad_norm": 2.5700554847717285, "learning_rate": 9.892250114260583e-05, "loss": 0.21215453147888183, "step": 3500 }, { "epoch": 0.01506916359702223, "grad_norm": 2.7226498126983643, "learning_rate": 9.891818942248821e-05, "loss": 0.251985502243042, "step": 3510 }, { "epoch": 0.015112095687042236, "grad_norm": 1.7493064403533936, "learning_rate": 9.891387770237059e-05, "loss": 0.28089330196380613, "step": 3520 }, { "epoch": 0.015155027777062242, "grad_norm": 0.0026909897569566965, "learning_rate": 9.890956598225297e-05, "loss": 0.1568443775177002, "step": 3530 }, { "epoch": 0.015197959867082248, "grad_norm": 3.8230791091918945, "learning_rate": 9.890525426213534e-05, "loss": 0.19587650299072265, "step": 3540 }, { "epoch": 0.015240891957102256, "grad_norm": 0.0006467084749601781, "learning_rate": 9.890094254201772e-05, "loss": 0.07268471121788025, "step": 3550 }, { "epoch": 0.015283824047122262, "grad_norm": 0.02135465107858181, "learning_rate": 9.88966308219001e-05, "loss": 0.3168449401855469, "step": 3560 }, { "epoch": 0.015326756137142268, "grad_norm": 0.6610631346702576, "learning_rate": 9.889231910178247e-05, "loss": 0.26426920890808103, "step": 3570 }, { "epoch": 0.015369688227162275, "grad_norm": 0.004358239006251097, "learning_rate": 9.888800738166485e-05, "loss": 0.42137608528137205, "step": 3580 }, { "epoch": 0.01541262031718228, "grad_norm": 0.0010839806636795402, "learning_rate": 9.888369566154723e-05, "loss": 0.040946352481842044, "step": 3590 }, { "epoch": 0.015455552407202287, "grad_norm": 1.6554967164993286, "learning_rate": 9.88793839414296e-05, "loss": 0.24365475177764892, "step": 3600 }, { "epoch": 0.015498484497222294, "grad_norm": 2.889256715774536, "learning_rate": 9.887507222131197e-05, "loss": 0.7947826862335206, "step": 3610 }, { "epoch": 0.0155414165872423, "grad_norm": 16.558940887451172, "learning_rate": 9.887076050119435e-05, "loss": 0.40970120429992674, "step": 3620 }, { "epoch": 0.015584348677262307, "grad_norm": 10.725292205810547, "learning_rate": 9.886644878107673e-05, "loss": 0.477548885345459, "step": 3630 }, { "epoch": 0.015627280767282314, "grad_norm": 0.03413480520248413, "learning_rate": 9.88621370609591e-05, "loss": 0.15648822784423827, "step": 3640 }, { "epoch": 0.01567021285730232, "grad_norm": 3.0283219814300537, "learning_rate": 9.885782534084148e-05, "loss": 0.2519017934799194, "step": 3650 }, { "epoch": 0.015713144947322327, "grad_norm": 0.1699555665254593, "learning_rate": 9.885351362072386e-05, "loss": 0.28411762714385985, "step": 3660 }, { "epoch": 0.01575607703734233, "grad_norm": 0.4008488059043884, "learning_rate": 9.884920190060623e-05, "loss": 0.16037325859069823, "step": 3670 }, { "epoch": 0.01579900912736234, "grad_norm": 0.027547165751457214, "learning_rate": 9.884489018048861e-05, "loss": 0.16036679744720458, "step": 3680 }, { "epoch": 0.015841941217382343, "grad_norm": 1.5933310985565186, "learning_rate": 9.884057846037098e-05, "loss": 0.39816043376922605, "step": 3690 }, { "epoch": 0.01588487330740235, "grad_norm": 4.830496788024902, "learning_rate": 9.883626674025335e-05, "loss": 0.21261231899261473, "step": 3700 }, { "epoch": 0.01592780539742236, "grad_norm": 1.8774749040603638, "learning_rate": 9.883195502013573e-05, "loss": 0.2600428581237793, "step": 3710 }, { "epoch": 0.015970737487442363, "grad_norm": 3.542771816253662, "learning_rate": 9.882764330001811e-05, "loss": 0.29696452617645264, "step": 3720 }, { "epoch": 0.01601366957746237, "grad_norm": 0.059213295578956604, "learning_rate": 9.882333157990048e-05, "loss": 0.4307241916656494, "step": 3730 }, { "epoch": 0.016056601667482375, "grad_norm": 1.7407482862472534, "learning_rate": 9.881901985978286e-05, "loss": 0.4724702835083008, "step": 3740 }, { "epoch": 0.016099533757502383, "grad_norm": 0.2899302840232849, "learning_rate": 9.881470813966524e-05, "loss": 0.13045870065689086, "step": 3750 }, { "epoch": 0.01614246584752239, "grad_norm": 2.5515809059143066, "learning_rate": 9.881039641954763e-05, "loss": 0.3518421411514282, "step": 3760 }, { "epoch": 0.016185397937542395, "grad_norm": 0.06440158188343048, "learning_rate": 9.880608469943e-05, "loss": 0.28357388973236086, "step": 3770 }, { "epoch": 0.016228330027562403, "grad_norm": 1.2010902166366577, "learning_rate": 9.880177297931237e-05, "loss": 0.17321739196777344, "step": 3780 }, { "epoch": 0.016271262117582407, "grad_norm": 0.006291663274168968, "learning_rate": 9.879746125919475e-05, "loss": 0.08664214015007018, "step": 3790 }, { "epoch": 0.016314194207602415, "grad_norm": 0.4199025630950928, "learning_rate": 9.879314953907713e-05, "loss": 0.43680973052978517, "step": 3800 }, { "epoch": 0.01635712629762242, "grad_norm": 1.7818711996078491, "learning_rate": 9.87888378189595e-05, "loss": 0.1816726803779602, "step": 3810 }, { "epoch": 0.016400058387642427, "grad_norm": 9.08803939819336, "learning_rate": 9.878452609884188e-05, "loss": 0.4264963626861572, "step": 3820 }, { "epoch": 0.016442990477662435, "grad_norm": 0.12058204412460327, "learning_rate": 9.878021437872426e-05, "loss": 0.3006183624267578, "step": 3830 }, { "epoch": 0.01648592256768244, "grad_norm": 0.006680858321487904, "learning_rate": 9.877590265860664e-05, "loss": 0.2540662050247192, "step": 3840 }, { "epoch": 0.016528854657702447, "grad_norm": 6.741055011749268, "learning_rate": 9.8771590938489e-05, "loss": 0.20651772022247314, "step": 3850 }, { "epoch": 0.01657178674772245, "grad_norm": 3.3949806690216064, "learning_rate": 9.876727921837138e-05, "loss": 0.3159367561340332, "step": 3860 }, { "epoch": 0.01661471883774246, "grad_norm": 1.8325541019439697, "learning_rate": 9.876296749825375e-05, "loss": 0.4747579097747803, "step": 3870 }, { "epoch": 0.016657650927762464, "grad_norm": 0.1203535795211792, "learning_rate": 9.875865577813613e-05, "loss": 0.2717746257781982, "step": 3880 }, { "epoch": 0.01670058301778247, "grad_norm": 0.09284611791372299, "learning_rate": 9.875434405801851e-05, "loss": 0.3075088977813721, "step": 3890 }, { "epoch": 0.01674351510780248, "grad_norm": 0.04041628912091255, "learning_rate": 9.875003233790089e-05, "loss": 0.4742414951324463, "step": 3900 }, { "epoch": 0.016786447197822484, "grad_norm": 2.080106735229492, "learning_rate": 9.874572061778326e-05, "loss": 0.4726258277893066, "step": 3910 }, { "epoch": 0.01682937928784249, "grad_norm": 0.04380369931459427, "learning_rate": 9.874140889766564e-05, "loss": 0.14977000951766967, "step": 3920 }, { "epoch": 0.016872311377862496, "grad_norm": 1.6222591400146484, "learning_rate": 9.8737097177548e-05, "loss": 0.18420748710632323, "step": 3930 }, { "epoch": 0.016915243467882504, "grad_norm": 0.45909276604652405, "learning_rate": 9.873278545743038e-05, "loss": 0.1671479821205139, "step": 3940 }, { "epoch": 0.01695817555790251, "grad_norm": 0.0356089286506176, "learning_rate": 9.872847373731276e-05, "loss": 0.13457057476043702, "step": 3950 }, { "epoch": 0.017001107647922516, "grad_norm": 1.6384329795837402, "learning_rate": 9.872416201719514e-05, "loss": 0.12244757413864135, "step": 3960 }, { "epoch": 0.017044039737942523, "grad_norm": 0.01140339020639658, "learning_rate": 9.871985029707751e-05, "loss": 0.20679588317871095, "step": 3970 }, { "epoch": 0.017086971827962528, "grad_norm": 0.006532014813274145, "learning_rate": 9.87155385769599e-05, "loss": 0.2710320234298706, "step": 3980 }, { "epoch": 0.017129903917982536, "grad_norm": 0.8082075715065002, "learning_rate": 9.871122685684228e-05, "loss": 0.5106721878051758, "step": 3990 }, { "epoch": 0.01717283600800254, "grad_norm": 1.0562989711761475, "learning_rate": 9.870691513672466e-05, "loss": 0.5093242168426514, "step": 4000 }, { "epoch": 0.01717283600800254, "eval_loss": 0.5767123103141785, "eval_runtime": 27.4361, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 4000 }, { "epoch": 0.017215768098022548, "grad_norm": 0.004736120812594891, "learning_rate": 9.870260341660704e-05, "loss": 0.15739403963088988, "step": 4010 }, { "epoch": 0.017258700188042556, "grad_norm": 0.04196614772081375, "learning_rate": 9.86982916964894e-05, "loss": 0.31789140701293944, "step": 4020 }, { "epoch": 0.01730163227806256, "grad_norm": 1.7766358852386475, "learning_rate": 9.869397997637178e-05, "loss": 0.27827773094177244, "step": 4030 }, { "epoch": 0.017344564368082568, "grad_norm": 1.262876272201538, "learning_rate": 9.868966825625416e-05, "loss": 0.298844575881958, "step": 4040 }, { "epoch": 0.017387496458102572, "grad_norm": 0.02867383137345314, "learning_rate": 9.868535653613653e-05, "loss": 0.353701114654541, "step": 4050 }, { "epoch": 0.01743042854812258, "grad_norm": 0.49855148792266846, "learning_rate": 9.868104481601891e-05, "loss": 0.13109270334243775, "step": 4060 }, { "epoch": 0.017473360638142588, "grad_norm": 1.795688509941101, "learning_rate": 9.867673309590129e-05, "loss": 0.5057300090789795, "step": 4070 }, { "epoch": 0.017516292728162592, "grad_norm": 3.00192928314209, "learning_rate": 9.867242137578366e-05, "loss": 0.216597580909729, "step": 4080 }, { "epoch": 0.0175592248181826, "grad_norm": 0.8749079704284668, "learning_rate": 9.866810965566604e-05, "loss": 0.28190293312072756, "step": 4090 }, { "epoch": 0.017602156908202604, "grad_norm": 2.5471808910369873, "learning_rate": 9.86637979355484e-05, "loss": 0.2924044609069824, "step": 4100 }, { "epoch": 0.017645088998222612, "grad_norm": 0.0811334103345871, "learning_rate": 9.865948621543078e-05, "loss": 0.4297049045562744, "step": 4110 }, { "epoch": 0.017688021088242616, "grad_norm": 4.748598098754883, "learning_rate": 9.865517449531316e-05, "loss": 0.27084424495697024, "step": 4120 }, { "epoch": 0.017730953178262624, "grad_norm": 0.09125273674726486, "learning_rate": 9.865086277519554e-05, "loss": 0.27569165229797366, "step": 4130 }, { "epoch": 0.017773885268282632, "grad_norm": 0.1283591389656067, "learning_rate": 9.864655105507792e-05, "loss": 0.2747859239578247, "step": 4140 }, { "epoch": 0.017816817358302636, "grad_norm": 1.7652469873428345, "learning_rate": 9.864223933496029e-05, "loss": 0.4440613269805908, "step": 4150 }, { "epoch": 0.017859749448322644, "grad_norm": 0.5249306559562683, "learning_rate": 9.863792761484267e-05, "loss": 0.1520497679710388, "step": 4160 }, { "epoch": 0.01790268153834265, "grad_norm": 0.9873408079147339, "learning_rate": 9.863361589472505e-05, "loss": 0.3086827516555786, "step": 4170 }, { "epoch": 0.017945613628362656, "grad_norm": 1.5772066116333008, "learning_rate": 9.862930417460741e-05, "loss": 0.3315431594848633, "step": 4180 }, { "epoch": 0.017988545718382664, "grad_norm": 4.330613136291504, "learning_rate": 9.862499245448979e-05, "loss": 0.3991940259933472, "step": 4190 }, { "epoch": 0.01803147780840267, "grad_norm": 0.22321546077728271, "learning_rate": 9.862068073437218e-05, "loss": 0.3414437294006348, "step": 4200 }, { "epoch": 0.018074409898422676, "grad_norm": 1.90219247341156, "learning_rate": 9.861636901425456e-05, "loss": 0.3252838611602783, "step": 4210 }, { "epoch": 0.01811734198844268, "grad_norm": 0.08938544988632202, "learning_rate": 9.861205729413693e-05, "loss": 0.3285228729248047, "step": 4220 }, { "epoch": 0.01816027407846269, "grad_norm": 1.1169179677963257, "learning_rate": 9.860774557401931e-05, "loss": 0.41843581199645996, "step": 4230 }, { "epoch": 0.018203206168482693, "grad_norm": 3.2259140014648438, "learning_rate": 9.860343385390169e-05, "loss": 0.4892786979675293, "step": 4240 }, { "epoch": 0.0182461382585027, "grad_norm": 2.497501850128174, "learning_rate": 9.859912213378407e-05, "loss": 0.25899789333343504, "step": 4250 }, { "epoch": 0.018289070348522708, "grad_norm": 2.090446949005127, "learning_rate": 9.859481041366643e-05, "loss": 0.34546072483062745, "step": 4260 }, { "epoch": 0.018332002438542713, "grad_norm": 0.05979840084910393, "learning_rate": 9.859049869354881e-05, "loss": 0.3825571537017822, "step": 4270 }, { "epoch": 0.01837493452856272, "grad_norm": 3.4556007385253906, "learning_rate": 9.858618697343118e-05, "loss": 0.4224254131317139, "step": 4280 }, { "epoch": 0.018417866618582725, "grad_norm": 0.04259246960282326, "learning_rate": 9.858187525331356e-05, "loss": 0.19651782512664795, "step": 4290 }, { "epoch": 0.018460798708602733, "grad_norm": 0.057309672236442566, "learning_rate": 9.857756353319594e-05, "loss": 0.22271180152893066, "step": 4300 }, { "epoch": 0.018503730798622737, "grad_norm": 0.8981066942214966, "learning_rate": 9.857325181307832e-05, "loss": 0.4255647659301758, "step": 4310 }, { "epoch": 0.018546662888642745, "grad_norm": 0.04550351947546005, "learning_rate": 9.85689400929607e-05, "loss": 0.2925698280334473, "step": 4320 }, { "epoch": 0.018589594978662752, "grad_norm": 3.6106345653533936, "learning_rate": 9.856462837284307e-05, "loss": 0.2803173303604126, "step": 4330 }, { "epoch": 0.018632527068682757, "grad_norm": 4.29874849319458, "learning_rate": 9.856031665272545e-05, "loss": 0.2041841506958008, "step": 4340 }, { "epoch": 0.018675459158702765, "grad_norm": 0.028424430638551712, "learning_rate": 9.855600493260781e-05, "loss": 0.20737464427948, "step": 4350 }, { "epoch": 0.01871839124872277, "grad_norm": 0.07163364440202713, "learning_rate": 9.855169321249019e-05, "loss": 0.12331546545028686, "step": 4360 }, { "epoch": 0.018761323338742777, "grad_norm": 2.0091068744659424, "learning_rate": 9.854738149237257e-05, "loss": 0.1653214693069458, "step": 4370 }, { "epoch": 0.018804255428762785, "grad_norm": 0.13968168199062347, "learning_rate": 9.854306977225494e-05, "loss": 0.13523595333099364, "step": 4380 }, { "epoch": 0.01884718751878279, "grad_norm": 0.2080664187669754, "learning_rate": 9.853875805213732e-05, "loss": 0.38797965049743655, "step": 4390 }, { "epoch": 0.018890119608802797, "grad_norm": 0.4882734417915344, "learning_rate": 9.85344463320197e-05, "loss": 0.16562176942825318, "step": 4400 }, { "epoch": 0.0189330516988228, "grad_norm": 0.009212512522935867, "learning_rate": 9.853013461190208e-05, "loss": 0.12350815534591675, "step": 4410 }, { "epoch": 0.01897598378884281, "grad_norm": 0.014012163504958153, "learning_rate": 9.852582289178445e-05, "loss": 0.3011958122253418, "step": 4420 }, { "epoch": 0.019018915878862813, "grad_norm": 0.004959054756909609, "learning_rate": 9.852151117166683e-05, "loss": 0.160746431350708, "step": 4430 }, { "epoch": 0.01906184796888282, "grad_norm": 2.8610429763793945, "learning_rate": 9.851719945154921e-05, "loss": 0.33746287822723386, "step": 4440 }, { "epoch": 0.01910478005890283, "grad_norm": 1.2206952571868896, "learning_rate": 9.851288773143159e-05, "loss": 0.39143075942993166, "step": 4450 }, { "epoch": 0.019147712148922833, "grad_norm": 0.013409411534667015, "learning_rate": 9.850857601131396e-05, "loss": 0.3400929689407349, "step": 4460 }, { "epoch": 0.01919064423894284, "grad_norm": 16.17251968383789, "learning_rate": 9.850426429119634e-05, "loss": 0.22110345363616943, "step": 4470 }, { "epoch": 0.019233576328962845, "grad_norm": 0.009083151817321777, "learning_rate": 9.849995257107872e-05, "loss": 0.29708778858184814, "step": 4480 }, { "epoch": 0.019276508418982853, "grad_norm": 1.265600562095642, "learning_rate": 9.84956408509611e-05, "loss": 0.34580533504486083, "step": 4490 }, { "epoch": 0.01931944050900286, "grad_norm": 0.9040108919143677, "learning_rate": 9.849132913084347e-05, "loss": 0.4236451148986816, "step": 4500 }, { "epoch": 0.019362372599022865, "grad_norm": 1.5615957975387573, "learning_rate": 9.848701741072584e-05, "loss": 0.5036997318267822, "step": 4510 }, { "epoch": 0.019405304689042873, "grad_norm": 1.2325830459594727, "learning_rate": 9.848270569060821e-05, "loss": 0.5182486057281495, "step": 4520 }, { "epoch": 0.019448236779062877, "grad_norm": 0.7714042663574219, "learning_rate": 9.847839397049059e-05, "loss": 0.4611947536468506, "step": 4530 }, { "epoch": 0.019491168869082885, "grad_norm": 0.06427251547574997, "learning_rate": 9.847408225037297e-05, "loss": 0.4581316947937012, "step": 4540 }, { "epoch": 0.01953410095910289, "grad_norm": 0.04174116253852844, "learning_rate": 9.846977053025535e-05, "loss": 0.052045691013336184, "step": 4550 }, { "epoch": 0.019577033049122897, "grad_norm": 0.059649962931871414, "learning_rate": 9.846545881013772e-05, "loss": 0.24959084987640381, "step": 4560 }, { "epoch": 0.019619965139142905, "grad_norm": 0.06549002230167389, "learning_rate": 9.84611470900201e-05, "loss": 0.3865674018859863, "step": 4570 }, { "epoch": 0.01966289722916291, "grad_norm": 2.8574886322021484, "learning_rate": 9.845683536990248e-05, "loss": 0.39156625270843504, "step": 4580 }, { "epoch": 0.019705829319182917, "grad_norm": 1.1553655862808228, "learning_rate": 9.845252364978484e-05, "loss": 0.3730152606964111, "step": 4590 }, { "epoch": 0.01974876140920292, "grad_norm": 0.13113917410373688, "learning_rate": 9.844821192966722e-05, "loss": 0.2739130735397339, "step": 4600 }, { "epoch": 0.01979169349922293, "grad_norm": 1.5252152681350708, "learning_rate": 9.84439002095496e-05, "loss": 0.30946390628814696, "step": 4610 }, { "epoch": 0.019834625589242937, "grad_norm": 1.3319313526153564, "learning_rate": 9.843958848943197e-05, "loss": 0.4056663513183594, "step": 4620 }, { "epoch": 0.01987755767926294, "grad_norm": 2.8948614597320557, "learning_rate": 9.843527676931435e-05, "loss": 0.4438942909240723, "step": 4630 }, { "epoch": 0.01992048976928295, "grad_norm": 1.7223323583602905, "learning_rate": 9.843096504919673e-05, "loss": 0.2097313404083252, "step": 4640 }, { "epoch": 0.019963421859302954, "grad_norm": 0.14699095487594604, "learning_rate": 9.84266533290791e-05, "loss": 0.3881974458694458, "step": 4650 }, { "epoch": 0.02000635394932296, "grad_norm": 5.661250114440918, "learning_rate": 9.842234160896148e-05, "loss": 0.2718313694000244, "step": 4660 }, { "epoch": 0.020049286039342966, "grad_norm": 0.37304461002349854, "learning_rate": 9.841802988884386e-05, "loss": 0.1976300835609436, "step": 4670 }, { "epoch": 0.020092218129362974, "grad_norm": 0.10088256746530533, "learning_rate": 9.841371816872624e-05, "loss": 0.24343338012695312, "step": 4680 }, { "epoch": 0.02013515021938298, "grad_norm": 3.5855891704559326, "learning_rate": 9.840940644860861e-05, "loss": 0.5209373950958252, "step": 4690 }, { "epoch": 0.020178082309402986, "grad_norm": 0.08754919469356537, "learning_rate": 9.840509472849099e-05, "loss": 0.3627974271774292, "step": 4700 }, { "epoch": 0.020221014399422994, "grad_norm": 0.05948247015476227, "learning_rate": 9.840078300837337e-05, "loss": 0.2495173692703247, "step": 4710 }, { "epoch": 0.020263946489442998, "grad_norm": 0.13405494391918182, "learning_rate": 9.839647128825575e-05, "loss": 0.1566326141357422, "step": 4720 }, { "epoch": 0.020306878579463006, "grad_norm": 0.04851258918642998, "learning_rate": 9.839215956813812e-05, "loss": 0.09986941814422608, "step": 4730 }, { "epoch": 0.02034981066948301, "grad_norm": 1.3744168281555176, "learning_rate": 9.83878478480205e-05, "loss": 0.2830467462539673, "step": 4740 }, { "epoch": 0.020392742759503018, "grad_norm": 0.04340994358062744, "learning_rate": 9.838353612790288e-05, "loss": 0.18021624088287352, "step": 4750 }, { "epoch": 0.020435674849523026, "grad_norm": 2.098802089691162, "learning_rate": 9.837922440778524e-05, "loss": 0.5201596736907959, "step": 4760 }, { "epoch": 0.02047860693954303, "grad_norm": 0.12326524406671524, "learning_rate": 9.837491268766762e-05, "loss": 0.30697102546691896, "step": 4770 }, { "epoch": 0.020521539029563038, "grad_norm": 1.1608352661132812, "learning_rate": 9.837060096755e-05, "loss": 0.385021448135376, "step": 4780 }, { "epoch": 0.020564471119583042, "grad_norm": 14.848011016845703, "learning_rate": 9.836628924743237e-05, "loss": 0.28464808464050295, "step": 4790 }, { "epoch": 0.02060740320960305, "grad_norm": 0.4765234887599945, "learning_rate": 9.836197752731475e-05, "loss": 0.27699947357177734, "step": 4800 }, { "epoch": 0.020650335299623058, "grad_norm": 14.751072883605957, "learning_rate": 9.835766580719713e-05, "loss": 0.13528286218643187, "step": 4810 }, { "epoch": 0.020693267389643062, "grad_norm": 0.006392804905772209, "learning_rate": 9.835335408707951e-05, "loss": 0.31837561130523684, "step": 4820 }, { "epoch": 0.02073619947966307, "grad_norm": 6.483633041381836, "learning_rate": 9.834904236696188e-05, "loss": 0.25327458381652834, "step": 4830 }, { "epoch": 0.020779131569683074, "grad_norm": 0.027963057160377502, "learning_rate": 9.834473064684425e-05, "loss": 0.2266265869140625, "step": 4840 }, { "epoch": 0.020822063659703082, "grad_norm": 0.0027327449060976505, "learning_rate": 9.834041892672663e-05, "loss": 0.17874222993850708, "step": 4850 }, { "epoch": 0.020864995749723086, "grad_norm": 1.8083893060684204, "learning_rate": 9.8336107206609e-05, "loss": 0.27457170486450194, "step": 4860 }, { "epoch": 0.020907927839743094, "grad_norm": 0.019822167232632637, "learning_rate": 9.833179548649138e-05, "loss": 0.3151801109313965, "step": 4870 }, { "epoch": 0.020950859929763102, "grad_norm": 1.336995244026184, "learning_rate": 9.832748376637376e-05, "loss": 0.3638888359069824, "step": 4880 }, { "epoch": 0.020993792019783106, "grad_norm": 8.536113739013672, "learning_rate": 9.832317204625613e-05, "loss": 0.19324772357940673, "step": 4890 }, { "epoch": 0.021036724109803114, "grad_norm": 0.46870413422584534, "learning_rate": 9.831886032613851e-05, "loss": 0.4881044864654541, "step": 4900 }, { "epoch": 0.02107965619982312, "grad_norm": 3.869471788406372, "learning_rate": 9.831454860602089e-05, "loss": 0.1654451608657837, "step": 4910 }, { "epoch": 0.021122588289843126, "grad_norm": 0.8998738527297974, "learning_rate": 9.831023688590327e-05, "loss": 0.23226535320281982, "step": 4920 }, { "epoch": 0.021165520379863134, "grad_norm": 0.10120627284049988, "learning_rate": 9.830592516578564e-05, "loss": 0.215118145942688, "step": 4930 }, { "epoch": 0.02120845246988314, "grad_norm": 0.005523098167032003, "learning_rate": 9.830161344566802e-05, "loss": 0.24013869762420653, "step": 4940 }, { "epoch": 0.021251384559903146, "grad_norm": 0.03459906205534935, "learning_rate": 9.82973017255504e-05, "loss": 0.10165373086929322, "step": 4950 }, { "epoch": 0.02129431664992315, "grad_norm": 2.4921891689300537, "learning_rate": 9.829299000543278e-05, "loss": 0.6230008602142334, "step": 4960 }, { "epoch": 0.02133724873994316, "grad_norm": 1.8752906322479248, "learning_rate": 9.828867828531515e-05, "loss": 0.22859764099121094, "step": 4970 }, { "epoch": 0.021380180829963163, "grad_norm": 0.13080959022045135, "learning_rate": 9.828436656519753e-05, "loss": 0.22646758556365967, "step": 4980 }, { "epoch": 0.02142311291998317, "grad_norm": 2.735193967819214, "learning_rate": 9.828005484507991e-05, "loss": 0.15832440853118895, "step": 4990 }, { "epoch": 0.02146604501000318, "grad_norm": 7.5099310874938965, "learning_rate": 9.827574312496227e-05, "loss": 0.4262217998504639, "step": 5000 }, { "epoch": 0.02146604501000318, "eval_loss": 0.5495235323905945, "eval_runtime": 27.4385, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 5000 }, { "epoch": 0.021508977100023183, "grad_norm": 0.7837478518486023, "learning_rate": 9.827143140484465e-05, "loss": 0.18935658931732177, "step": 5010 }, { "epoch": 0.02155190919004319, "grad_norm": 0.01325380802154541, "learning_rate": 9.826711968472703e-05, "loss": 0.1605769395828247, "step": 5020 }, { "epoch": 0.021594841280063195, "grad_norm": 1.1370841264724731, "learning_rate": 9.82628079646094e-05, "loss": 0.296016526222229, "step": 5030 }, { "epoch": 0.021637773370083203, "grad_norm": 0.03137364238500595, "learning_rate": 9.825849624449178e-05, "loss": 0.339589524269104, "step": 5040 }, { "epoch": 0.021680705460103207, "grad_norm": 0.05219261720776558, "learning_rate": 9.825418452437416e-05, "loss": 0.3410472869873047, "step": 5050 }, { "epoch": 0.021723637550123215, "grad_norm": 0.011812934651970863, "learning_rate": 9.824987280425654e-05, "loss": 0.13874655961990356, "step": 5060 }, { "epoch": 0.021766569640143223, "grad_norm": 0.027524782344698906, "learning_rate": 9.824556108413891e-05, "loss": 0.23152687549591064, "step": 5070 }, { "epoch": 0.021809501730163227, "grad_norm": 0.09816437214612961, "learning_rate": 9.824124936402129e-05, "loss": 0.33842058181762696, "step": 5080 }, { "epoch": 0.021852433820183235, "grad_norm": 6.345348358154297, "learning_rate": 9.823693764390365e-05, "loss": 0.4534308433532715, "step": 5090 }, { "epoch": 0.02189536591020324, "grad_norm": 2.9625370502471924, "learning_rate": 9.823262592378603e-05, "loss": 0.2510680198669434, "step": 5100 }, { "epoch": 0.021938298000223247, "grad_norm": 1.2221940755844116, "learning_rate": 9.822831420366841e-05, "loss": 0.28204386234283446, "step": 5110 }, { "epoch": 0.021981230090243255, "grad_norm": 2.2707417011260986, "learning_rate": 9.822400248355079e-05, "loss": 0.335250997543335, "step": 5120 }, { "epoch": 0.02202416218026326, "grad_norm": 3.409400701522827, "learning_rate": 9.821969076343316e-05, "loss": 0.3914987087249756, "step": 5130 }, { "epoch": 0.022067094270283267, "grad_norm": 1.3195754289627075, "learning_rate": 9.821537904331554e-05, "loss": 0.49390902519226076, "step": 5140 }, { "epoch": 0.02211002636030327, "grad_norm": 0.27802133560180664, "learning_rate": 9.821106732319792e-05, "loss": 0.18718713521957397, "step": 5150 }, { "epoch": 0.02215295845032328, "grad_norm": 3.635807514190674, "learning_rate": 9.820675560308031e-05, "loss": 0.3083020210266113, "step": 5160 }, { "epoch": 0.022195890540343283, "grad_norm": 0.11027365177869797, "learning_rate": 9.820244388296267e-05, "loss": 0.3606464385986328, "step": 5170 }, { "epoch": 0.02223882263036329, "grad_norm": 0.9344707727432251, "learning_rate": 9.819813216284505e-05, "loss": 0.2589896202087402, "step": 5180 }, { "epoch": 0.0222817547203833, "grad_norm": 0.24270321428775787, "learning_rate": 9.819382044272743e-05, "loss": 0.3288132667541504, "step": 5190 }, { "epoch": 0.022324686810403303, "grad_norm": 0.06861063838005066, "learning_rate": 9.81895087226098e-05, "loss": 0.3274968147277832, "step": 5200 }, { "epoch": 0.02236761890042331, "grad_norm": 2.494279146194458, "learning_rate": 9.818519700249218e-05, "loss": 0.12895745038986206, "step": 5210 }, { "epoch": 0.022410550990443315, "grad_norm": 10.221489906311035, "learning_rate": 9.818088528237456e-05, "loss": 0.15850175619125367, "step": 5220 }, { "epoch": 0.022453483080463323, "grad_norm": 0.10302328318357468, "learning_rate": 9.817657356225694e-05, "loss": 0.26316812038421633, "step": 5230 }, { "epoch": 0.02249641517048333, "grad_norm": 4.582814693450928, "learning_rate": 9.817226184213931e-05, "loss": 0.2203810691833496, "step": 5240 }, { "epoch": 0.022539347260503335, "grad_norm": 0.011359277181327343, "learning_rate": 9.816795012202168e-05, "loss": 0.3015336036682129, "step": 5250 }, { "epoch": 0.022582279350523343, "grad_norm": 0.09045564383268356, "learning_rate": 9.816363840190406e-05, "loss": 0.32758321762084963, "step": 5260 }, { "epoch": 0.022625211440543348, "grad_norm": 0.036633238196372986, "learning_rate": 9.815932668178643e-05, "loss": 0.2647268533706665, "step": 5270 }, { "epoch": 0.022668143530563355, "grad_norm": 0.004347283858805895, "learning_rate": 9.815501496166881e-05, "loss": 0.2765114545822144, "step": 5280 }, { "epoch": 0.02271107562058336, "grad_norm": 0.19933733344078064, "learning_rate": 9.815070324155119e-05, "loss": 0.37896840572357177, "step": 5290 }, { "epoch": 0.022754007710603368, "grad_norm": 0.04528508707880974, "learning_rate": 9.814639152143357e-05, "loss": 0.3411917448043823, "step": 5300 }, { "epoch": 0.022796939800623375, "grad_norm": 1.2522482872009277, "learning_rate": 9.814207980131594e-05, "loss": 0.21302356719970703, "step": 5310 }, { "epoch": 0.02283987189064338, "grad_norm": 4.6196746826171875, "learning_rate": 9.813776808119832e-05, "loss": 0.3238266706466675, "step": 5320 }, { "epoch": 0.022882803980663387, "grad_norm": 1.3250828981399536, "learning_rate": 9.813345636108068e-05, "loss": 0.30894622802734373, "step": 5330 }, { "epoch": 0.022925736070683392, "grad_norm": 0.2489081472158432, "learning_rate": 9.812914464096306e-05, "loss": 0.20912477970123292, "step": 5340 }, { "epoch": 0.0229686681607034, "grad_norm": 4.305191993713379, "learning_rate": 9.812483292084544e-05, "loss": 0.22660810947418214, "step": 5350 }, { "epoch": 0.023011600250723407, "grad_norm": 5.360749244689941, "learning_rate": 9.812052120072782e-05, "loss": 0.46240839958190916, "step": 5360 }, { "epoch": 0.023054532340743412, "grad_norm": 0.011522599495947361, "learning_rate": 9.811620948061019e-05, "loss": 0.17703713178634645, "step": 5370 }, { "epoch": 0.02309746443076342, "grad_norm": 4.123495578765869, "learning_rate": 9.811189776049258e-05, "loss": 0.4162435531616211, "step": 5380 }, { "epoch": 0.023140396520783424, "grad_norm": 0.1706576645374298, "learning_rate": 9.810758604037496e-05, "loss": 0.2197955846786499, "step": 5390 }, { "epoch": 0.02318332861080343, "grad_norm": 1.6054201126098633, "learning_rate": 9.810327432025734e-05, "loss": 0.5216985225677491, "step": 5400 }, { "epoch": 0.023226260700823436, "grad_norm": 2.200653553009033, "learning_rate": 9.809896260013972e-05, "loss": 0.30880715847015383, "step": 5410 }, { "epoch": 0.023269192790843444, "grad_norm": 6.275848865509033, "learning_rate": 9.809465088002208e-05, "loss": 0.4033853054046631, "step": 5420 }, { "epoch": 0.02331212488086345, "grad_norm": 0.5680272579193115, "learning_rate": 9.809033915990446e-05, "loss": 0.18146917819976807, "step": 5430 }, { "epoch": 0.023355056970883456, "grad_norm": 0.37831783294677734, "learning_rate": 9.808602743978683e-05, "loss": 0.2999399185180664, "step": 5440 }, { "epoch": 0.023397989060903464, "grad_norm": 3.798578977584839, "learning_rate": 9.808171571966921e-05, "loss": 0.28671579360961913, "step": 5450 }, { "epoch": 0.023440921150923468, "grad_norm": 0.01146702840924263, "learning_rate": 9.807740399955159e-05, "loss": 0.3553640365600586, "step": 5460 }, { "epoch": 0.023483853240943476, "grad_norm": 0.03364414721727371, "learning_rate": 9.807309227943397e-05, "loss": 0.28850817680358887, "step": 5470 }, { "epoch": 0.02352678533096348, "grad_norm": 0.45987215638160706, "learning_rate": 9.806878055931634e-05, "loss": 0.22811505794525147, "step": 5480 }, { "epoch": 0.023569717420983488, "grad_norm": 0.08059200644493103, "learning_rate": 9.806446883919872e-05, "loss": 0.19109236001968383, "step": 5490 }, { "epoch": 0.023612649511003496, "grad_norm": 0.6227476596832275, "learning_rate": 9.806015711908108e-05, "loss": 0.27296125888824463, "step": 5500 }, { "epoch": 0.0236555816010235, "grad_norm": 0.02348339930176735, "learning_rate": 9.805584539896346e-05, "loss": 0.30638632774353025, "step": 5510 }, { "epoch": 0.023698513691043508, "grad_norm": 0.5054450631141663, "learning_rate": 9.805153367884584e-05, "loss": 0.31245372295379636, "step": 5520 }, { "epoch": 0.023741445781063512, "grad_norm": 9.5831937789917, "learning_rate": 9.804722195872822e-05, "loss": 0.2308908462524414, "step": 5530 }, { "epoch": 0.02378437787108352, "grad_norm": 0.024453381076455116, "learning_rate": 9.80429102386106e-05, "loss": 0.20383148193359374, "step": 5540 }, { "epoch": 0.023827309961103528, "grad_norm": 0.0007577983778901398, "learning_rate": 9.803859851849297e-05, "loss": 0.20267860889434813, "step": 5550 }, { "epoch": 0.023870242051123532, "grad_norm": 0.002279347274452448, "learning_rate": 9.803428679837535e-05, "loss": 0.1286637783050537, "step": 5560 }, { "epoch": 0.02391317414114354, "grad_norm": 1.7347980737686157, "learning_rate": 9.802997507825773e-05, "loss": 0.3176673412322998, "step": 5570 }, { "epoch": 0.023956106231163544, "grad_norm": 0.002336501609534025, "learning_rate": 9.802566335814009e-05, "loss": 0.2759399890899658, "step": 5580 }, { "epoch": 0.023999038321183552, "grad_norm": 2.0233569145202637, "learning_rate": 9.802135163802247e-05, "loss": 0.2526408195495605, "step": 5590 }, { "epoch": 0.024041970411203557, "grad_norm": 0.0020779764745384455, "learning_rate": 9.801703991790486e-05, "loss": 0.15542728900909425, "step": 5600 }, { "epoch": 0.024084902501223564, "grad_norm": 4.984875679016113, "learning_rate": 9.801272819778724e-05, "loss": 0.3817164182662964, "step": 5610 }, { "epoch": 0.024127834591243572, "grad_norm": 0.11502945423126221, "learning_rate": 9.800841647766961e-05, "loss": 0.2196964979171753, "step": 5620 }, { "epoch": 0.024170766681263577, "grad_norm": 0.002261434681713581, "learning_rate": 9.800410475755199e-05, "loss": 0.05837162733078003, "step": 5630 }, { "epoch": 0.024213698771283584, "grad_norm": 0.00039357831701636314, "learning_rate": 9.799979303743437e-05, "loss": 0.07530305981636047, "step": 5640 }, { "epoch": 0.02425663086130359, "grad_norm": 0.0011043796548619866, "learning_rate": 9.799548131731674e-05, "loss": 0.46832637786865233, "step": 5650 }, { "epoch": 0.024299562951323597, "grad_norm": 1.8432406187057495, "learning_rate": 9.799116959719911e-05, "loss": 0.20423316955566406, "step": 5660 }, { "epoch": 0.024342495041343604, "grad_norm": 1.1161885261535645, "learning_rate": 9.798685787708149e-05, "loss": 0.3098815679550171, "step": 5670 }, { "epoch": 0.02438542713136361, "grad_norm": 0.015093029476702213, "learning_rate": 9.798254615696386e-05, "loss": 0.37404484748840333, "step": 5680 }, { "epoch": 0.024428359221383616, "grad_norm": 0.007660869043320417, "learning_rate": 9.797823443684624e-05, "loss": 0.3126490354537964, "step": 5690 }, { "epoch": 0.02447129131140362, "grad_norm": 0.1040581539273262, "learning_rate": 9.797392271672862e-05, "loss": 0.5040182113647461, "step": 5700 }, { "epoch": 0.02451422340142363, "grad_norm": 0.7959339022636414, "learning_rate": 9.7969610996611e-05, "loss": 0.15577397346496583, "step": 5710 }, { "epoch": 0.024557155491443633, "grad_norm": 0.005332805681973696, "learning_rate": 9.796529927649337e-05, "loss": 0.1662292718887329, "step": 5720 }, { "epoch": 0.02460008758146364, "grad_norm": 1.613714575767517, "learning_rate": 9.796098755637575e-05, "loss": 0.3151638269424438, "step": 5730 }, { "epoch": 0.02464301967148365, "grad_norm": 0.009166977368295193, "learning_rate": 9.795667583625811e-05, "loss": 0.22575023174285888, "step": 5740 }, { "epoch": 0.024685951761503653, "grad_norm": 1.1688400506973267, "learning_rate": 9.795236411614049e-05, "loss": 0.230286169052124, "step": 5750 }, { "epoch": 0.02472888385152366, "grad_norm": 0.04237162321805954, "learning_rate": 9.794805239602287e-05, "loss": 0.3470681428909302, "step": 5760 }, { "epoch": 0.024771815941543665, "grad_norm": 0.054464444518089294, "learning_rate": 9.794374067590525e-05, "loss": 0.14838558435440063, "step": 5770 }, { "epoch": 0.024814748031563673, "grad_norm": 1.083876132965088, "learning_rate": 9.793942895578762e-05, "loss": 0.3141624450683594, "step": 5780 }, { "epoch": 0.024857680121583677, "grad_norm": 0.02843441627919674, "learning_rate": 9.793511723567e-05, "loss": 0.11999982595443726, "step": 5790 }, { "epoch": 0.024900612211603685, "grad_norm": 0.0133874686434865, "learning_rate": 9.793080551555238e-05, "loss": 0.19130725860595704, "step": 5800 }, { "epoch": 0.024943544301623693, "grad_norm": 9.813251495361328, "learning_rate": 9.792649379543476e-05, "loss": 0.3944342613220215, "step": 5810 }, { "epoch": 0.024986476391643697, "grad_norm": 1.1463874578475952, "learning_rate": 9.792218207531713e-05, "loss": 0.40923099517822265, "step": 5820 }, { "epoch": 0.025029408481663705, "grad_norm": 0.8478333950042725, "learning_rate": 9.791787035519951e-05, "loss": 0.15334019660949708, "step": 5830 }, { "epoch": 0.02507234057168371, "grad_norm": 2.906216621398926, "learning_rate": 9.791355863508189e-05, "loss": 0.4113955020904541, "step": 5840 }, { "epoch": 0.025115272661703717, "grad_norm": 31.99359130859375, "learning_rate": 9.790924691496426e-05, "loss": 0.2590095281600952, "step": 5850 }, { "epoch": 0.025158204751723725, "grad_norm": 0.6950059533119202, "learning_rate": 9.790493519484664e-05, "loss": 0.5012688636779785, "step": 5860 }, { "epoch": 0.02520113684174373, "grad_norm": 0.020123276859521866, "learning_rate": 9.790062347472902e-05, "loss": 0.2581526517868042, "step": 5870 }, { "epoch": 0.025244068931763737, "grad_norm": 3.909289598464966, "learning_rate": 9.78963117546114e-05, "loss": 0.21584734916687012, "step": 5880 }, { "epoch": 0.02528700102178374, "grad_norm": 1.5961849689483643, "learning_rate": 9.789200003449377e-05, "loss": 0.16069986820220947, "step": 5890 }, { "epoch": 0.02532993311180375, "grad_norm": 0.1236676275730133, "learning_rate": 9.788768831437615e-05, "loss": 0.09747138023376464, "step": 5900 }, { "epoch": 0.025372865201823754, "grad_norm": 1.4221770763397217, "learning_rate": 9.788337659425852e-05, "loss": 0.25554373264312746, "step": 5910 }, { "epoch": 0.02541579729184376, "grad_norm": 0.007482404820621014, "learning_rate": 9.787906487414089e-05, "loss": 0.42077908515930174, "step": 5920 }, { "epoch": 0.02545872938186377, "grad_norm": 0.753391683101654, "learning_rate": 9.787475315402327e-05, "loss": 0.24806034564971924, "step": 5930 }, { "epoch": 0.025501661471883773, "grad_norm": 0.677297055721283, "learning_rate": 9.787044143390565e-05, "loss": 0.30080883502960204, "step": 5940 }, { "epoch": 0.02554459356190378, "grad_norm": 0.007284692022949457, "learning_rate": 9.786612971378802e-05, "loss": 0.2560673713684082, "step": 5950 }, { "epoch": 0.025587525651923786, "grad_norm": 0.06195899099111557, "learning_rate": 9.78618179936704e-05, "loss": 0.2860154390335083, "step": 5960 }, { "epoch": 0.025630457741943793, "grad_norm": 0.029079634696245193, "learning_rate": 9.785750627355278e-05, "loss": 0.30883972644805907, "step": 5970 }, { "epoch": 0.0256733898319638, "grad_norm": 0.1602584421634674, "learning_rate": 9.785319455343516e-05, "loss": 0.2173325777053833, "step": 5980 }, { "epoch": 0.025716321921983806, "grad_norm": 1.476770281791687, "learning_rate": 9.784888283331752e-05, "loss": 0.42102804183959963, "step": 5990 }, { "epoch": 0.025759254012003813, "grad_norm": 0.05325939878821373, "learning_rate": 9.78445711131999e-05, "loss": 0.31200518608093264, "step": 6000 }, { "epoch": 0.025759254012003813, "eval_loss": 0.5343221426010132, "eval_runtime": 27.4848, "eval_samples_per_second": 3.638, "eval_steps_per_second": 3.638, "step": 6000 }, { "epoch": 0.025802186102023818, "grad_norm": 2.2895023822784424, "learning_rate": 9.784025939308228e-05, "loss": 0.22626352310180664, "step": 6010 }, { "epoch": 0.025845118192043826, "grad_norm": 0.3198728561401367, "learning_rate": 9.783594767296465e-05, "loss": 0.44563970565795896, "step": 6020 }, { "epoch": 0.02588805028206383, "grad_norm": 2.3215620517730713, "learning_rate": 9.783163595284703e-05, "loss": 0.3011783123016357, "step": 6030 }, { "epoch": 0.025930982372083838, "grad_norm": 0.813892126083374, "learning_rate": 9.782732423272941e-05, "loss": 0.4721865177154541, "step": 6040 }, { "epoch": 0.025973914462103845, "grad_norm": 0.5148369073867798, "learning_rate": 9.782301251261178e-05, "loss": 0.4094189167022705, "step": 6050 }, { "epoch": 0.02601684655212385, "grad_norm": 0.04373027756810188, "learning_rate": 9.781870079249416e-05, "loss": 0.2998760223388672, "step": 6060 }, { "epoch": 0.026059778642143858, "grad_norm": 0.05778109282255173, "learning_rate": 9.781438907237654e-05, "loss": 0.17186734676361085, "step": 6070 }, { "epoch": 0.026102710732163862, "grad_norm": 1.2136205434799194, "learning_rate": 9.781007735225892e-05, "loss": 0.26401219367980955, "step": 6080 }, { "epoch": 0.02614564282218387, "grad_norm": 0.2967321276664734, "learning_rate": 9.78057656321413e-05, "loss": 0.09281305074691773, "step": 6090 }, { "epoch": 0.026188574912203878, "grad_norm": 1.52336847782135, "learning_rate": 9.780145391202367e-05, "loss": 0.4042607307434082, "step": 6100 }, { "epoch": 0.026231507002223882, "grad_norm": 1.7487553358078003, "learning_rate": 9.779714219190605e-05, "loss": 0.38055739402770994, "step": 6110 }, { "epoch": 0.02627443909224389, "grad_norm": 0.022000228986144066, "learning_rate": 9.779283047178843e-05, "loss": 0.28518545627593994, "step": 6120 }, { "epoch": 0.026317371182263894, "grad_norm": 0.007792261429131031, "learning_rate": 9.77885187516708e-05, "loss": 0.16593751907348633, "step": 6130 }, { "epoch": 0.026360303272283902, "grad_norm": 0.04965021833777428, "learning_rate": 9.778420703155318e-05, "loss": 0.26540796756744384, "step": 6140 }, { "epoch": 0.026403235362303906, "grad_norm": 0.5761759877204895, "learning_rate": 9.777989531143556e-05, "loss": 0.39497976303100585, "step": 6150 }, { "epoch": 0.026446167452323914, "grad_norm": 1.5157667398452759, "learning_rate": 9.777558359131792e-05, "loss": 0.4709909915924072, "step": 6160 }, { "epoch": 0.026489099542343922, "grad_norm": 19.292585372924805, "learning_rate": 9.77712718712003e-05, "loss": 0.4004258632659912, "step": 6170 }, { "epoch": 0.026532031632363926, "grad_norm": 1.9996172189712524, "learning_rate": 9.776696015108268e-05, "loss": 0.28473002910614015, "step": 6180 }, { "epoch": 0.026574963722383934, "grad_norm": 0.23382000625133514, "learning_rate": 9.776264843096505e-05, "loss": 0.49767556190490725, "step": 6190 }, { "epoch": 0.02661789581240394, "grad_norm": 0.38020217418670654, "learning_rate": 9.775833671084743e-05, "loss": 0.18720144033432007, "step": 6200 }, { "epoch": 0.026660827902423946, "grad_norm": 2.16418194770813, "learning_rate": 9.775402499072981e-05, "loss": 0.21355876922607422, "step": 6210 }, { "epoch": 0.02670375999244395, "grad_norm": 0.32309773564338684, "learning_rate": 9.774971327061219e-05, "loss": 0.18259401321411134, "step": 6220 }, { "epoch": 0.02674669208246396, "grad_norm": 0.1124923974275589, "learning_rate": 9.774540155049456e-05, "loss": 0.31576013565063477, "step": 6230 }, { "epoch": 0.026789624172483966, "grad_norm": 0.061951570212841034, "learning_rate": 9.774108983037693e-05, "loss": 0.021699841320514678, "step": 6240 }, { "epoch": 0.02683255626250397, "grad_norm": 0.1531129628419876, "learning_rate": 9.77367781102593e-05, "loss": 0.3296054840087891, "step": 6250 }, { "epoch": 0.026875488352523978, "grad_norm": 0.040223345160484314, "learning_rate": 9.773246639014168e-05, "loss": 0.06907052397727967, "step": 6260 }, { "epoch": 0.026918420442543983, "grad_norm": 4.550394058227539, "learning_rate": 9.772815467002406e-05, "loss": 0.31173703670501707, "step": 6270 }, { "epoch": 0.02696135253256399, "grad_norm": 0.2127457857131958, "learning_rate": 9.772384294990644e-05, "loss": 0.20917901992797852, "step": 6280 }, { "epoch": 0.027004284622583998, "grad_norm": 1.6591854095458984, "learning_rate": 9.771953122978881e-05, "loss": 0.4044227600097656, "step": 6290 }, { "epoch": 0.027047216712604003, "grad_norm": 0.010656671598553658, "learning_rate": 9.771521950967119e-05, "loss": 0.2549355268478394, "step": 6300 }, { "epoch": 0.02709014880262401, "grad_norm": 2.4075212478637695, "learning_rate": 9.771090778955357e-05, "loss": 0.22155897617340087, "step": 6310 }, { "epoch": 0.027133080892644015, "grad_norm": 0.12818868458271027, "learning_rate": 9.770659606943595e-05, "loss": 0.28032028675079346, "step": 6320 }, { "epoch": 0.027176012982664022, "grad_norm": 0.14577949047088623, "learning_rate": 9.770228434931832e-05, "loss": 0.04989034235477448, "step": 6330 }, { "epoch": 0.027218945072684027, "grad_norm": 0.07681692391633987, "learning_rate": 9.76979726292007e-05, "loss": 0.26616883277893066, "step": 6340 }, { "epoch": 0.027261877162704035, "grad_norm": 3.748929738998413, "learning_rate": 9.769366090908308e-05, "loss": 0.19273843765258789, "step": 6350 }, { "epoch": 0.027304809252724042, "grad_norm": 0.9309549331665039, "learning_rate": 9.768934918896546e-05, "loss": 0.24083504676818848, "step": 6360 }, { "epoch": 0.027347741342744047, "grad_norm": 0.014099097810685635, "learning_rate": 9.768503746884783e-05, "loss": 0.2923043489456177, "step": 6370 }, { "epoch": 0.027390673432764055, "grad_norm": 0.12325286120176315, "learning_rate": 9.768072574873021e-05, "loss": 0.1641521692276001, "step": 6380 }, { "epoch": 0.02743360552278406, "grad_norm": 1.3514981269836426, "learning_rate": 9.767641402861259e-05, "loss": 0.21309914588928222, "step": 6390 }, { "epoch": 0.027476537612804067, "grad_norm": 0.001690853270702064, "learning_rate": 9.767210230849495e-05, "loss": 0.21486878395080566, "step": 6400 }, { "epoch": 0.027519469702824075, "grad_norm": 0.00460071163251996, "learning_rate": 9.766779058837733e-05, "loss": 0.49279141426086426, "step": 6410 }, { "epoch": 0.02756240179284408, "grad_norm": 0.0049337283708155155, "learning_rate": 9.76634788682597e-05, "loss": 0.4525193691253662, "step": 6420 }, { "epoch": 0.027605333882864087, "grad_norm": 1.9769833087921143, "learning_rate": 9.765916714814208e-05, "loss": 0.5393567085266113, "step": 6430 }, { "epoch": 0.02764826597288409, "grad_norm": 9.51612377166748, "learning_rate": 9.765485542802446e-05, "loss": 0.25011520385742186, "step": 6440 }, { "epoch": 0.0276911980629041, "grad_norm": 0.08957642316818237, "learning_rate": 9.765054370790684e-05, "loss": 0.2099766731262207, "step": 6450 }, { "epoch": 0.027734130152924103, "grad_norm": 0.20063121616840363, "learning_rate": 9.764623198778921e-05, "loss": 0.34571573734283445, "step": 6460 }, { "epoch": 0.02777706224294411, "grad_norm": 4.328144073486328, "learning_rate": 9.764192026767159e-05, "loss": 0.18804190158843995, "step": 6470 }, { "epoch": 0.02781999433296412, "grad_norm": 0.9789665341377258, "learning_rate": 9.763760854755396e-05, "loss": 0.3236433506011963, "step": 6480 }, { "epoch": 0.027862926422984123, "grad_norm": 0.06257350742816925, "learning_rate": 9.763329682743633e-05, "loss": 0.2720318794250488, "step": 6490 }, { "epoch": 0.02790585851300413, "grad_norm": 0.021415017545223236, "learning_rate": 9.762898510731871e-05, "loss": 0.14671536684036254, "step": 6500 }, { "epoch": 0.027948790603024135, "grad_norm": 0.07387561351060867, "learning_rate": 9.762467338720109e-05, "loss": 0.34039785861968996, "step": 6510 }, { "epoch": 0.027991722693044143, "grad_norm": 0.34364739060401917, "learning_rate": 9.762036166708347e-05, "loss": 0.38445446491241453, "step": 6520 }, { "epoch": 0.02803465478306415, "grad_norm": 0.009217793121933937, "learning_rate": 9.761604994696584e-05, "loss": 0.48569574356079104, "step": 6530 }, { "epoch": 0.028077586873084155, "grad_norm": 0.05632294341921806, "learning_rate": 9.761173822684822e-05, "loss": 0.42383580207824706, "step": 6540 }, { "epoch": 0.028120518963104163, "grad_norm": 0.15093988180160522, "learning_rate": 9.76074265067306e-05, "loss": 0.16558367013931274, "step": 6550 }, { "epoch": 0.028163451053124167, "grad_norm": 0.7927748560905457, "learning_rate": 9.760311478661297e-05, "loss": 0.3002749443054199, "step": 6560 }, { "epoch": 0.028206383143144175, "grad_norm": 0.18128614127635956, "learning_rate": 9.759880306649535e-05, "loss": 0.1237905740737915, "step": 6570 }, { "epoch": 0.02824931523316418, "grad_norm": 1.25431489944458, "learning_rate": 9.759449134637773e-05, "loss": 0.176636004447937, "step": 6580 }, { "epoch": 0.028292247323184187, "grad_norm": 0.11274126172065735, "learning_rate": 9.759017962626011e-05, "loss": 0.2627155065536499, "step": 6590 }, { "epoch": 0.028335179413204195, "grad_norm": 0.9054426550865173, "learning_rate": 9.758586790614248e-05, "loss": 0.3994295120239258, "step": 6600 }, { "epoch": 0.0283781115032242, "grad_norm": 0.3764042258262634, "learning_rate": 9.758155618602486e-05, "loss": 0.32515180110931396, "step": 6610 }, { "epoch": 0.028421043593244207, "grad_norm": 3.711716651916504, "learning_rate": 9.757724446590724e-05, "loss": 0.14573302268981933, "step": 6620 }, { "epoch": 0.02846397568326421, "grad_norm": 0.09025450050830841, "learning_rate": 9.757293274578962e-05, "loss": 0.31324641704559325, "step": 6630 }, { "epoch": 0.02850690777328422, "grad_norm": 2.1718194484710693, "learning_rate": 9.7568621025672e-05, "loss": 0.2684445858001709, "step": 6640 }, { "epoch": 0.028549839863304224, "grad_norm": 0.7220675945281982, "learning_rate": 9.756430930555436e-05, "loss": 0.2534363269805908, "step": 6650 }, { "epoch": 0.02859277195332423, "grad_norm": 0.6206772327423096, "learning_rate": 9.755999758543673e-05, "loss": 0.2221672773361206, "step": 6660 }, { "epoch": 0.02863570404334424, "grad_norm": 0.008323497138917446, "learning_rate": 9.755568586531911e-05, "loss": 0.599291467666626, "step": 6670 }, { "epoch": 0.028678636133364244, "grad_norm": 1.3727705478668213, "learning_rate": 9.755137414520149e-05, "loss": 0.35412213802337644, "step": 6680 }, { "epoch": 0.02872156822338425, "grad_norm": 0.03529423102736473, "learning_rate": 9.754706242508387e-05, "loss": 0.21443359851837157, "step": 6690 }, { "epoch": 0.028764500313404256, "grad_norm": 1.579532504081726, "learning_rate": 9.754275070496624e-05, "loss": 0.24339027404785157, "step": 6700 }, { "epoch": 0.028807432403424264, "grad_norm": 1.398242712020874, "learning_rate": 9.753843898484862e-05, "loss": 0.31939172744750977, "step": 6710 }, { "epoch": 0.02885036449344427, "grad_norm": 0.07951159030199051, "learning_rate": 9.7534127264731e-05, "loss": 0.33771500587463377, "step": 6720 }, { "epoch": 0.028893296583464276, "grad_norm": 6.611891746520996, "learning_rate": 9.752981554461336e-05, "loss": 0.18760627508163452, "step": 6730 }, { "epoch": 0.028936228673484284, "grad_norm": 0.1047222837805748, "learning_rate": 9.752550382449574e-05, "loss": 0.2564627408981323, "step": 6740 }, { "epoch": 0.028979160763504288, "grad_norm": 0.5849888920783997, "learning_rate": 9.752119210437812e-05, "loss": 0.4227277755737305, "step": 6750 }, { "epoch": 0.029022092853524296, "grad_norm": 0.0048598735593259335, "learning_rate": 9.75168803842605e-05, "loss": 0.11246702671051026, "step": 6760 }, { "epoch": 0.0290650249435443, "grad_norm": 0.09654171764850616, "learning_rate": 9.751256866414287e-05, "loss": 0.4050751686096191, "step": 6770 }, { "epoch": 0.029107957033564308, "grad_norm": 0.21590472757816315, "learning_rate": 9.750825694402525e-05, "loss": 0.16975542306900024, "step": 6780 }, { "epoch": 0.029150889123584316, "grad_norm": 0.24438561499118805, "learning_rate": 9.750394522390764e-05, "loss": 0.018460248410701752, "step": 6790 }, { "epoch": 0.02919382121360432, "grad_norm": 0.05073995888233185, "learning_rate": 9.749963350379002e-05, "loss": 0.15996166467666625, "step": 6800 }, { "epoch": 0.029236753303624328, "grad_norm": 2.4738333225250244, "learning_rate": 9.749532178367238e-05, "loss": 0.1290574073791504, "step": 6810 }, { "epoch": 0.029279685393644332, "grad_norm": 2.667616844177246, "learning_rate": 9.749101006355476e-05, "loss": 0.19219365119934081, "step": 6820 }, { "epoch": 0.02932261748366434, "grad_norm": 1.5775728225708008, "learning_rate": 9.748669834343714e-05, "loss": 0.21368303298950195, "step": 6830 }, { "epoch": 0.029365549573684348, "grad_norm": 0.9113060832023621, "learning_rate": 9.748238662331951e-05, "loss": 0.41691412925720217, "step": 6840 }, { "epoch": 0.029408481663704352, "grad_norm": 0.0026496616192162037, "learning_rate": 9.747807490320189e-05, "loss": 0.32430739402770997, "step": 6850 }, { "epoch": 0.02945141375372436, "grad_norm": 0.016415616497397423, "learning_rate": 9.747376318308427e-05, "loss": 0.25125372409820557, "step": 6860 }, { "epoch": 0.029494345843744364, "grad_norm": 1.4902602434158325, "learning_rate": 9.746945146296665e-05, "loss": 0.35351219177246096, "step": 6870 }, { "epoch": 0.029537277933764372, "grad_norm": 0.036443907767534256, "learning_rate": 9.746513974284902e-05, "loss": 0.38958556652069093, "step": 6880 }, { "epoch": 0.029580210023784376, "grad_norm": 0.03572531044483185, "learning_rate": 9.74608280227314e-05, "loss": 0.09846047163009644, "step": 6890 }, { "epoch": 0.029623142113804384, "grad_norm": 2.647747278213501, "learning_rate": 9.745651630261376e-05, "loss": 0.3600107192993164, "step": 6900 }, { "epoch": 0.029666074203824392, "grad_norm": 2.517021894454956, "learning_rate": 9.745220458249614e-05, "loss": 0.15333187580108643, "step": 6910 }, { "epoch": 0.029709006293844396, "grad_norm": 12.12330436706543, "learning_rate": 9.744789286237852e-05, "loss": 0.3379157543182373, "step": 6920 }, { "epoch": 0.029751938383864404, "grad_norm": 0.801120400428772, "learning_rate": 9.74435811422609e-05, "loss": 0.414486026763916, "step": 6930 }, { "epoch": 0.02979487047388441, "grad_norm": 0.80088871717453, "learning_rate": 9.743926942214327e-05, "loss": 0.2384410858154297, "step": 6940 }, { "epoch": 0.029837802563904416, "grad_norm": 0.22154009342193604, "learning_rate": 9.743495770202565e-05, "loss": 0.2071291208267212, "step": 6950 }, { "epoch": 0.02988073465392442, "grad_norm": 2.093872308731079, "learning_rate": 9.743064598190803e-05, "loss": 0.2516919136047363, "step": 6960 }, { "epoch": 0.02992366674394443, "grad_norm": 18.139604568481445, "learning_rate": 9.74263342617904e-05, "loss": 0.442185115814209, "step": 6970 }, { "epoch": 0.029966598833964436, "grad_norm": 0.8278317451477051, "learning_rate": 9.742202254167277e-05, "loss": 0.11277594566345214, "step": 6980 }, { "epoch": 0.03000953092398444, "grad_norm": 0.6661980152130127, "learning_rate": 9.741771082155515e-05, "loss": 0.5284313678741455, "step": 6990 }, { "epoch": 0.03005246301400445, "grad_norm": 0.14535479247570038, "learning_rate": 9.741339910143752e-05, "loss": 0.19856830835342407, "step": 7000 }, { "epoch": 0.03005246301400445, "eval_loss": 0.5455799698829651, "eval_runtime": 27.5675, "eval_samples_per_second": 3.627, "eval_steps_per_second": 3.627, "step": 7000 }, { "epoch": 0.030095395104024453, "grad_norm": 1.4745711088180542, "learning_rate": 9.740908738131991e-05, "loss": 0.322112512588501, "step": 7010 }, { "epoch": 0.03013832719404446, "grad_norm": 0.3533012270927429, "learning_rate": 9.740477566120229e-05, "loss": 0.1881626844406128, "step": 7020 }, { "epoch": 0.03018125928406447, "grad_norm": 0.1096927747130394, "learning_rate": 9.740046394108467e-05, "loss": 0.12380988597869873, "step": 7030 }, { "epoch": 0.030224191374084473, "grad_norm": 0.020163699984550476, "learning_rate": 9.739615222096705e-05, "loss": 0.4424473285675049, "step": 7040 }, { "epoch": 0.03026712346410448, "grad_norm": 6.096765518188477, "learning_rate": 9.739184050084942e-05, "loss": 0.3141198635101318, "step": 7050 }, { "epoch": 0.030310055554124485, "grad_norm": 1.6224082708358765, "learning_rate": 9.738752878073179e-05, "loss": 0.3304691553115845, "step": 7060 }, { "epoch": 0.030352987644144493, "grad_norm": 3.0215723514556885, "learning_rate": 9.738321706061417e-05, "loss": 0.2341548442840576, "step": 7070 }, { "epoch": 0.030395919734164497, "grad_norm": 0.29276198148727417, "learning_rate": 9.737890534049654e-05, "loss": 0.22664909362792968, "step": 7080 }, { "epoch": 0.030438851824184505, "grad_norm": 0.08035605400800705, "learning_rate": 9.737459362037892e-05, "loss": 0.30606210231781006, "step": 7090 }, { "epoch": 0.030481783914204513, "grad_norm": 0.0922674909234047, "learning_rate": 9.73702819002613e-05, "loss": 0.15513947010040283, "step": 7100 }, { "epoch": 0.030524716004224517, "grad_norm": 0.02588404156267643, "learning_rate": 9.736597018014367e-05, "loss": 0.27363131046295164, "step": 7110 }, { "epoch": 0.030567648094244525, "grad_norm": 0.42078208923339844, "learning_rate": 9.736165846002605e-05, "loss": 0.3170907497406006, "step": 7120 }, { "epoch": 0.03061058018426453, "grad_norm": 4.322187900543213, "learning_rate": 9.735734673990843e-05, "loss": 0.3747772216796875, "step": 7130 }, { "epoch": 0.030653512274284537, "grad_norm": 1.444366693496704, "learning_rate": 9.735303501979079e-05, "loss": 0.3339802026748657, "step": 7140 }, { "epoch": 0.030696444364304545, "grad_norm": 0.04174191132187843, "learning_rate": 9.734872329967317e-05, "loss": 0.2466111421585083, "step": 7150 }, { "epoch": 0.03073937645432455, "grad_norm": 0.26613515615463257, "learning_rate": 9.734441157955555e-05, "loss": 0.19390870332717897, "step": 7160 }, { "epoch": 0.030782308544344557, "grad_norm": 7.866360187530518, "learning_rate": 9.734009985943792e-05, "loss": 0.22820355892181396, "step": 7170 }, { "epoch": 0.03082524063436456, "grad_norm": 2.5541608333587646, "learning_rate": 9.73357881393203e-05, "loss": 0.274747633934021, "step": 7180 }, { "epoch": 0.03086817272438457, "grad_norm": 0.15199995040893555, "learning_rate": 9.733147641920268e-05, "loss": 0.10083954334259033, "step": 7190 }, { "epoch": 0.030911104814404573, "grad_norm": 16.545623779296875, "learning_rate": 9.732716469908506e-05, "loss": 0.33107154369354247, "step": 7200 }, { "epoch": 0.03095403690442458, "grad_norm": 6.0723371505737305, "learning_rate": 9.732285297896743e-05, "loss": 0.27182056903839114, "step": 7210 }, { "epoch": 0.03099696899444459, "grad_norm": 3.1646676063537598, "learning_rate": 9.73185412588498e-05, "loss": 0.2546936750411987, "step": 7220 }, { "epoch": 0.031039901084464593, "grad_norm": 0.054179687052965164, "learning_rate": 9.731422953873219e-05, "loss": 0.6185836315155029, "step": 7230 }, { "epoch": 0.0310828331744846, "grad_norm": 0.5206108093261719, "learning_rate": 9.730991781861457e-05, "loss": 0.37745678424835205, "step": 7240 }, { "epoch": 0.031125765264504605, "grad_norm": 0.08467987179756165, "learning_rate": 9.730560609849694e-05, "loss": 0.31608712673187256, "step": 7250 }, { "epoch": 0.031168697354524613, "grad_norm": 1.9469053745269775, "learning_rate": 9.730129437837932e-05, "loss": 0.32785539627075194, "step": 7260 }, { "epoch": 0.03121162944454462, "grad_norm": 0.5517263412475586, "learning_rate": 9.72969826582617e-05, "loss": 0.14303758144378662, "step": 7270 }, { "epoch": 0.03125456153456463, "grad_norm": 2.292470693588257, "learning_rate": 9.729267093814408e-05, "loss": 0.40169806480407716, "step": 7280 }, { "epoch": 0.03129749362458463, "grad_norm": 0.21029160916805267, "learning_rate": 9.728835921802645e-05, "loss": 0.1665691018104553, "step": 7290 }, { "epoch": 0.03134042571460464, "grad_norm": 1.327445387840271, "learning_rate": 9.728404749790883e-05, "loss": 0.2855032682418823, "step": 7300 }, { "epoch": 0.031383357804624645, "grad_norm": 0.7883787155151367, "learning_rate": 9.72797357777912e-05, "loss": 0.18686634302139282, "step": 7310 }, { "epoch": 0.03142628989464465, "grad_norm": 1.578082799911499, "learning_rate": 9.727542405767357e-05, "loss": 0.6148167610168457, "step": 7320 }, { "epoch": 0.031469221984664654, "grad_norm": 3.069875478744507, "learning_rate": 9.727111233755595e-05, "loss": 0.41655569076538085, "step": 7330 }, { "epoch": 0.03151215407468466, "grad_norm": 0.0648500993847847, "learning_rate": 9.726680061743833e-05, "loss": 0.13719666004180908, "step": 7340 }, { "epoch": 0.03155508616470467, "grad_norm": 0.69016033411026, "learning_rate": 9.72624888973207e-05, "loss": 0.12963091135025023, "step": 7350 }, { "epoch": 0.03159801825472468, "grad_norm": 6.941385746002197, "learning_rate": 9.725817717720308e-05, "loss": 0.16945242881774902, "step": 7360 }, { "epoch": 0.031640950344744685, "grad_norm": 0.0247952863574028, "learning_rate": 9.725386545708546e-05, "loss": 0.19187296628952027, "step": 7370 }, { "epoch": 0.031683882434764686, "grad_norm": 14.04268741607666, "learning_rate": 9.724955373696784e-05, "loss": 0.1674031972885132, "step": 7380 }, { "epoch": 0.031726814524784694, "grad_norm": 0.12602205574512482, "learning_rate": 9.72452420168502e-05, "loss": 0.3435555934906006, "step": 7390 }, { "epoch": 0.0317697466148047, "grad_norm": 0.6790499687194824, "learning_rate": 9.724093029673258e-05, "loss": 0.4150827407836914, "step": 7400 }, { "epoch": 0.03181267870482471, "grad_norm": 0.016099615022540092, "learning_rate": 9.723661857661495e-05, "loss": 0.144336473941803, "step": 7410 }, { "epoch": 0.03185561079484472, "grad_norm": 2.7291247844696045, "learning_rate": 9.723230685649733e-05, "loss": 0.4487579822540283, "step": 7420 }, { "epoch": 0.03189854288486472, "grad_norm": 3.9720041751861572, "learning_rate": 9.722799513637971e-05, "loss": 0.22907283306121826, "step": 7430 }, { "epoch": 0.031941474974884726, "grad_norm": 0.013614165596663952, "learning_rate": 9.722368341626209e-05, "loss": 0.1962208032608032, "step": 7440 }, { "epoch": 0.031984407064904734, "grad_norm": 9.466171264648438, "learning_rate": 9.721937169614446e-05, "loss": 0.1261853337287903, "step": 7450 }, { "epoch": 0.03202733915492474, "grad_norm": 16.18906593322754, "learning_rate": 9.721505997602684e-05, "loss": 0.1645114541053772, "step": 7460 }, { "epoch": 0.03207027124494475, "grad_norm": 0.03784366324543953, "learning_rate": 9.721074825590922e-05, "loss": 0.20082948207855225, "step": 7470 }, { "epoch": 0.03211320333496475, "grad_norm": 0.001986953429877758, "learning_rate": 9.72064365357916e-05, "loss": 0.2706491231918335, "step": 7480 }, { "epoch": 0.03215613542498476, "grad_norm": 8.900535583496094, "learning_rate": 9.720212481567397e-05, "loss": 0.34498045444488523, "step": 7490 }, { "epoch": 0.032199067515004766, "grad_norm": 13.675838470458984, "learning_rate": 9.719781309555635e-05, "loss": 0.27431817054748536, "step": 7500 }, { "epoch": 0.032241999605024774, "grad_norm": 0.008109820075333118, "learning_rate": 9.719350137543873e-05, "loss": 0.45601425170898435, "step": 7510 }, { "epoch": 0.03228493169504478, "grad_norm": 0.060955505818128586, "learning_rate": 9.71891896553211e-05, "loss": 0.3038959980010986, "step": 7520 }, { "epoch": 0.03232786378506478, "grad_norm": 0.03209978714585304, "learning_rate": 9.718487793520348e-05, "loss": 0.22680823802947997, "step": 7530 }, { "epoch": 0.03237079587508479, "grad_norm": 1.2087823152542114, "learning_rate": 9.718056621508586e-05, "loss": 0.2598066806793213, "step": 7540 }, { "epoch": 0.0324137279651048, "grad_norm": 0.1490267813205719, "learning_rate": 9.717625449496822e-05, "loss": 0.3735307216644287, "step": 7550 }, { "epoch": 0.032456660055124806, "grad_norm": 1.4957003593444824, "learning_rate": 9.71719427748506e-05, "loss": 0.22178177833557128, "step": 7560 }, { "epoch": 0.03249959214514481, "grad_norm": 4.107588291168213, "learning_rate": 9.716763105473298e-05, "loss": 0.2578477382659912, "step": 7570 }, { "epoch": 0.032542524235164814, "grad_norm": 1.4969598054885864, "learning_rate": 9.716331933461536e-05, "loss": 0.2491441249847412, "step": 7580 }, { "epoch": 0.03258545632518482, "grad_norm": 0.012556682340800762, "learning_rate": 9.715900761449773e-05, "loss": 0.10336203575134277, "step": 7590 }, { "epoch": 0.03262838841520483, "grad_norm": 0.5561529994010925, "learning_rate": 9.715469589438011e-05, "loss": 0.2101090669631958, "step": 7600 }, { "epoch": 0.03267132050522484, "grad_norm": 0.02059774659574032, "learning_rate": 9.715038417426249e-05, "loss": 0.11110254526138305, "step": 7610 }, { "epoch": 0.03271425259524484, "grad_norm": 0.034311916679143906, "learning_rate": 9.714607245414486e-05, "loss": 0.364142918586731, "step": 7620 }, { "epoch": 0.03275718468526485, "grad_norm": 11.460116386413574, "learning_rate": 9.714176073402724e-05, "loss": 0.3021425247192383, "step": 7630 }, { "epoch": 0.032800116775284854, "grad_norm": 4.155307769775391, "learning_rate": 9.71374490139096e-05, "loss": 0.5520795822143555, "step": 7640 }, { "epoch": 0.03284304886530486, "grad_norm": 0.372206449508667, "learning_rate": 9.713313729379198e-05, "loss": 0.3026304006576538, "step": 7650 }, { "epoch": 0.03288598095532487, "grad_norm": 0.045250970870256424, "learning_rate": 9.712882557367436e-05, "loss": 0.21342151165008544, "step": 7660 }, { "epoch": 0.03292891304534487, "grad_norm": 0.015753526240587234, "learning_rate": 9.712451385355674e-05, "loss": 0.21125319004058837, "step": 7670 }, { "epoch": 0.03297184513536488, "grad_norm": 0.05379635840654373, "learning_rate": 9.712020213343912e-05, "loss": 0.29408860206604004, "step": 7680 }, { "epoch": 0.033014777225384886, "grad_norm": 0.02940794639289379, "learning_rate": 9.711589041332149e-05, "loss": 0.34212937355041506, "step": 7690 }, { "epoch": 0.033057709315404894, "grad_norm": 3.105747938156128, "learning_rate": 9.711157869320387e-05, "loss": 0.43459086418151854, "step": 7700 }, { "epoch": 0.0331006414054249, "grad_norm": 0.9573909640312195, "learning_rate": 9.710726697308625e-05, "loss": 0.08446192145347595, "step": 7710 }, { "epoch": 0.0331435734954449, "grad_norm": 0.06909049302339554, "learning_rate": 9.710295525296862e-05, "loss": 0.16420615911483766, "step": 7720 }, { "epoch": 0.03318650558546491, "grad_norm": 1.0823670625686646, "learning_rate": 9.7098643532851e-05, "loss": 0.3220996618270874, "step": 7730 }, { "epoch": 0.03322943767548492, "grad_norm": 0.010625721886754036, "learning_rate": 9.709433181273338e-05, "loss": 0.27078948020935056, "step": 7740 }, { "epoch": 0.033272369765504926, "grad_norm": 0.01120381336659193, "learning_rate": 9.709002009261576e-05, "loss": 0.20582714080810546, "step": 7750 }, { "epoch": 0.03331530185552493, "grad_norm": 0.04764774441719055, "learning_rate": 9.708570837249813e-05, "loss": 0.2562859296798706, "step": 7760 }, { "epoch": 0.033358233945544935, "grad_norm": 0.02361004240810871, "learning_rate": 9.708139665238051e-05, "loss": 0.1400162696838379, "step": 7770 }, { "epoch": 0.03340116603556494, "grad_norm": 11.817069053649902, "learning_rate": 9.707708493226289e-05, "loss": 0.31052820682525634, "step": 7780 }, { "epoch": 0.03344409812558495, "grad_norm": 0.22933602333068848, "learning_rate": 9.707277321214527e-05, "loss": 0.3289341449737549, "step": 7790 }, { "epoch": 0.03348703021560496, "grad_norm": 0.6220147609710693, "learning_rate": 9.706846149202763e-05, "loss": 0.19306013584136963, "step": 7800 }, { "epoch": 0.03352996230562496, "grad_norm": 2.844313383102417, "learning_rate": 9.706414977191001e-05, "loss": 0.08610989451408387, "step": 7810 }, { "epoch": 0.03357289439564497, "grad_norm": 5.575182914733887, "learning_rate": 9.705983805179238e-05, "loss": 0.2134272575378418, "step": 7820 }, { "epoch": 0.033615826485664975, "grad_norm": 0.06617298722267151, "learning_rate": 9.705552633167476e-05, "loss": 0.19738913774490358, "step": 7830 }, { "epoch": 0.03365875857568498, "grad_norm": 0.01672886684536934, "learning_rate": 9.705121461155714e-05, "loss": 0.25262153148651123, "step": 7840 }, { "epoch": 0.03370169066570499, "grad_norm": 0.4519057273864746, "learning_rate": 9.704690289143952e-05, "loss": 0.1626684069633484, "step": 7850 }, { "epoch": 0.03374462275572499, "grad_norm": 0.6325151920318604, "learning_rate": 9.70425911713219e-05, "loss": 0.3695283651351929, "step": 7860 }, { "epoch": 0.033787554845745, "grad_norm": 1.0352468490600586, "learning_rate": 9.703827945120427e-05, "loss": 0.260190749168396, "step": 7870 }, { "epoch": 0.03383048693576501, "grad_norm": 0.08981958031654358, "learning_rate": 9.703396773108663e-05, "loss": 0.29672319889068605, "step": 7880 }, { "epoch": 0.033873419025785015, "grad_norm": 0.9225382208824158, "learning_rate": 9.702965601096901e-05, "loss": 0.18002406358718873, "step": 7890 }, { "epoch": 0.03391635111580502, "grad_norm": 2.109400749206543, "learning_rate": 9.702534429085139e-05, "loss": 0.10338685512542725, "step": 7900 }, { "epoch": 0.033959283205825024, "grad_norm": 0.05137547478079796, "learning_rate": 9.702103257073377e-05, "loss": 0.24235978126525878, "step": 7910 }, { "epoch": 0.03400221529584503, "grad_norm": 1.9895689487457275, "learning_rate": 9.701672085061614e-05, "loss": 0.17490397691726683, "step": 7920 }, { "epoch": 0.03404514738586504, "grad_norm": 4.55086088180542, "learning_rate": 9.701240913049852e-05, "loss": 0.2837538719177246, "step": 7930 }, { "epoch": 0.03408807947588505, "grad_norm": 6.31823205947876, "learning_rate": 9.70080974103809e-05, "loss": 0.2951412916183472, "step": 7940 }, { "epoch": 0.034131011565905055, "grad_norm": 0.03623563051223755, "learning_rate": 9.700378569026328e-05, "loss": 0.06038150191307068, "step": 7950 }, { "epoch": 0.034173943655925056, "grad_norm": 0.15342311561107635, "learning_rate": 9.699947397014565e-05, "loss": 0.24344947338104247, "step": 7960 }, { "epoch": 0.03421687574594506, "grad_norm": 5.179479598999023, "learning_rate": 9.699516225002803e-05, "loss": 0.20732619762420654, "step": 7970 }, { "epoch": 0.03425980783596507, "grad_norm": 0.01350562646985054, "learning_rate": 9.699085052991041e-05, "loss": 0.3624278545379639, "step": 7980 }, { "epoch": 0.03430273992598508, "grad_norm": 3.3068830966949463, "learning_rate": 9.698653880979279e-05, "loss": 0.45979924201965333, "step": 7990 }, { "epoch": 0.03434567201600508, "grad_norm": 1.9648011922836304, "learning_rate": 9.698222708967516e-05, "loss": 0.38761961460113525, "step": 8000 }, { "epoch": 0.03434567201600508, "eval_loss": 0.5358114838600159, "eval_runtime": 27.4067, "eval_samples_per_second": 3.649, "eval_steps_per_second": 3.649, "step": 8000 }, { "epoch": 0.03438860410602509, "grad_norm": 7.072238445281982, "learning_rate": 9.697791536955754e-05, "loss": 0.32979588508605956, "step": 8010 }, { "epoch": 0.034431536196045096, "grad_norm": 0.47375860810279846, "learning_rate": 9.697360364943992e-05, "loss": 0.35645227432250975, "step": 8020 }, { "epoch": 0.0344744682860651, "grad_norm": 6.805625915527344, "learning_rate": 9.69692919293223e-05, "loss": 0.45725326538085936, "step": 8030 }, { "epoch": 0.03451740037608511, "grad_norm": 1.7845739126205444, "learning_rate": 9.696498020920467e-05, "loss": 0.3593878269195557, "step": 8040 }, { "epoch": 0.03456033246610511, "grad_norm": 3.2586820125579834, "learning_rate": 9.696066848908704e-05, "loss": 0.22868585586547852, "step": 8050 }, { "epoch": 0.03460326455612512, "grad_norm": 3.4146788120269775, "learning_rate": 9.695635676896941e-05, "loss": 0.33050105571746824, "step": 8060 }, { "epoch": 0.03464619664614513, "grad_norm": 0.10858240723609924, "learning_rate": 9.695204504885179e-05, "loss": 0.3089393377304077, "step": 8070 }, { "epoch": 0.034689128736165135, "grad_norm": 4.090073108673096, "learning_rate": 9.694773332873417e-05, "loss": 0.2587622404098511, "step": 8080 }, { "epoch": 0.03473206082618514, "grad_norm": 1.6025853157043457, "learning_rate": 9.694342160861655e-05, "loss": 0.2438603162765503, "step": 8090 }, { "epoch": 0.034774992916205144, "grad_norm": 1.3067179918289185, "learning_rate": 9.693910988849892e-05, "loss": 0.40871009826660154, "step": 8100 }, { "epoch": 0.03481792500622515, "grad_norm": 1.1220425367355347, "learning_rate": 9.69347981683813e-05, "loss": 0.2651923656463623, "step": 8110 }, { "epoch": 0.03486085709624516, "grad_norm": 5.3396124839782715, "learning_rate": 9.693048644826368e-05, "loss": 0.3964865684509277, "step": 8120 }, { "epoch": 0.03490378918626517, "grad_norm": 0.22761444747447968, "learning_rate": 9.692617472814604e-05, "loss": 0.3448660373687744, "step": 8130 }, { "epoch": 0.034946721276285175, "grad_norm": 1.0162402391433716, "learning_rate": 9.692186300802842e-05, "loss": 0.4648458480834961, "step": 8140 }, { "epoch": 0.034989653366305176, "grad_norm": 4.638467311859131, "learning_rate": 9.69175512879108e-05, "loss": 0.18537842035293578, "step": 8150 }, { "epoch": 0.035032585456325184, "grad_norm": 0.12439771741628647, "learning_rate": 9.691323956779317e-05, "loss": 0.34654300212860106, "step": 8160 }, { "epoch": 0.03507551754634519, "grad_norm": 1.0256967544555664, "learning_rate": 9.690892784767555e-05, "loss": 0.19255000352859497, "step": 8170 }, { "epoch": 0.0351184496363652, "grad_norm": 0.98158860206604, "learning_rate": 9.690461612755793e-05, "loss": 0.1516349196434021, "step": 8180 }, { "epoch": 0.0351613817263852, "grad_norm": 1.5796022415161133, "learning_rate": 9.69003044074403e-05, "loss": 0.19203962087631227, "step": 8190 }, { "epoch": 0.03520431381640521, "grad_norm": 0.25830766558647156, "learning_rate": 9.68959926873227e-05, "loss": 0.2479844331741333, "step": 8200 }, { "epoch": 0.035247245906425216, "grad_norm": 1.2110050916671753, "learning_rate": 9.689168096720506e-05, "loss": 0.13889732360839843, "step": 8210 }, { "epoch": 0.035290177996445224, "grad_norm": 3.430422306060791, "learning_rate": 9.688736924708744e-05, "loss": 0.3377073287963867, "step": 8220 }, { "epoch": 0.03533311008646523, "grad_norm": 1.386106014251709, "learning_rate": 9.688305752696981e-05, "loss": 0.3620351552963257, "step": 8230 }, { "epoch": 0.03537604217648523, "grad_norm": 0.27436643838882446, "learning_rate": 9.687874580685219e-05, "loss": 0.0908839225769043, "step": 8240 }, { "epoch": 0.03541897426650524, "grad_norm": 0.08288343250751495, "learning_rate": 9.687443408673457e-05, "loss": 0.20369722843170165, "step": 8250 }, { "epoch": 0.03546190635652525, "grad_norm": 0.0119446637108922, "learning_rate": 9.687012236661695e-05, "loss": 0.4165679931640625, "step": 8260 }, { "epoch": 0.035504838446545256, "grad_norm": 0.8607707619667053, "learning_rate": 9.686581064649932e-05, "loss": 0.13348482847213744, "step": 8270 }, { "epoch": 0.035547770536565264, "grad_norm": 1.3179068565368652, "learning_rate": 9.68614989263817e-05, "loss": 0.22979404926300048, "step": 8280 }, { "epoch": 0.035590702626585265, "grad_norm": 1.5900253057479858, "learning_rate": 9.685718720626407e-05, "loss": 0.35587825775146487, "step": 8290 }, { "epoch": 0.03563363471660527, "grad_norm": 0.029487568885087967, "learning_rate": 9.685287548614644e-05, "loss": 0.20372748374938965, "step": 8300 }, { "epoch": 0.03567656680662528, "grad_norm": 0.9263527989387512, "learning_rate": 9.684856376602882e-05, "loss": 0.17852827310562133, "step": 8310 }, { "epoch": 0.03571949889664529, "grad_norm": 2.308929204940796, "learning_rate": 9.68442520459112e-05, "loss": 0.21045780181884766, "step": 8320 }, { "epoch": 0.035762430986665296, "grad_norm": 0.1372317671775818, "learning_rate": 9.683994032579357e-05, "loss": 0.1990463376045227, "step": 8330 }, { "epoch": 0.0358053630766853, "grad_norm": 0.036591432988643646, "learning_rate": 9.683562860567595e-05, "loss": 0.33791069984436034, "step": 8340 }, { "epoch": 0.035848295166705305, "grad_norm": 0.03384740278124809, "learning_rate": 9.683131688555833e-05, "loss": 0.17970336675643922, "step": 8350 }, { "epoch": 0.03589122725672531, "grad_norm": 0.06115401163697243, "learning_rate": 9.68270051654407e-05, "loss": 0.38286256790161133, "step": 8360 }, { "epoch": 0.03593415934674532, "grad_norm": 1.263088345527649, "learning_rate": 9.682269344532308e-05, "loss": 0.42797145843505857, "step": 8370 }, { "epoch": 0.03597709143676533, "grad_norm": 1.6685699224472046, "learning_rate": 9.681838172520545e-05, "loss": 0.4042956352233887, "step": 8380 }, { "epoch": 0.03602002352678533, "grad_norm": 0.38085174560546875, "learning_rate": 9.681407000508783e-05, "loss": 0.3088876008987427, "step": 8390 }, { "epoch": 0.03606295561680534, "grad_norm": 1.448493242263794, "learning_rate": 9.68097582849702e-05, "loss": 0.2769587755203247, "step": 8400 }, { "epoch": 0.036105887706825344, "grad_norm": 0.03457849845290184, "learning_rate": 9.680544656485258e-05, "loss": 0.15025020837783815, "step": 8410 }, { "epoch": 0.03614881979684535, "grad_norm": 0.06887169182300568, "learning_rate": 9.680113484473497e-05, "loss": 0.2938064098358154, "step": 8420 }, { "epoch": 0.03619175188686535, "grad_norm": 1.1634526252746582, "learning_rate": 9.679682312461735e-05, "loss": 0.2335674524307251, "step": 8430 }, { "epoch": 0.03623468397688536, "grad_norm": 1.4806514978408813, "learning_rate": 9.679251140449973e-05, "loss": 0.3837287902832031, "step": 8440 }, { "epoch": 0.03627761606690537, "grad_norm": 2.3947699069976807, "learning_rate": 9.67881996843821e-05, "loss": 0.22996058464050292, "step": 8450 }, { "epoch": 0.03632054815692538, "grad_norm": 1.957567811012268, "learning_rate": 9.678388796426447e-05, "loss": 0.35875611305236815, "step": 8460 }, { "epoch": 0.036363480246945384, "grad_norm": 1.2009612321853638, "learning_rate": 9.677957624414684e-05, "loss": 0.3392146587371826, "step": 8470 }, { "epoch": 0.036406412336965385, "grad_norm": 0.06605121493339539, "learning_rate": 9.677526452402922e-05, "loss": 0.13843777179718017, "step": 8480 }, { "epoch": 0.03644934442698539, "grad_norm": 0.011332832276821136, "learning_rate": 9.67709528039116e-05, "loss": 0.1935071587562561, "step": 8490 }, { "epoch": 0.0364922765170054, "grad_norm": 0.030879681929945946, "learning_rate": 9.676664108379398e-05, "loss": 0.24691197872161866, "step": 8500 }, { "epoch": 0.03653520860702541, "grad_norm": 1.4498698711395264, "learning_rate": 9.676232936367635e-05, "loss": 0.2351780891418457, "step": 8510 }, { "epoch": 0.036578140697045416, "grad_norm": 11.490880966186523, "learning_rate": 9.675801764355873e-05, "loss": 0.20597553253173828, "step": 8520 }, { "epoch": 0.03662107278706542, "grad_norm": 26.793243408203125, "learning_rate": 9.675370592344111e-05, "loss": 0.30491702556610106, "step": 8530 }, { "epoch": 0.036664004877085425, "grad_norm": 2.609809637069702, "learning_rate": 9.674939420332347e-05, "loss": 0.28061366081237793, "step": 8540 }, { "epoch": 0.03670693696710543, "grad_norm": 3.811695098876953, "learning_rate": 9.674508248320585e-05, "loss": 0.20124802589416504, "step": 8550 }, { "epoch": 0.03674986905712544, "grad_norm": 2.070798635482788, "learning_rate": 9.674077076308823e-05, "loss": 0.26569738388061526, "step": 8560 }, { "epoch": 0.03679280114714545, "grad_norm": 0.004637656267732382, "learning_rate": 9.67364590429706e-05, "loss": 0.23294551372528077, "step": 8570 }, { "epoch": 0.03683573323716545, "grad_norm": 1.5657484531402588, "learning_rate": 9.673214732285298e-05, "loss": 0.5221776008605957, "step": 8580 }, { "epoch": 0.03687866532718546, "grad_norm": 0.05727636069059372, "learning_rate": 9.672783560273536e-05, "loss": 0.30965242385864256, "step": 8590 }, { "epoch": 0.036921597417205465, "grad_norm": 0.1257968246936798, "learning_rate": 9.672352388261774e-05, "loss": 0.3840054512023926, "step": 8600 }, { "epoch": 0.03696452950722547, "grad_norm": 1.1655479669570923, "learning_rate": 9.671921216250011e-05, "loss": 0.26555461883544923, "step": 8610 }, { "epoch": 0.037007461597245474, "grad_norm": 1.1678481101989746, "learning_rate": 9.671490044238248e-05, "loss": 0.22103781700134278, "step": 8620 }, { "epoch": 0.03705039368726548, "grad_norm": 2.2759597301483154, "learning_rate": 9.671058872226485e-05, "loss": 0.2630741596221924, "step": 8630 }, { "epoch": 0.03709332577728549, "grad_norm": 0.07085221260786057, "learning_rate": 9.670627700214725e-05, "loss": 0.36764571666717527, "step": 8640 }, { "epoch": 0.0371362578673055, "grad_norm": 0.13278961181640625, "learning_rate": 9.670196528202962e-05, "loss": 0.21104824542999268, "step": 8650 }, { "epoch": 0.037179189957325505, "grad_norm": 0.27886033058166504, "learning_rate": 9.6697653561912e-05, "loss": 0.20368828773498535, "step": 8660 }, { "epoch": 0.037222122047345506, "grad_norm": 0.15865810215473175, "learning_rate": 9.669334184179438e-05, "loss": 0.11764969825744628, "step": 8670 }, { "epoch": 0.037265054137365514, "grad_norm": 2.8486526012420654, "learning_rate": 9.668903012167675e-05, "loss": 0.3190793991088867, "step": 8680 }, { "epoch": 0.03730798622738552, "grad_norm": 2.6860265731811523, "learning_rate": 9.668471840155913e-05, "loss": 0.34027435779571535, "step": 8690 }, { "epoch": 0.03735091831740553, "grad_norm": 0.10721374303102493, "learning_rate": 9.668040668144151e-05, "loss": 0.14675365686416625, "step": 8700 }, { "epoch": 0.03739385040742554, "grad_norm": 1.0816599130630493, "learning_rate": 9.667609496132387e-05, "loss": 0.20134527683258058, "step": 8710 }, { "epoch": 0.03743678249744554, "grad_norm": 3.764616012573242, "learning_rate": 9.667178324120625e-05, "loss": 0.37100017070770264, "step": 8720 }, { "epoch": 0.037479714587465546, "grad_norm": 0.07891738414764404, "learning_rate": 9.666747152108863e-05, "loss": 0.1324814200401306, "step": 8730 }, { "epoch": 0.037522646677485554, "grad_norm": 0.8121300935745239, "learning_rate": 9.6663159800971e-05, "loss": 0.1912623643875122, "step": 8740 }, { "epoch": 0.03756557876750556, "grad_norm": 0.6871600151062012, "learning_rate": 9.665884808085338e-05, "loss": 0.40148634910583497, "step": 8750 }, { "epoch": 0.03760851085752557, "grad_norm": 1.060194492340088, "learning_rate": 9.665453636073576e-05, "loss": 0.23012053966522217, "step": 8760 }, { "epoch": 0.03765144294754557, "grad_norm": 4.619421005249023, "learning_rate": 9.665022464061814e-05, "loss": 0.447019100189209, "step": 8770 }, { "epoch": 0.03769437503756558, "grad_norm": 1.044754147529602, "learning_rate": 9.664591292050051e-05, "loss": 0.2604886293411255, "step": 8780 }, { "epoch": 0.037737307127585586, "grad_norm": 0.06234363093972206, "learning_rate": 9.664160120038288e-05, "loss": 0.3195174694061279, "step": 8790 }, { "epoch": 0.03778023921760559, "grad_norm": 10.548328399658203, "learning_rate": 9.663728948026526e-05, "loss": 0.5750315666198731, "step": 8800 }, { "epoch": 0.0378231713076256, "grad_norm": 0.00025344491587020457, "learning_rate": 9.663297776014763e-05, "loss": 0.09640651941299438, "step": 8810 }, { "epoch": 0.0378661033976456, "grad_norm": 0.042145974934101105, "learning_rate": 9.662866604003001e-05, "loss": 0.42340850830078125, "step": 8820 }, { "epoch": 0.03790903548766561, "grad_norm": 13.936864852905273, "learning_rate": 9.662435431991239e-05, "loss": 0.2134354829788208, "step": 8830 }, { "epoch": 0.03795196757768562, "grad_norm": 7.359281539916992, "learning_rate": 9.662004259979477e-05, "loss": 0.3773427248001099, "step": 8840 }, { "epoch": 0.037994899667705626, "grad_norm": 1.1258646249771118, "learning_rate": 9.661573087967714e-05, "loss": 0.15646349191665648, "step": 8850 }, { "epoch": 0.038037831757725626, "grad_norm": 0.6341869831085205, "learning_rate": 9.661141915955952e-05, "loss": 0.23707275390625, "step": 8860 }, { "epoch": 0.038080763847745634, "grad_norm": 0.02454477548599243, "learning_rate": 9.66071074394419e-05, "loss": 0.3640810251235962, "step": 8870 }, { "epoch": 0.03812369593776564, "grad_norm": 0.47005584836006165, "learning_rate": 9.660279571932427e-05, "loss": 0.1894970417022705, "step": 8880 }, { "epoch": 0.03816662802778565, "grad_norm": 4.86158561706543, "learning_rate": 9.659848399920665e-05, "loss": 0.23494954109191896, "step": 8890 }, { "epoch": 0.03820956011780566, "grad_norm": 1.4044920206069946, "learning_rate": 9.659417227908903e-05, "loss": 0.23539886474609376, "step": 8900 }, { "epoch": 0.03825249220782566, "grad_norm": 1.6679009199142456, "learning_rate": 9.65898605589714e-05, "loss": 0.32893080711364747, "step": 8910 }, { "epoch": 0.038295424297845666, "grad_norm": 0.13777175545692444, "learning_rate": 9.658554883885378e-05, "loss": 0.35626609325408937, "step": 8920 }, { "epoch": 0.038338356387865674, "grad_norm": 2.8348093032836914, "learning_rate": 9.658123711873616e-05, "loss": 0.24591367244720458, "step": 8930 }, { "epoch": 0.03838128847788568, "grad_norm": 5.880456447601318, "learning_rate": 9.657692539861854e-05, "loss": 0.40456109046936034, "step": 8940 }, { "epoch": 0.03842422056790569, "grad_norm": 1.8546247482299805, "learning_rate": 9.65726136785009e-05, "loss": 0.34841208457946776, "step": 8950 }, { "epoch": 0.03846715265792569, "grad_norm": 0.12906832993030548, "learning_rate": 9.656830195838328e-05, "loss": 0.19071357250213622, "step": 8960 }, { "epoch": 0.0385100847479457, "grad_norm": 1.4908124208450317, "learning_rate": 9.656399023826566e-05, "loss": 0.14313187599182128, "step": 8970 }, { "epoch": 0.038553016837965706, "grad_norm": 1.9998912811279297, "learning_rate": 9.655967851814803e-05, "loss": 0.27379117012023924, "step": 8980 }, { "epoch": 0.038595948927985714, "grad_norm": 3.8102869987487793, "learning_rate": 9.655536679803041e-05, "loss": 0.18558567762374878, "step": 8990 }, { "epoch": 0.03863888101800572, "grad_norm": 0.010952414944767952, "learning_rate": 9.655105507791279e-05, "loss": 0.2001218318939209, "step": 9000 }, { "epoch": 0.03863888101800572, "eval_loss": 0.5299412608146667, "eval_runtime": 27.3994, "eval_samples_per_second": 3.65, "eval_steps_per_second": 3.65, "step": 9000 }, { "epoch": 0.03868181310802572, "grad_norm": 0.018099522218108177, "learning_rate": 9.654674335779517e-05, "loss": 0.2434915065765381, "step": 9010 }, { "epoch": 0.03872474519804573, "grad_norm": 2.5344526767730713, "learning_rate": 9.654243163767754e-05, "loss": 0.1441143274307251, "step": 9020 }, { "epoch": 0.03876767728806574, "grad_norm": 0.2745101749897003, "learning_rate": 9.653811991755991e-05, "loss": 0.26484873294830324, "step": 9030 }, { "epoch": 0.038810609378085746, "grad_norm": 3.360513925552368, "learning_rate": 9.653380819744228e-05, "loss": 0.1652582883834839, "step": 9040 }, { "epoch": 0.03885354146810575, "grad_norm": 2.069638729095459, "learning_rate": 9.652949647732466e-05, "loss": 0.037511253356933595, "step": 9050 }, { "epoch": 0.038896473558125755, "grad_norm": 0.9083765745162964, "learning_rate": 9.652518475720704e-05, "loss": 0.1980634808540344, "step": 9060 }, { "epoch": 0.03893940564814576, "grad_norm": 2.604505777359009, "learning_rate": 9.652087303708942e-05, "loss": 0.329839825630188, "step": 9070 }, { "epoch": 0.03898233773816577, "grad_norm": 0.0077382526360452175, "learning_rate": 9.65165613169718e-05, "loss": 0.2762957334518433, "step": 9080 }, { "epoch": 0.03902526982818578, "grad_norm": 0.0359419547021389, "learning_rate": 9.651224959685417e-05, "loss": 0.1422470211982727, "step": 9090 }, { "epoch": 0.03906820191820578, "grad_norm": 0.015391089953482151, "learning_rate": 9.650793787673655e-05, "loss": 0.2391214370727539, "step": 9100 }, { "epoch": 0.03911113400822579, "grad_norm": 3.860656976699829, "learning_rate": 9.650362615661893e-05, "loss": 0.3611257553100586, "step": 9110 }, { "epoch": 0.039154066098245795, "grad_norm": 0.0004279070708435029, "learning_rate": 9.64993144365013e-05, "loss": 0.10785633325576782, "step": 9120 }, { "epoch": 0.0391969981882658, "grad_norm": 2.397517204284668, "learning_rate": 9.649500271638368e-05, "loss": 0.2859855890274048, "step": 9130 }, { "epoch": 0.03923993027828581, "grad_norm": 0.003950928803533316, "learning_rate": 9.649069099626606e-05, "loss": 0.41271052360534666, "step": 9140 }, { "epoch": 0.03928286236830581, "grad_norm": 0.005247312132269144, "learning_rate": 9.648637927614844e-05, "loss": 0.015036699175834656, "step": 9150 }, { "epoch": 0.03932579445832582, "grad_norm": 0.058225881308317184, "learning_rate": 9.648206755603081e-05, "loss": 0.23445894718170165, "step": 9160 }, { "epoch": 0.03936872654834583, "grad_norm": 0.16761603951454163, "learning_rate": 9.647775583591319e-05, "loss": 0.2269341230392456, "step": 9170 }, { "epoch": 0.039411658638365835, "grad_norm": 0.09319623559713364, "learning_rate": 9.647344411579557e-05, "loss": 0.2253098726272583, "step": 9180 }, { "epoch": 0.03945459072838584, "grad_norm": 0.0020092339254915714, "learning_rate": 9.646913239567794e-05, "loss": 0.20597522258758544, "step": 9190 }, { "epoch": 0.03949752281840584, "grad_norm": 0.03602172061800957, "learning_rate": 9.646482067556031e-05, "loss": 0.19670722484588624, "step": 9200 }, { "epoch": 0.03954045490842585, "grad_norm": 0.00028024084167554975, "learning_rate": 9.646050895544269e-05, "loss": 0.2556697607040405, "step": 9210 }, { "epoch": 0.03958338699844586, "grad_norm": 0.00420133862644434, "learning_rate": 9.645619723532506e-05, "loss": 0.292952561378479, "step": 9220 }, { "epoch": 0.03962631908846587, "grad_norm": 5.0143351554870605, "learning_rate": 9.645188551520744e-05, "loss": 0.20465404987335206, "step": 9230 }, { "epoch": 0.039669251178485875, "grad_norm": 0.3830595016479492, "learning_rate": 9.644757379508982e-05, "loss": 0.36404032707214357, "step": 9240 }, { "epoch": 0.039712183268505875, "grad_norm": 0.19368067383766174, "learning_rate": 9.64432620749722e-05, "loss": 0.19242271184921264, "step": 9250 }, { "epoch": 0.03975511535852588, "grad_norm": 1.0904446840286255, "learning_rate": 9.643895035485457e-05, "loss": 0.36784839630126953, "step": 9260 }, { "epoch": 0.03979804744854589, "grad_norm": 0.11875243484973907, "learning_rate": 9.643463863473695e-05, "loss": 0.10239818096160888, "step": 9270 }, { "epoch": 0.0398409795385659, "grad_norm": 1.6222749948501587, "learning_rate": 9.643032691461931e-05, "loss": 0.1739649772644043, "step": 9280 }, { "epoch": 0.0398839116285859, "grad_norm": 0.04937111586332321, "learning_rate": 9.642601519450169e-05, "loss": 0.28331646919250486, "step": 9290 }, { "epoch": 0.03992684371860591, "grad_norm": 0.16273736953735352, "learning_rate": 9.642170347438407e-05, "loss": 0.5411765575408936, "step": 9300 }, { "epoch": 0.039969775808625915, "grad_norm": 0.07416556030511856, "learning_rate": 9.641739175426645e-05, "loss": 0.4258988380432129, "step": 9310 }, { "epoch": 0.04001270789864592, "grad_norm": 1.5317014455795288, "learning_rate": 9.641308003414882e-05, "loss": 0.3962272644042969, "step": 9320 }, { "epoch": 0.04005563998866593, "grad_norm": 0.04391628876328468, "learning_rate": 9.64087683140312e-05, "loss": 0.12165892124176025, "step": 9330 }, { "epoch": 0.04009857207868593, "grad_norm": 0.01463572308421135, "learning_rate": 9.640445659391358e-05, "loss": 0.05805497765541077, "step": 9340 }, { "epoch": 0.04014150416870594, "grad_norm": 0.4974989891052246, "learning_rate": 9.640014487379596e-05, "loss": 0.41655526161193845, "step": 9350 }, { "epoch": 0.04018443625872595, "grad_norm": 0.016591578722000122, "learning_rate": 9.639583315367833e-05, "loss": 0.1912694454193115, "step": 9360 }, { "epoch": 0.040227368348745955, "grad_norm": 4.737026214599609, "learning_rate": 9.639152143356071e-05, "loss": 0.31126954555511477, "step": 9370 }, { "epoch": 0.04027030043876596, "grad_norm": 9.977210998535156, "learning_rate": 9.638720971344309e-05, "loss": 0.24631636142730712, "step": 9380 }, { "epoch": 0.040313232528785964, "grad_norm": 2.310892105102539, "learning_rate": 9.638289799332546e-05, "loss": 0.22611894607543945, "step": 9390 }, { "epoch": 0.04035616461880597, "grad_norm": 0.2425278276205063, "learning_rate": 9.637858627320784e-05, "loss": 0.1980130195617676, "step": 9400 }, { "epoch": 0.04039909670882598, "grad_norm": 3.5154387950897217, "learning_rate": 9.637427455309022e-05, "loss": 0.3665068864822388, "step": 9410 }, { "epoch": 0.04044202879884599, "grad_norm": 0.0005664866184815764, "learning_rate": 9.63699628329726e-05, "loss": 0.09622042179107666, "step": 9420 }, { "epoch": 0.040484960888865995, "grad_norm": 0.485542356967926, "learning_rate": 9.636565111285497e-05, "loss": 0.1345600962638855, "step": 9430 }, { "epoch": 0.040527892978885996, "grad_norm": 0.0050071184523403645, "learning_rate": 9.636133939273735e-05, "loss": 0.18029772043228148, "step": 9440 }, { "epoch": 0.040570825068906004, "grad_norm": 0.18960146605968475, "learning_rate": 9.635702767261972e-05, "loss": 0.3446162939071655, "step": 9450 }, { "epoch": 0.04061375715892601, "grad_norm": 1.3484828472137451, "learning_rate": 9.635271595250209e-05, "loss": 0.3293300151824951, "step": 9460 }, { "epoch": 0.04065668924894602, "grad_norm": 11.18226146697998, "learning_rate": 9.634840423238447e-05, "loss": 0.24156620502471923, "step": 9470 }, { "epoch": 0.04069962133896602, "grad_norm": 0.29985514283180237, "learning_rate": 9.634409251226685e-05, "loss": 0.4308938026428223, "step": 9480 }, { "epoch": 0.04074255342898603, "grad_norm": 0.7258480191230774, "learning_rate": 9.633978079214922e-05, "loss": 0.3799870729446411, "step": 9490 }, { "epoch": 0.040785485519006036, "grad_norm": 0.06664323806762695, "learning_rate": 9.63354690720316e-05, "loss": 0.399434494972229, "step": 9500 }, { "epoch": 0.040828417609026044, "grad_norm": 0.0492563471198082, "learning_rate": 9.633115735191398e-05, "loss": 0.253987193107605, "step": 9510 }, { "epoch": 0.04087134969904605, "grad_norm": 0.0004341186722740531, "learning_rate": 9.632684563179636e-05, "loss": 0.22375996112823487, "step": 9520 }, { "epoch": 0.04091428178906605, "grad_norm": 1.2711418867111206, "learning_rate": 9.632253391167872e-05, "loss": 0.4512036800384521, "step": 9530 }, { "epoch": 0.04095721387908606, "grad_norm": 0.993800163269043, "learning_rate": 9.63182221915611e-05, "loss": 0.269797682762146, "step": 9540 }, { "epoch": 0.04100014596910607, "grad_norm": 1.7412045001983643, "learning_rate": 9.631391047144348e-05, "loss": 0.3986591577529907, "step": 9550 }, { "epoch": 0.041043078059126076, "grad_norm": 1.9665592908859253, "learning_rate": 9.630959875132585e-05, "loss": 0.3017338991165161, "step": 9560 }, { "epoch": 0.041086010149146084, "grad_norm": 2.8084936141967773, "learning_rate": 9.630528703120823e-05, "loss": 0.572250509262085, "step": 9570 }, { "epoch": 0.041128942239166084, "grad_norm": 0.5030828714370728, "learning_rate": 9.630097531109061e-05, "loss": 0.16660358905792236, "step": 9580 }, { "epoch": 0.04117187432918609, "grad_norm": 0.05629109963774681, "learning_rate": 9.629666359097298e-05, "loss": 0.3073833465576172, "step": 9590 }, { "epoch": 0.0412148064192061, "grad_norm": 0.2685781419277191, "learning_rate": 9.629235187085536e-05, "loss": 0.13678088188171386, "step": 9600 }, { "epoch": 0.04125773850922611, "grad_norm": 0.522137463092804, "learning_rate": 9.628804015073774e-05, "loss": 0.5181337833404541, "step": 9610 }, { "epoch": 0.041300670599246116, "grad_norm": 4.692966938018799, "learning_rate": 9.628372843062012e-05, "loss": 0.23460650444030762, "step": 9620 }, { "epoch": 0.04134360268926612, "grad_norm": 0.04318451136350632, "learning_rate": 9.62794167105025e-05, "loss": 0.15581555366516114, "step": 9630 }, { "epoch": 0.041386534779286124, "grad_norm": 8.463669776916504, "learning_rate": 9.627510499038487e-05, "loss": 0.2866586923599243, "step": 9640 }, { "epoch": 0.04142946686930613, "grad_norm": 0.04951479285955429, "learning_rate": 9.627079327026725e-05, "loss": 0.3578941345214844, "step": 9650 }, { "epoch": 0.04147239895932614, "grad_norm": 1.2476704120635986, "learning_rate": 9.626648155014963e-05, "loss": 0.23208155632019042, "step": 9660 }, { "epoch": 0.04151533104934614, "grad_norm": 0.04957498982548714, "learning_rate": 9.6262169830032e-05, "loss": 0.09177039861679077, "step": 9670 }, { "epoch": 0.04155826313936615, "grad_norm": 2.3629302978515625, "learning_rate": 9.625785810991438e-05, "loss": 0.35858590602874757, "step": 9680 }, { "epoch": 0.041601195229386156, "grad_norm": 0.026421379297971725, "learning_rate": 9.625354638979674e-05, "loss": 0.19241467714309693, "step": 9690 }, { "epoch": 0.041644127319406164, "grad_norm": 0.681612491607666, "learning_rate": 9.624923466967912e-05, "loss": 0.33527445793151855, "step": 9700 }, { "epoch": 0.04168705940942617, "grad_norm": 5.426156044006348, "learning_rate": 9.62449229495615e-05, "loss": 0.49652848243713377, "step": 9710 }, { "epoch": 0.04172999149944617, "grad_norm": 0.8129890561103821, "learning_rate": 9.624061122944388e-05, "loss": 0.28535943031311034, "step": 9720 }, { "epoch": 0.04177292358946618, "grad_norm": 0.9698901176452637, "learning_rate": 9.623629950932625e-05, "loss": 0.5740030765533447, "step": 9730 }, { "epoch": 0.04181585567948619, "grad_norm": 1.4005306959152222, "learning_rate": 9.623198778920863e-05, "loss": 0.5011133193969727, "step": 9740 }, { "epoch": 0.041858787769506196, "grad_norm": 0.0013849869137629867, "learning_rate": 9.622767606909101e-05, "loss": 0.291644811630249, "step": 9750 }, { "epoch": 0.041901719859526204, "grad_norm": 0.0004153474292252213, "learning_rate": 9.622336434897339e-05, "loss": 0.3130736112594604, "step": 9760 }, { "epoch": 0.041944651949546205, "grad_norm": 7.660757541656494, "learning_rate": 9.621905262885575e-05, "loss": 0.29653196334838866, "step": 9770 }, { "epoch": 0.04198758403956621, "grad_norm": 2.530366897583008, "learning_rate": 9.621474090873813e-05, "loss": 0.16898977756500244, "step": 9780 }, { "epoch": 0.04203051612958622, "grad_norm": 0.23101027309894562, "learning_rate": 9.62104291886205e-05, "loss": 0.12413444519042968, "step": 9790 }, { "epoch": 0.04207344821960623, "grad_norm": 28.41404151916504, "learning_rate": 9.620611746850288e-05, "loss": 0.15441542863845825, "step": 9800 }, { "epoch": 0.042116380309626236, "grad_norm": 1.6069025993347168, "learning_rate": 9.620180574838526e-05, "loss": 0.3309926509857178, "step": 9810 }, { "epoch": 0.04215931239964624, "grad_norm": 0.01833909936249256, "learning_rate": 9.619749402826764e-05, "loss": 0.18249744176864624, "step": 9820 }, { "epoch": 0.042202244489666245, "grad_norm": 0.002266461029648781, "learning_rate": 9.619318230815003e-05, "loss": 0.34940640926361083, "step": 9830 }, { "epoch": 0.04224517657968625, "grad_norm": 0.07000841945409775, "learning_rate": 9.61888705880324e-05, "loss": 0.1899822473526001, "step": 9840 }, { "epoch": 0.04228810866970626, "grad_norm": 0.5853293538093567, "learning_rate": 9.618455886791478e-05, "loss": 0.40880498886108396, "step": 9850 }, { "epoch": 0.04233104075972627, "grad_norm": 1.3320908546447754, "learning_rate": 9.618024714779715e-05, "loss": 0.3591744422912598, "step": 9860 }, { "epoch": 0.04237397284974627, "grad_norm": 0.3487735986709595, "learning_rate": 9.617593542767952e-05, "loss": 0.1401280641555786, "step": 9870 }, { "epoch": 0.04241690493976628, "grad_norm": 0.9527150988578796, "learning_rate": 9.61716237075619e-05, "loss": 0.2530334949493408, "step": 9880 }, { "epoch": 0.042459837029786285, "grad_norm": 16.177743911743164, "learning_rate": 9.616731198744428e-05, "loss": 0.5055931568145752, "step": 9890 }, { "epoch": 0.04250276911980629, "grad_norm": 0.6620330810546875, "learning_rate": 9.616300026732665e-05, "loss": 0.3270712375640869, "step": 9900 }, { "epoch": 0.042545701209826293, "grad_norm": 0.22643379867076874, "learning_rate": 9.615868854720903e-05, "loss": 0.36966261863708494, "step": 9910 }, { "epoch": 0.0425886332998463, "grad_norm": 0.24828827381134033, "learning_rate": 9.615437682709141e-05, "loss": 0.14217541217803956, "step": 9920 }, { "epoch": 0.04263156538986631, "grad_norm": 0.807939887046814, "learning_rate": 9.615006510697379e-05, "loss": 0.3339355230331421, "step": 9930 }, { "epoch": 0.04267449747988632, "grad_norm": 0.060862597078084946, "learning_rate": 9.614575338685615e-05, "loss": 0.24105873107910156, "step": 9940 }, { "epoch": 0.042717429569906325, "grad_norm": 0.017723804339766502, "learning_rate": 9.614144166673853e-05, "loss": 0.192498779296875, "step": 9950 }, { "epoch": 0.042760361659926326, "grad_norm": 4.271297931671143, "learning_rate": 9.61371299466209e-05, "loss": 0.17440344095230104, "step": 9960 }, { "epoch": 0.04280329374994633, "grad_norm": 0.40663942694664, "learning_rate": 9.613281822650328e-05, "loss": 0.17575368881225586, "step": 9970 }, { "epoch": 0.04284622583996634, "grad_norm": 0.34979286789894104, "learning_rate": 9.612850650638566e-05, "loss": 0.29747810363769533, "step": 9980 }, { "epoch": 0.04288915792998635, "grad_norm": 1.386254906654358, "learning_rate": 9.612419478626804e-05, "loss": 0.20161771774291992, "step": 9990 }, { "epoch": 0.04293209002000636, "grad_norm": 3.231767177581787, "learning_rate": 9.611988306615041e-05, "loss": 0.279221773147583, "step": 10000 }, { "epoch": 0.04293209002000636, "eval_loss": 0.5372155904769897, "eval_runtime": 27.4934, "eval_samples_per_second": 3.637, "eval_steps_per_second": 3.637, "step": 10000 }, { "epoch": 0.04297502211002636, "grad_norm": 1.8212069272994995, "learning_rate": 9.611557134603279e-05, "loss": 0.29707605838775636, "step": 10010 }, { "epoch": 0.043017954200046365, "grad_norm": 0.002717594150453806, "learning_rate": 9.611125962591516e-05, "loss": 0.3414538621902466, "step": 10020 }, { "epoch": 0.04306088629006637, "grad_norm": 4.209356307983398, "learning_rate": 9.610694790579753e-05, "loss": 0.2345595121383667, "step": 10030 }, { "epoch": 0.04310381838008638, "grad_norm": 1.0153359174728394, "learning_rate": 9.610263618567991e-05, "loss": 0.4312474250793457, "step": 10040 }, { "epoch": 0.04314675047010639, "grad_norm": 0.9339410662651062, "learning_rate": 9.60983244655623e-05, "loss": 0.33216402530670164, "step": 10050 }, { "epoch": 0.04318968256012639, "grad_norm": 0.008510474115610123, "learning_rate": 9.609401274544468e-05, "loss": 0.09502204060554505, "step": 10060 }, { "epoch": 0.0432326146501464, "grad_norm": 5.747169017791748, "learning_rate": 9.608970102532706e-05, "loss": 0.44903016090393066, "step": 10070 }, { "epoch": 0.043275546740166405, "grad_norm": 0.0010578184155747294, "learning_rate": 9.608538930520943e-05, "loss": 0.0894723355770111, "step": 10080 }, { "epoch": 0.04331847883018641, "grad_norm": 0.317371129989624, "learning_rate": 9.608107758509181e-05, "loss": 0.33584930896759035, "step": 10090 }, { "epoch": 0.043361410920206414, "grad_norm": 0.15126574039459229, "learning_rate": 9.607676586497417e-05, "loss": 0.26381702423095704, "step": 10100 }, { "epoch": 0.04340434301022642, "grad_norm": 0.037118665874004364, "learning_rate": 9.607245414485655e-05, "loss": 0.09493091106414794, "step": 10110 }, { "epoch": 0.04344727510024643, "grad_norm": 0.04892873018980026, "learning_rate": 9.606814242473893e-05, "loss": 0.2089691638946533, "step": 10120 }, { "epoch": 0.04349020719026644, "grad_norm": 0.00034264527494087815, "learning_rate": 9.60638307046213e-05, "loss": 0.29033823013305665, "step": 10130 }, { "epoch": 0.043533139280286445, "grad_norm": 0.2633804976940155, "learning_rate": 9.605951898450368e-05, "loss": 0.4061037540435791, "step": 10140 }, { "epoch": 0.043576071370306446, "grad_norm": 0.3341623842716217, "learning_rate": 9.605520726438606e-05, "loss": 0.30220742225646974, "step": 10150 }, { "epoch": 0.043619003460326454, "grad_norm": 6.020773887634277, "learning_rate": 9.605089554426844e-05, "loss": 0.15623061656951903, "step": 10160 }, { "epoch": 0.04366193555034646, "grad_norm": 0.6320409774780273, "learning_rate": 9.604658382415082e-05, "loss": 0.19078081846237183, "step": 10170 }, { "epoch": 0.04370486764036647, "grad_norm": 0.03318055346608162, "learning_rate": 9.60422721040332e-05, "loss": 0.21724910736083985, "step": 10180 }, { "epoch": 0.04374779973038648, "grad_norm": 0.004824698902666569, "learning_rate": 9.603796038391556e-05, "loss": 0.08828012347221374, "step": 10190 }, { "epoch": 0.04379073182040648, "grad_norm": 0.01676942966878414, "learning_rate": 9.603364866379793e-05, "loss": 0.26290996074676515, "step": 10200 }, { "epoch": 0.043833663910426486, "grad_norm": 0.6830412745475769, "learning_rate": 9.602933694368031e-05, "loss": 0.13312371969223022, "step": 10210 }, { "epoch": 0.043876596000446494, "grad_norm": 3.4054620265960693, "learning_rate": 9.602502522356269e-05, "loss": 0.4577789783477783, "step": 10220 }, { "epoch": 0.0439195280904665, "grad_norm": 2.1453187465667725, "learning_rate": 9.602071350344507e-05, "loss": 0.3155569553375244, "step": 10230 }, { "epoch": 0.04396246018048651, "grad_norm": 3.6915271282196045, "learning_rate": 9.601640178332744e-05, "loss": 0.16579316854476928, "step": 10240 }, { "epoch": 0.04400539227050651, "grad_norm": 1.2764487266540527, "learning_rate": 9.601209006320982e-05, "loss": 0.4332874298095703, "step": 10250 }, { "epoch": 0.04404832436052652, "grad_norm": 0.006339214742183685, "learning_rate": 9.60077783430922e-05, "loss": 0.33229615688323977, "step": 10260 }, { "epoch": 0.044091256450546526, "grad_norm": 1.9073753356933594, "learning_rate": 9.600346662297458e-05, "loss": 0.4840839862823486, "step": 10270 }, { "epoch": 0.044134188540566534, "grad_norm": 0.22293534874916077, "learning_rate": 9.599915490285695e-05, "loss": 0.3915229797363281, "step": 10280 }, { "epoch": 0.04417712063058654, "grad_norm": 0.058444052934646606, "learning_rate": 9.599484318273933e-05, "loss": 0.21457459926605224, "step": 10290 }, { "epoch": 0.04422005272060654, "grad_norm": 2.155538320541382, "learning_rate": 9.599053146262171e-05, "loss": 0.37848100662231443, "step": 10300 }, { "epoch": 0.04426298481062655, "grad_norm": 2.304004430770874, "learning_rate": 9.598621974250409e-05, "loss": 0.33033792972564696, "step": 10310 }, { "epoch": 0.04430591690064656, "grad_norm": 0.19713164865970612, "learning_rate": 9.598190802238646e-05, "loss": 0.21175870895385743, "step": 10320 }, { "epoch": 0.044348848990666566, "grad_norm": 0.32798734307289124, "learning_rate": 9.597759630226884e-05, "loss": 0.25483083724975586, "step": 10330 }, { "epoch": 0.04439178108068657, "grad_norm": 5.827533721923828, "learning_rate": 9.597328458215122e-05, "loss": 0.22512528896331788, "step": 10340 }, { "epoch": 0.044434713170706575, "grad_norm": 2.3751018047332764, "learning_rate": 9.596897286203358e-05, "loss": 0.2391373634338379, "step": 10350 }, { "epoch": 0.04447764526072658, "grad_norm": 0.21251116693019867, "learning_rate": 9.596466114191596e-05, "loss": 0.31760780811309813, "step": 10360 }, { "epoch": 0.04452057735074659, "grad_norm": 0.04659834876656532, "learning_rate": 9.596034942179834e-05, "loss": 0.1353290319442749, "step": 10370 }, { "epoch": 0.0445635094407666, "grad_norm": 0.03678036853671074, "learning_rate": 9.595603770168071e-05, "loss": 0.24096033573150635, "step": 10380 }, { "epoch": 0.0446064415307866, "grad_norm": 0.24158138036727905, "learning_rate": 9.595172598156309e-05, "loss": 0.4555866241455078, "step": 10390 }, { "epoch": 0.04464937362080661, "grad_norm": 0.34433749318122864, "learning_rate": 9.594741426144547e-05, "loss": 0.1974002480506897, "step": 10400 }, { "epoch": 0.044692305710826614, "grad_norm": 1.9104621410369873, "learning_rate": 9.594310254132785e-05, "loss": 0.29447624683380125, "step": 10410 }, { "epoch": 0.04473523780084662, "grad_norm": 1.7005401849746704, "learning_rate": 9.593879082121022e-05, "loss": 0.25979669094085694, "step": 10420 }, { "epoch": 0.04477816989086663, "grad_norm": 0.15459361672401428, "learning_rate": 9.593447910109259e-05, "loss": 0.26453309059143065, "step": 10430 }, { "epoch": 0.04482110198088663, "grad_norm": 0.07266079634428024, "learning_rate": 9.593016738097496e-05, "loss": 0.1707593321800232, "step": 10440 }, { "epoch": 0.04486403407090664, "grad_norm": 1.2248409986495972, "learning_rate": 9.592585566085734e-05, "loss": 0.13180646896362305, "step": 10450 }, { "epoch": 0.04490696616092665, "grad_norm": 0.01891922391951084, "learning_rate": 9.592154394073972e-05, "loss": 0.3238394737243652, "step": 10460 }, { "epoch": 0.044949898250946654, "grad_norm": 3.500919818878174, "learning_rate": 9.59172322206221e-05, "loss": 0.37556891441345214, "step": 10470 }, { "epoch": 0.04499283034096666, "grad_norm": 6.499775409698486, "learning_rate": 9.591292050050447e-05, "loss": 0.48048176765441897, "step": 10480 }, { "epoch": 0.04503576243098666, "grad_norm": 2.8995563983917236, "learning_rate": 9.590860878038685e-05, "loss": 0.23455042839050294, "step": 10490 }, { "epoch": 0.04507869452100667, "grad_norm": 5.567530632019043, "learning_rate": 9.590429706026923e-05, "loss": 0.26941962242126466, "step": 10500 }, { "epoch": 0.04512162661102668, "grad_norm": 1.6310689449310303, "learning_rate": 9.58999853401516e-05, "loss": 0.27340772151947024, "step": 10510 }, { "epoch": 0.045164558701046686, "grad_norm": 0.0813201442360878, "learning_rate": 9.589567362003398e-05, "loss": 0.06676875352859497, "step": 10520 }, { "epoch": 0.04520749079106669, "grad_norm": 0.1687149703502655, "learning_rate": 9.589136189991636e-05, "loss": 0.00955677181482315, "step": 10530 }, { "epoch": 0.045250422881086695, "grad_norm": 0.011493796482682228, "learning_rate": 9.588705017979874e-05, "loss": 0.2811113357543945, "step": 10540 }, { "epoch": 0.0452933549711067, "grad_norm": 33.732940673828125, "learning_rate": 9.588273845968111e-05, "loss": 0.3537412643432617, "step": 10550 }, { "epoch": 0.04533628706112671, "grad_norm": 7.003033638000488, "learning_rate": 9.587842673956349e-05, "loss": 0.21416730880737306, "step": 10560 }, { "epoch": 0.04537921915114672, "grad_norm": 8.222970008850098, "learning_rate": 9.587411501944587e-05, "loss": 0.26668825149536135, "step": 10570 }, { "epoch": 0.04542215124116672, "grad_norm": 1.0692180395126343, "learning_rate": 9.586980329932825e-05, "loss": 0.29593141078948976, "step": 10580 }, { "epoch": 0.04546508333118673, "grad_norm": 0.018847858533263206, "learning_rate": 9.586549157921062e-05, "loss": 0.5067704200744629, "step": 10590 }, { "epoch": 0.045508015421206735, "grad_norm": 1.3209799528121948, "learning_rate": 9.586117985909299e-05, "loss": 0.3315410137176514, "step": 10600 }, { "epoch": 0.04555094751122674, "grad_norm": 0.009103666990995407, "learning_rate": 9.585686813897536e-05, "loss": 0.18447575569152833, "step": 10610 }, { "epoch": 0.04559387960124675, "grad_norm": 0.11048633605241776, "learning_rate": 9.585255641885774e-05, "loss": 0.29539568424224855, "step": 10620 }, { "epoch": 0.04563681169126675, "grad_norm": 1.421401858329773, "learning_rate": 9.584824469874012e-05, "loss": 0.48968944549560545, "step": 10630 }, { "epoch": 0.04567974378128676, "grad_norm": 0.05576359108090401, "learning_rate": 9.58439329786225e-05, "loss": 0.15116746425628663, "step": 10640 }, { "epoch": 0.04572267587130677, "grad_norm": 0.013207978568971157, "learning_rate": 9.583962125850487e-05, "loss": 0.1754152297973633, "step": 10650 }, { "epoch": 0.045765607961326775, "grad_norm": 0.8618055582046509, "learning_rate": 9.583530953838725e-05, "loss": 0.40839419364929197, "step": 10660 }, { "epoch": 0.04580854005134678, "grad_norm": 0.01856234483420849, "learning_rate": 9.583099781826963e-05, "loss": 0.3880528211593628, "step": 10670 }, { "epoch": 0.045851472141366784, "grad_norm": 18.288997650146484, "learning_rate": 9.582668609815199e-05, "loss": 0.13824949264526368, "step": 10680 }, { "epoch": 0.04589440423138679, "grad_norm": 0.08177149295806885, "learning_rate": 9.582237437803437e-05, "loss": 0.2577815055847168, "step": 10690 }, { "epoch": 0.0459373363214068, "grad_norm": 0.2959858775138855, "learning_rate": 9.581806265791675e-05, "loss": 0.11627799272537231, "step": 10700 }, { "epoch": 0.04598026841142681, "grad_norm": 0.02001349814236164, "learning_rate": 9.581375093779912e-05, "loss": 0.24912896156311035, "step": 10710 }, { "epoch": 0.046023200501446815, "grad_norm": 0.01718798652291298, "learning_rate": 9.58094392176815e-05, "loss": 0.3985838651657104, "step": 10720 }, { "epoch": 0.046066132591466816, "grad_norm": 0.04576408863067627, "learning_rate": 9.580512749756388e-05, "loss": 0.14956194162368774, "step": 10730 }, { "epoch": 0.046109064681486824, "grad_norm": 1.2083806991577148, "learning_rate": 9.580081577744626e-05, "loss": 0.36740641593933104, "step": 10740 }, { "epoch": 0.04615199677150683, "grad_norm": 0.023099783807992935, "learning_rate": 9.579650405732863e-05, "loss": 0.40886964797973635, "step": 10750 }, { "epoch": 0.04619492886152684, "grad_norm": 0.1781107783317566, "learning_rate": 9.579219233721101e-05, "loss": 0.4085477352142334, "step": 10760 }, { "epoch": 0.04623786095154684, "grad_norm": 0.4506646692752838, "learning_rate": 9.578788061709339e-05, "loss": 0.37363567352294924, "step": 10770 }, { "epoch": 0.04628079304156685, "grad_norm": 0.011436011642217636, "learning_rate": 9.578356889697577e-05, "loss": 0.15324031114578246, "step": 10780 }, { "epoch": 0.046323725131586856, "grad_norm": 0.6088100671768188, "learning_rate": 9.577925717685814e-05, "loss": 0.42464280128479004, "step": 10790 }, { "epoch": 0.04636665722160686, "grad_norm": 3.944263458251953, "learning_rate": 9.577494545674052e-05, "loss": 0.16977940797805785, "step": 10800 }, { "epoch": 0.04640958931162687, "grad_norm": 0.0476425401866436, "learning_rate": 9.57706337366229e-05, "loss": 0.3056109189987183, "step": 10810 }, { "epoch": 0.04645252140164687, "grad_norm": 0.6663586497306824, "learning_rate": 9.576632201650528e-05, "loss": 0.22884979248046874, "step": 10820 }, { "epoch": 0.04649545349166688, "grad_norm": 0.06515251845121384, "learning_rate": 9.576201029638765e-05, "loss": 0.3779136180877686, "step": 10830 }, { "epoch": 0.04653838558168689, "grad_norm": 1.205527663230896, "learning_rate": 9.575769857627002e-05, "loss": 0.3730917930603027, "step": 10840 }, { "epoch": 0.046581317671706896, "grad_norm": 0.9694238305091858, "learning_rate": 9.57533868561524e-05, "loss": 0.41217889785766604, "step": 10850 }, { "epoch": 0.0466242497617269, "grad_norm": 0.15640544891357422, "learning_rate": 9.574907513603477e-05, "loss": 0.19687557220458984, "step": 10860 }, { "epoch": 0.046667181851746904, "grad_norm": 0.1485452800989151, "learning_rate": 9.574476341591715e-05, "loss": 0.33020169734954835, "step": 10870 }, { "epoch": 0.04671011394176691, "grad_norm": 0.9703245759010315, "learning_rate": 9.574045169579953e-05, "loss": 0.34535951614379884, "step": 10880 }, { "epoch": 0.04675304603178692, "grad_norm": 11.140542984008789, "learning_rate": 9.57361399756819e-05, "loss": 0.3224964618682861, "step": 10890 }, { "epoch": 0.04679597812180693, "grad_norm": 0.07320046424865723, "learning_rate": 9.573182825556428e-05, "loss": 0.17059063911437988, "step": 10900 }, { "epoch": 0.046838910211826935, "grad_norm": 1.3945448398590088, "learning_rate": 9.572751653544666e-05, "loss": 0.16793534755706788, "step": 10910 }, { "epoch": 0.046881842301846936, "grad_norm": 0.10286663472652435, "learning_rate": 9.572320481532904e-05, "loss": 0.3692343711853027, "step": 10920 }, { "epoch": 0.046924774391866944, "grad_norm": 0.030759811401367188, "learning_rate": 9.57188930952114e-05, "loss": 0.21908931732177733, "step": 10930 }, { "epoch": 0.04696770648188695, "grad_norm": 0.07938531041145325, "learning_rate": 9.571458137509378e-05, "loss": 0.09618297815322877, "step": 10940 }, { "epoch": 0.04701063857190696, "grad_norm": 0.6279019117355347, "learning_rate": 9.571026965497615e-05, "loss": 0.16348246335983277, "step": 10950 }, { "epoch": 0.04705357066192696, "grad_norm": 0.03495902940630913, "learning_rate": 9.570595793485853e-05, "loss": 0.19186799526214598, "step": 10960 }, { "epoch": 0.04709650275194697, "grad_norm": 0.7391979098320007, "learning_rate": 9.570164621474091e-05, "loss": 0.11953030824661255, "step": 10970 }, { "epoch": 0.047139434841966976, "grad_norm": 3.563753604888916, "learning_rate": 9.569733449462329e-05, "loss": 0.2138049602508545, "step": 10980 }, { "epoch": 0.047182366931986984, "grad_norm": 0.14636358618736267, "learning_rate": 9.569302277450566e-05, "loss": 0.23743727207183837, "step": 10990 }, { "epoch": 0.04722529902200699, "grad_norm": 0.006646784488111734, "learning_rate": 9.568871105438804e-05, "loss": 0.2026223659515381, "step": 11000 }, { "epoch": 0.04722529902200699, "eval_loss": 0.5371872186660767, "eval_runtime": 27.437, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 11000 }, { "epoch": 0.04726823111202699, "grad_norm": 0.00699937529861927, "learning_rate": 9.568439933427042e-05, "loss": 0.18824267387390137, "step": 11010 }, { "epoch": 0.047311163202047, "grad_norm": 0.8322146534919739, "learning_rate": 9.56800876141528e-05, "loss": 0.3585317611694336, "step": 11020 }, { "epoch": 0.04735409529206701, "grad_norm": 5.2994704246521, "learning_rate": 9.567577589403517e-05, "loss": 0.3288968563079834, "step": 11030 }, { "epoch": 0.047397027382087016, "grad_norm": 1.3787318468093872, "learning_rate": 9.567146417391755e-05, "loss": 0.4345251560211182, "step": 11040 }, { "epoch": 0.047439959472107024, "grad_norm": 7.704769134521484, "learning_rate": 9.566715245379993e-05, "loss": 0.22723698616027832, "step": 11050 }, { "epoch": 0.047482891562127025, "grad_norm": 0.05537894368171692, "learning_rate": 9.56628407336823e-05, "loss": 0.3707094430923462, "step": 11060 }, { "epoch": 0.04752582365214703, "grad_norm": 2.6444385051727295, "learning_rate": 9.565852901356468e-05, "loss": 0.1761788845062256, "step": 11070 }, { "epoch": 0.04756875574216704, "grad_norm": 0.04609530419111252, "learning_rate": 9.565421729344706e-05, "loss": 0.27322142124176024, "step": 11080 }, { "epoch": 0.04761168783218705, "grad_norm": 2.6429717540740967, "learning_rate": 9.564990557332942e-05, "loss": 0.23322317600250245, "step": 11090 }, { "epoch": 0.047654619922207056, "grad_norm": 6.946089744567871, "learning_rate": 9.56455938532118e-05, "loss": 0.37691373825073243, "step": 11100 }, { "epoch": 0.04769755201222706, "grad_norm": 2.606541395187378, "learning_rate": 9.564128213309418e-05, "loss": 0.3111485242843628, "step": 11110 }, { "epoch": 0.047740484102247065, "grad_norm": 0.16900953650474548, "learning_rate": 9.563697041297656e-05, "loss": 0.24888322353363038, "step": 11120 }, { "epoch": 0.04778341619226707, "grad_norm": 0.026469141244888306, "learning_rate": 9.563265869285893e-05, "loss": 0.33678669929504396, "step": 11130 }, { "epoch": 0.04782634828228708, "grad_norm": 0.8681675791740417, "learning_rate": 9.562834697274131e-05, "loss": 0.3018766403198242, "step": 11140 }, { "epoch": 0.04786928037230709, "grad_norm": 0.10563918203115463, "learning_rate": 9.562403525262369e-05, "loss": 0.37344143390655515, "step": 11150 }, { "epoch": 0.04791221246232709, "grad_norm": 0.05082390457391739, "learning_rate": 9.561972353250606e-05, "loss": 0.19082963466644287, "step": 11160 }, { "epoch": 0.0479551445523471, "grad_norm": 2.553626775741577, "learning_rate": 9.561541181238843e-05, "loss": 0.3182950258255005, "step": 11170 }, { "epoch": 0.047998076642367105, "grad_norm": 2.447514533996582, "learning_rate": 9.56111000922708e-05, "loss": 0.41812405586242674, "step": 11180 }, { "epoch": 0.04804100873238711, "grad_norm": 11.06352424621582, "learning_rate": 9.560678837215318e-05, "loss": 0.31888484954833984, "step": 11190 }, { "epoch": 0.04808394082240711, "grad_norm": 6.056185722351074, "learning_rate": 9.560247665203556e-05, "loss": 0.6062085628509521, "step": 11200 }, { "epoch": 0.04812687291242712, "grad_norm": 0.18857668340206146, "learning_rate": 9.559816493191794e-05, "loss": 0.2257009983062744, "step": 11210 }, { "epoch": 0.04816980500244713, "grad_norm": 6.72116756439209, "learning_rate": 9.559385321180032e-05, "loss": 0.24791340827941893, "step": 11220 }, { "epoch": 0.04821273709246714, "grad_norm": 3.8300418853759766, "learning_rate": 9.558954149168269e-05, "loss": 0.34927642345428467, "step": 11230 }, { "epoch": 0.048255669182487144, "grad_norm": 0.015608267858624458, "learning_rate": 9.558522977156508e-05, "loss": 0.2696120023727417, "step": 11240 }, { "epoch": 0.048298601272507145, "grad_norm": 1.1251463890075684, "learning_rate": 9.558091805144746e-05, "loss": 0.49908971786499023, "step": 11250 }, { "epoch": 0.04834153336252715, "grad_norm": 0.09229426085948944, "learning_rate": 9.557660633132982e-05, "loss": 0.3560775279998779, "step": 11260 }, { "epoch": 0.04838446545254716, "grad_norm": 0.010238938964903355, "learning_rate": 9.55722946112122e-05, "loss": 0.19407575130462645, "step": 11270 }, { "epoch": 0.04842739754256717, "grad_norm": 0.0983290895819664, "learning_rate": 9.556798289109458e-05, "loss": 0.1932140111923218, "step": 11280 }, { "epoch": 0.04847032963258718, "grad_norm": 0.5768459439277649, "learning_rate": 9.556367117097696e-05, "loss": 0.41858468055725095, "step": 11290 }, { "epoch": 0.04851326172260718, "grad_norm": 1.8036881685256958, "learning_rate": 9.555935945085933e-05, "loss": 0.25796501636505126, "step": 11300 }, { "epoch": 0.048556193812627185, "grad_norm": 1.1746166944503784, "learning_rate": 9.555504773074171e-05, "loss": 0.22487566471099854, "step": 11310 }, { "epoch": 0.04859912590264719, "grad_norm": 0.12483537197113037, "learning_rate": 9.555073601062409e-05, "loss": 0.15164095163345337, "step": 11320 }, { "epoch": 0.0486420579926672, "grad_norm": 0.050800539553165436, "learning_rate": 9.554642429050647e-05, "loss": 0.3063875675201416, "step": 11330 }, { "epoch": 0.04868499008268721, "grad_norm": 0.03678128495812416, "learning_rate": 9.554211257038883e-05, "loss": 0.3408423662185669, "step": 11340 }, { "epoch": 0.04872792217270721, "grad_norm": 0.03818840906023979, "learning_rate": 9.553780085027121e-05, "loss": 0.2510481357574463, "step": 11350 }, { "epoch": 0.04877085426272722, "grad_norm": 0.05373666435480118, "learning_rate": 9.553348913015358e-05, "loss": 0.45258536338806155, "step": 11360 }, { "epoch": 0.048813786352747225, "grad_norm": 0.08894126862287521, "learning_rate": 9.552917741003596e-05, "loss": 0.3101092576980591, "step": 11370 }, { "epoch": 0.04885671844276723, "grad_norm": 1.3045735359191895, "learning_rate": 9.552486568991834e-05, "loss": 0.31512742042541503, "step": 11380 }, { "epoch": 0.048899650532787234, "grad_norm": 0.8845686912536621, "learning_rate": 9.552055396980072e-05, "loss": 0.3243434429168701, "step": 11390 }, { "epoch": 0.04894258262280724, "grad_norm": 2.371500253677368, "learning_rate": 9.55162422496831e-05, "loss": 0.15619829893112183, "step": 11400 }, { "epoch": 0.04898551471282725, "grad_norm": 0.05966117978096008, "learning_rate": 9.551193052956547e-05, "loss": 0.3500715732574463, "step": 11410 }, { "epoch": 0.04902844680284726, "grad_norm": 1.649623990058899, "learning_rate": 9.550761880944783e-05, "loss": 0.2953296661376953, "step": 11420 }, { "epoch": 0.049071378892867265, "grad_norm": 0.0920541062951088, "learning_rate": 9.550330708933021e-05, "loss": 0.09942615628242493, "step": 11430 }, { "epoch": 0.049114310982887266, "grad_norm": 0.14348438382148743, "learning_rate": 9.549899536921259e-05, "loss": 0.21229536533355714, "step": 11440 }, { "epoch": 0.049157243072907274, "grad_norm": 1.0420409440994263, "learning_rate": 9.549468364909497e-05, "loss": 0.46654543876647947, "step": 11450 }, { "epoch": 0.04920017516292728, "grad_norm": 11.456270217895508, "learning_rate": 9.549037192897736e-05, "loss": 0.4078725814819336, "step": 11460 }, { "epoch": 0.04924310725294729, "grad_norm": 1.5066547393798828, "learning_rate": 9.548606020885974e-05, "loss": 0.32791011333465575, "step": 11470 }, { "epoch": 0.0492860393429673, "grad_norm": 0.03008863516151905, "learning_rate": 9.548174848874211e-05, "loss": 0.13255956172943115, "step": 11480 }, { "epoch": 0.0493289714329873, "grad_norm": 10.941854476928711, "learning_rate": 9.547743676862449e-05, "loss": 0.4925832271575928, "step": 11490 }, { "epoch": 0.049371903523007306, "grad_norm": 0.006078363861888647, "learning_rate": 9.547312504850685e-05, "loss": 0.40673055648803713, "step": 11500 }, { "epoch": 0.049414835613027314, "grad_norm": 0.009038330987095833, "learning_rate": 9.546881332838923e-05, "loss": 0.21685254573822021, "step": 11510 }, { "epoch": 0.04945776770304732, "grad_norm": 0.0976053774356842, "learning_rate": 9.546450160827161e-05, "loss": 0.06418330669403076, "step": 11520 }, { "epoch": 0.04950069979306733, "grad_norm": 0.03044871799647808, "learning_rate": 9.546018988815399e-05, "loss": 0.008297404646873474, "step": 11530 }, { "epoch": 0.04954363188308733, "grad_norm": 2.4783143997192383, "learning_rate": 9.545587816803636e-05, "loss": 0.5636334419250488, "step": 11540 }, { "epoch": 0.04958656397310734, "grad_norm": 0.3153276741504669, "learning_rate": 9.545156644791874e-05, "loss": 0.07382228970527649, "step": 11550 }, { "epoch": 0.049629496063127346, "grad_norm": 4.899248123168945, "learning_rate": 9.544725472780112e-05, "loss": 0.36443448066711426, "step": 11560 }, { "epoch": 0.049672428153147354, "grad_norm": 0.25590115785598755, "learning_rate": 9.54429430076835e-05, "loss": 0.24122922420501708, "step": 11570 }, { "epoch": 0.049715360243167354, "grad_norm": 1.520410418510437, "learning_rate": 9.543863128756586e-05, "loss": 0.32092602252960206, "step": 11580 }, { "epoch": 0.04975829233318736, "grad_norm": 0.017016088590025902, "learning_rate": 9.543431956744824e-05, "loss": 0.2933270215988159, "step": 11590 }, { "epoch": 0.04980122442320737, "grad_norm": 0.07425573468208313, "learning_rate": 9.543000784733061e-05, "loss": 0.29837546348571775, "step": 11600 }, { "epoch": 0.04984415651322738, "grad_norm": 0.09230173379182816, "learning_rate": 9.542569612721299e-05, "loss": 0.2609511375427246, "step": 11610 }, { "epoch": 0.049887088603247386, "grad_norm": 1.69183349609375, "learning_rate": 9.542138440709537e-05, "loss": 0.2415536403656006, "step": 11620 }, { "epoch": 0.049930020693267387, "grad_norm": 0.032961320132017136, "learning_rate": 9.541707268697775e-05, "loss": 0.19287939071655275, "step": 11630 }, { "epoch": 0.049972952783287394, "grad_norm": 1.4239143133163452, "learning_rate": 9.541276096686012e-05, "loss": 0.3143747806549072, "step": 11640 }, { "epoch": 0.0500158848733074, "grad_norm": 2.7020840644836426, "learning_rate": 9.54084492467425e-05, "loss": 0.411509370803833, "step": 11650 }, { "epoch": 0.05005881696332741, "grad_norm": 1.0710383653640747, "learning_rate": 9.540413752662488e-05, "loss": 0.48816213607788084, "step": 11660 }, { "epoch": 0.05010174905334742, "grad_norm": 0.13522252440452576, "learning_rate": 9.539982580650724e-05, "loss": 0.3829029560089111, "step": 11670 }, { "epoch": 0.05014468114336742, "grad_norm": 0.016100700944662094, "learning_rate": 9.539551408638963e-05, "loss": 0.2695424795150757, "step": 11680 }, { "epoch": 0.050187613233387426, "grad_norm": 0.36224961280822754, "learning_rate": 9.539120236627201e-05, "loss": 0.2530616283416748, "step": 11690 }, { "epoch": 0.050230545323407434, "grad_norm": 0.07011070847511292, "learning_rate": 9.538689064615439e-05, "loss": 0.3647505521774292, "step": 11700 }, { "epoch": 0.05027347741342744, "grad_norm": 0.03421260043978691, "learning_rate": 9.538257892603676e-05, "loss": 0.225752592086792, "step": 11710 }, { "epoch": 0.05031640950344745, "grad_norm": 0.12849198281764984, "learning_rate": 9.537826720591914e-05, "loss": 0.22768645286560057, "step": 11720 }, { "epoch": 0.05035934159346745, "grad_norm": 3.8090126514434814, "learning_rate": 9.537395548580152e-05, "loss": 0.26749815940856936, "step": 11730 }, { "epoch": 0.05040227368348746, "grad_norm": 1.128069519996643, "learning_rate": 9.53696437656839e-05, "loss": 0.3992989778518677, "step": 11740 }, { "epoch": 0.050445205773507466, "grad_norm": 2.9438343048095703, "learning_rate": 9.536533204556626e-05, "loss": 0.38953499794006347, "step": 11750 }, { "epoch": 0.050488137863527474, "grad_norm": 0.017695719376206398, "learning_rate": 9.536102032544864e-05, "loss": 0.19398535490036012, "step": 11760 }, { "epoch": 0.05053106995354748, "grad_norm": 1.8694612979888916, "learning_rate": 9.535670860533101e-05, "loss": 0.2112741470336914, "step": 11770 }, { "epoch": 0.05057400204356748, "grad_norm": 2.410950183868408, "learning_rate": 9.535239688521339e-05, "loss": 0.21994683742523194, "step": 11780 }, { "epoch": 0.05061693413358749, "grad_norm": 1.0458391904830933, "learning_rate": 9.534808516509577e-05, "loss": 0.31306536197662355, "step": 11790 }, { "epoch": 0.0506598662236075, "grad_norm": 0.09369885176420212, "learning_rate": 9.534377344497815e-05, "loss": 0.3466991901397705, "step": 11800 }, { "epoch": 0.050702798313627506, "grad_norm": 0.013919656164944172, "learning_rate": 9.533946172486052e-05, "loss": 0.20056288242340087, "step": 11810 }, { "epoch": 0.05074573040364751, "grad_norm": 0.1818535476922989, "learning_rate": 9.53351500047429e-05, "loss": 0.3760584592819214, "step": 11820 }, { "epoch": 0.050788662493667515, "grad_norm": 0.05832016095519066, "learning_rate": 9.533083828462527e-05, "loss": 0.23105072975158691, "step": 11830 }, { "epoch": 0.05083159458368752, "grad_norm": 2.716463565826416, "learning_rate": 9.532652656450764e-05, "loss": 0.3760308027267456, "step": 11840 }, { "epoch": 0.05087452667370753, "grad_norm": 0.3611092269420624, "learning_rate": 9.532221484439002e-05, "loss": 0.2634706735610962, "step": 11850 }, { "epoch": 0.05091745876372754, "grad_norm": 1.7555878162384033, "learning_rate": 9.53179031242724e-05, "loss": 0.3180107831954956, "step": 11860 }, { "epoch": 0.05096039085374754, "grad_norm": 1.297004222869873, "learning_rate": 9.531359140415477e-05, "loss": 0.2752450227737427, "step": 11870 }, { "epoch": 0.05100332294376755, "grad_norm": 2.4578864574432373, "learning_rate": 9.530927968403715e-05, "loss": 0.33813331127166746, "step": 11880 }, { "epoch": 0.051046255033787555, "grad_norm": 2.5260801315307617, "learning_rate": 9.530496796391953e-05, "loss": 0.3503072738647461, "step": 11890 }, { "epoch": 0.05108918712380756, "grad_norm": 0.04710591956973076, "learning_rate": 9.53006562438019e-05, "loss": 0.31316137313842773, "step": 11900 }, { "epoch": 0.05113211921382757, "grad_norm": 3.3213343620300293, "learning_rate": 9.529634452368428e-05, "loss": 0.3701478004455566, "step": 11910 }, { "epoch": 0.05117505130384757, "grad_norm": 0.3955182731151581, "learning_rate": 9.529203280356666e-05, "loss": 0.22598392963409425, "step": 11920 }, { "epoch": 0.05121798339386758, "grad_norm": 2.304351806640625, "learning_rate": 9.528772108344904e-05, "loss": 0.3806648015975952, "step": 11930 }, { "epoch": 0.05126091548388759, "grad_norm": 2.4194395542144775, "learning_rate": 9.528340936333142e-05, "loss": 0.2859928131103516, "step": 11940 }, { "epoch": 0.051303847573907595, "grad_norm": 0.4967823028564453, "learning_rate": 9.52790976432138e-05, "loss": 0.5555664539337158, "step": 11950 }, { "epoch": 0.0513467796639276, "grad_norm": 0.08415184170007706, "learning_rate": 9.527478592309617e-05, "loss": 0.34697282314300537, "step": 11960 }, { "epoch": 0.0513897117539476, "grad_norm": 1.2147996425628662, "learning_rate": 9.527047420297855e-05, "loss": 0.23912529945373534, "step": 11970 }, { "epoch": 0.05143264384396761, "grad_norm": 0.4464738070964813, "learning_rate": 9.526616248286093e-05, "loss": 0.29886488914489745, "step": 11980 }, { "epoch": 0.05147557593398762, "grad_norm": 1.3995260000228882, "learning_rate": 9.52618507627433e-05, "loss": 0.44214572906494143, "step": 11990 }, { "epoch": 0.05151850802400763, "grad_norm": 0.23975662887096405, "learning_rate": 9.525753904262567e-05, "loss": 0.33844738006591796, "step": 12000 }, { "epoch": 0.05151850802400763, "eval_loss": 0.5358631610870361, "eval_runtime": 27.6176, "eval_samples_per_second": 3.621, "eval_steps_per_second": 3.621, "step": 12000 }, { "epoch": 0.05156144011402763, "grad_norm": 3.157212495803833, "learning_rate": 9.525322732250804e-05, "loss": 0.16573574542999267, "step": 12010 }, { "epoch": 0.051604372204047635, "grad_norm": 1.0986665487289429, "learning_rate": 9.524891560239042e-05, "loss": 0.31950509548187256, "step": 12020 }, { "epoch": 0.05164730429406764, "grad_norm": 0.1084139496088028, "learning_rate": 9.52446038822728e-05, "loss": 0.366551947593689, "step": 12030 }, { "epoch": 0.05169023638408765, "grad_norm": 0.04859800636768341, "learning_rate": 9.524029216215518e-05, "loss": 0.42212815284729005, "step": 12040 }, { "epoch": 0.05173316847410766, "grad_norm": 0.9786454439163208, "learning_rate": 9.523598044203755e-05, "loss": 0.29626033306121824, "step": 12050 }, { "epoch": 0.05177610056412766, "grad_norm": 0.13624534010887146, "learning_rate": 9.523166872191993e-05, "loss": 0.24190120697021483, "step": 12060 }, { "epoch": 0.05181903265414767, "grad_norm": 0.044610291719436646, "learning_rate": 9.522735700180231e-05, "loss": 0.2906489372253418, "step": 12070 }, { "epoch": 0.051861964744167675, "grad_norm": 2.1956253051757812, "learning_rate": 9.522304528168467e-05, "loss": 0.21995768547058106, "step": 12080 }, { "epoch": 0.05190489683418768, "grad_norm": 0.09326007962226868, "learning_rate": 9.521873356156705e-05, "loss": 0.37242443561553956, "step": 12090 }, { "epoch": 0.05194782892420769, "grad_norm": 0.008499284274876118, "learning_rate": 9.521442184144943e-05, "loss": 0.2127228021621704, "step": 12100 }, { "epoch": 0.05199076101422769, "grad_norm": 0.1813715696334839, "learning_rate": 9.52101101213318e-05, "loss": 0.27324044704437256, "step": 12110 }, { "epoch": 0.0520336931042477, "grad_norm": 0.01605965569615364, "learning_rate": 9.520579840121418e-05, "loss": 0.3123704671859741, "step": 12120 }, { "epoch": 0.05207662519426771, "grad_norm": 0.05326181650161743, "learning_rate": 9.520148668109656e-05, "loss": 0.4056252479553223, "step": 12130 }, { "epoch": 0.052119557284287715, "grad_norm": 1.489559531211853, "learning_rate": 9.519717496097894e-05, "loss": 0.2229902982711792, "step": 12140 }, { "epoch": 0.05216248937430772, "grad_norm": 2.0173451900482178, "learning_rate": 9.519286324086131e-05, "loss": 0.2639839887619019, "step": 12150 }, { "epoch": 0.052205421464327724, "grad_norm": 0.535400927066803, "learning_rate": 9.518855152074369e-05, "loss": 0.2586674690246582, "step": 12160 }, { "epoch": 0.05224835355434773, "grad_norm": 0.052150264382362366, "learning_rate": 9.518423980062607e-05, "loss": 0.1961987853050232, "step": 12170 }, { "epoch": 0.05229128564436774, "grad_norm": 0.8536399006843567, "learning_rate": 9.517992808050845e-05, "loss": 0.5551873207092285, "step": 12180 }, { "epoch": 0.05233421773438775, "grad_norm": 8.8615140914917, "learning_rate": 9.517561636039082e-05, "loss": 0.41590356826782227, "step": 12190 }, { "epoch": 0.052377149824407755, "grad_norm": 0.9676482677459717, "learning_rate": 9.51713046402732e-05, "loss": 0.30098705291748046, "step": 12200 }, { "epoch": 0.052420081914427756, "grad_norm": 3.19608736038208, "learning_rate": 9.516699292015558e-05, "loss": 0.2460148811340332, "step": 12210 }, { "epoch": 0.052463014004447764, "grad_norm": 0.4055849611759186, "learning_rate": 9.516268120003795e-05, "loss": 0.09390591979026794, "step": 12220 }, { "epoch": 0.05250594609446777, "grad_norm": 1.5819487571716309, "learning_rate": 9.515836947992033e-05, "loss": 0.2062903642654419, "step": 12230 }, { "epoch": 0.05254887818448778, "grad_norm": 1.0232068300247192, "learning_rate": 9.51540577598027e-05, "loss": 0.28633387088775636, "step": 12240 }, { "epoch": 0.05259181027450778, "grad_norm": 0.008014945313334465, "learning_rate": 9.514974603968507e-05, "loss": 0.1834414482116699, "step": 12250 }, { "epoch": 0.05263474236452779, "grad_norm": 1.5555038452148438, "learning_rate": 9.514543431956745e-05, "loss": 0.44185829162597656, "step": 12260 }, { "epoch": 0.052677674454547796, "grad_norm": 1.1476542949676514, "learning_rate": 9.514112259944983e-05, "loss": 0.1935230612754822, "step": 12270 }, { "epoch": 0.052720606544567804, "grad_norm": 0.12156800925731659, "learning_rate": 9.51368108793322e-05, "loss": 0.29886319637298586, "step": 12280 }, { "epoch": 0.05276353863458781, "grad_norm": 0.029563816264271736, "learning_rate": 9.513249915921458e-05, "loss": 0.33358020782470704, "step": 12290 }, { "epoch": 0.05280647072460781, "grad_norm": 0.47683200240135193, "learning_rate": 9.512818743909696e-05, "loss": 0.2097261905670166, "step": 12300 }, { "epoch": 0.05284940281462782, "grad_norm": 0.04604807123541832, "learning_rate": 9.512387571897934e-05, "loss": 0.2722134828567505, "step": 12310 }, { "epoch": 0.05289233490464783, "grad_norm": 16.888795852661133, "learning_rate": 9.511956399886171e-05, "loss": 0.4344566822052002, "step": 12320 }, { "epoch": 0.052935266994667836, "grad_norm": 0.18765738606452942, "learning_rate": 9.511525227874408e-05, "loss": 0.21546649932861328, "step": 12330 }, { "epoch": 0.052978199084687844, "grad_norm": 0.8789750933647156, "learning_rate": 9.511094055862646e-05, "loss": 0.35911104679107664, "step": 12340 }, { "epoch": 0.053021131174707845, "grad_norm": 0.7323353290557861, "learning_rate": 9.510662883850883e-05, "loss": 0.26637275218963624, "step": 12350 }, { "epoch": 0.05306406326472785, "grad_norm": 0.9971916079521179, "learning_rate": 9.510231711839121e-05, "loss": 0.39113051891326905, "step": 12360 }, { "epoch": 0.05310699535474786, "grad_norm": 2.9714555740356445, "learning_rate": 9.509800539827359e-05, "loss": 0.2813329458236694, "step": 12370 }, { "epoch": 0.05314992744476787, "grad_norm": 0.020251838490366936, "learning_rate": 9.509369367815596e-05, "loss": 0.2521169900894165, "step": 12380 }, { "epoch": 0.053192859534787876, "grad_norm": 2.174328565597534, "learning_rate": 9.508938195803834e-05, "loss": 0.31486124992370607, "step": 12390 }, { "epoch": 0.05323579162480788, "grad_norm": 0.056546323001384735, "learning_rate": 9.508507023792072e-05, "loss": 0.09270382523536683, "step": 12400 }, { "epoch": 0.053278723714827884, "grad_norm": 1.50390625, "learning_rate": 9.50807585178031e-05, "loss": 0.5031360626220703, "step": 12410 }, { "epoch": 0.05332165580484789, "grad_norm": 0.010583514347672462, "learning_rate": 9.507644679768547e-05, "loss": 0.23066141605377197, "step": 12420 }, { "epoch": 0.0533645878948679, "grad_norm": 2.49086332321167, "learning_rate": 9.507213507756785e-05, "loss": 0.30078697204589844, "step": 12430 }, { "epoch": 0.0534075199848879, "grad_norm": 0.44729381799697876, "learning_rate": 9.506782335745023e-05, "loss": 0.17861661911010743, "step": 12440 }, { "epoch": 0.05345045207490791, "grad_norm": 0.10300600528717041, "learning_rate": 9.50635116373326e-05, "loss": 0.332088565826416, "step": 12450 }, { "epoch": 0.05349338416492792, "grad_norm": 1.658432960510254, "learning_rate": 9.505919991721498e-05, "loss": 0.3135262966156006, "step": 12460 }, { "epoch": 0.053536316254947924, "grad_norm": 2.3481667041778564, "learning_rate": 9.505488819709736e-05, "loss": 0.3561201572418213, "step": 12470 }, { "epoch": 0.05357924834496793, "grad_norm": 0.21150454878807068, "learning_rate": 9.505057647697974e-05, "loss": 0.539197587966919, "step": 12480 }, { "epoch": 0.05362218043498793, "grad_norm": 0.907037079334259, "learning_rate": 9.50462647568621e-05, "loss": 0.3842069149017334, "step": 12490 }, { "epoch": 0.05366511252500794, "grad_norm": 8.066739082336426, "learning_rate": 9.504195303674448e-05, "loss": 0.43160204887390136, "step": 12500 }, { "epoch": 0.05370804461502795, "grad_norm": 0.7765117883682251, "learning_rate": 9.503764131662686e-05, "loss": 0.2860031843185425, "step": 12510 }, { "epoch": 0.053750976705047956, "grad_norm": 0.39384740591049194, "learning_rate": 9.503332959650923e-05, "loss": 0.23746469020843505, "step": 12520 }, { "epoch": 0.053793908795067964, "grad_norm": 14.003802299499512, "learning_rate": 9.502901787639161e-05, "loss": 0.14782247543334961, "step": 12530 }, { "epoch": 0.053836840885087965, "grad_norm": 1.0125482082366943, "learning_rate": 9.502470615627399e-05, "loss": 0.21553480625152588, "step": 12540 }, { "epoch": 0.05387977297510797, "grad_norm": 3.473666191101074, "learning_rate": 9.502039443615637e-05, "loss": 0.3330304384231567, "step": 12550 }, { "epoch": 0.05392270506512798, "grad_norm": 0.6401351690292358, "learning_rate": 9.501608271603874e-05, "loss": 0.2649469614028931, "step": 12560 }, { "epoch": 0.05396563715514799, "grad_norm": 0.1529102474451065, "learning_rate": 9.501177099592111e-05, "loss": 0.19821540117263795, "step": 12570 }, { "epoch": 0.054008569245167996, "grad_norm": 0.03825334832072258, "learning_rate": 9.500745927580348e-05, "loss": 0.24926376342773438, "step": 12580 }, { "epoch": 0.054051501335188, "grad_norm": 2.7774956226348877, "learning_rate": 9.500314755568586e-05, "loss": 0.34471452236175537, "step": 12590 }, { "epoch": 0.054094433425208005, "grad_norm": 0.6242231130599976, "learning_rate": 9.499883583556824e-05, "loss": 0.15731912851333618, "step": 12600 }, { "epoch": 0.05413736551522801, "grad_norm": 1.258853793144226, "learning_rate": 9.499452411545062e-05, "loss": 0.33891823291778567, "step": 12610 }, { "epoch": 0.05418029760524802, "grad_norm": 1.258105993270874, "learning_rate": 9.4990212395333e-05, "loss": 0.2595653057098389, "step": 12620 }, { "epoch": 0.05422322969526803, "grad_norm": 3.286238670349121, "learning_rate": 9.498590067521537e-05, "loss": 0.30257110595703124, "step": 12630 }, { "epoch": 0.05426616178528803, "grad_norm": 0.2592448890209198, "learning_rate": 9.498158895509776e-05, "loss": 0.3336447477340698, "step": 12640 }, { "epoch": 0.05430909387530804, "grad_norm": 2.1326916217803955, "learning_rate": 9.497727723498013e-05, "loss": 0.34888103008270266, "step": 12650 }, { "epoch": 0.054352025965328045, "grad_norm": 0.9109242558479309, "learning_rate": 9.49729655148625e-05, "loss": 0.35174739360809326, "step": 12660 }, { "epoch": 0.05439495805534805, "grad_norm": 0.9759131669998169, "learning_rate": 9.496865379474488e-05, "loss": 0.2183375358581543, "step": 12670 }, { "epoch": 0.054437890145368054, "grad_norm": 1.1428704261779785, "learning_rate": 9.496434207462726e-05, "loss": 0.457882022857666, "step": 12680 }, { "epoch": 0.05448082223538806, "grad_norm": 2.1968188285827637, "learning_rate": 9.496003035450964e-05, "loss": 0.2927252292633057, "step": 12690 }, { "epoch": 0.05452375432540807, "grad_norm": 2.174100875854492, "learning_rate": 9.495571863439201e-05, "loss": 0.2782183885574341, "step": 12700 }, { "epoch": 0.05456668641542808, "grad_norm": 0.5769919157028198, "learning_rate": 9.495140691427439e-05, "loss": 0.2357858180999756, "step": 12710 }, { "epoch": 0.054609618505448085, "grad_norm": 0.8519782423973083, "learning_rate": 9.494709519415677e-05, "loss": 0.2905261516571045, "step": 12720 }, { "epoch": 0.054652550595468086, "grad_norm": 0.6527574062347412, "learning_rate": 9.494278347403914e-05, "loss": 0.38331165313720705, "step": 12730 }, { "epoch": 0.054695482685488093, "grad_norm": 3.5559322834014893, "learning_rate": 9.493847175392151e-05, "loss": 0.24324979782104492, "step": 12740 }, { "epoch": 0.0547384147755081, "grad_norm": 12.116598129272461, "learning_rate": 9.493416003380389e-05, "loss": 0.31748697757720945, "step": 12750 }, { "epoch": 0.05478134686552811, "grad_norm": 0.057032838463783264, "learning_rate": 9.492984831368626e-05, "loss": 0.2784019231796265, "step": 12760 }, { "epoch": 0.05482427895554812, "grad_norm": 2.6145899295806885, "learning_rate": 9.492553659356864e-05, "loss": 0.21561193466186523, "step": 12770 }, { "epoch": 0.05486721104556812, "grad_norm": 0.1213018000125885, "learning_rate": 9.492122487345102e-05, "loss": 0.15118281841278075, "step": 12780 }, { "epoch": 0.054910143135588126, "grad_norm": 1.966002345085144, "learning_rate": 9.49169131533334e-05, "loss": 0.206246280670166, "step": 12790 }, { "epoch": 0.05495307522560813, "grad_norm": 0.18352824449539185, "learning_rate": 9.491260143321577e-05, "loss": 0.26455848217010497, "step": 12800 }, { "epoch": 0.05499600731562814, "grad_norm": 0.017357051372528076, "learning_rate": 9.490828971309815e-05, "loss": 0.23155784606933594, "step": 12810 }, { "epoch": 0.05503893940564815, "grad_norm": 0.009231144562363625, "learning_rate": 9.490397799298051e-05, "loss": 0.27017381191253664, "step": 12820 }, { "epoch": 0.05508187149566815, "grad_norm": 1.3265694379806519, "learning_rate": 9.489966627286289e-05, "loss": 0.404315185546875, "step": 12830 }, { "epoch": 0.05512480358568816, "grad_norm": 2.65555477142334, "learning_rate": 9.489535455274527e-05, "loss": 0.30979869365692136, "step": 12840 }, { "epoch": 0.055167735675708165, "grad_norm": 1.8362942934036255, "learning_rate": 9.489104283262765e-05, "loss": 0.19149515628814698, "step": 12850 }, { "epoch": 0.05521066776572817, "grad_norm": 0.008390932343900204, "learning_rate": 9.488673111251004e-05, "loss": 0.11477944850921631, "step": 12860 }, { "epoch": 0.055253599855748174, "grad_norm": 19.582433700561523, "learning_rate": 9.488241939239241e-05, "loss": 0.2537912607192993, "step": 12870 }, { "epoch": 0.05529653194576818, "grad_norm": 0.001269947155378759, "learning_rate": 9.487810767227479e-05, "loss": 0.2521425008773804, "step": 12880 }, { "epoch": 0.05533946403578819, "grad_norm": 0.5160092711448669, "learning_rate": 9.487379595215717e-05, "loss": 0.22307281494140624, "step": 12890 }, { "epoch": 0.0553823961258082, "grad_norm": 2.3668174743652344, "learning_rate": 9.486948423203953e-05, "loss": 0.35943007469177246, "step": 12900 }, { "epoch": 0.055425328215828205, "grad_norm": 0.07710514962673187, "learning_rate": 9.486517251192191e-05, "loss": 0.21076421737670897, "step": 12910 }, { "epoch": 0.055468260305848206, "grad_norm": 0.017242876812815666, "learning_rate": 9.486086079180429e-05, "loss": 0.07508084177970886, "step": 12920 }, { "epoch": 0.055511192395868214, "grad_norm": 0.10909157246351242, "learning_rate": 9.485654907168666e-05, "loss": 0.15512967109680176, "step": 12930 }, { "epoch": 0.05555412448588822, "grad_norm": 0.8637058138847351, "learning_rate": 9.485223735156904e-05, "loss": 0.4028017044067383, "step": 12940 }, { "epoch": 0.05559705657590823, "grad_norm": 1.707521915435791, "learning_rate": 9.484792563145142e-05, "loss": 0.27000603675842283, "step": 12950 }, { "epoch": 0.05563998866592824, "grad_norm": 0.002830659504979849, "learning_rate": 9.48436139113338e-05, "loss": 0.1828344225883484, "step": 12960 }, { "epoch": 0.05568292075594824, "grad_norm": 20.095304489135742, "learning_rate": 9.483930219121617e-05, "loss": 0.5967009544372559, "step": 12970 }, { "epoch": 0.055725852845968246, "grad_norm": 3.065985679626465, "learning_rate": 9.483499047109854e-05, "loss": 0.1959398627281189, "step": 12980 }, { "epoch": 0.055768784935988254, "grad_norm": 1.2307249307632446, "learning_rate": 9.483067875098092e-05, "loss": 0.3424776792526245, "step": 12990 }, { "epoch": 0.05581171702600826, "grad_norm": 0.06579990684986115, "learning_rate": 9.482636703086329e-05, "loss": 0.23139922618865966, "step": 13000 }, { "epoch": 0.05581171702600826, "eval_loss": 0.5209886431694031, "eval_runtime": 27.4824, "eval_samples_per_second": 3.639, "eval_steps_per_second": 3.639, "step": 13000 }, { "epoch": 0.05585464911602827, "grad_norm": 1.030726671218872, "learning_rate": 9.482205531074567e-05, "loss": 0.24501314163208007, "step": 13010 }, { "epoch": 0.05589758120604827, "grad_norm": 0.21431680023670197, "learning_rate": 9.481774359062805e-05, "loss": 0.16148560047149657, "step": 13020 }, { "epoch": 0.05594051329606828, "grad_norm": 4.152802467346191, "learning_rate": 9.481343187051042e-05, "loss": 0.0998810887336731, "step": 13030 }, { "epoch": 0.055983445386088286, "grad_norm": 0.12929271161556244, "learning_rate": 9.48091201503928e-05, "loss": 0.3375421047210693, "step": 13040 }, { "epoch": 0.056026377476108294, "grad_norm": 3.620896577835083, "learning_rate": 9.480480843027518e-05, "loss": 0.32858121395111084, "step": 13050 }, { "epoch": 0.0560693095661283, "grad_norm": 4.256040573120117, "learning_rate": 9.480049671015756e-05, "loss": 0.33753039836883547, "step": 13060 }, { "epoch": 0.0561122416561483, "grad_norm": 0.004805037286132574, "learning_rate": 9.479618499003992e-05, "loss": 0.2353214979171753, "step": 13070 }, { "epoch": 0.05615517374616831, "grad_norm": 1.3188596963882446, "learning_rate": 9.479187326992231e-05, "loss": 0.14669665098190307, "step": 13080 }, { "epoch": 0.05619810583618832, "grad_norm": 0.1885381042957306, "learning_rate": 9.478756154980469e-05, "loss": 0.4346306324005127, "step": 13090 }, { "epoch": 0.056241037926208326, "grad_norm": 0.03456701338291168, "learning_rate": 9.478324982968707e-05, "loss": 0.05812343955039978, "step": 13100 }, { "epoch": 0.05628397001622833, "grad_norm": 0.0404074490070343, "learning_rate": 9.477893810956944e-05, "loss": 0.1765100598335266, "step": 13110 }, { "epoch": 0.056326902106248335, "grad_norm": 1.5887359380722046, "learning_rate": 9.477462638945182e-05, "loss": 0.3653215169906616, "step": 13120 }, { "epoch": 0.05636983419626834, "grad_norm": 0.08524619042873383, "learning_rate": 9.47703146693342e-05, "loss": 0.4067636489868164, "step": 13130 }, { "epoch": 0.05641276628628835, "grad_norm": 0.9142935276031494, "learning_rate": 9.476600294921658e-05, "loss": 0.17017019987106324, "step": 13140 }, { "epoch": 0.05645569837630836, "grad_norm": 3.3787689208984375, "learning_rate": 9.476169122909894e-05, "loss": 0.15745049715042114, "step": 13150 }, { "epoch": 0.05649863046632836, "grad_norm": 1.9701745510101318, "learning_rate": 9.475737950898132e-05, "loss": 0.23845574855804444, "step": 13160 }, { "epoch": 0.05654156255634837, "grad_norm": 3.9363882541656494, "learning_rate": 9.47530677888637e-05, "loss": 0.2576152801513672, "step": 13170 }, { "epoch": 0.056584494646368375, "grad_norm": 0.0014147092588245869, "learning_rate": 9.474875606874607e-05, "loss": 0.18658721446990967, "step": 13180 }, { "epoch": 0.05662742673638838, "grad_norm": 0.4537833333015442, "learning_rate": 9.474444434862845e-05, "loss": 0.3402831554412842, "step": 13190 }, { "epoch": 0.05667035882640839, "grad_norm": 1.0720131397247314, "learning_rate": 9.474013262851083e-05, "loss": 0.27713770866394044, "step": 13200 }, { "epoch": 0.05671329091642839, "grad_norm": 2.2132678031921387, "learning_rate": 9.47358209083932e-05, "loss": 0.13246928453445433, "step": 13210 }, { "epoch": 0.0567562230064484, "grad_norm": 0.004071402829140425, "learning_rate": 9.473150918827558e-05, "loss": 0.3325110912322998, "step": 13220 }, { "epoch": 0.05679915509646841, "grad_norm": 1.9012278318405151, "learning_rate": 9.472719746815794e-05, "loss": 0.3323903799057007, "step": 13230 }, { "epoch": 0.056842087186488414, "grad_norm": 0.011959442868828773, "learning_rate": 9.472288574804032e-05, "loss": 0.2820717811584473, "step": 13240 }, { "epoch": 0.05688501927650842, "grad_norm": 0.05983627215027809, "learning_rate": 9.47185740279227e-05, "loss": 0.15745246410369873, "step": 13250 }, { "epoch": 0.05692795136652842, "grad_norm": 1.0197789669036865, "learning_rate": 9.471426230780508e-05, "loss": 0.12266286611557006, "step": 13260 }, { "epoch": 0.05697088345654843, "grad_norm": 2.0742173194885254, "learning_rate": 9.470995058768745e-05, "loss": 0.20585498809814454, "step": 13270 }, { "epoch": 0.05701381554656844, "grad_norm": 4.345624923706055, "learning_rate": 9.470563886756983e-05, "loss": 0.24817166328430176, "step": 13280 }, { "epoch": 0.05705674763658845, "grad_norm": 2.2066709995269775, "learning_rate": 9.470132714745221e-05, "loss": 0.26742217540740965, "step": 13290 }, { "epoch": 0.05709967972660845, "grad_norm": 0.2013663947582245, "learning_rate": 9.469701542733459e-05, "loss": 0.33151028156280515, "step": 13300 }, { "epoch": 0.057142611816628455, "grad_norm": 0.0009617611649446189, "learning_rate": 9.469270370721696e-05, "loss": 0.4479741096496582, "step": 13310 }, { "epoch": 0.05718554390664846, "grad_norm": 0.02231052704155445, "learning_rate": 9.468839198709934e-05, "loss": 0.3095113277435303, "step": 13320 }, { "epoch": 0.05722847599666847, "grad_norm": 0.9156794548034668, "learning_rate": 9.468408026698172e-05, "loss": 0.3460153341293335, "step": 13330 }, { "epoch": 0.05727140808668848, "grad_norm": 0.059285569936037064, "learning_rate": 9.46797685468641e-05, "loss": 0.15585129261016845, "step": 13340 }, { "epoch": 0.05731434017670848, "grad_norm": 8.328903198242188, "learning_rate": 9.467545682674647e-05, "loss": 0.21851041316986083, "step": 13350 }, { "epoch": 0.05735727226672849, "grad_norm": 9.454680442810059, "learning_rate": 9.467114510662885e-05, "loss": 0.3026920795440674, "step": 13360 }, { "epoch": 0.057400204356748495, "grad_norm": 0.2567834258079529, "learning_rate": 9.466683338651123e-05, "loss": 0.22752134799957274, "step": 13370 }, { "epoch": 0.0574431364467685, "grad_norm": 0.7814629673957825, "learning_rate": 9.46625216663936e-05, "loss": 0.18589977025985718, "step": 13380 }, { "epoch": 0.05748606853678851, "grad_norm": 0.10805714875459671, "learning_rate": 9.465820994627597e-05, "loss": 0.42492899894714353, "step": 13390 }, { "epoch": 0.05752900062680851, "grad_norm": 4.603877067565918, "learning_rate": 9.465389822615835e-05, "loss": 0.5462126731872559, "step": 13400 }, { "epoch": 0.05757193271682852, "grad_norm": 4.8273820877075195, "learning_rate": 9.464958650604072e-05, "loss": 0.16398582458496094, "step": 13410 }, { "epoch": 0.05761486480684853, "grad_norm": 0.11256857961416245, "learning_rate": 9.46452747859231e-05, "loss": 0.2469719886779785, "step": 13420 }, { "epoch": 0.057657796896868535, "grad_norm": 1.4045051336288452, "learning_rate": 9.464096306580548e-05, "loss": 0.14543576240539552, "step": 13430 }, { "epoch": 0.05770072898688854, "grad_norm": 1.5675419569015503, "learning_rate": 9.463665134568785e-05, "loss": 0.1374026656150818, "step": 13440 }, { "epoch": 0.057743661076908544, "grad_norm": 0.041778989136219025, "learning_rate": 9.463233962557023e-05, "loss": 0.2824827194213867, "step": 13450 }, { "epoch": 0.05778659316692855, "grad_norm": 2.386615514755249, "learning_rate": 9.462802790545261e-05, "loss": 0.2739110946655273, "step": 13460 }, { "epoch": 0.05782952525694856, "grad_norm": 0.09681998938322067, "learning_rate": 9.462371618533499e-05, "loss": 0.25143866539001464, "step": 13470 }, { "epoch": 0.05787245734696857, "grad_norm": 0.08663687855005264, "learning_rate": 9.461940446521735e-05, "loss": 0.16926002502441406, "step": 13480 }, { "epoch": 0.057915389436988575, "grad_norm": 0.49383774399757385, "learning_rate": 9.461509274509973e-05, "loss": 0.1362561821937561, "step": 13490 }, { "epoch": 0.057958321527008576, "grad_norm": 0.04574750363826752, "learning_rate": 9.46107810249821e-05, "loss": 0.1413083553314209, "step": 13500 }, { "epoch": 0.058001253617028584, "grad_norm": 1.0846718549728394, "learning_rate": 9.460646930486448e-05, "loss": 0.3449979305267334, "step": 13510 }, { "epoch": 0.05804418570704859, "grad_norm": 0.017421284690499306, "learning_rate": 9.460215758474686e-05, "loss": 0.05157340168952942, "step": 13520 }, { "epoch": 0.0580871177970686, "grad_norm": 0.024469420313835144, "learning_rate": 9.459784586462924e-05, "loss": 0.5607762336730957, "step": 13530 }, { "epoch": 0.0581300498870886, "grad_norm": 0.11292649805545807, "learning_rate": 9.459353414451161e-05, "loss": 0.36970949172973633, "step": 13540 }, { "epoch": 0.05817298197710861, "grad_norm": 5.711234092712402, "learning_rate": 9.458922242439399e-05, "loss": 0.3390767574310303, "step": 13550 }, { "epoch": 0.058215914067128616, "grad_norm": 0.1786605715751648, "learning_rate": 9.458491070427637e-05, "loss": 0.3094156503677368, "step": 13560 }, { "epoch": 0.058258846157148624, "grad_norm": 16.943851470947266, "learning_rate": 9.458059898415875e-05, "loss": 0.29798853397369385, "step": 13570 }, { "epoch": 0.05830177824716863, "grad_norm": 1.147255778312683, "learning_rate": 9.457628726404112e-05, "loss": 0.18354703187942506, "step": 13580 }, { "epoch": 0.05834471033718863, "grad_norm": 0.9971585869789124, "learning_rate": 9.45719755439235e-05, "loss": 0.16321992874145508, "step": 13590 }, { "epoch": 0.05838764242720864, "grad_norm": 0.028112033382058144, "learning_rate": 9.456766382380588e-05, "loss": 0.21919541358947753, "step": 13600 }, { "epoch": 0.05843057451722865, "grad_norm": 1.8403459787368774, "learning_rate": 9.456335210368826e-05, "loss": 0.3318035125732422, "step": 13610 }, { "epoch": 0.058473506607248656, "grad_norm": 4.8288044929504395, "learning_rate": 9.455904038357063e-05, "loss": 0.1652234435081482, "step": 13620 }, { "epoch": 0.05851643869726866, "grad_norm": 1.7937736511230469, "learning_rate": 9.455472866345301e-05, "loss": 0.5239839553833008, "step": 13630 }, { "epoch": 0.058559370787288664, "grad_norm": 0.09594344347715378, "learning_rate": 9.455041694333537e-05, "loss": 0.39256093502044676, "step": 13640 }, { "epoch": 0.05860230287730867, "grad_norm": 3.84417724609375, "learning_rate": 9.454610522321775e-05, "loss": 0.28167335987091063, "step": 13650 }, { "epoch": 0.05864523496732868, "grad_norm": 0.060315635055303574, "learning_rate": 9.454179350310013e-05, "loss": 0.28380427360534666, "step": 13660 }, { "epoch": 0.05868816705734869, "grad_norm": 0.10807570815086365, "learning_rate": 9.45374817829825e-05, "loss": 0.22851717472076416, "step": 13670 }, { "epoch": 0.058731099147368696, "grad_norm": 0.21893347799777985, "learning_rate": 9.453317006286488e-05, "loss": 0.299111008644104, "step": 13680 }, { "epoch": 0.058774031237388696, "grad_norm": 0.656207799911499, "learning_rate": 9.452885834274726e-05, "loss": 0.164165723323822, "step": 13690 }, { "epoch": 0.058816963327408704, "grad_norm": 0.8610594272613525, "learning_rate": 9.452454662262964e-05, "loss": 0.2684111356735229, "step": 13700 }, { "epoch": 0.05885989541742871, "grad_norm": 0.04181723669171333, "learning_rate": 9.452023490251202e-05, "loss": 0.21170082092285156, "step": 13710 }, { "epoch": 0.05890282750744872, "grad_norm": 0.030319862067699432, "learning_rate": 9.451592318239438e-05, "loss": 0.0055593281984329225, "step": 13720 }, { "epoch": 0.05894575959746872, "grad_norm": 2.068293333053589, "learning_rate": 9.451161146227676e-05, "loss": 0.36513285636901854, "step": 13730 }, { "epoch": 0.05898869168748873, "grad_norm": 0.039667144417762756, "learning_rate": 9.450729974215913e-05, "loss": 0.3398525953292847, "step": 13740 }, { "epoch": 0.059031623777508736, "grad_norm": 1.1753939390182495, "learning_rate": 9.450298802204151e-05, "loss": 0.2801861047744751, "step": 13750 }, { "epoch": 0.059074555867528744, "grad_norm": 1.5181111097335815, "learning_rate": 9.449867630192389e-05, "loss": 0.3567522048950195, "step": 13760 }, { "epoch": 0.05911748795754875, "grad_norm": 0.018359310925006866, "learning_rate": 9.449436458180627e-05, "loss": 0.2326261043548584, "step": 13770 }, { "epoch": 0.05916042004756875, "grad_norm": 0.02728513814508915, "learning_rate": 9.449005286168864e-05, "loss": 0.1719570279121399, "step": 13780 }, { "epoch": 0.05920335213758876, "grad_norm": 0.039325762540102005, "learning_rate": 9.448574114157102e-05, "loss": 0.2949488878250122, "step": 13790 }, { "epoch": 0.05924628422760877, "grad_norm": 0.020210914313793182, "learning_rate": 9.44814294214534e-05, "loss": 0.2757899761199951, "step": 13800 }, { "epoch": 0.059289216317628776, "grad_norm": 6.97475528717041, "learning_rate": 9.447711770133578e-05, "loss": 0.3151508092880249, "step": 13810 }, { "epoch": 0.059332148407648784, "grad_norm": 0.18654370307922363, "learning_rate": 9.447280598121815e-05, "loss": 0.3797006368637085, "step": 13820 }, { "epoch": 0.059375080497668785, "grad_norm": 0.02834288775920868, "learning_rate": 9.446849426110053e-05, "loss": 0.28402860164642335, "step": 13830 }, { "epoch": 0.05941801258768879, "grad_norm": 0.07072301208972931, "learning_rate": 9.446418254098291e-05, "loss": 0.2836951971054077, "step": 13840 }, { "epoch": 0.0594609446777088, "grad_norm": 0.10318069159984589, "learning_rate": 9.445987082086529e-05, "loss": 0.22994587421417237, "step": 13850 }, { "epoch": 0.05950387676772881, "grad_norm": 0.228297621011734, "learning_rate": 9.445555910074766e-05, "loss": 0.2023998975753784, "step": 13860 }, { "epoch": 0.059546808857748816, "grad_norm": 1.7108619213104248, "learning_rate": 9.445124738063004e-05, "loss": 0.273990535736084, "step": 13870 }, { "epoch": 0.05958974094776882, "grad_norm": 0.20896852016448975, "learning_rate": 9.444693566051242e-05, "loss": 0.15025321245193482, "step": 13880 }, { "epoch": 0.059632673037788825, "grad_norm": 0.3814859688282013, "learning_rate": 9.444262394039478e-05, "loss": 0.2851571798324585, "step": 13890 }, { "epoch": 0.05967560512780883, "grad_norm": 0.16517090797424316, "learning_rate": 9.443831222027716e-05, "loss": 0.21811909675598146, "step": 13900 }, { "epoch": 0.05971853721782884, "grad_norm": 0.06338504701852798, "learning_rate": 9.443400050015954e-05, "loss": 0.2580056667327881, "step": 13910 }, { "epoch": 0.05976146930784884, "grad_norm": 0.006939787417650223, "learning_rate": 9.442968878004191e-05, "loss": 0.13981956243515015, "step": 13920 }, { "epoch": 0.05980440139786885, "grad_norm": 0.019513536244630814, "learning_rate": 9.442537705992429e-05, "loss": 0.3545896768569946, "step": 13930 }, { "epoch": 0.05984733348788886, "grad_norm": 0.02124965377151966, "learning_rate": 9.442106533980667e-05, "loss": 0.31715543270111085, "step": 13940 }, { "epoch": 0.059890265577908865, "grad_norm": 0.054176412522792816, "learning_rate": 9.441675361968905e-05, "loss": 0.245257568359375, "step": 13950 }, { "epoch": 0.05993319766792887, "grad_norm": 0.04804272949695587, "learning_rate": 9.441244189957142e-05, "loss": 0.06981720924377441, "step": 13960 }, { "epoch": 0.05997612975794887, "grad_norm": 0.024836163967847824, "learning_rate": 9.440813017945379e-05, "loss": 0.31092960834503175, "step": 13970 }, { "epoch": 0.06001906184796888, "grad_norm": 0.05360177159309387, "learning_rate": 9.440381845933616e-05, "loss": 0.27363669872283936, "step": 13980 }, { "epoch": 0.06006199393798889, "grad_norm": 0.25896015763282776, "learning_rate": 9.439950673921854e-05, "loss": 0.37048931121826173, "step": 13990 }, { "epoch": 0.0601049260280089, "grad_norm": 3.391026020050049, "learning_rate": 9.439519501910092e-05, "loss": 0.38292765617370605, "step": 14000 }, { "epoch": 0.0601049260280089, "eval_loss": 0.5119574666023254, "eval_runtime": 27.5231, "eval_samples_per_second": 3.633, "eval_steps_per_second": 3.633, "step": 14000 }, { "epoch": 0.060147858118028905, "grad_norm": 0.10783776640892029, "learning_rate": 9.43908832989833e-05, "loss": 0.4476637363433838, "step": 14010 }, { "epoch": 0.060190790208048905, "grad_norm": 0.0501650795340538, "learning_rate": 9.438657157886567e-05, "loss": 0.19077664613723755, "step": 14020 }, { "epoch": 0.06023372229806891, "grad_norm": 3.299262046813965, "learning_rate": 9.438225985874805e-05, "loss": 0.4837735652923584, "step": 14030 }, { "epoch": 0.06027665438808892, "grad_norm": 0.10058021545410156, "learning_rate": 9.437794813863043e-05, "loss": 0.39572231769561766, "step": 14040 }, { "epoch": 0.06031958647810893, "grad_norm": 2.3452744483947754, "learning_rate": 9.43736364185128e-05, "loss": 0.2922865629196167, "step": 14050 }, { "epoch": 0.06036251856812894, "grad_norm": 0.03752734139561653, "learning_rate": 9.436932469839518e-05, "loss": 0.26948659420013427, "step": 14060 }, { "epoch": 0.06040545065814894, "grad_norm": 0.21016894280910492, "learning_rate": 9.436501297827756e-05, "loss": 0.07004184126853943, "step": 14070 }, { "epoch": 0.060448382748168945, "grad_norm": 1.554970145225525, "learning_rate": 9.436070125815994e-05, "loss": 0.488129997253418, "step": 14080 }, { "epoch": 0.06049131483818895, "grad_norm": 3.1139557361602783, "learning_rate": 9.435638953804231e-05, "loss": 0.2959815740585327, "step": 14090 }, { "epoch": 0.06053424692820896, "grad_norm": 0.07403170317411423, "learning_rate": 9.435207781792469e-05, "loss": 0.14179257154464722, "step": 14100 }, { "epoch": 0.06057717901822897, "grad_norm": 0.5391601324081421, "learning_rate": 9.434776609780707e-05, "loss": 0.11069589853286743, "step": 14110 }, { "epoch": 0.06062011110824897, "grad_norm": 2.3623971939086914, "learning_rate": 9.434345437768945e-05, "loss": 0.3838085412979126, "step": 14120 }, { "epoch": 0.06066304319826898, "grad_norm": 10.063488006591797, "learning_rate": 9.433914265757181e-05, "loss": 0.2548569440841675, "step": 14130 }, { "epoch": 0.060705975288288985, "grad_norm": 1.8734171390533447, "learning_rate": 9.433483093745419e-05, "loss": 0.2592017650604248, "step": 14140 }, { "epoch": 0.06074890737830899, "grad_norm": 0.2549486756324768, "learning_rate": 9.433051921733656e-05, "loss": 0.18551105260849, "step": 14150 }, { "epoch": 0.060791839468328994, "grad_norm": 0.09773687273263931, "learning_rate": 9.432620749721894e-05, "loss": 0.17357507944107056, "step": 14160 }, { "epoch": 0.060834771558349, "grad_norm": 0.09439878910779953, "learning_rate": 9.432189577710132e-05, "loss": 0.45656538009643555, "step": 14170 }, { "epoch": 0.06087770364836901, "grad_norm": 3.9225845336914062, "learning_rate": 9.43175840569837e-05, "loss": 0.24273622035980225, "step": 14180 }, { "epoch": 0.06092063573838902, "grad_norm": 2.183349370956421, "learning_rate": 9.431327233686607e-05, "loss": 0.25051212310791016, "step": 14190 }, { "epoch": 0.060963567828409025, "grad_norm": 3.013693332672119, "learning_rate": 9.430896061674845e-05, "loss": 0.258715295791626, "step": 14200 }, { "epoch": 0.061006499918429026, "grad_norm": 3.5759599208831787, "learning_rate": 9.430464889663083e-05, "loss": 0.27321045398712157, "step": 14210 }, { "epoch": 0.061049432008449034, "grad_norm": 0.26453498005867004, "learning_rate": 9.430033717651319e-05, "loss": 0.2096024513244629, "step": 14220 }, { "epoch": 0.06109236409846904, "grad_norm": 1.056121826171875, "learning_rate": 9.429602545639557e-05, "loss": 0.3136213541030884, "step": 14230 }, { "epoch": 0.06113529618848905, "grad_norm": 0.04215465858578682, "learning_rate": 9.429171373627795e-05, "loss": 0.2560518026351929, "step": 14240 }, { "epoch": 0.06117822827850906, "grad_norm": 0.3055509328842163, "learning_rate": 9.428740201616032e-05, "loss": 0.26824657917022704, "step": 14250 }, { "epoch": 0.06122116036852906, "grad_norm": 2.0319509506225586, "learning_rate": 9.42830902960427e-05, "loss": 0.2057985782623291, "step": 14260 }, { "epoch": 0.061264092458549066, "grad_norm": 0.09097740799188614, "learning_rate": 9.427877857592509e-05, "loss": 0.25459158420562744, "step": 14270 }, { "epoch": 0.061307024548569074, "grad_norm": 1.385293960571289, "learning_rate": 9.427446685580747e-05, "loss": 0.2401709794998169, "step": 14280 }, { "epoch": 0.06134995663858908, "grad_norm": 0.0381929837167263, "learning_rate": 9.427015513568985e-05, "loss": 0.28802106380462644, "step": 14290 }, { "epoch": 0.06139288872860909, "grad_norm": 0.14590157568454742, "learning_rate": 9.426584341557221e-05, "loss": 0.48211469650268557, "step": 14300 }, { "epoch": 0.06143582081862909, "grad_norm": 0.004426190629601479, "learning_rate": 9.426153169545459e-05, "loss": 0.18446681499481202, "step": 14310 }, { "epoch": 0.0614787529086491, "grad_norm": 0.8361203670501709, "learning_rate": 9.425721997533697e-05, "loss": 0.1543604850769043, "step": 14320 }, { "epoch": 0.061521684998669106, "grad_norm": 2.4105887413024902, "learning_rate": 9.425290825521934e-05, "loss": 0.4578250885009766, "step": 14330 }, { "epoch": 0.061564617088689114, "grad_norm": 6.35371208190918, "learning_rate": 9.424859653510172e-05, "loss": 0.1941359043121338, "step": 14340 }, { "epoch": 0.061607549178709115, "grad_norm": 0.030798856168985367, "learning_rate": 9.42442848149841e-05, "loss": 0.38810138702392577, "step": 14350 }, { "epoch": 0.06165048126872912, "grad_norm": 0.5181298851966858, "learning_rate": 9.423997309486648e-05, "loss": 0.05099499821662903, "step": 14360 }, { "epoch": 0.06169341335874913, "grad_norm": 0.10615668445825577, "learning_rate": 9.423566137474885e-05, "loss": 0.11544710397720337, "step": 14370 }, { "epoch": 0.06173634544876914, "grad_norm": 0.006546362768858671, "learning_rate": 9.423134965463122e-05, "loss": 0.20839293003082277, "step": 14380 }, { "epoch": 0.061779277538789146, "grad_norm": 4.777956962585449, "learning_rate": 9.42270379345136e-05, "loss": 0.27350683212280275, "step": 14390 }, { "epoch": 0.06182220962880915, "grad_norm": 0.22227756679058075, "learning_rate": 9.422272621439597e-05, "loss": 0.23988797664642333, "step": 14400 }, { "epoch": 0.061865141718829154, "grad_norm": 6.438022136688232, "learning_rate": 9.421841449427835e-05, "loss": 0.6111140251159668, "step": 14410 }, { "epoch": 0.06190807380884916, "grad_norm": 0.06184747815132141, "learning_rate": 9.421410277416073e-05, "loss": 0.2695784330368042, "step": 14420 }, { "epoch": 0.06195100589886917, "grad_norm": 2.856022834777832, "learning_rate": 9.42097910540431e-05, "loss": 0.16448986530303955, "step": 14430 }, { "epoch": 0.06199393798888918, "grad_norm": 0.024922547861933708, "learning_rate": 9.420547933392548e-05, "loss": 0.24486238956451417, "step": 14440 }, { "epoch": 0.06203687007890918, "grad_norm": 0.06679052859544754, "learning_rate": 9.420116761380786e-05, "loss": 0.32473771572113036, "step": 14450 }, { "epoch": 0.062079802168929187, "grad_norm": 1.0972843170166016, "learning_rate": 9.419685589369022e-05, "loss": 0.23681797981262206, "step": 14460 }, { "epoch": 0.062122734258949194, "grad_norm": 1.9927321672439575, "learning_rate": 9.41925441735726e-05, "loss": 0.24192793369293214, "step": 14470 }, { "epoch": 0.0621656663489692, "grad_norm": 0.08671754598617554, "learning_rate": 9.418823245345498e-05, "loss": 0.1729517936706543, "step": 14480 }, { "epoch": 0.06220859843898921, "grad_norm": 0.3051014542579651, "learning_rate": 9.418392073333737e-05, "loss": 0.28752970695495605, "step": 14490 }, { "epoch": 0.06225153052900921, "grad_norm": 4.915781021118164, "learning_rate": 9.417960901321974e-05, "loss": 0.2712711811065674, "step": 14500 }, { "epoch": 0.06229446261902922, "grad_norm": 1.4001567363739014, "learning_rate": 9.417529729310212e-05, "loss": 0.5032999038696289, "step": 14510 }, { "epoch": 0.062337394709049226, "grad_norm": 1.549265742301941, "learning_rate": 9.41709855729845e-05, "loss": 0.21653423309326172, "step": 14520 }, { "epoch": 0.062380326799069234, "grad_norm": 6.566656589508057, "learning_rate": 9.416667385286688e-05, "loss": 0.30635671615600585, "step": 14530 }, { "epoch": 0.06242325888908924, "grad_norm": 0.2426643669605255, "learning_rate": 9.416236213274925e-05, "loss": 0.2337181568145752, "step": 14540 }, { "epoch": 0.06246619097910924, "grad_norm": 0.9807308316230774, "learning_rate": 9.415805041263162e-05, "loss": 0.2038658857345581, "step": 14550 }, { "epoch": 0.06250912306912926, "grad_norm": 0.10739678144454956, "learning_rate": 9.4153738692514e-05, "loss": 0.20511481761932374, "step": 14560 }, { "epoch": 0.06255205515914926, "grad_norm": 2.38814640045166, "learning_rate": 9.414942697239637e-05, "loss": 0.15794265270233154, "step": 14570 }, { "epoch": 0.06259498724916926, "grad_norm": 8.256712913513184, "learning_rate": 9.414511525227875e-05, "loss": 0.3933896541595459, "step": 14580 }, { "epoch": 0.06263791933918927, "grad_norm": 2.060042142868042, "learning_rate": 9.414080353216113e-05, "loss": 0.18279197216033935, "step": 14590 }, { "epoch": 0.06268085142920927, "grad_norm": 0.012487399391829967, "learning_rate": 9.41364918120435e-05, "loss": 0.14272109270095826, "step": 14600 }, { "epoch": 0.06272378351922929, "grad_norm": 2.3638439178466797, "learning_rate": 9.413218009192588e-05, "loss": 0.1730712652206421, "step": 14610 }, { "epoch": 0.06276671560924929, "grad_norm": 1.4593660831451416, "learning_rate": 9.412786837180826e-05, "loss": 0.23349535465240479, "step": 14620 }, { "epoch": 0.06280964769926929, "grad_norm": 0.04631822183728218, "learning_rate": 9.412355665169062e-05, "loss": 0.3255072832107544, "step": 14630 }, { "epoch": 0.0628525797892893, "grad_norm": 1.6218816041946411, "learning_rate": 9.4119244931573e-05, "loss": 0.3747283935546875, "step": 14640 }, { "epoch": 0.06289551187930931, "grad_norm": 0.73520827293396, "learning_rate": 9.411493321145538e-05, "loss": 0.17614601850509642, "step": 14650 }, { "epoch": 0.06293844396932931, "grad_norm": 0.7931138277053833, "learning_rate": 9.411062149133776e-05, "loss": 0.24755749702453614, "step": 14660 }, { "epoch": 0.06298137605934932, "grad_norm": 0.17208456993103027, "learning_rate": 9.410630977122013e-05, "loss": 0.44512248039245605, "step": 14670 }, { "epoch": 0.06302430814936932, "grad_norm": 0.07860125601291656, "learning_rate": 9.410199805110251e-05, "loss": 0.25961253643035886, "step": 14680 }, { "epoch": 0.06306724023938934, "grad_norm": 1.2539564371109009, "learning_rate": 9.409768633098489e-05, "loss": 0.24830968379974366, "step": 14690 }, { "epoch": 0.06311017232940934, "grad_norm": 0.6141181588172913, "learning_rate": 9.409337461086726e-05, "loss": 0.4124382495880127, "step": 14700 }, { "epoch": 0.06315310441942934, "grad_norm": 2.1591837406158447, "learning_rate": 9.408906289074964e-05, "loss": 0.3804943561553955, "step": 14710 }, { "epoch": 0.06319603650944935, "grad_norm": 0.14408475160598755, "learning_rate": 9.408475117063202e-05, "loss": 0.06406650543212891, "step": 14720 }, { "epoch": 0.06323896859946936, "grad_norm": 0.11168843507766724, "learning_rate": 9.40804394505144e-05, "loss": 0.1355807065963745, "step": 14730 }, { "epoch": 0.06328190068948937, "grad_norm": 8.794105529785156, "learning_rate": 9.407612773039677e-05, "loss": 0.3579146385192871, "step": 14740 }, { "epoch": 0.06332483277950937, "grad_norm": 2.165438175201416, "learning_rate": 9.407181601027915e-05, "loss": 0.24737703800201416, "step": 14750 }, { "epoch": 0.06336776486952937, "grad_norm": 3.3912713527679443, "learning_rate": 9.406750429016153e-05, "loss": 0.3478167295455933, "step": 14760 }, { "epoch": 0.06341069695954939, "grad_norm": 0.035082586109638214, "learning_rate": 9.40631925700439e-05, "loss": 0.2180727243423462, "step": 14770 }, { "epoch": 0.06345362904956939, "grad_norm": 1.1248410940170288, "learning_rate": 9.405888084992628e-05, "loss": 0.20930137634277343, "step": 14780 }, { "epoch": 0.0634965611395894, "grad_norm": 0.03349543362855911, "learning_rate": 9.405456912980865e-05, "loss": 0.2702275276184082, "step": 14790 }, { "epoch": 0.0635394932296094, "grad_norm": 0.04150047525763512, "learning_rate": 9.405025740969102e-05, "loss": 0.18669117689132692, "step": 14800 }, { "epoch": 0.0635824253196294, "grad_norm": 0.07978838682174683, "learning_rate": 9.40459456895734e-05, "loss": 0.12569122314453124, "step": 14810 }, { "epoch": 0.06362535740964942, "grad_norm": 1.4422789812088013, "learning_rate": 9.404163396945578e-05, "loss": 0.2750370979309082, "step": 14820 }, { "epoch": 0.06366828949966942, "grad_norm": 0.08112714439630508, "learning_rate": 9.403732224933816e-05, "loss": 0.35209102630615235, "step": 14830 }, { "epoch": 0.06371122158968943, "grad_norm": 0.05240992456674576, "learning_rate": 9.403301052922053e-05, "loss": 0.1633044719696045, "step": 14840 }, { "epoch": 0.06375415367970944, "grad_norm": 0.5782513618469238, "learning_rate": 9.402869880910291e-05, "loss": 0.28111331462860106, "step": 14850 }, { "epoch": 0.06379708576972944, "grad_norm": 1.0291950702667236, "learning_rate": 9.402438708898529e-05, "loss": 0.18657200336456298, "step": 14860 }, { "epoch": 0.06384001785974945, "grad_norm": 1.3704755306243896, "learning_rate": 9.402007536886767e-05, "loss": 0.33126370906829833, "step": 14870 }, { "epoch": 0.06388294994976945, "grad_norm": 0.014527720399200916, "learning_rate": 9.401576364875003e-05, "loss": 0.27899258136749266, "step": 14880 }, { "epoch": 0.06392588203978947, "grad_norm": 2.0788886547088623, "learning_rate": 9.401145192863241e-05, "loss": 0.248209810256958, "step": 14890 }, { "epoch": 0.06396881412980947, "grad_norm": 1.4462379217147827, "learning_rate": 9.400714020851478e-05, "loss": 0.20047402381896973, "step": 14900 }, { "epoch": 0.06401174621982947, "grad_norm": 1.2522448301315308, "learning_rate": 9.400282848839716e-05, "loss": 0.40030665397644044, "step": 14910 }, { "epoch": 0.06405467830984948, "grad_norm": 0.7433568239212036, "learning_rate": 9.399851676827954e-05, "loss": 0.1965320587158203, "step": 14920 }, { "epoch": 0.06409761039986948, "grad_norm": 0.19484727084636688, "learning_rate": 9.399420504816192e-05, "loss": 0.3624082565307617, "step": 14930 }, { "epoch": 0.0641405424898895, "grad_norm": 2.0891435146331787, "learning_rate": 9.39898933280443e-05, "loss": 0.4050153732299805, "step": 14940 }, { "epoch": 0.0641834745799095, "grad_norm": 0.0240493081510067, "learning_rate": 9.398558160792667e-05, "loss": 0.09888641238212585, "step": 14950 }, { "epoch": 0.0642264066699295, "grad_norm": 0.9053465127944946, "learning_rate": 9.398126988780905e-05, "loss": 0.48057260513305666, "step": 14960 }, { "epoch": 0.06426933875994952, "grad_norm": 0.017222393304109573, "learning_rate": 9.397695816769143e-05, "loss": 0.26117160320281985, "step": 14970 }, { "epoch": 0.06431227084996952, "grad_norm": 0.4678298234939575, "learning_rate": 9.39726464475738e-05, "loss": 0.19400867223739623, "step": 14980 }, { "epoch": 0.06435520293998953, "grad_norm": 3.2162866592407227, "learning_rate": 9.396833472745618e-05, "loss": 0.24385993480682372, "step": 14990 }, { "epoch": 0.06439813503000953, "grad_norm": 0.5383427739143372, "learning_rate": 9.396402300733856e-05, "loss": 0.19195520877838135, "step": 15000 }, { "epoch": 0.06439813503000953, "eval_loss": 0.5142996311187744, "eval_runtime": 27.4677, "eval_samples_per_second": 3.641, "eval_steps_per_second": 3.641, "step": 15000 }, { "epoch": 0.06444106712002953, "grad_norm": 0.5139740109443665, "learning_rate": 9.395971128722094e-05, "loss": 0.0931144118309021, "step": 15010 }, { "epoch": 0.06448399921004955, "grad_norm": 1.9202089309692383, "learning_rate": 9.395539956710331e-05, "loss": 0.5149777889251709, "step": 15020 }, { "epoch": 0.06452693130006955, "grad_norm": 1.7345205545425415, "learning_rate": 9.395108784698569e-05, "loss": 0.2525042533874512, "step": 15030 }, { "epoch": 0.06456986339008956, "grad_norm": 0.01726091280579567, "learning_rate": 9.394677612686805e-05, "loss": 0.45943574905395507, "step": 15040 }, { "epoch": 0.06461279548010956, "grad_norm": 0.04527002200484276, "learning_rate": 9.394246440675043e-05, "loss": 0.3036803722381592, "step": 15050 }, { "epoch": 0.06465572757012956, "grad_norm": 0.010524548590183258, "learning_rate": 9.393815268663281e-05, "loss": 0.27003719806671145, "step": 15060 }, { "epoch": 0.06469865966014958, "grad_norm": 0.6304553151130676, "learning_rate": 9.393384096651519e-05, "loss": 0.3148674964904785, "step": 15070 }, { "epoch": 0.06474159175016958, "grad_norm": 1.2246551513671875, "learning_rate": 9.392952924639756e-05, "loss": 0.3761624813079834, "step": 15080 }, { "epoch": 0.06478452384018958, "grad_norm": 3.657578229904175, "learning_rate": 9.392521752627994e-05, "loss": 0.16718716621398927, "step": 15090 }, { "epoch": 0.0648274559302096, "grad_norm": 0.03699451684951782, "learning_rate": 9.392090580616232e-05, "loss": 0.2341052532196045, "step": 15100 }, { "epoch": 0.0648703880202296, "grad_norm": 0.09733054786920547, "learning_rate": 9.39165940860447e-05, "loss": 0.21350882053375245, "step": 15110 }, { "epoch": 0.06491332011024961, "grad_norm": 0.023660734295845032, "learning_rate": 9.391228236592706e-05, "loss": 0.27405989170074463, "step": 15120 }, { "epoch": 0.06495625220026961, "grad_norm": 0.02325832098722458, "learning_rate": 9.390797064580944e-05, "loss": 0.06409944891929627, "step": 15130 }, { "epoch": 0.06499918429028961, "grad_norm": 0.010780767537653446, "learning_rate": 9.390365892569181e-05, "loss": 0.29595441818237306, "step": 15140 }, { "epoch": 0.06504211638030963, "grad_norm": 0.06848180294036865, "learning_rate": 9.389934720557419e-05, "loss": 0.3280792236328125, "step": 15150 }, { "epoch": 0.06508504847032963, "grad_norm": 1.5644136667251587, "learning_rate": 9.389503548545657e-05, "loss": 0.2550457715988159, "step": 15160 }, { "epoch": 0.06512798056034964, "grad_norm": 0.478125661611557, "learning_rate": 9.389072376533895e-05, "loss": 0.27299840450286866, "step": 15170 }, { "epoch": 0.06517091265036964, "grad_norm": 38.812252044677734, "learning_rate": 9.388641204522132e-05, "loss": 0.17638283967971802, "step": 15180 }, { "epoch": 0.06521384474038965, "grad_norm": 0.23070688545703888, "learning_rate": 9.38821003251037e-05, "loss": 0.21997244358062745, "step": 15190 }, { "epoch": 0.06525677683040966, "grad_norm": 0.012052039615809917, "learning_rate": 9.387778860498608e-05, "loss": 0.07902588844299316, "step": 15200 }, { "epoch": 0.06529970892042966, "grad_norm": 0.14975719153881073, "learning_rate": 9.387347688486845e-05, "loss": 0.4755707263946533, "step": 15210 }, { "epoch": 0.06534264101044968, "grad_norm": 29.956811904907227, "learning_rate": 9.386916516475083e-05, "loss": 0.29420592784881594, "step": 15220 }, { "epoch": 0.06538557310046968, "grad_norm": 0.009674137458205223, "learning_rate": 9.386485344463321e-05, "loss": 0.25805816650390623, "step": 15230 }, { "epoch": 0.06542850519048968, "grad_norm": 0.31382089853286743, "learning_rate": 9.386054172451559e-05, "loss": 0.20677318572998046, "step": 15240 }, { "epoch": 0.06547143728050969, "grad_norm": 0.0067361705005168915, "learning_rate": 9.385623000439796e-05, "loss": 0.10377358198165894, "step": 15250 }, { "epoch": 0.0655143693705297, "grad_norm": 0.010374417528510094, "learning_rate": 9.385191828428034e-05, "loss": 0.24403734207153321, "step": 15260 }, { "epoch": 0.06555730146054971, "grad_norm": 0.05472126975655556, "learning_rate": 9.384760656416272e-05, "loss": 0.3620487928390503, "step": 15270 }, { "epoch": 0.06560023355056971, "grad_norm": 0.21027326583862305, "learning_rate": 9.38432948440451e-05, "loss": 0.182614266872406, "step": 15280 }, { "epoch": 0.06564316564058971, "grad_norm": 0.31767529249191284, "learning_rate": 9.383898312392746e-05, "loss": 0.0882587492465973, "step": 15290 }, { "epoch": 0.06568609773060972, "grad_norm": 0.03704505041241646, "learning_rate": 9.383467140380984e-05, "loss": 0.09384621977806092, "step": 15300 }, { "epoch": 0.06572902982062973, "grad_norm": 0.025900261476635933, "learning_rate": 9.383035968369221e-05, "loss": 0.2080439329147339, "step": 15310 }, { "epoch": 0.06577196191064974, "grad_norm": 0.07200033217668533, "learning_rate": 9.382604796357459e-05, "loss": 0.3437242031097412, "step": 15320 }, { "epoch": 0.06581489400066974, "grad_norm": 0.7287998199462891, "learning_rate": 9.382173624345697e-05, "loss": 0.37477612495422363, "step": 15330 }, { "epoch": 0.06585782609068974, "grad_norm": 0.013580391183495522, "learning_rate": 9.381742452333935e-05, "loss": 0.19882876873016359, "step": 15340 }, { "epoch": 0.06590075818070976, "grad_norm": 1.0913523435592651, "learning_rate": 9.381311280322172e-05, "loss": 0.4215532302856445, "step": 15350 }, { "epoch": 0.06594369027072976, "grad_norm": 3.035409688949585, "learning_rate": 9.38088010831041e-05, "loss": 0.31964831352233886, "step": 15360 }, { "epoch": 0.06598662236074977, "grad_norm": 0.4063565731048584, "learning_rate": 9.380448936298647e-05, "loss": 0.22180414199829102, "step": 15370 }, { "epoch": 0.06602955445076977, "grad_norm": 2.7936208248138428, "learning_rate": 9.380017764286884e-05, "loss": 0.28003358840942383, "step": 15380 }, { "epoch": 0.06607248654078977, "grad_norm": 0.5528353452682495, "learning_rate": 9.379586592275122e-05, "loss": 0.45966687202453616, "step": 15390 }, { "epoch": 0.06611541863080979, "grad_norm": 0.123682901263237, "learning_rate": 9.37915542026336e-05, "loss": 0.21645750999450683, "step": 15400 }, { "epoch": 0.06615835072082979, "grad_norm": 1.5124139785766602, "learning_rate": 9.378724248251597e-05, "loss": 0.41568670272827146, "step": 15410 }, { "epoch": 0.0662012828108498, "grad_norm": 0.14867492020130157, "learning_rate": 9.378293076239835e-05, "loss": 0.29629995822906496, "step": 15420 }, { "epoch": 0.0662442149008698, "grad_norm": 0.3669738173484802, "learning_rate": 9.377861904228073e-05, "loss": 0.18541309833526612, "step": 15430 }, { "epoch": 0.0662871469908898, "grad_norm": 0.015653476119041443, "learning_rate": 9.37743073221631e-05, "loss": 0.10632809400558471, "step": 15440 }, { "epoch": 0.06633007908090982, "grad_norm": 0.02738998457789421, "learning_rate": 9.376999560204548e-05, "loss": 0.26311161518096926, "step": 15450 }, { "epoch": 0.06637301117092982, "grad_norm": 0.12375674396753311, "learning_rate": 9.376568388192786e-05, "loss": 0.2682521820068359, "step": 15460 }, { "epoch": 0.06641594326094984, "grad_norm": 0.06628740578889847, "learning_rate": 9.376137216181024e-05, "loss": 0.08452145457267761, "step": 15470 }, { "epoch": 0.06645887535096984, "grad_norm": 0.08715321123600006, "learning_rate": 9.375706044169262e-05, "loss": 0.26986777782440186, "step": 15480 }, { "epoch": 0.06650180744098984, "grad_norm": 0.11993555724620819, "learning_rate": 9.3752748721575e-05, "loss": 0.4222938060760498, "step": 15490 }, { "epoch": 0.06654473953100985, "grad_norm": 0.10585108399391174, "learning_rate": 9.374843700145737e-05, "loss": 0.23077239990234374, "step": 15500 }, { "epoch": 0.06658767162102985, "grad_norm": 0.08384846150875092, "learning_rate": 9.374412528133975e-05, "loss": 0.19676105976104735, "step": 15510 }, { "epoch": 0.06663060371104985, "grad_norm": 1.7130482196807861, "learning_rate": 9.373981356122213e-05, "loss": 0.30971174240112304, "step": 15520 }, { "epoch": 0.06667353580106987, "grad_norm": 1.3083285093307495, "learning_rate": 9.373550184110449e-05, "loss": 0.18229281902313232, "step": 15530 }, { "epoch": 0.06671646789108987, "grad_norm": 0.05157879367470741, "learning_rate": 9.373119012098687e-05, "loss": 0.22775630950927733, "step": 15540 }, { "epoch": 0.06675939998110988, "grad_norm": 0.1158638447523117, "learning_rate": 9.372687840086924e-05, "loss": 0.15219898223876954, "step": 15550 }, { "epoch": 0.06680233207112989, "grad_norm": 1.0765706300735474, "learning_rate": 9.372256668075162e-05, "loss": 0.3147680997848511, "step": 15560 }, { "epoch": 0.06684526416114989, "grad_norm": 1.6944249868392944, "learning_rate": 9.3718254960634e-05, "loss": 0.2515150785446167, "step": 15570 }, { "epoch": 0.0668881962511699, "grad_norm": 2.6989057064056396, "learning_rate": 9.371394324051638e-05, "loss": 0.4220762252807617, "step": 15580 }, { "epoch": 0.0669311283411899, "grad_norm": 0.8688747882843018, "learning_rate": 9.370963152039875e-05, "loss": 0.17261714935302735, "step": 15590 }, { "epoch": 0.06697406043120992, "grad_norm": 0.10762748122215271, "learning_rate": 9.370531980028113e-05, "loss": 0.3298125982284546, "step": 15600 }, { "epoch": 0.06701699252122992, "grad_norm": 0.31058886647224426, "learning_rate": 9.370100808016351e-05, "loss": 0.19337745904922485, "step": 15610 }, { "epoch": 0.06705992461124992, "grad_norm": 4.391843795776367, "learning_rate": 9.369669636004587e-05, "loss": 0.16032843589782714, "step": 15620 }, { "epoch": 0.06710285670126993, "grad_norm": 1.146741509437561, "learning_rate": 9.369238463992825e-05, "loss": 0.21080875396728516, "step": 15630 }, { "epoch": 0.06714578879128993, "grad_norm": 3.3124451637268066, "learning_rate": 9.368807291981063e-05, "loss": 0.17837634086608886, "step": 15640 }, { "epoch": 0.06718872088130995, "grad_norm": 0.04370800778269768, "learning_rate": 9.3683761199693e-05, "loss": 0.17239662408828735, "step": 15650 }, { "epoch": 0.06723165297132995, "grad_norm": 0.13442166149616241, "learning_rate": 9.367944947957538e-05, "loss": 0.09528826475143433, "step": 15660 }, { "epoch": 0.06727458506134995, "grad_norm": 2.838956832885742, "learning_rate": 9.367513775945776e-05, "loss": 0.347308874130249, "step": 15670 }, { "epoch": 0.06731751715136997, "grad_norm": 0.13439913094043732, "learning_rate": 9.367082603934015e-05, "loss": 0.3867277860641479, "step": 15680 }, { "epoch": 0.06736044924138997, "grad_norm": 1.1007906198501587, "learning_rate": 9.366651431922253e-05, "loss": 0.4312880516052246, "step": 15690 }, { "epoch": 0.06740338133140998, "grad_norm": 9.591843605041504, "learning_rate": 9.366220259910489e-05, "loss": 0.25187973976135253, "step": 15700 }, { "epoch": 0.06744631342142998, "grad_norm": 0.6064546704292297, "learning_rate": 9.365789087898727e-05, "loss": 0.18571261167526246, "step": 15710 }, { "epoch": 0.06748924551144998, "grad_norm": 1.0531588792800903, "learning_rate": 9.365357915886965e-05, "loss": 0.3080170154571533, "step": 15720 }, { "epoch": 0.06753217760147, "grad_norm": 3.3235676288604736, "learning_rate": 9.364926743875202e-05, "loss": 0.34673519134521485, "step": 15730 }, { "epoch": 0.06757510969149, "grad_norm": 0.06666219234466553, "learning_rate": 9.36449557186344e-05, "loss": 0.17069684267044066, "step": 15740 }, { "epoch": 0.06761804178151001, "grad_norm": 0.08851531147956848, "learning_rate": 9.364064399851678e-05, "loss": 0.33871867656707766, "step": 15750 }, { "epoch": 0.06766097387153001, "grad_norm": 0.5347506403923035, "learning_rate": 9.363633227839915e-05, "loss": 0.2940122365951538, "step": 15760 }, { "epoch": 0.06770390596155001, "grad_norm": 0.8870237469673157, "learning_rate": 9.363202055828153e-05, "loss": 0.3184309720993042, "step": 15770 }, { "epoch": 0.06774683805157003, "grad_norm": 2.2229931354522705, "learning_rate": 9.36277088381639e-05, "loss": 0.280017352104187, "step": 15780 }, { "epoch": 0.06778977014159003, "grad_norm": 0.06036687269806862, "learning_rate": 9.362339711804627e-05, "loss": 0.3001197576522827, "step": 15790 }, { "epoch": 0.06783270223161005, "grad_norm": 0.059763338416814804, "learning_rate": 9.361908539792865e-05, "loss": 0.29081311225891116, "step": 15800 }, { "epoch": 0.06787563432163005, "grad_norm": 0.09039962291717529, "learning_rate": 9.361477367781103e-05, "loss": 0.30297653675079345, "step": 15810 }, { "epoch": 0.06791856641165005, "grad_norm": 0.24067221581935883, "learning_rate": 9.36104619576934e-05, "loss": 0.063690185546875, "step": 15820 }, { "epoch": 0.06796149850167006, "grad_norm": 1.0791633129119873, "learning_rate": 9.360615023757578e-05, "loss": 0.3398705005645752, "step": 15830 }, { "epoch": 0.06800443059169006, "grad_norm": 4.034187316894531, "learning_rate": 9.360183851745816e-05, "loss": 0.353275990486145, "step": 15840 }, { "epoch": 0.06804736268171008, "grad_norm": 0.6785535216331482, "learning_rate": 9.359752679734054e-05, "loss": 0.13289799690246581, "step": 15850 }, { "epoch": 0.06809029477173008, "grad_norm": 0.7840905785560608, "learning_rate": 9.35932150772229e-05, "loss": 0.12579288482666015, "step": 15860 }, { "epoch": 0.06813322686175008, "grad_norm": 0.13649722933769226, "learning_rate": 9.358890335710528e-05, "loss": 0.25913591384887696, "step": 15870 }, { "epoch": 0.0681761589517701, "grad_norm": 0.06511491537094116, "learning_rate": 9.358459163698766e-05, "loss": 0.21581885814666749, "step": 15880 }, { "epoch": 0.0682190910417901, "grad_norm": 0.08669072389602661, "learning_rate": 9.358027991687003e-05, "loss": 0.3530642032623291, "step": 15890 }, { "epoch": 0.06826202313181011, "grad_norm": 0.0684560239315033, "learning_rate": 9.357596819675242e-05, "loss": 0.24950690269470216, "step": 15900 }, { "epoch": 0.06830495522183011, "grad_norm": 0.1603812426328659, "learning_rate": 9.35716564766348e-05, "loss": 0.2178800582885742, "step": 15910 }, { "epoch": 0.06834788731185011, "grad_norm": 2.852123260498047, "learning_rate": 9.356734475651718e-05, "loss": 0.16984164714813232, "step": 15920 }, { "epoch": 0.06839081940187013, "grad_norm": 1.6295524835586548, "learning_rate": 9.356303303639956e-05, "loss": 0.4048158168792725, "step": 15930 }, { "epoch": 0.06843375149189013, "grad_norm": 3.4276375770568848, "learning_rate": 9.355872131628192e-05, "loss": 0.09007681012153626, "step": 15940 }, { "epoch": 0.06847668358191013, "grad_norm": 27.015365600585938, "learning_rate": 9.35544095961643e-05, "loss": 0.12301526069641114, "step": 15950 }, { "epoch": 0.06851961567193014, "grad_norm": 0.044064607471227646, "learning_rate": 9.355009787604667e-05, "loss": 0.3360439300537109, "step": 15960 }, { "epoch": 0.06856254776195014, "grad_norm": 2.858654737472534, "learning_rate": 9.354578615592905e-05, "loss": 0.6317211151123047, "step": 15970 }, { "epoch": 0.06860547985197016, "grad_norm": 0.2168719321489334, "learning_rate": 9.354147443581143e-05, "loss": 0.10691696405410767, "step": 15980 }, { "epoch": 0.06864841194199016, "grad_norm": 0.17285951972007751, "learning_rate": 9.35371627156938e-05, "loss": 0.18733786344528197, "step": 15990 }, { "epoch": 0.06869134403201016, "grad_norm": 0.08299694955348969, "learning_rate": 9.353285099557618e-05, "loss": 0.09542076587677002, "step": 16000 }, { "epoch": 0.06869134403201016, "eval_loss": 0.526293933391571, "eval_runtime": 27.5007, "eval_samples_per_second": 3.636, "eval_steps_per_second": 3.636, "step": 16000 }, { "epoch": 0.06873427612203017, "grad_norm": 0.05496923252940178, "learning_rate": 9.352853927545856e-05, "loss": 0.1974226117134094, "step": 16010 }, { "epoch": 0.06877720821205018, "grad_norm": 0.11612247675657272, "learning_rate": 9.352422755534094e-05, "loss": 0.3134061574935913, "step": 16020 }, { "epoch": 0.06882014030207019, "grad_norm": 0.06240341067314148, "learning_rate": 9.35199158352233e-05, "loss": 0.09108485579490662, "step": 16030 }, { "epoch": 0.06886307239209019, "grad_norm": 0.018856368958950043, "learning_rate": 9.351560411510568e-05, "loss": 0.1379598021507263, "step": 16040 }, { "epoch": 0.06890600448211019, "grad_norm": 0.02072647213935852, "learning_rate": 9.351129239498806e-05, "loss": 0.29755070209503176, "step": 16050 }, { "epoch": 0.0689489365721302, "grad_norm": 0.02678876370191574, "learning_rate": 9.350698067487043e-05, "loss": 0.19866015911102294, "step": 16060 }, { "epoch": 0.06899186866215021, "grad_norm": 0.02596438117325306, "learning_rate": 9.350266895475281e-05, "loss": 0.23744065761566163, "step": 16070 }, { "epoch": 0.06903480075217022, "grad_norm": 0.0036663708742707968, "learning_rate": 9.349835723463519e-05, "loss": 0.22252767086029052, "step": 16080 }, { "epoch": 0.06907773284219022, "grad_norm": 1.597440242767334, "learning_rate": 9.349404551451757e-05, "loss": 0.3193185329437256, "step": 16090 }, { "epoch": 0.06912066493221022, "grad_norm": 0.05906902626156807, "learning_rate": 9.348973379439994e-05, "loss": 0.2511482238769531, "step": 16100 }, { "epoch": 0.06916359702223024, "grad_norm": 0.03994489088654518, "learning_rate": 9.348542207428231e-05, "loss": 0.24306204319000244, "step": 16110 }, { "epoch": 0.06920652911225024, "grad_norm": 0.9707558155059814, "learning_rate": 9.34811103541647e-05, "loss": 0.3085232496261597, "step": 16120 }, { "epoch": 0.06924946120227025, "grad_norm": 7.231551647186279, "learning_rate": 9.347679863404708e-05, "loss": 0.2969422101974487, "step": 16130 }, { "epoch": 0.06929239329229026, "grad_norm": 0.19959791004657745, "learning_rate": 9.347248691392945e-05, "loss": 0.20120530128479003, "step": 16140 }, { "epoch": 0.06933532538231026, "grad_norm": 3.905804395675659, "learning_rate": 9.346817519381183e-05, "loss": 0.31837916374206543, "step": 16150 }, { "epoch": 0.06937825747233027, "grad_norm": 1.6950525045394897, "learning_rate": 9.346386347369421e-05, "loss": 0.21888651847839355, "step": 16160 }, { "epoch": 0.06942118956235027, "grad_norm": 4.336323261260986, "learning_rate": 9.345955175357658e-05, "loss": 0.5113966464996338, "step": 16170 }, { "epoch": 0.06946412165237029, "grad_norm": 0.053269702941179276, "learning_rate": 9.345524003345896e-05, "loss": 0.18296149969100953, "step": 16180 }, { "epoch": 0.06950705374239029, "grad_norm": 0.09012717008590698, "learning_rate": 9.345092831334133e-05, "loss": 0.1996088743209839, "step": 16190 }, { "epoch": 0.06954998583241029, "grad_norm": 2.09253191947937, "learning_rate": 9.34466165932237e-05, "loss": 0.20683207511901855, "step": 16200 }, { "epoch": 0.0695929179224303, "grad_norm": 0.13705863058567047, "learning_rate": 9.344230487310608e-05, "loss": 0.30519049167633056, "step": 16210 }, { "epoch": 0.0696358500124503, "grad_norm": 0.018574651330709457, "learning_rate": 9.343799315298846e-05, "loss": 0.3084603548049927, "step": 16220 }, { "epoch": 0.06967878210247032, "grad_norm": 0.9541419744491577, "learning_rate": 9.343368143287084e-05, "loss": 0.3294121742248535, "step": 16230 }, { "epoch": 0.06972171419249032, "grad_norm": 1.0622129440307617, "learning_rate": 9.342936971275321e-05, "loss": 0.48809447288513186, "step": 16240 }, { "epoch": 0.06976464628251032, "grad_norm": 0.009066279046237469, "learning_rate": 9.342505799263559e-05, "loss": 0.04652267992496491, "step": 16250 }, { "epoch": 0.06980757837253034, "grad_norm": 0.5664111375808716, "learning_rate": 9.342074627251797e-05, "loss": 0.31479432582855227, "step": 16260 }, { "epoch": 0.06985051046255034, "grad_norm": 0.040703702718019485, "learning_rate": 9.341643455240033e-05, "loss": 0.10909019708633423, "step": 16270 }, { "epoch": 0.06989344255257035, "grad_norm": 0.0748838409781456, "learning_rate": 9.341212283228271e-05, "loss": 0.26613683700561525, "step": 16280 }, { "epoch": 0.06993637464259035, "grad_norm": 1.402508020401001, "learning_rate": 9.340781111216509e-05, "loss": 0.328706693649292, "step": 16290 }, { "epoch": 0.06997930673261035, "grad_norm": 1.6031125783920288, "learning_rate": 9.340349939204746e-05, "loss": 0.20419700145721437, "step": 16300 }, { "epoch": 0.07002223882263037, "grad_norm": 3.960529327392578, "learning_rate": 9.339918767192984e-05, "loss": 0.16980836391448975, "step": 16310 }, { "epoch": 0.07006517091265037, "grad_norm": 0.040454789996147156, "learning_rate": 9.339487595181222e-05, "loss": 0.33831195831298827, "step": 16320 }, { "epoch": 0.07010810300267038, "grad_norm": 0.18430018424987793, "learning_rate": 9.33905642316946e-05, "loss": 0.1638139009475708, "step": 16330 }, { "epoch": 0.07015103509269038, "grad_norm": 2.9964406490325928, "learning_rate": 9.338625251157697e-05, "loss": 0.3662814378738403, "step": 16340 }, { "epoch": 0.07019396718271038, "grad_norm": 0.0014150225324556231, "learning_rate": 9.338194079145935e-05, "loss": 0.2916199445724487, "step": 16350 }, { "epoch": 0.0702368992727304, "grad_norm": 1.233773112297058, "learning_rate": 9.337762907134173e-05, "loss": 0.2907113552093506, "step": 16360 }, { "epoch": 0.0702798313627504, "grad_norm": 0.06687705218791962, "learning_rate": 9.33733173512241e-05, "loss": 0.21224551200866698, "step": 16370 }, { "epoch": 0.0703227634527704, "grad_norm": 3.684509038925171, "learning_rate": 9.336900563110648e-05, "loss": 0.20599827766418458, "step": 16380 }, { "epoch": 0.07036569554279042, "grad_norm": 1.5613230466842651, "learning_rate": 9.336469391098886e-05, "loss": 0.26720128059387205, "step": 16390 }, { "epoch": 0.07040862763281042, "grad_norm": 3.201432466506958, "learning_rate": 9.336038219087124e-05, "loss": 0.11232433319091797, "step": 16400 }, { "epoch": 0.07045155972283043, "grad_norm": 0.6785762906074524, "learning_rate": 9.335607047075361e-05, "loss": 0.2559498310089111, "step": 16410 }, { "epoch": 0.07049449181285043, "grad_norm": 0.44419634342193604, "learning_rate": 9.335175875063599e-05, "loss": 0.6221245765686035, "step": 16420 }, { "epoch": 0.07053742390287043, "grad_norm": 0.08672723919153214, "learning_rate": 9.334744703051837e-05, "loss": 0.22470765113830565, "step": 16430 }, { "epoch": 0.07058035599289045, "grad_norm": 6.374612331390381, "learning_rate": 9.334313531040073e-05, "loss": 0.2854200839996338, "step": 16440 }, { "epoch": 0.07062328808291045, "grad_norm": 0.025272591039538383, "learning_rate": 9.333882359028311e-05, "loss": 0.3004850149154663, "step": 16450 }, { "epoch": 0.07066622017293046, "grad_norm": 1.0149154663085938, "learning_rate": 9.333451187016549e-05, "loss": 0.160288405418396, "step": 16460 }, { "epoch": 0.07070915226295046, "grad_norm": 0.10873989015817642, "learning_rate": 9.333020015004786e-05, "loss": 0.2299208402633667, "step": 16470 }, { "epoch": 0.07075208435297047, "grad_norm": 0.05491790547966957, "learning_rate": 9.332588842993024e-05, "loss": 0.19463187456130981, "step": 16480 }, { "epoch": 0.07079501644299048, "grad_norm": 1.5342626571655273, "learning_rate": 9.332157670981262e-05, "loss": 0.24398891925811766, "step": 16490 }, { "epoch": 0.07083794853301048, "grad_norm": 0.13651759922504425, "learning_rate": 9.3317264989695e-05, "loss": 0.47367658615112307, "step": 16500 }, { "epoch": 0.0708808806230305, "grad_norm": 0.634511411190033, "learning_rate": 9.331295326957737e-05, "loss": 0.47757797241210936, "step": 16510 }, { "epoch": 0.0709238127130505, "grad_norm": 0.09785524755716324, "learning_rate": 9.330864154945974e-05, "loss": 0.17711851596832276, "step": 16520 }, { "epoch": 0.0709667448030705, "grad_norm": 0.026648370549082756, "learning_rate": 9.330432982934212e-05, "loss": 0.14128479957580567, "step": 16530 }, { "epoch": 0.07100967689309051, "grad_norm": 0.26906535029411316, "learning_rate": 9.330001810922449e-05, "loss": 0.142492938041687, "step": 16540 }, { "epoch": 0.07105260898311051, "grad_norm": 1.0589438676834106, "learning_rate": 9.329570638910687e-05, "loss": 0.22687864303588867, "step": 16550 }, { "epoch": 0.07109554107313053, "grad_norm": 1.6760090589523315, "learning_rate": 9.329139466898925e-05, "loss": 0.2446916103363037, "step": 16560 }, { "epoch": 0.07113847316315053, "grad_norm": 0.03415419161319733, "learning_rate": 9.328708294887162e-05, "loss": 0.20921945571899414, "step": 16570 }, { "epoch": 0.07118140525317053, "grad_norm": 0.9828307032585144, "learning_rate": 9.3282771228754e-05, "loss": 0.23197565078735352, "step": 16580 }, { "epoch": 0.07122433734319054, "grad_norm": 0.10277032107114792, "learning_rate": 9.327845950863638e-05, "loss": 0.1265937328338623, "step": 16590 }, { "epoch": 0.07126726943321054, "grad_norm": 0.5460087060928345, "learning_rate": 9.327414778851876e-05, "loss": 0.30683131217956544, "step": 16600 }, { "epoch": 0.07131020152323056, "grad_norm": 1.562453031539917, "learning_rate": 9.326983606840113e-05, "loss": 0.13357644081115722, "step": 16610 }, { "epoch": 0.07135313361325056, "grad_norm": 0.13830183446407318, "learning_rate": 9.326552434828351e-05, "loss": 0.28795995712280276, "step": 16620 }, { "epoch": 0.07139606570327056, "grad_norm": 0.06272502988576889, "learning_rate": 9.326121262816589e-05, "loss": 0.2059838056564331, "step": 16630 }, { "epoch": 0.07143899779329058, "grad_norm": 0.02094428613781929, "learning_rate": 9.325690090804827e-05, "loss": 0.05481228232383728, "step": 16640 }, { "epoch": 0.07148192988331058, "grad_norm": 0.11069640517234802, "learning_rate": 9.325258918793064e-05, "loss": 0.1461290240287781, "step": 16650 }, { "epoch": 0.07152486197333059, "grad_norm": 3.0805158615112305, "learning_rate": 9.324827746781302e-05, "loss": 0.33733808994293213, "step": 16660 }, { "epoch": 0.07156779406335059, "grad_norm": 9.202255249023438, "learning_rate": 9.32439657476954e-05, "loss": 0.339400577545166, "step": 16670 }, { "epoch": 0.0716107261533706, "grad_norm": 0.3002989590167999, "learning_rate": 9.323965402757776e-05, "loss": 0.38298094272613525, "step": 16680 }, { "epoch": 0.07165365824339061, "grad_norm": 1.8727099895477295, "learning_rate": 9.323534230746014e-05, "loss": 0.48968868255615233, "step": 16690 }, { "epoch": 0.07169659033341061, "grad_norm": 0.329200804233551, "learning_rate": 9.323103058734252e-05, "loss": 0.3012394905090332, "step": 16700 }, { "epoch": 0.07173952242343062, "grad_norm": 0.5954709053039551, "learning_rate": 9.32267188672249e-05, "loss": 0.0898634910583496, "step": 16710 }, { "epoch": 0.07178245451345062, "grad_norm": 0.6996222734451294, "learning_rate": 9.322240714710727e-05, "loss": 0.27539935111999514, "step": 16720 }, { "epoch": 0.07182538660347063, "grad_norm": 0.015904199331998825, "learning_rate": 9.321809542698965e-05, "loss": 0.3184134244918823, "step": 16730 }, { "epoch": 0.07186831869349064, "grad_norm": 1.6481454372406006, "learning_rate": 9.321378370687203e-05, "loss": 0.34613637924194335, "step": 16740 }, { "epoch": 0.07191125078351064, "grad_norm": 0.08801378309726715, "learning_rate": 9.32094719867544e-05, "loss": 0.3641823768615723, "step": 16750 }, { "epoch": 0.07195418287353066, "grad_norm": 0.03433467075228691, "learning_rate": 9.320516026663678e-05, "loss": 0.2840471029281616, "step": 16760 }, { "epoch": 0.07199711496355066, "grad_norm": 0.033972445875406265, "learning_rate": 9.320084854651914e-05, "loss": 0.1771819233894348, "step": 16770 }, { "epoch": 0.07204004705357066, "grad_norm": 1.2626923322677612, "learning_rate": 9.319653682640152e-05, "loss": 0.28968324661254885, "step": 16780 }, { "epoch": 0.07208297914359067, "grad_norm": 0.09417697042226791, "learning_rate": 9.31922251062839e-05, "loss": 0.15749802589416503, "step": 16790 }, { "epoch": 0.07212591123361067, "grad_norm": 0.062441833317279816, "learning_rate": 9.318791338616628e-05, "loss": 0.26803529262542725, "step": 16800 }, { "epoch": 0.07216884332363067, "grad_norm": 2.0154871940612793, "learning_rate": 9.318360166604865e-05, "loss": 0.4345219135284424, "step": 16810 }, { "epoch": 0.07221177541365069, "grad_norm": 2.229771614074707, "learning_rate": 9.317928994593103e-05, "loss": 0.1976357340812683, "step": 16820 }, { "epoch": 0.07225470750367069, "grad_norm": 0.26853519678115845, "learning_rate": 9.317497822581341e-05, "loss": 0.2845271587371826, "step": 16830 }, { "epoch": 0.0722976395936907, "grad_norm": 0.0032490019220858812, "learning_rate": 9.317066650569579e-05, "loss": 0.42137975692749025, "step": 16840 }, { "epoch": 0.0723405716837107, "grad_norm": 0.004822635091841221, "learning_rate": 9.316635478557816e-05, "loss": 0.377595853805542, "step": 16850 }, { "epoch": 0.0723835037737307, "grad_norm": 0.10645035654306412, "learning_rate": 9.316204306546054e-05, "loss": 0.15568927526474, "step": 16860 }, { "epoch": 0.07242643586375072, "grad_norm": 0.015432666055858135, "learning_rate": 9.315773134534292e-05, "loss": 0.36362059116363527, "step": 16870 }, { "epoch": 0.07246936795377072, "grad_norm": 0.017693661153316498, "learning_rate": 9.31534196252253e-05, "loss": 0.2413787603378296, "step": 16880 }, { "epoch": 0.07251230004379074, "grad_norm": 0.15146492421627045, "learning_rate": 9.314910790510767e-05, "loss": 0.237416672706604, "step": 16890 }, { "epoch": 0.07255523213381074, "grad_norm": 4.291798114776611, "learning_rate": 9.314479618499005e-05, "loss": 0.4450693130493164, "step": 16900 }, { "epoch": 0.07259816422383074, "grad_norm": 1.8319154977798462, "learning_rate": 9.314048446487243e-05, "loss": 0.33154332637786865, "step": 16910 }, { "epoch": 0.07264109631385075, "grad_norm": 0.23402829468250275, "learning_rate": 9.31361727447548e-05, "loss": 0.21158668994903565, "step": 16920 }, { "epoch": 0.07268402840387075, "grad_norm": 0.9586695432662964, "learning_rate": 9.313186102463717e-05, "loss": 0.2537838935852051, "step": 16930 }, { "epoch": 0.07272696049389077, "grad_norm": 0.7257668375968933, "learning_rate": 9.312754930451955e-05, "loss": 0.4206377983093262, "step": 16940 }, { "epoch": 0.07276989258391077, "grad_norm": 1.5806889533996582, "learning_rate": 9.312323758440192e-05, "loss": 0.20530943870544432, "step": 16950 }, { "epoch": 0.07281282467393077, "grad_norm": 0.2541026771068573, "learning_rate": 9.31189258642843e-05, "loss": 0.40438618659973147, "step": 16960 }, { "epoch": 0.07285575676395079, "grad_norm": 2.763205051422119, "learning_rate": 9.311461414416668e-05, "loss": 0.41117844581604, "step": 16970 }, { "epoch": 0.07289868885397079, "grad_norm": 0.3387240767478943, "learning_rate": 9.311030242404905e-05, "loss": 0.32136220932006837, "step": 16980 }, { "epoch": 0.0729416209439908, "grad_norm": 0.4019925594329834, "learning_rate": 9.310599070393143e-05, "loss": 0.2839751958847046, "step": 16990 }, { "epoch": 0.0729845530340108, "grad_norm": 0.6113116145133972, "learning_rate": 9.310167898381381e-05, "loss": 0.2559647798538208, "step": 17000 }, { "epoch": 0.0729845530340108, "eval_loss": 0.5012018084526062, "eval_runtime": 27.5411, "eval_samples_per_second": 3.631, "eval_steps_per_second": 3.631, "step": 17000 }, { "epoch": 0.0730274851240308, "grad_norm": 0.06289585679769516, "learning_rate": 9.309736726369617e-05, "loss": 0.27092506885528567, "step": 17010 }, { "epoch": 0.07307041721405082, "grad_norm": 3.7301859855651855, "learning_rate": 9.309305554357855e-05, "loss": 0.289726996421814, "step": 17020 }, { "epoch": 0.07311334930407082, "grad_norm": 0.07134075462818146, "learning_rate": 9.308874382346093e-05, "loss": 0.15513920783996582, "step": 17030 }, { "epoch": 0.07315628139409083, "grad_norm": 1.0717676877975464, "learning_rate": 9.30844321033433e-05, "loss": 0.2019169569015503, "step": 17040 }, { "epoch": 0.07319921348411083, "grad_norm": 1.1874752044677734, "learning_rate": 9.308012038322568e-05, "loss": 0.346480655670166, "step": 17050 }, { "epoch": 0.07324214557413083, "grad_norm": 0.29725155234336853, "learning_rate": 9.307580866310806e-05, "loss": 0.2214757204055786, "step": 17060 }, { "epoch": 0.07328507766415085, "grad_norm": 0.0174336489289999, "learning_rate": 9.307149694299044e-05, "loss": 0.19202833175659179, "step": 17070 }, { "epoch": 0.07332800975417085, "grad_norm": 2.515242338180542, "learning_rate": 9.306718522287281e-05, "loss": 0.42656803131103516, "step": 17080 }, { "epoch": 0.07337094184419087, "grad_norm": 0.02410704828798771, "learning_rate": 9.30628735027552e-05, "loss": 0.15156646966934204, "step": 17090 }, { "epoch": 0.07341387393421087, "grad_norm": 7.142855167388916, "learning_rate": 9.305856178263757e-05, "loss": 0.31276555061340333, "step": 17100 }, { "epoch": 0.07345680602423087, "grad_norm": 8.62886905670166, "learning_rate": 9.305425006251995e-05, "loss": 0.3314593553543091, "step": 17110 }, { "epoch": 0.07349973811425088, "grad_norm": 0.042989179491996765, "learning_rate": 9.304993834240232e-05, "loss": 0.1726578950881958, "step": 17120 }, { "epoch": 0.07354267020427088, "grad_norm": 0.3412798345088959, "learning_rate": 9.30456266222847e-05, "loss": 0.2019892692565918, "step": 17130 }, { "epoch": 0.0735856022942909, "grad_norm": 0.10631978511810303, "learning_rate": 9.304131490216708e-05, "loss": 0.32094345092773435, "step": 17140 }, { "epoch": 0.0736285343843109, "grad_norm": 1.4753174781799316, "learning_rate": 9.303700318204946e-05, "loss": 0.3164651393890381, "step": 17150 }, { "epoch": 0.0736714664743309, "grad_norm": 1.849280595779419, "learning_rate": 9.303269146193183e-05, "loss": 0.3136913537979126, "step": 17160 }, { "epoch": 0.07371439856435091, "grad_norm": 0.006545449141412973, "learning_rate": 9.302837974181421e-05, "loss": 0.2594448566436768, "step": 17170 }, { "epoch": 0.07375733065437091, "grad_norm": 0.0032706542406231165, "learning_rate": 9.302406802169657e-05, "loss": 0.13361896276474, "step": 17180 }, { "epoch": 0.07380026274439093, "grad_norm": 0.02022142894566059, "learning_rate": 9.301975630157895e-05, "loss": 0.030848246812820435, "step": 17190 }, { "epoch": 0.07384319483441093, "grad_norm": 0.004030085168778896, "learning_rate": 9.301544458146133e-05, "loss": 0.2393047332763672, "step": 17200 }, { "epoch": 0.07388612692443093, "grad_norm": 0.04448021203279495, "learning_rate": 9.30111328613437e-05, "loss": 0.12692539691925048, "step": 17210 }, { "epoch": 0.07392905901445095, "grad_norm": 0.10861719399690628, "learning_rate": 9.300682114122608e-05, "loss": 0.31166269779205324, "step": 17220 }, { "epoch": 0.07397199110447095, "grad_norm": 0.00539032556116581, "learning_rate": 9.300250942110846e-05, "loss": 0.08787255883216857, "step": 17230 }, { "epoch": 0.07401492319449095, "grad_norm": 1.3360871076583862, "learning_rate": 9.299819770099084e-05, "loss": 0.3009865045547485, "step": 17240 }, { "epoch": 0.07405785528451096, "grad_norm": 0.24125048518180847, "learning_rate": 9.299388598087322e-05, "loss": 0.18031530380249022, "step": 17250 }, { "epoch": 0.07410078737453096, "grad_norm": 0.004599709529429674, "learning_rate": 9.298957426075558e-05, "loss": 0.34679040908813474, "step": 17260 }, { "epoch": 0.07414371946455098, "grad_norm": 5.64448881149292, "learning_rate": 9.298526254063796e-05, "loss": 0.20970757007598878, "step": 17270 }, { "epoch": 0.07418665155457098, "grad_norm": 0.8933638334274292, "learning_rate": 9.298095082052033e-05, "loss": 0.2926939487457275, "step": 17280 }, { "epoch": 0.07422958364459098, "grad_norm": 1.7648670673370361, "learning_rate": 9.297663910040271e-05, "loss": 0.3770651578903198, "step": 17290 }, { "epoch": 0.074272515734611, "grad_norm": 6.239072322845459, "learning_rate": 9.297232738028509e-05, "loss": 0.2613657474517822, "step": 17300 }, { "epoch": 0.074315447824631, "grad_norm": 0.22732043266296387, "learning_rate": 9.296801566016748e-05, "loss": 0.33427302837371825, "step": 17310 }, { "epoch": 0.07435837991465101, "grad_norm": 0.988491415977478, "learning_rate": 9.296370394004986e-05, "loss": 0.11933293342590331, "step": 17320 }, { "epoch": 0.07440131200467101, "grad_norm": 0.3913913369178772, "learning_rate": 9.295939221993223e-05, "loss": 0.2547286033630371, "step": 17330 }, { "epoch": 0.07444424409469101, "grad_norm": 0.3429929316043854, "learning_rate": 9.29550804998146e-05, "loss": 0.30628602504730223, "step": 17340 }, { "epoch": 0.07448717618471103, "grad_norm": 5.126238822937012, "learning_rate": 9.295076877969698e-05, "loss": 0.3914073944091797, "step": 17350 }, { "epoch": 0.07453010827473103, "grad_norm": 0.3128284215927124, "learning_rate": 9.294645705957935e-05, "loss": 0.22532930374145507, "step": 17360 }, { "epoch": 0.07457304036475104, "grad_norm": 0.21137411892414093, "learning_rate": 9.294214533946173e-05, "loss": 0.27229340076446534, "step": 17370 }, { "epoch": 0.07461597245477104, "grad_norm": 1.3947068452835083, "learning_rate": 9.293783361934411e-05, "loss": 0.19315674304962158, "step": 17380 }, { "epoch": 0.07465890454479104, "grad_norm": 1.3445826768875122, "learning_rate": 9.293352189922649e-05, "loss": 0.44831433296203616, "step": 17390 }, { "epoch": 0.07470183663481106, "grad_norm": 1.0823842287063599, "learning_rate": 9.292921017910886e-05, "loss": 0.1613788366317749, "step": 17400 }, { "epoch": 0.07474476872483106, "grad_norm": 0.2984132170677185, "learning_rate": 9.292489845899124e-05, "loss": 0.18345820903778076, "step": 17410 }, { "epoch": 0.07478770081485107, "grad_norm": 1.5152897834777832, "learning_rate": 9.292058673887362e-05, "loss": 0.29555258750915525, "step": 17420 }, { "epoch": 0.07483063290487107, "grad_norm": 0.08002516627311707, "learning_rate": 9.291627501875598e-05, "loss": 0.29400632381439207, "step": 17430 }, { "epoch": 0.07487356499489108, "grad_norm": 0.10808016359806061, "learning_rate": 9.291196329863836e-05, "loss": 0.2884323835372925, "step": 17440 }, { "epoch": 0.07491649708491109, "grad_norm": 0.11248364299535751, "learning_rate": 9.290765157852074e-05, "loss": 0.0785040020942688, "step": 17450 }, { "epoch": 0.07495942917493109, "grad_norm": 0.34658434987068176, "learning_rate": 9.290333985840311e-05, "loss": 0.33166520595550536, "step": 17460 }, { "epoch": 0.0750023612649511, "grad_norm": 0.7928297519683838, "learning_rate": 9.289902813828549e-05, "loss": 0.3397735834121704, "step": 17470 }, { "epoch": 0.07504529335497111, "grad_norm": 0.6116107106208801, "learning_rate": 9.289471641816787e-05, "loss": 0.04122519195079803, "step": 17480 }, { "epoch": 0.07508822544499111, "grad_norm": 0.1864428073167801, "learning_rate": 9.289040469805025e-05, "loss": 0.18004008531570434, "step": 17490 }, { "epoch": 0.07513115753501112, "grad_norm": 0.0013322310987859964, "learning_rate": 9.288609297793262e-05, "loss": 0.10974700450897217, "step": 17500 }, { "epoch": 0.07517408962503112, "grad_norm": 0.008626694791018963, "learning_rate": 9.288178125781499e-05, "loss": 0.2750270128250122, "step": 17510 }, { "epoch": 0.07521702171505114, "grad_norm": 0.013454783707857132, "learning_rate": 9.287746953769736e-05, "loss": 0.1500556468963623, "step": 17520 }, { "epoch": 0.07525995380507114, "grad_norm": 1.009264349937439, "learning_rate": 9.287315781757975e-05, "loss": 0.15861928462982178, "step": 17530 }, { "epoch": 0.07530288589509114, "grad_norm": 3.1030020713806152, "learning_rate": 9.286884609746213e-05, "loss": 0.32444114685058595, "step": 17540 }, { "epoch": 0.07534581798511115, "grad_norm": 0.42069998383522034, "learning_rate": 9.286453437734451e-05, "loss": 0.23469743728637696, "step": 17550 }, { "epoch": 0.07538875007513116, "grad_norm": 0.8916969299316406, "learning_rate": 9.286022265722689e-05, "loss": 0.251232647895813, "step": 17560 }, { "epoch": 0.07543168216515117, "grad_norm": 0.0047593554481863976, "learning_rate": 9.285591093710926e-05, "loss": 0.39376943111419677, "step": 17570 }, { "epoch": 0.07547461425517117, "grad_norm": 2.4464917182922363, "learning_rate": 9.285159921699164e-05, "loss": 0.27484569549560545, "step": 17580 }, { "epoch": 0.07551754634519117, "grad_norm": 2.3647587299346924, "learning_rate": 9.2847287496874e-05, "loss": 0.35099794864654543, "step": 17590 }, { "epoch": 0.07556047843521119, "grad_norm": 4.215408802032471, "learning_rate": 9.284297577675638e-05, "loss": 0.21538822650909423, "step": 17600 }, { "epoch": 0.07560341052523119, "grad_norm": 0.03410341590642929, "learning_rate": 9.283866405663876e-05, "loss": 0.18266010284423828, "step": 17610 }, { "epoch": 0.0756463426152512, "grad_norm": 6.214269638061523, "learning_rate": 9.283435233652114e-05, "loss": 0.42656545639038085, "step": 17620 }, { "epoch": 0.0756892747052712, "grad_norm": 3.918022871017456, "learning_rate": 9.283004061640351e-05, "loss": 0.14886451959609986, "step": 17630 }, { "epoch": 0.0757322067952912, "grad_norm": 0.05858919024467468, "learning_rate": 9.282572889628589e-05, "loss": 0.48270864486694337, "step": 17640 }, { "epoch": 0.07577513888531122, "grad_norm": 22.52327537536621, "learning_rate": 9.282141717616827e-05, "loss": 0.35639214515686035, "step": 17650 }, { "epoch": 0.07581807097533122, "grad_norm": 0.14872047305107117, "learning_rate": 9.281710545605065e-05, "loss": 0.480879545211792, "step": 17660 }, { "epoch": 0.07586100306535122, "grad_norm": 2.3057806491851807, "learning_rate": 9.281279373593301e-05, "loss": 0.2604722023010254, "step": 17670 }, { "epoch": 0.07590393515537124, "grad_norm": 0.18835517764091492, "learning_rate": 9.280848201581539e-05, "loss": 0.17890266180038453, "step": 17680 }, { "epoch": 0.07594686724539124, "grad_norm": 0.07281363755464554, "learning_rate": 9.280417029569776e-05, "loss": 0.4239158630371094, "step": 17690 }, { "epoch": 0.07598979933541125, "grad_norm": 9.378323554992676, "learning_rate": 9.279985857558014e-05, "loss": 0.19697673320770265, "step": 17700 }, { "epoch": 0.07603273142543125, "grad_norm": 0.8428283333778381, "learning_rate": 9.279554685546252e-05, "loss": 0.234600830078125, "step": 17710 }, { "epoch": 0.07607566351545125, "grad_norm": 0.11949367076158524, "learning_rate": 9.27912351353449e-05, "loss": 0.4152794361114502, "step": 17720 }, { "epoch": 0.07611859560547127, "grad_norm": 0.05498175323009491, "learning_rate": 9.278692341522727e-05, "loss": 0.29344398975372316, "step": 17730 }, { "epoch": 0.07616152769549127, "grad_norm": 0.007519569247961044, "learning_rate": 9.278261169510965e-05, "loss": 0.14354888200759888, "step": 17740 }, { "epoch": 0.07620445978551128, "grad_norm": 2.0580191612243652, "learning_rate": 9.277829997499203e-05, "loss": 0.3214442729949951, "step": 17750 }, { "epoch": 0.07624739187553128, "grad_norm": 0.21962420642375946, "learning_rate": 9.27739882548744e-05, "loss": 0.1612050414085388, "step": 17760 }, { "epoch": 0.07629032396555128, "grad_norm": 0.6961410641670227, "learning_rate": 9.276967653475678e-05, "loss": 0.3876644134521484, "step": 17770 }, { "epoch": 0.0763332560555713, "grad_norm": 0.548738420009613, "learning_rate": 9.276536481463916e-05, "loss": 0.28008925914764404, "step": 17780 }, { "epoch": 0.0763761881455913, "grad_norm": 1.0258835554122925, "learning_rate": 9.276105309452154e-05, "loss": 0.2834671974182129, "step": 17790 }, { "epoch": 0.07641912023561132, "grad_norm": 2.0303940773010254, "learning_rate": 9.275674137440392e-05, "loss": 0.37418713569641116, "step": 17800 }, { "epoch": 0.07646205232563132, "grad_norm": 1.1370363235473633, "learning_rate": 9.275242965428629e-05, "loss": 0.3102121353149414, "step": 17810 }, { "epoch": 0.07650498441565132, "grad_norm": 1.8728364706039429, "learning_rate": 9.274811793416867e-05, "loss": 0.13144179582595825, "step": 17820 }, { "epoch": 0.07654791650567133, "grad_norm": 1.3970445394515991, "learning_rate": 9.274380621405105e-05, "loss": 0.27070481777191163, "step": 17830 }, { "epoch": 0.07659084859569133, "grad_norm": 1.4733984470367432, "learning_rate": 9.273949449393341e-05, "loss": 0.15226986408233642, "step": 17840 }, { "epoch": 0.07663378068571135, "grad_norm": 0.010169444605708122, "learning_rate": 9.273518277381579e-05, "loss": 0.2343198299407959, "step": 17850 }, { "epoch": 0.07667671277573135, "grad_norm": 0.03013860061764717, "learning_rate": 9.273087105369817e-05, "loss": 0.1993506908416748, "step": 17860 }, { "epoch": 0.07671964486575135, "grad_norm": 0.0857686772942543, "learning_rate": 9.272655933358054e-05, "loss": 0.13797582387924195, "step": 17870 }, { "epoch": 0.07676257695577136, "grad_norm": 1.5063636302947998, "learning_rate": 9.272224761346292e-05, "loss": 0.19858624935150146, "step": 17880 }, { "epoch": 0.07680550904579136, "grad_norm": 0.0022428890224546194, "learning_rate": 9.27179358933453e-05, "loss": 0.39183568954467773, "step": 17890 }, { "epoch": 0.07684844113581138, "grad_norm": 1.1863030195236206, "learning_rate": 9.271362417322768e-05, "loss": 0.47901058197021484, "step": 17900 }, { "epoch": 0.07689137322583138, "grad_norm": 0.13905732333660126, "learning_rate": 9.270931245311005e-05, "loss": 0.2854343891143799, "step": 17910 }, { "epoch": 0.07693430531585138, "grad_norm": 0.03768673911690712, "learning_rate": 9.270500073299242e-05, "loss": 0.24338369369506835, "step": 17920 }, { "epoch": 0.0769772374058714, "grad_norm": 0.04136132448911667, "learning_rate": 9.27006890128748e-05, "loss": 0.15599859952926637, "step": 17930 }, { "epoch": 0.0770201694958914, "grad_norm": 0.2402133345603943, "learning_rate": 9.269637729275717e-05, "loss": 0.2983910799026489, "step": 17940 }, { "epoch": 0.07706310158591141, "grad_norm": 0.00554937357082963, "learning_rate": 9.269206557263955e-05, "loss": 0.11619760990142822, "step": 17950 }, { "epoch": 0.07710603367593141, "grad_norm": 1.5368311405181885, "learning_rate": 9.268775385252193e-05, "loss": 0.21799736022949218, "step": 17960 }, { "epoch": 0.07714896576595141, "grad_norm": 1.8025269508361816, "learning_rate": 9.26834421324043e-05, "loss": 0.33750646114349364, "step": 17970 }, { "epoch": 0.07719189785597143, "grad_norm": 0.08740323036909103, "learning_rate": 9.267913041228668e-05, "loss": 0.23000924587249755, "step": 17980 }, { "epoch": 0.07723482994599143, "grad_norm": 2.418018341064453, "learning_rate": 9.267481869216906e-05, "loss": 0.21157240867614746, "step": 17990 }, { "epoch": 0.07727776203601144, "grad_norm": 1.756937026977539, "learning_rate": 9.267050697205144e-05, "loss": 0.42026352882385254, "step": 18000 }, { "epoch": 0.07727776203601144, "eval_loss": 0.49356770515441895, "eval_runtime": 27.51, "eval_samples_per_second": 3.635, "eval_steps_per_second": 3.635, "step": 18000 }, { "epoch": 0.07732069412603144, "grad_norm": 0.012592796236276627, "learning_rate": 9.266619525193381e-05, "loss": 0.015883392095565795, "step": 18010 }, { "epoch": 0.07736362621605145, "grad_norm": 0.0440199077129364, "learning_rate": 9.266188353181619e-05, "loss": 0.3735236167907715, "step": 18020 }, { "epoch": 0.07740655830607146, "grad_norm": 0.01626587100327015, "learning_rate": 9.265757181169857e-05, "loss": 0.1956225872039795, "step": 18030 }, { "epoch": 0.07744949039609146, "grad_norm": 1.1902430057525635, "learning_rate": 9.265326009158094e-05, "loss": 0.4065211772918701, "step": 18040 }, { "epoch": 0.07749242248611148, "grad_norm": 0.0007542611565440893, "learning_rate": 9.264894837146332e-05, "loss": 0.32700634002685547, "step": 18050 }, { "epoch": 0.07753535457613148, "grad_norm": 2.0431017875671387, "learning_rate": 9.26446366513457e-05, "loss": 0.3908370494842529, "step": 18060 }, { "epoch": 0.07757828666615148, "grad_norm": 1.1069626808166504, "learning_rate": 9.264032493122808e-05, "loss": 0.555892038345337, "step": 18070 }, { "epoch": 0.07762121875617149, "grad_norm": 0.035903044044971466, "learning_rate": 9.263601321111044e-05, "loss": 0.0531062126159668, "step": 18080 }, { "epoch": 0.0776641508461915, "grad_norm": 0.05693231150507927, "learning_rate": 9.263170149099282e-05, "loss": 0.09009885191917419, "step": 18090 }, { "epoch": 0.0777070829362115, "grad_norm": 0.026545345783233643, "learning_rate": 9.26273897708752e-05, "loss": 0.29533431529998777, "step": 18100 }, { "epoch": 0.07775001502623151, "grad_norm": 0.12718848884105682, "learning_rate": 9.262307805075757e-05, "loss": 0.13758124113082887, "step": 18110 }, { "epoch": 0.07779294711625151, "grad_norm": 1.6255548000335693, "learning_rate": 9.261876633063995e-05, "loss": 0.2296905994415283, "step": 18120 }, { "epoch": 0.07783587920627152, "grad_norm": 0.0008108015172183514, "learning_rate": 9.261445461052233e-05, "loss": 0.2890913963317871, "step": 18130 }, { "epoch": 0.07787881129629153, "grad_norm": 1.9627443552017212, "learning_rate": 9.26101428904047e-05, "loss": 0.3923152446746826, "step": 18140 }, { "epoch": 0.07792174338631153, "grad_norm": 0.3513317406177521, "learning_rate": 9.260583117028708e-05, "loss": 0.10352857112884521, "step": 18150 }, { "epoch": 0.07796467547633154, "grad_norm": 0.09808113425970078, "learning_rate": 9.260151945016946e-05, "loss": 0.17033973932266236, "step": 18160 }, { "epoch": 0.07800760756635154, "grad_norm": 1.067866325378418, "learning_rate": 9.259720773005182e-05, "loss": 0.21410031318664552, "step": 18170 }, { "epoch": 0.07805053965637156, "grad_norm": 1.5204136371612549, "learning_rate": 9.25928960099342e-05, "loss": 0.1276358723640442, "step": 18180 }, { "epoch": 0.07809347174639156, "grad_norm": 0.0007020228658802807, "learning_rate": 9.258858428981658e-05, "loss": 0.22528154850006105, "step": 18190 }, { "epoch": 0.07813640383641156, "grad_norm": 0.009184078313410282, "learning_rate": 9.258427256969896e-05, "loss": 0.2669683456420898, "step": 18200 }, { "epoch": 0.07817933592643157, "grad_norm": 0.16662456095218658, "learning_rate": 9.257996084958133e-05, "loss": 0.2813804864883423, "step": 18210 }, { "epoch": 0.07822226801645157, "grad_norm": 31.754150390625, "learning_rate": 9.257564912946371e-05, "loss": 0.3360316514968872, "step": 18220 }, { "epoch": 0.07826520010647159, "grad_norm": 2.000892400741577, "learning_rate": 9.257133740934609e-05, "loss": 0.28811111450195315, "step": 18230 }, { "epoch": 0.07830813219649159, "grad_norm": 0.47206950187683105, "learning_rate": 9.256702568922846e-05, "loss": 0.13355579376220703, "step": 18240 }, { "epoch": 0.07835106428651159, "grad_norm": 0.0003992223646491766, "learning_rate": 9.256271396911084e-05, "loss": 0.22010478973388672, "step": 18250 }, { "epoch": 0.0783939963765316, "grad_norm": 0.0260959193110466, "learning_rate": 9.255840224899322e-05, "loss": 0.13738093376159669, "step": 18260 }, { "epoch": 0.0784369284665516, "grad_norm": 1.5283880233764648, "learning_rate": 9.25540905288756e-05, "loss": 0.3932394027709961, "step": 18270 }, { "epoch": 0.07847986055657162, "grad_norm": 0.7194616794586182, "learning_rate": 9.254977880875797e-05, "loss": 0.22366058826446533, "step": 18280 }, { "epoch": 0.07852279264659162, "grad_norm": 6.801660537719727, "learning_rate": 9.254546708864035e-05, "loss": 0.3743177652359009, "step": 18290 }, { "epoch": 0.07856572473661162, "grad_norm": 0.004187764599919319, "learning_rate": 9.254115536852273e-05, "loss": 0.09611039161682129, "step": 18300 }, { "epoch": 0.07860865682663164, "grad_norm": 0.01413058489561081, "learning_rate": 9.25368436484051e-05, "loss": 0.21443462371826172, "step": 18310 }, { "epoch": 0.07865158891665164, "grad_norm": 0.06668446213006973, "learning_rate": 9.253253192828748e-05, "loss": 0.3364569902420044, "step": 18320 }, { "epoch": 0.07869452100667165, "grad_norm": 0.010097499936819077, "learning_rate": 9.252822020816985e-05, "loss": 0.2553986072540283, "step": 18330 }, { "epoch": 0.07873745309669165, "grad_norm": 0.10130496323108673, "learning_rate": 9.252390848805222e-05, "loss": 0.44827828407287595, "step": 18340 }, { "epoch": 0.07878038518671165, "grad_norm": 0.970496654510498, "learning_rate": 9.25195967679346e-05, "loss": 0.2125465154647827, "step": 18350 }, { "epoch": 0.07882331727673167, "grad_norm": 0.12793606519699097, "learning_rate": 9.251528504781698e-05, "loss": 0.3075400829315186, "step": 18360 }, { "epoch": 0.07886624936675167, "grad_norm": 0.03173859789967537, "learning_rate": 9.251097332769936e-05, "loss": 0.13301374912261962, "step": 18370 }, { "epoch": 0.07890918145677168, "grad_norm": 0.9702137112617493, "learning_rate": 9.250666160758173e-05, "loss": 0.501566219329834, "step": 18380 }, { "epoch": 0.07895211354679169, "grad_norm": 1.6415718793869019, "learning_rate": 9.250234988746411e-05, "loss": 0.4016451358795166, "step": 18390 }, { "epoch": 0.07899504563681169, "grad_norm": 0.0025606367271393538, "learning_rate": 9.249803816734649e-05, "loss": 0.18235890865325927, "step": 18400 }, { "epoch": 0.0790379777268317, "grad_norm": 1.0034633874893188, "learning_rate": 9.249372644722885e-05, "loss": 0.37052345275878906, "step": 18410 }, { "epoch": 0.0790809098168517, "grad_norm": 1.0105383396148682, "learning_rate": 9.248941472711123e-05, "loss": 0.1658101797103882, "step": 18420 }, { "epoch": 0.07912384190687172, "grad_norm": 0.20500674843788147, "learning_rate": 9.248510300699361e-05, "loss": 0.07586517333984374, "step": 18430 }, { "epoch": 0.07916677399689172, "grad_norm": 2.6038026809692383, "learning_rate": 9.248079128687598e-05, "loss": 0.3654672384262085, "step": 18440 }, { "epoch": 0.07920970608691172, "grad_norm": 1.0341541767120361, "learning_rate": 9.247647956675836e-05, "loss": 0.18789979219436645, "step": 18450 }, { "epoch": 0.07925263817693173, "grad_norm": 0.005500131286680698, "learning_rate": 9.247216784664074e-05, "loss": 0.25736103057861326, "step": 18460 }, { "epoch": 0.07929557026695173, "grad_norm": 3.110564708709717, "learning_rate": 9.246785612652312e-05, "loss": 0.3830663442611694, "step": 18470 }, { "epoch": 0.07933850235697175, "grad_norm": 27.515901565551758, "learning_rate": 9.24635444064055e-05, "loss": 0.2711588621139526, "step": 18480 }, { "epoch": 0.07938143444699175, "grad_norm": 0.05192455276846886, "learning_rate": 9.245923268628787e-05, "loss": 0.14970123767852783, "step": 18490 }, { "epoch": 0.07942436653701175, "grad_norm": 1.9534826278686523, "learning_rate": 9.245492096617025e-05, "loss": 0.41820321083068845, "step": 18500 }, { "epoch": 0.07946729862703177, "grad_norm": 0.18882907927036285, "learning_rate": 9.245060924605263e-05, "loss": 0.10666049718856811, "step": 18510 }, { "epoch": 0.07951023071705177, "grad_norm": 0.5181287527084351, "learning_rate": 9.2446297525935e-05, "loss": 0.2748467445373535, "step": 18520 }, { "epoch": 0.07955316280707177, "grad_norm": 0.07303071022033691, "learning_rate": 9.244198580581738e-05, "loss": 0.11560168266296386, "step": 18530 }, { "epoch": 0.07959609489709178, "grad_norm": 0.2730436623096466, "learning_rate": 9.243767408569976e-05, "loss": 0.14908556938171386, "step": 18540 }, { "epoch": 0.07963902698711178, "grad_norm": 6.777319431304932, "learning_rate": 9.243336236558214e-05, "loss": 0.21021676063537598, "step": 18550 }, { "epoch": 0.0796819590771318, "grad_norm": 2.380194664001465, "learning_rate": 9.242905064546451e-05, "loss": 0.30106852054595945, "step": 18560 }, { "epoch": 0.0797248911671518, "grad_norm": 1.9388470649719238, "learning_rate": 9.242473892534689e-05, "loss": 0.20236554145812988, "step": 18570 }, { "epoch": 0.0797678232571718, "grad_norm": 3.202451467514038, "learning_rate": 9.242042720522925e-05, "loss": 0.2923529863357544, "step": 18580 }, { "epoch": 0.07981075534719181, "grad_norm": 1.1814517974853516, "learning_rate": 9.241611548511163e-05, "loss": 0.35280370712280273, "step": 18590 }, { "epoch": 0.07985368743721181, "grad_norm": 2.723605155944824, "learning_rate": 9.241180376499401e-05, "loss": 0.31783127784729004, "step": 18600 }, { "epoch": 0.07989661952723183, "grad_norm": 0.038898617029190063, "learning_rate": 9.240749204487639e-05, "loss": 0.35399770736694336, "step": 18610 }, { "epoch": 0.07993955161725183, "grad_norm": 0.9012385010719299, "learning_rate": 9.240318032475876e-05, "loss": 0.29789721965789795, "step": 18620 }, { "epoch": 0.07998248370727183, "grad_norm": 0.15400391817092896, "learning_rate": 9.239886860464114e-05, "loss": 0.1943342924118042, "step": 18630 }, { "epoch": 0.08002541579729185, "grad_norm": 0.08272214233875275, "learning_rate": 9.239455688452352e-05, "loss": 0.1672539234161377, "step": 18640 }, { "epoch": 0.08006834788731185, "grad_norm": 0.004393594805151224, "learning_rate": 9.23902451644059e-05, "loss": 0.10813627243041993, "step": 18650 }, { "epoch": 0.08011127997733186, "grad_norm": 1.657952904701233, "learning_rate": 9.238593344428826e-05, "loss": 0.4301816463470459, "step": 18660 }, { "epoch": 0.08015421206735186, "grad_norm": 2.1112592220306396, "learning_rate": 9.238162172417064e-05, "loss": 0.2593135595321655, "step": 18670 }, { "epoch": 0.08019714415737186, "grad_norm": 1.2312036752700806, "learning_rate": 9.237731000405301e-05, "loss": 0.1060512900352478, "step": 18680 }, { "epoch": 0.08024007624739188, "grad_norm": 0.550669252872467, "learning_rate": 9.237299828393539e-05, "loss": 0.16692020893096923, "step": 18690 }, { "epoch": 0.08028300833741188, "grad_norm": 0.06942453235387802, "learning_rate": 9.236868656381777e-05, "loss": 0.23918561935424804, "step": 18700 }, { "epoch": 0.0803259404274319, "grad_norm": 0.8593615293502808, "learning_rate": 9.236437484370015e-05, "loss": 0.34322171211242675, "step": 18710 }, { "epoch": 0.0803688725174519, "grad_norm": 0.001945764059200883, "learning_rate": 9.236006312358254e-05, "loss": 0.1272782564163208, "step": 18720 }, { "epoch": 0.0804118046074719, "grad_norm": 0.14265984296798706, "learning_rate": 9.235575140346491e-05, "loss": 0.17112067937850953, "step": 18730 }, { "epoch": 0.08045473669749191, "grad_norm": 0.7494179010391235, "learning_rate": 9.235143968334728e-05, "loss": 0.32529516220092775, "step": 18740 }, { "epoch": 0.08049766878751191, "grad_norm": 12.334822654724121, "learning_rate": 9.234712796322965e-05, "loss": 0.3763798952102661, "step": 18750 }, { "epoch": 0.08054060087753193, "grad_norm": 10.992851257324219, "learning_rate": 9.234281624311203e-05, "loss": 0.1684706449508667, "step": 18760 }, { "epoch": 0.08058353296755193, "grad_norm": 0.1401337832212448, "learning_rate": 9.233850452299441e-05, "loss": 0.190047287940979, "step": 18770 }, { "epoch": 0.08062646505757193, "grad_norm": 1.823208212852478, "learning_rate": 9.233419280287679e-05, "loss": 0.2534889459609985, "step": 18780 }, { "epoch": 0.08066939714759194, "grad_norm": 0.029730668291449547, "learning_rate": 9.232988108275916e-05, "loss": 0.18909434080123902, "step": 18790 }, { "epoch": 0.08071232923761194, "grad_norm": 2.487135171890259, "learning_rate": 9.232556936264154e-05, "loss": 0.2689740896224976, "step": 18800 }, { "epoch": 0.08075526132763196, "grad_norm": 0.8337386250495911, "learning_rate": 9.232125764252392e-05, "loss": 0.22457048892974854, "step": 18810 }, { "epoch": 0.08079819341765196, "grad_norm": 8.491974830627441, "learning_rate": 9.231694592240628e-05, "loss": 0.1307414174079895, "step": 18820 }, { "epoch": 0.08084112550767196, "grad_norm": 0.0009186516981571913, "learning_rate": 9.231263420228866e-05, "loss": 0.16259074211120605, "step": 18830 }, { "epoch": 0.08088405759769197, "grad_norm": 0.029363462701439857, "learning_rate": 9.230832248217104e-05, "loss": 0.30105087757110593, "step": 18840 }, { "epoch": 0.08092698968771198, "grad_norm": 0.021574202924966812, "learning_rate": 9.230401076205341e-05, "loss": 0.17228692770004272, "step": 18850 }, { "epoch": 0.08096992177773199, "grad_norm": 0.005163417663425207, "learning_rate": 9.229969904193579e-05, "loss": 0.40862216949462893, "step": 18860 }, { "epoch": 0.08101285386775199, "grad_norm": 0.28390929102897644, "learning_rate": 9.229538732181817e-05, "loss": 0.1912772536277771, "step": 18870 }, { "epoch": 0.08105578595777199, "grad_norm": 2.644794225692749, "learning_rate": 9.229107560170055e-05, "loss": 0.5370799541473389, "step": 18880 }, { "epoch": 0.081098718047792, "grad_norm": 0.1604711264371872, "learning_rate": 9.228676388158292e-05, "loss": 0.3262667179107666, "step": 18890 }, { "epoch": 0.08114165013781201, "grad_norm": 0.0013925611274316907, "learning_rate": 9.22824521614653e-05, "loss": 0.1252922773361206, "step": 18900 }, { "epoch": 0.08118458222783201, "grad_norm": 0.019793476909399033, "learning_rate": 9.227814044134767e-05, "loss": 0.25236876010894777, "step": 18910 }, { "epoch": 0.08122751431785202, "grad_norm": 0.09192702174186707, "learning_rate": 9.227382872123004e-05, "loss": 0.1882225513458252, "step": 18920 }, { "epoch": 0.08127044640787202, "grad_norm": 0.003928286023437977, "learning_rate": 9.226951700111242e-05, "loss": 0.21287171840667723, "step": 18930 }, { "epoch": 0.08131337849789204, "grad_norm": 0.1621057242155075, "learning_rate": 9.226520528099481e-05, "loss": 0.0935452401638031, "step": 18940 }, { "epoch": 0.08135631058791204, "grad_norm": 0.14026261866092682, "learning_rate": 9.226089356087719e-05, "loss": 0.2502132415771484, "step": 18950 }, { "epoch": 0.08139924267793204, "grad_norm": 0.028061900287866592, "learning_rate": 9.225658184075957e-05, "loss": 0.07642927169799804, "step": 18960 }, { "epoch": 0.08144217476795206, "grad_norm": 0.06425946205854416, "learning_rate": 9.225227012064194e-05, "loss": 0.17422356605529785, "step": 18970 }, { "epoch": 0.08148510685797206, "grad_norm": 1.6507774591445923, "learning_rate": 9.224795840052432e-05, "loss": 0.2827779293060303, "step": 18980 }, { "epoch": 0.08152803894799207, "grad_norm": 0.08310697227716446, "learning_rate": 9.224364668040668e-05, "loss": 0.3493364334106445, "step": 18990 }, { "epoch": 0.08157097103801207, "grad_norm": 1.1928002834320068, "learning_rate": 9.223933496028906e-05, "loss": 0.48784561157226564, "step": 19000 }, { "epoch": 0.08157097103801207, "eval_loss": 0.5041004419326782, "eval_runtime": 27.5172, "eval_samples_per_second": 3.634, "eval_steps_per_second": 3.634, "step": 19000 }, { "epoch": 0.08161390312803207, "grad_norm": 0.13684290647506714, "learning_rate": 9.223502324017144e-05, "loss": 0.4936811447143555, "step": 19010 }, { "epoch": 0.08165683521805209, "grad_norm": 0.04186616465449333, "learning_rate": 9.223071152005382e-05, "loss": 0.23352341651916503, "step": 19020 }, { "epoch": 0.08169976730807209, "grad_norm": 0.44165217876434326, "learning_rate": 9.22263997999362e-05, "loss": 0.2601867437362671, "step": 19030 }, { "epoch": 0.0817426993980921, "grad_norm": 0.014664696529507637, "learning_rate": 9.222208807981857e-05, "loss": 0.06263558268547058, "step": 19040 }, { "epoch": 0.0817856314881121, "grad_norm": 2.0226991176605225, "learning_rate": 9.221777635970095e-05, "loss": 0.3118800163269043, "step": 19050 }, { "epoch": 0.0818285635781321, "grad_norm": 1.483991265296936, "learning_rate": 9.221346463958333e-05, "loss": 0.20840089321136473, "step": 19060 }, { "epoch": 0.08187149566815212, "grad_norm": 0.1026201993227005, "learning_rate": 9.220915291946569e-05, "loss": 0.20031397342681884, "step": 19070 }, { "epoch": 0.08191442775817212, "grad_norm": 0.11774060875177383, "learning_rate": 9.220484119934807e-05, "loss": 0.19844886064529418, "step": 19080 }, { "epoch": 0.08195735984819214, "grad_norm": 0.018591005355119705, "learning_rate": 9.220052947923044e-05, "loss": 0.2688950538635254, "step": 19090 }, { "epoch": 0.08200029193821214, "grad_norm": 0.03999682515859604, "learning_rate": 9.219621775911282e-05, "loss": 0.3359368324279785, "step": 19100 }, { "epoch": 0.08204322402823214, "grad_norm": 0.0394248366355896, "learning_rate": 9.21919060389952e-05, "loss": 0.2790042877197266, "step": 19110 }, { "epoch": 0.08208615611825215, "grad_norm": 2.473489284515381, "learning_rate": 9.218759431887758e-05, "loss": 0.2525103807449341, "step": 19120 }, { "epoch": 0.08212908820827215, "grad_norm": 3.0899598598480225, "learning_rate": 9.218328259875995e-05, "loss": 0.22865710258483887, "step": 19130 }, { "epoch": 0.08217202029829217, "grad_norm": 1.9980636835098267, "learning_rate": 9.217897087864233e-05, "loss": 0.3162590980529785, "step": 19140 }, { "epoch": 0.08221495238831217, "grad_norm": 0.26501959562301636, "learning_rate": 9.21746591585247e-05, "loss": 0.3742363691329956, "step": 19150 }, { "epoch": 0.08225788447833217, "grad_norm": 2.546135425567627, "learning_rate": 9.217034743840709e-05, "loss": 0.13382744789123535, "step": 19160 }, { "epoch": 0.08230081656835218, "grad_norm": 0.9495781064033508, "learning_rate": 9.216603571828946e-05, "loss": 0.3787665843963623, "step": 19170 }, { "epoch": 0.08234374865837218, "grad_norm": 0.04306304082274437, "learning_rate": 9.216172399817184e-05, "loss": 0.2250833511352539, "step": 19180 }, { "epoch": 0.0823866807483922, "grad_norm": 3.428227424621582, "learning_rate": 9.215741227805422e-05, "loss": 0.6014655590057373, "step": 19190 }, { "epoch": 0.0824296128384122, "grad_norm": 0.19456183910369873, "learning_rate": 9.21531005579366e-05, "loss": 0.13260576725006104, "step": 19200 }, { "epoch": 0.0824725449284322, "grad_norm": 0.9759875535964966, "learning_rate": 9.214878883781897e-05, "loss": 0.23652286529541017, "step": 19210 }, { "epoch": 0.08251547701845222, "grad_norm": 1.5591341257095337, "learning_rate": 9.214447711770135e-05, "loss": 0.3411963701248169, "step": 19220 }, { "epoch": 0.08255840910847222, "grad_norm": 0.06276403367519379, "learning_rate": 9.214016539758371e-05, "loss": 0.46163148880004884, "step": 19230 }, { "epoch": 0.08260134119849223, "grad_norm": 0.541723370552063, "learning_rate": 9.213585367746609e-05, "loss": 0.33110618591308594, "step": 19240 }, { "epoch": 0.08264427328851223, "grad_norm": 1.5728428363800049, "learning_rate": 9.213154195734847e-05, "loss": 0.28666555881500244, "step": 19250 }, { "epoch": 0.08268720537853223, "grad_norm": 0.13925987482070923, "learning_rate": 9.212723023723085e-05, "loss": 0.09053775668144226, "step": 19260 }, { "epoch": 0.08273013746855225, "grad_norm": 0.09296605736017227, "learning_rate": 9.212291851711322e-05, "loss": 0.2339235782623291, "step": 19270 }, { "epoch": 0.08277306955857225, "grad_norm": 1.1923575401306152, "learning_rate": 9.21186067969956e-05, "loss": 0.2979546546936035, "step": 19280 }, { "epoch": 0.08281600164859226, "grad_norm": 0.02540683001279831, "learning_rate": 9.211429507687798e-05, "loss": 0.3172282695770264, "step": 19290 }, { "epoch": 0.08285893373861226, "grad_norm": 0.009578239172697067, "learning_rate": 9.210998335676035e-05, "loss": 0.16685105562210084, "step": 19300 }, { "epoch": 0.08290186582863227, "grad_norm": 0.07192831486463547, "learning_rate": 9.210567163664273e-05, "loss": 0.0985795497894287, "step": 19310 }, { "epoch": 0.08294479791865228, "grad_norm": 0.5271665453910828, "learning_rate": 9.21013599165251e-05, "loss": 0.2767664432525635, "step": 19320 }, { "epoch": 0.08298773000867228, "grad_norm": 0.2542556822299957, "learning_rate": 9.209704819640747e-05, "loss": 0.27911207675933836, "step": 19330 }, { "epoch": 0.08303066209869228, "grad_norm": 0.19310764968395233, "learning_rate": 9.209273647628985e-05, "loss": 0.2386990785598755, "step": 19340 }, { "epoch": 0.0830735941887123, "grad_norm": 1.4710135459899902, "learning_rate": 9.208842475617223e-05, "loss": 0.4310801029205322, "step": 19350 }, { "epoch": 0.0831165262787323, "grad_norm": 1.1130155324935913, "learning_rate": 9.20841130360546e-05, "loss": 0.15598387718200685, "step": 19360 }, { "epoch": 0.08315945836875231, "grad_norm": 0.05505505949258804, "learning_rate": 9.207980131593698e-05, "loss": 0.33764450550079345, "step": 19370 }, { "epoch": 0.08320239045877231, "grad_norm": 0.062077395617961884, "learning_rate": 9.207548959581936e-05, "loss": 0.12927793264389037, "step": 19380 }, { "epoch": 0.08324532254879231, "grad_norm": 0.887370228767395, "learning_rate": 9.207117787570174e-05, "loss": 0.42006869316101075, "step": 19390 }, { "epoch": 0.08328825463881233, "grad_norm": 3.715780258178711, "learning_rate": 9.206686615558411e-05, "loss": 0.28740389347076417, "step": 19400 }, { "epoch": 0.08333118672883233, "grad_norm": 0.3408714532852173, "learning_rate": 9.206255443546649e-05, "loss": 0.2963599681854248, "step": 19410 }, { "epoch": 0.08337411881885234, "grad_norm": 0.04663284495472908, "learning_rate": 9.205824271534887e-05, "loss": 0.33824911117553713, "step": 19420 }, { "epoch": 0.08341705090887234, "grad_norm": 0.008326475508511066, "learning_rate": 9.205393099523125e-05, "loss": 0.1826784133911133, "step": 19430 }, { "epoch": 0.08345998299889235, "grad_norm": 0.11303720623254776, "learning_rate": 9.204961927511362e-05, "loss": 0.27020950317382814, "step": 19440 }, { "epoch": 0.08350291508891236, "grad_norm": 0.08869299292564392, "learning_rate": 9.2045307554996e-05, "loss": 0.10644828081130982, "step": 19450 }, { "epoch": 0.08354584717893236, "grad_norm": 0.030560927465558052, "learning_rate": 9.204099583487838e-05, "loss": 0.04898174703121185, "step": 19460 }, { "epoch": 0.08358877926895238, "grad_norm": 1.178162932395935, "learning_rate": 9.203668411476076e-05, "loss": 0.2633205413818359, "step": 19470 }, { "epoch": 0.08363171135897238, "grad_norm": 0.07836475223302841, "learning_rate": 9.203237239464312e-05, "loss": 0.21295971870422364, "step": 19480 }, { "epoch": 0.08367464344899238, "grad_norm": 0.024082181975245476, "learning_rate": 9.20280606745255e-05, "loss": 0.13825159072875975, "step": 19490 }, { "epoch": 0.08371757553901239, "grad_norm": 2.2076027393341064, "learning_rate": 9.202374895440787e-05, "loss": 0.2961868762969971, "step": 19500 }, { "epoch": 0.0837605076290324, "grad_norm": 8.747289657592773, "learning_rate": 9.201943723429025e-05, "loss": 0.13476651906967163, "step": 19510 }, { "epoch": 0.08380343971905241, "grad_norm": 0.07265298813581467, "learning_rate": 9.201512551417263e-05, "loss": 0.3324207067489624, "step": 19520 }, { "epoch": 0.08384637180907241, "grad_norm": 1.4036122560501099, "learning_rate": 9.2010813794055e-05, "loss": 0.20782277584075928, "step": 19530 }, { "epoch": 0.08388930389909241, "grad_norm": 0.006723584607243538, "learning_rate": 9.200650207393738e-05, "loss": 0.14426236152648925, "step": 19540 }, { "epoch": 0.08393223598911242, "grad_norm": 1.0080065727233887, "learning_rate": 9.200219035381976e-05, "loss": 0.268358039855957, "step": 19550 }, { "epoch": 0.08397516807913243, "grad_norm": 1.6201801300048828, "learning_rate": 9.199787863370212e-05, "loss": 0.42645840644836425, "step": 19560 }, { "epoch": 0.08401810016915244, "grad_norm": 0.13518543541431427, "learning_rate": 9.19935669135845e-05, "loss": 0.12359261512756348, "step": 19570 }, { "epoch": 0.08406103225917244, "grad_norm": 1.122020959854126, "learning_rate": 9.198925519346688e-05, "loss": 0.5048614025115967, "step": 19580 }, { "epoch": 0.08410396434919244, "grad_norm": 1.1860013008117676, "learning_rate": 9.198494347334926e-05, "loss": 0.17101879119873048, "step": 19590 }, { "epoch": 0.08414689643921246, "grad_norm": 1.223297119140625, "learning_rate": 9.198063175323163e-05, "loss": 0.25258498191833495, "step": 19600 }, { "epoch": 0.08418982852923246, "grad_norm": 4.542642116546631, "learning_rate": 9.197632003311401e-05, "loss": 0.2318887710571289, "step": 19610 }, { "epoch": 0.08423276061925247, "grad_norm": 0.05279264226555824, "learning_rate": 9.197200831299639e-05, "loss": 0.19677789211273194, "step": 19620 }, { "epoch": 0.08427569270927247, "grad_norm": 0.022767324000597, "learning_rate": 9.196769659287877e-05, "loss": 0.1827967047691345, "step": 19630 }, { "epoch": 0.08431862479929247, "grad_norm": 0.9736915230751038, "learning_rate": 9.196338487276114e-05, "loss": 0.39751832485198973, "step": 19640 }, { "epoch": 0.08436155688931249, "grad_norm": 7.58955717086792, "learning_rate": 9.195907315264352e-05, "loss": 0.40766420364379885, "step": 19650 }, { "epoch": 0.08440448897933249, "grad_norm": 0.9952966570854187, "learning_rate": 9.19547614325259e-05, "loss": 0.21652648448944092, "step": 19660 }, { "epoch": 0.0844474210693525, "grad_norm": 3.723085641860962, "learning_rate": 9.195044971240828e-05, "loss": 0.3138508081436157, "step": 19670 }, { "epoch": 0.0844903531593725, "grad_norm": 0.11735602468252182, "learning_rate": 9.194613799229065e-05, "loss": 0.2222294807434082, "step": 19680 }, { "epoch": 0.0845332852493925, "grad_norm": 0.11522156000137329, "learning_rate": 9.194182627217303e-05, "loss": 0.29770517349243164, "step": 19690 }, { "epoch": 0.08457621733941252, "grad_norm": 0.09208790957927704, "learning_rate": 9.193751455205541e-05, "loss": 0.1982753038406372, "step": 19700 }, { "epoch": 0.08461914942943252, "grad_norm": 1.2311041355133057, "learning_rate": 9.193320283193778e-05, "loss": 0.3996951818466187, "step": 19710 }, { "epoch": 0.08466208151945254, "grad_norm": 12.743633270263672, "learning_rate": 9.192889111182016e-05, "loss": 0.21535904407501222, "step": 19720 }, { "epoch": 0.08470501360947254, "grad_norm": 0.0034743843134492636, "learning_rate": 9.192457939170253e-05, "loss": 0.18522260189056397, "step": 19730 }, { "epoch": 0.08474794569949254, "grad_norm": 1.9996086359024048, "learning_rate": 9.19202676715849e-05, "loss": 0.29746198654174805, "step": 19740 }, { "epoch": 0.08479087778951255, "grad_norm": 1.7059917449951172, "learning_rate": 9.191595595146728e-05, "loss": 0.24819631576538087, "step": 19750 }, { "epoch": 0.08483380987953255, "grad_norm": 0.013688395731151104, "learning_rate": 9.191164423134966e-05, "loss": 0.13482595682144166, "step": 19760 }, { "epoch": 0.08487674196955255, "grad_norm": 0.7230508327484131, "learning_rate": 9.190733251123204e-05, "loss": 0.22907421588897706, "step": 19770 }, { "epoch": 0.08491967405957257, "grad_norm": 0.7261434197425842, "learning_rate": 9.190302079111441e-05, "loss": 0.2959503173828125, "step": 19780 }, { "epoch": 0.08496260614959257, "grad_norm": 0.03716635704040527, "learning_rate": 9.189870907099679e-05, "loss": 0.15986857414245606, "step": 19790 }, { "epoch": 0.08500553823961259, "grad_norm": 0.005048257298767567, "learning_rate": 9.189439735087917e-05, "loss": 0.1239315152168274, "step": 19800 }, { "epoch": 0.08504847032963259, "grad_norm": 19.985151290893555, "learning_rate": 9.189008563076153e-05, "loss": 0.24769895076751708, "step": 19810 }, { "epoch": 0.08509140241965259, "grad_norm": 0.040633995085954666, "learning_rate": 9.188577391064391e-05, "loss": 0.15116959810256958, "step": 19820 }, { "epoch": 0.0851343345096726, "grad_norm": 0.0350426509976387, "learning_rate": 9.188146219052629e-05, "loss": 0.23688271045684814, "step": 19830 }, { "epoch": 0.0851772665996926, "grad_norm": 0.7044867873191833, "learning_rate": 9.187715047040866e-05, "loss": 0.29199187755584716, "step": 19840 }, { "epoch": 0.08522019868971262, "grad_norm": 0.028381457552313805, "learning_rate": 9.187283875029104e-05, "loss": 0.16368452310562134, "step": 19850 }, { "epoch": 0.08526313077973262, "grad_norm": 0.10046995431184769, "learning_rate": 9.186852703017342e-05, "loss": 0.4022871971130371, "step": 19860 }, { "epoch": 0.08530606286975262, "grad_norm": 2.002215623855591, "learning_rate": 9.18642153100558e-05, "loss": 0.2921769618988037, "step": 19870 }, { "epoch": 0.08534899495977263, "grad_norm": 1.6824100017547607, "learning_rate": 9.185990358993817e-05, "loss": 0.2920323610305786, "step": 19880 }, { "epoch": 0.08539192704979263, "grad_norm": 0.01267918385565281, "learning_rate": 9.185559186982055e-05, "loss": 0.45812501907348635, "step": 19890 }, { "epoch": 0.08543485913981265, "grad_norm": 0.0032960656099021435, "learning_rate": 9.185128014970293e-05, "loss": 0.1701305627822876, "step": 19900 }, { "epoch": 0.08547779122983265, "grad_norm": 0.017295779660344124, "learning_rate": 9.18469684295853e-05, "loss": 0.3156233310699463, "step": 19910 }, { "epoch": 0.08552072331985265, "grad_norm": 0.10581665486097336, "learning_rate": 9.184265670946768e-05, "loss": 0.1895419478416443, "step": 19920 }, { "epoch": 0.08556365540987267, "grad_norm": 2.9387333393096924, "learning_rate": 9.183834498935006e-05, "loss": 0.20859913825988768, "step": 19930 }, { "epoch": 0.08560658749989267, "grad_norm": 0.010136442258954048, "learning_rate": 9.183403326923244e-05, "loss": 0.26719396114349364, "step": 19940 }, { "epoch": 0.08564951958991268, "grad_norm": 3.116539716720581, "learning_rate": 9.182972154911481e-05, "loss": 0.15150291919708253, "step": 19950 }, { "epoch": 0.08569245167993268, "grad_norm": 1.7744215726852417, "learning_rate": 9.182540982899719e-05, "loss": 0.3903425931930542, "step": 19960 }, { "epoch": 0.08573538376995268, "grad_norm": 0.006902491673827171, "learning_rate": 9.182109810887957e-05, "loss": 0.09915790557861329, "step": 19970 }, { "epoch": 0.0857783158599727, "grad_norm": 0.19755761325359344, "learning_rate": 9.181678638876193e-05, "loss": 0.21800611019134522, "step": 19980 }, { "epoch": 0.0858212479499927, "grad_norm": 0.004093521274626255, "learning_rate": 9.181247466864431e-05, "loss": 0.43779420852661133, "step": 19990 }, { "epoch": 0.08586418004001271, "grad_norm": 0.04544459655880928, "learning_rate": 9.180816294852669e-05, "loss": 0.19077495336532593, "step": 20000 }, { "epoch": 0.08586418004001271, "eval_loss": 0.5211971402168274, "eval_runtime": 27.4516, "eval_samples_per_second": 3.643, "eval_steps_per_second": 3.643, "step": 20000 }, { "epoch": 0.08590711213003271, "grad_norm": 2.292238712310791, "learning_rate": 9.180385122840906e-05, "loss": 0.22309489250183107, "step": 20010 }, { "epoch": 0.08595004422005272, "grad_norm": 0.13180018961429596, "learning_rate": 9.179953950829144e-05, "loss": 0.5180996894836426, "step": 20020 }, { "epoch": 0.08599297631007273, "grad_norm": 0.03309754282236099, "learning_rate": 9.179522778817382e-05, "loss": 0.28703348636627196, "step": 20030 }, { "epoch": 0.08603590840009273, "grad_norm": 1.801064133644104, "learning_rate": 9.17909160680562e-05, "loss": 0.34079132080078123, "step": 20040 }, { "epoch": 0.08607884049011275, "grad_norm": 0.1331978440284729, "learning_rate": 9.178660434793857e-05, "loss": 0.09699448347091674, "step": 20050 }, { "epoch": 0.08612177258013275, "grad_norm": 0.053721651434898376, "learning_rate": 9.178229262782094e-05, "loss": 0.06542769670486451, "step": 20060 }, { "epoch": 0.08616470467015275, "grad_norm": 0.33402219414711, "learning_rate": 9.177798090770331e-05, "loss": 0.282747745513916, "step": 20070 }, { "epoch": 0.08620763676017276, "grad_norm": 0.03361477330327034, "learning_rate": 9.177366918758569e-05, "loss": 0.15889936685562134, "step": 20080 }, { "epoch": 0.08625056885019276, "grad_norm": 0.0654355138540268, "learning_rate": 9.176935746746807e-05, "loss": 0.34210996627807616, "step": 20090 }, { "epoch": 0.08629350094021278, "grad_norm": 0.4426393210887909, "learning_rate": 9.176504574735045e-05, "loss": 0.16542912721633912, "step": 20100 }, { "epoch": 0.08633643303023278, "grad_norm": 0.10327360033988953, "learning_rate": 9.176073402723282e-05, "loss": 0.21687026023864747, "step": 20110 }, { "epoch": 0.08637936512025278, "grad_norm": 0.19463887810707092, "learning_rate": 9.175642230711522e-05, "loss": 0.31909153461456297, "step": 20120 }, { "epoch": 0.0864222972102728, "grad_norm": 0.1267862170934677, "learning_rate": 9.175211058699759e-05, "loss": 0.20828518867492676, "step": 20130 }, { "epoch": 0.0864652293002928, "grad_norm": 0.4136047959327698, "learning_rate": 9.174779886687996e-05, "loss": 0.08652875423431397, "step": 20140 }, { "epoch": 0.08650816139031281, "grad_norm": 0.04703577607870102, "learning_rate": 9.174348714676233e-05, "loss": 0.17158129215240478, "step": 20150 }, { "epoch": 0.08655109348033281, "grad_norm": 3.910243511199951, "learning_rate": 9.173917542664471e-05, "loss": 0.2885754108428955, "step": 20160 }, { "epoch": 0.08659402557035281, "grad_norm": 0.11007514595985413, "learning_rate": 9.173486370652709e-05, "loss": 0.2936608076095581, "step": 20170 }, { "epoch": 0.08663695766037283, "grad_norm": 0.2206745147705078, "learning_rate": 9.173055198640947e-05, "loss": 0.2527461528778076, "step": 20180 }, { "epoch": 0.08667988975039283, "grad_norm": 0.09681833535432816, "learning_rate": 9.172624026629184e-05, "loss": 0.13297202587127685, "step": 20190 }, { "epoch": 0.08672282184041283, "grad_norm": 1.4935474395751953, "learning_rate": 9.172192854617422e-05, "loss": 0.10676318407058716, "step": 20200 }, { "epoch": 0.08676575393043284, "grad_norm": 0.0713653564453125, "learning_rate": 9.17176168260566e-05, "loss": 0.43478784561157224, "step": 20210 }, { "epoch": 0.08680868602045284, "grad_norm": 0.05121508985757828, "learning_rate": 9.171330510593896e-05, "loss": 0.11575621366500854, "step": 20220 }, { "epoch": 0.08685161811047286, "grad_norm": 0.10139039158821106, "learning_rate": 9.170899338582134e-05, "loss": 0.26717281341552734, "step": 20230 }, { "epoch": 0.08689455020049286, "grad_norm": 0.2904765009880066, "learning_rate": 9.170468166570372e-05, "loss": 0.2693187236785889, "step": 20240 }, { "epoch": 0.08693748229051286, "grad_norm": 0.34784597158432007, "learning_rate": 9.17003699455861e-05, "loss": 0.10214605331420898, "step": 20250 }, { "epoch": 0.08698041438053287, "grad_norm": 1.9216970205307007, "learning_rate": 9.169605822546847e-05, "loss": 0.15444643497467042, "step": 20260 }, { "epoch": 0.08702334647055288, "grad_norm": 0.014310669153928757, "learning_rate": 9.169174650535085e-05, "loss": 0.3466238260269165, "step": 20270 }, { "epoch": 0.08706627856057289, "grad_norm": 0.15292063355445862, "learning_rate": 9.168743478523323e-05, "loss": 0.18252546787261964, "step": 20280 }, { "epoch": 0.08710921065059289, "grad_norm": 0.7003419995307922, "learning_rate": 9.16831230651156e-05, "loss": 0.18673573732376098, "step": 20290 }, { "epoch": 0.08715214274061289, "grad_norm": 0.32834747433662415, "learning_rate": 9.167881134499797e-05, "loss": 0.25514419078826905, "step": 20300 }, { "epoch": 0.08719507483063291, "grad_norm": 0.07238604873418808, "learning_rate": 9.167449962488034e-05, "loss": 0.28299875259399415, "step": 20310 }, { "epoch": 0.08723800692065291, "grad_norm": 0.18142496049404144, "learning_rate": 9.167018790476272e-05, "loss": 0.10401992797851563, "step": 20320 }, { "epoch": 0.08728093901067292, "grad_norm": 1.2397515773773193, "learning_rate": 9.16658761846451e-05, "loss": 0.4503783702850342, "step": 20330 }, { "epoch": 0.08732387110069292, "grad_norm": 0.0753837451338768, "learning_rate": 9.166156446452749e-05, "loss": 0.20443589687347413, "step": 20340 }, { "epoch": 0.08736680319071292, "grad_norm": 0.766108512878418, "learning_rate": 9.165725274440987e-05, "loss": 0.2131603479385376, "step": 20350 }, { "epoch": 0.08740973528073294, "grad_norm": 0.26683467626571655, "learning_rate": 9.165294102429224e-05, "loss": 0.3456669092178345, "step": 20360 }, { "epoch": 0.08745266737075294, "grad_norm": 7.378421783447266, "learning_rate": 9.164862930417462e-05, "loss": 0.23649468421936035, "step": 20370 }, { "epoch": 0.08749559946077295, "grad_norm": 6.665241241455078, "learning_rate": 9.1644317584057e-05, "loss": 0.19338738918304443, "step": 20380 }, { "epoch": 0.08753853155079296, "grad_norm": 1.4870343208312988, "learning_rate": 9.164000586393936e-05, "loss": 0.2298222303390503, "step": 20390 }, { "epoch": 0.08758146364081296, "grad_norm": 14.525195121765137, "learning_rate": 9.163569414382174e-05, "loss": 0.19451080560684203, "step": 20400 }, { "epoch": 0.08762439573083297, "grad_norm": 5.415810585021973, "learning_rate": 9.163138242370412e-05, "loss": 0.13900411128997803, "step": 20410 }, { "epoch": 0.08766732782085297, "grad_norm": 1.1069865226745605, "learning_rate": 9.16270707035865e-05, "loss": 0.5359042644500732, "step": 20420 }, { "epoch": 0.08771025991087299, "grad_norm": 0.052934516221284866, "learning_rate": 9.162275898346887e-05, "loss": 0.27098309993743896, "step": 20430 }, { "epoch": 0.08775319200089299, "grad_norm": 0.051988791674375534, "learning_rate": 9.161844726335125e-05, "loss": 0.14990001916885376, "step": 20440 }, { "epoch": 0.08779612409091299, "grad_norm": 1.257656216621399, "learning_rate": 9.161413554323363e-05, "loss": 0.28091883659362793, "step": 20450 }, { "epoch": 0.087839056180933, "grad_norm": 0.08454004675149918, "learning_rate": 9.1609823823116e-05, "loss": 0.23843903541564943, "step": 20460 }, { "epoch": 0.087881988270953, "grad_norm": 0.5705059766769409, "learning_rate": 9.160551210299837e-05, "loss": 0.30336081981658936, "step": 20470 }, { "epoch": 0.08792492036097302, "grad_norm": 2.9336891174316406, "learning_rate": 9.160120038288075e-05, "loss": 0.503492784500122, "step": 20480 }, { "epoch": 0.08796785245099302, "grad_norm": 0.20187604427337646, "learning_rate": 9.159688866276312e-05, "loss": 0.27225394248962403, "step": 20490 }, { "epoch": 0.08801078454101302, "grad_norm": 0.04096986725926399, "learning_rate": 9.15925769426455e-05, "loss": 0.23516056537628174, "step": 20500 }, { "epoch": 0.08805371663103304, "grad_norm": 0.0220347847789526, "learning_rate": 9.158826522252788e-05, "loss": 0.2556183099746704, "step": 20510 }, { "epoch": 0.08809664872105304, "grad_norm": 0.33302149176597595, "learning_rate": 9.158395350241025e-05, "loss": 0.17670719623565673, "step": 20520 }, { "epoch": 0.08813958081107305, "grad_norm": 2.4029204845428467, "learning_rate": 9.157964178229263e-05, "loss": 0.20500121116638184, "step": 20530 }, { "epoch": 0.08818251290109305, "grad_norm": 4.5730180740356445, "learning_rate": 9.157533006217501e-05, "loss": 0.2645660161972046, "step": 20540 }, { "epoch": 0.08822544499111305, "grad_norm": 0.03342090919613838, "learning_rate": 9.157101834205737e-05, "loss": 0.1793131113052368, "step": 20550 }, { "epoch": 0.08826837708113307, "grad_norm": 1.0915799140930176, "learning_rate": 9.156670662193976e-05, "loss": 0.24776463508605956, "step": 20560 }, { "epoch": 0.08831130917115307, "grad_norm": 0.20644307136535645, "learning_rate": 9.156239490182214e-05, "loss": 0.06601312756538391, "step": 20570 }, { "epoch": 0.08835424126117308, "grad_norm": 1.0235928297042847, "learning_rate": 9.155808318170452e-05, "loss": 0.3110778331756592, "step": 20580 }, { "epoch": 0.08839717335119308, "grad_norm": 1.791678786277771, "learning_rate": 9.15537714615869e-05, "loss": 0.16456762552261353, "step": 20590 }, { "epoch": 0.08844010544121308, "grad_norm": 0.8701760172843933, "learning_rate": 9.154945974146927e-05, "loss": 0.17454179525375366, "step": 20600 }, { "epoch": 0.0884830375312331, "grad_norm": 0.022485675290226936, "learning_rate": 9.154514802135165e-05, "loss": 0.004853111878037453, "step": 20610 }, { "epoch": 0.0885259696212531, "grad_norm": 4.354240417480469, "learning_rate": 9.154083630123403e-05, "loss": 0.42849555015563967, "step": 20620 }, { "epoch": 0.0885689017112731, "grad_norm": 0.08281465619802475, "learning_rate": 9.153652458111639e-05, "loss": 0.24617836475372315, "step": 20630 }, { "epoch": 0.08861183380129312, "grad_norm": 0.5946925282478333, "learning_rate": 9.153221286099877e-05, "loss": 0.2575437068939209, "step": 20640 }, { "epoch": 0.08865476589131312, "grad_norm": 0.037425171583890915, "learning_rate": 9.152790114088115e-05, "loss": 0.3338440418243408, "step": 20650 }, { "epoch": 0.08869769798133313, "grad_norm": 2.2693583965301514, "learning_rate": 9.152358942076352e-05, "loss": 0.2294626235961914, "step": 20660 }, { "epoch": 0.08874063007135313, "grad_norm": 3.0021560192108154, "learning_rate": 9.15192777006459e-05, "loss": 0.19063858985900878, "step": 20670 }, { "epoch": 0.08878356216137313, "grad_norm": 0.05422629788517952, "learning_rate": 9.151496598052828e-05, "loss": 0.3820706129074097, "step": 20680 }, { "epoch": 0.08882649425139315, "grad_norm": 1.2230654954910278, "learning_rate": 9.151065426041066e-05, "loss": 0.27535200119018555, "step": 20690 }, { "epoch": 0.08886942634141315, "grad_norm": 0.09059865027666092, "learning_rate": 9.150634254029303e-05, "loss": 0.3081681728363037, "step": 20700 }, { "epoch": 0.08891235843143316, "grad_norm": 0.75705486536026, "learning_rate": 9.150203082017541e-05, "loss": 0.3169992446899414, "step": 20710 }, { "epoch": 0.08895529052145316, "grad_norm": 0.0281693022698164, "learning_rate": 9.149771910005777e-05, "loss": 0.19922350645065307, "step": 20720 }, { "epoch": 0.08899822261147317, "grad_norm": 0.5898590087890625, "learning_rate": 9.149340737994015e-05, "loss": 0.3216936349868774, "step": 20730 }, { "epoch": 0.08904115470149318, "grad_norm": 0.8585788607597351, "learning_rate": 9.148909565982253e-05, "loss": 0.15437256097793578, "step": 20740 }, { "epoch": 0.08908408679151318, "grad_norm": 0.8398600220680237, "learning_rate": 9.14847839397049e-05, "loss": 0.232130765914917, "step": 20750 }, { "epoch": 0.0891270188815332, "grad_norm": 0.27168065309524536, "learning_rate": 9.148047221958728e-05, "loss": 0.27273604869842527, "step": 20760 }, { "epoch": 0.0891699509715532, "grad_norm": 0.03227852284908295, "learning_rate": 9.147616049946966e-05, "loss": 0.08514662384986878, "step": 20770 }, { "epoch": 0.0892128830615732, "grad_norm": 1.47744882106781, "learning_rate": 9.147184877935204e-05, "loss": 0.4660014629364014, "step": 20780 }, { "epoch": 0.08925581515159321, "grad_norm": 0.07243969291448593, "learning_rate": 9.146753705923442e-05, "loss": 0.2737978458404541, "step": 20790 }, { "epoch": 0.08929874724161321, "grad_norm": 2.0715065002441406, "learning_rate": 9.14632253391168e-05, "loss": 0.11428431272506714, "step": 20800 }, { "epoch": 0.08934167933163323, "grad_norm": 0.15402251482009888, "learning_rate": 9.145891361899917e-05, "loss": 0.16179636716842652, "step": 20810 }, { "epoch": 0.08938461142165323, "grad_norm": 0.38303110003471375, "learning_rate": 9.145460189888155e-05, "loss": 0.2799507141113281, "step": 20820 }, { "epoch": 0.08942754351167323, "grad_norm": 4.249147891998291, "learning_rate": 9.145029017876393e-05, "loss": 0.4097945213317871, "step": 20830 }, { "epoch": 0.08947047560169324, "grad_norm": 0.028834078460931778, "learning_rate": 9.14459784586463e-05, "loss": 0.2732088565826416, "step": 20840 }, { "epoch": 0.08951340769171325, "grad_norm": 0.13410897552967072, "learning_rate": 9.144166673852868e-05, "loss": 0.09479534029960632, "step": 20850 }, { "epoch": 0.08955633978173326, "grad_norm": 0.16793487966060638, "learning_rate": 9.143735501841106e-05, "loss": 0.26109633445739744, "step": 20860 }, { "epoch": 0.08959927187175326, "grad_norm": 3.912290573120117, "learning_rate": 9.143304329829343e-05, "loss": 0.31730501651763915, "step": 20870 }, { "epoch": 0.08964220396177326, "grad_norm": 2.9360220432281494, "learning_rate": 9.14287315781758e-05, "loss": 0.3117853879928589, "step": 20880 }, { "epoch": 0.08968513605179328, "grad_norm": 0.0618007592856884, "learning_rate": 9.142441985805818e-05, "loss": 0.09674944281578064, "step": 20890 }, { "epoch": 0.08972806814181328, "grad_norm": 0.8049418330192566, "learning_rate": 9.142010813794055e-05, "loss": 0.3263385772705078, "step": 20900 }, { "epoch": 0.08977100023183329, "grad_norm": 0.254452109336853, "learning_rate": 9.141579641782293e-05, "loss": 0.28532981872558594, "step": 20910 }, { "epoch": 0.0898139323218533, "grad_norm": 0.3107840120792389, "learning_rate": 9.141148469770531e-05, "loss": 0.2058807373046875, "step": 20920 }, { "epoch": 0.0898568644118733, "grad_norm": 0.18128502368927002, "learning_rate": 9.140717297758769e-05, "loss": 0.22402665615081788, "step": 20930 }, { "epoch": 0.08989979650189331, "grad_norm": 0.6478937268257141, "learning_rate": 9.140286125747006e-05, "loss": 0.17081331014633178, "step": 20940 }, { "epoch": 0.08994272859191331, "grad_norm": 3.6722099781036377, "learning_rate": 9.139854953735244e-05, "loss": 0.14721962213516235, "step": 20950 }, { "epoch": 0.08998566068193332, "grad_norm": 0.052497293800115585, "learning_rate": 9.13942378172348e-05, "loss": 0.24483840465545653, "step": 20960 }, { "epoch": 0.09002859277195333, "grad_norm": 0.8103175759315491, "learning_rate": 9.138992609711718e-05, "loss": 0.32439870834350587, "step": 20970 }, { "epoch": 0.09007152486197333, "grad_norm": 2.8278698921203613, "learning_rate": 9.138561437699956e-05, "loss": 0.40981359481811525, "step": 20980 }, { "epoch": 0.09011445695199334, "grad_norm": 0.04258492588996887, "learning_rate": 9.138130265688194e-05, "loss": 0.3865856409072876, "step": 20990 }, { "epoch": 0.09015738904201334, "grad_norm": 0.7423449158668518, "learning_rate": 9.137699093676431e-05, "loss": 0.3491669178009033, "step": 21000 }, { "epoch": 0.09015738904201334, "eval_loss": 0.4936206042766571, "eval_runtime": 27.4382, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 21000 }, { "epoch": 0.09020032113203336, "grad_norm": 0.06699751317501068, "learning_rate": 9.137267921664669e-05, "loss": 0.2962368011474609, "step": 21010 }, { "epoch": 0.09024325322205336, "grad_norm": 0.44763314723968506, "learning_rate": 9.136836749652907e-05, "loss": 0.32719204425811765, "step": 21020 }, { "epoch": 0.09028618531207336, "grad_norm": 1.079733967781067, "learning_rate": 9.136405577641145e-05, "loss": 0.44279727935791013, "step": 21030 }, { "epoch": 0.09032911740209337, "grad_norm": 0.30846479535102844, "learning_rate": 9.135974405629382e-05, "loss": 0.06859133243560792, "step": 21040 }, { "epoch": 0.09037204949211337, "grad_norm": 0.005983108654618263, "learning_rate": 9.13554323361762e-05, "loss": 0.29343218803405763, "step": 21050 }, { "epoch": 0.09041498158213337, "grad_norm": 6.895624160766602, "learning_rate": 9.135112061605858e-05, "loss": 0.2820367097854614, "step": 21060 }, { "epoch": 0.09045791367215339, "grad_norm": 1.3584187030792236, "learning_rate": 9.134680889594095e-05, "loss": 0.3605871915817261, "step": 21070 }, { "epoch": 0.09050084576217339, "grad_norm": 0.22483867406845093, "learning_rate": 9.134249717582333e-05, "loss": 0.2500627040863037, "step": 21080 }, { "epoch": 0.0905437778521934, "grad_norm": 1.597132682800293, "learning_rate": 9.133818545570571e-05, "loss": 0.3377506256103516, "step": 21090 }, { "epoch": 0.0905867099422134, "grad_norm": 1.06842041015625, "learning_rate": 9.133387373558809e-05, "loss": 0.22344427108764647, "step": 21100 }, { "epoch": 0.0906296420322334, "grad_norm": 2.9514353275299072, "learning_rate": 9.132956201547046e-05, "loss": 0.30355119705200195, "step": 21110 }, { "epoch": 0.09067257412225342, "grad_norm": 0.08379106968641281, "learning_rate": 9.132525029535284e-05, "loss": 0.1024692177772522, "step": 21120 }, { "epoch": 0.09071550621227342, "grad_norm": 0.6457318663597107, "learning_rate": 9.13209385752352e-05, "loss": 0.17067673206329345, "step": 21130 }, { "epoch": 0.09075843830229344, "grad_norm": 1.7996233701705933, "learning_rate": 9.131662685511758e-05, "loss": 0.2232583522796631, "step": 21140 }, { "epoch": 0.09080137039231344, "grad_norm": 1.7304378747940063, "learning_rate": 9.131231513499996e-05, "loss": 0.5026605129241943, "step": 21150 }, { "epoch": 0.09084430248233344, "grad_norm": 0.26706168055534363, "learning_rate": 9.130800341488234e-05, "loss": 0.1385503053665161, "step": 21160 }, { "epoch": 0.09088723457235345, "grad_norm": 0.011747085489332676, "learning_rate": 9.130369169476471e-05, "loss": 0.18480902910232544, "step": 21170 }, { "epoch": 0.09093016666237345, "grad_norm": 0.5335558652877808, "learning_rate": 9.129937997464709e-05, "loss": 0.24206953048706054, "step": 21180 }, { "epoch": 0.09097309875239347, "grad_norm": 5.258423328399658, "learning_rate": 9.129506825452947e-05, "loss": 0.5212468147277832, "step": 21190 }, { "epoch": 0.09101603084241347, "grad_norm": 0.03475071117281914, "learning_rate": 9.129075653441185e-05, "loss": 0.12896806001663208, "step": 21200 }, { "epoch": 0.09105896293243347, "grad_norm": 1.2318061590194702, "learning_rate": 9.128644481429421e-05, "loss": 0.41077375411987305, "step": 21210 }, { "epoch": 0.09110189502245349, "grad_norm": 0.9526176452636719, "learning_rate": 9.128213309417659e-05, "loss": 0.19505181312561035, "step": 21220 }, { "epoch": 0.09114482711247349, "grad_norm": 0.38301515579223633, "learning_rate": 9.127782137405896e-05, "loss": 0.11115933656692505, "step": 21230 }, { "epoch": 0.0911877592024935, "grad_norm": 0.16873271763324738, "learning_rate": 9.127350965394134e-05, "loss": 0.10547451972961426, "step": 21240 }, { "epoch": 0.0912306912925135, "grad_norm": 0.38345664739608765, "learning_rate": 9.126919793382372e-05, "loss": 0.39136838912963867, "step": 21250 }, { "epoch": 0.0912736233825335, "grad_norm": 0.0167214535176754, "learning_rate": 9.12648862137061e-05, "loss": 0.3666508197784424, "step": 21260 }, { "epoch": 0.09131655547255352, "grad_norm": 0.016246158629655838, "learning_rate": 9.126057449358847e-05, "loss": 0.22289640903472902, "step": 21270 }, { "epoch": 0.09135948756257352, "grad_norm": 3.4992456436157227, "learning_rate": 9.125626277347085e-05, "loss": 0.3967678785324097, "step": 21280 }, { "epoch": 0.09140241965259353, "grad_norm": 0.15353921055793762, "learning_rate": 9.125195105335323e-05, "loss": 0.1501341223716736, "step": 21290 }, { "epoch": 0.09144535174261353, "grad_norm": 0.00800306349992752, "learning_rate": 9.12476393332356e-05, "loss": 0.08457062244415284, "step": 21300 }, { "epoch": 0.09148828383263354, "grad_norm": 0.032460544258356094, "learning_rate": 9.124332761311798e-05, "loss": 0.3322447299957275, "step": 21310 }, { "epoch": 0.09153121592265355, "grad_norm": 0.012716036289930344, "learning_rate": 9.123901589300036e-05, "loss": 0.3365495681762695, "step": 21320 }, { "epoch": 0.09157414801267355, "grad_norm": 0.016938934102654457, "learning_rate": 9.123470417288274e-05, "loss": 0.07940050959587097, "step": 21330 }, { "epoch": 0.09161708010269357, "grad_norm": 0.008436004631221294, "learning_rate": 9.123039245276512e-05, "loss": 0.30735015869140625, "step": 21340 }, { "epoch": 0.09166001219271357, "grad_norm": 1.228560447692871, "learning_rate": 9.122608073264749e-05, "loss": 0.6153797626495361, "step": 21350 }, { "epoch": 0.09170294428273357, "grad_norm": 2.067349433898926, "learning_rate": 9.122176901252987e-05, "loss": 0.06948390603065491, "step": 21360 }, { "epoch": 0.09174587637275358, "grad_norm": 1.3888169527053833, "learning_rate": 9.121745729241223e-05, "loss": 0.472440767288208, "step": 21370 }, { "epoch": 0.09178880846277358, "grad_norm": 0.07951004058122635, "learning_rate": 9.121314557229461e-05, "loss": 0.36643662452697756, "step": 21380 }, { "epoch": 0.0918317405527936, "grad_norm": 0.20464085042476654, "learning_rate": 9.120883385217699e-05, "loss": 0.2653449535369873, "step": 21390 }, { "epoch": 0.0918746726428136, "grad_norm": 0.1735410988330841, "learning_rate": 9.120452213205937e-05, "loss": 0.312151837348938, "step": 21400 }, { "epoch": 0.0919176047328336, "grad_norm": 0.07625383883714676, "learning_rate": 9.120021041194174e-05, "loss": 0.13038358688354493, "step": 21410 }, { "epoch": 0.09196053682285361, "grad_norm": 0.16427874565124512, "learning_rate": 9.119589869182412e-05, "loss": 0.18846406936645507, "step": 21420 }, { "epoch": 0.09200346891287361, "grad_norm": 0.762389600276947, "learning_rate": 9.11915869717065e-05, "loss": 0.3079734563827515, "step": 21430 }, { "epoch": 0.09204640100289363, "grad_norm": 0.22876478731632233, "learning_rate": 9.118727525158888e-05, "loss": 0.3454793691635132, "step": 21440 }, { "epoch": 0.09208933309291363, "grad_norm": 1.3125431537628174, "learning_rate": 9.118296353147125e-05, "loss": 0.23138630390167236, "step": 21450 }, { "epoch": 0.09213226518293363, "grad_norm": 2.790705442428589, "learning_rate": 9.117865181135362e-05, "loss": 0.1917582631111145, "step": 21460 }, { "epoch": 0.09217519727295365, "grad_norm": 0.008920938707888126, "learning_rate": 9.1174340091236e-05, "loss": 0.30131969451904295, "step": 21470 }, { "epoch": 0.09221812936297365, "grad_norm": 0.010895448736846447, "learning_rate": 9.117002837111837e-05, "loss": 0.21453914642333985, "step": 21480 }, { "epoch": 0.09226106145299365, "grad_norm": 0.05802464112639427, "learning_rate": 9.116571665100075e-05, "loss": 0.18757236003875732, "step": 21490 }, { "epoch": 0.09230399354301366, "grad_norm": 0.8384543061256409, "learning_rate": 9.116140493088313e-05, "loss": 0.6185249805450439, "step": 21500 }, { "epoch": 0.09234692563303366, "grad_norm": 0.008186266757547855, "learning_rate": 9.11570932107655e-05, "loss": 0.31588795185089114, "step": 21510 }, { "epoch": 0.09238985772305368, "grad_norm": 0.6603401303291321, "learning_rate": 9.115278149064788e-05, "loss": 0.37519667148590086, "step": 21520 }, { "epoch": 0.09243278981307368, "grad_norm": 0.041270140558481216, "learning_rate": 9.114846977053027e-05, "loss": 0.014683444797992707, "step": 21530 }, { "epoch": 0.09247572190309368, "grad_norm": 4.466777801513672, "learning_rate": 9.114415805041264e-05, "loss": 0.11065888404846191, "step": 21540 }, { "epoch": 0.0925186539931137, "grad_norm": 2.9261770248413086, "learning_rate": 9.113984633029501e-05, "loss": 0.28797388076782227, "step": 21550 }, { "epoch": 0.0925615860831337, "grad_norm": 0.0005723032518289983, "learning_rate": 9.113553461017739e-05, "loss": 0.35480945110321044, "step": 21560 }, { "epoch": 0.09260451817315371, "grad_norm": 0.1928175538778305, "learning_rate": 9.113122289005977e-05, "loss": 0.24199821949005126, "step": 21570 }, { "epoch": 0.09264745026317371, "grad_norm": 1.50874924659729, "learning_rate": 9.112691116994214e-05, "loss": 0.16585899591445924, "step": 21580 }, { "epoch": 0.09269038235319371, "grad_norm": 0.005837564822286367, "learning_rate": 9.112259944982452e-05, "loss": 0.14847633838653565, "step": 21590 }, { "epoch": 0.09273331444321373, "grad_norm": 0.0028368725907057524, "learning_rate": 9.11182877297069e-05, "loss": 0.19323831796646118, "step": 21600 }, { "epoch": 0.09277624653323373, "grad_norm": 0.29099443554878235, "learning_rate": 9.111397600958928e-05, "loss": 0.16607074737548827, "step": 21610 }, { "epoch": 0.09281917862325374, "grad_norm": 0.8555635213851929, "learning_rate": 9.110966428947164e-05, "loss": 0.44680633544921877, "step": 21620 }, { "epoch": 0.09286211071327374, "grad_norm": 0.030142752453684807, "learning_rate": 9.110535256935402e-05, "loss": 0.3203817129135132, "step": 21630 }, { "epoch": 0.09290504280329374, "grad_norm": 3.880009174346924, "learning_rate": 9.11010408492364e-05, "loss": 0.11551387310028076, "step": 21640 }, { "epoch": 0.09294797489331376, "grad_norm": 0.01011581439524889, "learning_rate": 9.109672912911877e-05, "loss": 0.27714385986328127, "step": 21650 }, { "epoch": 0.09299090698333376, "grad_norm": 0.052719537168741226, "learning_rate": 9.109241740900115e-05, "loss": 0.21967732906341553, "step": 21660 }, { "epoch": 0.09303383907335377, "grad_norm": 0.10481604933738708, "learning_rate": 9.108810568888353e-05, "loss": 0.35539584159851073, "step": 21670 }, { "epoch": 0.09307677116337378, "grad_norm": 1.0373491048812866, "learning_rate": 9.10837939687659e-05, "loss": 0.354030704498291, "step": 21680 }, { "epoch": 0.09311970325339378, "grad_norm": 0.013149775564670563, "learning_rate": 9.107948224864828e-05, "loss": 0.2582766056060791, "step": 21690 }, { "epoch": 0.09316263534341379, "grad_norm": 3.9368278980255127, "learning_rate": 9.107517052853065e-05, "loss": 0.14385126829147338, "step": 21700 }, { "epoch": 0.09320556743343379, "grad_norm": 0.036791346967220306, "learning_rate": 9.107085880841302e-05, "loss": 0.17863940000534057, "step": 21710 }, { "epoch": 0.0932484995234538, "grad_norm": 0.7378762364387512, "learning_rate": 9.10665470882954e-05, "loss": 0.20115478038787843, "step": 21720 }, { "epoch": 0.09329143161347381, "grad_norm": 2.740835428237915, "learning_rate": 9.106223536817778e-05, "loss": 0.2319530725479126, "step": 21730 }, { "epoch": 0.09333436370349381, "grad_norm": 0.07379814237356186, "learning_rate": 9.105792364806016e-05, "loss": 0.37325448989868165, "step": 21740 }, { "epoch": 0.09337729579351382, "grad_norm": 0.0846996009349823, "learning_rate": 9.105361192794255e-05, "loss": 0.3439117431640625, "step": 21750 }, { "epoch": 0.09342022788353382, "grad_norm": 1.408515214920044, "learning_rate": 9.104930020782492e-05, "loss": 0.2008873462677002, "step": 21760 }, { "epoch": 0.09346315997355384, "grad_norm": 0.10412348061800003, "learning_rate": 9.10449884877073e-05, "loss": 0.44175071716308595, "step": 21770 }, { "epoch": 0.09350609206357384, "grad_norm": 0.14311371743679047, "learning_rate": 9.104067676758968e-05, "loss": 0.2105050802230835, "step": 21780 }, { "epoch": 0.09354902415359384, "grad_norm": 0.5638694167137146, "learning_rate": 9.103636504747204e-05, "loss": 0.1521025061607361, "step": 21790 }, { "epoch": 0.09359195624361386, "grad_norm": 2.0134546756744385, "learning_rate": 9.103205332735442e-05, "loss": 0.2705402374267578, "step": 21800 }, { "epoch": 0.09363488833363386, "grad_norm": 11.786382675170898, "learning_rate": 9.10277416072368e-05, "loss": 0.19081075191497804, "step": 21810 }, { "epoch": 0.09367782042365387, "grad_norm": 0.10052203387022018, "learning_rate": 9.102342988711917e-05, "loss": 0.014866837859153747, "step": 21820 }, { "epoch": 0.09372075251367387, "grad_norm": 1.2548933029174805, "learning_rate": 9.101911816700155e-05, "loss": 0.2774821281433105, "step": 21830 }, { "epoch": 0.09376368460369387, "grad_norm": 0.7686817646026611, "learning_rate": 9.101480644688393e-05, "loss": 0.30581719875335694, "step": 21840 }, { "epoch": 0.09380661669371389, "grad_norm": 3.6177711486816406, "learning_rate": 9.10104947267663e-05, "loss": 0.27634706497192385, "step": 21850 }, { "epoch": 0.09384954878373389, "grad_norm": 0.13538040220737457, "learning_rate": 9.100618300664868e-05, "loss": 0.11370041370391845, "step": 21860 }, { "epoch": 0.0938924808737539, "grad_norm": 0.8200361132621765, "learning_rate": 9.100187128653105e-05, "loss": 0.24458799362182618, "step": 21870 }, { "epoch": 0.0939354129637739, "grad_norm": 0.12220072746276855, "learning_rate": 9.099755956641342e-05, "loss": 0.19076045751571655, "step": 21880 }, { "epoch": 0.0939783450537939, "grad_norm": 0.4327422082424164, "learning_rate": 9.09932478462958e-05, "loss": 0.2103203535079956, "step": 21890 }, { "epoch": 0.09402127714381392, "grad_norm": 0.013189369812607765, "learning_rate": 9.098893612617818e-05, "loss": 0.2407254695892334, "step": 21900 }, { "epoch": 0.09406420923383392, "grad_norm": 1.8307231664657593, "learning_rate": 9.098462440606056e-05, "loss": 0.39395933151245116, "step": 21910 }, { "epoch": 0.09410714132385392, "grad_norm": 1.2831162214279175, "learning_rate": 9.098031268594293e-05, "loss": 0.1422368049621582, "step": 21920 }, { "epoch": 0.09415007341387394, "grad_norm": 4.390259265899658, "learning_rate": 9.097600096582531e-05, "loss": 0.21208109855651855, "step": 21930 }, { "epoch": 0.09419300550389394, "grad_norm": 2.1142640113830566, "learning_rate": 9.097168924570769e-05, "loss": 0.21083295345306396, "step": 21940 }, { "epoch": 0.09423593759391395, "grad_norm": 0.3554537892341614, "learning_rate": 9.096737752559005e-05, "loss": 0.18508408069610596, "step": 21950 }, { "epoch": 0.09427886968393395, "grad_norm": 1.1310135126113892, "learning_rate": 9.096306580547243e-05, "loss": 0.4078618049621582, "step": 21960 }, { "epoch": 0.09432180177395395, "grad_norm": 0.005195611622184515, "learning_rate": 9.095875408535482e-05, "loss": 0.19787473678588868, "step": 21970 }, { "epoch": 0.09436473386397397, "grad_norm": 0.0982283353805542, "learning_rate": 9.09544423652372e-05, "loss": 0.10687346458435058, "step": 21980 }, { "epoch": 0.09440766595399397, "grad_norm": 0.14892923831939697, "learning_rate": 9.095013064511958e-05, "loss": 0.36613714694976807, "step": 21990 }, { "epoch": 0.09445059804401398, "grad_norm": 0.037686608731746674, "learning_rate": 9.094581892500195e-05, "loss": 0.2646932125091553, "step": 22000 }, { "epoch": 0.09445059804401398, "eval_loss": 0.49971094727516174, "eval_runtime": 27.5223, "eval_samples_per_second": 3.633, "eval_steps_per_second": 3.633, "step": 22000 }, { "epoch": 0.09449353013403398, "grad_norm": 0.008538651280105114, "learning_rate": 9.094150720488433e-05, "loss": 0.18507496118545533, "step": 22010 }, { "epoch": 0.09453646222405399, "grad_norm": 1.6707645654678345, "learning_rate": 9.093719548476671e-05, "loss": 0.39339451789855956, "step": 22020 }, { "epoch": 0.094579394314074, "grad_norm": 5.307521820068359, "learning_rate": 9.093288376464907e-05, "loss": 0.35367393493652344, "step": 22030 }, { "epoch": 0.094622326404094, "grad_norm": 0.9975845217704773, "learning_rate": 9.092857204453145e-05, "loss": 0.35877773761749265, "step": 22040 }, { "epoch": 0.09466525849411402, "grad_norm": 0.07139477878808975, "learning_rate": 9.092426032441383e-05, "loss": 0.2376739501953125, "step": 22050 }, { "epoch": 0.09470819058413402, "grad_norm": 0.013264146633446217, "learning_rate": 9.09199486042962e-05, "loss": 0.3740490674972534, "step": 22060 }, { "epoch": 0.09475112267415402, "grad_norm": 1.3234672546386719, "learning_rate": 9.091563688417858e-05, "loss": 0.35382215976715087, "step": 22070 }, { "epoch": 0.09479405476417403, "grad_norm": 0.0955483466386795, "learning_rate": 9.091132516406096e-05, "loss": 0.20790884494781495, "step": 22080 }, { "epoch": 0.09483698685419403, "grad_norm": 0.02523432858288288, "learning_rate": 9.090701344394333e-05, "loss": 0.19438637495040895, "step": 22090 }, { "epoch": 0.09487991894421405, "grad_norm": 0.7263586521148682, "learning_rate": 9.090270172382571e-05, "loss": 0.31020758152008054, "step": 22100 }, { "epoch": 0.09492285103423405, "grad_norm": 0.03434018790721893, "learning_rate": 9.089839000370808e-05, "loss": 0.2784116744995117, "step": 22110 }, { "epoch": 0.09496578312425405, "grad_norm": 0.05946849659085274, "learning_rate": 9.089407828359045e-05, "loss": 0.28342130184173586, "step": 22120 }, { "epoch": 0.09500871521427406, "grad_norm": 0.05106307566165924, "learning_rate": 9.088976656347283e-05, "loss": 0.17503679990768434, "step": 22130 }, { "epoch": 0.09505164730429407, "grad_norm": 5.778275489807129, "learning_rate": 9.088545484335521e-05, "loss": 0.32182085514068604, "step": 22140 }, { "epoch": 0.09509457939431408, "grad_norm": 0.1018679141998291, "learning_rate": 9.088114312323759e-05, "loss": 0.22511820793151854, "step": 22150 }, { "epoch": 0.09513751148433408, "grad_norm": 0.041737183928489685, "learning_rate": 9.087683140311996e-05, "loss": 0.5281650066375733, "step": 22160 }, { "epoch": 0.09518044357435408, "grad_norm": 0.528108537197113, "learning_rate": 9.087251968300234e-05, "loss": 0.24609570503234862, "step": 22170 }, { "epoch": 0.0952233756643741, "grad_norm": 2.166268825531006, "learning_rate": 9.086820796288472e-05, "loss": 0.1980876326560974, "step": 22180 }, { "epoch": 0.0952663077543941, "grad_norm": 0.004012465942651033, "learning_rate": 9.08638962427671e-05, "loss": 0.08182164430618286, "step": 22190 }, { "epoch": 0.09530923984441411, "grad_norm": 0.8156777620315552, "learning_rate": 9.085958452264947e-05, "loss": 0.34822816848754884, "step": 22200 }, { "epoch": 0.09535217193443411, "grad_norm": 0.11005112528800964, "learning_rate": 9.085527280253185e-05, "loss": 0.3492276191711426, "step": 22210 }, { "epoch": 0.09539510402445411, "grad_norm": 1.879366397857666, "learning_rate": 9.085096108241423e-05, "loss": 0.1570887804031372, "step": 22220 }, { "epoch": 0.09543803611447413, "grad_norm": 3.163851499557495, "learning_rate": 9.08466493622966e-05, "loss": 0.20503544807434082, "step": 22230 }, { "epoch": 0.09548096820449413, "grad_norm": 0.0023948336020112038, "learning_rate": 9.084233764217898e-05, "loss": 0.28005211353302, "step": 22240 }, { "epoch": 0.09552390029451414, "grad_norm": 0.2130800187587738, "learning_rate": 9.083802592206136e-05, "loss": 0.4176482677459717, "step": 22250 }, { "epoch": 0.09556683238453414, "grad_norm": 0.28723445534706116, "learning_rate": 9.083371420194374e-05, "loss": 0.15389590263366698, "step": 22260 }, { "epoch": 0.09560976447455415, "grad_norm": 1.2294330596923828, "learning_rate": 9.082940248182611e-05, "loss": 0.19988157749176025, "step": 22270 }, { "epoch": 0.09565269656457416, "grad_norm": 1.211296796798706, "learning_rate": 9.082509076170848e-05, "loss": 0.1836371898651123, "step": 22280 }, { "epoch": 0.09569562865459416, "grad_norm": 0.012455378659069538, "learning_rate": 9.082077904159085e-05, "loss": 0.18517324924468995, "step": 22290 }, { "epoch": 0.09573856074461418, "grad_norm": 3.2311062812805176, "learning_rate": 9.081646732147323e-05, "loss": 0.12784696817398072, "step": 22300 }, { "epoch": 0.09578149283463418, "grad_norm": 0.06623481214046478, "learning_rate": 9.081215560135561e-05, "loss": 0.3080892086029053, "step": 22310 }, { "epoch": 0.09582442492465418, "grad_norm": 1.6185115575790405, "learning_rate": 9.080784388123799e-05, "loss": 0.13115832805633545, "step": 22320 }, { "epoch": 0.09586735701467419, "grad_norm": 2.0833399295806885, "learning_rate": 9.080353216112036e-05, "loss": 0.25316872596740725, "step": 22330 }, { "epoch": 0.0959102891046942, "grad_norm": 1.0511176586151123, "learning_rate": 9.079922044100274e-05, "loss": 0.365296459197998, "step": 22340 }, { "epoch": 0.0959532211947142, "grad_norm": 0.12100150436162949, "learning_rate": 9.079490872088512e-05, "loss": 0.41388521194458006, "step": 22350 }, { "epoch": 0.09599615328473421, "grad_norm": 0.14868609607219696, "learning_rate": 9.079059700076748e-05, "loss": 0.0745149314403534, "step": 22360 }, { "epoch": 0.09603908537475421, "grad_norm": 4.128106117248535, "learning_rate": 9.078628528064986e-05, "loss": 0.17585846185684204, "step": 22370 }, { "epoch": 0.09608201746477422, "grad_norm": 0.2254243791103363, "learning_rate": 9.078197356053224e-05, "loss": 0.14409635066986085, "step": 22380 }, { "epoch": 0.09612494955479423, "grad_norm": 0.004599638283252716, "learning_rate": 9.077766184041461e-05, "loss": 0.26925704479217527, "step": 22390 }, { "epoch": 0.09616788164481423, "grad_norm": 1.0302200317382812, "learning_rate": 9.077335012029699e-05, "loss": 0.3191660165786743, "step": 22400 }, { "epoch": 0.09621081373483424, "grad_norm": 15.162273406982422, "learning_rate": 9.076903840017937e-05, "loss": 0.28926901817321776, "step": 22410 }, { "epoch": 0.09625374582485424, "grad_norm": 0.34869951009750366, "learning_rate": 9.076472668006175e-05, "loss": 0.21189517974853517, "step": 22420 }, { "epoch": 0.09629667791487426, "grad_norm": 1.3535178899765015, "learning_rate": 9.076041495994412e-05, "loss": 0.22193832397460939, "step": 22430 }, { "epoch": 0.09633961000489426, "grad_norm": 0.17086510360240936, "learning_rate": 9.07561032398265e-05, "loss": 0.40629210472106936, "step": 22440 }, { "epoch": 0.09638254209491426, "grad_norm": 0.9439690113067627, "learning_rate": 9.075179151970888e-05, "loss": 0.24682340621948243, "step": 22450 }, { "epoch": 0.09642547418493427, "grad_norm": 0.005355150904506445, "learning_rate": 9.074747979959126e-05, "loss": 0.21591801643371583, "step": 22460 }, { "epoch": 0.09646840627495427, "grad_norm": 12.08975601196289, "learning_rate": 9.074316807947363e-05, "loss": 0.29039928913116453, "step": 22470 }, { "epoch": 0.09651133836497429, "grad_norm": 0.013028501532971859, "learning_rate": 9.073885635935601e-05, "loss": 0.29697093963623045, "step": 22480 }, { "epoch": 0.09655427045499429, "grad_norm": 2.355160713195801, "learning_rate": 9.073454463923839e-05, "loss": 0.3402007818222046, "step": 22490 }, { "epoch": 0.09659720254501429, "grad_norm": 0.0010523615637794137, "learning_rate": 9.073023291912077e-05, "loss": 0.03701513111591339, "step": 22500 }, { "epoch": 0.0966401346350343, "grad_norm": 0.015033922158181667, "learning_rate": 9.072592119900314e-05, "loss": 0.23244831562042237, "step": 22510 }, { "epoch": 0.0966830667250543, "grad_norm": 0.0025452927220612764, "learning_rate": 9.072160947888552e-05, "loss": 0.24909675121307373, "step": 22520 }, { "epoch": 0.09672599881507432, "grad_norm": 2.8800175189971924, "learning_rate": 9.071729775876788e-05, "loss": 0.46021552085876466, "step": 22530 }, { "epoch": 0.09676893090509432, "grad_norm": 0.0061400760896503925, "learning_rate": 9.071298603865026e-05, "loss": 0.20453925132751466, "step": 22540 }, { "epoch": 0.09681186299511432, "grad_norm": 0.036182962357997894, "learning_rate": 9.070867431853264e-05, "loss": 0.1192806601524353, "step": 22550 }, { "epoch": 0.09685479508513434, "grad_norm": 0.09762956947088242, "learning_rate": 9.070436259841502e-05, "loss": 0.16233222484588622, "step": 22560 }, { "epoch": 0.09689772717515434, "grad_norm": 2.074234962463379, "learning_rate": 9.070005087829739e-05, "loss": 0.3488273620605469, "step": 22570 }, { "epoch": 0.09694065926517435, "grad_norm": 0.06691594421863556, "learning_rate": 9.069573915817977e-05, "loss": 0.09470370411872864, "step": 22580 }, { "epoch": 0.09698359135519435, "grad_norm": 0.08174016326665878, "learning_rate": 9.069142743806215e-05, "loss": 0.0023634165525436403, "step": 22590 }, { "epoch": 0.09702652344521435, "grad_norm": 0.0033455390948802233, "learning_rate": 9.068711571794453e-05, "loss": 0.17757070064544678, "step": 22600 }, { "epoch": 0.09706945553523437, "grad_norm": 0.14201530814170837, "learning_rate": 9.068280399782689e-05, "loss": 0.13758721351623535, "step": 22610 }, { "epoch": 0.09711238762525437, "grad_norm": 5.6771674156188965, "learning_rate": 9.067849227770927e-05, "loss": 0.07379586100578309, "step": 22620 }, { "epoch": 0.09715531971527439, "grad_norm": 1.037272572517395, "learning_rate": 9.067418055759164e-05, "loss": 0.21951465606689452, "step": 22630 }, { "epoch": 0.09719825180529439, "grad_norm": 0.0346045047044754, "learning_rate": 9.066986883747402e-05, "loss": 0.3791325330734253, "step": 22640 }, { "epoch": 0.09724118389531439, "grad_norm": 1.0330848693847656, "learning_rate": 9.06655571173564e-05, "loss": 0.40000143051147463, "step": 22650 }, { "epoch": 0.0972841159853344, "grad_norm": 0.19888627529144287, "learning_rate": 9.066124539723878e-05, "loss": 0.2448514938354492, "step": 22660 }, { "epoch": 0.0973270480753544, "grad_norm": 0.08020024746656418, "learning_rate": 9.065693367712115e-05, "loss": 0.09969213604927063, "step": 22670 }, { "epoch": 0.09736998016537442, "grad_norm": 2.1089484691619873, "learning_rate": 9.065262195700353e-05, "loss": 0.5607002735137939, "step": 22680 }, { "epoch": 0.09741291225539442, "grad_norm": 0.018384765833616257, "learning_rate": 9.064831023688591e-05, "loss": 0.317985463142395, "step": 22690 }, { "epoch": 0.09745584434541442, "grad_norm": 0.06176162511110306, "learning_rate": 9.064399851676829e-05, "loss": 0.19674248695373536, "step": 22700 }, { "epoch": 0.09749877643543443, "grad_norm": 0.00424107676371932, "learning_rate": 9.063968679665066e-05, "loss": 0.24540879726409912, "step": 22710 }, { "epoch": 0.09754170852545443, "grad_norm": 0.09543658047914505, "learning_rate": 9.063537507653304e-05, "loss": 0.35407238006591796, "step": 22720 }, { "epoch": 0.09758464061547445, "grad_norm": 0.019383370876312256, "learning_rate": 9.063106335641542e-05, "loss": 0.1949351906776428, "step": 22730 }, { "epoch": 0.09762757270549445, "grad_norm": 0.008234373293817043, "learning_rate": 9.06267516362978e-05, "loss": 0.4226356029510498, "step": 22740 }, { "epoch": 0.09767050479551445, "grad_norm": 1.8291183710098267, "learning_rate": 9.062243991618017e-05, "loss": 0.25851223468780515, "step": 22750 }, { "epoch": 0.09771343688553447, "grad_norm": 4.839968681335449, "learning_rate": 9.061812819606255e-05, "loss": 0.21467921733856202, "step": 22760 }, { "epoch": 0.09775636897555447, "grad_norm": 0.09866461157798767, "learning_rate": 9.061381647594491e-05, "loss": 0.27601807117462157, "step": 22770 }, { "epoch": 0.09779930106557447, "grad_norm": 7.941098690032959, "learning_rate": 9.060950475582729e-05, "loss": 0.3188823699951172, "step": 22780 }, { "epoch": 0.09784223315559448, "grad_norm": 1.3161903619766235, "learning_rate": 9.060519303570967e-05, "loss": 0.261748743057251, "step": 22790 }, { "epoch": 0.09788516524561448, "grad_norm": 1.3355660438537598, "learning_rate": 9.060088131559204e-05, "loss": 0.3479597568511963, "step": 22800 }, { "epoch": 0.0979280973356345, "grad_norm": 2.689037322998047, "learning_rate": 9.059656959547442e-05, "loss": 0.3787343502044678, "step": 22810 }, { "epoch": 0.0979710294256545, "grad_norm": 1.72267484664917, "learning_rate": 9.05922578753568e-05, "loss": 0.21570446491241455, "step": 22820 }, { "epoch": 0.0980139615156745, "grad_norm": 9.35726547241211, "learning_rate": 9.058794615523918e-05, "loss": 0.21867167949676514, "step": 22830 }, { "epoch": 0.09805689360569451, "grad_norm": 3.232853412628174, "learning_rate": 9.058363443512155e-05, "loss": 0.12764328718185425, "step": 22840 }, { "epoch": 0.09809982569571452, "grad_norm": 0.0028268240857869387, "learning_rate": 9.057932271500392e-05, "loss": 0.273606538772583, "step": 22850 }, { "epoch": 0.09814275778573453, "grad_norm": 0.5840247273445129, "learning_rate": 9.05750109948863e-05, "loss": 0.2192753553390503, "step": 22860 }, { "epoch": 0.09818568987575453, "grad_norm": 0.022768119350075722, "learning_rate": 9.057069927476867e-05, "loss": 0.32812683582305907, "step": 22870 }, { "epoch": 0.09822862196577453, "grad_norm": 0.01956142857670784, "learning_rate": 9.056638755465105e-05, "loss": 0.10067062377929688, "step": 22880 }, { "epoch": 0.09827155405579455, "grad_norm": 0.9932413697242737, "learning_rate": 9.056207583453343e-05, "loss": 0.26803138256073, "step": 22890 }, { "epoch": 0.09831448614581455, "grad_norm": 0.022333988919854164, "learning_rate": 9.05577641144158e-05, "loss": 0.20267021656036377, "step": 22900 }, { "epoch": 0.09835741823583456, "grad_norm": 0.13647231459617615, "learning_rate": 9.055345239429818e-05, "loss": 0.33862149715423584, "step": 22910 }, { "epoch": 0.09840035032585456, "grad_norm": 0.15876804292201996, "learning_rate": 9.054914067418056e-05, "loss": 0.21299707889556885, "step": 22920 }, { "epoch": 0.09844328241587456, "grad_norm": 0.18488116562366486, "learning_rate": 9.054482895406294e-05, "loss": 0.20834004878997803, "step": 22930 }, { "epoch": 0.09848621450589458, "grad_norm": 0.08750098943710327, "learning_rate": 9.054051723394531e-05, "loss": 0.20142159461975098, "step": 22940 }, { "epoch": 0.09852914659591458, "grad_norm": 0.7470207810401917, "learning_rate": 9.053620551382769e-05, "loss": 0.2614895343780518, "step": 22950 }, { "epoch": 0.0985720786859346, "grad_norm": 8.173144340515137, "learning_rate": 9.053189379371007e-05, "loss": 0.11097879409790039, "step": 22960 }, { "epoch": 0.0986150107759546, "grad_norm": 0.004678850993514061, "learning_rate": 9.052758207359245e-05, "loss": 0.24954421520233155, "step": 22970 }, { "epoch": 0.0986579428659746, "grad_norm": 0.14163123071193695, "learning_rate": 9.052327035347482e-05, "loss": 0.1883327603340149, "step": 22980 }, { "epoch": 0.09870087495599461, "grad_norm": 1.834106206893921, "learning_rate": 9.05189586333572e-05, "loss": 0.19506003856658935, "step": 22990 }, { "epoch": 0.09874380704601461, "grad_norm": 0.9665418863296509, "learning_rate": 9.051464691323958e-05, "loss": 0.38878982067108153, "step": 23000 }, { "epoch": 0.09874380704601461, "eval_loss": 0.48440316319465637, "eval_runtime": 27.4341, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 23000 }, { "epoch": 0.09878673913603463, "grad_norm": 13.875965118408203, "learning_rate": 9.051033519312196e-05, "loss": 0.2893874168395996, "step": 23010 }, { "epoch": 0.09882967122605463, "grad_norm": 0.04340042173862457, "learning_rate": 9.050602347300432e-05, "loss": 0.3228999137878418, "step": 23020 }, { "epoch": 0.09887260331607463, "grad_norm": 0.05363241955637932, "learning_rate": 9.05017117528867e-05, "loss": 0.36945428848266604, "step": 23030 }, { "epoch": 0.09891553540609464, "grad_norm": 0.15401561558246613, "learning_rate": 9.049740003276907e-05, "loss": 0.19055159091949464, "step": 23040 }, { "epoch": 0.09895846749611464, "grad_norm": 0.008086834102869034, "learning_rate": 9.049308831265145e-05, "loss": 0.010691716521978378, "step": 23050 }, { "epoch": 0.09900139958613466, "grad_norm": 0.08624149858951569, "learning_rate": 9.048877659253383e-05, "loss": 0.15782138109207153, "step": 23060 }, { "epoch": 0.09904433167615466, "grad_norm": 0.022653276100754738, "learning_rate": 9.04844648724162e-05, "loss": 0.09881403446197509, "step": 23070 }, { "epoch": 0.09908726376617466, "grad_norm": 0.002614100929349661, "learning_rate": 9.048015315229858e-05, "loss": 0.33692307472229005, "step": 23080 }, { "epoch": 0.09913019585619467, "grad_norm": 1.3454197645187378, "learning_rate": 9.047584143218096e-05, "loss": 0.37106547355651853, "step": 23090 }, { "epoch": 0.09917312794621468, "grad_norm": 0.00979001447558403, "learning_rate": 9.047152971206332e-05, "loss": 0.2598975896835327, "step": 23100 }, { "epoch": 0.09921606003623469, "grad_norm": 23.73790740966797, "learning_rate": 9.04672179919457e-05, "loss": 0.33892526626586916, "step": 23110 }, { "epoch": 0.09925899212625469, "grad_norm": 6.083474636077881, "learning_rate": 9.046290627182808e-05, "loss": 0.25849740505218505, "step": 23120 }, { "epoch": 0.09930192421627469, "grad_norm": 0.8224849104881287, "learning_rate": 9.045859455171046e-05, "loss": 0.4263033390045166, "step": 23130 }, { "epoch": 0.09934485630629471, "grad_norm": 0.017254041507840157, "learning_rate": 9.045428283159283e-05, "loss": 0.17393676042556763, "step": 23140 }, { "epoch": 0.09938778839631471, "grad_norm": 0.10545994341373444, "learning_rate": 9.044997111147521e-05, "loss": 0.29897661209106446, "step": 23150 }, { "epoch": 0.09943072048633471, "grad_norm": 0.36653459072113037, "learning_rate": 9.04456593913576e-05, "loss": 0.2769860506057739, "step": 23160 }, { "epoch": 0.09947365257635472, "grad_norm": 1.0582211017608643, "learning_rate": 9.044134767123998e-05, "loss": 0.32262184619903567, "step": 23170 }, { "epoch": 0.09951658466637472, "grad_norm": 10.352930068969727, "learning_rate": 9.043703595112234e-05, "loss": 0.2117457628250122, "step": 23180 }, { "epoch": 0.09955951675639474, "grad_norm": 0.03504159301519394, "learning_rate": 9.043272423100472e-05, "loss": 0.14238468408584595, "step": 23190 }, { "epoch": 0.09960244884641474, "grad_norm": 0.02711699716746807, "learning_rate": 9.04284125108871e-05, "loss": 0.19559116363525392, "step": 23200 }, { "epoch": 0.09964538093643474, "grad_norm": 0.0065023526549339294, "learning_rate": 9.042410079076948e-05, "loss": 0.08624934554100036, "step": 23210 }, { "epoch": 0.09968831302645476, "grad_norm": 0.043089572340250015, "learning_rate": 9.041978907065185e-05, "loss": 0.07906042337417603, "step": 23220 }, { "epoch": 0.09973124511647476, "grad_norm": 0.021948212757706642, "learning_rate": 9.041547735053423e-05, "loss": 0.2752461671829224, "step": 23230 }, { "epoch": 0.09977417720649477, "grad_norm": 0.012341726571321487, "learning_rate": 9.041116563041661e-05, "loss": 0.18004517555236815, "step": 23240 }, { "epoch": 0.09981710929651477, "grad_norm": 1.122619867324829, "learning_rate": 9.040685391029898e-05, "loss": 0.15641025304794312, "step": 23250 }, { "epoch": 0.09986004138653477, "grad_norm": 0.021335327997803688, "learning_rate": 9.040254219018136e-05, "loss": 0.22120120525360107, "step": 23260 }, { "epoch": 0.09990297347655479, "grad_norm": 0.8859668970108032, "learning_rate": 9.039823047006373e-05, "loss": 0.3420703649520874, "step": 23270 }, { "epoch": 0.09994590556657479, "grad_norm": 0.0022604737896472216, "learning_rate": 9.03939187499461e-05, "loss": 0.20502817630767822, "step": 23280 }, { "epoch": 0.0999888376565948, "grad_norm": 0.15698321163654327, "learning_rate": 9.038960702982848e-05, "loss": 0.32589011192321776, "step": 23290 }, { "epoch": 0.1000317697466148, "grad_norm": 1.4402498006820679, "learning_rate": 9.038529530971086e-05, "loss": 0.28263533115386963, "step": 23300 }, { "epoch": 0.1000747018366348, "grad_norm": 1.280652642250061, "learning_rate": 9.038098358959324e-05, "loss": 0.1799672245979309, "step": 23310 }, { "epoch": 0.10011763392665482, "grad_norm": 0.2672135829925537, "learning_rate": 9.037667186947561e-05, "loss": 0.18167222738265992, "step": 23320 }, { "epoch": 0.10016056601667482, "grad_norm": 0.05923588201403618, "learning_rate": 9.037236014935799e-05, "loss": 0.17600538730621337, "step": 23330 }, { "epoch": 0.10020349810669484, "grad_norm": 0.689048707485199, "learning_rate": 9.036804842924037e-05, "loss": 0.3387850522994995, "step": 23340 }, { "epoch": 0.10024643019671484, "grad_norm": 4.445986747741699, "learning_rate": 9.036373670912273e-05, "loss": 0.2602772951126099, "step": 23350 }, { "epoch": 0.10028936228673484, "grad_norm": 4.003875255584717, "learning_rate": 9.035942498900511e-05, "loss": 0.48433527946472166, "step": 23360 }, { "epoch": 0.10033229437675485, "grad_norm": 2.171830177307129, "learning_rate": 9.035511326888749e-05, "loss": 0.30810022354125977, "step": 23370 }, { "epoch": 0.10037522646677485, "grad_norm": 5.789403915405273, "learning_rate": 9.035080154876988e-05, "loss": 0.2114821195602417, "step": 23380 }, { "epoch": 0.10041815855679487, "grad_norm": 0.8029804825782776, "learning_rate": 9.034648982865225e-05, "loss": 0.21316425800323485, "step": 23390 }, { "epoch": 0.10046109064681487, "grad_norm": 3.6660075187683105, "learning_rate": 9.034217810853463e-05, "loss": 0.3459254026412964, "step": 23400 }, { "epoch": 0.10050402273683487, "grad_norm": 0.025712208822369576, "learning_rate": 9.033786638841701e-05, "loss": 0.18357253074645996, "step": 23410 }, { "epoch": 0.10054695482685488, "grad_norm": 1.9867717027664185, "learning_rate": 9.033355466829939e-05, "loss": 0.19808287620544435, "step": 23420 }, { "epoch": 0.10058988691687488, "grad_norm": 0.25725287199020386, "learning_rate": 9.032924294818175e-05, "loss": 0.35228469371795657, "step": 23430 }, { "epoch": 0.1006328190068949, "grad_norm": 2.235778570175171, "learning_rate": 9.032493122806413e-05, "loss": 0.3108292818069458, "step": 23440 }, { "epoch": 0.1006757510969149, "grad_norm": 0.0473552830517292, "learning_rate": 9.03206195079465e-05, "loss": 0.14987121820449828, "step": 23450 }, { "epoch": 0.1007186831869349, "grad_norm": 0.05179880931973457, "learning_rate": 9.031630778782888e-05, "loss": 0.2000588893890381, "step": 23460 }, { "epoch": 0.10076161527695492, "grad_norm": 1.4705721139907837, "learning_rate": 9.031199606771126e-05, "loss": 0.33507752418518066, "step": 23470 }, { "epoch": 0.10080454736697492, "grad_norm": 14.33340072631836, "learning_rate": 9.030768434759364e-05, "loss": 0.39852664470672605, "step": 23480 }, { "epoch": 0.10084747945699493, "grad_norm": 0.0033453968353569508, "learning_rate": 9.030337262747601e-05, "loss": 0.25700409412384034, "step": 23490 }, { "epoch": 0.10089041154701493, "grad_norm": 0.026818279176950455, "learning_rate": 9.029906090735839e-05, "loss": 0.0700181007385254, "step": 23500 }, { "epoch": 0.10093334363703493, "grad_norm": 0.011163324117660522, "learning_rate": 9.029474918724076e-05, "loss": 0.26866855621337893, "step": 23510 }, { "epoch": 0.10097627572705495, "grad_norm": 2.636354684829712, "learning_rate": 9.029043746712313e-05, "loss": 0.30903441905975343, "step": 23520 }, { "epoch": 0.10101920781707495, "grad_norm": 4.494817733764648, "learning_rate": 9.028612574700551e-05, "loss": 0.32044272422790526, "step": 23530 }, { "epoch": 0.10106213990709496, "grad_norm": 1.129416584968567, "learning_rate": 9.028181402688789e-05, "loss": 0.3815204620361328, "step": 23540 }, { "epoch": 0.10110507199711496, "grad_norm": 1.175889015197754, "learning_rate": 9.027750230677026e-05, "loss": 0.15066792964935302, "step": 23550 }, { "epoch": 0.10114800408713497, "grad_norm": 0.4594559967517853, "learning_rate": 9.027319058665264e-05, "loss": 0.32463822364807127, "step": 23560 }, { "epoch": 0.10119093617715498, "grad_norm": 0.20249129831790924, "learning_rate": 9.026887886653502e-05, "loss": 0.16981053352355957, "step": 23570 }, { "epoch": 0.10123386826717498, "grad_norm": 0.18544776737689972, "learning_rate": 9.02645671464174e-05, "loss": 0.2120675802230835, "step": 23580 }, { "epoch": 0.10127680035719498, "grad_norm": 1.3284571170806885, "learning_rate": 9.026025542629976e-05, "loss": 0.29755163192749023, "step": 23590 }, { "epoch": 0.101319732447215, "grad_norm": 0.09027359634637833, "learning_rate": 9.025594370618215e-05, "loss": 0.3194664478302002, "step": 23600 }, { "epoch": 0.101362664537235, "grad_norm": 0.013314232230186462, "learning_rate": 9.025163198606453e-05, "loss": 0.4534477233886719, "step": 23610 }, { "epoch": 0.10140559662725501, "grad_norm": 3.5338640213012695, "learning_rate": 9.02473202659469e-05, "loss": 0.32490148544311526, "step": 23620 }, { "epoch": 0.10144852871727501, "grad_norm": 3.0845723152160645, "learning_rate": 9.024300854582928e-05, "loss": 0.2430340528488159, "step": 23630 }, { "epoch": 0.10149146080729501, "grad_norm": 0.8819409608840942, "learning_rate": 9.023869682571166e-05, "loss": 0.28399295806884767, "step": 23640 }, { "epoch": 0.10153439289731503, "grad_norm": 0.03459606692194939, "learning_rate": 9.023438510559404e-05, "loss": 0.23548510074615478, "step": 23650 }, { "epoch": 0.10157732498733503, "grad_norm": 12.161425590515137, "learning_rate": 9.023007338547642e-05, "loss": 0.39958481788635253, "step": 23660 }, { "epoch": 0.10162025707735504, "grad_norm": 0.08297639340162277, "learning_rate": 9.022576166535879e-05, "loss": 0.15242440700531007, "step": 23670 }, { "epoch": 0.10166318916737505, "grad_norm": 0.17127041518688202, "learning_rate": 9.022144994524116e-05, "loss": 0.22498526573181152, "step": 23680 }, { "epoch": 0.10170612125739505, "grad_norm": 1.2555885314941406, "learning_rate": 9.021713822512353e-05, "loss": 0.3380129337310791, "step": 23690 }, { "epoch": 0.10174905334741506, "grad_norm": 1.3294332027435303, "learning_rate": 9.021282650500591e-05, "loss": 0.22705109119415284, "step": 23700 }, { "epoch": 0.10179198543743506, "grad_norm": 0.050139833241701126, "learning_rate": 9.020851478488829e-05, "loss": 0.3327648401260376, "step": 23710 }, { "epoch": 0.10183491752745508, "grad_norm": 2.695390224456787, "learning_rate": 9.020420306477067e-05, "loss": 0.4076399803161621, "step": 23720 }, { "epoch": 0.10187784961747508, "grad_norm": 0.45680272579193115, "learning_rate": 9.019989134465304e-05, "loss": 0.2645672082901001, "step": 23730 }, { "epoch": 0.10192078170749508, "grad_norm": 4.387186527252197, "learning_rate": 9.019557962453542e-05, "loss": 0.18714665174484252, "step": 23740 }, { "epoch": 0.1019637137975151, "grad_norm": 0.34954363107681274, "learning_rate": 9.01912679044178e-05, "loss": 0.19371408224105835, "step": 23750 }, { "epoch": 0.1020066458875351, "grad_norm": 0.18718601763248444, "learning_rate": 9.018695618430016e-05, "loss": 0.11431068181991577, "step": 23760 }, { "epoch": 0.10204957797755511, "grad_norm": 0.027119481936097145, "learning_rate": 9.018264446418254e-05, "loss": 0.27887601852416993, "step": 23770 }, { "epoch": 0.10209251006757511, "grad_norm": 0.0788949579000473, "learning_rate": 9.017833274406492e-05, "loss": 0.17622390985488892, "step": 23780 }, { "epoch": 0.10213544215759511, "grad_norm": 0.07781904935836792, "learning_rate": 9.01740210239473e-05, "loss": 0.16738327741622924, "step": 23790 }, { "epoch": 0.10217837424761513, "grad_norm": 0.7536662817001343, "learning_rate": 9.016970930382967e-05, "loss": 0.22194280624389648, "step": 23800 }, { "epoch": 0.10222130633763513, "grad_norm": 0.008254468441009521, "learning_rate": 9.016539758371205e-05, "loss": 0.35129923820495607, "step": 23810 }, { "epoch": 0.10226423842765514, "grad_norm": 0.7630317807197571, "learning_rate": 9.016108586359443e-05, "loss": 0.2706347942352295, "step": 23820 }, { "epoch": 0.10230717051767514, "grad_norm": 1.1152968406677246, "learning_rate": 9.01567741434768e-05, "loss": 0.424467134475708, "step": 23830 }, { "epoch": 0.10235010260769514, "grad_norm": 9.643174171447754, "learning_rate": 9.015246242335918e-05, "loss": 0.20278689861297608, "step": 23840 }, { "epoch": 0.10239303469771516, "grad_norm": 0.22802026569843292, "learning_rate": 9.014815070324156e-05, "loss": 0.3205211877822876, "step": 23850 }, { "epoch": 0.10243596678773516, "grad_norm": 0.010943016968667507, "learning_rate": 9.014383898312393e-05, "loss": 0.08449001908302307, "step": 23860 }, { "epoch": 0.10247889887775517, "grad_norm": 2.0597031116485596, "learning_rate": 9.013952726300631e-05, "loss": 0.46553263664245603, "step": 23870 }, { "epoch": 0.10252183096777517, "grad_norm": 0.05474220588803291, "learning_rate": 9.013521554288869e-05, "loss": 0.36751723289489746, "step": 23880 }, { "epoch": 0.10256476305779517, "grad_norm": 0.12750476598739624, "learning_rate": 9.013090382277107e-05, "loss": 0.15174291133880616, "step": 23890 }, { "epoch": 0.10260769514781519, "grad_norm": 2.684352397918701, "learning_rate": 9.012659210265344e-05, "loss": 0.15601576566696168, "step": 23900 }, { "epoch": 0.10265062723783519, "grad_norm": 0.024600274860858917, "learning_rate": 9.012228038253582e-05, "loss": 0.28514063358306885, "step": 23910 }, { "epoch": 0.1026935593278552, "grad_norm": 0.1267337203025818, "learning_rate": 9.011796866241819e-05, "loss": 0.28657100200653074, "step": 23920 }, { "epoch": 0.1027364914178752, "grad_norm": 0.09003254026174545, "learning_rate": 9.011365694230056e-05, "loss": 0.37873115539550783, "step": 23930 }, { "epoch": 0.1027794235078952, "grad_norm": 0.08197905868291855, "learning_rate": 9.010934522218294e-05, "loss": 0.2803717374801636, "step": 23940 }, { "epoch": 0.10282235559791522, "grad_norm": 0.034923408180475235, "learning_rate": 9.010503350206532e-05, "loss": 0.24216341972351074, "step": 23950 }, { "epoch": 0.10286528768793522, "grad_norm": 0.06310239434242249, "learning_rate": 9.01007217819477e-05, "loss": 0.3100374460220337, "step": 23960 }, { "epoch": 0.10290821977795524, "grad_norm": 3.169227361679077, "learning_rate": 9.009641006183007e-05, "loss": 0.352788519859314, "step": 23970 }, { "epoch": 0.10295115186797524, "grad_norm": 0.026941534131765366, "learning_rate": 9.009209834171245e-05, "loss": 0.1345282554626465, "step": 23980 }, { "epoch": 0.10299408395799524, "grad_norm": 1.0152279138565063, "learning_rate": 9.008778662159483e-05, "loss": 0.15926393270492553, "step": 23990 }, { "epoch": 0.10303701604801525, "grad_norm": 0.04590229690074921, "learning_rate": 9.00834749014772e-05, "loss": 0.25821306705474856, "step": 24000 }, { "epoch": 0.10303701604801525, "eval_loss": 0.49062225222587585, "eval_runtime": 27.4363, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 24000 }, { "epoch": 0.10307994813803525, "grad_norm": 3.166994094848633, "learning_rate": 9.007916318135957e-05, "loss": 0.5217941761016845, "step": 24010 }, { "epoch": 0.10312288022805526, "grad_norm": 2.5861167907714844, "learning_rate": 9.007485146124195e-05, "loss": 0.474554443359375, "step": 24020 }, { "epoch": 0.10316581231807527, "grad_norm": 0.3263162672519684, "learning_rate": 9.007053974112432e-05, "loss": 0.09132232069969178, "step": 24030 }, { "epoch": 0.10320874440809527, "grad_norm": 0.027722327038645744, "learning_rate": 9.00662280210067e-05, "loss": 0.22393157482147216, "step": 24040 }, { "epoch": 0.10325167649811529, "grad_norm": 0.041487690061330795, "learning_rate": 9.006191630088908e-05, "loss": 0.2686819553375244, "step": 24050 }, { "epoch": 0.10329460858813529, "grad_norm": 1.3169100284576416, "learning_rate": 9.005760458077145e-05, "loss": 0.34045383930206297, "step": 24060 }, { "epoch": 0.10333754067815529, "grad_norm": 35.14598083496094, "learning_rate": 9.005329286065383e-05, "loss": 0.26218743324279786, "step": 24070 }, { "epoch": 0.1033804727681753, "grad_norm": 5.64556360244751, "learning_rate": 9.004898114053621e-05, "loss": 0.31703526973724366, "step": 24080 }, { "epoch": 0.1034234048581953, "grad_norm": 0.6731160283088684, "learning_rate": 9.004466942041859e-05, "loss": 0.22504048347473143, "step": 24090 }, { "epoch": 0.10346633694821532, "grad_norm": 1.6286805868148804, "learning_rate": 9.004035770030096e-05, "loss": 0.34985036849975587, "step": 24100 }, { "epoch": 0.10350926903823532, "grad_norm": 0.1510307937860489, "learning_rate": 9.003604598018334e-05, "loss": 0.25102245807647705, "step": 24110 }, { "epoch": 0.10355220112825532, "grad_norm": 0.772885262966156, "learning_rate": 9.003173426006572e-05, "loss": 0.35218875408172606, "step": 24120 }, { "epoch": 0.10359513321827533, "grad_norm": 0.02091173827648163, "learning_rate": 9.00274225399481e-05, "loss": 0.2027892827987671, "step": 24130 }, { "epoch": 0.10363806530829534, "grad_norm": 0.8786803483963013, "learning_rate": 9.002311081983047e-05, "loss": 0.31464924812316897, "step": 24140 }, { "epoch": 0.10368099739831535, "grad_norm": 0.006752110552042723, "learning_rate": 9.001879909971285e-05, "loss": 0.2620436906814575, "step": 24150 }, { "epoch": 0.10372392948833535, "grad_norm": 1.4851539134979248, "learning_rate": 9.001448737959523e-05, "loss": 0.30434255599975585, "step": 24160 }, { "epoch": 0.10376686157835535, "grad_norm": 0.0038928573485463858, "learning_rate": 9.001017565947759e-05, "loss": 0.21102461814880372, "step": 24170 }, { "epoch": 0.10380979366837537, "grad_norm": 0.028493205085396767, "learning_rate": 9.000586393935997e-05, "loss": 0.4070149898529053, "step": 24180 }, { "epoch": 0.10385272575839537, "grad_norm": 4.407706260681152, "learning_rate": 9.000155221924235e-05, "loss": 0.30825190544128417, "step": 24190 }, { "epoch": 0.10389565784841538, "grad_norm": 1.131311058998108, "learning_rate": 8.999724049912472e-05, "loss": 0.2937792778015137, "step": 24200 }, { "epoch": 0.10393858993843538, "grad_norm": 1.499942421913147, "learning_rate": 8.99929287790071e-05, "loss": 0.25059003829956056, "step": 24210 }, { "epoch": 0.10398152202845538, "grad_norm": 1.6240562200546265, "learning_rate": 8.998861705888948e-05, "loss": 0.2966706991195679, "step": 24220 }, { "epoch": 0.1040244541184754, "grad_norm": 0.2789813280105591, "learning_rate": 8.998430533877186e-05, "loss": 0.2914541721343994, "step": 24230 }, { "epoch": 0.1040673862084954, "grad_norm": 0.010012147016823292, "learning_rate": 8.997999361865423e-05, "loss": 0.4344785690307617, "step": 24240 }, { "epoch": 0.10411031829851541, "grad_norm": 1.3773272037506104, "learning_rate": 8.99756818985366e-05, "loss": 0.22288546562194825, "step": 24250 }, { "epoch": 0.10415325038853541, "grad_norm": 0.09848473966121674, "learning_rate": 8.997137017841897e-05, "loss": 0.21312005519866944, "step": 24260 }, { "epoch": 0.10419618247855542, "grad_norm": 0.029853790998458862, "learning_rate": 8.996705845830135e-05, "loss": 0.28399271965026857, "step": 24270 }, { "epoch": 0.10423911456857543, "grad_norm": 0.01982700265944004, "learning_rate": 8.996274673818373e-05, "loss": 0.2613893270492554, "step": 24280 }, { "epoch": 0.10428204665859543, "grad_norm": 0.0047102137468755245, "learning_rate": 8.99584350180661e-05, "loss": 0.25738341808319093, "step": 24290 }, { "epoch": 0.10432497874861545, "grad_norm": 2.0769221782684326, "learning_rate": 8.995412329794848e-05, "loss": 0.1588195562362671, "step": 24300 }, { "epoch": 0.10436791083863545, "grad_norm": 0.09412598609924316, "learning_rate": 8.994981157783086e-05, "loss": 0.22160720825195312, "step": 24310 }, { "epoch": 0.10441084292865545, "grad_norm": 0.9660851359367371, "learning_rate": 8.994549985771324e-05, "loss": 0.5141227722167969, "step": 24320 }, { "epoch": 0.10445377501867546, "grad_norm": 0.6510734558105469, "learning_rate": 8.994118813759562e-05, "loss": 0.11361143589019776, "step": 24330 }, { "epoch": 0.10449670710869546, "grad_norm": 0.20415718853473663, "learning_rate": 8.993687641747799e-05, "loss": 0.3651560306549072, "step": 24340 }, { "epoch": 0.10453963919871548, "grad_norm": 0.007316565606743097, "learning_rate": 8.993256469736037e-05, "loss": 0.26250133514404295, "step": 24350 }, { "epoch": 0.10458257128873548, "grad_norm": 1.585828185081482, "learning_rate": 8.992825297724275e-05, "loss": 0.28480618000030516, "step": 24360 }, { "epoch": 0.10462550337875548, "grad_norm": 0.01666135899722576, "learning_rate": 8.992394125712513e-05, "loss": 0.2830787181854248, "step": 24370 }, { "epoch": 0.1046684354687755, "grad_norm": 1.8920531272888184, "learning_rate": 8.99196295370075e-05, "loss": 0.29449806213378904, "step": 24380 }, { "epoch": 0.1047113675587955, "grad_norm": 1.3783341646194458, "learning_rate": 8.991531781688988e-05, "loss": 0.35810155868530275, "step": 24390 }, { "epoch": 0.10475429964881551, "grad_norm": 0.01928071118891239, "learning_rate": 8.991100609677226e-05, "loss": 0.14494633674621582, "step": 24400 }, { "epoch": 0.10479723173883551, "grad_norm": 0.025622989982366562, "learning_rate": 8.990669437665463e-05, "loss": 0.39835395812988283, "step": 24410 }, { "epoch": 0.10484016382885551, "grad_norm": 1.062815546989441, "learning_rate": 8.9902382656537e-05, "loss": 0.5315930366516113, "step": 24420 }, { "epoch": 0.10488309591887553, "grad_norm": 3.022416830062866, "learning_rate": 8.989807093641938e-05, "loss": 0.30136756896972655, "step": 24430 }, { "epoch": 0.10492602800889553, "grad_norm": 1.4548002481460571, "learning_rate": 8.989375921630175e-05, "loss": 0.3413592100143433, "step": 24440 }, { "epoch": 0.10496896009891553, "grad_norm": 0.19854702055454254, "learning_rate": 8.988944749618413e-05, "loss": 0.13472495079040528, "step": 24450 }, { "epoch": 0.10501189218893554, "grad_norm": 2.6877894401550293, "learning_rate": 8.988513577606651e-05, "loss": 0.40897254943847655, "step": 24460 }, { "epoch": 0.10505482427895554, "grad_norm": 1.532139539718628, "learning_rate": 8.988082405594889e-05, "loss": 0.1912761688232422, "step": 24470 }, { "epoch": 0.10509775636897556, "grad_norm": 0.2924424409866333, "learning_rate": 8.987651233583126e-05, "loss": 0.19859321117401124, "step": 24480 }, { "epoch": 0.10514068845899556, "grad_norm": 0.016296448186039925, "learning_rate": 8.987220061571364e-05, "loss": 0.18080949783325195, "step": 24490 }, { "epoch": 0.10518362054901556, "grad_norm": 1.8604021072387695, "learning_rate": 8.9867888895596e-05, "loss": 0.08436711430549622, "step": 24500 }, { "epoch": 0.10522655263903558, "grad_norm": 0.9807556867599487, "learning_rate": 8.986357717547838e-05, "loss": 0.13042645454406737, "step": 24510 }, { "epoch": 0.10526948472905558, "grad_norm": 0.08815609663724899, "learning_rate": 8.985926545536076e-05, "loss": 0.21929657459259033, "step": 24520 }, { "epoch": 0.10531241681907559, "grad_norm": 0.2408457249403, "learning_rate": 8.985495373524314e-05, "loss": 0.27845087051391604, "step": 24530 }, { "epoch": 0.10535534890909559, "grad_norm": 0.027684088796377182, "learning_rate": 8.985064201512551e-05, "loss": 0.40946121215820314, "step": 24540 }, { "epoch": 0.10539828099911559, "grad_norm": 0.05740318447351456, "learning_rate": 8.984633029500789e-05, "loss": 0.10234721899032592, "step": 24550 }, { "epoch": 0.10544121308913561, "grad_norm": 0.026760630309581757, "learning_rate": 8.984201857489027e-05, "loss": 0.24178724288940429, "step": 24560 }, { "epoch": 0.10548414517915561, "grad_norm": 1.6453255414962769, "learning_rate": 8.983770685477266e-05, "loss": 0.22513704299926757, "step": 24570 }, { "epoch": 0.10552707726917562, "grad_norm": 3.5417139530181885, "learning_rate": 8.983339513465502e-05, "loss": 0.44940686225891113, "step": 24580 }, { "epoch": 0.10557000935919562, "grad_norm": 0.02605029195547104, "learning_rate": 8.98290834145374e-05, "loss": 0.33814427852630613, "step": 24590 }, { "epoch": 0.10561294144921562, "grad_norm": 2.6322715282440186, "learning_rate": 8.982477169441978e-05, "loss": 0.3033830404281616, "step": 24600 }, { "epoch": 0.10565587353923564, "grad_norm": 2.977161169052124, "learning_rate": 8.982045997430215e-05, "loss": 0.20005474090576172, "step": 24610 }, { "epoch": 0.10569880562925564, "grad_norm": 0.21439577639102936, "learning_rate": 8.981614825418453e-05, "loss": 0.3983053207397461, "step": 24620 }, { "epoch": 0.10574173771927566, "grad_norm": 0.20449991524219513, "learning_rate": 8.981183653406691e-05, "loss": 0.2888350009918213, "step": 24630 }, { "epoch": 0.10578466980929566, "grad_norm": 0.22445398569107056, "learning_rate": 8.980752481394929e-05, "loss": 0.2892467737197876, "step": 24640 }, { "epoch": 0.10582760189931566, "grad_norm": 2.5356032848358154, "learning_rate": 8.980321309383166e-05, "loss": 0.1676466941833496, "step": 24650 }, { "epoch": 0.10587053398933567, "grad_norm": 1.0866541862487793, "learning_rate": 8.979890137371403e-05, "loss": 0.12875763177871705, "step": 24660 }, { "epoch": 0.10591346607935567, "grad_norm": 19.632532119750977, "learning_rate": 8.97945896535964e-05, "loss": 0.2675110578536987, "step": 24670 }, { "epoch": 0.10595639816937569, "grad_norm": 0.48357057571411133, "learning_rate": 8.979027793347878e-05, "loss": 0.25778658390045167, "step": 24680 }, { "epoch": 0.10599933025939569, "grad_norm": 0.12672166526317596, "learning_rate": 8.978596621336116e-05, "loss": 0.2856446743011475, "step": 24690 }, { "epoch": 0.10604226234941569, "grad_norm": 2.005126953125, "learning_rate": 8.978165449324354e-05, "loss": 0.3275081396102905, "step": 24700 }, { "epoch": 0.1060851944394357, "grad_norm": 0.10233116894960403, "learning_rate": 8.977734277312591e-05, "loss": 0.41061697006225584, "step": 24710 }, { "epoch": 0.1061281265294557, "grad_norm": 0.025767680257558823, "learning_rate": 8.977303105300829e-05, "loss": 0.1702197551727295, "step": 24720 }, { "epoch": 0.10617105861947572, "grad_norm": 0.45195960998535156, "learning_rate": 8.976871933289067e-05, "loss": 0.35706462860107424, "step": 24730 }, { "epoch": 0.10621399070949572, "grad_norm": 0.8962579369544983, "learning_rate": 8.976440761277305e-05, "loss": 0.35692172050476073, "step": 24740 }, { "epoch": 0.10625692279951572, "grad_norm": 3.8452181816101074, "learning_rate": 8.976009589265541e-05, "loss": 0.35666708946228026, "step": 24750 }, { "epoch": 0.10629985488953574, "grad_norm": 0.9256759285926819, "learning_rate": 8.975578417253779e-05, "loss": 0.21581745147705078, "step": 24760 }, { "epoch": 0.10634278697955574, "grad_norm": 0.012155724689364433, "learning_rate": 8.975147245242016e-05, "loss": 0.2863577604293823, "step": 24770 }, { "epoch": 0.10638571906957575, "grad_norm": 1.3886440992355347, "learning_rate": 8.974716073230254e-05, "loss": 0.43889813423156737, "step": 24780 }, { "epoch": 0.10642865115959575, "grad_norm": 0.02271793968975544, "learning_rate": 8.974284901218493e-05, "loss": 0.04355182945728302, "step": 24790 }, { "epoch": 0.10647158324961575, "grad_norm": 0.03789331763982773, "learning_rate": 8.973853729206731e-05, "loss": 0.42855305671691896, "step": 24800 }, { "epoch": 0.10651451533963577, "grad_norm": 0.04173389449715614, "learning_rate": 8.973422557194969e-05, "loss": 0.1199187159538269, "step": 24810 }, { "epoch": 0.10655744742965577, "grad_norm": 0.034947946667671204, "learning_rate": 8.972991385183206e-05, "loss": 0.5193025588989257, "step": 24820 }, { "epoch": 0.10660037951967578, "grad_norm": 0.08006519079208374, "learning_rate": 8.972560213171443e-05, "loss": 0.19815462827682495, "step": 24830 }, { "epoch": 0.10664331160969578, "grad_norm": 0.07318032532930374, "learning_rate": 8.97212904115968e-05, "loss": 0.06539644002914428, "step": 24840 }, { "epoch": 0.10668624369971579, "grad_norm": 0.4578899145126343, "learning_rate": 8.971697869147918e-05, "loss": 0.33419122695922854, "step": 24850 }, { "epoch": 0.1067291757897358, "grad_norm": 0.006664654705673456, "learning_rate": 8.971266697136156e-05, "loss": 0.5630306720733642, "step": 24860 }, { "epoch": 0.1067721078797558, "grad_norm": 0.0594543032348156, "learning_rate": 8.970835525124394e-05, "loss": 0.29572596549987795, "step": 24870 }, { "epoch": 0.1068150399697758, "grad_norm": 0.047232624143362045, "learning_rate": 8.970404353112632e-05, "loss": 0.4749437808990479, "step": 24880 }, { "epoch": 0.10685797205979582, "grad_norm": 0.08967532217502594, "learning_rate": 8.969973181100869e-05, "loss": 0.008667629212141037, "step": 24890 }, { "epoch": 0.10690090414981582, "grad_norm": 0.01756768673658371, "learning_rate": 8.969542009089107e-05, "loss": 0.3080634593963623, "step": 24900 }, { "epoch": 0.10694383623983583, "grad_norm": 1.994766354560852, "learning_rate": 8.969110837077343e-05, "loss": 0.20746369361877443, "step": 24910 }, { "epoch": 0.10698676832985583, "grad_norm": 1.1729373931884766, "learning_rate": 8.968679665065581e-05, "loss": 0.3455613136291504, "step": 24920 }, { "epoch": 0.10702970041987583, "grad_norm": 5.345548152923584, "learning_rate": 8.968248493053819e-05, "loss": 0.44353442192077636, "step": 24930 }, { "epoch": 0.10707263250989585, "grad_norm": 0.9387415647506714, "learning_rate": 8.967817321042057e-05, "loss": 0.3211450338363647, "step": 24940 }, { "epoch": 0.10711556459991585, "grad_norm": 0.5626527667045593, "learning_rate": 8.967386149030294e-05, "loss": 0.24862992763519287, "step": 24950 }, { "epoch": 0.10715849668993586, "grad_norm": 0.015321125276386738, "learning_rate": 8.966954977018532e-05, "loss": 0.24494407176971436, "step": 24960 }, { "epoch": 0.10720142877995587, "grad_norm": 0.11206818372011185, "learning_rate": 8.96652380500677e-05, "loss": 0.17858786582946778, "step": 24970 }, { "epoch": 0.10724436086997587, "grad_norm": 2.545865774154663, "learning_rate": 8.966092632995008e-05, "loss": 0.29294619560241697, "step": 24980 }, { "epoch": 0.10728729295999588, "grad_norm": 1.6691718101501465, "learning_rate": 8.965661460983244e-05, "loss": 0.3634697914123535, "step": 24990 }, { "epoch": 0.10733022505001588, "grad_norm": 0.22247134149074554, "learning_rate": 8.965230288971482e-05, "loss": 0.31081109046936034, "step": 25000 }, { "epoch": 0.10733022505001588, "eval_loss": 0.4849245548248291, "eval_runtime": 27.44, "eval_samples_per_second": 3.644, "eval_steps_per_second": 3.644, "step": 25000 }, { "epoch": 0.1073731571400359, "grad_norm": 0.30741971731185913, "learning_rate": 8.964799116959721e-05, "loss": 0.37556867599487304, "step": 25010 }, { "epoch": 0.1074160892300559, "grad_norm": 0.27221789956092834, "learning_rate": 8.964367944947958e-05, "loss": 0.14985108375549316, "step": 25020 }, { "epoch": 0.1074590213200759, "grad_norm": 1.1760873794555664, "learning_rate": 8.963936772936196e-05, "loss": 0.2914012908935547, "step": 25030 }, { "epoch": 0.10750195341009591, "grad_norm": 1.5843279361724854, "learning_rate": 8.963505600924434e-05, "loss": 0.6233903884887695, "step": 25040 }, { "epoch": 0.10754488550011591, "grad_norm": 0.06669995188713074, "learning_rate": 8.963074428912672e-05, "loss": 0.10967894792556762, "step": 25050 }, { "epoch": 0.10758781759013593, "grad_norm": 0.09085310995578766, "learning_rate": 8.96264325690091e-05, "loss": 0.15039405822753907, "step": 25060 }, { "epoch": 0.10763074968015593, "grad_norm": 0.7689294219017029, "learning_rate": 8.962212084889147e-05, "loss": 0.38093998432159426, "step": 25070 }, { "epoch": 0.10767368177017593, "grad_norm": 0.1175433024764061, "learning_rate": 8.961780912877384e-05, "loss": 0.2116264820098877, "step": 25080 }, { "epoch": 0.10771661386019594, "grad_norm": 0.04340391606092453, "learning_rate": 8.961349740865621e-05, "loss": 0.2621105194091797, "step": 25090 }, { "epoch": 0.10775954595021595, "grad_norm": 1.2593803405761719, "learning_rate": 8.960918568853859e-05, "loss": 0.26323814392089845, "step": 25100 }, { "epoch": 0.10780247804023596, "grad_norm": 1.90890634059906, "learning_rate": 8.960487396842097e-05, "loss": 0.22531487941741943, "step": 25110 }, { "epoch": 0.10784541013025596, "grad_norm": 0.18675051629543304, "learning_rate": 8.960056224830334e-05, "loss": 0.1788573145866394, "step": 25120 }, { "epoch": 0.10788834222027596, "grad_norm": 2.0584561824798584, "learning_rate": 8.959625052818572e-05, "loss": 0.2614114761352539, "step": 25130 }, { "epoch": 0.10793127431029598, "grad_norm": 0.2771095037460327, "learning_rate": 8.95919388080681e-05, "loss": 0.31394219398498535, "step": 25140 }, { "epoch": 0.10797420640031598, "grad_norm": 2.259929895401001, "learning_rate": 8.958762708795048e-05, "loss": 0.23377797603607178, "step": 25150 }, { "epoch": 0.10801713849033599, "grad_norm": 5.453269004821777, "learning_rate": 8.958331536783284e-05, "loss": 0.20484549999237062, "step": 25160 }, { "epoch": 0.108060070580356, "grad_norm": 6.210053443908691, "learning_rate": 8.957900364771522e-05, "loss": 0.46298751831054685, "step": 25170 }, { "epoch": 0.108103002670376, "grad_norm": 0.21815641224384308, "learning_rate": 8.95746919275976e-05, "loss": 0.15839173793792724, "step": 25180 }, { "epoch": 0.10814593476039601, "grad_norm": 0.016512513160705566, "learning_rate": 8.957038020747997e-05, "loss": 0.2333528757095337, "step": 25190 }, { "epoch": 0.10818886685041601, "grad_norm": 1.9446250200271606, "learning_rate": 8.956606848736235e-05, "loss": 0.36624941825866697, "step": 25200 }, { "epoch": 0.10823179894043602, "grad_norm": 1.332423210144043, "learning_rate": 8.956175676724473e-05, "loss": 0.38559038639068605, "step": 25210 }, { "epoch": 0.10827473103045603, "grad_norm": 0.09285473078489304, "learning_rate": 8.95574450471271e-05, "loss": 0.39534683227539064, "step": 25220 }, { "epoch": 0.10831766312047603, "grad_norm": 0.007422926835715771, "learning_rate": 8.955313332700948e-05, "loss": 0.23534021377563477, "step": 25230 }, { "epoch": 0.10836059521049604, "grad_norm": 0.02128664217889309, "learning_rate": 8.954882160689186e-05, "loss": 0.20301442146301268, "step": 25240 }, { "epoch": 0.10840352730051604, "grad_norm": 0.013315998017787933, "learning_rate": 8.954450988677424e-05, "loss": 0.1390451669692993, "step": 25250 }, { "epoch": 0.10844645939053606, "grad_norm": 0.007063603959977627, "learning_rate": 8.954019816665661e-05, "loss": 0.05682712197303772, "step": 25260 }, { "epoch": 0.10848939148055606, "grad_norm": 0.017539670690894127, "learning_rate": 8.953588644653899e-05, "loss": 0.07583492398262023, "step": 25270 }, { "epoch": 0.10853232357057606, "grad_norm": 0.06756465137004852, "learning_rate": 8.953157472642137e-05, "loss": 0.24220926761627198, "step": 25280 }, { "epoch": 0.10857525566059607, "grad_norm": 0.029334593564271927, "learning_rate": 8.952726300630375e-05, "loss": 0.36937508583068845, "step": 25290 }, { "epoch": 0.10861818775061607, "grad_norm": 0.03658639267086983, "learning_rate": 8.952295128618612e-05, "loss": 0.13479502201080323, "step": 25300 }, { "epoch": 0.10866111984063608, "grad_norm": 0.031390100717544556, "learning_rate": 8.95186395660685e-05, "loss": 0.12219811677932739, "step": 25310 }, { "epoch": 0.10870405193065609, "grad_norm": 1.230584740638733, "learning_rate": 8.951432784595086e-05, "loss": 0.3189370632171631, "step": 25320 }, { "epoch": 0.10874698402067609, "grad_norm": 1.7964065074920654, "learning_rate": 8.951001612583324e-05, "loss": 0.1912916660308838, "step": 25330 }, { "epoch": 0.1087899161106961, "grad_norm": 0.06472434848546982, "learning_rate": 8.950570440571562e-05, "loss": 0.05857505798339844, "step": 25340 }, { "epoch": 0.1088328482007161, "grad_norm": 10.742326736450195, "learning_rate": 8.9501392685598e-05, "loss": 0.38807761669158936, "step": 25350 }, { "epoch": 0.10887578029073611, "grad_norm": 0.01679828017950058, "learning_rate": 8.949708096548037e-05, "loss": 0.20313003063201904, "step": 25360 }, { "epoch": 0.10891871238075612, "grad_norm": 1.5688421726226807, "learning_rate": 8.949276924536275e-05, "loss": 0.3708950519561768, "step": 25370 }, { "epoch": 0.10896164447077612, "grad_norm": 0.0956941694021225, "learning_rate": 8.948845752524513e-05, "loss": 0.04957548379898071, "step": 25380 }, { "epoch": 0.10900457656079614, "grad_norm": 0.015790555626153946, "learning_rate": 8.94841458051275e-05, "loss": 0.1790858745574951, "step": 25390 }, { "epoch": 0.10904750865081614, "grad_norm": 1.5737788677215576, "learning_rate": 8.947983408500987e-05, "loss": 0.15643935203552245, "step": 25400 }, { "epoch": 0.10909044074083614, "grad_norm": 3.9349513053894043, "learning_rate": 8.947552236489225e-05, "loss": 0.2875617504119873, "step": 25410 }, { "epoch": 0.10913337283085615, "grad_norm": 0.012580066919326782, "learning_rate": 8.947121064477462e-05, "loss": 0.4018566608428955, "step": 25420 }, { "epoch": 0.10917630492087615, "grad_norm": 3.0949137210845947, "learning_rate": 8.9466898924657e-05, "loss": 0.38313732147216795, "step": 25430 }, { "epoch": 0.10921923701089617, "grad_norm": 0.9923470616340637, "learning_rate": 8.946258720453938e-05, "loss": 0.26145000457763673, "step": 25440 }, { "epoch": 0.10926216910091617, "grad_norm": 0.7341137528419495, "learning_rate": 8.945827548442176e-05, "loss": 0.1047516942024231, "step": 25450 }, { "epoch": 0.10930510119093617, "grad_norm": 11.855466842651367, "learning_rate": 8.945396376430413e-05, "loss": 0.1956933856010437, "step": 25460 }, { "epoch": 0.10934803328095619, "grad_norm": 0.007624823600053787, "learning_rate": 8.944965204418651e-05, "loss": 0.2737696886062622, "step": 25470 }, { "epoch": 0.10939096537097619, "grad_norm": 0.040049996227025986, "learning_rate": 8.944534032406889e-05, "loss": 0.2754047870635986, "step": 25480 }, { "epoch": 0.1094338974609962, "grad_norm": 1.1522254943847656, "learning_rate": 8.944102860395127e-05, "loss": 0.2701746940612793, "step": 25490 }, { "epoch": 0.1094768295510162, "grad_norm": 0.12508395314216614, "learning_rate": 8.943671688383364e-05, "loss": 0.1981469988822937, "step": 25500 }, { "epoch": 0.1095197616410362, "grad_norm": 3.565103530883789, "learning_rate": 8.943240516371602e-05, "loss": 0.34433903694152834, "step": 25510 }, { "epoch": 0.10956269373105622, "grad_norm": 0.1249883770942688, "learning_rate": 8.94280934435984e-05, "loss": 0.08808074593544006, "step": 25520 }, { "epoch": 0.10960562582107622, "grad_norm": 0.03776920959353447, "learning_rate": 8.942378172348078e-05, "loss": 0.1539943814277649, "step": 25530 }, { "epoch": 0.10964855791109623, "grad_norm": 0.05006266012787819, "learning_rate": 8.941947000336315e-05, "loss": 0.1190767765045166, "step": 25540 }, { "epoch": 0.10969149000111623, "grad_norm": 0.1297246515750885, "learning_rate": 8.941515828324553e-05, "loss": 0.34765305519104006, "step": 25550 }, { "epoch": 0.10973442209113624, "grad_norm": 0.0076711843721568584, "learning_rate": 8.941084656312791e-05, "loss": 0.1391259789466858, "step": 25560 }, { "epoch": 0.10977735418115625, "grad_norm": 1.8917896747589111, "learning_rate": 8.940653484301027e-05, "loss": 0.25439977645874023, "step": 25570 }, { "epoch": 0.10982028627117625, "grad_norm": 5.253777503967285, "learning_rate": 8.940222312289265e-05, "loss": 0.19333921670913695, "step": 25580 }, { "epoch": 0.10986321836119627, "grad_norm": 0.07230553030967712, "learning_rate": 8.939791140277503e-05, "loss": 0.17542632818222045, "step": 25590 }, { "epoch": 0.10990615045121627, "grad_norm": 0.011886064894497395, "learning_rate": 8.93935996826574e-05, "loss": 0.23904857635498047, "step": 25600 }, { "epoch": 0.10994908254123627, "grad_norm": 0.002963064704090357, "learning_rate": 8.938928796253978e-05, "loss": 0.22038490772247316, "step": 25610 }, { "epoch": 0.10999201463125628, "grad_norm": 0.01600109040737152, "learning_rate": 8.938497624242216e-05, "loss": 0.1403309464454651, "step": 25620 }, { "epoch": 0.11003494672127628, "grad_norm": 1.7151718139648438, "learning_rate": 8.938066452230453e-05, "loss": 0.31475276947021485, "step": 25630 }, { "epoch": 0.1100778788112963, "grad_norm": 0.000949110253714025, "learning_rate": 8.937635280218691e-05, "loss": 0.15096465349197388, "step": 25640 }, { "epoch": 0.1101208109013163, "grad_norm": 0.16747067868709564, "learning_rate": 8.937204108206928e-05, "loss": 0.3146634101867676, "step": 25650 }, { "epoch": 0.1101637429913363, "grad_norm": 2.464406728744507, "learning_rate": 8.936772936195165e-05, "loss": 0.2665748119354248, "step": 25660 }, { "epoch": 0.11020667508135631, "grad_norm": 0.048684414476156235, "learning_rate": 8.936341764183403e-05, "loss": 0.2529994010925293, "step": 25670 }, { "epoch": 0.11024960717137632, "grad_norm": 0.18311643600463867, "learning_rate": 8.935910592171641e-05, "loss": 0.15747172832489015, "step": 25680 }, { "epoch": 0.11029253926139633, "grad_norm": 0.11484615504741669, "learning_rate": 8.935479420159879e-05, "loss": 0.5332373142242431, "step": 25690 }, { "epoch": 0.11033547135141633, "grad_norm": 0.3284884989261627, "learning_rate": 8.935048248148116e-05, "loss": 0.1697358727455139, "step": 25700 }, { "epoch": 0.11037840344143633, "grad_norm": 0.5205300450325012, "learning_rate": 8.934617076136354e-05, "loss": 0.16440855264663695, "step": 25710 }, { "epoch": 0.11042133553145635, "grad_norm": 0.04689498245716095, "learning_rate": 8.934185904124592e-05, "loss": 0.1195969820022583, "step": 25720 }, { "epoch": 0.11046426762147635, "grad_norm": 2.117689609527588, "learning_rate": 8.93375473211283e-05, "loss": 0.22179570198059081, "step": 25730 }, { "epoch": 0.11050719971149635, "grad_norm": 0.37029099464416504, "learning_rate": 8.933323560101067e-05, "loss": 0.2961350202560425, "step": 25740 }, { "epoch": 0.11055013180151636, "grad_norm": 4.060856819152832, "learning_rate": 8.932892388089305e-05, "loss": 0.4951334476470947, "step": 25750 }, { "epoch": 0.11059306389153636, "grad_norm": 0.07439135760068893, "learning_rate": 8.932461216077543e-05, "loss": 0.18491072654724122, "step": 25760 }, { "epoch": 0.11063599598155638, "grad_norm": 0.6668906211853027, "learning_rate": 8.93203004406578e-05, "loss": 0.4163065910339355, "step": 25770 }, { "epoch": 0.11067892807157638, "grad_norm": 2.0445303916931152, "learning_rate": 8.931598872054018e-05, "loss": 0.3462115049362183, "step": 25780 }, { "epoch": 0.11072186016159638, "grad_norm": 3.0600664615631104, "learning_rate": 8.931167700042256e-05, "loss": 0.30161380767822266, "step": 25790 }, { "epoch": 0.1107647922516164, "grad_norm": 0.5397235751152039, "learning_rate": 8.930736528030494e-05, "loss": 0.4184750556945801, "step": 25800 }, { "epoch": 0.1108077243416364, "grad_norm": 0.1752122938632965, "learning_rate": 8.930305356018731e-05, "loss": 0.29085919857025144, "step": 25810 }, { "epoch": 0.11085065643165641, "grad_norm": 0.0460553839802742, "learning_rate": 8.929874184006968e-05, "loss": 0.37629220485687254, "step": 25820 }, { "epoch": 0.11089358852167641, "grad_norm": 0.3117293417453766, "learning_rate": 8.929443011995205e-05, "loss": 0.15187954902648926, "step": 25830 }, { "epoch": 0.11093652061169641, "grad_norm": 1.2698419094085693, "learning_rate": 8.929011839983443e-05, "loss": 0.28465192317962645, "step": 25840 }, { "epoch": 0.11097945270171643, "grad_norm": 0.17641954123973846, "learning_rate": 8.928580667971681e-05, "loss": 0.26123223304748533, "step": 25850 }, { "epoch": 0.11102238479173643, "grad_norm": 0.019400320947170258, "learning_rate": 8.928149495959919e-05, "loss": 0.05121874809265137, "step": 25860 }, { "epoch": 0.11106531688175644, "grad_norm": 0.049805380403995514, "learning_rate": 8.927718323948156e-05, "loss": 0.14310473203659058, "step": 25870 }, { "epoch": 0.11110824897177644, "grad_norm": 9.800433158874512, "learning_rate": 8.927287151936394e-05, "loss": 0.23860435485839843, "step": 25880 }, { "epoch": 0.11115118106179644, "grad_norm": 0.00014414018369279802, "learning_rate": 8.926855979924632e-05, "loss": 0.30962748527526857, "step": 25890 }, { "epoch": 0.11119411315181646, "grad_norm": 0.004766400903463364, "learning_rate": 8.926424807912868e-05, "loss": 0.5229979038238526, "step": 25900 }, { "epoch": 0.11123704524183646, "grad_norm": 0.028215311467647552, "learning_rate": 8.925993635901106e-05, "loss": 0.18894855976104735, "step": 25910 }, { "epoch": 0.11127997733185647, "grad_norm": 2.7721102237701416, "learning_rate": 8.925562463889344e-05, "loss": 0.27259135246276855, "step": 25920 }, { "epoch": 0.11132290942187648, "grad_norm": 0.02031039260327816, "learning_rate": 8.925131291877581e-05, "loss": 0.04676889479160309, "step": 25930 }, { "epoch": 0.11136584151189648, "grad_norm": 0.2718992829322815, "learning_rate": 8.924700119865819e-05, "loss": 0.16489242315292357, "step": 25940 }, { "epoch": 0.11140877360191649, "grad_norm": 0.9515452980995178, "learning_rate": 8.924268947854057e-05, "loss": 0.3039748430252075, "step": 25950 }, { "epoch": 0.11145170569193649, "grad_norm": 1.0625194311141968, "learning_rate": 8.923837775842295e-05, "loss": 0.3415965557098389, "step": 25960 }, { "epoch": 0.11149463778195651, "grad_norm": 0.040675487369298935, "learning_rate": 8.923406603830532e-05, "loss": 0.33023149967193605, "step": 25970 }, { "epoch": 0.11153756987197651, "grad_norm": 0.011495154350996017, "learning_rate": 8.92297543181877e-05, "loss": 0.255326509475708, "step": 25980 }, { "epoch": 0.11158050196199651, "grad_norm": 3.5770716667175293, "learning_rate": 8.922544259807008e-05, "loss": 0.3099225521087646, "step": 25990 }, { "epoch": 0.11162343405201652, "grad_norm": 0.02177545428276062, "learning_rate": 8.922113087795246e-05, "loss": 0.30356450080871583, "step": 26000 }, { "epoch": 0.11162343405201652, "eval_loss": 0.49951910972595215, "eval_runtime": 27.6012, "eval_samples_per_second": 3.623, "eval_steps_per_second": 3.623, "step": 26000 }, { "epoch": 0.11166636614203652, "grad_norm": 0.01517306175082922, "learning_rate": 8.921681915783483e-05, "loss": 0.2588840961456299, "step": 26010 }, { "epoch": 0.11170929823205654, "grad_norm": 0.010166897438466549, "learning_rate": 8.921250743771721e-05, "loss": 0.10459980964660645, "step": 26020 }, { "epoch": 0.11175223032207654, "grad_norm": 0.045308589935302734, "learning_rate": 8.920819571759959e-05, "loss": 0.18060653209686278, "step": 26030 }, { "epoch": 0.11179516241209654, "grad_norm": 0.2524489462375641, "learning_rate": 8.920388399748197e-05, "loss": 0.26762235164642334, "step": 26040 }, { "epoch": 0.11183809450211656, "grad_norm": 0.039384521543979645, "learning_rate": 8.919957227736434e-05, "loss": 0.3579113483428955, "step": 26050 }, { "epoch": 0.11188102659213656, "grad_norm": 0.7673929333686829, "learning_rate": 8.91952605572467e-05, "loss": 0.2611018896102905, "step": 26060 }, { "epoch": 0.11192395868215657, "grad_norm": 0.08051066100597382, "learning_rate": 8.919094883712908e-05, "loss": 0.15652050971984863, "step": 26070 }, { "epoch": 0.11196689077217657, "grad_norm": 3.0436594486236572, "learning_rate": 8.918663711701146e-05, "loss": 0.40769338607788086, "step": 26080 }, { "epoch": 0.11200982286219657, "grad_norm": 0.5110700726509094, "learning_rate": 8.918232539689384e-05, "loss": 0.11793347597122192, "step": 26090 }, { "epoch": 0.11205275495221659, "grad_norm": 1.4811254739761353, "learning_rate": 8.917801367677622e-05, "loss": 0.2230898141860962, "step": 26100 }, { "epoch": 0.11209568704223659, "grad_norm": 0.7501670122146606, "learning_rate": 8.917370195665859e-05, "loss": 0.4135143280029297, "step": 26110 }, { "epoch": 0.1121386191322566, "grad_norm": 0.24594485759735107, "learning_rate": 8.916939023654097e-05, "loss": 0.35773112773895266, "step": 26120 }, { "epoch": 0.1121815512222766, "grad_norm": 1.2016063928604126, "learning_rate": 8.916507851642335e-05, "loss": 0.2555335521697998, "step": 26130 }, { "epoch": 0.1122244833122966, "grad_norm": 0.007154045160859823, "learning_rate": 8.916076679630571e-05, "loss": 0.114761483669281, "step": 26140 }, { "epoch": 0.11226741540231662, "grad_norm": 2.147310733795166, "learning_rate": 8.915645507618809e-05, "loss": 0.25461688041687014, "step": 26150 }, { "epoch": 0.11231034749233662, "grad_norm": 0.14692267775535583, "learning_rate": 8.915214335607047e-05, "loss": 0.18954638242721558, "step": 26160 }, { "epoch": 0.11235327958235662, "grad_norm": 0.3495458960533142, "learning_rate": 8.914783163595284e-05, "loss": 0.06459863781929016, "step": 26170 }, { "epoch": 0.11239621167237664, "grad_norm": 3.2628321647644043, "learning_rate": 8.914351991583522e-05, "loss": 0.3024146556854248, "step": 26180 }, { "epoch": 0.11243914376239664, "grad_norm": 0.09125436097383499, "learning_rate": 8.91392081957176e-05, "loss": 0.19367423057556152, "step": 26190 }, { "epoch": 0.11248207585241665, "grad_norm": 1.5896530151367188, "learning_rate": 8.913489647559999e-05, "loss": 0.09784256219863892, "step": 26200 }, { "epoch": 0.11252500794243665, "grad_norm": 0.012747851200401783, "learning_rate": 8.913058475548237e-05, "loss": 0.31458463668823244, "step": 26210 }, { "epoch": 0.11256794003245665, "grad_norm": 0.0535753071308136, "learning_rate": 8.912627303536474e-05, "loss": 0.06705414652824401, "step": 26220 }, { "epoch": 0.11261087212247667, "grad_norm": 0.0063928053714334965, "learning_rate": 8.912196131524711e-05, "loss": 0.41733684539794924, "step": 26230 }, { "epoch": 0.11265380421249667, "grad_norm": 0.65238356590271, "learning_rate": 8.911764959512949e-05, "loss": 0.24972774982452392, "step": 26240 }, { "epoch": 0.11269673630251668, "grad_norm": 0.005043783225119114, "learning_rate": 8.911333787501186e-05, "loss": 0.45450987815856936, "step": 26250 }, { "epoch": 0.11273966839253668, "grad_norm": 7.086516380310059, "learning_rate": 8.910902615489424e-05, "loss": 0.28200199604034426, "step": 26260 }, { "epoch": 0.11278260048255669, "grad_norm": 0.09140264987945557, "learning_rate": 8.910471443477662e-05, "loss": 0.12481236457824707, "step": 26270 }, { "epoch": 0.1128255325725767, "grad_norm": 2.038637161254883, "learning_rate": 8.9100402714659e-05, "loss": 0.24641501903533936, "step": 26280 }, { "epoch": 0.1128684646625967, "grad_norm": 11.497899055480957, "learning_rate": 8.909609099454137e-05, "loss": 0.3155627727508545, "step": 26290 }, { "epoch": 0.11291139675261672, "grad_norm": 0.2962181866168976, "learning_rate": 8.909177927442375e-05, "loss": 0.37747180461883545, "step": 26300 }, { "epoch": 0.11295432884263672, "grad_norm": 17.22124671936035, "learning_rate": 8.908746755430611e-05, "loss": 0.17400816679000855, "step": 26310 }, { "epoch": 0.11299726093265672, "grad_norm": 0.027156352996826172, "learning_rate": 8.908315583418849e-05, "loss": 0.328963041305542, "step": 26320 }, { "epoch": 0.11304019302267673, "grad_norm": 0.03348701819777489, "learning_rate": 8.907884411407087e-05, "loss": 0.2583893060684204, "step": 26330 }, { "epoch": 0.11308312511269673, "grad_norm": 0.03124728426337242, "learning_rate": 8.907453239395324e-05, "loss": 0.22128143310546874, "step": 26340 }, { "epoch": 0.11312605720271675, "grad_norm": 0.19246523082256317, "learning_rate": 8.907022067383562e-05, "loss": 0.23182182312011718, "step": 26350 }, { "epoch": 0.11316898929273675, "grad_norm": 0.5085291862487793, "learning_rate": 8.9065908953718e-05, "loss": 0.33657450675964357, "step": 26360 }, { "epoch": 0.11321192138275675, "grad_norm": 0.16103234887123108, "learning_rate": 8.906159723360038e-05, "loss": 0.2063810110092163, "step": 26370 }, { "epoch": 0.11325485347277676, "grad_norm": 0.04276309162378311, "learning_rate": 8.905728551348275e-05, "loss": 0.4347810745239258, "step": 26380 }, { "epoch": 0.11329778556279677, "grad_norm": 0.027480829507112503, "learning_rate": 8.905297379336512e-05, "loss": 0.3524182796478271, "step": 26390 }, { "epoch": 0.11334071765281678, "grad_norm": 1.1285064220428467, "learning_rate": 8.90486620732475e-05, "loss": 0.43365187644958497, "step": 26400 }, { "epoch": 0.11338364974283678, "grad_norm": 0.07447858154773712, "learning_rate": 8.904435035312987e-05, "loss": 0.2533705711364746, "step": 26410 }, { "epoch": 0.11342658183285678, "grad_norm": 0.6848863363265991, "learning_rate": 8.904003863301226e-05, "loss": 0.1212932825088501, "step": 26420 }, { "epoch": 0.1134695139228768, "grad_norm": 0.0208682119846344, "learning_rate": 8.903572691289464e-05, "loss": 0.3657447576522827, "step": 26430 }, { "epoch": 0.1135124460128968, "grad_norm": 0.03788859024643898, "learning_rate": 8.903141519277702e-05, "loss": 0.29512145519256594, "step": 26440 }, { "epoch": 0.11355537810291681, "grad_norm": 0.36164799332618713, "learning_rate": 8.90271034726594e-05, "loss": 0.42470488548278806, "step": 26450 }, { "epoch": 0.11359831019293681, "grad_norm": 3.108694314956665, "learning_rate": 8.902279175254177e-05, "loss": 0.23271114826202394, "step": 26460 }, { "epoch": 0.11364124228295681, "grad_norm": 0.007275083102285862, "learning_rate": 8.901848003242414e-05, "loss": 0.1772995948791504, "step": 26470 }, { "epoch": 0.11368417437297683, "grad_norm": 0.7524713277816772, "learning_rate": 8.901416831230651e-05, "loss": 0.0993366301059723, "step": 26480 }, { "epoch": 0.11372710646299683, "grad_norm": 0.0062967403791844845, "learning_rate": 8.900985659218889e-05, "loss": 0.2973215103149414, "step": 26490 }, { "epoch": 0.11377003855301684, "grad_norm": 0.2719433605670929, "learning_rate": 8.900554487207127e-05, "loss": 0.31873762607574463, "step": 26500 }, { "epoch": 0.11381297064303685, "grad_norm": 0.02310267835855484, "learning_rate": 8.900123315195365e-05, "loss": 0.21906378269195556, "step": 26510 }, { "epoch": 0.11385590273305685, "grad_norm": 0.041628237813711166, "learning_rate": 8.899692143183602e-05, "loss": 0.1306293249130249, "step": 26520 }, { "epoch": 0.11389883482307686, "grad_norm": 0.03699645772576332, "learning_rate": 8.89926097117184e-05, "loss": 0.18234388828277587, "step": 26530 }, { "epoch": 0.11394176691309686, "grad_norm": 0.005653525702655315, "learning_rate": 8.898829799160078e-05, "loss": 0.30573740005493166, "step": 26540 }, { "epoch": 0.11398469900311688, "grad_norm": 0.0038088662549853325, "learning_rate": 8.898398627148316e-05, "loss": 0.14387012720108033, "step": 26550 }, { "epoch": 0.11402763109313688, "grad_norm": 1.9470224380493164, "learning_rate": 8.897967455136552e-05, "loss": 0.20126612186431886, "step": 26560 }, { "epoch": 0.11407056318315688, "grad_norm": 0.16124966740608215, "learning_rate": 8.89753628312479e-05, "loss": 0.1542932868003845, "step": 26570 }, { "epoch": 0.1141134952731769, "grad_norm": 0.011234652251005173, "learning_rate": 8.897105111113027e-05, "loss": 0.1024243950843811, "step": 26580 }, { "epoch": 0.1141564273631969, "grad_norm": 0.050764694809913635, "learning_rate": 8.896673939101265e-05, "loss": 0.21154391765594482, "step": 26590 }, { "epoch": 0.1141993594532169, "grad_norm": 0.7355092763900757, "learning_rate": 8.896242767089503e-05, "loss": 0.48318023681640626, "step": 26600 }, { "epoch": 0.11424229154323691, "grad_norm": 0.0014123255386948586, "learning_rate": 8.89581159507774e-05, "loss": 0.24305782318115235, "step": 26610 }, { "epoch": 0.11428522363325691, "grad_norm": 0.0012762444093823433, "learning_rate": 8.895380423065978e-05, "loss": 0.24177062511444092, "step": 26620 }, { "epoch": 0.11432815572327693, "grad_norm": 2.227457046508789, "learning_rate": 8.894949251054216e-05, "loss": 0.21717729568481445, "step": 26630 }, { "epoch": 0.11437108781329693, "grad_norm": 0.16315631568431854, "learning_rate": 8.894518079042454e-05, "loss": 0.08009040355682373, "step": 26640 }, { "epoch": 0.11441401990331693, "grad_norm": 0.04312557354569435, "learning_rate": 8.894086907030692e-05, "loss": 0.3106640338897705, "step": 26650 }, { "epoch": 0.11445695199333694, "grad_norm": 1.9198765754699707, "learning_rate": 8.893655735018929e-05, "loss": 0.4228626251220703, "step": 26660 }, { "epoch": 0.11449988408335694, "grad_norm": 0.18937277793884277, "learning_rate": 8.893224563007167e-05, "loss": 0.45505146980285643, "step": 26670 }, { "epoch": 0.11454281617337696, "grad_norm": 1.0401482582092285, "learning_rate": 8.892793390995405e-05, "loss": 0.3090991020202637, "step": 26680 }, { "epoch": 0.11458574826339696, "grad_norm": 0.7521312832832336, "learning_rate": 8.892362218983642e-05, "loss": 0.47649755477905276, "step": 26690 }, { "epoch": 0.11462868035341696, "grad_norm": 0.9687482118606567, "learning_rate": 8.89193104697188e-05, "loss": 0.2554279088973999, "step": 26700 }, { "epoch": 0.11467161244343697, "grad_norm": 0.9771291613578796, "learning_rate": 8.891499874960118e-05, "loss": 0.23694472312927245, "step": 26710 }, { "epoch": 0.11471454453345697, "grad_norm": 4.835459232330322, "learning_rate": 8.891068702948354e-05, "loss": 0.17550307512283325, "step": 26720 }, { "epoch": 0.11475747662347699, "grad_norm": 0.5119221806526184, "learning_rate": 8.890637530936592e-05, "loss": 0.30478582382202146, "step": 26730 }, { "epoch": 0.11480040871349699, "grad_norm": 0.17026209831237793, "learning_rate": 8.89020635892483e-05, "loss": 0.1443108320236206, "step": 26740 }, { "epoch": 0.11484334080351699, "grad_norm": 2.911370277404785, "learning_rate": 8.889775186913068e-05, "loss": 0.3410804748535156, "step": 26750 }, { "epoch": 0.114886272893537, "grad_norm": 1.0684778690338135, "learning_rate": 8.889344014901305e-05, "loss": 0.22259902954101562, "step": 26760 }, { "epoch": 0.114929204983557, "grad_norm": 0.11497758328914642, "learning_rate": 8.888912842889543e-05, "loss": 0.24176509380340577, "step": 26770 }, { "epoch": 0.11497213707357702, "grad_norm": 1.0878132581710815, "learning_rate": 8.888481670877781e-05, "loss": 0.08091990947723389, "step": 26780 }, { "epoch": 0.11501506916359702, "grad_norm": 0.0038782337214797735, "learning_rate": 8.888050498866018e-05, "loss": 0.34708499908447266, "step": 26790 }, { "epoch": 0.11505800125361702, "grad_norm": 2.4943995475769043, "learning_rate": 8.887619326854255e-05, "loss": 0.2911946773529053, "step": 26800 }, { "epoch": 0.11510093334363704, "grad_norm": 0.0024789159651845694, "learning_rate": 8.887188154842493e-05, "loss": 0.14854669570922852, "step": 26810 }, { "epoch": 0.11514386543365704, "grad_norm": 0.18486323952674866, "learning_rate": 8.88675698283073e-05, "loss": 0.3237830638885498, "step": 26820 }, { "epoch": 0.11518679752367705, "grad_norm": 0.02643188089132309, "learning_rate": 8.886325810818968e-05, "loss": 0.1400722861289978, "step": 26830 }, { "epoch": 0.11522972961369705, "grad_norm": 2.9142510890960693, "learning_rate": 8.885894638807206e-05, "loss": 0.20289158821105957, "step": 26840 }, { "epoch": 0.11527266170371706, "grad_norm": 6.909871578216553, "learning_rate": 8.885463466795444e-05, "loss": 0.4391770839691162, "step": 26850 }, { "epoch": 0.11531559379373707, "grad_norm": 0.09212353825569153, "learning_rate": 8.885032294783681e-05, "loss": 0.3045357704162598, "step": 26860 }, { "epoch": 0.11535852588375707, "grad_norm": 0.007540345191955566, "learning_rate": 8.884601122771919e-05, "loss": 0.09747713208198547, "step": 26870 }, { "epoch": 0.11540145797377709, "grad_norm": 0.10550621896982193, "learning_rate": 8.884169950760157e-05, "loss": 0.10741302967071534, "step": 26880 }, { "epoch": 0.11544439006379709, "grad_norm": 1.4358404874801636, "learning_rate": 8.883738778748394e-05, "loss": 0.395121693611145, "step": 26890 }, { "epoch": 0.11548732215381709, "grad_norm": 0.007923940196633339, "learning_rate": 8.883307606736632e-05, "loss": 0.3058964252471924, "step": 26900 }, { "epoch": 0.1155302542438371, "grad_norm": 1.1491098403930664, "learning_rate": 8.88287643472487e-05, "loss": 0.3279739856719971, "step": 26910 }, { "epoch": 0.1155731863338571, "grad_norm": 0.1358955353498459, "learning_rate": 8.882445262713108e-05, "loss": 0.1154222846031189, "step": 26920 }, { "epoch": 0.11561611842387712, "grad_norm": 0.6412147283554077, "learning_rate": 8.882014090701345e-05, "loss": 0.14349900484085082, "step": 26930 }, { "epoch": 0.11565905051389712, "grad_norm": 0.020047593861818314, "learning_rate": 8.881582918689583e-05, "loss": 0.24264280796051024, "step": 26940 }, { "epoch": 0.11570198260391712, "grad_norm": 0.9909156560897827, "learning_rate": 8.881151746677821e-05, "loss": 0.5208069801330566, "step": 26950 }, { "epoch": 0.11574491469393713, "grad_norm": 0.19676139950752258, "learning_rate": 8.880720574666059e-05, "loss": 0.37195398807525637, "step": 26960 }, { "epoch": 0.11578784678395714, "grad_norm": 0.03384987264871597, "learning_rate": 8.880289402654295e-05, "loss": 0.3475580453872681, "step": 26970 }, { "epoch": 0.11583077887397715, "grad_norm": 0.018173309043049812, "learning_rate": 8.879858230642533e-05, "loss": 0.21183347702026367, "step": 26980 }, { "epoch": 0.11587371096399715, "grad_norm": 0.25634244084358215, "learning_rate": 8.87942705863077e-05, "loss": 0.2051846981048584, "step": 26990 }, { "epoch": 0.11591664305401715, "grad_norm": 1.4937998056411743, "learning_rate": 8.878995886619008e-05, "loss": 0.1931118369102478, "step": 27000 }, { "epoch": 0.11591664305401715, "eval_loss": 0.4814135432243347, "eval_runtime": 27.4898, "eval_samples_per_second": 3.638, "eval_steps_per_second": 3.638, "step": 27000 }, { "epoch": 0.11595957514403717, "grad_norm": 0.032575830817222595, "learning_rate": 8.878564714607246e-05, "loss": 0.07166704535484314, "step": 27010 }, { "epoch": 0.11600250723405717, "grad_norm": 0.08005379140377045, "learning_rate": 8.878133542595484e-05, "loss": 0.339334774017334, "step": 27020 }, { "epoch": 0.11604543932407717, "grad_norm": 4.995341777801514, "learning_rate": 8.877702370583721e-05, "loss": 0.2974552154541016, "step": 27030 }, { "epoch": 0.11608837141409718, "grad_norm": 0.15797531604766846, "learning_rate": 8.877271198571959e-05, "loss": 0.30518279075622556, "step": 27040 }, { "epoch": 0.11613130350411718, "grad_norm": 0.037030257284641266, "learning_rate": 8.876840026560195e-05, "loss": 0.26148602962493894, "step": 27050 }, { "epoch": 0.1161742355941372, "grad_norm": 1.2287954092025757, "learning_rate": 8.876408854548433e-05, "loss": 0.2081602096557617, "step": 27060 }, { "epoch": 0.1162171676841572, "grad_norm": 0.4174017906188965, "learning_rate": 8.875977682536671e-05, "loss": 0.26671810150146485, "step": 27070 }, { "epoch": 0.1162600997741772, "grad_norm": 0.16252487897872925, "learning_rate": 8.875546510524909e-05, "loss": 0.1370398998260498, "step": 27080 }, { "epoch": 0.11630303186419721, "grad_norm": 2.9258928298950195, "learning_rate": 8.875115338513146e-05, "loss": 0.4124359130859375, "step": 27090 }, { "epoch": 0.11634596395421722, "grad_norm": 0.22780385613441467, "learning_rate": 8.874684166501384e-05, "loss": 0.3003973960876465, "step": 27100 }, { "epoch": 0.11638889604423723, "grad_norm": 1.1312873363494873, "learning_rate": 8.874252994489622e-05, "loss": 0.39095630645751955, "step": 27110 }, { "epoch": 0.11643182813425723, "grad_norm": 0.01758890599012375, "learning_rate": 8.87382182247786e-05, "loss": 0.14932353496551515, "step": 27120 }, { "epoch": 0.11647476022427723, "grad_norm": 0.05419120937585831, "learning_rate": 8.873390650466097e-05, "loss": 0.1397382140159607, "step": 27130 }, { "epoch": 0.11651769231429725, "grad_norm": 2.2360360622406006, "learning_rate": 8.872959478454335e-05, "loss": 0.336977481842041, "step": 27140 }, { "epoch": 0.11656062440431725, "grad_norm": 0.12993699312210083, "learning_rate": 8.872528306442573e-05, "loss": 0.25680654048919677, "step": 27150 }, { "epoch": 0.11660355649433726, "grad_norm": 3.370098829269409, "learning_rate": 8.87209713443081e-05, "loss": 0.20383124351501464, "step": 27160 }, { "epoch": 0.11664648858435726, "grad_norm": 0.07472426444292068, "learning_rate": 8.871665962419048e-05, "loss": 0.1689348340034485, "step": 27170 }, { "epoch": 0.11668942067437726, "grad_norm": 1.1328237056732178, "learning_rate": 8.871234790407286e-05, "loss": 0.3129899501800537, "step": 27180 }, { "epoch": 0.11673235276439728, "grad_norm": 1.5202020406723022, "learning_rate": 8.870803618395524e-05, "loss": 0.31610372066497805, "step": 27190 }, { "epoch": 0.11677528485441728, "grad_norm": 0.8442431092262268, "learning_rate": 8.870372446383762e-05, "loss": 0.18818488121032714, "step": 27200 }, { "epoch": 0.1168182169444373, "grad_norm": 1.796455979347229, "learning_rate": 8.869941274371998e-05, "loss": 0.26075315475463867, "step": 27210 }, { "epoch": 0.1168611490344573, "grad_norm": 4.038948059082031, "learning_rate": 8.869510102360236e-05, "loss": 0.4824398040771484, "step": 27220 }, { "epoch": 0.1169040811244773, "grad_norm": 1.9637843370437622, "learning_rate": 8.869078930348473e-05, "loss": 0.20625545978546142, "step": 27230 }, { "epoch": 0.11694701321449731, "grad_norm": 0.13020427525043488, "learning_rate": 8.868647758336711e-05, "loss": 0.03135204017162323, "step": 27240 }, { "epoch": 0.11698994530451731, "grad_norm": 1.4142533540725708, "learning_rate": 8.868216586324949e-05, "loss": 0.21837081909179687, "step": 27250 }, { "epoch": 0.11703287739453733, "grad_norm": 3.5253872871398926, "learning_rate": 8.867785414313187e-05, "loss": 0.20597407817840577, "step": 27260 }, { "epoch": 0.11707580948455733, "grad_norm": 0.007855327799916267, "learning_rate": 8.867354242301424e-05, "loss": 0.35103812217712405, "step": 27270 }, { "epoch": 0.11711874157457733, "grad_norm": 1.1489592790603638, "learning_rate": 8.866923070289662e-05, "loss": 0.2486743450164795, "step": 27280 }, { "epoch": 0.11716167366459734, "grad_norm": 0.3575959801673889, "learning_rate": 8.8664918982779e-05, "loss": 0.24932396411895752, "step": 27290 }, { "epoch": 0.11720460575461734, "grad_norm": 2.510430335998535, "learning_rate": 8.866060726266136e-05, "loss": 0.2862740755081177, "step": 27300 }, { "epoch": 0.11724753784463736, "grad_norm": 0.14760471880435944, "learning_rate": 8.865629554254374e-05, "loss": 0.1569320559501648, "step": 27310 }, { "epoch": 0.11729046993465736, "grad_norm": 2.2253646850585938, "learning_rate": 8.865198382242612e-05, "loss": 0.18345837593078612, "step": 27320 }, { "epoch": 0.11733340202467736, "grad_norm": 0.17462894320487976, "learning_rate": 8.86476721023085e-05, "loss": 0.059002459049224854, "step": 27330 }, { "epoch": 0.11737633411469738, "grad_norm": 1.675255298614502, "learning_rate": 8.864336038219087e-05, "loss": 0.30129642486572267, "step": 27340 }, { "epoch": 0.11741926620471738, "grad_norm": 0.016447896137833595, "learning_rate": 8.863904866207325e-05, "loss": 0.2761634111404419, "step": 27350 }, { "epoch": 0.11746219829473739, "grad_norm": 0.5405940413475037, "learning_rate": 8.863473694195563e-05, "loss": 0.3909731388092041, "step": 27360 }, { "epoch": 0.11750513038475739, "grad_norm": 0.0973791852593422, "learning_rate": 8.8630425221838e-05, "loss": 0.2007131338119507, "step": 27370 }, { "epoch": 0.11754806247477739, "grad_norm": 0.040704648941755295, "learning_rate": 8.862611350172038e-05, "loss": 0.24662020206451415, "step": 27380 }, { "epoch": 0.11759099456479741, "grad_norm": 2.4736199378967285, "learning_rate": 8.862180178160276e-05, "loss": 0.3135467290878296, "step": 27390 }, { "epoch": 0.11763392665481741, "grad_norm": 0.06006622686982155, "learning_rate": 8.861749006148513e-05, "loss": 0.21347477436065673, "step": 27400 }, { "epoch": 0.11767685874483741, "grad_norm": 2.3658194541931152, "learning_rate": 8.861317834136751e-05, "loss": 0.387444281578064, "step": 27410 }, { "epoch": 0.11771979083485742, "grad_norm": 0.0878182202577591, "learning_rate": 8.860886662124989e-05, "loss": 0.26784508228302, "step": 27420 }, { "epoch": 0.11776272292487742, "grad_norm": 1.5703392028808594, "learning_rate": 8.860455490113227e-05, "loss": 0.242673921585083, "step": 27430 }, { "epoch": 0.11780565501489744, "grad_norm": 0.012026949785649776, "learning_rate": 8.860024318101464e-05, "loss": 0.2383371353149414, "step": 27440 }, { "epoch": 0.11784858710491744, "grad_norm": 3.061283826828003, "learning_rate": 8.859593146089702e-05, "loss": 0.28369903564453125, "step": 27450 }, { "epoch": 0.11789151919493744, "grad_norm": 0.5101853609085083, "learning_rate": 8.859161974077939e-05, "loss": 0.07000910639762878, "step": 27460 }, { "epoch": 0.11793445128495746, "grad_norm": 0.011193258687853813, "learning_rate": 8.858730802066176e-05, "loss": 0.11365052461624145, "step": 27470 }, { "epoch": 0.11797738337497746, "grad_norm": 0.10916262120008469, "learning_rate": 8.858299630054414e-05, "loss": 0.1990136981010437, "step": 27480 }, { "epoch": 0.11802031546499747, "grad_norm": 1.9866777658462524, "learning_rate": 8.857868458042652e-05, "loss": 0.297538685798645, "step": 27490 }, { "epoch": 0.11806324755501747, "grad_norm": 0.7264887690544128, "learning_rate": 8.85743728603089e-05, "loss": 0.25992865562438966, "step": 27500 }, { "epoch": 0.11810617964503747, "grad_norm": 1.078795313835144, "learning_rate": 8.857006114019127e-05, "loss": 0.2584535598754883, "step": 27510 }, { "epoch": 0.11814911173505749, "grad_norm": 0.010630804114043713, "learning_rate": 8.856574942007365e-05, "loss": 0.3359013795852661, "step": 27520 }, { "epoch": 0.11819204382507749, "grad_norm": 0.37588950991630554, "learning_rate": 8.856143769995603e-05, "loss": 0.18968162536621094, "step": 27530 }, { "epoch": 0.1182349759150975, "grad_norm": 1.0637744665145874, "learning_rate": 8.855712597983839e-05, "loss": 0.1399350643157959, "step": 27540 }, { "epoch": 0.1182779080051175, "grad_norm": 3.714017629623413, "learning_rate": 8.855281425972077e-05, "loss": 0.17076128721237183, "step": 27550 }, { "epoch": 0.1183208400951375, "grad_norm": 1.026892066001892, "learning_rate": 8.854850253960315e-05, "loss": 0.3097742795944214, "step": 27560 }, { "epoch": 0.11836377218515752, "grad_norm": 0.08490348607301712, "learning_rate": 8.854419081948552e-05, "loss": 0.45609478950500487, "step": 27570 }, { "epoch": 0.11840670427517752, "grad_norm": 0.7944920063018799, "learning_rate": 8.85398790993679e-05, "loss": 0.2455613613128662, "step": 27580 }, { "epoch": 0.11844963636519754, "grad_norm": 0.11953561753034592, "learning_rate": 8.853556737925028e-05, "loss": 0.13984798192977904, "step": 27590 }, { "epoch": 0.11849256845521754, "grad_norm": 2.450636863708496, "learning_rate": 8.853125565913267e-05, "loss": 0.2631438493728638, "step": 27600 }, { "epoch": 0.11853550054523754, "grad_norm": 1.842597246170044, "learning_rate": 8.852694393901505e-05, "loss": 0.18394358158111573, "step": 27610 }, { "epoch": 0.11857843263525755, "grad_norm": 5.348299980163574, "learning_rate": 8.852263221889742e-05, "loss": 0.10762025117874145, "step": 27620 }, { "epoch": 0.11862136472527755, "grad_norm": 1.779021978378296, "learning_rate": 8.851832049877979e-05, "loss": 0.1610881805419922, "step": 27630 }, { "epoch": 0.11866429681529757, "grad_norm": 1.3260366916656494, "learning_rate": 8.851400877866216e-05, "loss": 0.3329970359802246, "step": 27640 }, { "epoch": 0.11870722890531757, "grad_norm": 0.0022421982139348984, "learning_rate": 8.850969705854454e-05, "loss": 0.2720233678817749, "step": 27650 }, { "epoch": 0.11875016099533757, "grad_norm": 26.460460662841797, "learning_rate": 8.850538533842692e-05, "loss": 0.4188239574432373, "step": 27660 }, { "epoch": 0.11879309308535758, "grad_norm": 0.045316264033317566, "learning_rate": 8.85010736183093e-05, "loss": 0.324761962890625, "step": 27670 }, { "epoch": 0.11883602517537759, "grad_norm": 3.0380566120147705, "learning_rate": 8.849676189819167e-05, "loss": 0.22859654426574708, "step": 27680 }, { "epoch": 0.1188789572653976, "grad_norm": 0.09295903891324997, "learning_rate": 8.849245017807405e-05, "loss": 0.15038024187088012, "step": 27690 }, { "epoch": 0.1189218893554176, "grad_norm": 0.0038858470506966114, "learning_rate": 8.848813845795643e-05, "loss": 0.22608392238616942, "step": 27700 }, { "epoch": 0.1189648214454376, "grad_norm": 0.29266226291656494, "learning_rate": 8.848382673783879e-05, "loss": 0.24526865482330323, "step": 27710 }, { "epoch": 0.11900775353545762, "grad_norm": 1.2969417572021484, "learning_rate": 8.847951501772117e-05, "loss": 0.17613345384597778, "step": 27720 }, { "epoch": 0.11905068562547762, "grad_norm": 0.27322718501091003, "learning_rate": 8.847520329760355e-05, "loss": 0.29839363098144533, "step": 27730 }, { "epoch": 0.11909361771549763, "grad_norm": 0.1163104996085167, "learning_rate": 8.847089157748592e-05, "loss": 0.32402098178863525, "step": 27740 }, { "epoch": 0.11913654980551763, "grad_norm": 0.5185718536376953, "learning_rate": 8.84665798573683e-05, "loss": 0.2267787218093872, "step": 27750 }, { "epoch": 0.11917948189553763, "grad_norm": 0.86121666431427, "learning_rate": 8.846226813725068e-05, "loss": 0.3904379606246948, "step": 27760 }, { "epoch": 0.11922241398555765, "grad_norm": 4.6156721115112305, "learning_rate": 8.845795641713306e-05, "loss": 0.3092981815338135, "step": 27770 }, { "epoch": 0.11926534607557765, "grad_norm": 0.036813993006944656, "learning_rate": 8.845364469701543e-05, "loss": 0.23008527755737304, "step": 27780 }, { "epoch": 0.11930827816559766, "grad_norm": 0.06015315651893616, "learning_rate": 8.84493329768978e-05, "loss": 0.2318122148513794, "step": 27790 }, { "epoch": 0.11935121025561767, "grad_norm": 0.4399387240409851, "learning_rate": 8.844502125678017e-05, "loss": 0.3942227840423584, "step": 27800 }, { "epoch": 0.11939414234563767, "grad_norm": 5.1140313148498535, "learning_rate": 8.844070953666255e-05, "loss": 0.2428600788116455, "step": 27810 }, { "epoch": 0.11943707443565768, "grad_norm": 7.504186153411865, "learning_rate": 8.843639781654494e-05, "loss": 0.34903745651245116, "step": 27820 }, { "epoch": 0.11948000652567768, "grad_norm": 1.5398544073104858, "learning_rate": 8.843208609642732e-05, "loss": 0.38591148853302004, "step": 27830 }, { "epoch": 0.11952293861569768, "grad_norm": 0.020581362769007683, "learning_rate": 8.84277743763097e-05, "loss": 0.3888427972793579, "step": 27840 }, { "epoch": 0.1195658707057177, "grad_norm": 2.014479637145996, "learning_rate": 8.842346265619207e-05, "loss": 0.35665121078491213, "step": 27850 }, { "epoch": 0.1196088027957377, "grad_norm": 0.05373619124293327, "learning_rate": 8.841915093607445e-05, "loss": 0.06771031022071838, "step": 27860 }, { "epoch": 0.11965173488575771, "grad_norm": 0.1480589210987091, "learning_rate": 8.841483921595682e-05, "loss": 0.2560983896255493, "step": 27870 }, { "epoch": 0.11969466697577771, "grad_norm": 0.05488418787717819, "learning_rate": 8.841052749583919e-05, "loss": 0.18749144077301025, "step": 27880 }, { "epoch": 0.11973759906579771, "grad_norm": 0.7366297841072083, "learning_rate": 8.840621577572157e-05, "loss": 0.17107198238372803, "step": 27890 }, { "epoch": 0.11978053115581773, "grad_norm": 0.04739157855510712, "learning_rate": 8.840190405560395e-05, "loss": 0.22733573913574218, "step": 27900 }, { "epoch": 0.11982346324583773, "grad_norm": 0.009907875210046768, "learning_rate": 8.839759233548633e-05, "loss": 0.07253676056861877, "step": 27910 }, { "epoch": 0.11986639533585774, "grad_norm": 0.9760177135467529, "learning_rate": 8.83932806153687e-05, "loss": 0.208707594871521, "step": 27920 }, { "epoch": 0.11990932742587775, "grad_norm": 4.618872165679932, "learning_rate": 8.838896889525108e-05, "loss": 0.2124265670776367, "step": 27930 }, { "epoch": 0.11995225951589775, "grad_norm": 0.017419319599866867, "learning_rate": 8.838465717513346e-05, "loss": 0.23728528022766113, "step": 27940 }, { "epoch": 0.11999519160591776, "grad_norm": 0.03731221705675125, "learning_rate": 8.838034545501582e-05, "loss": 0.12621110677719116, "step": 27950 }, { "epoch": 0.12003812369593776, "grad_norm": 0.08100777119398117, "learning_rate": 8.83760337348982e-05, "loss": 0.0045126181095838545, "step": 27960 }, { "epoch": 0.12008105578595778, "grad_norm": 0.018143413588404655, "learning_rate": 8.837172201478058e-05, "loss": 0.40405783653259275, "step": 27970 }, { "epoch": 0.12012398787597778, "grad_norm": 2.4014012813568115, "learning_rate": 8.836741029466295e-05, "loss": 0.15833113193511963, "step": 27980 }, { "epoch": 0.12016691996599778, "grad_norm": 0.5604978799819946, "learning_rate": 8.836309857454533e-05, "loss": 0.3327100992202759, "step": 27990 }, { "epoch": 0.1202098520560178, "grad_norm": 1.6774513721466064, "learning_rate": 8.835878685442771e-05, "loss": 0.26010899543762206, "step": 28000 }, { "epoch": 0.1202098520560178, "eval_loss": 0.47686854004859924, "eval_runtime": 27.4364, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 28000 }, { "epoch": 0.1202527841460378, "grad_norm": 0.04927496239542961, "learning_rate": 8.835447513431008e-05, "loss": 0.194127357006073, "step": 28010 }, { "epoch": 0.12029571623605781, "grad_norm": 0.009340300224721432, "learning_rate": 8.835016341419246e-05, "loss": 0.3162141084671021, "step": 28020 }, { "epoch": 0.12033864832607781, "grad_norm": 0.0497310571372509, "learning_rate": 8.834585169407484e-05, "loss": 0.3715135812759399, "step": 28030 }, { "epoch": 0.12038158041609781, "grad_norm": 0.6487219929695129, "learning_rate": 8.834153997395722e-05, "loss": 0.22716693878173827, "step": 28040 }, { "epoch": 0.12042451250611783, "grad_norm": 1.9130522012710571, "learning_rate": 8.83372282538396e-05, "loss": 0.3316776275634766, "step": 28050 }, { "epoch": 0.12046744459613783, "grad_norm": 1.4078731536865234, "learning_rate": 8.833291653372197e-05, "loss": 0.25235857963562014, "step": 28060 }, { "epoch": 0.12051037668615784, "grad_norm": 0.01590568758547306, "learning_rate": 8.832860481360435e-05, "loss": 0.05722926259040832, "step": 28070 }, { "epoch": 0.12055330877617784, "grad_norm": 0.12422087043523788, "learning_rate": 8.832429309348673e-05, "loss": 0.4248668670654297, "step": 28080 }, { "epoch": 0.12059624086619784, "grad_norm": 0.1295778602361679, "learning_rate": 8.83199813733691e-05, "loss": 0.2959657430648804, "step": 28090 }, { "epoch": 0.12063917295621786, "grad_norm": 20.151147842407227, "learning_rate": 8.831566965325148e-05, "loss": 0.25739731788635256, "step": 28100 }, { "epoch": 0.12068210504623786, "grad_norm": 0.04763505980372429, "learning_rate": 8.831135793313386e-05, "loss": 0.13446078300476075, "step": 28110 }, { "epoch": 0.12072503713625787, "grad_norm": 0.009243443608283997, "learning_rate": 8.830704621301622e-05, "loss": 0.2809897899627686, "step": 28120 }, { "epoch": 0.12076796922627787, "grad_norm": 0.1913996934890747, "learning_rate": 8.83027344928986e-05, "loss": 0.2904137372970581, "step": 28130 }, { "epoch": 0.12081090131629788, "grad_norm": 0.2755334675312042, "learning_rate": 8.829842277278098e-05, "loss": 0.23877317905426027, "step": 28140 }, { "epoch": 0.12085383340631789, "grad_norm": 0.14645986258983612, "learning_rate": 8.829411105266335e-05, "loss": 0.20860531330108642, "step": 28150 }, { "epoch": 0.12089676549633789, "grad_norm": 0.25226902961730957, "learning_rate": 8.828979933254573e-05, "loss": 0.03910198211669922, "step": 28160 }, { "epoch": 0.1209396975863579, "grad_norm": 0.4876665771007538, "learning_rate": 8.828548761242811e-05, "loss": 0.18497053384780884, "step": 28170 }, { "epoch": 0.1209826296763779, "grad_norm": 0.08259847015142441, "learning_rate": 8.828117589231049e-05, "loss": 0.07651437520980835, "step": 28180 }, { "epoch": 0.12102556176639791, "grad_norm": 1.885874629020691, "learning_rate": 8.827686417219286e-05, "loss": 0.2606934070587158, "step": 28190 }, { "epoch": 0.12106849385641792, "grad_norm": 3.6061160564422607, "learning_rate": 8.827255245207523e-05, "loss": 0.32321016788482665, "step": 28200 }, { "epoch": 0.12111142594643792, "grad_norm": 0.004731371533125639, "learning_rate": 8.82682407319576e-05, "loss": 0.1959924578666687, "step": 28210 }, { "epoch": 0.12115435803645794, "grad_norm": 0.1178203746676445, "learning_rate": 8.826392901183998e-05, "loss": 0.19997456073760986, "step": 28220 }, { "epoch": 0.12119729012647794, "grad_norm": 1.247888445854187, "learning_rate": 8.825961729172236e-05, "loss": 0.30602240562438965, "step": 28230 }, { "epoch": 0.12124022221649794, "grad_norm": 0.704877495765686, "learning_rate": 8.825530557160474e-05, "loss": 0.13399279117584229, "step": 28240 }, { "epoch": 0.12128315430651795, "grad_norm": 1.3607152700424194, "learning_rate": 8.825099385148711e-05, "loss": 0.20442640781402588, "step": 28250 }, { "epoch": 0.12132608639653795, "grad_norm": 3.9458365440368652, "learning_rate": 8.824668213136949e-05, "loss": 0.2593385219573975, "step": 28260 }, { "epoch": 0.12136901848655796, "grad_norm": 5.625955581665039, "learning_rate": 8.824237041125187e-05, "loss": 0.43620920181274414, "step": 28270 }, { "epoch": 0.12141195057657797, "grad_norm": 2.36214280128479, "learning_rate": 8.823805869113425e-05, "loss": 0.6317470073699951, "step": 28280 }, { "epoch": 0.12145488266659797, "grad_norm": 0.24611838161945343, "learning_rate": 8.823374697101662e-05, "loss": 0.29659457206726075, "step": 28290 }, { "epoch": 0.12149781475661799, "grad_norm": 0.3825310170650482, "learning_rate": 8.8229435250899e-05, "loss": 0.3007538318634033, "step": 28300 }, { "epoch": 0.12154074684663799, "grad_norm": 1.0893738269805908, "learning_rate": 8.822512353078138e-05, "loss": 0.09316685199737548, "step": 28310 }, { "epoch": 0.12158367893665799, "grad_norm": 1.2878329753875732, "learning_rate": 8.822081181066376e-05, "loss": 0.2848006248474121, "step": 28320 }, { "epoch": 0.121626611026678, "grad_norm": 1.5610382556915283, "learning_rate": 8.821650009054613e-05, "loss": 0.18019193410873413, "step": 28330 }, { "epoch": 0.121669543116698, "grad_norm": 0.055733054876327515, "learning_rate": 8.821218837042851e-05, "loss": 0.21548593044281006, "step": 28340 }, { "epoch": 0.12171247520671802, "grad_norm": 0.5747507810592651, "learning_rate": 8.820787665031089e-05, "loss": 0.4182239055633545, "step": 28350 }, { "epoch": 0.12175540729673802, "grad_norm": 0.39458662271499634, "learning_rate": 8.820356493019326e-05, "loss": 0.33075120449066164, "step": 28360 }, { "epoch": 0.12179833938675802, "grad_norm": 0.22349131107330322, "learning_rate": 8.819925321007563e-05, "loss": 0.16916000843048096, "step": 28370 }, { "epoch": 0.12184127147677803, "grad_norm": 0.16843147575855255, "learning_rate": 8.8194941489958e-05, "loss": 0.37458953857421873, "step": 28380 }, { "epoch": 0.12188420356679804, "grad_norm": 0.012207563035190105, "learning_rate": 8.819062976984038e-05, "loss": 0.024071575701236726, "step": 28390 }, { "epoch": 0.12192713565681805, "grad_norm": 0.03551925718784332, "learning_rate": 8.818631804972276e-05, "loss": 0.29789299964904786, "step": 28400 }, { "epoch": 0.12197006774683805, "grad_norm": 0.050328079611063004, "learning_rate": 8.818200632960514e-05, "loss": 0.3409437656402588, "step": 28410 }, { "epoch": 0.12201299983685805, "grad_norm": 0.009469667449593544, "learning_rate": 8.817769460948752e-05, "loss": 0.2927106857299805, "step": 28420 }, { "epoch": 0.12205593192687807, "grad_norm": 2.191164970397949, "learning_rate": 8.817338288936989e-05, "loss": 0.4062193870544434, "step": 28430 }, { "epoch": 0.12209886401689807, "grad_norm": 1.214449405670166, "learning_rate": 8.816907116925227e-05, "loss": 0.26831727027893065, "step": 28440 }, { "epoch": 0.12214179610691808, "grad_norm": 0.12962786853313446, "learning_rate": 8.816475944913463e-05, "loss": 0.26643710136413573, "step": 28450 }, { "epoch": 0.12218472819693808, "grad_norm": 2.260606288909912, "learning_rate": 8.816044772901701e-05, "loss": 0.2599010944366455, "step": 28460 }, { "epoch": 0.12222766028695808, "grad_norm": 14.785316467285156, "learning_rate": 8.815613600889939e-05, "loss": 0.3463857650756836, "step": 28470 }, { "epoch": 0.1222705923769781, "grad_norm": 0.17666365206241608, "learning_rate": 8.815182428878177e-05, "loss": 0.3569460868835449, "step": 28480 }, { "epoch": 0.1223135244669981, "grad_norm": 0.2478390485048294, "learning_rate": 8.814751256866414e-05, "loss": 0.25950796604156495, "step": 28490 }, { "epoch": 0.12235645655701811, "grad_norm": 0.018371712416410446, "learning_rate": 8.814320084854652e-05, "loss": 0.6131328582763672, "step": 28500 }, { "epoch": 0.12239938864703812, "grad_norm": 1.7847379446029663, "learning_rate": 8.81388891284289e-05, "loss": 0.3412437677383423, "step": 28510 }, { "epoch": 0.12244232073705812, "grad_norm": 3.38214111328125, "learning_rate": 8.813457740831128e-05, "loss": 0.37290809154510496, "step": 28520 }, { "epoch": 0.12248525282707813, "grad_norm": 0.06898491084575653, "learning_rate": 8.813026568819365e-05, "loss": 0.2055596351623535, "step": 28530 }, { "epoch": 0.12252818491709813, "grad_norm": 2.6301519870758057, "learning_rate": 8.812595396807603e-05, "loss": 0.31752374172210696, "step": 28540 }, { "epoch": 0.12257111700711815, "grad_norm": 2.2748639583587646, "learning_rate": 8.812164224795841e-05, "loss": 0.2857156038284302, "step": 28550 }, { "epoch": 0.12261404909713815, "grad_norm": 0.08286463469266891, "learning_rate": 8.811733052784078e-05, "loss": 0.2685399055480957, "step": 28560 }, { "epoch": 0.12265698118715815, "grad_norm": 2.439558744430542, "learning_rate": 8.811301880772316e-05, "loss": 0.3968736410140991, "step": 28570 }, { "epoch": 0.12269991327717816, "grad_norm": 0.1083855926990509, "learning_rate": 8.810870708760554e-05, "loss": 0.08559461236000061, "step": 28580 }, { "epoch": 0.12274284536719816, "grad_norm": 2.2437236309051514, "learning_rate": 8.810439536748792e-05, "loss": 0.15293599367141725, "step": 28590 }, { "epoch": 0.12278577745721818, "grad_norm": 0.05528045818209648, "learning_rate": 8.81000836473703e-05, "loss": 0.28284125328063964, "step": 28600 }, { "epoch": 0.12282870954723818, "grad_norm": 0.0032772794365882874, "learning_rate": 8.809577192725266e-05, "loss": 0.10475077629089355, "step": 28610 }, { "epoch": 0.12287164163725818, "grad_norm": 0.0006892705569043756, "learning_rate": 8.809146020713504e-05, "loss": 0.17690064907073974, "step": 28620 }, { "epoch": 0.1229145737272782, "grad_norm": 0.005972778424620628, "learning_rate": 8.808714848701741e-05, "loss": 0.16229041814804077, "step": 28630 }, { "epoch": 0.1229575058172982, "grad_norm": 0.0009143303614109755, "learning_rate": 8.808283676689979e-05, "loss": 0.2857463598251343, "step": 28640 }, { "epoch": 0.12300043790731821, "grad_norm": 0.006627853959798813, "learning_rate": 8.807852504678217e-05, "loss": 0.27227063179016114, "step": 28650 }, { "epoch": 0.12304336999733821, "grad_norm": 0.07442281395196915, "learning_rate": 8.807421332666454e-05, "loss": 0.1588853716850281, "step": 28660 }, { "epoch": 0.12308630208735821, "grad_norm": 0.05322013795375824, "learning_rate": 8.806990160654692e-05, "loss": 0.10874921083450317, "step": 28670 }, { "epoch": 0.12312923417737823, "grad_norm": 1.68862783908844, "learning_rate": 8.80655898864293e-05, "loss": 0.33600821495056155, "step": 28680 }, { "epoch": 0.12317216626739823, "grad_norm": 0.10802485793828964, "learning_rate": 8.806127816631166e-05, "loss": 0.15215885639190674, "step": 28690 }, { "epoch": 0.12321509835741823, "grad_norm": 3.715789318084717, "learning_rate": 8.805696644619404e-05, "loss": 0.37726900577545164, "step": 28700 }, { "epoch": 0.12325803044743824, "grad_norm": 0.18458077311515808, "learning_rate": 8.805265472607642e-05, "loss": 0.07564336061477661, "step": 28710 }, { "epoch": 0.12330096253745824, "grad_norm": 0.2841893136501312, "learning_rate": 8.80483430059588e-05, "loss": 0.16145308017730714, "step": 28720 }, { "epoch": 0.12334389462747826, "grad_norm": 0.1921205371618271, "learning_rate": 8.804403128584117e-05, "loss": 0.3189536571502686, "step": 28730 }, { "epoch": 0.12338682671749826, "grad_norm": 12.66451358795166, "learning_rate": 8.803971956572355e-05, "loss": 0.2783447504043579, "step": 28740 }, { "epoch": 0.12342975880751826, "grad_norm": 0.4492635130882263, "learning_rate": 8.803540784560593e-05, "loss": 0.10007522106170655, "step": 28750 }, { "epoch": 0.12347269089753828, "grad_norm": 0.025158774107694626, "learning_rate": 8.80310961254883e-05, "loss": 0.3440743923187256, "step": 28760 }, { "epoch": 0.12351562298755828, "grad_norm": 0.020711267367005348, "learning_rate": 8.802678440537068e-05, "loss": 0.3100674867630005, "step": 28770 }, { "epoch": 0.12355855507757829, "grad_norm": 0.0499393492937088, "learning_rate": 8.802247268525306e-05, "loss": 0.16550155878067016, "step": 28780 }, { "epoch": 0.12360148716759829, "grad_norm": 2.1590919494628906, "learning_rate": 8.801816096513544e-05, "loss": 0.27875704765319825, "step": 28790 }, { "epoch": 0.1236444192576183, "grad_norm": 0.07582681626081467, "learning_rate": 8.801384924501781e-05, "loss": 0.1742209553718567, "step": 28800 }, { "epoch": 0.12368735134763831, "grad_norm": 3.8447768688201904, "learning_rate": 8.800953752490019e-05, "loss": 0.254422926902771, "step": 28810 }, { "epoch": 0.12373028343765831, "grad_norm": 2.4385411739349365, "learning_rate": 8.800522580478257e-05, "loss": 0.1299283981323242, "step": 28820 }, { "epoch": 0.12377321552767832, "grad_norm": 2.0784387588500977, "learning_rate": 8.800091408466495e-05, "loss": 0.3685250520706177, "step": 28830 }, { "epoch": 0.12381614761769832, "grad_norm": 0.04812368005514145, "learning_rate": 8.799660236454732e-05, "loss": 0.18473578691482545, "step": 28840 }, { "epoch": 0.12385907970771833, "grad_norm": 12.649018287658691, "learning_rate": 8.79922906444297e-05, "loss": 0.04986898601055145, "step": 28850 }, { "epoch": 0.12390201179773834, "grad_norm": 0.06861792504787445, "learning_rate": 8.798797892431206e-05, "loss": 0.16036927700042725, "step": 28860 }, { "epoch": 0.12394494388775834, "grad_norm": 8.813103675842285, "learning_rate": 8.798366720419444e-05, "loss": 0.4332833766937256, "step": 28870 }, { "epoch": 0.12398787597777836, "grad_norm": 0.10298661142587662, "learning_rate": 8.797935548407682e-05, "loss": 0.2906115293502808, "step": 28880 }, { "epoch": 0.12403080806779836, "grad_norm": 0.008993543684482574, "learning_rate": 8.79750437639592e-05, "loss": 0.1359240174293518, "step": 28890 }, { "epoch": 0.12407374015781836, "grad_norm": 3.4008004665374756, "learning_rate": 8.797073204384157e-05, "loss": 0.29442768096923827, "step": 28900 }, { "epoch": 0.12411667224783837, "grad_norm": 0.010733728297054768, "learning_rate": 8.796642032372395e-05, "loss": 0.418992280960083, "step": 28910 }, { "epoch": 0.12415960433785837, "grad_norm": 0.8852447867393494, "learning_rate": 8.796210860360633e-05, "loss": 0.2331214427947998, "step": 28920 }, { "epoch": 0.12420253642787839, "grad_norm": 2.394709587097168, "learning_rate": 8.79577968834887e-05, "loss": 0.25450208187103274, "step": 28930 }, { "epoch": 0.12424546851789839, "grad_norm": 0.205605149269104, "learning_rate": 8.795348516337107e-05, "loss": 0.16247061491012574, "step": 28940 }, { "epoch": 0.12428840060791839, "grad_norm": 1.9020476341247559, "learning_rate": 8.794917344325345e-05, "loss": 0.6566081047058105, "step": 28950 }, { "epoch": 0.1243313326979384, "grad_norm": 0.033048976212739944, "learning_rate": 8.794486172313582e-05, "loss": 0.32285366058349607, "step": 28960 }, { "epoch": 0.1243742647879584, "grad_norm": 0.02315451204776764, "learning_rate": 8.79405500030182e-05, "loss": 0.12404880523681641, "step": 28970 }, { "epoch": 0.12441719687797842, "grad_norm": 0.011411737650632858, "learning_rate": 8.793623828290058e-05, "loss": 0.018471239507198332, "step": 28980 }, { "epoch": 0.12446012896799842, "grad_norm": 0.11584151536226273, "learning_rate": 8.793192656278296e-05, "loss": 0.3177420854568481, "step": 28990 }, { "epoch": 0.12450306105801842, "grad_norm": 0.003826763015240431, "learning_rate": 8.792761484266533e-05, "loss": 0.1817054867744446, "step": 29000 }, { "epoch": 0.12450306105801842, "eval_loss": 0.4792439639568329, "eval_runtime": 27.5002, "eval_samples_per_second": 3.636, "eval_steps_per_second": 3.636, "step": 29000 }, { "epoch": 0.12454599314803844, "grad_norm": 0.684794545173645, "learning_rate": 8.792330312254772e-05, "loss": 0.11615618467330932, "step": 29010 }, { "epoch": 0.12458892523805844, "grad_norm": 3.8045012950897217, "learning_rate": 8.791899140243009e-05, "loss": 0.29504122734069826, "step": 29020 }, { "epoch": 0.12463185732807845, "grad_norm": 0.0482652485370636, "learning_rate": 8.791467968231247e-05, "loss": 0.06096305847167969, "step": 29030 }, { "epoch": 0.12467478941809845, "grad_norm": 0.11692120879888535, "learning_rate": 8.791036796219484e-05, "loss": 0.2260446786880493, "step": 29040 }, { "epoch": 0.12471772150811845, "grad_norm": 0.003027498023584485, "learning_rate": 8.790605624207722e-05, "loss": 0.1288272976875305, "step": 29050 }, { "epoch": 0.12476065359813847, "grad_norm": 1.686038613319397, "learning_rate": 8.79017445219596e-05, "loss": 0.4686258316040039, "step": 29060 }, { "epoch": 0.12480358568815847, "grad_norm": 0.27779895067214966, "learning_rate": 8.789743280184197e-05, "loss": 0.20123896598815919, "step": 29070 }, { "epoch": 0.12484651777817848, "grad_norm": 5.1882805824279785, "learning_rate": 8.789312108172435e-05, "loss": 0.41694116592407227, "step": 29080 }, { "epoch": 0.12488944986819848, "grad_norm": 0.6722069978713989, "learning_rate": 8.788880936160673e-05, "loss": 0.29966685771942136, "step": 29090 }, { "epoch": 0.12493238195821849, "grad_norm": 1.6656306982040405, "learning_rate": 8.788449764148911e-05, "loss": 0.15465418100357056, "step": 29100 }, { "epoch": 0.1249753140482385, "grad_norm": 0.1642853319644928, "learning_rate": 8.788018592137147e-05, "loss": 0.3329266309738159, "step": 29110 }, { "epoch": 0.12501824613825852, "grad_norm": 2.975208044052124, "learning_rate": 8.787587420125385e-05, "loss": 0.1679534912109375, "step": 29120 }, { "epoch": 0.1250611782282785, "grad_norm": 0.11888681352138519, "learning_rate": 8.787156248113623e-05, "loss": 0.06416876316070556, "step": 29130 }, { "epoch": 0.12510411031829852, "grad_norm": 1.4387938976287842, "learning_rate": 8.78672507610186e-05, "loss": 0.26547038555145264, "step": 29140 }, { "epoch": 0.12514704240831853, "grad_norm": 0.0023374587763100863, "learning_rate": 8.786293904090098e-05, "loss": 0.10321999788284301, "step": 29150 }, { "epoch": 0.12518997449833852, "grad_norm": 6.412931442260742, "learning_rate": 8.785862732078336e-05, "loss": 0.49428510665893555, "step": 29160 }, { "epoch": 0.12523290658835853, "grad_norm": 0.05729628726840019, "learning_rate": 8.785431560066573e-05, "loss": 0.2725868225097656, "step": 29170 }, { "epoch": 0.12527583867837855, "grad_norm": 0.06620445102453232, "learning_rate": 8.785000388054811e-05, "loss": 0.27253124713897703, "step": 29180 }, { "epoch": 0.12531877076839854, "grad_norm": 0.11173108965158463, "learning_rate": 8.784569216043048e-05, "loss": 0.2758491992950439, "step": 29190 }, { "epoch": 0.12536170285841855, "grad_norm": 1.4134966135025024, "learning_rate": 8.784138044031285e-05, "loss": 0.2473074197769165, "step": 29200 }, { "epoch": 0.12540463494843856, "grad_norm": 3.598130941390991, "learning_rate": 8.783706872019523e-05, "loss": 0.19799611568450928, "step": 29210 }, { "epoch": 0.12544756703845858, "grad_norm": 0.28464409708976746, "learning_rate": 8.783275700007761e-05, "loss": 0.23102359771728515, "step": 29220 }, { "epoch": 0.12549049912847857, "grad_norm": 0.09032626450061798, "learning_rate": 8.782844527996e-05, "loss": 0.23052756786346434, "step": 29230 }, { "epoch": 0.12553343121849858, "grad_norm": 0.725068211555481, "learning_rate": 8.782413355984238e-05, "loss": 0.3960889339447021, "step": 29240 }, { "epoch": 0.1255763633085186, "grad_norm": 0.06919623166322708, "learning_rate": 8.781982183972475e-05, "loss": 0.12070854902267455, "step": 29250 }, { "epoch": 0.12561929539853858, "grad_norm": 2.4769787788391113, "learning_rate": 8.781551011960713e-05, "loss": 0.21523828506469728, "step": 29260 }, { "epoch": 0.1256622274885586, "grad_norm": 0.012916326522827148, "learning_rate": 8.78111983994895e-05, "loss": 0.08447671532630921, "step": 29270 }, { "epoch": 0.1257051595785786, "grad_norm": 0.08832432329654694, "learning_rate": 8.780688667937187e-05, "loss": 0.17949587106704712, "step": 29280 }, { "epoch": 0.1257480916685986, "grad_norm": 1.1982070207595825, "learning_rate": 8.780257495925425e-05, "loss": 0.3396637439727783, "step": 29290 }, { "epoch": 0.12579102375861861, "grad_norm": 0.0024188838433474302, "learning_rate": 8.779826323913663e-05, "loss": 0.15142393112182617, "step": 29300 }, { "epoch": 0.12583395584863863, "grad_norm": 1.0369142293930054, "learning_rate": 8.7793951519019e-05, "loss": 0.3342637300491333, "step": 29310 }, { "epoch": 0.12587688793865862, "grad_norm": 0.015218590386211872, "learning_rate": 8.778963979890138e-05, "loss": 0.04915739297866821, "step": 29320 }, { "epoch": 0.12591982002867863, "grad_norm": 5.906143665313721, "learning_rate": 8.778532807878376e-05, "loss": 0.18448089361190795, "step": 29330 }, { "epoch": 0.12596275211869865, "grad_norm": 5.609766483306885, "learning_rate": 8.778101635866614e-05, "loss": 0.16584160327911376, "step": 29340 }, { "epoch": 0.12600568420871866, "grad_norm": 1.786668300628662, "learning_rate": 8.77767046385485e-05, "loss": 0.2621778964996338, "step": 29350 }, { "epoch": 0.12604861629873865, "grad_norm": 0.0621095634996891, "learning_rate": 8.777239291843088e-05, "loss": 0.35020430088043214, "step": 29360 }, { "epoch": 0.12609154838875866, "grad_norm": 0.5814092755317688, "learning_rate": 8.776808119831325e-05, "loss": 0.20505285263061523, "step": 29370 }, { "epoch": 0.12613448047877868, "grad_norm": 1.501968502998352, "learning_rate": 8.776376947819563e-05, "loss": 0.28874716758728025, "step": 29380 }, { "epoch": 0.12617741256879866, "grad_norm": 2.841974973678589, "learning_rate": 8.775945775807801e-05, "loss": 0.30657010078430175, "step": 29390 }, { "epoch": 0.12622034465881868, "grad_norm": 0.15167303383350372, "learning_rate": 8.775514603796039e-05, "loss": 0.20814452171325684, "step": 29400 }, { "epoch": 0.1262632767488387, "grad_norm": 0.4209035634994507, "learning_rate": 8.775083431784276e-05, "loss": 0.1624962329864502, "step": 29410 }, { "epoch": 0.12630620883885868, "grad_norm": 0.1125643402338028, "learning_rate": 8.774652259772514e-05, "loss": 0.20734481811523436, "step": 29420 }, { "epoch": 0.1263491409288787, "grad_norm": 0.6038672924041748, "learning_rate": 8.774221087760752e-05, "loss": 0.13862569332122804, "step": 29430 }, { "epoch": 0.1263920730188987, "grad_norm": 1.3439053297042847, "learning_rate": 8.773789915748988e-05, "loss": 0.18771252632141114, "step": 29440 }, { "epoch": 0.12643500510891872, "grad_norm": 0.06666406989097595, "learning_rate": 8.773358743737227e-05, "loss": 0.20646681785583496, "step": 29450 }, { "epoch": 0.1264779371989387, "grad_norm": 0.06401768326759338, "learning_rate": 8.772927571725465e-05, "loss": 0.2532331466674805, "step": 29460 }, { "epoch": 0.12652086928895873, "grad_norm": 0.024090800434350967, "learning_rate": 8.772496399713703e-05, "loss": 0.17938752174377443, "step": 29470 }, { "epoch": 0.12656380137897874, "grad_norm": 0.011575686745345592, "learning_rate": 8.77206522770194e-05, "loss": 0.21051297187805176, "step": 29480 }, { "epoch": 0.12660673346899873, "grad_norm": 0.15312832593917847, "learning_rate": 8.771634055690178e-05, "loss": 0.20514678955078125, "step": 29490 }, { "epoch": 0.12664966555901874, "grad_norm": 27.025413513183594, "learning_rate": 8.771202883678416e-05, "loss": 0.21771588325500488, "step": 29500 }, { "epoch": 0.12669259764903876, "grad_norm": 0.03582566976547241, "learning_rate": 8.770771711666654e-05, "loss": 0.43050341606140136, "step": 29510 }, { "epoch": 0.12673552973905874, "grad_norm": 0.01910443976521492, "learning_rate": 8.77034053965489e-05, "loss": 0.3661238193511963, "step": 29520 }, { "epoch": 0.12677846182907876, "grad_norm": 0.4073588252067566, "learning_rate": 8.769909367643128e-05, "loss": 0.24593477249145507, "step": 29530 }, { "epoch": 0.12682139391909877, "grad_norm": 0.019970379769802094, "learning_rate": 8.769478195631366e-05, "loss": 0.300111198425293, "step": 29540 }, { "epoch": 0.1268643260091188, "grad_norm": 0.026408078148961067, "learning_rate": 8.769047023619603e-05, "loss": 0.17894766330718995, "step": 29550 }, { "epoch": 0.12690725809913878, "grad_norm": 0.013225136324763298, "learning_rate": 8.768615851607841e-05, "loss": 0.23364582061767578, "step": 29560 }, { "epoch": 0.1269501901891588, "grad_norm": 0.043238185346126556, "learning_rate": 8.768184679596079e-05, "loss": 0.35565433502197263, "step": 29570 }, { "epoch": 0.1269931222791788, "grad_norm": 0.3078600764274597, "learning_rate": 8.767753507584317e-05, "loss": 0.2342392921447754, "step": 29580 }, { "epoch": 0.1270360543691988, "grad_norm": 7.824125289916992, "learning_rate": 8.767322335572554e-05, "loss": 0.21343417167663575, "step": 29590 }, { "epoch": 0.1270789864592188, "grad_norm": 0.45437708497047424, "learning_rate": 8.76689116356079e-05, "loss": 0.0968501091003418, "step": 29600 }, { "epoch": 0.12712191854923882, "grad_norm": 2.1558401584625244, "learning_rate": 8.766459991549028e-05, "loss": 0.25272440910339355, "step": 29610 }, { "epoch": 0.1271648506392588, "grad_norm": 0.08883268386125565, "learning_rate": 8.766028819537266e-05, "loss": 0.21234755516052245, "step": 29620 }, { "epoch": 0.12720778272927882, "grad_norm": 0.0019184901611879468, "learning_rate": 8.765597647525504e-05, "loss": 0.10313694477081299, "step": 29630 }, { "epoch": 0.12725071481929884, "grad_norm": 0.31680187582969666, "learning_rate": 8.765166475513742e-05, "loss": 0.20983920097351075, "step": 29640 }, { "epoch": 0.12729364690931885, "grad_norm": 1.115886926651001, "learning_rate": 8.764735303501979e-05, "loss": 0.061583316326141356, "step": 29650 }, { "epoch": 0.12733657899933884, "grad_norm": 0.011549929156899452, "learning_rate": 8.764304131490217e-05, "loss": 0.009761539101600648, "step": 29660 }, { "epoch": 0.12737951108935885, "grad_norm": 0.7803761959075928, "learning_rate": 8.763872959478455e-05, "loss": 0.26551635265350343, "step": 29670 }, { "epoch": 0.12742244317937887, "grad_norm": 0.0001464521192247048, "learning_rate": 8.763441787466693e-05, "loss": 0.24804928302764892, "step": 29680 }, { "epoch": 0.12746537526939886, "grad_norm": 0.04745858907699585, "learning_rate": 8.76301061545493e-05, "loss": 0.07876437306404113, "step": 29690 }, { "epoch": 0.12750830735941887, "grad_norm": 3.122537136077881, "learning_rate": 8.762579443443168e-05, "loss": 0.5715325832366943, "step": 29700 }, { "epoch": 0.12755123944943889, "grad_norm": 0.0008646674104966223, "learning_rate": 8.762148271431406e-05, "loss": 0.4441582679748535, "step": 29710 }, { "epoch": 0.12759417153945887, "grad_norm": 0.30260586738586426, "learning_rate": 8.761717099419643e-05, "loss": 0.495958948135376, "step": 29720 }, { "epoch": 0.1276371036294789, "grad_norm": 1.3628997802734375, "learning_rate": 8.761285927407881e-05, "loss": 0.1913755178451538, "step": 29730 }, { "epoch": 0.1276800357194989, "grad_norm": 0.05150032043457031, "learning_rate": 8.760854755396119e-05, "loss": 0.04136924743652344, "step": 29740 }, { "epoch": 0.1277229678095189, "grad_norm": 0.016917334869503975, "learning_rate": 8.760423583384357e-05, "loss": 0.3162794828414917, "step": 29750 }, { "epoch": 0.1277658998995389, "grad_norm": 1.61897611618042, "learning_rate": 8.759992411372593e-05, "loss": 0.2959132194519043, "step": 29760 }, { "epoch": 0.12780883198955892, "grad_norm": 0.04096636176109314, "learning_rate": 8.759561239360831e-05, "loss": 0.2208381175994873, "step": 29770 }, { "epoch": 0.12785176407957893, "grad_norm": 1.580322265625, "learning_rate": 8.759130067349068e-05, "loss": 0.2737943172454834, "step": 29780 }, { "epoch": 0.12789469616959892, "grad_norm": 0.030307283625006676, "learning_rate": 8.758698895337306e-05, "loss": 0.16907756328582763, "step": 29790 }, { "epoch": 0.12793762825961894, "grad_norm": 0.04987514391541481, "learning_rate": 8.758267723325544e-05, "loss": 0.07499375939369202, "step": 29800 }, { "epoch": 0.12798056034963895, "grad_norm": 0.363908976316452, "learning_rate": 8.757836551313782e-05, "loss": 0.30732824802398684, "step": 29810 }, { "epoch": 0.12802349243965894, "grad_norm": 0.3449194133281708, "learning_rate": 8.75740537930202e-05, "loss": 0.4999234676361084, "step": 29820 }, { "epoch": 0.12806642452967895, "grad_norm": 0.8750787377357483, "learning_rate": 8.756974207290257e-05, "loss": 0.30790679454803466, "step": 29830 }, { "epoch": 0.12810935661969897, "grad_norm": 1.4069339036941528, "learning_rate": 8.756543035278495e-05, "loss": 0.21252622604370117, "step": 29840 }, { "epoch": 0.12815228870971895, "grad_norm": 0.7201815247535706, "learning_rate": 8.756111863266731e-05, "loss": 0.2888744831085205, "step": 29850 }, { "epoch": 0.12819522079973897, "grad_norm": 3.888648509979248, "learning_rate": 8.755680691254969e-05, "loss": 0.08338718414306641, "step": 29860 }, { "epoch": 0.12823815288975898, "grad_norm": 0.02963382750749588, "learning_rate": 8.755249519243207e-05, "loss": 0.032802003622055056, "step": 29870 }, { "epoch": 0.128281084979779, "grad_norm": 0.7857760787010193, "learning_rate": 8.754818347231444e-05, "loss": 0.16297609806060792, "step": 29880 }, { "epoch": 0.12832401706979898, "grad_norm": 2.595731019973755, "learning_rate": 8.754387175219682e-05, "loss": 0.19706374406814575, "step": 29890 }, { "epoch": 0.128366949159819, "grad_norm": 1.7243250608444214, "learning_rate": 8.75395600320792e-05, "loss": 0.3066516637802124, "step": 29900 }, { "epoch": 0.12840988124983901, "grad_norm": 0.018426483497023582, "learning_rate": 8.753524831196158e-05, "loss": 0.46737966537475584, "step": 29910 }, { "epoch": 0.128452813339859, "grad_norm": 2.102694272994995, "learning_rate": 8.753093659184395e-05, "loss": 0.23304004669189454, "step": 29920 }, { "epoch": 0.12849574542987902, "grad_norm": 0.03168783336877823, "learning_rate": 8.752662487172633e-05, "loss": 0.32050702571868894, "step": 29930 }, { "epoch": 0.12853867751989903, "grad_norm": 3.586268663406372, "learning_rate": 8.752231315160871e-05, "loss": 0.4045413494110107, "step": 29940 }, { "epoch": 0.12858160960991902, "grad_norm": 0.01576436124742031, "learning_rate": 8.751800143149109e-05, "loss": 0.19724249839782715, "step": 29950 }, { "epoch": 0.12862454169993903, "grad_norm": 0.03131139278411865, "learning_rate": 8.751368971137346e-05, "loss": 0.1462864637374878, "step": 29960 }, { "epoch": 0.12866747378995905, "grad_norm": 1.590073585510254, "learning_rate": 8.750937799125584e-05, "loss": 0.643623161315918, "step": 29970 }, { "epoch": 0.12871040587997906, "grad_norm": 0.17035600543022156, "learning_rate": 8.750506627113822e-05, "loss": 0.22655746936798096, "step": 29980 }, { "epoch": 0.12875333796999905, "grad_norm": 0.10757172107696533, "learning_rate": 8.75007545510206e-05, "loss": 0.4978126049041748, "step": 29990 }, { "epoch": 0.12879627006001906, "grad_norm": 0.06894957274198532, "learning_rate": 8.749644283090297e-05, "loss": 0.10065504312515258, "step": 30000 }, { "epoch": 0.12879627006001906, "eval_loss": 0.4761184751987457, "eval_runtime": 27.4021, "eval_samples_per_second": 3.649, "eval_steps_per_second": 3.649, "step": 30000 }, { "epoch": 0.12883920215003908, "grad_norm": 1.8203767538070679, "learning_rate": 8.749213111078534e-05, "loss": 0.20030674934387208, "step": 30010 }, { "epoch": 0.12888213424005907, "grad_norm": 2.7183761596679688, "learning_rate": 8.748781939066771e-05, "loss": 0.263798451423645, "step": 30020 }, { "epoch": 0.12892506633007908, "grad_norm": 0.060718487948179245, "learning_rate": 8.748350767055009e-05, "loss": 0.2561044692993164, "step": 30030 }, { "epoch": 0.1289679984200991, "grad_norm": 4.440418720245361, "learning_rate": 8.747919595043247e-05, "loss": 0.14885461330413818, "step": 30040 }, { "epoch": 0.12901093051011908, "grad_norm": 0.02631818689405918, "learning_rate": 8.747488423031485e-05, "loss": 0.05507946014404297, "step": 30050 }, { "epoch": 0.1290538626001391, "grad_norm": 1.9612177610397339, "learning_rate": 8.747057251019722e-05, "loss": 0.3454012632369995, "step": 30060 }, { "epoch": 0.1290967946901591, "grad_norm": 9.800521850585938, "learning_rate": 8.74662607900796e-05, "loss": 0.38835835456848145, "step": 30070 }, { "epoch": 0.12913972678017913, "grad_norm": 0.984861433506012, "learning_rate": 8.746194906996198e-05, "loss": 0.19478857517242432, "step": 30080 }, { "epoch": 0.1291826588701991, "grad_norm": 4.223658084869385, "learning_rate": 8.745763734984434e-05, "loss": 0.11445388793945313, "step": 30090 }, { "epoch": 0.12922559096021913, "grad_norm": 0.6742361783981323, "learning_rate": 8.745332562972672e-05, "loss": 0.29052252769470216, "step": 30100 }, { "epoch": 0.12926852305023914, "grad_norm": 1.4758702516555786, "learning_rate": 8.74490139096091e-05, "loss": 0.053003185987472536, "step": 30110 }, { "epoch": 0.12931145514025913, "grad_norm": 0.11309056729078293, "learning_rate": 8.744470218949147e-05, "loss": 0.12848434448242188, "step": 30120 }, { "epoch": 0.12935438723027914, "grad_norm": 0.041068606078624725, "learning_rate": 8.744039046937385e-05, "loss": 0.31078152656555175, "step": 30130 }, { "epoch": 0.12939731932029916, "grad_norm": 1.0843498706817627, "learning_rate": 8.743607874925623e-05, "loss": 0.2896578788757324, "step": 30140 }, { "epoch": 0.12944025141031915, "grad_norm": 0.13427147269248962, "learning_rate": 8.74317670291386e-05, "loss": 0.07495735883712769, "step": 30150 }, { "epoch": 0.12948318350033916, "grad_norm": 0.03397704288363457, "learning_rate": 8.742745530902098e-05, "loss": 0.291317081451416, "step": 30160 }, { "epoch": 0.12952611559035918, "grad_norm": 0.003932945430278778, "learning_rate": 8.742314358890336e-05, "loss": 0.2433781862258911, "step": 30170 }, { "epoch": 0.12956904768037916, "grad_norm": 0.04297548905014992, "learning_rate": 8.741883186878574e-05, "loss": 0.3208784580230713, "step": 30180 }, { "epoch": 0.12961197977039918, "grad_norm": 0.07267145812511444, "learning_rate": 8.741452014866812e-05, "loss": 0.4771559715270996, "step": 30190 }, { "epoch": 0.1296549118604192, "grad_norm": 0.1928095817565918, "learning_rate": 8.741020842855049e-05, "loss": 0.054413968324661256, "step": 30200 }, { "epoch": 0.1296978439504392, "grad_norm": 0.00119906070176512, "learning_rate": 8.740589670843287e-05, "loss": 0.1544780135154724, "step": 30210 }, { "epoch": 0.1297407760404592, "grad_norm": 5.354598522186279, "learning_rate": 8.740158498831525e-05, "loss": 0.29805917739868165, "step": 30220 }, { "epoch": 0.1297837081304792, "grad_norm": 18.67365074157715, "learning_rate": 8.739727326819762e-05, "loss": 0.13847802877426146, "step": 30230 }, { "epoch": 0.12982664022049922, "grad_norm": 1.6385403871536255, "learning_rate": 8.739296154808e-05, "loss": 0.21875405311584473, "step": 30240 }, { "epoch": 0.1298695723105192, "grad_norm": 7.299107074737549, "learning_rate": 8.738864982796238e-05, "loss": 0.24007067680358887, "step": 30250 }, { "epoch": 0.12991250440053922, "grad_norm": 0.05835841968655586, "learning_rate": 8.738433810784474e-05, "loss": 0.1011542797088623, "step": 30260 }, { "epoch": 0.12995543649055924, "grad_norm": 0.01663939282298088, "learning_rate": 8.738002638772712e-05, "loss": 0.23251605033874512, "step": 30270 }, { "epoch": 0.12999836858057923, "grad_norm": 0.441267728805542, "learning_rate": 8.73757146676095e-05, "loss": 0.3203972101211548, "step": 30280 }, { "epoch": 0.13004130067059924, "grad_norm": 0.03272169828414917, "learning_rate": 8.737140294749188e-05, "loss": 0.05306289196014404, "step": 30290 }, { "epoch": 0.13008423276061926, "grad_norm": 0.06436789780855179, "learning_rate": 8.736709122737425e-05, "loss": 0.2278662919998169, "step": 30300 }, { "epoch": 0.13012716485063927, "grad_norm": 0.3606188893318176, "learning_rate": 8.736277950725663e-05, "loss": 0.4411325931549072, "step": 30310 }, { "epoch": 0.13017009694065926, "grad_norm": 0.052819643169641495, "learning_rate": 8.735846778713901e-05, "loss": 0.2932943820953369, "step": 30320 }, { "epoch": 0.13021302903067927, "grad_norm": 0.0145418681204319, "learning_rate": 8.735415606702138e-05, "loss": 0.1358722925186157, "step": 30330 }, { "epoch": 0.1302559611206993, "grad_norm": 3.2161331176757812, "learning_rate": 8.734984434690375e-05, "loss": 0.13851587772369384, "step": 30340 }, { "epoch": 0.13029889321071927, "grad_norm": 0.01784207485616207, "learning_rate": 8.734553262678613e-05, "loss": 0.2483436346054077, "step": 30350 }, { "epoch": 0.1303418253007393, "grad_norm": 0.6265615820884705, "learning_rate": 8.73412209066685e-05, "loss": 0.29143610000610354, "step": 30360 }, { "epoch": 0.1303847573907593, "grad_norm": 0.11181551963090897, "learning_rate": 8.733690918655088e-05, "loss": 0.1619391918182373, "step": 30370 }, { "epoch": 0.1304276894807793, "grad_norm": 0.06604292243719101, "learning_rate": 8.733259746643326e-05, "loss": 0.07990115880966187, "step": 30380 }, { "epoch": 0.1304706215707993, "grad_norm": 0.1553955078125, "learning_rate": 8.732828574631564e-05, "loss": 0.13872065544128417, "step": 30390 }, { "epoch": 0.13051355366081932, "grad_norm": 2.5285756587982178, "learning_rate": 8.732397402619801e-05, "loss": 0.26338150501251223, "step": 30400 }, { "epoch": 0.13055648575083934, "grad_norm": 0.31697508692741394, "learning_rate": 8.731966230608039e-05, "loss": 0.5478155612945557, "step": 30410 }, { "epoch": 0.13059941784085932, "grad_norm": 0.1644754558801651, "learning_rate": 8.731535058596277e-05, "loss": 0.11594902276992798, "step": 30420 }, { "epoch": 0.13064234993087934, "grad_norm": 0.03448955714702606, "learning_rate": 8.731103886584514e-05, "loss": 0.11595598459243775, "step": 30430 }, { "epoch": 0.13068528202089935, "grad_norm": 3.270960807800293, "learning_rate": 8.730672714572752e-05, "loss": 0.2628152847290039, "step": 30440 }, { "epoch": 0.13072821411091934, "grad_norm": 0.08907277882099152, "learning_rate": 8.73024154256099e-05, "loss": 0.2462904453277588, "step": 30450 }, { "epoch": 0.13077114620093935, "grad_norm": 0.35055065155029297, "learning_rate": 8.729810370549228e-05, "loss": 0.3140165567398071, "step": 30460 }, { "epoch": 0.13081407829095937, "grad_norm": 3.522132635116577, "learning_rate": 8.729379198537465e-05, "loss": 0.22482681274414062, "step": 30470 }, { "epoch": 0.13085701038097936, "grad_norm": 2.6127853393554688, "learning_rate": 8.728948026525703e-05, "loss": 0.20301475524902343, "step": 30480 }, { "epoch": 0.13089994247099937, "grad_norm": 0.01888544298708439, "learning_rate": 8.728516854513941e-05, "loss": 0.1393720030784607, "step": 30490 }, { "epoch": 0.13094287456101938, "grad_norm": 2.050440549850464, "learning_rate": 8.728085682502177e-05, "loss": 0.20471715927124023, "step": 30500 }, { "epoch": 0.1309858066510394, "grad_norm": 2.6003317832946777, "learning_rate": 8.727654510490415e-05, "loss": 0.26553878784179685, "step": 30510 }, { "epoch": 0.1310287387410594, "grad_norm": 0.009445443749427795, "learning_rate": 8.727223338478653e-05, "loss": 0.25958957672119143, "step": 30520 }, { "epoch": 0.1310716708310794, "grad_norm": 0.04686051234602928, "learning_rate": 8.72679216646689e-05, "loss": 0.3284756660461426, "step": 30530 }, { "epoch": 0.13111460292109942, "grad_norm": 1.3024394512176514, "learning_rate": 8.726360994455128e-05, "loss": 0.2646768569946289, "step": 30540 }, { "epoch": 0.1311575350111194, "grad_norm": 1.392385482788086, "learning_rate": 8.725929822443366e-05, "loss": 0.2610862016677856, "step": 30550 }, { "epoch": 0.13120046710113942, "grad_norm": 0.39657700061798096, "learning_rate": 8.725498650431604e-05, "loss": 0.12167308330535889, "step": 30560 }, { "epoch": 0.13124339919115943, "grad_norm": 0.020843392238020897, "learning_rate": 8.725067478419841e-05, "loss": 0.055149370431900026, "step": 30570 }, { "epoch": 0.13128633128117942, "grad_norm": 3.253476619720459, "learning_rate": 8.724636306408079e-05, "loss": 0.16969624757766724, "step": 30580 }, { "epoch": 0.13132926337119943, "grad_norm": 1.004389762878418, "learning_rate": 8.724205134396315e-05, "loss": 0.095842045545578, "step": 30590 }, { "epoch": 0.13137219546121945, "grad_norm": 0.9034222364425659, "learning_rate": 8.723773962384553e-05, "loss": 0.3282526254653931, "step": 30600 }, { "epoch": 0.13141512755123944, "grad_norm": 0.10621494054794312, "learning_rate": 8.723342790372791e-05, "loss": 0.12817639112472534, "step": 30610 }, { "epoch": 0.13145805964125945, "grad_norm": 5.3974761962890625, "learning_rate": 8.722911618361029e-05, "loss": 0.3108220100402832, "step": 30620 }, { "epoch": 0.13150099173127947, "grad_norm": 0.13631728291511536, "learning_rate": 8.722480446349266e-05, "loss": 0.31007301807403564, "step": 30630 }, { "epoch": 0.13154392382129948, "grad_norm": 0.018317611888051033, "learning_rate": 8.722049274337506e-05, "loss": 0.1647101402282715, "step": 30640 }, { "epoch": 0.13158685591131947, "grad_norm": 0.646333634853363, "learning_rate": 8.721618102325743e-05, "loss": 0.41881618499755857, "step": 30650 }, { "epoch": 0.13162978800133948, "grad_norm": 0.03925095498561859, "learning_rate": 8.721186930313981e-05, "loss": 0.22201454639434814, "step": 30660 }, { "epoch": 0.1316727200913595, "grad_norm": 1.520325779914856, "learning_rate": 8.720755758302217e-05, "loss": 0.23899271488189697, "step": 30670 }, { "epoch": 0.13171565218137948, "grad_norm": 0.3242422640323639, "learning_rate": 8.720324586290455e-05, "loss": 0.42775511741638184, "step": 30680 }, { "epoch": 0.1317585842713995, "grad_norm": 1.1560478210449219, "learning_rate": 8.719893414278693e-05, "loss": 0.4399539947509766, "step": 30690 }, { "epoch": 0.1318015163614195, "grad_norm": 0.62732994556427, "learning_rate": 8.71946224226693e-05, "loss": 0.15830096006393432, "step": 30700 }, { "epoch": 0.1318444484514395, "grad_norm": 1.1266449689865112, "learning_rate": 8.719031070255168e-05, "loss": 0.4181190013885498, "step": 30710 }, { "epoch": 0.13188738054145951, "grad_norm": 1.3097193241119385, "learning_rate": 8.718599898243406e-05, "loss": 0.15668928623199463, "step": 30720 }, { "epoch": 0.13193031263147953, "grad_norm": 0.01987522467970848, "learning_rate": 8.718168726231644e-05, "loss": 0.20157217979431152, "step": 30730 }, { "epoch": 0.13197324472149954, "grad_norm": 1.8036832809448242, "learning_rate": 8.717737554219882e-05, "loss": 0.2997883319854736, "step": 30740 }, { "epoch": 0.13201617681151953, "grad_norm": 2.8631534576416016, "learning_rate": 8.717306382208118e-05, "loss": 0.45325074195861814, "step": 30750 }, { "epoch": 0.13205910890153955, "grad_norm": 4.630545616149902, "learning_rate": 8.716875210196356e-05, "loss": 0.15610417127609252, "step": 30760 }, { "epoch": 0.13210204099155956, "grad_norm": 1.621471643447876, "learning_rate": 8.716444038184593e-05, "loss": 0.26927714347839354, "step": 30770 }, { "epoch": 0.13214497308157955, "grad_norm": 0.38395434617996216, "learning_rate": 8.716012866172831e-05, "loss": 0.2567978143692017, "step": 30780 }, { "epoch": 0.13218790517159956, "grad_norm": 0.8037998676300049, "learning_rate": 8.715581694161069e-05, "loss": 0.24332842826843262, "step": 30790 }, { "epoch": 0.13223083726161958, "grad_norm": 0.4493197202682495, "learning_rate": 8.715150522149307e-05, "loss": 0.3514964818954468, "step": 30800 }, { "epoch": 0.13227376935163956, "grad_norm": 0.16325241327285767, "learning_rate": 8.714719350137544e-05, "loss": 0.40372166633605955, "step": 30810 }, { "epoch": 0.13231670144165958, "grad_norm": 0.019779078662395477, "learning_rate": 8.714288178125782e-05, "loss": 0.31420106887817384, "step": 30820 }, { "epoch": 0.1323596335316796, "grad_norm": 0.44937989115715027, "learning_rate": 8.713857006114018e-05, "loss": 0.22593259811401367, "step": 30830 }, { "epoch": 0.1324025656216996, "grad_norm": 6.273345947265625, "learning_rate": 8.713425834102256e-05, "loss": 0.30472588539123535, "step": 30840 }, { "epoch": 0.1324454977117196, "grad_norm": 1.8497966527938843, "learning_rate": 8.712994662090494e-05, "loss": 0.3565536022186279, "step": 30850 }, { "epoch": 0.1324884298017396, "grad_norm": 0.015776457265019417, "learning_rate": 8.712563490078733e-05, "loss": 0.22413876056671142, "step": 30860 }, { "epoch": 0.13253136189175962, "grad_norm": 0.12957410514354706, "learning_rate": 8.712132318066971e-05, "loss": 0.360329270362854, "step": 30870 }, { "epoch": 0.1325742939817796, "grad_norm": 0.01238189171999693, "learning_rate": 8.711701146055208e-05, "loss": 0.2678189754486084, "step": 30880 }, { "epoch": 0.13261722607179963, "grad_norm": 1.247525930404663, "learning_rate": 8.711269974043446e-05, "loss": 0.23769049644470214, "step": 30890 }, { "epoch": 0.13266015816181964, "grad_norm": 1.3736680746078491, "learning_rate": 8.710838802031684e-05, "loss": 0.2896465539932251, "step": 30900 }, { "epoch": 0.13270309025183963, "grad_norm": 0.0654890313744545, "learning_rate": 8.710407630019922e-05, "loss": 0.11143224239349366, "step": 30910 }, { "epoch": 0.13274602234185964, "grad_norm": 0.011445770040154457, "learning_rate": 8.709976458008158e-05, "loss": 0.09457647800445557, "step": 30920 }, { "epoch": 0.13278895443187966, "grad_norm": 1.6107486486434937, "learning_rate": 8.709545285996396e-05, "loss": 0.4207982063293457, "step": 30930 }, { "epoch": 0.13283188652189967, "grad_norm": 1.2543550729751587, "learning_rate": 8.709114113984633e-05, "loss": 0.16696133613586425, "step": 30940 }, { "epoch": 0.13287481861191966, "grad_norm": 0.011349753476679325, "learning_rate": 8.708682941972871e-05, "loss": 0.1631263017654419, "step": 30950 }, { "epoch": 0.13291775070193967, "grad_norm": 0.016437632963061333, "learning_rate": 8.708251769961109e-05, "loss": 0.14615211486816407, "step": 30960 }, { "epoch": 0.1329606827919597, "grad_norm": 0.021030467003583908, "learning_rate": 8.707820597949347e-05, "loss": 0.16817245483398438, "step": 30970 }, { "epoch": 0.13300361488197968, "grad_norm": 0.1420525759458542, "learning_rate": 8.707389425937584e-05, "loss": 0.3803170919418335, "step": 30980 }, { "epoch": 0.1330465469719997, "grad_norm": 7.2748894691467285, "learning_rate": 8.706958253925822e-05, "loss": 0.27606379985809326, "step": 30990 }, { "epoch": 0.1330894790620197, "grad_norm": 3.6429316997528076, "learning_rate": 8.706527081914059e-05, "loss": 0.22091338634490967, "step": 31000 }, { "epoch": 0.1330894790620197, "eval_loss": 0.5054401159286499, "eval_runtime": 27.4631, "eval_samples_per_second": 3.641, "eval_steps_per_second": 3.641, "step": 31000 }, { "epoch": 0.1331324111520397, "grad_norm": 2.900641918182373, "learning_rate": 8.706095909902296e-05, "loss": 0.33292906284332274, "step": 31010 }, { "epoch": 0.1331753432420597, "grad_norm": 6.009944915771484, "learning_rate": 8.705664737890534e-05, "loss": 0.4029203414916992, "step": 31020 }, { "epoch": 0.13321827533207972, "grad_norm": 2.333559274673462, "learning_rate": 8.705233565878772e-05, "loss": 0.2746156930923462, "step": 31030 }, { "epoch": 0.1332612074220997, "grad_norm": 1.5716809034347534, "learning_rate": 8.70480239386701e-05, "loss": 0.3722160816192627, "step": 31040 }, { "epoch": 0.13330413951211972, "grad_norm": 3.9740467071533203, "learning_rate": 8.704371221855247e-05, "loss": 0.3705629348754883, "step": 31050 }, { "epoch": 0.13334707160213974, "grad_norm": 0.7232141494750977, "learning_rate": 8.703940049843485e-05, "loss": 0.35708882808685305, "step": 31060 }, { "epoch": 0.13339000369215975, "grad_norm": 0.0947902724146843, "learning_rate": 8.703508877831723e-05, "loss": 0.36917526721954347, "step": 31070 }, { "epoch": 0.13343293578217974, "grad_norm": 0.2062128782272339, "learning_rate": 8.70307770581996e-05, "loss": 0.219962477684021, "step": 31080 }, { "epoch": 0.13347586787219975, "grad_norm": 2.3485822677612305, "learning_rate": 8.702646533808198e-05, "loss": 0.5045081615447998, "step": 31090 }, { "epoch": 0.13351879996221977, "grad_norm": 2.278395891189575, "learning_rate": 8.702215361796436e-05, "loss": 0.333439302444458, "step": 31100 }, { "epoch": 0.13356173205223976, "grad_norm": 0.18220630288124084, "learning_rate": 8.701784189784674e-05, "loss": 0.12184329032897949, "step": 31110 }, { "epoch": 0.13360466414225977, "grad_norm": 19.915332794189453, "learning_rate": 8.701353017772911e-05, "loss": 0.2725220680236816, "step": 31120 }, { "epoch": 0.1336475962322798, "grad_norm": 4.639437198638916, "learning_rate": 8.700921845761149e-05, "loss": 0.44598069190979006, "step": 31130 }, { "epoch": 0.13369052832229977, "grad_norm": 0.005592535249888897, "learning_rate": 8.700490673749387e-05, "loss": 0.31151866912841797, "step": 31140 }, { "epoch": 0.1337334604123198, "grad_norm": 0.07216699421405792, "learning_rate": 8.700059501737625e-05, "loss": 0.3183210134506226, "step": 31150 }, { "epoch": 0.1337763925023398, "grad_norm": 0.9691863656044006, "learning_rate": 8.699628329725861e-05, "loss": 0.6177532196044921, "step": 31160 }, { "epoch": 0.13381932459235982, "grad_norm": 0.2667335271835327, "learning_rate": 8.699197157714099e-05, "loss": 0.34618918895721434, "step": 31170 }, { "epoch": 0.1338622566823798, "grad_norm": 2.0624542236328125, "learning_rate": 8.698765985702336e-05, "loss": 0.21124274730682374, "step": 31180 }, { "epoch": 0.13390518877239982, "grad_norm": 2.3349645137786865, "learning_rate": 8.698334813690574e-05, "loss": 0.3152353286743164, "step": 31190 }, { "epoch": 0.13394812086241983, "grad_norm": 0.0925971269607544, "learning_rate": 8.697903641678812e-05, "loss": 0.2945190668106079, "step": 31200 }, { "epoch": 0.13399105295243982, "grad_norm": 0.03744291141629219, "learning_rate": 8.69747246966705e-05, "loss": 0.2564548015594482, "step": 31210 }, { "epoch": 0.13403398504245984, "grad_norm": 0.07750023901462555, "learning_rate": 8.697041297655287e-05, "loss": 0.24486761093139647, "step": 31220 }, { "epoch": 0.13407691713247985, "grad_norm": 0.05882472172379494, "learning_rate": 8.696610125643525e-05, "loss": 0.2871314525604248, "step": 31230 }, { "epoch": 0.13411984922249984, "grad_norm": 0.05573529750108719, "learning_rate": 8.696178953631763e-05, "loss": 0.2048487186431885, "step": 31240 }, { "epoch": 0.13416278131251985, "grad_norm": 6.236080646514893, "learning_rate": 8.695747781619999e-05, "loss": 0.19539493322372437, "step": 31250 }, { "epoch": 0.13420571340253987, "grad_norm": 0.01104716956615448, "learning_rate": 8.695316609608237e-05, "loss": 0.2785011053085327, "step": 31260 }, { "epoch": 0.13424864549255988, "grad_norm": 2.238067626953125, "learning_rate": 8.694885437596475e-05, "loss": 0.5471057891845703, "step": 31270 }, { "epoch": 0.13429157758257987, "grad_norm": 2.1154680252075195, "learning_rate": 8.694454265584712e-05, "loss": 0.33014640808105467, "step": 31280 }, { "epoch": 0.13433450967259988, "grad_norm": 7.169861316680908, "learning_rate": 8.69402309357295e-05, "loss": 0.23499386310577391, "step": 31290 }, { "epoch": 0.1343774417626199, "grad_norm": 0.7031405568122864, "learning_rate": 8.693591921561188e-05, "loss": 0.1952307939529419, "step": 31300 }, { "epoch": 0.13442037385263989, "grad_norm": 0.07406271994113922, "learning_rate": 8.693160749549426e-05, "loss": 0.3307734489440918, "step": 31310 }, { "epoch": 0.1344633059426599, "grad_norm": 0.04302635416388512, "learning_rate": 8.692729577537663e-05, "loss": 0.2610581398010254, "step": 31320 }, { "epoch": 0.13450623803267991, "grad_norm": 0.966715395450592, "learning_rate": 8.692298405525901e-05, "loss": 0.2458191156387329, "step": 31330 }, { "epoch": 0.1345491701226999, "grad_norm": 0.2640382945537567, "learning_rate": 8.691867233514139e-05, "loss": 0.13566402196884156, "step": 31340 }, { "epoch": 0.13459210221271992, "grad_norm": 0.02546941116452217, "learning_rate": 8.691436061502377e-05, "loss": 0.1587485432624817, "step": 31350 }, { "epoch": 0.13463503430273993, "grad_norm": 0.028399016708135605, "learning_rate": 8.691004889490614e-05, "loss": 0.16007002592086791, "step": 31360 }, { "epoch": 0.13467796639275995, "grad_norm": 0.26582005620002747, "learning_rate": 8.690573717478852e-05, "loss": 0.09273542761802674, "step": 31370 }, { "epoch": 0.13472089848277993, "grad_norm": 0.08304465562105179, "learning_rate": 8.69014254546709e-05, "loss": 0.12200015783309937, "step": 31380 }, { "epoch": 0.13476383057279995, "grad_norm": 0.12915168702602386, "learning_rate": 8.689711373455327e-05, "loss": 0.08989648818969727, "step": 31390 }, { "epoch": 0.13480676266281996, "grad_norm": 0.0017127407481893897, "learning_rate": 8.689280201443565e-05, "loss": 0.16513725519180297, "step": 31400 }, { "epoch": 0.13484969475283995, "grad_norm": 0.09435081481933594, "learning_rate": 8.688849029431802e-05, "loss": 0.3568329095840454, "step": 31410 }, { "epoch": 0.13489262684285996, "grad_norm": 3.7495338916778564, "learning_rate": 8.688417857420039e-05, "loss": 0.2168562412261963, "step": 31420 }, { "epoch": 0.13493555893287998, "grad_norm": 0.16012614965438843, "learning_rate": 8.687986685408277e-05, "loss": 0.3556508541107178, "step": 31430 }, { "epoch": 0.13497849102289997, "grad_norm": 0.6222158670425415, "learning_rate": 8.687555513396515e-05, "loss": 0.3658663988113403, "step": 31440 }, { "epoch": 0.13502142311291998, "grad_norm": 0.12118512392044067, "learning_rate": 8.687124341384753e-05, "loss": 0.25071876049041747, "step": 31450 }, { "epoch": 0.13506435520294, "grad_norm": 8.053659439086914, "learning_rate": 8.68669316937299e-05, "loss": 0.35620319843292236, "step": 31460 }, { "epoch": 0.13510728729295998, "grad_norm": 0.005498465616255999, "learning_rate": 8.686261997361228e-05, "loss": 0.13012741804122924, "step": 31470 }, { "epoch": 0.13515021938298, "grad_norm": 1.4227027893066406, "learning_rate": 8.685830825349466e-05, "loss": 0.40222697257995604, "step": 31480 }, { "epoch": 0.135193151473, "grad_norm": 18.48629379272461, "learning_rate": 8.685399653337702e-05, "loss": 0.294164252281189, "step": 31490 }, { "epoch": 0.13523608356302003, "grad_norm": 0.008073339238762856, "learning_rate": 8.68496848132594e-05, "loss": 0.4413759231567383, "step": 31500 }, { "epoch": 0.13527901565304, "grad_norm": 0.48230382800102234, "learning_rate": 8.684537309314178e-05, "loss": 0.28578667640686034, "step": 31510 }, { "epoch": 0.13532194774306003, "grad_norm": 3.201517105102539, "learning_rate": 8.684106137302415e-05, "loss": 0.1512368679046631, "step": 31520 }, { "epoch": 0.13536487983308004, "grad_norm": 15.804793357849121, "learning_rate": 8.683674965290653e-05, "loss": 0.19707468748092652, "step": 31530 }, { "epoch": 0.13540781192310003, "grad_norm": 0.42527276277542114, "learning_rate": 8.683243793278891e-05, "loss": 0.142280113697052, "step": 31540 }, { "epoch": 0.13545074401312004, "grad_norm": 0.46108368039131165, "learning_rate": 8.682812621267128e-05, "loss": 0.35723319053649905, "step": 31550 }, { "epoch": 0.13549367610314006, "grad_norm": 1.848292589187622, "learning_rate": 8.682381449255366e-05, "loss": 0.28166794776916504, "step": 31560 }, { "epoch": 0.13553660819316005, "grad_norm": 1.9825315475463867, "learning_rate": 8.681950277243604e-05, "loss": 0.24795372486114503, "step": 31570 }, { "epoch": 0.13557954028318006, "grad_norm": 0.008994282223284245, "learning_rate": 8.681519105231842e-05, "loss": 0.10199071168899536, "step": 31580 }, { "epoch": 0.13562247237320008, "grad_norm": 0.554589569568634, "learning_rate": 8.68108793322008e-05, "loss": 0.18277888298034667, "step": 31590 }, { "epoch": 0.1356654044632201, "grad_norm": 1.266161322593689, "learning_rate": 8.680656761208317e-05, "loss": 0.213988733291626, "step": 31600 }, { "epoch": 0.13570833655324008, "grad_norm": 0.0008511008927598596, "learning_rate": 8.680225589196555e-05, "loss": 0.2665504693984985, "step": 31610 }, { "epoch": 0.1357512686432601, "grad_norm": 2.124117851257324, "learning_rate": 8.679794417184793e-05, "loss": 0.35089409351348877, "step": 31620 }, { "epoch": 0.1357942007332801, "grad_norm": 0.08872847259044647, "learning_rate": 8.67936324517303e-05, "loss": 0.11770111322402954, "step": 31630 }, { "epoch": 0.1358371328233001, "grad_norm": 0.013181965798139572, "learning_rate": 8.678932073161268e-05, "loss": 0.1267725944519043, "step": 31640 }, { "epoch": 0.1358800649133201, "grad_norm": 0.3185889720916748, "learning_rate": 8.678500901149506e-05, "loss": 0.16408768892288209, "step": 31650 }, { "epoch": 0.13592299700334012, "grad_norm": 0.0036954637616872787, "learning_rate": 8.678069729137742e-05, "loss": 0.2898323774337769, "step": 31660 }, { "epoch": 0.1359659290933601, "grad_norm": 0.06999905407428741, "learning_rate": 8.67763855712598e-05, "loss": 0.2554055690765381, "step": 31670 }, { "epoch": 0.13600886118338013, "grad_norm": 9.497495651245117, "learning_rate": 8.677207385114218e-05, "loss": 0.3093088626861572, "step": 31680 }, { "epoch": 0.13605179327340014, "grad_norm": 0.4421633183956146, "learning_rate": 8.676776213102455e-05, "loss": 0.19062780141830443, "step": 31690 }, { "epoch": 0.13609472536342015, "grad_norm": 2.385646104812622, "learning_rate": 8.676345041090693e-05, "loss": 0.34704439640045165, "step": 31700 }, { "epoch": 0.13613765745344014, "grad_norm": 1.0288870334625244, "learning_rate": 8.675913869078931e-05, "loss": 0.2951198101043701, "step": 31710 }, { "epoch": 0.13618058954346016, "grad_norm": 0.036432646214962006, "learning_rate": 8.675482697067169e-05, "loss": 0.3489818811416626, "step": 31720 }, { "epoch": 0.13622352163348017, "grad_norm": 2.0441031455993652, "learning_rate": 8.675051525055406e-05, "loss": 0.194374942779541, "step": 31730 }, { "epoch": 0.13626645372350016, "grad_norm": 0.02840869128704071, "learning_rate": 8.674620353043643e-05, "loss": 0.35605788230895996, "step": 31740 }, { "epoch": 0.13630938581352017, "grad_norm": 0.003902270458638668, "learning_rate": 8.67418918103188e-05, "loss": 0.12478889226913452, "step": 31750 }, { "epoch": 0.1363523179035402, "grad_norm": 1.4010862112045288, "learning_rate": 8.673758009020118e-05, "loss": 0.25406508445739745, "step": 31760 }, { "epoch": 0.13639524999356017, "grad_norm": 0.030482197180390358, "learning_rate": 8.673326837008356e-05, "loss": 0.1824798583984375, "step": 31770 }, { "epoch": 0.1364381820835802, "grad_norm": 0.1671326905488968, "learning_rate": 8.672895664996594e-05, "loss": 0.19233707189559937, "step": 31780 }, { "epoch": 0.1364811141736002, "grad_norm": 0.020653098821640015, "learning_rate": 8.672464492984831e-05, "loss": 0.17775663137435913, "step": 31790 }, { "epoch": 0.13652404626362022, "grad_norm": 22.064794540405273, "learning_rate": 8.672033320973069e-05, "loss": 0.26563799381256104, "step": 31800 }, { "epoch": 0.1365669783536402, "grad_norm": 0.03927993029356003, "learning_rate": 8.671602148961307e-05, "loss": 0.17628468275070192, "step": 31810 }, { "epoch": 0.13660991044366022, "grad_norm": 1.8453829288482666, "learning_rate": 8.671170976949545e-05, "loss": 0.1508329391479492, "step": 31820 }, { "epoch": 0.13665284253368024, "grad_norm": 0.1535903513431549, "learning_rate": 8.670739804937782e-05, "loss": 0.3724507331848145, "step": 31830 }, { "epoch": 0.13669577462370022, "grad_norm": 1.699593186378479, "learning_rate": 8.67030863292602e-05, "loss": 0.18699527978897096, "step": 31840 }, { "epoch": 0.13673870671372024, "grad_norm": 0.051007404923439026, "learning_rate": 8.669877460914258e-05, "loss": 0.27728071212768557, "step": 31850 }, { "epoch": 0.13678163880374025, "grad_norm": 5.522481441497803, "learning_rate": 8.669446288902496e-05, "loss": 0.3032398700714111, "step": 31860 }, { "epoch": 0.13682457089376024, "grad_norm": 0.02729124389588833, "learning_rate": 8.669015116890733e-05, "loss": 0.1534939408302307, "step": 31870 }, { "epoch": 0.13686750298378025, "grad_norm": 2.797128438949585, "learning_rate": 8.668583944878971e-05, "loss": 0.3220533847808838, "step": 31880 }, { "epoch": 0.13691043507380027, "grad_norm": 1.324243426322937, "learning_rate": 8.668152772867209e-05, "loss": 0.17927749156951905, "step": 31890 }, { "epoch": 0.13695336716382026, "grad_norm": 0.02539249137043953, "learning_rate": 8.667721600855445e-05, "loss": 0.28399274349212644, "step": 31900 }, { "epoch": 0.13699629925384027, "grad_norm": 0.3492501378059387, "learning_rate": 8.667290428843683e-05, "loss": 0.27191436290740967, "step": 31910 }, { "epoch": 0.13703923134386028, "grad_norm": 0.005278902594000101, "learning_rate": 8.66685925683192e-05, "loss": 0.16442601680755614, "step": 31920 }, { "epoch": 0.1370821634338803, "grad_norm": 0.029702888801693916, "learning_rate": 8.666428084820158e-05, "loss": 0.19043039083480834, "step": 31930 }, { "epoch": 0.1371250955239003, "grad_norm": 3.249835968017578, "learning_rate": 8.665996912808396e-05, "loss": 0.22955794334411622, "step": 31940 }, { "epoch": 0.1371680276139203, "grad_norm": 0.15154388546943665, "learning_rate": 8.665565740796634e-05, "loss": 0.19120630025863647, "step": 31950 }, { "epoch": 0.13721095970394032, "grad_norm": 0.06035231798887253, "learning_rate": 8.665134568784872e-05, "loss": 0.3377244234085083, "step": 31960 }, { "epoch": 0.1372538917939603, "grad_norm": 0.1840345859527588, "learning_rate": 8.664703396773109e-05, "loss": 0.09188364148139953, "step": 31970 }, { "epoch": 0.13729682388398032, "grad_norm": 1.6915229558944702, "learning_rate": 8.664272224761347e-05, "loss": 0.42070856094360354, "step": 31980 }, { "epoch": 0.13733975597400033, "grad_norm": 0.14676789939403534, "learning_rate": 8.663841052749583e-05, "loss": 0.06923063993453979, "step": 31990 }, { "epoch": 0.13738268806402032, "grad_norm": 0.46177831292152405, "learning_rate": 8.663409880737821e-05, "loss": 0.3868566513061523, "step": 32000 }, { "epoch": 0.13738268806402032, "eval_loss": 0.48874905705451965, "eval_runtime": 27.4796, "eval_samples_per_second": 3.639, "eval_steps_per_second": 3.639, "step": 32000 }, { "epoch": 0.13742562015404033, "grad_norm": 0.011651808395981789, "learning_rate": 8.662978708726059e-05, "loss": 0.3492263317108154, "step": 32010 }, { "epoch": 0.13746855224406035, "grad_norm": 0.3724205493927002, "learning_rate": 8.662547536714297e-05, "loss": 0.23693232536315917, "step": 32020 }, { "epoch": 0.13751148433408036, "grad_norm": 1.7988815307617188, "learning_rate": 8.662116364702534e-05, "loss": 0.17499951124191285, "step": 32030 }, { "epoch": 0.13755441642410035, "grad_norm": 0.036245301365852356, "learning_rate": 8.661685192690772e-05, "loss": 0.2796959400177002, "step": 32040 }, { "epoch": 0.13759734851412037, "grad_norm": 0.031076082959771156, "learning_rate": 8.661254020679011e-05, "loss": 0.3143959045410156, "step": 32050 }, { "epoch": 0.13764028060414038, "grad_norm": 0.21341530978679657, "learning_rate": 8.660822848667249e-05, "loss": 0.2533440351486206, "step": 32060 }, { "epoch": 0.13768321269416037, "grad_norm": 0.3128828704357147, "learning_rate": 8.660391676655485e-05, "loss": 0.37330625057220457, "step": 32070 }, { "epoch": 0.13772614478418038, "grad_norm": 0.05614300072193146, "learning_rate": 8.659960504643723e-05, "loss": 0.1680017113685608, "step": 32080 }, { "epoch": 0.1377690768742004, "grad_norm": 0.010020498186349869, "learning_rate": 8.659529332631961e-05, "loss": 0.11806988716125488, "step": 32090 }, { "epoch": 0.13781200896422038, "grad_norm": 0.06427690386772156, "learning_rate": 8.659098160620198e-05, "loss": 0.2253105878829956, "step": 32100 }, { "epoch": 0.1378549410542404, "grad_norm": 0.04789144545793533, "learning_rate": 8.658666988608436e-05, "loss": 0.10743888616561889, "step": 32110 }, { "epoch": 0.1378978731442604, "grad_norm": 0.019688574597239494, "learning_rate": 8.658235816596674e-05, "loss": 0.1399633526802063, "step": 32120 }, { "epoch": 0.13794080523428043, "grad_norm": 0.16319715976715088, "learning_rate": 8.657804644584912e-05, "loss": 0.0964900016784668, "step": 32130 }, { "epoch": 0.13798373732430042, "grad_norm": 2.0481417179107666, "learning_rate": 8.65737347257315e-05, "loss": 0.35899038314819337, "step": 32140 }, { "epoch": 0.13802666941432043, "grad_norm": 0.015374544076621532, "learning_rate": 8.656942300561386e-05, "loss": 0.2831136226654053, "step": 32150 }, { "epoch": 0.13806960150434044, "grad_norm": 0.05547713488340378, "learning_rate": 8.656511128549624e-05, "loss": 0.16382434368133544, "step": 32160 }, { "epoch": 0.13811253359436043, "grad_norm": 1.0987155437469482, "learning_rate": 8.656079956537861e-05, "loss": 0.3500218391418457, "step": 32170 }, { "epoch": 0.13815546568438045, "grad_norm": 4.993858814239502, "learning_rate": 8.655648784526099e-05, "loss": 0.35623295307159425, "step": 32180 }, { "epoch": 0.13819839777440046, "grad_norm": 2.2642040252685547, "learning_rate": 8.655217612514337e-05, "loss": 0.38486814498901367, "step": 32190 }, { "epoch": 0.13824132986442045, "grad_norm": 0.10652873665094376, "learning_rate": 8.654786440502574e-05, "loss": 0.09749494791030884, "step": 32200 }, { "epoch": 0.13828426195444046, "grad_norm": 0.17305311560630798, "learning_rate": 8.654355268490812e-05, "loss": 0.22684135437011718, "step": 32210 }, { "epoch": 0.13832719404446048, "grad_norm": 0.5436341762542725, "learning_rate": 8.65392409647905e-05, "loss": 0.19008939266204833, "step": 32220 }, { "epoch": 0.1383701261344805, "grad_norm": 0.03433492034673691, "learning_rate": 8.653492924467286e-05, "loss": 0.3322418212890625, "step": 32230 }, { "epoch": 0.13841305822450048, "grad_norm": 0.032812800258398056, "learning_rate": 8.653061752455524e-05, "loss": 0.37228755950927733, "step": 32240 }, { "epoch": 0.1384559903145205, "grad_norm": 0.00702094379812479, "learning_rate": 8.652630580443762e-05, "loss": 0.29015960693359377, "step": 32250 }, { "epoch": 0.1384989224045405, "grad_norm": 0.9049208760261536, "learning_rate": 8.652199408432e-05, "loss": 0.06290289163589477, "step": 32260 }, { "epoch": 0.1385418544945605, "grad_norm": 0.31068122386932373, "learning_rate": 8.651768236420239e-05, "loss": 0.1787291646003723, "step": 32270 }, { "epoch": 0.1385847865845805, "grad_norm": 0.12767857313156128, "learning_rate": 8.651337064408476e-05, "loss": 0.2031085252761841, "step": 32280 }, { "epoch": 0.13862771867460053, "grad_norm": 1.2406351566314697, "learning_rate": 8.650905892396714e-05, "loss": 0.40726299285888673, "step": 32290 }, { "epoch": 0.1386706507646205, "grad_norm": 0.18899884819984436, "learning_rate": 8.650474720384952e-05, "loss": 0.38270137310028074, "step": 32300 }, { "epoch": 0.13871358285464053, "grad_norm": 0.06344349682331085, "learning_rate": 8.650043548373188e-05, "loss": 0.23246891498565675, "step": 32310 }, { "epoch": 0.13875651494466054, "grad_norm": 1.4559452533721924, "learning_rate": 8.649612376361426e-05, "loss": 0.27634477615356445, "step": 32320 }, { "epoch": 0.13879944703468053, "grad_norm": 1.1971638202667236, "learning_rate": 8.649181204349664e-05, "loss": 0.24708399772644044, "step": 32330 }, { "epoch": 0.13884237912470054, "grad_norm": 0.05968537554144859, "learning_rate": 8.648750032337901e-05, "loss": 0.2924589872360229, "step": 32340 }, { "epoch": 0.13888531121472056, "grad_norm": 0.07050938904285431, "learning_rate": 8.648318860326139e-05, "loss": 0.22881875038146973, "step": 32350 }, { "epoch": 0.13892824330474057, "grad_norm": 0.013065168634057045, "learning_rate": 8.647887688314377e-05, "loss": 0.1992401123046875, "step": 32360 }, { "epoch": 0.13897117539476056, "grad_norm": 1.5027996301651, "learning_rate": 8.647456516302615e-05, "loss": 0.16590211391448975, "step": 32370 }, { "epoch": 0.13901410748478057, "grad_norm": 0.3740582764148712, "learning_rate": 8.647025344290852e-05, "loss": 0.34262235164642335, "step": 32380 }, { "epoch": 0.1390570395748006, "grad_norm": 0.7905094623565674, "learning_rate": 8.64659417227909e-05, "loss": 0.4150557994842529, "step": 32390 }, { "epoch": 0.13909997166482058, "grad_norm": 0.06105173006653786, "learning_rate": 8.646163000267326e-05, "loss": 0.19480823278427123, "step": 32400 }, { "epoch": 0.1391429037548406, "grad_norm": 0.008457164280116558, "learning_rate": 8.645731828255564e-05, "loss": 0.26109282970428466, "step": 32410 }, { "epoch": 0.1391858358448606, "grad_norm": 1.475132942199707, "learning_rate": 8.645300656243802e-05, "loss": 0.5467658042907715, "step": 32420 }, { "epoch": 0.1392287679348806, "grad_norm": 0.09985917806625366, "learning_rate": 8.64486948423204e-05, "loss": 0.30468990802764895, "step": 32430 }, { "epoch": 0.1392717000249006, "grad_norm": 0.09741347283124924, "learning_rate": 8.644438312220277e-05, "loss": 0.15210098028182983, "step": 32440 }, { "epoch": 0.13931463211492062, "grad_norm": 2.485933780670166, "learning_rate": 8.644007140208515e-05, "loss": 0.22832133769989013, "step": 32450 }, { "epoch": 0.13935756420494064, "grad_norm": 0.9260600209236145, "learning_rate": 8.643575968196753e-05, "loss": 0.15713369846343994, "step": 32460 }, { "epoch": 0.13940049629496062, "grad_norm": 0.0251544788479805, "learning_rate": 8.64314479618499e-05, "loss": 0.15889840126037597, "step": 32470 }, { "epoch": 0.13944342838498064, "grad_norm": 0.8846043944358826, "learning_rate": 8.642713624173227e-05, "loss": 0.30323312282562254, "step": 32480 }, { "epoch": 0.13948636047500065, "grad_norm": 1.401147484779358, "learning_rate": 8.642282452161466e-05, "loss": 0.4211751461029053, "step": 32490 }, { "epoch": 0.13952929256502064, "grad_norm": 4.154013156890869, "learning_rate": 8.641851280149704e-05, "loss": 0.28515145778656004, "step": 32500 }, { "epoch": 0.13957222465504066, "grad_norm": 0.0072095938958227634, "learning_rate": 8.641420108137941e-05, "loss": 0.07046842575073242, "step": 32510 }, { "epoch": 0.13961515674506067, "grad_norm": 0.10458221286535263, "learning_rate": 8.640988936126179e-05, "loss": 0.3335193872451782, "step": 32520 }, { "epoch": 0.13965808883508066, "grad_norm": 0.07076560705900192, "learning_rate": 8.640557764114417e-05, "loss": 0.28255205154418944, "step": 32530 }, { "epoch": 0.13970102092510067, "grad_norm": 0.05078651383519173, "learning_rate": 8.640126592102655e-05, "loss": 0.09788010120391846, "step": 32540 }, { "epoch": 0.1397439530151207, "grad_norm": 3.8852310180664062, "learning_rate": 8.639695420090892e-05, "loss": 0.5485908508300781, "step": 32550 }, { "epoch": 0.1397868851051407, "grad_norm": 0.8929144144058228, "learning_rate": 8.639264248079129e-05, "loss": 0.15849707126617432, "step": 32560 }, { "epoch": 0.1398298171951607, "grad_norm": 2.173306465148926, "learning_rate": 8.638833076067367e-05, "loss": 0.21721320152282714, "step": 32570 }, { "epoch": 0.1398727492851807, "grad_norm": 0.19807979464530945, "learning_rate": 8.638401904055604e-05, "loss": 0.10829294919967651, "step": 32580 }, { "epoch": 0.13991568137520072, "grad_norm": 0.1245853528380394, "learning_rate": 8.637970732043842e-05, "loss": 0.2582393169403076, "step": 32590 }, { "epoch": 0.1399586134652207, "grad_norm": 1.730551838874817, "learning_rate": 8.63753956003208e-05, "loss": 0.34633893966674806, "step": 32600 }, { "epoch": 0.14000154555524072, "grad_norm": 2.3339028358459473, "learning_rate": 8.637108388020317e-05, "loss": 0.41213231086730956, "step": 32610 }, { "epoch": 0.14004447764526073, "grad_norm": 4.648174285888672, "learning_rate": 8.636677216008555e-05, "loss": 0.1871417284011841, "step": 32620 }, { "epoch": 0.14008740973528072, "grad_norm": 0.6128503084182739, "learning_rate": 8.636246043996793e-05, "loss": 0.0874154508113861, "step": 32630 }, { "epoch": 0.14013034182530074, "grad_norm": 0.02867172844707966, "learning_rate": 8.63581487198503e-05, "loss": 0.2719635009765625, "step": 32640 }, { "epoch": 0.14017327391532075, "grad_norm": 0.9320842027664185, "learning_rate": 8.635383699973267e-05, "loss": 0.040377697348594664, "step": 32650 }, { "epoch": 0.14021620600534077, "grad_norm": 0.039206285029649734, "learning_rate": 8.634952527961505e-05, "loss": 0.09994704723358154, "step": 32660 }, { "epoch": 0.14025913809536075, "grad_norm": 2.78364896774292, "learning_rate": 8.634521355949743e-05, "loss": 0.432407283782959, "step": 32670 }, { "epoch": 0.14030207018538077, "grad_norm": 0.29419511556625366, "learning_rate": 8.63409018393798e-05, "loss": 0.30876913070678713, "step": 32680 }, { "epoch": 0.14034500227540078, "grad_norm": 5.2350850105285645, "learning_rate": 8.633659011926218e-05, "loss": 0.18348314762115478, "step": 32690 }, { "epoch": 0.14038793436542077, "grad_norm": 0.013051588088274002, "learning_rate": 8.633227839914456e-05, "loss": 0.0500313937664032, "step": 32700 }, { "epoch": 0.14043086645544078, "grad_norm": 1.8791477680206299, "learning_rate": 8.632796667902693e-05, "loss": 0.16241765022277832, "step": 32710 }, { "epoch": 0.1404737985454608, "grad_norm": 0.018941722810268402, "learning_rate": 8.632365495890931e-05, "loss": 0.2940650701522827, "step": 32720 }, { "epoch": 0.14051673063548079, "grad_norm": 0.2370922714471817, "learning_rate": 8.631934323879169e-05, "loss": 0.15933622121810914, "step": 32730 }, { "epoch": 0.1405596627255008, "grad_norm": 0.06801789999008179, "learning_rate": 8.631503151867407e-05, "loss": 0.3181285858154297, "step": 32740 }, { "epoch": 0.14060259481552081, "grad_norm": 4.759559631347656, "learning_rate": 8.631071979855644e-05, "loss": 0.4246529102325439, "step": 32750 }, { "epoch": 0.1406455269055408, "grad_norm": 0.021498646587133408, "learning_rate": 8.630640807843882e-05, "loss": 0.09310616850852967, "step": 32760 }, { "epoch": 0.14068845899556082, "grad_norm": 0.009470085613429546, "learning_rate": 8.63020963583212e-05, "loss": 0.15859633684158325, "step": 32770 }, { "epoch": 0.14073139108558083, "grad_norm": 0.5190895795822144, "learning_rate": 8.629778463820358e-05, "loss": 0.32474589347839355, "step": 32780 }, { "epoch": 0.14077432317560085, "grad_norm": 0.008953122422099113, "learning_rate": 8.629347291808595e-05, "loss": 0.2130033016204834, "step": 32790 }, { "epoch": 0.14081725526562083, "grad_norm": 0.12810958921909332, "learning_rate": 8.628916119796833e-05, "loss": 0.14106868505477904, "step": 32800 }, { "epoch": 0.14086018735564085, "grad_norm": 42.244422912597656, "learning_rate": 8.62848494778507e-05, "loss": 0.2511881351470947, "step": 32810 }, { "epoch": 0.14090311944566086, "grad_norm": 0.08070328831672668, "learning_rate": 8.628053775773307e-05, "loss": 0.003995791077613831, "step": 32820 }, { "epoch": 0.14094605153568085, "grad_norm": 0.0576271191239357, "learning_rate": 8.627622603761545e-05, "loss": 0.1484993577003479, "step": 32830 }, { "epoch": 0.14098898362570086, "grad_norm": 2.7077083587646484, "learning_rate": 8.627191431749783e-05, "loss": 0.31101303100585936, "step": 32840 }, { "epoch": 0.14103191571572088, "grad_norm": 0.6947262287139893, "learning_rate": 8.62676025973802e-05, "loss": 0.3318142890930176, "step": 32850 }, { "epoch": 0.14107484780574087, "grad_norm": 2.176297187805176, "learning_rate": 8.626329087726258e-05, "loss": 0.22213470935821533, "step": 32860 }, { "epoch": 0.14111777989576088, "grad_norm": 2.0945627689361572, "learning_rate": 8.625897915714496e-05, "loss": 0.08620821237564087, "step": 32870 }, { "epoch": 0.1411607119857809, "grad_norm": 0.004814998712390661, "learning_rate": 8.625466743702734e-05, "loss": 0.13406879901885987, "step": 32880 }, { "epoch": 0.1412036440758009, "grad_norm": 0.11976204067468643, "learning_rate": 8.62503557169097e-05, "loss": 0.15420854091644287, "step": 32890 }, { "epoch": 0.1412465761658209, "grad_norm": 0.0031996588222682476, "learning_rate": 8.624604399679208e-05, "loss": 0.3452379941940308, "step": 32900 }, { "epoch": 0.1412895082558409, "grad_norm": 0.20073994994163513, "learning_rate": 8.624173227667445e-05, "loss": 0.12277450561523437, "step": 32910 }, { "epoch": 0.14133244034586093, "grad_norm": 2.7127232551574707, "learning_rate": 8.623742055655683e-05, "loss": 0.21957502365112305, "step": 32920 }, { "epoch": 0.1413753724358809, "grad_norm": 0.026280561462044716, "learning_rate": 8.623310883643921e-05, "loss": 0.29603774547576905, "step": 32930 }, { "epoch": 0.14141830452590093, "grad_norm": 0.009255579672753811, "learning_rate": 8.622879711632159e-05, "loss": 0.18040038347244264, "step": 32940 }, { "epoch": 0.14146123661592094, "grad_norm": 0.021447597071528435, "learning_rate": 8.622448539620396e-05, "loss": 0.20096633434295655, "step": 32950 }, { "epoch": 0.14150416870594093, "grad_norm": 0.15856556594371796, "learning_rate": 8.622017367608634e-05, "loss": 0.21111953258514404, "step": 32960 }, { "epoch": 0.14154710079596095, "grad_norm": 0.9573673605918884, "learning_rate": 8.621586195596872e-05, "loss": 0.31001458168029783, "step": 32970 }, { "epoch": 0.14159003288598096, "grad_norm": 0.006918746512383223, "learning_rate": 8.62115502358511e-05, "loss": 0.12518359422683717, "step": 32980 }, { "epoch": 0.14163296497600097, "grad_norm": 0.004179741255939007, "learning_rate": 8.620723851573347e-05, "loss": 0.19539453983306884, "step": 32990 }, { "epoch": 0.14167589706602096, "grad_norm": 0.005447585601359606, "learning_rate": 8.620292679561585e-05, "loss": 0.2963005542755127, "step": 33000 }, { "epoch": 0.14167589706602096, "eval_loss": 0.47957244515419006, "eval_runtime": 27.4369, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 33000 }, { "epoch": 0.14171882915604098, "grad_norm": 1.1165666580200195, "learning_rate": 8.619861507549823e-05, "loss": 0.2317833423614502, "step": 33010 }, { "epoch": 0.141761761246061, "grad_norm": 0.40649619698524475, "learning_rate": 8.61943033553806e-05, "loss": 0.19553804397583008, "step": 33020 }, { "epoch": 0.14180469333608098, "grad_norm": 1.847734808921814, "learning_rate": 8.618999163526298e-05, "loss": 0.3180288314819336, "step": 33030 }, { "epoch": 0.141847625426101, "grad_norm": 0.02948109805583954, "learning_rate": 8.618567991514536e-05, "loss": 0.24348065853118897, "step": 33040 }, { "epoch": 0.141890557516121, "grad_norm": 0.050934720784425735, "learning_rate": 8.618136819502772e-05, "loss": 0.11437109708786011, "step": 33050 }, { "epoch": 0.141933489606141, "grad_norm": 1.0916680097579956, "learning_rate": 8.61770564749101e-05, "loss": 0.3583144903182983, "step": 33060 }, { "epoch": 0.141976421696161, "grad_norm": 4.1211323738098145, "learning_rate": 8.617274475479248e-05, "loss": 0.2798927307128906, "step": 33070 }, { "epoch": 0.14201935378618102, "grad_norm": 1.4547139406204224, "learning_rate": 8.616843303467486e-05, "loss": 0.3336412668228149, "step": 33080 }, { "epoch": 0.14206228587620104, "grad_norm": 0.12225791811943054, "learning_rate": 8.616412131455723e-05, "loss": 0.20298798084259034, "step": 33090 }, { "epoch": 0.14210521796622103, "grad_norm": 0.34304502606391907, "learning_rate": 8.615980959443961e-05, "loss": 0.24174034595489502, "step": 33100 }, { "epoch": 0.14214815005624104, "grad_norm": 1.5324758291244507, "learning_rate": 8.615549787432199e-05, "loss": 0.08523032665252686, "step": 33110 }, { "epoch": 0.14219108214626106, "grad_norm": 1.3764361143112183, "learning_rate": 8.615118615420437e-05, "loss": 0.22169878482818603, "step": 33120 }, { "epoch": 0.14223401423628104, "grad_norm": 1.85866379737854, "learning_rate": 8.614687443408674e-05, "loss": 0.4486417770385742, "step": 33130 }, { "epoch": 0.14227694632630106, "grad_norm": 0.02785349264740944, "learning_rate": 8.61425627139691e-05, "loss": 0.28274946212768554, "step": 33140 }, { "epoch": 0.14231987841632107, "grad_norm": 0.09572744369506836, "learning_rate": 8.613825099385148e-05, "loss": 0.3118321180343628, "step": 33150 }, { "epoch": 0.14236281050634106, "grad_norm": 0.05197792127728462, "learning_rate": 8.613393927373386e-05, "loss": 0.15959925651550294, "step": 33160 }, { "epoch": 0.14240574259636107, "grad_norm": 11.495828628540039, "learning_rate": 8.612962755361624e-05, "loss": 0.4407163143157959, "step": 33170 }, { "epoch": 0.1424486746863811, "grad_norm": 0.09204380214214325, "learning_rate": 8.612531583349862e-05, "loss": 0.2962942600250244, "step": 33180 }, { "epoch": 0.14249160677640108, "grad_norm": 0.0430777408182621, "learning_rate": 8.612100411338099e-05, "loss": 0.33332481384277346, "step": 33190 }, { "epoch": 0.1425345388664211, "grad_norm": 0.018142318353056908, "learning_rate": 8.611669239326337e-05, "loss": 0.3872080326080322, "step": 33200 }, { "epoch": 0.1425774709564411, "grad_norm": 0.29330870509147644, "learning_rate": 8.611238067314575e-05, "loss": 0.23970427513122558, "step": 33210 }, { "epoch": 0.14262040304646112, "grad_norm": 1.2924166917800903, "learning_rate": 8.610806895302813e-05, "loss": 0.24695563316345215, "step": 33220 }, { "epoch": 0.1426633351364811, "grad_norm": 1.2489582300186157, "learning_rate": 8.61037572329105e-05, "loss": 0.16087877750396729, "step": 33230 }, { "epoch": 0.14270626722650112, "grad_norm": 1.7169822454452515, "learning_rate": 8.609944551279288e-05, "loss": 0.24377684593200682, "step": 33240 }, { "epoch": 0.14274919931652114, "grad_norm": 1.4972940683364868, "learning_rate": 8.609513379267526e-05, "loss": 0.2680682182312012, "step": 33250 }, { "epoch": 0.14279213140654112, "grad_norm": 0.9008892178535461, "learning_rate": 8.609082207255763e-05, "loss": 0.11140121221542358, "step": 33260 }, { "epoch": 0.14283506349656114, "grad_norm": 0.009071459993720055, "learning_rate": 8.608651035244001e-05, "loss": 0.12064872980117798, "step": 33270 }, { "epoch": 0.14287799558658115, "grad_norm": 0.0024141615722328424, "learning_rate": 8.608219863232239e-05, "loss": 0.19180315732955933, "step": 33280 }, { "epoch": 0.14292092767660114, "grad_norm": 0.255353718996048, "learning_rate": 8.607788691220477e-05, "loss": 0.1809109091758728, "step": 33290 }, { "epoch": 0.14296385976662115, "grad_norm": 3.5779504776000977, "learning_rate": 8.607357519208713e-05, "loss": 0.18246450424194335, "step": 33300 }, { "epoch": 0.14300679185664117, "grad_norm": 1.472702145576477, "learning_rate": 8.606926347196951e-05, "loss": 0.2409532070159912, "step": 33310 }, { "epoch": 0.14304972394666118, "grad_norm": 2.000717878341675, "learning_rate": 8.606495175185188e-05, "loss": 0.20780344009399415, "step": 33320 }, { "epoch": 0.14309265603668117, "grad_norm": 3.4083595275878906, "learning_rate": 8.606064003173426e-05, "loss": 0.26924920082092285, "step": 33330 }, { "epoch": 0.14313558812670119, "grad_norm": 0.6678909063339233, "learning_rate": 8.605632831161664e-05, "loss": 0.015250737965106963, "step": 33340 }, { "epoch": 0.1431785202167212, "grad_norm": 1.3295899629592896, "learning_rate": 8.605201659149902e-05, "loss": 0.2518535852432251, "step": 33350 }, { "epoch": 0.1432214523067412, "grad_norm": 0.013177858665585518, "learning_rate": 8.60477048713814e-05, "loss": 0.041883692145347595, "step": 33360 }, { "epoch": 0.1432643843967612, "grad_norm": 0.0020102905109524727, "learning_rate": 8.604339315126377e-05, "loss": 0.19797074794769287, "step": 33370 }, { "epoch": 0.14330731648678122, "grad_norm": 0.26644909381866455, "learning_rate": 8.603908143114614e-05, "loss": 0.27244253158569337, "step": 33380 }, { "epoch": 0.1433502485768012, "grad_norm": 0.0014582815347239375, "learning_rate": 8.603476971102851e-05, "loss": 0.174778950214386, "step": 33390 }, { "epoch": 0.14339318066682122, "grad_norm": 1.2381072044372559, "learning_rate": 8.603045799091089e-05, "loss": 0.42563595771789553, "step": 33400 }, { "epoch": 0.14343611275684123, "grad_norm": 0.0005544184823520482, "learning_rate": 8.602614627079327e-05, "loss": 0.2671182632446289, "step": 33410 }, { "epoch": 0.14347904484686125, "grad_norm": 3.252762794494629, "learning_rate": 8.602183455067564e-05, "loss": 0.47478280067443845, "step": 33420 }, { "epoch": 0.14352197693688123, "grad_norm": 0.0010757598793134093, "learning_rate": 8.601752283055802e-05, "loss": 0.35443868637084963, "step": 33430 }, { "epoch": 0.14356490902690125, "grad_norm": 0.7422657012939453, "learning_rate": 8.60132111104404e-05, "loss": 0.295544958114624, "step": 33440 }, { "epoch": 0.14360784111692126, "grad_norm": 0.08659780770540237, "learning_rate": 8.600889939032278e-05, "loss": 0.26724236011505126, "step": 33450 }, { "epoch": 0.14365077320694125, "grad_norm": 0.07881903648376465, "learning_rate": 8.600458767020517e-05, "loss": 0.09010640978813171, "step": 33460 }, { "epoch": 0.14369370529696127, "grad_norm": 0.7163306474685669, "learning_rate": 8.600027595008753e-05, "loss": 0.2510689258575439, "step": 33470 }, { "epoch": 0.14373663738698128, "grad_norm": 0.031614236533641815, "learning_rate": 8.599596422996991e-05, "loss": 0.1949402093887329, "step": 33480 }, { "epoch": 0.14377956947700127, "grad_norm": 3.6333954334259033, "learning_rate": 8.599165250985229e-05, "loss": 0.3583311796188354, "step": 33490 }, { "epoch": 0.14382250156702128, "grad_norm": 0.11175089329481125, "learning_rate": 8.598734078973466e-05, "loss": 0.12634716033935547, "step": 33500 }, { "epoch": 0.1438654336570413, "grad_norm": 2.5286669731140137, "learning_rate": 8.598302906961704e-05, "loss": 0.43870835304260253, "step": 33510 }, { "epoch": 0.1439083657470613, "grad_norm": 0.5033771395683289, "learning_rate": 8.597871734949942e-05, "loss": 0.2515478849411011, "step": 33520 }, { "epoch": 0.1439512978370813, "grad_norm": 0.0760180726647377, "learning_rate": 8.59744056293818e-05, "loss": 0.22663774490356445, "step": 33530 }, { "epoch": 0.1439942299271013, "grad_norm": 4.9512810707092285, "learning_rate": 8.597009390926417e-05, "loss": 0.27660746574401857, "step": 33540 }, { "epoch": 0.14403716201712133, "grad_norm": 6.3641839027404785, "learning_rate": 8.596578218914654e-05, "loss": 0.22129251956939697, "step": 33550 }, { "epoch": 0.14408009410714132, "grad_norm": 0.016861692070961, "learning_rate": 8.596147046902891e-05, "loss": 0.2914477586746216, "step": 33560 }, { "epoch": 0.14412302619716133, "grad_norm": 0.005491959396749735, "learning_rate": 8.595715874891129e-05, "loss": 0.20667738914489747, "step": 33570 }, { "epoch": 0.14416595828718134, "grad_norm": 0.02153095416724682, "learning_rate": 8.595284702879367e-05, "loss": 0.3282850503921509, "step": 33580 }, { "epoch": 0.14420889037720133, "grad_norm": 0.1113702580332756, "learning_rate": 8.594853530867605e-05, "loss": 0.22480969429016112, "step": 33590 }, { "epoch": 0.14425182246722135, "grad_norm": 0.01539696753025055, "learning_rate": 8.594422358855842e-05, "loss": 0.060040348768234254, "step": 33600 }, { "epoch": 0.14429475455724136, "grad_norm": 0.11859507858753204, "learning_rate": 8.59399118684408e-05, "loss": 0.2394228458404541, "step": 33610 }, { "epoch": 0.14433768664726135, "grad_norm": 0.110122911632061, "learning_rate": 8.593560014832318e-05, "loss": 0.18123446702957152, "step": 33620 }, { "epoch": 0.14438061873728136, "grad_norm": 6.010119438171387, "learning_rate": 8.593128842820554e-05, "loss": 0.5520340919494628, "step": 33630 }, { "epoch": 0.14442355082730138, "grad_norm": 2.263671398162842, "learning_rate": 8.592697670808792e-05, "loss": 0.19788291454315185, "step": 33640 }, { "epoch": 0.1444664829173214, "grad_norm": 1.149238109588623, "learning_rate": 8.59226649879703e-05, "loss": 0.11483936309814453, "step": 33650 }, { "epoch": 0.14450941500734138, "grad_norm": 0.1901119351387024, "learning_rate": 8.591835326785267e-05, "loss": 0.14860138893127442, "step": 33660 }, { "epoch": 0.1445523470973614, "grad_norm": 1.1363589763641357, "learning_rate": 8.591404154773505e-05, "loss": 0.21624512672424318, "step": 33670 }, { "epoch": 0.1445952791873814, "grad_norm": 0.7980696558952332, "learning_rate": 8.590972982761744e-05, "loss": 0.046781697869300844, "step": 33680 }, { "epoch": 0.1446382112774014, "grad_norm": 2.2780401706695557, "learning_rate": 8.590541810749982e-05, "loss": 0.10103254318237305, "step": 33690 }, { "epoch": 0.1446811433674214, "grad_norm": 0.20395316183567047, "learning_rate": 8.59011063873822e-05, "loss": 0.401483154296875, "step": 33700 }, { "epoch": 0.14472407545744143, "grad_norm": 0.1297791302204132, "learning_rate": 8.589679466726456e-05, "loss": 0.15389554500579833, "step": 33710 }, { "epoch": 0.1447670075474614, "grad_norm": 0.9008198976516724, "learning_rate": 8.589248294714694e-05, "loss": 0.27394144535064696, "step": 33720 }, { "epoch": 0.14480993963748143, "grad_norm": 7.308492183685303, "learning_rate": 8.588817122702932e-05, "loss": 0.39116473197937013, "step": 33730 }, { "epoch": 0.14485287172750144, "grad_norm": 0.006610206328332424, "learning_rate": 8.588385950691169e-05, "loss": 0.47578039169311526, "step": 33740 }, { "epoch": 0.14489580381752146, "grad_norm": 0.020005585625767708, "learning_rate": 8.587954778679407e-05, "loss": 0.10659658908843994, "step": 33750 }, { "epoch": 0.14493873590754144, "grad_norm": 0.3955053687095642, "learning_rate": 8.587523606667645e-05, "loss": 0.052762389183044434, "step": 33760 }, { "epoch": 0.14498166799756146, "grad_norm": 0.07789677381515503, "learning_rate": 8.587092434655882e-05, "loss": 0.17416226863861084, "step": 33770 }, { "epoch": 0.14502460008758147, "grad_norm": 1.851404070854187, "learning_rate": 8.58666126264412e-05, "loss": 0.1859425663948059, "step": 33780 }, { "epoch": 0.14506753217760146, "grad_norm": 1.213042974472046, "learning_rate": 8.586230090632358e-05, "loss": 0.4020374774932861, "step": 33790 }, { "epoch": 0.14511046426762148, "grad_norm": 0.8941920399665833, "learning_rate": 8.585798918620594e-05, "loss": 0.27128329277038576, "step": 33800 }, { "epoch": 0.1451533963576415, "grad_norm": 3.843531370162964, "learning_rate": 8.585367746608832e-05, "loss": 0.0402255117893219, "step": 33810 }, { "epoch": 0.14519632844766148, "grad_norm": 0.19719567894935608, "learning_rate": 8.58493657459707e-05, "loss": 0.3777024269104004, "step": 33820 }, { "epoch": 0.1452392605376815, "grad_norm": 2.3761401176452637, "learning_rate": 8.584505402585308e-05, "loss": 0.2160428524017334, "step": 33830 }, { "epoch": 0.1452821926277015, "grad_norm": 0.03809600695967674, "learning_rate": 8.584074230573545e-05, "loss": 0.2593549251556396, "step": 33840 }, { "epoch": 0.14532512471772152, "grad_norm": 0.027907975018024445, "learning_rate": 8.583643058561783e-05, "loss": 0.343958306312561, "step": 33850 }, { "epoch": 0.1453680568077415, "grad_norm": 0.02713572420179844, "learning_rate": 8.583211886550021e-05, "loss": 0.40753722190856934, "step": 33860 }, { "epoch": 0.14541098889776152, "grad_norm": 3.2029054164886475, "learning_rate": 8.582780714538258e-05, "loss": 0.42441816329956056, "step": 33870 }, { "epoch": 0.14545392098778154, "grad_norm": 0.043370820581912994, "learning_rate": 8.582349542526495e-05, "loss": 0.0937444269657135, "step": 33880 }, { "epoch": 0.14549685307780152, "grad_norm": 0.2185685634613037, "learning_rate": 8.581918370514733e-05, "loss": 0.0842089295387268, "step": 33890 }, { "epoch": 0.14553978516782154, "grad_norm": 1.985492467880249, "learning_rate": 8.581487198502972e-05, "loss": 0.16136040687561035, "step": 33900 }, { "epoch": 0.14558271725784155, "grad_norm": 0.01867522858083248, "learning_rate": 8.58105602649121e-05, "loss": 0.2270805597305298, "step": 33910 }, { "epoch": 0.14562564934786154, "grad_norm": 0.01364646665751934, "learning_rate": 8.580624854479447e-05, "loss": 0.1863769292831421, "step": 33920 }, { "epoch": 0.14566858143788156, "grad_norm": 0.00913103949278593, "learning_rate": 8.580193682467685e-05, "loss": 0.44718332290649415, "step": 33930 }, { "epoch": 0.14571151352790157, "grad_norm": 0.17968043684959412, "learning_rate": 8.579762510455923e-05, "loss": 0.20691509246826173, "step": 33940 }, { "epoch": 0.14575444561792159, "grad_norm": 3.070237874984741, "learning_rate": 8.57933133844416e-05, "loss": 0.13816176652908324, "step": 33950 }, { "epoch": 0.14579737770794157, "grad_norm": 1.2599154710769653, "learning_rate": 8.578900166432397e-05, "loss": 0.4113800525665283, "step": 33960 }, { "epoch": 0.1458403097979616, "grad_norm": 0.007566008251160383, "learning_rate": 8.578468994420634e-05, "loss": 0.28733956813812256, "step": 33970 }, { "epoch": 0.1458832418879816, "grad_norm": 0.15226389467716217, "learning_rate": 8.578037822408872e-05, "loss": 0.22472400665283204, "step": 33980 }, { "epoch": 0.1459261739780016, "grad_norm": 1.13027822971344, "learning_rate": 8.57760665039711e-05, "loss": 0.14060845375061035, "step": 33990 }, { "epoch": 0.1459691060680216, "grad_norm": 0.20782363414764404, "learning_rate": 8.577175478385348e-05, "loss": 0.2679033041000366, "step": 34000 }, { "epoch": 0.1459691060680216, "eval_loss": 0.4690183997154236, "eval_runtime": 27.4503, "eval_samples_per_second": 3.643, "eval_steps_per_second": 3.643, "step": 34000 }, { "epoch": 0.14601203815804162, "grad_norm": 0.19579826295375824, "learning_rate": 8.576744306373585e-05, "loss": 0.009440401196479797, "step": 34010 }, { "epoch": 0.1460549702480616, "grad_norm": 0.06560419499874115, "learning_rate": 8.576313134361823e-05, "loss": 0.15190430879592895, "step": 34020 }, { "epoch": 0.14609790233808162, "grad_norm": 0.04176841303706169, "learning_rate": 8.575881962350061e-05, "loss": 0.3664478063583374, "step": 34030 }, { "epoch": 0.14614083442810163, "grad_norm": 0.13518881797790527, "learning_rate": 8.575450790338297e-05, "loss": 0.3177423715591431, "step": 34040 }, { "epoch": 0.14618376651812162, "grad_norm": 0.00969105027616024, "learning_rate": 8.575019618326535e-05, "loss": 0.16859444379806518, "step": 34050 }, { "epoch": 0.14622669860814164, "grad_norm": 0.030884014442563057, "learning_rate": 8.574588446314773e-05, "loss": 0.2813561916351318, "step": 34060 }, { "epoch": 0.14626963069816165, "grad_norm": 0.008490712381899357, "learning_rate": 8.57415727430301e-05, "loss": 0.3318132162094116, "step": 34070 }, { "epoch": 0.14631256278818167, "grad_norm": 1.6599934101104736, "learning_rate": 8.573726102291248e-05, "loss": 0.3509896039962769, "step": 34080 }, { "epoch": 0.14635549487820165, "grad_norm": 2.326103687286377, "learning_rate": 8.573294930279486e-05, "loss": 0.3958756923675537, "step": 34090 }, { "epoch": 0.14639842696822167, "grad_norm": 0.005022017750889063, "learning_rate": 8.572863758267724e-05, "loss": 0.2757140874862671, "step": 34100 }, { "epoch": 0.14644135905824168, "grad_norm": 0.013056546449661255, "learning_rate": 8.572432586255961e-05, "loss": 0.49457626342773436, "step": 34110 }, { "epoch": 0.14648429114826167, "grad_norm": 0.14255377650260925, "learning_rate": 8.572001414244199e-05, "loss": 0.12429465055465698, "step": 34120 }, { "epoch": 0.14652722323828168, "grad_norm": 3.096914768218994, "learning_rate": 8.571570242232437e-05, "loss": 0.3980607032775879, "step": 34130 }, { "epoch": 0.1465701553283017, "grad_norm": 0.2588160037994385, "learning_rate": 8.571139070220675e-05, "loss": 0.21047422885894776, "step": 34140 }, { "epoch": 0.14661308741832169, "grad_norm": 1.245520830154419, "learning_rate": 8.570707898208912e-05, "loss": 0.19313105344772338, "step": 34150 }, { "epoch": 0.1466560195083417, "grad_norm": 0.012002730742096901, "learning_rate": 8.57027672619715e-05, "loss": 0.19543395042419434, "step": 34160 }, { "epoch": 0.14669895159836172, "grad_norm": 0.4202781021595001, "learning_rate": 8.569845554185388e-05, "loss": 0.38264172077178954, "step": 34170 }, { "epoch": 0.14674188368838173, "grad_norm": 1.3057153224945068, "learning_rate": 8.569414382173626e-05, "loss": 0.3494687795639038, "step": 34180 }, { "epoch": 0.14678481577840172, "grad_norm": 0.11200109869241714, "learning_rate": 8.568983210161863e-05, "loss": 0.17912702560424804, "step": 34190 }, { "epoch": 0.14682774786842173, "grad_norm": 0.04614344611763954, "learning_rate": 8.568552038150101e-05, "loss": 0.2561028957366943, "step": 34200 }, { "epoch": 0.14687067995844175, "grad_norm": 0.9324910044670105, "learning_rate": 8.568120866138337e-05, "loss": 0.22830185890197754, "step": 34210 }, { "epoch": 0.14691361204846173, "grad_norm": 0.08826933801174164, "learning_rate": 8.567689694126575e-05, "loss": 0.2512583494186401, "step": 34220 }, { "epoch": 0.14695654413848175, "grad_norm": 0.8753888010978699, "learning_rate": 8.567258522114813e-05, "loss": 0.1659464120864868, "step": 34230 }, { "epoch": 0.14699947622850176, "grad_norm": 0.03925846144556999, "learning_rate": 8.56682735010305e-05, "loss": 0.15697722434997557, "step": 34240 }, { "epoch": 0.14704240831852175, "grad_norm": 0.00965725164860487, "learning_rate": 8.566396178091288e-05, "loss": 0.3077390670776367, "step": 34250 }, { "epoch": 0.14708534040854176, "grad_norm": 0.08784633129835129, "learning_rate": 8.565965006079526e-05, "loss": 0.2271416425704956, "step": 34260 }, { "epoch": 0.14712827249856178, "grad_norm": 1.227187991142273, "learning_rate": 8.565533834067764e-05, "loss": 0.31869919300079347, "step": 34270 }, { "epoch": 0.1471712045885818, "grad_norm": 0.004089116118848324, "learning_rate": 8.565102662056001e-05, "loss": 0.21388025283813478, "step": 34280 }, { "epoch": 0.14721413667860178, "grad_norm": 0.0882837250828743, "learning_rate": 8.564671490044238e-05, "loss": 0.3842825651168823, "step": 34290 }, { "epoch": 0.1472570687686218, "grad_norm": 0.05654463544487953, "learning_rate": 8.564240318032476e-05, "loss": 0.15656944513320922, "step": 34300 }, { "epoch": 0.1473000008586418, "grad_norm": 0.03386420011520386, "learning_rate": 8.563809146020713e-05, "loss": 0.2900910615921021, "step": 34310 }, { "epoch": 0.1473429329486618, "grad_norm": 22.915380477905273, "learning_rate": 8.563377974008951e-05, "loss": 0.20082504749298097, "step": 34320 }, { "epoch": 0.1473858650386818, "grad_norm": 4.276361465454102, "learning_rate": 8.562946801997189e-05, "loss": 0.2055532693862915, "step": 34330 }, { "epoch": 0.14742879712870183, "grad_norm": 0.037190258502960205, "learning_rate": 8.562515629985427e-05, "loss": 0.2975011825561523, "step": 34340 }, { "epoch": 0.14747172921872181, "grad_norm": 0.050163384526968, "learning_rate": 8.562084457973664e-05, "loss": 0.39753849506378175, "step": 34350 }, { "epoch": 0.14751466130874183, "grad_norm": 1.5702651739120483, "learning_rate": 8.561653285961902e-05, "loss": 0.25239105224609376, "step": 34360 }, { "epoch": 0.14755759339876184, "grad_norm": 1.009832501411438, "learning_rate": 8.56122211395014e-05, "loss": 0.15066080093383788, "step": 34370 }, { "epoch": 0.14760052548878186, "grad_norm": 3.242133617401123, "learning_rate": 8.560790941938377e-05, "loss": 0.4163659572601318, "step": 34380 }, { "epoch": 0.14764345757880185, "grad_norm": 2.42297101020813, "learning_rate": 8.560359769926615e-05, "loss": 0.2022876501083374, "step": 34390 }, { "epoch": 0.14768638966882186, "grad_norm": 1.9914714097976685, "learning_rate": 8.559928597914853e-05, "loss": 0.13610408306121827, "step": 34400 }, { "epoch": 0.14772932175884188, "grad_norm": 1.3775780200958252, "learning_rate": 8.559497425903091e-05, "loss": 0.1667981743812561, "step": 34410 }, { "epoch": 0.14777225384886186, "grad_norm": 0.5672871470451355, "learning_rate": 8.559066253891328e-05, "loss": 0.09758582711219788, "step": 34420 }, { "epoch": 0.14781518593888188, "grad_norm": 1.395330548286438, "learning_rate": 8.558635081879566e-05, "loss": 0.49545893669128416, "step": 34430 }, { "epoch": 0.1478581180289019, "grad_norm": 0.6163213849067688, "learning_rate": 8.558203909867804e-05, "loss": 0.41956653594970705, "step": 34440 }, { "epoch": 0.14790105011892188, "grad_norm": 0.0650164932012558, "learning_rate": 8.55777273785604e-05, "loss": 0.41419262886047364, "step": 34450 }, { "epoch": 0.1479439822089419, "grad_norm": 0.08980961889028549, "learning_rate": 8.557341565844278e-05, "loss": 0.11508655548095703, "step": 34460 }, { "epoch": 0.1479869142989619, "grad_norm": 0.10371974110603333, "learning_rate": 8.556910393832516e-05, "loss": 0.2601905107498169, "step": 34470 }, { "epoch": 0.1480298463889819, "grad_norm": 1.2678031921386719, "learning_rate": 8.556479221820753e-05, "loss": 0.2797661066055298, "step": 34480 }, { "epoch": 0.1480727784790019, "grad_norm": 0.013037784025073051, "learning_rate": 8.556048049808991e-05, "loss": 0.1015774130821228, "step": 34490 }, { "epoch": 0.14811571056902192, "grad_norm": 4.056503772735596, "learning_rate": 8.555616877797229e-05, "loss": 0.41338410377502444, "step": 34500 }, { "epoch": 0.14815864265904194, "grad_norm": 1.123368740081787, "learning_rate": 8.555185705785467e-05, "loss": 0.2618500471115112, "step": 34510 }, { "epoch": 0.14820157474906193, "grad_norm": 1.8491207361221313, "learning_rate": 8.554754533773704e-05, "loss": 0.14376912117004395, "step": 34520 }, { "epoch": 0.14824450683908194, "grad_norm": 0.02913055010139942, "learning_rate": 8.554323361761942e-05, "loss": 0.30904397964477537, "step": 34530 }, { "epoch": 0.14828743892910196, "grad_norm": 12.26372241973877, "learning_rate": 8.553892189750179e-05, "loss": 0.1958317518234253, "step": 34540 }, { "epoch": 0.14833037101912194, "grad_norm": 0.00536407670006156, "learning_rate": 8.553461017738416e-05, "loss": 0.2335893154144287, "step": 34550 }, { "epoch": 0.14837330310914196, "grad_norm": 0.021669838577508926, "learning_rate": 8.553029845726654e-05, "loss": 0.07356876134872437, "step": 34560 }, { "epoch": 0.14841623519916197, "grad_norm": 0.9531659483909607, "learning_rate": 8.552598673714892e-05, "loss": 0.37895355224609373, "step": 34570 }, { "epoch": 0.14845916728918196, "grad_norm": 1.2019336223602295, "learning_rate": 8.55216750170313e-05, "loss": 0.1651861548423767, "step": 34580 }, { "epoch": 0.14850209937920197, "grad_norm": 2.597381114959717, "learning_rate": 8.551736329691367e-05, "loss": 0.31086115837097167, "step": 34590 }, { "epoch": 0.148545031469222, "grad_norm": 11.423806190490723, "learning_rate": 8.551305157679605e-05, "loss": 0.22325286865234376, "step": 34600 }, { "epoch": 0.148587963559242, "grad_norm": 0.13982784748077393, "learning_rate": 8.550873985667843e-05, "loss": 0.32322494983673095, "step": 34610 }, { "epoch": 0.148630895649262, "grad_norm": 38.70827102661133, "learning_rate": 8.55044281365608e-05, "loss": 0.3321423292160034, "step": 34620 }, { "epoch": 0.148673827739282, "grad_norm": 0.12706917524337769, "learning_rate": 8.550011641644318e-05, "loss": 0.2533221960067749, "step": 34630 }, { "epoch": 0.14871675982930202, "grad_norm": 0.9390490055084229, "learning_rate": 8.549580469632556e-05, "loss": 0.45719470977783205, "step": 34640 }, { "epoch": 0.148759691919322, "grad_norm": 4.5604248046875, "learning_rate": 8.549149297620794e-05, "loss": 0.20422539710998536, "step": 34650 }, { "epoch": 0.14880262400934202, "grad_norm": 1.5207419395446777, "learning_rate": 8.548718125609031e-05, "loss": 0.2647045135498047, "step": 34660 }, { "epoch": 0.14884555609936204, "grad_norm": 0.008388262242078781, "learning_rate": 8.548286953597269e-05, "loss": 0.2994123935699463, "step": 34670 }, { "epoch": 0.14888848818938202, "grad_norm": 0.038906846195459366, "learning_rate": 8.547855781585507e-05, "loss": 0.07747592329978943, "step": 34680 }, { "epoch": 0.14893142027940204, "grad_norm": 0.0024672250729054213, "learning_rate": 8.547424609573745e-05, "loss": 0.4452563762664795, "step": 34690 }, { "epoch": 0.14897435236942205, "grad_norm": 1.3578065633773804, "learning_rate": 8.546993437561981e-05, "loss": 0.4643740653991699, "step": 34700 }, { "epoch": 0.14901728445944207, "grad_norm": 0.0018323465483263135, "learning_rate": 8.546562265550219e-05, "loss": 0.33861527442932127, "step": 34710 }, { "epoch": 0.14906021654946205, "grad_norm": 0.0014579965500161052, "learning_rate": 8.546131093538456e-05, "loss": 0.15541526079177856, "step": 34720 }, { "epoch": 0.14910314863948207, "grad_norm": 0.5606818199157715, "learning_rate": 8.545699921526694e-05, "loss": 0.1587108016014099, "step": 34730 }, { "epoch": 0.14914608072950208, "grad_norm": 1.585154414176941, "learning_rate": 8.545268749514932e-05, "loss": 0.4065643310546875, "step": 34740 }, { "epoch": 0.14918901281952207, "grad_norm": 0.0026883201207965612, "learning_rate": 8.54483757750317e-05, "loss": 0.14222111701965331, "step": 34750 }, { "epoch": 0.14923194490954209, "grad_norm": 0.15939301252365112, "learning_rate": 8.544406405491407e-05, "loss": 0.09957548379898071, "step": 34760 }, { "epoch": 0.1492748769995621, "grad_norm": 0.010179446078836918, "learning_rate": 8.543975233479645e-05, "loss": 0.15829137563705445, "step": 34770 }, { "epoch": 0.1493178090895821, "grad_norm": 0.0022038391325622797, "learning_rate": 8.543544061467881e-05, "loss": 0.13406049013137816, "step": 34780 }, { "epoch": 0.1493607411796021, "grad_norm": 4.663673400878906, "learning_rate": 8.543112889456119e-05, "loss": 0.43924455642700194, "step": 34790 }, { "epoch": 0.14940367326962212, "grad_norm": 1.3737190961837769, "learning_rate": 8.542681717444357e-05, "loss": 0.24775092601776122, "step": 34800 }, { "epoch": 0.14944660535964213, "grad_norm": 0.09232733398675919, "learning_rate": 8.542250545432595e-05, "loss": 0.17557703256607055, "step": 34810 }, { "epoch": 0.14948953744966212, "grad_norm": 1.6460373401641846, "learning_rate": 8.541819373420832e-05, "loss": 0.2807705640792847, "step": 34820 }, { "epoch": 0.14953246953968213, "grad_norm": 2.7181038856506348, "learning_rate": 8.54138820140907e-05, "loss": 0.24563355445861818, "step": 34830 }, { "epoch": 0.14957540162970215, "grad_norm": 0.001726570655591786, "learning_rate": 8.540957029397308e-05, "loss": 0.13799943923950195, "step": 34840 }, { "epoch": 0.14961833371972214, "grad_norm": 3.9309792518615723, "learning_rate": 8.540525857385546e-05, "loss": 0.31938905715942384, "step": 34850 }, { "epoch": 0.14966126580974215, "grad_norm": 0.9790728688240051, "learning_rate": 8.540094685373783e-05, "loss": 0.4770151138305664, "step": 34860 }, { "epoch": 0.14970419789976216, "grad_norm": 1.415012240409851, "learning_rate": 8.539663513362021e-05, "loss": 0.041997873783111574, "step": 34870 }, { "epoch": 0.14974712998978215, "grad_norm": 1.7993477582931519, "learning_rate": 8.539232341350259e-05, "loss": 0.40750322341918943, "step": 34880 }, { "epoch": 0.14979006207980217, "grad_norm": 6.537695407867432, "learning_rate": 8.538801169338497e-05, "loss": 0.5166696071624756, "step": 34890 }, { "epoch": 0.14983299416982218, "grad_norm": 0.040551360696554184, "learning_rate": 8.538369997326734e-05, "loss": 0.3467602014541626, "step": 34900 }, { "epoch": 0.14987592625984217, "grad_norm": 4.1986894607543945, "learning_rate": 8.537938825314972e-05, "loss": 0.42667579650878906, "step": 34910 }, { "epoch": 0.14991885834986218, "grad_norm": 1.751068115234375, "learning_rate": 8.53750765330321e-05, "loss": 0.1723111629486084, "step": 34920 }, { "epoch": 0.1499617904398822, "grad_norm": 1.7199437618255615, "learning_rate": 8.537076481291447e-05, "loss": 0.31918811798095703, "step": 34930 }, { "epoch": 0.1500047225299022, "grad_norm": 0.026622159406542778, "learning_rate": 8.536645309279685e-05, "loss": 0.16448687314987182, "step": 34940 }, { "epoch": 0.1500476546199222, "grad_norm": 0.0029959080275148153, "learning_rate": 8.536214137267922e-05, "loss": 0.4394689083099365, "step": 34950 }, { "epoch": 0.15009058670994221, "grad_norm": 1.8133684396743774, "learning_rate": 8.535782965256159e-05, "loss": 0.29315714836120604, "step": 34960 }, { "epoch": 0.15013351879996223, "grad_norm": 0.012649440206587315, "learning_rate": 8.535351793244397e-05, "loss": 0.06308199763298035, "step": 34970 }, { "epoch": 0.15017645088998222, "grad_norm": 0.2371370643377304, "learning_rate": 8.534920621232635e-05, "loss": 0.14844993352890015, "step": 34980 }, { "epoch": 0.15021938298000223, "grad_norm": 2.117079734802246, "learning_rate": 8.534489449220872e-05, "loss": 0.2953939914703369, "step": 34990 }, { "epoch": 0.15026231507002225, "grad_norm": 0.02927054464817047, "learning_rate": 8.53405827720911e-05, "loss": 0.3870770215988159, "step": 35000 }, { "epoch": 0.15026231507002225, "eval_loss": 0.47191593050956726, "eval_runtime": 27.5864, "eval_samples_per_second": 3.625, "eval_steps_per_second": 3.625, "step": 35000 }, { "epoch": 0.15030524716004223, "grad_norm": 0.3207988440990448, "learning_rate": 8.533627105197348e-05, "loss": 0.12940698862075806, "step": 35010 }, { "epoch": 0.15034817925006225, "grad_norm": 0.08552608639001846, "learning_rate": 8.533195933185586e-05, "loss": 0.17130987644195556, "step": 35020 }, { "epoch": 0.15039111134008226, "grad_norm": 2.546262741088867, "learning_rate": 8.532764761173822e-05, "loss": 0.20994532108306885, "step": 35030 }, { "epoch": 0.15043404343010228, "grad_norm": 0.012148946523666382, "learning_rate": 8.53233358916206e-05, "loss": 0.3189948081970215, "step": 35040 }, { "epoch": 0.15047697552012226, "grad_norm": 0.0012491026427596807, "learning_rate": 8.531902417150298e-05, "loss": 0.06121616363525391, "step": 35050 }, { "epoch": 0.15051990761014228, "grad_norm": 2.2094056606292725, "learning_rate": 8.531471245138535e-05, "loss": 0.4135121822357178, "step": 35060 }, { "epoch": 0.1505628397001623, "grad_norm": 1.279274344444275, "learning_rate": 8.531040073126773e-05, "loss": 0.2774710416793823, "step": 35070 }, { "epoch": 0.15060577179018228, "grad_norm": 0.022777825593948364, "learning_rate": 8.530608901115012e-05, "loss": 0.15013327598571777, "step": 35080 }, { "epoch": 0.1506487038802023, "grad_norm": 0.1570606827735901, "learning_rate": 8.53017772910325e-05, "loss": 0.679659652709961, "step": 35090 }, { "epoch": 0.1506916359702223, "grad_norm": 0.04781366512179375, "learning_rate": 8.529746557091488e-05, "loss": 0.3544900417327881, "step": 35100 }, { "epoch": 0.1507345680602423, "grad_norm": 0.9712049961090088, "learning_rate": 8.529315385079724e-05, "loss": 0.13232774734497071, "step": 35110 }, { "epoch": 0.1507775001502623, "grad_norm": 2.364048719406128, "learning_rate": 8.528884213067962e-05, "loss": 0.2865861654281616, "step": 35120 }, { "epoch": 0.15082043224028233, "grad_norm": 0.16181397438049316, "learning_rate": 8.5284530410562e-05, "loss": 0.10180512666702271, "step": 35130 }, { "epoch": 0.15086336433030234, "grad_norm": 0.04067031294107437, "learning_rate": 8.528021869044437e-05, "loss": 0.18961187601089477, "step": 35140 }, { "epoch": 0.15090629642032233, "grad_norm": 1.3849711418151855, "learning_rate": 8.527590697032675e-05, "loss": 0.36178433895111084, "step": 35150 }, { "epoch": 0.15094922851034234, "grad_norm": 1.1178500652313232, "learning_rate": 8.527159525020913e-05, "loss": 0.5713705062866211, "step": 35160 }, { "epoch": 0.15099216060036236, "grad_norm": 1.7924635410308838, "learning_rate": 8.52672835300915e-05, "loss": 0.2083521842956543, "step": 35170 }, { "epoch": 0.15103509269038234, "grad_norm": 0.4806084632873535, "learning_rate": 8.526297180997388e-05, "loss": 0.06166212558746338, "step": 35180 }, { "epoch": 0.15107802478040236, "grad_norm": 0.2358323186635971, "learning_rate": 8.525866008985624e-05, "loss": 0.2636121273040771, "step": 35190 }, { "epoch": 0.15112095687042237, "grad_norm": 0.12955226004123688, "learning_rate": 8.525434836973862e-05, "loss": 0.2113489627838135, "step": 35200 }, { "epoch": 0.15116388896044236, "grad_norm": 0.0469609797000885, "learning_rate": 8.5250036649621e-05, "loss": 0.2918891906738281, "step": 35210 }, { "epoch": 0.15120682105046238, "grad_norm": 1.7934201955795288, "learning_rate": 8.524572492950338e-05, "loss": 0.32982540130615234, "step": 35220 }, { "epoch": 0.1512497531404824, "grad_norm": 0.09866645187139511, "learning_rate": 8.524141320938575e-05, "loss": 0.3262962818145752, "step": 35230 }, { "epoch": 0.1512926852305024, "grad_norm": 1.6658072471618652, "learning_rate": 8.523710148926813e-05, "loss": 0.35833468437194826, "step": 35240 }, { "epoch": 0.1513356173205224, "grad_norm": 0.013539593666791916, "learning_rate": 8.523278976915051e-05, "loss": 0.16183923482894896, "step": 35250 }, { "epoch": 0.1513785494105424, "grad_norm": 2.3311607837677, "learning_rate": 8.522847804903289e-05, "loss": 0.6225554466247558, "step": 35260 }, { "epoch": 0.15142148150056242, "grad_norm": 1.097491979598999, "learning_rate": 8.522416632891526e-05, "loss": 0.2803528308868408, "step": 35270 }, { "epoch": 0.1514644135905824, "grad_norm": 0.015554245561361313, "learning_rate": 8.521985460879763e-05, "loss": 0.23459622859954835, "step": 35280 }, { "epoch": 0.15150734568060242, "grad_norm": 3.9107282161712646, "learning_rate": 8.521554288868e-05, "loss": 0.23622241020202636, "step": 35290 }, { "epoch": 0.15155027777062244, "grad_norm": 0.09115851670503616, "learning_rate": 8.52112311685624e-05, "loss": 0.13579467535018921, "step": 35300 }, { "epoch": 0.15159320986064242, "grad_norm": 0.28334158658981323, "learning_rate": 8.520691944844477e-05, "loss": 0.18963055610656737, "step": 35310 }, { "epoch": 0.15163614195066244, "grad_norm": 25.784404754638672, "learning_rate": 8.520260772832715e-05, "loss": 0.2784431934356689, "step": 35320 }, { "epoch": 0.15167907404068245, "grad_norm": 1.8637523651123047, "learning_rate": 8.519829600820953e-05, "loss": 0.21581711769104003, "step": 35330 }, { "epoch": 0.15172200613070244, "grad_norm": 0.2694854736328125, "learning_rate": 8.51939842880919e-05, "loss": 0.3204442262649536, "step": 35340 }, { "epoch": 0.15176493822072246, "grad_norm": 0.12213743478059769, "learning_rate": 8.518967256797428e-05, "loss": 0.2358182430267334, "step": 35350 }, { "epoch": 0.15180787031074247, "grad_norm": 0.08006271719932556, "learning_rate": 8.518536084785665e-05, "loss": 0.10651001930236817, "step": 35360 }, { "epoch": 0.15185080240076249, "grad_norm": 2.5007402896881104, "learning_rate": 8.518104912773902e-05, "loss": 0.1309428334236145, "step": 35370 }, { "epoch": 0.15189373449078247, "grad_norm": 2.820734739303589, "learning_rate": 8.51767374076214e-05, "loss": 0.16440974473953246, "step": 35380 }, { "epoch": 0.1519366665808025, "grad_norm": 0.014351708814501762, "learning_rate": 8.517242568750378e-05, "loss": 0.1614371657371521, "step": 35390 }, { "epoch": 0.1519795986708225, "grad_norm": 2.705998420715332, "learning_rate": 8.516811396738616e-05, "loss": 0.21752235889434815, "step": 35400 }, { "epoch": 0.1520225307608425, "grad_norm": 0.0937918946146965, "learning_rate": 8.516380224726853e-05, "loss": 0.4111818313598633, "step": 35410 }, { "epoch": 0.1520654628508625, "grad_norm": 0.09626717865467072, "learning_rate": 8.515949052715091e-05, "loss": 0.20632579326629638, "step": 35420 }, { "epoch": 0.15210839494088252, "grad_norm": 12.47982406616211, "learning_rate": 8.515517880703329e-05, "loss": 0.20486812591552733, "step": 35430 }, { "epoch": 0.1521513270309025, "grad_norm": 4.971863746643066, "learning_rate": 8.515086708691565e-05, "loss": 0.30284805297851564, "step": 35440 }, { "epoch": 0.15219425912092252, "grad_norm": 0.019148241728544235, "learning_rate": 8.514655536679803e-05, "loss": 0.24144928455352782, "step": 35450 }, { "epoch": 0.15223719121094254, "grad_norm": 0.03687391057610512, "learning_rate": 8.51422436466804e-05, "loss": 0.25850441455841067, "step": 35460 }, { "epoch": 0.15228012330096255, "grad_norm": 0.0728311687707901, "learning_rate": 8.513793192656278e-05, "loss": 0.1594499707221985, "step": 35470 }, { "epoch": 0.15232305539098254, "grad_norm": 0.8129069209098816, "learning_rate": 8.513362020644516e-05, "loss": 0.313718581199646, "step": 35480 }, { "epoch": 0.15236598748100255, "grad_norm": 0.1573277711868286, "learning_rate": 8.512930848632754e-05, "loss": 0.3664534091949463, "step": 35490 }, { "epoch": 0.15240891957102257, "grad_norm": 27.10266876220703, "learning_rate": 8.512499676620992e-05, "loss": 0.2637137174606323, "step": 35500 }, { "epoch": 0.15245185166104255, "grad_norm": 0.012873655185103416, "learning_rate": 8.512068504609229e-05, "loss": 0.1119425654411316, "step": 35510 }, { "epoch": 0.15249478375106257, "grad_norm": 0.017580613493919373, "learning_rate": 8.511637332597467e-05, "loss": 0.14549771547317505, "step": 35520 }, { "epoch": 0.15253771584108258, "grad_norm": 0.008063385263085365, "learning_rate": 8.511206160585705e-05, "loss": 0.11777844429016113, "step": 35530 }, { "epoch": 0.15258064793110257, "grad_norm": 0.03290800005197525, "learning_rate": 8.510774988573942e-05, "loss": 0.4041603565216064, "step": 35540 }, { "epoch": 0.15262358002112258, "grad_norm": 0.2318921834230423, "learning_rate": 8.51034381656218e-05, "loss": 0.44219579696655276, "step": 35550 }, { "epoch": 0.1526665121111426, "grad_norm": 0.7928853631019592, "learning_rate": 8.509912644550418e-05, "loss": 0.48702564239501955, "step": 35560 }, { "epoch": 0.15270944420116261, "grad_norm": 0.044648732990026474, "learning_rate": 8.509481472538656e-05, "loss": 0.2403254747390747, "step": 35570 }, { "epoch": 0.1527523762911826, "grad_norm": 1.3173476457595825, "learning_rate": 8.509050300526893e-05, "loss": 0.5910422801971436, "step": 35580 }, { "epoch": 0.15279530838120262, "grad_norm": 0.02605602703988552, "learning_rate": 8.508619128515131e-05, "loss": 0.20394675731658934, "step": 35590 }, { "epoch": 0.15283824047122263, "grad_norm": 0.10889720916748047, "learning_rate": 8.508187956503368e-05, "loss": 0.3139126062393188, "step": 35600 }, { "epoch": 0.15288117256124262, "grad_norm": 2.5993807315826416, "learning_rate": 8.507756784491605e-05, "loss": 0.31171181201934817, "step": 35610 }, { "epoch": 0.15292410465126263, "grad_norm": 0.08966104686260223, "learning_rate": 8.507325612479843e-05, "loss": 0.06715420484542847, "step": 35620 }, { "epoch": 0.15296703674128265, "grad_norm": 1.294317603111267, "learning_rate": 8.506894440468081e-05, "loss": 0.16832005977630615, "step": 35630 }, { "epoch": 0.15300996883130263, "grad_norm": 0.05570885166525841, "learning_rate": 8.506463268456318e-05, "loss": 0.07923851013183594, "step": 35640 }, { "epoch": 0.15305290092132265, "grad_norm": 0.770665168762207, "learning_rate": 8.506032096444556e-05, "loss": 0.1516018271446228, "step": 35650 }, { "epoch": 0.15309583301134266, "grad_norm": 0.013801711611449718, "learning_rate": 8.505600924432794e-05, "loss": 0.21667106151580812, "step": 35660 }, { "epoch": 0.15313876510136268, "grad_norm": 0.03809322789311409, "learning_rate": 8.505169752421032e-05, "loss": 0.23052852153778075, "step": 35670 }, { "epoch": 0.15318169719138267, "grad_norm": 1.606998324394226, "learning_rate": 8.50473858040927e-05, "loss": 0.2474133014678955, "step": 35680 }, { "epoch": 0.15322462928140268, "grad_norm": 1.4611258506774902, "learning_rate": 8.504307408397506e-05, "loss": 0.22607591152191162, "step": 35690 }, { "epoch": 0.1532675613714227, "grad_norm": 0.01265826728194952, "learning_rate": 8.503876236385744e-05, "loss": 0.03770926296710968, "step": 35700 }, { "epoch": 0.15331049346144268, "grad_norm": 1.7693620920181274, "learning_rate": 8.503445064373981e-05, "loss": 0.15623393058776855, "step": 35710 }, { "epoch": 0.1533534255514627, "grad_norm": 0.03232761472463608, "learning_rate": 8.503013892362219e-05, "loss": 0.13219951391220092, "step": 35720 }, { "epoch": 0.1533963576414827, "grad_norm": 0.002780086826533079, "learning_rate": 8.502582720350457e-05, "loss": 0.2331317663192749, "step": 35730 }, { "epoch": 0.1534392897315027, "grad_norm": 1.7431129217147827, "learning_rate": 8.502151548338694e-05, "loss": 0.2198162317276001, "step": 35740 }, { "epoch": 0.1534822218215227, "grad_norm": 0.009173325262963772, "learning_rate": 8.501720376326932e-05, "loss": 0.011808304488658905, "step": 35750 }, { "epoch": 0.15352515391154273, "grad_norm": 1.4654935598373413, "learning_rate": 8.50128920431517e-05, "loss": 0.5061048984527587, "step": 35760 }, { "epoch": 0.15356808600156271, "grad_norm": 0.0031481690239161253, "learning_rate": 8.500858032303408e-05, "loss": 0.3672648906707764, "step": 35770 }, { "epoch": 0.15361101809158273, "grad_norm": 0.00255988840945065, "learning_rate": 8.500426860291645e-05, "loss": 0.33251783847808836, "step": 35780 }, { "epoch": 0.15365395018160274, "grad_norm": 0.08080971240997314, "learning_rate": 8.499995688279883e-05, "loss": 0.44880151748657227, "step": 35790 }, { "epoch": 0.15369688227162276, "grad_norm": 0.02960016205906868, "learning_rate": 8.499564516268121e-05, "loss": 0.1435869574546814, "step": 35800 }, { "epoch": 0.15373981436164275, "grad_norm": 1.4224238395690918, "learning_rate": 8.499133344256359e-05, "loss": 0.24386231899261473, "step": 35810 }, { "epoch": 0.15378274645166276, "grad_norm": 2.1380560398101807, "learning_rate": 8.498702172244596e-05, "loss": 0.22258059978485106, "step": 35820 }, { "epoch": 0.15382567854168278, "grad_norm": 3.0995121002197266, "learning_rate": 8.498271000232834e-05, "loss": 0.269660496711731, "step": 35830 }, { "epoch": 0.15386861063170276, "grad_norm": 0.09611407667398453, "learning_rate": 8.497839828221072e-05, "loss": 0.04883643090724945, "step": 35840 }, { "epoch": 0.15391154272172278, "grad_norm": 1.2176029682159424, "learning_rate": 8.497408656209308e-05, "loss": 0.16574220657348632, "step": 35850 }, { "epoch": 0.1539544748117428, "grad_norm": 2.6885287761688232, "learning_rate": 8.496977484197546e-05, "loss": 0.15115034580230713, "step": 35860 }, { "epoch": 0.15399740690176278, "grad_norm": 0.02519896999001503, "learning_rate": 8.496546312185784e-05, "loss": 0.32552452087402345, "step": 35870 }, { "epoch": 0.1540403389917828, "grad_norm": 2.205995798110962, "learning_rate": 8.496115140174021e-05, "loss": 0.3075399875640869, "step": 35880 }, { "epoch": 0.1540832710818028, "grad_norm": 1.0606608390808105, "learning_rate": 8.495683968162259e-05, "loss": 0.22299137115478515, "step": 35890 }, { "epoch": 0.15412620317182282, "grad_norm": 0.01584595814347267, "learning_rate": 8.495252796150497e-05, "loss": 0.1980022072792053, "step": 35900 }, { "epoch": 0.1541691352618428, "grad_norm": 0.8658286929130554, "learning_rate": 8.494821624138735e-05, "loss": 0.045036503672599794, "step": 35910 }, { "epoch": 0.15421206735186282, "grad_norm": 5.6871771812438965, "learning_rate": 8.494390452126972e-05, "loss": 0.2639256238937378, "step": 35920 }, { "epoch": 0.15425499944188284, "grad_norm": 0.06883329898118973, "learning_rate": 8.493959280115209e-05, "loss": 0.07429475784301758, "step": 35930 }, { "epoch": 0.15429793153190283, "grad_norm": 34.059383392333984, "learning_rate": 8.493528108103446e-05, "loss": 0.3349571228027344, "step": 35940 }, { "epoch": 0.15434086362192284, "grad_norm": 0.40555959939956665, "learning_rate": 8.493096936091684e-05, "loss": 0.09937132000923157, "step": 35950 }, { "epoch": 0.15438379571194286, "grad_norm": 3.8519086837768555, "learning_rate": 8.492665764079922e-05, "loss": 0.2603405714035034, "step": 35960 }, { "epoch": 0.15442672780196284, "grad_norm": 0.5037367939949036, "learning_rate": 8.49223459206816e-05, "loss": 0.06734598278999329, "step": 35970 }, { "epoch": 0.15446965989198286, "grad_norm": 0.23887856304645538, "learning_rate": 8.491803420056397e-05, "loss": 0.30143325328826903, "step": 35980 }, { "epoch": 0.15451259198200287, "grad_norm": 0.14862442016601562, "learning_rate": 8.491372248044635e-05, "loss": 0.2484194278717041, "step": 35990 }, { "epoch": 0.1545555240720229, "grad_norm": 0.24175690114498138, "learning_rate": 8.490941076032873e-05, "loss": 0.17390332221984864, "step": 36000 }, { "epoch": 0.1545555240720229, "eval_loss": 0.5203341841697693, "eval_runtime": 27.4513, "eval_samples_per_second": 3.643, "eval_steps_per_second": 3.643, "step": 36000 }, { "epoch": 0.15459845616204287, "grad_norm": 0.9831352829933167, "learning_rate": 8.49050990402111e-05, "loss": 0.2074409008026123, "step": 36010 }, { "epoch": 0.1546413882520629, "grad_norm": 0.041125647723674774, "learning_rate": 8.490078732009348e-05, "loss": 0.14144506454467773, "step": 36020 }, { "epoch": 0.1546843203420829, "grad_norm": 0.06358243525028229, "learning_rate": 8.489647559997586e-05, "loss": 0.3165971040725708, "step": 36030 }, { "epoch": 0.1547272524321029, "grad_norm": 1.3796640634536743, "learning_rate": 8.489216387985824e-05, "loss": 0.2835206985473633, "step": 36040 }, { "epoch": 0.1547701845221229, "grad_norm": 0.26504936814308167, "learning_rate": 8.488785215974061e-05, "loss": 0.26969594955444337, "step": 36050 }, { "epoch": 0.15481311661214292, "grad_norm": 0.022056257352232933, "learning_rate": 8.488354043962299e-05, "loss": 0.1651803135871887, "step": 36060 }, { "epoch": 0.1548560487021629, "grad_norm": 0.06279142946004868, "learning_rate": 8.487922871950537e-05, "loss": 0.2755633592605591, "step": 36070 }, { "epoch": 0.15489898079218292, "grad_norm": 0.0036553270183503628, "learning_rate": 8.487491699938775e-05, "loss": 0.29899609088897705, "step": 36080 }, { "epoch": 0.15494191288220294, "grad_norm": 2.297783613204956, "learning_rate": 8.487060527927012e-05, "loss": 0.5786831378936768, "step": 36090 }, { "epoch": 0.15498484497222295, "grad_norm": 7.233226299285889, "learning_rate": 8.486629355915249e-05, "loss": 0.5784254550933838, "step": 36100 }, { "epoch": 0.15502777706224294, "grad_norm": 0.054397862404584885, "learning_rate": 8.486198183903487e-05, "loss": 0.16331673860549928, "step": 36110 }, { "epoch": 0.15507070915226295, "grad_norm": 18.670793533325195, "learning_rate": 8.485767011891724e-05, "loss": 0.19945695400238037, "step": 36120 }, { "epoch": 0.15511364124228297, "grad_norm": 0.08316559344530106, "learning_rate": 8.485335839879962e-05, "loss": 0.13774746656417847, "step": 36130 }, { "epoch": 0.15515657333230296, "grad_norm": 5.581072807312012, "learning_rate": 8.4849046678682e-05, "loss": 0.25059173107147215, "step": 36140 }, { "epoch": 0.15519950542232297, "grad_norm": 0.045821044594049454, "learning_rate": 8.484473495856437e-05, "loss": 0.1590253233909607, "step": 36150 }, { "epoch": 0.15524243751234298, "grad_norm": 0.0009496421553194523, "learning_rate": 8.484042323844675e-05, "loss": 0.3221937894821167, "step": 36160 }, { "epoch": 0.15528536960236297, "grad_norm": 0.12948010861873627, "learning_rate": 8.483611151832913e-05, "loss": 0.06770175695419312, "step": 36170 }, { "epoch": 0.155328301692383, "grad_norm": 1.3551721572875977, "learning_rate": 8.48317997982115e-05, "loss": 0.45805158615112307, "step": 36180 }, { "epoch": 0.155371233782403, "grad_norm": 1.7094125747680664, "learning_rate": 8.482748807809387e-05, "loss": 0.3378595352172852, "step": 36190 }, { "epoch": 0.155414165872423, "grad_norm": 1.1882890462875366, "learning_rate": 8.482317635797625e-05, "loss": 0.22665467262268066, "step": 36200 }, { "epoch": 0.155457097962443, "grad_norm": 0.08076924830675125, "learning_rate": 8.481886463785863e-05, "loss": 0.2971139669418335, "step": 36210 }, { "epoch": 0.15550003005246302, "grad_norm": 3.9227700233459473, "learning_rate": 8.4814552917741e-05, "loss": 0.16980533599853515, "step": 36220 }, { "epoch": 0.15554296214248303, "grad_norm": 1.2522578239440918, "learning_rate": 8.481024119762338e-05, "loss": 0.21915044784545898, "step": 36230 }, { "epoch": 0.15558589423250302, "grad_norm": 17.51162338256836, "learning_rate": 8.480592947750576e-05, "loss": 0.3069744110107422, "step": 36240 }, { "epoch": 0.15562882632252303, "grad_norm": 1.3388787508010864, "learning_rate": 8.480161775738813e-05, "loss": 0.22943878173828125, "step": 36250 }, { "epoch": 0.15567175841254305, "grad_norm": 0.29865941405296326, "learning_rate": 8.479730603727051e-05, "loss": 0.2652363538742065, "step": 36260 }, { "epoch": 0.15571469050256304, "grad_norm": 0.10734880715608597, "learning_rate": 8.479299431715289e-05, "loss": 0.11708941459655761, "step": 36270 }, { "epoch": 0.15575762259258305, "grad_norm": 0.14861108362674713, "learning_rate": 8.478868259703527e-05, "loss": 0.24161868095397948, "step": 36280 }, { "epoch": 0.15580055468260307, "grad_norm": 0.6159643530845642, "learning_rate": 8.478437087691764e-05, "loss": 0.3283845901489258, "step": 36290 }, { "epoch": 0.15584348677262305, "grad_norm": 0.009295495226979256, "learning_rate": 8.478005915680002e-05, "loss": 0.16799557209014893, "step": 36300 }, { "epoch": 0.15588641886264307, "grad_norm": 0.19504667818546295, "learning_rate": 8.47757474366824e-05, "loss": 0.06348141431808471, "step": 36310 }, { "epoch": 0.15592935095266308, "grad_norm": 1.60503089427948, "learning_rate": 8.477143571656478e-05, "loss": 0.14590160846710204, "step": 36320 }, { "epoch": 0.1559722830426831, "grad_norm": 0.0022120329085737467, "learning_rate": 8.476712399644715e-05, "loss": 0.33194334506988527, "step": 36330 }, { "epoch": 0.15601521513270308, "grad_norm": 5.712146282196045, "learning_rate": 8.476281227632953e-05, "loss": 0.260235071182251, "step": 36340 }, { "epoch": 0.1560581472227231, "grad_norm": 1.7155770063400269, "learning_rate": 8.47585005562119e-05, "loss": 0.3236240386962891, "step": 36350 }, { "epoch": 0.1561010793127431, "grad_norm": 0.96340012550354, "learning_rate": 8.475418883609427e-05, "loss": 0.3302635669708252, "step": 36360 }, { "epoch": 0.1561440114027631, "grad_norm": 0.015084980055689812, "learning_rate": 8.474987711597665e-05, "loss": 0.1243367314338684, "step": 36370 }, { "epoch": 0.15618694349278311, "grad_norm": 7.593947410583496, "learning_rate": 8.474556539585903e-05, "loss": 0.24390408992767335, "step": 36380 }, { "epoch": 0.15622987558280313, "grad_norm": 0.6490558981895447, "learning_rate": 8.47412536757414e-05, "loss": 0.25506789684295655, "step": 36390 }, { "epoch": 0.15627280767282312, "grad_norm": 0.027163298800587654, "learning_rate": 8.473694195562378e-05, "loss": 0.3595500707626343, "step": 36400 }, { "epoch": 0.15631573976284313, "grad_norm": 0.00042546645272523165, "learning_rate": 8.473263023550616e-05, "loss": 0.14694883823394775, "step": 36410 }, { "epoch": 0.15635867185286315, "grad_norm": 0.024790508672595024, "learning_rate": 8.472831851538854e-05, "loss": 0.18631467819213868, "step": 36420 }, { "epoch": 0.15640160394288316, "grad_norm": 0.0071322438307106495, "learning_rate": 8.47240067952709e-05, "loss": 0.42502856254577637, "step": 36430 }, { "epoch": 0.15644453603290315, "grad_norm": 4.77712869644165, "learning_rate": 8.471969507515328e-05, "loss": 0.22539980411529542, "step": 36440 }, { "epoch": 0.15648746812292316, "grad_norm": 0.6240074634552002, "learning_rate": 8.471538335503565e-05, "loss": 0.13043266534805298, "step": 36450 }, { "epoch": 0.15653040021294318, "grad_norm": 2.7969493865966797, "learning_rate": 8.471107163491803e-05, "loss": 0.322530198097229, "step": 36460 }, { "epoch": 0.15657333230296316, "grad_norm": 2.325531244277954, "learning_rate": 8.470675991480041e-05, "loss": 0.2518151044845581, "step": 36470 }, { "epoch": 0.15661626439298318, "grad_norm": 0.013661663047969341, "learning_rate": 8.470244819468279e-05, "loss": 0.28728699684143066, "step": 36480 }, { "epoch": 0.1566591964830032, "grad_norm": 2.1703040599823, "learning_rate": 8.469813647456518e-05, "loss": 0.1820298433303833, "step": 36490 }, { "epoch": 0.15670212857302318, "grad_norm": 0.004863078705966473, "learning_rate": 8.469382475444755e-05, "loss": 0.43201313018798826, "step": 36500 }, { "epoch": 0.1567450606630432, "grad_norm": 0.0009345727739855647, "learning_rate": 8.468951303432992e-05, "loss": 0.19721375703811644, "step": 36510 }, { "epoch": 0.1567879927530632, "grad_norm": 0.022429587319493294, "learning_rate": 8.46852013142123e-05, "loss": 0.15839117765426636, "step": 36520 }, { "epoch": 0.15683092484308322, "grad_norm": 0.006632617674767971, "learning_rate": 8.468088959409467e-05, "loss": 0.06467214822769166, "step": 36530 }, { "epoch": 0.1568738569331032, "grad_norm": 0.013401179574429989, "learning_rate": 8.467657787397705e-05, "loss": 0.35011792182922363, "step": 36540 }, { "epoch": 0.15691678902312323, "grad_norm": 0.0018219004850834608, "learning_rate": 8.467226615385943e-05, "loss": 0.2899348497390747, "step": 36550 }, { "epoch": 0.15695972111314324, "grad_norm": 8.564081192016602, "learning_rate": 8.46679544337418e-05, "loss": 0.48504009246826174, "step": 36560 }, { "epoch": 0.15700265320316323, "grad_norm": 0.031425461173057556, "learning_rate": 8.466364271362418e-05, "loss": 0.19798953533172609, "step": 36570 }, { "epoch": 0.15704558529318324, "grad_norm": 1.5136810541152954, "learning_rate": 8.465933099350656e-05, "loss": 0.2838395595550537, "step": 36580 }, { "epoch": 0.15708851738320326, "grad_norm": 0.0031062799971550703, "learning_rate": 8.465501927338892e-05, "loss": 0.1323293685913086, "step": 36590 }, { "epoch": 0.15713144947322324, "grad_norm": 0.07706096768379211, "learning_rate": 8.46507075532713e-05, "loss": 0.10394009351730346, "step": 36600 }, { "epoch": 0.15717438156324326, "grad_norm": 1.4084281921386719, "learning_rate": 8.464639583315368e-05, "loss": 0.30106706619262696, "step": 36610 }, { "epoch": 0.15721731365326327, "grad_norm": 0.29943132400512695, "learning_rate": 8.464208411303606e-05, "loss": 0.19430623054504395, "step": 36620 }, { "epoch": 0.15726024574328326, "grad_norm": 0.004392318427562714, "learning_rate": 8.463777239291843e-05, "loss": 0.2858541488647461, "step": 36630 }, { "epoch": 0.15730317783330328, "grad_norm": 0.006837871856987476, "learning_rate": 8.463346067280081e-05, "loss": 0.3170685529708862, "step": 36640 }, { "epoch": 0.1573461099233233, "grad_norm": 0.005509480368345976, "learning_rate": 8.462914895268319e-05, "loss": 0.1708204984664917, "step": 36650 }, { "epoch": 0.1573890420133433, "grad_norm": 0.007557610981166363, "learning_rate": 8.462483723256557e-05, "loss": 0.05756177306175232, "step": 36660 }, { "epoch": 0.1574319741033633, "grad_norm": 0.020103711634874344, "learning_rate": 8.462052551244793e-05, "loss": 0.1241947054862976, "step": 36670 }, { "epoch": 0.1574749061933833, "grad_norm": 0.11283276975154877, "learning_rate": 8.46162137923303e-05, "loss": 0.4595289707183838, "step": 36680 }, { "epoch": 0.15751783828340332, "grad_norm": 2.015280246734619, "learning_rate": 8.461190207221268e-05, "loss": 0.25780715942382815, "step": 36690 }, { "epoch": 0.1575607703734233, "grad_norm": 0.0033210236579179764, "learning_rate": 8.460759035209506e-05, "loss": 0.22688713073730468, "step": 36700 }, { "epoch": 0.15760370246344332, "grad_norm": 0.051066651940345764, "learning_rate": 8.460327863197745e-05, "loss": 0.153584885597229, "step": 36710 }, { "epoch": 0.15764663455346334, "grad_norm": 0.017148710787296295, "learning_rate": 8.459896691185983e-05, "loss": 0.2938904523849487, "step": 36720 }, { "epoch": 0.15768956664348333, "grad_norm": 0.14058199524879456, "learning_rate": 8.45946551917422e-05, "loss": 0.1530178666114807, "step": 36730 }, { "epoch": 0.15773249873350334, "grad_norm": 0.011079216375946999, "learning_rate": 8.459034347162458e-05, "loss": 0.23626675605773925, "step": 36740 }, { "epoch": 0.15777543082352335, "grad_norm": 1.6957927942276, "learning_rate": 8.458603175150696e-05, "loss": 0.2987285375595093, "step": 36750 }, { "epoch": 0.15781836291354337, "grad_norm": 1.522979736328125, "learning_rate": 8.458172003138932e-05, "loss": 0.325986385345459, "step": 36760 }, { "epoch": 0.15786129500356336, "grad_norm": 0.02568388730287552, "learning_rate": 8.45774083112717e-05, "loss": 0.16397476196289062, "step": 36770 }, { "epoch": 0.15790422709358337, "grad_norm": 0.028842099010944366, "learning_rate": 8.457309659115408e-05, "loss": 0.16065794229507446, "step": 36780 }, { "epoch": 0.1579471591836034, "grad_norm": 0.01377957034856081, "learning_rate": 8.456878487103646e-05, "loss": 0.1282724380493164, "step": 36790 }, { "epoch": 0.15799009127362337, "grad_norm": 0.01206178404390812, "learning_rate": 8.456447315091883e-05, "loss": 0.15207911729812623, "step": 36800 }, { "epoch": 0.1580330233636434, "grad_norm": 0.4078097343444824, "learning_rate": 8.456016143080121e-05, "loss": 0.21211161613464355, "step": 36810 }, { "epoch": 0.1580759554536634, "grad_norm": 1.6632593870162964, "learning_rate": 8.455584971068359e-05, "loss": 0.14590305089950562, "step": 36820 }, { "epoch": 0.1581188875436834, "grad_norm": 3.642359972000122, "learning_rate": 8.455153799056597e-05, "loss": 0.16237971782684327, "step": 36830 }, { "epoch": 0.1581618196337034, "grad_norm": 0.7893156409263611, "learning_rate": 8.454722627044833e-05, "loss": 0.25860013961791994, "step": 36840 }, { "epoch": 0.15820475172372342, "grad_norm": 0.0552891306579113, "learning_rate": 8.454291455033071e-05, "loss": 0.12260349988937377, "step": 36850 }, { "epoch": 0.15824768381374343, "grad_norm": 0.0010891527635976672, "learning_rate": 8.453860283021308e-05, "loss": 0.1928635835647583, "step": 36860 }, { "epoch": 0.15829061590376342, "grad_norm": 0.009116712026298046, "learning_rate": 8.453429111009546e-05, "loss": 0.39304156303405763, "step": 36870 }, { "epoch": 0.15833354799378344, "grad_norm": 1.2738218307495117, "learning_rate": 8.452997938997784e-05, "loss": 0.2733743190765381, "step": 36880 }, { "epoch": 0.15837648008380345, "grad_norm": 0.06841573864221573, "learning_rate": 8.452566766986022e-05, "loss": 0.286100172996521, "step": 36890 }, { "epoch": 0.15841941217382344, "grad_norm": 0.003039463423192501, "learning_rate": 8.45213559497426e-05, "loss": 0.28062286376953127, "step": 36900 }, { "epoch": 0.15846234426384345, "grad_norm": 3.5420756340026855, "learning_rate": 8.451704422962497e-05, "loss": 0.3182435274124146, "step": 36910 }, { "epoch": 0.15850527635386347, "grad_norm": 0.7639907002449036, "learning_rate": 8.451273250950734e-05, "loss": 0.13674689531326295, "step": 36920 }, { "epoch": 0.15854820844388345, "grad_norm": 3.4631237983703613, "learning_rate": 8.450842078938973e-05, "loss": 0.505620002746582, "step": 36930 }, { "epoch": 0.15859114053390347, "grad_norm": 0.16935129463672638, "learning_rate": 8.45041090692721e-05, "loss": 0.3636377573013306, "step": 36940 }, { "epoch": 0.15863407262392348, "grad_norm": 0.040034808218479156, "learning_rate": 8.449979734915448e-05, "loss": 0.2877780914306641, "step": 36950 }, { "epoch": 0.1586770047139435, "grad_norm": 1.0555989742279053, "learning_rate": 8.449548562903686e-05, "loss": 0.40925869941711424, "step": 36960 }, { "epoch": 0.15871993680396349, "grad_norm": 0.3184961676597595, "learning_rate": 8.449117390891924e-05, "loss": 0.27372145652770996, "step": 36970 }, { "epoch": 0.1587628688939835, "grad_norm": 3.1204659938812256, "learning_rate": 8.448686218880161e-05, "loss": 0.08157891035079956, "step": 36980 }, { "epoch": 0.15880580098400351, "grad_norm": 1.3315865993499756, "learning_rate": 8.448255046868399e-05, "loss": 0.3175251245498657, "step": 36990 }, { "epoch": 0.1588487330740235, "grad_norm": 0.01965898834168911, "learning_rate": 8.447823874856635e-05, "loss": 0.17869733572006224, "step": 37000 }, { "epoch": 0.1588487330740235, "eval_loss": 0.4670685827732086, "eval_runtime": 27.4012, "eval_samples_per_second": 3.649, "eval_steps_per_second": 3.649, "step": 37000 }, { "epoch": 0.15889166516404352, "grad_norm": 9.808270454406738, "learning_rate": 8.447392702844873e-05, "loss": 0.2566872835159302, "step": 37010 }, { "epoch": 0.15893459725406353, "grad_norm": 0.01820688508450985, "learning_rate": 8.446961530833111e-05, "loss": 0.1685408592224121, "step": 37020 }, { "epoch": 0.15897752934408352, "grad_norm": 5.085228443145752, "learning_rate": 8.446530358821349e-05, "loss": 0.13185917139053344, "step": 37030 }, { "epoch": 0.15902046143410353, "grad_norm": 0.7252869606018066, "learning_rate": 8.446099186809586e-05, "loss": 0.0749538004398346, "step": 37040 }, { "epoch": 0.15906339352412355, "grad_norm": 0.09191256016492844, "learning_rate": 8.445668014797824e-05, "loss": 0.30454657077789304, "step": 37050 }, { "epoch": 0.15910632561414353, "grad_norm": 0.0034016177523881197, "learning_rate": 8.445236842786062e-05, "loss": 0.22394905090332032, "step": 37060 }, { "epoch": 0.15914925770416355, "grad_norm": 0.07419943064451218, "learning_rate": 8.4448056707743e-05, "loss": 0.24617114067077636, "step": 37070 }, { "epoch": 0.15919218979418356, "grad_norm": 19.793487548828125, "learning_rate": 8.444374498762537e-05, "loss": 0.3588016748428345, "step": 37080 }, { "epoch": 0.15923512188420358, "grad_norm": 0.8470166921615601, "learning_rate": 8.443943326750774e-05, "loss": 0.15342183113098146, "step": 37090 }, { "epoch": 0.15927805397422357, "grad_norm": 0.09765598922967911, "learning_rate": 8.443512154739011e-05, "loss": 0.395892071723938, "step": 37100 }, { "epoch": 0.15932098606424358, "grad_norm": 0.03148749843239784, "learning_rate": 8.443080982727249e-05, "loss": 0.14055685997009276, "step": 37110 }, { "epoch": 0.1593639181542636, "grad_norm": 0.9235115647315979, "learning_rate": 8.442649810715487e-05, "loss": 0.04926349222660065, "step": 37120 }, { "epoch": 0.15940685024428358, "grad_norm": 2.640244483947754, "learning_rate": 8.442218638703725e-05, "loss": 0.11464877128601074, "step": 37130 }, { "epoch": 0.1594497823343036, "grad_norm": 0.016359224915504456, "learning_rate": 8.441787466691962e-05, "loss": 0.2091724157333374, "step": 37140 }, { "epoch": 0.1594927144243236, "grad_norm": 1.4536552429199219, "learning_rate": 8.4413562946802e-05, "loss": 0.45433721542358396, "step": 37150 }, { "epoch": 0.1595356465143436, "grad_norm": 3.6631672382354736, "learning_rate": 8.440925122668438e-05, "loss": 0.3959526300430298, "step": 37160 }, { "epoch": 0.1595785786043636, "grad_norm": 0.037146102637052536, "learning_rate": 8.440493950656676e-05, "loss": 0.11330243349075317, "step": 37170 }, { "epoch": 0.15962151069438363, "grad_norm": 0.1929967701435089, "learning_rate": 8.440062778644913e-05, "loss": 0.2217888355255127, "step": 37180 }, { "epoch": 0.15966444278440364, "grad_norm": 0.16690918803215027, "learning_rate": 8.439631606633151e-05, "loss": 0.26970853805541994, "step": 37190 }, { "epoch": 0.15970737487442363, "grad_norm": 0.018210075795650482, "learning_rate": 8.439200434621389e-05, "loss": 0.00593365877866745, "step": 37200 }, { "epoch": 0.15975030696444364, "grad_norm": 0.005669247359037399, "learning_rate": 8.438769262609626e-05, "loss": 0.35240318775177004, "step": 37210 }, { "epoch": 0.15979323905446366, "grad_norm": 8.740744590759277, "learning_rate": 8.438338090597864e-05, "loss": 0.14758760929107667, "step": 37220 }, { "epoch": 0.15983617114448365, "grad_norm": 0.017681747674942017, "learning_rate": 8.437906918586102e-05, "loss": 0.1936264991760254, "step": 37230 }, { "epoch": 0.15987910323450366, "grad_norm": 2.3267531394958496, "learning_rate": 8.43747574657434e-05, "loss": 0.25931923389434813, "step": 37240 }, { "epoch": 0.15992203532452368, "grad_norm": 0.01451034378260374, "learning_rate": 8.437044574562576e-05, "loss": 0.2892030954360962, "step": 37250 }, { "epoch": 0.15996496741454366, "grad_norm": 0.5280787348747253, "learning_rate": 8.436613402550814e-05, "loss": 0.35807051658630373, "step": 37260 }, { "epoch": 0.16000789950456368, "grad_norm": 1.7871356010437012, "learning_rate": 8.436182230539052e-05, "loss": 0.4931039333343506, "step": 37270 }, { "epoch": 0.1600508315945837, "grad_norm": 0.7012388706207275, "learning_rate": 8.435751058527289e-05, "loss": 0.07530275583267212, "step": 37280 }, { "epoch": 0.1600937636846037, "grad_norm": 1.1680361032485962, "learning_rate": 8.435319886515527e-05, "loss": 0.23157844543457032, "step": 37290 }, { "epoch": 0.1601366957746237, "grad_norm": 0.012162178754806519, "learning_rate": 8.434888714503765e-05, "loss": 0.19087698459625244, "step": 37300 }, { "epoch": 0.1601796278646437, "grad_norm": 2.447385787963867, "learning_rate": 8.434457542492002e-05, "loss": 0.21148381233215333, "step": 37310 }, { "epoch": 0.16022255995466372, "grad_norm": 3.5636308193206787, "learning_rate": 8.43402637048024e-05, "loss": 0.4460421085357666, "step": 37320 }, { "epoch": 0.1602654920446837, "grad_norm": 1.0085793733596802, "learning_rate": 8.433595198468477e-05, "loss": 0.21883647441864013, "step": 37330 }, { "epoch": 0.16030842413470373, "grad_norm": 0.15825310349464417, "learning_rate": 8.433164026456714e-05, "loss": 0.25668346881866455, "step": 37340 }, { "epoch": 0.16035135622472374, "grad_norm": 0.021424664184451103, "learning_rate": 8.432732854444952e-05, "loss": 0.3583883285522461, "step": 37350 }, { "epoch": 0.16039428831474373, "grad_norm": 0.020336715504527092, "learning_rate": 8.43230168243319e-05, "loss": 0.09908640384674072, "step": 37360 }, { "epoch": 0.16043722040476374, "grad_norm": 0.04126065596938133, "learning_rate": 8.431870510421428e-05, "loss": 0.3000126123428345, "step": 37370 }, { "epoch": 0.16048015249478376, "grad_norm": 0.26878777146339417, "learning_rate": 8.431439338409665e-05, "loss": 0.18765181303024292, "step": 37380 }, { "epoch": 0.16052308458480377, "grad_norm": 0.025135841220617294, "learning_rate": 8.431008166397903e-05, "loss": 0.03208061158657074, "step": 37390 }, { "epoch": 0.16056601667482376, "grad_norm": 0.005874186288565397, "learning_rate": 8.430576994386141e-05, "loss": 0.11698212623596191, "step": 37400 }, { "epoch": 0.16060894876484377, "grad_norm": 2.410247325897217, "learning_rate": 8.430145822374378e-05, "loss": 0.41802539825439455, "step": 37410 }, { "epoch": 0.1606518808548638, "grad_norm": 0.014765270985662937, "learning_rate": 8.429714650362616e-05, "loss": 0.19299309253692626, "step": 37420 }, { "epoch": 0.16069481294488377, "grad_norm": 0.8710483312606812, "learning_rate": 8.429283478350854e-05, "loss": 0.14042598009109497, "step": 37430 }, { "epoch": 0.1607377450349038, "grad_norm": 0.06303207576274872, "learning_rate": 8.428852306339092e-05, "loss": 0.08297204971313477, "step": 37440 }, { "epoch": 0.1607806771249238, "grad_norm": 0.020654955878853798, "learning_rate": 8.42842113432733e-05, "loss": 0.2506140470504761, "step": 37450 }, { "epoch": 0.1608236092149438, "grad_norm": 0.2789359390735626, "learning_rate": 8.427989962315567e-05, "loss": 0.20466957092285157, "step": 37460 }, { "epoch": 0.1608665413049638, "grad_norm": 4.288355350494385, "learning_rate": 8.427558790303805e-05, "loss": 0.2857557773590088, "step": 37470 }, { "epoch": 0.16090947339498382, "grad_norm": 3.2750890254974365, "learning_rate": 8.427127618292043e-05, "loss": 0.2011713981628418, "step": 37480 }, { "epoch": 0.1609524054850038, "grad_norm": 3.755577564239502, "learning_rate": 8.42669644628028e-05, "loss": 0.30210537910461427, "step": 37490 }, { "epoch": 0.16099533757502382, "grad_norm": 2.026909112930298, "learning_rate": 8.426265274268517e-05, "loss": 0.41617717742919924, "step": 37500 }, { "epoch": 0.16103826966504384, "grad_norm": 4.035932540893555, "learning_rate": 8.425834102256754e-05, "loss": 0.263014030456543, "step": 37510 }, { "epoch": 0.16108120175506385, "grad_norm": 0.35158804059028625, "learning_rate": 8.425402930244992e-05, "loss": 0.007732333242893219, "step": 37520 }, { "epoch": 0.16112413384508384, "grad_norm": 0.1560453325510025, "learning_rate": 8.42497175823323e-05, "loss": 0.1600959300994873, "step": 37530 }, { "epoch": 0.16116706593510385, "grad_norm": 0.06052204594016075, "learning_rate": 8.424540586221468e-05, "loss": 0.2086103916168213, "step": 37540 }, { "epoch": 0.16120999802512387, "grad_norm": 0.007203094661235809, "learning_rate": 8.424109414209705e-05, "loss": 0.11548494100570679, "step": 37550 }, { "epoch": 0.16125293011514386, "grad_norm": 0.02814324013888836, "learning_rate": 8.423678242197943e-05, "loss": 0.09334410429000854, "step": 37560 }, { "epoch": 0.16129586220516387, "grad_norm": 0.9473418593406677, "learning_rate": 8.423247070186181e-05, "loss": 0.09891886115074158, "step": 37570 }, { "epoch": 0.16133879429518388, "grad_norm": 1.5969558954238892, "learning_rate": 8.422815898174417e-05, "loss": 0.3447112560272217, "step": 37580 }, { "epoch": 0.16138172638520387, "grad_norm": 0.9284400343894958, "learning_rate": 8.422384726162655e-05, "loss": 0.36273190975189207, "step": 37590 }, { "epoch": 0.1614246584752239, "grad_norm": 0.7826418280601501, "learning_rate": 8.421953554150893e-05, "loss": 0.06588080525398254, "step": 37600 }, { "epoch": 0.1614675905652439, "grad_norm": 1.0751705169677734, "learning_rate": 8.42152238213913e-05, "loss": 0.20182514190673828, "step": 37610 }, { "epoch": 0.16151052265526392, "grad_norm": 1.7379590272903442, "learning_rate": 8.421091210127368e-05, "loss": 0.35259897708892823, "step": 37620 }, { "epoch": 0.1615534547452839, "grad_norm": 1.1306309700012207, "learning_rate": 8.420660038115606e-05, "loss": 0.3186183452606201, "step": 37630 }, { "epoch": 0.16159638683530392, "grad_norm": 0.6245974898338318, "learning_rate": 8.420228866103844e-05, "loss": 0.19649020433425904, "step": 37640 }, { "epoch": 0.16163931892532393, "grad_norm": 1.8757081031799316, "learning_rate": 8.419797694092081e-05, "loss": 0.16638264656066895, "step": 37650 }, { "epoch": 0.16168225101534392, "grad_norm": 1.2016990184783936, "learning_rate": 8.419366522080319e-05, "loss": 0.34046337604522703, "step": 37660 }, { "epoch": 0.16172518310536393, "grad_norm": 18.265628814697266, "learning_rate": 8.418935350068557e-05, "loss": 0.13246519565582277, "step": 37670 }, { "epoch": 0.16176811519538395, "grad_norm": 2.4141156673431396, "learning_rate": 8.418504178056795e-05, "loss": 0.24419615268707276, "step": 37680 }, { "epoch": 0.16181104728540394, "grad_norm": 1.8383233547210693, "learning_rate": 8.418073006045032e-05, "loss": 0.23851385116577148, "step": 37690 }, { "epoch": 0.16185397937542395, "grad_norm": 0.0010904427617788315, "learning_rate": 8.41764183403327e-05, "loss": 0.03627316951751709, "step": 37700 }, { "epoch": 0.16189691146544397, "grad_norm": 1.3042843341827393, "learning_rate": 8.417210662021508e-05, "loss": 0.12120405435562134, "step": 37710 }, { "epoch": 0.16193984355546398, "grad_norm": 0.03553265333175659, "learning_rate": 8.416779490009745e-05, "loss": 0.29834039211273194, "step": 37720 }, { "epoch": 0.16198277564548397, "grad_norm": 0.03280557319521904, "learning_rate": 8.416348317997983e-05, "loss": 0.22968535423278807, "step": 37730 }, { "epoch": 0.16202570773550398, "grad_norm": 0.3235560357570648, "learning_rate": 8.41591714598622e-05, "loss": 0.22959692478179933, "step": 37740 }, { "epoch": 0.162068639825524, "grad_norm": 3.005444288253784, "learning_rate": 8.415485973974457e-05, "loss": 0.22586019039154054, "step": 37750 }, { "epoch": 0.16211157191554398, "grad_norm": 0.9470056891441345, "learning_rate": 8.415054801962695e-05, "loss": 0.27291393280029297, "step": 37760 }, { "epoch": 0.162154504005564, "grad_norm": 0.06368622928857803, "learning_rate": 8.414623629950933e-05, "loss": 0.3721860647201538, "step": 37770 }, { "epoch": 0.162197436095584, "grad_norm": 0.3762516379356384, "learning_rate": 8.41419245793917e-05, "loss": 0.14585323333740235, "step": 37780 }, { "epoch": 0.162240368185604, "grad_norm": 4.130314826965332, "learning_rate": 8.413761285927408e-05, "loss": 0.3997512340545654, "step": 37790 }, { "epoch": 0.16228330027562402, "grad_norm": 0.42971429228782654, "learning_rate": 8.413330113915646e-05, "loss": 0.238565993309021, "step": 37800 }, { "epoch": 0.16232623236564403, "grad_norm": 0.07225003093481064, "learning_rate": 8.412898941903884e-05, "loss": 0.3309208154678345, "step": 37810 }, { "epoch": 0.16236916445566402, "grad_norm": 3.640429735183716, "learning_rate": 8.412467769892121e-05, "loss": 0.280135440826416, "step": 37820 }, { "epoch": 0.16241209654568403, "grad_norm": 0.04398081824183464, "learning_rate": 8.412036597880358e-05, "loss": 0.35233240127563475, "step": 37830 }, { "epoch": 0.16245502863570405, "grad_norm": 2.9799294471740723, "learning_rate": 8.411605425868596e-05, "loss": 0.5887829780578613, "step": 37840 }, { "epoch": 0.16249796072572406, "grad_norm": 1.3944103717803955, "learning_rate": 8.411174253856833e-05, "loss": 0.37382643222808837, "step": 37850 }, { "epoch": 0.16254089281574405, "grad_norm": 0.040403928607702255, "learning_rate": 8.410743081845071e-05, "loss": 0.11647080183029175, "step": 37860 }, { "epoch": 0.16258382490576406, "grad_norm": 1.098812222480774, "learning_rate": 8.410311909833309e-05, "loss": 0.19743818044662476, "step": 37870 }, { "epoch": 0.16262675699578408, "grad_norm": 3.2215824127197266, "learning_rate": 8.409880737821547e-05, "loss": 0.24659082889556885, "step": 37880 }, { "epoch": 0.16266968908580406, "grad_norm": 0.6110808253288269, "learning_rate": 8.409449565809784e-05, "loss": 0.28290505409240724, "step": 37890 }, { "epoch": 0.16271262117582408, "grad_norm": 0.11528348922729492, "learning_rate": 8.409018393798023e-05, "loss": 0.36647610664367675, "step": 37900 }, { "epoch": 0.1627555532658441, "grad_norm": 2.565178155899048, "learning_rate": 8.40858722178626e-05, "loss": 0.29935436248779296, "step": 37910 }, { "epoch": 0.16279848535586408, "grad_norm": 0.04275639355182648, "learning_rate": 8.408156049774497e-05, "loss": 0.2516483783721924, "step": 37920 }, { "epoch": 0.1628414174458841, "grad_norm": 2.4341046810150146, "learning_rate": 8.407724877762735e-05, "loss": 0.39244976043701174, "step": 37930 }, { "epoch": 0.1628843495359041, "grad_norm": 0.3288959562778473, "learning_rate": 8.407293705750973e-05, "loss": 0.1974055528640747, "step": 37940 }, { "epoch": 0.16292728162592413, "grad_norm": 0.2797980010509491, "learning_rate": 8.406862533739211e-05, "loss": 0.2573603391647339, "step": 37950 }, { "epoch": 0.1629702137159441, "grad_norm": 2.3312742710113525, "learning_rate": 8.406431361727448e-05, "loss": 0.4253364562988281, "step": 37960 }, { "epoch": 0.16301314580596413, "grad_norm": 0.15798674523830414, "learning_rate": 8.406000189715686e-05, "loss": 0.2994741201400757, "step": 37970 }, { "epoch": 0.16305607789598414, "grad_norm": 0.6870970726013184, "learning_rate": 8.405569017703924e-05, "loss": 0.3028193712234497, "step": 37980 }, { "epoch": 0.16309900998600413, "grad_norm": 1.180202603340149, "learning_rate": 8.40513784569216e-05, "loss": 0.20432660579681397, "step": 37990 }, { "epoch": 0.16314194207602414, "grad_norm": 0.05655556917190552, "learning_rate": 8.404706673680398e-05, "loss": 0.08881938457489014, "step": 38000 }, { "epoch": 0.16314194207602414, "eval_loss": 0.4883147180080414, "eval_runtime": 27.6249, "eval_samples_per_second": 3.62, "eval_steps_per_second": 3.62, "step": 38000 }, { "epoch": 0.16318487416604416, "grad_norm": 1.2886759042739868, "learning_rate": 8.404275501668636e-05, "loss": 0.30479917526245115, "step": 38010 }, { "epoch": 0.16322780625606415, "grad_norm": 1.9093838930130005, "learning_rate": 8.403844329656873e-05, "loss": 0.33849563598632815, "step": 38020 }, { "epoch": 0.16327073834608416, "grad_norm": 0.09240783751010895, "learning_rate": 8.403413157645111e-05, "loss": 0.3744537353515625, "step": 38030 }, { "epoch": 0.16331367043610417, "grad_norm": 0.06679513305425644, "learning_rate": 8.402981985633349e-05, "loss": 0.24076201915740966, "step": 38040 }, { "epoch": 0.1633566025261242, "grad_norm": 1.3797893524169922, "learning_rate": 8.402550813621587e-05, "loss": 0.30988612174987795, "step": 38050 }, { "epoch": 0.16339953461614418, "grad_norm": 3.352512836456299, "learning_rate": 8.402119641609824e-05, "loss": 0.3587747097015381, "step": 38060 }, { "epoch": 0.1634424667061642, "grad_norm": 17.774965286254883, "learning_rate": 8.401688469598061e-05, "loss": 0.402712345123291, "step": 38070 }, { "epoch": 0.1634853987961842, "grad_norm": 3.710422992706299, "learning_rate": 8.401257297586299e-05, "loss": 0.2656731128692627, "step": 38080 }, { "epoch": 0.1635283308862042, "grad_norm": 0.02095325104892254, "learning_rate": 8.400826125574536e-05, "loss": 0.04868249893188477, "step": 38090 }, { "epoch": 0.1635712629762242, "grad_norm": 0.013746113516390324, "learning_rate": 8.400394953562774e-05, "loss": 0.2931448221206665, "step": 38100 }, { "epoch": 0.16361419506624422, "grad_norm": 0.6826333999633789, "learning_rate": 8.399963781551012e-05, "loss": 0.015343394875526429, "step": 38110 }, { "epoch": 0.1636571271562642, "grad_norm": 1.5140215158462524, "learning_rate": 8.399532609539251e-05, "loss": 0.372647762298584, "step": 38120 }, { "epoch": 0.16370005924628422, "grad_norm": 0.013481021858751774, "learning_rate": 8.399101437527489e-05, "loss": 0.20894341468811034, "step": 38130 }, { "epoch": 0.16374299133630424, "grad_norm": 1.5430631637573242, "learning_rate": 8.398670265515726e-05, "loss": 0.11156511306762695, "step": 38140 }, { "epoch": 0.16378592342632425, "grad_norm": 0.005721495021134615, "learning_rate": 8.398239093503963e-05, "loss": 0.32973394393920896, "step": 38150 }, { "epoch": 0.16382885551634424, "grad_norm": 0.028001677244901657, "learning_rate": 8.3978079214922e-05, "loss": 0.2010807991027832, "step": 38160 }, { "epoch": 0.16387178760636426, "grad_norm": 2.5055196285247803, "learning_rate": 8.397376749480438e-05, "loss": 0.3040961742401123, "step": 38170 }, { "epoch": 0.16391471969638427, "grad_norm": 0.2771667540073395, "learning_rate": 8.396945577468676e-05, "loss": 0.47368788719177246, "step": 38180 }, { "epoch": 0.16395765178640426, "grad_norm": 0.07175099849700928, "learning_rate": 8.396514405456914e-05, "loss": 0.27983407974243163, "step": 38190 }, { "epoch": 0.16400058387642427, "grad_norm": 4.176136493682861, "learning_rate": 8.396083233445151e-05, "loss": 0.2169114828109741, "step": 38200 }, { "epoch": 0.1640435159664443, "grad_norm": 3.844069242477417, "learning_rate": 8.395652061433389e-05, "loss": 0.21165781021118163, "step": 38210 }, { "epoch": 0.16408644805646427, "grad_norm": 46.019081115722656, "learning_rate": 8.395220889421627e-05, "loss": 0.25102906227111815, "step": 38220 }, { "epoch": 0.1641293801464843, "grad_norm": 0.062254827469587326, "learning_rate": 8.394789717409865e-05, "loss": 0.20149946212768555, "step": 38230 }, { "epoch": 0.1641723122365043, "grad_norm": 2.3644673824310303, "learning_rate": 8.394358545398101e-05, "loss": 0.20966775417327882, "step": 38240 }, { "epoch": 0.1642152443265243, "grad_norm": 0.4895040988922119, "learning_rate": 8.393927373386339e-05, "loss": 0.4971171855926514, "step": 38250 }, { "epoch": 0.1642581764165443, "grad_norm": 0.3563346862792969, "learning_rate": 8.393496201374576e-05, "loss": 0.15563546419143676, "step": 38260 }, { "epoch": 0.16430110850656432, "grad_norm": 0.06369837373495102, "learning_rate": 8.393065029362814e-05, "loss": 0.39025804996490476, "step": 38270 }, { "epoch": 0.16434404059658433, "grad_norm": 0.1078178659081459, "learning_rate": 8.392633857351052e-05, "loss": 0.2438561201095581, "step": 38280 }, { "epoch": 0.16438697268660432, "grad_norm": 0.012385016307234764, "learning_rate": 8.39220268533929e-05, "loss": 0.15948692560195923, "step": 38290 }, { "epoch": 0.16442990477662434, "grad_norm": 1.3638696670532227, "learning_rate": 8.391771513327527e-05, "loss": 0.13995628356933593, "step": 38300 }, { "epoch": 0.16447283686664435, "grad_norm": 0.22712171077728271, "learning_rate": 8.391340341315765e-05, "loss": 0.18561434745788574, "step": 38310 }, { "epoch": 0.16451576895666434, "grad_norm": 0.051490023732185364, "learning_rate": 8.390909169304001e-05, "loss": 0.2995032548904419, "step": 38320 }, { "epoch": 0.16455870104668435, "grad_norm": 0.010072890669107437, "learning_rate": 8.390477997292239e-05, "loss": 0.29950876235961915, "step": 38330 }, { "epoch": 0.16460163313670437, "grad_norm": 0.013207241892814636, "learning_rate": 8.390046825280478e-05, "loss": 0.3185295820236206, "step": 38340 }, { "epoch": 0.16464456522672435, "grad_norm": 0.010572721250355244, "learning_rate": 8.389615653268716e-05, "loss": 0.16593341827392577, "step": 38350 }, { "epoch": 0.16468749731674437, "grad_norm": 2.061591386795044, "learning_rate": 8.389184481256954e-05, "loss": 0.02529054880142212, "step": 38360 }, { "epoch": 0.16473042940676438, "grad_norm": 0.02285430207848549, "learning_rate": 8.388753309245191e-05, "loss": 0.2505360126495361, "step": 38370 }, { "epoch": 0.1647733614967844, "grad_norm": 4.048598766326904, "learning_rate": 8.388322137233429e-05, "loss": 0.3513182640075684, "step": 38380 }, { "epoch": 0.16481629358680439, "grad_norm": 0.014832216314971447, "learning_rate": 8.387890965221667e-05, "loss": 0.04674802124500275, "step": 38390 }, { "epoch": 0.1648592256768244, "grad_norm": 9.298293113708496, "learning_rate": 8.387459793209903e-05, "loss": 0.16453347206115723, "step": 38400 }, { "epoch": 0.16490215776684441, "grad_norm": 0.04931933432817459, "learning_rate": 8.387028621198141e-05, "loss": 0.3382518768310547, "step": 38410 }, { "epoch": 0.1649450898568644, "grad_norm": 0.01446615345776081, "learning_rate": 8.386597449186379e-05, "loss": 0.2328251838684082, "step": 38420 }, { "epoch": 0.16498802194688442, "grad_norm": 0.0058226981200277805, "learning_rate": 8.386166277174617e-05, "loss": 0.24504258632659912, "step": 38430 }, { "epoch": 0.16503095403690443, "grad_norm": 0.8236973881721497, "learning_rate": 8.385735105162854e-05, "loss": 0.35173616409301756, "step": 38440 }, { "epoch": 0.16507388612692442, "grad_norm": 0.08230235427618027, "learning_rate": 8.385303933151092e-05, "loss": 0.12847490310668946, "step": 38450 }, { "epoch": 0.16511681821694443, "grad_norm": 0.006980022415518761, "learning_rate": 8.38487276113933e-05, "loss": 0.3361195087432861, "step": 38460 }, { "epoch": 0.16515975030696445, "grad_norm": 4.36317253112793, "learning_rate": 8.384441589127567e-05, "loss": 0.44304499626159666, "step": 38470 }, { "epoch": 0.16520268239698446, "grad_norm": 0.03942442685365677, "learning_rate": 8.384010417115804e-05, "loss": 0.23771591186523439, "step": 38480 }, { "epoch": 0.16524561448700445, "grad_norm": 8.045804977416992, "learning_rate": 8.383579245104042e-05, "loss": 0.24896998405456544, "step": 38490 }, { "epoch": 0.16528854657702446, "grad_norm": 0.01741672493517399, "learning_rate": 8.383148073092279e-05, "loss": 0.038726434111595154, "step": 38500 }, { "epoch": 0.16533147866704448, "grad_norm": 20.042821884155273, "learning_rate": 8.382716901080517e-05, "loss": 0.28770525455474855, "step": 38510 }, { "epoch": 0.16537441075706447, "grad_norm": 0.5119468569755554, "learning_rate": 8.382285729068755e-05, "loss": 0.2299135446548462, "step": 38520 }, { "epoch": 0.16541734284708448, "grad_norm": 0.4313623011112213, "learning_rate": 8.381854557056992e-05, "loss": 0.16424697637557983, "step": 38530 }, { "epoch": 0.1654602749371045, "grad_norm": 2.6440560817718506, "learning_rate": 8.38142338504523e-05, "loss": 0.3694345712661743, "step": 38540 }, { "epoch": 0.16550320702712448, "grad_norm": 0.9811276793479919, "learning_rate": 8.380992213033468e-05, "loss": 0.300356388092041, "step": 38550 }, { "epoch": 0.1655461391171445, "grad_norm": 0.2382563352584839, "learning_rate": 8.380561041021706e-05, "loss": 0.44238839149475095, "step": 38560 }, { "epoch": 0.1655890712071645, "grad_norm": 0.06049331650137901, "learning_rate": 8.380129869009943e-05, "loss": 0.13134862184524537, "step": 38570 }, { "epoch": 0.16563200329718453, "grad_norm": 0.07067941129207611, "learning_rate": 8.379698696998181e-05, "loss": 0.26463468074798585, "step": 38580 }, { "epoch": 0.1656749353872045, "grad_norm": 0.07888341695070267, "learning_rate": 8.379267524986419e-05, "loss": 0.17368587255477905, "step": 38590 }, { "epoch": 0.16571786747722453, "grad_norm": 0.030739109963178635, "learning_rate": 8.378836352974657e-05, "loss": 0.2229764461517334, "step": 38600 }, { "epoch": 0.16576079956724454, "grad_norm": 3.1724841594696045, "learning_rate": 8.378405180962894e-05, "loss": 0.4950582504272461, "step": 38610 }, { "epoch": 0.16580373165726453, "grad_norm": 0.7055134773254395, "learning_rate": 8.377974008951132e-05, "loss": 0.32541618347167967, "step": 38620 }, { "epoch": 0.16584666374728455, "grad_norm": 6.156464576721191, "learning_rate": 8.37754283693937e-05, "loss": 0.2963061809539795, "step": 38630 }, { "epoch": 0.16588959583730456, "grad_norm": 0.15203474462032318, "learning_rate": 8.377111664927608e-05, "loss": 0.12441049814224243, "step": 38640 }, { "epoch": 0.16593252792732455, "grad_norm": 1.184449553489685, "learning_rate": 8.376680492915844e-05, "loss": 0.23802320957183837, "step": 38650 }, { "epoch": 0.16597546001734456, "grad_norm": 0.4919390380382538, "learning_rate": 8.376249320904082e-05, "loss": 0.18030476570129395, "step": 38660 }, { "epoch": 0.16601839210736458, "grad_norm": 5.8653669357299805, "learning_rate": 8.37581814889232e-05, "loss": 0.0840020477771759, "step": 38670 }, { "epoch": 0.16606132419738456, "grad_norm": 0.10958441346883774, "learning_rate": 8.375386976880557e-05, "loss": 0.1868067502975464, "step": 38680 }, { "epoch": 0.16610425628740458, "grad_norm": 0.012140369974076748, "learning_rate": 8.374955804868795e-05, "loss": 0.19965635538101195, "step": 38690 }, { "epoch": 0.1661471883774246, "grad_norm": 5.1828932762146, "learning_rate": 8.374524632857033e-05, "loss": 0.24122245311737062, "step": 38700 }, { "epoch": 0.1661901204674446, "grad_norm": 0.11190202832221985, "learning_rate": 8.37409346084527e-05, "loss": 0.2018735408782959, "step": 38710 }, { "epoch": 0.1662330525574646, "grad_norm": 0.09691668301820755, "learning_rate": 8.373662288833508e-05, "loss": 0.17452099323272705, "step": 38720 }, { "epoch": 0.1662759846474846, "grad_norm": 1.8215162754058838, "learning_rate": 8.373231116821744e-05, "loss": 0.37970480918884275, "step": 38730 }, { "epoch": 0.16631891673750462, "grad_norm": 0.8948038220405579, "learning_rate": 8.372799944809982e-05, "loss": 0.3769418001174927, "step": 38740 }, { "epoch": 0.1663618488275246, "grad_norm": 2.60607647895813, "learning_rate": 8.37236877279822e-05, "loss": 0.42996745109558104, "step": 38750 }, { "epoch": 0.16640478091754463, "grad_norm": 0.06351848691701889, "learning_rate": 8.371937600786458e-05, "loss": 0.2707476854324341, "step": 38760 }, { "epoch": 0.16644771300756464, "grad_norm": 0.2907825708389282, "learning_rate": 8.371506428774695e-05, "loss": 0.22936019897460938, "step": 38770 }, { "epoch": 0.16649064509758463, "grad_norm": 0.14782923460006714, "learning_rate": 8.371075256762933e-05, "loss": 0.3442514657974243, "step": 38780 }, { "epoch": 0.16653357718760464, "grad_norm": 0.015982678160071373, "learning_rate": 8.370644084751171e-05, "loss": 0.2651229381561279, "step": 38790 }, { "epoch": 0.16657650927762466, "grad_norm": 1.0819584131240845, "learning_rate": 8.370212912739409e-05, "loss": 0.5299183845520019, "step": 38800 }, { "epoch": 0.16661944136764467, "grad_norm": 0.06313298642635345, "learning_rate": 8.369781740727646e-05, "loss": 0.042073649168014524, "step": 38810 }, { "epoch": 0.16666237345766466, "grad_norm": 1.9854984283447266, "learning_rate": 8.369350568715884e-05, "loss": 0.15298646688461304, "step": 38820 }, { "epoch": 0.16670530554768467, "grad_norm": 0.0021584900096058846, "learning_rate": 8.368919396704122e-05, "loss": 0.19425376653671264, "step": 38830 }, { "epoch": 0.1667482376377047, "grad_norm": 0.0018256878247484565, "learning_rate": 8.36848822469236e-05, "loss": 0.08114975094795226, "step": 38840 }, { "epoch": 0.16679116972772468, "grad_norm": 1.6823513507843018, "learning_rate": 8.368057052680597e-05, "loss": 0.3098435401916504, "step": 38850 }, { "epoch": 0.1668341018177447, "grad_norm": 1.3453181982040405, "learning_rate": 8.367625880668835e-05, "loss": 0.3250315189361572, "step": 38860 }, { "epoch": 0.1668770339077647, "grad_norm": 0.5118316411972046, "learning_rate": 8.367194708657073e-05, "loss": 0.22069883346557617, "step": 38870 }, { "epoch": 0.1669199659977847, "grad_norm": 0.005584300495684147, "learning_rate": 8.36676353664531e-05, "loss": 0.4313997268676758, "step": 38880 }, { "epoch": 0.1669628980878047, "grad_norm": 0.02512713335454464, "learning_rate": 8.366332364633548e-05, "loss": 0.22731406688690187, "step": 38890 }, { "epoch": 0.16700583017782472, "grad_norm": 0.18230260908603668, "learning_rate": 8.365901192621785e-05, "loss": 0.19736795425415038, "step": 38900 }, { "epoch": 0.16704876226784474, "grad_norm": 0.20787113904953003, "learning_rate": 8.365470020610022e-05, "loss": 0.11477090120315551, "step": 38910 }, { "epoch": 0.16709169435786472, "grad_norm": 0.20564806461334229, "learning_rate": 8.36503884859826e-05, "loss": 0.20185043811798095, "step": 38920 }, { "epoch": 0.16713462644788474, "grad_norm": 1.9511644840240479, "learning_rate": 8.364607676586498e-05, "loss": 0.27272036075592043, "step": 38930 }, { "epoch": 0.16717755853790475, "grad_norm": 0.10560095310211182, "learning_rate": 8.364176504574736e-05, "loss": 0.11860344409942628, "step": 38940 }, { "epoch": 0.16722049062792474, "grad_norm": 2.4232141971588135, "learning_rate": 8.363745332562973e-05, "loss": 0.3961954116821289, "step": 38950 }, { "epoch": 0.16726342271794475, "grad_norm": 1.6023025512695312, "learning_rate": 8.363314160551211e-05, "loss": 0.1828877091407776, "step": 38960 }, { "epoch": 0.16730635480796477, "grad_norm": 0.0037863650359213352, "learning_rate": 8.362882988539449e-05, "loss": 0.2468169927597046, "step": 38970 }, { "epoch": 0.16734928689798476, "grad_norm": 0.004623674787580967, "learning_rate": 8.362451816527685e-05, "loss": 0.2642094135284424, "step": 38980 }, { "epoch": 0.16739221898800477, "grad_norm": 0.8750384449958801, "learning_rate": 8.362020644515923e-05, "loss": 0.4149921894073486, "step": 38990 }, { "epoch": 0.16743515107802479, "grad_norm": 6.293197154998779, "learning_rate": 8.36158947250416e-05, "loss": 0.1499064326286316, "step": 39000 }, { "epoch": 0.16743515107802479, "eval_loss": 0.47232383489608765, "eval_runtime": 27.4652, "eval_samples_per_second": 3.641, "eval_steps_per_second": 3.641, "step": 39000 }, { "epoch": 0.1674780831680448, "grad_norm": 0.1681136190891266, "learning_rate": 8.361158300492398e-05, "loss": 0.2481478214263916, "step": 39010 }, { "epoch": 0.1675210152580648, "grad_norm": 0.026456119492650032, "learning_rate": 8.360727128480636e-05, "loss": 0.08642295002937317, "step": 39020 }, { "epoch": 0.1675639473480848, "grad_norm": 5.313982963562012, "learning_rate": 8.360295956468874e-05, "loss": 0.25217013359069823, "step": 39030 }, { "epoch": 0.16760687943810482, "grad_norm": 1.552626609802246, "learning_rate": 8.359864784457112e-05, "loss": 0.4480263710021973, "step": 39040 }, { "epoch": 0.1676498115281248, "grad_norm": 5.05755615234375, "learning_rate": 8.359433612445349e-05, "loss": 0.43097610473632814, "step": 39050 }, { "epoch": 0.16769274361814482, "grad_norm": 0.3707757890224457, "learning_rate": 8.359002440433587e-05, "loss": 0.20354764461517333, "step": 39060 }, { "epoch": 0.16773567570816483, "grad_norm": 11.889803886413574, "learning_rate": 8.358571268421825e-05, "loss": 0.3243248701095581, "step": 39070 }, { "epoch": 0.16777860779818482, "grad_norm": 0.4324072599411011, "learning_rate": 8.358140096410062e-05, "loss": 0.1808680295944214, "step": 39080 }, { "epoch": 0.16782153988820483, "grad_norm": 0.42824888229370117, "learning_rate": 8.3577089243983e-05, "loss": 0.14043327569961547, "step": 39090 }, { "epoch": 0.16786447197822485, "grad_norm": 0.08431851863861084, "learning_rate": 8.357277752386538e-05, "loss": 0.3430315017700195, "step": 39100 }, { "epoch": 0.16790740406824484, "grad_norm": 0.020337456837296486, "learning_rate": 8.356846580374776e-05, "loss": 0.1652182936668396, "step": 39110 }, { "epoch": 0.16795033615826485, "grad_norm": 14.583819389343262, "learning_rate": 8.356415408363013e-05, "loss": 0.11169205904006958, "step": 39120 }, { "epoch": 0.16799326824828487, "grad_norm": 1.863498330116272, "learning_rate": 8.355984236351251e-05, "loss": 0.10965640544891357, "step": 39130 }, { "epoch": 0.16803620033830488, "grad_norm": 0.011555775068700314, "learning_rate": 8.355553064339488e-05, "loss": 0.4207490921020508, "step": 39140 }, { "epoch": 0.16807913242832487, "grad_norm": 0.7759541869163513, "learning_rate": 8.355121892327725e-05, "loss": 0.3634244441986084, "step": 39150 }, { "epoch": 0.16812206451834488, "grad_norm": 0.04319796711206436, "learning_rate": 8.354690720315963e-05, "loss": 0.22236130237579346, "step": 39160 }, { "epoch": 0.1681649966083649, "grad_norm": 0.022113563492894173, "learning_rate": 8.354259548304201e-05, "loss": 0.22561120986938477, "step": 39170 }, { "epoch": 0.16820792869838488, "grad_norm": 0.5064992308616638, "learning_rate": 8.353828376292438e-05, "loss": 0.2085291862487793, "step": 39180 }, { "epoch": 0.1682508607884049, "grad_norm": 4.303263187408447, "learning_rate": 8.353397204280676e-05, "loss": 0.1187258243560791, "step": 39190 }, { "epoch": 0.1682937928784249, "grad_norm": 0.09465790539979935, "learning_rate": 8.352966032268914e-05, "loss": 0.20660836696624757, "step": 39200 }, { "epoch": 0.1683367249684449, "grad_norm": 0.19095900654792786, "learning_rate": 8.352534860257152e-05, "loss": 0.2394178867340088, "step": 39210 }, { "epoch": 0.16837965705846492, "grad_norm": 0.12276028841733932, "learning_rate": 8.352103688245388e-05, "loss": 0.35126869678497313, "step": 39220 }, { "epoch": 0.16842258914848493, "grad_norm": 5.452217102050781, "learning_rate": 8.351672516233626e-05, "loss": 0.41213340759277345, "step": 39230 }, { "epoch": 0.16846552123850494, "grad_norm": 0.2814752459526062, "learning_rate": 8.351241344221863e-05, "loss": 0.1482291579246521, "step": 39240 }, { "epoch": 0.16850845332852493, "grad_norm": 0.04191487655043602, "learning_rate": 8.350810172210101e-05, "loss": 0.24670803546905518, "step": 39250 }, { "epoch": 0.16855138541854495, "grad_norm": 0.04874122142791748, "learning_rate": 8.350379000198339e-05, "loss": 0.05260342955589294, "step": 39260 }, { "epoch": 0.16859431750856496, "grad_norm": 0.040154241025447845, "learning_rate": 8.349947828186577e-05, "loss": 0.22768948078155518, "step": 39270 }, { "epoch": 0.16863724959858495, "grad_norm": 0.3587961792945862, "learning_rate": 8.349516656174814e-05, "loss": 0.17796025276184083, "step": 39280 }, { "epoch": 0.16868018168860496, "grad_norm": 0.0058316877111792564, "learning_rate": 8.349085484163052e-05, "loss": 0.3597349405288696, "step": 39290 }, { "epoch": 0.16872311377862498, "grad_norm": 0.5243772864341736, "learning_rate": 8.34865431215129e-05, "loss": 0.2949937582015991, "step": 39300 }, { "epoch": 0.16876604586864496, "grad_norm": 1.1572577953338623, "learning_rate": 8.348223140139528e-05, "loss": 0.28983776569366454, "step": 39310 }, { "epoch": 0.16880897795866498, "grad_norm": 5.315773010253906, "learning_rate": 8.347791968127765e-05, "loss": 0.24558615684509277, "step": 39320 }, { "epoch": 0.168851910048685, "grad_norm": 3.1546738147735596, "learning_rate": 8.347360796116003e-05, "loss": 0.427794361114502, "step": 39330 }, { "epoch": 0.168894842138705, "grad_norm": 14.885223388671875, "learning_rate": 8.346929624104241e-05, "loss": 0.04551592469215393, "step": 39340 }, { "epoch": 0.168937774228725, "grad_norm": 0.022690167650580406, "learning_rate": 8.346498452092479e-05, "loss": 0.21496713161468506, "step": 39350 }, { "epoch": 0.168980706318745, "grad_norm": 0.11703800410032272, "learning_rate": 8.346067280080716e-05, "loss": 0.1862417697906494, "step": 39360 }, { "epoch": 0.16902363840876503, "grad_norm": 1.971624493598938, "learning_rate": 8.345636108068954e-05, "loss": 0.26860053539276124, "step": 39370 }, { "epoch": 0.169066570498785, "grad_norm": 1.7773724794387817, "learning_rate": 8.345204936057192e-05, "loss": 0.5067049026489258, "step": 39380 }, { "epoch": 0.16910950258880503, "grad_norm": 0.8315603733062744, "learning_rate": 8.344773764045428e-05, "loss": 0.27385969161987306, "step": 39390 }, { "epoch": 0.16915243467882504, "grad_norm": 6.316635608673096, "learning_rate": 8.344342592033666e-05, "loss": 0.16933102607727052, "step": 39400 }, { "epoch": 0.16919536676884503, "grad_norm": 0.08984553068876266, "learning_rate": 8.343911420021904e-05, "loss": 0.2484668970108032, "step": 39410 }, { "epoch": 0.16923829885886504, "grad_norm": 3.962224245071411, "learning_rate": 8.343480248010141e-05, "loss": 0.3198500633239746, "step": 39420 }, { "epoch": 0.16928123094888506, "grad_norm": 1.6144697666168213, "learning_rate": 8.343049075998379e-05, "loss": 0.1846301794052124, "step": 39430 }, { "epoch": 0.16932416303890507, "grad_norm": 0.06883388757705688, "learning_rate": 8.342617903986617e-05, "loss": 0.31305603981018065, "step": 39440 }, { "epoch": 0.16936709512892506, "grad_norm": 0.22652801871299744, "learning_rate": 8.342186731974855e-05, "loss": 0.12272937297821045, "step": 39450 }, { "epoch": 0.16941002721894508, "grad_norm": 0.03398356959223747, "learning_rate": 8.341755559963092e-05, "loss": 0.2238314390182495, "step": 39460 }, { "epoch": 0.1694529593089651, "grad_norm": 0.009827976115047932, "learning_rate": 8.341324387951329e-05, "loss": 0.16203200817108154, "step": 39470 }, { "epoch": 0.16949589139898508, "grad_norm": 2.6871118545532227, "learning_rate": 8.340893215939566e-05, "loss": 0.41886377334594727, "step": 39480 }, { "epoch": 0.1695388234890051, "grad_norm": 2.0361788272857666, "learning_rate": 8.340462043927804e-05, "loss": 0.2878458023071289, "step": 39490 }, { "epoch": 0.1695817555790251, "grad_norm": 0.22583113610744476, "learning_rate": 8.340030871916042e-05, "loss": 0.33467190265655516, "step": 39500 }, { "epoch": 0.1696246876690451, "grad_norm": 2.315829277038574, "learning_rate": 8.33959969990428e-05, "loss": 0.20003714561462402, "step": 39510 }, { "epoch": 0.1696676197590651, "grad_norm": 1.986151099205017, "learning_rate": 8.339168527892517e-05, "loss": 0.1555501937866211, "step": 39520 }, { "epoch": 0.16971055184908512, "grad_norm": 0.007451371289789677, "learning_rate": 8.338737355880756e-05, "loss": 0.07800998091697693, "step": 39530 }, { "epoch": 0.1697534839391051, "grad_norm": 21.262826919555664, "learning_rate": 8.338306183868994e-05, "loss": 0.16026850938796997, "step": 39540 }, { "epoch": 0.16979641602912512, "grad_norm": 0.18673354387283325, "learning_rate": 8.33787501185723e-05, "loss": 0.19166061878204346, "step": 39550 }, { "epoch": 0.16983934811914514, "grad_norm": 0.9280945658683777, "learning_rate": 8.337443839845468e-05, "loss": 0.10031145811080933, "step": 39560 }, { "epoch": 0.16988228020916515, "grad_norm": 0.27290984988212585, "learning_rate": 8.337012667833706e-05, "loss": 0.3064888000488281, "step": 39570 }, { "epoch": 0.16992521229918514, "grad_norm": 3.447659730911255, "learning_rate": 8.336581495821944e-05, "loss": 0.21117255687713624, "step": 39580 }, { "epoch": 0.16996814438920516, "grad_norm": 0.550126314163208, "learning_rate": 8.336150323810181e-05, "loss": 0.26300458908081054, "step": 39590 }, { "epoch": 0.17001107647922517, "grad_norm": 0.05337730422616005, "learning_rate": 8.335719151798419e-05, "loss": 0.14356883764266967, "step": 39600 }, { "epoch": 0.17005400856924516, "grad_norm": 2.290454626083374, "learning_rate": 8.335287979786657e-05, "loss": 0.34690542221069337, "step": 39610 }, { "epoch": 0.17009694065926517, "grad_norm": 0.43621301651000977, "learning_rate": 8.334856807774895e-05, "loss": 0.31757278442382814, "step": 39620 }, { "epoch": 0.1701398727492852, "grad_norm": 0.0348827950656414, "learning_rate": 8.334425635763132e-05, "loss": 0.4615351676940918, "step": 39630 }, { "epoch": 0.17018280483930517, "grad_norm": 1.76190984249115, "learning_rate": 8.333994463751369e-05, "loss": 0.31025004386901855, "step": 39640 }, { "epoch": 0.1702257369293252, "grad_norm": 0.04441544786095619, "learning_rate": 8.333563291739607e-05, "loss": 0.1971571445465088, "step": 39650 }, { "epoch": 0.1702686690193452, "grad_norm": 0.07046888023614883, "learning_rate": 8.333132119727844e-05, "loss": 0.27866973876953127, "step": 39660 }, { "epoch": 0.17031160110936522, "grad_norm": 0.023150363937020302, "learning_rate": 8.332700947716082e-05, "loss": 0.14343875646591187, "step": 39670 }, { "epoch": 0.1703545331993852, "grad_norm": 2.077636480331421, "learning_rate": 8.33226977570432e-05, "loss": 0.2821050643920898, "step": 39680 }, { "epoch": 0.17039746528940522, "grad_norm": 1.0875072479248047, "learning_rate": 8.331838603692557e-05, "loss": 0.3242486953735352, "step": 39690 }, { "epoch": 0.17044039737942523, "grad_norm": 2.452104330062866, "learning_rate": 8.331407431680795e-05, "loss": 0.12137858867645264, "step": 39700 }, { "epoch": 0.17048332946944522, "grad_norm": 0.6530572772026062, "learning_rate": 8.330976259669033e-05, "loss": 0.13224581480026246, "step": 39710 }, { "epoch": 0.17052626155946524, "grad_norm": 0.2018723040819168, "learning_rate": 8.330545087657269e-05, "loss": 0.16498252153396606, "step": 39720 }, { "epoch": 0.17056919364948525, "grad_norm": 5.927464962005615, "learning_rate": 8.330113915645507e-05, "loss": 0.37170138359069826, "step": 39730 }, { "epoch": 0.17061212573950524, "grad_norm": 0.03498421981930733, "learning_rate": 8.329682743633745e-05, "loss": 0.28318626880645753, "step": 39740 }, { "epoch": 0.17065505782952525, "grad_norm": 0.20146240293979645, "learning_rate": 8.329251571621984e-05, "loss": 0.3529873609542847, "step": 39750 }, { "epoch": 0.17069798991954527, "grad_norm": 0.021076412871479988, "learning_rate": 8.328820399610222e-05, "loss": 0.20006706714630126, "step": 39760 }, { "epoch": 0.17074092200956528, "grad_norm": 1.7591829299926758, "learning_rate": 8.32838922759846e-05, "loss": 0.2442645788192749, "step": 39770 }, { "epoch": 0.17078385409958527, "grad_norm": 0.038296714425086975, "learning_rate": 8.327958055586697e-05, "loss": 0.06047918200492859, "step": 39780 }, { "epoch": 0.17082678618960528, "grad_norm": 2.6467275619506836, "learning_rate": 8.327526883574935e-05, "loss": 0.2018986940383911, "step": 39790 }, { "epoch": 0.1708697182796253, "grad_norm": 0.1143501028418541, "learning_rate": 8.327095711563171e-05, "loss": 0.18430989980697632, "step": 39800 }, { "epoch": 0.17091265036964529, "grad_norm": 0.04112870246171951, "learning_rate": 8.326664539551409e-05, "loss": 0.448679256439209, "step": 39810 }, { "epoch": 0.1709555824596653, "grad_norm": 1.7692079544067383, "learning_rate": 8.326233367539647e-05, "loss": 0.40442686080932616, "step": 39820 }, { "epoch": 0.17099851454968532, "grad_norm": 33.07231140136719, "learning_rate": 8.325802195527884e-05, "loss": 0.10954853296279907, "step": 39830 }, { "epoch": 0.1710414466397053, "grad_norm": 0.02489142306149006, "learning_rate": 8.325371023516122e-05, "loss": 0.1942327857017517, "step": 39840 }, { "epoch": 0.17108437872972532, "grad_norm": 0.01788198947906494, "learning_rate": 8.32493985150436e-05, "loss": 0.22420060634613037, "step": 39850 }, { "epoch": 0.17112731081974533, "grad_norm": 0.19262222945690155, "learning_rate": 8.324508679492598e-05, "loss": 0.18279441595077514, "step": 39860 }, { "epoch": 0.17117024290976535, "grad_norm": 0.37432318925857544, "learning_rate": 8.324077507480835e-05, "loss": 0.0887138843536377, "step": 39870 }, { "epoch": 0.17121317499978533, "grad_norm": 0.007091057952493429, "learning_rate": 8.323646335469072e-05, "loss": 0.3868217945098877, "step": 39880 }, { "epoch": 0.17125610708980535, "grad_norm": 2.7712514400482178, "learning_rate": 8.32321516345731e-05, "loss": 0.34295656681060793, "step": 39890 }, { "epoch": 0.17129903917982536, "grad_norm": 0.012256702408194542, "learning_rate": 8.322783991445547e-05, "loss": 0.08540812134742737, "step": 39900 }, { "epoch": 0.17134197126984535, "grad_norm": 2.801025867462158, "learning_rate": 8.322352819433785e-05, "loss": 0.30571622848510743, "step": 39910 }, { "epoch": 0.17138490335986536, "grad_norm": 1.7197296619415283, "learning_rate": 8.321921647422023e-05, "loss": 0.5131281852722168, "step": 39920 }, { "epoch": 0.17142783544988538, "grad_norm": 0.019641146063804626, "learning_rate": 8.32149047541026e-05, "loss": 0.22599990367889405, "step": 39930 }, { "epoch": 0.17147076753990537, "grad_norm": 0.03997062146663666, "learning_rate": 8.321059303398498e-05, "loss": 0.15376033782958984, "step": 39940 }, { "epoch": 0.17151369962992538, "grad_norm": 0.41821911931037903, "learning_rate": 8.320628131386736e-05, "loss": 0.23149895668029785, "step": 39950 }, { "epoch": 0.1715566317199454, "grad_norm": 0.30758556723594666, "learning_rate": 8.320196959374972e-05, "loss": 0.14392002820968627, "step": 39960 }, { "epoch": 0.17159956380996538, "grad_norm": 0.23203805088996887, "learning_rate": 8.319765787363211e-05, "loss": 0.5086146831512451, "step": 39970 }, { "epoch": 0.1716424958999854, "grad_norm": 0.42892053723335266, "learning_rate": 8.319334615351449e-05, "loss": 0.20939264297485352, "step": 39980 }, { "epoch": 0.1716854279900054, "grad_norm": 0.9444646239280701, "learning_rate": 8.318903443339687e-05, "loss": 0.2334125280380249, "step": 39990 }, { "epoch": 0.17172836008002543, "grad_norm": 4.157931804656982, "learning_rate": 8.318472271327925e-05, "loss": 0.3207542419433594, "step": 40000 }, { "epoch": 0.17172836008002543, "eval_loss": 0.4570688307285309, "eval_runtime": 27.607, "eval_samples_per_second": 3.622, "eval_steps_per_second": 3.622, "step": 40000 }, { "epoch": 0.17177129217004541, "grad_norm": 4.318746566772461, "learning_rate": 8.318041099316162e-05, "loss": 0.05203125476837158, "step": 40010 }, { "epoch": 0.17181422426006543, "grad_norm": 1.4116801023483276, "learning_rate": 8.3176099273044e-05, "loss": 0.4147486686706543, "step": 40020 }, { "epoch": 0.17185715635008544, "grad_norm": 2.940032958984375, "learning_rate": 8.317178755292638e-05, "loss": 0.28691074848175047, "step": 40030 }, { "epoch": 0.17190008844010543, "grad_norm": 0.46757587790489197, "learning_rate": 8.316747583280875e-05, "loss": 0.25227696895599366, "step": 40040 }, { "epoch": 0.17194302053012545, "grad_norm": 0.18634115159511566, "learning_rate": 8.316316411269112e-05, "loss": 0.007025787234306335, "step": 40050 }, { "epoch": 0.17198595262014546, "grad_norm": 0.1440853774547577, "learning_rate": 8.31588523925735e-05, "loss": 0.23718724250793458, "step": 40060 }, { "epoch": 0.17202888471016545, "grad_norm": 2.4190330505371094, "learning_rate": 8.315454067245587e-05, "loss": 0.39030263423919676, "step": 40070 }, { "epoch": 0.17207181680018546, "grad_norm": 1.9734594821929932, "learning_rate": 8.315022895233825e-05, "loss": 0.3922515630722046, "step": 40080 }, { "epoch": 0.17211474889020548, "grad_norm": 0.6959921717643738, "learning_rate": 8.314591723222063e-05, "loss": 0.3082964181900024, "step": 40090 }, { "epoch": 0.1721576809802255, "grad_norm": 0.06165720522403717, "learning_rate": 8.3141605512103e-05, "loss": 0.17087411880493164, "step": 40100 }, { "epoch": 0.17220061307024548, "grad_norm": 5.1847100257873535, "learning_rate": 8.313729379198538e-05, "loss": 0.1909353256225586, "step": 40110 }, { "epoch": 0.1722435451602655, "grad_norm": 0.07140224426984787, "learning_rate": 8.313298207186776e-05, "loss": 0.3157165050506592, "step": 40120 }, { "epoch": 0.1722864772502855, "grad_norm": 2.0928823947906494, "learning_rate": 8.312867035175012e-05, "loss": 0.22499537467956543, "step": 40130 }, { "epoch": 0.1723294093403055, "grad_norm": 0.9970523118972778, "learning_rate": 8.31243586316325e-05, "loss": 0.3301548004150391, "step": 40140 }, { "epoch": 0.1723723414303255, "grad_norm": 0.13911788165569305, "learning_rate": 8.312004691151488e-05, "loss": 0.07332115769386291, "step": 40150 }, { "epoch": 0.17241527352034552, "grad_norm": 1.103356122970581, "learning_rate": 8.311573519139726e-05, "loss": 0.34607686996459963, "step": 40160 }, { "epoch": 0.1724582056103655, "grad_norm": 0.013140438124537468, "learning_rate": 8.311142347127963e-05, "loss": 0.3559520483016968, "step": 40170 }, { "epoch": 0.17250113770038553, "grad_norm": 3.9121856689453125, "learning_rate": 8.310711175116201e-05, "loss": 0.2703645944595337, "step": 40180 }, { "epoch": 0.17254406979040554, "grad_norm": 1.812775731086731, "learning_rate": 8.310280003104439e-05, "loss": 0.3910071849822998, "step": 40190 }, { "epoch": 0.17258700188042556, "grad_norm": 2.7580201625823975, "learning_rate": 8.309848831092676e-05, "loss": 0.16809067726135254, "step": 40200 }, { "epoch": 0.17262993397044554, "grad_norm": 0.010955448262393475, "learning_rate": 8.309417659080914e-05, "loss": 0.2514191150665283, "step": 40210 }, { "epoch": 0.17267286606046556, "grad_norm": 3.1789658069610596, "learning_rate": 8.308986487069152e-05, "loss": 0.19436899423599244, "step": 40220 }, { "epoch": 0.17271579815048557, "grad_norm": 4.4677252769470215, "learning_rate": 8.30855531505739e-05, "loss": 0.4041276454925537, "step": 40230 }, { "epoch": 0.17275873024050556, "grad_norm": 0.10957568138837814, "learning_rate": 8.308124143045627e-05, "loss": 0.22359180450439453, "step": 40240 }, { "epoch": 0.17280166233052557, "grad_norm": 0.10355505347251892, "learning_rate": 8.307692971033865e-05, "loss": 0.1507003903388977, "step": 40250 }, { "epoch": 0.1728445944205456, "grad_norm": 1.1672568321228027, "learning_rate": 8.307261799022103e-05, "loss": 0.2168051481246948, "step": 40260 }, { "epoch": 0.17288752651056558, "grad_norm": 1.6299830675125122, "learning_rate": 8.30683062701034e-05, "loss": 0.29830398559570315, "step": 40270 }, { "epoch": 0.1729304586005856, "grad_norm": 0.09983634203672409, "learning_rate": 8.306399454998578e-05, "loss": 0.24507391452789307, "step": 40280 }, { "epoch": 0.1729733906906056, "grad_norm": 1.581100583076477, "learning_rate": 8.305968282986815e-05, "loss": 0.2604495048522949, "step": 40290 }, { "epoch": 0.17301632278062562, "grad_norm": 2.6971969604492188, "learning_rate": 8.305537110975052e-05, "loss": 0.23511652946472167, "step": 40300 }, { "epoch": 0.1730592548706456, "grad_norm": 0.9278387427330017, "learning_rate": 8.30510593896329e-05, "loss": 0.2202451229095459, "step": 40310 }, { "epoch": 0.17310218696066562, "grad_norm": 1.2856556177139282, "learning_rate": 8.304674766951528e-05, "loss": 0.09628130197525024, "step": 40320 }, { "epoch": 0.17314511905068564, "grad_norm": 0.8613991141319275, "learning_rate": 8.304243594939766e-05, "loss": 0.17015516757965088, "step": 40330 }, { "epoch": 0.17318805114070562, "grad_norm": 0.4966510236263275, "learning_rate": 8.303812422928003e-05, "loss": 0.2915595293045044, "step": 40340 }, { "epoch": 0.17323098323072564, "grad_norm": 3.0629260540008545, "learning_rate": 8.303381250916241e-05, "loss": 0.26964516639709474, "step": 40350 }, { "epoch": 0.17327391532074565, "grad_norm": 0.004435913171619177, "learning_rate": 8.302950078904479e-05, "loss": 0.13189687728881835, "step": 40360 }, { "epoch": 0.17331684741076564, "grad_norm": 0.012355843558907509, "learning_rate": 8.302518906892717e-05, "loss": 0.12814369201660156, "step": 40370 }, { "epoch": 0.17335977950078565, "grad_norm": 0.001818435383029282, "learning_rate": 8.302087734880953e-05, "loss": 0.15914521217346192, "step": 40380 }, { "epoch": 0.17340271159080567, "grad_norm": 2.8527755737304688, "learning_rate": 8.301656562869191e-05, "loss": 0.3045322418212891, "step": 40390 }, { "epoch": 0.17344564368082566, "grad_norm": 4.89120626449585, "learning_rate": 8.301225390857428e-05, "loss": 0.14866328239440918, "step": 40400 }, { "epoch": 0.17348857577084567, "grad_norm": 1.227921962738037, "learning_rate": 8.300794218845666e-05, "loss": 0.2188586711883545, "step": 40410 }, { "epoch": 0.17353150786086569, "grad_norm": 0.14513230323791504, "learning_rate": 8.300363046833904e-05, "loss": 0.31578736305236815, "step": 40420 }, { "epoch": 0.1735744399508857, "grad_norm": 0.004886118695139885, "learning_rate": 8.299931874822142e-05, "loss": 0.1223494529724121, "step": 40430 }, { "epoch": 0.1736173720409057, "grad_norm": 0.0034907562658190727, "learning_rate": 8.29950070281038e-05, "loss": 0.35157797336578367, "step": 40440 }, { "epoch": 0.1736603041309257, "grad_norm": 0.002745399484410882, "learning_rate": 8.299069530798617e-05, "loss": 0.3651834011077881, "step": 40450 }, { "epoch": 0.17370323622094572, "grad_norm": 1.5831266641616821, "learning_rate": 8.298638358786855e-05, "loss": 0.19076406955718994, "step": 40460 }, { "epoch": 0.1737461683109657, "grad_norm": 0.0015953868860378861, "learning_rate": 8.298207186775093e-05, "loss": 0.25166780948638917, "step": 40470 }, { "epoch": 0.17378910040098572, "grad_norm": 0.03768213838338852, "learning_rate": 8.29777601476333e-05, "loss": 0.32109010219573975, "step": 40480 }, { "epoch": 0.17383203249100573, "grad_norm": 0.5360517501831055, "learning_rate": 8.297344842751568e-05, "loss": 0.23133792877197265, "step": 40490 }, { "epoch": 0.17387496458102572, "grad_norm": 1.6855971813201904, "learning_rate": 8.296913670739806e-05, "loss": 0.22416160106658936, "step": 40500 }, { "epoch": 0.17391789667104574, "grad_norm": 2.100095510482788, "learning_rate": 8.296482498728044e-05, "loss": 0.3099149942398071, "step": 40510 }, { "epoch": 0.17396082876106575, "grad_norm": 0.001875672023743391, "learning_rate": 8.296051326716281e-05, "loss": 0.16080970764160157, "step": 40520 }, { "epoch": 0.17400376085108576, "grad_norm": 0.03504222258925438, "learning_rate": 8.295620154704519e-05, "loss": 0.16347267627716064, "step": 40530 }, { "epoch": 0.17404669294110575, "grad_norm": 0.009600917808711529, "learning_rate": 8.295188982692755e-05, "loss": 0.39733908176422117, "step": 40540 }, { "epoch": 0.17408962503112577, "grad_norm": 2.063370704650879, "learning_rate": 8.294757810680993e-05, "loss": 0.31655449867248536, "step": 40550 }, { "epoch": 0.17413255712114578, "grad_norm": 1.423431634902954, "learning_rate": 8.294326638669231e-05, "loss": 0.42395715713500975, "step": 40560 }, { "epoch": 0.17417548921116577, "grad_norm": 0.05649884417653084, "learning_rate": 8.293895466657469e-05, "loss": 0.06664473414421082, "step": 40570 }, { "epoch": 0.17421842130118578, "grad_norm": 0.06601168215274811, "learning_rate": 8.293464294645706e-05, "loss": 0.2564336538314819, "step": 40580 }, { "epoch": 0.1742613533912058, "grad_norm": 1.3635640144348145, "learning_rate": 8.293033122633944e-05, "loss": 0.2436471700668335, "step": 40590 }, { "epoch": 0.17430428548122578, "grad_norm": 0.6892813444137573, "learning_rate": 8.292601950622182e-05, "loss": 0.1424673914909363, "step": 40600 }, { "epoch": 0.1743472175712458, "grad_norm": 0.30439263582229614, "learning_rate": 8.29217077861042e-05, "loss": 0.2444373369216919, "step": 40610 }, { "epoch": 0.17439014966126581, "grad_norm": 0.3621372580528259, "learning_rate": 8.291739606598656e-05, "loss": 0.12778749465942382, "step": 40620 }, { "epoch": 0.17443308175128583, "grad_norm": 0.1329677700996399, "learning_rate": 8.291308434586894e-05, "loss": 0.18559612035751344, "step": 40630 }, { "epoch": 0.17447601384130582, "grad_norm": 1.1921926736831665, "learning_rate": 8.290877262575131e-05, "loss": 0.2326160192489624, "step": 40640 }, { "epoch": 0.17451894593132583, "grad_norm": 0.9672372937202454, "learning_rate": 8.290446090563369e-05, "loss": 0.4107320308685303, "step": 40650 }, { "epoch": 0.17456187802134585, "grad_norm": 1.4626498222351074, "learning_rate": 8.290014918551607e-05, "loss": 0.2904258489608765, "step": 40660 }, { "epoch": 0.17460481011136583, "grad_norm": 0.010346812196075916, "learning_rate": 8.289583746539845e-05, "loss": 0.10135916471481324, "step": 40670 }, { "epoch": 0.17464774220138585, "grad_norm": 1.992686152458191, "learning_rate": 8.289152574528082e-05, "loss": 0.2161275863647461, "step": 40680 }, { "epoch": 0.17469067429140586, "grad_norm": 0.03004583902657032, "learning_rate": 8.28872140251632e-05, "loss": 0.19164289236068727, "step": 40690 }, { "epoch": 0.17473360638142585, "grad_norm": 0.106281578540802, "learning_rate": 8.288290230504558e-05, "loss": 0.21553289890289307, "step": 40700 }, { "epoch": 0.17477653847144586, "grad_norm": 0.8848221898078918, "learning_rate": 8.287859058492796e-05, "loss": 0.17538909912109374, "step": 40710 }, { "epoch": 0.17481947056146588, "grad_norm": 0.006868957541882992, "learning_rate": 8.287427886481033e-05, "loss": 0.26656954288482665, "step": 40720 }, { "epoch": 0.1748624026514859, "grad_norm": 15.10798454284668, "learning_rate": 8.286996714469271e-05, "loss": 0.4881472587585449, "step": 40730 }, { "epoch": 0.17490533474150588, "grad_norm": 0.7054204344749451, "learning_rate": 8.286565542457509e-05, "loss": 0.2571603536605835, "step": 40740 }, { "epoch": 0.1749482668315259, "grad_norm": 0.016062183305621147, "learning_rate": 8.286134370445746e-05, "loss": 0.2579342365264893, "step": 40750 }, { "epoch": 0.1749911989215459, "grad_norm": 0.19596177339553833, "learning_rate": 8.285703198433984e-05, "loss": 0.29378952980041506, "step": 40760 }, { "epoch": 0.1750341310115659, "grad_norm": 2.484384059906006, "learning_rate": 8.285272026422222e-05, "loss": 0.24021799564361573, "step": 40770 }, { "epoch": 0.1750770631015859, "grad_norm": 0.2761439085006714, "learning_rate": 8.28484085441046e-05, "loss": 0.1571637988090515, "step": 40780 }, { "epoch": 0.17511999519160593, "grad_norm": 2.089289903640747, "learning_rate": 8.284409682398696e-05, "loss": 0.21136255264282228, "step": 40790 }, { "epoch": 0.1751629272816259, "grad_norm": 0.3317461311817169, "learning_rate": 8.283978510386934e-05, "loss": 0.3460866928100586, "step": 40800 }, { "epoch": 0.17520585937164593, "grad_norm": 10.414310455322266, "learning_rate": 8.283547338375172e-05, "loss": 0.2301114559173584, "step": 40810 }, { "epoch": 0.17524879146166594, "grad_norm": 0.1468975692987442, "learning_rate": 8.283116166363409e-05, "loss": 0.32068285942077634, "step": 40820 }, { "epoch": 0.17529172355168593, "grad_norm": 0.01950320042669773, "learning_rate": 8.282684994351647e-05, "loss": 0.10026562213897705, "step": 40830 }, { "epoch": 0.17533465564170594, "grad_norm": 1.38692045211792, "learning_rate": 8.282253822339885e-05, "loss": 0.3668211936950684, "step": 40840 }, { "epoch": 0.17537758773172596, "grad_norm": 0.07779578119516373, "learning_rate": 8.281822650328122e-05, "loss": 0.4057168960571289, "step": 40850 }, { "epoch": 0.17542051982174597, "grad_norm": 0.011414109729230404, "learning_rate": 8.28139147831636e-05, "loss": 0.08641638159751892, "step": 40860 }, { "epoch": 0.17546345191176596, "grad_norm": 0.026972273364663124, "learning_rate": 8.280960306304597e-05, "loss": 0.1714036226272583, "step": 40870 }, { "epoch": 0.17550638400178598, "grad_norm": 0.11325936764478683, "learning_rate": 8.280529134292834e-05, "loss": 0.18394806385040283, "step": 40880 }, { "epoch": 0.175549316091806, "grad_norm": 1.1557916402816772, "learning_rate": 8.280097962281072e-05, "loss": 0.26514892578125, "step": 40890 }, { "epoch": 0.17559224818182598, "grad_norm": 3.6013333797454834, "learning_rate": 8.27966679026931e-05, "loss": 0.19369306564331054, "step": 40900 }, { "epoch": 0.175635180271846, "grad_norm": 1.8386772871017456, "learning_rate": 8.279235618257548e-05, "loss": 0.3296739816665649, "step": 40910 }, { "epoch": 0.175678112361866, "grad_norm": 0.2031298577785492, "learning_rate": 8.278804446245785e-05, "loss": 0.42958965301513674, "step": 40920 }, { "epoch": 0.175721044451886, "grad_norm": 0.014757785946130753, "learning_rate": 8.278373274234023e-05, "loss": 0.10978200435638427, "step": 40930 }, { "epoch": 0.175763976541906, "grad_norm": 0.0178249292075634, "learning_rate": 8.277942102222262e-05, "loss": 0.21592988967895507, "step": 40940 }, { "epoch": 0.17580690863192602, "grad_norm": 0.13518759608268738, "learning_rate": 8.277510930210498e-05, "loss": 0.08380707502365112, "step": 40950 }, { "epoch": 0.17584984072194604, "grad_norm": 4.348465442657471, "learning_rate": 8.277079758198736e-05, "loss": 0.20458722114562988, "step": 40960 }, { "epoch": 0.17589277281196602, "grad_norm": 0.040076449513435364, "learning_rate": 8.276648586186974e-05, "loss": 0.2710048913955688, "step": 40970 }, { "epoch": 0.17593570490198604, "grad_norm": 7.410130977630615, "learning_rate": 8.276217414175212e-05, "loss": 0.584017276763916, "step": 40980 }, { "epoch": 0.17597863699200605, "grad_norm": 0.06982174515724182, "learning_rate": 8.27578624216345e-05, "loss": 0.008809458464384079, "step": 40990 }, { "epoch": 0.17602156908202604, "grad_norm": 0.008431232534348965, "learning_rate": 8.275355070151687e-05, "loss": 0.3506460189819336, "step": 41000 }, { "epoch": 0.17602156908202604, "eval_loss": 0.4541700780391693, "eval_runtime": 27.5582, "eval_samples_per_second": 3.629, "eval_steps_per_second": 3.629, "step": 41000 }, { "epoch": 0.17606450117204606, "grad_norm": 0.009548353031277657, "learning_rate": 8.274923898139925e-05, "loss": 0.2820699453353882, "step": 41010 }, { "epoch": 0.17610743326206607, "grad_norm": 0.009989765472710133, "learning_rate": 8.274492726128163e-05, "loss": 0.25205843448638915, "step": 41020 }, { "epoch": 0.17615036535208606, "grad_norm": 0.23707497119903564, "learning_rate": 8.274061554116399e-05, "loss": 0.263662314414978, "step": 41030 }, { "epoch": 0.17619329744210607, "grad_norm": 0.0967523604631424, "learning_rate": 8.273630382104637e-05, "loss": 0.23872857093811034, "step": 41040 }, { "epoch": 0.1762362295321261, "grad_norm": 12.732280731201172, "learning_rate": 8.273199210092874e-05, "loss": 0.1568984270095825, "step": 41050 }, { "epoch": 0.1762791616221461, "grad_norm": 0.02078324370086193, "learning_rate": 8.272768038081112e-05, "loss": 0.18609501123428346, "step": 41060 }, { "epoch": 0.1763220937121661, "grad_norm": 0.015798605978488922, "learning_rate": 8.27233686606935e-05, "loss": 0.30507678985595704, "step": 41070 }, { "epoch": 0.1763650258021861, "grad_norm": 1.2599973678588867, "learning_rate": 8.271905694057588e-05, "loss": 0.311283016204834, "step": 41080 }, { "epoch": 0.17640795789220612, "grad_norm": 0.40799635648727417, "learning_rate": 8.271474522045825e-05, "loss": 0.3520747423171997, "step": 41090 }, { "epoch": 0.1764508899822261, "grad_norm": 1.6615732908248901, "learning_rate": 8.271043350034063e-05, "loss": 0.2939105987548828, "step": 41100 }, { "epoch": 0.17649382207224612, "grad_norm": 1.2815062999725342, "learning_rate": 8.270612178022301e-05, "loss": 0.28693690299987795, "step": 41110 }, { "epoch": 0.17653675416226614, "grad_norm": 0.01296562422066927, "learning_rate": 8.270181006010537e-05, "loss": 0.12417706251144409, "step": 41120 }, { "epoch": 0.17657968625228612, "grad_norm": 0.043650198727846146, "learning_rate": 8.269749833998775e-05, "loss": 0.48966689109802247, "step": 41130 }, { "epoch": 0.17662261834230614, "grad_norm": 1.0135577917099, "learning_rate": 8.269318661987013e-05, "loss": 0.11902866363525391, "step": 41140 }, { "epoch": 0.17666555043232615, "grad_norm": 0.048548776656389236, "learning_rate": 8.26888748997525e-05, "loss": 0.21979255676269532, "step": 41150 }, { "epoch": 0.17670848252234617, "grad_norm": 8.79423713684082, "learning_rate": 8.26845631796349e-05, "loss": 0.09572447538375854, "step": 41160 }, { "epoch": 0.17675141461236615, "grad_norm": 1.3120108842849731, "learning_rate": 8.268025145951727e-05, "loss": 0.3123744487762451, "step": 41170 }, { "epoch": 0.17679434670238617, "grad_norm": 0.3405528962612152, "learning_rate": 8.267593973939965e-05, "loss": 0.14704158306121826, "step": 41180 }, { "epoch": 0.17683727879240618, "grad_norm": 0.2074926495552063, "learning_rate": 8.267162801928203e-05, "loss": 0.28987486362457277, "step": 41190 }, { "epoch": 0.17688021088242617, "grad_norm": 0.663354754447937, "learning_rate": 8.266731629916439e-05, "loss": 0.07300347089767456, "step": 41200 }, { "epoch": 0.17692314297244618, "grad_norm": 0.36614349484443665, "learning_rate": 8.266300457904677e-05, "loss": 0.2700439214706421, "step": 41210 }, { "epoch": 0.1769660750624662, "grad_norm": 2.989150047302246, "learning_rate": 8.265869285892915e-05, "loss": 0.1963904857635498, "step": 41220 }, { "epoch": 0.1770090071524862, "grad_norm": 0.02262544445693493, "learning_rate": 8.265438113881152e-05, "loss": 0.1728546142578125, "step": 41230 }, { "epoch": 0.1770519392425062, "grad_norm": 3.4345920085906982, "learning_rate": 8.26500694186939e-05, "loss": 0.4857645988464355, "step": 41240 }, { "epoch": 0.17709487133252622, "grad_norm": 0.12471339851617813, "learning_rate": 8.264575769857628e-05, "loss": 0.24559836387634276, "step": 41250 }, { "epoch": 0.1771378034225462, "grad_norm": 2.662829875946045, "learning_rate": 8.264144597845865e-05, "loss": 0.25598394870758057, "step": 41260 }, { "epoch": 0.17718073551256622, "grad_norm": 1.2927974462509155, "learning_rate": 8.263713425834103e-05, "loss": 0.31288459300994875, "step": 41270 }, { "epoch": 0.17722366760258623, "grad_norm": 1.086735486984253, "learning_rate": 8.26328225382234e-05, "loss": 0.4813650608062744, "step": 41280 }, { "epoch": 0.17726659969260625, "grad_norm": 3.4481236934661865, "learning_rate": 8.262851081810577e-05, "loss": 0.204862380027771, "step": 41290 }, { "epoch": 0.17730953178262623, "grad_norm": 0.015600155107676983, "learning_rate": 8.262419909798815e-05, "loss": 0.31542177200317384, "step": 41300 }, { "epoch": 0.17735246387264625, "grad_norm": 1.3220545053482056, "learning_rate": 8.261988737787053e-05, "loss": 0.3501570224761963, "step": 41310 }, { "epoch": 0.17739539596266626, "grad_norm": 0.05844166874885559, "learning_rate": 8.26155756577529e-05, "loss": 0.10675265789031982, "step": 41320 }, { "epoch": 0.17743832805268625, "grad_norm": 0.009374409914016724, "learning_rate": 8.261126393763528e-05, "loss": 0.21986827850341797, "step": 41330 }, { "epoch": 0.17748126014270627, "grad_norm": 0.1090593934059143, "learning_rate": 8.260695221751766e-05, "loss": 0.28274610042572024, "step": 41340 }, { "epoch": 0.17752419223272628, "grad_norm": 4.326338768005371, "learning_rate": 8.260264049740004e-05, "loss": 0.14196935892105103, "step": 41350 }, { "epoch": 0.17756712432274627, "grad_norm": 0.08671343326568604, "learning_rate": 8.25983287772824e-05, "loss": 0.43413190841674804, "step": 41360 }, { "epoch": 0.17761005641276628, "grad_norm": 1.5184999704360962, "learning_rate": 8.259401705716478e-05, "loss": 0.2877436637878418, "step": 41370 }, { "epoch": 0.1776529885027863, "grad_norm": 0.7909638285636902, "learning_rate": 8.258970533704717e-05, "loss": 0.26583013534545896, "step": 41380 }, { "epoch": 0.1776959205928063, "grad_norm": 2.740144729614258, "learning_rate": 8.258539361692955e-05, "loss": 0.27145915031433104, "step": 41390 }, { "epoch": 0.1777388526828263, "grad_norm": 2.0741074085235596, "learning_rate": 8.258108189681192e-05, "loss": 0.28565273284912107, "step": 41400 }, { "epoch": 0.1777817847728463, "grad_norm": 0.8548224568367004, "learning_rate": 8.25767701766943e-05, "loss": 0.15239741802215576, "step": 41410 }, { "epoch": 0.17782471686286633, "grad_norm": 0.21049945056438446, "learning_rate": 8.257245845657668e-05, "loss": 0.1537050724029541, "step": 41420 }, { "epoch": 0.17786764895288631, "grad_norm": 0.023179372772574425, "learning_rate": 8.256814673645906e-05, "loss": 0.19055650234222413, "step": 41430 }, { "epoch": 0.17791058104290633, "grad_norm": 1.333984613418579, "learning_rate": 8.256383501634143e-05, "loss": 0.45396127700805666, "step": 41440 }, { "epoch": 0.17795351313292634, "grad_norm": 0.789638102054596, "learning_rate": 8.25595232962238e-05, "loss": 0.19562928676605223, "step": 41450 }, { "epoch": 0.17799644522294633, "grad_norm": 0.018816199153661728, "learning_rate": 8.255521157610617e-05, "loss": 0.20530054569244385, "step": 41460 }, { "epoch": 0.17803937731296635, "grad_norm": 0.03177690878510475, "learning_rate": 8.255089985598855e-05, "loss": 0.2804241180419922, "step": 41470 }, { "epoch": 0.17808230940298636, "grad_norm": 0.057153262197971344, "learning_rate": 8.254658813587093e-05, "loss": 0.0474990576505661, "step": 41480 }, { "epoch": 0.17812524149300638, "grad_norm": 0.4023728370666504, "learning_rate": 8.25422764157533e-05, "loss": 0.011729182302951812, "step": 41490 }, { "epoch": 0.17816817358302636, "grad_norm": 0.3172079622745514, "learning_rate": 8.253796469563568e-05, "loss": 0.29478228092193604, "step": 41500 }, { "epoch": 0.17821110567304638, "grad_norm": 0.0051256525330245495, "learning_rate": 8.253365297551806e-05, "loss": 0.1952364444732666, "step": 41510 }, { "epoch": 0.1782540377630664, "grad_norm": 0.07210695743560791, "learning_rate": 8.252934125540044e-05, "loss": 0.17813553810119628, "step": 41520 }, { "epoch": 0.17829696985308638, "grad_norm": 1.6913539171218872, "learning_rate": 8.25250295352828e-05, "loss": 0.29789347648620607, "step": 41530 }, { "epoch": 0.1783399019431064, "grad_norm": 0.7843422293663025, "learning_rate": 8.252071781516518e-05, "loss": 0.07679291367530823, "step": 41540 }, { "epoch": 0.1783828340331264, "grad_norm": 0.1356504112482071, "learning_rate": 8.251640609504756e-05, "loss": 0.14666395187377929, "step": 41550 }, { "epoch": 0.1784257661231464, "grad_norm": 4.225996971130371, "learning_rate": 8.251209437492993e-05, "loss": 0.26961095333099366, "step": 41560 }, { "epoch": 0.1784686982131664, "grad_norm": 5.507353782653809, "learning_rate": 8.250778265481231e-05, "loss": 0.42926778793334963, "step": 41570 }, { "epoch": 0.17851163030318642, "grad_norm": 0.029965883120894432, "learning_rate": 8.250347093469469e-05, "loss": 0.15851476192474365, "step": 41580 }, { "epoch": 0.17855456239320644, "grad_norm": 4.527895450592041, "learning_rate": 8.249915921457707e-05, "loss": 0.3978370428085327, "step": 41590 }, { "epoch": 0.17859749448322643, "grad_norm": 0.023621153086423874, "learning_rate": 8.249484749445944e-05, "loss": 0.04133687913417816, "step": 41600 }, { "epoch": 0.17864042657324644, "grad_norm": 0.011967599391937256, "learning_rate": 8.249053577434182e-05, "loss": 0.20162932872772216, "step": 41610 }, { "epoch": 0.17868335866326646, "grad_norm": 0.05433309078216553, "learning_rate": 8.24862240542242e-05, "loss": 0.21610791683197023, "step": 41620 }, { "epoch": 0.17872629075328644, "grad_norm": 0.5024532675743103, "learning_rate": 8.248191233410658e-05, "loss": 0.25946621894836425, "step": 41630 }, { "epoch": 0.17876922284330646, "grad_norm": 0.04332248494029045, "learning_rate": 8.247760061398895e-05, "loss": 0.2946115255355835, "step": 41640 }, { "epoch": 0.17881215493332647, "grad_norm": 0.04508750140666962, "learning_rate": 8.247328889387133e-05, "loss": 0.27277469635009766, "step": 41650 }, { "epoch": 0.17885508702334646, "grad_norm": 0.09932596236467361, "learning_rate": 8.246897717375371e-05, "loss": 0.13800952434539795, "step": 41660 }, { "epoch": 0.17889801911336647, "grad_norm": 2.0395140647888184, "learning_rate": 8.246466545363609e-05, "loss": 0.4774670124053955, "step": 41670 }, { "epoch": 0.1789409512033865, "grad_norm": 0.00492170499637723, "learning_rate": 8.246035373351846e-05, "loss": 0.06394680142402649, "step": 41680 }, { "epoch": 0.17898388329340648, "grad_norm": 5.37308931350708, "learning_rate": 8.245604201340083e-05, "loss": 0.22611804008483888, "step": 41690 }, { "epoch": 0.1790268153834265, "grad_norm": 0.35247814655303955, "learning_rate": 8.24517302932832e-05, "loss": 0.133196222782135, "step": 41700 }, { "epoch": 0.1790697474734465, "grad_norm": 0.07962910085916519, "learning_rate": 8.244741857316558e-05, "loss": 0.02865527868270874, "step": 41710 }, { "epoch": 0.17911267956346652, "grad_norm": 0.22916433215141296, "learning_rate": 8.244310685304796e-05, "loss": 0.3877936124801636, "step": 41720 }, { "epoch": 0.1791556116534865, "grad_norm": 0.14095786213874817, "learning_rate": 8.243879513293034e-05, "loss": 0.15569915771484374, "step": 41730 }, { "epoch": 0.17919854374350652, "grad_norm": 2.432192325592041, "learning_rate": 8.243448341281271e-05, "loss": 0.13069474697113037, "step": 41740 }, { "epoch": 0.17924147583352654, "grad_norm": 0.006407095119357109, "learning_rate": 8.243017169269509e-05, "loss": 0.36841678619384766, "step": 41750 }, { "epoch": 0.17928440792354652, "grad_norm": 2.003530263900757, "learning_rate": 8.242585997257747e-05, "loss": 0.3995458364486694, "step": 41760 }, { "epoch": 0.17932734001356654, "grad_norm": 2.8348910808563232, "learning_rate": 8.242154825245983e-05, "loss": 0.33652119636535643, "step": 41770 }, { "epoch": 0.17937027210358655, "grad_norm": 2.0822720527648926, "learning_rate": 8.241723653234221e-05, "loss": 0.3451982498168945, "step": 41780 }, { "epoch": 0.17941320419360654, "grad_norm": 1.157434105873108, "learning_rate": 8.241292481222459e-05, "loss": 0.2674999475479126, "step": 41790 }, { "epoch": 0.17945613628362656, "grad_norm": 0.003249003551900387, "learning_rate": 8.240861309210696e-05, "loss": 0.4022883415222168, "step": 41800 }, { "epoch": 0.17949906837364657, "grad_norm": 0.005270634777843952, "learning_rate": 8.240430137198934e-05, "loss": 0.17502715587615966, "step": 41810 }, { "epoch": 0.17954200046366658, "grad_norm": 0.00106601242441684, "learning_rate": 8.239998965187172e-05, "loss": 0.19975433349609376, "step": 41820 }, { "epoch": 0.17958493255368657, "grad_norm": 0.0015999526949599385, "learning_rate": 8.23956779317541e-05, "loss": 0.07730457186698914, "step": 41830 }, { "epoch": 0.1796278646437066, "grad_norm": 1.591696858406067, "learning_rate": 8.239136621163647e-05, "loss": 0.2634559631347656, "step": 41840 }, { "epoch": 0.1796707967337266, "grad_norm": 0.1443047970533371, "learning_rate": 8.238705449151885e-05, "loss": 0.09159661531448364, "step": 41850 }, { "epoch": 0.1797137288237466, "grad_norm": 2.106316089630127, "learning_rate": 8.238274277140123e-05, "loss": 0.34165334701538086, "step": 41860 }, { "epoch": 0.1797566609137666, "grad_norm": 0.41624924540519714, "learning_rate": 8.23784310512836e-05, "loss": 0.16944279670715331, "step": 41870 }, { "epoch": 0.17979959300378662, "grad_norm": 0.09014492481946945, "learning_rate": 8.237411933116598e-05, "loss": 0.32366485595703126, "step": 41880 }, { "epoch": 0.1798425250938066, "grad_norm": 3.917996883392334, "learning_rate": 8.236980761104836e-05, "loss": 0.3025381565093994, "step": 41890 }, { "epoch": 0.17988545718382662, "grad_norm": 0.019549962133169174, "learning_rate": 8.236549589093074e-05, "loss": 0.13743315935134887, "step": 41900 }, { "epoch": 0.17992838927384663, "grad_norm": 0.008352905511856079, "learning_rate": 8.236118417081311e-05, "loss": 0.16161361932754517, "step": 41910 }, { "epoch": 0.17997132136386665, "grad_norm": 0.2613341808319092, "learning_rate": 8.235687245069549e-05, "loss": 0.1621633529663086, "step": 41920 }, { "epoch": 0.18001425345388664, "grad_norm": 0.011575527489185333, "learning_rate": 8.235256073057787e-05, "loss": 0.3463058233261108, "step": 41930 }, { "epoch": 0.18005718554390665, "grad_norm": 0.03169770911335945, "learning_rate": 8.234824901046023e-05, "loss": 0.22944414615631104, "step": 41940 }, { "epoch": 0.18010011763392667, "grad_norm": 2.3105247020721436, "learning_rate": 8.234393729034261e-05, "loss": 0.33264400959014895, "step": 41950 }, { "epoch": 0.18014304972394665, "grad_norm": 2.5481674671173096, "learning_rate": 8.233962557022499e-05, "loss": 0.1760340452194214, "step": 41960 }, { "epoch": 0.18018598181396667, "grad_norm": 0.025155572220683098, "learning_rate": 8.233531385010736e-05, "loss": 0.17142497301101683, "step": 41970 }, { "epoch": 0.18022891390398668, "grad_norm": 0.008635635487735271, "learning_rate": 8.233100212998974e-05, "loss": 0.06564597487449646, "step": 41980 }, { "epoch": 0.18027184599400667, "grad_norm": 5.458888530731201, "learning_rate": 8.232669040987212e-05, "loss": 0.2626389741897583, "step": 41990 }, { "epoch": 0.18031477808402668, "grad_norm": 0.04839470610022545, "learning_rate": 8.23223786897545e-05, "loss": 0.10072095394134521, "step": 42000 }, { "epoch": 0.18031477808402668, "eval_loss": 0.4729278087615967, "eval_runtime": 27.4307, "eval_samples_per_second": 3.646, "eval_steps_per_second": 3.646, "step": 42000 }, { "epoch": 0.1803577101740467, "grad_norm": 1.669901967048645, "learning_rate": 8.231806696963687e-05, "loss": 0.20919899940490722, "step": 42010 }, { "epoch": 0.1804006422640667, "grad_norm": 0.21869534254074097, "learning_rate": 8.231375524951924e-05, "loss": 0.42443008422851564, "step": 42020 }, { "epoch": 0.1804435743540867, "grad_norm": 0.06950627267360687, "learning_rate": 8.230944352940162e-05, "loss": 0.2566273927688599, "step": 42030 }, { "epoch": 0.18048650644410671, "grad_norm": 0.002647142857313156, "learning_rate": 8.230513180928399e-05, "loss": 0.3426970958709717, "step": 42040 }, { "epoch": 0.18052943853412673, "grad_norm": 0.2864302396774292, "learning_rate": 8.230082008916637e-05, "loss": 0.18864575624465943, "step": 42050 }, { "epoch": 0.18057237062414672, "grad_norm": 0.0666997879743576, "learning_rate": 8.229650836904875e-05, "loss": 0.49730625152587893, "step": 42060 }, { "epoch": 0.18061530271416673, "grad_norm": 0.04434436559677124, "learning_rate": 8.229219664893112e-05, "loss": 0.22104194164276122, "step": 42070 }, { "epoch": 0.18065823480418675, "grad_norm": 0.15641604363918304, "learning_rate": 8.22878849288135e-05, "loss": 0.22287838459014891, "step": 42080 }, { "epoch": 0.18070116689420673, "grad_norm": 0.672537088394165, "learning_rate": 8.228357320869588e-05, "loss": 0.3182207584381104, "step": 42090 }, { "epoch": 0.18074409898422675, "grad_norm": 0.039332207292318344, "learning_rate": 8.227926148857826e-05, "loss": 0.27091445922851565, "step": 42100 }, { "epoch": 0.18078703107424676, "grad_norm": 0.060980744659900665, "learning_rate": 8.227494976846063e-05, "loss": 0.11896921396255493, "step": 42110 }, { "epoch": 0.18082996316426675, "grad_norm": 0.03762351721525192, "learning_rate": 8.227063804834301e-05, "loss": 0.15179141759872436, "step": 42120 }, { "epoch": 0.18087289525428676, "grad_norm": 0.38810792565345764, "learning_rate": 8.226632632822539e-05, "loss": 0.19824562072753907, "step": 42130 }, { "epoch": 0.18091582734430678, "grad_norm": 0.01573784649372101, "learning_rate": 8.226201460810777e-05, "loss": 0.30882740020751953, "step": 42140 }, { "epoch": 0.1809587594343268, "grad_norm": 0.029129816219210625, "learning_rate": 8.225770288799014e-05, "loss": 0.2297649383544922, "step": 42150 }, { "epoch": 0.18100169152434678, "grad_norm": 6.418994426727295, "learning_rate": 8.225339116787252e-05, "loss": 0.3056930065155029, "step": 42160 }, { "epoch": 0.1810446236143668, "grad_norm": 0.07934199273586273, "learning_rate": 8.22490794477549e-05, "loss": 0.30078830718994143, "step": 42170 }, { "epoch": 0.1810875557043868, "grad_norm": 1.3679813146591187, "learning_rate": 8.224476772763728e-05, "loss": 0.2010509490966797, "step": 42180 }, { "epoch": 0.1811304877944068, "grad_norm": 2.9077062606811523, "learning_rate": 8.224045600751964e-05, "loss": 0.11555584669113159, "step": 42190 }, { "epoch": 0.1811734198844268, "grad_norm": 0.609990119934082, "learning_rate": 8.223614428740202e-05, "loss": 0.14299447536468507, "step": 42200 }, { "epoch": 0.18121635197444683, "grad_norm": 0.04955404996871948, "learning_rate": 8.22318325672844e-05, "loss": 0.15406620502471924, "step": 42210 }, { "epoch": 0.1812592840644668, "grad_norm": 2.1297833919525146, "learning_rate": 8.222752084716677e-05, "loss": 0.31810736656188965, "step": 42220 }, { "epoch": 0.18130221615448683, "grad_norm": 0.11107069253921509, "learning_rate": 8.222320912704915e-05, "loss": 0.2613961696624756, "step": 42230 }, { "epoch": 0.18134514824450684, "grad_norm": 0.04765243083238602, "learning_rate": 8.221889740693153e-05, "loss": 0.12535251379013063, "step": 42240 }, { "epoch": 0.18138808033452686, "grad_norm": 22.591459274291992, "learning_rate": 8.22145856868139e-05, "loss": 0.2527450084686279, "step": 42250 }, { "epoch": 0.18143101242454684, "grad_norm": 0.5352850556373596, "learning_rate": 8.221027396669628e-05, "loss": 0.43744745254516604, "step": 42260 }, { "epoch": 0.18147394451456686, "grad_norm": 0.6301557421684265, "learning_rate": 8.220596224657864e-05, "loss": 0.1265001654624939, "step": 42270 }, { "epoch": 0.18151687660458687, "grad_norm": 0.7245357036590576, "learning_rate": 8.220165052646102e-05, "loss": 0.137465763092041, "step": 42280 }, { "epoch": 0.18155980869460686, "grad_norm": 0.04410898685455322, "learning_rate": 8.21973388063434e-05, "loss": 0.353134298324585, "step": 42290 }, { "epoch": 0.18160274078462688, "grad_norm": 0.023124389350414276, "learning_rate": 8.219302708622578e-05, "loss": 0.08382171988487244, "step": 42300 }, { "epoch": 0.1816456728746469, "grad_norm": 0.9598243236541748, "learning_rate": 8.218871536610815e-05, "loss": 0.09931755065917969, "step": 42310 }, { "epoch": 0.18168860496466688, "grad_norm": 0.006847964599728584, "learning_rate": 8.218440364599053e-05, "loss": 0.18975690603256226, "step": 42320 }, { "epoch": 0.1817315370546869, "grad_norm": 1.4635645151138306, "learning_rate": 8.218009192587291e-05, "loss": 0.18958462476730348, "step": 42330 }, { "epoch": 0.1817744691447069, "grad_norm": 6.285202503204346, "learning_rate": 8.21757802057553e-05, "loss": 0.4254147052764893, "step": 42340 }, { "epoch": 0.18181740123472692, "grad_norm": 1.0863999128341675, "learning_rate": 8.217146848563766e-05, "loss": 0.4314442157745361, "step": 42350 }, { "epoch": 0.1818603333247469, "grad_norm": 0.8314317464828491, "learning_rate": 8.216715676552004e-05, "loss": 0.3709148168563843, "step": 42360 }, { "epoch": 0.18190326541476692, "grad_norm": 0.7620775699615479, "learning_rate": 8.216284504540242e-05, "loss": 0.19915242195129396, "step": 42370 }, { "epoch": 0.18194619750478694, "grad_norm": 1.8169580698013306, "learning_rate": 8.21585333252848e-05, "loss": 0.39284143447875974, "step": 42380 }, { "epoch": 0.18198912959480693, "grad_norm": 0.15456518530845642, "learning_rate": 8.215422160516717e-05, "loss": 0.2430192708969116, "step": 42390 }, { "epoch": 0.18203206168482694, "grad_norm": 0.012154005467891693, "learning_rate": 8.214990988504955e-05, "loss": 0.22760634422302245, "step": 42400 }, { "epoch": 0.18207499377484695, "grad_norm": 0.19953225553035736, "learning_rate": 8.214559816493193e-05, "loss": 0.17155539989471436, "step": 42410 }, { "epoch": 0.18211792586486694, "grad_norm": 0.028711501508951187, "learning_rate": 8.21412864448143e-05, "loss": 0.12059812545776367, "step": 42420 }, { "epoch": 0.18216085795488696, "grad_norm": 5.916935443878174, "learning_rate": 8.213697472469667e-05, "loss": 0.11507797241210938, "step": 42430 }, { "epoch": 0.18220379004490697, "grad_norm": 0.39378035068511963, "learning_rate": 8.213266300457905e-05, "loss": 0.6166230201721191, "step": 42440 }, { "epoch": 0.182246722134927, "grad_norm": 1.32924222946167, "learning_rate": 8.212835128446142e-05, "loss": 0.2679696798324585, "step": 42450 }, { "epoch": 0.18228965422494697, "grad_norm": 0.06272288411855698, "learning_rate": 8.21240395643438e-05, "loss": 0.29037282466888426, "step": 42460 }, { "epoch": 0.182332586314967, "grad_norm": 0.163888618350029, "learning_rate": 8.211972784422618e-05, "loss": 0.0659745216369629, "step": 42470 }, { "epoch": 0.182375518404987, "grad_norm": 0.491892546415329, "learning_rate": 8.211541612410856e-05, "loss": 0.2176523208618164, "step": 42480 }, { "epoch": 0.182418450495007, "grad_norm": 0.5934542417526245, "learning_rate": 8.211110440399093e-05, "loss": 0.24393646717071532, "step": 42490 }, { "epoch": 0.182461382585027, "grad_norm": 2.1584486961364746, "learning_rate": 8.210679268387331e-05, "loss": 0.3037613868713379, "step": 42500 }, { "epoch": 0.18250431467504702, "grad_norm": 0.009108933620154858, "learning_rate": 8.210248096375567e-05, "loss": 0.15713260173797608, "step": 42510 }, { "epoch": 0.182547246765067, "grad_norm": 0.4322991669178009, "learning_rate": 8.209816924363805e-05, "loss": 0.17076945304870605, "step": 42520 }, { "epoch": 0.18259017885508702, "grad_norm": 0.08237133920192719, "learning_rate": 8.209385752352043e-05, "loss": 0.21980366706848145, "step": 42530 }, { "epoch": 0.18263311094510704, "grad_norm": 8.105067253112793, "learning_rate": 8.20895458034028e-05, "loss": 0.2191450834274292, "step": 42540 }, { "epoch": 0.18267604303512702, "grad_norm": 0.0717390924692154, "learning_rate": 8.208523408328518e-05, "loss": 0.39773619174957275, "step": 42550 }, { "epoch": 0.18271897512514704, "grad_norm": 4.677486896514893, "learning_rate": 8.208092236316757e-05, "loss": 0.15335140228271485, "step": 42560 }, { "epoch": 0.18276190721516705, "grad_norm": 4.2845377922058105, "learning_rate": 8.207661064304995e-05, "loss": 0.37812421321868894, "step": 42570 }, { "epoch": 0.18280483930518707, "grad_norm": 10.023009300231934, "learning_rate": 8.207229892293233e-05, "loss": 0.22293882369995116, "step": 42580 }, { "epoch": 0.18284777139520705, "grad_norm": 0.15922299027442932, "learning_rate": 8.20679872028147e-05, "loss": 0.21837432384490968, "step": 42590 }, { "epoch": 0.18289070348522707, "grad_norm": 2.1236989498138428, "learning_rate": 8.206367548269707e-05, "loss": 0.24211366176605226, "step": 42600 }, { "epoch": 0.18293363557524708, "grad_norm": 0.8039203882217407, "learning_rate": 8.205936376257945e-05, "loss": 0.28853487968444824, "step": 42610 }, { "epoch": 0.18297656766526707, "grad_norm": 0.020852619782090187, "learning_rate": 8.205505204246182e-05, "loss": 0.23580570220947267, "step": 42620 }, { "epoch": 0.18301949975528709, "grad_norm": 0.0024098677095025778, "learning_rate": 8.20507403223442e-05, "loss": 0.03820265829563141, "step": 42630 }, { "epoch": 0.1830624318453071, "grad_norm": 0.018851445987820625, "learning_rate": 8.204642860222658e-05, "loss": 0.15569354295730592, "step": 42640 }, { "epoch": 0.1831053639353271, "grad_norm": 1.3270905017852783, "learning_rate": 8.204211688210896e-05, "loss": 0.3316626071929932, "step": 42650 }, { "epoch": 0.1831482960253471, "grad_norm": 0.7571421265602112, "learning_rate": 8.203780516199133e-05, "loss": 0.1260097622871399, "step": 42660 }, { "epoch": 0.18319122811536712, "grad_norm": 0.11175458878278732, "learning_rate": 8.203349344187371e-05, "loss": 0.2821631908416748, "step": 42670 }, { "epoch": 0.18323416020538713, "grad_norm": 2.5466408729553223, "learning_rate": 8.202918172175607e-05, "loss": 0.1871044158935547, "step": 42680 }, { "epoch": 0.18327709229540712, "grad_norm": 1.2957346439361572, "learning_rate": 8.202487000163845e-05, "loss": 0.30394940376281737, "step": 42690 }, { "epoch": 0.18332002438542713, "grad_norm": 0.023146262392401695, "learning_rate": 8.202055828152083e-05, "loss": 0.1928611397743225, "step": 42700 }, { "epoch": 0.18336295647544715, "grad_norm": 0.003472856944426894, "learning_rate": 8.201624656140321e-05, "loss": 0.2598503351211548, "step": 42710 }, { "epoch": 0.18340588856546713, "grad_norm": 0.004443190060555935, "learning_rate": 8.201193484128558e-05, "loss": 0.20475189685821532, "step": 42720 }, { "epoch": 0.18344882065548715, "grad_norm": 0.41619807481765747, "learning_rate": 8.200762312116796e-05, "loss": 0.2871143102645874, "step": 42730 }, { "epoch": 0.18349175274550716, "grad_norm": 9.765052795410156, "learning_rate": 8.200331140105034e-05, "loss": 0.4204115867614746, "step": 42740 }, { "epoch": 0.18353468483552715, "grad_norm": 0.0008733683498576283, "learning_rate": 8.199899968093272e-05, "loss": 0.2748641729354858, "step": 42750 }, { "epoch": 0.18357761692554717, "grad_norm": 0.3988295793533325, "learning_rate": 8.199468796081508e-05, "loss": 0.3808812141418457, "step": 42760 }, { "epoch": 0.18362054901556718, "grad_norm": 0.009889461100101471, "learning_rate": 8.199037624069746e-05, "loss": 0.16420258283615113, "step": 42770 }, { "epoch": 0.1836634811055872, "grad_norm": 0.004972816910594702, "learning_rate": 8.198606452057985e-05, "loss": 0.19041136503219605, "step": 42780 }, { "epoch": 0.18370641319560718, "grad_norm": 0.03542853519320488, "learning_rate": 8.198175280046223e-05, "loss": 0.10970422029495239, "step": 42790 }, { "epoch": 0.1837493452856272, "grad_norm": 2.3165123462677, "learning_rate": 8.19774410803446e-05, "loss": 0.18427956104278564, "step": 42800 }, { "epoch": 0.1837922773756472, "grad_norm": 0.012739105150103569, "learning_rate": 8.197312936022698e-05, "loss": 0.1309381127357483, "step": 42810 }, { "epoch": 0.1838352094656672, "grad_norm": 0.005955998320132494, "learning_rate": 8.196881764010936e-05, "loss": 0.20765841007232666, "step": 42820 }, { "epoch": 0.1838781415556872, "grad_norm": 0.0022688531316816807, "learning_rate": 8.196450591999174e-05, "loss": 0.3057706356048584, "step": 42830 }, { "epoch": 0.18392107364570723, "grad_norm": 0.004292634781450033, "learning_rate": 8.19601941998741e-05, "loss": 0.2799015283584595, "step": 42840 }, { "epoch": 0.18396400573572722, "grad_norm": 2.1287357807159424, "learning_rate": 8.195588247975648e-05, "loss": 0.17402925491333007, "step": 42850 }, { "epoch": 0.18400693782574723, "grad_norm": 0.19822724163532257, "learning_rate": 8.195157075963885e-05, "loss": 0.3075894594192505, "step": 42860 }, { "epoch": 0.18404986991576724, "grad_norm": 0.3585273325443268, "learning_rate": 8.194725903952123e-05, "loss": 0.07946454286575318, "step": 42870 }, { "epoch": 0.18409280200578726, "grad_norm": 2.302215814590454, "learning_rate": 8.194294731940361e-05, "loss": 0.36985416412353517, "step": 42880 }, { "epoch": 0.18413573409580725, "grad_norm": 0.5702426433563232, "learning_rate": 8.193863559928599e-05, "loss": 0.371164870262146, "step": 42890 }, { "epoch": 0.18417866618582726, "grad_norm": 3.3670296669006348, "learning_rate": 8.193432387916836e-05, "loss": 0.2882568359375, "step": 42900 }, { "epoch": 0.18422159827584728, "grad_norm": 0.10460800677537918, "learning_rate": 8.193001215905074e-05, "loss": 0.15762473344802858, "step": 42910 }, { "epoch": 0.18426453036586726, "grad_norm": 0.09293296188116074, "learning_rate": 8.192570043893312e-05, "loss": 0.28543522357940676, "step": 42920 }, { "epoch": 0.18430746245588728, "grad_norm": 1.2589787244796753, "learning_rate": 8.192138871881548e-05, "loss": 0.11235880851745605, "step": 42930 }, { "epoch": 0.1843503945459073, "grad_norm": 0.021063808351755142, "learning_rate": 8.191707699869786e-05, "loss": 0.3688133478164673, "step": 42940 }, { "epoch": 0.18439332663592728, "grad_norm": 4.861030101776123, "learning_rate": 8.191276527858024e-05, "loss": 0.16458606719970703, "step": 42950 }, { "epoch": 0.1844362587259473, "grad_norm": 0.6773809790611267, "learning_rate": 8.190845355846261e-05, "loss": 0.4682401180267334, "step": 42960 }, { "epoch": 0.1844791908159673, "grad_norm": 0.4810944199562073, "learning_rate": 8.190414183834499e-05, "loss": 0.28884458541870117, "step": 42970 }, { "epoch": 0.1845221229059873, "grad_norm": 2.6145782470703125, "learning_rate": 8.189983011822737e-05, "loss": 0.05625054836273193, "step": 42980 }, { "epoch": 0.1845650549960073, "grad_norm": 2.071237564086914, "learning_rate": 8.189551839810975e-05, "loss": 0.20640103816986083, "step": 42990 }, { "epoch": 0.18460798708602733, "grad_norm": 0.013447938486933708, "learning_rate": 8.189120667799212e-05, "loss": 0.2571903944015503, "step": 43000 }, { "epoch": 0.18460798708602733, "eval_loss": 0.4624760150909424, "eval_runtime": 27.5883, "eval_samples_per_second": 3.625, "eval_steps_per_second": 3.625, "step": 43000 }, { "epoch": 0.18465091917604734, "grad_norm": 0.09999194741249084, "learning_rate": 8.18868949578745e-05, "loss": 0.0944497048854828, "step": 43010 }, { "epoch": 0.18469385126606733, "grad_norm": 2.868917942047119, "learning_rate": 8.188258323775688e-05, "loss": 0.26001758575439454, "step": 43020 }, { "epoch": 0.18473678335608734, "grad_norm": 3.3130545616149902, "learning_rate": 8.187827151763925e-05, "loss": 0.3481125354766846, "step": 43030 }, { "epoch": 0.18477971544610736, "grad_norm": 0.002941107377409935, "learning_rate": 8.187395979752163e-05, "loss": 0.19944119453430176, "step": 43040 }, { "epoch": 0.18482264753612734, "grad_norm": 14.443581581115723, "learning_rate": 8.186964807740401e-05, "loss": 0.2885287761688232, "step": 43050 }, { "epoch": 0.18486557962614736, "grad_norm": 3.258791446685791, "learning_rate": 8.186533635728639e-05, "loss": 0.17024463415145874, "step": 43060 }, { "epoch": 0.18490851171616737, "grad_norm": 0.04481988027691841, "learning_rate": 8.186102463716876e-05, "loss": 0.1365174889564514, "step": 43070 }, { "epoch": 0.18495144380618736, "grad_norm": 0.05688230320811272, "learning_rate": 8.185671291705114e-05, "loss": 0.427608060836792, "step": 43080 }, { "epoch": 0.18499437589620737, "grad_norm": 3.2461934089660645, "learning_rate": 8.18524011969335e-05, "loss": 0.19203683137893676, "step": 43090 }, { "epoch": 0.1850373079862274, "grad_norm": 0.08931227028369904, "learning_rate": 8.184808947681588e-05, "loss": 0.31490564346313477, "step": 43100 }, { "epoch": 0.1850802400762474, "grad_norm": 1.140378713607788, "learning_rate": 8.184377775669826e-05, "loss": 0.1009800910949707, "step": 43110 }, { "epoch": 0.1851231721662674, "grad_norm": 0.06171561777591705, "learning_rate": 8.183946603658064e-05, "loss": 0.07267990708351135, "step": 43120 }, { "epoch": 0.1851661042562874, "grad_norm": 0.047086216509342194, "learning_rate": 8.183515431646301e-05, "loss": 0.3527937650680542, "step": 43130 }, { "epoch": 0.18520903634630742, "grad_norm": 0.013647548854351044, "learning_rate": 8.183084259634539e-05, "loss": 0.26084198951721194, "step": 43140 }, { "epoch": 0.1852519684363274, "grad_norm": 0.07848212122917175, "learning_rate": 8.182653087622777e-05, "loss": 0.39730684757232665, "step": 43150 }, { "epoch": 0.18529490052634742, "grad_norm": 0.002793788444250822, "learning_rate": 8.182221915611015e-05, "loss": 0.23335707187652588, "step": 43160 }, { "epoch": 0.18533783261636744, "grad_norm": 0.018101299181580544, "learning_rate": 8.181790743599251e-05, "loss": 0.2064742088317871, "step": 43170 }, { "epoch": 0.18538076470638742, "grad_norm": 0.9697562456130981, "learning_rate": 8.181359571587489e-05, "loss": 0.2400217294692993, "step": 43180 }, { "epoch": 0.18542369679640744, "grad_norm": 0.07094231992959976, "learning_rate": 8.180928399575727e-05, "loss": 0.3039929151535034, "step": 43190 }, { "epoch": 0.18546662888642745, "grad_norm": 0.013909654691815376, "learning_rate": 8.180497227563964e-05, "loss": 0.27940926551818845, "step": 43200 }, { "epoch": 0.18550956097644747, "grad_norm": 0.03464217483997345, "learning_rate": 8.180066055552202e-05, "loss": 0.06249539852142334, "step": 43210 }, { "epoch": 0.18555249306646746, "grad_norm": 0.022050119936466217, "learning_rate": 8.17963488354044e-05, "loss": 0.2281078577041626, "step": 43220 }, { "epoch": 0.18559542515648747, "grad_norm": 1.9440895318984985, "learning_rate": 8.179203711528677e-05, "loss": 0.5420735836029053, "step": 43230 }, { "epoch": 0.18563835724650748, "grad_norm": 3.1730079650878906, "learning_rate": 8.178772539516915e-05, "loss": 0.1774951696395874, "step": 43240 }, { "epoch": 0.18568128933652747, "grad_norm": 0.05383119732141495, "learning_rate": 8.178341367505153e-05, "loss": 0.11135185956954956, "step": 43250 }, { "epoch": 0.1857242214265475, "grad_norm": 0.0012574224965646863, "learning_rate": 8.17791019549339e-05, "loss": 0.2155439853668213, "step": 43260 }, { "epoch": 0.1857671535165675, "grad_norm": 0.07369806617498398, "learning_rate": 8.177479023481628e-05, "loss": 0.313634467124939, "step": 43270 }, { "epoch": 0.1858100856065875, "grad_norm": 0.4887748062610626, "learning_rate": 8.177047851469866e-05, "loss": 0.3466495037078857, "step": 43280 }, { "epoch": 0.1858530176966075, "grad_norm": 0.11614210903644562, "learning_rate": 8.176616679458104e-05, "loss": 0.16023856401443481, "step": 43290 }, { "epoch": 0.18589594978662752, "grad_norm": 0.01644720695912838, "learning_rate": 8.176185507446342e-05, "loss": 0.14743173122406006, "step": 43300 }, { "epoch": 0.18593888187664753, "grad_norm": 2.1997272968292236, "learning_rate": 8.17575433543458e-05, "loss": 0.15039613246917724, "step": 43310 }, { "epoch": 0.18598181396666752, "grad_norm": 0.0693756565451622, "learning_rate": 8.175323163422817e-05, "loss": 0.3908696174621582, "step": 43320 }, { "epoch": 0.18602474605668753, "grad_norm": 0.02574329636991024, "learning_rate": 8.174891991411055e-05, "loss": 0.15041611194610596, "step": 43330 }, { "epoch": 0.18606767814670755, "grad_norm": 3.0219974517822266, "learning_rate": 8.174460819399291e-05, "loss": 0.37598633766174316, "step": 43340 }, { "epoch": 0.18611061023672754, "grad_norm": 6.371044635772705, "learning_rate": 8.174029647387529e-05, "loss": 0.24004921913146973, "step": 43350 }, { "epoch": 0.18615354232674755, "grad_norm": 0.007055271882563829, "learning_rate": 8.173598475375767e-05, "loss": 0.1907490611076355, "step": 43360 }, { "epoch": 0.18619647441676757, "grad_norm": 0.8306063413619995, "learning_rate": 8.173167303364004e-05, "loss": 0.2930524587631226, "step": 43370 }, { "epoch": 0.18623940650678755, "grad_norm": 0.6232219934463501, "learning_rate": 8.172736131352242e-05, "loss": 0.27646067142486574, "step": 43380 }, { "epoch": 0.18628233859680757, "grad_norm": 5.046137809753418, "learning_rate": 8.17230495934048e-05, "loss": 0.2907963752746582, "step": 43390 }, { "epoch": 0.18632527068682758, "grad_norm": 0.12482646852731705, "learning_rate": 8.171873787328718e-05, "loss": 0.2309260129928589, "step": 43400 }, { "epoch": 0.18636820277684757, "grad_norm": 1.5691694021224976, "learning_rate": 8.171442615316955e-05, "loss": 0.267235255241394, "step": 43410 }, { "epoch": 0.18641113486686758, "grad_norm": 0.054734162986278534, "learning_rate": 8.171011443305192e-05, "loss": 0.5326226711273193, "step": 43420 }, { "epoch": 0.1864540669568876, "grad_norm": 0.022658541798591614, "learning_rate": 8.17058027129343e-05, "loss": 0.3653179883956909, "step": 43430 }, { "epoch": 0.1864969990469076, "grad_norm": 1.6763198375701904, "learning_rate": 8.170149099281667e-05, "loss": 0.38337364196777346, "step": 43440 }, { "epoch": 0.1865399311369276, "grad_norm": 5.016429424285889, "learning_rate": 8.169717927269905e-05, "loss": 0.19746410846710205, "step": 43450 }, { "epoch": 0.18658286322694762, "grad_norm": 1.6247531175613403, "learning_rate": 8.169286755258143e-05, "loss": 0.10856088399887084, "step": 43460 }, { "epoch": 0.18662579531696763, "grad_norm": 0.14577797055244446, "learning_rate": 8.16885558324638e-05, "loss": 0.29924936294555665, "step": 43470 }, { "epoch": 0.18666872740698762, "grad_norm": 0.13083939254283905, "learning_rate": 8.168424411234618e-05, "loss": 0.2384188652038574, "step": 43480 }, { "epoch": 0.18671165949700763, "grad_norm": 7.798047065734863, "learning_rate": 8.167993239222856e-05, "loss": 0.30132806301116943, "step": 43490 }, { "epoch": 0.18675459158702765, "grad_norm": 0.5570793151855469, "learning_rate": 8.167562067211094e-05, "loss": 0.27629728317260743, "step": 43500 }, { "epoch": 0.18679752367704763, "grad_norm": 0.05040042847394943, "learning_rate": 8.167130895199331e-05, "loss": 0.060433989763259886, "step": 43510 }, { "epoch": 0.18684045576706765, "grad_norm": 0.03875486180186272, "learning_rate": 8.166699723187569e-05, "loss": 0.22466940879821778, "step": 43520 }, { "epoch": 0.18688338785708766, "grad_norm": 2.4116384983062744, "learning_rate": 8.166268551175807e-05, "loss": 0.35434484481811523, "step": 43530 }, { "epoch": 0.18692631994710768, "grad_norm": 4.129846096038818, "learning_rate": 8.165837379164045e-05, "loss": 0.46059222221374513, "step": 43540 }, { "epoch": 0.18696925203712766, "grad_norm": 0.02307182177901268, "learning_rate": 8.165406207152282e-05, "loss": 0.17703438997268678, "step": 43550 }, { "epoch": 0.18701218412714768, "grad_norm": 3.282421827316284, "learning_rate": 8.16497503514052e-05, "loss": 0.20219864845275878, "step": 43560 }, { "epoch": 0.1870551162171677, "grad_norm": 2.4306914806365967, "learning_rate": 8.164543863128758e-05, "loss": 0.3165069341659546, "step": 43570 }, { "epoch": 0.18709804830718768, "grad_norm": 3.8549060821533203, "learning_rate": 8.164112691116994e-05, "loss": 0.20757479667663575, "step": 43580 }, { "epoch": 0.1871409803972077, "grad_norm": 0.05286364257335663, "learning_rate": 8.163681519105232e-05, "loss": 0.20257492065429689, "step": 43590 }, { "epoch": 0.1871839124872277, "grad_norm": 1.4247897863388062, "learning_rate": 8.16325034709347e-05, "loss": 0.1439652681350708, "step": 43600 }, { "epoch": 0.1872268445772477, "grad_norm": 1.548986792564392, "learning_rate": 8.162819175081707e-05, "loss": 0.18846890926361085, "step": 43610 }, { "epoch": 0.1872697766672677, "grad_norm": 0.008255302906036377, "learning_rate": 8.162388003069945e-05, "loss": 0.20841715335845948, "step": 43620 }, { "epoch": 0.18731270875728773, "grad_norm": 0.42507603764533997, "learning_rate": 8.161956831058183e-05, "loss": 0.2078617811203003, "step": 43630 }, { "epoch": 0.18735564084730774, "grad_norm": 0.04114179313182831, "learning_rate": 8.16152565904642e-05, "loss": 0.04051432609558105, "step": 43640 }, { "epoch": 0.18739857293732773, "grad_norm": 0.010206053033471107, "learning_rate": 8.161094487034658e-05, "loss": 0.35066268444061277, "step": 43650 }, { "epoch": 0.18744150502734774, "grad_norm": 0.012079720385372639, "learning_rate": 8.160663315022896e-05, "loss": 0.17734284400939943, "step": 43660 }, { "epoch": 0.18748443711736776, "grad_norm": 0.00811173114925623, "learning_rate": 8.160232143011132e-05, "loss": 0.1144661545753479, "step": 43670 }, { "epoch": 0.18752736920738775, "grad_norm": 0.3730086088180542, "learning_rate": 8.15980097099937e-05, "loss": 0.16818068027496338, "step": 43680 }, { "epoch": 0.18757030129740776, "grad_norm": 0.024107687175273895, "learning_rate": 8.159369798987608e-05, "loss": 0.28643581867218015, "step": 43690 }, { "epoch": 0.18761323338742777, "grad_norm": 0.023587489500641823, "learning_rate": 8.158938626975846e-05, "loss": 0.27016143798828124, "step": 43700 }, { "epoch": 0.18765616547744776, "grad_norm": 1.4881666898727417, "learning_rate": 8.158507454964083e-05, "loss": 0.2365088939666748, "step": 43710 }, { "epoch": 0.18769909756746778, "grad_norm": 0.2194991409778595, "learning_rate": 8.158076282952321e-05, "loss": 0.31122922897338867, "step": 43720 }, { "epoch": 0.1877420296574878, "grad_norm": 3.471586227416992, "learning_rate": 8.157645110940559e-05, "loss": 0.30818378925323486, "step": 43730 }, { "epoch": 0.1877849617475078, "grad_norm": 3.3636248111724854, "learning_rate": 8.157213938928796e-05, "loss": 0.13375380039215087, "step": 43740 }, { "epoch": 0.1878278938375278, "grad_norm": 3.003509521484375, "learning_rate": 8.156782766917034e-05, "loss": 0.17285083532333373, "step": 43750 }, { "epoch": 0.1878708259275478, "grad_norm": 0.09808559715747833, "learning_rate": 8.156351594905272e-05, "loss": 0.18395047187805175, "step": 43760 }, { "epoch": 0.18791375801756782, "grad_norm": 0.08174111694097519, "learning_rate": 8.15592042289351e-05, "loss": 0.31794826984405516, "step": 43770 }, { "epoch": 0.1879566901075878, "grad_norm": 3.1812849044799805, "learning_rate": 8.155489250881747e-05, "loss": 0.259110426902771, "step": 43780 }, { "epoch": 0.18799962219760782, "grad_norm": 2.050807476043701, "learning_rate": 8.155058078869985e-05, "loss": 0.2605229139328003, "step": 43790 }, { "epoch": 0.18804255428762784, "grad_norm": 1.9166364669799805, "learning_rate": 8.154626906858223e-05, "loss": 0.3418402910232544, "step": 43800 }, { "epoch": 0.18808548637764783, "grad_norm": 0.7631783485412598, "learning_rate": 8.15419573484646e-05, "loss": 0.2409060001373291, "step": 43810 }, { "epoch": 0.18812841846766784, "grad_norm": 2.0551998615264893, "learning_rate": 8.153764562834698e-05, "loss": 0.31589469909667967, "step": 43820 }, { "epoch": 0.18817135055768786, "grad_norm": 0.06294999271631241, "learning_rate": 8.153333390822935e-05, "loss": 0.4397084712982178, "step": 43830 }, { "epoch": 0.18821428264770784, "grad_norm": 3.0229833126068115, "learning_rate": 8.152902218811172e-05, "loss": 0.24915146827697754, "step": 43840 }, { "epoch": 0.18825721473772786, "grad_norm": 0.04572201520204544, "learning_rate": 8.15247104679941e-05, "loss": 0.2244415283203125, "step": 43850 }, { "epoch": 0.18830014682774787, "grad_norm": 2.7412846088409424, "learning_rate": 8.152039874787648e-05, "loss": 0.2302554130554199, "step": 43860 }, { "epoch": 0.1883430789177679, "grad_norm": 1.3648289442062378, "learning_rate": 8.151608702775886e-05, "loss": 0.2644734144210815, "step": 43870 }, { "epoch": 0.18838601100778787, "grad_norm": 0.25226426124572754, "learning_rate": 8.151177530764123e-05, "loss": 0.11064702272415161, "step": 43880 }, { "epoch": 0.1884289430978079, "grad_norm": 0.028316723182797432, "learning_rate": 8.150746358752361e-05, "loss": 0.3318798542022705, "step": 43890 }, { "epoch": 0.1884718751878279, "grad_norm": 4.9640631675720215, "learning_rate": 8.150315186740599e-05, "loss": 0.2857161521911621, "step": 43900 }, { "epoch": 0.1885148072778479, "grad_norm": 0.480720579624176, "learning_rate": 8.149884014728835e-05, "loss": 0.1920159101486206, "step": 43910 }, { "epoch": 0.1885577393678679, "grad_norm": 0.23835653066635132, "learning_rate": 8.149452842717073e-05, "loss": 0.3155388355255127, "step": 43920 }, { "epoch": 0.18860067145788792, "grad_norm": 0.02639954164624214, "learning_rate": 8.149021670705311e-05, "loss": 0.30919642448425294, "step": 43930 }, { "epoch": 0.1886436035479079, "grad_norm": 0.004375265445560217, "learning_rate": 8.148590498693548e-05, "loss": 0.245635724067688, "step": 43940 }, { "epoch": 0.18868653563792792, "grad_norm": 0.0014972257195040584, "learning_rate": 8.148159326681786e-05, "loss": 0.3194050073623657, "step": 43950 }, { "epoch": 0.18872946772794794, "grad_norm": 0.3180157244205475, "learning_rate": 8.147728154670024e-05, "loss": 0.05688638091087341, "step": 43960 }, { "epoch": 0.18877239981796795, "grad_norm": 1.7093822956085205, "learning_rate": 8.147296982658263e-05, "loss": 0.3212114334106445, "step": 43970 }, { "epoch": 0.18881533190798794, "grad_norm": 1.0756118297576904, "learning_rate": 8.146865810646501e-05, "loss": 0.27136006355285647, "step": 43980 }, { "epoch": 0.18885826399800795, "grad_norm": 0.05142730847001076, "learning_rate": 8.146434638634738e-05, "loss": 0.2458888292312622, "step": 43990 }, { "epoch": 0.18890119608802797, "grad_norm": 2.8642890453338623, "learning_rate": 8.146003466622975e-05, "loss": 0.2910621643066406, "step": 44000 }, { "epoch": 0.18890119608802797, "eval_loss": 0.4605604410171509, "eval_runtime": 27.4334, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 44000 }, { "epoch": 0.18894412817804795, "grad_norm": 1.807096004486084, "learning_rate": 8.145572294611213e-05, "loss": 0.3342024564743042, "step": 44010 }, { "epoch": 0.18898706026806797, "grad_norm": 0.6657182574272156, "learning_rate": 8.14514112259945e-05, "loss": 0.05959969758987427, "step": 44020 }, { "epoch": 0.18902999235808798, "grad_norm": 1.3843122720718384, "learning_rate": 8.144709950587688e-05, "loss": 0.10991348028182983, "step": 44030 }, { "epoch": 0.18907292444810797, "grad_norm": 6.659622669219971, "learning_rate": 8.144278778575926e-05, "loss": 0.5495349407196045, "step": 44040 }, { "epoch": 0.18911585653812799, "grad_norm": 3.2412028312683105, "learning_rate": 8.143847606564164e-05, "loss": 0.29310102462768556, "step": 44050 }, { "epoch": 0.189158788628148, "grad_norm": 2.484363317489624, "learning_rate": 8.143416434552401e-05, "loss": 0.4231656551361084, "step": 44060 }, { "epoch": 0.18920172071816801, "grad_norm": 0.0007273709634318948, "learning_rate": 8.142985262540639e-05, "loss": 0.0684486985206604, "step": 44070 }, { "epoch": 0.189244652808188, "grad_norm": 5.1812920570373535, "learning_rate": 8.142554090528875e-05, "loss": 0.2998863935470581, "step": 44080 }, { "epoch": 0.18928758489820802, "grad_norm": 0.029622757807374, "learning_rate": 8.142122918517113e-05, "loss": 0.18725346326828002, "step": 44090 }, { "epoch": 0.18933051698822803, "grad_norm": 0.0006494583212770522, "learning_rate": 8.141691746505351e-05, "loss": 0.22647314071655272, "step": 44100 }, { "epoch": 0.18937344907824802, "grad_norm": 1.4165912866592407, "learning_rate": 8.141260574493589e-05, "loss": 0.28343505859375, "step": 44110 }, { "epoch": 0.18941638116826803, "grad_norm": 0.4761173725128174, "learning_rate": 8.140829402481826e-05, "loss": 0.11881612539291382, "step": 44120 }, { "epoch": 0.18945931325828805, "grad_norm": 0.06735536456108093, "learning_rate": 8.140398230470064e-05, "loss": 0.3430104494094849, "step": 44130 }, { "epoch": 0.18950224534830803, "grad_norm": 0.007762636058032513, "learning_rate": 8.139967058458302e-05, "loss": 0.5389622211456299, "step": 44140 }, { "epoch": 0.18954517743832805, "grad_norm": 1.3800119161605835, "learning_rate": 8.13953588644654e-05, "loss": 0.24931068420410157, "step": 44150 }, { "epoch": 0.18958810952834806, "grad_norm": 0.880368173122406, "learning_rate": 8.139104714434776e-05, "loss": 0.12496919631958008, "step": 44160 }, { "epoch": 0.18963104161836808, "grad_norm": 0.0014428679132834077, "learning_rate": 8.138673542423014e-05, "loss": 0.07668641209602356, "step": 44170 }, { "epoch": 0.18967397370838807, "grad_norm": 2.079153060913086, "learning_rate": 8.138242370411251e-05, "loss": 0.2590550422668457, "step": 44180 }, { "epoch": 0.18971690579840808, "grad_norm": 0.15682633221149445, "learning_rate": 8.13781119839949e-05, "loss": 0.05421972870826721, "step": 44190 }, { "epoch": 0.1897598378884281, "grad_norm": 0.07640580087900162, "learning_rate": 8.137380026387728e-05, "loss": 0.19816200733184813, "step": 44200 }, { "epoch": 0.18980276997844808, "grad_norm": 0.005920249968767166, "learning_rate": 8.136948854375966e-05, "loss": 0.34413843154907225, "step": 44210 }, { "epoch": 0.1898457020684681, "grad_norm": 2.7683632373809814, "learning_rate": 8.136517682364204e-05, "loss": 0.4210531234741211, "step": 44220 }, { "epoch": 0.1898886341584881, "grad_norm": 1.1772396564483643, "learning_rate": 8.136086510352441e-05, "loss": 0.2988885879516602, "step": 44230 }, { "epoch": 0.1899315662485081, "grad_norm": 0.8943934440612793, "learning_rate": 8.135655338340678e-05, "loss": 0.2540787696838379, "step": 44240 }, { "epoch": 0.1899744983385281, "grad_norm": 0.004835850093513727, "learning_rate": 8.135224166328916e-05, "loss": 0.08346214890480042, "step": 44250 }, { "epoch": 0.19001743042854813, "grad_norm": 4.239825248718262, "learning_rate": 8.134792994317153e-05, "loss": 0.22071146965026855, "step": 44260 }, { "epoch": 0.19006036251856812, "grad_norm": 0.07698159664869308, "learning_rate": 8.134361822305391e-05, "loss": 0.1325700879096985, "step": 44270 }, { "epoch": 0.19010329460858813, "grad_norm": 2.4468345642089844, "learning_rate": 8.133930650293629e-05, "loss": 0.3593152046203613, "step": 44280 }, { "epoch": 0.19014622669860815, "grad_norm": 41.485435485839844, "learning_rate": 8.133499478281866e-05, "loss": 0.08537226915359497, "step": 44290 }, { "epoch": 0.19018915878862816, "grad_norm": 2.0043179988861084, "learning_rate": 8.133068306270104e-05, "loss": 0.135001802444458, "step": 44300 }, { "epoch": 0.19023209087864815, "grad_norm": 0.0291176475584507, "learning_rate": 8.132637134258342e-05, "loss": 0.2878988742828369, "step": 44310 }, { "epoch": 0.19027502296866816, "grad_norm": 0.2772849202156067, "learning_rate": 8.132205962246578e-05, "loss": 0.20665276050567627, "step": 44320 }, { "epoch": 0.19031795505868818, "grad_norm": 0.02503611147403717, "learning_rate": 8.131774790234816e-05, "loss": 0.29029097557067873, "step": 44330 }, { "epoch": 0.19036088714870816, "grad_norm": 1.3659212589263916, "learning_rate": 8.131343618223054e-05, "loss": 0.21326007843017578, "step": 44340 }, { "epoch": 0.19040381923872818, "grad_norm": 2.154261589050293, "learning_rate": 8.130912446211292e-05, "loss": 0.29660470485687257, "step": 44350 }, { "epoch": 0.1904467513287482, "grad_norm": 0.5898604989051819, "learning_rate": 8.130481274199529e-05, "loss": 0.2806436777114868, "step": 44360 }, { "epoch": 0.19048968341876818, "grad_norm": 0.006019935477524996, "learning_rate": 8.130050102187767e-05, "loss": 0.21853513717651368, "step": 44370 }, { "epoch": 0.1905326155087882, "grad_norm": 0.09301898628473282, "learning_rate": 8.129618930176005e-05, "loss": 0.44185843467712405, "step": 44380 }, { "epoch": 0.1905755475988082, "grad_norm": 0.16995981335639954, "learning_rate": 8.129187758164242e-05, "loss": 0.22304224967956543, "step": 44390 }, { "epoch": 0.19061847968882822, "grad_norm": 0.0012088268995285034, "learning_rate": 8.12875658615248e-05, "loss": 0.23868961334228517, "step": 44400 }, { "epoch": 0.1906614117788482, "grad_norm": 0.14039476215839386, "learning_rate": 8.128325414140718e-05, "loss": 0.2872094869613647, "step": 44410 }, { "epoch": 0.19070434386886823, "grad_norm": 0.004024143796414137, "learning_rate": 8.127894242128956e-05, "loss": 0.19596340656280517, "step": 44420 }, { "epoch": 0.19074727595888824, "grad_norm": 0.02372003346681595, "learning_rate": 8.127463070117193e-05, "loss": 0.295436954498291, "step": 44430 }, { "epoch": 0.19079020804890823, "grad_norm": 0.023033304139971733, "learning_rate": 8.127031898105431e-05, "loss": 0.34102492332458495, "step": 44440 }, { "epoch": 0.19083314013892824, "grad_norm": 3.642709493637085, "learning_rate": 8.126600726093669e-05, "loss": 0.3784060478210449, "step": 44450 }, { "epoch": 0.19087607222894826, "grad_norm": 0.009738151915371418, "learning_rate": 8.126169554081907e-05, "loss": 0.33837051391601564, "step": 44460 }, { "epoch": 0.19091900431896824, "grad_norm": 0.08202206343412399, "learning_rate": 8.125738382070144e-05, "loss": 0.23539764881134034, "step": 44470 }, { "epoch": 0.19096193640898826, "grad_norm": 0.3848929703235626, "learning_rate": 8.125307210058382e-05, "loss": 0.2307135581970215, "step": 44480 }, { "epoch": 0.19100486849900827, "grad_norm": 0.019833676517009735, "learning_rate": 8.124876038046618e-05, "loss": 0.22377758026123046, "step": 44490 }, { "epoch": 0.1910478005890283, "grad_norm": 0.027888990938663483, "learning_rate": 8.124444866034856e-05, "loss": 0.2692965030670166, "step": 44500 }, { "epoch": 0.19109073267904828, "grad_norm": 0.05748312547802925, "learning_rate": 8.124013694023094e-05, "loss": 0.31379878520965576, "step": 44510 }, { "epoch": 0.1911336647690683, "grad_norm": 0.06689203530550003, "learning_rate": 8.123582522011332e-05, "loss": 0.32233970165252684, "step": 44520 }, { "epoch": 0.1911765968590883, "grad_norm": 0.008352093398571014, "learning_rate": 8.12315134999957e-05, "loss": 0.1896106481552124, "step": 44530 }, { "epoch": 0.1912195289491083, "grad_norm": 0.14758487045764923, "learning_rate": 8.122720177987807e-05, "loss": 0.1729714035987854, "step": 44540 }, { "epoch": 0.1912624610391283, "grad_norm": 0.03349534422159195, "learning_rate": 8.122289005976045e-05, "loss": 0.16641179323196412, "step": 44550 }, { "epoch": 0.19130539312914832, "grad_norm": 0.08998719602823257, "learning_rate": 8.121857833964283e-05, "loss": 0.23800258636474608, "step": 44560 }, { "epoch": 0.1913483252191683, "grad_norm": 2.7044553756713867, "learning_rate": 8.121426661952519e-05, "loss": 0.1590886116027832, "step": 44570 }, { "epoch": 0.19139125730918832, "grad_norm": 1.6016440391540527, "learning_rate": 8.120995489940757e-05, "loss": 0.36989946365356446, "step": 44580 }, { "epoch": 0.19143418939920834, "grad_norm": 3.2121012210845947, "learning_rate": 8.120564317928994e-05, "loss": 0.31107995510101316, "step": 44590 }, { "epoch": 0.19147712148922835, "grad_norm": 0.006490649655461311, "learning_rate": 8.120133145917232e-05, "loss": 0.21261978149414062, "step": 44600 }, { "epoch": 0.19152005357924834, "grad_norm": 0.019323797896504402, "learning_rate": 8.11970197390547e-05, "loss": 0.12796977758407593, "step": 44610 }, { "epoch": 0.19156298566926835, "grad_norm": 2.3980066776275635, "learning_rate": 8.119270801893708e-05, "loss": 0.29838697910308837, "step": 44620 }, { "epoch": 0.19160591775928837, "grad_norm": 10.988465309143066, "learning_rate": 8.118839629881945e-05, "loss": 0.4121725082397461, "step": 44630 }, { "epoch": 0.19164884984930836, "grad_norm": 0.031216247007250786, "learning_rate": 8.118408457870183e-05, "loss": 0.1725583553314209, "step": 44640 }, { "epoch": 0.19169178193932837, "grad_norm": 0.5122570395469666, "learning_rate": 8.117977285858421e-05, "loss": 0.26382031440734866, "step": 44650 }, { "epoch": 0.19173471402934839, "grad_norm": 0.006867983378469944, "learning_rate": 8.117546113846659e-05, "loss": 0.03699140548706055, "step": 44660 }, { "epoch": 0.19177764611936837, "grad_norm": 1.920220971107483, "learning_rate": 8.117114941834896e-05, "loss": 0.29547007083892823, "step": 44670 }, { "epoch": 0.1918205782093884, "grad_norm": 0.6627082228660583, "learning_rate": 8.116683769823134e-05, "loss": 0.21683313846588134, "step": 44680 }, { "epoch": 0.1918635102994084, "grad_norm": 1.7581837177276611, "learning_rate": 8.116252597811372e-05, "loss": 0.11833486557006836, "step": 44690 }, { "epoch": 0.1919064423894284, "grad_norm": 1.6450203657150269, "learning_rate": 8.11582142579961e-05, "loss": 0.35963356494903564, "step": 44700 }, { "epoch": 0.1919493744794484, "grad_norm": 2.345304489135742, "learning_rate": 8.115390253787847e-05, "loss": 0.28931460380554197, "step": 44710 }, { "epoch": 0.19199230656946842, "grad_norm": 0.010913246311247349, "learning_rate": 8.114959081776085e-05, "loss": 0.22629737854003906, "step": 44720 }, { "epoch": 0.19203523865948843, "grad_norm": 0.5741439461708069, "learning_rate": 8.114527909764323e-05, "loss": 0.25016350746154786, "step": 44730 }, { "epoch": 0.19207817074950842, "grad_norm": 0.7402640581130981, "learning_rate": 8.114096737752559e-05, "loss": 0.32834055423736574, "step": 44740 }, { "epoch": 0.19212110283952843, "grad_norm": 0.04131797328591347, "learning_rate": 8.113665565740797e-05, "loss": 0.3026629686355591, "step": 44750 }, { "epoch": 0.19216403492954845, "grad_norm": 1.0897802114486694, "learning_rate": 8.113234393729035e-05, "loss": 0.18321956396102906, "step": 44760 }, { "epoch": 0.19220696701956844, "grad_norm": 0.0162035059183836, "learning_rate": 8.112803221717272e-05, "loss": 0.22167975902557374, "step": 44770 }, { "epoch": 0.19224989910958845, "grad_norm": 0.018093420192599297, "learning_rate": 8.11237204970551e-05, "loss": 0.04558416903018951, "step": 44780 }, { "epoch": 0.19229283119960847, "grad_norm": 0.211838498711586, "learning_rate": 8.111940877693748e-05, "loss": 0.33308026790618894, "step": 44790 }, { "epoch": 0.19233576328962845, "grad_norm": 0.0031913614366203547, "learning_rate": 8.111509705681985e-05, "loss": 0.2766871929168701, "step": 44800 }, { "epoch": 0.19237869537964847, "grad_norm": 0.028377799317240715, "learning_rate": 8.111078533670223e-05, "loss": 0.11854208707809448, "step": 44810 }, { "epoch": 0.19242162746966848, "grad_norm": 0.04484991356730461, "learning_rate": 8.11064736165846e-05, "loss": 0.09982895851135254, "step": 44820 }, { "epoch": 0.1924645595596885, "grad_norm": 0.08299509435892105, "learning_rate": 8.110216189646697e-05, "loss": 0.40526819229125977, "step": 44830 }, { "epoch": 0.19250749164970848, "grad_norm": 0.25029635429382324, "learning_rate": 8.109785017634935e-05, "loss": 0.008327079564332962, "step": 44840 }, { "epoch": 0.1925504237397285, "grad_norm": 0.3442751169204712, "learning_rate": 8.109353845623173e-05, "loss": 0.13076180219650269, "step": 44850 }, { "epoch": 0.1925933558297485, "grad_norm": 0.012612867169082165, "learning_rate": 8.10892267361141e-05, "loss": 0.0522712230682373, "step": 44860 }, { "epoch": 0.1926362879197685, "grad_norm": 0.09985598921775818, "learning_rate": 8.108491501599648e-05, "loss": 0.4725681781768799, "step": 44870 }, { "epoch": 0.19267922000978852, "grad_norm": 0.15437287092208862, "learning_rate": 8.108060329587886e-05, "loss": 0.2096705436706543, "step": 44880 }, { "epoch": 0.19272215209980853, "grad_norm": 3.897529363632202, "learning_rate": 8.107629157576124e-05, "loss": 0.24176254272460937, "step": 44890 }, { "epoch": 0.19276508418982852, "grad_norm": 1.8788121938705444, "learning_rate": 8.107197985564361e-05, "loss": 0.13124208450317382, "step": 44900 }, { "epoch": 0.19280801627984853, "grad_norm": 3.367654800415039, "learning_rate": 8.106766813552599e-05, "loss": 0.15661016702651978, "step": 44910 }, { "epoch": 0.19285094836986855, "grad_norm": 1.950370192527771, "learning_rate": 8.106335641540837e-05, "loss": 0.23335545063018798, "step": 44920 }, { "epoch": 0.19289388045988856, "grad_norm": 0.7737196087837219, "learning_rate": 8.105904469529075e-05, "loss": 0.1777315616607666, "step": 44930 }, { "epoch": 0.19293681254990855, "grad_norm": 6.4171462059021, "learning_rate": 8.105473297517312e-05, "loss": 0.4596564769744873, "step": 44940 }, { "epoch": 0.19297974463992856, "grad_norm": 0.08766023814678192, "learning_rate": 8.10504212550555e-05, "loss": 0.1520202040672302, "step": 44950 }, { "epoch": 0.19302267672994858, "grad_norm": 0.14796678721904755, "learning_rate": 8.104610953493788e-05, "loss": 0.41483144760131835, "step": 44960 }, { "epoch": 0.19306560881996856, "grad_norm": 1.5162951946258545, "learning_rate": 8.104179781482026e-05, "loss": 0.22687315940856934, "step": 44970 }, { "epoch": 0.19310854090998858, "grad_norm": 0.051669228821992874, "learning_rate": 8.103748609470262e-05, "loss": 0.42571425437927246, "step": 44980 }, { "epoch": 0.1931514730000086, "grad_norm": 1.4661331176757812, "learning_rate": 8.1033174374585e-05, "loss": 0.1966134190559387, "step": 44990 }, { "epoch": 0.19319440509002858, "grad_norm": 3.3682007789611816, "learning_rate": 8.102886265446737e-05, "loss": 0.19375011920928956, "step": 45000 }, { "epoch": 0.19319440509002858, "eval_loss": 0.4712151288986206, "eval_runtime": 27.4644, "eval_samples_per_second": 3.641, "eval_steps_per_second": 3.641, "step": 45000 }, { "epoch": 0.1932373371800486, "grad_norm": 1.5242284536361694, "learning_rate": 8.102455093434975e-05, "loss": 0.28296632766723634, "step": 45010 }, { "epoch": 0.1932802692700686, "grad_norm": 0.13998153805732727, "learning_rate": 8.102023921423213e-05, "loss": 0.2556852102279663, "step": 45020 }, { "epoch": 0.19332320136008863, "grad_norm": 1.6157907247543335, "learning_rate": 8.10159274941145e-05, "loss": 0.13697457313537598, "step": 45030 }, { "epoch": 0.1933661334501086, "grad_norm": 0.015710826963186264, "learning_rate": 8.101161577399688e-05, "loss": 0.18795580863952638, "step": 45040 }, { "epoch": 0.19340906554012863, "grad_norm": 0.883563756942749, "learning_rate": 8.100730405387926e-05, "loss": 0.17424606084823607, "step": 45050 }, { "epoch": 0.19345199763014864, "grad_norm": 0.014735180884599686, "learning_rate": 8.100299233376163e-05, "loss": 0.192516553401947, "step": 45060 }, { "epoch": 0.19349492972016863, "grad_norm": 0.012432921677827835, "learning_rate": 8.0998680613644e-05, "loss": 0.29526875019073484, "step": 45070 }, { "epoch": 0.19353786181018864, "grad_norm": 0.280377596616745, "learning_rate": 8.099436889352638e-05, "loss": 0.3330434799194336, "step": 45080 }, { "epoch": 0.19358079390020866, "grad_norm": 0.03499651327729225, "learning_rate": 8.099005717340876e-05, "loss": 0.12696655988693237, "step": 45090 }, { "epoch": 0.19362372599022865, "grad_norm": 2.5255191326141357, "learning_rate": 8.098574545329113e-05, "loss": 0.12479696273803711, "step": 45100 }, { "epoch": 0.19366665808024866, "grad_norm": 0.135679230093956, "learning_rate": 8.098143373317351e-05, "loss": 0.19221022129058837, "step": 45110 }, { "epoch": 0.19370959017026868, "grad_norm": 0.0067241620272397995, "learning_rate": 8.097712201305589e-05, "loss": 0.1390742063522339, "step": 45120 }, { "epoch": 0.19375252226028866, "grad_norm": 1.7752548456192017, "learning_rate": 8.097281029293827e-05, "loss": 0.2600010633468628, "step": 45130 }, { "epoch": 0.19379545435030868, "grad_norm": 0.21976950764656067, "learning_rate": 8.096849857282064e-05, "loss": 0.19792779684066772, "step": 45140 }, { "epoch": 0.1938383864403287, "grad_norm": 0.033322304487228394, "learning_rate": 8.096418685270302e-05, "loss": 0.12441951036453247, "step": 45150 }, { "epoch": 0.1938813185303487, "grad_norm": 1.1145471334457397, "learning_rate": 8.09598751325854e-05, "loss": 0.23317229747772217, "step": 45160 }, { "epoch": 0.1939242506203687, "grad_norm": 1.6260700225830078, "learning_rate": 8.095556341246778e-05, "loss": 0.39760532379150393, "step": 45170 }, { "epoch": 0.1939671827103887, "grad_norm": 1.8564512729644775, "learning_rate": 8.095125169235015e-05, "loss": 0.36122798919677734, "step": 45180 }, { "epoch": 0.19401011480040872, "grad_norm": 3.138972043991089, "learning_rate": 8.094693997223253e-05, "loss": 0.2895623207092285, "step": 45190 }, { "epoch": 0.1940530468904287, "grad_norm": 0.09472132474184036, "learning_rate": 8.094262825211491e-05, "loss": 0.33398478031158446, "step": 45200 }, { "epoch": 0.19409597898044872, "grad_norm": 0.3423907160758972, "learning_rate": 8.093831653199729e-05, "loss": 0.182434344291687, "step": 45210 }, { "epoch": 0.19413891107046874, "grad_norm": 2.8738393783569336, "learning_rate": 8.093400481187966e-05, "loss": 0.2890277624130249, "step": 45220 }, { "epoch": 0.19418184316048873, "grad_norm": 29.463497161865234, "learning_rate": 8.092969309176203e-05, "loss": 0.3977688789367676, "step": 45230 }, { "epoch": 0.19422477525050874, "grad_norm": 0.23220296204090118, "learning_rate": 8.09253813716444e-05, "loss": 0.360453462600708, "step": 45240 }, { "epoch": 0.19426770734052876, "grad_norm": 0.10836378484964371, "learning_rate": 8.092106965152678e-05, "loss": 0.23855133056640626, "step": 45250 }, { "epoch": 0.19431063943054877, "grad_norm": 0.307105153799057, "learning_rate": 8.091675793140916e-05, "loss": 0.42743630409240724, "step": 45260 }, { "epoch": 0.19435357152056876, "grad_norm": 4.20743989944458, "learning_rate": 8.091244621129154e-05, "loss": 0.24763765335083007, "step": 45270 }, { "epoch": 0.19439650361058877, "grad_norm": 0.007932635955512524, "learning_rate": 8.090813449117391e-05, "loss": 0.19730513095855712, "step": 45280 }, { "epoch": 0.1944394357006088, "grad_norm": 9.092166900634766, "learning_rate": 8.090382277105629e-05, "loss": 0.3102408409118652, "step": 45290 }, { "epoch": 0.19448236779062877, "grad_norm": 2.336500406265259, "learning_rate": 8.089951105093867e-05, "loss": 0.15682946443557738, "step": 45300 }, { "epoch": 0.1945252998806488, "grad_norm": 0.035069532692432404, "learning_rate": 8.089519933082103e-05, "loss": 0.13510195016860962, "step": 45310 }, { "epoch": 0.1945682319706688, "grad_norm": 0.09467840194702148, "learning_rate": 8.089088761070341e-05, "loss": 0.23605630397796631, "step": 45320 }, { "epoch": 0.1946111640606888, "grad_norm": 0.30349770188331604, "learning_rate": 8.088657589058579e-05, "loss": 0.3186908006668091, "step": 45330 }, { "epoch": 0.1946540961507088, "grad_norm": 0.1543532758951187, "learning_rate": 8.088226417046816e-05, "loss": 0.10231984853744507, "step": 45340 }, { "epoch": 0.19469702824072882, "grad_norm": 0.08216534554958344, "learning_rate": 8.087795245035054e-05, "loss": 0.16797515153884887, "step": 45350 }, { "epoch": 0.19473996033074883, "grad_norm": 0.15961258113384247, "learning_rate": 8.087364073023292e-05, "loss": 0.21318306922912597, "step": 45360 }, { "epoch": 0.19478289242076882, "grad_norm": 0.15939001739025116, "learning_rate": 8.08693290101153e-05, "loss": 0.2917843580245972, "step": 45370 }, { "epoch": 0.19482582451078884, "grad_norm": 1.0433694124221802, "learning_rate": 8.086501728999769e-05, "loss": 0.5787501811981202, "step": 45380 }, { "epoch": 0.19486875660080885, "grad_norm": 3.5344173908233643, "learning_rate": 8.086070556988005e-05, "loss": 0.1859116554260254, "step": 45390 }, { "epoch": 0.19491168869082884, "grad_norm": 1.2646311521530151, "learning_rate": 8.085639384976243e-05, "loss": 0.3138707399368286, "step": 45400 }, { "epoch": 0.19495462078084885, "grad_norm": 1.0844224691390991, "learning_rate": 8.08520821296448e-05, "loss": 0.09525437951087952, "step": 45410 }, { "epoch": 0.19499755287086887, "grad_norm": 2.304156541824341, "learning_rate": 8.084777040952718e-05, "loss": 0.2916342973709106, "step": 45420 }, { "epoch": 0.19504048496088885, "grad_norm": 3.35841703414917, "learning_rate": 8.084345868940956e-05, "loss": 0.34789600372314455, "step": 45430 }, { "epoch": 0.19508341705090887, "grad_norm": 0.004667060449719429, "learning_rate": 8.083914696929194e-05, "loss": 0.32075483798980714, "step": 45440 }, { "epoch": 0.19512634914092888, "grad_norm": 25.836605072021484, "learning_rate": 8.083483524917431e-05, "loss": 0.288403058052063, "step": 45450 }, { "epoch": 0.1951692812309489, "grad_norm": 1.2741717100143433, "learning_rate": 8.083052352905669e-05, "loss": 0.42087607383728026, "step": 45460 }, { "epoch": 0.19521221332096889, "grad_norm": 3.004124879837036, "learning_rate": 8.082621180893907e-05, "loss": 0.12746796607971192, "step": 45470 }, { "epoch": 0.1952551454109889, "grad_norm": 2.3572230339050293, "learning_rate": 8.082190008882143e-05, "loss": 0.3464216947555542, "step": 45480 }, { "epoch": 0.19529807750100892, "grad_norm": 2.225754976272583, "learning_rate": 8.081758836870381e-05, "loss": 0.23277955055236815, "step": 45490 }, { "epoch": 0.1953410095910289, "grad_norm": 0.18235382437705994, "learning_rate": 8.081327664858619e-05, "loss": 0.3772527456283569, "step": 45500 }, { "epoch": 0.19538394168104892, "grad_norm": 0.11060876399278641, "learning_rate": 8.080896492846856e-05, "loss": 0.15486321449279786, "step": 45510 }, { "epoch": 0.19542687377106893, "grad_norm": 4.628691673278809, "learning_rate": 8.080465320835094e-05, "loss": 0.436336088180542, "step": 45520 }, { "epoch": 0.19546980586108892, "grad_norm": 4.060401439666748, "learning_rate": 8.080034148823332e-05, "loss": 0.27948029041290284, "step": 45530 }, { "epoch": 0.19551273795110893, "grad_norm": 0.030168868601322174, "learning_rate": 8.07960297681157e-05, "loss": 0.12020124197006225, "step": 45540 }, { "epoch": 0.19555567004112895, "grad_norm": 0.1102621778845787, "learning_rate": 8.079171804799807e-05, "loss": 0.2364802360534668, "step": 45550 }, { "epoch": 0.19559860213114894, "grad_norm": 1.1610252857208252, "learning_rate": 8.078740632788044e-05, "loss": 0.41650800704956054, "step": 45560 }, { "epoch": 0.19564153422116895, "grad_norm": 0.2328631728887558, "learning_rate": 8.078309460776282e-05, "loss": 0.2034066677093506, "step": 45570 }, { "epoch": 0.19568446631118896, "grad_norm": 0.0528264120221138, "learning_rate": 8.077878288764519e-05, "loss": 0.22113289833068847, "step": 45580 }, { "epoch": 0.19572739840120898, "grad_norm": 0.9496865272521973, "learning_rate": 8.077447116752757e-05, "loss": 0.15208523273468016, "step": 45590 }, { "epoch": 0.19577033049122897, "grad_norm": 2.1166505813598633, "learning_rate": 8.077015944740996e-05, "loss": 0.36841602325439454, "step": 45600 }, { "epoch": 0.19581326258124898, "grad_norm": 0.0011364739621058106, "learning_rate": 8.076584772729234e-05, "loss": 0.24271912574768068, "step": 45610 }, { "epoch": 0.195856194671269, "grad_norm": 0.042179618030786514, "learning_rate": 8.076153600717472e-05, "loss": 0.22032694816589354, "step": 45620 }, { "epoch": 0.19589912676128898, "grad_norm": 0.07593486458063126, "learning_rate": 8.075722428705709e-05, "loss": 0.1293318510055542, "step": 45630 }, { "epoch": 0.195942058851309, "grad_norm": 0.23231108486652374, "learning_rate": 8.075291256693946e-05, "loss": 0.30386996269226074, "step": 45640 }, { "epoch": 0.195984990941329, "grad_norm": 0.14182336628437042, "learning_rate": 8.074860084682183e-05, "loss": 0.2837024450302124, "step": 45650 }, { "epoch": 0.196027923031349, "grad_norm": 1.3760734796524048, "learning_rate": 8.074428912670421e-05, "loss": 0.5788982391357422, "step": 45660 }, { "epoch": 0.19607085512136901, "grad_norm": 0.14667318761348724, "learning_rate": 8.073997740658659e-05, "loss": 0.20872790813446046, "step": 45670 }, { "epoch": 0.19611378721138903, "grad_norm": 0.699029803276062, "learning_rate": 8.073566568646897e-05, "loss": 0.3040825605392456, "step": 45680 }, { "epoch": 0.19615671930140904, "grad_norm": 1.2123295068740845, "learning_rate": 8.073135396635134e-05, "loss": 0.33064205646514894, "step": 45690 }, { "epoch": 0.19619965139142903, "grad_norm": 0.019776469096541405, "learning_rate": 8.072704224623372e-05, "loss": 0.35395164489746095, "step": 45700 }, { "epoch": 0.19624258348144905, "grad_norm": 0.46024268865585327, "learning_rate": 8.07227305261161e-05, "loss": 0.18437256813049316, "step": 45710 }, { "epoch": 0.19628551557146906, "grad_norm": 2.247520923614502, "learning_rate": 8.071841880599846e-05, "loss": 0.48839592933654785, "step": 45720 }, { "epoch": 0.19632844766148905, "grad_norm": 0.6118911504745483, "learning_rate": 8.071410708588084e-05, "loss": 0.05164738297462464, "step": 45730 }, { "epoch": 0.19637137975150906, "grad_norm": 0.27519097924232483, "learning_rate": 8.070979536576322e-05, "loss": 0.33614468574523926, "step": 45740 }, { "epoch": 0.19641431184152908, "grad_norm": 2.999359130859375, "learning_rate": 8.07054836456456e-05, "loss": 0.27184247970581055, "step": 45750 }, { "epoch": 0.19645724393154906, "grad_norm": 0.006510626524686813, "learning_rate": 8.070117192552797e-05, "loss": 0.35111246109008787, "step": 45760 }, { "epoch": 0.19650017602156908, "grad_norm": 4.526912689208984, "learning_rate": 8.069686020541035e-05, "loss": 0.2468196392059326, "step": 45770 }, { "epoch": 0.1965431081115891, "grad_norm": 0.006337666884064674, "learning_rate": 8.069254848529273e-05, "loss": 0.2162738561630249, "step": 45780 }, { "epoch": 0.1965860402016091, "grad_norm": 0.05239209532737732, "learning_rate": 8.06882367651751e-05, "loss": 0.26372151374816893, "step": 45790 }, { "epoch": 0.1966289722916291, "grad_norm": 0.0049435412511229515, "learning_rate": 8.068392504505748e-05, "loss": 0.22712130546569825, "step": 45800 }, { "epoch": 0.1966719043816491, "grad_norm": 0.3726454973220825, "learning_rate": 8.067961332493984e-05, "loss": 0.41004314422607424, "step": 45810 }, { "epoch": 0.19671483647166912, "grad_norm": 0.12642237544059753, "learning_rate": 8.067530160482224e-05, "loss": 0.282349967956543, "step": 45820 }, { "epoch": 0.1967577685616891, "grad_norm": 0.10512173175811768, "learning_rate": 8.067098988470461e-05, "loss": 0.09284887313842774, "step": 45830 }, { "epoch": 0.19680070065170913, "grad_norm": 15.117431640625, "learning_rate": 8.066667816458699e-05, "loss": 0.21897082328796386, "step": 45840 }, { "epoch": 0.19684363274172914, "grad_norm": 3.6390509605407715, "learning_rate": 8.066236644446937e-05, "loss": 0.3061159610748291, "step": 45850 }, { "epoch": 0.19688656483174913, "grad_norm": 0.017141474410891533, "learning_rate": 8.065805472435174e-05, "loss": 0.13499906063079833, "step": 45860 }, { "epoch": 0.19692949692176914, "grad_norm": 0.03623701259493828, "learning_rate": 8.065374300423412e-05, "loss": 0.17264734506607055, "step": 45870 }, { "epoch": 0.19697242901178916, "grad_norm": 0.1779487431049347, "learning_rate": 8.06494312841165e-05, "loss": 0.3331061601638794, "step": 45880 }, { "epoch": 0.19701536110180914, "grad_norm": 0.05001157894730568, "learning_rate": 8.064511956399886e-05, "loss": 0.20098867416381835, "step": 45890 }, { "epoch": 0.19705829319182916, "grad_norm": 0.002003852277994156, "learning_rate": 8.064080784388124e-05, "loss": 0.09526382088661194, "step": 45900 }, { "epoch": 0.19710122528184917, "grad_norm": 3.046260118484497, "learning_rate": 8.063649612376362e-05, "loss": 0.34651336669921873, "step": 45910 }, { "epoch": 0.1971441573718692, "grad_norm": 0.08373679220676422, "learning_rate": 8.0632184403646e-05, "loss": 0.12939629554748536, "step": 45920 }, { "epoch": 0.19718708946188918, "grad_norm": 0.00509268743917346, "learning_rate": 8.062787268352837e-05, "loss": 0.2628002166748047, "step": 45930 }, { "epoch": 0.1972300215519092, "grad_norm": 0.006324404384940863, "learning_rate": 8.062356096341075e-05, "loss": 0.12146024703979492, "step": 45940 }, { "epoch": 0.1972729536419292, "grad_norm": 1.9545040130615234, "learning_rate": 8.061924924329313e-05, "loss": 0.31609747409820554, "step": 45950 }, { "epoch": 0.1973158857319492, "grad_norm": 0.3493014872074127, "learning_rate": 8.06149375231755e-05, "loss": 0.3573979139328003, "step": 45960 }, { "epoch": 0.1973588178219692, "grad_norm": 0.21159689128398895, "learning_rate": 8.061062580305787e-05, "loss": 0.39376125335693357, "step": 45970 }, { "epoch": 0.19740174991198922, "grad_norm": 0.6115752458572388, "learning_rate": 8.060631408294025e-05, "loss": 0.32958173751831055, "step": 45980 }, { "epoch": 0.1974446820020092, "grad_norm": 0.007062565069645643, "learning_rate": 8.060200236282262e-05, "loss": 0.40116105079650877, "step": 45990 }, { "epoch": 0.19748761409202922, "grad_norm": 3.6431825160980225, "learning_rate": 8.0597690642705e-05, "loss": 0.1228832483291626, "step": 46000 }, { "epoch": 0.19748761409202922, "eval_loss": 0.4566121995449066, "eval_runtime": 27.523, "eval_samples_per_second": 3.633, "eval_steps_per_second": 3.633, "step": 46000 }, { "epoch": 0.19753054618204924, "grad_norm": 1.6863194704055786, "learning_rate": 8.059337892258738e-05, "loss": 0.2996741533279419, "step": 46010 }, { "epoch": 0.19757347827206925, "grad_norm": 0.15534216165542603, "learning_rate": 8.058906720246976e-05, "loss": 0.22710778713226318, "step": 46020 }, { "epoch": 0.19761641036208924, "grad_norm": 1.8629953861236572, "learning_rate": 8.058475548235213e-05, "loss": 0.37529573440551756, "step": 46030 }, { "epoch": 0.19765934245210925, "grad_norm": 8.502998352050781, "learning_rate": 8.058044376223451e-05, "loss": 0.30512685775756837, "step": 46040 }, { "epoch": 0.19770227454212927, "grad_norm": 1.7396048307418823, "learning_rate": 8.057613204211689e-05, "loss": 0.2545022487640381, "step": 46050 }, { "epoch": 0.19774520663214926, "grad_norm": 0.018455829471349716, "learning_rate": 8.057182032199926e-05, "loss": 0.2788164377212524, "step": 46060 }, { "epoch": 0.19778813872216927, "grad_norm": 0.017023645341396332, "learning_rate": 8.056750860188164e-05, "loss": 0.11776541471481324, "step": 46070 }, { "epoch": 0.19783107081218929, "grad_norm": 0.05966558679938316, "learning_rate": 8.056319688176402e-05, "loss": 0.23098490238189698, "step": 46080 }, { "epoch": 0.19787400290220927, "grad_norm": 0.006063948851078749, "learning_rate": 8.05588851616464e-05, "loss": 0.500779104232788, "step": 46090 }, { "epoch": 0.1979169349922293, "grad_norm": 0.02215203270316124, "learning_rate": 8.055457344152877e-05, "loss": 0.2442542552947998, "step": 46100 }, { "epoch": 0.1979598670822493, "grad_norm": 0.5211237072944641, "learning_rate": 8.055026172141115e-05, "loss": 0.3820308446884155, "step": 46110 }, { "epoch": 0.19800279917226932, "grad_norm": 0.09570766985416412, "learning_rate": 8.054595000129353e-05, "loss": 0.30534040927886963, "step": 46120 }, { "epoch": 0.1980457312622893, "grad_norm": 2.1877946853637695, "learning_rate": 8.054163828117589e-05, "loss": 0.20296092033386232, "step": 46130 }, { "epoch": 0.19808866335230932, "grad_norm": 0.19210763275623322, "learning_rate": 8.053732656105827e-05, "loss": 0.004410789161920547, "step": 46140 }, { "epoch": 0.19813159544232933, "grad_norm": 9.662697792053223, "learning_rate": 8.053301484094065e-05, "loss": 0.28416242599487307, "step": 46150 }, { "epoch": 0.19817452753234932, "grad_norm": 6.199631214141846, "learning_rate": 8.052870312082302e-05, "loss": 0.43329510688781736, "step": 46160 }, { "epoch": 0.19821745962236934, "grad_norm": 2.9202873706817627, "learning_rate": 8.05243914007054e-05, "loss": 0.4505885124206543, "step": 46170 }, { "epoch": 0.19826039171238935, "grad_norm": 1.2385050058364868, "learning_rate": 8.052007968058778e-05, "loss": 0.3089656114578247, "step": 46180 }, { "epoch": 0.19830332380240934, "grad_norm": 0.002173739019781351, "learning_rate": 8.051576796047016e-05, "loss": 0.17922030687332152, "step": 46190 }, { "epoch": 0.19834625589242935, "grad_norm": 3.4676084518432617, "learning_rate": 8.051145624035253e-05, "loss": 0.28135807514190675, "step": 46200 }, { "epoch": 0.19838918798244937, "grad_norm": 1.0874375104904175, "learning_rate": 8.050714452023491e-05, "loss": 0.4869321346282959, "step": 46210 }, { "epoch": 0.19843212007246938, "grad_norm": 0.16380393505096436, "learning_rate": 8.050283280011727e-05, "loss": 0.36575796604156496, "step": 46220 }, { "epoch": 0.19847505216248937, "grad_norm": 0.09360513091087341, "learning_rate": 8.049852107999965e-05, "loss": 0.2708542585372925, "step": 46230 }, { "epoch": 0.19851798425250938, "grad_norm": 0.0363796204328537, "learning_rate": 8.049420935988203e-05, "loss": 0.324708080291748, "step": 46240 }, { "epoch": 0.1985609163425294, "grad_norm": 0.06994099169969559, "learning_rate": 8.048989763976441e-05, "loss": 0.25118064880371094, "step": 46250 }, { "epoch": 0.19860384843254938, "grad_norm": 2.9435386657714844, "learning_rate": 8.048558591964678e-05, "loss": 0.17363402843475342, "step": 46260 }, { "epoch": 0.1986467805225694, "grad_norm": 0.10383245348930359, "learning_rate": 8.048127419952916e-05, "loss": 0.0918418288230896, "step": 46270 }, { "epoch": 0.19868971261258941, "grad_norm": 0.03149556368589401, "learning_rate": 8.047696247941154e-05, "loss": 0.2588630199432373, "step": 46280 }, { "epoch": 0.1987326447026094, "grad_norm": 1.807033896446228, "learning_rate": 8.047265075929392e-05, "loss": 0.45576953887939453, "step": 46290 }, { "epoch": 0.19877557679262942, "grad_norm": 1.1389695405960083, "learning_rate": 8.04683390391763e-05, "loss": 0.23019568920135497, "step": 46300 }, { "epoch": 0.19881850888264943, "grad_norm": 0.5787996053695679, "learning_rate": 8.046402731905867e-05, "loss": 0.25288972854614256, "step": 46310 }, { "epoch": 0.19886144097266942, "grad_norm": 0.0006389050977304578, "learning_rate": 8.045971559894105e-05, "loss": 0.048640355467796326, "step": 46320 }, { "epoch": 0.19890437306268943, "grad_norm": 1.3906055688858032, "learning_rate": 8.045540387882343e-05, "loss": 0.17603120803833008, "step": 46330 }, { "epoch": 0.19894730515270945, "grad_norm": 0.7192102670669556, "learning_rate": 8.04510921587058e-05, "loss": 0.06997541189193726, "step": 46340 }, { "epoch": 0.19899023724272946, "grad_norm": 1.2777167558670044, "learning_rate": 8.044678043858818e-05, "loss": 0.1271460771560669, "step": 46350 }, { "epoch": 0.19903316933274945, "grad_norm": 0.14615023136138916, "learning_rate": 8.044246871847056e-05, "loss": 0.1982070803642273, "step": 46360 }, { "epoch": 0.19907610142276946, "grad_norm": 2.230384588241577, "learning_rate": 8.043815699835294e-05, "loss": 0.3583315372467041, "step": 46370 }, { "epoch": 0.19911903351278948, "grad_norm": 7.727930545806885, "learning_rate": 8.04338452782353e-05, "loss": 0.5741009712219238, "step": 46380 }, { "epoch": 0.19916196560280947, "grad_norm": 5.949665069580078, "learning_rate": 8.042953355811768e-05, "loss": 0.6069200038909912, "step": 46390 }, { "epoch": 0.19920489769282948, "grad_norm": 1.577398419380188, "learning_rate": 8.042522183800005e-05, "loss": 0.20890703201293945, "step": 46400 }, { "epoch": 0.1992478297828495, "grad_norm": 0.2532603442668915, "learning_rate": 8.042091011788243e-05, "loss": 0.22831459045410157, "step": 46410 }, { "epoch": 0.19929076187286948, "grad_norm": 0.25338953733444214, "learning_rate": 8.041659839776481e-05, "loss": 0.279406476020813, "step": 46420 }, { "epoch": 0.1993336939628895, "grad_norm": 0.10169660300016403, "learning_rate": 8.041228667764719e-05, "loss": 0.2838901996612549, "step": 46430 }, { "epoch": 0.1993766260529095, "grad_norm": 1.0535625219345093, "learning_rate": 8.040797495752956e-05, "loss": 0.09383861422538757, "step": 46440 }, { "epoch": 0.19941955814292953, "grad_norm": 0.002683205297216773, "learning_rate": 8.040366323741194e-05, "loss": 0.14773426055908204, "step": 46450 }, { "epoch": 0.1994624902329495, "grad_norm": 9.37723159790039, "learning_rate": 8.03993515172943e-05, "loss": 0.3080390691757202, "step": 46460 }, { "epoch": 0.19950542232296953, "grad_norm": 0.5413791537284851, "learning_rate": 8.039503979717668e-05, "loss": 0.3077335596084595, "step": 46470 }, { "epoch": 0.19954835441298954, "grad_norm": 0.00279863178730011, "learning_rate": 8.039072807705906e-05, "loss": 0.21013128757476807, "step": 46480 }, { "epoch": 0.19959128650300953, "grad_norm": 0.6538106203079224, "learning_rate": 8.038641635694144e-05, "loss": 0.23853034973144532, "step": 46490 }, { "epoch": 0.19963421859302954, "grad_norm": 1.3677074909210205, "learning_rate": 8.038210463682381e-05, "loss": 0.34636821746826174, "step": 46500 }, { "epoch": 0.19967715068304956, "grad_norm": 0.2341076135635376, "learning_rate": 8.037779291670619e-05, "loss": 0.2113889217376709, "step": 46510 }, { "epoch": 0.19972008277306955, "grad_norm": 0.06166021525859833, "learning_rate": 8.037348119658857e-05, "loss": 0.14099152088165284, "step": 46520 }, { "epoch": 0.19976301486308956, "grad_norm": 5.088956356048584, "learning_rate": 8.036916947647095e-05, "loss": 0.32044038772583006, "step": 46530 }, { "epoch": 0.19980594695310958, "grad_norm": 0.05206717550754547, "learning_rate": 8.036485775635332e-05, "loss": 0.050091874599456784, "step": 46540 }, { "epoch": 0.1998488790431296, "grad_norm": 0.015598599798977375, "learning_rate": 8.03605460362357e-05, "loss": 0.13630733489990235, "step": 46550 }, { "epoch": 0.19989181113314958, "grad_norm": 1.6684378385543823, "learning_rate": 8.035623431611808e-05, "loss": 0.16195347309112548, "step": 46560 }, { "epoch": 0.1999347432231696, "grad_norm": 0.1458691656589508, "learning_rate": 8.035192259600045e-05, "loss": 0.27073700428009034, "step": 46570 }, { "epoch": 0.1999776753131896, "grad_norm": 0.6499917507171631, "learning_rate": 8.034761087588283e-05, "loss": 0.24764013290405273, "step": 46580 }, { "epoch": 0.2000206074032096, "grad_norm": 0.0752706378698349, "learning_rate": 8.034329915576521e-05, "loss": 0.26877541542053224, "step": 46590 }, { "epoch": 0.2000635394932296, "grad_norm": 2.6770122051239014, "learning_rate": 8.033898743564759e-05, "loss": 0.2864994049072266, "step": 46600 }, { "epoch": 0.20010647158324962, "grad_norm": 0.03613373264670372, "learning_rate": 8.033467571552996e-05, "loss": 0.12052092552185059, "step": 46610 }, { "epoch": 0.2001494036732696, "grad_norm": 0.0742354616522789, "learning_rate": 8.033036399541234e-05, "loss": 0.25440454483032227, "step": 46620 }, { "epoch": 0.20019233576328962, "grad_norm": 0.05916782096028328, "learning_rate": 8.03260522752947e-05, "loss": 0.08873311877250671, "step": 46630 }, { "epoch": 0.20023526785330964, "grad_norm": 0.015279405750334263, "learning_rate": 8.032174055517708e-05, "loss": 0.3313145637512207, "step": 46640 }, { "epoch": 0.20027819994332965, "grad_norm": 0.27925458550453186, "learning_rate": 8.031742883505946e-05, "loss": 0.3915241718292236, "step": 46650 }, { "epoch": 0.20032113203334964, "grad_norm": 0.05769550800323486, "learning_rate": 8.031311711494184e-05, "loss": 0.06632600426673889, "step": 46660 }, { "epoch": 0.20036406412336966, "grad_norm": 0.20242290198802948, "learning_rate": 8.030880539482421e-05, "loss": 0.0038001593202352524, "step": 46670 }, { "epoch": 0.20040699621338967, "grad_norm": 1.7053717374801636, "learning_rate": 8.030449367470659e-05, "loss": 0.12617350816726686, "step": 46680 }, { "epoch": 0.20044992830340966, "grad_norm": 0.10039971768856049, "learning_rate": 8.030018195458897e-05, "loss": 0.39122159481048585, "step": 46690 }, { "epoch": 0.20049286039342967, "grad_norm": 3.4095280170440674, "learning_rate": 8.029587023447135e-05, "loss": 0.14573875665664673, "step": 46700 }, { "epoch": 0.2005357924834497, "grad_norm": 0.5203276872634888, "learning_rate": 8.029155851435371e-05, "loss": 0.37523181438446046, "step": 46710 }, { "epoch": 0.20057872457346967, "grad_norm": 0.043886229395866394, "learning_rate": 8.028724679423609e-05, "loss": 0.27476327419281005, "step": 46720 }, { "epoch": 0.2006216566634897, "grad_norm": 0.0021896434482187033, "learning_rate": 8.028293507411847e-05, "loss": 0.16201258897781373, "step": 46730 }, { "epoch": 0.2006645887535097, "grad_norm": 0.07912927120923996, "learning_rate": 8.027862335400084e-05, "loss": 0.1216330885887146, "step": 46740 }, { "epoch": 0.2007075208435297, "grad_norm": 3.0536763668060303, "learning_rate": 8.027431163388322e-05, "loss": 0.20984141826629638, "step": 46750 }, { "epoch": 0.2007504529335497, "grad_norm": 0.001347496872767806, "learning_rate": 8.02699999137656e-05, "loss": 0.2748946905136108, "step": 46760 }, { "epoch": 0.20079338502356972, "grad_norm": 1.7918879985809326, "learning_rate": 8.026568819364797e-05, "loss": 0.3410180568695068, "step": 46770 }, { "epoch": 0.20083631711358974, "grad_norm": 2.6103463172912598, "learning_rate": 8.026137647353035e-05, "loss": 0.36960854530334475, "step": 46780 }, { "epoch": 0.20087924920360972, "grad_norm": 0.000803425966296345, "learning_rate": 8.025706475341273e-05, "loss": 0.0754163384437561, "step": 46790 }, { "epoch": 0.20092218129362974, "grad_norm": 2.1734330654144287, "learning_rate": 8.02527530332951e-05, "loss": 0.3364823579788208, "step": 46800 }, { "epoch": 0.20096511338364975, "grad_norm": 0.031898133456707, "learning_rate": 8.024844131317748e-05, "loss": 0.15868821144104003, "step": 46810 }, { "epoch": 0.20100804547366974, "grad_norm": 0.46334612369537354, "learning_rate": 8.024412959305986e-05, "loss": 0.1317250609397888, "step": 46820 }, { "epoch": 0.20105097756368975, "grad_norm": 5.54944372177124, "learning_rate": 8.023981787294224e-05, "loss": 0.26093254089355467, "step": 46830 }, { "epoch": 0.20109390965370977, "grad_norm": 0.004121364559978247, "learning_rate": 8.023550615282462e-05, "loss": 0.33663909435272216, "step": 46840 }, { "epoch": 0.20113684174372976, "grad_norm": 1.8446154594421387, "learning_rate": 8.0231194432707e-05, "loss": 0.4318349361419678, "step": 46850 }, { "epoch": 0.20117977383374977, "grad_norm": 0.3243059515953064, "learning_rate": 8.022688271258937e-05, "loss": 0.1755039095878601, "step": 46860 }, { "epoch": 0.20122270592376978, "grad_norm": 0.0003927726356778294, "learning_rate": 8.022257099247173e-05, "loss": 0.15141257047653198, "step": 46870 }, { "epoch": 0.2012656380137898, "grad_norm": 0.06570783257484436, "learning_rate": 8.021825927235411e-05, "loss": 0.04607608914375305, "step": 46880 }, { "epoch": 0.2013085701038098, "grad_norm": 0.037901636213064194, "learning_rate": 8.021394755223649e-05, "loss": 0.18285064697265624, "step": 46890 }, { "epoch": 0.2013515021938298, "grad_norm": 0.009038684889674187, "learning_rate": 8.020963583211887e-05, "loss": 0.3561609983444214, "step": 46900 }, { "epoch": 0.20139443428384982, "grad_norm": 0.007474920246750116, "learning_rate": 8.020532411200124e-05, "loss": 0.16592724323272706, "step": 46910 }, { "epoch": 0.2014373663738698, "grad_norm": 2.1837422847747803, "learning_rate": 8.020101239188362e-05, "loss": 0.23149852752685546, "step": 46920 }, { "epoch": 0.20148029846388982, "grad_norm": 0.005031005013734102, "learning_rate": 8.0196700671766e-05, "loss": 0.35479423999786375, "step": 46930 }, { "epoch": 0.20152323055390983, "grad_norm": 5.587767601013184, "learning_rate": 8.019238895164838e-05, "loss": 0.25224766731262205, "step": 46940 }, { "epoch": 0.20156616264392982, "grad_norm": 0.10802139341831207, "learning_rate": 8.018807723153075e-05, "loss": 0.3127424240112305, "step": 46950 }, { "epoch": 0.20160909473394983, "grad_norm": 2.2037267684936523, "learning_rate": 8.018376551141312e-05, "loss": 0.1420138955116272, "step": 46960 }, { "epoch": 0.20165202682396985, "grad_norm": 3.9398605823516846, "learning_rate": 8.01794537912955e-05, "loss": 0.3298606872558594, "step": 46970 }, { "epoch": 0.20169495891398986, "grad_norm": 1.5411865711212158, "learning_rate": 8.017514207117787e-05, "loss": 0.4772407054901123, "step": 46980 }, { "epoch": 0.20173789100400985, "grad_norm": 4.572746753692627, "learning_rate": 8.017083035106025e-05, "loss": 0.16311756372451783, "step": 46990 }, { "epoch": 0.20178082309402987, "grad_norm": 0.0878385528922081, "learning_rate": 8.016651863094263e-05, "loss": 0.032969492673873904, "step": 47000 }, { "epoch": 0.20178082309402987, "eval_loss": 0.4502008557319641, "eval_runtime": 27.5462, "eval_samples_per_second": 3.63, "eval_steps_per_second": 3.63, "step": 47000 }, { "epoch": 0.20182375518404988, "grad_norm": 2.5271151065826416, "learning_rate": 8.016220691082502e-05, "loss": 0.08027942180633545, "step": 47010 }, { "epoch": 0.20186668727406987, "grad_norm": 5.350552082061768, "learning_rate": 8.01578951907074e-05, "loss": 0.3245659112930298, "step": 47020 }, { "epoch": 0.20190961936408988, "grad_norm": 0.009119064547121525, "learning_rate": 8.015358347058977e-05, "loss": 0.23723113536834717, "step": 47030 }, { "epoch": 0.2019525514541099, "grad_norm": 0.005342130549252033, "learning_rate": 8.014927175047214e-05, "loss": 0.32947022914886476, "step": 47040 }, { "epoch": 0.20199548354412988, "grad_norm": 2.6926896572113037, "learning_rate": 8.014496003035451e-05, "loss": 0.24790339469909667, "step": 47050 }, { "epoch": 0.2020384156341499, "grad_norm": 2.7984976768493652, "learning_rate": 8.014064831023689e-05, "loss": 0.41055974960327146, "step": 47060 }, { "epoch": 0.2020813477241699, "grad_norm": 1.719354271888733, "learning_rate": 8.013633659011927e-05, "loss": 0.3034384727478027, "step": 47070 }, { "epoch": 0.20212427981418993, "grad_norm": 1.4050602912902832, "learning_rate": 8.013202487000165e-05, "loss": 0.4208504676818848, "step": 47080 }, { "epoch": 0.20216721190420991, "grad_norm": 2.7856647968292236, "learning_rate": 8.012771314988402e-05, "loss": 0.21937999725341797, "step": 47090 }, { "epoch": 0.20221014399422993, "grad_norm": 5.089998722076416, "learning_rate": 8.01234014297664e-05, "loss": 0.41013269424438475, "step": 47100 }, { "epoch": 0.20225307608424994, "grad_norm": 1.1964845657348633, "learning_rate": 8.011908970964878e-05, "loss": 0.3722221374511719, "step": 47110 }, { "epoch": 0.20229600817426993, "grad_norm": 0.4336620271205902, "learning_rate": 8.011477798953114e-05, "loss": 0.1501123547554016, "step": 47120 }, { "epoch": 0.20233894026428995, "grad_norm": 3.1869595050811768, "learning_rate": 8.011046626941352e-05, "loss": 0.3358729839324951, "step": 47130 }, { "epoch": 0.20238187235430996, "grad_norm": 0.2135515958070755, "learning_rate": 8.01061545492959e-05, "loss": 0.3324007987976074, "step": 47140 }, { "epoch": 0.20242480444432995, "grad_norm": 2.570157289505005, "learning_rate": 8.010184282917827e-05, "loss": 0.25740299224853513, "step": 47150 }, { "epoch": 0.20246773653434996, "grad_norm": 1.2199064493179321, "learning_rate": 8.009753110906065e-05, "loss": 0.33509321212768556, "step": 47160 }, { "epoch": 0.20251066862436998, "grad_norm": 16.68321990966797, "learning_rate": 8.009321938894303e-05, "loss": 0.16514809131622316, "step": 47170 }, { "epoch": 0.20255360071438996, "grad_norm": 0.36014726758003235, "learning_rate": 8.00889076688254e-05, "loss": 0.16338274478912354, "step": 47180 }, { "epoch": 0.20259653280440998, "grad_norm": 0.06622673571109772, "learning_rate": 8.008459594870778e-05, "loss": 0.1958828091621399, "step": 47190 }, { "epoch": 0.20263946489443, "grad_norm": 1.5034043788909912, "learning_rate": 8.008028422859015e-05, "loss": 0.07967668771743774, "step": 47200 }, { "epoch": 0.20268239698445, "grad_norm": 0.004219403024762869, "learning_rate": 8.007597250847252e-05, "loss": 0.17253371477127075, "step": 47210 }, { "epoch": 0.20272532907447, "grad_norm": 1.8605031967163086, "learning_rate": 8.00716607883549e-05, "loss": 0.30866689682006837, "step": 47220 }, { "epoch": 0.20276826116449, "grad_norm": 0.007136452943086624, "learning_rate": 8.006734906823729e-05, "loss": 0.09472379088401794, "step": 47230 }, { "epoch": 0.20281119325451002, "grad_norm": 0.31589406728744507, "learning_rate": 8.006303734811967e-05, "loss": 0.1566672444343567, "step": 47240 }, { "epoch": 0.20285412534453, "grad_norm": 0.6907039284706116, "learning_rate": 8.005872562800205e-05, "loss": 0.24461641311645507, "step": 47250 }, { "epoch": 0.20289705743455003, "grad_norm": 0.014545142650604248, "learning_rate": 8.005441390788442e-05, "loss": 0.19249277114868163, "step": 47260 }, { "epoch": 0.20293998952457004, "grad_norm": 0.11242897063493729, "learning_rate": 8.00501021877668e-05, "loss": 0.19730584621429442, "step": 47270 }, { "epoch": 0.20298292161459003, "grad_norm": 7.016503810882568, "learning_rate": 8.004579046764918e-05, "loss": 0.3570088863372803, "step": 47280 }, { "epoch": 0.20302585370461004, "grad_norm": 0.21266454458236694, "learning_rate": 8.004147874753154e-05, "loss": 0.16338672637939453, "step": 47290 }, { "epoch": 0.20306878579463006, "grad_norm": 0.05758526921272278, "learning_rate": 8.003716702741392e-05, "loss": 0.25838274955749513, "step": 47300 }, { "epoch": 0.20311171788465007, "grad_norm": 0.19154588878154755, "learning_rate": 8.00328553072963e-05, "loss": 0.11227208375930786, "step": 47310 }, { "epoch": 0.20315464997467006, "grad_norm": 0.09942365437746048, "learning_rate": 8.002854358717867e-05, "loss": 0.2624546527862549, "step": 47320 }, { "epoch": 0.20319758206469007, "grad_norm": 1.0413005352020264, "learning_rate": 8.002423186706105e-05, "loss": 0.26270604133605957, "step": 47330 }, { "epoch": 0.2032405141547101, "grad_norm": 6.982503414154053, "learning_rate": 8.001992014694343e-05, "loss": 0.4072850704193115, "step": 47340 }, { "epoch": 0.20328344624473008, "grad_norm": 2.139129877090454, "learning_rate": 8.00156084268258e-05, "loss": 0.23219172954559325, "step": 47350 }, { "epoch": 0.2033263783347501, "grad_norm": 2.4974660873413086, "learning_rate": 8.001129670670818e-05, "loss": 0.388437819480896, "step": 47360 }, { "epoch": 0.2033693104247701, "grad_norm": 1.4110585451126099, "learning_rate": 8.000698498659055e-05, "loss": 0.3643111944198608, "step": 47370 }, { "epoch": 0.2034122425147901, "grad_norm": 0.06494349241256714, "learning_rate": 8.000267326647292e-05, "loss": 0.434461784362793, "step": 47380 }, { "epoch": 0.2034551746048101, "grad_norm": 2.335343599319458, "learning_rate": 7.99983615463553e-05, "loss": 0.15915360450744628, "step": 47390 }, { "epoch": 0.20349810669483012, "grad_norm": 0.8555454611778259, "learning_rate": 7.999404982623768e-05, "loss": 0.278378963470459, "step": 47400 }, { "epoch": 0.20354103878485014, "grad_norm": 0.016613325104117393, "learning_rate": 7.998973810612006e-05, "loss": 0.28450276851654055, "step": 47410 }, { "epoch": 0.20358397087487012, "grad_norm": 2.753087043762207, "learning_rate": 7.998542638600243e-05, "loss": 0.4102034091949463, "step": 47420 }, { "epoch": 0.20362690296489014, "grad_norm": 0.015861574560403824, "learning_rate": 7.998111466588481e-05, "loss": 0.20828280448913575, "step": 47430 }, { "epoch": 0.20366983505491015, "grad_norm": 0.007401157170534134, "learning_rate": 7.997680294576719e-05, "loss": 0.22065355777740478, "step": 47440 }, { "epoch": 0.20371276714493014, "grad_norm": 0.12466619908809662, "learning_rate": 7.997249122564957e-05, "loss": 0.24476051330566406, "step": 47450 }, { "epoch": 0.20375569923495016, "grad_norm": 1.485001802444458, "learning_rate": 7.996817950553194e-05, "loss": 0.3162158727645874, "step": 47460 }, { "epoch": 0.20379863132497017, "grad_norm": 0.3976515829563141, "learning_rate": 7.996386778541432e-05, "loss": 0.2629575490951538, "step": 47470 }, { "epoch": 0.20384156341499016, "grad_norm": 0.6344526410102844, "learning_rate": 7.99595560652967e-05, "loss": 0.3330104112625122, "step": 47480 }, { "epoch": 0.20388449550501017, "grad_norm": 0.02858610637485981, "learning_rate": 7.995524434517908e-05, "loss": 0.14648046493530273, "step": 47490 }, { "epoch": 0.2039274275950302, "grad_norm": 0.2695685029029846, "learning_rate": 7.995093262506145e-05, "loss": 0.32228114604949953, "step": 47500 }, { "epoch": 0.2039703596850502, "grad_norm": 0.3541853129863739, "learning_rate": 7.994662090494383e-05, "loss": 0.30073845386505127, "step": 47510 }, { "epoch": 0.2040132917750702, "grad_norm": 1.344916820526123, "learning_rate": 7.994230918482621e-05, "loss": 0.3133988857269287, "step": 47520 }, { "epoch": 0.2040562238650902, "grad_norm": 7.818868160247803, "learning_rate": 7.993799746470857e-05, "loss": 0.18457093238830566, "step": 47530 }, { "epoch": 0.20409915595511022, "grad_norm": 0.05128054320812225, "learning_rate": 7.993368574459095e-05, "loss": 0.205673885345459, "step": 47540 }, { "epoch": 0.2041420880451302, "grad_norm": 3.529289960861206, "learning_rate": 7.992937402447333e-05, "loss": 0.242510986328125, "step": 47550 }, { "epoch": 0.20418502013515022, "grad_norm": 2.2482082843780518, "learning_rate": 7.99250623043557e-05, "loss": 0.2730981111526489, "step": 47560 }, { "epoch": 0.20422795222517023, "grad_norm": 2.0920770168304443, "learning_rate": 7.992075058423808e-05, "loss": 0.19865950345993041, "step": 47570 }, { "epoch": 0.20427088431519022, "grad_norm": 0.25885283946990967, "learning_rate": 7.991643886412046e-05, "loss": 0.2900604009628296, "step": 47580 }, { "epoch": 0.20431381640521024, "grad_norm": 0.07762018591165543, "learning_rate": 7.991212714400284e-05, "loss": 0.08530986309051514, "step": 47590 }, { "epoch": 0.20435674849523025, "grad_norm": 4.367431640625, "learning_rate": 7.990781542388521e-05, "loss": 0.2783456563949585, "step": 47600 }, { "epoch": 0.20439968058525024, "grad_norm": 2.544442892074585, "learning_rate": 7.990350370376758e-05, "loss": 0.15141881704330445, "step": 47610 }, { "epoch": 0.20444261267527025, "grad_norm": 1.1436129808425903, "learning_rate": 7.989919198364995e-05, "loss": 0.3077658176422119, "step": 47620 }, { "epoch": 0.20448554476529027, "grad_norm": 0.8395715355873108, "learning_rate": 7.989488026353233e-05, "loss": 0.2867176294326782, "step": 47630 }, { "epoch": 0.20452847685531028, "grad_norm": 3.702817440032959, "learning_rate": 7.989056854341471e-05, "loss": 0.5052554130554199, "step": 47640 }, { "epoch": 0.20457140894533027, "grad_norm": 0.28353357315063477, "learning_rate": 7.988625682329709e-05, "loss": 0.12239044904708862, "step": 47650 }, { "epoch": 0.20461434103535028, "grad_norm": 0.5833907723426819, "learning_rate": 7.988194510317946e-05, "loss": 0.322785758972168, "step": 47660 }, { "epoch": 0.2046572731253703, "grad_norm": 0.173982173204422, "learning_rate": 7.987763338306184e-05, "loss": 0.19359149932861328, "step": 47670 }, { "epoch": 0.20470020521539029, "grad_norm": 0.8384180665016174, "learning_rate": 7.987332166294422e-05, "loss": 0.11619726419448853, "step": 47680 }, { "epoch": 0.2047431373054103, "grad_norm": 0.6231016516685486, "learning_rate": 7.98690099428266e-05, "loss": 0.1496443510055542, "step": 47690 }, { "epoch": 0.20478606939543031, "grad_norm": 0.0853211060166359, "learning_rate": 7.986469822270897e-05, "loss": 0.3600142002105713, "step": 47700 }, { "epoch": 0.2048290014854503, "grad_norm": 0.2062334269285202, "learning_rate": 7.986038650259135e-05, "loss": 0.23907241821289063, "step": 47710 }, { "epoch": 0.20487193357547032, "grad_norm": 0.02482936903834343, "learning_rate": 7.985607478247373e-05, "loss": 0.029480090737342833, "step": 47720 }, { "epoch": 0.20491486566549033, "grad_norm": 0.01601765677332878, "learning_rate": 7.98517630623561e-05, "loss": 0.19208526611328125, "step": 47730 }, { "epoch": 0.20495779775551035, "grad_norm": 0.06261662393808365, "learning_rate": 7.984745134223848e-05, "loss": 0.24382977485656737, "step": 47740 }, { "epoch": 0.20500072984553033, "grad_norm": 0.025355610996484756, "learning_rate": 7.984313962212086e-05, "loss": 0.1378118634223938, "step": 47750 }, { "epoch": 0.20504366193555035, "grad_norm": 0.8861377239227295, "learning_rate": 7.983882790200324e-05, "loss": 0.26656255722045896, "step": 47760 }, { "epoch": 0.20508659402557036, "grad_norm": 0.013460827060043812, "learning_rate": 7.983451618188561e-05, "loss": 0.3513129234313965, "step": 47770 }, { "epoch": 0.20512952611559035, "grad_norm": 0.061860062181949615, "learning_rate": 7.983020446176798e-05, "loss": 0.23082308769226073, "step": 47780 }, { "epoch": 0.20517245820561036, "grad_norm": 24.27768325805664, "learning_rate": 7.982589274165036e-05, "loss": 0.47048091888427734, "step": 47790 }, { "epoch": 0.20521539029563038, "grad_norm": 4.150501251220703, "learning_rate": 7.982158102153273e-05, "loss": 0.25462424755096436, "step": 47800 }, { "epoch": 0.20525832238565037, "grad_norm": 0.03856050595641136, "learning_rate": 7.981726930141511e-05, "loss": 0.44991393089294435, "step": 47810 }, { "epoch": 0.20530125447567038, "grad_norm": 0.11038416624069214, "learning_rate": 7.981295758129749e-05, "loss": 0.16237845420837402, "step": 47820 }, { "epoch": 0.2053441865656904, "grad_norm": 0.07443532347679138, "learning_rate": 7.980864586117986e-05, "loss": 0.15313451290130614, "step": 47830 }, { "epoch": 0.2053871186557104, "grad_norm": 0.5222668647766113, "learning_rate": 7.980433414106224e-05, "loss": 0.15321272611618042, "step": 47840 }, { "epoch": 0.2054300507457304, "grad_norm": 0.038755644112825394, "learning_rate": 7.980002242094462e-05, "loss": 0.29925990104675293, "step": 47850 }, { "epoch": 0.2054729828357504, "grad_norm": 0.08253604918718338, "learning_rate": 7.979571070082698e-05, "loss": 0.13226243257522582, "step": 47860 }, { "epoch": 0.20551591492577043, "grad_norm": 2.4042482376098633, "learning_rate": 7.979139898070936e-05, "loss": 0.22691683769226073, "step": 47870 }, { "epoch": 0.2055588470157904, "grad_norm": 1.6290377378463745, "learning_rate": 7.978708726059174e-05, "loss": 0.15348259210586548, "step": 47880 }, { "epoch": 0.20560177910581043, "grad_norm": 0.04041058570146561, "learning_rate": 7.978277554047412e-05, "loss": 0.32028086185455323, "step": 47890 }, { "epoch": 0.20564471119583044, "grad_norm": 10.279193878173828, "learning_rate": 7.977846382035649e-05, "loss": 0.16360957622528077, "step": 47900 }, { "epoch": 0.20568764328585043, "grad_norm": 0.9638367295265198, "learning_rate": 7.977415210023887e-05, "loss": 0.2235480308532715, "step": 47910 }, { "epoch": 0.20573057537587044, "grad_norm": 3.813462495803833, "learning_rate": 7.976984038012125e-05, "loss": 0.24002406597137452, "step": 47920 }, { "epoch": 0.20577350746589046, "grad_norm": 2.7070462703704834, "learning_rate": 7.976552866000362e-05, "loss": 0.12926928997039794, "step": 47930 }, { "epoch": 0.20581643955591047, "grad_norm": 1.2344295978546143, "learning_rate": 7.9761216939886e-05, "loss": 0.3576169490814209, "step": 47940 }, { "epoch": 0.20585937164593046, "grad_norm": 0.0029906737618148327, "learning_rate": 7.975690521976838e-05, "loss": 0.0424612283706665, "step": 47950 }, { "epoch": 0.20590230373595048, "grad_norm": 0.02020765095949173, "learning_rate": 7.975259349965076e-05, "loss": 0.30192534923553466, "step": 47960 }, { "epoch": 0.2059452358259705, "grad_norm": 0.011675640009343624, "learning_rate": 7.974828177953313e-05, "loss": 0.2707512617111206, "step": 47970 }, { "epoch": 0.20598816791599048, "grad_norm": 1.3537681102752686, "learning_rate": 7.974397005941551e-05, "loss": 0.4337340831756592, "step": 47980 }, { "epoch": 0.2060311000060105, "grad_norm": 0.001996510662138462, "learning_rate": 7.973965833929789e-05, "loss": 0.26091752052307127, "step": 47990 }, { "epoch": 0.2060740320960305, "grad_norm": 0.003297002287581563, "learning_rate": 7.973534661918027e-05, "loss": 0.21362035274505614, "step": 48000 }, { "epoch": 0.2060740320960305, "eval_loss": 0.46487903594970703, "eval_runtime": 27.53, "eval_samples_per_second": 3.632, "eval_steps_per_second": 3.632, "step": 48000 }, { "epoch": 0.2061169641860505, "grad_norm": 1.6414936780929565, "learning_rate": 7.973103489906264e-05, "loss": 0.28532023429870607, "step": 48010 }, { "epoch": 0.2061598962760705, "grad_norm": 0.18141505122184753, "learning_rate": 7.972672317894502e-05, "loss": 0.07151886224746704, "step": 48020 }, { "epoch": 0.20620282836609052, "grad_norm": 0.007624962832778692, "learning_rate": 7.972241145882738e-05, "loss": 0.2982606887817383, "step": 48030 }, { "epoch": 0.2062457604561105, "grad_norm": 1.486085057258606, "learning_rate": 7.971809973870976e-05, "loss": 0.29765031337738035, "step": 48040 }, { "epoch": 0.20628869254613053, "grad_norm": 0.03330766037106514, "learning_rate": 7.971378801859214e-05, "loss": 0.3565992832183838, "step": 48050 }, { "epoch": 0.20633162463615054, "grad_norm": 0.002912812400609255, "learning_rate": 7.970947629847452e-05, "loss": 0.047986358404159546, "step": 48060 }, { "epoch": 0.20637455672617055, "grad_norm": 0.053293656557798386, "learning_rate": 7.97051645783569e-05, "loss": 0.08486682176589966, "step": 48070 }, { "epoch": 0.20641748881619054, "grad_norm": 0.6024067997932434, "learning_rate": 7.970085285823927e-05, "loss": 0.04551941752433777, "step": 48080 }, { "epoch": 0.20646042090621056, "grad_norm": 1.5999566316604614, "learning_rate": 7.969654113812165e-05, "loss": 0.21173477172851562, "step": 48090 }, { "epoch": 0.20650335299623057, "grad_norm": 5.380755424499512, "learning_rate": 7.969222941800403e-05, "loss": 0.21389317512512207, "step": 48100 }, { "epoch": 0.20654628508625056, "grad_norm": 1.6914737224578857, "learning_rate": 7.968791769788639e-05, "loss": 0.1295459032058716, "step": 48110 }, { "epoch": 0.20658921717627057, "grad_norm": 2.0489678382873535, "learning_rate": 7.968360597776877e-05, "loss": 0.3609046459197998, "step": 48120 }, { "epoch": 0.2066321492662906, "grad_norm": 0.09325380623340607, "learning_rate": 7.967929425765114e-05, "loss": 0.31286661624908446, "step": 48130 }, { "epoch": 0.20667508135631057, "grad_norm": 0.002050831215456128, "learning_rate": 7.967498253753352e-05, "loss": 0.08834596276283264, "step": 48140 }, { "epoch": 0.2067180134463306, "grad_norm": 1.406714677810669, "learning_rate": 7.96706708174159e-05, "loss": 0.2861358165740967, "step": 48150 }, { "epoch": 0.2067609455363506, "grad_norm": 1.750779151916504, "learning_rate": 7.966635909729828e-05, "loss": 0.2869063138961792, "step": 48160 }, { "epoch": 0.20680387762637062, "grad_norm": 0.0026512339245527983, "learning_rate": 7.966204737718065e-05, "loss": 0.028093031048774718, "step": 48170 }, { "epoch": 0.2068468097163906, "grad_norm": 0.6258159875869751, "learning_rate": 7.965773565706303e-05, "loss": 0.4752193450927734, "step": 48180 }, { "epoch": 0.20688974180641062, "grad_norm": 0.0013247814495116472, "learning_rate": 7.965342393694541e-05, "loss": 0.3037768840789795, "step": 48190 }, { "epoch": 0.20693267389643064, "grad_norm": 0.012789330445230007, "learning_rate": 7.964911221682779e-05, "loss": 0.21984691619873048, "step": 48200 }, { "epoch": 0.20697560598645062, "grad_norm": 0.0047979154624044895, "learning_rate": 7.964480049671016e-05, "loss": 0.26316077709198, "step": 48210 }, { "epoch": 0.20701853807647064, "grad_norm": 0.025677207857370377, "learning_rate": 7.964048877659254e-05, "loss": 0.15255630016326904, "step": 48220 }, { "epoch": 0.20706147016649065, "grad_norm": 2.580648183822632, "learning_rate": 7.963617705647492e-05, "loss": 0.45435514450073244, "step": 48230 }, { "epoch": 0.20710440225651064, "grad_norm": 0.012255949899554253, "learning_rate": 7.96318653363573e-05, "loss": 0.20427842140197755, "step": 48240 }, { "epoch": 0.20714733434653065, "grad_norm": 0.17026881873607635, "learning_rate": 7.962755361623967e-05, "loss": 0.07440086603164672, "step": 48250 }, { "epoch": 0.20719026643655067, "grad_norm": 2.2256710529327393, "learning_rate": 7.962324189612205e-05, "loss": 0.1965106248855591, "step": 48260 }, { "epoch": 0.20723319852657068, "grad_norm": 0.021333398297429085, "learning_rate": 7.961893017600441e-05, "loss": 0.06148759126663208, "step": 48270 }, { "epoch": 0.20727613061659067, "grad_norm": 0.03702492266893387, "learning_rate": 7.961461845588679e-05, "loss": 0.19015427827835082, "step": 48280 }, { "epoch": 0.20731906270661069, "grad_norm": 0.015393697656691074, "learning_rate": 7.961030673576917e-05, "loss": 0.2020171880722046, "step": 48290 }, { "epoch": 0.2073619947966307, "grad_norm": 0.0388808436691761, "learning_rate": 7.960599501565155e-05, "loss": 0.025816604495048523, "step": 48300 }, { "epoch": 0.2074049268866507, "grad_norm": 0.2894894778728485, "learning_rate": 7.960168329553392e-05, "loss": 0.17407466173171998, "step": 48310 }, { "epoch": 0.2074478589766707, "grad_norm": 5.096240043640137, "learning_rate": 7.95973715754163e-05, "loss": 0.3877495050430298, "step": 48320 }, { "epoch": 0.20749079106669072, "grad_norm": 0.0010817173169925809, "learning_rate": 7.959305985529868e-05, "loss": 0.11308754682540893, "step": 48330 }, { "epoch": 0.2075337231567107, "grad_norm": 1.192091703414917, "learning_rate": 7.958874813518105e-05, "loss": 0.6353956699371338, "step": 48340 }, { "epoch": 0.20757665524673072, "grad_norm": 1.444043517112732, "learning_rate": 7.958443641506343e-05, "loss": 0.10261656045913696, "step": 48350 }, { "epoch": 0.20761958733675073, "grad_norm": 0.001375035266391933, "learning_rate": 7.95801246949458e-05, "loss": 0.21776161193847657, "step": 48360 }, { "epoch": 0.20766251942677075, "grad_norm": 0.9765628576278687, "learning_rate": 7.957581297482817e-05, "loss": 0.3838587522506714, "step": 48370 }, { "epoch": 0.20770545151679073, "grad_norm": 2.84651780128479, "learning_rate": 7.957150125471055e-05, "loss": 0.287298583984375, "step": 48380 }, { "epoch": 0.20774838360681075, "grad_norm": 0.068606436252594, "learning_rate": 7.956718953459293e-05, "loss": 0.11763962507247924, "step": 48390 }, { "epoch": 0.20779131569683076, "grad_norm": 1.5584431886672974, "learning_rate": 7.95628778144753e-05, "loss": 0.03175153732299805, "step": 48400 }, { "epoch": 0.20783424778685075, "grad_norm": 0.10847458988428116, "learning_rate": 7.955856609435768e-05, "loss": 0.12701599597930907, "step": 48410 }, { "epoch": 0.20787717987687077, "grad_norm": 0.516201913356781, "learning_rate": 7.955425437424007e-05, "loss": 0.2435328722000122, "step": 48420 }, { "epoch": 0.20792011196689078, "grad_norm": 0.09531274437904358, "learning_rate": 7.954994265412245e-05, "loss": 0.17576621770858764, "step": 48430 }, { "epoch": 0.20796304405691077, "grad_norm": 3.824319839477539, "learning_rate": 7.954563093400481e-05, "loss": 0.31635844707489014, "step": 48440 }, { "epoch": 0.20800597614693078, "grad_norm": 9.462091445922852, "learning_rate": 7.954131921388719e-05, "loss": 0.1305789351463318, "step": 48450 }, { "epoch": 0.2080489082369508, "grad_norm": 1.0754421949386597, "learning_rate": 7.953700749376957e-05, "loss": 0.359479284286499, "step": 48460 }, { "epoch": 0.20809184032697078, "grad_norm": 1.957236647605896, "learning_rate": 7.953269577365195e-05, "loss": 0.3150218963623047, "step": 48470 }, { "epoch": 0.2081347724169908, "grad_norm": 0.002105705440044403, "learning_rate": 7.952838405353432e-05, "loss": 0.1954740524291992, "step": 48480 }, { "epoch": 0.2081777045070108, "grad_norm": 12.278348922729492, "learning_rate": 7.95240723334167e-05, "loss": 0.28711376190185545, "step": 48490 }, { "epoch": 0.20822063659703083, "grad_norm": 2.2454352378845215, "learning_rate": 7.951976061329908e-05, "loss": 0.2067957878112793, "step": 48500 }, { "epoch": 0.20826356868705082, "grad_norm": 5.584324836730957, "learning_rate": 7.951544889318146e-05, "loss": 0.3211017847061157, "step": 48510 }, { "epoch": 0.20830650077707083, "grad_norm": 0.7228730916976929, "learning_rate": 7.951113717306382e-05, "loss": 0.3107947826385498, "step": 48520 }, { "epoch": 0.20834943286709084, "grad_norm": 0.015868451446294785, "learning_rate": 7.95068254529462e-05, "loss": 0.2286367654800415, "step": 48530 }, { "epoch": 0.20839236495711083, "grad_norm": 1.4947551488876343, "learning_rate": 7.950251373282857e-05, "loss": 0.25593032836914065, "step": 48540 }, { "epoch": 0.20843529704713085, "grad_norm": 0.027004824951291084, "learning_rate": 7.949820201271095e-05, "loss": 0.03589800000190735, "step": 48550 }, { "epoch": 0.20847822913715086, "grad_norm": 1.1982914209365845, "learning_rate": 7.949389029259333e-05, "loss": 0.24310851097106934, "step": 48560 }, { "epoch": 0.20852116122717085, "grad_norm": 7.410946846008301, "learning_rate": 7.94895785724757e-05, "loss": 0.16178617477416993, "step": 48570 }, { "epoch": 0.20856409331719086, "grad_norm": 0.0017027149442583323, "learning_rate": 7.948526685235808e-05, "loss": 0.36240453720092775, "step": 48580 }, { "epoch": 0.20860702540721088, "grad_norm": 0.210503488779068, "learning_rate": 7.948095513224046e-05, "loss": 0.2611191749572754, "step": 48590 }, { "epoch": 0.2086499574972309, "grad_norm": 0.11781711131334305, "learning_rate": 7.947664341212283e-05, "loss": 0.19964562654495238, "step": 48600 }, { "epoch": 0.20869288958725088, "grad_norm": 0.006439481396228075, "learning_rate": 7.94723316920052e-05, "loss": 0.17323254346847533, "step": 48610 }, { "epoch": 0.2087358216772709, "grad_norm": 1.1420397758483887, "learning_rate": 7.946801997188758e-05, "loss": 0.23409998416900635, "step": 48620 }, { "epoch": 0.2087787537672909, "grad_norm": 0.12256089597940445, "learning_rate": 7.946370825176996e-05, "loss": 0.17933051586151122, "step": 48630 }, { "epoch": 0.2088216858573109, "grad_norm": 0.063877634704113, "learning_rate": 7.945939653165235e-05, "loss": 0.181551992893219, "step": 48640 }, { "epoch": 0.2088646179473309, "grad_norm": 0.5018807649612427, "learning_rate": 7.945508481153473e-05, "loss": 0.1766904592514038, "step": 48650 }, { "epoch": 0.20890755003735093, "grad_norm": 0.2746643126010895, "learning_rate": 7.94507730914171e-05, "loss": 0.11217392683029175, "step": 48660 }, { "epoch": 0.2089504821273709, "grad_norm": 7.02359676361084, "learning_rate": 7.944646137129948e-05, "loss": 0.3691625833511353, "step": 48670 }, { "epoch": 0.20899341421739093, "grad_norm": 0.07516085356473923, "learning_rate": 7.944214965118184e-05, "loss": 0.263259220123291, "step": 48680 }, { "epoch": 0.20903634630741094, "grad_norm": 0.17022983729839325, "learning_rate": 7.943783793106422e-05, "loss": 0.2957159996032715, "step": 48690 }, { "epoch": 0.20907927839743096, "grad_norm": 0.14971184730529785, "learning_rate": 7.94335262109466e-05, "loss": 0.3658233880996704, "step": 48700 }, { "epoch": 0.20912221048745094, "grad_norm": 0.29934176802635193, "learning_rate": 7.942921449082898e-05, "loss": 0.03634549081325531, "step": 48710 }, { "epoch": 0.20916514257747096, "grad_norm": 0.04933731257915497, "learning_rate": 7.942490277071135e-05, "loss": 0.1716094732284546, "step": 48720 }, { "epoch": 0.20920807466749097, "grad_norm": 1.1368640661239624, "learning_rate": 7.942059105059373e-05, "loss": 0.12027144432067871, "step": 48730 }, { "epoch": 0.20925100675751096, "grad_norm": 0.014941312372684479, "learning_rate": 7.941627933047611e-05, "loss": 0.13583672046661377, "step": 48740 }, { "epoch": 0.20929393884753097, "grad_norm": 0.12930569052696228, "learning_rate": 7.941196761035849e-05, "loss": 0.1166748046875, "step": 48750 }, { "epoch": 0.209336870937551, "grad_norm": 0.1529303342103958, "learning_rate": 7.940765589024086e-05, "loss": 0.17531336545944215, "step": 48760 }, { "epoch": 0.20937980302757098, "grad_norm": 2.075014114379883, "learning_rate": 7.940334417012323e-05, "loss": 0.45829410552978517, "step": 48770 }, { "epoch": 0.209422735117591, "grad_norm": 1.3795547485351562, "learning_rate": 7.93990324500056e-05, "loss": 0.16225976943969728, "step": 48780 }, { "epoch": 0.209465667207611, "grad_norm": 5.576574325561523, "learning_rate": 7.939472072988798e-05, "loss": 0.16702204942703247, "step": 48790 }, { "epoch": 0.20950859929763102, "grad_norm": 1.667356014251709, "learning_rate": 7.939040900977036e-05, "loss": 0.5205566883087158, "step": 48800 }, { "epoch": 0.209551531387651, "grad_norm": 0.20947889983654022, "learning_rate": 7.938609728965274e-05, "loss": 0.03103659749031067, "step": 48810 }, { "epoch": 0.20959446347767102, "grad_norm": 0.021028542891144753, "learning_rate": 7.938178556953511e-05, "loss": 0.12524155378341675, "step": 48820 }, { "epoch": 0.20963739556769104, "grad_norm": 19.887067794799805, "learning_rate": 7.937747384941749e-05, "loss": 0.4347548007965088, "step": 48830 }, { "epoch": 0.20968032765771102, "grad_norm": 0.003681926289573312, "learning_rate": 7.937316212929987e-05, "loss": 0.09043388366699219, "step": 48840 }, { "epoch": 0.20972325974773104, "grad_norm": 0.1534494310617447, "learning_rate": 7.936885040918223e-05, "loss": 0.2118082046508789, "step": 48850 }, { "epoch": 0.20976619183775105, "grad_norm": 4.88729190826416, "learning_rate": 7.936453868906462e-05, "loss": 0.4548500061035156, "step": 48860 }, { "epoch": 0.20980912392777104, "grad_norm": 2.5274174213409424, "learning_rate": 7.9360226968947e-05, "loss": 0.1766461491584778, "step": 48870 }, { "epoch": 0.20985205601779106, "grad_norm": 1.0588138103485107, "learning_rate": 7.935591524882938e-05, "loss": 0.06500946283340454, "step": 48880 }, { "epoch": 0.20989498810781107, "grad_norm": 0.6156249642372131, "learning_rate": 7.935160352871175e-05, "loss": 0.32105815410614014, "step": 48890 }, { "epoch": 0.20993792019783106, "grad_norm": 0.002214090432971716, "learning_rate": 7.934729180859413e-05, "loss": 0.05551689863204956, "step": 48900 }, { "epoch": 0.20998085228785107, "grad_norm": 1.122273564338684, "learning_rate": 7.934298008847651e-05, "loss": 0.18426462411880493, "step": 48910 }, { "epoch": 0.2100237843778711, "grad_norm": 0.9584456086158752, "learning_rate": 7.933866836835889e-05, "loss": 0.3217077970504761, "step": 48920 }, { "epoch": 0.2100667164678911, "grad_norm": 15.653519630432129, "learning_rate": 7.933435664824125e-05, "loss": 0.3382516860961914, "step": 48930 }, { "epoch": 0.2101096485579111, "grad_norm": 0.024164466187357903, "learning_rate": 7.933004492812363e-05, "loss": 0.2831050157546997, "step": 48940 }, { "epoch": 0.2101525806479311, "grad_norm": 0.6815698742866516, "learning_rate": 7.9325733208006e-05, "loss": 0.17481757402420045, "step": 48950 }, { "epoch": 0.21019551273795112, "grad_norm": 0.1216021403670311, "learning_rate": 7.932142148788838e-05, "loss": 0.2150895595550537, "step": 48960 }, { "epoch": 0.2102384448279711, "grad_norm": 0.11215940117835999, "learning_rate": 7.931710976777076e-05, "loss": 0.005117279291152954, "step": 48970 }, { "epoch": 0.21028137691799112, "grad_norm": 0.006101830396801233, "learning_rate": 7.931279804765314e-05, "loss": 0.04583222866058349, "step": 48980 }, { "epoch": 0.21032430900801113, "grad_norm": 0.2531506419181824, "learning_rate": 7.930848632753551e-05, "loss": 0.08759585618972779, "step": 48990 }, { "epoch": 0.21036724109803112, "grad_norm": 0.005882403813302517, "learning_rate": 7.930417460741789e-05, "loss": 0.2158358573913574, "step": 49000 }, { "epoch": 0.21036724109803112, "eval_loss": 0.44679224491119385, "eval_runtime": 27.4542, "eval_samples_per_second": 3.642, "eval_steps_per_second": 3.642, "step": 49000 }, { "epoch": 0.21041017318805114, "grad_norm": 0.05826570838689804, "learning_rate": 7.929986288730026e-05, "loss": 0.2593266248703003, "step": 49010 }, { "epoch": 0.21045310527807115, "grad_norm": 2.3180344104766846, "learning_rate": 7.929555116718263e-05, "loss": 0.487042760848999, "step": 49020 }, { "epoch": 0.21049603736809117, "grad_norm": 0.07739551365375519, "learning_rate": 7.929123944706501e-05, "loss": 0.13294532299041747, "step": 49030 }, { "epoch": 0.21053896945811115, "grad_norm": 0.03116386942565441, "learning_rate": 7.928692772694739e-05, "loss": 0.24828364849090576, "step": 49040 }, { "epoch": 0.21058190154813117, "grad_norm": 0.9739252924919128, "learning_rate": 7.928261600682976e-05, "loss": 0.3101269960403442, "step": 49050 }, { "epoch": 0.21062483363815118, "grad_norm": 1.394478678703308, "learning_rate": 7.927830428671214e-05, "loss": 0.12690937519073486, "step": 49060 }, { "epoch": 0.21066776572817117, "grad_norm": 0.045092012733221054, "learning_rate": 7.927399256659452e-05, "loss": 0.1577238082885742, "step": 49070 }, { "epoch": 0.21071069781819118, "grad_norm": 0.9610133767127991, "learning_rate": 7.92696808464769e-05, "loss": 0.26647014617919923, "step": 49080 }, { "epoch": 0.2107536299082112, "grad_norm": 0.04043450951576233, "learning_rate": 7.926536912635927e-05, "loss": 0.20672433376312255, "step": 49090 }, { "epoch": 0.21079656199823119, "grad_norm": 0.27440470457077026, "learning_rate": 7.926105740624165e-05, "loss": 0.19790778160095215, "step": 49100 }, { "epoch": 0.2108394940882512, "grad_norm": 0.011567025445401669, "learning_rate": 7.925674568612403e-05, "loss": 0.2572205066680908, "step": 49110 }, { "epoch": 0.21088242617827122, "grad_norm": 0.020354948937892914, "learning_rate": 7.92524339660064e-05, "loss": 0.15365511178970337, "step": 49120 }, { "epoch": 0.21092535826829123, "grad_norm": 0.011306763626635075, "learning_rate": 7.924812224588878e-05, "loss": 0.20448198318481445, "step": 49130 }, { "epoch": 0.21096829035831122, "grad_norm": 0.03681041672825813, "learning_rate": 7.924381052577116e-05, "loss": 0.1980149745941162, "step": 49140 }, { "epoch": 0.21101122244833123, "grad_norm": 1.6105586290359497, "learning_rate": 7.923949880565354e-05, "loss": 0.38074944019317625, "step": 49150 }, { "epoch": 0.21105415453835125, "grad_norm": 0.0796269029378891, "learning_rate": 7.923518708553592e-05, "loss": 0.19975688457489013, "step": 49160 }, { "epoch": 0.21109708662837123, "grad_norm": 0.9233969449996948, "learning_rate": 7.923087536541829e-05, "loss": 0.4761190414428711, "step": 49170 }, { "epoch": 0.21114001871839125, "grad_norm": 0.034418150782585144, "learning_rate": 7.922656364530066e-05, "loss": 0.02614889442920685, "step": 49180 }, { "epoch": 0.21118295080841126, "grad_norm": 2.6076526641845703, "learning_rate": 7.922225192518303e-05, "loss": 0.10714485645294189, "step": 49190 }, { "epoch": 0.21122588289843125, "grad_norm": 5.970362186431885, "learning_rate": 7.921794020506541e-05, "loss": 0.3683354616165161, "step": 49200 }, { "epoch": 0.21126881498845126, "grad_norm": 0.006423947401344776, "learning_rate": 7.921362848494779e-05, "loss": 0.23473081588745118, "step": 49210 }, { "epoch": 0.21131174707847128, "grad_norm": 1.2695651054382324, "learning_rate": 7.920931676483017e-05, "loss": 0.12597305774688722, "step": 49220 }, { "epoch": 0.2113546791684913, "grad_norm": 6.1923909187316895, "learning_rate": 7.920500504471254e-05, "loss": 0.20782461166381835, "step": 49230 }, { "epoch": 0.21139761125851128, "grad_norm": 0.12185300886631012, "learning_rate": 7.920069332459492e-05, "loss": 0.36885390281677244, "step": 49240 }, { "epoch": 0.2114405433485313, "grad_norm": 1.584035873413086, "learning_rate": 7.91963816044773e-05, "loss": 0.19908725023269652, "step": 49250 }, { "epoch": 0.2114834754385513, "grad_norm": 5.6183271408081055, "learning_rate": 7.919206988435966e-05, "loss": 0.22750062942504884, "step": 49260 }, { "epoch": 0.2115264075285713, "grad_norm": 0.7646486759185791, "learning_rate": 7.918775816424204e-05, "loss": 0.15708142518997192, "step": 49270 }, { "epoch": 0.2115693396185913, "grad_norm": 4.9651360511779785, "learning_rate": 7.918344644412442e-05, "loss": 0.33751084804534914, "step": 49280 }, { "epoch": 0.21161227170861133, "grad_norm": 0.23684753477573395, "learning_rate": 7.91791347240068e-05, "loss": 0.18519192934036255, "step": 49290 }, { "epoch": 0.21165520379863131, "grad_norm": 0.07096804678440094, "learning_rate": 7.917482300388917e-05, "loss": 0.47649459838867186, "step": 49300 }, { "epoch": 0.21169813588865133, "grad_norm": 0.22593224048614502, "learning_rate": 7.917051128377155e-05, "loss": 0.06051828265190125, "step": 49310 }, { "epoch": 0.21174106797867134, "grad_norm": 0.11356212198734283, "learning_rate": 7.916619956365393e-05, "loss": 0.3678733348846436, "step": 49320 }, { "epoch": 0.21178400006869133, "grad_norm": 0.17641082406044006, "learning_rate": 7.91618878435363e-05, "loss": 0.14328333139419555, "step": 49330 }, { "epoch": 0.21182693215871135, "grad_norm": 0.1647193282842636, "learning_rate": 7.915757612341868e-05, "loss": 0.200999116897583, "step": 49340 }, { "epoch": 0.21186986424873136, "grad_norm": 0.04526711627840996, "learning_rate": 7.915326440330106e-05, "loss": 0.23270890712738038, "step": 49350 }, { "epoch": 0.21191279633875137, "grad_norm": 1.2517226934432983, "learning_rate": 7.914895268318344e-05, "loss": 0.3122552394866943, "step": 49360 }, { "epoch": 0.21195572842877136, "grad_norm": 5.0854363441467285, "learning_rate": 7.914464096306581e-05, "loss": 0.2330098867416382, "step": 49370 }, { "epoch": 0.21199866051879138, "grad_norm": 1.9728223085403442, "learning_rate": 7.914032924294819e-05, "loss": 0.17663989067077637, "step": 49380 }, { "epoch": 0.2120415926088114, "grad_norm": 7.4587082862854, "learning_rate": 7.913601752283057e-05, "loss": 0.34292399883270264, "step": 49390 }, { "epoch": 0.21208452469883138, "grad_norm": 1.4386489391326904, "learning_rate": 7.913170580271294e-05, "loss": 0.21951770782470703, "step": 49400 }, { "epoch": 0.2121274567888514, "grad_norm": 0.9777666926383972, "learning_rate": 7.912739408259532e-05, "loss": 0.3396186828613281, "step": 49410 }, { "epoch": 0.2121703888788714, "grad_norm": 0.07708708941936493, "learning_rate": 7.912308236247769e-05, "loss": 0.16404324769973755, "step": 49420 }, { "epoch": 0.2122133209688914, "grad_norm": 0.13779999315738678, "learning_rate": 7.911877064236006e-05, "loss": 0.08597908020019532, "step": 49430 }, { "epoch": 0.2122562530589114, "grad_norm": 0.14609119296073914, "learning_rate": 7.911445892224244e-05, "loss": 0.08698570728302002, "step": 49440 }, { "epoch": 0.21229918514893142, "grad_norm": 1.8430030345916748, "learning_rate": 7.911014720212482e-05, "loss": 0.3098950147628784, "step": 49450 }, { "epoch": 0.21234211723895144, "grad_norm": 0.020075861364603043, "learning_rate": 7.91058354820072e-05, "loss": 0.3637603044509888, "step": 49460 }, { "epoch": 0.21238504932897143, "grad_norm": 2.2435684204101562, "learning_rate": 7.910152376188957e-05, "loss": 0.2776512861251831, "step": 49470 }, { "epoch": 0.21242798141899144, "grad_norm": 0.01798534207046032, "learning_rate": 7.909721204177195e-05, "loss": 0.06801215410232545, "step": 49480 }, { "epoch": 0.21247091350901146, "grad_norm": 1.8715423345565796, "learning_rate": 7.909290032165433e-05, "loss": 0.19331105947494506, "step": 49490 }, { "epoch": 0.21251384559903144, "grad_norm": 2.531074047088623, "learning_rate": 7.90885886015367e-05, "loss": 0.36147854328155515, "step": 49500 }, { "epoch": 0.21255677768905146, "grad_norm": 0.11733974516391754, "learning_rate": 7.908427688141907e-05, "loss": 0.07555552721023559, "step": 49510 }, { "epoch": 0.21259970977907147, "grad_norm": 1.9904005527496338, "learning_rate": 7.907996516130145e-05, "loss": 0.2713768720626831, "step": 49520 }, { "epoch": 0.21264264186909146, "grad_norm": 2.1456587314605713, "learning_rate": 7.907565344118382e-05, "loss": 0.11105477809906006, "step": 49530 }, { "epoch": 0.21268557395911147, "grad_norm": 0.11958852410316467, "learning_rate": 7.90713417210662e-05, "loss": 0.07827035188674927, "step": 49540 }, { "epoch": 0.2127285060491315, "grad_norm": 1.6413260698318481, "learning_rate": 7.906703000094858e-05, "loss": 0.33407251834869384, "step": 49550 }, { "epoch": 0.2127714381391515, "grad_norm": 0.09317167103290558, "learning_rate": 7.906271828083096e-05, "loss": 0.3209323167800903, "step": 49560 }, { "epoch": 0.2128143702291715, "grad_norm": 0.28604403138160706, "learning_rate": 7.905840656071333e-05, "loss": 0.34248862266540525, "step": 49570 }, { "epoch": 0.2128573023191915, "grad_norm": 0.20116518437862396, "learning_rate": 7.905409484059571e-05, "loss": 0.2403996467590332, "step": 49580 }, { "epoch": 0.21290023440921152, "grad_norm": 0.008949404582381248, "learning_rate": 7.904978312047809e-05, "loss": 0.21593551635742186, "step": 49590 }, { "epoch": 0.2129431664992315, "grad_norm": 0.48243486881256104, "learning_rate": 7.904547140036046e-05, "loss": 0.26777894496917726, "step": 49600 }, { "epoch": 0.21298609858925152, "grad_norm": 0.39803624153137207, "learning_rate": 7.904115968024284e-05, "loss": 0.3349623680114746, "step": 49610 }, { "epoch": 0.21302903067927154, "grad_norm": 0.3646968901157379, "learning_rate": 7.903684796012522e-05, "loss": 0.14400217533111573, "step": 49620 }, { "epoch": 0.21307196276929152, "grad_norm": 0.3641029894351959, "learning_rate": 7.90325362400076e-05, "loss": 0.1390596866607666, "step": 49630 }, { "epoch": 0.21311489485931154, "grad_norm": 0.07330887019634247, "learning_rate": 7.902822451988997e-05, "loss": 0.21187987327575683, "step": 49640 }, { "epoch": 0.21315782694933155, "grad_norm": 0.018047798424959183, "learning_rate": 7.902391279977235e-05, "loss": 0.07641729712486267, "step": 49650 }, { "epoch": 0.21320075903935157, "grad_norm": 4.7064056396484375, "learning_rate": 7.901960107965473e-05, "loss": 0.15568535327911376, "step": 49660 }, { "epoch": 0.21324369112937155, "grad_norm": 1.504948616027832, "learning_rate": 7.901528935953709e-05, "loss": 0.39885008335113525, "step": 49670 }, { "epoch": 0.21328662321939157, "grad_norm": 1.1078081130981445, "learning_rate": 7.901097763941947e-05, "loss": 0.2893169164657593, "step": 49680 }, { "epoch": 0.21332955530941158, "grad_norm": 0.1008782684803009, "learning_rate": 7.900666591930185e-05, "loss": 0.15592665672302247, "step": 49690 }, { "epoch": 0.21337248739943157, "grad_norm": 0.4503695070743561, "learning_rate": 7.900235419918422e-05, "loss": 0.3227583646774292, "step": 49700 }, { "epoch": 0.21341541948945159, "grad_norm": 0.0678926333785057, "learning_rate": 7.89980424790666e-05, "loss": 0.21938278675079345, "step": 49710 }, { "epoch": 0.2134583515794716, "grad_norm": 8.68582534790039, "learning_rate": 7.899373075894898e-05, "loss": 0.27250258922576903, "step": 49720 }, { "epoch": 0.2135012836694916, "grad_norm": 3.055732011795044, "learning_rate": 7.898941903883136e-05, "loss": 0.07037267684936524, "step": 49730 }, { "epoch": 0.2135442157595116, "grad_norm": 1.5734680891036987, "learning_rate": 7.898510731871373e-05, "loss": 0.19788484573364257, "step": 49740 }, { "epoch": 0.21358714784953162, "grad_norm": 1.193137288093567, "learning_rate": 7.89807955985961e-05, "loss": 0.4758903026580811, "step": 49750 }, { "epoch": 0.2136300799395516, "grad_norm": 4.80873441696167, "learning_rate": 7.897648387847847e-05, "loss": 0.40531315803527834, "step": 49760 }, { "epoch": 0.21367301202957162, "grad_norm": 0.38741931319236755, "learning_rate": 7.897217215836085e-05, "loss": 0.035053136944770816, "step": 49770 }, { "epoch": 0.21371594411959163, "grad_norm": 0.19839559495449066, "learning_rate": 7.896786043824323e-05, "loss": 0.2304708480834961, "step": 49780 }, { "epoch": 0.21375887620961165, "grad_norm": 2.424915313720703, "learning_rate": 7.896354871812561e-05, "loss": 0.21277079582214356, "step": 49790 }, { "epoch": 0.21380180829963163, "grad_norm": 1.4982346296310425, "learning_rate": 7.895923699800798e-05, "loss": 0.14394724369049072, "step": 49800 }, { "epoch": 0.21384474038965165, "grad_norm": 0.07861235737800598, "learning_rate": 7.895492527789036e-05, "loss": 0.301897668838501, "step": 49810 }, { "epoch": 0.21388767247967166, "grad_norm": 0.12516728043556213, "learning_rate": 7.895061355777275e-05, "loss": 0.2530207633972168, "step": 49820 }, { "epoch": 0.21393060456969165, "grad_norm": 0.12502363324165344, "learning_rate": 7.894630183765513e-05, "loss": 0.29697988033294676, "step": 49830 }, { "epoch": 0.21397353665971167, "grad_norm": 2.6676998138427734, "learning_rate": 7.89419901175375e-05, "loss": 0.22099952697753905, "step": 49840 }, { "epoch": 0.21401646874973168, "grad_norm": 0.7491877675056458, "learning_rate": 7.893767839741987e-05, "loss": 0.24905600547790527, "step": 49850 }, { "epoch": 0.21405940083975167, "grad_norm": 2.600252866744995, "learning_rate": 7.893336667730225e-05, "loss": 0.2663122177124023, "step": 49860 }, { "epoch": 0.21410233292977168, "grad_norm": 0.0847255140542984, "learning_rate": 7.892905495718463e-05, "loss": 0.19019591808319092, "step": 49870 }, { "epoch": 0.2141452650197917, "grad_norm": 0.29786983132362366, "learning_rate": 7.8924743237067e-05, "loss": 0.3501892566680908, "step": 49880 }, { "epoch": 0.2141881971098117, "grad_norm": 0.028485197573900223, "learning_rate": 7.892043151694938e-05, "loss": 0.20272502899169922, "step": 49890 }, { "epoch": 0.2142311291998317, "grad_norm": 0.3347903788089752, "learning_rate": 7.891611979683176e-05, "loss": 0.23856263160705565, "step": 49900 }, { "epoch": 0.2142740612898517, "grad_norm": 0.2699025273323059, "learning_rate": 7.891180807671413e-05, "loss": 0.44274020195007324, "step": 49910 }, { "epoch": 0.21431699337987173, "grad_norm": 2.3436944484710693, "learning_rate": 7.89074963565965e-05, "loss": 0.19832544326782225, "step": 49920 }, { "epoch": 0.21435992546989172, "grad_norm": 0.07889475673437119, "learning_rate": 7.890318463647888e-05, "loss": 0.261244535446167, "step": 49930 }, { "epoch": 0.21440285755991173, "grad_norm": 0.9712432026863098, "learning_rate": 7.889887291636125e-05, "loss": 0.12264488935470581, "step": 49940 }, { "epoch": 0.21444578964993175, "grad_norm": 0.015600843355059624, "learning_rate": 7.889456119624363e-05, "loss": 0.12793511152267456, "step": 49950 }, { "epoch": 0.21448872173995173, "grad_norm": 1.866066813468933, "learning_rate": 7.889024947612601e-05, "loss": 0.14210785627365113, "step": 49960 }, { "epoch": 0.21453165382997175, "grad_norm": 0.024746781215071678, "learning_rate": 7.888593775600839e-05, "loss": 0.0965128481388092, "step": 49970 }, { "epoch": 0.21457458591999176, "grad_norm": 5.134984970092773, "learning_rate": 7.888162603589076e-05, "loss": 0.2880469560623169, "step": 49980 }, { "epoch": 0.21461751801001178, "grad_norm": 0.09031741321086884, "learning_rate": 7.887731431577314e-05, "loss": 0.24112091064453126, "step": 49990 }, { "epoch": 0.21466045010003176, "grad_norm": 2.713540554046631, "learning_rate": 7.88730025956555e-05, "loss": 0.20801658630371095, "step": 50000 }, { "epoch": 0.21466045010003176, "eval_loss": 0.4581822454929352, "eval_runtime": 27.4412, "eval_samples_per_second": 3.644, "eval_steps_per_second": 3.644, "step": 50000 }, { "epoch": 0.21470338219005178, "grad_norm": 0.019549906253814697, "learning_rate": 7.886869087553788e-05, "loss": 0.38978161811828616, "step": 50010 }, { "epoch": 0.2147463142800718, "grad_norm": 0.023972727358341217, "learning_rate": 7.886437915542026e-05, "loss": 0.1592766284942627, "step": 50020 }, { "epoch": 0.21478924637009178, "grad_norm": 0.38454845547676086, "learning_rate": 7.886006743530264e-05, "loss": 0.14972034692764283, "step": 50030 }, { "epoch": 0.2148321784601118, "grad_norm": 0.04076255112886429, "learning_rate": 7.885575571518503e-05, "loss": 0.1856519103050232, "step": 50040 }, { "epoch": 0.2148751105501318, "grad_norm": 1.6896454095840454, "learning_rate": 7.88514439950674e-05, "loss": 0.20486097335815429, "step": 50050 }, { "epoch": 0.2149180426401518, "grad_norm": 2.6618430614471436, "learning_rate": 7.884713227494978e-05, "loss": 0.13558430671691896, "step": 50060 }, { "epoch": 0.2149609747301718, "grad_norm": 0.027252428233623505, "learning_rate": 7.884282055483216e-05, "loss": 0.1477035403251648, "step": 50070 }, { "epoch": 0.21500390682019183, "grad_norm": 3.012190580368042, "learning_rate": 7.883850883471452e-05, "loss": 0.3262253046035767, "step": 50080 }, { "epoch": 0.21504683891021184, "grad_norm": 3.021601438522339, "learning_rate": 7.88341971145969e-05, "loss": 0.20172638893127443, "step": 50090 }, { "epoch": 0.21508977100023183, "grad_norm": 0.03279409557580948, "learning_rate": 7.882988539447928e-05, "loss": 0.16631543636322021, "step": 50100 }, { "epoch": 0.21513270309025184, "grad_norm": 2.786000967025757, "learning_rate": 7.882557367436165e-05, "loss": 0.27362570762634275, "step": 50110 }, { "epoch": 0.21517563518027186, "grad_norm": 1.3738958835601807, "learning_rate": 7.882126195424403e-05, "loss": 0.3563390254974365, "step": 50120 }, { "epoch": 0.21521856727029184, "grad_norm": 0.10527623444795609, "learning_rate": 7.881695023412641e-05, "loss": 0.29485454559326174, "step": 50130 }, { "epoch": 0.21526149936031186, "grad_norm": 0.037768494337797165, "learning_rate": 7.881263851400879e-05, "loss": 0.317557168006897, "step": 50140 }, { "epoch": 0.21530443145033187, "grad_norm": 0.04559774324297905, "learning_rate": 7.880832679389116e-05, "loss": 0.1404987096786499, "step": 50150 }, { "epoch": 0.21534736354035186, "grad_norm": 2.323525905609131, "learning_rate": 7.880401507377354e-05, "loss": 0.19473350048065186, "step": 50160 }, { "epoch": 0.21539029563037188, "grad_norm": 0.11371485143899918, "learning_rate": 7.87997033536559e-05, "loss": 0.32835702896118163, "step": 50170 }, { "epoch": 0.2154332277203919, "grad_norm": 0.05827485769987106, "learning_rate": 7.879539163353828e-05, "loss": 0.2388458251953125, "step": 50180 }, { "epoch": 0.21547615981041188, "grad_norm": 0.2014988660812378, "learning_rate": 7.879107991342066e-05, "loss": 0.16453293561935425, "step": 50190 }, { "epoch": 0.2155190919004319, "grad_norm": 0.03847496211528778, "learning_rate": 7.878676819330304e-05, "loss": 0.22010433673858643, "step": 50200 }, { "epoch": 0.2155620239904519, "grad_norm": 0.2672198414802551, "learning_rate": 7.878245647318541e-05, "loss": 0.24380803108215332, "step": 50210 }, { "epoch": 0.21560495608047192, "grad_norm": 32.0848388671875, "learning_rate": 7.877814475306779e-05, "loss": 0.12520097494125365, "step": 50220 }, { "epoch": 0.2156478881704919, "grad_norm": 0.7219252586364746, "learning_rate": 7.877383303295017e-05, "loss": 0.3051914691925049, "step": 50230 }, { "epoch": 0.21569082026051192, "grad_norm": 0.18801699578762054, "learning_rate": 7.876952131283255e-05, "loss": 0.35453855991363525, "step": 50240 }, { "epoch": 0.21573375235053194, "grad_norm": 5.704795837402344, "learning_rate": 7.876520959271491e-05, "loss": 0.4324212074279785, "step": 50250 }, { "epoch": 0.21577668444055192, "grad_norm": 9.323872566223145, "learning_rate": 7.87608978725973e-05, "loss": 0.3148200273513794, "step": 50260 }, { "epoch": 0.21581961653057194, "grad_norm": 0.027610288932919502, "learning_rate": 7.875658615247968e-05, "loss": 0.32387876510620117, "step": 50270 }, { "epoch": 0.21586254862059195, "grad_norm": 0.2569706439971924, "learning_rate": 7.875227443236206e-05, "loss": 0.2268768310546875, "step": 50280 }, { "epoch": 0.21590548071061194, "grad_norm": 2.0304300785064697, "learning_rate": 7.874796271224443e-05, "loss": 0.16933313608169556, "step": 50290 }, { "epoch": 0.21594841280063196, "grad_norm": 5.008077621459961, "learning_rate": 7.874365099212681e-05, "loss": 0.20965242385864258, "step": 50300 }, { "epoch": 0.21599134489065197, "grad_norm": 0.01447244081646204, "learning_rate": 7.873933927200919e-05, "loss": 0.16726946830749512, "step": 50310 }, { "epoch": 0.21603427698067199, "grad_norm": 0.36545318365097046, "learning_rate": 7.873502755189157e-05, "loss": 0.10592725276947021, "step": 50320 }, { "epoch": 0.21607720907069197, "grad_norm": 0.00590835977345705, "learning_rate": 7.873071583177393e-05, "loss": 0.4637115001678467, "step": 50330 }, { "epoch": 0.216120141160712, "grad_norm": 0.16791151463985443, "learning_rate": 7.87264041116563e-05, "loss": 0.1303458571434021, "step": 50340 }, { "epoch": 0.216163073250732, "grad_norm": 0.9499484300613403, "learning_rate": 7.872209239153868e-05, "loss": 0.19090571403503417, "step": 50350 }, { "epoch": 0.216206005340752, "grad_norm": 0.0053385356441140175, "learning_rate": 7.871778067142106e-05, "loss": 0.13217315673828126, "step": 50360 }, { "epoch": 0.216248937430772, "grad_norm": 1.6161103248596191, "learning_rate": 7.871346895130344e-05, "loss": 0.27801764011383057, "step": 50370 }, { "epoch": 0.21629186952079202, "grad_norm": 1.6639463901519775, "learning_rate": 7.870915723118582e-05, "loss": 0.09341565370559693, "step": 50380 }, { "epoch": 0.216334801610812, "grad_norm": 1.7329559326171875, "learning_rate": 7.87048455110682e-05, "loss": 0.24032018184661866, "step": 50390 }, { "epoch": 0.21637773370083202, "grad_norm": 1.2680294513702393, "learning_rate": 7.870053379095057e-05, "loss": 0.3167436599731445, "step": 50400 }, { "epoch": 0.21642066579085203, "grad_norm": 0.31326672434806824, "learning_rate": 7.869622207083293e-05, "loss": 0.10480024814605712, "step": 50410 }, { "epoch": 0.21646359788087205, "grad_norm": 0.006115011405199766, "learning_rate": 7.869191035071531e-05, "loss": 0.12340999841690063, "step": 50420 }, { "epoch": 0.21650652997089204, "grad_norm": 1.273048996925354, "learning_rate": 7.868759863059769e-05, "loss": 0.19264719486236573, "step": 50430 }, { "epoch": 0.21654946206091205, "grad_norm": 0.18304884433746338, "learning_rate": 7.868328691048007e-05, "loss": 0.20949811935424806, "step": 50440 }, { "epoch": 0.21659239415093207, "grad_norm": 1.0161921977996826, "learning_rate": 7.867897519036244e-05, "loss": 0.19382405281066895, "step": 50450 }, { "epoch": 0.21663532624095205, "grad_norm": 0.22904996573925018, "learning_rate": 7.867466347024482e-05, "loss": 0.25605947971343995, "step": 50460 }, { "epoch": 0.21667825833097207, "grad_norm": 0.9911039471626282, "learning_rate": 7.86703517501272e-05, "loss": 0.2965787410736084, "step": 50470 }, { "epoch": 0.21672119042099208, "grad_norm": 4.8198137283325195, "learning_rate": 7.866604003000958e-05, "loss": 0.4654858589172363, "step": 50480 }, { "epoch": 0.21676412251101207, "grad_norm": 0.00046917685540392995, "learning_rate": 7.866172830989195e-05, "loss": 0.06836865544319153, "step": 50490 }, { "epoch": 0.21680705460103208, "grad_norm": 2.65632700920105, "learning_rate": 7.865741658977433e-05, "loss": 0.24790689945220948, "step": 50500 }, { "epoch": 0.2168499866910521, "grad_norm": 0.031371332705020905, "learning_rate": 7.865310486965671e-05, "loss": 0.14242541790008545, "step": 50510 }, { "epoch": 0.2168929187810721, "grad_norm": 0.8935719728469849, "learning_rate": 7.864879314953909e-05, "loss": 0.33764503002166746, "step": 50520 }, { "epoch": 0.2169358508710921, "grad_norm": 0.015160330571234226, "learning_rate": 7.864448142942146e-05, "loss": 0.20800716876983644, "step": 50530 }, { "epoch": 0.21697878296111212, "grad_norm": 0.004046137910336256, "learning_rate": 7.864016970930384e-05, "loss": 0.06134725213050842, "step": 50540 }, { "epoch": 0.21702171505113213, "grad_norm": 0.5130903124809265, "learning_rate": 7.863585798918622e-05, "loss": 0.4054374694824219, "step": 50550 }, { "epoch": 0.21706464714115212, "grad_norm": 1.5228362083435059, "learning_rate": 7.86315462690686e-05, "loss": 0.2966635227203369, "step": 50560 }, { "epoch": 0.21710757923117213, "grad_norm": 3.034628391265869, "learning_rate": 7.862723454895097e-05, "loss": 0.4351348400115967, "step": 50570 }, { "epoch": 0.21715051132119215, "grad_norm": 0.5908950567245483, "learning_rate": 7.862292282883334e-05, "loss": 0.1472018241882324, "step": 50580 }, { "epoch": 0.21719344341121213, "grad_norm": 4.488155364990234, "learning_rate": 7.861861110871571e-05, "loss": 0.3412757635116577, "step": 50590 }, { "epoch": 0.21723637550123215, "grad_norm": 0.028289830312132835, "learning_rate": 7.861429938859809e-05, "loss": 0.35039472579956055, "step": 50600 }, { "epoch": 0.21727930759125216, "grad_norm": 0.017389526590704918, "learning_rate": 7.860998766848047e-05, "loss": 0.23729236125946046, "step": 50610 }, { "epoch": 0.21732223968127215, "grad_norm": 0.01772734522819519, "learning_rate": 7.860567594836285e-05, "loss": 0.11716750860214234, "step": 50620 }, { "epoch": 0.21736517177129216, "grad_norm": 0.2764197587966919, "learning_rate": 7.860136422824522e-05, "loss": 0.3149959325790405, "step": 50630 }, { "epoch": 0.21740810386131218, "grad_norm": 0.022921325638890266, "learning_rate": 7.85970525081276e-05, "loss": 0.22837281227111816, "step": 50640 }, { "epoch": 0.2174510359513322, "grad_norm": 1.474914312362671, "learning_rate": 7.859274078800998e-05, "loss": 0.2354658842086792, "step": 50650 }, { "epoch": 0.21749396804135218, "grad_norm": 0.021082280203700066, "learning_rate": 7.858842906789234e-05, "loss": 0.0798882246017456, "step": 50660 }, { "epoch": 0.2175369001313722, "grad_norm": 2.043031692504883, "learning_rate": 7.858411734777472e-05, "loss": 0.34849650859832765, "step": 50670 }, { "epoch": 0.2175798322213922, "grad_norm": 4.649127006530762, "learning_rate": 7.85798056276571e-05, "loss": 0.27826337814331054, "step": 50680 }, { "epoch": 0.2176227643114122, "grad_norm": 0.021960316225886345, "learning_rate": 7.857549390753947e-05, "loss": 0.5113425254821777, "step": 50690 }, { "epoch": 0.2176656964014322, "grad_norm": 2.443013906478882, "learning_rate": 7.857118218742185e-05, "loss": 0.14948725700378418, "step": 50700 }, { "epoch": 0.21770862849145223, "grad_norm": 0.029022017493844032, "learning_rate": 7.856687046730423e-05, "loss": 0.44149346351623536, "step": 50710 }, { "epoch": 0.21775156058147221, "grad_norm": 0.3463537096977234, "learning_rate": 7.85625587471866e-05, "loss": 0.3035578727722168, "step": 50720 }, { "epoch": 0.21779449267149223, "grad_norm": 0.6163672804832458, "learning_rate": 7.855824702706898e-05, "loss": 0.1137476921081543, "step": 50730 }, { "epoch": 0.21783742476151224, "grad_norm": 0.005244885571300983, "learning_rate": 7.855393530695136e-05, "loss": 0.13025596141815185, "step": 50740 }, { "epoch": 0.21788035685153226, "grad_norm": 0.7355568408966064, "learning_rate": 7.854962358683374e-05, "loss": 0.2603000164031982, "step": 50750 }, { "epoch": 0.21792328894155225, "grad_norm": 1.664304494857788, "learning_rate": 7.854531186671611e-05, "loss": 0.19281330108642578, "step": 50760 }, { "epoch": 0.21796622103157226, "grad_norm": 0.004859315697103739, "learning_rate": 7.854100014659849e-05, "loss": 0.2792229175567627, "step": 50770 }, { "epoch": 0.21800915312159228, "grad_norm": 0.006048300769180059, "learning_rate": 7.853668842648087e-05, "loss": 0.20728557109832763, "step": 50780 }, { "epoch": 0.21805208521161226, "grad_norm": 0.02996540069580078, "learning_rate": 7.853237670636325e-05, "loss": 0.14032127857208251, "step": 50790 }, { "epoch": 0.21809501730163228, "grad_norm": 1.4114487171173096, "learning_rate": 7.852806498624562e-05, "loss": 0.2734131097793579, "step": 50800 }, { "epoch": 0.2181379493916523, "grad_norm": 0.00662500225007534, "learning_rate": 7.8523753266128e-05, "loss": 0.11571886539459228, "step": 50810 }, { "epoch": 0.21818088148167228, "grad_norm": 0.08047362416982651, "learning_rate": 7.851944154601036e-05, "loss": 0.07731739282608033, "step": 50820 }, { "epoch": 0.2182238135716923, "grad_norm": 9.156729698181152, "learning_rate": 7.851512982589274e-05, "loss": 0.3950646162033081, "step": 50830 }, { "epoch": 0.2182667456617123, "grad_norm": 0.11400093883275986, "learning_rate": 7.851081810577512e-05, "loss": 0.09669422507286071, "step": 50840 }, { "epoch": 0.21830967775173232, "grad_norm": 0.0044946083799004555, "learning_rate": 7.85065063856575e-05, "loss": 0.22031335830688475, "step": 50850 }, { "epoch": 0.2183526098417523, "grad_norm": 1.9205845594406128, "learning_rate": 7.850219466553987e-05, "loss": 0.47330074310302733, "step": 50860 }, { "epoch": 0.21839554193177232, "grad_norm": 0.13260437548160553, "learning_rate": 7.849788294542225e-05, "loss": 0.09311110377311707, "step": 50870 }, { "epoch": 0.21843847402179234, "grad_norm": 1.7268750667572021, "learning_rate": 7.849357122530463e-05, "loss": 0.22776448726654053, "step": 50880 }, { "epoch": 0.21848140611181233, "grad_norm": 0.041452158242464066, "learning_rate": 7.8489259505187e-05, "loss": 0.2687647581100464, "step": 50890 }, { "epoch": 0.21852433820183234, "grad_norm": 0.011913030408322811, "learning_rate": 7.848494778506938e-05, "loss": 0.14741060733795167, "step": 50900 }, { "epoch": 0.21856727029185236, "grad_norm": 1.0277550220489502, "learning_rate": 7.848063606495175e-05, "loss": 0.1561746120452881, "step": 50910 }, { "epoch": 0.21861020238187234, "grad_norm": 2.3949055671691895, "learning_rate": 7.847632434483412e-05, "loss": 0.2793707847595215, "step": 50920 }, { "epoch": 0.21865313447189236, "grad_norm": 0.0005611925153061748, "learning_rate": 7.84720126247165e-05, "loss": 0.1184156894683838, "step": 50930 }, { "epoch": 0.21869606656191237, "grad_norm": 0.06453302502632141, "learning_rate": 7.846770090459888e-05, "loss": 0.2204530954360962, "step": 50940 }, { "epoch": 0.2187389986519324, "grad_norm": 7.096034049987793, "learning_rate": 7.846338918448126e-05, "loss": 0.25634169578552246, "step": 50950 }, { "epoch": 0.21878193074195237, "grad_norm": 5.026423454284668, "learning_rate": 7.845907746436363e-05, "loss": 0.2579002857208252, "step": 50960 }, { "epoch": 0.2188248628319724, "grad_norm": 0.1146092489361763, "learning_rate": 7.845476574424601e-05, "loss": 0.2994891405105591, "step": 50970 }, { "epoch": 0.2188677949219924, "grad_norm": 4.33756685256958, "learning_rate": 7.845045402412839e-05, "loss": 0.2136240243911743, "step": 50980 }, { "epoch": 0.2189107270120124, "grad_norm": 0.13741017878055573, "learning_rate": 7.844614230401077e-05, "loss": 0.3473642826080322, "step": 50990 }, { "epoch": 0.2189536591020324, "grad_norm": 4.680849552154541, "learning_rate": 7.844183058389314e-05, "loss": 0.3754714012145996, "step": 51000 }, { "epoch": 0.2189536591020324, "eval_loss": 0.4405979812145233, "eval_runtime": 27.5677, "eval_samples_per_second": 3.627, "eval_steps_per_second": 3.627, "step": 51000 }, { "epoch": 0.21899659119205242, "grad_norm": 0.8056715130805969, "learning_rate": 7.843751886377552e-05, "loss": 0.283284330368042, "step": 51010 }, { "epoch": 0.2190395232820724, "grad_norm": 1.782196283340454, "learning_rate": 7.84332071436579e-05, "loss": 0.2841160297393799, "step": 51020 }, { "epoch": 0.21908245537209242, "grad_norm": 0.09319104999303818, "learning_rate": 7.842889542354028e-05, "loss": 0.4059459686279297, "step": 51030 }, { "epoch": 0.21912538746211244, "grad_norm": 0.29425910115242004, "learning_rate": 7.842458370342265e-05, "loss": 0.2115602970123291, "step": 51040 }, { "epoch": 0.21916831955213242, "grad_norm": 4.665130615234375, "learning_rate": 7.842027198330503e-05, "loss": 0.0615256130695343, "step": 51050 }, { "epoch": 0.21921125164215244, "grad_norm": 0.13821355998516083, "learning_rate": 7.841596026318741e-05, "loss": 0.1643990159034729, "step": 51060 }, { "epoch": 0.21925418373217245, "grad_norm": 0.00384420994669199, "learning_rate": 7.841164854306977e-05, "loss": 0.08403640389442443, "step": 51070 }, { "epoch": 0.21929711582219247, "grad_norm": 0.0846899077296257, "learning_rate": 7.840733682295215e-05, "loss": 0.34184112548828127, "step": 51080 }, { "epoch": 0.21934004791221245, "grad_norm": 0.30739402770996094, "learning_rate": 7.840302510283453e-05, "loss": 0.17862628698348998, "step": 51090 }, { "epoch": 0.21938298000223247, "grad_norm": 0.09101126343011856, "learning_rate": 7.83987133827169e-05, "loss": 0.3576796531677246, "step": 51100 }, { "epoch": 0.21942591209225248, "grad_norm": 3.593675374984741, "learning_rate": 7.839440166259928e-05, "loss": 0.13683393001556396, "step": 51110 }, { "epoch": 0.21946884418227247, "grad_norm": 0.01158614456653595, "learning_rate": 7.839008994248166e-05, "loss": 0.24310684204101562, "step": 51120 }, { "epoch": 0.21951177627229249, "grad_norm": 0.03459831327199936, "learning_rate": 7.838577822236404e-05, "loss": 0.2010591983795166, "step": 51130 }, { "epoch": 0.2195547083623125, "grad_norm": 0.8882039785385132, "learning_rate": 7.838146650224641e-05, "loss": 0.23802576065063477, "step": 51140 }, { "epoch": 0.2195976404523325, "grad_norm": 0.25156083703041077, "learning_rate": 7.837715478212878e-05, "loss": 0.19319934844970704, "step": 51150 }, { "epoch": 0.2196405725423525, "grad_norm": 0.1898663491010666, "learning_rate": 7.837284306201115e-05, "loss": 0.21830298900604247, "step": 51160 }, { "epoch": 0.21968350463237252, "grad_norm": 0.0030881662387400866, "learning_rate": 7.836853134189353e-05, "loss": 0.272199010848999, "step": 51170 }, { "epoch": 0.21972643672239253, "grad_norm": 0.13047702610492706, "learning_rate": 7.836421962177591e-05, "loss": 0.2839015483856201, "step": 51180 }, { "epoch": 0.21976936881241252, "grad_norm": 0.03696379438042641, "learning_rate": 7.835990790165829e-05, "loss": 0.2720863103866577, "step": 51190 }, { "epoch": 0.21981230090243253, "grad_norm": 0.05394358187913895, "learning_rate": 7.835559618154066e-05, "loss": 0.4942734718322754, "step": 51200 }, { "epoch": 0.21985523299245255, "grad_norm": 0.6661820411682129, "learning_rate": 7.835128446142304e-05, "loss": 0.1750645637512207, "step": 51210 }, { "epoch": 0.21989816508247254, "grad_norm": 0.453449010848999, "learning_rate": 7.834697274130542e-05, "loss": 0.17545816898345948, "step": 51220 }, { "epoch": 0.21994109717249255, "grad_norm": 1.2218490839004517, "learning_rate": 7.83426610211878e-05, "loss": 0.2454913854598999, "step": 51230 }, { "epoch": 0.21998402926251256, "grad_norm": 3.4764392375946045, "learning_rate": 7.833834930107017e-05, "loss": 0.43323559761047364, "step": 51240 }, { "epoch": 0.22002696135253255, "grad_norm": 0.006227872334420681, "learning_rate": 7.833403758095255e-05, "loss": 0.22940664291381835, "step": 51250 }, { "epoch": 0.22006989344255257, "grad_norm": 0.045368924736976624, "learning_rate": 7.832972586083493e-05, "loss": 0.4383875370025635, "step": 51260 }, { "epoch": 0.22011282553257258, "grad_norm": 1.0571563243865967, "learning_rate": 7.83254141407173e-05, "loss": 0.374654483795166, "step": 51270 }, { "epoch": 0.2201557576225926, "grad_norm": 0.04682515189051628, "learning_rate": 7.832110242059968e-05, "loss": 0.38846986293792723, "step": 51280 }, { "epoch": 0.22019868971261258, "grad_norm": 4.054689407348633, "learning_rate": 7.831679070048206e-05, "loss": 0.4694014072418213, "step": 51290 }, { "epoch": 0.2202416218026326, "grad_norm": 0.7244497537612915, "learning_rate": 7.831247898036444e-05, "loss": 0.17690551280975342, "step": 51300 }, { "epoch": 0.2202845538926526, "grad_norm": 1.9599987268447876, "learning_rate": 7.830816726024681e-05, "loss": 0.28881473541259767, "step": 51310 }, { "epoch": 0.2203274859826726, "grad_norm": 0.9945737719535828, "learning_rate": 7.830385554012918e-05, "loss": 0.2523778438568115, "step": 51320 }, { "epoch": 0.22037041807269261, "grad_norm": 0.011761073023080826, "learning_rate": 7.829954382001156e-05, "loss": 0.08957783579826355, "step": 51330 }, { "epoch": 0.22041335016271263, "grad_norm": 0.03757171332836151, "learning_rate": 7.829523209989393e-05, "loss": 0.2829038858413696, "step": 51340 }, { "epoch": 0.22045628225273262, "grad_norm": 4.216167449951172, "learning_rate": 7.829092037977631e-05, "loss": 0.33401927947998045, "step": 51350 }, { "epoch": 0.22049921434275263, "grad_norm": 0.00842796266078949, "learning_rate": 7.828660865965869e-05, "loss": 0.34130475521087644, "step": 51360 }, { "epoch": 0.22054214643277265, "grad_norm": 1.3092318773269653, "learning_rate": 7.828229693954106e-05, "loss": 0.37724757194519043, "step": 51370 }, { "epoch": 0.22058507852279266, "grad_norm": 6.285627365112305, "learning_rate": 7.827798521942344e-05, "loss": 0.22345025539398194, "step": 51380 }, { "epoch": 0.22062801061281265, "grad_norm": 1.7613346576690674, "learning_rate": 7.827367349930582e-05, "loss": 0.3523125410079956, "step": 51390 }, { "epoch": 0.22067094270283266, "grad_norm": 1.3106528520584106, "learning_rate": 7.826936177918818e-05, "loss": 0.2903813362121582, "step": 51400 }, { "epoch": 0.22071387479285268, "grad_norm": 0.02184089459478855, "learning_rate": 7.826505005907056e-05, "loss": 0.142237389087677, "step": 51410 }, { "epoch": 0.22075680688287266, "grad_norm": 0.05910355970263481, "learning_rate": 7.826073833895294e-05, "loss": 0.0735704779624939, "step": 51420 }, { "epoch": 0.22079973897289268, "grad_norm": 1.5817798376083374, "learning_rate": 7.825642661883531e-05, "loss": 0.345813250541687, "step": 51430 }, { "epoch": 0.2208426710629127, "grad_norm": 0.017727894708514214, "learning_rate": 7.825211489871769e-05, "loss": 0.30463998317718505, "step": 51440 }, { "epoch": 0.22088560315293268, "grad_norm": 21.531631469726562, "learning_rate": 7.824780317860008e-05, "loss": 0.4145090103149414, "step": 51450 }, { "epoch": 0.2209285352429527, "grad_norm": 0.38469067215919495, "learning_rate": 7.824349145848246e-05, "loss": 0.3056338310241699, "step": 51460 }, { "epoch": 0.2209714673329727, "grad_norm": 3.433922052383423, "learning_rate": 7.823917973836484e-05, "loss": 0.1726033091545105, "step": 51470 }, { "epoch": 0.2210143994229927, "grad_norm": 0.021967153996229172, "learning_rate": 7.82348680182472e-05, "loss": 0.3242464303970337, "step": 51480 }, { "epoch": 0.2210573315130127, "grad_norm": 0.008535942994058132, "learning_rate": 7.823055629812958e-05, "loss": 0.11328502893447875, "step": 51490 }, { "epoch": 0.22110026360303273, "grad_norm": 0.030140530318021774, "learning_rate": 7.822624457801196e-05, "loss": 0.21845624446868897, "step": 51500 }, { "epoch": 0.22114319569305274, "grad_norm": 0.3550052046775818, "learning_rate": 7.822193285789433e-05, "loss": 0.23196914196014404, "step": 51510 }, { "epoch": 0.22118612778307273, "grad_norm": 0.03214557096362114, "learning_rate": 7.821762113777671e-05, "loss": 0.3088571071624756, "step": 51520 }, { "epoch": 0.22122905987309274, "grad_norm": 0.009252496063709259, "learning_rate": 7.821330941765909e-05, "loss": 0.2549436569213867, "step": 51530 }, { "epoch": 0.22127199196311276, "grad_norm": 0.009182988665997982, "learning_rate": 7.820899769754147e-05, "loss": 0.13270962238311768, "step": 51540 }, { "epoch": 0.22131492405313274, "grad_norm": 0.061013370752334595, "learning_rate": 7.820468597742384e-05, "loss": 0.4141830921173096, "step": 51550 }, { "epoch": 0.22135785614315276, "grad_norm": 0.0408286526799202, "learning_rate": 7.820037425730621e-05, "loss": 0.232647705078125, "step": 51560 }, { "epoch": 0.22140078823317277, "grad_norm": 0.02865544892847538, "learning_rate": 7.819606253718858e-05, "loss": 0.12392929792404175, "step": 51570 }, { "epoch": 0.22144372032319276, "grad_norm": 0.14595501124858856, "learning_rate": 7.819175081707096e-05, "loss": 0.2198183059692383, "step": 51580 }, { "epoch": 0.22148665241321278, "grad_norm": 0.2862647473812103, "learning_rate": 7.818743909695334e-05, "loss": 0.22051844596862794, "step": 51590 }, { "epoch": 0.2215295845032328, "grad_norm": 1.7248533964157104, "learning_rate": 7.818312737683572e-05, "loss": 0.32520184516906736, "step": 51600 }, { "epoch": 0.2215725165932528, "grad_norm": 0.4680907726287842, "learning_rate": 7.81788156567181e-05, "loss": 0.14819936752319335, "step": 51610 }, { "epoch": 0.2216154486832728, "grad_norm": 0.028154416009783745, "learning_rate": 7.817450393660047e-05, "loss": 0.16450451612472533, "step": 51620 }, { "epoch": 0.2216583807732928, "grad_norm": 5.077931880950928, "learning_rate": 7.817019221648285e-05, "loss": 0.36899194717407224, "step": 51630 }, { "epoch": 0.22170131286331282, "grad_norm": 3.5552923679351807, "learning_rate": 7.816588049636523e-05, "loss": 0.4686445713043213, "step": 51640 }, { "epoch": 0.2217442449533328, "grad_norm": 0.06549005210399628, "learning_rate": 7.816156877624759e-05, "loss": 0.16026378870010377, "step": 51650 }, { "epoch": 0.22178717704335282, "grad_norm": 0.31749969720840454, "learning_rate": 7.815725705612997e-05, "loss": 0.13566253185272217, "step": 51660 }, { "epoch": 0.22183010913337284, "grad_norm": 0.7159654498100281, "learning_rate": 7.815294533601236e-05, "loss": 0.2575033903121948, "step": 51670 }, { "epoch": 0.22187304122339283, "grad_norm": 0.003270721761509776, "learning_rate": 7.814863361589473e-05, "loss": 0.21550092697143555, "step": 51680 }, { "epoch": 0.22191597331341284, "grad_norm": 1.244299054145813, "learning_rate": 7.814432189577711e-05, "loss": 0.45678186416625977, "step": 51690 }, { "epoch": 0.22195890540343285, "grad_norm": 0.0017754979198798537, "learning_rate": 7.814001017565949e-05, "loss": 0.21440205574035645, "step": 51700 }, { "epoch": 0.22200183749345287, "grad_norm": 0.15123091638088226, "learning_rate": 7.813569845554187e-05, "loss": 0.32442948818206785, "step": 51710 }, { "epoch": 0.22204476958347286, "grad_norm": 0.16990530490875244, "learning_rate": 7.813138673542424e-05, "loss": 0.3471397638320923, "step": 51720 }, { "epoch": 0.22208770167349287, "grad_norm": 0.029147816821932793, "learning_rate": 7.812707501530661e-05, "loss": 0.1523631453514099, "step": 51730 }, { "epoch": 0.22213063376351289, "grad_norm": 4.789177417755127, "learning_rate": 7.812276329518899e-05, "loss": 0.32821714878082275, "step": 51740 }, { "epoch": 0.22217356585353287, "grad_norm": 0.01579742506146431, "learning_rate": 7.811845157507136e-05, "loss": 0.3338596820831299, "step": 51750 }, { "epoch": 0.2222164979435529, "grad_norm": 0.010001111775636673, "learning_rate": 7.811413985495374e-05, "loss": 0.279772424697876, "step": 51760 }, { "epoch": 0.2222594300335729, "grad_norm": 0.09585186839103699, "learning_rate": 7.810982813483612e-05, "loss": 0.322173547744751, "step": 51770 }, { "epoch": 0.2223023621235929, "grad_norm": 0.8465486764907837, "learning_rate": 7.81055164147185e-05, "loss": 0.26959757804870604, "step": 51780 }, { "epoch": 0.2223452942136129, "grad_norm": 1.0950313806533813, "learning_rate": 7.810120469460087e-05, "loss": 0.30342302322387693, "step": 51790 }, { "epoch": 0.22238822630363292, "grad_norm": 2.7534327507019043, "learning_rate": 7.809689297448325e-05, "loss": 0.2312720537185669, "step": 51800 }, { "epoch": 0.22243115839365293, "grad_norm": 4.048535346984863, "learning_rate": 7.809258125436561e-05, "loss": 0.3151477575302124, "step": 51810 }, { "epoch": 0.22247409048367292, "grad_norm": 0.23564355075359344, "learning_rate": 7.808826953424799e-05, "loss": 0.2035139799118042, "step": 51820 }, { "epoch": 0.22251702257369294, "grad_norm": 0.7216160893440247, "learning_rate": 7.808395781413037e-05, "loss": 0.30786886215209963, "step": 51830 }, { "epoch": 0.22255995466371295, "grad_norm": 6.129444122314453, "learning_rate": 7.807964609401275e-05, "loss": 0.14896565675735474, "step": 51840 }, { "epoch": 0.22260288675373294, "grad_norm": 0.6112780570983887, "learning_rate": 7.807533437389512e-05, "loss": 0.2353865385055542, "step": 51850 }, { "epoch": 0.22264581884375295, "grad_norm": 0.012277986854314804, "learning_rate": 7.80710226537775e-05, "loss": 0.18124105930328369, "step": 51860 }, { "epoch": 0.22268875093377297, "grad_norm": 0.8171926140785217, "learning_rate": 7.806671093365988e-05, "loss": 0.23719098567962646, "step": 51870 }, { "epoch": 0.22273168302379295, "grad_norm": 0.03393758833408356, "learning_rate": 7.806239921354225e-05, "loss": 0.37589733600616454, "step": 51880 }, { "epoch": 0.22277461511381297, "grad_norm": 0.15871426463127136, "learning_rate": 7.805808749342463e-05, "loss": 0.2771576404571533, "step": 51890 }, { "epoch": 0.22281754720383298, "grad_norm": 0.05300932005047798, "learning_rate": 7.805377577330701e-05, "loss": 0.0938086450099945, "step": 51900 }, { "epoch": 0.22286047929385297, "grad_norm": 1.3278213739395142, "learning_rate": 7.804946405318939e-05, "loss": 0.364089298248291, "step": 51910 }, { "epoch": 0.22290341138387298, "grad_norm": 2.5431289672851562, "learning_rate": 7.804515233307176e-05, "loss": 0.20374348163604736, "step": 51920 }, { "epoch": 0.222946343473893, "grad_norm": 0.009660803712904453, "learning_rate": 7.804084061295414e-05, "loss": 0.016839735209941864, "step": 51930 }, { "epoch": 0.22298927556391301, "grad_norm": 30.04834747314453, "learning_rate": 7.803652889283652e-05, "loss": 0.23668146133422852, "step": 51940 }, { "epoch": 0.223032207653933, "grad_norm": 0.14079128205776215, "learning_rate": 7.80322171727189e-05, "loss": 0.16221317052841186, "step": 51950 }, { "epoch": 0.22307513974395302, "grad_norm": 0.22580532729625702, "learning_rate": 7.802790545260127e-05, "loss": 0.13811932802200316, "step": 51960 }, { "epoch": 0.22311807183397303, "grad_norm": 0.21858012676239014, "learning_rate": 7.802359373248364e-05, "loss": 0.21348247528076172, "step": 51970 }, { "epoch": 0.22316100392399302, "grad_norm": 1.3461596965789795, "learning_rate": 7.801928201236601e-05, "loss": 0.3414191246032715, "step": 51980 }, { "epoch": 0.22320393601401303, "grad_norm": 0.048035021871328354, "learning_rate": 7.801497029224839e-05, "loss": 0.17132971286773682, "step": 51990 }, { "epoch": 0.22324686810403305, "grad_norm": 1.9410839080810547, "learning_rate": 7.801065857213077e-05, "loss": 0.35149712562561036, "step": 52000 }, { "epoch": 0.22324686810403305, "eval_loss": 0.4512121081352234, "eval_runtime": 27.411, "eval_samples_per_second": 3.648, "eval_steps_per_second": 3.648, "step": 52000 }, { "epoch": 0.22328980019405303, "grad_norm": 0.03847186267375946, "learning_rate": 7.800634685201315e-05, "loss": 0.1860820770263672, "step": 52010 }, { "epoch": 0.22333273228407305, "grad_norm": 3.8256406784057617, "learning_rate": 7.800203513189552e-05, "loss": 0.4085991859436035, "step": 52020 }, { "epoch": 0.22337566437409306, "grad_norm": 0.0686342716217041, "learning_rate": 7.79977234117779e-05, "loss": 0.1791319727897644, "step": 52030 }, { "epoch": 0.22341859646411308, "grad_norm": 0.07664453983306885, "learning_rate": 7.799341169166028e-05, "loss": 0.1927349805831909, "step": 52040 }, { "epoch": 0.22346152855413307, "grad_norm": 0.023954255506396294, "learning_rate": 7.798909997154266e-05, "loss": 0.07940815091133117, "step": 52050 }, { "epoch": 0.22350446064415308, "grad_norm": 0.02951730787754059, "learning_rate": 7.798478825142502e-05, "loss": 0.2144526720046997, "step": 52060 }, { "epoch": 0.2235473927341731, "grad_norm": 1.5430526733398438, "learning_rate": 7.79804765313074e-05, "loss": 0.24770851135253907, "step": 52070 }, { "epoch": 0.22359032482419308, "grad_norm": 0.08437793701887131, "learning_rate": 7.797616481118977e-05, "loss": 0.24535858631134033, "step": 52080 }, { "epoch": 0.2236332569142131, "grad_norm": 4.446104049682617, "learning_rate": 7.797185309107215e-05, "loss": 0.3557150840759277, "step": 52090 }, { "epoch": 0.2236761890042331, "grad_norm": 0.9419364929199219, "learning_rate": 7.796754137095453e-05, "loss": 0.3013280391693115, "step": 52100 }, { "epoch": 0.2237191210942531, "grad_norm": 0.004197876434773207, "learning_rate": 7.79632296508369e-05, "loss": 0.197326397895813, "step": 52110 }, { "epoch": 0.2237620531842731, "grad_norm": 15.142932891845703, "learning_rate": 7.795891793071928e-05, "loss": 0.18400282859802247, "step": 52120 }, { "epoch": 0.22380498527429313, "grad_norm": 0.013756810687482357, "learning_rate": 7.795460621060166e-05, "loss": 0.1299996018409729, "step": 52130 }, { "epoch": 0.22384791736431314, "grad_norm": 10.383787155151367, "learning_rate": 7.795029449048404e-05, "loss": 0.28579258918762207, "step": 52140 }, { "epoch": 0.22389084945433313, "grad_norm": 1.5638196468353271, "learning_rate": 7.794598277036642e-05, "loss": 0.30528721809387205, "step": 52150 }, { "epoch": 0.22393378154435314, "grad_norm": 0.033577535301446915, "learning_rate": 7.794167105024879e-05, "loss": 0.10886262655258179, "step": 52160 }, { "epoch": 0.22397671363437316, "grad_norm": 4.6401872634887695, "learning_rate": 7.793735933013117e-05, "loss": 0.33708269596099855, "step": 52170 }, { "epoch": 0.22401964572439315, "grad_norm": 0.004224882926791906, "learning_rate": 7.793304761001355e-05, "loss": 0.029603365063667297, "step": 52180 }, { "epoch": 0.22406257781441316, "grad_norm": 0.027500273659825325, "learning_rate": 7.792873588989593e-05, "loss": 0.12923910617828369, "step": 52190 }, { "epoch": 0.22410550990443318, "grad_norm": 24.483905792236328, "learning_rate": 7.79244241697783e-05, "loss": 0.08223460912704468, "step": 52200 }, { "epoch": 0.22414844199445316, "grad_norm": 0.11597905308008194, "learning_rate": 7.792011244966068e-05, "loss": 0.1549830913543701, "step": 52210 }, { "epoch": 0.22419137408447318, "grad_norm": 1.219777226448059, "learning_rate": 7.791580072954304e-05, "loss": 0.2020240068435669, "step": 52220 }, { "epoch": 0.2242343061744932, "grad_norm": 0.08328064531087875, "learning_rate": 7.791148900942542e-05, "loss": 0.017869633436203004, "step": 52230 }, { "epoch": 0.2242772382645132, "grad_norm": 0.06575699150562286, "learning_rate": 7.79071772893078e-05, "loss": 0.3781913757324219, "step": 52240 }, { "epoch": 0.2243201703545332, "grad_norm": 0.034926220774650574, "learning_rate": 7.790286556919018e-05, "loss": 0.2552025556564331, "step": 52250 }, { "epoch": 0.2243631024445532, "grad_norm": 2.1973154544830322, "learning_rate": 7.789855384907255e-05, "loss": 0.3631817102432251, "step": 52260 }, { "epoch": 0.22440603453457322, "grad_norm": 0.027149952948093414, "learning_rate": 7.789424212895493e-05, "loss": 0.1516263008117676, "step": 52270 }, { "epoch": 0.2244489666245932, "grad_norm": 3.6887452602386475, "learning_rate": 7.788993040883731e-05, "loss": 0.18411051034927367, "step": 52280 }, { "epoch": 0.22449189871461322, "grad_norm": 0.003940201830118895, "learning_rate": 7.788561868871969e-05, "loss": 0.1607654571533203, "step": 52290 }, { "epoch": 0.22453483080463324, "grad_norm": 2.560877561569214, "learning_rate": 7.788130696860205e-05, "loss": 0.17027069330215455, "step": 52300 }, { "epoch": 0.22457776289465323, "grad_norm": 1.8159563541412354, "learning_rate": 7.787699524848443e-05, "loss": 0.2474687099456787, "step": 52310 }, { "epoch": 0.22462069498467324, "grad_norm": 0.7241863012313843, "learning_rate": 7.78726835283668e-05, "loss": 0.34167027473449707, "step": 52320 }, { "epoch": 0.22466362707469326, "grad_norm": 0.028914542868733406, "learning_rate": 7.786837180824918e-05, "loss": 0.36235129833221436, "step": 52330 }, { "epoch": 0.22470655916471324, "grad_norm": 0.052308402955532074, "learning_rate": 7.786406008813156e-05, "loss": 0.3940187215805054, "step": 52340 }, { "epoch": 0.22474949125473326, "grad_norm": 0.37863701581954956, "learning_rate": 7.785974836801394e-05, "loss": 0.07285253405570984, "step": 52350 }, { "epoch": 0.22479242334475327, "grad_norm": 0.019866731017827988, "learning_rate": 7.785543664789631e-05, "loss": 0.3209496021270752, "step": 52360 }, { "epoch": 0.2248353554347733, "grad_norm": 23.818340301513672, "learning_rate": 7.785112492777869e-05, "loss": 0.2861109733581543, "step": 52370 }, { "epoch": 0.22487828752479327, "grad_norm": 0.8396596908569336, "learning_rate": 7.784681320766107e-05, "loss": 0.23325965404510499, "step": 52380 }, { "epoch": 0.2249212196148133, "grad_norm": 0.047412410378456116, "learning_rate": 7.784250148754344e-05, "loss": 0.5725963592529297, "step": 52390 }, { "epoch": 0.2249641517048333, "grad_norm": 0.1795731782913208, "learning_rate": 7.783818976742582e-05, "loss": 0.20154619216918945, "step": 52400 }, { "epoch": 0.2250070837948533, "grad_norm": 1.0902587175369263, "learning_rate": 7.78338780473082e-05, "loss": 0.1822667360305786, "step": 52410 }, { "epoch": 0.2250500158848733, "grad_norm": 0.8377712368965149, "learning_rate": 7.782956632719058e-05, "loss": 0.1981184482574463, "step": 52420 }, { "epoch": 0.22509294797489332, "grad_norm": 1.0425002574920654, "learning_rate": 7.782525460707295e-05, "loss": 0.2192686080932617, "step": 52430 }, { "epoch": 0.2251358800649133, "grad_norm": 0.003001261968165636, "learning_rate": 7.782094288695533e-05, "loss": 0.3034151554107666, "step": 52440 }, { "epoch": 0.22517881215493332, "grad_norm": 0.33139747381210327, "learning_rate": 7.781663116683771e-05, "loss": 0.10466808080673218, "step": 52450 }, { "epoch": 0.22522174424495334, "grad_norm": 7.508664608001709, "learning_rate": 7.781231944672009e-05, "loss": 0.25111525058746337, "step": 52460 }, { "epoch": 0.22526467633497335, "grad_norm": 0.8088374137878418, "learning_rate": 7.780800772660245e-05, "loss": 0.18727099895477295, "step": 52470 }, { "epoch": 0.22530760842499334, "grad_norm": 0.007046096492558718, "learning_rate": 7.780369600648483e-05, "loss": 0.1947243928909302, "step": 52480 }, { "epoch": 0.22535054051501335, "grad_norm": 0.04045693576335907, "learning_rate": 7.77993842863672e-05, "loss": 0.10588670969009399, "step": 52490 }, { "epoch": 0.22539347260503337, "grad_norm": 0.5456598401069641, "learning_rate": 7.779507256624958e-05, "loss": 0.3418902397155762, "step": 52500 }, { "epoch": 0.22543640469505336, "grad_norm": 0.7057582139968872, "learning_rate": 7.779076084613196e-05, "loss": 0.3276927947998047, "step": 52510 }, { "epoch": 0.22547933678507337, "grad_norm": 0.7780113816261292, "learning_rate": 7.778644912601434e-05, "loss": 0.1883944869041443, "step": 52520 }, { "epoch": 0.22552226887509338, "grad_norm": 1.5828453302383423, "learning_rate": 7.778213740589671e-05, "loss": 0.12598092555999757, "step": 52530 }, { "epoch": 0.22556520096511337, "grad_norm": 0.01957198604941368, "learning_rate": 7.777782568577909e-05, "loss": 0.1770286202430725, "step": 52540 }, { "epoch": 0.2256081330551334, "grad_norm": 1.2329034805297852, "learning_rate": 7.777351396566146e-05, "loss": 0.12175513505935669, "step": 52550 }, { "epoch": 0.2256510651451534, "grad_norm": 0.43187209963798523, "learning_rate": 7.776920224554383e-05, "loss": 0.3777428865432739, "step": 52560 }, { "epoch": 0.22569399723517342, "grad_norm": 2.0818798542022705, "learning_rate": 7.776489052542621e-05, "loss": 0.26906890869140626, "step": 52570 }, { "epoch": 0.2257369293251934, "grad_norm": 0.02331465110182762, "learning_rate": 7.776057880530859e-05, "loss": 0.1667281985282898, "step": 52580 }, { "epoch": 0.22577986141521342, "grad_norm": 0.013449318706989288, "learning_rate": 7.775626708519096e-05, "loss": 0.2873204708099365, "step": 52590 }, { "epoch": 0.22582279350523343, "grad_norm": 1.743377447128296, "learning_rate": 7.775195536507334e-05, "loss": 0.197990345954895, "step": 52600 }, { "epoch": 0.22586572559525342, "grad_norm": 0.013360187411308289, "learning_rate": 7.774764364495572e-05, "loss": 0.1657071590423584, "step": 52610 }, { "epoch": 0.22590865768527343, "grad_norm": 0.7187745571136475, "learning_rate": 7.77433319248381e-05, "loss": 0.2067204475402832, "step": 52620 }, { "epoch": 0.22595158977529345, "grad_norm": 1.9448988437652588, "learning_rate": 7.773902020472047e-05, "loss": 0.40744667053222655, "step": 52630 }, { "epoch": 0.22599452186531344, "grad_norm": 11.273550987243652, "learning_rate": 7.773470848460285e-05, "loss": 0.05754717588424683, "step": 52640 }, { "epoch": 0.22603745395533345, "grad_norm": 1.3800010681152344, "learning_rate": 7.773039676448523e-05, "loss": 0.33999783992767335, "step": 52650 }, { "epoch": 0.22608038604535347, "grad_norm": 0.02123466692864895, "learning_rate": 7.77260850443676e-05, "loss": 0.2705632209777832, "step": 52660 }, { "epoch": 0.22612331813537348, "grad_norm": 38.30419921875, "learning_rate": 7.772177332424998e-05, "loss": 0.14641456604003905, "step": 52670 }, { "epoch": 0.22616625022539347, "grad_norm": 0.04562552645802498, "learning_rate": 7.771746160413236e-05, "loss": 0.31936705112457275, "step": 52680 }, { "epoch": 0.22620918231541348, "grad_norm": 1.2478864192962646, "learning_rate": 7.771314988401474e-05, "loss": 0.2298142910003662, "step": 52690 }, { "epoch": 0.2262521144054335, "grad_norm": 2.6947596073150635, "learning_rate": 7.770883816389712e-05, "loss": 0.337100887298584, "step": 52700 }, { "epoch": 0.22629504649545348, "grad_norm": 0.1122974082827568, "learning_rate": 7.770452644377949e-05, "loss": 0.08905890583992004, "step": 52710 }, { "epoch": 0.2263379785854735, "grad_norm": 0.01768355444073677, "learning_rate": 7.770021472366186e-05, "loss": 0.08977657556533813, "step": 52720 }, { "epoch": 0.2263809106754935, "grad_norm": 7.6935954093933105, "learning_rate": 7.769590300354423e-05, "loss": 0.35249192714691163, "step": 52730 }, { "epoch": 0.2264238427655135, "grad_norm": 0.0323822908103466, "learning_rate": 7.769159128342661e-05, "loss": 0.21579148769378662, "step": 52740 }, { "epoch": 0.22646677485553351, "grad_norm": 0.024706387892365456, "learning_rate": 7.768727956330899e-05, "loss": 0.42194533348083496, "step": 52750 }, { "epoch": 0.22650970694555353, "grad_norm": 1.9699130058288574, "learning_rate": 7.768296784319137e-05, "loss": 0.39843249320983887, "step": 52760 }, { "epoch": 0.22655263903557352, "grad_norm": 0.020310785621404648, "learning_rate": 7.767865612307374e-05, "loss": 0.18961342573165893, "step": 52770 }, { "epoch": 0.22659557112559353, "grad_norm": 3.1800918579101562, "learning_rate": 7.767434440295612e-05, "loss": 0.40066027641296387, "step": 52780 }, { "epoch": 0.22663850321561355, "grad_norm": 0.09538385272026062, "learning_rate": 7.76700326828385e-05, "loss": 0.13351293802261352, "step": 52790 }, { "epoch": 0.22668143530563356, "grad_norm": 4.516862869262695, "learning_rate": 7.766572096272086e-05, "loss": 0.29952595233917234, "step": 52800 }, { "epoch": 0.22672436739565355, "grad_norm": 0.4217206537723541, "learning_rate": 7.766140924260324e-05, "loss": 0.28686773777008057, "step": 52810 }, { "epoch": 0.22676729948567356, "grad_norm": 0.08405343443155289, "learning_rate": 7.765709752248562e-05, "loss": 0.09559805393218994, "step": 52820 }, { "epoch": 0.22681023157569358, "grad_norm": 0.01651051454246044, "learning_rate": 7.7652785802368e-05, "loss": 0.25448591709136964, "step": 52830 }, { "epoch": 0.22685316366571356, "grad_norm": 0.9871359467506409, "learning_rate": 7.764847408225037e-05, "loss": 0.2132502317428589, "step": 52840 }, { "epoch": 0.22689609575573358, "grad_norm": 1.333776593208313, "learning_rate": 7.764416236213275e-05, "loss": 0.08467223048210144, "step": 52850 }, { "epoch": 0.2269390278457536, "grad_norm": 0.1418377310037613, "learning_rate": 7.763985064201514e-05, "loss": 0.2629718780517578, "step": 52860 }, { "epoch": 0.22698195993577358, "grad_norm": 0.13721498847007751, "learning_rate": 7.763553892189752e-05, "loss": 0.24046893119812013, "step": 52870 }, { "epoch": 0.2270248920257936, "grad_norm": 0.06743840873241425, "learning_rate": 7.763122720177988e-05, "loss": 0.37756991386413574, "step": 52880 }, { "epoch": 0.2270678241158136, "grad_norm": 3.4788317680358887, "learning_rate": 7.762691548166226e-05, "loss": 0.24660749435424806, "step": 52890 }, { "epoch": 0.22711075620583362, "grad_norm": 3.4126675128936768, "learning_rate": 7.762260376154464e-05, "loss": 0.26963019371032715, "step": 52900 }, { "epoch": 0.2271536882958536, "grad_norm": 2.278189182281494, "learning_rate": 7.761829204142701e-05, "loss": 0.41298890113830566, "step": 52910 }, { "epoch": 0.22719662038587363, "grad_norm": 0.010370615869760513, "learning_rate": 7.761398032130939e-05, "loss": 0.15059603452682496, "step": 52920 }, { "epoch": 0.22723955247589364, "grad_norm": 3.394544839859009, "learning_rate": 7.760966860119177e-05, "loss": 0.273115348815918, "step": 52930 }, { "epoch": 0.22728248456591363, "grad_norm": 1.2083812952041626, "learning_rate": 7.760535688107414e-05, "loss": 0.16042221784591676, "step": 52940 }, { "epoch": 0.22732541665593364, "grad_norm": 0.2176787108182907, "learning_rate": 7.760104516095652e-05, "loss": 0.08922134637832642, "step": 52950 }, { "epoch": 0.22736834874595366, "grad_norm": 1.2652984857559204, "learning_rate": 7.759673344083889e-05, "loss": 0.34654817581176756, "step": 52960 }, { "epoch": 0.22741128083597364, "grad_norm": 0.0635618269443512, "learning_rate": 7.759242172072126e-05, "loss": 0.3084728240966797, "step": 52970 }, { "epoch": 0.22745421292599366, "grad_norm": 1.6466708183288574, "learning_rate": 7.758811000060364e-05, "loss": 0.03555763363838196, "step": 52980 }, { "epoch": 0.22749714501601367, "grad_norm": 7.08722448348999, "learning_rate": 7.758379828048602e-05, "loss": 0.1829333186149597, "step": 52990 }, { "epoch": 0.2275400771060337, "grad_norm": 9.112763404846191, "learning_rate": 7.75794865603684e-05, "loss": 0.5256698131561279, "step": 53000 }, { "epoch": 0.2275400771060337, "eval_loss": 0.4434048533439636, "eval_runtime": 27.4709, "eval_samples_per_second": 3.64, "eval_steps_per_second": 3.64, "step": 53000 }, { "epoch": 0.22758300919605368, "grad_norm": 1.2349168062210083, "learning_rate": 7.757517484025077e-05, "loss": 0.35252578258514405, "step": 53010 }, { "epoch": 0.2276259412860737, "grad_norm": 0.010905789211392403, "learning_rate": 7.757086312013315e-05, "loss": 0.21037817001342773, "step": 53020 }, { "epoch": 0.2276688733760937, "grad_norm": 1.4286820888519287, "learning_rate": 7.756655140001553e-05, "loss": 0.546109676361084, "step": 53030 }, { "epoch": 0.2277118054661137, "grad_norm": 0.023641018196940422, "learning_rate": 7.756223967989789e-05, "loss": 0.1473099946975708, "step": 53040 }, { "epoch": 0.2277547375561337, "grad_norm": 0.14704179763793945, "learning_rate": 7.755792795978027e-05, "loss": 0.15295494794845582, "step": 53050 }, { "epoch": 0.22779766964615372, "grad_norm": 0.004135909024626017, "learning_rate": 7.755361623966265e-05, "loss": 0.2492366313934326, "step": 53060 }, { "epoch": 0.2278406017361737, "grad_norm": 0.040785808116197586, "learning_rate": 7.754930451954502e-05, "loss": 0.32564263343811034, "step": 53070 }, { "epoch": 0.22788353382619372, "grad_norm": 2.620953321456909, "learning_rate": 7.754499279942741e-05, "loss": 0.3289715051651001, "step": 53080 }, { "epoch": 0.22792646591621374, "grad_norm": 0.7488147616386414, "learning_rate": 7.754068107930979e-05, "loss": 0.4312422275543213, "step": 53090 }, { "epoch": 0.22796939800623375, "grad_norm": 4.4017510414123535, "learning_rate": 7.753636935919217e-05, "loss": 0.33054094314575194, "step": 53100 }, { "epoch": 0.22801233009625374, "grad_norm": 0.023379260674118996, "learning_rate": 7.753205763907455e-05, "loss": 0.21203324794769288, "step": 53110 }, { "epoch": 0.22805526218627376, "grad_norm": 3.7151591777801514, "learning_rate": 7.752774591895692e-05, "loss": 0.1834011673927307, "step": 53120 }, { "epoch": 0.22809819427629377, "grad_norm": 2.511679172515869, "learning_rate": 7.752343419883929e-05, "loss": 0.13200852870941163, "step": 53130 }, { "epoch": 0.22814112636631376, "grad_norm": 3.0376436710357666, "learning_rate": 7.751912247872166e-05, "loss": 0.3159782886505127, "step": 53140 }, { "epoch": 0.22818405845633377, "grad_norm": 1.4104591608047485, "learning_rate": 7.751481075860404e-05, "loss": 0.33030409812927247, "step": 53150 }, { "epoch": 0.2282269905463538, "grad_norm": 0.09129254519939423, "learning_rate": 7.751049903848642e-05, "loss": 0.0683474063873291, "step": 53160 }, { "epoch": 0.22826992263637377, "grad_norm": 0.025976408272981644, "learning_rate": 7.75061873183688e-05, "loss": 0.22922194004058838, "step": 53170 }, { "epoch": 0.2283128547263938, "grad_norm": 0.007180104032158852, "learning_rate": 7.750187559825117e-05, "loss": 0.2635721445083618, "step": 53180 }, { "epoch": 0.2283557868164138, "grad_norm": 1.0476505756378174, "learning_rate": 7.749756387813355e-05, "loss": 0.24734883308410643, "step": 53190 }, { "epoch": 0.2283987189064338, "grad_norm": 0.4360477328300476, "learning_rate": 7.749325215801593e-05, "loss": 0.37479491233825685, "step": 53200 }, { "epoch": 0.2284416509964538, "grad_norm": 0.3107700049877167, "learning_rate": 7.748894043789829e-05, "loss": 0.11275169849395753, "step": 53210 }, { "epoch": 0.22848458308647382, "grad_norm": 0.023104490712285042, "learning_rate": 7.748462871778067e-05, "loss": 0.3658841848373413, "step": 53220 }, { "epoch": 0.22852751517649383, "grad_norm": 1.2826489210128784, "learning_rate": 7.748031699766305e-05, "loss": 0.31947362422943115, "step": 53230 }, { "epoch": 0.22857044726651382, "grad_norm": 0.03451520577073097, "learning_rate": 7.747600527754542e-05, "loss": 0.10847625732421876, "step": 53240 }, { "epoch": 0.22861337935653384, "grad_norm": 5.027333736419678, "learning_rate": 7.74716935574278e-05, "loss": 0.328680682182312, "step": 53250 }, { "epoch": 0.22865631144655385, "grad_norm": 0.01842356100678444, "learning_rate": 7.746738183731018e-05, "loss": 0.1819998264312744, "step": 53260 }, { "epoch": 0.22869924353657384, "grad_norm": 0.000408778665587306, "learning_rate": 7.746307011719256e-05, "loss": 0.2438007116317749, "step": 53270 }, { "epoch": 0.22874217562659385, "grad_norm": 0.10073164105415344, "learning_rate": 7.745875839707493e-05, "loss": 0.2796761989593506, "step": 53280 }, { "epoch": 0.22878510771661387, "grad_norm": 0.0005582142039202154, "learning_rate": 7.74544466769573e-05, "loss": 0.12742369174957274, "step": 53290 }, { "epoch": 0.22882803980663385, "grad_norm": 3.7237370014190674, "learning_rate": 7.745013495683969e-05, "loss": 0.3490560293197632, "step": 53300 }, { "epoch": 0.22887097189665387, "grad_norm": 0.01251291949301958, "learning_rate": 7.744582323672207e-05, "loss": 0.13892886638641358, "step": 53310 }, { "epoch": 0.22891390398667388, "grad_norm": 0.03795626387000084, "learning_rate": 7.744151151660444e-05, "loss": 0.22728416919708253, "step": 53320 }, { "epoch": 0.2289568360766939, "grad_norm": 2.3000359535217285, "learning_rate": 7.743719979648682e-05, "loss": 0.3141765117645264, "step": 53330 }, { "epoch": 0.22899976816671389, "grad_norm": 0.028027264401316643, "learning_rate": 7.74328880763692e-05, "loss": 0.12524042129516602, "step": 53340 }, { "epoch": 0.2290427002567339, "grad_norm": 0.43016451597213745, "learning_rate": 7.742857635625158e-05, "loss": 0.3535402536392212, "step": 53350 }, { "epoch": 0.22908563234675391, "grad_norm": 0.16803216934204102, "learning_rate": 7.742426463613395e-05, "loss": 0.14710566997528077, "step": 53360 }, { "epoch": 0.2291285644367739, "grad_norm": 0.01865394227206707, "learning_rate": 7.741995291601632e-05, "loss": 0.19491530656814576, "step": 53370 }, { "epoch": 0.22917149652679392, "grad_norm": 2.5932819843292236, "learning_rate": 7.74156411958987e-05, "loss": 0.2611079692840576, "step": 53380 }, { "epoch": 0.22921442861681393, "grad_norm": 6.514557838439941, "learning_rate": 7.741132947578107e-05, "loss": 0.26472296714782717, "step": 53390 }, { "epoch": 0.22925736070683392, "grad_norm": 0.017307903617620468, "learning_rate": 7.740701775566345e-05, "loss": 0.27357475757598876, "step": 53400 }, { "epoch": 0.22930029279685393, "grad_norm": 0.8172531723976135, "learning_rate": 7.740270603554583e-05, "loss": 0.2582735300064087, "step": 53410 }, { "epoch": 0.22934322488687395, "grad_norm": 0.009025703184306622, "learning_rate": 7.73983943154282e-05, "loss": 0.3182665824890137, "step": 53420 }, { "epoch": 0.22938615697689396, "grad_norm": 0.002536490559577942, "learning_rate": 7.739408259531058e-05, "loss": 0.42736306190490725, "step": 53430 }, { "epoch": 0.22942908906691395, "grad_norm": 0.06554874032735825, "learning_rate": 7.738977087519296e-05, "loss": 0.4487005710601807, "step": 53440 }, { "epoch": 0.22947202115693396, "grad_norm": 1.08189058303833, "learning_rate": 7.738545915507533e-05, "loss": 0.3008427143096924, "step": 53450 }, { "epoch": 0.22951495324695398, "grad_norm": 0.0046480633318424225, "learning_rate": 7.73811474349577e-05, "loss": 0.23597433567047119, "step": 53460 }, { "epoch": 0.22955788533697397, "grad_norm": 0.08075997978448868, "learning_rate": 7.737683571484008e-05, "loss": 0.15743098258972169, "step": 53470 }, { "epoch": 0.22960081742699398, "grad_norm": 0.0488070547580719, "learning_rate": 7.737252399472245e-05, "loss": 0.3878929138183594, "step": 53480 }, { "epoch": 0.229643749517014, "grad_norm": 3.296687126159668, "learning_rate": 7.736821227460483e-05, "loss": 0.3832553863525391, "step": 53490 }, { "epoch": 0.22968668160703398, "grad_norm": 0.013115516863763332, "learning_rate": 7.736390055448721e-05, "loss": 0.3670145750045776, "step": 53500 }, { "epoch": 0.229729613697054, "grad_norm": 0.26563796401023865, "learning_rate": 7.735958883436959e-05, "loss": 0.20897796154022216, "step": 53510 }, { "epoch": 0.229772545787074, "grad_norm": 0.6128681302070618, "learning_rate": 7.735527711425196e-05, "loss": 0.17986410856246948, "step": 53520 }, { "epoch": 0.22981547787709403, "grad_norm": 4.410193920135498, "learning_rate": 7.735096539413434e-05, "loss": 0.282076096534729, "step": 53530 }, { "epoch": 0.229858409967114, "grad_norm": 0.06367503851652145, "learning_rate": 7.734665367401672e-05, "loss": 0.29574127197265626, "step": 53540 }, { "epoch": 0.22990134205713403, "grad_norm": 0.0185005571693182, "learning_rate": 7.73423419538991e-05, "loss": 0.34318268299102783, "step": 53550 }, { "epoch": 0.22994427414715404, "grad_norm": 1.0150419473648071, "learning_rate": 7.733803023378147e-05, "loss": 0.301274037361145, "step": 53560 }, { "epoch": 0.22998720623717403, "grad_norm": 0.0939207673072815, "learning_rate": 7.733371851366385e-05, "loss": 0.1255855679512024, "step": 53570 }, { "epoch": 0.23003013832719404, "grad_norm": 0.126878023147583, "learning_rate": 7.732940679354623e-05, "loss": 0.11698832511901855, "step": 53580 }, { "epoch": 0.23007307041721406, "grad_norm": 0.0007859831675887108, "learning_rate": 7.73250950734286e-05, "loss": 0.12006430625915528, "step": 53590 }, { "epoch": 0.23011600250723405, "grad_norm": 1.9789891242980957, "learning_rate": 7.732078335331098e-05, "loss": 0.1413771390914917, "step": 53600 }, { "epoch": 0.23015893459725406, "grad_norm": 0.010845298878848553, "learning_rate": 7.731647163319336e-05, "loss": 0.07959707379341126, "step": 53610 }, { "epoch": 0.23020186668727408, "grad_norm": 0.02713046595454216, "learning_rate": 7.731215991307572e-05, "loss": 0.11618701219558716, "step": 53620 }, { "epoch": 0.23024479877729406, "grad_norm": 1.4147077798843384, "learning_rate": 7.73078481929581e-05, "loss": 0.45875911712646483, "step": 53630 }, { "epoch": 0.23028773086731408, "grad_norm": 0.2035217583179474, "learning_rate": 7.730353647284048e-05, "loss": 0.29228217601776124, "step": 53640 }, { "epoch": 0.2303306629573341, "grad_norm": 2.1338682174682617, "learning_rate": 7.729922475272285e-05, "loss": 0.31885855197906493, "step": 53650 }, { "epoch": 0.2303735950473541, "grad_norm": 0.08031083643436432, "learning_rate": 7.729491303260523e-05, "loss": 0.1363338828086853, "step": 53660 }, { "epoch": 0.2304165271373741, "grad_norm": 0.00543932942673564, "learning_rate": 7.729060131248761e-05, "loss": 0.2697357177734375, "step": 53670 }, { "epoch": 0.2304594592273941, "grad_norm": 0.0035474589094519615, "learning_rate": 7.728628959236999e-05, "loss": 0.12337535619735718, "step": 53680 }, { "epoch": 0.23050239131741412, "grad_norm": 0.11553837358951569, "learning_rate": 7.728197787225236e-05, "loss": 0.40886611938476564, "step": 53690 }, { "epoch": 0.2305453234074341, "grad_norm": 1.4451885223388672, "learning_rate": 7.727766615213473e-05, "loss": 0.3630185127258301, "step": 53700 }, { "epoch": 0.23058825549745413, "grad_norm": 0.3057561218738556, "learning_rate": 7.72733544320171e-05, "loss": 0.12788234949111937, "step": 53710 }, { "epoch": 0.23063118758747414, "grad_norm": 2.0138747692108154, "learning_rate": 7.726904271189948e-05, "loss": 0.2607387065887451, "step": 53720 }, { "epoch": 0.23067411967749413, "grad_norm": 1.4901790618896484, "learning_rate": 7.726473099178186e-05, "loss": 0.5151829242706298, "step": 53730 }, { "epoch": 0.23071705176751414, "grad_norm": 0.6877745389938354, "learning_rate": 7.726041927166424e-05, "loss": 0.44553771018981936, "step": 53740 }, { "epoch": 0.23075998385753416, "grad_norm": 0.06983120739459991, "learning_rate": 7.725610755154661e-05, "loss": 0.2058812141418457, "step": 53750 }, { "epoch": 0.23080291594755417, "grad_norm": 0.10931398719549179, "learning_rate": 7.725179583142899e-05, "loss": 0.11302660703659058, "step": 53760 }, { "epoch": 0.23084584803757416, "grad_norm": 0.3568594753742218, "learning_rate": 7.724748411131137e-05, "loss": 0.09234002232551575, "step": 53770 }, { "epoch": 0.23088878012759417, "grad_norm": 0.11842846125364304, "learning_rate": 7.724317239119375e-05, "loss": 0.1256554365158081, "step": 53780 }, { "epoch": 0.2309317122176142, "grad_norm": 0.07234813272953033, "learning_rate": 7.723886067107612e-05, "loss": 0.049526515603065493, "step": 53790 }, { "epoch": 0.23097464430763417, "grad_norm": 2.2963318824768066, "learning_rate": 7.72345489509585e-05, "loss": 0.20251133441925048, "step": 53800 }, { "epoch": 0.2310175763976542, "grad_norm": 0.0010859980247914791, "learning_rate": 7.723023723084088e-05, "loss": 0.1640162944793701, "step": 53810 }, { "epoch": 0.2310605084876742, "grad_norm": 0.024189893156290054, "learning_rate": 7.722592551072326e-05, "loss": 0.07351300120353699, "step": 53820 }, { "epoch": 0.2311034405776942, "grad_norm": 0.46556511521339417, "learning_rate": 7.722161379060563e-05, "loss": 0.20398468971252443, "step": 53830 }, { "epoch": 0.2311463726677142, "grad_norm": 4.37669563293457, "learning_rate": 7.721730207048801e-05, "loss": 0.23910939693450928, "step": 53840 }, { "epoch": 0.23118930475773422, "grad_norm": 1.5534576177597046, "learning_rate": 7.721299035037039e-05, "loss": 0.21463370323181152, "step": 53850 }, { "epoch": 0.23123223684775424, "grad_norm": 2.1490960121154785, "learning_rate": 7.720867863025277e-05, "loss": 0.3465791940689087, "step": 53860 }, { "epoch": 0.23127516893777422, "grad_norm": 23.90878677368164, "learning_rate": 7.720436691013513e-05, "loss": 0.5125598430633544, "step": 53870 }, { "epoch": 0.23131810102779424, "grad_norm": 0.027876272797584534, "learning_rate": 7.72000551900175e-05, "loss": 0.06181851625442505, "step": 53880 }, { "epoch": 0.23136103311781425, "grad_norm": 4.011077880859375, "learning_rate": 7.719574346989988e-05, "loss": 0.18830808401107788, "step": 53890 }, { "epoch": 0.23140396520783424, "grad_norm": 5.805874824523926, "learning_rate": 7.719143174978226e-05, "loss": 0.09969301223754883, "step": 53900 }, { "epoch": 0.23144689729785425, "grad_norm": 0.0011275582946836948, "learning_rate": 7.718712002966464e-05, "loss": 0.13450464010238647, "step": 53910 }, { "epoch": 0.23148982938787427, "grad_norm": 1.8485000133514404, "learning_rate": 7.718280830954702e-05, "loss": 0.25277459621429443, "step": 53920 }, { "epoch": 0.23153276147789426, "grad_norm": 2.114288091659546, "learning_rate": 7.717849658942939e-05, "loss": 0.07729903459548951, "step": 53930 }, { "epoch": 0.23157569356791427, "grad_norm": 0.2792128324508667, "learning_rate": 7.717418486931177e-05, "loss": 0.23469130992889403, "step": 53940 }, { "epoch": 0.23161862565793429, "grad_norm": 2.9970715045928955, "learning_rate": 7.716987314919413e-05, "loss": 0.3448359727859497, "step": 53950 }, { "epoch": 0.2316615577479543, "grad_norm": 2.2078139781951904, "learning_rate": 7.716556142907651e-05, "loss": 0.40146422386169434, "step": 53960 }, { "epoch": 0.2317044898379743, "grad_norm": 1.174996256828308, "learning_rate": 7.716124970895889e-05, "loss": 0.24278640747070312, "step": 53970 }, { "epoch": 0.2317474219279943, "grad_norm": 1.443159580230713, "learning_rate": 7.715693798884127e-05, "loss": 0.1192315697669983, "step": 53980 }, { "epoch": 0.23179035401801432, "grad_norm": 0.5862539410591125, "learning_rate": 7.715262626872364e-05, "loss": 0.2521751642227173, "step": 53990 }, { "epoch": 0.2318332861080343, "grad_norm": 0.0605054534971714, "learning_rate": 7.714831454860602e-05, "loss": 0.17406871318817138, "step": 54000 }, { "epoch": 0.2318332861080343, "eval_loss": 0.4490349292755127, "eval_runtime": 27.4617, "eval_samples_per_second": 3.641, "eval_steps_per_second": 3.641, "step": 54000 }, { "epoch": 0.23187621819805432, "grad_norm": 1.6182562112808228, "learning_rate": 7.71440028284884e-05, "loss": 0.3006648778915405, "step": 54010 }, { "epoch": 0.23191915028807433, "grad_norm": 0.0011926308507099748, "learning_rate": 7.713969110837078e-05, "loss": 0.2160343647003174, "step": 54020 }, { "epoch": 0.23196208237809432, "grad_norm": 0.0024554634001106024, "learning_rate": 7.713537938825315e-05, "loss": 0.07509487271308898, "step": 54030 }, { "epoch": 0.23200501446811433, "grad_norm": 0.013232232071459293, "learning_rate": 7.713106766813553e-05, "loss": 0.2723365068435669, "step": 54040 }, { "epoch": 0.23204794655813435, "grad_norm": 0.032803673297166824, "learning_rate": 7.712675594801791e-05, "loss": 0.129007089138031, "step": 54050 }, { "epoch": 0.23209087864815434, "grad_norm": 1.168349266052246, "learning_rate": 7.712244422790029e-05, "loss": 0.16661056280136108, "step": 54060 }, { "epoch": 0.23213381073817435, "grad_norm": 5.672686576843262, "learning_rate": 7.711813250778266e-05, "loss": 0.17700178623199464, "step": 54070 }, { "epoch": 0.23217674282819437, "grad_norm": 0.29278627038002014, "learning_rate": 7.711382078766504e-05, "loss": 0.23657283782958985, "step": 54080 }, { "epoch": 0.23221967491821438, "grad_norm": 2.1834146976470947, "learning_rate": 7.710950906754742e-05, "loss": 0.39358129501342776, "step": 54090 }, { "epoch": 0.23226260700823437, "grad_norm": 0.009153024293482304, "learning_rate": 7.71051973474298e-05, "loss": 0.002811253070831299, "step": 54100 }, { "epoch": 0.23230553909825438, "grad_norm": 1.72090482711792, "learning_rate": 7.710088562731216e-05, "loss": 0.3168710470199585, "step": 54110 }, { "epoch": 0.2323484711882744, "grad_norm": 3.161130666732788, "learning_rate": 7.709657390719454e-05, "loss": 0.26244750022888186, "step": 54120 }, { "epoch": 0.23239140327829438, "grad_norm": 0.01198434829711914, "learning_rate": 7.709226218707691e-05, "loss": 0.2590029239654541, "step": 54130 }, { "epoch": 0.2324343353683144, "grad_norm": 0.01950439251959324, "learning_rate": 7.708795046695929e-05, "loss": 0.2357182741165161, "step": 54140 }, { "epoch": 0.2324772674583344, "grad_norm": 15.0639066696167, "learning_rate": 7.708363874684167e-05, "loss": 0.14678561687469482, "step": 54150 }, { "epoch": 0.2325201995483544, "grad_norm": 0.0018443474546074867, "learning_rate": 7.707932702672404e-05, "loss": 0.22300183773040771, "step": 54160 }, { "epoch": 0.23256313163837442, "grad_norm": 2.399331569671631, "learning_rate": 7.707501530660642e-05, "loss": 0.2935328483581543, "step": 54170 }, { "epoch": 0.23260606372839443, "grad_norm": 0.046965453773736954, "learning_rate": 7.70707035864888e-05, "loss": 0.2203977346420288, "step": 54180 }, { "epoch": 0.23264899581841444, "grad_norm": 0.029289549216628075, "learning_rate": 7.706639186637118e-05, "loss": 0.09207029342651367, "step": 54190 }, { "epoch": 0.23269192790843443, "grad_norm": 0.40686535835266113, "learning_rate": 7.706208014625354e-05, "loss": 0.16840204000473022, "step": 54200 }, { "epoch": 0.23273485999845445, "grad_norm": 46.78797912597656, "learning_rate": 7.705776842613592e-05, "loss": 0.3027779579162598, "step": 54210 }, { "epoch": 0.23277779208847446, "grad_norm": 0.009091447107493877, "learning_rate": 7.70534567060183e-05, "loss": 0.19085679054260254, "step": 54220 }, { "epoch": 0.23282072417849445, "grad_norm": 0.13714081048965454, "learning_rate": 7.704914498590067e-05, "loss": 0.24470322132110595, "step": 54230 }, { "epoch": 0.23286365626851446, "grad_norm": 0.0014627090422436595, "learning_rate": 7.704483326578305e-05, "loss": 0.2286834239959717, "step": 54240 }, { "epoch": 0.23290658835853448, "grad_norm": 0.0014132530195638537, "learning_rate": 7.704052154566543e-05, "loss": 0.1336017966270447, "step": 54250 }, { "epoch": 0.23294952044855446, "grad_norm": 1.9230753183364868, "learning_rate": 7.70362098255478e-05, "loss": 0.2634952306747437, "step": 54260 }, { "epoch": 0.23299245253857448, "grad_norm": 0.004964998923242092, "learning_rate": 7.70318981054302e-05, "loss": 0.2046299934387207, "step": 54270 }, { "epoch": 0.2330353846285945, "grad_norm": 2.2121899127960205, "learning_rate": 7.702758638531256e-05, "loss": 0.2678264379501343, "step": 54280 }, { "epoch": 0.2330783167186145, "grad_norm": 0.22293893992900848, "learning_rate": 7.702327466519494e-05, "loss": 0.1930667996406555, "step": 54290 }, { "epoch": 0.2331212488086345, "grad_norm": 0.5882869958877563, "learning_rate": 7.701896294507731e-05, "loss": 0.2323695182800293, "step": 54300 }, { "epoch": 0.2331641808986545, "grad_norm": 8.391152381896973, "learning_rate": 7.701465122495969e-05, "loss": 0.19190495014190673, "step": 54310 }, { "epoch": 0.23320711298867453, "grad_norm": 2.274019956588745, "learning_rate": 7.701033950484207e-05, "loss": 0.3386978626251221, "step": 54320 }, { "epoch": 0.2332500450786945, "grad_norm": 0.2136969417333603, "learning_rate": 7.700602778472445e-05, "loss": 0.08582958579063416, "step": 54330 }, { "epoch": 0.23329297716871453, "grad_norm": 0.07544512301683426, "learning_rate": 7.700171606460682e-05, "loss": 0.2182629346847534, "step": 54340 }, { "epoch": 0.23333590925873454, "grad_norm": 0.00047073987661860883, "learning_rate": 7.69974043444892e-05, "loss": 0.15393882989883423, "step": 54350 }, { "epoch": 0.23337884134875453, "grad_norm": 0.22471746802330017, "learning_rate": 7.699309262437156e-05, "loss": 0.33650147914886475, "step": 54360 }, { "epoch": 0.23342177343877454, "grad_norm": 0.00527210533618927, "learning_rate": 7.698878090425394e-05, "loss": 0.09793091416358948, "step": 54370 }, { "epoch": 0.23346470552879456, "grad_norm": 0.011522647924721241, "learning_rate": 7.698446918413632e-05, "loss": 0.21272644996643067, "step": 54380 }, { "epoch": 0.23350763761881455, "grad_norm": 1.7179675102233887, "learning_rate": 7.69801574640187e-05, "loss": 0.3838799238204956, "step": 54390 }, { "epoch": 0.23355056970883456, "grad_norm": 1.6328366994857788, "learning_rate": 7.697584574390107e-05, "loss": 0.1944947361946106, "step": 54400 }, { "epoch": 0.23359350179885457, "grad_norm": 0.007709296885877848, "learning_rate": 7.697153402378345e-05, "loss": 0.06385926604270935, "step": 54410 }, { "epoch": 0.2336364338888746, "grad_norm": 1.1928542852401733, "learning_rate": 7.696722230366583e-05, "loss": 0.24402003288269042, "step": 54420 }, { "epoch": 0.23367936597889458, "grad_norm": 7.334529876708984, "learning_rate": 7.69629105835482e-05, "loss": 0.31890459060668946, "step": 54430 }, { "epoch": 0.2337222980689146, "grad_norm": 0.03360019251704216, "learning_rate": 7.695859886343057e-05, "loss": 0.11006944179534912, "step": 54440 }, { "epoch": 0.2337652301589346, "grad_norm": 0.17549341917037964, "learning_rate": 7.695428714331295e-05, "loss": 0.4170397758483887, "step": 54450 }, { "epoch": 0.2338081622489546, "grad_norm": 12.329401969909668, "learning_rate": 7.694997542319532e-05, "loss": 0.32720706462860105, "step": 54460 }, { "epoch": 0.2338510943389746, "grad_norm": 1.2573758363723755, "learning_rate": 7.69456637030777e-05, "loss": 0.22783832550048827, "step": 54470 }, { "epoch": 0.23389402642899462, "grad_norm": 0.006898627616465092, "learning_rate": 7.694135198296008e-05, "loss": 0.30946395397186277, "step": 54480 }, { "epoch": 0.2339369585190146, "grad_norm": 0.024012990295886993, "learning_rate": 7.693704026284247e-05, "loss": 0.1696092128753662, "step": 54490 }, { "epoch": 0.23397989060903462, "grad_norm": 1.0321308374404907, "learning_rate": 7.693272854272485e-05, "loss": 0.2145857572555542, "step": 54500 }, { "epoch": 0.23402282269905464, "grad_norm": 2.9807746410369873, "learning_rate": 7.692841682260722e-05, "loss": 0.43792023658752444, "step": 54510 }, { "epoch": 0.23406575478907465, "grad_norm": 8.073158264160156, "learning_rate": 7.692410510248959e-05, "loss": 0.19019222259521484, "step": 54520 }, { "epoch": 0.23410868687909464, "grad_norm": 0.033755868673324585, "learning_rate": 7.691979338237197e-05, "loss": 0.14710177183151246, "step": 54530 }, { "epoch": 0.23415161896911466, "grad_norm": 0.09791547805070877, "learning_rate": 7.691548166225434e-05, "loss": 0.31218585968017576, "step": 54540 }, { "epoch": 0.23419455105913467, "grad_norm": 0.0015638612676411867, "learning_rate": 7.691116994213672e-05, "loss": 0.17328212261199952, "step": 54550 }, { "epoch": 0.23423748314915466, "grad_norm": 0.015360639430582523, "learning_rate": 7.69068582220191e-05, "loss": 0.13364744186401367, "step": 54560 }, { "epoch": 0.23428041523917467, "grad_norm": 0.06556349992752075, "learning_rate": 7.690254650190148e-05, "loss": 0.33662867546081543, "step": 54570 }, { "epoch": 0.2343233473291947, "grad_norm": 0.049693044275045395, "learning_rate": 7.689823478178385e-05, "loss": 0.09475327134132386, "step": 54580 }, { "epoch": 0.23436627941921467, "grad_norm": 0.03847317025065422, "learning_rate": 7.689392306166623e-05, "loss": 0.06982783675193786, "step": 54590 }, { "epoch": 0.2344092115092347, "grad_norm": 0.09052468091249466, "learning_rate": 7.688961134154861e-05, "loss": 0.34469263553619384, "step": 54600 }, { "epoch": 0.2344521435992547, "grad_norm": 2.4752421379089355, "learning_rate": 7.688529962143097e-05, "loss": 0.1300334930419922, "step": 54610 }, { "epoch": 0.23449507568927472, "grad_norm": 0.7740895748138428, "learning_rate": 7.688098790131335e-05, "loss": 0.3067962646484375, "step": 54620 }, { "epoch": 0.2345380077792947, "grad_norm": 0.32579708099365234, "learning_rate": 7.687667618119573e-05, "loss": 0.3612945795059204, "step": 54630 }, { "epoch": 0.23458093986931472, "grad_norm": 0.7105182409286499, "learning_rate": 7.68723644610781e-05, "loss": 0.2577735662460327, "step": 54640 }, { "epoch": 0.23462387195933473, "grad_norm": 0.016220765188336372, "learning_rate": 7.686805274096048e-05, "loss": 0.3978657007217407, "step": 54650 }, { "epoch": 0.23466680404935472, "grad_norm": 0.26025742292404175, "learning_rate": 7.686374102084286e-05, "loss": 0.10141566991806031, "step": 54660 }, { "epoch": 0.23470973613937474, "grad_norm": 1.8236098289489746, "learning_rate": 7.685942930072524e-05, "loss": 0.23605387210845946, "step": 54670 }, { "epoch": 0.23475266822939475, "grad_norm": 0.005074288230389357, "learning_rate": 7.685511758060761e-05, "loss": 0.19536244869232178, "step": 54680 }, { "epoch": 0.23479560031941474, "grad_norm": 0.013264862820506096, "learning_rate": 7.685080586048998e-05, "loss": 0.14476516246795654, "step": 54690 }, { "epoch": 0.23483853240943475, "grad_norm": 1.0898460149765015, "learning_rate": 7.684649414037235e-05, "loss": 0.5413854598999024, "step": 54700 }, { "epoch": 0.23488146449945477, "grad_norm": 2.291823625564575, "learning_rate": 7.684218242025474e-05, "loss": 0.2193145990371704, "step": 54710 }, { "epoch": 0.23492439658947478, "grad_norm": 0.01983988657593727, "learning_rate": 7.683787070013712e-05, "loss": 0.0784152865409851, "step": 54720 }, { "epoch": 0.23496732867949477, "grad_norm": 3.9424784183502197, "learning_rate": 7.68335589800195e-05, "loss": 0.2317206859588623, "step": 54730 }, { "epoch": 0.23501026076951478, "grad_norm": 0.19252511858940125, "learning_rate": 7.682924725990188e-05, "loss": 0.2624601125717163, "step": 54740 }, { "epoch": 0.2350531928595348, "grad_norm": 0.22246962785720825, "learning_rate": 7.682493553978425e-05, "loss": 0.3319044351577759, "step": 54750 }, { "epoch": 0.23509612494955479, "grad_norm": 0.001225695596076548, "learning_rate": 7.682062381966663e-05, "loss": 0.118217933177948, "step": 54760 }, { "epoch": 0.2351390570395748, "grad_norm": 7.75153923034668, "learning_rate": 7.6816312099549e-05, "loss": 0.218511700630188, "step": 54770 }, { "epoch": 0.23518198912959482, "grad_norm": 3.493736743927002, "learning_rate": 7.681200037943137e-05, "loss": 0.4513263702392578, "step": 54780 }, { "epoch": 0.2352249212196148, "grad_norm": 1.2474606037139893, "learning_rate": 7.680768865931375e-05, "loss": 0.3103821277618408, "step": 54790 }, { "epoch": 0.23526785330963482, "grad_norm": 0.39426982402801514, "learning_rate": 7.680337693919613e-05, "loss": 0.04423748850822449, "step": 54800 }, { "epoch": 0.23531078539965483, "grad_norm": 0.11555972695350647, "learning_rate": 7.67990652190785e-05, "loss": 0.12222168445587159, "step": 54810 }, { "epoch": 0.23535371748967482, "grad_norm": 0.9844247698783875, "learning_rate": 7.679475349896088e-05, "loss": 0.2519564151763916, "step": 54820 }, { "epoch": 0.23539664957969483, "grad_norm": 0.5366085171699524, "learning_rate": 7.679044177884326e-05, "loss": 0.29870994091033937, "step": 54830 }, { "epoch": 0.23543958166971485, "grad_norm": 0.006447316147387028, "learning_rate": 7.678613005872564e-05, "loss": 0.2809410810470581, "step": 54840 }, { "epoch": 0.23548251375973486, "grad_norm": 0.05402093753218651, "learning_rate": 7.6781818338608e-05, "loss": 0.24879958629608154, "step": 54850 }, { "epoch": 0.23552544584975485, "grad_norm": 0.024269670248031616, "learning_rate": 7.677750661849038e-05, "loss": 0.16608129739761351, "step": 54860 }, { "epoch": 0.23556837793977486, "grad_norm": 0.48942995071411133, "learning_rate": 7.677319489837275e-05, "loss": 0.12601350545883178, "step": 54870 }, { "epoch": 0.23561131002979488, "grad_norm": 0.03332279622554779, "learning_rate": 7.676888317825513e-05, "loss": 0.1936497211456299, "step": 54880 }, { "epoch": 0.23565424211981487, "grad_norm": 0.03991253674030304, "learning_rate": 7.676457145813751e-05, "loss": 0.3474759101867676, "step": 54890 }, { "epoch": 0.23569717420983488, "grad_norm": 4.706600666046143, "learning_rate": 7.676025973801989e-05, "loss": 0.29954354763031005, "step": 54900 }, { "epoch": 0.2357401062998549, "grad_norm": 1.776058316230774, "learning_rate": 7.675594801790226e-05, "loss": 0.20013415813446045, "step": 54910 }, { "epoch": 0.23578303838987488, "grad_norm": 1.0966745615005493, "learning_rate": 7.675163629778464e-05, "loss": 0.454547119140625, "step": 54920 }, { "epoch": 0.2358259704798949, "grad_norm": 0.009377327747642994, "learning_rate": 7.674732457766702e-05, "loss": 0.08590722680091858, "step": 54930 }, { "epoch": 0.2358689025699149, "grad_norm": 0.002148397732526064, "learning_rate": 7.67430128575494e-05, "loss": 0.45615296363830565, "step": 54940 }, { "epoch": 0.23591183465993493, "grad_norm": 4.385854721069336, "learning_rate": 7.673870113743177e-05, "loss": 0.20920178890228272, "step": 54950 }, { "epoch": 0.23595476674995491, "grad_norm": 0.004134920425713062, "learning_rate": 7.673438941731415e-05, "loss": 0.21172521114349366, "step": 54960 }, { "epoch": 0.23599769883997493, "grad_norm": 10.730145454406738, "learning_rate": 7.673007769719653e-05, "loss": 0.44742717742919924, "step": 54970 }, { "epoch": 0.23604063092999494, "grad_norm": 1.315672755241394, "learning_rate": 7.67257659770789e-05, "loss": 0.2971693277359009, "step": 54980 }, { "epoch": 0.23608356302001493, "grad_norm": 0.08187035471200943, "learning_rate": 7.672145425696128e-05, "loss": 0.1317402720451355, "step": 54990 }, { "epoch": 0.23612649511003495, "grad_norm": 0.010946123860776424, "learning_rate": 7.671714253684366e-05, "loss": 0.2432734489440918, "step": 55000 }, { "epoch": 0.23612649511003495, "eval_loss": 0.4557407796382904, "eval_runtime": 27.4059, "eval_samples_per_second": 3.649, "eval_steps_per_second": 3.649, "step": 55000 }, { "epoch": 0.23616942720005496, "grad_norm": 1.7199127674102783, "learning_rate": 7.671283081672604e-05, "loss": 0.33434352874755857, "step": 55010 }, { "epoch": 0.23621235929007495, "grad_norm": 0.16656917333602905, "learning_rate": 7.67085190966084e-05, "loss": 0.23526394367218018, "step": 55020 }, { "epoch": 0.23625529138009496, "grad_norm": 1.2178751230239868, "learning_rate": 7.670420737649078e-05, "loss": 0.3808588027954102, "step": 55030 }, { "epoch": 0.23629822347011498, "grad_norm": 0.9661920666694641, "learning_rate": 7.669989565637316e-05, "loss": 0.19774439334869384, "step": 55040 }, { "epoch": 0.236341155560135, "grad_norm": 7.985832214355469, "learning_rate": 7.669558393625553e-05, "loss": 0.3549797058105469, "step": 55050 }, { "epoch": 0.23638408765015498, "grad_norm": 0.17887651920318604, "learning_rate": 7.669127221613791e-05, "loss": 0.06525392532348633, "step": 55060 }, { "epoch": 0.236427019740175, "grad_norm": 0.2913016676902771, "learning_rate": 7.668696049602029e-05, "loss": 0.2070707082748413, "step": 55070 }, { "epoch": 0.236469951830195, "grad_norm": 0.9555326700210571, "learning_rate": 7.668264877590267e-05, "loss": 0.09528316259384155, "step": 55080 }, { "epoch": 0.236512883920215, "grad_norm": 2.1643640995025635, "learning_rate": 7.667833705578504e-05, "loss": 0.16205003261566162, "step": 55090 }, { "epoch": 0.236555816010235, "grad_norm": 0.15171417593955994, "learning_rate": 7.66740253356674e-05, "loss": 0.2057335615158081, "step": 55100 }, { "epoch": 0.23659874810025502, "grad_norm": 1.3308279514312744, "learning_rate": 7.666971361554978e-05, "loss": 0.24363973140716552, "step": 55110 }, { "epoch": 0.236641680190275, "grad_norm": 2.389375925064087, "learning_rate": 7.666540189543216e-05, "loss": 0.4016840934753418, "step": 55120 }, { "epoch": 0.23668461228029503, "grad_norm": 0.001500018173828721, "learning_rate": 7.666109017531454e-05, "loss": 0.13236029148101808, "step": 55130 }, { "epoch": 0.23672754437031504, "grad_norm": 1.7535425424575806, "learning_rate": 7.665677845519692e-05, "loss": 0.2635908603668213, "step": 55140 }, { "epoch": 0.23677047646033506, "grad_norm": 0.0043300143443048, "learning_rate": 7.66524667350793e-05, "loss": 0.3937635660171509, "step": 55150 }, { "epoch": 0.23681340855035504, "grad_norm": 1.2087223529815674, "learning_rate": 7.664815501496167e-05, "loss": 0.19942669868469237, "step": 55160 }, { "epoch": 0.23685634064037506, "grad_norm": 0.04313937947154045, "learning_rate": 7.664384329484405e-05, "loss": 0.37582294940948485, "step": 55170 }, { "epoch": 0.23689927273039507, "grad_norm": 1.0092852115631104, "learning_rate": 7.663953157472643e-05, "loss": 0.1623198390007019, "step": 55180 }, { "epoch": 0.23694220482041506, "grad_norm": 1.9008547067642212, "learning_rate": 7.66352198546088e-05, "loss": 0.25249829292297366, "step": 55190 }, { "epoch": 0.23698513691043507, "grad_norm": 2.1248939037323, "learning_rate": 7.663090813449118e-05, "loss": 0.30942888259887696, "step": 55200 }, { "epoch": 0.2370280690004551, "grad_norm": 0.10744863003492355, "learning_rate": 7.662659641437356e-05, "loss": 0.20057756900787355, "step": 55210 }, { "epoch": 0.23707100109047508, "grad_norm": 0.06358012557029724, "learning_rate": 7.662228469425593e-05, "loss": 0.09414655566215516, "step": 55220 }, { "epoch": 0.2371139331804951, "grad_norm": 1.4875483512878418, "learning_rate": 7.661797297413831e-05, "loss": 0.4498072624206543, "step": 55230 }, { "epoch": 0.2371568652705151, "grad_norm": 0.09652744233608246, "learning_rate": 7.661366125402069e-05, "loss": 0.2348034620285034, "step": 55240 }, { "epoch": 0.2371997973605351, "grad_norm": 0.00626254640519619, "learning_rate": 7.660934953390307e-05, "loss": 0.24348032474517822, "step": 55250 }, { "epoch": 0.2372427294505551, "grad_norm": 0.47817111015319824, "learning_rate": 7.660503781378544e-05, "loss": 0.22016806602478028, "step": 55260 }, { "epoch": 0.23728566154057512, "grad_norm": 2.5576000213623047, "learning_rate": 7.660072609366781e-05, "loss": 0.45274782180786133, "step": 55270 }, { "epoch": 0.23732859363059514, "grad_norm": 0.03902919217944145, "learning_rate": 7.659641437355019e-05, "loss": 0.12822468280792237, "step": 55280 }, { "epoch": 0.23737152572061512, "grad_norm": 3.338451623916626, "learning_rate": 7.659210265343256e-05, "loss": 0.10585402250289917, "step": 55290 }, { "epoch": 0.23741445781063514, "grad_norm": 3.582664966583252, "learning_rate": 7.658779093331494e-05, "loss": 0.31997177600860593, "step": 55300 }, { "epoch": 0.23745738990065515, "grad_norm": 0.2807246446609497, "learning_rate": 7.658347921319732e-05, "loss": 0.3692594289779663, "step": 55310 }, { "epoch": 0.23750032199067514, "grad_norm": 1.0550538301467896, "learning_rate": 7.65791674930797e-05, "loss": 0.17689560651779174, "step": 55320 }, { "epoch": 0.23754325408069515, "grad_norm": 0.022598102688789368, "learning_rate": 7.657485577296207e-05, "loss": 0.29569780826568604, "step": 55330 }, { "epoch": 0.23758618617071517, "grad_norm": 1.544817566871643, "learning_rate": 7.657054405284445e-05, "loss": 0.21274950504302978, "step": 55340 }, { "epoch": 0.23762911826073516, "grad_norm": 1.3617031574249268, "learning_rate": 7.656623233272681e-05, "loss": 0.4670865058898926, "step": 55350 }, { "epoch": 0.23767205035075517, "grad_norm": 0.3003019690513611, "learning_rate": 7.656192061260919e-05, "loss": 0.48572516441345215, "step": 55360 }, { "epoch": 0.23771498244077519, "grad_norm": 1.8010348081588745, "learning_rate": 7.655760889249157e-05, "loss": 0.27597956657409667, "step": 55370 }, { "epoch": 0.2377579145307952, "grad_norm": 3.2601473331451416, "learning_rate": 7.655329717237395e-05, "loss": 0.3330512523651123, "step": 55380 }, { "epoch": 0.2378008466208152, "grad_norm": 2.0501697063446045, "learning_rate": 7.654898545225632e-05, "loss": 0.37035629749298093, "step": 55390 }, { "epoch": 0.2378437787108352, "grad_norm": 2.374547004699707, "learning_rate": 7.65446737321387e-05, "loss": 0.2998343944549561, "step": 55400 }, { "epoch": 0.23788671080085522, "grad_norm": 0.07484851777553558, "learning_rate": 7.654036201202108e-05, "loss": 0.3185651063919067, "step": 55410 }, { "epoch": 0.2379296428908752, "grad_norm": 0.2303788661956787, "learning_rate": 7.653605029190345e-05, "loss": 0.09342051148414612, "step": 55420 }, { "epoch": 0.23797257498089522, "grad_norm": 0.06799449026584625, "learning_rate": 7.653173857178583e-05, "loss": 0.10728926658630371, "step": 55430 }, { "epoch": 0.23801550707091523, "grad_norm": 1.861104130744934, "learning_rate": 7.652742685166821e-05, "loss": 0.2747479438781738, "step": 55440 }, { "epoch": 0.23805843916093522, "grad_norm": 1.7472214698791504, "learning_rate": 7.652311513155059e-05, "loss": 0.23076362609863282, "step": 55450 }, { "epoch": 0.23810137125095523, "grad_norm": 4.591130256652832, "learning_rate": 7.651880341143296e-05, "loss": 0.3691649198532104, "step": 55460 }, { "epoch": 0.23814430334097525, "grad_norm": 0.005838882178068161, "learning_rate": 7.651449169131534e-05, "loss": 0.3147615909576416, "step": 55470 }, { "epoch": 0.23818723543099526, "grad_norm": 0.44340893626213074, "learning_rate": 7.651017997119772e-05, "loss": 0.22155678272247314, "step": 55480 }, { "epoch": 0.23823016752101525, "grad_norm": 1.6629033088684082, "learning_rate": 7.65058682510801e-05, "loss": 0.42931222915649414, "step": 55490 }, { "epoch": 0.23827309961103527, "grad_norm": 0.005505688022822142, "learning_rate": 7.650155653096247e-05, "loss": 0.11621900796890258, "step": 55500 }, { "epoch": 0.23831603170105528, "grad_norm": 2.3384525775909424, "learning_rate": 7.649724481084484e-05, "loss": 0.32160401344299316, "step": 55510 }, { "epoch": 0.23835896379107527, "grad_norm": 0.007327724248170853, "learning_rate": 7.649293309072721e-05, "loss": 0.11470060348510742, "step": 55520 }, { "epoch": 0.23840189588109528, "grad_norm": 1.3797262907028198, "learning_rate": 7.648862137060959e-05, "loss": 0.13075582981109618, "step": 55530 }, { "epoch": 0.2384448279711153, "grad_norm": 0.02711120806634426, "learning_rate": 7.648430965049197e-05, "loss": 0.23798139095306398, "step": 55540 }, { "epoch": 0.23848776006113528, "grad_norm": 1.339031457901001, "learning_rate": 7.647999793037435e-05, "loss": 0.3770133972167969, "step": 55550 }, { "epoch": 0.2385306921511553, "grad_norm": 0.011666370555758476, "learning_rate": 7.647568621025672e-05, "loss": 0.17394804954528809, "step": 55560 }, { "epoch": 0.2385736242411753, "grad_norm": 0.018904006108641624, "learning_rate": 7.64713744901391e-05, "loss": 0.19867415428161622, "step": 55570 }, { "epoch": 0.23861655633119533, "grad_norm": 0.21838410198688507, "learning_rate": 7.646706277002148e-05, "loss": 0.1692600965499878, "step": 55580 }, { "epoch": 0.23865948842121532, "grad_norm": 10.107283592224121, "learning_rate": 7.646275104990384e-05, "loss": 0.3083909273147583, "step": 55590 }, { "epoch": 0.23870242051123533, "grad_norm": 3.883472204208374, "learning_rate": 7.645843932978622e-05, "loss": 0.42784271240234373, "step": 55600 }, { "epoch": 0.23874535260125535, "grad_norm": 1.2638131380081177, "learning_rate": 7.64541276096686e-05, "loss": 0.45655550956726076, "step": 55610 }, { "epoch": 0.23878828469127533, "grad_norm": 0.11055152118206024, "learning_rate": 7.644981588955097e-05, "loss": 0.08500316143035888, "step": 55620 }, { "epoch": 0.23883121678129535, "grad_norm": 2.233280658721924, "learning_rate": 7.644550416943335e-05, "loss": 0.10072469711303711, "step": 55630 }, { "epoch": 0.23887414887131536, "grad_norm": 0.38892972469329834, "learning_rate": 7.644119244931573e-05, "loss": 0.13284404277801515, "step": 55640 }, { "epoch": 0.23891708096133535, "grad_norm": 0.12464083731174469, "learning_rate": 7.64368807291981e-05, "loss": 0.09709331393241882, "step": 55650 }, { "epoch": 0.23896001305135536, "grad_norm": 0.0404987558722496, "learning_rate": 7.643256900908048e-05, "loss": 0.2879699945449829, "step": 55660 }, { "epoch": 0.23900294514137538, "grad_norm": 1.0981417894363403, "learning_rate": 7.642825728896286e-05, "loss": 0.21002488136291503, "step": 55670 }, { "epoch": 0.23904587723139537, "grad_norm": 0.09827089309692383, "learning_rate": 7.642394556884524e-05, "loss": 0.2971586942672729, "step": 55680 }, { "epoch": 0.23908880932141538, "grad_norm": 0.030421625822782516, "learning_rate": 7.641963384872762e-05, "loss": 0.17070308923721314, "step": 55690 }, { "epoch": 0.2391317414114354, "grad_norm": 2.331212043762207, "learning_rate": 7.641532212860999e-05, "loss": 0.2533195972442627, "step": 55700 }, { "epoch": 0.2391746735014554, "grad_norm": 18.21701431274414, "learning_rate": 7.641101040849237e-05, "loss": 0.3127460479736328, "step": 55710 }, { "epoch": 0.2392176055914754, "grad_norm": 2.263760566711426, "learning_rate": 7.640669868837475e-05, "loss": 0.2071403980255127, "step": 55720 }, { "epoch": 0.2392605376814954, "grad_norm": 0.051928453147411346, "learning_rate": 7.640238696825713e-05, "loss": 0.1774152398109436, "step": 55730 }, { "epoch": 0.23930346977151543, "grad_norm": 0.07828470319509506, "learning_rate": 7.63980752481395e-05, "loss": 0.04226884543895722, "step": 55740 }, { "epoch": 0.2393464018615354, "grad_norm": 0.04483730345964432, "learning_rate": 7.639376352802188e-05, "loss": 0.24645552635192872, "step": 55750 }, { "epoch": 0.23938933395155543, "grad_norm": 0.9185608625411987, "learning_rate": 7.638945180790424e-05, "loss": 0.3080772638320923, "step": 55760 }, { "epoch": 0.23943226604157544, "grad_norm": 0.9898508787155151, "learning_rate": 7.638514008778662e-05, "loss": 0.25601682662963865, "step": 55770 }, { "epoch": 0.23947519813159543, "grad_norm": 0.08774905651807785, "learning_rate": 7.6380828367669e-05, "loss": 0.4679861545562744, "step": 55780 }, { "epoch": 0.23951813022161544, "grad_norm": 0.374380886554718, "learning_rate": 7.637651664755138e-05, "loss": 0.4750388145446777, "step": 55790 }, { "epoch": 0.23956106231163546, "grad_norm": 1.6377544403076172, "learning_rate": 7.637220492743375e-05, "loss": 0.12196850776672363, "step": 55800 }, { "epoch": 0.23960399440165547, "grad_norm": 0.02132430486381054, "learning_rate": 7.636789320731613e-05, "loss": 0.3349509000778198, "step": 55810 }, { "epoch": 0.23964692649167546, "grad_norm": 0.2864157259464264, "learning_rate": 7.636358148719851e-05, "loss": 0.1535439133644104, "step": 55820 }, { "epoch": 0.23968985858169548, "grad_norm": 0.0163569413125515, "learning_rate": 7.635926976708089e-05, "loss": 0.261598801612854, "step": 55830 }, { "epoch": 0.2397327906717155, "grad_norm": 1.0393491983413696, "learning_rate": 7.635495804696325e-05, "loss": 0.3698171377182007, "step": 55840 }, { "epoch": 0.23977572276173548, "grad_norm": 0.13531573116779327, "learning_rate": 7.635064632684563e-05, "loss": 0.23707644939422606, "step": 55850 }, { "epoch": 0.2398186548517555, "grad_norm": 0.01517938356846571, "learning_rate": 7.6346334606728e-05, "loss": 0.30153985023498536, "step": 55860 }, { "epoch": 0.2398615869417755, "grad_norm": 1.438055396080017, "learning_rate": 7.634202288661038e-05, "loss": 0.09777184724807739, "step": 55870 }, { "epoch": 0.2399045190317955, "grad_norm": 0.011465308256447315, "learning_rate": 7.633771116649276e-05, "loss": 0.46697440147399905, "step": 55880 }, { "epoch": 0.2399474511218155, "grad_norm": 0.008004733361303806, "learning_rate": 7.633339944637514e-05, "loss": 0.31014342308044435, "step": 55890 }, { "epoch": 0.23999038321183552, "grad_norm": 0.11556950956583023, "learning_rate": 7.632908772625753e-05, "loss": 0.33110072612762453, "step": 55900 }, { "epoch": 0.24003331530185554, "grad_norm": 0.05171780288219452, "learning_rate": 7.63247760061399e-05, "loss": 0.10481221675872802, "step": 55910 }, { "epoch": 0.24007624739187552, "grad_norm": 17.309627532958984, "learning_rate": 7.632046428602227e-05, "loss": 0.21331896781921386, "step": 55920 }, { "epoch": 0.24011917948189554, "grad_norm": 0.24218101799488068, "learning_rate": 7.631615256590464e-05, "loss": 0.2619100332260132, "step": 55930 }, { "epoch": 0.24016211157191555, "grad_norm": 3.7106101512908936, "learning_rate": 7.631184084578702e-05, "loss": 0.42134804725646974, "step": 55940 }, { "epoch": 0.24020504366193554, "grad_norm": 0.018480826169252396, "learning_rate": 7.63075291256694e-05, "loss": 0.2024019479751587, "step": 55950 }, { "epoch": 0.24024797575195556, "grad_norm": 0.6436042785644531, "learning_rate": 7.630321740555178e-05, "loss": 0.3535203218460083, "step": 55960 }, { "epoch": 0.24029090784197557, "grad_norm": 0.06518207490444183, "learning_rate": 7.629890568543415e-05, "loss": 0.14140788316726685, "step": 55970 }, { "epoch": 0.24033383993199556, "grad_norm": 0.10466308891773224, "learning_rate": 7.629459396531653e-05, "loss": 0.017547784745693205, "step": 55980 }, { "epoch": 0.24037677202201557, "grad_norm": 0.03298651799559593, "learning_rate": 7.629028224519891e-05, "loss": 0.12694171667099, "step": 55990 }, { "epoch": 0.2404197041120356, "grad_norm": 0.0035074560437351465, "learning_rate": 7.628597052508129e-05, "loss": 0.25837092399597167, "step": 56000 }, { "epoch": 0.2404197041120356, "eval_loss": 0.4490428566932678, "eval_runtime": 27.3737, "eval_samples_per_second": 3.653, "eval_steps_per_second": 3.653, "step": 56000 }, { "epoch": 0.2404626362020556, "grad_norm": 1.269037127494812, "learning_rate": 7.628165880496365e-05, "loss": 0.3254170656204224, "step": 56010 }, { "epoch": 0.2405055682920756, "grad_norm": 0.20150920748710632, "learning_rate": 7.627734708484603e-05, "loss": 0.28833024501800536, "step": 56020 }, { "epoch": 0.2405485003820956, "grad_norm": 2.5157318115234375, "learning_rate": 7.62730353647284e-05, "loss": 0.4094663143157959, "step": 56030 }, { "epoch": 0.24059143247211562, "grad_norm": 0.044199321419000626, "learning_rate": 7.626872364461078e-05, "loss": 0.17894190549850464, "step": 56040 }, { "epoch": 0.2406343645621356, "grad_norm": 0.2634064257144928, "learning_rate": 7.626441192449316e-05, "loss": 0.29080748558044434, "step": 56050 }, { "epoch": 0.24067729665215562, "grad_norm": 0.024507373571395874, "learning_rate": 7.626010020437554e-05, "loss": 0.22542548179626465, "step": 56060 }, { "epoch": 0.24072022874217563, "grad_norm": 1.499601125717163, "learning_rate": 7.625578848425791e-05, "loss": 0.41116743087768554, "step": 56070 }, { "epoch": 0.24076316083219562, "grad_norm": 0.085579052567482, "learning_rate": 7.625147676414029e-05, "loss": 0.06765444278717041, "step": 56080 }, { "epoch": 0.24080609292221564, "grad_norm": 2.3689451217651367, "learning_rate": 7.624716504402266e-05, "loss": 0.2160788059234619, "step": 56090 }, { "epoch": 0.24084902501223565, "grad_norm": 0.9572241306304932, "learning_rate": 7.624285332390503e-05, "loss": 0.5079023361206054, "step": 56100 }, { "epoch": 0.24089195710225564, "grad_norm": 0.2550654709339142, "learning_rate": 7.623854160378741e-05, "loss": 0.23361270427703856, "step": 56110 }, { "epoch": 0.24093488919227565, "grad_norm": 0.27372607588768005, "learning_rate": 7.62342298836698e-05, "loss": 0.20449163913726806, "step": 56120 }, { "epoch": 0.24097782128229567, "grad_norm": 0.12788492441177368, "learning_rate": 7.622991816355218e-05, "loss": 0.2756718873977661, "step": 56130 }, { "epoch": 0.24102075337231568, "grad_norm": 0.03664913773536682, "learning_rate": 7.622560644343456e-05, "loss": 0.0964701533317566, "step": 56140 }, { "epoch": 0.24106368546233567, "grad_norm": 1.1955845355987549, "learning_rate": 7.622129472331693e-05, "loss": 0.5049872398376465, "step": 56150 }, { "epoch": 0.24110661755235568, "grad_norm": 0.07352086156606674, "learning_rate": 7.621698300319931e-05, "loss": 0.31375598907470703, "step": 56160 }, { "epoch": 0.2411495496423757, "grad_norm": 0.02647421695291996, "learning_rate": 7.621267128308167e-05, "loss": 0.3265992164611816, "step": 56170 }, { "epoch": 0.24119248173239569, "grad_norm": 1.968076467514038, "learning_rate": 7.620835956296405e-05, "loss": 0.2549333095550537, "step": 56180 }, { "epoch": 0.2412354138224157, "grad_norm": 0.08361245691776276, "learning_rate": 7.620404784284643e-05, "loss": 0.0806192398071289, "step": 56190 }, { "epoch": 0.24127834591243572, "grad_norm": 3.567979097366333, "learning_rate": 7.61997361227288e-05, "loss": 0.24559214115142822, "step": 56200 }, { "epoch": 0.2413212780024557, "grad_norm": 0.0898161232471466, "learning_rate": 7.619542440261118e-05, "loss": 0.051443439722061154, "step": 56210 }, { "epoch": 0.24136421009247572, "grad_norm": 2.6911261081695557, "learning_rate": 7.619111268249356e-05, "loss": 0.3605805397033691, "step": 56220 }, { "epoch": 0.24140714218249573, "grad_norm": 0.23818203806877136, "learning_rate": 7.618680096237594e-05, "loss": 0.4218717575073242, "step": 56230 }, { "epoch": 0.24145007427251575, "grad_norm": 0.10996730625629425, "learning_rate": 7.618248924225832e-05, "loss": 0.09531922936439514, "step": 56240 }, { "epoch": 0.24149300636253573, "grad_norm": 0.03727518394589424, "learning_rate": 7.617817752214068e-05, "loss": 0.0390720546245575, "step": 56250 }, { "epoch": 0.24153593845255575, "grad_norm": 0.011419006623327732, "learning_rate": 7.617386580202306e-05, "loss": 0.09541901350021362, "step": 56260 }, { "epoch": 0.24157887054257576, "grad_norm": 0.03283700719475746, "learning_rate": 7.616955408190543e-05, "loss": 0.06369619965553283, "step": 56270 }, { "epoch": 0.24162180263259575, "grad_norm": 0.012161415070295334, "learning_rate": 7.616524236178781e-05, "loss": 0.002637416496872902, "step": 56280 }, { "epoch": 0.24166473472261576, "grad_norm": 2.356962203979492, "learning_rate": 7.616093064167019e-05, "loss": 0.15238213539123535, "step": 56290 }, { "epoch": 0.24170766681263578, "grad_norm": 0.002443671924993396, "learning_rate": 7.615661892155257e-05, "loss": 0.351188063621521, "step": 56300 }, { "epoch": 0.24175059890265577, "grad_norm": 0.008870512247085571, "learning_rate": 7.615230720143494e-05, "loss": 0.39712421894073485, "step": 56310 }, { "epoch": 0.24179353099267578, "grad_norm": 0.04952370002865791, "learning_rate": 7.614799548131732e-05, "loss": 0.25222814083099365, "step": 56320 }, { "epoch": 0.2418364630826958, "grad_norm": 0.015394582413136959, "learning_rate": 7.614368376119968e-05, "loss": 0.1964707851409912, "step": 56330 }, { "epoch": 0.2418793951727158, "grad_norm": 0.09816809743642807, "learning_rate": 7.613937204108208e-05, "loss": 0.15900418758392335, "step": 56340 }, { "epoch": 0.2419223272627358, "grad_norm": 0.009692016057670116, "learning_rate": 7.613506032096445e-05, "loss": 0.2867144584655762, "step": 56350 }, { "epoch": 0.2419652593527558, "grad_norm": 0.028097325935959816, "learning_rate": 7.613074860084683e-05, "loss": 0.17743611335754395, "step": 56360 }, { "epoch": 0.24200819144277583, "grad_norm": 0.5458617210388184, "learning_rate": 7.612643688072921e-05, "loss": 0.2927206754684448, "step": 56370 }, { "epoch": 0.24205112353279581, "grad_norm": 2.905992031097412, "learning_rate": 7.612212516061158e-05, "loss": 0.3399640083312988, "step": 56380 }, { "epoch": 0.24209405562281583, "grad_norm": 0.04598947614431381, "learning_rate": 7.611781344049396e-05, "loss": 0.27964622974395753, "step": 56390 }, { "epoch": 0.24213698771283584, "grad_norm": 1.6085865497589111, "learning_rate": 7.611350172037634e-05, "loss": 0.36804425716400146, "step": 56400 }, { "epoch": 0.24217991980285583, "grad_norm": 2.989314556121826, "learning_rate": 7.610919000025872e-05, "loss": 0.3538304328918457, "step": 56410 }, { "epoch": 0.24222285189287585, "grad_norm": 0.038225866854190826, "learning_rate": 7.610487828014108e-05, "loss": 0.28265354633331297, "step": 56420 }, { "epoch": 0.24226578398289586, "grad_norm": 0.040036361664533615, "learning_rate": 7.610056656002346e-05, "loss": 0.2343906879425049, "step": 56430 }, { "epoch": 0.24230871607291588, "grad_norm": 0.022909799590706825, "learning_rate": 7.609625483990584e-05, "loss": 0.26263716220855715, "step": 56440 }, { "epoch": 0.24235164816293586, "grad_norm": 0.19417926669120789, "learning_rate": 7.609194311978821e-05, "loss": 0.1913734793663025, "step": 56450 }, { "epoch": 0.24239458025295588, "grad_norm": 0.0552341528236866, "learning_rate": 7.608763139967059e-05, "loss": 0.14965476989746093, "step": 56460 }, { "epoch": 0.2424375123429759, "grad_norm": 0.03202309086918831, "learning_rate": 7.608331967955297e-05, "loss": 0.21325488090515138, "step": 56470 }, { "epoch": 0.24248044443299588, "grad_norm": 3.847653388977051, "learning_rate": 7.607900795943534e-05, "loss": 0.3930112600326538, "step": 56480 }, { "epoch": 0.2425233765230159, "grad_norm": 9.966902732849121, "learning_rate": 7.607469623931772e-05, "loss": 0.14765695333480836, "step": 56490 }, { "epoch": 0.2425663086130359, "grad_norm": 7.9163408279418945, "learning_rate": 7.607038451920009e-05, "loss": 0.21990103721618653, "step": 56500 }, { "epoch": 0.2426092407030559, "grad_norm": 0.0343107134103775, "learning_rate": 7.606607279908246e-05, "loss": 0.2227553129196167, "step": 56510 }, { "epoch": 0.2426521727930759, "grad_norm": 0.019609419628977776, "learning_rate": 7.606176107896484e-05, "loss": 0.264559006690979, "step": 56520 }, { "epoch": 0.24269510488309592, "grad_norm": 0.00810485053807497, "learning_rate": 7.605744935884722e-05, "loss": 0.27818167209625244, "step": 56530 }, { "epoch": 0.2427380369731159, "grad_norm": 0.00926060788333416, "learning_rate": 7.60531376387296e-05, "loss": 0.07828181982040405, "step": 56540 }, { "epoch": 0.24278096906313593, "grad_norm": 2.1390297412872314, "learning_rate": 7.604882591861197e-05, "loss": 0.35963263511657717, "step": 56550 }, { "epoch": 0.24282390115315594, "grad_norm": 1.0619257688522339, "learning_rate": 7.604451419849435e-05, "loss": 0.1911607027053833, "step": 56560 }, { "epoch": 0.24286683324317596, "grad_norm": 1.8809216022491455, "learning_rate": 7.604020247837673e-05, "loss": 0.27733912467956545, "step": 56570 }, { "epoch": 0.24290976533319594, "grad_norm": 0.45261240005493164, "learning_rate": 7.60358907582591e-05, "loss": 0.2474360704421997, "step": 56580 }, { "epoch": 0.24295269742321596, "grad_norm": 0.0851302370429039, "learning_rate": 7.603157903814148e-05, "loss": 0.24859883785247802, "step": 56590 }, { "epoch": 0.24299562951323597, "grad_norm": 0.07440662384033203, "learning_rate": 7.602726731802386e-05, "loss": 0.15816271305084229, "step": 56600 }, { "epoch": 0.24303856160325596, "grad_norm": 0.008213400840759277, "learning_rate": 7.602295559790624e-05, "loss": 0.13967883586883545, "step": 56610 }, { "epoch": 0.24308149369327597, "grad_norm": 1.484141230583191, "learning_rate": 7.601864387778861e-05, "loss": 0.3525965690612793, "step": 56620 }, { "epoch": 0.243124425783296, "grad_norm": 0.016760317608714104, "learning_rate": 7.601433215767099e-05, "loss": 0.14842404127120973, "step": 56630 }, { "epoch": 0.24316735787331598, "grad_norm": 0.6866039633750916, "learning_rate": 7.601002043755337e-05, "loss": 0.42420759201049807, "step": 56640 }, { "epoch": 0.243210289963336, "grad_norm": 2.159764051437378, "learning_rate": 7.600570871743575e-05, "loss": 0.3373197317123413, "step": 56650 }, { "epoch": 0.243253222053356, "grad_norm": 3.4098398685455322, "learning_rate": 7.600139699731811e-05, "loss": 0.15713222026824952, "step": 56660 }, { "epoch": 0.24329615414337602, "grad_norm": 0.052497498691082, "learning_rate": 7.599708527720049e-05, "loss": 0.30463128089904784, "step": 56670 }, { "epoch": 0.243339086233396, "grad_norm": 0.9863664507865906, "learning_rate": 7.599277355708286e-05, "loss": 0.36327598094940183, "step": 56680 }, { "epoch": 0.24338201832341602, "grad_norm": 1.23508620262146, "learning_rate": 7.598846183696524e-05, "loss": 0.21613700389862062, "step": 56690 }, { "epoch": 0.24342495041343604, "grad_norm": 0.3627750873565674, "learning_rate": 7.598415011684762e-05, "loss": 0.5012299060821533, "step": 56700 }, { "epoch": 0.24346788250345602, "grad_norm": 0.17750516533851624, "learning_rate": 7.597983839673e-05, "loss": 0.20147082805633545, "step": 56710 }, { "epoch": 0.24351081459347604, "grad_norm": 0.028639402240514755, "learning_rate": 7.597552667661237e-05, "loss": 0.35656213760375977, "step": 56720 }, { "epoch": 0.24355374668349605, "grad_norm": 0.027626140043139458, "learning_rate": 7.597121495649475e-05, "loss": 0.23855340480804443, "step": 56730 }, { "epoch": 0.24359667877351604, "grad_norm": 1.3562794923782349, "learning_rate": 7.596690323637713e-05, "loss": 0.45727100372314455, "step": 56740 }, { "epoch": 0.24363961086353605, "grad_norm": 8.56227970123291, "learning_rate": 7.596259151625949e-05, "loss": 0.29800164699554443, "step": 56750 }, { "epoch": 0.24368254295355607, "grad_norm": 3.818679094314575, "learning_rate": 7.595827979614187e-05, "loss": 0.2236952304840088, "step": 56760 }, { "epoch": 0.24372547504357608, "grad_norm": 0.007640776690095663, "learning_rate": 7.595396807602425e-05, "loss": 0.18620272874832153, "step": 56770 }, { "epoch": 0.24376840713359607, "grad_norm": 0.2855760455131531, "learning_rate": 7.594965635590662e-05, "loss": 0.38433017730712893, "step": 56780 }, { "epoch": 0.24381133922361609, "grad_norm": 2.060290813446045, "learning_rate": 7.5945344635789e-05, "loss": 0.30763907432556153, "step": 56790 }, { "epoch": 0.2438542713136361, "grad_norm": 0.3530263900756836, "learning_rate": 7.594103291567138e-05, "loss": 0.3436635971069336, "step": 56800 }, { "epoch": 0.2438972034036561, "grad_norm": 3.5977604389190674, "learning_rate": 7.593672119555376e-05, "loss": 0.3365695714950562, "step": 56810 }, { "epoch": 0.2439401354936761, "grad_norm": 2.2566967010498047, "learning_rate": 7.593240947543613e-05, "loss": 0.23258376121520996, "step": 56820 }, { "epoch": 0.24398306758369612, "grad_norm": 2.2684240341186523, "learning_rate": 7.592809775531851e-05, "loss": 0.3743330240249634, "step": 56830 }, { "epoch": 0.2440259996737161, "grad_norm": 5.0866193771362305, "learning_rate": 7.592378603520089e-05, "loss": 0.28389415740966795, "step": 56840 }, { "epoch": 0.24406893176373612, "grad_norm": 0.016268953680992126, "learning_rate": 7.591947431508327e-05, "loss": 0.11955786943435669, "step": 56850 }, { "epoch": 0.24411186385375613, "grad_norm": 0.507336437702179, "learning_rate": 7.591516259496564e-05, "loss": 0.2459397315979004, "step": 56860 }, { "epoch": 0.24415479594377615, "grad_norm": 3.0881454944610596, "learning_rate": 7.591085087484802e-05, "loss": 0.21296381950378418, "step": 56870 }, { "epoch": 0.24419772803379614, "grad_norm": 3.4023799896240234, "learning_rate": 7.59065391547304e-05, "loss": 0.14938076734542846, "step": 56880 }, { "epoch": 0.24424066012381615, "grad_norm": 3.494509696960449, "learning_rate": 7.590222743461277e-05, "loss": 0.3454356908798218, "step": 56890 }, { "epoch": 0.24428359221383616, "grad_norm": 0.10136851668357849, "learning_rate": 7.589791571449515e-05, "loss": 0.1815933346748352, "step": 56900 }, { "epoch": 0.24432652430385615, "grad_norm": 0.017074063420295715, "learning_rate": 7.589360399437752e-05, "loss": 0.1557396173477173, "step": 56910 }, { "epoch": 0.24436945639387617, "grad_norm": 0.008806165307760239, "learning_rate": 7.58892922742599e-05, "loss": 0.01468321681022644, "step": 56920 }, { "epoch": 0.24441238848389618, "grad_norm": 0.004361881874501705, "learning_rate": 7.588498055414227e-05, "loss": 0.18163535594940186, "step": 56930 }, { "epoch": 0.24445532057391617, "grad_norm": 2.808199167251587, "learning_rate": 7.588066883402465e-05, "loss": 0.13877879381179808, "step": 56940 }, { "epoch": 0.24449825266393618, "grad_norm": 0.0035761999897658825, "learning_rate": 7.587635711390703e-05, "loss": 0.23683485984802247, "step": 56950 }, { "epoch": 0.2445411847539562, "grad_norm": 0.0018882449949160218, "learning_rate": 7.58720453937894e-05, "loss": 0.040769991278648374, "step": 56960 }, { "epoch": 0.24458411684397618, "grad_norm": 0.0017095755320042372, "learning_rate": 7.586773367367178e-05, "loss": 0.07406737804412841, "step": 56970 }, { "epoch": 0.2446270489339962, "grad_norm": 0.0053458381444215775, "learning_rate": 7.586342195355416e-05, "loss": 0.19818114042282103, "step": 56980 }, { "epoch": 0.24466998102401621, "grad_norm": 0.2893374264240265, "learning_rate": 7.585911023343652e-05, "loss": 0.06889631152153015, "step": 56990 }, { "epoch": 0.24471291311403623, "grad_norm": 0.43643325567245483, "learning_rate": 7.58547985133189e-05, "loss": 0.11115390062332153, "step": 57000 }, { "epoch": 0.24471291311403623, "eval_loss": 0.45208680629730225, "eval_runtime": 27.4112, "eval_samples_per_second": 3.648, "eval_steps_per_second": 3.648, "step": 57000 }, { "epoch": 0.24475584520405622, "grad_norm": 2.3712055683135986, "learning_rate": 7.585048679320128e-05, "loss": 0.2907646894454956, "step": 57010 }, { "epoch": 0.24479877729407623, "grad_norm": 2.5993006229400635, "learning_rate": 7.584617507308365e-05, "loss": 0.2117297887802124, "step": 57020 }, { "epoch": 0.24484170938409625, "grad_norm": 1.6458356380462646, "learning_rate": 7.584186335296603e-05, "loss": 0.3011794567108154, "step": 57030 }, { "epoch": 0.24488464147411623, "grad_norm": 0.010335003025829792, "learning_rate": 7.583755163284841e-05, "loss": 0.1275590181350708, "step": 57040 }, { "epoch": 0.24492757356413625, "grad_norm": 0.008200617507100105, "learning_rate": 7.583323991273079e-05, "loss": 0.0024192286655306816, "step": 57050 }, { "epoch": 0.24497050565415626, "grad_norm": 1.3849883079528809, "learning_rate": 7.582892819261316e-05, "loss": 0.23618090152740479, "step": 57060 }, { "epoch": 0.24501343774417625, "grad_norm": 0.004566836636513472, "learning_rate": 7.582461647249554e-05, "loss": 0.21608200073242187, "step": 57070 }, { "epoch": 0.24505636983419626, "grad_norm": 0.018896836787462234, "learning_rate": 7.582030475237792e-05, "loss": 0.2848828792572021, "step": 57080 }, { "epoch": 0.24509930192421628, "grad_norm": 0.02968161180615425, "learning_rate": 7.58159930322603e-05, "loss": 0.3523215055465698, "step": 57090 }, { "epoch": 0.2451422340142363, "grad_norm": 1.484802007675171, "learning_rate": 7.581168131214267e-05, "loss": 0.2556891441345215, "step": 57100 }, { "epoch": 0.24518516610425628, "grad_norm": 1.1301987171173096, "learning_rate": 7.580736959202505e-05, "loss": 0.34354567527770996, "step": 57110 }, { "epoch": 0.2452280981942763, "grad_norm": 0.0026234660763293505, "learning_rate": 7.580305787190743e-05, "loss": 0.11568312644958496, "step": 57120 }, { "epoch": 0.2452710302842963, "grad_norm": 0.03418371081352234, "learning_rate": 7.57987461517898e-05, "loss": 0.31449136734008787, "step": 57130 }, { "epoch": 0.2453139623743163, "grad_norm": 9.43087100982666, "learning_rate": 7.579443443167218e-05, "loss": 0.3676129341125488, "step": 57140 }, { "epoch": 0.2453568944643363, "grad_norm": 0.024150878190994263, "learning_rate": 7.579012271155456e-05, "loss": 0.19816354513168336, "step": 57150 }, { "epoch": 0.24539982655435633, "grad_norm": 0.05833771079778671, "learning_rate": 7.578581099143692e-05, "loss": 0.2208428144454956, "step": 57160 }, { "epoch": 0.2454427586443763, "grad_norm": 0.1591957062482834, "learning_rate": 7.57814992713193e-05, "loss": 0.11497148275375366, "step": 57170 }, { "epoch": 0.24548569073439633, "grad_norm": 0.15572971105575562, "learning_rate": 7.577718755120168e-05, "loss": 0.3440711975097656, "step": 57180 }, { "epoch": 0.24552862282441634, "grad_norm": 2.8513052463531494, "learning_rate": 7.577287583108405e-05, "loss": 0.46184988021850587, "step": 57190 }, { "epoch": 0.24557155491443636, "grad_norm": 1.0112202167510986, "learning_rate": 7.576856411096643e-05, "loss": 0.22247018814086914, "step": 57200 }, { "epoch": 0.24561448700445634, "grad_norm": 0.9999586939811707, "learning_rate": 7.576425239084881e-05, "loss": 0.32436323165893555, "step": 57210 }, { "epoch": 0.24565741909447636, "grad_norm": 16.790267944335938, "learning_rate": 7.575994067073119e-05, "loss": 0.3138288974761963, "step": 57220 }, { "epoch": 0.24570035118449637, "grad_norm": 1.5769797563552856, "learning_rate": 7.575562895061356e-05, "loss": 0.26715841293334963, "step": 57230 }, { "epoch": 0.24574328327451636, "grad_norm": 0.003906694240868092, "learning_rate": 7.575131723049593e-05, "loss": 0.398896598815918, "step": 57240 }, { "epoch": 0.24578621536453638, "grad_norm": 0.04367861524224281, "learning_rate": 7.57470055103783e-05, "loss": 0.14916378259658813, "step": 57250 }, { "epoch": 0.2458291474545564, "grad_norm": 0.001283388352021575, "learning_rate": 7.574269379026068e-05, "loss": 0.2797567367553711, "step": 57260 }, { "epoch": 0.24587207954457638, "grad_norm": 0.13986949622631073, "learning_rate": 7.573838207014306e-05, "loss": 0.1595933437347412, "step": 57270 }, { "epoch": 0.2459150116345964, "grad_norm": 0.010912904515862465, "learning_rate": 7.573407035002544e-05, "loss": 0.2850444555282593, "step": 57280 }, { "epoch": 0.2459579437246164, "grad_norm": 1.6686103343963623, "learning_rate": 7.572975862990781e-05, "loss": 0.36696457862854004, "step": 57290 }, { "epoch": 0.24600087581463642, "grad_norm": 0.0024620601907372475, "learning_rate": 7.57254469097902e-05, "loss": 0.18678882122039794, "step": 57300 }, { "epoch": 0.2460438079046564, "grad_norm": 0.10246946662664413, "learning_rate": 7.572113518967258e-05, "loss": 0.15630044937133789, "step": 57310 }, { "epoch": 0.24608673999467642, "grad_norm": 0.022064056247472763, "learning_rate": 7.571682346955495e-05, "loss": 0.05567708015441895, "step": 57320 }, { "epoch": 0.24612967208469644, "grad_norm": 0.002969966037198901, "learning_rate": 7.571251174943732e-05, "loss": 0.36348717212677, "step": 57330 }, { "epoch": 0.24617260417471643, "grad_norm": 0.17457488179206848, "learning_rate": 7.57082000293197e-05, "loss": 0.33927862644195556, "step": 57340 }, { "epoch": 0.24621553626473644, "grad_norm": 1.373376488685608, "learning_rate": 7.570388830920208e-05, "loss": 0.2118394136428833, "step": 57350 }, { "epoch": 0.24625846835475645, "grad_norm": 0.04611791670322418, "learning_rate": 7.569957658908446e-05, "loss": 0.18210265636444092, "step": 57360 }, { "epoch": 0.24630140044477644, "grad_norm": 0.9812759160995483, "learning_rate": 7.569526486896683e-05, "loss": 0.1476440668106079, "step": 57370 }, { "epoch": 0.24634433253479646, "grad_norm": 0.03836577385663986, "learning_rate": 7.569095314884921e-05, "loss": 0.22062788009643555, "step": 57380 }, { "epoch": 0.24638726462481647, "grad_norm": 0.0069273305125534534, "learning_rate": 7.568664142873159e-05, "loss": 0.38461480140686033, "step": 57390 }, { "epoch": 0.24643019671483646, "grad_norm": 0.07969487458467484, "learning_rate": 7.568232970861395e-05, "loss": 0.1768990635871887, "step": 57400 }, { "epoch": 0.24647312880485647, "grad_norm": 0.04361164569854736, "learning_rate": 7.567801798849633e-05, "loss": 0.3352889776229858, "step": 57410 }, { "epoch": 0.2465160608948765, "grad_norm": 0.12355928122997284, "learning_rate": 7.56737062683787e-05, "loss": 0.3005270719528198, "step": 57420 }, { "epoch": 0.2465589929848965, "grad_norm": 0.0032305035274475813, "learning_rate": 7.566939454826108e-05, "loss": 0.39624905586242676, "step": 57430 }, { "epoch": 0.2466019250749165, "grad_norm": 11.76025676727295, "learning_rate": 7.566508282814346e-05, "loss": 0.2909027814865112, "step": 57440 }, { "epoch": 0.2466448571649365, "grad_norm": 0.0032621347345411777, "learning_rate": 7.566077110802584e-05, "loss": 0.3369508028030396, "step": 57450 }, { "epoch": 0.24668778925495652, "grad_norm": 1.9829251766204834, "learning_rate": 7.565645938790822e-05, "loss": 0.4466217517852783, "step": 57460 }, { "epoch": 0.2467307213449765, "grad_norm": 1.7846029996871948, "learning_rate": 7.565214766779059e-05, "loss": 0.10857169628143311, "step": 57470 }, { "epoch": 0.24677365343499652, "grad_norm": 0.6172264218330383, "learning_rate": 7.564783594767297e-05, "loss": 0.3345313787460327, "step": 57480 }, { "epoch": 0.24681658552501654, "grad_norm": 0.24104170501232147, "learning_rate": 7.564352422755533e-05, "loss": 0.202384614944458, "step": 57490 }, { "epoch": 0.24685951761503652, "grad_norm": 1.0128809213638306, "learning_rate": 7.563921250743771e-05, "loss": 0.12638015747070314, "step": 57500 }, { "epoch": 0.24690244970505654, "grad_norm": 5.046428203582764, "learning_rate": 7.563490078732009e-05, "loss": 0.18070136308670043, "step": 57510 }, { "epoch": 0.24694538179507655, "grad_norm": 0.14580535888671875, "learning_rate": 7.563058906720248e-05, "loss": 0.12394511699676514, "step": 57520 }, { "epoch": 0.24698831388509657, "grad_norm": 3.3323802947998047, "learning_rate": 7.562627734708486e-05, "loss": 0.1436055302619934, "step": 57530 }, { "epoch": 0.24703124597511655, "grad_norm": 0.06035997346043587, "learning_rate": 7.562196562696723e-05, "loss": 0.17422177791595458, "step": 57540 }, { "epoch": 0.24707417806513657, "grad_norm": 0.04609677940607071, "learning_rate": 7.561765390684961e-05, "loss": 0.3345479965209961, "step": 57550 }, { "epoch": 0.24711711015515658, "grad_norm": 0.022857768461108208, "learning_rate": 7.561334218673199e-05, "loss": 0.061411714553833006, "step": 57560 }, { "epoch": 0.24716004224517657, "grad_norm": 0.03906315565109253, "learning_rate": 7.560903046661435e-05, "loss": 0.2037139892578125, "step": 57570 }, { "epoch": 0.24720297433519658, "grad_norm": 2.5178000926971436, "learning_rate": 7.560471874649673e-05, "loss": 0.24743380546569824, "step": 57580 }, { "epoch": 0.2472459064252166, "grad_norm": 0.10384848713874817, "learning_rate": 7.560040702637911e-05, "loss": 0.2382965087890625, "step": 57590 }, { "epoch": 0.2472888385152366, "grad_norm": 0.04842836409807205, "learning_rate": 7.559609530626149e-05, "loss": 0.13167185783386232, "step": 57600 }, { "epoch": 0.2473317706052566, "grad_norm": 0.111887127161026, "learning_rate": 7.559178358614386e-05, "loss": 0.07016860842704772, "step": 57610 }, { "epoch": 0.24737470269527662, "grad_norm": 0.675987184047699, "learning_rate": 7.558747186602624e-05, "loss": 0.20956764221191407, "step": 57620 }, { "epoch": 0.24741763478529663, "grad_norm": 0.003355368971824646, "learning_rate": 7.558316014590862e-05, "loss": 0.347391676902771, "step": 57630 }, { "epoch": 0.24746056687531662, "grad_norm": 1.6586114168167114, "learning_rate": 7.5578848425791e-05, "loss": 0.35160102844238283, "step": 57640 }, { "epoch": 0.24750349896533663, "grad_norm": 0.19788451492786407, "learning_rate": 7.557453670567336e-05, "loss": 0.1748092770576477, "step": 57650 }, { "epoch": 0.24754643105535665, "grad_norm": 3.0574142932891846, "learning_rate": 7.557022498555574e-05, "loss": 0.26805412769317627, "step": 57660 }, { "epoch": 0.24758936314537663, "grad_norm": 0.028504248708486557, "learning_rate": 7.556591326543811e-05, "loss": 0.44770083427429197, "step": 57670 }, { "epoch": 0.24763229523539665, "grad_norm": 1.19331693649292, "learning_rate": 7.556160154532049e-05, "loss": 0.2743785858154297, "step": 57680 }, { "epoch": 0.24767522732541666, "grad_norm": 14.593634605407715, "learning_rate": 7.555728982520287e-05, "loss": 0.14170204401016234, "step": 57690 }, { "epoch": 0.24771815941543665, "grad_norm": 0.3801772892475128, "learning_rate": 7.555297810508524e-05, "loss": 0.37049763202667235, "step": 57700 }, { "epoch": 0.24776109150545667, "grad_norm": 1.7399812936782837, "learning_rate": 7.554866638496762e-05, "loss": 0.31119160652160643, "step": 57710 }, { "epoch": 0.24780402359547668, "grad_norm": 16.13619613647461, "learning_rate": 7.554435466485e-05, "loss": 0.4131039619445801, "step": 57720 }, { "epoch": 0.2478469556854967, "grad_norm": 12.335932731628418, "learning_rate": 7.554004294473236e-05, "loss": 0.2520522356033325, "step": 57730 }, { "epoch": 0.24788988777551668, "grad_norm": 0.017610453069210052, "learning_rate": 7.553573122461475e-05, "loss": 0.143160879611969, "step": 57740 }, { "epoch": 0.2479328198655367, "grad_norm": 0.005706754047423601, "learning_rate": 7.553141950449713e-05, "loss": 0.09667201042175293, "step": 57750 }, { "epoch": 0.2479757519555567, "grad_norm": 0.4213595390319824, "learning_rate": 7.552710778437951e-05, "loss": 0.26541934013366697, "step": 57760 }, { "epoch": 0.2480186840455767, "grad_norm": 0.9437307715415955, "learning_rate": 7.552279606426189e-05, "loss": 0.5367563247680665, "step": 57770 }, { "epoch": 0.2480616161355967, "grad_norm": 0.007601315155625343, "learning_rate": 7.551848434414426e-05, "loss": 0.17007397413253783, "step": 57780 }, { "epoch": 0.24810454822561673, "grad_norm": 1.5910160541534424, "learning_rate": 7.551417262402664e-05, "loss": 0.20665650367736815, "step": 57790 }, { "epoch": 0.24814748031563671, "grad_norm": 0.00580306351184845, "learning_rate": 7.550986090390902e-05, "loss": 0.026889517903327942, "step": 57800 }, { "epoch": 0.24819041240565673, "grad_norm": 0.003274303860962391, "learning_rate": 7.55055491837914e-05, "loss": 0.3093209981918335, "step": 57810 }, { "epoch": 0.24823334449567674, "grad_norm": 0.10975901782512665, "learning_rate": 7.550123746367376e-05, "loss": 0.06779348254203796, "step": 57820 }, { "epoch": 0.24827627658569673, "grad_norm": 0.02362568862736225, "learning_rate": 7.549692574355614e-05, "loss": 0.16981412172317506, "step": 57830 }, { "epoch": 0.24831920867571675, "grad_norm": 0.4983203113079071, "learning_rate": 7.549261402343851e-05, "loss": 0.21854088306427003, "step": 57840 }, { "epoch": 0.24836214076573676, "grad_norm": 2.061974287033081, "learning_rate": 7.548830230332089e-05, "loss": 0.21698181629180907, "step": 57850 }, { "epoch": 0.24840507285575678, "grad_norm": 0.048223454505205154, "learning_rate": 7.548399058320327e-05, "loss": 0.12005207538604737, "step": 57860 }, { "epoch": 0.24844800494577676, "grad_norm": 1.0992143154144287, "learning_rate": 7.547967886308565e-05, "loss": 0.23123271465301515, "step": 57870 }, { "epoch": 0.24849093703579678, "grad_norm": 1.3549517393112183, "learning_rate": 7.547536714296802e-05, "loss": 0.46811304092407224, "step": 57880 }, { "epoch": 0.2485338691258168, "grad_norm": 0.07769492268562317, "learning_rate": 7.54710554228504e-05, "loss": 0.22488780021667482, "step": 57890 }, { "epoch": 0.24857680121583678, "grad_norm": 0.06210146099328995, "learning_rate": 7.546674370273276e-05, "loss": 0.3013421058654785, "step": 57900 }, { "epoch": 0.2486197333058568, "grad_norm": 0.14759461581707, "learning_rate": 7.546243198261514e-05, "loss": 0.1355152726173401, "step": 57910 }, { "epoch": 0.2486626653958768, "grad_norm": 1.5669615268707275, "learning_rate": 7.545812026249752e-05, "loss": 0.1003315806388855, "step": 57920 }, { "epoch": 0.2487055974858968, "grad_norm": 0.0019271537894383073, "learning_rate": 7.54538085423799e-05, "loss": 0.17151752710342408, "step": 57930 }, { "epoch": 0.2487485295759168, "grad_norm": 0.09990391135215759, "learning_rate": 7.544949682226227e-05, "loss": 0.2588262796401978, "step": 57940 }, { "epoch": 0.24879146166593682, "grad_norm": 1.4302527904510498, "learning_rate": 7.544518510214465e-05, "loss": 0.17463706731796264, "step": 57950 }, { "epoch": 0.24883439375595684, "grad_norm": 0.0026034568436443806, "learning_rate": 7.544087338202703e-05, "loss": 0.11903560161590576, "step": 57960 }, { "epoch": 0.24887732584597683, "grad_norm": 0.02368382178246975, "learning_rate": 7.54365616619094e-05, "loss": 0.1572781801223755, "step": 57970 }, { "epoch": 0.24892025793599684, "grad_norm": 0.5909692645072937, "learning_rate": 7.543224994179178e-05, "loss": 0.22923357486724855, "step": 57980 }, { "epoch": 0.24896319002601686, "grad_norm": 0.1352832168340683, "learning_rate": 7.542793822167416e-05, "loss": 0.5410185813903808, "step": 57990 }, { "epoch": 0.24900612211603684, "grad_norm": 0.6172003746032715, "learning_rate": 7.542362650155654e-05, "loss": 0.4828122615814209, "step": 58000 }, { "epoch": 0.24900612211603684, "eval_loss": 0.43727967143058777, "eval_runtime": 27.5929, "eval_samples_per_second": 3.624, "eval_steps_per_second": 3.624, "step": 58000 }, { "epoch": 0.24904905420605686, "grad_norm": 1.3846662044525146, "learning_rate": 7.541931478143892e-05, "loss": 0.2785909414291382, "step": 58010 }, { "epoch": 0.24909198629607687, "grad_norm": 1.3835780620574951, "learning_rate": 7.541500306132129e-05, "loss": 0.485788631439209, "step": 58020 }, { "epoch": 0.24913491838609686, "grad_norm": 0.17436742782592773, "learning_rate": 7.541069134120367e-05, "loss": 0.19860415458679198, "step": 58030 }, { "epoch": 0.24917785047611687, "grad_norm": 0.09022471308708191, "learning_rate": 7.540637962108605e-05, "loss": 0.11170189380645752, "step": 58040 }, { "epoch": 0.2492207825661369, "grad_norm": 0.00697419373318553, "learning_rate": 7.540206790096842e-05, "loss": 0.27677962779998777, "step": 58050 }, { "epoch": 0.2492637146561569, "grad_norm": 0.30201953649520874, "learning_rate": 7.539775618085079e-05, "loss": 0.07165834903717042, "step": 58060 }, { "epoch": 0.2493066467461769, "grad_norm": 1.4676119089126587, "learning_rate": 7.539344446073317e-05, "loss": 0.29705684185028075, "step": 58070 }, { "epoch": 0.2493495788361969, "grad_norm": 0.36478012800216675, "learning_rate": 7.538913274061554e-05, "loss": 0.25623486042022703, "step": 58080 }, { "epoch": 0.24939251092621692, "grad_norm": 0.09869907796382904, "learning_rate": 7.538482102049792e-05, "loss": 0.40760035514831544, "step": 58090 }, { "epoch": 0.2494354430162369, "grad_norm": 1.5321518182754517, "learning_rate": 7.53805093003803e-05, "loss": 0.3262056827545166, "step": 58100 }, { "epoch": 0.24947837510625692, "grad_norm": 0.03607706353068352, "learning_rate": 7.537619758026268e-05, "loss": 0.1874048590660095, "step": 58110 }, { "epoch": 0.24952130719627694, "grad_norm": 0.04087758809328079, "learning_rate": 7.537188586014505e-05, "loss": 0.24982888698577882, "step": 58120 }, { "epoch": 0.24956423928629692, "grad_norm": 2.5731465816497803, "learning_rate": 7.536757414002743e-05, "loss": 0.12112574577331543, "step": 58130 }, { "epoch": 0.24960717137631694, "grad_norm": 0.34729182720184326, "learning_rate": 7.53632624199098e-05, "loss": 0.3160462141036987, "step": 58140 }, { "epoch": 0.24965010346633695, "grad_norm": 0.011773771606385708, "learning_rate": 7.535895069979217e-05, "loss": 0.121714186668396, "step": 58150 }, { "epoch": 0.24969303555635697, "grad_norm": 0.1527256816625595, "learning_rate": 7.535463897967455e-05, "loss": 0.2888143301010132, "step": 58160 }, { "epoch": 0.24973596764637696, "grad_norm": 3.5751779079437256, "learning_rate": 7.535032725955693e-05, "loss": 0.14579278230667114, "step": 58170 }, { "epoch": 0.24977889973639697, "grad_norm": 0.0369403250515461, "learning_rate": 7.53460155394393e-05, "loss": 0.14296526908874513, "step": 58180 }, { "epoch": 0.24982183182641698, "grad_norm": 0.06302861869335175, "learning_rate": 7.534170381932168e-05, "loss": 0.3374220848083496, "step": 58190 }, { "epoch": 0.24986476391643697, "grad_norm": 1.6855621337890625, "learning_rate": 7.533739209920406e-05, "loss": 0.4434662818908691, "step": 58200 }, { "epoch": 0.249907696006457, "grad_norm": 0.0061494093388319016, "learning_rate": 7.533308037908644e-05, "loss": 0.16283658742904664, "step": 58210 }, { "epoch": 0.249950628096477, "grad_norm": 0.04007069393992424, "learning_rate": 7.532876865896881e-05, "loss": 0.08710308074951172, "step": 58220 }, { "epoch": 0.249993560186497, "grad_norm": 0.1617412120103836, "learning_rate": 7.532445693885119e-05, "loss": 0.3287935256958008, "step": 58230 }, { "epoch": 0.25003649227651703, "grad_norm": 0.0027142164763063192, "learning_rate": 7.532014521873357e-05, "loss": 0.07832266092300415, "step": 58240 }, { "epoch": 0.250079424366537, "grad_norm": 0.02477000653743744, "learning_rate": 7.531583349861594e-05, "loss": 0.17995315790176392, "step": 58250 }, { "epoch": 0.250122356456557, "grad_norm": 0.25469478964805603, "learning_rate": 7.531152177849832e-05, "loss": 0.439362907409668, "step": 58260 }, { "epoch": 0.25016528854657705, "grad_norm": 0.14019158482551575, "learning_rate": 7.53072100583807e-05, "loss": 0.160223650932312, "step": 58270 }, { "epoch": 0.25020822063659703, "grad_norm": 0.09695712476968765, "learning_rate": 7.530289833826308e-05, "loss": 0.15709249973297118, "step": 58280 }, { "epoch": 0.250251152726617, "grad_norm": 1.4217171669006348, "learning_rate": 7.529858661814545e-05, "loss": 0.1981913208961487, "step": 58290 }, { "epoch": 0.25029408481663706, "grad_norm": 0.00611657602712512, "learning_rate": 7.529427489802783e-05, "loss": 0.16728440523147584, "step": 58300 }, { "epoch": 0.25033701690665705, "grad_norm": 0.018936268985271454, "learning_rate": 7.52899631779102e-05, "loss": 0.11950817108154296, "step": 58310 }, { "epoch": 0.25037994899667704, "grad_norm": 0.23093880712985992, "learning_rate": 7.528565145779257e-05, "loss": 0.1709181547164917, "step": 58320 }, { "epoch": 0.2504228810866971, "grad_norm": 2.0191850662231445, "learning_rate": 7.528133973767495e-05, "loss": 0.3569988965988159, "step": 58330 }, { "epoch": 0.25046581317671707, "grad_norm": 7.1040472984313965, "learning_rate": 7.527702801755733e-05, "loss": 0.15479474067687987, "step": 58340 }, { "epoch": 0.25050874526673705, "grad_norm": 2.3190762996673584, "learning_rate": 7.52727162974397e-05, "loss": 0.07152878642082214, "step": 58350 }, { "epoch": 0.2505516773567571, "grad_norm": 3.2260525226593018, "learning_rate": 7.526840457732208e-05, "loss": 0.17637765407562256, "step": 58360 }, { "epoch": 0.2505946094467771, "grad_norm": 0.09935518354177475, "learning_rate": 7.526409285720446e-05, "loss": 0.29577863216400146, "step": 58370 }, { "epoch": 0.25063754153679707, "grad_norm": 8.207350730895996, "learning_rate": 7.525978113708684e-05, "loss": 0.3453080177307129, "step": 58380 }, { "epoch": 0.2506804736268171, "grad_norm": 0.001084555173292756, "learning_rate": 7.52554694169692e-05, "loss": 0.18551015853881836, "step": 58390 }, { "epoch": 0.2507234057168371, "grad_norm": 0.03261413797736168, "learning_rate": 7.525115769685158e-05, "loss": 0.18724746704101564, "step": 58400 }, { "epoch": 0.2507663378068571, "grad_norm": 0.0074689267203211784, "learning_rate": 7.524684597673395e-05, "loss": 0.07106940746307373, "step": 58410 }, { "epoch": 0.25080926989687713, "grad_norm": 0.023070115596055984, "learning_rate": 7.524253425661633e-05, "loss": 0.1185571312904358, "step": 58420 }, { "epoch": 0.2508522019868971, "grad_norm": 0.015555283054709435, "learning_rate": 7.523822253649871e-05, "loss": 0.2828044891357422, "step": 58430 }, { "epoch": 0.25089513407691716, "grad_norm": 0.016580946743488312, "learning_rate": 7.523391081638109e-05, "loss": 0.2243107318878174, "step": 58440 }, { "epoch": 0.25093806616693715, "grad_norm": 0.6320114135742188, "learning_rate": 7.522959909626346e-05, "loss": 0.14534295797348024, "step": 58450 }, { "epoch": 0.25098099825695713, "grad_norm": 0.019783420488238335, "learning_rate": 7.522528737614584e-05, "loss": 0.08147812485694886, "step": 58460 }, { "epoch": 0.2510239303469772, "grad_norm": 0.048422615975141525, "learning_rate": 7.522097565602822e-05, "loss": 0.19120093584060668, "step": 58470 }, { "epoch": 0.25106686243699716, "grad_norm": 0.12801282107830048, "learning_rate": 7.52166639359106e-05, "loss": 0.17279950380325318, "step": 58480 }, { "epoch": 0.25110979452701715, "grad_norm": 1.5858972072601318, "learning_rate": 7.521235221579297e-05, "loss": 0.48514652252197266, "step": 58490 }, { "epoch": 0.2511527266170372, "grad_norm": 0.030733229592442513, "learning_rate": 7.520804049567535e-05, "loss": 0.30051817893981936, "step": 58500 }, { "epoch": 0.2511956587070572, "grad_norm": 1.0026241540908813, "learning_rate": 7.520372877555773e-05, "loss": 0.1893421769142151, "step": 58510 }, { "epoch": 0.25123859079707717, "grad_norm": 0.048215437680482864, "learning_rate": 7.51994170554401e-05, "loss": 0.22662749290466308, "step": 58520 }, { "epoch": 0.2512815228870972, "grad_norm": 0.38019275665283203, "learning_rate": 7.519510533532248e-05, "loss": 0.3480359077453613, "step": 58530 }, { "epoch": 0.2513244549771172, "grad_norm": 1.5052499771118164, "learning_rate": 7.519079361520486e-05, "loss": 0.37305445671081544, "step": 58540 }, { "epoch": 0.2513673870671372, "grad_norm": 0.2208074927330017, "learning_rate": 7.518648189508724e-05, "loss": 0.16929982900619506, "step": 58550 }, { "epoch": 0.2514103191571572, "grad_norm": 0.006902139633893967, "learning_rate": 7.51821701749696e-05, "loss": 0.2761898756027222, "step": 58560 }, { "epoch": 0.2514532512471772, "grad_norm": 1.5039210319519043, "learning_rate": 7.517785845485198e-05, "loss": 0.2421489715576172, "step": 58570 }, { "epoch": 0.2514961833371972, "grad_norm": 0.40178382396698, "learning_rate": 7.517354673473436e-05, "loss": 0.1107286810874939, "step": 58580 }, { "epoch": 0.25153911542721724, "grad_norm": 1.3667283058166504, "learning_rate": 7.516923501461673e-05, "loss": 0.4685997009277344, "step": 58590 }, { "epoch": 0.25158204751723723, "grad_norm": 0.09705159813165665, "learning_rate": 7.516492329449911e-05, "loss": 0.2425306797027588, "step": 58600 }, { "epoch": 0.2516249796072572, "grad_norm": 2.8658628463745117, "learning_rate": 7.516061157438149e-05, "loss": 0.18263787031173706, "step": 58610 }, { "epoch": 0.25166791169727726, "grad_norm": 1.7215452194213867, "learning_rate": 7.515629985426387e-05, "loss": 0.07476306557655335, "step": 58620 }, { "epoch": 0.25171084378729724, "grad_norm": 2.6153411865234375, "learning_rate": 7.515198813414624e-05, "loss": 0.2928791522979736, "step": 58630 }, { "epoch": 0.25175377587731723, "grad_norm": 1.717194676399231, "learning_rate": 7.51476764140286e-05, "loss": 0.22246553897857665, "step": 58640 }, { "epoch": 0.2517967079673373, "grad_norm": 1.2184993028640747, "learning_rate": 7.514336469391098e-05, "loss": 0.24329085350036622, "step": 58650 }, { "epoch": 0.25183964005735726, "grad_norm": 0.03462841361761093, "learning_rate": 7.513905297379336e-05, "loss": 0.15679537057876586, "step": 58660 }, { "epoch": 0.2518825721473773, "grad_norm": 0.11113351583480835, "learning_rate": 7.513474125367574e-05, "loss": 0.3553584575653076, "step": 58670 }, { "epoch": 0.2519255042373973, "grad_norm": 0.03636935353279114, "learning_rate": 7.513042953355812e-05, "loss": 0.2889517307281494, "step": 58680 }, { "epoch": 0.2519684363274173, "grad_norm": 0.5163435935974121, "learning_rate": 7.51261178134405e-05, "loss": 0.23903121948242187, "step": 58690 }, { "epoch": 0.2520113684174373, "grad_norm": 0.11025349795818329, "learning_rate": 7.512180609332287e-05, "loss": 0.15377535820007324, "step": 58700 }, { "epoch": 0.2520543005074573, "grad_norm": 0.07065358012914658, "learning_rate": 7.511749437320526e-05, "loss": 0.2515211820602417, "step": 58710 }, { "epoch": 0.2520972325974773, "grad_norm": 0.03804844617843628, "learning_rate": 7.511318265308763e-05, "loss": 0.3007489204406738, "step": 58720 }, { "epoch": 0.25214016468749734, "grad_norm": 0.008624300360679626, "learning_rate": 7.510887093297e-05, "loss": 0.3628600835800171, "step": 58730 }, { "epoch": 0.2521830967775173, "grad_norm": 0.08319579809904099, "learning_rate": 7.510455921285238e-05, "loss": 0.25749032497406005, "step": 58740 }, { "epoch": 0.2522260288675373, "grad_norm": 0.9691449403762817, "learning_rate": 7.510024749273476e-05, "loss": 0.2763275384902954, "step": 58750 }, { "epoch": 0.25226896095755735, "grad_norm": 0.08982349932193756, "learning_rate": 7.509593577261713e-05, "loss": 0.313446044921875, "step": 58760 }, { "epoch": 0.25231189304757734, "grad_norm": 8.64424991607666, "learning_rate": 7.509162405249951e-05, "loss": 0.13973323106765748, "step": 58770 }, { "epoch": 0.2523548251375973, "grad_norm": 0.08145991712808609, "learning_rate": 7.508731233238189e-05, "loss": 0.3383770227432251, "step": 58780 }, { "epoch": 0.25239775722761737, "grad_norm": 1.739539623260498, "learning_rate": 7.508300061226427e-05, "loss": 0.3257728099822998, "step": 58790 }, { "epoch": 0.25244068931763736, "grad_norm": 0.14261537790298462, "learning_rate": 7.507868889214663e-05, "loss": 0.3546148300170898, "step": 58800 }, { "epoch": 0.25248362140765734, "grad_norm": 0.45535334944725037, "learning_rate": 7.507437717202901e-05, "loss": 0.1574306845664978, "step": 58810 }, { "epoch": 0.2525265534976774, "grad_norm": 12.764909744262695, "learning_rate": 7.507006545191139e-05, "loss": 0.4659184455871582, "step": 58820 }, { "epoch": 0.2525694855876974, "grad_norm": 0.034577127546072006, "learning_rate": 7.506575373179376e-05, "loss": 0.23389551639556885, "step": 58830 }, { "epoch": 0.25261241767771736, "grad_norm": 0.021863384172320366, "learning_rate": 7.506144201167614e-05, "loss": 0.3838773012161255, "step": 58840 }, { "epoch": 0.2526553497677374, "grad_norm": 1.3971561193466187, "learning_rate": 7.505713029155852e-05, "loss": 0.3844272613525391, "step": 58850 }, { "epoch": 0.2526982818577574, "grad_norm": 1.2504379749298096, "learning_rate": 7.50528185714409e-05, "loss": 0.214629602432251, "step": 58860 }, { "epoch": 0.25274121394777743, "grad_norm": 0.005005622748285532, "learning_rate": 7.504850685132327e-05, "loss": 0.1708429217338562, "step": 58870 }, { "epoch": 0.2527841460377974, "grad_norm": 1.2264432907104492, "learning_rate": 7.504419513120564e-05, "loss": 0.245497465133667, "step": 58880 }, { "epoch": 0.2528270781278174, "grad_norm": 0.3678496181964874, "learning_rate": 7.503988341108801e-05, "loss": 0.05550463199615478, "step": 58890 }, { "epoch": 0.25287001021783745, "grad_norm": 0.07211752980947495, "learning_rate": 7.503557169097039e-05, "loss": 0.15995752811431885, "step": 58900 }, { "epoch": 0.25291294230785744, "grad_norm": 0.1209837943315506, "learning_rate": 7.503125997085277e-05, "loss": 0.26348142623901366, "step": 58910 }, { "epoch": 0.2529558743978774, "grad_norm": 0.011889472603797913, "learning_rate": 7.502694825073515e-05, "loss": 0.4343246936798096, "step": 58920 }, { "epoch": 0.25299880648789747, "grad_norm": 0.04902556538581848, "learning_rate": 7.502263653061754e-05, "loss": 0.2803981065750122, "step": 58930 }, { "epoch": 0.25304173857791745, "grad_norm": 3.0477848052978516, "learning_rate": 7.501832481049991e-05, "loss": 0.17602345943450928, "step": 58940 }, { "epoch": 0.25308467066793744, "grad_norm": 2.994198799133301, "learning_rate": 7.501401309038229e-05, "loss": 0.2608646869659424, "step": 58950 }, { "epoch": 0.2531276027579575, "grad_norm": 0.003419468877837062, "learning_rate": 7.500970137026467e-05, "loss": 0.28305621147155763, "step": 58960 }, { "epoch": 0.25317053484797747, "grad_norm": 0.018120309337973595, "learning_rate": 7.500538965014703e-05, "loss": 0.2792409896850586, "step": 58970 }, { "epoch": 0.25321346693799746, "grad_norm": 0.04600166156888008, "learning_rate": 7.500107793002941e-05, "loss": 0.06299285888671875, "step": 58980 }, { "epoch": 0.2532563990280175, "grad_norm": 0.9423048496246338, "learning_rate": 7.499676620991179e-05, "loss": 0.06454428434371948, "step": 58990 }, { "epoch": 0.2532993311180375, "grad_norm": 0.00812695175409317, "learning_rate": 7.499245448979416e-05, "loss": 0.12127074003219604, "step": 59000 }, { "epoch": 0.2532993311180375, "eval_loss": 0.45360976457595825, "eval_runtime": 27.4239, "eval_samples_per_second": 3.646, "eval_steps_per_second": 3.646, "step": 59000 }, { "epoch": 0.25334226320805747, "grad_norm": 0.29408156871795654, "learning_rate": 7.498814276967654e-05, "loss": 0.18373805284500122, "step": 59010 }, { "epoch": 0.2533851952980775, "grad_norm": 0.03120812214910984, "learning_rate": 7.498383104955892e-05, "loss": 0.2167065382003784, "step": 59020 }, { "epoch": 0.2534281273880975, "grad_norm": 0.0006006321054883301, "learning_rate": 7.49795193294413e-05, "loss": 0.13845841884613036, "step": 59030 }, { "epoch": 0.2534710594781175, "grad_norm": 0.0009152772836387157, "learning_rate": 7.497520760932367e-05, "loss": 0.19261682033538818, "step": 59040 }, { "epoch": 0.25351399156813753, "grad_norm": 0.014247381128370762, "learning_rate": 7.497089588920604e-05, "loss": 0.3147283554077148, "step": 59050 }, { "epoch": 0.2535569236581575, "grad_norm": 1.0784889459609985, "learning_rate": 7.496658416908841e-05, "loss": 0.201631760597229, "step": 59060 }, { "epoch": 0.2535998557481775, "grad_norm": 0.0418044738471508, "learning_rate": 7.496227244897079e-05, "loss": 0.06878702044486999, "step": 59070 }, { "epoch": 0.25364278783819755, "grad_norm": 0.021921556442975998, "learning_rate": 7.495796072885317e-05, "loss": 0.22168614864349365, "step": 59080 }, { "epoch": 0.25368571992821753, "grad_norm": 0.06008792296051979, "learning_rate": 7.495364900873555e-05, "loss": 0.13947556018829346, "step": 59090 }, { "epoch": 0.2537286520182376, "grad_norm": 0.0034183943644165993, "learning_rate": 7.494933728861792e-05, "loss": 0.14371013641357422, "step": 59100 }, { "epoch": 0.25377158410825756, "grad_norm": 2.4520370960235596, "learning_rate": 7.49450255685003e-05, "loss": 0.2792895078659058, "step": 59110 }, { "epoch": 0.25381451619827755, "grad_norm": 0.6797070503234863, "learning_rate": 7.494071384838268e-05, "loss": 0.285917854309082, "step": 59120 }, { "epoch": 0.2538574482882976, "grad_norm": 1.3992570638656616, "learning_rate": 7.493640212826504e-05, "loss": 0.13060109615325927, "step": 59130 }, { "epoch": 0.2539003803783176, "grad_norm": 55.88208770751953, "learning_rate": 7.493209040814742e-05, "loss": 0.2650261402130127, "step": 59140 }, { "epoch": 0.25394331246833757, "grad_norm": 0.7198887467384338, "learning_rate": 7.492777868802981e-05, "loss": 0.14854855537414552, "step": 59150 }, { "epoch": 0.2539862445583576, "grad_norm": 0.0003937442961614579, "learning_rate": 7.492346696791219e-05, "loss": 0.033887633681297304, "step": 59160 }, { "epoch": 0.2540291766483776, "grad_norm": 0.0008705161744728684, "learning_rate": 7.491915524779457e-05, "loss": 0.15312042236328124, "step": 59170 }, { "epoch": 0.2540721087383976, "grad_norm": 2.6492867469787598, "learning_rate": 7.491484352767694e-05, "loss": 0.3735518455505371, "step": 59180 }, { "epoch": 0.2541150408284176, "grad_norm": 0.0005868783337064087, "learning_rate": 7.491053180755932e-05, "loss": 0.28540782928466796, "step": 59190 }, { "epoch": 0.2541579729184376, "grad_norm": 2.3117873668670654, "learning_rate": 7.49062200874417e-05, "loss": 0.15944833755493165, "step": 59200 }, { "epoch": 0.2542009050084576, "grad_norm": 0.009221755899488926, "learning_rate": 7.490190836732406e-05, "loss": 0.29479308128356935, "step": 59210 }, { "epoch": 0.25424383709847764, "grad_norm": 0.49922358989715576, "learning_rate": 7.489759664720644e-05, "loss": 0.2660544395446777, "step": 59220 }, { "epoch": 0.25428676918849763, "grad_norm": 0.21182771027088165, "learning_rate": 7.489328492708882e-05, "loss": 0.1434078812599182, "step": 59230 }, { "epoch": 0.2543297012785176, "grad_norm": 0.9136849045753479, "learning_rate": 7.488897320697119e-05, "loss": 0.29035928249359133, "step": 59240 }, { "epoch": 0.25437263336853766, "grad_norm": 0.4379677474498749, "learning_rate": 7.488466148685357e-05, "loss": 0.19489789009094238, "step": 59250 }, { "epoch": 0.25441556545855765, "grad_norm": 0.1081492155790329, "learning_rate": 7.488034976673595e-05, "loss": 0.2349034309387207, "step": 59260 }, { "epoch": 0.25445849754857763, "grad_norm": 1.3798065185546875, "learning_rate": 7.487603804661833e-05, "loss": 0.38103649616241453, "step": 59270 }, { "epoch": 0.2545014296385977, "grad_norm": 0.09544280916452408, "learning_rate": 7.48717263265007e-05, "loss": 0.16104166507720946, "step": 59280 }, { "epoch": 0.25454436172861766, "grad_norm": 0.009478382766246796, "learning_rate": 7.486741460638308e-05, "loss": 0.12755447626113892, "step": 59290 }, { "epoch": 0.2545872938186377, "grad_norm": 0.04885256290435791, "learning_rate": 7.486310288626544e-05, "loss": 0.1894428014755249, "step": 59300 }, { "epoch": 0.2546302259086577, "grad_norm": 0.1674768328666687, "learning_rate": 7.485879116614782e-05, "loss": 0.18973815441131592, "step": 59310 }, { "epoch": 0.2546731579986777, "grad_norm": 0.018817557021975517, "learning_rate": 7.48544794460302e-05, "loss": 0.13941391706466674, "step": 59320 }, { "epoch": 0.2547160900886977, "grad_norm": 0.09732145071029663, "learning_rate": 7.485016772591258e-05, "loss": 0.17832938432693482, "step": 59330 }, { "epoch": 0.2547590221787177, "grad_norm": 0.10007254779338837, "learning_rate": 7.484585600579495e-05, "loss": 0.2736708402633667, "step": 59340 }, { "epoch": 0.2548019542687377, "grad_norm": 1.3420660495758057, "learning_rate": 7.484154428567733e-05, "loss": 0.11637892723083496, "step": 59350 }, { "epoch": 0.25484488635875774, "grad_norm": 1.2482755184173584, "learning_rate": 7.483723256555971e-05, "loss": 0.24099578857421874, "step": 59360 }, { "epoch": 0.2548878184487777, "grad_norm": 0.0030424906872212887, "learning_rate": 7.483292084544208e-05, "loss": 0.40054893493652344, "step": 59370 }, { "epoch": 0.2549307505387977, "grad_norm": 0.15289510786533356, "learning_rate": 7.482860912532446e-05, "loss": 0.27155065536499023, "step": 59380 }, { "epoch": 0.25497368262881775, "grad_norm": 0.15874122083187103, "learning_rate": 7.482429740520684e-05, "loss": 0.3292946100234985, "step": 59390 }, { "epoch": 0.25501661471883774, "grad_norm": 0.7041093707084656, "learning_rate": 7.481998568508922e-05, "loss": 0.09723674654960632, "step": 59400 }, { "epoch": 0.25505954680885773, "grad_norm": 0.03337360545992851, "learning_rate": 7.48156739649716e-05, "loss": 0.1512210488319397, "step": 59410 }, { "epoch": 0.25510247889887777, "grad_norm": 0.47118431329727173, "learning_rate": 7.481136224485397e-05, "loss": 0.19009388685226442, "step": 59420 }, { "epoch": 0.25514541098889776, "grad_norm": 1.1213189363479614, "learning_rate": 7.480705052473635e-05, "loss": 0.32139723300933837, "step": 59430 }, { "epoch": 0.25518834307891775, "grad_norm": 0.02947445586323738, "learning_rate": 7.480273880461873e-05, "loss": 0.13824256658554077, "step": 59440 }, { "epoch": 0.2552312751689378, "grad_norm": 0.2438763827085495, "learning_rate": 7.47984270845011e-05, "loss": 0.216261887550354, "step": 59450 }, { "epoch": 0.2552742072589578, "grad_norm": 0.01802912726998329, "learning_rate": 7.479411536438347e-05, "loss": 0.10054420232772827, "step": 59460 }, { "epoch": 0.25531713934897776, "grad_norm": 4.936689376831055, "learning_rate": 7.478980364426584e-05, "loss": 0.3198725700378418, "step": 59470 }, { "epoch": 0.2553600714389978, "grad_norm": 1.1724637746810913, "learning_rate": 7.478549192414822e-05, "loss": 0.22834150791168212, "step": 59480 }, { "epoch": 0.2554030035290178, "grad_norm": 0.04114644601941109, "learning_rate": 7.47811802040306e-05, "loss": 0.09133874773979186, "step": 59490 }, { "epoch": 0.2554459356190378, "grad_norm": 0.24322256445884705, "learning_rate": 7.477686848391298e-05, "loss": 0.16889041662216187, "step": 59500 }, { "epoch": 0.2554888677090578, "grad_norm": 1.2901346683502197, "learning_rate": 7.477255676379535e-05, "loss": 0.04150072932243347, "step": 59510 }, { "epoch": 0.2555317997990778, "grad_norm": 4.035179138183594, "learning_rate": 7.476824504367773e-05, "loss": 0.2927661418914795, "step": 59520 }, { "epoch": 0.25557473188909785, "grad_norm": 4.362427234649658, "learning_rate": 7.476393332356011e-05, "loss": 0.24192519187927247, "step": 59530 }, { "epoch": 0.25561766397911784, "grad_norm": 0.0030299974605441093, "learning_rate": 7.475962160344247e-05, "loss": 0.14141761064529418, "step": 59540 }, { "epoch": 0.2556605960691378, "grad_norm": 2.231828451156616, "learning_rate": 7.475530988332485e-05, "loss": 0.30825395584106446, "step": 59550 }, { "epoch": 0.25570352815915787, "grad_norm": 0.09296996891498566, "learning_rate": 7.475099816320723e-05, "loss": 0.25759999752044677, "step": 59560 }, { "epoch": 0.25574646024917785, "grad_norm": 0.18211933970451355, "learning_rate": 7.47466864430896e-05, "loss": 0.1883184313774109, "step": 59570 }, { "epoch": 0.25578939233919784, "grad_norm": 0.07114825397729874, "learning_rate": 7.474237472297198e-05, "loss": 0.17066493034362792, "step": 59580 }, { "epoch": 0.2558323244292179, "grad_norm": 3.3013527393341064, "learning_rate": 7.473806300285436e-05, "loss": 0.35579254627227785, "step": 59590 }, { "epoch": 0.25587525651923787, "grad_norm": 0.6940078139305115, "learning_rate": 7.473375128273674e-05, "loss": 0.15746339559555053, "step": 59600 }, { "epoch": 0.25591818860925786, "grad_norm": 0.4556047320365906, "learning_rate": 7.472943956261911e-05, "loss": 0.3222770690917969, "step": 59610 }, { "epoch": 0.2559611206992779, "grad_norm": 0.66447913646698, "learning_rate": 7.472512784250149e-05, "loss": 0.29715771675109864, "step": 59620 }, { "epoch": 0.2560040527892979, "grad_norm": 0.20826716721057892, "learning_rate": 7.472081612238387e-05, "loss": 0.3474601745605469, "step": 59630 }, { "epoch": 0.2560469848793179, "grad_norm": 0.8093248009681702, "learning_rate": 7.471650440226625e-05, "loss": 0.2916031122207642, "step": 59640 }, { "epoch": 0.2560899169693379, "grad_norm": 0.1829816699028015, "learning_rate": 7.471219268214862e-05, "loss": 0.3677819728851318, "step": 59650 }, { "epoch": 0.2561328490593579, "grad_norm": 0.4850645661354065, "learning_rate": 7.4707880962031e-05, "loss": 0.20362043380737305, "step": 59660 }, { "epoch": 0.2561757811493779, "grad_norm": 1.195241928100586, "learning_rate": 7.470356924191338e-05, "loss": 0.22135019302368164, "step": 59670 }, { "epoch": 0.25621871323939793, "grad_norm": 1.7356536388397217, "learning_rate": 7.469925752179576e-05, "loss": 0.21251988410949707, "step": 59680 }, { "epoch": 0.2562616453294179, "grad_norm": 0.22998811304569244, "learning_rate": 7.469494580167813e-05, "loss": 0.153911817073822, "step": 59690 }, { "epoch": 0.2563045774194379, "grad_norm": 10.037912368774414, "learning_rate": 7.469063408156051e-05, "loss": 0.23573625087738037, "step": 59700 }, { "epoch": 0.25634750950945795, "grad_norm": 0.022693343460559845, "learning_rate": 7.468632236144287e-05, "loss": 0.19555919170379638, "step": 59710 }, { "epoch": 0.25639044159947794, "grad_norm": 3.438164710998535, "learning_rate": 7.468201064132525e-05, "loss": 0.2517883539199829, "step": 59720 }, { "epoch": 0.256433373689498, "grad_norm": 0.8784910440444946, "learning_rate": 7.467769892120763e-05, "loss": 0.14495769739151002, "step": 59730 }, { "epoch": 0.25647630577951797, "grad_norm": 0.06742941588163376, "learning_rate": 7.467338720109e-05, "loss": 0.08876391649246215, "step": 59740 }, { "epoch": 0.25651923786953795, "grad_norm": 0.14247381687164307, "learning_rate": 7.466907548097238e-05, "loss": 0.07637916207313537, "step": 59750 }, { "epoch": 0.256562169959558, "grad_norm": 0.00508470181375742, "learning_rate": 7.466476376085476e-05, "loss": 0.0826393187046051, "step": 59760 }, { "epoch": 0.256605102049578, "grad_norm": 0.02827954664826393, "learning_rate": 7.466045204073714e-05, "loss": 0.16171613931655884, "step": 59770 }, { "epoch": 0.25664803413959797, "grad_norm": 0.035804182291030884, "learning_rate": 7.465614032061952e-05, "loss": 0.12921042442321778, "step": 59780 }, { "epoch": 0.256690966229618, "grad_norm": 0.17865890264511108, "learning_rate": 7.465182860050188e-05, "loss": 0.13270853757858275, "step": 59790 }, { "epoch": 0.256733898319638, "grad_norm": 0.003751277457922697, "learning_rate": 7.464751688038426e-05, "loss": 0.11085785627365112, "step": 59800 }, { "epoch": 0.256776830409658, "grad_norm": 0.4030987620353699, "learning_rate": 7.464320516026663e-05, "loss": 0.27253243923187254, "step": 59810 }, { "epoch": 0.25681976249967803, "grad_norm": 0.009502650238573551, "learning_rate": 7.463889344014901e-05, "loss": 0.1683057427406311, "step": 59820 }, { "epoch": 0.256862694589698, "grad_norm": 0.006902703549712896, "learning_rate": 7.463458172003139e-05, "loss": 0.2961188077926636, "step": 59830 }, { "epoch": 0.256905626679718, "grad_norm": 4.155785083770752, "learning_rate": 7.463026999991377e-05, "loss": 0.42710652351379397, "step": 59840 }, { "epoch": 0.25694855876973804, "grad_norm": 0.012135523371398449, "learning_rate": 7.462595827979614e-05, "loss": 0.0692120611667633, "step": 59850 }, { "epoch": 0.25699149085975803, "grad_norm": 0.07463029026985168, "learning_rate": 7.462164655967852e-05, "loss": 0.13505473136901855, "step": 59860 }, { "epoch": 0.257034422949778, "grad_norm": 1.5088930130004883, "learning_rate": 7.46173348395609e-05, "loss": 0.170632004737854, "step": 59870 }, { "epoch": 0.25707735503979806, "grad_norm": 1.4531193971633911, "learning_rate": 7.461302311944328e-05, "loss": 0.2785643100738525, "step": 59880 }, { "epoch": 0.25712028712981805, "grad_norm": 16.77339744567871, "learning_rate": 7.460871139932565e-05, "loss": 0.19587944746017455, "step": 59890 }, { "epoch": 0.25716321921983804, "grad_norm": 1.1448432207107544, "learning_rate": 7.460439967920803e-05, "loss": 0.18592535257339476, "step": 59900 }, { "epoch": 0.2572061513098581, "grad_norm": 0.000942026439588517, "learning_rate": 7.460008795909041e-05, "loss": 0.28868684768676756, "step": 59910 }, { "epoch": 0.25724908339987806, "grad_norm": 1.498420238494873, "learning_rate": 7.459577623897278e-05, "loss": 0.3154158115386963, "step": 59920 }, { "epoch": 0.25729201548989805, "grad_norm": 0.030521482229232788, "learning_rate": 7.459146451885516e-05, "loss": 0.29817702770233157, "step": 59930 }, { "epoch": 0.2573349475799181, "grad_norm": 0.9940090179443359, "learning_rate": 7.458715279873754e-05, "loss": 0.2779792070388794, "step": 59940 }, { "epoch": 0.2573778796699381, "grad_norm": 0.004922129213809967, "learning_rate": 7.45828410786199e-05, "loss": 0.26671979427337644, "step": 59950 }, { "epoch": 0.2574208117599581, "grad_norm": 0.09344282746315002, "learning_rate": 7.457852935850228e-05, "loss": 0.20685970783233643, "step": 59960 }, { "epoch": 0.2574637438499781, "grad_norm": 2.032776117324829, "learning_rate": 7.457421763838466e-05, "loss": 0.4120779514312744, "step": 59970 }, { "epoch": 0.2575066759399981, "grad_norm": 3.692190170288086, "learning_rate": 7.456990591826704e-05, "loss": 0.28315205574035646, "step": 59980 }, { "epoch": 0.25754960803001814, "grad_norm": 3.3654446601867676, "learning_rate": 7.456559419814941e-05, "loss": 0.2883306503295898, "step": 59990 }, { "epoch": 0.2575925401200381, "grad_norm": 0.10398758202791214, "learning_rate": 7.456128247803179e-05, "loss": 0.22278923988342286, "step": 60000 }, { "epoch": 0.2575925401200381, "eval_loss": 0.4334496557712555, "eval_runtime": 27.6311, "eval_samples_per_second": 3.619, "eval_steps_per_second": 3.619, "step": 60000 }, { "epoch": 0.2576354722100581, "grad_norm": 0.4824603199958801, "learning_rate": 7.455697075791417e-05, "loss": 0.12448277473449706, "step": 60010 }, { "epoch": 0.25767840430007816, "grad_norm": 0.1026025265455246, "learning_rate": 7.455265903779654e-05, "loss": 0.34478096961975097, "step": 60020 }, { "epoch": 0.25772133639009814, "grad_norm": 0.0035014173481613398, "learning_rate": 7.454834731767892e-05, "loss": 0.21980509757995606, "step": 60030 }, { "epoch": 0.25776426848011813, "grad_norm": 1.0586053133010864, "learning_rate": 7.454403559756129e-05, "loss": 0.2351222515106201, "step": 60040 }, { "epoch": 0.2578072005701382, "grad_norm": 0.901056706905365, "learning_rate": 7.453972387744366e-05, "loss": 0.2706778526306152, "step": 60050 }, { "epoch": 0.25785013266015816, "grad_norm": 0.008316353894770145, "learning_rate": 7.453541215732604e-05, "loss": 0.16525228023529054, "step": 60060 }, { "epoch": 0.25789306475017815, "grad_norm": 0.4059770405292511, "learning_rate": 7.453110043720842e-05, "loss": 0.17263598442077638, "step": 60070 }, { "epoch": 0.2579359968401982, "grad_norm": 2.582120180130005, "learning_rate": 7.45267887170908e-05, "loss": 0.09478598833084106, "step": 60080 }, { "epoch": 0.2579789289302182, "grad_norm": 0.0057938783429563046, "learning_rate": 7.452247699697317e-05, "loss": 0.2529701471328735, "step": 60090 }, { "epoch": 0.25802186102023816, "grad_norm": 0.014004064723849297, "learning_rate": 7.451816527685555e-05, "loss": 0.2436042308807373, "step": 60100 }, { "epoch": 0.2580647931102582, "grad_norm": 0.13050612807273865, "learning_rate": 7.451385355673793e-05, "loss": 0.249086594581604, "step": 60110 }, { "epoch": 0.2581077252002782, "grad_norm": 0.011018295772373676, "learning_rate": 7.45095418366203e-05, "loss": 0.24939985275268556, "step": 60120 }, { "epoch": 0.2581506572902982, "grad_norm": 0.37946784496307373, "learning_rate": 7.450523011650268e-05, "loss": 0.08818068504333496, "step": 60130 }, { "epoch": 0.2581935893803182, "grad_norm": 0.572830080986023, "learning_rate": 7.450091839638506e-05, "loss": 0.1864118456840515, "step": 60140 }, { "epoch": 0.2582365214703382, "grad_norm": 0.03537747636437416, "learning_rate": 7.449660667626744e-05, "loss": 0.11507842540740967, "step": 60150 }, { "epoch": 0.25827945356035825, "grad_norm": 0.003414865816012025, "learning_rate": 7.449229495614981e-05, "loss": 0.19977638721466065, "step": 60160 }, { "epoch": 0.25832238565037824, "grad_norm": 0.05791240185499191, "learning_rate": 7.448798323603219e-05, "loss": 0.09170815348625183, "step": 60170 }, { "epoch": 0.2583653177403982, "grad_norm": 0.9761412143707275, "learning_rate": 7.448367151591457e-05, "loss": 0.26405317783355714, "step": 60180 }, { "epoch": 0.25840824983041827, "grad_norm": 7.960112998262048e-05, "learning_rate": 7.447935979579695e-05, "loss": 0.07050980925559998, "step": 60190 }, { "epoch": 0.25845118192043826, "grad_norm": 0.0009460471337661147, "learning_rate": 7.447504807567931e-05, "loss": 0.1875847339630127, "step": 60200 }, { "epoch": 0.25849411401045824, "grad_norm": 0.05905630439519882, "learning_rate": 7.447073635556169e-05, "loss": 0.07909368872642517, "step": 60210 }, { "epoch": 0.2585370461004783, "grad_norm": 4.739538192749023, "learning_rate": 7.446642463544406e-05, "loss": 0.327526330947876, "step": 60220 }, { "epoch": 0.25857997819049827, "grad_norm": 1.2322735786437988, "learning_rate": 7.446211291532644e-05, "loss": 0.6427930355072021, "step": 60230 }, { "epoch": 0.25862291028051826, "grad_norm": 0.22826102375984192, "learning_rate": 7.445780119520882e-05, "loss": 0.23440871238708497, "step": 60240 }, { "epoch": 0.2586658423705383, "grad_norm": 0.3581126928329468, "learning_rate": 7.44534894750912e-05, "loss": 0.11961183547973633, "step": 60250 }, { "epoch": 0.2587087744605583, "grad_norm": 0.009623724035918713, "learning_rate": 7.444917775497357e-05, "loss": 0.24155714511871337, "step": 60260 }, { "epoch": 0.2587517065505783, "grad_norm": 0.11129165440797806, "learning_rate": 7.444486603485595e-05, "loss": 0.12745927572250365, "step": 60270 }, { "epoch": 0.2587946386405983, "grad_norm": 0.0014452520990744233, "learning_rate": 7.444055431473831e-05, "loss": 0.1345227837562561, "step": 60280 }, { "epoch": 0.2588375707306183, "grad_norm": 1.325457215309143, "learning_rate": 7.443624259462069e-05, "loss": 0.21954782009124757, "step": 60290 }, { "epoch": 0.2588805028206383, "grad_norm": 3.4132261276245117, "learning_rate": 7.443193087450307e-05, "loss": 0.2938798189163208, "step": 60300 }, { "epoch": 0.25892343491065833, "grad_norm": 3.204024076461792, "learning_rate": 7.442761915438545e-05, "loss": 0.17051490545272827, "step": 60310 }, { "epoch": 0.2589663670006783, "grad_norm": 0.0036249810364097357, "learning_rate": 7.442330743426782e-05, "loss": 0.18110159635543824, "step": 60320 }, { "epoch": 0.2590092990906983, "grad_norm": 0.8694581985473633, "learning_rate": 7.44189957141502e-05, "loss": 0.23339805603027344, "step": 60330 }, { "epoch": 0.25905223118071835, "grad_norm": 4.196567058563232, "learning_rate": 7.441468399403259e-05, "loss": 0.13609391450881958, "step": 60340 }, { "epoch": 0.25909516327073834, "grad_norm": 0.347924143075943, "learning_rate": 7.441037227391497e-05, "loss": 0.16419798135757446, "step": 60350 }, { "epoch": 0.2591380953607583, "grad_norm": 1.3597310781478882, "learning_rate": 7.440606055379735e-05, "loss": 0.05138199329376221, "step": 60360 }, { "epoch": 0.25918102745077837, "grad_norm": 0.029793528839945793, "learning_rate": 7.440174883367971e-05, "loss": 0.16066555976867675, "step": 60370 }, { "epoch": 0.25922395954079835, "grad_norm": 0.06812033802270889, "learning_rate": 7.439743711356209e-05, "loss": 0.2599786281585693, "step": 60380 }, { "epoch": 0.2592668916308184, "grad_norm": 0.000821946538053453, "learning_rate": 7.439312539344447e-05, "loss": 0.19980417490005492, "step": 60390 }, { "epoch": 0.2593098237208384, "grad_norm": 0.6022235751152039, "learning_rate": 7.438881367332684e-05, "loss": 0.21292507648468018, "step": 60400 }, { "epoch": 0.25935275581085837, "grad_norm": 0.05035751685500145, "learning_rate": 7.438450195320922e-05, "loss": 0.23559396266937255, "step": 60410 }, { "epoch": 0.2593956879008784, "grad_norm": 0.026783062145113945, "learning_rate": 7.43801902330916e-05, "loss": 0.23798015117645263, "step": 60420 }, { "epoch": 0.2594386199908984, "grad_norm": 0.021695852279663086, "learning_rate": 7.437587851297397e-05, "loss": 0.07167540788650513, "step": 60430 }, { "epoch": 0.2594815520809184, "grad_norm": 4.8020301619544625e-05, "learning_rate": 7.437156679285635e-05, "loss": 0.27671382427215574, "step": 60440 }, { "epoch": 0.25952448417093843, "grad_norm": 0.06290990114212036, "learning_rate": 7.436725507273872e-05, "loss": 0.28073556423187257, "step": 60450 }, { "epoch": 0.2595674162609584, "grad_norm": 0.20219306647777557, "learning_rate": 7.43629433526211e-05, "loss": 0.028297588229179382, "step": 60460 }, { "epoch": 0.2596103483509784, "grad_norm": 1.8177192211151123, "learning_rate": 7.435863163250347e-05, "loss": 0.14526848793029784, "step": 60470 }, { "epoch": 0.25965328044099845, "grad_norm": 0.676733136177063, "learning_rate": 7.435431991238585e-05, "loss": 0.23378782272338866, "step": 60480 }, { "epoch": 0.25969621253101843, "grad_norm": 0.136207714676857, "learning_rate": 7.435000819226823e-05, "loss": 0.06558563113212586, "step": 60490 }, { "epoch": 0.2597391446210384, "grad_norm": 0.0994429960846901, "learning_rate": 7.43456964721506e-05, "loss": 0.11308003664016723, "step": 60500 }, { "epoch": 0.25978207671105846, "grad_norm": 3.658642530441284, "learning_rate": 7.434138475203298e-05, "loss": 0.46092705726623534, "step": 60510 }, { "epoch": 0.25982500880107845, "grad_norm": 0.09556927531957626, "learning_rate": 7.433707303191536e-05, "loss": 0.359284782409668, "step": 60520 }, { "epoch": 0.25986794089109844, "grad_norm": 0.01794356107711792, "learning_rate": 7.433276131179772e-05, "loss": 0.22540340423583985, "step": 60530 }, { "epoch": 0.2599108729811185, "grad_norm": 6.463069915771484, "learning_rate": 7.43284495916801e-05, "loss": 0.1235724925994873, "step": 60540 }, { "epoch": 0.25995380507113847, "grad_norm": 0.486515611410141, "learning_rate": 7.432413787156248e-05, "loss": 0.2912492513656616, "step": 60550 }, { "epoch": 0.25999673716115845, "grad_norm": 0.0728234276175499, "learning_rate": 7.431982615144487e-05, "loss": 0.08355991840362549, "step": 60560 }, { "epoch": 0.2600396692511785, "grad_norm": 0.006520441733300686, "learning_rate": 7.431551443132724e-05, "loss": 0.11683311462402343, "step": 60570 }, { "epoch": 0.2600826013411985, "grad_norm": 0.021994153037667274, "learning_rate": 7.431120271120962e-05, "loss": 0.21824615001678466, "step": 60580 }, { "epoch": 0.2601255334312185, "grad_norm": 0.02452601119875908, "learning_rate": 7.4306890991092e-05, "loss": 0.2871460199356079, "step": 60590 }, { "epoch": 0.2601684655212385, "grad_norm": 0.002121657133102417, "learning_rate": 7.430257927097438e-05, "loss": 0.3002606391906738, "step": 60600 }, { "epoch": 0.2602113976112585, "grad_norm": 1.038457989692688, "learning_rate": 7.429826755085674e-05, "loss": 0.15184781551361085, "step": 60610 }, { "epoch": 0.26025432970127854, "grad_norm": 0.3401985168457031, "learning_rate": 7.429395583073912e-05, "loss": 0.24284937381744384, "step": 60620 }, { "epoch": 0.26029726179129853, "grad_norm": 1.9231289625167847, "learning_rate": 7.42896441106215e-05, "loss": 0.36919641494750977, "step": 60630 }, { "epoch": 0.2603401938813185, "grad_norm": 0.0033993495162576437, "learning_rate": 7.428533239050387e-05, "loss": 0.1777048110961914, "step": 60640 }, { "epoch": 0.26038312597133856, "grad_norm": 0.17801420390605927, "learning_rate": 7.428102067038625e-05, "loss": 0.27388036251068115, "step": 60650 }, { "epoch": 0.26042605806135855, "grad_norm": 0.0023477845825254917, "learning_rate": 7.427670895026863e-05, "loss": 0.24748921394348145, "step": 60660 }, { "epoch": 0.26046899015137853, "grad_norm": 8.327670097351074, "learning_rate": 7.4272397230151e-05, "loss": 0.21605587005615234, "step": 60670 }, { "epoch": 0.2605119222413986, "grad_norm": 0.02114141546189785, "learning_rate": 7.426808551003338e-05, "loss": 0.23582861423492432, "step": 60680 }, { "epoch": 0.26055485433141856, "grad_norm": 2.584583044052124, "learning_rate": 7.426377378991575e-05, "loss": 0.3142396450042725, "step": 60690 }, { "epoch": 0.26059778642143855, "grad_norm": 0.015979178249835968, "learning_rate": 7.425946206979812e-05, "loss": 0.38975841999053956, "step": 60700 }, { "epoch": 0.2606407185114586, "grad_norm": 0.07706556469202042, "learning_rate": 7.42551503496805e-05, "loss": 0.26714980602264404, "step": 60710 }, { "epoch": 0.2606836506014786, "grad_norm": 0.3661552369594574, "learning_rate": 7.425083862956288e-05, "loss": 0.17433593273162842, "step": 60720 }, { "epoch": 0.26072658269149857, "grad_norm": 0.9815655946731567, "learning_rate": 7.424652690944525e-05, "loss": 0.33178982734680174, "step": 60730 }, { "epoch": 0.2607695147815186, "grad_norm": 2.396820068359375, "learning_rate": 7.424221518932763e-05, "loss": 0.16736443042755128, "step": 60740 }, { "epoch": 0.2608124468715386, "grad_norm": 0.06286032497882843, "learning_rate": 7.423790346921001e-05, "loss": 0.19664554595947265, "step": 60750 }, { "epoch": 0.2608553789615586, "grad_norm": 0.01892467401921749, "learning_rate": 7.423359174909239e-05, "loss": 0.06982214450836181, "step": 60760 }, { "epoch": 0.2608983110515786, "grad_norm": 0.031845398247241974, "learning_rate": 7.422928002897476e-05, "loss": 0.21323072910308838, "step": 60770 }, { "epoch": 0.2609412431415986, "grad_norm": 0.0011438673827797174, "learning_rate": 7.422496830885714e-05, "loss": 0.1070354700088501, "step": 60780 }, { "epoch": 0.2609841752316186, "grad_norm": 0.015031355433166027, "learning_rate": 7.422065658873952e-05, "loss": 0.16533334255218507, "step": 60790 }, { "epoch": 0.26102710732163864, "grad_norm": 0.2482471913099289, "learning_rate": 7.42163448686219e-05, "loss": 0.17978440523147582, "step": 60800 }, { "epoch": 0.2610700394116586, "grad_norm": 0.02777029201388359, "learning_rate": 7.421203314850427e-05, "loss": 0.16307601928710938, "step": 60810 }, { "epoch": 0.26111297150167867, "grad_norm": 0.7483030557632446, "learning_rate": 7.420772142838665e-05, "loss": 0.1298598289489746, "step": 60820 }, { "epoch": 0.26115590359169866, "grad_norm": 0.051577258855104446, "learning_rate": 7.420340970826903e-05, "loss": 0.23332612514495848, "step": 60830 }, { "epoch": 0.26119883568171864, "grad_norm": 0.011332403868436813, "learning_rate": 7.41990979881514e-05, "loss": 0.21819396018981935, "step": 60840 }, { "epoch": 0.2612417677717387, "grad_norm": 0.01872512884438038, "learning_rate": 7.419478626803378e-05, "loss": 0.12530485391616822, "step": 60850 }, { "epoch": 0.2612846998617587, "grad_norm": 5.371342182159424, "learning_rate": 7.419047454791615e-05, "loss": 0.3505243301391602, "step": 60860 }, { "epoch": 0.26132763195177866, "grad_norm": 0.9568906426429749, "learning_rate": 7.418616282779852e-05, "loss": 0.3265427827835083, "step": 60870 }, { "epoch": 0.2613705640417987, "grad_norm": 0.0020468125585466623, "learning_rate": 7.41818511076809e-05, "loss": 0.3428152084350586, "step": 60880 }, { "epoch": 0.2614134961318187, "grad_norm": 0.013312124647200108, "learning_rate": 7.417753938756328e-05, "loss": 0.3842325210571289, "step": 60890 }, { "epoch": 0.2614564282218387, "grad_norm": 2.257316827774048, "learning_rate": 7.417322766744566e-05, "loss": 0.4196054935455322, "step": 60900 }, { "epoch": 0.2614993603118587, "grad_norm": 0.17104367911815643, "learning_rate": 7.416891594732803e-05, "loss": 0.2096198558807373, "step": 60910 }, { "epoch": 0.2615422924018787, "grad_norm": 1.1753196716308594, "learning_rate": 7.416460422721041e-05, "loss": 0.480803918838501, "step": 60920 }, { "epoch": 0.2615852244918987, "grad_norm": 3.2457165718078613, "learning_rate": 7.416029250709279e-05, "loss": 0.2990148067474365, "step": 60930 }, { "epoch": 0.26162815658191874, "grad_norm": 0.9289878010749817, "learning_rate": 7.415598078697515e-05, "loss": 0.3847595930099487, "step": 60940 }, { "epoch": 0.2616710886719387, "grad_norm": 0.02137531340122223, "learning_rate": 7.415166906685753e-05, "loss": 0.16399847269058226, "step": 60950 }, { "epoch": 0.2617140207619587, "grad_norm": 0.7735177874565125, "learning_rate": 7.41473573467399e-05, "loss": 0.22880373001098633, "step": 60960 }, { "epoch": 0.26175695285197875, "grad_norm": 0.3202783167362213, "learning_rate": 7.414304562662228e-05, "loss": 0.29592530727386473, "step": 60970 }, { "epoch": 0.26179988494199874, "grad_norm": 0.01103890035301447, "learning_rate": 7.413873390650466e-05, "loss": 0.16606005430221557, "step": 60980 }, { "epoch": 0.2618428170320187, "grad_norm": 0.036014948040246964, "learning_rate": 7.413442218638704e-05, "loss": 0.15441317558288575, "step": 60990 }, { "epoch": 0.26188574912203877, "grad_norm": 1.53452467918396, "learning_rate": 7.413011046626942e-05, "loss": 0.34018664360046386, "step": 61000 }, { "epoch": 0.26188574912203877, "eval_loss": 0.4289569854736328, "eval_runtime": 27.4285, "eval_samples_per_second": 3.646, "eval_steps_per_second": 3.646, "step": 61000 }, { "epoch": 0.26192868121205876, "grad_norm": 0.02968435175716877, "learning_rate": 7.412579874615179e-05, "loss": 0.09290619492530823, "step": 61010 }, { "epoch": 0.2619716133020788, "grad_norm": 0.3154435157775879, "learning_rate": 7.412148702603417e-05, "loss": 0.11877356767654419, "step": 61020 }, { "epoch": 0.2620145453920988, "grad_norm": 0.050260115414857864, "learning_rate": 7.411717530591655e-05, "loss": 0.2617194414138794, "step": 61030 }, { "epoch": 0.2620574774821188, "grad_norm": 0.033218640834093094, "learning_rate": 7.411286358579893e-05, "loss": 0.3478349447250366, "step": 61040 }, { "epoch": 0.2621004095721388, "grad_norm": 0.12185132503509521, "learning_rate": 7.41085518656813e-05, "loss": 0.21468589305877686, "step": 61050 }, { "epoch": 0.2621433416621588, "grad_norm": 4.730641841888428, "learning_rate": 7.410424014556368e-05, "loss": 0.2136781930923462, "step": 61060 }, { "epoch": 0.2621862737521788, "grad_norm": 0.03792364150285721, "learning_rate": 7.409992842544606e-05, "loss": 0.22919461727142335, "step": 61070 }, { "epoch": 0.26222920584219883, "grad_norm": 0.03635554760694504, "learning_rate": 7.409561670532843e-05, "loss": 0.14863102436065673, "step": 61080 }, { "epoch": 0.2622721379322188, "grad_norm": 0.6019494533538818, "learning_rate": 7.409130498521081e-05, "loss": 0.13661930561065674, "step": 61090 }, { "epoch": 0.2623150700222388, "grad_norm": 0.3180076777935028, "learning_rate": 7.408699326509319e-05, "loss": 0.0830872893333435, "step": 61100 }, { "epoch": 0.26235800211225885, "grad_norm": 0.09637082368135452, "learning_rate": 7.408268154497555e-05, "loss": 0.2411327838897705, "step": 61110 }, { "epoch": 0.26240093420227883, "grad_norm": 0.07667584717273712, "learning_rate": 7.407836982485793e-05, "loss": 0.3642971277236938, "step": 61120 }, { "epoch": 0.2624438662922988, "grad_norm": 1.2912453413009644, "learning_rate": 7.407405810474031e-05, "loss": 0.21056365966796875, "step": 61130 }, { "epoch": 0.26248679838231886, "grad_norm": 0.04481130465865135, "learning_rate": 7.406974638462268e-05, "loss": 0.2890332221984863, "step": 61140 }, { "epoch": 0.26252973047233885, "grad_norm": 3.0413002967834473, "learning_rate": 7.406543466450506e-05, "loss": 0.2858407974243164, "step": 61150 }, { "epoch": 0.26257266256235884, "grad_norm": 0.009976262226700783, "learning_rate": 7.406112294438744e-05, "loss": 0.22789084911346436, "step": 61160 }, { "epoch": 0.2626155946523789, "grad_norm": 0.10438531637191772, "learning_rate": 7.405681122426982e-05, "loss": 0.15007318258285524, "step": 61170 }, { "epoch": 0.26265852674239887, "grad_norm": 1.8232972621917725, "learning_rate": 7.40524995041522e-05, "loss": 0.3524114370346069, "step": 61180 }, { "epoch": 0.26270145883241885, "grad_norm": 0.10440938919782639, "learning_rate": 7.404818778403456e-05, "loss": 0.10628962516784668, "step": 61190 }, { "epoch": 0.2627443909224389, "grad_norm": 0.1989804059267044, "learning_rate": 7.404387606391694e-05, "loss": 0.09079195857048035, "step": 61200 }, { "epoch": 0.2627873230124589, "grad_norm": 0.007323736324906349, "learning_rate": 7.403956434379931e-05, "loss": 0.3279279232025146, "step": 61210 }, { "epoch": 0.26283025510247887, "grad_norm": 0.019976818934082985, "learning_rate": 7.403525262368169e-05, "loss": 0.349375319480896, "step": 61220 }, { "epoch": 0.2628731871924989, "grad_norm": 0.07553509622812271, "learning_rate": 7.403094090356407e-05, "loss": 0.22444021701812744, "step": 61230 }, { "epoch": 0.2629161192825189, "grad_norm": 0.6492979526519775, "learning_rate": 7.402662918344644e-05, "loss": 0.3088306665420532, "step": 61240 }, { "epoch": 0.26295905137253894, "grad_norm": 0.02214295044541359, "learning_rate": 7.402231746332882e-05, "loss": 0.24729163646698, "step": 61250 }, { "epoch": 0.26300198346255893, "grad_norm": 0.5439571142196655, "learning_rate": 7.40180057432112e-05, "loss": 0.2864963531494141, "step": 61260 }, { "epoch": 0.2630449155525789, "grad_norm": 0.003847939195111394, "learning_rate": 7.401369402309358e-05, "loss": 0.19426699876785278, "step": 61270 }, { "epoch": 0.26308784764259896, "grad_norm": 5.374867916107178, "learning_rate": 7.400938230297595e-05, "loss": 0.2439223051071167, "step": 61280 }, { "epoch": 0.26313077973261895, "grad_norm": 0.053749583661556244, "learning_rate": 7.400507058285833e-05, "loss": 0.14222971200942994, "step": 61290 }, { "epoch": 0.26317371182263893, "grad_norm": 3.159461259841919, "learning_rate": 7.400075886274071e-05, "loss": 0.34703402519226073, "step": 61300 }, { "epoch": 0.263216643912659, "grad_norm": 0.19708716869354248, "learning_rate": 7.399644714262309e-05, "loss": 0.24233169555664064, "step": 61310 }, { "epoch": 0.26325957600267896, "grad_norm": 0.06329300999641418, "learning_rate": 7.399213542250546e-05, "loss": 0.2854588270187378, "step": 61320 }, { "epoch": 0.26330250809269895, "grad_norm": 0.44259560108184814, "learning_rate": 7.398782370238784e-05, "loss": 0.3140692949295044, "step": 61330 }, { "epoch": 0.263345440182719, "grad_norm": 1.087395429611206, "learning_rate": 7.398351198227022e-05, "loss": 0.1668491005897522, "step": 61340 }, { "epoch": 0.263388372272739, "grad_norm": 2.940399646759033, "learning_rate": 7.397920026215258e-05, "loss": 0.3233808994293213, "step": 61350 }, { "epoch": 0.26343130436275897, "grad_norm": 0.06883282214403152, "learning_rate": 7.397488854203496e-05, "loss": 0.21582355499267578, "step": 61360 }, { "epoch": 0.263474236452779, "grad_norm": 0.3769819438457489, "learning_rate": 7.397057682191734e-05, "loss": 0.21639833450317383, "step": 61370 }, { "epoch": 0.263517168542799, "grad_norm": 3.261446475982666, "learning_rate": 7.396626510179971e-05, "loss": 0.34641716480255125, "step": 61380 }, { "epoch": 0.263560100632819, "grad_norm": 3.089873790740967, "learning_rate": 7.396195338168209e-05, "loss": 0.2210846424102783, "step": 61390 }, { "epoch": 0.263603032722839, "grad_norm": 4.159793376922607, "learning_rate": 7.395764166156447e-05, "loss": 0.2680304527282715, "step": 61400 }, { "epoch": 0.263645964812859, "grad_norm": 1.3745867013931274, "learning_rate": 7.395332994144685e-05, "loss": 0.295793342590332, "step": 61410 }, { "epoch": 0.263688896902879, "grad_norm": 0.010196246206760406, "learning_rate": 7.394901822132922e-05, "loss": 0.18006807565689087, "step": 61420 }, { "epoch": 0.26373182899289904, "grad_norm": 0.20760205388069153, "learning_rate": 7.394470650121159e-05, "loss": 0.21314103603363038, "step": 61430 }, { "epoch": 0.26377476108291903, "grad_norm": 2.232950448989868, "learning_rate": 7.394039478109396e-05, "loss": 0.24137496948242188, "step": 61440 }, { "epoch": 0.26381769317293907, "grad_norm": 1.0074223279953003, "learning_rate": 7.393608306097634e-05, "loss": 0.210703444480896, "step": 61450 }, { "epoch": 0.26386062526295906, "grad_norm": 0.6803957223892212, "learning_rate": 7.393177134085872e-05, "loss": 0.40192604064941406, "step": 61460 }, { "epoch": 0.26390355735297905, "grad_norm": 4.76837682723999, "learning_rate": 7.39274596207411e-05, "loss": 0.18026487827301024, "step": 61470 }, { "epoch": 0.2639464894429991, "grad_norm": 7.9994988441467285, "learning_rate": 7.392314790062347e-05, "loss": 0.302392315864563, "step": 61480 }, { "epoch": 0.2639894215330191, "grad_norm": 4.274667739868164, "learning_rate": 7.391883618050585e-05, "loss": 0.17441201210021973, "step": 61490 }, { "epoch": 0.26403235362303906, "grad_norm": 0.026807954534888268, "learning_rate": 7.391452446038823e-05, "loss": 0.2764727115631104, "step": 61500 }, { "epoch": 0.2640752857130591, "grad_norm": 5.083781719207764, "learning_rate": 7.39102127402706e-05, "loss": 0.45632572174072267, "step": 61510 }, { "epoch": 0.2641182178030791, "grad_norm": 1.9132654666900635, "learning_rate": 7.390590102015298e-05, "loss": 0.22143087387084961, "step": 61520 }, { "epoch": 0.2641611498930991, "grad_norm": 0.5253879427909851, "learning_rate": 7.390158930003536e-05, "loss": 0.600839900970459, "step": 61530 }, { "epoch": 0.2642040819831191, "grad_norm": 1.4570778608322144, "learning_rate": 7.389727757991774e-05, "loss": 0.2691011905670166, "step": 61540 }, { "epoch": 0.2642470140731391, "grad_norm": 24.884841918945312, "learning_rate": 7.389296585980012e-05, "loss": 0.18936102390289306, "step": 61550 }, { "epoch": 0.2642899461631591, "grad_norm": 0.4241613745689392, "learning_rate": 7.388865413968249e-05, "loss": 0.2016587734222412, "step": 61560 }, { "epoch": 0.26433287825317914, "grad_norm": 1.7418383359909058, "learning_rate": 7.388434241956487e-05, "loss": 0.06550635695457459, "step": 61570 }, { "epoch": 0.2643758103431991, "grad_norm": 1.0983703136444092, "learning_rate": 7.388003069944725e-05, "loss": 0.31223478317260744, "step": 61580 }, { "epoch": 0.2644187424332191, "grad_norm": 0.04658321663737297, "learning_rate": 7.387571897932962e-05, "loss": 0.20343027114868165, "step": 61590 }, { "epoch": 0.26446167452323915, "grad_norm": 6.366367816925049, "learning_rate": 7.387140725921199e-05, "loss": 0.25048508644104006, "step": 61600 }, { "epoch": 0.26450460661325914, "grad_norm": 0.2299579381942749, "learning_rate": 7.386709553909437e-05, "loss": 0.1549118161201477, "step": 61610 }, { "epoch": 0.26454753870327913, "grad_norm": 3.3510000705718994, "learning_rate": 7.386278381897674e-05, "loss": 0.4528806686401367, "step": 61620 }, { "epoch": 0.26459047079329917, "grad_norm": 6.139395236968994, "learning_rate": 7.385847209885912e-05, "loss": 0.23490145206451415, "step": 61630 }, { "epoch": 0.26463340288331916, "grad_norm": 5.379457473754883, "learning_rate": 7.38541603787415e-05, "loss": 0.29587996006011963, "step": 61640 }, { "epoch": 0.26467633497333914, "grad_norm": 0.06632285565137863, "learning_rate": 7.384984865862388e-05, "loss": 0.17854514122009277, "step": 61650 }, { "epoch": 0.2647192670633592, "grad_norm": 1.1752125024795532, "learning_rate": 7.384553693850625e-05, "loss": 0.10081328153610229, "step": 61660 }, { "epoch": 0.2647621991533792, "grad_norm": 0.03773083910346031, "learning_rate": 7.384122521838863e-05, "loss": 0.24993579387664794, "step": 61670 }, { "epoch": 0.2648051312433992, "grad_norm": 0.823442280292511, "learning_rate": 7.3836913498271e-05, "loss": 0.2658759355545044, "step": 61680 }, { "epoch": 0.2648480633334192, "grad_norm": 3.046865940093994, "learning_rate": 7.383260177815337e-05, "loss": 0.4629732608795166, "step": 61690 }, { "epoch": 0.2648909954234392, "grad_norm": 0.8300249576568604, "learning_rate": 7.382829005803575e-05, "loss": 0.21923952102661132, "step": 61700 }, { "epoch": 0.26493392751345923, "grad_norm": 0.0004211229388602078, "learning_rate": 7.382397833791813e-05, "loss": 0.3185274124145508, "step": 61710 }, { "epoch": 0.2649768596034792, "grad_norm": 0.012865206226706505, "learning_rate": 7.38196666178005e-05, "loss": 0.20773918628692628, "step": 61720 }, { "epoch": 0.2650197916934992, "grad_norm": 1.0187405347824097, "learning_rate": 7.381535489768288e-05, "loss": 0.33336970806121824, "step": 61730 }, { "epoch": 0.26506272378351925, "grad_norm": 0.7479426860809326, "learning_rate": 7.381104317756526e-05, "loss": 0.3752789258956909, "step": 61740 }, { "epoch": 0.26510565587353924, "grad_norm": 2.9396021366119385, "learning_rate": 7.380673145744765e-05, "loss": 0.09405736327171325, "step": 61750 }, { "epoch": 0.2651485879635592, "grad_norm": 0.04090379178524017, "learning_rate": 7.380241973733001e-05, "loss": 0.17366938591003417, "step": 61760 }, { "epoch": 0.26519152005357927, "grad_norm": 5.305620193481445, "learning_rate": 7.379810801721239e-05, "loss": 0.35981733798980714, "step": 61770 }, { "epoch": 0.26523445214359925, "grad_norm": 0.04921811819076538, "learning_rate": 7.379379629709477e-05, "loss": 0.029331964254379273, "step": 61780 }, { "epoch": 0.26527738423361924, "grad_norm": 0.32620060443878174, "learning_rate": 7.378948457697714e-05, "loss": 0.24157421588897704, "step": 61790 }, { "epoch": 0.2653203163236393, "grad_norm": 0.010810810141265392, "learning_rate": 7.378517285685952e-05, "loss": 0.08367153406143188, "step": 61800 }, { "epoch": 0.26536324841365927, "grad_norm": 0.39795973896980286, "learning_rate": 7.37808611367419e-05, "loss": 0.2598013639450073, "step": 61810 }, { "epoch": 0.26540618050367926, "grad_norm": 0.05956602096557617, "learning_rate": 7.377654941662428e-05, "loss": 0.18582472801208497, "step": 61820 }, { "epoch": 0.2654491125936993, "grad_norm": 3.81455659866333, "learning_rate": 7.377223769650665e-05, "loss": 0.35067429542541506, "step": 61830 }, { "epoch": 0.2654920446837193, "grad_norm": 1.979435682296753, "learning_rate": 7.376792597638903e-05, "loss": 0.34986724853515627, "step": 61840 }, { "epoch": 0.2655349767737393, "grad_norm": 2.7388412952423096, "learning_rate": 7.37636142562714e-05, "loss": 0.3203420639038086, "step": 61850 }, { "epoch": 0.2655779088637593, "grad_norm": 0.003037786576896906, "learning_rate": 7.375930253615377e-05, "loss": 0.19214255809783937, "step": 61860 }, { "epoch": 0.2656208409537793, "grad_norm": 1.1273523569107056, "learning_rate": 7.375499081603615e-05, "loss": 0.20186288356781007, "step": 61870 }, { "epoch": 0.26566377304379934, "grad_norm": 1.959748387336731, "learning_rate": 7.375067909591853e-05, "loss": 0.1970548987388611, "step": 61880 }, { "epoch": 0.26570670513381933, "grad_norm": 1.9517850875854492, "learning_rate": 7.37463673758009e-05, "loss": 0.20934562683105468, "step": 61890 }, { "epoch": 0.2657496372238393, "grad_norm": 0.18594670295715332, "learning_rate": 7.374205565568328e-05, "loss": 0.13820563554763793, "step": 61900 }, { "epoch": 0.26579256931385936, "grad_norm": 3.1220526695251465, "learning_rate": 7.373774393556566e-05, "loss": 0.3077308893203735, "step": 61910 }, { "epoch": 0.26583550140387935, "grad_norm": 37.886390686035156, "learning_rate": 7.373343221544804e-05, "loss": 0.2505849123001099, "step": 61920 }, { "epoch": 0.26587843349389934, "grad_norm": 1.8187617063522339, "learning_rate": 7.37291204953304e-05, "loss": 0.2245555639266968, "step": 61930 }, { "epoch": 0.2659213655839194, "grad_norm": 2.576911687850952, "learning_rate": 7.372480877521278e-05, "loss": 0.3204474687576294, "step": 61940 }, { "epoch": 0.26596429767393936, "grad_norm": 1.5807483196258545, "learning_rate": 7.372049705509515e-05, "loss": 0.23133842945098876, "step": 61950 }, { "epoch": 0.26600722976395935, "grad_norm": 0.004031818360090256, "learning_rate": 7.371618533497753e-05, "loss": 0.30945868492126466, "step": 61960 }, { "epoch": 0.2660501618539794, "grad_norm": 0.18390342593193054, "learning_rate": 7.371187361485992e-05, "loss": 0.21341826915740966, "step": 61970 }, { "epoch": 0.2660930939439994, "grad_norm": 3.562913656234741, "learning_rate": 7.37075618947423e-05, "loss": 0.2335270404815674, "step": 61980 }, { "epoch": 0.26613602603401937, "grad_norm": 8.34090518951416, "learning_rate": 7.370325017462468e-05, "loss": 0.1888748288154602, "step": 61990 }, { "epoch": 0.2661789581240394, "grad_norm": 0.03201881796121597, "learning_rate": 7.369893845450706e-05, "loss": 0.22842373847961425, "step": 62000 }, { "epoch": 0.2661789581240394, "eval_loss": 0.43444526195526123, "eval_runtime": 27.4274, "eval_samples_per_second": 3.646, "eval_steps_per_second": 3.646, "step": 62000 }, { "epoch": 0.2662218902140594, "grad_norm": 0.0013862289488315582, "learning_rate": 7.369462673438942e-05, "loss": 0.10994983911514282, "step": 62010 }, { "epoch": 0.2662648223040794, "grad_norm": 0.0036061222199350595, "learning_rate": 7.36903150142718e-05, "loss": 0.05537129044532776, "step": 62020 }, { "epoch": 0.2663077543940994, "grad_norm": 0.1683593988418579, "learning_rate": 7.368600329415417e-05, "loss": 0.4459203243255615, "step": 62030 }, { "epoch": 0.2663506864841194, "grad_norm": 1.4167718887329102, "learning_rate": 7.368169157403655e-05, "loss": 0.4820812702178955, "step": 62040 }, { "epoch": 0.2663936185741394, "grad_norm": 0.642146110534668, "learning_rate": 7.367737985391893e-05, "loss": 0.2838521242141724, "step": 62050 }, { "epoch": 0.26643655066415944, "grad_norm": 0.9649590253829956, "learning_rate": 7.36730681338013e-05, "loss": 0.1945676326751709, "step": 62060 }, { "epoch": 0.26647948275417943, "grad_norm": 0.06836599111557007, "learning_rate": 7.366875641368368e-05, "loss": 0.26728594303131104, "step": 62070 }, { "epoch": 0.2665224148441994, "grad_norm": 0.029291236773133278, "learning_rate": 7.366444469356606e-05, "loss": 0.00536903589963913, "step": 62080 }, { "epoch": 0.26656534693421946, "grad_norm": 0.01653222367167473, "learning_rate": 7.366013297344842e-05, "loss": 0.3317889928817749, "step": 62090 }, { "epoch": 0.26660827902423945, "grad_norm": 0.5249482989311218, "learning_rate": 7.36558212533308e-05, "loss": 0.22167203426361085, "step": 62100 }, { "epoch": 0.2666512111142595, "grad_norm": 0.7123648524284363, "learning_rate": 7.365150953321318e-05, "loss": 0.29805192947387693, "step": 62110 }, { "epoch": 0.2666941432042795, "grad_norm": 0.003714463673532009, "learning_rate": 7.364719781309556e-05, "loss": 0.20269992351531982, "step": 62120 }, { "epoch": 0.26673707529429946, "grad_norm": 0.057309433817863464, "learning_rate": 7.364288609297793e-05, "loss": 0.43726015090942383, "step": 62130 }, { "epoch": 0.2667800073843195, "grad_norm": 0.005555383395403624, "learning_rate": 7.363857437286031e-05, "loss": 0.3711522340774536, "step": 62140 }, { "epoch": 0.2668229394743395, "grad_norm": 0.39166271686553955, "learning_rate": 7.363426265274269e-05, "loss": 0.11932632923126221, "step": 62150 }, { "epoch": 0.2668658715643595, "grad_norm": 0.7060807943344116, "learning_rate": 7.362995093262507e-05, "loss": 0.3223679780960083, "step": 62160 }, { "epoch": 0.2669088036543795, "grad_norm": 0.23493017256259918, "learning_rate": 7.362563921250744e-05, "loss": 0.14590442180633545, "step": 62170 }, { "epoch": 0.2669517357443995, "grad_norm": 0.04516841843724251, "learning_rate": 7.36213274923898e-05, "loss": 0.13015230894088745, "step": 62180 }, { "epoch": 0.2669946678344195, "grad_norm": 6.605303764343262, "learning_rate": 7.36170157722722e-05, "loss": 0.30696520805358884, "step": 62190 }, { "epoch": 0.26703759992443954, "grad_norm": 0.44605475664138794, "learning_rate": 7.361270405215457e-05, "loss": 0.20978827476501466, "step": 62200 }, { "epoch": 0.2670805320144595, "grad_norm": 0.04324696585536003, "learning_rate": 7.360839233203695e-05, "loss": 0.1520734667778015, "step": 62210 }, { "epoch": 0.2671234641044795, "grad_norm": 0.11113552004098892, "learning_rate": 7.360408061191933e-05, "loss": 0.23489885330200194, "step": 62220 }, { "epoch": 0.26716639619449956, "grad_norm": 0.17562143504619598, "learning_rate": 7.359976889180171e-05, "loss": 0.2564015626907349, "step": 62230 }, { "epoch": 0.26720932828451954, "grad_norm": 0.2895164489746094, "learning_rate": 7.359545717168408e-05, "loss": 0.17047150135040284, "step": 62240 }, { "epoch": 0.26725226037453953, "grad_norm": 0.21865420043468475, "learning_rate": 7.359114545156646e-05, "loss": 0.1545347213745117, "step": 62250 }, { "epoch": 0.2672951924645596, "grad_norm": 0.05775504559278488, "learning_rate": 7.358683373144883e-05, "loss": 0.17338919639587402, "step": 62260 }, { "epoch": 0.26733812455457956, "grad_norm": 0.0026504702400416136, "learning_rate": 7.35825220113312e-05, "loss": 0.08958525061607361, "step": 62270 }, { "epoch": 0.26738105664459955, "grad_norm": 1.8347655534744263, "learning_rate": 7.357821029121358e-05, "loss": 0.2308887481689453, "step": 62280 }, { "epoch": 0.2674239887346196, "grad_norm": 0.02356887236237526, "learning_rate": 7.357389857109596e-05, "loss": 0.046024075150489806, "step": 62290 }, { "epoch": 0.2674669208246396, "grad_norm": 2.049039840698242, "learning_rate": 7.356958685097833e-05, "loss": 0.3371156930923462, "step": 62300 }, { "epoch": 0.2675098529146596, "grad_norm": 0.5070636868476868, "learning_rate": 7.356527513086071e-05, "loss": 0.30076262950897215, "step": 62310 }, { "epoch": 0.2675527850046796, "grad_norm": 2.742283821105957, "learning_rate": 7.356096341074309e-05, "loss": 0.22705247402191162, "step": 62320 }, { "epoch": 0.2675957170946996, "grad_norm": 0.020929275080561638, "learning_rate": 7.355665169062547e-05, "loss": 0.4815894603729248, "step": 62330 }, { "epoch": 0.26763864918471963, "grad_norm": 2.0954997539520264, "learning_rate": 7.355233997050783e-05, "loss": 0.2702434778213501, "step": 62340 }, { "epoch": 0.2676815812747396, "grad_norm": 0.01475040428340435, "learning_rate": 7.354802825039021e-05, "loss": 0.10310671329498292, "step": 62350 }, { "epoch": 0.2677245133647596, "grad_norm": 0.07373770326375961, "learning_rate": 7.354371653027259e-05, "loss": 0.3095245838165283, "step": 62360 }, { "epoch": 0.26776744545477965, "grad_norm": 7.2039265632629395, "learning_rate": 7.353940481015496e-05, "loss": 0.14268449544906617, "step": 62370 }, { "epoch": 0.26781037754479964, "grad_norm": 1.4639053344726562, "learning_rate": 7.353509309003734e-05, "loss": 0.19518343210220337, "step": 62380 }, { "epoch": 0.2678533096348196, "grad_norm": 26.01327133178711, "learning_rate": 7.353078136991972e-05, "loss": 0.05920148491859436, "step": 62390 }, { "epoch": 0.26789624172483967, "grad_norm": 1.3359770774841309, "learning_rate": 7.35264696498021e-05, "loss": 0.37773053646087645, "step": 62400 }, { "epoch": 0.26793917381485965, "grad_norm": 1.264256238937378, "learning_rate": 7.352215792968447e-05, "loss": 0.3920429706573486, "step": 62410 }, { "epoch": 0.26798210590487964, "grad_norm": 0.02614930272102356, "learning_rate": 7.351784620956685e-05, "loss": 0.18842644691467286, "step": 62420 }, { "epoch": 0.2680250379948997, "grad_norm": 0.5546753406524658, "learning_rate": 7.351353448944923e-05, "loss": 0.2389291286468506, "step": 62430 }, { "epoch": 0.26806797008491967, "grad_norm": 0.9541937112808228, "learning_rate": 7.35092227693316e-05, "loss": 0.36570725440979, "step": 62440 }, { "epoch": 0.26811090217493966, "grad_norm": 3.901456594467163, "learning_rate": 7.350491104921398e-05, "loss": 0.2512011766433716, "step": 62450 }, { "epoch": 0.2681538342649597, "grad_norm": 3.098036050796509, "learning_rate": 7.350059932909636e-05, "loss": 0.2772386074066162, "step": 62460 }, { "epoch": 0.2681967663549797, "grad_norm": 0.006769151426851749, "learning_rate": 7.349628760897874e-05, "loss": 0.33778197765350343, "step": 62470 }, { "epoch": 0.2682396984449997, "grad_norm": 1.5368144512176514, "learning_rate": 7.349197588886111e-05, "loss": 0.23536946773529052, "step": 62480 }, { "epoch": 0.2682826305350197, "grad_norm": 0.004882279317826033, "learning_rate": 7.348766416874349e-05, "loss": 0.1848854899406433, "step": 62490 }, { "epoch": 0.2683255626250397, "grad_norm": 0.657514214515686, "learning_rate": 7.348335244862585e-05, "loss": 0.29116086959838866, "step": 62500 }, { "epoch": 0.2683684947150597, "grad_norm": 0.07048039138317108, "learning_rate": 7.347904072850823e-05, "loss": 0.1695142149925232, "step": 62510 }, { "epoch": 0.26841142680507973, "grad_norm": 0.012177824974060059, "learning_rate": 7.347472900839061e-05, "loss": 0.28824386596679685, "step": 62520 }, { "epoch": 0.2684543588950997, "grad_norm": 0.058346137404441833, "learning_rate": 7.347041728827299e-05, "loss": 0.26667845249176025, "step": 62530 }, { "epoch": 0.26849729098511976, "grad_norm": 0.008558766916394234, "learning_rate": 7.346610556815536e-05, "loss": 0.2703331708908081, "step": 62540 }, { "epoch": 0.26854022307513975, "grad_norm": 0.916430652141571, "learning_rate": 7.346179384803774e-05, "loss": 0.15587707757949829, "step": 62550 }, { "epoch": 0.26858315516515974, "grad_norm": 0.5451849699020386, "learning_rate": 7.345748212792012e-05, "loss": 0.25034103393554685, "step": 62560 }, { "epoch": 0.2686260872551798, "grad_norm": 2.4739830493927, "learning_rate": 7.34531704078025e-05, "loss": 0.13914070129394532, "step": 62570 }, { "epoch": 0.26866901934519977, "grad_norm": 0.006528899073600769, "learning_rate": 7.344885868768487e-05, "loss": 0.28967092037200926, "step": 62580 }, { "epoch": 0.26871195143521975, "grad_norm": 0.05252145603299141, "learning_rate": 7.344454696756724e-05, "loss": 0.32130486965179444, "step": 62590 }, { "epoch": 0.2687548835252398, "grad_norm": 3.624072313308716, "learning_rate": 7.344023524744961e-05, "loss": 0.23874349594116212, "step": 62600 }, { "epoch": 0.2687978156152598, "grad_norm": 0.6444095373153687, "learning_rate": 7.343592352733199e-05, "loss": 0.11211415529251098, "step": 62610 }, { "epoch": 0.26884074770527977, "grad_norm": 1.74681556224823, "learning_rate": 7.343161180721437e-05, "loss": 0.24570322036743164, "step": 62620 }, { "epoch": 0.2688836797952998, "grad_norm": 2.3091390132904053, "learning_rate": 7.342730008709675e-05, "loss": 0.1712045431137085, "step": 62630 }, { "epoch": 0.2689266118853198, "grad_norm": 3.986283540725708, "learning_rate": 7.342298836697912e-05, "loss": 0.3222727537155151, "step": 62640 }, { "epoch": 0.2689695439753398, "grad_norm": 2.886242389678955, "learning_rate": 7.34186766468615e-05, "loss": 0.3050569534301758, "step": 62650 }, { "epoch": 0.26901247606535983, "grad_norm": 1.565330147743225, "learning_rate": 7.341436492674388e-05, "loss": 0.12091318368911744, "step": 62660 }, { "epoch": 0.2690554081553798, "grad_norm": 2.3400821685791016, "learning_rate": 7.341005320662626e-05, "loss": 0.17482272386550904, "step": 62670 }, { "epoch": 0.2690983402453998, "grad_norm": 0.005332406144589186, "learning_rate": 7.340574148650863e-05, "loss": 0.20458731651306153, "step": 62680 }, { "epoch": 0.26914127233541985, "grad_norm": 1.2362321615219116, "learning_rate": 7.340142976639101e-05, "loss": 0.1346642255783081, "step": 62690 }, { "epoch": 0.26918420442543983, "grad_norm": 4.819741249084473, "learning_rate": 7.339711804627339e-05, "loss": 0.24375250339508056, "step": 62700 }, { "epoch": 0.2692271365154598, "grad_norm": 0.2714204788208008, "learning_rate": 7.339280632615577e-05, "loss": 0.16298720836639405, "step": 62710 }, { "epoch": 0.26927006860547986, "grad_norm": 1.7343765497207642, "learning_rate": 7.338849460603814e-05, "loss": 0.12646753787994386, "step": 62720 }, { "epoch": 0.26931300069549985, "grad_norm": 0.6741864681243896, "learning_rate": 7.338418288592052e-05, "loss": 0.18241225481033324, "step": 62730 }, { "epoch": 0.2693559327855199, "grad_norm": 0.6616902351379395, "learning_rate": 7.33798711658029e-05, "loss": 0.22634491920471192, "step": 62740 }, { "epoch": 0.2693988648755399, "grad_norm": 4.4191460609436035, "learning_rate": 7.337555944568526e-05, "loss": 0.2351382255554199, "step": 62750 }, { "epoch": 0.26944179696555987, "grad_norm": 0.004057802725583315, "learning_rate": 7.337124772556764e-05, "loss": 0.012336998432874679, "step": 62760 }, { "epoch": 0.2694847290555799, "grad_norm": 0.0004717262927442789, "learning_rate": 7.336693600545002e-05, "loss": 0.17408014535903932, "step": 62770 }, { "epoch": 0.2695276611455999, "grad_norm": 2.6368041038513184, "learning_rate": 7.336262428533239e-05, "loss": 0.26660282611846925, "step": 62780 }, { "epoch": 0.2695705932356199, "grad_norm": 0.03568703308701515, "learning_rate": 7.335831256521477e-05, "loss": 0.3751511812210083, "step": 62790 }, { "epoch": 0.2696135253256399, "grad_norm": 1.9657670259475708, "learning_rate": 7.335400084509715e-05, "loss": 0.28092188835144044, "step": 62800 }, { "epoch": 0.2696564574156599, "grad_norm": 1.9834064245224, "learning_rate": 7.334968912497953e-05, "loss": 0.1973501205444336, "step": 62810 }, { "epoch": 0.2696993895056799, "grad_norm": 11.444055557250977, "learning_rate": 7.33453774048619e-05, "loss": 0.2275404930114746, "step": 62820 }, { "epoch": 0.26974232159569994, "grad_norm": 7.298734188079834, "learning_rate": 7.334106568474427e-05, "loss": 0.14433352947235106, "step": 62830 }, { "epoch": 0.26978525368571993, "grad_norm": 5.212087631225586, "learning_rate": 7.333675396462664e-05, "loss": 0.26730144023895264, "step": 62840 }, { "epoch": 0.2698281857757399, "grad_norm": 0.022054264321923256, "learning_rate": 7.333244224450902e-05, "loss": 0.3407695770263672, "step": 62850 }, { "epoch": 0.26987111786575996, "grad_norm": 0.010150299407541752, "learning_rate": 7.33281305243914e-05, "loss": 0.14565470218658447, "step": 62860 }, { "epoch": 0.26991404995577994, "grad_norm": 0.04980747401714325, "learning_rate": 7.332381880427378e-05, "loss": 0.3148836135864258, "step": 62870 }, { "epoch": 0.26995698204579993, "grad_norm": 0.029979297891259193, "learning_rate": 7.331950708415615e-05, "loss": 0.20162365436553956, "step": 62880 }, { "epoch": 0.26999991413582, "grad_norm": 0.3095214068889618, "learning_rate": 7.331519536403853e-05, "loss": 0.07629244923591613, "step": 62890 }, { "epoch": 0.27004284622583996, "grad_norm": 0.05000022426247597, "learning_rate": 7.331088364392091e-05, "loss": 0.20231847763061522, "step": 62900 }, { "epoch": 0.27008577831585995, "grad_norm": 0.016812866553664207, "learning_rate": 7.330657192380328e-05, "loss": 0.1995375156402588, "step": 62910 }, { "epoch": 0.27012871040588, "grad_norm": 0.03599417582154274, "learning_rate": 7.330226020368566e-05, "loss": 0.09891985058784485, "step": 62920 }, { "epoch": 0.2701716424959, "grad_norm": 0.3898320496082306, "learning_rate": 7.329794848356804e-05, "loss": 0.12806140184402465, "step": 62930 }, { "epoch": 0.27021457458591996, "grad_norm": 0.009992690756917, "learning_rate": 7.329363676345042e-05, "loss": 0.12640279531478882, "step": 62940 }, { "epoch": 0.27025750667594, "grad_norm": 0.11849182844161987, "learning_rate": 7.32893250433328e-05, "loss": 0.3613492488861084, "step": 62950 }, { "epoch": 0.27030043876596, "grad_norm": 1.4423352479934692, "learning_rate": 7.328501332321517e-05, "loss": 0.4773262977600098, "step": 62960 }, { "epoch": 0.27034337085598004, "grad_norm": 0.05382090061903, "learning_rate": 7.328070160309755e-05, "loss": 0.17667585611343384, "step": 62970 }, { "epoch": 0.270386302946, "grad_norm": 0.3748687207698822, "learning_rate": 7.327638988297993e-05, "loss": 0.32606868743896483, "step": 62980 }, { "epoch": 0.27042923503602, "grad_norm": 1.5009466409683228, "learning_rate": 7.32720781628623e-05, "loss": 0.21618103981018066, "step": 62990 }, { "epoch": 0.27047216712604005, "grad_norm": 0.018100149929523468, "learning_rate": 7.326776644274467e-05, "loss": 0.2740698575973511, "step": 63000 }, { "epoch": 0.27047216712604005, "eval_loss": 0.4309654235839844, "eval_runtime": 27.4414, "eval_samples_per_second": 3.644, "eval_steps_per_second": 3.644, "step": 63000 }, { "epoch": 0.27051509921606004, "grad_norm": 0.007100074086338282, "learning_rate": 7.326345472262704e-05, "loss": 0.326142692565918, "step": 63010 }, { "epoch": 0.27055803130608, "grad_norm": 4.347993850708008, "learning_rate": 7.325914300250942e-05, "loss": 0.08076274394989014, "step": 63020 }, { "epoch": 0.27060096339610007, "grad_norm": 0.4008999466896057, "learning_rate": 7.32548312823918e-05, "loss": 0.3010993242263794, "step": 63030 }, { "epoch": 0.27064389548612006, "grad_norm": 1.1022944450378418, "learning_rate": 7.325051956227418e-05, "loss": 0.30613150596618655, "step": 63040 }, { "epoch": 0.27068682757614004, "grad_norm": 0.004065214190632105, "learning_rate": 7.324620784215655e-05, "loss": 0.08829469680786133, "step": 63050 }, { "epoch": 0.2707297596661601, "grad_norm": 1.723278522491455, "learning_rate": 7.324189612203893e-05, "loss": 0.4858261585235596, "step": 63060 }, { "epoch": 0.2707726917561801, "grad_norm": 3.401308298110962, "learning_rate": 7.323758440192131e-05, "loss": 0.40488252639770506, "step": 63070 }, { "epoch": 0.27081562384620006, "grad_norm": 0.10068821161985397, "learning_rate": 7.323327268180367e-05, "loss": 0.10350894927978516, "step": 63080 }, { "epoch": 0.2708585559362201, "grad_norm": 3.938289165496826, "learning_rate": 7.322896096168605e-05, "loss": 0.27509207725524903, "step": 63090 }, { "epoch": 0.2709014880262401, "grad_norm": 0.045174308121204376, "learning_rate": 7.322464924156843e-05, "loss": 0.2806502342224121, "step": 63100 }, { "epoch": 0.2709444201162601, "grad_norm": 0.09346527606248856, "learning_rate": 7.32203375214508e-05, "loss": 0.4489753723144531, "step": 63110 }, { "epoch": 0.2709873522062801, "grad_norm": 14.672597885131836, "learning_rate": 7.321602580133318e-05, "loss": 0.22636539936065675, "step": 63120 }, { "epoch": 0.2710302842963001, "grad_norm": 0.023005153983831406, "learning_rate": 7.321171408121556e-05, "loss": 0.3792133331298828, "step": 63130 }, { "epoch": 0.2710732163863201, "grad_norm": 0.024886643514037132, "learning_rate": 7.320740236109794e-05, "loss": 0.23559410572052003, "step": 63140 }, { "epoch": 0.27111614847634014, "grad_norm": 1.5846565961837769, "learning_rate": 7.320309064098031e-05, "loss": 0.34138927459716795, "step": 63150 }, { "epoch": 0.2711590805663601, "grad_norm": 8.62157917022705, "learning_rate": 7.319877892086269e-05, "loss": 0.150002384185791, "step": 63160 }, { "epoch": 0.27120201265638016, "grad_norm": 4.657135963439941, "learning_rate": 7.319446720074507e-05, "loss": 0.165123450756073, "step": 63170 }, { "epoch": 0.27124494474640015, "grad_norm": 0.5713585615158081, "learning_rate": 7.319015548062745e-05, "loss": 0.39691801071166993, "step": 63180 }, { "epoch": 0.27128787683642014, "grad_norm": 0.8264710903167725, "learning_rate": 7.318584376050982e-05, "loss": 0.18502925634384154, "step": 63190 }, { "epoch": 0.2713308089264402, "grad_norm": 0.016064446419477463, "learning_rate": 7.31815320403922e-05, "loss": 0.13511744737625123, "step": 63200 }, { "epoch": 0.27137374101646017, "grad_norm": 0.029205219820141792, "learning_rate": 7.317722032027458e-05, "loss": 0.0855722427368164, "step": 63210 }, { "epoch": 0.27141667310648016, "grad_norm": 0.019982269033789635, "learning_rate": 7.317290860015696e-05, "loss": 0.291228199005127, "step": 63220 }, { "epoch": 0.2714596051965002, "grad_norm": 1.64790678024292, "learning_rate": 7.316859688003933e-05, "loss": 0.33681635856628417, "step": 63230 }, { "epoch": 0.2715025372865202, "grad_norm": 1.967416524887085, "learning_rate": 7.31642851599217e-05, "loss": 0.22783217430114747, "step": 63240 }, { "epoch": 0.27154546937654017, "grad_norm": 1.4456120729446411, "learning_rate": 7.315997343980407e-05, "loss": 0.2588557004928589, "step": 63250 }, { "epoch": 0.2715884014665602, "grad_norm": 1.7478188276290894, "learning_rate": 7.315566171968645e-05, "loss": 0.33290698528289797, "step": 63260 }, { "epoch": 0.2716313335565802, "grad_norm": 0.002389519242569804, "learning_rate": 7.315134999956883e-05, "loss": 0.31861014366149903, "step": 63270 }, { "epoch": 0.2716742656466002, "grad_norm": 0.009542024694383144, "learning_rate": 7.31470382794512e-05, "loss": 0.20476877689361572, "step": 63280 }, { "epoch": 0.27171719773662023, "grad_norm": 3.3123133182525635, "learning_rate": 7.314272655933358e-05, "loss": 0.28285880088806153, "step": 63290 }, { "epoch": 0.2717601298266402, "grad_norm": 0.1270076185464859, "learning_rate": 7.313841483921596e-05, "loss": 0.0846854031085968, "step": 63300 }, { "epoch": 0.2718030619166602, "grad_norm": 1.4891453981399536, "learning_rate": 7.313410311909834e-05, "loss": 0.1929972767829895, "step": 63310 }, { "epoch": 0.27184599400668025, "grad_norm": 1.1889742612838745, "learning_rate": 7.312979139898072e-05, "loss": 0.27428336143493653, "step": 63320 }, { "epoch": 0.27188892609670023, "grad_norm": 2.5377233028411865, "learning_rate": 7.312547967886308e-05, "loss": 0.2582359313964844, "step": 63330 }, { "epoch": 0.2719318581867202, "grad_norm": 0.23425069451332092, "learning_rate": 7.312116795874546e-05, "loss": 0.14315102100372315, "step": 63340 }, { "epoch": 0.27197479027674026, "grad_norm": 3.611154317855835, "learning_rate": 7.311685623862783e-05, "loss": 0.23460731506347657, "step": 63350 }, { "epoch": 0.27201772236676025, "grad_norm": 2.1825406551361084, "learning_rate": 7.311254451851021e-05, "loss": 0.2122056007385254, "step": 63360 }, { "epoch": 0.27206065445678024, "grad_norm": 0.25567686557769775, "learning_rate": 7.310823279839259e-05, "loss": 0.22430589199066162, "step": 63370 }, { "epoch": 0.2721035865468003, "grad_norm": 1.1983752250671387, "learning_rate": 7.310392107827498e-05, "loss": 0.16110701560974122, "step": 63380 }, { "epoch": 0.27214651863682027, "grad_norm": 0.022660892456769943, "learning_rate": 7.309960935815736e-05, "loss": 0.20464136600494384, "step": 63390 }, { "epoch": 0.2721894507268403, "grad_norm": 2.540070056915283, "learning_rate": 7.309529763803973e-05, "loss": 0.3315227746963501, "step": 63400 }, { "epoch": 0.2722323828168603, "grad_norm": 3.014178514480591, "learning_rate": 7.30909859179221e-05, "loss": 0.1401577115058899, "step": 63410 }, { "epoch": 0.2722753149068803, "grad_norm": 2.405425786972046, "learning_rate": 7.308667419780448e-05, "loss": 0.5511850833892822, "step": 63420 }, { "epoch": 0.2723182469969003, "grad_norm": 0.009172724559903145, "learning_rate": 7.308236247768685e-05, "loss": 0.318274998664856, "step": 63430 }, { "epoch": 0.2723611790869203, "grad_norm": 0.16494855284690857, "learning_rate": 7.307805075756923e-05, "loss": 0.0355972558259964, "step": 63440 }, { "epoch": 0.2724041111769403, "grad_norm": 2.663872718811035, "learning_rate": 7.307373903745161e-05, "loss": 0.29265077114105226, "step": 63450 }, { "epoch": 0.27244704326696034, "grad_norm": 1.3054550886154175, "learning_rate": 7.306942731733398e-05, "loss": 0.08337785601615906, "step": 63460 }, { "epoch": 0.27248997535698033, "grad_norm": 2.61496901512146, "learning_rate": 7.306511559721636e-05, "loss": 0.23691489696502685, "step": 63470 }, { "epoch": 0.2725329074470003, "grad_norm": 0.04049558565020561, "learning_rate": 7.306080387709874e-05, "loss": 0.16273421049118042, "step": 63480 }, { "epoch": 0.27257583953702036, "grad_norm": 1.6018909215927124, "learning_rate": 7.30564921569811e-05, "loss": 0.19723182916641235, "step": 63490 }, { "epoch": 0.27261877162704035, "grad_norm": 0.0021466747857630253, "learning_rate": 7.305218043686348e-05, "loss": 0.025392404198646544, "step": 63500 }, { "epoch": 0.27266170371706033, "grad_norm": 4.236203670501709, "learning_rate": 7.304786871674586e-05, "loss": 0.16003117561340333, "step": 63510 }, { "epoch": 0.2727046358070804, "grad_norm": 0.028405891731381416, "learning_rate": 7.304355699662824e-05, "loss": 0.32063629627227785, "step": 63520 }, { "epoch": 0.27274756789710036, "grad_norm": 0.028358174487948418, "learning_rate": 7.303924527651061e-05, "loss": 0.03217737674713135, "step": 63530 }, { "epoch": 0.27279049998712035, "grad_norm": 0.0237318966537714, "learning_rate": 7.303493355639299e-05, "loss": 0.18059996366500855, "step": 63540 }, { "epoch": 0.2728334320771404, "grad_norm": 0.002409103326499462, "learning_rate": 7.303062183627537e-05, "loss": 0.09812519550323487, "step": 63550 }, { "epoch": 0.2728763641671604, "grad_norm": 0.16705992817878723, "learning_rate": 7.302631011615774e-05, "loss": 0.19015454053878783, "step": 63560 }, { "epoch": 0.27291929625718037, "grad_norm": 0.0017375649185851216, "learning_rate": 7.302199839604011e-05, "loss": 0.18906515836715698, "step": 63570 }, { "epoch": 0.2729622283472004, "grad_norm": 1.643397331237793, "learning_rate": 7.301768667592249e-05, "loss": 0.37471392154693606, "step": 63580 }, { "epoch": 0.2730051604372204, "grad_norm": 0.008646286092698574, "learning_rate": 7.301337495580486e-05, "loss": 0.38129801750183107, "step": 63590 }, { "epoch": 0.27304809252724044, "grad_norm": 3.7929863929748535, "learning_rate": 7.300906323568725e-05, "loss": 0.24006271362304688, "step": 63600 }, { "epoch": 0.2730910246172604, "grad_norm": 5.849998474121094, "learning_rate": 7.300475151556963e-05, "loss": 0.13114572763442994, "step": 63610 }, { "epoch": 0.2731339567072804, "grad_norm": 0.018327688798308372, "learning_rate": 7.300043979545201e-05, "loss": 0.08518844246864318, "step": 63620 }, { "epoch": 0.27317688879730045, "grad_norm": 0.14316414296627045, "learning_rate": 7.299612807533439e-05, "loss": 0.052723509073257444, "step": 63630 }, { "epoch": 0.27321982088732044, "grad_norm": 2.5703630447387695, "learning_rate": 7.299181635521676e-05, "loss": 0.26565487384796144, "step": 63640 }, { "epoch": 0.27326275297734043, "grad_norm": 1.4851138591766357, "learning_rate": 7.298750463509914e-05, "loss": 0.18685510158538818, "step": 63650 }, { "epoch": 0.27330568506736047, "grad_norm": 0.04608583077788353, "learning_rate": 7.29831929149815e-05, "loss": 0.11118690967559815, "step": 63660 }, { "epoch": 0.27334861715738046, "grad_norm": 0.024671832099556923, "learning_rate": 7.297888119486388e-05, "loss": 0.30791258811950684, "step": 63670 }, { "epoch": 0.27339154924740044, "grad_norm": 18.297765731811523, "learning_rate": 7.297456947474626e-05, "loss": 0.1248279333114624, "step": 63680 }, { "epoch": 0.2734344813374205, "grad_norm": 34.01502990722656, "learning_rate": 7.297025775462864e-05, "loss": 0.20545308589935302, "step": 63690 }, { "epoch": 0.2734774134274405, "grad_norm": 0.03677314892411232, "learning_rate": 7.296594603451101e-05, "loss": 0.21320362091064454, "step": 63700 }, { "epoch": 0.27352034551746046, "grad_norm": 2.0875508785247803, "learning_rate": 7.296163431439339e-05, "loss": 0.3435171604156494, "step": 63710 }, { "epoch": 0.2735632776074805, "grad_norm": 0.06830603629350662, "learning_rate": 7.295732259427577e-05, "loss": 0.17610445022583007, "step": 63720 }, { "epoch": 0.2736062096975005, "grad_norm": 0.676224410533905, "learning_rate": 7.295301087415815e-05, "loss": 0.3897466897964478, "step": 63730 }, { "epoch": 0.2736491417875205, "grad_norm": 4.646862983703613, "learning_rate": 7.294869915404051e-05, "loss": 0.28968157768249514, "step": 63740 }, { "epoch": 0.2736920738775405, "grad_norm": 0.4177786409854889, "learning_rate": 7.294438743392289e-05, "loss": 0.266361141204834, "step": 63750 }, { "epoch": 0.2737350059675605, "grad_norm": 0.4810597002506256, "learning_rate": 7.294007571380526e-05, "loss": 0.2344064950942993, "step": 63760 }, { "epoch": 0.2737779380575805, "grad_norm": 0.0033130869269371033, "learning_rate": 7.293576399368764e-05, "loss": 0.15541367530822753, "step": 63770 }, { "epoch": 0.27382087014760054, "grad_norm": 0.339785635471344, "learning_rate": 7.293145227357002e-05, "loss": 0.24946272373199463, "step": 63780 }, { "epoch": 0.2738638022376205, "grad_norm": 0.017482534050941467, "learning_rate": 7.29271405534524e-05, "loss": 0.20425059795379638, "step": 63790 }, { "epoch": 0.2739067343276405, "grad_norm": 7.477847576141357, "learning_rate": 7.292282883333477e-05, "loss": 0.14926481246948242, "step": 63800 }, { "epoch": 0.27394966641766055, "grad_norm": 0.003075790125876665, "learning_rate": 7.291851711321715e-05, "loss": 0.18997302055358886, "step": 63810 }, { "epoch": 0.27399259850768054, "grad_norm": 0.0006680086953565478, "learning_rate": 7.291420539309953e-05, "loss": 0.2547083616256714, "step": 63820 }, { "epoch": 0.2740355305977006, "grad_norm": 0.021308038383722305, "learning_rate": 7.29098936729819e-05, "loss": 0.2349924325942993, "step": 63830 }, { "epoch": 0.27407846268772057, "grad_norm": 4.248676300048828, "learning_rate": 7.290558195286428e-05, "loss": 0.4020249843597412, "step": 63840 }, { "epoch": 0.27412139477774056, "grad_norm": 0.2813398540019989, "learning_rate": 7.290127023274666e-05, "loss": 0.30169265270233153, "step": 63850 }, { "epoch": 0.2741643268677606, "grad_norm": 0.5681740641593933, "learning_rate": 7.289695851262904e-05, "loss": 0.10693715810775757, "step": 63860 }, { "epoch": 0.2742072589577806, "grad_norm": 10.923131942749023, "learning_rate": 7.289264679251141e-05, "loss": 0.15194600820541382, "step": 63870 }, { "epoch": 0.2742501910478006, "grad_norm": 0.02402135357260704, "learning_rate": 7.288833507239379e-05, "loss": 0.17886065244674682, "step": 63880 }, { "epoch": 0.2742931231378206, "grad_norm": 1.7143903970718384, "learning_rate": 7.288402335227617e-05, "loss": 0.40548253059387207, "step": 63890 }, { "epoch": 0.2743360552278406, "grad_norm": 0.28345656394958496, "learning_rate": 7.287971163215853e-05, "loss": 0.13184144496917724, "step": 63900 }, { "epoch": 0.2743789873178606, "grad_norm": 0.12064622342586517, "learning_rate": 7.287539991204091e-05, "loss": 0.30831031799316405, "step": 63910 }, { "epoch": 0.27442191940788063, "grad_norm": 1.588731050491333, "learning_rate": 7.287108819192329e-05, "loss": 0.10332268476486206, "step": 63920 }, { "epoch": 0.2744648514979006, "grad_norm": 0.0007470548735000193, "learning_rate": 7.286677647180567e-05, "loss": 0.15131561756134032, "step": 63930 }, { "epoch": 0.2745077835879206, "grad_norm": 1.6601921319961548, "learning_rate": 7.286246475168804e-05, "loss": 0.19120069742202758, "step": 63940 }, { "epoch": 0.27455071567794065, "grad_norm": 0.511339545249939, "learning_rate": 7.285815303157042e-05, "loss": 0.16427149772644042, "step": 63950 }, { "epoch": 0.27459364776796064, "grad_norm": 1.9782980680465698, "learning_rate": 7.28538413114528e-05, "loss": 0.22477269172668457, "step": 63960 }, { "epoch": 0.2746365798579806, "grad_norm": 0.11178219318389893, "learning_rate": 7.284952959133517e-05, "loss": 0.09104756116867066, "step": 63970 }, { "epoch": 0.27467951194800067, "grad_norm": 0.16948817670345306, "learning_rate": 7.284521787121754e-05, "loss": 0.3317331075668335, "step": 63980 }, { "epoch": 0.27472244403802065, "grad_norm": 0.03879360482096672, "learning_rate": 7.284090615109992e-05, "loss": 0.3537483215332031, "step": 63990 }, { "epoch": 0.27476537612804064, "grad_norm": 0.005190638825297356, "learning_rate": 7.28365944309823e-05, "loss": 0.1923106789588928, "step": 64000 }, { "epoch": 0.27476537612804064, "eval_loss": 0.4264955222606659, "eval_runtime": 27.3919, "eval_samples_per_second": 3.651, "eval_steps_per_second": 3.651, "step": 64000 }, { "epoch": 0.2748083082180607, "grad_norm": 11.412931442260742, "learning_rate": 7.283228271086467e-05, "loss": 0.1319342851638794, "step": 64010 }, { "epoch": 0.27485124030808067, "grad_norm": 2.2989232540130615, "learning_rate": 7.282797099074705e-05, "loss": 0.3625922441482544, "step": 64020 }, { "epoch": 0.2748941723981007, "grad_norm": 0.015120278112590313, "learning_rate": 7.282365927062943e-05, "loss": 0.21226191520690918, "step": 64030 }, { "epoch": 0.2749371044881207, "grad_norm": 1.596508502960205, "learning_rate": 7.28193475505118e-05, "loss": 0.2687784433364868, "step": 64040 }, { "epoch": 0.2749800365781407, "grad_norm": 0.04696401581168175, "learning_rate": 7.281503583039418e-05, "loss": 0.19433155059814453, "step": 64050 }, { "epoch": 0.2750229686681607, "grad_norm": 2.4514739513397217, "learning_rate": 7.281072411027656e-05, "loss": 0.27985870838165283, "step": 64060 }, { "epoch": 0.2750659007581807, "grad_norm": 0.027478374540805817, "learning_rate": 7.280641239015893e-05, "loss": 0.3420846462249756, "step": 64070 }, { "epoch": 0.2751088328482007, "grad_norm": 0.002903494518250227, "learning_rate": 7.280210067004131e-05, "loss": 0.0563319206237793, "step": 64080 }, { "epoch": 0.27515176493822074, "grad_norm": 1.0330356359481812, "learning_rate": 7.279778894992369e-05, "loss": 0.38048701286315917, "step": 64090 }, { "epoch": 0.27519469702824073, "grad_norm": 0.028292180970311165, "learning_rate": 7.279347722980607e-05, "loss": 0.13075207471847533, "step": 64100 }, { "epoch": 0.2752376291182607, "grad_norm": 0.10859852284193039, "learning_rate": 7.278916550968844e-05, "loss": 0.03039872646331787, "step": 64110 }, { "epoch": 0.27528056120828076, "grad_norm": 0.02786979079246521, "learning_rate": 7.278485378957082e-05, "loss": 0.1169194221496582, "step": 64120 }, { "epoch": 0.27532349329830075, "grad_norm": 16.22138023376465, "learning_rate": 7.27805420694532e-05, "loss": 0.2297053813934326, "step": 64130 }, { "epoch": 0.27536642538832073, "grad_norm": 0.013343838974833488, "learning_rate": 7.277623034933558e-05, "loss": 0.0033049676567316055, "step": 64140 }, { "epoch": 0.2754093574783408, "grad_norm": 0.0016603783005848527, "learning_rate": 7.277191862921794e-05, "loss": 0.40090460777282716, "step": 64150 }, { "epoch": 0.27545228956836076, "grad_norm": 0.025042593479156494, "learning_rate": 7.276760690910032e-05, "loss": 0.10281308889389038, "step": 64160 }, { "epoch": 0.27549522165838075, "grad_norm": 0.031472593545913696, "learning_rate": 7.27632951889827e-05, "loss": 0.05010194182395935, "step": 64170 }, { "epoch": 0.2755381537484008, "grad_norm": 7.79873514175415, "learning_rate": 7.275898346886507e-05, "loss": 0.3900416851043701, "step": 64180 }, { "epoch": 0.2755810858384208, "grad_norm": 0.182487353682518, "learning_rate": 7.275467174874745e-05, "loss": 0.2953609228134155, "step": 64190 }, { "epoch": 0.27562401792844077, "grad_norm": 2.5202181339263916, "learning_rate": 7.275036002862983e-05, "loss": 0.19363387823104858, "step": 64200 }, { "epoch": 0.2756669500184608, "grad_norm": 0.009070219472050667, "learning_rate": 7.27460483085122e-05, "loss": 0.10885969400405884, "step": 64210 }, { "epoch": 0.2757098821084808, "grad_norm": 0.16483668982982635, "learning_rate": 7.274173658839458e-05, "loss": 0.19616938829421998, "step": 64220 }, { "epoch": 0.2757528141985008, "grad_norm": 1.6876084804534912, "learning_rate": 7.273742486827695e-05, "loss": 0.3560824394226074, "step": 64230 }, { "epoch": 0.2757957462885208, "grad_norm": 0.10044686496257782, "learning_rate": 7.273311314815932e-05, "loss": 0.3663048505783081, "step": 64240 }, { "epoch": 0.2758386783785408, "grad_norm": 2.984308958053589, "learning_rate": 7.27288014280417e-05, "loss": 0.31598410606384275, "step": 64250 }, { "epoch": 0.27588161046856086, "grad_norm": 0.024099402129650116, "learning_rate": 7.272448970792408e-05, "loss": 0.3546855688095093, "step": 64260 }, { "epoch": 0.27592454255858084, "grad_norm": 0.4444213807582855, "learning_rate": 7.272017798780645e-05, "loss": 0.3234513998031616, "step": 64270 }, { "epoch": 0.27596747464860083, "grad_norm": 0.12474822998046875, "learning_rate": 7.271586626768883e-05, "loss": 0.17412568330764772, "step": 64280 }, { "epoch": 0.2760104067386209, "grad_norm": 15.606555938720703, "learning_rate": 7.271155454757121e-05, "loss": 0.2279426336288452, "step": 64290 }, { "epoch": 0.27605333882864086, "grad_norm": 0.028739456087350845, "learning_rate": 7.270724282745359e-05, "loss": 0.2794104337692261, "step": 64300 }, { "epoch": 0.27609627091866085, "grad_norm": 1.804038643836975, "learning_rate": 7.270293110733596e-05, "loss": 0.22412712574005128, "step": 64310 }, { "epoch": 0.2761392030086809, "grad_norm": 2.9447500705718994, "learning_rate": 7.269861938721834e-05, "loss": 0.239043664932251, "step": 64320 }, { "epoch": 0.2761821350987009, "grad_norm": 0.13474737107753754, "learning_rate": 7.269430766710072e-05, "loss": 0.33125255107879636, "step": 64330 }, { "epoch": 0.27622506718872086, "grad_norm": 0.08215455710887909, "learning_rate": 7.26899959469831e-05, "loss": 0.10604830980300903, "step": 64340 }, { "epoch": 0.2762679992787409, "grad_norm": 1.0933629274368286, "learning_rate": 7.268568422686547e-05, "loss": 0.22616326808929443, "step": 64350 }, { "epoch": 0.2763109313687609, "grad_norm": 1.9105830192565918, "learning_rate": 7.268137250674785e-05, "loss": 0.2156665563583374, "step": 64360 }, { "epoch": 0.2763538634587809, "grad_norm": 0.22264964878559113, "learning_rate": 7.267706078663023e-05, "loss": 0.17556583881378174, "step": 64370 }, { "epoch": 0.2763967955488009, "grad_norm": 2.2902016639709473, "learning_rate": 7.26727490665126e-05, "loss": 0.15605368614196777, "step": 64380 }, { "epoch": 0.2764397276388209, "grad_norm": 3.392604351043701, "learning_rate": 7.266843734639498e-05, "loss": 0.3492011070251465, "step": 64390 }, { "epoch": 0.2764826597288409, "grad_norm": 1.067148208618164, "learning_rate": 7.266412562627735e-05, "loss": 0.33337039947509767, "step": 64400 }, { "epoch": 0.27652559181886094, "grad_norm": 3.995007038116455, "learning_rate": 7.265981390615972e-05, "loss": 0.2956079483032227, "step": 64410 }, { "epoch": 0.2765685239088809, "grad_norm": 2.1864147186279297, "learning_rate": 7.26555021860421e-05, "loss": 0.2727602481842041, "step": 64420 }, { "epoch": 0.2766114559989009, "grad_norm": 0.38919079303741455, "learning_rate": 7.265119046592448e-05, "loss": 0.2046382188796997, "step": 64430 }, { "epoch": 0.27665438808892096, "grad_norm": 0.08920589834451675, "learning_rate": 7.264687874580686e-05, "loss": 0.009954053908586502, "step": 64440 }, { "epoch": 0.27669732017894094, "grad_norm": 0.05094486102461815, "learning_rate": 7.264256702568923e-05, "loss": 0.08167248368263244, "step": 64450 }, { "epoch": 0.276740252268961, "grad_norm": 1.2057560682296753, "learning_rate": 7.263825530557161e-05, "loss": 0.21390509605407715, "step": 64460 }, { "epoch": 0.27678318435898097, "grad_norm": 1.2782899141311646, "learning_rate": 7.263394358545399e-05, "loss": 0.27840044498443606, "step": 64470 }, { "epoch": 0.27682611644900096, "grad_norm": 0.01978636533021927, "learning_rate": 7.262963186533635e-05, "loss": 0.08450507521629333, "step": 64480 }, { "epoch": 0.276869048539021, "grad_norm": 0.07557360827922821, "learning_rate": 7.262532014521873e-05, "loss": 0.21510164737701415, "step": 64490 }, { "epoch": 0.276911980629041, "grad_norm": 1.8195322751998901, "learning_rate": 7.26210084251011e-05, "loss": 0.38956317901611326, "step": 64500 }, { "epoch": 0.276954912719061, "grad_norm": 0.05904494225978851, "learning_rate": 7.261669670498348e-05, "loss": 0.3122358798980713, "step": 64510 }, { "epoch": 0.276997844809081, "grad_norm": 4.156544208526611, "learning_rate": 7.261238498486586e-05, "loss": 0.22088017463684081, "step": 64520 }, { "epoch": 0.277040776899101, "grad_norm": 2.8702967166900635, "learning_rate": 7.260807326474824e-05, "loss": 0.1237061619758606, "step": 64530 }, { "epoch": 0.277083708989121, "grad_norm": 0.3356216549873352, "learning_rate": 7.260376154463062e-05, "loss": 0.2359468460083008, "step": 64540 }, { "epoch": 0.27712664107914103, "grad_norm": 44.1283073425293, "learning_rate": 7.259944982451299e-05, "loss": 0.40196738243103025, "step": 64550 }, { "epoch": 0.277169573169161, "grad_norm": 0.044547755271196365, "learning_rate": 7.259513810439537e-05, "loss": 0.3468801736831665, "step": 64560 }, { "epoch": 0.277212505259181, "grad_norm": 4.803623199462891, "learning_rate": 7.259082638427775e-05, "loss": 0.15955498218536376, "step": 64570 }, { "epoch": 0.27725543734920105, "grad_norm": 0.03871012479066849, "learning_rate": 7.258651466416012e-05, "loss": 0.09484660625457764, "step": 64580 }, { "epoch": 0.27729836943922104, "grad_norm": 2.620669364929199, "learning_rate": 7.25822029440425e-05, "loss": 0.25763897895812987, "step": 64590 }, { "epoch": 0.277341301529241, "grad_norm": 0.027607867494225502, "learning_rate": 7.257789122392488e-05, "loss": 0.14064944982528688, "step": 64600 }, { "epoch": 0.27738423361926107, "grad_norm": 0.013144542463123798, "learning_rate": 7.257357950380726e-05, "loss": 0.36486527919769285, "step": 64610 }, { "epoch": 0.27742716570928105, "grad_norm": 0.011130384169518948, "learning_rate": 7.256926778368963e-05, "loss": 0.15375130176544188, "step": 64620 }, { "epoch": 0.27747009779930104, "grad_norm": 0.0812644436955452, "learning_rate": 7.256495606357201e-05, "loss": 0.25952873229980467, "step": 64630 }, { "epoch": 0.2775130298893211, "grad_norm": 1.9299334287643433, "learning_rate": 7.256064434345438e-05, "loss": 0.31588876247406006, "step": 64640 }, { "epoch": 0.27755596197934107, "grad_norm": 0.5452513694763184, "learning_rate": 7.255633262333675e-05, "loss": 0.2496518611907959, "step": 64650 }, { "epoch": 0.27759889406936106, "grad_norm": 1.4004237651824951, "learning_rate": 7.255202090321913e-05, "loss": 0.2847594738006592, "step": 64660 }, { "epoch": 0.2776418261593811, "grad_norm": 0.16185790300369263, "learning_rate": 7.254770918310151e-05, "loss": 0.2840761184692383, "step": 64670 }, { "epoch": 0.2776847582494011, "grad_norm": 5.281464099884033, "learning_rate": 7.254339746298388e-05, "loss": 0.23582305908203124, "step": 64680 }, { "epoch": 0.27772769033942113, "grad_norm": 0.009189880453050137, "learning_rate": 7.253908574286626e-05, "loss": 0.2242586851119995, "step": 64690 }, { "epoch": 0.2777706224294411, "grad_norm": 0.04949837923049927, "learning_rate": 7.253477402274864e-05, "loss": 0.1539124608039856, "step": 64700 }, { "epoch": 0.2778135545194611, "grad_norm": 0.002345997141674161, "learning_rate": 7.253046230263102e-05, "loss": 0.2814460515975952, "step": 64710 }, { "epoch": 0.27785648660948115, "grad_norm": 6.816084861755371, "learning_rate": 7.25261505825134e-05, "loss": 0.247141432762146, "step": 64720 }, { "epoch": 0.27789941869950113, "grad_norm": 3.8454461097717285, "learning_rate": 7.252183886239576e-05, "loss": 0.12018647193908691, "step": 64730 }, { "epoch": 0.2779423507895211, "grad_norm": 2.1291558742523193, "learning_rate": 7.251752714227814e-05, "loss": 0.2656750202178955, "step": 64740 }, { "epoch": 0.27798528287954116, "grad_norm": 1.8832786083221436, "learning_rate": 7.251321542216051e-05, "loss": 0.40599308013916013, "step": 64750 }, { "epoch": 0.27802821496956115, "grad_norm": 1.8022648096084595, "learning_rate": 7.250890370204289e-05, "loss": 0.3666477918624878, "step": 64760 }, { "epoch": 0.27807114705958114, "grad_norm": 4.709822654724121, "learning_rate": 7.250459198192527e-05, "loss": 0.2990487813949585, "step": 64770 }, { "epoch": 0.2781140791496012, "grad_norm": 1.3807032108306885, "learning_rate": 7.250028026180766e-05, "loss": 0.3683363437652588, "step": 64780 }, { "epoch": 0.27815701123962117, "grad_norm": 0.8225862979888916, "learning_rate": 7.249596854169004e-05, "loss": 0.29018213748931887, "step": 64790 }, { "epoch": 0.27819994332964115, "grad_norm": 0.14179366827011108, "learning_rate": 7.249165682157241e-05, "loss": 0.4528830051422119, "step": 64800 }, { "epoch": 0.2782428754196612, "grad_norm": 0.004524201154708862, "learning_rate": 7.248734510145478e-05, "loss": 0.08117425441741943, "step": 64810 }, { "epoch": 0.2782858075096812, "grad_norm": 1.0559335947036743, "learning_rate": 7.248303338133715e-05, "loss": 0.366524338722229, "step": 64820 }, { "epoch": 0.27832873959970117, "grad_norm": 2.1862030029296875, "learning_rate": 7.247872166121953e-05, "loss": 0.4338692188262939, "step": 64830 }, { "epoch": 0.2783716716897212, "grad_norm": 1.6690665483474731, "learning_rate": 7.247440994110191e-05, "loss": 0.3967538833618164, "step": 64840 }, { "epoch": 0.2784146037797412, "grad_norm": 0.0165514275431633, "learning_rate": 7.247009822098429e-05, "loss": 0.14806462526321412, "step": 64850 }, { "epoch": 0.2784575358697612, "grad_norm": 0.47653165459632874, "learning_rate": 7.246578650086666e-05, "loss": 0.176730740070343, "step": 64860 }, { "epoch": 0.27850046795978123, "grad_norm": 0.2633458077907562, "learning_rate": 7.246147478074904e-05, "loss": 0.191974937915802, "step": 64870 }, { "epoch": 0.2785434000498012, "grad_norm": 0.10138729214668274, "learning_rate": 7.245716306063142e-05, "loss": 0.2707113742828369, "step": 64880 }, { "epoch": 0.27858633213982126, "grad_norm": 0.013255268335342407, "learning_rate": 7.245285134051378e-05, "loss": 0.03636242747306824, "step": 64890 }, { "epoch": 0.27862926422984124, "grad_norm": 0.002024088054895401, "learning_rate": 7.244853962039616e-05, "loss": 0.14607818126678468, "step": 64900 }, { "epoch": 0.27867219631986123, "grad_norm": 0.0680466890335083, "learning_rate": 7.244422790027854e-05, "loss": 0.2507662773132324, "step": 64910 }, { "epoch": 0.2787151284098813, "grad_norm": 0.5838708281517029, "learning_rate": 7.243991618016091e-05, "loss": 0.3808858394622803, "step": 64920 }, { "epoch": 0.27875806049990126, "grad_norm": 0.08791586011648178, "learning_rate": 7.243560446004329e-05, "loss": 0.31563124656677244, "step": 64930 }, { "epoch": 0.27880099258992125, "grad_norm": 4.641262531280518, "learning_rate": 7.243129273992567e-05, "loss": 0.10856708288192748, "step": 64940 }, { "epoch": 0.2788439246799413, "grad_norm": 3.5792038440704346, "learning_rate": 7.242698101980805e-05, "loss": 0.19974257946014404, "step": 64950 }, { "epoch": 0.2788868567699613, "grad_norm": 0.45726874470710754, "learning_rate": 7.242266929969042e-05, "loss": 0.39138312339782716, "step": 64960 }, { "epoch": 0.27892978885998126, "grad_norm": 26.862560272216797, "learning_rate": 7.241835757957279e-05, "loss": 0.31828885078430175, "step": 64970 }, { "epoch": 0.2789727209500013, "grad_norm": 2.756909132003784, "learning_rate": 7.241404585945516e-05, "loss": 0.2442950963973999, "step": 64980 }, { "epoch": 0.2790156530400213, "grad_norm": 0.04665480926632881, "learning_rate": 7.240973413933754e-05, "loss": 0.3142120838165283, "step": 64990 }, { "epoch": 0.2790585851300413, "grad_norm": 1.2649515867233276, "learning_rate": 7.240542241921993e-05, "loss": 0.3911418914794922, "step": 65000 }, { "epoch": 0.2790585851300413, "eval_loss": 0.4333057403564453, "eval_runtime": 27.4187, "eval_samples_per_second": 3.647, "eval_steps_per_second": 3.647, "step": 65000 }, { "epoch": 0.2791015172200613, "grad_norm": 2.0587477684020996, "learning_rate": 7.240111069910231e-05, "loss": 0.3389150142669678, "step": 65010 }, { "epoch": 0.2791444493100813, "grad_norm": 3.1368260383605957, "learning_rate": 7.239679897898469e-05, "loss": 0.2532327651977539, "step": 65020 }, { "epoch": 0.2791873814001013, "grad_norm": 0.43125808238983154, "learning_rate": 7.239248725886706e-05, "loss": 0.1379924535751343, "step": 65030 }, { "epoch": 0.27923031349012134, "grad_norm": 1.2250033617019653, "learning_rate": 7.238817553874944e-05, "loss": 0.17046927213668822, "step": 65040 }, { "epoch": 0.2792732455801413, "grad_norm": 0.06205086410045624, "learning_rate": 7.23838638186318e-05, "loss": 0.1221045970916748, "step": 65050 }, { "epoch": 0.2793161776701613, "grad_norm": 4.098349571228027, "learning_rate": 7.237955209851418e-05, "loss": 0.350847053527832, "step": 65060 }, { "epoch": 0.27935910976018136, "grad_norm": 0.5678528547286987, "learning_rate": 7.237524037839656e-05, "loss": 0.3726787567138672, "step": 65070 }, { "epoch": 0.27940204185020134, "grad_norm": 0.13252170383930206, "learning_rate": 7.237092865827894e-05, "loss": 0.2217256784439087, "step": 65080 }, { "epoch": 0.27944497394022133, "grad_norm": 15.361329078674316, "learning_rate": 7.236661693816132e-05, "loss": 0.2631575345993042, "step": 65090 }, { "epoch": 0.2794879060302414, "grad_norm": 0.052907731384038925, "learning_rate": 7.236230521804369e-05, "loss": 0.06527657508850097, "step": 65100 }, { "epoch": 0.27953083812026136, "grad_norm": 0.037164174020290375, "learning_rate": 7.235799349792607e-05, "loss": 0.25826058387756345, "step": 65110 }, { "epoch": 0.2795737702102814, "grad_norm": 0.03161971643567085, "learning_rate": 7.235368177780845e-05, "loss": 0.21099441051483153, "step": 65120 }, { "epoch": 0.2796167023003014, "grad_norm": 1.0425679683685303, "learning_rate": 7.234937005769082e-05, "loss": 0.203094482421875, "step": 65130 }, { "epoch": 0.2796596343903214, "grad_norm": 0.013792422600090504, "learning_rate": 7.234505833757319e-05, "loss": 0.38997640609741213, "step": 65140 }, { "epoch": 0.2797025664803414, "grad_norm": 0.2896706759929657, "learning_rate": 7.234074661745557e-05, "loss": 0.09263712167739868, "step": 65150 }, { "epoch": 0.2797454985703614, "grad_norm": 4.325671672821045, "learning_rate": 7.233643489733794e-05, "loss": 0.42386541366577146, "step": 65160 }, { "epoch": 0.2797884306603814, "grad_norm": 0.24621273577213287, "learning_rate": 7.233212317722032e-05, "loss": 0.16344807147979737, "step": 65170 }, { "epoch": 0.27983136275040144, "grad_norm": 1.3230857849121094, "learning_rate": 7.23278114571027e-05, "loss": 0.010197050869464874, "step": 65180 }, { "epoch": 0.2798742948404214, "grad_norm": 0.006933805998414755, "learning_rate": 7.232349973698508e-05, "loss": 0.22311086654663087, "step": 65190 }, { "epoch": 0.2799172269304414, "grad_norm": 0.058281030505895615, "learning_rate": 7.231918801686745e-05, "loss": 0.2943753719329834, "step": 65200 }, { "epoch": 0.27996015902046145, "grad_norm": 3.4062752723693848, "learning_rate": 7.231487629674983e-05, "loss": 0.3292430877685547, "step": 65210 }, { "epoch": 0.28000309111048144, "grad_norm": 0.09572996944189072, "learning_rate": 7.231056457663221e-05, "loss": 0.21628963947296143, "step": 65220 }, { "epoch": 0.2800460232005014, "grad_norm": 1.1184502840042114, "learning_rate": 7.230625285651458e-05, "loss": 0.23993027210235596, "step": 65230 }, { "epoch": 0.28008895529052147, "grad_norm": 0.04423844814300537, "learning_rate": 7.230194113639696e-05, "loss": 0.24691359996795653, "step": 65240 }, { "epoch": 0.28013188738054146, "grad_norm": 0.10323113948106766, "learning_rate": 7.229762941627934e-05, "loss": 0.17943016290664673, "step": 65250 }, { "epoch": 0.28017481947056144, "grad_norm": 0.00937010906636715, "learning_rate": 7.229331769616172e-05, "loss": 0.3377179384231567, "step": 65260 }, { "epoch": 0.2802177515605815, "grad_norm": 0.0016765515320003033, "learning_rate": 7.22890059760441e-05, "loss": 0.32412896156311033, "step": 65270 }, { "epoch": 0.28026068365060147, "grad_norm": 0.14862895011901855, "learning_rate": 7.228469425592647e-05, "loss": 0.23623664379119874, "step": 65280 }, { "epoch": 0.28030361574062146, "grad_norm": 1.2214627265930176, "learning_rate": 7.228038253580885e-05, "loss": 0.23559386730194093, "step": 65290 }, { "epoch": 0.2803465478306415, "grad_norm": 1.4163013696670532, "learning_rate": 7.227607081569121e-05, "loss": 0.10575599670410156, "step": 65300 }, { "epoch": 0.2803894799206615, "grad_norm": 0.042076561599969864, "learning_rate": 7.227175909557359e-05, "loss": 0.2040954351425171, "step": 65310 }, { "epoch": 0.28043241201068153, "grad_norm": 0.006046614143997431, "learning_rate": 7.226744737545597e-05, "loss": 0.2361140012741089, "step": 65320 }, { "epoch": 0.2804753441007015, "grad_norm": 2.3810617923736572, "learning_rate": 7.226313565533834e-05, "loss": 0.20760879516601563, "step": 65330 }, { "epoch": 0.2805182761907215, "grad_norm": 0.013670995831489563, "learning_rate": 7.225882393522072e-05, "loss": 0.4020346164703369, "step": 65340 }, { "epoch": 0.28056120828074155, "grad_norm": 0.6690922379493713, "learning_rate": 7.22545122151031e-05, "loss": 0.29248411655426027, "step": 65350 }, { "epoch": 0.28060414037076153, "grad_norm": 1.0356889963150024, "learning_rate": 7.225020049498548e-05, "loss": 0.41963953971862794, "step": 65360 }, { "epoch": 0.2806470724607815, "grad_norm": 0.17851421236991882, "learning_rate": 7.224588877486785e-05, "loss": 0.2683266639709473, "step": 65370 }, { "epoch": 0.28069000455080156, "grad_norm": 0.002332353964447975, "learning_rate": 7.224157705475022e-05, "loss": 0.1336469292640686, "step": 65380 }, { "epoch": 0.28073293664082155, "grad_norm": 1.1590781211853027, "learning_rate": 7.22372653346326e-05, "loss": 0.5141021251678467, "step": 65390 }, { "epoch": 0.28077586873084154, "grad_norm": 0.9057397842407227, "learning_rate": 7.223295361451497e-05, "loss": 0.19209452867507934, "step": 65400 }, { "epoch": 0.2808188008208616, "grad_norm": 0.00840451568365097, "learning_rate": 7.222864189439735e-05, "loss": 0.1584153175354004, "step": 65410 }, { "epoch": 0.28086173291088157, "grad_norm": 2.096689224243164, "learning_rate": 7.222433017427973e-05, "loss": 0.26500711441040037, "step": 65420 }, { "epoch": 0.28090466500090155, "grad_norm": 0.008882677182555199, "learning_rate": 7.22200184541621e-05, "loss": 0.28088667392730715, "step": 65430 }, { "epoch": 0.2809475970909216, "grad_norm": 0.6018815636634827, "learning_rate": 7.221570673404448e-05, "loss": 0.1276005268096924, "step": 65440 }, { "epoch": 0.2809905291809416, "grad_norm": 5.839265823364258, "learning_rate": 7.221139501392686e-05, "loss": 0.2710998773574829, "step": 65450 }, { "epoch": 0.28103346127096157, "grad_norm": 1.5453418493270874, "learning_rate": 7.220708329380924e-05, "loss": 0.3031306266784668, "step": 65460 }, { "epoch": 0.2810763933609816, "grad_norm": 0.0062354025430977345, "learning_rate": 7.220277157369161e-05, "loss": 0.13311941623687745, "step": 65470 }, { "epoch": 0.2811193254510016, "grad_norm": 0.000264729984337464, "learning_rate": 7.219845985357399e-05, "loss": 0.2866328954696655, "step": 65480 }, { "epoch": 0.2811622575410216, "grad_norm": 0.12876111268997192, "learning_rate": 7.219414813345637e-05, "loss": 0.21349844932556153, "step": 65490 }, { "epoch": 0.28120518963104163, "grad_norm": 0.243903249502182, "learning_rate": 7.218983641333875e-05, "loss": 0.15504903793334962, "step": 65500 }, { "epoch": 0.2812481217210616, "grad_norm": 0.019903426989912987, "learning_rate": 7.218552469322112e-05, "loss": 0.19683150053024293, "step": 65510 }, { "epoch": 0.2812910538110816, "grad_norm": 0.027894608676433563, "learning_rate": 7.21812129731035e-05, "loss": 0.15850646495819093, "step": 65520 }, { "epoch": 0.28133398590110165, "grad_norm": 0.2418050765991211, "learning_rate": 7.217690125298588e-05, "loss": 0.2152719497680664, "step": 65530 }, { "epoch": 0.28137691799112163, "grad_norm": 0.0038344464264810085, "learning_rate": 7.217258953286826e-05, "loss": 0.2340897798538208, "step": 65540 }, { "epoch": 0.2814198500811417, "grad_norm": 1.888227939605713, "learning_rate": 7.216827781275062e-05, "loss": 0.18425893783569336, "step": 65550 }, { "epoch": 0.28146278217116166, "grad_norm": 0.010996916331350803, "learning_rate": 7.2163966092633e-05, "loss": 0.13371667861938477, "step": 65560 }, { "epoch": 0.28150571426118165, "grad_norm": 0.4016614258289337, "learning_rate": 7.215965437251537e-05, "loss": 0.3497531652450562, "step": 65570 }, { "epoch": 0.2815486463512017, "grad_norm": 0.10766914486885071, "learning_rate": 7.215534265239775e-05, "loss": 0.0871292769908905, "step": 65580 }, { "epoch": 0.2815915784412217, "grad_norm": 1.6982859373092651, "learning_rate": 7.215103093228013e-05, "loss": 0.38111917972564696, "step": 65590 }, { "epoch": 0.28163451053124167, "grad_norm": 0.00048144563334062696, "learning_rate": 7.21467192121625e-05, "loss": 0.02695021629333496, "step": 65600 }, { "epoch": 0.2816774426212617, "grad_norm": 0.06240568682551384, "learning_rate": 7.214240749204488e-05, "loss": 0.1083644151687622, "step": 65610 }, { "epoch": 0.2817203747112817, "grad_norm": 0.0006455762195400894, "learning_rate": 7.213809577192726e-05, "loss": 0.011432316899299622, "step": 65620 }, { "epoch": 0.2817633068013017, "grad_norm": 0.010076041333377361, "learning_rate": 7.213378405180962e-05, "loss": 0.1570556640625, "step": 65630 }, { "epoch": 0.2818062388913217, "grad_norm": 1.5382864475250244, "learning_rate": 7.2129472331692e-05, "loss": 0.21324005126953124, "step": 65640 }, { "epoch": 0.2818491709813417, "grad_norm": 1.173854947090149, "learning_rate": 7.212516061157438e-05, "loss": 0.18811701536178588, "step": 65650 }, { "epoch": 0.2818921030713617, "grad_norm": 0.16060331463813782, "learning_rate": 7.212084889145676e-05, "loss": 0.25907483100891116, "step": 65660 }, { "epoch": 0.28193503516138174, "grad_norm": 0.09696793556213379, "learning_rate": 7.211653717133913e-05, "loss": 0.5335704803466796, "step": 65670 }, { "epoch": 0.28197796725140173, "grad_norm": 3.50130558013916, "learning_rate": 7.211222545122151e-05, "loss": 0.32176215648651124, "step": 65680 }, { "epoch": 0.2820208993414217, "grad_norm": 0.09307514131069183, "learning_rate": 7.210791373110389e-05, "loss": 0.37757627964019774, "step": 65690 }, { "epoch": 0.28206383143144176, "grad_norm": 0.23182889819145203, "learning_rate": 7.210360201098627e-05, "loss": 0.30493721961975095, "step": 65700 }, { "epoch": 0.28210676352146175, "grad_norm": 0.06591078639030457, "learning_rate": 7.209929029086864e-05, "loss": 0.20234324932098388, "step": 65710 }, { "epoch": 0.28214969561148173, "grad_norm": 0.02225450985133648, "learning_rate": 7.209497857075102e-05, "loss": 0.35156002044677737, "step": 65720 }, { "epoch": 0.2821926277015018, "grad_norm": 1.354718804359436, "learning_rate": 7.20906668506334e-05, "loss": 0.1738152265548706, "step": 65730 }, { "epoch": 0.28223555979152176, "grad_norm": 3.91957426071167, "learning_rate": 7.208635513051577e-05, "loss": 0.3712831974029541, "step": 65740 }, { "epoch": 0.2822784918815418, "grad_norm": 0.14563311636447906, "learning_rate": 7.208204341039815e-05, "loss": 0.19410089254379273, "step": 65750 }, { "epoch": 0.2823214239715618, "grad_norm": 0.3047499358654022, "learning_rate": 7.207773169028053e-05, "loss": 0.25509042739868165, "step": 65760 }, { "epoch": 0.2823643560615818, "grad_norm": 1.5158109664916992, "learning_rate": 7.207341997016291e-05, "loss": 0.09974059462547302, "step": 65770 }, { "epoch": 0.2824072881516018, "grad_norm": 0.09712890535593033, "learning_rate": 7.206910825004528e-05, "loss": 0.15192004442214965, "step": 65780 }, { "epoch": 0.2824502202416218, "grad_norm": 1.342818021774292, "learning_rate": 7.206479652992765e-05, "loss": 0.19996538162231445, "step": 65790 }, { "epoch": 0.2824931523316418, "grad_norm": 1.0634775161743164, "learning_rate": 7.206048480981003e-05, "loss": 0.310014009475708, "step": 65800 }, { "epoch": 0.28253608442166184, "grad_norm": 4.057530879974365, "learning_rate": 7.20561730896924e-05, "loss": 0.40732660293579104, "step": 65810 }, { "epoch": 0.2825790165116818, "grad_norm": 1.2841671705245972, "learning_rate": 7.205186136957478e-05, "loss": 0.30788025856018064, "step": 65820 }, { "epoch": 0.2826219486017018, "grad_norm": 0.039597101509571075, "learning_rate": 7.204754964945716e-05, "loss": 0.3503837823867798, "step": 65830 }, { "epoch": 0.28266488069172185, "grad_norm": 3.8208327293395996, "learning_rate": 7.204323792933953e-05, "loss": 0.07890766263008117, "step": 65840 }, { "epoch": 0.28270781278174184, "grad_norm": 0.004489063750952482, "learning_rate": 7.203892620922191e-05, "loss": 0.23929812908172607, "step": 65850 }, { "epoch": 0.2827507448717618, "grad_norm": 0.13525481522083282, "learning_rate": 7.203461448910429e-05, "loss": 0.1935684561729431, "step": 65860 }, { "epoch": 0.28279367696178187, "grad_norm": 0.10572947561740875, "learning_rate": 7.203030276898667e-05, "loss": 0.45615386962890625, "step": 65870 }, { "epoch": 0.28283660905180186, "grad_norm": 10.497499465942383, "learning_rate": 7.202599104886903e-05, "loss": 0.22629842758178711, "step": 65880 }, { "epoch": 0.28287954114182184, "grad_norm": 1.8166733980178833, "learning_rate": 7.202167932875141e-05, "loss": 0.37073335647583006, "step": 65890 }, { "epoch": 0.2829224732318419, "grad_norm": 1.114739179611206, "learning_rate": 7.201736760863379e-05, "loss": 0.16271429061889647, "step": 65900 }, { "epoch": 0.2829654053218619, "grad_norm": 3.9710497856140137, "learning_rate": 7.201305588851616e-05, "loss": 0.17485971450805665, "step": 65910 }, { "epoch": 0.28300833741188186, "grad_norm": 2.7692909240722656, "learning_rate": 7.200874416839854e-05, "loss": 0.3034187078475952, "step": 65920 }, { "epoch": 0.2830512695019019, "grad_norm": 0.754224419593811, "learning_rate": 7.200443244828092e-05, "loss": 0.39126248359680177, "step": 65930 }, { "epoch": 0.2830942015919219, "grad_norm": 2.726775884628296, "learning_rate": 7.20001207281633e-05, "loss": 0.3949368953704834, "step": 65940 }, { "epoch": 0.2831371336819419, "grad_norm": 0.03907603397965431, "learning_rate": 7.199580900804567e-05, "loss": 0.3045235872268677, "step": 65950 }, { "epoch": 0.2831800657719619, "grad_norm": 0.15216980874538422, "learning_rate": 7.199149728792805e-05, "loss": 0.24437999725341797, "step": 65960 }, { "epoch": 0.2832229978619819, "grad_norm": 0.5572879314422607, "learning_rate": 7.198718556781043e-05, "loss": 0.21077256202697753, "step": 65970 }, { "epoch": 0.28326592995200195, "grad_norm": 2.963178873062134, "learning_rate": 7.19828738476928e-05, "loss": 0.451598072052002, "step": 65980 }, { "epoch": 0.28330886204202194, "grad_norm": 0.026027904823422432, "learning_rate": 7.197856212757518e-05, "loss": 0.2929235935211182, "step": 65990 }, { "epoch": 0.2833517941320419, "grad_norm": 0.835507333278656, "learning_rate": 7.197425040745756e-05, "loss": 0.36316940784454343, "step": 66000 }, { "epoch": 0.2833517941320419, "eval_loss": 0.44882506132125854, "eval_runtime": 27.4625, "eval_samples_per_second": 3.641, "eval_steps_per_second": 3.641, "step": 66000 }, { "epoch": 0.28339472622206197, "grad_norm": 0.22148838639259338, "learning_rate": 7.196993868733994e-05, "loss": 0.2025505542755127, "step": 66010 }, { "epoch": 0.28343765831208195, "grad_norm": 0.04498464986681938, "learning_rate": 7.196562696722231e-05, "loss": 0.09973494410514831, "step": 66020 }, { "epoch": 0.28348059040210194, "grad_norm": 0.04255395382642746, "learning_rate": 7.196131524710469e-05, "loss": 0.27003774642944334, "step": 66030 }, { "epoch": 0.283523522492122, "grad_norm": 1.8950724601745605, "learning_rate": 7.195700352698705e-05, "loss": 0.15587079524993896, "step": 66040 }, { "epoch": 0.28356645458214197, "grad_norm": 1.7096226215362549, "learning_rate": 7.195269180686943e-05, "loss": 0.1669179916381836, "step": 66050 }, { "epoch": 0.28360938667216196, "grad_norm": 2.672077178955078, "learning_rate": 7.194838008675181e-05, "loss": 0.29187939167022703, "step": 66060 }, { "epoch": 0.283652318762182, "grad_norm": 0.11574865877628326, "learning_rate": 7.194406836663419e-05, "loss": 0.2783296346664429, "step": 66070 }, { "epoch": 0.283695250852202, "grad_norm": 0.25796473026275635, "learning_rate": 7.193975664651656e-05, "loss": 0.2895475149154663, "step": 66080 }, { "epoch": 0.283738182942222, "grad_norm": 0.04511742293834686, "learning_rate": 7.193544492639894e-05, "loss": 0.12669681310653685, "step": 66090 }, { "epoch": 0.283781115032242, "grad_norm": 0.29782024025917053, "learning_rate": 7.193113320628132e-05, "loss": 0.13074166774749757, "step": 66100 }, { "epoch": 0.283824047122262, "grad_norm": 0.03458978608250618, "learning_rate": 7.19268214861637e-05, "loss": 0.27550182342529295, "step": 66110 }, { "epoch": 0.283866979212282, "grad_norm": 1.4848647117614746, "learning_rate": 7.192250976604606e-05, "loss": 0.21677308082580565, "step": 66120 }, { "epoch": 0.28390991130230203, "grad_norm": 0.0015100378077477217, "learning_rate": 7.191819804592844e-05, "loss": 0.291225266456604, "step": 66130 }, { "epoch": 0.283952843392322, "grad_norm": 0.002405498642474413, "learning_rate": 7.191388632581081e-05, "loss": 0.2970550060272217, "step": 66140 }, { "epoch": 0.283995775482342, "grad_norm": 2.132266044616699, "learning_rate": 7.190957460569319e-05, "loss": 0.2530239105224609, "step": 66150 }, { "epoch": 0.28403870757236205, "grad_norm": 0.8479958772659302, "learning_rate": 7.190526288557557e-05, "loss": 0.14648141860961914, "step": 66160 }, { "epoch": 0.28408163966238204, "grad_norm": 1.6402133703231812, "learning_rate": 7.190095116545795e-05, "loss": 0.26374919414520265, "step": 66170 }, { "epoch": 0.2841245717524021, "grad_norm": 0.11759825795888901, "learning_rate": 7.189663944534032e-05, "loss": 0.25805234909057617, "step": 66180 }, { "epoch": 0.28416750384242206, "grad_norm": 0.018354952335357666, "learning_rate": 7.189232772522271e-05, "loss": 0.3803222417831421, "step": 66190 }, { "epoch": 0.28421043593244205, "grad_norm": 3.1105453968048096, "learning_rate": 7.188801600510509e-05, "loss": 0.35515432357788085, "step": 66200 }, { "epoch": 0.2842533680224621, "grad_norm": 0.3357342481613159, "learning_rate": 7.188370428498746e-05, "loss": 0.06009315848350525, "step": 66210 }, { "epoch": 0.2842963001124821, "grad_norm": 0.053794365376234055, "learning_rate": 7.187939256486983e-05, "loss": 0.13056585788726807, "step": 66220 }, { "epoch": 0.28433923220250207, "grad_norm": 0.7919422388076782, "learning_rate": 7.187508084475221e-05, "loss": 0.1529833197593689, "step": 66230 }, { "epoch": 0.2843821642925221, "grad_norm": 1.3930153846740723, "learning_rate": 7.187076912463459e-05, "loss": 0.230507230758667, "step": 66240 }, { "epoch": 0.2844250963825421, "grad_norm": 0.05847423896193504, "learning_rate": 7.186645740451697e-05, "loss": 0.03660984933376312, "step": 66250 }, { "epoch": 0.2844680284725621, "grad_norm": 2.146456718444824, "learning_rate": 7.186214568439934e-05, "loss": 0.10765037536621094, "step": 66260 }, { "epoch": 0.2845109605625821, "grad_norm": 1.673628807067871, "learning_rate": 7.185783396428172e-05, "loss": 0.3041319131851196, "step": 66270 }, { "epoch": 0.2845538926526021, "grad_norm": 0.039230331778526306, "learning_rate": 7.18535222441641e-05, "loss": 0.210577654838562, "step": 66280 }, { "epoch": 0.2845968247426221, "grad_norm": 0.006200359668582678, "learning_rate": 7.184921052404646e-05, "loss": 0.23379092216491698, "step": 66290 }, { "epoch": 0.28463975683264214, "grad_norm": 1.6170132160186768, "learning_rate": 7.184489880392884e-05, "loss": 0.2834723949432373, "step": 66300 }, { "epoch": 0.28468268892266213, "grad_norm": 1.9372655153274536, "learning_rate": 7.184058708381122e-05, "loss": 0.31765921115875245, "step": 66310 }, { "epoch": 0.2847256210126821, "grad_norm": 2.323282241821289, "learning_rate": 7.183627536369359e-05, "loss": 0.2632134199142456, "step": 66320 }, { "epoch": 0.28476855310270216, "grad_norm": 8.279928207397461, "learning_rate": 7.183196364357597e-05, "loss": 0.383182692527771, "step": 66330 }, { "epoch": 0.28481148519272215, "grad_norm": 1.6048333644866943, "learning_rate": 7.182765192345835e-05, "loss": 0.09258521795272827, "step": 66340 }, { "epoch": 0.28485441728274213, "grad_norm": 3.5633950233459473, "learning_rate": 7.182334020334072e-05, "loss": 0.3062487363815308, "step": 66350 }, { "epoch": 0.2848973493727622, "grad_norm": 0.0061171273700892925, "learning_rate": 7.18190284832231e-05, "loss": 0.29340639114379885, "step": 66360 }, { "epoch": 0.28494028146278216, "grad_norm": 0.019328560680150986, "learning_rate": 7.181471676310547e-05, "loss": 0.40865530967712405, "step": 66370 }, { "epoch": 0.28498321355280215, "grad_norm": 1.3364512920379639, "learning_rate": 7.181040504298784e-05, "loss": 0.3623195171356201, "step": 66380 }, { "epoch": 0.2850261456428222, "grad_norm": 0.02536897547543049, "learning_rate": 7.180609332287022e-05, "loss": 0.24478592872619628, "step": 66390 }, { "epoch": 0.2850690777328422, "grad_norm": 0.009088271297514439, "learning_rate": 7.18017816027526e-05, "loss": 0.1288319706916809, "step": 66400 }, { "epoch": 0.2851120098228622, "grad_norm": 0.004414925817400217, "learning_rate": 7.179746988263499e-05, "loss": 0.1057388424873352, "step": 66410 }, { "epoch": 0.2851549419128822, "grad_norm": 5.123507499694824, "learning_rate": 7.179315816251737e-05, "loss": 0.37679688930511473, "step": 66420 }, { "epoch": 0.2851978740029022, "grad_norm": 0.06788595765829086, "learning_rate": 7.178884644239974e-05, "loss": 0.09671139717102051, "step": 66430 }, { "epoch": 0.28524080609292224, "grad_norm": 0.06418441236019135, "learning_rate": 7.178453472228212e-05, "loss": 0.09671286940574646, "step": 66440 }, { "epoch": 0.2852837381829422, "grad_norm": 3.502713680267334, "learning_rate": 7.178022300216448e-05, "loss": 0.2851149559020996, "step": 66450 }, { "epoch": 0.2853266702729622, "grad_norm": 0.14976099133491516, "learning_rate": 7.177591128204686e-05, "loss": 0.17432072162628173, "step": 66460 }, { "epoch": 0.28536960236298226, "grad_norm": 0.05505011975765228, "learning_rate": 7.177159956192924e-05, "loss": 0.02504686117172241, "step": 66470 }, { "epoch": 0.28541253445300224, "grad_norm": 2.7165024280548096, "learning_rate": 7.176728784181162e-05, "loss": 0.3118079662322998, "step": 66480 }, { "epoch": 0.28545546654302223, "grad_norm": 0.00147499970626086, "learning_rate": 7.1762976121694e-05, "loss": 0.2843587398529053, "step": 66490 }, { "epoch": 0.28549839863304227, "grad_norm": 0.2036382257938385, "learning_rate": 7.175866440157637e-05, "loss": 0.03493208289146423, "step": 66500 }, { "epoch": 0.28554133072306226, "grad_norm": 5.055944442749023, "learning_rate": 7.175435268145875e-05, "loss": 0.17728381156921386, "step": 66510 }, { "epoch": 0.28558426281308225, "grad_norm": 1.751897931098938, "learning_rate": 7.175004096134113e-05, "loss": 0.30946590900421145, "step": 66520 }, { "epoch": 0.2856271949031023, "grad_norm": 0.10786402225494385, "learning_rate": 7.174572924122349e-05, "loss": 0.07910744547843933, "step": 66530 }, { "epoch": 0.2856701269931223, "grad_norm": 2.8931570053100586, "learning_rate": 7.174141752110587e-05, "loss": 0.37531414031982424, "step": 66540 }, { "epoch": 0.28571305908314226, "grad_norm": 0.021041302010416985, "learning_rate": 7.173710580098824e-05, "loss": 0.25701830387115476, "step": 66550 }, { "epoch": 0.2857559911731623, "grad_norm": 0.1704130321741104, "learning_rate": 7.173279408087062e-05, "loss": 0.33431432247161863, "step": 66560 }, { "epoch": 0.2857989232631823, "grad_norm": 0.004376660101115704, "learning_rate": 7.1728482360753e-05, "loss": 0.1233670949935913, "step": 66570 }, { "epoch": 0.2858418553532023, "grad_norm": 21.381446838378906, "learning_rate": 7.172417064063538e-05, "loss": 0.27247138023376466, "step": 66580 }, { "epoch": 0.2858847874432223, "grad_norm": 3.642610549926758, "learning_rate": 7.171985892051775e-05, "loss": 0.46688222885131836, "step": 66590 }, { "epoch": 0.2859277195332423, "grad_norm": 1.3897994756698608, "learning_rate": 7.171554720040013e-05, "loss": 0.2915907621383667, "step": 66600 }, { "epoch": 0.28597065162326235, "grad_norm": 0.07208789139986038, "learning_rate": 7.171123548028251e-05, "loss": 0.1163141369819641, "step": 66610 }, { "epoch": 0.28601358371328234, "grad_norm": 0.042390916496515274, "learning_rate": 7.170692376016487e-05, "loss": 0.13545933961868287, "step": 66620 }, { "epoch": 0.2860565158033023, "grad_norm": 2.161851644515991, "learning_rate": 7.170261204004726e-05, "loss": 0.14295434951782227, "step": 66630 }, { "epoch": 0.28609944789332237, "grad_norm": 1.8208708763122559, "learning_rate": 7.169830031992964e-05, "loss": 0.25981476306915285, "step": 66640 }, { "epoch": 0.28614237998334235, "grad_norm": 0.023029552772641182, "learning_rate": 7.169398859981202e-05, "loss": 0.2782176971435547, "step": 66650 }, { "epoch": 0.28618531207336234, "grad_norm": 4.7402167320251465, "learning_rate": 7.16896768796944e-05, "loss": 0.3271470546722412, "step": 66660 }, { "epoch": 0.2862282441633824, "grad_norm": 0.027461495250463486, "learning_rate": 7.168536515957677e-05, "loss": 0.13882352113723756, "step": 66670 }, { "epoch": 0.28627117625340237, "grad_norm": 9.312287330627441, "learning_rate": 7.168105343945915e-05, "loss": 0.33524518013000487, "step": 66680 }, { "epoch": 0.28631410834342236, "grad_norm": 0.040560901165008545, "learning_rate": 7.167674171934153e-05, "loss": 0.030652105808258057, "step": 66690 }, { "epoch": 0.2863570404334424, "grad_norm": 0.009825235232710838, "learning_rate": 7.167242999922389e-05, "loss": 0.09193103313446045, "step": 66700 }, { "epoch": 0.2863999725234624, "grad_norm": 0.24824531376361847, "learning_rate": 7.166811827910627e-05, "loss": 0.16448047161102294, "step": 66710 }, { "epoch": 0.2864429046134824, "grad_norm": 1.160897970199585, "learning_rate": 7.166380655898865e-05, "loss": 0.29864816665649413, "step": 66720 }, { "epoch": 0.2864858367035024, "grad_norm": 0.026330584660172462, "learning_rate": 7.165949483887102e-05, "loss": 0.07965230941772461, "step": 66730 }, { "epoch": 0.2865287687935224, "grad_norm": 7.835456371307373, "learning_rate": 7.16551831187534e-05, "loss": 0.21117513179779052, "step": 66740 }, { "epoch": 0.2865717008835424, "grad_norm": 6.6683030128479, "learning_rate": 7.165087139863578e-05, "loss": 0.23535704612731934, "step": 66750 }, { "epoch": 0.28661463297356243, "grad_norm": 2.4860293865203857, "learning_rate": 7.164655967851816e-05, "loss": 0.12097716331481934, "step": 66760 }, { "epoch": 0.2866575650635824, "grad_norm": 0.5875166058540344, "learning_rate": 7.164224795840053e-05, "loss": 0.11552761793136597, "step": 66770 }, { "epoch": 0.2867004971536024, "grad_norm": 0.6846771240234375, "learning_rate": 7.16379362382829e-05, "loss": 0.37979416847229003, "step": 66780 }, { "epoch": 0.28674342924362245, "grad_norm": 0.0027324643451720476, "learning_rate": 7.163362451816527e-05, "loss": 0.22684979438781738, "step": 66790 }, { "epoch": 0.28678636133364244, "grad_norm": 6.466740608215332, "learning_rate": 7.162931279804765e-05, "loss": 0.17428145408630372, "step": 66800 }, { "epoch": 0.2868292934236624, "grad_norm": 0.005279346369206905, "learning_rate": 7.162500107793003e-05, "loss": 0.305089545249939, "step": 66810 }, { "epoch": 0.28687222551368247, "grad_norm": 0.0043772244825959206, "learning_rate": 7.16206893578124e-05, "loss": 0.07529987692832947, "step": 66820 }, { "epoch": 0.28691515760370245, "grad_norm": 0.015128378756344318, "learning_rate": 7.161637763769478e-05, "loss": 0.1512007474899292, "step": 66830 }, { "epoch": 0.2869580896937225, "grad_norm": 0.033646125346422195, "learning_rate": 7.161206591757716e-05, "loss": 0.2316761016845703, "step": 66840 }, { "epoch": 0.2870010217837425, "grad_norm": 0.038412321358919144, "learning_rate": 7.160775419745954e-05, "loss": 0.2079862356185913, "step": 66850 }, { "epoch": 0.28704395387376247, "grad_norm": 41.8834228515625, "learning_rate": 7.160344247734192e-05, "loss": 0.1129407525062561, "step": 66860 }, { "epoch": 0.2870868859637825, "grad_norm": 0.0051442538388073444, "learning_rate": 7.159913075722429e-05, "loss": 0.12028219699859619, "step": 66870 }, { "epoch": 0.2871298180538025, "grad_norm": 0.014405815862119198, "learning_rate": 7.159481903710667e-05, "loss": 0.1231608271598816, "step": 66880 }, { "epoch": 0.2871727501438225, "grad_norm": 0.27976086735725403, "learning_rate": 7.159050731698905e-05, "loss": 0.23082361221313477, "step": 66890 }, { "epoch": 0.28721568223384253, "grad_norm": 0.2852309048175812, "learning_rate": 7.158619559687142e-05, "loss": 0.44276866912841795, "step": 66900 }, { "epoch": 0.2872586143238625, "grad_norm": 0.0018568943487480283, "learning_rate": 7.15818838767538e-05, "loss": 0.09268001317977906, "step": 66910 }, { "epoch": 0.2873015464138825, "grad_norm": 0.0033980561420321465, "learning_rate": 7.157757215663618e-05, "loss": 0.06650593280792236, "step": 66920 }, { "epoch": 0.28734447850390255, "grad_norm": 0.44758087396621704, "learning_rate": 7.157326043651856e-05, "loss": 0.1356669306755066, "step": 66930 }, { "epoch": 0.28738741059392253, "grad_norm": 0.2766318619251251, "learning_rate": 7.156894871640093e-05, "loss": 0.22032556533813477, "step": 66940 }, { "epoch": 0.2874303426839425, "grad_norm": 0.01758783869445324, "learning_rate": 7.15646369962833e-05, "loss": 0.20557994842529298, "step": 66950 }, { "epoch": 0.28747327477396256, "grad_norm": 0.0034948191605508327, "learning_rate": 7.156032527616568e-05, "loss": 0.27143542766571044, "step": 66960 }, { "epoch": 0.28751620686398255, "grad_norm": 2.4819626808166504, "learning_rate": 7.155601355604805e-05, "loss": 0.4705145835876465, "step": 66970 }, { "epoch": 0.28755913895400254, "grad_norm": 5.097228050231934, "learning_rate": 7.155170183593043e-05, "loss": 0.39361963272094724, "step": 66980 }, { "epoch": 0.2876020710440226, "grad_norm": 0.016346966847777367, "learning_rate": 7.154739011581281e-05, "loss": 0.2513259410858154, "step": 66990 }, { "epoch": 0.28764500313404257, "grad_norm": 0.09284532070159912, "learning_rate": 7.154307839569518e-05, "loss": 0.2665305852890015, "step": 67000 }, { "epoch": 0.28764500313404257, "eval_loss": 0.43117061257362366, "eval_runtime": 27.4525, "eval_samples_per_second": 3.643, "eval_steps_per_second": 3.643, "step": 67000 }, { "epoch": 0.28768793522406255, "grad_norm": 0.4193928837776184, "learning_rate": 7.153876667557756e-05, "loss": 0.2339691400527954, "step": 67010 }, { "epoch": 0.2877308673140826, "grad_norm": 0.05756061151623726, "learning_rate": 7.153445495545994e-05, "loss": 0.16777576208114625, "step": 67020 }, { "epoch": 0.2877737994041026, "grad_norm": 0.1018446758389473, "learning_rate": 7.15301432353423e-05, "loss": 0.22809865474700927, "step": 67030 }, { "epoch": 0.2878167314941226, "grad_norm": 0.7277085185050964, "learning_rate": 7.152583151522468e-05, "loss": 0.22245891094207765, "step": 67040 }, { "epoch": 0.2878596635841426, "grad_norm": 0.005714256316423416, "learning_rate": 7.152151979510706e-05, "loss": 0.029804161190986632, "step": 67050 }, { "epoch": 0.2879025956741626, "grad_norm": 0.5130506753921509, "learning_rate": 7.151720807498943e-05, "loss": 0.18485331535339355, "step": 67060 }, { "epoch": 0.28794552776418264, "grad_norm": 0.007058766670525074, "learning_rate": 7.151289635487181e-05, "loss": 0.10065820217132568, "step": 67070 }, { "epoch": 0.2879884598542026, "grad_norm": 0.005129760131239891, "learning_rate": 7.150858463475419e-05, "loss": 0.3468196153640747, "step": 67080 }, { "epoch": 0.2880313919442226, "grad_norm": 0.015246695838868618, "learning_rate": 7.150427291463657e-05, "loss": 0.3642794847488403, "step": 67090 }, { "epoch": 0.28807432403424266, "grad_norm": 0.2805442810058594, "learning_rate": 7.149996119451894e-05, "loss": 0.17592545747756957, "step": 67100 }, { "epoch": 0.28811725612426264, "grad_norm": 3.7581307888031006, "learning_rate": 7.149564947440132e-05, "loss": 0.20889370441436766, "step": 67110 }, { "epoch": 0.28816018821428263, "grad_norm": 0.5101104974746704, "learning_rate": 7.14913377542837e-05, "loss": 0.25342817306518556, "step": 67120 }, { "epoch": 0.2882031203043027, "grad_norm": 0.0816846638917923, "learning_rate": 7.148702603416608e-05, "loss": 0.27064502239227295, "step": 67130 }, { "epoch": 0.28824605239432266, "grad_norm": 0.0570346862077713, "learning_rate": 7.148271431404845e-05, "loss": 0.18118938207626342, "step": 67140 }, { "epoch": 0.28828898448434265, "grad_norm": 0.31971192359924316, "learning_rate": 7.147840259393083e-05, "loss": 0.15357050895690919, "step": 67150 }, { "epoch": 0.2883319165743627, "grad_norm": 0.07988704741001129, "learning_rate": 7.147409087381321e-05, "loss": 0.24975745677947997, "step": 67160 }, { "epoch": 0.2883748486643827, "grad_norm": 0.0052708168514072895, "learning_rate": 7.146977915369559e-05, "loss": 0.092576003074646, "step": 67170 }, { "epoch": 0.28841778075440266, "grad_norm": 0.08117439597845078, "learning_rate": 7.146546743357796e-05, "loss": 0.12661879062652587, "step": 67180 }, { "epoch": 0.2884607128444227, "grad_norm": 5.074268341064453, "learning_rate": 7.146115571346033e-05, "loss": 0.1526340961456299, "step": 67190 }, { "epoch": 0.2885036449344427, "grad_norm": 0.05365569517016411, "learning_rate": 7.14568439933427e-05, "loss": 0.143519389629364, "step": 67200 }, { "epoch": 0.2885465770244627, "grad_norm": 3.570014238357544, "learning_rate": 7.145253227322508e-05, "loss": 0.17729175090789795, "step": 67210 }, { "epoch": 0.2885895091144827, "grad_norm": 2.256596088409424, "learning_rate": 7.144822055310746e-05, "loss": 0.31090943813323973, "step": 67220 }, { "epoch": 0.2886324412045027, "grad_norm": 0.00617602001875639, "learning_rate": 7.144390883298984e-05, "loss": 0.38187849521636963, "step": 67230 }, { "epoch": 0.2886753732945227, "grad_norm": 0.03439110144972801, "learning_rate": 7.143959711287221e-05, "loss": 0.40892391204833983, "step": 67240 }, { "epoch": 0.28871830538454274, "grad_norm": 1.2373721599578857, "learning_rate": 7.143528539275459e-05, "loss": 0.0434207022190094, "step": 67250 }, { "epoch": 0.2887612374745627, "grad_norm": 0.04851150885224342, "learning_rate": 7.143097367263697e-05, "loss": 0.10299129486083984, "step": 67260 }, { "epoch": 0.28880416956458277, "grad_norm": 0.47567617893218994, "learning_rate": 7.142666195251935e-05, "loss": 0.40652313232421877, "step": 67270 }, { "epoch": 0.28884710165460276, "grad_norm": 0.44068795442581177, "learning_rate": 7.142235023240171e-05, "loss": 0.01274416446685791, "step": 67280 }, { "epoch": 0.28889003374462274, "grad_norm": 1.2657181024551392, "learning_rate": 7.141803851228409e-05, "loss": 0.2137005090713501, "step": 67290 }, { "epoch": 0.2889329658346428, "grad_norm": 3.1350607872009277, "learning_rate": 7.141372679216646e-05, "loss": 0.25659914016723634, "step": 67300 }, { "epoch": 0.2889758979246628, "grad_norm": 2.780965566635132, "learning_rate": 7.140941507204884e-05, "loss": 0.3482322692871094, "step": 67310 }, { "epoch": 0.28901883001468276, "grad_norm": 1.2729345560073853, "learning_rate": 7.140510335193122e-05, "loss": 0.11825804710388184, "step": 67320 }, { "epoch": 0.2890617621047028, "grad_norm": 0.5161279439926147, "learning_rate": 7.14007916318136e-05, "loss": 0.3750854253768921, "step": 67330 }, { "epoch": 0.2891046941947228, "grad_norm": 1.1493275165557861, "learning_rate": 7.139647991169597e-05, "loss": 0.2764685392379761, "step": 67340 }, { "epoch": 0.2891476262847428, "grad_norm": 6.515742778778076, "learning_rate": 7.139216819157835e-05, "loss": 0.249403977394104, "step": 67350 }, { "epoch": 0.2891905583747628, "grad_norm": 11.825860977172852, "learning_rate": 7.138785647146073e-05, "loss": 0.28220572471618655, "step": 67360 }, { "epoch": 0.2892334904647828, "grad_norm": 3.0165984630584717, "learning_rate": 7.13835447513431e-05, "loss": 0.3849741697311401, "step": 67370 }, { "epoch": 0.2892764225548028, "grad_norm": 0.16258086264133453, "learning_rate": 7.137923303122548e-05, "loss": 0.1100645899772644, "step": 67380 }, { "epoch": 0.28931935464482283, "grad_norm": 0.044878143817186356, "learning_rate": 7.137492131110786e-05, "loss": 0.21178960800170898, "step": 67390 }, { "epoch": 0.2893622867348428, "grad_norm": 0.0038589336909353733, "learning_rate": 7.137060959099024e-05, "loss": 0.2345569133758545, "step": 67400 }, { "epoch": 0.2894052188248628, "grad_norm": 0.0768958106637001, "learning_rate": 7.136629787087261e-05, "loss": 0.21593918800354003, "step": 67410 }, { "epoch": 0.28944815091488285, "grad_norm": 7.253081798553467, "learning_rate": 7.136198615075499e-05, "loss": 0.4909097194671631, "step": 67420 }, { "epoch": 0.28949108300490284, "grad_norm": 0.04475796967744827, "learning_rate": 7.135767443063737e-05, "loss": 0.4545122623443604, "step": 67430 }, { "epoch": 0.2895340150949228, "grad_norm": 0.08378562331199646, "learning_rate": 7.135336271051973e-05, "loss": 0.1959518313407898, "step": 67440 }, { "epoch": 0.28957694718494287, "grad_norm": 0.05304625630378723, "learning_rate": 7.134905099040211e-05, "loss": 0.08540345430374145, "step": 67450 }, { "epoch": 0.28961987927496285, "grad_norm": 0.02249019965529442, "learning_rate": 7.134473927028449e-05, "loss": 0.2787889003753662, "step": 67460 }, { "epoch": 0.2896628113649829, "grad_norm": 0.8801037669181824, "learning_rate": 7.134042755016687e-05, "loss": 0.4670060157775879, "step": 67470 }, { "epoch": 0.2897057434550029, "grad_norm": 1.267013430595398, "learning_rate": 7.133611583004924e-05, "loss": 0.13036876916885376, "step": 67480 }, { "epoch": 0.28974867554502287, "grad_norm": 0.3037998378276825, "learning_rate": 7.133180410993162e-05, "loss": 0.10819134712219239, "step": 67490 }, { "epoch": 0.2897916076350429, "grad_norm": 0.7789495587348938, "learning_rate": 7.1327492389814e-05, "loss": 0.17645723819732667, "step": 67500 }, { "epoch": 0.2898345397250629, "grad_norm": 0.044124431908130646, "learning_rate": 7.132318066969637e-05, "loss": 0.0688193678855896, "step": 67510 }, { "epoch": 0.2898774718150829, "grad_norm": 0.00780621450394392, "learning_rate": 7.131886894957874e-05, "loss": 0.3849108457565308, "step": 67520 }, { "epoch": 0.28992040390510293, "grad_norm": 0.6088647842407227, "learning_rate": 7.131455722946112e-05, "loss": 0.1863906502723694, "step": 67530 }, { "epoch": 0.2899633359951229, "grad_norm": 0.7508590817451477, "learning_rate": 7.13102455093435e-05, "loss": 0.2708500146865845, "step": 67540 }, { "epoch": 0.2900062680851429, "grad_norm": 3.738001823425293, "learning_rate": 7.130593378922587e-05, "loss": 0.130331552028656, "step": 67550 }, { "epoch": 0.29004920017516295, "grad_norm": 4.640090465545654, "learning_rate": 7.130162206910825e-05, "loss": 0.2554504871368408, "step": 67560 }, { "epoch": 0.29009213226518293, "grad_norm": 0.12082592397928238, "learning_rate": 7.129731034899063e-05, "loss": 0.1677726149559021, "step": 67570 }, { "epoch": 0.2901350643552029, "grad_norm": 0.016347909346222878, "learning_rate": 7.1292998628873e-05, "loss": 0.17969557046890258, "step": 67580 }, { "epoch": 0.29017799644522296, "grad_norm": 0.012287224642932415, "learning_rate": 7.128868690875538e-05, "loss": 0.1445378541946411, "step": 67590 }, { "epoch": 0.29022092853524295, "grad_norm": 0.002170619321987033, "learning_rate": 7.128437518863776e-05, "loss": 0.1796630859375, "step": 67600 }, { "epoch": 0.29026386062526294, "grad_norm": 0.3804681897163391, "learning_rate": 7.128006346852013e-05, "loss": 0.3209116220474243, "step": 67610 }, { "epoch": 0.290306792715283, "grad_norm": 1.3779412508010864, "learning_rate": 7.127575174840251e-05, "loss": 0.33678340911865234, "step": 67620 }, { "epoch": 0.29034972480530297, "grad_norm": 3.7422566413879395, "learning_rate": 7.127144002828489e-05, "loss": 0.1122518539428711, "step": 67630 }, { "epoch": 0.29039265689532295, "grad_norm": 0.008284986019134521, "learning_rate": 7.126712830816727e-05, "loss": 0.3207037687301636, "step": 67640 }, { "epoch": 0.290435588985343, "grad_norm": 0.4197596609592438, "learning_rate": 7.126281658804964e-05, "loss": 0.2883680105209351, "step": 67650 }, { "epoch": 0.290478521075363, "grad_norm": 0.02145151048898697, "learning_rate": 7.125850486793202e-05, "loss": 0.16351989507675171, "step": 67660 }, { "epoch": 0.29052145316538297, "grad_norm": 0.7493966221809387, "learning_rate": 7.12541931478144e-05, "loss": 0.30967581272125244, "step": 67670 }, { "epoch": 0.290564385255403, "grad_norm": 0.016078148037195206, "learning_rate": 7.124988142769678e-05, "loss": 0.13175559043884277, "step": 67680 }, { "epoch": 0.290607317345423, "grad_norm": 0.4908279776573181, "learning_rate": 7.124556970757914e-05, "loss": 0.02809107005596161, "step": 67690 }, { "epoch": 0.29065024943544304, "grad_norm": 0.9280158281326294, "learning_rate": 7.124125798746152e-05, "loss": 0.33881824016571044, "step": 67700 }, { "epoch": 0.29069318152546303, "grad_norm": 1.8869291543960571, "learning_rate": 7.12369462673439e-05, "loss": 0.2556769847869873, "step": 67710 }, { "epoch": 0.290736113615483, "grad_norm": 0.4702579975128174, "learning_rate": 7.123263454722627e-05, "loss": 0.13894178867340087, "step": 67720 }, { "epoch": 0.29077904570550306, "grad_norm": 0.002194383880123496, "learning_rate": 7.122832282710865e-05, "loss": 0.35664730072021483, "step": 67730 }, { "epoch": 0.29082197779552305, "grad_norm": 0.21256765723228455, "learning_rate": 7.122401110699103e-05, "loss": 0.10273618698120117, "step": 67740 }, { "epoch": 0.29086490988554303, "grad_norm": 0.020599033683538437, "learning_rate": 7.12196993868734e-05, "loss": 0.12157354354858399, "step": 67750 }, { "epoch": 0.2909078419755631, "grad_norm": 2.6698665618896484, "learning_rate": 7.121538766675578e-05, "loss": 0.19090875387191772, "step": 67760 }, { "epoch": 0.29095077406558306, "grad_norm": 0.03797188401222229, "learning_rate": 7.121107594663815e-05, "loss": 0.1711806058883667, "step": 67770 }, { "epoch": 0.29099370615560305, "grad_norm": 2.388545513153076, "learning_rate": 7.120676422652052e-05, "loss": 0.26054646968841555, "step": 67780 }, { "epoch": 0.2910366382456231, "grad_norm": 0.009794231504201889, "learning_rate": 7.12024525064029e-05, "loss": 0.4301589488983154, "step": 67790 }, { "epoch": 0.2910795703356431, "grad_norm": 2.32464337348938, "learning_rate": 7.119814078628528e-05, "loss": 0.22652745246887207, "step": 67800 }, { "epoch": 0.29112250242566307, "grad_norm": 3.0685410499572754, "learning_rate": 7.119382906616765e-05, "loss": 0.08744192123413086, "step": 67810 }, { "epoch": 0.2911654345156831, "grad_norm": 0.011893724091351032, "learning_rate": 7.118951734605005e-05, "loss": 0.13607373237609863, "step": 67820 }, { "epoch": 0.2912083666057031, "grad_norm": 1.304750680923462, "learning_rate": 7.118520562593242e-05, "loss": 0.2696163892745972, "step": 67830 }, { "epoch": 0.2912512986957231, "grad_norm": 0.005757071543484926, "learning_rate": 7.11808939058148e-05, "loss": 0.10221372842788697, "step": 67840 }, { "epoch": 0.2912942307857431, "grad_norm": 1.33371102809906, "learning_rate": 7.117658218569716e-05, "loss": 0.36364994049072263, "step": 67850 }, { "epoch": 0.2913371628757631, "grad_norm": 3.079373598098755, "learning_rate": 7.117227046557954e-05, "loss": 0.36846587657928465, "step": 67860 }, { "epoch": 0.2913800949657831, "grad_norm": 5.180749416351318, "learning_rate": 7.116795874546192e-05, "loss": 0.4598280906677246, "step": 67870 }, { "epoch": 0.29142302705580314, "grad_norm": 3.1711230278015137, "learning_rate": 7.11636470253443e-05, "loss": 0.3365186214447021, "step": 67880 }, { "epoch": 0.29146595914582313, "grad_norm": 0.05353054031729698, "learning_rate": 7.115933530522667e-05, "loss": 0.20130722522735595, "step": 67890 }, { "epoch": 0.29150889123584317, "grad_norm": 0.003241224680095911, "learning_rate": 7.115502358510905e-05, "loss": 0.28517961502075195, "step": 67900 }, { "epoch": 0.29155182332586316, "grad_norm": 0.6127050518989563, "learning_rate": 7.115071186499143e-05, "loss": 0.47597332000732423, "step": 67910 }, { "epoch": 0.29159475541588314, "grad_norm": 0.01519143395125866, "learning_rate": 7.11464001448738e-05, "loss": 0.11472551822662354, "step": 67920 }, { "epoch": 0.2916376875059032, "grad_norm": 1.374168038368225, "learning_rate": 7.114208842475617e-05, "loss": 0.22478947639465333, "step": 67930 }, { "epoch": 0.2916806195959232, "grad_norm": 0.489167183637619, "learning_rate": 7.113777670463855e-05, "loss": 0.3274041175842285, "step": 67940 }, { "epoch": 0.29172355168594316, "grad_norm": 0.012719135731458664, "learning_rate": 7.113346498452092e-05, "loss": 0.1357038736343384, "step": 67950 }, { "epoch": 0.2917664837759632, "grad_norm": 0.001968483906239271, "learning_rate": 7.11291532644033e-05, "loss": 0.13717812299728394, "step": 67960 }, { "epoch": 0.2918094158659832, "grad_norm": 0.003384147770702839, "learning_rate": 7.112484154428568e-05, "loss": 0.2852107763290405, "step": 67970 }, { "epoch": 0.2918523479560032, "grad_norm": 0.6869329810142517, "learning_rate": 7.112052982416806e-05, "loss": 0.26191256046295164, "step": 67980 }, { "epoch": 0.2918952800460232, "grad_norm": 0.020543674007058144, "learning_rate": 7.111621810405043e-05, "loss": 0.0620194137096405, "step": 67990 }, { "epoch": 0.2919382121360432, "grad_norm": 4.142035007476807, "learning_rate": 7.111190638393281e-05, "loss": 0.20071187019348144, "step": 68000 }, { "epoch": 0.2919382121360432, "eval_loss": 0.43928349018096924, "eval_runtime": 27.3939, "eval_samples_per_second": 3.65, "eval_steps_per_second": 3.65, "step": 68000 }, { "epoch": 0.2919811442260632, "grad_norm": 2.2618143558502197, "learning_rate": 7.110759466381519e-05, "loss": 0.45159592628479006, "step": 68010 }, { "epoch": 0.29202407631608324, "grad_norm": 0.001515640295110643, "learning_rate": 7.110328294369755e-05, "loss": 0.14594651460647584, "step": 68020 }, { "epoch": 0.2920670084061032, "grad_norm": 0.8493977189064026, "learning_rate": 7.109897122357993e-05, "loss": 0.2396256685256958, "step": 68030 }, { "epoch": 0.2921099404961232, "grad_norm": 4.17466926574707, "learning_rate": 7.109465950346232e-05, "loss": 0.18733444213867187, "step": 68040 }, { "epoch": 0.29215287258614325, "grad_norm": 3.635678291320801, "learning_rate": 7.10903477833447e-05, "loss": 0.20183224678039552, "step": 68050 }, { "epoch": 0.29219580467616324, "grad_norm": 1.21640145778656, "learning_rate": 7.108603606322707e-05, "loss": 0.22180395126342772, "step": 68060 }, { "epoch": 0.2922387367661832, "grad_norm": 2.761414051055908, "learning_rate": 7.108172434310945e-05, "loss": 0.44273886680603025, "step": 68070 }, { "epoch": 0.29228166885620327, "grad_norm": 0.0016735456883907318, "learning_rate": 7.107741262299183e-05, "loss": 0.15880162715911866, "step": 68080 }, { "epoch": 0.29232460094622326, "grad_norm": 1.637378215789795, "learning_rate": 7.10731009028742e-05, "loss": 0.31997501850128174, "step": 68090 }, { "epoch": 0.29236753303624324, "grad_norm": 0.04662550240755081, "learning_rate": 7.106878918275657e-05, "loss": 0.3928894758224487, "step": 68100 }, { "epoch": 0.2924104651262633, "grad_norm": 4.416837692260742, "learning_rate": 7.106447746263895e-05, "loss": 0.49892539978027345, "step": 68110 }, { "epoch": 0.2924533972162833, "grad_norm": 0.03326794505119324, "learning_rate": 7.106016574252132e-05, "loss": 0.15234217643737794, "step": 68120 }, { "epoch": 0.2924963293063033, "grad_norm": 1.8150595426559448, "learning_rate": 7.10558540224037e-05, "loss": 0.2633387327194214, "step": 68130 }, { "epoch": 0.2925392613963233, "grad_norm": 0.07998789101839066, "learning_rate": 7.105154230228608e-05, "loss": 0.2532381772994995, "step": 68140 }, { "epoch": 0.2925821934863433, "grad_norm": 0.01822078414261341, "learning_rate": 7.104723058216846e-05, "loss": 0.11664667129516601, "step": 68150 }, { "epoch": 0.29262512557636333, "grad_norm": 0.9801276922225952, "learning_rate": 7.104291886205083e-05, "loss": 0.1693058967590332, "step": 68160 }, { "epoch": 0.2926680576663833, "grad_norm": 0.011544733308255672, "learning_rate": 7.103860714193321e-05, "loss": 0.09046794772148133, "step": 68170 }, { "epoch": 0.2927109897564033, "grad_norm": 0.043812867254018784, "learning_rate": 7.103429542181558e-05, "loss": 0.12290755510330201, "step": 68180 }, { "epoch": 0.29275392184642335, "grad_norm": 0.06413479149341583, "learning_rate": 7.102998370169795e-05, "loss": 0.293648362159729, "step": 68190 }, { "epoch": 0.29279685393644334, "grad_norm": 0.049631934612989426, "learning_rate": 7.102567198158033e-05, "loss": 0.3742121458053589, "step": 68200 }, { "epoch": 0.2928397860264633, "grad_norm": 0.16051065921783447, "learning_rate": 7.102136026146271e-05, "loss": 0.31449887752532957, "step": 68210 }, { "epoch": 0.29288271811648336, "grad_norm": 6.327853679656982, "learning_rate": 7.101704854134508e-05, "loss": 0.3112722158432007, "step": 68220 }, { "epoch": 0.29292565020650335, "grad_norm": 0.05760200321674347, "learning_rate": 7.101273682122746e-05, "loss": 0.09027496576309205, "step": 68230 }, { "epoch": 0.29296858229652334, "grad_norm": 0.029430439695715904, "learning_rate": 7.100842510110984e-05, "loss": 0.2163465738296509, "step": 68240 }, { "epoch": 0.2930115143865434, "grad_norm": 1.0040699243545532, "learning_rate": 7.100411338099222e-05, "loss": 0.4775634765625, "step": 68250 }, { "epoch": 0.29305444647656337, "grad_norm": 1.0060920715332031, "learning_rate": 7.09998016608746e-05, "loss": 0.3704617977142334, "step": 68260 }, { "epoch": 0.29309737856658336, "grad_norm": 1.6154614686965942, "learning_rate": 7.099548994075697e-05, "loss": 0.12538934946060182, "step": 68270 }, { "epoch": 0.2931403106566034, "grad_norm": 0.7863723039627075, "learning_rate": 7.099117822063935e-05, "loss": 0.14734208583831787, "step": 68280 }, { "epoch": 0.2931832427466234, "grad_norm": 0.03183925896883011, "learning_rate": 7.098686650052173e-05, "loss": 0.3232817888259888, "step": 68290 }, { "epoch": 0.29322617483664337, "grad_norm": 0.5198999643325806, "learning_rate": 7.09825547804041e-05, "loss": 0.11065690517425537, "step": 68300 }, { "epoch": 0.2932691069266634, "grad_norm": 0.624901294708252, "learning_rate": 7.097824306028648e-05, "loss": 0.4588596343994141, "step": 68310 }, { "epoch": 0.2933120390166834, "grad_norm": 0.07914484292268753, "learning_rate": 7.097393134016886e-05, "loss": 0.055303043127059935, "step": 68320 }, { "epoch": 0.29335497110670344, "grad_norm": 0.6766675114631653, "learning_rate": 7.096961962005124e-05, "loss": 0.15219202041625976, "step": 68330 }, { "epoch": 0.29339790319672343, "grad_norm": 0.02503044158220291, "learning_rate": 7.09653078999336e-05, "loss": 0.19463895559310912, "step": 68340 }, { "epoch": 0.2934408352867434, "grad_norm": 0.24121232330799103, "learning_rate": 7.096099617981598e-05, "loss": 0.16598477363586425, "step": 68350 }, { "epoch": 0.29348376737676346, "grad_norm": 0.007208712864667177, "learning_rate": 7.095668445969835e-05, "loss": 0.16176241636276245, "step": 68360 }, { "epoch": 0.29352669946678345, "grad_norm": 0.0008645054767839611, "learning_rate": 7.095237273958073e-05, "loss": 0.09948625564575195, "step": 68370 }, { "epoch": 0.29356963155680343, "grad_norm": 2.0432870388031006, "learning_rate": 7.094806101946311e-05, "loss": 0.462565279006958, "step": 68380 }, { "epoch": 0.2936125636468235, "grad_norm": 0.0818505734205246, "learning_rate": 7.094374929934549e-05, "loss": 0.344118595123291, "step": 68390 }, { "epoch": 0.29365549573684346, "grad_norm": 0.09661252796649933, "learning_rate": 7.093943757922786e-05, "loss": 0.04238423705101013, "step": 68400 }, { "epoch": 0.29369842782686345, "grad_norm": 0.04637506231665611, "learning_rate": 7.093512585911024e-05, "loss": 0.11444630622863769, "step": 68410 }, { "epoch": 0.2937413599168835, "grad_norm": 0.03868831321597099, "learning_rate": 7.093081413899262e-05, "loss": 0.19466127157211305, "step": 68420 }, { "epoch": 0.2937842920069035, "grad_norm": 1.6524996757507324, "learning_rate": 7.092650241887498e-05, "loss": 0.39613971710205076, "step": 68430 }, { "epoch": 0.29382722409692347, "grad_norm": 0.06135096400976181, "learning_rate": 7.092219069875736e-05, "loss": 0.2066957712173462, "step": 68440 }, { "epoch": 0.2938701561869435, "grad_norm": 0.0012592696584761143, "learning_rate": 7.091787897863974e-05, "loss": 0.06958057880401611, "step": 68450 }, { "epoch": 0.2939130882769635, "grad_norm": 1.4627882242202759, "learning_rate": 7.091356725852211e-05, "loss": 0.26883270740509035, "step": 68460 }, { "epoch": 0.2939560203669835, "grad_norm": 0.07310977578163147, "learning_rate": 7.090925553840449e-05, "loss": 0.15684688091278076, "step": 68470 }, { "epoch": 0.2939989524570035, "grad_norm": 3.431220531463623, "learning_rate": 7.090494381828687e-05, "loss": 0.34428939819335935, "step": 68480 }, { "epoch": 0.2940418845470235, "grad_norm": 5.300132751464844, "learning_rate": 7.090063209816925e-05, "loss": 0.22276744842529297, "step": 68490 }, { "epoch": 0.2940848166370435, "grad_norm": 0.12124665081501007, "learning_rate": 7.089632037805162e-05, "loss": 0.1893091917037964, "step": 68500 }, { "epoch": 0.29412774872706354, "grad_norm": 0.025070803239941597, "learning_rate": 7.0892008657934e-05, "loss": 0.24306130409240723, "step": 68510 }, { "epoch": 0.29417068081708353, "grad_norm": 5.163175106048584, "learning_rate": 7.088769693781638e-05, "loss": 0.26326632499694824, "step": 68520 }, { "epoch": 0.2942136129071035, "grad_norm": 0.02456679567694664, "learning_rate": 7.088338521769876e-05, "loss": 0.22474491596221924, "step": 68530 }, { "epoch": 0.29425654499712356, "grad_norm": 1.350258708000183, "learning_rate": 7.087907349758113e-05, "loss": 0.18058979511260986, "step": 68540 }, { "epoch": 0.29429947708714355, "grad_norm": 2.1283679008483887, "learning_rate": 7.087476177746351e-05, "loss": 0.4500577449798584, "step": 68550 }, { "epoch": 0.2943424091771636, "grad_norm": 2.402839183807373, "learning_rate": 7.087045005734589e-05, "loss": 0.25330147743225095, "step": 68560 }, { "epoch": 0.2943853412671836, "grad_norm": 0.008127194829285145, "learning_rate": 7.086613833722826e-05, "loss": 0.1757526159286499, "step": 68570 }, { "epoch": 0.29442827335720356, "grad_norm": 1.464109182357788, "learning_rate": 7.086182661711064e-05, "loss": 0.18842368125915526, "step": 68580 }, { "epoch": 0.2944712054472236, "grad_norm": 0.08519778400659561, "learning_rate": 7.0857514896993e-05, "loss": 0.309142017364502, "step": 68590 }, { "epoch": 0.2945141375372436, "grad_norm": 0.08478078246116638, "learning_rate": 7.085320317687538e-05, "loss": 0.18999476432800294, "step": 68600 }, { "epoch": 0.2945570696272636, "grad_norm": 0.05416850745677948, "learning_rate": 7.084889145675776e-05, "loss": 0.2712838649749756, "step": 68610 }, { "epoch": 0.2946000017172836, "grad_norm": 0.4658629596233368, "learning_rate": 7.084457973664014e-05, "loss": 0.11947870254516602, "step": 68620 }, { "epoch": 0.2946429338073036, "grad_norm": 0.005190864205360413, "learning_rate": 7.084026801652252e-05, "loss": 0.15439292192459106, "step": 68630 }, { "epoch": 0.2946858658973236, "grad_norm": 1.495048999786377, "learning_rate": 7.083595629640489e-05, "loss": 0.1966106414794922, "step": 68640 }, { "epoch": 0.29472879798734364, "grad_norm": 0.19685158133506775, "learning_rate": 7.083164457628727e-05, "loss": 0.09233436584472657, "step": 68650 }, { "epoch": 0.2947717300773636, "grad_norm": 0.1266477257013321, "learning_rate": 7.082733285616965e-05, "loss": 0.17769633531570433, "step": 68660 }, { "epoch": 0.2948146621673836, "grad_norm": 0.002195771085098386, "learning_rate": 7.082302113605201e-05, "loss": 0.13989298343658446, "step": 68670 }, { "epoch": 0.29485759425740365, "grad_norm": 0.7707890868186951, "learning_rate": 7.081870941593439e-05, "loss": 0.05584725141525269, "step": 68680 }, { "epoch": 0.29490052634742364, "grad_norm": 0.5744310617446899, "learning_rate": 7.081439769581677e-05, "loss": 0.3359058141708374, "step": 68690 }, { "epoch": 0.29494345843744363, "grad_norm": 1.2864564657211304, "learning_rate": 7.081008597569914e-05, "loss": 0.19174413681030272, "step": 68700 }, { "epoch": 0.29498639052746367, "grad_norm": 0.052966032177209854, "learning_rate": 7.080577425558152e-05, "loss": 0.12695059776306153, "step": 68710 }, { "epoch": 0.29502932261748366, "grad_norm": 1.6851404905319214, "learning_rate": 7.08014625354639e-05, "loss": 0.29085845947265626, "step": 68720 }, { "epoch": 0.29507225470750365, "grad_norm": 3.938408851623535, "learning_rate": 7.079715081534628e-05, "loss": 0.31063499450683596, "step": 68730 }, { "epoch": 0.2951151867975237, "grad_norm": 1.0033713579177856, "learning_rate": 7.079283909522865e-05, "loss": 0.3633733749389648, "step": 68740 }, { "epoch": 0.2951581188875437, "grad_norm": 0.31211256980895996, "learning_rate": 7.078852737511103e-05, "loss": 0.1458314061164856, "step": 68750 }, { "epoch": 0.2952010509775637, "grad_norm": 0.009130554273724556, "learning_rate": 7.078421565499341e-05, "loss": 0.36089942455291746, "step": 68760 }, { "epoch": 0.2952439830675837, "grad_norm": 1.35874605178833, "learning_rate": 7.077990393487578e-05, "loss": 0.10489203929901122, "step": 68770 }, { "epoch": 0.2952869151576037, "grad_norm": 7.000607490539551, "learning_rate": 7.077559221475816e-05, "loss": 0.3951005458831787, "step": 68780 }, { "epoch": 0.29532984724762373, "grad_norm": 0.020006684586405754, "learning_rate": 7.077128049464054e-05, "loss": 0.3116251230239868, "step": 68790 }, { "epoch": 0.2953727793376437, "grad_norm": 0.32888463139533997, "learning_rate": 7.076696877452292e-05, "loss": 0.1497477889060974, "step": 68800 }, { "epoch": 0.2954157114276637, "grad_norm": 5.472551345825195, "learning_rate": 7.07626570544053e-05, "loss": 0.2952433109283447, "step": 68810 }, { "epoch": 0.29545864351768375, "grad_norm": 0.719142496585846, "learning_rate": 7.075834533428767e-05, "loss": 0.28906295299530027, "step": 68820 }, { "epoch": 0.29550157560770374, "grad_norm": 0.2726041376590729, "learning_rate": 7.075403361417005e-05, "loss": 0.19550033807754516, "step": 68830 }, { "epoch": 0.2955445076977237, "grad_norm": 1.4349033832550049, "learning_rate": 7.074972189405241e-05, "loss": 0.15906682014465331, "step": 68840 }, { "epoch": 0.29558743978774377, "grad_norm": 0.06669703871011734, "learning_rate": 7.074541017393479e-05, "loss": 0.21837499141693115, "step": 68850 }, { "epoch": 0.29563037187776375, "grad_norm": 2.129164934158325, "learning_rate": 7.074109845381717e-05, "loss": 0.04467359185218811, "step": 68860 }, { "epoch": 0.29567330396778374, "grad_norm": 1.0307523012161255, "learning_rate": 7.073678673369954e-05, "loss": 0.19184517860412598, "step": 68870 }, { "epoch": 0.2957162360578038, "grad_norm": 2.2985711097717285, "learning_rate": 7.073247501358192e-05, "loss": 0.09638724327087403, "step": 68880 }, { "epoch": 0.29575916814782377, "grad_norm": 1.2666059732437134, "learning_rate": 7.07281632934643e-05, "loss": 0.16064202785491943, "step": 68890 }, { "epoch": 0.29580210023784376, "grad_norm": 2.663952350616455, "learning_rate": 7.072385157334668e-05, "loss": 0.21061155796051026, "step": 68900 }, { "epoch": 0.2958450323278638, "grad_norm": 5.3934645652771, "learning_rate": 7.071953985322905e-05, "loss": 0.20032162666320802, "step": 68910 }, { "epoch": 0.2958879644178838, "grad_norm": 1.9548537731170654, "learning_rate": 7.071522813311142e-05, "loss": 0.3333185911178589, "step": 68920 }, { "epoch": 0.2959308965079038, "grad_norm": 5.220151901245117, "learning_rate": 7.07109164129938e-05, "loss": 0.32145798206329346, "step": 68930 }, { "epoch": 0.2959738285979238, "grad_norm": 1.7756158113479614, "learning_rate": 7.070660469287617e-05, "loss": 0.3678144931793213, "step": 68940 }, { "epoch": 0.2960167606879438, "grad_norm": 0.0175294429063797, "learning_rate": 7.070229297275855e-05, "loss": 0.1472090721130371, "step": 68950 }, { "epoch": 0.2960596927779638, "grad_norm": 0.5595688819885254, "learning_rate": 7.069798125264093e-05, "loss": 0.4218907833099365, "step": 68960 }, { "epoch": 0.29610262486798383, "grad_norm": 0.03466328606009483, "learning_rate": 7.06936695325233e-05, "loss": 0.28534204959869386, "step": 68970 }, { "epoch": 0.2961455569580038, "grad_norm": 0.37167027592658997, "learning_rate": 7.068935781240568e-05, "loss": 0.20634891986846923, "step": 68980 }, { "epoch": 0.29618848904802386, "grad_norm": 2.8753232955932617, "learning_rate": 7.068504609228806e-05, "loss": 0.13908033370971679, "step": 68990 }, { "epoch": 0.29623142113804385, "grad_norm": 0.08938822895288467, "learning_rate": 7.068073437217044e-05, "loss": 0.11308764219284058, "step": 69000 }, { "epoch": 0.29623142113804385, "eval_loss": 0.43790289759635925, "eval_runtime": 27.3935, "eval_samples_per_second": 3.65, "eval_steps_per_second": 3.65, "step": 69000 }, { "epoch": 0.29627435322806384, "grad_norm": 1.1804087162017822, "learning_rate": 7.067642265205281e-05, "loss": 0.22331843376159669, "step": 69010 }, { "epoch": 0.2963172853180839, "grad_norm": 3.558773994445801, "learning_rate": 7.067211093193519e-05, "loss": 0.26021552085876465, "step": 69020 }, { "epoch": 0.29636021740810387, "grad_norm": 0.1038755252957344, "learning_rate": 7.066779921181757e-05, "loss": 0.17431793212890626, "step": 69030 }, { "epoch": 0.29640314949812385, "grad_norm": 0.0383944995701313, "learning_rate": 7.066348749169995e-05, "loss": 0.05176795721054077, "step": 69040 }, { "epoch": 0.2964460815881439, "grad_norm": 0.09961086511611938, "learning_rate": 7.065917577158232e-05, "loss": 0.190877366065979, "step": 69050 }, { "epoch": 0.2964890136781639, "grad_norm": 0.02250022441148758, "learning_rate": 7.06548640514647e-05, "loss": 0.2138050079345703, "step": 69060 }, { "epoch": 0.29653194576818387, "grad_norm": 0.122157022356987, "learning_rate": 7.065055233134708e-05, "loss": 0.30884108543395994, "step": 69070 }, { "epoch": 0.2965748778582039, "grad_norm": 6.911462783813477, "learning_rate": 7.064624061122945e-05, "loss": 0.09625717401504516, "step": 69080 }, { "epoch": 0.2966178099482239, "grad_norm": 0.8818526268005371, "learning_rate": 7.064192889111182e-05, "loss": 0.09233015179634094, "step": 69090 }, { "epoch": 0.2966607420382439, "grad_norm": 1.978317141532898, "learning_rate": 7.06376171709942e-05, "loss": 0.12204514741897583, "step": 69100 }, { "epoch": 0.29670367412826393, "grad_norm": 0.002704236889258027, "learning_rate": 7.063330545087657e-05, "loss": 0.3270133018493652, "step": 69110 }, { "epoch": 0.2967466062182839, "grad_norm": 3.32967209815979, "learning_rate": 7.062899373075895e-05, "loss": 0.32077441215515134, "step": 69120 }, { "epoch": 0.2967895383083039, "grad_norm": 2.797989845275879, "learning_rate": 7.062468201064133e-05, "loss": 0.14576636552810668, "step": 69130 }, { "epoch": 0.29683247039832394, "grad_norm": 0.3258748948574066, "learning_rate": 7.06203702905237e-05, "loss": 0.3708260774612427, "step": 69140 }, { "epoch": 0.29687540248834393, "grad_norm": 0.020132817327976227, "learning_rate": 7.061605857040608e-05, "loss": 0.17735791206359863, "step": 69150 }, { "epoch": 0.2969183345783639, "grad_norm": 0.07214616239070892, "learning_rate": 7.061174685028846e-05, "loss": 0.31556928157806396, "step": 69160 }, { "epoch": 0.29696126666838396, "grad_norm": 0.4991401731967926, "learning_rate": 7.060743513017082e-05, "loss": 0.12665189504623414, "step": 69170 }, { "epoch": 0.29700419875840395, "grad_norm": 0.6309173703193665, "learning_rate": 7.06031234100532e-05, "loss": 0.27704677581787107, "step": 69180 }, { "epoch": 0.297047130848424, "grad_norm": 2.7611091136932373, "learning_rate": 7.059881168993558e-05, "loss": 0.17989909648895264, "step": 69190 }, { "epoch": 0.297090062938444, "grad_norm": 0.04773523658514023, "learning_rate": 7.059449996981796e-05, "loss": 0.13966834545135498, "step": 69200 }, { "epoch": 0.29713299502846396, "grad_norm": 8.029800415039062, "learning_rate": 7.059018824970033e-05, "loss": 0.2252267599105835, "step": 69210 }, { "epoch": 0.297175927118484, "grad_norm": 38.85501480102539, "learning_rate": 7.058587652958271e-05, "loss": 0.1387203812599182, "step": 69220 }, { "epoch": 0.297218859208504, "grad_norm": 3.258969783782959, "learning_rate": 7.05815648094651e-05, "loss": 0.1611212134361267, "step": 69230 }, { "epoch": 0.297261791298524, "grad_norm": 1.5080939531326294, "learning_rate": 7.057725308934748e-05, "loss": 0.25683202743530276, "step": 69240 }, { "epoch": 0.297304723388544, "grad_norm": 0.06847647577524185, "learning_rate": 7.057294136922984e-05, "loss": 0.14771102666854857, "step": 69250 }, { "epoch": 0.297347655478564, "grad_norm": 0.033461250364780426, "learning_rate": 7.056862964911222e-05, "loss": 0.24316053390502929, "step": 69260 }, { "epoch": 0.297390587568584, "grad_norm": 1.9887748956680298, "learning_rate": 7.05643179289946e-05, "loss": 0.4812785148620605, "step": 69270 }, { "epoch": 0.29743351965860404, "grad_norm": 1.3982434272766113, "learning_rate": 7.056000620887697e-05, "loss": 0.2308140754699707, "step": 69280 }, { "epoch": 0.297476451748624, "grad_norm": 0.10920906066894531, "learning_rate": 7.055569448875935e-05, "loss": 0.10632593631744384, "step": 69290 }, { "epoch": 0.297519383838644, "grad_norm": 0.03187263384461403, "learning_rate": 7.055138276864173e-05, "loss": 0.24314732551574708, "step": 69300 }, { "epoch": 0.29756231592866406, "grad_norm": 0.7231481671333313, "learning_rate": 7.05470710485241e-05, "loss": 0.1606484532356262, "step": 69310 }, { "epoch": 0.29760524801868404, "grad_norm": 0.023674849420785904, "learning_rate": 7.054275932840648e-05, "loss": 0.28617658615112307, "step": 69320 }, { "epoch": 0.29764818010870403, "grad_norm": 0.12634412944316864, "learning_rate": 7.053844760828885e-05, "loss": 0.168298602104187, "step": 69330 }, { "epoch": 0.2976911121987241, "grad_norm": 0.1226951852440834, "learning_rate": 7.053413588817123e-05, "loss": 0.30879578590393064, "step": 69340 }, { "epoch": 0.29773404428874406, "grad_norm": 0.12052540481090546, "learning_rate": 7.05298241680536e-05, "loss": 0.2331835985183716, "step": 69350 }, { "epoch": 0.29777697637876405, "grad_norm": 3.8223581314086914, "learning_rate": 7.052551244793598e-05, "loss": 0.23448073863983154, "step": 69360 }, { "epoch": 0.2978199084687841, "grad_norm": 0.14944325387477875, "learning_rate": 7.052120072781836e-05, "loss": 0.20286400318145753, "step": 69370 }, { "epoch": 0.2978628405588041, "grad_norm": 0.1501256376504898, "learning_rate": 7.051688900770073e-05, "loss": 0.30396556854248047, "step": 69380 }, { "epoch": 0.29790577264882406, "grad_norm": 3.373429775238037, "learning_rate": 7.051257728758311e-05, "loss": 0.27126803398132326, "step": 69390 }, { "epoch": 0.2979487047388441, "grad_norm": 14.590777397155762, "learning_rate": 7.050826556746549e-05, "loss": 0.29711642265319826, "step": 69400 }, { "epoch": 0.2979916368288641, "grad_norm": 2.8559489250183105, "learning_rate": 7.050395384734785e-05, "loss": 0.21114108562469483, "step": 69410 }, { "epoch": 0.29803456891888414, "grad_norm": 0.48258069157600403, "learning_rate": 7.049964212723023e-05, "loss": 0.13180533647537232, "step": 69420 }, { "epoch": 0.2980775010089041, "grad_norm": 0.030082767829298973, "learning_rate": 7.049533040711261e-05, "loss": 0.10944579839706421, "step": 69430 }, { "epoch": 0.2981204330989241, "grad_norm": 0.02870088629424572, "learning_rate": 7.049101868699499e-05, "loss": 0.3478094577789307, "step": 69440 }, { "epoch": 0.29816336518894415, "grad_norm": 2.597738027572632, "learning_rate": 7.048670696687738e-05, "loss": 0.23423190116882325, "step": 69450 }, { "epoch": 0.29820629727896414, "grad_norm": 2.4963412284851074, "learning_rate": 7.048239524675975e-05, "loss": 0.17621252536773682, "step": 69460 }, { "epoch": 0.2982492293689841, "grad_norm": 0.020082371309399605, "learning_rate": 7.047808352664213e-05, "loss": 0.253519868850708, "step": 69470 }, { "epoch": 0.29829216145900417, "grad_norm": 0.0038464032113552094, "learning_rate": 7.047377180652451e-05, "loss": 0.2777001619338989, "step": 69480 }, { "epoch": 0.29833509354902416, "grad_norm": 0.08930832892656326, "learning_rate": 7.046946008640689e-05, "loss": 0.12960487604141235, "step": 69490 }, { "epoch": 0.29837802563904414, "grad_norm": 3.2200212478637695, "learning_rate": 7.046514836628925e-05, "loss": 0.2296536684036255, "step": 69500 }, { "epoch": 0.2984209577290642, "grad_norm": 1.753504991531372, "learning_rate": 7.046083664617163e-05, "loss": 0.13932392597198487, "step": 69510 }, { "epoch": 0.29846388981908417, "grad_norm": 2.262216091156006, "learning_rate": 7.0456524926054e-05, "loss": 0.2738852262496948, "step": 69520 }, { "epoch": 0.29850682190910416, "grad_norm": 0.021889301016926765, "learning_rate": 7.045221320593638e-05, "loss": 0.23410627841949463, "step": 69530 }, { "epoch": 0.2985497539991242, "grad_norm": 0.9385930299758911, "learning_rate": 7.044790148581876e-05, "loss": 0.2929945468902588, "step": 69540 }, { "epoch": 0.2985926860891442, "grad_norm": 0.025515921413898468, "learning_rate": 7.044358976570114e-05, "loss": 0.1091389536857605, "step": 69550 }, { "epoch": 0.2986356181791642, "grad_norm": 1.3053462505340576, "learning_rate": 7.043927804558351e-05, "loss": 0.37116103172302245, "step": 69560 }, { "epoch": 0.2986785502691842, "grad_norm": 1.738684058189392, "learning_rate": 7.043496632546589e-05, "loss": 0.30579180717468263, "step": 69570 }, { "epoch": 0.2987214823592042, "grad_norm": 0.06269654631614685, "learning_rate": 7.043065460534825e-05, "loss": 0.17386873960494995, "step": 69580 }, { "epoch": 0.2987644144492242, "grad_norm": 4.997331142425537, "learning_rate": 7.042634288523063e-05, "loss": 0.3920506715774536, "step": 69590 }, { "epoch": 0.29880734653924423, "grad_norm": 0.6335716843605042, "learning_rate": 7.042203116511301e-05, "loss": 0.16370421648025513, "step": 69600 }, { "epoch": 0.2988502786292642, "grad_norm": 0.012116850353777409, "learning_rate": 7.041771944499539e-05, "loss": 0.11120028495788574, "step": 69610 }, { "epoch": 0.29889321071928426, "grad_norm": 0.7054433226585388, "learning_rate": 7.041340772487776e-05, "loss": 0.1680054783821106, "step": 69620 }, { "epoch": 0.29893614280930425, "grad_norm": 0.049377284944057465, "learning_rate": 7.040909600476014e-05, "loss": 0.19020957946777345, "step": 69630 }, { "epoch": 0.29897907489932424, "grad_norm": 0.00042023861897177994, "learning_rate": 7.040478428464252e-05, "loss": 0.177628231048584, "step": 69640 }, { "epoch": 0.2990220069893443, "grad_norm": 0.062201373279094696, "learning_rate": 7.04004725645249e-05, "loss": 0.20424156188964843, "step": 69650 }, { "epoch": 0.29906493907936427, "grad_norm": 0.0926150307059288, "learning_rate": 7.039616084440726e-05, "loss": 0.30764992237091066, "step": 69660 }, { "epoch": 0.29910787116938425, "grad_norm": 0.006616492290049791, "learning_rate": 7.039184912428965e-05, "loss": 0.1089470624923706, "step": 69670 }, { "epoch": 0.2991508032594043, "grad_norm": 1.036904215812683, "learning_rate": 7.038753740417203e-05, "loss": 0.23160901069641113, "step": 69680 }, { "epoch": 0.2991937353494243, "grad_norm": 5.831925868988037, "learning_rate": 7.03832256840544e-05, "loss": 0.15994585752487184, "step": 69690 }, { "epoch": 0.29923666743944427, "grad_norm": 0.011926480568945408, "learning_rate": 7.037891396393678e-05, "loss": 0.10858608484268188, "step": 69700 }, { "epoch": 0.2992795995294643, "grad_norm": 2.732725143432617, "learning_rate": 7.037460224381916e-05, "loss": 0.327674412727356, "step": 69710 }, { "epoch": 0.2993225316194843, "grad_norm": 0.10609839856624603, "learning_rate": 7.037029052370154e-05, "loss": 0.38145086765289304, "step": 69720 }, { "epoch": 0.2993654637095043, "grad_norm": 1.7052667140960693, "learning_rate": 7.036597880358391e-05, "loss": 0.31780190467834474, "step": 69730 }, { "epoch": 0.29940839579952433, "grad_norm": 0.3516540229320526, "learning_rate": 7.036166708346628e-05, "loss": 0.22891452312469482, "step": 69740 }, { "epoch": 0.2994513278895443, "grad_norm": 6.445019721984863, "learning_rate": 7.035735536334866e-05, "loss": 0.1429811716079712, "step": 69750 }, { "epoch": 0.2994942599795643, "grad_norm": 0.27430006861686707, "learning_rate": 7.035304364323103e-05, "loss": 0.15352680683135986, "step": 69760 }, { "epoch": 0.29953719206958435, "grad_norm": 5.184157371520996, "learning_rate": 7.034873192311341e-05, "loss": 0.28881843090057374, "step": 69770 }, { "epoch": 0.29958012415960433, "grad_norm": 0.6079696416854858, "learning_rate": 7.034442020299579e-05, "loss": 0.32554547786712645, "step": 69780 }, { "epoch": 0.2996230562496243, "grad_norm": 1.9618726968765259, "learning_rate": 7.034010848287817e-05, "loss": 0.4187826156616211, "step": 69790 }, { "epoch": 0.29966598833964436, "grad_norm": 1.6079884767532349, "learning_rate": 7.033579676276054e-05, "loss": 0.1687183380126953, "step": 69800 }, { "epoch": 0.29970892042966435, "grad_norm": 2.7777047157287598, "learning_rate": 7.033148504264292e-05, "loss": 0.4267683506011963, "step": 69810 }, { "epoch": 0.29975185251968434, "grad_norm": 1.4220722913742065, "learning_rate": 7.03271733225253e-05, "loss": 0.2763724088668823, "step": 69820 }, { "epoch": 0.2997947846097044, "grad_norm": 0.5493017435073853, "learning_rate": 7.032286160240766e-05, "loss": 0.2672281265258789, "step": 69830 }, { "epoch": 0.29983771669972437, "grad_norm": 0.06145060807466507, "learning_rate": 7.031854988229004e-05, "loss": 0.35633857250213624, "step": 69840 }, { "epoch": 0.2998806487897444, "grad_norm": 0.10371090471744537, "learning_rate": 7.031423816217242e-05, "loss": 0.328914475440979, "step": 69850 }, { "epoch": 0.2999235808797644, "grad_norm": 2.0575599670410156, "learning_rate": 7.030992644205479e-05, "loss": 0.49924526214599607, "step": 69860 }, { "epoch": 0.2999665129697844, "grad_norm": 2.1705596446990967, "learning_rate": 7.030561472193717e-05, "loss": 0.24522743225097657, "step": 69870 }, { "epoch": 0.3000094450598044, "grad_norm": 0.055285509675741196, "learning_rate": 7.030130300181955e-05, "loss": 0.15846848487854004, "step": 69880 }, { "epoch": 0.3000523771498244, "grad_norm": 3.6779916286468506, "learning_rate": 7.029699128170192e-05, "loss": 0.29879536628723147, "step": 69890 }, { "epoch": 0.3000953092398444, "grad_norm": 12.207128524780273, "learning_rate": 7.02926795615843e-05, "loss": 0.31245851516723633, "step": 69900 }, { "epoch": 0.30013824132986444, "grad_norm": 0.012846590019762516, "learning_rate": 7.028836784146668e-05, "loss": 0.2062364101409912, "step": 69910 }, { "epoch": 0.30018117341988443, "grad_norm": 0.05187515914440155, "learning_rate": 7.028405612134906e-05, "loss": 0.18940675258636475, "step": 69920 }, { "epoch": 0.3002241055099044, "grad_norm": 1.3474045991897583, "learning_rate": 7.027974440123143e-05, "loss": 0.38222217559814453, "step": 69930 }, { "epoch": 0.30026703759992446, "grad_norm": 0.007387023419141769, "learning_rate": 7.027543268111381e-05, "loss": 0.1886347770690918, "step": 69940 }, { "epoch": 0.30030996968994444, "grad_norm": 0.03471825644373894, "learning_rate": 7.027112096099619e-05, "loss": 0.20559465885162354, "step": 69950 }, { "epoch": 0.30035290177996443, "grad_norm": 0.9603984951972961, "learning_rate": 7.026680924087857e-05, "loss": 0.23707988262176513, "step": 69960 }, { "epoch": 0.3003958338699845, "grad_norm": 0.7676080465316772, "learning_rate": 7.026249752076094e-05, "loss": 0.17447848320007325, "step": 69970 }, { "epoch": 0.30043876596000446, "grad_norm": 0.2487894594669342, "learning_rate": 7.025818580064332e-05, "loss": 0.06069231033325195, "step": 69980 }, { "epoch": 0.30048169805002445, "grad_norm": 1.2301409244537354, "learning_rate": 7.025387408052568e-05, "loss": 0.15381040573120117, "step": 69990 }, { "epoch": 0.3005246301400445, "grad_norm": 1.970497727394104, "learning_rate": 7.024956236040806e-05, "loss": 0.1707751750946045, "step": 70000 }, { "epoch": 0.3005246301400445, "eval_loss": 0.46616458892822266, "eval_runtime": 27.4872, "eval_samples_per_second": 3.638, "eval_steps_per_second": 3.638, "step": 70000 }, { "epoch": 0.3005675622300645, "grad_norm": 0.060369785875082016, "learning_rate": 7.024525064029044e-05, "loss": 0.07141480445861817, "step": 70010 }, { "epoch": 0.30061049432008446, "grad_norm": 5.950823783874512, "learning_rate": 7.024093892017282e-05, "loss": 0.28537325859069823, "step": 70020 }, { "epoch": 0.3006534264101045, "grad_norm": 0.11006926745176315, "learning_rate": 7.02366272000552e-05, "loss": 0.2802592754364014, "step": 70030 }, { "epoch": 0.3006963585001245, "grad_norm": 0.1244291216135025, "learning_rate": 7.023231547993757e-05, "loss": 0.04770747721195221, "step": 70040 }, { "epoch": 0.30073929059014454, "grad_norm": 0.04021500051021576, "learning_rate": 7.022800375981995e-05, "loss": 0.1004258632659912, "step": 70050 }, { "epoch": 0.3007822226801645, "grad_norm": 2.83384108543396, "learning_rate": 7.022369203970233e-05, "loss": 0.2835664987564087, "step": 70060 }, { "epoch": 0.3008251547701845, "grad_norm": 0.002115644747391343, "learning_rate": 7.021938031958469e-05, "loss": 0.3332578897476196, "step": 70070 }, { "epoch": 0.30086808686020455, "grad_norm": 0.010867947712540627, "learning_rate": 7.021506859946707e-05, "loss": 0.22531168460845946, "step": 70080 }, { "epoch": 0.30091101895022454, "grad_norm": 0.05015119910240173, "learning_rate": 7.021075687934944e-05, "loss": 0.05852286815643311, "step": 70090 }, { "epoch": 0.3009539510402445, "grad_norm": 2.656311273574829, "learning_rate": 7.020644515923182e-05, "loss": 0.24830961227416992, "step": 70100 }, { "epoch": 0.30099688313026457, "grad_norm": 1.9714524745941162, "learning_rate": 7.02021334391142e-05, "loss": 0.23079366683959962, "step": 70110 }, { "epoch": 0.30103981522028456, "grad_norm": 0.1279212236404419, "learning_rate": 7.019782171899658e-05, "loss": 0.46315436363220214, "step": 70120 }, { "epoch": 0.30108274731030454, "grad_norm": 2.974700450897217, "learning_rate": 7.019350999887895e-05, "loss": 0.3252775430679321, "step": 70130 }, { "epoch": 0.3011256794003246, "grad_norm": 0.02138546295464039, "learning_rate": 7.018919827876133e-05, "loss": 0.2596855401992798, "step": 70140 }, { "epoch": 0.3011686114903446, "grad_norm": 0.11534173041582108, "learning_rate": 7.018488655864371e-05, "loss": 0.149857234954834, "step": 70150 }, { "epoch": 0.30121154358036456, "grad_norm": 23.620689392089844, "learning_rate": 7.018057483852609e-05, "loss": 0.26149740219116213, "step": 70160 }, { "epoch": 0.3012544756703846, "grad_norm": 0.0409373976290226, "learning_rate": 7.017626311840846e-05, "loss": 0.09922696948051453, "step": 70170 }, { "epoch": 0.3012974077604046, "grad_norm": 0.11758884787559509, "learning_rate": 7.017195139829084e-05, "loss": 0.14113497734069824, "step": 70180 }, { "epoch": 0.3013403398504246, "grad_norm": 1.1054000854492188, "learning_rate": 7.016763967817322e-05, "loss": 0.04130702018737793, "step": 70190 }, { "epoch": 0.3013832719404446, "grad_norm": 0.002341791521757841, "learning_rate": 7.01633279580556e-05, "loss": 0.1668557643890381, "step": 70200 }, { "epoch": 0.3014262040304646, "grad_norm": 2.24662184715271, "learning_rate": 7.015901623793797e-05, "loss": 0.22402334213256836, "step": 70210 }, { "epoch": 0.3014691361204846, "grad_norm": 0.0016270908527076244, "learning_rate": 7.015470451782035e-05, "loss": 0.20528647899627686, "step": 70220 }, { "epoch": 0.30151206821050464, "grad_norm": 3.114729642868042, "learning_rate": 7.015039279770273e-05, "loss": 0.050827699899673465, "step": 70230 }, { "epoch": 0.3015550003005246, "grad_norm": 0.05784850940108299, "learning_rate": 7.014608107758509e-05, "loss": 0.2820718288421631, "step": 70240 }, { "epoch": 0.3015979323905446, "grad_norm": 2.5486347675323486, "learning_rate": 7.014176935746747e-05, "loss": 0.2247065305709839, "step": 70250 }, { "epoch": 0.30164086448056465, "grad_norm": 0.03793482482433319, "learning_rate": 7.013745763734985e-05, "loss": 0.37115206718444826, "step": 70260 }, { "epoch": 0.30168379657058464, "grad_norm": 2.88460373878479, "learning_rate": 7.013314591723222e-05, "loss": 0.31713476181030276, "step": 70270 }, { "epoch": 0.3017267286606047, "grad_norm": 0.14875128865242004, "learning_rate": 7.01288341971146e-05, "loss": 0.14124822616577148, "step": 70280 }, { "epoch": 0.30176966075062467, "grad_norm": 0.025722453370690346, "learning_rate": 7.012452247699698e-05, "loss": 0.4006799697875977, "step": 70290 }, { "epoch": 0.30181259284064466, "grad_norm": 0.25134965777397156, "learning_rate": 7.012021075687936e-05, "loss": 0.24204401969909667, "step": 70300 }, { "epoch": 0.3018555249306647, "grad_norm": 0.04924899339675903, "learning_rate": 7.011589903676173e-05, "loss": 0.13902003765106202, "step": 70310 }, { "epoch": 0.3018984570206847, "grad_norm": 0.13278919458389282, "learning_rate": 7.01115873166441e-05, "loss": 0.24274742603302002, "step": 70320 }, { "epoch": 0.30194138911070467, "grad_norm": 0.0036019114777445793, "learning_rate": 7.010727559652647e-05, "loss": 0.13215283155441285, "step": 70330 }, { "epoch": 0.3019843212007247, "grad_norm": 0.4529847800731659, "learning_rate": 7.010296387640885e-05, "loss": 0.2583324909210205, "step": 70340 }, { "epoch": 0.3020272532907447, "grad_norm": 23.820642471313477, "learning_rate": 7.009865215629123e-05, "loss": 0.20266332626342773, "step": 70350 }, { "epoch": 0.3020701853807647, "grad_norm": 0.018192386254668236, "learning_rate": 7.00943404361736e-05, "loss": 0.2031033754348755, "step": 70360 }, { "epoch": 0.30211311747078473, "grad_norm": 0.046048324555158615, "learning_rate": 7.009002871605598e-05, "loss": 0.1358464002609253, "step": 70370 }, { "epoch": 0.3021560495608047, "grad_norm": 0.020841889083385468, "learning_rate": 7.008571699593836e-05, "loss": 0.057205605506896975, "step": 70380 }, { "epoch": 0.3021989816508247, "grad_norm": 0.0114286495372653, "learning_rate": 7.008140527582074e-05, "loss": 0.2284595251083374, "step": 70390 }, { "epoch": 0.30224191374084475, "grad_norm": 0.003460024017840624, "learning_rate": 7.007709355570312e-05, "loss": 0.18587533235549927, "step": 70400 }, { "epoch": 0.30228484583086473, "grad_norm": 0.02072158455848694, "learning_rate": 7.007278183558549e-05, "loss": 0.14335600137710572, "step": 70410 }, { "epoch": 0.3023277779208847, "grad_norm": 0.047993358224630356, "learning_rate": 7.006847011546787e-05, "loss": 0.21287600994110106, "step": 70420 }, { "epoch": 0.30237071001090476, "grad_norm": 0.08274947106838226, "learning_rate": 7.006415839535025e-05, "loss": 0.2267000913619995, "step": 70430 }, { "epoch": 0.30241364210092475, "grad_norm": 0.0346342995762825, "learning_rate": 7.005984667523262e-05, "loss": 0.19687675237655639, "step": 70440 }, { "epoch": 0.30245657419094474, "grad_norm": 0.008197636343538761, "learning_rate": 7.0055534955115e-05, "loss": 0.29254143238067626, "step": 70450 }, { "epoch": 0.3024995062809648, "grad_norm": 0.18406546115875244, "learning_rate": 7.005122323499738e-05, "loss": 0.1060758113861084, "step": 70460 }, { "epoch": 0.30254243837098477, "grad_norm": 2.8977458477020264, "learning_rate": 7.004691151487976e-05, "loss": 0.2420039415359497, "step": 70470 }, { "epoch": 0.3025853704610048, "grad_norm": 1.7334059476852417, "learning_rate": 7.004259979476212e-05, "loss": 0.13808833360671996, "step": 70480 }, { "epoch": 0.3026283025510248, "grad_norm": 0.10556143522262573, "learning_rate": 7.00382880746445e-05, "loss": 0.15416749715805053, "step": 70490 }, { "epoch": 0.3026712346410448, "grad_norm": 1.188407301902771, "learning_rate": 7.003397635452688e-05, "loss": 0.1022602915763855, "step": 70500 }, { "epoch": 0.3027141667310648, "grad_norm": 0.012010055594146252, "learning_rate": 7.002966463440925e-05, "loss": 0.06482537388801575, "step": 70510 }, { "epoch": 0.3027570988210848, "grad_norm": 0.19251011312007904, "learning_rate": 7.002535291429163e-05, "loss": 0.5247401714324951, "step": 70520 }, { "epoch": 0.3028000309111048, "grad_norm": 0.18594901263713837, "learning_rate": 7.002104119417401e-05, "loss": 0.3951719760894775, "step": 70530 }, { "epoch": 0.30284296300112484, "grad_norm": 2.7943243980407715, "learning_rate": 7.001672947405638e-05, "loss": 0.4492646217346191, "step": 70540 }, { "epoch": 0.30288589509114483, "grad_norm": 0.014538971707224846, "learning_rate": 7.001241775393876e-05, "loss": 0.05542449355125427, "step": 70550 }, { "epoch": 0.3029288271811648, "grad_norm": 1.1423956155776978, "learning_rate": 7.000810603382114e-05, "loss": 0.2628788948059082, "step": 70560 }, { "epoch": 0.30297175927118486, "grad_norm": 2.0259108543395996, "learning_rate": 7.00037943137035e-05, "loss": 0.313212251663208, "step": 70570 }, { "epoch": 0.30301469136120485, "grad_norm": 0.6860762238502502, "learning_rate": 6.999948259358588e-05, "loss": 0.31650581359863283, "step": 70580 }, { "epoch": 0.30305762345122483, "grad_norm": 0.48423272371292114, "learning_rate": 6.999517087346826e-05, "loss": 0.5084094524383544, "step": 70590 }, { "epoch": 0.3031005555412449, "grad_norm": 0.3925926387310028, "learning_rate": 6.999085915335063e-05, "loss": 0.2517316102981567, "step": 70600 }, { "epoch": 0.30314348763126486, "grad_norm": 2.200504779815674, "learning_rate": 6.998654743323301e-05, "loss": 0.42492990493774413, "step": 70610 }, { "epoch": 0.30318641972128485, "grad_norm": 0.0720943883061409, "learning_rate": 6.998223571311539e-05, "loss": 0.1834435820579529, "step": 70620 }, { "epoch": 0.3032293518113049, "grad_norm": 0.38618120551109314, "learning_rate": 6.997792399299777e-05, "loss": 0.11539990901947021, "step": 70630 }, { "epoch": 0.3032722839013249, "grad_norm": 0.1201120987534523, "learning_rate": 6.997361227288016e-05, "loss": 0.3394498348236084, "step": 70640 }, { "epoch": 0.30331521599134487, "grad_norm": 1.1716912984848022, "learning_rate": 6.996930055276252e-05, "loss": 0.1997692346572876, "step": 70650 }, { "epoch": 0.3033581480813649, "grad_norm": 0.014452880248427391, "learning_rate": 6.99649888326449e-05, "loss": 0.3649930953979492, "step": 70660 }, { "epoch": 0.3034010801713849, "grad_norm": 0.08201514929533005, "learning_rate": 6.996067711252728e-05, "loss": 0.2555066108703613, "step": 70670 }, { "epoch": 0.3034440122614049, "grad_norm": 0.1451413631439209, "learning_rate": 6.995636539240965e-05, "loss": 0.10937966108322143, "step": 70680 }, { "epoch": 0.3034869443514249, "grad_norm": 5.239630222320557, "learning_rate": 6.995205367229203e-05, "loss": 0.3625665187835693, "step": 70690 }, { "epoch": 0.3035298764414449, "grad_norm": 0.008332494646310806, "learning_rate": 6.994774195217441e-05, "loss": 0.2099602222442627, "step": 70700 }, { "epoch": 0.30357280853146495, "grad_norm": 0.022745434194803238, "learning_rate": 6.994343023205679e-05, "loss": 0.10325167179107667, "step": 70710 }, { "epoch": 0.30361574062148494, "grad_norm": 0.854358434677124, "learning_rate": 6.993911851193916e-05, "loss": 0.14770570993423462, "step": 70720 }, { "epoch": 0.30365867271150493, "grad_norm": 0.2198524922132492, "learning_rate": 6.993480679182153e-05, "loss": 0.3121105432510376, "step": 70730 }, { "epoch": 0.30370160480152497, "grad_norm": 0.21637657284736633, "learning_rate": 6.99304950717039e-05, "loss": 0.28340332508087157, "step": 70740 }, { "epoch": 0.30374453689154496, "grad_norm": 1.856569766998291, "learning_rate": 6.992618335158628e-05, "loss": 0.2528532028198242, "step": 70750 }, { "epoch": 0.30378746898156495, "grad_norm": 0.015768440440297127, "learning_rate": 6.992187163146866e-05, "loss": 0.3581262111663818, "step": 70760 }, { "epoch": 0.303830401071585, "grad_norm": 1.2636868953704834, "learning_rate": 6.991755991135104e-05, "loss": 0.25124645233154297, "step": 70770 }, { "epoch": 0.303873333161605, "grad_norm": 0.03816381096839905, "learning_rate": 6.991324819123341e-05, "loss": 0.13545433282852173, "step": 70780 }, { "epoch": 0.30391626525162496, "grad_norm": 29.71138572692871, "learning_rate": 6.990893647111579e-05, "loss": 0.26535260677337646, "step": 70790 }, { "epoch": 0.303959197341645, "grad_norm": 0.0020745396614074707, "learning_rate": 6.990462475099817e-05, "loss": 0.08012622594833374, "step": 70800 }, { "epoch": 0.304002129431665, "grad_norm": 0.01548818126320839, "learning_rate": 6.990031303088053e-05, "loss": 0.22493376731872558, "step": 70810 }, { "epoch": 0.304045061521685, "grad_norm": 0.6447784304618835, "learning_rate": 6.989600131076291e-05, "loss": 0.2580659627914429, "step": 70820 }, { "epoch": 0.304087993611705, "grad_norm": 0.018228799104690552, "learning_rate": 6.989168959064529e-05, "loss": 0.08474425077438355, "step": 70830 }, { "epoch": 0.304130925701725, "grad_norm": 1.223036527633667, "learning_rate": 6.988737787052766e-05, "loss": 0.5076069831848145, "step": 70840 }, { "epoch": 0.304173857791745, "grad_norm": 67.93170166015625, "learning_rate": 6.988306615041004e-05, "loss": 0.23856496810913086, "step": 70850 }, { "epoch": 0.30421678988176504, "grad_norm": 1.5760152339935303, "learning_rate": 6.987875443029243e-05, "loss": 0.3166388988494873, "step": 70860 }, { "epoch": 0.304259721971785, "grad_norm": 0.09973370283842087, "learning_rate": 6.987444271017481e-05, "loss": 0.1548475742340088, "step": 70870 }, { "epoch": 0.304302654061805, "grad_norm": 0.08377603441476822, "learning_rate": 6.987013099005719e-05, "loss": 0.3000709295272827, "step": 70880 }, { "epoch": 0.30434558615182505, "grad_norm": 1.1292842626571655, "learning_rate": 6.986581926993955e-05, "loss": 0.2452075958251953, "step": 70890 }, { "epoch": 0.30438851824184504, "grad_norm": 0.024154068902134895, "learning_rate": 6.986150754982193e-05, "loss": 0.12045891284942627, "step": 70900 }, { "epoch": 0.3044314503318651, "grad_norm": 3.3469178676605225, "learning_rate": 6.98571958297043e-05, "loss": 0.2237870454788208, "step": 70910 }, { "epoch": 0.30447438242188507, "grad_norm": 0.032343361526727676, "learning_rate": 6.985288410958668e-05, "loss": 0.15854912996292114, "step": 70920 }, { "epoch": 0.30451731451190506, "grad_norm": 0.8938616514205933, "learning_rate": 6.984857238946906e-05, "loss": 0.29627814292907717, "step": 70930 }, { "epoch": 0.3045602466019251, "grad_norm": 0.006615492049604654, "learning_rate": 6.984426066935144e-05, "loss": 0.10426685810089112, "step": 70940 }, { "epoch": 0.3046031786919451, "grad_norm": 0.1046452522277832, "learning_rate": 6.983994894923381e-05, "loss": 0.23253164291381836, "step": 70950 }, { "epoch": 0.3046461107819651, "grad_norm": 0.8799397349357605, "learning_rate": 6.983563722911619e-05, "loss": 0.1915224313735962, "step": 70960 }, { "epoch": 0.3046890428719851, "grad_norm": 0.2595174312591553, "learning_rate": 6.983132550899857e-05, "loss": 0.12004296779632569, "step": 70970 }, { "epoch": 0.3047319749620051, "grad_norm": 0.1250929832458496, "learning_rate": 6.982701378888093e-05, "loss": 0.2012336254119873, "step": 70980 }, { "epoch": 0.3047749070520251, "grad_norm": 0.023574380204081535, "learning_rate": 6.982270206876331e-05, "loss": 0.2144022226333618, "step": 70990 }, { "epoch": 0.30481783914204513, "grad_norm": 8.696722984313965, "learning_rate": 6.981839034864569e-05, "loss": 0.17126057147979737, "step": 71000 }, { "epoch": 0.30481783914204513, "eval_loss": 0.4333540201187134, "eval_runtime": 27.4472, "eval_samples_per_second": 3.643, "eval_steps_per_second": 3.643, "step": 71000 }, { "epoch": 0.3048607712320651, "grad_norm": 0.08526608347892761, "learning_rate": 6.981407862852807e-05, "loss": 0.0943373441696167, "step": 71010 }, { "epoch": 0.3049037033220851, "grad_norm": 0.00957313273102045, "learning_rate": 6.980976690841044e-05, "loss": 0.2425380229949951, "step": 71020 }, { "epoch": 0.30494663541210515, "grad_norm": 0.13296842575073242, "learning_rate": 6.980545518829282e-05, "loss": 0.21628108024597167, "step": 71030 }, { "epoch": 0.30498956750212514, "grad_norm": 0.4499169886112213, "learning_rate": 6.98011434681752e-05, "loss": 0.31735036373138426, "step": 71040 }, { "epoch": 0.3050324995921451, "grad_norm": 2.7393083572387695, "learning_rate": 6.979683174805757e-05, "loss": 0.49162960052490234, "step": 71050 }, { "epoch": 0.30507543168216517, "grad_norm": 17.325475692749023, "learning_rate": 6.979252002793994e-05, "loss": 0.2496135950088501, "step": 71060 }, { "epoch": 0.30511836377218515, "grad_norm": 0.9037123918533325, "learning_rate": 6.978820830782232e-05, "loss": 0.2521227836608887, "step": 71070 }, { "epoch": 0.30516129586220514, "grad_norm": 2.061171293258667, "learning_rate": 6.97838965877047e-05, "loss": 0.3289464235305786, "step": 71080 }, { "epoch": 0.3052042279522252, "grad_norm": 0.059992242604494095, "learning_rate": 6.977958486758708e-05, "loss": 0.1082227110862732, "step": 71090 }, { "epoch": 0.30524716004224517, "grad_norm": 0.16897283494472504, "learning_rate": 6.977527314746946e-05, "loss": 0.27176353931427, "step": 71100 }, { "epoch": 0.30529009213226516, "grad_norm": 1.2346889972686768, "learning_rate": 6.977096142735184e-05, "loss": 0.3244923114776611, "step": 71110 }, { "epoch": 0.3053330242222852, "grad_norm": 1.0333776473999023, "learning_rate": 6.976664970723422e-05, "loss": 0.28680243492126467, "step": 71120 }, { "epoch": 0.3053759563123052, "grad_norm": 3.706778049468994, "learning_rate": 6.97623379871166e-05, "loss": 0.28829283714294435, "step": 71130 }, { "epoch": 0.30541888840232523, "grad_norm": 0.7992908358573914, "learning_rate": 6.975802626699896e-05, "loss": 0.19528234004974365, "step": 71140 }, { "epoch": 0.3054618204923452, "grad_norm": 0.07427681982517242, "learning_rate": 6.975371454688133e-05, "loss": 0.1444224238395691, "step": 71150 }, { "epoch": 0.3055047525823652, "grad_norm": 0.055710192769765854, "learning_rate": 6.974940282676371e-05, "loss": 0.14568614959716797, "step": 71160 }, { "epoch": 0.30554768467238524, "grad_norm": 0.008985698223114014, "learning_rate": 6.974509110664609e-05, "loss": 0.41927037239074705, "step": 71170 }, { "epoch": 0.30559061676240523, "grad_norm": 0.0678943544626236, "learning_rate": 6.974077938652847e-05, "loss": 0.24876203536987304, "step": 71180 }, { "epoch": 0.3056335488524252, "grad_norm": 0.27909696102142334, "learning_rate": 6.973646766641084e-05, "loss": 0.20138399600982665, "step": 71190 }, { "epoch": 0.30567648094244526, "grad_norm": 0.06332625448703766, "learning_rate": 6.973215594629322e-05, "loss": 0.41253089904785156, "step": 71200 }, { "epoch": 0.30571941303246525, "grad_norm": 3.1160600185394287, "learning_rate": 6.97278442261756e-05, "loss": 0.4033061027526855, "step": 71210 }, { "epoch": 0.30576234512248524, "grad_norm": 0.02980274148285389, "learning_rate": 6.972353250605796e-05, "loss": 0.19519845247268677, "step": 71220 }, { "epoch": 0.3058052772125053, "grad_norm": 0.04582500085234642, "learning_rate": 6.971922078594034e-05, "loss": 0.21372950077056885, "step": 71230 }, { "epoch": 0.30584820930252526, "grad_norm": 1.7604162693023682, "learning_rate": 6.971490906582272e-05, "loss": 0.14104797840118408, "step": 71240 }, { "epoch": 0.30589114139254525, "grad_norm": 2.9321041107177734, "learning_rate": 6.97105973457051e-05, "loss": 0.27418735027313235, "step": 71250 }, { "epoch": 0.3059340734825653, "grad_norm": 0.4169948101043701, "learning_rate": 6.970628562558747e-05, "loss": 0.2097461462020874, "step": 71260 }, { "epoch": 0.3059770055725853, "grad_norm": 0.19751910865306854, "learning_rate": 6.970197390546985e-05, "loss": 0.10453450679779053, "step": 71270 }, { "epoch": 0.30601993766260527, "grad_norm": 5.6523637771606445, "learning_rate": 6.969766218535223e-05, "loss": 0.37703814506530764, "step": 71280 }, { "epoch": 0.3060628697526253, "grad_norm": 0.0058932071551680565, "learning_rate": 6.96933504652346e-05, "loss": 0.11596277952194214, "step": 71290 }, { "epoch": 0.3061058018426453, "grad_norm": 2.179844617843628, "learning_rate": 6.968903874511698e-05, "loss": 0.3431446075439453, "step": 71300 }, { "epoch": 0.3061487339326653, "grad_norm": 4.3101043701171875, "learning_rate": 6.968472702499936e-05, "loss": 0.21665070056915284, "step": 71310 }, { "epoch": 0.3061916660226853, "grad_norm": 0.08224528282880783, "learning_rate": 6.968041530488174e-05, "loss": 0.2836304664611816, "step": 71320 }, { "epoch": 0.3062345981127053, "grad_norm": 1.7605150938034058, "learning_rate": 6.967610358476411e-05, "loss": 0.1909969210624695, "step": 71330 }, { "epoch": 0.30627753020272536, "grad_norm": 0.15108336508274078, "learning_rate": 6.967179186464649e-05, "loss": 0.19276317358016967, "step": 71340 }, { "epoch": 0.30632046229274534, "grad_norm": 0.17520533502101898, "learning_rate": 6.966748014452887e-05, "loss": 0.3102010011672974, "step": 71350 }, { "epoch": 0.30636339438276533, "grad_norm": 0.004262985661625862, "learning_rate": 6.966316842441125e-05, "loss": 0.21410059928894043, "step": 71360 }, { "epoch": 0.3064063264727854, "grad_norm": 0.07552611082792282, "learning_rate": 6.965885670429362e-05, "loss": 0.12926443815231323, "step": 71370 }, { "epoch": 0.30644925856280536, "grad_norm": 0.0392976850271225, "learning_rate": 6.9654544984176e-05, "loss": 0.3050688743591309, "step": 71380 }, { "epoch": 0.30649219065282535, "grad_norm": 0.03745187073945999, "learning_rate": 6.965023326405836e-05, "loss": 0.13380508422851561, "step": 71390 }, { "epoch": 0.3065351227428454, "grad_norm": 6.481908798217773, "learning_rate": 6.964592154394074e-05, "loss": 0.3676483631134033, "step": 71400 }, { "epoch": 0.3065780548328654, "grad_norm": 0.06287180632352829, "learning_rate": 6.964160982382312e-05, "loss": 0.20733683109283446, "step": 71410 }, { "epoch": 0.30662098692288536, "grad_norm": 0.9534348249435425, "learning_rate": 6.96372981037055e-05, "loss": 0.3034853458404541, "step": 71420 }, { "epoch": 0.3066639190129054, "grad_norm": 0.8343759179115295, "learning_rate": 6.963298638358787e-05, "loss": 0.18012170791625975, "step": 71430 }, { "epoch": 0.3067068511029254, "grad_norm": 0.07756144553422928, "learning_rate": 6.962867466347025e-05, "loss": 0.29481561183929444, "step": 71440 }, { "epoch": 0.3067497831929454, "grad_norm": 0.06801965832710266, "learning_rate": 6.962436294335263e-05, "loss": 0.3844615459442139, "step": 71450 }, { "epoch": 0.3067927152829654, "grad_norm": 0.026445409283041954, "learning_rate": 6.9620051223235e-05, "loss": 0.33446853160858153, "step": 71460 }, { "epoch": 0.3068356473729854, "grad_norm": 0.30800873041152954, "learning_rate": 6.961573950311737e-05, "loss": 0.18386055231094361, "step": 71470 }, { "epoch": 0.3068785794630054, "grad_norm": 0.5849953889846802, "learning_rate": 6.961142778299975e-05, "loss": 0.1202467918395996, "step": 71480 }, { "epoch": 0.30692151155302544, "grad_norm": 0.10062210261821747, "learning_rate": 6.960711606288212e-05, "loss": 0.23699476718902587, "step": 71490 }, { "epoch": 0.3069644436430454, "grad_norm": 1.1067657470703125, "learning_rate": 6.96028043427645e-05, "loss": 0.2898426532745361, "step": 71500 }, { "epoch": 0.3070073757330654, "grad_norm": 0.0480431467294693, "learning_rate": 6.959849262264688e-05, "loss": 0.18034435510635377, "step": 71510 }, { "epoch": 0.30705030782308546, "grad_norm": 0.010232225991785526, "learning_rate": 6.959418090252926e-05, "loss": 0.20612285137176514, "step": 71520 }, { "epoch": 0.30709323991310544, "grad_norm": 0.0968809723854065, "learning_rate": 6.958986918241163e-05, "loss": 0.20248787403106688, "step": 71530 }, { "epoch": 0.30713617200312543, "grad_norm": 0.018053434789180756, "learning_rate": 6.958555746229401e-05, "loss": 0.34410016536712645, "step": 71540 }, { "epoch": 0.30717910409314547, "grad_norm": 0.11472825706005096, "learning_rate": 6.958124574217639e-05, "loss": 0.283540153503418, "step": 71550 }, { "epoch": 0.30722203618316546, "grad_norm": 0.10586521029472351, "learning_rate": 6.957693402205876e-05, "loss": 0.004880695044994355, "step": 71560 }, { "epoch": 0.3072649682731855, "grad_norm": 4.804992198944092, "learning_rate": 6.957262230194114e-05, "loss": 0.2283937931060791, "step": 71570 }, { "epoch": 0.3073079003632055, "grad_norm": 0.0098240552470088, "learning_rate": 6.956831058182352e-05, "loss": 0.21513535976409912, "step": 71580 }, { "epoch": 0.3073508324532255, "grad_norm": 0.007225578185170889, "learning_rate": 6.95639988617059e-05, "loss": 0.1269293785095215, "step": 71590 }, { "epoch": 0.3073937645432455, "grad_norm": 1.4325543642044067, "learning_rate": 6.955968714158827e-05, "loss": 0.23351283073425294, "step": 71600 }, { "epoch": 0.3074366966332655, "grad_norm": 0.5364208221435547, "learning_rate": 6.955537542147065e-05, "loss": 0.1389673113822937, "step": 71610 }, { "epoch": 0.3074796287232855, "grad_norm": 0.0047192987985908985, "learning_rate": 6.955106370135303e-05, "loss": 0.47339601516723634, "step": 71620 }, { "epoch": 0.30752256081330553, "grad_norm": 0.027396438643336296, "learning_rate": 6.95467519812354e-05, "loss": 0.29778280258178713, "step": 71630 }, { "epoch": 0.3075654929033255, "grad_norm": 0.6361259818077087, "learning_rate": 6.954244026111777e-05, "loss": 0.3184041500091553, "step": 71640 }, { "epoch": 0.3076084249933455, "grad_norm": 0.19302690029144287, "learning_rate": 6.953812854100015e-05, "loss": 0.009601826965808868, "step": 71650 }, { "epoch": 0.30765135708336555, "grad_norm": 2.3101580142974854, "learning_rate": 6.953381682088252e-05, "loss": 0.17634633779525757, "step": 71660 }, { "epoch": 0.30769428917338554, "grad_norm": 0.10807690024375916, "learning_rate": 6.95295051007649e-05, "loss": 0.35539629459381106, "step": 71670 }, { "epoch": 0.3077372212634055, "grad_norm": 0.008286534808576107, "learning_rate": 6.952519338064728e-05, "loss": 0.10938578844070435, "step": 71680 }, { "epoch": 0.30778015335342557, "grad_norm": 0.8377420902252197, "learning_rate": 6.952088166052966e-05, "loss": 0.27280521392822266, "step": 71690 }, { "epoch": 0.30782308544344555, "grad_norm": 5.454530715942383, "learning_rate": 6.951656994041203e-05, "loss": 0.4018850326538086, "step": 71700 }, { "epoch": 0.30786601753346554, "grad_norm": 0.4904543161392212, "learning_rate": 6.951225822029441e-05, "loss": 0.391569995880127, "step": 71710 }, { "epoch": 0.3079089496234856, "grad_norm": 0.6449229121208191, "learning_rate": 6.950794650017678e-05, "loss": 0.052797901630401614, "step": 71720 }, { "epoch": 0.30795188171350557, "grad_norm": 2.045611619949341, "learning_rate": 6.950363478005915e-05, "loss": 0.2161909818649292, "step": 71730 }, { "epoch": 0.30799481380352556, "grad_norm": 0.11193674057722092, "learning_rate": 6.949932305994153e-05, "loss": 0.2853853225708008, "step": 71740 }, { "epoch": 0.3080377458935456, "grad_norm": 0.03395124524831772, "learning_rate": 6.949501133982391e-05, "loss": 0.38172454833984376, "step": 71750 }, { "epoch": 0.3080806779835656, "grad_norm": 9.028738975524902, "learning_rate": 6.949069961970628e-05, "loss": 0.1702930212020874, "step": 71760 }, { "epoch": 0.30812361007358563, "grad_norm": 0.923577606678009, "learning_rate": 6.948638789958866e-05, "loss": 0.2921849966049194, "step": 71770 }, { "epoch": 0.3081665421636056, "grad_norm": 0.685820460319519, "learning_rate": 6.948207617947104e-05, "loss": 0.22313647270202636, "step": 71780 }, { "epoch": 0.3082094742536256, "grad_norm": 3.264664888381958, "learning_rate": 6.947776445935342e-05, "loss": 0.05310268402099609, "step": 71790 }, { "epoch": 0.30825240634364565, "grad_norm": 14.52664566040039, "learning_rate": 6.94734527392358e-05, "loss": 0.28851985931396484, "step": 71800 }, { "epoch": 0.30829533843366563, "grad_norm": 13.558172225952148, "learning_rate": 6.946914101911817e-05, "loss": 0.054348152875900266, "step": 71810 }, { "epoch": 0.3083382705236856, "grad_norm": 0.22968202829360962, "learning_rate": 6.946482929900055e-05, "loss": 0.23391096591949462, "step": 71820 }, { "epoch": 0.30838120261370566, "grad_norm": 0.044967085123062134, "learning_rate": 6.946051757888293e-05, "loss": 0.35891730785369874, "step": 71830 }, { "epoch": 0.30842413470372565, "grad_norm": 2.2250916957855225, "learning_rate": 6.94562058587653e-05, "loss": 0.28689022064208985, "step": 71840 }, { "epoch": 0.30846706679374564, "grad_norm": 3.7705438137054443, "learning_rate": 6.945189413864768e-05, "loss": 0.3127246379852295, "step": 71850 }, { "epoch": 0.3085099988837657, "grad_norm": 0.48120859265327454, "learning_rate": 6.944758241853006e-05, "loss": 0.17397620677947997, "step": 71860 }, { "epoch": 0.30855293097378567, "grad_norm": 17.947019577026367, "learning_rate": 6.944327069841244e-05, "loss": 0.32586348056793213, "step": 71870 }, { "epoch": 0.30859586306380565, "grad_norm": 1.6339031457901, "learning_rate": 6.94389589782948e-05, "loss": 0.3424349308013916, "step": 71880 }, { "epoch": 0.3086387951538257, "grad_norm": 1.6345598697662354, "learning_rate": 6.943464725817718e-05, "loss": 0.07327746748924255, "step": 71890 }, { "epoch": 0.3086817272438457, "grad_norm": 0.05315762758255005, "learning_rate": 6.943033553805955e-05, "loss": 0.2570945262908936, "step": 71900 }, { "epoch": 0.30872465933386567, "grad_norm": 0.1542612761259079, "learning_rate": 6.942602381794193e-05, "loss": 0.34127881526947024, "step": 71910 }, { "epoch": 0.3087675914238857, "grad_norm": 0.016485799103975296, "learning_rate": 6.942171209782431e-05, "loss": 0.2881777286529541, "step": 71920 }, { "epoch": 0.3088105235139057, "grad_norm": 0.6435169577598572, "learning_rate": 6.941740037770669e-05, "loss": 0.2654039144515991, "step": 71930 }, { "epoch": 0.3088534556039257, "grad_norm": 1.8059978485107422, "learning_rate": 6.941308865758906e-05, "loss": 0.27082371711730957, "step": 71940 }, { "epoch": 0.30889638769394573, "grad_norm": 2.602182149887085, "learning_rate": 6.940877693747144e-05, "loss": 0.22784204483032228, "step": 71950 }, { "epoch": 0.3089393197839657, "grad_norm": 2.098834991455078, "learning_rate": 6.94044652173538e-05, "loss": 0.32148375511169436, "step": 71960 }, { "epoch": 0.3089822518739857, "grad_norm": 0.5187646150588989, "learning_rate": 6.940015349723618e-05, "loss": 0.15208892822265624, "step": 71970 }, { "epoch": 0.30902518396400575, "grad_norm": 0.12590381503105164, "learning_rate": 6.939584177711856e-05, "loss": 0.15220378637313842, "step": 71980 }, { "epoch": 0.30906811605402573, "grad_norm": 2.8593339920043945, "learning_rate": 6.939153005700094e-05, "loss": 0.668053674697876, "step": 71990 }, { "epoch": 0.3091110481440458, "grad_norm": 0.256454735994339, "learning_rate": 6.938721833688331e-05, "loss": 0.17973003387451172, "step": 72000 }, { "epoch": 0.3091110481440458, "eval_loss": 0.42271023988723755, "eval_runtime": 27.4321, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 72000 }, { "epoch": 0.30915398023406576, "grad_norm": 0.047602299600839615, "learning_rate": 6.938290661676569e-05, "loss": 0.2981420516967773, "step": 72010 }, { "epoch": 0.30919691232408575, "grad_norm": 0.27392998337745667, "learning_rate": 6.937859489664807e-05, "loss": 0.25982108116149905, "step": 72020 }, { "epoch": 0.3092398444141058, "grad_norm": 0.010714475996792316, "learning_rate": 6.937428317653045e-05, "loss": 0.28580350875854493, "step": 72030 }, { "epoch": 0.3092827765041258, "grad_norm": 1.4530272483825684, "learning_rate": 6.936997145641284e-05, "loss": 0.1686078667640686, "step": 72040 }, { "epoch": 0.30932570859414577, "grad_norm": 0.15697383880615234, "learning_rate": 6.93656597362952e-05, "loss": 0.10101045370101928, "step": 72050 }, { "epoch": 0.3093686406841658, "grad_norm": 0.018088942393660545, "learning_rate": 6.936134801617758e-05, "loss": 0.16199415922164917, "step": 72060 }, { "epoch": 0.3094115727741858, "grad_norm": 8.666268348693848, "learning_rate": 6.935703629605996e-05, "loss": 0.3642237424850464, "step": 72070 }, { "epoch": 0.3094545048642058, "grad_norm": 0.11770039796829224, "learning_rate": 6.935272457594233e-05, "loss": 0.23574357032775878, "step": 72080 }, { "epoch": 0.3094974369542258, "grad_norm": 0.0496184416115284, "learning_rate": 6.934841285582471e-05, "loss": 0.05372920632362366, "step": 72090 }, { "epoch": 0.3095403690442458, "grad_norm": 29.345449447631836, "learning_rate": 6.934410113570709e-05, "loss": 0.2644599437713623, "step": 72100 }, { "epoch": 0.3095833011342658, "grad_norm": 2.0647425651550293, "learning_rate": 6.933978941558946e-05, "loss": 0.16928220987319947, "step": 72110 }, { "epoch": 0.30962623322428584, "grad_norm": 0.017716316506266594, "learning_rate": 6.933547769547184e-05, "loss": 0.2569295406341553, "step": 72120 }, { "epoch": 0.3096691653143058, "grad_norm": 0.0034017576836049557, "learning_rate": 6.93311659753542e-05, "loss": 0.20230543613433838, "step": 72130 }, { "epoch": 0.3097120974043258, "grad_norm": 0.9167028069496155, "learning_rate": 6.932685425523658e-05, "loss": 0.22303533554077148, "step": 72140 }, { "epoch": 0.30975502949434586, "grad_norm": 1.8220903873443604, "learning_rate": 6.932254253511896e-05, "loss": 0.28549671173095703, "step": 72150 }, { "epoch": 0.30979796158436584, "grad_norm": 0.004097741097211838, "learning_rate": 6.931823081500134e-05, "loss": 0.07687397599220276, "step": 72160 }, { "epoch": 0.30984089367438583, "grad_norm": 0.12848952412605286, "learning_rate": 6.931391909488372e-05, "loss": 0.18670018911361694, "step": 72170 }, { "epoch": 0.3098838257644059, "grad_norm": 1.0430200099945068, "learning_rate": 6.930960737476609e-05, "loss": 0.27053136825561525, "step": 72180 }, { "epoch": 0.30992675785442586, "grad_norm": 1.271836757659912, "learning_rate": 6.930529565464847e-05, "loss": 0.28454022407531737, "step": 72190 }, { "epoch": 0.3099696899444459, "grad_norm": 0.024914631620049477, "learning_rate": 6.930098393453085e-05, "loss": 0.15766094923019408, "step": 72200 }, { "epoch": 0.3100126220344659, "grad_norm": 2.382819652557373, "learning_rate": 6.929667221441321e-05, "loss": 0.32537169456481935, "step": 72210 }, { "epoch": 0.3100555541244859, "grad_norm": 0.07590825110673904, "learning_rate": 6.929236049429559e-05, "loss": 0.2822270393371582, "step": 72220 }, { "epoch": 0.3100984862145059, "grad_norm": 2.425994873046875, "learning_rate": 6.928804877417797e-05, "loss": 0.207551908493042, "step": 72230 }, { "epoch": 0.3101414183045259, "grad_norm": 0.014780079945921898, "learning_rate": 6.928373705406034e-05, "loss": 0.2179882287979126, "step": 72240 }, { "epoch": 0.3101843503945459, "grad_norm": 0.013886822387576103, "learning_rate": 6.927942533394272e-05, "loss": 0.3754821062088013, "step": 72250 }, { "epoch": 0.31022728248456594, "grad_norm": 0.035959966480731964, "learning_rate": 6.927511361382511e-05, "loss": 0.1514652729034424, "step": 72260 }, { "epoch": 0.3102702145745859, "grad_norm": 0.0032398924231529236, "learning_rate": 6.927080189370749e-05, "loss": 0.1065395712852478, "step": 72270 }, { "epoch": 0.3103131466646059, "grad_norm": 0.05704474821686745, "learning_rate": 6.926649017358987e-05, "loss": 0.051316624879837035, "step": 72280 }, { "epoch": 0.31035607875462595, "grad_norm": 0.048175569623708725, "learning_rate": 6.926217845347223e-05, "loss": 0.3055478572845459, "step": 72290 }, { "epoch": 0.31039901084464594, "grad_norm": 1.6316146850585938, "learning_rate": 6.925786673335461e-05, "loss": 0.10036553144454956, "step": 72300 }, { "epoch": 0.3104419429346659, "grad_norm": 0.14009523391723633, "learning_rate": 6.925355501323698e-05, "loss": 0.21372523307800292, "step": 72310 }, { "epoch": 0.31048487502468597, "grad_norm": 0.007102209143340588, "learning_rate": 6.924924329311936e-05, "loss": 0.1738092541694641, "step": 72320 }, { "epoch": 0.31052780711470596, "grad_norm": 1.0038173198699951, "learning_rate": 6.924493157300174e-05, "loss": 0.2518671989440918, "step": 72330 }, { "epoch": 0.31057073920472594, "grad_norm": 0.21360069513320923, "learning_rate": 6.924061985288412e-05, "loss": 0.15478349924087526, "step": 72340 }, { "epoch": 0.310613671294746, "grad_norm": 0.0035239006392657757, "learning_rate": 6.92363081327665e-05, "loss": 0.1550774335861206, "step": 72350 }, { "epoch": 0.310656603384766, "grad_norm": 0.20199644565582275, "learning_rate": 6.923199641264887e-05, "loss": 0.22670469284057618, "step": 72360 }, { "epoch": 0.31069953547478596, "grad_norm": 0.1095641553401947, "learning_rate": 6.922768469253125e-05, "loss": 0.34234280586242677, "step": 72370 }, { "epoch": 0.310742467564806, "grad_norm": 0.009776102378964424, "learning_rate": 6.922337297241361e-05, "loss": 0.20321106910705566, "step": 72380 }, { "epoch": 0.310785399654826, "grad_norm": 0.00039248340181075037, "learning_rate": 6.921906125229599e-05, "loss": 0.2770804166793823, "step": 72390 }, { "epoch": 0.310828331744846, "grad_norm": 6.714611530303955, "learning_rate": 6.921474953217837e-05, "loss": 0.3584035873413086, "step": 72400 }, { "epoch": 0.310871263834866, "grad_norm": 0.2857927680015564, "learning_rate": 6.921043781206074e-05, "loss": 0.1929535150527954, "step": 72410 }, { "epoch": 0.310914195924886, "grad_norm": 1.3031257390975952, "learning_rate": 6.920612609194312e-05, "loss": 0.20522711277008057, "step": 72420 }, { "epoch": 0.31095712801490605, "grad_norm": 0.08546615391969681, "learning_rate": 6.92018143718255e-05, "loss": 0.02166993319988251, "step": 72430 }, { "epoch": 0.31100006010492603, "grad_norm": 1.505024790763855, "learning_rate": 6.919750265170788e-05, "loss": 0.45034017562866213, "step": 72440 }, { "epoch": 0.311042992194946, "grad_norm": 0.022659284994006157, "learning_rate": 6.919319093159025e-05, "loss": 0.1870231509208679, "step": 72450 }, { "epoch": 0.31108592428496606, "grad_norm": 1.7719067335128784, "learning_rate": 6.918887921147262e-05, "loss": 0.2802696228027344, "step": 72460 }, { "epoch": 0.31112885637498605, "grad_norm": 2.1437034606933594, "learning_rate": 6.9184567491355e-05, "loss": 0.332187294960022, "step": 72470 }, { "epoch": 0.31117178846500604, "grad_norm": 4.503712177276611, "learning_rate": 6.918025577123739e-05, "loss": 0.2613640308380127, "step": 72480 }, { "epoch": 0.3112147205550261, "grad_norm": 2.8580639362335205, "learning_rate": 6.917594405111976e-05, "loss": 0.42650775909423827, "step": 72490 }, { "epoch": 0.31125765264504607, "grad_norm": 0.2938775420188904, "learning_rate": 6.917163233100214e-05, "loss": 0.008270031958818435, "step": 72500 }, { "epoch": 0.31130058473506605, "grad_norm": 0.04436694458127022, "learning_rate": 6.916732061088452e-05, "loss": 0.48149800300598145, "step": 72510 }, { "epoch": 0.3113435168250861, "grad_norm": 0.09820667654275894, "learning_rate": 6.91630088907669e-05, "loss": 0.1050878882408142, "step": 72520 }, { "epoch": 0.3113864489151061, "grad_norm": 0.04176689684391022, "learning_rate": 6.915869717064927e-05, "loss": 0.3850319623947144, "step": 72530 }, { "epoch": 0.31142938100512607, "grad_norm": 2.246731996536255, "learning_rate": 6.915438545053164e-05, "loss": 0.2246485948562622, "step": 72540 }, { "epoch": 0.3114723130951461, "grad_norm": 0.03071710281074047, "learning_rate": 6.915007373041401e-05, "loss": 0.2287735939025879, "step": 72550 }, { "epoch": 0.3115152451851661, "grad_norm": 1.2058755159378052, "learning_rate": 6.914576201029639e-05, "loss": 0.3163428783416748, "step": 72560 }, { "epoch": 0.3115581772751861, "grad_norm": 1.6118654012680054, "learning_rate": 6.914145029017877e-05, "loss": 0.43007454872131345, "step": 72570 }, { "epoch": 0.31160110936520613, "grad_norm": 0.9232771396636963, "learning_rate": 6.913713857006115e-05, "loss": 0.2806085586547852, "step": 72580 }, { "epoch": 0.3116440414552261, "grad_norm": 0.06702742725610733, "learning_rate": 6.913282684994352e-05, "loss": 0.3380074739456177, "step": 72590 }, { "epoch": 0.3116869735452461, "grad_norm": 2.0718653202056885, "learning_rate": 6.91285151298259e-05, "loss": 0.26555030345916747, "step": 72600 }, { "epoch": 0.31172990563526615, "grad_norm": 2.719993829727173, "learning_rate": 6.912420340970828e-05, "loss": 0.304264760017395, "step": 72610 }, { "epoch": 0.31177283772528613, "grad_norm": 1.0876257419586182, "learning_rate": 6.911989168959064e-05, "loss": 0.13385069370269775, "step": 72620 }, { "epoch": 0.3118157698153062, "grad_norm": 4.189262866973877, "learning_rate": 6.911557996947302e-05, "loss": 0.43100762367248535, "step": 72630 }, { "epoch": 0.31185870190532616, "grad_norm": 0.024808503687381744, "learning_rate": 6.91112682493554e-05, "loss": 0.4125965118408203, "step": 72640 }, { "epoch": 0.31190163399534615, "grad_norm": 0.04348301887512207, "learning_rate": 6.910695652923777e-05, "loss": 0.2724820852279663, "step": 72650 }, { "epoch": 0.3119445660853662, "grad_norm": 25.774370193481445, "learning_rate": 6.910264480912015e-05, "loss": 0.16499853134155273, "step": 72660 }, { "epoch": 0.3119874981753862, "grad_norm": 0.05208896845579147, "learning_rate": 6.909833308900253e-05, "loss": 0.2335808277130127, "step": 72670 }, { "epoch": 0.31203043026540617, "grad_norm": 1.2792284488677979, "learning_rate": 6.90940213688849e-05, "loss": 0.2558072566986084, "step": 72680 }, { "epoch": 0.3120733623554262, "grad_norm": 0.013050318695604801, "learning_rate": 6.908970964876728e-05, "loss": 0.27600100040435793, "step": 72690 }, { "epoch": 0.3121162944454462, "grad_norm": 0.13687624037265778, "learning_rate": 6.908539792864966e-05, "loss": 0.2716856002807617, "step": 72700 }, { "epoch": 0.3121592265354662, "grad_norm": 3.646947145462036, "learning_rate": 6.908108620853204e-05, "loss": 0.3618535757064819, "step": 72710 }, { "epoch": 0.3122021586254862, "grad_norm": 0.8492512106895447, "learning_rate": 6.907677448841441e-05, "loss": 0.1546394109725952, "step": 72720 }, { "epoch": 0.3122450907155062, "grad_norm": 0.43165260553359985, "learning_rate": 6.907246276829679e-05, "loss": 0.08699611425399781, "step": 72730 }, { "epoch": 0.3122880228055262, "grad_norm": 2.5769057273864746, "learning_rate": 6.906815104817917e-05, "loss": 0.41105146408081056, "step": 72740 }, { "epoch": 0.31233095489554624, "grad_norm": 0.024652574211359024, "learning_rate": 6.906383932806155e-05, "loss": 0.14959990978240967, "step": 72750 }, { "epoch": 0.31237388698556623, "grad_norm": 0.3725256621837616, "learning_rate": 6.905952760794392e-05, "loss": 0.24816575050354003, "step": 72760 }, { "epoch": 0.3124168190755862, "grad_norm": 0.0020951335318386555, "learning_rate": 6.90552158878263e-05, "loss": 0.20660154819488524, "step": 72770 }, { "epoch": 0.31245975116560626, "grad_norm": 0.001740924664773047, "learning_rate": 6.905090416770868e-05, "loss": 0.2389366626739502, "step": 72780 }, { "epoch": 0.31250268325562625, "grad_norm": 0.007729761768132448, "learning_rate": 6.904659244759104e-05, "loss": 0.1722651243209839, "step": 72790 }, { "epoch": 0.31254561534564623, "grad_norm": 3.117405891418457, "learning_rate": 6.904228072747342e-05, "loss": 0.5300877571105957, "step": 72800 }, { "epoch": 0.3125885474356663, "grad_norm": 4.650268077850342, "learning_rate": 6.90379690073558e-05, "loss": 0.46052017211914065, "step": 72810 }, { "epoch": 0.31263147952568626, "grad_norm": 0.14857514202594757, "learning_rate": 6.903365728723817e-05, "loss": 0.30590009689331055, "step": 72820 }, { "epoch": 0.31267441161570625, "grad_norm": 0.005492858123034239, "learning_rate": 6.902934556712055e-05, "loss": 0.21419360637664794, "step": 72830 }, { "epoch": 0.3127173437057263, "grad_norm": 0.028469033539295197, "learning_rate": 6.902503384700293e-05, "loss": 0.23694248199462892, "step": 72840 }, { "epoch": 0.3127602757957463, "grad_norm": 14.499115943908691, "learning_rate": 6.90207221268853e-05, "loss": 0.20476045608520507, "step": 72850 }, { "epoch": 0.3128032078857663, "grad_norm": 0.03953484818339348, "learning_rate": 6.901641040676768e-05, "loss": 0.04689075648784637, "step": 72860 }, { "epoch": 0.3128461399757863, "grad_norm": 0.0067932335659861565, "learning_rate": 6.901209868665005e-05, "loss": 0.22675859928131104, "step": 72870 }, { "epoch": 0.3128890720658063, "grad_norm": 0.010385886766016483, "learning_rate": 6.900778696653243e-05, "loss": 0.25794124603271484, "step": 72880 }, { "epoch": 0.31293200415582634, "grad_norm": 4.429790496826172, "learning_rate": 6.90034752464148e-05, "loss": 0.2344876766204834, "step": 72890 }, { "epoch": 0.3129749362458463, "grad_norm": 0.017854005098342896, "learning_rate": 6.899916352629718e-05, "loss": 0.18946893215179444, "step": 72900 }, { "epoch": 0.3130178683358663, "grad_norm": 4.130064487457275, "learning_rate": 6.899485180617956e-05, "loss": 0.25511124134063723, "step": 72910 }, { "epoch": 0.31306080042588635, "grad_norm": 2.605947732925415, "learning_rate": 6.899054008606193e-05, "loss": 0.17304036617279053, "step": 72920 }, { "epoch": 0.31310373251590634, "grad_norm": 0.005477610044181347, "learning_rate": 6.898622836594431e-05, "loss": 0.3045832872390747, "step": 72930 }, { "epoch": 0.31314666460592633, "grad_norm": 0.02059878222644329, "learning_rate": 6.898191664582669e-05, "loss": 0.3340297222137451, "step": 72940 }, { "epoch": 0.31318959669594637, "grad_norm": 0.27422747015953064, "learning_rate": 6.897760492570907e-05, "loss": 0.19570144414901733, "step": 72950 }, { "epoch": 0.31323252878596636, "grad_norm": 0.03176238015294075, "learning_rate": 6.897329320559144e-05, "loss": 0.5011603355407714, "step": 72960 }, { "epoch": 0.31327546087598634, "grad_norm": 0.1306491494178772, "learning_rate": 6.896898148547382e-05, "loss": 0.1884814977645874, "step": 72970 }, { "epoch": 0.3133183929660064, "grad_norm": 0.5676948428153992, "learning_rate": 6.89646697653562e-05, "loss": 0.22620975971221924, "step": 72980 }, { "epoch": 0.3133613250560264, "grad_norm": 1.5327205657958984, "learning_rate": 6.896035804523858e-05, "loss": 0.3974642515182495, "step": 72990 }, { "epoch": 0.31340425714604636, "grad_norm": 0.1722613275051117, "learning_rate": 6.895604632512095e-05, "loss": 0.348058819770813, "step": 73000 }, { "epoch": 0.31340425714604636, "eval_loss": 0.4194343686103821, "eval_runtime": 27.4782, "eval_samples_per_second": 3.639, "eval_steps_per_second": 3.639, "step": 73000 }, { "epoch": 0.3134471892360664, "grad_norm": 0.05100061744451523, "learning_rate": 6.895173460500333e-05, "loss": 0.08028401732444763, "step": 73010 }, { "epoch": 0.3134901213260864, "grad_norm": 0.04568387567996979, "learning_rate": 6.894742288488571e-05, "loss": 0.14936983585357666, "step": 73020 }, { "epoch": 0.3135330534161064, "grad_norm": 0.04025249928236008, "learning_rate": 6.894311116476807e-05, "loss": 0.3439459323883057, "step": 73030 }, { "epoch": 0.3135759855061264, "grad_norm": 0.29795241355895996, "learning_rate": 6.893879944465045e-05, "loss": 0.3217799425125122, "step": 73040 }, { "epoch": 0.3136189175961464, "grad_norm": 0.019797299057245255, "learning_rate": 6.893448772453283e-05, "loss": 0.200309419631958, "step": 73050 }, { "epoch": 0.31366184968616645, "grad_norm": 1.5113530158996582, "learning_rate": 6.89301760044152e-05, "loss": 0.34020171165466306, "step": 73060 }, { "epoch": 0.31370478177618644, "grad_norm": 0.3040628731250763, "learning_rate": 6.892586428429758e-05, "loss": 0.11433427333831787, "step": 73070 }, { "epoch": 0.3137477138662064, "grad_norm": 0.3089415729045868, "learning_rate": 6.892155256417996e-05, "loss": 0.19839117527008057, "step": 73080 }, { "epoch": 0.31379064595622647, "grad_norm": 0.00015485839685425162, "learning_rate": 6.891724084406234e-05, "loss": 0.22629897594451903, "step": 73090 }, { "epoch": 0.31383357804624645, "grad_norm": 1.1724250316619873, "learning_rate": 6.891292912394471e-05, "loss": 0.164984393119812, "step": 73100 }, { "epoch": 0.31387651013626644, "grad_norm": 6.634596347808838, "learning_rate": 6.890861740382709e-05, "loss": 0.2887473821640015, "step": 73110 }, { "epoch": 0.3139194422262865, "grad_norm": 0.07911381870508194, "learning_rate": 6.890430568370945e-05, "loss": 0.23019626140594482, "step": 73120 }, { "epoch": 0.31396237431630647, "grad_norm": 0.08132822811603546, "learning_rate": 6.889999396359183e-05, "loss": 0.13312993049621583, "step": 73130 }, { "epoch": 0.31400530640632646, "grad_norm": 6.580303192138672, "learning_rate": 6.889568224347421e-05, "loss": 0.3567745447158813, "step": 73140 }, { "epoch": 0.3140482384963465, "grad_norm": 0.11964696645736694, "learning_rate": 6.889137052335659e-05, "loss": 0.09761974215507507, "step": 73150 }, { "epoch": 0.3140911705863665, "grad_norm": 2.271083116531372, "learning_rate": 6.888705880323896e-05, "loss": 0.29648566246032715, "step": 73160 }, { "epoch": 0.3141341026763865, "grad_norm": 1.732062578201294, "learning_rate": 6.888274708312134e-05, "loss": 0.15959405899047852, "step": 73170 }, { "epoch": 0.3141770347664065, "grad_norm": 0.026476643979549408, "learning_rate": 6.887843536300372e-05, "loss": 0.10188188552856445, "step": 73180 }, { "epoch": 0.3142199668564265, "grad_norm": 0.05823148041963577, "learning_rate": 6.88741236428861e-05, "loss": 0.0034772615879774095, "step": 73190 }, { "epoch": 0.3142628989464465, "grad_norm": 1.547411561012268, "learning_rate": 6.886981192276847e-05, "loss": 0.20585508346557618, "step": 73200 }, { "epoch": 0.31430583103646653, "grad_norm": 0.08510097116231918, "learning_rate": 6.886550020265085e-05, "loss": 0.23898713588714598, "step": 73210 }, { "epoch": 0.3143487631264865, "grad_norm": 0.7906961441040039, "learning_rate": 6.886118848253323e-05, "loss": 0.20077693462371826, "step": 73220 }, { "epoch": 0.3143916952165065, "grad_norm": 11.559499740600586, "learning_rate": 6.88568767624156e-05, "loss": 0.3527076721191406, "step": 73230 }, { "epoch": 0.31443462730652655, "grad_norm": 0.05195503309369087, "learning_rate": 6.885256504229798e-05, "loss": 0.20611882209777832, "step": 73240 }, { "epoch": 0.31447755939654654, "grad_norm": 0.00046401165309362113, "learning_rate": 6.884825332218036e-05, "loss": 0.15216498374938964, "step": 73250 }, { "epoch": 0.3145204914865665, "grad_norm": 0.16569949686527252, "learning_rate": 6.884394160206274e-05, "loss": 0.33675241470336914, "step": 73260 }, { "epoch": 0.31456342357658656, "grad_norm": 3.5443081855773926, "learning_rate": 6.883962988194511e-05, "loss": 0.3302395582199097, "step": 73270 }, { "epoch": 0.31460635566660655, "grad_norm": 1.1270393133163452, "learning_rate": 6.883531816182748e-05, "loss": 0.3283742666244507, "step": 73280 }, { "epoch": 0.3146492877566266, "grad_norm": 2.613358974456787, "learning_rate": 6.883100644170986e-05, "loss": 0.3033371210098267, "step": 73290 }, { "epoch": 0.3146922198466466, "grad_norm": 0.4473738670349121, "learning_rate": 6.882669472159223e-05, "loss": 0.11812324523925781, "step": 73300 }, { "epoch": 0.31473515193666657, "grad_norm": 1.625314474105835, "learning_rate": 6.882238300147461e-05, "loss": 0.03831766247749328, "step": 73310 }, { "epoch": 0.3147780840266866, "grad_norm": 0.1999804824590683, "learning_rate": 6.881807128135699e-05, "loss": 0.2408984899520874, "step": 73320 }, { "epoch": 0.3148210161167066, "grad_norm": 0.01782737672328949, "learning_rate": 6.881375956123936e-05, "loss": 0.2375476837158203, "step": 73330 }, { "epoch": 0.3148639482067266, "grad_norm": 0.005965593736618757, "learning_rate": 6.880944784112174e-05, "loss": 0.22016007900238038, "step": 73340 }, { "epoch": 0.3149068802967466, "grad_norm": 0.18683241307735443, "learning_rate": 6.880513612100412e-05, "loss": 0.34534764289855957, "step": 73350 }, { "epoch": 0.3149498123867666, "grad_norm": 0.07245934754610062, "learning_rate": 6.880082440088648e-05, "loss": 0.20898077487945557, "step": 73360 }, { "epoch": 0.3149927444767866, "grad_norm": 0.06859976798295975, "learning_rate": 6.879651268076886e-05, "loss": 0.32060167789459226, "step": 73370 }, { "epoch": 0.31503567656680664, "grad_norm": 0.044097669422626495, "learning_rate": 6.879220096065124e-05, "loss": 0.5895505428314209, "step": 73380 }, { "epoch": 0.31507860865682663, "grad_norm": 3.05902099609375, "learning_rate": 6.878788924053362e-05, "loss": 0.2975430488586426, "step": 73390 }, { "epoch": 0.3151215407468466, "grad_norm": 2.7847342491149902, "learning_rate": 6.878357752041599e-05, "loss": 0.24648573398590087, "step": 73400 }, { "epoch": 0.31516447283686666, "grad_norm": 0.6280997395515442, "learning_rate": 6.877926580029837e-05, "loss": 0.16343621015548707, "step": 73410 }, { "epoch": 0.31520740492688665, "grad_norm": 14.133800506591797, "learning_rate": 6.877495408018075e-05, "loss": 0.20844955444335939, "step": 73420 }, { "epoch": 0.31525033701690663, "grad_norm": 0.0016242277342826128, "learning_rate": 6.877064236006312e-05, "loss": 0.3425706148147583, "step": 73430 }, { "epoch": 0.3152932691069267, "grad_norm": 0.28610384464263916, "learning_rate": 6.87663306399455e-05, "loss": 0.1372280716896057, "step": 73440 }, { "epoch": 0.31533620119694666, "grad_norm": 1.4336791038513184, "learning_rate": 6.876201891982788e-05, "loss": 0.4349491596221924, "step": 73450 }, { "epoch": 0.31537913328696665, "grad_norm": 0.7685583233833313, "learning_rate": 6.875770719971026e-05, "loss": 0.13812743425369262, "step": 73460 }, { "epoch": 0.3154220653769867, "grad_norm": 0.8320184946060181, "learning_rate": 6.875339547959263e-05, "loss": 0.2647475004196167, "step": 73470 }, { "epoch": 0.3154649974670067, "grad_norm": 11.539794921875, "learning_rate": 6.874908375947501e-05, "loss": 0.2847954273223877, "step": 73480 }, { "epoch": 0.3155079295570267, "grad_norm": 2.478747606277466, "learning_rate": 6.874477203935739e-05, "loss": 0.47316579818725585, "step": 73490 }, { "epoch": 0.3155508616470467, "grad_norm": 0.12313472479581833, "learning_rate": 6.874046031923977e-05, "loss": 0.12275233268737792, "step": 73500 }, { "epoch": 0.3155937937370667, "grad_norm": 0.18190690875053406, "learning_rate": 6.873614859912214e-05, "loss": 0.34535109996795654, "step": 73510 }, { "epoch": 0.31563672582708674, "grad_norm": 0.02219126932322979, "learning_rate": 6.873183687900452e-05, "loss": 0.3690331935882568, "step": 73520 }, { "epoch": 0.3156796579171067, "grad_norm": 0.7718960642814636, "learning_rate": 6.872752515888688e-05, "loss": 0.1922664999961853, "step": 73530 }, { "epoch": 0.3157225900071267, "grad_norm": 17.84370994567871, "learning_rate": 6.872321343876926e-05, "loss": 0.20998814105987548, "step": 73540 }, { "epoch": 0.31576552209714676, "grad_norm": 4.637472629547119, "learning_rate": 6.871890171865164e-05, "loss": 0.3144806146621704, "step": 73550 }, { "epoch": 0.31580845418716674, "grad_norm": 4.393991470336914, "learning_rate": 6.871458999853402e-05, "loss": 0.39880125522613524, "step": 73560 }, { "epoch": 0.31585138627718673, "grad_norm": 0.05799943953752518, "learning_rate": 6.87102782784164e-05, "loss": 0.230562424659729, "step": 73570 }, { "epoch": 0.3158943183672068, "grad_norm": 0.9074521064758301, "learning_rate": 6.870596655829877e-05, "loss": 0.08199453353881836, "step": 73580 }, { "epoch": 0.31593725045722676, "grad_norm": 0.012529193423688412, "learning_rate": 6.870165483818115e-05, "loss": 0.15863250494003295, "step": 73590 }, { "epoch": 0.31598018254724675, "grad_norm": 0.0102114612236619, "learning_rate": 6.869734311806353e-05, "loss": 0.1947621464729309, "step": 73600 }, { "epoch": 0.3160231146372668, "grad_norm": 0.08841849118471146, "learning_rate": 6.869303139794589e-05, "loss": 0.14853737354278565, "step": 73610 }, { "epoch": 0.3160660467272868, "grad_norm": 3.2766990661621094, "learning_rate": 6.868871967782827e-05, "loss": 0.11773378849029541, "step": 73620 }, { "epoch": 0.31610897881730676, "grad_norm": 0.004059187136590481, "learning_rate": 6.868440795771064e-05, "loss": 0.22974061965942383, "step": 73630 }, { "epoch": 0.3161519109073268, "grad_norm": 0.0031375617254525423, "learning_rate": 6.868009623759302e-05, "loss": 0.22927279472351075, "step": 73640 }, { "epoch": 0.3161948429973468, "grad_norm": 5.625444412231445, "learning_rate": 6.86757845174754e-05, "loss": 0.4188673496246338, "step": 73650 }, { "epoch": 0.3162377750873668, "grad_norm": 0.02534160204231739, "learning_rate": 6.867147279735778e-05, "loss": 0.2420274257659912, "step": 73660 }, { "epoch": 0.3162807071773868, "grad_norm": 1.61578369140625, "learning_rate": 6.866716107724017e-05, "loss": 0.2600194692611694, "step": 73670 }, { "epoch": 0.3163236392674068, "grad_norm": 0.0764639675617218, "learning_rate": 6.866284935712254e-05, "loss": 0.38288352489471433, "step": 73680 }, { "epoch": 0.3163665713574268, "grad_norm": 0.19459417462348938, "learning_rate": 6.865853763700491e-05, "loss": 0.06477647423744201, "step": 73690 }, { "epoch": 0.31640950344744684, "grad_norm": 0.07276225835084915, "learning_rate": 6.865422591688729e-05, "loss": 0.0916989028453827, "step": 73700 }, { "epoch": 0.3164524355374668, "grad_norm": 2.2095179557800293, "learning_rate": 6.864991419676966e-05, "loss": 0.3283759832382202, "step": 73710 }, { "epoch": 0.31649536762748687, "grad_norm": 0.13781090080738068, "learning_rate": 6.864560247665204e-05, "loss": 0.34083831310272217, "step": 73720 }, { "epoch": 0.31653829971750685, "grad_norm": 3.043226718902588, "learning_rate": 6.864129075653442e-05, "loss": 0.22454166412353516, "step": 73730 }, { "epoch": 0.31658123180752684, "grad_norm": 0.4412704408168793, "learning_rate": 6.86369790364168e-05, "loss": 0.11458765268325806, "step": 73740 }, { "epoch": 0.3166241638975469, "grad_norm": 4.447032451629639, "learning_rate": 6.863266731629917e-05, "loss": 0.3687690496444702, "step": 73750 }, { "epoch": 0.31666709598756687, "grad_norm": 0.015125907026231289, "learning_rate": 6.862835559618155e-05, "loss": 0.08853949904441834, "step": 73760 }, { "epoch": 0.31671002807758686, "grad_norm": 3.1621484756469727, "learning_rate": 6.862404387606391e-05, "loss": 0.46956467628479004, "step": 73770 }, { "epoch": 0.3167529601676069, "grad_norm": 2.4286065101623535, "learning_rate": 6.861973215594629e-05, "loss": 0.15351874828338624, "step": 73780 }, { "epoch": 0.3167958922576269, "grad_norm": 3.618520736694336, "learning_rate": 6.861542043582867e-05, "loss": 0.20479018688201905, "step": 73790 }, { "epoch": 0.3168388243476469, "grad_norm": 0.027950339019298553, "learning_rate": 6.861110871571105e-05, "loss": 0.26309866905212403, "step": 73800 }, { "epoch": 0.3168817564376669, "grad_norm": 0.4110260605812073, "learning_rate": 6.860679699559342e-05, "loss": 0.14416571855545043, "step": 73810 }, { "epoch": 0.3169246885276869, "grad_norm": 4.954768657684326, "learning_rate": 6.86024852754758e-05, "loss": 0.43427104949951173, "step": 73820 }, { "epoch": 0.3169676206177069, "grad_norm": 0.0031284948345273733, "learning_rate": 6.859817355535818e-05, "loss": 0.07026217579841613, "step": 73830 }, { "epoch": 0.31701055270772693, "grad_norm": 0.07524044066667557, "learning_rate": 6.859386183524056e-05, "loss": 0.03691762983798981, "step": 73840 }, { "epoch": 0.3170534847977469, "grad_norm": 0.018756557255983353, "learning_rate": 6.858955011512293e-05, "loss": 0.28873724937438966, "step": 73850 }, { "epoch": 0.3170964168877669, "grad_norm": 0.36779269576072693, "learning_rate": 6.85852383950053e-05, "loss": 0.3000300884246826, "step": 73860 }, { "epoch": 0.31713934897778695, "grad_norm": 2.9124794006347656, "learning_rate": 6.858092667488767e-05, "loss": 0.29564919471740725, "step": 73870 }, { "epoch": 0.31718228106780694, "grad_norm": 1.56826651096344, "learning_rate": 6.857661495477005e-05, "loss": 0.11024869680404663, "step": 73880 }, { "epoch": 0.3172252131578269, "grad_norm": 2.3062291145324707, "learning_rate": 6.857230323465244e-05, "loss": 0.26093406677246095, "step": 73890 }, { "epoch": 0.31726814524784697, "grad_norm": 0.014564376324415207, "learning_rate": 6.856799151453482e-05, "loss": 0.5606659412384033, "step": 73900 }, { "epoch": 0.31731107733786695, "grad_norm": 0.31643006205558777, "learning_rate": 6.85636797944172e-05, "loss": 0.3782673358917236, "step": 73910 }, { "epoch": 0.317354009427887, "grad_norm": 2.9848568439483643, "learning_rate": 6.855936807429957e-05, "loss": 0.43987135887145995, "step": 73920 }, { "epoch": 0.317396941517907, "grad_norm": 2.8447110652923584, "learning_rate": 6.855505635418195e-05, "loss": 0.4308504581451416, "step": 73930 }, { "epoch": 0.31743987360792697, "grad_norm": 0.21271449327468872, "learning_rate": 6.855074463406432e-05, "loss": 0.31659440994262694, "step": 73940 }, { "epoch": 0.317482805697947, "grad_norm": 0.08096141368150711, "learning_rate": 6.854643291394669e-05, "loss": 0.18132725954055787, "step": 73950 }, { "epoch": 0.317525737787967, "grad_norm": 0.6696780323982239, "learning_rate": 6.854212119382907e-05, "loss": 0.05133354067802429, "step": 73960 }, { "epoch": 0.317568669877987, "grad_norm": 1.4350579977035522, "learning_rate": 6.853780947371145e-05, "loss": 0.30485239028930666, "step": 73970 }, { "epoch": 0.31761160196800703, "grad_norm": 2.1010446548461914, "learning_rate": 6.853349775359382e-05, "loss": 0.1809307336807251, "step": 73980 }, { "epoch": 0.317654534058027, "grad_norm": 2.349207878112793, "learning_rate": 6.85291860334762e-05, "loss": 0.5290529251098632, "step": 73990 }, { "epoch": 0.317697466148047, "grad_norm": 0.06218874827027321, "learning_rate": 6.852487431335858e-05, "loss": 0.13957602977752687, "step": 74000 }, { "epoch": 0.317697466148047, "eval_loss": 0.4288139045238495, "eval_runtime": 27.4541, "eval_samples_per_second": 3.642, "eval_steps_per_second": 3.642, "step": 74000 }, { "epoch": 0.31774039823806705, "grad_norm": 22.351932525634766, "learning_rate": 6.852056259324096e-05, "loss": 0.08331415057182312, "step": 74010 }, { "epoch": 0.31778333032808703, "grad_norm": 0.6534700989723206, "learning_rate": 6.851625087312332e-05, "loss": 0.21025574207305908, "step": 74020 }, { "epoch": 0.317826262418107, "grad_norm": 2.180680990219116, "learning_rate": 6.85119391530057e-05, "loss": 0.2656111478805542, "step": 74030 }, { "epoch": 0.31786919450812706, "grad_norm": 0.5378935933113098, "learning_rate": 6.850762743288807e-05, "loss": 0.0576973021030426, "step": 74040 }, { "epoch": 0.31791212659814705, "grad_norm": 0.13711658120155334, "learning_rate": 6.850331571277045e-05, "loss": 0.15462011098861694, "step": 74050 }, { "epoch": 0.31795505868816704, "grad_norm": 0.42226970195770264, "learning_rate": 6.849900399265283e-05, "loss": 0.36159141063690187, "step": 74060 }, { "epoch": 0.3179979907781871, "grad_norm": 0.34076371788978577, "learning_rate": 6.849469227253521e-05, "loss": 0.06672542691230773, "step": 74070 }, { "epoch": 0.31804092286820707, "grad_norm": 0.08435351401567459, "learning_rate": 6.849038055241758e-05, "loss": 0.2677072763442993, "step": 74080 }, { "epoch": 0.31808385495822705, "grad_norm": 1.4236209392547607, "learning_rate": 6.848606883229996e-05, "loss": 0.21868863105773925, "step": 74090 }, { "epoch": 0.3181267870482471, "grad_norm": 2.8784008026123047, "learning_rate": 6.848175711218233e-05, "loss": 0.21964583396911622, "step": 74100 }, { "epoch": 0.3181697191382671, "grad_norm": 1.7891312837600708, "learning_rate": 6.847744539206472e-05, "loss": 0.23284506797790527, "step": 74110 }, { "epoch": 0.31821265122828707, "grad_norm": 0.48343974351882935, "learning_rate": 6.84731336719471e-05, "loss": 0.16025389432907106, "step": 74120 }, { "epoch": 0.3182555833183071, "grad_norm": 4.8825531005859375, "learning_rate": 6.846882195182947e-05, "loss": 0.2873832225799561, "step": 74130 }, { "epoch": 0.3182985154083271, "grad_norm": 2.3623721599578857, "learning_rate": 6.846451023171185e-05, "loss": 0.14504092931747437, "step": 74140 }, { "epoch": 0.31834144749834714, "grad_norm": 6.830047607421875, "learning_rate": 6.846019851159423e-05, "loss": 0.266190767288208, "step": 74150 }, { "epoch": 0.31838437958836713, "grad_norm": 1.440556287765503, "learning_rate": 6.84558867914766e-05, "loss": 0.31242871284484863, "step": 74160 }, { "epoch": 0.3184273116783871, "grad_norm": 1.8908369541168213, "learning_rate": 6.845157507135898e-05, "loss": 0.32432739734649657, "step": 74170 }, { "epoch": 0.31847024376840716, "grad_norm": 0.2510699927806854, "learning_rate": 6.844726335124136e-05, "loss": 0.3110947847366333, "step": 74180 }, { "epoch": 0.31851317585842714, "grad_norm": 0.12024804204702377, "learning_rate": 6.844295163112372e-05, "loss": 0.13613009452819824, "step": 74190 }, { "epoch": 0.31855610794844713, "grad_norm": 1.5495117902755737, "learning_rate": 6.84386399110061e-05, "loss": 0.442257022857666, "step": 74200 }, { "epoch": 0.3185990400384672, "grad_norm": 0.110983707010746, "learning_rate": 6.843432819088848e-05, "loss": 0.1462443709373474, "step": 74210 }, { "epoch": 0.31864197212848716, "grad_norm": 3.793349504470825, "learning_rate": 6.843001647077085e-05, "loss": 0.2545795440673828, "step": 74220 }, { "epoch": 0.31868490421850715, "grad_norm": 0.16117846965789795, "learning_rate": 6.842570475065323e-05, "loss": 0.15870895385742187, "step": 74230 }, { "epoch": 0.3187278363085272, "grad_norm": 0.07271076738834381, "learning_rate": 6.842139303053561e-05, "loss": 0.12695436477661132, "step": 74240 }, { "epoch": 0.3187707683985472, "grad_norm": 1.6894328594207764, "learning_rate": 6.841708131041799e-05, "loss": 0.18799281120300293, "step": 74250 }, { "epoch": 0.31881370048856716, "grad_norm": 0.026538612321019173, "learning_rate": 6.841276959030036e-05, "loss": 0.25241885185241697, "step": 74260 }, { "epoch": 0.3188566325785872, "grad_norm": 0.17398607730865479, "learning_rate": 6.840845787018273e-05, "loss": 0.12818098068237305, "step": 74270 }, { "epoch": 0.3188995646686072, "grad_norm": 0.011165103875100613, "learning_rate": 6.84041461500651e-05, "loss": 0.005557307228446007, "step": 74280 }, { "epoch": 0.3189424967586272, "grad_norm": 4.761146068572998, "learning_rate": 6.839983442994748e-05, "loss": 0.2648351192474365, "step": 74290 }, { "epoch": 0.3189854288486472, "grad_norm": 0.0018934222171083093, "learning_rate": 6.839552270982986e-05, "loss": 0.24793648719787598, "step": 74300 }, { "epoch": 0.3190283609386672, "grad_norm": 13.782504081726074, "learning_rate": 6.839121098971224e-05, "loss": 0.4354073524475098, "step": 74310 }, { "epoch": 0.3190712930286872, "grad_norm": 5.460157871246338, "learning_rate": 6.838689926959461e-05, "loss": 0.2691751003265381, "step": 74320 }, { "epoch": 0.31911422511870724, "grad_norm": 3.517686605453491, "learning_rate": 6.838258754947699e-05, "loss": 0.10529460906982421, "step": 74330 }, { "epoch": 0.3191571572087272, "grad_norm": 0.017410220578312874, "learning_rate": 6.837827582935937e-05, "loss": 0.26190121173858644, "step": 74340 }, { "epoch": 0.31920008929874727, "grad_norm": 0.043979302048683167, "learning_rate": 6.837396410924175e-05, "loss": 0.18496758937835694, "step": 74350 }, { "epoch": 0.31924302138876726, "grad_norm": 0.17498759925365448, "learning_rate": 6.836965238912412e-05, "loss": 0.39980545043945315, "step": 74360 }, { "epoch": 0.31928595347878724, "grad_norm": 0.07269234210252762, "learning_rate": 6.83653406690065e-05, "loss": 0.25225677490234377, "step": 74370 }, { "epoch": 0.3193288855688073, "grad_norm": 1.516465425491333, "learning_rate": 6.836102894888888e-05, "loss": 0.09321829676628113, "step": 74380 }, { "epoch": 0.3193718176588273, "grad_norm": 0.11955863237380981, "learning_rate": 6.835671722877125e-05, "loss": 0.054157298803329465, "step": 74390 }, { "epoch": 0.31941474974884726, "grad_norm": 2.1415810585021973, "learning_rate": 6.835240550865363e-05, "loss": 0.19859232902526855, "step": 74400 }, { "epoch": 0.3194576818388673, "grad_norm": 0.006748868618160486, "learning_rate": 6.834809378853601e-05, "loss": 0.34521052837371824, "step": 74410 }, { "epoch": 0.3195006139288873, "grad_norm": 0.4633532166481018, "learning_rate": 6.834378206841839e-05, "loss": 0.2189234495162964, "step": 74420 }, { "epoch": 0.3195435460189073, "grad_norm": 1.0470963716506958, "learning_rate": 6.833947034830075e-05, "loss": 0.5068300247192383, "step": 74430 }, { "epoch": 0.3195864781089273, "grad_norm": 0.03167863190174103, "learning_rate": 6.833515862818313e-05, "loss": 0.06632805466651917, "step": 74440 }, { "epoch": 0.3196294101989473, "grad_norm": 1.881988763809204, "learning_rate": 6.83308469080655e-05, "loss": 0.17728158235549926, "step": 74450 }, { "epoch": 0.3196723422889673, "grad_norm": 0.2718351483345032, "learning_rate": 6.832653518794788e-05, "loss": 0.09898674488067627, "step": 74460 }, { "epoch": 0.31971527437898734, "grad_norm": 0.0583646185696125, "learning_rate": 6.832222346783026e-05, "loss": 0.07836299538612365, "step": 74470 }, { "epoch": 0.3197582064690073, "grad_norm": 0.01715688779950142, "learning_rate": 6.831791174771264e-05, "loss": 0.3526939392089844, "step": 74480 }, { "epoch": 0.3198011385590273, "grad_norm": 0.007413911167532206, "learning_rate": 6.831360002759501e-05, "loss": 0.0661340057849884, "step": 74490 }, { "epoch": 0.31984407064904735, "grad_norm": 0.28797730803489685, "learning_rate": 6.830928830747739e-05, "loss": 0.3759117603302002, "step": 74500 }, { "epoch": 0.31988700273906734, "grad_norm": 4.893228054046631, "learning_rate": 6.830497658735976e-05, "loss": 0.26215925216674807, "step": 74510 }, { "epoch": 0.3199299348290873, "grad_norm": 1.035056233406067, "learning_rate": 6.830066486724213e-05, "loss": 0.21864049434661864, "step": 74520 }, { "epoch": 0.31997286691910737, "grad_norm": 2.0399975776672363, "learning_rate": 6.829635314712451e-05, "loss": 0.12469936609268188, "step": 74530 }, { "epoch": 0.32001579900912736, "grad_norm": 0.0653163492679596, "learning_rate": 6.829204142700689e-05, "loss": 0.24412922859191893, "step": 74540 }, { "epoch": 0.32005873109914734, "grad_norm": 0.26795297861099243, "learning_rate": 6.828772970688927e-05, "loss": 0.028508707880973816, "step": 74550 }, { "epoch": 0.3201016631891674, "grad_norm": 0.033264756202697754, "learning_rate": 6.828341798677164e-05, "loss": 0.21181397438049315, "step": 74560 }, { "epoch": 0.32014459527918737, "grad_norm": 0.015528388321399689, "learning_rate": 6.827910626665402e-05, "loss": 0.13729554414749146, "step": 74570 }, { "epoch": 0.3201875273692074, "grad_norm": 0.007677197456359863, "learning_rate": 6.82747945465364e-05, "loss": 0.04282234907150269, "step": 74580 }, { "epoch": 0.3202304594592274, "grad_norm": 0.007951623760163784, "learning_rate": 6.827048282641877e-05, "loss": 0.11191353797912598, "step": 74590 }, { "epoch": 0.3202733915492474, "grad_norm": 0.7950018048286438, "learning_rate": 6.826617110630115e-05, "loss": 0.19527859687805177, "step": 74600 }, { "epoch": 0.32031632363926743, "grad_norm": 0.007828062400221825, "learning_rate": 6.826185938618353e-05, "loss": 0.21099915504455566, "step": 74610 }, { "epoch": 0.3203592557292874, "grad_norm": 1.7859470844268799, "learning_rate": 6.82575476660659e-05, "loss": 0.3485477685928345, "step": 74620 }, { "epoch": 0.3204021878193074, "grad_norm": 0.11276675760746002, "learning_rate": 6.825323594594828e-05, "loss": 0.2156670331954956, "step": 74630 }, { "epoch": 0.32044511990932745, "grad_norm": 0.005295217968523502, "learning_rate": 6.824892422583066e-05, "loss": 0.3430546760559082, "step": 74640 }, { "epoch": 0.32048805199934743, "grad_norm": 0.006576932035386562, "learning_rate": 6.824461250571304e-05, "loss": 0.03515567183494568, "step": 74650 }, { "epoch": 0.3205309840893674, "grad_norm": 3.4251933097839355, "learning_rate": 6.824030078559542e-05, "loss": 0.32103025913238525, "step": 74660 }, { "epoch": 0.32057391617938746, "grad_norm": 0.833284854888916, "learning_rate": 6.82359890654778e-05, "loss": 0.10578702688217163, "step": 74670 }, { "epoch": 0.32061684826940745, "grad_norm": 1.1916800737380981, "learning_rate": 6.823167734536016e-05, "loss": 0.5401986598968506, "step": 74680 }, { "epoch": 0.32065978035942744, "grad_norm": 12.759496688842773, "learning_rate": 6.822736562524253e-05, "loss": 0.1452803134918213, "step": 74690 }, { "epoch": 0.3207027124494475, "grad_norm": 0.20818258821964264, "learning_rate": 6.822305390512491e-05, "loss": 0.08625043630599975, "step": 74700 }, { "epoch": 0.32074564453946747, "grad_norm": 0.002943378174677491, "learning_rate": 6.821874218500729e-05, "loss": 0.16818757057189943, "step": 74710 }, { "epoch": 0.32078857662948745, "grad_norm": 0.009216200560331345, "learning_rate": 6.821443046488967e-05, "loss": 0.09053115248680114, "step": 74720 }, { "epoch": 0.3208315087195075, "grad_norm": 1.6070560216903687, "learning_rate": 6.821011874477204e-05, "loss": 0.27480525970458985, "step": 74730 }, { "epoch": 0.3208744408095275, "grad_norm": 0.4009722173213959, "learning_rate": 6.820580702465442e-05, "loss": 0.36229352951049804, "step": 74740 }, { "epoch": 0.32091737289954747, "grad_norm": 0.006991582922637463, "learning_rate": 6.82014953045368e-05, "loss": 0.21590156555175782, "step": 74750 }, { "epoch": 0.3209603049895675, "grad_norm": 3.9237425327301025, "learning_rate": 6.819718358441916e-05, "loss": 0.26285347938537595, "step": 74760 }, { "epoch": 0.3210032370795875, "grad_norm": 0.0030905550811439753, "learning_rate": 6.819287186430154e-05, "loss": 0.20134003162384034, "step": 74770 }, { "epoch": 0.32104616916960754, "grad_norm": 24.15610694885254, "learning_rate": 6.818856014418392e-05, "loss": 0.1851871132850647, "step": 74780 }, { "epoch": 0.32108910125962753, "grad_norm": 2.26943039894104, "learning_rate": 6.81842484240663e-05, "loss": 0.17499132156372071, "step": 74790 }, { "epoch": 0.3211320333496475, "grad_norm": 0.013470095582306385, "learning_rate": 6.817993670394867e-05, "loss": 0.24905192852020264, "step": 74800 }, { "epoch": 0.32117496543966756, "grad_norm": 4.934391975402832, "learning_rate": 6.817562498383105e-05, "loss": 0.3353193521499634, "step": 74810 }, { "epoch": 0.32121789752968755, "grad_norm": 1.404799222946167, "learning_rate": 6.817131326371343e-05, "loss": 0.20187389850616455, "step": 74820 }, { "epoch": 0.32126082961970753, "grad_norm": 0.2565377652645111, "learning_rate": 6.81670015435958e-05, "loss": 0.03901310861110687, "step": 74830 }, { "epoch": 0.3213037617097276, "grad_norm": 0.18820512294769287, "learning_rate": 6.816268982347818e-05, "loss": 0.07977944612503052, "step": 74840 }, { "epoch": 0.32134669379974756, "grad_norm": 4.582075595855713, "learning_rate": 6.815837810336056e-05, "loss": 0.23041229248046874, "step": 74850 }, { "epoch": 0.32138962588976755, "grad_norm": 0.007188515271991491, "learning_rate": 6.815406638324294e-05, "loss": 0.2832122564315796, "step": 74860 }, { "epoch": 0.3214325579797876, "grad_norm": 1.4931919574737549, "learning_rate": 6.814975466312531e-05, "loss": 0.43709635734558105, "step": 74870 }, { "epoch": 0.3214754900698076, "grad_norm": 1.0869759321212769, "learning_rate": 6.814544294300769e-05, "loss": 0.24327874183654785, "step": 74880 }, { "epoch": 0.32151842215982757, "grad_norm": 1.867722988128662, "learning_rate": 6.814113122289007e-05, "loss": 0.23782784938812257, "step": 74890 }, { "epoch": 0.3215613542498476, "grad_norm": 0.01522792037576437, "learning_rate": 6.813681950277245e-05, "loss": 0.28627400398254393, "step": 74900 }, { "epoch": 0.3216042863398676, "grad_norm": 0.21601015329360962, "learning_rate": 6.813250778265482e-05, "loss": 0.3485018253326416, "step": 74910 }, { "epoch": 0.3216472184298876, "grad_norm": 0.009931655600667, "learning_rate": 6.81281960625372e-05, "loss": 0.37991507053375245, "step": 74920 }, { "epoch": 0.3216901505199076, "grad_norm": 0.8376566767692566, "learning_rate": 6.812388434241956e-05, "loss": 0.20397210121154785, "step": 74930 }, { "epoch": 0.3217330826099276, "grad_norm": 0.018569491803646088, "learning_rate": 6.811957262230194e-05, "loss": 0.29088473320007324, "step": 74940 }, { "epoch": 0.3217760146999476, "grad_norm": 0.08200386166572571, "learning_rate": 6.811526090218432e-05, "loss": 0.04362513422966004, "step": 74950 }, { "epoch": 0.32181894678996764, "grad_norm": 1.1255528926849365, "learning_rate": 6.81109491820667e-05, "loss": 0.06557718515396119, "step": 74960 }, { "epoch": 0.32186187887998763, "grad_norm": 0.5678053498268127, "learning_rate": 6.810663746194907e-05, "loss": 0.2532555818557739, "step": 74970 }, { "epoch": 0.3219048109700076, "grad_norm": 0.052461788058280945, "learning_rate": 6.810232574183145e-05, "loss": 0.26459851264953616, "step": 74980 }, { "epoch": 0.32194774306002766, "grad_norm": 0.7850907444953918, "learning_rate": 6.809801402171383e-05, "loss": 0.1933504819869995, "step": 74990 }, { "epoch": 0.32199067515004764, "grad_norm": 0.07535672187805176, "learning_rate": 6.80937023015962e-05, "loss": 0.19619510173797608, "step": 75000 }, { "epoch": 0.32199067515004764, "eval_loss": 0.42375648021698, "eval_runtime": 27.4204, "eval_samples_per_second": 3.647, "eval_steps_per_second": 3.647, "step": 75000 }, { "epoch": 0.3220336072400677, "grad_norm": 0.07737399637699127, "learning_rate": 6.808939058147857e-05, "loss": 0.24549593925476074, "step": 75010 }, { "epoch": 0.3220765393300877, "grad_norm": 5.281481742858887, "learning_rate": 6.808507886136095e-05, "loss": 0.16492899656295776, "step": 75020 }, { "epoch": 0.32211947142010766, "grad_norm": 1.1363037824630737, "learning_rate": 6.808076714124332e-05, "loss": 0.23363871574401857, "step": 75030 }, { "epoch": 0.3221624035101277, "grad_norm": 0.8865850567817688, "learning_rate": 6.80764554211257e-05, "loss": 0.04637100994586944, "step": 75040 }, { "epoch": 0.3222053356001477, "grad_norm": 0.05453188344836235, "learning_rate": 6.807214370100808e-05, "loss": 0.28700239658355714, "step": 75050 }, { "epoch": 0.3222482676901677, "grad_norm": 0.06403063237667084, "learning_rate": 6.806783198089046e-05, "loss": 0.29975502490997313, "step": 75060 }, { "epoch": 0.3222911997801877, "grad_norm": 0.08798815310001373, "learning_rate": 6.806352026077283e-05, "loss": 0.15733823776245118, "step": 75070 }, { "epoch": 0.3223341318702077, "grad_norm": 1.5155143737792969, "learning_rate": 6.805920854065522e-05, "loss": 0.21812820434570312, "step": 75080 }, { "epoch": 0.3223770639602277, "grad_norm": 0.12022317945957184, "learning_rate": 6.805489682053759e-05, "loss": 0.077181476354599, "step": 75090 }, { "epoch": 0.32241999605024774, "grad_norm": 1.108117938041687, "learning_rate": 6.805058510041996e-05, "loss": 0.17434661388397216, "step": 75100 }, { "epoch": 0.3224629281402677, "grad_norm": 0.009858007542788982, "learning_rate": 6.804627338030234e-05, "loss": 0.3324748039245605, "step": 75110 }, { "epoch": 0.3225058602302877, "grad_norm": 0.001360285677947104, "learning_rate": 6.804196166018472e-05, "loss": 0.20198981761932372, "step": 75120 }, { "epoch": 0.32254879232030775, "grad_norm": 0.0015368229942396283, "learning_rate": 6.80376499400671e-05, "loss": 0.18393850326538086, "step": 75130 }, { "epoch": 0.32259172441032774, "grad_norm": 0.01634756661951542, "learning_rate": 6.803333821994947e-05, "loss": 0.19422999620437623, "step": 75140 }, { "epoch": 0.3226346565003477, "grad_norm": 27.322790145874023, "learning_rate": 6.802902649983185e-05, "loss": 0.26277408599853513, "step": 75150 }, { "epoch": 0.32267758859036777, "grad_norm": 0.08929922431707382, "learning_rate": 6.802471477971423e-05, "loss": 0.30891809463500974, "step": 75160 }, { "epoch": 0.32272052068038776, "grad_norm": 0.030177028849720955, "learning_rate": 6.802040305959659e-05, "loss": 0.4126291275024414, "step": 75170 }, { "epoch": 0.32276345277040774, "grad_norm": 0.8247568011283875, "learning_rate": 6.801609133947897e-05, "loss": 0.09560133814811707, "step": 75180 }, { "epoch": 0.3228063848604278, "grad_norm": 0.13243091106414795, "learning_rate": 6.801177961936135e-05, "loss": 0.2584502696990967, "step": 75190 }, { "epoch": 0.3228493169504478, "grad_norm": 0.01154093537479639, "learning_rate": 6.800746789924372e-05, "loss": 0.14872738122940063, "step": 75200 }, { "epoch": 0.32289224904046776, "grad_norm": 0.01844400353729725, "learning_rate": 6.80031561791261e-05, "loss": 0.2512629747390747, "step": 75210 }, { "epoch": 0.3229351811304878, "grad_norm": 4.3735833168029785, "learning_rate": 6.799884445900848e-05, "loss": 0.08456424474716187, "step": 75220 }, { "epoch": 0.3229781132205078, "grad_norm": 3.748575210571289, "learning_rate": 6.799453273889086e-05, "loss": 0.4353503227233887, "step": 75230 }, { "epoch": 0.32302104531052783, "grad_norm": 0.2333959937095642, "learning_rate": 6.799022101877323e-05, "loss": 0.08659371733665466, "step": 75240 }, { "epoch": 0.3230639774005478, "grad_norm": 1.1548364162445068, "learning_rate": 6.79859092986556e-05, "loss": 0.29887216091156005, "step": 75250 }, { "epoch": 0.3231069094905678, "grad_norm": 0.009117556735873222, "learning_rate": 6.798159757853798e-05, "loss": 0.155653178691864, "step": 75260 }, { "epoch": 0.32314984158058785, "grad_norm": 0.19610357284545898, "learning_rate": 6.797728585842035e-05, "loss": 0.22091832160949706, "step": 75270 }, { "epoch": 0.32319277367060784, "grad_norm": 0.062415748834609985, "learning_rate": 6.797297413830273e-05, "loss": 0.44664454460144043, "step": 75280 }, { "epoch": 0.3232357057606278, "grad_norm": 0.06516305357217789, "learning_rate": 6.796866241818511e-05, "loss": 0.22703559398651124, "step": 75290 }, { "epoch": 0.32327863785064787, "grad_norm": 0.10712216049432755, "learning_rate": 6.79643506980675e-05, "loss": 0.1619246482849121, "step": 75300 }, { "epoch": 0.32332156994066785, "grad_norm": 0.02630920521914959, "learning_rate": 6.796003897794988e-05, "loss": 0.17304229736328125, "step": 75310 }, { "epoch": 0.32336450203068784, "grad_norm": 0.2714967131614685, "learning_rate": 6.795572725783225e-05, "loss": 0.1583251476287842, "step": 75320 }, { "epoch": 0.3234074341207079, "grad_norm": 6.2658796310424805, "learning_rate": 6.795141553771463e-05, "loss": 0.23793091773986816, "step": 75330 }, { "epoch": 0.32345036621072787, "grad_norm": 0.015765508636832237, "learning_rate": 6.7947103817597e-05, "loss": 0.34331545829772947, "step": 75340 }, { "epoch": 0.32349329830074786, "grad_norm": 1.6817232370376587, "learning_rate": 6.794279209747937e-05, "loss": 0.3233363151550293, "step": 75350 }, { "epoch": 0.3235362303907679, "grad_norm": 0.00951626431196928, "learning_rate": 6.793848037736175e-05, "loss": 0.11996997594833374, "step": 75360 }, { "epoch": 0.3235791624807879, "grad_norm": 0.21259069442749023, "learning_rate": 6.793416865724413e-05, "loss": 0.2515986442565918, "step": 75370 }, { "epoch": 0.32362209457080787, "grad_norm": 0.036104682832956314, "learning_rate": 6.79298569371265e-05, "loss": 0.3075053930282593, "step": 75380 }, { "epoch": 0.3236650266608279, "grad_norm": 0.05259181931614876, "learning_rate": 6.792554521700888e-05, "loss": 0.29380111694335936, "step": 75390 }, { "epoch": 0.3237079587508479, "grad_norm": 0.09340565651655197, "learning_rate": 6.792123349689126e-05, "loss": 0.23864850997924805, "step": 75400 }, { "epoch": 0.3237508908408679, "grad_norm": 0.0508541613817215, "learning_rate": 6.791692177677364e-05, "loss": 0.26605610847473143, "step": 75410 }, { "epoch": 0.32379382293088793, "grad_norm": 0.4147844910621643, "learning_rate": 6.7912610056656e-05, "loss": 0.1429394006729126, "step": 75420 }, { "epoch": 0.3238367550209079, "grad_norm": 2.3247790336608887, "learning_rate": 6.790829833653838e-05, "loss": 0.41593217849731445, "step": 75430 }, { "epoch": 0.32387968711092796, "grad_norm": 0.4674864113330841, "learning_rate": 6.790398661642075e-05, "loss": 0.14860082864761354, "step": 75440 }, { "epoch": 0.32392261920094795, "grad_norm": 2.115438938140869, "learning_rate": 6.789967489630313e-05, "loss": 0.21622676849365235, "step": 75450 }, { "epoch": 0.32396555129096793, "grad_norm": 1.429050087928772, "learning_rate": 6.789536317618551e-05, "loss": 0.3115101337432861, "step": 75460 }, { "epoch": 0.324008483380988, "grad_norm": 0.019108422100543976, "learning_rate": 6.789105145606789e-05, "loss": 0.29048006534576415, "step": 75470 }, { "epoch": 0.32405141547100796, "grad_norm": 89.55338287353516, "learning_rate": 6.788673973595026e-05, "loss": 0.274165678024292, "step": 75480 }, { "epoch": 0.32409434756102795, "grad_norm": 0.29647406935691833, "learning_rate": 6.788242801583264e-05, "loss": 0.23708763122558593, "step": 75490 }, { "epoch": 0.324137279651048, "grad_norm": 2.5495612621307373, "learning_rate": 6.7878116295715e-05, "loss": 0.27585015296936033, "step": 75500 }, { "epoch": 0.324180211741068, "grad_norm": 4.890408515930176, "learning_rate": 6.787380457559738e-05, "loss": 0.2675506830215454, "step": 75510 }, { "epoch": 0.32422314383108797, "grad_norm": 5.719459056854248, "learning_rate": 6.786949285547977e-05, "loss": 0.25751235485076907, "step": 75520 }, { "epoch": 0.324266075921108, "grad_norm": 0.04946063086390495, "learning_rate": 6.786518113536215e-05, "loss": 0.2648316860198975, "step": 75530 }, { "epoch": 0.324309008011128, "grad_norm": 2.4120781421661377, "learning_rate": 6.786086941524453e-05, "loss": 0.16416383981704713, "step": 75540 }, { "epoch": 0.324351940101148, "grad_norm": 2.5934016704559326, "learning_rate": 6.78565576951269e-05, "loss": 0.3151400566101074, "step": 75550 }, { "epoch": 0.324394872191168, "grad_norm": 0.235334113240242, "learning_rate": 6.785224597500928e-05, "loss": 0.29945969581604004, "step": 75560 }, { "epoch": 0.324437804281188, "grad_norm": 0.02608591318130493, "learning_rate": 6.784793425489166e-05, "loss": 0.1716364622116089, "step": 75570 }, { "epoch": 0.324480736371208, "grad_norm": 6.218681335449219, "learning_rate": 6.784362253477402e-05, "loss": 0.20043642520904542, "step": 75580 }, { "epoch": 0.32452366846122804, "grad_norm": 4.614294528961182, "learning_rate": 6.78393108146564e-05, "loss": 0.21403853893280028, "step": 75590 }, { "epoch": 0.32456660055124803, "grad_norm": 0.23961031436920166, "learning_rate": 6.783499909453878e-05, "loss": 0.28221883773803713, "step": 75600 }, { "epoch": 0.324609532641268, "grad_norm": 0.9346398115158081, "learning_rate": 6.783068737442116e-05, "loss": 0.3928943395614624, "step": 75610 }, { "epoch": 0.32465246473128806, "grad_norm": 3.2818591594696045, "learning_rate": 6.782637565430353e-05, "loss": 0.27822265625, "step": 75620 }, { "epoch": 0.32469539682130805, "grad_norm": 14.885111808776855, "learning_rate": 6.782206393418591e-05, "loss": 0.18708184957504273, "step": 75630 }, { "epoch": 0.32473832891132803, "grad_norm": 0.09145841747522354, "learning_rate": 6.781775221406829e-05, "loss": 0.17132962942123414, "step": 75640 }, { "epoch": 0.3247812610013481, "grad_norm": 0.312010258436203, "learning_rate": 6.781344049395066e-05, "loss": 0.22032701969146729, "step": 75650 }, { "epoch": 0.32482419309136806, "grad_norm": 0.20743966102600098, "learning_rate": 6.780912877383304e-05, "loss": 0.32837064266204835, "step": 75660 }, { "epoch": 0.3248671251813881, "grad_norm": 4.621267318725586, "learning_rate": 6.78048170537154e-05, "loss": 0.2122286558151245, "step": 75670 }, { "epoch": 0.3249100572714081, "grad_norm": 0.028458695858716965, "learning_rate": 6.780050533359778e-05, "loss": 0.09199699759483337, "step": 75680 }, { "epoch": 0.3249529893614281, "grad_norm": 0.6898798942565918, "learning_rate": 6.779619361348016e-05, "loss": 0.31503407955169677, "step": 75690 }, { "epoch": 0.3249959214514481, "grad_norm": 1.5483331680297852, "learning_rate": 6.779188189336254e-05, "loss": 0.2990516901016235, "step": 75700 }, { "epoch": 0.3250388535414681, "grad_norm": 1.5893640518188477, "learning_rate": 6.778757017324492e-05, "loss": 0.3719264268875122, "step": 75710 }, { "epoch": 0.3250817856314881, "grad_norm": 0.2527483105659485, "learning_rate": 6.778325845312729e-05, "loss": 0.08039749264717103, "step": 75720 }, { "epoch": 0.32512471772150814, "grad_norm": 0.07131931930780411, "learning_rate": 6.777894673300967e-05, "loss": 0.17600921392440796, "step": 75730 }, { "epoch": 0.3251676498115281, "grad_norm": 0.007941215299069881, "learning_rate": 6.777463501289205e-05, "loss": 0.4083552360534668, "step": 75740 }, { "epoch": 0.3252105819015481, "grad_norm": 28.28816795349121, "learning_rate": 6.777032329277442e-05, "loss": 0.1733398914337158, "step": 75750 }, { "epoch": 0.32525351399156816, "grad_norm": 2.974475622177124, "learning_rate": 6.77660115726568e-05, "loss": 0.3968287229537964, "step": 75760 }, { "epoch": 0.32529644608158814, "grad_norm": 5.3154072761535645, "learning_rate": 6.776169985253918e-05, "loss": 0.28736727237701415, "step": 75770 }, { "epoch": 0.32533937817160813, "grad_norm": 0.040719084441661835, "learning_rate": 6.775738813242156e-05, "loss": 0.11909763813018799, "step": 75780 }, { "epoch": 0.32538231026162817, "grad_norm": 2.0676820278167725, "learning_rate": 6.775307641230393e-05, "loss": 0.39493000507354736, "step": 75790 }, { "epoch": 0.32542524235164816, "grad_norm": 0.23623089492321014, "learning_rate": 6.774876469218631e-05, "loss": 0.12532161474227904, "step": 75800 }, { "epoch": 0.32546817444166815, "grad_norm": 3.196317672729492, "learning_rate": 6.774445297206869e-05, "loss": 0.4569605827331543, "step": 75810 }, { "epoch": 0.3255111065316882, "grad_norm": 0.234226793050766, "learning_rate": 6.774014125195107e-05, "loss": 0.2106410503387451, "step": 75820 }, { "epoch": 0.3255540386217082, "grad_norm": 3.3291547298431396, "learning_rate": 6.773582953183343e-05, "loss": 0.17373330593109132, "step": 75830 }, { "epoch": 0.32559697071172816, "grad_norm": 6.981594085693359, "learning_rate": 6.773151781171581e-05, "loss": 0.32075550556182864, "step": 75840 }, { "epoch": 0.3256399028017482, "grad_norm": 0.0836896300315857, "learning_rate": 6.772720609159818e-05, "loss": 0.17245697975158691, "step": 75850 }, { "epoch": 0.3256828348917682, "grad_norm": 0.007298530079424381, "learning_rate": 6.772289437148056e-05, "loss": 0.4353139877319336, "step": 75860 }, { "epoch": 0.32572576698178823, "grad_norm": 1.379041314125061, "learning_rate": 6.771858265136294e-05, "loss": 0.22218732833862304, "step": 75870 }, { "epoch": 0.3257686990718082, "grad_norm": 0.578391969203949, "learning_rate": 6.771427093124532e-05, "loss": 0.18814339637756347, "step": 75880 }, { "epoch": 0.3258116311618282, "grad_norm": 0.09199251979589462, "learning_rate": 6.77099592111277e-05, "loss": 0.38448591232299806, "step": 75890 }, { "epoch": 0.32585456325184825, "grad_norm": 0.03338582068681717, "learning_rate": 6.770564749101007e-05, "loss": 0.21661453247070311, "step": 75900 }, { "epoch": 0.32589749534186824, "grad_norm": 1.5796095132827759, "learning_rate": 6.770133577089243e-05, "loss": 0.4152249336242676, "step": 75910 }, { "epoch": 0.3259404274318882, "grad_norm": 1.3694597482681274, "learning_rate": 6.769702405077481e-05, "loss": 0.3295388460159302, "step": 75920 }, { "epoch": 0.32598335952190827, "grad_norm": 2.6144042015075684, "learning_rate": 6.769271233065719e-05, "loss": 0.43881473541259763, "step": 75930 }, { "epoch": 0.32602629161192825, "grad_norm": 0.142476424574852, "learning_rate": 6.768840061053957e-05, "loss": 0.17999660968780518, "step": 75940 }, { "epoch": 0.32606922370194824, "grad_norm": 1.8619637489318848, "learning_rate": 6.768408889042194e-05, "loss": 0.2617930889129639, "step": 75950 }, { "epoch": 0.3261121557919683, "grad_norm": 0.06892193853855133, "learning_rate": 6.767977717030432e-05, "loss": 0.15514886379241943, "step": 75960 }, { "epoch": 0.32615508788198827, "grad_norm": 0.0553896464407444, "learning_rate": 6.76754654501867e-05, "loss": 0.2816772937774658, "step": 75970 }, { "epoch": 0.32619801997200826, "grad_norm": 4.053797721862793, "learning_rate": 6.767115373006908e-05, "loss": 0.0834043264389038, "step": 75980 }, { "epoch": 0.3262409520620283, "grad_norm": 1.032049536705017, "learning_rate": 6.766684200995145e-05, "loss": 0.3517958641052246, "step": 75990 }, { "epoch": 0.3262838841520483, "grad_norm": 0.01598850078880787, "learning_rate": 6.766253028983383e-05, "loss": 0.18671306371688842, "step": 76000 }, { "epoch": 0.3262838841520483, "eval_loss": 0.4156621992588043, "eval_runtime": 27.4602, "eval_samples_per_second": 3.642, "eval_steps_per_second": 3.642, "step": 76000 }, { "epoch": 0.3263268162420683, "grad_norm": 0.04511842504143715, "learning_rate": 6.765821856971621e-05, "loss": 0.3244593381881714, "step": 76010 }, { "epoch": 0.3263697483320883, "grad_norm": 2.1493868827819824, "learning_rate": 6.765390684959859e-05, "loss": 0.09227357506752014, "step": 76020 }, { "epoch": 0.3264126804221083, "grad_norm": 0.003547315252944827, "learning_rate": 6.764959512948096e-05, "loss": 0.23017759323120118, "step": 76030 }, { "epoch": 0.3264556125121283, "grad_norm": 0.0195333119481802, "learning_rate": 6.764528340936334e-05, "loss": 0.2469775676727295, "step": 76040 }, { "epoch": 0.32649854460214833, "grad_norm": 0.05551581457257271, "learning_rate": 6.764097168924572e-05, "loss": 0.2187352418899536, "step": 76050 }, { "epoch": 0.3265414766921683, "grad_norm": 0.043517522513866425, "learning_rate": 6.76366599691281e-05, "loss": 0.20890164375305176, "step": 76060 }, { "epoch": 0.3265844087821883, "grad_norm": 1.1082757711410522, "learning_rate": 6.763234824901047e-05, "loss": 0.5428919792175293, "step": 76070 }, { "epoch": 0.32662734087220835, "grad_norm": 0.612888514995575, "learning_rate": 6.762803652889284e-05, "loss": 0.22457678318023683, "step": 76080 }, { "epoch": 0.32667027296222834, "grad_norm": 0.03663385286927223, "learning_rate": 6.762372480877521e-05, "loss": 0.020015633106231688, "step": 76090 }, { "epoch": 0.3267132050522484, "grad_norm": 0.04500538483262062, "learning_rate": 6.761941308865759e-05, "loss": 0.16043226718902587, "step": 76100 }, { "epoch": 0.32675613714226837, "grad_norm": 0.23189817368984222, "learning_rate": 6.761510136853997e-05, "loss": 0.16690292358398437, "step": 76110 }, { "epoch": 0.32679906923228835, "grad_norm": 17.376708984375, "learning_rate": 6.761078964842235e-05, "loss": 0.391109824180603, "step": 76120 }, { "epoch": 0.3268420013223084, "grad_norm": 10.385045051574707, "learning_rate": 6.760647792830472e-05, "loss": 0.4553534507751465, "step": 76130 }, { "epoch": 0.3268849334123284, "grad_norm": 0.05477939546108246, "learning_rate": 6.76021662081871e-05, "loss": 0.11088311672210693, "step": 76140 }, { "epoch": 0.32692786550234837, "grad_norm": 0.12639902532100677, "learning_rate": 6.759785448806948e-05, "loss": 0.20579469203948975, "step": 76150 }, { "epoch": 0.3269707975923684, "grad_norm": 0.39953383803367615, "learning_rate": 6.759354276795184e-05, "loss": 0.44393744468688967, "step": 76160 }, { "epoch": 0.3270137296823884, "grad_norm": 0.9820276498794556, "learning_rate": 6.758923104783422e-05, "loss": 0.1274343490600586, "step": 76170 }, { "epoch": 0.3270566617724084, "grad_norm": 0.04637445881962776, "learning_rate": 6.75849193277166e-05, "loss": 0.11465686559677124, "step": 76180 }, { "epoch": 0.32709959386242843, "grad_norm": 1.4160728454589844, "learning_rate": 6.758060760759897e-05, "loss": 0.24932937622070311, "step": 76190 }, { "epoch": 0.3271425259524484, "grad_norm": 0.046061545610427856, "learning_rate": 6.757629588748135e-05, "loss": 0.16362361907958983, "step": 76200 }, { "epoch": 0.3271854580424684, "grad_norm": 0.05247507616877556, "learning_rate": 6.757198416736373e-05, "loss": 0.27802603244781493, "step": 76210 }, { "epoch": 0.32722839013248844, "grad_norm": 4.0940022468566895, "learning_rate": 6.75676724472461e-05, "loss": 0.43665170669555664, "step": 76220 }, { "epoch": 0.32727132222250843, "grad_norm": 1.1918656826019287, "learning_rate": 6.756336072712848e-05, "loss": 0.25766515731811523, "step": 76230 }, { "epoch": 0.3273142543125284, "grad_norm": 1.1751809120178223, "learning_rate": 6.755904900701086e-05, "loss": 0.2382516622543335, "step": 76240 }, { "epoch": 0.32735718640254846, "grad_norm": 0.05170591548085213, "learning_rate": 6.755473728689324e-05, "loss": 0.12441542148590087, "step": 76250 }, { "epoch": 0.32740011849256845, "grad_norm": 0.06157025322318077, "learning_rate": 6.755042556677561e-05, "loss": 0.1379122853279114, "step": 76260 }, { "epoch": 0.32744305058258844, "grad_norm": 0.013994595035910606, "learning_rate": 6.754611384665799e-05, "loss": 0.1110068678855896, "step": 76270 }, { "epoch": 0.3274859826726085, "grad_norm": 0.002745084697380662, "learning_rate": 6.754180212654037e-05, "loss": 0.1953068971633911, "step": 76280 }, { "epoch": 0.32752891476262846, "grad_norm": 0.03550855815410614, "learning_rate": 6.753749040642275e-05, "loss": 0.3258711338043213, "step": 76290 }, { "epoch": 0.3275718468526485, "grad_norm": 2.1965603828430176, "learning_rate": 6.753317868630512e-05, "loss": 0.4237357616424561, "step": 76300 }, { "epoch": 0.3276147789426685, "grad_norm": 1.426078200340271, "learning_rate": 6.75288669661875e-05, "loss": 0.2333233594894409, "step": 76310 }, { "epoch": 0.3276577110326885, "grad_norm": 1.203570008277893, "learning_rate": 6.752455524606987e-05, "loss": 0.10217641592025757, "step": 76320 }, { "epoch": 0.3277006431227085, "grad_norm": 0.2727987766265869, "learning_rate": 6.752024352595224e-05, "loss": 0.2010711431503296, "step": 76330 }, { "epoch": 0.3277435752127285, "grad_norm": 0.03920082747936249, "learning_rate": 6.751593180583462e-05, "loss": 0.33216466903686526, "step": 76340 }, { "epoch": 0.3277865073027485, "grad_norm": 0.2013687789440155, "learning_rate": 6.7511620085717e-05, "loss": 0.13307746648788452, "step": 76350 }, { "epoch": 0.32782943939276854, "grad_norm": 0.030201489105820656, "learning_rate": 6.750730836559937e-05, "loss": 0.12909989356994628, "step": 76360 }, { "epoch": 0.3278723714827885, "grad_norm": 0.027242738753557205, "learning_rate": 6.750299664548175e-05, "loss": 0.25510106086730955, "step": 76370 }, { "epoch": 0.3279153035728085, "grad_norm": 0.35596781969070435, "learning_rate": 6.749868492536413e-05, "loss": 0.4509886264801025, "step": 76380 }, { "epoch": 0.32795823566282856, "grad_norm": 0.3249008357524872, "learning_rate": 6.74943732052465e-05, "loss": 0.16739230155944823, "step": 76390 }, { "epoch": 0.32800116775284854, "grad_norm": 0.23222234845161438, "learning_rate": 6.749006148512888e-05, "loss": 0.16387200355529785, "step": 76400 }, { "epoch": 0.32804409984286853, "grad_norm": 0.080628402531147, "learning_rate": 6.748574976501125e-05, "loss": 0.26126348972320557, "step": 76410 }, { "epoch": 0.3280870319328886, "grad_norm": 0.04311169683933258, "learning_rate": 6.748143804489363e-05, "loss": 0.1209375500679016, "step": 76420 }, { "epoch": 0.32812996402290856, "grad_norm": 3.4792978763580322, "learning_rate": 6.7477126324776e-05, "loss": 0.1685411214828491, "step": 76430 }, { "epoch": 0.32817289611292855, "grad_norm": 0.004967233166098595, "learning_rate": 6.747281460465838e-05, "loss": 0.23701400756835939, "step": 76440 }, { "epoch": 0.3282158282029486, "grad_norm": 2.156458616256714, "learning_rate": 6.746850288454076e-05, "loss": 0.4158666133880615, "step": 76450 }, { "epoch": 0.3282587602929686, "grad_norm": 3.7077934741973877, "learning_rate": 6.746419116442313e-05, "loss": 0.31851413249969485, "step": 76460 }, { "epoch": 0.32830169238298856, "grad_norm": 4.856022834777832, "learning_rate": 6.745987944430551e-05, "loss": 0.25244870185852053, "step": 76470 }, { "epoch": 0.3283446244730086, "grad_norm": 2.416544198989868, "learning_rate": 6.745556772418789e-05, "loss": 0.28528871536254885, "step": 76480 }, { "epoch": 0.3283875565630286, "grad_norm": 0.025426995009183884, "learning_rate": 6.745125600407027e-05, "loss": 0.1763625144958496, "step": 76490 }, { "epoch": 0.3284304886530486, "grad_norm": 0.033551640808582306, "learning_rate": 6.744694428395264e-05, "loss": 0.2682926893234253, "step": 76500 }, { "epoch": 0.3284734207430686, "grad_norm": 0.4935181140899658, "learning_rate": 6.744263256383502e-05, "loss": 0.23141703605651856, "step": 76510 }, { "epoch": 0.3285163528330886, "grad_norm": 0.017396489158272743, "learning_rate": 6.74383208437174e-05, "loss": 0.34695000648498536, "step": 76520 }, { "epoch": 0.32855928492310865, "grad_norm": 0.004304240923374891, "learning_rate": 6.743400912359978e-05, "loss": 0.17491270303726197, "step": 76530 }, { "epoch": 0.32860221701312864, "grad_norm": 0.0214606374502182, "learning_rate": 6.742969740348215e-05, "loss": 0.3861451387405396, "step": 76540 }, { "epoch": 0.3286451491031486, "grad_norm": 0.11962751299142838, "learning_rate": 6.742538568336453e-05, "loss": 0.1299293279647827, "step": 76550 }, { "epoch": 0.32868808119316867, "grad_norm": 0.019032057374715805, "learning_rate": 6.742107396324691e-05, "loss": 0.34933903217315676, "step": 76560 }, { "epoch": 0.32873101328318866, "grad_norm": 0.00490832282230258, "learning_rate": 6.741676224312927e-05, "loss": 0.17231972217559816, "step": 76570 }, { "epoch": 0.32877394537320864, "grad_norm": 4.506997585296631, "learning_rate": 6.741245052301165e-05, "loss": 0.33571021556854247, "step": 76580 }, { "epoch": 0.3288168774632287, "grad_norm": 0.009630247950553894, "learning_rate": 6.740813880289403e-05, "loss": 0.0702921986579895, "step": 76590 }, { "epoch": 0.32885980955324867, "grad_norm": 1.6018216609954834, "learning_rate": 6.74038270827764e-05, "loss": 0.27153944969177246, "step": 76600 }, { "epoch": 0.32890274164326866, "grad_norm": 1.0840051174163818, "learning_rate": 6.739951536265878e-05, "loss": 0.23780350685119628, "step": 76610 }, { "epoch": 0.3289456737332887, "grad_norm": 0.767017126083374, "learning_rate": 6.739520364254116e-05, "loss": 0.08334184885025024, "step": 76620 }, { "epoch": 0.3289886058233087, "grad_norm": 0.03613976761698723, "learning_rate": 6.739089192242354e-05, "loss": 0.29817283153533936, "step": 76630 }, { "epoch": 0.3290315379133287, "grad_norm": 4.877974510192871, "learning_rate": 6.738658020230591e-05, "loss": 0.3710965633392334, "step": 76640 }, { "epoch": 0.3290744700033487, "grad_norm": 0.05267646536231041, "learning_rate": 6.738226848218828e-05, "loss": 0.06984040141105652, "step": 76650 }, { "epoch": 0.3291174020933687, "grad_norm": 6.402245044708252, "learning_rate": 6.737795676207065e-05, "loss": 0.2605679750442505, "step": 76660 }, { "epoch": 0.3291603341833887, "grad_norm": 0.4752952754497528, "learning_rate": 6.737364504195303e-05, "loss": 0.20503077507019044, "step": 76670 }, { "epoch": 0.32920326627340873, "grad_norm": 0.02644643560051918, "learning_rate": 6.736933332183541e-05, "loss": 0.19144694805145263, "step": 76680 }, { "epoch": 0.3292461983634287, "grad_norm": 0.024135204032063484, "learning_rate": 6.736502160171779e-05, "loss": 0.22330479621887206, "step": 76690 }, { "epoch": 0.3292891304534487, "grad_norm": 0.06849802285432816, "learning_rate": 6.736070988160016e-05, "loss": 0.3209495782852173, "step": 76700 }, { "epoch": 0.32933206254346875, "grad_norm": 0.14241854846477509, "learning_rate": 6.735639816148255e-05, "loss": 0.22532942295074462, "step": 76710 }, { "epoch": 0.32937499463348874, "grad_norm": 0.23180896043777466, "learning_rate": 6.735208644136493e-05, "loss": 0.009979206323623657, "step": 76720 }, { "epoch": 0.3294179267235088, "grad_norm": 0.006083738524466753, "learning_rate": 6.734777472124731e-05, "loss": 0.09892491698265075, "step": 76730 }, { "epoch": 0.32946085881352877, "grad_norm": 1.0296895503997803, "learning_rate": 6.734346300112967e-05, "loss": 0.2149442434310913, "step": 76740 }, { "epoch": 0.32950379090354875, "grad_norm": 4.450377941131592, "learning_rate": 6.733915128101205e-05, "loss": 0.4012149333953857, "step": 76750 }, { "epoch": 0.3295467229935688, "grad_norm": 0.052618831396102905, "learning_rate": 6.733483956089443e-05, "loss": 0.23020663261413574, "step": 76760 }, { "epoch": 0.3295896550835888, "grad_norm": 0.047180309891700745, "learning_rate": 6.73305278407768e-05, "loss": 0.14085540771484376, "step": 76770 }, { "epoch": 0.32963258717360877, "grad_norm": 0.043389927595853806, "learning_rate": 6.732621612065918e-05, "loss": 0.32704901695251465, "step": 76780 }, { "epoch": 0.3296755192636288, "grad_norm": 2.241126298904419, "learning_rate": 6.732190440054156e-05, "loss": 0.09795815348625184, "step": 76790 }, { "epoch": 0.3297184513536488, "grad_norm": 1.1609137058258057, "learning_rate": 6.731759268042394e-05, "loss": 0.27942144870758057, "step": 76800 }, { "epoch": 0.3297613834436688, "grad_norm": 2.5255086421966553, "learning_rate": 6.731328096030631e-05, "loss": 0.12147682905197144, "step": 76810 }, { "epoch": 0.32980431553368883, "grad_norm": 0.1601771116256714, "learning_rate": 6.730896924018868e-05, "loss": 0.09735663533210755, "step": 76820 }, { "epoch": 0.3298472476237088, "grad_norm": 0.021886374801397324, "learning_rate": 6.730465752007106e-05, "loss": 0.41047191619873047, "step": 76830 }, { "epoch": 0.3298901797137288, "grad_norm": 0.005706735420972109, "learning_rate": 6.730034579995343e-05, "loss": 0.1476006031036377, "step": 76840 }, { "epoch": 0.32993311180374885, "grad_norm": 0.006201342213898897, "learning_rate": 6.729603407983581e-05, "loss": 0.31725659370422366, "step": 76850 }, { "epoch": 0.32997604389376883, "grad_norm": 4.594889163970947, "learning_rate": 6.729172235971819e-05, "loss": 0.4527462005615234, "step": 76860 }, { "epoch": 0.3300189759837888, "grad_norm": 1.3257827758789062, "learning_rate": 6.728741063960056e-05, "loss": 0.35180106163024905, "step": 76870 }, { "epoch": 0.33006190807380886, "grad_norm": 0.00488060899078846, "learning_rate": 6.728309891948294e-05, "loss": 0.20447189807891847, "step": 76880 }, { "epoch": 0.33010484016382885, "grad_norm": 1.7747087478637695, "learning_rate": 6.727878719936532e-05, "loss": 0.33049397468566893, "step": 76890 }, { "epoch": 0.33014777225384884, "grad_norm": 1.723191261291504, "learning_rate": 6.727447547924768e-05, "loss": 0.2903787612915039, "step": 76900 }, { "epoch": 0.3301907043438689, "grad_norm": 0.006007712800055742, "learning_rate": 6.727016375913006e-05, "loss": 0.16813333034515382, "step": 76910 }, { "epoch": 0.33023363643388887, "grad_norm": 0.06792984157800674, "learning_rate": 6.726585203901244e-05, "loss": 0.1623067855834961, "step": 76920 }, { "epoch": 0.33027656852390885, "grad_norm": 0.9910484552383423, "learning_rate": 6.726154031889483e-05, "loss": 0.2884075403213501, "step": 76930 }, { "epoch": 0.3303195006139289, "grad_norm": 1.2997990846633911, "learning_rate": 6.72572285987772e-05, "loss": 0.22271907329559326, "step": 76940 }, { "epoch": 0.3303624327039489, "grad_norm": 0.0020591760985553265, "learning_rate": 6.725291687865958e-05, "loss": 0.2424839735031128, "step": 76950 }, { "epoch": 0.3304053647939689, "grad_norm": 0.002365125808864832, "learning_rate": 6.724860515854196e-05, "loss": 0.15611679553985597, "step": 76960 }, { "epoch": 0.3304482968839889, "grad_norm": 1.1765587329864502, "learning_rate": 6.724429343842434e-05, "loss": 0.2949581861495972, "step": 76970 }, { "epoch": 0.3304912289740089, "grad_norm": 0.004968604538589716, "learning_rate": 6.72399817183067e-05, "loss": 0.19246065616607666, "step": 76980 }, { "epoch": 0.33053416106402894, "grad_norm": 1.2751332521438599, "learning_rate": 6.723566999818908e-05, "loss": 0.32780234813690184, "step": 76990 }, { "epoch": 0.33057709315404893, "grad_norm": 0.08275651931762695, "learning_rate": 6.723135827807146e-05, "loss": 0.13589673042297362, "step": 77000 }, { "epoch": 0.33057709315404893, "eval_loss": 0.4413558542728424, "eval_runtime": 27.5166, "eval_samples_per_second": 3.634, "eval_steps_per_second": 3.634, "step": 77000 }, { "epoch": 0.3306200252440689, "grad_norm": 0.03493470698595047, "learning_rate": 6.722704655795383e-05, "loss": 0.2439117193222046, "step": 77010 }, { "epoch": 0.33066295733408896, "grad_norm": 0.0626208558678627, "learning_rate": 6.722273483783621e-05, "loss": 0.22538723945617675, "step": 77020 }, { "epoch": 0.33070588942410895, "grad_norm": 0.7499268054962158, "learning_rate": 6.721842311771859e-05, "loss": 0.06841785907745361, "step": 77030 }, { "epoch": 0.33074882151412893, "grad_norm": 0.09875093400478363, "learning_rate": 6.721411139760097e-05, "loss": 0.04120129942893982, "step": 77040 }, { "epoch": 0.330791753604149, "grad_norm": 0.4667881429195404, "learning_rate": 6.720979967748334e-05, "loss": 0.32832248210906984, "step": 77050 }, { "epoch": 0.33083468569416896, "grad_norm": 1.0091041326522827, "learning_rate": 6.720548795736571e-05, "loss": 0.16251636743545533, "step": 77060 }, { "epoch": 0.33087761778418895, "grad_norm": 0.01553149800747633, "learning_rate": 6.720117623724808e-05, "loss": 0.044237416982650754, "step": 77070 }, { "epoch": 0.330920549874209, "grad_norm": 0.00838327594101429, "learning_rate": 6.719686451713046e-05, "loss": 0.084531968832016, "step": 77080 }, { "epoch": 0.330963481964229, "grad_norm": 4.201925754547119, "learning_rate": 6.719255279701284e-05, "loss": 0.31850759983062743, "step": 77090 }, { "epoch": 0.33100641405424897, "grad_norm": 0.7134585976600647, "learning_rate": 6.718824107689522e-05, "loss": 0.20272321701049806, "step": 77100 }, { "epoch": 0.331049346144269, "grad_norm": 2.2711195945739746, "learning_rate": 6.71839293567776e-05, "loss": 0.3575067281723022, "step": 77110 }, { "epoch": 0.331092278234289, "grad_norm": 3.7381644248962402, "learning_rate": 6.717961763665997e-05, "loss": 0.10863602161407471, "step": 77120 }, { "epoch": 0.331135210324309, "grad_norm": 0.08643455803394318, "learning_rate": 6.717530591654235e-05, "loss": 0.36785068511962893, "step": 77130 }, { "epoch": 0.331178142414329, "grad_norm": 0.03152371197938919, "learning_rate": 6.717099419642473e-05, "loss": 0.4662750720977783, "step": 77140 }, { "epoch": 0.331221074504349, "grad_norm": 2.6884765625, "learning_rate": 6.71666824763071e-05, "loss": 0.11766870021820068, "step": 77150 }, { "epoch": 0.33126400659436905, "grad_norm": 0.04446806013584137, "learning_rate": 6.716237075618948e-05, "loss": 0.19636658430099488, "step": 77160 }, { "epoch": 0.33130693868438904, "grad_norm": 0.9991649389266968, "learning_rate": 6.715805903607186e-05, "loss": 0.1769741654396057, "step": 77170 }, { "epoch": 0.331349870774409, "grad_norm": 0.03879518061876297, "learning_rate": 6.715374731595424e-05, "loss": 0.16789230108261108, "step": 77180 }, { "epoch": 0.33139280286442907, "grad_norm": 0.009082995355129242, "learning_rate": 6.714943559583661e-05, "loss": 0.2841722726821899, "step": 77190 }, { "epoch": 0.33143573495444906, "grad_norm": 3.0159389972686768, "learning_rate": 6.714512387571899e-05, "loss": 0.08515748977661133, "step": 77200 }, { "epoch": 0.33147866704446904, "grad_norm": 2.8059897422790527, "learning_rate": 6.714081215560137e-05, "loss": 0.16116414070129395, "step": 77210 }, { "epoch": 0.3315215991344891, "grad_norm": 0.8414933085441589, "learning_rate": 6.713650043548374e-05, "loss": 0.24936089515686036, "step": 77220 }, { "epoch": 0.3315645312245091, "grad_norm": 0.011716615408658981, "learning_rate": 6.713218871536611e-05, "loss": 0.19275407791137694, "step": 77230 }, { "epoch": 0.33160746331452906, "grad_norm": 0.003756319172680378, "learning_rate": 6.712787699524849e-05, "loss": 0.3036526679992676, "step": 77240 }, { "epoch": 0.3316503954045491, "grad_norm": 0.998163104057312, "learning_rate": 6.712356527513086e-05, "loss": 0.30022311210632324, "step": 77250 }, { "epoch": 0.3316933274945691, "grad_norm": 0.19256161153316498, "learning_rate": 6.711925355501324e-05, "loss": 0.07962195873260498, "step": 77260 }, { "epoch": 0.3317362595845891, "grad_norm": 0.003725707530975342, "learning_rate": 6.711494183489562e-05, "loss": 0.2163766622543335, "step": 77270 }, { "epoch": 0.3317791916746091, "grad_norm": 2.38783860206604, "learning_rate": 6.7110630114778e-05, "loss": 0.3317635774612427, "step": 77280 }, { "epoch": 0.3318221237646291, "grad_norm": 0.018589181825518608, "learning_rate": 6.710631839466037e-05, "loss": 0.15659880638122559, "step": 77290 }, { "epoch": 0.3318650558546491, "grad_norm": 0.03237080201506615, "learning_rate": 6.710200667454275e-05, "loss": 0.19567469358444214, "step": 77300 }, { "epoch": 0.33190798794466914, "grad_norm": 0.005493505857884884, "learning_rate": 6.709769495442511e-05, "loss": 0.2278986692428589, "step": 77310 }, { "epoch": 0.3319509200346891, "grad_norm": 1.1253221035003662, "learning_rate": 6.709338323430749e-05, "loss": 0.24583756923675537, "step": 77320 }, { "epoch": 0.3319938521247091, "grad_norm": 11.32041072845459, "learning_rate": 6.708907151418987e-05, "loss": 0.32453408241271975, "step": 77330 }, { "epoch": 0.33203678421472915, "grad_norm": 0.0009816490346565843, "learning_rate": 6.708475979407225e-05, "loss": 0.3192496061325073, "step": 77340 }, { "epoch": 0.33207971630474914, "grad_norm": 1.2368215322494507, "learning_rate": 6.708044807395462e-05, "loss": 0.1536526083946228, "step": 77350 }, { "epoch": 0.3321226483947691, "grad_norm": 1.075612187385559, "learning_rate": 6.7076136353837e-05, "loss": 0.24217960834503174, "step": 77360 }, { "epoch": 0.33216558048478917, "grad_norm": 0.016452711075544357, "learning_rate": 6.707182463371938e-05, "loss": 0.1228145956993103, "step": 77370 }, { "epoch": 0.33220851257480916, "grad_norm": 0.51668781042099, "learning_rate": 6.706751291360176e-05, "loss": 0.3324535608291626, "step": 77380 }, { "epoch": 0.3322514446648292, "grad_norm": 0.038861099630594254, "learning_rate": 6.706320119348413e-05, "loss": 0.18577756881713867, "step": 77390 }, { "epoch": 0.3322943767548492, "grad_norm": 0.0012117112055420876, "learning_rate": 6.705888947336651e-05, "loss": 0.13234348297119142, "step": 77400 }, { "epoch": 0.3323373088448692, "grad_norm": 0.09664575755596161, "learning_rate": 6.705457775324889e-05, "loss": 0.1769404411315918, "step": 77410 }, { "epoch": 0.3323802409348892, "grad_norm": 0.14990632236003876, "learning_rate": 6.705026603313126e-05, "loss": 0.19105058908462524, "step": 77420 }, { "epoch": 0.3324231730249092, "grad_norm": 6.025893688201904, "learning_rate": 6.704595431301364e-05, "loss": 0.2703073978424072, "step": 77430 }, { "epoch": 0.3324661051149292, "grad_norm": 2.898651123046875, "learning_rate": 6.704164259289602e-05, "loss": 0.33019936084747314, "step": 77440 }, { "epoch": 0.33250903720494923, "grad_norm": 0.02029103972017765, "learning_rate": 6.70373308727784e-05, "loss": 0.272914457321167, "step": 77450 }, { "epoch": 0.3325519692949692, "grad_norm": 6.893616199493408, "learning_rate": 6.703301915266077e-05, "loss": 0.2154677391052246, "step": 77460 }, { "epoch": 0.3325949013849892, "grad_norm": 0.013883881270885468, "learning_rate": 6.702870743254315e-05, "loss": 0.2001575469970703, "step": 77470 }, { "epoch": 0.33263783347500925, "grad_norm": 1.378208041191101, "learning_rate": 6.702439571242552e-05, "loss": 0.16259143352508545, "step": 77480 }, { "epoch": 0.33268076556502924, "grad_norm": 1.3300849199295044, "learning_rate": 6.702008399230789e-05, "loss": 0.13338098526000977, "step": 77490 }, { "epoch": 0.3327236976550492, "grad_norm": 0.22622959315776825, "learning_rate": 6.701577227219027e-05, "loss": 0.07103768587112427, "step": 77500 }, { "epoch": 0.33276662974506926, "grad_norm": 0.0017219501314684749, "learning_rate": 6.701146055207265e-05, "loss": 0.12565276622772217, "step": 77510 }, { "epoch": 0.33280956183508925, "grad_norm": 1.9283549785614014, "learning_rate": 6.700714883195502e-05, "loss": 0.48866925239562986, "step": 77520 }, { "epoch": 0.33285249392510924, "grad_norm": 3.044318914413452, "learning_rate": 6.70028371118374e-05, "loss": 0.1290292263031006, "step": 77530 }, { "epoch": 0.3328954260151293, "grad_norm": 0.029055681079626083, "learning_rate": 6.699852539171978e-05, "loss": 0.3163025140762329, "step": 77540 }, { "epoch": 0.33293835810514927, "grad_norm": 0.7699205875396729, "learning_rate": 6.699421367160216e-05, "loss": 0.007592477649450302, "step": 77550 }, { "epoch": 0.33298129019516925, "grad_norm": 0.026558570563793182, "learning_rate": 6.698990195148452e-05, "loss": 0.2869630575180054, "step": 77560 }, { "epoch": 0.3330242222851893, "grad_norm": 8.344164848327637, "learning_rate": 6.69855902313669e-05, "loss": 0.49628534317016604, "step": 77570 }, { "epoch": 0.3330671543752093, "grad_norm": 0.06096833571791649, "learning_rate": 6.698127851124927e-05, "loss": 0.3495140314102173, "step": 77580 }, { "epoch": 0.3331100864652293, "grad_norm": 1.687309741973877, "learning_rate": 6.697696679113165e-05, "loss": 0.2711905241012573, "step": 77590 }, { "epoch": 0.3331530185552493, "grad_norm": 3.5439131259918213, "learning_rate": 6.697265507101403e-05, "loss": 0.47504277229309083, "step": 77600 }, { "epoch": 0.3331959506452693, "grad_norm": 2.0073721408843994, "learning_rate": 6.696834335089641e-05, "loss": 0.20028815269470215, "step": 77610 }, { "epoch": 0.33323888273528934, "grad_norm": 1.768701434135437, "learning_rate": 6.696403163077878e-05, "loss": 0.3161693811416626, "step": 77620 }, { "epoch": 0.33328181482530933, "grad_norm": 0.281827449798584, "learning_rate": 6.695971991066116e-05, "loss": 0.19931249618530272, "step": 77630 }, { "epoch": 0.3333247469153293, "grad_norm": 1.3660832643508911, "learning_rate": 6.695540819054354e-05, "loss": 0.19346129894256592, "step": 77640 }, { "epoch": 0.33336767900534936, "grad_norm": 1.3048043251037598, "learning_rate": 6.695109647042592e-05, "loss": 0.14530563354492188, "step": 77650 }, { "epoch": 0.33341061109536935, "grad_norm": 0.03229120001196861, "learning_rate": 6.69467847503083e-05, "loss": 0.36408612728118894, "step": 77660 }, { "epoch": 0.33345354318538933, "grad_norm": 1.1294801235198975, "learning_rate": 6.694247303019067e-05, "loss": 0.33203485012054446, "step": 77670 }, { "epoch": 0.3334964752754094, "grad_norm": 0.07346589118242264, "learning_rate": 6.693816131007305e-05, "loss": 0.17584925889968872, "step": 77680 }, { "epoch": 0.33353940736542936, "grad_norm": 3.982257604598999, "learning_rate": 6.693384958995543e-05, "loss": 0.06325796842575074, "step": 77690 }, { "epoch": 0.33358233945544935, "grad_norm": 0.03161519393324852, "learning_rate": 6.69295378698378e-05, "loss": 0.20434770584106446, "step": 77700 }, { "epoch": 0.3336252715454694, "grad_norm": 0.42918524146080017, "learning_rate": 6.692522614972018e-05, "loss": 0.23393239974975585, "step": 77710 }, { "epoch": 0.3336682036354894, "grad_norm": 0.058732885867357254, "learning_rate": 6.692091442960254e-05, "loss": 0.2614431619644165, "step": 77720 }, { "epoch": 0.33371113572550937, "grad_norm": 3.1066834926605225, "learning_rate": 6.691660270948492e-05, "loss": 0.2023782968521118, "step": 77730 }, { "epoch": 0.3337540678155294, "grad_norm": 0.013925936073064804, "learning_rate": 6.69122909893673e-05, "loss": 0.08589456081390381, "step": 77740 }, { "epoch": 0.3337969999055494, "grad_norm": 0.9002230167388916, "learning_rate": 6.690797926924968e-05, "loss": 0.21781384944915771, "step": 77750 }, { "epoch": 0.3338399319955694, "grad_norm": 0.013174137100577354, "learning_rate": 6.690366754913205e-05, "loss": 0.18269798755645753, "step": 77760 }, { "epoch": 0.3338828640855894, "grad_norm": 0.03236332908272743, "learning_rate": 6.689935582901443e-05, "loss": 0.15968105792999268, "step": 77770 }, { "epoch": 0.3339257961756094, "grad_norm": 0.15361715853214264, "learning_rate": 6.689504410889681e-05, "loss": 0.36173908710479735, "step": 77780 }, { "epoch": 0.3339687282656294, "grad_norm": 0.5949799418449402, "learning_rate": 6.689073238877919e-05, "loss": 0.3477851390838623, "step": 77790 }, { "epoch": 0.33401166035564944, "grad_norm": 0.023436477407813072, "learning_rate": 6.688642066866155e-05, "loss": 0.11365103721618652, "step": 77800 }, { "epoch": 0.33405459244566943, "grad_norm": 0.003403679234907031, "learning_rate": 6.688210894854393e-05, "loss": 0.20094313621520996, "step": 77810 }, { "epoch": 0.33409752453568947, "grad_norm": 0.027733566239476204, "learning_rate": 6.68777972284263e-05, "loss": 0.17328678369522094, "step": 77820 }, { "epoch": 0.33414045662570946, "grad_norm": 0.16135767102241516, "learning_rate": 6.687348550830868e-05, "loss": 0.04763566255569458, "step": 77830 }, { "epoch": 0.33418338871572945, "grad_norm": 0.38207441568374634, "learning_rate": 6.686917378819106e-05, "loss": 0.13231130838394164, "step": 77840 }, { "epoch": 0.3342263208057495, "grad_norm": 0.04116377979516983, "learning_rate": 6.686486206807344e-05, "loss": 0.22739045619964598, "step": 77850 }, { "epoch": 0.3342692528957695, "grad_norm": 1.2879244089126587, "learning_rate": 6.686055034795581e-05, "loss": 0.3036820650100708, "step": 77860 }, { "epoch": 0.33431218498578946, "grad_norm": 4.732523441314697, "learning_rate": 6.685623862783819e-05, "loss": 0.4515406608581543, "step": 77870 }, { "epoch": 0.3343551170758095, "grad_norm": 0.006630048621445894, "learning_rate": 6.685192690772057e-05, "loss": 0.08107486963272095, "step": 77880 }, { "epoch": 0.3343980491658295, "grad_norm": 1.9734116792678833, "learning_rate": 6.684761518760295e-05, "loss": 0.22512927055358886, "step": 77890 }, { "epoch": 0.3344409812558495, "grad_norm": 0.08230617642402649, "learning_rate": 6.684330346748532e-05, "loss": 0.18863047361373902, "step": 77900 }, { "epoch": 0.3344839133458695, "grad_norm": 1.6849898099899292, "learning_rate": 6.68389917473677e-05, "loss": 0.17646138668060302, "step": 77910 }, { "epoch": 0.3345268454358895, "grad_norm": 1.1425594091415405, "learning_rate": 6.683468002725008e-05, "loss": 0.33102991580963137, "step": 77920 }, { "epoch": 0.3345697775259095, "grad_norm": 0.003869341453537345, "learning_rate": 6.683036830713245e-05, "loss": 0.20403015613555908, "step": 77930 }, { "epoch": 0.33461270961592954, "grad_norm": 0.0006105902139097452, "learning_rate": 6.682605658701483e-05, "loss": 0.20998027324676513, "step": 77940 }, { "epoch": 0.3346556417059495, "grad_norm": 0.05356431007385254, "learning_rate": 6.682174486689721e-05, "loss": 0.20201430320739747, "step": 77950 }, { "epoch": 0.3346985737959695, "grad_norm": 5.7862091064453125, "learning_rate": 6.681743314677959e-05, "loss": 0.36837890148162844, "step": 77960 }, { "epoch": 0.33474150588598955, "grad_norm": 0.9469812512397766, "learning_rate": 6.681312142666195e-05, "loss": 0.21669418811798097, "step": 77970 }, { "epoch": 0.33478443797600954, "grad_norm": 2.1246681213378906, "learning_rate": 6.680880970654433e-05, "loss": 0.5200969696044921, "step": 77980 }, { "epoch": 0.33482737006602953, "grad_norm": 2.986363649368286, "learning_rate": 6.68044979864267e-05, "loss": 0.44148664474487304, "step": 77990 }, { "epoch": 0.33487030215604957, "grad_norm": 0.021413365378975868, "learning_rate": 6.680018626630908e-05, "loss": 0.2957026243209839, "step": 78000 }, { "epoch": 0.33487030215604957, "eval_loss": 0.4273984134197235, "eval_runtime": 27.4708, "eval_samples_per_second": 3.64, "eval_steps_per_second": 3.64, "step": 78000 }, { "epoch": 0.33491323424606956, "grad_norm": 0.030063766986131668, "learning_rate": 6.679587454619146e-05, "loss": 0.16760578155517578, "step": 78010 }, { "epoch": 0.3349561663360896, "grad_norm": 1.2150861024856567, "learning_rate": 6.679156282607384e-05, "loss": 0.3231816291809082, "step": 78020 }, { "epoch": 0.3349990984261096, "grad_norm": 0.07824573665857315, "learning_rate": 6.678725110595621e-05, "loss": 0.6120881080627442, "step": 78030 }, { "epoch": 0.3350420305161296, "grad_norm": 0.3086092472076416, "learning_rate": 6.678293938583859e-05, "loss": 0.18578628301620484, "step": 78040 }, { "epoch": 0.3350849626061496, "grad_norm": 236.54244995117188, "learning_rate": 6.677862766572096e-05, "loss": 0.27489328384399414, "step": 78050 }, { "epoch": 0.3351278946961696, "grad_norm": 4.726393699645996, "learning_rate": 6.677431594560333e-05, "loss": 0.19919774532318116, "step": 78060 }, { "epoch": 0.3351708267861896, "grad_norm": 5.875321388244629, "learning_rate": 6.677000422548571e-05, "loss": 0.17618502378463746, "step": 78070 }, { "epoch": 0.33521375887620963, "grad_norm": 0.017372211441397667, "learning_rate": 6.676569250536809e-05, "loss": 0.2838648796081543, "step": 78080 }, { "epoch": 0.3352566909662296, "grad_norm": 0.20975318551063538, "learning_rate": 6.676138078525047e-05, "loss": 0.1958579897880554, "step": 78090 }, { "epoch": 0.3352996230562496, "grad_norm": 0.8249739408493042, "learning_rate": 6.675706906513284e-05, "loss": 0.3073481798171997, "step": 78100 }, { "epoch": 0.33534255514626965, "grad_norm": 0.01300235278904438, "learning_rate": 6.675275734501522e-05, "loss": 0.1496501684188843, "step": 78110 }, { "epoch": 0.33538548723628964, "grad_norm": 0.06454453617334366, "learning_rate": 6.674844562489761e-05, "loss": 0.19467418193817138, "step": 78120 }, { "epoch": 0.3354284193263096, "grad_norm": 1.8716318607330322, "learning_rate": 6.674413390477997e-05, "loss": 0.20901689529418946, "step": 78130 }, { "epoch": 0.33547135141632967, "grad_norm": 0.023373577743768692, "learning_rate": 6.673982218466235e-05, "loss": 0.23439536094665528, "step": 78140 }, { "epoch": 0.33551428350634965, "grad_norm": 2.2952160835266113, "learning_rate": 6.673551046454473e-05, "loss": 0.36932108402252195, "step": 78150 }, { "epoch": 0.33555721559636964, "grad_norm": 0.006046372931450605, "learning_rate": 6.67311987444271e-05, "loss": 0.16835312843322753, "step": 78160 }, { "epoch": 0.3356001476863897, "grad_norm": 0.00841306522488594, "learning_rate": 6.672688702430948e-05, "loss": 0.22861673831939697, "step": 78170 }, { "epoch": 0.33564307977640967, "grad_norm": 0.09327050298452377, "learning_rate": 6.672257530419186e-05, "loss": 0.17674237489700317, "step": 78180 }, { "epoch": 0.33568601186642966, "grad_norm": 0.5533800721168518, "learning_rate": 6.671826358407424e-05, "loss": 0.40234909057617185, "step": 78190 }, { "epoch": 0.3357289439564497, "grad_norm": 0.018703026697039604, "learning_rate": 6.671395186395662e-05, "loss": 0.08413835167884827, "step": 78200 }, { "epoch": 0.3357718760464697, "grad_norm": 0.5634124875068665, "learning_rate": 6.6709640143839e-05, "loss": 0.12944929599761962, "step": 78210 }, { "epoch": 0.3358148081364897, "grad_norm": 2.1403753757476807, "learning_rate": 6.670532842372136e-05, "loss": 0.20814151763916017, "step": 78220 }, { "epoch": 0.3358577402265097, "grad_norm": 3.3457531929016113, "learning_rate": 6.670101670360373e-05, "loss": 0.17461209297180175, "step": 78230 }, { "epoch": 0.3359006723165297, "grad_norm": 0.2121453732252121, "learning_rate": 6.669670498348611e-05, "loss": 0.1593286395072937, "step": 78240 }, { "epoch": 0.33594360440654975, "grad_norm": 5.8306403160095215, "learning_rate": 6.669239326336849e-05, "loss": 0.09663127064704895, "step": 78250 }, { "epoch": 0.33598653649656973, "grad_norm": 0.11331497132778168, "learning_rate": 6.668808154325087e-05, "loss": 0.32361640930175783, "step": 78260 }, { "epoch": 0.3360294685865897, "grad_norm": 0.011147410608828068, "learning_rate": 6.668376982313324e-05, "loss": 0.14730819463729858, "step": 78270 }, { "epoch": 0.33607240067660976, "grad_norm": 0.006566631607711315, "learning_rate": 6.667945810301562e-05, "loss": 0.38276846408843995, "step": 78280 }, { "epoch": 0.33611533276662975, "grad_norm": 4.870085716247559, "learning_rate": 6.6675146382898e-05, "loss": 0.40787491798400877, "step": 78290 }, { "epoch": 0.33615826485664974, "grad_norm": 0.06610367447137833, "learning_rate": 6.667083466278036e-05, "loss": 0.11140779256820679, "step": 78300 }, { "epoch": 0.3362011969466698, "grad_norm": 0.08063236624002457, "learning_rate": 6.666652294266274e-05, "loss": 0.10844311714172364, "step": 78310 }, { "epoch": 0.33624412903668977, "grad_norm": 0.005386178847402334, "learning_rate": 6.666221122254512e-05, "loss": 0.23304014205932616, "step": 78320 }, { "epoch": 0.33628706112670975, "grad_norm": 1.1538461446762085, "learning_rate": 6.66578995024275e-05, "loss": 0.24594826698303224, "step": 78330 }, { "epoch": 0.3363299932167298, "grad_norm": 0.05892343819141388, "learning_rate": 6.665358778230989e-05, "loss": 0.2531136035919189, "step": 78340 }, { "epoch": 0.3363729253067498, "grad_norm": 0.0801706314086914, "learning_rate": 6.664927606219226e-05, "loss": 0.3033801555633545, "step": 78350 }, { "epoch": 0.33641585739676977, "grad_norm": 5.389936447143555, "learning_rate": 6.664496434207464e-05, "loss": 0.3112450122833252, "step": 78360 }, { "epoch": 0.3364587894867898, "grad_norm": 7.637985706329346, "learning_rate": 6.664065262195702e-05, "loss": 0.25974130630493164, "step": 78370 }, { "epoch": 0.3365017215768098, "grad_norm": 1.631525993347168, "learning_rate": 6.663634090183938e-05, "loss": 0.07255272269248962, "step": 78380 }, { "epoch": 0.3365446536668298, "grad_norm": 1.459124207496643, "learning_rate": 6.663202918172176e-05, "loss": 0.314791202545166, "step": 78390 }, { "epoch": 0.3365875857568498, "grad_norm": 0.12745119631290436, "learning_rate": 6.662771746160414e-05, "loss": 0.06704494953155518, "step": 78400 }, { "epoch": 0.3366305178468698, "grad_norm": 2.5058093070983887, "learning_rate": 6.662340574148651e-05, "loss": 0.3335081100463867, "step": 78410 }, { "epoch": 0.3366734499368898, "grad_norm": 0.35333049297332764, "learning_rate": 6.661909402136889e-05, "loss": 0.2435668706893921, "step": 78420 }, { "epoch": 0.33671638202690984, "grad_norm": 0.24012352526187897, "learning_rate": 6.661478230125127e-05, "loss": 0.2389286994934082, "step": 78430 }, { "epoch": 0.33675931411692983, "grad_norm": 0.20842285454273224, "learning_rate": 6.661047058113365e-05, "loss": 0.21304574012756347, "step": 78440 }, { "epoch": 0.3368022462069499, "grad_norm": 0.11603209376335144, "learning_rate": 6.660615886101602e-05, "loss": 0.33554635047912595, "step": 78450 }, { "epoch": 0.33684517829696986, "grad_norm": 1.2757017612457275, "learning_rate": 6.660184714089839e-05, "loss": 0.24091644287109376, "step": 78460 }, { "epoch": 0.33688811038698985, "grad_norm": 0.002968688029795885, "learning_rate": 6.659753542078076e-05, "loss": 0.3429348707199097, "step": 78470 }, { "epoch": 0.3369310424770099, "grad_norm": 1.2616757154464722, "learning_rate": 6.659322370066314e-05, "loss": 0.40502257347106935, "step": 78480 }, { "epoch": 0.3369739745670299, "grad_norm": 0.0027247348334640265, "learning_rate": 6.658891198054552e-05, "loss": 0.10175679922103882, "step": 78490 }, { "epoch": 0.33701690665704986, "grad_norm": 0.030607003718614578, "learning_rate": 6.65846002604279e-05, "loss": 0.45574145317077636, "step": 78500 }, { "epoch": 0.3370598387470699, "grad_norm": 0.8096807599067688, "learning_rate": 6.658028854031027e-05, "loss": 0.1723085403442383, "step": 78510 }, { "epoch": 0.3371027708370899, "grad_norm": 3.5476157665252686, "learning_rate": 6.657597682019265e-05, "loss": 0.3150531768798828, "step": 78520 }, { "epoch": 0.3371457029271099, "grad_norm": 2.841060161590576, "learning_rate": 6.657166510007503e-05, "loss": 0.12495272159576416, "step": 78530 }, { "epoch": 0.3371886350171299, "grad_norm": 0.03028915636241436, "learning_rate": 6.656735337995739e-05, "loss": 0.16928415298461913, "step": 78540 }, { "epoch": 0.3372315671071499, "grad_norm": 0.006461134646087885, "learning_rate": 6.656304165983977e-05, "loss": 0.21656641960144044, "step": 78550 }, { "epoch": 0.3372744991971699, "grad_norm": 0.02571956440806389, "learning_rate": 6.655872993972216e-05, "loss": 0.3709154844284058, "step": 78560 }, { "epoch": 0.33731743128718994, "grad_norm": 0.4863227605819702, "learning_rate": 6.655441821960454e-05, "loss": 0.10554158687591553, "step": 78570 }, { "epoch": 0.3373603633772099, "grad_norm": 0.036538202315568924, "learning_rate": 6.655010649948691e-05, "loss": 0.08269681334495545, "step": 78580 }, { "epoch": 0.3374032954672299, "grad_norm": 0.061518244445323944, "learning_rate": 6.654579477936929e-05, "loss": 0.017988350987434388, "step": 78590 }, { "epoch": 0.33744622755724996, "grad_norm": 0.021507324650883675, "learning_rate": 6.654148305925167e-05, "loss": 0.12341071367263794, "step": 78600 }, { "epoch": 0.33748915964726994, "grad_norm": 0.038689348846673965, "learning_rate": 6.653717133913405e-05, "loss": 0.3153055191040039, "step": 78610 }, { "epoch": 0.33753209173728993, "grad_norm": 0.13708128035068512, "learning_rate": 6.653285961901642e-05, "loss": 0.36469757556915283, "step": 78620 }, { "epoch": 0.33757502382731, "grad_norm": 2.7672643661499023, "learning_rate": 6.652854789889879e-05, "loss": 0.2653116941452026, "step": 78630 }, { "epoch": 0.33761795591732996, "grad_norm": 12.475768089294434, "learning_rate": 6.652423617878116e-05, "loss": 0.29444379806518556, "step": 78640 }, { "epoch": 0.33766088800734995, "grad_norm": 1.8217633962631226, "learning_rate": 6.651992445866354e-05, "loss": 0.33646581172943113, "step": 78650 }, { "epoch": 0.33770382009737, "grad_norm": 0.08565472811460495, "learning_rate": 6.651561273854592e-05, "loss": 0.35305285453796387, "step": 78660 }, { "epoch": 0.33774675218739, "grad_norm": 0.071127749979496, "learning_rate": 6.65113010184283e-05, "loss": 0.1562727212905884, "step": 78670 }, { "epoch": 0.33778968427741, "grad_norm": 0.9672734141349792, "learning_rate": 6.650698929831067e-05, "loss": 0.20825026035308838, "step": 78680 }, { "epoch": 0.33783261636743, "grad_norm": 0.3451642096042633, "learning_rate": 6.650267757819305e-05, "loss": 0.12548161745071412, "step": 78690 }, { "epoch": 0.33787554845745, "grad_norm": 0.023614799603819847, "learning_rate": 6.649836585807543e-05, "loss": 0.2497967004776001, "step": 78700 }, { "epoch": 0.33791848054747003, "grad_norm": 0.11236807703971863, "learning_rate": 6.649405413795779e-05, "loss": 0.3373664140701294, "step": 78710 }, { "epoch": 0.33796141263749, "grad_norm": 0.00224771024659276, "learning_rate": 6.648974241784017e-05, "loss": 0.18866246938705444, "step": 78720 }, { "epoch": 0.33800434472751, "grad_norm": 0.33207470178604126, "learning_rate": 6.648543069772255e-05, "loss": 0.2463681936264038, "step": 78730 }, { "epoch": 0.33804727681753005, "grad_norm": 3.1149215698242188, "learning_rate": 6.648111897760492e-05, "loss": 0.3616062641143799, "step": 78740 }, { "epoch": 0.33809020890755004, "grad_norm": 0.40803834795951843, "learning_rate": 6.64768072574873e-05, "loss": 0.17350724935531617, "step": 78750 }, { "epoch": 0.33813314099757, "grad_norm": 7.756111145019531, "learning_rate": 6.647249553736968e-05, "loss": 0.2586477994918823, "step": 78760 }, { "epoch": 0.33817607308759007, "grad_norm": 1.2022218704223633, "learning_rate": 6.646818381725206e-05, "loss": 0.28184449672698975, "step": 78770 }, { "epoch": 0.33821900517761005, "grad_norm": 0.03321431577205658, "learning_rate": 6.646387209713443e-05, "loss": 0.27339468002319334, "step": 78780 }, { "epoch": 0.33826193726763004, "grad_norm": 10.436616897583008, "learning_rate": 6.645956037701681e-05, "loss": 0.39653310775756834, "step": 78790 }, { "epoch": 0.3383048693576501, "grad_norm": 0.02555697225034237, "learning_rate": 6.645524865689919e-05, "loss": 0.11828770637512206, "step": 78800 }, { "epoch": 0.33834780144767007, "grad_norm": 0.8284059166908264, "learning_rate": 6.645093693678157e-05, "loss": 0.18440791368484497, "step": 78810 }, { "epoch": 0.33839073353769006, "grad_norm": 9.03634262084961, "learning_rate": 6.644662521666394e-05, "loss": 0.2156630277633667, "step": 78820 }, { "epoch": 0.3384336656277101, "grad_norm": 0.044301051646471024, "learning_rate": 6.644231349654632e-05, "loss": 0.13982144594192505, "step": 78830 }, { "epoch": 0.3384765977177301, "grad_norm": 0.06587236374616623, "learning_rate": 6.64380017764287e-05, "loss": 0.05743167400360107, "step": 78840 }, { "epoch": 0.3385195298077501, "grad_norm": 0.006749128457158804, "learning_rate": 6.643369005631108e-05, "loss": 0.1766321063041687, "step": 78850 }, { "epoch": 0.3385624618977701, "grad_norm": 0.1305362582206726, "learning_rate": 6.642937833619345e-05, "loss": 0.16463260650634765, "step": 78860 }, { "epoch": 0.3386053939877901, "grad_norm": 0.29861336946487427, "learning_rate": 6.642506661607582e-05, "loss": 0.4933755397796631, "step": 78870 }, { "epoch": 0.33864832607781015, "grad_norm": 0.1558838188648224, "learning_rate": 6.64207548959582e-05, "loss": 0.16021682024002076, "step": 78880 }, { "epoch": 0.33869125816783013, "grad_norm": 0.12155012786388397, "learning_rate": 6.641644317584057e-05, "loss": 0.18769705295562744, "step": 78890 }, { "epoch": 0.3387341902578501, "grad_norm": 0.003283413592725992, "learning_rate": 6.641213145572295e-05, "loss": 0.07501508593559265, "step": 78900 }, { "epoch": 0.33877712234787016, "grad_norm": 0.007337587419897318, "learning_rate": 6.640781973560533e-05, "loss": 0.23510518074035644, "step": 78910 }, { "epoch": 0.33882005443789015, "grad_norm": 1.428571343421936, "learning_rate": 6.64035080154877e-05, "loss": 0.2963968276977539, "step": 78920 }, { "epoch": 0.33886298652791014, "grad_norm": 0.006976236589252949, "learning_rate": 6.639919629537008e-05, "loss": 0.38243851661682127, "step": 78930 }, { "epoch": 0.3389059186179302, "grad_norm": 0.13427439332008362, "learning_rate": 6.639488457525246e-05, "loss": 0.19198756217956542, "step": 78940 }, { "epoch": 0.33894885070795017, "grad_norm": 0.16244111955165863, "learning_rate": 6.639057285513484e-05, "loss": 0.2261066198348999, "step": 78950 }, { "epoch": 0.33899178279797015, "grad_norm": 0.5172285437583923, "learning_rate": 6.63862611350172e-05, "loss": 0.23315682411193847, "step": 78960 }, { "epoch": 0.3390347148879902, "grad_norm": 2.285973072052002, "learning_rate": 6.638194941489958e-05, "loss": 0.16300102472305297, "step": 78970 }, { "epoch": 0.3390776469780102, "grad_norm": 0.0691041648387909, "learning_rate": 6.637763769478195e-05, "loss": 0.3021952390670776, "step": 78980 }, { "epoch": 0.33912057906803017, "grad_norm": 0.045362479984760284, "learning_rate": 6.637332597466433e-05, "loss": 0.14102792739868164, "step": 78990 }, { "epoch": 0.3391635111580502, "grad_norm": 27.12893295288086, "learning_rate": 6.636901425454671e-05, "loss": 0.21746547222137452, "step": 79000 }, { "epoch": 0.3391635111580502, "eval_loss": 0.4396827220916748, "eval_runtime": 27.1677, "eval_samples_per_second": 3.681, "eval_steps_per_second": 3.681, "step": 79000 }, { "epoch": 0.3392064432480702, "grad_norm": 0.014844651333987713, "learning_rate": 6.636470253442909e-05, "loss": 0.273270845413208, "step": 79010 }, { "epoch": 0.3392493753380902, "grad_norm": 0.0699886828660965, "learning_rate": 6.636039081431146e-05, "loss": 0.36042520999908445, "step": 79020 }, { "epoch": 0.33929230742811023, "grad_norm": 0.13504739105701447, "learning_rate": 6.635607909419384e-05, "loss": 0.16561659574508666, "step": 79030 }, { "epoch": 0.3393352395181302, "grad_norm": 0.2788245379924774, "learning_rate": 6.635176737407622e-05, "loss": 0.17961949110031128, "step": 79040 }, { "epoch": 0.3393781716081502, "grad_norm": 0.19097217917442322, "learning_rate": 6.63474556539586e-05, "loss": 0.14346141815185548, "step": 79050 }, { "epoch": 0.33942110369817025, "grad_norm": 0.07555440813302994, "learning_rate": 6.634314393384097e-05, "loss": 0.13214304447174072, "step": 79060 }, { "epoch": 0.33946403578819023, "grad_norm": 0.00638962909579277, "learning_rate": 6.633883221372335e-05, "loss": 0.14622740745544432, "step": 79070 }, { "epoch": 0.3395069678782102, "grad_norm": 0.004908399190753698, "learning_rate": 6.633452049360573e-05, "loss": 0.12188636064529419, "step": 79080 }, { "epoch": 0.33954989996823026, "grad_norm": 0.016102461144328117, "learning_rate": 6.63302087734881e-05, "loss": 0.17850993871688842, "step": 79090 }, { "epoch": 0.33959283205825025, "grad_norm": 0.3554086685180664, "learning_rate": 6.632589705337048e-05, "loss": 0.05948584079742432, "step": 79100 }, { "epoch": 0.3396357641482703, "grad_norm": 165.74964904785156, "learning_rate": 6.632158533325286e-05, "loss": 0.12853333950042725, "step": 79110 }, { "epoch": 0.3396786962382903, "grad_norm": 2.9265964031219482, "learning_rate": 6.631727361313522e-05, "loss": 0.3278707504272461, "step": 79120 }, { "epoch": 0.33972162832831027, "grad_norm": 0.002426425227895379, "learning_rate": 6.63129618930176e-05, "loss": 0.17605364322662354, "step": 79130 }, { "epoch": 0.3397645604183303, "grad_norm": 5.70521879196167, "learning_rate": 6.630865017289998e-05, "loss": 0.29195294380187986, "step": 79140 }, { "epoch": 0.3398074925083503, "grad_norm": 0.0027679158374667168, "learning_rate": 6.630433845278236e-05, "loss": 0.2246103286743164, "step": 79150 }, { "epoch": 0.3398504245983703, "grad_norm": 2.1907317638397217, "learning_rate": 6.630002673266473e-05, "loss": 0.24519672393798828, "step": 79160 }, { "epoch": 0.3398933566883903, "grad_norm": 0.015526399947702885, "learning_rate": 6.629571501254711e-05, "loss": 0.048506084084510806, "step": 79170 }, { "epoch": 0.3399362887784103, "grad_norm": 2.9595799446105957, "learning_rate": 6.629140329242949e-05, "loss": 0.3247170925140381, "step": 79180 }, { "epoch": 0.3399792208684303, "grad_norm": 1.484932780265808, "learning_rate": 6.628709157231186e-05, "loss": 0.15729442834854127, "step": 79190 }, { "epoch": 0.34002215295845034, "grad_norm": 0.011102179065346718, "learning_rate": 6.628277985219423e-05, "loss": 0.16122729778289796, "step": 79200 }, { "epoch": 0.34006508504847033, "grad_norm": 0.05957547575235367, "learning_rate": 6.62784681320766e-05, "loss": 0.038811984658241275, "step": 79210 }, { "epoch": 0.3401080171384903, "grad_norm": 0.033005524426698685, "learning_rate": 6.627415641195898e-05, "loss": 0.09726451635360718, "step": 79220 }, { "epoch": 0.34015094922851036, "grad_norm": 0.0013659193646162748, "learning_rate": 6.626984469184136e-05, "loss": 0.2876324415206909, "step": 79230 }, { "epoch": 0.34019388131853034, "grad_norm": 0.0034706296864897013, "learning_rate": 6.626553297172374e-05, "loss": 0.004077165573835373, "step": 79240 }, { "epoch": 0.34023681340855033, "grad_norm": 3.6416494846343994, "learning_rate": 6.626122125160611e-05, "loss": 0.3369143486022949, "step": 79250 }, { "epoch": 0.3402797454985704, "grad_norm": 0.0032283675391227007, "learning_rate": 6.625690953148849e-05, "loss": 0.21591832637786865, "step": 79260 }, { "epoch": 0.34032267758859036, "grad_norm": 0.9636190533638, "learning_rate": 6.625259781137087e-05, "loss": 0.2059413194656372, "step": 79270 }, { "epoch": 0.34036560967861035, "grad_norm": 5.577648639678955, "learning_rate": 6.624828609125325e-05, "loss": 0.23836958408355713, "step": 79280 }, { "epoch": 0.3404085417686304, "grad_norm": 0.010573506355285645, "learning_rate": 6.624397437113562e-05, "loss": 0.19436086416244508, "step": 79290 }, { "epoch": 0.3404514738586504, "grad_norm": 3.705470561981201, "learning_rate": 6.6239662651018e-05, "loss": 0.32446415424346925, "step": 79300 }, { "epoch": 0.3404944059486704, "grad_norm": 0.40182867646217346, "learning_rate": 6.623535093090038e-05, "loss": 0.27088615894317625, "step": 79310 }, { "epoch": 0.3405373380386904, "grad_norm": 1.5534511804580688, "learning_rate": 6.623103921078276e-05, "loss": 0.17054343223571777, "step": 79320 }, { "epoch": 0.3405802701287104, "grad_norm": 0.03681186959147453, "learning_rate": 6.622672749066513e-05, "loss": 0.3183943271636963, "step": 79330 }, { "epoch": 0.34062320221873044, "grad_norm": 0.05637132748961449, "learning_rate": 6.622241577054751e-05, "loss": 0.0830637276172638, "step": 79340 }, { "epoch": 0.3406661343087504, "grad_norm": 0.01597636006772518, "learning_rate": 6.621810405042989e-05, "loss": 0.2368089437484741, "step": 79350 }, { "epoch": 0.3407090663987704, "grad_norm": 0.07524899393320084, "learning_rate": 6.621379233031227e-05, "loss": 0.3124144792556763, "step": 79360 }, { "epoch": 0.34075199848879045, "grad_norm": 0.017055392265319824, "learning_rate": 6.620948061019463e-05, "loss": 0.28198373317718506, "step": 79370 }, { "epoch": 0.34079493057881044, "grad_norm": 0.9158375263214111, "learning_rate": 6.620516889007701e-05, "loss": 0.20923035144805907, "step": 79380 }, { "epoch": 0.3408378626688304, "grad_norm": 0.0037546558305621147, "learning_rate": 6.620085716995938e-05, "loss": 0.0971619188785553, "step": 79390 }, { "epoch": 0.34088079475885047, "grad_norm": 0.33954140543937683, "learning_rate": 6.619654544984176e-05, "loss": 0.2176882266998291, "step": 79400 }, { "epoch": 0.34092372684887046, "grad_norm": 0.06124844774603844, "learning_rate": 6.619223372972414e-05, "loss": 0.27284195423126223, "step": 79410 }, { "epoch": 0.34096665893889044, "grad_norm": 0.781387448310852, "learning_rate": 6.618792200960652e-05, "loss": 0.34500880241394044, "step": 79420 }, { "epoch": 0.3410095910289105, "grad_norm": 0.05491228029131889, "learning_rate": 6.61836102894889e-05, "loss": 0.08464367985725403, "step": 79430 }, { "epoch": 0.3410525231189305, "grad_norm": 0.39764919877052307, "learning_rate": 6.617929856937127e-05, "loss": 0.12523021697998046, "step": 79440 }, { "epoch": 0.34109545520895046, "grad_norm": 0.017724506556987762, "learning_rate": 6.617498684925363e-05, "loss": 0.012008582800626754, "step": 79450 }, { "epoch": 0.3411383872989705, "grad_norm": 0.5780832767486572, "learning_rate": 6.617067512913601e-05, "loss": 0.21086423397064208, "step": 79460 }, { "epoch": 0.3411813193889905, "grad_norm": 0.03017611987888813, "learning_rate": 6.616636340901839e-05, "loss": 0.14146060943603517, "step": 79470 }, { "epoch": 0.3412242514790105, "grad_norm": 0.0034709612373262644, "learning_rate": 6.616205168890077e-05, "loss": 0.22980284690856934, "step": 79480 }, { "epoch": 0.3412671835690305, "grad_norm": 1.7348976135253906, "learning_rate": 6.615773996878314e-05, "loss": 0.273857569694519, "step": 79490 }, { "epoch": 0.3413101156590505, "grad_norm": 0.12557612359523773, "learning_rate": 6.615342824866552e-05, "loss": 0.22032973766326905, "step": 79500 }, { "epoch": 0.3413530477490705, "grad_norm": 0.018461521714925766, "learning_rate": 6.61491165285479e-05, "loss": 0.33548736572265625, "step": 79510 }, { "epoch": 0.34139597983909054, "grad_norm": 0.0520981103181839, "learning_rate": 6.614480480843029e-05, "loss": 0.30472311973571775, "step": 79520 }, { "epoch": 0.3414389119291105, "grad_norm": 0.8297507762908936, "learning_rate": 6.614049308831265e-05, "loss": 0.1717914819717407, "step": 79530 }, { "epoch": 0.34148184401913056, "grad_norm": 0.06621135771274567, "learning_rate": 6.613618136819503e-05, "loss": 0.23071463108062745, "step": 79540 }, { "epoch": 0.34152477610915055, "grad_norm": 0.3965785503387451, "learning_rate": 6.613186964807741e-05, "loss": 0.18059780597686767, "step": 79550 }, { "epoch": 0.34156770819917054, "grad_norm": 0.024892650544643402, "learning_rate": 6.612755792795979e-05, "loss": 0.38873488903045655, "step": 79560 }, { "epoch": 0.3416106402891906, "grad_norm": 0.01708587259054184, "learning_rate": 6.612324620784216e-05, "loss": 0.29357964992523194, "step": 79570 }, { "epoch": 0.34165357237921057, "grad_norm": 8.060369491577148, "learning_rate": 6.611893448772454e-05, "loss": 0.0989808738231659, "step": 79580 }, { "epoch": 0.34169650446923056, "grad_norm": 0.3194878101348877, "learning_rate": 6.611462276760692e-05, "loss": 0.2561430692672729, "step": 79590 }, { "epoch": 0.3417394365592506, "grad_norm": 0.02276020310819149, "learning_rate": 6.61103110474893e-05, "loss": 0.3739812135696411, "step": 79600 }, { "epoch": 0.3417823686492706, "grad_norm": 0.10554180294275284, "learning_rate": 6.610599932737166e-05, "loss": 0.22100362777709961, "step": 79610 }, { "epoch": 0.34182530073929057, "grad_norm": 0.18519657850265503, "learning_rate": 6.610168760725404e-05, "loss": 0.12010836601257324, "step": 79620 }, { "epoch": 0.3418682328293106, "grad_norm": 1.409715175628662, "learning_rate": 6.609737588713641e-05, "loss": 0.41119384765625, "step": 79630 }, { "epoch": 0.3419111649193306, "grad_norm": 0.06727342307567596, "learning_rate": 6.609306416701879e-05, "loss": 0.12643046379089357, "step": 79640 }, { "epoch": 0.3419540970093506, "grad_norm": 0.005273744929581881, "learning_rate": 6.608875244690117e-05, "loss": 0.002772852033376694, "step": 79650 }, { "epoch": 0.34199702909937063, "grad_norm": 5.838228225708008, "learning_rate": 6.608444072678355e-05, "loss": 0.304068922996521, "step": 79660 }, { "epoch": 0.3420399611893906, "grad_norm": 7.21917200088501, "learning_rate": 6.608012900666592e-05, "loss": 0.19127278327941893, "step": 79670 }, { "epoch": 0.3420828932794106, "grad_norm": 0.8204609751701355, "learning_rate": 6.60758172865483e-05, "loss": 0.2872287750244141, "step": 79680 }, { "epoch": 0.34212582536943065, "grad_norm": 0.00942230224609375, "learning_rate": 6.607150556643068e-05, "loss": 0.2193960189819336, "step": 79690 }, { "epoch": 0.34216875745945063, "grad_norm": 0.606311559677124, "learning_rate": 6.606719384631304e-05, "loss": 0.25763740539550783, "step": 79700 }, { "epoch": 0.3422116895494706, "grad_norm": 0.0011694321874529123, "learning_rate": 6.606288212619542e-05, "loss": 0.263020396232605, "step": 79710 }, { "epoch": 0.34225462163949066, "grad_norm": 1.0757243633270264, "learning_rate": 6.60585704060778e-05, "loss": 0.26357810497283934, "step": 79720 }, { "epoch": 0.34229755372951065, "grad_norm": 0.06414375454187393, "learning_rate": 6.605425868596017e-05, "loss": 0.12486532926559449, "step": 79730 }, { "epoch": 0.3423404858195307, "grad_norm": 2.9872426986694336, "learning_rate": 6.604994696584256e-05, "loss": 0.30005874633789065, "step": 79740 }, { "epoch": 0.3423834179095507, "grad_norm": 1.153652310371399, "learning_rate": 6.604563524572494e-05, "loss": 0.15315334796905516, "step": 79750 }, { "epoch": 0.34242634999957067, "grad_norm": 4.178336143493652, "learning_rate": 6.604132352560732e-05, "loss": 0.17489372491836547, "step": 79760 }, { "epoch": 0.3424692820895907, "grad_norm": 5.421525478363037, "learning_rate": 6.60370118054897e-05, "loss": 0.26713321208953855, "step": 79770 }, { "epoch": 0.3425122141796107, "grad_norm": 14.969881057739258, "learning_rate": 6.603270008537206e-05, "loss": 0.09526968002319336, "step": 79780 }, { "epoch": 0.3425551462696307, "grad_norm": 4.366916656494141, "learning_rate": 6.602838836525444e-05, "loss": 0.22230844497680663, "step": 79790 }, { "epoch": 0.3425980783596507, "grad_norm": 0.004848931450396776, "learning_rate": 6.602407664513681e-05, "loss": 0.08430198431015015, "step": 79800 }, { "epoch": 0.3426410104496707, "grad_norm": 0.4716125726699829, "learning_rate": 6.601976492501919e-05, "loss": 0.2586265325546265, "step": 79810 }, { "epoch": 0.3426839425396907, "grad_norm": 0.0038550784811377525, "learning_rate": 6.601545320490157e-05, "loss": 0.12058546543121337, "step": 79820 }, { "epoch": 0.34272687462971074, "grad_norm": 1.374908447265625, "learning_rate": 6.601114148478395e-05, "loss": 0.13479118347167968, "step": 79830 }, { "epoch": 0.34276980671973073, "grad_norm": 0.004285231698304415, "learning_rate": 6.600682976466632e-05, "loss": 0.19934382438659667, "step": 79840 }, { "epoch": 0.3428127388097507, "grad_norm": 0.00542460847645998, "learning_rate": 6.60025180445487e-05, "loss": 0.24936530590057374, "step": 79850 }, { "epoch": 0.34285567089977076, "grad_norm": 0.011068413965404034, "learning_rate": 6.599820632443107e-05, "loss": 0.09880251884460449, "step": 79860 }, { "epoch": 0.34289860298979075, "grad_norm": 0.10294083505868912, "learning_rate": 6.599389460431344e-05, "loss": 0.1930585980415344, "step": 79870 }, { "epoch": 0.34294153507981073, "grad_norm": 0.4128507971763611, "learning_rate": 6.598958288419582e-05, "loss": 0.1906563401222229, "step": 79880 }, { "epoch": 0.3429844671698308, "grad_norm": 2.6419787406921387, "learning_rate": 6.59852711640782e-05, "loss": 0.38313093185424807, "step": 79890 }, { "epoch": 0.34302739925985076, "grad_norm": 2.718538761138916, "learning_rate": 6.598095944396057e-05, "loss": 0.2621057748794556, "step": 79900 }, { "epoch": 0.34307033134987075, "grad_norm": 0.04285736754536629, "learning_rate": 6.597664772384295e-05, "loss": 0.12957130670547484, "step": 79910 }, { "epoch": 0.3431132634398908, "grad_norm": 0.025662913918495178, "learning_rate": 6.597233600372533e-05, "loss": 0.3805588960647583, "step": 79920 }, { "epoch": 0.3431561955299108, "grad_norm": 1.7533470392227173, "learning_rate": 6.59680242836077e-05, "loss": 0.25470623970031736, "step": 79930 }, { "epoch": 0.34319912761993077, "grad_norm": 0.02234153263270855, "learning_rate": 6.596371256349007e-05, "loss": 0.23534765243530273, "step": 79940 }, { "epoch": 0.3432420597099508, "grad_norm": 0.014367325231432915, "learning_rate": 6.595940084337245e-05, "loss": 0.17741996049880981, "step": 79950 }, { "epoch": 0.3432849917999708, "grad_norm": 0.0687040314078331, "learning_rate": 6.595508912325484e-05, "loss": 0.3304955005645752, "step": 79960 }, { "epoch": 0.34332792388999084, "grad_norm": 0.014946906827390194, "learning_rate": 6.595077740313722e-05, "loss": 0.1195443868637085, "step": 79970 }, { "epoch": 0.3433708559800108, "grad_norm": 3.6829023361206055, "learning_rate": 6.59464656830196e-05, "loss": 0.18149445056915284, "step": 79980 }, { "epoch": 0.3434137880700308, "grad_norm": 3.5131752490997314, "learning_rate": 6.594215396290197e-05, "loss": 0.3053800106048584, "step": 79990 }, { "epoch": 0.34345672016005085, "grad_norm": 0.002581225708127022, "learning_rate": 6.593784224278435e-05, "loss": 0.05220865607261658, "step": 80000 }, { "epoch": 0.34345672016005085, "eval_loss": 0.4368164837360382, "eval_runtime": 27.1598, "eval_samples_per_second": 3.682, "eval_steps_per_second": 3.682, "step": 80000 }, { "epoch": 0.34349965225007084, "grad_norm": 3.8440799713134766, "learning_rate": 6.593353052266673e-05, "loss": 0.2896945714950562, "step": 80010 }, { "epoch": 0.34354258434009083, "grad_norm": 2.2781224250793457, "learning_rate": 6.59292188025491e-05, "loss": 0.15586977005004882, "step": 80020 }, { "epoch": 0.34358551643011087, "grad_norm": 0.09280993789434433, "learning_rate": 6.592490708243147e-05, "loss": 0.16732913255691528, "step": 80030 }, { "epoch": 0.34362844852013086, "grad_norm": 0.0976981520652771, "learning_rate": 6.592059536231384e-05, "loss": 0.5183558940887452, "step": 80040 }, { "epoch": 0.34367138061015085, "grad_norm": 0.35272637009620667, "learning_rate": 6.591628364219622e-05, "loss": 0.2636963129043579, "step": 80050 }, { "epoch": 0.3437143127001709, "grad_norm": 0.32257652282714844, "learning_rate": 6.59119719220786e-05, "loss": 0.07576382756233216, "step": 80060 }, { "epoch": 0.3437572447901909, "grad_norm": 0.0030997225549072027, "learning_rate": 6.590766020196098e-05, "loss": 0.11930543184280396, "step": 80070 }, { "epoch": 0.34380017688021086, "grad_norm": 0.3869069814682007, "learning_rate": 6.590334848184335e-05, "loss": 0.18810378313064574, "step": 80080 }, { "epoch": 0.3438431089702309, "grad_norm": 1.9977630376815796, "learning_rate": 6.589903676172573e-05, "loss": 0.08379656076431274, "step": 80090 }, { "epoch": 0.3438860410602509, "grad_norm": 0.01002898346632719, "learning_rate": 6.589472504160811e-05, "loss": 0.10857353210449219, "step": 80100 }, { "epoch": 0.3439289731502709, "grad_norm": 6.863121509552002, "learning_rate": 6.589041332149047e-05, "loss": 0.2604074001312256, "step": 80110 }, { "epoch": 0.3439719052402909, "grad_norm": 1.263120174407959, "learning_rate": 6.588610160137285e-05, "loss": 0.26923954486846924, "step": 80120 }, { "epoch": 0.3440148373303109, "grad_norm": 0.08355128020048141, "learning_rate": 6.588178988125523e-05, "loss": 0.11677829027175904, "step": 80130 }, { "epoch": 0.3440577694203309, "grad_norm": 0.001254008966498077, "learning_rate": 6.58774781611376e-05, "loss": 0.20639264583587646, "step": 80140 }, { "epoch": 0.34410070151035094, "grad_norm": 0.11388286203145981, "learning_rate": 6.587316644101998e-05, "loss": 0.2153681516647339, "step": 80150 }, { "epoch": 0.3441436336003709, "grad_norm": 1.5901732444763184, "learning_rate": 6.586885472090236e-05, "loss": 0.3067173004150391, "step": 80160 }, { "epoch": 0.34418656569039097, "grad_norm": 1.3476285934448242, "learning_rate": 6.586454300078474e-05, "loss": 0.21053199768066405, "step": 80170 }, { "epoch": 0.34422949778041095, "grad_norm": 0.006064875982701778, "learning_rate": 6.586023128066711e-05, "loss": 0.29959542751312257, "step": 80180 }, { "epoch": 0.34427242987043094, "grad_norm": 0.016881000250577927, "learning_rate": 6.585591956054949e-05, "loss": 0.22309458255767822, "step": 80190 }, { "epoch": 0.344315361960451, "grad_norm": 0.3344466984272003, "learning_rate": 6.585160784043187e-05, "loss": 0.23730158805847168, "step": 80200 }, { "epoch": 0.34435829405047097, "grad_norm": 0.016367772594094276, "learning_rate": 6.584729612031425e-05, "loss": 0.06344168782234191, "step": 80210 }, { "epoch": 0.34440122614049096, "grad_norm": 1.8823529481887817, "learning_rate": 6.584298440019662e-05, "loss": 0.31463663578033446, "step": 80220 }, { "epoch": 0.344444158230511, "grad_norm": 0.4750448763370514, "learning_rate": 6.5838672680079e-05, "loss": 0.2528752326965332, "step": 80230 }, { "epoch": 0.344487090320531, "grad_norm": 0.0015717342030256987, "learning_rate": 6.583436095996138e-05, "loss": 0.13928334712982177, "step": 80240 }, { "epoch": 0.344530022410551, "grad_norm": 0.0036773881874978542, "learning_rate": 6.583004923984375e-05, "loss": 0.1504261612892151, "step": 80250 }, { "epoch": 0.344572954500571, "grad_norm": 0.010615227743983269, "learning_rate": 6.582573751972613e-05, "loss": 0.21787447929382325, "step": 80260 }, { "epoch": 0.344615886590591, "grad_norm": 0.06478608399629593, "learning_rate": 6.58214257996085e-05, "loss": 0.44989643096923826, "step": 80270 }, { "epoch": 0.344658818680611, "grad_norm": 0.025874905288219452, "learning_rate": 6.581711407949087e-05, "loss": 0.101692795753479, "step": 80280 }, { "epoch": 0.34470175077063103, "grad_norm": 0.005986157804727554, "learning_rate": 6.581280235937325e-05, "loss": 0.2578385829925537, "step": 80290 }, { "epoch": 0.344744682860651, "grad_norm": 0.009471284225583076, "learning_rate": 6.580849063925563e-05, "loss": 0.2216710329055786, "step": 80300 }, { "epoch": 0.344787614950671, "grad_norm": 4.648142337799072, "learning_rate": 6.5804178919138e-05, "loss": 0.3399980545043945, "step": 80310 }, { "epoch": 0.34483054704069105, "grad_norm": 0.1861869841814041, "learning_rate": 6.579986719902038e-05, "loss": 0.2353301763534546, "step": 80320 }, { "epoch": 0.34487347913071104, "grad_norm": 1.3739330768585205, "learning_rate": 6.579555547890276e-05, "loss": 0.13581985235214233, "step": 80330 }, { "epoch": 0.344916411220731, "grad_norm": 0.8927357196807861, "learning_rate": 6.579124375878514e-05, "loss": 0.4658341884613037, "step": 80340 }, { "epoch": 0.34495934331075107, "grad_norm": 0.46321922540664673, "learning_rate": 6.57869320386675e-05, "loss": 0.11834501028060913, "step": 80350 }, { "epoch": 0.34500227540077105, "grad_norm": 1.620883822441101, "learning_rate": 6.578262031854988e-05, "loss": 0.38476948738098143, "step": 80360 }, { "epoch": 0.34504520749079104, "grad_norm": 0.024614451453089714, "learning_rate": 6.577830859843226e-05, "loss": 0.1613122820854187, "step": 80370 }, { "epoch": 0.3450881395808111, "grad_norm": 0.20275190472602844, "learning_rate": 6.577399687831463e-05, "loss": 0.21819941997528075, "step": 80380 }, { "epoch": 0.34513107167083107, "grad_norm": 0.005242771469056606, "learning_rate": 6.576968515819701e-05, "loss": 0.04298856854438782, "step": 80390 }, { "epoch": 0.3451740037608511, "grad_norm": 0.0025338917039334774, "learning_rate": 6.576537343807939e-05, "loss": 0.004497027024626732, "step": 80400 }, { "epoch": 0.3452169358508711, "grad_norm": 1.8911714553833008, "learning_rate": 6.576106171796176e-05, "loss": 0.39356627464294436, "step": 80410 }, { "epoch": 0.3452598679408911, "grad_norm": 0.009503847919404507, "learning_rate": 6.575674999784414e-05, "loss": 0.15246884822845458, "step": 80420 }, { "epoch": 0.34530280003091113, "grad_norm": 0.18555688858032227, "learning_rate": 6.575243827772652e-05, "loss": 0.0459955096244812, "step": 80430 }, { "epoch": 0.3453457321209311, "grad_norm": 2.2830750942230225, "learning_rate": 6.57481265576089e-05, "loss": 0.11132620573043824, "step": 80440 }, { "epoch": 0.3453886642109511, "grad_norm": 0.4190322756767273, "learning_rate": 6.574381483749127e-05, "loss": 0.21302645206451415, "step": 80450 }, { "epoch": 0.34543159630097114, "grad_norm": 0.005154999904334545, "learning_rate": 6.573950311737365e-05, "loss": 0.3834122657775879, "step": 80460 }, { "epoch": 0.34547452839099113, "grad_norm": 0.5437856316566467, "learning_rate": 6.573519139725603e-05, "loss": 0.1995234966278076, "step": 80470 }, { "epoch": 0.3455174604810111, "grad_norm": 0.0034222968388348818, "learning_rate": 6.57308796771384e-05, "loss": 0.2423619508743286, "step": 80480 }, { "epoch": 0.34556039257103116, "grad_norm": 0.626228392124176, "learning_rate": 6.572656795702078e-05, "loss": 0.059421378374099734, "step": 80490 }, { "epoch": 0.34560332466105115, "grad_norm": 10.068717956542969, "learning_rate": 6.572225623690316e-05, "loss": 0.3919938087463379, "step": 80500 }, { "epoch": 0.34564625675107113, "grad_norm": 5.153544902801514, "learning_rate": 6.571794451678554e-05, "loss": 0.23537764549255372, "step": 80510 }, { "epoch": 0.3456891888410912, "grad_norm": 0.004742398392409086, "learning_rate": 6.57136327966679e-05, "loss": 0.06173415184020996, "step": 80520 }, { "epoch": 0.34573212093111116, "grad_norm": 0.45885029435157776, "learning_rate": 6.570932107655028e-05, "loss": 0.24032649993896485, "step": 80530 }, { "epoch": 0.34577505302113115, "grad_norm": 0.08064588904380798, "learning_rate": 6.570500935643266e-05, "loss": 0.2223383903503418, "step": 80540 }, { "epoch": 0.3458179851111512, "grad_norm": 0.685327410697937, "learning_rate": 6.570069763631503e-05, "loss": 0.16256893873214723, "step": 80550 }, { "epoch": 0.3458609172011712, "grad_norm": 0.257320374250412, "learning_rate": 6.569638591619741e-05, "loss": 0.37799150943756105, "step": 80560 }, { "epoch": 0.34590384929119117, "grad_norm": 0.09647884964942932, "learning_rate": 6.569207419607979e-05, "loss": 0.006505146622657776, "step": 80570 }, { "epoch": 0.3459467813812112, "grad_norm": 0.12064115703105927, "learning_rate": 6.568776247596217e-05, "loss": 0.11497091054916382, "step": 80580 }, { "epoch": 0.3459897134712312, "grad_norm": 0.052829205989837646, "learning_rate": 6.568345075584454e-05, "loss": 0.1395114541053772, "step": 80590 }, { "epoch": 0.34603264556125124, "grad_norm": 1.4082953929901123, "learning_rate": 6.567913903572691e-05, "loss": 0.6587924003601074, "step": 80600 }, { "epoch": 0.3460755776512712, "grad_norm": 2.774576187133789, "learning_rate": 6.567482731560928e-05, "loss": 0.18932981491088868, "step": 80610 }, { "epoch": 0.3461185097412912, "grad_norm": 0.4535244107246399, "learning_rate": 6.567051559549166e-05, "loss": 0.1731483221054077, "step": 80620 }, { "epoch": 0.34616144183131126, "grad_norm": 1.3304637670516968, "learning_rate": 6.566620387537404e-05, "loss": 0.4254453659057617, "step": 80630 }, { "epoch": 0.34620437392133124, "grad_norm": 0.028769170865416527, "learning_rate": 6.566189215525642e-05, "loss": 0.2866819381713867, "step": 80640 }, { "epoch": 0.34624730601135123, "grad_norm": 0.17192727327346802, "learning_rate": 6.56575804351388e-05, "loss": 0.22045984268188476, "step": 80650 }, { "epoch": 0.3462902381013713, "grad_norm": 39.970855712890625, "learning_rate": 6.565326871502117e-05, "loss": 0.17609299421310426, "step": 80660 }, { "epoch": 0.34633317019139126, "grad_norm": 0.0026669171638786793, "learning_rate": 6.564895699490355e-05, "loss": 0.1487862229347229, "step": 80670 }, { "epoch": 0.34637610228141125, "grad_norm": 0.017173703759908676, "learning_rate": 6.564464527478593e-05, "loss": 0.1477797031402588, "step": 80680 }, { "epoch": 0.3464190343714313, "grad_norm": 0.01858958974480629, "learning_rate": 6.56403335546683e-05, "loss": 0.40072221755981446, "step": 80690 }, { "epoch": 0.3464619664614513, "grad_norm": 0.7842692136764526, "learning_rate": 6.563602183455068e-05, "loss": 0.268584680557251, "step": 80700 }, { "epoch": 0.34650489855147126, "grad_norm": 0.08671054989099503, "learning_rate": 6.563171011443306e-05, "loss": 0.1292457699775696, "step": 80710 }, { "epoch": 0.3465478306414913, "grad_norm": 2.038288116455078, "learning_rate": 6.562739839431544e-05, "loss": 0.15267176628112794, "step": 80720 }, { "epoch": 0.3465907627315113, "grad_norm": 11.327775955200195, "learning_rate": 6.562308667419781e-05, "loss": 0.16896125078201293, "step": 80730 }, { "epoch": 0.3466336948215313, "grad_norm": 4.022800445556641, "learning_rate": 6.561877495408019e-05, "loss": 0.1759350538253784, "step": 80740 }, { "epoch": 0.3466766269115513, "grad_norm": 0.009791653603315353, "learning_rate": 6.561446323396257e-05, "loss": 0.20523180961608886, "step": 80750 }, { "epoch": 0.3467195590015713, "grad_norm": 2.14821195602417, "learning_rate": 6.561015151384494e-05, "loss": 0.2979742527008057, "step": 80760 }, { "epoch": 0.3467624910915913, "grad_norm": 0.2958136796951294, "learning_rate": 6.560583979372731e-05, "loss": 0.15679970979690552, "step": 80770 }, { "epoch": 0.34680542318161134, "grad_norm": 0.14770112931728363, "learning_rate": 6.560152807360969e-05, "loss": 0.13254650831222534, "step": 80780 }, { "epoch": 0.3468483552716313, "grad_norm": 8.195019721984863, "learning_rate": 6.559721635349206e-05, "loss": 0.2113429069519043, "step": 80790 }, { "epoch": 0.3468912873616513, "grad_norm": 0.05892874300479889, "learning_rate": 6.559290463337444e-05, "loss": 0.27900118827819825, "step": 80800 }, { "epoch": 0.34693421945167136, "grad_norm": 1.09965181350708, "learning_rate": 6.558859291325682e-05, "loss": 0.22384970188140868, "step": 80810 }, { "epoch": 0.34697715154169134, "grad_norm": 0.1914658546447754, "learning_rate": 6.55842811931392e-05, "loss": 0.28952152729034425, "step": 80820 }, { "epoch": 0.3470200836317114, "grad_norm": 0.04727429524064064, "learning_rate": 6.557996947302157e-05, "loss": 0.2773630380630493, "step": 80830 }, { "epoch": 0.34706301572173137, "grad_norm": 0.033436909317970276, "learning_rate": 6.557565775290395e-05, "loss": 0.06806041002273559, "step": 80840 }, { "epoch": 0.34710594781175136, "grad_norm": 0.021389199420809746, "learning_rate": 6.557134603278631e-05, "loss": 0.14047728776931762, "step": 80850 }, { "epoch": 0.3471488799017714, "grad_norm": 1.8197565078735352, "learning_rate": 6.556703431266869e-05, "loss": 0.2777894973754883, "step": 80860 }, { "epoch": 0.3471918119917914, "grad_norm": 1.6493991613388062, "learning_rate": 6.556272259255107e-05, "loss": 0.19073477983474732, "step": 80870 }, { "epoch": 0.3472347440818114, "grad_norm": 1.2035952806472778, "learning_rate": 6.555841087243345e-05, "loss": 0.3294836044311523, "step": 80880 }, { "epoch": 0.3472776761718314, "grad_norm": 0.013203203678131104, "learning_rate": 6.555409915231582e-05, "loss": 0.21418993473052977, "step": 80890 }, { "epoch": 0.3473206082618514, "grad_norm": 0.02511315606534481, "learning_rate": 6.55497874321982e-05, "loss": 0.0967091977596283, "step": 80900 }, { "epoch": 0.3473635403518714, "grad_norm": 0.8609216213226318, "learning_rate": 6.554547571208058e-05, "loss": 0.3208725929260254, "step": 80910 }, { "epoch": 0.34740647244189143, "grad_norm": 0.01230608019977808, "learning_rate": 6.554116399196296e-05, "loss": 0.15385348796844484, "step": 80920 }, { "epoch": 0.3474494045319114, "grad_norm": 4.483237266540527, "learning_rate": 6.553685227184533e-05, "loss": 0.25249333381652833, "step": 80930 }, { "epoch": 0.3474923366219314, "grad_norm": 1.1705982685089111, "learning_rate": 6.553254055172771e-05, "loss": 0.4617762088775635, "step": 80940 }, { "epoch": 0.34753526871195145, "grad_norm": 2.1686246395111084, "learning_rate": 6.552822883161009e-05, "loss": 0.22434430122375487, "step": 80950 }, { "epoch": 0.34757820080197144, "grad_norm": 4.323497295379639, "learning_rate": 6.552391711149246e-05, "loss": 0.29828619956970215, "step": 80960 }, { "epoch": 0.3476211328919914, "grad_norm": 0.18413817882537842, "learning_rate": 6.551960539137484e-05, "loss": 0.4002058029174805, "step": 80970 }, { "epoch": 0.34766406498201147, "grad_norm": 1.3192716836929321, "learning_rate": 6.551529367125722e-05, "loss": 0.077541983127594, "step": 80980 }, { "epoch": 0.34770699707203145, "grad_norm": 0.3532804846763611, "learning_rate": 6.55109819511396e-05, "loss": 0.0771310031414032, "step": 80990 }, { "epoch": 0.34774992916205144, "grad_norm": 0.008650779724121094, "learning_rate": 6.550667023102197e-05, "loss": 0.3339291334152222, "step": 81000 }, { "epoch": 0.34774992916205144, "eval_loss": 0.42357951402664185, "eval_runtime": 27.1096, "eval_samples_per_second": 3.689, "eval_steps_per_second": 3.689, "step": 81000 }, { "epoch": 0.3477928612520715, "grad_norm": 34.085533142089844, "learning_rate": 6.550235851090434e-05, "loss": 0.08599871993064881, "step": 81010 }, { "epoch": 0.34783579334209147, "grad_norm": 0.00876838993281126, "learning_rate": 6.549804679078671e-05, "loss": 0.2552550554275513, "step": 81020 }, { "epoch": 0.3478787254321115, "grad_norm": 0.2916661500930786, "learning_rate": 6.549373507066909e-05, "loss": 0.20430076122283936, "step": 81030 }, { "epoch": 0.3479216575221315, "grad_norm": 0.004903197754174471, "learning_rate": 6.548942335055147e-05, "loss": 0.16290855407714844, "step": 81040 }, { "epoch": 0.3479645896121515, "grad_norm": 0.08551827818155289, "learning_rate": 6.548511163043385e-05, "loss": 0.27665128707885744, "step": 81050 }, { "epoch": 0.34800752170217153, "grad_norm": 5.609275817871094, "learning_rate": 6.548079991031622e-05, "loss": 0.26043925285339353, "step": 81060 }, { "epoch": 0.3480504537921915, "grad_norm": 0.0011216587154194713, "learning_rate": 6.54764881901986e-05, "loss": 0.2311476230621338, "step": 81070 }, { "epoch": 0.3480933858822115, "grad_norm": 0.4421592056751251, "learning_rate": 6.547217647008098e-05, "loss": 0.11889767646789551, "step": 81080 }, { "epoch": 0.34813631797223155, "grad_norm": 0.0412554107606411, "learning_rate": 6.546786474996336e-05, "loss": 0.17613816261291504, "step": 81090 }, { "epoch": 0.34817925006225153, "grad_norm": 0.13568167388439178, "learning_rate": 6.546355302984572e-05, "loss": 0.25402672290802003, "step": 81100 }, { "epoch": 0.3482221821522715, "grad_norm": 4.991754531860352, "learning_rate": 6.54592413097281e-05, "loss": 0.34817531108856203, "step": 81110 }, { "epoch": 0.34826511424229156, "grad_norm": 3.6499862670898438, "learning_rate": 6.545492958961047e-05, "loss": 0.23540241718292237, "step": 81120 }, { "epoch": 0.34830804633231155, "grad_norm": 1.1497873067855835, "learning_rate": 6.545061786949285e-05, "loss": 0.30424203872680666, "step": 81130 }, { "epoch": 0.34835097842233154, "grad_norm": 0.029126333072781563, "learning_rate": 6.544630614937523e-05, "loss": 0.21944479942321776, "step": 81140 }, { "epoch": 0.3483939105123516, "grad_norm": 0.658740758895874, "learning_rate": 6.544199442925762e-05, "loss": 0.3712824583053589, "step": 81150 }, { "epoch": 0.34843684260237157, "grad_norm": 0.039526522159576416, "learning_rate": 6.543768270914e-05, "loss": 0.1300313353538513, "step": 81160 }, { "epoch": 0.34847977469239155, "grad_norm": 1.5271961688995361, "learning_rate": 6.543337098902238e-05, "loss": 0.1440997838973999, "step": 81170 }, { "epoch": 0.3485227067824116, "grad_norm": 0.008994216099381447, "learning_rate": 6.542905926890474e-05, "loss": 0.24037206172943115, "step": 81180 }, { "epoch": 0.3485656388724316, "grad_norm": 0.9555651545524597, "learning_rate": 6.542474754878712e-05, "loss": 0.21935515403747557, "step": 81190 }, { "epoch": 0.34860857096245157, "grad_norm": 0.02722945250570774, "learning_rate": 6.54204358286695e-05, "loss": 0.18047159910202026, "step": 81200 }, { "epoch": 0.3486515030524716, "grad_norm": 3.5013840198516846, "learning_rate": 6.541612410855187e-05, "loss": 0.3040013790130615, "step": 81210 }, { "epoch": 0.3486944351424916, "grad_norm": 0.0028997049666941166, "learning_rate": 6.541181238843425e-05, "loss": 0.19227596521377563, "step": 81220 }, { "epoch": 0.3487373672325116, "grad_norm": 0.13611142337322235, "learning_rate": 6.540750066831663e-05, "loss": 0.14200282096862793, "step": 81230 }, { "epoch": 0.34878029932253163, "grad_norm": 2.3464372158050537, "learning_rate": 6.5403188948199e-05, "loss": 0.36781165599822996, "step": 81240 }, { "epoch": 0.3488232314125516, "grad_norm": 0.003250357462093234, "learning_rate": 6.539887722808138e-05, "loss": 0.28147766590118406, "step": 81250 }, { "epoch": 0.34886616350257166, "grad_norm": 0.10627375543117523, "learning_rate": 6.539456550796374e-05, "loss": 0.15539608001708985, "step": 81260 }, { "epoch": 0.34890909559259164, "grad_norm": 0.1069926768541336, "learning_rate": 6.539025378784612e-05, "loss": 0.07610588073730469, "step": 81270 }, { "epoch": 0.34895202768261163, "grad_norm": 13.996943473815918, "learning_rate": 6.53859420677285e-05, "loss": 0.3478125333786011, "step": 81280 }, { "epoch": 0.3489949597726317, "grad_norm": 36.15385437011719, "learning_rate": 6.538163034761088e-05, "loss": 0.07145402431488038, "step": 81290 }, { "epoch": 0.34903789186265166, "grad_norm": 11.668716430664062, "learning_rate": 6.537731862749325e-05, "loss": 0.06767347455024719, "step": 81300 }, { "epoch": 0.34908082395267165, "grad_norm": 2.9824609756469727, "learning_rate": 6.537300690737563e-05, "loss": 0.1598757028579712, "step": 81310 }, { "epoch": 0.3491237560426917, "grad_norm": 0.33760035037994385, "learning_rate": 6.536869518725801e-05, "loss": 0.4008010387420654, "step": 81320 }, { "epoch": 0.3491666881327117, "grad_norm": 0.5066002607345581, "learning_rate": 6.536438346714039e-05, "loss": 0.22687935829162598, "step": 81330 }, { "epoch": 0.34920962022273166, "grad_norm": 0.28217196464538574, "learning_rate": 6.536007174702275e-05, "loss": 0.1519029140472412, "step": 81340 }, { "epoch": 0.3492525523127517, "grad_norm": 0.15852105617523193, "learning_rate": 6.535576002690513e-05, "loss": 0.19549806118011476, "step": 81350 }, { "epoch": 0.3492954844027717, "grad_norm": 0.8499207496643066, "learning_rate": 6.53514483067875e-05, "loss": 0.10695688724517823, "step": 81360 }, { "epoch": 0.3493384164927917, "grad_norm": 0.019876671954989433, "learning_rate": 6.53471365866699e-05, "loss": 0.15644168853759766, "step": 81370 }, { "epoch": 0.3493813485828117, "grad_norm": 3.817972183227539, "learning_rate": 6.534282486655227e-05, "loss": 0.19872108697891236, "step": 81380 }, { "epoch": 0.3494242806728317, "grad_norm": 1.0998066663742065, "learning_rate": 6.533851314643465e-05, "loss": 0.31540355682373045, "step": 81390 }, { "epoch": 0.3494672127628517, "grad_norm": 0.6520820260047913, "learning_rate": 6.533420142631703e-05, "loss": 0.3238629579544067, "step": 81400 }, { "epoch": 0.34951014485287174, "grad_norm": 1.3481556177139282, "learning_rate": 6.53298897061994e-05, "loss": 0.2559787750244141, "step": 81410 }, { "epoch": 0.3495530769428917, "grad_norm": 8.32856273651123, "learning_rate": 6.532557798608177e-05, "loss": 0.40641307830810547, "step": 81420 }, { "epoch": 0.3495960090329117, "grad_norm": 3.496859550476074, "learning_rate": 6.532126626596415e-05, "loss": 0.07484019994735717, "step": 81430 }, { "epoch": 0.34963894112293176, "grad_norm": 2.567354679107666, "learning_rate": 6.531695454584652e-05, "loss": 0.20389926433563232, "step": 81440 }, { "epoch": 0.34968187321295174, "grad_norm": 7.194469451904297, "learning_rate": 6.53126428257289e-05, "loss": 0.20474486351013182, "step": 81450 }, { "epoch": 0.3497248053029718, "grad_norm": 1.0319026708602905, "learning_rate": 6.530833110561128e-05, "loss": 0.2735546112060547, "step": 81460 }, { "epoch": 0.3497677373929918, "grad_norm": 0.02630757726728916, "learning_rate": 6.530401938549365e-05, "loss": 0.21943216323852538, "step": 81470 }, { "epoch": 0.34981066948301176, "grad_norm": 0.6678802371025085, "learning_rate": 6.529970766537603e-05, "loss": 0.054949283599853516, "step": 81480 }, { "epoch": 0.3498536015730318, "grad_norm": 0.010042681358754635, "learning_rate": 6.529539594525841e-05, "loss": 0.15706361532211305, "step": 81490 }, { "epoch": 0.3498965336630518, "grad_norm": 3.5283749103546143, "learning_rate": 6.529108422514079e-05, "loss": 0.46043691635131834, "step": 81500 }, { "epoch": 0.3499394657530718, "grad_norm": 0.2358131855726242, "learning_rate": 6.528677250502315e-05, "loss": 0.3449904680252075, "step": 81510 }, { "epoch": 0.3499823978430918, "grad_norm": 1.6699026823043823, "learning_rate": 6.528246078490553e-05, "loss": 0.3223626375198364, "step": 81520 }, { "epoch": 0.3500253299331118, "grad_norm": 0.12843438982963562, "learning_rate": 6.52781490647879e-05, "loss": 0.13025667667388915, "step": 81530 }, { "epoch": 0.3500682620231318, "grad_norm": 2.23526930809021, "learning_rate": 6.527383734467028e-05, "loss": 0.1384149193763733, "step": 81540 }, { "epoch": 0.35011119411315184, "grad_norm": 0.0064170872792601585, "learning_rate": 6.526952562455266e-05, "loss": 0.15591152906417846, "step": 81550 }, { "epoch": 0.3501541262031718, "grad_norm": 0.08279679715633392, "learning_rate": 6.526521390443504e-05, "loss": 0.18339295387268068, "step": 81560 }, { "epoch": 0.3501970582931918, "grad_norm": 1.4734292030334473, "learning_rate": 6.526090218431741e-05, "loss": 0.14943455457687377, "step": 81570 }, { "epoch": 0.35023999038321185, "grad_norm": 0.010176564566791058, "learning_rate": 6.525659046419979e-05, "loss": 0.21766793727874756, "step": 81580 }, { "epoch": 0.35028292247323184, "grad_norm": 0.0007287487387657166, "learning_rate": 6.525227874408217e-05, "loss": 0.12911440134048463, "step": 81590 }, { "epoch": 0.3503258545632518, "grad_norm": 0.24612395465373993, "learning_rate": 6.524796702396455e-05, "loss": 0.123654305934906, "step": 81600 }, { "epoch": 0.35036878665327187, "grad_norm": 0.07536415010690689, "learning_rate": 6.524365530384692e-05, "loss": 0.015879042446613312, "step": 81610 }, { "epoch": 0.35041171874329186, "grad_norm": 0.16817022860050201, "learning_rate": 6.52393435837293e-05, "loss": 0.35819756984710693, "step": 81620 }, { "epoch": 0.35045465083331184, "grad_norm": 0.053329501301050186, "learning_rate": 6.523503186361168e-05, "loss": 0.2505997657775879, "step": 81630 }, { "epoch": 0.3504975829233319, "grad_norm": 0.20820234715938568, "learning_rate": 6.523072014349406e-05, "loss": 0.16442860364913942, "step": 81640 }, { "epoch": 0.35054051501335187, "grad_norm": 2.309272289276123, "learning_rate": 6.522640842337643e-05, "loss": 0.25239014625549316, "step": 81650 }, { "epoch": 0.35058344710337186, "grad_norm": 3.8517117500305176, "learning_rate": 6.522209670325881e-05, "loss": 0.21353816986083984, "step": 81660 }, { "epoch": 0.3506263791933919, "grad_norm": 0.11886537075042725, "learning_rate": 6.521778498314117e-05, "loss": 0.26951894760131834, "step": 81670 }, { "epoch": 0.3506693112834119, "grad_norm": 4.493960857391357, "learning_rate": 6.521347326302355e-05, "loss": 0.17917612791061402, "step": 81680 }, { "epoch": 0.35071224337343193, "grad_norm": 0.054700762033462524, "learning_rate": 6.520916154290593e-05, "loss": 0.2787285566329956, "step": 81690 }, { "epoch": 0.3507551754634519, "grad_norm": 6.3242058753967285, "learning_rate": 6.52048498227883e-05, "loss": 0.2191849946975708, "step": 81700 }, { "epoch": 0.3507981075534719, "grad_norm": 5.571452617645264, "learning_rate": 6.520053810267068e-05, "loss": 0.3427495241165161, "step": 81710 }, { "epoch": 0.35084103964349195, "grad_norm": 0.028819601982831955, "learning_rate": 6.519622638255306e-05, "loss": 0.07852021455764771, "step": 81720 }, { "epoch": 0.35088397173351193, "grad_norm": 0.002837139181792736, "learning_rate": 6.519191466243544e-05, "loss": 0.17152912616729737, "step": 81730 }, { "epoch": 0.3509269038235319, "grad_norm": 0.9089310765266418, "learning_rate": 6.518760294231782e-05, "loss": 0.2090291976928711, "step": 81740 }, { "epoch": 0.35096983591355196, "grad_norm": 0.0009550207760185003, "learning_rate": 6.518329122220018e-05, "loss": 0.2496708393096924, "step": 81750 }, { "epoch": 0.35101276800357195, "grad_norm": 0.0026640784926712513, "learning_rate": 6.517897950208256e-05, "loss": 0.4584530830383301, "step": 81760 }, { "epoch": 0.35105570009359194, "grad_norm": 0.010816243477165699, "learning_rate": 6.517466778196493e-05, "loss": 0.2485302448272705, "step": 81770 }, { "epoch": 0.351098632183612, "grad_norm": 0.004385428968816996, "learning_rate": 6.517035606184731e-05, "loss": 0.13215600252151488, "step": 81780 }, { "epoch": 0.35114156427363197, "grad_norm": 0.21605446934700012, "learning_rate": 6.516604434172969e-05, "loss": 0.255491304397583, "step": 81790 }, { "epoch": 0.35118449636365195, "grad_norm": 0.6936253905296326, "learning_rate": 6.516173262161207e-05, "loss": 0.1723204016685486, "step": 81800 }, { "epoch": 0.351227428453672, "grad_norm": 0.04460751265287399, "learning_rate": 6.515742090149444e-05, "loss": 0.13337093591690063, "step": 81810 }, { "epoch": 0.351270360543692, "grad_norm": 0.7156800031661987, "learning_rate": 6.515310918137682e-05, "loss": 0.11102476119995117, "step": 81820 }, { "epoch": 0.35131329263371197, "grad_norm": 5.608897686004639, "learning_rate": 6.51487974612592e-05, "loss": 0.33392949104309083, "step": 81830 }, { "epoch": 0.351356224723732, "grad_norm": 0.050913892686367035, "learning_rate": 6.514448574114158e-05, "loss": 0.23823533058166504, "step": 81840 }, { "epoch": 0.351399156813752, "grad_norm": 0.43708866834640503, "learning_rate": 6.514017402102395e-05, "loss": 0.23856070041656494, "step": 81850 }, { "epoch": 0.351442088903772, "grad_norm": 0.10660059750080109, "learning_rate": 6.513586230090633e-05, "loss": 0.28337559700012205, "step": 81860 }, { "epoch": 0.35148502099379203, "grad_norm": 0.298322468996048, "learning_rate": 6.513155058078871e-05, "loss": 0.28602511882781984, "step": 81870 }, { "epoch": 0.351527953083812, "grad_norm": 0.006606437731534243, "learning_rate": 6.512723886067109e-05, "loss": 0.1852823853492737, "step": 81880 }, { "epoch": 0.35157088517383206, "grad_norm": 8.826769828796387, "learning_rate": 6.512292714055346e-05, "loss": 0.48439674377441405, "step": 81890 }, { "epoch": 0.35161381726385205, "grad_norm": 0.0020860633812844753, "learning_rate": 6.511861542043584e-05, "loss": 0.015766726434230806, "step": 81900 }, { "epoch": 0.35165674935387203, "grad_norm": 3.8179450035095215, "learning_rate": 6.511430370031822e-05, "loss": 0.41085362434387207, "step": 81910 }, { "epoch": 0.3516996814438921, "grad_norm": 0.3400474786758423, "learning_rate": 6.510999198020058e-05, "loss": 0.13819591999053954, "step": 81920 }, { "epoch": 0.35174261353391206, "grad_norm": 2.31620192527771, "learning_rate": 6.510568026008296e-05, "loss": 0.29909653663635255, "step": 81930 }, { "epoch": 0.35178554562393205, "grad_norm": 1.8688679933547974, "learning_rate": 6.510136853996534e-05, "loss": 0.1573151469230652, "step": 81940 }, { "epoch": 0.3518284777139521, "grad_norm": 0.1233760342001915, "learning_rate": 6.509705681984771e-05, "loss": 0.2249547004699707, "step": 81950 }, { "epoch": 0.3518714098039721, "grad_norm": 1.5025233030319214, "learning_rate": 6.509274509973009e-05, "loss": 0.09836132526397705, "step": 81960 }, { "epoch": 0.35191434189399207, "grad_norm": 21.100440979003906, "learning_rate": 6.508843337961247e-05, "loss": 0.09114798903465271, "step": 81970 }, { "epoch": 0.3519572739840121, "grad_norm": 0.033003196120262146, "learning_rate": 6.508412165949485e-05, "loss": 0.14782416820526123, "step": 81980 }, { "epoch": 0.3520002060740321, "grad_norm": 0.015041773207485676, "learning_rate": 6.507980993937722e-05, "loss": 0.18848912715911864, "step": 81990 }, { "epoch": 0.3520431381640521, "grad_norm": 0.018849393352866173, "learning_rate": 6.507549821925959e-05, "loss": 0.3703503370285034, "step": 82000 }, { "epoch": 0.3520431381640521, "eval_loss": 0.43411049246788025, "eval_runtime": 27.2695, "eval_samples_per_second": 3.667, "eval_steps_per_second": 3.667, "step": 82000 }, { "epoch": 0.3520860702540721, "grad_norm": 0.5550269484519958, "learning_rate": 6.507118649914196e-05, "loss": 0.246940279006958, "step": 82010 }, { "epoch": 0.3521290023440921, "grad_norm": 0.24801412224769592, "learning_rate": 6.506687477902434e-05, "loss": 0.4794013977050781, "step": 82020 }, { "epoch": 0.3521719344341121, "grad_norm": 0.02586125209927559, "learning_rate": 6.506256305890672e-05, "loss": 0.27535793781280515, "step": 82030 }, { "epoch": 0.35221486652413214, "grad_norm": 0.39585307240486145, "learning_rate": 6.50582513387891e-05, "loss": 0.1664145588874817, "step": 82040 }, { "epoch": 0.35225779861415213, "grad_norm": 0.010906996205449104, "learning_rate": 6.505393961867147e-05, "loss": 0.2748828649520874, "step": 82050 }, { "epoch": 0.3523007307041721, "grad_norm": 0.005688409321010113, "learning_rate": 6.504962789855385e-05, "loss": 0.05075052976608276, "step": 82060 }, { "epoch": 0.35234366279419216, "grad_norm": 0.010103407315909863, "learning_rate": 6.504531617843623e-05, "loss": 0.2605855464935303, "step": 82070 }, { "epoch": 0.35238659488421215, "grad_norm": 0.12392642349004745, "learning_rate": 6.50410044583186e-05, "loss": 0.14412637948989868, "step": 82080 }, { "epoch": 0.35242952697423213, "grad_norm": 0.0025631142780184746, "learning_rate": 6.503669273820098e-05, "loss": 0.13340569734573365, "step": 82090 }, { "epoch": 0.3524724590642522, "grad_norm": 1.0315361022949219, "learning_rate": 6.503238101808336e-05, "loss": 0.30212409496307374, "step": 82100 }, { "epoch": 0.35251539115427216, "grad_norm": 0.1816161572933197, "learning_rate": 6.502806929796574e-05, "loss": 0.15246015787124634, "step": 82110 }, { "epoch": 0.3525583232442922, "grad_norm": 0.39198920130729675, "learning_rate": 6.502375757784811e-05, "loss": 0.31164183616638186, "step": 82120 }, { "epoch": 0.3526012553343122, "grad_norm": 3.912492275238037, "learning_rate": 6.501944585773049e-05, "loss": 0.27409753799438474, "step": 82130 }, { "epoch": 0.3526441874243322, "grad_norm": 1.958382487297058, "learning_rate": 6.501513413761287e-05, "loss": 0.1995323419570923, "step": 82140 }, { "epoch": 0.3526871195143522, "grad_norm": 3.8886985778808594, "learning_rate": 6.501082241749525e-05, "loss": 0.2748485803604126, "step": 82150 }, { "epoch": 0.3527300516043722, "grad_norm": 0.135552778840065, "learning_rate": 6.500651069737761e-05, "loss": 0.23292884826660157, "step": 82160 }, { "epoch": 0.3527729836943922, "grad_norm": 2.7165191173553467, "learning_rate": 6.500219897725999e-05, "loss": 0.28181753158569334, "step": 82170 }, { "epoch": 0.35281591578441224, "grad_norm": 1.8693931102752686, "learning_rate": 6.499788725714236e-05, "loss": 0.33779211044311525, "step": 82180 }, { "epoch": 0.3528588478744322, "grad_norm": 0.1916666328907013, "learning_rate": 6.499357553702474e-05, "loss": 0.1582764983177185, "step": 82190 }, { "epoch": 0.3529017799644522, "grad_norm": 0.04685550183057785, "learning_rate": 6.498926381690712e-05, "loss": 0.12637512683868407, "step": 82200 }, { "epoch": 0.35294471205447225, "grad_norm": 0.13845305144786835, "learning_rate": 6.49849520967895e-05, "loss": 0.1528358817100525, "step": 82210 }, { "epoch": 0.35298764414449224, "grad_norm": 0.0018608167301863432, "learning_rate": 6.498064037667187e-05, "loss": 0.07130320072174072, "step": 82220 }, { "epoch": 0.35303057623451223, "grad_norm": 0.09361238777637482, "learning_rate": 6.497632865655425e-05, "loss": 0.0021398985758423806, "step": 82230 }, { "epoch": 0.35307350832453227, "grad_norm": 0.10198186337947845, "learning_rate": 6.497201693643663e-05, "loss": 0.3469515800476074, "step": 82240 }, { "epoch": 0.35311644041455226, "grad_norm": 0.0016866042278707027, "learning_rate": 6.496770521631899e-05, "loss": 0.09589306712150573, "step": 82250 }, { "epoch": 0.35315937250457224, "grad_norm": 0.007193189579993486, "learning_rate": 6.496339349620137e-05, "loss": 0.2937009334564209, "step": 82260 }, { "epoch": 0.3532023045945923, "grad_norm": 0.0004173514316789806, "learning_rate": 6.495908177608375e-05, "loss": 0.3419705629348755, "step": 82270 }, { "epoch": 0.3532452366846123, "grad_norm": 0.1558980494737625, "learning_rate": 6.495477005596612e-05, "loss": 0.32904758453369143, "step": 82280 }, { "epoch": 0.35328816877463226, "grad_norm": 0.22352668642997742, "learning_rate": 6.49504583358485e-05, "loss": 0.029522615671157836, "step": 82290 }, { "epoch": 0.3533311008646523, "grad_norm": 0.2652186155319214, "learning_rate": 6.494614661573088e-05, "loss": 0.3830788850784302, "step": 82300 }, { "epoch": 0.3533740329546723, "grad_norm": 1.32157564163208, "learning_rate": 6.494183489561326e-05, "loss": 0.24220361709594726, "step": 82310 }, { "epoch": 0.35341696504469233, "grad_norm": 1.0047389268875122, "learning_rate": 6.493752317549563e-05, "loss": 0.34980719089508056, "step": 82320 }, { "epoch": 0.3534598971347123, "grad_norm": 0.2502935528755188, "learning_rate": 6.493321145537801e-05, "loss": 0.2659575939178467, "step": 82330 }, { "epoch": 0.3535028292247323, "grad_norm": 0.8654706478118896, "learning_rate": 6.492889973526039e-05, "loss": 0.22834746837615966, "step": 82340 }, { "epoch": 0.35354576131475235, "grad_norm": 0.940198540687561, "learning_rate": 6.492458801514277e-05, "loss": 0.1983107566833496, "step": 82350 }, { "epoch": 0.35358869340477234, "grad_norm": 2.899003744125366, "learning_rate": 6.492027629502514e-05, "loss": 0.2748394012451172, "step": 82360 }, { "epoch": 0.3536316254947923, "grad_norm": 0.007231111638247967, "learning_rate": 6.491596457490752e-05, "loss": 0.07944477796554565, "step": 82370 }, { "epoch": 0.35367455758481237, "grad_norm": 0.0028690106701105833, "learning_rate": 6.49116528547899e-05, "loss": 0.19711775779724122, "step": 82380 }, { "epoch": 0.35371748967483235, "grad_norm": 1.741270661354065, "learning_rate": 6.490734113467228e-05, "loss": 0.17087342739105224, "step": 82390 }, { "epoch": 0.35376042176485234, "grad_norm": 0.0005304127698764205, "learning_rate": 6.490302941455465e-05, "loss": 0.27425274848937986, "step": 82400 }, { "epoch": 0.3538033538548724, "grad_norm": 1.3317078351974487, "learning_rate": 6.489871769443702e-05, "loss": 0.3008429765701294, "step": 82410 }, { "epoch": 0.35384628594489237, "grad_norm": 0.018071355298161507, "learning_rate": 6.48944059743194e-05, "loss": 0.2681131362915039, "step": 82420 }, { "epoch": 0.35388921803491236, "grad_norm": 1.2450603246688843, "learning_rate": 6.489009425420177e-05, "loss": 0.1308046340942383, "step": 82430 }, { "epoch": 0.3539321501249324, "grad_norm": 0.0007980667287483811, "learning_rate": 6.488578253408415e-05, "loss": 0.20263445377349854, "step": 82440 }, { "epoch": 0.3539750822149524, "grad_norm": 1.565921425819397, "learning_rate": 6.488147081396653e-05, "loss": 0.3779308795928955, "step": 82450 }, { "epoch": 0.3540180143049724, "grad_norm": 1.4699956178665161, "learning_rate": 6.48771590938489e-05, "loss": 0.368353009223938, "step": 82460 }, { "epoch": 0.3540609463949924, "grad_norm": 5.17052698135376, "learning_rate": 6.487284737373128e-05, "loss": 0.22542569637298585, "step": 82470 }, { "epoch": 0.3541038784850124, "grad_norm": 0.41042616963386536, "learning_rate": 6.486853565361366e-05, "loss": 0.17170801162719726, "step": 82480 }, { "epoch": 0.3541468105750324, "grad_norm": 0.00907969567924738, "learning_rate": 6.486422393349602e-05, "loss": 0.16800456047058104, "step": 82490 }, { "epoch": 0.35418974266505243, "grad_norm": 1.903733491897583, "learning_rate": 6.48599122133784e-05, "loss": 0.27847657203674314, "step": 82500 }, { "epoch": 0.3542326747550724, "grad_norm": 0.8449665307998657, "learning_rate": 6.485560049326078e-05, "loss": 0.19843802452087403, "step": 82510 }, { "epoch": 0.3542756068450924, "grad_norm": 0.00030148582300171256, "learning_rate": 6.485128877314315e-05, "loss": 0.17515969276428223, "step": 82520 }, { "epoch": 0.35431853893511245, "grad_norm": 0.0056775761768221855, "learning_rate": 6.484697705302553e-05, "loss": 0.11901193857192993, "step": 82530 }, { "epoch": 0.35436147102513244, "grad_norm": 0.005599792581051588, "learning_rate": 6.484266533290791e-05, "loss": 0.3014923810958862, "step": 82540 }, { "epoch": 0.3544044031151525, "grad_norm": 0.14583712816238403, "learning_rate": 6.483835361279029e-05, "loss": 0.09148666262626648, "step": 82550 }, { "epoch": 0.35444733520517246, "grad_norm": 0.03529001399874687, "learning_rate": 6.483404189267268e-05, "loss": 0.2952387571334839, "step": 82560 }, { "epoch": 0.35449026729519245, "grad_norm": 0.021427098661661148, "learning_rate": 6.482973017255505e-05, "loss": 0.16118075847625732, "step": 82570 }, { "epoch": 0.3545331993852125, "grad_norm": 0.02916530705988407, "learning_rate": 6.482541845243742e-05, "loss": 0.27104642391204836, "step": 82580 }, { "epoch": 0.3545761314752325, "grad_norm": 0.00043326994637027383, "learning_rate": 6.48211067323198e-05, "loss": 0.2643244504928589, "step": 82590 }, { "epoch": 0.35461906356525247, "grad_norm": 4.119890213012695, "learning_rate": 6.481679501220217e-05, "loss": 0.39766530990600585, "step": 82600 }, { "epoch": 0.3546619956552725, "grad_norm": 1.3540154695510864, "learning_rate": 6.481248329208455e-05, "loss": 0.27116658687591555, "step": 82610 }, { "epoch": 0.3547049277452925, "grad_norm": 1.0312505960464478, "learning_rate": 6.480817157196693e-05, "loss": 0.33033294677734376, "step": 82620 }, { "epoch": 0.3547478598353125, "grad_norm": 5.983156204223633, "learning_rate": 6.48038598518493e-05, "loss": 0.3525785207748413, "step": 82630 }, { "epoch": 0.3547907919253325, "grad_norm": 0.2073276787996292, "learning_rate": 6.479954813173168e-05, "loss": 0.0813725769519806, "step": 82640 }, { "epoch": 0.3548337240153525, "grad_norm": 2.0966362953186035, "learning_rate": 6.479523641161406e-05, "loss": 0.14142994880676268, "step": 82650 }, { "epoch": 0.3548766561053725, "grad_norm": 0.07701893895864487, "learning_rate": 6.479092469149642e-05, "loss": 0.1387007713317871, "step": 82660 }, { "epoch": 0.35491958819539254, "grad_norm": 0.033894576132297516, "learning_rate": 6.47866129713788e-05, "loss": 0.22242212295532227, "step": 82670 }, { "epoch": 0.35496252028541253, "grad_norm": 1.597336769104004, "learning_rate": 6.478230125126118e-05, "loss": 0.29877052307128904, "step": 82680 }, { "epoch": 0.3550054523754325, "grad_norm": 0.0051557328552007675, "learning_rate": 6.477798953114356e-05, "loss": 0.3312845230102539, "step": 82690 }, { "epoch": 0.35504838446545256, "grad_norm": 0.05306149646639824, "learning_rate": 6.477367781102593e-05, "loss": 0.2668110132217407, "step": 82700 }, { "epoch": 0.35509131655547255, "grad_norm": 0.5295519828796387, "learning_rate": 6.476936609090831e-05, "loss": 0.49150676727294923, "step": 82710 }, { "epoch": 0.35513424864549253, "grad_norm": 2.946373462677002, "learning_rate": 6.476505437079069e-05, "loss": 0.23695406913757325, "step": 82720 }, { "epoch": 0.3551771807355126, "grad_norm": 1.3482404947280884, "learning_rate": 6.476074265067306e-05, "loss": 0.3448635578155518, "step": 82730 }, { "epoch": 0.35522011282553256, "grad_norm": 1.2963811159133911, "learning_rate": 6.475643093055543e-05, "loss": 0.3682236194610596, "step": 82740 }, { "epoch": 0.3552630449155526, "grad_norm": 0.2663824260234833, "learning_rate": 6.47521192104378e-05, "loss": 0.31581456661224366, "step": 82750 }, { "epoch": 0.3553059770055726, "grad_norm": 1.069393515586853, "learning_rate": 6.474780749032018e-05, "loss": 0.44371590614318845, "step": 82760 }, { "epoch": 0.3553489090955926, "grad_norm": 0.7076935172080994, "learning_rate": 6.474349577020256e-05, "loss": 0.27069649696350095, "step": 82770 }, { "epoch": 0.3553918411856126, "grad_norm": 5.1295695304870605, "learning_rate": 6.473918405008495e-05, "loss": 0.2316493034362793, "step": 82780 }, { "epoch": 0.3554347732756326, "grad_norm": 2.332552194595337, "learning_rate": 6.473487232996733e-05, "loss": 0.159147310256958, "step": 82790 }, { "epoch": 0.3554777053656526, "grad_norm": 0.1406073421239853, "learning_rate": 6.47305606098497e-05, "loss": 0.28304622173309324, "step": 82800 }, { "epoch": 0.35552063745567264, "grad_norm": 0.30248576402664185, "learning_rate": 6.472624888973208e-05, "loss": 0.3555166721343994, "step": 82810 }, { "epoch": 0.3555635695456926, "grad_norm": 0.0017919761594384909, "learning_rate": 6.472193716961445e-05, "loss": 0.12960315942764283, "step": 82820 }, { "epoch": 0.3556065016357126, "grad_norm": 0.4125811755657196, "learning_rate": 6.471762544949682e-05, "loss": 0.2231734037399292, "step": 82830 }, { "epoch": 0.35564943372573266, "grad_norm": 0.026550287380814552, "learning_rate": 6.47133137293792e-05, "loss": 0.004985055699944496, "step": 82840 }, { "epoch": 0.35569236581575264, "grad_norm": 0.6616964340209961, "learning_rate": 6.470900200926158e-05, "loss": 0.05897141098976135, "step": 82850 }, { "epoch": 0.35573529790577263, "grad_norm": 4.38887357711792, "learning_rate": 6.470469028914396e-05, "loss": 0.24175820350646973, "step": 82860 }, { "epoch": 0.35577822999579267, "grad_norm": 0.04604952037334442, "learning_rate": 6.470037856902633e-05, "loss": 0.2245575189590454, "step": 82870 }, { "epoch": 0.35582116208581266, "grad_norm": 0.8017083406448364, "learning_rate": 6.469606684890871e-05, "loss": 0.1778331756591797, "step": 82880 }, { "epoch": 0.35586409417583265, "grad_norm": 1.9243903160095215, "learning_rate": 6.469175512879109e-05, "loss": 0.2638943910598755, "step": 82890 }, { "epoch": 0.3559070262658527, "grad_norm": 1.4982459545135498, "learning_rate": 6.468744340867345e-05, "loss": 0.4658337116241455, "step": 82900 }, { "epoch": 0.3559499583558727, "grad_norm": 0.10741309821605682, "learning_rate": 6.468313168855583e-05, "loss": 0.2154242753982544, "step": 82910 }, { "epoch": 0.35599289044589266, "grad_norm": 0.015362433157861233, "learning_rate": 6.467881996843821e-05, "loss": 0.10966675281524658, "step": 82920 }, { "epoch": 0.3560358225359127, "grad_norm": 1.1122446060180664, "learning_rate": 6.467450824832058e-05, "loss": 0.1882996916770935, "step": 82930 }, { "epoch": 0.3560787546259327, "grad_norm": 2.366774320602417, "learning_rate": 6.467019652820296e-05, "loss": 0.2599278688430786, "step": 82940 }, { "epoch": 0.3561216867159527, "grad_norm": 0.016711033880710602, "learning_rate": 6.466588480808534e-05, "loss": 0.0729515790939331, "step": 82950 }, { "epoch": 0.3561646188059727, "grad_norm": 0.8760409355163574, "learning_rate": 6.466157308796772e-05, "loss": 0.324887752532959, "step": 82960 }, { "epoch": 0.3562075508959927, "grad_norm": 0.08435682952404022, "learning_rate": 6.46572613678501e-05, "loss": 0.17404361963272094, "step": 82970 }, { "epoch": 0.35625048298601275, "grad_norm": 0.0024207860697060823, "learning_rate": 6.465294964773247e-05, "loss": 0.02005379945039749, "step": 82980 }, { "epoch": 0.35629341507603274, "grad_norm": 26.516660690307617, "learning_rate": 6.464863792761483e-05, "loss": 0.1444633960723877, "step": 82990 }, { "epoch": 0.3563363471660527, "grad_norm": 10.03338623046875, "learning_rate": 6.464432620749723e-05, "loss": 0.23125367164611815, "step": 83000 }, { "epoch": 0.3563363471660527, "eval_loss": 0.42964938282966614, "eval_runtime": 27.1156, "eval_samples_per_second": 3.688, "eval_steps_per_second": 3.688, "step": 83000 }, { "epoch": 0.35637927925607277, "grad_norm": 1.4059851169586182, "learning_rate": 6.46400144873796e-05, "loss": 0.30113949775695803, "step": 83010 }, { "epoch": 0.35642221134609275, "grad_norm": 0.0022157509811222553, "learning_rate": 6.463570276726198e-05, "loss": 0.09114395380020142, "step": 83020 }, { "epoch": 0.35646514343611274, "grad_norm": 0.022014634683728218, "learning_rate": 6.463139104714436e-05, "loss": 0.2713270902633667, "step": 83030 }, { "epoch": 0.3565080755261328, "grad_norm": 1.2134231328964233, "learning_rate": 6.462707932702673e-05, "loss": 0.1482961058616638, "step": 83040 }, { "epoch": 0.35655100761615277, "grad_norm": 0.13742724061012268, "learning_rate": 6.462276760690911e-05, "loss": 0.2234196186065674, "step": 83050 }, { "epoch": 0.35659393970617276, "grad_norm": 0.008671365678310394, "learning_rate": 6.461845588679149e-05, "loss": 0.20016765594482422, "step": 83060 }, { "epoch": 0.3566368717961928, "grad_norm": 6.836441993713379, "learning_rate": 6.461414416667385e-05, "loss": 0.25808258056640626, "step": 83070 }, { "epoch": 0.3566798038862128, "grad_norm": 1.2114756107330322, "learning_rate": 6.460983244655623e-05, "loss": 0.09312286376953124, "step": 83080 }, { "epoch": 0.3567227359762328, "grad_norm": 0.00714797992259264, "learning_rate": 6.460552072643861e-05, "loss": 0.13834846019744873, "step": 83090 }, { "epoch": 0.3567656680662528, "grad_norm": 10.475757598876953, "learning_rate": 6.460120900632099e-05, "loss": 0.29724955558776855, "step": 83100 }, { "epoch": 0.3568086001562728, "grad_norm": 0.007221123669296503, "learning_rate": 6.459689728620336e-05, "loss": 0.13953012228012085, "step": 83110 }, { "epoch": 0.3568515322462928, "grad_norm": 1.4416766166687012, "learning_rate": 6.459258556608574e-05, "loss": 0.19707913398742677, "step": 83120 }, { "epoch": 0.35689446433631283, "grad_norm": 0.09982796013355255, "learning_rate": 6.458827384596812e-05, "loss": 0.24213099479675293, "step": 83130 }, { "epoch": 0.3569373964263328, "grad_norm": 0.5285547375679016, "learning_rate": 6.45839621258505e-05, "loss": 0.2476147174835205, "step": 83140 }, { "epoch": 0.3569803285163528, "grad_norm": 1.9965113401412964, "learning_rate": 6.457965040573286e-05, "loss": 0.21606626510620117, "step": 83150 }, { "epoch": 0.35702326060637285, "grad_norm": 1.2305128574371338, "learning_rate": 6.457533868561524e-05, "loss": 0.5005519390106201, "step": 83160 }, { "epoch": 0.35706619269639284, "grad_norm": 0.06872189790010452, "learning_rate": 6.457102696549761e-05, "loss": 0.37816739082336426, "step": 83170 }, { "epoch": 0.3571091247864129, "grad_norm": 0.0010791983222588897, "learning_rate": 6.456671524537999e-05, "loss": 0.37646634578704835, "step": 83180 }, { "epoch": 0.35715205687643287, "grad_norm": 0.0028619798831641674, "learning_rate": 6.456240352526237e-05, "loss": 0.18022361993789673, "step": 83190 }, { "epoch": 0.35719498896645285, "grad_norm": 1.1735813617706299, "learning_rate": 6.455809180514475e-05, "loss": 0.23538787364959718, "step": 83200 }, { "epoch": 0.3572379210564729, "grad_norm": 2.028512716293335, "learning_rate": 6.455378008502712e-05, "loss": 0.28205409049987795, "step": 83210 }, { "epoch": 0.3572808531464929, "grad_norm": 0.35201296210289, "learning_rate": 6.45494683649095e-05, "loss": 0.06089202761650085, "step": 83220 }, { "epoch": 0.35732378523651287, "grad_norm": 0.07536812871694565, "learning_rate": 6.454515664479188e-05, "loss": 0.2602132797241211, "step": 83230 }, { "epoch": 0.3573667173265329, "grad_norm": 0.014509957283735275, "learning_rate": 6.454084492467425e-05, "loss": 0.24068257808685303, "step": 83240 }, { "epoch": 0.3574096494165529, "grad_norm": 3.485565185546875, "learning_rate": 6.453653320455663e-05, "loss": 0.14384920597076417, "step": 83250 }, { "epoch": 0.3574525815065729, "grad_norm": 0.010356109589338303, "learning_rate": 6.453222148443901e-05, "loss": 0.17156875133514404, "step": 83260 }, { "epoch": 0.35749551359659293, "grad_norm": 0.8293179273605347, "learning_rate": 6.452790976432139e-05, "loss": 0.14238990545272828, "step": 83270 }, { "epoch": 0.3575384456866129, "grad_norm": 1.1598352193832397, "learning_rate": 6.452359804420376e-05, "loss": 0.42585220336914065, "step": 83280 }, { "epoch": 0.3575813777766329, "grad_norm": 1.778437852859497, "learning_rate": 6.451928632408614e-05, "loss": 0.2574995279312134, "step": 83290 }, { "epoch": 0.35762430986665295, "grad_norm": 0.6484081745147705, "learning_rate": 6.451497460396852e-05, "loss": 0.28310160636901854, "step": 83300 }, { "epoch": 0.35766724195667293, "grad_norm": 0.2528004050254822, "learning_rate": 6.45106628838509e-05, "loss": 0.11739856004714966, "step": 83310 }, { "epoch": 0.3577101740466929, "grad_norm": 0.029177529737353325, "learning_rate": 6.450635116373326e-05, "loss": 0.026236182451248168, "step": 83320 }, { "epoch": 0.35775310613671296, "grad_norm": 8.064805030822754, "learning_rate": 6.450203944361564e-05, "loss": 0.23941564559936523, "step": 83330 }, { "epoch": 0.35779603822673295, "grad_norm": 0.12642982602119446, "learning_rate": 6.449772772349801e-05, "loss": 0.17229573726654052, "step": 83340 }, { "epoch": 0.35783897031675294, "grad_norm": 0.01093371957540512, "learning_rate": 6.449341600338039e-05, "loss": 0.25434882640838624, "step": 83350 }, { "epoch": 0.357881902406773, "grad_norm": 0.5927442908287048, "learning_rate": 6.448910428326277e-05, "loss": 0.39759347438812254, "step": 83360 }, { "epoch": 0.35792483449679297, "grad_norm": 0.5692152976989746, "learning_rate": 6.448479256314515e-05, "loss": 0.2229149341583252, "step": 83370 }, { "epoch": 0.35796776658681295, "grad_norm": 0.9230725169181824, "learning_rate": 6.448048084302752e-05, "loss": 0.0902398943901062, "step": 83380 }, { "epoch": 0.358010698676833, "grad_norm": 0.0053199403919279575, "learning_rate": 6.44761691229099e-05, "loss": 0.20757627487182617, "step": 83390 }, { "epoch": 0.358053630766853, "grad_norm": 1.6971391439437866, "learning_rate": 6.447185740279227e-05, "loss": 0.5305335044860839, "step": 83400 }, { "epoch": 0.358096562856873, "grad_norm": 0.02110726200044155, "learning_rate": 6.446754568267464e-05, "loss": 0.07769713997840881, "step": 83410 }, { "epoch": 0.358139494946893, "grad_norm": 0.005539445672184229, "learning_rate": 6.446323396255702e-05, "loss": 0.18624429702758788, "step": 83420 }, { "epoch": 0.358182427036913, "grad_norm": 0.006300756242126226, "learning_rate": 6.44589222424394e-05, "loss": 0.18109878301620483, "step": 83430 }, { "epoch": 0.35822535912693304, "grad_norm": 3.080735683441162, "learning_rate": 6.445461052232177e-05, "loss": 0.42140870094299315, "step": 83440 }, { "epoch": 0.358268291216953, "grad_norm": 0.41588640213012695, "learning_rate": 6.445029880220415e-05, "loss": 0.28173749446868895, "step": 83450 }, { "epoch": 0.358311223306973, "grad_norm": 0.07146584987640381, "learning_rate": 6.444598708208653e-05, "loss": 0.23534250259399414, "step": 83460 }, { "epoch": 0.35835415539699306, "grad_norm": 0.0056983851827681065, "learning_rate": 6.44416753619689e-05, "loss": 0.11927134990692138, "step": 83470 }, { "epoch": 0.35839708748701304, "grad_norm": 0.008995592594146729, "learning_rate": 6.443736364185128e-05, "loss": 0.24893059730529785, "step": 83480 }, { "epoch": 0.35844001957703303, "grad_norm": 0.6611373424530029, "learning_rate": 6.443305192173366e-05, "loss": 0.18371953964233398, "step": 83490 }, { "epoch": 0.3584829516670531, "grad_norm": 0.01235401164740324, "learning_rate": 6.442874020161604e-05, "loss": 0.05725529193878174, "step": 83500 }, { "epoch": 0.35852588375707306, "grad_norm": 0.182949036359787, "learning_rate": 6.442442848149842e-05, "loss": 0.22440974712371825, "step": 83510 }, { "epoch": 0.35856881584709305, "grad_norm": 5.291581153869629, "learning_rate": 6.442011676138079e-05, "loss": 0.283705997467041, "step": 83520 }, { "epoch": 0.3586117479371131, "grad_norm": 0.016548916697502136, "learning_rate": 6.441580504126317e-05, "loss": 0.6019593238830566, "step": 83530 }, { "epoch": 0.3586546800271331, "grad_norm": 0.08653654158115387, "learning_rate": 6.441149332114555e-05, "loss": 0.08072211146354676, "step": 83540 }, { "epoch": 0.35869761211715306, "grad_norm": 0.13255316019058228, "learning_rate": 6.440718160102793e-05, "loss": 0.20978965759277343, "step": 83550 }, { "epoch": 0.3587405442071731, "grad_norm": 0.0007236854871734977, "learning_rate": 6.440286988091029e-05, "loss": 0.07688579559326172, "step": 83560 }, { "epoch": 0.3587834762971931, "grad_norm": 0.010213850066065788, "learning_rate": 6.439855816079267e-05, "loss": 0.12269260883331298, "step": 83570 }, { "epoch": 0.3588264083872131, "grad_norm": 0.001452255412004888, "learning_rate": 6.439424644067504e-05, "loss": 0.10330994129180908, "step": 83580 }, { "epoch": 0.3588693404772331, "grad_norm": 0.02720882184803486, "learning_rate": 6.438993472055742e-05, "loss": 0.08257366418838501, "step": 83590 }, { "epoch": 0.3589122725672531, "grad_norm": 1.2951226234436035, "learning_rate": 6.43856230004398e-05, "loss": 0.0941824495792389, "step": 83600 }, { "epoch": 0.35895520465727315, "grad_norm": 0.021939700469374657, "learning_rate": 6.438131128032218e-05, "loss": 0.1827239990234375, "step": 83610 }, { "epoch": 0.35899813674729314, "grad_norm": 0.001229679910466075, "learning_rate": 6.437699956020455e-05, "loss": 0.23367047309875488, "step": 83620 }, { "epoch": 0.3590410688373131, "grad_norm": 0.013304614461958408, "learning_rate": 6.437268784008693e-05, "loss": 0.32072179317474364, "step": 83630 }, { "epoch": 0.35908400092733317, "grad_norm": 0.08952384442090988, "learning_rate": 6.436837611996931e-05, "loss": 0.6800857543945312, "step": 83640 }, { "epoch": 0.35912693301735316, "grad_norm": 0.0006670946022495627, "learning_rate": 6.436406439985167e-05, "loss": 0.1628146767616272, "step": 83650 }, { "epoch": 0.35916986510737314, "grad_norm": 1.8681294918060303, "learning_rate": 6.435975267973405e-05, "loss": 0.3500849485397339, "step": 83660 }, { "epoch": 0.3592127971973932, "grad_norm": 0.05229023098945618, "learning_rate": 6.435544095961643e-05, "loss": 0.1595700979232788, "step": 83670 }, { "epoch": 0.3592557292874132, "grad_norm": 3.6567986011505127, "learning_rate": 6.43511292394988e-05, "loss": 0.16614724397659303, "step": 83680 }, { "epoch": 0.35929866137743316, "grad_norm": 2.2354629039764404, "learning_rate": 6.434681751938118e-05, "loss": 0.24699800014495848, "step": 83690 }, { "epoch": 0.3593415934674532, "grad_norm": 1.6552985906600952, "learning_rate": 6.434250579926356e-05, "loss": 0.2638379096984863, "step": 83700 }, { "epoch": 0.3593845255574732, "grad_norm": 0.1515263170003891, "learning_rate": 6.433819407914594e-05, "loss": 0.36674022674560547, "step": 83710 }, { "epoch": 0.3594274576474932, "grad_norm": 1.1364909410476685, "learning_rate": 6.433388235902831e-05, "loss": 0.11204863786697387, "step": 83720 }, { "epoch": 0.3594703897375132, "grad_norm": 0.1920362263917923, "learning_rate": 6.432957063891069e-05, "loss": 0.09521974325180053, "step": 83730 }, { "epoch": 0.3595133218275332, "grad_norm": 1.830244541168213, "learning_rate": 6.432525891879307e-05, "loss": 0.26940653324127195, "step": 83740 }, { "epoch": 0.3595562539175532, "grad_norm": 8.78477954864502, "learning_rate": 6.432094719867544e-05, "loss": 0.21584708690643312, "step": 83750 }, { "epoch": 0.35959918600757323, "grad_norm": 0.012690392322838306, "learning_rate": 6.431663547855782e-05, "loss": 0.16282269954681397, "step": 83760 }, { "epoch": 0.3596421180975932, "grad_norm": 0.03721415251493454, "learning_rate": 6.43123237584402e-05, "loss": 0.2313145875930786, "step": 83770 }, { "epoch": 0.3596850501876132, "grad_norm": 0.06290198862552643, "learning_rate": 6.430801203832258e-05, "loss": 0.0945240318775177, "step": 83780 }, { "epoch": 0.35972798227763325, "grad_norm": 0.15895670652389526, "learning_rate": 6.430370031820495e-05, "loss": 0.19284458160400392, "step": 83790 }, { "epoch": 0.35977091436765324, "grad_norm": 1.286566138267517, "learning_rate": 6.429938859808733e-05, "loss": 0.09456119537353516, "step": 83800 }, { "epoch": 0.3598138464576732, "grad_norm": 0.011230570264160633, "learning_rate": 6.42950768779697e-05, "loss": 0.08266975283622742, "step": 83810 }, { "epoch": 0.35985677854769327, "grad_norm": 0.011820230633020401, "learning_rate": 6.429076515785207e-05, "loss": 0.16751736402511597, "step": 83820 }, { "epoch": 0.35989971063771325, "grad_norm": 2.0594077110290527, "learning_rate": 6.428645343773445e-05, "loss": 0.2954521417617798, "step": 83830 }, { "epoch": 0.3599426427277333, "grad_norm": 0.07682941108942032, "learning_rate": 6.428214171761683e-05, "loss": 0.0959571897983551, "step": 83840 }, { "epoch": 0.3599855748177533, "grad_norm": 0.026437979191541672, "learning_rate": 6.42778299974992e-05, "loss": 0.25702903270721433, "step": 83850 }, { "epoch": 0.36002850690777327, "grad_norm": 0.12116403132677078, "learning_rate": 6.427351827738158e-05, "loss": 0.2732200860977173, "step": 83860 }, { "epoch": 0.3600714389977933, "grad_norm": 1.381988763809204, "learning_rate": 6.426920655726396e-05, "loss": 0.2366987943649292, "step": 83870 }, { "epoch": 0.3601143710878133, "grad_norm": 0.008017339743673801, "learning_rate": 6.426489483714634e-05, "loss": 0.2086495876312256, "step": 83880 }, { "epoch": 0.3601573031778333, "grad_norm": 0.03098870813846588, "learning_rate": 6.42605831170287e-05, "loss": 0.3768089771270752, "step": 83890 }, { "epoch": 0.36020023526785333, "grad_norm": 0.017910556867718697, "learning_rate": 6.425627139691108e-05, "loss": 0.22290031909942626, "step": 83900 }, { "epoch": 0.3602431673578733, "grad_norm": 0.010782677680253983, "learning_rate": 6.425195967679346e-05, "loss": 0.24496030807495117, "step": 83910 }, { "epoch": 0.3602860994478933, "grad_norm": 0.1066930741071701, "learning_rate": 6.424764795667583e-05, "loss": 0.05222045183181763, "step": 83920 }, { "epoch": 0.36032903153791335, "grad_norm": 10.818086624145508, "learning_rate": 6.424333623655821e-05, "loss": 0.06975013017654419, "step": 83930 }, { "epoch": 0.36037196362793333, "grad_norm": 1.1675182580947876, "learning_rate": 6.423902451644059e-05, "loss": 0.26229162216186525, "step": 83940 }, { "epoch": 0.3604148957179533, "grad_norm": 0.01140518020838499, "learning_rate": 6.423471279632296e-05, "loss": 0.15673032999038697, "step": 83950 }, { "epoch": 0.36045782780797336, "grad_norm": 0.2980732321739197, "learning_rate": 6.423040107620534e-05, "loss": 0.31344664096832275, "step": 83960 }, { "epoch": 0.36050075989799335, "grad_norm": 2.793539524078369, "learning_rate": 6.422608935608772e-05, "loss": 0.23717548847198486, "step": 83970 }, { "epoch": 0.36054369198801334, "grad_norm": 0.06608152389526367, "learning_rate": 6.42217776359701e-05, "loss": 0.10230793952941894, "step": 83980 }, { "epoch": 0.3605866240780334, "grad_norm": 1.9413366317749023, "learning_rate": 6.421746591585247e-05, "loss": 0.38025608062744143, "step": 83990 }, { "epoch": 0.36062955616805337, "grad_norm": 0.32591816782951355, "learning_rate": 6.421315419573485e-05, "loss": 0.10384665727615357, "step": 84000 }, { "epoch": 0.36062955616805337, "eval_loss": 0.4329765737056732, "eval_runtime": 27.2682, "eval_samples_per_second": 3.667, "eval_steps_per_second": 3.667, "step": 84000 }, { "epoch": 0.36067248825807335, "grad_norm": 2.1451854705810547, "learning_rate": 6.420884247561723e-05, "loss": 0.23780550956726074, "step": 84010 }, { "epoch": 0.3607154203480934, "grad_norm": 0.008518136106431484, "learning_rate": 6.42045307554996e-05, "loss": 0.15147947072982787, "step": 84020 }, { "epoch": 0.3607583524381134, "grad_norm": 1.9867619276046753, "learning_rate": 6.420021903538198e-05, "loss": 0.3307706594467163, "step": 84030 }, { "epoch": 0.3608012845281334, "grad_norm": 0.012360497377812862, "learning_rate": 6.419590731526436e-05, "loss": 0.2077253818511963, "step": 84040 }, { "epoch": 0.3608442166181534, "grad_norm": 0.1372559815645218, "learning_rate": 6.419159559514674e-05, "loss": 0.12807326316833495, "step": 84050 }, { "epoch": 0.3608871487081734, "grad_norm": 0.07335194945335388, "learning_rate": 6.41872838750291e-05, "loss": 0.2796891212463379, "step": 84060 }, { "epoch": 0.36093008079819344, "grad_norm": 4.2282304763793945, "learning_rate": 6.418297215491148e-05, "loss": 0.2018202781677246, "step": 84070 }, { "epoch": 0.36097301288821343, "grad_norm": 2.3254733085632324, "learning_rate": 6.417866043479386e-05, "loss": 0.07892566919326782, "step": 84080 }, { "epoch": 0.3610159449782334, "grad_norm": 0.038394901901483536, "learning_rate": 6.417434871467623e-05, "loss": 0.23869423866271972, "step": 84090 }, { "epoch": 0.36105887706825346, "grad_norm": 6.333953380584717, "learning_rate": 6.417003699455861e-05, "loss": 0.22153542041778565, "step": 84100 }, { "epoch": 0.36110180915827345, "grad_norm": 0.09468687325716019, "learning_rate": 6.416572527444099e-05, "loss": 0.6222201824188233, "step": 84110 }, { "epoch": 0.36114474124829343, "grad_norm": 0.07344312965869904, "learning_rate": 6.416141355432337e-05, "loss": 0.3869171619415283, "step": 84120 }, { "epoch": 0.3611876733383135, "grad_norm": 0.34509482979774475, "learning_rate": 6.415710183420574e-05, "loss": 0.10307228565216064, "step": 84130 }, { "epoch": 0.36123060542833346, "grad_norm": 0.09708867967128754, "learning_rate": 6.415279011408811e-05, "loss": 0.14578685760498047, "step": 84140 }, { "epoch": 0.36127353751835345, "grad_norm": 0.014592817053198814, "learning_rate": 6.414847839397048e-05, "loss": 0.22615408897399902, "step": 84150 }, { "epoch": 0.3613164696083735, "grad_norm": 0.04447157680988312, "learning_rate": 6.414416667385286e-05, "loss": 0.170421826839447, "step": 84160 }, { "epoch": 0.3613594016983935, "grad_norm": 0.013009901158511639, "learning_rate": 6.413985495373524e-05, "loss": 0.2288771390914917, "step": 84170 }, { "epoch": 0.36140233378841347, "grad_norm": 0.04273710027337074, "learning_rate": 6.413554323361762e-05, "loss": 0.10149039030075073, "step": 84180 }, { "epoch": 0.3614452658784335, "grad_norm": 0.683621883392334, "learning_rate": 6.413123151350001e-05, "loss": 0.467113733291626, "step": 84190 }, { "epoch": 0.3614881979684535, "grad_norm": 1.7198909521102905, "learning_rate": 6.412691979338238e-05, "loss": 0.3722904443740845, "step": 84200 }, { "epoch": 0.3615311300584735, "grad_norm": 0.16589732468128204, "learning_rate": 6.412260807326476e-05, "loss": 0.1510645031929016, "step": 84210 }, { "epoch": 0.3615740621484935, "grad_norm": 0.452720046043396, "learning_rate": 6.411829635314713e-05, "loss": 0.3065751075744629, "step": 84220 }, { "epoch": 0.3616169942385135, "grad_norm": 0.0688873827457428, "learning_rate": 6.41139846330295e-05, "loss": 0.11986181735992432, "step": 84230 }, { "epoch": 0.3616599263285335, "grad_norm": 0.2770315408706665, "learning_rate": 6.410967291291188e-05, "loss": 0.34214606285095217, "step": 84240 }, { "epoch": 0.36170285841855354, "grad_norm": 1.144366979598999, "learning_rate": 6.410536119279426e-05, "loss": 0.3191501617431641, "step": 84250 }, { "epoch": 0.36174579050857353, "grad_norm": 5.4182047843933105, "learning_rate": 6.410104947267664e-05, "loss": 0.26186683177948, "step": 84260 }, { "epoch": 0.36178872259859357, "grad_norm": 0.4700573682785034, "learning_rate": 6.409673775255901e-05, "loss": 0.16876962184906005, "step": 84270 }, { "epoch": 0.36183165468861356, "grad_norm": 3.8683853149414062, "learning_rate": 6.409242603244139e-05, "loss": 0.11432063579559326, "step": 84280 }, { "epoch": 0.36187458677863354, "grad_norm": 5.655078411102295, "learning_rate": 6.408811431232377e-05, "loss": 0.21527984142303466, "step": 84290 }, { "epoch": 0.3619175188686536, "grad_norm": 3.533766746520996, "learning_rate": 6.408380259220613e-05, "loss": 0.43784823417663576, "step": 84300 }, { "epoch": 0.3619604509586736, "grad_norm": 2.929824113845825, "learning_rate": 6.407949087208851e-05, "loss": 0.14191383123397827, "step": 84310 }, { "epoch": 0.36200338304869356, "grad_norm": 0.17527572810649872, "learning_rate": 6.407517915197089e-05, "loss": 0.17120797634124757, "step": 84320 }, { "epoch": 0.3620463151387136, "grad_norm": 0.04647151753306389, "learning_rate": 6.407086743185326e-05, "loss": 0.11106468439102173, "step": 84330 }, { "epoch": 0.3620892472287336, "grad_norm": 3.4737348556518555, "learning_rate": 6.406655571173564e-05, "loss": 0.26698980331420896, "step": 84340 }, { "epoch": 0.3621321793187536, "grad_norm": 0.062151987105607986, "learning_rate": 6.406224399161802e-05, "loss": 0.2645150423049927, "step": 84350 }, { "epoch": 0.3621751114087736, "grad_norm": 0.3555840253829956, "learning_rate": 6.40579322715004e-05, "loss": 0.22405788898468018, "step": 84360 }, { "epoch": 0.3622180434987936, "grad_norm": 0.04546572268009186, "learning_rate": 6.405362055138277e-05, "loss": 0.22337851524353028, "step": 84370 }, { "epoch": 0.3622609755888136, "grad_norm": 0.0068736146204173565, "learning_rate": 6.404930883126515e-05, "loss": 0.166854190826416, "step": 84380 }, { "epoch": 0.36230390767883364, "grad_norm": 51.327964782714844, "learning_rate": 6.404499711114751e-05, "loss": 0.23231921195983887, "step": 84390 }, { "epoch": 0.3623468397688536, "grad_norm": 1.8268455266952515, "learning_rate": 6.404068539102989e-05, "loss": 0.31146812438964844, "step": 84400 }, { "epoch": 0.3623897718588736, "grad_norm": 0.02235632948577404, "learning_rate": 6.403637367091228e-05, "loss": 0.26765482425689696, "step": 84410 }, { "epoch": 0.36243270394889365, "grad_norm": 0.039541054517030716, "learning_rate": 6.403206195079466e-05, "loss": 0.15650541782379152, "step": 84420 }, { "epoch": 0.36247563603891364, "grad_norm": 2.285512685775757, "learning_rate": 6.402775023067704e-05, "loss": 0.1562897801399231, "step": 84430 }, { "epoch": 0.3625185681289336, "grad_norm": 0.002735693706199527, "learning_rate": 6.402343851055941e-05, "loss": 0.2821861743927002, "step": 84440 }, { "epoch": 0.36256150021895367, "grad_norm": 0.5640804171562195, "learning_rate": 6.401912679044179e-05, "loss": 0.2460437536239624, "step": 84450 }, { "epoch": 0.36260443230897366, "grad_norm": 0.6940175890922546, "learning_rate": 6.401481507032417e-05, "loss": 0.26609799861907957, "step": 84460 }, { "epoch": 0.3626473643989937, "grad_norm": 0.008508739061653614, "learning_rate": 6.401050335020653e-05, "loss": 0.0524605393409729, "step": 84470 }, { "epoch": 0.3626902964890137, "grad_norm": 1.0394551753997803, "learning_rate": 6.400619163008891e-05, "loss": 0.18994983434677123, "step": 84480 }, { "epoch": 0.3627332285790337, "grad_norm": 2.4201126098632812, "learning_rate": 6.400187990997129e-05, "loss": 0.13662683963775635, "step": 84490 }, { "epoch": 0.3627761606690537, "grad_norm": 0.029414892196655273, "learning_rate": 6.399756818985366e-05, "loss": 0.41878905296325686, "step": 84500 }, { "epoch": 0.3628190927590737, "grad_norm": 2.0169215202331543, "learning_rate": 6.399325646973604e-05, "loss": 0.2553415775299072, "step": 84510 }, { "epoch": 0.3628620248490937, "grad_norm": 4.364803314208984, "learning_rate": 6.398894474961842e-05, "loss": 0.2932882308959961, "step": 84520 }, { "epoch": 0.36290495693911373, "grad_norm": 22.403047561645508, "learning_rate": 6.39846330295008e-05, "loss": 0.21755385398864746, "step": 84530 }, { "epoch": 0.3629478890291337, "grad_norm": 0.10820141434669495, "learning_rate": 6.398032130938317e-05, "loss": 0.30017387866973877, "step": 84540 }, { "epoch": 0.3629908211191537, "grad_norm": 0.06273016333580017, "learning_rate": 6.397600958926554e-05, "loss": 0.11642609834671021, "step": 84550 }, { "epoch": 0.36303375320917375, "grad_norm": 0.03808317705988884, "learning_rate": 6.397169786914791e-05, "loss": 0.2759079456329346, "step": 84560 }, { "epoch": 0.36307668529919374, "grad_norm": 4.863452434539795, "learning_rate": 6.396738614903029e-05, "loss": 0.20633177757263182, "step": 84570 }, { "epoch": 0.3631196173892137, "grad_norm": 0.004839466884732246, "learning_rate": 6.396307442891267e-05, "loss": 0.31665678024291993, "step": 84580 }, { "epoch": 0.36316254947923376, "grad_norm": 0.16685040295124054, "learning_rate": 6.395876270879505e-05, "loss": 0.07087898254394531, "step": 84590 }, { "epoch": 0.36320548156925375, "grad_norm": 54.194393157958984, "learning_rate": 6.395445098867742e-05, "loss": 0.170097279548645, "step": 84600 }, { "epoch": 0.36324841365927374, "grad_norm": 0.17545737326145172, "learning_rate": 6.39501392685598e-05, "loss": 0.10149813890457153, "step": 84610 }, { "epoch": 0.3632913457492938, "grad_norm": 17.983226776123047, "learning_rate": 6.394582754844218e-05, "loss": 0.15684278011322023, "step": 84620 }, { "epoch": 0.36333427783931377, "grad_norm": 1.1058499813079834, "learning_rate": 6.394151582832456e-05, "loss": 0.25113801956176757, "step": 84630 }, { "epoch": 0.36337720992933376, "grad_norm": 0.1441742181777954, "learning_rate": 6.393720410820693e-05, "loss": 0.24742982387542725, "step": 84640 }, { "epoch": 0.3634201420193538, "grad_norm": 0.03703548014163971, "learning_rate": 6.393289238808931e-05, "loss": 0.11437108516693115, "step": 84650 }, { "epoch": 0.3634630741093738, "grad_norm": 2.121002197265625, "learning_rate": 6.392858066797169e-05, "loss": 0.2250274896621704, "step": 84660 }, { "epoch": 0.36350600619939377, "grad_norm": 0.003910502418875694, "learning_rate": 6.392426894785407e-05, "loss": 0.2840403079986572, "step": 84670 }, { "epoch": 0.3635489382894138, "grad_norm": 0.12619902193546295, "learning_rate": 6.391995722773644e-05, "loss": 0.10449312925338745, "step": 84680 }, { "epoch": 0.3635918703794338, "grad_norm": 2.015465497970581, "learning_rate": 6.391564550761882e-05, "loss": 0.08222501277923584, "step": 84690 }, { "epoch": 0.36363480246945384, "grad_norm": 2.5868568420410156, "learning_rate": 6.39113337875012e-05, "loss": 0.45690197944641114, "step": 84700 }, { "epoch": 0.36367773455947383, "grad_norm": 4.2143096923828125, "learning_rate": 6.390702206738356e-05, "loss": 0.28699326515197754, "step": 84710 }, { "epoch": 0.3637206666494938, "grad_norm": 0.997793972492218, "learning_rate": 6.390271034726594e-05, "loss": 0.1634911060333252, "step": 84720 }, { "epoch": 0.36376359873951386, "grad_norm": 0.06723709404468536, "learning_rate": 6.389839862714832e-05, "loss": 0.1785125970840454, "step": 84730 }, { "epoch": 0.36380653082953385, "grad_norm": 0.07979264855384827, "learning_rate": 6.38940869070307e-05, "loss": 0.2313058614730835, "step": 84740 }, { "epoch": 0.36384946291955383, "grad_norm": 1.5970796346664429, "learning_rate": 6.388977518691307e-05, "loss": 0.11084781885147095, "step": 84750 }, { "epoch": 0.3638923950095739, "grad_norm": 0.09508782625198364, "learning_rate": 6.388546346679545e-05, "loss": 0.19648040533065797, "step": 84760 }, { "epoch": 0.36393532709959386, "grad_norm": 0.004324431996792555, "learning_rate": 6.388115174667783e-05, "loss": 0.12327823638916016, "step": 84770 }, { "epoch": 0.36397825918961385, "grad_norm": 9.756826400756836, "learning_rate": 6.38768400265602e-05, "loss": 0.15264017581939698, "step": 84780 }, { "epoch": 0.3640211912796339, "grad_norm": 2.773954391479492, "learning_rate": 6.387252830644258e-05, "loss": 0.24624147415161132, "step": 84790 }, { "epoch": 0.3640641233696539, "grad_norm": 41.181640625, "learning_rate": 6.386821658632494e-05, "loss": 0.3611812353134155, "step": 84800 }, { "epoch": 0.36410705545967387, "grad_norm": 3.124552011489868, "learning_rate": 6.386390486620732e-05, "loss": 0.17753114700317382, "step": 84810 }, { "epoch": 0.3641499875496939, "grad_norm": 0.17336466908454895, "learning_rate": 6.38595931460897e-05, "loss": 0.28252851963043213, "step": 84820 }, { "epoch": 0.3641929196397139, "grad_norm": 2.1589248180389404, "learning_rate": 6.385528142597208e-05, "loss": 0.3980719566345215, "step": 84830 }, { "epoch": 0.3642358517297339, "grad_norm": 0.006989433895796537, "learning_rate": 6.385096970585445e-05, "loss": 0.32679018974304197, "step": 84840 }, { "epoch": 0.3642787838197539, "grad_norm": 0.03605174273252487, "learning_rate": 6.384665798573683e-05, "loss": 0.4704775333404541, "step": 84850 }, { "epoch": 0.3643217159097739, "grad_norm": 4.0266523361206055, "learning_rate": 6.384234626561921e-05, "loss": 0.16243298053741456, "step": 84860 }, { "epoch": 0.3643646479997939, "grad_norm": 0.20851069688796997, "learning_rate": 6.383803454550159e-05, "loss": 0.15209826231002807, "step": 84870 }, { "epoch": 0.36440758008981394, "grad_norm": 2.2499935626983643, "learning_rate": 6.383372282538396e-05, "loss": 0.2580280303955078, "step": 84880 }, { "epoch": 0.36445051217983393, "grad_norm": 1.435296654701233, "learning_rate": 6.382941110526634e-05, "loss": 0.3792762756347656, "step": 84890 }, { "epoch": 0.364493444269854, "grad_norm": 0.5152401924133301, "learning_rate": 6.382509938514872e-05, "loss": 0.21894633769989014, "step": 84900 }, { "epoch": 0.36453637635987396, "grad_norm": 0.04706510156393051, "learning_rate": 6.38207876650311e-05, "loss": 0.23475384712219238, "step": 84910 }, { "epoch": 0.36457930844989395, "grad_norm": 4.913792133331299, "learning_rate": 6.381647594491347e-05, "loss": 0.3245802879333496, "step": 84920 }, { "epoch": 0.364622240539914, "grad_norm": 0.2699010372161865, "learning_rate": 6.381216422479585e-05, "loss": 0.11922560930252075, "step": 84930 }, { "epoch": 0.364665172629934, "grad_norm": 0.00447971373796463, "learning_rate": 6.380785250467823e-05, "loss": 0.10177613496780395, "step": 84940 }, { "epoch": 0.36470810471995396, "grad_norm": 0.0390581451356411, "learning_rate": 6.38035407845606e-05, "loss": 0.09930930137634278, "step": 84950 }, { "epoch": 0.364751036809974, "grad_norm": 0.002693427260965109, "learning_rate": 6.379922906444297e-05, "loss": 0.36963183879852296, "step": 84960 }, { "epoch": 0.364793968899994, "grad_norm": 0.002317856065928936, "learning_rate": 6.379491734432535e-05, "loss": 0.36330618858337405, "step": 84970 }, { "epoch": 0.364836900990014, "grad_norm": 2.9636216163635254, "learning_rate": 6.379060562420772e-05, "loss": 0.26229047775268555, "step": 84980 }, { "epoch": 0.364879833080034, "grad_norm": 0.3010944724082947, "learning_rate": 6.37862939040901e-05, "loss": 0.07681341767311096, "step": 84990 }, { "epoch": 0.364922765170054, "grad_norm": 1.5422542095184326, "learning_rate": 6.378198218397248e-05, "loss": 0.18676744699478148, "step": 85000 }, { "epoch": 0.364922765170054, "eval_loss": 0.43643614649772644, "eval_runtime": 27.1696, "eval_samples_per_second": 3.681, "eval_steps_per_second": 3.681, "step": 85000 }, { "epoch": 0.364965697260074, "grad_norm": 1.8786780834197998, "learning_rate": 6.377767046385485e-05, "loss": 0.29012351036071776, "step": 85010 }, { "epoch": 0.36500862935009404, "grad_norm": 3.3790924549102783, "learning_rate": 6.377335874373723e-05, "loss": 0.27429943084716796, "step": 85020 }, { "epoch": 0.365051561440114, "grad_norm": 0.01983797922730446, "learning_rate": 6.376904702361961e-05, "loss": 0.2209841251373291, "step": 85030 }, { "epoch": 0.365094493530134, "grad_norm": 0.0703393742442131, "learning_rate": 6.376473530350197e-05, "loss": 0.38023080825805666, "step": 85040 }, { "epoch": 0.36513742562015405, "grad_norm": 0.014833835884928703, "learning_rate": 6.376042358338435e-05, "loss": 0.23475439548492433, "step": 85050 }, { "epoch": 0.36518035771017404, "grad_norm": 0.014177965931594372, "learning_rate": 6.375611186326673e-05, "loss": 0.19169943332672118, "step": 85060 }, { "epoch": 0.36522328980019403, "grad_norm": 0.021022124215960503, "learning_rate": 6.37518001431491e-05, "loss": 0.23145816326141358, "step": 85070 }, { "epoch": 0.36526622189021407, "grad_norm": 0.8904299139976501, "learning_rate": 6.374748842303148e-05, "loss": 0.19575103521347045, "step": 85080 }, { "epoch": 0.36530915398023406, "grad_norm": 1.63796067237854, "learning_rate": 6.374317670291386e-05, "loss": 0.20815696716308593, "step": 85090 }, { "epoch": 0.36535208607025405, "grad_norm": 0.1709212213754654, "learning_rate": 6.373886498279624e-05, "loss": 0.11579384803771972, "step": 85100 }, { "epoch": 0.3653950181602741, "grad_norm": 0.10949070751667023, "learning_rate": 6.373455326267861e-05, "loss": 0.14310755729675292, "step": 85110 }, { "epoch": 0.3654379502502941, "grad_norm": 0.05624139681458473, "learning_rate": 6.373024154256099e-05, "loss": 0.20229153633117675, "step": 85120 }, { "epoch": 0.3654808823403141, "grad_norm": 0.008176709525287151, "learning_rate": 6.372592982244337e-05, "loss": 0.09302598834037781, "step": 85130 }, { "epoch": 0.3655238144303341, "grad_norm": 1.395006537437439, "learning_rate": 6.372161810232575e-05, "loss": 0.13691210746765137, "step": 85140 }, { "epoch": 0.3655667465203541, "grad_norm": 0.0070328544825315475, "learning_rate": 6.371730638220812e-05, "loss": 0.07568751573562622, "step": 85150 }, { "epoch": 0.36560967861037413, "grad_norm": 0.9231022596359253, "learning_rate": 6.37129946620905e-05, "loss": 0.21788375377655028, "step": 85160 }, { "epoch": 0.3656526107003941, "grad_norm": 6.861789226531982, "learning_rate": 6.370868294197288e-05, "loss": 0.4098199844360352, "step": 85170 }, { "epoch": 0.3656955427904141, "grad_norm": 0.015893081203103065, "learning_rate": 6.370437122185526e-05, "loss": 0.1904071807861328, "step": 85180 }, { "epoch": 0.36573847488043415, "grad_norm": 0.09461787343025208, "learning_rate": 6.370005950173763e-05, "loss": 0.22163398265838624, "step": 85190 }, { "epoch": 0.36578140697045414, "grad_norm": 0.006280150264501572, "learning_rate": 6.369574778162001e-05, "loss": 0.056524789333343504, "step": 85200 }, { "epoch": 0.3658243390604741, "grad_norm": 0.5843717455863953, "learning_rate": 6.369143606150237e-05, "loss": 0.43214945793151854, "step": 85210 }, { "epoch": 0.36586727115049417, "grad_norm": 0.04193728789687157, "learning_rate": 6.368712434138475e-05, "loss": 0.15790834426879882, "step": 85220 }, { "epoch": 0.36591020324051415, "grad_norm": 0.024928787723183632, "learning_rate": 6.368281262126713e-05, "loss": 0.1830769658088684, "step": 85230 }, { "epoch": 0.36595313533053414, "grad_norm": 0.05677323043346405, "learning_rate": 6.36785009011495e-05, "loss": 0.24995017051696777, "step": 85240 }, { "epoch": 0.3659960674205542, "grad_norm": 1.3689550161361694, "learning_rate": 6.367418918103188e-05, "loss": 0.1743025302886963, "step": 85250 }, { "epoch": 0.36603899951057417, "grad_norm": 1.934273362159729, "learning_rate": 6.366987746091426e-05, "loss": 0.19505960941314698, "step": 85260 }, { "epoch": 0.36608193160059416, "grad_norm": 1.3472764492034912, "learning_rate": 6.366556574079664e-05, "loss": 0.30374248027801515, "step": 85270 }, { "epoch": 0.3661248636906142, "grad_norm": 0.033247269690036774, "learning_rate": 6.366125402067902e-05, "loss": 0.12651869058609008, "step": 85280 }, { "epoch": 0.3661677957806342, "grad_norm": 0.18167521059513092, "learning_rate": 6.365694230056138e-05, "loss": 0.08280782699584961, "step": 85290 }, { "epoch": 0.3662107278706542, "grad_norm": 0.9661501049995422, "learning_rate": 6.365263058044376e-05, "loss": 0.06233731508255005, "step": 85300 }, { "epoch": 0.3662536599606742, "grad_norm": 1.2363988161087036, "learning_rate": 6.364831886032613e-05, "loss": 0.36128768920898435, "step": 85310 }, { "epoch": 0.3662965920506942, "grad_norm": 0.04574267193675041, "learning_rate": 6.364400714020851e-05, "loss": 0.21424858570098876, "step": 85320 }, { "epoch": 0.36633952414071425, "grad_norm": 3.4972920417785645, "learning_rate": 6.363969542009089e-05, "loss": 0.3097004175186157, "step": 85330 }, { "epoch": 0.36638245623073423, "grad_norm": 8.96206283569336, "learning_rate": 6.363538369997327e-05, "loss": 0.33934545516967773, "step": 85340 }, { "epoch": 0.3664253883207542, "grad_norm": 0.020525289699435234, "learning_rate": 6.363107197985564e-05, "loss": 0.30977373123168944, "step": 85350 }, { "epoch": 0.36646832041077426, "grad_norm": 0.051443614065647125, "learning_rate": 6.362676025973802e-05, "loss": 0.2037959337234497, "step": 85360 }, { "epoch": 0.36651125250079425, "grad_norm": 0.0785449668765068, "learning_rate": 6.36224485396204e-05, "loss": 0.2925347089767456, "step": 85370 }, { "epoch": 0.36655418459081424, "grad_norm": 0.15500248968601227, "learning_rate": 6.361813681950278e-05, "loss": 0.18055803775787355, "step": 85380 }, { "epoch": 0.3665971166808343, "grad_norm": 0.03973287343978882, "learning_rate": 6.361382509938515e-05, "loss": 0.3326643228530884, "step": 85390 }, { "epoch": 0.36664004877085427, "grad_norm": 0.893135130405426, "learning_rate": 6.360951337926753e-05, "loss": 0.297300910949707, "step": 85400 }, { "epoch": 0.36668298086087425, "grad_norm": 4.013089656829834, "learning_rate": 6.360520165914991e-05, "loss": 0.3480620622634888, "step": 85410 }, { "epoch": 0.3667259129508943, "grad_norm": 0.017691394314169884, "learning_rate": 6.360088993903229e-05, "loss": 0.03665188848972321, "step": 85420 }, { "epoch": 0.3667688450409143, "grad_norm": 0.9386993646621704, "learning_rate": 6.359657821891466e-05, "loss": 0.3473350048065186, "step": 85430 }, { "epoch": 0.36681177713093427, "grad_norm": 14.044384002685547, "learning_rate": 6.359226649879704e-05, "loss": 0.2297840118408203, "step": 85440 }, { "epoch": 0.3668547092209543, "grad_norm": 1.6908752918243408, "learning_rate": 6.35879547786794e-05, "loss": 0.15974087715148927, "step": 85450 }, { "epoch": 0.3668976413109743, "grad_norm": 3.4702935218811035, "learning_rate": 6.358364305856178e-05, "loss": 0.1769618272781372, "step": 85460 }, { "epoch": 0.3669405734009943, "grad_norm": 0.022644072771072388, "learning_rate": 6.357933133844416e-05, "loss": 0.426633882522583, "step": 85470 }, { "epoch": 0.36698350549101433, "grad_norm": 1.2880467176437378, "learning_rate": 6.357501961832654e-05, "loss": 0.2576338052749634, "step": 85480 }, { "epoch": 0.3670264375810343, "grad_norm": 4.9231743812561035, "learning_rate": 6.357070789820891e-05, "loss": 0.3569629192352295, "step": 85490 }, { "epoch": 0.3670693696710543, "grad_norm": 0.006700332276523113, "learning_rate": 6.356639617809129e-05, "loss": 0.1909146189689636, "step": 85500 }, { "epoch": 0.36711230176107434, "grad_norm": 0.5436863303184509, "learning_rate": 6.356208445797367e-05, "loss": 0.20022847652435302, "step": 85510 }, { "epoch": 0.36715523385109433, "grad_norm": 0.03987458720803261, "learning_rate": 6.355777273785604e-05, "loss": 0.2638335466384888, "step": 85520 }, { "epoch": 0.3671981659411143, "grad_norm": 3.6243085861206055, "learning_rate": 6.355346101773842e-05, "loss": 0.271874213218689, "step": 85530 }, { "epoch": 0.36724109803113436, "grad_norm": 1.5448380708694458, "learning_rate": 6.354914929762079e-05, "loss": 0.3061804294586182, "step": 85540 }, { "epoch": 0.36728403012115435, "grad_norm": 0.32279542088508606, "learning_rate": 6.354483757750316e-05, "loss": 0.04409662783145905, "step": 85550 }, { "epoch": 0.3673269622111744, "grad_norm": 0.03715287521481514, "learning_rate": 6.354052585738554e-05, "loss": 0.21204564571380616, "step": 85560 }, { "epoch": 0.3673698943011944, "grad_norm": 0.0034397614654153585, "learning_rate": 6.353621413726792e-05, "loss": 0.05352402925491333, "step": 85570 }, { "epoch": 0.36741282639121436, "grad_norm": 0.10549236088991165, "learning_rate": 6.35319024171503e-05, "loss": 0.2931922674179077, "step": 85580 }, { "epoch": 0.3674557584812344, "grad_norm": 0.061459217220544815, "learning_rate": 6.352759069703267e-05, "loss": 0.10618021488189697, "step": 85590 }, { "epoch": 0.3674986905712544, "grad_norm": 0.68232262134552, "learning_rate": 6.352327897691506e-05, "loss": 0.2445995330810547, "step": 85600 }, { "epoch": 0.3675416226612744, "grad_norm": 0.13406546413898468, "learning_rate": 6.351896725679744e-05, "loss": 0.05700792670249939, "step": 85610 }, { "epoch": 0.3675845547512944, "grad_norm": 1.9055348634719849, "learning_rate": 6.35146555366798e-05, "loss": 0.4043318748474121, "step": 85620 }, { "epoch": 0.3676274868413144, "grad_norm": 1.7802194356918335, "learning_rate": 6.351034381656218e-05, "loss": 0.15079153776168824, "step": 85630 }, { "epoch": 0.3676704189313344, "grad_norm": 3.792257785797119, "learning_rate": 6.350603209644456e-05, "loss": 0.026846662163734436, "step": 85640 }, { "epoch": 0.36771335102135444, "grad_norm": 0.4064582586288452, "learning_rate": 6.350172037632694e-05, "loss": 0.27849345207214354, "step": 85650 }, { "epoch": 0.3677562831113744, "grad_norm": 2.508204460144043, "learning_rate": 6.349740865620931e-05, "loss": 0.1909274935722351, "step": 85660 }, { "epoch": 0.3677992152013944, "grad_norm": 1.741932988166809, "learning_rate": 6.349309693609169e-05, "loss": 0.28033435344696045, "step": 85670 }, { "epoch": 0.36784214729141446, "grad_norm": 0.08544985949993134, "learning_rate": 6.348878521597407e-05, "loss": 0.2724045991897583, "step": 85680 }, { "epoch": 0.36788507938143444, "grad_norm": 1.7382010221481323, "learning_rate": 6.348447349585645e-05, "loss": 0.4020527839660645, "step": 85690 }, { "epoch": 0.36792801147145443, "grad_norm": 0.20443333685398102, "learning_rate": 6.348016177573881e-05, "loss": 0.25672647953033445, "step": 85700 }, { "epoch": 0.3679709435614745, "grad_norm": 0.056993529200553894, "learning_rate": 6.347585005562119e-05, "loss": 0.10209436416625976, "step": 85710 }, { "epoch": 0.36801387565149446, "grad_norm": 2.561861276626587, "learning_rate": 6.347153833550356e-05, "loss": 0.12558455467224122, "step": 85720 }, { "epoch": 0.36805680774151445, "grad_norm": 0.21617703139781952, "learning_rate": 6.346722661538594e-05, "loss": 0.030121004581451415, "step": 85730 }, { "epoch": 0.3680997398315345, "grad_norm": 1.5870252847671509, "learning_rate": 6.346291489526832e-05, "loss": 0.19039595127105713, "step": 85740 }, { "epoch": 0.3681426719215545, "grad_norm": 1.228922963142395, "learning_rate": 6.34586031751507e-05, "loss": 0.3952412843704224, "step": 85750 }, { "epoch": 0.3681856040115745, "grad_norm": 1.321328043937683, "learning_rate": 6.345429145503307e-05, "loss": 0.18042598962783812, "step": 85760 }, { "epoch": 0.3682285361015945, "grad_norm": 1.9140307903289795, "learning_rate": 6.344997973491545e-05, "loss": 0.1848459005355835, "step": 85770 }, { "epoch": 0.3682714681916145, "grad_norm": 0.00764481769874692, "learning_rate": 6.344566801479782e-05, "loss": 0.10012017488479615, "step": 85780 }, { "epoch": 0.36831440028163454, "grad_norm": 0.9462195634841919, "learning_rate": 6.344135629468019e-05, "loss": 0.301893949508667, "step": 85790 }, { "epoch": 0.3683573323716545, "grad_norm": 0.013289229944348335, "learning_rate": 6.343704457456257e-05, "loss": 0.3308419704437256, "step": 85800 }, { "epoch": 0.3684002644616745, "grad_norm": 0.002317358274012804, "learning_rate": 6.343273285444495e-05, "loss": 0.30897011756896975, "step": 85810 }, { "epoch": 0.36844319655169455, "grad_norm": 0.2647899091243744, "learning_rate": 6.342842113432734e-05, "loss": 0.1876887321472168, "step": 85820 }, { "epoch": 0.36848612864171454, "grad_norm": 0.22192956507205963, "learning_rate": 6.342410941420972e-05, "loss": 0.0698373019695282, "step": 85830 }, { "epoch": 0.3685290607317345, "grad_norm": 0.0032855840399861336, "learning_rate": 6.341979769409209e-05, "loss": 0.205841588973999, "step": 85840 }, { "epoch": 0.36857199282175457, "grad_norm": 0.017166977748274803, "learning_rate": 6.341548597397447e-05, "loss": 0.39359591007232664, "step": 85850 }, { "epoch": 0.36861492491177456, "grad_norm": 2.2726593017578125, "learning_rate": 6.341117425385685e-05, "loss": 0.2558701992034912, "step": 85860 }, { "epoch": 0.36865785700179454, "grad_norm": 4.76535177230835, "learning_rate": 6.340686253373921e-05, "loss": 0.4349191188812256, "step": 85870 }, { "epoch": 0.3687007890918146, "grad_norm": 0.05056734010577202, "learning_rate": 6.340255081362159e-05, "loss": 0.15551151037216188, "step": 85880 }, { "epoch": 0.36874372118183457, "grad_norm": 0.014606560580432415, "learning_rate": 6.339823909350397e-05, "loss": 0.10910115242004395, "step": 85890 }, { "epoch": 0.36878665327185456, "grad_norm": 4.6601128578186035, "learning_rate": 6.339392737338634e-05, "loss": 0.41695199012756345, "step": 85900 }, { "epoch": 0.3688295853618746, "grad_norm": 0.02767966315150261, "learning_rate": 6.338961565326872e-05, "loss": 0.26373655796051027, "step": 85910 }, { "epoch": 0.3688725174518946, "grad_norm": 0.00913521833717823, "learning_rate": 6.33853039331511e-05, "loss": 0.13180015087127686, "step": 85920 }, { "epoch": 0.3689154495419146, "grad_norm": 2.545062780380249, "learning_rate": 6.338099221303348e-05, "loss": 0.1353710412979126, "step": 85930 }, { "epoch": 0.3689583816319346, "grad_norm": 1.2263450622558594, "learning_rate": 6.337668049291585e-05, "loss": 0.5144428253173828, "step": 85940 }, { "epoch": 0.3690013137219546, "grad_norm": 0.11725396662950516, "learning_rate": 6.337236877279822e-05, "loss": 0.09795107245445252, "step": 85950 }, { "epoch": 0.3690442458119746, "grad_norm": 0.9828706979751587, "learning_rate": 6.33680570526806e-05, "loss": 0.08187245130538941, "step": 85960 }, { "epoch": 0.36908717790199463, "grad_norm": 1.8195759057998657, "learning_rate": 6.336374533256297e-05, "loss": 0.3792530298233032, "step": 85970 }, { "epoch": 0.3691301099920146, "grad_norm": 6.6570000648498535, "learning_rate": 6.335943361244535e-05, "loss": 0.1150534987449646, "step": 85980 }, { "epoch": 0.36917304208203466, "grad_norm": 0.7525167465209961, "learning_rate": 6.335512189232773e-05, "loss": 0.15633391141891478, "step": 85990 }, { "epoch": 0.36921597417205465, "grad_norm": 0.055644210427999496, "learning_rate": 6.33508101722101e-05, "loss": 0.04366555511951446, "step": 86000 }, { "epoch": 0.36921597417205465, "eval_loss": 0.4317818582057953, "eval_runtime": 27.1777, "eval_samples_per_second": 3.679, "eval_steps_per_second": 3.679, "step": 86000 }, { "epoch": 0.36925890626207464, "grad_norm": 0.035437412559986115, "learning_rate": 6.334649845209248e-05, "loss": 0.26076564788818357, "step": 86010 }, { "epoch": 0.3693018383520947, "grad_norm": 0.12425762414932251, "learning_rate": 6.334218673197486e-05, "loss": 0.3686336040496826, "step": 86020 }, { "epoch": 0.36934477044211467, "grad_norm": 0.00577976368367672, "learning_rate": 6.333787501185722e-05, "loss": 0.19180816411972046, "step": 86030 }, { "epoch": 0.36938770253213465, "grad_norm": 0.10685226321220398, "learning_rate": 6.333356329173961e-05, "loss": 0.14715638160705566, "step": 86040 }, { "epoch": 0.3694306346221547, "grad_norm": 0.037477098405361176, "learning_rate": 6.332925157162199e-05, "loss": 0.1778426170349121, "step": 86050 }, { "epoch": 0.3694735667121747, "grad_norm": 0.2366664856672287, "learning_rate": 6.332493985150437e-05, "loss": 0.23205168247222902, "step": 86060 }, { "epoch": 0.36951649880219467, "grad_norm": 16.036067962646484, "learning_rate": 6.332062813138674e-05, "loss": 0.14291188716888428, "step": 86070 }, { "epoch": 0.3695594308922147, "grad_norm": 0.0030410068575292826, "learning_rate": 6.331631641126912e-05, "loss": 0.0985245168209076, "step": 86080 }, { "epoch": 0.3696023629822347, "grad_norm": 2.2322115898132324, "learning_rate": 6.33120046911515e-05, "loss": 0.3554996013641357, "step": 86090 }, { "epoch": 0.3696452950722547, "grad_norm": 2.5456347465515137, "learning_rate": 6.330769297103388e-05, "loss": 0.11362524032592773, "step": 86100 }, { "epoch": 0.36968822716227473, "grad_norm": 0.03111317940056324, "learning_rate": 6.330338125091624e-05, "loss": 0.39795794486999514, "step": 86110 }, { "epoch": 0.3697311592522947, "grad_norm": 1.2640771865844727, "learning_rate": 6.329906953079862e-05, "loss": 0.3355656623840332, "step": 86120 }, { "epoch": 0.3697740913423147, "grad_norm": 0.7932472229003906, "learning_rate": 6.3294757810681e-05, "loss": 0.19024043083190917, "step": 86130 }, { "epoch": 0.36981702343233475, "grad_norm": 1.5137596130371094, "learning_rate": 6.329044609056337e-05, "loss": 0.21221389770507812, "step": 86140 }, { "epoch": 0.36985995552235473, "grad_norm": 0.9180253148078918, "learning_rate": 6.328613437044575e-05, "loss": 0.3574000358581543, "step": 86150 }, { "epoch": 0.3699028876123747, "grad_norm": 3.4195072650909424, "learning_rate": 6.328182265032813e-05, "loss": 0.33246517181396484, "step": 86160 }, { "epoch": 0.36994581970239476, "grad_norm": 0.40100616216659546, "learning_rate": 6.32775109302105e-05, "loss": 0.14388233423233032, "step": 86170 }, { "epoch": 0.36998875179241475, "grad_norm": 0.16329815983772278, "learning_rate": 6.327319921009288e-05, "loss": 0.23172502517700194, "step": 86180 }, { "epoch": 0.3700316838824348, "grad_norm": 0.4270736873149872, "learning_rate": 6.326888748997526e-05, "loss": 0.17500052452087403, "step": 86190 }, { "epoch": 0.3700746159724548, "grad_norm": 1.2367000579833984, "learning_rate": 6.326457576985762e-05, "loss": 0.1254699110984802, "step": 86200 }, { "epoch": 0.37011754806247477, "grad_norm": 0.02919580042362213, "learning_rate": 6.326026404974e-05, "loss": 0.2366110563278198, "step": 86210 }, { "epoch": 0.3701604801524948, "grad_norm": 0.2189096361398697, "learning_rate": 6.325595232962238e-05, "loss": 0.16678482294082642, "step": 86220 }, { "epoch": 0.3702034122425148, "grad_norm": 0.012859572656452656, "learning_rate": 6.325164060950475e-05, "loss": 0.3061917543411255, "step": 86230 }, { "epoch": 0.3702463443325348, "grad_norm": 0.0979384034872055, "learning_rate": 6.324732888938713e-05, "loss": 0.16311391592025756, "step": 86240 }, { "epoch": 0.3702892764225548, "grad_norm": 0.9386596083641052, "learning_rate": 6.324301716926951e-05, "loss": 0.14398750066757202, "step": 86250 }, { "epoch": 0.3703322085125748, "grad_norm": 0.08035529404878616, "learning_rate": 6.323870544915189e-05, "loss": 0.2772280931472778, "step": 86260 }, { "epoch": 0.3703751406025948, "grad_norm": 0.00924977008253336, "learning_rate": 6.323439372903426e-05, "loss": 0.3513696908950806, "step": 86270 }, { "epoch": 0.37041807269261484, "grad_norm": 0.46690458059310913, "learning_rate": 6.323008200891664e-05, "loss": 0.19348812103271484, "step": 86280 }, { "epoch": 0.37046100478263483, "grad_norm": 0.008506237529218197, "learning_rate": 6.322577028879902e-05, "loss": 0.18626704216003417, "step": 86290 }, { "epoch": 0.3705039368726548, "grad_norm": 1.465051531791687, "learning_rate": 6.32214585686814e-05, "loss": 0.085543692111969, "step": 86300 }, { "epoch": 0.37054686896267486, "grad_norm": 0.24718154966831207, "learning_rate": 6.321714684856377e-05, "loss": 0.5089605808258056, "step": 86310 }, { "epoch": 0.37058980105269484, "grad_norm": 1.2836334705352783, "learning_rate": 6.321283512844615e-05, "loss": 0.313499903678894, "step": 86320 }, { "epoch": 0.37063273314271483, "grad_norm": 2.7097294330596924, "learning_rate": 6.320852340832853e-05, "loss": 0.2826047420501709, "step": 86330 }, { "epoch": 0.3706756652327349, "grad_norm": 10.594082832336426, "learning_rate": 6.32042116882109e-05, "loss": 0.2791079759597778, "step": 86340 }, { "epoch": 0.37071859732275486, "grad_norm": 0.009699574671685696, "learning_rate": 6.319989996809328e-05, "loss": 0.16034698486328125, "step": 86350 }, { "epoch": 0.37076152941277485, "grad_norm": 0.37866318225860596, "learning_rate": 6.319558824797565e-05, "loss": 0.1663529634475708, "step": 86360 }, { "epoch": 0.3708044615027949, "grad_norm": 6.374588489532471, "learning_rate": 6.319127652785802e-05, "loss": 0.2670762538909912, "step": 86370 }, { "epoch": 0.3708473935928149, "grad_norm": 0.12831802666187286, "learning_rate": 6.31869648077404e-05, "loss": 0.3458155393600464, "step": 86380 }, { "epoch": 0.37089032568283486, "grad_norm": 0.013740170747041702, "learning_rate": 6.318265308762278e-05, "loss": 0.19004298448562623, "step": 86390 }, { "epoch": 0.3709332577728549, "grad_norm": 0.009914065711200237, "learning_rate": 6.317834136750516e-05, "loss": 0.18159607648849488, "step": 86400 }, { "epoch": 0.3709761898628749, "grad_norm": 0.11303571611642838, "learning_rate": 6.317402964738753e-05, "loss": 0.19277287721633912, "step": 86410 }, { "epoch": 0.37101912195289494, "grad_norm": 0.2996561825275421, "learning_rate": 6.316971792726991e-05, "loss": 0.09895474910736084, "step": 86420 }, { "epoch": 0.3710620540429149, "grad_norm": 0.4050360321998596, "learning_rate": 6.316540620715229e-05, "loss": 0.29994912147521974, "step": 86430 }, { "epoch": 0.3711049861329349, "grad_norm": 2.208390474319458, "learning_rate": 6.316109448703465e-05, "loss": 0.2208533763885498, "step": 86440 }, { "epoch": 0.37114791822295495, "grad_norm": 0.021781016141176224, "learning_rate": 6.315678276691703e-05, "loss": 0.09090492129325867, "step": 86450 }, { "epoch": 0.37119085031297494, "grad_norm": 0.028992371633648872, "learning_rate": 6.31524710467994e-05, "loss": 0.2799768209457397, "step": 86460 }, { "epoch": 0.3712337824029949, "grad_norm": 0.1658248007297516, "learning_rate": 6.314815932668178e-05, "loss": 0.1564157485961914, "step": 86470 }, { "epoch": 0.37127671449301497, "grad_norm": 12.077330589294434, "learning_rate": 6.314384760656416e-05, "loss": 0.25147428512573244, "step": 86480 }, { "epoch": 0.37131964658303496, "grad_norm": 0.13617444038391113, "learning_rate": 6.313953588644654e-05, "loss": 0.4071957588195801, "step": 86490 }, { "epoch": 0.37136257867305494, "grad_norm": 1.4712886810302734, "learning_rate": 6.313522416632892e-05, "loss": 0.1500526785850525, "step": 86500 }, { "epoch": 0.371405510763075, "grad_norm": 0.09021810442209244, "learning_rate": 6.31309124462113e-05, "loss": 0.07514996528625488, "step": 86510 }, { "epoch": 0.371448442853095, "grad_norm": 6.454043388366699, "learning_rate": 6.312660072609367e-05, "loss": 0.5268967628479004, "step": 86520 }, { "epoch": 0.37149137494311496, "grad_norm": 0.22059176862239838, "learning_rate": 6.312228900597605e-05, "loss": 0.21707556247711182, "step": 86530 }, { "epoch": 0.371534307033135, "grad_norm": 1.2967723608016968, "learning_rate": 6.311797728585843e-05, "loss": 0.2733563184738159, "step": 86540 }, { "epoch": 0.371577239123155, "grad_norm": 1.554063081741333, "learning_rate": 6.31136655657408e-05, "loss": 0.16565685272216796, "step": 86550 }, { "epoch": 0.371620171213175, "grad_norm": 0.09298480302095413, "learning_rate": 6.310935384562318e-05, "loss": 0.18474723100662233, "step": 86560 }, { "epoch": 0.371663103303195, "grad_norm": 4.236249923706055, "learning_rate": 6.310504212550556e-05, "loss": 0.3226930618286133, "step": 86570 }, { "epoch": 0.371706035393215, "grad_norm": 0.6600306630134583, "learning_rate": 6.310073040538793e-05, "loss": 0.2629078388214111, "step": 86580 }, { "epoch": 0.371748967483235, "grad_norm": 2.777444839477539, "learning_rate": 6.309641868527031e-05, "loss": 0.17999004125595092, "step": 86590 }, { "epoch": 0.37179189957325504, "grad_norm": 0.15516115725040436, "learning_rate": 6.309210696515269e-05, "loss": 0.19380651712417601, "step": 86600 }, { "epoch": 0.371834831663275, "grad_norm": 0.7865200638771057, "learning_rate": 6.308779524503505e-05, "loss": 0.2544250965118408, "step": 86610 }, { "epoch": 0.37187776375329507, "grad_norm": 0.14352178573608398, "learning_rate": 6.308348352491743e-05, "loss": 0.3288907527923584, "step": 86620 }, { "epoch": 0.37192069584331505, "grad_norm": 64.95818328857422, "learning_rate": 6.307917180479981e-05, "loss": 0.3481205940246582, "step": 86630 }, { "epoch": 0.37196362793333504, "grad_norm": 5.94766092300415, "learning_rate": 6.307486008468219e-05, "loss": 0.26511225700378416, "step": 86640 }, { "epoch": 0.3720065600233551, "grad_norm": 0.028272368013858795, "learning_rate": 6.307054836456456e-05, "loss": 0.24481871128082275, "step": 86650 }, { "epoch": 0.37204949211337507, "grad_norm": 0.1931532770395279, "learning_rate": 6.306623664444694e-05, "loss": 0.0944499909877777, "step": 86660 }, { "epoch": 0.37209242420339506, "grad_norm": 0.02626582235097885, "learning_rate": 6.306192492432932e-05, "loss": 0.17654716968536377, "step": 86670 }, { "epoch": 0.3721353562934151, "grad_norm": 5.341935634613037, "learning_rate": 6.30576132042117e-05, "loss": 0.2550107479095459, "step": 86680 }, { "epoch": 0.3721782883834351, "grad_norm": 0.019004661589860916, "learning_rate": 6.305330148409406e-05, "loss": 0.2920238018035889, "step": 86690 }, { "epoch": 0.37222122047345507, "grad_norm": 0.12061820179224014, "learning_rate": 6.304898976397644e-05, "loss": 0.13331669569015503, "step": 86700 }, { "epoch": 0.3722641525634751, "grad_norm": 0.004373606294393539, "learning_rate": 6.304467804385881e-05, "loss": 0.14938576221466066, "step": 86710 }, { "epoch": 0.3723070846534951, "grad_norm": 0.06134732440114021, "learning_rate": 6.304036632374119e-05, "loss": 0.3664681434631348, "step": 86720 }, { "epoch": 0.3723500167435151, "grad_norm": 0.03127064183354378, "learning_rate": 6.303605460362357e-05, "loss": 0.19766569137573242, "step": 86730 }, { "epoch": 0.37239294883353513, "grad_norm": 0.6509777903556824, "learning_rate": 6.303174288350595e-05, "loss": 0.3099424123764038, "step": 86740 }, { "epoch": 0.3724358809235551, "grad_norm": 0.10416044294834137, "learning_rate": 6.302743116338832e-05, "loss": 0.20144286155700683, "step": 86750 }, { "epoch": 0.3724788130135751, "grad_norm": 0.034175265580415726, "learning_rate": 6.30231194432707e-05, "loss": 0.3239238739013672, "step": 86760 }, { "epoch": 0.37252174510359515, "grad_norm": 1.1124687194824219, "learning_rate": 6.301880772315308e-05, "loss": 0.3482966899871826, "step": 86770 }, { "epoch": 0.37256467719361513, "grad_norm": 2.5001158714294434, "learning_rate": 6.301449600303545e-05, "loss": 0.1421452760696411, "step": 86780 }, { "epoch": 0.3726076092836351, "grad_norm": 2.893115520477295, "learning_rate": 6.301018428291783e-05, "loss": 0.2680665493011475, "step": 86790 }, { "epoch": 0.37265054137365516, "grad_norm": 1.1407378911972046, "learning_rate": 6.300587256280021e-05, "loss": 0.24747891426086427, "step": 86800 }, { "epoch": 0.37269347346367515, "grad_norm": 3.4893882274627686, "learning_rate": 6.300156084268259e-05, "loss": 0.17726287841796876, "step": 86810 }, { "epoch": 0.37273640555369514, "grad_norm": 0.4985063672065735, "learning_rate": 6.299724912256496e-05, "loss": 0.195892333984375, "step": 86820 }, { "epoch": 0.3727793376437152, "grad_norm": 3.9934985637664795, "learning_rate": 6.299293740244734e-05, "loss": 0.15681538581848145, "step": 86830 }, { "epoch": 0.37282226973373517, "grad_norm": 2.1810057163238525, "learning_rate": 6.298862568232972e-05, "loss": 0.2681360006332397, "step": 86840 }, { "epoch": 0.3728652018237552, "grad_norm": 2.539444923400879, "learning_rate": 6.298431396221208e-05, "loss": 0.030637264251708984, "step": 86850 }, { "epoch": 0.3729081339137752, "grad_norm": 0.027268722653388977, "learning_rate": 6.298000224209446e-05, "loss": 0.35657052993774413, "step": 86860 }, { "epoch": 0.3729510660037952, "grad_norm": 0.06975622475147247, "learning_rate": 6.297569052197684e-05, "loss": 0.014813748002052308, "step": 86870 }, { "epoch": 0.3729939980938152, "grad_norm": 0.36983099579811096, "learning_rate": 6.297137880185921e-05, "loss": 0.2915071487426758, "step": 86880 }, { "epoch": 0.3730369301838352, "grad_norm": 0.021252155303955078, "learning_rate": 6.296706708174159e-05, "loss": 0.1459917426109314, "step": 86890 }, { "epoch": 0.3730798622738552, "grad_norm": 3.1471076011657715, "learning_rate": 6.296275536162397e-05, "loss": 0.36190290451049806, "step": 86900 }, { "epoch": 0.37312279436387524, "grad_norm": 0.017519652843475342, "learning_rate": 6.295844364150635e-05, "loss": 0.09693054556846618, "step": 86910 }, { "epoch": 0.37316572645389523, "grad_norm": 2.969122886657715, "learning_rate": 6.295413192138872e-05, "loss": 0.31624269485473633, "step": 86920 }, { "epoch": 0.3732086585439152, "grad_norm": 5.2511420249938965, "learning_rate": 6.29498202012711e-05, "loss": 0.22679619789123534, "step": 86930 }, { "epoch": 0.37325159063393526, "grad_norm": 0.019706908613443375, "learning_rate": 6.294550848115347e-05, "loss": 0.2215877056121826, "step": 86940 }, { "epoch": 0.37329452272395525, "grad_norm": 5.416234493255615, "learning_rate": 6.294119676103584e-05, "loss": 0.38700578212738035, "step": 86950 }, { "epoch": 0.37333745481397523, "grad_norm": 2.552222490310669, "learning_rate": 6.293688504091822e-05, "loss": 0.3708165168762207, "step": 86960 }, { "epoch": 0.3733803869039953, "grad_norm": 0.34137478470802307, "learning_rate": 6.29325733208006e-05, "loss": 0.19809558391571044, "step": 86970 }, { "epoch": 0.37342331899401526, "grad_norm": 0.11157994717359543, "learning_rate": 6.292826160068297e-05, "loss": 0.25876359939575194, "step": 86980 }, { "epoch": 0.37346625108403525, "grad_norm": 2.443289041519165, "learning_rate": 6.292394988056535e-05, "loss": 0.14729411602020265, "step": 86990 }, { "epoch": 0.3735091831740553, "grad_norm": 0.17522040009498596, "learning_rate": 6.291963816044774e-05, "loss": 0.3029524564743042, "step": 87000 }, { "epoch": 0.3735091831740553, "eval_loss": 0.4400550425052643, "eval_runtime": 27.2101, "eval_samples_per_second": 3.675, "eval_steps_per_second": 3.675, "step": 87000 }, { "epoch": 0.3735521152640753, "grad_norm": 2.0458552837371826, "learning_rate": 6.291532644033012e-05, "loss": 0.18736555576324462, "step": 87010 }, { "epoch": 0.37359504735409527, "grad_norm": 2.114137649536133, "learning_rate": 6.291101472021248e-05, "loss": 0.05374835729598999, "step": 87020 }, { "epoch": 0.3736379794441153, "grad_norm": 0.018394406884908676, "learning_rate": 6.290670300009486e-05, "loss": 0.18129925727844237, "step": 87030 }, { "epoch": 0.3736809115341353, "grad_norm": 0.13711842894554138, "learning_rate": 6.290239127997724e-05, "loss": 0.2827707052230835, "step": 87040 }, { "epoch": 0.37372384362415534, "grad_norm": 0.027511335909366608, "learning_rate": 6.289807955985962e-05, "loss": 0.2441173553466797, "step": 87050 }, { "epoch": 0.3737667757141753, "grad_norm": 2.395310640335083, "learning_rate": 6.289376783974199e-05, "loss": 0.11282216310501099, "step": 87060 }, { "epoch": 0.3738097078041953, "grad_norm": 5.538151741027832, "learning_rate": 6.288945611962437e-05, "loss": 0.0628732681274414, "step": 87070 }, { "epoch": 0.37385263989421536, "grad_norm": 0.09157660603523254, "learning_rate": 6.288514439950675e-05, "loss": 0.09940847158432006, "step": 87080 }, { "epoch": 0.37389557198423534, "grad_norm": 0.004044859204441309, "learning_rate": 6.288083267938913e-05, "loss": 0.012469526380300522, "step": 87090 }, { "epoch": 0.37393850407425533, "grad_norm": 0.02948996238410473, "learning_rate": 6.287652095927149e-05, "loss": 0.22384850978851317, "step": 87100 }, { "epoch": 0.37398143616427537, "grad_norm": 0.02249191142618656, "learning_rate": 6.287220923915387e-05, "loss": 0.2673391580581665, "step": 87110 }, { "epoch": 0.37402436825429536, "grad_norm": 0.003082884708419442, "learning_rate": 6.286789751903624e-05, "loss": 0.12813795804977418, "step": 87120 }, { "epoch": 0.37406730034431535, "grad_norm": 0.01360277272760868, "learning_rate": 6.286358579891862e-05, "loss": 0.22645974159240723, "step": 87130 }, { "epoch": 0.3741102324343354, "grad_norm": 0.021722465753555298, "learning_rate": 6.2859274078801e-05, "loss": 0.1636356830596924, "step": 87140 }, { "epoch": 0.3741531645243554, "grad_norm": 4.585870742797852, "learning_rate": 6.285496235868338e-05, "loss": 0.2738921642303467, "step": 87150 }, { "epoch": 0.37419609661437536, "grad_norm": 2.0261483192443848, "learning_rate": 6.285065063856575e-05, "loss": 0.2621325969696045, "step": 87160 }, { "epoch": 0.3742390287043954, "grad_norm": 1.8282241821289062, "learning_rate": 6.284633891844813e-05, "loss": 0.23655502796173095, "step": 87170 }, { "epoch": 0.3742819607944154, "grad_norm": 0.038744006305933, "learning_rate": 6.28420271983305e-05, "loss": 0.28763017654418943, "step": 87180 }, { "epoch": 0.3743248928844354, "grad_norm": 0.030506085604429245, "learning_rate": 6.283771547821287e-05, "loss": 0.1275754451751709, "step": 87190 }, { "epoch": 0.3743678249744554, "grad_norm": 0.0035087086725980043, "learning_rate": 6.283340375809525e-05, "loss": 0.1500408411026001, "step": 87200 }, { "epoch": 0.3744107570644754, "grad_norm": 5.859970569610596, "learning_rate": 6.282909203797763e-05, "loss": 0.2530032157897949, "step": 87210 }, { "epoch": 0.3744536891544954, "grad_norm": 0.0439981184899807, "learning_rate": 6.282478031786002e-05, "loss": 0.2651748895645142, "step": 87220 }, { "epoch": 0.37449662124451544, "grad_norm": 3.4375038146972656, "learning_rate": 6.28204685977424e-05, "loss": 0.2220317840576172, "step": 87230 }, { "epoch": 0.3745395533345354, "grad_norm": 0.005168871488422155, "learning_rate": 6.281615687762477e-05, "loss": 0.43964247703552245, "step": 87240 }, { "epoch": 0.3745824854245554, "grad_norm": 0.041050877422094345, "learning_rate": 6.281184515750715e-05, "loss": 0.4249211311340332, "step": 87250 }, { "epoch": 0.37462541751457545, "grad_norm": 0.08297502249479294, "learning_rate": 6.280753343738951e-05, "loss": 0.4031353950500488, "step": 87260 }, { "epoch": 0.37466834960459544, "grad_norm": 0.10702119022607803, "learning_rate": 6.280322171727189e-05, "loss": 0.5200258731842041, "step": 87270 }, { "epoch": 0.3747112816946155, "grad_norm": 0.04025140777230263, "learning_rate": 6.279890999715427e-05, "loss": 0.10984960794448853, "step": 87280 }, { "epoch": 0.37475421378463547, "grad_norm": 0.033183638006448746, "learning_rate": 6.279459827703664e-05, "loss": 0.32496328353881837, "step": 87290 }, { "epoch": 0.37479714587465546, "grad_norm": 4.962788105010986, "learning_rate": 6.279028655691902e-05, "loss": 0.18989670276641846, "step": 87300 }, { "epoch": 0.3748400779646755, "grad_norm": 0.16489867866039276, "learning_rate": 6.27859748368014e-05, "loss": 0.23146965503692626, "step": 87310 }, { "epoch": 0.3748830100546955, "grad_norm": 0.2948302626609802, "learning_rate": 6.278166311668378e-05, "loss": 0.21655528545379638, "step": 87320 }, { "epoch": 0.3749259421447155, "grad_norm": 1.196093201637268, "learning_rate": 6.277735139656615e-05, "loss": 0.39217898845672605, "step": 87330 }, { "epoch": 0.3749688742347355, "grad_norm": 1.7001733779907227, "learning_rate": 6.277303967644853e-05, "loss": 0.25654077529907227, "step": 87340 }, { "epoch": 0.3750118063247555, "grad_norm": 0.7644587755203247, "learning_rate": 6.27687279563309e-05, "loss": 0.2915396451950073, "step": 87350 }, { "epoch": 0.3750547384147755, "grad_norm": 0.12401857227087021, "learning_rate": 6.276441623621327e-05, "loss": 0.3159175872802734, "step": 87360 }, { "epoch": 0.37509767050479553, "grad_norm": 0.022166695445775986, "learning_rate": 6.276010451609565e-05, "loss": 0.1497678279876709, "step": 87370 }, { "epoch": 0.3751406025948155, "grad_norm": 0.4812676012516022, "learning_rate": 6.275579279597803e-05, "loss": 0.12899582386016845, "step": 87380 }, { "epoch": 0.3751835346848355, "grad_norm": 0.10921736806631088, "learning_rate": 6.27514810758604e-05, "loss": 0.1952446460723877, "step": 87390 }, { "epoch": 0.37522646677485555, "grad_norm": 0.9592009782791138, "learning_rate": 6.274716935574278e-05, "loss": 0.2865952730178833, "step": 87400 }, { "epoch": 0.37526939886487554, "grad_norm": 0.010206897743046284, "learning_rate": 6.274285763562516e-05, "loss": 0.2494706392288208, "step": 87410 }, { "epoch": 0.3753123309548955, "grad_norm": 0.1299165040254593, "learning_rate": 6.273854591550754e-05, "loss": 0.1522657871246338, "step": 87420 }, { "epoch": 0.37535526304491557, "grad_norm": 0.009295583702623844, "learning_rate": 6.27342341953899e-05, "loss": 0.01438492089509964, "step": 87430 }, { "epoch": 0.37539819513493555, "grad_norm": 0.699145495891571, "learning_rate": 6.272992247527229e-05, "loss": 0.15755927562713623, "step": 87440 }, { "epoch": 0.37544112722495554, "grad_norm": 13.779772758483887, "learning_rate": 6.272561075515467e-05, "loss": 0.10173404216766357, "step": 87450 }, { "epoch": 0.3754840593149756, "grad_norm": 0.06877686083316803, "learning_rate": 6.272129903503705e-05, "loss": 0.11068353652954102, "step": 87460 }, { "epoch": 0.37552699140499557, "grad_norm": 1.3501126766204834, "learning_rate": 6.271698731491942e-05, "loss": 0.38095693588256835, "step": 87470 }, { "epoch": 0.3755699234950156, "grad_norm": 9.027297973632812, "learning_rate": 6.27126755948018e-05, "loss": 0.2303825855255127, "step": 87480 }, { "epoch": 0.3756128555850356, "grad_norm": 0.003722363617271185, "learning_rate": 6.270836387468418e-05, "loss": 0.33140523433685304, "step": 87490 }, { "epoch": 0.3756557876750556, "grad_norm": 0.3718756139278412, "learning_rate": 6.270405215456656e-05, "loss": 0.24214658737182618, "step": 87500 }, { "epoch": 0.37569871976507563, "grad_norm": 3.6789193153381348, "learning_rate": 6.269974043444892e-05, "loss": 0.20105509757995604, "step": 87510 }, { "epoch": 0.3757416518550956, "grad_norm": 0.7093227505683899, "learning_rate": 6.26954287143313e-05, "loss": 0.16223851442337037, "step": 87520 }, { "epoch": 0.3757845839451156, "grad_norm": 0.2839501202106476, "learning_rate": 6.269111699421367e-05, "loss": 0.2748666763305664, "step": 87530 }, { "epoch": 0.37582751603513564, "grad_norm": 0.004592269193381071, "learning_rate": 6.268680527409605e-05, "loss": 0.20933430194854735, "step": 87540 }, { "epoch": 0.37587044812515563, "grad_norm": 1.9469878673553467, "learning_rate": 6.268249355397843e-05, "loss": 0.21942739486694335, "step": 87550 }, { "epoch": 0.3759133802151756, "grad_norm": 0.05047018826007843, "learning_rate": 6.26781818338608e-05, "loss": 0.17022851705551148, "step": 87560 }, { "epoch": 0.37595631230519566, "grad_norm": 0.0031462402548640966, "learning_rate": 6.267387011374318e-05, "loss": 0.15885262489318847, "step": 87570 }, { "epoch": 0.37599924439521565, "grad_norm": 0.9981968402862549, "learning_rate": 6.266955839362556e-05, "loss": 0.40951828956604003, "step": 87580 }, { "epoch": 0.37604217648523564, "grad_norm": 0.5256025791168213, "learning_rate": 6.266524667350792e-05, "loss": 0.03144158124923706, "step": 87590 }, { "epoch": 0.3760851085752557, "grad_norm": 1.802929162979126, "learning_rate": 6.26609349533903e-05, "loss": 0.23492536544799805, "step": 87600 }, { "epoch": 0.37612804066527566, "grad_norm": 2.9922046661376953, "learning_rate": 6.265662323327268e-05, "loss": 0.016963517665863036, "step": 87610 }, { "epoch": 0.37617097275529565, "grad_norm": 12.366278648376465, "learning_rate": 6.265231151315506e-05, "loss": 0.33857011795043945, "step": 87620 }, { "epoch": 0.3762139048453157, "grad_norm": 1.1289916038513184, "learning_rate": 6.264799979303743e-05, "loss": 0.4807577610015869, "step": 87630 }, { "epoch": 0.3762568369353357, "grad_norm": 1.944948673248291, "learning_rate": 6.264368807291981e-05, "loss": 0.244700026512146, "step": 87640 }, { "epoch": 0.37629976902535567, "grad_norm": 1.5617321729660034, "learning_rate": 6.263937635280219e-05, "loss": 0.3912205219268799, "step": 87650 }, { "epoch": 0.3763427011153757, "grad_norm": 0.002875519683584571, "learning_rate": 6.263506463268457e-05, "loss": 0.07082734107971192, "step": 87660 }, { "epoch": 0.3763856332053957, "grad_norm": 0.6047908663749695, "learning_rate": 6.263075291256694e-05, "loss": 0.03974010944366455, "step": 87670 }, { "epoch": 0.3764285652954157, "grad_norm": 0.021802136674523354, "learning_rate": 6.262644119244932e-05, "loss": 0.12680743932723998, "step": 87680 }, { "epoch": 0.3764714973854357, "grad_norm": 1.6270766258239746, "learning_rate": 6.26221294723317e-05, "loss": 0.2659365177154541, "step": 87690 }, { "epoch": 0.3765144294754557, "grad_norm": 1.6786472797393799, "learning_rate": 6.261781775221408e-05, "loss": 0.26304676532745364, "step": 87700 }, { "epoch": 0.37655736156547576, "grad_norm": 0.0038254903629422188, "learning_rate": 6.261350603209645e-05, "loss": 0.2967263698577881, "step": 87710 }, { "epoch": 0.37660029365549574, "grad_norm": 0.021582119166851044, "learning_rate": 6.260919431197883e-05, "loss": 0.2736553907394409, "step": 87720 }, { "epoch": 0.37664322574551573, "grad_norm": 2.2392396926879883, "learning_rate": 6.260488259186121e-05, "loss": 0.19963181018829346, "step": 87730 }, { "epoch": 0.3766861578355358, "grad_norm": 3.8308897018432617, "learning_rate": 6.260057087174358e-05, "loss": 0.33628668785095217, "step": 87740 }, { "epoch": 0.37672908992555576, "grad_norm": 4.784469127655029, "learning_rate": 6.259625915162596e-05, "loss": 0.16687393188476562, "step": 87750 }, { "epoch": 0.37677202201557575, "grad_norm": 9.717080116271973, "learning_rate": 6.259194743150833e-05, "loss": 0.14312649965286256, "step": 87760 }, { "epoch": 0.3768149541055958, "grad_norm": 0.1213330551981926, "learning_rate": 6.25876357113907e-05, "loss": 0.04287871420383453, "step": 87770 }, { "epoch": 0.3768578861956158, "grad_norm": 3.9862990379333496, "learning_rate": 6.258332399127308e-05, "loss": 0.12331254482269287, "step": 87780 }, { "epoch": 0.37690081828563576, "grad_norm": 0.02957821451127529, "learning_rate": 6.257901227115546e-05, "loss": 0.20847928524017334, "step": 87790 }, { "epoch": 0.3769437503756558, "grad_norm": 0.5684049129486084, "learning_rate": 6.257470055103784e-05, "loss": 0.22962512969970703, "step": 87800 }, { "epoch": 0.3769866824656758, "grad_norm": 1.9917635917663574, "learning_rate": 6.257038883092021e-05, "loss": 0.465483283996582, "step": 87810 }, { "epoch": 0.3770296145556958, "grad_norm": 0.004031947813928127, "learning_rate": 6.256607711080259e-05, "loss": 0.10231343507766724, "step": 87820 }, { "epoch": 0.3770725466457158, "grad_norm": 2.37412691116333, "learning_rate": 6.256176539068497e-05, "loss": 0.20844850540161133, "step": 87830 }, { "epoch": 0.3771154787357358, "grad_norm": 0.03548434004187584, "learning_rate": 6.255745367056733e-05, "loss": 0.31219236850738524, "step": 87840 }, { "epoch": 0.3771584108257558, "grad_norm": 0.015120014548301697, "learning_rate": 6.255314195044971e-05, "loss": 0.2350841760635376, "step": 87850 }, { "epoch": 0.37720134291577584, "grad_norm": 0.021302489563822746, "learning_rate": 6.254883023033209e-05, "loss": 0.11671949625015259, "step": 87860 }, { "epoch": 0.3772442750057958, "grad_norm": 0.023564008995890617, "learning_rate": 6.254451851021446e-05, "loss": 0.23691887855529786, "step": 87870 }, { "epoch": 0.3772872070958158, "grad_norm": 2.984902858734131, "learning_rate": 6.254020679009684e-05, "loss": 0.24498653411865234, "step": 87880 }, { "epoch": 0.37733013918583586, "grad_norm": 0.09107789397239685, "learning_rate": 6.253589506997922e-05, "loss": 0.21076819896697999, "step": 87890 }, { "epoch": 0.37737307127585584, "grad_norm": 2.0704238414764404, "learning_rate": 6.25315833498616e-05, "loss": 0.13833118677139283, "step": 87900 }, { "epoch": 0.3774160033658759, "grad_norm": 0.008586696349084377, "learning_rate": 6.252727162974397e-05, "loss": 0.34412851333618166, "step": 87910 }, { "epoch": 0.37745893545589587, "grad_norm": 0.24232687056064606, "learning_rate": 6.252295990962635e-05, "loss": 0.1365652084350586, "step": 87920 }, { "epoch": 0.37750186754591586, "grad_norm": 0.003949606791138649, "learning_rate": 6.251864818950873e-05, "loss": 0.26673967838287355, "step": 87930 }, { "epoch": 0.3775447996359359, "grad_norm": 2.1608128547668457, "learning_rate": 6.25143364693911e-05, "loss": 0.29307947158813474, "step": 87940 }, { "epoch": 0.3775877317259559, "grad_norm": 1.1866542100906372, "learning_rate": 6.251002474927348e-05, "loss": 0.33245224952697755, "step": 87950 }, { "epoch": 0.3776306638159759, "grad_norm": 0.047932349145412445, "learning_rate": 6.250571302915586e-05, "loss": 0.08770357966423034, "step": 87960 }, { "epoch": 0.3776735959059959, "grad_norm": 3.1019997596740723, "learning_rate": 6.250140130903824e-05, "loss": 0.18892154693603516, "step": 87970 }, { "epoch": 0.3777165279960159, "grad_norm": 6.068072319030762, "learning_rate": 6.249708958892061e-05, "loss": 0.23206815719604493, "step": 87980 }, { "epoch": 0.3777594600860359, "grad_norm": 0.910423219203949, "learning_rate": 6.249277786880299e-05, "loss": 0.35682499408721924, "step": 87990 }, { "epoch": 0.37780239217605593, "grad_norm": 4.847745895385742, "learning_rate": 6.248846614868535e-05, "loss": 0.39348864555358887, "step": 88000 }, { "epoch": 0.37780239217605593, "eval_loss": 0.4211534261703491, "eval_runtime": 27.3365, "eval_samples_per_second": 3.658, "eval_steps_per_second": 3.658, "step": 88000 }, { "epoch": 0.3778453242660759, "grad_norm": 1.5804665088653564, "learning_rate": 6.248415442856773e-05, "loss": 0.10657334327697754, "step": 88010 }, { "epoch": 0.3778882563560959, "grad_norm": 2.5569381713867188, "learning_rate": 6.247984270845011e-05, "loss": 0.19821202754974365, "step": 88020 }, { "epoch": 0.37793118844611595, "grad_norm": 0.07708010822534561, "learning_rate": 6.247553098833249e-05, "loss": 0.13068944215774536, "step": 88030 }, { "epoch": 0.37797412053613594, "grad_norm": 0.36910587549209595, "learning_rate": 6.247121926821486e-05, "loss": 0.1964368462562561, "step": 88040 }, { "epoch": 0.3780170526261559, "grad_norm": 0.6348251700401306, "learning_rate": 6.246690754809724e-05, "loss": 0.29259209632873534, "step": 88050 }, { "epoch": 0.37805998471617597, "grad_norm": 1.2968003749847412, "learning_rate": 6.246259582797962e-05, "loss": 0.31879286766052245, "step": 88060 }, { "epoch": 0.37810291680619595, "grad_norm": 0.01956726796925068, "learning_rate": 6.2458284107862e-05, "loss": 0.10881918668746948, "step": 88070 }, { "epoch": 0.37814584889621594, "grad_norm": 0.05537987872958183, "learning_rate": 6.245397238774437e-05, "loss": 0.3085724592208862, "step": 88080 }, { "epoch": 0.378188780986236, "grad_norm": 0.010545702651143074, "learning_rate": 6.244966066762674e-05, "loss": 0.153480064868927, "step": 88090 }, { "epoch": 0.37823171307625597, "grad_norm": 6.235033988952637, "learning_rate": 6.244534894750911e-05, "loss": 0.30473511219024657, "step": 88100 }, { "epoch": 0.37827464516627596, "grad_norm": 1.8143759965896606, "learning_rate": 6.244103722739149e-05, "loss": 0.35205180644989015, "step": 88110 }, { "epoch": 0.378317577256296, "grad_norm": 1.0785503387451172, "learning_rate": 6.243672550727387e-05, "loss": 0.2504326343536377, "step": 88120 }, { "epoch": 0.378360509346316, "grad_norm": 5.685330867767334, "learning_rate": 6.243241378715625e-05, "loss": 0.2820307970046997, "step": 88130 }, { "epoch": 0.37840344143633603, "grad_norm": 0.021171852946281433, "learning_rate": 6.242810206703862e-05, "loss": 0.10080406665802003, "step": 88140 }, { "epoch": 0.378446373526356, "grad_norm": 0.008075188845396042, "learning_rate": 6.2423790346921e-05, "loss": 0.0766272485256195, "step": 88150 }, { "epoch": 0.378489305616376, "grad_norm": 0.523626446723938, "learning_rate": 6.241947862680338e-05, "loss": 0.3302738189697266, "step": 88160 }, { "epoch": 0.37853223770639605, "grad_norm": 1.6001014709472656, "learning_rate": 6.241516690668576e-05, "loss": 0.12499821186065674, "step": 88170 }, { "epoch": 0.37857516979641603, "grad_norm": 0.010254238732159138, "learning_rate": 6.241085518656813e-05, "loss": 0.1693050742149353, "step": 88180 }, { "epoch": 0.378618101886436, "grad_norm": 0.06305219233036041, "learning_rate": 6.240654346645051e-05, "loss": 0.28610665798187257, "step": 88190 }, { "epoch": 0.37866103397645606, "grad_norm": 0.9428789615631104, "learning_rate": 6.240223174633289e-05, "loss": 0.2268359899520874, "step": 88200 }, { "epoch": 0.37870396606647605, "grad_norm": 0.20084306597709656, "learning_rate": 6.239792002621527e-05, "loss": 0.36552431583404543, "step": 88210 }, { "epoch": 0.37874689815649604, "grad_norm": 1.3825594186782837, "learning_rate": 6.239360830609764e-05, "loss": 0.42622647285461424, "step": 88220 }, { "epoch": 0.3787898302465161, "grad_norm": 0.0558270625770092, "learning_rate": 6.238929658598002e-05, "loss": 0.265299391746521, "step": 88230 }, { "epoch": 0.37883276233653607, "grad_norm": 0.01165605615824461, "learning_rate": 6.23849848658624e-05, "loss": 0.12851730585098267, "step": 88240 }, { "epoch": 0.37887569442655605, "grad_norm": 0.011059621348977089, "learning_rate": 6.238067314574476e-05, "loss": 0.295037055015564, "step": 88250 }, { "epoch": 0.3789186265165761, "grad_norm": 30.9415340423584, "learning_rate": 6.237636142562714e-05, "loss": 0.330984902381897, "step": 88260 }, { "epoch": 0.3789615586065961, "grad_norm": 0.08154928684234619, "learning_rate": 6.237204970550952e-05, "loss": 0.29265894889831545, "step": 88270 }, { "epoch": 0.37900449069661607, "grad_norm": 5.459999084472656, "learning_rate": 6.23677379853919e-05, "loss": 0.4351907253265381, "step": 88280 }, { "epoch": 0.3790474227866361, "grad_norm": 0.3953285217285156, "learning_rate": 6.236342626527427e-05, "loss": 0.13419573307037352, "step": 88290 }, { "epoch": 0.3790903548766561, "grad_norm": 0.019297489896416664, "learning_rate": 6.235911454515665e-05, "loss": 0.2427154302597046, "step": 88300 }, { "epoch": 0.3791332869666761, "grad_norm": 1.6823676824569702, "learning_rate": 6.235480282503903e-05, "loss": 0.17967859506607056, "step": 88310 }, { "epoch": 0.37917621905669613, "grad_norm": 0.00811012089252472, "learning_rate": 6.23504911049214e-05, "loss": 0.1739598512649536, "step": 88320 }, { "epoch": 0.3792191511467161, "grad_norm": 0.5991887450218201, "learning_rate": 6.234617938480377e-05, "loss": 0.09256443977355958, "step": 88330 }, { "epoch": 0.37926208323673616, "grad_norm": 1.724471092224121, "learning_rate": 6.234186766468614e-05, "loss": 0.2317936897277832, "step": 88340 }, { "epoch": 0.37930501532675615, "grad_norm": 0.15021900832653046, "learning_rate": 6.233755594456852e-05, "loss": 0.22777304649353028, "step": 88350 }, { "epoch": 0.37934794741677613, "grad_norm": 0.007549840956926346, "learning_rate": 6.23332442244509e-05, "loss": 0.08671526312828064, "step": 88360 }, { "epoch": 0.3793908795067962, "grad_norm": 0.0036326975096017122, "learning_rate": 6.232893250433328e-05, "loss": 0.2025750160217285, "step": 88370 }, { "epoch": 0.37943381159681616, "grad_norm": 0.011411375366151333, "learning_rate": 6.232462078421565e-05, "loss": 0.32199418544769287, "step": 88380 }, { "epoch": 0.37947674368683615, "grad_norm": 0.10104304552078247, "learning_rate": 6.232030906409803e-05, "loss": 0.2756567716598511, "step": 88390 }, { "epoch": 0.3795196757768562, "grad_norm": 0.26092761754989624, "learning_rate": 6.231599734398041e-05, "loss": 0.038532555103302, "step": 88400 }, { "epoch": 0.3795626078668762, "grad_norm": 0.013410309329628944, "learning_rate": 6.23116856238628e-05, "loss": 0.3681929111480713, "step": 88410 }, { "epoch": 0.37960553995689617, "grad_norm": 0.0316680446267128, "learning_rate": 6.230737390374516e-05, "loss": 0.059136635065078734, "step": 88420 }, { "epoch": 0.3796484720469162, "grad_norm": 0.10209963470697403, "learning_rate": 6.230306218362754e-05, "loss": 0.3428246021270752, "step": 88430 }, { "epoch": 0.3796914041369362, "grad_norm": 0.009456177242100239, "learning_rate": 6.229875046350992e-05, "loss": 0.04225753843784332, "step": 88440 }, { "epoch": 0.3797343362269562, "grad_norm": 1.0888117551803589, "learning_rate": 6.22944387433923e-05, "loss": 0.07131630182266235, "step": 88450 }, { "epoch": 0.3797772683169762, "grad_norm": 8.46331787109375, "learning_rate": 6.229012702327467e-05, "loss": 0.21127099990844728, "step": 88460 }, { "epoch": 0.3798202004069962, "grad_norm": 0.002510474296286702, "learning_rate": 6.228581530315705e-05, "loss": 0.3312537431716919, "step": 88470 }, { "epoch": 0.3798631324970162, "grad_norm": 0.028360135853290558, "learning_rate": 6.228150358303943e-05, "loss": 0.10665277242660523, "step": 88480 }, { "epoch": 0.37990606458703624, "grad_norm": 0.011928168125450611, "learning_rate": 6.22771918629218e-05, "loss": 0.42289509773254397, "step": 88490 }, { "epoch": 0.3799489966770562, "grad_norm": 1.114374041557312, "learning_rate": 6.227288014280417e-05, "loss": 0.26947269439697263, "step": 88500 }, { "epoch": 0.3799919287670762, "grad_norm": 0.034509461373090744, "learning_rate": 6.226856842268655e-05, "loss": 0.16201649904251098, "step": 88510 }, { "epoch": 0.38003486085709626, "grad_norm": 0.006820637732744217, "learning_rate": 6.226425670256892e-05, "loss": 0.012356171011924743, "step": 88520 }, { "epoch": 0.38007779294711624, "grad_norm": 0.09843739867210388, "learning_rate": 6.22599449824513e-05, "loss": 0.13386306762695313, "step": 88530 }, { "epoch": 0.38012072503713623, "grad_norm": 1.5230803489685059, "learning_rate": 6.225563326233368e-05, "loss": 0.22778193950653075, "step": 88540 }, { "epoch": 0.3801636571271563, "grad_norm": 0.2800234854221344, "learning_rate": 6.225132154221605e-05, "loss": 0.20246121883392335, "step": 88550 }, { "epoch": 0.38020658921717626, "grad_norm": 1.4027798175811768, "learning_rate": 6.224700982209843e-05, "loss": 0.2890122175216675, "step": 88560 }, { "epoch": 0.3802495213071963, "grad_norm": 0.01395477820187807, "learning_rate": 6.224269810198081e-05, "loss": 0.18396477699279784, "step": 88570 }, { "epoch": 0.3802924533972163, "grad_norm": 0.1274530589580536, "learning_rate": 6.223838638186317e-05, "loss": 0.0587466835975647, "step": 88580 }, { "epoch": 0.3803353854872363, "grad_norm": 0.07611143589019775, "learning_rate": 6.223407466174555e-05, "loss": 0.26608948707580565, "step": 88590 }, { "epoch": 0.3803783175772563, "grad_norm": 0.9535974860191345, "learning_rate": 6.222976294162793e-05, "loss": 0.30292160511016847, "step": 88600 }, { "epoch": 0.3804212496672763, "grad_norm": 0.015950385481119156, "learning_rate": 6.22254512215103e-05, "loss": 0.2924375057220459, "step": 88610 }, { "epoch": 0.3804641817572963, "grad_norm": 0.011609098874032497, "learning_rate": 6.222113950139268e-05, "loss": 0.11583701372146607, "step": 88620 }, { "epoch": 0.38050711384731634, "grad_norm": 2.3866891860961914, "learning_rate": 6.221682778127507e-05, "loss": 0.33168849945068357, "step": 88630 }, { "epoch": 0.3805500459373363, "grad_norm": 3.5178093910217285, "learning_rate": 6.221251606115745e-05, "loss": 0.2832959175109863, "step": 88640 }, { "epoch": 0.3805929780273563, "grad_norm": 0.0034149482380598783, "learning_rate": 6.220820434103983e-05, "loss": 0.16352550983428954, "step": 88650 }, { "epoch": 0.38063591011737635, "grad_norm": 1.0718631744384766, "learning_rate": 6.220389262092219e-05, "loss": 0.22328333854675292, "step": 88660 }, { "epoch": 0.38067884220739634, "grad_norm": 0.0010514046298339963, "learning_rate": 6.219958090080457e-05, "loss": 0.21548836231231688, "step": 88670 }, { "epoch": 0.3807217742974163, "grad_norm": 0.01005020085722208, "learning_rate": 6.219526918068695e-05, "loss": 0.18633402585983277, "step": 88680 }, { "epoch": 0.38076470638743637, "grad_norm": 0.013129375874996185, "learning_rate": 6.219095746056932e-05, "loss": 0.2295675277709961, "step": 88690 }, { "epoch": 0.38080763847745636, "grad_norm": 1.2943649291992188, "learning_rate": 6.21866457404517e-05, "loss": 0.32459824085235595, "step": 88700 }, { "epoch": 0.38085057056747634, "grad_norm": 0.011330182664096355, "learning_rate": 6.218233402033408e-05, "loss": 0.10692859888076782, "step": 88710 }, { "epoch": 0.3808935026574964, "grad_norm": 1.1285091638565063, "learning_rate": 6.217802230021646e-05, "loss": 0.2512716770172119, "step": 88720 }, { "epoch": 0.3809364347475164, "grad_norm": 2.265028476715088, "learning_rate": 6.217371058009883e-05, "loss": 0.11686290502548217, "step": 88730 }, { "epoch": 0.38097936683753636, "grad_norm": 1.254490852355957, "learning_rate": 6.216939885998121e-05, "loss": 0.4674684524536133, "step": 88740 }, { "epoch": 0.3810222989275564, "grad_norm": 0.0046071987599134445, "learning_rate": 6.216508713986357e-05, "loss": 0.32127716541290285, "step": 88750 }, { "epoch": 0.3810652310175764, "grad_norm": 1.2878999710083008, "learning_rate": 6.216077541974595e-05, "loss": 0.25494959354400637, "step": 88760 }, { "epoch": 0.38110816310759643, "grad_norm": 0.03140444681048393, "learning_rate": 6.215646369962833e-05, "loss": 0.26149399280548097, "step": 88770 }, { "epoch": 0.3811510951976164, "grad_norm": 1.8332608938217163, "learning_rate": 6.21521519795107e-05, "loss": 0.4215863704681396, "step": 88780 }, { "epoch": 0.3811940272876364, "grad_norm": 0.016005532816052437, "learning_rate": 6.214784025939308e-05, "loss": 0.18732340335845948, "step": 88790 }, { "epoch": 0.38123695937765645, "grad_norm": 0.9914336204528809, "learning_rate": 6.214352853927546e-05, "loss": 0.40707788467407224, "step": 88800 }, { "epoch": 0.38127989146767644, "grad_norm": 0.029879281297326088, "learning_rate": 6.213921681915784e-05, "loss": 0.2158869504928589, "step": 88810 }, { "epoch": 0.3813228235576964, "grad_norm": 34.581050872802734, "learning_rate": 6.213490509904022e-05, "loss": 0.3515269994735718, "step": 88820 }, { "epoch": 0.38136575564771646, "grad_norm": 1.1815255880355835, "learning_rate": 6.213059337892258e-05, "loss": 0.3360306262969971, "step": 88830 }, { "epoch": 0.38140868773773645, "grad_norm": 0.08978642523288727, "learning_rate": 6.212628165880496e-05, "loss": 0.029180306196212768, "step": 88840 }, { "epoch": 0.38145161982775644, "grad_norm": 0.3570984899997711, "learning_rate": 6.212196993868735e-05, "loss": 0.1382278323173523, "step": 88850 }, { "epoch": 0.3814945519177765, "grad_norm": 0.2925564646720886, "learning_rate": 6.211765821856973e-05, "loss": 0.12889453172683715, "step": 88860 }, { "epoch": 0.38153748400779647, "grad_norm": 0.6374716758728027, "learning_rate": 6.21133464984521e-05, "loss": 0.2685490608215332, "step": 88870 }, { "epoch": 0.38158041609781645, "grad_norm": 0.07237851619720459, "learning_rate": 6.210903477833448e-05, "loss": 0.034711554646492004, "step": 88880 }, { "epoch": 0.3816233481878365, "grad_norm": 0.08661754429340363, "learning_rate": 6.210472305821686e-05, "loss": 0.26672289371490476, "step": 88890 }, { "epoch": 0.3816662802778565, "grad_norm": 1.017971158027649, "learning_rate": 6.210041133809923e-05, "loss": 0.26748826503753664, "step": 88900 }, { "epoch": 0.38170921236787647, "grad_norm": 4.834644794464111, "learning_rate": 6.20960996179816e-05, "loss": 0.12524523735046386, "step": 88910 }, { "epoch": 0.3817521444578965, "grad_norm": 0.36303168535232544, "learning_rate": 6.209178789786398e-05, "loss": 0.2487999677658081, "step": 88920 }, { "epoch": 0.3817950765479165, "grad_norm": 1.2829387187957764, "learning_rate": 6.208747617774635e-05, "loss": 0.1831027865409851, "step": 88930 }, { "epoch": 0.3818380086379365, "grad_norm": 2.0571751594543457, "learning_rate": 6.208316445762873e-05, "loss": 0.28766908645629885, "step": 88940 }, { "epoch": 0.38188094072795653, "grad_norm": 0.15016916394233704, "learning_rate": 6.207885273751111e-05, "loss": 0.19521453380584716, "step": 88950 }, { "epoch": 0.3819238728179765, "grad_norm": 0.10648097097873688, "learning_rate": 6.207454101739348e-05, "loss": 0.29527380466461184, "step": 88960 }, { "epoch": 0.3819668049079965, "grad_norm": 0.014429030939936638, "learning_rate": 6.207022929727586e-05, "loss": 0.1196025252342224, "step": 88970 }, { "epoch": 0.38200973699801655, "grad_norm": 0.022127997130155563, "learning_rate": 6.206591757715824e-05, "loss": 0.27551116943359377, "step": 88980 }, { "epoch": 0.38205266908803653, "grad_norm": 0.04227323830127716, "learning_rate": 6.20616058570406e-05, "loss": 0.13282071352005004, "step": 88990 }, { "epoch": 0.3820956011780566, "grad_norm": 0.009546788409352303, "learning_rate": 6.205729413692298e-05, "loss": 0.2698558807373047, "step": 89000 }, { "epoch": 0.3820956011780566, "eval_loss": 0.4414127767086029, "eval_runtime": 27.1823, "eval_samples_per_second": 3.679, "eval_steps_per_second": 3.679, "step": 89000 }, { "epoch": 0.38213853326807656, "grad_norm": 0.037746500223875046, "learning_rate": 6.205298241680536e-05, "loss": 0.3705944776535034, "step": 89010 }, { "epoch": 0.38218146535809655, "grad_norm": 0.18876878917217255, "learning_rate": 6.204867069668774e-05, "loss": 0.1594752550125122, "step": 89020 }, { "epoch": 0.3822243974481166, "grad_norm": 0.006270625162869692, "learning_rate": 6.204435897657011e-05, "loss": 0.0447078138589859, "step": 89030 }, { "epoch": 0.3822673295381366, "grad_norm": 13.3214111328125, "learning_rate": 6.204004725645249e-05, "loss": 0.293184232711792, "step": 89040 }, { "epoch": 0.38231026162815657, "grad_norm": 2.262474298477173, "learning_rate": 6.203573553633487e-05, "loss": 0.12684309482574463, "step": 89050 }, { "epoch": 0.3823531937181766, "grad_norm": 0.18495914340019226, "learning_rate": 6.203142381621724e-05, "loss": 0.12461878061294555, "step": 89060 }, { "epoch": 0.3823961258081966, "grad_norm": 1.989532470703125, "learning_rate": 6.202711209609962e-05, "loss": 0.3887135982513428, "step": 89070 }, { "epoch": 0.3824390578982166, "grad_norm": 0.06415251642465591, "learning_rate": 6.2022800375982e-05, "loss": 0.12190742492675781, "step": 89080 }, { "epoch": 0.3824819899882366, "grad_norm": 1.0233348608016968, "learning_rate": 6.201848865586438e-05, "loss": 0.16654955148696898, "step": 89090 }, { "epoch": 0.3825249220782566, "grad_norm": 0.013394716195762157, "learning_rate": 6.201417693574675e-05, "loss": 0.3020642757415771, "step": 89100 }, { "epoch": 0.3825678541682766, "grad_norm": 0.10267344862222672, "learning_rate": 6.200986521562913e-05, "loss": 0.18132349252700805, "step": 89110 }, { "epoch": 0.38261078625829664, "grad_norm": 1.8178410530090332, "learning_rate": 6.200555349551151e-05, "loss": 0.3368029832839966, "step": 89120 }, { "epoch": 0.38265371834831663, "grad_norm": 0.005919590126723051, "learning_rate": 6.200124177539389e-05, "loss": 0.17633817195892335, "step": 89130 }, { "epoch": 0.3826966504383366, "grad_norm": 1.393633484840393, "learning_rate": 6.199693005527626e-05, "loss": 0.4469784736633301, "step": 89140 }, { "epoch": 0.38273958252835666, "grad_norm": 0.07115230709314346, "learning_rate": 6.199261833515864e-05, "loss": 0.3376830339431763, "step": 89150 }, { "epoch": 0.38278251461837665, "grad_norm": 0.03959648311138153, "learning_rate": 6.1988306615041e-05, "loss": 0.05683208107948303, "step": 89160 }, { "epoch": 0.38282544670839663, "grad_norm": 0.031142039224505424, "learning_rate": 6.198399489492338e-05, "loss": 0.09311988949775696, "step": 89170 }, { "epoch": 0.3828683787984167, "grad_norm": 0.0646190270781517, "learning_rate": 6.197968317480576e-05, "loss": 0.4846409797668457, "step": 89180 }, { "epoch": 0.38291131088843666, "grad_norm": 0.0026598910335451365, "learning_rate": 6.197537145468814e-05, "loss": 0.3485872268676758, "step": 89190 }, { "epoch": 0.3829542429784567, "grad_norm": 0.010424236766994, "learning_rate": 6.197105973457051e-05, "loss": 0.353135085105896, "step": 89200 }, { "epoch": 0.3829971750684767, "grad_norm": 3.2976186275482178, "learning_rate": 6.196674801445289e-05, "loss": 0.1672595739364624, "step": 89210 }, { "epoch": 0.3830401071584967, "grad_norm": 0.7941388487815857, "learning_rate": 6.196243629433527e-05, "loss": 0.371187424659729, "step": 89220 }, { "epoch": 0.3830830392485167, "grad_norm": 1.3344122171401978, "learning_rate": 6.195812457421765e-05, "loss": 0.2827876806259155, "step": 89230 }, { "epoch": 0.3831259713385367, "grad_norm": 3.533451795578003, "learning_rate": 6.195381285410001e-05, "loss": 0.15025629997253417, "step": 89240 }, { "epoch": 0.3831689034285567, "grad_norm": 0.4801417589187622, "learning_rate": 6.194950113398239e-05, "loss": 0.17558751106262208, "step": 89250 }, { "epoch": 0.38321183551857674, "grad_norm": 1.5117595195770264, "learning_rate": 6.194518941386476e-05, "loss": 0.40224194526672363, "step": 89260 }, { "epoch": 0.3832547676085967, "grad_norm": 13.442787170410156, "learning_rate": 6.194087769374714e-05, "loss": 0.132839834690094, "step": 89270 }, { "epoch": 0.3832976996986167, "grad_norm": 9.266427040100098, "learning_rate": 6.193656597362952e-05, "loss": 0.38136115074157717, "step": 89280 }, { "epoch": 0.38334063178863675, "grad_norm": 0.11393710970878601, "learning_rate": 6.19322542535119e-05, "loss": 0.09291578531265259, "step": 89290 }, { "epoch": 0.38338356387865674, "grad_norm": 3.6095845699310303, "learning_rate": 6.192794253339427e-05, "loss": 0.2640878200531006, "step": 89300 }, { "epoch": 0.38342649596867673, "grad_norm": 1.8564183712005615, "learning_rate": 6.192363081327665e-05, "loss": 0.29603559970855714, "step": 89310 }, { "epoch": 0.38346942805869677, "grad_norm": 0.913407027721405, "learning_rate": 6.191931909315903e-05, "loss": 0.28334932327270507, "step": 89320 }, { "epoch": 0.38351236014871676, "grad_norm": 0.0012131475377827883, "learning_rate": 6.19150073730414e-05, "loss": 0.0755197286605835, "step": 89330 }, { "epoch": 0.38355529223873674, "grad_norm": 0.2928409278392792, "learning_rate": 6.191069565292378e-05, "loss": 0.18231843709945678, "step": 89340 }, { "epoch": 0.3835982243287568, "grad_norm": 0.03733060136437416, "learning_rate": 6.190638393280616e-05, "loss": 0.25749950408935546, "step": 89350 }, { "epoch": 0.3836411564187768, "grad_norm": 0.039106305688619614, "learning_rate": 6.190207221268854e-05, "loss": 0.26590471267700194, "step": 89360 }, { "epoch": 0.38368408850879676, "grad_norm": 1.9658153057098389, "learning_rate": 6.189776049257092e-05, "loss": 0.4718448638916016, "step": 89370 }, { "epoch": 0.3837270205988168, "grad_norm": 4.088800430297852, "learning_rate": 6.189344877245329e-05, "loss": 0.11173911094665527, "step": 89380 }, { "epoch": 0.3837699526888368, "grad_norm": 9.061023712158203, "learning_rate": 6.188913705233567e-05, "loss": 0.23896961212158202, "step": 89390 }, { "epoch": 0.3838128847788568, "grad_norm": 0.018133578822016716, "learning_rate": 6.188482533221803e-05, "loss": 0.36962740421295165, "step": 89400 }, { "epoch": 0.3838558168688768, "grad_norm": 0.03915409743785858, "learning_rate": 6.188051361210041e-05, "loss": 0.12740023136138917, "step": 89410 }, { "epoch": 0.3838987489588968, "grad_norm": 0.032663632184267044, "learning_rate": 6.187620189198279e-05, "loss": 0.15168129205703734, "step": 89420 }, { "epoch": 0.38394168104891685, "grad_norm": 0.005301196593791246, "learning_rate": 6.187189017186517e-05, "loss": 0.04259181320667267, "step": 89430 }, { "epoch": 0.38398461313893684, "grad_norm": 0.01621132902801037, "learning_rate": 6.186757845174754e-05, "loss": 0.3094786167144775, "step": 89440 }, { "epoch": 0.3840275452289568, "grad_norm": 0.007388011552393436, "learning_rate": 6.186326673162992e-05, "loss": 0.17484718561172485, "step": 89450 }, { "epoch": 0.38407047731897687, "grad_norm": 0.06491857022047043, "learning_rate": 6.18589550115123e-05, "loss": 0.22071504592895508, "step": 89460 }, { "epoch": 0.38411340940899685, "grad_norm": 0.08055774867534637, "learning_rate": 6.185464329139468e-05, "loss": 0.18757699728012084, "step": 89470 }, { "epoch": 0.38415634149901684, "grad_norm": 0.2903849482536316, "learning_rate": 6.185033157127705e-05, "loss": 0.21719033718109132, "step": 89480 }, { "epoch": 0.3841992735890369, "grad_norm": 0.0042831008322536945, "learning_rate": 6.184601985115942e-05, "loss": 0.35606064796447756, "step": 89490 }, { "epoch": 0.38424220567905687, "grad_norm": 0.009732181206345558, "learning_rate": 6.18417081310418e-05, "loss": 0.13790091276168823, "step": 89500 }, { "epoch": 0.38428513776907686, "grad_norm": 0.02407350391149521, "learning_rate": 6.183739641092417e-05, "loss": 0.19972857236862182, "step": 89510 }, { "epoch": 0.3843280698590969, "grad_norm": 0.003901082556694746, "learning_rate": 6.183308469080655e-05, "loss": 0.23962645530700682, "step": 89520 }, { "epoch": 0.3843710019491169, "grad_norm": 4.334282398223877, "learning_rate": 6.182877297068893e-05, "loss": 0.1551468014717102, "step": 89530 }, { "epoch": 0.3844139340391369, "grad_norm": 0.1462079882621765, "learning_rate": 6.18244612505713e-05, "loss": 0.2502715826034546, "step": 89540 }, { "epoch": 0.3844568661291569, "grad_norm": 0.002122892765328288, "learning_rate": 6.182014953045368e-05, "loss": 0.24722750186920167, "step": 89550 }, { "epoch": 0.3844997982191769, "grad_norm": 0.08925088495016098, "learning_rate": 6.181583781033606e-05, "loss": 0.17703983783721924, "step": 89560 }, { "epoch": 0.3845427303091969, "grad_norm": 0.19102783501148224, "learning_rate": 6.181152609021844e-05, "loss": 0.269983696937561, "step": 89570 }, { "epoch": 0.38458566239921693, "grad_norm": 0.05688232555985451, "learning_rate": 6.180721437010081e-05, "loss": 0.2018204689025879, "step": 89580 }, { "epoch": 0.3846285944892369, "grad_norm": 0.09904558211565018, "learning_rate": 6.180290264998319e-05, "loss": 0.12981395721435546, "step": 89590 }, { "epoch": 0.3846715265792569, "grad_norm": 0.1914941668510437, "learning_rate": 6.179859092986557e-05, "loss": 0.12313872575759888, "step": 89600 }, { "epoch": 0.38471445866927695, "grad_norm": 0.019617699086666107, "learning_rate": 6.179427920974794e-05, "loss": 0.1696901798248291, "step": 89610 }, { "epoch": 0.38475739075929694, "grad_norm": 0.008060222491621971, "learning_rate": 6.178996748963032e-05, "loss": 0.2816110610961914, "step": 89620 }, { "epoch": 0.384800322849317, "grad_norm": 0.03914284333586693, "learning_rate": 6.17856557695127e-05, "loss": 0.017223869264125825, "step": 89630 }, { "epoch": 0.38484325493933697, "grad_norm": 0.02014545537531376, "learning_rate": 6.178134404939508e-05, "loss": 0.07916730642318726, "step": 89640 }, { "epoch": 0.38488618702935695, "grad_norm": 0.474691241979599, "learning_rate": 6.177703232927744e-05, "loss": 0.2577148675918579, "step": 89650 }, { "epoch": 0.384929119119377, "grad_norm": 0.13608905673027039, "learning_rate": 6.177272060915982e-05, "loss": 0.1876443147659302, "step": 89660 }, { "epoch": 0.384972051209397, "grad_norm": 0.0023101787082850933, "learning_rate": 6.17684088890422e-05, "loss": 0.33298826217651367, "step": 89670 }, { "epoch": 0.38501498329941697, "grad_norm": 1.3062105178833008, "learning_rate": 6.176409716892457e-05, "loss": 0.1542769193649292, "step": 89680 }, { "epoch": 0.385057915389437, "grad_norm": 0.005330412648618221, "learning_rate": 6.175978544880695e-05, "loss": 0.23672285079956054, "step": 89690 }, { "epoch": 0.385100847479457, "grad_norm": 1.3016077280044556, "learning_rate": 6.175547372868933e-05, "loss": 0.2891198396682739, "step": 89700 }, { "epoch": 0.385143779569477, "grad_norm": 0.10209011286497116, "learning_rate": 6.17511620085717e-05, "loss": 0.14344640970230102, "step": 89710 }, { "epoch": 0.385186711659497, "grad_norm": 0.4255737364292145, "learning_rate": 6.174685028845408e-05, "loss": 0.29021642208099363, "step": 89720 }, { "epoch": 0.385229643749517, "grad_norm": 0.11061238497495651, "learning_rate": 6.174253856833645e-05, "loss": 0.17971653938293458, "step": 89730 }, { "epoch": 0.385272575839537, "grad_norm": 0.034988872706890106, "learning_rate": 6.173822684821882e-05, "loss": 0.1534808397293091, "step": 89740 }, { "epoch": 0.38531550792955704, "grad_norm": 1.2242257595062256, "learning_rate": 6.17339151281012e-05, "loss": 0.41945724487304686, "step": 89750 }, { "epoch": 0.38535844001957703, "grad_norm": 0.007697496097534895, "learning_rate": 6.172960340798358e-05, "loss": 0.3344566822052002, "step": 89760 }, { "epoch": 0.385401372109597, "grad_norm": 6.822228908538818, "learning_rate": 6.172529168786595e-05, "loss": 0.3244169235229492, "step": 89770 }, { "epoch": 0.38544430419961706, "grad_norm": 2.186828374862671, "learning_rate": 6.172097996774833e-05, "loss": 0.4501382827758789, "step": 89780 }, { "epoch": 0.38548723628963705, "grad_norm": 0.2173774540424347, "learning_rate": 6.171666824763071e-05, "loss": 0.23860557079315187, "step": 89790 }, { "epoch": 0.38553016837965703, "grad_norm": 0.04658151790499687, "learning_rate": 6.171235652751309e-05, "loss": 0.1996417760848999, "step": 89800 }, { "epoch": 0.3855731004696771, "grad_norm": 0.02228648215532303, "learning_rate": 6.170804480739546e-05, "loss": 0.22978696823120118, "step": 89810 }, { "epoch": 0.38561603255969706, "grad_norm": 2.033644437789917, "learning_rate": 6.170373308727784e-05, "loss": 0.2479996681213379, "step": 89820 }, { "epoch": 0.38565896464971705, "grad_norm": 0.8258361220359802, "learning_rate": 6.169942136716022e-05, "loss": 0.3214423656463623, "step": 89830 }, { "epoch": 0.3857018967397371, "grad_norm": 0.01886231079697609, "learning_rate": 6.16951096470426e-05, "loss": 0.31837873458862304, "step": 89840 }, { "epoch": 0.3857448288297571, "grad_norm": 2.3415749073028564, "learning_rate": 6.169079792692497e-05, "loss": 0.08363971710205079, "step": 89850 }, { "epoch": 0.3857877609197771, "grad_norm": 0.0015655852621421218, "learning_rate": 6.168648620680735e-05, "loss": 0.004582761600613594, "step": 89860 }, { "epoch": 0.3858306930097971, "grad_norm": 0.011474518105387688, "learning_rate": 6.168217448668973e-05, "loss": 0.32756221294403076, "step": 89870 }, { "epoch": 0.3858736250998171, "grad_norm": 17.503582000732422, "learning_rate": 6.16778627665721e-05, "loss": 0.11809229850769043, "step": 89880 }, { "epoch": 0.38591655718983714, "grad_norm": 0.06740511953830719, "learning_rate": 6.167355104645448e-05, "loss": 0.19865074157714843, "step": 89890 }, { "epoch": 0.3859594892798571, "grad_norm": 1.3640791177749634, "learning_rate": 6.166923932633685e-05, "loss": 0.5600822448730469, "step": 89900 }, { "epoch": 0.3860024213698771, "grad_norm": 0.012692203745245934, "learning_rate": 6.166492760621922e-05, "loss": 0.1536575198173523, "step": 89910 }, { "epoch": 0.38604535345989716, "grad_norm": 2.2844793796539307, "learning_rate": 6.16606158861016e-05, "loss": 0.34578001499176025, "step": 89920 }, { "epoch": 0.38608828554991714, "grad_norm": 7.917400360107422, "learning_rate": 6.165630416598398e-05, "loss": 0.2631634473800659, "step": 89930 }, { "epoch": 0.38613121763993713, "grad_norm": 2.6729063987731934, "learning_rate": 6.165199244586636e-05, "loss": 0.2432703971862793, "step": 89940 }, { "epoch": 0.3861741497299572, "grad_norm": 0.0074627818539738655, "learning_rate": 6.164768072574873e-05, "loss": 0.11718699932098389, "step": 89950 }, { "epoch": 0.38621708181997716, "grad_norm": 0.6019059419631958, "learning_rate": 6.164336900563111e-05, "loss": 0.3247262716293335, "step": 89960 }, { "epoch": 0.38626001390999715, "grad_norm": 0.9982324838638306, "learning_rate": 6.163905728551349e-05, "loss": 0.22884700298309327, "step": 89970 }, { "epoch": 0.3863029460000172, "grad_norm": 0.01680828258395195, "learning_rate": 6.163474556539585e-05, "loss": 0.16624144315719605, "step": 89980 }, { "epoch": 0.3863458780900372, "grad_norm": 0.020545918494462967, "learning_rate": 6.163043384527823e-05, "loss": 0.13353365659713745, "step": 89990 }, { "epoch": 0.38638881018005716, "grad_norm": 0.009240344166755676, "learning_rate": 6.16261221251606e-05, "loss": 0.28017189502716067, "step": 90000 }, { "epoch": 0.38638881018005716, "eval_loss": 0.42577657103538513, "eval_runtime": 27.1328, "eval_samples_per_second": 3.686, "eval_steps_per_second": 3.686, "step": 90000 }, { "epoch": 0.3864317422700772, "grad_norm": 2.90472149848938, "learning_rate": 6.162181040504298e-05, "loss": 0.10356920957565308, "step": 90010 }, { "epoch": 0.3864746743600972, "grad_norm": 0.43290838599205017, "learning_rate": 6.161749868492536e-05, "loss": 0.1836892247200012, "step": 90020 }, { "epoch": 0.3865176064501172, "grad_norm": 0.1611838936805725, "learning_rate": 6.161318696480774e-05, "loss": 0.18457709550857543, "step": 90030 }, { "epoch": 0.3865605385401372, "grad_norm": 0.20687133073806763, "learning_rate": 6.160887524469013e-05, "loss": 0.2525459051132202, "step": 90040 }, { "epoch": 0.3866034706301572, "grad_norm": 0.10173666477203369, "learning_rate": 6.160456352457251e-05, "loss": 0.18307244777679443, "step": 90050 }, { "epoch": 0.38664640272017725, "grad_norm": 0.30298879742622375, "learning_rate": 6.160025180445487e-05, "loss": 0.2733901500701904, "step": 90060 }, { "epoch": 0.38668933481019724, "grad_norm": 3.4610722064971924, "learning_rate": 6.159594008433725e-05, "loss": 0.238478422164917, "step": 90070 }, { "epoch": 0.3867322669002172, "grad_norm": 18.95323944091797, "learning_rate": 6.159162836421963e-05, "loss": 0.1477491021156311, "step": 90080 }, { "epoch": 0.38677519899023727, "grad_norm": 0.0035032792948186398, "learning_rate": 6.1587316644102e-05, "loss": 0.22136282920837402, "step": 90090 }, { "epoch": 0.38681813108025725, "grad_norm": 0.27647852897644043, "learning_rate": 6.158300492398438e-05, "loss": 0.1641569495201111, "step": 90100 }, { "epoch": 0.38686106317027724, "grad_norm": 0.33862271904945374, "learning_rate": 6.157869320386676e-05, "loss": 0.31995439529418945, "step": 90110 }, { "epoch": 0.3869039952602973, "grad_norm": 0.6005828976631165, "learning_rate": 6.157438148374913e-05, "loss": 0.26249744892120364, "step": 90120 }, { "epoch": 0.38694692735031727, "grad_norm": 0.013982965610921383, "learning_rate": 6.157006976363151e-05, "loss": 0.16258199214935304, "step": 90130 }, { "epoch": 0.38698985944033726, "grad_norm": 0.46348854899406433, "learning_rate": 6.156575804351388e-05, "loss": 0.19305338859558105, "step": 90140 }, { "epoch": 0.3870327915303573, "grad_norm": 3.7916457653045654, "learning_rate": 6.156144632339625e-05, "loss": 0.40165019035339355, "step": 90150 }, { "epoch": 0.3870757236203773, "grad_norm": 0.041253313422203064, "learning_rate": 6.155713460327863e-05, "loss": 0.027036473155021667, "step": 90160 }, { "epoch": 0.3871186557103973, "grad_norm": 0.05210256576538086, "learning_rate": 6.155282288316101e-05, "loss": 0.26577320098876955, "step": 90170 }, { "epoch": 0.3871615878004173, "grad_norm": 0.48661676049232483, "learning_rate": 6.154851116304339e-05, "loss": 0.2865142822265625, "step": 90180 }, { "epoch": 0.3872045198904373, "grad_norm": 1.0749396085739136, "learning_rate": 6.154419944292576e-05, "loss": 0.2049044370651245, "step": 90190 }, { "epoch": 0.3872474519804573, "grad_norm": 0.06901497393846512, "learning_rate": 6.153988772280814e-05, "loss": 0.07353252768516541, "step": 90200 }, { "epoch": 0.38729038407047733, "grad_norm": 0.007591388188302517, "learning_rate": 6.153557600269052e-05, "loss": 0.22325377464294432, "step": 90210 }, { "epoch": 0.3873333161604973, "grad_norm": 0.03437507525086403, "learning_rate": 6.15312642825729e-05, "loss": 0.22393579483032228, "step": 90220 }, { "epoch": 0.3873762482505173, "grad_norm": 0.022142138332128525, "learning_rate": 6.152695256245526e-05, "loss": 0.26709728240966796, "step": 90230 }, { "epoch": 0.38741918034053735, "grad_norm": 0.015263757668435574, "learning_rate": 6.152264084233764e-05, "loss": 0.39098849296569826, "step": 90240 }, { "epoch": 0.38746211243055734, "grad_norm": 0.131820410490036, "learning_rate": 6.151832912222001e-05, "loss": 0.4499326229095459, "step": 90250 }, { "epoch": 0.3875050445205773, "grad_norm": 0.005828079301863909, "learning_rate": 6.15140174021024e-05, "loss": 0.20458014011383058, "step": 90260 }, { "epoch": 0.38754797661059737, "grad_norm": 0.16223856806755066, "learning_rate": 6.150970568198478e-05, "loss": 0.24097530841827391, "step": 90270 }, { "epoch": 0.38759090870061735, "grad_norm": 1.206310749053955, "learning_rate": 6.150539396186716e-05, "loss": 0.21269071102142334, "step": 90280 }, { "epoch": 0.3876338407906374, "grad_norm": 1.7968086004257202, "learning_rate": 6.150108224174954e-05, "loss": 0.18731291294097902, "step": 90290 }, { "epoch": 0.3876767728806574, "grad_norm": 1.2903820276260376, "learning_rate": 6.149677052163191e-05, "loss": 0.22080676555633544, "step": 90300 }, { "epoch": 0.38771970497067737, "grad_norm": 0.005241678096354008, "learning_rate": 6.149245880151428e-05, "loss": 0.06120513081550598, "step": 90310 }, { "epoch": 0.3877626370606974, "grad_norm": 0.29554280638694763, "learning_rate": 6.148814708139665e-05, "loss": 0.08351311683654786, "step": 90320 }, { "epoch": 0.3878055691507174, "grad_norm": 0.0033163491170853376, "learning_rate": 6.148383536127903e-05, "loss": 0.13242911100387572, "step": 90330 }, { "epoch": 0.3878485012407374, "grad_norm": 0.016962099820375443, "learning_rate": 6.147952364116141e-05, "loss": 0.22619824409484862, "step": 90340 }, { "epoch": 0.38789143333075743, "grad_norm": 0.5038765668869019, "learning_rate": 6.147521192104379e-05, "loss": 0.11690418720245362, "step": 90350 }, { "epoch": 0.3879343654207774, "grad_norm": 0.136198490858078, "learning_rate": 6.147090020092616e-05, "loss": 0.060647428035736084, "step": 90360 }, { "epoch": 0.3879772975107974, "grad_norm": 0.01311681792140007, "learning_rate": 6.146658848080854e-05, "loss": 0.26727359294891356, "step": 90370 }, { "epoch": 0.38802022960081745, "grad_norm": 5.027169227600098, "learning_rate": 6.146227676069092e-05, "loss": 0.2338623046875, "step": 90380 }, { "epoch": 0.38806316169083743, "grad_norm": 0.007105580065399408, "learning_rate": 6.145796504057328e-05, "loss": 0.1273871660232544, "step": 90390 }, { "epoch": 0.3881060937808574, "grad_norm": 5.178821563720703, "learning_rate": 6.145365332045566e-05, "loss": 0.3393435001373291, "step": 90400 }, { "epoch": 0.38814902587087746, "grad_norm": 0.008485247381031513, "learning_rate": 6.144934160033804e-05, "loss": 0.2772747278213501, "step": 90410 }, { "epoch": 0.38819195796089745, "grad_norm": 2.125918388366699, "learning_rate": 6.144502988022041e-05, "loss": 0.6011878967285156, "step": 90420 }, { "epoch": 0.38823489005091744, "grad_norm": 3.208209753036499, "learning_rate": 6.144071816010279e-05, "loss": 0.14225001335144044, "step": 90430 }, { "epoch": 0.3882778221409375, "grad_norm": 2.0369322299957275, "learning_rate": 6.143640643998517e-05, "loss": 0.2426603078842163, "step": 90440 }, { "epoch": 0.38832075423095747, "grad_norm": 6.514814853668213, "learning_rate": 6.143209471986755e-05, "loss": 0.2078092575073242, "step": 90450 }, { "epoch": 0.38836368632097745, "grad_norm": 0.1408424824476242, "learning_rate": 6.142778299974992e-05, "loss": 0.22487127780914307, "step": 90460 }, { "epoch": 0.3884066184109975, "grad_norm": 0.9192783236503601, "learning_rate": 6.142347127963229e-05, "loss": 0.32507715225219724, "step": 90470 }, { "epoch": 0.3884495505010175, "grad_norm": 0.02033153735101223, "learning_rate": 6.141915955951468e-05, "loss": 0.09576951265335083, "step": 90480 }, { "epoch": 0.3884924825910375, "grad_norm": 0.005774068646132946, "learning_rate": 6.141484783939706e-05, "loss": 0.32229766845703123, "step": 90490 }, { "epoch": 0.3885354146810575, "grad_norm": 2.699652910232544, "learning_rate": 6.141053611927943e-05, "loss": 0.34198732376098634, "step": 90500 }, { "epoch": 0.3885783467710775, "grad_norm": 0.06830363720655441, "learning_rate": 6.140622439916181e-05, "loss": 0.23384521007537842, "step": 90510 }, { "epoch": 0.38862127886109754, "grad_norm": 1.3438990116119385, "learning_rate": 6.140191267904419e-05, "loss": 0.35765848159790037, "step": 90520 }, { "epoch": 0.38866421095111753, "grad_norm": 1.0712690353393555, "learning_rate": 6.139760095892657e-05, "loss": 0.2655247926712036, "step": 90530 }, { "epoch": 0.3887071430411375, "grad_norm": 0.019178811460733414, "learning_rate": 6.139328923880894e-05, "loss": 0.0837427020072937, "step": 90540 }, { "epoch": 0.38875007513115756, "grad_norm": 1.6567952632904053, "learning_rate": 6.138897751869132e-05, "loss": 0.29982790946960447, "step": 90550 }, { "epoch": 0.38879300722117754, "grad_norm": 1.4304605722427368, "learning_rate": 6.138466579857368e-05, "loss": 0.3659378528594971, "step": 90560 }, { "epoch": 0.38883593931119753, "grad_norm": 0.2591516673564911, "learning_rate": 6.138035407845606e-05, "loss": 0.20934972763061524, "step": 90570 }, { "epoch": 0.3888788714012176, "grad_norm": 43.77493667602539, "learning_rate": 6.137604235833844e-05, "loss": 0.2530338764190674, "step": 90580 }, { "epoch": 0.38892180349123756, "grad_norm": 1.3299733400344849, "learning_rate": 6.137173063822082e-05, "loss": 0.16540772914886476, "step": 90590 }, { "epoch": 0.38896473558125755, "grad_norm": 1.6994363069534302, "learning_rate": 6.136741891810319e-05, "loss": 0.3406243324279785, "step": 90600 }, { "epoch": 0.3890076676712776, "grad_norm": 1.222379207611084, "learning_rate": 6.136310719798557e-05, "loss": 0.2493795394897461, "step": 90610 }, { "epoch": 0.3890505997612976, "grad_norm": 1.4869258403778076, "learning_rate": 6.135879547786795e-05, "loss": 0.3182868242263794, "step": 90620 }, { "epoch": 0.38909353185131756, "grad_norm": 0.08125866949558258, "learning_rate": 6.135448375775033e-05, "loss": 0.03489781022071838, "step": 90630 }, { "epoch": 0.3891364639413376, "grad_norm": 1.8703261613845825, "learning_rate": 6.135017203763269e-05, "loss": 0.2631047487258911, "step": 90640 }, { "epoch": 0.3891793960313576, "grad_norm": 0.9088125228881836, "learning_rate": 6.134586031751507e-05, "loss": 0.2439821720123291, "step": 90650 }, { "epoch": 0.3892223281213776, "grad_norm": 4.052822113037109, "learning_rate": 6.134154859739744e-05, "loss": 0.1447490930557251, "step": 90660 }, { "epoch": 0.3892652602113976, "grad_norm": 0.35810771584510803, "learning_rate": 6.133723687727982e-05, "loss": 0.2471384048461914, "step": 90670 }, { "epoch": 0.3893081923014176, "grad_norm": 0.06162268668413162, "learning_rate": 6.13329251571622e-05, "loss": 0.1883344292640686, "step": 90680 }, { "epoch": 0.3893511243914376, "grad_norm": 1.449040412902832, "learning_rate": 6.132861343704458e-05, "loss": 0.18545243740081788, "step": 90690 }, { "epoch": 0.38939405648145764, "grad_norm": 0.01574113965034485, "learning_rate": 6.132430171692695e-05, "loss": 0.27572879791259763, "step": 90700 }, { "epoch": 0.3894369885714776, "grad_norm": 0.47327515482902527, "learning_rate": 6.131998999680933e-05, "loss": 0.06660547256469726, "step": 90710 }, { "epoch": 0.38947992066149767, "grad_norm": 2.003725290298462, "learning_rate": 6.131567827669171e-05, "loss": 0.23638532161712647, "step": 90720 }, { "epoch": 0.38952285275151766, "grad_norm": 0.05184811353683472, "learning_rate": 6.131136655657408e-05, "loss": 0.23077709674835206, "step": 90730 }, { "epoch": 0.38956578484153764, "grad_norm": 0.01105536986142397, "learning_rate": 6.130705483645646e-05, "loss": 0.3030509948730469, "step": 90740 }, { "epoch": 0.3896087169315577, "grad_norm": 0.7250270247459412, "learning_rate": 6.130274311633884e-05, "loss": 0.11001847982406616, "step": 90750 }, { "epoch": 0.3896516490215777, "grad_norm": 7.975006580352783, "learning_rate": 6.129843139622122e-05, "loss": 0.3358157634735107, "step": 90760 }, { "epoch": 0.38969458111159766, "grad_norm": 0.15005670487880707, "learning_rate": 6.12941196761036e-05, "loss": 0.19826846122741698, "step": 90770 }, { "epoch": 0.3897375132016177, "grad_norm": 0.12587501108646393, "learning_rate": 6.128980795598597e-05, "loss": 0.17790353298187256, "step": 90780 }, { "epoch": 0.3897804452916377, "grad_norm": 0.02037876844406128, "learning_rate": 6.128549623586835e-05, "loss": 0.20419929027557374, "step": 90790 }, { "epoch": 0.3898233773816577, "grad_norm": 0.016147956252098083, "learning_rate": 6.128118451575071e-05, "loss": 0.1969937801361084, "step": 90800 }, { "epoch": 0.3898663094716777, "grad_norm": 0.711434006690979, "learning_rate": 6.127687279563309e-05, "loss": 0.17485193014144898, "step": 90810 }, { "epoch": 0.3899092415616977, "grad_norm": 0.005552069749683142, "learning_rate": 6.127256107551547e-05, "loss": 0.33175613880157473, "step": 90820 }, { "epoch": 0.3899521736517177, "grad_norm": 0.00681539298966527, "learning_rate": 6.126824935539784e-05, "loss": 0.11351083517074585, "step": 90830 }, { "epoch": 0.38999510574173774, "grad_norm": 0.006817379966378212, "learning_rate": 6.126393763528022e-05, "loss": 0.25123019218444825, "step": 90840 }, { "epoch": 0.3900380378317577, "grad_norm": 0.02623910829424858, "learning_rate": 6.12596259151626e-05, "loss": 0.26213512420654295, "step": 90850 }, { "epoch": 0.3900809699217777, "grad_norm": 21.62917137145996, "learning_rate": 6.125531419504498e-05, "loss": 0.27692012786865233, "step": 90860 }, { "epoch": 0.39012390201179775, "grad_norm": 0.008832416497170925, "learning_rate": 6.125100247492735e-05, "loss": 0.2713014602661133, "step": 90870 }, { "epoch": 0.39016683410181774, "grad_norm": 0.0016400536987930536, "learning_rate": 6.124669075480972e-05, "loss": 0.17731016874313354, "step": 90880 }, { "epoch": 0.3902097661918377, "grad_norm": 0.11445105075836182, "learning_rate": 6.12423790346921e-05, "loss": 0.3271883487701416, "step": 90890 }, { "epoch": 0.39025269828185777, "grad_norm": 0.06182974949479103, "learning_rate": 6.123806731457447e-05, "loss": 0.09882626533508301, "step": 90900 }, { "epoch": 0.39029563037187776, "grad_norm": 0.010938971303403378, "learning_rate": 6.123375559445685e-05, "loss": 0.19311379194259642, "step": 90910 }, { "epoch": 0.3903385624618978, "grad_norm": 0.3110086917877197, "learning_rate": 6.122944387433923e-05, "loss": 0.2579014301300049, "step": 90920 }, { "epoch": 0.3903814945519178, "grad_norm": 0.0062742093577980995, "learning_rate": 6.12251321542216e-05, "loss": 0.12250322103500366, "step": 90930 }, { "epoch": 0.39042442664193777, "grad_norm": 0.3339044153690338, "learning_rate": 6.122082043410398e-05, "loss": 0.38106014728546145, "step": 90940 }, { "epoch": 0.3904673587319578, "grad_norm": 0.5673826932907104, "learning_rate": 6.121650871398636e-05, "loss": 0.26010856628417967, "step": 90950 }, { "epoch": 0.3905102908219778, "grad_norm": 0.878169596195221, "learning_rate": 6.121219699386874e-05, "loss": 0.27169249057769773, "step": 90960 }, { "epoch": 0.3905532229119978, "grad_norm": 0.04031570255756378, "learning_rate": 6.120788527375111e-05, "loss": 0.05566856265068054, "step": 90970 }, { "epoch": 0.39059615500201783, "grad_norm": 0.007886065170168877, "learning_rate": 6.120357355363349e-05, "loss": 0.004305092990398407, "step": 90980 }, { "epoch": 0.3906390870920378, "grad_norm": 0.11100518703460693, "learning_rate": 6.119926183351587e-05, "loss": 0.09936892390251159, "step": 90990 }, { "epoch": 0.3906820191820578, "grad_norm": 1.3127731084823608, "learning_rate": 6.119495011339825e-05, "loss": 0.18303449153900148, "step": 91000 }, { "epoch": 0.3906820191820578, "eval_loss": 0.42876288294792175, "eval_runtime": 27.0908, "eval_samples_per_second": 3.691, "eval_steps_per_second": 3.691, "step": 91000 }, { "epoch": 0.39072495127207785, "grad_norm": 0.011186370626091957, "learning_rate": 6.119063839328062e-05, "loss": 0.1739388585090637, "step": 91010 }, { "epoch": 0.39076788336209783, "grad_norm": 0.18774914741516113, "learning_rate": 6.1186326673163e-05, "loss": 0.23218233585357667, "step": 91020 }, { "epoch": 0.3908108154521178, "grad_norm": 0.02326073683798313, "learning_rate": 6.118201495304538e-05, "loss": 0.18793890476226807, "step": 91030 }, { "epoch": 0.39085374754213786, "grad_norm": 0.026896804571151733, "learning_rate": 6.117770323292776e-05, "loss": 0.11842405796051025, "step": 91040 }, { "epoch": 0.39089667963215785, "grad_norm": 5.239316940307617, "learning_rate": 6.117339151281012e-05, "loss": 0.3570317983627319, "step": 91050 }, { "epoch": 0.39093961172217784, "grad_norm": 0.013899068348109722, "learning_rate": 6.11690797926925e-05, "loss": 0.09054339528083802, "step": 91060 }, { "epoch": 0.3909825438121979, "grad_norm": 0.0057420190423727036, "learning_rate": 6.116476807257487e-05, "loss": 0.045521339774131774, "step": 91070 }, { "epoch": 0.39102547590221787, "grad_norm": 0.033044878393411636, "learning_rate": 6.116045635245725e-05, "loss": 0.08407972455024719, "step": 91080 }, { "epoch": 0.39106840799223785, "grad_norm": 0.008280238136649132, "learning_rate": 6.115614463233963e-05, "loss": 0.1877911329269409, "step": 91090 }, { "epoch": 0.3911113400822579, "grad_norm": 0.0190195944160223, "learning_rate": 6.1151832912222e-05, "loss": 0.12840522527694703, "step": 91100 }, { "epoch": 0.3911542721722779, "grad_norm": 0.006106968969106674, "learning_rate": 6.114752119210438e-05, "loss": 0.09369821548461914, "step": 91110 }, { "epoch": 0.39119720426229787, "grad_norm": 46.82925796508789, "learning_rate": 6.114320947198676e-05, "loss": 0.1664884328842163, "step": 91120 }, { "epoch": 0.3912401363523179, "grad_norm": 0.0006028416682966053, "learning_rate": 6.113889775186912e-05, "loss": 0.20093607902526855, "step": 91130 }, { "epoch": 0.3912830684423379, "grad_norm": 3.5816397666931152, "learning_rate": 6.11345860317515e-05, "loss": 0.3592935800552368, "step": 91140 }, { "epoch": 0.39132600053235794, "grad_norm": 2.122619867324829, "learning_rate": 6.113027431163388e-05, "loss": 0.14562952518463135, "step": 91150 }, { "epoch": 0.39136893262237793, "grad_norm": 0.0007185607682913542, "learning_rate": 6.112596259151626e-05, "loss": 0.1464229106903076, "step": 91160 }, { "epoch": 0.3914118647123979, "grad_norm": 0.10905706137418747, "learning_rate": 6.112165087139863e-05, "loss": 0.11112669706344605, "step": 91170 }, { "epoch": 0.39145479680241796, "grad_norm": 0.8562988638877869, "learning_rate": 6.111733915128101e-05, "loss": 0.2025907516479492, "step": 91180 }, { "epoch": 0.39149772889243795, "grad_norm": 0.01761394925415516, "learning_rate": 6.111302743116339e-05, "loss": 0.10299869775772094, "step": 91190 }, { "epoch": 0.39154066098245793, "grad_norm": 2.9216980934143066, "learning_rate": 6.110871571104577e-05, "loss": 0.25521130561828614, "step": 91200 }, { "epoch": 0.391583593072478, "grad_norm": 0.001002555713057518, "learning_rate": 6.110440399092814e-05, "loss": 0.15010665655136107, "step": 91210 }, { "epoch": 0.39162652516249796, "grad_norm": 0.005722669418901205, "learning_rate": 6.110009227081052e-05, "loss": 0.2383742332458496, "step": 91220 }, { "epoch": 0.39166945725251795, "grad_norm": 1.7847390174865723, "learning_rate": 6.10957805506929e-05, "loss": 0.19723060131072997, "step": 91230 }, { "epoch": 0.391712389342538, "grad_norm": 0.0781240239739418, "learning_rate": 6.109146883057528e-05, "loss": 0.2860633373260498, "step": 91240 }, { "epoch": 0.391755321432558, "grad_norm": 3.915970802307129, "learning_rate": 6.108715711045765e-05, "loss": 0.12680907249450685, "step": 91250 }, { "epoch": 0.39179825352257797, "grad_norm": 0.037515632808208466, "learning_rate": 6.108284539034003e-05, "loss": 0.3318891763687134, "step": 91260 }, { "epoch": 0.391841185612598, "grad_norm": 1.438825249671936, "learning_rate": 6.107853367022241e-05, "loss": 0.27229154109954834, "step": 91270 }, { "epoch": 0.391884117702618, "grad_norm": 1.8069391250610352, "learning_rate": 6.107422195010478e-05, "loss": 0.135508930683136, "step": 91280 }, { "epoch": 0.391927049792638, "grad_norm": 0.00529401283711195, "learning_rate": 6.106991022998716e-05, "loss": 0.3083492755889893, "step": 91290 }, { "epoch": 0.391969981882658, "grad_norm": 0.45584940910339355, "learning_rate": 6.106559850986953e-05, "loss": 0.25160813331604004, "step": 91300 }, { "epoch": 0.392012913972678, "grad_norm": 0.9806048274040222, "learning_rate": 6.10612867897519e-05, "loss": 0.20746960639953613, "step": 91310 }, { "epoch": 0.392055846062698, "grad_norm": 0.007097348570823669, "learning_rate": 6.105697506963428e-05, "loss": 0.18413705825805665, "step": 91320 }, { "epoch": 0.39209877815271804, "grad_norm": 0.017154572531580925, "learning_rate": 6.105266334951666e-05, "loss": 0.17170287370681764, "step": 91330 }, { "epoch": 0.39214171024273803, "grad_norm": 5.822543621063232, "learning_rate": 6.104835162939904e-05, "loss": 0.15484896898269654, "step": 91340 }, { "epoch": 0.39218464233275807, "grad_norm": 0.002759866416454315, "learning_rate": 6.104403990928141e-05, "loss": 0.0738214910030365, "step": 91350 }, { "epoch": 0.39222757442277806, "grad_norm": 0.6158252358436584, "learning_rate": 6.103972818916379e-05, "loss": 0.08194655179977417, "step": 91360 }, { "epoch": 0.39227050651279805, "grad_norm": 0.1930149346590042, "learning_rate": 6.103541646904617e-05, "loss": 0.15161114931106567, "step": 91370 }, { "epoch": 0.3923134386028181, "grad_norm": 1.6960185766220093, "learning_rate": 6.103110474892854e-05, "loss": 0.2454068899154663, "step": 91380 }, { "epoch": 0.3923563706928381, "grad_norm": 0.28160202503204346, "learning_rate": 6.1026793028810915e-05, "loss": 0.158779513835907, "step": 91390 }, { "epoch": 0.39239930278285806, "grad_norm": 0.050931766629219055, "learning_rate": 6.102248130869329e-05, "loss": 0.12442408800125122, "step": 91400 }, { "epoch": 0.3924422348728781, "grad_norm": 1.0036672353744507, "learning_rate": 6.101816958857567e-05, "loss": 0.1712067246437073, "step": 91410 }, { "epoch": 0.3924851669628981, "grad_norm": 0.1027325913310051, "learning_rate": 6.101385786845805e-05, "loss": 0.4437578201293945, "step": 91420 }, { "epoch": 0.3925280990529181, "grad_norm": 1.9295845031738281, "learning_rate": 6.1009546148340424e-05, "loss": 0.4104002475738525, "step": 91430 }, { "epoch": 0.3925710311429381, "grad_norm": 0.3841754198074341, "learning_rate": 6.10052344282228e-05, "loss": 0.37738420963287356, "step": 91440 }, { "epoch": 0.3926139632329581, "grad_norm": 0.07138053327798843, "learning_rate": 6.100092270810518e-05, "loss": 0.06622268557548523, "step": 91450 }, { "epoch": 0.3926568953229781, "grad_norm": 0.012696065939962864, "learning_rate": 6.099661098798754e-05, "loss": 0.1565432906150818, "step": 91460 }, { "epoch": 0.39269982741299814, "grad_norm": 8.017718315124512, "learning_rate": 6.099229926786992e-05, "loss": 0.20162699222564698, "step": 91470 }, { "epoch": 0.3927427595030181, "grad_norm": 0.8579626679420471, "learning_rate": 6.0987987547752304e-05, "loss": 0.29092042446136473, "step": 91480 }, { "epoch": 0.3927856915930381, "grad_norm": 1.805871605873108, "learning_rate": 6.098367582763468e-05, "loss": 0.34696040153503416, "step": 91490 }, { "epoch": 0.39282862368305815, "grad_norm": 0.008364181965589523, "learning_rate": 6.097936410751706e-05, "loss": 0.24159841537475585, "step": 91500 }, { "epoch": 0.39287155577307814, "grad_norm": 0.0062538920901715755, "learning_rate": 6.0975052387399436e-05, "loss": 0.211861252784729, "step": 91510 }, { "epoch": 0.3929144878630981, "grad_norm": 3.8626821041107178, "learning_rate": 6.0970740667281814e-05, "loss": 0.23252692222595214, "step": 91520 }, { "epoch": 0.39295741995311817, "grad_norm": 0.014011452905833721, "learning_rate": 6.096642894716419e-05, "loss": 0.12460508346557617, "step": 91530 }, { "epoch": 0.39300035204313816, "grad_norm": 0.009509447030723095, "learning_rate": 6.0962117227046555e-05, "loss": 0.21465637683868408, "step": 91540 }, { "epoch": 0.39304328413315814, "grad_norm": 0.0024375561624765396, "learning_rate": 6.095780550692893e-05, "loss": 0.21100099086761476, "step": 91550 }, { "epoch": 0.3930862162231782, "grad_norm": 4.144960403442383, "learning_rate": 6.095349378681131e-05, "loss": 0.2823643684387207, "step": 91560 }, { "epoch": 0.3931291483131982, "grad_norm": 1.322467565536499, "learning_rate": 6.094918206669369e-05, "loss": 0.08026805520057678, "step": 91570 }, { "epoch": 0.3931720804032182, "grad_norm": 0.06816992163658142, "learning_rate": 6.0944870346576064e-05, "loss": 0.06737584471702576, "step": 91580 }, { "epoch": 0.3932150124932382, "grad_norm": 0.3935220539569855, "learning_rate": 6.094055862645844e-05, "loss": 0.12359728813171386, "step": 91590 }, { "epoch": 0.3932579445832582, "grad_norm": 2.4706647396087646, "learning_rate": 6.093624690634082e-05, "loss": 0.3537791967391968, "step": 91600 }, { "epoch": 0.39330087667327823, "grad_norm": 1.1445716619491577, "learning_rate": 6.0931935186223196e-05, "loss": 0.1657669425010681, "step": 91610 }, { "epoch": 0.3933438087632982, "grad_norm": 0.05122748389840126, "learning_rate": 6.092762346610557e-05, "loss": 0.20379807949066162, "step": 91620 }, { "epoch": 0.3933867408533182, "grad_norm": 0.1313648372888565, "learning_rate": 6.0923311745987944e-05, "loss": 0.09172605872154235, "step": 91630 }, { "epoch": 0.39342967294333825, "grad_norm": 0.07003146409988403, "learning_rate": 6.091900002587032e-05, "loss": 0.22285842895507812, "step": 91640 }, { "epoch": 0.39347260503335824, "grad_norm": 1.9692494869232178, "learning_rate": 6.09146883057527e-05, "loss": 0.18839340209960936, "step": 91650 }, { "epoch": 0.3935155371233782, "grad_norm": 0.0328078456223011, "learning_rate": 6.0910376585635076e-05, "loss": 0.32558040618896483, "step": 91660 }, { "epoch": 0.39355846921339827, "grad_norm": 0.007657351437956095, "learning_rate": 6.0906064865517454e-05, "loss": 0.16526817083358764, "step": 91670 }, { "epoch": 0.39360140130341825, "grad_norm": 0.055285923182964325, "learning_rate": 6.090175314539983e-05, "loss": 0.27820913791656493, "step": 91680 }, { "epoch": 0.39364433339343824, "grad_norm": 0.49281400442123413, "learning_rate": 6.089744142528221e-05, "loss": 0.09620480537414551, "step": 91690 }, { "epoch": 0.3936872654834583, "grad_norm": 4.030596733093262, "learning_rate": 6.0893129705164586e-05, "loss": 0.35482666492462156, "step": 91700 }, { "epoch": 0.39373019757347827, "grad_norm": 2.9866480827331543, "learning_rate": 6.0888817985046956e-05, "loss": 0.0979809284210205, "step": 91710 }, { "epoch": 0.39377312966349826, "grad_norm": 0.010686771012842655, "learning_rate": 6.0884506264929333e-05, "loss": 0.34946651458740235, "step": 91720 }, { "epoch": 0.3938160617535183, "grad_norm": 0.002895164769142866, "learning_rate": 6.088019454481171e-05, "loss": 0.09829755425453186, "step": 91730 }, { "epoch": 0.3938589938435383, "grad_norm": 0.07746174186468124, "learning_rate": 6.087588282469409e-05, "loss": 0.3091606378555298, "step": 91740 }, { "epoch": 0.3939019259335583, "grad_norm": 0.1524367332458496, "learning_rate": 6.0871571104576466e-05, "loss": 0.1508937358856201, "step": 91750 }, { "epoch": 0.3939448580235783, "grad_norm": 0.011452741920948029, "learning_rate": 6.086725938445884e-05, "loss": 0.317603611946106, "step": 91760 }, { "epoch": 0.3939877901135983, "grad_norm": 0.004681314807385206, "learning_rate": 6.086294766434122e-05, "loss": 0.291134238243103, "step": 91770 }, { "epoch": 0.3940307222036183, "grad_norm": 0.022366631776094437, "learning_rate": 6.08586359442236e-05, "loss": 0.2740320682525635, "step": 91780 }, { "epoch": 0.39407365429363833, "grad_norm": 3.361569404602051, "learning_rate": 6.085432422410596e-05, "loss": 0.4950369358062744, "step": 91790 }, { "epoch": 0.3941165863836583, "grad_norm": 0.0028315645176917315, "learning_rate": 6.085001250398834e-05, "loss": 0.1323166847229004, "step": 91800 }, { "epoch": 0.39415951847367836, "grad_norm": 1.0123827457427979, "learning_rate": 6.0845700783870716e-05, "loss": 0.38972649574279783, "step": 91810 }, { "epoch": 0.39420245056369835, "grad_norm": 0.039406854659318924, "learning_rate": 6.084138906375309e-05, "loss": 0.1629611849784851, "step": 91820 }, { "epoch": 0.39424538265371833, "grad_norm": 1.5213974714279175, "learning_rate": 6.083707734363547e-05, "loss": 0.4453934669494629, "step": 91830 }, { "epoch": 0.3942883147437384, "grad_norm": 2.449429750442505, "learning_rate": 6.083276562351785e-05, "loss": 0.32040867805480955, "step": 91840 }, { "epoch": 0.39433124683375836, "grad_norm": 0.08724970370531082, "learning_rate": 6.0828453903400225e-05, "loss": 0.07561442852020264, "step": 91850 }, { "epoch": 0.39437417892377835, "grad_norm": 0.008718972094357014, "learning_rate": 6.082414218328261e-05, "loss": 0.30009486675262453, "step": 91860 }, { "epoch": 0.3944171110137984, "grad_norm": 0.21262004971504211, "learning_rate": 6.081983046316497e-05, "loss": 0.30516757965087893, "step": 91870 }, { "epoch": 0.3944600431038184, "grad_norm": 0.03169046714901924, "learning_rate": 6.081551874304735e-05, "loss": 0.29369001388549804, "step": 91880 }, { "epoch": 0.39450297519383837, "grad_norm": 6.781734943389893, "learning_rate": 6.081120702292973e-05, "loss": 0.2241981029510498, "step": 91890 }, { "epoch": 0.3945459072838584, "grad_norm": 0.7466356158256531, "learning_rate": 6.0806895302812105e-05, "loss": 0.012508706748485565, "step": 91900 }, { "epoch": 0.3945888393738784, "grad_norm": 0.011181380599737167, "learning_rate": 6.080258358269448e-05, "loss": 0.3503504037857056, "step": 91910 }, { "epoch": 0.3946317714638984, "grad_norm": 0.03524165600538254, "learning_rate": 6.079827186257686e-05, "loss": 0.006903672963380814, "step": 91920 }, { "epoch": 0.3946747035539184, "grad_norm": 0.06213952973484993, "learning_rate": 6.079396014245924e-05, "loss": 0.07258799076080322, "step": 91930 }, { "epoch": 0.3947176356439384, "grad_norm": 1.010565996170044, "learning_rate": 6.0789648422341615e-05, "loss": 0.17450504302978515, "step": 91940 }, { "epoch": 0.3947605677339584, "grad_norm": 0.02723545767366886, "learning_rate": 6.0785336702223985e-05, "loss": 0.2947016000747681, "step": 91950 }, { "epoch": 0.39480349982397844, "grad_norm": 0.00735612353309989, "learning_rate": 6.078102498210636e-05, "loss": 0.13675864934921264, "step": 91960 }, { "epoch": 0.39484643191399843, "grad_norm": 0.020295286551117897, "learning_rate": 6.077671326198874e-05, "loss": 0.15206470489501953, "step": 91970 }, { "epoch": 0.3948893640040184, "grad_norm": 0.10978235304355621, "learning_rate": 6.077240154187112e-05, "loss": 0.21314818859100343, "step": 91980 }, { "epoch": 0.39493229609403846, "grad_norm": 0.025493459776043892, "learning_rate": 6.0768089821753495e-05, "loss": 0.03302266895771026, "step": 91990 }, { "epoch": 0.39497522818405845, "grad_norm": 0.07763177156448364, "learning_rate": 6.076377810163587e-05, "loss": 0.15637160539627076, "step": 92000 }, { "epoch": 0.39497522818405845, "eval_loss": 0.43165695667266846, "eval_runtime": 27.0763, "eval_samples_per_second": 3.693, "eval_steps_per_second": 3.693, "step": 92000 }, { "epoch": 0.3950181602740785, "grad_norm": 20.20836067199707, "learning_rate": 6.075946638151825e-05, "loss": 0.27847981452941895, "step": 92010 }, { "epoch": 0.3950610923640985, "grad_norm": 0.032609887421131134, "learning_rate": 6.075515466140063e-05, "loss": 0.26609823703765867, "step": 92020 }, { "epoch": 0.39510402445411846, "grad_norm": 0.07131468504667282, "learning_rate": 6.0750842941283004e-05, "loss": 0.15527513027191162, "step": 92030 }, { "epoch": 0.3951469565441385, "grad_norm": 2.1843528747558594, "learning_rate": 6.074653122116537e-05, "loss": 0.22598354816436766, "step": 92040 }, { "epoch": 0.3951898886341585, "grad_norm": 1.2208198308944702, "learning_rate": 6.0742219501047745e-05, "loss": 0.19779176712036134, "step": 92050 }, { "epoch": 0.3952328207241785, "grad_norm": 0.03513918071985245, "learning_rate": 6.073790778093012e-05, "loss": 0.08396974802017212, "step": 92060 }, { "epoch": 0.3952757528141985, "grad_norm": 0.7641962766647339, "learning_rate": 6.07335960608125e-05, "loss": 0.2717055082321167, "step": 92070 }, { "epoch": 0.3953186849042185, "grad_norm": 1.8674321174621582, "learning_rate": 6.0729284340694884e-05, "loss": 0.24090301990509033, "step": 92080 }, { "epoch": 0.3953616169942385, "grad_norm": 1.9578057527542114, "learning_rate": 6.072497262057726e-05, "loss": 0.24970598220825196, "step": 92090 }, { "epoch": 0.39540454908425854, "grad_norm": 2.490677833557129, "learning_rate": 6.072066090045964e-05, "loss": 0.25661425590515136, "step": 92100 }, { "epoch": 0.3954474811742785, "grad_norm": 19.66390037536621, "learning_rate": 6.0716349180342016e-05, "loss": 0.16211674213409424, "step": 92110 }, { "epoch": 0.3954904132642985, "grad_norm": 0.046901851892471313, "learning_rate": 6.071203746022438e-05, "loss": 0.05299463272094727, "step": 92120 }, { "epoch": 0.39553334535431856, "grad_norm": 1.2311347723007202, "learning_rate": 6.070772574010676e-05, "loss": 0.1265595555305481, "step": 92130 }, { "epoch": 0.39557627744433854, "grad_norm": 1.5069876909255981, "learning_rate": 6.0703414019989134e-05, "loss": 0.25828940868377687, "step": 92140 }, { "epoch": 0.39561920953435853, "grad_norm": 0.444572776556015, "learning_rate": 6.069910229987151e-05, "loss": 0.09917322397232056, "step": 92150 }, { "epoch": 0.39566214162437857, "grad_norm": 0.0789375901222229, "learning_rate": 6.069479057975389e-05, "loss": 0.29941983222961427, "step": 92160 }, { "epoch": 0.39570507371439856, "grad_norm": 1.7957231998443604, "learning_rate": 6.0690478859636266e-05, "loss": 0.2737236976623535, "step": 92170 }, { "epoch": 0.39574800580441855, "grad_norm": 0.04896816238760948, "learning_rate": 6.0686167139518644e-05, "loss": 0.08509291410446167, "step": 92180 }, { "epoch": 0.3957909378944386, "grad_norm": 0.020770037546753883, "learning_rate": 6.068185541940102e-05, "loss": 0.4327548980712891, "step": 92190 }, { "epoch": 0.3958338699844586, "grad_norm": 0.5426750779151917, "learning_rate": 6.067754369928339e-05, "loss": 0.3243721008300781, "step": 92200 }, { "epoch": 0.39587680207447856, "grad_norm": 16.816232681274414, "learning_rate": 6.067323197916577e-05, "loss": 0.18375918865203858, "step": 92210 }, { "epoch": 0.3959197341644986, "grad_norm": 0.09073749929666519, "learning_rate": 6.0668920259048146e-05, "loss": 0.0876370370388031, "step": 92220 }, { "epoch": 0.3959626662545186, "grad_norm": 0.7398306727409363, "learning_rate": 6.0664608538930524e-05, "loss": 0.18924959897994995, "step": 92230 }, { "epoch": 0.39600559834453863, "grad_norm": 0.16764898598194122, "learning_rate": 6.06602968188129e-05, "loss": 0.16392149925231933, "step": 92240 }, { "epoch": 0.3960485304345586, "grad_norm": 0.027810113504529, "learning_rate": 6.065598509869528e-05, "loss": 0.3249415636062622, "step": 92250 }, { "epoch": 0.3960914625245786, "grad_norm": 0.11741064488887787, "learning_rate": 6.0651673378577656e-05, "loss": 0.17678941488265992, "step": 92260 }, { "epoch": 0.39613439461459865, "grad_norm": 0.01126841176301241, "learning_rate": 6.064736165846003e-05, "loss": 0.1792654275894165, "step": 92270 }, { "epoch": 0.39617732670461864, "grad_norm": 0.0017827929696068168, "learning_rate": 6.06430499383424e-05, "loss": 0.19189642667770385, "step": 92280 }, { "epoch": 0.3962202587946386, "grad_norm": 2.9198524951934814, "learning_rate": 6.0638738218224774e-05, "loss": 0.2880232334136963, "step": 92290 }, { "epoch": 0.39626319088465867, "grad_norm": 0.021255875006318092, "learning_rate": 6.063442649810716e-05, "loss": 0.1363054871559143, "step": 92300 }, { "epoch": 0.39630612297467865, "grad_norm": 0.002346013905480504, "learning_rate": 6.0630114777989536e-05, "loss": 0.31574110984802245, "step": 92310 }, { "epoch": 0.39634905506469864, "grad_norm": 1.1382430791854858, "learning_rate": 6.062580305787191e-05, "loss": 0.2634714365005493, "step": 92320 }, { "epoch": 0.3963919871547187, "grad_norm": 0.002902445150539279, "learning_rate": 6.062149133775429e-05, "loss": 0.24987235069274902, "step": 92330 }, { "epoch": 0.39643491924473867, "grad_norm": 0.0020218868739902973, "learning_rate": 6.061717961763667e-05, "loss": 0.22082626819610596, "step": 92340 }, { "epoch": 0.39647785133475866, "grad_norm": 0.3430553078651428, "learning_rate": 6.0612867897519045e-05, "loss": 0.3638188362121582, "step": 92350 }, { "epoch": 0.3965207834247787, "grad_norm": 0.0021895577665418386, "learning_rate": 6.060855617740141e-05, "loss": 0.11037677526473999, "step": 92360 }, { "epoch": 0.3965637155147987, "grad_norm": 0.8205850124359131, "learning_rate": 6.0604244457283786e-05, "loss": 0.2192380666732788, "step": 92370 }, { "epoch": 0.3966066476048187, "grad_norm": 2.2314140796661377, "learning_rate": 6.0599932737166164e-05, "loss": 0.20187749862670898, "step": 92380 }, { "epoch": 0.3966495796948387, "grad_norm": 3.9422032833099365, "learning_rate": 6.059562101704854e-05, "loss": 0.15174542665481566, "step": 92390 }, { "epoch": 0.3966925117848587, "grad_norm": 0.10271573066711426, "learning_rate": 6.059130929693092e-05, "loss": 0.22555267810821533, "step": 92400 }, { "epoch": 0.3967354438748787, "grad_norm": 0.29038718342781067, "learning_rate": 6.0586997576813296e-05, "loss": 0.2885429382324219, "step": 92410 }, { "epoch": 0.39677837596489873, "grad_norm": 0.03839349001646042, "learning_rate": 6.058268585669567e-05, "loss": 0.25131521224975584, "step": 92420 }, { "epoch": 0.3968213080549187, "grad_norm": 0.4567015767097473, "learning_rate": 6.057837413657805e-05, "loss": 0.1675976276397705, "step": 92430 }, { "epoch": 0.39686424014493876, "grad_norm": 1.6474500894546509, "learning_rate": 6.057406241646043e-05, "loss": 0.247251296043396, "step": 92440 }, { "epoch": 0.39690717223495875, "grad_norm": 3.3086657524108887, "learning_rate": 6.05697506963428e-05, "loss": 0.2922791004180908, "step": 92450 }, { "epoch": 0.39695010432497874, "grad_norm": 1.4948768615722656, "learning_rate": 6.0565438976225176e-05, "loss": 0.34301207065582273, "step": 92460 }, { "epoch": 0.3969930364149988, "grad_norm": 0.05721181258559227, "learning_rate": 6.056112725610755e-05, "loss": 0.32348856925964353, "step": 92470 }, { "epoch": 0.39703596850501877, "grad_norm": 0.10996733605861664, "learning_rate": 6.055681553598993e-05, "loss": 0.23233428001403808, "step": 92480 }, { "epoch": 0.39707890059503875, "grad_norm": 0.6285014152526855, "learning_rate": 6.055250381587231e-05, "loss": 0.28221845626831055, "step": 92490 }, { "epoch": 0.3971218326850588, "grad_norm": 7.208746910095215, "learning_rate": 6.0548192095754685e-05, "loss": 0.48242778778076173, "step": 92500 }, { "epoch": 0.3971647647750788, "grad_norm": 6.577877044677734, "learning_rate": 6.054388037563706e-05, "loss": 0.25366084575653075, "step": 92510 }, { "epoch": 0.39720769686509877, "grad_norm": 0.10521818697452545, "learning_rate": 6.053956865551944e-05, "loss": 0.3314534664154053, "step": 92520 }, { "epoch": 0.3972506289551188, "grad_norm": 1.7988253831863403, "learning_rate": 6.053525693540181e-05, "loss": 0.4622535228729248, "step": 92530 }, { "epoch": 0.3972935610451388, "grad_norm": 0.1328478753566742, "learning_rate": 6.053094521528419e-05, "loss": 0.18051735162734986, "step": 92540 }, { "epoch": 0.3973364931351588, "grad_norm": 1.4796525239944458, "learning_rate": 6.0526633495166565e-05, "loss": 0.20069704055786133, "step": 92550 }, { "epoch": 0.39737942522517883, "grad_norm": 0.2786750793457031, "learning_rate": 6.052232177504894e-05, "loss": 0.32162978649139407, "step": 92560 }, { "epoch": 0.3974223573151988, "grad_norm": 0.107948899269104, "learning_rate": 6.051801005493132e-05, "loss": 0.3327937602996826, "step": 92570 }, { "epoch": 0.3974652894052188, "grad_norm": 0.012413183227181435, "learning_rate": 6.05136983348137e-05, "loss": 0.2087266206741333, "step": 92580 }, { "epoch": 0.39750822149523884, "grad_norm": 0.7624533176422119, "learning_rate": 6.0509386614696074e-05, "loss": 0.302428150177002, "step": 92590 }, { "epoch": 0.39755115358525883, "grad_norm": 0.001710769603960216, "learning_rate": 6.050507489457845e-05, "loss": 0.053492683172225955, "step": 92600 }, { "epoch": 0.3975940856752788, "grad_norm": 0.037044957280159, "learning_rate": 6.0500763174460815e-05, "loss": 0.27043685913085935, "step": 92610 }, { "epoch": 0.39763701776529886, "grad_norm": 0.39931926131248474, "learning_rate": 6.049645145434319e-05, "loss": 0.2734602689743042, "step": 92620 }, { "epoch": 0.39767994985531885, "grad_norm": 0.019506726413965225, "learning_rate": 6.049213973422557e-05, "loss": 0.1141431450843811, "step": 92630 }, { "epoch": 0.39772288194533884, "grad_norm": 0.001194458338432014, "learning_rate": 6.048782801410795e-05, "loss": 0.09379836320877075, "step": 92640 }, { "epoch": 0.3977658140353589, "grad_norm": 0.011507615447044373, "learning_rate": 6.0483516293990325e-05, "loss": 0.10641751289367676, "step": 92650 }, { "epoch": 0.39780874612537886, "grad_norm": 0.9543510675430298, "learning_rate": 6.04792045738727e-05, "loss": 0.3608994483947754, "step": 92660 }, { "epoch": 0.3978516782153989, "grad_norm": 0.00274080503731966, "learning_rate": 6.0474892853755086e-05, "loss": 0.20310893058776855, "step": 92670 }, { "epoch": 0.3978946103054189, "grad_norm": 0.0007183632696978748, "learning_rate": 6.0470581133637464e-05, "loss": 0.36690948009490965, "step": 92680 }, { "epoch": 0.3979375423954389, "grad_norm": 0.03603541851043701, "learning_rate": 6.046626941351983e-05, "loss": 0.1928205370903015, "step": 92690 }, { "epoch": 0.3979804744854589, "grad_norm": 3.2046563625335693, "learning_rate": 6.0461957693402205e-05, "loss": 0.2937966823577881, "step": 92700 }, { "epoch": 0.3980234065754789, "grad_norm": 3.496809959411621, "learning_rate": 6.045764597328458e-05, "loss": 0.27285568714141845, "step": 92710 }, { "epoch": 0.3980663386654989, "grad_norm": 0.005234590731561184, "learning_rate": 6.045333425316696e-05, "loss": 0.31596553325653076, "step": 92720 }, { "epoch": 0.39810927075551894, "grad_norm": 0.9226164817810059, "learning_rate": 6.044902253304934e-05, "loss": 0.2640789747238159, "step": 92730 }, { "epoch": 0.3981522028455389, "grad_norm": 0.05384889245033264, "learning_rate": 6.0444710812931714e-05, "loss": 0.18334288597106935, "step": 92740 }, { "epoch": 0.3981951349355589, "grad_norm": 1.8284244537353516, "learning_rate": 6.044039909281409e-05, "loss": 0.267243480682373, "step": 92750 }, { "epoch": 0.39823806702557896, "grad_norm": 0.17038202285766602, "learning_rate": 6.043608737269647e-05, "loss": 0.25903208255767823, "step": 92760 }, { "epoch": 0.39828099911559894, "grad_norm": 0.10753041505813599, "learning_rate": 6.0431775652578846e-05, "loss": 0.10099961757659912, "step": 92770 }, { "epoch": 0.39832393120561893, "grad_norm": 0.1653003692626953, "learning_rate": 6.042746393246122e-05, "loss": 0.17818518877029418, "step": 92780 }, { "epoch": 0.398366863295639, "grad_norm": 5.727450847625732, "learning_rate": 6.0423152212343594e-05, "loss": 0.21343677043914794, "step": 92790 }, { "epoch": 0.39840979538565896, "grad_norm": 0.0009643832454457879, "learning_rate": 6.041884049222597e-05, "loss": 0.24222161769866943, "step": 92800 }, { "epoch": 0.39845272747567895, "grad_norm": 1.5220520496368408, "learning_rate": 6.041452877210835e-05, "loss": 0.11254243850708008, "step": 92810 }, { "epoch": 0.398495659565699, "grad_norm": 0.029565613716840744, "learning_rate": 6.0410217051990726e-05, "loss": 0.011484414339065552, "step": 92820 }, { "epoch": 0.398538591655719, "grad_norm": 0.001762153347954154, "learning_rate": 6.04059053318731e-05, "loss": 0.2063227891921997, "step": 92830 }, { "epoch": 0.39858152374573896, "grad_norm": 0.02957475185394287, "learning_rate": 6.040159361175548e-05, "loss": 0.0242899626493454, "step": 92840 }, { "epoch": 0.398624455835759, "grad_norm": 0.0029483300168067217, "learning_rate": 6.039728189163786e-05, "loss": 0.3035967588424683, "step": 92850 }, { "epoch": 0.398667387925779, "grad_norm": 0.05529223382472992, "learning_rate": 6.039297017152022e-05, "loss": 0.30932021141052246, "step": 92860 }, { "epoch": 0.39871032001579904, "grad_norm": 14.569122314453125, "learning_rate": 6.03886584514026e-05, "loss": 0.13557947874069215, "step": 92870 }, { "epoch": 0.398753252105819, "grad_norm": 2.028317451477051, "learning_rate": 6.0384346731284977e-05, "loss": 0.41254119873046874, "step": 92880 }, { "epoch": 0.398796184195839, "grad_norm": 5.376577854156494, "learning_rate": 6.038003501116736e-05, "loss": 0.47383694648742675, "step": 92890 }, { "epoch": 0.39883911628585905, "grad_norm": 47.30210876464844, "learning_rate": 6.037572329104974e-05, "loss": 0.23314952850341797, "step": 92900 }, { "epoch": 0.39888204837587904, "grad_norm": 0.0007397299632430077, "learning_rate": 6.0371411570932115e-05, "loss": 0.10217365026473998, "step": 92910 }, { "epoch": 0.398924980465899, "grad_norm": 0.045869532972574234, "learning_rate": 6.036709985081449e-05, "loss": 0.13935530185699463, "step": 92920 }, { "epoch": 0.39896791255591907, "grad_norm": 0.3745325803756714, "learning_rate": 6.036278813069687e-05, "loss": 0.3490712881088257, "step": 92930 }, { "epoch": 0.39901084464593906, "grad_norm": 1.1765798330307007, "learning_rate": 6.0358476410579234e-05, "loss": 0.4222440242767334, "step": 92940 }, { "epoch": 0.39905377673595904, "grad_norm": 4.688851833343506, "learning_rate": 6.035416469046161e-05, "loss": 0.37330188751220705, "step": 92950 }, { "epoch": 0.3990967088259791, "grad_norm": 0.005485413130372763, "learning_rate": 6.034985297034399e-05, "loss": 0.2636329412460327, "step": 92960 }, { "epoch": 0.39913964091599907, "grad_norm": 0.1594797968864441, "learning_rate": 6.0345541250226366e-05, "loss": 0.22802400588989258, "step": 92970 }, { "epoch": 0.39918257300601906, "grad_norm": 0.018702786415815353, "learning_rate": 6.034122953010874e-05, "loss": 0.19402458667755126, "step": 92980 }, { "epoch": 0.3992255050960391, "grad_norm": 0.49566909670829773, "learning_rate": 6.033691780999112e-05, "loss": 0.1691906452178955, "step": 92990 }, { "epoch": 0.3992684371860591, "grad_norm": 0.5254350304603577, "learning_rate": 6.03326060898735e-05, "loss": 0.20444183349609374, "step": 93000 }, { "epoch": 0.3992684371860591, "eval_loss": 0.42532363533973694, "eval_runtime": 27.1264, "eval_samples_per_second": 3.686, "eval_steps_per_second": 3.686, "step": 93000 }, { "epoch": 0.3993113692760791, "grad_norm": 0.021253425627946854, "learning_rate": 6.0328294369755875e-05, "loss": 0.18103508949279784, "step": 93010 }, { "epoch": 0.3993543013660991, "grad_norm": 0.04606667160987854, "learning_rate": 6.0323982649638246e-05, "loss": 0.00736895427107811, "step": 93020 }, { "epoch": 0.3993972334561191, "grad_norm": 2.99416446685791, "learning_rate": 6.031967092952062e-05, "loss": 0.19407373666763306, "step": 93030 }, { "epoch": 0.3994401655461391, "grad_norm": 4.798525333404541, "learning_rate": 6.0315359209403e-05, "loss": 0.20663731098175048, "step": 93040 }, { "epoch": 0.39948309763615913, "grad_norm": 0.004676021169871092, "learning_rate": 6.031104748928538e-05, "loss": 0.22965459823608397, "step": 93050 }, { "epoch": 0.3995260297261791, "grad_norm": 0.027508612722158432, "learning_rate": 6.0306735769167755e-05, "loss": 0.3717710018157959, "step": 93060 }, { "epoch": 0.3995689618161991, "grad_norm": 0.7421900629997253, "learning_rate": 6.030242404905013e-05, "loss": 0.18571395874023439, "step": 93070 }, { "epoch": 0.39961189390621915, "grad_norm": 1.7135603427886963, "learning_rate": 6.029811232893251e-05, "loss": 0.13353700637817384, "step": 93080 }, { "epoch": 0.39965482599623914, "grad_norm": 0.02101057395339012, "learning_rate": 6.029380060881489e-05, "loss": 0.19278452396392823, "step": 93090 }, { "epoch": 0.3996977580862592, "grad_norm": 1.4164752960205078, "learning_rate": 6.0289488888697265e-05, "loss": 0.2833911657333374, "step": 93100 }, { "epoch": 0.39974069017627917, "grad_norm": 0.5472753643989563, "learning_rate": 6.0285177168579635e-05, "loss": 0.3730371713638306, "step": 93110 }, { "epoch": 0.39978362226629915, "grad_norm": 4.260237216949463, "learning_rate": 6.028086544846201e-05, "loss": 0.0819745659828186, "step": 93120 }, { "epoch": 0.3998265543563192, "grad_norm": 0.03258398547768593, "learning_rate": 6.027655372834439e-05, "loss": 0.41202602386474607, "step": 93130 }, { "epoch": 0.3998694864463392, "grad_norm": 0.012231654487550259, "learning_rate": 6.027224200822677e-05, "loss": 0.054266971349716184, "step": 93140 }, { "epoch": 0.39991241853635917, "grad_norm": 0.02671731263399124, "learning_rate": 6.0267930288109144e-05, "loss": 0.13896874189376832, "step": 93150 }, { "epoch": 0.3999553506263792, "grad_norm": 0.008304188027977943, "learning_rate": 6.026361856799152e-05, "loss": 0.3450723171234131, "step": 93160 }, { "epoch": 0.3999982827163992, "grad_norm": 0.0143406568095088, "learning_rate": 6.02593068478739e-05, "loss": 0.13900293111801149, "step": 93170 }, { "epoch": 0.4000412148064192, "grad_norm": 0.0057134004309773445, "learning_rate": 6.0254995127756276e-05, "loss": 0.21512312889099122, "step": 93180 }, { "epoch": 0.40008414689643923, "grad_norm": 1.5521478652954102, "learning_rate": 6.025068340763864e-05, "loss": 0.20870733261108398, "step": 93190 }, { "epoch": 0.4001270789864592, "grad_norm": 0.022138668224215508, "learning_rate": 6.024637168752102e-05, "loss": 0.34106059074401857, "step": 93200 }, { "epoch": 0.4001700110764792, "grad_norm": 0.04387650266289711, "learning_rate": 6.0242059967403395e-05, "loss": 0.0750705897808075, "step": 93210 }, { "epoch": 0.40021294316649925, "grad_norm": 1.2559690475463867, "learning_rate": 6.023774824728577e-05, "loss": 0.3357060432434082, "step": 93220 }, { "epoch": 0.40025587525651923, "grad_norm": 0.3811001479625702, "learning_rate": 6.023343652716815e-05, "loss": 0.009803864359855651, "step": 93230 }, { "epoch": 0.4002988073465392, "grad_norm": 9.359700202941895, "learning_rate": 6.022912480705053e-05, "loss": 0.2810043811798096, "step": 93240 }, { "epoch": 0.40034173943655926, "grad_norm": 0.04405032843351364, "learning_rate": 6.0224813086932904e-05, "loss": 0.4757356643676758, "step": 93250 }, { "epoch": 0.40038467152657925, "grad_norm": 0.004825854208320379, "learning_rate": 6.022050136681529e-05, "loss": 0.06741368770599365, "step": 93260 }, { "epoch": 0.40042760361659924, "grad_norm": 0.03525272756814957, "learning_rate": 6.021618964669765e-05, "loss": 0.1233631730079651, "step": 93270 }, { "epoch": 0.4004705357066193, "grad_norm": 9.752348899841309, "learning_rate": 6.021187792658003e-05, "loss": 0.3055421829223633, "step": 93280 }, { "epoch": 0.40051346779663927, "grad_norm": 0.08953861892223358, "learning_rate": 6.020756620646241e-05, "loss": 0.12309778928756714, "step": 93290 }, { "epoch": 0.4005563998866593, "grad_norm": 4.488521575927734, "learning_rate": 6.0203254486344784e-05, "loss": 0.2833517789840698, "step": 93300 }, { "epoch": 0.4005993319766793, "grad_norm": 6.068156719207764, "learning_rate": 6.019894276622716e-05, "loss": 0.1711531400680542, "step": 93310 }, { "epoch": 0.4006422640666993, "grad_norm": 3.3427748680114746, "learning_rate": 6.019463104610954e-05, "loss": 0.21299960613250732, "step": 93320 }, { "epoch": 0.4006851961567193, "grad_norm": 1.6745113134384155, "learning_rate": 6.0190319325991916e-05, "loss": 0.1890343427658081, "step": 93330 }, { "epoch": 0.4007281282467393, "grad_norm": 0.08644956350326538, "learning_rate": 6.0186007605874294e-05, "loss": 0.15870969295501708, "step": 93340 }, { "epoch": 0.4007710603367593, "grad_norm": 0.10776764899492264, "learning_rate": 6.0181695885756664e-05, "loss": 0.24700050354003905, "step": 93350 }, { "epoch": 0.40081399242677934, "grad_norm": 0.008111716248095036, "learning_rate": 6.017738416563904e-05, "loss": 0.20516328811645507, "step": 93360 }, { "epoch": 0.40085692451679933, "grad_norm": 0.1302001029253006, "learning_rate": 6.017307244552142e-05, "loss": 0.21231431961059571, "step": 93370 }, { "epoch": 0.4008998566068193, "grad_norm": 0.0018937139539048076, "learning_rate": 6.0168760725403796e-05, "loss": 0.1605125904083252, "step": 93380 }, { "epoch": 0.40094278869683936, "grad_norm": 0.510441243648529, "learning_rate": 6.0164449005286174e-05, "loss": 0.24825668334960938, "step": 93390 }, { "epoch": 0.40098572078685935, "grad_norm": 0.051314398646354675, "learning_rate": 6.016013728516855e-05, "loss": 0.2400137424468994, "step": 93400 }, { "epoch": 0.40102865287687933, "grad_norm": 0.021915512159466743, "learning_rate": 6.015582556505093e-05, "loss": 0.14309494495391845, "step": 93410 }, { "epoch": 0.4010715849668994, "grad_norm": 0.7100977897644043, "learning_rate": 6.0151513844933306e-05, "loss": 0.3975811243057251, "step": 93420 }, { "epoch": 0.40111451705691936, "grad_norm": 0.010824207216501236, "learning_rate": 6.014720212481567e-05, "loss": 0.3093941450119019, "step": 93430 }, { "epoch": 0.40115744914693935, "grad_norm": 0.038194987922906876, "learning_rate": 6.014289040469805e-05, "loss": 0.34295101165771485, "step": 93440 }, { "epoch": 0.4012003812369594, "grad_norm": 0.03417959436774254, "learning_rate": 6.0138578684580424e-05, "loss": 0.03890349268913269, "step": 93450 }, { "epoch": 0.4012433133269794, "grad_norm": 0.007218822371214628, "learning_rate": 6.01342669644628e-05, "loss": 0.04127689599990845, "step": 93460 }, { "epoch": 0.40128624541699937, "grad_norm": 1.0656627416610718, "learning_rate": 6.012995524434518e-05, "loss": 0.24317021369934083, "step": 93470 }, { "epoch": 0.4013291775070194, "grad_norm": 0.0067567359656095505, "learning_rate": 6.012564352422756e-05, "loss": 0.25871007442474364, "step": 93480 }, { "epoch": 0.4013721095970394, "grad_norm": 0.0030029634945094585, "learning_rate": 6.012133180410994e-05, "loss": 0.19204816818237305, "step": 93490 }, { "epoch": 0.4014150416870594, "grad_norm": 4.520451068878174, "learning_rate": 6.011702008399232e-05, "loss": 0.22543861865997314, "step": 93500 }, { "epoch": 0.4014579737770794, "grad_norm": 0.6664943099021912, "learning_rate": 6.0112708363874695e-05, "loss": 0.09824045896530151, "step": 93510 }, { "epoch": 0.4015009058670994, "grad_norm": 3.1957309246063232, "learning_rate": 6.010839664375706e-05, "loss": 0.26783323287963867, "step": 93520 }, { "epoch": 0.40154383795711945, "grad_norm": 2.948349952697754, "learning_rate": 6.0104084923639436e-05, "loss": 0.1502614974975586, "step": 93530 }, { "epoch": 0.40158677004713944, "grad_norm": 0.17878828942775726, "learning_rate": 6.009977320352181e-05, "loss": 0.0759764313697815, "step": 93540 }, { "epoch": 0.40162970213715943, "grad_norm": 1.7953228950500488, "learning_rate": 6.009546148340419e-05, "loss": 0.3401905059814453, "step": 93550 }, { "epoch": 0.40167263422717947, "grad_norm": 0.6826522946357727, "learning_rate": 6.009114976328657e-05, "loss": 0.21881649494171143, "step": 93560 }, { "epoch": 0.40171556631719946, "grad_norm": 0.2658919095993042, "learning_rate": 6.0086838043168945e-05, "loss": 0.04690901637077331, "step": 93570 }, { "epoch": 0.40175849840721944, "grad_norm": 1.6003676652908325, "learning_rate": 6.008252632305132e-05, "loss": 0.33685760498046874, "step": 93580 }, { "epoch": 0.4018014304972395, "grad_norm": 0.014093033969402313, "learning_rate": 6.00782146029337e-05, "loss": 0.06862253546714783, "step": 93590 }, { "epoch": 0.4018443625872595, "grad_norm": 0.03570076823234558, "learning_rate": 6.007390288281607e-05, "loss": 0.14127914905548095, "step": 93600 }, { "epoch": 0.40188729467727946, "grad_norm": 1.6219851970672607, "learning_rate": 6.006959116269845e-05, "loss": 0.2750361919403076, "step": 93610 }, { "epoch": 0.4019302267672995, "grad_norm": 0.13300123810768127, "learning_rate": 6.0065279442580825e-05, "loss": 0.18249515295028687, "step": 93620 }, { "epoch": 0.4019731588573195, "grad_norm": 0.015724139288067818, "learning_rate": 6.00609677224632e-05, "loss": 0.14590342044830323, "step": 93630 }, { "epoch": 0.4020160909473395, "grad_norm": 0.49315378069877625, "learning_rate": 6.005665600234558e-05, "loss": 0.13075822591781616, "step": 93640 }, { "epoch": 0.4020590230373595, "grad_norm": 0.05315891280770302, "learning_rate": 6.005234428222796e-05, "loss": 0.19189344644546508, "step": 93650 }, { "epoch": 0.4021019551273795, "grad_norm": 1.898503303527832, "learning_rate": 6.0048032562110335e-05, "loss": 0.10279072523117065, "step": 93660 }, { "epoch": 0.4021448872173995, "grad_norm": 3.9733712673187256, "learning_rate": 6.004372084199271e-05, "loss": 0.3545947551727295, "step": 93670 }, { "epoch": 0.40218781930741954, "grad_norm": 3.26906681060791, "learning_rate": 6.0039409121875076e-05, "loss": 0.5459408283233642, "step": 93680 }, { "epoch": 0.4022307513974395, "grad_norm": 1.936118483543396, "learning_rate": 6.003509740175745e-05, "loss": 0.18651505708694457, "step": 93690 }, { "epoch": 0.4022736834874595, "grad_norm": 4.495886325836182, "learning_rate": 6.003078568163984e-05, "loss": 0.23359956741333007, "step": 93700 }, { "epoch": 0.40231661557747955, "grad_norm": 0.06830295920372009, "learning_rate": 6.0026473961522215e-05, "loss": 0.28993537425994875, "step": 93710 }, { "epoch": 0.40235954766749954, "grad_norm": 0.10418267548084259, "learning_rate": 6.002216224140459e-05, "loss": 0.2117173671722412, "step": 93720 }, { "epoch": 0.4024024797575196, "grad_norm": 5.263204097747803, "learning_rate": 6.001785052128697e-05, "loss": 0.097961688041687, "step": 93730 }, { "epoch": 0.40244541184753957, "grad_norm": 0.30054160952568054, "learning_rate": 6.001353880116935e-05, "loss": 0.1039130449295044, "step": 93740 }, { "epoch": 0.40248834393755956, "grad_norm": 1.7757928371429443, "learning_rate": 6.0009227081051724e-05, "loss": 0.13803932666778565, "step": 93750 }, { "epoch": 0.4025312760275796, "grad_norm": 0.01852819323539734, "learning_rate": 6.000491536093409e-05, "loss": 0.10215220451354981, "step": 93760 }, { "epoch": 0.4025742081175996, "grad_norm": 2.4717369079589844, "learning_rate": 6.0000603640816465e-05, "loss": 0.344983434677124, "step": 93770 }, { "epoch": 0.4026171402076196, "grad_norm": 0.09923583269119263, "learning_rate": 5.999629192069884e-05, "loss": 0.2237044095993042, "step": 93780 }, { "epoch": 0.4026600722976396, "grad_norm": 2.792940139770508, "learning_rate": 5.999198020058122e-05, "loss": 0.3041109800338745, "step": 93790 }, { "epoch": 0.4027030043876596, "grad_norm": 1.725498080253601, "learning_rate": 5.99876684804636e-05, "loss": 0.056520164012908936, "step": 93800 }, { "epoch": 0.4027459364776796, "grad_norm": 0.30796095728874207, "learning_rate": 5.9983356760345975e-05, "loss": 0.2576672792434692, "step": 93810 }, { "epoch": 0.40278886856769963, "grad_norm": 1.5174881219863892, "learning_rate": 5.997904504022835e-05, "loss": 0.23054022789001466, "step": 93820 }, { "epoch": 0.4028318006577196, "grad_norm": 0.016648801043629646, "learning_rate": 5.997473332011073e-05, "loss": 0.3647672891616821, "step": 93830 }, { "epoch": 0.4028747327477396, "grad_norm": 1.8972567319869995, "learning_rate": 5.9970421599993107e-05, "loss": 0.24868409633636473, "step": 93840 }, { "epoch": 0.40291766483775965, "grad_norm": 1.3365458250045776, "learning_rate": 5.996610987987548e-05, "loss": 0.36391315460205076, "step": 93850 }, { "epoch": 0.40296059692777964, "grad_norm": 6.127025604248047, "learning_rate": 5.9961798159757854e-05, "loss": 0.1571225643157959, "step": 93860 }, { "epoch": 0.4030035290177996, "grad_norm": 0.01666446588933468, "learning_rate": 5.995748643964023e-05, "loss": 0.18446390628814696, "step": 93870 }, { "epoch": 0.40304646110781966, "grad_norm": 0.014780756086111069, "learning_rate": 5.995317471952261e-05, "loss": 0.1700269341468811, "step": 93880 }, { "epoch": 0.40308939319783965, "grad_norm": 0.3110774755477905, "learning_rate": 5.9948862999404987e-05, "loss": 0.2906758785247803, "step": 93890 }, { "epoch": 0.40313232528785964, "grad_norm": 0.0019415556453168392, "learning_rate": 5.9944551279287364e-05, "loss": 0.11864016056060792, "step": 93900 }, { "epoch": 0.4031752573778797, "grad_norm": 0.12294873595237732, "learning_rate": 5.994023955916974e-05, "loss": 0.2842595100402832, "step": 93910 }, { "epoch": 0.40321818946789967, "grad_norm": 1.0812486410140991, "learning_rate": 5.993592783905212e-05, "loss": 0.3086820602416992, "step": 93920 }, { "epoch": 0.40326112155791966, "grad_norm": 0.0038773079868406057, "learning_rate": 5.993161611893449e-05, "loss": 0.18593962192535402, "step": 93930 }, { "epoch": 0.4033040536479397, "grad_norm": 1.0432664155960083, "learning_rate": 5.9927304398816866e-05, "loss": 0.37398381233215333, "step": 93940 }, { "epoch": 0.4033469857379597, "grad_norm": 2.64205002784729, "learning_rate": 5.9922992678699244e-05, "loss": 0.2709501266479492, "step": 93950 }, { "epoch": 0.4033899178279797, "grad_norm": 1.6943469047546387, "learning_rate": 5.991868095858162e-05, "loss": 0.15391974449157714, "step": 93960 }, { "epoch": 0.4034328499179997, "grad_norm": 0.0060857306234538555, "learning_rate": 5.9914369238464e-05, "loss": 0.17449193000793456, "step": 93970 }, { "epoch": 0.4034757820080197, "grad_norm": 0.9577922224998474, "learning_rate": 5.9910057518346376e-05, "loss": 0.057951831817626955, "step": 93980 }, { "epoch": 0.40351871409803974, "grad_norm": 1.1919249296188354, "learning_rate": 5.990574579822875e-05, "loss": 0.16909236907958985, "step": 93990 }, { "epoch": 0.40356164618805973, "grad_norm": 4.153521537780762, "learning_rate": 5.990143407811113e-05, "loss": 0.43216743469238283, "step": 94000 }, { "epoch": 0.40356164618805973, "eval_loss": 0.4400307834148407, "eval_runtime": 27.1541, "eval_samples_per_second": 3.683, "eval_steps_per_second": 3.683, "step": 94000 }, { "epoch": 0.4036045782780797, "grad_norm": 0.017568619921803474, "learning_rate": 5.9897122357993494e-05, "loss": 0.2042163848876953, "step": 94010 }, { "epoch": 0.40364751036809976, "grad_norm": 0.0071970331482589245, "learning_rate": 5.989281063787587e-05, "loss": 0.17381359338760377, "step": 94020 }, { "epoch": 0.40369044245811975, "grad_norm": 1.0322858095169067, "learning_rate": 5.988849891775825e-05, "loss": 0.21790926456451415, "step": 94030 }, { "epoch": 0.40373337454813973, "grad_norm": 1.535051941871643, "learning_rate": 5.9884187197640626e-05, "loss": 0.3981826066970825, "step": 94040 }, { "epoch": 0.4037763066381598, "grad_norm": 0.05718700960278511, "learning_rate": 5.9879875477523004e-05, "loss": 0.26243913173675537, "step": 94050 }, { "epoch": 0.40381923872817976, "grad_norm": 0.01205611601471901, "learning_rate": 5.987556375740538e-05, "loss": 0.16048460006713866, "step": 94060 }, { "epoch": 0.40386217081819975, "grad_norm": 1.6763887405395508, "learning_rate": 5.987125203728776e-05, "loss": 0.13859224319458008, "step": 94070 }, { "epoch": 0.4039051029082198, "grad_norm": 0.006425794214010239, "learning_rate": 5.986694031717014e-05, "loss": 0.1734892249107361, "step": 94080 }, { "epoch": 0.4039480349982398, "grad_norm": 0.09611303359270096, "learning_rate": 5.9862628597052506e-05, "loss": 0.2162872314453125, "step": 94090 }, { "epoch": 0.40399096708825977, "grad_norm": 17.159536361694336, "learning_rate": 5.9858316876934884e-05, "loss": 0.13703900575637817, "step": 94100 }, { "epoch": 0.4040338991782798, "grad_norm": 1.338617205619812, "learning_rate": 5.985400515681726e-05, "loss": 0.4566403865814209, "step": 94110 }, { "epoch": 0.4040768312682998, "grad_norm": 0.023423107340931892, "learning_rate": 5.984969343669964e-05, "loss": 0.22699213027954102, "step": 94120 }, { "epoch": 0.4041197633583198, "grad_norm": 2.107382297515869, "learning_rate": 5.9845381716582016e-05, "loss": 0.3840765953063965, "step": 94130 }, { "epoch": 0.4041626954483398, "grad_norm": 2.3195486068725586, "learning_rate": 5.984106999646439e-05, "loss": 0.19965558052062987, "step": 94140 }, { "epoch": 0.4042056275383598, "grad_norm": 0.16824714839458466, "learning_rate": 5.983675827634677e-05, "loss": 0.08387279510498047, "step": 94150 }, { "epoch": 0.40424855962837986, "grad_norm": 0.07217488437891006, "learning_rate": 5.983244655622915e-05, "loss": 0.23426990509033202, "step": 94160 }, { "epoch": 0.40429149171839984, "grad_norm": 0.016545869410037994, "learning_rate": 5.982813483611152e-05, "loss": 0.12855671644210814, "step": 94170 }, { "epoch": 0.40433442380841983, "grad_norm": 0.38350731134414673, "learning_rate": 5.9823823115993896e-05, "loss": 0.29489014148712156, "step": 94180 }, { "epoch": 0.40437735589843987, "grad_norm": 2.4663097858428955, "learning_rate": 5.981951139587627e-05, "loss": 0.30193476676940917, "step": 94190 }, { "epoch": 0.40442028798845986, "grad_norm": 0.07116344571113586, "learning_rate": 5.981519967575865e-05, "loss": 0.25567481517791746, "step": 94200 }, { "epoch": 0.40446322007847985, "grad_norm": 0.08018473535776138, "learning_rate": 5.981088795564103e-05, "loss": 0.23793158531188965, "step": 94210 }, { "epoch": 0.4045061521684999, "grad_norm": 6.177248477935791, "learning_rate": 5.9806576235523405e-05, "loss": 0.14144766330718994, "step": 94220 }, { "epoch": 0.4045490842585199, "grad_norm": 0.1142013818025589, "learning_rate": 5.980226451540578e-05, "loss": 0.14134806394577026, "step": 94230 }, { "epoch": 0.40459201634853986, "grad_norm": 1.9092756509780884, "learning_rate": 5.979795279528816e-05, "loss": 0.1723836898803711, "step": 94240 }, { "epoch": 0.4046349484385599, "grad_norm": 0.13086585700511932, "learning_rate": 5.979364107517054e-05, "loss": 0.25963706970214845, "step": 94250 }, { "epoch": 0.4046778805285799, "grad_norm": 0.13701823353767395, "learning_rate": 5.97893293550529e-05, "loss": 0.2061779022216797, "step": 94260 }, { "epoch": 0.4047208126185999, "grad_norm": 0.02567523717880249, "learning_rate": 5.978501763493528e-05, "loss": 0.09171283841133118, "step": 94270 }, { "epoch": 0.4047637447086199, "grad_norm": 0.053751952946186066, "learning_rate": 5.9780705914817655e-05, "loss": 0.38718242645263673, "step": 94280 }, { "epoch": 0.4048066767986399, "grad_norm": 0.13871848583221436, "learning_rate": 5.977639419470003e-05, "loss": 0.1819378614425659, "step": 94290 }, { "epoch": 0.4048496088886599, "grad_norm": 0.02344353497028351, "learning_rate": 5.977208247458242e-05, "loss": 0.15964871644973755, "step": 94300 }, { "epoch": 0.40489254097867994, "grad_norm": 0.005774588789790869, "learning_rate": 5.9767770754464794e-05, "loss": 0.10087209939956665, "step": 94310 }, { "epoch": 0.4049354730686999, "grad_norm": 0.001752114505507052, "learning_rate": 5.976345903434717e-05, "loss": 0.11794418096542358, "step": 94320 }, { "epoch": 0.4049784051587199, "grad_norm": 0.23419494926929474, "learning_rate": 5.975914731422955e-05, "loss": 0.17973036766052247, "step": 94330 }, { "epoch": 0.40502133724873995, "grad_norm": 0.2058352679014206, "learning_rate": 5.975483559411191e-05, "loss": 0.29049394130706785, "step": 94340 }, { "epoch": 0.40506426933875994, "grad_norm": 5.32999324798584, "learning_rate": 5.975052387399429e-05, "loss": 0.29587693214416505, "step": 94350 }, { "epoch": 0.40510720142877993, "grad_norm": 0.037094105035066605, "learning_rate": 5.974621215387667e-05, "loss": 0.17742139101028442, "step": 94360 }, { "epoch": 0.40515013351879997, "grad_norm": 0.015149621292948723, "learning_rate": 5.9741900433759045e-05, "loss": 0.19037646055221558, "step": 94370 }, { "epoch": 0.40519306560881996, "grad_norm": 7.238763809204102, "learning_rate": 5.973758871364142e-05, "loss": 0.16131314039230346, "step": 94380 }, { "epoch": 0.40523599769884, "grad_norm": 0.09510552138090134, "learning_rate": 5.97332769935238e-05, "loss": 0.10398712158203124, "step": 94390 }, { "epoch": 0.40527892978886, "grad_norm": 0.030364129692316055, "learning_rate": 5.972896527340618e-05, "loss": 0.0476971834897995, "step": 94400 }, { "epoch": 0.40532186187888, "grad_norm": 0.006177715957164764, "learning_rate": 5.9724653553288554e-05, "loss": 0.1935707688331604, "step": 94410 }, { "epoch": 0.4053647939689, "grad_norm": 0.022529790177941322, "learning_rate": 5.9720341833170925e-05, "loss": 0.18455986976623534, "step": 94420 }, { "epoch": 0.40540772605892, "grad_norm": 0.0013470775447785854, "learning_rate": 5.97160301130533e-05, "loss": 0.13666459321975707, "step": 94430 }, { "epoch": 0.40545065814894, "grad_norm": 0.05355757102370262, "learning_rate": 5.971171839293568e-05, "loss": 0.12402044534683228, "step": 94440 }, { "epoch": 0.40549359023896003, "grad_norm": 3.122445821762085, "learning_rate": 5.970740667281806e-05, "loss": 0.3857335090637207, "step": 94450 }, { "epoch": 0.40553652232898, "grad_norm": 0.004708373919129372, "learning_rate": 5.9703094952700434e-05, "loss": 0.19442360401153563, "step": 94460 }, { "epoch": 0.405579454419, "grad_norm": 0.008243762888014317, "learning_rate": 5.969878323258281e-05, "loss": 0.2473069190979004, "step": 94470 }, { "epoch": 0.40562238650902005, "grad_norm": 0.0011318456381559372, "learning_rate": 5.969447151246519e-05, "loss": 0.04169896841049194, "step": 94480 }, { "epoch": 0.40566531859904004, "grad_norm": 3.41719388961792, "learning_rate": 5.9690159792347566e-05, "loss": 0.12270849943161011, "step": 94490 }, { "epoch": 0.40570825068906, "grad_norm": 0.03121386095881462, "learning_rate": 5.968584807222993e-05, "loss": 0.23815858364105225, "step": 94500 }, { "epoch": 0.40575118277908007, "grad_norm": 22.28656768798828, "learning_rate": 5.968153635211231e-05, "loss": 0.2772752523422241, "step": 94510 }, { "epoch": 0.40579411486910005, "grad_norm": 3.8086886405944824, "learning_rate": 5.967722463199469e-05, "loss": 0.2415098190307617, "step": 94520 }, { "epoch": 0.40583704695912004, "grad_norm": 0.04217388480901718, "learning_rate": 5.967291291187707e-05, "loss": 0.17928813695907592, "step": 94530 }, { "epoch": 0.4058799790491401, "grad_norm": 2.3201487064361572, "learning_rate": 5.9668601191759446e-05, "loss": 0.232806396484375, "step": 94540 }, { "epoch": 0.40592291113916007, "grad_norm": 25.63894271850586, "learning_rate": 5.966428947164182e-05, "loss": 0.29534387588500977, "step": 94550 }, { "epoch": 0.40596584322918006, "grad_norm": 0.24793045222759247, "learning_rate": 5.96599777515242e-05, "loss": 0.18396825790405275, "step": 94560 }, { "epoch": 0.4060087753192001, "grad_norm": 0.7542315721511841, "learning_rate": 5.965566603140658e-05, "loss": 0.2694231510162354, "step": 94570 }, { "epoch": 0.4060517074092201, "grad_norm": 1.3585526943206787, "learning_rate": 5.9651354311288955e-05, "loss": 0.30352447032928465, "step": 94580 }, { "epoch": 0.40609463949924013, "grad_norm": 5.630841255187988, "learning_rate": 5.964704259117132e-05, "loss": 0.2450554370880127, "step": 94590 }, { "epoch": 0.4061375715892601, "grad_norm": 0.04023003205657005, "learning_rate": 5.9642730871053697e-05, "loss": 0.11513957977294922, "step": 94600 }, { "epoch": 0.4061805036792801, "grad_norm": 0.9769871830940247, "learning_rate": 5.9638419150936074e-05, "loss": 0.08849529027938843, "step": 94610 }, { "epoch": 0.40622343576930015, "grad_norm": 0.661646842956543, "learning_rate": 5.963410743081845e-05, "loss": 0.4164388656616211, "step": 94620 }, { "epoch": 0.40626636785932013, "grad_norm": 9.560547828674316, "learning_rate": 5.962979571070083e-05, "loss": 0.2010348320007324, "step": 94630 }, { "epoch": 0.4063092999493401, "grad_norm": 2.1238622665405273, "learning_rate": 5.9625483990583206e-05, "loss": 0.07822906374931335, "step": 94640 }, { "epoch": 0.40635223203936016, "grad_norm": 0.9077053070068359, "learning_rate": 5.962117227046558e-05, "loss": 0.40604825019836427, "step": 94650 }, { "epoch": 0.40639516412938015, "grad_norm": 0.06327524781227112, "learning_rate": 5.961686055034796e-05, "loss": 0.09599577188491822, "step": 94660 }, { "epoch": 0.40643809621940014, "grad_norm": 0.869484007358551, "learning_rate": 5.961254883023033e-05, "loss": 0.14397214651107787, "step": 94670 }, { "epoch": 0.4064810283094202, "grad_norm": 0.007432250771671534, "learning_rate": 5.960823711011271e-05, "loss": 0.24939417839050293, "step": 94680 }, { "epoch": 0.40652396039944017, "grad_norm": 1.876278042793274, "learning_rate": 5.9603925389995086e-05, "loss": 0.10063667297363281, "step": 94690 }, { "epoch": 0.40656689248946015, "grad_norm": 1.803780198097229, "learning_rate": 5.959961366987746e-05, "loss": 0.28752832412719725, "step": 94700 }, { "epoch": 0.4066098245794802, "grad_norm": 0.41878649592399597, "learning_rate": 5.959530194975984e-05, "loss": 0.14709892272949218, "step": 94710 }, { "epoch": 0.4066527566695002, "grad_norm": 0.00410389993339777, "learning_rate": 5.959099022964222e-05, "loss": 0.0757517397403717, "step": 94720 }, { "epoch": 0.40669568875952017, "grad_norm": 0.4839799106121063, "learning_rate": 5.9586678509524595e-05, "loss": 0.21969540119171144, "step": 94730 }, { "epoch": 0.4067386208495402, "grad_norm": 0.0009927289793267846, "learning_rate": 5.958236678940697e-05, "loss": 0.08641666769981385, "step": 94740 }, { "epoch": 0.4067815529395602, "grad_norm": 0.07800525426864624, "learning_rate": 5.957805506928934e-05, "loss": 0.21268873214721679, "step": 94750 }, { "epoch": 0.4068244850295802, "grad_norm": 0.2969856560230255, "learning_rate": 5.957374334917172e-05, "loss": 0.20551798343658448, "step": 94760 }, { "epoch": 0.4068674171196002, "grad_norm": 3.2476003170013428, "learning_rate": 5.95694316290541e-05, "loss": 0.49648799896240237, "step": 94770 }, { "epoch": 0.4069103492096202, "grad_norm": 0.14998094737529755, "learning_rate": 5.9565119908936475e-05, "loss": 0.10299062728881836, "step": 94780 }, { "epoch": 0.4069532812996402, "grad_norm": 0.001216806354932487, "learning_rate": 5.956080818881885e-05, "loss": 0.3082596778869629, "step": 94790 }, { "epoch": 0.40699621338966024, "grad_norm": 0.010087787173688412, "learning_rate": 5.955649646870123e-05, "loss": 0.2633021593093872, "step": 94800 }, { "epoch": 0.40703914547968023, "grad_norm": 0.008466735482215881, "learning_rate": 5.955218474858361e-05, "loss": 0.22031636238098146, "step": 94810 }, { "epoch": 0.4070820775697003, "grad_norm": 2.877070903778076, "learning_rate": 5.9547873028465985e-05, "loss": 0.21617393493652343, "step": 94820 }, { "epoch": 0.40712500965972026, "grad_norm": 0.013431715779006481, "learning_rate": 5.954356130834835e-05, "loss": 0.39271633625030516, "step": 94830 }, { "epoch": 0.40716794174974025, "grad_norm": 0.016712641343474388, "learning_rate": 5.9539249588230726e-05, "loss": 0.055677926540374754, "step": 94840 }, { "epoch": 0.4072108738397603, "grad_norm": 0.01818036660552025, "learning_rate": 5.95349378681131e-05, "loss": 0.21650080680847167, "step": 94850 }, { "epoch": 0.4072538059297803, "grad_norm": 0.501991868019104, "learning_rate": 5.953062614799548e-05, "loss": 0.19998964071273803, "step": 94860 }, { "epoch": 0.40729673801980026, "grad_norm": 0.8588781356811523, "learning_rate": 5.952631442787786e-05, "loss": 0.36342079639434816, "step": 94870 }, { "epoch": 0.4073396701098203, "grad_norm": 0.020705191418528557, "learning_rate": 5.9522002707760235e-05, "loss": 0.1025011658668518, "step": 94880 }, { "epoch": 0.4073826021998403, "grad_norm": 0.8543851971626282, "learning_rate": 5.951769098764262e-05, "loss": 0.2673808574676514, "step": 94890 }, { "epoch": 0.4074255342898603, "grad_norm": 1.015897512435913, "learning_rate": 5.9513379267524997e-05, "loss": 0.375284481048584, "step": 94900 }, { "epoch": 0.4074684663798803, "grad_norm": 3.083343982696533, "learning_rate": 5.950906754740736e-05, "loss": 0.23698792457580567, "step": 94910 }, { "epoch": 0.4075113984699003, "grad_norm": 0.2520955801010132, "learning_rate": 5.950475582728974e-05, "loss": 0.29218809604644774, "step": 94920 }, { "epoch": 0.4075543305599203, "grad_norm": 1.5698250532150269, "learning_rate": 5.9500444107172115e-05, "loss": 0.17973952293395995, "step": 94930 }, { "epoch": 0.40759726264994034, "grad_norm": 1.5335873365402222, "learning_rate": 5.949613238705449e-05, "loss": 0.10807563066482544, "step": 94940 }, { "epoch": 0.4076401947399603, "grad_norm": 0.9593386054039001, "learning_rate": 5.949182066693687e-05, "loss": 0.04367157220840454, "step": 94950 }, { "epoch": 0.4076831268299803, "grad_norm": 0.003926532808691263, "learning_rate": 5.948750894681925e-05, "loss": 0.19878900051116943, "step": 94960 }, { "epoch": 0.40772605892000036, "grad_norm": 0.010859617032110691, "learning_rate": 5.9483197226701624e-05, "loss": 0.2264415979385376, "step": 94970 }, { "epoch": 0.40776899101002034, "grad_norm": 0.06168222054839134, "learning_rate": 5.9478885506584e-05, "loss": 0.1437380075454712, "step": 94980 }, { "epoch": 0.40781192310004033, "grad_norm": 0.030141912400722504, "learning_rate": 5.947457378646638e-05, "loss": 0.3269296407699585, "step": 94990 }, { "epoch": 0.4078548551900604, "grad_norm": 2.8415722846984863, "learning_rate": 5.947026206634875e-05, "loss": 0.16797289848327637, "step": 95000 }, { "epoch": 0.4078548551900604, "eval_loss": 0.42135003209114075, "eval_runtime": 27.162, "eval_samples_per_second": 3.682, "eval_steps_per_second": 3.682, "step": 95000 }, { "epoch": 0.40789778728008036, "grad_norm": 0.0548337884247303, "learning_rate": 5.946595034623113e-05, "loss": 0.1138761043548584, "step": 95010 }, { "epoch": 0.4079407193701004, "grad_norm": 0.05829789489507675, "learning_rate": 5.9461638626113504e-05, "loss": 0.22611532211303711, "step": 95020 }, { "epoch": 0.4079836514601204, "grad_norm": 8.076669692993164, "learning_rate": 5.945732690599588e-05, "loss": 0.2649090766906738, "step": 95030 }, { "epoch": 0.4080265835501404, "grad_norm": 1.6947511434555054, "learning_rate": 5.945301518587826e-05, "loss": 0.14479960203170777, "step": 95040 }, { "epoch": 0.4080695156401604, "grad_norm": 1.4850653409957886, "learning_rate": 5.9448703465760636e-05, "loss": 0.5386390209197998, "step": 95050 }, { "epoch": 0.4081124477301804, "grad_norm": 1.1699714660644531, "learning_rate": 5.9444391745643014e-05, "loss": 0.3699991226196289, "step": 95060 }, { "epoch": 0.4081553798202004, "grad_norm": 0.0662231370806694, "learning_rate": 5.944008002552539e-05, "loss": 0.20802528858184816, "step": 95070 }, { "epoch": 0.40819831191022043, "grad_norm": 5.796939373016357, "learning_rate": 5.9435768305407755e-05, "loss": 0.26230273246765134, "step": 95080 }, { "epoch": 0.4082412440002404, "grad_norm": 0.015105532482266426, "learning_rate": 5.943145658529013e-05, "loss": 0.18045105934143066, "step": 95090 }, { "epoch": 0.4082841760902604, "grad_norm": 0.05598805844783783, "learning_rate": 5.942714486517251e-05, "loss": 0.10713293552398681, "step": 95100 }, { "epoch": 0.40832710818028045, "grad_norm": 0.004577355924993753, "learning_rate": 5.9422833145054894e-05, "loss": 0.2067034959793091, "step": 95110 }, { "epoch": 0.40837004027030044, "grad_norm": 0.0111719761043787, "learning_rate": 5.941852142493727e-05, "loss": 0.1303783416748047, "step": 95120 }, { "epoch": 0.4084129723603204, "grad_norm": 2.2829782962799072, "learning_rate": 5.941420970481965e-05, "loss": 0.15735230445861817, "step": 95130 }, { "epoch": 0.40845590445034047, "grad_norm": 0.0009828386828303337, "learning_rate": 5.9409897984702026e-05, "loss": 0.34636309146881106, "step": 95140 }, { "epoch": 0.40849883654036045, "grad_norm": 0.04014360159635544, "learning_rate": 5.94055862645844e-05, "loss": 0.41129145622253416, "step": 95150 }, { "epoch": 0.40854176863038044, "grad_norm": 2.1843440532684326, "learning_rate": 5.940127454446677e-05, "loss": 0.25074067115783694, "step": 95160 }, { "epoch": 0.4085847007204005, "grad_norm": 0.015182511880993843, "learning_rate": 5.9396962824349144e-05, "loss": 0.18507274389266967, "step": 95170 }, { "epoch": 0.40862763281042047, "grad_norm": 3.183384418487549, "learning_rate": 5.939265110423152e-05, "loss": 0.25386662483215333, "step": 95180 }, { "epoch": 0.40867056490044046, "grad_norm": 9.70214557647705, "learning_rate": 5.93883393841139e-05, "loss": 0.29502627849578855, "step": 95190 }, { "epoch": 0.4087134969904605, "grad_norm": 2.1986756324768066, "learning_rate": 5.9384027663996276e-05, "loss": 0.4716000556945801, "step": 95200 }, { "epoch": 0.4087564290804805, "grad_norm": 2.2466917037963867, "learning_rate": 5.9379715943878653e-05, "loss": 0.3876519203186035, "step": 95210 }, { "epoch": 0.4087993611705005, "grad_norm": 1.9093042612075806, "learning_rate": 5.937540422376103e-05, "loss": 0.3541229009628296, "step": 95220 }, { "epoch": 0.4088422932605205, "grad_norm": 0.0312630720436573, "learning_rate": 5.937109250364341e-05, "loss": 0.2550151824951172, "step": 95230 }, { "epoch": 0.4088852253505405, "grad_norm": 0.13463670015335083, "learning_rate": 5.936678078352578e-05, "loss": 0.1566945195198059, "step": 95240 }, { "epoch": 0.40892815744056055, "grad_norm": 0.04153773561120033, "learning_rate": 5.9362469063408156e-05, "loss": 0.1386013150215149, "step": 95250 }, { "epoch": 0.40897108953058053, "grad_norm": 1.534032940864563, "learning_rate": 5.9358157343290533e-05, "loss": 0.13509927988052367, "step": 95260 }, { "epoch": 0.4090140216206005, "grad_norm": 0.010034897364675999, "learning_rate": 5.935384562317291e-05, "loss": 0.2785799503326416, "step": 95270 }, { "epoch": 0.40905695371062056, "grad_norm": 0.5196402668952942, "learning_rate": 5.934953390305529e-05, "loss": 0.2804682731628418, "step": 95280 }, { "epoch": 0.40909988580064055, "grad_norm": 0.3092677593231201, "learning_rate": 5.9345222182937665e-05, "loss": 0.14761451482772828, "step": 95290 }, { "epoch": 0.40914281789066054, "grad_norm": 0.0070295692421495914, "learning_rate": 5.934091046282004e-05, "loss": 0.09876445531845093, "step": 95300 }, { "epoch": 0.4091857499806806, "grad_norm": 0.0008774647722020745, "learning_rate": 5.933659874270242e-05, "loss": 0.04889726340770721, "step": 95310 }, { "epoch": 0.40922868207070057, "grad_norm": 0.04171142727136612, "learning_rate": 5.93322870225848e-05, "loss": 0.06542414426803589, "step": 95320 }, { "epoch": 0.40927161416072055, "grad_norm": 0.9298404455184937, "learning_rate": 5.932797530246717e-05, "loss": 0.2098308563232422, "step": 95330 }, { "epoch": 0.4093145462507406, "grad_norm": 6.4145660400390625, "learning_rate": 5.9323663582349545e-05, "loss": 0.40647087097167967, "step": 95340 }, { "epoch": 0.4093574783407606, "grad_norm": 0.0009694885229691863, "learning_rate": 5.931935186223192e-05, "loss": 0.2989876508712769, "step": 95350 }, { "epoch": 0.40940041043078057, "grad_norm": 0.12345188856124878, "learning_rate": 5.93150401421143e-05, "loss": 0.2628939628601074, "step": 95360 }, { "epoch": 0.4094433425208006, "grad_norm": 1.712976336479187, "learning_rate": 5.931072842199668e-05, "loss": 0.2650137424468994, "step": 95370 }, { "epoch": 0.4094862746108206, "grad_norm": 1.8961756229400635, "learning_rate": 5.9306416701879055e-05, "loss": 0.3032198905944824, "step": 95380 }, { "epoch": 0.4095292067008406, "grad_norm": 4.40653657913208, "learning_rate": 5.930210498176143e-05, "loss": 0.1689983367919922, "step": 95390 }, { "epoch": 0.40957213879086063, "grad_norm": 0.022152697667479515, "learning_rate": 5.929779326164381e-05, "loss": 0.13820401430130005, "step": 95400 }, { "epoch": 0.4096150708808806, "grad_norm": 2.1956160068511963, "learning_rate": 5.929348154152617e-05, "loss": 0.19749439954757692, "step": 95410 }, { "epoch": 0.4096580029709006, "grad_norm": 0.05495762452483177, "learning_rate": 5.928916982140855e-05, "loss": 0.35087130069732664, "step": 95420 }, { "epoch": 0.40970093506092065, "grad_norm": 1.406990647315979, "learning_rate": 5.928485810129093e-05, "loss": 0.28234546184539794, "step": 95430 }, { "epoch": 0.40974386715094063, "grad_norm": 1.1641638278961182, "learning_rate": 5.9280546381173305e-05, "loss": 0.3042471885681152, "step": 95440 }, { "epoch": 0.4097867992409607, "grad_norm": 1.1633776426315308, "learning_rate": 5.927623466105568e-05, "loss": 0.2691806793212891, "step": 95450 }, { "epoch": 0.40982973133098066, "grad_norm": 0.018605902791023254, "learning_rate": 5.927192294093806e-05, "loss": 0.0012621838599443437, "step": 95460 }, { "epoch": 0.40987266342100065, "grad_norm": 0.018871862441301346, "learning_rate": 5.926761122082044e-05, "loss": 0.30165996551513674, "step": 95470 }, { "epoch": 0.4099155955110207, "grad_norm": 0.6964154839515686, "learning_rate": 5.9263299500702815e-05, "loss": 0.19526594877243042, "step": 95480 }, { "epoch": 0.4099585276010407, "grad_norm": 0.030805258080363274, "learning_rate": 5.9258987780585185e-05, "loss": 0.39353554248809813, "step": 95490 }, { "epoch": 0.41000145969106067, "grad_norm": 2.8309528827667236, "learning_rate": 5.925467606046756e-05, "loss": 0.21136395931243895, "step": 95500 }, { "epoch": 0.4100443917810807, "grad_norm": 0.00797537062317133, "learning_rate": 5.925036434034994e-05, "loss": 0.39774174690246583, "step": 95510 }, { "epoch": 0.4100873238711007, "grad_norm": 0.0260153915733099, "learning_rate": 5.924605262023232e-05, "loss": 0.05480254292488098, "step": 95520 }, { "epoch": 0.4101302559611207, "grad_norm": 0.008767174556851387, "learning_rate": 5.9241740900114695e-05, "loss": 0.16443458795547486, "step": 95530 }, { "epoch": 0.4101731880511407, "grad_norm": 1.7948821783065796, "learning_rate": 5.923742917999707e-05, "loss": 0.3718928337097168, "step": 95540 }, { "epoch": 0.4102161201411607, "grad_norm": 0.07295206189155579, "learning_rate": 5.923311745987945e-05, "loss": 0.15985893011093139, "step": 95550 }, { "epoch": 0.4102590522311807, "grad_norm": 2.3034417629241943, "learning_rate": 5.9228805739761827e-05, "loss": 0.3065182685852051, "step": 95560 }, { "epoch": 0.41030198432120074, "grad_norm": 0.009524974972009659, "learning_rate": 5.92244940196442e-05, "loss": 0.21257288455963136, "step": 95570 }, { "epoch": 0.41034491641122073, "grad_norm": 0.3217892348766327, "learning_rate": 5.9220182299526575e-05, "loss": 0.07395639419555664, "step": 95580 }, { "epoch": 0.4103878485012407, "grad_norm": 0.016713928431272507, "learning_rate": 5.921587057940895e-05, "loss": 0.1688373327255249, "step": 95590 }, { "epoch": 0.41043078059126076, "grad_norm": 1.5730935335159302, "learning_rate": 5.921155885929133e-05, "loss": 0.35242772102355957, "step": 95600 }, { "epoch": 0.41047371268128074, "grad_norm": 0.1674475222826004, "learning_rate": 5.9207247139173707e-05, "loss": 0.1739656448364258, "step": 95610 }, { "epoch": 0.41051664477130073, "grad_norm": 0.7928306460380554, "learning_rate": 5.9202935419056084e-05, "loss": 0.23594801425933837, "step": 95620 }, { "epoch": 0.4105595768613208, "grad_norm": 2.0737826824188232, "learning_rate": 5.919862369893846e-05, "loss": 0.21393814086914062, "step": 95630 }, { "epoch": 0.41060250895134076, "grad_norm": 0.039834145456552505, "learning_rate": 5.919431197882084e-05, "loss": 0.24037718772888184, "step": 95640 }, { "epoch": 0.41064544104136075, "grad_norm": 0.0070899007841944695, "learning_rate": 5.9190000258703216e-05, "loss": 0.11014029979705811, "step": 95650 }, { "epoch": 0.4106883731313808, "grad_norm": 0.39396798610687256, "learning_rate": 5.918568853858558e-05, "loss": 0.21011085510253907, "step": 95660 }, { "epoch": 0.4107313052214008, "grad_norm": 1.3160746097564697, "learning_rate": 5.918137681846796e-05, "loss": 0.27565855979919435, "step": 95670 }, { "epoch": 0.4107742373114208, "grad_norm": 2.246863603591919, "learning_rate": 5.9177065098350334e-05, "loss": 0.4046793937683105, "step": 95680 }, { "epoch": 0.4108171694014408, "grad_norm": 2.2735283374786377, "learning_rate": 5.917275337823271e-05, "loss": 0.08375327587127686, "step": 95690 }, { "epoch": 0.4108601014914608, "grad_norm": 0.06356479972600937, "learning_rate": 5.916844165811509e-05, "loss": 0.12829431295394897, "step": 95700 }, { "epoch": 0.41090303358148084, "grad_norm": 0.8090630173683167, "learning_rate": 5.916412993799747e-05, "loss": 0.20278749465942383, "step": 95710 }, { "epoch": 0.4109459656715008, "grad_norm": 3.7976224422454834, "learning_rate": 5.915981821787985e-05, "loss": 0.37968668937683103, "step": 95720 }, { "epoch": 0.4109888977615208, "grad_norm": 0.013438595458865166, "learning_rate": 5.915550649776223e-05, "loss": 0.2663418292999268, "step": 95730 }, { "epoch": 0.41103182985154085, "grad_norm": 0.00032999878749251366, "learning_rate": 5.915119477764459e-05, "loss": 0.2249680519104004, "step": 95740 }, { "epoch": 0.41107476194156084, "grad_norm": 1.3823471069335938, "learning_rate": 5.914688305752697e-05, "loss": 0.17618353366851808, "step": 95750 }, { "epoch": 0.4111176940315808, "grad_norm": 1.6660122871398926, "learning_rate": 5.9142571337409346e-05, "loss": 0.23736302852630614, "step": 95760 }, { "epoch": 0.41116062612160087, "grad_norm": 0.014189798384904861, "learning_rate": 5.9138259617291724e-05, "loss": 0.209371018409729, "step": 95770 }, { "epoch": 0.41120355821162086, "grad_norm": 0.003713384736329317, "learning_rate": 5.91339478971741e-05, "loss": 0.1889081835746765, "step": 95780 }, { "epoch": 0.41124649030164084, "grad_norm": 0.03438572585582733, "learning_rate": 5.912963617705648e-05, "loss": 0.07586662769317627, "step": 95790 }, { "epoch": 0.4112894223916609, "grad_norm": 0.8181175589561462, "learning_rate": 5.9125324456938856e-05, "loss": 0.276668381690979, "step": 95800 }, { "epoch": 0.4113323544816809, "grad_norm": 0.10987686365842819, "learning_rate": 5.912101273682123e-05, "loss": 0.01944877505302429, "step": 95810 }, { "epoch": 0.41137528657170086, "grad_norm": 0.28580111265182495, "learning_rate": 5.9116701016703604e-05, "loss": 0.40301976203918455, "step": 95820 }, { "epoch": 0.4114182186617209, "grad_norm": 6.955211162567139, "learning_rate": 5.911238929658598e-05, "loss": 0.2764917850494385, "step": 95830 }, { "epoch": 0.4114611507517409, "grad_norm": 3.055581569671631, "learning_rate": 5.910807757646836e-05, "loss": 0.33856046199798584, "step": 95840 }, { "epoch": 0.4115040828417609, "grad_norm": 1.8550695180892944, "learning_rate": 5.9103765856350736e-05, "loss": 0.26514904499053954, "step": 95850 }, { "epoch": 0.4115470149317809, "grad_norm": 0.23635394871234894, "learning_rate": 5.909945413623311e-05, "loss": 0.3311293601989746, "step": 95860 }, { "epoch": 0.4115899470218009, "grad_norm": 0.018756048753857613, "learning_rate": 5.909514241611549e-05, "loss": 0.1716057538986206, "step": 95870 }, { "epoch": 0.41163287911182095, "grad_norm": 0.006415795534849167, "learning_rate": 5.909083069599787e-05, "loss": 0.07712118029594421, "step": 95880 }, { "epoch": 0.41167581120184094, "grad_norm": 0.05445994809269905, "learning_rate": 5.9086518975880245e-05, "loss": 0.2775907516479492, "step": 95890 }, { "epoch": 0.4117187432918609, "grad_norm": 0.17175628244876862, "learning_rate": 5.908220725576261e-05, "loss": 0.2930448055267334, "step": 95900 }, { "epoch": 0.41176167538188096, "grad_norm": 0.004957165569067001, "learning_rate": 5.9077895535644986e-05, "loss": 0.2307873010635376, "step": 95910 }, { "epoch": 0.41180460747190095, "grad_norm": 0.0006048490176908672, "learning_rate": 5.9073583815527364e-05, "loss": 0.27237510681152344, "step": 95920 }, { "epoch": 0.41184753956192094, "grad_norm": 1.6527477502822876, "learning_rate": 5.906927209540975e-05, "loss": 0.2211667776107788, "step": 95930 }, { "epoch": 0.411890471651941, "grad_norm": 0.8207837343215942, "learning_rate": 5.9064960375292125e-05, "loss": 0.18572959899902344, "step": 95940 }, { "epoch": 0.41193340374196097, "grad_norm": 2.7235312461853027, "learning_rate": 5.90606486551745e-05, "loss": 0.3151681423187256, "step": 95950 }, { "epoch": 0.41197633583198096, "grad_norm": 0.0987667366862297, "learning_rate": 5.905633693505688e-05, "loss": 0.27903921604156495, "step": 95960 }, { "epoch": 0.412019267922001, "grad_norm": 0.03398576378822327, "learning_rate": 5.905202521493926e-05, "loss": 0.09625001549720764, "step": 95970 }, { "epoch": 0.412062200012021, "grad_norm": 0.6507900357246399, "learning_rate": 5.904771349482162e-05, "loss": 0.11882567405700684, "step": 95980 }, { "epoch": 0.41210513210204097, "grad_norm": 1.7718161344528198, "learning_rate": 5.9043401774704e-05, "loss": 0.259985089302063, "step": 95990 }, { "epoch": 0.412148064192061, "grad_norm": 0.06375284492969513, "learning_rate": 5.9039090054586375e-05, "loss": 0.3681522846221924, "step": 96000 }, { "epoch": 0.412148064192061, "eval_loss": 0.4350675940513611, "eval_runtime": 27.252, "eval_samples_per_second": 3.669, "eval_steps_per_second": 3.669, "step": 96000 }, { "epoch": 0.412190996282081, "grad_norm": 22.450407028198242, "learning_rate": 5.903477833446875e-05, "loss": 0.2040266752243042, "step": 96010 }, { "epoch": 0.412233928372101, "grad_norm": 0.30055972933769226, "learning_rate": 5.903046661435113e-05, "loss": 0.14661065340042115, "step": 96020 }, { "epoch": 0.41227686046212103, "grad_norm": 0.19807764887809753, "learning_rate": 5.902615489423351e-05, "loss": 0.3193605661392212, "step": 96030 }, { "epoch": 0.412319792552141, "grad_norm": 0.03959206864237785, "learning_rate": 5.9021843174115885e-05, "loss": 0.24810662269592285, "step": 96040 }, { "epoch": 0.412362724642161, "grad_norm": 0.11264007538557053, "learning_rate": 5.901753145399826e-05, "loss": 0.3553786039352417, "step": 96050 }, { "epoch": 0.41240565673218105, "grad_norm": 0.16534768044948578, "learning_rate": 5.901321973388064e-05, "loss": 0.13765889406204224, "step": 96060 }, { "epoch": 0.41244858882220103, "grad_norm": 3.1089465618133545, "learning_rate": 5.900890801376301e-05, "loss": 0.5068204879760743, "step": 96070 }, { "epoch": 0.412491520912221, "grad_norm": 0.051101043820381165, "learning_rate": 5.900459629364539e-05, "loss": 0.22767856121063232, "step": 96080 }, { "epoch": 0.41253445300224106, "grad_norm": 0.2743438482284546, "learning_rate": 5.9000284573527765e-05, "loss": 0.0870218575000763, "step": 96090 }, { "epoch": 0.41257738509226105, "grad_norm": 1.2850000858306885, "learning_rate": 5.899597285341014e-05, "loss": 0.1808406949043274, "step": 96100 }, { "epoch": 0.4126203171822811, "grad_norm": 1.991323709487915, "learning_rate": 5.899166113329252e-05, "loss": 0.21970710754394532, "step": 96110 }, { "epoch": 0.4126632492723011, "grad_norm": 1.307265043258667, "learning_rate": 5.89873494131749e-05, "loss": 0.2848405599594116, "step": 96120 }, { "epoch": 0.41270618136232107, "grad_norm": 0.2070932537317276, "learning_rate": 5.8983037693057274e-05, "loss": 0.25841717720031737, "step": 96130 }, { "epoch": 0.4127491134523411, "grad_norm": 0.18773120641708374, "learning_rate": 5.897872597293965e-05, "loss": 0.07489589452743531, "step": 96140 }, { "epoch": 0.4127920455423611, "grad_norm": 0.01186260674148798, "learning_rate": 5.897441425282202e-05, "loss": 0.22005856037139893, "step": 96150 }, { "epoch": 0.4128349776323811, "grad_norm": 20.258750915527344, "learning_rate": 5.89701025327044e-05, "loss": 0.2249882698059082, "step": 96160 }, { "epoch": 0.4128779097224011, "grad_norm": 50.0006217956543, "learning_rate": 5.896579081258678e-05, "loss": 0.0812033772468567, "step": 96170 }, { "epoch": 0.4129208418124211, "grad_norm": 0.009035931900143623, "learning_rate": 5.8961479092469154e-05, "loss": 0.19636874198913573, "step": 96180 }, { "epoch": 0.4129637739024411, "grad_norm": 0.030025752261281013, "learning_rate": 5.895716737235153e-05, "loss": 0.030920588970184328, "step": 96190 }, { "epoch": 0.41300670599246114, "grad_norm": 0.6336904764175415, "learning_rate": 5.895285565223391e-05, "loss": 0.26514263153076173, "step": 96200 }, { "epoch": 0.41304963808248113, "grad_norm": 0.5668049454689026, "learning_rate": 5.8948543932116286e-05, "loss": 0.44312324523925783, "step": 96210 }, { "epoch": 0.4130925701725011, "grad_norm": 1.6628981828689575, "learning_rate": 5.8944232211998663e-05, "loss": 0.09441558122634888, "step": 96220 }, { "epoch": 0.41313550226252116, "grad_norm": 0.16483436524868011, "learning_rate": 5.893992049188103e-05, "loss": 0.3925278663635254, "step": 96230 }, { "epoch": 0.41317843435254115, "grad_norm": 0.0029956032522022724, "learning_rate": 5.8935608771763405e-05, "loss": 0.1006605863571167, "step": 96240 }, { "epoch": 0.41322136644256113, "grad_norm": 2.036149263381958, "learning_rate": 5.893129705164578e-05, "loss": 0.29947011470794677, "step": 96250 }, { "epoch": 0.4132642985325812, "grad_norm": 2.5441179275512695, "learning_rate": 5.892698533152816e-05, "loss": 0.49998650550842283, "step": 96260 }, { "epoch": 0.41330723062260116, "grad_norm": 0.021060464903712273, "learning_rate": 5.892267361141054e-05, "loss": 0.1435585379600525, "step": 96270 }, { "epoch": 0.41335016271262115, "grad_norm": 0.011224511079490185, "learning_rate": 5.8918361891292914e-05, "loss": 0.29232838153839114, "step": 96280 }, { "epoch": 0.4133930948026412, "grad_norm": 5.085150241851807, "learning_rate": 5.891405017117529e-05, "loss": 0.3620931625366211, "step": 96290 }, { "epoch": 0.4134360268926612, "grad_norm": 1.911069393157959, "learning_rate": 5.8909738451057675e-05, "loss": 0.45119342803955076, "step": 96300 }, { "epoch": 0.4134789589826812, "grad_norm": 2.3250467777252197, "learning_rate": 5.890542673094004e-05, "loss": 0.21939477920532227, "step": 96310 }, { "epoch": 0.4135218910727012, "grad_norm": 0.4628746211528778, "learning_rate": 5.8901115010822417e-05, "loss": 0.07731515765190125, "step": 96320 }, { "epoch": 0.4135648231627212, "grad_norm": 0.017649687826633453, "learning_rate": 5.8896803290704794e-05, "loss": 0.27119874954223633, "step": 96330 }, { "epoch": 0.41360775525274124, "grad_norm": 0.002815448446199298, "learning_rate": 5.889249157058717e-05, "loss": 0.1231507658958435, "step": 96340 }, { "epoch": 0.4136506873427612, "grad_norm": 0.06919790059328079, "learning_rate": 5.888817985046955e-05, "loss": 0.0627961814403534, "step": 96350 }, { "epoch": 0.4136936194327812, "grad_norm": 0.21034856140613556, "learning_rate": 5.8883868130351926e-05, "loss": 0.025909900665283203, "step": 96360 }, { "epoch": 0.41373655152280125, "grad_norm": 0.1855197697877884, "learning_rate": 5.88795564102343e-05, "loss": 0.2717406749725342, "step": 96370 }, { "epoch": 0.41377948361282124, "grad_norm": 2.263113498687744, "learning_rate": 5.887524469011668e-05, "loss": 0.19362281560897826, "step": 96380 }, { "epoch": 0.41382241570284123, "grad_norm": 0.03105918876826763, "learning_rate": 5.887093296999906e-05, "loss": 0.450689697265625, "step": 96390 }, { "epoch": 0.41386534779286127, "grad_norm": 0.005474720615893602, "learning_rate": 5.886662124988143e-05, "loss": 0.29455156326293946, "step": 96400 }, { "epoch": 0.41390827988288126, "grad_norm": 74.08353424072266, "learning_rate": 5.8862309529763806e-05, "loss": 0.2952181577682495, "step": 96410 }, { "epoch": 0.41395121197290125, "grad_norm": 0.029871761798858643, "learning_rate": 5.885799780964618e-05, "loss": 0.09040093421936035, "step": 96420 }, { "epoch": 0.4139941440629213, "grad_norm": 2.837618589401245, "learning_rate": 5.885368608952856e-05, "loss": 0.2539191722869873, "step": 96430 }, { "epoch": 0.4140370761529413, "grad_norm": 0.24232065677642822, "learning_rate": 5.884937436941094e-05, "loss": 0.1582349181175232, "step": 96440 }, { "epoch": 0.41408000824296126, "grad_norm": 3.9438517093658447, "learning_rate": 5.8845062649293315e-05, "loss": 0.16419055461883544, "step": 96450 }, { "epoch": 0.4141229403329813, "grad_norm": 3.0583577156066895, "learning_rate": 5.884075092917569e-05, "loss": 0.10935056209564209, "step": 96460 }, { "epoch": 0.4141658724230013, "grad_norm": 0.005584963131695986, "learning_rate": 5.883643920905807e-05, "loss": 0.05950572490692139, "step": 96470 }, { "epoch": 0.4142088045130213, "grad_norm": 0.0063299755565822124, "learning_rate": 5.8832127488940434e-05, "loss": 0.1775718331336975, "step": 96480 }, { "epoch": 0.4142517366030413, "grad_norm": 3.7204408645629883, "learning_rate": 5.882781576882281e-05, "loss": 0.365419864654541, "step": 96490 }, { "epoch": 0.4142946686930613, "grad_norm": 0.05920688807964325, "learning_rate": 5.882350404870519e-05, "loss": 0.21246140003204345, "step": 96500 }, { "epoch": 0.4143376007830813, "grad_norm": 1.7804783582687378, "learning_rate": 5.8819192328587566e-05, "loss": 0.13792389631271362, "step": 96510 }, { "epoch": 0.41438053287310134, "grad_norm": 5.373781204223633, "learning_rate": 5.881488060846995e-05, "loss": 0.40992259979248047, "step": 96520 }, { "epoch": 0.4144234649631213, "grad_norm": 6.659917831420898, "learning_rate": 5.881056888835233e-05, "loss": 0.42495036125183105, "step": 96530 }, { "epoch": 0.41446639705314137, "grad_norm": 0.08701377362012863, "learning_rate": 5.8806257168234705e-05, "loss": 0.10634886026382447, "step": 96540 }, { "epoch": 0.41450932914316135, "grad_norm": 0.10711979866027832, "learning_rate": 5.880194544811708e-05, "loss": 0.1699918031692505, "step": 96550 }, { "epoch": 0.41455226123318134, "grad_norm": 0.0040659112855792046, "learning_rate": 5.8797633727999446e-05, "loss": 0.08797727227210998, "step": 96560 }, { "epoch": 0.4145951933232014, "grad_norm": 0.21539410948753357, "learning_rate": 5.879332200788182e-05, "loss": 0.1780964970588684, "step": 96570 }, { "epoch": 0.41463812541322137, "grad_norm": 1.781267762184143, "learning_rate": 5.87890102877642e-05, "loss": 0.18408920764923095, "step": 96580 }, { "epoch": 0.41468105750324136, "grad_norm": 0.0017236159183084965, "learning_rate": 5.878469856764658e-05, "loss": 0.2172388792037964, "step": 96590 }, { "epoch": 0.4147239895932614, "grad_norm": 0.13159137964248657, "learning_rate": 5.8780386847528955e-05, "loss": 0.2666104078292847, "step": 96600 }, { "epoch": 0.4147669216832814, "grad_norm": 0.0033857496455311775, "learning_rate": 5.877607512741133e-05, "loss": 0.308839225769043, "step": 96610 }, { "epoch": 0.4148098537733014, "grad_norm": 5.20973014831543, "learning_rate": 5.877176340729371e-05, "loss": 0.15720083713531494, "step": 96620 }, { "epoch": 0.4148527858633214, "grad_norm": 0.9707467555999756, "learning_rate": 5.876745168717609e-05, "loss": 0.21422302722930908, "step": 96630 }, { "epoch": 0.4148957179533414, "grad_norm": 1.6356661319732666, "learning_rate": 5.876313996705846e-05, "loss": 0.14773917198181152, "step": 96640 }, { "epoch": 0.4149386500433614, "grad_norm": 0.115482859313488, "learning_rate": 5.8758828246940835e-05, "loss": 0.2064431428909302, "step": 96650 }, { "epoch": 0.41498158213338143, "grad_norm": 0.043903812766075134, "learning_rate": 5.875451652682321e-05, "loss": 0.3388453245162964, "step": 96660 }, { "epoch": 0.4150245142234014, "grad_norm": 0.010513490065932274, "learning_rate": 5.875020480670559e-05, "loss": 0.13178856372833253, "step": 96670 }, { "epoch": 0.4150674463134214, "grad_norm": 1.041566014289856, "learning_rate": 5.874589308658797e-05, "loss": 0.13429534435272217, "step": 96680 }, { "epoch": 0.41511037840344145, "grad_norm": 0.06681652367115021, "learning_rate": 5.8741581366470344e-05, "loss": 0.13894530534744262, "step": 96690 }, { "epoch": 0.41515331049346144, "grad_norm": 1.6779704093933105, "learning_rate": 5.873726964635272e-05, "loss": 0.31052098274230955, "step": 96700 }, { "epoch": 0.4151962425834814, "grad_norm": 2.150348424911499, "learning_rate": 5.87329579262351e-05, "loss": 0.18519182205200196, "step": 96710 }, { "epoch": 0.41523917467350147, "grad_norm": 10.719417572021484, "learning_rate": 5.872864620611746e-05, "loss": 0.2912954807281494, "step": 96720 }, { "epoch": 0.41528210676352145, "grad_norm": 1.351884126663208, "learning_rate": 5.872433448599984e-05, "loss": 0.3322085618972778, "step": 96730 }, { "epoch": 0.4153250388535415, "grad_norm": 4.0024189949035645, "learning_rate": 5.8720022765882224e-05, "loss": 0.0987987458705902, "step": 96740 }, { "epoch": 0.4153679709435615, "grad_norm": 0.0032169087789952755, "learning_rate": 5.87157110457646e-05, "loss": 0.29510352611541746, "step": 96750 }, { "epoch": 0.41541090303358147, "grad_norm": 1.3937362432479858, "learning_rate": 5.871139932564698e-05, "loss": 0.412169885635376, "step": 96760 }, { "epoch": 0.4154538351236015, "grad_norm": 0.1955924928188324, "learning_rate": 5.8707087605529356e-05, "loss": 0.2589648008346558, "step": 96770 }, { "epoch": 0.4154967672136215, "grad_norm": 0.37724238634109497, "learning_rate": 5.8702775885411734e-05, "loss": 0.20537447929382324, "step": 96780 }, { "epoch": 0.4155396993036415, "grad_norm": 1.6324776411056519, "learning_rate": 5.869846416529411e-05, "loss": 0.40856242179870605, "step": 96790 }, { "epoch": 0.41558263139366153, "grad_norm": 0.4038811922073364, "learning_rate": 5.869415244517649e-05, "loss": 0.20042769908905028, "step": 96800 }, { "epoch": 0.4156255634836815, "grad_norm": 3.4731292724609375, "learning_rate": 5.868984072505885e-05, "loss": 0.36867854595184324, "step": 96810 }, { "epoch": 0.4156684955737015, "grad_norm": 1.9070109128952026, "learning_rate": 5.868552900494123e-05, "loss": 0.25679750442504884, "step": 96820 }, { "epoch": 0.41571142766372154, "grad_norm": 0.04523250088095665, "learning_rate": 5.868121728482361e-05, "loss": 0.12389757633209228, "step": 96830 }, { "epoch": 0.41575435975374153, "grad_norm": 2.1402366161346436, "learning_rate": 5.8676905564705984e-05, "loss": 0.46518592834472655, "step": 96840 }, { "epoch": 0.4157972918437615, "grad_norm": 3.220548629760742, "learning_rate": 5.867259384458836e-05, "loss": 0.09400500059127807, "step": 96850 }, { "epoch": 0.41584022393378156, "grad_norm": 0.0733582079410553, "learning_rate": 5.866828212447074e-05, "loss": 0.18302125930786134, "step": 96860 }, { "epoch": 0.41588315602380155, "grad_norm": 0.02238384075462818, "learning_rate": 5.8663970404353116e-05, "loss": 0.24416136741638184, "step": 96870 }, { "epoch": 0.41592608811382153, "grad_norm": 1.7831960916519165, "learning_rate": 5.8659658684235494e-05, "loss": 0.2511852025985718, "step": 96880 }, { "epoch": 0.4159690202038416, "grad_norm": 0.06032947823405266, "learning_rate": 5.8655346964117864e-05, "loss": 0.2421018123626709, "step": 96890 }, { "epoch": 0.41601195229386156, "grad_norm": 0.001062124385498464, "learning_rate": 5.865103524400024e-05, "loss": 0.07756858468055725, "step": 96900 }, { "epoch": 0.41605488438388155, "grad_norm": 2.137861490249634, "learning_rate": 5.864672352388262e-05, "loss": 0.17247992753982544, "step": 96910 }, { "epoch": 0.4160978164739016, "grad_norm": 0.13371747732162476, "learning_rate": 5.8642411803764996e-05, "loss": 0.36290051937103274, "step": 96920 }, { "epoch": 0.4161407485639216, "grad_norm": 0.030226441100239754, "learning_rate": 5.8638100083647373e-05, "loss": 0.15298424959182738, "step": 96930 }, { "epoch": 0.41618368065394157, "grad_norm": 0.024282341822981834, "learning_rate": 5.863378836352975e-05, "loss": 0.1876778483390808, "step": 96940 }, { "epoch": 0.4162266127439616, "grad_norm": 0.005532603710889816, "learning_rate": 5.862947664341213e-05, "loss": 0.10570700168609619, "step": 96950 }, { "epoch": 0.4162695448339816, "grad_norm": 0.0017162829171866179, "learning_rate": 5.8625164923294506e-05, "loss": 0.22160534858703612, "step": 96960 }, { "epoch": 0.41631247692400164, "grad_norm": 1.481353759765625, "learning_rate": 5.8620853203176876e-05, "loss": 0.4609670162200928, "step": 96970 }, { "epoch": 0.4163554090140216, "grad_norm": 0.036570560187101364, "learning_rate": 5.8616541483059253e-05, "loss": 0.33320496082305906, "step": 96980 }, { "epoch": 0.4163983411040416, "grad_norm": 1.344350814819336, "learning_rate": 5.861222976294163e-05, "loss": 0.16209814548492432, "step": 96990 }, { "epoch": 0.41644127319406166, "grad_norm": 0.002683450933545828, "learning_rate": 5.860791804282401e-05, "loss": 0.17441989183425904, "step": 97000 }, { "epoch": 0.41644127319406166, "eval_loss": 0.4123704433441162, "eval_runtime": 27.0919, "eval_samples_per_second": 3.691, "eval_steps_per_second": 3.691, "step": 97000 }, { "epoch": 0.41648420528408164, "grad_norm": 0.1961876004934311, "learning_rate": 5.8603606322706385e-05, "loss": 0.07715705633163453, "step": 97010 }, { "epoch": 0.41652713737410163, "grad_norm": 0.01836606301367283, "learning_rate": 5.859929460258876e-05, "loss": 0.33584434986114503, "step": 97020 }, { "epoch": 0.4165700694641217, "grad_norm": 2.514364719390869, "learning_rate": 5.859498288247114e-05, "loss": 0.1604735016822815, "step": 97030 }, { "epoch": 0.41661300155414166, "grad_norm": 0.13551877439022064, "learning_rate": 5.859067116235352e-05, "loss": 0.05233732461929321, "step": 97040 }, { "epoch": 0.41665593364416165, "grad_norm": 0.32872146368026733, "learning_rate": 5.858635944223588e-05, "loss": 0.18588415384292603, "step": 97050 }, { "epoch": 0.4166988657341817, "grad_norm": 0.009478704072535038, "learning_rate": 5.858204772211826e-05, "loss": 0.32800989151000975, "step": 97060 }, { "epoch": 0.4167417978242017, "grad_norm": 2.2177982330322266, "learning_rate": 5.8577736002000636e-05, "loss": 0.40303688049316405, "step": 97070 }, { "epoch": 0.41678472991422166, "grad_norm": 2.8492250442504883, "learning_rate": 5.857342428188301e-05, "loss": 0.22970712184906006, "step": 97080 }, { "epoch": 0.4168276620042417, "grad_norm": 5.243490219116211, "learning_rate": 5.856911256176539e-05, "loss": 0.2636042833328247, "step": 97090 }, { "epoch": 0.4168705940942617, "grad_norm": 0.758879542350769, "learning_rate": 5.856480084164777e-05, "loss": 0.13969314098358154, "step": 97100 }, { "epoch": 0.4169135261842817, "grad_norm": 0.11793681234121323, "learning_rate": 5.856048912153015e-05, "loss": 0.13169124126434326, "step": 97110 }, { "epoch": 0.4169564582743017, "grad_norm": 0.13415993750095367, "learning_rate": 5.855617740141253e-05, "loss": 0.1481427550315857, "step": 97120 }, { "epoch": 0.4169993903643217, "grad_norm": 1.363560676574707, "learning_rate": 5.855186568129491e-05, "loss": 0.22657833099365235, "step": 97130 }, { "epoch": 0.4170423224543417, "grad_norm": 2.9042086601257324, "learning_rate": 5.854755396117727e-05, "loss": 0.14560015201568605, "step": 97140 }, { "epoch": 0.41708525454436174, "grad_norm": 1.215306282043457, "learning_rate": 5.854324224105965e-05, "loss": 0.36920900344848634, "step": 97150 }, { "epoch": 0.4171281866343817, "grad_norm": 1.1477103233337402, "learning_rate": 5.8538930520942025e-05, "loss": 0.16628166437149047, "step": 97160 }, { "epoch": 0.41717111872440177, "grad_norm": 3.602811574935913, "learning_rate": 5.85346188008244e-05, "loss": 0.2822127819061279, "step": 97170 }, { "epoch": 0.41721405081442176, "grad_norm": 0.006275674793869257, "learning_rate": 5.853030708070678e-05, "loss": 0.1495327353477478, "step": 97180 }, { "epoch": 0.41725698290444174, "grad_norm": 0.0992029532790184, "learning_rate": 5.852599536058916e-05, "loss": 0.2783435106277466, "step": 97190 }, { "epoch": 0.4172999149944618, "grad_norm": 1.2494653463363647, "learning_rate": 5.8521683640471535e-05, "loss": 0.07135303020477295, "step": 97200 }, { "epoch": 0.41734284708448177, "grad_norm": 1.0946789979934692, "learning_rate": 5.851737192035391e-05, "loss": 0.25925579071044924, "step": 97210 }, { "epoch": 0.41738577917450176, "grad_norm": 1.9514451026916504, "learning_rate": 5.851306020023628e-05, "loss": 0.4609402656555176, "step": 97220 }, { "epoch": 0.4174287112645218, "grad_norm": 0.012375736609101295, "learning_rate": 5.850874848011866e-05, "loss": 0.3152902603149414, "step": 97230 }, { "epoch": 0.4174716433545418, "grad_norm": 0.3962627649307251, "learning_rate": 5.850443676000104e-05, "loss": 0.2831923007965088, "step": 97240 }, { "epoch": 0.4175145754445618, "grad_norm": 0.19984892010688782, "learning_rate": 5.8500125039883415e-05, "loss": 0.010816796123981476, "step": 97250 }, { "epoch": 0.4175575075345818, "grad_norm": 0.09338950365781784, "learning_rate": 5.849581331976579e-05, "loss": 0.3447253704071045, "step": 97260 }, { "epoch": 0.4176004396246018, "grad_norm": 0.032562434673309326, "learning_rate": 5.849150159964817e-05, "loss": 0.14689211845397948, "step": 97270 }, { "epoch": 0.4176433717146218, "grad_norm": 0.1437419056892395, "learning_rate": 5.848718987953055e-05, "loss": 0.1887149214744568, "step": 97280 }, { "epoch": 0.41768630380464183, "grad_norm": 0.8966065049171448, "learning_rate": 5.8482878159412924e-05, "loss": 0.11084823608398438, "step": 97290 }, { "epoch": 0.4177292358946618, "grad_norm": 0.007662808522582054, "learning_rate": 5.847856643929529e-05, "loss": 0.32145259380340574, "step": 97300 }, { "epoch": 0.4177721679846818, "grad_norm": 0.02117268368601799, "learning_rate": 5.8474254719177665e-05, "loss": 0.044441819190979004, "step": 97310 }, { "epoch": 0.41781510007470185, "grad_norm": 0.01486627385020256, "learning_rate": 5.846994299906004e-05, "loss": 0.5016047477722168, "step": 97320 }, { "epoch": 0.41785803216472184, "grad_norm": 0.8546322584152222, "learning_rate": 5.8465631278942427e-05, "loss": 0.32282209396362305, "step": 97330 }, { "epoch": 0.4179009642547418, "grad_norm": 22.95284080505371, "learning_rate": 5.8461319558824804e-05, "loss": 0.21673707962036132, "step": 97340 }, { "epoch": 0.41794389634476187, "grad_norm": 0.05270211771130562, "learning_rate": 5.845700783870718e-05, "loss": 0.06792616844177246, "step": 97350 }, { "epoch": 0.41798682843478185, "grad_norm": 0.16774402558803558, "learning_rate": 5.845269611858956e-05, "loss": 0.33600101470947263, "step": 97360 }, { "epoch": 0.41802976052480184, "grad_norm": 0.011165248230099678, "learning_rate": 5.8448384398471936e-05, "loss": 0.1859840750694275, "step": 97370 }, { "epoch": 0.4180726926148219, "grad_norm": 1.0918728113174438, "learning_rate": 5.84440726783543e-05, "loss": 0.16099945306777955, "step": 97380 }, { "epoch": 0.41811562470484187, "grad_norm": 1.3140350580215454, "learning_rate": 5.843976095823668e-05, "loss": 0.29539175033569337, "step": 97390 }, { "epoch": 0.4181585567948619, "grad_norm": 0.12994621694087982, "learning_rate": 5.8435449238119054e-05, "loss": 0.38184683322906493, "step": 97400 }, { "epoch": 0.4182014888848819, "grad_norm": 0.04722442105412483, "learning_rate": 5.843113751800143e-05, "loss": 0.25283255577087405, "step": 97410 }, { "epoch": 0.4182444209749019, "grad_norm": 0.008985154330730438, "learning_rate": 5.842682579788381e-05, "loss": 0.22941651344299316, "step": 97420 }, { "epoch": 0.41828735306492193, "grad_norm": 0.011790527030825615, "learning_rate": 5.8422514077766186e-05, "loss": 0.2190561532974243, "step": 97430 }, { "epoch": 0.4183302851549419, "grad_norm": 0.33150047063827515, "learning_rate": 5.8418202357648564e-05, "loss": 0.26463005542755125, "step": 97440 }, { "epoch": 0.4183732172449619, "grad_norm": 0.11258593201637268, "learning_rate": 5.841389063753094e-05, "loss": 0.07938405871391296, "step": 97450 }, { "epoch": 0.41841614933498195, "grad_norm": 0.0037087793461978436, "learning_rate": 5.840957891741331e-05, "loss": 0.10488677024841309, "step": 97460 }, { "epoch": 0.41845908142500193, "grad_norm": 3.129962682723999, "learning_rate": 5.840526719729569e-05, "loss": 0.5519233703613281, "step": 97470 }, { "epoch": 0.4185020135150219, "grad_norm": 0.023688072338700294, "learning_rate": 5.8400955477178066e-05, "loss": 0.04940264523029327, "step": 97480 }, { "epoch": 0.41854494560504196, "grad_norm": 0.45331892371177673, "learning_rate": 5.8396643757060444e-05, "loss": 0.23521509170532226, "step": 97490 }, { "epoch": 0.41858787769506195, "grad_norm": 1.5123122930526733, "learning_rate": 5.839233203694282e-05, "loss": 0.3438755512237549, "step": 97500 }, { "epoch": 0.41863080978508194, "grad_norm": 0.016864225268363953, "learning_rate": 5.83880203168252e-05, "loss": 0.2781703948974609, "step": 97510 }, { "epoch": 0.418673741875102, "grad_norm": 0.37317079305648804, "learning_rate": 5.8383708596707576e-05, "loss": 0.3207017183303833, "step": 97520 }, { "epoch": 0.41871667396512197, "grad_norm": 1.5420507192611694, "learning_rate": 5.837939687658995e-05, "loss": 0.4660184383392334, "step": 97530 }, { "epoch": 0.41875960605514195, "grad_norm": 3.5812125205993652, "learning_rate": 5.837508515647233e-05, "loss": 0.3520195960998535, "step": 97540 }, { "epoch": 0.418802538145162, "grad_norm": 0.13337750732898712, "learning_rate": 5.83707734363547e-05, "loss": 0.26060965061187746, "step": 97550 }, { "epoch": 0.418845470235182, "grad_norm": 0.35369372367858887, "learning_rate": 5.836646171623708e-05, "loss": 0.22286348342895507, "step": 97560 }, { "epoch": 0.41888840232520197, "grad_norm": 2.425368070602417, "learning_rate": 5.8362149996119456e-05, "loss": 0.17919390201568602, "step": 97570 }, { "epoch": 0.418931334415222, "grad_norm": 0.0627279132604599, "learning_rate": 5.835783827600183e-05, "loss": 0.26877822875976565, "step": 97580 }, { "epoch": 0.418974266505242, "grad_norm": 2.9158642292022705, "learning_rate": 5.835352655588421e-05, "loss": 0.10325750112533569, "step": 97590 }, { "epoch": 0.41901719859526204, "grad_norm": 0.11321698129177094, "learning_rate": 5.834921483576659e-05, "loss": 0.19380873441696167, "step": 97600 }, { "epoch": 0.41906013068528203, "grad_norm": 0.023406973108649254, "learning_rate": 5.8344903115648965e-05, "loss": 0.17096658945083618, "step": 97610 }, { "epoch": 0.419103062775302, "grad_norm": 0.08598892390727997, "learning_rate": 5.834059139553134e-05, "loss": 0.08025979399681091, "step": 97620 }, { "epoch": 0.41914599486532206, "grad_norm": 0.0026491612661629915, "learning_rate": 5.8336279675413706e-05, "loss": 0.16931982040405275, "step": 97630 }, { "epoch": 0.41918892695534204, "grad_norm": 0.004537897650152445, "learning_rate": 5.8331967955296084e-05, "loss": 0.17108237743377686, "step": 97640 }, { "epoch": 0.41923185904536203, "grad_norm": 0.19760261476039886, "learning_rate": 5.832765623517846e-05, "loss": 0.21934175491333008, "step": 97650 }, { "epoch": 0.4192747911353821, "grad_norm": 1.2101079225540161, "learning_rate": 5.832334451506084e-05, "loss": 0.3915103435516357, "step": 97660 }, { "epoch": 0.41931772322540206, "grad_norm": 0.015090469270944595, "learning_rate": 5.8319032794943216e-05, "loss": 0.13569951057434082, "step": 97670 }, { "epoch": 0.41936065531542205, "grad_norm": 0.014168272726237774, "learning_rate": 5.831472107482559e-05, "loss": 0.22858223915100098, "step": 97680 }, { "epoch": 0.4194035874054421, "grad_norm": 0.03062673658132553, "learning_rate": 5.831040935470797e-05, "loss": 0.23994054794311523, "step": 97690 }, { "epoch": 0.4194465194954621, "grad_norm": 0.020169761031866074, "learning_rate": 5.830609763459035e-05, "loss": 0.22672290802001954, "step": 97700 }, { "epoch": 0.41948945158548206, "grad_norm": 6.934675693511963, "learning_rate": 5.830178591447272e-05, "loss": 0.3498288869857788, "step": 97710 }, { "epoch": 0.4195323836755021, "grad_norm": 0.15747320652008057, "learning_rate": 5.8297474194355096e-05, "loss": 0.3790154457092285, "step": 97720 }, { "epoch": 0.4195753157655221, "grad_norm": 0.33371075987815857, "learning_rate": 5.829316247423747e-05, "loss": 0.14788826704025268, "step": 97730 }, { "epoch": 0.4196182478555421, "grad_norm": 0.058594830334186554, "learning_rate": 5.828885075411985e-05, "loss": 0.5652408599853516, "step": 97740 }, { "epoch": 0.4196611799455621, "grad_norm": 0.15235500037670135, "learning_rate": 5.828453903400223e-05, "loss": 0.16906936168670655, "step": 97750 }, { "epoch": 0.4197041120355821, "grad_norm": 0.10869899392127991, "learning_rate": 5.8280227313884605e-05, "loss": 0.12809311151504515, "step": 97760 }, { "epoch": 0.4197470441256021, "grad_norm": 0.16404810547828674, "learning_rate": 5.827591559376698e-05, "loss": 0.11475526094436646, "step": 97770 }, { "epoch": 0.41978997621562214, "grad_norm": 1.0307124853134155, "learning_rate": 5.827160387364936e-05, "loss": 0.22724244594573975, "step": 97780 }, { "epoch": 0.4198329083056421, "grad_norm": 0.008609895594418049, "learning_rate": 5.826729215353173e-05, "loss": 0.23866071701049804, "step": 97790 }, { "epoch": 0.4198758403956621, "grad_norm": 3.9870755672454834, "learning_rate": 5.826298043341411e-05, "loss": 0.21454358100891113, "step": 97800 }, { "epoch": 0.41991877248568216, "grad_norm": 4.111325740814209, "learning_rate": 5.8258668713296485e-05, "loss": 0.10587308406829835, "step": 97810 }, { "epoch": 0.41996170457570214, "grad_norm": 0.05987037718296051, "learning_rate": 5.825435699317886e-05, "loss": 0.22635457515716553, "step": 97820 }, { "epoch": 0.4200046366657222, "grad_norm": 1.7137806415557861, "learning_rate": 5.825004527306124e-05, "loss": 0.3221606731414795, "step": 97830 }, { "epoch": 0.4200475687557422, "grad_norm": 0.037945330142974854, "learning_rate": 5.824573355294362e-05, "loss": 0.026730722188949584, "step": 97840 }, { "epoch": 0.42009050084576216, "grad_norm": 0.6939473152160645, "learning_rate": 5.8241421832825994e-05, "loss": 0.24455561637878417, "step": 97850 }, { "epoch": 0.4201334329357822, "grad_norm": 1.968237042427063, "learning_rate": 5.823711011270837e-05, "loss": 0.4216612815856934, "step": 97860 }, { "epoch": 0.4201763650258022, "grad_norm": 0.005326097831130028, "learning_rate": 5.823279839259075e-05, "loss": 0.1221767544746399, "step": 97870 }, { "epoch": 0.4202192971158222, "grad_norm": 4.377943992614746, "learning_rate": 5.822848667247311e-05, "loss": 0.3343919277191162, "step": 97880 }, { "epoch": 0.4202622292058422, "grad_norm": 2.684236764907837, "learning_rate": 5.822417495235549e-05, "loss": 0.25210669040679934, "step": 97890 }, { "epoch": 0.4203051612958622, "grad_norm": 0.35695880651474, "learning_rate": 5.821986323223787e-05, "loss": 0.11876271963119507, "step": 97900 }, { "epoch": 0.4203480933858822, "grad_norm": 0.12343742698431015, "learning_rate": 5.8215551512120245e-05, "loss": 0.15900148153305055, "step": 97910 }, { "epoch": 0.42039102547590224, "grad_norm": 0.5640662312507629, "learning_rate": 5.821123979200262e-05, "loss": 0.11388663053512574, "step": 97920 }, { "epoch": 0.4204339575659222, "grad_norm": 3.1699492931365967, "learning_rate": 5.8206928071885006e-05, "loss": 0.20103952884674073, "step": 97930 }, { "epoch": 0.4204768896559422, "grad_norm": 0.04569260776042938, "learning_rate": 5.8202616351767383e-05, "loss": 0.10158834457397461, "step": 97940 }, { "epoch": 0.42051982174596225, "grad_norm": 1.2294700145721436, "learning_rate": 5.819830463164976e-05, "loss": 0.23906416893005372, "step": 97950 }, { "epoch": 0.42056275383598224, "grad_norm": 0.3550843596458435, "learning_rate": 5.8193992911532125e-05, "loss": 0.0822509467601776, "step": 97960 }, { "epoch": 0.4206056859260022, "grad_norm": 2.002542734146118, "learning_rate": 5.81896811914145e-05, "loss": 0.2353053092956543, "step": 97970 }, { "epoch": 0.42064861801602227, "grad_norm": 0.06945760548114777, "learning_rate": 5.818536947129688e-05, "loss": 0.236065673828125, "step": 97980 }, { "epoch": 0.42069155010604226, "grad_norm": 0.017344938591122627, "learning_rate": 5.818105775117926e-05, "loss": 0.11889860630035401, "step": 97990 }, { "epoch": 0.42073448219606224, "grad_norm": 0.03761398419737816, "learning_rate": 5.8176746031061634e-05, "loss": 0.2174436092376709, "step": 98000 }, { "epoch": 0.42073448219606224, "eval_loss": 0.4053749442100525, "eval_runtime": 27.2888, "eval_samples_per_second": 3.665, "eval_steps_per_second": 3.665, "step": 98000 }, { "epoch": 0.4207774142860823, "grad_norm": 0.5761705040931702, "learning_rate": 5.817243431094401e-05, "loss": 0.08439416885375976, "step": 98010 }, { "epoch": 0.42082034637610227, "grad_norm": 0.11438914388418198, "learning_rate": 5.816812259082639e-05, "loss": 0.08448938131332398, "step": 98020 }, { "epoch": 0.4208632784661223, "grad_norm": 0.05054575949907303, "learning_rate": 5.8163810870708766e-05, "loss": 0.34284517765045164, "step": 98030 }, { "epoch": 0.4209062105561423, "grad_norm": 1.153587818145752, "learning_rate": 5.8159499150591137e-05, "loss": 0.23103575706481932, "step": 98040 }, { "epoch": 0.4209491426461623, "grad_norm": 0.6570166349411011, "learning_rate": 5.8155187430473514e-05, "loss": 0.5498339176177979, "step": 98050 }, { "epoch": 0.42099207473618233, "grad_norm": 1.3087819814682007, "learning_rate": 5.815087571035589e-05, "loss": 0.23551971912384034, "step": 98060 }, { "epoch": 0.4210350068262023, "grad_norm": 0.23769740760326385, "learning_rate": 5.814656399023827e-05, "loss": 0.347845721244812, "step": 98070 }, { "epoch": 0.4210779389162223, "grad_norm": 1.0044053792953491, "learning_rate": 5.8142252270120646e-05, "loss": 0.09060815572738648, "step": 98080 }, { "epoch": 0.42112087100624235, "grad_norm": 1.1900349855422974, "learning_rate": 5.813794055000302e-05, "loss": 0.2937170028686523, "step": 98090 }, { "epoch": 0.42116380309626233, "grad_norm": 0.9972975254058838, "learning_rate": 5.81336288298854e-05, "loss": 0.04283437132835388, "step": 98100 }, { "epoch": 0.4212067351862823, "grad_norm": 2.356654644012451, "learning_rate": 5.812931710976778e-05, "loss": 0.14631413221359252, "step": 98110 }, { "epoch": 0.42124966727630236, "grad_norm": 5.286679267883301, "learning_rate": 5.812500538965014e-05, "loss": 0.2211087465286255, "step": 98120 }, { "epoch": 0.42129259936632235, "grad_norm": 2.404400587081909, "learning_rate": 5.812069366953252e-05, "loss": 0.20612568855285646, "step": 98130 }, { "epoch": 0.42133553145634234, "grad_norm": 0.02629181742668152, "learning_rate": 5.8116381949414896e-05, "loss": 0.05725756883621216, "step": 98140 }, { "epoch": 0.4213784635463624, "grad_norm": 1.1704928874969482, "learning_rate": 5.811207022929728e-05, "loss": 0.35143492221832273, "step": 98150 }, { "epoch": 0.42142139563638237, "grad_norm": 0.5044357180595398, "learning_rate": 5.810775850917966e-05, "loss": 0.3639864206314087, "step": 98160 }, { "epoch": 0.42146432772640235, "grad_norm": 0.005088315811008215, "learning_rate": 5.8103446789062035e-05, "loss": 0.24358859062194824, "step": 98170 }, { "epoch": 0.4215072598164224, "grad_norm": 0.05479007959365845, "learning_rate": 5.809913506894441e-05, "loss": 0.20665128231048585, "step": 98180 }, { "epoch": 0.4215501919064424, "grad_norm": 0.9962042570114136, "learning_rate": 5.809482334882679e-05, "loss": 0.29216752052307127, "step": 98190 }, { "epoch": 0.42159312399646237, "grad_norm": 0.5849328637123108, "learning_rate": 5.809051162870917e-05, "loss": 0.3833322286605835, "step": 98200 }, { "epoch": 0.4216360560864824, "grad_norm": 0.5021851658821106, "learning_rate": 5.808619990859153e-05, "loss": 0.39357795715332033, "step": 98210 }, { "epoch": 0.4216789881765024, "grad_norm": 0.338433176279068, "learning_rate": 5.808188818847391e-05, "loss": 0.12214242219924927, "step": 98220 }, { "epoch": 0.4217219202665224, "grad_norm": 3.6846652030944824, "learning_rate": 5.8077576468356286e-05, "loss": 0.24050602912902833, "step": 98230 }, { "epoch": 0.42176485235654243, "grad_norm": 2.1717207431793213, "learning_rate": 5.807326474823866e-05, "loss": 0.25009167194366455, "step": 98240 }, { "epoch": 0.4218077844465624, "grad_norm": 0.14511777460575104, "learning_rate": 5.806895302812104e-05, "loss": 0.12174754142761231, "step": 98250 }, { "epoch": 0.42185071653658246, "grad_norm": 0.389323353767395, "learning_rate": 5.806464130800342e-05, "loss": 0.16142910718917847, "step": 98260 }, { "epoch": 0.42189364862660245, "grad_norm": 2.4261882305145264, "learning_rate": 5.8060329587885795e-05, "loss": 0.391825270652771, "step": 98270 }, { "epoch": 0.42193658071662243, "grad_norm": 0.28029099106788635, "learning_rate": 5.805601786776817e-05, "loss": 0.1543244242668152, "step": 98280 }, { "epoch": 0.4219795128066425, "grad_norm": 1.7710871696472168, "learning_rate": 5.805170614765054e-05, "loss": 0.37169885635375977, "step": 98290 }, { "epoch": 0.42202244489666246, "grad_norm": 0.004360601771622896, "learning_rate": 5.804739442753292e-05, "loss": 0.16414964199066162, "step": 98300 }, { "epoch": 0.42206537698668245, "grad_norm": 0.01507270336151123, "learning_rate": 5.80430827074153e-05, "loss": 0.18333516120910645, "step": 98310 }, { "epoch": 0.4221083090767025, "grad_norm": 1.3948227167129517, "learning_rate": 5.8038770987297675e-05, "loss": 0.18821496963500978, "step": 98320 }, { "epoch": 0.4221512411667225, "grad_norm": 1.3636878728866577, "learning_rate": 5.803445926718005e-05, "loss": 0.14322640895843505, "step": 98330 }, { "epoch": 0.42219417325674247, "grad_norm": 0.031215589493513107, "learning_rate": 5.803014754706243e-05, "loss": 0.1651058316230774, "step": 98340 }, { "epoch": 0.4222371053467625, "grad_norm": 2.045180082321167, "learning_rate": 5.802583582694481e-05, "loss": 0.16372413635253907, "step": 98350 }, { "epoch": 0.4222800374367825, "grad_norm": 6.121435165405273, "learning_rate": 5.8021524106827184e-05, "loss": 0.15840452909469604, "step": 98360 }, { "epoch": 0.4223229695268025, "grad_norm": 23.101932525634766, "learning_rate": 5.8017212386709555e-05, "loss": 0.16662964820861817, "step": 98370 }, { "epoch": 0.4223659016168225, "grad_norm": 1.8428597450256348, "learning_rate": 5.801290066659193e-05, "loss": 0.3516210079193115, "step": 98380 }, { "epoch": 0.4224088337068425, "grad_norm": 0.46591079235076904, "learning_rate": 5.800858894647431e-05, "loss": 0.1679096817970276, "step": 98390 }, { "epoch": 0.4224517657968625, "grad_norm": 3.5978612899780273, "learning_rate": 5.800427722635669e-05, "loss": 0.2919458866119385, "step": 98400 }, { "epoch": 0.42249469788688254, "grad_norm": 0.003413701429963112, "learning_rate": 5.7999965506239064e-05, "loss": 0.18819609880447388, "step": 98410 }, { "epoch": 0.42253762997690253, "grad_norm": 0.2167656570672989, "learning_rate": 5.799565378612144e-05, "loss": 0.08948023915290833, "step": 98420 }, { "epoch": 0.4225805620669225, "grad_norm": 0.01767050288617611, "learning_rate": 5.799134206600382e-05, "loss": 0.40622797012329104, "step": 98430 }, { "epoch": 0.42262349415694256, "grad_norm": 0.04388017579913139, "learning_rate": 5.7987030345886196e-05, "loss": 0.2401859998703003, "step": 98440 }, { "epoch": 0.42266642624696255, "grad_norm": 3.0155787467956543, "learning_rate": 5.798271862576856e-05, "loss": 0.3773958683013916, "step": 98450 }, { "epoch": 0.4227093583369826, "grad_norm": 1.8744343519210815, "learning_rate": 5.797840690565094e-05, "loss": 0.21185708045959473, "step": 98460 }, { "epoch": 0.4227522904270026, "grad_norm": 0.007587990257889032, "learning_rate": 5.7974095185533315e-05, "loss": 0.2657394647598267, "step": 98470 }, { "epoch": 0.42279522251702256, "grad_norm": 0.4172123670578003, "learning_rate": 5.796978346541569e-05, "loss": 0.076595139503479, "step": 98480 }, { "epoch": 0.4228381546070426, "grad_norm": 1.0345379114151, "learning_rate": 5.796547174529807e-05, "loss": 0.3573399782180786, "step": 98490 }, { "epoch": 0.4228810866970626, "grad_norm": 0.10183721035718918, "learning_rate": 5.796116002518045e-05, "loss": 0.13347251415252687, "step": 98500 }, { "epoch": 0.4229240187870826, "grad_norm": 0.05460893362760544, "learning_rate": 5.7956848305062824e-05, "loss": 0.0026408961042761804, "step": 98510 }, { "epoch": 0.4229669508771026, "grad_norm": 1.4982709884643555, "learning_rate": 5.795253658494521e-05, "loss": 0.24210147857666015, "step": 98520 }, { "epoch": 0.4230098829671226, "grad_norm": 0.048997290432453156, "learning_rate": 5.794822486482757e-05, "loss": 0.1325900673866272, "step": 98530 }, { "epoch": 0.4230528150571426, "grad_norm": 2.290670156478882, "learning_rate": 5.794391314470995e-05, "loss": 0.08407641649246216, "step": 98540 }, { "epoch": 0.42309574714716264, "grad_norm": 0.0597001388669014, "learning_rate": 5.793960142459233e-05, "loss": 0.3538869619369507, "step": 98550 }, { "epoch": 0.4231386792371826, "grad_norm": 2.8032407760620117, "learning_rate": 5.7935289704474704e-05, "loss": 0.41775975227355955, "step": 98560 }, { "epoch": 0.4231816113272026, "grad_norm": 0.243858203291893, "learning_rate": 5.793097798435708e-05, "loss": 0.2470792770385742, "step": 98570 }, { "epoch": 0.42322454341722265, "grad_norm": 0.4460698068141937, "learning_rate": 5.792666626423946e-05, "loss": 0.10146148204803467, "step": 98580 }, { "epoch": 0.42326747550724264, "grad_norm": 0.24464015662670135, "learning_rate": 5.7922354544121836e-05, "loss": 0.19643570184707643, "step": 98590 }, { "epoch": 0.42331040759726263, "grad_norm": 1.338011622428894, "learning_rate": 5.7918042824004214e-05, "loss": 0.1863824486732483, "step": 98600 }, { "epoch": 0.42335333968728267, "grad_norm": 0.011673185974359512, "learning_rate": 5.791373110388659e-05, "loss": 0.06954334378242492, "step": 98610 }, { "epoch": 0.42339627177730266, "grad_norm": 0.050423551350831985, "learning_rate": 5.790941938376896e-05, "loss": 0.28564701080322263, "step": 98620 }, { "epoch": 0.42343920386732264, "grad_norm": 6.057043552398682, "learning_rate": 5.790510766365134e-05, "loss": 0.3044910192489624, "step": 98630 }, { "epoch": 0.4234821359573427, "grad_norm": 1.2167507410049438, "learning_rate": 5.7900795943533716e-05, "loss": 0.3324015140533447, "step": 98640 }, { "epoch": 0.4235250680473627, "grad_norm": 0.0021167939994484186, "learning_rate": 5.7896484223416094e-05, "loss": 0.1250348687171936, "step": 98650 }, { "epoch": 0.42356800013738266, "grad_norm": 0.003853866131976247, "learning_rate": 5.789217250329847e-05, "loss": 0.21840078830718995, "step": 98660 }, { "epoch": 0.4236109322274027, "grad_norm": 8.526963233947754, "learning_rate": 5.788786078318085e-05, "loss": 0.4191432952880859, "step": 98670 }, { "epoch": 0.4236538643174227, "grad_norm": 0.0189904123544693, "learning_rate": 5.7883549063063226e-05, "loss": 0.30071659088134767, "step": 98680 }, { "epoch": 0.42369679640744273, "grad_norm": 0.03762015700340271, "learning_rate": 5.78792373429456e-05, "loss": 0.23586046695709229, "step": 98690 }, { "epoch": 0.4237397284974627, "grad_norm": 0.04850266873836517, "learning_rate": 5.787492562282797e-05, "loss": 0.3225547313690186, "step": 98700 }, { "epoch": 0.4237826605874827, "grad_norm": 0.03791253641247749, "learning_rate": 5.7870613902710344e-05, "loss": 0.11718940734863281, "step": 98710 }, { "epoch": 0.42382559267750275, "grad_norm": 0.030987627804279327, "learning_rate": 5.786630218259272e-05, "loss": 0.21683728694915771, "step": 98720 }, { "epoch": 0.42386852476752274, "grad_norm": 15.335086822509766, "learning_rate": 5.78619904624751e-05, "loss": 0.35996179580688475, "step": 98730 }, { "epoch": 0.4239114568575427, "grad_norm": 0.002879585837945342, "learning_rate": 5.785767874235748e-05, "loss": 0.44658312797546384, "step": 98740 }, { "epoch": 0.42395438894756277, "grad_norm": 0.14629849791526794, "learning_rate": 5.785336702223986e-05, "loss": 0.0736695647239685, "step": 98750 }, { "epoch": 0.42399732103758275, "grad_norm": 0.13867326080799103, "learning_rate": 5.784905530212224e-05, "loss": 0.12435241937637329, "step": 98760 }, { "epoch": 0.42404025312760274, "grad_norm": 0.03923346847295761, "learning_rate": 5.7844743582004615e-05, "loss": 0.21064164638519287, "step": 98770 }, { "epoch": 0.4240831852176228, "grad_norm": 0.10467129945755005, "learning_rate": 5.784043186188698e-05, "loss": 0.15324407815933228, "step": 98780 }, { "epoch": 0.42412611730764277, "grad_norm": 0.024315934628248215, "learning_rate": 5.7836120141769356e-05, "loss": 0.15878936052322387, "step": 98790 }, { "epoch": 0.42416904939766276, "grad_norm": 0.013896098360419273, "learning_rate": 5.783180842165173e-05, "loss": 0.22481341361999513, "step": 98800 }, { "epoch": 0.4242119814876828, "grad_norm": 0.004909253679215908, "learning_rate": 5.782749670153411e-05, "loss": 0.3054252862930298, "step": 98810 }, { "epoch": 0.4242549135777028, "grad_norm": 2.7937729358673096, "learning_rate": 5.782318498141649e-05, "loss": 0.3032632350921631, "step": 98820 }, { "epoch": 0.4242978456677228, "grad_norm": 2.777571439743042, "learning_rate": 5.7818873261298865e-05, "loss": 0.3512873649597168, "step": 98830 }, { "epoch": 0.4243407777577428, "grad_norm": 3.122788429260254, "learning_rate": 5.781456154118124e-05, "loss": 0.2130906105041504, "step": 98840 }, { "epoch": 0.4243837098477628, "grad_norm": 0.04536563903093338, "learning_rate": 5.781024982106362e-05, "loss": 0.10688605308532714, "step": 98850 }, { "epoch": 0.4244266419377828, "grad_norm": 1.177663803100586, "learning_rate": 5.780593810094599e-05, "loss": 0.28160221576690675, "step": 98860 }, { "epoch": 0.42446957402780283, "grad_norm": 0.0027043658774346113, "learning_rate": 5.780162638082837e-05, "loss": 0.226685094833374, "step": 98870 }, { "epoch": 0.4245125061178228, "grad_norm": 2.435696840286255, "learning_rate": 5.7797314660710745e-05, "loss": 0.3340808153152466, "step": 98880 }, { "epoch": 0.42455543820784286, "grad_norm": 0.0020951908081769943, "learning_rate": 5.779300294059312e-05, "loss": 0.1408405900001526, "step": 98890 }, { "epoch": 0.42459837029786285, "grad_norm": 2.14007306098938, "learning_rate": 5.77886912204755e-05, "loss": 0.2413254737854004, "step": 98900 }, { "epoch": 0.42464130238788284, "grad_norm": 0.0012880098074674606, "learning_rate": 5.778437950035788e-05, "loss": 0.16362298727035524, "step": 98910 }, { "epoch": 0.4246842344779029, "grad_norm": 0.004306933842599392, "learning_rate": 5.7780067780240255e-05, "loss": 0.2394641399383545, "step": 98920 }, { "epoch": 0.42472716656792286, "grad_norm": 1.6067447662353516, "learning_rate": 5.777575606012263e-05, "loss": 0.48149833679199217, "step": 98930 }, { "epoch": 0.42477009865794285, "grad_norm": 2.7072231769561768, "learning_rate": 5.777144434000501e-05, "loss": 0.17587217092514038, "step": 98940 }, { "epoch": 0.4248130307479629, "grad_norm": 4.478725433349609, "learning_rate": 5.776713261988737e-05, "loss": 0.30828609466552737, "step": 98950 }, { "epoch": 0.4248559628379829, "grad_norm": 0.7346305251121521, "learning_rate": 5.776282089976976e-05, "loss": 0.1656672477722168, "step": 98960 }, { "epoch": 0.42489889492800287, "grad_norm": 0.003329846076667309, "learning_rate": 5.7758509179652135e-05, "loss": 0.37731635570526123, "step": 98970 }, { "epoch": 0.4249418270180229, "grad_norm": 0.0670686811208725, "learning_rate": 5.775419745953451e-05, "loss": 0.2388993263244629, "step": 98980 }, { "epoch": 0.4249847591080429, "grad_norm": 0.21079060435295105, "learning_rate": 5.774988573941689e-05, "loss": 0.082888263463974, "step": 98990 }, { "epoch": 0.4250276911980629, "grad_norm": 3.8923022747039795, "learning_rate": 5.774557401929927e-05, "loss": 0.21767971515655518, "step": 99000 }, { "epoch": 0.4250276911980629, "eval_loss": 0.42039725184440613, "eval_runtime": 27.092, "eval_samples_per_second": 3.691, "eval_steps_per_second": 3.691, "step": 99000 }, { "epoch": 0.4250706232880829, "grad_norm": 0.049764856696128845, "learning_rate": 5.7741262299181644e-05, "loss": 0.1819413423538208, "step": 99010 }, { "epoch": 0.4251135553781029, "grad_norm": 0.061445679515600204, "learning_rate": 5.773695057906402e-05, "loss": 0.20978860855102538, "step": 99020 }, { "epoch": 0.4251564874681229, "grad_norm": 0.15067073702812195, "learning_rate": 5.7732638858946385e-05, "loss": 0.25956437587738035, "step": 99030 }, { "epoch": 0.42519941955814294, "grad_norm": 0.017767034471035004, "learning_rate": 5.772832713882876e-05, "loss": 0.12931965589523314, "step": 99040 }, { "epoch": 0.42524235164816293, "grad_norm": 58.04544448852539, "learning_rate": 5.772401541871114e-05, "loss": 0.1122699499130249, "step": 99050 }, { "epoch": 0.4252852837381829, "grad_norm": 0.08649862557649612, "learning_rate": 5.771970369859352e-05, "loss": 0.07707861661911011, "step": 99060 }, { "epoch": 0.42532821582820296, "grad_norm": 4.430203914642334, "learning_rate": 5.7715391978475894e-05, "loss": 0.1516265392303467, "step": 99070 }, { "epoch": 0.42537114791822295, "grad_norm": 0.8073638677597046, "learning_rate": 5.771108025835827e-05, "loss": 0.2441352128982544, "step": 99080 }, { "epoch": 0.42541408000824293, "grad_norm": 0.08090592175722122, "learning_rate": 5.770676853824065e-05, "loss": 0.10868796110153198, "step": 99090 }, { "epoch": 0.425457012098263, "grad_norm": 5.911368370056152, "learning_rate": 5.7702456818123027e-05, "loss": 0.3562882423400879, "step": 99100 }, { "epoch": 0.42549994418828296, "grad_norm": 0.2530055046081543, "learning_rate": 5.76981450980054e-05, "loss": 0.19297826290130615, "step": 99110 }, { "epoch": 0.425542876278303, "grad_norm": 1.9001591205596924, "learning_rate": 5.7693833377887774e-05, "loss": 0.3040750503540039, "step": 99120 }, { "epoch": 0.425585808368323, "grad_norm": 0.07195616513490677, "learning_rate": 5.768952165777015e-05, "loss": 0.25295019149780273, "step": 99130 }, { "epoch": 0.425628740458343, "grad_norm": 2.257978916168213, "learning_rate": 5.768520993765253e-05, "loss": 0.45368361473083496, "step": 99140 }, { "epoch": 0.425671672548363, "grad_norm": 0.25597015023231506, "learning_rate": 5.7680898217534906e-05, "loss": 0.1789316415786743, "step": 99150 }, { "epoch": 0.425714604638383, "grad_norm": 0.1592808961868286, "learning_rate": 5.7676586497417284e-05, "loss": 0.15700260400772095, "step": 99160 }, { "epoch": 0.425757536728403, "grad_norm": 0.3460257649421692, "learning_rate": 5.767227477729966e-05, "loss": 0.27511794567108155, "step": 99170 }, { "epoch": 0.42580046881842304, "grad_norm": 0.01419067569077015, "learning_rate": 5.766796305718204e-05, "loss": 0.20267207622528077, "step": 99180 }, { "epoch": 0.425843400908443, "grad_norm": 1.0129761695861816, "learning_rate": 5.766365133706441e-05, "loss": 0.2106555700302124, "step": 99190 }, { "epoch": 0.425886332998463, "grad_norm": 1.3225549459457397, "learning_rate": 5.7659339616946786e-05, "loss": 0.08879272937774658, "step": 99200 }, { "epoch": 0.42592926508848306, "grad_norm": 0.10503018647432327, "learning_rate": 5.7655027896829164e-05, "loss": 0.08622437715530396, "step": 99210 }, { "epoch": 0.42597219717850304, "grad_norm": 2.210472345352173, "learning_rate": 5.765071617671154e-05, "loss": 0.2910381555557251, "step": 99220 }, { "epoch": 0.42601512926852303, "grad_norm": 0.008246196433901787, "learning_rate": 5.764640445659392e-05, "loss": 0.3339352607727051, "step": 99230 }, { "epoch": 0.42605806135854307, "grad_norm": 1.0068203210830688, "learning_rate": 5.7642092736476296e-05, "loss": 0.2700967311859131, "step": 99240 }, { "epoch": 0.42610099344856306, "grad_norm": 0.06918929517269135, "learning_rate": 5.763778101635867e-05, "loss": 0.015539254248142242, "step": 99250 }, { "epoch": 0.42614392553858305, "grad_norm": 16.384273529052734, "learning_rate": 5.763346929624105e-05, "loss": 0.2665158033370972, "step": 99260 }, { "epoch": 0.4261868576286031, "grad_norm": 0.05230254307389259, "learning_rate": 5.7629157576123414e-05, "loss": 0.3403724908828735, "step": 99270 }, { "epoch": 0.4262297897186231, "grad_norm": 0.1716545820236206, "learning_rate": 5.762484585600579e-05, "loss": 0.025423717498779298, "step": 99280 }, { "epoch": 0.42627272180864306, "grad_norm": 0.3432859778404236, "learning_rate": 5.762053413588817e-05, "loss": 0.27904059886932375, "step": 99290 }, { "epoch": 0.4263156538986631, "grad_norm": 0.0010938121704384685, "learning_rate": 5.7616222415770546e-05, "loss": 0.33778557777404783, "step": 99300 }, { "epoch": 0.4263585859886831, "grad_norm": 2.464271306991577, "learning_rate": 5.7611910695652924e-05, "loss": 0.26821877956390383, "step": 99310 }, { "epoch": 0.42640151807870313, "grad_norm": 0.015062485821545124, "learning_rate": 5.76075989755353e-05, "loss": 0.20908112525939943, "step": 99320 }, { "epoch": 0.4264444501687231, "grad_norm": 19.407758712768555, "learning_rate": 5.760328725541768e-05, "loss": 0.18962712287902833, "step": 99330 }, { "epoch": 0.4264873822587431, "grad_norm": 1.7343069314956665, "learning_rate": 5.759897553530006e-05, "loss": 0.43570146560668943, "step": 99340 }, { "epoch": 0.42653031434876315, "grad_norm": 0.0023813594598323107, "learning_rate": 5.759466381518244e-05, "loss": 0.10327715873718261, "step": 99350 }, { "epoch": 0.42657324643878314, "grad_norm": 2.541740894317627, "learning_rate": 5.7590352095064804e-05, "loss": 0.22181310653686523, "step": 99360 }, { "epoch": 0.4266161785288031, "grad_norm": 1.6314506530761719, "learning_rate": 5.758604037494718e-05, "loss": 0.35480194091796874, "step": 99370 }, { "epoch": 0.42665911061882317, "grad_norm": 1.3725117444992065, "learning_rate": 5.758172865482956e-05, "loss": 0.23750679492950438, "step": 99380 }, { "epoch": 0.42670204270884315, "grad_norm": 0.8450286984443665, "learning_rate": 5.7577416934711936e-05, "loss": 0.25147688388824463, "step": 99390 }, { "epoch": 0.42674497479886314, "grad_norm": 1.480108618736267, "learning_rate": 5.757310521459431e-05, "loss": 0.20741603374481202, "step": 99400 }, { "epoch": 0.4267879068888832, "grad_norm": 2.16528582572937, "learning_rate": 5.756879349447669e-05, "loss": 0.21702051162719727, "step": 99410 }, { "epoch": 0.42683083897890317, "grad_norm": 1.2046661376953125, "learning_rate": 5.756448177435907e-05, "loss": 0.22789404392242432, "step": 99420 }, { "epoch": 0.42687377106892316, "grad_norm": 0.09745719283819199, "learning_rate": 5.7560170054241445e-05, "loss": 0.23347074985504152, "step": 99430 }, { "epoch": 0.4269167031589432, "grad_norm": 6.254867076873779, "learning_rate": 5.7555858334123816e-05, "loss": 0.21007938385009767, "step": 99440 }, { "epoch": 0.4269596352489632, "grad_norm": 1.8788691759109497, "learning_rate": 5.755154661400619e-05, "loss": 0.31277174949645997, "step": 99450 }, { "epoch": 0.4270025673389832, "grad_norm": 0.12879228591918945, "learning_rate": 5.754723489388857e-05, "loss": 0.29340462684631347, "step": 99460 }, { "epoch": 0.4270454994290032, "grad_norm": 0.2092210054397583, "learning_rate": 5.754292317377095e-05, "loss": 0.20556654930114746, "step": 99470 }, { "epoch": 0.4270884315190232, "grad_norm": 0.1479301005601883, "learning_rate": 5.7538611453653325e-05, "loss": 0.18042335510253907, "step": 99480 }, { "epoch": 0.4271313636090432, "grad_norm": 0.05552195385098457, "learning_rate": 5.75342997335357e-05, "loss": 0.09226502776145935, "step": 99490 }, { "epoch": 0.42717429569906323, "grad_norm": 0.001192165189422667, "learning_rate": 5.752998801341808e-05, "loss": 0.26993460655212403, "step": 99500 }, { "epoch": 0.4272172277890832, "grad_norm": 9.776094436645508, "learning_rate": 5.752567629330046e-05, "loss": 0.3855778694152832, "step": 99510 }, { "epoch": 0.4272601598791032, "grad_norm": 0.0043556843884289265, "learning_rate": 5.752136457318282e-05, "loss": 0.275787353515625, "step": 99520 }, { "epoch": 0.42730309196912325, "grad_norm": 0.9076972603797913, "learning_rate": 5.75170528530652e-05, "loss": 0.21448926925659179, "step": 99530 }, { "epoch": 0.42734602405914324, "grad_norm": 0.17568263411521912, "learning_rate": 5.7512741132947575e-05, "loss": 0.13522560596466066, "step": 99540 }, { "epoch": 0.4273889561491633, "grad_norm": 0.026702409610152245, "learning_rate": 5.750842941282995e-05, "loss": 0.08444958925247192, "step": 99550 }, { "epoch": 0.42743188823918327, "grad_norm": 0.08688662946224213, "learning_rate": 5.750411769271234e-05, "loss": 0.058204162120819095, "step": 99560 }, { "epoch": 0.42747482032920325, "grad_norm": 6.395595550537109, "learning_rate": 5.7499805972594714e-05, "loss": 0.28020992279052737, "step": 99570 }, { "epoch": 0.4275177524192233, "grad_norm": 0.0059433989226818085, "learning_rate": 5.749549425247709e-05, "loss": 0.12280683517456055, "step": 99580 }, { "epoch": 0.4275606845092433, "grad_norm": 0.017601149156689644, "learning_rate": 5.749118253235947e-05, "loss": 0.2879619836807251, "step": 99590 }, { "epoch": 0.42760361659926327, "grad_norm": 0.01707869954407215, "learning_rate": 5.748687081224183e-05, "loss": 0.22087881565093995, "step": 99600 }, { "epoch": 0.4276465486892833, "grad_norm": 3.0199363231658936, "learning_rate": 5.748255909212421e-05, "loss": 0.140790593624115, "step": 99610 }, { "epoch": 0.4276894807793033, "grad_norm": 2.2495341300964355, "learning_rate": 5.747824737200659e-05, "loss": 0.1820667266845703, "step": 99620 }, { "epoch": 0.4277324128693233, "grad_norm": 0.32152339816093445, "learning_rate": 5.7473935651888965e-05, "loss": 0.25116183757781985, "step": 99630 }, { "epoch": 0.42777534495934333, "grad_norm": 0.04170903190970421, "learning_rate": 5.746962393177134e-05, "loss": 0.09960184693336487, "step": 99640 }, { "epoch": 0.4278182770493633, "grad_norm": 2.308204412460327, "learning_rate": 5.746531221165372e-05, "loss": 0.10755871534347534, "step": 99650 }, { "epoch": 0.4278612091393833, "grad_norm": 0.012859346345067024, "learning_rate": 5.74610004915361e-05, "loss": 0.08167902231216431, "step": 99660 }, { "epoch": 0.42790414122940335, "grad_norm": 20.540184020996094, "learning_rate": 5.7456688771418474e-05, "loss": 0.32033615112304686, "step": 99670 }, { "epoch": 0.42794707331942333, "grad_norm": 0.052581265568733215, "learning_rate": 5.745237705130085e-05, "loss": 0.22763388156890868, "step": 99680 }, { "epoch": 0.4279900054094433, "grad_norm": 0.024297883734107018, "learning_rate": 5.744806533118322e-05, "loss": 0.13195170164108277, "step": 99690 }, { "epoch": 0.42803293749946336, "grad_norm": 0.0014119200641289353, "learning_rate": 5.74437536110656e-05, "loss": 0.22947165966033936, "step": 99700 }, { "epoch": 0.42807586958948335, "grad_norm": 1.5014564990997314, "learning_rate": 5.743944189094798e-05, "loss": 0.16373069286346437, "step": 99710 }, { "epoch": 0.42811880167950334, "grad_norm": 0.0006927020149305463, "learning_rate": 5.7435130170830354e-05, "loss": 0.08164035081863404, "step": 99720 }, { "epoch": 0.4281617337695234, "grad_norm": 0.10113275796175003, "learning_rate": 5.743081845071273e-05, "loss": 0.2783978939056396, "step": 99730 }, { "epoch": 0.42820466585954337, "grad_norm": 1.616166353225708, "learning_rate": 5.742650673059511e-05, "loss": 0.27019352912902833, "step": 99740 }, { "epoch": 0.4282475979495634, "grad_norm": 5.727255344390869, "learning_rate": 5.7422195010477486e-05, "loss": 0.2755590438842773, "step": 99750 }, { "epoch": 0.4282905300395834, "grad_norm": 0.010672148317098618, "learning_rate": 5.7417883290359863e-05, "loss": 0.28162527084350586, "step": 99760 }, { "epoch": 0.4283334621296034, "grad_norm": 10.59469223022461, "learning_rate": 5.741357157024223e-05, "loss": 0.23261218070983886, "step": 99770 }, { "epoch": 0.4283763942196234, "grad_norm": 0.07830287516117096, "learning_rate": 5.740925985012461e-05, "loss": 0.10009453296661378, "step": 99780 }, { "epoch": 0.4284193263096434, "grad_norm": 4.014584064483643, "learning_rate": 5.740494813000699e-05, "loss": 0.1561530590057373, "step": 99790 }, { "epoch": 0.4284622583996634, "grad_norm": 1.0443463325500488, "learning_rate": 5.7400636409889366e-05, "loss": 0.28882346153259275, "step": 99800 }, { "epoch": 0.42850519048968344, "grad_norm": 0.11487990617752075, "learning_rate": 5.739632468977174e-05, "loss": 0.3919788122177124, "step": 99810 }, { "epoch": 0.4285481225797034, "grad_norm": 0.5756773352622986, "learning_rate": 5.739201296965412e-05, "loss": 0.19305258989334106, "step": 99820 }, { "epoch": 0.4285910546697234, "grad_norm": 2.1908233165740967, "learning_rate": 5.73877012495365e-05, "loss": 0.1436487317085266, "step": 99830 }, { "epoch": 0.42863398675974346, "grad_norm": 0.23491205275058746, "learning_rate": 5.7383389529418875e-05, "loss": 0.356546688079834, "step": 99840 }, { "epoch": 0.42867691884976344, "grad_norm": 0.009992998093366623, "learning_rate": 5.737907780930124e-05, "loss": 0.08113847374916076, "step": 99850 }, { "epoch": 0.42871985093978343, "grad_norm": 0.22079306840896606, "learning_rate": 5.7374766089183616e-05, "loss": 0.12962546348571777, "step": 99860 }, { "epoch": 0.4287627830298035, "grad_norm": 0.006492846179753542, "learning_rate": 5.7370454369065994e-05, "loss": 0.4033665180206299, "step": 99870 }, { "epoch": 0.42880571511982346, "grad_norm": 0.015185476280748844, "learning_rate": 5.736614264894837e-05, "loss": 0.1210568904876709, "step": 99880 }, { "epoch": 0.42884864720984345, "grad_norm": 0.23486827313899994, "learning_rate": 5.736183092883075e-05, "loss": 0.2891173601150513, "step": 99890 }, { "epoch": 0.4288915792998635, "grad_norm": 1.5405279397964478, "learning_rate": 5.7357519208713126e-05, "loss": 0.017226718366146088, "step": 99900 }, { "epoch": 0.4289345113898835, "grad_norm": 0.23540741205215454, "learning_rate": 5.73532074885955e-05, "loss": 0.37728137969970704, "step": 99910 }, { "epoch": 0.42897744347990346, "grad_norm": 1.2578777074813843, "learning_rate": 5.734889576847788e-05, "loss": 0.3932936191558838, "step": 99920 }, { "epoch": 0.4290203755699235, "grad_norm": 0.02093084156513214, "learning_rate": 5.734458404836025e-05, "loss": 0.23207945823669435, "step": 99930 }, { "epoch": 0.4290633076599435, "grad_norm": 1.5797219276428223, "learning_rate": 5.734027232824263e-05, "loss": 0.19565119743347167, "step": 99940 }, { "epoch": 0.4291062397499635, "grad_norm": 0.023647375404834747, "learning_rate": 5.7335960608125006e-05, "loss": 0.2564146280288696, "step": 99950 }, { "epoch": 0.4291491718399835, "grad_norm": 0.0429295189678669, "learning_rate": 5.733164888800738e-05, "loss": 0.27921204566955565, "step": 99960 }, { "epoch": 0.4291921039300035, "grad_norm": 0.6207567453384399, "learning_rate": 5.732733716788976e-05, "loss": 0.15734999179840087, "step": 99970 }, { "epoch": 0.42923503602002355, "grad_norm": 0.5371975898742676, "learning_rate": 5.732302544777214e-05, "loss": 0.21604642868041993, "step": 99980 }, { "epoch": 0.42927796811004354, "grad_norm": 0.22142738103866577, "learning_rate": 5.7318713727654515e-05, "loss": 0.08812406063079833, "step": 99990 }, { "epoch": 0.4293209002000635, "grad_norm": 3.723952054977417, "learning_rate": 5.731440200753689e-05, "loss": 0.40993666648864746, "step": 100000 }, { "epoch": 0.4293209002000635, "eval_loss": 0.41385185718536377, "eval_runtime": 27.272, "eval_samples_per_second": 3.667, "eval_steps_per_second": 3.667, "step": 100000 }, { "epoch": 0.42936383229008357, "grad_norm": 1.978222131729126, "learning_rate": 5.731009028741927e-05, "loss": 0.41112370491027833, "step": 100010 }, { "epoch": 0.42940676438010356, "grad_norm": 0.35666602849960327, "learning_rate": 5.730577856730164e-05, "loss": 0.19882118701934814, "step": 100020 }, { "epoch": 0.42944969647012354, "grad_norm": 0.1557963639497757, "learning_rate": 5.730146684718402e-05, "loss": 0.15280017852783204, "step": 100030 }, { "epoch": 0.4294926285601436, "grad_norm": 1.474320650100708, "learning_rate": 5.7297155127066395e-05, "loss": 0.32379262447357177, "step": 100040 }, { "epoch": 0.4295355606501636, "grad_norm": 0.0014996053650975227, "learning_rate": 5.729284340694877e-05, "loss": 0.18067800998687744, "step": 100050 }, { "epoch": 0.42957849274018356, "grad_norm": 1.359062671661377, "learning_rate": 5.728853168683115e-05, "loss": 0.2790334939956665, "step": 100060 }, { "epoch": 0.4296214248302036, "grad_norm": 0.6800263524055481, "learning_rate": 5.728421996671353e-05, "loss": 0.28148727416992186, "step": 100070 }, { "epoch": 0.4296643569202236, "grad_norm": 1.2884678840637207, "learning_rate": 5.7279908246595904e-05, "loss": 0.435483455657959, "step": 100080 }, { "epoch": 0.4297072890102436, "grad_norm": 4.299257755279541, "learning_rate": 5.727559652647828e-05, "loss": 0.2659862995147705, "step": 100090 }, { "epoch": 0.4297502211002636, "grad_norm": 0.6796963214874268, "learning_rate": 5.7271284806360646e-05, "loss": 0.017660659551620484, "step": 100100 }, { "epoch": 0.4297931531902836, "grad_norm": 0.042083896696567535, "learning_rate": 5.726697308624302e-05, "loss": 0.24837009906768798, "step": 100110 }, { "epoch": 0.4298360852803036, "grad_norm": 0.06338540464639664, "learning_rate": 5.72626613661254e-05, "loss": 0.20939581394195556, "step": 100120 }, { "epoch": 0.42987901737032364, "grad_norm": 1.4580801725387573, "learning_rate": 5.725834964600778e-05, "loss": 0.295143723487854, "step": 100130 }, { "epoch": 0.4299219494603436, "grad_norm": 2.420668601989746, "learning_rate": 5.7254037925890155e-05, "loss": 0.35013642311096194, "step": 100140 }, { "epoch": 0.4299648815503636, "grad_norm": 0.0071824113838374615, "learning_rate": 5.724972620577254e-05, "loss": 0.18279858827590942, "step": 100150 }, { "epoch": 0.43000781364038365, "grad_norm": 1.749664306640625, "learning_rate": 5.7245414485654916e-05, "loss": 0.27730727195739746, "step": 100160 }, { "epoch": 0.43005074573040364, "grad_norm": 0.1663660705089569, "learning_rate": 5.7241102765537294e-05, "loss": 0.16716526746749877, "step": 100170 }, { "epoch": 0.4300936778204237, "grad_norm": 0.03588825836777687, "learning_rate": 5.723679104541966e-05, "loss": 0.1703078031539917, "step": 100180 }, { "epoch": 0.43013660991044367, "grad_norm": 0.03353295102715492, "learning_rate": 5.7232479325302035e-05, "loss": 0.10920073986053466, "step": 100190 }, { "epoch": 0.43017954200046365, "grad_norm": 1.1376320123672485, "learning_rate": 5.722816760518441e-05, "loss": 0.15631842613220215, "step": 100200 }, { "epoch": 0.4302224740904837, "grad_norm": 0.003353550098836422, "learning_rate": 5.722385588506679e-05, "loss": 0.3201023578643799, "step": 100210 }, { "epoch": 0.4302654061805037, "grad_norm": 0.004149756394326687, "learning_rate": 5.721954416494917e-05, "loss": 0.21888132095336915, "step": 100220 }, { "epoch": 0.43030833827052367, "grad_norm": 0.052119333297014236, "learning_rate": 5.7215232444831544e-05, "loss": 0.09456470608711243, "step": 100230 }, { "epoch": 0.4303512703605437, "grad_norm": 26.883987426757812, "learning_rate": 5.721092072471392e-05, "loss": 0.3765913963317871, "step": 100240 }, { "epoch": 0.4303942024505637, "grad_norm": 0.2676823139190674, "learning_rate": 5.72066090045963e-05, "loss": 0.2983670473098755, "step": 100250 }, { "epoch": 0.4304371345405837, "grad_norm": 0.09823568910360336, "learning_rate": 5.720229728447867e-05, "loss": 0.2189718246459961, "step": 100260 }, { "epoch": 0.43048006663060373, "grad_norm": 0.07216641306877136, "learning_rate": 5.719798556436105e-05, "loss": 0.22153894901275634, "step": 100270 }, { "epoch": 0.4305229987206237, "grad_norm": 0.49933910369873047, "learning_rate": 5.7193673844243424e-05, "loss": 0.26543402671813965, "step": 100280 }, { "epoch": 0.4305659308106437, "grad_norm": 2.6107425689697266, "learning_rate": 5.71893621241258e-05, "loss": 0.2502300500869751, "step": 100290 }, { "epoch": 0.43060886290066375, "grad_norm": 0.032215360552072525, "learning_rate": 5.718505040400818e-05, "loss": 0.2651889085769653, "step": 100300 }, { "epoch": 0.43065179499068373, "grad_norm": 0.2990207374095917, "learning_rate": 5.7180738683890556e-05, "loss": 0.1141858458518982, "step": 100310 }, { "epoch": 0.4306947270807037, "grad_norm": 1.3636616468429565, "learning_rate": 5.7176426963772934e-05, "loss": 0.12704546451568605, "step": 100320 }, { "epoch": 0.43073765917072376, "grad_norm": 0.05208112671971321, "learning_rate": 5.717211524365531e-05, "loss": 0.2751093626022339, "step": 100330 }, { "epoch": 0.43078059126074375, "grad_norm": 0.0030079390853643417, "learning_rate": 5.7167803523537675e-05, "loss": 0.24836654663085939, "step": 100340 }, { "epoch": 0.43082352335076374, "grad_norm": 0.13114942610263824, "learning_rate": 5.716349180342005e-05, "loss": 0.12937086820602417, "step": 100350 }, { "epoch": 0.4308664554407838, "grad_norm": 0.06914433091878891, "learning_rate": 5.715918008330243e-05, "loss": 0.33217825889587405, "step": 100360 }, { "epoch": 0.43090938753080377, "grad_norm": 0.8148689866065979, "learning_rate": 5.7154868363184814e-05, "loss": 0.13250348567962647, "step": 100370 }, { "epoch": 0.43095231962082375, "grad_norm": 8.22535514831543, "learning_rate": 5.715055664306719e-05, "loss": 0.48774261474609376, "step": 100380 }, { "epoch": 0.4309952517108438, "grad_norm": 0.14958524703979492, "learning_rate": 5.714624492294957e-05, "loss": 0.39161689281463624, "step": 100390 }, { "epoch": 0.4310381838008638, "grad_norm": 0.1226431354880333, "learning_rate": 5.7141933202831946e-05, "loss": 0.25230252742767334, "step": 100400 }, { "epoch": 0.4310811158908838, "grad_norm": 0.5269021987915039, "learning_rate": 5.713762148271432e-05, "loss": 0.19572103023529053, "step": 100410 }, { "epoch": 0.4311240479809038, "grad_norm": 0.0011158520355820656, "learning_rate": 5.71333097625967e-05, "loss": 0.10176982879638671, "step": 100420 }, { "epoch": 0.4311669800709238, "grad_norm": 0.028794605284929276, "learning_rate": 5.7128998042479064e-05, "loss": 0.30993683338165284, "step": 100430 }, { "epoch": 0.43120991216094384, "grad_norm": 0.06879492849111557, "learning_rate": 5.712468632236144e-05, "loss": 0.2143555164337158, "step": 100440 }, { "epoch": 0.43125284425096383, "grad_norm": 0.020163610577583313, "learning_rate": 5.712037460224382e-05, "loss": 0.26235687732696533, "step": 100450 }, { "epoch": 0.4312957763409838, "grad_norm": 0.6938256025314331, "learning_rate": 5.7116062882126196e-05, "loss": 0.24439234733581544, "step": 100460 }, { "epoch": 0.43133870843100386, "grad_norm": 0.029514238238334656, "learning_rate": 5.7111751162008573e-05, "loss": 0.14914549589157106, "step": 100470 }, { "epoch": 0.43138164052102385, "grad_norm": 0.04761035367846489, "learning_rate": 5.710743944189095e-05, "loss": 0.30039823055267334, "step": 100480 }, { "epoch": 0.43142457261104383, "grad_norm": 10.50465202331543, "learning_rate": 5.710312772177333e-05, "loss": 0.1130056619644165, "step": 100490 }, { "epoch": 0.4314675047010639, "grad_norm": 0.05910739675164223, "learning_rate": 5.7098816001655705e-05, "loss": 0.13838412761688232, "step": 100500 }, { "epoch": 0.43151043679108386, "grad_norm": 0.022242436185479164, "learning_rate": 5.7094504281538076e-05, "loss": 0.20934643745422363, "step": 100510 }, { "epoch": 0.43155336888110385, "grad_norm": 1.522462248802185, "learning_rate": 5.709019256142045e-05, "loss": 0.30031299591064453, "step": 100520 }, { "epoch": 0.4315963009711239, "grad_norm": 1.5211167335510254, "learning_rate": 5.708588084130283e-05, "loss": 0.22337019443511963, "step": 100530 }, { "epoch": 0.4316392330611439, "grad_norm": 2.0634267330169678, "learning_rate": 5.708156912118521e-05, "loss": 0.19905364513397217, "step": 100540 }, { "epoch": 0.43168216515116387, "grad_norm": 0.9744377136230469, "learning_rate": 5.7077257401067585e-05, "loss": 0.2310699224472046, "step": 100550 }, { "epoch": 0.4317250972411839, "grad_norm": 0.00894715916365385, "learning_rate": 5.707294568094996e-05, "loss": 0.3550968408584595, "step": 100560 }, { "epoch": 0.4317680293312039, "grad_norm": 0.10767655074596405, "learning_rate": 5.706863396083234e-05, "loss": 0.18549597263336182, "step": 100570 }, { "epoch": 0.4318109614212239, "grad_norm": 0.009473497979342937, "learning_rate": 5.706432224071472e-05, "loss": 0.18380953073501588, "step": 100580 }, { "epoch": 0.4318538935112439, "grad_norm": 0.0027962077874690294, "learning_rate": 5.706001052059709e-05, "loss": 0.2939144134521484, "step": 100590 }, { "epoch": 0.4318968256012639, "grad_norm": 1.0059584379196167, "learning_rate": 5.7055698800479465e-05, "loss": 0.22364864349365235, "step": 100600 }, { "epoch": 0.43193975769128395, "grad_norm": 0.13138823211193085, "learning_rate": 5.705138708036184e-05, "loss": 0.4219820022583008, "step": 100610 }, { "epoch": 0.43198268978130394, "grad_norm": 3.08107590675354, "learning_rate": 5.704707536024422e-05, "loss": 0.24338581562042236, "step": 100620 }, { "epoch": 0.43202562187132393, "grad_norm": 0.029116906225681305, "learning_rate": 5.70427636401266e-05, "loss": 0.12408316135406494, "step": 100630 }, { "epoch": 0.43206855396134397, "grad_norm": 0.01473816204816103, "learning_rate": 5.7038451920008975e-05, "loss": 0.23553252220153809, "step": 100640 }, { "epoch": 0.43211148605136396, "grad_norm": 0.46792203187942505, "learning_rate": 5.703414019989135e-05, "loss": 0.07469576001167297, "step": 100650 }, { "epoch": 0.43215441814138394, "grad_norm": 0.1712280511856079, "learning_rate": 5.702982847977373e-05, "loss": 0.21774008274078369, "step": 100660 }, { "epoch": 0.432197350231404, "grad_norm": 0.0017177191330119967, "learning_rate": 5.702551675965609e-05, "loss": 0.18529870510101318, "step": 100670 }, { "epoch": 0.432240282321424, "grad_norm": 0.0058692400343716145, "learning_rate": 5.702120503953847e-05, "loss": 0.10345749855041504, "step": 100680 }, { "epoch": 0.43228321441144396, "grad_norm": 1.4120231866836548, "learning_rate": 5.701689331942085e-05, "loss": 0.13298569917678832, "step": 100690 }, { "epoch": 0.432326146501464, "grad_norm": 0.7551203966140747, "learning_rate": 5.7012581599303225e-05, "loss": 0.09198079109191895, "step": 100700 }, { "epoch": 0.432369078591484, "grad_norm": 0.03402279317378998, "learning_rate": 5.70082698791856e-05, "loss": 0.49449887275695803, "step": 100710 }, { "epoch": 0.432412010681504, "grad_norm": 0.002699479693546891, "learning_rate": 5.700395815906798e-05, "loss": 0.28117620944976807, "step": 100720 }, { "epoch": 0.432454942771524, "grad_norm": 0.04458874464035034, "learning_rate": 5.699964643895036e-05, "loss": 0.20188190937042236, "step": 100730 }, { "epoch": 0.432497874861544, "grad_norm": 0.013446901924908161, "learning_rate": 5.699533471883274e-05, "loss": 0.07479428052902222, "step": 100740 }, { "epoch": 0.432540806951564, "grad_norm": 3.244983434677124, "learning_rate": 5.699102299871512e-05, "loss": 0.1925884008407593, "step": 100750 }, { "epoch": 0.43258373904158404, "grad_norm": 0.005489411298185587, "learning_rate": 5.698671127859748e-05, "loss": 0.20378761291503905, "step": 100760 }, { "epoch": 0.432626671131604, "grad_norm": 0.06427086144685745, "learning_rate": 5.698239955847986e-05, "loss": 0.10766814947128296, "step": 100770 }, { "epoch": 0.432669603221624, "grad_norm": 0.009699106216430664, "learning_rate": 5.697808783836224e-05, "loss": 0.06433448791503907, "step": 100780 }, { "epoch": 0.43271253531164405, "grad_norm": 0.016040675342082977, "learning_rate": 5.6973776118244615e-05, "loss": 0.09252756834030151, "step": 100790 }, { "epoch": 0.43275546740166404, "grad_norm": 6.488517761230469, "learning_rate": 5.696946439812699e-05, "loss": 0.3273911952972412, "step": 100800 }, { "epoch": 0.432798399491684, "grad_norm": 32.52063751220703, "learning_rate": 5.696515267800937e-05, "loss": 0.15922000408172607, "step": 100810 }, { "epoch": 0.43284133158170407, "grad_norm": 1.7135497331619263, "learning_rate": 5.6960840957891747e-05, "loss": 0.2881903171539307, "step": 100820 }, { "epoch": 0.43288426367172406, "grad_norm": 8.541507720947266, "learning_rate": 5.6956529237774124e-05, "loss": 0.2778735399246216, "step": 100830 }, { "epoch": 0.4329271957617441, "grad_norm": 0.0020988276228308678, "learning_rate": 5.6952217517656494e-05, "loss": 0.15711830854415892, "step": 100840 }, { "epoch": 0.4329701278517641, "grad_norm": 0.117733895778656, "learning_rate": 5.694790579753887e-05, "loss": 0.23034195899963378, "step": 100850 }, { "epoch": 0.4330130599417841, "grad_norm": 1.125941276550293, "learning_rate": 5.694359407742125e-05, "loss": 0.31992788314819337, "step": 100860 }, { "epoch": 0.4330559920318041, "grad_norm": 0.4024018943309784, "learning_rate": 5.6939282357303626e-05, "loss": 0.11440498828887939, "step": 100870 }, { "epoch": 0.4330989241218241, "grad_norm": 0.6626192927360535, "learning_rate": 5.6934970637186004e-05, "loss": 0.009410639107227326, "step": 100880 }, { "epoch": 0.4331418562118441, "grad_norm": 0.8921322822570801, "learning_rate": 5.693065891706838e-05, "loss": 0.18852705955505372, "step": 100890 }, { "epoch": 0.43318478830186413, "grad_norm": 2.921628952026367, "learning_rate": 5.692634719695076e-05, "loss": 0.1942846179008484, "step": 100900 }, { "epoch": 0.4332277203918841, "grad_norm": 0.0008382099331356585, "learning_rate": 5.6922035476833136e-05, "loss": 0.26661832332611085, "step": 100910 }, { "epoch": 0.4332706524819041, "grad_norm": 1.34660005569458, "learning_rate": 5.69177237567155e-05, "loss": 0.3429716110229492, "step": 100920 }, { "epoch": 0.43331358457192415, "grad_norm": 0.41357937455177307, "learning_rate": 5.691341203659788e-05, "loss": 0.16713021993637084, "step": 100930 }, { "epoch": 0.43335651666194414, "grad_norm": 0.03519873693585396, "learning_rate": 5.6909100316480254e-05, "loss": 0.10253534317016602, "step": 100940 }, { "epoch": 0.4333994487519641, "grad_norm": 0.01499355398118496, "learning_rate": 5.690478859636263e-05, "loss": 0.21979115009307862, "step": 100950 }, { "epoch": 0.43344238084198417, "grad_norm": 0.0017051781760528684, "learning_rate": 5.6900476876245016e-05, "loss": 0.1390715718269348, "step": 100960 }, { "epoch": 0.43348531293200415, "grad_norm": 0.07887522876262665, "learning_rate": 5.689616515612739e-05, "loss": 0.36191625595092775, "step": 100970 }, { "epoch": 0.43352824502202414, "grad_norm": 2.325713634490967, "learning_rate": 5.689185343600977e-05, "loss": 0.06256782412528991, "step": 100980 }, { "epoch": 0.4335711771120442, "grad_norm": 0.08764491975307465, "learning_rate": 5.688754171589215e-05, "loss": 0.2967262029647827, "step": 100990 }, { "epoch": 0.43361410920206417, "grad_norm": 1.0276341438293457, "learning_rate": 5.688322999577451e-05, "loss": 0.5041379928588867, "step": 101000 }, { "epoch": 0.43361410920206417, "eval_loss": 0.4284164309501648, "eval_runtime": 27.1219, "eval_samples_per_second": 3.687, "eval_steps_per_second": 3.687, "step": 101000 }, { "epoch": 0.43365704129208416, "grad_norm": 0.161605104804039, "learning_rate": 5.687891827565689e-05, "loss": 0.20394842624664306, "step": 101010 }, { "epoch": 0.4336999733821042, "grad_norm": 23.645925521850586, "learning_rate": 5.6874606555539266e-05, "loss": 0.28007776737213136, "step": 101020 }, { "epoch": 0.4337429054721242, "grad_norm": 0.01618562825024128, "learning_rate": 5.6870294835421644e-05, "loss": 0.3052077293395996, "step": 101030 }, { "epoch": 0.4337858375621442, "grad_norm": 0.5638077259063721, "learning_rate": 5.686598311530402e-05, "loss": 0.3267589807510376, "step": 101040 }, { "epoch": 0.4338287696521642, "grad_norm": 3.6884796619415283, "learning_rate": 5.68616713951864e-05, "loss": 0.3442475080490112, "step": 101050 }, { "epoch": 0.4338717017421842, "grad_norm": 0.0034317022655159235, "learning_rate": 5.6857359675068776e-05, "loss": 0.2257145643234253, "step": 101060 }, { "epoch": 0.43391463383220424, "grad_norm": 0.02168785035610199, "learning_rate": 5.685304795495115e-05, "loss": 0.10080244541168212, "step": 101070 }, { "epoch": 0.43395756592222423, "grad_norm": 0.024649212136864662, "learning_rate": 5.6848736234833524e-05, "loss": 0.15533299446105958, "step": 101080 }, { "epoch": 0.4340004980122442, "grad_norm": 0.5593084096908569, "learning_rate": 5.68444245147159e-05, "loss": 0.20375723838806153, "step": 101090 }, { "epoch": 0.43404343010226426, "grad_norm": 0.07193995267152786, "learning_rate": 5.684011279459828e-05, "loss": 0.17142632007598876, "step": 101100 }, { "epoch": 0.43408636219228425, "grad_norm": 0.004242885857820511, "learning_rate": 5.6835801074480656e-05, "loss": 0.3353287935256958, "step": 101110 }, { "epoch": 0.43412929428230423, "grad_norm": 0.9329225420951843, "learning_rate": 5.683148935436303e-05, "loss": 0.20502972602844238, "step": 101120 }, { "epoch": 0.4341722263723243, "grad_norm": 2.0356602668762207, "learning_rate": 5.682717763424541e-05, "loss": 0.16802514791488649, "step": 101130 }, { "epoch": 0.43421515846234426, "grad_norm": 4.4602837562561035, "learning_rate": 5.682286591412779e-05, "loss": 0.35306363105773925, "step": 101140 }, { "epoch": 0.43425809055236425, "grad_norm": 0.0073759243823587894, "learning_rate": 5.6818554194010165e-05, "loss": 0.11986923217773438, "step": 101150 }, { "epoch": 0.4343010226423843, "grad_norm": 0.014532789587974548, "learning_rate": 5.681424247389254e-05, "loss": 0.21006014347076415, "step": 101160 }, { "epoch": 0.4343439547324043, "grad_norm": 1.4600069522857666, "learning_rate": 5.6809930753774906e-05, "loss": 0.3080946445465088, "step": 101170 }, { "epoch": 0.43438688682242427, "grad_norm": 3.517202854156494, "learning_rate": 5.680561903365729e-05, "loss": 0.23347063064575196, "step": 101180 }, { "epoch": 0.4344298189124443, "grad_norm": 0.09192164242267609, "learning_rate": 5.680130731353967e-05, "loss": 0.08297334313392639, "step": 101190 }, { "epoch": 0.4344727510024643, "grad_norm": 14.890531539916992, "learning_rate": 5.6796995593422045e-05, "loss": 0.3029682397842407, "step": 101200 }, { "epoch": 0.4345156830924843, "grad_norm": 0.08431587368249893, "learning_rate": 5.679268387330442e-05, "loss": 0.05657966732978821, "step": 101210 }, { "epoch": 0.4345586151825043, "grad_norm": 0.004789439029991627, "learning_rate": 5.67883721531868e-05, "loss": 0.06281794905662537, "step": 101220 }, { "epoch": 0.4346015472725243, "grad_norm": 0.007313930429518223, "learning_rate": 5.678406043306918e-05, "loss": 0.23483545780181886, "step": 101230 }, { "epoch": 0.4346444793625443, "grad_norm": 0.006183262914419174, "learning_rate": 5.6779748712951554e-05, "loss": 0.320169997215271, "step": 101240 }, { "epoch": 0.43468741145256434, "grad_norm": 2.053785800933838, "learning_rate": 5.677543699283392e-05, "loss": 0.4159492015838623, "step": 101250 }, { "epoch": 0.43473034354258433, "grad_norm": 0.33240818977355957, "learning_rate": 5.6771125272716295e-05, "loss": 0.10303497314453125, "step": 101260 }, { "epoch": 0.4347732756326044, "grad_norm": 3.343055486679077, "learning_rate": 5.676681355259867e-05, "loss": 0.28133907318115237, "step": 101270 }, { "epoch": 0.43481620772262436, "grad_norm": 0.10711721330881119, "learning_rate": 5.676250183248105e-05, "loss": 0.2311713457107544, "step": 101280 }, { "epoch": 0.43485913981264435, "grad_norm": 0.08864771574735641, "learning_rate": 5.675819011236343e-05, "loss": 0.3245322465896606, "step": 101290 }, { "epoch": 0.4349020719026644, "grad_norm": 0.005172067321836948, "learning_rate": 5.6753878392245805e-05, "loss": 0.1757732152938843, "step": 101300 }, { "epoch": 0.4349450039926844, "grad_norm": 0.08573029935359955, "learning_rate": 5.674956667212818e-05, "loss": 0.07301101684570313, "step": 101310 }, { "epoch": 0.43498793608270436, "grad_norm": 0.5399850606918335, "learning_rate": 5.674525495201056e-05, "loss": 0.1713407278060913, "step": 101320 }, { "epoch": 0.4350308681727244, "grad_norm": 0.3291313052177429, "learning_rate": 5.674094323189293e-05, "loss": 0.09000024199485779, "step": 101330 }, { "epoch": 0.4350738002627444, "grad_norm": 0.020964227616786957, "learning_rate": 5.673663151177531e-05, "loss": 0.07782799601554871, "step": 101340 }, { "epoch": 0.4351167323527644, "grad_norm": 0.020042628049850464, "learning_rate": 5.6732319791657685e-05, "loss": 0.48969502449035646, "step": 101350 }, { "epoch": 0.4351596644427844, "grad_norm": 0.0018864471931010485, "learning_rate": 5.672800807154006e-05, "loss": 0.11262784004211426, "step": 101360 }, { "epoch": 0.4352025965328044, "grad_norm": 0.0246067363768816, "learning_rate": 5.672369635142244e-05, "loss": 0.2091688871383667, "step": 101370 }, { "epoch": 0.4352455286228244, "grad_norm": 4.4579548835754395, "learning_rate": 5.671938463130482e-05, "loss": 0.06489126682281494, "step": 101380 }, { "epoch": 0.43528846071284444, "grad_norm": 1.8331623077392578, "learning_rate": 5.6715072911187194e-05, "loss": 0.22519593238830565, "step": 101390 }, { "epoch": 0.4353313928028644, "grad_norm": 0.13178208470344543, "learning_rate": 5.671076119106957e-05, "loss": 0.08844862580299377, "step": 101400 }, { "epoch": 0.4353743248928844, "grad_norm": 5.8470306396484375, "learning_rate": 5.670644947095194e-05, "loss": 0.283965539932251, "step": 101410 }, { "epoch": 0.43541725698290445, "grad_norm": 1.977359414100647, "learning_rate": 5.670213775083432e-05, "loss": 0.31743721961975097, "step": 101420 }, { "epoch": 0.43546018907292444, "grad_norm": 0.006298946216702461, "learning_rate": 5.66978260307167e-05, "loss": 0.2732826232910156, "step": 101430 }, { "epoch": 0.43550312116294443, "grad_norm": 0.015092175453901291, "learning_rate": 5.6693514310599074e-05, "loss": 0.1754346489906311, "step": 101440 }, { "epoch": 0.43554605325296447, "grad_norm": 0.006818375550210476, "learning_rate": 5.668920259048145e-05, "loss": 0.06602421402931213, "step": 101450 }, { "epoch": 0.43558898534298446, "grad_norm": 0.10856548696756363, "learning_rate": 5.668489087036383e-05, "loss": 0.41567420959472656, "step": 101460 }, { "epoch": 0.4356319174330045, "grad_norm": 0.0782633051276207, "learning_rate": 5.6680579150246206e-05, "loss": 0.1458239197731018, "step": 101470 }, { "epoch": 0.4356748495230245, "grad_norm": 0.009810393676161766, "learning_rate": 5.6676267430128583e-05, "loss": 0.30247931480407714, "step": 101480 }, { "epoch": 0.4357177816130445, "grad_norm": 1.1154206991195679, "learning_rate": 5.667195571001096e-05, "loss": 0.1663208246231079, "step": 101490 }, { "epoch": 0.4357607137030645, "grad_norm": 1.5516237020492554, "learning_rate": 5.6667643989893325e-05, "loss": 0.41340136528015137, "step": 101500 }, { "epoch": 0.4358036457930845, "grad_norm": 0.004893908742815256, "learning_rate": 5.66633322697757e-05, "loss": 0.30065782070159913, "step": 101510 }, { "epoch": 0.4358465778831045, "grad_norm": 0.7853567004203796, "learning_rate": 5.665902054965808e-05, "loss": 0.24362916946411134, "step": 101520 }, { "epoch": 0.43588950997312453, "grad_norm": 1.3496156930923462, "learning_rate": 5.6654708829540457e-05, "loss": 0.17322909832000732, "step": 101530 }, { "epoch": 0.4359324420631445, "grad_norm": 0.8741046786308289, "learning_rate": 5.6650397109422834e-05, "loss": 0.08911564946174622, "step": 101540 }, { "epoch": 0.4359753741531645, "grad_norm": 0.005866996478289366, "learning_rate": 5.664608538930521e-05, "loss": 0.11345422267913818, "step": 101550 }, { "epoch": 0.43601830624318455, "grad_norm": 0.0030817079823464155, "learning_rate": 5.6641773669187595e-05, "loss": 0.08362066745758057, "step": 101560 }, { "epoch": 0.43606123833320454, "grad_norm": 0.022512266412377357, "learning_rate": 5.663746194906997e-05, "loss": 0.2745735883712769, "step": 101570 }, { "epoch": 0.4361041704232245, "grad_norm": 1.2275443077087402, "learning_rate": 5.6633150228952337e-05, "loss": 0.4065723419189453, "step": 101580 }, { "epoch": 0.43614710251324457, "grad_norm": 0.006209479179233313, "learning_rate": 5.6628838508834714e-05, "loss": 0.4551173210144043, "step": 101590 }, { "epoch": 0.43619003460326455, "grad_norm": 1.0085053443908691, "learning_rate": 5.662452678871709e-05, "loss": 0.17605886459350586, "step": 101600 }, { "epoch": 0.43623296669328454, "grad_norm": 3.473280906677246, "learning_rate": 5.662021506859947e-05, "loss": 0.21241440773010253, "step": 101610 }, { "epoch": 0.4362758987833046, "grad_norm": 1.7464669942855835, "learning_rate": 5.6615903348481846e-05, "loss": 0.1767161011695862, "step": 101620 }, { "epoch": 0.43631883087332457, "grad_norm": 0.19424454867839813, "learning_rate": 5.661159162836422e-05, "loss": 0.2732317686080933, "step": 101630 }, { "epoch": 0.43636176296334456, "grad_norm": 0.05577385425567627, "learning_rate": 5.66072799082466e-05, "loss": 0.2823940753936768, "step": 101640 }, { "epoch": 0.4364046950533646, "grad_norm": 3.1323673725128174, "learning_rate": 5.660296818812898e-05, "loss": 0.18861196041107178, "step": 101650 }, { "epoch": 0.4364476271433846, "grad_norm": 0.3599117398262024, "learning_rate": 5.659865646801135e-05, "loss": 0.1460519552230835, "step": 101660 }, { "epoch": 0.4364905592334046, "grad_norm": 1.0804738998413086, "learning_rate": 5.6594344747893726e-05, "loss": 0.33149147033691406, "step": 101670 }, { "epoch": 0.4365334913234246, "grad_norm": 0.6159604787826538, "learning_rate": 5.65900330277761e-05, "loss": 0.15043935775756836, "step": 101680 }, { "epoch": 0.4365764234134446, "grad_norm": 0.06314518302679062, "learning_rate": 5.658572130765848e-05, "loss": 0.1825084686279297, "step": 101690 }, { "epoch": 0.43661935550346465, "grad_norm": 0.017460836097598076, "learning_rate": 5.658140958754086e-05, "loss": 0.23410093784332275, "step": 101700 }, { "epoch": 0.43666228759348463, "grad_norm": 0.8213013410568237, "learning_rate": 5.6577097867423235e-05, "loss": 0.10457472801208496, "step": 101710 }, { "epoch": 0.4367052196835046, "grad_norm": 3.3704280853271484, "learning_rate": 5.657278614730561e-05, "loss": 0.2720768690109253, "step": 101720 }, { "epoch": 0.43674815177352466, "grad_norm": 1.2922886610031128, "learning_rate": 5.656847442718799e-05, "loss": 0.27418065071105957, "step": 101730 }, { "epoch": 0.43679108386354465, "grad_norm": 0.06490170955657959, "learning_rate": 5.6564162707070354e-05, "loss": 0.17207794189453124, "step": 101740 }, { "epoch": 0.43683401595356464, "grad_norm": 0.8208549618721008, "learning_rate": 5.655985098695273e-05, "loss": 0.2595613718032837, "step": 101750 }, { "epoch": 0.4368769480435847, "grad_norm": 0.004950092639774084, "learning_rate": 5.655553926683511e-05, "loss": 0.1528960108757019, "step": 101760 }, { "epoch": 0.43691988013360467, "grad_norm": 0.660977840423584, "learning_rate": 5.6551227546717486e-05, "loss": 0.44907441139221194, "step": 101770 }, { "epoch": 0.43696281222362465, "grad_norm": 6.029102802276611, "learning_rate": 5.654691582659987e-05, "loss": 0.12078993320465088, "step": 101780 }, { "epoch": 0.4370057443136447, "grad_norm": 0.2080804705619812, "learning_rate": 5.654260410648225e-05, "loss": 0.15819777250289918, "step": 101790 }, { "epoch": 0.4370486764036647, "grad_norm": 0.1170460432767868, "learning_rate": 5.6538292386364625e-05, "loss": 0.21379849910736085, "step": 101800 }, { "epoch": 0.43709160849368467, "grad_norm": 0.16317272186279297, "learning_rate": 5.6533980666247e-05, "loss": 0.1071736216545105, "step": 101810 }, { "epoch": 0.4371345405837047, "grad_norm": 0.06004194915294647, "learning_rate": 5.6529668946129366e-05, "loss": 0.32128081321716306, "step": 101820 }, { "epoch": 0.4371774726737247, "grad_norm": 0.3741120398044586, "learning_rate": 5.652535722601174e-05, "loss": 0.25509469509124755, "step": 101830 }, { "epoch": 0.4372204047637447, "grad_norm": 0.04972869157791138, "learning_rate": 5.652104550589412e-05, "loss": 0.24276161193847656, "step": 101840 }, { "epoch": 0.43726333685376473, "grad_norm": 0.10509629547595978, "learning_rate": 5.65167337857765e-05, "loss": 0.27646722793579104, "step": 101850 }, { "epoch": 0.4373062689437847, "grad_norm": 0.03949575871229172, "learning_rate": 5.6512422065658875e-05, "loss": 0.2635036945343018, "step": 101860 }, { "epoch": 0.4373492010338047, "grad_norm": 0.13845627009868622, "learning_rate": 5.650811034554125e-05, "loss": 0.25788857936859133, "step": 101870 }, { "epoch": 0.43739213312382474, "grad_norm": 0.3628508448600769, "learning_rate": 5.650379862542363e-05, "loss": 0.2939404726028442, "step": 101880 }, { "epoch": 0.43743506521384473, "grad_norm": 3.7908737659454346, "learning_rate": 5.649948690530601e-05, "loss": 0.3650730848312378, "step": 101890 }, { "epoch": 0.4374779973038648, "grad_norm": 0.15260189771652222, "learning_rate": 5.6495175185188384e-05, "loss": 0.2733223676681519, "step": 101900 }, { "epoch": 0.43752092939388476, "grad_norm": 0.03667456656694412, "learning_rate": 5.6490863465070755e-05, "loss": 0.23879833221435548, "step": 101910 }, { "epoch": 0.43756386148390475, "grad_norm": 1.1296567916870117, "learning_rate": 5.648655174495313e-05, "loss": 0.3958956241607666, "step": 101920 }, { "epoch": 0.4376067935739248, "grad_norm": 0.9460829496383667, "learning_rate": 5.648224002483551e-05, "loss": 0.17357568740844725, "step": 101930 }, { "epoch": 0.4376497256639448, "grad_norm": 0.013191147707402706, "learning_rate": 5.647792830471789e-05, "loss": 0.2423572540283203, "step": 101940 }, { "epoch": 0.43769265775396476, "grad_norm": 0.08220737427473068, "learning_rate": 5.6473616584600264e-05, "loss": 0.10584760904312134, "step": 101950 }, { "epoch": 0.4377355898439848, "grad_norm": 0.17858093976974487, "learning_rate": 5.646930486448264e-05, "loss": 0.31906838417053224, "step": 101960 }, { "epoch": 0.4377785219340048, "grad_norm": 1.531294822692871, "learning_rate": 5.646499314436502e-05, "loss": 0.2012697458267212, "step": 101970 }, { "epoch": 0.4378214540240248, "grad_norm": 0.1400694102048874, "learning_rate": 5.6460681424247396e-05, "loss": 0.09353853464126587, "step": 101980 }, { "epoch": 0.4378643861140448, "grad_norm": 0.09306161850690842, "learning_rate": 5.645636970412976e-05, "loss": 0.4895349025726318, "step": 101990 }, { "epoch": 0.4379073182040648, "grad_norm": 1.4866420030593872, "learning_rate": 5.6452057984012144e-05, "loss": 0.2991748094558716, "step": 102000 }, { "epoch": 0.4379073182040648, "eval_loss": 0.4135998785495758, "eval_runtime": 27.1015, "eval_samples_per_second": 3.69, "eval_steps_per_second": 3.69, "step": 102000 }, { "epoch": 0.4379502502940848, "grad_norm": 0.11813008040189743, "learning_rate": 5.644774626389452e-05, "loss": 0.25430753231048586, "step": 102010 }, { "epoch": 0.43799318238410484, "grad_norm": 0.5968402028083801, "learning_rate": 5.64434345437769e-05, "loss": 0.4001627445220947, "step": 102020 }, { "epoch": 0.4380361144741248, "grad_norm": 2.1716525554656982, "learning_rate": 5.6439122823659276e-05, "loss": 0.30237655639648436, "step": 102030 }, { "epoch": 0.4380790465641448, "grad_norm": 0.002318573882803321, "learning_rate": 5.6434811103541654e-05, "loss": 0.2868093967437744, "step": 102040 }, { "epoch": 0.43812197865416486, "grad_norm": 1.7742433547973633, "learning_rate": 5.643049938342403e-05, "loss": 0.056132668256759645, "step": 102050 }, { "epoch": 0.43816491074418484, "grad_norm": 4.7987470626831055, "learning_rate": 5.642618766330641e-05, "loss": 0.29152579307556153, "step": 102060 }, { "epoch": 0.43820784283420483, "grad_norm": 0.07227819412946701, "learning_rate": 5.642187594318877e-05, "loss": 0.2815880537033081, "step": 102070 }, { "epoch": 0.4382507749242249, "grad_norm": 0.06919845938682556, "learning_rate": 5.641756422307115e-05, "loss": 0.13695106506347657, "step": 102080 }, { "epoch": 0.43829370701424486, "grad_norm": 4.543615341186523, "learning_rate": 5.641325250295353e-05, "loss": 0.18619425296783448, "step": 102090 }, { "epoch": 0.43833663910426485, "grad_norm": 0.25457215309143066, "learning_rate": 5.6408940782835904e-05, "loss": 0.14458426237106323, "step": 102100 }, { "epoch": 0.4383795711942849, "grad_norm": 0.011515560559928417, "learning_rate": 5.640462906271828e-05, "loss": 0.1838878870010376, "step": 102110 }, { "epoch": 0.4384225032843049, "grad_norm": 0.0019815945997834206, "learning_rate": 5.640031734260066e-05, "loss": 0.20781657695770264, "step": 102120 }, { "epoch": 0.4384654353743249, "grad_norm": 1.029922366142273, "learning_rate": 5.6396005622483036e-05, "loss": 0.20602426528930665, "step": 102130 }, { "epoch": 0.4385083674643449, "grad_norm": 0.0010533623863011599, "learning_rate": 5.6391693902365414e-05, "loss": 0.03147282600402832, "step": 102140 }, { "epoch": 0.4385512995543649, "grad_norm": 0.26850226521492004, "learning_rate": 5.6387382182247784e-05, "loss": 0.28639023303985595, "step": 102150 }, { "epoch": 0.43859423164438494, "grad_norm": 3.8238465785980225, "learning_rate": 5.638307046213016e-05, "loss": 0.1597683310508728, "step": 102160 }, { "epoch": 0.4386371637344049, "grad_norm": 1.4224358797073364, "learning_rate": 5.637875874201254e-05, "loss": 0.36101374626159666, "step": 102170 }, { "epoch": 0.4386800958244249, "grad_norm": 5.2266364097595215, "learning_rate": 5.6374447021894916e-05, "loss": 0.24237642288208008, "step": 102180 }, { "epoch": 0.43872302791444495, "grad_norm": 0.024161502718925476, "learning_rate": 5.6370135301777293e-05, "loss": 0.37608938217163085, "step": 102190 }, { "epoch": 0.43876596000446494, "grad_norm": 0.06186579167842865, "learning_rate": 5.636582358165967e-05, "loss": 0.332311224937439, "step": 102200 }, { "epoch": 0.4388088920944849, "grad_norm": 0.03216930106282234, "learning_rate": 5.636151186154205e-05, "loss": 0.30623137950897217, "step": 102210 }, { "epoch": 0.43885182418450497, "grad_norm": 0.9261085391044617, "learning_rate": 5.6357200141424425e-05, "loss": 0.12676268815994263, "step": 102220 }, { "epoch": 0.43889475627452496, "grad_norm": 0.28767600655555725, "learning_rate": 5.63528884213068e-05, "loss": 0.06856579780578613, "step": 102230 }, { "epoch": 0.43893768836454494, "grad_norm": 1.7868995666503906, "learning_rate": 5.6348576701189173e-05, "loss": 0.03691851794719696, "step": 102240 }, { "epoch": 0.438980620454565, "grad_norm": 0.3403833210468292, "learning_rate": 5.634426498107155e-05, "loss": 0.11112273931503296, "step": 102250 }, { "epoch": 0.43902355254458497, "grad_norm": 0.07535845786333084, "learning_rate": 5.633995326095393e-05, "loss": 0.1796416759490967, "step": 102260 }, { "epoch": 0.43906648463460496, "grad_norm": 0.021636424586176872, "learning_rate": 5.6335641540836305e-05, "loss": 0.07033033967018128, "step": 102270 }, { "epoch": 0.439109416724625, "grad_norm": 2.452381134033203, "learning_rate": 5.633132982071868e-05, "loss": 0.2959134578704834, "step": 102280 }, { "epoch": 0.439152348814645, "grad_norm": 2.967747926712036, "learning_rate": 5.632701810060106e-05, "loss": 0.20051445960998535, "step": 102290 }, { "epoch": 0.439195280904665, "grad_norm": 1.3664977550506592, "learning_rate": 5.632270638048344e-05, "loss": 0.12291600704193115, "step": 102300 }, { "epoch": 0.439238212994685, "grad_norm": 0.00837288424372673, "learning_rate": 5.6318394660365815e-05, "loss": 0.26719226837158205, "step": 102310 }, { "epoch": 0.439281145084705, "grad_norm": 0.0758778303861618, "learning_rate": 5.631408294024818e-05, "loss": 0.21944777965545653, "step": 102320 }, { "epoch": 0.43932407717472505, "grad_norm": 4.773903846740723, "learning_rate": 5.6309771220130556e-05, "loss": 0.30995585918426516, "step": 102330 }, { "epoch": 0.43936700926474503, "grad_norm": 0.9918110370635986, "learning_rate": 5.630545950001293e-05, "loss": 0.24170682430267335, "step": 102340 }, { "epoch": 0.439409941354765, "grad_norm": 0.03276235982775688, "learning_rate": 5.630114777989531e-05, "loss": 0.11611915826797485, "step": 102350 }, { "epoch": 0.43945287344478506, "grad_norm": 0.03927241638302803, "learning_rate": 5.629683605977769e-05, "loss": 0.24892120361328124, "step": 102360 }, { "epoch": 0.43949580553480505, "grad_norm": 0.04267513006925583, "learning_rate": 5.629252433966007e-05, "loss": 0.10058658123016358, "step": 102370 }, { "epoch": 0.43953873762482504, "grad_norm": 0.0018779346719384193, "learning_rate": 5.628821261954245e-05, "loss": 0.29012117385864256, "step": 102380 }, { "epoch": 0.4395816697148451, "grad_norm": 0.9698354005813599, "learning_rate": 5.628390089942483e-05, "loss": 0.3414642572402954, "step": 102390 }, { "epoch": 0.43962460180486507, "grad_norm": 2.9953103065490723, "learning_rate": 5.627958917930719e-05, "loss": 0.0756386935710907, "step": 102400 }, { "epoch": 0.43966753389488505, "grad_norm": 0.4900497496128082, "learning_rate": 5.627527745918957e-05, "loss": 0.34758667945861815, "step": 102410 }, { "epoch": 0.4397104659849051, "grad_norm": 0.02272651344537735, "learning_rate": 5.6270965739071945e-05, "loss": 0.20230214595794677, "step": 102420 }, { "epoch": 0.4397533980749251, "grad_norm": 0.03989941254258156, "learning_rate": 5.626665401895432e-05, "loss": 0.2181776762008667, "step": 102430 }, { "epoch": 0.43979633016494507, "grad_norm": 0.026858249679207802, "learning_rate": 5.62623422988367e-05, "loss": 0.18684104681015015, "step": 102440 }, { "epoch": 0.4398392622549651, "grad_norm": 0.23959459364414215, "learning_rate": 5.625803057871908e-05, "loss": 0.1459541440010071, "step": 102450 }, { "epoch": 0.4398821943449851, "grad_norm": 0.004670017398893833, "learning_rate": 5.6253718858601455e-05, "loss": 0.11860576868057252, "step": 102460 }, { "epoch": 0.4399251264350051, "grad_norm": 0.011067052371799946, "learning_rate": 5.624940713848383e-05, "loss": 0.39524543285369873, "step": 102470 }, { "epoch": 0.43996805852502513, "grad_norm": 0.03430342674255371, "learning_rate": 5.62450954183662e-05, "loss": 0.3463152885437012, "step": 102480 }, { "epoch": 0.4400109906150451, "grad_norm": 0.03823309764266014, "learning_rate": 5.624078369824858e-05, "loss": 0.2293764352798462, "step": 102490 }, { "epoch": 0.4400539227050651, "grad_norm": 0.017376506701111794, "learning_rate": 5.623647197813096e-05, "loss": 0.2071463108062744, "step": 102500 }, { "epoch": 0.44009685479508515, "grad_norm": 1.2558318376541138, "learning_rate": 5.6232160258013335e-05, "loss": 0.32691106796264646, "step": 102510 }, { "epoch": 0.44013978688510513, "grad_norm": 2.95967698097229, "learning_rate": 5.622784853789571e-05, "loss": 0.17968182563781737, "step": 102520 }, { "epoch": 0.4401827189751251, "grad_norm": 1.033097743988037, "learning_rate": 5.622353681777809e-05, "loss": 0.19898836612701415, "step": 102530 }, { "epoch": 0.44022565106514516, "grad_norm": 0.016062144190073013, "learning_rate": 5.6219225097660467e-05, "loss": 0.09094843864440919, "step": 102540 }, { "epoch": 0.44026858315516515, "grad_norm": 0.4532967805862427, "learning_rate": 5.6214913377542844e-05, "loss": 0.18726831674575806, "step": 102550 }, { "epoch": 0.4403115152451852, "grad_norm": 0.014149283058941364, "learning_rate": 5.621060165742522e-05, "loss": 0.05682987570762634, "step": 102560 }, { "epoch": 0.4403544473352052, "grad_norm": 2.137563467025757, "learning_rate": 5.6206289937307585e-05, "loss": 0.2961072206497192, "step": 102570 }, { "epoch": 0.44039737942522517, "grad_norm": 0.04554494842886925, "learning_rate": 5.620197821718996e-05, "loss": 0.03142690062522888, "step": 102580 }, { "epoch": 0.4404403115152452, "grad_norm": 0.005631288979202509, "learning_rate": 5.6197666497072347e-05, "loss": 0.12604271173477172, "step": 102590 }, { "epoch": 0.4404832436052652, "grad_norm": 0.003667443059384823, "learning_rate": 5.6193354776954724e-05, "loss": 0.02955203056335449, "step": 102600 }, { "epoch": 0.4405261756952852, "grad_norm": 0.05866995453834534, "learning_rate": 5.61890430568371e-05, "loss": 0.08454251289367676, "step": 102610 }, { "epoch": 0.4405691077853052, "grad_norm": 0.913500964641571, "learning_rate": 5.618473133671948e-05, "loss": 0.262734055519104, "step": 102620 }, { "epoch": 0.4406120398753252, "grad_norm": 4.365779399871826, "learning_rate": 5.6180419616601856e-05, "loss": 0.23714911937713623, "step": 102630 }, { "epoch": 0.4406549719653452, "grad_norm": 1.4142851829528809, "learning_rate": 5.617610789648423e-05, "loss": 0.30322554111480715, "step": 102640 }, { "epoch": 0.44069790405536524, "grad_norm": 5.523765563964844, "learning_rate": 5.61717961763666e-05, "loss": 0.2837998867034912, "step": 102650 }, { "epoch": 0.44074083614538523, "grad_norm": 7.742196083068848, "learning_rate": 5.6167484456248974e-05, "loss": 0.2016591787338257, "step": 102660 }, { "epoch": 0.4407837682354052, "grad_norm": 0.39852452278137207, "learning_rate": 5.616317273613135e-05, "loss": 0.16970374584197997, "step": 102670 }, { "epoch": 0.44082670032542526, "grad_norm": 0.02069421112537384, "learning_rate": 5.615886101601373e-05, "loss": 0.323093843460083, "step": 102680 }, { "epoch": 0.44086963241544525, "grad_norm": 0.009212727658450603, "learning_rate": 5.6154549295896106e-05, "loss": 0.22446053028106688, "step": 102690 }, { "epoch": 0.44091256450546523, "grad_norm": 0.0052307466976344585, "learning_rate": 5.6150237575778484e-05, "loss": 0.16936511993408204, "step": 102700 }, { "epoch": 0.4409554965954853, "grad_norm": 0.8425884246826172, "learning_rate": 5.614592585566086e-05, "loss": 0.41118998527526857, "step": 102710 }, { "epoch": 0.44099842868550526, "grad_norm": 0.08288142830133438, "learning_rate": 5.614161413554324e-05, "loss": 0.08106495141983032, "step": 102720 }, { "epoch": 0.44104136077552525, "grad_norm": 0.9253395199775696, "learning_rate": 5.613730241542561e-05, "loss": 0.2048487663269043, "step": 102730 }, { "epoch": 0.4410842928655453, "grad_norm": 3.6987619400024414, "learning_rate": 5.6132990695307986e-05, "loss": 0.49891486167907717, "step": 102740 }, { "epoch": 0.4411272249555653, "grad_norm": 0.009961171075701714, "learning_rate": 5.6128678975190364e-05, "loss": 0.3197080373764038, "step": 102750 }, { "epoch": 0.4411701570455853, "grad_norm": 0.00707285525277257, "learning_rate": 5.612436725507274e-05, "loss": 0.4116492748260498, "step": 102760 }, { "epoch": 0.4412130891356053, "grad_norm": 0.00316762481816113, "learning_rate": 5.612005553495512e-05, "loss": 0.10139093399047852, "step": 102770 }, { "epoch": 0.4412560212256253, "grad_norm": 0.002529738238081336, "learning_rate": 5.6115743814837496e-05, "loss": 0.10213443040847778, "step": 102780 }, { "epoch": 0.44129895331564534, "grad_norm": 0.6762450933456421, "learning_rate": 5.611143209471987e-05, "loss": 0.2196059226989746, "step": 102790 }, { "epoch": 0.4413418854056653, "grad_norm": 15.398003578186035, "learning_rate": 5.610712037460225e-05, "loss": 0.18061717748641967, "step": 102800 }, { "epoch": 0.4413848174956853, "grad_norm": 0.22043964266777039, "learning_rate": 5.610280865448462e-05, "loss": 0.2987262487411499, "step": 102810 }, { "epoch": 0.44142774958570535, "grad_norm": 1.8148123025894165, "learning_rate": 5.6098496934367e-05, "loss": 0.2733000755310059, "step": 102820 }, { "epoch": 0.44147068167572534, "grad_norm": 0.22438614070415497, "learning_rate": 5.6094185214249376e-05, "loss": 0.23932878971099852, "step": 102830 }, { "epoch": 0.4415136137657453, "grad_norm": 2.330376148223877, "learning_rate": 5.608987349413175e-05, "loss": 0.2667943000793457, "step": 102840 }, { "epoch": 0.44155654585576537, "grad_norm": 12.250018119812012, "learning_rate": 5.608556177401413e-05, "loss": 0.0842181921005249, "step": 102850 }, { "epoch": 0.44159947794578536, "grad_norm": 1.1223269701004028, "learning_rate": 5.608125005389651e-05, "loss": 0.40932598114013674, "step": 102860 }, { "epoch": 0.44164241003580534, "grad_norm": 4.876951694488525, "learning_rate": 5.6076938333778885e-05, "loss": 0.30170905590057373, "step": 102870 }, { "epoch": 0.4416853421258254, "grad_norm": 2.0066170692443848, "learning_rate": 5.607262661366126e-05, "loss": 0.2661459445953369, "step": 102880 }, { "epoch": 0.4417282742158454, "grad_norm": 0.12877433001995087, "learning_rate": 5.6068314893543626e-05, "loss": 0.17716137170791627, "step": 102890 }, { "epoch": 0.44177120630586536, "grad_norm": 0.1708947718143463, "learning_rate": 5.6064003173426003e-05, "loss": 0.29579811096191405, "step": 102900 }, { "epoch": 0.4418141383958854, "grad_norm": 0.8348886370658875, "learning_rate": 5.605969145330838e-05, "loss": 0.23079733848571776, "step": 102910 }, { "epoch": 0.4418570704859054, "grad_norm": 1.4094451665878296, "learning_rate": 5.605537973319076e-05, "loss": 0.3656616687774658, "step": 102920 }, { "epoch": 0.4419000025759254, "grad_norm": 0.21388374269008636, "learning_rate": 5.6051068013073136e-05, "loss": 0.23285670280456544, "step": 102930 }, { "epoch": 0.4419429346659454, "grad_norm": 0.02697441726922989, "learning_rate": 5.604675629295551e-05, "loss": 0.08836208581924439, "step": 102940 }, { "epoch": 0.4419858667559654, "grad_norm": 0.2437342256307602, "learning_rate": 5.604244457283789e-05, "loss": 0.04906468987464905, "step": 102950 }, { "epoch": 0.4420287988459854, "grad_norm": 0.09356532245874405, "learning_rate": 5.603813285272027e-05, "loss": 0.09088559746742249, "step": 102960 }, { "epoch": 0.44207173093600544, "grad_norm": 0.03725534304976463, "learning_rate": 5.603382113260265e-05, "loss": 0.06402713060379028, "step": 102970 }, { "epoch": 0.4421146630260254, "grad_norm": 0.011978763155639172, "learning_rate": 5.6029509412485015e-05, "loss": 0.3434773921966553, "step": 102980 }, { "epoch": 0.44215759511604547, "grad_norm": 0.4410317540168762, "learning_rate": 5.602519769236739e-05, "loss": 0.17957134246826173, "step": 102990 }, { "epoch": 0.44220052720606545, "grad_norm": 2.408623695373535, "learning_rate": 5.602088597224977e-05, "loss": 0.2665703296661377, "step": 103000 }, { "epoch": 0.44220052720606545, "eval_loss": 0.41615238785743713, "eval_runtime": 27.2641, "eval_samples_per_second": 3.668, "eval_steps_per_second": 3.668, "step": 103000 }, { "epoch": 0.44224345929608544, "grad_norm": 2.4489293098449707, "learning_rate": 5.601657425213215e-05, "loss": 0.23038191795349122, "step": 103010 }, { "epoch": 0.4422863913861055, "grad_norm": 2.3956902027130127, "learning_rate": 5.6012262532014525e-05, "loss": 0.24207894802093505, "step": 103020 }, { "epoch": 0.44232932347612547, "grad_norm": 0.13477887213230133, "learning_rate": 5.60079508118969e-05, "loss": 0.11965099573135377, "step": 103030 }, { "epoch": 0.44237225556614546, "grad_norm": 0.0049563124775886536, "learning_rate": 5.600363909177928e-05, "loss": 0.21348881721496582, "step": 103040 }, { "epoch": 0.4424151876561655, "grad_norm": 0.3449452817440033, "learning_rate": 5.599932737166166e-05, "loss": 0.4346416473388672, "step": 103050 }, { "epoch": 0.4424581197461855, "grad_norm": 0.8822908401489258, "learning_rate": 5.599501565154403e-05, "loss": 0.21301577091217042, "step": 103060 }, { "epoch": 0.4425010518362055, "grad_norm": 1.8892277479171753, "learning_rate": 5.5990703931426405e-05, "loss": 0.33349945545196535, "step": 103070 }, { "epoch": 0.4425439839262255, "grad_norm": 0.06545145809650421, "learning_rate": 5.598639221130878e-05, "loss": 0.30561869144439696, "step": 103080 }, { "epoch": 0.4425869160162455, "grad_norm": 0.2756239175796509, "learning_rate": 5.598208049119116e-05, "loss": 0.2424759864807129, "step": 103090 }, { "epoch": 0.4426298481062655, "grad_norm": 2.4025254249572754, "learning_rate": 5.597776877107354e-05, "loss": 0.2664868116378784, "step": 103100 }, { "epoch": 0.44267278019628553, "grad_norm": 0.028143398463726044, "learning_rate": 5.5973457050955914e-05, "loss": 0.10620641708374023, "step": 103110 }, { "epoch": 0.4427157122863055, "grad_norm": 2.8042232990264893, "learning_rate": 5.596914533083829e-05, "loss": 0.1460793733596802, "step": 103120 }, { "epoch": 0.4427586443763255, "grad_norm": 0.010151490569114685, "learning_rate": 5.596483361072067e-05, "loss": 0.17683175802230836, "step": 103130 }, { "epoch": 0.44280157646634555, "grad_norm": 2.218151569366455, "learning_rate": 5.596052189060303e-05, "loss": 0.3955509901046753, "step": 103140 }, { "epoch": 0.44284450855636553, "grad_norm": 4.830830097198486, "learning_rate": 5.595621017048541e-05, "loss": 0.3088000059127808, "step": 103150 }, { "epoch": 0.4428874406463855, "grad_norm": 0.014260631985962391, "learning_rate": 5.595189845036779e-05, "loss": 0.1861141800880432, "step": 103160 }, { "epoch": 0.44293037273640556, "grad_norm": 0.09907328337430954, "learning_rate": 5.5947586730250165e-05, "loss": 0.14836844205856323, "step": 103170 }, { "epoch": 0.44297330482642555, "grad_norm": 0.05011352524161339, "learning_rate": 5.594327501013254e-05, "loss": 0.21681520938873292, "step": 103180 }, { "epoch": 0.4430162369164456, "grad_norm": 1.6237834692001343, "learning_rate": 5.5938963290014926e-05, "loss": 0.23171391487121581, "step": 103190 }, { "epoch": 0.4430591690064656, "grad_norm": 0.8891550898551941, "learning_rate": 5.5934651569897303e-05, "loss": 0.27312302589416504, "step": 103200 }, { "epoch": 0.44310210109648557, "grad_norm": 1.871790885925293, "learning_rate": 5.593033984977968e-05, "loss": 0.16714088916778563, "step": 103210 }, { "epoch": 0.4431450331865056, "grad_norm": 1.3890925645828247, "learning_rate": 5.5926028129662045e-05, "loss": 0.19409667253494262, "step": 103220 }, { "epoch": 0.4431879652765256, "grad_norm": 4.021583557128906, "learning_rate": 5.592171640954442e-05, "loss": 0.24269635677337648, "step": 103230 }, { "epoch": 0.4432308973665456, "grad_norm": 0.05253671109676361, "learning_rate": 5.59174046894268e-05, "loss": 0.06833736896514893, "step": 103240 }, { "epoch": 0.4432738294565656, "grad_norm": 0.5878363847732544, "learning_rate": 5.5913092969309177e-05, "loss": 0.2748382091522217, "step": 103250 }, { "epoch": 0.4433167615465856, "grad_norm": 0.2838081419467926, "learning_rate": 5.5908781249191554e-05, "loss": 0.18605902194976806, "step": 103260 }, { "epoch": 0.4433596936366056, "grad_norm": 0.3230079710483551, "learning_rate": 5.590446952907393e-05, "loss": 0.1064450740814209, "step": 103270 }, { "epoch": 0.44340262572662564, "grad_norm": 0.11365670710802078, "learning_rate": 5.590015780895631e-05, "loss": 0.37896459102630614, "step": 103280 }, { "epoch": 0.44344555781664563, "grad_norm": 0.025720424950122833, "learning_rate": 5.5895846088838686e-05, "loss": 0.14678401947021485, "step": 103290 }, { "epoch": 0.4434884899066656, "grad_norm": 1.161470890045166, "learning_rate": 5.589153436872106e-05, "loss": 0.3876520872116089, "step": 103300 }, { "epoch": 0.44353142199668566, "grad_norm": 25.90959358215332, "learning_rate": 5.5887222648603434e-05, "loss": 0.17435024976730346, "step": 103310 }, { "epoch": 0.44357435408670565, "grad_norm": 6.646755695343018, "learning_rate": 5.588291092848581e-05, "loss": 0.17923879623413086, "step": 103320 }, { "epoch": 0.44361728617672563, "grad_norm": 1.0259877443313599, "learning_rate": 5.587859920836819e-05, "loss": 0.325650691986084, "step": 103330 }, { "epoch": 0.4436602182667457, "grad_norm": 0.1227472648024559, "learning_rate": 5.5874287488250566e-05, "loss": 0.3091316223144531, "step": 103340 }, { "epoch": 0.44370315035676566, "grad_norm": 0.048743315041065216, "learning_rate": 5.586997576813294e-05, "loss": 0.20743951797485352, "step": 103350 }, { "epoch": 0.44374608244678565, "grad_norm": 0.8254276514053345, "learning_rate": 5.586566404801532e-05, "loss": 0.3583249807357788, "step": 103360 }, { "epoch": 0.4437890145368057, "grad_norm": 0.02611999399960041, "learning_rate": 5.58613523278977e-05, "loss": 0.1163441300392151, "step": 103370 }, { "epoch": 0.4438319466268257, "grad_norm": 0.006742026656866074, "learning_rate": 5.5857040607780075e-05, "loss": 0.2178345203399658, "step": 103380 }, { "epoch": 0.44387487871684567, "grad_norm": 0.008877522312104702, "learning_rate": 5.585272888766244e-05, "loss": 0.24348392486572265, "step": 103390 }, { "epoch": 0.4439178108068657, "grad_norm": 0.008085265755653381, "learning_rate": 5.5848417167544816e-05, "loss": 0.20122940540313722, "step": 103400 }, { "epoch": 0.4439607428968857, "grad_norm": 0.14661838114261627, "learning_rate": 5.58441054474272e-05, "loss": 0.2685752630233765, "step": 103410 }, { "epoch": 0.44400367498690574, "grad_norm": 6.539491176605225, "learning_rate": 5.583979372730958e-05, "loss": 0.2309859037399292, "step": 103420 }, { "epoch": 0.4440466070769257, "grad_norm": 0.24323442578315735, "learning_rate": 5.5835482007191955e-05, "loss": 0.19980394840240479, "step": 103430 }, { "epoch": 0.4440895391669457, "grad_norm": 0.01880057342350483, "learning_rate": 5.583117028707433e-05, "loss": 0.08100860714912414, "step": 103440 }, { "epoch": 0.44413247125696576, "grad_norm": 1.389131784439087, "learning_rate": 5.582685856695671e-05, "loss": 0.40127172470092776, "step": 103450 }, { "epoch": 0.44417540334698574, "grad_norm": 0.010281624272465706, "learning_rate": 5.582254684683909e-05, "loss": 0.15416443347930908, "step": 103460 }, { "epoch": 0.44421833543700573, "grad_norm": 1.043463945388794, "learning_rate": 5.581823512672145e-05, "loss": 0.41317176818847656, "step": 103470 }, { "epoch": 0.44426126752702577, "grad_norm": 0.0015052884118631482, "learning_rate": 5.581392340660383e-05, "loss": 0.1989218235015869, "step": 103480 }, { "epoch": 0.44430419961704576, "grad_norm": 0.22993981838226318, "learning_rate": 5.5809611686486206e-05, "loss": 0.1768964171409607, "step": 103490 }, { "epoch": 0.44434713170706575, "grad_norm": 1.827546238899231, "learning_rate": 5.580529996636858e-05, "loss": 0.10534555912017822, "step": 103500 }, { "epoch": 0.4443900637970858, "grad_norm": 0.15109845995903015, "learning_rate": 5.580098824625096e-05, "loss": 0.3064572811126709, "step": 103510 }, { "epoch": 0.4444329958871058, "grad_norm": 0.8713210821151733, "learning_rate": 5.579667652613334e-05, "loss": 0.1682778477668762, "step": 103520 }, { "epoch": 0.44447592797712576, "grad_norm": 1.4505832195281982, "learning_rate": 5.5792364806015715e-05, "loss": 0.2827667236328125, "step": 103530 }, { "epoch": 0.4445188600671458, "grad_norm": 0.14652542769908905, "learning_rate": 5.578805308589809e-05, "loss": 0.08168087601661682, "step": 103540 }, { "epoch": 0.4445617921571658, "grad_norm": 0.006383778993040323, "learning_rate": 5.578374136578046e-05, "loss": 0.09196740984916688, "step": 103550 }, { "epoch": 0.4446047242471858, "grad_norm": 0.003078729147091508, "learning_rate": 5.577942964566284e-05, "loss": 0.20092494487762452, "step": 103560 }, { "epoch": 0.4446476563372058, "grad_norm": 18.05199432373047, "learning_rate": 5.577511792554522e-05, "loss": 0.28601033687591554, "step": 103570 }, { "epoch": 0.4446905884272258, "grad_norm": 1.4970247745513916, "learning_rate": 5.5770806205427595e-05, "loss": 0.19839348793029785, "step": 103580 }, { "epoch": 0.4447335205172458, "grad_norm": 4.3401665687561035, "learning_rate": 5.576649448530997e-05, "loss": 0.32708165645599363, "step": 103590 }, { "epoch": 0.44477645260726584, "grad_norm": 0.07379349321126938, "learning_rate": 5.576218276519235e-05, "loss": 0.1674059510231018, "step": 103600 }, { "epoch": 0.4448193846972858, "grad_norm": 0.015309474430978298, "learning_rate": 5.575787104507473e-05, "loss": 0.37714362144470215, "step": 103610 }, { "epoch": 0.44486231678730587, "grad_norm": 2.3161532878875732, "learning_rate": 5.5753559324957104e-05, "loss": 0.3296446561813354, "step": 103620 }, { "epoch": 0.44490524887732585, "grad_norm": 0.1761620193719864, "learning_rate": 5.5749247604839475e-05, "loss": 0.14058544635772705, "step": 103630 }, { "epoch": 0.44494818096734584, "grad_norm": 0.02455052174627781, "learning_rate": 5.574493588472185e-05, "loss": 0.1107941746711731, "step": 103640 }, { "epoch": 0.4449911130573659, "grad_norm": 2.150546073913574, "learning_rate": 5.574062416460423e-05, "loss": 0.39648966789245604, "step": 103650 }, { "epoch": 0.44503404514738587, "grad_norm": 0.16114000976085663, "learning_rate": 5.573631244448661e-05, "loss": 0.22998547554016113, "step": 103660 }, { "epoch": 0.44507697723740586, "grad_norm": 1.723541498184204, "learning_rate": 5.5732000724368984e-05, "loss": 0.23313498497009277, "step": 103670 }, { "epoch": 0.4451199093274259, "grad_norm": 1.5442637205123901, "learning_rate": 5.572768900425136e-05, "loss": 0.4073235511779785, "step": 103680 }, { "epoch": 0.4451628414174459, "grad_norm": 0.019211404025554657, "learning_rate": 5.572337728413374e-05, "loss": 0.2632411003112793, "step": 103690 }, { "epoch": 0.4452057735074659, "grad_norm": 0.1311320811510086, "learning_rate": 5.5719065564016116e-05, "loss": 0.23861315250396728, "step": 103700 }, { "epoch": 0.4452487055974859, "grad_norm": 0.015002823434770107, "learning_rate": 5.5714753843898494e-05, "loss": 0.2565034866333008, "step": 103710 }, { "epoch": 0.4452916376875059, "grad_norm": 1.0120104551315308, "learning_rate": 5.571044212378086e-05, "loss": 0.11774532794952393, "step": 103720 }, { "epoch": 0.4453345697775259, "grad_norm": 1.3540277481079102, "learning_rate": 5.5706130403663235e-05, "loss": 0.19017940759658813, "step": 103730 }, { "epoch": 0.44537750186754593, "grad_norm": 0.017274409532546997, "learning_rate": 5.570181868354561e-05, "loss": 0.16923511028289795, "step": 103740 }, { "epoch": 0.4454204339575659, "grad_norm": 0.006057052407413721, "learning_rate": 5.569750696342799e-05, "loss": 0.27709307670593264, "step": 103750 }, { "epoch": 0.4454633660475859, "grad_norm": 0.012291841208934784, "learning_rate": 5.569319524331037e-05, "loss": 0.19354288578033446, "step": 103760 }, { "epoch": 0.44550629813760595, "grad_norm": 0.002555753802880645, "learning_rate": 5.5688883523192744e-05, "loss": 0.23224294185638428, "step": 103770 }, { "epoch": 0.44554923022762594, "grad_norm": 2.2619755268096924, "learning_rate": 5.568457180307513e-05, "loss": 0.06295624971389771, "step": 103780 }, { "epoch": 0.4455921623176459, "grad_norm": 0.001481866231188178, "learning_rate": 5.5680260082957506e-05, "loss": 0.17975410223007202, "step": 103790 }, { "epoch": 0.44563509440766597, "grad_norm": 0.09197922050952911, "learning_rate": 5.567594836283987e-05, "loss": 0.03728804886341095, "step": 103800 }, { "epoch": 0.44567802649768595, "grad_norm": 1.473630666732788, "learning_rate": 5.567163664272225e-05, "loss": 0.18659095764160155, "step": 103810 }, { "epoch": 0.44572095858770594, "grad_norm": 0.0031739831902086735, "learning_rate": 5.5667324922604624e-05, "loss": 0.026174116134643554, "step": 103820 }, { "epoch": 0.445763890677726, "grad_norm": 0.011447089724242687, "learning_rate": 5.5663013202487e-05, "loss": 0.4874469757080078, "step": 103830 }, { "epoch": 0.44580682276774597, "grad_norm": 1.396328091621399, "learning_rate": 5.565870148236938e-05, "loss": 0.13257690668106079, "step": 103840 }, { "epoch": 0.445849754857766, "grad_norm": 0.08562108129262924, "learning_rate": 5.5654389762251756e-05, "loss": 0.11450157165527344, "step": 103850 }, { "epoch": 0.445892686947786, "grad_norm": 3.2500569820404053, "learning_rate": 5.5650078042134134e-05, "loss": 0.176918363571167, "step": 103860 }, { "epoch": 0.445935619037806, "grad_norm": 2.0813026428222656, "learning_rate": 5.564576632201651e-05, "loss": 0.19415004253387452, "step": 103870 }, { "epoch": 0.44597855112782603, "grad_norm": 0.8829070329666138, "learning_rate": 5.564145460189888e-05, "loss": 0.17182838916778564, "step": 103880 }, { "epoch": 0.446021483217846, "grad_norm": 0.007004234939813614, "learning_rate": 5.563714288178126e-05, "loss": 0.36884973049163816, "step": 103890 }, { "epoch": 0.446064415307866, "grad_norm": 0.8837750554084778, "learning_rate": 5.5632831161663636e-05, "loss": 0.11918337345123291, "step": 103900 }, { "epoch": 0.44610734739788604, "grad_norm": 0.0010567853460088372, "learning_rate": 5.5628519441546013e-05, "loss": 0.17197400331497192, "step": 103910 }, { "epoch": 0.44615027948790603, "grad_norm": 0.09773020446300507, "learning_rate": 5.562420772142839e-05, "loss": 0.2357560873031616, "step": 103920 }, { "epoch": 0.446193211577926, "grad_norm": 0.0009565603686496615, "learning_rate": 5.561989600131077e-05, "loss": 0.14261248111724853, "step": 103930 }, { "epoch": 0.44623614366794606, "grad_norm": 4.308777809143066, "learning_rate": 5.5615584281193146e-05, "loss": 0.30736939907073973, "step": 103940 }, { "epoch": 0.44627907575796605, "grad_norm": 7.586215496063232, "learning_rate": 5.561127256107552e-05, "loss": 0.44284768104553224, "step": 103950 }, { "epoch": 0.44632200784798604, "grad_norm": 1.932542324066162, "learning_rate": 5.560696084095789e-05, "loss": 0.11989340782165528, "step": 103960 }, { "epoch": 0.4463649399380061, "grad_norm": 0.006522905547171831, "learning_rate": 5.5602649120840264e-05, "loss": 0.005932082608342171, "step": 103970 }, { "epoch": 0.44640787202802606, "grad_norm": 0.022067038342356682, "learning_rate": 5.559833740072264e-05, "loss": 0.15941121578216552, "step": 103980 }, { "epoch": 0.44645080411804605, "grad_norm": 2.1581637859344482, "learning_rate": 5.559402568060502e-05, "loss": 0.11742219924926758, "step": 103990 }, { "epoch": 0.4464937362080661, "grad_norm": 0.3017823100090027, "learning_rate": 5.55897139604874e-05, "loss": 0.14415110349655152, "step": 104000 }, { "epoch": 0.4464937362080661, "eval_loss": 0.412598580121994, "eval_runtime": 27.092, "eval_samples_per_second": 3.691, "eval_steps_per_second": 3.691, "step": 104000 }, { "epoch": 0.4465366682980861, "grad_norm": 0.09379760175943375, "learning_rate": 5.558540224036978e-05, "loss": 0.11643334627151489, "step": 104010 }, { "epoch": 0.44657960038810607, "grad_norm": 2.794205904006958, "learning_rate": 5.558109052025216e-05, "loss": 0.21521177291870117, "step": 104020 }, { "epoch": 0.4466225324781261, "grad_norm": 2.2005207538604736, "learning_rate": 5.5576778800134535e-05, "loss": 0.050330376625061034, "step": 104030 }, { "epoch": 0.4466654645681461, "grad_norm": 0.002720118500292301, "learning_rate": 5.557246708001691e-05, "loss": 0.43977041244506837, "step": 104040 }, { "epoch": 0.44670839665816614, "grad_norm": 0.008064290508627892, "learning_rate": 5.5568155359899276e-05, "loss": 0.2373410940170288, "step": 104050 }, { "epoch": 0.4467513287481861, "grad_norm": 1.018776297569275, "learning_rate": 5.556384363978165e-05, "loss": 0.23385534286499024, "step": 104060 }, { "epoch": 0.4467942608382061, "grad_norm": 0.9403790831565857, "learning_rate": 5.555953191966403e-05, "loss": 0.08395354151725769, "step": 104070 }, { "epoch": 0.44683719292822616, "grad_norm": 10.480605125427246, "learning_rate": 5.555522019954641e-05, "loss": 0.26746931076049807, "step": 104080 }, { "epoch": 0.44688012501824614, "grad_norm": 0.001435067504644394, "learning_rate": 5.5550908479428785e-05, "loss": 0.22385041713714598, "step": 104090 }, { "epoch": 0.44692305710826613, "grad_norm": 1.9128391742706299, "learning_rate": 5.554659675931116e-05, "loss": 0.2415689468383789, "step": 104100 }, { "epoch": 0.4469659891982862, "grad_norm": 0.10824067145586014, "learning_rate": 5.554228503919354e-05, "loss": 0.25131275653839114, "step": 104110 }, { "epoch": 0.44700892128830616, "grad_norm": 6.406500816345215, "learning_rate": 5.553797331907592e-05, "loss": 0.24533276557922362, "step": 104120 }, { "epoch": 0.44705185337832615, "grad_norm": 0.07218021154403687, "learning_rate": 5.553366159895829e-05, "loss": 0.16491100788116456, "step": 104130 }, { "epoch": 0.4470947854683462, "grad_norm": 1.6558536291122437, "learning_rate": 5.5529349878840665e-05, "loss": 0.3282376527786255, "step": 104140 }, { "epoch": 0.4471377175583662, "grad_norm": 0.9896855354309082, "learning_rate": 5.552503815872304e-05, "loss": 0.31535341739654543, "step": 104150 }, { "epoch": 0.44718064964838616, "grad_norm": 0.11811896413564682, "learning_rate": 5.552072643860542e-05, "loss": 0.3985059976577759, "step": 104160 }, { "epoch": 0.4472235817384062, "grad_norm": 1.187322974205017, "learning_rate": 5.55164147184878e-05, "loss": 0.10966448783874512, "step": 104170 }, { "epoch": 0.4472665138284262, "grad_norm": 1.861840009689331, "learning_rate": 5.5512102998370175e-05, "loss": 0.3425853729248047, "step": 104180 }, { "epoch": 0.4473094459184462, "grad_norm": 0.001304112607613206, "learning_rate": 5.550779127825255e-05, "loss": 0.47119789123535155, "step": 104190 }, { "epoch": 0.4473523780084662, "grad_norm": 1.7016571760177612, "learning_rate": 5.550347955813493e-05, "loss": 0.34137959480285646, "step": 104200 }, { "epoch": 0.4473953100984862, "grad_norm": 0.11588651686906815, "learning_rate": 5.549916783801729e-05, "loss": 0.18648021221160888, "step": 104210 }, { "epoch": 0.4474382421885062, "grad_norm": 0.008340016007423401, "learning_rate": 5.549485611789968e-05, "loss": 0.10814428329467773, "step": 104220 }, { "epoch": 0.44748117427852624, "grad_norm": 0.051742419600486755, "learning_rate": 5.5490544397782055e-05, "loss": 0.06334395408630371, "step": 104230 }, { "epoch": 0.4475241063685462, "grad_norm": 3.1890809535980225, "learning_rate": 5.548623267766443e-05, "loss": 0.21675529479980468, "step": 104240 }, { "epoch": 0.4475670384585662, "grad_norm": 0.020858777686953545, "learning_rate": 5.548192095754681e-05, "loss": 0.15983496904373168, "step": 104250 }, { "epoch": 0.44760997054858626, "grad_norm": 0.0014625436160713434, "learning_rate": 5.5477609237429187e-05, "loss": 0.1737871289253235, "step": 104260 }, { "epoch": 0.44765290263860624, "grad_norm": 0.08136744052171707, "learning_rate": 5.5473297517311564e-05, "loss": 0.25660922527313235, "step": 104270 }, { "epoch": 0.4476958347286263, "grad_norm": 1.1136901378631592, "learning_rate": 5.546898579719394e-05, "loss": 0.09946085810661316, "step": 104280 }, { "epoch": 0.44773876681864627, "grad_norm": 0.002379036508500576, "learning_rate": 5.5464674077076305e-05, "loss": 0.355484676361084, "step": 104290 }, { "epoch": 0.44778169890866626, "grad_norm": 0.09864296764135361, "learning_rate": 5.546036235695868e-05, "loss": 0.3599360942840576, "step": 104300 }, { "epoch": 0.4478246309986863, "grad_norm": 3.0941085815429688, "learning_rate": 5.545605063684106e-05, "loss": 0.27424373626708987, "step": 104310 }, { "epoch": 0.4478675630887063, "grad_norm": 1.5314997434616089, "learning_rate": 5.545173891672344e-05, "loss": 0.26385200023651123, "step": 104320 }, { "epoch": 0.4479104951787263, "grad_norm": 0.6546000242233276, "learning_rate": 5.5447427196605814e-05, "loss": 0.1692166805267334, "step": 104330 }, { "epoch": 0.4479534272687463, "grad_norm": 0.0019183208933100104, "learning_rate": 5.544311547648819e-05, "loss": 0.2954787969589233, "step": 104340 }, { "epoch": 0.4479963593587663, "grad_norm": 0.8082722425460815, "learning_rate": 5.543880375637057e-05, "loss": 0.1485131025314331, "step": 104350 }, { "epoch": 0.4480392914487863, "grad_norm": 0.015951262786984444, "learning_rate": 5.5434492036252946e-05, "loss": 0.20640509128570556, "step": 104360 }, { "epoch": 0.44808222353880633, "grad_norm": 35.86313247680664, "learning_rate": 5.543018031613532e-05, "loss": 0.30402724742889403, "step": 104370 }, { "epoch": 0.4481251556288263, "grad_norm": 0.08005943894386292, "learning_rate": 5.5425868596017694e-05, "loss": 0.09017500877380372, "step": 104380 }, { "epoch": 0.4481680877188463, "grad_norm": 0.06653320044279099, "learning_rate": 5.542155687590007e-05, "loss": 0.1858425498008728, "step": 104390 }, { "epoch": 0.44821101980886635, "grad_norm": 1.3172448873519897, "learning_rate": 5.541724515578245e-05, "loss": 0.3179537534713745, "step": 104400 }, { "epoch": 0.44825395189888634, "grad_norm": 1.2068978548049927, "learning_rate": 5.5412933435664826e-05, "loss": 0.21991288661956787, "step": 104410 }, { "epoch": 0.4482968839889063, "grad_norm": 0.0010691984789445996, "learning_rate": 5.5408621715547204e-05, "loss": 0.322251296043396, "step": 104420 }, { "epoch": 0.44833981607892637, "grad_norm": 1.4022953510284424, "learning_rate": 5.540430999542958e-05, "loss": 0.3753403663635254, "step": 104430 }, { "epoch": 0.44838274816894635, "grad_norm": 1.5526142120361328, "learning_rate": 5.539999827531196e-05, "loss": 0.28666656017303466, "step": 104440 }, { "epoch": 0.44842568025896634, "grad_norm": 0.03407391533255577, "learning_rate": 5.5395686555194336e-05, "loss": 0.028074699640274047, "step": 104450 }, { "epoch": 0.4484686123489864, "grad_norm": 1.6679810285568237, "learning_rate": 5.5391374835076706e-05, "loss": 0.28589587211608886, "step": 104460 }, { "epoch": 0.44851154443900637, "grad_norm": 0.2455066740512848, "learning_rate": 5.5387063114959084e-05, "loss": 0.10405961275100709, "step": 104470 }, { "epoch": 0.4485544765290264, "grad_norm": 1.4242631196975708, "learning_rate": 5.538275139484146e-05, "loss": 0.3303467035293579, "step": 104480 }, { "epoch": 0.4485974086190464, "grad_norm": 1.238398790359497, "learning_rate": 5.537843967472384e-05, "loss": 0.14865484237670898, "step": 104490 }, { "epoch": 0.4486403407090664, "grad_norm": 0.03500519320368767, "learning_rate": 5.5374127954606216e-05, "loss": 0.19715001583099365, "step": 104500 }, { "epoch": 0.44868327279908643, "grad_norm": 0.02038196101784706, "learning_rate": 5.536981623448859e-05, "loss": 0.15497336387634278, "step": 104510 }, { "epoch": 0.4487262048891064, "grad_norm": 0.027887515723705292, "learning_rate": 5.536550451437097e-05, "loss": 0.0818180799484253, "step": 104520 }, { "epoch": 0.4487691369791264, "grad_norm": 7.665521144866943, "learning_rate": 5.536119279425335e-05, "loss": 0.3511594533920288, "step": 104530 }, { "epoch": 0.44881206906914645, "grad_norm": 0.0011700972681865096, "learning_rate": 5.535688107413571e-05, "loss": 0.1457660675048828, "step": 104540 }, { "epoch": 0.44885500115916643, "grad_norm": 0.3345299959182739, "learning_rate": 5.535256935401809e-05, "loss": 0.2602620363235474, "step": 104550 }, { "epoch": 0.4488979332491864, "grad_norm": 7.855331897735596, "learning_rate": 5.5348257633900466e-05, "loss": 0.22493176460266112, "step": 104560 }, { "epoch": 0.44894086533920646, "grad_norm": 3.3931214809417725, "learning_rate": 5.5343945913782844e-05, "loss": 0.09702563285827637, "step": 104570 }, { "epoch": 0.44898379742922645, "grad_norm": 4.353238582611084, "learning_rate": 5.533963419366522e-05, "loss": 0.23266072273254396, "step": 104580 }, { "epoch": 0.44902672951924644, "grad_norm": 0.5245272517204285, "learning_rate": 5.5335322473547605e-05, "loss": 0.3167107105255127, "step": 104590 }, { "epoch": 0.4490696616092665, "grad_norm": 0.013726268894970417, "learning_rate": 5.533101075342998e-05, "loss": 0.22341177463531495, "step": 104600 }, { "epoch": 0.44911259369928647, "grad_norm": 0.0004888771800324321, "learning_rate": 5.532669903331236e-05, "loss": 0.24303507804870605, "step": 104610 }, { "epoch": 0.44915552578930645, "grad_norm": 3.4889206886291504, "learning_rate": 5.5322387313194724e-05, "loss": 0.2107637643814087, "step": 104620 }, { "epoch": 0.4491984578793265, "grad_norm": 0.34107357263565063, "learning_rate": 5.53180755930771e-05, "loss": 0.12795854806900026, "step": 104630 }, { "epoch": 0.4492413899693465, "grad_norm": 2.047565221786499, "learning_rate": 5.531376387295948e-05, "loss": 0.14512872695922852, "step": 104640 }, { "epoch": 0.44928432205936647, "grad_norm": 1.2649197578430176, "learning_rate": 5.5309452152841856e-05, "loss": 0.20695888996124268, "step": 104650 }, { "epoch": 0.4493272541493865, "grad_norm": 1.462600827217102, "learning_rate": 5.530514043272423e-05, "loss": 0.3829000234603882, "step": 104660 }, { "epoch": 0.4493701862394065, "grad_norm": 0.29148611426353455, "learning_rate": 5.530082871260661e-05, "loss": 0.1434500813484192, "step": 104670 }, { "epoch": 0.4494131183294265, "grad_norm": 0.692503809928894, "learning_rate": 5.529651699248899e-05, "loss": 0.16208131313323976, "step": 104680 }, { "epoch": 0.44945605041944653, "grad_norm": 0.05662372708320618, "learning_rate": 5.5292205272371365e-05, "loss": 0.08052989840507507, "step": 104690 }, { "epoch": 0.4494989825094665, "grad_norm": 4.697552680969238, "learning_rate": 5.5287893552253735e-05, "loss": 0.24397377967834472, "step": 104700 }, { "epoch": 0.44954191459948656, "grad_norm": 0.10242107510566711, "learning_rate": 5.528358183213611e-05, "loss": 0.22177133560180665, "step": 104710 }, { "epoch": 0.44958484668950655, "grad_norm": 0.17377902567386627, "learning_rate": 5.527927011201849e-05, "loss": 0.1945713758468628, "step": 104720 }, { "epoch": 0.44962777877952653, "grad_norm": 3.188666582107544, "learning_rate": 5.527495839190087e-05, "loss": 0.2882867336273193, "step": 104730 }, { "epoch": 0.4496707108695466, "grad_norm": 0.009148088283836842, "learning_rate": 5.5270646671783245e-05, "loss": 0.06424065828323364, "step": 104740 }, { "epoch": 0.44971364295956656, "grad_norm": 0.07122382521629333, "learning_rate": 5.526633495166562e-05, "loss": 0.1966516852378845, "step": 104750 }, { "epoch": 0.44975657504958655, "grad_norm": 0.015124008990824223, "learning_rate": 5.5262023231548e-05, "loss": 0.1615827798843384, "step": 104760 }, { "epoch": 0.4497995071396066, "grad_norm": 1.248611330986023, "learning_rate": 5.525771151143038e-05, "loss": 0.16856156587600707, "step": 104770 }, { "epoch": 0.4498424392296266, "grad_norm": 1.596437931060791, "learning_rate": 5.5253399791312754e-05, "loss": 0.12455008029937745, "step": 104780 }, { "epoch": 0.44988537131964657, "grad_norm": 0.0020168491173535585, "learning_rate": 5.524908807119512e-05, "loss": 0.27606155872344973, "step": 104790 }, { "epoch": 0.4499283034096666, "grad_norm": 1.187111735343933, "learning_rate": 5.5244776351077495e-05, "loss": 0.5580471992492676, "step": 104800 }, { "epoch": 0.4499712354996866, "grad_norm": 1.0547586679458618, "learning_rate": 5.524046463095988e-05, "loss": 0.3080447196960449, "step": 104810 }, { "epoch": 0.4500141675897066, "grad_norm": 0.06692945212125778, "learning_rate": 5.523615291084226e-05, "loss": 0.17507950067520142, "step": 104820 }, { "epoch": 0.4500570996797266, "grad_norm": 0.009819112718105316, "learning_rate": 5.5231841190724634e-05, "loss": 0.24413628578186036, "step": 104830 }, { "epoch": 0.4501000317697466, "grad_norm": 0.011352095752954483, "learning_rate": 5.522752947060701e-05, "loss": 0.332852840423584, "step": 104840 }, { "epoch": 0.4501429638597666, "grad_norm": 0.01721765473484993, "learning_rate": 5.522321775048939e-05, "loss": 0.246399450302124, "step": 104850 }, { "epoch": 0.45018589594978664, "grad_norm": 0.46497970819473267, "learning_rate": 5.5218906030371766e-05, "loss": 0.12881628274917603, "step": 104860 }, { "epoch": 0.45022882803980663, "grad_norm": 2.9362080097198486, "learning_rate": 5.521459431025413e-05, "loss": 0.25533831119537354, "step": 104870 }, { "epoch": 0.4502717601298266, "grad_norm": 0.056996967643499374, "learning_rate": 5.521028259013651e-05, "loss": 0.1687253475189209, "step": 104880 }, { "epoch": 0.45031469221984666, "grad_norm": 0.15789049863815308, "learning_rate": 5.5205970870018885e-05, "loss": 0.11047836542129516, "step": 104890 }, { "epoch": 0.45035762430986664, "grad_norm": 0.4039181172847748, "learning_rate": 5.520165914990126e-05, "loss": 0.2870154619216919, "step": 104900 }, { "epoch": 0.4504005563998867, "grad_norm": 0.2132159173488617, "learning_rate": 5.519734742978364e-05, "loss": 0.08048205375671387, "step": 104910 }, { "epoch": 0.4504434884899067, "grad_norm": 1.834272861480713, "learning_rate": 5.519303570966602e-05, "loss": 0.1264325976371765, "step": 104920 }, { "epoch": 0.45048642057992666, "grad_norm": 0.7455428242683411, "learning_rate": 5.5188723989548394e-05, "loss": 0.3383171081542969, "step": 104930 }, { "epoch": 0.4505293526699467, "grad_norm": 0.11336026340723038, "learning_rate": 5.518441226943077e-05, "loss": 0.09172531962394714, "step": 104940 }, { "epoch": 0.4505722847599667, "grad_norm": 0.37443527579307556, "learning_rate": 5.518010054931314e-05, "loss": 0.17477719783782958, "step": 104950 }, { "epoch": 0.4506152168499867, "grad_norm": 0.007864640094339848, "learning_rate": 5.517578882919552e-05, "loss": 0.11320825815200805, "step": 104960 }, { "epoch": 0.4506581489400067, "grad_norm": 2.6541857719421387, "learning_rate": 5.51714771090779e-05, "loss": 0.2526413440704346, "step": 104970 }, { "epoch": 0.4507010810300267, "grad_norm": 0.2613985538482666, "learning_rate": 5.5167165388960274e-05, "loss": 0.22820723056793213, "step": 104980 }, { "epoch": 0.4507440131200467, "grad_norm": 0.20190514624118805, "learning_rate": 5.516285366884265e-05, "loss": 0.2348994493484497, "step": 104990 }, { "epoch": 0.45078694521006674, "grad_norm": 0.798771858215332, "learning_rate": 5.515854194872503e-05, "loss": 0.1506957530975342, "step": 105000 }, { "epoch": 0.45078694521006674, "eval_loss": 0.4291705787181854, "eval_runtime": 27.1182, "eval_samples_per_second": 3.688, "eval_steps_per_second": 3.688, "step": 105000 }, { "epoch": 0.4508298773000867, "grad_norm": 0.0026851454749703407, "learning_rate": 5.5154230228607406e-05, "loss": 0.11021552085876465, "step": 105010 }, { "epoch": 0.4508728093901067, "grad_norm": 5.40506649017334, "learning_rate": 5.514991850848978e-05, "loss": 0.22361650466918945, "step": 105020 }, { "epoch": 0.45091574148012675, "grad_norm": 0.20230934023857117, "learning_rate": 5.5145606788372154e-05, "loss": 0.16922760009765625, "step": 105030 }, { "epoch": 0.45095867357014674, "grad_norm": 0.03748038411140442, "learning_rate": 5.514129506825453e-05, "loss": 0.1610949993133545, "step": 105040 }, { "epoch": 0.4510016056601667, "grad_norm": 0.15671096742153168, "learning_rate": 5.513698334813691e-05, "loss": 0.20407464504241943, "step": 105050 }, { "epoch": 0.45104453775018677, "grad_norm": 0.0348237119615078, "learning_rate": 5.5132671628019286e-05, "loss": 0.2760026454925537, "step": 105060 }, { "epoch": 0.45108746984020676, "grad_norm": 0.10463076829910278, "learning_rate": 5.512835990790166e-05, "loss": 0.24972622394561766, "step": 105070 }, { "epoch": 0.45113040193022674, "grad_norm": 2.421722412109375, "learning_rate": 5.512404818778404e-05, "loss": 0.030455023050308228, "step": 105080 }, { "epoch": 0.4511733340202468, "grad_norm": 0.026346173137426376, "learning_rate": 5.511973646766642e-05, "loss": 0.2886451959609985, "step": 105090 }, { "epoch": 0.4512162661102668, "grad_norm": 0.0042284042574465275, "learning_rate": 5.5115424747548795e-05, "loss": 0.10194617509841919, "step": 105100 }, { "epoch": 0.45125919820028676, "grad_norm": 1.1205313205718994, "learning_rate": 5.511111302743117e-05, "loss": 0.23844561576843262, "step": 105110 }, { "epoch": 0.4513021302903068, "grad_norm": 0.6483297944068909, "learning_rate": 5.5106801307313536e-05, "loss": 0.3492732524871826, "step": 105120 }, { "epoch": 0.4513450623803268, "grad_norm": 0.04426403343677521, "learning_rate": 5.5102489587195914e-05, "loss": 0.2877930164337158, "step": 105130 }, { "epoch": 0.45138799447034683, "grad_norm": 2.4738221168518066, "learning_rate": 5.509817786707829e-05, "loss": 0.27635483741760253, "step": 105140 }, { "epoch": 0.4514309265603668, "grad_norm": 0.09570404887199402, "learning_rate": 5.509386614696067e-05, "loss": 0.16080989837646484, "step": 105150 }, { "epoch": 0.4514738586503868, "grad_norm": 0.7927491664886475, "learning_rate": 5.5089554426843046e-05, "loss": 0.34032866954803465, "step": 105160 }, { "epoch": 0.45151679074040685, "grad_norm": 0.3021717369556427, "learning_rate": 5.508524270672542e-05, "loss": 0.10694130659103393, "step": 105170 }, { "epoch": 0.45155972283042684, "grad_norm": 0.022390982136130333, "learning_rate": 5.50809309866078e-05, "loss": 0.23462820053100586, "step": 105180 }, { "epoch": 0.4516026549204468, "grad_norm": 0.3534424304962158, "learning_rate": 5.5076619266490185e-05, "loss": 0.3277368783950806, "step": 105190 }, { "epoch": 0.45164558701046686, "grad_norm": 0.13010990619659424, "learning_rate": 5.507230754637255e-05, "loss": 0.3046452760696411, "step": 105200 }, { "epoch": 0.45168851910048685, "grad_norm": 0.014195505529642105, "learning_rate": 5.5067995826254926e-05, "loss": 0.14671356678009034, "step": 105210 }, { "epoch": 0.45173145119050684, "grad_norm": 1.4591064453125, "learning_rate": 5.50636841061373e-05, "loss": 0.251104474067688, "step": 105220 }, { "epoch": 0.4517743832805269, "grad_norm": 0.0026446059346199036, "learning_rate": 5.505937238601968e-05, "loss": 0.23639440536499023, "step": 105230 }, { "epoch": 0.45181731537054687, "grad_norm": 0.11398119479417801, "learning_rate": 5.505506066590206e-05, "loss": 0.1752384662628174, "step": 105240 }, { "epoch": 0.45186024746056686, "grad_norm": 1.354035496711731, "learning_rate": 5.5050748945784435e-05, "loss": 0.2977996587753296, "step": 105250 }, { "epoch": 0.4519031795505869, "grad_norm": 3.3216819763183594, "learning_rate": 5.504643722566681e-05, "loss": 0.33972654342651365, "step": 105260 }, { "epoch": 0.4519461116406069, "grad_norm": 0.008711867034435272, "learning_rate": 5.504212550554919e-05, "loss": 0.13020092248916626, "step": 105270 }, { "epoch": 0.45198904373062687, "grad_norm": 0.24850133061408997, "learning_rate": 5.503781378543156e-05, "loss": 0.2301051139831543, "step": 105280 }, { "epoch": 0.4520319758206469, "grad_norm": 0.0009402823052369058, "learning_rate": 5.503350206531394e-05, "loss": 0.24847860336303712, "step": 105290 }, { "epoch": 0.4520749079106669, "grad_norm": 0.003734735306352377, "learning_rate": 5.5029190345196315e-05, "loss": 0.21702148914337158, "step": 105300 }, { "epoch": 0.4521178400006869, "grad_norm": 0.0419401153922081, "learning_rate": 5.502487862507869e-05, "loss": 0.4774662494659424, "step": 105310 }, { "epoch": 0.45216077209070693, "grad_norm": 0.2750690281391144, "learning_rate": 5.502056690496107e-05, "loss": 0.0887843906879425, "step": 105320 }, { "epoch": 0.4522037041807269, "grad_norm": 0.48757249116897583, "learning_rate": 5.501625518484345e-05, "loss": 0.36505000591278075, "step": 105330 }, { "epoch": 0.45224663627074696, "grad_norm": 2.4312708377838135, "learning_rate": 5.5011943464725824e-05, "loss": 0.36078386306762694, "step": 105340 }, { "epoch": 0.45228956836076695, "grad_norm": 0.9031078219413757, "learning_rate": 5.50076317446082e-05, "loss": 0.19097875356674193, "step": 105350 }, { "epoch": 0.45233250045078693, "grad_norm": 0.5606090426445007, "learning_rate": 5.5003320024490566e-05, "loss": 0.08003722429275513, "step": 105360 }, { "epoch": 0.452375432540807, "grad_norm": 0.04021068662405014, "learning_rate": 5.499900830437294e-05, "loss": 0.07359545230865479, "step": 105370 }, { "epoch": 0.45241836463082696, "grad_norm": 0.09893706440925598, "learning_rate": 5.499469658425532e-05, "loss": 0.35075531005859373, "step": 105380 }, { "epoch": 0.45246129672084695, "grad_norm": 0.01163639035075903, "learning_rate": 5.49903848641377e-05, "loss": 0.059896016120910646, "step": 105390 }, { "epoch": 0.452504228810867, "grad_norm": 0.027939310297369957, "learning_rate": 5.4986073144020075e-05, "loss": 0.15823696851730346, "step": 105400 }, { "epoch": 0.452547160900887, "grad_norm": 0.862416684627533, "learning_rate": 5.498176142390246e-05, "loss": 0.264521050453186, "step": 105410 }, { "epoch": 0.45259009299090697, "grad_norm": 2.1110284328460693, "learning_rate": 5.4977449703784836e-05, "loss": 0.29168670177459716, "step": 105420 }, { "epoch": 0.452633025080927, "grad_norm": 0.08526672422885895, "learning_rate": 5.4973137983667214e-05, "loss": 0.12457125186920166, "step": 105430 }, { "epoch": 0.452675957170947, "grad_norm": 0.030613580718636513, "learning_rate": 5.496882626354958e-05, "loss": 0.2875051021575928, "step": 105440 }, { "epoch": 0.452718889260967, "grad_norm": 0.18582996726036072, "learning_rate": 5.4964514543431955e-05, "loss": 0.0911365807056427, "step": 105450 }, { "epoch": 0.452761821350987, "grad_norm": 0.17034812271595, "learning_rate": 5.496020282331433e-05, "loss": 0.22752113342285157, "step": 105460 }, { "epoch": 0.452804753441007, "grad_norm": 0.5781890749931335, "learning_rate": 5.495589110319671e-05, "loss": 0.3640397071838379, "step": 105470 }, { "epoch": 0.452847685531027, "grad_norm": 4.468594074249268, "learning_rate": 5.495157938307909e-05, "loss": 0.20832431316375732, "step": 105480 }, { "epoch": 0.45289061762104704, "grad_norm": 1.2272095680236816, "learning_rate": 5.4947267662961464e-05, "loss": 0.03705971837043762, "step": 105490 }, { "epoch": 0.45293354971106703, "grad_norm": 0.00048346107359975576, "learning_rate": 5.494295594284384e-05, "loss": 0.3444437265396118, "step": 105500 }, { "epoch": 0.452976481801087, "grad_norm": 0.0061003537848591805, "learning_rate": 5.493864422272622e-05, "loss": 0.2898735046386719, "step": 105510 }, { "epoch": 0.45301941389110706, "grad_norm": 0.12422608584165573, "learning_rate": 5.4934332502608596e-05, "loss": 0.2567976713180542, "step": 105520 }, { "epoch": 0.45306234598112705, "grad_norm": 0.3058219850063324, "learning_rate": 5.493002078249097e-05, "loss": 0.27314488887786864, "step": 105530 }, { "epoch": 0.45310527807114703, "grad_norm": 0.1873115599155426, "learning_rate": 5.4925709062373344e-05, "loss": 0.14641257524490356, "step": 105540 }, { "epoch": 0.4531482101611671, "grad_norm": 10.684101104736328, "learning_rate": 5.492139734225572e-05, "loss": 0.17531346082687377, "step": 105550 }, { "epoch": 0.45319114225118706, "grad_norm": 0.10758557915687561, "learning_rate": 5.49170856221381e-05, "loss": 0.10541104078292847, "step": 105560 }, { "epoch": 0.4532340743412071, "grad_norm": 0.1083778589963913, "learning_rate": 5.4912773902020476e-05, "loss": 0.16405218839645386, "step": 105570 }, { "epoch": 0.4532770064312271, "grad_norm": 0.05349402502179146, "learning_rate": 5.4908462181902854e-05, "loss": 0.41372342109680177, "step": 105580 }, { "epoch": 0.4533199385212471, "grad_norm": 0.02954784967005253, "learning_rate": 5.490415046178523e-05, "loss": 0.12565935850143434, "step": 105590 }, { "epoch": 0.4533628706112671, "grad_norm": 0.10741877555847168, "learning_rate": 5.489983874166761e-05, "loss": 0.06284834146499634, "step": 105600 }, { "epoch": 0.4534058027012871, "grad_norm": 0.0008346183458343148, "learning_rate": 5.489552702154997e-05, "loss": 0.09273313879966735, "step": 105610 }, { "epoch": 0.4534487347913071, "grad_norm": 0.06353622674942017, "learning_rate": 5.489121530143235e-05, "loss": 0.08961839079856873, "step": 105620 }, { "epoch": 0.45349166688132714, "grad_norm": 3.129249334335327, "learning_rate": 5.4886903581314734e-05, "loss": 0.26601376533508303, "step": 105630 }, { "epoch": 0.4535345989713471, "grad_norm": 0.028794599696993828, "learning_rate": 5.488259186119711e-05, "loss": 0.07939417958259583, "step": 105640 }, { "epoch": 0.4535775310613671, "grad_norm": 29.774160385131836, "learning_rate": 5.487828014107949e-05, "loss": 0.17747390270233154, "step": 105650 }, { "epoch": 0.45362046315138715, "grad_norm": 0.9835271239280701, "learning_rate": 5.4873968420961866e-05, "loss": 0.25790364742279054, "step": 105660 }, { "epoch": 0.45366339524140714, "grad_norm": 0.1610698699951172, "learning_rate": 5.486965670084424e-05, "loss": 0.23586997985839844, "step": 105670 }, { "epoch": 0.45370632733142713, "grad_norm": 2.178144693374634, "learning_rate": 5.486534498072662e-05, "loss": 0.42916340827941896, "step": 105680 }, { "epoch": 0.45374925942144717, "grad_norm": 0.03143854811787605, "learning_rate": 5.4861033260608984e-05, "loss": 0.2098684072494507, "step": 105690 }, { "epoch": 0.45379219151146716, "grad_norm": 0.031577371060848236, "learning_rate": 5.485672154049136e-05, "loss": 0.23385446071624755, "step": 105700 }, { "epoch": 0.45383512360148714, "grad_norm": 0.0011513188946992159, "learning_rate": 5.485240982037374e-05, "loss": 0.1985771417617798, "step": 105710 }, { "epoch": 0.4538780556915072, "grad_norm": 1.0164504051208496, "learning_rate": 5.4848098100256116e-05, "loss": 0.17143828868865968, "step": 105720 }, { "epoch": 0.4539209877815272, "grad_norm": 0.13020484149456024, "learning_rate": 5.484378638013849e-05, "loss": 0.12143751382827758, "step": 105730 }, { "epoch": 0.45396391987154716, "grad_norm": 0.025720888748764992, "learning_rate": 5.483947466002087e-05, "loss": 0.20034878253936766, "step": 105740 }, { "epoch": 0.4540068519615672, "grad_norm": 1.9531680345535278, "learning_rate": 5.483516293990325e-05, "loss": 0.4207042694091797, "step": 105750 }, { "epoch": 0.4540497840515872, "grad_norm": 8.067658424377441, "learning_rate": 5.4830851219785625e-05, "loss": 0.18401105403900148, "step": 105760 }, { "epoch": 0.45409271614160723, "grad_norm": 0.0019036878366023302, "learning_rate": 5.4826539499667996e-05, "loss": 0.3329533815383911, "step": 105770 }, { "epoch": 0.4541356482316272, "grad_norm": 1.9565998315811157, "learning_rate": 5.482222777955037e-05, "loss": 0.3015397071838379, "step": 105780 }, { "epoch": 0.4541785803216472, "grad_norm": 0.07071906328201294, "learning_rate": 5.481791605943275e-05, "loss": 0.09857931733131409, "step": 105790 }, { "epoch": 0.45422151241166725, "grad_norm": 3.0820274353027344, "learning_rate": 5.481360433931513e-05, "loss": 0.15218595266342164, "step": 105800 }, { "epoch": 0.45426444450168724, "grad_norm": 0.03130373731255531, "learning_rate": 5.4809292619197505e-05, "loss": 0.23463423252105714, "step": 105810 }, { "epoch": 0.4543073765917072, "grad_norm": 1.1514767408370972, "learning_rate": 5.480498089907988e-05, "loss": 0.2280792474746704, "step": 105820 }, { "epoch": 0.45435030868172727, "grad_norm": 0.039772145450115204, "learning_rate": 5.480066917896226e-05, "loss": 0.2732717037200928, "step": 105830 }, { "epoch": 0.45439324077174725, "grad_norm": 1.2524688243865967, "learning_rate": 5.479635745884464e-05, "loss": 0.12637274265289306, "step": 105840 }, { "epoch": 0.45443617286176724, "grad_norm": 0.054476819932460785, "learning_rate": 5.4792045738727015e-05, "loss": 0.20405046939849852, "step": 105850 }, { "epoch": 0.4544791049517873, "grad_norm": 0.6834324598312378, "learning_rate": 5.4787734018609385e-05, "loss": 0.2571122646331787, "step": 105860 }, { "epoch": 0.45452203704180727, "grad_norm": 0.043317679315805435, "learning_rate": 5.478342229849176e-05, "loss": 0.1396381139755249, "step": 105870 }, { "epoch": 0.45456496913182726, "grad_norm": 0.023231295868754387, "learning_rate": 5.477911057837414e-05, "loss": 0.3477238416671753, "step": 105880 }, { "epoch": 0.4546079012218473, "grad_norm": 4.941470146179199, "learning_rate": 5.477479885825652e-05, "loss": 0.2937323093414307, "step": 105890 }, { "epoch": 0.4546508333118673, "grad_norm": 0.0022386584896594286, "learning_rate": 5.4770487138138895e-05, "loss": 0.1340307593345642, "step": 105900 }, { "epoch": 0.4546937654018873, "grad_norm": 24.781116485595703, "learning_rate": 5.476617541802127e-05, "loss": 0.1788191556930542, "step": 105910 }, { "epoch": 0.4547366974919073, "grad_norm": 0.0002703432401176542, "learning_rate": 5.476186369790365e-05, "loss": 0.08865943551063538, "step": 105920 }, { "epoch": 0.4547796295819273, "grad_norm": 0.36172252893447876, "learning_rate": 5.475755197778603e-05, "loss": 0.1634334444999695, "step": 105930 }, { "epoch": 0.4548225616719473, "grad_norm": 0.014767971821129322, "learning_rate": 5.475324025766839e-05, "loss": 0.19812369346618652, "step": 105940 }, { "epoch": 0.45486549376196733, "grad_norm": 0.052083760499954224, "learning_rate": 5.474892853755077e-05, "loss": 0.1582965612411499, "step": 105950 }, { "epoch": 0.4549084258519873, "grad_norm": 0.03402931988239288, "learning_rate": 5.4744616817433145e-05, "loss": 0.09308210015296936, "step": 105960 }, { "epoch": 0.4549513579420073, "grad_norm": 0.001018917071633041, "learning_rate": 5.474030509731552e-05, "loss": 0.04058152437210083, "step": 105970 }, { "epoch": 0.45499429003202735, "grad_norm": 0.00025437428848817945, "learning_rate": 5.47359933771979e-05, "loss": 0.21476302146911622, "step": 105980 }, { "epoch": 0.45503722212204734, "grad_norm": 0.7949357032775879, "learning_rate": 5.473168165708028e-05, "loss": 0.22767140865325927, "step": 105990 }, { "epoch": 0.4550801542120674, "grad_norm": 0.001639080117456615, "learning_rate": 5.472736993696266e-05, "loss": 0.25164577960968015, "step": 106000 }, { "epoch": 0.4550801542120674, "eval_loss": 0.41482996940612793, "eval_runtime": 27.1731, "eval_samples_per_second": 3.68, "eval_steps_per_second": 3.68, "step": 106000 }, { "epoch": 0.45512308630208737, "grad_norm": 6.3070597648620605, "learning_rate": 5.472305821684504e-05, "loss": 0.29818522930145264, "step": 106010 }, { "epoch": 0.45516601839210735, "grad_norm": 0.07904084026813507, "learning_rate": 5.47187464967274e-05, "loss": 0.22123868465423585, "step": 106020 }, { "epoch": 0.4552089504821274, "grad_norm": 0.02109706960618496, "learning_rate": 5.471443477660978e-05, "loss": 0.2515869617462158, "step": 106030 }, { "epoch": 0.4552518825721474, "grad_norm": 1.6628992557525635, "learning_rate": 5.471012305649216e-05, "loss": 0.17183525562286378, "step": 106040 }, { "epoch": 0.45529481466216737, "grad_norm": 0.00023969076573848724, "learning_rate": 5.4705811336374534e-05, "loss": 0.1583714485168457, "step": 106050 }, { "epoch": 0.4553377467521874, "grad_norm": 0.0442027822136879, "learning_rate": 5.470149961625691e-05, "loss": 0.06777175664901733, "step": 106060 }, { "epoch": 0.4553806788422074, "grad_norm": 0.03461259976029396, "learning_rate": 5.469718789613929e-05, "loss": 0.3223992347717285, "step": 106070 }, { "epoch": 0.4554236109322274, "grad_norm": 0.0009801493724808097, "learning_rate": 5.4692876176021667e-05, "loss": 0.07548410296440125, "step": 106080 }, { "epoch": 0.4554665430222474, "grad_norm": 0.00773969478905201, "learning_rate": 5.4688564455904044e-05, "loss": 0.36380581855773925, "step": 106090 }, { "epoch": 0.4555094751122674, "grad_norm": 0.9940565228462219, "learning_rate": 5.4684252735786414e-05, "loss": 0.20437259674072267, "step": 106100 }, { "epoch": 0.4555524072022874, "grad_norm": 5.216660499572754, "learning_rate": 5.467994101566879e-05, "loss": 0.33365843296051023, "step": 106110 }, { "epoch": 0.45559533929230744, "grad_norm": 0.005851436872035265, "learning_rate": 5.467562929555117e-05, "loss": 0.1647118330001831, "step": 106120 }, { "epoch": 0.45563827138232743, "grad_norm": 0.002860630862414837, "learning_rate": 5.4671317575433546e-05, "loss": 0.12592718601226807, "step": 106130 }, { "epoch": 0.4556812034723474, "grad_norm": 2.543055295944214, "learning_rate": 5.4667005855315924e-05, "loss": 0.06108694672584534, "step": 106140 }, { "epoch": 0.45572413556236746, "grad_norm": 0.21334119141101837, "learning_rate": 5.46626941351983e-05, "loss": 0.1180303454399109, "step": 106150 }, { "epoch": 0.45576706765238745, "grad_norm": 0.0378948450088501, "learning_rate": 5.465838241508068e-05, "loss": 0.2481198787689209, "step": 106160 }, { "epoch": 0.45580999974240743, "grad_norm": 2.029277801513672, "learning_rate": 5.4654070694963056e-05, "loss": 0.2699997663497925, "step": 106170 }, { "epoch": 0.4558529318324275, "grad_norm": 0.03349459171295166, "learning_rate": 5.464975897484542e-05, "loss": 0.20755038261413575, "step": 106180 }, { "epoch": 0.45589586392244746, "grad_norm": 0.27105414867401123, "learning_rate": 5.46454472547278e-05, "loss": 0.17104097604751586, "step": 106190 }, { "epoch": 0.4559387960124675, "grad_norm": 0.001149525516666472, "learning_rate": 5.4641135534610174e-05, "loss": 0.1373377561569214, "step": 106200 }, { "epoch": 0.4559817281024875, "grad_norm": 0.08853041380643845, "learning_rate": 5.463682381449255e-05, "loss": 0.19982340335845947, "step": 106210 }, { "epoch": 0.4560246601925075, "grad_norm": 0.7553684711456299, "learning_rate": 5.4632512094374936e-05, "loss": 0.24692888259887696, "step": 106220 }, { "epoch": 0.4560675922825275, "grad_norm": 2.763723134994507, "learning_rate": 5.462820037425731e-05, "loss": 0.2985665321350098, "step": 106230 }, { "epoch": 0.4561105243725475, "grad_norm": 0.9032507538795471, "learning_rate": 5.462388865413969e-05, "loss": 0.1298931360244751, "step": 106240 }, { "epoch": 0.4561534564625675, "grad_norm": 0.11341998726129532, "learning_rate": 5.461957693402207e-05, "loss": 0.1300404191017151, "step": 106250 }, { "epoch": 0.45619638855258754, "grad_norm": 0.3565244674682617, "learning_rate": 5.4615265213904445e-05, "loss": 0.26439990997314455, "step": 106260 }, { "epoch": 0.4562393206426075, "grad_norm": 5.0023274421691895, "learning_rate": 5.461095349378681e-05, "loss": 0.39081289768218996, "step": 106270 }, { "epoch": 0.4562822527326275, "grad_norm": 1.4395017623901367, "learning_rate": 5.4606641773669186e-05, "loss": 0.33194308280944823, "step": 106280 }, { "epoch": 0.45632518482264756, "grad_norm": 1.879360318183899, "learning_rate": 5.4602330053551564e-05, "loss": 0.1768111228942871, "step": 106290 }, { "epoch": 0.45636811691266754, "grad_norm": 0.0011820903746411204, "learning_rate": 5.459801833343394e-05, "loss": 0.2108830690383911, "step": 106300 }, { "epoch": 0.45641104900268753, "grad_norm": 4.044092178344727, "learning_rate": 5.459370661331632e-05, "loss": 0.23956904411315919, "step": 106310 }, { "epoch": 0.4564539810927076, "grad_norm": 0.018155014142394066, "learning_rate": 5.4589394893198696e-05, "loss": 0.14681684970855713, "step": 106320 }, { "epoch": 0.45649691318272756, "grad_norm": 2.6040914058685303, "learning_rate": 5.458508317308107e-05, "loss": 0.43901724815368653, "step": 106330 }, { "epoch": 0.45653984527274755, "grad_norm": 0.6595094799995422, "learning_rate": 5.458077145296345e-05, "loss": 0.3422011137008667, "step": 106340 }, { "epoch": 0.4565827773627676, "grad_norm": 0.03562316671013832, "learning_rate": 5.457645973284582e-05, "loss": 0.273145866394043, "step": 106350 }, { "epoch": 0.4566257094527876, "grad_norm": 0.10383918136358261, "learning_rate": 5.45721480127282e-05, "loss": 0.07594062089920044, "step": 106360 }, { "epoch": 0.45666864154280756, "grad_norm": 5.044617176055908, "learning_rate": 5.4567836292610576e-05, "loss": 0.12228653430938721, "step": 106370 }, { "epoch": 0.4567115736328276, "grad_norm": 1.1260477304458618, "learning_rate": 5.456352457249295e-05, "loss": 0.3477102518081665, "step": 106380 }, { "epoch": 0.4567545057228476, "grad_norm": 1.9273653030395508, "learning_rate": 5.455921285237533e-05, "loss": 0.35800995826721194, "step": 106390 }, { "epoch": 0.4567974378128676, "grad_norm": 0.11359955370426178, "learning_rate": 5.455490113225771e-05, "loss": 0.15946197509765625, "step": 106400 }, { "epoch": 0.4568403699028876, "grad_norm": 0.5352416634559631, "learning_rate": 5.4550589412140085e-05, "loss": 0.11623773574829102, "step": 106410 }, { "epoch": 0.4568833019929076, "grad_norm": 1.6490083932876587, "learning_rate": 5.454627769202246e-05, "loss": 0.3729348659515381, "step": 106420 }, { "epoch": 0.45692623408292765, "grad_norm": 2.265371561050415, "learning_rate": 5.4541965971904826e-05, "loss": 0.21114826202392578, "step": 106430 }, { "epoch": 0.45696916617294764, "grad_norm": 5.321807384490967, "learning_rate": 5.453765425178721e-05, "loss": 0.21526637077331542, "step": 106440 }, { "epoch": 0.4570120982629676, "grad_norm": 3.7925004959106445, "learning_rate": 5.453334253166959e-05, "loss": 0.4127657890319824, "step": 106450 }, { "epoch": 0.45705503035298767, "grad_norm": 0.0988013967871666, "learning_rate": 5.4529030811551965e-05, "loss": 0.13126038312911986, "step": 106460 }, { "epoch": 0.45709796244300765, "grad_norm": 0.010926156304776669, "learning_rate": 5.452471909143434e-05, "loss": 0.278293776512146, "step": 106470 }, { "epoch": 0.45714089453302764, "grad_norm": 0.004582703113555908, "learning_rate": 5.452040737131672e-05, "loss": 0.4080946922302246, "step": 106480 }, { "epoch": 0.4571838266230477, "grad_norm": 0.10418860614299774, "learning_rate": 5.45160956511991e-05, "loss": 0.1536099433898926, "step": 106490 }, { "epoch": 0.45722675871306767, "grad_norm": 0.017425889149308205, "learning_rate": 5.4511783931081474e-05, "loss": 0.09255735278129577, "step": 106500 }, { "epoch": 0.45726969080308766, "grad_norm": 0.9808640480041504, "learning_rate": 5.450747221096384e-05, "loss": 0.45423202514648436, "step": 106510 }, { "epoch": 0.4573126228931077, "grad_norm": 0.5739647746086121, "learning_rate": 5.4503160490846215e-05, "loss": 0.2469416856765747, "step": 106520 }, { "epoch": 0.4573555549831277, "grad_norm": 14.410594940185547, "learning_rate": 5.449884877072859e-05, "loss": 0.24274146556854248, "step": 106530 }, { "epoch": 0.4573984870731477, "grad_norm": 9.88992691040039, "learning_rate": 5.449453705061097e-05, "loss": 0.23896820545196534, "step": 106540 }, { "epoch": 0.4574414191631677, "grad_norm": 0.022617029026150703, "learning_rate": 5.449022533049335e-05, "loss": 0.11045522689819336, "step": 106550 }, { "epoch": 0.4574843512531877, "grad_norm": 1.0978119373321533, "learning_rate": 5.4485913610375725e-05, "loss": 0.25470197200775146, "step": 106560 }, { "epoch": 0.4575272833432077, "grad_norm": 0.12575271725654602, "learning_rate": 5.44816018902581e-05, "loss": 0.17973484992980956, "step": 106570 }, { "epoch": 0.45757021543322773, "grad_norm": 1.3486829996109009, "learning_rate": 5.447729017014048e-05, "loss": 0.2512737035751343, "step": 106580 }, { "epoch": 0.4576131475232477, "grad_norm": 1.4863824844360352, "learning_rate": 5.447297845002286e-05, "loss": 0.2442696809768677, "step": 106590 }, { "epoch": 0.4576560796132677, "grad_norm": 0.02740716002881527, "learning_rate": 5.446866672990523e-05, "loss": 0.12065447568893432, "step": 106600 }, { "epoch": 0.45769901170328775, "grad_norm": 0.007653544191271067, "learning_rate": 5.4464355009787605e-05, "loss": 0.09701498150825501, "step": 106610 }, { "epoch": 0.45774194379330774, "grad_norm": 0.006488314364105463, "learning_rate": 5.446004328966998e-05, "loss": 0.12032349109649658, "step": 106620 }, { "epoch": 0.4577848758833278, "grad_norm": 2.8242900371551514, "learning_rate": 5.445573156955236e-05, "loss": 0.3414148330688477, "step": 106630 }, { "epoch": 0.45782780797334777, "grad_norm": 1.6868880987167358, "learning_rate": 5.445141984943474e-05, "loss": 0.2605616331100464, "step": 106640 }, { "epoch": 0.45787074006336775, "grad_norm": 0.0032382814679294825, "learning_rate": 5.4447108129317114e-05, "loss": 0.16553410291671752, "step": 106650 }, { "epoch": 0.4579136721533878, "grad_norm": 6.745307922363281, "learning_rate": 5.444279640919949e-05, "loss": 0.24076588153839112, "step": 106660 }, { "epoch": 0.4579566042434078, "grad_norm": 0.6660043597221375, "learning_rate": 5.443848468908187e-05, "loss": 0.22255656719207764, "step": 106670 }, { "epoch": 0.45799953633342777, "grad_norm": 0.03691640496253967, "learning_rate": 5.443417296896424e-05, "loss": 0.21677632331848146, "step": 106680 }, { "epoch": 0.4580424684234478, "grad_norm": 1.0660126209259033, "learning_rate": 5.442986124884662e-05, "loss": 0.4634716510772705, "step": 106690 }, { "epoch": 0.4580854005134678, "grad_norm": 0.06456798315048218, "learning_rate": 5.4425549528728994e-05, "loss": 0.14308961629867553, "step": 106700 }, { "epoch": 0.4581283326034878, "grad_norm": 5.615670204162598, "learning_rate": 5.442123780861137e-05, "loss": 0.28072869777679443, "step": 106710 }, { "epoch": 0.45817126469350783, "grad_norm": 1.6977615356445312, "learning_rate": 5.441692608849375e-05, "loss": 0.22022032737731934, "step": 106720 }, { "epoch": 0.4582141967835278, "grad_norm": 0.4748421907424927, "learning_rate": 5.4412614368376126e-05, "loss": 0.12203251123428345, "step": 106730 }, { "epoch": 0.4582571288735478, "grad_norm": 0.09836887568235397, "learning_rate": 5.44083026482585e-05, "loss": 0.1722171664237976, "step": 106740 }, { "epoch": 0.45830006096356785, "grad_norm": 0.15481862425804138, "learning_rate": 5.440399092814088e-05, "loss": 0.22251276969909667, "step": 106750 }, { "epoch": 0.45834299305358783, "grad_norm": 0.6792683005332947, "learning_rate": 5.4399679208023245e-05, "loss": 0.15424517393112183, "step": 106760 }, { "epoch": 0.4583859251436078, "grad_norm": 2.5286638736724854, "learning_rate": 5.439536748790562e-05, "loss": 0.23278870582580566, "step": 106770 }, { "epoch": 0.45842885723362786, "grad_norm": 0.0009078033617697656, "learning_rate": 5.4391055767788e-05, "loss": 0.343894362449646, "step": 106780 }, { "epoch": 0.45847178932364785, "grad_norm": 0.2713935077190399, "learning_rate": 5.4386744047670377e-05, "loss": 0.22668204307556153, "step": 106790 }, { "epoch": 0.45851472141366784, "grad_norm": 5.523571491241455, "learning_rate": 5.4382432327552754e-05, "loss": 0.17648887634277344, "step": 106800 }, { "epoch": 0.4585576535036879, "grad_norm": 0.008552341721951962, "learning_rate": 5.437812060743513e-05, "loss": 0.09792098999023438, "step": 106810 }, { "epoch": 0.45860058559370787, "grad_norm": 2.73484468460083, "learning_rate": 5.4373808887317515e-05, "loss": 0.12865368127822877, "step": 106820 }, { "epoch": 0.45864351768372785, "grad_norm": 0.06436431407928467, "learning_rate": 5.436949716719989e-05, "loss": 0.28464598655700685, "step": 106830 }, { "epoch": 0.4586864497737479, "grad_norm": 0.030549952760338783, "learning_rate": 5.4365185447082256e-05, "loss": 0.01779765635728836, "step": 106840 }, { "epoch": 0.4587293818637679, "grad_norm": 0.03825048729777336, "learning_rate": 5.4360873726964634e-05, "loss": 0.21697518825531006, "step": 106850 }, { "epoch": 0.4587723139537879, "grad_norm": 0.00542847765609622, "learning_rate": 5.435656200684701e-05, "loss": 0.04956367015838623, "step": 106860 }, { "epoch": 0.4588152460438079, "grad_norm": 0.3514080345630646, "learning_rate": 5.435225028672939e-05, "loss": 0.18809044361114502, "step": 106870 }, { "epoch": 0.4588581781338279, "grad_norm": 0.012438662350177765, "learning_rate": 5.4347938566611766e-05, "loss": 0.08882975578308105, "step": 106880 }, { "epoch": 0.45890111022384794, "grad_norm": 0.25532791018486023, "learning_rate": 5.434362684649414e-05, "loss": 0.24946775436401367, "step": 106890 }, { "epoch": 0.45894404231386793, "grad_norm": 5.779187202453613, "learning_rate": 5.433931512637652e-05, "loss": 0.2366427183151245, "step": 106900 }, { "epoch": 0.4589869744038879, "grad_norm": 0.01807689107954502, "learning_rate": 5.43350034062589e-05, "loss": 0.20517139434814452, "step": 106910 }, { "epoch": 0.45902990649390796, "grad_norm": 0.014423569664359093, "learning_rate": 5.433069168614127e-05, "loss": 0.15260753631591797, "step": 106920 }, { "epoch": 0.45907283858392794, "grad_norm": 0.008579591289162636, "learning_rate": 5.4326379966023646e-05, "loss": 0.23585269451141358, "step": 106930 }, { "epoch": 0.45911577067394793, "grad_norm": 1.2639870643615723, "learning_rate": 5.432206824590602e-05, "loss": 0.21346158981323243, "step": 106940 }, { "epoch": 0.459158702763968, "grad_norm": 0.5312317609786987, "learning_rate": 5.43177565257884e-05, "loss": 0.2956232070922852, "step": 106950 }, { "epoch": 0.45920163485398796, "grad_norm": 0.003094247542321682, "learning_rate": 5.431344480567078e-05, "loss": 0.15500446557998657, "step": 106960 }, { "epoch": 0.45924456694400795, "grad_norm": 0.17038699984550476, "learning_rate": 5.4309133085553155e-05, "loss": 0.04464645087718964, "step": 106970 }, { "epoch": 0.459287499034028, "grad_norm": 0.003357459092512727, "learning_rate": 5.430482136543553e-05, "loss": 0.1722028970718384, "step": 106980 }, { "epoch": 0.459330431124048, "grad_norm": 0.051209039986133575, "learning_rate": 5.430050964531791e-05, "loss": 0.30563194751739503, "step": 106990 }, { "epoch": 0.45937336321406796, "grad_norm": 0.14684444665908813, "learning_rate": 5.429619792520029e-05, "loss": 0.11479036808013916, "step": 107000 }, { "epoch": 0.45937336321406796, "eval_loss": 0.410770058631897, "eval_runtime": 27.1924, "eval_samples_per_second": 3.677, "eval_steps_per_second": 3.677, "step": 107000 }, { "epoch": 0.459416295304088, "grad_norm": 0.0816546231508255, "learning_rate": 5.429188620508265e-05, "loss": 0.11942390203475953, "step": 107010 }, { "epoch": 0.459459227394108, "grad_norm": 1.5017467737197876, "learning_rate": 5.428757448496503e-05, "loss": 0.23939263820648193, "step": 107020 }, { "epoch": 0.459502159484128, "grad_norm": 0.0422968752682209, "learning_rate": 5.4283262764847406e-05, "loss": 0.09964839220046998, "step": 107030 }, { "epoch": 0.459545091574148, "grad_norm": 0.0042203571647405624, "learning_rate": 5.427895104472979e-05, "loss": 0.3719061613082886, "step": 107040 }, { "epoch": 0.459588023664168, "grad_norm": 0.25797560811042786, "learning_rate": 5.427463932461217e-05, "loss": 0.0998129665851593, "step": 107050 }, { "epoch": 0.45963095575418805, "grad_norm": 1.907709002494812, "learning_rate": 5.4270327604494544e-05, "loss": 0.31435647010803225, "step": 107060 }, { "epoch": 0.45967388784420804, "grad_norm": 0.050441499799489975, "learning_rate": 5.426601588437692e-05, "loss": 0.007012385129928589, "step": 107070 }, { "epoch": 0.459716819934228, "grad_norm": 0.015389510430395603, "learning_rate": 5.42617041642593e-05, "loss": 0.12824069261550902, "step": 107080 }, { "epoch": 0.45975975202424807, "grad_norm": 1.227673888206482, "learning_rate": 5.425739244414166e-05, "loss": 0.40938620567321776, "step": 107090 }, { "epoch": 0.45980268411426806, "grad_norm": 0.0380869060754776, "learning_rate": 5.425308072402404e-05, "loss": 0.3142383337020874, "step": 107100 }, { "epoch": 0.45984561620428804, "grad_norm": 0.0016160336090251803, "learning_rate": 5.424876900390642e-05, "loss": 0.21405341625213622, "step": 107110 }, { "epoch": 0.4598885482943081, "grad_norm": 3.592622995376587, "learning_rate": 5.4244457283788795e-05, "loss": 0.1815681576728821, "step": 107120 }, { "epoch": 0.4599314803843281, "grad_norm": 0.01647309958934784, "learning_rate": 5.424014556367117e-05, "loss": 0.21015794277191163, "step": 107130 }, { "epoch": 0.45997441247434806, "grad_norm": 1.1071962118148804, "learning_rate": 5.423583384355355e-05, "loss": 0.36042187213897703, "step": 107140 }, { "epoch": 0.4600173445643681, "grad_norm": 0.0012331603793427348, "learning_rate": 5.423152212343593e-05, "loss": 0.11302400827407837, "step": 107150 }, { "epoch": 0.4600602766543881, "grad_norm": 0.006316584534943104, "learning_rate": 5.4227210403318304e-05, "loss": 0.3362801313400269, "step": 107160 }, { "epoch": 0.4601032087444081, "grad_norm": 0.020972879603505135, "learning_rate": 5.4222898683200675e-05, "loss": 0.10412032604217529, "step": 107170 }, { "epoch": 0.4601461408344281, "grad_norm": 5.101585865020752, "learning_rate": 5.421858696308305e-05, "loss": 0.18556244373321534, "step": 107180 }, { "epoch": 0.4601890729244481, "grad_norm": 0.0018986143404617906, "learning_rate": 5.421427524296543e-05, "loss": 0.15888286828994752, "step": 107190 }, { "epoch": 0.4602320050144681, "grad_norm": 1.5135869979858398, "learning_rate": 5.420996352284781e-05, "loss": 0.37897820472717286, "step": 107200 }, { "epoch": 0.46027493710448814, "grad_norm": 0.06372527033090591, "learning_rate": 5.4205651802730184e-05, "loss": 0.29553487300872805, "step": 107210 }, { "epoch": 0.4603178691945081, "grad_norm": 1.9978270530700684, "learning_rate": 5.420134008261256e-05, "loss": 0.4286454677581787, "step": 107220 }, { "epoch": 0.4603608012845281, "grad_norm": 0.21217115223407745, "learning_rate": 5.419702836249494e-05, "loss": 0.2591038703918457, "step": 107230 }, { "epoch": 0.46040373337454815, "grad_norm": 0.09320700168609619, "learning_rate": 5.4192716642377316e-05, "loss": 0.26857402324676516, "step": 107240 }, { "epoch": 0.46044666546456814, "grad_norm": 1.601436734199524, "learning_rate": 5.418840492225968e-05, "loss": 0.19982035160064698, "step": 107250 }, { "epoch": 0.4604895975545881, "grad_norm": 0.6431751847267151, "learning_rate": 5.4184093202142064e-05, "loss": 0.31042304039001467, "step": 107260 }, { "epoch": 0.46053252964460817, "grad_norm": 6.150503635406494, "learning_rate": 5.417978148202444e-05, "loss": 0.2688772439956665, "step": 107270 }, { "epoch": 0.46057546173462816, "grad_norm": 1.0778108835220337, "learning_rate": 5.417546976190682e-05, "loss": 0.13753099441528321, "step": 107280 }, { "epoch": 0.4606183938246482, "grad_norm": 1.964769721031189, "learning_rate": 5.4171158041789196e-05, "loss": 0.3952221393585205, "step": 107290 }, { "epoch": 0.4606613259146682, "grad_norm": 0.42801520228385925, "learning_rate": 5.4166846321671574e-05, "loss": 0.38815133571624755, "step": 107300 }, { "epoch": 0.46070425800468817, "grad_norm": 0.25493547320365906, "learning_rate": 5.416253460155395e-05, "loss": 0.03624766170978546, "step": 107310 }, { "epoch": 0.4607471900947082, "grad_norm": 0.002436437178403139, "learning_rate": 5.415822288143633e-05, "loss": 0.19822332859039307, "step": 107320 }, { "epoch": 0.4607901221847282, "grad_norm": 0.02002480812370777, "learning_rate": 5.4153911161318706e-05, "loss": 0.2200617551803589, "step": 107330 }, { "epoch": 0.4608330542747482, "grad_norm": 0.013750090263783932, "learning_rate": 5.414959944120107e-05, "loss": 0.15083693265914916, "step": 107340 }, { "epoch": 0.46087598636476823, "grad_norm": 7.042511463165283, "learning_rate": 5.414528772108345e-05, "loss": 0.03854614496231079, "step": 107350 }, { "epoch": 0.4609189184547882, "grad_norm": 0.013194686733186245, "learning_rate": 5.4140976000965824e-05, "loss": 0.25104031562805174, "step": 107360 }, { "epoch": 0.4609618505448082, "grad_norm": 0.3216243386268616, "learning_rate": 5.41366642808482e-05, "loss": 0.20811998844146729, "step": 107370 }, { "epoch": 0.46100478263482825, "grad_norm": 0.005275218281894922, "learning_rate": 5.413235256073058e-05, "loss": 0.18954060077667237, "step": 107380 }, { "epoch": 0.46104771472484823, "grad_norm": 1.6544564962387085, "learning_rate": 5.4128040840612956e-05, "loss": 0.37151012420654295, "step": 107390 }, { "epoch": 0.4610906468148682, "grad_norm": 0.029340092092752457, "learning_rate": 5.4123729120495333e-05, "loss": 0.10839523077011108, "step": 107400 }, { "epoch": 0.46113357890488826, "grad_norm": 0.0010873668361455202, "learning_rate": 5.411941740037772e-05, "loss": 0.22068979740142822, "step": 107410 }, { "epoch": 0.46117651099490825, "grad_norm": 0.7604783177375793, "learning_rate": 5.411510568026008e-05, "loss": 0.29136641025543214, "step": 107420 }, { "epoch": 0.46121944308492824, "grad_norm": 2.561589241027832, "learning_rate": 5.411079396014246e-05, "loss": 0.2376845359802246, "step": 107430 }, { "epoch": 0.4612623751749483, "grad_norm": 0.03527192771434784, "learning_rate": 5.4106482240024836e-05, "loss": 0.3317641496658325, "step": 107440 }, { "epoch": 0.46130530726496827, "grad_norm": 0.05285614728927612, "learning_rate": 5.4102170519907213e-05, "loss": 0.2993663549423218, "step": 107450 }, { "epoch": 0.46134823935498825, "grad_norm": 0.08579917997121811, "learning_rate": 5.409785879978959e-05, "loss": 0.454376745223999, "step": 107460 }, { "epoch": 0.4613911714450083, "grad_norm": 2.122178316116333, "learning_rate": 5.409354707967197e-05, "loss": 0.35166881084442136, "step": 107470 }, { "epoch": 0.4614341035350283, "grad_norm": 1.0269190073013306, "learning_rate": 5.4089235359554345e-05, "loss": 0.2291714906692505, "step": 107480 }, { "epoch": 0.4614770356250483, "grad_norm": 0.23851673305034637, "learning_rate": 5.408492363943672e-05, "loss": 0.12133759260177612, "step": 107490 }, { "epoch": 0.4615199677150683, "grad_norm": 3.512455940246582, "learning_rate": 5.408061191931909e-05, "loss": 0.14880177974700928, "step": 107500 }, { "epoch": 0.4615628998050883, "grad_norm": 0.008847239427268505, "learning_rate": 5.407630019920147e-05, "loss": 0.2780074834823608, "step": 107510 }, { "epoch": 0.46160583189510834, "grad_norm": 0.14573852717876434, "learning_rate": 5.407198847908385e-05, "loss": 0.13713104724884034, "step": 107520 }, { "epoch": 0.46164876398512833, "grad_norm": 0.02383585087954998, "learning_rate": 5.4067676758966225e-05, "loss": 0.4326714038848877, "step": 107530 }, { "epoch": 0.4616916960751483, "grad_norm": 2.5402002334594727, "learning_rate": 5.40633650388486e-05, "loss": 0.39748458862304686, "step": 107540 }, { "epoch": 0.46173462816516836, "grad_norm": 0.00459776958450675, "learning_rate": 5.405905331873098e-05, "loss": 0.11582623720169068, "step": 107550 }, { "epoch": 0.46177756025518835, "grad_norm": 0.00286311749368906, "learning_rate": 5.405474159861336e-05, "loss": 0.28338334560394285, "step": 107560 }, { "epoch": 0.46182049234520833, "grad_norm": 2.2359275817871094, "learning_rate": 5.4050429878495735e-05, "loss": 0.19136548042297363, "step": 107570 }, { "epoch": 0.4618634244352284, "grad_norm": 1.865877389907837, "learning_rate": 5.40461181583781e-05, "loss": 0.21330225467681885, "step": 107580 }, { "epoch": 0.46190635652524836, "grad_norm": 0.017468079924583435, "learning_rate": 5.4041806438260476e-05, "loss": 0.08362210988998413, "step": 107590 }, { "epoch": 0.46194928861526835, "grad_norm": 0.003552723675966263, "learning_rate": 5.403749471814285e-05, "loss": 0.3091665983200073, "step": 107600 }, { "epoch": 0.4619922207052884, "grad_norm": 0.2940342426300049, "learning_rate": 5.403318299802523e-05, "loss": 0.22590343952178954, "step": 107610 }, { "epoch": 0.4620351527953084, "grad_norm": 0.058316994458436966, "learning_rate": 5.402887127790761e-05, "loss": 0.20781214237213136, "step": 107620 }, { "epoch": 0.46207808488532837, "grad_norm": 0.006735431496053934, "learning_rate": 5.402455955778999e-05, "loss": 0.41092872619628906, "step": 107630 }, { "epoch": 0.4621210169753484, "grad_norm": 0.0062255957163870335, "learning_rate": 5.402024783767237e-05, "loss": 0.23068614006042482, "step": 107640 }, { "epoch": 0.4621639490653684, "grad_norm": 0.05530041456222534, "learning_rate": 5.401593611755475e-05, "loss": 0.15006642341613768, "step": 107650 }, { "epoch": 0.4622068811553884, "grad_norm": 0.00756365992128849, "learning_rate": 5.4011624397437124e-05, "loss": 0.2585622310638428, "step": 107660 }, { "epoch": 0.4622498132454084, "grad_norm": 0.8214569687843323, "learning_rate": 5.400731267731949e-05, "loss": 0.1466231107711792, "step": 107670 }, { "epoch": 0.4622927453354284, "grad_norm": 5.284905910491943, "learning_rate": 5.4003000957201865e-05, "loss": 0.4204678535461426, "step": 107680 }, { "epoch": 0.4623356774254484, "grad_norm": 3.108691692352295, "learning_rate": 5.399868923708424e-05, "loss": 0.2649915456771851, "step": 107690 }, { "epoch": 0.46237860951546844, "grad_norm": 3.194406032562256, "learning_rate": 5.399437751696662e-05, "loss": 0.2339545726776123, "step": 107700 }, { "epoch": 0.46242154160548843, "grad_norm": 1.4864603281021118, "learning_rate": 5.3990065796849e-05, "loss": 0.2706879138946533, "step": 107710 }, { "epoch": 0.46246447369550847, "grad_norm": 1.7163327932357788, "learning_rate": 5.3985754076731375e-05, "loss": 0.26778957843780515, "step": 107720 }, { "epoch": 0.46250740578552846, "grad_norm": 0.005073135253041983, "learning_rate": 5.398144235661375e-05, "loss": 0.18886086940765381, "step": 107730 }, { "epoch": 0.46255033787554845, "grad_norm": 3.492783308029175, "learning_rate": 5.397713063649613e-05, "loss": 0.3263582468032837, "step": 107740 }, { "epoch": 0.4625932699655685, "grad_norm": 1.7698097229003906, "learning_rate": 5.39728189163785e-05, "loss": 0.3023443460464478, "step": 107750 }, { "epoch": 0.4626362020555885, "grad_norm": 0.1830163300037384, "learning_rate": 5.396850719626088e-05, "loss": 0.168390691280365, "step": 107760 }, { "epoch": 0.46267913414560846, "grad_norm": 0.008062989450991154, "learning_rate": 5.3964195476143255e-05, "loss": 0.2124844789505005, "step": 107770 }, { "epoch": 0.4627220662356285, "grad_norm": 0.07734813541173935, "learning_rate": 5.395988375602563e-05, "loss": 0.20680534839630127, "step": 107780 }, { "epoch": 0.4627649983256485, "grad_norm": 0.07653836160898209, "learning_rate": 5.395557203590801e-05, "loss": 0.14306381940841675, "step": 107790 }, { "epoch": 0.4628079304156685, "grad_norm": 0.08502493053674698, "learning_rate": 5.3951260315790387e-05, "loss": 0.07291386127471924, "step": 107800 }, { "epoch": 0.4628508625056885, "grad_norm": 2.5778799057006836, "learning_rate": 5.3946948595672764e-05, "loss": 0.28781633377075194, "step": 107810 }, { "epoch": 0.4628937945957085, "grad_norm": 0.006881984416395426, "learning_rate": 5.394263687555514e-05, "loss": 0.12385448217391967, "step": 107820 }, { "epoch": 0.4629367266857285, "grad_norm": 1.7388111352920532, "learning_rate": 5.3938325155437505e-05, "loss": 0.41140213012695315, "step": 107830 }, { "epoch": 0.46297965877574854, "grad_norm": 1.329716682434082, "learning_rate": 5.393401343531988e-05, "loss": 0.1415262460708618, "step": 107840 }, { "epoch": 0.4630225908657685, "grad_norm": 3.928546905517578, "learning_rate": 5.3929701715202266e-05, "loss": 0.25690150260925293, "step": 107850 }, { "epoch": 0.4630655229557885, "grad_norm": 0.09528861194849014, "learning_rate": 5.3925389995084644e-05, "loss": 0.1600080370903015, "step": 107860 }, { "epoch": 0.46310845504580855, "grad_norm": 0.04780289903283119, "learning_rate": 5.392107827496702e-05, "loss": 0.0948940396308899, "step": 107870 }, { "epoch": 0.46315138713582854, "grad_norm": 0.505102813243866, "learning_rate": 5.39167665548494e-05, "loss": 0.20008227825164795, "step": 107880 }, { "epoch": 0.4631943192258485, "grad_norm": 1.3333100080490112, "learning_rate": 5.3912454834731776e-05, "loss": 0.38938562870025634, "step": 107890 }, { "epoch": 0.46323725131586857, "grad_norm": 0.10062599927186966, "learning_rate": 5.390814311461415e-05, "loss": 0.1558384656906128, "step": 107900 }, { "epoch": 0.46328018340588856, "grad_norm": 0.2624666690826416, "learning_rate": 5.390383139449652e-05, "loss": 0.34660289287567136, "step": 107910 }, { "epoch": 0.4633231154959086, "grad_norm": 5.808780193328857, "learning_rate": 5.3899519674378894e-05, "loss": 0.2815376043319702, "step": 107920 }, { "epoch": 0.4633660475859286, "grad_norm": 0.02222195826470852, "learning_rate": 5.389520795426127e-05, "loss": 0.1592707395553589, "step": 107930 }, { "epoch": 0.4634089796759486, "grad_norm": 0.1832973062992096, "learning_rate": 5.389089623414365e-05, "loss": 0.051647549867630003, "step": 107940 }, { "epoch": 0.4634519117659686, "grad_norm": 1.8670419454574585, "learning_rate": 5.3886584514026026e-05, "loss": 0.17604377269744872, "step": 107950 }, { "epoch": 0.4634948438559886, "grad_norm": 2.1626927852630615, "learning_rate": 5.3882272793908404e-05, "loss": 0.2858207941055298, "step": 107960 }, { "epoch": 0.4635377759460086, "grad_norm": 1.8692584037780762, "learning_rate": 5.387796107379078e-05, "loss": 0.28811016082763674, "step": 107970 }, { "epoch": 0.46358070803602863, "grad_norm": 1.229792594909668, "learning_rate": 5.387364935367316e-05, "loss": 0.19653548002243043, "step": 107980 }, { "epoch": 0.4636236401260486, "grad_norm": 1.7526077032089233, "learning_rate": 5.386933763355553e-05, "loss": 0.10595974922180176, "step": 107990 }, { "epoch": 0.4636665722160686, "grad_norm": 0.05710328370332718, "learning_rate": 5.3865025913437906e-05, "loss": 0.21571879386901854, "step": 108000 }, { "epoch": 0.4636665722160686, "eval_loss": 0.41005194187164307, "eval_runtime": 27.2514, "eval_samples_per_second": 3.67, "eval_steps_per_second": 3.67, "step": 108000 }, { "epoch": 0.46370950430608865, "grad_norm": 0.7972310185432434, "learning_rate": 5.3860714193320284e-05, "loss": 0.17171356678009034, "step": 108010 }, { "epoch": 0.46375243639610864, "grad_norm": 2.4184482097625732, "learning_rate": 5.385640247320266e-05, "loss": 0.33305227756500244, "step": 108020 }, { "epoch": 0.4637953684861286, "grad_norm": 1.5586384534835815, "learning_rate": 5.385209075308504e-05, "loss": 0.2196629285812378, "step": 108030 }, { "epoch": 0.46383830057614867, "grad_norm": 3.1902902126312256, "learning_rate": 5.3847779032967416e-05, "loss": 0.2836449146270752, "step": 108040 }, { "epoch": 0.46388123266616865, "grad_norm": 0.7188327312469482, "learning_rate": 5.384346731284979e-05, "loss": 0.2961325168609619, "step": 108050 }, { "epoch": 0.46392416475618864, "grad_norm": 0.04909314587712288, "learning_rate": 5.383915559273217e-05, "loss": 0.22378733158111572, "step": 108060 }, { "epoch": 0.4639670968462087, "grad_norm": 0.5020912885665894, "learning_rate": 5.383484387261455e-05, "loss": 0.21214513778686522, "step": 108070 }, { "epoch": 0.46401002893622867, "grad_norm": 1.335729718208313, "learning_rate": 5.383053215249692e-05, "loss": 0.2735942602157593, "step": 108080 }, { "epoch": 0.46405296102624866, "grad_norm": 0.004877585452049971, "learning_rate": 5.3826220432379296e-05, "loss": 0.10161428451538086, "step": 108090 }, { "epoch": 0.4640958931162687, "grad_norm": 0.014859105460345745, "learning_rate": 5.382190871226167e-05, "loss": 0.19523313045501708, "step": 108100 }, { "epoch": 0.4641388252062887, "grad_norm": 0.0427483394742012, "learning_rate": 5.381759699214405e-05, "loss": 0.3561259746551514, "step": 108110 }, { "epoch": 0.4641817572963087, "grad_norm": 0.011663105338811874, "learning_rate": 5.381328527202643e-05, "loss": 0.11659363508224488, "step": 108120 }, { "epoch": 0.4642246893863287, "grad_norm": 0.10213266313076019, "learning_rate": 5.3808973551908805e-05, "loss": 0.18022869825363158, "step": 108130 }, { "epoch": 0.4642676214763487, "grad_norm": 0.03695356100797653, "learning_rate": 5.380466183179118e-05, "loss": 0.2642909288406372, "step": 108140 }, { "epoch": 0.46431055356636874, "grad_norm": 1.6254346370697021, "learning_rate": 5.380035011167356e-05, "loss": 0.2565154552459717, "step": 108150 }, { "epoch": 0.46435348565638873, "grad_norm": 1.142285943031311, "learning_rate": 5.3796038391555923e-05, "loss": 0.35707571506500246, "step": 108160 }, { "epoch": 0.4643964177464087, "grad_norm": 0.008432451635599136, "learning_rate": 5.37917266714383e-05, "loss": 0.2093576431274414, "step": 108170 }, { "epoch": 0.46443934983642876, "grad_norm": 1.543778896331787, "learning_rate": 5.378741495132068e-05, "loss": 0.19204574823379517, "step": 108180 }, { "epoch": 0.46448228192644875, "grad_norm": 0.12307104468345642, "learning_rate": 5.3783103231203055e-05, "loss": 0.20635404586791992, "step": 108190 }, { "epoch": 0.46452521401646873, "grad_norm": 9.1827974319458, "learning_rate": 5.377879151108543e-05, "loss": 0.3973682880401611, "step": 108200 }, { "epoch": 0.4645681461064888, "grad_norm": 0.09982259571552277, "learning_rate": 5.377447979096781e-05, "loss": 0.08464877605438233, "step": 108210 }, { "epoch": 0.46461107819650876, "grad_norm": 0.39693737030029297, "learning_rate": 5.3770168070850194e-05, "loss": 0.2325838804244995, "step": 108220 }, { "epoch": 0.46465401028652875, "grad_norm": 0.02635379508137703, "learning_rate": 5.376585635073257e-05, "loss": 0.3548180103302002, "step": 108230 }, { "epoch": 0.4646969423765488, "grad_norm": 0.046965520828962326, "learning_rate": 5.3761544630614935e-05, "loss": 0.22502679824829103, "step": 108240 }, { "epoch": 0.4647398744665688, "grad_norm": 0.8190841674804688, "learning_rate": 5.375723291049731e-05, "loss": 0.19198547601699828, "step": 108250 }, { "epoch": 0.46478280655658877, "grad_norm": 0.005247071385383606, "learning_rate": 5.375292119037969e-05, "loss": 0.1480405807495117, "step": 108260 }, { "epoch": 0.4648257386466088, "grad_norm": 0.0025028225500136614, "learning_rate": 5.374860947026207e-05, "loss": 0.25399935245513916, "step": 108270 }, { "epoch": 0.4648686707366288, "grad_norm": 0.04282236471772194, "learning_rate": 5.3744297750144445e-05, "loss": 0.17009189128875732, "step": 108280 }, { "epoch": 0.4649116028266488, "grad_norm": 12.793172836303711, "learning_rate": 5.373998603002682e-05, "loss": 0.4070758819580078, "step": 108290 }, { "epoch": 0.4649545349166688, "grad_norm": 0.03920111805200577, "learning_rate": 5.37356743099092e-05, "loss": 0.12617360353469848, "step": 108300 }, { "epoch": 0.4649974670066888, "grad_norm": 5.277186393737793, "learning_rate": 5.373136258979158e-05, "loss": 0.3769539356231689, "step": 108310 }, { "epoch": 0.4650403990967088, "grad_norm": 0.0842542052268982, "learning_rate": 5.372705086967395e-05, "loss": 0.227150559425354, "step": 108320 }, { "epoch": 0.46508333118672884, "grad_norm": 3.8030989170074463, "learning_rate": 5.3722739149556325e-05, "loss": 0.34054558277130126, "step": 108330 }, { "epoch": 0.46512626327674883, "grad_norm": 3.126784324645996, "learning_rate": 5.37184274294387e-05, "loss": 0.34538271427154543, "step": 108340 }, { "epoch": 0.4651691953667688, "grad_norm": 4.654267311096191, "learning_rate": 5.371411570932108e-05, "loss": 0.3616941452026367, "step": 108350 }, { "epoch": 0.46521212745678886, "grad_norm": 1.575369119644165, "learning_rate": 5.370980398920346e-05, "loss": 0.3219918727874756, "step": 108360 }, { "epoch": 0.46525505954680885, "grad_norm": 0.09393744170665741, "learning_rate": 5.3705492269085834e-05, "loss": 0.38858466148376464, "step": 108370 }, { "epoch": 0.4652979916368289, "grad_norm": 1.2401673793792725, "learning_rate": 5.370118054896821e-05, "loss": 0.342305064201355, "step": 108380 }, { "epoch": 0.4653409237268489, "grad_norm": 0.6935057044029236, "learning_rate": 5.369686882885059e-05, "loss": 0.19834787845611573, "step": 108390 }, { "epoch": 0.46538385581686886, "grad_norm": 0.6531186103820801, "learning_rate": 5.3692557108732966e-05, "loss": 0.20809459686279297, "step": 108400 }, { "epoch": 0.4654267879068889, "grad_norm": 0.16829413175582886, "learning_rate": 5.368824538861533e-05, "loss": 0.20340075492858886, "step": 108410 }, { "epoch": 0.4654697199969089, "grad_norm": 0.010563013143837452, "learning_rate": 5.368393366849771e-05, "loss": 0.3063130140304565, "step": 108420 }, { "epoch": 0.4655126520869289, "grad_norm": 5.520454406738281, "learning_rate": 5.3679621948380085e-05, "loss": 0.17612496614456177, "step": 108430 }, { "epoch": 0.4655555841769489, "grad_norm": 0.07296188920736313, "learning_rate": 5.367531022826247e-05, "loss": 0.18347402811050414, "step": 108440 }, { "epoch": 0.4655985162669689, "grad_norm": 0.04715902730822563, "learning_rate": 5.3670998508144846e-05, "loss": 0.09383673667907715, "step": 108450 }, { "epoch": 0.4656414483569889, "grad_norm": 1.912249207496643, "learning_rate": 5.3666686788027223e-05, "loss": 0.21449682712554932, "step": 108460 }, { "epoch": 0.46568438044700894, "grad_norm": 0.007666005752980709, "learning_rate": 5.36623750679096e-05, "loss": 0.08209252953529358, "step": 108470 }, { "epoch": 0.4657273125370289, "grad_norm": 1.7507456541061401, "learning_rate": 5.365806334779198e-05, "loss": 0.2532007932662964, "step": 108480 }, { "epoch": 0.4657702446270489, "grad_norm": 0.05488888546824455, "learning_rate": 5.365375162767434e-05, "loss": 0.11761891841888428, "step": 108490 }, { "epoch": 0.46581317671706896, "grad_norm": 7.336211681365967, "learning_rate": 5.364943990755672e-05, "loss": 0.3313975095748901, "step": 108500 }, { "epoch": 0.46585610880708894, "grad_norm": 1.9621061086654663, "learning_rate": 5.3645128187439097e-05, "loss": 0.16002817153930665, "step": 108510 }, { "epoch": 0.46589904089710893, "grad_norm": 0.01421435084193945, "learning_rate": 5.3640816467321474e-05, "loss": 0.18043923377990723, "step": 108520 }, { "epoch": 0.46594197298712897, "grad_norm": 0.8421728014945984, "learning_rate": 5.363650474720385e-05, "loss": 0.1255861282348633, "step": 108530 }, { "epoch": 0.46598490507714896, "grad_norm": 2.173349618911743, "learning_rate": 5.363219302708623e-05, "loss": 0.2318701982498169, "step": 108540 }, { "epoch": 0.46602783716716895, "grad_norm": 0.006124487146735191, "learning_rate": 5.3627881306968606e-05, "loss": 0.1734129548072815, "step": 108550 }, { "epoch": 0.466070769257189, "grad_norm": 0.003295590402558446, "learning_rate": 5.362356958685098e-05, "loss": 0.2409308671951294, "step": 108560 }, { "epoch": 0.466113701347209, "grad_norm": 0.4986785650253296, "learning_rate": 5.3619257866733354e-05, "loss": 0.15486088991165162, "step": 108570 }, { "epoch": 0.466156633437229, "grad_norm": 2.084712028503418, "learning_rate": 5.361494614661573e-05, "loss": 0.3027064085006714, "step": 108580 }, { "epoch": 0.466199565527249, "grad_norm": 0.852931797504425, "learning_rate": 5.361063442649811e-05, "loss": 0.12534109354019166, "step": 108590 }, { "epoch": 0.466242497617269, "grad_norm": 0.9069915413856506, "learning_rate": 5.3606322706380486e-05, "loss": 0.4733282089233398, "step": 108600 }, { "epoch": 0.46628542970728903, "grad_norm": 1.9747471809387207, "learning_rate": 5.360201098626286e-05, "loss": 0.21740949153900146, "step": 108610 }, { "epoch": 0.466328361797309, "grad_norm": 0.005773196928203106, "learning_rate": 5.359769926614524e-05, "loss": 0.23748469352722168, "step": 108620 }, { "epoch": 0.466371293887329, "grad_norm": 1.2689443826675415, "learning_rate": 5.359338754602762e-05, "loss": 0.04138025641441345, "step": 108630 }, { "epoch": 0.46641422597734905, "grad_norm": 2.238314151763916, "learning_rate": 5.3589075825909995e-05, "loss": 0.17560259103775025, "step": 108640 }, { "epoch": 0.46645715806736904, "grad_norm": 0.3152078688144684, "learning_rate": 5.358476410579236e-05, "loss": 0.363291072845459, "step": 108650 }, { "epoch": 0.466500090157389, "grad_norm": 1.2952321767807007, "learning_rate": 5.358045238567474e-05, "loss": 0.1913788437843323, "step": 108660 }, { "epoch": 0.46654302224740907, "grad_norm": 5.483338832855225, "learning_rate": 5.357614066555712e-05, "loss": 0.13968578577041627, "step": 108670 }, { "epoch": 0.46658595433742905, "grad_norm": 1.802327275276184, "learning_rate": 5.35718289454395e-05, "loss": 0.3405932903289795, "step": 108680 }, { "epoch": 0.46662888642744904, "grad_norm": 0.006264100782573223, "learning_rate": 5.3567517225321875e-05, "loss": 0.005323518067598343, "step": 108690 }, { "epoch": 0.4666718185174691, "grad_norm": 1.5411258935928345, "learning_rate": 5.356320550520425e-05, "loss": 0.37329974174499514, "step": 108700 }, { "epoch": 0.46671475060748907, "grad_norm": 0.034903932362794876, "learning_rate": 5.355889378508663e-05, "loss": 0.20319321155548095, "step": 108710 }, { "epoch": 0.46675768269750906, "grad_norm": 0.0059880223125219345, "learning_rate": 5.355458206496901e-05, "loss": 0.2106635332107544, "step": 108720 }, { "epoch": 0.4668006147875291, "grad_norm": 0.1818341761827469, "learning_rate": 5.355027034485137e-05, "loss": 0.10562245845794678, "step": 108730 }, { "epoch": 0.4668435468775491, "grad_norm": 3.9489457607269287, "learning_rate": 5.354595862473375e-05, "loss": 0.14832943677902222, "step": 108740 }, { "epoch": 0.4668864789675691, "grad_norm": 0.0023902824614197016, "learning_rate": 5.3541646904616126e-05, "loss": 0.14622147083282472, "step": 108750 }, { "epoch": 0.4669294110575891, "grad_norm": 0.004773691762238741, "learning_rate": 5.35373351844985e-05, "loss": 0.06058769226074219, "step": 108760 }, { "epoch": 0.4669723431476091, "grad_norm": 2.265639305114746, "learning_rate": 5.353302346438088e-05, "loss": 0.18167110681533813, "step": 108770 }, { "epoch": 0.4670152752376291, "grad_norm": 0.02852572314441204, "learning_rate": 5.352871174426326e-05, "loss": 0.08986608982086182, "step": 108780 }, { "epoch": 0.46705820732764913, "grad_norm": 0.047613635659217834, "learning_rate": 5.3524400024145635e-05, "loss": 0.16906614303588868, "step": 108790 }, { "epoch": 0.4671011394176691, "grad_norm": 0.41578468680381775, "learning_rate": 5.352008830402801e-05, "loss": 0.16157785654067994, "step": 108800 }, { "epoch": 0.46714407150768916, "grad_norm": 1.8083186149597168, "learning_rate": 5.351577658391039e-05, "loss": 0.124634051322937, "step": 108810 }, { "epoch": 0.46718700359770915, "grad_norm": 3.3372297286987305, "learning_rate": 5.351146486379276e-05, "loss": 0.23483569622039796, "step": 108820 }, { "epoch": 0.46722993568772914, "grad_norm": 0.03171273320913315, "learning_rate": 5.350715314367514e-05, "loss": 0.40413923263549806, "step": 108830 }, { "epoch": 0.4672728677777492, "grad_norm": 0.006995361298322678, "learning_rate": 5.3502841423557515e-05, "loss": 0.2489945650100708, "step": 108840 }, { "epoch": 0.46731579986776917, "grad_norm": 1.9916613101959229, "learning_rate": 5.349852970343989e-05, "loss": 0.23514945507049562, "step": 108850 }, { "epoch": 0.46735873195778915, "grad_norm": 2.323140859603882, "learning_rate": 5.349421798332227e-05, "loss": 0.27089509963989256, "step": 108860 }, { "epoch": 0.4674016640478092, "grad_norm": 0.2249898463487625, "learning_rate": 5.348990626320465e-05, "loss": 0.37014145851135255, "step": 108870 }, { "epoch": 0.4674445961378292, "grad_norm": 0.0015096982242539525, "learning_rate": 5.3485594543087024e-05, "loss": 0.2118845224380493, "step": 108880 }, { "epoch": 0.46748752822784917, "grad_norm": 0.017406558617949486, "learning_rate": 5.34812828229694e-05, "loss": 0.22873473167419434, "step": 108890 }, { "epoch": 0.4675304603178692, "grad_norm": 0.0044220988638699055, "learning_rate": 5.347697110285177e-05, "loss": 0.25328223705291747, "step": 108900 }, { "epoch": 0.4675733924078892, "grad_norm": 0.4546348750591278, "learning_rate": 5.347265938273415e-05, "loss": 0.1413517713546753, "step": 108910 }, { "epoch": 0.4676163244979092, "grad_norm": 0.05150995030999184, "learning_rate": 5.346834766261653e-05, "loss": 0.42641348838806153, "step": 108920 }, { "epoch": 0.46765925658792923, "grad_norm": 0.05774686112999916, "learning_rate": 5.3464035942498904e-05, "loss": 0.15248560905456543, "step": 108930 }, { "epoch": 0.4677021886779492, "grad_norm": 0.18553832173347473, "learning_rate": 5.345972422238128e-05, "loss": 0.3334579706192017, "step": 108940 }, { "epoch": 0.4677451207679692, "grad_norm": 0.004935821518301964, "learning_rate": 5.345541250226366e-05, "loss": 0.4072174072265625, "step": 108950 }, { "epoch": 0.46778805285798924, "grad_norm": 2.7452855110168457, "learning_rate": 5.3451100782146036e-05, "loss": 0.34284253120422364, "step": 108960 }, { "epoch": 0.46783098494800923, "grad_norm": 0.3916510045528412, "learning_rate": 5.3446789062028414e-05, "loss": 0.1928103446960449, "step": 108970 }, { "epoch": 0.4678739170380292, "grad_norm": 0.15699084103107452, "learning_rate": 5.344247734191078e-05, "loss": 0.14758397340774537, "step": 108980 }, { "epoch": 0.46791684912804926, "grad_norm": 10.559981346130371, "learning_rate": 5.3438165621793155e-05, "loss": 0.16865952014923097, "step": 108990 }, { "epoch": 0.46795978121806925, "grad_norm": 0.046823471784591675, "learning_rate": 5.343385390167553e-05, "loss": 0.2123495101928711, "step": 109000 }, { "epoch": 0.46795978121806925, "eval_loss": 0.4111691415309906, "eval_runtime": 27.1166, "eval_samples_per_second": 3.688, "eval_steps_per_second": 3.688, "step": 109000 }, { "epoch": 0.4680027133080893, "grad_norm": 0.4841916561126709, "learning_rate": 5.342954218155791e-05, "loss": 0.05959020853042603, "step": 109010 }, { "epoch": 0.4680456453981093, "grad_norm": 0.018183136358857155, "learning_rate": 5.342523046144029e-05, "loss": 0.2590435266494751, "step": 109020 }, { "epoch": 0.46808857748812926, "grad_norm": 1.3049360513687134, "learning_rate": 5.3420918741322664e-05, "loss": 0.13173724412918092, "step": 109030 }, { "epoch": 0.4681315095781493, "grad_norm": 0.007529239635914564, "learning_rate": 5.341660702120505e-05, "loss": 0.04893192052841187, "step": 109040 }, { "epoch": 0.4681744416681693, "grad_norm": 0.007698412984609604, "learning_rate": 5.3412295301087426e-05, "loss": 0.17854329347610473, "step": 109050 }, { "epoch": 0.4682173737581893, "grad_norm": 0.04524169862270355, "learning_rate": 5.340798358096979e-05, "loss": 0.10847384929656982, "step": 109060 }, { "epoch": 0.4682603058482093, "grad_norm": 1.1935988664627075, "learning_rate": 5.340367186085217e-05, "loss": 0.40726146697998045, "step": 109070 }, { "epoch": 0.4683032379382293, "grad_norm": 0.02711350843310356, "learning_rate": 5.3399360140734544e-05, "loss": 0.24927499294281005, "step": 109080 }, { "epoch": 0.4683461700282493, "grad_norm": 1.875342607498169, "learning_rate": 5.339504842061692e-05, "loss": 0.24080886840820312, "step": 109090 }, { "epoch": 0.46838910211826934, "grad_norm": 0.5155624747276306, "learning_rate": 5.33907367004993e-05, "loss": 0.2785279989242554, "step": 109100 }, { "epoch": 0.4684320342082893, "grad_norm": 0.4726239740848541, "learning_rate": 5.3386424980381676e-05, "loss": 0.11514071226119996, "step": 109110 }, { "epoch": 0.4684749662983093, "grad_norm": 1.5348830223083496, "learning_rate": 5.3382113260264053e-05, "loss": 0.29875593185424804, "step": 109120 }, { "epoch": 0.46851789838832936, "grad_norm": 0.020795587450265884, "learning_rate": 5.337780154014643e-05, "loss": 0.26834123134613036, "step": 109130 }, { "epoch": 0.46856083047834934, "grad_norm": 0.6724562644958496, "learning_rate": 5.337348982002881e-05, "loss": 0.32038588523864747, "step": 109140 }, { "epoch": 0.46860376256836933, "grad_norm": 1.894964337348938, "learning_rate": 5.336917809991118e-05, "loss": 0.19419206380844117, "step": 109150 }, { "epoch": 0.4686466946583894, "grad_norm": 0.018505332991480827, "learning_rate": 5.3364866379793556e-05, "loss": 0.09740328192710876, "step": 109160 }, { "epoch": 0.46868962674840936, "grad_norm": 1.5556461811065674, "learning_rate": 5.3360554659675933e-05, "loss": 0.2878258228302002, "step": 109170 }, { "epoch": 0.46873255883842935, "grad_norm": 0.02745775319635868, "learning_rate": 5.335624293955831e-05, "loss": 0.2433319091796875, "step": 109180 }, { "epoch": 0.4687754909284494, "grad_norm": 0.02605215087532997, "learning_rate": 5.335193121944069e-05, "loss": 0.3428020477294922, "step": 109190 }, { "epoch": 0.4688184230184694, "grad_norm": 1.8436968326568604, "learning_rate": 5.3347619499323065e-05, "loss": 0.3498523712158203, "step": 109200 }, { "epoch": 0.46886135510848936, "grad_norm": 0.005615072324872017, "learning_rate": 5.334330777920544e-05, "loss": 0.30814356803894044, "step": 109210 }, { "epoch": 0.4689042871985094, "grad_norm": 4.321868896484375, "learning_rate": 5.333899605908782e-05, "loss": 0.41483297348022463, "step": 109220 }, { "epoch": 0.4689472192885294, "grad_norm": 0.4750598669052124, "learning_rate": 5.3334684338970184e-05, "loss": 0.12364082336425782, "step": 109230 }, { "epoch": 0.46899015137854944, "grad_norm": 1.165134072303772, "learning_rate": 5.333037261885256e-05, "loss": 0.22800350189208984, "step": 109240 }, { "epoch": 0.4690330834685694, "grad_norm": 0.007292190100997686, "learning_rate": 5.332606089873494e-05, "loss": 0.2522198915481567, "step": 109250 }, { "epoch": 0.4690760155585894, "grad_norm": 6.852068901062012, "learning_rate": 5.332174917861732e-05, "loss": 0.3143084287643433, "step": 109260 }, { "epoch": 0.46911894764860945, "grad_norm": 1.5385671854019165, "learning_rate": 5.33174374584997e-05, "loss": 0.12840102910995482, "step": 109270 }, { "epoch": 0.46916187973862944, "grad_norm": 0.03560010343790054, "learning_rate": 5.331312573838208e-05, "loss": 0.10503036975860595, "step": 109280 }, { "epoch": 0.4692048118286494, "grad_norm": 0.30751195549964905, "learning_rate": 5.3308814018264455e-05, "loss": 0.09718015193939208, "step": 109290 }, { "epoch": 0.46924774391866947, "grad_norm": 0.061752088367938995, "learning_rate": 5.330450229814683e-05, "loss": 0.22438604831695558, "step": 109300 }, { "epoch": 0.46929067600868946, "grad_norm": 0.007534640375524759, "learning_rate": 5.3300190578029196e-05, "loss": 0.22838716506958007, "step": 109310 }, { "epoch": 0.46933360809870944, "grad_norm": 4.146587371826172, "learning_rate": 5.329587885791157e-05, "loss": 0.12649660110473632, "step": 109320 }, { "epoch": 0.4693765401887295, "grad_norm": 4.130105018615723, "learning_rate": 5.329156713779395e-05, "loss": 0.36939468383789065, "step": 109330 }, { "epoch": 0.46941947227874947, "grad_norm": 0.7898271083831787, "learning_rate": 5.328725541767633e-05, "loss": 0.08706992864608765, "step": 109340 }, { "epoch": 0.46946240436876946, "grad_norm": 0.09479454159736633, "learning_rate": 5.3282943697558705e-05, "loss": 0.29662575721740725, "step": 109350 }, { "epoch": 0.4695053364587895, "grad_norm": 0.06022655963897705, "learning_rate": 5.327863197744108e-05, "loss": 0.2366532564163208, "step": 109360 }, { "epoch": 0.4695482685488095, "grad_norm": 0.016174262389540672, "learning_rate": 5.327432025732346e-05, "loss": 0.29160394668579104, "step": 109370 }, { "epoch": 0.4695912006388295, "grad_norm": 1.7989237308502197, "learning_rate": 5.327000853720584e-05, "loss": 0.3305816650390625, "step": 109380 }, { "epoch": 0.4696341327288495, "grad_norm": 2.016319751739502, "learning_rate": 5.326569681708821e-05, "loss": 0.250068736076355, "step": 109390 }, { "epoch": 0.4696770648188695, "grad_norm": 0.8311567306518555, "learning_rate": 5.3261385096970585e-05, "loss": 0.3437382936477661, "step": 109400 }, { "epoch": 0.4697199969088895, "grad_norm": 33.06391143798828, "learning_rate": 5.325707337685296e-05, "loss": 0.40858283042907717, "step": 109410 }, { "epoch": 0.46976292899890953, "grad_norm": 1.9839880466461182, "learning_rate": 5.325276165673534e-05, "loss": 0.28404059410095217, "step": 109420 }, { "epoch": 0.4698058610889295, "grad_norm": 0.5857807993888855, "learning_rate": 5.324844993661772e-05, "loss": 0.10082913637161255, "step": 109430 }, { "epoch": 0.46984879317894956, "grad_norm": 0.2136264443397522, "learning_rate": 5.3244138216500095e-05, "loss": 0.35057988166809084, "step": 109440 }, { "epoch": 0.46989172526896955, "grad_norm": 0.2856413722038269, "learning_rate": 5.323982649638247e-05, "loss": 0.1828877568244934, "step": 109450 }, { "epoch": 0.46993465735898954, "grad_norm": 0.009458329528570175, "learning_rate": 5.323551477626485e-05, "loss": 0.113239586353302, "step": 109460 }, { "epoch": 0.4699775894490096, "grad_norm": 0.04635424166917801, "learning_rate": 5.323120305614723e-05, "loss": 0.29793319702148435, "step": 109470 }, { "epoch": 0.47002052153902957, "grad_norm": 1.4999786615371704, "learning_rate": 5.32268913360296e-05, "loss": 0.2726386547088623, "step": 109480 }, { "epoch": 0.47006345362904955, "grad_norm": 4.102458953857422, "learning_rate": 5.3222579615911975e-05, "loss": 0.22052557468414308, "step": 109490 }, { "epoch": 0.4701063857190696, "grad_norm": 1.3203516006469727, "learning_rate": 5.321826789579435e-05, "loss": 0.511915636062622, "step": 109500 }, { "epoch": 0.4701493178090896, "grad_norm": 0.005893752444535494, "learning_rate": 5.321395617567673e-05, "loss": 0.0750274658203125, "step": 109510 }, { "epoch": 0.47019224989910957, "grad_norm": 0.012768547981977463, "learning_rate": 5.3209644455559107e-05, "loss": 0.14960445165634156, "step": 109520 }, { "epoch": 0.4702351819891296, "grad_norm": 0.05208965763449669, "learning_rate": 5.3205332735441484e-05, "loss": 0.08533784747123718, "step": 109530 }, { "epoch": 0.4702781140791496, "grad_norm": 0.016734503209590912, "learning_rate": 5.320102101532386e-05, "loss": 0.18010318279266357, "step": 109540 }, { "epoch": 0.4703210461691696, "grad_norm": 0.019492534920573235, "learning_rate": 5.319670929520624e-05, "loss": 0.2558018922805786, "step": 109550 }, { "epoch": 0.47036397825918963, "grad_norm": 0.07820451259613037, "learning_rate": 5.31923975750886e-05, "loss": 0.1978413224220276, "step": 109560 }, { "epoch": 0.4704069103492096, "grad_norm": 1.5199116468429565, "learning_rate": 5.318808585497098e-05, "loss": 0.14105242490768433, "step": 109570 }, { "epoch": 0.4704498424392296, "grad_norm": 0.027323994785547256, "learning_rate": 5.318377413485336e-05, "loss": 0.17264108657836913, "step": 109580 }, { "epoch": 0.47049277452924965, "grad_norm": 0.03172069415450096, "learning_rate": 5.3179462414735734e-05, "loss": 0.2262335777282715, "step": 109590 }, { "epoch": 0.47053570661926963, "grad_norm": 2.656409978866577, "learning_rate": 5.317515069461811e-05, "loss": 0.22067790031433104, "step": 109600 }, { "epoch": 0.4705786387092896, "grad_norm": 0.37605583667755127, "learning_rate": 5.317083897450049e-05, "loss": 0.09247267246246338, "step": 109610 }, { "epoch": 0.47062157079930966, "grad_norm": 1.5773375034332275, "learning_rate": 5.3166527254382866e-05, "loss": 0.26245760917663574, "step": 109620 }, { "epoch": 0.47066450288932965, "grad_norm": 0.8994236588478088, "learning_rate": 5.316221553426525e-05, "loss": 0.4384958744049072, "step": 109630 }, { "epoch": 0.47070743497934964, "grad_norm": 0.13297097384929657, "learning_rate": 5.3157903814147614e-05, "loss": 0.23621623516082763, "step": 109640 }, { "epoch": 0.4707503670693697, "grad_norm": 1.8098262548446655, "learning_rate": 5.315359209402999e-05, "loss": 0.3028961420059204, "step": 109650 }, { "epoch": 0.47079329915938967, "grad_norm": 0.0058063119649887085, "learning_rate": 5.314928037391237e-05, "loss": 0.18072078227996827, "step": 109660 }, { "epoch": 0.4708362312494097, "grad_norm": 0.9555726051330566, "learning_rate": 5.3144968653794746e-05, "loss": 0.3762235641479492, "step": 109670 }, { "epoch": 0.4708791633394297, "grad_norm": 0.0875546932220459, "learning_rate": 5.3140656933677124e-05, "loss": 0.3112565755844116, "step": 109680 }, { "epoch": 0.4709220954294497, "grad_norm": 1.1810975074768066, "learning_rate": 5.31363452135595e-05, "loss": 0.20165557861328126, "step": 109690 }, { "epoch": 0.4709650275194697, "grad_norm": 3.7935070991516113, "learning_rate": 5.313203349344188e-05, "loss": 0.15693705081939696, "step": 109700 }, { "epoch": 0.4710079596094897, "grad_norm": 0.036692049354314804, "learning_rate": 5.3127721773324256e-05, "loss": 0.1284176826477051, "step": 109710 }, { "epoch": 0.4710508916995097, "grad_norm": 3.318089008331299, "learning_rate": 5.3123410053206626e-05, "loss": 0.3561805248260498, "step": 109720 }, { "epoch": 0.47109382378952974, "grad_norm": 1.2822105884552002, "learning_rate": 5.3119098333089004e-05, "loss": 0.1795423984527588, "step": 109730 }, { "epoch": 0.47113675587954973, "grad_norm": 0.8673492074012756, "learning_rate": 5.311478661297138e-05, "loss": 0.15667368173599244, "step": 109740 }, { "epoch": 0.4711796879695697, "grad_norm": 3.5049326419830322, "learning_rate": 5.311047489285376e-05, "loss": 0.07165217995643616, "step": 109750 }, { "epoch": 0.47122262005958976, "grad_norm": 0.13629527390003204, "learning_rate": 5.3106163172736136e-05, "loss": 0.2818335294723511, "step": 109760 }, { "epoch": 0.47126555214960975, "grad_norm": 4.236764430999756, "learning_rate": 5.310185145261851e-05, "loss": 0.2754476547241211, "step": 109770 }, { "epoch": 0.47130848423962973, "grad_norm": 2.255838394165039, "learning_rate": 5.309753973250089e-05, "loss": 0.19391024112701416, "step": 109780 }, { "epoch": 0.4713514163296498, "grad_norm": 0.0032226841431111097, "learning_rate": 5.309322801238327e-05, "loss": 0.42916345596313477, "step": 109790 }, { "epoch": 0.47139434841966976, "grad_norm": 0.003980181645601988, "learning_rate": 5.308891629226563e-05, "loss": 0.16325417757034302, "step": 109800 }, { "epoch": 0.47143728050968975, "grad_norm": 0.017930980771780014, "learning_rate": 5.308460457214801e-05, "loss": 0.10078818798065185, "step": 109810 }, { "epoch": 0.4714802125997098, "grad_norm": 0.7667815089225769, "learning_rate": 5.3080292852030386e-05, "loss": 0.13166972398757934, "step": 109820 }, { "epoch": 0.4715231446897298, "grad_norm": 6.7686848640441895, "learning_rate": 5.3075981131912764e-05, "loss": 0.09097627997398376, "step": 109830 }, { "epoch": 0.47156607677974977, "grad_norm": 0.042791981250047684, "learning_rate": 5.307166941179514e-05, "loss": 0.2671439409255981, "step": 109840 }, { "epoch": 0.4716090088697698, "grad_norm": 1.2552294731140137, "learning_rate": 5.3067357691677525e-05, "loss": 0.17842416763305663, "step": 109850 }, { "epoch": 0.4716519409597898, "grad_norm": 0.04525917023420334, "learning_rate": 5.30630459715599e-05, "loss": 0.19458953142166138, "step": 109860 }, { "epoch": 0.47169487304980984, "grad_norm": 0.9127861857414246, "learning_rate": 5.305873425144228e-05, "loss": 0.2227538824081421, "step": 109870 }, { "epoch": 0.4717378051398298, "grad_norm": 1.8166645765304565, "learning_rate": 5.305442253132466e-05, "loss": 0.2958853721618652, "step": 109880 }, { "epoch": 0.4717807372298498, "grad_norm": 2.7529006004333496, "learning_rate": 5.305011081120702e-05, "loss": 0.3049177885055542, "step": 109890 }, { "epoch": 0.47182366931986985, "grad_norm": 0.006838400382548571, "learning_rate": 5.30457990910894e-05, "loss": 0.20164821147918702, "step": 109900 }, { "epoch": 0.47186660140988984, "grad_norm": 6.462440013885498, "learning_rate": 5.3041487370971775e-05, "loss": 0.3127782344818115, "step": 109910 }, { "epoch": 0.47190953349990983, "grad_norm": 0.17948110401630402, "learning_rate": 5.303717565085415e-05, "loss": 0.1981052875518799, "step": 109920 }, { "epoch": 0.47195246558992987, "grad_norm": 0.9323148131370544, "learning_rate": 5.303286393073653e-05, "loss": 0.38721270561218263, "step": 109930 }, { "epoch": 0.47199539767994986, "grad_norm": 0.008327801711857319, "learning_rate": 5.302855221061891e-05, "loss": 0.20434670448303222, "step": 109940 }, { "epoch": 0.47203832976996984, "grad_norm": 0.12778253853321075, "learning_rate": 5.3024240490501285e-05, "loss": 0.3304303646087646, "step": 109950 }, { "epoch": 0.4720812618599899, "grad_norm": 0.2479141652584076, "learning_rate": 5.301992877038366e-05, "loss": 0.19963374137878417, "step": 109960 }, { "epoch": 0.4721241939500099, "grad_norm": 1.2295243740081787, "learning_rate": 5.301561705026603e-05, "loss": 0.187886381149292, "step": 109970 }, { "epoch": 0.47216712604002986, "grad_norm": 0.06274612247943878, "learning_rate": 5.301130533014841e-05, "loss": 0.192645800113678, "step": 109980 }, { "epoch": 0.4722100581300499, "grad_norm": 0.0075582414865493774, "learning_rate": 5.300699361003079e-05, "loss": 0.14299780130386353, "step": 109990 }, { "epoch": 0.4722529902200699, "grad_norm": 2.172623872756958, "learning_rate": 5.3002681889913165e-05, "loss": 0.23523974418640137, "step": 110000 }, { "epoch": 0.4722529902200699, "eval_loss": 0.40026649832725525, "eval_runtime": 27.1709, "eval_samples_per_second": 3.68, "eval_steps_per_second": 3.68, "step": 110000 }, { "epoch": 0.4722959223100899, "grad_norm": 0.006778498645871878, "learning_rate": 5.299837016979554e-05, "loss": 0.2251523494720459, "step": 110010 }, { "epoch": 0.4723388544001099, "grad_norm": 3.404278039932251, "learning_rate": 5.299405844967792e-05, "loss": 0.3573782920837402, "step": 110020 }, { "epoch": 0.4723817864901299, "grad_norm": 1.1682584285736084, "learning_rate": 5.29897467295603e-05, "loss": 0.37567944526672364, "step": 110030 }, { "epoch": 0.4724247185801499, "grad_norm": 0.08961494266986847, "learning_rate": 5.2985435009442674e-05, "loss": 0.14649477005004882, "step": 110040 }, { "epoch": 0.47246765067016994, "grad_norm": 0.005352088250219822, "learning_rate": 5.298112328932504e-05, "loss": 0.13340699672698975, "step": 110050 }, { "epoch": 0.4725105827601899, "grad_norm": 3.589823007583618, "learning_rate": 5.2976811569207415e-05, "loss": 0.18872573375701904, "step": 110060 }, { "epoch": 0.4725535148502099, "grad_norm": 0.025254419073462486, "learning_rate": 5.29724998490898e-05, "loss": 0.011775702983140946, "step": 110070 }, { "epoch": 0.47259644694022995, "grad_norm": 4.436582088470459, "learning_rate": 5.296818812897218e-05, "loss": 0.2874211072921753, "step": 110080 }, { "epoch": 0.47263937903024994, "grad_norm": 0.1682950258255005, "learning_rate": 5.2963876408854554e-05, "loss": 0.21236715316772461, "step": 110090 }, { "epoch": 0.47268231112027, "grad_norm": 0.1564854085445404, "learning_rate": 5.295956468873693e-05, "loss": 0.1416730046272278, "step": 110100 }, { "epoch": 0.47272524321028997, "grad_norm": 0.7669548392295837, "learning_rate": 5.295525296861931e-05, "loss": 0.17509924173355101, "step": 110110 }, { "epoch": 0.47276817530030996, "grad_norm": 0.0032598222605884075, "learning_rate": 5.2950941248501686e-05, "loss": 0.2041093111038208, "step": 110120 }, { "epoch": 0.47281110739033, "grad_norm": 0.001301642507314682, "learning_rate": 5.294662952838405e-05, "loss": 0.1385445475578308, "step": 110130 }, { "epoch": 0.47285403948035, "grad_norm": 1.4497944116592407, "learning_rate": 5.294231780826643e-05, "loss": 0.2178436517715454, "step": 110140 }, { "epoch": 0.47289697157037, "grad_norm": 0.007498675025999546, "learning_rate": 5.2938006088148805e-05, "loss": 0.07806483507156373, "step": 110150 }, { "epoch": 0.47293990366039, "grad_norm": 0.051783930510282516, "learning_rate": 5.293369436803118e-05, "loss": 0.23047008514404296, "step": 110160 }, { "epoch": 0.47298283575041, "grad_norm": 0.0037600200157612562, "learning_rate": 5.292938264791356e-05, "loss": 0.2534470081329346, "step": 110170 }, { "epoch": 0.47302576784043, "grad_norm": 0.002208675490692258, "learning_rate": 5.292507092779594e-05, "loss": 0.23899390697479247, "step": 110180 }, { "epoch": 0.47306869993045003, "grad_norm": 0.8531462550163269, "learning_rate": 5.2920759207678314e-05, "loss": 0.29016637802124023, "step": 110190 }, { "epoch": 0.47311163202047, "grad_norm": 0.02837371453642845, "learning_rate": 5.291644748756069e-05, "loss": 0.15528700351715088, "step": 110200 }, { "epoch": 0.47315456411049, "grad_norm": 0.017523042857646942, "learning_rate": 5.291213576744307e-05, "loss": 0.0026233930140733717, "step": 110210 }, { "epoch": 0.47319749620051005, "grad_norm": 5.990091323852539, "learning_rate": 5.290782404732544e-05, "loss": 0.3041208744049072, "step": 110220 }, { "epoch": 0.47324042829053004, "grad_norm": 0.006588002201169729, "learning_rate": 5.2903512327207817e-05, "loss": 0.20339715480804443, "step": 110230 }, { "epoch": 0.47328336038055, "grad_norm": 0.0035168312024325132, "learning_rate": 5.2899200607090194e-05, "loss": 0.09533035755157471, "step": 110240 }, { "epoch": 0.47332629247057006, "grad_norm": 0.055465489625930786, "learning_rate": 5.289488888697257e-05, "loss": 0.27167162895202634, "step": 110250 }, { "epoch": 0.47336922456059005, "grad_norm": 0.08093491941690445, "learning_rate": 5.289057716685495e-05, "loss": 0.38339624404907224, "step": 110260 }, { "epoch": 0.47341215665061004, "grad_norm": 0.005503931548446417, "learning_rate": 5.2886265446737326e-05, "loss": 0.1939884305000305, "step": 110270 }, { "epoch": 0.4734550887406301, "grad_norm": 2.163684129714966, "learning_rate": 5.28819537266197e-05, "loss": 0.14528403282165528, "step": 110280 }, { "epoch": 0.47349802083065007, "grad_norm": 1.828765869140625, "learning_rate": 5.287764200650208e-05, "loss": 0.2360905170440674, "step": 110290 }, { "epoch": 0.4735409529206701, "grad_norm": 1.3224155902862549, "learning_rate": 5.287333028638445e-05, "loss": 0.3001755475997925, "step": 110300 }, { "epoch": 0.4735838850106901, "grad_norm": 0.03000202588737011, "learning_rate": 5.286901856626683e-05, "loss": 0.08096457123756409, "step": 110310 }, { "epoch": 0.4736268171007101, "grad_norm": 0.008006912656128407, "learning_rate": 5.2864706846149206e-05, "loss": 0.13578498363494873, "step": 110320 }, { "epoch": 0.4736697491907301, "grad_norm": 0.00391837302595377, "learning_rate": 5.286039512603158e-05, "loss": 0.10231612920761109, "step": 110330 }, { "epoch": 0.4737126812807501, "grad_norm": 0.004665139596909285, "learning_rate": 5.285608340591396e-05, "loss": 0.07968645691871643, "step": 110340 }, { "epoch": 0.4737556133707701, "grad_norm": 3.0334272384643555, "learning_rate": 5.285177168579634e-05, "loss": 0.11113240718841552, "step": 110350 }, { "epoch": 0.47379854546079014, "grad_norm": 0.28551867604255676, "learning_rate": 5.2847459965678715e-05, "loss": 0.1603380799293518, "step": 110360 }, { "epoch": 0.47384147755081013, "grad_norm": 1.029355764389038, "learning_rate": 5.284314824556109e-05, "loss": 0.34641385078430176, "step": 110370 }, { "epoch": 0.4738844096408301, "grad_norm": 2.454301357269287, "learning_rate": 5.2838836525443456e-05, "loss": 0.26242704391479493, "step": 110380 }, { "epoch": 0.47392734173085016, "grad_norm": 30.86275863647461, "learning_rate": 5.2834524805325834e-05, "loss": 0.23771564960479735, "step": 110390 }, { "epoch": 0.47397027382087015, "grad_norm": 0.00213251612149179, "learning_rate": 5.283021308520821e-05, "loss": 0.1638559579849243, "step": 110400 }, { "epoch": 0.47401320591089013, "grad_norm": 1.0422087907791138, "learning_rate": 5.282590136509059e-05, "loss": 0.45887227058410646, "step": 110410 }, { "epoch": 0.4740561380009102, "grad_norm": 10.386863708496094, "learning_rate": 5.2821589644972966e-05, "loss": 0.2633431196212769, "step": 110420 }, { "epoch": 0.47409907009093016, "grad_norm": 0.05543503910303116, "learning_rate": 5.281727792485534e-05, "loss": 0.11826821565628051, "step": 110430 }, { "epoch": 0.47414200218095015, "grad_norm": 8.459673881530762, "learning_rate": 5.281296620473772e-05, "loss": 0.18388675451278685, "step": 110440 }, { "epoch": 0.4741849342709702, "grad_norm": 0.0549483522772789, "learning_rate": 5.2808654484620105e-05, "loss": 0.5347713470458985, "step": 110450 }, { "epoch": 0.4742278663609902, "grad_norm": 3.4024839401245117, "learning_rate": 5.280434276450247e-05, "loss": 0.15446404218673707, "step": 110460 }, { "epoch": 0.47427079845101017, "grad_norm": 0.011814040131866932, "learning_rate": 5.2800031044384846e-05, "loss": 0.23509562015533447, "step": 110470 }, { "epoch": 0.4743137305410302, "grad_norm": 0.2656792104244232, "learning_rate": 5.279571932426722e-05, "loss": 0.2127734661102295, "step": 110480 }, { "epoch": 0.4743566626310502, "grad_norm": 8.172897338867188, "learning_rate": 5.27914076041496e-05, "loss": 0.17217317819595337, "step": 110490 }, { "epoch": 0.4743995947210702, "grad_norm": 13.168989181518555, "learning_rate": 5.278709588403198e-05, "loss": 0.24612603187561036, "step": 110500 }, { "epoch": 0.4744425268110902, "grad_norm": 3.857973337173462, "learning_rate": 5.2782784163914355e-05, "loss": 0.3045464754104614, "step": 110510 }, { "epoch": 0.4744854589011102, "grad_norm": 1.6314276456832886, "learning_rate": 5.277847244379673e-05, "loss": 0.13810135126113893, "step": 110520 }, { "epoch": 0.47452839099113026, "grad_norm": 0.989060640335083, "learning_rate": 5.277416072367911e-05, "loss": 0.26412765979766845, "step": 110530 }, { "epoch": 0.47457132308115024, "grad_norm": 0.005572533700615168, "learning_rate": 5.276984900356148e-05, "loss": 0.15340116024017333, "step": 110540 }, { "epoch": 0.47461425517117023, "grad_norm": 0.20932750403881073, "learning_rate": 5.276553728344386e-05, "loss": 0.32161037921905516, "step": 110550 }, { "epoch": 0.47465718726119027, "grad_norm": 0.5605794191360474, "learning_rate": 5.2761225563326235e-05, "loss": 0.1692768692970276, "step": 110560 }, { "epoch": 0.47470011935121026, "grad_norm": 0.016393939033150673, "learning_rate": 5.275691384320861e-05, "loss": 0.20412814617156982, "step": 110570 }, { "epoch": 0.47474305144123025, "grad_norm": 0.30977731943130493, "learning_rate": 5.275260212309099e-05, "loss": 0.16929389238357545, "step": 110580 }, { "epoch": 0.4747859835312503, "grad_norm": 0.8243743777275085, "learning_rate": 5.274829040297337e-05, "loss": 0.21461176872253418, "step": 110590 }, { "epoch": 0.4748289156212703, "grad_norm": 1.380896806716919, "learning_rate": 5.2743978682855744e-05, "loss": 0.3001677989959717, "step": 110600 }, { "epoch": 0.47487184771129026, "grad_norm": 4.265039920806885, "learning_rate": 5.273966696273812e-05, "loss": 0.2183671236038208, "step": 110610 }, { "epoch": 0.4749147798013103, "grad_norm": 0.011042381636798382, "learning_rate": 5.27353552426205e-05, "loss": 0.15000921487808228, "step": 110620 }, { "epoch": 0.4749577118913303, "grad_norm": 0.1344660520553589, "learning_rate": 5.273104352250286e-05, "loss": 0.3278426885604858, "step": 110630 }, { "epoch": 0.4750006439813503, "grad_norm": 1.2693231105804443, "learning_rate": 5.272673180238524e-05, "loss": 0.18513789176940917, "step": 110640 }, { "epoch": 0.4750435760713703, "grad_norm": 1.4191405773162842, "learning_rate": 5.272242008226762e-05, "loss": 0.31360783576965334, "step": 110650 }, { "epoch": 0.4750865081613903, "grad_norm": 2.736665725708008, "learning_rate": 5.2718108362149995e-05, "loss": 0.5891505718231201, "step": 110660 }, { "epoch": 0.4751294402514103, "grad_norm": 3.2283921241760254, "learning_rate": 5.271379664203238e-05, "loss": 0.4493126392364502, "step": 110670 }, { "epoch": 0.47517237234143034, "grad_norm": 3.095923900604248, "learning_rate": 5.2709484921914756e-05, "loss": 0.2838698625564575, "step": 110680 }, { "epoch": 0.4752153044314503, "grad_norm": 0.1428341418504715, "learning_rate": 5.2705173201797134e-05, "loss": 0.25497987270355227, "step": 110690 }, { "epoch": 0.4752582365214703, "grad_norm": 1.663386344909668, "learning_rate": 5.270086148167951e-05, "loss": 0.33310327529907224, "step": 110700 }, { "epoch": 0.47530116861149035, "grad_norm": 1.6262311935424805, "learning_rate": 5.2696549761561875e-05, "loss": 0.2923267841339111, "step": 110710 }, { "epoch": 0.47534410070151034, "grad_norm": 0.10630851984024048, "learning_rate": 5.269223804144425e-05, "loss": 0.05050194263458252, "step": 110720 }, { "epoch": 0.4753870327915304, "grad_norm": 3.5088255405426025, "learning_rate": 5.268792632132663e-05, "loss": 0.17360684871673585, "step": 110730 }, { "epoch": 0.47542996488155037, "grad_norm": 1.5751526355743408, "learning_rate": 5.268361460120901e-05, "loss": 0.3811746120452881, "step": 110740 }, { "epoch": 0.47547289697157036, "grad_norm": 0.017309105023741722, "learning_rate": 5.2679302881091384e-05, "loss": 0.08757068514823914, "step": 110750 }, { "epoch": 0.4755158290615904, "grad_norm": 0.0026281701866537333, "learning_rate": 5.267499116097376e-05, "loss": 0.14881271123886108, "step": 110760 }, { "epoch": 0.4755587611516104, "grad_norm": 0.1079968512058258, "learning_rate": 5.267067944085614e-05, "loss": 0.17866290807724, "step": 110770 }, { "epoch": 0.4756016932416304, "grad_norm": 0.003995122853666544, "learning_rate": 5.2666367720738516e-05, "loss": 0.1523299217224121, "step": 110780 }, { "epoch": 0.4756446253316504, "grad_norm": 0.0031116269528865814, "learning_rate": 5.266205600062089e-05, "loss": 0.3764505386352539, "step": 110790 }, { "epoch": 0.4756875574216704, "grad_norm": 0.026464959606528282, "learning_rate": 5.2657744280503264e-05, "loss": 0.20965878963470458, "step": 110800 }, { "epoch": 0.4757304895116904, "grad_norm": 0.03027081862092018, "learning_rate": 5.265343256038564e-05, "loss": 0.2689178705215454, "step": 110810 }, { "epoch": 0.47577342160171043, "grad_norm": 0.015275675803422928, "learning_rate": 5.264912084026802e-05, "loss": 0.4521240234375, "step": 110820 }, { "epoch": 0.4758163536917304, "grad_norm": 0.05949430167675018, "learning_rate": 5.2644809120150396e-05, "loss": 0.28630361557006834, "step": 110830 }, { "epoch": 0.4758592857817504, "grad_norm": 0.05996134132146835, "learning_rate": 5.2640497400032774e-05, "loss": 0.252076530456543, "step": 110840 }, { "epoch": 0.47590221787177045, "grad_norm": 0.060242872685194016, "learning_rate": 5.263618567991515e-05, "loss": 0.1737877368927002, "step": 110850 }, { "epoch": 0.47594514996179044, "grad_norm": 2.192213773727417, "learning_rate": 5.263187395979753e-05, "loss": 0.09967674612998963, "step": 110860 }, { "epoch": 0.4759880820518104, "grad_norm": 1.1650608777999878, "learning_rate": 5.262756223967989e-05, "loss": 0.1381733775138855, "step": 110870 }, { "epoch": 0.47603101414183047, "grad_norm": 0.07127422839403152, "learning_rate": 5.262325051956227e-05, "loss": 0.22627367973327636, "step": 110880 }, { "epoch": 0.47607394623185045, "grad_norm": 0.010779001750051975, "learning_rate": 5.2618938799444653e-05, "loss": 0.02740491330623627, "step": 110890 }, { "epoch": 0.47611687832187044, "grad_norm": 2.1994879245758057, "learning_rate": 5.261462707932703e-05, "loss": 0.21756505966186523, "step": 110900 }, { "epoch": 0.4761598104118905, "grad_norm": 0.00900544598698616, "learning_rate": 5.261031535920941e-05, "loss": 0.4406434535980225, "step": 110910 }, { "epoch": 0.47620274250191047, "grad_norm": 0.3732303977012634, "learning_rate": 5.2606003639091785e-05, "loss": 0.17997138500213622, "step": 110920 }, { "epoch": 0.47624567459193046, "grad_norm": 0.13721691071987152, "learning_rate": 5.260169191897416e-05, "loss": 0.13512275218963624, "step": 110930 }, { "epoch": 0.4762886066819505, "grad_norm": 0.8601694107055664, "learning_rate": 5.259738019885654e-05, "loss": 0.24069135189056395, "step": 110940 }, { "epoch": 0.4763315387719705, "grad_norm": 1.0583916902542114, "learning_rate": 5.259306847873892e-05, "loss": 0.058995991945266724, "step": 110950 }, { "epoch": 0.47637447086199053, "grad_norm": 0.05748229846358299, "learning_rate": 5.258875675862128e-05, "loss": 0.2452263593673706, "step": 110960 }, { "epoch": 0.4764174029520105, "grad_norm": 1.6908286809921265, "learning_rate": 5.258444503850366e-05, "loss": 0.3112839698791504, "step": 110970 }, { "epoch": 0.4764603350420305, "grad_norm": 3.540738105773926, "learning_rate": 5.2580133318386036e-05, "loss": 0.36870133876800537, "step": 110980 }, { "epoch": 0.47650326713205055, "grad_norm": 0.00804793369024992, "learning_rate": 5.257582159826841e-05, "loss": 0.3045008659362793, "step": 110990 }, { "epoch": 0.47654619922207053, "grad_norm": 0.06705295294523239, "learning_rate": 5.257150987815079e-05, "loss": 0.12070378065109252, "step": 111000 }, { "epoch": 0.47654619922207053, "eval_loss": 0.4230143427848816, "eval_runtime": 27.1135, "eval_samples_per_second": 3.688, "eval_steps_per_second": 3.688, "step": 111000 }, { "epoch": 0.4765891313120905, "grad_norm": 0.004154821392148733, "learning_rate": 5.256719815803317e-05, "loss": 0.3670050144195557, "step": 111010 }, { "epoch": 0.47663206340211056, "grad_norm": 0.09109306335449219, "learning_rate": 5.2562886437915545e-05, "loss": 0.10622333288192749, "step": 111020 }, { "epoch": 0.47667499549213055, "grad_norm": 0.0096572982147336, "learning_rate": 5.255857471779792e-05, "loss": 0.08712666034698487, "step": 111030 }, { "epoch": 0.47671792758215054, "grad_norm": 0.024771859869360924, "learning_rate": 5.255426299768029e-05, "loss": 0.26303396224975584, "step": 111040 }, { "epoch": 0.4767608596721706, "grad_norm": 0.7985782623291016, "learning_rate": 5.254995127756267e-05, "loss": 0.06067940592765808, "step": 111050 }, { "epoch": 0.47680379176219057, "grad_norm": 1.2885322570800781, "learning_rate": 5.254563955744505e-05, "loss": 0.2834064483642578, "step": 111060 }, { "epoch": 0.47684672385221055, "grad_norm": 0.044166211038827896, "learning_rate": 5.2541327837327425e-05, "loss": 0.15519756078720093, "step": 111070 }, { "epoch": 0.4768896559422306, "grad_norm": 0.010398822836577892, "learning_rate": 5.25370161172098e-05, "loss": 0.23026049137115479, "step": 111080 }, { "epoch": 0.4769325880322506, "grad_norm": 0.013979957439005375, "learning_rate": 5.253270439709218e-05, "loss": 0.19074589014053345, "step": 111090 }, { "epoch": 0.47697552012227057, "grad_norm": 0.026473939418792725, "learning_rate": 5.252839267697456e-05, "loss": 0.1719115138053894, "step": 111100 }, { "epoch": 0.4770184522122906, "grad_norm": 1.366654872894287, "learning_rate": 5.2524080956856935e-05, "loss": 0.18147858381271362, "step": 111110 }, { "epoch": 0.4770613843023106, "grad_norm": 3.7447845935821533, "learning_rate": 5.2519769236739305e-05, "loss": 0.40271430015563964, "step": 111120 }, { "epoch": 0.4771043163923306, "grad_norm": 3.210329532623291, "learning_rate": 5.251545751662168e-05, "loss": 0.19403756856918336, "step": 111130 }, { "epoch": 0.4771472484823506, "grad_norm": 1.8033745288848877, "learning_rate": 5.251114579650406e-05, "loss": 0.3300994634628296, "step": 111140 }, { "epoch": 0.4771901805723706, "grad_norm": 0.12008707970380783, "learning_rate": 5.250683407638644e-05, "loss": 0.33939058780670167, "step": 111150 }, { "epoch": 0.47723311266239066, "grad_norm": 2.0061378479003906, "learning_rate": 5.2502522356268815e-05, "loss": 0.26279187202453613, "step": 111160 }, { "epoch": 0.47727604475241064, "grad_norm": 0.08203338086605072, "learning_rate": 5.249821063615119e-05, "loss": 0.10013169050216675, "step": 111170 }, { "epoch": 0.47731897684243063, "grad_norm": 0.15610380470752716, "learning_rate": 5.249389891603357e-05, "loss": 0.18954191207885743, "step": 111180 }, { "epoch": 0.4773619089324507, "grad_norm": 4.496754169464111, "learning_rate": 5.248958719591595e-05, "loss": 0.1938277006149292, "step": 111190 }, { "epoch": 0.47740484102247066, "grad_norm": 0.13328373432159424, "learning_rate": 5.248527547579831e-05, "loss": 0.24010121822357178, "step": 111200 }, { "epoch": 0.47744777311249065, "grad_norm": 0.1279555857181549, "learning_rate": 5.248096375568069e-05, "loss": 0.11613349914550782, "step": 111210 }, { "epoch": 0.4774907052025107, "grad_norm": 0.0011030016466975212, "learning_rate": 5.2476652035563065e-05, "loss": 0.07406458854675294, "step": 111220 }, { "epoch": 0.4775336372925307, "grad_norm": 3.7059450149536133, "learning_rate": 5.247234031544544e-05, "loss": 0.18384324312210082, "step": 111230 }, { "epoch": 0.47757656938255066, "grad_norm": 1.8988912105560303, "learning_rate": 5.246802859532782e-05, "loss": 0.407755708694458, "step": 111240 }, { "epoch": 0.4776195014725707, "grad_norm": 2.294595956802368, "learning_rate": 5.24637168752102e-05, "loss": 0.3127762317657471, "step": 111250 }, { "epoch": 0.4776624335625907, "grad_norm": 0.10534092783927917, "learning_rate": 5.245940515509258e-05, "loss": 0.29289281368255615, "step": 111260 }, { "epoch": 0.4777053656526107, "grad_norm": 0.11372819542884827, "learning_rate": 5.245509343497496e-05, "loss": 0.1034854531288147, "step": 111270 }, { "epoch": 0.4777482977426307, "grad_norm": 3.390481472015381, "learning_rate": 5.245078171485732e-05, "loss": 0.14818179607391357, "step": 111280 }, { "epoch": 0.4777912298326507, "grad_norm": 0.8811556100845337, "learning_rate": 5.24464699947397e-05, "loss": 0.2420907974243164, "step": 111290 }, { "epoch": 0.4778341619226707, "grad_norm": 34.448577880859375, "learning_rate": 5.244215827462208e-05, "loss": 0.21140847206115723, "step": 111300 }, { "epoch": 0.47787709401269074, "grad_norm": 0.5432401299476624, "learning_rate": 5.2437846554504454e-05, "loss": 0.11056315898895264, "step": 111310 }, { "epoch": 0.4779200261027107, "grad_norm": 0.012676320970058441, "learning_rate": 5.243353483438683e-05, "loss": 0.32334301471710203, "step": 111320 }, { "epoch": 0.4779629581927307, "grad_norm": 0.014294223859906197, "learning_rate": 5.242922311426921e-05, "loss": 0.09135165214538574, "step": 111330 }, { "epoch": 0.47800589028275076, "grad_norm": 0.05797756835818291, "learning_rate": 5.2424911394151586e-05, "loss": 0.048809555172920224, "step": 111340 }, { "epoch": 0.47804882237277074, "grad_norm": 2.065965175628662, "learning_rate": 5.2420599674033964e-05, "loss": 0.1818181872367859, "step": 111350 }, { "epoch": 0.47809175446279073, "grad_norm": 0.17510123550891876, "learning_rate": 5.241628795391634e-05, "loss": 0.1729954957962036, "step": 111360 }, { "epoch": 0.4781346865528108, "grad_norm": 4.402050971984863, "learning_rate": 5.241197623379871e-05, "loss": 0.4581557273864746, "step": 111370 }, { "epoch": 0.47817761864283076, "grad_norm": 1.0645898580551147, "learning_rate": 5.240766451368109e-05, "loss": 0.08990298509597779, "step": 111380 }, { "epoch": 0.4782205507328508, "grad_norm": 0.0008161990554071963, "learning_rate": 5.2403352793563466e-05, "loss": 0.30890114307403566, "step": 111390 }, { "epoch": 0.4782634828228708, "grad_norm": 0.07157833129167557, "learning_rate": 5.2399041073445844e-05, "loss": 0.3999182224273682, "step": 111400 }, { "epoch": 0.4783064149128908, "grad_norm": 0.011867137625813484, "learning_rate": 5.239472935332822e-05, "loss": 0.24128921031951905, "step": 111410 }, { "epoch": 0.4783493470029108, "grad_norm": 1.8518315553665161, "learning_rate": 5.23904176332106e-05, "loss": 0.22349026203155517, "step": 111420 }, { "epoch": 0.4783922790929308, "grad_norm": 0.2319876104593277, "learning_rate": 5.2386105913092976e-05, "loss": 0.26111981868743894, "step": 111430 }, { "epoch": 0.4784352111829508, "grad_norm": 0.006205259822309017, "learning_rate": 5.238179419297535e-05, "loss": 0.26369264125823977, "step": 111440 }, { "epoch": 0.47847814327297084, "grad_norm": 0.11651434004306793, "learning_rate": 5.237748247285772e-05, "loss": 0.3667259931564331, "step": 111450 }, { "epoch": 0.4785210753629908, "grad_norm": 0.014610327780246735, "learning_rate": 5.2373170752740094e-05, "loss": 0.1900045394897461, "step": 111460 }, { "epoch": 0.4785640074530108, "grad_norm": 0.023935405537486076, "learning_rate": 5.236885903262247e-05, "loss": 0.177459454536438, "step": 111470 }, { "epoch": 0.47860693954303085, "grad_norm": 7.586323261260986, "learning_rate": 5.2364547312504856e-05, "loss": 0.30872330665588377, "step": 111480 }, { "epoch": 0.47864987163305084, "grad_norm": 0.0036038036923855543, "learning_rate": 5.236023559238723e-05, "loss": 0.14827797412872315, "step": 111490 }, { "epoch": 0.4786928037230708, "grad_norm": 0.006241375580430031, "learning_rate": 5.235592387226961e-05, "loss": 0.16753029823303223, "step": 111500 }, { "epoch": 0.47873573581309087, "grad_norm": 1.5585517883300781, "learning_rate": 5.235161215215199e-05, "loss": 0.1405550241470337, "step": 111510 }, { "epoch": 0.47877866790311085, "grad_norm": 0.1075335368514061, "learning_rate": 5.2347300432034365e-05, "loss": 0.08326172828674316, "step": 111520 }, { "epoch": 0.47882159999313084, "grad_norm": 0.9561521410942078, "learning_rate": 5.234298871191673e-05, "loss": 0.18973840475082399, "step": 111530 }, { "epoch": 0.4788645320831509, "grad_norm": 0.9297080039978027, "learning_rate": 5.2338676991799106e-05, "loss": 0.13529865741729735, "step": 111540 }, { "epoch": 0.47890746417317087, "grad_norm": 10.015151977539062, "learning_rate": 5.2334365271681484e-05, "loss": 0.4769240379333496, "step": 111550 }, { "epoch": 0.47895039626319086, "grad_norm": 1.3597183227539062, "learning_rate": 5.233005355156386e-05, "loss": 0.30367794036865237, "step": 111560 }, { "epoch": 0.4789933283532109, "grad_norm": 0.005315948743373156, "learning_rate": 5.232574183144624e-05, "loss": 0.2673523187637329, "step": 111570 }, { "epoch": 0.4790362604432309, "grad_norm": 0.38153043389320374, "learning_rate": 5.2321430111328616e-05, "loss": 0.3261179685592651, "step": 111580 }, { "epoch": 0.47907919253325093, "grad_norm": 0.05338694155216217, "learning_rate": 5.231711839121099e-05, "loss": 0.1568708062171936, "step": 111590 }, { "epoch": 0.4791221246232709, "grad_norm": 0.016230806708335876, "learning_rate": 5.231280667109337e-05, "loss": 0.37447845935821533, "step": 111600 }, { "epoch": 0.4791650567132909, "grad_norm": 0.18410959839820862, "learning_rate": 5.230849495097574e-05, "loss": 0.05639318823814392, "step": 111610 }, { "epoch": 0.47920798880331095, "grad_norm": 1.1493514776229858, "learning_rate": 5.230418323085812e-05, "loss": 0.22289519309997557, "step": 111620 }, { "epoch": 0.47925092089333093, "grad_norm": 0.036008015275001526, "learning_rate": 5.2299871510740496e-05, "loss": 0.011840692907571792, "step": 111630 }, { "epoch": 0.4792938529833509, "grad_norm": 0.007809455972164869, "learning_rate": 5.229555979062287e-05, "loss": 0.09044709801673889, "step": 111640 }, { "epoch": 0.47933678507337096, "grad_norm": 0.03709058091044426, "learning_rate": 5.229124807050525e-05, "loss": 0.042278504371643065, "step": 111650 }, { "epoch": 0.47937971716339095, "grad_norm": 1.3962702751159668, "learning_rate": 5.228693635038763e-05, "loss": 0.3972173690795898, "step": 111660 }, { "epoch": 0.47942264925341094, "grad_norm": 1.8785520792007446, "learning_rate": 5.2282624630270005e-05, "loss": 0.3868700504302979, "step": 111670 }, { "epoch": 0.479465581343431, "grad_norm": 1.7913538217544556, "learning_rate": 5.227831291015238e-05, "loss": 0.11502017974853515, "step": 111680 }, { "epoch": 0.47950851343345097, "grad_norm": 0.012045775540173054, "learning_rate": 5.227400119003476e-05, "loss": 0.1622507929801941, "step": 111690 }, { "epoch": 0.47955144552347095, "grad_norm": 0.004656339529901743, "learning_rate": 5.226968946991713e-05, "loss": 0.1231924057006836, "step": 111700 }, { "epoch": 0.479594377613491, "grad_norm": 1.5003231763839722, "learning_rate": 5.226537774979951e-05, "loss": 0.31814677715301515, "step": 111710 }, { "epoch": 0.479637309703511, "grad_norm": 0.01938486658036709, "learning_rate": 5.2261066029681885e-05, "loss": 0.08459774255752564, "step": 111720 }, { "epoch": 0.47968024179353097, "grad_norm": 1.3566876649856567, "learning_rate": 5.225675430956426e-05, "loss": 0.07076024413108825, "step": 111730 }, { "epoch": 0.479723173883551, "grad_norm": 1.2478437423706055, "learning_rate": 5.225244258944664e-05, "loss": 0.2324007272720337, "step": 111740 }, { "epoch": 0.479766105973571, "grad_norm": 1.2657784223556519, "learning_rate": 5.224813086932902e-05, "loss": 0.20952317714691163, "step": 111750 }, { "epoch": 0.479809038063591, "grad_norm": 1.6232753992080688, "learning_rate": 5.2243819149211394e-05, "loss": 0.19485619068145751, "step": 111760 }, { "epoch": 0.47985197015361103, "grad_norm": 0.009501464664936066, "learning_rate": 5.223950742909377e-05, "loss": 0.20127930641174316, "step": 111770 }, { "epoch": 0.479894902243631, "grad_norm": 0.003903453005477786, "learning_rate": 5.2235195708976135e-05, "loss": 0.17025061845779418, "step": 111780 }, { "epoch": 0.479937834333651, "grad_norm": 0.023910705000162125, "learning_rate": 5.223088398885851e-05, "loss": 0.2176452398300171, "step": 111790 }, { "epoch": 0.47998076642367105, "grad_norm": 0.0150348711758852, "learning_rate": 5.222657226874089e-05, "loss": 0.15627822875976563, "step": 111800 }, { "epoch": 0.48002369851369103, "grad_norm": 0.006376425735652447, "learning_rate": 5.222226054862327e-05, "loss": 0.2449338436126709, "step": 111810 }, { "epoch": 0.4800666306037111, "grad_norm": 0.03453676775097847, "learning_rate": 5.2217948828505645e-05, "loss": 0.07500687837600709, "step": 111820 }, { "epoch": 0.48010956269373106, "grad_norm": 0.01049389410763979, "learning_rate": 5.221363710838802e-05, "loss": 0.38739445209503176, "step": 111830 }, { "epoch": 0.48015249478375105, "grad_norm": 0.003912179730832577, "learning_rate": 5.22093253882704e-05, "loss": 0.06761714220046997, "step": 111840 }, { "epoch": 0.4801954268737711, "grad_norm": 1.2374663352966309, "learning_rate": 5.2205013668152784e-05, "loss": 0.22694122791290283, "step": 111850 }, { "epoch": 0.4802383589637911, "grad_norm": 0.4927384853363037, "learning_rate": 5.220070194803515e-05, "loss": 0.02246246635913849, "step": 111860 }, { "epoch": 0.48028129105381107, "grad_norm": 54.35266876220703, "learning_rate": 5.2196390227917525e-05, "loss": 0.1613282561302185, "step": 111870 }, { "epoch": 0.4803242231438311, "grad_norm": 1.6677411794662476, "learning_rate": 5.21920785077999e-05, "loss": 0.036837369203567505, "step": 111880 }, { "epoch": 0.4803671552338511, "grad_norm": 0.15668274462223053, "learning_rate": 5.218776678768228e-05, "loss": 0.08144662380218506, "step": 111890 }, { "epoch": 0.4804100873238711, "grad_norm": 0.2714574933052063, "learning_rate": 5.218345506756466e-05, "loss": 0.1942346692085266, "step": 111900 }, { "epoch": 0.4804530194138911, "grad_norm": 3.1457133293151855, "learning_rate": 5.2179143347447034e-05, "loss": 0.3857900857925415, "step": 111910 }, { "epoch": 0.4804959515039111, "grad_norm": 2.2899222373962402, "learning_rate": 5.217483162732941e-05, "loss": 0.2843789577484131, "step": 111920 }, { "epoch": 0.4805388835939311, "grad_norm": 0.030405929312109947, "learning_rate": 5.217051990721179e-05, "loss": 0.08297332525253295, "step": 111930 }, { "epoch": 0.48058181568395114, "grad_norm": 0.010032770223915577, "learning_rate": 5.216620818709416e-05, "loss": 0.08996413946151734, "step": 111940 }, { "epoch": 0.48062474777397113, "grad_norm": 0.0011612273519858718, "learning_rate": 5.216189646697654e-05, "loss": 0.2334982395172119, "step": 111950 }, { "epoch": 0.4806676798639911, "grad_norm": 0.02428930439054966, "learning_rate": 5.2157584746858914e-05, "loss": 0.21660916805267333, "step": 111960 }, { "epoch": 0.48071061195401116, "grad_norm": 0.00992624368518591, "learning_rate": 5.215327302674129e-05, "loss": 0.08710908889770508, "step": 111970 }, { "epoch": 0.48075354404403114, "grad_norm": 1.4480361938476562, "learning_rate": 5.214896130662367e-05, "loss": 0.20752573013305664, "step": 111980 }, { "epoch": 0.48079647613405113, "grad_norm": 0.0048288386315107346, "learning_rate": 5.2144649586506046e-05, "loss": 0.25462770462036133, "step": 111990 }, { "epoch": 0.4808394082240712, "grad_norm": 3.1555609703063965, "learning_rate": 5.214033786638842e-05, "loss": 0.06526825428009034, "step": 112000 }, { "epoch": 0.4808394082240712, "eval_loss": 0.4083115756511688, "eval_runtime": 27.112, "eval_samples_per_second": 3.688, "eval_steps_per_second": 3.688, "step": 112000 }, { "epoch": 0.48088234031409116, "grad_norm": 0.0345025397837162, "learning_rate": 5.21360261462708e-05, "loss": 0.20930685997009277, "step": 112010 }, { "epoch": 0.4809252724041112, "grad_norm": 0.7449776530265808, "learning_rate": 5.213171442615318e-05, "loss": 0.521756362915039, "step": 112020 }, { "epoch": 0.4809682044941312, "grad_norm": 0.010897096246480942, "learning_rate": 5.212740270603554e-05, "loss": 0.06535886526107788, "step": 112030 }, { "epoch": 0.4810111365841512, "grad_norm": 1.3106818199157715, "learning_rate": 5.212309098591792e-05, "loss": 0.2568356037139893, "step": 112040 }, { "epoch": 0.4810540686741712, "grad_norm": 1.2762078046798706, "learning_rate": 5.2118779265800296e-05, "loss": 0.1292945384979248, "step": 112050 }, { "epoch": 0.4810970007641912, "grad_norm": 1.3804891109466553, "learning_rate": 5.2114467545682674e-05, "loss": 0.32599267959594724, "step": 112060 }, { "epoch": 0.4811399328542112, "grad_norm": 0.0030066012404859066, "learning_rate": 5.211015582556506e-05, "loss": 0.33525660037994387, "step": 112070 }, { "epoch": 0.48118286494423124, "grad_norm": 3.0664565563201904, "learning_rate": 5.2105844105447435e-05, "loss": 0.2135364294052124, "step": 112080 }, { "epoch": 0.4812257970342512, "grad_norm": 1.4243364334106445, "learning_rate": 5.210153238532981e-05, "loss": 0.33560497760772706, "step": 112090 }, { "epoch": 0.4812687291242712, "grad_norm": 6.101076126098633, "learning_rate": 5.209722066521219e-05, "loss": 0.43417649269104003, "step": 112100 }, { "epoch": 0.48131166121429125, "grad_norm": 0.004235812928527594, "learning_rate": 5.2092908945094554e-05, "loss": 0.2360063076019287, "step": 112110 }, { "epoch": 0.48135459330431124, "grad_norm": 3.2225868701934814, "learning_rate": 5.208859722497693e-05, "loss": 0.3149883270263672, "step": 112120 }, { "epoch": 0.4813975253943312, "grad_norm": 0.22777503728866577, "learning_rate": 5.208428550485931e-05, "loss": 0.11221933364868164, "step": 112130 }, { "epoch": 0.48144045748435127, "grad_norm": 0.0066849710419774055, "learning_rate": 5.2079973784741686e-05, "loss": 0.20609259605407715, "step": 112140 }, { "epoch": 0.48148338957437126, "grad_norm": 2.6385879516601562, "learning_rate": 5.207566206462406e-05, "loss": 0.10182238817214966, "step": 112150 }, { "epoch": 0.48152632166439124, "grad_norm": 6.075347900390625, "learning_rate": 5.207135034450644e-05, "loss": 0.1820530891418457, "step": 112160 }, { "epoch": 0.4815692537544113, "grad_norm": 0.19385981559753418, "learning_rate": 5.206703862438882e-05, "loss": 0.25064907073974607, "step": 112170 }, { "epoch": 0.4816121858444313, "grad_norm": 0.003163368906825781, "learning_rate": 5.2062726904271195e-05, "loss": 0.2682561159133911, "step": 112180 }, { "epoch": 0.48165511793445126, "grad_norm": 0.0014554493827745318, "learning_rate": 5.2058415184153566e-05, "loss": 0.07060363292694091, "step": 112190 }, { "epoch": 0.4816980500244713, "grad_norm": 1.6436724662780762, "learning_rate": 5.205410346403594e-05, "loss": 0.2483814001083374, "step": 112200 }, { "epoch": 0.4817409821144913, "grad_norm": 0.10719986259937286, "learning_rate": 5.204979174391832e-05, "loss": 0.10259265899658203, "step": 112210 }, { "epoch": 0.4817839142045113, "grad_norm": 0.07681338489055634, "learning_rate": 5.20454800238007e-05, "loss": 0.1327905535697937, "step": 112220 }, { "epoch": 0.4818268462945313, "grad_norm": 2.0866005420684814, "learning_rate": 5.2041168303683075e-05, "loss": 0.16332383155822755, "step": 112230 }, { "epoch": 0.4818697783845513, "grad_norm": 0.059452347457408905, "learning_rate": 5.203685658356545e-05, "loss": 0.25697765350341795, "step": 112240 }, { "epoch": 0.48191271047457135, "grad_norm": 0.689960777759552, "learning_rate": 5.203254486344783e-05, "loss": 0.22725882530212402, "step": 112250 }, { "epoch": 0.48195564256459134, "grad_norm": 2.9958412647247314, "learning_rate": 5.202823314333021e-05, "loss": 0.11253809928894043, "step": 112260 }, { "epoch": 0.4819985746546113, "grad_norm": 0.010006138123571873, "learning_rate": 5.202392142321257e-05, "loss": 0.2789454936981201, "step": 112270 }, { "epoch": 0.48204150674463137, "grad_norm": 29.61517906188965, "learning_rate": 5.201960970309495e-05, "loss": 0.29062979221343993, "step": 112280 }, { "epoch": 0.48208443883465135, "grad_norm": 1.6461533308029175, "learning_rate": 5.201529798297733e-05, "loss": 0.26518611907958983, "step": 112290 }, { "epoch": 0.48212737092467134, "grad_norm": 0.8490138053894043, "learning_rate": 5.201098626285971e-05, "loss": 0.14062355756759642, "step": 112300 }, { "epoch": 0.4821703030146914, "grad_norm": 1.953096628189087, "learning_rate": 5.200667454274209e-05, "loss": 0.2001904010772705, "step": 112310 }, { "epoch": 0.48221323510471137, "grad_norm": 0.07899369299411774, "learning_rate": 5.2002362822624464e-05, "loss": 0.078703773021698, "step": 112320 }, { "epoch": 0.48225616719473136, "grad_norm": 0.0437178909778595, "learning_rate": 5.199805110250684e-05, "loss": 0.08275541067123413, "step": 112330 }, { "epoch": 0.4822990992847514, "grad_norm": 0.14887292683124542, "learning_rate": 5.199373938238922e-05, "loss": 0.15887333154678346, "step": 112340 }, { "epoch": 0.4823420313747714, "grad_norm": 0.015832485631108284, "learning_rate": 5.198942766227158e-05, "loss": 0.2449730396270752, "step": 112350 }, { "epoch": 0.48238496346479137, "grad_norm": 1.3846042156219482, "learning_rate": 5.198511594215396e-05, "loss": 0.4144141674041748, "step": 112360 }, { "epoch": 0.4824278955548114, "grad_norm": 0.39883336424827576, "learning_rate": 5.198080422203634e-05, "loss": 0.17197943925857545, "step": 112370 }, { "epoch": 0.4824708276448314, "grad_norm": 0.035733215510845184, "learning_rate": 5.1976492501918715e-05, "loss": 0.2461705446243286, "step": 112380 }, { "epoch": 0.4825137597348514, "grad_norm": 4.164612293243408, "learning_rate": 5.197218078180109e-05, "loss": 0.0967523694038391, "step": 112390 }, { "epoch": 0.48255669182487143, "grad_norm": 1.2880102396011353, "learning_rate": 5.196786906168347e-05, "loss": 0.10995590686798096, "step": 112400 }, { "epoch": 0.4825996239148914, "grad_norm": 22.918487548828125, "learning_rate": 5.196355734156585e-05, "loss": 0.14426627159118652, "step": 112410 }, { "epoch": 0.4826425560049114, "grad_norm": 0.009621814824640751, "learning_rate": 5.1959245621448224e-05, "loss": 0.05674814581871033, "step": 112420 }, { "epoch": 0.48268548809493145, "grad_norm": 1.0595000982284546, "learning_rate": 5.19549339013306e-05, "loss": 0.17759565114974976, "step": 112430 }, { "epoch": 0.48272842018495143, "grad_norm": 0.08510831743478775, "learning_rate": 5.195062218121297e-05, "loss": 0.4445448875427246, "step": 112440 }, { "epoch": 0.4827713522749715, "grad_norm": 28.908931732177734, "learning_rate": 5.194631046109535e-05, "loss": 0.34495205879211427, "step": 112450 }, { "epoch": 0.48281428436499146, "grad_norm": 0.07188957184553146, "learning_rate": 5.194199874097773e-05, "loss": 0.23050713539123535, "step": 112460 }, { "epoch": 0.48285721645501145, "grad_norm": 0.01424756832420826, "learning_rate": 5.1937687020860104e-05, "loss": 0.14939489364624023, "step": 112470 }, { "epoch": 0.4829001485450315, "grad_norm": 1.1364645957946777, "learning_rate": 5.193337530074248e-05, "loss": 0.3412284851074219, "step": 112480 }, { "epoch": 0.4829430806350515, "grad_norm": 0.003248439868912101, "learning_rate": 5.192906358062486e-05, "loss": 0.1077422022819519, "step": 112490 }, { "epoch": 0.48298601272507147, "grad_norm": 0.3519462049007416, "learning_rate": 5.1924751860507236e-05, "loss": 0.27584822177886964, "step": 112500 }, { "epoch": 0.4830289448150915, "grad_norm": 0.0038426381070166826, "learning_rate": 5.1920440140389614e-05, "loss": 0.19264932870864868, "step": 112510 }, { "epoch": 0.4830718769051115, "grad_norm": 0.17295725643634796, "learning_rate": 5.1916128420271984e-05, "loss": 0.4067643642425537, "step": 112520 }, { "epoch": 0.4831148089951315, "grad_norm": 0.014537591487169266, "learning_rate": 5.191181670015436e-05, "loss": 0.12884639501571654, "step": 112530 }, { "epoch": 0.4831577410851515, "grad_norm": 3.0052669048309326, "learning_rate": 5.190750498003674e-05, "loss": 0.16474716663360595, "step": 112540 }, { "epoch": 0.4832006731751715, "grad_norm": 0.0008527276222594082, "learning_rate": 5.1903193259919116e-05, "loss": 0.15970051288604736, "step": 112550 }, { "epoch": 0.4832436052651915, "grad_norm": 3.4651949405670166, "learning_rate": 5.1898881539801494e-05, "loss": 0.3635735273361206, "step": 112560 }, { "epoch": 0.48328653735521154, "grad_norm": 0.04663752019405365, "learning_rate": 5.189456981968387e-05, "loss": 0.16978216171264648, "step": 112570 }, { "epoch": 0.48332946944523153, "grad_norm": 0.04115908965468407, "learning_rate": 5.189025809956625e-05, "loss": 0.18163001537322998, "step": 112580 }, { "epoch": 0.4833724015352515, "grad_norm": 0.003294061403721571, "learning_rate": 5.1885946379448626e-05, "loss": 0.21141493320465088, "step": 112590 }, { "epoch": 0.48341533362527156, "grad_norm": 0.26114872097969055, "learning_rate": 5.188163465933099e-05, "loss": 0.2178705930709839, "step": 112600 }, { "epoch": 0.48345826571529155, "grad_norm": 0.08138860762119293, "learning_rate": 5.187732293921337e-05, "loss": 0.1656543493270874, "step": 112610 }, { "epoch": 0.48350119780531153, "grad_norm": 0.029856372624635696, "learning_rate": 5.1873011219095744e-05, "loss": 0.27136709690093996, "step": 112620 }, { "epoch": 0.4835441298953316, "grad_norm": 0.8668439388275146, "learning_rate": 5.186869949897812e-05, "loss": 0.06196349859237671, "step": 112630 }, { "epoch": 0.48358706198535156, "grad_norm": 0.5561325550079346, "learning_rate": 5.18643877788605e-05, "loss": 0.05971835851669312, "step": 112640 }, { "epoch": 0.48362999407537155, "grad_norm": 0.10063795745372772, "learning_rate": 5.1860076058742876e-05, "loss": 0.21844873428344727, "step": 112650 }, { "epoch": 0.4836729261653916, "grad_norm": 0.0034989193081855774, "learning_rate": 5.1855764338625253e-05, "loss": 0.1683848023414612, "step": 112660 }, { "epoch": 0.4837158582554116, "grad_norm": 0.005735354032367468, "learning_rate": 5.185145261850764e-05, "loss": 0.05226213932037353, "step": 112670 }, { "epoch": 0.4837587903454316, "grad_norm": 0.01305412221699953, "learning_rate": 5.184714089839e-05, "loss": 0.1914324641227722, "step": 112680 }, { "epoch": 0.4838017224354516, "grad_norm": 0.005273500457406044, "learning_rate": 5.184282917827238e-05, "loss": 0.09522543549537658, "step": 112690 }, { "epoch": 0.4838446545254716, "grad_norm": 1.982298731803894, "learning_rate": 5.1838517458154756e-05, "loss": 0.1866297483444214, "step": 112700 }, { "epoch": 0.48388758661549164, "grad_norm": 1.0073444843292236, "learning_rate": 5.183420573803713e-05, "loss": 0.1342653751373291, "step": 112710 }, { "epoch": 0.4839305187055116, "grad_norm": 2.56182599067688, "learning_rate": 5.182989401791951e-05, "loss": 0.5220149040222168, "step": 112720 }, { "epoch": 0.4839734507955316, "grad_norm": 0.007171725854277611, "learning_rate": 5.182558229780189e-05, "loss": 0.1385490655899048, "step": 112730 }, { "epoch": 0.48401638288555165, "grad_norm": 0.1748906672000885, "learning_rate": 5.1821270577684265e-05, "loss": 0.19893691539764405, "step": 112740 }, { "epoch": 0.48405931497557164, "grad_norm": 3.96621036529541, "learning_rate": 5.181695885756664e-05, "loss": 0.4268038272857666, "step": 112750 }, { "epoch": 0.48410224706559163, "grad_norm": 0.04518533870577812, "learning_rate": 5.181264713744902e-05, "loss": 0.04139855802059174, "step": 112760 }, { "epoch": 0.48414517915561167, "grad_norm": 0.47020474076271057, "learning_rate": 5.180833541733139e-05, "loss": 0.20869805812835693, "step": 112770 }, { "epoch": 0.48418811124563166, "grad_norm": 0.0010732099181041121, "learning_rate": 5.180402369721377e-05, "loss": 0.18752647638320924, "step": 112780 }, { "epoch": 0.48423104333565165, "grad_norm": 0.10524271428585052, "learning_rate": 5.1799711977096145e-05, "loss": 0.14028940200805665, "step": 112790 }, { "epoch": 0.4842739754256717, "grad_norm": 0.10299362242221832, "learning_rate": 5.179540025697852e-05, "loss": 0.2775475740432739, "step": 112800 }, { "epoch": 0.4843169075156917, "grad_norm": 0.15372726321220398, "learning_rate": 5.17910885368609e-05, "loss": 0.22584834098815917, "step": 112810 }, { "epoch": 0.48435983960571166, "grad_norm": 0.01749584637582302, "learning_rate": 5.178677681674328e-05, "loss": 0.31413612365722654, "step": 112820 }, { "epoch": 0.4844027716957317, "grad_norm": 0.005402527749538422, "learning_rate": 5.1782465096625655e-05, "loss": 0.3106074810028076, "step": 112830 }, { "epoch": 0.4844457037857517, "grad_norm": 2.2965476512908936, "learning_rate": 5.177815337650803e-05, "loss": 0.2918811798095703, "step": 112840 }, { "epoch": 0.4844886358757717, "grad_norm": 0.01444154977798462, "learning_rate": 5.1773841656390396e-05, "loss": 0.1380111336708069, "step": 112850 }, { "epoch": 0.4845315679657917, "grad_norm": 0.0188368521630764, "learning_rate": 5.176952993627277e-05, "loss": 0.20080742835998536, "step": 112860 }, { "epoch": 0.4845745000558117, "grad_norm": 0.36231333017349243, "learning_rate": 5.176521821615515e-05, "loss": 0.223512601852417, "step": 112870 }, { "epoch": 0.48461743214583175, "grad_norm": 0.07586616277694702, "learning_rate": 5.176090649603753e-05, "loss": 0.07849234938621522, "step": 112880 }, { "epoch": 0.48466036423585174, "grad_norm": 0.36311033368110657, "learning_rate": 5.175659477591991e-05, "loss": 0.28073141574859617, "step": 112890 }, { "epoch": 0.4847032963258717, "grad_norm": 0.022957701236009598, "learning_rate": 5.175228305580229e-05, "loss": 0.31559345722198484, "step": 112900 }, { "epoch": 0.48474622841589177, "grad_norm": 1.596835970878601, "learning_rate": 5.174797133568467e-05, "loss": 0.2369527578353882, "step": 112910 }, { "epoch": 0.48478916050591175, "grad_norm": 0.09815307706594467, "learning_rate": 5.1743659615567044e-05, "loss": 0.3749623537063599, "step": 112920 }, { "epoch": 0.48483209259593174, "grad_norm": 0.024481289088726044, "learning_rate": 5.173934789544941e-05, "loss": 0.10471458435058593, "step": 112930 }, { "epoch": 0.4848750246859518, "grad_norm": 2.419257164001465, "learning_rate": 5.1735036175331785e-05, "loss": 0.15412943363189696, "step": 112940 }, { "epoch": 0.48491795677597177, "grad_norm": 0.015406905673444271, "learning_rate": 5.173072445521416e-05, "loss": 0.1519034743309021, "step": 112950 }, { "epoch": 0.48496088886599176, "grad_norm": 0.03381875902414322, "learning_rate": 5.172641273509654e-05, "loss": 0.40987367630004884, "step": 112960 }, { "epoch": 0.4850038209560118, "grad_norm": 0.03254738822579384, "learning_rate": 5.172210101497892e-05, "loss": 0.1492979645729065, "step": 112970 }, { "epoch": 0.4850467530460318, "grad_norm": 0.060563910752534866, "learning_rate": 5.1717789294861295e-05, "loss": 0.22253785133361817, "step": 112980 }, { "epoch": 0.4850896851360518, "grad_norm": 0.05428497865796089, "learning_rate": 5.171347757474367e-05, "loss": 0.1761980414390564, "step": 112990 }, { "epoch": 0.4851326172260718, "grad_norm": 7.6052141189575195, "learning_rate": 5.170916585462605e-05, "loss": 0.02835783362388611, "step": 113000 }, { "epoch": 0.4851326172260718, "eval_loss": 0.40955492854118347, "eval_runtime": 27.2062, "eval_samples_per_second": 3.676, "eval_steps_per_second": 3.676, "step": 113000 }, { "epoch": 0.4851755493160918, "grad_norm": 0.03771714121103287, "learning_rate": 5.170485413450842e-05, "loss": 0.18397490978240966, "step": 113010 }, { "epoch": 0.4852184814061118, "grad_norm": 0.0032267896458506584, "learning_rate": 5.17005424143908e-05, "loss": 0.044160327315330504, "step": 113020 }, { "epoch": 0.48526141349613183, "grad_norm": 1.068603754043579, "learning_rate": 5.1696230694273174e-05, "loss": 0.15256195068359374, "step": 113030 }, { "epoch": 0.4853043455861518, "grad_norm": 6.162492275238037, "learning_rate": 5.169191897415555e-05, "loss": 0.18141252994537355, "step": 113040 }, { "epoch": 0.4853472776761718, "grad_norm": 0.0070702810771763325, "learning_rate": 5.168760725403793e-05, "loss": 0.49767394065856935, "step": 113050 }, { "epoch": 0.48539020976619185, "grad_norm": 0.0628747045993805, "learning_rate": 5.1683295533920306e-05, "loss": 0.1891782522201538, "step": 113060 }, { "epoch": 0.48543314185621184, "grad_norm": 0.00538325309753418, "learning_rate": 5.1678983813802684e-05, "loss": 0.12313296794891357, "step": 113070 }, { "epoch": 0.4854760739462318, "grad_norm": 0.3890695869922638, "learning_rate": 5.167467209368506e-05, "loss": 0.007072269916534424, "step": 113080 }, { "epoch": 0.48551900603625187, "grad_norm": 0.1812918335199356, "learning_rate": 5.1670360373567425e-05, "loss": 0.16834324598312378, "step": 113090 }, { "epoch": 0.48556193812627185, "grad_norm": 1.4702471494674683, "learning_rate": 5.16660486534498e-05, "loss": 0.11569063663482666, "step": 113100 }, { "epoch": 0.4856048702162919, "grad_norm": 0.1837807446718216, "learning_rate": 5.1661736933332186e-05, "loss": 0.12595237493515016, "step": 113110 }, { "epoch": 0.4856478023063119, "grad_norm": 3.104374885559082, "learning_rate": 5.1657425213214564e-05, "loss": 0.2997664213180542, "step": 113120 }, { "epoch": 0.48569073439633187, "grad_norm": 0.06540167331695557, "learning_rate": 5.165311349309694e-05, "loss": 0.15600260496139526, "step": 113130 }, { "epoch": 0.4857336664863519, "grad_norm": 2.046344041824341, "learning_rate": 5.164880177297932e-05, "loss": 0.1797309160232544, "step": 113140 }, { "epoch": 0.4857765985763719, "grad_norm": 0.003572971560060978, "learning_rate": 5.1644490052861696e-05, "loss": 0.2054067850112915, "step": 113150 }, { "epoch": 0.4858195306663919, "grad_norm": 0.017250074073672295, "learning_rate": 5.164017833274407e-05, "loss": 0.40507144927978517, "step": 113160 }, { "epoch": 0.48586246275641193, "grad_norm": 0.5369052290916443, "learning_rate": 5.163586661262645e-05, "loss": 0.448713493347168, "step": 113170 }, { "epoch": 0.4859053948464319, "grad_norm": 0.4330086410045624, "learning_rate": 5.1631554892508814e-05, "loss": 0.1150307297706604, "step": 113180 }, { "epoch": 0.4859483269364519, "grad_norm": 0.06005656346678734, "learning_rate": 5.162724317239119e-05, "loss": 0.5557120323181153, "step": 113190 }, { "epoch": 0.48599125902647194, "grad_norm": 0.04298854619264603, "learning_rate": 5.162293145227357e-05, "loss": 0.14865396022796631, "step": 113200 }, { "epoch": 0.48603419111649193, "grad_norm": 0.14638099074363708, "learning_rate": 5.1618619732155946e-05, "loss": 0.14868471622467042, "step": 113210 }, { "epoch": 0.4860771232065119, "grad_norm": 0.03624096140265465, "learning_rate": 5.1614308012038324e-05, "loss": 0.20731801986694337, "step": 113220 }, { "epoch": 0.48612005529653196, "grad_norm": 0.004342042841017246, "learning_rate": 5.16099962919207e-05, "loss": 0.11572116613388062, "step": 113230 }, { "epoch": 0.48616298738655195, "grad_norm": 1.1985704898834229, "learning_rate": 5.160568457180308e-05, "loss": 0.11593867540359497, "step": 113240 }, { "epoch": 0.48620591947657193, "grad_norm": 0.6859614849090576, "learning_rate": 5.1601372851685456e-05, "loss": 0.17292320728302002, "step": 113250 }, { "epoch": 0.486248851566592, "grad_norm": 1.7073107957839966, "learning_rate": 5.1597061131567826e-05, "loss": 0.32567050457000735, "step": 113260 }, { "epoch": 0.48629178365661196, "grad_norm": 1.3878463506698608, "learning_rate": 5.1592749411450204e-05, "loss": 0.32795984745025636, "step": 113270 }, { "epoch": 0.48633471574663195, "grad_norm": 0.05851823836565018, "learning_rate": 5.158843769133258e-05, "loss": 0.1408895492553711, "step": 113280 }, { "epoch": 0.486377647836652, "grad_norm": 0.006685543339699507, "learning_rate": 5.158412597121496e-05, "loss": 0.16352103948593139, "step": 113290 }, { "epoch": 0.486420579926672, "grad_norm": 0.2041519284248352, "learning_rate": 5.1579814251097336e-05, "loss": 0.3879366397857666, "step": 113300 }, { "epoch": 0.486463512016692, "grad_norm": 1.1903445720672607, "learning_rate": 5.157550253097971e-05, "loss": 0.3377037525177002, "step": 113310 }, { "epoch": 0.486506444106712, "grad_norm": 2.3848395347595215, "learning_rate": 5.157119081086209e-05, "loss": 0.18910210132598876, "step": 113320 }, { "epoch": 0.486549376196732, "grad_norm": 1.192986249923706, "learning_rate": 5.156687909074447e-05, "loss": 0.24098713397979737, "step": 113330 }, { "epoch": 0.48659230828675204, "grad_norm": 0.9725649356842041, "learning_rate": 5.156256737062684e-05, "loss": 0.3244537115097046, "step": 113340 }, { "epoch": 0.486635240376772, "grad_norm": 0.1657193899154663, "learning_rate": 5.1558255650509216e-05, "loss": 0.12726542949676514, "step": 113350 }, { "epoch": 0.486678172466792, "grad_norm": 0.17325559258460999, "learning_rate": 5.155394393039159e-05, "loss": 0.2331669569015503, "step": 113360 }, { "epoch": 0.48672110455681206, "grad_norm": 1.8775490522384644, "learning_rate": 5.154963221027397e-05, "loss": 0.15301434993743895, "step": 113370 }, { "epoch": 0.48676403664683204, "grad_norm": 0.09411582350730896, "learning_rate": 5.154532049015635e-05, "loss": 0.15416059494018555, "step": 113380 }, { "epoch": 0.48680696873685203, "grad_norm": 2.0111148357391357, "learning_rate": 5.1541008770038725e-05, "loss": 0.3471565008163452, "step": 113390 }, { "epoch": 0.4868499008268721, "grad_norm": 0.0025355094112455845, "learning_rate": 5.15366970499211e-05, "loss": 0.38851518630981446, "step": 113400 }, { "epoch": 0.48689283291689206, "grad_norm": 0.3443334996700287, "learning_rate": 5.153238532980348e-05, "loss": 0.2016951322555542, "step": 113410 }, { "epoch": 0.48693576500691205, "grad_norm": 0.005935294553637505, "learning_rate": 5.1528073609685843e-05, "loss": 0.3032489538192749, "step": 113420 }, { "epoch": 0.4869786970969321, "grad_norm": 5.571764945983887, "learning_rate": 5.152376188956822e-05, "loss": 0.32014267444610595, "step": 113430 }, { "epoch": 0.4870216291869521, "grad_norm": 0.26585423946380615, "learning_rate": 5.15194501694506e-05, "loss": 0.09606272578239441, "step": 113440 }, { "epoch": 0.48706456127697206, "grad_norm": 5.361762523651123, "learning_rate": 5.1515138449332975e-05, "loss": 0.3446836233139038, "step": 113450 }, { "epoch": 0.4871074933669921, "grad_norm": 1.5857845544815063, "learning_rate": 5.151082672921535e-05, "loss": 0.08704413771629334, "step": 113460 }, { "epoch": 0.4871504254570121, "grad_norm": 0.07909522950649261, "learning_rate": 5.150651500909773e-05, "loss": 0.10852447748184205, "step": 113470 }, { "epoch": 0.4871933575470321, "grad_norm": 0.011265222914516926, "learning_rate": 5.1502203288980114e-05, "loss": 0.03509455025196075, "step": 113480 }, { "epoch": 0.4872362896370521, "grad_norm": 0.5867186784744263, "learning_rate": 5.149789156886249e-05, "loss": 0.2819956302642822, "step": 113490 }, { "epoch": 0.4872792217270721, "grad_norm": 0.015352309681475163, "learning_rate": 5.149357984874487e-05, "loss": 0.17799873352050782, "step": 113500 }, { "epoch": 0.4873221538170921, "grad_norm": 3.170710802078247, "learning_rate": 5.148926812862723e-05, "loss": 0.2149796962738037, "step": 113510 }, { "epoch": 0.48736508590711214, "grad_norm": 0.02258378453552723, "learning_rate": 5.148495640850961e-05, "loss": 0.16186734437942504, "step": 113520 }, { "epoch": 0.4874080179971321, "grad_norm": 0.015534556470811367, "learning_rate": 5.148064468839199e-05, "loss": 0.281885027885437, "step": 113530 }, { "epoch": 0.48745095008715217, "grad_norm": 0.7893404364585876, "learning_rate": 5.1476332968274365e-05, "loss": 0.13327269554138182, "step": 113540 }, { "epoch": 0.48749388217717216, "grad_norm": 0.028485532850027084, "learning_rate": 5.147202124815674e-05, "loss": 0.27498056888580324, "step": 113550 }, { "epoch": 0.48753681426719214, "grad_norm": 3.629746437072754, "learning_rate": 5.146770952803912e-05, "loss": 0.19867525100708008, "step": 113560 }, { "epoch": 0.4875797463572122, "grad_norm": 1.8920320272445679, "learning_rate": 5.14633978079215e-05, "loss": 0.30650577545166013, "step": 113570 }, { "epoch": 0.48762267844723217, "grad_norm": 3.0940048694610596, "learning_rate": 5.1459086087803874e-05, "loss": 0.19545323848724366, "step": 113580 }, { "epoch": 0.48766561053725216, "grad_norm": 0.041216179728507996, "learning_rate": 5.1454774367686245e-05, "loss": 0.2117311477661133, "step": 113590 }, { "epoch": 0.4877085426272722, "grad_norm": 0.07402265071868896, "learning_rate": 5.145046264756862e-05, "loss": 0.21154932975769042, "step": 113600 }, { "epoch": 0.4877514747172922, "grad_norm": 0.004615292884409428, "learning_rate": 5.1446150927451e-05, "loss": 0.2221027135848999, "step": 113610 }, { "epoch": 0.4877944068073122, "grad_norm": 0.05810505896806717, "learning_rate": 5.144183920733338e-05, "loss": 0.12148820161819458, "step": 113620 }, { "epoch": 0.4878373388973322, "grad_norm": 0.13232535123825073, "learning_rate": 5.1437527487215754e-05, "loss": 0.3908156633377075, "step": 113630 }, { "epoch": 0.4878802709873522, "grad_norm": 0.018233170732855797, "learning_rate": 5.143321576709813e-05, "loss": 0.19533765316009521, "step": 113640 }, { "epoch": 0.4879232030773722, "grad_norm": 0.09772051870822906, "learning_rate": 5.142890404698051e-05, "loss": 0.2228538751602173, "step": 113650 }, { "epoch": 0.48796613516739223, "grad_norm": 0.013814345002174377, "learning_rate": 5.1424592326862886e-05, "loss": 0.40313119888305665, "step": 113660 }, { "epoch": 0.4880090672574122, "grad_norm": 0.018356265500187874, "learning_rate": 5.142028060674525e-05, "loss": 0.3107442855834961, "step": 113670 }, { "epoch": 0.4880519993474322, "grad_norm": 1.6710891723632812, "learning_rate": 5.141596888662763e-05, "loss": 0.11449255943298339, "step": 113680 }, { "epoch": 0.48809493143745225, "grad_norm": 0.003830577014014125, "learning_rate": 5.1411657166510005e-05, "loss": 0.1889503240585327, "step": 113690 }, { "epoch": 0.48813786352747224, "grad_norm": 0.1663198471069336, "learning_rate": 5.140734544639239e-05, "loss": 0.2108973741531372, "step": 113700 }, { "epoch": 0.4881807956174922, "grad_norm": 0.09352283924818039, "learning_rate": 5.1403033726274766e-05, "loss": 0.342281436920166, "step": 113710 }, { "epoch": 0.48822372770751227, "grad_norm": 16.00942039489746, "learning_rate": 5.139872200615714e-05, "loss": 0.16771682500839233, "step": 113720 }, { "epoch": 0.48826665979753225, "grad_norm": 0.3389633595943451, "learning_rate": 5.139441028603952e-05, "loss": 0.2200084686279297, "step": 113730 }, { "epoch": 0.4883095918875523, "grad_norm": 0.07837370038032532, "learning_rate": 5.13900985659219e-05, "loss": 0.10341674089431763, "step": 113740 }, { "epoch": 0.4883525239775723, "grad_norm": 0.8534498810768127, "learning_rate": 5.138578684580426e-05, "loss": 0.19216973781585694, "step": 113750 }, { "epoch": 0.48839545606759227, "grad_norm": 0.02206382155418396, "learning_rate": 5.138147512568664e-05, "loss": 0.23950448036193847, "step": 113760 }, { "epoch": 0.4884383881576123, "grad_norm": 0.008670583367347717, "learning_rate": 5.1377163405569017e-05, "loss": 0.4655170440673828, "step": 113770 }, { "epoch": 0.4884813202476323, "grad_norm": 1.3391717672348022, "learning_rate": 5.1372851685451394e-05, "loss": 0.3201841115951538, "step": 113780 }, { "epoch": 0.4885242523376523, "grad_norm": 6.023839473724365, "learning_rate": 5.136853996533377e-05, "loss": 0.3036703586578369, "step": 113790 }, { "epoch": 0.48856718442767233, "grad_norm": 0.001943222712725401, "learning_rate": 5.136422824521615e-05, "loss": 0.34198386669158937, "step": 113800 }, { "epoch": 0.4886101165176923, "grad_norm": 3.020825147628784, "learning_rate": 5.1359916525098526e-05, "loss": 0.10895280838012696, "step": 113810 }, { "epoch": 0.4886530486077123, "grad_norm": 0.034754056483507156, "learning_rate": 5.13556048049809e-05, "loss": 0.055017054080963135, "step": 113820 }, { "epoch": 0.48869598069773235, "grad_norm": 1.155048131942749, "learning_rate": 5.1351293084863274e-05, "loss": 0.11602849960327148, "step": 113830 }, { "epoch": 0.48873891278775233, "grad_norm": 0.03798946365714073, "learning_rate": 5.134698136474565e-05, "loss": 0.09914991855621338, "step": 113840 }, { "epoch": 0.4887818448777723, "grad_norm": 0.05045421048998833, "learning_rate": 5.134266964462803e-05, "loss": 0.18398324251174927, "step": 113850 }, { "epoch": 0.48882477696779236, "grad_norm": 1.2780303955078125, "learning_rate": 5.1338357924510406e-05, "loss": 0.11606618165969848, "step": 113860 }, { "epoch": 0.48886770905781235, "grad_norm": 0.3048155605792999, "learning_rate": 5.133404620439278e-05, "loss": 0.3404239177703857, "step": 113870 }, { "epoch": 0.48891064114783234, "grad_norm": 1.7724405527114868, "learning_rate": 5.132973448427516e-05, "loss": 0.3235398530960083, "step": 113880 }, { "epoch": 0.4889535732378524, "grad_norm": 2.0320141315460205, "learning_rate": 5.132542276415754e-05, "loss": 0.4618537902832031, "step": 113890 }, { "epoch": 0.48899650532787237, "grad_norm": 0.7048388719558716, "learning_rate": 5.1321111044039915e-05, "loss": 0.26695349216461184, "step": 113900 }, { "epoch": 0.48903943741789235, "grad_norm": 0.5743983387947083, "learning_rate": 5.131679932392229e-05, "loss": 0.034299665689468385, "step": 113910 }, { "epoch": 0.4890823695079124, "grad_norm": 0.0030826705042272806, "learning_rate": 5.131248760380466e-05, "loss": 0.25197324752807615, "step": 113920 }, { "epoch": 0.4891253015979324, "grad_norm": 0.0842275321483612, "learning_rate": 5.130817588368704e-05, "loss": 0.2050330400466919, "step": 113930 }, { "epoch": 0.48916823368795237, "grad_norm": 0.2123071551322937, "learning_rate": 5.130386416356942e-05, "loss": 0.3663323402404785, "step": 113940 }, { "epoch": 0.4892111657779724, "grad_norm": 0.0033182615879923105, "learning_rate": 5.1299552443451795e-05, "loss": 0.2355727195739746, "step": 113950 }, { "epoch": 0.4892540978679924, "grad_norm": 0.00881041120737791, "learning_rate": 5.129524072333417e-05, "loss": 0.1516958475112915, "step": 113960 }, { "epoch": 0.48929702995801244, "grad_norm": 1.4162713289260864, "learning_rate": 5.129092900321655e-05, "loss": 0.28292834758758545, "step": 113970 }, { "epoch": 0.48933996204803243, "grad_norm": 0.005841487552970648, "learning_rate": 5.128661728309893e-05, "loss": 0.19476345777511597, "step": 113980 }, { "epoch": 0.4893828941380524, "grad_norm": 0.00311907846480608, "learning_rate": 5.1282305562981305e-05, "loss": 0.19814854860305786, "step": 113990 }, { "epoch": 0.48942582622807246, "grad_norm": 5.6313557624816895, "learning_rate": 5.127799384286367e-05, "loss": 0.23403961658477784, "step": 114000 }, { "epoch": 0.48942582622807246, "eval_loss": 0.40528884530067444, "eval_runtime": 27.1409, "eval_samples_per_second": 3.684, "eval_steps_per_second": 3.684, "step": 114000 }, { "epoch": 0.48946875831809245, "grad_norm": 1.033115267753601, "learning_rate": 5.1273682122746046e-05, "loss": 0.35855731964111326, "step": 114010 }, { "epoch": 0.48951169040811243, "grad_norm": 0.8171420097351074, "learning_rate": 5.126937040262842e-05, "loss": 0.1914085030555725, "step": 114020 }, { "epoch": 0.4895546224981325, "grad_norm": 0.03207477182149887, "learning_rate": 5.12650586825108e-05, "loss": 0.21447515487670898, "step": 114030 }, { "epoch": 0.48959755458815246, "grad_norm": 0.037823960185050964, "learning_rate": 5.126074696239318e-05, "loss": 0.24335236549377443, "step": 114040 }, { "epoch": 0.48964048667817245, "grad_norm": 5.856334686279297, "learning_rate": 5.1256435242275555e-05, "loss": 0.20237469673156738, "step": 114050 }, { "epoch": 0.4896834187681925, "grad_norm": 0.7282851338386536, "learning_rate": 5.125212352215793e-05, "loss": 0.15984612703323364, "step": 114060 }, { "epoch": 0.4897263508582125, "grad_norm": 0.05635266751050949, "learning_rate": 5.124781180204031e-05, "loss": 0.14566596746444702, "step": 114070 }, { "epoch": 0.48976928294823247, "grad_norm": 0.4587521255016327, "learning_rate": 5.124350008192268e-05, "loss": 0.03238787353038788, "step": 114080 }, { "epoch": 0.4898122150382525, "grad_norm": 0.017040731385350227, "learning_rate": 5.123918836180506e-05, "loss": 0.0728563904762268, "step": 114090 }, { "epoch": 0.4898551471282725, "grad_norm": 1.607393503189087, "learning_rate": 5.1234876641687435e-05, "loss": 0.05377238988876343, "step": 114100 }, { "epoch": 0.4898980792182925, "grad_norm": 0.1536511927843094, "learning_rate": 5.123056492156981e-05, "loss": 0.25159103870391847, "step": 114110 }, { "epoch": 0.4899410113083125, "grad_norm": 0.09835078567266464, "learning_rate": 5.122625320145219e-05, "loss": 0.1219517469406128, "step": 114120 }, { "epoch": 0.4899839433983325, "grad_norm": 0.20577646791934967, "learning_rate": 5.122194148133457e-05, "loss": 0.19758754968643188, "step": 114130 }, { "epoch": 0.4900268754883525, "grad_norm": 0.0003962178307119757, "learning_rate": 5.1217629761216944e-05, "loss": 0.1164817452430725, "step": 114140 }, { "epoch": 0.49006980757837254, "grad_norm": 36.232933044433594, "learning_rate": 5.121331804109932e-05, "loss": 0.262998104095459, "step": 114150 }, { "epoch": 0.4901127396683925, "grad_norm": 0.004099687095731497, "learning_rate": 5.120900632098169e-05, "loss": 0.2619943141937256, "step": 114160 }, { "epoch": 0.49015567175841257, "grad_norm": 1.8435301780700684, "learning_rate": 5.120469460086407e-05, "loss": 0.3348772764205933, "step": 114170 }, { "epoch": 0.49019860384843256, "grad_norm": 0.10621852427721024, "learning_rate": 5.120038288074645e-05, "loss": 0.26117191314697263, "step": 114180 }, { "epoch": 0.49024153593845254, "grad_norm": 1.9179303646087646, "learning_rate": 5.1196071160628824e-05, "loss": 0.3860593318939209, "step": 114190 }, { "epoch": 0.4902844680284726, "grad_norm": 0.0544576533138752, "learning_rate": 5.11917594405112e-05, "loss": 0.33986380100250246, "step": 114200 }, { "epoch": 0.4903274001184926, "grad_norm": 0.003706451505422592, "learning_rate": 5.118744772039358e-05, "loss": 0.12367761135101318, "step": 114210 }, { "epoch": 0.49037033220851256, "grad_norm": 0.1084350049495697, "learning_rate": 5.1183136000275956e-05, "loss": 0.2694986343383789, "step": 114220 }, { "epoch": 0.4904132642985326, "grad_norm": 1.461536169052124, "learning_rate": 5.1178824280158334e-05, "loss": 0.17770261764526368, "step": 114230 }, { "epoch": 0.4904561963885526, "grad_norm": 5.399726390838623, "learning_rate": 5.117451256004071e-05, "loss": 0.3205733299255371, "step": 114240 }, { "epoch": 0.4904991284785726, "grad_norm": 0.01629946567118168, "learning_rate": 5.1170200839923075e-05, "loss": 0.047853922843933104, "step": 114250 }, { "epoch": 0.4905420605685926, "grad_norm": 0.0395156666636467, "learning_rate": 5.116588911980545e-05, "loss": 0.3599144458770752, "step": 114260 }, { "epoch": 0.4905849926586126, "grad_norm": 3.736130475997925, "learning_rate": 5.116157739968783e-05, "loss": 0.299530553817749, "step": 114270 }, { "epoch": 0.4906279247486326, "grad_norm": 0.013340250588953495, "learning_rate": 5.115726567957021e-05, "loss": 0.1913688898086548, "step": 114280 }, { "epoch": 0.49067085683865264, "grad_norm": 3.088780403137207, "learning_rate": 5.1152953959452584e-05, "loss": 0.37713470458984377, "step": 114290 }, { "epoch": 0.4907137889286726, "grad_norm": 1.7537235021591187, "learning_rate": 5.114864223933497e-05, "loss": 0.2248607873916626, "step": 114300 }, { "epoch": 0.4907567210186926, "grad_norm": 0.20872154831886292, "learning_rate": 5.1144330519217346e-05, "loss": 0.3299898147583008, "step": 114310 }, { "epoch": 0.49079965310871265, "grad_norm": 0.05676392465829849, "learning_rate": 5.114001879909972e-05, "loss": 0.16998063325881957, "step": 114320 }, { "epoch": 0.49084258519873264, "grad_norm": 1.6964046955108643, "learning_rate": 5.113570707898209e-05, "loss": 0.21034905910491944, "step": 114330 }, { "epoch": 0.4908855172887526, "grad_norm": 0.03953026235103607, "learning_rate": 5.1131395358864464e-05, "loss": 0.4823922634124756, "step": 114340 }, { "epoch": 0.49092844937877267, "grad_norm": 0.2652646005153656, "learning_rate": 5.112708363874684e-05, "loss": 0.28086440563201903, "step": 114350 }, { "epoch": 0.49097138146879266, "grad_norm": 0.023406682536005974, "learning_rate": 5.112277191862922e-05, "loss": 0.14792462587356567, "step": 114360 }, { "epoch": 0.49101431355881264, "grad_norm": 0.017988061532378197, "learning_rate": 5.1118460198511596e-05, "loss": 0.10092716217041016, "step": 114370 }, { "epoch": 0.4910572456488327, "grad_norm": 0.343831866979599, "learning_rate": 5.1114148478393973e-05, "loss": 0.06460964083671569, "step": 114380 }, { "epoch": 0.4911001777388527, "grad_norm": 0.30238035321235657, "learning_rate": 5.110983675827635e-05, "loss": 0.2701455354690552, "step": 114390 }, { "epoch": 0.4911431098288727, "grad_norm": 0.0027301537338644266, "learning_rate": 5.110552503815873e-05, "loss": 0.21025633811950684, "step": 114400 }, { "epoch": 0.4911860419188927, "grad_norm": 0.024237489327788353, "learning_rate": 5.11012133180411e-05, "loss": 0.2406532049179077, "step": 114410 }, { "epoch": 0.4912289740089127, "grad_norm": 0.09928843379020691, "learning_rate": 5.1096901597923476e-05, "loss": 0.19101529121398925, "step": 114420 }, { "epoch": 0.49127190609893273, "grad_norm": 0.013309850357472897, "learning_rate": 5.1092589877805853e-05, "loss": 0.21105690002441407, "step": 114430 }, { "epoch": 0.4913148381889527, "grad_norm": 0.041601624339818954, "learning_rate": 5.108827815768823e-05, "loss": 0.22969114780426025, "step": 114440 }, { "epoch": 0.4913577702789727, "grad_norm": 0.0371188260614872, "learning_rate": 5.108396643757061e-05, "loss": 0.15212260484695433, "step": 114450 }, { "epoch": 0.49140070236899275, "grad_norm": 0.01270721573382616, "learning_rate": 5.1079654717452985e-05, "loss": 0.1889193534851074, "step": 114460 }, { "epoch": 0.49144363445901273, "grad_norm": 0.2520737946033478, "learning_rate": 5.107534299733536e-05, "loss": 0.2561648845672607, "step": 114470 }, { "epoch": 0.4914865665490327, "grad_norm": 2.448040008544922, "learning_rate": 5.107103127721774e-05, "loss": 0.33474059104919435, "step": 114480 }, { "epoch": 0.49152949863905276, "grad_norm": 0.042958565056324005, "learning_rate": 5.1066719557100104e-05, "loss": 0.19774354696273805, "step": 114490 }, { "epoch": 0.49157243072907275, "grad_norm": 0.07219936698675156, "learning_rate": 5.106240783698248e-05, "loss": 0.14528067111968995, "step": 114500 }, { "epoch": 0.49161536281909274, "grad_norm": 2.0706522464752197, "learning_rate": 5.105809611686486e-05, "loss": 0.11780478954315185, "step": 114510 }, { "epoch": 0.4916582949091128, "grad_norm": 0.023876963183283806, "learning_rate": 5.105378439674724e-05, "loss": 0.10775841474533081, "step": 114520 }, { "epoch": 0.49170122699913277, "grad_norm": 0.006041590124368668, "learning_rate": 5.104947267662962e-05, "loss": 0.0707572877407074, "step": 114530 }, { "epoch": 0.49174415908915275, "grad_norm": 0.09596258401870728, "learning_rate": 5.1045160956512e-05, "loss": 0.23318018913269042, "step": 114540 }, { "epoch": 0.4917870911791728, "grad_norm": 5.521082878112793, "learning_rate": 5.1040849236394375e-05, "loss": 0.3243843078613281, "step": 114550 }, { "epoch": 0.4918300232691928, "grad_norm": 6.089881896972656, "learning_rate": 5.103653751627675e-05, "loss": 0.15074145793914795, "step": 114560 }, { "epoch": 0.49187295535921277, "grad_norm": 0.09942862391471863, "learning_rate": 5.103222579615913e-05, "loss": 0.1948152780532837, "step": 114570 }, { "epoch": 0.4919158874492328, "grad_norm": 0.05901400372385979, "learning_rate": 5.102791407604149e-05, "loss": 0.27257773876190183, "step": 114580 }, { "epoch": 0.4919588195392528, "grad_norm": 0.6826162934303284, "learning_rate": 5.102360235592387e-05, "loss": 0.25472943782806395, "step": 114590 }, { "epoch": 0.49200175162927284, "grad_norm": 3.5605061054229736, "learning_rate": 5.101929063580625e-05, "loss": 0.33192465305328367, "step": 114600 }, { "epoch": 0.49204468371929283, "grad_norm": 1.9935866594314575, "learning_rate": 5.1014978915688625e-05, "loss": 0.27500338554382325, "step": 114610 }, { "epoch": 0.4920876158093128, "grad_norm": 0.005706002935767174, "learning_rate": 5.1010667195571e-05, "loss": 0.31219775676727296, "step": 114620 }, { "epoch": 0.49213054789933286, "grad_norm": 0.11540130525827408, "learning_rate": 5.100635547545338e-05, "loss": 0.22799878120422362, "step": 114630 }, { "epoch": 0.49217347998935285, "grad_norm": 5.40323543548584, "learning_rate": 5.100204375533576e-05, "loss": 0.23047494888305664, "step": 114640 }, { "epoch": 0.49221641207937283, "grad_norm": 0.0065496861934661865, "learning_rate": 5.0997732035218135e-05, "loss": 0.11466679573059083, "step": 114650 }, { "epoch": 0.4922593441693929, "grad_norm": 0.027168719097971916, "learning_rate": 5.0993420315100505e-05, "loss": 0.12369179725646973, "step": 114660 }, { "epoch": 0.49230227625941286, "grad_norm": 0.31811249256134033, "learning_rate": 5.098910859498288e-05, "loss": 0.1178821325302124, "step": 114670 }, { "epoch": 0.49234520834943285, "grad_norm": 0.0057378895580768585, "learning_rate": 5.098479687486526e-05, "loss": 0.3201324462890625, "step": 114680 }, { "epoch": 0.4923881404394529, "grad_norm": 0.03739020228385925, "learning_rate": 5.098048515474764e-05, "loss": 0.28364293575286864, "step": 114690 }, { "epoch": 0.4924310725294729, "grad_norm": 0.014455210417509079, "learning_rate": 5.0976173434630015e-05, "loss": 0.1237752914428711, "step": 114700 }, { "epoch": 0.49247400461949287, "grad_norm": 0.05205453932285309, "learning_rate": 5.097186171451239e-05, "loss": 0.18404200077056884, "step": 114710 }, { "epoch": 0.4925169367095129, "grad_norm": 12.226811408996582, "learning_rate": 5.096754999439477e-05, "loss": 0.4771873474121094, "step": 114720 }, { "epoch": 0.4925598687995329, "grad_norm": 2.7584354877471924, "learning_rate": 5.0963238274277147e-05, "loss": 0.2770639657974243, "step": 114730 }, { "epoch": 0.4926028008895529, "grad_norm": 0.0015160737093538046, "learning_rate": 5.095892655415952e-05, "loss": 0.2827488422393799, "step": 114740 }, { "epoch": 0.4926457329795729, "grad_norm": 0.006125219166278839, "learning_rate": 5.0954614834041894e-05, "loss": 0.06660739183425904, "step": 114750 }, { "epoch": 0.4926886650695929, "grad_norm": 0.1934569776058197, "learning_rate": 5.095030311392427e-05, "loss": 0.13013845682144165, "step": 114760 }, { "epoch": 0.4927315971596129, "grad_norm": 0.5882942080497742, "learning_rate": 5.094599139380665e-05, "loss": 0.19378734827041627, "step": 114770 }, { "epoch": 0.49277452924963294, "grad_norm": 0.19520607590675354, "learning_rate": 5.0941679673689027e-05, "loss": 0.24061408042907714, "step": 114780 }, { "epoch": 0.49281746133965293, "grad_norm": 0.09184468537569046, "learning_rate": 5.0937367953571404e-05, "loss": 0.12484440803527833, "step": 114790 }, { "epoch": 0.4928603934296729, "grad_norm": 0.0007245758315548301, "learning_rate": 5.093305623345378e-05, "loss": 0.0343919962644577, "step": 114800 }, { "epoch": 0.49290332551969296, "grad_norm": 0.10586666315793991, "learning_rate": 5.092874451333616e-05, "loss": 0.3540113210678101, "step": 114810 }, { "epoch": 0.49294625760971295, "grad_norm": 7.368129730224609, "learning_rate": 5.092443279321852e-05, "loss": 0.12444863319396973, "step": 114820 }, { "epoch": 0.492989189699733, "grad_norm": 1.8489593267440796, "learning_rate": 5.09201210731009e-05, "loss": 0.10474470853805543, "step": 114830 }, { "epoch": 0.493032121789753, "grad_norm": 2.487107515335083, "learning_rate": 5.091580935298328e-05, "loss": 0.22773370742797852, "step": 114840 }, { "epoch": 0.49307505387977296, "grad_norm": 0.15341585874557495, "learning_rate": 5.0911497632865654e-05, "loss": 0.15825777053833007, "step": 114850 }, { "epoch": 0.493117985969793, "grad_norm": 1.0390082597732544, "learning_rate": 5.090718591274803e-05, "loss": 0.14906710386276245, "step": 114860 }, { "epoch": 0.493160918059813, "grad_norm": 4.846892833709717, "learning_rate": 5.090287419263041e-05, "loss": 0.31097846031188964, "step": 114870 }, { "epoch": 0.493203850149833, "grad_norm": 4.312889099121094, "learning_rate": 5.0898562472512786e-05, "loss": 0.33429646492004395, "step": 114880 }, { "epoch": 0.493246782239853, "grad_norm": 0.00027503733872435987, "learning_rate": 5.089425075239517e-05, "loss": 0.06513903737068176, "step": 114890 }, { "epoch": 0.493289714329873, "grad_norm": 2.0348081588745117, "learning_rate": 5.0889939032277534e-05, "loss": 0.3529532432556152, "step": 114900 }, { "epoch": 0.493332646419893, "grad_norm": 0.02989332005381584, "learning_rate": 5.088562731215991e-05, "loss": 0.14261656999588013, "step": 114910 }, { "epoch": 0.49337557850991304, "grad_norm": 0.09872865676879883, "learning_rate": 5.088131559204229e-05, "loss": 0.23729093074798585, "step": 114920 }, { "epoch": 0.493418510599933, "grad_norm": 0.004361944738775492, "learning_rate": 5.0877003871924666e-05, "loss": 0.1671282172203064, "step": 114930 }, { "epoch": 0.493461442689953, "grad_norm": 0.19537287950515747, "learning_rate": 5.0872692151807044e-05, "loss": 0.27259490489959715, "step": 114940 }, { "epoch": 0.49350437477997305, "grad_norm": 0.21257755160331726, "learning_rate": 5.086838043168942e-05, "loss": 0.136817467212677, "step": 114950 }, { "epoch": 0.49354730686999304, "grad_norm": 1.6787389516830444, "learning_rate": 5.08640687115718e-05, "loss": 0.24155523777008056, "step": 114960 }, { "epoch": 0.49359023896001303, "grad_norm": 6.152478218078613, "learning_rate": 5.0859756991454176e-05, "loss": 0.09717947244644165, "step": 114970 }, { "epoch": 0.49363317105003307, "grad_norm": 1.884873390197754, "learning_rate": 5.085544527133655e-05, "loss": 0.3864275455474854, "step": 114980 }, { "epoch": 0.49367610314005306, "grad_norm": 2.2125508785247803, "learning_rate": 5.0851133551218924e-05, "loss": 0.1695851445198059, "step": 114990 }, { "epoch": 0.49371903523007304, "grad_norm": 1.7585374116897583, "learning_rate": 5.08468218311013e-05, "loss": 0.2917444705963135, "step": 115000 }, { "epoch": 0.49371903523007304, "eval_loss": 0.4183075726032257, "eval_runtime": 27.1776, "eval_samples_per_second": 3.68, "eval_steps_per_second": 3.68, "step": 115000 }, { "epoch": 0.4937619673200931, "grad_norm": 0.01179379504173994, "learning_rate": 5.084251011098368e-05, "loss": 0.3183038949966431, "step": 115010 }, { "epoch": 0.4938048994101131, "grad_norm": 0.91953045129776, "learning_rate": 5.0838198390866056e-05, "loss": 0.2725319147109985, "step": 115020 }, { "epoch": 0.4938478315001331, "grad_norm": 0.1682545691728592, "learning_rate": 5.083388667074843e-05, "loss": 0.1873003602027893, "step": 115030 }, { "epoch": 0.4938907635901531, "grad_norm": 1.7073339223861694, "learning_rate": 5.082957495063081e-05, "loss": 0.3962739944458008, "step": 115040 }, { "epoch": 0.4939336956801731, "grad_norm": 1.3014336824417114, "learning_rate": 5.082526323051319e-05, "loss": 0.24986536502838136, "step": 115050 }, { "epoch": 0.49397662777019313, "grad_norm": 0.003316489513963461, "learning_rate": 5.0820951510395565e-05, "loss": 0.06500194072723389, "step": 115060 }, { "epoch": 0.4940195598602131, "grad_norm": 5.03993034362793, "learning_rate": 5.081663979027793e-05, "loss": 0.2883466720581055, "step": 115070 }, { "epoch": 0.4940624919502331, "grad_norm": 0.00388818490318954, "learning_rate": 5.0812328070160306e-05, "loss": 0.25894618034362793, "step": 115080 }, { "epoch": 0.49410542404025315, "grad_norm": 3.0344207286834717, "learning_rate": 5.0808016350042683e-05, "loss": 0.16486616134643556, "step": 115090 }, { "epoch": 0.49414835613027314, "grad_norm": 1.1234501600265503, "learning_rate": 5.080370462992506e-05, "loss": 0.14668926000595092, "step": 115100 }, { "epoch": 0.4941912882202931, "grad_norm": 2.564479351043701, "learning_rate": 5.0799392909807445e-05, "loss": 0.23735263347625732, "step": 115110 }, { "epoch": 0.49423422031031317, "grad_norm": 0.11280937492847443, "learning_rate": 5.079508118968982e-05, "loss": 0.4572554111480713, "step": 115120 }, { "epoch": 0.49427715240033315, "grad_norm": 3.4304449558258057, "learning_rate": 5.07907694695722e-05, "loss": 0.21845850944519044, "step": 115130 }, { "epoch": 0.49432008449035314, "grad_norm": 3.999896764755249, "learning_rate": 5.078645774945458e-05, "loss": 0.30247633457183837, "step": 115140 }, { "epoch": 0.4943630165803732, "grad_norm": 1.523688793182373, "learning_rate": 5.078214602933694e-05, "loss": 0.28164350986480713, "step": 115150 }, { "epoch": 0.49440594867039317, "grad_norm": 1.8061667680740356, "learning_rate": 5.077783430921932e-05, "loss": 0.14227923154830932, "step": 115160 }, { "epoch": 0.49444888076041316, "grad_norm": 6.9911417961120605, "learning_rate": 5.0773522589101695e-05, "loss": 0.2907358169555664, "step": 115170 }, { "epoch": 0.4944918128504332, "grad_norm": 0.6293550133705139, "learning_rate": 5.076921086898407e-05, "loss": 0.31427788734436035, "step": 115180 }, { "epoch": 0.4945347449404532, "grad_norm": 3.471299648284912, "learning_rate": 5.076489914886645e-05, "loss": 0.15946595668792723, "step": 115190 }, { "epoch": 0.4945776770304732, "grad_norm": 0.005677805282175541, "learning_rate": 5.076058742874883e-05, "loss": 0.10743122100830078, "step": 115200 }, { "epoch": 0.4946206091204932, "grad_norm": 2.375127077102661, "learning_rate": 5.0756275708631205e-05, "loss": 0.2899677515029907, "step": 115210 }, { "epoch": 0.4946635412105132, "grad_norm": 0.005286522675305605, "learning_rate": 5.075196398851358e-05, "loss": 0.2951892137527466, "step": 115220 }, { "epoch": 0.4947064733005332, "grad_norm": 0.008677645586431026, "learning_rate": 5.074765226839595e-05, "loss": 0.10036700963973999, "step": 115230 }, { "epoch": 0.49474940539055323, "grad_norm": 0.16775937378406525, "learning_rate": 5.074334054827833e-05, "loss": 0.4316593647003174, "step": 115240 }, { "epoch": 0.4947923374805732, "grad_norm": 0.08770100772380829, "learning_rate": 5.073902882816071e-05, "loss": 0.2387186050415039, "step": 115250 }, { "epoch": 0.49483526957059326, "grad_norm": 1.3633723258972168, "learning_rate": 5.0734717108043085e-05, "loss": 0.22010915279388427, "step": 115260 }, { "epoch": 0.49487820166061325, "grad_norm": 1.4718842506408691, "learning_rate": 5.073040538792546e-05, "loss": 0.15852314233779907, "step": 115270 }, { "epoch": 0.49492113375063324, "grad_norm": 0.005069954786449671, "learning_rate": 5.072609366780784e-05, "loss": 0.28095600605010984, "step": 115280 }, { "epoch": 0.4949640658406533, "grad_norm": 0.18669170141220093, "learning_rate": 5.072178194769022e-05, "loss": 0.13922568559646606, "step": 115290 }, { "epoch": 0.49500699793067326, "grad_norm": 1.5393754243850708, "learning_rate": 5.0717470227572594e-05, "loss": 0.2517578363418579, "step": 115300 }, { "epoch": 0.49504993002069325, "grad_norm": 1.222355842590332, "learning_rate": 5.071315850745497e-05, "loss": 0.1524641990661621, "step": 115310 }, { "epoch": 0.4950928621107133, "grad_norm": 0.27816998958587646, "learning_rate": 5.0708846787337335e-05, "loss": 0.20404572486877443, "step": 115320 }, { "epoch": 0.4951357942007333, "grad_norm": 0.2993142008781433, "learning_rate": 5.070453506721972e-05, "loss": 0.20290870666503907, "step": 115330 }, { "epoch": 0.49517872629075327, "grad_norm": 2.6680591106414795, "learning_rate": 5.07002233471021e-05, "loss": 0.05664870738983154, "step": 115340 }, { "epoch": 0.4952216583807733, "grad_norm": 0.09985325485467911, "learning_rate": 5.0695911626984474e-05, "loss": 0.21010010242462157, "step": 115350 }, { "epoch": 0.4952645904707933, "grad_norm": 2.5624895095825195, "learning_rate": 5.069159990686685e-05, "loss": 0.23997178077697753, "step": 115360 }, { "epoch": 0.4953075225608133, "grad_norm": 0.13102348148822784, "learning_rate": 5.068728818674923e-05, "loss": 0.27090566158294677, "step": 115370 }, { "epoch": 0.4953504546508333, "grad_norm": 0.15313896536827087, "learning_rate": 5.0682976466631606e-05, "loss": 0.2939656496047974, "step": 115380 }, { "epoch": 0.4953933867408533, "grad_norm": 0.003520481288433075, "learning_rate": 5.0678664746513983e-05, "loss": 0.18922423124313353, "step": 115390 }, { "epoch": 0.4954363188308733, "grad_norm": 10.516127586364746, "learning_rate": 5.067435302639635e-05, "loss": 0.27016212940216067, "step": 115400 }, { "epoch": 0.49547925092089334, "grad_norm": 18.63870620727539, "learning_rate": 5.0670041306278725e-05, "loss": 0.1411109447479248, "step": 115410 }, { "epoch": 0.49552218301091333, "grad_norm": 1.1475532054901123, "learning_rate": 5.06657295861611e-05, "loss": 0.2901994466781616, "step": 115420 }, { "epoch": 0.4955651151009333, "grad_norm": 4.191442489624023, "learning_rate": 5.066141786604348e-05, "loss": 0.34753267765045165, "step": 115430 }, { "epoch": 0.49560804719095336, "grad_norm": 0.3751876950263977, "learning_rate": 5.0657106145925857e-05, "loss": 0.2544296979904175, "step": 115440 }, { "epoch": 0.49565097928097335, "grad_norm": 1.2921435832977295, "learning_rate": 5.0652794425808234e-05, "loss": 0.12484211921691894, "step": 115450 }, { "epoch": 0.4956939113709934, "grad_norm": 3.32179594039917, "learning_rate": 5.064848270569061e-05, "loss": 0.3070408821105957, "step": 115460 }, { "epoch": 0.4957368434610134, "grad_norm": 0.002367328619584441, "learning_rate": 5.064417098557299e-05, "loss": 0.1736527442932129, "step": 115470 }, { "epoch": 0.49577977555103336, "grad_norm": 0.16558293998241425, "learning_rate": 5.063985926545536e-05, "loss": 0.3311375856399536, "step": 115480 }, { "epoch": 0.4958227076410534, "grad_norm": 3.545715093612671, "learning_rate": 5.0635547545337737e-05, "loss": 0.3532984495162964, "step": 115490 }, { "epoch": 0.4958656397310734, "grad_norm": 0.015997091308236122, "learning_rate": 5.0631235825220114e-05, "loss": 0.12923310995101928, "step": 115500 }, { "epoch": 0.4959085718210934, "grad_norm": 4.145458698272705, "learning_rate": 5.062692410510249e-05, "loss": 0.29781742095947267, "step": 115510 }, { "epoch": 0.4959515039111134, "grad_norm": 0.6753315329551697, "learning_rate": 5.062261238498487e-05, "loss": 0.2975428342819214, "step": 115520 }, { "epoch": 0.4959944360011334, "grad_norm": 0.40239396691322327, "learning_rate": 5.0618300664867246e-05, "loss": 0.17573122978210448, "step": 115530 }, { "epoch": 0.4960373680911534, "grad_norm": 1.7181788682937622, "learning_rate": 5.061398894474962e-05, "loss": 0.26423892974853513, "step": 115540 }, { "epoch": 0.49608030018117344, "grad_norm": 0.3651997447013855, "learning_rate": 5.0609677224632e-05, "loss": 0.20962002277374267, "step": 115550 }, { "epoch": 0.4961232322711934, "grad_norm": 22.775222778320312, "learning_rate": 5.060536550451437e-05, "loss": 0.2838634967803955, "step": 115560 }, { "epoch": 0.4961661643612134, "grad_norm": 2.0066092014312744, "learning_rate": 5.060105378439675e-05, "loss": 0.3719503164291382, "step": 115570 }, { "epoch": 0.49620909645123346, "grad_norm": 0.005026548635214567, "learning_rate": 5.0596742064279126e-05, "loss": 0.3048412322998047, "step": 115580 }, { "epoch": 0.49625202854125344, "grad_norm": 2.4710865020751953, "learning_rate": 5.05924303441615e-05, "loss": 0.2783297061920166, "step": 115590 }, { "epoch": 0.49629496063127343, "grad_norm": 2.661207914352417, "learning_rate": 5.058811862404388e-05, "loss": 0.18164483308792115, "step": 115600 }, { "epoch": 0.49633789272129347, "grad_norm": 8.226025965996087e-05, "learning_rate": 5.058380690392626e-05, "loss": 0.2600426197052002, "step": 115610 }, { "epoch": 0.49638082481131346, "grad_norm": 7.507086277008057, "learning_rate": 5.0579495183808635e-05, "loss": 0.08841606378555297, "step": 115620 }, { "epoch": 0.49642375690133345, "grad_norm": 0.0025435436982661486, "learning_rate": 5.057518346369101e-05, "loss": 0.32819669246673583, "step": 115630 }, { "epoch": 0.4964666889913535, "grad_norm": 0.012275975197553635, "learning_rate": 5.0570871743573376e-05, "loss": 0.11880701780319214, "step": 115640 }, { "epoch": 0.4965096210813735, "grad_norm": 2.4064557552337646, "learning_rate": 5.0566560023455754e-05, "loss": 0.06984534859657288, "step": 115650 }, { "epoch": 0.49655255317139346, "grad_norm": 0.04519079998135567, "learning_rate": 5.056224830333813e-05, "loss": 0.3047942638397217, "step": 115660 }, { "epoch": 0.4965954852614135, "grad_norm": 0.003867541207000613, "learning_rate": 5.055793658322051e-05, "loss": 0.1949278235435486, "step": 115670 }, { "epoch": 0.4966384173514335, "grad_norm": 29.170747756958008, "learning_rate": 5.0553624863102886e-05, "loss": 0.17468286752700807, "step": 115680 }, { "epoch": 0.49668134944145353, "grad_norm": 0.8490044474601746, "learning_rate": 5.054931314298526e-05, "loss": 0.10209113359451294, "step": 115690 }, { "epoch": 0.4967242815314735, "grad_norm": 0.07221094518899918, "learning_rate": 5.054500142286765e-05, "loss": 0.26395204067230227, "step": 115700 }, { "epoch": 0.4967672136214935, "grad_norm": 2.139655828475952, "learning_rate": 5.0540689702750025e-05, "loss": 0.25615296363830564, "step": 115710 }, { "epoch": 0.49681014571151355, "grad_norm": 3.531961441040039, "learning_rate": 5.05363779826324e-05, "loss": 0.34113330841064454, "step": 115720 }, { "epoch": 0.49685307780153354, "grad_norm": 0.0022067560348659754, "learning_rate": 5.0532066262514766e-05, "loss": 0.2792416334152222, "step": 115730 }, { "epoch": 0.4968960098915535, "grad_norm": 0.0736042708158493, "learning_rate": 5.052775454239714e-05, "loss": 0.03103959560394287, "step": 115740 }, { "epoch": 0.49693894198157357, "grad_norm": 0.21198052167892456, "learning_rate": 5.052344282227952e-05, "loss": 0.16917411088943482, "step": 115750 }, { "epoch": 0.49698187407159355, "grad_norm": 0.041175056248903275, "learning_rate": 5.05191311021619e-05, "loss": 0.2969489336013794, "step": 115760 }, { "epoch": 0.49702480616161354, "grad_norm": 0.0025716687086969614, "learning_rate": 5.0514819382044275e-05, "loss": 0.27678828239440917, "step": 115770 }, { "epoch": 0.4970677382516336, "grad_norm": 0.03726640343666077, "learning_rate": 5.051050766192665e-05, "loss": 0.21982161998748778, "step": 115780 }, { "epoch": 0.49711067034165357, "grad_norm": 0.061146777123212814, "learning_rate": 5.050619594180903e-05, "loss": 0.16412194967269897, "step": 115790 }, { "epoch": 0.49715360243167356, "grad_norm": 0.6972399353981018, "learning_rate": 5.050188422169141e-05, "loss": 0.19004242420196532, "step": 115800 }, { "epoch": 0.4971965345216936, "grad_norm": 0.057269349694252014, "learning_rate": 5.049757250157378e-05, "loss": 0.12597191333770752, "step": 115810 }, { "epoch": 0.4972394666117136, "grad_norm": 23.783647537231445, "learning_rate": 5.0493260781456155e-05, "loss": 0.23004698753356934, "step": 115820 }, { "epoch": 0.4972823987017336, "grad_norm": 0.4776040315628052, "learning_rate": 5.048894906133853e-05, "loss": 0.16874227523803711, "step": 115830 }, { "epoch": 0.4973253307917536, "grad_norm": 1.4436566829681396, "learning_rate": 5.048463734122091e-05, "loss": 0.09769206047058106, "step": 115840 }, { "epoch": 0.4973682628817736, "grad_norm": 0.0008209854131564498, "learning_rate": 5.048032562110329e-05, "loss": 0.21059079170227052, "step": 115850 }, { "epoch": 0.4974111949717936, "grad_norm": 0.003916463814675808, "learning_rate": 5.0476013900985664e-05, "loss": 0.006223606318235398, "step": 115860 }, { "epoch": 0.49745412706181363, "grad_norm": 0.16059304773807526, "learning_rate": 5.047170218086804e-05, "loss": 0.2492811918258667, "step": 115870 }, { "epoch": 0.4974970591518336, "grad_norm": 5.057432651519775, "learning_rate": 5.046739046075042e-05, "loss": 0.3591012477874756, "step": 115880 }, { "epoch": 0.49753999124185366, "grad_norm": 0.0025596225168555975, "learning_rate": 5.046307874063278e-05, "loss": 0.17509062290191652, "step": 115890 }, { "epoch": 0.49758292333187365, "grad_norm": 0.0018866428872570395, "learning_rate": 5.045876702051516e-05, "loss": 0.1546018362045288, "step": 115900 }, { "epoch": 0.49762585542189364, "grad_norm": 0.004940045066177845, "learning_rate": 5.045445530039754e-05, "loss": 0.270506763458252, "step": 115910 }, { "epoch": 0.4976687875119137, "grad_norm": 0.15914833545684814, "learning_rate": 5.045014358027992e-05, "loss": 0.3475140333175659, "step": 115920 }, { "epoch": 0.49771171960193367, "grad_norm": 2.368710994720459, "learning_rate": 5.04458318601623e-05, "loss": 0.11058902740478516, "step": 115930 }, { "epoch": 0.49775465169195365, "grad_norm": 1.7389672994613647, "learning_rate": 5.0441520140044676e-05, "loss": 0.07232906222343445, "step": 115940 }, { "epoch": 0.4977975837819737, "grad_norm": 0.010259340517222881, "learning_rate": 5.0437208419927054e-05, "loss": 0.27948570251464844, "step": 115950 }, { "epoch": 0.4978405158719937, "grad_norm": 0.05925685167312622, "learning_rate": 5.043289669980943e-05, "loss": 0.16085286140441896, "step": 115960 }, { "epoch": 0.49788344796201367, "grad_norm": 2.66933536529541, "learning_rate": 5.0428584979691795e-05, "loss": 0.313973593711853, "step": 115970 }, { "epoch": 0.4979263800520337, "grad_norm": 1.533821940422058, "learning_rate": 5.042427325957417e-05, "loss": 0.3402239799499512, "step": 115980 }, { "epoch": 0.4979693121420537, "grad_norm": 0.11309437453746796, "learning_rate": 5.041996153945655e-05, "loss": 0.37814791202545167, "step": 115990 }, { "epoch": 0.4980122442320737, "grad_norm": 1.377073884010315, "learning_rate": 5.041564981933893e-05, "loss": 0.2134047269821167, "step": 116000 }, { "epoch": 0.4980122442320737, "eval_loss": 0.413227915763855, "eval_runtime": 27.1799, "eval_samples_per_second": 3.679, "eval_steps_per_second": 3.679, "step": 116000 }, { "epoch": 0.49805517632209373, "grad_norm": 1.5735690593719482, "learning_rate": 5.0411338099221304e-05, "loss": 0.18733230829238892, "step": 116010 }, { "epoch": 0.4980981084121137, "grad_norm": 3.668860673904419, "learning_rate": 5.040702637910368e-05, "loss": 0.2786614179611206, "step": 116020 }, { "epoch": 0.4981410405021337, "grad_norm": 0.0029571440536528826, "learning_rate": 5.040271465898606e-05, "loss": 0.21538405418395995, "step": 116030 }, { "epoch": 0.49818397259215375, "grad_norm": 0.014692588709294796, "learning_rate": 5.0398402938868436e-05, "loss": 0.2976950407028198, "step": 116040 }, { "epoch": 0.49822690468217373, "grad_norm": 0.007402835413813591, "learning_rate": 5.0394091218750814e-05, "loss": 0.05344715118408203, "step": 116050 }, { "epoch": 0.4982698367721937, "grad_norm": 3.5247769355773926, "learning_rate": 5.0389779498633184e-05, "loss": 0.05012491941452026, "step": 116060 }, { "epoch": 0.49831276886221376, "grad_norm": 0.0088571198284626, "learning_rate": 5.038546777851556e-05, "loss": 0.2535048484802246, "step": 116070 }, { "epoch": 0.49835570095223375, "grad_norm": 0.02746264822781086, "learning_rate": 5.038115605839794e-05, "loss": 0.2725075721740723, "step": 116080 }, { "epoch": 0.49839863304225374, "grad_norm": 0.002486494602635503, "learning_rate": 5.0376844338280316e-05, "loss": 0.2322972297668457, "step": 116090 }, { "epoch": 0.4984415651322738, "grad_norm": 4.655661106109619, "learning_rate": 5.0372532618162693e-05, "loss": 0.36279830932617185, "step": 116100 }, { "epoch": 0.49848449722229377, "grad_norm": 0.012788623571395874, "learning_rate": 5.036822089804507e-05, "loss": 0.2191479206085205, "step": 116110 }, { "epoch": 0.4985274293123138, "grad_norm": 5.198680877685547, "learning_rate": 5.036390917792745e-05, "loss": 0.4196025371551514, "step": 116120 }, { "epoch": 0.4985703614023338, "grad_norm": 0.01135294046252966, "learning_rate": 5.0359597457809826e-05, "loss": 0.29812934398651125, "step": 116130 }, { "epoch": 0.4986132934923538, "grad_norm": 1.5958086252212524, "learning_rate": 5.0355285737692196e-05, "loss": 0.24831123352050782, "step": 116140 }, { "epoch": 0.4986562255823738, "grad_norm": 2.2128734588623047, "learning_rate": 5.0350974017574573e-05, "loss": 0.13145445585250853, "step": 116150 }, { "epoch": 0.4986991576723938, "grad_norm": 40.567691802978516, "learning_rate": 5.034666229745695e-05, "loss": 0.2323824405670166, "step": 116160 }, { "epoch": 0.4987420897624138, "grad_norm": 0.033427074551582336, "learning_rate": 5.034235057733933e-05, "loss": 0.04265216588973999, "step": 116170 }, { "epoch": 0.49878502185243384, "grad_norm": 0.03815077617764473, "learning_rate": 5.0338038857221705e-05, "loss": 0.2679791212081909, "step": 116180 }, { "epoch": 0.49882795394245383, "grad_norm": 0.0019072276772931218, "learning_rate": 5.033372713710408e-05, "loss": 0.17380179166793824, "step": 116190 }, { "epoch": 0.4988708860324738, "grad_norm": 0.010305742733180523, "learning_rate": 5.032941541698646e-05, "loss": 0.33670363426208494, "step": 116200 }, { "epoch": 0.49891381812249386, "grad_norm": 0.07689042389392853, "learning_rate": 5.032510369686884e-05, "loss": 0.16827105283737182, "step": 116210 }, { "epoch": 0.49895675021251384, "grad_norm": 0.10213097184896469, "learning_rate": 5.03207919767512e-05, "loss": 0.3205841541290283, "step": 116220 }, { "epoch": 0.49899968230253383, "grad_norm": 0.0006489379447884858, "learning_rate": 5.031648025663358e-05, "loss": 0.4195698738098145, "step": 116230 }, { "epoch": 0.4990426143925539, "grad_norm": 3.99538254737854, "learning_rate": 5.0312168536515956e-05, "loss": 0.32759990692138674, "step": 116240 }, { "epoch": 0.49908554648257386, "grad_norm": 1.1936728954315186, "learning_rate": 5.030785681639833e-05, "loss": 0.4055778503417969, "step": 116250 }, { "epoch": 0.49912847857259385, "grad_norm": 0.5046306848526001, "learning_rate": 5.030354509628071e-05, "loss": 0.200126576423645, "step": 116260 }, { "epoch": 0.4991714106626139, "grad_norm": 0.042749445885419846, "learning_rate": 5.029923337616309e-05, "loss": 0.33313579559326173, "step": 116270 }, { "epoch": 0.4992143427526339, "grad_norm": 3.7502360343933105, "learning_rate": 5.0294921656045465e-05, "loss": 0.15824503898620607, "step": 116280 }, { "epoch": 0.49925727484265386, "grad_norm": 1.967415690422058, "learning_rate": 5.029060993592784e-05, "loss": 0.19596970081329346, "step": 116290 }, { "epoch": 0.4993002069326739, "grad_norm": 28.932861328125, "learning_rate": 5.028629821581021e-05, "loss": 0.1746343493461609, "step": 116300 }, { "epoch": 0.4993431390226939, "grad_norm": 3.073617696762085, "learning_rate": 5.028198649569259e-05, "loss": 0.2759540557861328, "step": 116310 }, { "epoch": 0.49938607111271394, "grad_norm": 1.3334208726882935, "learning_rate": 5.027767477557497e-05, "loss": 0.29316024780273436, "step": 116320 }, { "epoch": 0.4994290032027339, "grad_norm": 0.10985347628593445, "learning_rate": 5.0273363055457345e-05, "loss": 0.359764838218689, "step": 116330 }, { "epoch": 0.4994719352927539, "grad_norm": 0.09878139942884445, "learning_rate": 5.026905133533972e-05, "loss": 0.4100006103515625, "step": 116340 }, { "epoch": 0.49951486738277395, "grad_norm": 0.025054529309272766, "learning_rate": 5.02647396152221e-05, "loss": 0.2300884246826172, "step": 116350 }, { "epoch": 0.49955779947279394, "grad_norm": 3.4533677101135254, "learning_rate": 5.026042789510448e-05, "loss": 0.38540575504302976, "step": 116360 }, { "epoch": 0.4996007315628139, "grad_norm": 0.10880227386951447, "learning_rate": 5.0256116174986855e-05, "loss": 0.4489396095275879, "step": 116370 }, { "epoch": 0.49964366365283397, "grad_norm": 0.20102038979530334, "learning_rate": 5.0251804454869225e-05, "loss": 0.12855820655822753, "step": 116380 }, { "epoch": 0.49968659574285396, "grad_norm": 1.3258622884750366, "learning_rate": 5.02474927347516e-05, "loss": 0.3025193452835083, "step": 116390 }, { "epoch": 0.49972952783287394, "grad_norm": 3.7796823978424072, "learning_rate": 5.024318101463398e-05, "loss": 0.1849290609359741, "step": 116400 }, { "epoch": 0.499772459922894, "grad_norm": 4.854727745056152, "learning_rate": 5.023886929451636e-05, "loss": 0.3261184930801392, "step": 116410 }, { "epoch": 0.499815392012914, "grad_norm": 1.9094047546386719, "learning_rate": 5.0234557574398735e-05, "loss": 0.2567267656326294, "step": 116420 }, { "epoch": 0.49985832410293396, "grad_norm": 1.9529017210006714, "learning_rate": 5.023024585428111e-05, "loss": 0.24922068119049073, "step": 116430 }, { "epoch": 0.499901256192954, "grad_norm": 2.076061964035034, "learning_rate": 5.022593413416349e-05, "loss": 0.3763160467147827, "step": 116440 }, { "epoch": 0.499944188282974, "grad_norm": 0.1453094780445099, "learning_rate": 5.0221622414045867e-05, "loss": 0.24987459182739258, "step": 116450 }, { "epoch": 0.499987120372994, "grad_norm": 0.044320449233055115, "learning_rate": 5.0217310693928244e-05, "loss": 0.13236111402511597, "step": 116460 }, { "epoch": 0.500030052463014, "grad_norm": 0.0024830172769725323, "learning_rate": 5.021299897381061e-05, "loss": 0.2829590320587158, "step": 116470 }, { "epoch": 0.5000729845530341, "grad_norm": 0.11648912727832794, "learning_rate": 5.0208687253692985e-05, "loss": 0.1768946409225464, "step": 116480 }, { "epoch": 0.500115916643054, "grad_norm": 2.029343605041504, "learning_rate": 5.020437553357536e-05, "loss": 0.38065204620361326, "step": 116490 }, { "epoch": 0.500158848733074, "grad_norm": 0.008621471002697945, "learning_rate": 5.020006381345774e-05, "loss": 0.21182801723480224, "step": 116500 }, { "epoch": 0.5002017808230941, "grad_norm": 0.057470615953207016, "learning_rate": 5.019575209334012e-05, "loss": 0.06660780906677247, "step": 116510 }, { "epoch": 0.500244712913114, "grad_norm": 0.17172105610370636, "learning_rate": 5.01914403732225e-05, "loss": 0.31643340587615965, "step": 116520 }, { "epoch": 0.500287645003134, "grad_norm": 2.378706216812134, "learning_rate": 5.018712865310488e-05, "loss": 0.33999631404876707, "step": 116530 }, { "epoch": 0.5003305770931541, "grad_norm": 0.3760903477668762, "learning_rate": 5.0182816932987256e-05, "loss": 0.12946596145629882, "step": 116540 }, { "epoch": 0.500373509183174, "grad_norm": 0.08980166167020798, "learning_rate": 5.017850521286962e-05, "loss": 0.036372536420822145, "step": 116550 }, { "epoch": 0.5004164412731941, "grad_norm": 0.9005488157272339, "learning_rate": 5.0174193492752e-05, "loss": 0.22982125282287597, "step": 116560 }, { "epoch": 0.5004593733632141, "grad_norm": 0.0490412674844265, "learning_rate": 5.0169881772634374e-05, "loss": 0.17876067161560058, "step": 116570 }, { "epoch": 0.500502305453234, "grad_norm": 0.03490692749619484, "learning_rate": 5.016557005251675e-05, "loss": 0.23765029907226562, "step": 116580 }, { "epoch": 0.5005452375432541, "grad_norm": 1.0722746849060059, "learning_rate": 5.016125833239913e-05, "loss": 0.359777045249939, "step": 116590 }, { "epoch": 0.5005881696332741, "grad_norm": 4.997632026672363, "learning_rate": 5.0156946612281506e-05, "loss": 0.5328747272491455, "step": 116600 }, { "epoch": 0.5006311017232941, "grad_norm": 4.145293712615967, "learning_rate": 5.0152634892163884e-05, "loss": 0.1871986985206604, "step": 116610 }, { "epoch": 0.5006740338133141, "grad_norm": 2.7955899238586426, "learning_rate": 5.014832317204626e-05, "loss": 0.3289355993270874, "step": 116620 }, { "epoch": 0.5007169659033341, "grad_norm": 0.015839189291000366, "learning_rate": 5.014401145192863e-05, "loss": 0.18843262195587157, "step": 116630 }, { "epoch": 0.5007598979933541, "grad_norm": 2.8927204608917236, "learning_rate": 5.013969973181101e-05, "loss": 0.35386366844177247, "step": 116640 }, { "epoch": 0.5008028300833741, "grad_norm": 1.1801815032958984, "learning_rate": 5.0135388011693386e-05, "loss": 0.3398331642150879, "step": 116650 }, { "epoch": 0.5008457621733942, "grad_norm": 0.26834312081336975, "learning_rate": 5.0131076291575764e-05, "loss": 0.16700916290283202, "step": 116660 }, { "epoch": 0.5008886942634141, "grad_norm": 0.03399025276303291, "learning_rate": 5.012676457145814e-05, "loss": 0.26661252975463867, "step": 116670 }, { "epoch": 0.5009316263534341, "grad_norm": 0.6200994253158569, "learning_rate": 5.012245285134052e-05, "loss": 0.21167898178100586, "step": 116680 }, { "epoch": 0.5009745584434542, "grad_norm": 0.026551874354481697, "learning_rate": 5.0118141131222896e-05, "loss": 0.052762043476104734, "step": 116690 }, { "epoch": 0.5010174905334741, "grad_norm": 1.6990082263946533, "learning_rate": 5.011382941110527e-05, "loss": 0.36727452278137207, "step": 116700 }, { "epoch": 0.5010604226234942, "grad_norm": 0.5137987732887268, "learning_rate": 5.010951769098764e-05, "loss": 0.16924527883529664, "step": 116710 }, { "epoch": 0.5011033547135142, "grad_norm": 2.603691577911377, "learning_rate": 5.0105205970870014e-05, "loss": 0.13312125205993652, "step": 116720 }, { "epoch": 0.5011462868035341, "grad_norm": 2.065608263015747, "learning_rate": 5.010089425075239e-05, "loss": 0.36140103340148927, "step": 116730 }, { "epoch": 0.5011892188935542, "grad_norm": 0.2081688642501831, "learning_rate": 5.0096582530634776e-05, "loss": 0.07831467390060425, "step": 116740 }, { "epoch": 0.5012321509835742, "grad_norm": 2.544560432434082, "learning_rate": 5.009227081051715e-05, "loss": 0.35781326293945315, "step": 116750 }, { "epoch": 0.5012750830735941, "grad_norm": 0.03773084655404091, "learning_rate": 5.008795909039953e-05, "loss": 0.18783485889434814, "step": 116760 }, { "epoch": 0.5013180151636142, "grad_norm": 0.04964379593729973, "learning_rate": 5.008364737028191e-05, "loss": 0.14863022565841674, "step": 116770 }, { "epoch": 0.5013609472536342, "grad_norm": 0.17665715515613556, "learning_rate": 5.0079335650164285e-05, "loss": 0.25568058490753176, "step": 116780 }, { "epoch": 0.5014038793436542, "grad_norm": 2.9642350673675537, "learning_rate": 5.007502393004666e-05, "loss": 0.21143810749053954, "step": 116790 }, { "epoch": 0.5014468114336742, "grad_norm": 1.360346794128418, "learning_rate": 5.0070712209929026e-05, "loss": 0.12712502479553223, "step": 116800 }, { "epoch": 0.5014897435236942, "grad_norm": 1.1762696504592896, "learning_rate": 5.0066400489811404e-05, "loss": 0.3075927972793579, "step": 116810 }, { "epoch": 0.5015326756137142, "grad_norm": 0.009866449050605297, "learning_rate": 5.006208876969378e-05, "loss": 0.3283320665359497, "step": 116820 }, { "epoch": 0.5015756077037342, "grad_norm": 1.7027781009674072, "learning_rate": 5.005777704957616e-05, "loss": 0.09247349500656128, "step": 116830 }, { "epoch": 0.5016185397937543, "grad_norm": 1.6753307580947876, "learning_rate": 5.0053465329458536e-05, "loss": 0.05889575481414795, "step": 116840 }, { "epoch": 0.5016614718837742, "grad_norm": 3.2303237915039062, "learning_rate": 5.004915360934091e-05, "loss": 0.21741180419921874, "step": 116850 }, { "epoch": 0.5017044039737942, "grad_norm": 0.03603691607713699, "learning_rate": 5.004484188922329e-05, "loss": 0.013999029994010925, "step": 116860 }, { "epoch": 0.5017473360638143, "grad_norm": 3.4006402492523193, "learning_rate": 5.004053016910567e-05, "loss": 0.14681098461151124, "step": 116870 }, { "epoch": 0.5017902681538343, "grad_norm": 0.019151249900460243, "learning_rate": 5.003621844898804e-05, "loss": 0.052489012479782104, "step": 116880 }, { "epoch": 0.5018332002438542, "grad_norm": 0.04653005301952362, "learning_rate": 5.0031906728870415e-05, "loss": 0.2115783929824829, "step": 116890 }, { "epoch": 0.5018761323338743, "grad_norm": 0.05448725074529648, "learning_rate": 5.002759500875279e-05, "loss": 0.22183005809783934, "step": 116900 }, { "epoch": 0.5019190644238943, "grad_norm": 0.02877284586429596, "learning_rate": 5.002328328863517e-05, "loss": 0.29645206928253176, "step": 116910 }, { "epoch": 0.5019619965139143, "grad_norm": 0.5961487293243408, "learning_rate": 5.001897156851755e-05, "loss": 0.12437942028045654, "step": 116920 }, { "epoch": 0.5020049286039343, "grad_norm": 0.03502122312784195, "learning_rate": 5.0014659848399925e-05, "loss": 0.31128199100494386, "step": 116930 }, { "epoch": 0.5020478606939544, "grad_norm": 0.007991598919034004, "learning_rate": 5.00103481282823e-05, "loss": 0.36748878955841063, "step": 116940 }, { "epoch": 0.5020907927839743, "grad_norm": 1.8723878860473633, "learning_rate": 5.000603640816468e-05, "loss": 0.16076483726501464, "step": 116950 }, { "epoch": 0.5021337248739943, "grad_norm": 0.004238491412252188, "learning_rate": 5.000172468804705e-05, "loss": 0.2704092264175415, "step": 116960 }, { "epoch": 0.5021766569640144, "grad_norm": 0.09497665613889694, "learning_rate": 4.999741296792943e-05, "loss": 0.00860070288181305, "step": 116970 }, { "epoch": 0.5022195890540343, "grad_norm": 0.5038676857948303, "learning_rate": 4.9993101247811805e-05, "loss": 0.14574838876724244, "step": 116980 }, { "epoch": 0.5022625211440543, "grad_norm": 0.05492497235536575, "learning_rate": 4.998878952769418e-05, "loss": 0.31498830318450927, "step": 116990 }, { "epoch": 0.5023054532340744, "grad_norm": 0.40970903635025024, "learning_rate": 4.998447780757656e-05, "loss": 0.24021148681640625, "step": 117000 }, { "epoch": 0.5023054532340744, "eval_loss": 0.41057536005973816, "eval_runtime": 27.1683, "eval_samples_per_second": 3.681, "eval_steps_per_second": 3.681, "step": 117000 }, { "epoch": 0.5023483853240943, "grad_norm": 0.010372740216553211, "learning_rate": 4.998016608745894e-05, "loss": 0.24350297451019287, "step": 117010 }, { "epoch": 0.5023913174141144, "grad_norm": 0.09824706614017487, "learning_rate": 4.997585436734131e-05, "loss": 0.22170097827911378, "step": 117020 }, { "epoch": 0.5024342495041344, "grad_norm": 0.6410292387008667, "learning_rate": 4.9971542647223685e-05, "loss": 0.22455050945281982, "step": 117030 }, { "epoch": 0.5024771815941543, "grad_norm": 9.528117179870605, "learning_rate": 4.996723092710606e-05, "loss": 0.16655960083007812, "step": 117040 }, { "epoch": 0.5025201136841744, "grad_norm": 0.033820074051618576, "learning_rate": 4.996291920698844e-05, "loss": 0.2521155118942261, "step": 117050 }, { "epoch": 0.5025630457741944, "grad_norm": 0.9928510189056396, "learning_rate": 4.995860748687081e-05, "loss": 0.2452853202819824, "step": 117060 }, { "epoch": 0.5026059778642143, "grad_norm": 0.0058114430867135525, "learning_rate": 4.995429576675319e-05, "loss": 0.05519225597381592, "step": 117070 }, { "epoch": 0.5026489099542344, "grad_norm": 0.09580115973949432, "learning_rate": 4.9949984046635565e-05, "loss": 0.14227598905563354, "step": 117080 }, { "epoch": 0.5026918420442544, "grad_norm": 0.11800723522901535, "learning_rate": 4.994567232651794e-05, "loss": 0.0920566439628601, "step": 117090 }, { "epoch": 0.5027347741342744, "grad_norm": 4.133057594299316, "learning_rate": 4.994136060640032e-05, "loss": 0.22397398948669434, "step": 117100 }, { "epoch": 0.5027777062242944, "grad_norm": 0.14109517633914948, "learning_rate": 4.99370488862827e-05, "loss": 0.1774275779724121, "step": 117110 }, { "epoch": 0.5028206383143144, "grad_norm": 1.8681215047836304, "learning_rate": 4.9932737166165074e-05, "loss": 0.37117633819580076, "step": 117120 }, { "epoch": 0.5028635704043344, "grad_norm": 0.02314554899930954, "learning_rate": 4.992842544604745e-05, "loss": 0.0521328866481781, "step": 117130 }, { "epoch": 0.5029065024943544, "grad_norm": 0.8494148850440979, "learning_rate": 4.992411372592983e-05, "loss": 0.30360457897186277, "step": 117140 }, { "epoch": 0.5029494345843745, "grad_norm": 0.003587879240512848, "learning_rate": 4.99198020058122e-05, "loss": 0.06846604943275451, "step": 117150 }, { "epoch": 0.5029923666743944, "grad_norm": 24.003463745117188, "learning_rate": 4.991549028569458e-05, "loss": 0.16479328870773316, "step": 117160 }, { "epoch": 0.5030352987644144, "grad_norm": 15.54176139831543, "learning_rate": 4.9911178565576954e-05, "loss": 0.16109709739685057, "step": 117170 }, { "epoch": 0.5030782308544345, "grad_norm": 0.03265627101063728, "learning_rate": 4.990686684545933e-05, "loss": 0.12660821676254272, "step": 117180 }, { "epoch": 0.5031211629444544, "grad_norm": 0.37020328640937805, "learning_rate": 4.99025551253417e-05, "loss": 0.13424829244613648, "step": 117190 }, { "epoch": 0.5031640950344745, "grad_norm": 0.03218246251344681, "learning_rate": 4.989824340522408e-05, "loss": 0.2562605857849121, "step": 117200 }, { "epoch": 0.5032070271244945, "grad_norm": 0.22651711106300354, "learning_rate": 4.9893931685106457e-05, "loss": 0.13423599004745485, "step": 117210 }, { "epoch": 0.5032499592145144, "grad_norm": 1.5119297504425049, "learning_rate": 4.988961996498884e-05, "loss": 0.11704769134521484, "step": 117220 }, { "epoch": 0.5032928913045345, "grad_norm": 4.174872875213623, "learning_rate": 4.988530824487121e-05, "loss": 0.3308922290802002, "step": 117230 }, { "epoch": 0.5033358233945545, "grad_norm": 0.8240285515785217, "learning_rate": 4.988099652475359e-05, "loss": 0.2682643890380859, "step": 117240 }, { "epoch": 0.5033787554845744, "grad_norm": 0.32659050822257996, "learning_rate": 4.9876684804635966e-05, "loss": 0.33257737159729006, "step": 117250 }, { "epoch": 0.5034216875745945, "grad_norm": 0.6228490471839905, "learning_rate": 4.987237308451834e-05, "loss": 0.19845781326293946, "step": 117260 }, { "epoch": 0.5034646196646145, "grad_norm": 1.7231818437576294, "learning_rate": 4.9868061364400714e-05, "loss": 0.3892753839492798, "step": 117270 }, { "epoch": 0.5035075517546345, "grad_norm": 0.023025069385766983, "learning_rate": 4.986374964428309e-05, "loss": 0.16233171224594117, "step": 117280 }, { "epoch": 0.5035504838446545, "grad_norm": 0.018026838079094887, "learning_rate": 4.985943792416547e-05, "loss": 0.16559386253356934, "step": 117290 }, { "epoch": 0.5035934159346745, "grad_norm": 0.025355422869324684, "learning_rate": 4.9855126204047846e-05, "loss": 0.2693866014480591, "step": 117300 }, { "epoch": 0.5036363480246946, "grad_norm": 0.15633586049079895, "learning_rate": 4.9850814483930216e-05, "loss": 0.2386932611465454, "step": 117310 }, { "epoch": 0.5036792801147145, "grad_norm": 0.18697188794612885, "learning_rate": 4.9846502763812594e-05, "loss": 0.09443596601486207, "step": 117320 }, { "epoch": 0.5037222122047346, "grad_norm": 1.1669023036956787, "learning_rate": 4.984219104369498e-05, "loss": 0.41175551414489747, "step": 117330 }, { "epoch": 0.5037651442947546, "grad_norm": 2.8192169666290283, "learning_rate": 4.9837879323577355e-05, "loss": 0.16009420156478882, "step": 117340 }, { "epoch": 0.5038080763847745, "grad_norm": 1.1283766031265259, "learning_rate": 4.9833567603459726e-05, "loss": 0.31158037185668946, "step": 117350 }, { "epoch": 0.5038510084747946, "grad_norm": 2.5143420696258545, "learning_rate": 4.98292558833421e-05, "loss": 0.38306190967559817, "step": 117360 }, { "epoch": 0.5038939405648146, "grad_norm": 0.010936361737549305, "learning_rate": 4.982494416322448e-05, "loss": 0.1006517767906189, "step": 117370 }, { "epoch": 0.5039368726548346, "grad_norm": 1.180523157119751, "learning_rate": 4.982063244310686e-05, "loss": 0.14739004373550416, "step": 117380 }, { "epoch": 0.5039798047448546, "grad_norm": 1.164581537246704, "learning_rate": 4.981632072298923e-05, "loss": 0.2527391672134399, "step": 117390 }, { "epoch": 0.5040227368348746, "grad_norm": 0.006947158835828304, "learning_rate": 4.9812009002871606e-05, "loss": 0.1402176260948181, "step": 117400 }, { "epoch": 0.5040656689248946, "grad_norm": 0.014355388469994068, "learning_rate": 4.980769728275398e-05, "loss": 0.3870328426361084, "step": 117410 }, { "epoch": 0.5041086010149146, "grad_norm": 3.5852952003479004, "learning_rate": 4.980338556263636e-05, "loss": 0.21521148681640626, "step": 117420 }, { "epoch": 0.5041515331049347, "grad_norm": 1.0364006757736206, "learning_rate": 4.979907384251873e-05, "loss": 0.1514366865158081, "step": 117430 }, { "epoch": 0.5041944651949546, "grad_norm": 0.036176733672618866, "learning_rate": 4.9794762122401115e-05, "loss": 0.37396066188812255, "step": 117440 }, { "epoch": 0.5042373972849746, "grad_norm": 1.1058789491653442, "learning_rate": 4.979045040228349e-05, "loss": 0.30147347450256345, "step": 117450 }, { "epoch": 0.5042803293749947, "grad_norm": 0.6830658912658691, "learning_rate": 4.978613868216587e-05, "loss": 0.2363212823867798, "step": 117460 }, { "epoch": 0.5043232614650146, "grad_norm": 1.5272681713104248, "learning_rate": 4.978182696204824e-05, "loss": 0.14829931259155274, "step": 117470 }, { "epoch": 0.5043661935550346, "grad_norm": 0.2594696283340454, "learning_rate": 4.977751524193062e-05, "loss": 0.19520469903945922, "step": 117480 }, { "epoch": 0.5044091256450547, "grad_norm": 0.1625552475452423, "learning_rate": 4.9773203521812995e-05, "loss": 0.25294084548950196, "step": 117490 }, { "epoch": 0.5044520577350746, "grad_norm": 0.07057518512010574, "learning_rate": 4.976889180169537e-05, "loss": 0.2937706232070923, "step": 117500 }, { "epoch": 0.5044949898250947, "grad_norm": 2.936040163040161, "learning_rate": 4.976458008157775e-05, "loss": 0.3090040683746338, "step": 117510 }, { "epoch": 0.5045379219151147, "grad_norm": 0.03770938143134117, "learning_rate": 4.976026836146012e-05, "loss": 0.004101923853158951, "step": 117520 }, { "epoch": 0.5045808540051346, "grad_norm": 0.001742324442602694, "learning_rate": 4.97559566413425e-05, "loss": 0.21159942150115968, "step": 117530 }, { "epoch": 0.5046237860951547, "grad_norm": 3.898867607116699, "learning_rate": 4.9751644921224875e-05, "loss": 0.29637346267700193, "step": 117540 }, { "epoch": 0.5046667181851747, "grad_norm": 1.8550221920013428, "learning_rate": 4.974733320110725e-05, "loss": 0.3649588108062744, "step": 117550 }, { "epoch": 0.5047096502751947, "grad_norm": 2.0680346488952637, "learning_rate": 4.974302148098963e-05, "loss": 0.3192767143249512, "step": 117560 }, { "epoch": 0.5047525823652147, "grad_norm": 0.9255982637405396, "learning_rate": 4.973870976087201e-05, "loss": 0.31056628227233884, "step": 117570 }, { "epoch": 0.5047955144552347, "grad_norm": 1.4304261207580566, "learning_rate": 4.9734398040754384e-05, "loss": 0.30201447010040283, "step": 117580 }, { "epoch": 0.5048384465452547, "grad_norm": 0.030385779216885567, "learning_rate": 4.973008632063676e-05, "loss": 0.2219762086868286, "step": 117590 }, { "epoch": 0.5048813786352747, "grad_norm": 0.004098663106560707, "learning_rate": 4.972577460051913e-05, "loss": 0.14338167905807495, "step": 117600 }, { "epoch": 0.5049243107252948, "grad_norm": 0.2958376705646515, "learning_rate": 4.972146288040151e-05, "loss": 0.09196502566337586, "step": 117610 }, { "epoch": 0.5049672428153147, "grad_norm": 1.395702600479126, "learning_rate": 4.971715116028389e-05, "loss": 0.24472627639770508, "step": 117620 }, { "epoch": 0.5050101749053347, "grad_norm": 8.358769416809082, "learning_rate": 4.9712839440166264e-05, "loss": 0.28646397590637207, "step": 117630 }, { "epoch": 0.5050531069953548, "grad_norm": 0.11586333066225052, "learning_rate": 4.9708527720048635e-05, "loss": 0.26342833042144775, "step": 117640 }, { "epoch": 0.5050960390853747, "grad_norm": 0.17295314371585846, "learning_rate": 4.970421599993101e-05, "loss": 0.29183268547058105, "step": 117650 }, { "epoch": 0.5051389711753947, "grad_norm": 0.013368518091738224, "learning_rate": 4.969990427981339e-05, "loss": 0.16364799737930297, "step": 117660 }, { "epoch": 0.5051819032654148, "grad_norm": 1.6267229318618774, "learning_rate": 4.969559255969577e-05, "loss": 0.050443482398986814, "step": 117670 }, { "epoch": 0.5052248353554347, "grad_norm": 0.510202944278717, "learning_rate": 4.9691280839578144e-05, "loss": 0.122452712059021, "step": 117680 }, { "epoch": 0.5052677674454548, "grad_norm": 6.765218257904053, "learning_rate": 4.968696911946052e-05, "loss": 0.20582473278045654, "step": 117690 }, { "epoch": 0.5053106995354748, "grad_norm": 1.7956032752990723, "learning_rate": 4.96826573993429e-05, "loss": 0.18374691009521485, "step": 117700 }, { "epoch": 0.5053536316254947, "grad_norm": 2.8887641429901123, "learning_rate": 4.9678345679225276e-05, "loss": 0.3921834945678711, "step": 117710 }, { "epoch": 0.5053965637155148, "grad_norm": 0.5638045072555542, "learning_rate": 4.967403395910765e-05, "loss": 0.40248904228210447, "step": 117720 }, { "epoch": 0.5054394958055348, "grad_norm": 0.07434771209955215, "learning_rate": 4.9669722238990024e-05, "loss": 0.39189608097076417, "step": 117730 }, { "epoch": 0.5054824278955549, "grad_norm": 0.026078205555677414, "learning_rate": 4.96654105188724e-05, "loss": 0.1321608304977417, "step": 117740 }, { "epoch": 0.5055253599855748, "grad_norm": 0.03281189501285553, "learning_rate": 4.966109879875478e-05, "loss": 0.11588050127029419, "step": 117750 }, { "epoch": 0.5055682920755948, "grad_norm": 0.013210115022957325, "learning_rate": 4.965678707863715e-05, "loss": 0.12477741241455079, "step": 117760 }, { "epoch": 0.5056112241656149, "grad_norm": 3.3516409397125244, "learning_rate": 4.965247535851953e-05, "loss": 0.04821877479553223, "step": 117770 }, { "epoch": 0.5056541562556348, "grad_norm": 0.0025698766112327576, "learning_rate": 4.9648163638401904e-05, "loss": 0.1576859474182129, "step": 117780 }, { "epoch": 0.5056970883456549, "grad_norm": 0.23797300457954407, "learning_rate": 4.964385191828428e-05, "loss": 0.26349759101867676, "step": 117790 }, { "epoch": 0.5057400204356749, "grad_norm": 0.005504325032234192, "learning_rate": 4.963954019816666e-05, "loss": 0.06040756106376648, "step": 117800 }, { "epoch": 0.5057829525256948, "grad_norm": 0.004711473826318979, "learning_rate": 4.9635228478049036e-05, "loss": 0.16386046409606933, "step": 117810 }, { "epoch": 0.5058258846157149, "grad_norm": 0.019762564450502396, "learning_rate": 4.9630916757931414e-05, "loss": 0.1610340356826782, "step": 117820 }, { "epoch": 0.5058688167057349, "grad_norm": 0.07730504125356674, "learning_rate": 4.962660503781379e-05, "loss": 0.29164934158325195, "step": 117830 }, { "epoch": 0.5059117487957548, "grad_norm": 0.011327949352562428, "learning_rate": 4.962229331769617e-05, "loss": 0.10377846956253052, "step": 117840 }, { "epoch": 0.5059546808857749, "grad_norm": 0.00887768529355526, "learning_rate": 4.961798159757854e-05, "loss": 0.2585068941116333, "step": 117850 }, { "epoch": 0.5059976129757949, "grad_norm": 0.10135412216186523, "learning_rate": 4.9613669877460916e-05, "loss": 0.17118458747863768, "step": 117860 }, { "epoch": 0.5060405450658149, "grad_norm": 0.12075807899236679, "learning_rate": 4.9609358157343293e-05, "loss": 0.16684346199035643, "step": 117870 }, { "epoch": 0.5060834771558349, "grad_norm": 0.00484396331012249, "learning_rate": 4.960504643722567e-05, "loss": 0.17456436157226562, "step": 117880 }, { "epoch": 0.506126409245855, "grad_norm": 0.04089738428592682, "learning_rate": 4.960073471710804e-05, "loss": 0.21289370059967042, "step": 117890 }, { "epoch": 0.5061693413358749, "grad_norm": 0.032251860946416855, "learning_rate": 4.959642299699042e-05, "loss": 0.1787124514579773, "step": 117900 }, { "epoch": 0.5062122734258949, "grad_norm": 0.22456131875514984, "learning_rate": 4.9592111276872796e-05, "loss": 0.2632343530654907, "step": 117910 }, { "epoch": 0.506255205515915, "grad_norm": 4.649175643920898, "learning_rate": 4.958779955675517e-05, "loss": 0.2298372507095337, "step": 117920 }, { "epoch": 0.5062981376059349, "grad_norm": 0.06047212332487106, "learning_rate": 4.958348783663755e-05, "loss": 0.20818607807159423, "step": 117930 }, { "epoch": 0.5063410696959549, "grad_norm": 0.00024737833882682025, "learning_rate": 4.957917611651993e-05, "loss": 0.17201005220413207, "step": 117940 }, { "epoch": 0.506384001785975, "grad_norm": 3.057046413421631, "learning_rate": 4.9574864396402305e-05, "loss": 0.35734987258911133, "step": 117950 }, { "epoch": 0.5064269338759949, "grad_norm": 0.15315313637256622, "learning_rate": 4.957055267628468e-05, "loss": 0.09107869863510132, "step": 117960 }, { "epoch": 0.506469865966015, "grad_norm": 0.0059364596381783485, "learning_rate": 4.956624095616705e-05, "loss": 0.1393264651298523, "step": 117970 }, { "epoch": 0.506512798056035, "grad_norm": 13.115792274475098, "learning_rate": 4.956192923604943e-05, "loss": 0.3588475942611694, "step": 117980 }, { "epoch": 0.5065557301460549, "grad_norm": 0.02436887100338936, "learning_rate": 4.955761751593181e-05, "loss": 0.005839229002594948, "step": 117990 }, { "epoch": 0.506598662236075, "grad_norm": 0.24447183310985565, "learning_rate": 4.9553305795814185e-05, "loss": 0.3777074575424194, "step": 118000 }, { "epoch": 0.506598662236075, "eval_loss": 0.403300017118454, "eval_runtime": 27.2572, "eval_samples_per_second": 3.669, "eval_steps_per_second": 3.669, "step": 118000 }, { "epoch": 0.506641594326095, "grad_norm": 0.008575129322707653, "learning_rate": 4.9548994075696556e-05, "loss": 0.1603256344795227, "step": 118010 }, { "epoch": 0.5066845264161149, "grad_norm": 0.004096082877367735, "learning_rate": 4.954468235557893e-05, "loss": 0.31450352668762205, "step": 118020 }, { "epoch": 0.506727458506135, "grad_norm": 0.07715465873479843, "learning_rate": 4.954037063546131e-05, "loss": 0.11344932317733765, "step": 118030 }, { "epoch": 0.506770390596155, "grad_norm": 0.030346719548106194, "learning_rate": 4.9536058915343695e-05, "loss": 0.1910154104232788, "step": 118040 }, { "epoch": 0.506813322686175, "grad_norm": 2.2040464878082275, "learning_rate": 4.9531747195226065e-05, "loss": 0.12421665191650391, "step": 118050 }, { "epoch": 0.506856254776195, "grad_norm": 0.10436452925205231, "learning_rate": 4.952743547510844e-05, "loss": 0.18091598749160767, "step": 118060 }, { "epoch": 0.506899186866215, "grad_norm": 0.2847166061401367, "learning_rate": 4.952312375499082e-05, "loss": 0.3907632827758789, "step": 118070 }, { "epoch": 0.506942118956235, "grad_norm": 0.19107811152935028, "learning_rate": 4.95188120348732e-05, "loss": 0.16047124862670897, "step": 118080 }, { "epoch": 0.506985051046255, "grad_norm": 0.03470180928707123, "learning_rate": 4.951450031475557e-05, "loss": 0.09653306007385254, "step": 118090 }, { "epoch": 0.5070279831362751, "grad_norm": 0.01914292760193348, "learning_rate": 4.9510188594637945e-05, "loss": 0.1548427700996399, "step": 118100 }, { "epoch": 0.507070915226295, "grad_norm": 4.845828533172607, "learning_rate": 4.950587687452032e-05, "loss": 0.23194398880004882, "step": 118110 }, { "epoch": 0.507113847316315, "grad_norm": 0.6345359086990356, "learning_rate": 4.95015651544027e-05, "loss": 0.14568690061569214, "step": 118120 }, { "epoch": 0.5071567794063351, "grad_norm": 0.39725396037101746, "learning_rate": 4.949725343428507e-05, "loss": 0.199809730052948, "step": 118130 }, { "epoch": 0.507199711496355, "grad_norm": 0.0025165737606585026, "learning_rate": 4.949294171416745e-05, "loss": 0.19158271551132203, "step": 118140 }, { "epoch": 0.507242643586375, "grad_norm": 0.005341788753867149, "learning_rate": 4.948862999404983e-05, "loss": 0.11959943771362305, "step": 118150 }, { "epoch": 0.5072855756763951, "grad_norm": 0.16199614107608795, "learning_rate": 4.948431827393221e-05, "loss": 0.08734560012817383, "step": 118160 }, { "epoch": 0.5073285077664151, "grad_norm": 0.012718594633042812, "learning_rate": 4.948000655381458e-05, "loss": 0.19825737476348876, "step": 118170 }, { "epoch": 0.5073714398564351, "grad_norm": 0.008708270266652107, "learning_rate": 4.947569483369696e-05, "loss": 0.3302353620529175, "step": 118180 }, { "epoch": 0.5074143719464551, "grad_norm": 0.02464340068399906, "learning_rate": 4.9471383113579335e-05, "loss": 0.21059913635253907, "step": 118190 }, { "epoch": 0.5074573040364752, "grad_norm": 1.7081856727600098, "learning_rate": 4.946707139346171e-05, "loss": 0.3520483493804932, "step": 118200 }, { "epoch": 0.5075002361264951, "grad_norm": 2.243417739868164, "learning_rate": 4.946275967334409e-05, "loss": 0.10270900726318359, "step": 118210 }, { "epoch": 0.5075431682165151, "grad_norm": 0.32691332697868347, "learning_rate": 4.945844795322646e-05, "loss": 0.22291224002838134, "step": 118220 }, { "epoch": 0.5075861003065352, "grad_norm": 0.038161855190992355, "learning_rate": 4.945413623310884e-05, "loss": 0.340277099609375, "step": 118230 }, { "epoch": 0.5076290323965551, "grad_norm": 0.5222945213317871, "learning_rate": 4.9449824512991214e-05, "loss": 0.1506492018699646, "step": 118240 }, { "epoch": 0.5076719644865751, "grad_norm": 0.043260689824819565, "learning_rate": 4.944551279287359e-05, "loss": 0.10362763404846191, "step": 118250 }, { "epoch": 0.5077148965765952, "grad_norm": 0.003030435647815466, "learning_rate": 4.944120107275597e-05, "loss": 0.1216086745262146, "step": 118260 }, { "epoch": 0.5077578286666151, "grad_norm": 0.0064033265225589275, "learning_rate": 4.9436889352638347e-05, "loss": 0.0058049742132425305, "step": 118270 }, { "epoch": 0.5078007607566352, "grad_norm": 0.030727287754416466, "learning_rate": 4.9432577632520724e-05, "loss": 0.03154313564300537, "step": 118280 }, { "epoch": 0.5078436928466552, "grad_norm": 0.0021809639874845743, "learning_rate": 4.94282659124031e-05, "loss": 0.3597226142883301, "step": 118290 }, { "epoch": 0.5078866249366751, "grad_norm": 1.5678521394729614, "learning_rate": 4.942395419228547e-05, "loss": 0.09091430306434631, "step": 118300 }, { "epoch": 0.5079295570266952, "grad_norm": 0.012192213907837868, "learning_rate": 4.941964247216785e-05, "loss": 0.15386734008789063, "step": 118310 }, { "epoch": 0.5079724891167152, "grad_norm": 0.06318014115095139, "learning_rate": 4.9415330752050226e-05, "loss": 0.27775509357452394, "step": 118320 }, { "epoch": 0.5080154212067352, "grad_norm": 0.0260726660490036, "learning_rate": 4.9411019031932604e-05, "loss": 0.2071290969848633, "step": 118330 }, { "epoch": 0.5080583532967552, "grad_norm": 0.01756739243865013, "learning_rate": 4.9406707311814974e-05, "loss": 0.18228111267089844, "step": 118340 }, { "epoch": 0.5081012853867752, "grad_norm": 2.401149034500122, "learning_rate": 4.940239559169735e-05, "loss": 0.06631351113319398, "step": 118350 }, { "epoch": 0.5081442174767952, "grad_norm": 0.49134570360183716, "learning_rate": 4.939808387157973e-05, "loss": 0.3181680917739868, "step": 118360 }, { "epoch": 0.5081871495668152, "grad_norm": 0.1767713576555252, "learning_rate": 4.9393772151462106e-05, "loss": 0.3349648714065552, "step": 118370 }, { "epoch": 0.5082300816568353, "grad_norm": 0.002477077068760991, "learning_rate": 4.9389460431344484e-05, "loss": 0.3104588270187378, "step": 118380 }, { "epoch": 0.5082730137468552, "grad_norm": 1.2304719686508179, "learning_rate": 4.938514871122686e-05, "loss": 0.18686811923980712, "step": 118390 }, { "epoch": 0.5083159458368752, "grad_norm": 1.6199240684509277, "learning_rate": 4.938083699110924e-05, "loss": 0.6678519725799561, "step": 118400 }, { "epoch": 0.5083588779268953, "grad_norm": 0.006245411932468414, "learning_rate": 4.9376525270991616e-05, "loss": 0.13646771907806396, "step": 118410 }, { "epoch": 0.5084018100169152, "grad_norm": 1.9105803966522217, "learning_rate": 4.9372213550873986e-05, "loss": 0.15426579713821412, "step": 118420 }, { "epoch": 0.5084447421069352, "grad_norm": 0.1612144559621811, "learning_rate": 4.9367901830756364e-05, "loss": 0.06480223536491395, "step": 118430 }, { "epoch": 0.5084876741969553, "grad_norm": 0.00061203254153952, "learning_rate": 4.936359011063874e-05, "loss": 0.21237497329711913, "step": 118440 }, { "epoch": 0.5085306062869752, "grad_norm": 18.185203552246094, "learning_rate": 4.935927839052112e-05, "loss": 0.5285263538360596, "step": 118450 }, { "epoch": 0.5085735383769953, "grad_norm": 0.09754689037799835, "learning_rate": 4.935496667040349e-05, "loss": 0.3198472261428833, "step": 118460 }, { "epoch": 0.5086164704670153, "grad_norm": 0.00233909348025918, "learning_rate": 4.9350654950285866e-05, "loss": 0.12812756299972533, "step": 118470 }, { "epoch": 0.5086594025570352, "grad_norm": 0.07711761444807053, "learning_rate": 4.9346343230168244e-05, "loss": 0.30146214962005613, "step": 118480 }, { "epoch": 0.5087023346470553, "grad_norm": 1.0213673114776611, "learning_rate": 4.934203151005062e-05, "loss": 0.17265766859054565, "step": 118490 }, { "epoch": 0.5087452667370753, "grad_norm": 3.939648151397705, "learning_rate": 4.9337719789933e-05, "loss": 0.10305318832397461, "step": 118500 }, { "epoch": 0.5087881988270953, "grad_norm": 0.01822766847908497, "learning_rate": 4.9333408069815376e-05, "loss": 0.22157607078552247, "step": 118510 }, { "epoch": 0.5088311309171153, "grad_norm": 0.013578972779214382, "learning_rate": 4.932909634969775e-05, "loss": 0.03819190561771393, "step": 118520 }, { "epoch": 0.5088740630071353, "grad_norm": 3.911149263381958, "learning_rate": 4.932478462958013e-05, "loss": 0.10772855281829834, "step": 118530 }, { "epoch": 0.5089169950971553, "grad_norm": 0.11129100620746613, "learning_rate": 4.93204729094625e-05, "loss": 0.021247430145740508, "step": 118540 }, { "epoch": 0.5089599271871753, "grad_norm": 0.060637425631284714, "learning_rate": 4.931616118934488e-05, "loss": 0.19166574478149415, "step": 118550 }, { "epoch": 0.5090028592771954, "grad_norm": 0.006027820520102978, "learning_rate": 4.9311849469227256e-05, "loss": 0.09662819504737855, "step": 118560 }, { "epoch": 0.5090457913672153, "grad_norm": 0.03252384066581726, "learning_rate": 4.930753774910963e-05, "loss": 0.17281004190444946, "step": 118570 }, { "epoch": 0.5090887234572353, "grad_norm": 0.010324299335479736, "learning_rate": 4.930322602899201e-05, "loss": 0.22369747161865233, "step": 118580 }, { "epoch": 0.5091316555472554, "grad_norm": 0.9110094904899597, "learning_rate": 4.929891430887438e-05, "loss": 0.1740880250930786, "step": 118590 }, { "epoch": 0.5091745876372754, "grad_norm": 1.0386021137237549, "learning_rate": 4.929460258875676e-05, "loss": 0.19195722341537474, "step": 118600 }, { "epoch": 0.5092175197272953, "grad_norm": 1.992875337600708, "learning_rate": 4.9290290868639136e-05, "loss": 0.31609842777252195, "step": 118610 }, { "epoch": 0.5092604518173154, "grad_norm": 14.978772163391113, "learning_rate": 4.928597914852151e-05, "loss": 0.34784390926361086, "step": 118620 }, { "epoch": 0.5093033839073354, "grad_norm": 0.0040539707988500595, "learning_rate": 4.928166742840389e-05, "loss": 0.2548795223236084, "step": 118630 }, { "epoch": 0.5093463159973554, "grad_norm": 0.03759922459721565, "learning_rate": 4.927735570828627e-05, "loss": 0.13416372537612914, "step": 118640 }, { "epoch": 0.5093892480873754, "grad_norm": 1.1706840991973877, "learning_rate": 4.9273043988168645e-05, "loss": 0.28877017498016355, "step": 118650 }, { "epoch": 0.5094321801773954, "grad_norm": 0.03230566531419754, "learning_rate": 4.926873226805102e-05, "loss": 0.16305909156799317, "step": 118660 }, { "epoch": 0.5094751122674154, "grad_norm": 0.005947391968220472, "learning_rate": 4.926442054793339e-05, "loss": 0.1977899193763733, "step": 118670 }, { "epoch": 0.5095180443574354, "grad_norm": 0.025694016367197037, "learning_rate": 4.926010882781577e-05, "loss": 0.3352708339691162, "step": 118680 }, { "epoch": 0.5095609764474555, "grad_norm": 0.11599358171224594, "learning_rate": 4.925579710769815e-05, "loss": 0.1837789535522461, "step": 118690 }, { "epoch": 0.5096039085374754, "grad_norm": 0.008364609442651272, "learning_rate": 4.9251485387580525e-05, "loss": 0.25064802169799805, "step": 118700 }, { "epoch": 0.5096468406274954, "grad_norm": 1.6999173164367676, "learning_rate": 4.9247173667462895e-05, "loss": 0.16257811784744264, "step": 118710 }, { "epoch": 0.5096897727175155, "grad_norm": 1.4505658149719238, "learning_rate": 4.924286194734527e-05, "loss": 0.22113513946533203, "step": 118720 }, { "epoch": 0.5097327048075354, "grad_norm": 1.9441550970077515, "learning_rate": 4.923855022722765e-05, "loss": 0.28859150409698486, "step": 118730 }, { "epoch": 0.5097756368975555, "grad_norm": 0.007706368342041969, "learning_rate": 4.9234238507110034e-05, "loss": 0.1654113531112671, "step": 118740 }, { "epoch": 0.5098185689875755, "grad_norm": 0.0734504833817482, "learning_rate": 4.9229926786992405e-05, "loss": 0.21147520542144777, "step": 118750 }, { "epoch": 0.5098615010775954, "grad_norm": 0.9500401616096497, "learning_rate": 4.922561506687478e-05, "loss": 0.12635742425918578, "step": 118760 }, { "epoch": 0.5099044331676155, "grad_norm": 1.7449133396148682, "learning_rate": 4.922130334675716e-05, "loss": 0.2086487293243408, "step": 118770 }, { "epoch": 0.5099473652576355, "grad_norm": 0.0021773355547338724, "learning_rate": 4.921699162663954e-05, "loss": 0.4723085880279541, "step": 118780 }, { "epoch": 0.5099902973476554, "grad_norm": 0.013304928317666054, "learning_rate": 4.921267990652191e-05, "loss": 0.24113748073577881, "step": 118790 }, { "epoch": 0.5100332294376755, "grad_norm": 1.3635761737823486, "learning_rate": 4.9208368186404285e-05, "loss": 0.2260056495666504, "step": 118800 }, { "epoch": 0.5100761615276955, "grad_norm": 1.5315487384796143, "learning_rate": 4.920405646628666e-05, "loss": 0.19219486713409423, "step": 118810 }, { "epoch": 0.5101190936177155, "grad_norm": 1.452343225479126, "learning_rate": 4.919974474616904e-05, "loss": 0.24570739269256592, "step": 118820 }, { "epoch": 0.5101620257077355, "grad_norm": 0.014296891167759895, "learning_rate": 4.919543302605141e-05, "loss": 0.1545376181602478, "step": 118830 }, { "epoch": 0.5102049577977555, "grad_norm": 0.02429923228919506, "learning_rate": 4.919112130593379e-05, "loss": 0.30917627811431886, "step": 118840 }, { "epoch": 0.5102478898877755, "grad_norm": 2.3836729526519775, "learning_rate": 4.918680958581617e-05, "loss": 0.2750249862670898, "step": 118850 }, { "epoch": 0.5102908219777955, "grad_norm": 0.08615285903215408, "learning_rate": 4.918249786569855e-05, "loss": 0.0027405740693211555, "step": 118860 }, { "epoch": 0.5103337540678156, "grad_norm": 2.3817460536956787, "learning_rate": 4.917818614558092e-05, "loss": 0.17307817935943604, "step": 118870 }, { "epoch": 0.5103766861578355, "grad_norm": 0.009556726552546024, "learning_rate": 4.91738744254633e-05, "loss": 0.13647642135620117, "step": 118880 }, { "epoch": 0.5104196182478555, "grad_norm": 0.01113821566104889, "learning_rate": 4.9169562705345674e-05, "loss": 0.21420552730560302, "step": 118890 }, { "epoch": 0.5104625503378756, "grad_norm": 0.1573408842086792, "learning_rate": 4.916525098522805e-05, "loss": 0.08282997012138367, "step": 118900 }, { "epoch": 0.5105054824278955, "grad_norm": 0.009088823571801186, "learning_rate": 4.916093926511042e-05, "loss": 0.4955763339996338, "step": 118910 }, { "epoch": 0.5105484145179155, "grad_norm": 3.0080113410949707, "learning_rate": 4.91566275449928e-05, "loss": 0.18253889083862304, "step": 118920 }, { "epoch": 0.5105913466079356, "grad_norm": 0.040299657732248306, "learning_rate": 4.9152315824875177e-05, "loss": 0.13436200618743896, "step": 118930 }, { "epoch": 0.5106342786979555, "grad_norm": 2.8525309562683105, "learning_rate": 4.9148004104757554e-05, "loss": 0.15410553216934203, "step": 118940 }, { "epoch": 0.5106772107879756, "grad_norm": 2.061488151550293, "learning_rate": 4.914369238463993e-05, "loss": 0.2232752561569214, "step": 118950 }, { "epoch": 0.5107201428779956, "grad_norm": 0.007045496255159378, "learning_rate": 4.913938066452231e-05, "loss": 0.15050796270370484, "step": 118960 }, { "epoch": 0.5107630749680155, "grad_norm": 1.3432217836380005, "learning_rate": 4.9135068944404686e-05, "loss": 0.4139756202697754, "step": 118970 }, { "epoch": 0.5108060070580356, "grad_norm": 3.406526803970337, "learning_rate": 4.913075722428706e-05, "loss": 0.17657510042190552, "step": 118980 }, { "epoch": 0.5108489391480556, "grad_norm": 0.16910117864608765, "learning_rate": 4.912644550416944e-05, "loss": 0.30909993648529055, "step": 118990 }, { "epoch": 0.5108918712380756, "grad_norm": 0.06404928863048553, "learning_rate": 4.912213378405181e-05, "loss": 0.11059070825576782, "step": 119000 }, { "epoch": 0.5108918712380756, "eval_loss": 0.40124770998954773, "eval_runtime": 27.1649, "eval_samples_per_second": 3.681, "eval_steps_per_second": 3.681, "step": 119000 }, { "epoch": 0.5109348033280956, "grad_norm": 0.006009700242429972, "learning_rate": 4.911782206393419e-05, "loss": 0.35542969703674315, "step": 119010 }, { "epoch": 0.5109777354181156, "grad_norm": 4.224801540374756, "learning_rate": 4.9113510343816566e-05, "loss": 0.11748298406600952, "step": 119020 }, { "epoch": 0.5110206675081357, "grad_norm": 2.7295689582824707, "learning_rate": 4.910919862369894e-05, "loss": 0.2602656602859497, "step": 119030 }, { "epoch": 0.5110635995981556, "grad_norm": 0.4950346052646637, "learning_rate": 4.9104886903581314e-05, "loss": 0.16591581106185913, "step": 119040 }, { "epoch": 0.5111065316881757, "grad_norm": 0.09243180602788925, "learning_rate": 4.910057518346369e-05, "loss": 0.09594988822937012, "step": 119050 }, { "epoch": 0.5111494637781957, "grad_norm": 0.20909126102924347, "learning_rate": 4.909626346334607e-05, "loss": 0.21844382286071778, "step": 119060 }, { "epoch": 0.5111923958682156, "grad_norm": 0.017204085364937782, "learning_rate": 4.9091951743228446e-05, "loss": 0.035818496346473695, "step": 119070 }, { "epoch": 0.5112353279582357, "grad_norm": 0.016038501635193825, "learning_rate": 4.908764002311082e-05, "loss": 0.15340156555175782, "step": 119080 }, { "epoch": 0.5112782600482557, "grad_norm": 0.12767556309700012, "learning_rate": 4.90833283029932e-05, "loss": 0.1794809341430664, "step": 119090 }, { "epoch": 0.5113211921382756, "grad_norm": 1.6308916807174683, "learning_rate": 4.907901658287558e-05, "loss": 0.28574433326721194, "step": 119100 }, { "epoch": 0.5113641242282957, "grad_norm": 0.07135585695505142, "learning_rate": 4.9074704862757955e-05, "loss": 0.10939698219299317, "step": 119110 }, { "epoch": 0.5114070563183157, "grad_norm": 0.0036214394494891167, "learning_rate": 4.9070393142640326e-05, "loss": 0.02490311712026596, "step": 119120 }, { "epoch": 0.5114499884083357, "grad_norm": 0.07109871506690979, "learning_rate": 4.90660814225227e-05, "loss": 0.22565762996673583, "step": 119130 }, { "epoch": 0.5114929204983557, "grad_norm": 0.05331380292773247, "learning_rate": 4.906176970240508e-05, "loss": 0.05728686451911926, "step": 119140 }, { "epoch": 0.5115358525883758, "grad_norm": 2.1476547718048096, "learning_rate": 4.905745798228746e-05, "loss": 0.18775432109832763, "step": 119150 }, { "epoch": 0.5115787846783957, "grad_norm": 0.019761990755796432, "learning_rate": 4.905314626216983e-05, "loss": 0.07299734950065613, "step": 119160 }, { "epoch": 0.5116217167684157, "grad_norm": 6.146413803100586, "learning_rate": 4.9048834542052206e-05, "loss": 0.331719183921814, "step": 119170 }, { "epoch": 0.5116646488584358, "grad_norm": 0.03874233737587929, "learning_rate": 4.904452282193458e-05, "loss": 0.17403292655944824, "step": 119180 }, { "epoch": 0.5117075809484557, "grad_norm": 0.5045718550682068, "learning_rate": 4.904021110181696e-05, "loss": 0.14510732889175415, "step": 119190 }, { "epoch": 0.5117505130384757, "grad_norm": 1.7479007244110107, "learning_rate": 4.903589938169934e-05, "loss": 0.4036064147949219, "step": 119200 }, { "epoch": 0.5117934451284958, "grad_norm": 0.01966957002878189, "learning_rate": 4.9031587661581715e-05, "loss": 0.28596360683441163, "step": 119210 }, { "epoch": 0.5118363772185157, "grad_norm": 0.0067664021626114845, "learning_rate": 4.902727594146409e-05, "loss": 0.1794750690460205, "step": 119220 }, { "epoch": 0.5118793093085358, "grad_norm": 0.01229294016957283, "learning_rate": 4.902296422134647e-05, "loss": 0.26773154735565186, "step": 119230 }, { "epoch": 0.5119222413985558, "grad_norm": 1.3651846647262573, "learning_rate": 4.901865250122884e-05, "loss": 0.25625336170196533, "step": 119240 }, { "epoch": 0.5119651734885757, "grad_norm": 0.006748523097485304, "learning_rate": 4.901434078111122e-05, "loss": 0.21946640014648439, "step": 119250 }, { "epoch": 0.5120081055785958, "grad_norm": 7.10434627532959, "learning_rate": 4.9010029060993595e-05, "loss": 0.1346738815307617, "step": 119260 }, { "epoch": 0.5120510376686158, "grad_norm": 0.001597086084075272, "learning_rate": 4.900571734087597e-05, "loss": 0.3982970714569092, "step": 119270 }, { "epoch": 0.5120939697586357, "grad_norm": 6.853429794311523, "learning_rate": 4.900140562075834e-05, "loss": 0.37209444046020507, "step": 119280 }, { "epoch": 0.5121369018486558, "grad_norm": 0.052766378968954086, "learning_rate": 4.899709390064072e-05, "loss": 0.150531005859375, "step": 119290 }, { "epoch": 0.5121798339386758, "grad_norm": 1.664138913154602, "learning_rate": 4.89927821805231e-05, "loss": 0.2832683324813843, "step": 119300 }, { "epoch": 0.5122227660286958, "grad_norm": 0.017171716317534447, "learning_rate": 4.8988470460405475e-05, "loss": 0.047049257159233096, "step": 119310 }, { "epoch": 0.5122656981187158, "grad_norm": 3.7572007179260254, "learning_rate": 4.898415874028785e-05, "loss": 0.19532525539398193, "step": 119320 }, { "epoch": 0.5123086302087358, "grad_norm": 2.1995935440063477, "learning_rate": 4.897984702017023e-05, "loss": 0.29763474464416506, "step": 119330 }, { "epoch": 0.5123515622987558, "grad_norm": 0.06697755306959152, "learning_rate": 4.897553530005261e-05, "loss": 0.16395822763442994, "step": 119340 }, { "epoch": 0.5123944943887758, "grad_norm": 2.4659860134124756, "learning_rate": 4.8971223579934984e-05, "loss": 0.3857898712158203, "step": 119350 }, { "epoch": 0.5124374264787959, "grad_norm": 4.263999938964844, "learning_rate": 4.896691185981736e-05, "loss": 0.34842002391815186, "step": 119360 }, { "epoch": 0.5124803585688158, "grad_norm": 0.004874889738857746, "learning_rate": 4.896260013969973e-05, "loss": 0.2140347480773926, "step": 119370 }, { "epoch": 0.5125232906588358, "grad_norm": 0.015676314011216164, "learning_rate": 4.895828841958211e-05, "loss": 0.15982836484909058, "step": 119380 }, { "epoch": 0.5125662227488559, "grad_norm": 0.0671662986278534, "learning_rate": 4.895397669946449e-05, "loss": 0.37576262950897216, "step": 119390 }, { "epoch": 0.5126091548388758, "grad_norm": 0.5890264511108398, "learning_rate": 4.8949664979346864e-05, "loss": 0.2299262046813965, "step": 119400 }, { "epoch": 0.5126520869288959, "grad_norm": 0.02277068980038166, "learning_rate": 4.8945353259229235e-05, "loss": 0.3137866735458374, "step": 119410 }, { "epoch": 0.5126950190189159, "grad_norm": 1.7423510551452637, "learning_rate": 4.894104153911161e-05, "loss": 0.42569632530212403, "step": 119420 }, { "epoch": 0.5127379511089358, "grad_norm": 0.08310827612876892, "learning_rate": 4.893672981899399e-05, "loss": 0.09291516542434693, "step": 119430 }, { "epoch": 0.5127808831989559, "grad_norm": 0.0016216287622228265, "learning_rate": 4.8932418098876374e-05, "loss": 0.031064292788505553, "step": 119440 }, { "epoch": 0.5128238152889759, "grad_norm": 4.090322017669678, "learning_rate": 4.8928106378758744e-05, "loss": 0.43431825637817384, "step": 119450 }, { "epoch": 0.512866747378996, "grad_norm": 0.022212985903024673, "learning_rate": 4.892379465864112e-05, "loss": 0.07562644481658935, "step": 119460 }, { "epoch": 0.5129096794690159, "grad_norm": 1.6900726556777954, "learning_rate": 4.89194829385235e-05, "loss": 0.17154940366744995, "step": 119470 }, { "epoch": 0.5129526115590359, "grad_norm": 0.9105339646339417, "learning_rate": 4.8915171218405876e-05, "loss": 0.05909296274185181, "step": 119480 }, { "epoch": 0.512995543649056, "grad_norm": 4.826657295227051, "learning_rate": 4.891085949828825e-05, "loss": 0.21760139465332032, "step": 119490 }, { "epoch": 0.5130384757390759, "grad_norm": 0.024427048861980438, "learning_rate": 4.8906547778170624e-05, "loss": 0.21957478523254395, "step": 119500 }, { "epoch": 0.513081407829096, "grad_norm": 1.8900254964828491, "learning_rate": 4.8902236058053e-05, "loss": 0.24639198780059815, "step": 119510 }, { "epoch": 0.513124339919116, "grad_norm": 0.015116676688194275, "learning_rate": 4.889792433793538e-05, "loss": 0.07427915930747986, "step": 119520 }, { "epoch": 0.5131672720091359, "grad_norm": 1.8460825681686401, "learning_rate": 4.889361261781775e-05, "loss": 0.29505224227905275, "step": 119530 }, { "epoch": 0.513210204099156, "grad_norm": 0.009372652508318424, "learning_rate": 4.888930089770013e-05, "loss": 0.17351727485656737, "step": 119540 }, { "epoch": 0.513253136189176, "grad_norm": 9.46724796295166, "learning_rate": 4.888498917758251e-05, "loss": 0.2226557493209839, "step": 119550 }, { "epoch": 0.5132960682791959, "grad_norm": 0.013306156732141972, "learning_rate": 4.888067745746489e-05, "loss": 0.2522123336791992, "step": 119560 }, { "epoch": 0.513339000369216, "grad_norm": 2.3092589378356934, "learning_rate": 4.887636573734726e-05, "loss": 0.31043570041656493, "step": 119570 }, { "epoch": 0.513381932459236, "grad_norm": 1.2920349836349487, "learning_rate": 4.8872054017229636e-05, "loss": 0.26676900386810304, "step": 119580 }, { "epoch": 0.513424864549256, "grad_norm": 2.2212910652160645, "learning_rate": 4.8867742297112013e-05, "loss": 0.19261443614959717, "step": 119590 }, { "epoch": 0.513467796639276, "grad_norm": 0.011586138047277927, "learning_rate": 4.886343057699439e-05, "loss": 0.09315774440765381, "step": 119600 }, { "epoch": 0.513510728729296, "grad_norm": 4.446086883544922, "learning_rate": 4.885911885687676e-05, "loss": 0.11330181360244751, "step": 119610 }, { "epoch": 0.513553660819316, "grad_norm": 3.613748550415039, "learning_rate": 4.885480713675914e-05, "loss": 0.36722991466522215, "step": 119620 }, { "epoch": 0.513596592909336, "grad_norm": 0.012964880093932152, "learning_rate": 4.8850495416641516e-05, "loss": 0.21476566791534424, "step": 119630 }, { "epoch": 0.5136395249993561, "grad_norm": 0.008148579858243465, "learning_rate": 4.8846183696523893e-05, "loss": 0.3132286310195923, "step": 119640 }, { "epoch": 0.513682457089376, "grad_norm": 10.44156551361084, "learning_rate": 4.8841871976406264e-05, "loss": 0.3538014888763428, "step": 119650 }, { "epoch": 0.513725389179396, "grad_norm": 0.010352738201618195, "learning_rate": 4.883756025628865e-05, "loss": 0.09612705111503601, "step": 119660 }, { "epoch": 0.5137683212694161, "grad_norm": 0.028367938473820686, "learning_rate": 4.8833248536171025e-05, "loss": 0.12962062358856202, "step": 119670 }, { "epoch": 0.513811253359436, "grad_norm": 1.7438544034957886, "learning_rate": 4.88289368160534e-05, "loss": 0.09261025190353393, "step": 119680 }, { "epoch": 0.513854185449456, "grad_norm": 0.004251221194863319, "learning_rate": 4.882462509593578e-05, "loss": 0.169364333152771, "step": 119690 }, { "epoch": 0.5138971175394761, "grad_norm": 2.1003313064575195, "learning_rate": 4.882031337581815e-05, "loss": 0.26846721172332766, "step": 119700 }, { "epoch": 0.513940049629496, "grad_norm": 2.0436689853668213, "learning_rate": 4.881600165570053e-05, "loss": 0.25663022994995116, "step": 119710 }, { "epoch": 0.5139829817195161, "grad_norm": 0.13745620846748352, "learning_rate": 4.8811689935582905e-05, "loss": 0.13834238052368164, "step": 119720 }, { "epoch": 0.5140259138095361, "grad_norm": 1.7277699708938599, "learning_rate": 4.880737821546528e-05, "loss": 0.3672468662261963, "step": 119730 }, { "epoch": 0.514068845899556, "grad_norm": 1.5249468088150024, "learning_rate": 4.880306649534765e-05, "loss": 0.26362793445587157, "step": 119740 }, { "epoch": 0.5141117779895761, "grad_norm": 0.02339192107319832, "learning_rate": 4.879875477523003e-05, "loss": 0.12202959060668946, "step": 119750 }, { "epoch": 0.5141547100795961, "grad_norm": 0.012677889317274094, "learning_rate": 4.879444305511241e-05, "loss": 0.2084029197692871, "step": 119760 }, { "epoch": 0.514197642169616, "grad_norm": 0.8021616339683533, "learning_rate": 4.8790131334994785e-05, "loss": 0.11093568801879883, "step": 119770 }, { "epoch": 0.5142405742596361, "grad_norm": 0.0014070516917854548, "learning_rate": 4.878581961487716e-05, "loss": 0.35231709480285645, "step": 119780 }, { "epoch": 0.5142835063496561, "grad_norm": 5.40916109085083, "learning_rate": 4.878150789475954e-05, "loss": 0.19839794635772706, "step": 119790 }, { "epoch": 0.5143264384396761, "grad_norm": 0.730078935623169, "learning_rate": 4.877719617464192e-05, "loss": 0.050247853994369505, "step": 119800 }, { "epoch": 0.5143693705296961, "grad_norm": 0.39397040009498596, "learning_rate": 4.8772884454524295e-05, "loss": 0.06387539505958557, "step": 119810 }, { "epoch": 0.5144123026197162, "grad_norm": 0.00646138796582818, "learning_rate": 4.8768572734406665e-05, "loss": 0.19105199575424195, "step": 119820 }, { "epoch": 0.5144552347097361, "grad_norm": 0.008075837977230549, "learning_rate": 4.876426101428904e-05, "loss": 0.1513899803161621, "step": 119830 }, { "epoch": 0.5144981667997561, "grad_norm": 1.2805685997009277, "learning_rate": 4.875994929417142e-05, "loss": 0.18917036056518555, "step": 119840 }, { "epoch": 0.5145410988897762, "grad_norm": 0.06163414567708969, "learning_rate": 4.87556375740538e-05, "loss": 0.1583377242088318, "step": 119850 }, { "epoch": 0.5145840309797961, "grad_norm": 1.6436272859573364, "learning_rate": 4.875132585393617e-05, "loss": 0.17201461791992187, "step": 119860 }, { "epoch": 0.5146269630698161, "grad_norm": 1.2979214191436768, "learning_rate": 4.8747014133818545e-05, "loss": 0.2652168273925781, "step": 119870 }, { "epoch": 0.5146698951598362, "grad_norm": 2.1574161052703857, "learning_rate": 4.874270241370092e-05, "loss": 0.2809084415435791, "step": 119880 }, { "epoch": 0.5147128272498562, "grad_norm": 0.0017827115952968597, "learning_rate": 4.87383906935833e-05, "loss": 0.3289920806884766, "step": 119890 }, { "epoch": 0.5147557593398762, "grad_norm": 3.049043655395508, "learning_rate": 4.873407897346568e-05, "loss": 0.2237234592437744, "step": 119900 }, { "epoch": 0.5147986914298962, "grad_norm": 0.01375525165349245, "learning_rate": 4.8729767253348055e-05, "loss": 0.13331135511398315, "step": 119910 }, { "epoch": 0.5148416235199162, "grad_norm": 0.5105711817741394, "learning_rate": 4.872545553323043e-05, "loss": 0.22515881061553955, "step": 119920 }, { "epoch": 0.5148845556099362, "grad_norm": 0.6324796676635742, "learning_rate": 4.872114381311281e-05, "loss": 0.20807430744171143, "step": 119930 }, { "epoch": 0.5149274876999562, "grad_norm": 1.271294355392456, "learning_rate": 4.871683209299518e-05, "loss": 0.10505075454711914, "step": 119940 }, { "epoch": 0.5149704197899763, "grad_norm": 0.09919047355651855, "learning_rate": 4.871252037287756e-05, "loss": 0.07043569684028625, "step": 119950 }, { "epoch": 0.5150133518799962, "grad_norm": 0.12949281930923462, "learning_rate": 4.8708208652759935e-05, "loss": 0.19191750288009643, "step": 119960 }, { "epoch": 0.5150562839700162, "grad_norm": 0.09126322716474533, "learning_rate": 4.870389693264231e-05, "loss": 0.19513965845108033, "step": 119970 }, { "epoch": 0.5150992160600363, "grad_norm": 0.8619544506072998, "learning_rate": 4.869958521252468e-05, "loss": 0.36798958778381347, "step": 119980 }, { "epoch": 0.5151421481500562, "grad_norm": 0.046924423426389694, "learning_rate": 4.869527349240706e-05, "loss": 0.2555686473846436, "step": 119990 }, { "epoch": 0.5151850802400763, "grad_norm": 0.0034495419822633266, "learning_rate": 4.869096177228944e-05, "loss": 0.278632378578186, "step": 120000 }, { "epoch": 0.5151850802400763, "eval_loss": 0.4063738286495209, "eval_runtime": 27.1171, "eval_samples_per_second": 3.688, "eval_steps_per_second": 3.688, "step": 120000 }, { "epoch": 0.5152280123300963, "grad_norm": 0.049443356692790985, "learning_rate": 4.8686650052171814e-05, "loss": 0.4057520866394043, "step": 120010 }, { "epoch": 0.5152709444201162, "grad_norm": 0.0015346826985478401, "learning_rate": 4.868233833205419e-05, "loss": 0.10722736120224, "step": 120020 }, { "epoch": 0.5153138765101363, "grad_norm": 0.0473739318549633, "learning_rate": 4.867802661193657e-05, "loss": 0.14505916833877563, "step": 120030 }, { "epoch": 0.5153568086001563, "grad_norm": 0.023228967562317848, "learning_rate": 4.8673714891818946e-05, "loss": 0.2812461853027344, "step": 120040 }, { "epoch": 0.5153997406901762, "grad_norm": 0.006087969522923231, "learning_rate": 4.8669403171701324e-05, "loss": 0.2892911911010742, "step": 120050 }, { "epoch": 0.5154426727801963, "grad_norm": 0.00686612306162715, "learning_rate": 4.86650914515837e-05, "loss": 0.2778724431991577, "step": 120060 }, { "epoch": 0.5154856048702163, "grad_norm": 0.6555600166320801, "learning_rate": 4.866077973146607e-05, "loss": 0.051532381772994997, "step": 120070 }, { "epoch": 0.5155285369602363, "grad_norm": 6.1181464195251465, "learning_rate": 4.865646801134845e-05, "loss": 0.46932668685913087, "step": 120080 }, { "epoch": 0.5155714690502563, "grad_norm": 0.0027987684588879347, "learning_rate": 4.8652156291230826e-05, "loss": 0.09915638566017151, "step": 120090 }, { "epoch": 0.5156144011402763, "grad_norm": 1.746132493019104, "learning_rate": 4.8647844571113204e-05, "loss": 0.19194701910018921, "step": 120100 }, { "epoch": 0.5156573332302963, "grad_norm": 0.0009860859718173742, "learning_rate": 4.8643532850995574e-05, "loss": 0.03388981223106384, "step": 120110 }, { "epoch": 0.5157002653203163, "grad_norm": 0.7049172520637512, "learning_rate": 4.863922113087795e-05, "loss": 0.278376030921936, "step": 120120 }, { "epoch": 0.5157431974103364, "grad_norm": 0.0033822215627878904, "learning_rate": 4.863490941076033e-05, "loss": 0.134699809551239, "step": 120130 }, { "epoch": 0.5157861295003563, "grad_norm": 0.8424586653709412, "learning_rate": 4.8630597690642706e-05, "loss": 0.33212087154388426, "step": 120140 }, { "epoch": 0.5158290615903763, "grad_norm": 2.1320204734802246, "learning_rate": 4.8626285970525084e-05, "loss": 0.26734697818756104, "step": 120150 }, { "epoch": 0.5158719936803964, "grad_norm": 1.3596432209014893, "learning_rate": 4.862197425040746e-05, "loss": 0.10061935186386109, "step": 120160 }, { "epoch": 0.5159149257704163, "grad_norm": 1.0820353031158447, "learning_rate": 4.861766253028984e-05, "loss": 0.17431243658065795, "step": 120170 }, { "epoch": 0.5159578578604364, "grad_norm": 0.0008929084287956357, "learning_rate": 4.8613350810172216e-05, "loss": 0.14325375556945802, "step": 120180 }, { "epoch": 0.5160007899504564, "grad_norm": 2.565387010574341, "learning_rate": 4.8609039090054586e-05, "loss": 0.33260061740875246, "step": 120190 }, { "epoch": 0.5160437220404763, "grad_norm": 0.011970599181950092, "learning_rate": 4.8604727369936964e-05, "loss": 0.18246814012527465, "step": 120200 }, { "epoch": 0.5160866541304964, "grad_norm": 0.02352987229824066, "learning_rate": 4.860041564981934e-05, "loss": 0.21905741691589356, "step": 120210 }, { "epoch": 0.5161295862205164, "grad_norm": 0.03168490156531334, "learning_rate": 4.859610392970172e-05, "loss": 0.2257610082626343, "step": 120220 }, { "epoch": 0.5161725183105363, "grad_norm": 0.12218201160430908, "learning_rate": 4.859179220958409e-05, "loss": 0.32497642040252683, "step": 120230 }, { "epoch": 0.5162154504005564, "grad_norm": 4.0117034912109375, "learning_rate": 4.8587480489466466e-05, "loss": 0.00710456520318985, "step": 120240 }, { "epoch": 0.5162583824905764, "grad_norm": 0.032607097178697586, "learning_rate": 4.8583168769348844e-05, "loss": 0.28474650382995603, "step": 120250 }, { "epoch": 0.5163013145805964, "grad_norm": 0.1141975000500679, "learning_rate": 4.857885704923123e-05, "loss": 0.18312805891036987, "step": 120260 }, { "epoch": 0.5163442466706164, "grad_norm": 0.008268962614238262, "learning_rate": 4.85745453291136e-05, "loss": 0.18339786529541016, "step": 120270 }, { "epoch": 0.5163871787606364, "grad_norm": 5.46415376663208, "learning_rate": 4.8570233608995976e-05, "loss": 0.37812774181365966, "step": 120280 }, { "epoch": 0.5164301108506564, "grad_norm": 4.220892429351807, "learning_rate": 4.856592188887835e-05, "loss": 0.24817111492156982, "step": 120290 }, { "epoch": 0.5164730429406764, "grad_norm": 2.257537841796875, "learning_rate": 4.856161016876073e-05, "loss": 0.31221191883087157, "step": 120300 }, { "epoch": 0.5165159750306965, "grad_norm": 0.1420465111732483, "learning_rate": 4.85572984486431e-05, "loss": 0.08064374923706055, "step": 120310 }, { "epoch": 0.5165589071207165, "grad_norm": 2.9821970462799072, "learning_rate": 4.855298672852548e-05, "loss": 0.33227217197418213, "step": 120320 }, { "epoch": 0.5166018392107364, "grad_norm": 0.23036722838878632, "learning_rate": 4.8548675008407856e-05, "loss": 0.15705791711807252, "step": 120330 }, { "epoch": 0.5166447713007565, "grad_norm": 0.01970132440328598, "learning_rate": 4.854436328829023e-05, "loss": 0.3197164058685303, "step": 120340 }, { "epoch": 0.5166877033907765, "grad_norm": 1.0530132055282593, "learning_rate": 4.8540051568172603e-05, "loss": 0.10031181573867798, "step": 120350 }, { "epoch": 0.5167306354807965, "grad_norm": 0.05745401605963707, "learning_rate": 4.853573984805498e-05, "loss": 0.15597434043884278, "step": 120360 }, { "epoch": 0.5167735675708165, "grad_norm": 0.003141403431072831, "learning_rate": 4.8531428127937365e-05, "loss": 0.035469675064086915, "step": 120370 }, { "epoch": 0.5168164996608365, "grad_norm": 2.398857831954956, "learning_rate": 4.852711640781974e-05, "loss": 0.32916827201843263, "step": 120380 }, { "epoch": 0.5168594317508565, "grad_norm": 3.834961175918579, "learning_rate": 4.852280468770212e-05, "loss": 0.08573095798492432, "step": 120390 }, { "epoch": 0.5169023638408765, "grad_norm": 0.13391876220703125, "learning_rate": 4.851849296758449e-05, "loss": 0.15842015743255616, "step": 120400 }, { "epoch": 0.5169452959308966, "grad_norm": 0.6579734086990356, "learning_rate": 4.851418124746687e-05, "loss": 0.16875349283218383, "step": 120410 }, { "epoch": 0.5169882280209165, "grad_norm": 7.994966506958008, "learning_rate": 4.8509869527349245e-05, "loss": 0.18427584171295167, "step": 120420 }, { "epoch": 0.5170311601109365, "grad_norm": 0.0009446104522794485, "learning_rate": 4.850555780723162e-05, "loss": 0.18941471576690674, "step": 120430 }, { "epoch": 0.5170740922009566, "grad_norm": 0.09993302077054977, "learning_rate": 4.850124608711399e-05, "loss": 0.14466171264648436, "step": 120440 }, { "epoch": 0.5171170242909765, "grad_norm": 0.06350284069776535, "learning_rate": 4.849693436699637e-05, "loss": 0.3882965326309204, "step": 120450 }, { "epoch": 0.5171599563809965, "grad_norm": 0.24465759098529816, "learning_rate": 4.849262264687875e-05, "loss": 0.2968221426010132, "step": 120460 }, { "epoch": 0.5172028884710166, "grad_norm": 0.36420390009880066, "learning_rate": 4.8488310926761125e-05, "loss": 0.20713613033294678, "step": 120470 }, { "epoch": 0.5172458205610365, "grad_norm": 1.466484546661377, "learning_rate": 4.84839992066435e-05, "loss": 0.1342691659927368, "step": 120480 }, { "epoch": 0.5172887526510566, "grad_norm": 4.7978620529174805, "learning_rate": 4.847968748652588e-05, "loss": 0.3547311305999756, "step": 120490 }, { "epoch": 0.5173316847410766, "grad_norm": 0.018700161948800087, "learning_rate": 4.847537576640826e-05, "loss": 0.12048419713973998, "step": 120500 }, { "epoch": 0.5173746168310965, "grad_norm": 0.02476024068892002, "learning_rate": 4.8471064046290634e-05, "loss": 0.10956237316131592, "step": 120510 }, { "epoch": 0.5174175489211166, "grad_norm": 0.3054960370063782, "learning_rate": 4.8466752326173005e-05, "loss": 0.014632061123847961, "step": 120520 }, { "epoch": 0.5174604810111366, "grad_norm": 0.015139803290367126, "learning_rate": 4.846244060605538e-05, "loss": 0.05507909655570984, "step": 120530 }, { "epoch": 0.5175034131011566, "grad_norm": 0.054174065589904785, "learning_rate": 4.845812888593776e-05, "loss": 0.1126629114151001, "step": 120540 }, { "epoch": 0.5175463451911766, "grad_norm": 0.22714291512966156, "learning_rate": 4.845381716582014e-05, "loss": 0.40899171829223635, "step": 120550 }, { "epoch": 0.5175892772811966, "grad_norm": 0.03834908828139305, "learning_rate": 4.844950544570251e-05, "loss": 0.2631853103637695, "step": 120560 }, { "epoch": 0.5176322093712166, "grad_norm": 0.007033985573798418, "learning_rate": 4.8445193725584885e-05, "loss": 0.2556145429611206, "step": 120570 }, { "epoch": 0.5176751414612366, "grad_norm": 2.763967990875244, "learning_rate": 4.844088200546726e-05, "loss": 0.37081136703491213, "step": 120580 }, { "epoch": 0.5177180735512567, "grad_norm": 0.0014255971182137728, "learning_rate": 4.843657028534964e-05, "loss": 0.14834992885589598, "step": 120590 }, { "epoch": 0.5177610056412766, "grad_norm": 0.9102689027786255, "learning_rate": 4.843225856523202e-05, "loss": 0.23219048976898193, "step": 120600 }, { "epoch": 0.5178039377312966, "grad_norm": 0.015008285641670227, "learning_rate": 4.8427946845114394e-05, "loss": 0.1743320941925049, "step": 120610 }, { "epoch": 0.5178468698213167, "grad_norm": 1.1973248720169067, "learning_rate": 4.842363512499677e-05, "loss": 0.27267420291900635, "step": 120620 }, { "epoch": 0.5178898019113366, "grad_norm": 0.023041389882564545, "learning_rate": 4.841932340487915e-05, "loss": 0.18333592414855956, "step": 120630 }, { "epoch": 0.5179327340013566, "grad_norm": 0.01211979053914547, "learning_rate": 4.841501168476152e-05, "loss": 0.10341588258743287, "step": 120640 }, { "epoch": 0.5179756660913767, "grad_norm": 5.778181552886963, "learning_rate": 4.84106999646439e-05, "loss": 0.1535136342048645, "step": 120650 }, { "epoch": 0.5180185981813966, "grad_norm": 0.2170112133026123, "learning_rate": 4.8406388244526274e-05, "loss": 0.2336575508117676, "step": 120660 }, { "epoch": 0.5180615302714167, "grad_norm": 0.015231816098093987, "learning_rate": 4.840207652440865e-05, "loss": 0.2085479497909546, "step": 120670 }, { "epoch": 0.5181044623614367, "grad_norm": 0.02248145081102848, "learning_rate": 4.839776480429102e-05, "loss": 0.06460311412811279, "step": 120680 }, { "epoch": 0.5181473944514566, "grad_norm": 5.987329006195068, "learning_rate": 4.83934530841734e-05, "loss": 0.30335426330566406, "step": 120690 }, { "epoch": 0.5181903265414767, "grad_norm": 3.8316006660461426, "learning_rate": 4.8389141364055777e-05, "loss": 0.2360456943511963, "step": 120700 }, { "epoch": 0.5182332586314967, "grad_norm": 0.05581430345773697, "learning_rate": 4.8384829643938154e-05, "loss": 0.07034187912940978, "step": 120710 }, { "epoch": 0.5182761907215166, "grad_norm": 1.4331754446029663, "learning_rate": 4.838051792382053e-05, "loss": 0.2972614288330078, "step": 120720 }, { "epoch": 0.5183191228115367, "grad_norm": 0.013728511519730091, "learning_rate": 4.837620620370291e-05, "loss": 0.06504011750221253, "step": 120730 }, { "epoch": 0.5183620549015567, "grad_norm": 0.8004436492919922, "learning_rate": 4.8371894483585286e-05, "loss": 0.2647553443908691, "step": 120740 }, { "epoch": 0.5184049869915768, "grad_norm": 0.23927471041679382, "learning_rate": 4.836758276346766e-05, "loss": 0.17371283769607543, "step": 120750 }, { "epoch": 0.5184479190815967, "grad_norm": 1.3092291355133057, "learning_rate": 4.836327104335004e-05, "loss": 0.20586049556732178, "step": 120760 }, { "epoch": 0.5184908511716168, "grad_norm": 0.6740944981575012, "learning_rate": 4.835895932323241e-05, "loss": 0.2501313924789429, "step": 120770 }, { "epoch": 0.5185337832616368, "grad_norm": 0.01920218952000141, "learning_rate": 4.835464760311479e-05, "loss": 0.38378133773803713, "step": 120780 }, { "epoch": 0.5185767153516567, "grad_norm": 1.44028639793396, "learning_rate": 4.8350335882997166e-05, "loss": 0.3875690698623657, "step": 120790 }, { "epoch": 0.5186196474416768, "grad_norm": 1.6943719387054443, "learning_rate": 4.834602416287954e-05, "loss": 0.21529688835144042, "step": 120800 }, { "epoch": 0.5186625795316968, "grad_norm": 1.4529924392700195, "learning_rate": 4.8341712442761914e-05, "loss": 0.13335733413696288, "step": 120810 }, { "epoch": 0.5187055116217167, "grad_norm": 0.08313264697790146, "learning_rate": 4.833740072264429e-05, "loss": 0.2530683517456055, "step": 120820 }, { "epoch": 0.5187484437117368, "grad_norm": 0.0469275526702404, "learning_rate": 4.833308900252667e-05, "loss": 0.09530457854270935, "step": 120830 }, { "epoch": 0.5187913758017568, "grad_norm": 0.0735456719994545, "learning_rate": 4.8328777282409046e-05, "loss": 0.28104963302612307, "step": 120840 }, { "epoch": 0.5188343078917768, "grad_norm": 8.66446590423584, "learning_rate": 4.832446556229142e-05, "loss": 0.2639842748641968, "step": 120850 }, { "epoch": 0.5188772399817968, "grad_norm": 1.215241551399231, "learning_rate": 4.83201538421738e-05, "loss": 0.15650783777236937, "step": 120860 }, { "epoch": 0.5189201720718168, "grad_norm": 1.2033604383468628, "learning_rate": 4.831584212205618e-05, "loss": 0.2996173620223999, "step": 120870 }, { "epoch": 0.5189631041618368, "grad_norm": 0.020583370700478554, "learning_rate": 4.8311530401938555e-05, "loss": 0.05468282699584961, "step": 120880 }, { "epoch": 0.5190060362518568, "grad_norm": 0.12773597240447998, "learning_rate": 4.8307218681820926e-05, "loss": 0.2769580602645874, "step": 120890 }, { "epoch": 0.5190489683418769, "grad_norm": 0.0935334786772728, "learning_rate": 4.83029069617033e-05, "loss": 0.03761700987815857, "step": 120900 }, { "epoch": 0.5190919004318968, "grad_norm": 0.11925974488258362, "learning_rate": 4.829859524158568e-05, "loss": 0.26186795234680177, "step": 120910 }, { "epoch": 0.5191348325219168, "grad_norm": 7.694597244262695, "learning_rate": 4.829428352146806e-05, "loss": 0.32566494941711427, "step": 120920 }, { "epoch": 0.5191777646119369, "grad_norm": 0.3018726408481598, "learning_rate": 4.828997180135043e-05, "loss": 0.1309618830680847, "step": 120930 }, { "epoch": 0.5192206967019568, "grad_norm": 0.0553029403090477, "learning_rate": 4.8285660081232806e-05, "loss": 0.09666863679885865, "step": 120940 }, { "epoch": 0.5192636287919769, "grad_norm": 3.9531891345977783, "learning_rate": 4.828134836111518e-05, "loss": 0.12105257511138916, "step": 120950 }, { "epoch": 0.5193065608819969, "grad_norm": 0.8712236285209656, "learning_rate": 4.827703664099757e-05, "loss": 0.1712932825088501, "step": 120960 }, { "epoch": 0.5193494929720168, "grad_norm": 0.032162413001060486, "learning_rate": 4.827272492087994e-05, "loss": 0.1988675117492676, "step": 120970 }, { "epoch": 0.5193924250620369, "grad_norm": 5.646240234375, "learning_rate": 4.8268413200762315e-05, "loss": 0.21210241317749023, "step": 120980 }, { "epoch": 0.5194353571520569, "grad_norm": 0.017885640263557434, "learning_rate": 4.826410148064469e-05, "loss": 0.0828397035598755, "step": 120990 }, { "epoch": 0.5194782892420768, "grad_norm": 0.05320185795426369, "learning_rate": 4.825978976052707e-05, "loss": 0.22818078994750976, "step": 121000 }, { "epoch": 0.5194782892420768, "eval_loss": 0.4079383909702301, "eval_runtime": 27.187, "eval_samples_per_second": 3.678, "eval_steps_per_second": 3.678, "step": 121000 }, { "epoch": 0.5195212213320969, "grad_norm": 0.0048073879443109035, "learning_rate": 4.825547804040944e-05, "loss": 0.33350112438201907, "step": 121010 }, { "epoch": 0.5195641534221169, "grad_norm": 2.622666120529175, "learning_rate": 4.825116632029182e-05, "loss": 0.2112447738647461, "step": 121020 }, { "epoch": 0.5196070855121369, "grad_norm": 0.03589218854904175, "learning_rate": 4.8246854600174195e-05, "loss": 0.2726699113845825, "step": 121030 }, { "epoch": 0.5196500176021569, "grad_norm": 0.6701458692550659, "learning_rate": 4.824254288005657e-05, "loss": 0.09031559228897094, "step": 121040 }, { "epoch": 0.5196929496921769, "grad_norm": 0.08984647691249847, "learning_rate": 4.823823115993894e-05, "loss": 0.043091869354248045, "step": 121050 }, { "epoch": 0.5197358817821969, "grad_norm": 0.08775527030229568, "learning_rate": 4.823391943982132e-05, "loss": 0.32499117851257325, "step": 121060 }, { "epoch": 0.5197788138722169, "grad_norm": 0.07303040474653244, "learning_rate": 4.8229607719703704e-05, "loss": 0.24889826774597168, "step": 121070 }, { "epoch": 0.519821745962237, "grad_norm": 0.01556952204555273, "learning_rate": 4.822529599958608e-05, "loss": 0.30057711601257325, "step": 121080 }, { "epoch": 0.5198646780522569, "grad_norm": 1.1740511655807495, "learning_rate": 4.822098427946845e-05, "loss": 0.2835972309112549, "step": 121090 }, { "epoch": 0.5199076101422769, "grad_norm": 3.6784210205078125, "learning_rate": 4.821667255935083e-05, "loss": 0.15858445167541504, "step": 121100 }, { "epoch": 0.519950542232297, "grad_norm": 0.025471484288573265, "learning_rate": 4.821236083923321e-05, "loss": 0.09517346024513244, "step": 121110 }, { "epoch": 0.5199934743223169, "grad_norm": 0.07156090438365936, "learning_rate": 4.8208049119115584e-05, "loss": 0.1761907935142517, "step": 121120 }, { "epoch": 0.520036406412337, "grad_norm": 3.4601879119873047, "learning_rate": 4.820373739899796e-05, "loss": 0.15483657121658326, "step": 121130 }, { "epoch": 0.520079338502357, "grad_norm": 1.6287287473678589, "learning_rate": 4.819942567888033e-05, "loss": 0.3838613271713257, "step": 121140 }, { "epoch": 0.5201222705923769, "grad_norm": 2.1926848888397217, "learning_rate": 4.819511395876271e-05, "loss": 0.3708859920501709, "step": 121150 }, { "epoch": 0.520165202682397, "grad_norm": 0.10348998755216599, "learning_rate": 4.819080223864509e-05, "loss": 0.13804128170013427, "step": 121160 }, { "epoch": 0.520208134772417, "grad_norm": 0.029189372435212135, "learning_rate": 4.8186490518527464e-05, "loss": 0.1679968476295471, "step": 121170 }, { "epoch": 0.520251066862437, "grad_norm": 0.614188551902771, "learning_rate": 4.818217879840984e-05, "loss": 0.17322077751159667, "step": 121180 }, { "epoch": 0.520293998952457, "grad_norm": 1.1057333946228027, "learning_rate": 4.817786707829222e-05, "loss": 0.20551702976226807, "step": 121190 }, { "epoch": 0.520336931042477, "grad_norm": 0.011034536175429821, "learning_rate": 4.8173555358174596e-05, "loss": 0.08477430939674377, "step": 121200 }, { "epoch": 0.5203798631324971, "grad_norm": 0.0020858512725681067, "learning_rate": 4.8169243638056974e-05, "loss": 0.19864094257354736, "step": 121210 }, { "epoch": 0.520422795222517, "grad_norm": 0.018683254718780518, "learning_rate": 4.8164931917939344e-05, "loss": 0.2104574680328369, "step": 121220 }, { "epoch": 0.520465727312537, "grad_norm": 1.2795490026474, "learning_rate": 4.816062019782172e-05, "loss": 0.19712650775909424, "step": 121230 }, { "epoch": 0.5205086594025571, "grad_norm": 6.894408226013184, "learning_rate": 4.81563084777041e-05, "loss": 0.28277971744537356, "step": 121240 }, { "epoch": 0.520551591492577, "grad_norm": 1.9820345640182495, "learning_rate": 4.8151996757586476e-05, "loss": 0.15565208196640015, "step": 121250 }, { "epoch": 0.5205945235825971, "grad_norm": 3.0856571197509766, "learning_rate": 4.814768503746885e-05, "loss": 0.17781033515930175, "step": 121260 }, { "epoch": 0.5206374556726171, "grad_norm": 0.05069053918123245, "learning_rate": 4.8143373317351224e-05, "loss": 0.195988667011261, "step": 121270 }, { "epoch": 0.520680387762637, "grad_norm": 0.011564699932932854, "learning_rate": 4.81390615972336e-05, "loss": 0.33460540771484376, "step": 121280 }, { "epoch": 0.5207233198526571, "grad_norm": 0.003371889004483819, "learning_rate": 4.813474987711598e-05, "loss": 0.1601473569869995, "step": 121290 }, { "epoch": 0.5207662519426771, "grad_norm": 0.013386544771492481, "learning_rate": 4.8130438156998356e-05, "loss": 0.2145592212677002, "step": 121300 }, { "epoch": 0.520809184032697, "grad_norm": 4.002100944519043, "learning_rate": 4.8126126436880733e-05, "loss": 0.3833177089691162, "step": 121310 }, { "epoch": 0.5208521161227171, "grad_norm": 5.617799758911133, "learning_rate": 4.812181471676311e-05, "loss": 0.32094109058380127, "step": 121320 }, { "epoch": 0.5208950482127371, "grad_norm": 3.256422519683838, "learning_rate": 4.811750299664549e-05, "loss": 0.16867411136627197, "step": 121330 }, { "epoch": 0.5209379803027571, "grad_norm": 1.2565315961837769, "learning_rate": 4.811319127652786e-05, "loss": 0.3385310649871826, "step": 121340 }, { "epoch": 0.5209809123927771, "grad_norm": 1.6213438510894775, "learning_rate": 4.8108879556410236e-05, "loss": 0.3337943315505981, "step": 121350 }, { "epoch": 0.5210238444827971, "grad_norm": 5.023358345031738, "learning_rate": 4.8104567836292613e-05, "loss": 0.09535663723945617, "step": 121360 }, { "epoch": 0.5210667765728171, "grad_norm": 1.1963624954223633, "learning_rate": 4.810025611617499e-05, "loss": 0.32074360847473143, "step": 121370 }, { "epoch": 0.5211097086628371, "grad_norm": 2.154831647872925, "learning_rate": 4.809594439605736e-05, "loss": 0.42052149772644043, "step": 121380 }, { "epoch": 0.5211526407528572, "grad_norm": 0.126048281788826, "learning_rate": 4.809163267593974e-05, "loss": 0.26276843547821044, "step": 121390 }, { "epoch": 0.5211955728428771, "grad_norm": 0.3626399338245392, "learning_rate": 4.8087320955822116e-05, "loss": 0.24476270675659179, "step": 121400 }, { "epoch": 0.5212385049328971, "grad_norm": 0.47341856360435486, "learning_rate": 4.808300923570449e-05, "loss": 0.10128217935562134, "step": 121410 }, { "epoch": 0.5212814370229172, "grad_norm": 0.6205283999443054, "learning_rate": 4.807869751558687e-05, "loss": 0.31277463436126707, "step": 121420 }, { "epoch": 0.5213243691129371, "grad_norm": 0.01310188602656126, "learning_rate": 4.807438579546925e-05, "loss": 0.2594514608383179, "step": 121430 }, { "epoch": 0.5213673012029572, "grad_norm": 0.26950836181640625, "learning_rate": 4.8070074075351625e-05, "loss": 0.2451323986053467, "step": 121440 }, { "epoch": 0.5214102332929772, "grad_norm": 0.004093456082046032, "learning_rate": 4.8065762355234e-05, "loss": 0.2703315496444702, "step": 121450 }, { "epoch": 0.5214531653829971, "grad_norm": 1.2792869806289673, "learning_rate": 4.806145063511637e-05, "loss": 0.2487691879272461, "step": 121460 }, { "epoch": 0.5214960974730172, "grad_norm": 1.077581524848938, "learning_rate": 4.805713891499875e-05, "loss": 0.2256721019744873, "step": 121470 }, { "epoch": 0.5215390295630372, "grad_norm": 0.6290398240089417, "learning_rate": 4.805282719488113e-05, "loss": 0.11926252841949463, "step": 121480 }, { "epoch": 0.5215819616530571, "grad_norm": 0.009173483587801456, "learning_rate": 4.8048515474763505e-05, "loss": 0.17269976139068605, "step": 121490 }, { "epoch": 0.5216248937430772, "grad_norm": 0.09031513333320618, "learning_rate": 4.804420375464588e-05, "loss": 0.22257568836212158, "step": 121500 }, { "epoch": 0.5216678258330972, "grad_norm": 0.06083008274435997, "learning_rate": 4.803989203452825e-05, "loss": 0.15785495042800904, "step": 121510 }, { "epoch": 0.5217107579231172, "grad_norm": 0.10042346268892288, "learning_rate": 4.803558031441063e-05, "loss": 0.16853535175323486, "step": 121520 }, { "epoch": 0.5217536900131372, "grad_norm": 0.5119935274124146, "learning_rate": 4.803126859429301e-05, "loss": 0.22263917922973633, "step": 121530 }, { "epoch": 0.5217966221031572, "grad_norm": 0.029625194147229195, "learning_rate": 4.8026956874175385e-05, "loss": 0.29853482246398927, "step": 121540 }, { "epoch": 0.5218395541931772, "grad_norm": 1.4290279150009155, "learning_rate": 4.802264515405776e-05, "loss": 0.17295366525650024, "step": 121550 }, { "epoch": 0.5218824862831972, "grad_norm": 0.2421247512102127, "learning_rate": 4.801833343394014e-05, "loss": 0.24902019500732422, "step": 121560 }, { "epoch": 0.5219254183732173, "grad_norm": 0.017065497115254402, "learning_rate": 4.801402171382252e-05, "loss": 0.17499693632125854, "step": 121570 }, { "epoch": 0.5219683504632372, "grad_norm": 1.5281258821487427, "learning_rate": 4.8009709993704895e-05, "loss": 0.2719564914703369, "step": 121580 }, { "epoch": 0.5220112825532572, "grad_norm": 0.008518542163074017, "learning_rate": 4.8005398273587265e-05, "loss": 0.23640027046203613, "step": 121590 }, { "epoch": 0.5220542146432773, "grad_norm": 0.04925156384706497, "learning_rate": 4.800108655346964e-05, "loss": 0.13170560598373413, "step": 121600 }, { "epoch": 0.5220971467332973, "grad_norm": 0.30054551362991333, "learning_rate": 4.799677483335202e-05, "loss": 0.3642754554748535, "step": 121610 }, { "epoch": 0.5221400788233173, "grad_norm": 0.5951288938522339, "learning_rate": 4.79924631132344e-05, "loss": 0.042504727840423584, "step": 121620 }, { "epoch": 0.5221830109133373, "grad_norm": 0.07040644437074661, "learning_rate": 4.798815139311677e-05, "loss": 0.29732842445373536, "step": 121630 }, { "epoch": 0.5222259430033573, "grad_norm": 0.005738586187362671, "learning_rate": 4.7983839672999145e-05, "loss": 0.15685839653015138, "step": 121640 }, { "epoch": 0.5222688750933773, "grad_norm": 0.9687967896461487, "learning_rate": 4.797952795288152e-05, "loss": 0.05645252466201782, "step": 121650 }, { "epoch": 0.5223118071833973, "grad_norm": 0.8229928612709045, "learning_rate": 4.79752162327639e-05, "loss": 0.11232262849807739, "step": 121660 }, { "epoch": 0.5223547392734174, "grad_norm": 0.00725303590297699, "learning_rate": 4.797090451264628e-05, "loss": 0.25438005924224855, "step": 121670 }, { "epoch": 0.5223976713634373, "grad_norm": 0.046411290764808655, "learning_rate": 4.7966592792528655e-05, "loss": 0.12618789672851563, "step": 121680 }, { "epoch": 0.5224406034534573, "grad_norm": 0.37380075454711914, "learning_rate": 4.796228107241103e-05, "loss": 0.2813948392868042, "step": 121690 }, { "epoch": 0.5224835355434774, "grad_norm": 1.166673183441162, "learning_rate": 4.795796935229341e-05, "loss": 0.5213551998138428, "step": 121700 }, { "epoch": 0.5225264676334973, "grad_norm": 0.4081331491470337, "learning_rate": 4.795365763217578e-05, "loss": 0.1555892825126648, "step": 121710 }, { "epoch": 0.5225693997235173, "grad_norm": 3.880535840988159, "learning_rate": 4.794934591205816e-05, "loss": 0.3266066789627075, "step": 121720 }, { "epoch": 0.5226123318135374, "grad_norm": 0.028656592592597008, "learning_rate": 4.7945034191940534e-05, "loss": 0.12044739723205566, "step": 121730 }, { "epoch": 0.5226552639035573, "grad_norm": 3.4097096920013428, "learning_rate": 4.794072247182291e-05, "loss": 0.26642622947692873, "step": 121740 }, { "epoch": 0.5226981959935774, "grad_norm": 0.35419461131095886, "learning_rate": 4.793641075170528e-05, "loss": 0.24461143016815184, "step": 121750 }, { "epoch": 0.5227411280835974, "grad_norm": 4.800034046173096, "learning_rate": 4.793209903158766e-05, "loss": 0.15310922861099244, "step": 121760 }, { "epoch": 0.5227840601736173, "grad_norm": 0.08995475620031357, "learning_rate": 4.792778731147004e-05, "loss": 0.40237984657287595, "step": 121770 }, { "epoch": 0.5228269922636374, "grad_norm": 0.35849398374557495, "learning_rate": 4.792347559135242e-05, "loss": 0.17247823476791382, "step": 121780 }, { "epoch": 0.5228699243536574, "grad_norm": 0.0034624820109456778, "learning_rate": 4.791916387123479e-05, "loss": 0.048566815257072446, "step": 121790 }, { "epoch": 0.5229128564436774, "grad_norm": 0.023735811933875084, "learning_rate": 4.791485215111717e-05, "loss": 0.21525065898895263, "step": 121800 }, { "epoch": 0.5229557885336974, "grad_norm": 2.3782215118408203, "learning_rate": 4.7910540430999546e-05, "loss": 0.21360011100769044, "step": 121810 }, { "epoch": 0.5229987206237174, "grad_norm": 2.4215610027313232, "learning_rate": 4.7906228710881924e-05, "loss": 0.24774274826049805, "step": 121820 }, { "epoch": 0.5230416527137374, "grad_norm": 0.03247027471661568, "learning_rate": 4.7901916990764294e-05, "loss": 0.2859419107437134, "step": 121830 }, { "epoch": 0.5230845848037574, "grad_norm": 0.0032771273981779814, "learning_rate": 4.789760527064667e-05, "loss": 0.08464333415031433, "step": 121840 }, { "epoch": 0.5231275168937775, "grad_norm": 0.9314298033714294, "learning_rate": 4.789329355052905e-05, "loss": 0.3259695529937744, "step": 121850 }, { "epoch": 0.5231704489837974, "grad_norm": 0.08188939094543457, "learning_rate": 4.7888981830411426e-05, "loss": 0.07389405965805054, "step": 121860 }, { "epoch": 0.5232133810738174, "grad_norm": 0.027831045910716057, "learning_rate": 4.7884670110293804e-05, "loss": 0.18900372982025146, "step": 121870 }, { "epoch": 0.5232563131638375, "grad_norm": 0.04473964497447014, "learning_rate": 4.7880358390176174e-05, "loss": 0.15805684328079223, "step": 121880 }, { "epoch": 0.5232992452538574, "grad_norm": 0.005574927665293217, "learning_rate": 4.787604667005856e-05, "loss": 0.1679774761199951, "step": 121890 }, { "epoch": 0.5233421773438774, "grad_norm": 1.2569983005523682, "learning_rate": 4.7871734949940936e-05, "loss": 0.20932137966156006, "step": 121900 }, { "epoch": 0.5233851094338975, "grad_norm": 0.020131045952439308, "learning_rate": 4.786742322982331e-05, "loss": 0.19063591957092285, "step": 121910 }, { "epoch": 0.5234280415239174, "grad_norm": 0.026454292237758636, "learning_rate": 4.7863111509705684e-05, "loss": 0.1315123200416565, "step": 121920 }, { "epoch": 0.5234709736139375, "grad_norm": 0.20026637613773346, "learning_rate": 4.785879978958806e-05, "loss": 0.14659813642501832, "step": 121930 }, { "epoch": 0.5235139057039575, "grad_norm": 0.018991755321621895, "learning_rate": 4.785448806947044e-05, "loss": 0.28632752895355223, "step": 121940 }, { "epoch": 0.5235568377939774, "grad_norm": 0.01908653788268566, "learning_rate": 4.7850176349352816e-05, "loss": 0.3429946184158325, "step": 121950 }, { "epoch": 0.5235997698839975, "grad_norm": 3.118744373321533, "learning_rate": 4.7845864629235186e-05, "loss": 0.20299386978149414, "step": 121960 }, { "epoch": 0.5236427019740175, "grad_norm": 0.13232176005840302, "learning_rate": 4.7841552909117564e-05, "loss": 0.2375958204269409, "step": 121970 }, { "epoch": 0.5236856340640375, "grad_norm": 1.2320634126663208, "learning_rate": 4.783724118899994e-05, "loss": 0.2093639373779297, "step": 121980 }, { "epoch": 0.5237285661540575, "grad_norm": 1.9221858978271484, "learning_rate": 4.783292946888232e-05, "loss": 0.1392124056816101, "step": 121990 }, { "epoch": 0.5237714982440775, "grad_norm": 0.579514741897583, "learning_rate": 4.7828617748764696e-05, "loss": 0.3182239532470703, "step": 122000 }, { "epoch": 0.5237714982440775, "eval_loss": 0.4002816081047058, "eval_runtime": 27.3332, "eval_samples_per_second": 3.659, "eval_steps_per_second": 3.659, "step": 122000 }, { "epoch": 0.5238144303340975, "grad_norm": 2.985659599304199, "learning_rate": 4.782430602864707e-05, "loss": 0.10451849699020385, "step": 122010 }, { "epoch": 0.5238573624241175, "grad_norm": 0.028085991740226746, "learning_rate": 4.781999430852945e-05, "loss": 0.2830367565155029, "step": 122020 }, { "epoch": 0.5239002945141376, "grad_norm": 0.0021224322263151407, "learning_rate": 4.781568258841183e-05, "loss": 0.37227163314819334, "step": 122030 }, { "epoch": 0.5239432266041576, "grad_norm": 0.025949900969862938, "learning_rate": 4.78113708682942e-05, "loss": 0.19543081521987915, "step": 122040 }, { "epoch": 0.5239861586941775, "grad_norm": 0.020989255979657173, "learning_rate": 4.7807059148176576e-05, "loss": 0.27584066390991213, "step": 122050 }, { "epoch": 0.5240290907841976, "grad_norm": 1.4416751861572266, "learning_rate": 4.780274742805895e-05, "loss": 0.15617939233779907, "step": 122060 }, { "epoch": 0.5240720228742176, "grad_norm": 3.7463674545288086, "learning_rate": 4.779843570794133e-05, "loss": 0.4121096611022949, "step": 122070 }, { "epoch": 0.5241149549642375, "grad_norm": 0.019098132848739624, "learning_rate": 4.77941239878237e-05, "loss": 0.20457956790924073, "step": 122080 }, { "epoch": 0.5241578870542576, "grad_norm": 0.05528656765818596, "learning_rate": 4.778981226770608e-05, "loss": 0.21979448795318604, "step": 122090 }, { "epoch": 0.5242008191442776, "grad_norm": 0.5462521910667419, "learning_rate": 4.7785500547588455e-05, "loss": 0.14519410133361815, "step": 122100 }, { "epoch": 0.5242437512342976, "grad_norm": 4.878800392150879, "learning_rate": 4.778118882747083e-05, "loss": 0.4063398361206055, "step": 122110 }, { "epoch": 0.5242866833243176, "grad_norm": 0.035689231008291245, "learning_rate": 4.777687710735321e-05, "loss": 0.06844155192375183, "step": 122120 }, { "epoch": 0.5243296154143376, "grad_norm": 1.3210026025772095, "learning_rate": 4.777256538723559e-05, "loss": 0.11408164501190185, "step": 122130 }, { "epoch": 0.5243725475043576, "grad_norm": 0.03476414084434509, "learning_rate": 4.7768253667117965e-05, "loss": 0.1776628613471985, "step": 122140 }, { "epoch": 0.5244154795943776, "grad_norm": 0.0011379508068785071, "learning_rate": 4.776394194700034e-05, "loss": 0.16172831058502196, "step": 122150 }, { "epoch": 0.5244584116843977, "grad_norm": 0.4380556344985962, "learning_rate": 4.775963022688271e-05, "loss": 0.23161208629608154, "step": 122160 }, { "epoch": 0.5245013437744176, "grad_norm": 1.6195735931396484, "learning_rate": 4.775531850676509e-05, "loss": 0.250444221496582, "step": 122170 }, { "epoch": 0.5245442758644376, "grad_norm": 3.016010046005249, "learning_rate": 4.775100678664747e-05, "loss": 0.34918644428253176, "step": 122180 }, { "epoch": 0.5245872079544577, "grad_norm": 0.0846981480717659, "learning_rate": 4.7746695066529845e-05, "loss": 0.32720112800598145, "step": 122190 }, { "epoch": 0.5246301400444776, "grad_norm": 0.0056806099601089954, "learning_rate": 4.7742383346412215e-05, "loss": 0.015474987030029298, "step": 122200 }, { "epoch": 0.5246730721344977, "grad_norm": 0.9824272990226746, "learning_rate": 4.773807162629459e-05, "loss": 0.32699992656707766, "step": 122210 }, { "epoch": 0.5247160042245177, "grad_norm": 1.053566336631775, "learning_rate": 4.773375990617697e-05, "loss": 0.4157695293426514, "step": 122220 }, { "epoch": 0.5247589363145376, "grad_norm": 0.001544310594908893, "learning_rate": 4.772944818605935e-05, "loss": 0.1750641345977783, "step": 122230 }, { "epoch": 0.5248018684045577, "grad_norm": 0.019139086827635765, "learning_rate": 4.7725136465941725e-05, "loss": 0.31319055557250974, "step": 122240 }, { "epoch": 0.5248448004945777, "grad_norm": 0.5469844937324524, "learning_rate": 4.77208247458241e-05, "loss": 0.19357837438583375, "step": 122250 }, { "epoch": 0.5248877325845976, "grad_norm": 0.00616453168913722, "learning_rate": 4.771651302570648e-05, "loss": 0.11588430404663086, "step": 122260 }, { "epoch": 0.5249306646746177, "grad_norm": 2.7403957843780518, "learning_rate": 4.771220130558886e-05, "loss": 0.21277878284454346, "step": 122270 }, { "epoch": 0.5249735967646377, "grad_norm": 0.18290357291698456, "learning_rate": 4.7707889585471234e-05, "loss": 0.28863661289215087, "step": 122280 }, { "epoch": 0.5250165288546577, "grad_norm": 0.15102115273475647, "learning_rate": 4.7703577865353605e-05, "loss": 0.21934478282928466, "step": 122290 }, { "epoch": 0.5250594609446777, "grad_norm": 0.009661280550062656, "learning_rate": 4.769926614523598e-05, "loss": 0.22365834712982177, "step": 122300 }, { "epoch": 0.5251023930346977, "grad_norm": 0.04367806389927864, "learning_rate": 4.769495442511836e-05, "loss": 0.1199500560760498, "step": 122310 }, { "epoch": 0.5251453251247177, "grad_norm": 0.009130306541919708, "learning_rate": 4.769064270500074e-05, "loss": 0.12299952507019044, "step": 122320 }, { "epoch": 0.5251882572147377, "grad_norm": 0.0018692787270992994, "learning_rate": 4.768633098488311e-05, "loss": 0.19006558656692504, "step": 122330 }, { "epoch": 0.5252311893047578, "grad_norm": 0.5307639837265015, "learning_rate": 4.7682019264765485e-05, "loss": 0.35393648147583007, "step": 122340 }, { "epoch": 0.5252741213947777, "grad_norm": 2.1476428508758545, "learning_rate": 4.767770754464786e-05, "loss": 0.32211828231811523, "step": 122350 }, { "epoch": 0.5253170534847977, "grad_norm": 0.0038509471341967583, "learning_rate": 4.767339582453024e-05, "loss": 0.167023229598999, "step": 122360 }, { "epoch": 0.5253599855748178, "grad_norm": 0.22803162038326263, "learning_rate": 4.766908410441262e-05, "loss": 0.3219106435775757, "step": 122370 }, { "epoch": 0.5254029176648377, "grad_norm": 0.9811261892318726, "learning_rate": 4.7664772384294994e-05, "loss": 0.05802839994430542, "step": 122380 }, { "epoch": 0.5254458497548578, "grad_norm": 0.08851549029350281, "learning_rate": 4.766046066417737e-05, "loss": 0.0848920226097107, "step": 122390 }, { "epoch": 0.5254887818448778, "grad_norm": 4.3986124992370605, "learning_rate": 4.765614894405975e-05, "loss": 0.07044092416763306, "step": 122400 }, { "epoch": 0.5255317139348977, "grad_norm": 0.05002744123339653, "learning_rate": 4.765183722394212e-05, "loss": 0.1601981997489929, "step": 122410 }, { "epoch": 0.5255746460249178, "grad_norm": 0.029643887653946877, "learning_rate": 4.7647525503824497e-05, "loss": 0.2490626811981201, "step": 122420 }, { "epoch": 0.5256175781149378, "grad_norm": 0.0011830313596874475, "learning_rate": 4.7643213783706874e-05, "loss": 0.10901587009429932, "step": 122430 }, { "epoch": 0.5256605102049577, "grad_norm": 0.0028918858151882887, "learning_rate": 4.763890206358925e-05, "loss": 0.4196751594543457, "step": 122440 }, { "epoch": 0.5257034422949778, "grad_norm": 0.028822220861911774, "learning_rate": 4.763459034347162e-05, "loss": 0.15838117599487306, "step": 122450 }, { "epoch": 0.5257463743849978, "grad_norm": 2.5338051319122314, "learning_rate": 4.7630278623354e-05, "loss": 0.3437318801879883, "step": 122460 }, { "epoch": 0.5257893064750179, "grad_norm": 0.00381831219419837, "learning_rate": 4.7625966903236377e-05, "loss": 0.1670363187789917, "step": 122470 }, { "epoch": 0.5258322385650378, "grad_norm": 0.0786067470908165, "learning_rate": 4.762165518311876e-05, "loss": 0.2615427732467651, "step": 122480 }, { "epoch": 0.5258751706550578, "grad_norm": 8.087389945983887, "learning_rate": 4.761734346300113e-05, "loss": 0.4246623992919922, "step": 122490 }, { "epoch": 0.5259181027450779, "grad_norm": 0.005552296061068773, "learning_rate": 4.761303174288351e-05, "loss": 0.1938941717147827, "step": 122500 }, { "epoch": 0.5259610348350978, "grad_norm": 0.047699443995952606, "learning_rate": 4.7608720022765886e-05, "loss": 0.32389297485351565, "step": 122510 }, { "epoch": 0.5260039669251179, "grad_norm": 0.04659678041934967, "learning_rate": 4.760440830264826e-05, "loss": 0.10534238815307617, "step": 122520 }, { "epoch": 0.5260468990151379, "grad_norm": 0.01996210776269436, "learning_rate": 4.7600096582530634e-05, "loss": 0.2690871238708496, "step": 122530 }, { "epoch": 0.5260898311051578, "grad_norm": 0.12138628214597702, "learning_rate": 4.759578486241301e-05, "loss": 0.15548104047775269, "step": 122540 }, { "epoch": 0.5261327631951779, "grad_norm": 0.45005640387535095, "learning_rate": 4.759147314229539e-05, "loss": 0.2214979648590088, "step": 122550 }, { "epoch": 0.5261756952851979, "grad_norm": 0.0013523201923817396, "learning_rate": 4.7587161422177766e-05, "loss": 0.3759445667266846, "step": 122560 }, { "epoch": 0.5262186273752179, "grad_norm": 0.012963851913809776, "learning_rate": 4.758284970206014e-05, "loss": 0.3076311111450195, "step": 122570 }, { "epoch": 0.5262615594652379, "grad_norm": 0.36688700318336487, "learning_rate": 4.7578537981942514e-05, "loss": 0.2342298746109009, "step": 122580 }, { "epoch": 0.5263044915552579, "grad_norm": 1.1012883186340332, "learning_rate": 4.75742262618249e-05, "loss": 0.13014146089553832, "step": 122590 }, { "epoch": 0.5263474236452779, "grad_norm": 1.8125981092453003, "learning_rate": 4.7569914541707275e-05, "loss": 0.3951392412185669, "step": 122600 }, { "epoch": 0.5263903557352979, "grad_norm": 0.04614179953932762, "learning_rate": 4.756560282158965e-05, "loss": 0.1506880760192871, "step": 122610 }, { "epoch": 0.526433287825318, "grad_norm": 0.05191076546907425, "learning_rate": 4.756129110147202e-05, "loss": 0.0537009060382843, "step": 122620 }, { "epoch": 0.5264762199153379, "grad_norm": 2.3037290573120117, "learning_rate": 4.75569793813544e-05, "loss": 0.23060684204101561, "step": 122630 }, { "epoch": 0.5265191520053579, "grad_norm": 0.06191762164235115, "learning_rate": 4.755266766123678e-05, "loss": 0.19760955572128297, "step": 122640 }, { "epoch": 0.526562084095378, "grad_norm": 0.018758106976747513, "learning_rate": 4.7548355941119155e-05, "loss": 0.1421829104423523, "step": 122650 }, { "epoch": 0.5266050161853979, "grad_norm": 3.5670557022094727, "learning_rate": 4.7544044221001526e-05, "loss": 0.1848344922065735, "step": 122660 }, { "epoch": 0.5266479482754179, "grad_norm": 0.0048149810172617435, "learning_rate": 4.75397325008839e-05, "loss": 0.1379055380821228, "step": 122670 }, { "epoch": 0.526690880365438, "grad_norm": 4.6595234870910645, "learning_rate": 4.753542078076628e-05, "loss": 0.24073870182037355, "step": 122680 }, { "epoch": 0.5267338124554579, "grad_norm": 0.7435564994812012, "learning_rate": 4.753110906064866e-05, "loss": 0.2687734603881836, "step": 122690 }, { "epoch": 0.526776744545478, "grad_norm": 0.32143405079841614, "learning_rate": 4.7526797340531035e-05, "loss": 0.13571771383285522, "step": 122700 }, { "epoch": 0.526819676635498, "grad_norm": 0.2254776656627655, "learning_rate": 4.752248562041341e-05, "loss": 0.24260897636413575, "step": 122710 }, { "epoch": 0.5268626087255179, "grad_norm": 0.33794358372688293, "learning_rate": 4.751817390029579e-05, "loss": 0.1821001172065735, "step": 122720 }, { "epoch": 0.526905540815538, "grad_norm": 0.010684690438210964, "learning_rate": 4.751386218017817e-05, "loss": 0.14596030712127686, "step": 122730 }, { "epoch": 0.526948472905558, "grad_norm": 0.005989206023514271, "learning_rate": 4.750955046006054e-05, "loss": 0.03930140435695648, "step": 122740 }, { "epoch": 0.526991404995578, "grad_norm": 0.2036050409078598, "learning_rate": 4.7505238739942915e-05, "loss": 0.22046027183532715, "step": 122750 }, { "epoch": 0.527034337085598, "grad_norm": 0.004267920274287462, "learning_rate": 4.750092701982529e-05, "loss": 0.559827184677124, "step": 122760 }, { "epoch": 0.527077269175618, "grad_norm": 0.9968888163566589, "learning_rate": 4.749661529970767e-05, "loss": 0.2507550954818726, "step": 122770 }, { "epoch": 0.527120201265638, "grad_norm": 0.015453596599400043, "learning_rate": 4.749230357959004e-05, "loss": 0.1077796459197998, "step": 122780 }, { "epoch": 0.527163133355658, "grad_norm": 0.10399241745471954, "learning_rate": 4.748799185947242e-05, "loss": 0.21866052150726317, "step": 122790 }, { "epoch": 0.527206065445678, "grad_norm": 0.0058853523805737495, "learning_rate": 4.7483680139354795e-05, "loss": 0.2769232034683228, "step": 122800 }, { "epoch": 0.527248997535698, "grad_norm": 0.6753274202346802, "learning_rate": 4.747936841923717e-05, "loss": 0.07704102993011475, "step": 122810 }, { "epoch": 0.527291929625718, "grad_norm": 0.010032746009528637, "learning_rate": 4.747505669911955e-05, "loss": 0.16229265928268433, "step": 122820 }, { "epoch": 0.5273348617157381, "grad_norm": 0.12110492587089539, "learning_rate": 4.747074497900193e-05, "loss": 0.26483950614929197, "step": 122830 }, { "epoch": 0.527377793805758, "grad_norm": 2.6830825805664062, "learning_rate": 4.7466433258884304e-05, "loss": 0.3436319351196289, "step": 122840 }, { "epoch": 0.527420725895778, "grad_norm": 0.017832154408097267, "learning_rate": 4.746212153876668e-05, "loss": 0.12279645204544068, "step": 122850 }, { "epoch": 0.5274636579857981, "grad_norm": 6.973689556121826, "learning_rate": 4.745780981864905e-05, "loss": 0.2290245532989502, "step": 122860 }, { "epoch": 0.527506590075818, "grad_norm": 3.703192710876465, "learning_rate": 4.745349809853143e-05, "loss": 0.20900051593780516, "step": 122870 }, { "epoch": 0.5275495221658381, "grad_norm": 1.3952735662460327, "learning_rate": 4.744918637841381e-05, "loss": 0.18111791610717773, "step": 122880 }, { "epoch": 0.5275924542558581, "grad_norm": 0.038036517798900604, "learning_rate": 4.7444874658296184e-05, "loss": 0.11206220388412476, "step": 122890 }, { "epoch": 0.5276353863458781, "grad_norm": 1.2479053735733032, "learning_rate": 4.7440562938178555e-05, "loss": 0.27330703735351564, "step": 122900 }, { "epoch": 0.5276783184358981, "grad_norm": 0.05419410392642021, "learning_rate": 4.743625121806093e-05, "loss": 0.10534077882766724, "step": 122910 }, { "epoch": 0.5277212505259181, "grad_norm": 2.2733733654022217, "learning_rate": 4.743193949794331e-05, "loss": 0.35091605186462405, "step": 122920 }, { "epoch": 0.5277641826159382, "grad_norm": 0.20193079113960266, "learning_rate": 4.742762777782569e-05, "loss": 0.21767075061798097, "step": 122930 }, { "epoch": 0.5278071147059581, "grad_norm": 0.024202005937695503, "learning_rate": 4.7423316057708064e-05, "loss": 0.08835142850875854, "step": 122940 }, { "epoch": 0.5278500467959781, "grad_norm": 0.003215222619473934, "learning_rate": 4.741900433759044e-05, "loss": 0.2013381004333496, "step": 122950 }, { "epoch": 0.5278929788859982, "grad_norm": 0.051946718245744705, "learning_rate": 4.741469261747282e-05, "loss": 0.4013204097747803, "step": 122960 }, { "epoch": 0.5279359109760181, "grad_norm": 0.05380121245980263, "learning_rate": 4.7410380897355196e-05, "loss": 0.1067008137702942, "step": 122970 }, { "epoch": 0.5279788430660382, "grad_norm": 1.3877129554748535, "learning_rate": 4.7406069177237574e-05, "loss": 0.30157127380371096, "step": 122980 }, { "epoch": 0.5280217751560582, "grad_norm": 1.0387351512908936, "learning_rate": 4.7401757457119944e-05, "loss": 0.23853034973144532, "step": 122990 }, { "epoch": 0.5280647072460781, "grad_norm": 0.2567809224128723, "learning_rate": 4.739744573700232e-05, "loss": 0.2381465196609497, "step": 123000 }, { "epoch": 0.5280647072460781, "eval_loss": 0.40844210982322693, "eval_runtime": 27.1299, "eval_samples_per_second": 3.686, "eval_steps_per_second": 3.686, "step": 123000 }, { "epoch": 0.5281076393360982, "grad_norm": 0.0031749005429446697, "learning_rate": 4.73931340168847e-05, "loss": 0.08643372058868408, "step": 123010 }, { "epoch": 0.5281505714261182, "grad_norm": 0.02016310580074787, "learning_rate": 4.7388822296767076e-05, "loss": 0.11112987995147705, "step": 123020 }, { "epoch": 0.5281935035161381, "grad_norm": 0.9397972822189331, "learning_rate": 4.738451057664945e-05, "loss": 0.2053551197052002, "step": 123030 }, { "epoch": 0.5282364356061582, "grad_norm": 0.024424588307738304, "learning_rate": 4.7380198856531824e-05, "loss": 0.18155556917190552, "step": 123040 }, { "epoch": 0.5282793676961782, "grad_norm": 0.015114293433725834, "learning_rate": 4.73758871364142e-05, "loss": 0.10795472860336304, "step": 123050 }, { "epoch": 0.5283222997861982, "grad_norm": 0.02613051049411297, "learning_rate": 4.737157541629658e-05, "loss": 0.1803101658821106, "step": 123060 }, { "epoch": 0.5283652318762182, "grad_norm": 2.2235827445983887, "learning_rate": 4.7367263696178956e-05, "loss": 0.2081122636795044, "step": 123070 }, { "epoch": 0.5284081639662382, "grad_norm": 0.23082391917705536, "learning_rate": 4.7362951976061333e-05, "loss": 0.16542841196060182, "step": 123080 }, { "epoch": 0.5284510960562582, "grad_norm": 0.2546762526035309, "learning_rate": 4.735864025594371e-05, "loss": 0.17060707807540892, "step": 123090 }, { "epoch": 0.5284940281462782, "grad_norm": 8.392339706420898, "learning_rate": 4.735432853582609e-05, "loss": 0.24863219261169434, "step": 123100 }, { "epoch": 0.5285369602362983, "grad_norm": 0.11422450095415115, "learning_rate": 4.735001681570846e-05, "loss": 0.19755786657333374, "step": 123110 }, { "epoch": 0.5285798923263182, "grad_norm": 0.0031740881968289614, "learning_rate": 4.7345705095590836e-05, "loss": 0.18587934970855713, "step": 123120 }, { "epoch": 0.5286228244163382, "grad_norm": 0.0874519944190979, "learning_rate": 4.7341393375473213e-05, "loss": 0.21035304069519042, "step": 123130 }, { "epoch": 0.5286657565063583, "grad_norm": 0.6707064509391785, "learning_rate": 4.733708165535559e-05, "loss": 0.09855753779411316, "step": 123140 }, { "epoch": 0.5287086885963782, "grad_norm": 1.0456721782684326, "learning_rate": 4.733276993523796e-05, "loss": 0.34525909423828127, "step": 123150 }, { "epoch": 0.5287516206863982, "grad_norm": 0.0036562460009008646, "learning_rate": 4.732845821512034e-05, "loss": 0.15217121839523315, "step": 123160 }, { "epoch": 0.5287945527764183, "grad_norm": 0.022391438484191895, "learning_rate": 4.7324146495002716e-05, "loss": 0.1648841142654419, "step": 123170 }, { "epoch": 0.5288374848664382, "grad_norm": 63.53861999511719, "learning_rate": 4.73198347748851e-05, "loss": 0.273171591758728, "step": 123180 }, { "epoch": 0.5288804169564583, "grad_norm": 30.67633819580078, "learning_rate": 4.731552305476747e-05, "loss": 0.2636192798614502, "step": 123190 }, { "epoch": 0.5289233490464783, "grad_norm": 2.2658443450927734, "learning_rate": 4.731121133464985e-05, "loss": 0.2464972734451294, "step": 123200 }, { "epoch": 0.5289662811364982, "grad_norm": 4.664850234985352, "learning_rate": 4.7306899614532225e-05, "loss": 0.37572057247161866, "step": 123210 }, { "epoch": 0.5290092132265183, "grad_norm": 1.0588277578353882, "learning_rate": 4.73025878944146e-05, "loss": 0.521503496170044, "step": 123220 }, { "epoch": 0.5290521453165383, "grad_norm": 0.036139827221632004, "learning_rate": 4.729827617429697e-05, "loss": 0.2847903728485107, "step": 123230 }, { "epoch": 0.5290950774065583, "grad_norm": 0.06087147817015648, "learning_rate": 4.729396445417935e-05, "loss": 0.21260426044464112, "step": 123240 }, { "epoch": 0.5291380094965783, "grad_norm": 2.3397505283355713, "learning_rate": 4.728965273406173e-05, "loss": 0.1197127103805542, "step": 123250 }, { "epoch": 0.5291809415865983, "grad_norm": 1.8557378053665161, "learning_rate": 4.7285341013944105e-05, "loss": 0.43969006538391114, "step": 123260 }, { "epoch": 0.5292238736766183, "grad_norm": 14.346938133239746, "learning_rate": 4.7281029293826476e-05, "loss": 0.19784982204437257, "step": 123270 }, { "epoch": 0.5292668057666383, "grad_norm": 0.6737160682678223, "learning_rate": 4.727671757370885e-05, "loss": 0.2786275863647461, "step": 123280 }, { "epoch": 0.5293097378566584, "grad_norm": 1.9091942310333252, "learning_rate": 4.727240585359124e-05, "loss": 0.13269058465957642, "step": 123290 }, { "epoch": 0.5293526699466783, "grad_norm": 0.02550147846341133, "learning_rate": 4.7268094133473615e-05, "loss": 0.20986504554748536, "step": 123300 }, { "epoch": 0.5293956020366983, "grad_norm": 0.08701743930578232, "learning_rate": 4.726378241335599e-05, "loss": 0.2978894948959351, "step": 123310 }, { "epoch": 0.5294385341267184, "grad_norm": 15.151698112487793, "learning_rate": 4.725947069323836e-05, "loss": 0.39813718795776365, "step": 123320 }, { "epoch": 0.5294814662167384, "grad_norm": 0.26273056864738464, "learning_rate": 4.725515897312074e-05, "loss": 0.20274786949157714, "step": 123330 }, { "epoch": 0.5295243983067583, "grad_norm": 1.5247877836227417, "learning_rate": 4.725084725300312e-05, "loss": 0.28449985980987547, "step": 123340 }, { "epoch": 0.5295673303967784, "grad_norm": 0.25654134154319763, "learning_rate": 4.7246535532885495e-05, "loss": 0.27149922847747804, "step": 123350 }, { "epoch": 0.5296102624867984, "grad_norm": 0.0787576287984848, "learning_rate": 4.7242223812767865e-05, "loss": 0.2487567663192749, "step": 123360 }, { "epoch": 0.5296531945768184, "grad_norm": 0.6149705648422241, "learning_rate": 4.723791209265024e-05, "loss": 0.10222749710083008, "step": 123370 }, { "epoch": 0.5296961266668384, "grad_norm": 0.02223723568022251, "learning_rate": 4.723360037253262e-05, "loss": 0.2543445348739624, "step": 123380 }, { "epoch": 0.5297390587568584, "grad_norm": 0.005376000422984362, "learning_rate": 4.7229288652415e-05, "loss": 0.34765603542327883, "step": 123390 }, { "epoch": 0.5297819908468784, "grad_norm": 1.3201158046722412, "learning_rate": 4.7224976932297375e-05, "loss": 0.13802237510681153, "step": 123400 }, { "epoch": 0.5298249229368984, "grad_norm": 0.019846005365252495, "learning_rate": 4.722066521217975e-05, "loss": 0.09360415935516357, "step": 123410 }, { "epoch": 0.5298678550269185, "grad_norm": 0.03975502401590347, "learning_rate": 4.721635349206213e-05, "loss": 0.17938061952590942, "step": 123420 }, { "epoch": 0.5299107871169384, "grad_norm": 0.024910472333431244, "learning_rate": 4.7212041771944507e-05, "loss": 0.30192625522613525, "step": 123430 }, { "epoch": 0.5299537192069584, "grad_norm": 0.2583577334880829, "learning_rate": 4.720773005182688e-05, "loss": 0.20642051696777344, "step": 123440 }, { "epoch": 0.5299966512969785, "grad_norm": 10.739752769470215, "learning_rate": 4.7203418331709254e-05, "loss": 0.35497708320617677, "step": 123450 }, { "epoch": 0.5300395833869984, "grad_norm": 19.12002182006836, "learning_rate": 4.719910661159163e-05, "loss": 0.41153979301452637, "step": 123460 }, { "epoch": 0.5300825154770185, "grad_norm": 0.04216668754816055, "learning_rate": 4.719479489147401e-05, "loss": 0.2261986255645752, "step": 123470 }, { "epoch": 0.5301254475670385, "grad_norm": 0.1999616026878357, "learning_rate": 4.719048317135638e-05, "loss": 0.23764095306396485, "step": 123480 }, { "epoch": 0.5301683796570584, "grad_norm": 4.300946235656738, "learning_rate": 4.718617145123876e-05, "loss": 0.17631118297576903, "step": 123490 }, { "epoch": 0.5302113117470785, "grad_norm": 2.1016876697540283, "learning_rate": 4.7181859731121134e-05, "loss": 0.366302490234375, "step": 123500 }, { "epoch": 0.5302542438370985, "grad_norm": 0.1305193454027176, "learning_rate": 4.717754801100351e-05, "loss": 0.1007119059562683, "step": 123510 }, { "epoch": 0.5302971759271184, "grad_norm": 0.10050223022699356, "learning_rate": 4.717323629088589e-05, "loss": 0.13999812602996825, "step": 123520 }, { "epoch": 0.5303401080171385, "grad_norm": 0.003933720290660858, "learning_rate": 4.7168924570768266e-05, "loss": 0.05451310276985168, "step": 123530 }, { "epoch": 0.5303830401071585, "grad_norm": 1.8059325218200684, "learning_rate": 4.7164612850650644e-05, "loss": 0.23415093421936034, "step": 123540 }, { "epoch": 0.5304259721971785, "grad_norm": 0.011281573213636875, "learning_rate": 4.716030113053302e-05, "loss": 0.15493345260620117, "step": 123550 }, { "epoch": 0.5304689042871985, "grad_norm": 0.015826251357793808, "learning_rate": 4.715598941041539e-05, "loss": 0.0832473635673523, "step": 123560 }, { "epoch": 0.5305118363772185, "grad_norm": 0.03475892171263695, "learning_rate": 4.715167769029777e-05, "loss": 0.19375609159469603, "step": 123570 }, { "epoch": 0.5305547684672385, "grad_norm": 0.022834021598100662, "learning_rate": 4.7147365970180146e-05, "loss": 0.47736759185791017, "step": 123580 }, { "epoch": 0.5305977005572585, "grad_norm": 14.527593612670898, "learning_rate": 4.7143054250062524e-05, "loss": 0.11089812517166138, "step": 123590 }, { "epoch": 0.5306406326472786, "grad_norm": 0.8763924837112427, "learning_rate": 4.7138742529944894e-05, "loss": 0.2431772232055664, "step": 123600 }, { "epoch": 0.5306835647372985, "grad_norm": 0.01501018088310957, "learning_rate": 4.713443080982727e-05, "loss": 0.1265444278717041, "step": 123610 }, { "epoch": 0.5307264968273185, "grad_norm": 0.02495141699910164, "learning_rate": 4.713011908970965e-05, "loss": 0.11369569301605224, "step": 123620 }, { "epoch": 0.5307694289173386, "grad_norm": 0.03437520191073418, "learning_rate": 4.7125807369592026e-05, "loss": 0.3247214317321777, "step": 123630 }, { "epoch": 0.5308123610073585, "grad_norm": 0.08870543539524078, "learning_rate": 4.7121495649474404e-05, "loss": 0.1731701374053955, "step": 123640 }, { "epoch": 0.5308552930973786, "grad_norm": 3.2430336475372314, "learning_rate": 4.711718392935678e-05, "loss": 0.25606422424316405, "step": 123650 }, { "epoch": 0.5308982251873986, "grad_norm": 23.563745498657227, "learning_rate": 4.711287220923916e-05, "loss": 0.16244746446609498, "step": 123660 }, { "epoch": 0.5309411572774185, "grad_norm": 3.8496384620666504, "learning_rate": 4.7108560489121536e-05, "loss": 0.22676796913146974, "step": 123670 }, { "epoch": 0.5309840893674386, "grad_norm": 0.01404566876590252, "learning_rate": 4.710424876900391e-05, "loss": 0.08764925003051757, "step": 123680 }, { "epoch": 0.5310270214574586, "grad_norm": 0.02645733766257763, "learning_rate": 4.7099937048886284e-05, "loss": 0.24724922180175782, "step": 123690 }, { "epoch": 0.5310699535474785, "grad_norm": 0.026252178475260735, "learning_rate": 4.709562532876866e-05, "loss": 0.055978184938430785, "step": 123700 }, { "epoch": 0.5311128856374986, "grad_norm": 3.005872964859009, "learning_rate": 4.709131360865104e-05, "loss": 0.27054011821746826, "step": 123710 }, { "epoch": 0.5311558177275186, "grad_norm": 0.014248663559556007, "learning_rate": 4.7087001888533416e-05, "loss": 0.20635383129119872, "step": 123720 }, { "epoch": 0.5311987498175386, "grad_norm": 0.1993233859539032, "learning_rate": 4.7082690168415786e-05, "loss": 0.03018401861190796, "step": 123730 }, { "epoch": 0.5312416819075586, "grad_norm": 0.053939998149871826, "learning_rate": 4.7078378448298164e-05, "loss": 0.15145121812820433, "step": 123740 }, { "epoch": 0.5312846139975786, "grad_norm": 0.07720816135406494, "learning_rate": 4.707406672818054e-05, "loss": 0.24113116264343262, "step": 123750 }, { "epoch": 0.5313275460875987, "grad_norm": 0.032163117080926895, "learning_rate": 4.706975500806292e-05, "loss": 0.08998279571533203, "step": 123760 }, { "epoch": 0.5313704781776186, "grad_norm": 0.05457861348986626, "learning_rate": 4.7065443287945296e-05, "loss": 0.18543795347213746, "step": 123770 }, { "epoch": 0.5314134102676387, "grad_norm": 0.015958484262228012, "learning_rate": 4.706113156782767e-05, "loss": 0.14293922185897828, "step": 123780 }, { "epoch": 0.5314563423576587, "grad_norm": 0.0022434417624026537, "learning_rate": 4.705681984771005e-05, "loss": 0.3324514150619507, "step": 123790 }, { "epoch": 0.5314992744476786, "grad_norm": 0.02598206140100956, "learning_rate": 4.705250812759243e-05, "loss": 0.3836965560913086, "step": 123800 }, { "epoch": 0.5315422065376987, "grad_norm": 0.031823523342609406, "learning_rate": 4.70481964074748e-05, "loss": 0.37702322006225586, "step": 123810 }, { "epoch": 0.5315851386277187, "grad_norm": 2.530062675476074, "learning_rate": 4.7043884687357176e-05, "loss": 0.2851561546325684, "step": 123820 }, { "epoch": 0.5316280707177387, "grad_norm": 0.5450431108474731, "learning_rate": 4.703957296723955e-05, "loss": 0.2075505018234253, "step": 123830 }, { "epoch": 0.5316710028077587, "grad_norm": 2.2344067096710205, "learning_rate": 4.703526124712193e-05, "loss": 0.3167113780975342, "step": 123840 }, { "epoch": 0.5317139348977787, "grad_norm": 0.0959930494427681, "learning_rate": 4.70309495270043e-05, "loss": 0.09773850440979004, "step": 123850 }, { "epoch": 0.5317568669877987, "grad_norm": 0.0059434291906654835, "learning_rate": 4.702663780688668e-05, "loss": 0.09885787367820739, "step": 123860 }, { "epoch": 0.5317997990778187, "grad_norm": 3.9031686782836914, "learning_rate": 4.7022326086769055e-05, "loss": 0.34107317924499514, "step": 123870 }, { "epoch": 0.5318427311678388, "grad_norm": 0.011664203368127346, "learning_rate": 4.701801436665143e-05, "loss": 0.20996668338775634, "step": 123880 }, { "epoch": 0.5318856632578587, "grad_norm": 4.821422576904297, "learning_rate": 4.701370264653381e-05, "loss": 0.1527780294418335, "step": 123890 }, { "epoch": 0.5319285953478787, "grad_norm": 0.02439550682902336, "learning_rate": 4.700939092641619e-05, "loss": 0.10840038061141968, "step": 123900 }, { "epoch": 0.5319715274378988, "grad_norm": 2.292161226272583, "learning_rate": 4.7005079206298565e-05, "loss": 0.32342574596405027, "step": 123910 }, { "epoch": 0.5320144595279187, "grad_norm": 0.783797562122345, "learning_rate": 4.700076748618094e-05, "loss": 0.11824526786804199, "step": 123920 }, { "epoch": 0.5320573916179387, "grad_norm": 0.008362102322280407, "learning_rate": 4.699645576606331e-05, "loss": 0.19398202896118164, "step": 123930 }, { "epoch": 0.5321003237079588, "grad_norm": 0.007039368152618408, "learning_rate": 4.699214404594569e-05, "loss": 0.2674627065658569, "step": 123940 }, { "epoch": 0.5321432557979787, "grad_norm": 1.1618794202804565, "learning_rate": 4.698783232582807e-05, "loss": 0.17610886096954345, "step": 123950 }, { "epoch": 0.5321861878879988, "grad_norm": 0.01650865189731121, "learning_rate": 4.6983520605710445e-05, "loss": 0.09232497215270996, "step": 123960 }, { "epoch": 0.5322291199780188, "grad_norm": 2.4566853046417236, "learning_rate": 4.6979208885592815e-05, "loss": 0.13579462766647338, "step": 123970 }, { "epoch": 0.5322720520680387, "grad_norm": 1.1726164817810059, "learning_rate": 4.697489716547519e-05, "loss": 0.31279146671295166, "step": 123980 }, { "epoch": 0.5323149841580588, "grad_norm": 0.13944868743419647, "learning_rate": 4.697058544535757e-05, "loss": 0.38595051765441896, "step": 123990 }, { "epoch": 0.5323579162480788, "grad_norm": 0.43487343192100525, "learning_rate": 4.6966273725239954e-05, "loss": 0.2199528455734253, "step": 124000 }, { "epoch": 0.5323579162480788, "eval_loss": 0.4122171103954315, "eval_runtime": 27.083, "eval_samples_per_second": 3.692, "eval_steps_per_second": 3.692, "step": 124000 }, { "epoch": 0.5324008483380988, "grad_norm": 3.2876408100128174, "learning_rate": 4.6961962005122325e-05, "loss": 0.2153330087661743, "step": 124010 }, { "epoch": 0.5324437804281188, "grad_norm": 0.03849121183156967, "learning_rate": 4.69576502850047e-05, "loss": 0.33109848499298095, "step": 124020 }, { "epoch": 0.5324867125181388, "grad_norm": 1.7557862997055054, "learning_rate": 4.695333856488708e-05, "loss": 0.17541335821151732, "step": 124030 }, { "epoch": 0.5325296446081588, "grad_norm": 1.1375805139541626, "learning_rate": 4.694902684476946e-05, "loss": 0.1093302845954895, "step": 124040 }, { "epoch": 0.5325725766981788, "grad_norm": 0.16217398643493652, "learning_rate": 4.6944715124651834e-05, "loss": 0.18123213052749634, "step": 124050 }, { "epoch": 0.5326155087881989, "grad_norm": 2.6146907806396484, "learning_rate": 4.6940403404534205e-05, "loss": 0.1957655668258667, "step": 124060 }, { "epoch": 0.5326584408782188, "grad_norm": 0.575221598148346, "learning_rate": 4.693609168441658e-05, "loss": 0.1171625018119812, "step": 124070 }, { "epoch": 0.5327013729682388, "grad_norm": 1.2705405950546265, "learning_rate": 4.693177996429896e-05, "loss": 0.12601308822631835, "step": 124080 }, { "epoch": 0.5327443050582589, "grad_norm": 0.022567199543118477, "learning_rate": 4.692746824418134e-05, "loss": 0.2454216480255127, "step": 124090 }, { "epoch": 0.5327872371482788, "grad_norm": 0.0011852540774270892, "learning_rate": 4.692315652406371e-05, "loss": 0.17979726791381836, "step": 124100 }, { "epoch": 0.5328301692382988, "grad_norm": 0.01909797079861164, "learning_rate": 4.691884480394609e-05, "loss": 0.42721829414367674, "step": 124110 }, { "epoch": 0.5328731013283189, "grad_norm": 0.006946041248738766, "learning_rate": 4.691453308382847e-05, "loss": 0.06694617271423339, "step": 124120 }, { "epoch": 0.5329160334183388, "grad_norm": 5.16895866394043, "learning_rate": 4.6910221363710846e-05, "loss": 0.49288372993469237, "step": 124130 }, { "epoch": 0.5329589655083589, "grad_norm": 1.4857653379440308, "learning_rate": 4.690590964359322e-05, "loss": 0.20648424625396727, "step": 124140 }, { "epoch": 0.5330018975983789, "grad_norm": 0.9698376655578613, "learning_rate": 4.6901597923475594e-05, "loss": 0.26796138286590576, "step": 124150 }, { "epoch": 0.5330448296883988, "grad_norm": 2.552114486694336, "learning_rate": 4.689728620335797e-05, "loss": 0.5469475746154785, "step": 124160 }, { "epoch": 0.5330877617784189, "grad_norm": 3.912579298019409, "learning_rate": 4.689297448324035e-05, "loss": 0.2692500352859497, "step": 124170 }, { "epoch": 0.5331306938684389, "grad_norm": 0.17444412410259247, "learning_rate": 4.688866276312272e-05, "loss": 0.23923828601837158, "step": 124180 }, { "epoch": 0.533173625958459, "grad_norm": 1.8246395587921143, "learning_rate": 4.6884351043005097e-05, "loss": 0.2045379400253296, "step": 124190 }, { "epoch": 0.5332165580484789, "grad_norm": 0.006754355505108833, "learning_rate": 4.6880039322887474e-05, "loss": 0.334868335723877, "step": 124200 }, { "epoch": 0.5332594901384989, "grad_norm": 0.019909320399165154, "learning_rate": 4.687572760276985e-05, "loss": 0.32880401611328125, "step": 124210 }, { "epoch": 0.533302422228519, "grad_norm": 0.026598718017339706, "learning_rate": 4.687141588265223e-05, "loss": 0.04149231910705566, "step": 124220 }, { "epoch": 0.5333453543185389, "grad_norm": 0.2718566358089447, "learning_rate": 4.6867104162534606e-05, "loss": 0.13211541175842284, "step": 124230 }, { "epoch": 0.533388286408559, "grad_norm": 0.05366518348455429, "learning_rate": 4.686279244241698e-05, "loss": 0.15838115215301513, "step": 124240 }, { "epoch": 0.533431218498579, "grad_norm": 0.02057690918445587, "learning_rate": 4.685848072229936e-05, "loss": 0.19188928604125977, "step": 124250 }, { "epoch": 0.5334741505885989, "grad_norm": 2.503087282180786, "learning_rate": 4.685416900218173e-05, "loss": 0.25132534503936765, "step": 124260 }, { "epoch": 0.533517082678619, "grad_norm": 0.10337914526462555, "learning_rate": 4.684985728206411e-05, "loss": 0.3497090101242065, "step": 124270 }, { "epoch": 0.533560014768639, "grad_norm": 5.80239200592041, "learning_rate": 4.6845545561946486e-05, "loss": 0.4027796745300293, "step": 124280 }, { "epoch": 0.5336029468586589, "grad_norm": 0.9824345111846924, "learning_rate": 4.684123384182886e-05, "loss": 0.511225700378418, "step": 124290 }, { "epoch": 0.533645878948679, "grad_norm": 0.02229215018451214, "learning_rate": 4.6836922121711234e-05, "loss": 0.05699042677879333, "step": 124300 }, { "epoch": 0.533688811038699, "grad_norm": 0.01698029786348343, "learning_rate": 4.683261040159361e-05, "loss": 0.32143568992614746, "step": 124310 }, { "epoch": 0.533731743128719, "grad_norm": 2.416086435317993, "learning_rate": 4.682829868147599e-05, "loss": 0.333212685585022, "step": 124320 }, { "epoch": 0.533774675218739, "grad_norm": 0.6163731813430786, "learning_rate": 4.6823986961358366e-05, "loss": 0.396596884727478, "step": 124330 }, { "epoch": 0.533817607308759, "grad_norm": 0.07051825523376465, "learning_rate": 4.681967524124074e-05, "loss": 0.27659101486206056, "step": 124340 }, { "epoch": 0.533860539398779, "grad_norm": 0.10191439092159271, "learning_rate": 4.681536352112312e-05, "loss": 0.30634074211120604, "step": 124350 }, { "epoch": 0.533903471488799, "grad_norm": 2.1454920768737793, "learning_rate": 4.68110518010055e-05, "loss": 0.169644033908844, "step": 124360 }, { "epoch": 0.5339464035788191, "grad_norm": 0.3064160645008087, "learning_rate": 4.6806740080887875e-05, "loss": 0.19605166912078859, "step": 124370 }, { "epoch": 0.533989335668839, "grad_norm": 0.005128131248056889, "learning_rate": 4.6802428360770246e-05, "loss": 0.16128937005996705, "step": 124380 }, { "epoch": 0.534032267758859, "grad_norm": 3.605325222015381, "learning_rate": 4.679811664065262e-05, "loss": 0.15976146459579468, "step": 124390 }, { "epoch": 0.5340751998488791, "grad_norm": 0.13232477009296417, "learning_rate": 4.6793804920535e-05, "loss": 0.1932427167892456, "step": 124400 }, { "epoch": 0.534118131938899, "grad_norm": 0.008470027707517147, "learning_rate": 4.678949320041738e-05, "loss": 0.20527310371398927, "step": 124410 }, { "epoch": 0.534161064028919, "grad_norm": 0.7935301661491394, "learning_rate": 4.6785181480299755e-05, "loss": 0.25196199417114257, "step": 124420 }, { "epoch": 0.5342039961189391, "grad_norm": 0.017037170007824898, "learning_rate": 4.6780869760182126e-05, "loss": 0.20903384685516357, "step": 124430 }, { "epoch": 0.534246928208959, "grad_norm": 5.581609725952148, "learning_rate": 4.67765580400645e-05, "loss": 0.06474516391754151, "step": 124440 }, { "epoch": 0.5342898602989791, "grad_norm": 0.0051388125866651535, "learning_rate": 4.677224631994688e-05, "loss": 0.05504382252693176, "step": 124450 }, { "epoch": 0.5343327923889991, "grad_norm": 4.44582986831665, "learning_rate": 4.676793459982926e-05, "loss": 0.3161288261413574, "step": 124460 }, { "epoch": 0.534375724479019, "grad_norm": 0.9643357992172241, "learning_rate": 4.6763622879711635e-05, "loss": 0.27371511459350584, "step": 124470 }, { "epoch": 0.5344186565690391, "grad_norm": 1.267791509628296, "learning_rate": 4.675931115959401e-05, "loss": 0.10485801696777344, "step": 124480 }, { "epoch": 0.5344615886590591, "grad_norm": 0.0774456262588501, "learning_rate": 4.675499943947639e-05, "loss": 0.23535749912261963, "step": 124490 }, { "epoch": 0.5345045207490791, "grad_norm": 0.023274218663573265, "learning_rate": 4.675068771935877e-05, "loss": 0.40593762397766114, "step": 124500 }, { "epoch": 0.5345474528390991, "grad_norm": 0.30989450216293335, "learning_rate": 4.674637599924114e-05, "loss": 0.07787411212921143, "step": 124510 }, { "epoch": 0.5345903849291191, "grad_norm": 0.14625364542007446, "learning_rate": 4.6742064279123515e-05, "loss": 0.13913246393203735, "step": 124520 }, { "epoch": 0.5346333170191391, "grad_norm": 0.0016882647760212421, "learning_rate": 4.673775255900589e-05, "loss": 0.24402050971984862, "step": 124530 }, { "epoch": 0.5346762491091591, "grad_norm": 0.2706347703933716, "learning_rate": 4.673344083888827e-05, "loss": 0.1284398317337036, "step": 124540 }, { "epoch": 0.5347191811991792, "grad_norm": 14.831525802612305, "learning_rate": 4.672912911877064e-05, "loss": 0.3947265148162842, "step": 124550 }, { "epoch": 0.5347621132891991, "grad_norm": 0.07119249552488327, "learning_rate": 4.672481739865302e-05, "loss": 0.1879923701286316, "step": 124560 }, { "epoch": 0.5348050453792191, "grad_norm": 8.25128173828125, "learning_rate": 4.6720505678535395e-05, "loss": 0.16921440362930298, "step": 124570 }, { "epoch": 0.5348479774692392, "grad_norm": 0.17425376176834106, "learning_rate": 4.671619395841777e-05, "loss": 0.20272107124328614, "step": 124580 }, { "epoch": 0.5348909095592591, "grad_norm": 0.05234595015645027, "learning_rate": 4.671188223830015e-05, "loss": 0.3504507064819336, "step": 124590 }, { "epoch": 0.5349338416492792, "grad_norm": 0.008333982899785042, "learning_rate": 4.670757051818253e-05, "loss": 0.11752091646194458, "step": 124600 }, { "epoch": 0.5349767737392992, "grad_norm": 0.008150231093168259, "learning_rate": 4.6703258798064904e-05, "loss": 0.27742657661437986, "step": 124610 }, { "epoch": 0.5350197058293192, "grad_norm": 0.011584865860641003, "learning_rate": 4.669894707794728e-05, "loss": 0.1437814712524414, "step": 124620 }, { "epoch": 0.5350626379193392, "grad_norm": 0.4360119104385376, "learning_rate": 4.669463535782965e-05, "loss": 0.2068427562713623, "step": 124630 }, { "epoch": 0.5351055700093592, "grad_norm": 1.7382227182388306, "learning_rate": 4.669032363771203e-05, "loss": 0.20213468074798585, "step": 124640 }, { "epoch": 0.5351485020993793, "grad_norm": 0.0036807823926210403, "learning_rate": 4.668601191759441e-05, "loss": 0.2461169481277466, "step": 124650 }, { "epoch": 0.5351914341893992, "grad_norm": 0.02192399837076664, "learning_rate": 4.6681700197476784e-05, "loss": 0.33381974697113037, "step": 124660 }, { "epoch": 0.5352343662794192, "grad_norm": 0.022471271455287933, "learning_rate": 4.6677388477359155e-05, "loss": 0.08194748163223267, "step": 124670 }, { "epoch": 0.5352772983694393, "grad_norm": 1.029158115386963, "learning_rate": 4.667307675724153e-05, "loss": 0.2929563045501709, "step": 124680 }, { "epoch": 0.5353202304594592, "grad_norm": 0.5128310918807983, "learning_rate": 4.666876503712391e-05, "loss": 0.37208659648895265, "step": 124690 }, { "epoch": 0.5353631625494792, "grad_norm": 0.00456015020608902, "learning_rate": 4.6664453317006294e-05, "loss": 0.013756263256072997, "step": 124700 }, { "epoch": 0.5354060946394993, "grad_norm": 2.446556568145752, "learning_rate": 4.6660141596888664e-05, "loss": 0.13984161615371704, "step": 124710 }, { "epoch": 0.5354490267295192, "grad_norm": 0.08037838339805603, "learning_rate": 4.665582987677104e-05, "loss": 0.17831218242645264, "step": 124720 }, { "epoch": 0.5354919588195393, "grad_norm": 0.05052134767174721, "learning_rate": 4.665151815665342e-05, "loss": 0.11403819322586059, "step": 124730 }, { "epoch": 0.5355348909095593, "grad_norm": 5.381241798400879, "learning_rate": 4.6647206436535796e-05, "loss": 0.2679091691970825, "step": 124740 }, { "epoch": 0.5355778229995792, "grad_norm": 4.498979091644287, "learning_rate": 4.664289471641817e-05, "loss": 0.07987023591995239, "step": 124750 }, { "epoch": 0.5356207550895993, "grad_norm": 0.004411382135003805, "learning_rate": 4.6638582996300544e-05, "loss": 0.07055896520614624, "step": 124760 }, { "epoch": 0.5356636871796193, "grad_norm": 0.209492027759552, "learning_rate": 4.663427127618292e-05, "loss": 0.11898517608642578, "step": 124770 }, { "epoch": 0.5357066192696393, "grad_norm": 0.750752866268158, "learning_rate": 4.66299595560653e-05, "loss": 0.12291603088378907, "step": 124780 }, { "epoch": 0.5357495513596593, "grad_norm": 0.0013108792481943965, "learning_rate": 4.6625647835947676e-05, "loss": 0.10511652231216431, "step": 124790 }, { "epoch": 0.5357924834496793, "grad_norm": 0.011161865666508675, "learning_rate": 4.662133611583005e-05, "loss": 0.2695204734802246, "step": 124800 }, { "epoch": 0.5358354155396993, "grad_norm": 0.7787624001502991, "learning_rate": 4.661702439571243e-05, "loss": 0.16277107000350952, "step": 124810 }, { "epoch": 0.5358783476297193, "grad_norm": 0.3624718487262726, "learning_rate": 4.661271267559481e-05, "loss": 0.33316256999969485, "step": 124820 }, { "epoch": 0.5359212797197394, "grad_norm": 0.002750060521066189, "learning_rate": 4.6608400955477186e-05, "loss": 0.055188989639282225, "step": 124830 }, { "epoch": 0.5359642118097593, "grad_norm": 0.0005346073885448277, "learning_rate": 4.6604089235359556e-05, "loss": 0.10296810865402221, "step": 124840 }, { "epoch": 0.5360071438997793, "grad_norm": 2.2687649726867676, "learning_rate": 4.6599777515241933e-05, "loss": 0.4165465831756592, "step": 124850 }, { "epoch": 0.5360500759897994, "grad_norm": 0.014060785993933678, "learning_rate": 4.659546579512431e-05, "loss": 0.23372213840484618, "step": 124860 }, { "epoch": 0.5360930080798193, "grad_norm": 1.1599324941635132, "learning_rate": 4.659115407500669e-05, "loss": 0.23743939399719238, "step": 124870 }, { "epoch": 0.5361359401698393, "grad_norm": 0.017570368945598602, "learning_rate": 4.658684235488906e-05, "loss": 0.1744380474090576, "step": 124880 }, { "epoch": 0.5361788722598594, "grad_norm": 4.546342372894287, "learning_rate": 4.6582530634771436e-05, "loss": 0.3124069690704346, "step": 124890 }, { "epoch": 0.5362218043498793, "grad_norm": 6.73213529586792, "learning_rate": 4.657821891465381e-05, "loss": 0.17486449480056762, "step": 124900 }, { "epoch": 0.5362647364398994, "grad_norm": 1.787294864654541, "learning_rate": 4.657390719453619e-05, "loss": 0.44237399101257324, "step": 124910 }, { "epoch": 0.5363076685299194, "grad_norm": 0.009232389740645885, "learning_rate": 4.656959547441857e-05, "loss": 0.389838719367981, "step": 124920 }, { "epoch": 0.5363506006199393, "grad_norm": 0.020139772444963455, "learning_rate": 4.6565283754300945e-05, "loss": 0.3448354482650757, "step": 124930 }, { "epoch": 0.5363935327099594, "grad_norm": 1.5293805599212646, "learning_rate": 4.656097203418332e-05, "loss": 0.3066279888153076, "step": 124940 }, { "epoch": 0.5364364647999794, "grad_norm": 0.040268316864967346, "learning_rate": 4.65566603140657e-05, "loss": 0.16958212852478027, "step": 124950 }, { "epoch": 0.5364793968899993, "grad_norm": 0.10245020687580109, "learning_rate": 4.655234859394807e-05, "loss": 0.12522271871566773, "step": 124960 }, { "epoch": 0.5365223289800194, "grad_norm": 0.008734374307096004, "learning_rate": 4.654803687383045e-05, "loss": 0.051020973920822145, "step": 124970 }, { "epoch": 0.5365652610700394, "grad_norm": 2.040437698364258, "learning_rate": 4.6543725153712825e-05, "loss": 0.4698540210723877, "step": 124980 }, { "epoch": 0.5366081931600594, "grad_norm": 0.544279932975769, "learning_rate": 4.65394134335952e-05, "loss": 0.1698075771331787, "step": 124990 }, { "epoch": 0.5366511252500794, "grad_norm": 0.049962181597948074, "learning_rate": 4.653510171347757e-05, "loss": 0.1048028826713562, "step": 125000 }, { "epoch": 0.5366511252500794, "eval_loss": 0.41585448384284973, "eval_runtime": 27.1534, "eval_samples_per_second": 3.683, "eval_steps_per_second": 3.683, "step": 125000 }, { "epoch": 0.5366940573400995, "grad_norm": 4.228307723999023, "learning_rate": 4.653078999335995e-05, "loss": 0.3834389209747314, "step": 125010 }, { "epoch": 0.5367369894301194, "grad_norm": 0.5724061727523804, "learning_rate": 4.652647827324233e-05, "loss": 0.28278398513793945, "step": 125020 }, { "epoch": 0.5367799215201394, "grad_norm": 1.0082062482833862, "learning_rate": 4.6522166553124705e-05, "loss": 0.21530394554138182, "step": 125030 }, { "epoch": 0.5368228536101595, "grad_norm": 0.10343755036592484, "learning_rate": 4.651785483300708e-05, "loss": 0.3212897777557373, "step": 125040 }, { "epoch": 0.5368657857001795, "grad_norm": 0.04396173357963562, "learning_rate": 4.651354311288946e-05, "loss": 0.28393266201019285, "step": 125050 }, { "epoch": 0.5369087177901994, "grad_norm": 0.003487182315438986, "learning_rate": 4.650923139277184e-05, "loss": 0.09013047218322753, "step": 125060 }, { "epoch": 0.5369516498802195, "grad_norm": 2.7585346698760986, "learning_rate": 4.6504919672654215e-05, "loss": 0.09315488934516906, "step": 125070 }, { "epoch": 0.5369945819702395, "grad_norm": 5.236676216125488, "learning_rate": 4.6500607952536585e-05, "loss": 0.18706350326538085, "step": 125080 }, { "epoch": 0.5370375140602595, "grad_norm": 0.20571869611740112, "learning_rate": 4.649629623241896e-05, "loss": 0.09702978134155274, "step": 125090 }, { "epoch": 0.5370804461502795, "grad_norm": 0.003286497900262475, "learning_rate": 4.649198451230134e-05, "loss": 0.12894362211227417, "step": 125100 }, { "epoch": 0.5371233782402995, "grad_norm": 1.3346261978149414, "learning_rate": 4.648767279218372e-05, "loss": 0.4185777187347412, "step": 125110 }, { "epoch": 0.5371663103303195, "grad_norm": 8.176697731018066, "learning_rate": 4.6483361072066095e-05, "loss": 0.2767629146575928, "step": 125120 }, { "epoch": 0.5372092424203395, "grad_norm": 1.114349365234375, "learning_rate": 4.6479049351948465e-05, "loss": 0.20764529705047607, "step": 125130 }, { "epoch": 0.5372521745103596, "grad_norm": 1.419968605041504, "learning_rate": 4.647473763183084e-05, "loss": 0.16337499618530274, "step": 125140 }, { "epoch": 0.5372951066003795, "grad_norm": 0.01131153292953968, "learning_rate": 4.647042591171322e-05, "loss": 0.2339179515838623, "step": 125150 }, { "epoch": 0.5373380386903995, "grad_norm": 0.006680184509605169, "learning_rate": 4.64661141915956e-05, "loss": 0.20181403160095215, "step": 125160 }, { "epoch": 0.5373809707804196, "grad_norm": 0.006397879216820002, "learning_rate": 4.6461802471477975e-05, "loss": 0.26559085845947267, "step": 125170 }, { "epoch": 0.5374239028704395, "grad_norm": 0.08209287375211716, "learning_rate": 4.645749075136035e-05, "loss": 0.11008179187774658, "step": 125180 }, { "epoch": 0.5374668349604595, "grad_norm": 0.004642953164875507, "learning_rate": 4.645317903124273e-05, "loss": 0.08794822096824646, "step": 125190 }, { "epoch": 0.5375097670504796, "grad_norm": 0.34100475907325745, "learning_rate": 4.6448867311125107e-05, "loss": 0.18877944946289063, "step": 125200 }, { "epoch": 0.5375526991404995, "grad_norm": 3.0378482341766357, "learning_rate": 4.644455559100748e-05, "loss": 0.34736883640289307, "step": 125210 }, { "epoch": 0.5375956312305196, "grad_norm": 0.012231471948325634, "learning_rate": 4.6440243870889854e-05, "loss": 0.13005506992340088, "step": 125220 }, { "epoch": 0.5376385633205396, "grad_norm": 1.5531368255615234, "learning_rate": 4.643593215077223e-05, "loss": 0.28213071823120117, "step": 125230 }, { "epoch": 0.5376814954105595, "grad_norm": 0.022608119994401932, "learning_rate": 4.643162043065461e-05, "loss": 0.09758681654930115, "step": 125240 }, { "epoch": 0.5377244275005796, "grad_norm": 0.006008662283420563, "learning_rate": 4.642730871053698e-05, "loss": 0.32650175094604494, "step": 125250 }, { "epoch": 0.5377673595905996, "grad_norm": 0.020104752853512764, "learning_rate": 4.642299699041936e-05, "loss": 0.024312908947467803, "step": 125260 }, { "epoch": 0.5378102916806196, "grad_norm": 0.0024187087547034025, "learning_rate": 4.6418685270301734e-05, "loss": 0.1383475184440613, "step": 125270 }, { "epoch": 0.5378532237706396, "grad_norm": 0.08186737447977066, "learning_rate": 4.641437355018411e-05, "loss": 0.18166197538375856, "step": 125280 }, { "epoch": 0.5378961558606596, "grad_norm": 0.06082170829176903, "learning_rate": 4.641006183006649e-05, "loss": 0.1468608021736145, "step": 125290 }, { "epoch": 0.5379390879506796, "grad_norm": 0.5789875984191895, "learning_rate": 4.6405750109948866e-05, "loss": 0.17679991722106933, "step": 125300 }, { "epoch": 0.5379820200406996, "grad_norm": 5.591590881347656, "learning_rate": 4.6401438389831244e-05, "loss": 0.2792628049850464, "step": 125310 }, { "epoch": 0.5380249521307197, "grad_norm": 0.12889571487903595, "learning_rate": 4.639712666971362e-05, "loss": 0.34988381862640383, "step": 125320 }, { "epoch": 0.5380678842207396, "grad_norm": 0.001045150333084166, "learning_rate": 4.639281494959599e-05, "loss": 0.11754976511001587, "step": 125330 }, { "epoch": 0.5381108163107596, "grad_norm": 0.4624530076980591, "learning_rate": 4.638850322947837e-05, "loss": 0.11660113334655761, "step": 125340 }, { "epoch": 0.5381537484007797, "grad_norm": 0.006142620462924242, "learning_rate": 4.6384191509360746e-05, "loss": 0.3070712089538574, "step": 125350 }, { "epoch": 0.5381966804907996, "grad_norm": 0.0023859955836087465, "learning_rate": 4.6379879789243124e-05, "loss": 0.33227355480194093, "step": 125360 }, { "epoch": 0.5382396125808196, "grad_norm": 1.9227700233459473, "learning_rate": 4.6375568069125494e-05, "loss": 0.33113319873809816, "step": 125370 }, { "epoch": 0.5382825446708397, "grad_norm": 0.9203783869743347, "learning_rate": 4.637125634900787e-05, "loss": 0.1220745325088501, "step": 125380 }, { "epoch": 0.5383254767608596, "grad_norm": 0.005649822298437357, "learning_rate": 4.636694462889025e-05, "loss": 0.147062349319458, "step": 125390 }, { "epoch": 0.5383684088508797, "grad_norm": 0.13790778815746307, "learning_rate": 4.6362632908772626e-05, "loss": 0.2776653289794922, "step": 125400 }, { "epoch": 0.5384113409408997, "grad_norm": 0.009624933823943138, "learning_rate": 4.6358321188655004e-05, "loss": 0.1833699584007263, "step": 125410 }, { "epoch": 0.5384542730309196, "grad_norm": 0.9451597929000854, "learning_rate": 4.635400946853738e-05, "loss": 0.14015096426010132, "step": 125420 }, { "epoch": 0.5384972051209397, "grad_norm": 0.1229841560125351, "learning_rate": 4.634969774841976e-05, "loss": 0.21246821880340577, "step": 125430 }, { "epoch": 0.5385401372109597, "grad_norm": 0.9702297449111938, "learning_rate": 4.6345386028302136e-05, "loss": 0.18682045936584474, "step": 125440 }, { "epoch": 0.5385830693009797, "grad_norm": 1.5991899967193604, "learning_rate": 4.6341074308184506e-05, "loss": 0.22341518402099608, "step": 125450 }, { "epoch": 0.5386260013909997, "grad_norm": 0.04757259413599968, "learning_rate": 4.6336762588066884e-05, "loss": 0.24468319416046141, "step": 125460 }, { "epoch": 0.5386689334810197, "grad_norm": 0.15516333281993866, "learning_rate": 4.633245086794926e-05, "loss": 0.2808207035064697, "step": 125470 }, { "epoch": 0.5387118655710398, "grad_norm": 0.002046056091785431, "learning_rate": 4.632813914783164e-05, "loss": 0.03726526498794556, "step": 125480 }, { "epoch": 0.5387547976610597, "grad_norm": 0.0016733687371015549, "learning_rate": 4.6323827427714016e-05, "loss": 0.21174945831298828, "step": 125490 }, { "epoch": 0.5387977297510798, "grad_norm": 0.9975637197494507, "learning_rate": 4.6319515707596386e-05, "loss": 0.20011298656463622, "step": 125500 }, { "epoch": 0.5388406618410998, "grad_norm": 0.001489278394728899, "learning_rate": 4.6315203987478764e-05, "loss": 0.21532111167907714, "step": 125510 }, { "epoch": 0.5388835939311197, "grad_norm": 0.0019053419819101691, "learning_rate": 4.631089226736115e-05, "loss": 0.06455349326133727, "step": 125520 }, { "epoch": 0.5389265260211398, "grad_norm": 0.12390390038490295, "learning_rate": 4.6306580547243525e-05, "loss": 0.25326411724090575, "step": 125530 }, { "epoch": 0.5389694581111598, "grad_norm": 2.383126735687256, "learning_rate": 4.6302268827125896e-05, "loss": 0.23500394821166992, "step": 125540 }, { "epoch": 0.5390123902011797, "grad_norm": 1.2543652057647705, "learning_rate": 4.629795710700827e-05, "loss": 0.43595452308654786, "step": 125550 }, { "epoch": 0.5390553222911998, "grad_norm": 0.01510920561850071, "learning_rate": 4.629364538689065e-05, "loss": 0.24233436584472656, "step": 125560 }, { "epoch": 0.5390982543812198, "grad_norm": 0.0008209710358642042, "learning_rate": 4.628933366677303e-05, "loss": 0.14674782752990723, "step": 125570 }, { "epoch": 0.5391411864712398, "grad_norm": 2.904439687728882, "learning_rate": 4.62850219466554e-05, "loss": 0.2904531955718994, "step": 125580 }, { "epoch": 0.5391841185612598, "grad_norm": 0.5623788237571716, "learning_rate": 4.6280710226537775e-05, "loss": 0.2724529027938843, "step": 125590 }, { "epoch": 0.5392270506512798, "grad_norm": 0.030613474547863007, "learning_rate": 4.627639850642015e-05, "loss": 0.3674274206161499, "step": 125600 }, { "epoch": 0.5392699827412998, "grad_norm": 0.08635491877794266, "learning_rate": 4.627208678630253e-05, "loss": 0.06295985579490662, "step": 125610 }, { "epoch": 0.5393129148313198, "grad_norm": 0.2094467282295227, "learning_rate": 4.62677750661849e-05, "loss": 0.1437790036201477, "step": 125620 }, { "epoch": 0.5393558469213399, "grad_norm": 0.5031962990760803, "learning_rate": 4.6263463346067285e-05, "loss": 0.2865575313568115, "step": 125630 }, { "epoch": 0.5393987790113598, "grad_norm": 5.898693084716797, "learning_rate": 4.625915162594966e-05, "loss": 0.2831977367401123, "step": 125640 }, { "epoch": 0.5394417111013798, "grad_norm": 0.5221174955368042, "learning_rate": 4.625483990583204e-05, "loss": 0.018702538311481477, "step": 125650 }, { "epoch": 0.5394846431913999, "grad_norm": 1.3053752183914185, "learning_rate": 4.625052818571441e-05, "loss": 0.2751390218734741, "step": 125660 }, { "epoch": 0.5395275752814198, "grad_norm": 1.5146193504333496, "learning_rate": 4.624621646559679e-05, "loss": 0.14024045467376708, "step": 125670 }, { "epoch": 0.5395705073714399, "grad_norm": 0.007814999669790268, "learning_rate": 4.6241904745479165e-05, "loss": 0.37259511947631835, "step": 125680 }, { "epoch": 0.5396134394614599, "grad_norm": 2.595086097717285, "learning_rate": 4.623759302536154e-05, "loss": 0.33952600955963136, "step": 125690 }, { "epoch": 0.5396563715514798, "grad_norm": 1.5511152744293213, "learning_rate": 4.623328130524391e-05, "loss": 0.33948214054107667, "step": 125700 }, { "epoch": 0.5396993036414999, "grad_norm": 0.6245251297950745, "learning_rate": 4.622896958512629e-05, "loss": 0.009097591787576676, "step": 125710 }, { "epoch": 0.5397422357315199, "grad_norm": 0.017570942640304565, "learning_rate": 4.622465786500867e-05, "loss": 0.13392648696899415, "step": 125720 }, { "epoch": 0.5397851678215398, "grad_norm": 0.0011469552991911769, "learning_rate": 4.6220346144891045e-05, "loss": 0.04894132018089294, "step": 125730 }, { "epoch": 0.5398280999115599, "grad_norm": 0.04077253118157387, "learning_rate": 4.621603442477342e-05, "loss": 0.11998807191848755, "step": 125740 }, { "epoch": 0.5398710320015799, "grad_norm": 2.001880407333374, "learning_rate": 4.62117227046558e-05, "loss": 0.4320225238800049, "step": 125750 }, { "epoch": 0.5399139640915999, "grad_norm": 0.0013287104666233063, "learning_rate": 4.620741098453818e-05, "loss": 0.18426822423934935, "step": 125760 }, { "epoch": 0.5399568961816199, "grad_norm": 1.52393639087677, "learning_rate": 4.6203099264420554e-05, "loss": 0.14336030483245848, "step": 125770 }, { "epoch": 0.53999982827164, "grad_norm": 1.1323847770690918, "learning_rate": 4.6198787544302925e-05, "loss": 0.1529453158378601, "step": 125780 }, { "epoch": 0.5400427603616599, "grad_norm": 2.1432337760925293, "learning_rate": 4.61944758241853e-05, "loss": 0.3418971300125122, "step": 125790 }, { "epoch": 0.5400856924516799, "grad_norm": 0.18537476658821106, "learning_rate": 4.619016410406768e-05, "loss": 0.25544416904449463, "step": 125800 }, { "epoch": 0.5401286245417, "grad_norm": 0.00806635431945324, "learning_rate": 4.618585238395006e-05, "loss": 0.29964916706085204, "step": 125810 }, { "epoch": 0.5401715566317199, "grad_norm": 1.3490689992904663, "learning_rate": 4.618154066383243e-05, "loss": 0.3459489345550537, "step": 125820 }, { "epoch": 0.5402144887217399, "grad_norm": 0.0129646435379982, "learning_rate": 4.6177228943714805e-05, "loss": 0.26499371528625487, "step": 125830 }, { "epoch": 0.54025742081176, "grad_norm": 1.2936310768127441, "learning_rate": 4.617291722359718e-05, "loss": 0.2571291923522949, "step": 125840 }, { "epoch": 0.5403003529017799, "grad_norm": 0.0039204442873597145, "learning_rate": 4.616860550347956e-05, "loss": 0.17692142724990845, "step": 125850 }, { "epoch": 0.5403432849918, "grad_norm": 0.3188647925853729, "learning_rate": 4.616429378336194e-05, "loss": 0.08579055070877076, "step": 125860 }, { "epoch": 0.54038621708182, "grad_norm": 0.24737665057182312, "learning_rate": 4.6159982063244314e-05, "loss": 0.0415299266576767, "step": 125870 }, { "epoch": 0.5404291491718399, "grad_norm": 6.9003777503967285, "learning_rate": 4.615567034312669e-05, "loss": 0.3434094190597534, "step": 125880 }, { "epoch": 0.54047208126186, "grad_norm": 0.0020558438263833523, "learning_rate": 4.615135862300907e-05, "loss": 0.2131648302078247, "step": 125890 }, { "epoch": 0.54051501335188, "grad_norm": 0.6378905177116394, "learning_rate": 4.6147046902891446e-05, "loss": 0.12402830123901368, "step": 125900 }, { "epoch": 0.5405579454419001, "grad_norm": 4.6194000244140625, "learning_rate": 4.6142735182773817e-05, "loss": 0.276075553894043, "step": 125910 }, { "epoch": 0.54060087753192, "grad_norm": 0.018461063504219055, "learning_rate": 4.6138423462656194e-05, "loss": 0.13185503482818603, "step": 125920 }, { "epoch": 0.54064380962194, "grad_norm": 4.305511951446533, "learning_rate": 4.613411174253857e-05, "loss": 0.27200567722320557, "step": 125930 }, { "epoch": 0.5406867417119601, "grad_norm": 0.00458148866891861, "learning_rate": 4.612980002242095e-05, "loss": 0.0953050673007965, "step": 125940 }, { "epoch": 0.54072967380198, "grad_norm": 2.7567226886749268, "learning_rate": 4.612548830230332e-05, "loss": 0.2230508804321289, "step": 125950 }, { "epoch": 0.540772605892, "grad_norm": 1.6046864986419678, "learning_rate": 4.6121176582185697e-05, "loss": 0.3258085012435913, "step": 125960 }, { "epoch": 0.5408155379820201, "grad_norm": 1.2450833320617676, "learning_rate": 4.6116864862068074e-05, "loss": 0.36345996856689455, "step": 125970 }, { "epoch": 0.54085847007204, "grad_norm": 0.006003808230161667, "learning_rate": 4.611255314195045e-05, "loss": 0.08728630542755127, "step": 125980 }, { "epoch": 0.5409014021620601, "grad_norm": 0.04204230010509491, "learning_rate": 4.610824142183283e-05, "loss": 0.3631253480911255, "step": 125990 }, { "epoch": 0.5409443342520801, "grad_norm": 5.768819332122803, "learning_rate": 4.6103929701715206e-05, "loss": 0.08400842547416687, "step": 126000 }, { "epoch": 0.5409443342520801, "eval_loss": 0.3944237530231476, "eval_runtime": 27.2327, "eval_samples_per_second": 3.672, "eval_steps_per_second": 3.672, "step": 126000 }, { "epoch": 0.5409872663421, "grad_norm": 0.007012277841567993, "learning_rate": 4.609961798159758e-05, "loss": 0.18022228479385377, "step": 126010 }, { "epoch": 0.5410301984321201, "grad_norm": 1.9996778964996338, "learning_rate": 4.609530626147996e-05, "loss": 0.3052912712097168, "step": 126020 }, { "epoch": 0.5410731305221401, "grad_norm": 5.9770073890686035, "learning_rate": 4.609099454136233e-05, "loss": 0.2505201816558838, "step": 126030 }, { "epoch": 0.54111606261216, "grad_norm": 0.027300434187054634, "learning_rate": 4.608668282124471e-05, "loss": 0.1408507227897644, "step": 126040 }, { "epoch": 0.5411589947021801, "grad_norm": 0.0006558285094797611, "learning_rate": 4.6082371101127086e-05, "loss": 0.15853034257888793, "step": 126050 }, { "epoch": 0.5412019267922001, "grad_norm": 2.2059273719787598, "learning_rate": 4.607805938100946e-05, "loss": 0.27972419261932374, "step": 126060 }, { "epoch": 0.5412448588822201, "grad_norm": 1.6577632427215576, "learning_rate": 4.6073747660891834e-05, "loss": 0.12001736164093017, "step": 126070 }, { "epoch": 0.5412877909722401, "grad_norm": 2.1002368927001953, "learning_rate": 4.606943594077421e-05, "loss": 0.0606159508228302, "step": 126080 }, { "epoch": 0.5413307230622602, "grad_norm": 0.039800770580768585, "learning_rate": 4.606512422065659e-05, "loss": 0.10772346258163452, "step": 126090 }, { "epoch": 0.5413736551522801, "grad_norm": 0.26574936509132385, "learning_rate": 4.6060812500538966e-05, "loss": 0.18571717739105226, "step": 126100 }, { "epoch": 0.5414165872423001, "grad_norm": 0.1073109582066536, "learning_rate": 4.605650078042134e-05, "loss": 0.3292713165283203, "step": 126110 }, { "epoch": 0.5414595193323202, "grad_norm": 0.049754850566387177, "learning_rate": 4.605218906030372e-05, "loss": 0.24871139526367186, "step": 126120 }, { "epoch": 0.5415024514223401, "grad_norm": 0.008045070804655552, "learning_rate": 4.60478773401861e-05, "loss": 0.4405210971832275, "step": 126130 }, { "epoch": 0.5415453835123601, "grad_norm": 0.027863921597599983, "learning_rate": 4.6043565620068475e-05, "loss": 0.25272364616394044, "step": 126140 }, { "epoch": 0.5415883156023802, "grad_norm": 9.032499313354492, "learning_rate": 4.6039253899950846e-05, "loss": 0.0940330982208252, "step": 126150 }, { "epoch": 0.5416312476924001, "grad_norm": 0.0032740167807787657, "learning_rate": 4.603494217983322e-05, "loss": 0.09897719025611877, "step": 126160 }, { "epoch": 0.5416741797824202, "grad_norm": 0.8318930268287659, "learning_rate": 4.60306304597156e-05, "loss": 0.33201496601104735, "step": 126170 }, { "epoch": 0.5417171118724402, "grad_norm": 4.014708518981934, "learning_rate": 4.602631873959798e-05, "loss": 0.3847514629364014, "step": 126180 }, { "epoch": 0.5417600439624601, "grad_norm": 0.0012932472163811326, "learning_rate": 4.602200701948035e-05, "loss": 0.26420438289642334, "step": 126190 }, { "epoch": 0.5418029760524802, "grad_norm": 0.14375215768814087, "learning_rate": 4.6017695299362726e-05, "loss": 0.18487160205841063, "step": 126200 }, { "epoch": 0.5418459081425002, "grad_norm": 1.129388689994812, "learning_rate": 4.60133835792451e-05, "loss": 0.3273311614990234, "step": 126210 }, { "epoch": 0.5418888402325202, "grad_norm": 0.07141338288784027, "learning_rate": 4.600907185912749e-05, "loss": 0.15279535055160523, "step": 126220 }, { "epoch": 0.5419317723225402, "grad_norm": 0.19866278767585754, "learning_rate": 4.6004760139009864e-05, "loss": 0.10880914926528931, "step": 126230 }, { "epoch": 0.5419747044125602, "grad_norm": 2.901414394378662, "learning_rate": 4.6000448418892235e-05, "loss": 0.19144272804260254, "step": 126240 }, { "epoch": 0.5420176365025802, "grad_norm": 2.086766242980957, "learning_rate": 4.599613669877461e-05, "loss": 0.2593048334121704, "step": 126250 }, { "epoch": 0.5420605685926002, "grad_norm": 4.121439456939697, "learning_rate": 4.599182497865699e-05, "loss": 0.15519894361495973, "step": 126260 }, { "epoch": 0.5421035006826203, "grad_norm": 0.00668214401230216, "learning_rate": 4.598751325853937e-05, "loss": 0.29855411052703856, "step": 126270 }, { "epoch": 0.5421464327726402, "grad_norm": 0.015316566452383995, "learning_rate": 4.598320153842174e-05, "loss": 0.1899287462234497, "step": 126280 }, { "epoch": 0.5421893648626602, "grad_norm": 0.20664560794830322, "learning_rate": 4.5978889818304115e-05, "loss": 0.2651756048202515, "step": 126290 }, { "epoch": 0.5422322969526803, "grad_norm": 0.8667112588882446, "learning_rate": 4.597457809818649e-05, "loss": 0.20992062091827393, "step": 126300 }, { "epoch": 0.5422752290427002, "grad_norm": 4.961584568023682, "learning_rate": 4.597026637806887e-05, "loss": 0.14463411569595336, "step": 126310 }, { "epoch": 0.5423181611327202, "grad_norm": 0.01307595707476139, "learning_rate": 4.596595465795124e-05, "loss": 0.07341977953910828, "step": 126320 }, { "epoch": 0.5423610932227403, "grad_norm": 0.019085580483078957, "learning_rate": 4.5961642937833624e-05, "loss": 0.2257392644882202, "step": 126330 }, { "epoch": 0.5424040253127603, "grad_norm": 0.17916961014270782, "learning_rate": 4.5957331217716e-05, "loss": 0.377333927154541, "step": 126340 }, { "epoch": 0.5424469574027803, "grad_norm": 2.313668727874756, "learning_rate": 4.595301949759838e-05, "loss": 0.13621577024459838, "step": 126350 }, { "epoch": 0.5424898894928003, "grad_norm": 5.779814720153809, "learning_rate": 4.594870777748075e-05, "loss": 0.3154439926147461, "step": 126360 }, { "epoch": 0.5425328215828203, "grad_norm": 0.008008879609405994, "learning_rate": 4.594439605736313e-05, "loss": 0.13945317268371582, "step": 126370 }, { "epoch": 0.5425757536728403, "grad_norm": 0.04248788207769394, "learning_rate": 4.5940084337245504e-05, "loss": 0.29401838779449463, "step": 126380 }, { "epoch": 0.5426186857628603, "grad_norm": 4.502487659454346, "learning_rate": 4.593577261712788e-05, "loss": 0.18828514814376832, "step": 126390 }, { "epoch": 0.5426616178528804, "grad_norm": 7.196977138519287, "learning_rate": 4.593146089701025e-05, "loss": 0.47498259544372556, "step": 126400 }, { "epoch": 0.5427045499429003, "grad_norm": 2.026784896850586, "learning_rate": 4.592714917689263e-05, "loss": 0.3331002950668335, "step": 126410 }, { "epoch": 0.5427474820329203, "grad_norm": 1.6803898811340332, "learning_rate": 4.592283745677501e-05, "loss": 0.33963189125061033, "step": 126420 }, { "epoch": 0.5427904141229404, "grad_norm": 3.6730363368988037, "learning_rate": 4.5918525736657384e-05, "loss": 0.21576766967773436, "step": 126430 }, { "epoch": 0.5428333462129603, "grad_norm": 0.039370764046907425, "learning_rate": 4.591421401653976e-05, "loss": 0.1567553162574768, "step": 126440 }, { "epoch": 0.5428762783029804, "grad_norm": 0.008856616914272308, "learning_rate": 4.590990229642214e-05, "loss": 0.1858171582221985, "step": 126450 }, { "epoch": 0.5429192103930004, "grad_norm": 0.15867328643798828, "learning_rate": 4.5905590576304516e-05, "loss": 0.21397511959075927, "step": 126460 }, { "epoch": 0.5429621424830203, "grad_norm": 2.5111141204833984, "learning_rate": 4.5901278856186894e-05, "loss": 0.18194727897644042, "step": 126470 }, { "epoch": 0.5430050745730404, "grad_norm": 24.574228286743164, "learning_rate": 4.5896967136069264e-05, "loss": 0.18436410427093505, "step": 126480 }, { "epoch": 0.5430480066630604, "grad_norm": 1.3025906085968018, "learning_rate": 4.589265541595164e-05, "loss": 0.3645550012588501, "step": 126490 }, { "epoch": 0.5430909387530803, "grad_norm": 0.013285533525049686, "learning_rate": 4.588834369583402e-05, "loss": 0.17530207633972167, "step": 126500 }, { "epoch": 0.5431338708431004, "grad_norm": 8.834342002868652, "learning_rate": 4.5884031975716396e-05, "loss": 0.12580578327178954, "step": 126510 }, { "epoch": 0.5431768029331204, "grad_norm": 0.870551586151123, "learning_rate": 4.587972025559877e-05, "loss": 0.17170372009277343, "step": 126520 }, { "epoch": 0.5432197350231404, "grad_norm": 4.450483798980713, "learning_rate": 4.5875408535481144e-05, "loss": 0.29233579635620116, "step": 126530 }, { "epoch": 0.5432626671131604, "grad_norm": 1.8690029382705688, "learning_rate": 4.587109681536352e-05, "loss": 0.10871686935424804, "step": 126540 }, { "epoch": 0.5433055992031804, "grad_norm": 0.014682224951684475, "learning_rate": 4.58667850952459e-05, "loss": 0.1381064772605896, "step": 126550 }, { "epoch": 0.5433485312932004, "grad_norm": 2.533137798309326, "learning_rate": 4.5862473375128276e-05, "loss": 0.15920352935791016, "step": 126560 }, { "epoch": 0.5433914633832204, "grad_norm": 0.013429299928247929, "learning_rate": 4.5858161655010653e-05, "loss": 0.049779373407363894, "step": 126570 }, { "epoch": 0.5434343954732405, "grad_norm": 1.3268059492111206, "learning_rate": 4.585384993489303e-05, "loss": 0.22604901790618898, "step": 126580 }, { "epoch": 0.5434773275632604, "grad_norm": 1.8555922508239746, "learning_rate": 4.584953821477541e-05, "loss": 0.13655784130096435, "step": 126590 }, { "epoch": 0.5435202596532804, "grad_norm": 0.03693840652704239, "learning_rate": 4.5845226494657785e-05, "loss": 0.20669023990631102, "step": 126600 }, { "epoch": 0.5435631917433005, "grad_norm": 0.21512533724308014, "learning_rate": 4.5840914774540156e-05, "loss": 0.08742049932479859, "step": 126610 }, { "epoch": 0.5436061238333204, "grad_norm": 0.3347058892250061, "learning_rate": 4.5836603054422533e-05, "loss": 0.2082576036453247, "step": 126620 }, { "epoch": 0.5436490559233405, "grad_norm": 0.4242490828037262, "learning_rate": 4.583229133430491e-05, "loss": 0.13741503953933715, "step": 126630 }, { "epoch": 0.5436919880133605, "grad_norm": 0.010707002133131027, "learning_rate": 4.582797961418729e-05, "loss": 0.25746917724609375, "step": 126640 }, { "epoch": 0.5437349201033804, "grad_norm": 0.004346279427409172, "learning_rate": 4.582366789406966e-05, "loss": 0.15046364068984985, "step": 126650 }, { "epoch": 0.5437778521934005, "grad_norm": 0.9764925837516785, "learning_rate": 4.5819356173952036e-05, "loss": 0.2195502758026123, "step": 126660 }, { "epoch": 0.5438207842834205, "grad_norm": 0.2153153419494629, "learning_rate": 4.581504445383441e-05, "loss": 0.1246726393699646, "step": 126670 }, { "epoch": 0.5438637163734404, "grad_norm": 4.666318416595459, "learning_rate": 4.581073273371679e-05, "loss": 0.28361926078796384, "step": 126680 }, { "epoch": 0.5439066484634605, "grad_norm": 0.01102722529321909, "learning_rate": 4.580642101359917e-05, "loss": 0.22579269409179686, "step": 126690 }, { "epoch": 0.5439495805534805, "grad_norm": 0.002459155162796378, "learning_rate": 4.5802109293481545e-05, "loss": 0.46924777030944825, "step": 126700 }, { "epoch": 0.5439925126435005, "grad_norm": 0.16135826706886292, "learning_rate": 4.579779757336392e-05, "loss": 0.1990830659866333, "step": 126710 }, { "epoch": 0.5440354447335205, "grad_norm": 0.00882128719240427, "learning_rate": 4.57934858532463e-05, "loss": 0.1869539737701416, "step": 126720 }, { "epoch": 0.5440783768235405, "grad_norm": 0.04351044446229935, "learning_rate": 4.578917413312867e-05, "loss": 0.3000959873199463, "step": 126730 }, { "epoch": 0.5441213089135605, "grad_norm": 5.979353904724121, "learning_rate": 4.578486241301105e-05, "loss": 0.19326921701431274, "step": 126740 }, { "epoch": 0.5441642410035805, "grad_norm": 42.468711853027344, "learning_rate": 4.5780550692893425e-05, "loss": 0.10602353811264038, "step": 126750 }, { "epoch": 0.5442071730936006, "grad_norm": 0.016265008598566055, "learning_rate": 4.57762389727758e-05, "loss": 0.3011580228805542, "step": 126760 }, { "epoch": 0.5442501051836206, "grad_norm": 0.18827781081199646, "learning_rate": 4.577192725265817e-05, "loss": 0.10657843351364135, "step": 126770 }, { "epoch": 0.5442930372736405, "grad_norm": 2.5613012313842773, "learning_rate": 4.576761553254055e-05, "loss": 0.16385369300842284, "step": 126780 }, { "epoch": 0.5443359693636606, "grad_norm": 0.015754317864775658, "learning_rate": 4.576330381242293e-05, "loss": 0.19457767009735108, "step": 126790 }, { "epoch": 0.5443789014536806, "grad_norm": 0.05552869662642479, "learning_rate": 4.5758992092305305e-05, "loss": 0.2870542764663696, "step": 126800 }, { "epoch": 0.5444218335437006, "grad_norm": 3.7243716716766357, "learning_rate": 4.575468037218768e-05, "loss": 0.10843846797943116, "step": 126810 }, { "epoch": 0.5444647656337206, "grad_norm": 0.0405440479516983, "learning_rate": 4.575036865207006e-05, "loss": 0.05366473197937012, "step": 126820 }, { "epoch": 0.5445076977237406, "grad_norm": 40.28740310668945, "learning_rate": 4.574605693195244e-05, "loss": 0.17246224880218505, "step": 126830 }, { "epoch": 0.5445506298137606, "grad_norm": 0.7835990786552429, "learning_rate": 4.5741745211834815e-05, "loss": 0.03872422575950622, "step": 126840 }, { "epoch": 0.5445935619037806, "grad_norm": 0.027119014412164688, "learning_rate": 4.5737433491717185e-05, "loss": 0.29775936603546144, "step": 126850 }, { "epoch": 0.5446364939938007, "grad_norm": 0.17144376039505005, "learning_rate": 4.573312177159956e-05, "loss": 0.2765123128890991, "step": 126860 }, { "epoch": 0.5446794260838206, "grad_norm": 0.059212543070316315, "learning_rate": 4.572881005148194e-05, "loss": 0.12597641944885254, "step": 126870 }, { "epoch": 0.5447223581738406, "grad_norm": 0.03909136354923248, "learning_rate": 4.572449833136432e-05, "loss": 0.07332187294960021, "step": 126880 }, { "epoch": 0.5447652902638607, "grad_norm": 21.765789031982422, "learning_rate": 4.572018661124669e-05, "loss": 0.3605009078979492, "step": 126890 }, { "epoch": 0.5448082223538806, "grad_norm": 0.06298067420721054, "learning_rate": 4.5715874891129065e-05, "loss": 0.2783812046051025, "step": 126900 }, { "epoch": 0.5448511544439006, "grad_norm": 0.011054006405174732, "learning_rate": 4.571156317101144e-05, "loss": 0.2735854148864746, "step": 126910 }, { "epoch": 0.5448940865339207, "grad_norm": 0.23685322701931, "learning_rate": 4.5707251450893827e-05, "loss": 0.3598737001419067, "step": 126920 }, { "epoch": 0.5449370186239406, "grad_norm": 8.397737503051758, "learning_rate": 4.57029397307762e-05, "loss": 0.20286760330200196, "step": 126930 }, { "epoch": 0.5449799507139607, "grad_norm": 7.1092939376831055, "learning_rate": 4.5698628010658574e-05, "loss": 0.4456644535064697, "step": 126940 }, { "epoch": 0.5450228828039807, "grad_norm": 0.0011188010685145855, "learning_rate": 4.569431629054095e-05, "loss": 0.4357870101928711, "step": 126950 }, { "epoch": 0.5450658148940006, "grad_norm": 0.0300370492041111, "learning_rate": 4.569000457042333e-05, "loss": 0.26468491554260254, "step": 126960 }, { "epoch": 0.5451087469840207, "grad_norm": 0.003478456288576126, "learning_rate": 4.5685692850305707e-05, "loss": 0.3201757907867432, "step": 126970 }, { "epoch": 0.5451516790740407, "grad_norm": 0.04247760400176048, "learning_rate": 4.568138113018808e-05, "loss": 0.22876076698303222, "step": 126980 }, { "epoch": 0.5451946111640606, "grad_norm": 0.017776286229491234, "learning_rate": 4.5677069410070454e-05, "loss": 0.1681857228279114, "step": 126990 }, { "epoch": 0.5452375432540807, "grad_norm": 0.07333787530660629, "learning_rate": 4.567275768995283e-05, "loss": 0.22127797603607177, "step": 127000 }, { "epoch": 0.5452375432540807, "eval_loss": 0.41241249442100525, "eval_runtime": 27.1526, "eval_samples_per_second": 3.683, "eval_steps_per_second": 3.683, "step": 127000 }, { "epoch": 0.5452804753441007, "grad_norm": 0.04378829896450043, "learning_rate": 4.566844596983521e-05, "loss": 0.1355321526527405, "step": 127010 }, { "epoch": 0.5453234074341207, "grad_norm": 0.024462919682264328, "learning_rate": 4.566413424971758e-05, "loss": 0.23159735202789306, "step": 127020 }, { "epoch": 0.5453663395241407, "grad_norm": 5.287015914916992, "learning_rate": 4.5659822529599964e-05, "loss": 0.09170323610305786, "step": 127030 }, { "epoch": 0.5454092716141608, "grad_norm": 5.614040851593018, "learning_rate": 4.565551080948234e-05, "loss": 0.21654801368713378, "step": 127040 }, { "epoch": 0.5454522037041807, "grad_norm": 0.028848685324192047, "learning_rate": 4.565119908936472e-05, "loss": 0.15463857650756835, "step": 127050 }, { "epoch": 0.5454951357942007, "grad_norm": 0.30613818764686584, "learning_rate": 4.564688736924709e-05, "loss": 0.2023834228515625, "step": 127060 }, { "epoch": 0.5455380678842208, "grad_norm": 0.015514791011810303, "learning_rate": 4.5642575649129466e-05, "loss": 0.08889861702919007, "step": 127070 }, { "epoch": 0.5455809999742407, "grad_norm": 0.002453985158354044, "learning_rate": 4.5638263929011844e-05, "loss": 0.00347699411213398, "step": 127080 }, { "epoch": 0.5456239320642607, "grad_norm": 0.44067129492759705, "learning_rate": 4.563395220889422e-05, "loss": 0.13874021768569947, "step": 127090 }, { "epoch": 0.5456668641542808, "grad_norm": 1.6640559434890747, "learning_rate": 4.562964048877659e-05, "loss": 0.31788613796234133, "step": 127100 }, { "epoch": 0.5457097962443007, "grad_norm": 0.002911994466558099, "learning_rate": 4.562532876865897e-05, "loss": 0.23183104991912842, "step": 127110 }, { "epoch": 0.5457527283343208, "grad_norm": 0.0019254583166912198, "learning_rate": 4.5621017048541346e-05, "loss": 0.15993919372558593, "step": 127120 }, { "epoch": 0.5457956604243408, "grad_norm": 1.9986472129821777, "learning_rate": 4.5616705328423724e-05, "loss": 0.2222136974334717, "step": 127130 }, { "epoch": 0.5458385925143607, "grad_norm": 0.020649341866374016, "learning_rate": 4.56123936083061e-05, "loss": 0.1820436477661133, "step": 127140 }, { "epoch": 0.5458815246043808, "grad_norm": 2.087904930114746, "learning_rate": 4.560808188818848e-05, "loss": 0.15863490104675293, "step": 127150 }, { "epoch": 0.5459244566944008, "grad_norm": 0.0071143195964396, "learning_rate": 4.5603770168070856e-05, "loss": 0.3248276710510254, "step": 127160 }, { "epoch": 0.5459673887844207, "grad_norm": 0.23474909365177155, "learning_rate": 4.559945844795323e-05, "loss": 0.10756796598434448, "step": 127170 }, { "epoch": 0.5460103208744408, "grad_norm": 0.10987505316734314, "learning_rate": 4.5595146727835604e-05, "loss": 0.25221178531646726, "step": 127180 }, { "epoch": 0.5460532529644608, "grad_norm": 0.12152563780546188, "learning_rate": 4.559083500771798e-05, "loss": 0.3498376369476318, "step": 127190 }, { "epoch": 0.5460961850544809, "grad_norm": 1.011543869972229, "learning_rate": 4.558652328760036e-05, "loss": 0.33713626861572266, "step": 127200 }, { "epoch": 0.5461391171445008, "grad_norm": 11.715632438659668, "learning_rate": 4.5582211567482736e-05, "loss": 0.33246119022369386, "step": 127210 }, { "epoch": 0.5461820492345208, "grad_norm": 0.20116642117500305, "learning_rate": 4.5577899847365106e-05, "loss": 0.15852749347686768, "step": 127220 }, { "epoch": 0.5462249813245409, "grad_norm": 1.2381221055984497, "learning_rate": 4.5573588127247484e-05, "loss": 0.13592121601104737, "step": 127230 }, { "epoch": 0.5462679134145608, "grad_norm": 0.1290377825498581, "learning_rate": 4.556927640712986e-05, "loss": 0.10746562480926514, "step": 127240 }, { "epoch": 0.5463108455045809, "grad_norm": 0.029829656705260277, "learning_rate": 4.556496468701224e-05, "loss": 0.14033961296081543, "step": 127250 }, { "epoch": 0.5463537775946009, "grad_norm": 0.04186912998557091, "learning_rate": 4.5560652966894616e-05, "loss": 0.25282788276672363, "step": 127260 }, { "epoch": 0.5463967096846208, "grad_norm": 0.020326996222138405, "learning_rate": 4.555634124677699e-05, "loss": 0.298588490486145, "step": 127270 }, { "epoch": 0.5464396417746409, "grad_norm": 0.09214252233505249, "learning_rate": 4.555202952665937e-05, "loss": 0.21430680751800538, "step": 127280 }, { "epoch": 0.5464825738646609, "grad_norm": 0.06303048133850098, "learning_rate": 4.554771780654175e-05, "loss": 0.2151409149169922, "step": 127290 }, { "epoch": 0.5465255059546809, "grad_norm": 0.01923828199505806, "learning_rate": 4.5543406086424125e-05, "loss": 0.16676281690597533, "step": 127300 }, { "epoch": 0.5465684380447009, "grad_norm": 0.006287601310759783, "learning_rate": 4.5539094366306496e-05, "loss": 0.321563196182251, "step": 127310 }, { "epoch": 0.5466113701347209, "grad_norm": 0.9648750424385071, "learning_rate": 4.553478264618887e-05, "loss": 0.13181719779968262, "step": 127320 }, { "epoch": 0.5466543022247409, "grad_norm": 1.9215424060821533, "learning_rate": 4.553047092607125e-05, "loss": 0.25484459400177, "step": 127330 }, { "epoch": 0.5466972343147609, "grad_norm": 2.3266000747680664, "learning_rate": 4.552615920595363e-05, "loss": 0.2846336364746094, "step": 127340 }, { "epoch": 0.546740166404781, "grad_norm": 1.2252665758132935, "learning_rate": 4.5521847485836e-05, "loss": 0.3566447734832764, "step": 127350 }, { "epoch": 0.5467830984948009, "grad_norm": 0.11931613087654114, "learning_rate": 4.5517535765718375e-05, "loss": 0.286944317817688, "step": 127360 }, { "epoch": 0.5468260305848209, "grad_norm": 0.00794187467545271, "learning_rate": 4.551322404560075e-05, "loss": 0.2526960849761963, "step": 127370 }, { "epoch": 0.546868962674841, "grad_norm": 0.09007790684700012, "learning_rate": 4.550891232548313e-05, "loss": 0.24354372024536133, "step": 127380 }, { "epoch": 0.5469118947648609, "grad_norm": 1.4762769937515259, "learning_rate": 4.550460060536551e-05, "loss": 0.2883622407913208, "step": 127390 }, { "epoch": 0.546954826854881, "grad_norm": 0.0014529629843309522, "learning_rate": 4.5500288885247885e-05, "loss": 0.20099167823791503, "step": 127400 }, { "epoch": 0.546997758944901, "grad_norm": 0.060556598007678986, "learning_rate": 4.549597716513026e-05, "loss": 0.2658257246017456, "step": 127410 }, { "epoch": 0.5470406910349209, "grad_norm": 0.04418276250362396, "learning_rate": 4.549166544501264e-05, "loss": 0.1786326289176941, "step": 127420 }, { "epoch": 0.547083623124941, "grad_norm": 0.006653594318777323, "learning_rate": 4.548735372489501e-05, "loss": 0.38285691738128663, "step": 127430 }, { "epoch": 0.547126555214961, "grad_norm": 0.14826098084449768, "learning_rate": 4.548304200477739e-05, "loss": 0.21129980087280273, "step": 127440 }, { "epoch": 0.5471694873049809, "grad_norm": 3.9380741119384766, "learning_rate": 4.5478730284659765e-05, "loss": 0.2790215015411377, "step": 127450 }, { "epoch": 0.547212419395001, "grad_norm": 0.0008009527227841318, "learning_rate": 4.547441856454214e-05, "loss": 0.12772501707077027, "step": 127460 }, { "epoch": 0.547255351485021, "grad_norm": 8.273842811584473, "learning_rate": 4.547010684442451e-05, "loss": 0.3419928073883057, "step": 127470 }, { "epoch": 0.547298283575041, "grad_norm": 0.0034350028727203608, "learning_rate": 4.546579512430689e-05, "loss": 0.08441671133041381, "step": 127480 }, { "epoch": 0.547341215665061, "grad_norm": 5.727380275726318, "learning_rate": 4.546148340418927e-05, "loss": 0.14370622634887695, "step": 127490 }, { "epoch": 0.547384147755081, "grad_norm": 0.05439167469739914, "learning_rate": 4.5457171684071645e-05, "loss": 0.09095125794410705, "step": 127500 }, { "epoch": 0.547427079845101, "grad_norm": 0.013659200631082058, "learning_rate": 4.545285996395402e-05, "loss": 0.19422099590301514, "step": 127510 }, { "epoch": 0.547470011935121, "grad_norm": 0.06669435650110245, "learning_rate": 4.54485482438364e-05, "loss": 0.2521942615509033, "step": 127520 }, { "epoch": 0.5475129440251411, "grad_norm": 0.016093647107481956, "learning_rate": 4.544423652371878e-05, "loss": 0.13946938514709473, "step": 127530 }, { "epoch": 0.547555876115161, "grad_norm": 0.021878918632864952, "learning_rate": 4.5439924803601154e-05, "loss": 0.14208060503005981, "step": 127540 }, { "epoch": 0.547598808205181, "grad_norm": 1.2861130237579346, "learning_rate": 4.5435613083483525e-05, "loss": 0.24725103378295898, "step": 127550 }, { "epoch": 0.5476417402952011, "grad_norm": 6.029102325439453, "learning_rate": 4.54313013633659e-05, "loss": 0.20380520820617676, "step": 127560 }, { "epoch": 0.547684672385221, "grad_norm": 0.45046472549438477, "learning_rate": 4.542698964324828e-05, "loss": 0.3024513006210327, "step": 127570 }, { "epoch": 0.547727604475241, "grad_norm": 5.435580253601074, "learning_rate": 4.542267792313066e-05, "loss": 0.16858481168746947, "step": 127580 }, { "epoch": 0.5477705365652611, "grad_norm": 0.027056651189923286, "learning_rate": 4.541836620301303e-05, "loss": 0.1475573182106018, "step": 127590 }, { "epoch": 0.547813468655281, "grad_norm": 0.004209815990179777, "learning_rate": 4.5414054482895405e-05, "loss": 0.16616259813308715, "step": 127600 }, { "epoch": 0.5478564007453011, "grad_norm": 0.004726664163172245, "learning_rate": 4.540974276277778e-05, "loss": 0.1367909550666809, "step": 127610 }, { "epoch": 0.5478993328353211, "grad_norm": 23.20948028564453, "learning_rate": 4.540543104266016e-05, "loss": 0.06587894558906555, "step": 127620 }, { "epoch": 0.5479422649253411, "grad_norm": 0.2586648464202881, "learning_rate": 4.5401119322542537e-05, "loss": 0.37384953498840334, "step": 127630 }, { "epoch": 0.5479851970153611, "grad_norm": 1.3465591669082642, "learning_rate": 4.5396807602424914e-05, "loss": 0.3581723928451538, "step": 127640 }, { "epoch": 0.5480281291053811, "grad_norm": 1.7588071823120117, "learning_rate": 4.539249588230729e-05, "loss": 0.09634592533111572, "step": 127650 }, { "epoch": 0.5480710611954012, "grad_norm": 0.1279430240392685, "learning_rate": 4.538818416218967e-05, "loss": 0.12120214700698853, "step": 127660 }, { "epoch": 0.5481139932854211, "grad_norm": 0.026256712153553963, "learning_rate": 4.5383872442072046e-05, "loss": 0.09571439027786255, "step": 127670 }, { "epoch": 0.5481569253754411, "grad_norm": 0.001200821716338396, "learning_rate": 4.5379560721954417e-05, "loss": 0.16283975839614867, "step": 127680 }, { "epoch": 0.5481998574654612, "grad_norm": 0.4395844042301178, "learning_rate": 4.5375249001836794e-05, "loss": 0.21897361278533936, "step": 127690 }, { "epoch": 0.5482427895554811, "grad_norm": 0.8989280462265015, "learning_rate": 4.537093728171917e-05, "loss": 0.09027788639068604, "step": 127700 }, { "epoch": 0.5482857216455012, "grad_norm": 0.00886700302362442, "learning_rate": 4.536662556160155e-05, "loss": 0.10902763605117798, "step": 127710 }, { "epoch": 0.5483286537355212, "grad_norm": 1.8262544870376587, "learning_rate": 4.536231384148392e-05, "loss": 0.23176026344299316, "step": 127720 }, { "epoch": 0.5483715858255411, "grad_norm": 0.0683245062828064, "learning_rate": 4.5358002121366296e-05, "loss": 0.06529564261436463, "step": 127730 }, { "epoch": 0.5484145179155612, "grad_norm": 0.007358227856457233, "learning_rate": 4.535369040124868e-05, "loss": 0.16256901025772094, "step": 127740 }, { "epoch": 0.5484574500055812, "grad_norm": 0.02833356335759163, "learning_rate": 4.534937868113106e-05, "loss": 0.0007393436040729285, "step": 127750 }, { "epoch": 0.5485003820956011, "grad_norm": 4.225329399108887, "learning_rate": 4.534506696101343e-05, "loss": 0.2745645523071289, "step": 127760 }, { "epoch": 0.5485433141856212, "grad_norm": 0.0018272794550284743, "learning_rate": 4.5340755240895806e-05, "loss": 0.1887308955192566, "step": 127770 }, { "epoch": 0.5485862462756412, "grad_norm": 0.07719375193119049, "learning_rate": 4.533644352077818e-05, "loss": 0.36277971267700193, "step": 127780 }, { "epoch": 0.5486291783656612, "grad_norm": 1.7213512659072876, "learning_rate": 4.533213180066056e-05, "loss": 0.23921799659729004, "step": 127790 }, { "epoch": 0.5486721104556812, "grad_norm": 0.007673746906220913, "learning_rate": 4.532782008054293e-05, "loss": 0.1820694923400879, "step": 127800 }, { "epoch": 0.5487150425457012, "grad_norm": 1.0937405824661255, "learning_rate": 4.532350836042531e-05, "loss": 0.06678230762481689, "step": 127810 }, { "epoch": 0.5487579746357212, "grad_norm": 12.98852252960205, "learning_rate": 4.5319196640307686e-05, "loss": 0.1009778380393982, "step": 127820 }, { "epoch": 0.5488009067257412, "grad_norm": 0.05488551780581474, "learning_rate": 4.531488492019006e-05, "loss": 0.1254146456718445, "step": 127830 }, { "epoch": 0.5488438388157613, "grad_norm": 1.9723633527755737, "learning_rate": 4.5310573200072434e-05, "loss": 0.13314028978347778, "step": 127840 }, { "epoch": 0.5488867709057812, "grad_norm": 1.2804629802703857, "learning_rate": 4.530626147995482e-05, "loss": 0.21371521949768066, "step": 127850 }, { "epoch": 0.5489297029958012, "grad_norm": 0.0441896878182888, "learning_rate": 4.5301949759837195e-05, "loss": 0.10532666444778442, "step": 127860 }, { "epoch": 0.5489726350858213, "grad_norm": 0.00991550087928772, "learning_rate": 4.529763803971957e-05, "loss": 0.28925399780273436, "step": 127870 }, { "epoch": 0.5490155671758412, "grad_norm": 0.38075557351112366, "learning_rate": 4.529332631960194e-05, "loss": 0.07656524777412414, "step": 127880 }, { "epoch": 0.5490584992658613, "grad_norm": 1.3430020809173584, "learning_rate": 4.528901459948432e-05, "loss": 0.26489455699920655, "step": 127890 }, { "epoch": 0.5491014313558813, "grad_norm": 2.7436866760253906, "learning_rate": 4.52847028793667e-05, "loss": 0.16090919971466064, "step": 127900 }, { "epoch": 0.5491443634459012, "grad_norm": 0.4776339828968048, "learning_rate": 4.5280391159249075e-05, "loss": 0.13634073734283447, "step": 127910 }, { "epoch": 0.5491872955359213, "grad_norm": 0.45546698570251465, "learning_rate": 4.5276079439131446e-05, "loss": 0.2898316621780396, "step": 127920 }, { "epoch": 0.5492302276259413, "grad_norm": 0.012597916647791862, "learning_rate": 4.527176771901382e-05, "loss": 0.08472794890403748, "step": 127930 }, { "epoch": 0.5492731597159612, "grad_norm": 0.07850173115730286, "learning_rate": 4.52674559988962e-05, "loss": 0.2743486166000366, "step": 127940 }, { "epoch": 0.5493160918059813, "grad_norm": 3.1064162254333496, "learning_rate": 4.526314427877858e-05, "loss": 0.6178059577941895, "step": 127950 }, { "epoch": 0.5493590238960013, "grad_norm": 2.259978771209717, "learning_rate": 4.5258832558660955e-05, "loss": 0.24268021583557128, "step": 127960 }, { "epoch": 0.5494019559860213, "grad_norm": 0.07178674638271332, "learning_rate": 4.525452083854333e-05, "loss": 0.20910418033599854, "step": 127970 }, { "epoch": 0.5494448880760413, "grad_norm": 7.745858192443848, "learning_rate": 4.525020911842571e-05, "loss": 0.40424814224243166, "step": 127980 }, { "epoch": 0.5494878201660613, "grad_norm": 0.003248595166951418, "learning_rate": 4.524589739830809e-05, "loss": 0.18874037265777588, "step": 127990 }, { "epoch": 0.5495307522560813, "grad_norm": 0.0067419628612697124, "learning_rate": 4.524158567819046e-05, "loss": 0.11825863122940064, "step": 128000 }, { "epoch": 0.5495307522560813, "eval_loss": 0.39745041728019714, "eval_runtime": 27.1308, "eval_samples_per_second": 3.686, "eval_steps_per_second": 3.686, "step": 128000 }, { "epoch": 0.5495736843461013, "grad_norm": 0.023375723510980606, "learning_rate": 4.5237273958072835e-05, "loss": 0.1321635961532593, "step": 128010 }, { "epoch": 0.5496166164361214, "grad_norm": 0.0028161010704934597, "learning_rate": 4.523296223795521e-05, "loss": 0.3294975996017456, "step": 128020 }, { "epoch": 0.5496595485261413, "grad_norm": 0.03735275939106941, "learning_rate": 4.522865051783759e-05, "loss": 0.00880674496293068, "step": 128030 }, { "epoch": 0.5497024806161613, "grad_norm": 1.1383004188537598, "learning_rate": 4.522433879771997e-05, "loss": 0.059259730577468875, "step": 128040 }, { "epoch": 0.5497454127061814, "grad_norm": 0.0009236226323992014, "learning_rate": 4.522002707760234e-05, "loss": 0.11791167259216309, "step": 128050 }, { "epoch": 0.5497883447962014, "grad_norm": 2.122427225112915, "learning_rate": 4.5215715357484715e-05, "loss": 0.20672807693481446, "step": 128060 }, { "epoch": 0.5498312768862214, "grad_norm": 2.029148578643799, "learning_rate": 4.521140363736709e-05, "loss": 0.40177311897277834, "step": 128070 }, { "epoch": 0.5498742089762414, "grad_norm": 0.0030512872617691755, "learning_rate": 4.520709191724947e-05, "loss": 0.4742741107940674, "step": 128080 }, { "epoch": 0.5499171410662614, "grad_norm": 0.05324231833219528, "learning_rate": 4.520278019713185e-05, "loss": 0.18547524213790895, "step": 128090 }, { "epoch": 0.5499600731562814, "grad_norm": 0.0029397367034107447, "learning_rate": 4.5198468477014224e-05, "loss": 0.3278725385665894, "step": 128100 }, { "epoch": 0.5500030052463014, "grad_norm": 0.012124622240662575, "learning_rate": 4.51941567568966e-05, "loss": 0.24240074157714844, "step": 128110 }, { "epoch": 0.5500459373363215, "grad_norm": 8.108833312988281, "learning_rate": 4.518984503677898e-05, "loss": 0.22392873764038085, "step": 128120 }, { "epoch": 0.5500888694263414, "grad_norm": 2.374791383743286, "learning_rate": 4.518553331666135e-05, "loss": 0.1306439995765686, "step": 128130 }, { "epoch": 0.5501318015163614, "grad_norm": 0.005056803114712238, "learning_rate": 4.518122159654373e-05, "loss": 0.13596285581588746, "step": 128140 }, { "epoch": 0.5501747336063815, "grad_norm": 0.07786231487989426, "learning_rate": 4.5176909876426104e-05, "loss": 0.16290545463562012, "step": 128150 }, { "epoch": 0.5502176656964014, "grad_norm": 1.501407265663147, "learning_rate": 4.517259815630848e-05, "loss": 0.3343447923660278, "step": 128160 }, { "epoch": 0.5502605977864214, "grad_norm": 1.5795830488204956, "learning_rate": 4.516828643619085e-05, "loss": 0.39916000366210935, "step": 128170 }, { "epoch": 0.5503035298764415, "grad_norm": 0.021032895892858505, "learning_rate": 4.516397471607323e-05, "loss": 0.3229613542556763, "step": 128180 }, { "epoch": 0.5503464619664614, "grad_norm": 0.8697996735572815, "learning_rate": 4.515966299595561e-05, "loss": 0.19066758155822755, "step": 128190 }, { "epoch": 0.5503893940564815, "grad_norm": 0.006133753340691328, "learning_rate": 4.5155351275837984e-05, "loss": 0.09759909510612488, "step": 128200 }, { "epoch": 0.5504323261465015, "grad_norm": 1.7772566080093384, "learning_rate": 4.515103955572036e-05, "loss": 0.20648114681243895, "step": 128210 }, { "epoch": 0.5504752582365214, "grad_norm": 0.0055733914487063885, "learning_rate": 4.514672783560274e-05, "loss": 0.18312152624130248, "step": 128220 }, { "epoch": 0.5505181903265415, "grad_norm": 0.5254460573196411, "learning_rate": 4.5142416115485116e-05, "loss": 0.21679816246032715, "step": 128230 }, { "epoch": 0.5505611224165615, "grad_norm": 0.005690948572009802, "learning_rate": 4.5138104395367494e-05, "loss": 0.09224072098731995, "step": 128240 }, { "epoch": 0.5506040545065815, "grad_norm": 0.0012284154072403908, "learning_rate": 4.5133792675249864e-05, "loss": 0.35671429634094237, "step": 128250 }, { "epoch": 0.5506469865966015, "grad_norm": 1.7502591609954834, "learning_rate": 4.512948095513224e-05, "loss": 0.07723878622055054, "step": 128260 }, { "epoch": 0.5506899186866215, "grad_norm": 0.7807573676109314, "learning_rate": 4.512516923501462e-05, "loss": 0.19361660480499268, "step": 128270 }, { "epoch": 0.5507328507766415, "grad_norm": 0.0044706715270876884, "learning_rate": 4.5120857514896996e-05, "loss": 0.15273451805114746, "step": 128280 }, { "epoch": 0.5507757828666615, "grad_norm": 1.5963835716247559, "learning_rate": 4.511654579477937e-05, "loss": 0.23731424808502197, "step": 128290 }, { "epoch": 0.5508187149566816, "grad_norm": 1.5934256315231323, "learning_rate": 4.5112234074661744e-05, "loss": 0.4009242534637451, "step": 128300 }, { "epoch": 0.5508616470467015, "grad_norm": 0.028949454426765442, "learning_rate": 4.510792235454412e-05, "loss": 0.4464095592498779, "step": 128310 }, { "epoch": 0.5509045791367215, "grad_norm": 1.0958473682403564, "learning_rate": 4.51036106344265e-05, "loss": 0.22179553508758545, "step": 128320 }, { "epoch": 0.5509475112267416, "grad_norm": 0.18953019380569458, "learning_rate": 4.5099298914308876e-05, "loss": 0.06211835741996765, "step": 128330 }, { "epoch": 0.5509904433167615, "grad_norm": 0.22642917931079865, "learning_rate": 4.5094987194191253e-05, "loss": 0.34406998157501223, "step": 128340 }, { "epoch": 0.5510333754067815, "grad_norm": 0.35814112424850464, "learning_rate": 4.509067547407363e-05, "loss": 0.245259428024292, "step": 128350 }, { "epoch": 0.5510763074968016, "grad_norm": 0.060045354068279266, "learning_rate": 4.508636375395601e-05, "loss": 0.15209543704986572, "step": 128360 }, { "epoch": 0.5511192395868215, "grad_norm": 0.06613177061080933, "learning_rate": 4.508205203383838e-05, "loss": 0.2412318468093872, "step": 128370 }, { "epoch": 0.5511621716768416, "grad_norm": 3.185753345489502, "learning_rate": 4.5077740313720756e-05, "loss": 0.18033461570739745, "step": 128380 }, { "epoch": 0.5512051037668616, "grad_norm": 4.417606353759766, "learning_rate": 4.507342859360313e-05, "loss": 0.2318558692932129, "step": 128390 }, { "epoch": 0.5512480358568815, "grad_norm": 8.20495319366455, "learning_rate": 4.506911687348551e-05, "loss": 0.3258568286895752, "step": 128400 }, { "epoch": 0.5512909679469016, "grad_norm": 0.00039666148950345814, "learning_rate": 4.506480515336789e-05, "loss": 0.3040439605712891, "step": 128410 }, { "epoch": 0.5513339000369216, "grad_norm": 0.0841839537024498, "learning_rate": 4.506049343325026e-05, "loss": 0.15690793991088867, "step": 128420 }, { "epoch": 0.5513768321269416, "grad_norm": 0.011664030142128468, "learning_rate": 4.5056181713132636e-05, "loss": 0.061115825176239015, "step": 128430 }, { "epoch": 0.5514197642169616, "grad_norm": 2.6365978717803955, "learning_rate": 4.505186999301502e-05, "loss": 0.19872939586639404, "step": 128440 }, { "epoch": 0.5514626963069816, "grad_norm": 0.004380214959383011, "learning_rate": 4.50475582728974e-05, "loss": 0.04639408886432648, "step": 128450 }, { "epoch": 0.5515056283970016, "grad_norm": 3.333980083465576, "learning_rate": 4.504324655277977e-05, "loss": 0.31141598224639894, "step": 128460 }, { "epoch": 0.5515485604870216, "grad_norm": 0.06421362608671188, "learning_rate": 4.5038934832662145e-05, "loss": 0.19181085824966432, "step": 128470 }, { "epoch": 0.5515914925770417, "grad_norm": 1.3378186225891113, "learning_rate": 4.503462311254452e-05, "loss": 0.2731289863586426, "step": 128480 }, { "epoch": 0.5516344246670617, "grad_norm": 0.03502466529607773, "learning_rate": 4.50303113924269e-05, "loss": 0.08915647268295288, "step": 128490 }, { "epoch": 0.5516773567570816, "grad_norm": 0.13968119025230408, "learning_rate": 4.502599967230927e-05, "loss": 0.28186616897583006, "step": 128500 }, { "epoch": 0.5517202888471017, "grad_norm": 1.2802331447601318, "learning_rate": 4.502168795219165e-05, "loss": 0.09446300268173217, "step": 128510 }, { "epoch": 0.5517632209371217, "grad_norm": 1.2596136331558228, "learning_rate": 4.5017376232074025e-05, "loss": 0.4868049621582031, "step": 128520 }, { "epoch": 0.5518061530271416, "grad_norm": 0.017403313890099525, "learning_rate": 4.50130645119564e-05, "loss": 0.280033278465271, "step": 128530 }, { "epoch": 0.5518490851171617, "grad_norm": 0.016531366854906082, "learning_rate": 4.500875279183877e-05, "loss": 0.10965802669525146, "step": 128540 }, { "epoch": 0.5518920172071817, "grad_norm": 0.9327275156974792, "learning_rate": 4.500444107172116e-05, "loss": 0.20025987625122071, "step": 128550 }, { "epoch": 0.5519349492972017, "grad_norm": 1.1419161558151245, "learning_rate": 4.5000129351603535e-05, "loss": 0.4087411880493164, "step": 128560 }, { "epoch": 0.5519778813872217, "grad_norm": 1.9150222539901733, "learning_rate": 4.499581763148591e-05, "loss": 0.31905062198638917, "step": 128570 }, { "epoch": 0.5520208134772417, "grad_norm": 0.32453474402427673, "learning_rate": 4.499150591136828e-05, "loss": 0.13009634017944335, "step": 128580 }, { "epoch": 0.5520637455672617, "grad_norm": 0.012127312831580639, "learning_rate": 4.498719419125066e-05, "loss": 0.04768897294998169, "step": 128590 }, { "epoch": 0.5521066776572817, "grad_norm": 0.004312812816351652, "learning_rate": 4.498288247113304e-05, "loss": 0.22252929210662842, "step": 128600 }, { "epoch": 0.5521496097473018, "grad_norm": 1.6124236583709717, "learning_rate": 4.4978570751015415e-05, "loss": 0.3631885051727295, "step": 128610 }, { "epoch": 0.5521925418373217, "grad_norm": 0.09973174333572388, "learning_rate": 4.4974259030897785e-05, "loss": 0.1778208613395691, "step": 128620 }, { "epoch": 0.5522354739273417, "grad_norm": 5.448539733886719, "learning_rate": 4.496994731078016e-05, "loss": 0.24020824432373047, "step": 128630 }, { "epoch": 0.5522784060173618, "grad_norm": 2.429185628890991, "learning_rate": 4.496563559066254e-05, "loss": 0.28744187355041506, "step": 128640 }, { "epoch": 0.5523213381073817, "grad_norm": 0.15772603452205658, "learning_rate": 4.496132387054492e-05, "loss": 0.15476926565170288, "step": 128650 }, { "epoch": 0.5523642701974018, "grad_norm": 0.17234504222869873, "learning_rate": 4.4957012150427295e-05, "loss": 0.35704262256622316, "step": 128660 }, { "epoch": 0.5524072022874218, "grad_norm": 0.0040247696451842785, "learning_rate": 4.495270043030967e-05, "loss": 0.24672627449035645, "step": 128670 }, { "epoch": 0.5524501343774417, "grad_norm": 0.04398768022656441, "learning_rate": 4.494838871019205e-05, "loss": 0.09762682318687439, "step": 128680 }, { "epoch": 0.5524930664674618, "grad_norm": 0.058741435408592224, "learning_rate": 4.4944076990074427e-05, "loss": 0.1486470341682434, "step": 128690 }, { "epoch": 0.5525359985574818, "grad_norm": 0.9128871560096741, "learning_rate": 4.49397652699568e-05, "loss": 0.28860721588134763, "step": 128700 }, { "epoch": 0.5525789306475017, "grad_norm": 0.007726700510829687, "learning_rate": 4.4935453549839174e-05, "loss": 0.1310112237930298, "step": 128710 }, { "epoch": 0.5526218627375218, "grad_norm": 4.7020368576049805, "learning_rate": 4.493114182972155e-05, "loss": 0.1983258008956909, "step": 128720 }, { "epoch": 0.5526647948275418, "grad_norm": 0.29634737968444824, "learning_rate": 4.492683010960393e-05, "loss": 0.2613669872283936, "step": 128730 }, { "epoch": 0.5527077269175618, "grad_norm": 0.0016307096229866147, "learning_rate": 4.49225183894863e-05, "loss": 0.20361201763153075, "step": 128740 }, { "epoch": 0.5527506590075818, "grad_norm": 0.008389080874621868, "learning_rate": 4.491820666936868e-05, "loss": 0.13784075975418092, "step": 128750 }, { "epoch": 0.5527935910976018, "grad_norm": 0.11101187020540237, "learning_rate": 4.4913894949251054e-05, "loss": 0.250306224822998, "step": 128760 }, { "epoch": 0.5528365231876218, "grad_norm": 2.010310649871826, "learning_rate": 4.490958322913343e-05, "loss": 0.21546776294708253, "step": 128770 }, { "epoch": 0.5528794552776418, "grad_norm": 0.577353835105896, "learning_rate": 4.490527150901581e-05, "loss": 0.04757989346981049, "step": 128780 }, { "epoch": 0.5529223873676619, "grad_norm": 2.2937493324279785, "learning_rate": 4.4900959788898186e-05, "loss": 0.19977052211761476, "step": 128790 }, { "epoch": 0.5529653194576818, "grad_norm": 0.5439838767051697, "learning_rate": 4.4896648068780564e-05, "loss": 0.28266804218292235, "step": 128800 }, { "epoch": 0.5530082515477018, "grad_norm": 0.027256833389401436, "learning_rate": 4.489233634866294e-05, "loss": 0.04045622944831848, "step": 128810 }, { "epoch": 0.5530511836377219, "grad_norm": 0.00477907620370388, "learning_rate": 4.488802462854532e-05, "loss": 0.1647333025932312, "step": 128820 }, { "epoch": 0.5530941157277418, "grad_norm": 0.9636557102203369, "learning_rate": 4.488371290842769e-05, "loss": 0.4419291019439697, "step": 128830 }, { "epoch": 0.5531370478177619, "grad_norm": 1.1076637506484985, "learning_rate": 4.4879401188310066e-05, "loss": 0.3644695520401001, "step": 128840 }, { "epoch": 0.5531799799077819, "grad_norm": 1.1289384365081787, "learning_rate": 4.4875089468192444e-05, "loss": 0.3530080795288086, "step": 128850 }, { "epoch": 0.5532229119978018, "grad_norm": 0.3441978991031647, "learning_rate": 4.487077774807482e-05, "loss": 0.19301141500473024, "step": 128860 }, { "epoch": 0.5532658440878219, "grad_norm": 2.600874662399292, "learning_rate": 4.486646602795719e-05, "loss": 0.33310320377349856, "step": 128870 }, { "epoch": 0.5533087761778419, "grad_norm": 0.01902606710791588, "learning_rate": 4.486215430783957e-05, "loss": 0.24450154304504396, "step": 128880 }, { "epoch": 0.5533517082678618, "grad_norm": 7.304584503173828, "learning_rate": 4.4857842587721946e-05, "loss": 0.27731058597564695, "step": 128890 }, { "epoch": 0.5533946403578819, "grad_norm": 0.12720587849617004, "learning_rate": 4.4853530867604324e-05, "loss": 0.13187975883483888, "step": 128900 }, { "epoch": 0.5534375724479019, "grad_norm": 3.016563653945923, "learning_rate": 4.48492191474867e-05, "loss": 0.09463203549385071, "step": 128910 }, { "epoch": 0.553480504537922, "grad_norm": 0.22556646168231964, "learning_rate": 4.484490742736908e-05, "loss": 0.070140540599823, "step": 128920 }, { "epoch": 0.5535234366279419, "grad_norm": 0.01902354694902897, "learning_rate": 4.4840595707251456e-05, "loss": 0.18325456380844116, "step": 128930 }, { "epoch": 0.5535663687179619, "grad_norm": 0.0022118953056633472, "learning_rate": 4.483628398713383e-05, "loss": 0.15008280277252198, "step": 128940 }, { "epoch": 0.553609300807982, "grad_norm": 4.692033290863037, "learning_rate": 4.4831972267016204e-05, "loss": 0.3606921911239624, "step": 128950 }, { "epoch": 0.5536522328980019, "grad_norm": 0.025387076660990715, "learning_rate": 4.482766054689858e-05, "loss": 0.0033765774220228194, "step": 128960 }, { "epoch": 0.553695164988022, "grad_norm": 0.14318892359733582, "learning_rate": 4.482334882678096e-05, "loss": 0.122013258934021, "step": 128970 }, { "epoch": 0.553738097078042, "grad_norm": 0.019069045782089233, "learning_rate": 4.4819037106663336e-05, "loss": 0.40986361503601076, "step": 128980 }, { "epoch": 0.5537810291680619, "grad_norm": 0.1801142543554306, "learning_rate": 4.4814725386545706e-05, "loss": 0.2598786592483521, "step": 128990 }, { "epoch": 0.553823961258082, "grad_norm": 0.0032951515167951584, "learning_rate": 4.4810413666428084e-05, "loss": 0.3131296157836914, "step": 129000 }, { "epoch": 0.553823961258082, "eval_loss": 0.41541945934295654, "eval_runtime": 27.1184, "eval_samples_per_second": 3.688, "eval_steps_per_second": 3.688, "step": 129000 }, { "epoch": 0.553866893348102, "grad_norm": 0.007539977785199881, "learning_rate": 4.480610194631046e-05, "loss": 0.13454591035842894, "step": 129010 }, { "epoch": 0.553909825438122, "grad_norm": 2.2462775707244873, "learning_rate": 4.480179022619284e-05, "loss": 0.3304026126861572, "step": 129020 }, { "epoch": 0.553952757528142, "grad_norm": 2.5247340202331543, "learning_rate": 4.4797478506075216e-05, "loss": 0.23408877849578857, "step": 129030 }, { "epoch": 0.553995689618162, "grad_norm": 0.6959047317504883, "learning_rate": 4.479316678595759e-05, "loss": 0.18713444471359253, "step": 129040 }, { "epoch": 0.554038621708182, "grad_norm": 2.4266421794891357, "learning_rate": 4.478885506583997e-05, "loss": 0.21233630180358887, "step": 129050 }, { "epoch": 0.554081553798202, "grad_norm": 1.3284385204315186, "learning_rate": 4.478454334572235e-05, "loss": 0.1826010227203369, "step": 129060 }, { "epoch": 0.554124485888222, "grad_norm": 3.7686238288879395, "learning_rate": 4.478023162560472e-05, "loss": 0.5331307411193847, "step": 129070 }, { "epoch": 0.554167417978242, "grad_norm": 0.6449288725852966, "learning_rate": 4.4775919905487095e-05, "loss": 0.36653029918670654, "step": 129080 }, { "epoch": 0.554210350068262, "grad_norm": 1.2267459630966187, "learning_rate": 4.477160818536947e-05, "loss": 0.22919206619262694, "step": 129090 }, { "epoch": 0.5542532821582821, "grad_norm": 0.0552271232008934, "learning_rate": 4.476729646525185e-05, "loss": 0.07877624034881592, "step": 129100 }, { "epoch": 0.554296214248302, "grad_norm": 1.44490647315979, "learning_rate": 4.476298474513422e-05, "loss": 0.30919790267944336, "step": 129110 }, { "epoch": 0.554339146338322, "grad_norm": 0.7900761961936951, "learning_rate": 4.47586730250166e-05, "loss": 0.18931243419647217, "step": 129120 }, { "epoch": 0.5543820784283421, "grad_norm": 0.06050022318959236, "learning_rate": 4.4754361304898975e-05, "loss": 0.4321730613708496, "step": 129130 }, { "epoch": 0.554425010518362, "grad_norm": 0.04257184639573097, "learning_rate": 4.475004958478135e-05, "loss": 0.031278005242347716, "step": 129140 }, { "epoch": 0.5544679426083821, "grad_norm": 0.23344434797763824, "learning_rate": 4.474573786466374e-05, "loss": 0.18850735425949097, "step": 129150 }, { "epoch": 0.5545108746984021, "grad_norm": 2.25657320022583, "learning_rate": 4.474142614454611e-05, "loss": 0.25844502449035645, "step": 129160 }, { "epoch": 0.554553806788422, "grad_norm": 1.844508409500122, "learning_rate": 4.4737114424428485e-05, "loss": 0.14059102535247803, "step": 129170 }, { "epoch": 0.5545967388784421, "grad_norm": 2.2424442768096924, "learning_rate": 4.473280270431086e-05, "loss": 0.3256859064102173, "step": 129180 }, { "epoch": 0.5546396709684621, "grad_norm": 2.073718547821045, "learning_rate": 4.472849098419324e-05, "loss": 0.1257432818412781, "step": 129190 }, { "epoch": 0.554682603058482, "grad_norm": 0.24247653782367706, "learning_rate": 4.472417926407561e-05, "loss": 0.3258751153945923, "step": 129200 }, { "epoch": 0.5547255351485021, "grad_norm": 0.04378345608711243, "learning_rate": 4.471986754395799e-05, "loss": 0.07690662741661072, "step": 129210 }, { "epoch": 0.5547684672385221, "grad_norm": 0.0017137116519734263, "learning_rate": 4.4715555823840365e-05, "loss": 0.21572604179382324, "step": 129220 }, { "epoch": 0.5548113993285421, "grad_norm": 4.380400657653809, "learning_rate": 4.471124410372274e-05, "loss": 0.14595119953155516, "step": 129230 }, { "epoch": 0.5548543314185621, "grad_norm": 0.11375784128904343, "learning_rate": 4.470693238360511e-05, "loss": 0.14446167945861815, "step": 129240 }, { "epoch": 0.5548972635085822, "grad_norm": 0.0006020744331181049, "learning_rate": 4.470262066348749e-05, "loss": 0.11202117204666137, "step": 129250 }, { "epoch": 0.5549401955986021, "grad_norm": 0.47002795338630676, "learning_rate": 4.4698308943369874e-05, "loss": 0.24686095714569092, "step": 129260 }, { "epoch": 0.5549831276886221, "grad_norm": 0.08183751255273819, "learning_rate": 4.469399722325225e-05, "loss": 0.007567648589611053, "step": 129270 }, { "epoch": 0.5550260597786422, "grad_norm": 0.013336584903299809, "learning_rate": 4.468968550313462e-05, "loss": 0.10117838382720948, "step": 129280 }, { "epoch": 0.5550689918686621, "grad_norm": 0.001083645736798644, "learning_rate": 4.4685373783017e-05, "loss": 0.2760536909103394, "step": 129290 }, { "epoch": 0.5551119239586821, "grad_norm": 5.931659698486328, "learning_rate": 4.468106206289938e-05, "loss": 0.4119400978088379, "step": 129300 }, { "epoch": 0.5551548560487022, "grad_norm": 0.023911086842417717, "learning_rate": 4.4676750342781754e-05, "loss": 0.18249212503433226, "step": 129310 }, { "epoch": 0.5551977881387221, "grad_norm": 2.3981242179870605, "learning_rate": 4.4672438622664125e-05, "loss": 0.314291787147522, "step": 129320 }, { "epoch": 0.5552407202287422, "grad_norm": 0.017882850021123886, "learning_rate": 4.46681269025465e-05, "loss": 0.016638435423374176, "step": 129330 }, { "epoch": 0.5552836523187622, "grad_norm": 0.0006807830650359392, "learning_rate": 4.466381518242888e-05, "loss": 0.26828172206878664, "step": 129340 }, { "epoch": 0.5553265844087822, "grad_norm": 0.004719418473541737, "learning_rate": 4.465950346231126e-05, "loss": 0.14387308359146117, "step": 129350 }, { "epoch": 0.5553695164988022, "grad_norm": 0.0066816494800150394, "learning_rate": 4.465519174219363e-05, "loss": 0.14007120132446288, "step": 129360 }, { "epoch": 0.5554124485888222, "grad_norm": 0.0010613937629386783, "learning_rate": 4.465088002207601e-05, "loss": 0.0076937094330787655, "step": 129370 }, { "epoch": 0.5554553806788423, "grad_norm": 1.7576144933700562, "learning_rate": 4.464656830195839e-05, "loss": 0.3215049743652344, "step": 129380 }, { "epoch": 0.5554983127688622, "grad_norm": 8.515897750854492, "learning_rate": 4.4642256581840766e-05, "loss": 0.2446056604385376, "step": 129390 }, { "epoch": 0.5555412448588822, "grad_norm": 0.01369958184659481, "learning_rate": 4.4637944861723137e-05, "loss": 0.21584432125091552, "step": 129400 }, { "epoch": 0.5555841769489023, "grad_norm": 0.005753880832344294, "learning_rate": 4.4633633141605514e-05, "loss": 0.062202876806259154, "step": 129410 }, { "epoch": 0.5556271090389222, "grad_norm": 0.45579853653907776, "learning_rate": 4.462932142148789e-05, "loss": 0.2092193603515625, "step": 129420 }, { "epoch": 0.5556700411289422, "grad_norm": 0.007810491602867842, "learning_rate": 4.462500970137027e-05, "loss": 0.05864572525024414, "step": 129430 }, { "epoch": 0.5557129732189623, "grad_norm": 0.005244900938123465, "learning_rate": 4.462069798125264e-05, "loss": 0.20220718383789063, "step": 129440 }, { "epoch": 0.5557559053089822, "grad_norm": 1.7433593273162842, "learning_rate": 4.4616386261135017e-05, "loss": 0.34491286277770994, "step": 129450 }, { "epoch": 0.5557988373990023, "grad_norm": 0.5952501893043518, "learning_rate": 4.4612074541017394e-05, "loss": 0.19884873628616334, "step": 129460 }, { "epoch": 0.5558417694890223, "grad_norm": 0.05651632323861122, "learning_rate": 4.460776282089977e-05, "loss": 0.324522876739502, "step": 129470 }, { "epoch": 0.5558847015790422, "grad_norm": 0.29743555188179016, "learning_rate": 4.460345110078215e-05, "loss": 0.42862529754638673, "step": 129480 }, { "epoch": 0.5559276336690623, "grad_norm": 0.037468746304512024, "learning_rate": 4.4599139380664526e-05, "loss": 0.33815324306488037, "step": 129490 }, { "epoch": 0.5559705657590823, "grad_norm": 1.3633570671081543, "learning_rate": 4.45948276605469e-05, "loss": 0.31829593181610105, "step": 129500 }, { "epoch": 0.5560134978491023, "grad_norm": 2.867215633392334, "learning_rate": 4.459051594042928e-05, "loss": 0.24856970310211182, "step": 129510 }, { "epoch": 0.5560564299391223, "grad_norm": 0.30264541506767273, "learning_rate": 4.458620422031166e-05, "loss": 0.1394771933555603, "step": 129520 }, { "epoch": 0.5560993620291423, "grad_norm": 3.5270214080810547, "learning_rate": 4.458189250019403e-05, "loss": 0.11074033975601197, "step": 129530 }, { "epoch": 0.5561422941191623, "grad_norm": 0.8371272087097168, "learning_rate": 4.4577580780076406e-05, "loss": 0.15264673233032228, "step": 129540 }, { "epoch": 0.5561852262091823, "grad_norm": 0.09782962501049042, "learning_rate": 4.457326905995878e-05, "loss": 0.24928109645843505, "step": 129550 }, { "epoch": 0.5562281582992024, "grad_norm": 1.7487066984176636, "learning_rate": 4.456895733984116e-05, "loss": 0.3725933313369751, "step": 129560 }, { "epoch": 0.5562710903892223, "grad_norm": 0.0029988440219312906, "learning_rate": 4.456464561972353e-05, "loss": 0.3409987688064575, "step": 129570 }, { "epoch": 0.5563140224792423, "grad_norm": 1.3888320922851562, "learning_rate": 4.456033389960591e-05, "loss": 0.15952913761138915, "step": 129580 }, { "epoch": 0.5563569545692624, "grad_norm": 0.002297930186614394, "learning_rate": 4.4556022179488286e-05, "loss": 0.28969786167144773, "step": 129590 }, { "epoch": 0.5563998866592823, "grad_norm": 2.3459360599517822, "learning_rate": 4.455171045937066e-05, "loss": 0.270216703414917, "step": 129600 }, { "epoch": 0.5564428187493023, "grad_norm": 2.2138748168945312, "learning_rate": 4.454739873925304e-05, "loss": 0.3445254325866699, "step": 129610 }, { "epoch": 0.5564857508393224, "grad_norm": 0.04111243784427643, "learning_rate": 4.454308701913542e-05, "loss": 0.24446432590484618, "step": 129620 }, { "epoch": 0.5565286829293423, "grad_norm": 6.398478031158447, "learning_rate": 4.4538775299017795e-05, "loss": 0.25752198696136475, "step": 129630 }, { "epoch": 0.5565716150193624, "grad_norm": 0.03157360851764679, "learning_rate": 4.453446357890017e-05, "loss": 0.25403120517730715, "step": 129640 }, { "epoch": 0.5566145471093824, "grad_norm": 0.6314763426780701, "learning_rate": 4.453015185878254e-05, "loss": 0.2993044376373291, "step": 129650 }, { "epoch": 0.5566574791994023, "grad_norm": 0.11549243330955505, "learning_rate": 4.452584013866492e-05, "loss": 0.14770009517669677, "step": 129660 }, { "epoch": 0.5567004112894224, "grad_norm": 1.7644439935684204, "learning_rate": 4.45215284185473e-05, "loss": 0.26133854389190675, "step": 129670 }, { "epoch": 0.5567433433794424, "grad_norm": 1.5984668731689453, "learning_rate": 4.4517216698429675e-05, "loss": 0.32401697635650634, "step": 129680 }, { "epoch": 0.5567862754694624, "grad_norm": 3.6902859210968018, "learning_rate": 4.4512904978312046e-05, "loss": 0.19751888513565063, "step": 129690 }, { "epoch": 0.5568292075594824, "grad_norm": 0.004566490650177002, "learning_rate": 4.450859325819442e-05, "loss": 0.26257147789001467, "step": 129700 }, { "epoch": 0.5568721396495024, "grad_norm": 0.012955792248249054, "learning_rate": 4.45042815380768e-05, "loss": 0.2490767478942871, "step": 129710 }, { "epoch": 0.5569150717395224, "grad_norm": 0.005359560716897249, "learning_rate": 4.449996981795918e-05, "loss": 0.18116750717163085, "step": 129720 }, { "epoch": 0.5569580038295424, "grad_norm": 0.06892059743404388, "learning_rate": 4.4495658097841555e-05, "loss": 0.1894413113594055, "step": 129730 }, { "epoch": 0.5570009359195625, "grad_norm": 0.0009415155509486794, "learning_rate": 4.449134637772393e-05, "loss": 0.23534340858459474, "step": 129740 }, { "epoch": 0.5570438680095824, "grad_norm": 0.2022874653339386, "learning_rate": 4.448703465760631e-05, "loss": 0.3457710027694702, "step": 129750 }, { "epoch": 0.5570868000996024, "grad_norm": 0.19633248448371887, "learning_rate": 4.448272293748869e-05, "loss": 0.12056779861450195, "step": 129760 }, { "epoch": 0.5571297321896225, "grad_norm": 0.0035316748544573784, "learning_rate": 4.447841121737106e-05, "loss": 0.12530456781387328, "step": 129770 }, { "epoch": 0.5571726642796425, "grad_norm": 0.18400853872299194, "learning_rate": 4.4474099497253435e-05, "loss": 0.1890039324760437, "step": 129780 }, { "epoch": 0.5572155963696624, "grad_norm": 0.07773124426603317, "learning_rate": 4.446978777713581e-05, "loss": 0.3870898962020874, "step": 129790 }, { "epoch": 0.5572585284596825, "grad_norm": 0.06875111162662506, "learning_rate": 4.446547605701819e-05, "loss": 0.32651290893554685, "step": 129800 }, { "epoch": 0.5573014605497025, "grad_norm": 1.4054416418075562, "learning_rate": 4.446116433690056e-05, "loss": 0.3409204244613647, "step": 129810 }, { "epoch": 0.5573443926397225, "grad_norm": 5.326071739196777, "learning_rate": 4.445685261678294e-05, "loss": 0.2509272336959839, "step": 129820 }, { "epoch": 0.5573873247297425, "grad_norm": 0.06678488850593567, "learning_rate": 4.4452540896665315e-05, "loss": 0.09128606915473939, "step": 129830 }, { "epoch": 0.5574302568197625, "grad_norm": 2.2432029247283936, "learning_rate": 4.444822917654769e-05, "loss": 0.19574520587921143, "step": 129840 }, { "epoch": 0.5574731889097825, "grad_norm": 8.737180709838867, "learning_rate": 4.4443917456430076e-05, "loss": 0.3267951726913452, "step": 129850 }, { "epoch": 0.5575161209998025, "grad_norm": 0.00803994107991457, "learning_rate": 4.443960573631245e-05, "loss": 0.36591262817382814, "step": 129860 }, { "epoch": 0.5575590530898226, "grad_norm": 1.9719129800796509, "learning_rate": 4.4435294016194824e-05, "loss": 0.34435100555419923, "step": 129870 }, { "epoch": 0.5576019851798425, "grad_norm": 0.20269423723220825, "learning_rate": 4.44309822960772e-05, "loss": 0.1376429319381714, "step": 129880 }, { "epoch": 0.5576449172698625, "grad_norm": 0.07807984948158264, "learning_rate": 4.442667057595958e-05, "loss": 0.08359581232070923, "step": 129890 }, { "epoch": 0.5576878493598826, "grad_norm": 0.1486503630876541, "learning_rate": 4.442235885584195e-05, "loss": 0.35685715675354, "step": 129900 }, { "epoch": 0.5577307814499025, "grad_norm": 0.018347645178437233, "learning_rate": 4.441804713572433e-05, "loss": 0.15195566415786743, "step": 129910 }, { "epoch": 0.5577737135399226, "grad_norm": 0.09118858724832535, "learning_rate": 4.4413735415606704e-05, "loss": 0.2308788299560547, "step": 129920 }, { "epoch": 0.5578166456299426, "grad_norm": 0.5971916913986206, "learning_rate": 4.440942369548908e-05, "loss": 0.09032097458839417, "step": 129930 }, { "epoch": 0.5578595777199625, "grad_norm": 3.844421625137329, "learning_rate": 4.440511197537145e-05, "loss": 0.3063809871673584, "step": 129940 }, { "epoch": 0.5579025098099826, "grad_norm": 2.842432975769043, "learning_rate": 4.440080025525383e-05, "loss": 0.2679614543914795, "step": 129950 }, { "epoch": 0.5579454419000026, "grad_norm": 2.7169439792633057, "learning_rate": 4.4396488535136214e-05, "loss": 0.2645352840423584, "step": 129960 }, { "epoch": 0.5579883739900225, "grad_norm": 1.920378565788269, "learning_rate": 4.439217681501859e-05, "loss": 0.22816011905670167, "step": 129970 }, { "epoch": 0.5580313060800426, "grad_norm": 0.016523117199540138, "learning_rate": 4.438786509490096e-05, "loss": 0.2725505828857422, "step": 129980 }, { "epoch": 0.5580742381700626, "grad_norm": 1.1269477605819702, "learning_rate": 4.438355337478334e-05, "loss": 0.27584056854248046, "step": 129990 }, { "epoch": 0.5581171702600826, "grad_norm": 0.30185261368751526, "learning_rate": 4.4379241654665716e-05, "loss": 0.14589924812316896, "step": 130000 }, { "epoch": 0.5581171702600826, "eval_loss": 0.396757572889328, "eval_runtime": 27.2906, "eval_samples_per_second": 3.664, "eval_steps_per_second": 3.664, "step": 130000 }, { "epoch": 0.5581601023501026, "grad_norm": 0.13702532649040222, "learning_rate": 4.4374929934548094e-05, "loss": 0.3244907379150391, "step": 130010 }, { "epoch": 0.5582030344401226, "grad_norm": 1.8142026662826538, "learning_rate": 4.4370618214430464e-05, "loss": 0.2708948850631714, "step": 130020 }, { "epoch": 0.5582459665301426, "grad_norm": 0.06915269047021866, "learning_rate": 4.436630649431284e-05, "loss": 0.2838058710098267, "step": 130030 }, { "epoch": 0.5582888986201626, "grad_norm": 0.12360896915197372, "learning_rate": 4.436199477419522e-05, "loss": 0.10447860956192016, "step": 130040 }, { "epoch": 0.5583318307101827, "grad_norm": 5.960447788238525, "learning_rate": 4.4357683054077596e-05, "loss": 0.13814549446105956, "step": 130050 }, { "epoch": 0.5583747628002026, "grad_norm": 0.5248103737831116, "learning_rate": 4.435337133395997e-05, "loss": 0.21094183921813964, "step": 130060 }, { "epoch": 0.5584176948902226, "grad_norm": 0.48741307854652405, "learning_rate": 4.434905961384235e-05, "loss": 0.10333422422409058, "step": 130070 }, { "epoch": 0.5584606269802427, "grad_norm": 1.0012564659118652, "learning_rate": 4.434474789372473e-05, "loss": 0.2197781801223755, "step": 130080 }, { "epoch": 0.5585035590702626, "grad_norm": 0.07096364349126816, "learning_rate": 4.4340436173607105e-05, "loss": 0.2918126583099365, "step": 130090 }, { "epoch": 0.5585464911602827, "grad_norm": 1.9969171285629272, "learning_rate": 4.4336124453489476e-05, "loss": 0.15286139249801636, "step": 130100 }, { "epoch": 0.5585894232503027, "grad_norm": 0.4066997766494751, "learning_rate": 4.433181273337185e-05, "loss": 0.25106277465820315, "step": 130110 }, { "epoch": 0.5586323553403226, "grad_norm": 1.3090592622756958, "learning_rate": 4.432750101325423e-05, "loss": 0.2082353115081787, "step": 130120 }, { "epoch": 0.5586752874303427, "grad_norm": 0.007877206429839134, "learning_rate": 4.432318929313661e-05, "loss": 0.22714662551879883, "step": 130130 }, { "epoch": 0.5587182195203627, "grad_norm": 0.03949381038546562, "learning_rate": 4.431887757301898e-05, "loss": 0.1773894190788269, "step": 130140 }, { "epoch": 0.5587611516103826, "grad_norm": 0.2964007556438446, "learning_rate": 4.4314565852901356e-05, "loss": 0.056935679912567136, "step": 130150 }, { "epoch": 0.5588040837004027, "grad_norm": 3.9917354583740234, "learning_rate": 4.431025413278373e-05, "loss": 0.10850996971130371, "step": 130160 }, { "epoch": 0.5588470157904227, "grad_norm": 0.019908474758267403, "learning_rate": 4.430594241266611e-05, "loss": 0.15592384338378906, "step": 130170 }, { "epoch": 0.5588899478804427, "grad_norm": 0.9341161847114563, "learning_rate": 4.430163069254849e-05, "loss": 0.11459755897521973, "step": 130180 }, { "epoch": 0.5589328799704627, "grad_norm": 0.004427948500961065, "learning_rate": 4.4297318972430865e-05, "loss": 0.17416831254959106, "step": 130190 }, { "epoch": 0.5589758120604827, "grad_norm": 0.29199692606925964, "learning_rate": 4.429300725231324e-05, "loss": 0.4187572956085205, "step": 130200 }, { "epoch": 0.5590187441505028, "grad_norm": 2.022859573364258, "learning_rate": 4.428869553219562e-05, "loss": 0.11252399682998657, "step": 130210 }, { "epoch": 0.5590616762405227, "grad_norm": 2.3882155418395996, "learning_rate": 4.4284383812078e-05, "loss": 0.1012803077697754, "step": 130220 }, { "epoch": 0.5591046083305428, "grad_norm": 0.0023776378948241472, "learning_rate": 4.428007209196037e-05, "loss": 0.3746946811676025, "step": 130230 }, { "epoch": 0.5591475404205628, "grad_norm": 1.6952602863311768, "learning_rate": 4.4275760371842745e-05, "loss": 0.28969037532806396, "step": 130240 }, { "epoch": 0.5591904725105827, "grad_norm": 0.619696319103241, "learning_rate": 4.427144865172512e-05, "loss": 0.22678766250610352, "step": 130250 }, { "epoch": 0.5592334046006028, "grad_norm": 8.467891693115234, "learning_rate": 4.42671369316075e-05, "loss": 0.4200021743774414, "step": 130260 }, { "epoch": 0.5592763366906228, "grad_norm": 5.092950820922852, "learning_rate": 4.426282521148987e-05, "loss": 0.22055649757385254, "step": 130270 }, { "epoch": 0.5593192687806428, "grad_norm": 0.061911582946777344, "learning_rate": 4.425851349137225e-05, "loss": 0.2685711145401001, "step": 130280 }, { "epoch": 0.5593622008706628, "grad_norm": 2.5211803913116455, "learning_rate": 4.4254201771254625e-05, "loss": 0.2446134328842163, "step": 130290 }, { "epoch": 0.5594051329606828, "grad_norm": 0.7915776371955872, "learning_rate": 4.4249890051137e-05, "loss": 0.12928345203399658, "step": 130300 }, { "epoch": 0.5594480650507028, "grad_norm": 1.423632025718689, "learning_rate": 4.424557833101938e-05, "loss": 0.11890238523483276, "step": 130310 }, { "epoch": 0.5594909971407228, "grad_norm": 5.311061382293701, "learning_rate": 4.424126661090176e-05, "loss": 0.16063714027404785, "step": 130320 }, { "epoch": 0.5595339292307429, "grad_norm": 2.1655609607696533, "learning_rate": 4.4236954890784135e-05, "loss": 0.4018458366394043, "step": 130330 }, { "epoch": 0.5595768613207628, "grad_norm": 0.07226397097110748, "learning_rate": 4.423264317066651e-05, "loss": 0.02289922386407852, "step": 130340 }, { "epoch": 0.5596197934107828, "grad_norm": 1.9848915338516235, "learning_rate": 4.422833145054888e-05, "loss": 0.2881872892379761, "step": 130350 }, { "epoch": 0.5596627255008029, "grad_norm": 0.004091145936399698, "learning_rate": 4.422401973043126e-05, "loss": 0.16486506462097167, "step": 130360 }, { "epoch": 0.5597056575908228, "grad_norm": 0.008164674043655396, "learning_rate": 4.421970801031364e-05, "loss": 0.039905214309692384, "step": 130370 }, { "epoch": 0.5597485896808428, "grad_norm": 0.020558584481477737, "learning_rate": 4.4215396290196015e-05, "loss": 0.10911152362823487, "step": 130380 }, { "epoch": 0.5597915217708629, "grad_norm": 0.06221897155046463, "learning_rate": 4.4211084570078385e-05, "loss": 0.22285866737365723, "step": 130390 }, { "epoch": 0.5598344538608828, "grad_norm": 6.663233280181885, "learning_rate": 4.420677284996076e-05, "loss": 0.22175629138946534, "step": 130400 }, { "epoch": 0.5598773859509029, "grad_norm": 0.8797337412834167, "learning_rate": 4.420246112984314e-05, "loss": 0.20046689510345458, "step": 130410 }, { "epoch": 0.5599203180409229, "grad_norm": 6.092191696166992, "learning_rate": 4.419814940972552e-05, "loss": 0.27480373382568357, "step": 130420 }, { "epoch": 0.5599632501309428, "grad_norm": 0.015885712578892708, "learning_rate": 4.4193837689607894e-05, "loss": 0.09221312403678894, "step": 130430 }, { "epoch": 0.5600061822209629, "grad_norm": 1.9088473320007324, "learning_rate": 4.418952596949027e-05, "loss": 0.42419586181640623, "step": 130440 }, { "epoch": 0.5600491143109829, "grad_norm": 6.146290302276611, "learning_rate": 4.418521424937265e-05, "loss": 0.2966385126113892, "step": 130450 }, { "epoch": 0.5600920464010029, "grad_norm": 0.005371685605496168, "learning_rate": 4.4180902529255027e-05, "loss": 0.19647728204727172, "step": 130460 }, { "epoch": 0.5601349784910229, "grad_norm": 0.14021477103233337, "learning_rate": 4.41765908091374e-05, "loss": 0.24573662281036376, "step": 130470 }, { "epoch": 0.5601779105810429, "grad_norm": 1.327796220779419, "learning_rate": 4.4172279089019774e-05, "loss": 0.3573757648468018, "step": 130480 }, { "epoch": 0.5602208426710629, "grad_norm": 1.4445470571517944, "learning_rate": 4.416796736890215e-05, "loss": 0.2246248245239258, "step": 130490 }, { "epoch": 0.5602637747610829, "grad_norm": 0.6600468158721924, "learning_rate": 4.416365564878453e-05, "loss": 0.15008944272994995, "step": 130500 }, { "epoch": 0.560306706851103, "grad_norm": 0.23366093635559082, "learning_rate": 4.41593439286669e-05, "loss": 0.3061052799224854, "step": 130510 }, { "epoch": 0.5603496389411229, "grad_norm": 2.143343687057495, "learning_rate": 4.415503220854928e-05, "loss": 0.2754983425140381, "step": 130520 }, { "epoch": 0.5603925710311429, "grad_norm": 0.9091231822967529, "learning_rate": 4.4150720488431654e-05, "loss": 0.17608437538146973, "step": 130530 }, { "epoch": 0.560435503121163, "grad_norm": 0.24302370846271515, "learning_rate": 4.414640876831403e-05, "loss": 0.21974198818206786, "step": 130540 }, { "epoch": 0.5604784352111829, "grad_norm": 0.24109004437923431, "learning_rate": 4.414209704819641e-05, "loss": 0.2817169427871704, "step": 130550 }, { "epoch": 0.5605213673012029, "grad_norm": 2.0368618965148926, "learning_rate": 4.4137785328078786e-05, "loss": 0.2441883087158203, "step": 130560 }, { "epoch": 0.560564299391223, "grad_norm": 3.77104115486145, "learning_rate": 4.4133473607961164e-05, "loss": 0.11188125610351562, "step": 130570 }, { "epoch": 0.5606072314812429, "grad_norm": 0.8214830160140991, "learning_rate": 4.412916188784354e-05, "loss": 0.428134822845459, "step": 130580 }, { "epoch": 0.560650163571263, "grad_norm": 1.9323092699050903, "learning_rate": 4.412485016772592e-05, "loss": 0.05530650019645691, "step": 130590 }, { "epoch": 0.560693095661283, "grad_norm": 0.003310135565698147, "learning_rate": 4.412053844760829e-05, "loss": 0.2841073751449585, "step": 130600 }, { "epoch": 0.5607360277513029, "grad_norm": 0.0028873884584754705, "learning_rate": 4.4116226727490666e-05, "loss": 0.2064734935760498, "step": 130610 }, { "epoch": 0.560778959841323, "grad_norm": 0.4137687087059021, "learning_rate": 4.4111915007373044e-05, "loss": 0.15328125953674315, "step": 130620 }, { "epoch": 0.560821891931343, "grad_norm": 0.1398584544658661, "learning_rate": 4.410760328725542e-05, "loss": 0.20579054355621337, "step": 130630 }, { "epoch": 0.5608648240213631, "grad_norm": 0.7580888271331787, "learning_rate": 4.410329156713779e-05, "loss": 0.3941477298736572, "step": 130640 }, { "epoch": 0.560907756111383, "grad_norm": 1.2244528532028198, "learning_rate": 4.409897984702017e-05, "loss": 0.13406124114990234, "step": 130650 }, { "epoch": 0.560950688201403, "grad_norm": 0.018945829942822456, "learning_rate": 4.409466812690255e-05, "loss": 0.09961066246032715, "step": 130660 }, { "epoch": 0.5609936202914231, "grad_norm": 6.098886013031006, "learning_rate": 4.409035640678493e-05, "loss": 0.3770725727081299, "step": 130670 }, { "epoch": 0.561036552381443, "grad_norm": 13.162020683288574, "learning_rate": 4.40860446866673e-05, "loss": 0.06884355545043945, "step": 130680 }, { "epoch": 0.561079484471463, "grad_norm": 0.17313456535339355, "learning_rate": 4.408173296654968e-05, "loss": 0.22985308170318602, "step": 130690 }, { "epoch": 0.5611224165614831, "grad_norm": 0.05977223441004753, "learning_rate": 4.4077421246432056e-05, "loss": 0.1930696964263916, "step": 130700 }, { "epoch": 0.561165348651503, "grad_norm": 0.021493813022971153, "learning_rate": 4.407310952631443e-05, "loss": 0.11986234188079833, "step": 130710 }, { "epoch": 0.5612082807415231, "grad_norm": 2.4718406200408936, "learning_rate": 4.4068797806196804e-05, "loss": 0.13716913461685182, "step": 130720 }, { "epoch": 0.5612512128315431, "grad_norm": 0.5843445062637329, "learning_rate": 4.406448608607918e-05, "loss": 0.2553562641143799, "step": 130730 }, { "epoch": 0.561294144921563, "grad_norm": 0.7953113317489624, "learning_rate": 4.406017436596156e-05, "loss": 0.15884344577789306, "step": 130740 }, { "epoch": 0.5613370770115831, "grad_norm": 0.041502032428979874, "learning_rate": 4.4055862645843936e-05, "loss": 0.5208531856536865, "step": 130750 }, { "epoch": 0.5613800091016031, "grad_norm": 7.92971658706665, "learning_rate": 4.4051550925726306e-05, "loss": 0.31333098411560056, "step": 130760 }, { "epoch": 0.5614229411916231, "grad_norm": 0.10923028737306595, "learning_rate": 4.404723920560869e-05, "loss": 0.24447739124298096, "step": 130770 }, { "epoch": 0.5614658732816431, "grad_norm": 0.002735383342951536, "learning_rate": 4.404292748549107e-05, "loss": 0.26939570903778076, "step": 130780 }, { "epoch": 0.5615088053716631, "grad_norm": 0.04318393021821976, "learning_rate": 4.4038615765373445e-05, "loss": 0.2544058322906494, "step": 130790 }, { "epoch": 0.5615517374616831, "grad_norm": 0.25320038199424744, "learning_rate": 4.4034304045255816e-05, "loss": 0.028018417954444885, "step": 130800 }, { "epoch": 0.5615946695517031, "grad_norm": 0.05331530421972275, "learning_rate": 4.402999232513819e-05, "loss": 0.3315607786178589, "step": 130810 }, { "epoch": 0.5616376016417232, "grad_norm": 0.031914785504341125, "learning_rate": 4.402568060502057e-05, "loss": 0.33244547843933103, "step": 130820 }, { "epoch": 0.5616805337317431, "grad_norm": 0.039703018963336945, "learning_rate": 4.402136888490295e-05, "loss": 0.2509101390838623, "step": 130830 }, { "epoch": 0.5617234658217631, "grad_norm": 10.16286563873291, "learning_rate": 4.401705716478532e-05, "loss": 0.11450953483581543, "step": 130840 }, { "epoch": 0.5617663979117832, "grad_norm": 4.78218936920166, "learning_rate": 4.4012745444667695e-05, "loss": 0.035375076532363894, "step": 130850 }, { "epoch": 0.5618093300018031, "grad_norm": 0.626234769821167, "learning_rate": 4.400843372455007e-05, "loss": 0.11089576482772827, "step": 130860 }, { "epoch": 0.5618522620918232, "grad_norm": 0.020610149949789047, "learning_rate": 4.400412200443245e-05, "loss": 0.2556567430496216, "step": 130870 }, { "epoch": 0.5618951941818432, "grad_norm": 5.132352352142334, "learning_rate": 4.399981028431483e-05, "loss": 0.3633986473083496, "step": 130880 }, { "epoch": 0.5619381262718631, "grad_norm": 0.013127193786203861, "learning_rate": 4.3995498564197205e-05, "loss": 0.13249205350875853, "step": 130890 }, { "epoch": 0.5619810583618832, "grad_norm": 0.0006753028719685972, "learning_rate": 4.399118684407958e-05, "loss": 0.09555227160453797, "step": 130900 }, { "epoch": 0.5620239904519032, "grad_norm": 1.3687077760696411, "learning_rate": 4.398687512396196e-05, "loss": 0.6257006645202636, "step": 130910 }, { "epoch": 0.5620669225419231, "grad_norm": 0.6645755171775818, "learning_rate": 4.398256340384433e-05, "loss": 0.22954320907592773, "step": 130920 }, { "epoch": 0.5621098546319432, "grad_norm": 0.013810350559651852, "learning_rate": 4.397825168372671e-05, "loss": 0.08926523327827454, "step": 130930 }, { "epoch": 0.5621527867219632, "grad_norm": 3.6319634914398193, "learning_rate": 4.3973939963609085e-05, "loss": 0.23496651649475098, "step": 130940 }, { "epoch": 0.5621957188119832, "grad_norm": 1.0557408332824707, "learning_rate": 4.396962824349146e-05, "loss": 0.17628427743911743, "step": 130950 }, { "epoch": 0.5622386509020032, "grad_norm": 0.4414917230606079, "learning_rate": 4.396531652337384e-05, "loss": 0.19747617244720458, "step": 130960 }, { "epoch": 0.5622815829920232, "grad_norm": 0.00444777961820364, "learning_rate": 4.396100480325621e-05, "loss": 0.1586161732673645, "step": 130970 }, { "epoch": 0.5623245150820432, "grad_norm": 0.053738441318273544, "learning_rate": 4.395669308313859e-05, "loss": 0.25755629539489744, "step": 130980 }, { "epoch": 0.5623674471720632, "grad_norm": 0.2864459156990051, "learning_rate": 4.3952381363020965e-05, "loss": 0.02709081768989563, "step": 130990 }, { "epoch": 0.5624103792620833, "grad_norm": 0.0064376420341432095, "learning_rate": 4.394806964290334e-05, "loss": 0.2746001958847046, "step": 131000 }, { "epoch": 0.5624103792620833, "eval_loss": 0.3990839719772339, "eval_runtime": 27.1241, "eval_samples_per_second": 3.687, "eval_steps_per_second": 3.687, "step": 131000 }, { "epoch": 0.5624533113521032, "grad_norm": 0.8062077164649963, "learning_rate": 4.394375792278572e-05, "loss": 0.27772514820098876, "step": 131010 }, { "epoch": 0.5624962434421232, "grad_norm": 0.6455739736557007, "learning_rate": 4.39394462026681e-05, "loss": 0.02453451752662659, "step": 131020 }, { "epoch": 0.5625391755321433, "grad_norm": 0.08529993891716003, "learning_rate": 4.3935134482550474e-05, "loss": 0.1866888642311096, "step": 131030 }, { "epoch": 0.5625821076221632, "grad_norm": 1.3479008674621582, "learning_rate": 4.393082276243285e-05, "loss": 0.33279035091400144, "step": 131040 }, { "epoch": 0.5626250397121833, "grad_norm": 15.479037284851074, "learning_rate": 4.392651104231522e-05, "loss": 0.20916690826416015, "step": 131050 }, { "epoch": 0.5626679718022033, "grad_norm": 0.09797138720750809, "learning_rate": 4.39221993221976e-05, "loss": 0.04719350934028625, "step": 131060 }, { "epoch": 0.5627109038922233, "grad_norm": 3.2566111087799072, "learning_rate": 4.391788760207998e-05, "loss": 0.23531157970428468, "step": 131070 }, { "epoch": 0.5627538359822433, "grad_norm": 0.12084054201841354, "learning_rate": 4.3913575881962354e-05, "loss": 0.11477060317993164, "step": 131080 }, { "epoch": 0.5627967680722633, "grad_norm": 9.098376274108887, "learning_rate": 4.3909264161844725e-05, "loss": 0.3104034185409546, "step": 131090 }, { "epoch": 0.5628397001622834, "grad_norm": 0.09239120036363602, "learning_rate": 4.39049524417271e-05, "loss": 0.16618393659591674, "step": 131100 }, { "epoch": 0.5628826322523033, "grad_norm": 1.3476864099502563, "learning_rate": 4.390064072160948e-05, "loss": 0.11332845687866211, "step": 131110 }, { "epoch": 0.5629255643423233, "grad_norm": 0.9282361268997192, "learning_rate": 4.3896329001491857e-05, "loss": 0.25531725883483886, "step": 131120 }, { "epoch": 0.5629684964323434, "grad_norm": 0.039876118302345276, "learning_rate": 4.3892017281374234e-05, "loss": 0.19124466180801392, "step": 131130 }, { "epoch": 0.5630114285223633, "grad_norm": 0.0030488877091556787, "learning_rate": 4.388770556125661e-05, "loss": 0.2110595464706421, "step": 131140 }, { "epoch": 0.5630543606123833, "grad_norm": 0.10496467351913452, "learning_rate": 4.388339384113899e-05, "loss": 0.33485052585601804, "step": 131150 }, { "epoch": 0.5630972927024034, "grad_norm": 0.3290857970714569, "learning_rate": 4.3879082121021366e-05, "loss": 0.1390450119972229, "step": 131160 }, { "epoch": 0.5631402247924233, "grad_norm": 0.0027307202108204365, "learning_rate": 4.3874770400903737e-05, "loss": 0.3555908679962158, "step": 131170 }, { "epoch": 0.5631831568824434, "grad_norm": 10.344257354736328, "learning_rate": 4.3870458680786114e-05, "loss": 0.31735968589782715, "step": 131180 }, { "epoch": 0.5632260889724634, "grad_norm": 0.0717422142624855, "learning_rate": 4.386614696066849e-05, "loss": 0.20104632377624512, "step": 131190 }, { "epoch": 0.5632690210624833, "grad_norm": 0.003424069145694375, "learning_rate": 4.386183524055087e-05, "loss": 0.2732577323913574, "step": 131200 }, { "epoch": 0.5633119531525034, "grad_norm": 2.7449123859405518, "learning_rate": 4.385752352043324e-05, "loss": 0.2550164222717285, "step": 131210 }, { "epoch": 0.5633548852425234, "grad_norm": 0.03599822148680687, "learning_rate": 4.3853211800315616e-05, "loss": 0.19458101987838744, "step": 131220 }, { "epoch": 0.5633978173325433, "grad_norm": 0.17414642870426178, "learning_rate": 4.3848900080197994e-05, "loss": 0.22085530757904054, "step": 131230 }, { "epoch": 0.5634407494225634, "grad_norm": 24.48627281188965, "learning_rate": 4.384458836008037e-05, "loss": 0.134981906414032, "step": 131240 }, { "epoch": 0.5634836815125834, "grad_norm": 0.002881893888115883, "learning_rate": 4.384027663996275e-05, "loss": 0.2002098321914673, "step": 131250 }, { "epoch": 0.5635266136026034, "grad_norm": 0.007777002640068531, "learning_rate": 4.3835964919845126e-05, "loss": 0.13187239170074463, "step": 131260 }, { "epoch": 0.5635695456926234, "grad_norm": 0.005385277327150106, "learning_rate": 4.38316531997275e-05, "loss": 0.07089147567749024, "step": 131270 }, { "epoch": 0.5636124777826435, "grad_norm": 0.007684089709073305, "learning_rate": 4.382734147960988e-05, "loss": 0.33400042057037355, "step": 131280 }, { "epoch": 0.5636554098726634, "grad_norm": 0.18498967587947845, "learning_rate": 4.382302975949225e-05, "loss": 0.19423152208328248, "step": 131290 }, { "epoch": 0.5636983419626834, "grad_norm": 0.3919488489627838, "learning_rate": 4.381871803937463e-05, "loss": 0.20056331157684326, "step": 131300 }, { "epoch": 0.5637412740527035, "grad_norm": 0.9636663794517517, "learning_rate": 4.3814406319257006e-05, "loss": 0.08319100737571716, "step": 131310 }, { "epoch": 0.5637842061427234, "grad_norm": 0.06585653126239777, "learning_rate": 4.381009459913938e-05, "loss": 0.1678343176841736, "step": 131320 }, { "epoch": 0.5638271382327434, "grad_norm": 0.20225432515144348, "learning_rate": 4.380578287902176e-05, "loss": 0.16132519245147706, "step": 131330 }, { "epoch": 0.5638700703227635, "grad_norm": 1.0769022703170776, "learning_rate": 4.380147115890413e-05, "loss": 0.5539116382598877, "step": 131340 }, { "epoch": 0.5639130024127834, "grad_norm": 0.03397184610366821, "learning_rate": 4.379715943878651e-05, "loss": 0.30623517036437986, "step": 131350 }, { "epoch": 0.5639559345028035, "grad_norm": 0.07383184134960175, "learning_rate": 4.3792847718668886e-05, "loss": 0.15672016143798828, "step": 131360 }, { "epoch": 0.5639988665928235, "grad_norm": 0.23466603457927704, "learning_rate": 4.378853599855127e-05, "loss": 0.193856680393219, "step": 131370 }, { "epoch": 0.5640417986828434, "grad_norm": 0.0022643166594207287, "learning_rate": 4.378422427843364e-05, "loss": 0.028151240944862366, "step": 131380 }, { "epoch": 0.5640847307728635, "grad_norm": 0.020985648036003113, "learning_rate": 4.377991255831602e-05, "loss": 0.24543752670288085, "step": 131390 }, { "epoch": 0.5641276628628835, "grad_norm": 1.4199275970458984, "learning_rate": 4.3775600838198395e-05, "loss": 0.3444381237030029, "step": 131400 }, { "epoch": 0.5641705949529034, "grad_norm": 0.010642913170158863, "learning_rate": 4.377128911808077e-05, "loss": 0.0823455810546875, "step": 131410 }, { "epoch": 0.5642135270429235, "grad_norm": 0.0010722661390900612, "learning_rate": 4.376697739796314e-05, "loss": 0.15871133804321289, "step": 131420 }, { "epoch": 0.5642564591329435, "grad_norm": 57.9321174621582, "learning_rate": 4.376266567784552e-05, "loss": 0.14185404777526855, "step": 131430 }, { "epoch": 0.5642993912229635, "grad_norm": 2.7046358585357666, "learning_rate": 4.37583539577279e-05, "loss": 0.41197805404663085, "step": 131440 }, { "epoch": 0.5643423233129835, "grad_norm": 0.12308663129806519, "learning_rate": 4.3754042237610275e-05, "loss": 0.2550692319869995, "step": 131450 }, { "epoch": 0.5643852554030035, "grad_norm": 2.411984920501709, "learning_rate": 4.3749730517492646e-05, "loss": 0.42504286766052246, "step": 131460 }, { "epoch": 0.5644281874930235, "grad_norm": 0.002382261911407113, "learning_rate": 4.374541879737502e-05, "loss": 0.1963081955909729, "step": 131470 }, { "epoch": 0.5644711195830435, "grad_norm": 0.0080162538215518, "learning_rate": 4.374110707725741e-05, "loss": 0.29774773120880127, "step": 131480 }, { "epoch": 0.5645140516730636, "grad_norm": 2.8535964488983154, "learning_rate": 4.3736795357139784e-05, "loss": 0.2715657472610474, "step": 131490 }, { "epoch": 0.5645569837630836, "grad_norm": 0.12202345579862595, "learning_rate": 4.3732483637022155e-05, "loss": 0.12172391414642333, "step": 131500 }, { "epoch": 0.5645999158531035, "grad_norm": 0.040142424404621124, "learning_rate": 4.372817191690453e-05, "loss": 0.2592360496520996, "step": 131510 }, { "epoch": 0.5646428479431236, "grad_norm": 1.4805283546447754, "learning_rate": 4.372386019678691e-05, "loss": 0.2895283460617065, "step": 131520 }, { "epoch": 0.5646857800331436, "grad_norm": 1.0980712175369263, "learning_rate": 4.371954847666929e-05, "loss": 0.26518375873565675, "step": 131530 }, { "epoch": 0.5647287121231636, "grad_norm": 0.09577146172523499, "learning_rate": 4.371523675655166e-05, "loss": 0.037435561418533325, "step": 131540 }, { "epoch": 0.5647716442131836, "grad_norm": 0.04082731157541275, "learning_rate": 4.3710925036434035e-05, "loss": 0.26807751655578616, "step": 131550 }, { "epoch": 0.5648145763032036, "grad_norm": 0.017090164124965668, "learning_rate": 4.370661331631641e-05, "loss": 0.1559891700744629, "step": 131560 }, { "epoch": 0.5648575083932236, "grad_norm": 1.3990321159362793, "learning_rate": 4.370230159619879e-05, "loss": 0.4341771125793457, "step": 131570 }, { "epoch": 0.5649004404832436, "grad_norm": 0.31079280376434326, "learning_rate": 4.369798987608116e-05, "loss": 0.12254334688186645, "step": 131580 }, { "epoch": 0.5649433725732637, "grad_norm": 1.2702585458755493, "learning_rate": 4.3693678155963544e-05, "loss": 0.13572754859924316, "step": 131590 }, { "epoch": 0.5649863046632836, "grad_norm": 0.020670155063271523, "learning_rate": 4.368936643584592e-05, "loss": 0.10454627275466918, "step": 131600 }, { "epoch": 0.5650292367533036, "grad_norm": 0.010946854017674923, "learning_rate": 4.36850547157283e-05, "loss": 0.1574021100997925, "step": 131610 }, { "epoch": 0.5650721688433237, "grad_norm": 0.022414082661271095, "learning_rate": 4.368074299561067e-05, "loss": 0.16641606092453004, "step": 131620 }, { "epoch": 0.5651151009333436, "grad_norm": 0.07762713730335236, "learning_rate": 4.367643127549305e-05, "loss": 0.3286291122436523, "step": 131630 }, { "epoch": 0.5651580330233636, "grad_norm": 0.0033479107078164816, "learning_rate": 4.3672119555375424e-05, "loss": 0.12209920883178711, "step": 131640 }, { "epoch": 0.5652009651133837, "grad_norm": 0.0006103302584961057, "learning_rate": 4.36678078352578e-05, "loss": 0.03355185687541962, "step": 131650 }, { "epoch": 0.5652438972034036, "grad_norm": 0.013331168331205845, "learning_rate": 4.366349611514017e-05, "loss": 0.15870726108551025, "step": 131660 }, { "epoch": 0.5652868292934237, "grad_norm": 0.006360860541462898, "learning_rate": 4.365918439502255e-05, "loss": 0.20273292064666748, "step": 131670 }, { "epoch": 0.5653297613834437, "grad_norm": 0.0005006656865589321, "learning_rate": 4.365487267490493e-05, "loss": 0.19195096492767333, "step": 131680 }, { "epoch": 0.5653726934734636, "grad_norm": 0.000357257726136595, "learning_rate": 4.3650560954787304e-05, "loss": 0.16304749250411987, "step": 131690 }, { "epoch": 0.5654156255634837, "grad_norm": 0.013322567567229271, "learning_rate": 4.364624923466968e-05, "loss": 0.37918355464935305, "step": 131700 }, { "epoch": 0.5654585576535037, "grad_norm": 0.0018318496877327561, "learning_rate": 4.364193751455206e-05, "loss": 0.3940425395965576, "step": 131710 }, { "epoch": 0.5655014897435237, "grad_norm": 1.2409061193466187, "learning_rate": 4.3637625794434436e-05, "loss": 0.3135698318481445, "step": 131720 }, { "epoch": 0.5655444218335437, "grad_norm": 0.11461956053972244, "learning_rate": 4.3633314074316814e-05, "loss": 0.19950928688049316, "step": 131730 }, { "epoch": 0.5655873539235637, "grad_norm": 0.07321076840162277, "learning_rate": 4.362900235419919e-05, "loss": 0.08899721503257751, "step": 131740 }, { "epoch": 0.5656302860135837, "grad_norm": 0.00024293846217915416, "learning_rate": 4.362469063408156e-05, "loss": 0.38968701362609864, "step": 131750 }, { "epoch": 0.5656732181036037, "grad_norm": 0.00021523365285247564, "learning_rate": 4.362037891396394e-05, "loss": 0.2745439767837524, "step": 131760 }, { "epoch": 0.5657161501936238, "grad_norm": 0.29328349232673645, "learning_rate": 4.3616067193846316e-05, "loss": 0.2557271957397461, "step": 131770 }, { "epoch": 0.5657590822836437, "grad_norm": 3.7032077312469482, "learning_rate": 4.3611755473728693e-05, "loss": 0.2522104263305664, "step": 131780 }, { "epoch": 0.5658020143736637, "grad_norm": 0.000676521216519177, "learning_rate": 4.3607443753611064e-05, "loss": 0.23237941265106202, "step": 131790 }, { "epoch": 0.5658449464636838, "grad_norm": 1.2005796432495117, "learning_rate": 4.360313203349344e-05, "loss": 0.22138490676879882, "step": 131800 }, { "epoch": 0.5658878785537037, "grad_norm": 0.49399295449256897, "learning_rate": 4.359882031337582e-05, "loss": 0.14007023572921753, "step": 131810 }, { "epoch": 0.5659308106437237, "grad_norm": 0.01492351945489645, "learning_rate": 4.3594508593258196e-05, "loss": 0.08878965377807617, "step": 131820 }, { "epoch": 0.5659737427337438, "grad_norm": 0.2737496495246887, "learning_rate": 4.3590196873140573e-05, "loss": 0.11913632154464722, "step": 131830 }, { "epoch": 0.5660166748237637, "grad_norm": 0.0028132752049714327, "learning_rate": 4.358588515302295e-05, "loss": 0.1496422529220581, "step": 131840 }, { "epoch": 0.5660596069137838, "grad_norm": 8.923117637634277, "learning_rate": 4.358157343290533e-05, "loss": 0.3230890274047852, "step": 131850 }, { "epoch": 0.5661025390038038, "grad_norm": 0.12785527110099792, "learning_rate": 4.3577261712787705e-05, "loss": 0.41931886672973634, "step": 131860 }, { "epoch": 0.5661454710938237, "grad_norm": 1.3026045560836792, "learning_rate": 4.3572949992670076e-05, "loss": 0.455197811126709, "step": 131870 }, { "epoch": 0.5661884031838438, "grad_norm": 6.643957138061523, "learning_rate": 4.356863827255245e-05, "loss": 0.1968802809715271, "step": 131880 }, { "epoch": 0.5662313352738638, "grad_norm": 0.10116653144359589, "learning_rate": 4.356432655243483e-05, "loss": 0.0696562647819519, "step": 131890 }, { "epoch": 0.5662742673638838, "grad_norm": 0.09920462220907211, "learning_rate": 4.356001483231721e-05, "loss": 0.12258206605911255, "step": 131900 }, { "epoch": 0.5663171994539038, "grad_norm": 0.06810770183801651, "learning_rate": 4.355570311219958e-05, "loss": 0.06588384509086609, "step": 131910 }, { "epoch": 0.5663601315439238, "grad_norm": 0.010695497505366802, "learning_rate": 4.3551391392081956e-05, "loss": 0.315065336227417, "step": 131920 }, { "epoch": 0.5664030636339439, "grad_norm": 0.014091585762798786, "learning_rate": 4.354707967196433e-05, "loss": 0.13193466663360595, "step": 131930 }, { "epoch": 0.5664459957239638, "grad_norm": 0.1220465674996376, "learning_rate": 4.354276795184671e-05, "loss": 0.3290372610092163, "step": 131940 }, { "epoch": 0.5664889278139839, "grad_norm": 1.8874152898788452, "learning_rate": 4.353845623172909e-05, "loss": 0.3399007558822632, "step": 131950 }, { "epoch": 0.5665318599040039, "grad_norm": 0.05992142856121063, "learning_rate": 4.3534144511611465e-05, "loss": 0.3105152606964111, "step": 131960 }, { "epoch": 0.5665747919940238, "grad_norm": 4.826685428619385, "learning_rate": 4.352983279149384e-05, "loss": 0.41585707664489746, "step": 131970 }, { "epoch": 0.5666177240840439, "grad_norm": 0.002173103392124176, "learning_rate": 4.352552107137622e-05, "loss": 0.05335950255393982, "step": 131980 }, { "epoch": 0.5666606561740639, "grad_norm": 0.002653565490618348, "learning_rate": 4.352120935125859e-05, "loss": 0.22259302139282228, "step": 131990 }, { "epoch": 0.5667035882640838, "grad_norm": 4.037930488586426, "learning_rate": 4.351689763114097e-05, "loss": 0.23362507820129394, "step": 132000 }, { "epoch": 0.5667035882640838, "eval_loss": 0.4016095995903015, "eval_runtime": 27.123, "eval_samples_per_second": 3.687, "eval_steps_per_second": 3.687, "step": 132000 }, { "epoch": 0.5667465203541039, "grad_norm": 1.3028349876403809, "learning_rate": 4.3512585911023345e-05, "loss": 0.16070892810821533, "step": 132010 }, { "epoch": 0.5667894524441239, "grad_norm": 4.420613765716553, "learning_rate": 4.350827419090572e-05, "loss": 0.2285766839981079, "step": 132020 }, { "epoch": 0.5668323845341439, "grad_norm": 0.03554035350680351, "learning_rate": 4.35039624707881e-05, "loss": 0.19530502557754517, "step": 132030 }, { "epoch": 0.5668753166241639, "grad_norm": 5.485271453857422, "learning_rate": 4.349965075067047e-05, "loss": 0.23321409225463868, "step": 132040 }, { "epoch": 0.566918248714184, "grad_norm": 0.029682185500860214, "learning_rate": 4.349533903055285e-05, "loss": 0.30017259120941164, "step": 132050 }, { "epoch": 0.5669611808042039, "grad_norm": 0.03592360019683838, "learning_rate": 4.3491027310435225e-05, "loss": 0.1560931086540222, "step": 132060 }, { "epoch": 0.5670041128942239, "grad_norm": 1.4533315896987915, "learning_rate": 4.348671559031761e-05, "loss": 0.26650247573852537, "step": 132070 }, { "epoch": 0.567047044984244, "grad_norm": 0.13568095862865448, "learning_rate": 4.348240387019998e-05, "loss": 0.2090909719467163, "step": 132080 }, { "epoch": 0.5670899770742639, "grad_norm": 0.04799778759479523, "learning_rate": 4.347809215008236e-05, "loss": 0.3779847860336304, "step": 132090 }, { "epoch": 0.5671329091642839, "grad_norm": 0.007508368697017431, "learning_rate": 4.3473780429964735e-05, "loss": 0.2174311876296997, "step": 132100 }, { "epoch": 0.567175841254304, "grad_norm": 1.3405908346176147, "learning_rate": 4.346946870984711e-05, "loss": 0.14547221660614013, "step": 132110 }, { "epoch": 0.5672187733443239, "grad_norm": 1.839350938796997, "learning_rate": 4.346515698972948e-05, "loss": 0.2051999568939209, "step": 132120 }, { "epoch": 0.567261705434344, "grad_norm": 1.1987416744232178, "learning_rate": 4.346084526961186e-05, "loss": 0.15413864850997924, "step": 132130 }, { "epoch": 0.567304637524364, "grad_norm": 0.0008382200030609965, "learning_rate": 4.345653354949424e-05, "loss": 0.053211706876754764, "step": 132140 }, { "epoch": 0.5673475696143839, "grad_norm": 0.9950153231620789, "learning_rate": 4.3452221829376614e-05, "loss": 0.5215589523315429, "step": 132150 }, { "epoch": 0.567390501704404, "grad_norm": 1.078520655632019, "learning_rate": 4.3447910109258985e-05, "loss": 0.15002841949462892, "step": 132160 }, { "epoch": 0.567433433794424, "grad_norm": 0.6501953601837158, "learning_rate": 4.344359838914136e-05, "loss": 0.07707089185714722, "step": 132170 }, { "epoch": 0.567476365884444, "grad_norm": 3.6814463138580322, "learning_rate": 4.3439286669023747e-05, "loss": 0.3484069347381592, "step": 132180 }, { "epoch": 0.567519297974464, "grad_norm": 0.01013046782463789, "learning_rate": 4.3434974948906124e-05, "loss": 0.2226715564727783, "step": 132190 }, { "epoch": 0.567562230064484, "grad_norm": 0.07840029150247574, "learning_rate": 4.3430663228788494e-05, "loss": 0.0511742115020752, "step": 132200 }, { "epoch": 0.567605162154504, "grad_norm": 0.06084107980132103, "learning_rate": 4.342635150867087e-05, "loss": 0.18663891553878784, "step": 132210 }, { "epoch": 0.567648094244524, "grad_norm": 0.0007634757785126567, "learning_rate": 4.342203978855325e-05, "loss": 0.10051954984664917, "step": 132220 }, { "epoch": 0.567691026334544, "grad_norm": 0.05226648971438408, "learning_rate": 4.3417728068435626e-05, "loss": 0.11277986764907837, "step": 132230 }, { "epoch": 0.567733958424564, "grad_norm": 1.3088717460632324, "learning_rate": 4.3413416348318e-05, "loss": 0.23899502754211427, "step": 132240 }, { "epoch": 0.567776890514584, "grad_norm": 0.001747295493260026, "learning_rate": 4.3409104628200374e-05, "loss": 0.21408369541168212, "step": 132250 }, { "epoch": 0.5678198226046041, "grad_norm": 1.5225026607513428, "learning_rate": 4.340479290808275e-05, "loss": 0.2718491077423096, "step": 132260 }, { "epoch": 0.567862754694624, "grad_norm": 0.8086637854576111, "learning_rate": 4.340048118796513e-05, "loss": 0.16083817481994628, "step": 132270 }, { "epoch": 0.567905686784644, "grad_norm": 1.550381064414978, "learning_rate": 4.33961694678475e-05, "loss": 0.18627991676330566, "step": 132280 }, { "epoch": 0.5679486188746641, "grad_norm": 0.007228048052638769, "learning_rate": 4.3391857747729884e-05, "loss": 0.18209935426712037, "step": 132290 }, { "epoch": 0.567991550964684, "grad_norm": 0.027518831193447113, "learning_rate": 4.338754602761226e-05, "loss": 0.2660661697387695, "step": 132300 }, { "epoch": 0.568034483054704, "grad_norm": 0.001855163834989071, "learning_rate": 4.338323430749464e-05, "loss": 0.10220578908920289, "step": 132310 }, { "epoch": 0.5680774151447241, "grad_norm": 0.0008658714359626174, "learning_rate": 4.337892258737701e-05, "loss": 0.09651315808296204, "step": 132320 }, { "epoch": 0.568120347234744, "grad_norm": 0.13477729260921478, "learning_rate": 4.3374610867259386e-05, "loss": 0.23384594917297363, "step": 132330 }, { "epoch": 0.5681632793247641, "grad_norm": 0.1917153149843216, "learning_rate": 4.3370299147141764e-05, "loss": 0.09794583320617675, "step": 132340 }, { "epoch": 0.5682062114147841, "grad_norm": 0.21307621896266937, "learning_rate": 4.336598742702414e-05, "loss": 0.3008575439453125, "step": 132350 }, { "epoch": 0.5682491435048042, "grad_norm": 0.007791618350893259, "learning_rate": 4.336167570690651e-05, "loss": 0.12317459583282471, "step": 132360 }, { "epoch": 0.5682920755948241, "grad_norm": 2.7668819427490234, "learning_rate": 4.335736398678889e-05, "loss": 0.2858917236328125, "step": 132370 }, { "epoch": 0.5683350076848441, "grad_norm": 2.066178560256958, "learning_rate": 4.3353052266671266e-05, "loss": 0.24323766231536864, "step": 132380 }, { "epoch": 0.5683779397748642, "grad_norm": 0.7362213730812073, "learning_rate": 4.3348740546553644e-05, "loss": 0.15425702333450317, "step": 132390 }, { "epoch": 0.5684208718648841, "grad_norm": 0.06285788863897324, "learning_rate": 4.334442882643602e-05, "loss": 0.2881110906600952, "step": 132400 }, { "epoch": 0.5684638039549041, "grad_norm": 0.00026648957282304764, "learning_rate": 4.33401171063184e-05, "loss": 0.24016718864440917, "step": 132410 }, { "epoch": 0.5685067360449242, "grad_norm": 0.054259609431028366, "learning_rate": 4.3335805386200776e-05, "loss": 0.1691007137298584, "step": 132420 }, { "epoch": 0.5685496681349441, "grad_norm": 0.0385783426463604, "learning_rate": 4.333149366608315e-05, "loss": 0.23008294105529786, "step": 132430 }, { "epoch": 0.5685926002249642, "grad_norm": 0.7211818695068359, "learning_rate": 4.332718194596553e-05, "loss": 0.34435076713562013, "step": 132440 }, { "epoch": 0.5686355323149842, "grad_norm": 1.870476245880127, "learning_rate": 4.33228702258479e-05, "loss": 0.2791109323501587, "step": 132450 }, { "epoch": 0.5686784644050041, "grad_norm": 1.46523916721344, "learning_rate": 4.331855850573028e-05, "loss": 0.19431822299957274, "step": 132460 }, { "epoch": 0.5687213964950242, "grad_norm": 0.025228098034858704, "learning_rate": 4.3314246785612656e-05, "loss": 0.4006353378295898, "step": 132470 }, { "epoch": 0.5687643285850442, "grad_norm": 0.02704770676791668, "learning_rate": 4.330993506549503e-05, "loss": 0.09785645008087158, "step": 132480 }, { "epoch": 0.5688072606750642, "grad_norm": 2.4727747440338135, "learning_rate": 4.3305623345377404e-05, "loss": 0.3076622486114502, "step": 132490 }, { "epoch": 0.5688501927650842, "grad_norm": 0.0011699130991473794, "learning_rate": 4.330131162525978e-05, "loss": 0.2031773567199707, "step": 132500 }, { "epoch": 0.5688931248551042, "grad_norm": 0.9709953665733337, "learning_rate": 4.329699990514216e-05, "loss": 0.5550169944763184, "step": 132510 }, { "epoch": 0.5689360569451242, "grad_norm": 0.10869079828262329, "learning_rate": 4.3292688185024536e-05, "loss": 0.07343157529830932, "step": 132520 }, { "epoch": 0.5689789890351442, "grad_norm": 0.04295245185494423, "learning_rate": 4.328837646490691e-05, "loss": 0.2245166063308716, "step": 132530 }, { "epoch": 0.5690219211251643, "grad_norm": 0.005385030992329121, "learning_rate": 4.328406474478929e-05, "loss": 0.03220377266407013, "step": 132540 }, { "epoch": 0.5690648532151842, "grad_norm": 1.353747844696045, "learning_rate": 4.327975302467167e-05, "loss": 0.12378357648849488, "step": 132550 }, { "epoch": 0.5691077853052042, "grad_norm": 0.06430882960557938, "learning_rate": 4.3275441304554045e-05, "loss": 0.155087149143219, "step": 132560 }, { "epoch": 0.5691507173952243, "grad_norm": 0.0034910349640995264, "learning_rate": 4.3271129584436415e-05, "loss": 0.16095657348632814, "step": 132570 }, { "epoch": 0.5691936494852442, "grad_norm": 0.0006872376543469727, "learning_rate": 4.326681786431879e-05, "loss": 0.0919446349143982, "step": 132580 }, { "epoch": 0.5692365815752642, "grad_norm": 2.450606107711792, "learning_rate": 4.326250614420117e-05, "loss": 0.5073282241821289, "step": 132590 }, { "epoch": 0.5692795136652843, "grad_norm": 0.015987534075975418, "learning_rate": 4.325819442408355e-05, "loss": 0.11756675243377686, "step": 132600 }, { "epoch": 0.5693224457553042, "grad_norm": 2.127434015274048, "learning_rate": 4.325388270396592e-05, "loss": 0.30617260932922363, "step": 132610 }, { "epoch": 0.5693653778453243, "grad_norm": 0.5571884512901306, "learning_rate": 4.3249570983848295e-05, "loss": 0.2655909061431885, "step": 132620 }, { "epoch": 0.5694083099353443, "grad_norm": 2.569355010986328, "learning_rate": 4.324525926373067e-05, "loss": 0.17807893753051757, "step": 132630 }, { "epoch": 0.5694512420253642, "grad_norm": 0.01739351451396942, "learning_rate": 4.324094754361305e-05, "loss": 0.19666470289230348, "step": 132640 }, { "epoch": 0.5694941741153843, "grad_norm": 0.07557443529367447, "learning_rate": 4.323663582349543e-05, "loss": 0.2611125707626343, "step": 132650 }, { "epoch": 0.5695371062054043, "grad_norm": 6.050956726074219, "learning_rate": 4.3232324103377805e-05, "loss": 0.37708406448364257, "step": 132660 }, { "epoch": 0.5695800382954243, "grad_norm": 0.09482987970113754, "learning_rate": 4.322801238326018e-05, "loss": 0.17632991075515747, "step": 132670 }, { "epoch": 0.5696229703854443, "grad_norm": 2.6997487545013428, "learning_rate": 4.322370066314256e-05, "loss": 0.16271533966064453, "step": 132680 }, { "epoch": 0.5696659024754643, "grad_norm": 2.603530168533325, "learning_rate": 4.321938894302493e-05, "loss": 0.36246230602264407, "step": 132690 }, { "epoch": 0.5697088345654843, "grad_norm": 0.08923777937889099, "learning_rate": 4.321507722290731e-05, "loss": 0.21388437747955322, "step": 132700 }, { "epoch": 0.5697517666555043, "grad_norm": 0.35162797570228577, "learning_rate": 4.3210765502789685e-05, "loss": 0.3742487668991089, "step": 132710 }, { "epoch": 0.5697946987455244, "grad_norm": 2.965769052505493, "learning_rate": 4.320645378267206e-05, "loss": 0.2451101779937744, "step": 132720 }, { "epoch": 0.5698376308355443, "grad_norm": 0.01462629809975624, "learning_rate": 4.320214206255443e-05, "loss": 0.047423246502876285, "step": 132730 }, { "epoch": 0.5698805629255643, "grad_norm": 0.017881186679005623, "learning_rate": 4.319783034243681e-05, "loss": 0.20612313747406005, "step": 132740 }, { "epoch": 0.5699234950155844, "grad_norm": 0.0030642226338386536, "learning_rate": 4.319351862231919e-05, "loss": 0.05692043900489807, "step": 132750 }, { "epoch": 0.5699664271056043, "grad_norm": 0.03154606372117996, "learning_rate": 4.3189206902201565e-05, "loss": 0.13520408868789674, "step": 132760 }, { "epoch": 0.5700093591956243, "grad_norm": 0.005595480091869831, "learning_rate": 4.318489518208394e-05, "loss": 0.28533225059509276, "step": 132770 }, { "epoch": 0.5700522912856444, "grad_norm": 0.037034157663583755, "learning_rate": 4.318058346196632e-05, "loss": 0.28535511493682864, "step": 132780 }, { "epoch": 0.5700952233756644, "grad_norm": 1.1709529161453247, "learning_rate": 4.31762717418487e-05, "loss": 0.302028226852417, "step": 132790 }, { "epoch": 0.5701381554656844, "grad_norm": 1.4559121131896973, "learning_rate": 4.3171960021731074e-05, "loss": 0.3461907148361206, "step": 132800 }, { "epoch": 0.5701810875557044, "grad_norm": 0.17438967525959015, "learning_rate": 4.316764830161345e-05, "loss": 0.16365714073181153, "step": 132810 }, { "epoch": 0.5702240196457244, "grad_norm": 0.021815786138176918, "learning_rate": 4.316333658149582e-05, "loss": 0.1973589062690735, "step": 132820 }, { "epoch": 0.5702669517357444, "grad_norm": 0.1921364665031433, "learning_rate": 4.31590248613782e-05, "loss": 0.09176667928695678, "step": 132830 }, { "epoch": 0.5703098838257644, "grad_norm": 0.006648586597293615, "learning_rate": 4.315471314126058e-05, "loss": 0.2943988561630249, "step": 132840 }, { "epoch": 0.5703528159157845, "grad_norm": 0.008222861215472221, "learning_rate": 4.3150401421142954e-05, "loss": 0.22311089038848878, "step": 132850 }, { "epoch": 0.5703957480058044, "grad_norm": 1.4243155717849731, "learning_rate": 4.3146089701025325e-05, "loss": 0.3057553768157959, "step": 132860 }, { "epoch": 0.5704386800958244, "grad_norm": 10.4828462600708, "learning_rate": 4.31417779809077e-05, "loss": 0.30001082420349123, "step": 132870 }, { "epoch": 0.5704816121858445, "grad_norm": 2.4526772499084473, "learning_rate": 4.313746626079008e-05, "loss": 0.20372674465179444, "step": 132880 }, { "epoch": 0.5705245442758644, "grad_norm": 3.1110787391662598, "learning_rate": 4.313315454067246e-05, "loss": 0.21619665622711182, "step": 132890 }, { "epoch": 0.5705674763658845, "grad_norm": 0.05998954549431801, "learning_rate": 4.3128842820554834e-05, "loss": 0.16995031833648683, "step": 132900 }, { "epoch": 0.5706104084559045, "grad_norm": 4.894834995269775, "learning_rate": 4.312453110043721e-05, "loss": 0.4124492645263672, "step": 132910 }, { "epoch": 0.5706533405459244, "grad_norm": 0.019320698454976082, "learning_rate": 4.312021938031959e-05, "loss": 0.30166399478912354, "step": 132920 }, { "epoch": 0.5706962726359445, "grad_norm": 0.003763388143852353, "learning_rate": 4.3115907660201966e-05, "loss": 0.2777060031890869, "step": 132930 }, { "epoch": 0.5707392047259645, "grad_norm": 0.003979962319135666, "learning_rate": 4.3111595940084336e-05, "loss": 0.16536861658096313, "step": 132940 }, { "epoch": 0.5707821368159844, "grad_norm": 0.02409444749355316, "learning_rate": 4.3107284219966714e-05, "loss": 0.27788710594177246, "step": 132950 }, { "epoch": 0.5708250689060045, "grad_norm": 0.026255948469042778, "learning_rate": 4.310297249984909e-05, "loss": 0.2729495525360107, "step": 132960 }, { "epoch": 0.5708680009960245, "grad_norm": 2.919468879699707, "learning_rate": 4.309866077973147e-05, "loss": 0.18315572738647462, "step": 132970 }, { "epoch": 0.5709109330860445, "grad_norm": 0.021574953570961952, "learning_rate": 4.309434905961384e-05, "loss": 0.37256340980529784, "step": 132980 }, { "epoch": 0.5709538651760645, "grad_norm": 1.8033825159072876, "learning_rate": 4.3090037339496216e-05, "loss": 0.11639236211776734, "step": 132990 }, { "epoch": 0.5709967972660845, "grad_norm": 0.05781601369380951, "learning_rate": 4.30857256193786e-05, "loss": 0.004620448499917984, "step": 133000 }, { "epoch": 0.5709967972660845, "eval_loss": 0.41767171025276184, "eval_runtime": 27.1415, "eval_samples_per_second": 3.684, "eval_steps_per_second": 3.684, "step": 133000 }, { "epoch": 0.5710397293561045, "grad_norm": 0.09215544909238815, "learning_rate": 4.308141389926098e-05, "loss": 0.08097963333129883, "step": 133010 }, { "epoch": 0.5710826614461245, "grad_norm": 0.034179724752902985, "learning_rate": 4.307710217914335e-05, "loss": 0.08727078437805176, "step": 133020 }, { "epoch": 0.5711255935361446, "grad_norm": 5.184309959411621, "learning_rate": 4.3072790459025726e-05, "loss": 0.276810884475708, "step": 133030 }, { "epoch": 0.5711685256261645, "grad_norm": 1.454801082611084, "learning_rate": 4.30684787389081e-05, "loss": 0.47367095947265625, "step": 133040 }, { "epoch": 0.5712114577161845, "grad_norm": 0.03418707475066185, "learning_rate": 4.306416701879048e-05, "loss": 0.030638954043388365, "step": 133050 }, { "epoch": 0.5712543898062046, "grad_norm": 0.03641417250037193, "learning_rate": 4.305985529867285e-05, "loss": 0.14549466371536254, "step": 133060 }, { "epoch": 0.5712973218962245, "grad_norm": 2.0202322006225586, "learning_rate": 4.305554357855523e-05, "loss": 0.1501977562904358, "step": 133070 }, { "epoch": 0.5713402539862446, "grad_norm": 2.3619489669799805, "learning_rate": 4.3051231858437606e-05, "loss": 0.24671728610992433, "step": 133080 }, { "epoch": 0.5713831860762646, "grad_norm": 2.6512274742126465, "learning_rate": 4.304692013831998e-05, "loss": 0.1540529489517212, "step": 133090 }, { "epoch": 0.5714261181662845, "grad_norm": 0.024750245735049248, "learning_rate": 4.3042608418202354e-05, "loss": 0.15671908855438232, "step": 133100 }, { "epoch": 0.5714690502563046, "grad_norm": 3.0600099563598633, "learning_rate": 4.303829669808474e-05, "loss": 0.15059789419174194, "step": 133110 }, { "epoch": 0.5715119823463246, "grad_norm": 0.39473623037338257, "learning_rate": 4.3033984977967115e-05, "loss": 0.21995272636413574, "step": 133120 }, { "epoch": 0.5715549144363445, "grad_norm": 2.5885555744171143, "learning_rate": 4.302967325784949e-05, "loss": 0.2738445281982422, "step": 133130 }, { "epoch": 0.5715978465263646, "grad_norm": 0.8623188734054565, "learning_rate": 4.302536153773187e-05, "loss": 0.3122952938079834, "step": 133140 }, { "epoch": 0.5716407786163846, "grad_norm": 0.2685237526893616, "learning_rate": 4.302104981761424e-05, "loss": 0.10587440729141236, "step": 133150 }, { "epoch": 0.5716837107064046, "grad_norm": 2.3580923080444336, "learning_rate": 4.301673809749662e-05, "loss": 0.24532074928283693, "step": 133160 }, { "epoch": 0.5717266427964246, "grad_norm": 0.01954667828977108, "learning_rate": 4.3012426377378995e-05, "loss": 0.10717763900756835, "step": 133170 }, { "epoch": 0.5717695748864446, "grad_norm": 0.01179230585694313, "learning_rate": 4.300811465726137e-05, "loss": 0.2986689805984497, "step": 133180 }, { "epoch": 0.5718125069764646, "grad_norm": 32.5291862487793, "learning_rate": 4.300380293714374e-05, "loss": 0.06174714565277099, "step": 133190 }, { "epoch": 0.5718554390664846, "grad_norm": 0.019292457029223442, "learning_rate": 4.299949121702612e-05, "loss": 0.21613140106201173, "step": 133200 }, { "epoch": 0.5718983711565047, "grad_norm": 0.0038005278911441565, "learning_rate": 4.29951794969085e-05, "loss": 0.19410216808319092, "step": 133210 }, { "epoch": 0.5719413032465247, "grad_norm": 0.04521465674042702, "learning_rate": 4.2990867776790875e-05, "loss": 0.0654529094696045, "step": 133220 }, { "epoch": 0.5719842353365446, "grad_norm": 4.110771656036377, "learning_rate": 4.298655605667325e-05, "loss": 0.1877021908760071, "step": 133230 }, { "epoch": 0.5720271674265647, "grad_norm": 0.01429817546159029, "learning_rate": 4.298224433655563e-05, "loss": 0.055100107192993165, "step": 133240 }, { "epoch": 0.5720700995165847, "grad_norm": 1.2499785423278809, "learning_rate": 4.297793261643801e-05, "loss": 0.16364293098449706, "step": 133250 }, { "epoch": 0.5721130316066046, "grad_norm": 2.7436704635620117, "learning_rate": 4.2973620896320384e-05, "loss": 0.06553627848625183, "step": 133260 }, { "epoch": 0.5721559636966247, "grad_norm": 0.036512356251478195, "learning_rate": 4.2969309176202755e-05, "loss": 0.23155527114868163, "step": 133270 }, { "epoch": 0.5721988957866447, "grad_norm": 0.13235700130462646, "learning_rate": 4.296499745608513e-05, "loss": 0.22186541557312012, "step": 133280 }, { "epoch": 0.5722418278766647, "grad_norm": 1.8723384141921997, "learning_rate": 4.296068573596751e-05, "loss": 0.11128606796264648, "step": 133290 }, { "epoch": 0.5722847599666847, "grad_norm": 0.0058643571101129055, "learning_rate": 4.295637401584989e-05, "loss": 0.10969338417053223, "step": 133300 }, { "epoch": 0.5723276920567048, "grad_norm": 0.0006499195005744696, "learning_rate": 4.295206229573226e-05, "loss": 0.2320557117462158, "step": 133310 }, { "epoch": 0.5723706241467247, "grad_norm": 0.2941928803920746, "learning_rate": 4.2947750575614635e-05, "loss": 0.17140878438949586, "step": 133320 }, { "epoch": 0.5724135562367447, "grad_norm": 0.023136064410209656, "learning_rate": 4.294343885549701e-05, "loss": 0.2508916139602661, "step": 133330 }, { "epoch": 0.5724564883267648, "grad_norm": 1.5124292373657227, "learning_rate": 4.293912713537939e-05, "loss": 0.4470512390136719, "step": 133340 }, { "epoch": 0.5724994204167847, "grad_norm": 0.0816258117556572, "learning_rate": 4.293481541526177e-05, "loss": 0.10875993967056274, "step": 133350 }, { "epoch": 0.5725423525068047, "grad_norm": 12.681973457336426, "learning_rate": 4.2930503695144144e-05, "loss": 0.0813460648059845, "step": 133360 }, { "epoch": 0.5725852845968248, "grad_norm": 1.014262080192566, "learning_rate": 4.292619197502652e-05, "loss": 0.1749336004257202, "step": 133370 }, { "epoch": 0.5726282166868447, "grad_norm": 0.023005418479442596, "learning_rate": 4.29218802549089e-05, "loss": 0.14777911901474, "step": 133380 }, { "epoch": 0.5726711487768648, "grad_norm": 0.012674704194068909, "learning_rate": 4.291756853479127e-05, "loss": 0.268320631980896, "step": 133390 }, { "epoch": 0.5727140808668848, "grad_norm": 0.10391156375408173, "learning_rate": 4.291325681467365e-05, "loss": 0.07142456769943237, "step": 133400 }, { "epoch": 0.5727570129569047, "grad_norm": 15.536042213439941, "learning_rate": 4.2908945094556024e-05, "loss": 0.22836570739746093, "step": 133410 }, { "epoch": 0.5727999450469248, "grad_norm": 0.3068942129611969, "learning_rate": 4.29046333744384e-05, "loss": 0.11900877952575684, "step": 133420 }, { "epoch": 0.5728428771369448, "grad_norm": 0.0022904151119291782, "learning_rate": 4.290032165432077e-05, "loss": 0.07925449013710022, "step": 133430 }, { "epoch": 0.5728858092269647, "grad_norm": 0.07070890814065933, "learning_rate": 4.289600993420315e-05, "loss": 0.3149399995803833, "step": 133440 }, { "epoch": 0.5729287413169848, "grad_norm": 0.0037805649917572737, "learning_rate": 4.289169821408553e-05, "loss": 0.05543408393859863, "step": 133450 }, { "epoch": 0.5729716734070048, "grad_norm": 0.0010744985193014145, "learning_rate": 4.2887386493967904e-05, "loss": 0.21528022289276122, "step": 133460 }, { "epoch": 0.5730146054970248, "grad_norm": 1.0971367359161377, "learning_rate": 4.288307477385028e-05, "loss": 0.2108835458755493, "step": 133470 }, { "epoch": 0.5730575375870448, "grad_norm": 0.0034420473966747522, "learning_rate": 4.287876305373266e-05, "loss": 0.22491807937622071, "step": 133480 }, { "epoch": 0.5731004696770648, "grad_norm": 0.10176596790552139, "learning_rate": 4.2874451333615036e-05, "loss": 0.2868778705596924, "step": 133490 }, { "epoch": 0.5731434017670848, "grad_norm": 0.0029990740586072206, "learning_rate": 4.2870139613497413e-05, "loss": 0.2023683547973633, "step": 133500 }, { "epoch": 0.5731863338571048, "grad_norm": 0.7405256628990173, "learning_rate": 4.286582789337979e-05, "loss": 0.1412230134010315, "step": 133510 }, { "epoch": 0.5732292659471249, "grad_norm": 0.005497376900166273, "learning_rate": 4.286151617326216e-05, "loss": 0.31963338851928713, "step": 133520 }, { "epoch": 0.5732721980371448, "grad_norm": 0.010010753758251667, "learning_rate": 4.285720445314454e-05, "loss": 0.2021845817565918, "step": 133530 }, { "epoch": 0.5733151301271648, "grad_norm": 0.0025157523341476917, "learning_rate": 4.2852892733026916e-05, "loss": 0.10246919393539429, "step": 133540 }, { "epoch": 0.5733580622171849, "grad_norm": 2.5009193420410156, "learning_rate": 4.2848581012909293e-05, "loss": 0.147149920463562, "step": 133550 }, { "epoch": 0.5734009943072048, "grad_norm": 0.006231301464140415, "learning_rate": 4.2844269292791664e-05, "loss": 0.14908454418182374, "step": 133560 }, { "epoch": 0.5734439263972249, "grad_norm": 6.109598636627197, "learning_rate": 4.283995757267404e-05, "loss": 0.2803361415863037, "step": 133570 }, { "epoch": 0.5734868584872449, "grad_norm": 0.08631127327680588, "learning_rate": 4.283564585255642e-05, "loss": 0.2282865047454834, "step": 133580 }, { "epoch": 0.5735297905772648, "grad_norm": 1.3754993677139282, "learning_rate": 4.28313341324388e-05, "loss": 0.30085842609405516, "step": 133590 }, { "epoch": 0.5735727226672849, "grad_norm": 0.0010205082362517715, "learning_rate": 4.282702241232117e-05, "loss": 0.0039805609732866285, "step": 133600 }, { "epoch": 0.5736156547573049, "grad_norm": 1.0616086721420288, "learning_rate": 4.282271069220355e-05, "loss": 0.2139833688735962, "step": 133610 }, { "epoch": 0.5736585868473248, "grad_norm": 0.22987225651741028, "learning_rate": 4.281839897208593e-05, "loss": 0.22784335613250734, "step": 133620 }, { "epoch": 0.5737015189373449, "grad_norm": 0.009210779331624508, "learning_rate": 4.2814087251968305e-05, "loss": 0.15790432691574097, "step": 133630 }, { "epoch": 0.5737444510273649, "grad_norm": 0.0013962037628516555, "learning_rate": 4.2809775531850676e-05, "loss": 0.11812090873718262, "step": 133640 }, { "epoch": 0.573787383117385, "grad_norm": 1.9198707342147827, "learning_rate": 4.280546381173305e-05, "loss": 0.32148313522338867, "step": 133650 }, { "epoch": 0.5738303152074049, "grad_norm": 0.027793236076831818, "learning_rate": 4.280115209161543e-05, "loss": 0.07742552757263184, "step": 133660 }, { "epoch": 0.573873247297425, "grad_norm": 0.0017426724079996347, "learning_rate": 4.279684037149781e-05, "loss": 0.31075618267059324, "step": 133670 }, { "epoch": 0.573916179387445, "grad_norm": 8.916657447814941, "learning_rate": 4.279252865138018e-05, "loss": 0.3325552463531494, "step": 133680 }, { "epoch": 0.5739591114774649, "grad_norm": 0.039197370409965515, "learning_rate": 4.2788216931262556e-05, "loss": 0.146637225151062, "step": 133690 }, { "epoch": 0.574002043567485, "grad_norm": 0.03254859149456024, "learning_rate": 4.278390521114494e-05, "loss": 0.33236918449401853, "step": 133700 }, { "epoch": 0.574044975657505, "grad_norm": 0.04245612770318985, "learning_rate": 4.277959349102732e-05, "loss": 0.18411213159561157, "step": 133710 }, { "epoch": 0.5740879077475249, "grad_norm": 1.4738399982452393, "learning_rate": 4.277528177090969e-05, "loss": 0.5320202827453613, "step": 133720 }, { "epoch": 0.574130839837545, "grad_norm": 0.03626309707760811, "learning_rate": 4.2770970050792065e-05, "loss": 0.21667177677154542, "step": 133730 }, { "epoch": 0.574173771927565, "grad_norm": 1.9234563112258911, "learning_rate": 4.276665833067444e-05, "loss": 0.35777835845947265, "step": 133740 }, { "epoch": 0.574216704017585, "grad_norm": 0.9428356885910034, "learning_rate": 4.276234661055682e-05, "loss": 0.4586705207824707, "step": 133750 }, { "epoch": 0.574259636107605, "grad_norm": 2.4569039344787598, "learning_rate": 4.275803489043919e-05, "loss": 0.29904091358184814, "step": 133760 }, { "epoch": 0.574302568197625, "grad_norm": 16.694744110107422, "learning_rate": 4.275372317032157e-05, "loss": 0.19398369789123535, "step": 133770 }, { "epoch": 0.574345500287645, "grad_norm": 0.0017974856309592724, "learning_rate": 4.2749411450203945e-05, "loss": 0.009163709729909897, "step": 133780 }, { "epoch": 0.574388432377665, "grad_norm": 0.010474118404090405, "learning_rate": 4.274509973008632e-05, "loss": 0.11285858154296875, "step": 133790 }, { "epoch": 0.5744313644676851, "grad_norm": 0.040957894176244736, "learning_rate": 4.274078800996869e-05, "loss": 0.2543015480041504, "step": 133800 }, { "epoch": 0.574474296557705, "grad_norm": 1.2445433139801025, "learning_rate": 4.273647628985108e-05, "loss": 0.27809972763061525, "step": 133810 }, { "epoch": 0.574517228647725, "grad_norm": 0.0012289606966078281, "learning_rate": 4.2732164569733455e-05, "loss": 0.277581524848938, "step": 133820 }, { "epoch": 0.5745601607377451, "grad_norm": 0.0005551620270125568, "learning_rate": 4.272785284961583e-05, "loss": 0.36834819316864015, "step": 133830 }, { "epoch": 0.574603092827765, "grad_norm": 0.029749320819973946, "learning_rate": 4.27235411294982e-05, "loss": 0.0925449252128601, "step": 133840 }, { "epoch": 0.574646024917785, "grad_norm": 0.0027887921314686537, "learning_rate": 4.271922940938058e-05, "loss": 0.34550836086273196, "step": 133850 }, { "epoch": 0.5746889570078051, "grad_norm": 1.5625827312469482, "learning_rate": 4.271491768926296e-05, "loss": 0.20014092922210694, "step": 133860 }, { "epoch": 0.574731889097825, "grad_norm": 0.07165715843439102, "learning_rate": 4.2710605969145335e-05, "loss": 0.25347466468811036, "step": 133870 }, { "epoch": 0.5747748211878451, "grad_norm": 0.03872102126479149, "learning_rate": 4.270629424902771e-05, "loss": 0.04969048798084259, "step": 133880 }, { "epoch": 0.5748177532778651, "grad_norm": 1.2972571849822998, "learning_rate": 4.270198252891008e-05, "loss": 0.3419404268264771, "step": 133890 }, { "epoch": 0.574860685367885, "grad_norm": 1.4392205476760864, "learning_rate": 4.269767080879246e-05, "loss": 0.3982239246368408, "step": 133900 }, { "epoch": 0.5749036174579051, "grad_norm": 1.936293125152588, "learning_rate": 4.269335908867484e-05, "loss": 0.14439613819122316, "step": 133910 }, { "epoch": 0.5749465495479251, "grad_norm": 0.0033235508017241955, "learning_rate": 4.2689047368557214e-05, "loss": 0.25156464576721194, "step": 133920 }, { "epoch": 0.574989481637945, "grad_norm": 1.7011040449142456, "learning_rate": 4.268473564843959e-05, "loss": 0.4407416820526123, "step": 133930 }, { "epoch": 0.5750324137279651, "grad_norm": 0.1319187879562378, "learning_rate": 4.268042392832197e-05, "loss": 0.20434746742248536, "step": 133940 }, { "epoch": 0.5750753458179851, "grad_norm": 1.9218518733978271, "learning_rate": 4.2676112208204346e-05, "loss": 0.14832651615142822, "step": 133950 }, { "epoch": 0.5751182779080051, "grad_norm": 0.0007688776240684092, "learning_rate": 4.2671800488086724e-05, "loss": 0.1170761227607727, "step": 133960 }, { "epoch": 0.5751612099980251, "grad_norm": 2.263821840286255, "learning_rate": 4.2667488767969094e-05, "loss": 0.22509450912475587, "step": 133970 }, { "epoch": 0.5752041420880452, "grad_norm": 0.000915932294446975, "learning_rate": 4.266317704785147e-05, "loss": 0.007984549552202225, "step": 133980 }, { "epoch": 0.5752470741780651, "grad_norm": 0.1721668690443039, "learning_rate": 4.265886532773385e-05, "loss": 0.3790409088134766, "step": 133990 }, { "epoch": 0.5752900062680851, "grad_norm": 0.15518277883529663, "learning_rate": 4.2654553607616226e-05, "loss": 0.1313472032546997, "step": 134000 }, { "epoch": 0.5752900062680851, "eval_loss": 0.4008947014808655, "eval_runtime": 27.183, "eval_samples_per_second": 3.679, "eval_steps_per_second": 3.679, "step": 134000 }, { "epoch": 0.5753329383581052, "grad_norm": 0.3634943962097168, "learning_rate": 4.26502418874986e-05, "loss": 0.07963572144508362, "step": 134010 }, { "epoch": 0.5753758704481251, "grad_norm": 0.2079387754201889, "learning_rate": 4.2645930167380974e-05, "loss": 0.11615699529647827, "step": 134020 }, { "epoch": 0.5754188025381451, "grad_norm": 8.19499397277832, "learning_rate": 4.264161844726335e-05, "loss": 0.15074779987335205, "step": 134030 }, { "epoch": 0.5754617346281652, "grad_norm": 0.008482100442051888, "learning_rate": 4.263730672714573e-05, "loss": 0.06598466634750366, "step": 134040 }, { "epoch": 0.5755046667181851, "grad_norm": 9.044384956359863, "learning_rate": 4.2632995007028106e-05, "loss": 0.3111989974975586, "step": 134050 }, { "epoch": 0.5755475988082052, "grad_norm": 0.14206865429878235, "learning_rate": 4.2628683286910484e-05, "loss": 0.19637430906295777, "step": 134060 }, { "epoch": 0.5755905308982252, "grad_norm": 0.06369198113679886, "learning_rate": 4.262437156679286e-05, "loss": 0.18411502838134766, "step": 134070 }, { "epoch": 0.5756334629882452, "grad_norm": 1.4943736791610718, "learning_rate": 4.262005984667524e-05, "loss": 0.23669490814208985, "step": 134080 }, { "epoch": 0.5756763950782652, "grad_norm": 0.059160616248846054, "learning_rate": 4.261574812655761e-05, "loss": 0.2464158058166504, "step": 134090 }, { "epoch": 0.5757193271682852, "grad_norm": 1.0862027406692505, "learning_rate": 4.2611436406439986e-05, "loss": 0.11739879846572876, "step": 134100 }, { "epoch": 0.5757622592583053, "grad_norm": 30.739408493041992, "learning_rate": 4.2607124686322364e-05, "loss": 0.21538019180297852, "step": 134110 }, { "epoch": 0.5758051913483252, "grad_norm": 0.002959183417260647, "learning_rate": 4.260281296620474e-05, "loss": 0.17276872396469117, "step": 134120 }, { "epoch": 0.5758481234383452, "grad_norm": 1.208294153213501, "learning_rate": 4.259850124608711e-05, "loss": 0.2686814785003662, "step": 134130 }, { "epoch": 0.5758910555283653, "grad_norm": 0.003919269423931837, "learning_rate": 4.259418952596949e-05, "loss": 0.10101698637008667, "step": 134140 }, { "epoch": 0.5759339876183852, "grad_norm": 0.005994774866849184, "learning_rate": 4.2589877805851866e-05, "loss": 0.18420990705490112, "step": 134150 }, { "epoch": 0.5759769197084053, "grad_norm": 1.3661763668060303, "learning_rate": 4.2585566085734244e-05, "loss": 0.36013386249542234, "step": 134160 }, { "epoch": 0.5760198517984253, "grad_norm": 0.03579791262745857, "learning_rate": 4.258125436561662e-05, "loss": 0.22522573471069335, "step": 134170 }, { "epoch": 0.5760627838884452, "grad_norm": 4.322238445281982, "learning_rate": 4.2576942645499e-05, "loss": 0.2739940404891968, "step": 134180 }, { "epoch": 0.5761057159784653, "grad_norm": 1.1436915397644043, "learning_rate": 4.2572630925381376e-05, "loss": 0.1775204062461853, "step": 134190 }, { "epoch": 0.5761486480684853, "grad_norm": 0.012154379859566689, "learning_rate": 4.256831920526375e-05, "loss": 0.1556593060493469, "step": 134200 }, { "epoch": 0.5761915801585052, "grad_norm": 4.292967319488525, "learning_rate": 4.2564007485146124e-05, "loss": 0.23525865077972413, "step": 134210 }, { "epoch": 0.5762345122485253, "grad_norm": 1.399591326713562, "learning_rate": 4.25596957650285e-05, "loss": 0.19609031677246094, "step": 134220 }, { "epoch": 0.5762774443385453, "grad_norm": 1.8917382955551147, "learning_rate": 4.255538404491088e-05, "loss": 0.12764239311218262, "step": 134230 }, { "epoch": 0.5763203764285653, "grad_norm": 0.0013518787454813719, "learning_rate": 4.2551072324793256e-05, "loss": 0.25250537395477296, "step": 134240 }, { "epoch": 0.5763633085185853, "grad_norm": 1.5671014785766602, "learning_rate": 4.254676060467563e-05, "loss": 0.15098211765289307, "step": 134250 }, { "epoch": 0.5764062406086053, "grad_norm": 0.003938416950404644, "learning_rate": 4.2542448884558003e-05, "loss": 0.3070345640182495, "step": 134260 }, { "epoch": 0.5764491726986253, "grad_norm": 2.018571376800537, "learning_rate": 4.253813716444038e-05, "loss": 0.10029573440551758, "step": 134270 }, { "epoch": 0.5764921047886453, "grad_norm": 0.004540940281003714, "learning_rate": 4.253382544432276e-05, "loss": 0.13509360551834107, "step": 134280 }, { "epoch": 0.5765350368786654, "grad_norm": 0.014921938069164753, "learning_rate": 4.252951372420514e-05, "loss": 0.20777909755706786, "step": 134290 }, { "epoch": 0.5765779689686853, "grad_norm": 0.012604661285877228, "learning_rate": 4.252520200408751e-05, "loss": 0.45519323348999025, "step": 134300 }, { "epoch": 0.5766209010587053, "grad_norm": 0.2351997345685959, "learning_rate": 4.252089028396989e-05, "loss": 0.09932146072387696, "step": 134310 }, { "epoch": 0.5766638331487254, "grad_norm": 0.006004502065479755, "learning_rate": 4.251657856385227e-05, "loss": 0.17851873636245727, "step": 134320 }, { "epoch": 0.5767067652387453, "grad_norm": 0.17117968201637268, "learning_rate": 4.2512266843734645e-05, "loss": 0.17499239444732667, "step": 134330 }, { "epoch": 0.5767496973287654, "grad_norm": 0.08349964022636414, "learning_rate": 4.2507955123617015e-05, "loss": 0.25635461807250975, "step": 134340 }, { "epoch": 0.5767926294187854, "grad_norm": 0.07518622279167175, "learning_rate": 4.250364340349939e-05, "loss": 0.1901835560798645, "step": 134350 }, { "epoch": 0.5768355615088053, "grad_norm": 0.03478851169347763, "learning_rate": 4.249933168338177e-05, "loss": 0.06834203600883484, "step": 134360 }, { "epoch": 0.5768784935988254, "grad_norm": 1.326767086982727, "learning_rate": 4.249501996326415e-05, "loss": 0.18276243209838866, "step": 134370 }, { "epoch": 0.5769214256888454, "grad_norm": 1.6591578722000122, "learning_rate": 4.249070824314652e-05, "loss": 0.1553168773651123, "step": 134380 }, { "epoch": 0.5769643577788653, "grad_norm": 0.05299576371908188, "learning_rate": 4.2486396523028895e-05, "loss": 0.3026347875595093, "step": 134390 }, { "epoch": 0.5770072898688854, "grad_norm": 0.5824448466300964, "learning_rate": 4.248208480291128e-05, "loss": 0.2127014398574829, "step": 134400 }, { "epoch": 0.5770502219589054, "grad_norm": 0.16627003252506256, "learning_rate": 4.247777308279366e-05, "loss": 0.19256727695465087, "step": 134410 }, { "epoch": 0.5770931540489254, "grad_norm": 0.4351569414138794, "learning_rate": 4.247346136267603e-05, "loss": 0.008743252605199814, "step": 134420 }, { "epoch": 0.5771360861389454, "grad_norm": 3.0534777641296387, "learning_rate": 4.2469149642558405e-05, "loss": 0.21316053867340087, "step": 134430 }, { "epoch": 0.5771790182289654, "grad_norm": 1.68711256980896, "learning_rate": 4.246483792244078e-05, "loss": 0.2891331434249878, "step": 134440 }, { "epoch": 0.5772219503189854, "grad_norm": 0.023587733507156372, "learning_rate": 4.246052620232316e-05, "loss": 0.22894837856292724, "step": 134450 }, { "epoch": 0.5772648824090054, "grad_norm": 0.07950850576162338, "learning_rate": 4.245621448220553e-05, "loss": 0.05865427851676941, "step": 134460 }, { "epoch": 0.5773078144990255, "grad_norm": 0.0960141196846962, "learning_rate": 4.245190276208791e-05, "loss": 0.22196877002716064, "step": 134470 }, { "epoch": 0.5773507465890454, "grad_norm": 0.01585448905825615, "learning_rate": 4.2447591041970285e-05, "loss": 0.29195680618286135, "step": 134480 }, { "epoch": 0.5773936786790654, "grad_norm": 0.05223434045910835, "learning_rate": 4.244327932185266e-05, "loss": 0.07586430311203003, "step": 134490 }, { "epoch": 0.5774366107690855, "grad_norm": 10.157803535461426, "learning_rate": 4.243896760173503e-05, "loss": 0.40190610885620115, "step": 134500 }, { "epoch": 0.5774795428591055, "grad_norm": 2.9556596279144287, "learning_rate": 4.243465588161742e-05, "loss": 0.3310459852218628, "step": 134510 }, { "epoch": 0.5775224749491255, "grad_norm": 1.8336342573165894, "learning_rate": 4.2430344161499794e-05, "loss": 0.2758753299713135, "step": 134520 }, { "epoch": 0.5775654070391455, "grad_norm": 0.02351958490908146, "learning_rate": 4.242603244138217e-05, "loss": 0.3395817995071411, "step": 134530 }, { "epoch": 0.5776083391291655, "grad_norm": 1.333340048789978, "learning_rate": 4.242172072126454e-05, "loss": 0.19011322259902955, "step": 134540 }, { "epoch": 0.5776512712191855, "grad_norm": 0.27546411752700806, "learning_rate": 4.241740900114692e-05, "loss": 0.2953900575637817, "step": 134550 }, { "epoch": 0.5776942033092055, "grad_norm": 3.004845142364502, "learning_rate": 4.24130972810293e-05, "loss": 0.23668646812438965, "step": 134560 }, { "epoch": 0.5777371353992256, "grad_norm": 2.5741522312164307, "learning_rate": 4.2408785560911674e-05, "loss": 0.1605753183364868, "step": 134570 }, { "epoch": 0.5777800674892455, "grad_norm": 0.09854859858751297, "learning_rate": 4.240447384079405e-05, "loss": 0.3948371171951294, "step": 134580 }, { "epoch": 0.5778229995792655, "grad_norm": 0.07081442326307297, "learning_rate": 4.240016212067642e-05, "loss": 0.26161210536956786, "step": 134590 }, { "epoch": 0.5778659316692856, "grad_norm": 0.08974912762641907, "learning_rate": 4.23958504005588e-05, "loss": 0.27458915710449217, "step": 134600 }, { "epoch": 0.5779088637593055, "grad_norm": 0.7241619229316711, "learning_rate": 4.2391538680441177e-05, "loss": 0.23674228191375732, "step": 134610 }, { "epoch": 0.5779517958493255, "grad_norm": 0.09924507886171341, "learning_rate": 4.2387226960323554e-05, "loss": 0.35862672328948975, "step": 134620 }, { "epoch": 0.5779947279393456, "grad_norm": 0.3530639708042145, "learning_rate": 4.238291524020593e-05, "loss": 0.24312853813171387, "step": 134630 }, { "epoch": 0.5780376600293655, "grad_norm": 0.06290092319250107, "learning_rate": 4.237860352008831e-05, "loss": 0.24631965160369873, "step": 134640 }, { "epoch": 0.5780805921193856, "grad_norm": 0.24874712526798248, "learning_rate": 4.2374291799970686e-05, "loss": 0.12623435258865356, "step": 134650 }, { "epoch": 0.5781235242094056, "grad_norm": 1.555324912071228, "learning_rate": 4.236998007985306e-05, "loss": 0.4415943145751953, "step": 134660 }, { "epoch": 0.5781664562994255, "grad_norm": 0.0042195431888103485, "learning_rate": 4.2365668359735434e-05, "loss": 0.3294379711151123, "step": 134670 }, { "epoch": 0.5782093883894456, "grad_norm": 2.310861587524414, "learning_rate": 4.236135663961781e-05, "loss": 0.4276157855987549, "step": 134680 }, { "epoch": 0.5782523204794656, "grad_norm": 8.112129211425781, "learning_rate": 4.235704491950019e-05, "loss": 0.14049152135849, "step": 134690 }, { "epoch": 0.5782952525694856, "grad_norm": 4.619983196258545, "learning_rate": 4.2352733199382566e-05, "loss": 0.0853570580482483, "step": 134700 }, { "epoch": 0.5783381846595056, "grad_norm": 0.12085939943790436, "learning_rate": 4.2348421479264936e-05, "loss": 0.225472354888916, "step": 134710 }, { "epoch": 0.5783811167495256, "grad_norm": 8.128291130065918, "learning_rate": 4.2344109759147314e-05, "loss": 0.24805455207824706, "step": 134720 }, { "epoch": 0.5784240488395456, "grad_norm": 0.3583569824695587, "learning_rate": 4.233979803902969e-05, "loss": 0.2094562292098999, "step": 134730 }, { "epoch": 0.5784669809295656, "grad_norm": 0.04759635776281357, "learning_rate": 4.233548631891207e-05, "loss": 0.24561095237731934, "step": 134740 }, { "epoch": 0.5785099130195857, "grad_norm": 2.9910125732421875, "learning_rate": 4.2331174598794446e-05, "loss": 0.31358211040496825, "step": 134750 }, { "epoch": 0.5785528451096056, "grad_norm": 6.14967155456543, "learning_rate": 4.232686287867682e-05, "loss": 0.3080138206481934, "step": 134760 }, { "epoch": 0.5785957771996256, "grad_norm": 0.6008140444755554, "learning_rate": 4.23225511585592e-05, "loss": 0.242822003364563, "step": 134770 }, { "epoch": 0.5786387092896457, "grad_norm": 0.17511683702468872, "learning_rate": 4.231823943844158e-05, "loss": 0.13240162134170533, "step": 134780 }, { "epoch": 0.5786816413796656, "grad_norm": 0.07600205391645432, "learning_rate": 4.231392771832395e-05, "loss": 0.2748347282409668, "step": 134790 }, { "epoch": 0.5787245734696856, "grad_norm": 0.017275772988796234, "learning_rate": 4.2309615998206326e-05, "loss": 0.16032135486602783, "step": 134800 }, { "epoch": 0.5787675055597057, "grad_norm": 0.017104055732488632, "learning_rate": 4.23053042780887e-05, "loss": 0.10543267726898194, "step": 134810 }, { "epoch": 0.5788104376497256, "grad_norm": 5.630326747894287, "learning_rate": 4.230099255797108e-05, "loss": 0.23811829090118408, "step": 134820 }, { "epoch": 0.5788533697397457, "grad_norm": 0.06849951297044754, "learning_rate": 4.229668083785345e-05, "loss": 0.28866872787475584, "step": 134830 }, { "epoch": 0.5788963018297657, "grad_norm": 0.1363976001739502, "learning_rate": 4.229236911773583e-05, "loss": 0.21454071998596191, "step": 134840 }, { "epoch": 0.5789392339197856, "grad_norm": 0.07092367857694626, "learning_rate": 4.2288057397618206e-05, "loss": 0.2816061019897461, "step": 134850 }, { "epoch": 0.5789821660098057, "grad_norm": 0.03237995132803917, "learning_rate": 4.228374567750058e-05, "loss": 0.1369353413581848, "step": 134860 }, { "epoch": 0.5790250980998257, "grad_norm": 0.14592450857162476, "learning_rate": 4.227943395738296e-05, "loss": 0.16824755668640137, "step": 134870 }, { "epoch": 0.5790680301898457, "grad_norm": 0.13835661113262177, "learning_rate": 4.227512223726534e-05, "loss": 0.1760498046875, "step": 134880 }, { "epoch": 0.5791109622798657, "grad_norm": 0.913736879825592, "learning_rate": 4.2270810517147715e-05, "loss": 0.17487471103668212, "step": 134890 }, { "epoch": 0.5791538943698857, "grad_norm": 0.0023353216238319874, "learning_rate": 4.226649879703009e-05, "loss": 0.27238619327545166, "step": 134900 }, { "epoch": 0.5791968264599057, "grad_norm": 0.0350453183054924, "learning_rate": 4.226218707691246e-05, "loss": 0.07492238283157349, "step": 134910 }, { "epoch": 0.5792397585499257, "grad_norm": 0.006240634713321924, "learning_rate": 4.225787535679484e-05, "loss": 0.30922422409057615, "step": 134920 }, { "epoch": 0.5792826906399458, "grad_norm": 2.5151445865631104, "learning_rate": 4.225356363667722e-05, "loss": 0.11893032789230347, "step": 134930 }, { "epoch": 0.5793256227299658, "grad_norm": 0.21421582996845245, "learning_rate": 4.2249251916559595e-05, "loss": 0.37455198764801023, "step": 134940 }, { "epoch": 0.5793685548199857, "grad_norm": 3.1594583988189697, "learning_rate": 4.224494019644197e-05, "loss": 0.27971127033233645, "step": 134950 }, { "epoch": 0.5794114869100058, "grad_norm": 0.001456442754715681, "learning_rate": 4.224062847632434e-05, "loss": 0.3134411096572876, "step": 134960 }, { "epoch": 0.5794544190000258, "grad_norm": 0.03771829977631569, "learning_rate": 4.223631675620672e-05, "loss": 0.13740975856781007, "step": 134970 }, { "epoch": 0.5794973510900457, "grad_norm": 0.6887321472167969, "learning_rate": 4.22320050360891e-05, "loss": 0.27420816421508787, "step": 134980 }, { "epoch": 0.5795402831800658, "grad_norm": 0.015955857932567596, "learning_rate": 4.2227693315971475e-05, "loss": 0.13951371908187865, "step": 134990 }, { "epoch": 0.5795832152700858, "grad_norm": 3.5314512252807617, "learning_rate": 4.222338159585385e-05, "loss": 0.20095674991607665, "step": 135000 }, { "epoch": 0.5795832152700858, "eval_loss": 0.39967551827430725, "eval_runtime": 27.1223, "eval_samples_per_second": 3.687, "eval_steps_per_second": 3.687, "step": 135000 }, { "epoch": 0.5796261473601058, "grad_norm": 0.18443207442760468, "learning_rate": 4.221906987573623e-05, "loss": 0.16879316568374633, "step": 135010 }, { "epoch": 0.5796690794501258, "grad_norm": 0.12480297684669495, "learning_rate": 4.221475815561861e-05, "loss": 0.03386820554733276, "step": 135020 }, { "epoch": 0.5797120115401458, "grad_norm": 0.09635353833436966, "learning_rate": 4.2210446435500984e-05, "loss": 0.2205876588821411, "step": 135030 }, { "epoch": 0.5797549436301658, "grad_norm": 0.8061900734901428, "learning_rate": 4.2206134715383355e-05, "loss": 0.15954869985580444, "step": 135040 }, { "epoch": 0.5797978757201858, "grad_norm": 0.03748125210404396, "learning_rate": 4.220182299526573e-05, "loss": 0.06913055181503296, "step": 135050 }, { "epoch": 0.5798408078102059, "grad_norm": 2.611194610595703, "learning_rate": 4.219751127514811e-05, "loss": 0.06778600215911865, "step": 135060 }, { "epoch": 0.5798837399002258, "grad_norm": 0.07131896167993546, "learning_rate": 4.219319955503049e-05, "loss": 0.18905138969421387, "step": 135070 }, { "epoch": 0.5799266719902458, "grad_norm": 1.3885791301727295, "learning_rate": 4.218888783491286e-05, "loss": 0.2527278423309326, "step": 135080 }, { "epoch": 0.5799696040802659, "grad_norm": 0.007560020312666893, "learning_rate": 4.2184576114795235e-05, "loss": 0.0921245813369751, "step": 135090 }, { "epoch": 0.5800125361702858, "grad_norm": 0.008850879967212677, "learning_rate": 4.218026439467761e-05, "loss": 0.3411016702651978, "step": 135100 }, { "epoch": 0.5800554682603059, "grad_norm": 0.023805882781744003, "learning_rate": 4.2175952674559996e-05, "loss": 0.17042380571365356, "step": 135110 }, { "epoch": 0.5800984003503259, "grad_norm": 0.17893634736537933, "learning_rate": 4.217164095444237e-05, "loss": 0.3274564266204834, "step": 135120 }, { "epoch": 0.5801413324403458, "grad_norm": 0.0015078054275363684, "learning_rate": 4.2167329234324744e-05, "loss": 0.4564652442932129, "step": 135130 }, { "epoch": 0.5801842645303659, "grad_norm": 0.9611347317695618, "learning_rate": 4.216301751420712e-05, "loss": 0.183595871925354, "step": 135140 }, { "epoch": 0.5802271966203859, "grad_norm": 0.0027880992274731398, "learning_rate": 4.21587057940895e-05, "loss": 0.2240854024887085, "step": 135150 }, { "epoch": 0.5802701287104058, "grad_norm": 9.634644508361816, "learning_rate": 4.215439407397187e-05, "loss": 0.24422330856323243, "step": 135160 }, { "epoch": 0.5803130608004259, "grad_norm": 0.005043448880314827, "learning_rate": 4.215008235385425e-05, "loss": 0.3264390230178833, "step": 135170 }, { "epoch": 0.5803559928904459, "grad_norm": 0.02733713388442993, "learning_rate": 4.2145770633736624e-05, "loss": 0.3870868682861328, "step": 135180 }, { "epoch": 0.5803989249804659, "grad_norm": 2.4984142780303955, "learning_rate": 4.2141458913619e-05, "loss": 0.2757649183273315, "step": 135190 }, { "epoch": 0.5804418570704859, "grad_norm": 0.08463925123214722, "learning_rate": 4.213714719350137e-05, "loss": 0.07629125714302062, "step": 135200 }, { "epoch": 0.5804847891605059, "grad_norm": 0.5990811586380005, "learning_rate": 4.213283547338375e-05, "loss": 0.12150636911392212, "step": 135210 }, { "epoch": 0.5805277212505259, "grad_norm": 0.08332854509353638, "learning_rate": 4.2128523753266134e-05, "loss": 0.14046154022216797, "step": 135220 }, { "epoch": 0.5805706533405459, "grad_norm": 0.011388557031750679, "learning_rate": 4.212421203314851e-05, "loss": 0.005377927795052528, "step": 135230 }, { "epoch": 0.580613585430566, "grad_norm": 0.5718120336532593, "learning_rate": 4.211990031303088e-05, "loss": 0.1931094765663147, "step": 135240 }, { "epoch": 0.5806565175205859, "grad_norm": 0.014416528865695, "learning_rate": 4.211558859291326e-05, "loss": 0.2976668357849121, "step": 135250 }, { "epoch": 0.5806994496106059, "grad_norm": 0.021259639412164688, "learning_rate": 4.2111276872795636e-05, "loss": 0.11948213577270508, "step": 135260 }, { "epoch": 0.580742381700626, "grad_norm": 2.3595197200775146, "learning_rate": 4.2106965152678013e-05, "loss": 0.2929967164993286, "step": 135270 }, { "epoch": 0.5807853137906459, "grad_norm": 2.1110732555389404, "learning_rate": 4.2102653432560384e-05, "loss": 0.21944453716278076, "step": 135280 }, { "epoch": 0.580828245880666, "grad_norm": 0.5368881225585938, "learning_rate": 4.209834171244276e-05, "loss": 0.13306208848953247, "step": 135290 }, { "epoch": 0.580871177970686, "grad_norm": 0.3135150074958801, "learning_rate": 4.209402999232514e-05, "loss": 0.16415667533874512, "step": 135300 }, { "epoch": 0.5809141100607059, "grad_norm": 1.6263655424118042, "learning_rate": 4.2089718272207516e-05, "loss": 0.3677335262298584, "step": 135310 }, { "epoch": 0.580957042150726, "grad_norm": 1.274057149887085, "learning_rate": 4.2085406552089893e-05, "loss": 0.1566999673843384, "step": 135320 }, { "epoch": 0.580999974240746, "grad_norm": 0.06209641322493553, "learning_rate": 4.208109483197227e-05, "loss": 0.22127158641815187, "step": 135330 }, { "epoch": 0.5810429063307659, "grad_norm": 0.05129466578364372, "learning_rate": 4.207678311185465e-05, "loss": 0.2501818180084229, "step": 135340 }, { "epoch": 0.581085838420786, "grad_norm": 0.02592042274773121, "learning_rate": 4.2072471391737025e-05, "loss": 0.3072038650512695, "step": 135350 }, { "epoch": 0.581128770510806, "grad_norm": 0.02230093814432621, "learning_rate": 4.20681596716194e-05, "loss": 0.32774603366851807, "step": 135360 }, { "epoch": 0.5811717026008261, "grad_norm": 0.9433190822601318, "learning_rate": 4.206384795150177e-05, "loss": 0.3484883546829224, "step": 135370 }, { "epoch": 0.581214634690846, "grad_norm": 0.004100241232663393, "learning_rate": 4.205953623138415e-05, "loss": 0.25763185024261476, "step": 135380 }, { "epoch": 0.581257566780866, "grad_norm": 0.16599683463573456, "learning_rate": 4.205522451126653e-05, "loss": 0.2505820274353027, "step": 135390 }, { "epoch": 0.5813004988708861, "grad_norm": 0.03479776531457901, "learning_rate": 4.2050912791148905e-05, "loss": 0.16014200448989868, "step": 135400 }, { "epoch": 0.581343430960906, "grad_norm": 2.5248851776123047, "learning_rate": 4.2046601071031276e-05, "loss": 0.3151650667190552, "step": 135410 }, { "epoch": 0.5813863630509261, "grad_norm": 0.9223288893699646, "learning_rate": 4.204228935091365e-05, "loss": 0.2693711996078491, "step": 135420 }, { "epoch": 0.5814292951409461, "grad_norm": 0.21809233725070953, "learning_rate": 4.203797763079603e-05, "loss": 0.0456573337316513, "step": 135430 }, { "epoch": 0.581472227230966, "grad_norm": 1.4331225156784058, "learning_rate": 4.203366591067841e-05, "loss": 0.18495348691940308, "step": 135440 }, { "epoch": 0.5815151593209861, "grad_norm": 5.573430061340332, "learning_rate": 4.2029354190560785e-05, "loss": 0.3089458465576172, "step": 135450 }, { "epoch": 0.5815580914110061, "grad_norm": 0.005199179518967867, "learning_rate": 4.202504247044316e-05, "loss": 0.1684165120124817, "step": 135460 }, { "epoch": 0.581601023501026, "grad_norm": 1.1721888780593872, "learning_rate": 4.202073075032554e-05, "loss": 0.30724520683288575, "step": 135470 }, { "epoch": 0.5816439555910461, "grad_norm": 0.004575973842293024, "learning_rate": 4.201641903020792e-05, "loss": 0.15588613748550414, "step": 135480 }, { "epoch": 0.5816868876810661, "grad_norm": 0.021832408383488655, "learning_rate": 4.201210731009029e-05, "loss": 0.04639666378498077, "step": 135490 }, { "epoch": 0.5817298197710861, "grad_norm": 2.099874973297119, "learning_rate": 4.2007795589972665e-05, "loss": 0.23207998275756836, "step": 135500 }, { "epoch": 0.5817727518611061, "grad_norm": 0.16797228157520294, "learning_rate": 4.200348386985504e-05, "loss": 0.18904383182525636, "step": 135510 }, { "epoch": 0.5818156839511262, "grad_norm": 0.009826351888477802, "learning_rate": 4.199917214973742e-05, "loss": 0.13186975717544555, "step": 135520 }, { "epoch": 0.5818586160411461, "grad_norm": 1.4313544034957886, "learning_rate": 4.199486042961979e-05, "loss": 0.191562283039093, "step": 135530 }, { "epoch": 0.5819015481311661, "grad_norm": 0.16057445108890533, "learning_rate": 4.199054870950217e-05, "loss": 0.07290871739387512, "step": 135540 }, { "epoch": 0.5819444802211862, "grad_norm": 1.3174405097961426, "learning_rate": 4.1986236989384545e-05, "loss": 0.20775175094604492, "step": 135550 }, { "epoch": 0.5819874123112061, "grad_norm": 1.7092238664627075, "learning_rate": 4.198192526926692e-05, "loss": 0.22169027328491211, "step": 135560 }, { "epoch": 0.5820303444012261, "grad_norm": 0.14638565480709076, "learning_rate": 4.19776135491493e-05, "loss": 0.11512167453765869, "step": 135570 }, { "epoch": 0.5820732764912462, "grad_norm": 0.22287026047706604, "learning_rate": 4.197330182903168e-05, "loss": 0.31687591075897215, "step": 135580 }, { "epoch": 0.5821162085812661, "grad_norm": 1.248632788658142, "learning_rate": 4.1968990108914055e-05, "loss": 0.16288487911224364, "step": 135590 }, { "epoch": 0.5821591406712862, "grad_norm": 1.9880515336990356, "learning_rate": 4.196467838879643e-05, "loss": 0.07543573975563049, "step": 135600 }, { "epoch": 0.5822020727613062, "grad_norm": 0.03087700717151165, "learning_rate": 4.19603666686788e-05, "loss": 0.23086144924163818, "step": 135610 }, { "epoch": 0.5822450048513261, "grad_norm": 0.011071198619902134, "learning_rate": 4.195605494856118e-05, "loss": 0.3214784383773804, "step": 135620 }, { "epoch": 0.5822879369413462, "grad_norm": 1.2527273893356323, "learning_rate": 4.195174322844356e-05, "loss": 0.5449256896972656, "step": 135630 }, { "epoch": 0.5823308690313662, "grad_norm": 8.132579803466797, "learning_rate": 4.1947431508325934e-05, "loss": 0.29090328216552735, "step": 135640 }, { "epoch": 0.5823738011213861, "grad_norm": 6.972388744354248, "learning_rate": 4.1943119788208305e-05, "loss": 0.3711055278778076, "step": 135650 }, { "epoch": 0.5824167332114062, "grad_norm": 0.29669734835624695, "learning_rate": 4.193880806809068e-05, "loss": 0.27991471290588377, "step": 135660 }, { "epoch": 0.5824596653014262, "grad_norm": 0.022353434935212135, "learning_rate": 4.193449634797306e-05, "loss": 0.07086479663848877, "step": 135670 }, { "epoch": 0.5825025973914462, "grad_norm": 0.013063160702586174, "learning_rate": 4.193018462785544e-05, "loss": 0.2063464879989624, "step": 135680 }, { "epoch": 0.5825455294814662, "grad_norm": 1.8125168085098267, "learning_rate": 4.1925872907737814e-05, "loss": 0.2858951330184937, "step": 135690 }, { "epoch": 0.5825884615714862, "grad_norm": 0.05596992373466492, "learning_rate": 4.192156118762019e-05, "loss": 0.08188768625259399, "step": 135700 }, { "epoch": 0.5826313936615062, "grad_norm": 0.027829289436340332, "learning_rate": 4.191724946750257e-05, "loss": 0.3078721761703491, "step": 135710 }, { "epoch": 0.5826743257515262, "grad_norm": 0.04408969357609749, "learning_rate": 4.1912937747384946e-05, "loss": 0.26244316101074217, "step": 135720 }, { "epoch": 0.5827172578415463, "grad_norm": 0.01925904117524624, "learning_rate": 4.1908626027267324e-05, "loss": 0.10048316717147827, "step": 135730 }, { "epoch": 0.5827601899315662, "grad_norm": 0.034861356019973755, "learning_rate": 4.1904314307149694e-05, "loss": 0.266707706451416, "step": 135740 }, { "epoch": 0.5828031220215862, "grad_norm": 0.011179475113749504, "learning_rate": 4.190000258703207e-05, "loss": 0.118767249584198, "step": 135750 }, { "epoch": 0.5828460541116063, "grad_norm": 0.34233781695365906, "learning_rate": 4.189569086691445e-05, "loss": 0.21334223747253417, "step": 135760 }, { "epoch": 0.5828889862016262, "grad_norm": 1.855711817741394, "learning_rate": 4.1891379146796826e-05, "loss": 0.2099367380142212, "step": 135770 }, { "epoch": 0.5829319182916463, "grad_norm": 0.0024516810663044453, "learning_rate": 4.18870674266792e-05, "loss": 0.1486857533454895, "step": 135780 }, { "epoch": 0.5829748503816663, "grad_norm": 0.004157458897680044, "learning_rate": 4.1882755706561574e-05, "loss": 0.2245945692062378, "step": 135790 }, { "epoch": 0.5830177824716863, "grad_norm": 0.8604725003242493, "learning_rate": 4.187844398644395e-05, "loss": 0.21948962211608886, "step": 135800 }, { "epoch": 0.5830607145617063, "grad_norm": 0.6217984557151794, "learning_rate": 4.1874132266326336e-05, "loss": 0.2490144968032837, "step": 135810 }, { "epoch": 0.5831036466517263, "grad_norm": 0.030795378610491753, "learning_rate": 4.1869820546208706e-05, "loss": 0.3707392930984497, "step": 135820 }, { "epoch": 0.5831465787417464, "grad_norm": 0.021570665761828423, "learning_rate": 4.1865508826091084e-05, "loss": 0.4674999237060547, "step": 135830 }, { "epoch": 0.5831895108317663, "grad_norm": 0.17849326133728027, "learning_rate": 4.186119710597346e-05, "loss": 0.11789641380310059, "step": 135840 }, { "epoch": 0.5832324429217863, "grad_norm": 0.024820350110530853, "learning_rate": 4.185688538585584e-05, "loss": 0.0743901014328003, "step": 135850 }, { "epoch": 0.5832753750118064, "grad_norm": 0.003563225269317627, "learning_rate": 4.185257366573821e-05, "loss": 0.2821930408477783, "step": 135860 }, { "epoch": 0.5833183071018263, "grad_norm": 0.21024373173713684, "learning_rate": 4.1848261945620586e-05, "loss": 0.04329926371574402, "step": 135870 }, { "epoch": 0.5833612391918463, "grad_norm": 0.002765827113762498, "learning_rate": 4.1843950225502964e-05, "loss": 0.297293496131897, "step": 135880 }, { "epoch": 0.5834041712818664, "grad_norm": 0.0005890359170734882, "learning_rate": 4.183963850538534e-05, "loss": 0.05487467646598816, "step": 135890 }, { "epoch": 0.5834471033718863, "grad_norm": 0.00514583382755518, "learning_rate": 4.183532678526771e-05, "loss": 0.06383908390998841, "step": 135900 }, { "epoch": 0.5834900354619064, "grad_norm": 0.1808479130268097, "learning_rate": 4.183101506515009e-05, "loss": 0.2124013900756836, "step": 135910 }, { "epoch": 0.5835329675519264, "grad_norm": 0.027015380561351776, "learning_rate": 4.182670334503247e-05, "loss": 0.09643960595130921, "step": 135920 }, { "epoch": 0.5835758996419463, "grad_norm": 2.4606871604919434, "learning_rate": 4.182239162491485e-05, "loss": 0.3026925325393677, "step": 135930 }, { "epoch": 0.5836188317319664, "grad_norm": 0.0760890394449234, "learning_rate": 4.181807990479722e-05, "loss": 0.4359142303466797, "step": 135940 }, { "epoch": 0.5836617638219864, "grad_norm": 0.12953132390975952, "learning_rate": 4.18137681846796e-05, "loss": 0.2932579040527344, "step": 135950 }, { "epoch": 0.5837046959120064, "grad_norm": 1.3948577642440796, "learning_rate": 4.1809456464561976e-05, "loss": 0.25689287185668946, "step": 135960 }, { "epoch": 0.5837476280020264, "grad_norm": 0.15391452610492706, "learning_rate": 4.180514474444435e-05, "loss": 0.3240983486175537, "step": 135970 }, { "epoch": 0.5837905600920464, "grad_norm": 0.2480820119380951, "learning_rate": 4.1800833024326723e-05, "loss": 0.031171566247940062, "step": 135980 }, { "epoch": 0.5838334921820664, "grad_norm": 0.11079661548137665, "learning_rate": 4.17965213042091e-05, "loss": 0.34694485664367675, "step": 135990 }, { "epoch": 0.5838764242720864, "grad_norm": 1.795325517654419, "learning_rate": 4.179220958409148e-05, "loss": 0.08840734362602234, "step": 136000 }, { "epoch": 0.5838764242720864, "eval_loss": 0.4013851284980774, "eval_runtime": 27.0902, "eval_samples_per_second": 3.691, "eval_steps_per_second": 3.691, "step": 136000 }, { "epoch": 0.5839193563621065, "grad_norm": 0.025133926421403885, "learning_rate": 4.1787897863973856e-05, "loss": 0.1564624071121216, "step": 136010 }, { "epoch": 0.5839622884521264, "grad_norm": 0.009930574335157871, "learning_rate": 4.1783586143856226e-05, "loss": 0.1523146629333496, "step": 136020 }, { "epoch": 0.5840052205421464, "grad_norm": 0.08300717920064926, "learning_rate": 4.177927442373861e-05, "loss": 0.09917604327201843, "step": 136030 }, { "epoch": 0.5840481526321665, "grad_norm": 1.5571662187576294, "learning_rate": 4.177496270362099e-05, "loss": 0.2037580966949463, "step": 136040 }, { "epoch": 0.5840910847221864, "grad_norm": 0.001757492427714169, "learning_rate": 4.1770650983503365e-05, "loss": 0.21353843212127685, "step": 136050 }, { "epoch": 0.5841340168122064, "grad_norm": 0.0005412331083789468, "learning_rate": 4.176633926338574e-05, "loss": 0.09854020476341248, "step": 136060 }, { "epoch": 0.5841769489022265, "grad_norm": 1.7572810649871826, "learning_rate": 4.176202754326811e-05, "loss": 0.15027815103530884, "step": 136070 }, { "epoch": 0.5842198809922464, "grad_norm": 0.08901038765907288, "learning_rate": 4.175771582315049e-05, "loss": 0.036554208397865294, "step": 136080 }, { "epoch": 0.5842628130822665, "grad_norm": 0.03874312341213226, "learning_rate": 4.175340410303287e-05, "loss": 0.08065502643585205, "step": 136090 }, { "epoch": 0.5843057451722865, "grad_norm": 0.0010567232966423035, "learning_rate": 4.1749092382915245e-05, "loss": 0.11951396465301514, "step": 136100 }, { "epoch": 0.5843486772623064, "grad_norm": 0.32772913575172424, "learning_rate": 4.1744780662797615e-05, "loss": 0.007878247648477554, "step": 136110 }, { "epoch": 0.5843916093523265, "grad_norm": 10.457266807556152, "learning_rate": 4.174046894267999e-05, "loss": 0.3108761072158813, "step": 136120 }, { "epoch": 0.5844345414423465, "grad_norm": 5.449347972869873, "learning_rate": 4.173615722256237e-05, "loss": 0.09093397259712219, "step": 136130 }, { "epoch": 0.5844774735323665, "grad_norm": 0.06236616149544716, "learning_rate": 4.173184550244475e-05, "loss": 0.202095890045166, "step": 136140 }, { "epoch": 0.5845204056223865, "grad_norm": 0.038584187626838684, "learning_rate": 4.1727533782327125e-05, "loss": 0.16632542610168458, "step": 136150 }, { "epoch": 0.5845633377124065, "grad_norm": 0.006404801271855831, "learning_rate": 4.17232220622095e-05, "loss": 0.15790761709213258, "step": 136160 }, { "epoch": 0.5846062698024265, "grad_norm": 1.5933825969696045, "learning_rate": 4.171891034209188e-05, "loss": 0.326295280456543, "step": 136170 }, { "epoch": 0.5846492018924465, "grad_norm": 0.18334230780601501, "learning_rate": 4.171459862197426e-05, "loss": 0.16310806274414064, "step": 136180 }, { "epoch": 0.5846921339824666, "grad_norm": 2.3053152561187744, "learning_rate": 4.171028690185663e-05, "loss": 0.13464561700820923, "step": 136190 }, { "epoch": 0.5847350660724865, "grad_norm": 0.6618971228599548, "learning_rate": 4.1705975181739005e-05, "loss": 0.12008780241012573, "step": 136200 }, { "epoch": 0.5847779981625065, "grad_norm": 0.05321823060512543, "learning_rate": 4.170166346162138e-05, "loss": 0.2755621671676636, "step": 136210 }, { "epoch": 0.5848209302525266, "grad_norm": 0.3745076656341553, "learning_rate": 4.169735174150376e-05, "loss": 0.22431304454803466, "step": 136220 }, { "epoch": 0.5848638623425466, "grad_norm": 0.003494755132123828, "learning_rate": 4.169304002138613e-05, "loss": 0.09709044694900512, "step": 136230 }, { "epoch": 0.5849067944325665, "grad_norm": 0.010988358408212662, "learning_rate": 4.168872830126851e-05, "loss": 0.30329973697662355, "step": 136240 }, { "epoch": 0.5849497265225866, "grad_norm": 0.014921767637133598, "learning_rate": 4.1684416581150885e-05, "loss": 0.171160888671875, "step": 136250 }, { "epoch": 0.5849926586126066, "grad_norm": 0.07725328207015991, "learning_rate": 4.168010486103326e-05, "loss": 0.04647146165370941, "step": 136260 }, { "epoch": 0.5850355907026266, "grad_norm": 0.023668555542826653, "learning_rate": 4.167579314091564e-05, "loss": 0.09584589004516601, "step": 136270 }, { "epoch": 0.5850785227926466, "grad_norm": 0.9428960680961609, "learning_rate": 4.167148142079802e-05, "loss": 0.26876513957977294, "step": 136280 }, { "epoch": 0.5851214548826666, "grad_norm": 2.354199171066284, "learning_rate": 4.1667169700680394e-05, "loss": 0.13666833639144899, "step": 136290 }, { "epoch": 0.5851643869726866, "grad_norm": 0.027356013655662537, "learning_rate": 4.166285798056277e-05, "loss": 0.6024814128875733, "step": 136300 }, { "epoch": 0.5852073190627066, "grad_norm": 3.293339252471924, "learning_rate": 4.165854626044514e-05, "loss": 0.29547069072723386, "step": 136310 }, { "epoch": 0.5852502511527267, "grad_norm": 0.022677229717373848, "learning_rate": 4.165423454032752e-05, "loss": 0.22778058052062988, "step": 136320 }, { "epoch": 0.5852931832427466, "grad_norm": 0.14646054804325104, "learning_rate": 4.16499228202099e-05, "loss": 0.3935666084289551, "step": 136330 }, { "epoch": 0.5853361153327666, "grad_norm": 0.07201693207025528, "learning_rate": 4.1645611100092274e-05, "loss": 0.20393807888031007, "step": 136340 }, { "epoch": 0.5853790474227867, "grad_norm": 0.009186267852783203, "learning_rate": 4.1641299379974645e-05, "loss": 0.2039719820022583, "step": 136350 }, { "epoch": 0.5854219795128066, "grad_norm": 0.13840673863887787, "learning_rate": 4.163698765985702e-05, "loss": 0.27695910930633544, "step": 136360 }, { "epoch": 0.5854649116028267, "grad_norm": 0.0026640286669135094, "learning_rate": 4.16326759397394e-05, "loss": 0.2907125949859619, "step": 136370 }, { "epoch": 0.5855078436928467, "grad_norm": 5.028379917144775, "learning_rate": 4.1628364219621777e-05, "loss": 0.2319865942001343, "step": 136380 }, { "epoch": 0.5855507757828666, "grad_norm": 0.854225218296051, "learning_rate": 4.1624052499504154e-05, "loss": 0.21682953834533691, "step": 136390 }, { "epoch": 0.5855937078728867, "grad_norm": 0.0029383532237261534, "learning_rate": 4.161974077938653e-05, "loss": 0.10343109369277954, "step": 136400 }, { "epoch": 0.5856366399629067, "grad_norm": 2.0140578746795654, "learning_rate": 4.161542905926891e-05, "loss": 0.47559642791748047, "step": 136410 }, { "epoch": 0.5856795720529266, "grad_norm": 1.8081625699996948, "learning_rate": 4.1611117339151286e-05, "loss": 0.22019548416137696, "step": 136420 }, { "epoch": 0.5857225041429467, "grad_norm": 0.3506382703781128, "learning_rate": 4.160680561903366e-05, "loss": 0.3239941358566284, "step": 136430 }, { "epoch": 0.5857654362329667, "grad_norm": 0.23854027688503265, "learning_rate": 4.1602493898916034e-05, "loss": 0.2802767276763916, "step": 136440 }, { "epoch": 0.5858083683229867, "grad_norm": 0.02299383655190468, "learning_rate": 4.159818217879841e-05, "loss": 0.1584943175315857, "step": 136450 }, { "epoch": 0.5858513004130067, "grad_norm": 0.9188843369483948, "learning_rate": 4.159387045868079e-05, "loss": 0.10783770084381103, "step": 136460 }, { "epoch": 0.5858942325030267, "grad_norm": 6.9701714515686035, "learning_rate": 4.1589558738563166e-05, "loss": 0.11004831790924072, "step": 136470 }, { "epoch": 0.5859371645930467, "grad_norm": 0.01801559142768383, "learning_rate": 4.1585247018445536e-05, "loss": 0.14326504468917847, "step": 136480 }, { "epoch": 0.5859800966830667, "grad_norm": 0.0658947303891182, "learning_rate": 4.1580935298327914e-05, "loss": 0.1374224305152893, "step": 136490 }, { "epoch": 0.5860230287730868, "grad_norm": 0.2008223682641983, "learning_rate": 4.157662357821029e-05, "loss": 0.30278620719909666, "step": 136500 }, { "epoch": 0.5860659608631067, "grad_norm": 0.3907320201396942, "learning_rate": 4.157231185809267e-05, "loss": 0.2772698163986206, "step": 136510 }, { "epoch": 0.5861088929531267, "grad_norm": 0.04549934342503548, "learning_rate": 4.1568000137975046e-05, "loss": 0.10799868106842041, "step": 136520 }, { "epoch": 0.5861518250431468, "grad_norm": 0.05788165330886841, "learning_rate": 4.156368841785742e-05, "loss": 0.22272298336029053, "step": 136530 }, { "epoch": 0.5861947571331667, "grad_norm": 0.045943133533000946, "learning_rate": 4.15593766977398e-05, "loss": 0.04048386216163635, "step": 136540 }, { "epoch": 0.5862376892231868, "grad_norm": 0.02736165188252926, "learning_rate": 4.155506497762218e-05, "loss": 0.21302416324615478, "step": 136550 }, { "epoch": 0.5862806213132068, "grad_norm": 0.0015208977274596691, "learning_rate": 4.155075325750455e-05, "loss": 0.04716380536556244, "step": 136560 }, { "epoch": 0.5863235534032267, "grad_norm": 0.8822836875915527, "learning_rate": 4.1546441537386926e-05, "loss": 0.10152556896209716, "step": 136570 }, { "epoch": 0.5863664854932468, "grad_norm": 0.005391272716224194, "learning_rate": 4.15421298172693e-05, "loss": 0.38786365985870364, "step": 136580 }, { "epoch": 0.5864094175832668, "grad_norm": 0.001887031365185976, "learning_rate": 4.153781809715168e-05, "loss": 0.16974475383758544, "step": 136590 }, { "epoch": 0.5864523496732867, "grad_norm": 0.05451970919966698, "learning_rate": 4.153350637703405e-05, "loss": 0.0324835479259491, "step": 136600 }, { "epoch": 0.5864952817633068, "grad_norm": 0.5006858706474304, "learning_rate": 4.152919465691643e-05, "loss": 0.01447709947824478, "step": 136610 }, { "epoch": 0.5865382138533268, "grad_norm": 0.9552084803581238, "learning_rate": 4.1524882936798806e-05, "loss": 0.21060488224029542, "step": 136620 }, { "epoch": 0.5865811459433468, "grad_norm": 12.546867370605469, "learning_rate": 4.152057121668119e-05, "loss": 0.28877010345458987, "step": 136630 }, { "epoch": 0.5866240780333668, "grad_norm": 0.008536313660442829, "learning_rate": 4.151625949656356e-05, "loss": 0.25785582065582274, "step": 136640 }, { "epoch": 0.5866670101233868, "grad_norm": 10.071556091308594, "learning_rate": 4.151194777644594e-05, "loss": 0.3655982494354248, "step": 136650 }, { "epoch": 0.5867099422134069, "grad_norm": 0.36693134903907776, "learning_rate": 4.1507636056328315e-05, "loss": 0.12875553369522094, "step": 136660 }, { "epoch": 0.5867528743034268, "grad_norm": 0.004068433307111263, "learning_rate": 4.150332433621069e-05, "loss": 0.19526032209396363, "step": 136670 }, { "epoch": 0.5867958063934469, "grad_norm": 0.011420795693993568, "learning_rate": 4.149901261609306e-05, "loss": 0.047950607538223264, "step": 136680 }, { "epoch": 0.5868387384834669, "grad_norm": 1.7081316709518433, "learning_rate": 4.149470089597544e-05, "loss": 0.14381338357925416, "step": 136690 }, { "epoch": 0.5868816705734868, "grad_norm": 0.000932833063416183, "learning_rate": 4.149038917585782e-05, "loss": 0.09737264513969421, "step": 136700 }, { "epoch": 0.5869246026635069, "grad_norm": 0.6827306747436523, "learning_rate": 4.1486077455740195e-05, "loss": 0.3292029857635498, "step": 136710 }, { "epoch": 0.5869675347535269, "grad_norm": 0.247352734208107, "learning_rate": 4.1481765735622566e-05, "loss": 0.20491859912872315, "step": 136720 }, { "epoch": 0.5870104668435469, "grad_norm": 2.4465835094451904, "learning_rate": 4.147745401550494e-05, "loss": 0.5619071960449219, "step": 136730 }, { "epoch": 0.5870533989335669, "grad_norm": 4.127469062805176, "learning_rate": 4.147314229538733e-05, "loss": 0.30172004699707033, "step": 136740 }, { "epoch": 0.5870963310235869, "grad_norm": 0.15143781900405884, "learning_rate": 4.1468830575269704e-05, "loss": 0.04340499341487884, "step": 136750 }, { "epoch": 0.5871392631136069, "grad_norm": 1.3688448667526245, "learning_rate": 4.146451885515208e-05, "loss": 0.26014227867126466, "step": 136760 }, { "epoch": 0.5871821952036269, "grad_norm": 0.8132941722869873, "learning_rate": 4.146020713503445e-05, "loss": 0.20901427268981934, "step": 136770 }, { "epoch": 0.587225127293647, "grad_norm": 1.0680354833602905, "learning_rate": 4.145589541491683e-05, "loss": 0.13693716526031494, "step": 136780 }, { "epoch": 0.5872680593836669, "grad_norm": 0.024545999243855476, "learning_rate": 4.145158369479921e-05, "loss": 0.17069380283355712, "step": 136790 }, { "epoch": 0.5873109914736869, "grad_norm": 0.020502969622612, "learning_rate": 4.1447271974681584e-05, "loss": 0.09372333884239196, "step": 136800 }, { "epoch": 0.587353923563707, "grad_norm": 1.3037524223327637, "learning_rate": 4.1442960254563955e-05, "loss": 0.2567019462585449, "step": 136810 }, { "epoch": 0.5873968556537269, "grad_norm": 0.127951979637146, "learning_rate": 4.143864853444633e-05, "loss": 0.3536144018173218, "step": 136820 }, { "epoch": 0.5874397877437469, "grad_norm": 0.030075104907155037, "learning_rate": 4.143433681432871e-05, "loss": 0.14973297119140624, "step": 136830 }, { "epoch": 0.587482719833767, "grad_norm": 0.015654215589165688, "learning_rate": 4.143002509421109e-05, "loss": 0.3059299230575562, "step": 136840 }, { "epoch": 0.5875256519237869, "grad_norm": 0.15828372538089752, "learning_rate": 4.1425713374093464e-05, "loss": 0.17411817312240602, "step": 136850 }, { "epoch": 0.587568584013807, "grad_norm": 2.2743732929229736, "learning_rate": 4.142140165397584e-05, "loss": 0.32296817302703856, "step": 136860 }, { "epoch": 0.587611516103827, "grad_norm": 0.006136009003967047, "learning_rate": 4.141708993385822e-05, "loss": 0.1016544222831726, "step": 136870 }, { "epoch": 0.5876544481938469, "grad_norm": 0.22323651611804962, "learning_rate": 4.1412778213740596e-05, "loss": 0.13947161436080932, "step": 136880 }, { "epoch": 0.587697380283867, "grad_norm": 5.105968952178955, "learning_rate": 4.140846649362297e-05, "loss": 0.22951793670654297, "step": 136890 }, { "epoch": 0.587740312373887, "grad_norm": 1.0845637321472168, "learning_rate": 4.1404154773505344e-05, "loss": 0.28329808712005616, "step": 136900 }, { "epoch": 0.587783244463907, "grad_norm": 1.287009835243225, "learning_rate": 4.139984305338772e-05, "loss": 0.3320283889770508, "step": 136910 }, { "epoch": 0.587826176553927, "grad_norm": 0.004043887369334698, "learning_rate": 4.13955313332701e-05, "loss": 0.13691928386688232, "step": 136920 }, { "epoch": 0.587869108643947, "grad_norm": 0.025284389033913612, "learning_rate": 4.139121961315247e-05, "loss": 0.2444899320602417, "step": 136930 }, { "epoch": 0.587912040733967, "grad_norm": 1.5387986898422241, "learning_rate": 4.138690789303485e-05, "loss": 0.3062190055847168, "step": 136940 }, { "epoch": 0.587954972823987, "grad_norm": 0.003443187801167369, "learning_rate": 4.1382596172917224e-05, "loss": 0.07713412046432495, "step": 136950 }, { "epoch": 0.587997904914007, "grad_norm": 0.006569002289324999, "learning_rate": 4.13782844527996e-05, "loss": 0.05037772059440613, "step": 136960 }, { "epoch": 0.588040837004027, "grad_norm": 0.034869421273469925, "learning_rate": 4.137397273268198e-05, "loss": 0.2030102491378784, "step": 136970 }, { "epoch": 0.588083769094047, "grad_norm": 0.007195018697530031, "learning_rate": 4.1369661012564356e-05, "loss": 0.29705181121826174, "step": 136980 }, { "epoch": 0.5881267011840671, "grad_norm": 0.0021977697033435106, "learning_rate": 4.1365349292446733e-05, "loss": 0.39129085540771485, "step": 136990 }, { "epoch": 0.588169633274087, "grad_norm": 4.102361679077148, "learning_rate": 4.136103757232911e-05, "loss": 0.2191321849822998, "step": 137000 }, { "epoch": 0.588169633274087, "eval_loss": 0.3964814841747284, "eval_runtime": 27.2479, "eval_samples_per_second": 3.67, "eval_steps_per_second": 3.67, "step": 137000 }, { "epoch": 0.588212565364107, "grad_norm": 0.01128480490297079, "learning_rate": 4.135672585221148e-05, "loss": 0.16608066558837892, "step": 137010 }, { "epoch": 0.5882554974541271, "grad_norm": 2.656604051589966, "learning_rate": 4.135241413209386e-05, "loss": 0.27288265228271485, "step": 137020 }, { "epoch": 0.588298429544147, "grad_norm": 0.0048962910659611225, "learning_rate": 4.1348102411976236e-05, "loss": 0.3048482656478882, "step": 137030 }, { "epoch": 0.5883413616341671, "grad_norm": 1.3734676837921143, "learning_rate": 4.1343790691858613e-05, "loss": 0.1047094702720642, "step": 137040 }, { "epoch": 0.5883842937241871, "grad_norm": 0.005558597389608622, "learning_rate": 4.1339478971740984e-05, "loss": 0.18236162662506103, "step": 137050 }, { "epoch": 0.588427225814207, "grad_norm": 0.23292341828346252, "learning_rate": 4.133516725162336e-05, "loss": 0.3300944328308105, "step": 137060 }, { "epoch": 0.5884701579042271, "grad_norm": 1.9232966899871826, "learning_rate": 4.133085553150574e-05, "loss": 0.2621130466461182, "step": 137070 }, { "epoch": 0.5885130899942471, "grad_norm": 0.0006944802007637918, "learning_rate": 4.1326543811388116e-05, "loss": 0.26037561893463135, "step": 137080 }, { "epoch": 0.5885560220842672, "grad_norm": 0.08561205863952637, "learning_rate": 4.132223209127049e-05, "loss": 0.2053438186645508, "step": 137090 }, { "epoch": 0.5885989541742871, "grad_norm": 0.37227103114128113, "learning_rate": 4.131792037115287e-05, "loss": 0.22090797424316405, "step": 137100 }, { "epoch": 0.5886418862643071, "grad_norm": 1.0453672409057617, "learning_rate": 4.131360865103525e-05, "loss": 0.3329441547393799, "step": 137110 }, { "epoch": 0.5886848183543272, "grad_norm": 0.08478416502475739, "learning_rate": 4.1309296930917625e-05, "loss": 0.28282577991485597, "step": 137120 }, { "epoch": 0.5887277504443471, "grad_norm": 0.0006766520091332495, "learning_rate": 4.13049852108e-05, "loss": 0.08505859375, "step": 137130 }, { "epoch": 0.5887706825343672, "grad_norm": 1.1568154096603394, "learning_rate": 4.130067349068237e-05, "loss": 0.0769594967365265, "step": 137140 }, { "epoch": 0.5888136146243872, "grad_norm": 0.004215045366436243, "learning_rate": 4.129636177056475e-05, "loss": 0.16643118858337402, "step": 137150 }, { "epoch": 0.5888565467144071, "grad_norm": 12.162041664123535, "learning_rate": 4.129205005044713e-05, "loss": 0.15993156433105468, "step": 137160 }, { "epoch": 0.5888994788044272, "grad_norm": 0.13196447491645813, "learning_rate": 4.1287738330329505e-05, "loss": 0.1578353762626648, "step": 137170 }, { "epoch": 0.5889424108944472, "grad_norm": 0.6302490830421448, "learning_rate": 4.1283426610211876e-05, "loss": 0.1752261519432068, "step": 137180 }, { "epoch": 0.5889853429844671, "grad_norm": 4.5182013511657715, "learning_rate": 4.127911489009425e-05, "loss": 0.3914973497390747, "step": 137190 }, { "epoch": 0.5890282750744872, "grad_norm": 0.09273222088813782, "learning_rate": 4.127480316997663e-05, "loss": 0.0532687246799469, "step": 137200 }, { "epoch": 0.5890712071645072, "grad_norm": 0.080787293612957, "learning_rate": 4.127049144985901e-05, "loss": 0.14222522974014282, "step": 137210 }, { "epoch": 0.5891141392545272, "grad_norm": 1.3788886070251465, "learning_rate": 4.1266179729741385e-05, "loss": 0.22479968070983886, "step": 137220 }, { "epoch": 0.5891570713445472, "grad_norm": 0.0028310460038483143, "learning_rate": 4.126186800962376e-05, "loss": 0.2262204885482788, "step": 137230 }, { "epoch": 0.5892000034345672, "grad_norm": 3.5362157821655273, "learning_rate": 4.125755628950614e-05, "loss": 0.4202101230621338, "step": 137240 }, { "epoch": 0.5892429355245872, "grad_norm": 0.7681101560592651, "learning_rate": 4.125324456938852e-05, "loss": 0.19651840925216674, "step": 137250 }, { "epoch": 0.5892858676146072, "grad_norm": 0.762321949005127, "learning_rate": 4.124893284927089e-05, "loss": 0.362211275100708, "step": 137260 }, { "epoch": 0.5893287997046273, "grad_norm": 0.074244923889637, "learning_rate": 4.1244621129153265e-05, "loss": 0.13443008661270142, "step": 137270 }, { "epoch": 0.5893717317946472, "grad_norm": 0.258041650056839, "learning_rate": 4.124030940903564e-05, "loss": 0.1949497103691101, "step": 137280 }, { "epoch": 0.5894146638846672, "grad_norm": 0.00303852092474699, "learning_rate": 4.123599768891802e-05, "loss": 0.056662237644195555, "step": 137290 }, { "epoch": 0.5894575959746873, "grad_norm": 0.027162009850144386, "learning_rate": 4.123168596880039e-05, "loss": 0.2888782501220703, "step": 137300 }, { "epoch": 0.5895005280647072, "grad_norm": 4.259036064147949, "learning_rate": 4.122737424868277e-05, "loss": 0.4366584300994873, "step": 137310 }, { "epoch": 0.5895434601547273, "grad_norm": 0.08108695596456528, "learning_rate": 4.1223062528565145e-05, "loss": 0.02968863248825073, "step": 137320 }, { "epoch": 0.5895863922447473, "grad_norm": 1.7827603816986084, "learning_rate": 4.121875080844753e-05, "loss": 0.3578782081604004, "step": 137330 }, { "epoch": 0.5896293243347672, "grad_norm": 1.2169469594955444, "learning_rate": 4.12144390883299e-05, "loss": 0.074959796667099, "step": 137340 }, { "epoch": 0.5896722564247873, "grad_norm": 1.509886384010315, "learning_rate": 4.121012736821228e-05, "loss": 0.3637458086013794, "step": 137350 }, { "epoch": 0.5897151885148073, "grad_norm": 1.6214172840118408, "learning_rate": 4.1205815648094655e-05, "loss": 0.22257492542266846, "step": 137360 }, { "epoch": 0.5897581206048272, "grad_norm": 0.003348621539771557, "learning_rate": 4.120150392797703e-05, "loss": 0.07453447580337524, "step": 137370 }, { "epoch": 0.5898010526948473, "grad_norm": 0.012746520340442657, "learning_rate": 4.11971922078594e-05, "loss": 0.30892822742462156, "step": 137380 }, { "epoch": 0.5898439847848673, "grad_norm": 4.19691276550293, "learning_rate": 4.119288048774178e-05, "loss": 0.2979278564453125, "step": 137390 }, { "epoch": 0.5898869168748873, "grad_norm": 2.4384191036224365, "learning_rate": 4.118856876762416e-05, "loss": 0.21095747947692872, "step": 137400 }, { "epoch": 0.5899298489649073, "grad_norm": 0.03696135804057121, "learning_rate": 4.1184257047506534e-05, "loss": 0.10608190298080444, "step": 137410 }, { "epoch": 0.5899727810549273, "grad_norm": 0.05472942441701889, "learning_rate": 4.1179945327388905e-05, "loss": 0.04571995139122009, "step": 137420 }, { "epoch": 0.5900157131449473, "grad_norm": 0.10192541033029556, "learning_rate": 4.117563360727128e-05, "loss": 0.10677586793899536, "step": 137430 }, { "epoch": 0.5900586452349673, "grad_norm": 5.087360858917236, "learning_rate": 4.1171321887153666e-05, "loss": 0.3197747230529785, "step": 137440 }, { "epoch": 0.5901015773249874, "grad_norm": 0.09760670363903046, "learning_rate": 4.1167010167036044e-05, "loss": 0.05229000449180603, "step": 137450 }, { "epoch": 0.5901445094150073, "grad_norm": 0.377534419298172, "learning_rate": 4.1162698446918414e-05, "loss": 0.12750136852264404, "step": 137460 }, { "epoch": 0.5901874415050273, "grad_norm": 0.02546530030667782, "learning_rate": 4.115838672680079e-05, "loss": 0.16499568223953248, "step": 137470 }, { "epoch": 0.5902303735950474, "grad_norm": 3.4862186908721924, "learning_rate": 4.115407500668317e-05, "loss": 0.31398231983184816, "step": 137480 }, { "epoch": 0.5902733056850673, "grad_norm": 7.320980072021484, "learning_rate": 4.1149763286565546e-05, "loss": 0.25650997161865235, "step": 137490 }, { "epoch": 0.5903162377750873, "grad_norm": 15.762680053710938, "learning_rate": 4.1145451566447924e-05, "loss": 0.4242871284484863, "step": 137500 }, { "epoch": 0.5903591698651074, "grad_norm": 0.008897298946976662, "learning_rate": 4.1141139846330294e-05, "loss": 0.11407696008682251, "step": 137510 }, { "epoch": 0.5904021019551274, "grad_norm": 33.672706604003906, "learning_rate": 4.113682812621267e-05, "loss": 0.3351729869842529, "step": 137520 }, { "epoch": 0.5904450340451474, "grad_norm": 8.253597259521484, "learning_rate": 4.113251640609505e-05, "loss": 0.5670902252197265, "step": 137530 }, { "epoch": 0.5904879661351674, "grad_norm": 0.49867475032806396, "learning_rate": 4.1128204685977426e-05, "loss": 0.07423478364944458, "step": 137540 }, { "epoch": 0.5905308982251875, "grad_norm": 0.0026952065527439117, "learning_rate": 4.1123892965859804e-05, "loss": 0.2668862819671631, "step": 137550 }, { "epoch": 0.5905738303152074, "grad_norm": 0.010958666913211346, "learning_rate": 4.111958124574218e-05, "loss": 0.07709743976593017, "step": 137560 }, { "epoch": 0.5906167624052274, "grad_norm": 0.01955607905983925, "learning_rate": 4.111526952562456e-05, "loss": 0.09136215448379517, "step": 137570 }, { "epoch": 0.5906596944952475, "grad_norm": 0.005562023725360632, "learning_rate": 4.1110957805506936e-05, "loss": 0.1114089846611023, "step": 137580 }, { "epoch": 0.5907026265852674, "grad_norm": 0.03999653458595276, "learning_rate": 4.1106646085389306e-05, "loss": 0.16414806842803956, "step": 137590 }, { "epoch": 0.5907455586752874, "grad_norm": 0.008433864451944828, "learning_rate": 4.1102334365271684e-05, "loss": 0.3284775257110596, "step": 137600 }, { "epoch": 0.5907884907653075, "grad_norm": 0.011860636994242668, "learning_rate": 4.109802264515406e-05, "loss": 0.2962829828262329, "step": 137610 }, { "epoch": 0.5908314228553274, "grad_norm": 0.029396725818514824, "learning_rate": 4.109371092503644e-05, "loss": 0.20384597778320312, "step": 137620 }, { "epoch": 0.5908743549453475, "grad_norm": 1.1890825033187866, "learning_rate": 4.108939920491881e-05, "loss": 0.12327685356140136, "step": 137630 }, { "epoch": 0.5909172870353675, "grad_norm": 1.923099398612976, "learning_rate": 4.1085087484801186e-05, "loss": 0.18159937858581543, "step": 137640 }, { "epoch": 0.5909602191253874, "grad_norm": 0.0022021017502993345, "learning_rate": 4.1080775764683564e-05, "loss": 0.2301816463470459, "step": 137650 }, { "epoch": 0.5910031512154075, "grad_norm": 1.2682898044586182, "learning_rate": 4.107646404456594e-05, "loss": 0.2662432909011841, "step": 137660 }, { "epoch": 0.5910460833054275, "grad_norm": 0.0558965802192688, "learning_rate": 4.107215232444832e-05, "loss": 0.24787378311157227, "step": 137670 }, { "epoch": 0.5910890153954474, "grad_norm": 0.0006554294959641993, "learning_rate": 4.1067840604330696e-05, "loss": 0.08674585819244385, "step": 137680 }, { "epoch": 0.5911319474854675, "grad_norm": 0.0752268061041832, "learning_rate": 4.106352888421307e-05, "loss": 0.3500924587249756, "step": 137690 }, { "epoch": 0.5911748795754875, "grad_norm": 4.126988410949707, "learning_rate": 4.105921716409545e-05, "loss": 0.12794922590255736, "step": 137700 }, { "epoch": 0.5912178116655075, "grad_norm": 2.2941765785217285, "learning_rate": 4.105490544397782e-05, "loss": 0.14108363389968873, "step": 137710 }, { "epoch": 0.5912607437555275, "grad_norm": 0.008788630366325378, "learning_rate": 4.10505937238602e-05, "loss": 0.15378959178924562, "step": 137720 }, { "epoch": 0.5913036758455475, "grad_norm": 0.009542332962155342, "learning_rate": 4.1046282003742576e-05, "loss": 0.16104387044906615, "step": 137730 }, { "epoch": 0.5913466079355675, "grad_norm": 1.5980418920516968, "learning_rate": 4.104197028362495e-05, "loss": 0.3658533811569214, "step": 137740 }, { "epoch": 0.5913895400255875, "grad_norm": 2.4138245582580566, "learning_rate": 4.1037658563507323e-05, "loss": 0.2696866035461426, "step": 137750 }, { "epoch": 0.5914324721156076, "grad_norm": 0.588241696357727, "learning_rate": 4.10333468433897e-05, "loss": 0.21146905422210693, "step": 137760 }, { "epoch": 0.5914754042056275, "grad_norm": 0.44645923376083374, "learning_rate": 4.102903512327208e-05, "loss": 0.17989760637283325, "step": 137770 }, { "epoch": 0.5915183362956475, "grad_norm": 0.5778979063034058, "learning_rate": 4.1024723403154455e-05, "loss": 0.15354671478271484, "step": 137780 }, { "epoch": 0.5915612683856676, "grad_norm": 0.0002853994374163449, "learning_rate": 4.102041168303683e-05, "loss": 0.27571873664855956, "step": 137790 }, { "epoch": 0.5916042004756875, "grad_norm": 0.005918090231716633, "learning_rate": 4.101609996291921e-05, "loss": 0.2757784128189087, "step": 137800 }, { "epoch": 0.5916471325657076, "grad_norm": 0.0009141897899098694, "learning_rate": 4.101178824280159e-05, "loss": 0.11418241262435913, "step": 137810 }, { "epoch": 0.5916900646557276, "grad_norm": 0.02456584945321083, "learning_rate": 4.1007476522683965e-05, "loss": 0.15693914890289307, "step": 137820 }, { "epoch": 0.5917329967457475, "grad_norm": 0.002220330759882927, "learning_rate": 4.1003164802566335e-05, "loss": 0.252367901802063, "step": 137830 }, { "epoch": 0.5917759288357676, "grad_norm": 1.6629785299301147, "learning_rate": 4.099885308244871e-05, "loss": 0.17761930227279663, "step": 137840 }, { "epoch": 0.5918188609257876, "grad_norm": 1.0520602464675903, "learning_rate": 4.099454136233109e-05, "loss": 0.11995333433151245, "step": 137850 }, { "epoch": 0.5918617930158075, "grad_norm": 0.0027449713088572025, "learning_rate": 4.099022964221347e-05, "loss": 0.3772153854370117, "step": 137860 }, { "epoch": 0.5919047251058276, "grad_norm": 0.0017124215373769403, "learning_rate": 4.0985917922095845e-05, "loss": 0.16851363182067872, "step": 137870 }, { "epoch": 0.5919476571958476, "grad_norm": 0.55049067735672, "learning_rate": 4.0981606201978215e-05, "loss": 0.09786216020584107, "step": 137880 }, { "epoch": 0.5919905892858676, "grad_norm": 1.6705586910247803, "learning_rate": 4.097729448186059e-05, "loss": 0.17155834436416625, "step": 137890 }, { "epoch": 0.5920335213758876, "grad_norm": 0.9425643682479858, "learning_rate": 4.097298276174297e-05, "loss": 0.032494351267814636, "step": 137900 }, { "epoch": 0.5920764534659076, "grad_norm": 3.071489095687866, "learning_rate": 4.096867104162535e-05, "loss": 0.09281741380691529, "step": 137910 }, { "epoch": 0.5921193855559276, "grad_norm": 2.000361919403076, "learning_rate": 4.0964359321507725e-05, "loss": 0.26554825305938723, "step": 137920 }, { "epoch": 0.5921623176459476, "grad_norm": 0.0017703615594655275, "learning_rate": 4.09600476013901e-05, "loss": 0.08092322945594788, "step": 137930 }, { "epoch": 0.5922052497359677, "grad_norm": 0.1381872445344925, "learning_rate": 4.095573588127248e-05, "loss": 0.18154406547546387, "step": 137940 }, { "epoch": 0.5922481818259877, "grad_norm": 10.791793823242188, "learning_rate": 4.095142416115486e-05, "loss": 0.3010018110275269, "step": 137950 }, { "epoch": 0.5922911139160076, "grad_norm": 0.0003797747485805303, "learning_rate": 4.094711244103723e-05, "loss": 0.09593486785888672, "step": 137960 }, { "epoch": 0.5923340460060277, "grad_norm": 0.6908388137817383, "learning_rate": 4.0942800720919605e-05, "loss": 0.16052920818328859, "step": 137970 }, { "epoch": 0.5923769780960477, "grad_norm": 1.987916350364685, "learning_rate": 4.093848900080198e-05, "loss": 0.29268622398376465, "step": 137980 }, { "epoch": 0.5924199101860677, "grad_norm": 0.0018756146309897304, "learning_rate": 4.093417728068436e-05, "loss": 0.18456127643585205, "step": 137990 }, { "epoch": 0.5924628422760877, "grad_norm": 17.71635627746582, "learning_rate": 4.092986556056673e-05, "loss": 0.21318068504333496, "step": 138000 }, { "epoch": 0.5924628422760877, "eval_loss": 0.3928622305393219, "eval_runtime": 27.2123, "eval_samples_per_second": 3.675, "eval_steps_per_second": 3.675, "step": 138000 }, { "epoch": 0.5925057743661077, "grad_norm": 2.7416698932647705, "learning_rate": 4.092555384044911e-05, "loss": 0.19002535343170165, "step": 138010 }, { "epoch": 0.5925487064561277, "grad_norm": 0.0005497373058460653, "learning_rate": 4.0921242120331485e-05, "loss": 0.2710278987884521, "step": 138020 }, { "epoch": 0.5925916385461477, "grad_norm": 0.013134874403476715, "learning_rate": 4.091693040021387e-05, "loss": 0.3821166753768921, "step": 138030 }, { "epoch": 0.5926345706361678, "grad_norm": 0.16501188278198242, "learning_rate": 4.091261868009624e-05, "loss": 0.14334306716918946, "step": 138040 }, { "epoch": 0.5926775027261877, "grad_norm": 0.011494440026581287, "learning_rate": 4.090830695997862e-05, "loss": 0.20109798908233642, "step": 138050 }, { "epoch": 0.5927204348162077, "grad_norm": 0.027806028723716736, "learning_rate": 4.0903995239860994e-05, "loss": 0.11723495721817016, "step": 138060 }, { "epoch": 0.5927633669062278, "grad_norm": 1.9640231132507324, "learning_rate": 4.089968351974337e-05, "loss": 0.32733218669891356, "step": 138070 }, { "epoch": 0.5928062989962477, "grad_norm": 0.0590534470975399, "learning_rate": 4.089537179962574e-05, "loss": 0.034361538290977475, "step": 138080 }, { "epoch": 0.5928492310862677, "grad_norm": 0.04286135733127594, "learning_rate": 4.089106007950812e-05, "loss": 0.06894341707229615, "step": 138090 }, { "epoch": 0.5928921631762878, "grad_norm": 0.04913228377699852, "learning_rate": 4.0886748359390497e-05, "loss": 0.04862775206565857, "step": 138100 }, { "epoch": 0.5929350952663077, "grad_norm": 0.0012994530843570828, "learning_rate": 4.0882436639272874e-05, "loss": 0.311983060836792, "step": 138110 }, { "epoch": 0.5929780273563278, "grad_norm": 0.03957013785839081, "learning_rate": 4.0878124919155244e-05, "loss": 0.390786337852478, "step": 138120 }, { "epoch": 0.5930209594463478, "grad_norm": 0.04898397624492645, "learning_rate": 4.087381319903762e-05, "loss": 0.21631765365600586, "step": 138130 }, { "epoch": 0.5930638915363677, "grad_norm": 5.083647727966309, "learning_rate": 4.0869501478920006e-05, "loss": 0.2936803102493286, "step": 138140 }, { "epoch": 0.5931068236263878, "grad_norm": 2.1491951942443848, "learning_rate": 4.086518975880238e-05, "loss": 0.14799619913101197, "step": 138150 }, { "epoch": 0.5931497557164078, "grad_norm": 3.132175922393799, "learning_rate": 4.0860878038684754e-05, "loss": 0.2657571077346802, "step": 138160 }, { "epoch": 0.5931926878064278, "grad_norm": 0.015707481652498245, "learning_rate": 4.085656631856713e-05, "loss": 0.12225933074951172, "step": 138170 }, { "epoch": 0.5932356198964478, "grad_norm": 1.2788748741149902, "learning_rate": 4.085225459844951e-05, "loss": 0.1507628083229065, "step": 138180 }, { "epoch": 0.5932785519864678, "grad_norm": 0.042883019894361496, "learning_rate": 4.0847942878331886e-05, "loss": 0.26679019927978515, "step": 138190 }, { "epoch": 0.5933214840764878, "grad_norm": 0.10420157760381699, "learning_rate": 4.0843631158214256e-05, "loss": 0.07498223185539246, "step": 138200 }, { "epoch": 0.5933644161665078, "grad_norm": 23.004911422729492, "learning_rate": 4.0839319438096634e-05, "loss": 0.3628678560256958, "step": 138210 }, { "epoch": 0.5934073482565279, "grad_norm": 0.019570820033550262, "learning_rate": 4.083500771797901e-05, "loss": 0.14614968299865722, "step": 138220 }, { "epoch": 0.5934502803465478, "grad_norm": 0.01614968292415142, "learning_rate": 4.083069599786139e-05, "loss": 0.1902191162109375, "step": 138230 }, { "epoch": 0.5934932124365678, "grad_norm": 0.009563574567437172, "learning_rate": 4.0826384277743766e-05, "loss": 0.1708337903022766, "step": 138240 }, { "epoch": 0.5935361445265879, "grad_norm": 0.003760694758966565, "learning_rate": 4.082207255762614e-05, "loss": 0.03917034566402435, "step": 138250 }, { "epoch": 0.5935790766166078, "grad_norm": 0.029983574524521828, "learning_rate": 4.081776083750852e-05, "loss": 0.15364230871200563, "step": 138260 }, { "epoch": 0.5936220087066278, "grad_norm": 0.0570920892059803, "learning_rate": 4.08134491173909e-05, "loss": 0.043169844150543216, "step": 138270 }, { "epoch": 0.5936649407966479, "grad_norm": 0.0028020674362778664, "learning_rate": 4.0809137397273275e-05, "loss": 0.3166723966598511, "step": 138280 }, { "epoch": 0.5937078728866678, "grad_norm": 0.026932211592793465, "learning_rate": 4.0804825677155646e-05, "loss": 0.24892525672912597, "step": 138290 }, { "epoch": 0.5937508049766879, "grad_norm": 0.009527665562927723, "learning_rate": 4.080051395703802e-05, "loss": 0.11965830326080322, "step": 138300 }, { "epoch": 0.5937937370667079, "grad_norm": 1.14619779586792, "learning_rate": 4.07962022369204e-05, "loss": 0.17031424045562743, "step": 138310 }, { "epoch": 0.5938366691567278, "grad_norm": 0.0052214134484529495, "learning_rate": 4.079189051680278e-05, "loss": 0.2664251089096069, "step": 138320 }, { "epoch": 0.5938796012467479, "grad_norm": 2.2635042667388916, "learning_rate": 4.078757879668515e-05, "loss": 0.1969763994216919, "step": 138330 }, { "epoch": 0.5939225333367679, "grad_norm": 0.00652336934581399, "learning_rate": 4.0783267076567526e-05, "loss": 0.4026478290557861, "step": 138340 }, { "epoch": 0.5939654654267879, "grad_norm": 1.975538730621338, "learning_rate": 4.07789553564499e-05, "loss": 0.4983255863189697, "step": 138350 }, { "epoch": 0.5940083975168079, "grad_norm": 0.02280971221625805, "learning_rate": 4.077464363633228e-05, "loss": 0.27170419692993164, "step": 138360 }, { "epoch": 0.5940513296068279, "grad_norm": 4.479470729827881, "learning_rate": 4.077033191621466e-05, "loss": 0.22238686084747314, "step": 138370 }, { "epoch": 0.594094261696848, "grad_norm": 0.004820001777261496, "learning_rate": 4.0766020196097035e-05, "loss": 0.2552459478378296, "step": 138380 }, { "epoch": 0.5941371937868679, "grad_norm": 0.20357632637023926, "learning_rate": 4.076170847597941e-05, "loss": 0.3800384759902954, "step": 138390 }, { "epoch": 0.594180125876888, "grad_norm": 1.3113895654678345, "learning_rate": 4.075739675586179e-05, "loss": 0.19150335788726808, "step": 138400 }, { "epoch": 0.594223057966908, "grad_norm": 0.018385406583547592, "learning_rate": 4.075308503574416e-05, "loss": 0.19509342908859253, "step": 138410 }, { "epoch": 0.5942659900569279, "grad_norm": 5.595469951629639, "learning_rate": 4.074877331562654e-05, "loss": 0.1117366075515747, "step": 138420 }, { "epoch": 0.594308922146948, "grad_norm": 0.20212681591510773, "learning_rate": 4.0744461595508915e-05, "loss": 0.3475062370300293, "step": 138430 }, { "epoch": 0.594351854236968, "grad_norm": 0.028907861560583115, "learning_rate": 4.074014987539129e-05, "loss": 0.12352882623672486, "step": 138440 }, { "epoch": 0.594394786326988, "grad_norm": 0.03655627369880676, "learning_rate": 4.073583815527366e-05, "loss": 0.29398033618927, "step": 138450 }, { "epoch": 0.594437718417008, "grad_norm": 3.132746696472168, "learning_rate": 4.073152643515604e-05, "loss": 0.36105301380157473, "step": 138460 }, { "epoch": 0.594480650507028, "grad_norm": 0.06194941699504852, "learning_rate": 4.072721471503842e-05, "loss": 0.23717799186706542, "step": 138470 }, { "epoch": 0.594523582597048, "grad_norm": 0.014329830184578896, "learning_rate": 4.0722902994920795e-05, "loss": 0.16158952713012695, "step": 138480 }, { "epoch": 0.594566514687068, "grad_norm": 2.4566140174865723, "learning_rate": 4.071859127480317e-05, "loss": 0.13322386741638184, "step": 138490 }, { "epoch": 0.594609446777088, "grad_norm": 0.051132217049598694, "learning_rate": 4.071427955468555e-05, "loss": 0.15299041271209718, "step": 138500 }, { "epoch": 0.594652378867108, "grad_norm": 0.057655222713947296, "learning_rate": 4.070996783456793e-05, "loss": 0.2507340669631958, "step": 138510 }, { "epoch": 0.594695310957128, "grad_norm": 1.7659785747528076, "learning_rate": 4.0705656114450304e-05, "loss": 0.2532395839691162, "step": 138520 }, { "epoch": 0.5947382430471481, "grad_norm": 0.2619655728340149, "learning_rate": 4.0701344394332675e-05, "loss": 0.025167009234428404, "step": 138530 }, { "epoch": 0.594781175137168, "grad_norm": 0.259661465883255, "learning_rate": 4.069703267421505e-05, "loss": 0.14469207525253297, "step": 138540 }, { "epoch": 0.594824107227188, "grad_norm": 0.0074020447209477425, "learning_rate": 4.069272095409743e-05, "loss": 0.1733548879623413, "step": 138550 }, { "epoch": 0.5948670393172081, "grad_norm": 0.006281828507781029, "learning_rate": 4.068840923397981e-05, "loss": 0.30566720962524413, "step": 138560 }, { "epoch": 0.594909971407228, "grad_norm": 3.7366669178009033, "learning_rate": 4.068409751386218e-05, "loss": 0.1994355797767639, "step": 138570 }, { "epoch": 0.594952903497248, "grad_norm": 0.050392650067806244, "learning_rate": 4.0679785793744555e-05, "loss": 0.27683262825012206, "step": 138580 }, { "epoch": 0.5949958355872681, "grad_norm": 0.030940018594264984, "learning_rate": 4.067547407362693e-05, "loss": 0.10335183143615723, "step": 138590 }, { "epoch": 0.595038767677288, "grad_norm": 0.08072816580533981, "learning_rate": 4.067116235350931e-05, "loss": 0.17044700384140016, "step": 138600 }, { "epoch": 0.5950816997673081, "grad_norm": 0.10144093632698059, "learning_rate": 4.066685063339169e-05, "loss": 0.18469510078430176, "step": 138610 }, { "epoch": 0.5951246318573281, "grad_norm": 0.40955373644828796, "learning_rate": 4.0662538913274064e-05, "loss": 0.33158886432647705, "step": 138620 }, { "epoch": 0.595167563947348, "grad_norm": 0.8902355432510376, "learning_rate": 4.065822719315644e-05, "loss": 0.1779848337173462, "step": 138630 }, { "epoch": 0.5952104960373681, "grad_norm": 3.3796346187591553, "learning_rate": 4.065391547303882e-05, "loss": 0.16189804077148437, "step": 138640 }, { "epoch": 0.5952534281273881, "grad_norm": 0.0013126464327797294, "learning_rate": 4.0649603752921196e-05, "loss": 0.1056704044342041, "step": 138650 }, { "epoch": 0.5952963602174081, "grad_norm": 2.0929417610168457, "learning_rate": 4.064529203280357e-05, "loss": 0.12593846321105956, "step": 138660 }, { "epoch": 0.5953392923074281, "grad_norm": 0.014701430685818195, "learning_rate": 4.0640980312685944e-05, "loss": 0.10860245227813721, "step": 138670 }, { "epoch": 0.5953822243974481, "grad_norm": 0.00810084119439125, "learning_rate": 4.063666859256832e-05, "loss": 0.01611505150794983, "step": 138680 }, { "epoch": 0.5954251564874681, "grad_norm": 0.4513009190559387, "learning_rate": 4.06323568724507e-05, "loss": 0.1624898910522461, "step": 138690 }, { "epoch": 0.5954680885774881, "grad_norm": 0.0283687524497509, "learning_rate": 4.062804515233307e-05, "loss": 0.2079389810562134, "step": 138700 }, { "epoch": 0.5955110206675082, "grad_norm": 1.9030858278274536, "learning_rate": 4.062373343221545e-05, "loss": 0.03300126194953919, "step": 138710 }, { "epoch": 0.5955539527575281, "grad_norm": 0.001472063479013741, "learning_rate": 4.0619421712097824e-05, "loss": 0.26835658550262453, "step": 138720 }, { "epoch": 0.5955968848475481, "grad_norm": 0.013345958665013313, "learning_rate": 4.06151099919802e-05, "loss": 0.24954426288604736, "step": 138730 }, { "epoch": 0.5956398169375682, "grad_norm": 0.07247161865234375, "learning_rate": 4.061079827186258e-05, "loss": 0.13264119625091553, "step": 138740 }, { "epoch": 0.5956827490275881, "grad_norm": 0.723473072052002, "learning_rate": 4.0606486551744956e-05, "loss": 0.22525534629821778, "step": 138750 }, { "epoch": 0.5957256811176082, "grad_norm": 1.1872649192810059, "learning_rate": 4.0602174831627333e-05, "loss": 0.3446241617202759, "step": 138760 }, { "epoch": 0.5957686132076282, "grad_norm": 0.7071019411087036, "learning_rate": 4.059786311150971e-05, "loss": 0.2649399995803833, "step": 138770 }, { "epoch": 0.5958115452976481, "grad_norm": 0.024992501363158226, "learning_rate": 4.059355139139208e-05, "loss": 0.18880668878555298, "step": 138780 }, { "epoch": 0.5958544773876682, "grad_norm": 3.2234370708465576, "learning_rate": 4.058923967127446e-05, "loss": 0.1728742837905884, "step": 138790 }, { "epoch": 0.5958974094776882, "grad_norm": 13.19248104095459, "learning_rate": 4.0584927951156836e-05, "loss": 0.24392056465148926, "step": 138800 }, { "epoch": 0.5959403415677083, "grad_norm": 2.747596025466919, "learning_rate": 4.058061623103921e-05, "loss": 0.28858864307403564, "step": 138810 }, { "epoch": 0.5959832736577282, "grad_norm": 0.17838746309280396, "learning_rate": 4.0576304510921584e-05, "loss": 0.27601745128631594, "step": 138820 }, { "epoch": 0.5960262057477482, "grad_norm": 0.02302752248942852, "learning_rate": 4.057199279080396e-05, "loss": 0.2467722177505493, "step": 138830 }, { "epoch": 0.5960691378377683, "grad_norm": 0.04111037775874138, "learning_rate": 4.056768107068634e-05, "loss": 0.32037968635559083, "step": 138840 }, { "epoch": 0.5961120699277882, "grad_norm": 0.021072717383503914, "learning_rate": 4.056336935056872e-05, "loss": 0.28800349235534667, "step": 138850 }, { "epoch": 0.5961550020178082, "grad_norm": 5.909653663635254, "learning_rate": 4.055905763045109e-05, "loss": 0.3441181659698486, "step": 138860 }, { "epoch": 0.5961979341078283, "grad_norm": 9.896764755249023, "learning_rate": 4.055474591033347e-05, "loss": 0.1869523286819458, "step": 138870 }, { "epoch": 0.5962408661978482, "grad_norm": 8.402483940124512, "learning_rate": 4.055043419021585e-05, "loss": 0.2832054138183594, "step": 138880 }, { "epoch": 0.5962837982878683, "grad_norm": 0.009975524619221687, "learning_rate": 4.0546122470098225e-05, "loss": 0.15514372587203978, "step": 138890 }, { "epoch": 0.5963267303778883, "grad_norm": 0.10215646028518677, "learning_rate": 4.0541810749980596e-05, "loss": 0.3102398872375488, "step": 138900 }, { "epoch": 0.5963696624679082, "grad_norm": 0.17041859030723572, "learning_rate": 4.053749902986297e-05, "loss": 0.27873940467834474, "step": 138910 }, { "epoch": 0.5964125945579283, "grad_norm": 2.4230082035064697, "learning_rate": 4.053318730974535e-05, "loss": 0.21358671188354492, "step": 138920 }, { "epoch": 0.5964555266479483, "grad_norm": 0.0803167000412941, "learning_rate": 4.052887558962773e-05, "loss": 0.07980765104293823, "step": 138930 }, { "epoch": 0.5964984587379683, "grad_norm": 23.38479995727539, "learning_rate": 4.05245638695101e-05, "loss": 0.06409929990768433, "step": 138940 }, { "epoch": 0.5965413908279883, "grad_norm": 0.002030240371823311, "learning_rate": 4.0520252149392476e-05, "loss": 0.17009581327438356, "step": 138950 }, { "epoch": 0.5965843229180083, "grad_norm": 0.023227572441101074, "learning_rate": 4.051594042927486e-05, "loss": 0.34738569259643554, "step": 138960 }, { "epoch": 0.5966272550080283, "grad_norm": 1.8141781091690063, "learning_rate": 4.051162870915724e-05, "loss": 0.24157049655914306, "step": 138970 }, { "epoch": 0.5966701870980483, "grad_norm": 1.13673734664917, "learning_rate": 4.0507316989039615e-05, "loss": 0.13472883701324462, "step": 138980 }, { "epoch": 0.5967131191880684, "grad_norm": 0.0068895393051207066, "learning_rate": 4.0503005268921985e-05, "loss": 0.24904513359069824, "step": 138990 }, { "epoch": 0.5967560512780883, "grad_norm": 0.01904173754155636, "learning_rate": 4.049869354880436e-05, "loss": 0.3177989721298218, "step": 139000 }, { "epoch": 0.5967560512780883, "eval_loss": 0.4010623097419739, "eval_runtime": 27.1679, "eval_samples_per_second": 3.681, "eval_steps_per_second": 3.681, "step": 139000 }, { "epoch": 0.5967989833681083, "grad_norm": 0.02827509678900242, "learning_rate": 4.049438182868674e-05, "loss": 0.1803138494491577, "step": 139010 }, { "epoch": 0.5968419154581284, "grad_norm": 0.09278185665607452, "learning_rate": 4.049007010856912e-05, "loss": 0.08599947690963745, "step": 139020 }, { "epoch": 0.5968848475481483, "grad_norm": 2.0457799434661865, "learning_rate": 4.048575838845149e-05, "loss": 0.1696843385696411, "step": 139030 }, { "epoch": 0.5969277796381683, "grad_norm": 0.28158533573150635, "learning_rate": 4.0481446668333865e-05, "loss": 0.31057536602020264, "step": 139040 }, { "epoch": 0.5969707117281884, "grad_norm": 1.8282661437988281, "learning_rate": 4.047713494821624e-05, "loss": 0.20569372177124023, "step": 139050 }, { "epoch": 0.5970136438182083, "grad_norm": 0.027583178132772446, "learning_rate": 4.047282322809862e-05, "loss": 0.32853972911834717, "step": 139060 }, { "epoch": 0.5970565759082284, "grad_norm": 0.039123062044382095, "learning_rate": 4.0468511507981e-05, "loss": 0.14497545957565308, "step": 139070 }, { "epoch": 0.5970995079982484, "grad_norm": 0.0013616773067042232, "learning_rate": 4.0464199787863375e-05, "loss": 0.2893491268157959, "step": 139080 }, { "epoch": 0.5971424400882683, "grad_norm": 0.20029796659946442, "learning_rate": 4.045988806774575e-05, "loss": 0.18861448764801025, "step": 139090 }, { "epoch": 0.5971853721782884, "grad_norm": 0.01592946983873844, "learning_rate": 4.045557634762813e-05, "loss": 0.13244056701660156, "step": 139100 }, { "epoch": 0.5972283042683084, "grad_norm": 4.180685520172119, "learning_rate": 4.04512646275105e-05, "loss": 0.4059516429901123, "step": 139110 }, { "epoch": 0.5972712363583284, "grad_norm": 1.714571475982666, "learning_rate": 4.044695290739288e-05, "loss": 0.38700339794158933, "step": 139120 }, { "epoch": 0.5973141684483484, "grad_norm": 8.81942081451416, "learning_rate": 4.0442641187275254e-05, "loss": 0.07162479162216187, "step": 139130 }, { "epoch": 0.5973571005383684, "grad_norm": 4.870468616485596, "learning_rate": 4.043832946715763e-05, "loss": 0.0521313488483429, "step": 139140 }, { "epoch": 0.5974000326283884, "grad_norm": 0.16712331771850586, "learning_rate": 4.043401774704e-05, "loss": 0.24226417541503906, "step": 139150 }, { "epoch": 0.5974429647184084, "grad_norm": 3.7403082847595215, "learning_rate": 4.042970602692238e-05, "loss": 0.2585084676742554, "step": 139160 }, { "epoch": 0.5974858968084285, "grad_norm": 1.1525099277496338, "learning_rate": 4.042539430680476e-05, "loss": 0.2858480453491211, "step": 139170 }, { "epoch": 0.5975288288984484, "grad_norm": 0.0030668508261442184, "learning_rate": 4.0421082586687134e-05, "loss": 0.08854332566261292, "step": 139180 }, { "epoch": 0.5975717609884684, "grad_norm": 8.544142723083496, "learning_rate": 4.041677086656951e-05, "loss": 0.20749473571777344, "step": 139190 }, { "epoch": 0.5976146930784885, "grad_norm": 0.05123743787407875, "learning_rate": 4.041245914645189e-05, "loss": 0.28833374977111814, "step": 139200 }, { "epoch": 0.5976576251685084, "grad_norm": 0.4078068137168884, "learning_rate": 4.0408147426334266e-05, "loss": 0.15883285999298097, "step": 139210 }, { "epoch": 0.5977005572585284, "grad_norm": 0.5867086052894592, "learning_rate": 4.0403835706216644e-05, "loss": 0.0185800701379776, "step": 139220 }, { "epoch": 0.5977434893485485, "grad_norm": 0.020372772589325905, "learning_rate": 4.0399523986099014e-05, "loss": 0.14691412448883057, "step": 139230 }, { "epoch": 0.5977864214385685, "grad_norm": 2.4413959980010986, "learning_rate": 4.039521226598139e-05, "loss": 0.04680218100547791, "step": 139240 }, { "epoch": 0.5978293535285885, "grad_norm": 0.1127878874540329, "learning_rate": 4.039090054586377e-05, "loss": 0.24358739852905273, "step": 139250 }, { "epoch": 0.5978722856186085, "grad_norm": 0.0011141104623675346, "learning_rate": 4.0386588825746146e-05, "loss": 0.11816627979278564, "step": 139260 }, { "epoch": 0.5979152177086285, "grad_norm": 0.07920151203870773, "learning_rate": 4.038227710562852e-05, "loss": 0.19519985914230348, "step": 139270 }, { "epoch": 0.5979581497986485, "grad_norm": 3.432372808456421, "learning_rate": 4.0377965385510894e-05, "loss": 0.16987917423248292, "step": 139280 }, { "epoch": 0.5980010818886685, "grad_norm": 0.006460071075707674, "learning_rate": 4.037365366539327e-05, "loss": 0.1509072184562683, "step": 139290 }, { "epoch": 0.5980440139786886, "grad_norm": 10.475028991699219, "learning_rate": 4.036934194527565e-05, "loss": 0.16783927679061889, "step": 139300 }, { "epoch": 0.5980869460687085, "grad_norm": 0.22108608484268188, "learning_rate": 4.0365030225158026e-05, "loss": 0.22498059272766113, "step": 139310 }, { "epoch": 0.5981298781587285, "grad_norm": 1.1432753801345825, "learning_rate": 4.0360718505040404e-05, "loss": 0.2228973388671875, "step": 139320 }, { "epoch": 0.5981728102487486, "grad_norm": 1.3360220193862915, "learning_rate": 4.035640678492278e-05, "loss": 0.1929041028022766, "step": 139330 }, { "epoch": 0.5982157423387685, "grad_norm": 0.007678360678255558, "learning_rate": 4.035209506480516e-05, "loss": 0.11292246580123902, "step": 139340 }, { "epoch": 0.5982586744287886, "grad_norm": 1.5282012224197388, "learning_rate": 4.0347783344687536e-05, "loss": 0.3063904523849487, "step": 139350 }, { "epoch": 0.5983016065188086, "grad_norm": 0.27093306183815, "learning_rate": 4.0343471624569906e-05, "loss": 0.13120408058166505, "step": 139360 }, { "epoch": 0.5983445386088285, "grad_norm": 1.001611351966858, "learning_rate": 4.0339159904452284e-05, "loss": 0.11028392314910888, "step": 139370 }, { "epoch": 0.5983874706988486, "grad_norm": 0.1968022882938385, "learning_rate": 4.033484818433466e-05, "loss": 0.35245230197906496, "step": 139380 }, { "epoch": 0.5984304027888686, "grad_norm": 0.031681448221206665, "learning_rate": 4.033053646421704e-05, "loss": 0.22298216819763184, "step": 139390 }, { "epoch": 0.5984733348788885, "grad_norm": 1.4180032014846802, "learning_rate": 4.032622474409941e-05, "loss": 0.31283843517303467, "step": 139400 }, { "epoch": 0.5985162669689086, "grad_norm": 0.010278967209160328, "learning_rate": 4.0321913023981786e-05, "loss": 0.31977884769439696, "step": 139410 }, { "epoch": 0.5985591990589286, "grad_norm": 0.08360231667757034, "learning_rate": 4.0317601303864164e-05, "loss": 0.2159090518951416, "step": 139420 }, { "epoch": 0.5986021311489486, "grad_norm": 0.27361536026000977, "learning_rate": 4.031328958374654e-05, "loss": 0.1017007827758789, "step": 139430 }, { "epoch": 0.5986450632389686, "grad_norm": 0.026083212345838547, "learning_rate": 4.030897786362892e-05, "loss": 0.2089712142944336, "step": 139440 }, { "epoch": 0.5986879953289886, "grad_norm": 1.9075127840042114, "learning_rate": 4.0304666143511296e-05, "loss": 0.2128284215927124, "step": 139450 }, { "epoch": 0.5987309274190086, "grad_norm": 0.016359496861696243, "learning_rate": 4.030035442339367e-05, "loss": 0.09640793800354004, "step": 139460 }, { "epoch": 0.5987738595090286, "grad_norm": 0.010604800656437874, "learning_rate": 4.029604270327605e-05, "loss": 0.3107742786407471, "step": 139470 }, { "epoch": 0.5988167915990487, "grad_norm": 4.734330177307129, "learning_rate": 4.029173098315842e-05, "loss": 0.3754775047302246, "step": 139480 }, { "epoch": 0.5988597236890686, "grad_norm": 4.360969066619873, "learning_rate": 4.02874192630408e-05, "loss": 0.45675835609436033, "step": 139490 }, { "epoch": 0.5989026557790886, "grad_norm": 0.01934775337576866, "learning_rate": 4.0283107542923176e-05, "loss": 0.2801548957824707, "step": 139500 }, { "epoch": 0.5989455878691087, "grad_norm": 1.2892874479293823, "learning_rate": 4.027879582280555e-05, "loss": 0.11840264797210694, "step": 139510 }, { "epoch": 0.5989885199591286, "grad_norm": 0.0935051441192627, "learning_rate": 4.0274484102687923e-05, "loss": 0.11873539686203002, "step": 139520 }, { "epoch": 0.5990314520491486, "grad_norm": 0.010609208606183529, "learning_rate": 4.02701723825703e-05, "loss": 0.12016826868057251, "step": 139530 }, { "epoch": 0.5990743841391687, "grad_norm": 0.0024884147569537163, "learning_rate": 4.026586066245268e-05, "loss": 0.13507968187332153, "step": 139540 }, { "epoch": 0.5991173162291886, "grad_norm": 0.003081399481743574, "learning_rate": 4.026154894233506e-05, "loss": 0.07944802641868591, "step": 139550 }, { "epoch": 0.5991602483192087, "grad_norm": 1.4031720161437988, "learning_rate": 4.025723722221743e-05, "loss": 0.28442862033843996, "step": 139560 }, { "epoch": 0.5992031804092287, "grad_norm": 0.6752078533172607, "learning_rate": 4.025292550209981e-05, "loss": 0.26588103771209715, "step": 139570 }, { "epoch": 0.5992461124992486, "grad_norm": 0.8555001616477966, "learning_rate": 4.024861378198219e-05, "loss": 0.11324497461318969, "step": 139580 }, { "epoch": 0.5992890445892687, "grad_norm": 0.08063609153032303, "learning_rate": 4.0244302061864565e-05, "loss": 0.20769956111907958, "step": 139590 }, { "epoch": 0.5993319766792887, "grad_norm": 0.008143313229084015, "learning_rate": 4.0239990341746935e-05, "loss": 0.07645671367645264, "step": 139600 }, { "epoch": 0.5993749087693087, "grad_norm": 0.04053580388426781, "learning_rate": 4.023567862162931e-05, "loss": 0.13342757225036622, "step": 139610 }, { "epoch": 0.5994178408593287, "grad_norm": 2.0824387073516846, "learning_rate": 4.023136690151169e-05, "loss": 0.24516801834106444, "step": 139620 }, { "epoch": 0.5994607729493487, "grad_norm": 0.0006679339567199349, "learning_rate": 4.022705518139407e-05, "loss": 0.1988581895828247, "step": 139630 }, { "epoch": 0.5995037050393687, "grad_norm": 0.024627642706036568, "learning_rate": 4.022274346127644e-05, "loss": 0.2887857437133789, "step": 139640 }, { "epoch": 0.5995466371293887, "grad_norm": 0.0028000574093312025, "learning_rate": 4.0218431741158815e-05, "loss": 0.30062689781188967, "step": 139650 }, { "epoch": 0.5995895692194088, "grad_norm": 0.7627494931221008, "learning_rate": 4.02141200210412e-05, "loss": 0.425076961517334, "step": 139660 }, { "epoch": 0.5996325013094288, "grad_norm": 0.004241509363055229, "learning_rate": 4.020980830092358e-05, "loss": 0.044894880056381224, "step": 139670 }, { "epoch": 0.5996754333994487, "grad_norm": 0.020708652213215828, "learning_rate": 4.0205496580805954e-05, "loss": 0.2606109619140625, "step": 139680 }, { "epoch": 0.5997183654894688, "grad_norm": 3.3840079307556152, "learning_rate": 4.0201184860688325e-05, "loss": 0.28748269081115724, "step": 139690 }, { "epoch": 0.5997612975794888, "grad_norm": 0.025514988228678703, "learning_rate": 4.01968731405707e-05, "loss": 0.1521053433418274, "step": 139700 }, { "epoch": 0.5998042296695087, "grad_norm": 0.3507746458053589, "learning_rate": 4.019256142045308e-05, "loss": 0.183748197555542, "step": 139710 }, { "epoch": 0.5998471617595288, "grad_norm": 1.4777569770812988, "learning_rate": 4.018824970033546e-05, "loss": 0.20436084270477295, "step": 139720 }, { "epoch": 0.5998900938495488, "grad_norm": 2.184016227722168, "learning_rate": 4.018393798021783e-05, "loss": 0.32288227081298826, "step": 139730 }, { "epoch": 0.5999330259395688, "grad_norm": 3.9362363815307617, "learning_rate": 4.0179626260100205e-05, "loss": 0.18843557834625244, "step": 139740 }, { "epoch": 0.5999759580295888, "grad_norm": 2.1692709922790527, "learning_rate": 4.017531453998258e-05, "loss": 0.41624040603637696, "step": 139750 }, { "epoch": 0.6000188901196088, "grad_norm": 0.14851386845111847, "learning_rate": 4.017100281986496e-05, "loss": 0.21869902610778807, "step": 139760 }, { "epoch": 0.6000618222096288, "grad_norm": 0.011462419293820858, "learning_rate": 4.016669109974734e-05, "loss": 0.14370282888412475, "step": 139770 }, { "epoch": 0.6001047542996488, "grad_norm": 0.0862903892993927, "learning_rate": 4.0162379379629714e-05, "loss": 0.2030428409576416, "step": 139780 }, { "epoch": 0.6001476863896689, "grad_norm": 0.006694111507385969, "learning_rate": 4.015806765951209e-05, "loss": 0.10970304012298585, "step": 139790 }, { "epoch": 0.6001906184796888, "grad_norm": 0.04753762483596802, "learning_rate": 4.015375593939447e-05, "loss": 0.20441656112670897, "step": 139800 }, { "epoch": 0.6002335505697088, "grad_norm": 0.1337500959634781, "learning_rate": 4.014944421927684e-05, "loss": 0.18382371664047242, "step": 139810 }, { "epoch": 0.6002764826597289, "grad_norm": 8.118321418762207, "learning_rate": 4.0145132499159217e-05, "loss": 0.4093769073486328, "step": 139820 }, { "epoch": 0.6003194147497488, "grad_norm": 0.02916407212615013, "learning_rate": 4.0140820779041594e-05, "loss": 0.24807960987091066, "step": 139830 }, { "epoch": 0.6003623468397689, "grad_norm": 0.6688809990882874, "learning_rate": 4.013650905892397e-05, "loss": 0.1378745436668396, "step": 139840 }, { "epoch": 0.6004052789297889, "grad_norm": 0.007759707979857922, "learning_rate": 4.013219733880634e-05, "loss": 0.17140055894851686, "step": 139850 }, { "epoch": 0.6004482110198088, "grad_norm": 0.008196969516575336, "learning_rate": 4.012788561868872e-05, "loss": 0.29869420528411866, "step": 139860 }, { "epoch": 0.6004911431098289, "grad_norm": 0.00213833199813962, "learning_rate": 4.0123573898571097e-05, "loss": 0.2550304889678955, "step": 139870 }, { "epoch": 0.6005340751998489, "grad_norm": 1.2392281293869019, "learning_rate": 4.0119262178453474e-05, "loss": 0.31737627983093264, "step": 139880 }, { "epoch": 0.6005770072898688, "grad_norm": 1.2636436223983765, "learning_rate": 4.011495045833585e-05, "loss": 0.3847317695617676, "step": 139890 }, { "epoch": 0.6006199393798889, "grad_norm": 0.0047286092303693295, "learning_rate": 4.011063873821823e-05, "loss": 0.17933709621429444, "step": 139900 }, { "epoch": 0.6006628714699089, "grad_norm": 0.007108923979103565, "learning_rate": 4.0106327018100606e-05, "loss": 0.16458051204681395, "step": 139910 }, { "epoch": 0.6007058035599289, "grad_norm": 0.6757831573486328, "learning_rate": 4.010201529798298e-05, "loss": 0.2534809589385986, "step": 139920 }, { "epoch": 0.6007487356499489, "grad_norm": 0.1733238697052002, "learning_rate": 4.0097703577865354e-05, "loss": 0.24300312995910645, "step": 139930 }, { "epoch": 0.600791667739969, "grad_norm": 0.0027367551811039448, "learning_rate": 4.009339185774773e-05, "loss": 0.14856563806533812, "step": 139940 }, { "epoch": 0.6008345998299889, "grad_norm": 0.24011345207691193, "learning_rate": 4.008908013763011e-05, "loss": 0.057327890396118165, "step": 139950 }, { "epoch": 0.6008775319200089, "grad_norm": 0.11384332180023193, "learning_rate": 4.0084768417512486e-05, "loss": 0.18769536018371583, "step": 139960 }, { "epoch": 0.600920464010029, "grad_norm": 0.014897527173161507, "learning_rate": 4.0080456697394856e-05, "loss": 0.18856242895126343, "step": 139970 }, { "epoch": 0.6009633961000489, "grad_norm": 0.5122405290603638, "learning_rate": 4.0076144977277234e-05, "loss": 0.12947138547897338, "step": 139980 }, { "epoch": 0.6010063281900689, "grad_norm": 0.085267573595047, "learning_rate": 4.007183325715961e-05, "loss": 0.23959081172943114, "step": 139990 }, { "epoch": 0.601049260280089, "grad_norm": 1.2138646841049194, "learning_rate": 4.006752153704199e-05, "loss": 0.24218153953552246, "step": 140000 }, { "epoch": 0.601049260280089, "eval_loss": 0.40640848875045776, "eval_runtime": 27.2521, "eval_samples_per_second": 3.669, "eval_steps_per_second": 3.669, "step": 140000 }, { "epoch": 0.6010921923701089, "grad_norm": 2.4744691848754883, "learning_rate": 4.0063209816924366e-05, "loss": 0.21239218711853028, "step": 140010 }, { "epoch": 0.601135124460129, "grad_norm": 0.18298260867595673, "learning_rate": 4.005889809680674e-05, "loss": 0.1979839324951172, "step": 140020 }, { "epoch": 0.601178056550149, "grad_norm": 0.24607233703136444, "learning_rate": 4.005458637668912e-05, "loss": 0.14506577253341674, "step": 140030 }, { "epoch": 0.6012209886401689, "grad_norm": 1.946406602859497, "learning_rate": 4.00502746565715e-05, "loss": 0.19754066467285156, "step": 140040 }, { "epoch": 0.601263920730189, "grad_norm": 0.16587841510772705, "learning_rate": 4.0045962936453875e-05, "loss": 0.20553255081176758, "step": 140050 }, { "epoch": 0.601306852820209, "grad_norm": 0.08923459053039551, "learning_rate": 4.0041651216336246e-05, "loss": 0.2600035429000854, "step": 140060 }, { "epoch": 0.601349784910229, "grad_norm": 0.17558036744594574, "learning_rate": 4.003733949621862e-05, "loss": 0.29572343826293945, "step": 140070 }, { "epoch": 0.601392717000249, "grad_norm": 0.0013027727836742997, "learning_rate": 4.0033027776101e-05, "loss": 0.058405238389968875, "step": 140080 }, { "epoch": 0.601435649090269, "grad_norm": 0.0022462320048362017, "learning_rate": 4.002871605598338e-05, "loss": 0.28202013969421386, "step": 140090 }, { "epoch": 0.6014785811802891, "grad_norm": 0.0036439145915210247, "learning_rate": 4.002440433586575e-05, "loss": 0.160456120967865, "step": 140100 }, { "epoch": 0.601521513270309, "grad_norm": 1.2931004762649536, "learning_rate": 4.0020092615748126e-05, "loss": 0.2139185905456543, "step": 140110 }, { "epoch": 0.601564445360329, "grad_norm": 4.611878395080566, "learning_rate": 4.00157808956305e-05, "loss": 0.3115818738937378, "step": 140120 }, { "epoch": 0.6016073774503491, "grad_norm": 0.5753462314605713, "learning_rate": 4.001146917551288e-05, "loss": 0.21315619945526124, "step": 140130 }, { "epoch": 0.601650309540369, "grad_norm": 0.18079976737499237, "learning_rate": 4.000715745539526e-05, "loss": 0.09467348456382751, "step": 140140 }, { "epoch": 0.6016932416303891, "grad_norm": 0.18340753018856049, "learning_rate": 4.0002845735277635e-05, "loss": 0.20837154388427734, "step": 140150 }, { "epoch": 0.6017361737204091, "grad_norm": 1.1413424015045166, "learning_rate": 3.999853401516001e-05, "loss": 0.11949852705001832, "step": 140160 }, { "epoch": 0.601779105810429, "grad_norm": 5.983482360839844, "learning_rate": 3.999422229504239e-05, "loss": 0.3660741329193115, "step": 140170 }, { "epoch": 0.6018220379004491, "grad_norm": 0.005374810192734003, "learning_rate": 3.998991057492476e-05, "loss": 0.2321415901184082, "step": 140180 }, { "epoch": 0.6018649699904691, "grad_norm": 0.2554090917110443, "learning_rate": 3.998559885480714e-05, "loss": 0.19533088207244872, "step": 140190 }, { "epoch": 0.601907902080489, "grad_norm": 0.40389445424079895, "learning_rate": 3.9981287134689515e-05, "loss": 0.17982652187347412, "step": 140200 }, { "epoch": 0.6019508341705091, "grad_norm": 0.14264102280139923, "learning_rate": 3.997697541457189e-05, "loss": 0.11623998880386352, "step": 140210 }, { "epoch": 0.6019937662605291, "grad_norm": 11.018896102905273, "learning_rate": 3.997266369445426e-05, "loss": 0.29056406021118164, "step": 140220 }, { "epoch": 0.6020366983505491, "grad_norm": 0.019281527027487755, "learning_rate": 3.996835197433664e-05, "loss": 0.13616764545440674, "step": 140230 }, { "epoch": 0.6020796304405691, "grad_norm": 6.079987049102783, "learning_rate": 3.996404025421902e-05, "loss": 0.2820961236953735, "step": 140240 }, { "epoch": 0.6021225625305892, "grad_norm": 0.09342295676469803, "learning_rate": 3.9959728534101395e-05, "loss": 0.393782639503479, "step": 140250 }, { "epoch": 0.6021654946206091, "grad_norm": 0.13038615882396698, "learning_rate": 3.995541681398377e-05, "loss": 0.08735232949256896, "step": 140260 }, { "epoch": 0.6022084267106291, "grad_norm": 0.2240753024816513, "learning_rate": 3.995110509386615e-05, "loss": 0.5003287315368652, "step": 140270 }, { "epoch": 0.6022513588006492, "grad_norm": 0.0016490390989929438, "learning_rate": 3.994679337374853e-05, "loss": 0.23796720504760743, "step": 140280 }, { "epoch": 0.6022942908906691, "grad_norm": 0.2137441635131836, "learning_rate": 3.9942481653630904e-05, "loss": 0.06123405694961548, "step": 140290 }, { "epoch": 0.6023372229806891, "grad_norm": 2.8598108291625977, "learning_rate": 3.9938169933513275e-05, "loss": 0.14074127674102782, "step": 140300 }, { "epoch": 0.6023801550707092, "grad_norm": 2.1544981002807617, "learning_rate": 3.993385821339565e-05, "loss": 0.21434509754180908, "step": 140310 }, { "epoch": 0.6024230871607291, "grad_norm": 9.761442184448242, "learning_rate": 3.992954649327803e-05, "loss": 0.43427672386169436, "step": 140320 }, { "epoch": 0.6024660192507492, "grad_norm": 1.8055157661437988, "learning_rate": 3.992523477316041e-05, "loss": 0.2403231143951416, "step": 140330 }, { "epoch": 0.6025089513407692, "grad_norm": 0.7270634770393372, "learning_rate": 3.992092305304278e-05, "loss": 0.11149526834487915, "step": 140340 }, { "epoch": 0.6025518834307891, "grad_norm": 8.909162521362305, "learning_rate": 3.9916611332925155e-05, "loss": 0.2438430070877075, "step": 140350 }, { "epoch": 0.6025948155208092, "grad_norm": 1.5331367254257202, "learning_rate": 3.991229961280753e-05, "loss": 0.4124717712402344, "step": 140360 }, { "epoch": 0.6026377476108292, "grad_norm": 3.782731771469116, "learning_rate": 3.9907987892689916e-05, "loss": 0.3205994129180908, "step": 140370 }, { "epoch": 0.6026806797008492, "grad_norm": 0.04977316036820412, "learning_rate": 3.990367617257229e-05, "loss": 0.26922576427459716, "step": 140380 }, { "epoch": 0.6027236117908692, "grad_norm": 0.2401779443025589, "learning_rate": 3.9899364452454664e-05, "loss": 0.4137077331542969, "step": 140390 }, { "epoch": 0.6027665438808892, "grad_norm": 0.03330448642373085, "learning_rate": 3.989505273233704e-05, "loss": 0.19204211235046387, "step": 140400 }, { "epoch": 0.6028094759709092, "grad_norm": 0.19992871582508087, "learning_rate": 3.989074101221942e-05, "loss": 0.11128789186477661, "step": 140410 }, { "epoch": 0.6028524080609292, "grad_norm": 0.02906624972820282, "learning_rate": 3.9886429292101796e-05, "loss": 0.08270058035850525, "step": 140420 }, { "epoch": 0.6028953401509493, "grad_norm": 1.2285542488098145, "learning_rate": 3.988211757198417e-05, "loss": 0.3234747886657715, "step": 140430 }, { "epoch": 0.6029382722409692, "grad_norm": 0.488831102848053, "learning_rate": 3.9877805851866544e-05, "loss": 0.25629940032958987, "step": 140440 }, { "epoch": 0.6029812043309892, "grad_norm": 0.21581482887268066, "learning_rate": 3.987349413174892e-05, "loss": 0.1724982500076294, "step": 140450 }, { "epoch": 0.6030241364210093, "grad_norm": 0.7537409663200378, "learning_rate": 3.98691824116313e-05, "loss": 0.25681922435760496, "step": 140460 }, { "epoch": 0.6030670685110292, "grad_norm": 0.03951547294855118, "learning_rate": 3.986487069151367e-05, "loss": 0.06932097673416138, "step": 140470 }, { "epoch": 0.6031100006010492, "grad_norm": 0.018945086747407913, "learning_rate": 3.9860558971396053e-05, "loss": 0.22218098640441894, "step": 140480 }, { "epoch": 0.6031529326910693, "grad_norm": 0.23054622113704681, "learning_rate": 3.985624725127843e-05, "loss": 0.215470552444458, "step": 140490 }, { "epoch": 0.6031958647810892, "grad_norm": 0.20627227425575256, "learning_rate": 3.985193553116081e-05, "loss": 0.3484283208847046, "step": 140500 }, { "epoch": 0.6032387968711093, "grad_norm": 0.10565587133169174, "learning_rate": 3.984762381104318e-05, "loss": 0.21985170841217042, "step": 140510 }, { "epoch": 0.6032817289611293, "grad_norm": 3.1740968227386475, "learning_rate": 3.9843312090925556e-05, "loss": 0.40097837448120116, "step": 140520 }, { "epoch": 0.6033246610511493, "grad_norm": 0.024558668956160545, "learning_rate": 3.9839000370807933e-05, "loss": 0.19059033393859864, "step": 140530 }, { "epoch": 0.6033675931411693, "grad_norm": 0.9281611442565918, "learning_rate": 3.983468865069031e-05, "loss": 0.14230889081954956, "step": 140540 }, { "epoch": 0.6034105252311893, "grad_norm": 0.009123001247644424, "learning_rate": 3.983037693057268e-05, "loss": 0.2806967496871948, "step": 140550 }, { "epoch": 0.6034534573212094, "grad_norm": 1.394128680229187, "learning_rate": 3.982606521045506e-05, "loss": 0.15292155742645264, "step": 140560 }, { "epoch": 0.6034963894112293, "grad_norm": 3.4160406589508057, "learning_rate": 3.9821753490337436e-05, "loss": 0.3051342248916626, "step": 140570 }, { "epoch": 0.6035393215012493, "grad_norm": 0.4787931740283966, "learning_rate": 3.981744177021981e-05, "loss": 0.19603606462478637, "step": 140580 }, { "epoch": 0.6035822535912694, "grad_norm": 0.012746613472700119, "learning_rate": 3.981313005010219e-05, "loss": 0.14620459079742432, "step": 140590 }, { "epoch": 0.6036251856812893, "grad_norm": 0.11777313798666, "learning_rate": 3.980881832998457e-05, "loss": 0.43863778114318847, "step": 140600 }, { "epoch": 0.6036681177713094, "grad_norm": 0.0012925468618050218, "learning_rate": 3.9804506609866945e-05, "loss": 0.27455117702484133, "step": 140610 }, { "epoch": 0.6037110498613294, "grad_norm": 0.625106930732727, "learning_rate": 3.980019488974932e-05, "loss": 0.250502610206604, "step": 140620 }, { "epoch": 0.6037539819513493, "grad_norm": 0.003046454396098852, "learning_rate": 3.979588316963169e-05, "loss": 0.021387167274951935, "step": 140630 }, { "epoch": 0.6037969140413694, "grad_norm": 0.2070937603712082, "learning_rate": 3.979157144951407e-05, "loss": 0.2071608781814575, "step": 140640 }, { "epoch": 0.6038398461313894, "grad_norm": 0.05185168609023094, "learning_rate": 3.978725972939645e-05, "loss": 0.0429812103509903, "step": 140650 }, { "epoch": 0.6038827782214093, "grad_norm": 0.05144479125738144, "learning_rate": 3.9782948009278825e-05, "loss": 0.3867565870285034, "step": 140660 }, { "epoch": 0.6039257103114294, "grad_norm": 1.467725396156311, "learning_rate": 3.9778636289161196e-05, "loss": 0.09524292349815369, "step": 140670 }, { "epoch": 0.6039686424014494, "grad_norm": 0.16519929468631744, "learning_rate": 3.977432456904357e-05, "loss": 0.3241549015045166, "step": 140680 }, { "epoch": 0.6040115744914694, "grad_norm": 16.647319793701172, "learning_rate": 3.977001284892595e-05, "loss": 0.3598466396331787, "step": 140690 }, { "epoch": 0.6040545065814894, "grad_norm": 0.0008348033879883587, "learning_rate": 3.976570112880833e-05, "loss": 0.22508018016815184, "step": 140700 }, { "epoch": 0.6040974386715094, "grad_norm": 0.00723852077499032, "learning_rate": 3.9761389408690705e-05, "loss": 0.23433120250701905, "step": 140710 }, { "epoch": 0.6041403707615294, "grad_norm": 0.5035370588302612, "learning_rate": 3.975707768857308e-05, "loss": 0.15878416299819947, "step": 140720 }, { "epoch": 0.6041833028515494, "grad_norm": 0.12092899531126022, "learning_rate": 3.975276596845546e-05, "loss": 0.16906187534332276, "step": 140730 }, { "epoch": 0.6042262349415695, "grad_norm": 0.40479961037635803, "learning_rate": 3.974845424833784e-05, "loss": 0.20884625911712645, "step": 140740 }, { "epoch": 0.6042691670315894, "grad_norm": 0.9106727838516235, "learning_rate": 3.974414252822021e-05, "loss": 0.23875579833984376, "step": 140750 }, { "epoch": 0.6043120991216094, "grad_norm": 0.01491815596818924, "learning_rate": 3.9739830808102585e-05, "loss": 0.19347492456436158, "step": 140760 }, { "epoch": 0.6043550312116295, "grad_norm": 1.5120779275894165, "learning_rate": 3.973551908798496e-05, "loss": 0.1883600115776062, "step": 140770 }, { "epoch": 0.6043979633016494, "grad_norm": 1.720261573791504, "learning_rate": 3.973120736786734e-05, "loss": 0.41421709060668943, "step": 140780 }, { "epoch": 0.6044408953916695, "grad_norm": 0.0006754127098247409, "learning_rate": 3.972689564774972e-05, "loss": 0.07793102860450744, "step": 140790 }, { "epoch": 0.6044838274816895, "grad_norm": 0.01753504015505314, "learning_rate": 3.972258392763209e-05, "loss": 0.13585551977157592, "step": 140800 }, { "epoch": 0.6045267595717094, "grad_norm": 1.807799220085144, "learning_rate": 3.9718272207514465e-05, "loss": 0.2476651668548584, "step": 140810 }, { "epoch": 0.6045696916617295, "grad_norm": 0.3717762231826782, "learning_rate": 3.971396048739684e-05, "loss": 0.1306217670440674, "step": 140820 }, { "epoch": 0.6046126237517495, "grad_norm": 0.0009543145424686372, "learning_rate": 3.970964876727922e-05, "loss": 0.3181750774383545, "step": 140830 }, { "epoch": 0.6046555558417694, "grad_norm": 1.5006979703903198, "learning_rate": 3.97053370471616e-05, "loss": 0.1982407569885254, "step": 140840 }, { "epoch": 0.6046984879317895, "grad_norm": 0.015589235350489616, "learning_rate": 3.9701025327043975e-05, "loss": 0.026549032330513, "step": 140850 }, { "epoch": 0.6047414200218095, "grad_norm": 0.0038561576511710882, "learning_rate": 3.969671360692635e-05, "loss": 0.2876553773880005, "step": 140860 }, { "epoch": 0.6047843521118295, "grad_norm": 0.00041575246723368764, "learning_rate": 3.969240188680873e-05, "loss": 0.08002186417579651, "step": 140870 }, { "epoch": 0.6048272842018495, "grad_norm": 0.47469818592071533, "learning_rate": 3.96880901666911e-05, "loss": 0.22481439113616944, "step": 140880 }, { "epoch": 0.6048702162918695, "grad_norm": 15.686685562133789, "learning_rate": 3.968377844657348e-05, "loss": 0.25566079616546633, "step": 140890 }, { "epoch": 0.6049131483818895, "grad_norm": 1.3537100553512573, "learning_rate": 3.9679466726455854e-05, "loss": 0.2686317920684814, "step": 140900 }, { "epoch": 0.6049560804719095, "grad_norm": 0.0857565775513649, "learning_rate": 3.967515500633823e-05, "loss": 0.21077220439910888, "step": 140910 }, { "epoch": 0.6049990125619296, "grad_norm": 20.2806339263916, "learning_rate": 3.96708432862206e-05, "loss": 0.3465421199798584, "step": 140920 }, { "epoch": 0.6050419446519495, "grad_norm": 0.06577757745981216, "learning_rate": 3.966653156610298e-05, "loss": 0.10634375810623169, "step": 140930 }, { "epoch": 0.6050848767419695, "grad_norm": 0.6883453726768494, "learning_rate": 3.966221984598536e-05, "loss": 0.1529320001602173, "step": 140940 }, { "epoch": 0.6051278088319896, "grad_norm": 0.33841758966445923, "learning_rate": 3.9657908125867734e-05, "loss": 0.12229356765747071, "step": 140950 }, { "epoch": 0.6051707409220096, "grad_norm": 0.032311972230672836, "learning_rate": 3.965359640575011e-05, "loss": 0.20672781467437745, "step": 140960 }, { "epoch": 0.6052136730120296, "grad_norm": 0.029374683275818825, "learning_rate": 3.964928468563249e-05, "loss": 0.27740681171417236, "step": 140970 }, { "epoch": 0.6052566051020496, "grad_norm": 1.1965817213058472, "learning_rate": 3.9644972965514866e-05, "loss": 0.49901623725891114, "step": 140980 }, { "epoch": 0.6052995371920696, "grad_norm": 1.5310200452804565, "learning_rate": 3.9640661245397244e-05, "loss": 0.13384013175964354, "step": 140990 }, { "epoch": 0.6053424692820896, "grad_norm": 2.602834939956665, "learning_rate": 3.9636349525279614e-05, "loss": 0.12581024169921876, "step": 141000 }, { "epoch": 0.6053424692820896, "eval_loss": 0.4089200496673584, "eval_runtime": 27.1954, "eval_samples_per_second": 3.677, "eval_steps_per_second": 3.677, "step": 141000 }, { "epoch": 0.6053854013721096, "grad_norm": 0.05095406249165535, "learning_rate": 3.963203780516199e-05, "loss": 0.11216226816177369, "step": 141010 }, { "epoch": 0.6054283334621297, "grad_norm": 0.002075101248919964, "learning_rate": 3.962772608504437e-05, "loss": 0.15308403968811035, "step": 141020 }, { "epoch": 0.6054712655521496, "grad_norm": 2.099597454071045, "learning_rate": 3.9623414364926746e-05, "loss": 0.33690292835235597, "step": 141030 }, { "epoch": 0.6055141976421696, "grad_norm": 0.05643483251333237, "learning_rate": 3.961910264480912e-05, "loss": 0.1339421510696411, "step": 141040 }, { "epoch": 0.6055571297321897, "grad_norm": 0.0012016056571155787, "learning_rate": 3.9614790924691494e-05, "loss": 0.1170223593711853, "step": 141050 }, { "epoch": 0.6056000618222096, "grad_norm": 0.9147559404373169, "learning_rate": 3.961047920457387e-05, "loss": 0.4862982749938965, "step": 141060 }, { "epoch": 0.6056429939122296, "grad_norm": 0.005203854292631149, "learning_rate": 3.9606167484456256e-05, "loss": 0.26890594959259034, "step": 141070 }, { "epoch": 0.6056859260022497, "grad_norm": 1.590675950050354, "learning_rate": 3.9601855764338626e-05, "loss": 0.26728928089141846, "step": 141080 }, { "epoch": 0.6057288580922696, "grad_norm": 0.009610733948647976, "learning_rate": 3.9597544044221004e-05, "loss": 0.17799413204193115, "step": 141090 }, { "epoch": 0.6057717901822897, "grad_norm": 0.0018764605047181249, "learning_rate": 3.959323232410338e-05, "loss": 0.1783639073371887, "step": 141100 }, { "epoch": 0.6058147222723097, "grad_norm": 2.180757761001587, "learning_rate": 3.958892060398576e-05, "loss": 0.3086270332336426, "step": 141110 }, { "epoch": 0.6058576543623296, "grad_norm": 0.050399571657180786, "learning_rate": 3.958460888386813e-05, "loss": 0.13583248853683472, "step": 141120 }, { "epoch": 0.6059005864523497, "grad_norm": 0.2559893727302551, "learning_rate": 3.9580297163750506e-05, "loss": 0.23775179386138917, "step": 141130 }, { "epoch": 0.6059435185423697, "grad_norm": 0.005343136377632618, "learning_rate": 3.9575985443632884e-05, "loss": 0.1065142273902893, "step": 141140 }, { "epoch": 0.6059864506323897, "grad_norm": 0.011462339200079441, "learning_rate": 3.957167372351526e-05, "loss": 0.2745601892471313, "step": 141150 }, { "epoch": 0.6060293827224097, "grad_norm": 2.439439535140991, "learning_rate": 3.956736200339764e-05, "loss": 0.18180274963378906, "step": 141160 }, { "epoch": 0.6060723148124297, "grad_norm": 0.012357541359961033, "learning_rate": 3.956305028328001e-05, "loss": 0.18113479614257813, "step": 141170 }, { "epoch": 0.6061152469024497, "grad_norm": 0.0018625404918566346, "learning_rate": 3.955873856316239e-05, "loss": 0.07990905046463012, "step": 141180 }, { "epoch": 0.6061581789924697, "grad_norm": 0.032597437500953674, "learning_rate": 3.955442684304477e-05, "loss": 0.1915552258491516, "step": 141190 }, { "epoch": 0.6062011110824898, "grad_norm": 0.004464549943804741, "learning_rate": 3.955011512292715e-05, "loss": 0.1384279489517212, "step": 141200 }, { "epoch": 0.6062440431725097, "grad_norm": 0.2348497062921524, "learning_rate": 3.954580340280952e-05, "loss": 0.13170111179351807, "step": 141210 }, { "epoch": 0.6062869752625297, "grad_norm": 0.0035776502918452024, "learning_rate": 3.9541491682691896e-05, "loss": 0.2760654926300049, "step": 141220 }, { "epoch": 0.6063299073525498, "grad_norm": 1.5733288526535034, "learning_rate": 3.953717996257427e-05, "loss": 0.22948665618896485, "step": 141230 }, { "epoch": 0.6063728394425697, "grad_norm": 1.8536306619644165, "learning_rate": 3.953286824245665e-05, "loss": 0.33787682056427004, "step": 141240 }, { "epoch": 0.6064157715325897, "grad_norm": 1.747023344039917, "learning_rate": 3.952855652233902e-05, "loss": 0.14881832599639894, "step": 141250 }, { "epoch": 0.6064587036226098, "grad_norm": 0.2886177897453308, "learning_rate": 3.95242448022214e-05, "loss": 0.338626503944397, "step": 141260 }, { "epoch": 0.6065016357126297, "grad_norm": 0.0006161421770229936, "learning_rate": 3.9519933082103775e-05, "loss": 0.11174997091293334, "step": 141270 }, { "epoch": 0.6065445678026498, "grad_norm": 3.2979769706726074, "learning_rate": 3.951562136198615e-05, "loss": 0.18851048946380616, "step": 141280 }, { "epoch": 0.6065874998926698, "grad_norm": 0.11197972297668457, "learning_rate": 3.951130964186853e-05, "loss": 0.10484591722488404, "step": 141290 }, { "epoch": 0.6066304319826897, "grad_norm": 0.16652366518974304, "learning_rate": 3.950699792175091e-05, "loss": 0.0662794828414917, "step": 141300 }, { "epoch": 0.6066733640727098, "grad_norm": 0.09492175281047821, "learning_rate": 3.9502686201633285e-05, "loss": 0.18237814903259278, "step": 141310 }, { "epoch": 0.6067162961627298, "grad_norm": 0.8800640106201172, "learning_rate": 3.949837448151566e-05, "loss": 0.3102026700973511, "step": 141320 }, { "epoch": 0.6067592282527497, "grad_norm": 0.0023318880703300238, "learning_rate": 3.949406276139803e-05, "loss": 0.030414551496505737, "step": 141330 }, { "epoch": 0.6068021603427698, "grad_norm": 0.007045125123113394, "learning_rate": 3.948975104128041e-05, "loss": 0.1279890775680542, "step": 141340 }, { "epoch": 0.6068450924327898, "grad_norm": 1.4796124696731567, "learning_rate": 3.948543932116279e-05, "loss": 0.21116414070129394, "step": 141350 }, { "epoch": 0.6068880245228098, "grad_norm": 0.23528429865837097, "learning_rate": 3.9481127601045165e-05, "loss": 0.12599529027938844, "step": 141360 }, { "epoch": 0.6069309566128298, "grad_norm": 1.651747703552246, "learning_rate": 3.9476815880927535e-05, "loss": 0.37949261665344236, "step": 141370 }, { "epoch": 0.6069738887028499, "grad_norm": 0.061352405697107315, "learning_rate": 3.947250416080991e-05, "loss": 0.17497749328613282, "step": 141380 }, { "epoch": 0.6070168207928699, "grad_norm": 0.0331333689391613, "learning_rate": 3.946819244069229e-05, "loss": 0.11293892860412598, "step": 141390 }, { "epoch": 0.6070597528828898, "grad_norm": 5.993484020233154, "learning_rate": 3.946388072057467e-05, "loss": 0.40219316482543943, "step": 141400 }, { "epoch": 0.6071026849729099, "grad_norm": 0.0046135191805660725, "learning_rate": 3.9459569000457045e-05, "loss": 0.10798419713973999, "step": 141410 }, { "epoch": 0.6071456170629299, "grad_norm": 3.0338857173919678, "learning_rate": 3.945525728033942e-05, "loss": 0.20181164741516114, "step": 141420 }, { "epoch": 0.6071885491529498, "grad_norm": 1.8571960926055908, "learning_rate": 3.94509455602218e-05, "loss": 0.214121413230896, "step": 141430 }, { "epoch": 0.6072314812429699, "grad_norm": 2.187960147857666, "learning_rate": 3.944663384010418e-05, "loss": 0.403093957901001, "step": 141440 }, { "epoch": 0.6072744133329899, "grad_norm": 0.41850295662879944, "learning_rate": 3.944232211998655e-05, "loss": 0.18242256641387938, "step": 141450 }, { "epoch": 0.6073173454230099, "grad_norm": 0.011669473722577095, "learning_rate": 3.9438010399868925e-05, "loss": 0.0749740481376648, "step": 141460 }, { "epoch": 0.6073602775130299, "grad_norm": 0.7934962511062622, "learning_rate": 3.94336986797513e-05, "loss": 0.2681394338607788, "step": 141470 }, { "epoch": 0.6074032096030499, "grad_norm": 0.0006156415329314768, "learning_rate": 3.942938695963368e-05, "loss": 0.16833350658416749, "step": 141480 }, { "epoch": 0.6074461416930699, "grad_norm": 0.021375132724642754, "learning_rate": 3.942507523951606e-05, "loss": 0.23237297534942628, "step": 141490 }, { "epoch": 0.6074890737830899, "grad_norm": 0.5006760358810425, "learning_rate": 3.942076351939843e-05, "loss": 0.26445937156677246, "step": 141500 }, { "epoch": 0.60753200587311, "grad_norm": 0.8343283534049988, "learning_rate": 3.9416451799280805e-05, "loss": 0.3213693380355835, "step": 141510 }, { "epoch": 0.6075749379631299, "grad_norm": 3.1894664764404297, "learning_rate": 3.941214007916318e-05, "loss": 0.2887598514556885, "step": 141520 }, { "epoch": 0.6076178700531499, "grad_norm": 0.023277558386325836, "learning_rate": 3.940782835904556e-05, "loss": 0.2421741008758545, "step": 141530 }, { "epoch": 0.60766080214317, "grad_norm": 0.29840824007987976, "learning_rate": 3.940351663892794e-05, "loss": 0.17128605842590333, "step": 141540 }, { "epoch": 0.6077037342331899, "grad_norm": 0.03273274376988411, "learning_rate": 3.9399204918810314e-05, "loss": 0.5258442878723144, "step": 141550 }, { "epoch": 0.60774666632321, "grad_norm": 0.3960159718990326, "learning_rate": 3.939489319869269e-05, "loss": 0.12907603979110718, "step": 141560 }, { "epoch": 0.60778959841323, "grad_norm": 0.0017227198695763946, "learning_rate": 3.939058147857507e-05, "loss": 0.177092981338501, "step": 141570 }, { "epoch": 0.6078325305032499, "grad_norm": 0.002443228615447879, "learning_rate": 3.938626975845744e-05, "loss": 0.3996154308319092, "step": 141580 }, { "epoch": 0.60787546259327, "grad_norm": 0.15868256986141205, "learning_rate": 3.9381958038339817e-05, "loss": 0.5284997463226319, "step": 141590 }, { "epoch": 0.60791839468329, "grad_norm": 0.03645209223031998, "learning_rate": 3.9377646318222194e-05, "loss": 0.303369140625, "step": 141600 }, { "epoch": 0.6079613267733099, "grad_norm": 1.6641205549240112, "learning_rate": 3.937333459810457e-05, "loss": 0.13876266479492189, "step": 141610 }, { "epoch": 0.60800425886333, "grad_norm": 0.05909671634435654, "learning_rate": 3.936902287798694e-05, "loss": 0.21023335456848144, "step": 141620 }, { "epoch": 0.60804719095335, "grad_norm": 0.0023271851241588593, "learning_rate": 3.936471115786932e-05, "loss": 0.028406143188476562, "step": 141630 }, { "epoch": 0.60809012304337, "grad_norm": 0.004466942045837641, "learning_rate": 3.9360399437751697e-05, "loss": 0.19075484275817872, "step": 141640 }, { "epoch": 0.60813305513339, "grad_norm": 1.219118595123291, "learning_rate": 3.9356087717634074e-05, "loss": 0.07533459663391114, "step": 141650 }, { "epoch": 0.60817598722341, "grad_norm": 0.23978543281555176, "learning_rate": 3.935177599751645e-05, "loss": 0.16630566120147705, "step": 141660 }, { "epoch": 0.60821891931343, "grad_norm": 97.8420639038086, "learning_rate": 3.934746427739883e-05, "loss": 0.40468668937683105, "step": 141670 }, { "epoch": 0.60826185140345, "grad_norm": 0.0013681944692507386, "learning_rate": 3.9343152557281206e-05, "loss": 0.1450944185256958, "step": 141680 }, { "epoch": 0.6083047834934701, "grad_norm": 0.004667914938181639, "learning_rate": 3.933884083716358e-05, "loss": 0.21857268810272218, "step": 141690 }, { "epoch": 0.60834771558349, "grad_norm": 1.4839063882827759, "learning_rate": 3.9334529117045954e-05, "loss": 0.04907590448856354, "step": 141700 }, { "epoch": 0.60839064767351, "grad_norm": 0.12947788834571838, "learning_rate": 3.933021739692833e-05, "loss": 0.36401753425598143, "step": 141710 }, { "epoch": 0.6084335797635301, "grad_norm": 1.1010395288467407, "learning_rate": 3.932590567681071e-05, "loss": 0.36334273815155027, "step": 141720 }, { "epoch": 0.60847651185355, "grad_norm": 1.1198389530181885, "learning_rate": 3.9321593956693086e-05, "loss": 0.12767027616500853, "step": 141730 }, { "epoch": 0.60851944394357, "grad_norm": 6.048811435699463, "learning_rate": 3.9317282236575456e-05, "loss": 0.2986024856567383, "step": 141740 }, { "epoch": 0.6085623760335901, "grad_norm": 0.14294730126857758, "learning_rate": 3.9312970516457834e-05, "loss": 0.2689626455307007, "step": 141750 }, { "epoch": 0.60860530812361, "grad_norm": 19.958438873291016, "learning_rate": 3.930865879634021e-05, "loss": 0.3430215120315552, "step": 141760 }, { "epoch": 0.6086482402136301, "grad_norm": 0.4098569452762604, "learning_rate": 3.9304347076222595e-05, "loss": 0.1653934359550476, "step": 141770 }, { "epoch": 0.6086911723036501, "grad_norm": 0.013435311615467072, "learning_rate": 3.9300035356104966e-05, "loss": 0.2686276912689209, "step": 141780 }, { "epoch": 0.60873410439367, "grad_norm": 1.2856467962265015, "learning_rate": 3.929572363598734e-05, "loss": 0.4310938358306885, "step": 141790 }, { "epoch": 0.6087770364836901, "grad_norm": 0.027985993772745132, "learning_rate": 3.929141191586972e-05, "loss": 0.2021188497543335, "step": 141800 }, { "epoch": 0.6088199685737101, "grad_norm": 0.003142294241115451, "learning_rate": 3.92871001957521e-05, "loss": 0.09466840624809265, "step": 141810 }, { "epoch": 0.6088629006637302, "grad_norm": 1.6350462436676025, "learning_rate": 3.928278847563447e-05, "loss": 0.14503989219665528, "step": 141820 }, { "epoch": 0.6089058327537501, "grad_norm": 0.10769698023796082, "learning_rate": 3.9278476755516846e-05, "loss": 0.1177414059638977, "step": 141830 }, { "epoch": 0.6089487648437701, "grad_norm": 2.373969554901123, "learning_rate": 3.927416503539922e-05, "loss": 0.20538089275360108, "step": 141840 }, { "epoch": 0.6089916969337902, "grad_norm": 0.05289050564169884, "learning_rate": 3.92698533152816e-05, "loss": 0.04121732711791992, "step": 141850 }, { "epoch": 0.6090346290238101, "grad_norm": 2.833883047103882, "learning_rate": 3.926554159516398e-05, "loss": 0.2589694023132324, "step": 141860 }, { "epoch": 0.6090775611138302, "grad_norm": 0.02419205754995346, "learning_rate": 3.926122987504635e-05, "loss": 0.255047082901001, "step": 141870 }, { "epoch": 0.6091204932038502, "grad_norm": 0.03955872729420662, "learning_rate": 3.925691815492873e-05, "loss": 0.19322144985198975, "step": 141880 }, { "epoch": 0.6091634252938701, "grad_norm": 3.4558498859405518, "learning_rate": 3.925260643481111e-05, "loss": 0.1118241548538208, "step": 141890 }, { "epoch": 0.6092063573838902, "grad_norm": 1.9339896440505981, "learning_rate": 3.924829471469349e-05, "loss": 0.30376248359680175, "step": 141900 }, { "epoch": 0.6092492894739102, "grad_norm": 1.3876532316207886, "learning_rate": 3.924398299457586e-05, "loss": 0.29599320888519287, "step": 141910 }, { "epoch": 0.6092922215639301, "grad_norm": 0.10916638374328613, "learning_rate": 3.9239671274458235e-05, "loss": 0.2979604244232178, "step": 141920 }, { "epoch": 0.6093351536539502, "grad_norm": 0.005515061318874359, "learning_rate": 3.923535955434061e-05, "loss": 0.2690037965774536, "step": 141930 }, { "epoch": 0.6093780857439702, "grad_norm": 0.38121163845062256, "learning_rate": 3.923104783422299e-05, "loss": 0.22012245655059814, "step": 141940 }, { "epoch": 0.6094210178339902, "grad_norm": 2.7628302574157715, "learning_rate": 3.922673611410536e-05, "loss": 0.14492123126983641, "step": 141950 }, { "epoch": 0.6094639499240102, "grad_norm": 0.08726619929075241, "learning_rate": 3.922242439398774e-05, "loss": 0.10085601806640625, "step": 141960 }, { "epoch": 0.6095068820140302, "grad_norm": 11.465222358703613, "learning_rate": 3.9218112673870115e-05, "loss": 0.3472140312194824, "step": 141970 }, { "epoch": 0.6095498141040502, "grad_norm": 1.9945697784423828, "learning_rate": 3.921380095375249e-05, "loss": 0.10087604522705078, "step": 141980 }, { "epoch": 0.6095927461940702, "grad_norm": 2.085869312286377, "learning_rate": 3.920948923363487e-05, "loss": 0.2568961620330811, "step": 141990 }, { "epoch": 0.6096356782840903, "grad_norm": 0.0020460772793740034, "learning_rate": 3.920517751351725e-05, "loss": 0.3749083042144775, "step": 142000 }, { "epoch": 0.6096356782840903, "eval_loss": 0.39280468225479126, "eval_runtime": 27.3058, "eval_samples_per_second": 3.662, "eval_steps_per_second": 3.662, "step": 142000 }, { "epoch": 0.6096786103741102, "grad_norm": 7.264312744140625, "learning_rate": 3.9200865793399624e-05, "loss": 0.13132262229919434, "step": 142010 }, { "epoch": 0.6097215424641302, "grad_norm": 0.07945489883422852, "learning_rate": 3.9196554073282e-05, "loss": 0.24379079341888427, "step": 142020 }, { "epoch": 0.6097644745541503, "grad_norm": 1.0327321290969849, "learning_rate": 3.919224235316437e-05, "loss": 0.35031702518463137, "step": 142030 }, { "epoch": 0.6098074066441702, "grad_norm": 4.742274761199951, "learning_rate": 3.918793063304675e-05, "loss": 0.30789053440093994, "step": 142040 }, { "epoch": 0.6098503387341903, "grad_norm": 1.298659324645996, "learning_rate": 3.918361891292913e-05, "loss": 0.3997587919235229, "step": 142050 }, { "epoch": 0.6098932708242103, "grad_norm": 0.18839246034622192, "learning_rate": 3.9179307192811504e-05, "loss": 0.26359546184539795, "step": 142060 }, { "epoch": 0.6099362029142302, "grad_norm": 0.2514694333076477, "learning_rate": 3.9174995472693875e-05, "loss": 0.08809687495231629, "step": 142070 }, { "epoch": 0.6099791350042503, "grad_norm": 0.8888358473777771, "learning_rate": 3.917068375257625e-05, "loss": 0.23953211307525635, "step": 142080 }, { "epoch": 0.6100220670942703, "grad_norm": 0.018821191042661667, "learning_rate": 3.916637203245863e-05, "loss": 0.18834341764450074, "step": 142090 }, { "epoch": 0.6100649991842902, "grad_norm": 0.03325490280985832, "learning_rate": 3.916206031234101e-05, "loss": 0.2581282377243042, "step": 142100 }, { "epoch": 0.6101079312743103, "grad_norm": 5.251251697540283, "learning_rate": 3.9157748592223384e-05, "loss": 0.5048004627227783, "step": 142110 }, { "epoch": 0.6101508633643303, "grad_norm": 0.020616553723812103, "learning_rate": 3.915343687210576e-05, "loss": 0.07277900576591492, "step": 142120 }, { "epoch": 0.6101937954543503, "grad_norm": 0.04141886159777641, "learning_rate": 3.914912515198814e-05, "loss": 0.2258600950241089, "step": 142130 }, { "epoch": 0.6102367275443703, "grad_norm": 4.039946556091309, "learning_rate": 3.9144813431870516e-05, "loss": 0.38728866577148435, "step": 142140 }, { "epoch": 0.6102796596343903, "grad_norm": 2.5496718883514404, "learning_rate": 3.914050171175289e-05, "loss": 0.3502103328704834, "step": 142150 }, { "epoch": 0.6103225917244103, "grad_norm": 0.0676613301038742, "learning_rate": 3.9136189991635264e-05, "loss": 0.17965009212493896, "step": 142160 }, { "epoch": 0.6103655238144303, "grad_norm": 1.1712474822998047, "learning_rate": 3.913187827151764e-05, "loss": 0.21342895030975342, "step": 142170 }, { "epoch": 0.6104084559044504, "grad_norm": 1.3096929788589478, "learning_rate": 3.912756655140002e-05, "loss": 0.23940718173980713, "step": 142180 }, { "epoch": 0.6104513879944703, "grad_norm": 0.05468279868364334, "learning_rate": 3.912325483128239e-05, "loss": 0.20399603843688965, "step": 142190 }, { "epoch": 0.6104943200844903, "grad_norm": 2.2957608699798584, "learning_rate": 3.911894311116477e-05, "loss": 0.19245316982269287, "step": 142200 }, { "epoch": 0.6105372521745104, "grad_norm": 0.005418090615421534, "learning_rate": 3.9114631391047144e-05, "loss": 0.14810930490493773, "step": 142210 }, { "epoch": 0.6105801842645303, "grad_norm": 5.267305850982666, "learning_rate": 3.911031967092952e-05, "loss": 0.3291645526885986, "step": 142220 }, { "epoch": 0.6106231163545504, "grad_norm": 1.2653834819793701, "learning_rate": 3.91060079508119e-05, "loss": 0.17146894931793213, "step": 142230 }, { "epoch": 0.6106660484445704, "grad_norm": 0.12628792226314545, "learning_rate": 3.9101696230694276e-05, "loss": 0.16329647302627565, "step": 142240 }, { "epoch": 0.6107089805345904, "grad_norm": 0.09851165860891342, "learning_rate": 3.9097384510576653e-05, "loss": 0.1806574583053589, "step": 142250 }, { "epoch": 0.6107519126246104, "grad_norm": 4.4101972579956055, "learning_rate": 3.909307279045903e-05, "loss": 0.15007705688476564, "step": 142260 }, { "epoch": 0.6107948447146304, "grad_norm": 0.046127188950777054, "learning_rate": 3.908876107034141e-05, "loss": 0.15361984968185424, "step": 142270 }, { "epoch": 0.6108377768046505, "grad_norm": 0.008575751446187496, "learning_rate": 3.908444935022378e-05, "loss": 0.18621805906295777, "step": 142280 }, { "epoch": 0.6108807088946704, "grad_norm": 0.1520209163427353, "learning_rate": 3.9080137630106156e-05, "loss": 0.2147531270980835, "step": 142290 }, { "epoch": 0.6109236409846904, "grad_norm": 0.02115943655371666, "learning_rate": 3.907582590998853e-05, "loss": 0.2546267032623291, "step": 142300 }, { "epoch": 0.6109665730747105, "grad_norm": 4.71589469909668, "learning_rate": 3.907151418987091e-05, "loss": 0.1917663335800171, "step": 142310 }, { "epoch": 0.6110095051647304, "grad_norm": 0.17938746511936188, "learning_rate": 3.906720246975328e-05, "loss": 0.33880517482757566, "step": 142320 }, { "epoch": 0.6110524372547504, "grad_norm": 1.8493738174438477, "learning_rate": 3.906289074963566e-05, "loss": 0.2039203405380249, "step": 142330 }, { "epoch": 0.6110953693447705, "grad_norm": 69.82435607910156, "learning_rate": 3.9058579029518036e-05, "loss": 0.14799585342407226, "step": 142340 }, { "epoch": 0.6111383014347904, "grad_norm": 0.15120545029640198, "learning_rate": 3.905426730940041e-05, "loss": 0.2303483009338379, "step": 142350 }, { "epoch": 0.6111812335248105, "grad_norm": 1.926548957824707, "learning_rate": 3.904995558928279e-05, "loss": 0.4089049816131592, "step": 142360 }, { "epoch": 0.6112241656148305, "grad_norm": 0.0761425718665123, "learning_rate": 3.904564386916517e-05, "loss": 0.24638357162475585, "step": 142370 }, { "epoch": 0.6112670977048504, "grad_norm": 0.027300817891955376, "learning_rate": 3.9041332149047545e-05, "loss": 0.18842120170593263, "step": 142380 }, { "epoch": 0.6113100297948705, "grad_norm": 0.06413847208023071, "learning_rate": 3.903702042892992e-05, "loss": 0.23671393394470214, "step": 142390 }, { "epoch": 0.6113529618848905, "grad_norm": 0.11595964431762695, "learning_rate": 3.903270870881229e-05, "loss": 0.1801469087600708, "step": 142400 }, { "epoch": 0.6113958939749105, "grad_norm": 5.405128002166748, "learning_rate": 3.902839698869467e-05, "loss": 0.1529282808303833, "step": 142410 }, { "epoch": 0.6114388260649305, "grad_norm": 1.0243592262268066, "learning_rate": 3.902408526857705e-05, "loss": 0.21499390602111818, "step": 142420 }, { "epoch": 0.6114817581549505, "grad_norm": 0.0854133814573288, "learning_rate": 3.9019773548459425e-05, "loss": 0.11400735378265381, "step": 142430 }, { "epoch": 0.6115246902449705, "grad_norm": 2.3615777492523193, "learning_rate": 3.9015461828341796e-05, "loss": 0.4467350959777832, "step": 142440 }, { "epoch": 0.6115676223349905, "grad_norm": 0.018909169360995293, "learning_rate": 3.901115010822417e-05, "loss": 0.4240890026092529, "step": 142450 }, { "epoch": 0.6116105544250106, "grad_norm": 0.8031514883041382, "learning_rate": 3.900683838810655e-05, "loss": 0.21565210819244385, "step": 142460 }, { "epoch": 0.6116534865150305, "grad_norm": 0.15044718980789185, "learning_rate": 3.900252666798893e-05, "loss": 0.11793738603591919, "step": 142470 }, { "epoch": 0.6116964186050505, "grad_norm": 0.17571763694286346, "learning_rate": 3.8998214947871305e-05, "loss": 0.157479989528656, "step": 142480 }, { "epoch": 0.6117393506950706, "grad_norm": 1.6672226190567017, "learning_rate": 3.899390322775368e-05, "loss": 0.10348098278045655, "step": 142490 }, { "epoch": 0.6117822827850905, "grad_norm": 9.708442687988281, "learning_rate": 3.898959150763606e-05, "loss": 0.262995171546936, "step": 142500 }, { "epoch": 0.6118252148751105, "grad_norm": 0.09473815560340881, "learning_rate": 3.898527978751844e-05, "loss": 0.4231609344482422, "step": 142510 }, { "epoch": 0.6118681469651306, "grad_norm": 0.07334093749523163, "learning_rate": 3.898096806740081e-05, "loss": 0.10099009275436402, "step": 142520 }, { "epoch": 0.6119110790551505, "grad_norm": 0.014422636479139328, "learning_rate": 3.8976656347283185e-05, "loss": 0.05562713146209717, "step": 142530 }, { "epoch": 0.6119540111451706, "grad_norm": 5.1654253005981445, "learning_rate": 3.897234462716556e-05, "loss": 0.37032556533813477, "step": 142540 }, { "epoch": 0.6119969432351906, "grad_norm": 0.010609438642859459, "learning_rate": 3.896803290704794e-05, "loss": 0.2431696653366089, "step": 142550 }, { "epoch": 0.6120398753252105, "grad_norm": 0.1465774029493332, "learning_rate": 3.896372118693031e-05, "loss": 0.03999505937099457, "step": 142560 }, { "epoch": 0.6120828074152306, "grad_norm": 0.014877895824611187, "learning_rate": 3.895940946681269e-05, "loss": 0.1237523078918457, "step": 142570 }, { "epoch": 0.6121257395052506, "grad_norm": 1.9582277536392212, "learning_rate": 3.8955097746695065e-05, "loss": 0.14951646327972412, "step": 142580 }, { "epoch": 0.6121686715952706, "grad_norm": 5.586146831512451, "learning_rate": 3.895078602657745e-05, "loss": 0.39240322113037107, "step": 142590 }, { "epoch": 0.6122116036852906, "grad_norm": 0.027195017784833908, "learning_rate": 3.8946474306459827e-05, "loss": 0.3054402589797974, "step": 142600 }, { "epoch": 0.6122545357753106, "grad_norm": 0.0076742833480238914, "learning_rate": 3.89421625863422e-05, "loss": 0.29276094436645506, "step": 142610 }, { "epoch": 0.6122974678653306, "grad_norm": 1.3209526538848877, "learning_rate": 3.8937850866224574e-05, "loss": 0.18471418619155883, "step": 142620 }, { "epoch": 0.6123403999553506, "grad_norm": 0.16654518246650696, "learning_rate": 3.893353914610695e-05, "loss": 0.2270416498184204, "step": 142630 }, { "epoch": 0.6123833320453707, "grad_norm": 0.20178063213825226, "learning_rate": 3.892922742598933e-05, "loss": 0.22354300022125245, "step": 142640 }, { "epoch": 0.6124262641353906, "grad_norm": 0.03456205502152443, "learning_rate": 3.89249157058717e-05, "loss": 0.20727345943450928, "step": 142650 }, { "epoch": 0.6124691962254106, "grad_norm": 3.280965805053711, "learning_rate": 3.892060398575408e-05, "loss": 0.24969584941864015, "step": 142660 }, { "epoch": 0.6125121283154307, "grad_norm": 0.42193174362182617, "learning_rate": 3.8916292265636454e-05, "loss": 0.05913207530975342, "step": 142670 }, { "epoch": 0.6125550604054507, "grad_norm": 0.016481934115290642, "learning_rate": 3.891198054551883e-05, "loss": 0.14308911561965942, "step": 142680 }, { "epoch": 0.6125979924954706, "grad_norm": 0.6766266822814941, "learning_rate": 3.89076688254012e-05, "loss": 0.009996140748262406, "step": 142690 }, { "epoch": 0.6126409245854907, "grad_norm": 2.5119316577911377, "learning_rate": 3.8903357105283586e-05, "loss": 0.09946958422660827, "step": 142700 }, { "epoch": 0.6126838566755107, "grad_norm": 0.35976850986480713, "learning_rate": 3.8899045385165964e-05, "loss": 0.18059909343719482, "step": 142710 }, { "epoch": 0.6127267887655307, "grad_norm": 0.004872175864875317, "learning_rate": 3.889473366504834e-05, "loss": 0.07150285840034484, "step": 142720 }, { "epoch": 0.6127697208555507, "grad_norm": 0.5548312067985535, "learning_rate": 3.889042194493071e-05, "loss": 0.17871575355529784, "step": 142730 }, { "epoch": 0.6128126529455707, "grad_norm": 0.24687838554382324, "learning_rate": 3.888611022481309e-05, "loss": 0.13710942268371581, "step": 142740 }, { "epoch": 0.6128555850355907, "grad_norm": 0.01010800525546074, "learning_rate": 3.8881798504695466e-05, "loss": 0.21247475147247313, "step": 142750 }, { "epoch": 0.6128985171256107, "grad_norm": 0.0031628634314984083, "learning_rate": 3.8877486784577844e-05, "loss": 0.3880982160568237, "step": 142760 }, { "epoch": 0.6129414492156308, "grad_norm": 0.043660301715135574, "learning_rate": 3.8873175064460214e-05, "loss": 0.3001490592956543, "step": 142770 }, { "epoch": 0.6129843813056507, "grad_norm": 0.03375304117798805, "learning_rate": 3.886886334434259e-05, "loss": 0.24639558792114258, "step": 142780 }, { "epoch": 0.6130273133956707, "grad_norm": 0.09769673645496368, "learning_rate": 3.886455162422497e-05, "loss": 0.2595521926879883, "step": 142790 }, { "epoch": 0.6130702454856908, "grad_norm": 3.6060831546783447, "learning_rate": 3.8860239904107346e-05, "loss": 0.12040402889251708, "step": 142800 }, { "epoch": 0.6131131775757107, "grad_norm": 0.007844222709536552, "learning_rate": 3.8855928183989724e-05, "loss": 0.23807475566864014, "step": 142810 }, { "epoch": 0.6131561096657308, "grad_norm": 1.8894518613815308, "learning_rate": 3.88516164638721e-05, "loss": 0.2573350429534912, "step": 142820 }, { "epoch": 0.6131990417557508, "grad_norm": 0.003673046361654997, "learning_rate": 3.884730474375448e-05, "loss": 0.17068955898284913, "step": 142830 }, { "epoch": 0.6132419738457707, "grad_norm": 1.4258671998977661, "learning_rate": 3.8842993023636856e-05, "loss": 0.1882157564163208, "step": 142840 }, { "epoch": 0.6132849059357908, "grad_norm": 2.0518877506256104, "learning_rate": 3.8838681303519226e-05, "loss": 0.12432767152786255, "step": 142850 }, { "epoch": 0.6133278380258108, "grad_norm": 0.9698437452316284, "learning_rate": 3.8834369583401604e-05, "loss": 0.29598345756530764, "step": 142860 }, { "epoch": 0.6133707701158307, "grad_norm": 0.05316584184765816, "learning_rate": 3.883005786328398e-05, "loss": 0.10486148595809937, "step": 142870 }, { "epoch": 0.6134137022058508, "grad_norm": 1.7184795141220093, "learning_rate": 3.882574614316636e-05, "loss": 0.33672361373901366, "step": 142880 }, { "epoch": 0.6134566342958708, "grad_norm": 0.24001701176166534, "learning_rate": 3.882143442304873e-05, "loss": 0.20460429191589355, "step": 142890 }, { "epoch": 0.6134995663858908, "grad_norm": 0.5448141098022461, "learning_rate": 3.8817122702931106e-05, "loss": 0.1534830689430237, "step": 142900 }, { "epoch": 0.6135424984759108, "grad_norm": 7.385931491851807, "learning_rate": 3.8812810982813484e-05, "loss": 0.19792778491973878, "step": 142910 }, { "epoch": 0.6135854305659308, "grad_norm": 0.0035218922421336174, "learning_rate": 3.880849926269586e-05, "loss": 0.11510688066482544, "step": 142920 }, { "epoch": 0.6136283626559508, "grad_norm": 1.7946984767913818, "learning_rate": 3.880418754257824e-05, "loss": 0.4070457458496094, "step": 142930 }, { "epoch": 0.6136712947459708, "grad_norm": 8.210290908813477, "learning_rate": 3.8799875822460616e-05, "loss": 0.32269439697265623, "step": 142940 }, { "epoch": 0.6137142268359909, "grad_norm": 4.748100757598877, "learning_rate": 3.879556410234299e-05, "loss": 0.13236244916915893, "step": 142950 }, { "epoch": 0.6137571589260108, "grad_norm": 0.0008868540753610432, "learning_rate": 3.879125238222537e-05, "loss": 0.18349295854568481, "step": 142960 }, { "epoch": 0.6138000910160308, "grad_norm": 1.8099790811538696, "learning_rate": 3.878694066210775e-05, "loss": 0.302026629447937, "step": 142970 }, { "epoch": 0.6138430231060509, "grad_norm": 0.024444634094834328, "learning_rate": 3.878262894199012e-05, "loss": 0.053810220956802365, "step": 142980 }, { "epoch": 0.6138859551960708, "grad_norm": 0.14125758409500122, "learning_rate": 3.8778317221872496e-05, "loss": 0.22748353481292724, "step": 142990 }, { "epoch": 0.6139288872860909, "grad_norm": 1.6531537771224976, "learning_rate": 3.877400550175487e-05, "loss": 0.362827730178833, "step": 143000 }, { "epoch": 0.6139288872860909, "eval_loss": 0.3971373736858368, "eval_runtime": 27.343, "eval_samples_per_second": 3.657, "eval_steps_per_second": 3.657, "step": 143000 }, { "epoch": 0.6139718193761109, "grad_norm": 0.00045808134018443525, "learning_rate": 3.876969378163725e-05, "loss": 0.10025212764739991, "step": 143010 }, { "epoch": 0.6140147514661308, "grad_norm": 0.14784424006938934, "learning_rate": 3.876538206151962e-05, "loss": 0.11128251552581787, "step": 143020 }, { "epoch": 0.6140576835561509, "grad_norm": 0.003462345339357853, "learning_rate": 3.8761070341402e-05, "loss": 0.17818193435668944, "step": 143030 }, { "epoch": 0.6141006156461709, "grad_norm": 0.0004410098772495985, "learning_rate": 3.8756758621284375e-05, "loss": 0.11047359704971313, "step": 143040 }, { "epoch": 0.6141435477361908, "grad_norm": 0.009439370594918728, "learning_rate": 3.875244690116675e-05, "loss": 0.12234883308410645, "step": 143050 }, { "epoch": 0.6141864798262109, "grad_norm": 0.09685683995485306, "learning_rate": 3.874813518104913e-05, "loss": 0.14245309829711914, "step": 143060 }, { "epoch": 0.6142294119162309, "grad_norm": 0.1996997743844986, "learning_rate": 3.874382346093151e-05, "loss": 0.20710551738739014, "step": 143070 }, { "epoch": 0.6142723440062509, "grad_norm": 0.0035963598638772964, "learning_rate": 3.8739511740813885e-05, "loss": 0.31514573097229004, "step": 143080 }, { "epoch": 0.6143152760962709, "grad_norm": 0.0031128383707255125, "learning_rate": 3.873520002069626e-05, "loss": 0.10121103525161743, "step": 143090 }, { "epoch": 0.6143582081862909, "grad_norm": 0.004904964007437229, "learning_rate": 3.873088830057863e-05, "loss": 0.32856738567352295, "step": 143100 }, { "epoch": 0.614401140276311, "grad_norm": 0.7072336673736572, "learning_rate": 3.872657658046101e-05, "loss": 0.03897510170936584, "step": 143110 }, { "epoch": 0.6144440723663309, "grad_norm": 0.01982838474214077, "learning_rate": 3.872226486034339e-05, "loss": 0.24557275772094728, "step": 143120 }, { "epoch": 0.614487004456351, "grad_norm": 0.00509743532165885, "learning_rate": 3.8717953140225765e-05, "loss": 0.1875273823738098, "step": 143130 }, { "epoch": 0.614529936546371, "grad_norm": 0.0008203135221265256, "learning_rate": 3.8713641420108135e-05, "loss": 0.21112377643585206, "step": 143140 }, { "epoch": 0.6145728686363909, "grad_norm": 0.007646126672625542, "learning_rate": 3.870932969999051e-05, "loss": 0.13793174028396607, "step": 143150 }, { "epoch": 0.614615800726411, "grad_norm": 0.0029893110040575266, "learning_rate": 3.870501797987289e-05, "loss": 0.1003826379776001, "step": 143160 }, { "epoch": 0.614658732816431, "grad_norm": 0.0017700279131531715, "learning_rate": 3.870070625975527e-05, "loss": 0.12775927782058716, "step": 143170 }, { "epoch": 0.614701664906451, "grad_norm": 0.017108073458075523, "learning_rate": 3.8696394539637645e-05, "loss": 0.12151342630386353, "step": 143180 }, { "epoch": 0.614744596996471, "grad_norm": 0.08749516308307648, "learning_rate": 3.869208281952002e-05, "loss": 0.08751832246780396, "step": 143190 }, { "epoch": 0.614787529086491, "grad_norm": 0.14753209054470062, "learning_rate": 3.86877710994024e-05, "loss": 0.220550799369812, "step": 143200 }, { "epoch": 0.614830461176511, "grad_norm": 7.232389450073242, "learning_rate": 3.868345937928478e-05, "loss": 0.2188883066177368, "step": 143210 }, { "epoch": 0.614873393266531, "grad_norm": 0.18595461547374725, "learning_rate": 3.867914765916715e-05, "loss": 0.263457727432251, "step": 143220 }, { "epoch": 0.614916325356551, "grad_norm": 0.011233457364141941, "learning_rate": 3.8674835939049525e-05, "loss": 0.22754213809967042, "step": 143230 }, { "epoch": 0.614959257446571, "grad_norm": 2.888434886932373, "learning_rate": 3.86705242189319e-05, "loss": 0.29729480743408204, "step": 143240 }, { "epoch": 0.615002189536591, "grad_norm": 0.039356231689453125, "learning_rate": 3.866621249881428e-05, "loss": 0.1607893705368042, "step": 143250 }, { "epoch": 0.6150451216266111, "grad_norm": 1.696097731590271, "learning_rate": 3.866190077869665e-05, "loss": 0.05476242899894714, "step": 143260 }, { "epoch": 0.615088053716631, "grad_norm": 0.11227918416261673, "learning_rate": 3.865758905857903e-05, "loss": 0.3089680433273315, "step": 143270 }, { "epoch": 0.615130985806651, "grad_norm": 20.85646629333496, "learning_rate": 3.8653277338461405e-05, "loss": 0.1662601947784424, "step": 143280 }, { "epoch": 0.6151739178966711, "grad_norm": 0.04491984099149704, "learning_rate": 3.864896561834379e-05, "loss": 0.27137112617492676, "step": 143290 }, { "epoch": 0.615216849986691, "grad_norm": 0.0005721793859265745, "learning_rate": 3.864465389822616e-05, "loss": 0.2376784563064575, "step": 143300 }, { "epoch": 0.6152597820767111, "grad_norm": 0.027530111372470856, "learning_rate": 3.8640342178108537e-05, "loss": 0.26707456111907957, "step": 143310 }, { "epoch": 0.6153027141667311, "grad_norm": 0.008097376674413681, "learning_rate": 3.8636030457990914e-05, "loss": 0.15281684398651124, "step": 143320 }, { "epoch": 0.615345646256751, "grad_norm": 18.04004669189453, "learning_rate": 3.863171873787329e-05, "loss": 0.1304473400115967, "step": 143330 }, { "epoch": 0.6153885783467711, "grad_norm": 6.95366096496582, "learning_rate": 3.862740701775567e-05, "loss": 0.2441573143005371, "step": 143340 }, { "epoch": 0.6154315104367911, "grad_norm": 10.125812530517578, "learning_rate": 3.862309529763804e-05, "loss": 0.13682811260223388, "step": 143350 }, { "epoch": 0.615474442526811, "grad_norm": 7.338260650634766, "learning_rate": 3.8618783577520417e-05, "loss": 0.21427173614501954, "step": 143360 }, { "epoch": 0.6155173746168311, "grad_norm": 0.017023073509335518, "learning_rate": 3.8614471857402794e-05, "loss": 0.16689443588256836, "step": 143370 }, { "epoch": 0.6155603067068511, "grad_norm": 0.33701053261756897, "learning_rate": 3.861016013728517e-05, "loss": 0.25445683002471925, "step": 143380 }, { "epoch": 0.6156032387968711, "grad_norm": 5.320155143737793, "learning_rate": 3.860584841716754e-05, "loss": 0.23360719680786132, "step": 143390 }, { "epoch": 0.6156461708868911, "grad_norm": 1.3005390167236328, "learning_rate": 3.8601536697049926e-05, "loss": 0.34432909488677976, "step": 143400 }, { "epoch": 0.6156891029769112, "grad_norm": 0.03673168644309044, "learning_rate": 3.85972249769323e-05, "loss": 0.1609882354736328, "step": 143410 }, { "epoch": 0.6157320350669311, "grad_norm": 34.08330154418945, "learning_rate": 3.859291325681468e-05, "loss": 0.2362835168838501, "step": 143420 }, { "epoch": 0.6157749671569511, "grad_norm": 0.06983064115047455, "learning_rate": 3.858860153669705e-05, "loss": 0.2938406229019165, "step": 143430 }, { "epoch": 0.6158178992469712, "grad_norm": 0.055000558495521545, "learning_rate": 3.858428981657943e-05, "loss": 0.39708521366119387, "step": 143440 }, { "epoch": 0.6158608313369911, "grad_norm": 0.011660448275506496, "learning_rate": 3.8579978096461806e-05, "loss": 0.15488802194595336, "step": 143450 }, { "epoch": 0.6159037634270111, "grad_norm": 0.36187925934791565, "learning_rate": 3.857566637634418e-05, "loss": 0.11263597011566162, "step": 143460 }, { "epoch": 0.6159466955170312, "grad_norm": 4.819761276245117, "learning_rate": 3.8571354656226554e-05, "loss": 0.2282076358795166, "step": 143470 }, { "epoch": 0.6159896276070511, "grad_norm": 0.590923547744751, "learning_rate": 3.856704293610893e-05, "loss": 0.15642644166946412, "step": 143480 }, { "epoch": 0.6160325596970712, "grad_norm": 0.0005970151396468282, "learning_rate": 3.856273121599131e-05, "loss": 0.21688811779022216, "step": 143490 }, { "epoch": 0.6160754917870912, "grad_norm": 1.820595622062683, "learning_rate": 3.8558419495873686e-05, "loss": 0.22994556427001953, "step": 143500 }, { "epoch": 0.6161184238771111, "grad_norm": 0.025294188410043716, "learning_rate": 3.855410777575606e-05, "loss": 0.14468964338302612, "step": 143510 }, { "epoch": 0.6161613559671312, "grad_norm": 1.5548747777938843, "learning_rate": 3.854979605563844e-05, "loss": 0.16419532299041747, "step": 143520 }, { "epoch": 0.6162042880571512, "grad_norm": 0.6060371398925781, "learning_rate": 3.854548433552082e-05, "loss": 0.2152492046356201, "step": 143530 }, { "epoch": 0.6162472201471713, "grad_norm": 0.6445576548576355, "learning_rate": 3.8541172615403195e-05, "loss": 0.2481471061706543, "step": 143540 }, { "epoch": 0.6162901522371912, "grad_norm": 13.752565383911133, "learning_rate": 3.8536860895285566e-05, "loss": 0.19086780548095703, "step": 143550 }, { "epoch": 0.6163330843272112, "grad_norm": 0.0020951321348547935, "learning_rate": 3.853254917516794e-05, "loss": 0.27932713031768797, "step": 143560 }, { "epoch": 0.6163760164172313, "grad_norm": 0.6050500273704529, "learning_rate": 3.852823745505032e-05, "loss": 0.29224677085876466, "step": 143570 }, { "epoch": 0.6164189485072512, "grad_norm": 0.04540263116359711, "learning_rate": 3.85239257349327e-05, "loss": 0.3663180828094482, "step": 143580 }, { "epoch": 0.6164618805972713, "grad_norm": 0.041277043521404266, "learning_rate": 3.851961401481507e-05, "loss": 0.14575035572052003, "step": 143590 }, { "epoch": 0.6165048126872913, "grad_norm": 0.7824796438217163, "learning_rate": 3.8515302294697446e-05, "loss": 0.2100764751434326, "step": 143600 }, { "epoch": 0.6165477447773112, "grad_norm": 0.005907750688493252, "learning_rate": 3.851099057457982e-05, "loss": 0.11084829568862915, "step": 143610 }, { "epoch": 0.6165906768673313, "grad_norm": 0.744647741317749, "learning_rate": 3.85066788544622e-05, "loss": 0.26470563411712644, "step": 143620 }, { "epoch": 0.6166336089573513, "grad_norm": 2.1389269828796387, "learning_rate": 3.850236713434458e-05, "loss": 0.21109626293182374, "step": 143630 }, { "epoch": 0.6166765410473712, "grad_norm": 5.421672821044922, "learning_rate": 3.8498055414226955e-05, "loss": 0.28545246124267576, "step": 143640 }, { "epoch": 0.6167194731373913, "grad_norm": 2.370431900024414, "learning_rate": 3.849374369410933e-05, "loss": 0.06665264964103698, "step": 143650 }, { "epoch": 0.6167624052274113, "grad_norm": 0.0015999609604477882, "learning_rate": 3.848943197399171e-05, "loss": 0.3594416618347168, "step": 143660 }, { "epoch": 0.6168053373174313, "grad_norm": 0.9188343286514282, "learning_rate": 3.848512025387408e-05, "loss": 0.10321264266967774, "step": 143670 }, { "epoch": 0.6168482694074513, "grad_norm": 3.1624345779418945, "learning_rate": 3.848080853375646e-05, "loss": 0.28934388160705565, "step": 143680 }, { "epoch": 0.6168912014974713, "grad_norm": 1.9765626192092896, "learning_rate": 3.8476496813638835e-05, "loss": 0.35861854553222655, "step": 143690 }, { "epoch": 0.6169341335874913, "grad_norm": 5.686208248138428, "learning_rate": 3.847218509352121e-05, "loss": 0.17112207412719727, "step": 143700 }, { "epoch": 0.6169770656775113, "grad_norm": 0.000594784040004015, "learning_rate": 3.846787337340359e-05, "loss": 0.1451743721961975, "step": 143710 }, { "epoch": 0.6170199977675314, "grad_norm": 1.208206295967102, "learning_rate": 3.846356165328596e-05, "loss": 0.19009263515472413, "step": 143720 }, { "epoch": 0.6170629298575513, "grad_norm": 1.7347217798233032, "learning_rate": 3.845924993316834e-05, "loss": 0.4419961452484131, "step": 143730 }, { "epoch": 0.6171058619475713, "grad_norm": 0.16214624047279358, "learning_rate": 3.8454938213050715e-05, "loss": 0.26940882205963135, "step": 143740 }, { "epoch": 0.6171487940375914, "grad_norm": 0.03651418536901474, "learning_rate": 3.845062649293309e-05, "loss": 0.39347841739654543, "step": 143750 }, { "epoch": 0.6171917261276113, "grad_norm": 43.0539436340332, "learning_rate": 3.844631477281547e-05, "loss": 0.14974191188812255, "step": 143760 }, { "epoch": 0.6172346582176313, "grad_norm": 0.0016830979147925973, "learning_rate": 3.844200305269785e-05, "loss": 0.2485567808151245, "step": 143770 }, { "epoch": 0.6172775903076514, "grad_norm": 12.779747009277344, "learning_rate": 3.8437691332580224e-05, "loss": 0.16967169046401978, "step": 143780 }, { "epoch": 0.6173205223976713, "grad_norm": 3.1827280521392822, "learning_rate": 3.84333796124626e-05, "loss": 0.3932061672210693, "step": 143790 }, { "epoch": 0.6173634544876914, "grad_norm": 0.009968830272555351, "learning_rate": 3.842906789234497e-05, "loss": 0.10252895355224609, "step": 143800 }, { "epoch": 0.6174063865777114, "grad_norm": 0.02346714586019516, "learning_rate": 3.842475617222735e-05, "loss": 0.45845975875854494, "step": 143810 }, { "epoch": 0.6174493186677313, "grad_norm": 0.45332077145576477, "learning_rate": 3.842044445210973e-05, "loss": 0.19608376026153565, "step": 143820 }, { "epoch": 0.6174922507577514, "grad_norm": 0.005778151098638773, "learning_rate": 3.8416132731992104e-05, "loss": 0.013501368463039398, "step": 143830 }, { "epoch": 0.6175351828477714, "grad_norm": 0.03759386017918587, "learning_rate": 3.8411821011874475e-05, "loss": 0.12773208618164061, "step": 143840 }, { "epoch": 0.6175781149377914, "grad_norm": 0.18952840566635132, "learning_rate": 3.840750929175685e-05, "loss": 0.2644594669342041, "step": 143850 }, { "epoch": 0.6176210470278114, "grad_norm": 0.5408098101615906, "learning_rate": 3.840319757163923e-05, "loss": 0.10454925298690795, "step": 143860 }, { "epoch": 0.6176639791178314, "grad_norm": 0.0044852206483483315, "learning_rate": 3.839888585152161e-05, "loss": 0.19259582757949828, "step": 143870 }, { "epoch": 0.6177069112078514, "grad_norm": 1.8733210563659668, "learning_rate": 3.8394574131403984e-05, "loss": 0.1476469397544861, "step": 143880 }, { "epoch": 0.6177498432978714, "grad_norm": 1.6489322185516357, "learning_rate": 3.839026241128636e-05, "loss": 0.44382553100585936, "step": 143890 }, { "epoch": 0.6177927753878915, "grad_norm": 1.372307538986206, "learning_rate": 3.838595069116874e-05, "loss": 0.15867252349853517, "step": 143900 }, { "epoch": 0.6178357074779114, "grad_norm": 0.27712729573249817, "learning_rate": 3.8381638971051116e-05, "loss": 0.3588222026824951, "step": 143910 }, { "epoch": 0.6178786395679314, "grad_norm": 1.5185275077819824, "learning_rate": 3.837732725093349e-05, "loss": 0.24487559795379638, "step": 143920 }, { "epoch": 0.6179215716579515, "grad_norm": 2.4732537269592285, "learning_rate": 3.8373015530815864e-05, "loss": 0.23896138668060302, "step": 143930 }, { "epoch": 0.6179645037479714, "grad_norm": 4.313063621520996, "learning_rate": 3.836870381069824e-05, "loss": 0.23469119071960448, "step": 143940 }, { "epoch": 0.6180074358379914, "grad_norm": 0.010658573359251022, "learning_rate": 3.836439209058062e-05, "loss": 0.061583518981933594, "step": 143950 }, { "epoch": 0.6180503679280115, "grad_norm": 0.14485430717468262, "learning_rate": 3.836008037046299e-05, "loss": 0.20397756099700928, "step": 143960 }, { "epoch": 0.6180933000180315, "grad_norm": 1.583567500114441, "learning_rate": 3.835576865034537e-05, "loss": 0.3420498609542847, "step": 143970 }, { "epoch": 0.6181362321080515, "grad_norm": 0.0020854922477155924, "learning_rate": 3.8351456930227744e-05, "loss": 0.14520049095153809, "step": 143980 }, { "epoch": 0.6181791641980715, "grad_norm": 0.9290441870689392, "learning_rate": 3.834714521011012e-05, "loss": 0.20561542510986328, "step": 143990 }, { "epoch": 0.6182220962880915, "grad_norm": 0.0007875105366110802, "learning_rate": 3.83428334899925e-05, "loss": 0.27081058025360105, "step": 144000 }, { "epoch": 0.6182220962880915, "eval_loss": 0.4025852084159851, "eval_runtime": 27.3094, "eval_samples_per_second": 3.662, "eval_steps_per_second": 3.662, "step": 144000 }, { "epoch": 0.6182650283781115, "grad_norm": 0.1286296844482422, "learning_rate": 3.8338521769874876e-05, "loss": 0.1636170744895935, "step": 144010 }, { "epoch": 0.6183079604681315, "grad_norm": 0.0414075031876564, "learning_rate": 3.8334210049757253e-05, "loss": 0.19566885232925416, "step": 144020 }, { "epoch": 0.6183508925581516, "grad_norm": 3.047715425491333, "learning_rate": 3.832989832963963e-05, "loss": 0.35408968925476075, "step": 144030 }, { "epoch": 0.6183938246481715, "grad_norm": 0.16599392890930176, "learning_rate": 3.832558660952201e-05, "loss": 0.1797704815864563, "step": 144040 }, { "epoch": 0.6184367567381915, "grad_norm": 0.008573425933718681, "learning_rate": 3.832127488940438e-05, "loss": 0.28909251689910886, "step": 144050 }, { "epoch": 0.6184796888282116, "grad_norm": 1.5337269306182861, "learning_rate": 3.8316963169286756e-05, "loss": 0.19386523962020874, "step": 144060 }, { "epoch": 0.6185226209182315, "grad_norm": 0.03713132068514824, "learning_rate": 3.831265144916913e-05, "loss": 0.16312804222106933, "step": 144070 }, { "epoch": 0.6185655530082516, "grad_norm": 0.3644580543041229, "learning_rate": 3.830833972905151e-05, "loss": 0.2069899320602417, "step": 144080 }, { "epoch": 0.6186084850982716, "grad_norm": 0.4827655553817749, "learning_rate": 3.830402800893388e-05, "loss": 0.11150888204574586, "step": 144090 }, { "epoch": 0.6186514171882915, "grad_norm": 2.00545072555542, "learning_rate": 3.829971628881626e-05, "loss": 0.04419018030166626, "step": 144100 }, { "epoch": 0.6186943492783116, "grad_norm": 1.2583513259887695, "learning_rate": 3.829540456869864e-05, "loss": 0.15232290029525758, "step": 144110 }, { "epoch": 0.6187372813683316, "grad_norm": 0.6249735951423645, "learning_rate": 3.829109284858102e-05, "loss": 0.22450728416442872, "step": 144120 }, { "epoch": 0.6187802134583515, "grad_norm": 3.7458887100219727, "learning_rate": 3.828678112846339e-05, "loss": 0.2486107349395752, "step": 144130 }, { "epoch": 0.6188231455483716, "grad_norm": 0.007021801546216011, "learning_rate": 3.828246940834577e-05, "loss": 0.11499238014221191, "step": 144140 }, { "epoch": 0.6188660776383916, "grad_norm": 3.2239348888397217, "learning_rate": 3.8278157688228145e-05, "loss": 0.19586840867996216, "step": 144150 }, { "epoch": 0.6189090097284116, "grad_norm": 1.8541079759597778, "learning_rate": 3.827384596811052e-05, "loss": 0.04876286089420319, "step": 144160 }, { "epoch": 0.6189519418184316, "grad_norm": 0.020189929753541946, "learning_rate": 3.826953424799289e-05, "loss": 0.18910752534866332, "step": 144170 }, { "epoch": 0.6189948739084516, "grad_norm": 0.11692792177200317, "learning_rate": 3.826522252787527e-05, "loss": 0.06171929240226746, "step": 144180 }, { "epoch": 0.6190378059984716, "grad_norm": 0.5069569945335388, "learning_rate": 3.826091080775765e-05, "loss": 0.10949817895889283, "step": 144190 }, { "epoch": 0.6190807380884916, "grad_norm": 0.0024032050278037786, "learning_rate": 3.8256599087640025e-05, "loss": 0.3095686435699463, "step": 144200 }, { "epoch": 0.6191236701785117, "grad_norm": 1.413897156715393, "learning_rate": 3.8252287367522396e-05, "loss": 0.5190535545349121, "step": 144210 }, { "epoch": 0.6191666022685316, "grad_norm": 0.0012539130402728915, "learning_rate": 3.824797564740478e-05, "loss": 0.08090531826019287, "step": 144220 }, { "epoch": 0.6192095343585516, "grad_norm": 0.18290802836418152, "learning_rate": 3.824366392728716e-05, "loss": 0.20504300594329833, "step": 144230 }, { "epoch": 0.6192524664485717, "grad_norm": 0.0021957934368401766, "learning_rate": 3.8239352207169535e-05, "loss": 0.2403766393661499, "step": 144240 }, { "epoch": 0.6192953985385916, "grad_norm": 0.006666228640824556, "learning_rate": 3.8235040487051905e-05, "loss": 0.3289052963256836, "step": 144250 }, { "epoch": 0.6193383306286117, "grad_norm": 0.10093910992145538, "learning_rate": 3.823072876693428e-05, "loss": 0.07276955842971802, "step": 144260 }, { "epoch": 0.6193812627186317, "grad_norm": 0.022097958251833916, "learning_rate": 3.822641704681666e-05, "loss": 0.21653876304626465, "step": 144270 }, { "epoch": 0.6194241948086516, "grad_norm": 1.6240912675857544, "learning_rate": 3.822210532669904e-05, "loss": 0.15800448656082153, "step": 144280 }, { "epoch": 0.6194671268986717, "grad_norm": 2.436265468597412, "learning_rate": 3.821779360658141e-05, "loss": 0.21124651432037353, "step": 144290 }, { "epoch": 0.6195100589886917, "grad_norm": 1.256800651550293, "learning_rate": 3.8213481886463785e-05, "loss": 0.21225075721740722, "step": 144300 }, { "epoch": 0.6195529910787116, "grad_norm": 10.628231048583984, "learning_rate": 3.820917016634616e-05, "loss": 0.22397346496582032, "step": 144310 }, { "epoch": 0.6195959231687317, "grad_norm": 0.10220340639352798, "learning_rate": 3.820485844622854e-05, "loss": 0.2670298099517822, "step": 144320 }, { "epoch": 0.6196388552587517, "grad_norm": 0.010059397667646408, "learning_rate": 3.820054672611092e-05, "loss": 0.17682617902755737, "step": 144330 }, { "epoch": 0.6196817873487717, "grad_norm": 0.3097505271434784, "learning_rate": 3.8196235005993294e-05, "loss": 0.16717482805252076, "step": 144340 }, { "epoch": 0.6197247194387917, "grad_norm": 0.4097038805484772, "learning_rate": 3.819192328587567e-05, "loss": 0.3388197422027588, "step": 144350 }, { "epoch": 0.6197676515288117, "grad_norm": 10.982796669006348, "learning_rate": 3.818761156575805e-05, "loss": 0.04242531061172485, "step": 144360 }, { "epoch": 0.6198105836188317, "grad_norm": 0.06076984852552414, "learning_rate": 3.818329984564042e-05, "loss": 0.00288843959569931, "step": 144370 }, { "epoch": 0.6198535157088517, "grad_norm": 0.003789462149143219, "learning_rate": 3.81789881255228e-05, "loss": 0.15669078826904298, "step": 144380 }, { "epoch": 0.6198964477988718, "grad_norm": 0.7226512432098389, "learning_rate": 3.8174676405405174e-05, "loss": 0.1839470624923706, "step": 144390 }, { "epoch": 0.6199393798888918, "grad_norm": 3.0010428428649902, "learning_rate": 3.817036468528755e-05, "loss": 0.057888460159301755, "step": 144400 }, { "epoch": 0.6199823119789117, "grad_norm": 2.917448043823242, "learning_rate": 3.816605296516993e-05, "loss": 0.28990559577941893, "step": 144410 }, { "epoch": 0.6200252440689318, "grad_norm": 0.002920327242463827, "learning_rate": 3.81617412450523e-05, "loss": 0.07854058742523193, "step": 144420 }, { "epoch": 0.6200681761589518, "grad_norm": 3.3465566635131836, "learning_rate": 3.815742952493468e-05, "loss": 0.20734431743621826, "step": 144430 }, { "epoch": 0.6201111082489718, "grad_norm": 10.82240104675293, "learning_rate": 3.8153117804817054e-05, "loss": 0.12018462419509887, "step": 144440 }, { "epoch": 0.6201540403389918, "grad_norm": 0.002988159190863371, "learning_rate": 3.814880608469943e-05, "loss": 0.20601401329040528, "step": 144450 }, { "epoch": 0.6201969724290118, "grad_norm": 1.9121832847595215, "learning_rate": 3.814449436458181e-05, "loss": 0.2810624122619629, "step": 144460 }, { "epoch": 0.6202399045190318, "grad_norm": 0.005302377510815859, "learning_rate": 3.8140182644464186e-05, "loss": 0.3187835931777954, "step": 144470 }, { "epoch": 0.6202828366090518, "grad_norm": 1.602386236190796, "learning_rate": 3.8135870924346564e-05, "loss": 0.07800792455673218, "step": 144480 }, { "epoch": 0.6203257686990719, "grad_norm": 0.007031735498458147, "learning_rate": 3.813155920422894e-05, "loss": 0.006209475174546242, "step": 144490 }, { "epoch": 0.6203687007890918, "grad_norm": 1.0654966831207275, "learning_rate": 3.812724748411131e-05, "loss": 0.1540340781211853, "step": 144500 }, { "epoch": 0.6204116328791118, "grad_norm": 4.626923561096191, "learning_rate": 3.812293576399369e-05, "loss": 0.2106240510940552, "step": 144510 }, { "epoch": 0.6204545649691319, "grad_norm": 0.20285534858703613, "learning_rate": 3.8118624043876066e-05, "loss": 0.3320075273513794, "step": 144520 }, { "epoch": 0.6204974970591518, "grad_norm": 0.0030227135866880417, "learning_rate": 3.8114312323758444e-05, "loss": 0.13404037952423095, "step": 144530 }, { "epoch": 0.6205404291491718, "grad_norm": 0.22634758055210114, "learning_rate": 3.8110000603640814e-05, "loss": 0.0774927020072937, "step": 144540 }, { "epoch": 0.6205833612391919, "grad_norm": 2.493731737136841, "learning_rate": 3.810568888352319e-05, "loss": 0.09815502762794495, "step": 144550 }, { "epoch": 0.6206262933292118, "grad_norm": 0.0005603001336567104, "learning_rate": 3.810137716340557e-05, "loss": 0.12804954051971434, "step": 144560 }, { "epoch": 0.6206692254192319, "grad_norm": 0.03851265087723732, "learning_rate": 3.8097065443287946e-05, "loss": 0.11833794116973877, "step": 144570 }, { "epoch": 0.6207121575092519, "grad_norm": 0.00021021152497269213, "learning_rate": 3.8092753723170324e-05, "loss": 0.11247782707214356, "step": 144580 }, { "epoch": 0.6207550895992718, "grad_norm": 6.465201377868652, "learning_rate": 3.80884420030527e-05, "loss": 0.3022731304168701, "step": 144590 }, { "epoch": 0.6207980216892919, "grad_norm": 0.001970832934603095, "learning_rate": 3.808413028293508e-05, "loss": 0.26380870342254636, "step": 144600 }, { "epoch": 0.6208409537793119, "grad_norm": 0.009534573182463646, "learning_rate": 3.8079818562817456e-05, "loss": 0.0802575707435608, "step": 144610 }, { "epoch": 0.6208838858693319, "grad_norm": 0.6005143523216248, "learning_rate": 3.8075506842699826e-05, "loss": 0.0741170346736908, "step": 144620 }, { "epoch": 0.6209268179593519, "grad_norm": 0.0028213628102093935, "learning_rate": 3.8071195122582204e-05, "loss": 0.307177472114563, "step": 144630 }, { "epoch": 0.6209697500493719, "grad_norm": 0.0034725700970739126, "learning_rate": 3.806688340246458e-05, "loss": 0.0965348243713379, "step": 144640 }, { "epoch": 0.6210126821393919, "grad_norm": 1.9992482662200928, "learning_rate": 3.806257168234696e-05, "loss": 0.1916975736618042, "step": 144650 }, { "epoch": 0.6210556142294119, "grad_norm": 0.4276776611804962, "learning_rate": 3.805825996222933e-05, "loss": 0.34029054641723633, "step": 144660 }, { "epoch": 0.621098546319432, "grad_norm": 1.4472237825393677, "learning_rate": 3.8053948242111706e-05, "loss": 0.08610463738441468, "step": 144670 }, { "epoch": 0.6211414784094519, "grad_norm": 0.8272998332977295, "learning_rate": 3.8049636521994083e-05, "loss": 0.387483549118042, "step": 144680 }, { "epoch": 0.6211844104994719, "grad_norm": 0.0029743912164121866, "learning_rate": 3.804532480187646e-05, "loss": 0.22995834350585936, "step": 144690 }, { "epoch": 0.621227342589492, "grad_norm": 3.1174747943878174, "learning_rate": 3.804101308175884e-05, "loss": 0.13578439950942994, "step": 144700 }, { "epoch": 0.6212702746795119, "grad_norm": 0.009533915668725967, "learning_rate": 3.8036701361641216e-05, "loss": 0.22146253585815429, "step": 144710 }, { "epoch": 0.621313206769532, "grad_norm": 0.03149215504527092, "learning_rate": 3.803238964152359e-05, "loss": 0.2577746152877808, "step": 144720 }, { "epoch": 0.621356138859552, "grad_norm": 1.5252588987350464, "learning_rate": 3.802807792140597e-05, "loss": 0.30984480381011964, "step": 144730 }, { "epoch": 0.6213990709495719, "grad_norm": 0.17621318995952606, "learning_rate": 3.802376620128834e-05, "loss": 0.16381561756134033, "step": 144740 }, { "epoch": 0.621442003039592, "grad_norm": 0.004886439070105553, "learning_rate": 3.801945448117072e-05, "loss": 0.11476737260818481, "step": 144750 }, { "epoch": 0.621484935129612, "grad_norm": 1.7159773111343384, "learning_rate": 3.8015142761053095e-05, "loss": 0.13346294164657593, "step": 144760 }, { "epoch": 0.6215278672196319, "grad_norm": 0.4524969756603241, "learning_rate": 3.801083104093547e-05, "loss": 0.28767459392547606, "step": 144770 }, { "epoch": 0.621570799309652, "grad_norm": 0.0026604910381138325, "learning_rate": 3.800651932081785e-05, "loss": 0.25750012397766114, "step": 144780 }, { "epoch": 0.621613731399672, "grad_norm": 5.111704349517822, "learning_rate": 3.800220760070022e-05, "loss": 0.3053690195083618, "step": 144790 }, { "epoch": 0.621656663489692, "grad_norm": 1.1439964771270752, "learning_rate": 3.79978958805826e-05, "loss": 0.21157233715057372, "step": 144800 }, { "epoch": 0.621699595579712, "grad_norm": 0.010473281145095825, "learning_rate": 3.799358416046498e-05, "loss": 0.32777354717254636, "step": 144810 }, { "epoch": 0.621742527669732, "grad_norm": 1.7217943668365479, "learning_rate": 3.798927244034736e-05, "loss": 0.09053964614868164, "step": 144820 }, { "epoch": 0.6217854597597521, "grad_norm": 0.032146602869033813, "learning_rate": 3.798496072022973e-05, "loss": 0.33871643543243407, "step": 144830 }, { "epoch": 0.621828391849772, "grad_norm": 1.690449595451355, "learning_rate": 3.798064900011211e-05, "loss": 0.22544546127319337, "step": 144840 }, { "epoch": 0.621871323939792, "grad_norm": 0.03286479413509369, "learning_rate": 3.7976337279994485e-05, "loss": 0.2782609224319458, "step": 144850 }, { "epoch": 0.6219142560298121, "grad_norm": 0.22668150067329407, "learning_rate": 3.797202555987686e-05, "loss": 0.09379579424858094, "step": 144860 }, { "epoch": 0.621957188119832, "grad_norm": 1.2573636770248413, "learning_rate": 3.796771383975923e-05, "loss": 0.26519174575805665, "step": 144870 }, { "epoch": 0.6220001202098521, "grad_norm": 0.35459011793136597, "learning_rate": 3.796340211964161e-05, "loss": 0.2879498958587646, "step": 144880 }, { "epoch": 0.6220430522998721, "grad_norm": 1.8405866622924805, "learning_rate": 3.795909039952399e-05, "loss": 0.42113757133483887, "step": 144890 }, { "epoch": 0.622085984389892, "grad_norm": 0.3268384635448456, "learning_rate": 3.7954778679406365e-05, "loss": 0.21574442386627196, "step": 144900 }, { "epoch": 0.6221289164799121, "grad_norm": 0.0016373234102502465, "learning_rate": 3.7950466959288735e-05, "loss": 0.26897358894348145, "step": 144910 }, { "epoch": 0.6221718485699321, "grad_norm": 0.0009527649381197989, "learning_rate": 3.794615523917112e-05, "loss": 0.29565980434417727, "step": 144920 }, { "epoch": 0.6222147806599521, "grad_norm": 1.7517390251159668, "learning_rate": 3.79418435190535e-05, "loss": 0.07204756736755372, "step": 144930 }, { "epoch": 0.6222577127499721, "grad_norm": 2.711318016052246, "learning_rate": 3.7937531798935874e-05, "loss": 0.4752767562866211, "step": 144940 }, { "epoch": 0.6223006448399921, "grad_norm": 1.5553025007247925, "learning_rate": 3.7933220078818245e-05, "loss": 0.21367301940917968, "step": 144950 }, { "epoch": 0.6223435769300121, "grad_norm": 0.0016237174859270453, "learning_rate": 3.792890835870062e-05, "loss": 0.08536785244941711, "step": 144960 }, { "epoch": 0.6223865090200321, "grad_norm": 0.015023069456219673, "learning_rate": 3.7924596638583e-05, "loss": 0.26001734733581544, "step": 144970 }, { "epoch": 0.6224294411100522, "grad_norm": 0.001799068064428866, "learning_rate": 3.792028491846538e-05, "loss": 0.193844997882843, "step": 144980 }, { "epoch": 0.6224723732000721, "grad_norm": 0.15669679641723633, "learning_rate": 3.791597319834775e-05, "loss": 0.1980237603187561, "step": 144990 }, { "epoch": 0.6225153052900921, "grad_norm": 0.05634564161300659, "learning_rate": 3.7911661478230125e-05, "loss": 0.2074127435684204, "step": 145000 }, { "epoch": 0.6225153052900921, "eval_loss": 0.40026023983955383, "eval_runtime": 27.2668, "eval_samples_per_second": 3.667, "eval_steps_per_second": 3.667, "step": 145000 }, { "epoch": 0.6225582373801122, "grad_norm": 0.06716686487197876, "learning_rate": 3.79073497581125e-05, "loss": 0.21744098663330078, "step": 145010 }, { "epoch": 0.6226011694701321, "grad_norm": 0.043255776166915894, "learning_rate": 3.790303803799488e-05, "loss": 0.13981914520263672, "step": 145020 }, { "epoch": 0.6226441015601522, "grad_norm": 0.4962061047554016, "learning_rate": 3.789872631787726e-05, "loss": 0.19934669733047486, "step": 145030 }, { "epoch": 0.6226870336501722, "grad_norm": 0.00197092373855412, "learning_rate": 3.7894414597759634e-05, "loss": 0.22532355785369873, "step": 145040 }, { "epoch": 0.6227299657401921, "grad_norm": 0.6518476009368896, "learning_rate": 3.789010287764201e-05, "loss": 0.2599307060241699, "step": 145050 }, { "epoch": 0.6227728978302122, "grad_norm": 1.8860397338867188, "learning_rate": 3.788579115752439e-05, "loss": 0.13637256622314453, "step": 145060 }, { "epoch": 0.6228158299202322, "grad_norm": 0.07711993902921677, "learning_rate": 3.788147943740676e-05, "loss": 0.047610118985176086, "step": 145070 }, { "epoch": 0.6228587620102521, "grad_norm": 1.4419951438903809, "learning_rate": 3.7877167717289137e-05, "loss": 0.1328027367591858, "step": 145080 }, { "epoch": 0.6229016941002722, "grad_norm": 0.004267912823706865, "learning_rate": 3.7872855997171514e-05, "loss": 0.3583818435668945, "step": 145090 }, { "epoch": 0.6229446261902922, "grad_norm": 3.1124303340911865, "learning_rate": 3.786854427705389e-05, "loss": 0.2011735200881958, "step": 145100 }, { "epoch": 0.6229875582803122, "grad_norm": 1.898911952972412, "learning_rate": 3.786423255693626e-05, "loss": 0.31902217864990234, "step": 145110 }, { "epoch": 0.6230304903703322, "grad_norm": 0.10020571947097778, "learning_rate": 3.785992083681864e-05, "loss": 0.06902580857276916, "step": 145120 }, { "epoch": 0.6230734224603522, "grad_norm": 2.0357489585876465, "learning_rate": 3.7855609116701016e-05, "loss": 0.1981680989265442, "step": 145130 }, { "epoch": 0.6231163545503722, "grad_norm": 0.03490392118692398, "learning_rate": 3.7851297396583394e-05, "loss": 0.35762135982513427, "step": 145140 }, { "epoch": 0.6231592866403922, "grad_norm": 0.012586482800543308, "learning_rate": 3.784698567646577e-05, "loss": 0.14604350328445434, "step": 145150 }, { "epoch": 0.6232022187304123, "grad_norm": 0.06158836930990219, "learning_rate": 3.784267395634815e-05, "loss": 0.10603926181793213, "step": 145160 }, { "epoch": 0.6232451508204322, "grad_norm": 3.0967295169830322, "learning_rate": 3.7838362236230526e-05, "loss": 0.06373662352561951, "step": 145170 }, { "epoch": 0.6232880829104522, "grad_norm": 1.3756184577941895, "learning_rate": 3.78340505161129e-05, "loss": 0.2651843547821045, "step": 145180 }, { "epoch": 0.6233310150004723, "grad_norm": 2.4878203868865967, "learning_rate": 3.782973879599528e-05, "loss": 0.383553147315979, "step": 145190 }, { "epoch": 0.6233739470904922, "grad_norm": 0.015998877584934235, "learning_rate": 3.782542707587765e-05, "loss": 0.2149120092391968, "step": 145200 }, { "epoch": 0.6234168791805123, "grad_norm": 0.640251636505127, "learning_rate": 3.782111535576003e-05, "loss": 0.19925031661987305, "step": 145210 }, { "epoch": 0.6234598112705323, "grad_norm": 1.6535863876342773, "learning_rate": 3.7816803635642406e-05, "loss": 0.2155580997467041, "step": 145220 }, { "epoch": 0.6235027433605522, "grad_norm": 1.1984570026397705, "learning_rate": 3.781249191552478e-05, "loss": 0.19622409343719482, "step": 145230 }, { "epoch": 0.6235456754505723, "grad_norm": 0.0030771929305046797, "learning_rate": 3.7808180195407154e-05, "loss": 0.04981268346309662, "step": 145240 }, { "epoch": 0.6235886075405923, "grad_norm": 4.39127779006958, "learning_rate": 3.780386847528953e-05, "loss": 0.33121452331542967, "step": 145250 }, { "epoch": 0.6236315396306124, "grad_norm": 0.059484440833330154, "learning_rate": 3.779955675517191e-05, "loss": 0.2900093078613281, "step": 145260 }, { "epoch": 0.6236744717206323, "grad_norm": 0.011092513799667358, "learning_rate": 3.7795245035054286e-05, "loss": 0.17457928657531738, "step": 145270 }, { "epoch": 0.6237174038106523, "grad_norm": 0.025852881371974945, "learning_rate": 3.779093331493666e-05, "loss": 0.0900078535079956, "step": 145280 }, { "epoch": 0.6237603359006724, "grad_norm": 2.27447247505188, "learning_rate": 3.778662159481904e-05, "loss": 0.30752589702606203, "step": 145290 }, { "epoch": 0.6238032679906923, "grad_norm": 10.999873161315918, "learning_rate": 3.778230987470142e-05, "loss": 0.2985308408737183, "step": 145300 }, { "epoch": 0.6238462000807123, "grad_norm": 0.021880190819501877, "learning_rate": 3.7777998154583795e-05, "loss": 0.04139164686203003, "step": 145310 }, { "epoch": 0.6238891321707324, "grad_norm": 0.012969445437192917, "learning_rate": 3.7773686434466166e-05, "loss": 0.34741475582122805, "step": 145320 }, { "epoch": 0.6239320642607523, "grad_norm": 0.012905898503959179, "learning_rate": 3.776937471434854e-05, "loss": 0.0011121029034256934, "step": 145330 }, { "epoch": 0.6239749963507724, "grad_norm": 2.579164981842041, "learning_rate": 3.776506299423092e-05, "loss": 0.25848629474639895, "step": 145340 }, { "epoch": 0.6240179284407924, "grad_norm": 0.2724818289279938, "learning_rate": 3.77607512741133e-05, "loss": 0.20009520053863525, "step": 145350 }, { "epoch": 0.6240608605308123, "grad_norm": 2.1852598190307617, "learning_rate": 3.775643955399567e-05, "loss": 0.2844280242919922, "step": 145360 }, { "epoch": 0.6241037926208324, "grad_norm": 0.014285706914961338, "learning_rate": 3.7752127833878046e-05, "loss": 0.2562817335128784, "step": 145370 }, { "epoch": 0.6241467247108524, "grad_norm": 0.06769892573356628, "learning_rate": 3.774781611376042e-05, "loss": 0.015678460896015167, "step": 145380 }, { "epoch": 0.6241896568008724, "grad_norm": 0.02668279968202114, "learning_rate": 3.77435043936428e-05, "loss": 0.24262983798980714, "step": 145390 }, { "epoch": 0.6242325888908924, "grad_norm": 0.052530985325574875, "learning_rate": 3.773919267352518e-05, "loss": 0.18415896892547606, "step": 145400 }, { "epoch": 0.6242755209809124, "grad_norm": 1.8255459070205688, "learning_rate": 3.7734880953407555e-05, "loss": 0.3908602237701416, "step": 145410 }, { "epoch": 0.6243184530709324, "grad_norm": 0.0010746768675744534, "learning_rate": 3.773056923328993e-05, "loss": 0.13366581201553346, "step": 145420 }, { "epoch": 0.6243613851609524, "grad_norm": 4.105757713317871, "learning_rate": 3.772625751317231e-05, "loss": 0.3110255479812622, "step": 145430 }, { "epoch": 0.6244043172509725, "grad_norm": 0.027147723361849785, "learning_rate": 3.772194579305468e-05, "loss": 0.29846107959747314, "step": 145440 }, { "epoch": 0.6244472493409924, "grad_norm": 0.4245328903198242, "learning_rate": 3.771763407293706e-05, "loss": 0.10092895030975342, "step": 145450 }, { "epoch": 0.6244901814310124, "grad_norm": 1.827056884765625, "learning_rate": 3.7713322352819435e-05, "loss": 0.06381365656852722, "step": 145460 }, { "epoch": 0.6245331135210325, "grad_norm": 0.18593548238277435, "learning_rate": 3.770901063270181e-05, "loss": 0.14583500623703002, "step": 145470 }, { "epoch": 0.6245760456110524, "grad_norm": 0.17885200679302216, "learning_rate": 3.770469891258418e-05, "loss": 0.11033551692962647, "step": 145480 }, { "epoch": 0.6246189777010724, "grad_norm": 0.005315417889505625, "learning_rate": 3.770038719246656e-05, "loss": 0.22789452075958253, "step": 145490 }, { "epoch": 0.6246619097910925, "grad_norm": 0.004930767696350813, "learning_rate": 3.769607547234894e-05, "loss": 0.12441198825836182, "step": 145500 }, { "epoch": 0.6247048418811124, "grad_norm": 2.7298367023468018, "learning_rate": 3.769176375223132e-05, "loss": 0.2080162525177002, "step": 145510 }, { "epoch": 0.6247477739711325, "grad_norm": 0.01054426096379757, "learning_rate": 3.76874520321137e-05, "loss": 0.16831830739974976, "step": 145520 }, { "epoch": 0.6247907060611525, "grad_norm": 9.6473970413208, "learning_rate": 3.768314031199607e-05, "loss": 0.2800298690795898, "step": 145530 }, { "epoch": 0.6248336381511724, "grad_norm": 0.004264887422323227, "learning_rate": 3.767882859187845e-05, "loss": 0.07018274664878846, "step": 145540 }, { "epoch": 0.6248765702411925, "grad_norm": 2.2102596759796143, "learning_rate": 3.7674516871760824e-05, "loss": 0.47200469970703124, "step": 145550 }, { "epoch": 0.6249195023312125, "grad_norm": 0.015142920427024364, "learning_rate": 3.76702051516432e-05, "loss": 0.21918439865112305, "step": 145560 }, { "epoch": 0.6249624344212324, "grad_norm": 0.14599426090717316, "learning_rate": 3.766589343152557e-05, "loss": 0.2167109251022339, "step": 145570 }, { "epoch": 0.6250053665112525, "grad_norm": 0.3270648419857025, "learning_rate": 3.766158171140795e-05, "loss": 0.31240594387054443, "step": 145580 }, { "epoch": 0.6250482986012725, "grad_norm": 0.04953808709979057, "learning_rate": 3.765726999129033e-05, "loss": 0.21056201457977294, "step": 145590 }, { "epoch": 0.6250912306912925, "grad_norm": 0.033134374767541885, "learning_rate": 3.7652958271172704e-05, "loss": 0.1267376184463501, "step": 145600 }, { "epoch": 0.6251341627813125, "grad_norm": 0.04095722362399101, "learning_rate": 3.7648646551055075e-05, "loss": 0.053391391038894655, "step": 145610 }, { "epoch": 0.6251770948713326, "grad_norm": 0.11082349717617035, "learning_rate": 3.764433483093746e-05, "loss": 0.07753645181655884, "step": 145620 }, { "epoch": 0.6252200269613525, "grad_norm": 0.0397820807993412, "learning_rate": 3.7640023110819836e-05, "loss": 0.0551756739616394, "step": 145630 }, { "epoch": 0.6252629590513725, "grad_norm": 0.08418507874011993, "learning_rate": 3.7635711390702214e-05, "loss": 0.23780393600463867, "step": 145640 }, { "epoch": 0.6253058911413926, "grad_norm": 0.1522897332906723, "learning_rate": 3.7631399670584584e-05, "loss": 0.15822170972824096, "step": 145650 }, { "epoch": 0.6253488232314125, "grad_norm": 10.029183387756348, "learning_rate": 3.762708795046696e-05, "loss": 0.269795823097229, "step": 145660 }, { "epoch": 0.6253917553214325, "grad_norm": 0.08777247369289398, "learning_rate": 3.762277623034934e-05, "loss": 0.31310975551605225, "step": 145670 }, { "epoch": 0.6254346874114526, "grad_norm": 0.007045544218271971, "learning_rate": 3.7618464510231716e-05, "loss": 0.15862007141113282, "step": 145680 }, { "epoch": 0.6254776195014726, "grad_norm": 0.002723569516092539, "learning_rate": 3.761415279011409e-05, "loss": 0.25944039821624754, "step": 145690 }, { "epoch": 0.6255205515914926, "grad_norm": 0.0022276772651821375, "learning_rate": 3.7609841069996464e-05, "loss": 0.2553360939025879, "step": 145700 }, { "epoch": 0.6255634836815126, "grad_norm": 0.009006328880786896, "learning_rate": 3.760552934987884e-05, "loss": 0.0817391335964203, "step": 145710 }, { "epoch": 0.6256064157715326, "grad_norm": 1.0603159666061401, "learning_rate": 3.760121762976122e-05, "loss": 0.27546770572662355, "step": 145720 }, { "epoch": 0.6256493478615526, "grad_norm": 0.043728139251470566, "learning_rate": 3.7596905909643596e-05, "loss": 0.2361830234527588, "step": 145730 }, { "epoch": 0.6256922799515726, "grad_norm": 0.004647698253393173, "learning_rate": 3.7592594189525973e-05, "loss": 0.17039880752563477, "step": 145740 }, { "epoch": 0.6257352120415927, "grad_norm": 1.4827666282653809, "learning_rate": 3.758828246940835e-05, "loss": 0.2931626558303833, "step": 145750 }, { "epoch": 0.6257781441316126, "grad_norm": 0.006912170443683863, "learning_rate": 3.758397074929073e-05, "loss": 0.17341002225875854, "step": 145760 }, { "epoch": 0.6258210762216326, "grad_norm": 0.0031075282022356987, "learning_rate": 3.75796590291731e-05, "loss": 0.2037494421005249, "step": 145770 }, { "epoch": 0.6258640083116527, "grad_norm": 3.5641911029815674, "learning_rate": 3.7575347309055476e-05, "loss": 0.12922029495239257, "step": 145780 }, { "epoch": 0.6259069404016726, "grad_norm": 1.2592276334762573, "learning_rate": 3.757103558893785e-05, "loss": 0.2730496883392334, "step": 145790 }, { "epoch": 0.6259498724916926, "grad_norm": 1.6755534410476685, "learning_rate": 3.756672386882023e-05, "loss": 0.2942385196685791, "step": 145800 }, { "epoch": 0.6259928045817127, "grad_norm": 0.32389035820961, "learning_rate": 3.75624121487026e-05, "loss": 0.18852608203887938, "step": 145810 }, { "epoch": 0.6260357366717326, "grad_norm": 0.0004410554829519242, "learning_rate": 3.755810042858498e-05, "loss": 0.16696833372116088, "step": 145820 }, { "epoch": 0.6260786687617527, "grad_norm": 0.15728677809238434, "learning_rate": 3.7553788708467356e-05, "loss": 0.26857168674468995, "step": 145830 }, { "epoch": 0.6261216008517727, "grad_norm": 5.143691062927246, "learning_rate": 3.754947698834973e-05, "loss": 0.28821985721588134, "step": 145840 }, { "epoch": 0.6261645329417926, "grad_norm": 1.2966766357421875, "learning_rate": 3.754516526823211e-05, "loss": 0.13320962190628052, "step": 145850 }, { "epoch": 0.6262074650318127, "grad_norm": 0.861115038394928, "learning_rate": 3.754085354811449e-05, "loss": 0.17039065361022948, "step": 145860 }, { "epoch": 0.6262503971218327, "grad_norm": 0.37558168172836304, "learning_rate": 3.7536541827996865e-05, "loss": 0.21755945682525635, "step": 145870 }, { "epoch": 0.6262933292118527, "grad_norm": 0.0026671243831515312, "learning_rate": 3.753223010787924e-05, "loss": 0.18931851387023926, "step": 145880 }, { "epoch": 0.6263362613018727, "grad_norm": 5.478182315826416, "learning_rate": 3.752791838776162e-05, "loss": 0.19699089527130126, "step": 145890 }, { "epoch": 0.6263791933918927, "grad_norm": 0.21105577051639557, "learning_rate": 3.752360666764399e-05, "loss": 0.10836315155029297, "step": 145900 }, { "epoch": 0.6264221254819127, "grad_norm": 0.004369442816823721, "learning_rate": 3.751929494752637e-05, "loss": 0.265771222114563, "step": 145910 }, { "epoch": 0.6264650575719327, "grad_norm": 0.028172479942440987, "learning_rate": 3.7514983227408745e-05, "loss": 0.2676279067993164, "step": 145920 }, { "epoch": 0.6265079896619528, "grad_norm": 2.269669771194458, "learning_rate": 3.751067150729112e-05, "loss": 0.2712892532348633, "step": 145930 }, { "epoch": 0.6265509217519727, "grad_norm": 0.005145613569766283, "learning_rate": 3.750635978717349e-05, "loss": 0.11162854433059692, "step": 145940 }, { "epoch": 0.6265938538419927, "grad_norm": 3.3848986625671387, "learning_rate": 3.750204806705587e-05, "loss": 0.04469236433506012, "step": 145950 }, { "epoch": 0.6266367859320128, "grad_norm": 0.03765769302845001, "learning_rate": 3.749773634693825e-05, "loss": 0.12097903490066528, "step": 145960 }, { "epoch": 0.6266797180220327, "grad_norm": 1.5613038539886475, "learning_rate": 3.7493424626820625e-05, "loss": 0.29975531101226804, "step": 145970 }, { "epoch": 0.6267226501120527, "grad_norm": 0.8295878767967224, "learning_rate": 3.7489112906703e-05, "loss": 0.17278884649276732, "step": 145980 }, { "epoch": 0.6267655822020728, "grad_norm": 0.02731485851109028, "learning_rate": 3.748480118658538e-05, "loss": 0.19173781871795653, "step": 145990 }, { "epoch": 0.6268085142920927, "grad_norm": 0.2628108561038971, "learning_rate": 3.748048946646776e-05, "loss": 0.220833158493042, "step": 146000 }, { "epoch": 0.6268085142920927, "eval_loss": 0.4031921625137329, "eval_runtime": 27.2015, "eval_samples_per_second": 3.676, "eval_steps_per_second": 3.676, "step": 146000 }, { "epoch": 0.6268514463821128, "grad_norm": 1.9009525775909424, "learning_rate": 3.7476177746350135e-05, "loss": 0.181487512588501, "step": 146010 }, { "epoch": 0.6268943784721328, "grad_norm": 0.006618720479309559, "learning_rate": 3.7471866026232505e-05, "loss": 0.28319597244262695, "step": 146020 }, { "epoch": 0.6269373105621527, "grad_norm": 3.7840189933776855, "learning_rate": 3.746755430611488e-05, "loss": 0.47288169860839846, "step": 146030 }, { "epoch": 0.6269802426521728, "grad_norm": 1.4764827489852905, "learning_rate": 3.746324258599726e-05, "loss": 0.16911323070526124, "step": 146040 }, { "epoch": 0.6270231747421928, "grad_norm": 0.2975632846355438, "learning_rate": 3.745893086587964e-05, "loss": 0.19954975843429565, "step": 146050 }, { "epoch": 0.6270661068322128, "grad_norm": 0.3340650796890259, "learning_rate": 3.745461914576201e-05, "loss": 0.22841260433197022, "step": 146060 }, { "epoch": 0.6271090389222328, "grad_norm": 0.06769564002752304, "learning_rate": 3.7450307425644385e-05, "loss": 0.03349553942680359, "step": 146070 }, { "epoch": 0.6271519710122528, "grad_norm": 0.0012691410956904292, "learning_rate": 3.744599570552676e-05, "loss": 0.12927672863006592, "step": 146080 }, { "epoch": 0.6271949031022728, "grad_norm": 3.187361717224121, "learning_rate": 3.744168398540914e-05, "loss": 0.27967898845672606, "step": 146090 }, { "epoch": 0.6272378351922928, "grad_norm": 0.11216457933187485, "learning_rate": 3.743737226529152e-05, "loss": 0.27449731826782225, "step": 146100 }, { "epoch": 0.6272807672823129, "grad_norm": 0.11325439810752869, "learning_rate": 3.7433060545173894e-05, "loss": 0.18230938911437988, "step": 146110 }, { "epoch": 0.6273236993723329, "grad_norm": 0.8538678884506226, "learning_rate": 3.742874882505627e-05, "loss": 0.18734936714172362, "step": 146120 }, { "epoch": 0.6273666314623528, "grad_norm": 0.010642403736710548, "learning_rate": 3.742443710493865e-05, "loss": 0.10937390327453614, "step": 146130 }, { "epoch": 0.6274095635523729, "grad_norm": 9.793993949890137, "learning_rate": 3.742012538482102e-05, "loss": 0.3626396894454956, "step": 146140 }, { "epoch": 0.6274524956423929, "grad_norm": 1.1978455781936646, "learning_rate": 3.74158136647034e-05, "loss": 0.07268427610397339, "step": 146150 }, { "epoch": 0.6274954277324128, "grad_norm": 2.1169817447662354, "learning_rate": 3.7411501944585774e-05, "loss": 0.12902868986129762, "step": 146160 }, { "epoch": 0.6275383598224329, "grad_norm": 6.555164813995361, "learning_rate": 3.740719022446815e-05, "loss": 0.45094785690307615, "step": 146170 }, { "epoch": 0.6275812919124529, "grad_norm": 0.001882154494524002, "learning_rate": 3.740287850435052e-05, "loss": 0.23653554916381836, "step": 146180 }, { "epoch": 0.6276242240024729, "grad_norm": 0.026824140921235085, "learning_rate": 3.73985667842329e-05, "loss": 0.24134407043457032, "step": 146190 }, { "epoch": 0.6276671560924929, "grad_norm": 0.005355069879442453, "learning_rate": 3.739425506411528e-05, "loss": 0.28949851989746095, "step": 146200 }, { "epoch": 0.627710088182513, "grad_norm": 0.018241219222545624, "learning_rate": 3.7389943343997654e-05, "loss": 0.29019651412963865, "step": 146210 }, { "epoch": 0.6277530202725329, "grad_norm": 0.021457862108945847, "learning_rate": 3.738563162388004e-05, "loss": 0.2145296573638916, "step": 146220 }, { "epoch": 0.6277959523625529, "grad_norm": 0.023087697103619576, "learning_rate": 3.738131990376241e-05, "loss": 0.006541821360588074, "step": 146230 }, { "epoch": 0.627838884452573, "grad_norm": 0.07880302518606186, "learning_rate": 3.7377008183644786e-05, "loss": 0.16138269901275634, "step": 146240 }, { "epoch": 0.6278818165425929, "grad_norm": 1.7311794757843018, "learning_rate": 3.7372696463527164e-05, "loss": 0.3214499235153198, "step": 146250 }, { "epoch": 0.6279247486326129, "grad_norm": 0.009020458906888962, "learning_rate": 3.736838474340954e-05, "loss": 0.11787396669387817, "step": 146260 }, { "epoch": 0.627967680722633, "grad_norm": 1.642788052558899, "learning_rate": 3.736407302329191e-05, "loss": 0.2313527822494507, "step": 146270 }, { "epoch": 0.6280106128126529, "grad_norm": 0.03921899199485779, "learning_rate": 3.735976130317429e-05, "loss": 0.16721705198287964, "step": 146280 }, { "epoch": 0.628053544902673, "grad_norm": 2.4101881980895996, "learning_rate": 3.7355449583056666e-05, "loss": 0.2695363998413086, "step": 146290 }, { "epoch": 0.628096476992693, "grad_norm": 2.3433501720428467, "learning_rate": 3.7351137862939044e-05, "loss": 0.28004429340362547, "step": 146300 }, { "epoch": 0.6281394090827129, "grad_norm": 0.7007365822792053, "learning_rate": 3.7346826142821414e-05, "loss": 0.1107077956199646, "step": 146310 }, { "epoch": 0.628182341172733, "grad_norm": 0.019995957612991333, "learning_rate": 3.734251442270379e-05, "loss": 0.005020419508218766, "step": 146320 }, { "epoch": 0.628225273262753, "grad_norm": 0.0074407197535037994, "learning_rate": 3.7338202702586176e-05, "loss": 0.2880034208297729, "step": 146330 }, { "epoch": 0.628268205352773, "grad_norm": 40.73788833618164, "learning_rate": 3.733389098246855e-05, "loss": 0.4160567283630371, "step": 146340 }, { "epoch": 0.628311137442793, "grad_norm": 0.009502614848315716, "learning_rate": 3.7329579262350924e-05, "loss": 0.14252859354019165, "step": 146350 }, { "epoch": 0.628354069532813, "grad_norm": 2.465653896331787, "learning_rate": 3.73252675422333e-05, "loss": 0.37836034297943116, "step": 146360 }, { "epoch": 0.628397001622833, "grad_norm": 0.012200412340462208, "learning_rate": 3.732095582211568e-05, "loss": 0.08112860321998597, "step": 146370 }, { "epoch": 0.628439933712853, "grad_norm": 0.0012890741927549243, "learning_rate": 3.7316644101998056e-05, "loss": 0.13902195692062377, "step": 146380 }, { "epoch": 0.628482865802873, "grad_norm": 2.2479848861694336, "learning_rate": 3.7312332381880426e-05, "loss": 0.1304740071296692, "step": 146390 }, { "epoch": 0.628525797892893, "grad_norm": 0.45096537470817566, "learning_rate": 3.7308020661762804e-05, "loss": 0.14930739402770996, "step": 146400 }, { "epoch": 0.628568729982913, "grad_norm": 0.0019203760894015431, "learning_rate": 3.730370894164518e-05, "loss": 0.1763664960861206, "step": 146410 }, { "epoch": 0.6286116620729331, "grad_norm": 1.2747520208358765, "learning_rate": 3.729939722152756e-05, "loss": 0.1266668438911438, "step": 146420 }, { "epoch": 0.628654594162953, "grad_norm": 4.500255107879639, "learning_rate": 3.729508550140993e-05, "loss": 0.21049704551696777, "step": 146430 }, { "epoch": 0.628697526252973, "grad_norm": 0.0036547803319990635, "learning_rate": 3.729077378129231e-05, "loss": 0.07864784598350524, "step": 146440 }, { "epoch": 0.6287404583429931, "grad_norm": 0.011433214880526066, "learning_rate": 3.728646206117469e-05, "loss": 0.3140164136886597, "step": 146450 }, { "epoch": 0.628783390433013, "grad_norm": 0.3247806429862976, "learning_rate": 3.728215034105707e-05, "loss": 0.10256623029708863, "step": 146460 }, { "epoch": 0.628826322523033, "grad_norm": 0.0035158980172127485, "learning_rate": 3.727783862093944e-05, "loss": 0.11457140445709228, "step": 146470 }, { "epoch": 0.6288692546130531, "grad_norm": 0.008701084181666374, "learning_rate": 3.7273526900821815e-05, "loss": 0.1851799964904785, "step": 146480 }, { "epoch": 0.628912186703073, "grad_norm": 0.009318762458860874, "learning_rate": 3.726921518070419e-05, "loss": 0.19677205085754396, "step": 146490 }, { "epoch": 0.6289551187930931, "grad_norm": 3.374988317489624, "learning_rate": 3.726490346058657e-05, "loss": 0.21994738578796386, "step": 146500 }, { "epoch": 0.6289980508831131, "grad_norm": 0.0007356581627391279, "learning_rate": 3.726059174046894e-05, "loss": 0.07651254534721375, "step": 146510 }, { "epoch": 0.629040982973133, "grad_norm": 1.4155685901641846, "learning_rate": 3.725628002035132e-05, "loss": 0.15071755647659302, "step": 146520 }, { "epoch": 0.6290839150631531, "grad_norm": 10.228381156921387, "learning_rate": 3.7251968300233695e-05, "loss": 0.23199965953826904, "step": 146530 }, { "epoch": 0.6291268471531731, "grad_norm": 0.028175201267004013, "learning_rate": 3.724765658011607e-05, "loss": 0.19142227172851561, "step": 146540 }, { "epoch": 0.6291697792431932, "grad_norm": 0.024978064000606537, "learning_rate": 3.724334485999845e-05, "loss": 0.25046958923339846, "step": 146550 }, { "epoch": 0.6292127113332131, "grad_norm": 0.09602590650320053, "learning_rate": 3.723903313988083e-05, "loss": 0.2911947250366211, "step": 146560 }, { "epoch": 0.6292556434232331, "grad_norm": 0.024032101035118103, "learning_rate": 3.7234721419763205e-05, "loss": 0.3351039171218872, "step": 146570 }, { "epoch": 0.6292985755132532, "grad_norm": 0.0016572453314438462, "learning_rate": 3.723040969964558e-05, "loss": 0.1245275855064392, "step": 146580 }, { "epoch": 0.6293415076032731, "grad_norm": 0.12577351927757263, "learning_rate": 3.722609797952796e-05, "loss": 0.3349307537078857, "step": 146590 }, { "epoch": 0.6293844396932932, "grad_norm": 0.10792262107133865, "learning_rate": 3.722178625941033e-05, "loss": 0.2238081932067871, "step": 146600 }, { "epoch": 0.6294273717833132, "grad_norm": 0.001989643555134535, "learning_rate": 3.721747453929271e-05, "loss": 0.11774779558181762, "step": 146610 }, { "epoch": 0.6294703038733331, "grad_norm": 1.7697548866271973, "learning_rate": 3.7213162819175085e-05, "loss": 0.18405303955078126, "step": 146620 }, { "epoch": 0.6295132359633532, "grad_norm": 0.005540918558835983, "learning_rate": 3.720885109905746e-05, "loss": 0.14159350395202636, "step": 146630 }, { "epoch": 0.6295561680533732, "grad_norm": 1.2075954675674438, "learning_rate": 3.720453937893983e-05, "loss": 0.38316285610198975, "step": 146640 }, { "epoch": 0.6295991001433932, "grad_norm": 1.5206912755966187, "learning_rate": 3.720022765882221e-05, "loss": 0.30779242515563965, "step": 146650 }, { "epoch": 0.6296420322334132, "grad_norm": 0.0024733147583901882, "learning_rate": 3.719591593870459e-05, "loss": 0.2867335081100464, "step": 146660 }, { "epoch": 0.6296849643234332, "grad_norm": 0.029516298323869705, "learning_rate": 3.7191604218586965e-05, "loss": 0.15923948287963868, "step": 146670 }, { "epoch": 0.6297278964134532, "grad_norm": 3.6430470943450928, "learning_rate": 3.718729249846934e-05, "loss": 0.2126899242401123, "step": 146680 }, { "epoch": 0.6297708285034732, "grad_norm": 0.032296113669872284, "learning_rate": 3.718298077835172e-05, "loss": 0.2337803363800049, "step": 146690 }, { "epoch": 0.6298137605934933, "grad_norm": 0.13325929641723633, "learning_rate": 3.71786690582341e-05, "loss": 0.1753853440284729, "step": 146700 }, { "epoch": 0.6298566926835132, "grad_norm": 1.2130388021469116, "learning_rate": 3.7174357338116474e-05, "loss": 0.14811716079711915, "step": 146710 }, { "epoch": 0.6298996247735332, "grad_norm": 0.9190666675567627, "learning_rate": 3.7170045617998845e-05, "loss": 0.22362709045410156, "step": 146720 }, { "epoch": 0.6299425568635533, "grad_norm": 0.0004198495007585734, "learning_rate": 3.716573389788122e-05, "loss": 0.24469988346099852, "step": 146730 }, { "epoch": 0.6299854889535732, "grad_norm": 0.004793898668140173, "learning_rate": 3.71614221777636e-05, "loss": 0.3914718389511108, "step": 146740 }, { "epoch": 0.6300284210435932, "grad_norm": 0.07496125996112823, "learning_rate": 3.715711045764598e-05, "loss": 0.1663152575492859, "step": 146750 }, { "epoch": 0.6300713531336133, "grad_norm": 0.0066208732314407825, "learning_rate": 3.715279873752835e-05, "loss": 0.09344573020935058, "step": 146760 }, { "epoch": 0.6301142852236332, "grad_norm": 1.0700713396072388, "learning_rate": 3.7148487017410725e-05, "loss": 0.23460733890533447, "step": 146770 }, { "epoch": 0.6301572173136533, "grad_norm": 1.1751123666763306, "learning_rate": 3.71441752972931e-05, "loss": 0.14351837635040282, "step": 146780 }, { "epoch": 0.6302001494036733, "grad_norm": 1.113236665725708, "learning_rate": 3.713986357717548e-05, "loss": 0.2353208065032959, "step": 146790 }, { "epoch": 0.6302430814936932, "grad_norm": 0.01679939031600952, "learning_rate": 3.7135551857057857e-05, "loss": 0.19200507402420045, "step": 146800 }, { "epoch": 0.6302860135837133, "grad_norm": 0.0016825655475258827, "learning_rate": 3.7131240136940234e-05, "loss": 0.26706829071044924, "step": 146810 }, { "epoch": 0.6303289456737333, "grad_norm": 0.033295415341854095, "learning_rate": 3.712692841682261e-05, "loss": 0.3771516799926758, "step": 146820 }, { "epoch": 0.6303718777637533, "grad_norm": 0.2702064514160156, "learning_rate": 3.712261669670499e-05, "loss": 0.4259639263153076, "step": 146830 }, { "epoch": 0.6304148098537733, "grad_norm": 0.035043902695178986, "learning_rate": 3.711830497658736e-05, "loss": 0.047640106081962584, "step": 146840 }, { "epoch": 0.6304577419437933, "grad_norm": 1.3876519203186035, "learning_rate": 3.7113993256469737e-05, "loss": 0.41136393547058103, "step": 146850 }, { "epoch": 0.6305006740338133, "grad_norm": 1.9727435111999512, "learning_rate": 3.7109681536352114e-05, "loss": 0.08784054517745972, "step": 146860 }, { "epoch": 0.6305436061238333, "grad_norm": 0.4474978744983673, "learning_rate": 3.710536981623449e-05, "loss": 0.09354345798492432, "step": 146870 }, { "epoch": 0.6305865382138534, "grad_norm": 0.26996567845344543, "learning_rate": 3.710105809611686e-05, "loss": 0.21056702136993408, "step": 146880 }, { "epoch": 0.6306294703038733, "grad_norm": 0.02897840365767479, "learning_rate": 3.709674637599924e-05, "loss": 0.274979829788208, "step": 146890 }, { "epoch": 0.6306724023938933, "grad_norm": 1.4435681104660034, "learning_rate": 3.7092434655881616e-05, "loss": 0.22395720481872558, "step": 146900 }, { "epoch": 0.6307153344839134, "grad_norm": 0.37976908683776855, "learning_rate": 3.7088122935763994e-05, "loss": 0.12538430690765381, "step": 146910 }, { "epoch": 0.6307582665739333, "grad_norm": 1.2855740785598755, "learning_rate": 3.708381121564637e-05, "loss": 0.3442651033401489, "step": 146920 }, { "epoch": 0.6308011986639533, "grad_norm": 9.116958618164062, "learning_rate": 3.707949949552875e-05, "loss": 0.28530521392822267, "step": 146930 }, { "epoch": 0.6308441307539734, "grad_norm": 0.021495725959539413, "learning_rate": 3.7075187775411126e-05, "loss": 0.3916192531585693, "step": 146940 }, { "epoch": 0.6308870628439933, "grad_norm": 0.09106861799955368, "learning_rate": 3.70708760552935e-05, "loss": 0.14217714071273804, "step": 146950 }, { "epoch": 0.6309299949340134, "grad_norm": 4.241379737854004, "learning_rate": 3.706656433517588e-05, "loss": 0.4013192653656006, "step": 146960 }, { "epoch": 0.6309729270240334, "grad_norm": 0.1723231077194214, "learning_rate": 3.706225261505825e-05, "loss": 0.08882884383201599, "step": 146970 }, { "epoch": 0.6310158591140534, "grad_norm": 1.0407248735427856, "learning_rate": 3.705794089494063e-05, "loss": 0.27390859127044676, "step": 146980 }, { "epoch": 0.6310587912040734, "grad_norm": 0.13997861742973328, "learning_rate": 3.7053629174823006e-05, "loss": 0.2324601173400879, "step": 146990 }, { "epoch": 0.6311017232940934, "grad_norm": 0.013284893706440926, "learning_rate": 3.704931745470538e-05, "loss": 0.3834493160247803, "step": 147000 }, { "epoch": 0.6311017232940934, "eval_loss": 0.3863438367843628, "eval_runtime": 27.1407, "eval_samples_per_second": 3.684, "eval_steps_per_second": 3.684, "step": 147000 }, { "epoch": 0.6311446553841135, "grad_norm": 2.8731067180633545, "learning_rate": 3.7045005734587754e-05, "loss": 0.1249500036239624, "step": 147010 }, { "epoch": 0.6311875874741334, "grad_norm": 1.8072718381881714, "learning_rate": 3.704069401447013e-05, "loss": 0.4470974922180176, "step": 147020 }, { "epoch": 0.6312305195641534, "grad_norm": 0.0028647701255977154, "learning_rate": 3.7036382294352515e-05, "loss": 0.295593786239624, "step": 147030 }, { "epoch": 0.6312734516541735, "grad_norm": 0.04063660278916359, "learning_rate": 3.703207057423489e-05, "loss": 0.12825417518615723, "step": 147040 }, { "epoch": 0.6313163837441934, "grad_norm": 0.043811146169900894, "learning_rate": 3.702775885411726e-05, "loss": 0.2365511178970337, "step": 147050 }, { "epoch": 0.6313593158342135, "grad_norm": 0.025705622509121895, "learning_rate": 3.702344713399964e-05, "loss": 0.26898367404937745, "step": 147060 }, { "epoch": 0.6314022479242335, "grad_norm": 0.04759734496474266, "learning_rate": 3.701913541388202e-05, "loss": 0.00582539290189743, "step": 147070 }, { "epoch": 0.6314451800142534, "grad_norm": 0.21706196665763855, "learning_rate": 3.7014823693764395e-05, "loss": 0.49407367706298827, "step": 147080 }, { "epoch": 0.6314881121042735, "grad_norm": 1.1993558406829834, "learning_rate": 3.7010511973646766e-05, "loss": 0.21412403583526612, "step": 147090 }, { "epoch": 0.6315310441942935, "grad_norm": 0.010109450668096542, "learning_rate": 3.700620025352914e-05, "loss": 0.008372948318719865, "step": 147100 }, { "epoch": 0.6315739762843134, "grad_norm": 0.32362470030784607, "learning_rate": 3.700188853341152e-05, "loss": 0.20660581588745117, "step": 147110 }, { "epoch": 0.6316169083743335, "grad_norm": 0.0003117761225439608, "learning_rate": 3.69975768132939e-05, "loss": 0.3196255683898926, "step": 147120 }, { "epoch": 0.6316598404643535, "grad_norm": 0.22397857904434204, "learning_rate": 3.699326509317627e-05, "loss": 0.0932235836982727, "step": 147130 }, { "epoch": 0.6317027725543735, "grad_norm": 0.002880153711885214, "learning_rate": 3.698895337305865e-05, "loss": 0.24725043773651123, "step": 147140 }, { "epoch": 0.6317457046443935, "grad_norm": 0.17092221975326538, "learning_rate": 3.698464165294103e-05, "loss": 0.09742676615715026, "step": 147150 }, { "epoch": 0.6317886367344135, "grad_norm": 2.2574303150177, "learning_rate": 3.698032993282341e-05, "loss": 0.16607791185379028, "step": 147160 }, { "epoch": 0.6318315688244335, "grad_norm": 2.8181631565093994, "learning_rate": 3.697601821270578e-05, "loss": 0.1326764464378357, "step": 147170 }, { "epoch": 0.6318745009144535, "grad_norm": 0.0026326251681894064, "learning_rate": 3.6971706492588155e-05, "loss": 0.19749863147735597, "step": 147180 }, { "epoch": 0.6319174330044736, "grad_norm": 0.0013430170947685838, "learning_rate": 3.696739477247053e-05, "loss": 0.2386791467666626, "step": 147190 }, { "epoch": 0.6319603650944935, "grad_norm": 5.6976776123046875, "learning_rate": 3.696308305235291e-05, "loss": 0.31951711177825926, "step": 147200 }, { "epoch": 0.6320032971845135, "grad_norm": 0.2425728440284729, "learning_rate": 3.695877133223528e-05, "loss": 0.4066159248352051, "step": 147210 }, { "epoch": 0.6320462292745336, "grad_norm": 0.12010367214679718, "learning_rate": 3.695445961211766e-05, "loss": 0.16877690553665162, "step": 147220 }, { "epoch": 0.6320891613645535, "grad_norm": 0.13629449903964996, "learning_rate": 3.6950147892000035e-05, "loss": 0.07434083819389344, "step": 147230 }, { "epoch": 0.6321320934545736, "grad_norm": 0.00021290559379849583, "learning_rate": 3.694583617188241e-05, "loss": 0.2000523567199707, "step": 147240 }, { "epoch": 0.6321750255445936, "grad_norm": 0.24666225910186768, "learning_rate": 3.694152445176479e-05, "loss": 0.27231531143188475, "step": 147250 }, { "epoch": 0.6322179576346135, "grad_norm": 0.004511278122663498, "learning_rate": 3.693721273164717e-05, "loss": 0.08121266365051269, "step": 147260 }, { "epoch": 0.6322608897246336, "grad_norm": 0.00025838721194304526, "learning_rate": 3.6932901011529544e-05, "loss": 0.03792424499988556, "step": 147270 }, { "epoch": 0.6323038218146536, "grad_norm": 0.02920406311750412, "learning_rate": 3.692858929141192e-05, "loss": 0.22607312202453614, "step": 147280 }, { "epoch": 0.6323467539046735, "grad_norm": 0.0067008258774876595, "learning_rate": 3.692427757129429e-05, "loss": 0.26392254829406736, "step": 147290 }, { "epoch": 0.6323896859946936, "grad_norm": 0.1206984594464302, "learning_rate": 3.691996585117667e-05, "loss": 0.17658530473709105, "step": 147300 }, { "epoch": 0.6324326180847136, "grad_norm": 0.06308251619338989, "learning_rate": 3.691565413105905e-05, "loss": 0.19717254638671874, "step": 147310 }, { "epoch": 0.6324755501747336, "grad_norm": 0.0017627764027565718, "learning_rate": 3.6911342410941424e-05, "loss": 0.024138632416725158, "step": 147320 }, { "epoch": 0.6325184822647536, "grad_norm": 1.507886528968811, "learning_rate": 3.69070306908238e-05, "loss": 0.3853263854980469, "step": 147330 }, { "epoch": 0.6325614143547736, "grad_norm": 2.78678297996521, "learning_rate": 3.690271897070617e-05, "loss": 0.16321289539337158, "step": 147340 }, { "epoch": 0.6326043464447936, "grad_norm": 1.2329705953598022, "learning_rate": 3.689840725058855e-05, "loss": 0.27058162689208987, "step": 147350 }, { "epoch": 0.6326472785348136, "grad_norm": 2.1577019691467285, "learning_rate": 3.689409553047093e-05, "loss": 0.2109225273132324, "step": 147360 }, { "epoch": 0.6326902106248337, "grad_norm": 7.57331436034292e-05, "learning_rate": 3.6889783810353304e-05, "loss": 0.2609088659286499, "step": 147370 }, { "epoch": 0.6327331427148536, "grad_norm": 0.009449384175240993, "learning_rate": 3.688547209023568e-05, "loss": 0.3353603363037109, "step": 147380 }, { "epoch": 0.6327760748048736, "grad_norm": 3.3012754917144775, "learning_rate": 3.688116037011806e-05, "loss": 0.11990993022918701, "step": 147390 }, { "epoch": 0.6328190068948937, "grad_norm": 2.1117093563079834, "learning_rate": 3.6876848650000436e-05, "loss": 0.4455077648162842, "step": 147400 }, { "epoch": 0.6328619389849137, "grad_norm": 0.0228413213044405, "learning_rate": 3.6872536929882814e-05, "loss": 0.10761359930038453, "step": 147410 }, { "epoch": 0.6329048710749337, "grad_norm": 9.0891695022583, "learning_rate": 3.6868225209765184e-05, "loss": 0.11293789148330688, "step": 147420 }, { "epoch": 0.6329478031649537, "grad_norm": 0.0013412254629656672, "learning_rate": 3.686391348964756e-05, "loss": 0.11689684391021729, "step": 147430 }, { "epoch": 0.6329907352549737, "grad_norm": 0.005989507306367159, "learning_rate": 3.685960176952994e-05, "loss": 0.17218418121337892, "step": 147440 }, { "epoch": 0.6330336673449937, "grad_norm": 0.004671361763030291, "learning_rate": 3.6855290049412316e-05, "loss": 0.03742716014385224, "step": 147450 }, { "epoch": 0.6330765994350137, "grad_norm": 0.9053412079811096, "learning_rate": 3.685097832929469e-05, "loss": 0.07469549179077148, "step": 147460 }, { "epoch": 0.6331195315250338, "grad_norm": 8.508781320415437e-05, "learning_rate": 3.6846666609177064e-05, "loss": 0.2632524728775024, "step": 147470 }, { "epoch": 0.6331624636150537, "grad_norm": 4.723887920379639, "learning_rate": 3.684235488905944e-05, "loss": 0.13231680393218995, "step": 147480 }, { "epoch": 0.6332053957050737, "grad_norm": 1.05924654006958, "learning_rate": 3.683804316894182e-05, "loss": 0.2699723720550537, "step": 147490 }, { "epoch": 0.6332483277950938, "grad_norm": 0.27306386828422546, "learning_rate": 3.6833731448824196e-05, "loss": 0.0017774634063243866, "step": 147500 }, { "epoch": 0.6332912598851137, "grad_norm": 0.0028440305031836033, "learning_rate": 3.6829419728706573e-05, "loss": 0.15099271535873413, "step": 147510 }, { "epoch": 0.6333341919751337, "grad_norm": 0.2072194367647171, "learning_rate": 3.682510800858895e-05, "loss": 0.15268101692199706, "step": 147520 }, { "epoch": 0.6333771240651538, "grad_norm": 0.0005316045135259628, "learning_rate": 3.682079628847133e-05, "loss": 0.1784825563430786, "step": 147530 }, { "epoch": 0.6334200561551737, "grad_norm": 0.08889344334602356, "learning_rate": 3.68164845683537e-05, "loss": 0.07060465812683106, "step": 147540 }, { "epoch": 0.6334629882451938, "grad_norm": 0.020317930728197098, "learning_rate": 3.6812172848236076e-05, "loss": 0.08077744245529175, "step": 147550 }, { "epoch": 0.6335059203352138, "grad_norm": 0.028461042791604996, "learning_rate": 3.680786112811845e-05, "loss": 0.375799560546875, "step": 147560 }, { "epoch": 0.6335488524252337, "grad_norm": 0.0021006674505770206, "learning_rate": 3.680354940800083e-05, "loss": 0.23112802505493163, "step": 147570 }, { "epoch": 0.6335917845152538, "grad_norm": 0.4146704375743866, "learning_rate": 3.67992376878832e-05, "loss": 0.22537550926208497, "step": 147580 }, { "epoch": 0.6336347166052738, "grad_norm": 0.0008316123858094215, "learning_rate": 3.679492596776558e-05, "loss": 0.16894692182540894, "step": 147590 }, { "epoch": 0.6336776486952937, "grad_norm": 0.26273226737976074, "learning_rate": 3.6790614247647956e-05, "loss": 0.21262357234954835, "step": 147600 }, { "epoch": 0.6337205807853138, "grad_norm": 2.1046674251556396, "learning_rate": 3.678630252753033e-05, "loss": 0.17924087047576903, "step": 147610 }, { "epoch": 0.6337635128753338, "grad_norm": 5.799474716186523, "learning_rate": 3.678199080741271e-05, "loss": 0.08266505599021912, "step": 147620 }, { "epoch": 0.6338064449653538, "grad_norm": 1.8729013204574585, "learning_rate": 3.677767908729509e-05, "loss": 0.11252769231796264, "step": 147630 }, { "epoch": 0.6338493770553738, "grad_norm": 6.594720840454102, "learning_rate": 3.6773367367177465e-05, "loss": 0.4548068046569824, "step": 147640 }, { "epoch": 0.6338923091453939, "grad_norm": 0.0037639886140823364, "learning_rate": 3.676905564705984e-05, "loss": 0.22702391147613527, "step": 147650 }, { "epoch": 0.6339352412354138, "grad_norm": 0.00032389559783041477, "learning_rate": 3.676474392694221e-05, "loss": 0.11630215644836425, "step": 147660 }, { "epoch": 0.6339781733254338, "grad_norm": 0.0001477748592151329, "learning_rate": 3.676043220682459e-05, "loss": 0.19351050853729249, "step": 147670 }, { "epoch": 0.6340211054154539, "grad_norm": 1.0882714986801147, "learning_rate": 3.675612048670697e-05, "loss": 0.2964040279388428, "step": 147680 }, { "epoch": 0.6340640375054738, "grad_norm": 0.010478765703737736, "learning_rate": 3.6751808766589345e-05, "loss": 0.1366469144821167, "step": 147690 }, { "epoch": 0.6341069695954938, "grad_norm": 5.9954352378845215, "learning_rate": 3.674749704647172e-05, "loss": 0.3098312854766846, "step": 147700 }, { "epoch": 0.6341499016855139, "grad_norm": 0.4428004026412964, "learning_rate": 3.674318532635409e-05, "loss": 0.22567715644836425, "step": 147710 }, { "epoch": 0.6341928337755338, "grad_norm": 0.012649418786168098, "learning_rate": 3.673887360623647e-05, "loss": 0.13131637573242189, "step": 147720 }, { "epoch": 0.6342357658655539, "grad_norm": 0.01736398972570896, "learning_rate": 3.673456188611885e-05, "loss": 0.05424131751060486, "step": 147730 }, { "epoch": 0.6342786979555739, "grad_norm": 4.064063549041748, "learning_rate": 3.673025016600123e-05, "loss": 0.423872184753418, "step": 147740 }, { "epoch": 0.6343216300455938, "grad_norm": 0.027415230870246887, "learning_rate": 3.67259384458836e-05, "loss": 0.313714599609375, "step": 147750 }, { "epoch": 0.6343645621356139, "grad_norm": 22.213289260864258, "learning_rate": 3.672162672576598e-05, "loss": 0.1917173147201538, "step": 147760 }, { "epoch": 0.6344074942256339, "grad_norm": 2.9939708709716797, "learning_rate": 3.671731500564836e-05, "loss": 0.3942455291748047, "step": 147770 }, { "epoch": 0.6344504263156538, "grad_norm": 2.6282453536987305, "learning_rate": 3.6713003285530735e-05, "loss": 0.31463940143585206, "step": 147780 }, { "epoch": 0.6344933584056739, "grad_norm": 0.0022995853796601295, "learning_rate": 3.6708691565413105e-05, "loss": 0.08896902799606324, "step": 147790 }, { "epoch": 0.6345362904956939, "grad_norm": 0.004803662188351154, "learning_rate": 3.670437984529548e-05, "loss": 0.24017529487609862, "step": 147800 }, { "epoch": 0.6345792225857139, "grad_norm": 2.3700520992279053, "learning_rate": 3.670006812517786e-05, "loss": 0.41024460792541506, "step": 147810 }, { "epoch": 0.6346221546757339, "grad_norm": 0.014520244672894478, "learning_rate": 3.669575640506024e-05, "loss": 0.0028250502422451974, "step": 147820 }, { "epoch": 0.634665086765754, "grad_norm": 0.009078883565962315, "learning_rate": 3.669144468494261e-05, "loss": 0.024532407522201538, "step": 147830 }, { "epoch": 0.634708018855774, "grad_norm": 0.33582961559295654, "learning_rate": 3.6687132964824985e-05, "loss": 0.14474940299987793, "step": 147840 }, { "epoch": 0.6347509509457939, "grad_norm": 6.57936954498291, "learning_rate": 3.668282124470737e-05, "loss": 0.33017749786376954, "step": 147850 }, { "epoch": 0.634793883035814, "grad_norm": 0.01086271833628416, "learning_rate": 3.6678509524589747e-05, "loss": 0.26077311038970946, "step": 147860 }, { "epoch": 0.634836815125834, "grad_norm": 1.8153256177902222, "learning_rate": 3.667419780447212e-05, "loss": 0.3809725046157837, "step": 147870 }, { "epoch": 0.6348797472158539, "grad_norm": 0.39656689763069153, "learning_rate": 3.6669886084354494e-05, "loss": 0.10590251684188842, "step": 147880 }, { "epoch": 0.634922679305874, "grad_norm": 0.20176301896572113, "learning_rate": 3.666557436423687e-05, "loss": 0.11742727756500244, "step": 147890 }, { "epoch": 0.634965611395894, "grad_norm": 0.09392766654491425, "learning_rate": 3.666126264411925e-05, "loss": 0.2184234380722046, "step": 147900 }, { "epoch": 0.635008543485914, "grad_norm": 0.08617518842220306, "learning_rate": 3.665695092400162e-05, "loss": 0.35497307777404785, "step": 147910 }, { "epoch": 0.635051475575934, "grad_norm": 0.09503033012151718, "learning_rate": 3.6652639203884e-05, "loss": 0.1937497854232788, "step": 147920 }, { "epoch": 0.635094407665954, "grad_norm": 1.621155858039856, "learning_rate": 3.6648327483766374e-05, "loss": 0.14177558422088624, "step": 147930 }, { "epoch": 0.635137339755974, "grad_norm": 1.141985297203064, "learning_rate": 3.664401576364875e-05, "loss": 0.05915210247039795, "step": 147940 }, { "epoch": 0.635180271845994, "grad_norm": 0.0722414031624794, "learning_rate": 3.663970404353112e-05, "loss": 0.25335264205932617, "step": 147950 }, { "epoch": 0.6352232039360141, "grad_norm": 6.025696277618408, "learning_rate": 3.6635392323413506e-05, "loss": 0.1266070008277893, "step": 147960 }, { "epoch": 0.635266136026034, "grad_norm": 4.48659086227417, "learning_rate": 3.6631080603295884e-05, "loss": 0.25072340965270995, "step": 147970 }, { "epoch": 0.635309068116054, "grad_norm": 0.0004896190366707742, "learning_rate": 3.662676888317826e-05, "loss": 0.21098458766937256, "step": 147980 }, { "epoch": 0.6353520002060741, "grad_norm": 2.152426242828369, "learning_rate": 3.662245716306063e-05, "loss": 0.37824833393096924, "step": 147990 }, { "epoch": 0.635394932296094, "grad_norm": 2.1820719242095947, "learning_rate": 3.661814544294301e-05, "loss": 0.43947458267211914, "step": 148000 }, { "epoch": 0.635394932296094, "eval_loss": 0.3964541256427765, "eval_runtime": 27.1059, "eval_samples_per_second": 3.689, "eval_steps_per_second": 3.689, "step": 148000 }, { "epoch": 0.635437864386114, "grad_norm": 2.4284281730651855, "learning_rate": 3.6613833722825386e-05, "loss": 0.16709811687469484, "step": 148010 }, { "epoch": 0.6354807964761341, "grad_norm": 2.571385622024536, "learning_rate": 3.6609522002707764e-05, "loss": 0.2915945053100586, "step": 148020 }, { "epoch": 0.635523728566154, "grad_norm": 0.009592088870704174, "learning_rate": 3.6605210282590134e-05, "loss": 0.06732832193374634, "step": 148030 }, { "epoch": 0.6355666606561741, "grad_norm": 0.007833711802959442, "learning_rate": 3.660089856247251e-05, "loss": 0.3823229312896729, "step": 148040 }, { "epoch": 0.6356095927461941, "grad_norm": 1.7308787107467651, "learning_rate": 3.659658684235489e-05, "loss": 0.2691195964813232, "step": 148050 }, { "epoch": 0.635652524836214, "grad_norm": 0.0012639171909540892, "learning_rate": 3.6592275122237266e-05, "loss": 0.28667852878570554, "step": 148060 }, { "epoch": 0.6356954569262341, "grad_norm": 1.2245522737503052, "learning_rate": 3.6587963402119644e-05, "loss": 0.4876396656036377, "step": 148070 }, { "epoch": 0.6357383890162541, "grad_norm": 0.0028517525643110275, "learning_rate": 3.658365168200202e-05, "loss": 0.3154790163040161, "step": 148080 }, { "epoch": 0.635781321106274, "grad_norm": 0.0017242819303646684, "learning_rate": 3.65793399618844e-05, "loss": 0.20070092678070067, "step": 148090 }, { "epoch": 0.6358242531962941, "grad_norm": 0.022681070491671562, "learning_rate": 3.6575028241766776e-05, "loss": 0.35894730091094973, "step": 148100 }, { "epoch": 0.6358671852863141, "grad_norm": 0.011042545549571514, "learning_rate": 3.657071652164915e-05, "loss": 0.3039877414703369, "step": 148110 }, { "epoch": 0.6359101173763341, "grad_norm": 3.708176374435425, "learning_rate": 3.6566404801531524e-05, "loss": 0.32838003635406493, "step": 148120 }, { "epoch": 0.6359530494663541, "grad_norm": 0.12351704388856888, "learning_rate": 3.65620930814139e-05, "loss": 0.29023327827453616, "step": 148130 }, { "epoch": 0.6359959815563742, "grad_norm": 0.004000886343419552, "learning_rate": 3.655778136129628e-05, "loss": 0.07846883535385132, "step": 148140 }, { "epoch": 0.6360389136463941, "grad_norm": 0.002440853975713253, "learning_rate": 3.6553469641178656e-05, "loss": 0.05644827485084534, "step": 148150 }, { "epoch": 0.6360818457364141, "grad_norm": 2.0655572414398193, "learning_rate": 3.6549157921061026e-05, "loss": 0.42191014289855955, "step": 148160 }, { "epoch": 0.6361247778264342, "grad_norm": 0.006922286003828049, "learning_rate": 3.6544846200943403e-05, "loss": 0.24610230922698975, "step": 148170 }, { "epoch": 0.6361677099164541, "grad_norm": 2.2612147331237793, "learning_rate": 3.654053448082578e-05, "loss": 0.31474781036376953, "step": 148180 }, { "epoch": 0.6362106420064741, "grad_norm": 0.3926016688346863, "learning_rate": 3.653622276070816e-05, "loss": 0.16994028091430663, "step": 148190 }, { "epoch": 0.6362535740964942, "grad_norm": 3.7936487197875977, "learning_rate": 3.6531911040590536e-05, "loss": 0.19815267324447633, "step": 148200 }, { "epoch": 0.6362965061865141, "grad_norm": 0.7687986493110657, "learning_rate": 3.652759932047291e-05, "loss": 0.29149169921875, "step": 148210 }, { "epoch": 0.6363394382765342, "grad_norm": 3.6010568141937256, "learning_rate": 3.652328760035529e-05, "loss": 0.24495539665222169, "step": 148220 }, { "epoch": 0.6363823703665542, "grad_norm": 0.02493220753967762, "learning_rate": 3.651897588023767e-05, "loss": 0.15526796579360963, "step": 148230 }, { "epoch": 0.6364253024565741, "grad_norm": 0.38506048917770386, "learning_rate": 3.651466416012004e-05, "loss": 0.07274820804595947, "step": 148240 }, { "epoch": 0.6364682345465942, "grad_norm": 1.6484607458114624, "learning_rate": 3.6510352440002415e-05, "loss": 0.18348608016967774, "step": 148250 }, { "epoch": 0.6365111666366142, "grad_norm": 2.5601675510406494, "learning_rate": 3.650604071988479e-05, "loss": 0.4256871223449707, "step": 148260 }, { "epoch": 0.6365540987266343, "grad_norm": 0.3567001521587372, "learning_rate": 3.650172899976717e-05, "loss": 0.14035037755966187, "step": 148270 }, { "epoch": 0.6365970308166542, "grad_norm": 0.020381588488817215, "learning_rate": 3.649741727964954e-05, "loss": 0.2797467947006226, "step": 148280 }, { "epoch": 0.6366399629066742, "grad_norm": 7.5753045082092285, "learning_rate": 3.649310555953192e-05, "loss": 0.4102283477783203, "step": 148290 }, { "epoch": 0.6366828949966943, "grad_norm": 1.01334547996521, "learning_rate": 3.6488793839414295e-05, "loss": 0.2608525037765503, "step": 148300 }, { "epoch": 0.6367258270867142, "grad_norm": 0.0031121079809963703, "learning_rate": 3.648448211929667e-05, "loss": 0.271061635017395, "step": 148310 }, { "epoch": 0.6367687591767343, "grad_norm": 0.05686857923865318, "learning_rate": 3.648017039917905e-05, "loss": 0.255864691734314, "step": 148320 }, { "epoch": 0.6368116912667543, "grad_norm": 0.0934736356139183, "learning_rate": 3.647585867906143e-05, "loss": 0.07495766282081603, "step": 148330 }, { "epoch": 0.6368546233567742, "grad_norm": 1.5425732135772705, "learning_rate": 3.6471546958943805e-05, "loss": 0.34494857788085936, "step": 148340 }, { "epoch": 0.6368975554467943, "grad_norm": 0.10615680366754532, "learning_rate": 3.646723523882618e-05, "loss": 0.15554267168045044, "step": 148350 }, { "epoch": 0.6369404875368143, "grad_norm": 0.8117842078208923, "learning_rate": 3.646292351870855e-05, "loss": 0.12490513324737548, "step": 148360 }, { "epoch": 0.6369834196268342, "grad_norm": 1.5500012636184692, "learning_rate": 3.645861179859093e-05, "loss": 0.12137371301651001, "step": 148370 }, { "epoch": 0.6370263517168543, "grad_norm": 2.9333624839782715, "learning_rate": 3.645430007847331e-05, "loss": 0.2662505149841309, "step": 148380 }, { "epoch": 0.6370692838068743, "grad_norm": 0.11318331956863403, "learning_rate": 3.6449988358355685e-05, "loss": 0.23979718685150148, "step": 148390 }, { "epoch": 0.6371122158968943, "grad_norm": 1.10670804977417, "learning_rate": 3.6445676638238055e-05, "loss": 0.2728361845016479, "step": 148400 }, { "epoch": 0.6371551479869143, "grad_norm": 4.697409152984619, "learning_rate": 3.644136491812043e-05, "loss": 0.39655683040618894, "step": 148410 }, { "epoch": 0.6371980800769343, "grad_norm": 0.01723591797053814, "learning_rate": 3.643705319800281e-05, "loss": 0.2219693899154663, "step": 148420 }, { "epoch": 0.6372410121669543, "grad_norm": 1.3442760705947876, "learning_rate": 3.643274147788519e-05, "loss": 0.3207077503204346, "step": 148430 }, { "epoch": 0.6372839442569743, "grad_norm": 0.0006233075400814414, "learning_rate": 3.642842975776757e-05, "loss": 0.16337018013000487, "step": 148440 }, { "epoch": 0.6373268763469944, "grad_norm": 0.010497170500457287, "learning_rate": 3.642411803764994e-05, "loss": 0.2993746757507324, "step": 148450 }, { "epoch": 0.6373698084370143, "grad_norm": 0.06728941202163696, "learning_rate": 3.641980631753232e-05, "loss": 0.32750234603881834, "step": 148460 }, { "epoch": 0.6374127405270343, "grad_norm": 15.103754043579102, "learning_rate": 3.64154945974147e-05, "loss": 0.4762892723083496, "step": 148470 }, { "epoch": 0.6374556726170544, "grad_norm": 2.0977084636688232, "learning_rate": 3.6411182877297074e-05, "loss": 0.39258265495300293, "step": 148480 }, { "epoch": 0.6374986047070743, "grad_norm": 0.9504750370979309, "learning_rate": 3.6406871157179445e-05, "loss": 0.24261243343353273, "step": 148490 }, { "epoch": 0.6375415367970944, "grad_norm": 0.007759752683341503, "learning_rate": 3.640255943706182e-05, "loss": 0.12828243970870973, "step": 148500 }, { "epoch": 0.6375844688871144, "grad_norm": 0.061698079109191895, "learning_rate": 3.63982477169442e-05, "loss": 0.30746870040893554, "step": 148510 }, { "epoch": 0.6376274009771343, "grad_norm": 0.06339918076992035, "learning_rate": 3.6393935996826577e-05, "loss": 0.1409119725227356, "step": 148520 }, { "epoch": 0.6376703330671544, "grad_norm": 0.015551037155091763, "learning_rate": 3.638962427670895e-05, "loss": 0.14194364547729493, "step": 148530 }, { "epoch": 0.6377132651571744, "grad_norm": 0.5721806287765503, "learning_rate": 3.6385312556591325e-05, "loss": 0.2990145444869995, "step": 148540 }, { "epoch": 0.6377561972471943, "grad_norm": 0.38188424706459045, "learning_rate": 3.638100083647371e-05, "loss": 0.18333234786987304, "step": 148550 }, { "epoch": 0.6377991293372144, "grad_norm": 0.04208914935588837, "learning_rate": 3.6376689116356086e-05, "loss": 0.10026679039001465, "step": 148560 }, { "epoch": 0.6378420614272344, "grad_norm": 0.04540753737092018, "learning_rate": 3.6372377396238457e-05, "loss": 0.0547387421131134, "step": 148570 }, { "epoch": 0.6378849935172544, "grad_norm": 0.03880661725997925, "learning_rate": 3.6368065676120834e-05, "loss": 0.24963154792785644, "step": 148580 }, { "epoch": 0.6379279256072744, "grad_norm": 0.003545165527611971, "learning_rate": 3.636375395600321e-05, "loss": 0.11689521074295044, "step": 148590 }, { "epoch": 0.6379708576972944, "grad_norm": 0.830731987953186, "learning_rate": 3.635944223588559e-05, "loss": 0.30333313941955564, "step": 148600 }, { "epoch": 0.6380137897873144, "grad_norm": 0.031620461493730545, "learning_rate": 3.635513051576796e-05, "loss": 0.23221213817596437, "step": 148610 }, { "epoch": 0.6380567218773344, "grad_norm": 0.02347370609641075, "learning_rate": 3.6350818795650336e-05, "loss": 0.4794201374053955, "step": 148620 }, { "epoch": 0.6380996539673545, "grad_norm": 7.367602348327637, "learning_rate": 3.6346507075532714e-05, "loss": 0.2784811735153198, "step": 148630 }, { "epoch": 0.6381425860573744, "grad_norm": 0.06946471333503723, "learning_rate": 3.634219535541509e-05, "loss": 0.36514983177185056, "step": 148640 }, { "epoch": 0.6381855181473944, "grad_norm": 1.8712137937545776, "learning_rate": 3.633788363529746e-05, "loss": 0.1741746187210083, "step": 148650 }, { "epoch": 0.6382284502374145, "grad_norm": 0.005989521741867065, "learning_rate": 3.6333571915179846e-05, "loss": 0.2726861000061035, "step": 148660 }, { "epoch": 0.6382713823274344, "grad_norm": 0.04366813972592354, "learning_rate": 3.632926019506222e-05, "loss": 0.10254474878311157, "step": 148670 }, { "epoch": 0.6383143144174545, "grad_norm": 0.22113563120365143, "learning_rate": 3.63249484749446e-05, "loss": 0.05884222984313965, "step": 148680 }, { "epoch": 0.6383572465074745, "grad_norm": 0.21171332895755768, "learning_rate": 3.632063675482697e-05, "loss": 0.024245685338973998, "step": 148690 }, { "epoch": 0.6384001785974945, "grad_norm": 9.799121856689453, "learning_rate": 3.631632503470935e-05, "loss": 0.2564573049545288, "step": 148700 }, { "epoch": 0.6384431106875145, "grad_norm": 0.48802077770233154, "learning_rate": 3.6312013314591726e-05, "loss": 0.1415635108947754, "step": 148710 }, { "epoch": 0.6384860427775345, "grad_norm": 0.14215391874313354, "learning_rate": 3.63077015944741e-05, "loss": 0.1812630772590637, "step": 148720 }, { "epoch": 0.6385289748675546, "grad_norm": 1.549190878868103, "learning_rate": 3.6303389874356474e-05, "loss": 0.3076681137084961, "step": 148730 }, { "epoch": 0.6385719069575745, "grad_norm": 0.18916155397891998, "learning_rate": 3.629907815423885e-05, "loss": 0.014893820881843567, "step": 148740 }, { "epoch": 0.6386148390475945, "grad_norm": 0.02583307959139347, "learning_rate": 3.629476643412123e-05, "loss": 0.20866930484771729, "step": 148750 }, { "epoch": 0.6386577711376146, "grad_norm": 0.04389571025967598, "learning_rate": 3.6290454714003606e-05, "loss": 0.3857304096221924, "step": 148760 }, { "epoch": 0.6387007032276345, "grad_norm": 0.02454189956188202, "learning_rate": 3.628614299388598e-05, "loss": 0.09770624041557312, "step": 148770 }, { "epoch": 0.6387436353176545, "grad_norm": 4.198458671569824, "learning_rate": 3.628183127376836e-05, "loss": 0.1682787537574768, "step": 148780 }, { "epoch": 0.6387865674076746, "grad_norm": 0.01082449872046709, "learning_rate": 3.627751955365074e-05, "loss": 0.10119086503982544, "step": 148790 }, { "epoch": 0.6388294994976945, "grad_norm": 2.1636593341827393, "learning_rate": 3.6273207833533115e-05, "loss": 0.10471152067184449, "step": 148800 }, { "epoch": 0.6388724315877146, "grad_norm": 0.005232629831880331, "learning_rate": 3.626889611341549e-05, "loss": 0.12240467071533204, "step": 148810 }, { "epoch": 0.6389153636777346, "grad_norm": 0.06836547702550888, "learning_rate": 3.626458439329786e-05, "loss": 0.25388565063476565, "step": 148820 }, { "epoch": 0.6389582957677545, "grad_norm": 0.004365882370620966, "learning_rate": 3.626027267318024e-05, "loss": 0.3014135599136353, "step": 148830 }, { "epoch": 0.6390012278577746, "grad_norm": 0.007590130437165499, "learning_rate": 3.625596095306262e-05, "loss": 0.04911408424377441, "step": 148840 }, { "epoch": 0.6390441599477946, "grad_norm": 0.01083632092922926, "learning_rate": 3.6251649232944995e-05, "loss": 0.10735723972320557, "step": 148850 }, { "epoch": 0.6390870920378146, "grad_norm": 0.0025522809009999037, "learning_rate": 3.6247337512827366e-05, "loss": 0.09026329517364502, "step": 148860 }, { "epoch": 0.6391300241278346, "grad_norm": 0.021843230351805687, "learning_rate": 3.624302579270974e-05, "loss": 0.11751435995101929, "step": 148870 }, { "epoch": 0.6391729562178546, "grad_norm": 1.4416433572769165, "learning_rate": 3.623871407259212e-05, "loss": 0.1340113401412964, "step": 148880 }, { "epoch": 0.6392158883078746, "grad_norm": 0.9927673935890198, "learning_rate": 3.62344023524745e-05, "loss": 0.22968411445617676, "step": 148890 }, { "epoch": 0.6392588203978946, "grad_norm": 0.056031033396720886, "learning_rate": 3.6230090632356875e-05, "loss": 0.21517865657806395, "step": 148900 }, { "epoch": 0.6393017524879147, "grad_norm": 0.17687752842903137, "learning_rate": 3.622577891223925e-05, "loss": 0.1644793391227722, "step": 148910 }, { "epoch": 0.6393446845779346, "grad_norm": 0.0010743543971329927, "learning_rate": 3.622146719212163e-05, "loss": 0.13042839765548705, "step": 148920 }, { "epoch": 0.6393876166679546, "grad_norm": 0.0013966663973405957, "learning_rate": 3.621715547200401e-05, "loss": 0.2743825912475586, "step": 148930 }, { "epoch": 0.6394305487579747, "grad_norm": 3.341494083404541, "learning_rate": 3.621284375188638e-05, "loss": 0.34465975761413575, "step": 148940 }, { "epoch": 0.6394734808479946, "grad_norm": 0.004896739963442087, "learning_rate": 3.6208532031768755e-05, "loss": 0.11658406257629395, "step": 148950 }, { "epoch": 0.6395164129380146, "grad_norm": 6.2574992179870605, "learning_rate": 3.620422031165113e-05, "loss": 0.22075233459472657, "step": 148960 }, { "epoch": 0.6395593450280347, "grad_norm": 3.035881519317627, "learning_rate": 3.619990859153351e-05, "loss": 0.28051533699035647, "step": 148970 }, { "epoch": 0.6396022771180546, "grad_norm": 0.0007945537799969316, "learning_rate": 3.619559687141588e-05, "loss": 0.14235063791275024, "step": 148980 }, { "epoch": 0.6396452092080747, "grad_norm": 0.0015852749347686768, "learning_rate": 3.619128515129826e-05, "loss": 0.19316439628601073, "step": 148990 }, { "epoch": 0.6396881412980947, "grad_norm": 0.0012423275038599968, "learning_rate": 3.6186973431180635e-05, "loss": 0.5619192600250245, "step": 149000 }, { "epoch": 0.6396881412980947, "eval_loss": 0.3982442021369934, "eval_runtime": 27.134, "eval_samples_per_second": 3.685, "eval_steps_per_second": 3.685, "step": 149000 }, { "epoch": 0.6397310733881146, "grad_norm": 0.22282634675502777, "learning_rate": 3.618266171106301e-05, "loss": 0.17875736951828003, "step": 149010 }, { "epoch": 0.6397740054781347, "grad_norm": 0.35734257102012634, "learning_rate": 3.617834999094539e-05, "loss": 0.24705617427825927, "step": 149020 }, { "epoch": 0.6398169375681547, "grad_norm": 7.129267692565918, "learning_rate": 3.617403827082777e-05, "loss": 0.394257926940918, "step": 149030 }, { "epoch": 0.6398598696581747, "grad_norm": 0.5210902690887451, "learning_rate": 3.6169726550710144e-05, "loss": 0.0944884955883026, "step": 149040 }, { "epoch": 0.6399028017481947, "grad_norm": 0.014846066944301128, "learning_rate": 3.616541483059252e-05, "loss": 0.2939175605773926, "step": 149050 }, { "epoch": 0.6399457338382147, "grad_norm": 0.014867091551423073, "learning_rate": 3.616110311047489e-05, "loss": 0.06061587929725647, "step": 149060 }, { "epoch": 0.6399886659282347, "grad_norm": 0.6271289587020874, "learning_rate": 3.615679139035727e-05, "loss": 0.25383543968200684, "step": 149070 }, { "epoch": 0.6400315980182547, "grad_norm": 0.0007486839895136654, "learning_rate": 3.615247967023965e-05, "loss": 0.002995048649609089, "step": 149080 }, { "epoch": 0.6400745301082748, "grad_norm": 0.9311636090278625, "learning_rate": 3.6148167950122024e-05, "loss": 0.3089101552963257, "step": 149090 }, { "epoch": 0.6401174621982947, "grad_norm": 0.06805947422981262, "learning_rate": 3.6143856230004395e-05, "loss": 0.152249014377594, "step": 149100 }, { "epoch": 0.6401603942883147, "grad_norm": 6.076711654663086, "learning_rate": 3.613954450988677e-05, "loss": 0.4289687156677246, "step": 149110 }, { "epoch": 0.6402033263783348, "grad_norm": 0.010889571160078049, "learning_rate": 3.613523278976915e-05, "loss": 0.14229416847229004, "step": 149120 }, { "epoch": 0.6402462584683548, "grad_norm": 0.024119842797517776, "learning_rate": 3.613092106965153e-05, "loss": 0.18358902931213378, "step": 149130 }, { "epoch": 0.6402891905583747, "grad_norm": 2.6810688972473145, "learning_rate": 3.612660934953391e-05, "loss": 0.23319919109344484, "step": 149140 }, { "epoch": 0.6403321226483948, "grad_norm": 0.3877185583114624, "learning_rate": 3.612229762941628e-05, "loss": 0.05466576218605042, "step": 149150 }, { "epoch": 0.6403750547384148, "grad_norm": 3.808795928955078, "learning_rate": 3.611798590929866e-05, "loss": 0.2208317995071411, "step": 149160 }, { "epoch": 0.6404179868284348, "grad_norm": 0.43981850147247314, "learning_rate": 3.6113674189181036e-05, "loss": 0.04135819673538208, "step": 149170 }, { "epoch": 0.6404609189184548, "grad_norm": 0.23187753558158875, "learning_rate": 3.6109362469063413e-05, "loss": 0.21757895946502687, "step": 149180 }, { "epoch": 0.6405038510084748, "grad_norm": 0.01140986941754818, "learning_rate": 3.6105050748945784e-05, "loss": 0.1515246272087097, "step": 149190 }, { "epoch": 0.6405467830984948, "grad_norm": 0.01072423905134201, "learning_rate": 3.610073902882816e-05, "loss": 0.22516722679138185, "step": 149200 }, { "epoch": 0.6405897151885148, "grad_norm": 0.002811270533129573, "learning_rate": 3.609642730871054e-05, "loss": 0.09642552137374878, "step": 149210 }, { "epoch": 0.6406326472785349, "grad_norm": 0.03889846429228783, "learning_rate": 3.6092115588592916e-05, "loss": 0.061399024724960324, "step": 149220 }, { "epoch": 0.6406755793685548, "grad_norm": 0.0017019611550495028, "learning_rate": 3.608780386847529e-05, "loss": 0.258405876159668, "step": 149230 }, { "epoch": 0.6407185114585748, "grad_norm": 0.8725544810295105, "learning_rate": 3.6083492148357664e-05, "loss": 0.1622206449508667, "step": 149240 }, { "epoch": 0.6407614435485949, "grad_norm": 1.6804289817810059, "learning_rate": 3.607918042824005e-05, "loss": 0.42448744773864744, "step": 149250 }, { "epoch": 0.6408043756386148, "grad_norm": 5.870574474334717, "learning_rate": 3.6074868708122425e-05, "loss": 0.2516177654266357, "step": 149260 }, { "epoch": 0.6408473077286349, "grad_norm": 2.136117696762085, "learning_rate": 3.6070556988004796e-05, "loss": 0.2335583209991455, "step": 149270 }, { "epoch": 0.6408902398186549, "grad_norm": 0.018576808273792267, "learning_rate": 3.606624526788717e-05, "loss": 0.06257337927818299, "step": 149280 }, { "epoch": 0.6409331719086748, "grad_norm": 0.4843984544277191, "learning_rate": 3.606193354776955e-05, "loss": 0.3152668237686157, "step": 149290 }, { "epoch": 0.6409761039986949, "grad_norm": 0.15813709795475006, "learning_rate": 3.605762182765193e-05, "loss": 0.1964523434638977, "step": 149300 }, { "epoch": 0.6410190360887149, "grad_norm": 1.1011407375335693, "learning_rate": 3.60533101075343e-05, "loss": 0.17502723932266234, "step": 149310 }, { "epoch": 0.6410619681787348, "grad_norm": 6.5771684646606445, "learning_rate": 3.6048998387416676e-05, "loss": 0.2309612512588501, "step": 149320 }, { "epoch": 0.6411049002687549, "grad_norm": 0.05764686316251755, "learning_rate": 3.604468666729905e-05, "loss": 0.23601062297821046, "step": 149330 }, { "epoch": 0.6411478323587749, "grad_norm": 2.103090286254883, "learning_rate": 3.604037494718143e-05, "loss": 0.3898202657699585, "step": 149340 }, { "epoch": 0.6411907644487949, "grad_norm": 0.07793723046779633, "learning_rate": 3.60360632270638e-05, "loss": 0.09205517172813416, "step": 149350 }, { "epoch": 0.6412336965388149, "grad_norm": 1.2947548627853394, "learning_rate": 3.6031751506946185e-05, "loss": 0.1622360348701477, "step": 149360 }, { "epoch": 0.6412766286288349, "grad_norm": 1.644738793373108, "learning_rate": 3.602743978682856e-05, "loss": 0.3392355918884277, "step": 149370 }, { "epoch": 0.6413195607188549, "grad_norm": 0.00325818732380867, "learning_rate": 3.602312806671094e-05, "loss": 0.11670932769775391, "step": 149380 }, { "epoch": 0.6413624928088749, "grad_norm": 0.013296050950884819, "learning_rate": 3.601881634659331e-05, "loss": 0.22430813312530518, "step": 149390 }, { "epoch": 0.641405424898895, "grad_norm": 0.0017144365701824427, "learning_rate": 3.601450462647569e-05, "loss": 0.0805868923664093, "step": 149400 }, { "epoch": 0.6414483569889149, "grad_norm": 0.010498907417058945, "learning_rate": 3.6010192906358065e-05, "loss": 0.1966702699661255, "step": 149410 }, { "epoch": 0.6414912890789349, "grad_norm": 0.0028181958477944136, "learning_rate": 3.600588118624044e-05, "loss": 0.07636668682098388, "step": 149420 }, { "epoch": 0.641534221168955, "grad_norm": 0.004065072163939476, "learning_rate": 3.600156946612281e-05, "loss": 0.2734179735183716, "step": 149430 }, { "epoch": 0.6415771532589749, "grad_norm": 0.007763869594782591, "learning_rate": 3.599725774600519e-05, "loss": 0.23989946842193605, "step": 149440 }, { "epoch": 0.641620085348995, "grad_norm": 1.1928882598876953, "learning_rate": 3.599294602588757e-05, "loss": 0.2867441177368164, "step": 149450 }, { "epoch": 0.641663017439015, "grad_norm": 0.0222454946488142, "learning_rate": 3.5988634305769945e-05, "loss": 0.07181630730628967, "step": 149460 }, { "epoch": 0.6417059495290349, "grad_norm": 1.8484033346176147, "learning_rate": 3.598432258565232e-05, "loss": 0.32464113235473635, "step": 149470 }, { "epoch": 0.641748881619055, "grad_norm": 0.012596558779478073, "learning_rate": 3.59800108655347e-05, "loss": 0.0818311870098114, "step": 149480 }, { "epoch": 0.641791813709075, "grad_norm": 0.024298356845974922, "learning_rate": 3.597569914541708e-05, "loss": 0.20316953659057618, "step": 149490 }, { "epoch": 0.6418347457990949, "grad_norm": 0.16192997992038727, "learning_rate": 3.5971387425299455e-05, "loss": 0.008682883530855178, "step": 149500 }, { "epoch": 0.641877677889115, "grad_norm": 0.2028181105852127, "learning_rate": 3.596707570518183e-05, "loss": 0.15205684900283814, "step": 149510 }, { "epoch": 0.641920609979135, "grad_norm": 1.6921296119689941, "learning_rate": 3.59627639850642e-05, "loss": 0.2163633108139038, "step": 149520 }, { "epoch": 0.641963542069155, "grad_norm": 1.2315031290054321, "learning_rate": 3.595845226494658e-05, "loss": 0.3820303678512573, "step": 149530 }, { "epoch": 0.642006474159175, "grad_norm": 0.001260005752556026, "learning_rate": 3.595414054482896e-05, "loss": 0.252690863609314, "step": 149540 }, { "epoch": 0.642049406249195, "grad_norm": 0.0427839457988739, "learning_rate": 3.5949828824711335e-05, "loss": 0.1781385898590088, "step": 149550 }, { "epoch": 0.6420923383392151, "grad_norm": 1.7424696683883667, "learning_rate": 3.5945517104593705e-05, "loss": 0.091168612241745, "step": 149560 }, { "epoch": 0.642135270429235, "grad_norm": 0.024264035746455193, "learning_rate": 3.594120538447608e-05, "loss": 0.1589852452278137, "step": 149570 }, { "epoch": 0.6421782025192551, "grad_norm": 2.4907279014587402, "learning_rate": 3.593689366435846e-05, "loss": 0.24225354194641113, "step": 149580 }, { "epoch": 0.6422211346092751, "grad_norm": 0.03332900255918503, "learning_rate": 3.593258194424084e-05, "loss": 0.3125003814697266, "step": 149590 }, { "epoch": 0.642264066699295, "grad_norm": 1.9512094259262085, "learning_rate": 3.5928270224123214e-05, "loss": 0.08376158475875854, "step": 149600 }, { "epoch": 0.6423069987893151, "grad_norm": 0.01022291649132967, "learning_rate": 3.592395850400559e-05, "loss": 0.24850192070007324, "step": 149610 }, { "epoch": 0.6423499308793351, "grad_norm": 0.0009068639483302832, "learning_rate": 3.591964678388797e-05, "loss": 0.24298958778381347, "step": 149620 }, { "epoch": 0.642392862969355, "grad_norm": 0.8004212975502014, "learning_rate": 3.5915335063770346e-05, "loss": 0.3129880428314209, "step": 149630 }, { "epoch": 0.6424357950593751, "grad_norm": 7.949131011962891, "learning_rate": 3.591102334365272e-05, "loss": 0.28877594470977785, "step": 149640 }, { "epoch": 0.6424787271493951, "grad_norm": 1.1796244382858276, "learning_rate": 3.5906711623535094e-05, "loss": 0.22746336460113525, "step": 149650 }, { "epoch": 0.6425216592394151, "grad_norm": 3.379171848297119, "learning_rate": 3.590239990341747e-05, "loss": 0.12769432067871095, "step": 149660 }, { "epoch": 0.6425645913294351, "grad_norm": 2.4193947315216064, "learning_rate": 3.589808818329985e-05, "loss": 0.19697859287261962, "step": 149670 }, { "epoch": 0.6426075234194552, "grad_norm": 0.005682434421032667, "learning_rate": 3.589377646318222e-05, "loss": 0.0763977587223053, "step": 149680 }, { "epoch": 0.6426504555094751, "grad_norm": 0.0972975641489029, "learning_rate": 3.58894647430646e-05, "loss": 0.009377355128526688, "step": 149690 }, { "epoch": 0.6426933875994951, "grad_norm": 0.036606717854738235, "learning_rate": 3.5885153022946974e-05, "loss": 0.13104115724563598, "step": 149700 }, { "epoch": 0.6427363196895152, "grad_norm": 0.002917773788794875, "learning_rate": 3.588084130282935e-05, "loss": 0.1379793882369995, "step": 149710 }, { "epoch": 0.6427792517795351, "grad_norm": 2.2030205726623535, "learning_rate": 3.587652958271173e-05, "loss": 0.24357876777648926, "step": 149720 }, { "epoch": 0.6428221838695551, "grad_norm": 3.2720603942871094, "learning_rate": 3.5872217862594106e-05, "loss": 0.2897838592529297, "step": 149730 }, { "epoch": 0.6428651159595752, "grad_norm": 0.0052750613540410995, "learning_rate": 3.5867906142476484e-05, "loss": 0.09423772692680359, "step": 149740 }, { "epoch": 0.6429080480495951, "grad_norm": 0.8562742471694946, "learning_rate": 3.586359442235886e-05, "loss": 0.3347553968429565, "step": 149750 }, { "epoch": 0.6429509801396152, "grad_norm": 1.6895736455917358, "learning_rate": 3.585928270224123e-05, "loss": 0.35067009925842285, "step": 149760 }, { "epoch": 0.6429939122296352, "grad_norm": 0.02921820618212223, "learning_rate": 3.585497098212361e-05, "loss": 0.061477482318878174, "step": 149770 }, { "epoch": 0.6430368443196551, "grad_norm": 1.9546409845352173, "learning_rate": 3.5850659262005986e-05, "loss": 0.23800258636474608, "step": 149780 }, { "epoch": 0.6430797764096752, "grad_norm": 0.0008198361028917134, "learning_rate": 3.5846347541888364e-05, "loss": 0.2178973913192749, "step": 149790 }, { "epoch": 0.6431227084996952, "grad_norm": 0.0036720235366374254, "learning_rate": 3.5842035821770734e-05, "loss": 0.17330249547958373, "step": 149800 }, { "epoch": 0.6431656405897151, "grad_norm": 6.70130729675293, "learning_rate": 3.583772410165311e-05, "loss": 0.3165444374084473, "step": 149810 }, { "epoch": 0.6432085726797352, "grad_norm": 2.2758169174194336, "learning_rate": 3.583341238153549e-05, "loss": 0.23460037708282472, "step": 149820 }, { "epoch": 0.6432515047697552, "grad_norm": 0.012248000130057335, "learning_rate": 3.5829100661417866e-05, "loss": 0.15115057229995726, "step": 149830 }, { "epoch": 0.6432944368597752, "grad_norm": 0.01865295320749283, "learning_rate": 3.5824788941300244e-05, "loss": 0.17717584371566772, "step": 149840 }, { "epoch": 0.6433373689497952, "grad_norm": 0.1970166563987732, "learning_rate": 3.582047722118262e-05, "loss": 0.1073176383972168, "step": 149850 }, { "epoch": 0.6433803010398152, "grad_norm": 0.01511879451572895, "learning_rate": 3.5816165501065e-05, "loss": 0.20396804809570312, "step": 149860 }, { "epoch": 0.6434232331298352, "grad_norm": 2.3057587146759033, "learning_rate": 3.5811853780947376e-05, "loss": 0.44550137519836425, "step": 149870 }, { "epoch": 0.6434661652198552, "grad_norm": 0.01049025822430849, "learning_rate": 3.580754206082975e-05, "loss": 0.06601186990737914, "step": 149880 }, { "epoch": 0.6435090973098753, "grad_norm": 0.004382689017802477, "learning_rate": 3.5803230340712124e-05, "loss": 0.08267223834991455, "step": 149890 }, { "epoch": 0.6435520293998952, "grad_norm": 1.260653018951416, "learning_rate": 3.57989186205945e-05, "loss": 0.18585045337677003, "step": 149900 }, { "epoch": 0.6435949614899152, "grad_norm": 0.01793503761291504, "learning_rate": 3.579460690047688e-05, "loss": 0.27620763778686525, "step": 149910 }, { "epoch": 0.6436378935799353, "grad_norm": 0.009449219331145287, "learning_rate": 3.5790295180359256e-05, "loss": 0.17628442049026488, "step": 149920 }, { "epoch": 0.6436808256699552, "grad_norm": 0.006964544299989939, "learning_rate": 3.5785983460241626e-05, "loss": 0.06601274013519287, "step": 149930 }, { "epoch": 0.6437237577599753, "grad_norm": 0.03464784100651741, "learning_rate": 3.5781671740124003e-05, "loss": 0.1950068950653076, "step": 149940 }, { "epoch": 0.6437666898499953, "grad_norm": 0.5150058269500732, "learning_rate": 3.577736002000638e-05, "loss": 0.07730207443237305, "step": 149950 }, { "epoch": 0.6438096219400152, "grad_norm": 0.0041276682168245316, "learning_rate": 3.5773048299888765e-05, "loss": 0.15968955755233766, "step": 149960 }, { "epoch": 0.6438525540300353, "grad_norm": 0.006611963734030724, "learning_rate": 3.5768736579771135e-05, "loss": 0.2820572853088379, "step": 149970 }, { "epoch": 0.6438954861200553, "grad_norm": 2.0153894424438477, "learning_rate": 3.576442485965351e-05, "loss": 0.19689462184906006, "step": 149980 }, { "epoch": 0.6439384182100752, "grad_norm": 1.3693201541900635, "learning_rate": 3.576011313953589e-05, "loss": 0.2069246530532837, "step": 149990 }, { "epoch": 0.6439813503000953, "grad_norm": 0.00849225465208292, "learning_rate": 3.575580141941827e-05, "loss": 0.2323996067047119, "step": 150000 }, { "epoch": 0.6439813503000953, "eval_loss": 0.3989526629447937, "eval_runtime": 27.1345, "eval_samples_per_second": 3.685, "eval_steps_per_second": 3.685, "step": 150000 }, { "epoch": 0.6440242823901153, "grad_norm": 0.24946488440036774, "learning_rate": 3.575148969930064e-05, "loss": 0.05341393947601318, "step": 150010 }, { "epoch": 0.6440672144801354, "grad_norm": 0.16969804465770721, "learning_rate": 3.5747177979183015e-05, "loss": 0.2269148349761963, "step": 150020 }, { "epoch": 0.6441101465701553, "grad_norm": 0.014137118123471737, "learning_rate": 3.574286625906539e-05, "loss": 0.10535632371902466, "step": 150030 }, { "epoch": 0.6441530786601753, "grad_norm": 4.645323276519775, "learning_rate": 3.573855453894777e-05, "loss": 0.23873796463012695, "step": 150040 }, { "epoch": 0.6441960107501954, "grad_norm": 0.22043707966804504, "learning_rate": 3.573424281883014e-05, "loss": 0.10489349365234375, "step": 150050 }, { "epoch": 0.6442389428402153, "grad_norm": 0.005896933376789093, "learning_rate": 3.572993109871252e-05, "loss": 0.3532412052154541, "step": 150060 }, { "epoch": 0.6442818749302354, "grad_norm": 0.05356534942984581, "learning_rate": 3.57256193785949e-05, "loss": 0.1797205090522766, "step": 150070 }, { "epoch": 0.6443248070202554, "grad_norm": 8.781749725341797, "learning_rate": 3.572130765847728e-05, "loss": 0.06868206858634948, "step": 150080 }, { "epoch": 0.6443677391102753, "grad_norm": 2.9030253887176514, "learning_rate": 3.571699593835965e-05, "loss": 0.2557013511657715, "step": 150090 }, { "epoch": 0.6444106712002954, "grad_norm": 0.012084102258086205, "learning_rate": 3.571268421824203e-05, "loss": 0.21030721664428711, "step": 150100 }, { "epoch": 0.6444536032903154, "grad_norm": 0.0009573132847435772, "learning_rate": 3.5708372498124405e-05, "loss": 0.20109183788299562, "step": 150110 }, { "epoch": 0.6444965353803354, "grad_norm": 0.0013460484333336353, "learning_rate": 3.570406077800678e-05, "loss": 0.20196888446807862, "step": 150120 }, { "epoch": 0.6445394674703554, "grad_norm": 2.3234500885009766, "learning_rate": 3.569974905788915e-05, "loss": 0.3194742202758789, "step": 150130 }, { "epoch": 0.6445823995603754, "grad_norm": 0.09026148170232773, "learning_rate": 3.569543733777153e-05, "loss": 0.1295180320739746, "step": 150140 }, { "epoch": 0.6446253316503954, "grad_norm": 0.31340131163597107, "learning_rate": 3.569112561765391e-05, "loss": 0.25863502025604246, "step": 150150 }, { "epoch": 0.6446682637404154, "grad_norm": 0.5061963796615601, "learning_rate": 3.5686813897536285e-05, "loss": 0.22574491500854493, "step": 150160 }, { "epoch": 0.6447111958304355, "grad_norm": 0.5957401394844055, "learning_rate": 3.5682502177418655e-05, "loss": 0.28134448528289796, "step": 150170 }, { "epoch": 0.6447541279204554, "grad_norm": 0.8046202659606934, "learning_rate": 3.567819045730104e-05, "loss": 0.2787386655807495, "step": 150180 }, { "epoch": 0.6447970600104754, "grad_norm": 7.400388240814209, "learning_rate": 3.567387873718342e-05, "loss": 0.4055464744567871, "step": 150190 }, { "epoch": 0.6448399921004955, "grad_norm": 2.637162208557129, "learning_rate": 3.5669567017065794e-05, "loss": 0.24768633842468263, "step": 150200 }, { "epoch": 0.6448829241905154, "grad_norm": 0.06767522543668747, "learning_rate": 3.5665255296948165e-05, "loss": 0.13162182569503783, "step": 150210 }, { "epoch": 0.6449258562805354, "grad_norm": 1.7277255058288574, "learning_rate": 3.566094357683054e-05, "loss": 0.27955288887023927, "step": 150220 }, { "epoch": 0.6449687883705555, "grad_norm": 0.020901095122098923, "learning_rate": 3.565663185671292e-05, "loss": 0.3833391427993774, "step": 150230 }, { "epoch": 0.6450117204605754, "grad_norm": 0.04695943742990494, "learning_rate": 3.56523201365953e-05, "loss": 0.08938364982604981, "step": 150240 }, { "epoch": 0.6450546525505955, "grad_norm": 4.172770977020264, "learning_rate": 3.5648008416477674e-05, "loss": 0.5616393566131592, "step": 150250 }, { "epoch": 0.6450975846406155, "grad_norm": 1.0414800643920898, "learning_rate": 3.5643696696360045e-05, "loss": 0.16682374477386475, "step": 150260 }, { "epoch": 0.6451405167306354, "grad_norm": 0.07870873063802719, "learning_rate": 3.563938497624242e-05, "loss": 0.3457974910736084, "step": 150270 }, { "epoch": 0.6451834488206555, "grad_norm": 0.216685488820076, "learning_rate": 3.56350732561248e-05, "loss": 0.28935139179229735, "step": 150280 }, { "epoch": 0.6452263809106755, "grad_norm": 0.013029251247644424, "learning_rate": 3.5630761536007177e-05, "loss": 0.23000710010528563, "step": 150290 }, { "epoch": 0.6452693130006955, "grad_norm": 0.04310464859008789, "learning_rate": 3.5626449815889554e-05, "loss": 0.12242240905761718, "step": 150300 }, { "epoch": 0.6453122450907155, "grad_norm": 2.501413106918335, "learning_rate": 3.562213809577193e-05, "loss": 0.30814170837402344, "step": 150310 }, { "epoch": 0.6453551771807355, "grad_norm": 1.097299575805664, "learning_rate": 3.561782637565431e-05, "loss": 0.22030160427093506, "step": 150320 }, { "epoch": 0.6453981092707555, "grad_norm": 0.026852579787373543, "learning_rate": 3.5613514655536686e-05, "loss": 0.22212910652160645, "step": 150330 }, { "epoch": 0.6454410413607755, "grad_norm": 0.22037015855312347, "learning_rate": 3.5609202935419057e-05, "loss": 0.13063724040985109, "step": 150340 }, { "epoch": 0.6454839734507956, "grad_norm": 60.27128982543945, "learning_rate": 3.5604891215301434e-05, "loss": 0.08863616585731507, "step": 150350 }, { "epoch": 0.6455269055408155, "grad_norm": 3.72627592086792, "learning_rate": 3.560057949518381e-05, "loss": 0.21849074363708496, "step": 150360 }, { "epoch": 0.6455698376308355, "grad_norm": 0.45230111479759216, "learning_rate": 3.559626777506619e-05, "loss": 0.22867064476013182, "step": 150370 }, { "epoch": 0.6456127697208556, "grad_norm": 0.9160948395729065, "learning_rate": 3.559195605494856e-05, "loss": 0.0876701295375824, "step": 150380 }, { "epoch": 0.6456557018108755, "grad_norm": 3.828824043273926, "learning_rate": 3.5587644334830936e-05, "loss": 0.29961979389190674, "step": 150390 }, { "epoch": 0.6456986339008955, "grad_norm": 2.7227840423583984, "learning_rate": 3.5583332614713314e-05, "loss": 0.11139649152755737, "step": 150400 }, { "epoch": 0.6457415659909156, "grad_norm": 0.00475529907271266, "learning_rate": 3.557902089459569e-05, "loss": 0.058909434080123904, "step": 150410 }, { "epoch": 0.6457844980809355, "grad_norm": 0.2636564373970032, "learning_rate": 3.557470917447807e-05, "loss": 0.27903735637664795, "step": 150420 }, { "epoch": 0.6458274301709556, "grad_norm": 0.007288595661520958, "learning_rate": 3.5570397454360446e-05, "loss": 0.2414243221282959, "step": 150430 }, { "epoch": 0.6458703622609756, "grad_norm": 0.16422626376152039, "learning_rate": 3.556608573424282e-05, "loss": 0.19783369302749634, "step": 150440 }, { "epoch": 0.6459132943509956, "grad_norm": 2.2630701065063477, "learning_rate": 3.55617740141252e-05, "loss": 0.2823080062866211, "step": 150450 }, { "epoch": 0.6459562264410156, "grad_norm": 0.02012724243104458, "learning_rate": 3.555746229400757e-05, "loss": 0.01825539916753769, "step": 150460 }, { "epoch": 0.6459991585310356, "grad_norm": 0.0037891704123467207, "learning_rate": 3.555315057388995e-05, "loss": 0.09379191398620605, "step": 150470 }, { "epoch": 0.6460420906210557, "grad_norm": 0.01953510195016861, "learning_rate": 3.5548838853772326e-05, "loss": 0.28741207122802737, "step": 150480 }, { "epoch": 0.6460850227110756, "grad_norm": 0.018316145986318588, "learning_rate": 3.55445271336547e-05, "loss": 0.20187015533447267, "step": 150490 }, { "epoch": 0.6461279548010956, "grad_norm": 2.894874095916748, "learning_rate": 3.5540215413537074e-05, "loss": 0.2339266538619995, "step": 150500 }, { "epoch": 0.6461708868911157, "grad_norm": 0.011740121990442276, "learning_rate": 3.553590369341945e-05, "loss": 0.1139156699180603, "step": 150510 }, { "epoch": 0.6462138189811356, "grad_norm": 0.0005255074356682599, "learning_rate": 3.553159197330183e-05, "loss": 0.4588716983795166, "step": 150520 }, { "epoch": 0.6462567510711557, "grad_norm": 0.49539849162101746, "learning_rate": 3.5527280253184206e-05, "loss": 0.35927271842956543, "step": 150530 }, { "epoch": 0.6462996831611757, "grad_norm": 1.401881456375122, "learning_rate": 3.552296853306658e-05, "loss": 0.24080908298492432, "step": 150540 }, { "epoch": 0.6463426152511956, "grad_norm": 0.002369405934587121, "learning_rate": 3.551865681294896e-05, "loss": 0.17735791206359863, "step": 150550 }, { "epoch": 0.6463855473412157, "grad_norm": 3.389665126800537, "learning_rate": 3.551434509283134e-05, "loss": 0.09773001074790955, "step": 150560 }, { "epoch": 0.6464284794312357, "grad_norm": 1.7062057256698608, "learning_rate": 3.5510033372713715e-05, "loss": 0.4862934112548828, "step": 150570 }, { "epoch": 0.6464714115212556, "grad_norm": 0.10221821814775467, "learning_rate": 3.5505721652596086e-05, "loss": 0.1651764392852783, "step": 150580 }, { "epoch": 0.6465143436112757, "grad_norm": 0.5306051969528198, "learning_rate": 3.550140993247846e-05, "loss": 0.1362286925315857, "step": 150590 }, { "epoch": 0.6465572757012957, "grad_norm": 0.11507293581962585, "learning_rate": 3.549709821236084e-05, "loss": 0.35334086418151855, "step": 150600 }, { "epoch": 0.6466002077913157, "grad_norm": 1.0700881481170654, "learning_rate": 3.549278649224322e-05, "loss": 0.2019124746322632, "step": 150610 }, { "epoch": 0.6466431398813357, "grad_norm": 2.763080596923828, "learning_rate": 3.5488474772125595e-05, "loss": 0.33109560012817385, "step": 150620 }, { "epoch": 0.6466860719713557, "grad_norm": 0.0019787929486483335, "learning_rate": 3.5484163052007966e-05, "loss": 0.1044989824295044, "step": 150630 }, { "epoch": 0.6467290040613757, "grad_norm": 0.006205317564308643, "learning_rate": 3.547985133189034e-05, "loss": 0.2810218095779419, "step": 150640 }, { "epoch": 0.6467719361513957, "grad_norm": 0.08015859872102737, "learning_rate": 3.547553961177272e-05, "loss": 0.3245912790298462, "step": 150650 }, { "epoch": 0.6468148682414158, "grad_norm": 2.5483102798461914, "learning_rate": 3.5471227891655104e-05, "loss": 0.17378085851669312, "step": 150660 }, { "epoch": 0.6468578003314357, "grad_norm": 2.2716610431671143, "learning_rate": 3.5466916171537475e-05, "loss": 0.25345578193664553, "step": 150670 }, { "epoch": 0.6469007324214557, "grad_norm": 0.013727148994803429, "learning_rate": 3.546260445141985e-05, "loss": 0.2554850339889526, "step": 150680 }, { "epoch": 0.6469436645114758, "grad_norm": 1.6586568355560303, "learning_rate": 3.545829273130223e-05, "loss": 0.2570873022079468, "step": 150690 }, { "epoch": 0.6469865966014957, "grad_norm": 1.9871070384979248, "learning_rate": 3.545398101118461e-05, "loss": 0.20374269485473634, "step": 150700 }, { "epoch": 0.6470295286915158, "grad_norm": 0.022350076586008072, "learning_rate": 3.544966929106698e-05, "loss": 0.4256162166595459, "step": 150710 }, { "epoch": 0.6470724607815358, "grad_norm": 0.004630012437701225, "learning_rate": 3.5445357570949355e-05, "loss": 0.2682207107543945, "step": 150720 }, { "epoch": 0.6471153928715557, "grad_norm": 0.15521609783172607, "learning_rate": 3.544104585083173e-05, "loss": 0.20004658699035643, "step": 150730 }, { "epoch": 0.6471583249615758, "grad_norm": 0.0005406261188909411, "learning_rate": 3.543673413071411e-05, "loss": 0.15981554985046387, "step": 150740 }, { "epoch": 0.6472012570515958, "grad_norm": 0.001623362535610795, "learning_rate": 3.543242241059648e-05, "loss": 0.23363192081451417, "step": 150750 }, { "epoch": 0.6472441891416157, "grad_norm": 0.058352451771497726, "learning_rate": 3.542811069047886e-05, "loss": 0.06839287877082825, "step": 150760 }, { "epoch": 0.6472871212316358, "grad_norm": 2.14315128326416, "learning_rate": 3.542379897036124e-05, "loss": 0.4139379024505615, "step": 150770 }, { "epoch": 0.6473300533216558, "grad_norm": 0.8045847415924072, "learning_rate": 3.541948725024362e-05, "loss": 0.24877502918243408, "step": 150780 }, { "epoch": 0.6473729854116758, "grad_norm": 6.681770324707031, "learning_rate": 3.541517553012599e-05, "loss": 0.2461794376373291, "step": 150790 }, { "epoch": 0.6474159175016958, "grad_norm": 0.007231173105537891, "learning_rate": 3.541086381000837e-05, "loss": 0.10801637172698975, "step": 150800 }, { "epoch": 0.6474588495917158, "grad_norm": 0.08952129632234573, "learning_rate": 3.5406552089890744e-05, "loss": 0.163426411151886, "step": 150810 }, { "epoch": 0.6475017816817358, "grad_norm": 1.4802162647247314, "learning_rate": 3.540224036977312e-05, "loss": 0.31829946041107177, "step": 150820 }, { "epoch": 0.6475447137717558, "grad_norm": 0.6895108222961426, "learning_rate": 3.539792864965549e-05, "loss": 0.17124329805374144, "step": 150830 }, { "epoch": 0.6475876458617759, "grad_norm": 2.4215404987335205, "learning_rate": 3.539361692953787e-05, "loss": 0.15050746202468873, "step": 150840 }, { "epoch": 0.6476305779517958, "grad_norm": 0.004913152661174536, "learning_rate": 3.538930520942025e-05, "loss": 0.1458522915840149, "step": 150850 }, { "epoch": 0.6476735100418158, "grad_norm": 0.2607187032699585, "learning_rate": 3.5384993489302624e-05, "loss": 0.1336721658706665, "step": 150860 }, { "epoch": 0.6477164421318359, "grad_norm": 0.1601972132921219, "learning_rate": 3.5380681769184995e-05, "loss": 0.041481971740722656, "step": 150870 }, { "epoch": 0.6477593742218559, "grad_norm": 0.22868028283119202, "learning_rate": 3.537637004906738e-05, "loss": 0.1673444151878357, "step": 150880 }, { "epoch": 0.6478023063118759, "grad_norm": 0.0013107474660500884, "learning_rate": 3.5372058328949756e-05, "loss": 0.05632483959197998, "step": 150890 }, { "epoch": 0.6478452384018959, "grad_norm": 0.037882279604673386, "learning_rate": 3.5367746608832134e-05, "loss": 0.1135305643081665, "step": 150900 }, { "epoch": 0.6478881704919159, "grad_norm": 0.006019525229930878, "learning_rate": 3.5363434888714504e-05, "loss": 0.21990854740142823, "step": 150910 }, { "epoch": 0.6479311025819359, "grad_norm": 1.5703551769256592, "learning_rate": 3.535912316859688e-05, "loss": 0.197979736328125, "step": 150920 }, { "epoch": 0.6479740346719559, "grad_norm": 0.0006432771333493292, "learning_rate": 3.535481144847926e-05, "loss": 0.13005446195602416, "step": 150930 }, { "epoch": 0.648016966761976, "grad_norm": 0.005556243937462568, "learning_rate": 3.5350499728361636e-05, "loss": 0.19083883762359619, "step": 150940 }, { "epoch": 0.6480598988519959, "grad_norm": 1.8534623384475708, "learning_rate": 3.5346188008244013e-05, "loss": 0.28145432472229004, "step": 150950 }, { "epoch": 0.6481028309420159, "grad_norm": 0.00571678951382637, "learning_rate": 3.5341876288126384e-05, "loss": 0.09889286756515503, "step": 150960 }, { "epoch": 0.648145763032036, "grad_norm": 0.022175125777721405, "learning_rate": 3.533756456800876e-05, "loss": 0.13391042947769166, "step": 150970 }, { "epoch": 0.6481886951220559, "grad_norm": 2.6668763160705566, "learning_rate": 3.533325284789114e-05, "loss": 0.5017569541931153, "step": 150980 }, { "epoch": 0.648231627212076, "grad_norm": 2.446192741394043, "learning_rate": 3.5328941127773516e-05, "loss": 0.22131681442260742, "step": 150990 }, { "epoch": 0.648274559302096, "grad_norm": 0.0022710757330060005, "learning_rate": 3.532462940765589e-05, "loss": 0.27220540046691893, "step": 151000 }, { "epoch": 0.648274559302096, "eval_loss": 0.3889124393463135, "eval_runtime": 27.2159, "eval_samples_per_second": 3.674, "eval_steps_per_second": 3.674, "step": 151000 }, { "epoch": 0.6483174913921159, "grad_norm": 0.011589907109737396, "learning_rate": 3.532031768753827e-05, "loss": 0.09215273857116699, "step": 151010 }, { "epoch": 0.648360423482136, "grad_norm": 0.005135936196893454, "learning_rate": 3.531600596742065e-05, "loss": 0.11207122802734375, "step": 151020 }, { "epoch": 0.648403355572156, "grad_norm": 1.7124568223953247, "learning_rate": 3.5311694247303025e-05, "loss": 0.26488828659057617, "step": 151030 }, { "epoch": 0.6484462876621759, "grad_norm": 0.029093217104673386, "learning_rate": 3.5307382527185396e-05, "loss": 0.18144166469573975, "step": 151040 }, { "epoch": 0.648489219752196, "grad_norm": 0.2676723599433899, "learning_rate": 3.530307080706777e-05, "loss": 0.15304325819015502, "step": 151050 }, { "epoch": 0.648532151842216, "grad_norm": 0.026561537757515907, "learning_rate": 3.529875908695015e-05, "loss": 0.02566699981689453, "step": 151060 }, { "epoch": 0.648575083932236, "grad_norm": 0.005033195950090885, "learning_rate": 3.529444736683253e-05, "loss": 0.12942137718200683, "step": 151070 }, { "epoch": 0.648618016022256, "grad_norm": 0.00713531207293272, "learning_rate": 3.52901356467149e-05, "loss": 0.10324745178222657, "step": 151080 }, { "epoch": 0.648660948112276, "grad_norm": 6.128237724304199, "learning_rate": 3.5285823926597276e-05, "loss": 0.45879373550415037, "step": 151090 }, { "epoch": 0.648703880202296, "grad_norm": 0.4716830551624298, "learning_rate": 3.528151220647965e-05, "loss": 0.1531036376953125, "step": 151100 }, { "epoch": 0.648746812292316, "grad_norm": 0.7725340723991394, "learning_rate": 3.527720048636203e-05, "loss": 0.13332175016403197, "step": 151110 }, { "epoch": 0.648789744382336, "grad_norm": 0.4862283766269684, "learning_rate": 3.527288876624441e-05, "loss": 0.19347046613693236, "step": 151120 }, { "epoch": 0.648832676472356, "grad_norm": 0.010178297758102417, "learning_rate": 3.5268577046126785e-05, "loss": 0.07210128903388976, "step": 151130 }, { "epoch": 0.648875608562376, "grad_norm": 0.005305929109454155, "learning_rate": 3.526426532600916e-05, "loss": 0.025121399760246278, "step": 151140 }, { "epoch": 0.6489185406523961, "grad_norm": 0.036797620356082916, "learning_rate": 3.525995360589154e-05, "loss": 0.11298577785491944, "step": 151150 }, { "epoch": 0.648961472742416, "grad_norm": 0.1233087033033371, "learning_rate": 3.525564188577391e-05, "loss": 0.17588850259780883, "step": 151160 }, { "epoch": 0.649004404832436, "grad_norm": 5.559473514556885, "learning_rate": 3.525133016565629e-05, "loss": 0.35527606010437013, "step": 151170 }, { "epoch": 0.6490473369224561, "grad_norm": 0.0020315709989517927, "learning_rate": 3.5247018445538665e-05, "loss": 0.14180855751037597, "step": 151180 }, { "epoch": 0.649090269012476, "grad_norm": 1.5811628103256226, "learning_rate": 3.524270672542104e-05, "loss": 0.31139168739318845, "step": 151190 }, { "epoch": 0.6491332011024961, "grad_norm": 1.6150096654891968, "learning_rate": 3.523839500530341e-05, "loss": 0.33792574405670167, "step": 151200 }, { "epoch": 0.6491761331925161, "grad_norm": 0.29071733355522156, "learning_rate": 3.523408328518579e-05, "loss": 0.26106438636779783, "step": 151210 }, { "epoch": 0.649219065282536, "grad_norm": 4.788187503814697, "learning_rate": 3.522977156506817e-05, "loss": 0.17764378786087037, "step": 151220 }, { "epoch": 0.6492619973725561, "grad_norm": 0.0022474355064332485, "learning_rate": 3.5225459844950545e-05, "loss": 0.15507956743240356, "step": 151230 }, { "epoch": 0.6493049294625761, "grad_norm": 0.0006411916110664606, "learning_rate": 3.522114812483292e-05, "loss": 0.3545748233795166, "step": 151240 }, { "epoch": 0.649347861552596, "grad_norm": 0.044647056609392166, "learning_rate": 3.52168364047153e-05, "loss": 0.2371835947036743, "step": 151250 }, { "epoch": 0.6493907936426161, "grad_norm": 0.022296231240034103, "learning_rate": 3.521252468459768e-05, "loss": 0.19835184812545775, "step": 151260 }, { "epoch": 0.6494337257326361, "grad_norm": 1.7125898599624634, "learning_rate": 3.5208212964480055e-05, "loss": 0.27645077705383303, "step": 151270 }, { "epoch": 0.6494766578226561, "grad_norm": 0.5980311632156372, "learning_rate": 3.5203901244362425e-05, "loss": 0.2318577527999878, "step": 151280 }, { "epoch": 0.6495195899126761, "grad_norm": 0.04134934023022652, "learning_rate": 3.51995895242448e-05, "loss": 0.20498950481414796, "step": 151290 }, { "epoch": 0.6495625220026962, "grad_norm": 2.364464044570923, "learning_rate": 3.519527780412718e-05, "loss": 0.19556167125701904, "step": 151300 }, { "epoch": 0.6496054540927162, "grad_norm": 0.002782400930300355, "learning_rate": 3.519096608400956e-05, "loss": 0.299759578704834, "step": 151310 }, { "epoch": 0.6496483861827361, "grad_norm": 0.002601859625428915, "learning_rate": 3.5186654363891934e-05, "loss": 0.23682382106781005, "step": 151320 }, { "epoch": 0.6496913182727562, "grad_norm": 0.03596939146518707, "learning_rate": 3.5182342643774305e-05, "loss": 0.23851912021636962, "step": 151330 }, { "epoch": 0.6497342503627762, "grad_norm": 4.506582260131836, "learning_rate": 3.517803092365668e-05, "loss": 0.32757136821746824, "step": 151340 }, { "epoch": 0.6497771824527961, "grad_norm": 0.0009721462265588343, "learning_rate": 3.517371920353906e-05, "loss": 0.3446447134017944, "step": 151350 }, { "epoch": 0.6498201145428162, "grad_norm": 0.0029338260646909475, "learning_rate": 3.516940748342144e-05, "loss": 0.23369097709655762, "step": 151360 }, { "epoch": 0.6498630466328362, "grad_norm": 0.0009567153174430132, "learning_rate": 3.5165095763303814e-05, "loss": 0.22707803249359132, "step": 151370 }, { "epoch": 0.6499059787228562, "grad_norm": 3.856454610824585, "learning_rate": 3.516078404318619e-05, "loss": 0.09049471616744995, "step": 151380 }, { "epoch": 0.6499489108128762, "grad_norm": 0.5688821077346802, "learning_rate": 3.515647232306857e-05, "loss": 0.3531051158905029, "step": 151390 }, { "epoch": 0.6499918429028962, "grad_norm": 0.014180580154061317, "learning_rate": 3.5152160602950946e-05, "loss": 0.0902495801448822, "step": 151400 }, { "epoch": 0.6500347749929162, "grad_norm": 1.0567702054977417, "learning_rate": 3.514784888283332e-05, "loss": 0.12536590099334716, "step": 151410 }, { "epoch": 0.6500777070829362, "grad_norm": 0.05386923998594284, "learning_rate": 3.5143537162715694e-05, "loss": 0.20828590393066407, "step": 151420 }, { "epoch": 0.6501206391729563, "grad_norm": 0.0040575917810201645, "learning_rate": 3.513922544259807e-05, "loss": 0.005848048627376557, "step": 151430 }, { "epoch": 0.6501635712629762, "grad_norm": 0.005221802741289139, "learning_rate": 3.513491372248045e-05, "loss": 0.2222289562225342, "step": 151440 }, { "epoch": 0.6502065033529962, "grad_norm": 1.2336004972457886, "learning_rate": 3.513060200236282e-05, "loss": 0.1798251748085022, "step": 151450 }, { "epoch": 0.6502494354430163, "grad_norm": 0.010236959904432297, "learning_rate": 3.51262902822452e-05, "loss": 0.17683861255645753, "step": 151460 }, { "epoch": 0.6502923675330362, "grad_norm": 147.47923278808594, "learning_rate": 3.5121978562127574e-05, "loss": 0.11385173797607422, "step": 151470 }, { "epoch": 0.6503352996230563, "grad_norm": 0.2765538692474365, "learning_rate": 3.511766684200996e-05, "loss": 0.153677499294281, "step": 151480 }, { "epoch": 0.6503782317130763, "grad_norm": 0.47096744179725647, "learning_rate": 3.511335512189233e-05, "loss": 0.23527054786682128, "step": 151490 }, { "epoch": 0.6504211638030962, "grad_norm": 1.5383275747299194, "learning_rate": 3.5109043401774706e-05, "loss": 0.21786458492279054, "step": 151500 }, { "epoch": 0.6504640958931163, "grad_norm": 0.019338462501764297, "learning_rate": 3.5104731681657084e-05, "loss": 0.3397980213165283, "step": 151510 }, { "epoch": 0.6505070279831363, "grad_norm": 1.6768598556518555, "learning_rate": 3.510041996153946e-05, "loss": 0.4416059494018555, "step": 151520 }, { "epoch": 0.6505499600731562, "grad_norm": 2.0906455516815186, "learning_rate": 3.509610824142183e-05, "loss": 0.42383246421813964, "step": 151530 }, { "epoch": 0.6505928921631763, "grad_norm": 0.0007825464126653969, "learning_rate": 3.509179652130421e-05, "loss": 0.1377132773399353, "step": 151540 }, { "epoch": 0.6506358242531963, "grad_norm": 0.001401827554218471, "learning_rate": 3.5087484801186586e-05, "loss": 0.047088241577148436, "step": 151550 }, { "epoch": 0.6506787563432163, "grad_norm": 2.3653674125671387, "learning_rate": 3.5083173081068964e-05, "loss": 0.20371806621551514, "step": 151560 }, { "epoch": 0.6507216884332363, "grad_norm": 0.0049470472149550915, "learning_rate": 3.5078861360951334e-05, "loss": 0.28787286281585694, "step": 151570 }, { "epoch": 0.6507646205232563, "grad_norm": 0.0012642034562304616, "learning_rate": 3.507454964083371e-05, "loss": 0.2894446611404419, "step": 151580 }, { "epoch": 0.6508075526132763, "grad_norm": 0.3203394412994385, "learning_rate": 3.5070237920716096e-05, "loss": 0.08833484053611755, "step": 151590 }, { "epoch": 0.6508504847032963, "grad_norm": 3.1001203060150146, "learning_rate": 3.506592620059847e-05, "loss": 0.24941644668579102, "step": 151600 }, { "epoch": 0.6508934167933164, "grad_norm": 1.7117334604263306, "learning_rate": 3.5061614480480844e-05, "loss": 0.057556116580963136, "step": 151610 }, { "epoch": 0.6509363488833363, "grad_norm": 1.9608606100082397, "learning_rate": 3.505730276036322e-05, "loss": 0.1367754340171814, "step": 151620 }, { "epoch": 0.6509792809733563, "grad_norm": 0.02430753782391548, "learning_rate": 3.50529910402456e-05, "loss": 0.32413339614868164, "step": 151630 }, { "epoch": 0.6510222130633764, "grad_norm": 6.840951442718506, "learning_rate": 3.5048679320127976e-05, "loss": 0.4086446285247803, "step": 151640 }, { "epoch": 0.6510651451533963, "grad_norm": 2.100064277648926, "learning_rate": 3.5044367600010346e-05, "loss": 0.21504318714141846, "step": 151650 }, { "epoch": 0.6511080772434163, "grad_norm": 0.5872086882591248, "learning_rate": 3.5040055879892723e-05, "loss": 0.21643712520599365, "step": 151660 }, { "epoch": 0.6511510093334364, "grad_norm": 0.005748818628489971, "learning_rate": 3.50357441597751e-05, "loss": 0.19003957509994507, "step": 151670 }, { "epoch": 0.6511939414234563, "grad_norm": 0.0002342463267268613, "learning_rate": 3.503143243965748e-05, "loss": 0.1779122233390808, "step": 151680 }, { "epoch": 0.6512368735134764, "grad_norm": 0.021635752171278, "learning_rate": 3.5027120719539856e-05, "loss": 0.07176170349121094, "step": 151690 }, { "epoch": 0.6512798056034964, "grad_norm": 1.4818261861801147, "learning_rate": 3.502280899942223e-05, "loss": 0.18642498254776002, "step": 151700 }, { "epoch": 0.6513227376935163, "grad_norm": 1.6581411361694336, "learning_rate": 3.501849727930461e-05, "loss": 0.17148194313049317, "step": 151710 }, { "epoch": 0.6513656697835364, "grad_norm": 0.046362608671188354, "learning_rate": 3.501418555918699e-05, "loss": 0.15675292015075684, "step": 151720 }, { "epoch": 0.6514086018735564, "grad_norm": 0.011807423084974289, "learning_rate": 3.5009873839069365e-05, "loss": 0.09193039536476136, "step": 151730 }, { "epoch": 0.6514515339635765, "grad_norm": 0.017920024693012238, "learning_rate": 3.5005562118951735e-05, "loss": 0.20496132373809814, "step": 151740 }, { "epoch": 0.6514944660535964, "grad_norm": 0.004140602890402079, "learning_rate": 3.500125039883411e-05, "loss": 0.23158493041992187, "step": 151750 }, { "epoch": 0.6515373981436164, "grad_norm": 0.0014110020129010081, "learning_rate": 3.499693867871649e-05, "loss": 0.07966670393943787, "step": 151760 }, { "epoch": 0.6515803302336365, "grad_norm": 0.00932406634092331, "learning_rate": 3.499262695859887e-05, "loss": 0.2577128648757935, "step": 151770 }, { "epoch": 0.6516232623236564, "grad_norm": 0.0015593827702105045, "learning_rate": 3.498831523848124e-05, "loss": 0.13507426977157594, "step": 151780 }, { "epoch": 0.6516661944136765, "grad_norm": 1.8296703100204468, "learning_rate": 3.4984003518363615e-05, "loss": 0.10646724700927734, "step": 151790 }, { "epoch": 0.6517091265036965, "grad_norm": 5.870005130767822, "learning_rate": 3.497969179824599e-05, "loss": 0.3149374008178711, "step": 151800 }, { "epoch": 0.6517520585937164, "grad_norm": 1.2665034532546997, "learning_rate": 3.497538007812837e-05, "loss": 0.21344881057739257, "step": 151810 }, { "epoch": 0.6517949906837365, "grad_norm": 0.33878016471862793, "learning_rate": 3.497106835801075e-05, "loss": 0.4020512104034424, "step": 151820 }, { "epoch": 0.6518379227737565, "grad_norm": 0.005240934900939465, "learning_rate": 3.4966756637893125e-05, "loss": 0.26684696674346925, "step": 151830 }, { "epoch": 0.6518808548637764, "grad_norm": 0.0019433987326920033, "learning_rate": 3.49624449177755e-05, "loss": 0.18590428829193115, "step": 151840 }, { "epoch": 0.6519237869537965, "grad_norm": 0.40653958916664124, "learning_rate": 3.495813319765788e-05, "loss": 0.19820864200592042, "step": 151850 }, { "epoch": 0.6519667190438165, "grad_norm": 5.8154401779174805, "learning_rate": 3.495382147754025e-05, "loss": 0.2836847066879272, "step": 151860 }, { "epoch": 0.6520096511338365, "grad_norm": 0.005736039485782385, "learning_rate": 3.494950975742263e-05, "loss": 0.09045286178588867, "step": 151870 }, { "epoch": 0.6520525832238565, "grad_norm": 0.001496506156399846, "learning_rate": 3.4945198037305005e-05, "loss": 0.25731420516967773, "step": 151880 }, { "epoch": 0.6520955153138766, "grad_norm": 13.771329879760742, "learning_rate": 3.494088631718738e-05, "loss": 0.2797736644744873, "step": 151890 }, { "epoch": 0.6521384474038965, "grad_norm": 0.5289430618286133, "learning_rate": 3.493657459706975e-05, "loss": 0.22803876399993897, "step": 151900 }, { "epoch": 0.6521813794939165, "grad_norm": 0.5056369304656982, "learning_rate": 3.493226287695213e-05, "loss": 0.1541724681854248, "step": 151910 }, { "epoch": 0.6522243115839366, "grad_norm": 0.028238875791430473, "learning_rate": 3.492795115683451e-05, "loss": 0.19024114608764647, "step": 151920 }, { "epoch": 0.6522672436739565, "grad_norm": 0.0652453675866127, "learning_rate": 3.4923639436716885e-05, "loss": 0.16091703176498412, "step": 151930 }, { "epoch": 0.6523101757639765, "grad_norm": 2.1925699710845947, "learning_rate": 3.491932771659926e-05, "loss": 0.2420794725418091, "step": 151940 }, { "epoch": 0.6523531078539966, "grad_norm": 0.004015914164483547, "learning_rate": 3.491501599648164e-05, "loss": 0.3008209943771362, "step": 151950 }, { "epoch": 0.6523960399440165, "grad_norm": 2.2953882217407227, "learning_rate": 3.491070427636402e-05, "loss": 0.11296336650848389, "step": 151960 }, { "epoch": 0.6524389720340366, "grad_norm": 0.028664682060480118, "learning_rate": 3.4906392556246394e-05, "loss": 0.03355903923511505, "step": 151970 }, { "epoch": 0.6524819041240566, "grad_norm": 0.5694634914398193, "learning_rate": 3.4902080836128765e-05, "loss": 0.15875810384750366, "step": 151980 }, { "epoch": 0.6525248362140765, "grad_norm": 0.07715904712677002, "learning_rate": 3.489776911601114e-05, "loss": 0.10381278991699219, "step": 151990 }, { "epoch": 0.6525677683040966, "grad_norm": 7.928126335144043, "learning_rate": 3.489345739589352e-05, "loss": 0.3048884153366089, "step": 152000 }, { "epoch": 0.6525677683040966, "eval_loss": 0.3930748701095581, "eval_runtime": 27.1751, "eval_samples_per_second": 3.68, "eval_steps_per_second": 3.68, "step": 152000 }, { "epoch": 0.6526107003941166, "grad_norm": 0.0018158082384616137, "learning_rate": 3.4889145675775897e-05, "loss": 0.3498977184295654, "step": 152010 }, { "epoch": 0.6526536324841365, "grad_norm": 9.38770866394043, "learning_rate": 3.488483395565827e-05, "loss": 0.10030590295791626, "step": 152020 }, { "epoch": 0.6526965645741566, "grad_norm": 0.0059972163289785385, "learning_rate": 3.4880522235540645e-05, "loss": 0.255455470085144, "step": 152030 }, { "epoch": 0.6527394966641766, "grad_norm": 1.80948805809021, "learning_rate": 3.487621051542302e-05, "loss": 0.24304404258728027, "step": 152040 }, { "epoch": 0.6527824287541966, "grad_norm": 0.0005664766067638993, "learning_rate": 3.48718987953054e-05, "loss": 0.12256590127944947, "step": 152050 }, { "epoch": 0.6528253608442166, "grad_norm": 0.03551163151860237, "learning_rate": 3.4867587075187777e-05, "loss": 0.2767303466796875, "step": 152060 }, { "epoch": 0.6528682929342366, "grad_norm": 0.0017603436717763543, "learning_rate": 3.4863275355070154e-05, "loss": 0.16143405437469482, "step": 152070 }, { "epoch": 0.6529112250242566, "grad_norm": 0.00445699505507946, "learning_rate": 3.485896363495253e-05, "loss": 0.2035740852355957, "step": 152080 }, { "epoch": 0.6529541571142766, "grad_norm": 0.10075142234563828, "learning_rate": 3.485465191483491e-05, "loss": 0.20856783390045167, "step": 152090 }, { "epoch": 0.6529970892042967, "grad_norm": 0.02400701306760311, "learning_rate": 3.4850340194717286e-05, "loss": 0.26665315628051756, "step": 152100 }, { "epoch": 0.6530400212943166, "grad_norm": 2.000727653503418, "learning_rate": 3.4846028474599656e-05, "loss": 0.40493073463439944, "step": 152110 }, { "epoch": 0.6530829533843366, "grad_norm": 0.005690529942512512, "learning_rate": 3.4841716754482034e-05, "loss": 0.20626089572906495, "step": 152120 }, { "epoch": 0.6531258854743567, "grad_norm": 1.505405068397522, "learning_rate": 3.483740503436441e-05, "loss": 0.15051109790802003, "step": 152130 }, { "epoch": 0.6531688175643766, "grad_norm": 18.674238204956055, "learning_rate": 3.483309331424679e-05, "loss": 0.08497092127799988, "step": 152140 }, { "epoch": 0.6532117496543967, "grad_norm": 0.08240605145692825, "learning_rate": 3.482878159412916e-05, "loss": 0.15139393806457518, "step": 152150 }, { "epoch": 0.6532546817444167, "grad_norm": 0.019352389499545097, "learning_rate": 3.4824469874011536e-05, "loss": 0.3356482028961182, "step": 152160 }, { "epoch": 0.6532976138344367, "grad_norm": 0.3776816427707672, "learning_rate": 3.4820158153893914e-05, "loss": 0.19168307781219482, "step": 152170 }, { "epoch": 0.6533405459244567, "grad_norm": 0.010281571187078953, "learning_rate": 3.48158464337763e-05, "loss": 0.13237816095352173, "step": 152180 }, { "epoch": 0.6533834780144767, "grad_norm": 0.006922994274646044, "learning_rate": 3.481153471365867e-05, "loss": 0.3548863410949707, "step": 152190 }, { "epoch": 0.6534264101044968, "grad_norm": 0.2510335445404053, "learning_rate": 3.4807222993541046e-05, "loss": 0.11280529499053955, "step": 152200 }, { "epoch": 0.6534693421945167, "grad_norm": 0.006575438193976879, "learning_rate": 3.480291127342342e-05, "loss": 0.13859881162643434, "step": 152210 }, { "epoch": 0.6535122742845367, "grad_norm": 0.07279030978679657, "learning_rate": 3.47985995533058e-05, "loss": 0.33872673511505125, "step": 152220 }, { "epoch": 0.6535552063745568, "grad_norm": 0.4047152101993561, "learning_rate": 3.479428783318817e-05, "loss": 0.21664028167724608, "step": 152230 }, { "epoch": 0.6535981384645767, "grad_norm": 0.018721673637628555, "learning_rate": 3.478997611307055e-05, "loss": 0.1668068528175354, "step": 152240 }, { "epoch": 0.6536410705545967, "grad_norm": 0.03141282871365547, "learning_rate": 3.4785664392952926e-05, "loss": 0.1575627565383911, "step": 152250 }, { "epoch": 0.6536840026446168, "grad_norm": 0.19100338220596313, "learning_rate": 3.47813526728353e-05, "loss": 0.1080901026725769, "step": 152260 }, { "epoch": 0.6537269347346367, "grad_norm": 1.8139461278915405, "learning_rate": 3.4777040952717674e-05, "loss": 0.16453101634979247, "step": 152270 }, { "epoch": 0.6537698668246568, "grad_norm": 0.9621730446815491, "learning_rate": 3.477272923260005e-05, "loss": 0.2002204418182373, "step": 152280 }, { "epoch": 0.6538127989146768, "grad_norm": 1.7624107599258423, "learning_rate": 3.4768417512482435e-05, "loss": 0.3082698345184326, "step": 152290 }, { "epoch": 0.6538557310046967, "grad_norm": 1.3553941249847412, "learning_rate": 3.476410579236481e-05, "loss": 0.3578528881072998, "step": 152300 }, { "epoch": 0.6538986630947168, "grad_norm": 2.207336187362671, "learning_rate": 3.475979407224718e-05, "loss": 0.3818779468536377, "step": 152310 }, { "epoch": 0.6539415951847368, "grad_norm": 2.9257280826568604, "learning_rate": 3.475548235212956e-05, "loss": 0.2496558904647827, "step": 152320 }, { "epoch": 0.6539845272747568, "grad_norm": 0.05879620090126991, "learning_rate": 3.475117063201194e-05, "loss": 0.2835780382156372, "step": 152330 }, { "epoch": 0.6540274593647768, "grad_norm": 0.0013960381038486958, "learning_rate": 3.4746858911894315e-05, "loss": 0.12066258192062378, "step": 152340 }, { "epoch": 0.6540703914547968, "grad_norm": 0.10827244073152542, "learning_rate": 3.4742547191776686e-05, "loss": 0.08777568936347961, "step": 152350 }, { "epoch": 0.6541133235448168, "grad_norm": 0.0033609438687562943, "learning_rate": 3.473823547165906e-05, "loss": 0.14433823823928832, "step": 152360 }, { "epoch": 0.6541562556348368, "grad_norm": 0.12234596163034439, "learning_rate": 3.473392375154144e-05, "loss": 0.004690999910235405, "step": 152370 }, { "epoch": 0.6541991877248569, "grad_norm": 0.2859869599342346, "learning_rate": 3.472961203142382e-05, "loss": 0.1143067479133606, "step": 152380 }, { "epoch": 0.6542421198148768, "grad_norm": 2.3144009113311768, "learning_rate": 3.472530031130619e-05, "loss": 0.1523873209953308, "step": 152390 }, { "epoch": 0.6542850519048968, "grad_norm": 1.4639687538146973, "learning_rate": 3.472098859118857e-05, "loss": 0.13809309005737305, "step": 152400 }, { "epoch": 0.6543279839949169, "grad_norm": 1.4635416269302368, "learning_rate": 3.471667687107095e-05, "loss": 0.24965903759002686, "step": 152410 }, { "epoch": 0.6543709160849368, "grad_norm": 0.025756657123565674, "learning_rate": 3.471236515095333e-05, "loss": 0.26134366989135743, "step": 152420 }, { "epoch": 0.6544138481749568, "grad_norm": 0.13001833856105804, "learning_rate": 3.4708053430835704e-05, "loss": 0.2040097713470459, "step": 152430 }, { "epoch": 0.6544567802649769, "grad_norm": 0.000583972199819982, "learning_rate": 3.4703741710718075e-05, "loss": 0.28530220985412597, "step": 152440 }, { "epoch": 0.6544997123549968, "grad_norm": 0.45030274987220764, "learning_rate": 3.469942999060045e-05, "loss": 0.20632073879241944, "step": 152450 }, { "epoch": 0.6545426444450169, "grad_norm": 0.0034222842659801245, "learning_rate": 3.469511827048283e-05, "loss": 0.19489221572875975, "step": 152460 }, { "epoch": 0.6545855765350369, "grad_norm": 0.002282580127939582, "learning_rate": 3.469080655036521e-05, "loss": 0.198526132106781, "step": 152470 }, { "epoch": 0.6546285086250568, "grad_norm": 0.02770853415131569, "learning_rate": 3.468649483024758e-05, "loss": 0.11303834915161133, "step": 152480 }, { "epoch": 0.6546714407150769, "grad_norm": 1.2699313163757324, "learning_rate": 3.4682183110129955e-05, "loss": 0.12168716192245484, "step": 152490 }, { "epoch": 0.6547143728050969, "grad_norm": 0.0013995268382132053, "learning_rate": 3.467787139001233e-05, "loss": 0.3896394491195679, "step": 152500 }, { "epoch": 0.6547573048951169, "grad_norm": 1.4256479740142822, "learning_rate": 3.467355966989471e-05, "loss": 0.36592926979064944, "step": 152510 }, { "epoch": 0.6548002369851369, "grad_norm": 0.029613185673952103, "learning_rate": 3.466924794977709e-05, "loss": 0.20814926624298097, "step": 152520 }, { "epoch": 0.6548431690751569, "grad_norm": 0.013652176596224308, "learning_rate": 3.4664936229659464e-05, "loss": 0.35039663314819336, "step": 152530 }, { "epoch": 0.6548861011651769, "grad_norm": 0.19368019700050354, "learning_rate": 3.466062450954184e-05, "loss": 0.23914365768432616, "step": 152540 }, { "epoch": 0.6549290332551969, "grad_norm": 0.04172434285283089, "learning_rate": 3.465631278942422e-05, "loss": 0.07054402232170105, "step": 152550 }, { "epoch": 0.654971965345217, "grad_norm": 5.317892074584961, "learning_rate": 3.465200106930659e-05, "loss": 0.31213338375091554, "step": 152560 }, { "epoch": 0.6550148974352369, "grad_norm": 1.7020102739334106, "learning_rate": 3.464768934918897e-05, "loss": 0.30658817291259766, "step": 152570 }, { "epoch": 0.6550578295252569, "grad_norm": 0.022133029997348785, "learning_rate": 3.4643377629071344e-05, "loss": 0.01100047081708908, "step": 152580 }, { "epoch": 0.655100761615277, "grad_norm": 0.11623332649469376, "learning_rate": 3.463906590895372e-05, "loss": 0.2962920665740967, "step": 152590 }, { "epoch": 0.655143693705297, "grad_norm": 4.137857437133789, "learning_rate": 3.463475418883609e-05, "loss": 0.2276834726333618, "step": 152600 }, { "epoch": 0.655186625795317, "grad_norm": 0.003172489581629634, "learning_rate": 3.463044246871847e-05, "loss": 0.13099607229232788, "step": 152610 }, { "epoch": 0.655229557885337, "grad_norm": 0.11240309476852417, "learning_rate": 3.462613074860085e-05, "loss": 0.08598883152008056, "step": 152620 }, { "epoch": 0.655272489975357, "grad_norm": 0.021000202745199203, "learning_rate": 3.4621819028483224e-05, "loss": 0.1656929850578308, "step": 152630 }, { "epoch": 0.655315422065377, "grad_norm": 0.011905116029083729, "learning_rate": 3.46175073083656e-05, "loss": 0.2573047637939453, "step": 152640 }, { "epoch": 0.655358354155397, "grad_norm": 1.581723690032959, "learning_rate": 3.461319558824798e-05, "loss": 0.13378567695617677, "step": 152650 }, { "epoch": 0.655401286245417, "grad_norm": 0.045455001294612885, "learning_rate": 3.4608883868130356e-05, "loss": 0.20185463428497313, "step": 152660 }, { "epoch": 0.655444218335437, "grad_norm": 2.161616802215576, "learning_rate": 3.4604572148012733e-05, "loss": 0.19833118915557862, "step": 152670 }, { "epoch": 0.655487150425457, "grad_norm": 1.8970059156417847, "learning_rate": 3.4600260427895104e-05, "loss": 0.21300973892211914, "step": 152680 }, { "epoch": 0.6555300825154771, "grad_norm": 0.0016901158960536122, "learning_rate": 3.459594870777748e-05, "loss": 0.1876598834991455, "step": 152690 }, { "epoch": 0.655573014605497, "grad_norm": 0.0022294456139206886, "learning_rate": 3.459163698765986e-05, "loss": 0.10009009838104248, "step": 152700 }, { "epoch": 0.655615946695517, "grad_norm": 1.3207972049713135, "learning_rate": 3.4587325267542236e-05, "loss": 0.2541964530944824, "step": 152710 }, { "epoch": 0.6556588787855371, "grad_norm": 5.267035961151123, "learning_rate": 3.458301354742461e-05, "loss": 0.21954050064086914, "step": 152720 }, { "epoch": 0.655701810875557, "grad_norm": 0.095136858522892, "learning_rate": 3.4578701827306984e-05, "loss": 0.32280683517456055, "step": 152730 }, { "epoch": 0.655744742965577, "grad_norm": 0.012000875547528267, "learning_rate": 3.457439010718936e-05, "loss": 0.1790920615196228, "step": 152740 }, { "epoch": 0.6557876750555971, "grad_norm": 0.22052302956581116, "learning_rate": 3.457007838707174e-05, "loss": 0.13050248622894287, "step": 152750 }, { "epoch": 0.655830607145617, "grad_norm": 0.01903325505554676, "learning_rate": 3.4565766666954116e-05, "loss": 0.24872941970825196, "step": 152760 }, { "epoch": 0.6558735392356371, "grad_norm": 0.03306184709072113, "learning_rate": 3.456145494683649e-05, "loss": 0.18790819644927978, "step": 152770 }, { "epoch": 0.6559164713256571, "grad_norm": 0.03617790713906288, "learning_rate": 3.455714322671887e-05, "loss": 0.13181324005126954, "step": 152780 }, { "epoch": 0.655959403415677, "grad_norm": 0.008156493306159973, "learning_rate": 3.455283150660125e-05, "loss": 0.24294917583465575, "step": 152790 }, { "epoch": 0.6560023355056971, "grad_norm": 1.6839267015457153, "learning_rate": 3.4548519786483625e-05, "loss": 0.0995561957359314, "step": 152800 }, { "epoch": 0.6560452675957171, "grad_norm": 0.001088878489099443, "learning_rate": 3.4544208066365996e-05, "loss": 0.17891314029693603, "step": 152810 }, { "epoch": 0.6560881996857371, "grad_norm": 0.08594802021980286, "learning_rate": 3.453989634624837e-05, "loss": 0.18109216690063476, "step": 152820 }, { "epoch": 0.6561311317757571, "grad_norm": 0.05384450778365135, "learning_rate": 3.453558462613075e-05, "loss": 0.12445077896118165, "step": 152830 }, { "epoch": 0.6561740638657771, "grad_norm": 1.4890823364257812, "learning_rate": 3.453127290601313e-05, "loss": 0.27255203723907473, "step": 152840 }, { "epoch": 0.6562169959557971, "grad_norm": 0.18443816900253296, "learning_rate": 3.45269611858955e-05, "loss": 0.1994839906692505, "step": 152850 }, { "epoch": 0.6562599280458171, "grad_norm": 0.020347854122519493, "learning_rate": 3.4522649465777876e-05, "loss": 0.04077900350093842, "step": 152860 }, { "epoch": 0.6563028601358372, "grad_norm": 0.04533800855278969, "learning_rate": 3.451833774566025e-05, "loss": 0.09720346331596375, "step": 152870 }, { "epoch": 0.6563457922258571, "grad_norm": 0.034875430166721344, "learning_rate": 3.451402602554264e-05, "loss": 0.13401124477386475, "step": 152880 }, { "epoch": 0.6563887243158771, "grad_norm": 8.106966972351074, "learning_rate": 3.450971430542501e-05, "loss": 0.41603717803955076, "step": 152890 }, { "epoch": 0.6564316564058972, "grad_norm": 0.8586081862449646, "learning_rate": 3.4505402585307385e-05, "loss": 0.07474786639213563, "step": 152900 }, { "epoch": 0.6564745884959171, "grad_norm": 0.012892269529402256, "learning_rate": 3.450109086518976e-05, "loss": 0.15219208002090454, "step": 152910 }, { "epoch": 0.6565175205859372, "grad_norm": 0.4991026818752289, "learning_rate": 3.449677914507214e-05, "loss": 0.366585898399353, "step": 152920 }, { "epoch": 0.6565604526759572, "grad_norm": 0.873711109161377, "learning_rate": 3.449246742495451e-05, "loss": 0.12484588623046874, "step": 152930 }, { "epoch": 0.6566033847659771, "grad_norm": 0.053988225758075714, "learning_rate": 3.448815570483689e-05, "loss": 0.24912574291229247, "step": 152940 }, { "epoch": 0.6566463168559972, "grad_norm": 0.012094683013856411, "learning_rate": 3.4483843984719265e-05, "loss": 0.07237531542778015, "step": 152950 }, { "epoch": 0.6566892489460172, "grad_norm": 0.013880450278520584, "learning_rate": 3.447953226460164e-05, "loss": 0.2798666000366211, "step": 152960 }, { "epoch": 0.6567321810360371, "grad_norm": 0.887079656124115, "learning_rate": 3.447522054448401e-05, "loss": 0.12565791606903076, "step": 152970 }, { "epoch": 0.6567751131260572, "grad_norm": 0.07446973770856857, "learning_rate": 3.447090882436639e-05, "loss": 0.1282172441482544, "step": 152980 }, { "epoch": 0.6568180452160772, "grad_norm": 0.004310452379286289, "learning_rate": 3.4466597104248775e-05, "loss": 0.24610447883605957, "step": 152990 }, { "epoch": 0.6568609773060972, "grad_norm": 0.0427681989967823, "learning_rate": 3.446228538413115e-05, "loss": 0.06416921019554138, "step": 153000 }, { "epoch": 0.6568609773060972, "eval_loss": 0.392218679189682, "eval_runtime": 27.216, "eval_samples_per_second": 3.674, "eval_steps_per_second": 3.674, "step": 153000 }, { "epoch": 0.6569039093961172, "grad_norm": 2.4584898948669434, "learning_rate": 3.445797366401352e-05, "loss": 0.24507856369018555, "step": 153010 }, { "epoch": 0.6569468414861372, "grad_norm": 0.005726732779294252, "learning_rate": 3.44536619438959e-05, "loss": 0.10224964618682861, "step": 153020 }, { "epoch": 0.6569897735761573, "grad_norm": 3.3387672901153564, "learning_rate": 3.444935022377828e-05, "loss": 0.314972448348999, "step": 153030 }, { "epoch": 0.6570327056661772, "grad_norm": 0.008076757192611694, "learning_rate": 3.4445038503660655e-05, "loss": 0.22315526008605957, "step": 153040 }, { "epoch": 0.6570756377561973, "grad_norm": 3.77646803855896, "learning_rate": 3.4440726783543025e-05, "loss": 0.2469933032989502, "step": 153050 }, { "epoch": 0.6571185698462173, "grad_norm": 0.08149869740009308, "learning_rate": 3.44364150634254e-05, "loss": 0.15403306484222412, "step": 153060 }, { "epoch": 0.6571615019362372, "grad_norm": 1.642274022102356, "learning_rate": 3.443210334330778e-05, "loss": 0.1979893922805786, "step": 153070 }, { "epoch": 0.6572044340262573, "grad_norm": 0.8089116215705872, "learning_rate": 3.442779162319016e-05, "loss": 0.24776697158813477, "step": 153080 }, { "epoch": 0.6572473661162773, "grad_norm": 0.008406427688896656, "learning_rate": 3.442347990307253e-05, "loss": 0.2347959041595459, "step": 153090 }, { "epoch": 0.6572902982062973, "grad_norm": 0.2034047693014145, "learning_rate": 3.441916818295491e-05, "loss": 0.029045340418815613, "step": 153100 }, { "epoch": 0.6573332302963173, "grad_norm": 1.0141544342041016, "learning_rate": 3.441485646283729e-05, "loss": 0.3198372840881348, "step": 153110 }, { "epoch": 0.6573761623863373, "grad_norm": 0.04060515761375427, "learning_rate": 3.4410544742719666e-05, "loss": 0.24239850044250488, "step": 153120 }, { "epoch": 0.6574190944763573, "grad_norm": 0.009104978293180466, "learning_rate": 3.440623302260204e-05, "loss": 0.14999579191207885, "step": 153130 }, { "epoch": 0.6574620265663773, "grad_norm": 0.28482234477996826, "learning_rate": 3.4401921302484414e-05, "loss": 0.21846232414245606, "step": 153140 }, { "epoch": 0.6575049586563974, "grad_norm": 0.02105596847832203, "learning_rate": 3.439760958236679e-05, "loss": 0.21020326614379883, "step": 153150 }, { "epoch": 0.6575478907464173, "grad_norm": 1.977399230003357, "learning_rate": 3.439329786224917e-05, "loss": 0.10796371698379517, "step": 153160 }, { "epoch": 0.6575908228364373, "grad_norm": 0.144552081823349, "learning_rate": 3.4388986142131546e-05, "loss": 0.234502911567688, "step": 153170 }, { "epoch": 0.6576337549264574, "grad_norm": 3.9625911712646484, "learning_rate": 3.438467442201392e-05, "loss": 0.45007548332214353, "step": 153180 }, { "epoch": 0.6576766870164773, "grad_norm": 0.006828643381595612, "learning_rate": 3.4380362701896294e-05, "loss": 0.22843713760375978, "step": 153190 }, { "epoch": 0.6577196191064973, "grad_norm": 5.773648262023926, "learning_rate": 3.437605098177867e-05, "loss": 0.27328267097473147, "step": 153200 }, { "epoch": 0.6577625511965174, "grad_norm": 0.2885953187942505, "learning_rate": 3.437173926166105e-05, "loss": 0.3828735828399658, "step": 153210 }, { "epoch": 0.6578054832865373, "grad_norm": 0.6968337893486023, "learning_rate": 3.4367427541543426e-05, "loss": 0.2467254877090454, "step": 153220 }, { "epoch": 0.6578484153765574, "grad_norm": 0.043182846158742905, "learning_rate": 3.4363115821425804e-05, "loss": 0.10045549869537354, "step": 153230 }, { "epoch": 0.6578913474665774, "grad_norm": 6.219958782196045, "learning_rate": 3.435880410130818e-05, "loss": 0.2550558805465698, "step": 153240 }, { "epoch": 0.6579342795565973, "grad_norm": 7.405364513397217, "learning_rate": 3.435449238119056e-05, "loss": 0.193221116065979, "step": 153250 }, { "epoch": 0.6579772116466174, "grad_norm": 0.7110088467597961, "learning_rate": 3.435018066107293e-05, "loss": 0.1556612491607666, "step": 153260 }, { "epoch": 0.6580201437366374, "grad_norm": 1.2682826519012451, "learning_rate": 3.4345868940955306e-05, "loss": 0.3977441072463989, "step": 153270 }, { "epoch": 0.6580630758266574, "grad_norm": 0.35356810688972473, "learning_rate": 3.4341557220837684e-05, "loss": 0.2715872526168823, "step": 153280 }, { "epoch": 0.6581060079166774, "grad_norm": 0.04433917999267578, "learning_rate": 3.433724550072006e-05, "loss": 0.14817949533462524, "step": 153290 }, { "epoch": 0.6581489400066974, "grad_norm": 0.39730677008628845, "learning_rate": 3.433293378060243e-05, "loss": 0.2873368263244629, "step": 153300 }, { "epoch": 0.6581918720967174, "grad_norm": 0.3087719976902008, "learning_rate": 3.432862206048481e-05, "loss": 0.1608600616455078, "step": 153310 }, { "epoch": 0.6582348041867374, "grad_norm": 1.1279784440994263, "learning_rate": 3.4324310340367186e-05, "loss": 0.10123729705810547, "step": 153320 }, { "epoch": 0.6582777362767575, "grad_norm": 0.03151925280690193, "learning_rate": 3.4319998620249564e-05, "loss": 0.1971789002418518, "step": 153330 }, { "epoch": 0.6583206683667774, "grad_norm": 3.2033753395080566, "learning_rate": 3.431568690013194e-05, "loss": 0.16672835350036622, "step": 153340 }, { "epoch": 0.6583636004567974, "grad_norm": 0.046538546681404114, "learning_rate": 3.431137518001432e-05, "loss": 0.03843323886394501, "step": 153350 }, { "epoch": 0.6584065325468175, "grad_norm": 0.5804786682128906, "learning_rate": 3.4307063459896696e-05, "loss": 0.11106359958648682, "step": 153360 }, { "epoch": 0.6584494646368374, "grad_norm": 3.5863006114959717, "learning_rate": 3.430275173977907e-05, "loss": 0.22137126922607422, "step": 153370 }, { "epoch": 0.6584923967268574, "grad_norm": 4.670158386230469, "learning_rate": 3.4298440019661444e-05, "loss": 0.41120376586914065, "step": 153380 }, { "epoch": 0.6585353288168775, "grad_norm": 0.27056336402893066, "learning_rate": 3.429412829954382e-05, "loss": 0.2656059980392456, "step": 153390 }, { "epoch": 0.6585782609068974, "grad_norm": 0.010328982025384903, "learning_rate": 3.42898165794262e-05, "loss": 0.1735626220703125, "step": 153400 }, { "epoch": 0.6586211929969175, "grad_norm": 1.4152470827102661, "learning_rate": 3.4285504859308576e-05, "loss": 0.14489773511886597, "step": 153410 }, { "epoch": 0.6586641250869375, "grad_norm": 0.0291027519851923, "learning_rate": 3.4281193139190946e-05, "loss": 0.07788435220718384, "step": 153420 }, { "epoch": 0.6587070571769574, "grad_norm": 0.8220972418785095, "learning_rate": 3.4276881419073323e-05, "loss": 0.33536629676818847, "step": 153430 }, { "epoch": 0.6587499892669775, "grad_norm": 0.007435683626681566, "learning_rate": 3.42725696989557e-05, "loss": 0.19981803894042968, "step": 153440 }, { "epoch": 0.6587929213569975, "grad_norm": 0.043424610048532486, "learning_rate": 3.426825797883808e-05, "loss": 0.21512467861175538, "step": 153450 }, { "epoch": 0.6588358534470176, "grad_norm": 0.22515398263931274, "learning_rate": 3.4263946258720455e-05, "loss": 0.2515620946884155, "step": 153460 }, { "epoch": 0.6588787855370375, "grad_norm": 1.116879940032959, "learning_rate": 3.425963453860283e-05, "loss": 0.38285002708435056, "step": 153470 }, { "epoch": 0.6589217176270575, "grad_norm": 2.415517568588257, "learning_rate": 3.425532281848521e-05, "loss": 0.136501681804657, "step": 153480 }, { "epoch": 0.6589646497170776, "grad_norm": 1.7895293235778809, "learning_rate": 3.425101109836759e-05, "loss": 0.3027945995330811, "step": 153490 }, { "epoch": 0.6590075818070975, "grad_norm": 0.0065894764848053455, "learning_rate": 3.4246699378249965e-05, "loss": 0.016795614361763002, "step": 153500 }, { "epoch": 0.6590505138971176, "grad_norm": 0.12253738194704056, "learning_rate": 3.4242387658132335e-05, "loss": 0.1453849196434021, "step": 153510 }, { "epoch": 0.6590934459871376, "grad_norm": 0.04784093052148819, "learning_rate": 3.423807593801471e-05, "loss": 0.21482656002044678, "step": 153520 }, { "epoch": 0.6591363780771575, "grad_norm": 0.014918447472155094, "learning_rate": 3.423376421789709e-05, "loss": 0.18660422563552856, "step": 153530 }, { "epoch": 0.6591793101671776, "grad_norm": 0.23122480511665344, "learning_rate": 3.422945249777947e-05, "loss": 0.0963529109954834, "step": 153540 }, { "epoch": 0.6592222422571976, "grad_norm": 1.2685788869857788, "learning_rate": 3.422514077766184e-05, "loss": 0.2472707748413086, "step": 153550 }, { "epoch": 0.6592651743472175, "grad_norm": 0.004079438280314207, "learning_rate": 3.4220829057544215e-05, "loss": 0.49393181800842284, "step": 153560 }, { "epoch": 0.6593081064372376, "grad_norm": 0.006530522368848324, "learning_rate": 3.421651733742659e-05, "loss": 0.09846093654632568, "step": 153570 }, { "epoch": 0.6593510385272576, "grad_norm": 0.015483057126402855, "learning_rate": 3.421220561730897e-05, "loss": 0.24493045806884767, "step": 153580 }, { "epoch": 0.6593939706172776, "grad_norm": 0.0026599576231092215, "learning_rate": 3.420789389719135e-05, "loss": 0.14597982168197632, "step": 153590 }, { "epoch": 0.6594369027072976, "grad_norm": 0.002732135122641921, "learning_rate": 3.4203582177073725e-05, "loss": 0.1602466106414795, "step": 153600 }, { "epoch": 0.6594798347973176, "grad_norm": 0.027440810576081276, "learning_rate": 3.41992704569561e-05, "loss": 0.14492573738098144, "step": 153610 }, { "epoch": 0.6595227668873376, "grad_norm": 1.5758382081985474, "learning_rate": 3.419495873683848e-05, "loss": 0.17967160940170288, "step": 153620 }, { "epoch": 0.6595656989773576, "grad_norm": 15.669425010681152, "learning_rate": 3.419064701672085e-05, "loss": 0.38480756282806394, "step": 153630 }, { "epoch": 0.6596086310673777, "grad_norm": 0.011733450926840305, "learning_rate": 3.418633529660323e-05, "loss": 0.16886916160583496, "step": 153640 }, { "epoch": 0.6596515631573976, "grad_norm": 0.05485690385103226, "learning_rate": 3.4182023576485605e-05, "loss": 0.1938277006149292, "step": 153650 }, { "epoch": 0.6596944952474176, "grad_norm": 1.3380587100982666, "learning_rate": 3.417771185636798e-05, "loss": 0.23921914100646974, "step": 153660 }, { "epoch": 0.6597374273374377, "grad_norm": 7.73290491104126, "learning_rate": 3.417340013625035e-05, "loss": 0.2546802043914795, "step": 153670 }, { "epoch": 0.6597803594274576, "grad_norm": 0.003008150029927492, "learning_rate": 3.416908841613273e-05, "loss": 0.17785210609436036, "step": 153680 }, { "epoch": 0.6598232915174777, "grad_norm": 0.9172478914260864, "learning_rate": 3.416477669601511e-05, "loss": 0.2208587646484375, "step": 153690 }, { "epoch": 0.6598662236074977, "grad_norm": 13.522445678710938, "learning_rate": 3.416046497589749e-05, "loss": 0.15731308460235596, "step": 153700 }, { "epoch": 0.6599091556975176, "grad_norm": 0.012087918817996979, "learning_rate": 3.415615325577986e-05, "loss": 0.24291951656341554, "step": 153710 }, { "epoch": 0.6599520877875377, "grad_norm": 0.011719837784767151, "learning_rate": 3.415184153566224e-05, "loss": 0.4827817440032959, "step": 153720 }, { "epoch": 0.6599950198775577, "grad_norm": 7.859041213989258, "learning_rate": 3.414752981554462e-05, "loss": 0.3543118953704834, "step": 153730 }, { "epoch": 0.6600379519675776, "grad_norm": 2.688401460647583, "learning_rate": 3.4143218095426994e-05, "loss": 0.1372079849243164, "step": 153740 }, { "epoch": 0.6600808840575977, "grad_norm": 8.203360557556152, "learning_rate": 3.4138906375309365e-05, "loss": 0.21513383388519286, "step": 153750 }, { "epoch": 0.6601238161476177, "grad_norm": 1.9034103155136108, "learning_rate": 3.413459465519174e-05, "loss": 0.1283172607421875, "step": 153760 }, { "epoch": 0.6601667482376377, "grad_norm": 1.7265187501907349, "learning_rate": 3.413028293507412e-05, "loss": 0.22327690124511718, "step": 153770 }, { "epoch": 0.6602096803276577, "grad_norm": 2.916471242904663, "learning_rate": 3.4125971214956497e-05, "loss": 0.35086472034454347, "step": 153780 }, { "epoch": 0.6602526124176777, "grad_norm": 0.002500958973541856, "learning_rate": 3.412165949483887e-05, "loss": 0.007671752572059631, "step": 153790 }, { "epoch": 0.6602955445076977, "grad_norm": 0.0166263859719038, "learning_rate": 3.4117347774721244e-05, "loss": 0.1450344443321228, "step": 153800 }, { "epoch": 0.6603384765977177, "grad_norm": 0.02013245038688183, "learning_rate": 3.411303605460363e-05, "loss": 0.14859265089035034, "step": 153810 }, { "epoch": 0.6603814086877378, "grad_norm": 0.07011377066373825, "learning_rate": 3.4108724334486006e-05, "loss": 0.00771598219871521, "step": 153820 }, { "epoch": 0.6604243407777577, "grad_norm": 0.023680120706558228, "learning_rate": 3.4104412614368377e-05, "loss": 0.25690062046051027, "step": 153830 }, { "epoch": 0.6604672728677777, "grad_norm": 0.47995421290397644, "learning_rate": 3.4100100894250754e-05, "loss": 0.261479115486145, "step": 153840 }, { "epoch": 0.6605102049577978, "grad_norm": 1.021770715713501, "learning_rate": 3.409578917413313e-05, "loss": 0.17420082092285155, "step": 153850 }, { "epoch": 0.6605531370478177, "grad_norm": 0.33425313234329224, "learning_rate": 3.409147745401551e-05, "loss": 0.21330676078796387, "step": 153860 }, { "epoch": 0.6605960691378377, "grad_norm": 0.009500543586909771, "learning_rate": 3.4087165733897886e-05, "loss": 0.31008138656616213, "step": 153870 }, { "epoch": 0.6606390012278578, "grad_norm": 0.0007844572537578642, "learning_rate": 3.4082854013780256e-05, "loss": 0.23995554447174072, "step": 153880 }, { "epoch": 0.6606819333178778, "grad_norm": 0.091548390686512, "learning_rate": 3.4078542293662634e-05, "loss": 0.18022990226745605, "step": 153890 }, { "epoch": 0.6607248654078978, "grad_norm": 3.5057220458984375, "learning_rate": 3.407423057354501e-05, "loss": 0.16820919513702393, "step": 153900 }, { "epoch": 0.6607677974979178, "grad_norm": 0.03062303550541401, "learning_rate": 3.406991885342739e-05, "loss": 0.054902291297912596, "step": 153910 }, { "epoch": 0.6608107295879379, "grad_norm": 2.594014883041382, "learning_rate": 3.4065607133309766e-05, "loss": 0.036429685354232785, "step": 153920 }, { "epoch": 0.6608536616779578, "grad_norm": 0.20943856239318848, "learning_rate": 3.406129541319214e-05, "loss": 0.2615687608718872, "step": 153930 }, { "epoch": 0.6608965937679778, "grad_norm": 0.012907395139336586, "learning_rate": 3.405698369307452e-05, "loss": 0.19143770933151244, "step": 153940 }, { "epoch": 0.6609395258579979, "grad_norm": 1.944933533668518, "learning_rate": 3.40526719729569e-05, "loss": 0.1128533959388733, "step": 153950 }, { "epoch": 0.6609824579480178, "grad_norm": 0.04854433983564377, "learning_rate": 3.404836025283927e-05, "loss": 0.14492179155349733, "step": 153960 }, { "epoch": 0.6610253900380378, "grad_norm": 0.0480261892080307, "learning_rate": 3.4044048532721646e-05, "loss": 0.07904550433158875, "step": 153970 }, { "epoch": 0.6610683221280579, "grad_norm": 0.0033766271080821753, "learning_rate": 3.403973681260402e-05, "loss": 0.061150580644607544, "step": 153980 }, { "epoch": 0.6611112542180778, "grad_norm": 0.29395806789398193, "learning_rate": 3.40354250924864e-05, "loss": 0.3163429021835327, "step": 153990 }, { "epoch": 0.6611541863080979, "grad_norm": 0.004031799267977476, "learning_rate": 3.403111337236877e-05, "loss": 0.19167883396148683, "step": 154000 }, { "epoch": 0.6611541863080979, "eval_loss": 0.40580880641937256, "eval_runtime": 27.197, "eval_samples_per_second": 3.677, "eval_steps_per_second": 3.677, "step": 154000 }, { "epoch": 0.6611971183981179, "grad_norm": 0.5312119722366333, "learning_rate": 3.402680165225115e-05, "loss": 0.1988541841506958, "step": 154010 }, { "epoch": 0.6612400504881378, "grad_norm": 1.5345256328582764, "learning_rate": 3.4022489932133526e-05, "loss": 0.2834207534790039, "step": 154020 }, { "epoch": 0.6612829825781579, "grad_norm": 0.19601844251155853, "learning_rate": 3.40181782120159e-05, "loss": 0.14888355731964112, "step": 154030 }, { "epoch": 0.6613259146681779, "grad_norm": 0.00023016732302494347, "learning_rate": 3.401386649189828e-05, "loss": 0.20627593994140625, "step": 154040 }, { "epoch": 0.6613688467581978, "grad_norm": 0.24135711789131165, "learning_rate": 3.400955477178066e-05, "loss": 0.2646329402923584, "step": 154050 }, { "epoch": 0.6614117788482179, "grad_norm": 1.4794398546218872, "learning_rate": 3.4005243051663035e-05, "loss": 0.3707741737365723, "step": 154060 }, { "epoch": 0.6614547109382379, "grad_norm": 0.36191725730895996, "learning_rate": 3.400093133154541e-05, "loss": 0.23887648582458496, "step": 154070 }, { "epoch": 0.6614976430282579, "grad_norm": 0.01881462335586548, "learning_rate": 3.399661961142778e-05, "loss": 0.293936562538147, "step": 154080 }, { "epoch": 0.6615405751182779, "grad_norm": 0.14759767055511475, "learning_rate": 3.399230789131016e-05, "loss": 0.07919049859046937, "step": 154090 }, { "epoch": 0.661583507208298, "grad_norm": 0.9646201133728027, "learning_rate": 3.398799617119254e-05, "loss": 0.40249104499816896, "step": 154100 }, { "epoch": 0.6616264392983179, "grad_norm": 0.00739828497171402, "learning_rate": 3.3983684451074915e-05, "loss": 0.3890352725982666, "step": 154110 }, { "epoch": 0.6616693713883379, "grad_norm": 0.018785202875733376, "learning_rate": 3.3979372730957286e-05, "loss": 0.10795661211013793, "step": 154120 }, { "epoch": 0.661712303478358, "grad_norm": 0.07079663127660751, "learning_rate": 3.397506101083966e-05, "loss": 0.2482135057449341, "step": 154130 }, { "epoch": 0.6617552355683779, "grad_norm": 0.3511267304420471, "learning_rate": 3.397074929072204e-05, "loss": 0.2015997886657715, "step": 154140 }, { "epoch": 0.6617981676583979, "grad_norm": 0.005133177153766155, "learning_rate": 3.396643757060442e-05, "loss": 0.29262471199035645, "step": 154150 }, { "epoch": 0.661841099748418, "grad_norm": 0.030422838404774666, "learning_rate": 3.3962125850486795e-05, "loss": 0.25519423484802245, "step": 154160 }, { "epoch": 0.6618840318384379, "grad_norm": 0.9170808792114258, "learning_rate": 3.395781413036917e-05, "loss": 0.20135126113891602, "step": 154170 }, { "epoch": 0.661926963928458, "grad_norm": 1.5381137132644653, "learning_rate": 3.395350241025155e-05, "loss": 0.25586743354797364, "step": 154180 }, { "epoch": 0.661969896018478, "grad_norm": 0.004393408540636301, "learning_rate": 3.394919069013393e-05, "loss": 0.09318647980690002, "step": 154190 }, { "epoch": 0.6620128281084979, "grad_norm": 0.046787384897470474, "learning_rate": 3.39448789700163e-05, "loss": 0.15083513259887696, "step": 154200 }, { "epoch": 0.662055760198518, "grad_norm": 0.5503677725791931, "learning_rate": 3.3940567249898675e-05, "loss": 0.04096742570400238, "step": 154210 }, { "epoch": 0.662098692288538, "grad_norm": 2.4673948287963867, "learning_rate": 3.393625552978105e-05, "loss": 0.20666275024414063, "step": 154220 }, { "epoch": 0.662141624378558, "grad_norm": 0.740168571472168, "learning_rate": 3.393194380966343e-05, "loss": 0.06488361954689026, "step": 154230 }, { "epoch": 0.662184556468578, "grad_norm": 3.7524595260620117, "learning_rate": 3.392763208954581e-05, "loss": 0.31080782413482666, "step": 154240 }, { "epoch": 0.662227488558598, "grad_norm": 3.262477159500122, "learning_rate": 3.392332036942818e-05, "loss": 0.19835619926452636, "step": 154250 }, { "epoch": 0.662270420648618, "grad_norm": 0.24744971096515656, "learning_rate": 3.3919008649310555e-05, "loss": 0.26123824119567873, "step": 154260 }, { "epoch": 0.662313352738638, "grad_norm": 0.019804177805781364, "learning_rate": 3.391469692919293e-05, "loss": 0.1702876567840576, "step": 154270 }, { "epoch": 0.662356284828658, "grad_norm": 0.40778955817222595, "learning_rate": 3.391038520907531e-05, "loss": 0.13165427446365358, "step": 154280 }, { "epoch": 0.662399216918678, "grad_norm": 0.00482986168935895, "learning_rate": 3.390607348895769e-05, "loss": 0.16334230899810792, "step": 154290 }, { "epoch": 0.662442149008698, "grad_norm": 0.011098915711045265, "learning_rate": 3.3901761768840064e-05, "loss": 0.33003227710723876, "step": 154300 }, { "epoch": 0.6624850810987181, "grad_norm": 0.035571079701185226, "learning_rate": 3.389745004872244e-05, "loss": 0.11664698123931885, "step": 154310 }, { "epoch": 0.6625280131887381, "grad_norm": 0.027940262109041214, "learning_rate": 3.389313832860482e-05, "loss": 0.1556524395942688, "step": 154320 }, { "epoch": 0.662570945278758, "grad_norm": 1.5852309465408325, "learning_rate": 3.388882660848719e-05, "loss": 0.35993731021881104, "step": 154330 }, { "epoch": 0.6626138773687781, "grad_norm": 0.4660571217536926, "learning_rate": 3.388451488836957e-05, "loss": 0.16389789581298828, "step": 154340 }, { "epoch": 0.6626568094587981, "grad_norm": 1.251645803451538, "learning_rate": 3.3880203168251944e-05, "loss": 0.12421987056732178, "step": 154350 }, { "epoch": 0.662699741548818, "grad_norm": 1.0762736797332764, "learning_rate": 3.387589144813432e-05, "loss": 0.5422662258148193, "step": 154360 }, { "epoch": 0.6627426736388381, "grad_norm": 1.0706373453140259, "learning_rate": 3.387157972801669e-05, "loss": 0.23794779777526856, "step": 154370 }, { "epoch": 0.6627856057288581, "grad_norm": 0.021342789754271507, "learning_rate": 3.386726800789907e-05, "loss": 0.21014752388000488, "step": 154380 }, { "epoch": 0.6628285378188781, "grad_norm": 0.0008740926277823746, "learning_rate": 3.386295628778145e-05, "loss": 0.08506430983543396, "step": 154390 }, { "epoch": 0.6628714699088981, "grad_norm": 0.11168550699949265, "learning_rate": 3.385864456766383e-05, "loss": 0.26096253395080565, "step": 154400 }, { "epoch": 0.6629144019989182, "grad_norm": 0.041811276227235794, "learning_rate": 3.38543328475462e-05, "loss": 0.11968740224838256, "step": 154410 }, { "epoch": 0.6629573340889381, "grad_norm": 8.693405151367188, "learning_rate": 3.385002112742858e-05, "loss": 0.3119154691696167, "step": 154420 }, { "epoch": 0.6630002661789581, "grad_norm": 0.019545461982488632, "learning_rate": 3.3845709407310956e-05, "loss": 0.12815322875976562, "step": 154430 }, { "epoch": 0.6630431982689782, "grad_norm": 0.01368553377687931, "learning_rate": 3.3841397687193333e-05, "loss": 0.204140043258667, "step": 154440 }, { "epoch": 0.6630861303589981, "grad_norm": 0.00397186353802681, "learning_rate": 3.3837085967075704e-05, "loss": 0.22590651512145996, "step": 154450 }, { "epoch": 0.6631290624490181, "grad_norm": 0.10365869104862213, "learning_rate": 3.383277424695808e-05, "loss": 0.04067247211933136, "step": 154460 }, { "epoch": 0.6631719945390382, "grad_norm": 0.9955406188964844, "learning_rate": 3.382846252684046e-05, "loss": 0.273392391204834, "step": 154470 }, { "epoch": 0.6632149266290581, "grad_norm": 0.017841503024101257, "learning_rate": 3.3824150806722836e-05, "loss": 0.14227255582809448, "step": 154480 }, { "epoch": 0.6632578587190782, "grad_norm": 0.018622983247041702, "learning_rate": 3.3819839086605207e-05, "loss": 0.15459182262420654, "step": 154490 }, { "epoch": 0.6633007908090982, "grad_norm": 1.244162917137146, "learning_rate": 3.3815527366487584e-05, "loss": 0.3782146215438843, "step": 154500 }, { "epoch": 0.6633437228991181, "grad_norm": 0.0622965507209301, "learning_rate": 3.381121564636997e-05, "loss": 0.3015340089797974, "step": 154510 }, { "epoch": 0.6633866549891382, "grad_norm": 19.102006912231445, "learning_rate": 3.3806903926252345e-05, "loss": 0.23232758045196533, "step": 154520 }, { "epoch": 0.6634295870791582, "grad_norm": 0.04483085870742798, "learning_rate": 3.3802592206134716e-05, "loss": 0.23261446952819825, "step": 154530 }, { "epoch": 0.6634725191691782, "grad_norm": 1.1958906650543213, "learning_rate": 3.379828048601709e-05, "loss": 0.23122506141662597, "step": 154540 }, { "epoch": 0.6635154512591982, "grad_norm": 0.018912751227617264, "learning_rate": 3.379396876589947e-05, "loss": 0.2717156410217285, "step": 154550 }, { "epoch": 0.6635583833492182, "grad_norm": 0.008527401834726334, "learning_rate": 3.378965704578185e-05, "loss": 0.12283614873886109, "step": 154560 }, { "epoch": 0.6636013154392382, "grad_norm": 2.4211113452911377, "learning_rate": 3.378534532566422e-05, "loss": 0.43242511749267576, "step": 154570 }, { "epoch": 0.6636442475292582, "grad_norm": 0.01818317361176014, "learning_rate": 3.3781033605546596e-05, "loss": 0.25135438442230223, "step": 154580 }, { "epoch": 0.6636871796192783, "grad_norm": 0.006349280942231417, "learning_rate": 3.377672188542897e-05, "loss": 0.242250657081604, "step": 154590 }, { "epoch": 0.6637301117092982, "grad_norm": 0.003851179266348481, "learning_rate": 3.377241016531135e-05, "loss": 0.17027580738067627, "step": 154600 }, { "epoch": 0.6637730437993182, "grad_norm": 3.806344985961914, "learning_rate": 3.376809844519373e-05, "loss": 0.26419591903686523, "step": 154610 }, { "epoch": 0.6638159758893383, "grad_norm": 5.863420009613037, "learning_rate": 3.3763786725076105e-05, "loss": 0.42422032356262207, "step": 154620 }, { "epoch": 0.6638589079793582, "grad_norm": 0.22056105732917786, "learning_rate": 3.375947500495848e-05, "loss": 0.06590731143951416, "step": 154630 }, { "epoch": 0.6639018400693782, "grad_norm": 1.7405567169189453, "learning_rate": 3.375516328484086e-05, "loss": 0.13169472217559813, "step": 154640 }, { "epoch": 0.6639447721593983, "grad_norm": 0.08242862671613693, "learning_rate": 3.375085156472324e-05, "loss": 0.3194495439529419, "step": 154650 }, { "epoch": 0.6639877042494182, "grad_norm": 0.17024773359298706, "learning_rate": 3.374653984460561e-05, "loss": 0.22560033798217774, "step": 154660 }, { "epoch": 0.6640306363394383, "grad_norm": 0.01813027262687683, "learning_rate": 3.3742228124487985e-05, "loss": 0.33473637104034426, "step": 154670 }, { "epoch": 0.6640735684294583, "grad_norm": 0.1378939300775528, "learning_rate": 3.373791640437036e-05, "loss": 0.18422240018844604, "step": 154680 }, { "epoch": 0.6641165005194782, "grad_norm": 0.44337424635887146, "learning_rate": 3.373360468425274e-05, "loss": 0.12879838943481445, "step": 154690 }, { "epoch": 0.6641594326094983, "grad_norm": 0.04703165963292122, "learning_rate": 3.372929296413511e-05, "loss": 0.1979563355445862, "step": 154700 }, { "epoch": 0.6642023646995183, "grad_norm": 0.0008391539449803531, "learning_rate": 3.372498124401749e-05, "loss": 0.21918249130249023, "step": 154710 }, { "epoch": 0.6642452967895383, "grad_norm": 1.3081825971603394, "learning_rate": 3.3720669523899865e-05, "loss": 0.22604124546051024, "step": 154720 }, { "epoch": 0.6642882288795583, "grad_norm": 0.26419490575790405, "learning_rate": 3.371635780378224e-05, "loss": 0.19358288049697875, "step": 154730 }, { "epoch": 0.6643311609695783, "grad_norm": 0.19384314119815826, "learning_rate": 3.371204608366462e-05, "loss": 0.18703948259353637, "step": 154740 }, { "epoch": 0.6643740930595984, "grad_norm": 0.0002898857928812504, "learning_rate": 3.3707734363547e-05, "loss": 0.23569769859313966, "step": 154750 }, { "epoch": 0.6644170251496183, "grad_norm": 1.9487954378128052, "learning_rate": 3.3703422643429375e-05, "loss": 0.1675286054611206, "step": 154760 }, { "epoch": 0.6644599572396384, "grad_norm": 4.060025691986084, "learning_rate": 3.369911092331175e-05, "loss": 0.2972411155700684, "step": 154770 }, { "epoch": 0.6645028893296584, "grad_norm": 0.18675732612609863, "learning_rate": 3.369479920319412e-05, "loss": 0.2717970609664917, "step": 154780 }, { "epoch": 0.6645458214196783, "grad_norm": 0.0183492973446846, "learning_rate": 3.36904874830765e-05, "loss": 0.16480090618133544, "step": 154790 }, { "epoch": 0.6645887535096984, "grad_norm": 4.149248123168945, "learning_rate": 3.368617576295888e-05, "loss": 0.2041304349899292, "step": 154800 }, { "epoch": 0.6646316855997184, "grad_norm": 0.18096356093883514, "learning_rate": 3.3681864042841254e-05, "loss": 0.14483392238616943, "step": 154810 }, { "epoch": 0.6646746176897383, "grad_norm": 0.04419800266623497, "learning_rate": 3.3677552322723625e-05, "loss": 0.14780722856521605, "step": 154820 }, { "epoch": 0.6647175497797584, "grad_norm": 2.218914747238159, "learning_rate": 3.3673240602606e-05, "loss": 0.39519851207733153, "step": 154830 }, { "epoch": 0.6647604818697784, "grad_norm": 0.037824928760528564, "learning_rate": 3.366892888248838e-05, "loss": 0.1591792106628418, "step": 154840 }, { "epoch": 0.6648034139597984, "grad_norm": 0.018527628853917122, "learning_rate": 3.366461716237076e-05, "loss": 0.14791398048400878, "step": 154850 }, { "epoch": 0.6648463460498184, "grad_norm": 1.1151235103607178, "learning_rate": 3.3660305442253134e-05, "loss": 0.14960010051727296, "step": 154860 }, { "epoch": 0.6648892781398384, "grad_norm": 1.7647981643676758, "learning_rate": 3.365599372213551e-05, "loss": 0.13042131662368775, "step": 154870 }, { "epoch": 0.6649322102298584, "grad_norm": 0.005045225378125906, "learning_rate": 3.365168200201789e-05, "loss": 0.25216450691223147, "step": 154880 }, { "epoch": 0.6649751423198784, "grad_norm": 0.37474727630615234, "learning_rate": 3.3647370281900266e-05, "loss": 0.12212558984756469, "step": 154890 }, { "epoch": 0.6650180744098985, "grad_norm": 0.025088215246796608, "learning_rate": 3.364305856178264e-05, "loss": 0.13682304620742797, "step": 154900 }, { "epoch": 0.6650610064999184, "grad_norm": 1.0522595643997192, "learning_rate": 3.3638746841665014e-05, "loss": 0.1837522029876709, "step": 154910 }, { "epoch": 0.6651039385899384, "grad_norm": 0.06288671493530273, "learning_rate": 3.363443512154739e-05, "loss": 0.19506425857543946, "step": 154920 }, { "epoch": 0.6651468706799585, "grad_norm": 0.042734645307064056, "learning_rate": 3.363012340142977e-05, "loss": 0.15663498640060425, "step": 154930 }, { "epoch": 0.6651898027699784, "grad_norm": 0.15944519639015198, "learning_rate": 3.362581168131214e-05, "loss": 0.2185056686401367, "step": 154940 }, { "epoch": 0.6652327348599985, "grad_norm": 0.002336745150387287, "learning_rate": 3.362149996119452e-05, "loss": 0.25134222507476806, "step": 154950 }, { "epoch": 0.6652756669500185, "grad_norm": 0.17214441299438477, "learning_rate": 3.3617188241076894e-05, "loss": 0.14887669086456298, "step": 154960 }, { "epoch": 0.6653185990400384, "grad_norm": 0.19424983859062195, "learning_rate": 3.361287652095927e-05, "loss": 0.3506969690322876, "step": 154970 }, { "epoch": 0.6653615311300585, "grad_norm": 0.8946022391319275, "learning_rate": 3.360856480084165e-05, "loss": 0.2966755867004395, "step": 154980 }, { "epoch": 0.6654044632200785, "grad_norm": 0.046246446669101715, "learning_rate": 3.3604253080724026e-05, "loss": 0.14898393154144288, "step": 154990 }, { "epoch": 0.6654473953100984, "grad_norm": 3.2097201347351074, "learning_rate": 3.3599941360606404e-05, "loss": 0.25090641975402833, "step": 155000 }, { "epoch": 0.6654473953100984, "eval_loss": 0.40841829776763916, "eval_runtime": 27.1474, "eval_samples_per_second": 3.684, "eval_steps_per_second": 3.684, "step": 155000 }, { "epoch": 0.6654903274001185, "grad_norm": 0.2784558832645416, "learning_rate": 3.359562964048878e-05, "loss": 0.05249403715133667, "step": 155010 }, { "epoch": 0.6655332594901385, "grad_norm": 0.1900515854358673, "learning_rate": 3.359131792037116e-05, "loss": 0.135952889919281, "step": 155020 }, { "epoch": 0.6655761915801585, "grad_norm": 0.12390508502721786, "learning_rate": 3.358700620025353e-05, "loss": 0.13241848945617676, "step": 155030 }, { "epoch": 0.6656191236701785, "grad_norm": 1.9098988771438599, "learning_rate": 3.3582694480135906e-05, "loss": 0.18672947883605956, "step": 155040 }, { "epoch": 0.6656620557601985, "grad_norm": 1.399539828300476, "learning_rate": 3.3578382760018284e-05, "loss": 0.24725022315979003, "step": 155050 }, { "epoch": 0.6657049878502185, "grad_norm": 1.9422625303268433, "learning_rate": 3.357407103990066e-05, "loss": 0.18814780712127685, "step": 155060 }, { "epoch": 0.6657479199402385, "grad_norm": 1.2785696983337402, "learning_rate": 3.356975931978303e-05, "loss": 0.21161410808563233, "step": 155070 }, { "epoch": 0.6657908520302586, "grad_norm": 0.047315359115600586, "learning_rate": 3.356544759966541e-05, "loss": 0.190315580368042, "step": 155080 }, { "epoch": 0.6658337841202785, "grad_norm": 0.07102714478969574, "learning_rate": 3.3561135879547786e-05, "loss": 0.2807221651077271, "step": 155090 }, { "epoch": 0.6658767162102985, "grad_norm": 0.020219087600708008, "learning_rate": 3.3556824159430164e-05, "loss": 0.0993302583694458, "step": 155100 }, { "epoch": 0.6659196483003186, "grad_norm": 0.0007031201384961605, "learning_rate": 3.355251243931254e-05, "loss": 0.2325591564178467, "step": 155110 }, { "epoch": 0.6659625803903385, "grad_norm": 0.025428976863622665, "learning_rate": 3.354820071919492e-05, "loss": 0.3676584243774414, "step": 155120 }, { "epoch": 0.6660055124803586, "grad_norm": 0.05800691992044449, "learning_rate": 3.3543888999077296e-05, "loss": 0.16004191637039183, "step": 155130 }, { "epoch": 0.6660484445703786, "grad_norm": 0.02092166617512703, "learning_rate": 3.353957727895967e-05, "loss": 0.3744916200637817, "step": 155140 }, { "epoch": 0.6660913766603985, "grad_norm": 0.005752600729465485, "learning_rate": 3.3535265558842043e-05, "loss": 0.10523773431777954, "step": 155150 }, { "epoch": 0.6661343087504186, "grad_norm": 1.9601504802703857, "learning_rate": 3.353095383872442e-05, "loss": 0.31460092067718504, "step": 155160 }, { "epoch": 0.6661772408404386, "grad_norm": 0.4158020615577698, "learning_rate": 3.35266421186068e-05, "loss": 0.23941915035247802, "step": 155170 }, { "epoch": 0.6662201729304587, "grad_norm": 0.811876654624939, "learning_rate": 3.3522330398489175e-05, "loss": 0.3740152597427368, "step": 155180 }, { "epoch": 0.6662631050204786, "grad_norm": 16.450340270996094, "learning_rate": 3.3518018678371546e-05, "loss": 0.25375795364379883, "step": 155190 }, { "epoch": 0.6663060371104986, "grad_norm": 0.004651315044611692, "learning_rate": 3.3513706958253923e-05, "loss": 0.236892032623291, "step": 155200 }, { "epoch": 0.6663489692005187, "grad_norm": 0.07660853117704391, "learning_rate": 3.35093952381363e-05, "loss": 0.3181604862213135, "step": 155210 }, { "epoch": 0.6663919012905386, "grad_norm": 0.1353936493396759, "learning_rate": 3.3505083518018685e-05, "loss": 0.3340135097503662, "step": 155220 }, { "epoch": 0.6664348333805586, "grad_norm": 1.6373554468154907, "learning_rate": 3.3500771797901055e-05, "loss": 0.27181158065795896, "step": 155230 }, { "epoch": 0.6664777654705787, "grad_norm": 0.0005052989581599832, "learning_rate": 3.349646007778343e-05, "loss": 0.24267089366912842, "step": 155240 }, { "epoch": 0.6665206975605986, "grad_norm": 0.0029463102109730244, "learning_rate": 3.349214835766581e-05, "loss": 0.2408508539199829, "step": 155250 }, { "epoch": 0.6665636296506187, "grad_norm": 0.013165873475372791, "learning_rate": 3.348783663754819e-05, "loss": 0.24375631809234619, "step": 155260 }, { "epoch": 0.6666065617406387, "grad_norm": 1.2235469818115234, "learning_rate": 3.348352491743056e-05, "loss": 0.25014405250549315, "step": 155270 }, { "epoch": 0.6666494938306586, "grad_norm": 5.4037275314331055, "learning_rate": 3.3479213197312935e-05, "loss": 0.21150333881378175, "step": 155280 }, { "epoch": 0.6666924259206787, "grad_norm": 1.3540924787521362, "learning_rate": 3.347490147719531e-05, "loss": 0.2414700984954834, "step": 155290 }, { "epoch": 0.6667353580106987, "grad_norm": 0.0003013101522810757, "learning_rate": 3.347058975707769e-05, "loss": 0.23213748931884765, "step": 155300 }, { "epoch": 0.6667782901007187, "grad_norm": 2.7998039722442627, "learning_rate": 3.346627803696006e-05, "loss": 0.22673821449279785, "step": 155310 }, { "epoch": 0.6668212221907387, "grad_norm": 0.4135327935218811, "learning_rate": 3.346196631684244e-05, "loss": 0.015942250192165375, "step": 155320 }, { "epoch": 0.6668641542807587, "grad_norm": 0.380653440952301, "learning_rate": 3.345765459672482e-05, "loss": 0.21530394554138182, "step": 155330 }, { "epoch": 0.6669070863707787, "grad_norm": 0.003645789111033082, "learning_rate": 3.34533428766072e-05, "loss": 0.1611285090446472, "step": 155340 }, { "epoch": 0.6669500184607987, "grad_norm": 0.00044938252540305257, "learning_rate": 3.344903115648958e-05, "loss": 0.3208571434020996, "step": 155350 }, { "epoch": 0.6669929505508188, "grad_norm": 0.06646423786878586, "learning_rate": 3.344471943637195e-05, "loss": 0.03411422073841095, "step": 155360 }, { "epoch": 0.6670358826408387, "grad_norm": 0.0701594427227974, "learning_rate": 3.3440407716254325e-05, "loss": 0.18013898134231568, "step": 155370 }, { "epoch": 0.6670788147308587, "grad_norm": 0.015400223433971405, "learning_rate": 3.34360959961367e-05, "loss": 0.16810702085494994, "step": 155380 }, { "epoch": 0.6671217468208788, "grad_norm": 1.3191877603530884, "learning_rate": 3.343178427601908e-05, "loss": 0.028722655773162842, "step": 155390 }, { "epoch": 0.6671646789108987, "grad_norm": 2.3753511905670166, "learning_rate": 3.342747255590145e-05, "loss": 0.2831770420074463, "step": 155400 }, { "epoch": 0.6672076110009187, "grad_norm": 0.5197781920433044, "learning_rate": 3.342316083578383e-05, "loss": 0.4160111427307129, "step": 155410 }, { "epoch": 0.6672505430909388, "grad_norm": 0.0006538841407746077, "learning_rate": 3.3418849115666205e-05, "loss": 0.06287867426872254, "step": 155420 }, { "epoch": 0.6672934751809587, "grad_norm": 0.037650611251592636, "learning_rate": 3.341453739554858e-05, "loss": 0.044908028841018674, "step": 155430 }, { "epoch": 0.6673364072709788, "grad_norm": 0.0031805236358195543, "learning_rate": 3.341022567543096e-05, "loss": 0.059893810749053956, "step": 155440 }, { "epoch": 0.6673793393609988, "grad_norm": 2.588815689086914, "learning_rate": 3.340591395531334e-05, "loss": 0.33464751243591306, "step": 155450 }, { "epoch": 0.6674222714510187, "grad_norm": 0.06854215264320374, "learning_rate": 3.3401602235195714e-05, "loss": 0.12744616270065307, "step": 155460 }, { "epoch": 0.6674652035410388, "grad_norm": 0.002928926143795252, "learning_rate": 3.339729051507809e-05, "loss": 0.2883869409561157, "step": 155470 }, { "epoch": 0.6675081356310588, "grad_norm": 1.0097222328186035, "learning_rate": 3.339297879496046e-05, "loss": 0.1543585777282715, "step": 155480 }, { "epoch": 0.6675510677210788, "grad_norm": 1.2233046293258667, "learning_rate": 3.338866707484284e-05, "loss": 0.16075161695480347, "step": 155490 }, { "epoch": 0.6675939998110988, "grad_norm": 2.7273221015930176, "learning_rate": 3.3384355354725217e-05, "loss": 0.2358945608139038, "step": 155500 }, { "epoch": 0.6676369319011188, "grad_norm": 0.0005102035938762128, "learning_rate": 3.3380043634607594e-05, "loss": 0.07890783548355103, "step": 155510 }, { "epoch": 0.6676798639911388, "grad_norm": 0.056222833693027496, "learning_rate": 3.3375731914489965e-05, "loss": 0.19825621843338012, "step": 155520 }, { "epoch": 0.6677227960811588, "grad_norm": 0.0347059927880764, "learning_rate": 3.337142019437234e-05, "loss": 0.09248555302619935, "step": 155530 }, { "epoch": 0.6677657281711789, "grad_norm": 0.19800904393196106, "learning_rate": 3.336710847425472e-05, "loss": 0.11048685312271118, "step": 155540 }, { "epoch": 0.6678086602611988, "grad_norm": 3.2383038997650146, "learning_rate": 3.3362796754137097e-05, "loss": 0.24872050285339356, "step": 155550 }, { "epoch": 0.6678515923512188, "grad_norm": 0.050600819289684296, "learning_rate": 3.3358485034019474e-05, "loss": 0.39221649169921874, "step": 155560 }, { "epoch": 0.6678945244412389, "grad_norm": 0.08034029603004456, "learning_rate": 3.335417331390185e-05, "loss": 0.2349752426147461, "step": 155570 }, { "epoch": 0.6679374565312588, "grad_norm": 0.002499277936294675, "learning_rate": 3.334986159378423e-05, "loss": 0.007202136516571045, "step": 155580 }, { "epoch": 0.6679803886212788, "grad_norm": 2.0072579383850098, "learning_rate": 3.3345549873666606e-05, "loss": 0.4786830902099609, "step": 155590 }, { "epoch": 0.6680233207112989, "grad_norm": 0.13278234004974365, "learning_rate": 3.3341238153548976e-05, "loss": 0.04938144087791443, "step": 155600 }, { "epoch": 0.6680662528013189, "grad_norm": 2.5657100677490234, "learning_rate": 3.3336926433431354e-05, "loss": 0.10031435489654542, "step": 155610 }, { "epoch": 0.6681091848913389, "grad_norm": 0.006575672887265682, "learning_rate": 3.333261471331373e-05, "loss": 0.199531090259552, "step": 155620 }, { "epoch": 0.6681521169813589, "grad_norm": 0.0015442880103364587, "learning_rate": 3.332830299319611e-05, "loss": 0.25533859729766845, "step": 155630 }, { "epoch": 0.6681950490713789, "grad_norm": 0.11753270030021667, "learning_rate": 3.332399127307848e-05, "loss": 0.23512325286865235, "step": 155640 }, { "epoch": 0.6682379811613989, "grad_norm": 0.043603476136922836, "learning_rate": 3.3319679552960856e-05, "loss": 0.2509476661682129, "step": 155650 }, { "epoch": 0.6682809132514189, "grad_norm": 1.2565324306488037, "learning_rate": 3.3315367832843234e-05, "loss": 0.2644261598587036, "step": 155660 }, { "epoch": 0.668323845341439, "grad_norm": 0.9435896277427673, "learning_rate": 3.331105611272561e-05, "loss": 0.3087204933166504, "step": 155670 }, { "epoch": 0.6683667774314589, "grad_norm": 0.005109146703034639, "learning_rate": 3.330674439260799e-05, "loss": 0.30349709987640383, "step": 155680 }, { "epoch": 0.6684097095214789, "grad_norm": 0.23565086722373962, "learning_rate": 3.3302432672490366e-05, "loss": 0.033765727281570436, "step": 155690 }, { "epoch": 0.668452641611499, "grad_norm": 0.012045781128108501, "learning_rate": 3.329812095237274e-05, "loss": 0.32736949920654296, "step": 155700 }, { "epoch": 0.6684955737015189, "grad_norm": 0.003710044315084815, "learning_rate": 3.329380923225512e-05, "loss": 0.12195107936859131, "step": 155710 }, { "epoch": 0.668538505791539, "grad_norm": 0.02557937614619732, "learning_rate": 3.32894975121375e-05, "loss": 0.4231997013092041, "step": 155720 }, { "epoch": 0.668581437881559, "grad_norm": 0.06147534027695656, "learning_rate": 3.328518579201987e-05, "loss": 0.27741050720214844, "step": 155730 }, { "epoch": 0.6686243699715789, "grad_norm": 0.031692177057266235, "learning_rate": 3.3280874071902246e-05, "loss": 0.12817578315734862, "step": 155740 }, { "epoch": 0.668667302061599, "grad_norm": 12.631524085998535, "learning_rate": 3.327656235178462e-05, "loss": 0.19571025371551515, "step": 155750 }, { "epoch": 0.668710234151619, "grad_norm": 2.5644569396972656, "learning_rate": 3.3272250631667e-05, "loss": 0.21142849922180176, "step": 155760 }, { "epoch": 0.6687531662416389, "grad_norm": 2.2528250217437744, "learning_rate": 3.326793891154937e-05, "loss": 0.11066330671310425, "step": 155770 }, { "epoch": 0.668796098331659, "grad_norm": 0.1499422937631607, "learning_rate": 3.326362719143175e-05, "loss": 0.14271847009658814, "step": 155780 }, { "epoch": 0.668839030421679, "grad_norm": 0.07106486707925797, "learning_rate": 3.3259315471314126e-05, "loss": 0.03233225643634796, "step": 155790 }, { "epoch": 0.668881962511699, "grad_norm": 0.09190256148576736, "learning_rate": 3.32550037511965e-05, "loss": 0.20338826179504393, "step": 155800 }, { "epoch": 0.668924894601719, "grad_norm": 0.26690569519996643, "learning_rate": 3.325069203107888e-05, "loss": 0.19002293348312377, "step": 155810 }, { "epoch": 0.668967826691739, "grad_norm": 2.454503297805786, "learning_rate": 3.324638031096126e-05, "loss": 0.3581833839416504, "step": 155820 }, { "epoch": 0.669010758781759, "grad_norm": 6.0999755859375, "learning_rate": 3.3242068590843635e-05, "loss": 0.3228771686553955, "step": 155830 }, { "epoch": 0.669053690871779, "grad_norm": 0.006542257498949766, "learning_rate": 3.323775687072601e-05, "loss": 0.21552627086639403, "step": 155840 }, { "epoch": 0.6690966229617991, "grad_norm": 1.2118092775344849, "learning_rate": 3.323344515060838e-05, "loss": 0.26827244758605956, "step": 155850 }, { "epoch": 0.669139555051819, "grad_norm": 0.0797976404428482, "learning_rate": 3.322913343049076e-05, "loss": 0.22456207275390624, "step": 155860 }, { "epoch": 0.669182487141839, "grad_norm": 0.2964301109313965, "learning_rate": 3.322482171037314e-05, "loss": 0.2766679048538208, "step": 155870 }, { "epoch": 0.6692254192318591, "grad_norm": 3.1502134799957275, "learning_rate": 3.3220509990255515e-05, "loss": 0.36197125911712646, "step": 155880 }, { "epoch": 0.669268351321879, "grad_norm": 0.03810900077223778, "learning_rate": 3.3216198270137886e-05, "loss": 0.14142590761184692, "step": 155890 }, { "epoch": 0.669311283411899, "grad_norm": 2.3641881942749023, "learning_rate": 3.321188655002026e-05, "loss": 0.19493041038513184, "step": 155900 }, { "epoch": 0.6693542155019191, "grad_norm": 42.50188064575195, "learning_rate": 3.320757482990264e-05, "loss": 0.16444485187530516, "step": 155910 }, { "epoch": 0.669397147591939, "grad_norm": 0.01806536316871643, "learning_rate": 3.3203263109785024e-05, "loss": 0.36703317165374755, "step": 155920 }, { "epoch": 0.6694400796819591, "grad_norm": 0.08311089873313904, "learning_rate": 3.3198951389667395e-05, "loss": 0.24496970176696778, "step": 155930 }, { "epoch": 0.6694830117719791, "grad_norm": 0.0521310456097126, "learning_rate": 3.319463966954977e-05, "loss": 0.2650176763534546, "step": 155940 }, { "epoch": 0.669525943861999, "grad_norm": 0.10356919467449188, "learning_rate": 3.319032794943215e-05, "loss": 0.2823972702026367, "step": 155950 }, { "epoch": 0.6695688759520191, "grad_norm": 0.0009236885816790164, "learning_rate": 3.318601622931453e-05, "loss": 0.09366688132286072, "step": 155960 }, { "epoch": 0.6696118080420391, "grad_norm": 0.03426114842295647, "learning_rate": 3.31817045091969e-05, "loss": 0.2280339241027832, "step": 155970 }, { "epoch": 0.6696547401320591, "grad_norm": 0.09179901331663132, "learning_rate": 3.3177392789079275e-05, "loss": 0.17582715749740602, "step": 155980 }, { "epoch": 0.6696976722220791, "grad_norm": 0.04506692662835121, "learning_rate": 3.317308106896165e-05, "loss": 0.08360955119132996, "step": 155990 }, { "epoch": 0.6697406043120991, "grad_norm": 1.34595787525177, "learning_rate": 3.316876934884403e-05, "loss": 0.20075461864471436, "step": 156000 }, { "epoch": 0.6697406043120991, "eval_loss": 0.39436066150665283, "eval_runtime": 27.161, "eval_samples_per_second": 3.682, "eval_steps_per_second": 3.682, "step": 156000 }, { "epoch": 0.6697835364021191, "grad_norm": 1.7346657514572144, "learning_rate": 3.31644576287264e-05, "loss": 0.31329498291015623, "step": 156010 }, { "epoch": 0.6698264684921391, "grad_norm": 0.012260986492037773, "learning_rate": 3.316014590860878e-05, "loss": 0.14117549657821654, "step": 156020 }, { "epoch": 0.6698694005821592, "grad_norm": 0.07373584061861038, "learning_rate": 3.315583418849116e-05, "loss": 0.07356157898902893, "step": 156030 }, { "epoch": 0.6699123326721792, "grad_norm": 1.865148663520813, "learning_rate": 3.315152246837354e-05, "loss": 0.31308043003082275, "step": 156040 }, { "epoch": 0.6699552647621991, "grad_norm": 0.000529519107658416, "learning_rate": 3.3147210748255916e-05, "loss": 0.264267110824585, "step": 156050 }, { "epoch": 0.6699981968522192, "grad_norm": 0.051425471901893616, "learning_rate": 3.314289902813829e-05, "loss": 0.27402348518371583, "step": 156060 }, { "epoch": 0.6700411289422392, "grad_norm": 0.07035046815872192, "learning_rate": 3.3138587308020664e-05, "loss": 0.20741422176361085, "step": 156070 }, { "epoch": 0.6700840610322591, "grad_norm": 0.03581790253520012, "learning_rate": 3.313427558790304e-05, "loss": 0.16281461715698242, "step": 156080 }, { "epoch": 0.6701269931222792, "grad_norm": 1.2740488052368164, "learning_rate": 3.312996386778542e-05, "loss": 0.18876116275787352, "step": 156090 }, { "epoch": 0.6701699252122992, "grad_norm": 1.7908412218093872, "learning_rate": 3.312565214766779e-05, "loss": 0.24298477172851562, "step": 156100 }, { "epoch": 0.6702128573023192, "grad_norm": 0.593454897403717, "learning_rate": 3.312134042755017e-05, "loss": 0.2801332473754883, "step": 156110 }, { "epoch": 0.6702557893923392, "grad_norm": 1.5664280652999878, "learning_rate": 3.3117028707432544e-05, "loss": 0.22960739135742186, "step": 156120 }, { "epoch": 0.6702987214823592, "grad_norm": 0.19571813941001892, "learning_rate": 3.311271698731492e-05, "loss": 0.00823623463511467, "step": 156130 }, { "epoch": 0.6703416535723792, "grad_norm": 0.0967881977558136, "learning_rate": 3.31084052671973e-05, "loss": 0.3439939498901367, "step": 156140 }, { "epoch": 0.6703845856623992, "grad_norm": 1.896458387374878, "learning_rate": 3.3104093547079676e-05, "loss": 0.33332931995391846, "step": 156150 }, { "epoch": 0.6704275177524193, "grad_norm": 0.15472471714019775, "learning_rate": 3.3099781826962053e-05, "loss": 0.006204599142074585, "step": 156160 }, { "epoch": 0.6704704498424392, "grad_norm": 0.8047659397125244, "learning_rate": 3.309547010684443e-05, "loss": 0.07587890028953552, "step": 156170 }, { "epoch": 0.6705133819324592, "grad_norm": 0.0013157215435057878, "learning_rate": 3.30911583867268e-05, "loss": 0.32708396911621096, "step": 156180 }, { "epoch": 0.6705563140224793, "grad_norm": 2.68961501121521, "learning_rate": 3.308684666660918e-05, "loss": 0.15513334274291993, "step": 156190 }, { "epoch": 0.6705992461124992, "grad_norm": 0.21397237479686737, "learning_rate": 3.3082534946491556e-05, "loss": 0.1734054684638977, "step": 156200 }, { "epoch": 0.6706421782025193, "grad_norm": 0.763003408908844, "learning_rate": 3.3078223226373933e-05, "loss": 0.3408956527709961, "step": 156210 }, { "epoch": 0.6706851102925393, "grad_norm": 2.3349876403808594, "learning_rate": 3.3073911506256304e-05, "loss": 0.4397918701171875, "step": 156220 }, { "epoch": 0.6707280423825592, "grad_norm": 0.10397858917713165, "learning_rate": 3.306959978613868e-05, "loss": 0.3239002227783203, "step": 156230 }, { "epoch": 0.6707709744725793, "grad_norm": 0.04382269084453583, "learning_rate": 3.306528806602106e-05, "loss": 0.14653106927871704, "step": 156240 }, { "epoch": 0.6708139065625993, "grad_norm": 0.061937592923641205, "learning_rate": 3.3060976345903436e-05, "loss": 0.10898768901824951, "step": 156250 }, { "epoch": 0.6708568386526192, "grad_norm": 0.19617868959903717, "learning_rate": 3.305666462578581e-05, "loss": 0.041546297073364255, "step": 156260 }, { "epoch": 0.6708997707426393, "grad_norm": 1.3481838703155518, "learning_rate": 3.305235290566819e-05, "loss": 0.04840826690196991, "step": 156270 }, { "epoch": 0.6709427028326593, "grad_norm": 0.003372886683791876, "learning_rate": 3.304804118555057e-05, "loss": 0.034439802169799805, "step": 156280 }, { "epoch": 0.6709856349226793, "grad_norm": 2.043133020401001, "learning_rate": 3.3043729465432945e-05, "loss": 0.48386592864990235, "step": 156290 }, { "epoch": 0.6710285670126993, "grad_norm": 0.006714210379868746, "learning_rate": 3.3039417745315316e-05, "loss": 0.08989254236221314, "step": 156300 }, { "epoch": 0.6710714991027193, "grad_norm": 0.026555676013231277, "learning_rate": 3.303510602519769e-05, "loss": 0.23003294467926025, "step": 156310 }, { "epoch": 0.6711144311927393, "grad_norm": 0.6874704957008362, "learning_rate": 3.303079430508007e-05, "loss": 0.1800214648246765, "step": 156320 }, { "epoch": 0.6711573632827593, "grad_norm": 0.00029299809830263257, "learning_rate": 3.302648258496245e-05, "loss": 0.14955276250839233, "step": 156330 }, { "epoch": 0.6712002953727794, "grad_norm": 0.04590775445103645, "learning_rate": 3.302217086484482e-05, "loss": 0.2385624885559082, "step": 156340 }, { "epoch": 0.6712432274627993, "grad_norm": 0.23930826783180237, "learning_rate": 3.3017859144727196e-05, "loss": 0.15384535789489745, "step": 156350 }, { "epoch": 0.6712861595528193, "grad_norm": 0.03432545065879822, "learning_rate": 3.301354742460957e-05, "loss": 0.2647146463394165, "step": 156360 }, { "epoch": 0.6713290916428394, "grad_norm": 0.3528565466403961, "learning_rate": 3.300923570449195e-05, "loss": 0.15096168518066405, "step": 156370 }, { "epoch": 0.6713720237328593, "grad_norm": 0.0011686889920383692, "learning_rate": 3.300492398437433e-05, "loss": 0.15718039274215698, "step": 156380 }, { "epoch": 0.6714149558228794, "grad_norm": 0.07616129517555237, "learning_rate": 3.3000612264256705e-05, "loss": 0.144598650932312, "step": 156390 }, { "epoch": 0.6714578879128994, "grad_norm": 3.6899969577789307, "learning_rate": 3.299630054413908e-05, "loss": 0.21471176147460938, "step": 156400 }, { "epoch": 0.6715008200029193, "grad_norm": 1.9858283996582031, "learning_rate": 3.299198882402146e-05, "loss": 0.3098663091659546, "step": 156410 }, { "epoch": 0.6715437520929394, "grad_norm": 1.5402371883392334, "learning_rate": 3.298767710390384e-05, "loss": 0.06938784122467041, "step": 156420 }, { "epoch": 0.6715866841829594, "grad_norm": 0.005206105764955282, "learning_rate": 3.298336538378621e-05, "loss": 0.1276816248893738, "step": 156430 }, { "epoch": 0.6716296162729793, "grad_norm": 0.01194666512310505, "learning_rate": 3.2979053663668585e-05, "loss": 0.08454192280769349, "step": 156440 }, { "epoch": 0.6716725483629994, "grad_norm": 0.026871588081121445, "learning_rate": 3.297474194355096e-05, "loss": 0.15202608108520507, "step": 156450 }, { "epoch": 0.6717154804530194, "grad_norm": 0.31858089566230774, "learning_rate": 3.297043022343334e-05, "loss": 0.20315639972686766, "step": 156460 }, { "epoch": 0.6717584125430395, "grad_norm": 0.0001176847072201781, "learning_rate": 3.296611850331571e-05, "loss": 0.0934857428073883, "step": 156470 }, { "epoch": 0.6718013446330594, "grad_norm": 0.00044366047950461507, "learning_rate": 3.296180678319809e-05, "loss": 0.3745831727981567, "step": 156480 }, { "epoch": 0.6718442767230794, "grad_norm": 3.130305051803589, "learning_rate": 3.2957495063080465e-05, "loss": 0.2931442022323608, "step": 156490 }, { "epoch": 0.6718872088130995, "grad_norm": 0.0010793895926326513, "learning_rate": 3.295318334296284e-05, "loss": 0.10714174509048462, "step": 156500 }, { "epoch": 0.6719301409031194, "grad_norm": 1.57980215549469, "learning_rate": 3.294887162284522e-05, "loss": 0.11323796510696411, "step": 156510 }, { "epoch": 0.6719730729931395, "grad_norm": 2.0298821926116943, "learning_rate": 3.29445599027276e-05, "loss": 0.15041579008102418, "step": 156520 }, { "epoch": 0.6720160050831595, "grad_norm": 0.04598362371325493, "learning_rate": 3.2940248182609974e-05, "loss": 0.07781398296356201, "step": 156530 }, { "epoch": 0.6720589371731794, "grad_norm": 0.01166477520018816, "learning_rate": 3.293593646249235e-05, "loss": 0.10332920551300048, "step": 156540 }, { "epoch": 0.6721018692631995, "grad_norm": 0.9563781023025513, "learning_rate": 3.293162474237472e-05, "loss": 0.23160524368286134, "step": 156550 }, { "epoch": 0.6721448013532195, "grad_norm": 2.392322301864624, "learning_rate": 3.29273130222571e-05, "loss": 0.17795255184173583, "step": 156560 }, { "epoch": 0.6721877334432395, "grad_norm": 0.007823570631444454, "learning_rate": 3.292300130213948e-05, "loss": 0.06307123899459839, "step": 156570 }, { "epoch": 0.6722306655332595, "grad_norm": 2.6560440063476562, "learning_rate": 3.2918689582021854e-05, "loss": 0.35802664756774905, "step": 156580 }, { "epoch": 0.6722735976232795, "grad_norm": 2.988377332687378, "learning_rate": 3.2914377861904225e-05, "loss": 0.1856152057647705, "step": 156590 }, { "epoch": 0.6723165297132995, "grad_norm": 0.003383405040949583, "learning_rate": 3.29100661417866e-05, "loss": 0.12469416856765747, "step": 156600 }, { "epoch": 0.6723594618033195, "grad_norm": 3.6829562187194824, "learning_rate": 3.290575442166898e-05, "loss": 0.012735322117805481, "step": 156610 }, { "epoch": 0.6724023938933396, "grad_norm": 2.7931692600250244, "learning_rate": 3.2901442701551364e-05, "loss": 0.2340226173400879, "step": 156620 }, { "epoch": 0.6724453259833595, "grad_norm": 1.3279739618301392, "learning_rate": 3.2897130981433734e-05, "loss": 0.3694106101989746, "step": 156630 }, { "epoch": 0.6724882580733795, "grad_norm": 1.672303557395935, "learning_rate": 3.289281926131611e-05, "loss": 0.08186525702476502, "step": 156640 }, { "epoch": 0.6725311901633996, "grad_norm": 0.005892850458621979, "learning_rate": 3.288850754119849e-05, "loss": 0.33636016845703126, "step": 156650 }, { "epoch": 0.6725741222534195, "grad_norm": 0.17561239004135132, "learning_rate": 3.2884195821080866e-05, "loss": 0.1381277322769165, "step": 156660 }, { "epoch": 0.6726170543434395, "grad_norm": 0.014752616174519062, "learning_rate": 3.287988410096324e-05, "loss": 0.1700663685798645, "step": 156670 }, { "epoch": 0.6726599864334596, "grad_norm": 7.623488903045654, "learning_rate": 3.2875572380845614e-05, "loss": 0.3724008083343506, "step": 156680 }, { "epoch": 0.6727029185234795, "grad_norm": 4.932303428649902, "learning_rate": 3.287126066072799e-05, "loss": 0.3138710975646973, "step": 156690 }, { "epoch": 0.6727458506134996, "grad_norm": 0.005752002354711294, "learning_rate": 3.286694894061037e-05, "loss": 0.11339826583862304, "step": 156700 }, { "epoch": 0.6727887827035196, "grad_norm": 0.06204256787896156, "learning_rate": 3.286263722049274e-05, "loss": 0.21384499073028565, "step": 156710 }, { "epoch": 0.6728317147935395, "grad_norm": 0.004700512159615755, "learning_rate": 3.285832550037512e-05, "loss": 0.13949614763259888, "step": 156720 }, { "epoch": 0.6728746468835596, "grad_norm": 7.897902965545654, "learning_rate": 3.28540137802575e-05, "loss": 0.22153210639953613, "step": 156730 }, { "epoch": 0.6729175789735796, "grad_norm": 0.01706327125430107, "learning_rate": 3.284970206013988e-05, "loss": 0.15281535387039186, "step": 156740 }, { "epoch": 0.6729605110635996, "grad_norm": 11.205368041992188, "learning_rate": 3.284539034002225e-05, "loss": 0.12583911418914795, "step": 156750 }, { "epoch": 0.6730034431536196, "grad_norm": 1.768643856048584, "learning_rate": 3.2841078619904626e-05, "loss": 0.3392418622970581, "step": 156760 }, { "epoch": 0.6730463752436396, "grad_norm": 0.09945378452539444, "learning_rate": 3.2836766899787004e-05, "loss": 0.13553329706192016, "step": 156770 }, { "epoch": 0.6730893073336596, "grad_norm": 0.013488375581800938, "learning_rate": 3.283245517966938e-05, "loss": 0.2017906427383423, "step": 156780 }, { "epoch": 0.6731322394236796, "grad_norm": 0.17563210427761078, "learning_rate": 3.282814345955176e-05, "loss": 0.18367542028427125, "step": 156790 }, { "epoch": 0.6731751715136997, "grad_norm": 2.4380056858062744, "learning_rate": 3.282383173943413e-05, "loss": 0.3962520360946655, "step": 156800 }, { "epoch": 0.6732181036037196, "grad_norm": 1.7807360887527466, "learning_rate": 3.2819520019316506e-05, "loss": 0.17535316944122314, "step": 156810 }, { "epoch": 0.6732610356937396, "grad_norm": 0.03467885032296181, "learning_rate": 3.2815208299198884e-05, "loss": 0.09742856621742249, "step": 156820 }, { "epoch": 0.6733039677837597, "grad_norm": 0.9437536597251892, "learning_rate": 3.281089657908126e-05, "loss": 0.21734330654144288, "step": 156830 }, { "epoch": 0.6733468998737796, "grad_norm": 1.783363699913025, "learning_rate": 3.280658485896364e-05, "loss": 0.28152852058410643, "step": 156840 }, { "epoch": 0.6733898319637996, "grad_norm": 0.009937250055372715, "learning_rate": 3.2802273138846016e-05, "loss": 0.18057806491851808, "step": 156850 }, { "epoch": 0.6734327640538197, "grad_norm": 0.0626106932759285, "learning_rate": 3.279796141872839e-05, "loss": 0.12527823448181152, "step": 156860 }, { "epoch": 0.6734756961438396, "grad_norm": 0.024701496586203575, "learning_rate": 3.279364969861077e-05, "loss": 0.073065185546875, "step": 156870 }, { "epoch": 0.6735186282338597, "grad_norm": 0.019258178770542145, "learning_rate": 3.278933797849314e-05, "loss": 0.03922346830368042, "step": 156880 }, { "epoch": 0.6735615603238797, "grad_norm": 6.451083183288574, "learning_rate": 3.278502625837552e-05, "loss": 0.3139482498168945, "step": 156890 }, { "epoch": 0.6736044924138997, "grad_norm": 2.2330548763275146, "learning_rate": 3.2780714538257896e-05, "loss": 0.20146529674530028, "step": 156900 }, { "epoch": 0.6736474245039197, "grad_norm": 5.211982727050781, "learning_rate": 3.277640281814027e-05, "loss": 0.11265192031860352, "step": 156910 }, { "epoch": 0.6736903565939397, "grad_norm": 0.2216411679983139, "learning_rate": 3.2772091098022643e-05, "loss": 0.10636416673660279, "step": 156920 }, { "epoch": 0.6737332886839598, "grad_norm": 0.01977216824889183, "learning_rate": 3.276777937790502e-05, "loss": 0.11636031866073608, "step": 156930 }, { "epoch": 0.6737762207739797, "grad_norm": 0.037923719733953476, "learning_rate": 3.27634676577874e-05, "loss": 0.16940144300460816, "step": 156940 }, { "epoch": 0.6738191528639997, "grad_norm": 2.47348690032959, "learning_rate": 3.2759155937669775e-05, "loss": 0.43205738067626953, "step": 156950 }, { "epoch": 0.6738620849540198, "grad_norm": 0.2728065252304077, "learning_rate": 3.275484421755215e-05, "loss": 0.23530976772308348, "step": 156960 }, { "epoch": 0.6739050170440397, "grad_norm": 0.0499393604695797, "learning_rate": 3.275053249743453e-05, "loss": 0.14187155961990355, "step": 156970 }, { "epoch": 0.6739479491340598, "grad_norm": 0.022202298045158386, "learning_rate": 3.274622077731691e-05, "loss": 0.20078377723693847, "step": 156980 }, { "epoch": 0.6739908812240798, "grad_norm": 0.006260285619646311, "learning_rate": 3.2741909057199285e-05, "loss": 0.07577277421951294, "step": 156990 }, { "epoch": 0.6740338133140997, "grad_norm": 4.734714508056641, "learning_rate": 3.2737597337081655e-05, "loss": 0.2892103433609009, "step": 157000 }, { "epoch": 0.6740338133140997, "eval_loss": 0.38809940218925476, "eval_runtime": 27.2561, "eval_samples_per_second": 3.669, "eval_steps_per_second": 3.669, "step": 157000 }, { "epoch": 0.6740767454041198, "grad_norm": 0.3949276804924011, "learning_rate": 3.273328561696403e-05, "loss": 0.11092907190322876, "step": 157010 }, { "epoch": 0.6741196774941398, "grad_norm": 0.5621246695518494, "learning_rate": 3.272897389684641e-05, "loss": 0.0834064543247223, "step": 157020 }, { "epoch": 0.6741626095841597, "grad_norm": 2.7552273273468018, "learning_rate": 3.272466217672879e-05, "loss": 0.3163444995880127, "step": 157030 }, { "epoch": 0.6742055416741798, "grad_norm": 5.054752349853516, "learning_rate": 3.272035045661116e-05, "loss": 0.27749512195587156, "step": 157040 }, { "epoch": 0.6742484737641998, "grad_norm": 0.000382843310944736, "learning_rate": 3.2716038736493535e-05, "loss": 0.24705393314361573, "step": 157050 }, { "epoch": 0.6742914058542198, "grad_norm": 0.047599148005247116, "learning_rate": 3.271172701637591e-05, "loss": 0.05157904624938965, "step": 157060 }, { "epoch": 0.6743343379442398, "grad_norm": 0.13552670180797577, "learning_rate": 3.270741529625829e-05, "loss": 0.215932035446167, "step": 157070 }, { "epoch": 0.6743772700342598, "grad_norm": 0.6506958603858948, "learning_rate": 3.270310357614067e-05, "loss": 0.22939252853393555, "step": 157080 }, { "epoch": 0.6744202021242798, "grad_norm": 0.005644662771373987, "learning_rate": 3.2698791856023045e-05, "loss": 0.3055266380310059, "step": 157090 }, { "epoch": 0.6744631342142998, "grad_norm": 0.00816242303699255, "learning_rate": 3.269448013590542e-05, "loss": 0.2139211416244507, "step": 157100 }, { "epoch": 0.6745060663043199, "grad_norm": 0.00204946706071496, "learning_rate": 3.26901684157878e-05, "loss": 0.18893979787826537, "step": 157110 }, { "epoch": 0.6745489983943398, "grad_norm": 0.060986656695604324, "learning_rate": 3.268585669567017e-05, "loss": 0.19850542545318603, "step": 157120 }, { "epoch": 0.6745919304843598, "grad_norm": 0.02972446009516716, "learning_rate": 3.268154497555255e-05, "loss": 0.06751665472984314, "step": 157130 }, { "epoch": 0.6746348625743799, "grad_norm": 2.524022102355957, "learning_rate": 3.2677233255434925e-05, "loss": 0.3849080324172974, "step": 157140 }, { "epoch": 0.6746777946643998, "grad_norm": 0.002453405410051346, "learning_rate": 3.26729215353173e-05, "loss": 0.09670175909996033, "step": 157150 }, { "epoch": 0.6747207267544199, "grad_norm": 2.7271265983581543, "learning_rate": 3.266860981519968e-05, "loss": 0.07853362560272217, "step": 157160 }, { "epoch": 0.6747636588444399, "grad_norm": 2.5940101146698, "learning_rate": 3.266429809508205e-05, "loss": 0.5087200164794922, "step": 157170 }, { "epoch": 0.6748065909344598, "grad_norm": 4.071169853210449, "learning_rate": 3.265998637496443e-05, "loss": 0.1377337694168091, "step": 157180 }, { "epoch": 0.6748495230244799, "grad_norm": 0.012241007760167122, "learning_rate": 3.2655674654846805e-05, "loss": 0.06032915115356445, "step": 157190 }, { "epoch": 0.6748924551144999, "grad_norm": 1.3309521675109863, "learning_rate": 3.265136293472918e-05, "loss": 0.23908510208129882, "step": 157200 }, { "epoch": 0.6749353872045198, "grad_norm": 0.0017846348928287625, "learning_rate": 3.264705121461156e-05, "loss": 0.1398613214492798, "step": 157210 }, { "epoch": 0.6749783192945399, "grad_norm": 2.251039981842041, "learning_rate": 3.264273949449394e-05, "loss": 0.3664930105209351, "step": 157220 }, { "epoch": 0.6750212513845599, "grad_norm": 0.06420256197452545, "learning_rate": 3.2638427774376314e-05, "loss": 0.1947470188140869, "step": 157230 }, { "epoch": 0.6750641834745799, "grad_norm": 0.30117225646972656, "learning_rate": 3.263411605425869e-05, "loss": 0.24840478897094725, "step": 157240 }, { "epoch": 0.6751071155645999, "grad_norm": 0.10018105059862137, "learning_rate": 3.262980433414106e-05, "loss": 0.08070742487907409, "step": 157250 }, { "epoch": 0.67515004765462, "grad_norm": 0.011599382385611534, "learning_rate": 3.262549261402344e-05, "loss": 0.09008162021636963, "step": 157260 }, { "epoch": 0.6751929797446399, "grad_norm": 0.004888639319688082, "learning_rate": 3.2621180893905817e-05, "loss": 0.18968068361282348, "step": 157270 }, { "epoch": 0.6752359118346599, "grad_norm": 0.004041858017444611, "learning_rate": 3.2616869173788194e-05, "loss": 0.27539896965026855, "step": 157280 }, { "epoch": 0.67527884392468, "grad_norm": 1.4639886617660522, "learning_rate": 3.2612557453670564e-05, "loss": 0.25031161308288574, "step": 157290 }, { "epoch": 0.6753217760146999, "grad_norm": 2.1338157653808594, "learning_rate": 3.260824573355294e-05, "loss": 0.15990082025527955, "step": 157300 }, { "epoch": 0.6753647081047199, "grad_norm": 10.418062210083008, "learning_rate": 3.260393401343532e-05, "loss": 0.08970891237258911, "step": 157310 }, { "epoch": 0.67540764019474, "grad_norm": 0.017363833263516426, "learning_rate": 3.2599622293317696e-05, "loss": 0.2596541404724121, "step": 157320 }, { "epoch": 0.67545057228476, "grad_norm": 0.2788954973220825, "learning_rate": 3.2595310573200074e-05, "loss": 0.1514696002006531, "step": 157330 }, { "epoch": 0.67549350437478, "grad_norm": 1.667123556137085, "learning_rate": 3.259099885308245e-05, "loss": 0.15894722938537598, "step": 157340 }, { "epoch": 0.6755364364648, "grad_norm": 0.01323763933032751, "learning_rate": 3.258668713296483e-05, "loss": 0.21982104778289796, "step": 157350 }, { "epoch": 0.67557936855482, "grad_norm": 0.06124216318130493, "learning_rate": 3.2582375412847206e-05, "loss": 0.19194493293762208, "step": 157360 }, { "epoch": 0.67562230064484, "grad_norm": 0.007614783942699432, "learning_rate": 3.2578063692729576e-05, "loss": 0.39401233196258545, "step": 157370 }, { "epoch": 0.67566523273486, "grad_norm": 1.8661552667617798, "learning_rate": 3.2573751972611954e-05, "loss": 0.18967965841293336, "step": 157380 }, { "epoch": 0.67570816482488, "grad_norm": 0.014832602813839912, "learning_rate": 3.256944025249433e-05, "loss": 0.022698092460632324, "step": 157390 }, { "epoch": 0.6757510969149, "grad_norm": 3.17002534866333, "learning_rate": 3.256512853237671e-05, "loss": 0.19420602321624755, "step": 157400 }, { "epoch": 0.67579402900492, "grad_norm": 0.06440910696983337, "learning_rate": 3.256081681225908e-05, "loss": 0.2151397943496704, "step": 157410 }, { "epoch": 0.6758369610949401, "grad_norm": 0.4137822389602661, "learning_rate": 3.2556505092141456e-05, "loss": 0.2239288330078125, "step": 157420 }, { "epoch": 0.67587989318496, "grad_norm": 0.1950918734073639, "learning_rate": 3.2552193372023834e-05, "loss": 0.15644675493240356, "step": 157430 }, { "epoch": 0.67592282527498, "grad_norm": 1.746835470199585, "learning_rate": 3.254788165190622e-05, "loss": 0.2628674030303955, "step": 157440 }, { "epoch": 0.6759657573650001, "grad_norm": 4.402871608734131, "learning_rate": 3.254356993178859e-05, "loss": 0.40744714736938475, "step": 157450 }, { "epoch": 0.67600868945502, "grad_norm": 0.15432725846767426, "learning_rate": 3.2539258211670966e-05, "loss": 0.11367126703262329, "step": 157460 }, { "epoch": 0.6760516215450401, "grad_norm": 0.004775689914822578, "learning_rate": 3.253494649155334e-05, "loss": 0.07618749141693115, "step": 157470 }, { "epoch": 0.6760945536350601, "grad_norm": 0.0029706796631217003, "learning_rate": 3.253063477143572e-05, "loss": 0.3006477355957031, "step": 157480 }, { "epoch": 0.67613748572508, "grad_norm": 4.908012866973877, "learning_rate": 3.252632305131809e-05, "loss": 0.341683554649353, "step": 157490 }, { "epoch": 0.6761804178151001, "grad_norm": 0.03939557820558548, "learning_rate": 3.252201133120047e-05, "loss": 0.11358660459518433, "step": 157500 }, { "epoch": 0.6762233499051201, "grad_norm": 2.5295541286468506, "learning_rate": 3.2517699611082846e-05, "loss": 0.23426496982574463, "step": 157510 }, { "epoch": 0.67626628199514, "grad_norm": 0.016799015924334526, "learning_rate": 3.251338789096522e-05, "loss": 0.23719797134399415, "step": 157520 }, { "epoch": 0.6763092140851601, "grad_norm": 6.704193592071533, "learning_rate": 3.25090761708476e-05, "loss": 0.12964043617248536, "step": 157530 }, { "epoch": 0.6763521461751801, "grad_norm": 0.000664949300698936, "learning_rate": 3.250476445072997e-05, "loss": 0.19647494554519654, "step": 157540 }, { "epoch": 0.6763950782652001, "grad_norm": 0.05981343239545822, "learning_rate": 3.2500452730612355e-05, "loss": 0.15086113214492797, "step": 157550 }, { "epoch": 0.6764380103552201, "grad_norm": 0.008232577703893185, "learning_rate": 3.249614101049473e-05, "loss": 0.20063190460205077, "step": 157560 }, { "epoch": 0.6764809424452402, "grad_norm": 0.0008475049980916083, "learning_rate": 3.249182929037711e-05, "loss": 0.11724573373794556, "step": 157570 }, { "epoch": 0.6765238745352601, "grad_norm": 1.6179267168045044, "learning_rate": 3.248751757025948e-05, "loss": 0.13589624166488648, "step": 157580 }, { "epoch": 0.6765668066252801, "grad_norm": 0.011852581985294819, "learning_rate": 3.248320585014186e-05, "loss": 0.2451772928237915, "step": 157590 }, { "epoch": 0.6766097387153002, "grad_norm": 0.030088340863585472, "learning_rate": 3.2478894130024235e-05, "loss": 0.1790969967842102, "step": 157600 }, { "epoch": 0.6766526708053201, "grad_norm": 0.09731682389974594, "learning_rate": 3.247458240990661e-05, "loss": 0.23853666782379152, "step": 157610 }, { "epoch": 0.6766956028953401, "grad_norm": 0.006340175401419401, "learning_rate": 3.247027068978898e-05, "loss": 0.3634488105773926, "step": 157620 }, { "epoch": 0.6767385349853602, "grad_norm": 5.072446823120117, "learning_rate": 3.246595896967136e-05, "loss": 0.18125200271606445, "step": 157630 }, { "epoch": 0.6767814670753801, "grad_norm": 1.7390445470809937, "learning_rate": 3.246164724955374e-05, "loss": 0.1761768102645874, "step": 157640 }, { "epoch": 0.6768243991654002, "grad_norm": 0.06036270782351494, "learning_rate": 3.2457335529436115e-05, "loss": 0.13214666843414308, "step": 157650 }, { "epoch": 0.6768673312554202, "grad_norm": 1.2427871227264404, "learning_rate": 3.245302380931849e-05, "loss": 0.23174049854278564, "step": 157660 }, { "epoch": 0.6769102633454401, "grad_norm": 0.008723437786102295, "learning_rate": 3.244871208920087e-05, "loss": 0.16431721448898315, "step": 157670 }, { "epoch": 0.6769531954354602, "grad_norm": 0.0026647180784493685, "learning_rate": 3.244440036908325e-05, "loss": 0.25285816192626953, "step": 157680 }, { "epoch": 0.6769961275254802, "grad_norm": 0.01261743251234293, "learning_rate": 3.2440088648965624e-05, "loss": 0.1764390230178833, "step": 157690 }, { "epoch": 0.6770390596155001, "grad_norm": 0.04876869544386864, "learning_rate": 3.2435776928847995e-05, "loss": 0.18486921787261962, "step": 157700 }, { "epoch": 0.6770819917055202, "grad_norm": 0.06611602008342743, "learning_rate": 3.243146520873037e-05, "loss": 0.16170352697372437, "step": 157710 }, { "epoch": 0.6771249237955402, "grad_norm": 0.20194700360298157, "learning_rate": 3.242715348861275e-05, "loss": 0.14869037866592408, "step": 157720 }, { "epoch": 0.6771678558855602, "grad_norm": 0.00841581355780363, "learning_rate": 3.242284176849513e-05, "loss": 0.1633516311645508, "step": 157730 }, { "epoch": 0.6772107879755802, "grad_norm": 0.023939453065395355, "learning_rate": 3.24185300483775e-05, "loss": 0.23998985290527344, "step": 157740 }, { "epoch": 0.6772537200656003, "grad_norm": 0.06220559775829315, "learning_rate": 3.2414218328259875e-05, "loss": 0.12186563014984131, "step": 157750 }, { "epoch": 0.6772966521556203, "grad_norm": 0.10329548269510269, "learning_rate": 3.240990660814225e-05, "loss": 0.059828144311904904, "step": 157760 }, { "epoch": 0.6773395842456402, "grad_norm": 0.0009203555528074503, "learning_rate": 3.240559488802463e-05, "loss": 0.10206557512283325, "step": 157770 }, { "epoch": 0.6773825163356603, "grad_norm": 0.1713915914297104, "learning_rate": 3.240128316790701e-05, "loss": 0.2474388837814331, "step": 157780 }, { "epoch": 0.6774254484256803, "grad_norm": 0.009260360151529312, "learning_rate": 3.2396971447789384e-05, "loss": 0.2259169340133667, "step": 157790 }, { "epoch": 0.6774683805157002, "grad_norm": 0.004235635045915842, "learning_rate": 3.239265972767176e-05, "loss": 0.37403192520141604, "step": 157800 }, { "epoch": 0.6775113126057203, "grad_norm": 8.215943336486816, "learning_rate": 3.238834800755414e-05, "loss": 0.23961963653564453, "step": 157810 }, { "epoch": 0.6775542446957403, "grad_norm": 0.08245151489973068, "learning_rate": 3.238403628743651e-05, "loss": 0.240964674949646, "step": 157820 }, { "epoch": 0.6775971767857603, "grad_norm": 2.935471534729004, "learning_rate": 3.237972456731889e-05, "loss": 0.06698665022850037, "step": 157830 }, { "epoch": 0.6776401088757803, "grad_norm": 8.233935356140137, "learning_rate": 3.2375412847201264e-05, "loss": 0.2231219530105591, "step": 157840 }, { "epoch": 0.6776830409658003, "grad_norm": 0.005562909878790379, "learning_rate": 3.237110112708364e-05, "loss": 0.11405689716339111, "step": 157850 }, { "epoch": 0.6777259730558203, "grad_norm": 0.5877987146377563, "learning_rate": 3.236678940696601e-05, "loss": 0.11020293235778808, "step": 157860 }, { "epoch": 0.6777689051458403, "grad_norm": 0.000158540831762366, "learning_rate": 3.236247768684839e-05, "loss": 0.17420880794525145, "step": 157870 }, { "epoch": 0.6778118372358604, "grad_norm": 0.010848082602024078, "learning_rate": 3.235816596673077e-05, "loss": 0.20020055770874023, "step": 157880 }, { "epoch": 0.6778547693258803, "grad_norm": 0.024501841515302658, "learning_rate": 3.2353854246613144e-05, "loss": 0.19236403703689575, "step": 157890 }, { "epoch": 0.6778977014159003, "grad_norm": 0.08429605513811111, "learning_rate": 3.234954252649552e-05, "loss": 0.07690611481666565, "step": 157900 }, { "epoch": 0.6779406335059204, "grad_norm": 0.006016223691403866, "learning_rate": 3.23452308063779e-05, "loss": 0.22260491847991942, "step": 157910 }, { "epoch": 0.6779835655959403, "grad_norm": 0.17743821442127228, "learning_rate": 3.2340919086260276e-05, "loss": 0.09943286776542663, "step": 157920 }, { "epoch": 0.6780264976859603, "grad_norm": 0.24222442507743835, "learning_rate": 3.2336607366142653e-05, "loss": 0.2702426195144653, "step": 157930 }, { "epoch": 0.6780694297759804, "grad_norm": 0.15780700743198395, "learning_rate": 3.233229564602503e-05, "loss": 0.07441672682762146, "step": 157940 }, { "epoch": 0.6781123618660003, "grad_norm": 1.1437824964523315, "learning_rate": 3.23279839259074e-05, "loss": 0.116599702835083, "step": 157950 }, { "epoch": 0.6781552939560204, "grad_norm": 0.01496734656393528, "learning_rate": 3.232367220578978e-05, "loss": 0.27400927543640136, "step": 157960 }, { "epoch": 0.6781982260460404, "grad_norm": 1.0968358516693115, "learning_rate": 3.2319360485672156e-05, "loss": 0.36545302867889407, "step": 157970 }, { "epoch": 0.6782411581360603, "grad_norm": 2.342289924621582, "learning_rate": 3.231504876555453e-05, "loss": 0.2468874931335449, "step": 157980 }, { "epoch": 0.6782840902260804, "grad_norm": 0.007133483421057463, "learning_rate": 3.2310737045436904e-05, "loss": 0.11157327890396118, "step": 157990 }, { "epoch": 0.6783270223161004, "grad_norm": 0.019744692370295525, "learning_rate": 3.230642532531928e-05, "loss": 0.19470452070236205, "step": 158000 }, { "epoch": 0.6783270223161004, "eval_loss": 0.3861476480960846, "eval_runtime": 27.2611, "eval_samples_per_second": 3.668, "eval_steps_per_second": 3.668, "step": 158000 }, { "epoch": 0.6783699544061204, "grad_norm": 0.47449058294296265, "learning_rate": 3.230211360520166e-05, "loss": 0.030623114109039305, "step": 158010 }, { "epoch": 0.6784128864961404, "grad_norm": 0.3131827116012573, "learning_rate": 3.2297801885084036e-05, "loss": 0.22363739013671874, "step": 158020 }, { "epoch": 0.6784558185861604, "grad_norm": 0.033570338040590286, "learning_rate": 3.229349016496641e-05, "loss": 0.007682143151760102, "step": 158030 }, { "epoch": 0.6784987506761804, "grad_norm": 3.0705337524414062, "learning_rate": 3.228917844484879e-05, "loss": 0.3733278751373291, "step": 158040 }, { "epoch": 0.6785416827662004, "grad_norm": 1.8934067487716675, "learning_rate": 3.228486672473117e-05, "loss": 0.26838254928588867, "step": 158050 }, { "epoch": 0.6785846148562205, "grad_norm": 0.0044560618698596954, "learning_rate": 3.2280555004613545e-05, "loss": 0.08115120530128479, "step": 158060 }, { "epoch": 0.6786275469462404, "grad_norm": 0.0790332779288292, "learning_rate": 3.2276243284495916e-05, "loss": 0.18160150051116944, "step": 158070 }, { "epoch": 0.6786704790362604, "grad_norm": 0.0017745341174304485, "learning_rate": 3.227193156437829e-05, "loss": 0.29369025230407714, "step": 158080 }, { "epoch": 0.6787134111262805, "grad_norm": 1.6305290460586548, "learning_rate": 3.226761984426067e-05, "loss": 0.14866819381713867, "step": 158090 }, { "epoch": 0.6787563432163004, "grad_norm": 10.965187072753906, "learning_rate": 3.226330812414305e-05, "loss": 0.23119993209838868, "step": 158100 }, { "epoch": 0.6787992753063204, "grad_norm": 0.0011797469342127442, "learning_rate": 3.225899640402542e-05, "loss": 0.3503007411956787, "step": 158110 }, { "epoch": 0.6788422073963405, "grad_norm": 3.7308449745178223, "learning_rate": 3.2254684683907796e-05, "loss": 0.23552021980285645, "step": 158120 }, { "epoch": 0.6788851394863604, "grad_norm": 0.07517192512750626, "learning_rate": 3.225037296379017e-05, "loss": 0.37005517482757566, "step": 158130 }, { "epoch": 0.6789280715763805, "grad_norm": 1.7629700899124146, "learning_rate": 3.224606124367256e-05, "loss": 0.4034713268280029, "step": 158140 }, { "epoch": 0.6789710036664005, "grad_norm": 0.6647354364395142, "learning_rate": 3.224174952355493e-05, "loss": 0.2033230781555176, "step": 158150 }, { "epoch": 0.6790139357564204, "grad_norm": 0.19703158736228943, "learning_rate": 3.2237437803437305e-05, "loss": 0.1884385108947754, "step": 158160 }, { "epoch": 0.6790568678464405, "grad_norm": 2.4684970378875732, "learning_rate": 3.223312608331968e-05, "loss": 0.2863666772842407, "step": 158170 }, { "epoch": 0.6790997999364605, "grad_norm": 0.17179302871227264, "learning_rate": 3.222881436320206e-05, "loss": 0.1833629846572876, "step": 158180 }, { "epoch": 0.6791427320264806, "grad_norm": 0.18861034512519836, "learning_rate": 3.222450264308443e-05, "loss": 0.36298208236694335, "step": 158190 }, { "epoch": 0.6791856641165005, "grad_norm": 0.037184134125709534, "learning_rate": 3.222019092296681e-05, "loss": 0.17432732582092286, "step": 158200 }, { "epoch": 0.6792285962065205, "grad_norm": 1.2532052993774414, "learning_rate": 3.2215879202849185e-05, "loss": 0.07648919224739074, "step": 158210 }, { "epoch": 0.6792715282965406, "grad_norm": 0.02248220331966877, "learning_rate": 3.221156748273156e-05, "loss": 0.28692572116851806, "step": 158220 }, { "epoch": 0.6793144603865605, "grad_norm": 1.3784197568893433, "learning_rate": 3.220725576261394e-05, "loss": 0.037394005060195926, "step": 158230 }, { "epoch": 0.6793573924765806, "grad_norm": 0.009938391856849194, "learning_rate": 3.220294404249631e-05, "loss": 0.18498718738555908, "step": 158240 }, { "epoch": 0.6794003245666006, "grad_norm": 0.050672680139541626, "learning_rate": 3.2198632322378695e-05, "loss": 0.2385341167449951, "step": 158250 }, { "epoch": 0.6794432566566205, "grad_norm": 2.427584409713745, "learning_rate": 3.219432060226107e-05, "loss": 0.3927887439727783, "step": 158260 }, { "epoch": 0.6794861887466406, "grad_norm": 0.007242992520332336, "learning_rate": 3.219000888214345e-05, "loss": 0.18903251886367797, "step": 158270 }, { "epoch": 0.6795291208366606, "grad_norm": 2.674262046813965, "learning_rate": 3.218569716202582e-05, "loss": 0.3244601249694824, "step": 158280 }, { "epoch": 0.6795720529266805, "grad_norm": 1.95919930934906, "learning_rate": 3.21813854419082e-05, "loss": 0.27033874988555906, "step": 158290 }, { "epoch": 0.6796149850167006, "grad_norm": 0.0013277186080813408, "learning_rate": 3.2177073721790574e-05, "loss": 0.18444887399673462, "step": 158300 }, { "epoch": 0.6796579171067206, "grad_norm": 0.05387435480952263, "learning_rate": 3.217276200167295e-05, "loss": 0.18853824138641356, "step": 158310 }, { "epoch": 0.6797008491967406, "grad_norm": 0.7749758958816528, "learning_rate": 3.216845028155532e-05, "loss": 0.18170182704925536, "step": 158320 }, { "epoch": 0.6797437812867606, "grad_norm": 5.2914791012881324e-05, "learning_rate": 3.21641385614377e-05, "loss": 0.17878755331039428, "step": 158330 }, { "epoch": 0.6797867133767806, "grad_norm": 2.84869384765625, "learning_rate": 3.215982684132008e-05, "loss": 0.23383090496063233, "step": 158340 }, { "epoch": 0.6798296454668006, "grad_norm": 3.1321818828582764, "learning_rate": 3.2155515121202454e-05, "loss": 0.07024050951004028, "step": 158350 }, { "epoch": 0.6798725775568206, "grad_norm": 8.468925476074219, "learning_rate": 3.215120340108483e-05, "loss": 0.5075342655181885, "step": 158360 }, { "epoch": 0.6799155096468407, "grad_norm": 5.578030109405518, "learning_rate": 3.214689168096721e-05, "loss": 0.4054515838623047, "step": 158370 }, { "epoch": 0.6799584417368606, "grad_norm": 0.7275596857070923, "learning_rate": 3.2142579960849586e-05, "loss": 0.238731050491333, "step": 158380 }, { "epoch": 0.6800013738268806, "grad_norm": 0.0905885100364685, "learning_rate": 3.2138268240731964e-05, "loss": 0.16746869087219238, "step": 158390 }, { "epoch": 0.6800443059169007, "grad_norm": 1.944628357887268, "learning_rate": 3.2133956520614334e-05, "loss": 0.18926804065704345, "step": 158400 }, { "epoch": 0.6800872380069206, "grad_norm": 5.562248229980469, "learning_rate": 3.212964480049671e-05, "loss": 0.4556564807891846, "step": 158410 }, { "epoch": 0.6801301700969407, "grad_norm": 0.008871596306562424, "learning_rate": 3.212533308037909e-05, "loss": 0.19651058912277222, "step": 158420 }, { "epoch": 0.6801731021869607, "grad_norm": 0.017508648335933685, "learning_rate": 3.2121021360261466e-05, "loss": 0.1843596339225769, "step": 158430 }, { "epoch": 0.6802160342769806, "grad_norm": 0.009043751284480095, "learning_rate": 3.211670964014384e-05, "loss": 0.06310344934463501, "step": 158440 }, { "epoch": 0.6802589663670007, "grad_norm": 0.10740286856889725, "learning_rate": 3.2112397920026214e-05, "loss": 0.2981158018112183, "step": 158450 }, { "epoch": 0.6803018984570207, "grad_norm": 0.061360158026218414, "learning_rate": 3.210808619990859e-05, "loss": 0.07388191819190978, "step": 158460 }, { "epoch": 0.6803448305470406, "grad_norm": 0.1522047519683838, "learning_rate": 3.210377447979097e-05, "loss": 0.3216759204864502, "step": 158470 }, { "epoch": 0.6803877626370607, "grad_norm": 0.003306443803012371, "learning_rate": 3.2099462759673346e-05, "loss": 0.04231602251529694, "step": 158480 }, { "epoch": 0.6804306947270807, "grad_norm": 0.002171689411625266, "learning_rate": 3.2095151039555724e-05, "loss": 0.08375156521797181, "step": 158490 }, { "epoch": 0.6804736268171007, "grad_norm": 1.1639153957366943, "learning_rate": 3.20908393194381e-05, "loss": 0.26513402462005614, "step": 158500 }, { "epoch": 0.6805165589071207, "grad_norm": 0.003427459392696619, "learning_rate": 3.208652759932048e-05, "loss": 0.18078973293304443, "step": 158510 }, { "epoch": 0.6805594909971407, "grad_norm": 1.5218772888183594, "learning_rate": 3.208221587920285e-05, "loss": 0.22859303951263427, "step": 158520 }, { "epoch": 0.6806024230871607, "grad_norm": 0.046713389456272125, "learning_rate": 3.2077904159085226e-05, "loss": 0.28482129573822024, "step": 158530 }, { "epoch": 0.6806453551771807, "grad_norm": 0.009020074270665646, "learning_rate": 3.2073592438967604e-05, "loss": 0.16700432300567628, "step": 158540 }, { "epoch": 0.6806882872672008, "grad_norm": 0.12690341472625732, "learning_rate": 3.206928071884998e-05, "loss": 0.17910512685775756, "step": 158550 }, { "epoch": 0.6807312193572207, "grad_norm": 0.9299690127372742, "learning_rate": 3.206496899873235e-05, "loss": 0.1592942714691162, "step": 158560 }, { "epoch": 0.6807741514472407, "grad_norm": 1.0478684902191162, "learning_rate": 3.206065727861473e-05, "loss": 0.17841386795043945, "step": 158570 }, { "epoch": 0.6808170835372608, "grad_norm": 0.0024922348093241453, "learning_rate": 3.2056345558497106e-05, "loss": 0.4945104122161865, "step": 158580 }, { "epoch": 0.6808600156272807, "grad_norm": 9.214879035949707, "learning_rate": 3.2052033838379484e-05, "loss": 0.20899596214294433, "step": 158590 }, { "epoch": 0.6809029477173008, "grad_norm": 4.420780181884766, "learning_rate": 3.204772211826186e-05, "loss": 0.3694831371307373, "step": 158600 }, { "epoch": 0.6809458798073208, "grad_norm": 0.012590976431965828, "learning_rate": 3.204341039814424e-05, "loss": 0.18441904783248902, "step": 158610 }, { "epoch": 0.6809888118973408, "grad_norm": 0.0019417435396462679, "learning_rate": 3.2039098678026616e-05, "loss": 0.2644726514816284, "step": 158620 }, { "epoch": 0.6810317439873608, "grad_norm": 0.4429931938648224, "learning_rate": 3.203478695790899e-05, "loss": 0.34392242431640624, "step": 158630 }, { "epoch": 0.6810746760773808, "grad_norm": 1.5727390050888062, "learning_rate": 3.203047523779137e-05, "loss": 0.23203063011169434, "step": 158640 }, { "epoch": 0.6811176081674009, "grad_norm": 5.987852573394775, "learning_rate": 3.202616351767374e-05, "loss": 0.08132092952728272, "step": 158650 }, { "epoch": 0.6811605402574208, "grad_norm": 1.5135953426361084, "learning_rate": 3.202185179755612e-05, "loss": 0.22478418350219725, "step": 158660 }, { "epoch": 0.6812034723474408, "grad_norm": 7.780333518981934, "learning_rate": 3.2017540077438495e-05, "loss": 0.3361856937408447, "step": 158670 }, { "epoch": 0.6812464044374609, "grad_norm": 0.0940074622631073, "learning_rate": 3.201322835732087e-05, "loss": 0.16245073080062866, "step": 158680 }, { "epoch": 0.6812893365274808, "grad_norm": 4.361866474151611, "learning_rate": 3.2008916637203243e-05, "loss": 0.29499890804290774, "step": 158690 }, { "epoch": 0.6813322686175008, "grad_norm": 0.7107620239257812, "learning_rate": 3.200460491708562e-05, "loss": 0.16014331579208374, "step": 158700 }, { "epoch": 0.6813752007075209, "grad_norm": 0.16708636283874512, "learning_rate": 3.2000293196968e-05, "loss": 0.07247481346130372, "step": 158710 }, { "epoch": 0.6814181327975408, "grad_norm": 0.9394482970237732, "learning_rate": 3.1995981476850375e-05, "loss": 0.06600644588470458, "step": 158720 }, { "epoch": 0.6814610648875609, "grad_norm": 0.15201954543590546, "learning_rate": 3.199166975673275e-05, "loss": 0.0971751093864441, "step": 158730 }, { "epoch": 0.6815039969775809, "grad_norm": 0.19101858139038086, "learning_rate": 3.198735803661513e-05, "loss": 0.056906777620315555, "step": 158740 }, { "epoch": 0.6815469290676008, "grad_norm": 30.30670738220215, "learning_rate": 3.198304631649751e-05, "loss": 0.17296806573867798, "step": 158750 }, { "epoch": 0.6815898611576209, "grad_norm": 0.0005419242079369724, "learning_rate": 3.1978734596379885e-05, "loss": 0.0724807322025299, "step": 158760 }, { "epoch": 0.6816327932476409, "grad_norm": 0.8574475049972534, "learning_rate": 3.1974422876262255e-05, "loss": 0.12815709114074708, "step": 158770 }, { "epoch": 0.6816757253376609, "grad_norm": 0.031873419880867004, "learning_rate": 3.197011115614463e-05, "loss": 0.18840456008911133, "step": 158780 }, { "epoch": 0.6817186574276809, "grad_norm": 0.0829717144370079, "learning_rate": 3.196579943602701e-05, "loss": 0.064266437292099, "step": 158790 }, { "epoch": 0.6817615895177009, "grad_norm": 0.0006163608632050455, "learning_rate": 3.196148771590939e-05, "loss": 0.10775055885314941, "step": 158800 }, { "epoch": 0.6818045216077209, "grad_norm": 0.6611578464508057, "learning_rate": 3.195717599579176e-05, "loss": 0.051128280162811277, "step": 158810 }, { "epoch": 0.6818474536977409, "grad_norm": 2.4335217475891113, "learning_rate": 3.1952864275674135e-05, "loss": 0.262776780128479, "step": 158820 }, { "epoch": 0.681890385787761, "grad_norm": 0.041229743510484695, "learning_rate": 3.194855255555651e-05, "loss": 0.09505161046981811, "step": 158830 }, { "epoch": 0.6819333178777809, "grad_norm": 0.007038755342364311, "learning_rate": 3.194424083543889e-05, "loss": 0.20236282348632811, "step": 158840 }, { "epoch": 0.6819762499678009, "grad_norm": 0.00763944536447525, "learning_rate": 3.193992911532127e-05, "loss": 0.23800394535064698, "step": 158850 }, { "epoch": 0.682019182057821, "grad_norm": 0.0066480184905231, "learning_rate": 3.1935617395203645e-05, "loss": 0.1119657278060913, "step": 158860 }, { "epoch": 0.6820621141478409, "grad_norm": 0.0007088402635417879, "learning_rate": 3.193130567508602e-05, "loss": 0.2503938913345337, "step": 158870 }, { "epoch": 0.682105046237861, "grad_norm": 0.12080457806587219, "learning_rate": 3.19269939549684e-05, "loss": 0.1805747151374817, "step": 158880 }, { "epoch": 0.682147978327881, "grad_norm": 0.06467478722333908, "learning_rate": 3.192268223485077e-05, "loss": 0.13013215065002443, "step": 158890 }, { "epoch": 0.6821909104179009, "grad_norm": 1.5886485576629639, "learning_rate": 3.191837051473315e-05, "loss": 0.3788639783859253, "step": 158900 }, { "epoch": 0.682233842507921, "grad_norm": 0.10984954237937927, "learning_rate": 3.1914058794615525e-05, "loss": 0.14202741384506226, "step": 158910 }, { "epoch": 0.682276774597941, "grad_norm": 6.258497714996338, "learning_rate": 3.19097470744979e-05, "loss": 0.14315991401672362, "step": 158920 }, { "epoch": 0.6823197066879609, "grad_norm": 0.2764085829257965, "learning_rate": 3.190543535438027e-05, "loss": 0.10486044883728027, "step": 158930 }, { "epoch": 0.682362638777981, "grad_norm": 1.9766342639923096, "learning_rate": 3.190112363426265e-05, "loss": 0.19219329357147216, "step": 158940 }, { "epoch": 0.682405570868001, "grad_norm": 0.08026780188083649, "learning_rate": 3.189681191414503e-05, "loss": 0.2659833192825317, "step": 158950 }, { "epoch": 0.682448502958021, "grad_norm": 0.606878936290741, "learning_rate": 3.189250019402741e-05, "loss": 0.2334219694137573, "step": 158960 }, { "epoch": 0.682491435048041, "grad_norm": 0.7083647847175598, "learning_rate": 3.188818847390979e-05, "loss": 0.1931910514831543, "step": 158970 }, { "epoch": 0.682534367138061, "grad_norm": 2.9533469676971436, "learning_rate": 3.188387675379216e-05, "loss": 0.3448446035385132, "step": 158980 }, { "epoch": 0.682577299228081, "grad_norm": 2.4527599811553955, "learning_rate": 3.1879565033674537e-05, "loss": 0.09024946093559265, "step": 158990 }, { "epoch": 0.682620231318101, "grad_norm": 1.309434413909912, "learning_rate": 3.1875253313556914e-05, "loss": 0.418546724319458, "step": 159000 }, { "epoch": 0.682620231318101, "eval_loss": 0.39745739102363586, "eval_runtime": 27.161, "eval_samples_per_second": 3.682, "eval_steps_per_second": 3.682, "step": 159000 }, { "epoch": 0.682663163408121, "grad_norm": 0.2863774597644806, "learning_rate": 3.187094159343929e-05, "loss": 0.1911346435546875, "step": 159010 }, { "epoch": 0.682706095498141, "grad_norm": 0.32828912138938904, "learning_rate": 3.186662987332166e-05, "loss": 0.10439178943634034, "step": 159020 }, { "epoch": 0.682749027588161, "grad_norm": 0.005384594667702913, "learning_rate": 3.186231815320404e-05, "loss": 0.08147462010383606, "step": 159030 }, { "epoch": 0.6827919596781811, "grad_norm": 4.924872875213623, "learning_rate": 3.1858006433086417e-05, "loss": 0.21583118438720703, "step": 159040 }, { "epoch": 0.6828348917682011, "grad_norm": 1.8141136169433594, "learning_rate": 3.1853694712968794e-05, "loss": 0.25055086612701416, "step": 159050 }, { "epoch": 0.682877823858221, "grad_norm": 0.012755995616316795, "learning_rate": 3.1849382992851164e-05, "loss": 0.2830207109451294, "step": 159060 }, { "epoch": 0.6829207559482411, "grad_norm": 0.0550185889005661, "learning_rate": 3.184507127273355e-05, "loss": 0.36090056896209716, "step": 159070 }, { "epoch": 0.6829636880382611, "grad_norm": 0.001908186706714332, "learning_rate": 3.1840759552615926e-05, "loss": 0.2745044708251953, "step": 159080 }, { "epoch": 0.6830066201282811, "grad_norm": 0.018973547965288162, "learning_rate": 3.18364478324983e-05, "loss": 0.09228085875511169, "step": 159090 }, { "epoch": 0.6830495522183011, "grad_norm": 2.7169268131256104, "learning_rate": 3.1832136112380674e-05, "loss": 0.15420807600021363, "step": 159100 }, { "epoch": 0.6830924843083211, "grad_norm": 0.002240829635411501, "learning_rate": 3.182782439226305e-05, "loss": 0.246453857421875, "step": 159110 }, { "epoch": 0.6831354163983411, "grad_norm": 1.67829167842865, "learning_rate": 3.182351267214543e-05, "loss": 0.19712669849395753, "step": 159120 }, { "epoch": 0.6831783484883611, "grad_norm": 1.507208228111267, "learning_rate": 3.1819200952027806e-05, "loss": 0.12660225629806518, "step": 159130 }, { "epoch": 0.6832212805783812, "grad_norm": 0.0003549654793459922, "learning_rate": 3.1814889231910176e-05, "loss": 0.24141812324523926, "step": 159140 }, { "epoch": 0.6832642126684011, "grad_norm": 1.0748794078826904, "learning_rate": 3.1810577511792554e-05, "loss": 0.20776724815368652, "step": 159150 }, { "epoch": 0.6833071447584211, "grad_norm": 2.62322735786438, "learning_rate": 3.180626579167493e-05, "loss": 0.14376430511474608, "step": 159160 }, { "epoch": 0.6833500768484412, "grad_norm": 1.435289978981018, "learning_rate": 3.180195407155731e-05, "loss": 0.2463693141937256, "step": 159170 }, { "epoch": 0.6833930089384611, "grad_norm": 1.2902711629867554, "learning_rate": 3.1797642351439686e-05, "loss": 0.3314396858215332, "step": 159180 }, { "epoch": 0.6834359410284812, "grad_norm": 0.006626356393098831, "learning_rate": 3.179333063132206e-05, "loss": 0.07337073683738708, "step": 159190 }, { "epoch": 0.6834788731185012, "grad_norm": 0.0004718776617664844, "learning_rate": 3.178901891120444e-05, "loss": 0.21116044521331787, "step": 159200 }, { "epoch": 0.6835218052085211, "grad_norm": 1.6235929727554321, "learning_rate": 3.178470719108682e-05, "loss": 0.23272418975830078, "step": 159210 }, { "epoch": 0.6835647372985412, "grad_norm": 1.764551043510437, "learning_rate": 3.178039547096919e-05, "loss": 0.1310117721557617, "step": 159220 }, { "epoch": 0.6836076693885612, "grad_norm": 0.8673880696296692, "learning_rate": 3.1776083750851566e-05, "loss": 0.33874988555908203, "step": 159230 }, { "epoch": 0.6836506014785811, "grad_norm": 0.001663855859078467, "learning_rate": 3.177177203073394e-05, "loss": 0.1292450785636902, "step": 159240 }, { "epoch": 0.6836935335686012, "grad_norm": 0.2805323302745819, "learning_rate": 3.176746031061632e-05, "loss": 0.2075819492340088, "step": 159250 }, { "epoch": 0.6837364656586212, "grad_norm": 0.002389042405411601, "learning_rate": 3.176314859049869e-05, "loss": 0.07619832158088684, "step": 159260 }, { "epoch": 0.6837793977486412, "grad_norm": 0.23299634456634521, "learning_rate": 3.175883687038107e-05, "loss": 0.1750471830368042, "step": 159270 }, { "epoch": 0.6838223298386612, "grad_norm": 0.017031220719218254, "learning_rate": 3.1754525150263446e-05, "loss": 0.13732279539108277, "step": 159280 }, { "epoch": 0.6838652619286812, "grad_norm": 0.0018903700402006507, "learning_rate": 3.175021343014582e-05, "loss": 0.2203477144241333, "step": 159290 }, { "epoch": 0.6839081940187012, "grad_norm": 0.008383017033338547, "learning_rate": 3.17459017100282e-05, "loss": 0.1219887375831604, "step": 159300 }, { "epoch": 0.6839511261087212, "grad_norm": 1.2795454263687134, "learning_rate": 3.174158998991058e-05, "loss": 0.2261826753616333, "step": 159310 }, { "epoch": 0.6839940581987413, "grad_norm": 1.2634918689727783, "learning_rate": 3.1737278269792955e-05, "loss": 0.30649328231811523, "step": 159320 }, { "epoch": 0.6840369902887612, "grad_norm": 0.940351128578186, "learning_rate": 3.173296654967533e-05, "loss": 0.24685354232788087, "step": 159330 }, { "epoch": 0.6840799223787812, "grad_norm": 0.019773170351982117, "learning_rate": 3.172865482955771e-05, "loss": 0.08614250421524047, "step": 159340 }, { "epoch": 0.6841228544688013, "grad_norm": 1.0007884502410889, "learning_rate": 3.172434310944008e-05, "loss": 0.41218295097351076, "step": 159350 }, { "epoch": 0.6841657865588212, "grad_norm": 0.0006132782436907291, "learning_rate": 3.172003138932246e-05, "loss": 0.18524746894836425, "step": 159360 }, { "epoch": 0.6842087186488413, "grad_norm": 0.13383154571056366, "learning_rate": 3.1715719669204835e-05, "loss": 0.061647999286651614, "step": 159370 }, { "epoch": 0.6842516507388613, "grad_norm": 0.016351748257875443, "learning_rate": 3.171140794908721e-05, "loss": 0.045337098836898806, "step": 159380 }, { "epoch": 0.6842945828288812, "grad_norm": 1.11026132106781, "learning_rate": 3.170709622896958e-05, "loss": 0.09521642923355103, "step": 159390 }, { "epoch": 0.6843375149189013, "grad_norm": 7.038349628448486, "learning_rate": 3.170278450885196e-05, "loss": 0.27219247817993164, "step": 159400 }, { "epoch": 0.6843804470089213, "grad_norm": 1.584343671798706, "learning_rate": 3.169847278873434e-05, "loss": 0.31961917877197266, "step": 159410 }, { "epoch": 0.6844233790989412, "grad_norm": 1.0111151933670044, "learning_rate": 3.1694161068616715e-05, "loss": 0.16205734014511108, "step": 159420 }, { "epoch": 0.6844663111889613, "grad_norm": 0.01199696958065033, "learning_rate": 3.168984934849909e-05, "loss": 0.2615689277648926, "step": 159430 }, { "epoch": 0.6845092432789813, "grad_norm": 0.7997129559516907, "learning_rate": 3.168553762838147e-05, "loss": 0.07657995820045471, "step": 159440 }, { "epoch": 0.6845521753690013, "grad_norm": 0.00100781733635813, "learning_rate": 3.168122590826385e-05, "loss": 0.10128631591796874, "step": 159450 }, { "epoch": 0.6845951074590213, "grad_norm": 0.001099374727346003, "learning_rate": 3.1676914188146224e-05, "loss": 0.15639076232910157, "step": 159460 }, { "epoch": 0.6846380395490413, "grad_norm": 0.0031978210899978876, "learning_rate": 3.1672602468028595e-05, "loss": 0.02175009250640869, "step": 159470 }, { "epoch": 0.6846809716390614, "grad_norm": 0.04342164471745491, "learning_rate": 3.166829074791097e-05, "loss": 0.38820016384124756, "step": 159480 }, { "epoch": 0.6847239037290813, "grad_norm": 0.8257866501808167, "learning_rate": 3.166397902779335e-05, "loss": 0.17148340940475465, "step": 159490 }, { "epoch": 0.6847668358191014, "grad_norm": 0.8161950707435608, "learning_rate": 3.165966730767573e-05, "loss": 0.19095487594604493, "step": 159500 }, { "epoch": 0.6848097679091214, "grad_norm": 0.9420151114463806, "learning_rate": 3.16553555875581e-05, "loss": 0.3118323802947998, "step": 159510 }, { "epoch": 0.6848526999991413, "grad_norm": 40.907676696777344, "learning_rate": 3.1651043867440475e-05, "loss": 0.22367384433746337, "step": 159520 }, { "epoch": 0.6848956320891614, "grad_norm": 0.04517137259244919, "learning_rate": 3.164673214732285e-05, "loss": 0.14464277029037476, "step": 159530 }, { "epoch": 0.6849385641791814, "grad_norm": 1.6570888757705688, "learning_rate": 3.164242042720523e-05, "loss": 0.18562830686569215, "step": 159540 }, { "epoch": 0.6849814962692014, "grad_norm": 2.1816341876983643, "learning_rate": 3.163810870708761e-05, "loss": 0.3160805940628052, "step": 159550 }, { "epoch": 0.6850244283592214, "grad_norm": 0.47744184732437134, "learning_rate": 3.1633796986969984e-05, "loss": 0.13849132061004638, "step": 159560 }, { "epoch": 0.6850673604492414, "grad_norm": 0.006263840477913618, "learning_rate": 3.162948526685236e-05, "loss": 0.33607680797576905, "step": 159570 }, { "epoch": 0.6851102925392614, "grad_norm": 2.6430816650390625, "learning_rate": 3.162517354673474e-05, "loss": 0.48966164588928224, "step": 159580 }, { "epoch": 0.6851532246292814, "grad_norm": 0.0013148311991244555, "learning_rate": 3.162086182661711e-05, "loss": 0.07859854102134704, "step": 159590 }, { "epoch": 0.6851961567193015, "grad_norm": 0.1708020567893982, "learning_rate": 3.161655010649949e-05, "loss": 0.28932130336761475, "step": 159600 }, { "epoch": 0.6852390888093214, "grad_norm": 0.0995602011680603, "learning_rate": 3.1612238386381864e-05, "loss": 0.04597091376781463, "step": 159610 }, { "epoch": 0.6852820208993414, "grad_norm": 1.5684750080108643, "learning_rate": 3.160792666626424e-05, "loss": 0.28462212085723876, "step": 159620 }, { "epoch": 0.6853249529893615, "grad_norm": 0.0007086934638209641, "learning_rate": 3.160361494614661e-05, "loss": 0.13424597978591918, "step": 159630 }, { "epoch": 0.6853678850793814, "grad_norm": 2.4835119247436523, "learning_rate": 3.159930322602899e-05, "loss": 0.16594674587249755, "step": 159640 }, { "epoch": 0.6854108171694014, "grad_norm": 1.9306057691574097, "learning_rate": 3.159499150591137e-05, "loss": 0.30010387897491453, "step": 159650 }, { "epoch": 0.6854537492594215, "grad_norm": 0.3395925462245941, "learning_rate": 3.159067978579375e-05, "loss": 0.2560022592544556, "step": 159660 }, { "epoch": 0.6854966813494414, "grad_norm": 0.00048148524365387857, "learning_rate": 3.158636806567612e-05, "loss": 0.073485267162323, "step": 159670 }, { "epoch": 0.6855396134394615, "grad_norm": 1.5306675434112549, "learning_rate": 3.15820563455585e-05, "loss": 0.1691593885421753, "step": 159680 }, { "epoch": 0.6855825455294815, "grad_norm": 7.120598793029785, "learning_rate": 3.1577744625440876e-05, "loss": 0.24635562896728516, "step": 159690 }, { "epoch": 0.6856254776195014, "grad_norm": 0.6594388484954834, "learning_rate": 3.1573432905323253e-05, "loss": 0.09426384568214416, "step": 159700 }, { "epoch": 0.6856684097095215, "grad_norm": 0.019663846120238304, "learning_rate": 3.156912118520563e-05, "loss": 0.0715424358844757, "step": 159710 }, { "epoch": 0.6857113417995415, "grad_norm": 5.1663432121276855, "learning_rate": 3.1564809465088e-05, "loss": 0.39462015628814695, "step": 159720 }, { "epoch": 0.6857542738895614, "grad_norm": 0.09658759087324142, "learning_rate": 3.156049774497038e-05, "loss": 0.16555283069610596, "step": 159730 }, { "epoch": 0.6857972059795815, "grad_norm": 0.000354397197952494, "learning_rate": 3.1556186024852756e-05, "loss": 0.21570301055908203, "step": 159740 }, { "epoch": 0.6858401380696015, "grad_norm": 8.785991668701172, "learning_rate": 3.155187430473513e-05, "loss": 0.27770447731018066, "step": 159750 }, { "epoch": 0.6858830701596215, "grad_norm": 0.06055491045117378, "learning_rate": 3.1547562584617504e-05, "loss": 0.0837552785873413, "step": 159760 }, { "epoch": 0.6859260022496415, "grad_norm": 0.2875801920890808, "learning_rate": 3.154325086449989e-05, "loss": 0.19612315893173218, "step": 159770 }, { "epoch": 0.6859689343396616, "grad_norm": 0.024394122883677483, "learning_rate": 3.1538939144382265e-05, "loss": 0.23119087219238282, "step": 159780 }, { "epoch": 0.6860118664296815, "grad_norm": 0.0021245151292532682, "learning_rate": 3.153462742426464e-05, "loss": 0.1430792450904846, "step": 159790 }, { "epoch": 0.6860547985197015, "grad_norm": 0.04334768280386925, "learning_rate": 3.153031570414701e-05, "loss": 0.1303979992866516, "step": 159800 }, { "epoch": 0.6860977306097216, "grad_norm": 0.020839013159275055, "learning_rate": 3.152600398402939e-05, "loss": 0.07669672966003419, "step": 159810 }, { "epoch": 0.6861406626997415, "grad_norm": 1.1684037446975708, "learning_rate": 3.152169226391177e-05, "loss": 0.17646958827972412, "step": 159820 }, { "epoch": 0.6861835947897615, "grad_norm": 0.00033721158979460597, "learning_rate": 3.1517380543794145e-05, "loss": 0.23716237545013427, "step": 159830 }, { "epoch": 0.6862265268797816, "grad_norm": 0.6565494537353516, "learning_rate": 3.1513068823676516e-05, "loss": 0.24103212356567383, "step": 159840 }, { "epoch": 0.6862694589698015, "grad_norm": 0.006676824763417244, "learning_rate": 3.150875710355889e-05, "loss": 0.14299737215042113, "step": 159850 }, { "epoch": 0.6863123910598216, "grad_norm": 1.2873560190200806, "learning_rate": 3.150444538344127e-05, "loss": 0.39502582550048826, "step": 159860 }, { "epoch": 0.6863553231498416, "grad_norm": 0.019151126965880394, "learning_rate": 3.150013366332365e-05, "loss": 0.001504859235137701, "step": 159870 }, { "epoch": 0.6863982552398615, "grad_norm": 1.4346905946731567, "learning_rate": 3.1495821943206025e-05, "loss": 0.453825044631958, "step": 159880 }, { "epoch": 0.6864411873298816, "grad_norm": 0.0022187780123203993, "learning_rate": 3.14915102230884e-05, "loss": 0.19337046146392822, "step": 159890 }, { "epoch": 0.6864841194199016, "grad_norm": 0.5133907198905945, "learning_rate": 3.148719850297078e-05, "loss": 0.1229052186012268, "step": 159900 }, { "epoch": 0.6865270515099217, "grad_norm": 0.06288864463567734, "learning_rate": 3.148288678285316e-05, "loss": 0.3862318754196167, "step": 159910 }, { "epoch": 0.6865699835999416, "grad_norm": 0.23961742222309113, "learning_rate": 3.147857506273553e-05, "loss": 0.28116235733032224, "step": 159920 }, { "epoch": 0.6866129156899616, "grad_norm": 2.730591058731079, "learning_rate": 3.1474263342617905e-05, "loss": 0.27672324180603025, "step": 159930 }, { "epoch": 0.6866558477799817, "grad_norm": 0.0005314791342243552, "learning_rate": 3.146995162250028e-05, "loss": 0.33863320350646975, "step": 159940 }, { "epoch": 0.6866987798700016, "grad_norm": 0.9973521828651428, "learning_rate": 3.146563990238266e-05, "loss": 0.19327343702316285, "step": 159950 }, { "epoch": 0.6867417119600217, "grad_norm": 0.028170527890324593, "learning_rate": 3.146132818226503e-05, "loss": 0.26722302436828616, "step": 159960 }, { "epoch": 0.6867846440500417, "grad_norm": 0.028313281014561653, "learning_rate": 3.145701646214741e-05, "loss": 0.16753727197647095, "step": 159970 }, { "epoch": 0.6868275761400616, "grad_norm": 0.00603932561352849, "learning_rate": 3.1452704742029785e-05, "loss": 0.313533616065979, "step": 159980 }, { "epoch": 0.6868705082300817, "grad_norm": 1.455635905265808, "learning_rate": 3.144839302191216e-05, "loss": 0.2084658622741699, "step": 159990 }, { "epoch": 0.6869134403201017, "grad_norm": 0.003664513351395726, "learning_rate": 3.144408130179454e-05, "loss": 0.26113555431365965, "step": 160000 }, { "epoch": 0.6869134403201017, "eval_loss": 0.3895556628704071, "eval_runtime": 27.1269, "eval_samples_per_second": 3.686, "eval_steps_per_second": 3.686, "step": 160000 }, { "epoch": 0.6869563724101216, "grad_norm": 1.3072751760482788, "learning_rate": 3.143976958167692e-05, "loss": 0.25968708992004397, "step": 160010 }, { "epoch": 0.6869993045001417, "grad_norm": 0.5385197997093201, "learning_rate": 3.1435457861559294e-05, "loss": 0.13607220649719237, "step": 160020 }, { "epoch": 0.6870422365901617, "grad_norm": 0.033412832766771317, "learning_rate": 3.143114614144167e-05, "loss": 0.23602588176727296, "step": 160030 }, { "epoch": 0.6870851686801817, "grad_norm": 0.029198991134762764, "learning_rate": 3.142683442132404e-05, "loss": 0.12028307914733886, "step": 160040 }, { "epoch": 0.6871281007702017, "grad_norm": 0.0036634765565395355, "learning_rate": 3.142252270120642e-05, "loss": 0.2307124137878418, "step": 160050 }, { "epoch": 0.6871710328602217, "grad_norm": 8.274073600769043, "learning_rate": 3.14182109810888e-05, "loss": 0.3074865102767944, "step": 160060 }, { "epoch": 0.6872139649502417, "grad_norm": 0.010077284649014473, "learning_rate": 3.1413899260971174e-05, "loss": 0.07915792465209961, "step": 160070 }, { "epoch": 0.6872568970402617, "grad_norm": 0.02237793058156967, "learning_rate": 3.140958754085355e-05, "loss": 0.3373283624649048, "step": 160080 }, { "epoch": 0.6872998291302818, "grad_norm": 0.01876627467572689, "learning_rate": 3.140527582073592e-05, "loss": 0.14437304735183715, "step": 160090 }, { "epoch": 0.6873427612203017, "grad_norm": 0.08891065418720245, "learning_rate": 3.14009641006183e-05, "loss": 0.028795576095581053, "step": 160100 }, { "epoch": 0.6873856933103217, "grad_norm": 0.6405884027481079, "learning_rate": 3.139665238050068e-05, "loss": 0.16790642738342285, "step": 160110 }, { "epoch": 0.6874286254003418, "grad_norm": 2.350557804107666, "learning_rate": 3.1392340660383054e-05, "loss": 0.1628109931945801, "step": 160120 }, { "epoch": 0.6874715574903617, "grad_norm": 0.002268231939524412, "learning_rate": 3.138802894026543e-05, "loss": 0.18723307847976683, "step": 160130 }, { "epoch": 0.6875144895803817, "grad_norm": 0.004576073493808508, "learning_rate": 3.138371722014781e-05, "loss": 0.19961996078491212, "step": 160140 }, { "epoch": 0.6875574216704018, "grad_norm": 6.669589996337891, "learning_rate": 3.1379405500030186e-05, "loss": 0.3290428400039673, "step": 160150 }, { "epoch": 0.6876003537604217, "grad_norm": 0.938614010810852, "learning_rate": 3.1375093779912564e-05, "loss": 0.20669236183166503, "step": 160160 }, { "epoch": 0.6876432858504418, "grad_norm": 0.08867359906435013, "learning_rate": 3.1370782059794934e-05, "loss": 0.13379579782485962, "step": 160170 }, { "epoch": 0.6876862179404618, "grad_norm": 0.0012897284468635917, "learning_rate": 3.136647033967731e-05, "loss": 0.32950284481048586, "step": 160180 }, { "epoch": 0.6877291500304817, "grad_norm": 0.14256958663463593, "learning_rate": 3.136215861955969e-05, "loss": 0.33088634014129636, "step": 160190 }, { "epoch": 0.6877720821205018, "grad_norm": 0.0020969025790691376, "learning_rate": 3.1357846899442066e-05, "loss": 0.26999151706695557, "step": 160200 }, { "epoch": 0.6878150142105218, "grad_norm": 0.6317549347877502, "learning_rate": 3.135353517932444e-05, "loss": 0.34250357151031496, "step": 160210 }, { "epoch": 0.6878579463005418, "grad_norm": 1.39315927028656, "learning_rate": 3.1349223459206814e-05, "loss": 0.26374197006225586, "step": 160220 }, { "epoch": 0.6879008783905618, "grad_norm": 0.11452850699424744, "learning_rate": 3.134491173908919e-05, "loss": 0.08184933662414551, "step": 160230 }, { "epoch": 0.6879438104805818, "grad_norm": 1.91048264503479, "learning_rate": 3.134060001897157e-05, "loss": 0.10932408571243286, "step": 160240 }, { "epoch": 0.6879867425706018, "grad_norm": 0.05805795639753342, "learning_rate": 3.1336288298853946e-05, "loss": 0.29509758949279785, "step": 160250 }, { "epoch": 0.6880296746606218, "grad_norm": 0.13939379155635834, "learning_rate": 3.1331976578736324e-05, "loss": 0.1063469409942627, "step": 160260 }, { "epoch": 0.6880726067506419, "grad_norm": 0.001417188672348857, "learning_rate": 3.13276648586187e-05, "loss": 0.0481675386428833, "step": 160270 }, { "epoch": 0.6881155388406618, "grad_norm": 0.44573941826820374, "learning_rate": 3.132335313850108e-05, "loss": 0.1946892261505127, "step": 160280 }, { "epoch": 0.6881584709306818, "grad_norm": 0.018087316304445267, "learning_rate": 3.131904141838345e-05, "loss": 0.08029451370239257, "step": 160290 }, { "epoch": 0.6882014030207019, "grad_norm": 3.0328128337860107, "learning_rate": 3.1314729698265826e-05, "loss": 0.2553675651550293, "step": 160300 }, { "epoch": 0.6882443351107218, "grad_norm": 0.018584132194519043, "learning_rate": 3.1310417978148204e-05, "loss": 0.15149282217025756, "step": 160310 }, { "epoch": 0.6882872672007418, "grad_norm": 0.024891283363103867, "learning_rate": 3.130610625803058e-05, "loss": 0.2260596513748169, "step": 160320 }, { "epoch": 0.6883301992907619, "grad_norm": 0.010092142969369888, "learning_rate": 3.130179453791295e-05, "loss": 0.3067698240280151, "step": 160330 }, { "epoch": 0.6883731313807819, "grad_norm": 0.05609379708766937, "learning_rate": 3.129748281779533e-05, "loss": 0.007509586960077285, "step": 160340 }, { "epoch": 0.6884160634708019, "grad_norm": 2.336320400238037, "learning_rate": 3.1293171097677706e-05, "loss": 0.14287641048431396, "step": 160350 }, { "epoch": 0.6884589955608219, "grad_norm": 0.007429433986544609, "learning_rate": 3.128885937756009e-05, "loss": 0.28202841281890867, "step": 160360 }, { "epoch": 0.688501927650842, "grad_norm": 0.31570228934288025, "learning_rate": 3.128454765744246e-05, "loss": 0.16258682012557985, "step": 160370 }, { "epoch": 0.6885448597408619, "grad_norm": 0.2192019522190094, "learning_rate": 3.128023593732484e-05, "loss": 0.06812013983726502, "step": 160380 }, { "epoch": 0.6885877918308819, "grad_norm": 1.0381423234939575, "learning_rate": 3.1275924217207216e-05, "loss": 0.1076446533203125, "step": 160390 }, { "epoch": 0.688630723920902, "grad_norm": 0.008879567496478558, "learning_rate": 3.127161249708959e-05, "loss": 0.2584580183029175, "step": 160400 }, { "epoch": 0.6886736560109219, "grad_norm": 7.12844705581665, "learning_rate": 3.126730077697197e-05, "loss": 0.059986865520477294, "step": 160410 }, { "epoch": 0.6887165881009419, "grad_norm": 0.026752643287181854, "learning_rate": 3.126298905685434e-05, "loss": 0.24028337001800537, "step": 160420 }, { "epoch": 0.688759520190962, "grad_norm": 0.056220006197690964, "learning_rate": 3.125867733673672e-05, "loss": 0.2983167409896851, "step": 160430 }, { "epoch": 0.6888024522809819, "grad_norm": 0.2604270875453949, "learning_rate": 3.1254365616619095e-05, "loss": 0.0750252664089203, "step": 160440 }, { "epoch": 0.688845384371002, "grad_norm": 0.009172854013741016, "learning_rate": 3.125005389650147e-05, "loss": 0.2441507339477539, "step": 160450 }, { "epoch": 0.688888316461022, "grad_norm": 0.33372387290000916, "learning_rate": 3.124574217638384e-05, "loss": 0.32252767086029055, "step": 160460 }, { "epoch": 0.6889312485510419, "grad_norm": 1.07737398147583, "learning_rate": 3.124143045626623e-05, "loss": 0.20390644073486328, "step": 160470 }, { "epoch": 0.688974180641062, "grad_norm": 1.6875758171081543, "learning_rate": 3.1237118736148605e-05, "loss": 0.22852468490600586, "step": 160480 }, { "epoch": 0.689017112731082, "grad_norm": 0.16039396822452545, "learning_rate": 3.123280701603098e-05, "loss": 0.22296838760375975, "step": 160490 }, { "epoch": 0.689060044821102, "grad_norm": 1.185931921005249, "learning_rate": 3.122849529591335e-05, "loss": 0.08817307353019714, "step": 160500 }, { "epoch": 0.689102976911122, "grad_norm": 8.558279991149902, "learning_rate": 3.122418357579573e-05, "loss": 0.22579166889190674, "step": 160510 }, { "epoch": 0.689145909001142, "grad_norm": 0.5402048230171204, "learning_rate": 3.121987185567811e-05, "loss": 0.32658090591430666, "step": 160520 }, { "epoch": 0.689188841091162, "grad_norm": 7.0292463302612305, "learning_rate": 3.1215560135560485e-05, "loss": 0.12179718017578126, "step": 160530 }, { "epoch": 0.689231773181182, "grad_norm": 0.03717799857258797, "learning_rate": 3.1211248415442855e-05, "loss": 0.3219716787338257, "step": 160540 }, { "epoch": 0.689274705271202, "grad_norm": 0.08582460135221481, "learning_rate": 3.120693669532523e-05, "loss": 0.3392592191696167, "step": 160550 }, { "epoch": 0.689317637361222, "grad_norm": 0.05416666343808174, "learning_rate": 3.120262497520761e-05, "loss": 0.18559746742248534, "step": 160560 }, { "epoch": 0.689360569451242, "grad_norm": 0.020222166553139687, "learning_rate": 3.119831325508999e-05, "loss": 0.10422813892364502, "step": 160570 }, { "epoch": 0.6894035015412621, "grad_norm": 2.5958380699157715, "learning_rate": 3.1194001534972365e-05, "loss": 0.3050968170166016, "step": 160580 }, { "epoch": 0.689446433631282, "grad_norm": 0.02333100326359272, "learning_rate": 3.118968981485474e-05, "loss": 0.36220335960388184, "step": 160590 }, { "epoch": 0.689489365721302, "grad_norm": 0.003293287241831422, "learning_rate": 3.118537809473712e-05, "loss": 0.1430499792098999, "step": 160600 }, { "epoch": 0.6895322978113221, "grad_norm": 0.003580573247745633, "learning_rate": 3.11810663746195e-05, "loss": 0.26941795349121095, "step": 160610 }, { "epoch": 0.689575229901342, "grad_norm": 0.03517276421189308, "learning_rate": 3.117675465450187e-05, "loss": 0.2626699447631836, "step": 160620 }, { "epoch": 0.689618161991362, "grad_norm": 0.8139095306396484, "learning_rate": 3.1172442934384245e-05, "loss": 0.06488630175590515, "step": 160630 }, { "epoch": 0.6896610940813821, "grad_norm": 11.103382110595703, "learning_rate": 3.116813121426662e-05, "loss": 0.3776247501373291, "step": 160640 }, { "epoch": 0.689704026171402, "grad_norm": 1.2519363164901733, "learning_rate": 3.1163819494149e-05, "loss": 0.11121902465820313, "step": 160650 }, { "epoch": 0.6897469582614221, "grad_norm": 0.36693301796913147, "learning_rate": 3.115950777403137e-05, "loss": 0.2761913061141968, "step": 160660 }, { "epoch": 0.6897898903514421, "grad_norm": 0.003073914907872677, "learning_rate": 3.115519605391375e-05, "loss": 0.053472626209259036, "step": 160670 }, { "epoch": 0.689832822441462, "grad_norm": 0.05906624346971512, "learning_rate": 3.1150884333796125e-05, "loss": 0.24504506587982178, "step": 160680 }, { "epoch": 0.6898757545314821, "grad_norm": 0.018624769523739815, "learning_rate": 3.11465726136785e-05, "loss": 0.16039087772369384, "step": 160690 }, { "epoch": 0.6899186866215021, "grad_norm": 0.7570061683654785, "learning_rate": 3.114226089356088e-05, "loss": 0.1374683976173401, "step": 160700 }, { "epoch": 0.6899616187115221, "grad_norm": 0.5951012372970581, "learning_rate": 3.1137949173443257e-05, "loss": 0.21923141479492186, "step": 160710 }, { "epoch": 0.6900045508015421, "grad_norm": 0.004915403202176094, "learning_rate": 3.1133637453325634e-05, "loss": 0.3021175622940063, "step": 160720 }, { "epoch": 0.6900474828915621, "grad_norm": 2.058332920074463, "learning_rate": 3.112932573320801e-05, "loss": 0.4457847595214844, "step": 160730 }, { "epoch": 0.6900904149815821, "grad_norm": 1.1177704334259033, "learning_rate": 3.112501401309038e-05, "loss": 0.23458011150360109, "step": 160740 }, { "epoch": 0.6901333470716021, "grad_norm": 0.007893427275121212, "learning_rate": 3.112070229297276e-05, "loss": 0.28530762195587156, "step": 160750 }, { "epoch": 0.6901762791616222, "grad_norm": 0.0039029717445373535, "learning_rate": 3.1116390572855137e-05, "loss": 0.23412270545959474, "step": 160760 }, { "epoch": 0.6902192112516422, "grad_norm": 0.0970078706741333, "learning_rate": 3.1112078852737514e-05, "loss": 0.2985024929046631, "step": 160770 }, { "epoch": 0.6902621433416621, "grad_norm": 3.841125726699829, "learning_rate": 3.110776713261989e-05, "loss": 0.28366975784301757, "step": 160780 }, { "epoch": 0.6903050754316822, "grad_norm": 0.0038798335008323193, "learning_rate": 3.110345541250226e-05, "loss": 0.27859447002410886, "step": 160790 }, { "epoch": 0.6903480075217022, "grad_norm": 4.823430061340332, "learning_rate": 3.109914369238464e-05, "loss": 0.19317221641540527, "step": 160800 }, { "epoch": 0.6903909396117222, "grad_norm": 4.753268718719482, "learning_rate": 3.1094831972267016e-05, "loss": 0.11289010047912598, "step": 160810 }, { "epoch": 0.6904338717017422, "grad_norm": 5.022866249084473, "learning_rate": 3.1090520252149394e-05, "loss": 0.17281042337417601, "step": 160820 }, { "epoch": 0.6904768037917622, "grad_norm": 0.7847058176994324, "learning_rate": 3.108620853203177e-05, "loss": 0.34594273567199707, "step": 160830 }, { "epoch": 0.6905197358817822, "grad_norm": 1.2339727878570557, "learning_rate": 3.108189681191415e-05, "loss": 0.4348318576812744, "step": 160840 }, { "epoch": 0.6905626679718022, "grad_norm": 0.024677403271198273, "learning_rate": 3.1077585091796526e-05, "loss": 0.00807729884982109, "step": 160850 }, { "epoch": 0.6906056000618223, "grad_norm": 0.12931907176971436, "learning_rate": 3.10732733716789e-05, "loss": 0.11871621608734131, "step": 160860 }, { "epoch": 0.6906485321518422, "grad_norm": 0.07335333526134491, "learning_rate": 3.1068961651561274e-05, "loss": 0.2547421932220459, "step": 160870 }, { "epoch": 0.6906914642418622, "grad_norm": 0.05584121495485306, "learning_rate": 3.106464993144365e-05, "loss": 0.2417241096496582, "step": 160880 }, { "epoch": 0.6907343963318823, "grad_norm": 0.0022020612377673388, "learning_rate": 3.106033821132603e-05, "loss": 0.05848243236541748, "step": 160890 }, { "epoch": 0.6907773284219022, "grad_norm": 0.03212396800518036, "learning_rate": 3.1056026491208406e-05, "loss": 0.36734011173248293, "step": 160900 }, { "epoch": 0.6908202605119222, "grad_norm": 1.3712433576583862, "learning_rate": 3.1051714771090776e-05, "loss": 0.11554309129714965, "step": 160910 }, { "epoch": 0.6908631926019423, "grad_norm": 0.0002910851326305419, "learning_rate": 3.1047403050973154e-05, "loss": 0.3291846513748169, "step": 160920 }, { "epoch": 0.6909061246919622, "grad_norm": 0.008376223966479301, "learning_rate": 3.104309133085553e-05, "loss": 0.17492603063583373, "step": 160930 }, { "epoch": 0.6909490567819823, "grad_norm": 0.00044483650708571076, "learning_rate": 3.103877961073791e-05, "loss": 0.003586423024535179, "step": 160940 }, { "epoch": 0.6909919888720023, "grad_norm": 0.6287297606468201, "learning_rate": 3.1034467890620286e-05, "loss": 0.19123516082763672, "step": 160950 }, { "epoch": 0.6910349209620222, "grad_norm": 1.69691002368927, "learning_rate": 3.103015617050266e-05, "loss": 0.3056190490722656, "step": 160960 }, { "epoch": 0.6910778530520423, "grad_norm": 0.202299565076828, "learning_rate": 3.102584445038504e-05, "loss": 0.3371596336364746, "step": 160970 }, { "epoch": 0.6911207851420623, "grad_norm": 1.769765853881836, "learning_rate": 3.102153273026742e-05, "loss": 0.4470851898193359, "step": 160980 }, { "epoch": 0.6911637172320823, "grad_norm": 0.14136166870594025, "learning_rate": 3.101722101014979e-05, "loss": 0.19823846817016602, "step": 160990 }, { "epoch": 0.6912066493221023, "grad_norm": 1.6474438905715942, "learning_rate": 3.1012909290032166e-05, "loss": 0.2073206424713135, "step": 161000 }, { "epoch": 0.6912066493221023, "eval_loss": 0.3861920237541199, "eval_runtime": 27.2145, "eval_samples_per_second": 3.675, "eval_steps_per_second": 3.675, "step": 161000 }, { "epoch": 0.6912495814121223, "grad_norm": 0.3103042542934418, "learning_rate": 3.100859756991454e-05, "loss": 0.09276053905487061, "step": 161010 }, { "epoch": 0.6912925135021423, "grad_norm": 0.05611109361052513, "learning_rate": 3.100428584979692e-05, "loss": 0.24587397575378417, "step": 161020 }, { "epoch": 0.6913354455921623, "grad_norm": 0.03766069933772087, "learning_rate": 3.099997412967929e-05, "loss": 0.2660505294799805, "step": 161030 }, { "epoch": 0.6913783776821824, "grad_norm": 0.11256032437086105, "learning_rate": 3.099566240956167e-05, "loss": 0.06991714239120483, "step": 161040 }, { "epoch": 0.6914213097722023, "grad_norm": 0.0912373960018158, "learning_rate": 3.0991350689444046e-05, "loss": 0.11046913862228394, "step": 161050 }, { "epoch": 0.6914642418622223, "grad_norm": 0.043732017278671265, "learning_rate": 3.098703896932642e-05, "loss": 0.1306806683540344, "step": 161060 }, { "epoch": 0.6915071739522424, "grad_norm": 0.0003347241145092994, "learning_rate": 3.09827272492088e-05, "loss": 0.16925837993621826, "step": 161070 }, { "epoch": 0.6915501060422623, "grad_norm": 0.013059504330158234, "learning_rate": 3.097841552909118e-05, "loss": 0.08966315388679505, "step": 161080 }, { "epoch": 0.6915930381322823, "grad_norm": 0.8090730309486389, "learning_rate": 3.0974103808973555e-05, "loss": 0.25440073013305664, "step": 161090 }, { "epoch": 0.6916359702223024, "grad_norm": 0.026317503303289413, "learning_rate": 3.096979208885593e-05, "loss": 0.17350938320159912, "step": 161100 }, { "epoch": 0.6916789023123223, "grad_norm": 0.006155726965516806, "learning_rate": 3.09654803687383e-05, "loss": 0.08406718969345092, "step": 161110 }, { "epoch": 0.6917218344023424, "grad_norm": 0.000807679258286953, "learning_rate": 3.096116864862068e-05, "loss": 0.06873743534088135, "step": 161120 }, { "epoch": 0.6917647664923624, "grad_norm": 0.05226420238614082, "learning_rate": 3.095685692850306e-05, "loss": 0.1460352897644043, "step": 161130 }, { "epoch": 0.6918076985823823, "grad_norm": 0.020941512659192085, "learning_rate": 3.0952545208385435e-05, "loss": 0.15456979274749755, "step": 161140 }, { "epoch": 0.6918506306724024, "grad_norm": 0.006885021924972534, "learning_rate": 3.094823348826781e-05, "loss": 0.12112609148025513, "step": 161150 }, { "epoch": 0.6918935627624224, "grad_norm": 0.0010463733924552798, "learning_rate": 3.094392176815018e-05, "loss": 0.14944925308227539, "step": 161160 }, { "epoch": 0.6919364948524424, "grad_norm": 0.0837843269109726, "learning_rate": 3.093961004803256e-05, "loss": 0.0708765983581543, "step": 161170 }, { "epoch": 0.6919794269424624, "grad_norm": 0.0009495640988461673, "learning_rate": 3.0935298327914944e-05, "loss": 0.397504997253418, "step": 161180 }, { "epoch": 0.6920223590324824, "grad_norm": 1.5892711877822876, "learning_rate": 3.093098660779732e-05, "loss": 0.27846102714538573, "step": 161190 }, { "epoch": 0.6920652911225025, "grad_norm": 0.02117239683866501, "learning_rate": 3.092667488767969e-05, "loss": 0.1364277720451355, "step": 161200 }, { "epoch": 0.6921082232125224, "grad_norm": 0.029707126319408417, "learning_rate": 3.092236316756207e-05, "loss": 0.09654564261436463, "step": 161210 }, { "epoch": 0.6921511553025425, "grad_norm": 1.0642058849334717, "learning_rate": 3.091805144744445e-05, "loss": 0.20506763458251953, "step": 161220 }, { "epoch": 0.6921940873925625, "grad_norm": 0.3429529368877411, "learning_rate": 3.0913739727326824e-05, "loss": 0.23607473373413085, "step": 161230 }, { "epoch": 0.6922370194825824, "grad_norm": 0.008567404001951218, "learning_rate": 3.0909428007209195e-05, "loss": 0.18678658008575438, "step": 161240 }, { "epoch": 0.6922799515726025, "grad_norm": 0.72314453125, "learning_rate": 3.090511628709157e-05, "loss": 0.14462425708770751, "step": 161250 }, { "epoch": 0.6923228836626225, "grad_norm": 0.0002619328151922673, "learning_rate": 3.090080456697395e-05, "loss": 0.15907952785491944, "step": 161260 }, { "epoch": 0.6923658157526424, "grad_norm": 0.003494508331641555, "learning_rate": 3.089649284685633e-05, "loss": 0.19829418659210205, "step": 161270 }, { "epoch": 0.6924087478426625, "grad_norm": 0.01050402969121933, "learning_rate": 3.08921811267387e-05, "loss": 0.23870673179626464, "step": 161280 }, { "epoch": 0.6924516799326825, "grad_norm": 2.830049514770508, "learning_rate": 3.088786940662108e-05, "loss": 0.3266881465911865, "step": 161290 }, { "epoch": 0.6924946120227025, "grad_norm": 0.07396464049816132, "learning_rate": 3.088355768650346e-05, "loss": 0.24633843898773194, "step": 161300 }, { "epoch": 0.6925375441127225, "grad_norm": 6.711921691894531, "learning_rate": 3.0879245966385836e-05, "loss": 0.3260725259780884, "step": 161310 }, { "epoch": 0.6925804762027425, "grad_norm": 0.00216602417640388, "learning_rate": 3.087493424626821e-05, "loss": 0.152586030960083, "step": 161320 }, { "epoch": 0.6926234082927625, "grad_norm": 0.004018913954496384, "learning_rate": 3.0870622526150584e-05, "loss": 0.17486083507537842, "step": 161330 }, { "epoch": 0.6926663403827825, "grad_norm": 0.04566876217722893, "learning_rate": 3.086631080603296e-05, "loss": 0.11617728471755981, "step": 161340 }, { "epoch": 0.6927092724728026, "grad_norm": 0.2247513383626938, "learning_rate": 3.086199908591534e-05, "loss": 0.23323283195495606, "step": 161350 }, { "epoch": 0.6927522045628225, "grad_norm": 0.7604058980941772, "learning_rate": 3.085768736579771e-05, "loss": 0.4106475353240967, "step": 161360 }, { "epoch": 0.6927951366528425, "grad_norm": 20.811613082885742, "learning_rate": 3.085337564568009e-05, "loss": 0.1343095064163208, "step": 161370 }, { "epoch": 0.6928380687428626, "grad_norm": 1.6863338947296143, "learning_rate": 3.0849063925562464e-05, "loss": 0.2618619680404663, "step": 161380 }, { "epoch": 0.6928810008328825, "grad_norm": 0.0029260574374347925, "learning_rate": 3.084475220544484e-05, "loss": 0.1627426862716675, "step": 161390 }, { "epoch": 0.6929239329229026, "grad_norm": 0.00023793634318280965, "learning_rate": 3.084044048532722e-05, "loss": 0.2666466474533081, "step": 161400 }, { "epoch": 0.6929668650129226, "grad_norm": 0.010093128308653831, "learning_rate": 3.0836128765209596e-05, "loss": 0.26630597114562987, "step": 161410 }, { "epoch": 0.6930097971029425, "grad_norm": 0.001136020408011973, "learning_rate": 3.0831817045091973e-05, "loss": 0.24697518348693848, "step": 161420 }, { "epoch": 0.6930527291929626, "grad_norm": 0.9408860802650452, "learning_rate": 3.082750532497435e-05, "loss": 0.16035972833633422, "step": 161430 }, { "epoch": 0.6930956612829826, "grad_norm": 7.362483024597168, "learning_rate": 3.082319360485672e-05, "loss": 0.1600419044494629, "step": 161440 }, { "epoch": 0.6931385933730025, "grad_norm": 0.0010307239135727286, "learning_rate": 3.08188818847391e-05, "loss": 0.2767223119735718, "step": 161450 }, { "epoch": 0.6931815254630226, "grad_norm": 1.1499717235565186, "learning_rate": 3.0814570164621476e-05, "loss": 0.3421752691268921, "step": 161460 }, { "epoch": 0.6932244575530426, "grad_norm": 1.340930700302124, "learning_rate": 3.081025844450385e-05, "loss": 0.3829795360565186, "step": 161470 }, { "epoch": 0.6932673896430626, "grad_norm": 0.5305355787277222, "learning_rate": 3.0805946724386224e-05, "loss": 0.32126965522766116, "step": 161480 }, { "epoch": 0.6933103217330826, "grad_norm": 0.019384825602173805, "learning_rate": 3.08016350042686e-05, "loss": 0.2005154609680176, "step": 161490 }, { "epoch": 0.6933532538231026, "grad_norm": 0.0003718382795341313, "learning_rate": 3.079732328415098e-05, "loss": 0.19061292409896852, "step": 161500 }, { "epoch": 0.6933961859131226, "grad_norm": 0.5108389854431152, "learning_rate": 3.0793011564033356e-05, "loss": 0.25514309406280516, "step": 161510 }, { "epoch": 0.6934391180031426, "grad_norm": 0.6739466190338135, "learning_rate": 3.078869984391573e-05, "loss": 0.2486048936843872, "step": 161520 }, { "epoch": 0.6934820500931627, "grad_norm": 1.4415817260742188, "learning_rate": 3.078438812379811e-05, "loss": 0.35487942695617675, "step": 161530 }, { "epoch": 0.6935249821831826, "grad_norm": 0.013796456158161163, "learning_rate": 3.078007640368049e-05, "loss": 0.1332213282585144, "step": 161540 }, { "epoch": 0.6935679142732026, "grad_norm": 0.011534093879163265, "learning_rate": 3.0775764683562865e-05, "loss": 0.12506383657455444, "step": 161550 }, { "epoch": 0.6936108463632227, "grad_norm": 0.444654256105423, "learning_rate": 3.077145296344524e-05, "loss": 0.018143628537654877, "step": 161560 }, { "epoch": 0.6936537784532426, "grad_norm": 0.0323002003133297, "learning_rate": 3.076714124332761e-05, "loss": 0.12150543928146362, "step": 161570 }, { "epoch": 0.6936967105432627, "grad_norm": 0.0209288839250803, "learning_rate": 3.076282952320999e-05, "loss": 0.4645984172821045, "step": 161580 }, { "epoch": 0.6937396426332827, "grad_norm": 0.04016595706343651, "learning_rate": 3.075851780309237e-05, "loss": 0.21200146675109863, "step": 161590 }, { "epoch": 0.6937825747233026, "grad_norm": 2.0530660152435303, "learning_rate": 3.0754206082974745e-05, "loss": 0.24847280979156494, "step": 161600 }, { "epoch": 0.6938255068133227, "grad_norm": 0.0006301469402387738, "learning_rate": 3.0749894362857116e-05, "loss": 0.21240553855895997, "step": 161610 }, { "epoch": 0.6938684389033427, "grad_norm": 0.0044422089122235775, "learning_rate": 3.074558264273949e-05, "loss": 0.2668626070022583, "step": 161620 }, { "epoch": 0.6939113709933628, "grad_norm": 0.0022322386503219604, "learning_rate": 3.074127092262187e-05, "loss": 0.25018165111541746, "step": 161630 }, { "epoch": 0.6939543030833827, "grad_norm": 0.0028129578568041325, "learning_rate": 3.073695920250425e-05, "loss": 0.11671936511993408, "step": 161640 }, { "epoch": 0.6939972351734027, "grad_norm": 0.033121053129434586, "learning_rate": 3.0732647482386625e-05, "loss": 0.2637017726898193, "step": 161650 }, { "epoch": 0.6940401672634228, "grad_norm": 0.0012332494370639324, "learning_rate": 3.0728335762269e-05, "loss": 0.25103752613067626, "step": 161660 }, { "epoch": 0.6940830993534427, "grad_norm": 5.106101036071777, "learning_rate": 3.072402404215138e-05, "loss": 0.27963852882385254, "step": 161670 }, { "epoch": 0.6941260314434627, "grad_norm": 0.5057955384254456, "learning_rate": 3.071971232203376e-05, "loss": 0.22827911376953125, "step": 161680 }, { "epoch": 0.6941689635334828, "grad_norm": 2.2521936893463135, "learning_rate": 3.071540060191613e-05, "loss": 0.13674638271331788, "step": 161690 }, { "epoch": 0.6942118956235027, "grad_norm": 0.2217772901058197, "learning_rate": 3.0711088881798505e-05, "loss": 0.06654155850410462, "step": 161700 }, { "epoch": 0.6942548277135228, "grad_norm": 0.98880535364151, "learning_rate": 3.070677716168088e-05, "loss": 0.19069713354110718, "step": 161710 }, { "epoch": 0.6942977598035428, "grad_norm": 1.4045045375823975, "learning_rate": 3.070246544156326e-05, "loss": 0.18224425315856935, "step": 161720 }, { "epoch": 0.6943406918935627, "grad_norm": 0.017259785905480385, "learning_rate": 3.069815372144563e-05, "loss": 0.14208518266677855, "step": 161730 }, { "epoch": 0.6943836239835828, "grad_norm": 0.004367508925497532, "learning_rate": 3.069384200132801e-05, "loss": 0.1599483847618103, "step": 161740 }, { "epoch": 0.6944265560736028, "grad_norm": 0.0010708282934501767, "learning_rate": 3.0689530281210385e-05, "loss": 0.06576325893402099, "step": 161750 }, { "epoch": 0.6944694881636228, "grad_norm": 9.624493598937988, "learning_rate": 3.068521856109276e-05, "loss": 0.22715427875518798, "step": 161760 }, { "epoch": 0.6945124202536428, "grad_norm": 0.0009245334658771753, "learning_rate": 3.068090684097514e-05, "loss": 0.41392016410827637, "step": 161770 }, { "epoch": 0.6945553523436628, "grad_norm": 0.07319210469722748, "learning_rate": 3.067659512085752e-05, "loss": 0.1870087742805481, "step": 161780 }, { "epoch": 0.6945982844336828, "grad_norm": 0.010038390755653381, "learning_rate": 3.0672283400739894e-05, "loss": 0.4162933826446533, "step": 161790 }, { "epoch": 0.6946412165237028, "grad_norm": 1.0667264461517334, "learning_rate": 3.066797168062227e-05, "loss": 0.36954782009124754, "step": 161800 }, { "epoch": 0.6946841486137229, "grad_norm": 0.09390226751565933, "learning_rate": 3.066365996050464e-05, "loss": 0.08525258898735047, "step": 161810 }, { "epoch": 0.6947270807037428, "grad_norm": 1.0057406425476074, "learning_rate": 3.065934824038702e-05, "loss": 0.10366959571838379, "step": 161820 }, { "epoch": 0.6947700127937628, "grad_norm": 0.027829967439174652, "learning_rate": 3.06550365202694e-05, "loss": 0.13686974048614503, "step": 161830 }, { "epoch": 0.6948129448837829, "grad_norm": 0.48255249857902527, "learning_rate": 3.0650724800151774e-05, "loss": 0.2699114799499512, "step": 161840 }, { "epoch": 0.6948558769738028, "grad_norm": 0.5585467219352722, "learning_rate": 3.0646413080034145e-05, "loss": 0.16011971235275269, "step": 161850 }, { "epoch": 0.6948988090638228, "grad_norm": 0.003796887816861272, "learning_rate": 3.064210135991652e-05, "loss": 0.21151573657989503, "step": 161860 }, { "epoch": 0.6949417411538429, "grad_norm": 0.0015773115446791053, "learning_rate": 3.06377896397989e-05, "loss": 0.30381500720977783, "step": 161870 }, { "epoch": 0.6949846732438628, "grad_norm": 1.4874831438064575, "learning_rate": 3.0633477919681284e-05, "loss": 0.12889543771743775, "step": 161880 }, { "epoch": 0.6950276053338829, "grad_norm": 4.193164825439453, "learning_rate": 3.062916619956366e-05, "loss": 0.17410954236984252, "step": 161890 }, { "epoch": 0.6950705374239029, "grad_norm": 1.0373023748397827, "learning_rate": 3.062485447944603e-05, "loss": 0.3082738399505615, "step": 161900 }, { "epoch": 0.6951134695139228, "grad_norm": 0.0028177013155072927, "learning_rate": 3.062054275932841e-05, "loss": 0.3014000654220581, "step": 161910 }, { "epoch": 0.6951564016039429, "grad_norm": 0.821558952331543, "learning_rate": 3.0616231039210786e-05, "loss": 0.18940749168395996, "step": 161920 }, { "epoch": 0.6951993336939629, "grad_norm": 0.30685821175575256, "learning_rate": 3.0611919319093164e-05, "loss": 0.3193447828292847, "step": 161930 }, { "epoch": 0.6952422657839828, "grad_norm": 0.001213685842230916, "learning_rate": 3.0607607598975534e-05, "loss": 0.1531369924545288, "step": 161940 }, { "epoch": 0.6952851978740029, "grad_norm": 0.2140471488237381, "learning_rate": 3.060329587885791e-05, "loss": 0.254035210609436, "step": 161950 }, { "epoch": 0.6953281299640229, "grad_norm": 1.7509942054748535, "learning_rate": 3.059898415874029e-05, "loss": 0.23533205986022948, "step": 161960 }, { "epoch": 0.6953710620540429, "grad_norm": 2.06317400932312, "learning_rate": 3.0594672438622666e-05, "loss": 0.1791067361831665, "step": 161970 }, { "epoch": 0.6954139941440629, "grad_norm": 0.04516097903251648, "learning_rate": 3.059036071850504e-05, "loss": 0.17306737899780272, "step": 161980 }, { "epoch": 0.695456926234083, "grad_norm": 0.0025410563684999943, "learning_rate": 3.058604899838742e-05, "loss": 0.2030165672302246, "step": 161990 }, { "epoch": 0.6954998583241029, "grad_norm": 1.482028603553772, "learning_rate": 3.05817372782698e-05, "loss": 0.4031516551971436, "step": 162000 }, { "epoch": 0.6954998583241029, "eval_loss": 0.38559821248054504, "eval_runtime": 27.2501, "eval_samples_per_second": 3.67, "eval_steps_per_second": 3.67, "step": 162000 }, { "epoch": 0.6955427904141229, "grad_norm": 0.06098842993378639, "learning_rate": 3.0577425558152176e-05, "loss": 0.032109972834587094, "step": 162010 }, { "epoch": 0.695585722504143, "grad_norm": 0.001314207329414785, "learning_rate": 3.0573113838034546e-05, "loss": 0.42228131294250487, "step": 162020 }, { "epoch": 0.6956286545941629, "grad_norm": 0.01732785813510418, "learning_rate": 3.0568802117916924e-05, "loss": 0.25459225177764894, "step": 162030 }, { "epoch": 0.6956715866841829, "grad_norm": 6.963688850402832, "learning_rate": 3.05644903977993e-05, "loss": 0.23578083515167236, "step": 162040 }, { "epoch": 0.695714518774203, "grad_norm": 1.8500609397888184, "learning_rate": 3.056017867768168e-05, "loss": 0.08068374395370484, "step": 162050 }, { "epoch": 0.695757450864223, "grad_norm": 3.6719369888305664, "learning_rate": 3.055586695756405e-05, "loss": 0.2566834926605225, "step": 162060 }, { "epoch": 0.695800382954243, "grad_norm": 0.011841950006783009, "learning_rate": 3.0551555237446426e-05, "loss": 0.12200218439102173, "step": 162070 }, { "epoch": 0.695843315044263, "grad_norm": 0.5013278126716614, "learning_rate": 3.0547243517328804e-05, "loss": 0.11271430253982544, "step": 162080 }, { "epoch": 0.695886247134283, "grad_norm": 0.0028883975464850664, "learning_rate": 3.054293179721118e-05, "loss": 0.06581767201423645, "step": 162090 }, { "epoch": 0.695929179224303, "grad_norm": 0.05332833155989647, "learning_rate": 3.053862007709356e-05, "loss": 0.12176322937011719, "step": 162100 }, { "epoch": 0.695972111314323, "grad_norm": 0.029225388541817665, "learning_rate": 3.0534308356975936e-05, "loss": 0.29567220211029055, "step": 162110 }, { "epoch": 0.6960150434043431, "grad_norm": 0.010390534065663815, "learning_rate": 3.052999663685831e-05, "loss": 0.2578840732574463, "step": 162120 }, { "epoch": 0.696057975494363, "grad_norm": 2.247405767440796, "learning_rate": 3.052568491674069e-05, "loss": 0.461102819442749, "step": 162130 }, { "epoch": 0.696100907584383, "grad_norm": 0.010135611519217491, "learning_rate": 3.052137319662306e-05, "loss": 0.278436279296875, "step": 162140 }, { "epoch": 0.6961438396744031, "grad_norm": 0.01691882126033306, "learning_rate": 3.0517061476505438e-05, "loss": 0.11886712312698364, "step": 162150 }, { "epoch": 0.696186771764423, "grad_norm": 0.030704345554113388, "learning_rate": 3.0512749756387815e-05, "loss": 0.3023359775543213, "step": 162160 }, { "epoch": 0.696229703854443, "grad_norm": 0.002497435314580798, "learning_rate": 3.0508438036270193e-05, "loss": 0.22855567932128906, "step": 162170 }, { "epoch": 0.6962726359444631, "grad_norm": 0.034782905131578445, "learning_rate": 3.0504126316152563e-05, "loss": 0.06216605305671692, "step": 162180 }, { "epoch": 0.696315568034483, "grad_norm": 1.0647518634796143, "learning_rate": 3.049981459603494e-05, "loss": 0.1632169008255005, "step": 162190 }, { "epoch": 0.6963585001245031, "grad_norm": 0.20471031963825226, "learning_rate": 3.049550287591732e-05, "loss": 0.17721099853515626, "step": 162200 }, { "epoch": 0.6964014322145231, "grad_norm": 0.0030015218071639538, "learning_rate": 3.04911911557997e-05, "loss": 0.2211029052734375, "step": 162210 }, { "epoch": 0.696444364304543, "grad_norm": 1.2218440771102905, "learning_rate": 3.048687943568207e-05, "loss": 0.21433238983154296, "step": 162220 }, { "epoch": 0.6964872963945631, "grad_norm": 1.407080888748169, "learning_rate": 3.0482567715564447e-05, "loss": 0.23432455062866211, "step": 162230 }, { "epoch": 0.6965302284845831, "grad_norm": 0.1319386512041092, "learning_rate": 3.0478255995446824e-05, "loss": 0.17873740196228027, "step": 162240 }, { "epoch": 0.6965731605746031, "grad_norm": 0.0010449601104483008, "learning_rate": 3.04739442753292e-05, "loss": 0.14610795974731444, "step": 162250 }, { "epoch": 0.6966160926646231, "grad_norm": 1.0889495611190796, "learning_rate": 3.046963255521158e-05, "loss": 0.23132104873657228, "step": 162260 }, { "epoch": 0.6966590247546431, "grad_norm": 3.6569557189941406, "learning_rate": 3.0465320835093953e-05, "loss": 0.25479922294616697, "step": 162270 }, { "epoch": 0.6967019568446631, "grad_norm": 0.13837526738643646, "learning_rate": 3.046100911497633e-05, "loss": 0.023555827140808106, "step": 162280 }, { "epoch": 0.6967448889346831, "grad_norm": 0.11677303165197372, "learning_rate": 3.0456697394858707e-05, "loss": 0.035855191946029666, "step": 162290 }, { "epoch": 0.6967878210247032, "grad_norm": 0.03607289493083954, "learning_rate": 3.0452385674741085e-05, "loss": 0.3567406892776489, "step": 162300 }, { "epoch": 0.6968307531147231, "grad_norm": 0.022890016436576843, "learning_rate": 3.044807395462346e-05, "loss": 0.08604545593261718, "step": 162310 }, { "epoch": 0.6968736852047431, "grad_norm": 0.02981598488986492, "learning_rate": 3.0443762234505836e-05, "loss": 0.19815583229064943, "step": 162320 }, { "epoch": 0.6969166172947632, "grad_norm": 0.316649854183197, "learning_rate": 3.0439450514388213e-05, "loss": 0.1974259853363037, "step": 162330 }, { "epoch": 0.6969595493847831, "grad_norm": 2.9811651706695557, "learning_rate": 3.043513879427059e-05, "loss": 0.1774838924407959, "step": 162340 }, { "epoch": 0.6970024814748031, "grad_norm": 0.007071362342685461, "learning_rate": 3.043082707415296e-05, "loss": 0.09746151566505432, "step": 162350 }, { "epoch": 0.6970454135648232, "grad_norm": 4.316941738128662, "learning_rate": 3.042651535403534e-05, "loss": 0.30870752334594725, "step": 162360 }, { "epoch": 0.6970883456548431, "grad_norm": 0.0259851086884737, "learning_rate": 3.0422203633917716e-05, "loss": 0.2086500644683838, "step": 162370 }, { "epoch": 0.6971312777448632, "grad_norm": 0.08933248370885849, "learning_rate": 3.0417891913800097e-05, "loss": 0.30362553596496583, "step": 162380 }, { "epoch": 0.6971742098348832, "grad_norm": 0.010057899169623852, "learning_rate": 3.0413580193682467e-05, "loss": 0.11705089807510376, "step": 162390 }, { "epoch": 0.6972171419249031, "grad_norm": 0.01879618689417839, "learning_rate": 3.0409268473564845e-05, "loss": 0.3392336845397949, "step": 162400 }, { "epoch": 0.6972600740149232, "grad_norm": 74.72895050048828, "learning_rate": 3.0404956753447222e-05, "loss": 0.23954296112060547, "step": 162410 }, { "epoch": 0.6973030061049432, "grad_norm": 0.0661221295595169, "learning_rate": 3.04006450333296e-05, "loss": 0.3313016176223755, "step": 162420 }, { "epoch": 0.6973459381949632, "grad_norm": 3.2034785747528076, "learning_rate": 3.0396333313211973e-05, "loss": 0.202050518989563, "step": 162430 }, { "epoch": 0.6973888702849832, "grad_norm": 0.010310985147953033, "learning_rate": 3.039202159309435e-05, "loss": 0.3512110233306885, "step": 162440 }, { "epoch": 0.6974318023750032, "grad_norm": 3.286404848098755, "learning_rate": 3.0387709872976728e-05, "loss": 0.30471627712249755, "step": 162450 }, { "epoch": 0.6974747344650232, "grad_norm": 0.011127809062600136, "learning_rate": 3.0383398152859105e-05, "loss": 0.0021067624911665916, "step": 162460 }, { "epoch": 0.6975176665550432, "grad_norm": 0.2516888976097107, "learning_rate": 3.0379086432741476e-05, "loss": 0.18555980920791626, "step": 162470 }, { "epoch": 0.6975605986450633, "grad_norm": 4.618213653564453, "learning_rate": 3.0374774712623853e-05, "loss": 0.3885476112365723, "step": 162480 }, { "epoch": 0.6976035307350833, "grad_norm": 0.1642121523618698, "learning_rate": 3.0370462992506234e-05, "loss": 0.23192245960235597, "step": 162490 }, { "epoch": 0.6976464628251032, "grad_norm": 0.015001763589680195, "learning_rate": 3.036615127238861e-05, "loss": 0.2386481523513794, "step": 162500 }, { "epoch": 0.6976893949151233, "grad_norm": 0.03155812993645668, "learning_rate": 3.0361839552270982e-05, "loss": 0.20554816722869873, "step": 162510 }, { "epoch": 0.6977323270051433, "grad_norm": 0.04460495710372925, "learning_rate": 3.035752783215336e-05, "loss": 0.23647263050079345, "step": 162520 }, { "epoch": 0.6977752590951632, "grad_norm": 0.040947332978248596, "learning_rate": 3.0353216112035737e-05, "loss": 0.23655667304992675, "step": 162530 }, { "epoch": 0.6978181911851833, "grad_norm": 0.008029299788177013, "learning_rate": 3.0348904391918114e-05, "loss": 0.158699893951416, "step": 162540 }, { "epoch": 0.6978611232752033, "grad_norm": 0.03955543786287308, "learning_rate": 3.0344592671800488e-05, "loss": 0.09176256060600281, "step": 162550 }, { "epoch": 0.6979040553652233, "grad_norm": 0.03155898675322533, "learning_rate": 3.0340280951682865e-05, "loss": 0.2126624345779419, "step": 162560 }, { "epoch": 0.6979469874552433, "grad_norm": 0.03072783350944519, "learning_rate": 3.0335969231565242e-05, "loss": 0.21906230449676514, "step": 162570 }, { "epoch": 0.6979899195452633, "grad_norm": 0.2748047411441803, "learning_rate": 3.033165751144762e-05, "loss": 0.17269535064697267, "step": 162580 }, { "epoch": 0.6980328516352833, "grad_norm": 0.2810547947883606, "learning_rate": 3.032734579132999e-05, "loss": 0.21564557552337646, "step": 162590 }, { "epoch": 0.6980757837253033, "grad_norm": 2.9127063751220703, "learning_rate": 3.032303407121237e-05, "loss": 0.1324462652206421, "step": 162600 }, { "epoch": 0.6981187158153234, "grad_norm": 1.284143328666687, "learning_rate": 3.031872235109475e-05, "loss": 0.42905235290527344, "step": 162610 }, { "epoch": 0.6981616479053433, "grad_norm": 0.05410167947411537, "learning_rate": 3.0314410630977126e-05, "loss": 0.19309380054473876, "step": 162620 }, { "epoch": 0.6982045799953633, "grad_norm": 0.13504809141159058, "learning_rate": 3.0310098910859503e-05, "loss": 0.09252834916114808, "step": 162630 }, { "epoch": 0.6982475120853834, "grad_norm": 0.024992559105157852, "learning_rate": 3.0305787190741874e-05, "loss": 0.23650457859039306, "step": 162640 }, { "epoch": 0.6982904441754033, "grad_norm": 6.187001705169678, "learning_rate": 3.030147547062425e-05, "loss": 0.19541068077087403, "step": 162650 }, { "epoch": 0.6983333762654234, "grad_norm": 2.498851776123047, "learning_rate": 3.029716375050663e-05, "loss": 0.37836170196533203, "step": 162660 }, { "epoch": 0.6983763083554434, "grad_norm": 0.027705468237400055, "learning_rate": 3.0292852030389006e-05, "loss": 0.2952697277069092, "step": 162670 }, { "epoch": 0.6984192404454633, "grad_norm": 0.02884257212281227, "learning_rate": 3.028854031027138e-05, "loss": 0.14661755561828613, "step": 162680 }, { "epoch": 0.6984621725354834, "grad_norm": 0.21641422808170319, "learning_rate": 3.0284228590153757e-05, "loss": 0.2717902421951294, "step": 162690 }, { "epoch": 0.6985051046255034, "grad_norm": 1.3049355745315552, "learning_rate": 3.0279916870036134e-05, "loss": 0.17167997360229492, "step": 162700 }, { "epoch": 0.6985480367155233, "grad_norm": 1.1670953035354614, "learning_rate": 3.0275605149918512e-05, "loss": 0.23196604251861572, "step": 162710 }, { "epoch": 0.6985909688055434, "grad_norm": 0.0794382318854332, "learning_rate": 3.0271293429800886e-05, "loss": 0.2593263626098633, "step": 162720 }, { "epoch": 0.6986339008955634, "grad_norm": 4.211735248565674, "learning_rate": 3.0266981709683263e-05, "loss": 0.3013274669647217, "step": 162730 }, { "epoch": 0.6986768329855834, "grad_norm": 19.782581329345703, "learning_rate": 3.026266998956564e-05, "loss": 0.18726621866226195, "step": 162740 }, { "epoch": 0.6987197650756034, "grad_norm": 2.8106091022491455, "learning_rate": 3.0258358269448018e-05, "loss": 0.2839569330215454, "step": 162750 }, { "epoch": 0.6987626971656234, "grad_norm": 0.14473241567611694, "learning_rate": 3.0254046549330388e-05, "loss": 0.22602901458740235, "step": 162760 }, { "epoch": 0.6988056292556434, "grad_norm": 0.009015548974275589, "learning_rate": 3.0249734829212766e-05, "loss": 0.21210157871246338, "step": 162770 }, { "epoch": 0.6988485613456634, "grad_norm": 0.007358007598668337, "learning_rate": 3.0245423109095143e-05, "loss": 0.1613713264465332, "step": 162780 }, { "epoch": 0.6988914934356835, "grad_norm": 0.7762153744697571, "learning_rate": 3.0241111388977524e-05, "loss": 0.23849725723266602, "step": 162790 }, { "epoch": 0.6989344255257034, "grad_norm": 0.025210915133357048, "learning_rate": 3.0236799668859894e-05, "loss": 0.1408442735671997, "step": 162800 }, { "epoch": 0.6989773576157234, "grad_norm": 0.2695859372615814, "learning_rate": 3.023248794874227e-05, "loss": 0.20804314613342284, "step": 162810 }, { "epoch": 0.6990202897057435, "grad_norm": 0.34144577383995056, "learning_rate": 3.022817622862465e-05, "loss": 0.23827857971191407, "step": 162820 }, { "epoch": 0.6990632217957634, "grad_norm": 0.05939861387014389, "learning_rate": 3.0223864508507026e-05, "loss": 0.16040401458740233, "step": 162830 }, { "epoch": 0.6991061538857835, "grad_norm": 0.03952369838953018, "learning_rate": 3.02195527883894e-05, "loss": 0.04338504374027252, "step": 162840 }, { "epoch": 0.6991490859758035, "grad_norm": 0.06770238280296326, "learning_rate": 3.0215241068271778e-05, "loss": 0.35053086280822754, "step": 162850 }, { "epoch": 0.6991920180658234, "grad_norm": 0.04769308120012283, "learning_rate": 3.0210929348154155e-05, "loss": 0.17852011919021607, "step": 162860 }, { "epoch": 0.6992349501558435, "grad_norm": 0.1503300666809082, "learning_rate": 3.0206617628036532e-05, "loss": 0.17389656305313111, "step": 162870 }, { "epoch": 0.6992778822458635, "grad_norm": 23.386436462402344, "learning_rate": 3.0202305907918903e-05, "loss": 0.25399277210235593, "step": 162880 }, { "epoch": 0.6993208143358834, "grad_norm": 0.028125956654548645, "learning_rate": 3.019799418780128e-05, "loss": 0.08183012008666993, "step": 162890 }, { "epoch": 0.6993637464259035, "grad_norm": 0.020639831200242043, "learning_rate": 3.019368246768366e-05, "loss": 0.19469664096832276, "step": 162900 }, { "epoch": 0.6994066785159235, "grad_norm": 0.17118969559669495, "learning_rate": 3.0189370747566038e-05, "loss": 0.2829993724822998, "step": 162910 }, { "epoch": 0.6994496106059436, "grad_norm": 0.006604107096791267, "learning_rate": 3.018505902744841e-05, "loss": 0.3266103267669678, "step": 162920 }, { "epoch": 0.6994925426959635, "grad_norm": 0.004010102711617947, "learning_rate": 3.0180747307330786e-05, "loss": 0.11663607358932496, "step": 162930 }, { "epoch": 0.6995354747859835, "grad_norm": 23.561052322387695, "learning_rate": 3.0176435587213164e-05, "loss": 0.04907224178314209, "step": 162940 }, { "epoch": 0.6995784068760036, "grad_norm": 0.003062853356823325, "learning_rate": 3.017212386709554e-05, "loss": 0.06884866952896118, "step": 162950 }, { "epoch": 0.6996213389660235, "grad_norm": 1.0447180271148682, "learning_rate": 3.0167812146977918e-05, "loss": 0.23694450855255128, "step": 162960 }, { "epoch": 0.6996642710560436, "grad_norm": 0.02204667404294014, "learning_rate": 3.0163500426860292e-05, "loss": 0.11371309757232666, "step": 162970 }, { "epoch": 0.6997072031460636, "grad_norm": 1.6962567567825317, "learning_rate": 3.015918870674267e-05, "loss": 0.253522253036499, "step": 162980 }, { "epoch": 0.6997501352360835, "grad_norm": 2.769549608230591, "learning_rate": 3.0154876986625047e-05, "loss": 0.18700562715530394, "step": 162990 }, { "epoch": 0.6997930673261036, "grad_norm": 0.029071137309074402, "learning_rate": 3.0150565266507424e-05, "loss": 0.17749787569046022, "step": 163000 }, { "epoch": 0.6997930673261036, "eval_loss": 0.3839460015296936, "eval_runtime": 27.2931, "eval_samples_per_second": 3.664, "eval_steps_per_second": 3.664, "step": 163000 }, { "epoch": 0.6998359994161236, "grad_norm": 0.04307461529970169, "learning_rate": 3.0146253546389798e-05, "loss": 0.2530116319656372, "step": 163010 }, { "epoch": 0.6998789315061436, "grad_norm": 0.6724159717559814, "learning_rate": 3.0141941826272175e-05, "loss": 0.2557330846786499, "step": 163020 }, { "epoch": 0.6999218635961636, "grad_norm": 0.05646286904811859, "learning_rate": 3.0137630106154553e-05, "loss": 0.2898242473602295, "step": 163030 }, { "epoch": 0.6999647956861836, "grad_norm": 3.105821371078491, "learning_rate": 3.013331838603693e-05, "loss": 0.13781187534332276, "step": 163040 }, { "epoch": 0.7000077277762036, "grad_norm": 0.054522983729839325, "learning_rate": 3.01290066659193e-05, "loss": 0.37929842472076414, "step": 163050 }, { "epoch": 0.7000506598662236, "grad_norm": 0.006386724766343832, "learning_rate": 3.0124694945801678e-05, "loss": 0.3429419040679932, "step": 163060 }, { "epoch": 0.7000935919562437, "grad_norm": 0.09468874335289001, "learning_rate": 3.0120383225684055e-05, "loss": 0.26384339332580564, "step": 163070 }, { "epoch": 0.7001365240462636, "grad_norm": 1.3094730377197266, "learning_rate": 3.0116071505566433e-05, "loss": 0.17248300313949586, "step": 163080 }, { "epoch": 0.7001794561362836, "grad_norm": 0.6385529041290283, "learning_rate": 3.0111759785448807e-05, "loss": 0.15869448184967042, "step": 163090 }, { "epoch": 0.7002223882263037, "grad_norm": 0.006541683804243803, "learning_rate": 3.0107448065331184e-05, "loss": 0.2500231504440308, "step": 163100 }, { "epoch": 0.7002653203163236, "grad_norm": 0.0062957098707556725, "learning_rate": 3.010313634521356e-05, "loss": 0.14104583263397216, "step": 163110 }, { "epoch": 0.7003082524063436, "grad_norm": 0.039266157895326614, "learning_rate": 3.009882462509594e-05, "loss": 0.26926708221435547, "step": 163120 }, { "epoch": 0.7003511844963637, "grad_norm": 11.471166610717773, "learning_rate": 3.0094512904978313e-05, "loss": 0.2930644989013672, "step": 163130 }, { "epoch": 0.7003941165863836, "grad_norm": 5.168342590332031, "learning_rate": 3.009020118486069e-05, "loss": 0.3205289363861084, "step": 163140 }, { "epoch": 0.7004370486764037, "grad_norm": 0.34781506657600403, "learning_rate": 3.0085889464743067e-05, "loss": 0.17421427965164185, "step": 163150 }, { "epoch": 0.7004799807664237, "grad_norm": 0.009231721051037312, "learning_rate": 3.0081577744625445e-05, "loss": 0.22379372119903565, "step": 163160 }, { "epoch": 0.7005229128564436, "grad_norm": 2.9101412296295166, "learning_rate": 3.0077266024507815e-05, "loss": 0.2867826700210571, "step": 163170 }, { "epoch": 0.7005658449464637, "grad_norm": 0.15839970111846924, "learning_rate": 3.0072954304390193e-05, "loss": 0.2264240264892578, "step": 163180 }, { "epoch": 0.7006087770364837, "grad_norm": 0.0323498360812664, "learning_rate": 3.006864258427257e-05, "loss": 0.00971728190779686, "step": 163190 }, { "epoch": 0.7006517091265037, "grad_norm": 0.19851140677928925, "learning_rate": 3.006433086415495e-05, "loss": 0.25482997894287107, "step": 163200 }, { "epoch": 0.7006946412165237, "grad_norm": 0.19752870500087738, "learning_rate": 3.006001914403732e-05, "loss": 0.3481988668441772, "step": 163210 }, { "epoch": 0.7007375733065437, "grad_norm": 3.4968249797821045, "learning_rate": 3.00557074239197e-05, "loss": 0.29238917827606203, "step": 163220 }, { "epoch": 0.7007805053965637, "grad_norm": 0.020744258537888527, "learning_rate": 3.0051395703802076e-05, "loss": 0.06940485835075379, "step": 163230 }, { "epoch": 0.7008234374865837, "grad_norm": 0.6456360816955566, "learning_rate": 3.0047083983684453e-05, "loss": 0.42883834838867185, "step": 163240 }, { "epoch": 0.7008663695766038, "grad_norm": 1.1091424226760864, "learning_rate": 3.0042772263566827e-05, "loss": 0.16743401288986207, "step": 163250 }, { "epoch": 0.7009093016666237, "grad_norm": 0.24076159298419952, "learning_rate": 3.0038460543449205e-05, "loss": 0.11621925830841065, "step": 163260 }, { "epoch": 0.7009522337566437, "grad_norm": 0.013866151683032513, "learning_rate": 3.0034148823331582e-05, "loss": 0.330096960067749, "step": 163270 }, { "epoch": 0.7009951658466638, "grad_norm": 3.419060468673706, "learning_rate": 3.002983710321396e-05, "loss": 0.3908973693847656, "step": 163280 }, { "epoch": 0.7010380979366837, "grad_norm": 0.012446640059351921, "learning_rate": 3.002552538309633e-05, "loss": 0.34545049667358396, "step": 163290 }, { "epoch": 0.7010810300267037, "grad_norm": 1.4316107034683228, "learning_rate": 3.0021213662978707e-05, "loss": 0.22633516788482666, "step": 163300 }, { "epoch": 0.7011239621167238, "grad_norm": 0.06977680325508118, "learning_rate": 3.0016901942861088e-05, "loss": 0.004347345978021622, "step": 163310 }, { "epoch": 0.7011668942067437, "grad_norm": 1.5991872549057007, "learning_rate": 3.0012590222743465e-05, "loss": 0.12896416187286378, "step": 163320 }, { "epoch": 0.7012098262967638, "grad_norm": 1.6064382791519165, "learning_rate": 3.0008278502625843e-05, "loss": 0.45462779998779296, "step": 163330 }, { "epoch": 0.7012527583867838, "grad_norm": 2.761544942855835, "learning_rate": 3.0003966782508213e-05, "loss": 0.2084115743637085, "step": 163340 }, { "epoch": 0.7012956904768038, "grad_norm": 3.6172492504119873, "learning_rate": 2.999965506239059e-05, "loss": 0.20417706966400145, "step": 163350 }, { "epoch": 0.7013386225668238, "grad_norm": 47.53818893432617, "learning_rate": 2.9995343342272968e-05, "loss": 0.18387155532836913, "step": 163360 }, { "epoch": 0.7013815546568438, "grad_norm": 0.009035330265760422, "learning_rate": 2.9991031622155345e-05, "loss": 0.03579406440258026, "step": 163370 }, { "epoch": 0.7014244867468639, "grad_norm": 0.04270453378558159, "learning_rate": 2.998671990203772e-05, "loss": 0.1977940797805786, "step": 163380 }, { "epoch": 0.7014674188368838, "grad_norm": 1.1373462677001953, "learning_rate": 2.9982408181920097e-05, "loss": 0.18321282863616944, "step": 163390 }, { "epoch": 0.7015103509269038, "grad_norm": 0.001747044501826167, "learning_rate": 2.9978096461802474e-05, "loss": 0.21532964706420898, "step": 163400 }, { "epoch": 0.7015532830169239, "grad_norm": 5.351308345794678, "learning_rate": 2.997378474168485e-05, "loss": 0.22291159629821777, "step": 163410 }, { "epoch": 0.7015962151069438, "grad_norm": 2.718691349029541, "learning_rate": 2.9969473021567225e-05, "loss": 0.1568952441215515, "step": 163420 }, { "epoch": 0.7016391471969639, "grad_norm": 0.00798012875020504, "learning_rate": 2.9965161301449603e-05, "loss": 0.16636523008346557, "step": 163430 }, { "epoch": 0.7016820792869839, "grad_norm": 0.000884895445778966, "learning_rate": 2.996084958133198e-05, "loss": 0.032444655895233154, "step": 163440 }, { "epoch": 0.7017250113770038, "grad_norm": 31.404062271118164, "learning_rate": 2.9956537861214357e-05, "loss": 0.2867364645004272, "step": 163450 }, { "epoch": 0.7017679434670239, "grad_norm": 1.8071774244308472, "learning_rate": 2.9952226141096728e-05, "loss": 0.17658989429473876, "step": 163460 }, { "epoch": 0.7018108755570439, "grad_norm": 0.2980644702911377, "learning_rate": 2.9947914420979105e-05, "loss": 0.17964842319488525, "step": 163470 }, { "epoch": 0.7018538076470638, "grad_norm": 53.753273010253906, "learning_rate": 2.9943602700861482e-05, "loss": 0.22252166271209717, "step": 163480 }, { "epoch": 0.7018967397370839, "grad_norm": 8.756794929504395, "learning_rate": 2.9939290980743863e-05, "loss": 0.18342812061309816, "step": 163490 }, { "epoch": 0.7019396718271039, "grad_norm": 1.8001806735992432, "learning_rate": 2.9934979260626234e-05, "loss": 0.2673120737075806, "step": 163500 }, { "epoch": 0.7019826039171239, "grad_norm": 0.003850214881822467, "learning_rate": 2.993066754050861e-05, "loss": 0.2851763486862183, "step": 163510 }, { "epoch": 0.7020255360071439, "grad_norm": 0.010906051844358444, "learning_rate": 2.992635582039099e-05, "loss": 0.24270126819610596, "step": 163520 }, { "epoch": 0.702068468097164, "grad_norm": 1.0036766529083252, "learning_rate": 2.9922044100273366e-05, "loss": 0.19612650871276854, "step": 163530 }, { "epoch": 0.7021114001871839, "grad_norm": 0.01582266017794609, "learning_rate": 2.991773238015574e-05, "loss": 0.2539534568786621, "step": 163540 }, { "epoch": 0.7021543322772039, "grad_norm": 0.5005191564559937, "learning_rate": 2.9913420660038117e-05, "loss": 0.1888645648956299, "step": 163550 }, { "epoch": 0.702197264367224, "grad_norm": 0.14598479866981506, "learning_rate": 2.9909108939920494e-05, "loss": 0.3390333890914917, "step": 163560 }, { "epoch": 0.7022401964572439, "grad_norm": 1.5303585529327393, "learning_rate": 2.9904797219802872e-05, "loss": 0.4039153099060059, "step": 163570 }, { "epoch": 0.7022831285472639, "grad_norm": 0.0033237277530133724, "learning_rate": 2.9900485499685242e-05, "loss": 0.3125044822692871, "step": 163580 }, { "epoch": 0.702326060637284, "grad_norm": 0.015881020575761795, "learning_rate": 2.989617377956762e-05, "loss": 0.307576847076416, "step": 163590 }, { "epoch": 0.7023689927273039, "grad_norm": 0.028332769870758057, "learning_rate": 2.989186205945e-05, "loss": 0.1998500108718872, "step": 163600 }, { "epoch": 0.702411924817324, "grad_norm": 1.9299815893173218, "learning_rate": 2.9887550339332378e-05, "loss": 0.24326274394989014, "step": 163610 }, { "epoch": 0.702454856907344, "grad_norm": 0.03395046293735504, "learning_rate": 2.988323861921475e-05, "loss": 0.43087282180786135, "step": 163620 }, { "epoch": 0.7024977889973639, "grad_norm": 0.9291477799415588, "learning_rate": 2.9878926899097126e-05, "loss": 0.24743344783782958, "step": 163630 }, { "epoch": 0.702540721087384, "grad_norm": 4.53275203704834, "learning_rate": 2.9874615178979503e-05, "loss": 0.25386340618133546, "step": 163640 }, { "epoch": 0.702583653177404, "grad_norm": 0.40106382966041565, "learning_rate": 2.987030345886188e-05, "loss": 0.3923983573913574, "step": 163650 }, { "epoch": 0.7026265852674239, "grad_norm": 0.26700788736343384, "learning_rate": 2.9865991738744254e-05, "loss": 0.1837749719619751, "step": 163660 }, { "epoch": 0.702669517357444, "grad_norm": 1.212033987045288, "learning_rate": 2.986168001862663e-05, "loss": 0.412782096862793, "step": 163670 }, { "epoch": 0.702712449447464, "grad_norm": 1.8219190835952759, "learning_rate": 2.985736829850901e-05, "loss": 0.3716761350631714, "step": 163680 }, { "epoch": 0.702755381537484, "grad_norm": 0.006801880896091461, "learning_rate": 2.9853056578391386e-05, "loss": 0.19434766769409179, "step": 163690 }, { "epoch": 0.702798313627504, "grad_norm": 0.06828784197568893, "learning_rate": 2.9848744858273764e-05, "loss": 0.007297980785369873, "step": 163700 }, { "epoch": 0.702841245717524, "grad_norm": 0.14776858687400818, "learning_rate": 2.9844433138156138e-05, "loss": 0.1427057147026062, "step": 163710 }, { "epoch": 0.702884177807544, "grad_norm": 0.23514142632484436, "learning_rate": 2.9840121418038515e-05, "loss": 0.07293086647987365, "step": 163720 }, { "epoch": 0.702927109897564, "grad_norm": 4.044814109802246, "learning_rate": 2.9835809697920892e-05, "loss": 0.26663668155670167, "step": 163730 }, { "epoch": 0.7029700419875841, "grad_norm": 0.8512552380561829, "learning_rate": 2.983149797780327e-05, "loss": 0.14922486543655394, "step": 163740 }, { "epoch": 0.703012974077604, "grad_norm": 0.0908672958612442, "learning_rate": 2.982718625768564e-05, "loss": 0.15927162170410156, "step": 163750 }, { "epoch": 0.703055906167624, "grad_norm": 0.006831469014286995, "learning_rate": 2.9822874537568018e-05, "loss": 0.03169746696949005, "step": 163760 }, { "epoch": 0.7030988382576441, "grad_norm": 1.0652174949645996, "learning_rate": 2.9818562817450395e-05, "loss": 0.3184578657150269, "step": 163770 }, { "epoch": 0.7031417703476641, "grad_norm": 0.0035700947046279907, "learning_rate": 2.9814251097332772e-05, "loss": 0.06982010006904601, "step": 163780 }, { "epoch": 0.703184702437684, "grad_norm": 0.1739826202392578, "learning_rate": 2.9809939377215146e-05, "loss": 0.27461676597595214, "step": 163790 }, { "epoch": 0.7032276345277041, "grad_norm": 2.1455235481262207, "learning_rate": 2.9805627657097524e-05, "loss": 0.14346520900726317, "step": 163800 }, { "epoch": 0.7032705666177241, "grad_norm": 3.9834342002868652, "learning_rate": 2.98013159369799e-05, "loss": 0.22382516860961915, "step": 163810 }, { "epoch": 0.7033134987077441, "grad_norm": 1.1752283573150635, "learning_rate": 2.9797004216862278e-05, "loss": 0.17417526245117188, "step": 163820 }, { "epoch": 0.7033564307977641, "grad_norm": 0.03304734826087952, "learning_rate": 2.9792692496744652e-05, "loss": 0.19004673957824708, "step": 163830 }, { "epoch": 0.7033993628877842, "grad_norm": 0.0051128193736076355, "learning_rate": 2.978838077662703e-05, "loss": 0.18268777132034303, "step": 163840 }, { "epoch": 0.7034422949778041, "grad_norm": 0.16571545600891113, "learning_rate": 2.9784069056509407e-05, "loss": 0.17011449337005616, "step": 163850 }, { "epoch": 0.7034852270678241, "grad_norm": 3.0325210094451904, "learning_rate": 2.9779757336391784e-05, "loss": 0.38422060012817383, "step": 163860 }, { "epoch": 0.7035281591578442, "grad_norm": 1.36028254032135, "learning_rate": 2.9775445616274155e-05, "loss": 0.09718289971351624, "step": 163870 }, { "epoch": 0.7035710912478641, "grad_norm": 0.07002713531255722, "learning_rate": 2.9771133896156532e-05, "loss": 0.1471462607383728, "step": 163880 }, { "epoch": 0.7036140233378841, "grad_norm": 0.015247693285346031, "learning_rate": 2.976682217603891e-05, "loss": 0.22470765113830565, "step": 163890 }, { "epoch": 0.7036569554279042, "grad_norm": 3.642178535461426, "learning_rate": 2.976251045592129e-05, "loss": 0.12147370576858521, "step": 163900 }, { "epoch": 0.7036998875179241, "grad_norm": 0.21306264400482178, "learning_rate": 2.975819873580366e-05, "loss": 0.14276522397994995, "step": 163910 }, { "epoch": 0.7037428196079442, "grad_norm": 0.025422273203730583, "learning_rate": 2.9753887015686038e-05, "loss": 0.20691957473754882, "step": 163920 }, { "epoch": 0.7037857516979642, "grad_norm": 1.892135500907898, "learning_rate": 2.9749575295568415e-05, "loss": 0.3095592260360718, "step": 163930 }, { "epoch": 0.7038286837879841, "grad_norm": 0.9915587902069092, "learning_rate": 2.9745263575450793e-05, "loss": 0.16103274822235109, "step": 163940 }, { "epoch": 0.7038716158780042, "grad_norm": 0.003843039972707629, "learning_rate": 2.9740951855333167e-05, "loss": 0.05715287327766418, "step": 163950 }, { "epoch": 0.7039145479680242, "grad_norm": 0.7359153628349304, "learning_rate": 2.9736640135215544e-05, "loss": 0.11851122379302978, "step": 163960 }, { "epoch": 0.7039574800580441, "grad_norm": 0.02368875965476036, "learning_rate": 2.973232841509792e-05, "loss": 0.12844005823135377, "step": 163970 }, { "epoch": 0.7040004121480642, "grad_norm": 0.0908665657043457, "learning_rate": 2.97280166949803e-05, "loss": 0.20799777507781983, "step": 163980 }, { "epoch": 0.7040433442380842, "grad_norm": 31.04422378540039, "learning_rate": 2.972370497486267e-05, "loss": 0.18210945129394532, "step": 163990 }, { "epoch": 0.7040862763281042, "grad_norm": 0.05075189843773842, "learning_rate": 2.9719393254745047e-05, "loss": 0.09743213057518005, "step": 164000 }, { "epoch": 0.7040862763281042, "eval_loss": 0.3779441714286804, "eval_runtime": 27.1446, "eval_samples_per_second": 3.684, "eval_steps_per_second": 3.684, "step": 164000 }, { "epoch": 0.7041292084181242, "grad_norm": 1.5645118951797485, "learning_rate": 2.9715081534627427e-05, "loss": 0.25287203788757323, "step": 164010 }, { "epoch": 0.7041721405081443, "grad_norm": 0.00030088808853179216, "learning_rate": 2.9710769814509805e-05, "loss": 0.16287697553634645, "step": 164020 }, { "epoch": 0.7042150725981642, "grad_norm": 6.047826766967773, "learning_rate": 2.9706458094392175e-05, "loss": 0.20500681400299073, "step": 164030 }, { "epoch": 0.7042580046881842, "grad_norm": 0.08400680869817734, "learning_rate": 2.9702146374274553e-05, "loss": 0.29985120296478274, "step": 164040 }, { "epoch": 0.7043009367782043, "grad_norm": 0.006796732544898987, "learning_rate": 2.969783465415693e-05, "loss": 0.2870039463043213, "step": 164050 }, { "epoch": 0.7043438688682242, "grad_norm": 0.008046969771385193, "learning_rate": 2.9693522934039307e-05, "loss": 0.3308976888656616, "step": 164060 }, { "epoch": 0.7043868009582442, "grad_norm": 2.2312533855438232, "learning_rate": 2.9689211213921685e-05, "loss": 0.40290260314941406, "step": 164070 }, { "epoch": 0.7044297330482643, "grad_norm": 0.42959392070770264, "learning_rate": 2.968489949380406e-05, "loss": 0.06268092393875122, "step": 164080 }, { "epoch": 0.7044726651382842, "grad_norm": 0.004218485672026873, "learning_rate": 2.9680587773686436e-05, "loss": 0.2813071012496948, "step": 164090 }, { "epoch": 0.7045155972283043, "grad_norm": 0.001964650582522154, "learning_rate": 2.9676276053568813e-05, "loss": 0.28354315757751464, "step": 164100 }, { "epoch": 0.7045585293183243, "grad_norm": 0.9442742466926575, "learning_rate": 2.967196433345119e-05, "loss": 0.2996357917785645, "step": 164110 }, { "epoch": 0.7046014614083442, "grad_norm": 0.15573832392692566, "learning_rate": 2.9667652613333565e-05, "loss": 0.10072107315063476, "step": 164120 }, { "epoch": 0.7046443934983643, "grad_norm": 1.8645374774932861, "learning_rate": 2.9663340893215942e-05, "loss": 0.28838138580322265, "step": 164130 }, { "epoch": 0.7046873255883843, "grad_norm": 0.3522595167160034, "learning_rate": 2.965902917309832e-05, "loss": 0.2901811122894287, "step": 164140 }, { "epoch": 0.7047302576784042, "grad_norm": 7.661362648010254, "learning_rate": 2.9654717452980697e-05, "loss": 0.35709872245788576, "step": 164150 }, { "epoch": 0.7047731897684243, "grad_norm": 0.27700191736221313, "learning_rate": 2.9650405732863067e-05, "loss": 0.15413910150527954, "step": 164160 }, { "epoch": 0.7048161218584443, "grad_norm": 0.004466890823096037, "learning_rate": 2.9646094012745445e-05, "loss": 0.03817626535892486, "step": 164170 }, { "epoch": 0.7048590539484643, "grad_norm": 0.9056572318077087, "learning_rate": 2.9641782292627822e-05, "loss": 0.13778855800628662, "step": 164180 }, { "epoch": 0.7049019860384843, "grad_norm": 0.007168032694607973, "learning_rate": 2.96374705725102e-05, "loss": 0.1839970111846924, "step": 164190 }, { "epoch": 0.7049449181285043, "grad_norm": 0.019991997629404068, "learning_rate": 2.9633158852392573e-05, "loss": 0.26103713512420657, "step": 164200 }, { "epoch": 0.7049878502185244, "grad_norm": 0.290790319442749, "learning_rate": 2.962884713227495e-05, "loss": 0.24262683391571044, "step": 164210 }, { "epoch": 0.7050307823085443, "grad_norm": 7.077462196350098, "learning_rate": 2.9624535412157328e-05, "loss": 0.3445000648498535, "step": 164220 }, { "epoch": 0.7050737143985644, "grad_norm": 2.3658647537231445, "learning_rate": 2.9620223692039705e-05, "loss": 0.34368832111358644, "step": 164230 }, { "epoch": 0.7051166464885844, "grad_norm": 0.002537068212404847, "learning_rate": 2.961591197192208e-05, "loss": 0.1858936071395874, "step": 164240 }, { "epoch": 0.7051595785786043, "grad_norm": 0.002385231666266918, "learning_rate": 2.9611600251804457e-05, "loss": 0.22579362392425537, "step": 164250 }, { "epoch": 0.7052025106686244, "grad_norm": 1.203508734703064, "learning_rate": 2.9607288531686834e-05, "loss": 0.27742347717285154, "step": 164260 }, { "epoch": 0.7052454427586444, "grad_norm": 0.0387476347386837, "learning_rate": 2.960297681156921e-05, "loss": 0.3718020677566528, "step": 164270 }, { "epoch": 0.7052883748486644, "grad_norm": 0.006723400205373764, "learning_rate": 2.9598665091451582e-05, "loss": 0.10956581830978393, "step": 164280 }, { "epoch": 0.7053313069386844, "grad_norm": 4.690652370452881, "learning_rate": 2.959435337133396e-05, "loss": 0.24487228393554689, "step": 164290 }, { "epoch": 0.7053742390287044, "grad_norm": 0.00888009648770094, "learning_rate": 2.9590041651216336e-05, "loss": 0.06979911923408508, "step": 164300 }, { "epoch": 0.7054171711187244, "grad_norm": 1.0406445264816284, "learning_rate": 2.9585729931098717e-05, "loss": 0.2905172348022461, "step": 164310 }, { "epoch": 0.7054601032087444, "grad_norm": 0.5761302709579468, "learning_rate": 2.9581418210981088e-05, "loss": 0.34133856296539306, "step": 164320 }, { "epoch": 0.7055030352987645, "grad_norm": 0.08215140551328659, "learning_rate": 2.9577106490863465e-05, "loss": 0.31227014064788816, "step": 164330 }, { "epoch": 0.7055459673887844, "grad_norm": 7.857476234436035, "learning_rate": 2.9572794770745842e-05, "loss": 0.22481787204742432, "step": 164340 }, { "epoch": 0.7055888994788044, "grad_norm": 1.1321688890457153, "learning_rate": 2.956848305062822e-05, "loss": 0.161775803565979, "step": 164350 }, { "epoch": 0.7056318315688245, "grad_norm": 0.003534104209393263, "learning_rate": 2.9564171330510594e-05, "loss": 0.2884944677352905, "step": 164360 }, { "epoch": 0.7056747636588444, "grad_norm": 0.03924204409122467, "learning_rate": 2.955985961039297e-05, "loss": 0.23131952285766602, "step": 164370 }, { "epoch": 0.7057176957488644, "grad_norm": 0.055216234177351, "learning_rate": 2.955554789027535e-05, "loss": 0.09212759137153625, "step": 164380 }, { "epoch": 0.7057606278388845, "grad_norm": 1.711653709411621, "learning_rate": 2.9551236170157726e-05, "loss": 0.20180110931396483, "step": 164390 }, { "epoch": 0.7058035599289044, "grad_norm": 2.030550479888916, "learning_rate": 2.9546924450040096e-05, "loss": 0.30741727352142334, "step": 164400 }, { "epoch": 0.7058464920189245, "grad_norm": 0.010752196423709393, "learning_rate": 2.9542612729922474e-05, "loss": 0.09185991883277893, "step": 164410 }, { "epoch": 0.7058894241089445, "grad_norm": 0.0016691704513505101, "learning_rate": 2.9538301009804854e-05, "loss": 0.1287990927696228, "step": 164420 }, { "epoch": 0.7059323561989644, "grad_norm": 0.04343206062912941, "learning_rate": 2.9533989289687232e-05, "loss": 0.07160269618034362, "step": 164430 }, { "epoch": 0.7059752882889845, "grad_norm": 0.0019043717766180634, "learning_rate": 2.952967756956961e-05, "loss": 0.12070275545120239, "step": 164440 }, { "epoch": 0.7060182203790045, "grad_norm": 4.755942344665527, "learning_rate": 2.952536584945198e-05, "loss": 0.29552769660949707, "step": 164450 }, { "epoch": 0.7060611524690245, "grad_norm": 0.00850929506123066, "learning_rate": 2.9521054129334357e-05, "loss": 0.25412487983703613, "step": 164460 }, { "epoch": 0.7061040845590445, "grad_norm": 0.4611488878726959, "learning_rate": 2.9516742409216734e-05, "loss": 0.09284462332725525, "step": 164470 }, { "epoch": 0.7061470166490645, "grad_norm": 1.1242001056671143, "learning_rate": 2.9512430689099112e-05, "loss": 0.3816138744354248, "step": 164480 }, { "epoch": 0.7061899487390845, "grad_norm": 0.011605273000895977, "learning_rate": 2.9508118968981486e-05, "loss": 0.14563461542129516, "step": 164490 }, { "epoch": 0.7062328808291045, "grad_norm": 3.635684013366699, "learning_rate": 2.9503807248863863e-05, "loss": 0.39076852798461914, "step": 164500 }, { "epoch": 0.7062758129191246, "grad_norm": 0.005340252537280321, "learning_rate": 2.949949552874624e-05, "loss": 0.26224353313446047, "step": 164510 }, { "epoch": 0.7063187450091445, "grad_norm": 0.03700922802090645, "learning_rate": 2.9495183808628618e-05, "loss": 0.11134896278381348, "step": 164520 }, { "epoch": 0.7063616770991645, "grad_norm": 1.8511571884155273, "learning_rate": 2.949087208851099e-05, "loss": 0.17045905590057372, "step": 164530 }, { "epoch": 0.7064046091891846, "grad_norm": 0.010725017637014389, "learning_rate": 2.948656036839337e-05, "loss": 0.35302841663360596, "step": 164540 }, { "epoch": 0.7064475412792045, "grad_norm": 0.00597262941300869, "learning_rate": 2.9482248648275746e-05, "loss": 0.14913132190704345, "step": 164550 }, { "epoch": 0.7064904733692245, "grad_norm": 0.0034178998321294785, "learning_rate": 2.9477936928158124e-05, "loss": 0.23776566982269287, "step": 164560 }, { "epoch": 0.7065334054592446, "grad_norm": 0.009466869756579399, "learning_rate": 2.9473625208040494e-05, "loss": 0.14307208061218263, "step": 164570 }, { "epoch": 0.7065763375492645, "grad_norm": 0.08489704132080078, "learning_rate": 2.946931348792287e-05, "loss": 0.1949027180671692, "step": 164580 }, { "epoch": 0.7066192696392846, "grad_norm": 0.03956615552306175, "learning_rate": 2.946500176780525e-05, "loss": 0.13790332078933715, "step": 164590 }, { "epoch": 0.7066622017293046, "grad_norm": 0.038982268422842026, "learning_rate": 2.946069004768763e-05, "loss": 0.33584163188934324, "step": 164600 }, { "epoch": 0.7067051338193245, "grad_norm": 5.0026421546936035, "learning_rate": 2.945637832757e-05, "loss": 0.313620924949646, "step": 164610 }, { "epoch": 0.7067480659093446, "grad_norm": 1.726048469543457, "learning_rate": 2.9452066607452378e-05, "loss": 0.3042426109313965, "step": 164620 }, { "epoch": 0.7067909979993646, "grad_norm": 0.07862383872270584, "learning_rate": 2.9447754887334755e-05, "loss": 0.1545378804206848, "step": 164630 }, { "epoch": 0.7068339300893847, "grad_norm": 0.019337479025125504, "learning_rate": 2.9443443167217132e-05, "loss": 0.24584414958953857, "step": 164640 }, { "epoch": 0.7068768621794046, "grad_norm": 0.00637492910027504, "learning_rate": 2.9439131447099506e-05, "loss": 0.1771503210067749, "step": 164650 }, { "epoch": 0.7069197942694246, "grad_norm": 0.0154104707762599, "learning_rate": 2.9434819726981884e-05, "loss": 0.2208927631378174, "step": 164660 }, { "epoch": 0.7069627263594447, "grad_norm": 0.013810674659907818, "learning_rate": 2.943050800686426e-05, "loss": 0.07610102891921997, "step": 164670 }, { "epoch": 0.7070056584494646, "grad_norm": 0.026029305532574654, "learning_rate": 2.9426196286746638e-05, "loss": 0.16726453304290773, "step": 164680 }, { "epoch": 0.7070485905394847, "grad_norm": 1.9912234544754028, "learning_rate": 2.942188456662901e-05, "loss": 0.2336212396621704, "step": 164690 }, { "epoch": 0.7070915226295047, "grad_norm": 0.11577221751213074, "learning_rate": 2.9417572846511386e-05, "loss": 0.29100732803344725, "step": 164700 }, { "epoch": 0.7071344547195246, "grad_norm": 1.0063371658325195, "learning_rate": 2.9413261126393767e-05, "loss": 0.241862154006958, "step": 164710 }, { "epoch": 0.7071773868095447, "grad_norm": 1.699659824371338, "learning_rate": 2.9408949406276144e-05, "loss": 0.3465231657028198, "step": 164720 }, { "epoch": 0.7072203188995647, "grad_norm": 0.2365957647562027, "learning_rate": 2.9404637686158515e-05, "loss": 0.05553056001663208, "step": 164730 }, { "epoch": 0.7072632509895846, "grad_norm": 0.8947508931159973, "learning_rate": 2.9400325966040892e-05, "loss": 0.1890372157096863, "step": 164740 }, { "epoch": 0.7073061830796047, "grad_norm": 0.0036553533282130957, "learning_rate": 2.939601424592327e-05, "loss": 0.19252634048461914, "step": 164750 }, { "epoch": 0.7073491151696247, "grad_norm": 0.005008448846638203, "learning_rate": 2.9391702525805647e-05, "loss": 0.07748492956161498, "step": 164760 }, { "epoch": 0.7073920472596447, "grad_norm": 0.14192329347133636, "learning_rate": 2.938739080568802e-05, "loss": 0.006033249571919442, "step": 164770 }, { "epoch": 0.7074349793496647, "grad_norm": 8.749017715454102, "learning_rate": 2.9383079085570398e-05, "loss": 0.5570028305053711, "step": 164780 }, { "epoch": 0.7074779114396847, "grad_norm": 0.09582618623971939, "learning_rate": 2.9378767365452775e-05, "loss": 0.18232868909835814, "step": 164790 }, { "epoch": 0.7075208435297047, "grad_norm": 0.010088094510138035, "learning_rate": 2.9374455645335153e-05, "loss": 0.2660231590270996, "step": 164800 }, { "epoch": 0.7075637756197247, "grad_norm": 0.026392368599772453, "learning_rate": 2.937014392521753e-05, "loss": 0.0775251030921936, "step": 164810 }, { "epoch": 0.7076067077097448, "grad_norm": 0.1605832576751709, "learning_rate": 2.9365832205099904e-05, "loss": 0.08925783634185791, "step": 164820 }, { "epoch": 0.7076496397997647, "grad_norm": 0.0047328840009868145, "learning_rate": 2.936152048498228e-05, "loss": 0.3366368293762207, "step": 164830 }, { "epoch": 0.7076925718897847, "grad_norm": 0.08522465825080872, "learning_rate": 2.935720876486466e-05, "loss": 0.30194315910339353, "step": 164840 }, { "epoch": 0.7077355039798048, "grad_norm": 0.0030174916610121727, "learning_rate": 2.9352897044747036e-05, "loss": 0.0655434787273407, "step": 164850 }, { "epoch": 0.7077784360698247, "grad_norm": 2.143808603286743, "learning_rate": 2.9348585324629407e-05, "loss": 0.32743024826049805, "step": 164860 }, { "epoch": 0.7078213681598448, "grad_norm": 2.470440626144409, "learning_rate": 2.9344273604511784e-05, "loss": 0.22581710815429687, "step": 164870 }, { "epoch": 0.7078643002498648, "grad_norm": 0.007416535634547472, "learning_rate": 2.933996188439416e-05, "loss": 0.29442901611328126, "step": 164880 }, { "epoch": 0.7079072323398847, "grad_norm": 0.2843577265739441, "learning_rate": 2.933565016427654e-05, "loss": 0.21048979759216307, "step": 164890 }, { "epoch": 0.7079501644299048, "grad_norm": 0.0796031728386879, "learning_rate": 2.9331338444158913e-05, "loss": 0.09109994173049926, "step": 164900 }, { "epoch": 0.7079930965199248, "grad_norm": 0.017023451626300812, "learning_rate": 2.932702672404129e-05, "loss": 0.21580660343170166, "step": 164910 }, { "epoch": 0.7080360286099447, "grad_norm": 0.06896127760410309, "learning_rate": 2.9322715003923667e-05, "loss": 0.05191414952278137, "step": 164920 }, { "epoch": 0.7080789606999648, "grad_norm": 2.6581945419311523, "learning_rate": 2.9318403283806045e-05, "loss": 0.2914731025695801, "step": 164930 }, { "epoch": 0.7081218927899848, "grad_norm": 1.7635499238967896, "learning_rate": 2.931409156368842e-05, "loss": 0.2414705753326416, "step": 164940 }, { "epoch": 0.7081648248800048, "grad_norm": 0.1912590116262436, "learning_rate": 2.9309779843570796e-05, "loss": 0.14573431015014648, "step": 164950 }, { "epoch": 0.7082077569700248, "grad_norm": 0.06119944900274277, "learning_rate": 2.9305468123453173e-05, "loss": 0.22457616329193114, "step": 164960 }, { "epoch": 0.7082506890600448, "grad_norm": 3.581524133682251, "learning_rate": 2.930115640333555e-05, "loss": 0.37599098682403564, "step": 164970 }, { "epoch": 0.7082936211500648, "grad_norm": 0.3666439950466156, "learning_rate": 2.929684468321792e-05, "loss": 0.1905382513999939, "step": 164980 }, { "epoch": 0.7083365532400848, "grad_norm": 1.3603236675262451, "learning_rate": 2.92925329631003e-05, "loss": 0.314105224609375, "step": 164990 }, { "epoch": 0.7083794853301049, "grad_norm": 1.4144307374954224, "learning_rate": 2.9288221242982676e-05, "loss": 0.29831576347351074, "step": 165000 }, { "epoch": 0.7083794853301049, "eval_loss": 0.37844017148017883, "eval_runtime": 27.115, "eval_samples_per_second": 3.688, "eval_steps_per_second": 3.688, "step": 165000 }, { "epoch": 0.7084224174201248, "grad_norm": 2.283473253250122, "learning_rate": 2.9283909522865057e-05, "loss": 0.07131168842315674, "step": 165010 }, { "epoch": 0.7084653495101448, "grad_norm": 0.03843050077557564, "learning_rate": 2.9279597802747427e-05, "loss": 0.27201735973358154, "step": 165020 }, { "epoch": 0.7085082816001649, "grad_norm": 0.00798146240413189, "learning_rate": 2.9275286082629805e-05, "loss": 0.0913933277130127, "step": 165030 }, { "epoch": 0.7085512136901848, "grad_norm": 0.03475451096892357, "learning_rate": 2.9270974362512182e-05, "loss": 0.1362240195274353, "step": 165040 }, { "epoch": 0.7085941457802049, "grad_norm": 0.04747939482331276, "learning_rate": 2.926666264239456e-05, "loss": 0.07424071431159973, "step": 165050 }, { "epoch": 0.7086370778702249, "grad_norm": 0.11417294293642044, "learning_rate": 2.9262350922276933e-05, "loss": 0.07617428302764892, "step": 165060 }, { "epoch": 0.7086800099602449, "grad_norm": 0.034629106521606445, "learning_rate": 2.925803920215931e-05, "loss": 0.36686632633209226, "step": 165070 }, { "epoch": 0.7087229420502649, "grad_norm": 0.13259123265743256, "learning_rate": 2.9253727482041688e-05, "loss": 0.1826852798461914, "step": 165080 }, { "epoch": 0.7087658741402849, "grad_norm": 2.0491228103637695, "learning_rate": 2.9249415761924065e-05, "loss": 0.09049310684204101, "step": 165090 }, { "epoch": 0.708808806230305, "grad_norm": 0.0009569579851813614, "learning_rate": 2.9245104041806436e-05, "loss": 0.026717782020568848, "step": 165100 }, { "epoch": 0.7088517383203249, "grad_norm": 0.18775348365306854, "learning_rate": 2.9240792321688813e-05, "loss": 0.19738540649414063, "step": 165110 }, { "epoch": 0.7088946704103449, "grad_norm": 0.0023286493960767984, "learning_rate": 2.9236480601571194e-05, "loss": 0.22296249866485596, "step": 165120 }, { "epoch": 0.708937602500365, "grad_norm": 1.2228485345840454, "learning_rate": 2.923216888145357e-05, "loss": 0.19773554801940918, "step": 165130 }, { "epoch": 0.7089805345903849, "grad_norm": 0.07914045453071594, "learning_rate": 2.922785716133595e-05, "loss": 0.18020875453948976, "step": 165140 }, { "epoch": 0.709023466680405, "grad_norm": 0.04731407389044762, "learning_rate": 2.922354544121832e-05, "loss": 0.09294969439506531, "step": 165150 }, { "epoch": 0.709066398770425, "grad_norm": 1.9331458806991577, "learning_rate": 2.9219233721100696e-05, "loss": 0.15570180416107177, "step": 165160 }, { "epoch": 0.7091093308604449, "grad_norm": 0.001042968942783773, "learning_rate": 2.9214922000983074e-05, "loss": 0.18585129976272582, "step": 165170 }, { "epoch": 0.709152262950465, "grad_norm": 0.039528731256723404, "learning_rate": 2.921061028086545e-05, "loss": 0.1763898491859436, "step": 165180 }, { "epoch": 0.709195195040485, "grad_norm": 0.2774352729320526, "learning_rate": 2.9206298560747825e-05, "loss": 0.38413708209991454, "step": 165190 }, { "epoch": 0.7092381271305049, "grad_norm": 0.30526450276374817, "learning_rate": 2.9201986840630202e-05, "loss": 0.09335086345672608, "step": 165200 }, { "epoch": 0.709281059220525, "grad_norm": 0.009867934510111809, "learning_rate": 2.919767512051258e-05, "loss": 0.09516159892082214, "step": 165210 }, { "epoch": 0.709323991310545, "grad_norm": 1.502676010131836, "learning_rate": 2.9193363400394957e-05, "loss": 0.3276165723800659, "step": 165220 }, { "epoch": 0.709366923400565, "grad_norm": 0.23743776977062225, "learning_rate": 2.918905168027733e-05, "loss": 0.29257543087005616, "step": 165230 }, { "epoch": 0.709409855490585, "grad_norm": 0.02360161580145359, "learning_rate": 2.918473996015971e-05, "loss": 0.2478564500808716, "step": 165240 }, { "epoch": 0.709452787580605, "grad_norm": 0.02930424176156521, "learning_rate": 2.9180428240042086e-05, "loss": 0.049641406536102294, "step": 165250 }, { "epoch": 0.709495719670625, "grad_norm": 1.6508783102035522, "learning_rate": 2.9176116519924463e-05, "loss": 0.269071102142334, "step": 165260 }, { "epoch": 0.709538651760645, "grad_norm": 1.4661870002746582, "learning_rate": 2.9171804799806834e-05, "loss": 0.33371987342834475, "step": 165270 }, { "epoch": 0.709581583850665, "grad_norm": 4.903928756713867, "learning_rate": 2.916749307968921e-05, "loss": 0.3156555652618408, "step": 165280 }, { "epoch": 0.709624515940685, "grad_norm": 0.002806802047416568, "learning_rate": 2.916318135957159e-05, "loss": 0.2036449432373047, "step": 165290 }, { "epoch": 0.709667448030705, "grad_norm": 3.3624939918518066, "learning_rate": 2.9158869639453966e-05, "loss": 0.3532872200012207, "step": 165300 }, { "epoch": 0.7097103801207251, "grad_norm": 1.2719906568527222, "learning_rate": 2.915455791933634e-05, "loss": 0.3986253023147583, "step": 165310 }, { "epoch": 0.709753312210745, "grad_norm": 1.8043235540390015, "learning_rate": 2.9150246199218717e-05, "loss": 0.0949524998664856, "step": 165320 }, { "epoch": 0.709796244300765, "grad_norm": 0.05128329247236252, "learning_rate": 2.9145934479101094e-05, "loss": 0.09327793717384339, "step": 165330 }, { "epoch": 0.7098391763907851, "grad_norm": 0.5460872650146484, "learning_rate": 2.9141622758983472e-05, "loss": 0.3752920389175415, "step": 165340 }, { "epoch": 0.709882108480805, "grad_norm": 0.19574710726737976, "learning_rate": 2.9137311038865846e-05, "loss": 0.20312044620513917, "step": 165350 }, { "epoch": 0.7099250405708251, "grad_norm": 47.620269775390625, "learning_rate": 2.9132999318748223e-05, "loss": 0.19353847503662108, "step": 165360 }, { "epoch": 0.7099679726608451, "grad_norm": 0.05232425034046173, "learning_rate": 2.91286875986306e-05, "loss": 0.08786578178405761, "step": 165370 }, { "epoch": 0.710010904750865, "grad_norm": 2.8413853645324707, "learning_rate": 2.9124375878512978e-05, "loss": 0.3481892108917236, "step": 165380 }, { "epoch": 0.7100538368408851, "grad_norm": 0.023912513628602028, "learning_rate": 2.9120064158395348e-05, "loss": 0.10534037351608276, "step": 165390 }, { "epoch": 0.7100967689309051, "grad_norm": 0.3025215268135071, "learning_rate": 2.9115752438277726e-05, "loss": 0.14979887008666992, "step": 165400 }, { "epoch": 0.710139701020925, "grad_norm": 0.001987382536754012, "learning_rate": 2.9111440718160103e-05, "loss": 0.1420094847679138, "step": 165410 }, { "epoch": 0.7101826331109451, "grad_norm": 2.2093968391418457, "learning_rate": 2.9107128998042484e-05, "loss": 0.35331382751464846, "step": 165420 }, { "epoch": 0.7102255652009651, "grad_norm": 1.1964010000228882, "learning_rate": 2.9102817277924854e-05, "loss": 0.09625420570373536, "step": 165430 }, { "epoch": 0.7102684972909851, "grad_norm": 0.9831269979476929, "learning_rate": 2.909850555780723e-05, "loss": 0.3276521682739258, "step": 165440 }, { "epoch": 0.7103114293810051, "grad_norm": 15.865203857421875, "learning_rate": 2.909419383768961e-05, "loss": 0.16975079774856566, "step": 165450 }, { "epoch": 0.7103543614710252, "grad_norm": 0.058152079582214355, "learning_rate": 2.9089882117571986e-05, "loss": 0.18160277605056763, "step": 165460 }, { "epoch": 0.7103972935610451, "grad_norm": 0.013438289985060692, "learning_rate": 2.908557039745436e-05, "loss": 0.2488858461380005, "step": 165470 }, { "epoch": 0.7104402256510651, "grad_norm": 1.8803297281265259, "learning_rate": 2.9081258677336738e-05, "loss": 0.12351391315460206, "step": 165480 }, { "epoch": 0.7104831577410852, "grad_norm": 1.87388277053833, "learning_rate": 2.9076946957219115e-05, "loss": 0.386000919342041, "step": 165490 }, { "epoch": 0.7105260898311052, "grad_norm": 1.3773218393325806, "learning_rate": 2.9072635237101492e-05, "loss": 0.2971598386764526, "step": 165500 }, { "epoch": 0.7105690219211251, "grad_norm": 3.765920877456665, "learning_rate": 2.906832351698387e-05, "loss": 0.22443222999572754, "step": 165510 }, { "epoch": 0.7106119540111452, "grad_norm": 0.012355818413197994, "learning_rate": 2.906401179686624e-05, "loss": 0.29129648208618164, "step": 165520 }, { "epoch": 0.7106548861011652, "grad_norm": 0.0010706628672778606, "learning_rate": 2.905970007674862e-05, "loss": 0.00830424502491951, "step": 165530 }, { "epoch": 0.7106978181911852, "grad_norm": 0.15785139799118042, "learning_rate": 2.9055388356630998e-05, "loss": 0.12713322639465333, "step": 165540 }, { "epoch": 0.7107407502812052, "grad_norm": 0.6758171319961548, "learning_rate": 2.9051076636513376e-05, "loss": 0.09500041007995605, "step": 165550 }, { "epoch": 0.7107836823712252, "grad_norm": 1.2952488660812378, "learning_rate": 2.9046764916395746e-05, "loss": 0.19718393087387084, "step": 165560 }, { "epoch": 0.7108266144612452, "grad_norm": 0.0009880687575787306, "learning_rate": 2.9042453196278124e-05, "loss": 0.19167803525924682, "step": 165570 }, { "epoch": 0.7108695465512652, "grad_norm": 1.7319165468215942, "learning_rate": 2.90381414761605e-05, "loss": 0.314529275894165, "step": 165580 }, { "epoch": 0.7109124786412853, "grad_norm": 3.060222625732422, "learning_rate": 2.9033829756042878e-05, "loss": 0.1463207483291626, "step": 165590 }, { "epoch": 0.7109554107313052, "grad_norm": 0.038725271821022034, "learning_rate": 2.9029518035925252e-05, "loss": 0.004723150655627251, "step": 165600 }, { "epoch": 0.7109983428213252, "grad_norm": 0.01974060758948326, "learning_rate": 2.902520631580763e-05, "loss": 0.05358384251594543, "step": 165610 }, { "epoch": 0.7110412749113453, "grad_norm": 0.04186383634805679, "learning_rate": 2.9020894595690007e-05, "loss": 0.40696163177490235, "step": 165620 }, { "epoch": 0.7110842070013652, "grad_norm": 0.003584572346881032, "learning_rate": 2.9016582875572384e-05, "loss": 0.06248798370361328, "step": 165630 }, { "epoch": 0.7111271390913853, "grad_norm": 0.0991358608007431, "learning_rate": 2.9012271155454758e-05, "loss": 0.17465416193008423, "step": 165640 }, { "epoch": 0.7111700711814053, "grad_norm": 6.179220676422119, "learning_rate": 2.9007959435337135e-05, "loss": 0.09142645001411438, "step": 165650 }, { "epoch": 0.7112130032714252, "grad_norm": 1.848460078239441, "learning_rate": 2.9003647715219513e-05, "loss": 0.34997422695159913, "step": 165660 }, { "epoch": 0.7112559353614453, "grad_norm": 0.0017918201629072428, "learning_rate": 2.899933599510189e-05, "loss": 0.2364954710006714, "step": 165670 }, { "epoch": 0.7112988674514653, "grad_norm": 0.001793616102077067, "learning_rate": 2.899502427498426e-05, "loss": 0.41597414016723633, "step": 165680 }, { "epoch": 0.7113417995414852, "grad_norm": 5.766404628753662, "learning_rate": 2.8990712554866638e-05, "loss": 0.2759052038192749, "step": 165690 }, { "epoch": 0.7113847316315053, "grad_norm": 7.550206184387207, "learning_rate": 2.8986400834749015e-05, "loss": 0.4413724422454834, "step": 165700 }, { "epoch": 0.7114276637215253, "grad_norm": 0.03798282518982887, "learning_rate": 2.8982089114631393e-05, "loss": 0.25815589427948, "step": 165710 }, { "epoch": 0.7114705958115453, "grad_norm": 1.352439284324646, "learning_rate": 2.8977777394513767e-05, "loss": 0.32106575965881345, "step": 165720 }, { "epoch": 0.7115135279015653, "grad_norm": 0.06647659838199615, "learning_rate": 2.8973465674396144e-05, "loss": 0.1770651936531067, "step": 165730 }, { "epoch": 0.7115564599915853, "grad_norm": 0.002012177137658, "learning_rate": 2.896915395427852e-05, "loss": 0.0860788345336914, "step": 165740 }, { "epoch": 0.7115993920816053, "grad_norm": 0.09346366673707962, "learning_rate": 2.89648422341609e-05, "loss": 0.3049763679504395, "step": 165750 }, { "epoch": 0.7116423241716253, "grad_norm": 0.029406042769551277, "learning_rate": 2.8960530514043273e-05, "loss": 0.12645034790039061, "step": 165760 }, { "epoch": 0.7116852562616454, "grad_norm": 0.024466650560498238, "learning_rate": 2.895621879392565e-05, "loss": 0.1472999095916748, "step": 165770 }, { "epoch": 0.7117281883516653, "grad_norm": 1.3930857181549072, "learning_rate": 2.8951907073808027e-05, "loss": 0.11919002532958985, "step": 165780 }, { "epoch": 0.7117711204416853, "grad_norm": 0.050165556371212006, "learning_rate": 2.8947595353690405e-05, "loss": 0.09300928115844727, "step": 165790 }, { "epoch": 0.7118140525317054, "grad_norm": 0.05946086719632149, "learning_rate": 2.8943283633572775e-05, "loss": 0.21979830265045167, "step": 165800 }, { "epoch": 0.7118569846217253, "grad_norm": 0.49015337228775024, "learning_rate": 2.8938971913455153e-05, "loss": 0.14640580415725707, "step": 165810 }, { "epoch": 0.7118999167117454, "grad_norm": 0.01640521176159382, "learning_rate": 2.893466019333753e-05, "loss": 0.26482203006744387, "step": 165820 }, { "epoch": 0.7119428488017654, "grad_norm": 0.7002899050712585, "learning_rate": 2.893034847321991e-05, "loss": 0.19143946170806886, "step": 165830 }, { "epoch": 0.7119857808917853, "grad_norm": 3.6404550075531006, "learning_rate": 2.892603675310228e-05, "loss": 0.2002251148223877, "step": 165840 }, { "epoch": 0.7120287129818054, "grad_norm": 1.8261346817016602, "learning_rate": 2.892172503298466e-05, "loss": 0.16150326728820802, "step": 165850 }, { "epoch": 0.7120716450718254, "grad_norm": 0.020639711990952492, "learning_rate": 2.8917413312867036e-05, "loss": 0.3025749683380127, "step": 165860 }, { "epoch": 0.7121145771618453, "grad_norm": 2.7759740352630615, "learning_rate": 2.8913101592749413e-05, "loss": 0.24660980701446533, "step": 165870 }, { "epoch": 0.7121575092518654, "grad_norm": 0.053507398813962936, "learning_rate": 2.890878987263179e-05, "loss": 0.1749052047729492, "step": 165880 }, { "epoch": 0.7122004413418854, "grad_norm": 2.1792287826538086, "learning_rate": 2.8904478152514165e-05, "loss": 0.2586753129959106, "step": 165890 }, { "epoch": 0.7122433734319054, "grad_norm": 0.00457863649353385, "learning_rate": 2.8900166432396542e-05, "loss": 0.24560930728912353, "step": 165900 }, { "epoch": 0.7122863055219254, "grad_norm": 5.7979912757873535, "learning_rate": 2.889585471227892e-05, "loss": 0.08135615587234497, "step": 165910 }, { "epoch": 0.7123292376119454, "grad_norm": 0.07997957617044449, "learning_rate": 2.8891542992161297e-05, "loss": 0.14295884370803832, "step": 165920 }, { "epoch": 0.7123721697019655, "grad_norm": 0.05502615496516228, "learning_rate": 2.8887231272043667e-05, "loss": 0.15717185735702516, "step": 165930 }, { "epoch": 0.7124151017919854, "grad_norm": 0.10012836754322052, "learning_rate": 2.8882919551926048e-05, "loss": 0.09158955216407776, "step": 165940 }, { "epoch": 0.7124580338820055, "grad_norm": 0.03289559483528137, "learning_rate": 2.8878607831808425e-05, "loss": 0.17381430864334108, "step": 165950 }, { "epoch": 0.7125009659720255, "grad_norm": 8.307626724243164, "learning_rate": 2.8874296111690803e-05, "loss": 0.293519926071167, "step": 165960 }, { "epoch": 0.7125438980620454, "grad_norm": 0.008939497172832489, "learning_rate": 2.8869984391573173e-05, "loss": 0.009337369352579117, "step": 165970 }, { "epoch": 0.7125868301520655, "grad_norm": 2.4467132091522217, "learning_rate": 2.886567267145555e-05, "loss": 0.31867618560791017, "step": 165980 }, { "epoch": 0.7126297622420855, "grad_norm": 6.164371967315674, "learning_rate": 2.8861360951337928e-05, "loss": 0.3132692098617554, "step": 165990 }, { "epoch": 0.7126726943321054, "grad_norm": 0.002331892494112253, "learning_rate": 2.8857049231220305e-05, "loss": 0.08976657390594482, "step": 166000 }, { "epoch": 0.7126726943321054, "eval_loss": 0.3927363455295563, "eval_runtime": 27.1077, "eval_samples_per_second": 3.689, "eval_steps_per_second": 3.689, "step": 166000 }, { "epoch": 0.7127156264221255, "grad_norm": 0.13965459167957306, "learning_rate": 2.885273751110268e-05, "loss": 0.19368773698806763, "step": 166010 }, { "epoch": 0.7127585585121455, "grad_norm": 0.08737999945878983, "learning_rate": 2.8848425790985057e-05, "loss": 0.2531174421310425, "step": 166020 }, { "epoch": 0.7128014906021655, "grad_norm": 0.1749696582555771, "learning_rate": 2.8844114070867434e-05, "loss": 0.18520824909210204, "step": 166030 }, { "epoch": 0.7128444226921855, "grad_norm": 0.03882657736539841, "learning_rate": 2.883980235074981e-05, "loss": 0.270503306388855, "step": 166040 }, { "epoch": 0.7128873547822056, "grad_norm": 2.385289192199707, "learning_rate": 2.8835490630632185e-05, "loss": 0.21199922561645507, "step": 166050 }, { "epoch": 0.7129302868722255, "grad_norm": 0.26386263966560364, "learning_rate": 2.8831178910514562e-05, "loss": 0.07489157319068909, "step": 166060 }, { "epoch": 0.7129732189622455, "grad_norm": 0.4560233950614929, "learning_rate": 2.882686719039694e-05, "loss": 0.13342225551605225, "step": 166070 }, { "epoch": 0.7130161510522656, "grad_norm": 0.0036526834592223167, "learning_rate": 2.8822555470279317e-05, "loss": 0.15702961683273314, "step": 166080 }, { "epoch": 0.7130590831422855, "grad_norm": 1.793468952178955, "learning_rate": 2.8818243750161688e-05, "loss": 0.31508579254150393, "step": 166090 }, { "epoch": 0.7131020152323055, "grad_norm": 1.5543371438980103, "learning_rate": 2.8813932030044065e-05, "loss": 0.10623435974121094, "step": 166100 }, { "epoch": 0.7131449473223256, "grad_norm": 2.2663276195526123, "learning_rate": 2.8809620309926442e-05, "loss": 0.28469769954681395, "step": 166110 }, { "epoch": 0.7131878794123455, "grad_norm": 2.3514790534973145, "learning_rate": 2.8805308589808823e-05, "loss": 0.28321788311004636, "step": 166120 }, { "epoch": 0.7132308115023656, "grad_norm": 0.03398464620113373, "learning_rate": 2.8800996869691194e-05, "loss": 0.18461548089981078, "step": 166130 }, { "epoch": 0.7132737435923856, "grad_norm": 1.426744818687439, "learning_rate": 2.879668514957357e-05, "loss": 0.2839787244796753, "step": 166140 }, { "epoch": 0.7133166756824055, "grad_norm": 0.770117998123169, "learning_rate": 2.879237342945595e-05, "loss": 0.20739870071411132, "step": 166150 }, { "epoch": 0.7133596077724256, "grad_norm": 0.002078356221318245, "learning_rate": 2.8788061709338326e-05, "loss": 0.1516830563545227, "step": 166160 }, { "epoch": 0.7134025398624456, "grad_norm": 0.013807138428092003, "learning_rate": 2.87837499892207e-05, "loss": 0.10811216831207275, "step": 166170 }, { "epoch": 0.7134454719524655, "grad_norm": 2.072737216949463, "learning_rate": 2.8779438269103077e-05, "loss": 0.16044397354125978, "step": 166180 }, { "epoch": 0.7134884040424856, "grad_norm": 0.0021444791927933693, "learning_rate": 2.8775126548985454e-05, "loss": 0.16015149354934693, "step": 166190 }, { "epoch": 0.7135313361325056, "grad_norm": 0.3627009689807892, "learning_rate": 2.8770814828867832e-05, "loss": 0.09903419613838196, "step": 166200 }, { "epoch": 0.7135742682225256, "grad_norm": 0.26428207755088806, "learning_rate": 2.8766503108750202e-05, "loss": 0.07487475872039795, "step": 166210 }, { "epoch": 0.7136172003125456, "grad_norm": 0.0420398935675621, "learning_rate": 2.876219138863258e-05, "loss": 0.017874059081077576, "step": 166220 }, { "epoch": 0.7136601324025657, "grad_norm": 0.0023280770983546972, "learning_rate": 2.875787966851496e-05, "loss": 0.04525628387928009, "step": 166230 }, { "epoch": 0.7137030644925856, "grad_norm": 0.670574426651001, "learning_rate": 2.8753567948397338e-05, "loss": 0.2393495559692383, "step": 166240 }, { "epoch": 0.7137459965826056, "grad_norm": 0.6433839797973633, "learning_rate": 2.8749256228279715e-05, "loss": 0.19196040630340577, "step": 166250 }, { "epoch": 0.7137889286726257, "grad_norm": 1.1744970083236694, "learning_rate": 2.8744944508162086e-05, "loss": 0.17194961309432982, "step": 166260 }, { "epoch": 0.7138318607626456, "grad_norm": 0.07919386029243469, "learning_rate": 2.8740632788044463e-05, "loss": 0.3788050651550293, "step": 166270 }, { "epoch": 0.7138747928526656, "grad_norm": 0.006081217434257269, "learning_rate": 2.873632106792684e-05, "loss": 0.2530497074127197, "step": 166280 }, { "epoch": 0.7139177249426857, "grad_norm": 0.006908032111823559, "learning_rate": 2.8732009347809218e-05, "loss": 0.1852771282196045, "step": 166290 }, { "epoch": 0.7139606570327056, "grad_norm": 8.477197647094727, "learning_rate": 2.872769762769159e-05, "loss": 0.18325682878494262, "step": 166300 }, { "epoch": 0.7140035891227257, "grad_norm": 0.004288307391107082, "learning_rate": 2.872338590757397e-05, "loss": 0.31393725872039796, "step": 166310 }, { "epoch": 0.7140465212127457, "grad_norm": 0.6741523742675781, "learning_rate": 2.8719074187456346e-05, "loss": 0.08937577605247497, "step": 166320 }, { "epoch": 0.7140894533027656, "grad_norm": 0.008980872109532356, "learning_rate": 2.8714762467338724e-05, "loss": 0.26209454536437987, "step": 166330 }, { "epoch": 0.7141323853927857, "grad_norm": 7.205010414123535, "learning_rate": 2.8710450747221098e-05, "loss": 0.30361227989196776, "step": 166340 }, { "epoch": 0.7141753174828057, "grad_norm": 0.00631917966529727, "learning_rate": 2.8706139027103475e-05, "loss": 0.060678571462631226, "step": 166350 }, { "epoch": 0.7142182495728258, "grad_norm": 3.5636608600616455, "learning_rate": 2.8701827306985852e-05, "loss": 0.20951218605041505, "step": 166360 }, { "epoch": 0.7142611816628457, "grad_norm": 4.244123935699463, "learning_rate": 2.869751558686823e-05, "loss": 0.0760481059551239, "step": 166370 }, { "epoch": 0.7143041137528657, "grad_norm": 0.03286031633615494, "learning_rate": 2.86932038667506e-05, "loss": 0.0744013249874115, "step": 166380 }, { "epoch": 0.7143470458428858, "grad_norm": 0.016204357147216797, "learning_rate": 2.8688892146632978e-05, "loss": 0.0846706509590149, "step": 166390 }, { "epoch": 0.7143899779329057, "grad_norm": 0.04532900080084801, "learning_rate": 2.8684580426515355e-05, "loss": 0.1527009963989258, "step": 166400 }, { "epoch": 0.7144329100229257, "grad_norm": 0.5035681128501892, "learning_rate": 2.8680268706397732e-05, "loss": 0.44489264488220215, "step": 166410 }, { "epoch": 0.7144758421129458, "grad_norm": 0.1924525499343872, "learning_rate": 2.8675956986280106e-05, "loss": 0.15166373252868653, "step": 166420 }, { "epoch": 0.7145187742029657, "grad_norm": 0.011877103708684444, "learning_rate": 2.8671645266162484e-05, "loss": 0.062304210662841794, "step": 166430 }, { "epoch": 0.7145617062929858, "grad_norm": 0.0012645673705264926, "learning_rate": 2.866733354604486e-05, "loss": 0.15559821128845214, "step": 166440 }, { "epoch": 0.7146046383830058, "grad_norm": 0.17878887057304382, "learning_rate": 2.8663021825927238e-05, "loss": 0.33294291496276857, "step": 166450 }, { "epoch": 0.7146475704730257, "grad_norm": 2.671096086502075, "learning_rate": 2.8658710105809612e-05, "loss": 0.5303155422210694, "step": 166460 }, { "epoch": 0.7146905025630458, "grad_norm": 0.07435199618339539, "learning_rate": 2.865439838569199e-05, "loss": 0.12739841938018798, "step": 166470 }, { "epoch": 0.7147334346530658, "grad_norm": 0.1150457039475441, "learning_rate": 2.8650086665574367e-05, "loss": 0.21583008766174316, "step": 166480 }, { "epoch": 0.7147763667430858, "grad_norm": 0.08370175957679749, "learning_rate": 2.8645774945456744e-05, "loss": 0.07304016947746277, "step": 166490 }, { "epoch": 0.7148192988331058, "grad_norm": 0.4779481887817383, "learning_rate": 2.8641463225339115e-05, "loss": 0.07035287618637084, "step": 166500 }, { "epoch": 0.7148622309231258, "grad_norm": 0.027747897431254387, "learning_rate": 2.8637151505221492e-05, "loss": 0.15281134843826294, "step": 166510 }, { "epoch": 0.7149051630131458, "grad_norm": 1.3348201513290405, "learning_rate": 2.863283978510387e-05, "loss": 0.23680105209350585, "step": 166520 }, { "epoch": 0.7149480951031658, "grad_norm": 2.2081685066223145, "learning_rate": 2.862852806498625e-05, "loss": 0.20437359809875488, "step": 166530 }, { "epoch": 0.7149910271931859, "grad_norm": 0.09051236510276794, "learning_rate": 2.862421634486862e-05, "loss": 0.11743265390396118, "step": 166540 }, { "epoch": 0.7150339592832058, "grad_norm": 0.0025285957381129265, "learning_rate": 2.8619904624750998e-05, "loss": 0.10214493274688721, "step": 166550 }, { "epoch": 0.7150768913732258, "grad_norm": 0.006848174147307873, "learning_rate": 2.8615592904633375e-05, "loss": 0.09545931220054626, "step": 166560 }, { "epoch": 0.7151198234632459, "grad_norm": 2.7628366947174072, "learning_rate": 2.8611281184515753e-05, "loss": 0.37620038986206056, "step": 166570 }, { "epoch": 0.7151627555532658, "grad_norm": 0.002312214346602559, "learning_rate": 2.8606969464398127e-05, "loss": 0.3035972356796265, "step": 166580 }, { "epoch": 0.7152056876432858, "grad_norm": 0.008850247599184513, "learning_rate": 2.8602657744280504e-05, "loss": 0.21454875469207763, "step": 166590 }, { "epoch": 0.7152486197333059, "grad_norm": 3.854825973510742, "learning_rate": 2.859834602416288e-05, "loss": 0.29339096546173093, "step": 166600 }, { "epoch": 0.7152915518233258, "grad_norm": 0.004344264045357704, "learning_rate": 2.859403430404526e-05, "loss": 0.2869076728820801, "step": 166610 }, { "epoch": 0.7153344839133459, "grad_norm": 1.5101947784423828, "learning_rate": 2.8589722583927636e-05, "loss": 0.26342000961303713, "step": 166620 }, { "epoch": 0.7153774160033659, "grad_norm": 0.17990095913410187, "learning_rate": 2.8585410863810007e-05, "loss": 0.2827131986618042, "step": 166630 }, { "epoch": 0.7154203480933858, "grad_norm": 0.019368786364793777, "learning_rate": 2.8581099143692387e-05, "loss": 0.16794705390930176, "step": 166640 }, { "epoch": 0.7154632801834059, "grad_norm": 0.004532191436737776, "learning_rate": 2.8576787423574765e-05, "loss": 0.24334888458251952, "step": 166650 }, { "epoch": 0.7155062122734259, "grad_norm": 0.006376985460519791, "learning_rate": 2.8572475703457142e-05, "loss": 0.2586236000061035, "step": 166660 }, { "epoch": 0.7155491443634459, "grad_norm": 0.15656788647174835, "learning_rate": 2.8568163983339513e-05, "loss": 0.23142714500427247, "step": 166670 }, { "epoch": 0.7155920764534659, "grad_norm": 0.019105859100818634, "learning_rate": 2.856385226322189e-05, "loss": 0.18950977325439453, "step": 166680 }, { "epoch": 0.7156350085434859, "grad_norm": 0.009699470363557339, "learning_rate": 2.8559540543104267e-05, "loss": 0.4047813892364502, "step": 166690 }, { "epoch": 0.7156779406335059, "grad_norm": 0.13277097046375275, "learning_rate": 2.8555228822986645e-05, "loss": 0.06402959227561951, "step": 166700 }, { "epoch": 0.7157208727235259, "grad_norm": 2.5981411933898926, "learning_rate": 2.855091710286902e-05, "loss": 0.36272387504577636, "step": 166710 }, { "epoch": 0.715763804813546, "grad_norm": 0.5918976068496704, "learning_rate": 2.8546605382751396e-05, "loss": 0.22860181331634521, "step": 166720 }, { "epoch": 0.7158067369035659, "grad_norm": 0.04410221800208092, "learning_rate": 2.8542293662633773e-05, "loss": 0.24485437870025634, "step": 166730 }, { "epoch": 0.7158496689935859, "grad_norm": 0.009131097234785557, "learning_rate": 2.853798194251615e-05, "loss": 0.060953164100646974, "step": 166740 }, { "epoch": 0.715892601083606, "grad_norm": 0.011584590189158916, "learning_rate": 2.8533670222398525e-05, "loss": 0.28687229156494143, "step": 166750 }, { "epoch": 0.7159355331736259, "grad_norm": 3.6411337852478027, "learning_rate": 2.8529358502280902e-05, "loss": 0.3922334432601929, "step": 166760 }, { "epoch": 0.715978465263646, "grad_norm": 0.13134698569774628, "learning_rate": 2.852504678216328e-05, "loss": 0.17828294038772582, "step": 166770 }, { "epoch": 0.716021397353666, "grad_norm": 0.009131759405136108, "learning_rate": 2.8520735062045657e-05, "loss": 0.2095499277114868, "step": 166780 }, { "epoch": 0.716064329443686, "grad_norm": 0.007369739469140768, "learning_rate": 2.8516423341928027e-05, "loss": 0.19556423425674438, "step": 166790 }, { "epoch": 0.716107261533706, "grad_norm": 1.3228790760040283, "learning_rate": 2.8512111621810405e-05, "loss": 0.14940618276596068, "step": 166800 }, { "epoch": 0.716150193623726, "grad_norm": 2.1382687091827393, "learning_rate": 2.8507799901692782e-05, "loss": 0.08745411634445191, "step": 166810 }, { "epoch": 0.716193125713746, "grad_norm": 2.0986907482147217, "learning_rate": 2.850348818157516e-05, "loss": 0.18943517208099364, "step": 166820 }, { "epoch": 0.716236057803766, "grad_norm": 0.01945311203598976, "learning_rate": 2.8499176461457533e-05, "loss": 0.2173239231109619, "step": 166830 }, { "epoch": 0.716278989893786, "grad_norm": 0.037285711616277695, "learning_rate": 2.849486474133991e-05, "loss": 0.06609262228012085, "step": 166840 }, { "epoch": 0.7163219219838061, "grad_norm": 5.157741546630859, "learning_rate": 2.8490553021222288e-05, "loss": 0.22957515716552734, "step": 166850 }, { "epoch": 0.716364854073826, "grad_norm": 0.5119006037712097, "learning_rate": 2.8486241301104665e-05, "loss": 0.1565529465675354, "step": 166860 }, { "epoch": 0.716407786163846, "grad_norm": 0.3631255030632019, "learning_rate": 2.848192958098704e-05, "loss": 0.050896257162094116, "step": 166870 }, { "epoch": 0.7164507182538661, "grad_norm": 2.3672053813934326, "learning_rate": 2.8477617860869417e-05, "loss": 0.24461157321929933, "step": 166880 }, { "epoch": 0.716493650343886, "grad_norm": 0.41185131669044495, "learning_rate": 2.8473306140751794e-05, "loss": 0.24481289386749266, "step": 166890 }, { "epoch": 0.716536582433906, "grad_norm": 0.0012526832288131118, "learning_rate": 2.846899442063417e-05, "loss": 0.007355506718158722, "step": 166900 }, { "epoch": 0.7165795145239261, "grad_norm": 1.2811726331710815, "learning_rate": 2.8464682700516542e-05, "loss": 0.10153491497039795, "step": 166910 }, { "epoch": 0.716622446613946, "grad_norm": 0.5895275473594666, "learning_rate": 2.846037098039892e-05, "loss": 0.18279372453689574, "step": 166920 }, { "epoch": 0.7166653787039661, "grad_norm": 0.0027312906458973885, "learning_rate": 2.8456059260281296e-05, "loss": 0.19183812141418458, "step": 166930 }, { "epoch": 0.7167083107939861, "grad_norm": 0.29518353939056396, "learning_rate": 2.8451747540163677e-05, "loss": 0.05575355291366577, "step": 166940 }, { "epoch": 0.716751242884006, "grad_norm": 0.8758601546287537, "learning_rate": 2.8447435820046048e-05, "loss": 0.22281897068023682, "step": 166950 }, { "epoch": 0.7167941749740261, "grad_norm": 0.1582789421081543, "learning_rate": 2.8443124099928425e-05, "loss": 0.16287997961044312, "step": 166960 }, { "epoch": 0.7168371070640461, "grad_norm": 4.892996311187744, "learning_rate": 2.8438812379810802e-05, "loss": 0.4806065082550049, "step": 166970 }, { "epoch": 0.7168800391540661, "grad_norm": 0.44628673791885376, "learning_rate": 2.843450065969318e-05, "loss": 0.26183133125305175, "step": 166980 }, { "epoch": 0.7169229712440861, "grad_norm": 0.02352777309715748, "learning_rate": 2.8430188939575557e-05, "loss": 0.2532444953918457, "step": 166990 }, { "epoch": 0.7169659033341061, "grad_norm": 0.03716174140572548, "learning_rate": 2.842587721945793e-05, "loss": 0.08634965419769287, "step": 167000 }, { "epoch": 0.7169659033341061, "eval_loss": 0.3958474099636078, "eval_runtime": 27.1467, "eval_samples_per_second": 3.684, "eval_steps_per_second": 3.684, "step": 167000 }, { "epoch": 0.7170088354241261, "grad_norm": 0.0032118239905685186, "learning_rate": 2.842156549934031e-05, "loss": 0.10042769908905029, "step": 167010 }, { "epoch": 0.7170517675141461, "grad_norm": 0.02116282656788826, "learning_rate": 2.8417253779222686e-05, "loss": 0.12349098920822144, "step": 167020 }, { "epoch": 0.7170946996041662, "grad_norm": 0.1639430820941925, "learning_rate": 2.8412942059105063e-05, "loss": 0.4242201328277588, "step": 167030 }, { "epoch": 0.7171376316941861, "grad_norm": 0.9448293447494507, "learning_rate": 2.8408630338987434e-05, "loss": 0.2647895574569702, "step": 167040 }, { "epoch": 0.7171805637842061, "grad_norm": 0.019060397520661354, "learning_rate": 2.8404318618869814e-05, "loss": 0.27194387912750245, "step": 167050 }, { "epoch": 0.7172234958742262, "grad_norm": 1.0550918579101562, "learning_rate": 2.8400006898752192e-05, "loss": 0.02531105875968933, "step": 167060 }, { "epoch": 0.7172664279642461, "grad_norm": 0.12561658024787903, "learning_rate": 2.839569517863457e-05, "loss": 0.19044885635375977, "step": 167070 }, { "epoch": 0.7173093600542662, "grad_norm": 5.141531944274902, "learning_rate": 2.839138345851694e-05, "loss": 0.3282967805862427, "step": 167080 }, { "epoch": 0.7173522921442862, "grad_norm": 0.06956620514392853, "learning_rate": 2.8387071738399317e-05, "loss": 0.12983059883117676, "step": 167090 }, { "epoch": 0.7173952242343061, "grad_norm": 0.19691401720046997, "learning_rate": 2.8382760018281694e-05, "loss": 0.20943801403045653, "step": 167100 }, { "epoch": 0.7174381563243262, "grad_norm": 0.09076624363660812, "learning_rate": 2.837844829816407e-05, "loss": 0.17342876195907592, "step": 167110 }, { "epoch": 0.7174810884143462, "grad_norm": 0.10245295614004135, "learning_rate": 2.8374136578046446e-05, "loss": 0.06132156252861023, "step": 167120 }, { "epoch": 0.7175240205043661, "grad_norm": 1.7344605922698975, "learning_rate": 2.8369824857928823e-05, "loss": 0.2766872882843018, "step": 167130 }, { "epoch": 0.7175669525943862, "grad_norm": 0.07946517318487167, "learning_rate": 2.83655131378112e-05, "loss": 0.1935239315032959, "step": 167140 }, { "epoch": 0.7176098846844062, "grad_norm": 0.002123386599123478, "learning_rate": 2.8361201417693578e-05, "loss": 0.3540096998214722, "step": 167150 }, { "epoch": 0.7176528167744262, "grad_norm": 0.0639406368136406, "learning_rate": 2.835688969757595e-05, "loss": 0.2834771633148193, "step": 167160 }, { "epoch": 0.7176957488644462, "grad_norm": 7.1676177978515625, "learning_rate": 2.835257797745833e-05, "loss": 0.14616410732269286, "step": 167170 }, { "epoch": 0.7177386809544662, "grad_norm": 0.38563501834869385, "learning_rate": 2.8348266257340706e-05, "loss": 0.22452714443206787, "step": 167180 }, { "epoch": 0.7177816130444862, "grad_norm": 0.043906208127737045, "learning_rate": 2.8343954537223084e-05, "loss": 0.10431742668151855, "step": 167190 }, { "epoch": 0.7178245451345062, "grad_norm": 0.1342371553182602, "learning_rate": 2.8339642817105454e-05, "loss": 0.20088469982147217, "step": 167200 }, { "epoch": 0.7178674772245263, "grad_norm": 0.13105660676956177, "learning_rate": 2.833533109698783e-05, "loss": 0.19219967126846313, "step": 167210 }, { "epoch": 0.7179104093145463, "grad_norm": 0.023251548409461975, "learning_rate": 2.833101937687021e-05, "loss": 0.14668450355529786, "step": 167220 }, { "epoch": 0.7179533414045662, "grad_norm": 0.038987189531326294, "learning_rate": 2.832670765675259e-05, "loss": 0.22458341121673583, "step": 167230 }, { "epoch": 0.7179962734945863, "grad_norm": 1.144334077835083, "learning_rate": 2.832239593663496e-05, "loss": 0.412811803817749, "step": 167240 }, { "epoch": 0.7180392055846063, "grad_norm": 1.0596046447753906, "learning_rate": 2.8318084216517338e-05, "loss": 0.29309654235839844, "step": 167250 }, { "epoch": 0.7180821376746263, "grad_norm": 0.005872590467333794, "learning_rate": 2.8313772496399715e-05, "loss": 0.17906821966171266, "step": 167260 }, { "epoch": 0.7181250697646463, "grad_norm": 0.005974804516881704, "learning_rate": 2.8309460776282092e-05, "loss": 0.05527445077896118, "step": 167270 }, { "epoch": 0.7181680018546663, "grad_norm": 0.6632001399993896, "learning_rate": 2.8305149056164466e-05, "loss": 0.16342580318450928, "step": 167280 }, { "epoch": 0.7182109339446863, "grad_norm": 1.1039843559265137, "learning_rate": 2.8300837336046844e-05, "loss": 0.1806264877319336, "step": 167290 }, { "epoch": 0.7182538660347063, "grad_norm": 3.905797243118286, "learning_rate": 2.829652561592922e-05, "loss": 0.24257402420043944, "step": 167300 }, { "epoch": 0.7182967981247264, "grad_norm": 1.0418668985366821, "learning_rate": 2.8292213895811598e-05, "loss": 0.2584467887878418, "step": 167310 }, { "epoch": 0.7183397302147463, "grad_norm": 0.009823929518461227, "learning_rate": 2.828790217569397e-05, "loss": 0.21770334243774414, "step": 167320 }, { "epoch": 0.7183826623047663, "grad_norm": 1.0369503498077393, "learning_rate": 2.8283590455576346e-05, "loss": 0.20086488723754883, "step": 167330 }, { "epoch": 0.7184255943947864, "grad_norm": 0.02220025099813938, "learning_rate": 2.8279278735458727e-05, "loss": 0.006128740310668945, "step": 167340 }, { "epoch": 0.7184685264848063, "grad_norm": 0.035444118082523346, "learning_rate": 2.8274967015341104e-05, "loss": 0.09826570153236389, "step": 167350 }, { "epoch": 0.7185114585748263, "grad_norm": 0.006482461001724005, "learning_rate": 2.827065529522348e-05, "loss": 0.14989885091781616, "step": 167360 }, { "epoch": 0.7185543906648464, "grad_norm": 7.3291401863098145, "learning_rate": 2.8266343575105852e-05, "loss": 0.30905053615570066, "step": 167370 }, { "epoch": 0.7185973227548663, "grad_norm": 3.8639028072357178, "learning_rate": 2.826203185498823e-05, "loss": 0.13532700538635253, "step": 167380 }, { "epoch": 0.7186402548448864, "grad_norm": 0.2876453101634979, "learning_rate": 2.8257720134870607e-05, "loss": 0.1640407085418701, "step": 167390 }, { "epoch": 0.7186831869349064, "grad_norm": 1.3986937999725342, "learning_rate": 2.8253408414752984e-05, "loss": 0.3686722755432129, "step": 167400 }, { "epoch": 0.7187261190249263, "grad_norm": 0.004579016473144293, "learning_rate": 2.8249096694635358e-05, "loss": 0.2243633270263672, "step": 167410 }, { "epoch": 0.7187690511149464, "grad_norm": 1.4218645095825195, "learning_rate": 2.8244784974517735e-05, "loss": 0.07126256227493286, "step": 167420 }, { "epoch": 0.7188119832049664, "grad_norm": 0.06639666855335236, "learning_rate": 2.8240473254400113e-05, "loss": 0.2880155086517334, "step": 167430 }, { "epoch": 0.7188549152949864, "grad_norm": 1.2099025249481201, "learning_rate": 2.823616153428249e-05, "loss": 0.22116010189056395, "step": 167440 }, { "epoch": 0.7188978473850064, "grad_norm": 0.001034542452543974, "learning_rate": 2.8231849814164864e-05, "loss": 0.18301440477371217, "step": 167450 }, { "epoch": 0.7189407794750264, "grad_norm": 0.07826003432273865, "learning_rate": 2.822753809404724e-05, "loss": 0.19326547384262086, "step": 167460 }, { "epoch": 0.7189837115650464, "grad_norm": 1.5844489336013794, "learning_rate": 2.822322637392962e-05, "loss": 0.3147656202316284, "step": 167470 }, { "epoch": 0.7190266436550664, "grad_norm": 0.7816833257675171, "learning_rate": 2.8218914653811996e-05, "loss": 0.17998864650726318, "step": 167480 }, { "epoch": 0.7190695757450865, "grad_norm": 0.0279255211353302, "learning_rate": 2.8214602933694367e-05, "loss": 0.11451849937438965, "step": 167490 }, { "epoch": 0.7191125078351064, "grad_norm": 0.011592227965593338, "learning_rate": 2.8210291213576744e-05, "loss": 0.02958979904651642, "step": 167500 }, { "epoch": 0.7191554399251264, "grad_norm": 0.1158149316906929, "learning_rate": 2.820597949345912e-05, "loss": 0.18526514768600463, "step": 167510 }, { "epoch": 0.7191983720151465, "grad_norm": 0.02158307656645775, "learning_rate": 2.82016677733415e-05, "loss": 0.3040745735168457, "step": 167520 }, { "epoch": 0.7192413041051664, "grad_norm": 0.6742632985115051, "learning_rate": 2.8197356053223873e-05, "loss": 0.15425585508346557, "step": 167530 }, { "epoch": 0.7192842361951864, "grad_norm": 1.8113670349121094, "learning_rate": 2.819304433310625e-05, "loss": 0.21069693565368652, "step": 167540 }, { "epoch": 0.7193271682852065, "grad_norm": 0.0016236762749031186, "learning_rate": 2.8188732612988627e-05, "loss": 0.1839777112007141, "step": 167550 }, { "epoch": 0.7193701003752264, "grad_norm": 0.3076116144657135, "learning_rate": 2.8184420892871005e-05, "loss": 0.16991562843322755, "step": 167560 }, { "epoch": 0.7194130324652465, "grad_norm": 0.15978385508060455, "learning_rate": 2.818010917275338e-05, "loss": 0.09455273747444153, "step": 167570 }, { "epoch": 0.7194559645552665, "grad_norm": 0.010344245471060276, "learning_rate": 2.8175797452635756e-05, "loss": 0.09267536401748658, "step": 167580 }, { "epoch": 0.7194988966452864, "grad_norm": 0.026899434626102448, "learning_rate": 2.8171485732518133e-05, "loss": 0.1246342420578003, "step": 167590 }, { "epoch": 0.7195418287353065, "grad_norm": 0.18134640157222748, "learning_rate": 2.816717401240051e-05, "loss": 0.11649966239929199, "step": 167600 }, { "epoch": 0.7195847608253265, "grad_norm": 0.008635690435767174, "learning_rate": 2.816286229228288e-05, "loss": 0.24684834480285645, "step": 167610 }, { "epoch": 0.7196276929153465, "grad_norm": 0.0058225602842867374, "learning_rate": 2.815855057216526e-05, "loss": 0.21973233222961425, "step": 167620 }, { "epoch": 0.7196706250053665, "grad_norm": 1.9177488088607788, "learning_rate": 2.8154238852047636e-05, "loss": 0.1735082745552063, "step": 167630 }, { "epoch": 0.7197135570953865, "grad_norm": 1.5728529691696167, "learning_rate": 2.8149927131930017e-05, "loss": 0.2044273853302002, "step": 167640 }, { "epoch": 0.7197564891854066, "grad_norm": 0.003188293194398284, "learning_rate": 2.8145615411812387e-05, "loss": 0.24602503776550294, "step": 167650 }, { "epoch": 0.7197994212754265, "grad_norm": 7.650982856750488, "learning_rate": 2.8141303691694765e-05, "loss": 0.28363747596740724, "step": 167660 }, { "epoch": 0.7198423533654466, "grad_norm": 0.03091641142964363, "learning_rate": 2.8136991971577142e-05, "loss": 0.10700229406356812, "step": 167670 }, { "epoch": 0.7198852854554666, "grad_norm": 4.954349994659424, "learning_rate": 2.813268025145952e-05, "loss": 0.2537800073623657, "step": 167680 }, { "epoch": 0.7199282175454865, "grad_norm": 0.0050700693391263485, "learning_rate": 2.8128368531341897e-05, "loss": 0.21821444034576415, "step": 167690 }, { "epoch": 0.7199711496355066, "grad_norm": 0.0030803687404841185, "learning_rate": 2.812405681122427e-05, "loss": 0.024721534550189973, "step": 167700 }, { "epoch": 0.7200140817255266, "grad_norm": 0.05516568943858147, "learning_rate": 2.8119745091106648e-05, "loss": 0.18041404485702514, "step": 167710 }, { "epoch": 0.7200570138155465, "grad_norm": 0.0014705831417813897, "learning_rate": 2.8115433370989025e-05, "loss": 0.12923437356948853, "step": 167720 }, { "epoch": 0.7200999459055666, "grad_norm": 1.7812938690185547, "learning_rate": 2.8111121650871403e-05, "loss": 0.02560472786426544, "step": 167730 }, { "epoch": 0.7201428779955866, "grad_norm": 1.239588975906372, "learning_rate": 2.8106809930753773e-05, "loss": 0.20239953994750975, "step": 167740 }, { "epoch": 0.7201858100856066, "grad_norm": 0.6212531328201294, "learning_rate": 2.8102498210636154e-05, "loss": 0.19338698387145997, "step": 167750 }, { "epoch": 0.7202287421756266, "grad_norm": 0.016148079186677933, "learning_rate": 2.809818649051853e-05, "loss": 0.16366405487060548, "step": 167760 }, { "epoch": 0.7202716742656466, "grad_norm": 0.0054362318478524685, "learning_rate": 2.809387477040091e-05, "loss": 0.22141509056091307, "step": 167770 }, { "epoch": 0.7203146063556666, "grad_norm": 0.10470175743103027, "learning_rate": 2.808956305028328e-05, "loss": 0.2782343864440918, "step": 167780 }, { "epoch": 0.7203575384456866, "grad_norm": 0.18910618126392365, "learning_rate": 2.8085251330165656e-05, "loss": 0.2893279790878296, "step": 167790 }, { "epoch": 0.7204004705357067, "grad_norm": 0.022973231971263885, "learning_rate": 2.8080939610048034e-05, "loss": 0.2531591892242432, "step": 167800 }, { "epoch": 0.7204434026257266, "grad_norm": 0.02012925036251545, "learning_rate": 2.807662788993041e-05, "loss": 0.07524177432060242, "step": 167810 }, { "epoch": 0.7204863347157466, "grad_norm": 0.0014940955443307757, "learning_rate": 2.8072316169812785e-05, "loss": 0.1927587866783142, "step": 167820 }, { "epoch": 0.7205292668057667, "grad_norm": 0.6223175525665283, "learning_rate": 2.8068004449695162e-05, "loss": 0.18107985258102416, "step": 167830 }, { "epoch": 0.7205721988957866, "grad_norm": 1.9258840084075928, "learning_rate": 2.806369272957754e-05, "loss": 0.1704465627670288, "step": 167840 }, { "epoch": 0.7206151309858067, "grad_norm": 0.1283116340637207, "learning_rate": 2.8059381009459917e-05, "loss": 0.18721762895584107, "step": 167850 }, { "epoch": 0.7206580630758267, "grad_norm": 1.301703929901123, "learning_rate": 2.805506928934229e-05, "loss": 0.18680089712142944, "step": 167860 }, { "epoch": 0.7207009951658466, "grad_norm": 0.03944355994462967, "learning_rate": 2.805075756922467e-05, "loss": 0.055796694755554196, "step": 167870 }, { "epoch": 0.7207439272558667, "grad_norm": 0.010357534512877464, "learning_rate": 2.8046445849107046e-05, "loss": 0.23156373500823973, "step": 167880 }, { "epoch": 0.7207868593458867, "grad_norm": 0.21075792610645294, "learning_rate": 2.8042134128989423e-05, "loss": 0.306375527381897, "step": 167890 }, { "epoch": 0.7208297914359066, "grad_norm": 0.03510294854640961, "learning_rate": 2.8037822408871794e-05, "loss": 0.17589036226272584, "step": 167900 }, { "epoch": 0.7208727235259267, "grad_norm": 0.1116693839430809, "learning_rate": 2.803351068875417e-05, "loss": 0.12817736864089965, "step": 167910 }, { "epoch": 0.7209156556159467, "grad_norm": 0.006631443277001381, "learning_rate": 2.802919896863655e-05, "loss": 0.2521315097808838, "step": 167920 }, { "epoch": 0.7209585877059667, "grad_norm": 0.04760783165693283, "learning_rate": 2.8024887248518926e-05, "loss": 0.26681339740753174, "step": 167930 }, { "epoch": 0.7210015197959867, "grad_norm": 0.0051206364296376705, "learning_rate": 2.80205755284013e-05, "loss": 0.12145293951034546, "step": 167940 }, { "epoch": 0.7210444518860067, "grad_norm": 2.330493211746216, "learning_rate": 2.8016263808283677e-05, "loss": 0.2616943359375, "step": 167950 }, { "epoch": 0.7210873839760267, "grad_norm": 0.767716109752655, "learning_rate": 2.8011952088166054e-05, "loss": 0.09361116886138916, "step": 167960 }, { "epoch": 0.7211303160660467, "grad_norm": 0.017026184126734734, "learning_rate": 2.8007640368048432e-05, "loss": 0.10996824502944946, "step": 167970 }, { "epoch": 0.7211732481560668, "grad_norm": 0.0036786592099815607, "learning_rate": 2.8003328647930806e-05, "loss": 0.07994485497474671, "step": 167980 }, { "epoch": 0.7212161802460867, "grad_norm": 0.14449842274188995, "learning_rate": 2.7999016927813183e-05, "loss": 0.1397989273071289, "step": 167990 }, { "epoch": 0.7212591123361067, "grad_norm": 1.243582844734192, "learning_rate": 2.799470520769556e-05, "loss": 0.18534576892852783, "step": 168000 }, { "epoch": 0.7212591123361067, "eval_loss": 0.37931978702545166, "eval_runtime": 27.1262, "eval_samples_per_second": 3.686, "eval_steps_per_second": 3.686, "step": 168000 }, { "epoch": 0.7213020444261268, "grad_norm": 0.15428930521011353, "learning_rate": 2.7990393487577938e-05, "loss": 0.2006314516067505, "step": 168010 }, { "epoch": 0.7213449765161467, "grad_norm": 3.320246458053589, "learning_rate": 2.7986081767460308e-05, "loss": 0.37806756496429444, "step": 168020 }, { "epoch": 0.7213879086061668, "grad_norm": 1.1948304176330566, "learning_rate": 2.7981770047342686e-05, "loss": 0.3856146812438965, "step": 168030 }, { "epoch": 0.7214308406961868, "grad_norm": 5.518876552581787, "learning_rate": 2.7977458327225063e-05, "loss": 0.2722789764404297, "step": 168040 }, { "epoch": 0.7214737727862067, "grad_norm": 1.6739928722381592, "learning_rate": 2.7973146607107444e-05, "loss": 0.27594332695007323, "step": 168050 }, { "epoch": 0.7215167048762268, "grad_norm": 0.8862299919128418, "learning_rate": 2.796883488698982e-05, "loss": 0.3354404211044312, "step": 168060 }, { "epoch": 0.7215596369662468, "grad_norm": 0.0056771812960505486, "learning_rate": 2.796452316687219e-05, "loss": 0.28955352306365967, "step": 168070 }, { "epoch": 0.7216025690562669, "grad_norm": 0.013778852298855782, "learning_rate": 2.796021144675457e-05, "loss": 0.03039870858192444, "step": 168080 }, { "epoch": 0.7216455011462868, "grad_norm": 0.27223819494247437, "learning_rate": 2.7955899726636946e-05, "loss": 0.16090576648712157, "step": 168090 }, { "epoch": 0.7216884332363068, "grad_norm": 1.8554707765579224, "learning_rate": 2.7951588006519324e-05, "loss": 0.19248855113983154, "step": 168100 }, { "epoch": 0.7217313653263269, "grad_norm": 0.012462825514376163, "learning_rate": 2.7947276286401698e-05, "loss": 0.339345121383667, "step": 168110 }, { "epoch": 0.7217742974163468, "grad_norm": 0.0005453620688058436, "learning_rate": 2.7942964566284075e-05, "loss": 0.13194665908813477, "step": 168120 }, { "epoch": 0.7218172295063668, "grad_norm": 0.0018816015217453241, "learning_rate": 2.7938652846166452e-05, "loss": 0.31123464107513427, "step": 168130 }, { "epoch": 0.7218601615963869, "grad_norm": 0.0021208133548498154, "learning_rate": 2.793434112604883e-05, "loss": 0.06131689548492432, "step": 168140 }, { "epoch": 0.7219030936864068, "grad_norm": 1.9875645637512207, "learning_rate": 2.79300294059312e-05, "loss": 0.2216268301010132, "step": 168150 }, { "epoch": 0.7219460257764269, "grad_norm": 0.008463248610496521, "learning_rate": 2.792571768581358e-05, "loss": 0.2832399845123291, "step": 168160 }, { "epoch": 0.7219889578664469, "grad_norm": 1.7538836002349854, "learning_rate": 2.7921405965695958e-05, "loss": 0.1032507061958313, "step": 168170 }, { "epoch": 0.7220318899564668, "grad_norm": 0.9188027381896973, "learning_rate": 2.7917094245578336e-05, "loss": 0.06151009202003479, "step": 168180 }, { "epoch": 0.7220748220464869, "grad_norm": 0.5642251968383789, "learning_rate": 2.7912782525460706e-05, "loss": 0.2438356399536133, "step": 168190 }, { "epoch": 0.7221177541365069, "grad_norm": 0.001289999345317483, "learning_rate": 2.7908470805343083e-05, "loss": 0.2134316682815552, "step": 168200 }, { "epoch": 0.7221606862265268, "grad_norm": 0.05979342386126518, "learning_rate": 2.790415908522546e-05, "loss": 0.13589794635772706, "step": 168210 }, { "epoch": 0.7222036183165469, "grad_norm": 0.3308796286582947, "learning_rate": 2.7899847365107838e-05, "loss": 0.3463158130645752, "step": 168220 }, { "epoch": 0.7222465504065669, "grad_norm": 0.014125037007033825, "learning_rate": 2.7895535644990212e-05, "loss": 0.17682739496231079, "step": 168230 }, { "epoch": 0.7222894824965869, "grad_norm": 0.014602779410779476, "learning_rate": 2.789122392487259e-05, "loss": 0.26034021377563477, "step": 168240 }, { "epoch": 0.7223324145866069, "grad_norm": 2.157536745071411, "learning_rate": 2.7886912204754967e-05, "loss": 0.36604933738708495, "step": 168250 }, { "epoch": 0.722375346676627, "grad_norm": 5.271147727966309, "learning_rate": 2.7882600484637344e-05, "loss": 0.3733457088470459, "step": 168260 }, { "epoch": 0.7224182787666469, "grad_norm": 0.17696958780288696, "learning_rate": 2.7878288764519718e-05, "loss": 0.24389424324035644, "step": 168270 }, { "epoch": 0.7224612108566669, "grad_norm": 0.0023903343826532364, "learning_rate": 2.7873977044402095e-05, "loss": 0.11699960231781006, "step": 168280 }, { "epoch": 0.722504142946687, "grad_norm": 0.6614067554473877, "learning_rate": 2.7869665324284473e-05, "loss": 0.17375963926315308, "step": 168290 }, { "epoch": 0.7225470750367069, "grad_norm": 0.29951897263526917, "learning_rate": 2.786535360416685e-05, "loss": 0.15570751428604127, "step": 168300 }, { "epoch": 0.7225900071267269, "grad_norm": 0.049194544553756714, "learning_rate": 2.786104188404922e-05, "loss": 0.11768434047698975, "step": 168310 }, { "epoch": 0.722632939216747, "grad_norm": 11.708648681640625, "learning_rate": 2.7856730163931598e-05, "loss": 0.40833373069763185, "step": 168320 }, { "epoch": 0.7226758713067669, "grad_norm": 0.00927521288394928, "learning_rate": 2.7852418443813975e-05, "loss": 0.14625450372695922, "step": 168330 }, { "epoch": 0.722718803396787, "grad_norm": 3.148890495300293, "learning_rate": 2.7848106723696356e-05, "loss": 0.22452225685119628, "step": 168340 }, { "epoch": 0.722761735486807, "grad_norm": 2.645533561706543, "learning_rate": 2.7843795003578727e-05, "loss": 0.37561984062194825, "step": 168350 }, { "epoch": 0.7228046675768269, "grad_norm": 1.514840841293335, "learning_rate": 2.7839483283461104e-05, "loss": 0.15910897254943848, "step": 168360 }, { "epoch": 0.722847599666847, "grad_norm": 1.4466434717178345, "learning_rate": 2.783517156334348e-05, "loss": 0.07210831046104431, "step": 168370 }, { "epoch": 0.722890531756867, "grad_norm": 0.9692143201828003, "learning_rate": 2.783085984322586e-05, "loss": 0.22329955101013182, "step": 168380 }, { "epoch": 0.722933463846887, "grad_norm": 1.6919922828674316, "learning_rate": 2.7826548123108233e-05, "loss": 0.19658915996551513, "step": 168390 }, { "epoch": 0.722976395936907, "grad_norm": 0.04949316009879112, "learning_rate": 2.782223640299061e-05, "loss": 0.10937418937683105, "step": 168400 }, { "epoch": 0.723019328026927, "grad_norm": 0.004701630212366581, "learning_rate": 2.7817924682872987e-05, "loss": 0.22459728717803956, "step": 168410 }, { "epoch": 0.723062260116947, "grad_norm": 0.007539353799074888, "learning_rate": 2.7813612962755365e-05, "loss": 0.09266999959945679, "step": 168420 }, { "epoch": 0.723105192206967, "grad_norm": 0.03429533913731575, "learning_rate": 2.7809301242637742e-05, "loss": 0.1797150492668152, "step": 168430 }, { "epoch": 0.723148124296987, "grad_norm": 0.04105391725897789, "learning_rate": 2.7804989522520113e-05, "loss": 0.17547688484191895, "step": 168440 }, { "epoch": 0.723191056387007, "grad_norm": 0.04575356841087341, "learning_rate": 2.7800677802402493e-05, "loss": 0.11070233583450317, "step": 168450 }, { "epoch": 0.723233988477027, "grad_norm": 0.0016563141252845526, "learning_rate": 2.779636608228487e-05, "loss": 0.28673884868621824, "step": 168460 }, { "epoch": 0.7232769205670471, "grad_norm": 0.6947919130325317, "learning_rate": 2.7792054362167248e-05, "loss": 0.2968265533447266, "step": 168470 }, { "epoch": 0.723319852657067, "grad_norm": 0.003793654264882207, "learning_rate": 2.778774264204962e-05, "loss": 0.10515412092208862, "step": 168480 }, { "epoch": 0.723362784747087, "grad_norm": 1.045601487159729, "learning_rate": 2.7783430921931996e-05, "loss": 0.334007716178894, "step": 168490 }, { "epoch": 0.7234057168371071, "grad_norm": 2.9117648601531982, "learning_rate": 2.7779119201814373e-05, "loss": 0.18874597549438477, "step": 168500 }, { "epoch": 0.7234486489271271, "grad_norm": 0.006848334800451994, "learning_rate": 2.777480748169675e-05, "loss": 0.1698448419570923, "step": 168510 }, { "epoch": 0.7234915810171471, "grad_norm": 0.806219756603241, "learning_rate": 2.7770495761579125e-05, "loss": 0.5198267936706543, "step": 168520 }, { "epoch": 0.7235345131071671, "grad_norm": 1.7642408609390259, "learning_rate": 2.7766184041461502e-05, "loss": 0.2058842658996582, "step": 168530 }, { "epoch": 0.7235774451971871, "grad_norm": 4.31995964050293, "learning_rate": 2.776187232134388e-05, "loss": 0.17449574470520018, "step": 168540 }, { "epoch": 0.7236203772872071, "grad_norm": 0.0006515654386021197, "learning_rate": 2.7757560601226257e-05, "loss": 0.20330989360809326, "step": 168550 }, { "epoch": 0.7236633093772271, "grad_norm": 0.3082123398780823, "learning_rate": 2.7753248881108627e-05, "loss": 0.11522177457809449, "step": 168560 }, { "epoch": 0.7237062414672472, "grad_norm": 0.05763091892004013, "learning_rate": 2.7748937160991008e-05, "loss": 0.04711337983608246, "step": 168570 }, { "epoch": 0.7237491735572671, "grad_norm": 15.214527130126953, "learning_rate": 2.7744625440873385e-05, "loss": 0.2867955207824707, "step": 168580 }, { "epoch": 0.7237921056472871, "grad_norm": 3.8333139419555664, "learning_rate": 2.7740313720755763e-05, "loss": 0.09657506942749024, "step": 168590 }, { "epoch": 0.7238350377373072, "grad_norm": 0.0093782814219594, "learning_rate": 2.7736002000638133e-05, "loss": 0.20292196273803711, "step": 168600 }, { "epoch": 0.7238779698273271, "grad_norm": 0.004507375881075859, "learning_rate": 2.773169028052051e-05, "loss": 0.0971630871295929, "step": 168610 }, { "epoch": 0.7239209019173471, "grad_norm": 0.013182217255234718, "learning_rate": 2.7727378560402888e-05, "loss": 0.14002325534820556, "step": 168620 }, { "epoch": 0.7239638340073672, "grad_norm": 0.002185863209888339, "learning_rate": 2.7723066840285265e-05, "loss": 0.17800129652023317, "step": 168630 }, { "epoch": 0.7240067660973871, "grad_norm": 1.0492135286331177, "learning_rate": 2.771875512016764e-05, "loss": 0.04826590418815613, "step": 168640 }, { "epoch": 0.7240496981874072, "grad_norm": 0.014060789719223976, "learning_rate": 2.7714443400050016e-05, "loss": 0.3352368354797363, "step": 168650 }, { "epoch": 0.7240926302774272, "grad_norm": 0.03786401078104973, "learning_rate": 2.7710131679932394e-05, "loss": 0.23978736400604247, "step": 168660 }, { "epoch": 0.7241355623674471, "grad_norm": 0.0007775720441713929, "learning_rate": 2.770581995981477e-05, "loss": 0.24734921455383302, "step": 168670 }, { "epoch": 0.7241784944574672, "grad_norm": 1.8972983360290527, "learning_rate": 2.7701508239697145e-05, "loss": 0.19279592037200927, "step": 168680 }, { "epoch": 0.7242214265474872, "grad_norm": 1.2243245840072632, "learning_rate": 2.7697196519579522e-05, "loss": 0.1520993232727051, "step": 168690 }, { "epoch": 0.7242643586375072, "grad_norm": 1.4366346597671509, "learning_rate": 2.76928847994619e-05, "loss": 0.101070237159729, "step": 168700 }, { "epoch": 0.7243072907275272, "grad_norm": 0.02268645167350769, "learning_rate": 2.7688573079344277e-05, "loss": 0.21039226055145263, "step": 168710 }, { "epoch": 0.7243502228175472, "grad_norm": 0.3205955922603607, "learning_rate": 2.7684261359226648e-05, "loss": 0.12725365161895752, "step": 168720 }, { "epoch": 0.7243931549075672, "grad_norm": 1.147531270980835, "learning_rate": 2.7679949639109025e-05, "loss": 0.17458294630050658, "step": 168730 }, { "epoch": 0.7244360869975872, "grad_norm": 0.016879552975296974, "learning_rate": 2.7675637918991402e-05, "loss": 0.34085586071014407, "step": 168740 }, { "epoch": 0.7244790190876073, "grad_norm": 0.00016512209549546242, "learning_rate": 2.7671326198873783e-05, "loss": 0.28415277004241946, "step": 168750 }, { "epoch": 0.7245219511776272, "grad_norm": 5.477837085723877, "learning_rate": 2.7667014478756154e-05, "loss": 0.20578546524047853, "step": 168760 }, { "epoch": 0.7245648832676472, "grad_norm": 0.050836920738220215, "learning_rate": 2.766270275863853e-05, "loss": 0.36799397468566897, "step": 168770 }, { "epoch": 0.7246078153576673, "grad_norm": 6.697229862213135, "learning_rate": 2.765839103852091e-05, "loss": 0.054876917600631715, "step": 168780 }, { "epoch": 0.7246507474476872, "grad_norm": 1.9743859767913818, "learning_rate": 2.7654079318403286e-05, "loss": 0.26255998611450193, "step": 168790 }, { "epoch": 0.7246936795377072, "grad_norm": 0.1395675390958786, "learning_rate": 2.7649767598285663e-05, "loss": 0.08039167523384094, "step": 168800 }, { "epoch": 0.7247366116277273, "grad_norm": 8.4291353225708, "learning_rate": 2.7645455878168037e-05, "loss": 0.462004280090332, "step": 168810 }, { "epoch": 0.7247795437177472, "grad_norm": 0.12003923207521439, "learning_rate": 2.7641144158050414e-05, "loss": 0.22433068752288818, "step": 168820 }, { "epoch": 0.7248224758077673, "grad_norm": 0.9225029945373535, "learning_rate": 2.7636832437932792e-05, "loss": 0.25149383544921877, "step": 168830 }, { "epoch": 0.7248654078977873, "grad_norm": 1.2647552490234375, "learning_rate": 2.763252071781517e-05, "loss": 0.2392057180404663, "step": 168840 }, { "epoch": 0.7249083399878072, "grad_norm": 0.7912980914115906, "learning_rate": 2.762820899769754e-05, "loss": 0.13407490253448487, "step": 168850 }, { "epoch": 0.7249512720778273, "grad_norm": 0.029732348397374153, "learning_rate": 2.762389727757992e-05, "loss": 0.059022271633148195, "step": 168860 }, { "epoch": 0.7249942041678473, "grad_norm": 0.8471653461456299, "learning_rate": 2.7619585557462298e-05, "loss": 0.3402934312820435, "step": 168870 }, { "epoch": 0.7250371362578673, "grad_norm": 11.936932563781738, "learning_rate": 2.7615273837344675e-05, "loss": 0.2877587080001831, "step": 168880 }, { "epoch": 0.7250800683478873, "grad_norm": 1.663932204246521, "learning_rate": 2.7610962117227046e-05, "loss": 0.28721544742584226, "step": 168890 }, { "epoch": 0.7251230004379073, "grad_norm": 0.3331746459007263, "learning_rate": 2.7606650397109423e-05, "loss": 0.11548913717269897, "step": 168900 }, { "epoch": 0.7251659325279273, "grad_norm": 1.681654930114746, "learning_rate": 2.76023386769918e-05, "loss": 0.3489760160446167, "step": 168910 }, { "epoch": 0.7252088646179473, "grad_norm": 0.09427236020565033, "learning_rate": 2.7598026956874178e-05, "loss": 0.0763422667980194, "step": 168920 }, { "epoch": 0.7252517967079674, "grad_norm": 0.07877668738365173, "learning_rate": 2.759371523675655e-05, "loss": 0.1756036639213562, "step": 168930 }, { "epoch": 0.7252947287979874, "grad_norm": 1.987454891204834, "learning_rate": 2.758940351663893e-05, "loss": 0.19289370775222778, "step": 168940 }, { "epoch": 0.7253376608880073, "grad_norm": 1.1299264430999756, "learning_rate": 2.7585091796521306e-05, "loss": 0.3390709638595581, "step": 168950 }, { "epoch": 0.7253805929780274, "grad_norm": 0.0014903040137141943, "learning_rate": 2.7580780076403684e-05, "loss": 0.2046818256378174, "step": 168960 }, { "epoch": 0.7254235250680474, "grad_norm": 1.1031399965286255, "learning_rate": 2.7576468356286058e-05, "loss": 0.16519639492034913, "step": 168970 }, { "epoch": 0.7254664571580673, "grad_norm": 0.0073827896267175674, "learning_rate": 2.7572156636168435e-05, "loss": 0.2113725423812866, "step": 168980 }, { "epoch": 0.7255093892480874, "grad_norm": 0.00510813295841217, "learning_rate": 2.7567844916050812e-05, "loss": 0.09802131652832032, "step": 168990 }, { "epoch": 0.7255523213381074, "grad_norm": 0.2502835690975189, "learning_rate": 2.756353319593319e-05, "loss": 0.18962769508361815, "step": 169000 }, { "epoch": 0.7255523213381074, "eval_loss": 0.38295066356658936, "eval_runtime": 27.3886, "eval_samples_per_second": 3.651, "eval_steps_per_second": 3.651, "step": 169000 }, { "epoch": 0.7255952534281274, "grad_norm": 0.0011066279839724302, "learning_rate": 2.755922147581556e-05, "loss": 0.2311826229095459, "step": 169010 }, { "epoch": 0.7256381855181474, "grad_norm": 0.00853653158992529, "learning_rate": 2.7554909755697938e-05, "loss": 0.18713712692260742, "step": 169020 }, { "epoch": 0.7256811176081674, "grad_norm": 7.358502388000488, "learning_rate": 2.7550598035580315e-05, "loss": 0.273174524307251, "step": 169030 }, { "epoch": 0.7257240496981874, "grad_norm": 0.004957462195307016, "learning_rate": 2.7546286315462692e-05, "loss": 0.17104941606521606, "step": 169040 }, { "epoch": 0.7257669817882074, "grad_norm": 1.1058156490325928, "learning_rate": 2.7541974595345066e-05, "loss": 0.3058955669403076, "step": 169050 }, { "epoch": 0.7258099138782275, "grad_norm": 0.015500033274292946, "learning_rate": 2.7537662875227443e-05, "loss": 0.21356968879699706, "step": 169060 }, { "epoch": 0.7258528459682474, "grad_norm": 0.02399790659546852, "learning_rate": 2.753335115510982e-05, "loss": 0.059341037273406984, "step": 169070 }, { "epoch": 0.7258957780582674, "grad_norm": 1.957128643989563, "learning_rate": 2.7529039434992198e-05, "loss": 0.381409740447998, "step": 169080 }, { "epoch": 0.7259387101482875, "grad_norm": 2.0425257682800293, "learning_rate": 2.7524727714874572e-05, "loss": 0.17824835777282716, "step": 169090 }, { "epoch": 0.7259816422383074, "grad_norm": 0.0014805634273216128, "learning_rate": 2.752041599475695e-05, "loss": 0.39816100597381593, "step": 169100 }, { "epoch": 0.7260245743283275, "grad_norm": 8.61057186126709, "learning_rate": 2.7516104274639327e-05, "loss": 0.3082611322402954, "step": 169110 }, { "epoch": 0.7260675064183475, "grad_norm": 0.08923623710870743, "learning_rate": 2.7511792554521704e-05, "loss": 0.14777944087982178, "step": 169120 }, { "epoch": 0.7261104385083674, "grad_norm": 0.01090413797646761, "learning_rate": 2.7507480834404075e-05, "loss": 0.23257055282592773, "step": 169130 }, { "epoch": 0.7261533705983875, "grad_norm": 0.10899604111909866, "learning_rate": 2.7503169114286452e-05, "loss": 0.18908019065856935, "step": 169140 }, { "epoch": 0.7261963026884075, "grad_norm": 3.4646310806274414, "learning_rate": 2.749885739416883e-05, "loss": 0.3117159128189087, "step": 169150 }, { "epoch": 0.7262392347784274, "grad_norm": 1.4176348447799683, "learning_rate": 2.749454567405121e-05, "loss": 0.06437577605247498, "step": 169160 }, { "epoch": 0.7262821668684475, "grad_norm": 3.021955966949463, "learning_rate": 2.7490233953933587e-05, "loss": 0.1498560667037964, "step": 169170 }, { "epoch": 0.7263250989584675, "grad_norm": 0.019517341628670692, "learning_rate": 2.7485922233815958e-05, "loss": 0.34508914947509767, "step": 169180 }, { "epoch": 0.7263680310484875, "grad_norm": 0.004844113253057003, "learning_rate": 2.7481610513698335e-05, "loss": 0.19807019233703613, "step": 169190 }, { "epoch": 0.7264109631385075, "grad_norm": 0.0019148435676470399, "learning_rate": 2.7477298793580713e-05, "loss": 0.36115279197692873, "step": 169200 }, { "epoch": 0.7264538952285275, "grad_norm": 0.19224651157855988, "learning_rate": 2.747298707346309e-05, "loss": 0.39707260131835936, "step": 169210 }, { "epoch": 0.7264968273185475, "grad_norm": 0.22394929826259613, "learning_rate": 2.7468675353345464e-05, "loss": 0.10976753234863282, "step": 169220 }, { "epoch": 0.7265397594085675, "grad_norm": 0.0022097777109593153, "learning_rate": 2.746436363322784e-05, "loss": 0.31788818836212157, "step": 169230 }, { "epoch": 0.7265826914985876, "grad_norm": 3.0895588397979736, "learning_rate": 2.746005191311022e-05, "loss": 0.18544777631759643, "step": 169240 }, { "epoch": 0.7266256235886075, "grad_norm": 1.5325919389724731, "learning_rate": 2.7455740192992596e-05, "loss": 0.1992364525794983, "step": 169250 }, { "epoch": 0.7266685556786275, "grad_norm": 0.045792438089847565, "learning_rate": 2.7451428472874967e-05, "loss": 0.3305244445800781, "step": 169260 }, { "epoch": 0.7267114877686476, "grad_norm": 0.9892081618309021, "learning_rate": 2.7447116752757347e-05, "loss": 0.15464599132537843, "step": 169270 }, { "epoch": 0.7267544198586675, "grad_norm": 0.009701947681605816, "learning_rate": 2.7442805032639725e-05, "loss": 0.1055402398109436, "step": 169280 }, { "epoch": 0.7267973519486876, "grad_norm": 2.0974602699279785, "learning_rate": 2.7438493312522102e-05, "loss": 0.12994418144226075, "step": 169290 }, { "epoch": 0.7268402840387076, "grad_norm": 0.018503086641430855, "learning_rate": 2.7434181592404473e-05, "loss": 0.34631929397583006, "step": 169300 }, { "epoch": 0.7268832161287275, "grad_norm": 0.5620545148849487, "learning_rate": 2.742986987228685e-05, "loss": 0.29466919898986815, "step": 169310 }, { "epoch": 0.7269261482187476, "grad_norm": 1.7169890403747559, "learning_rate": 2.7425558152169227e-05, "loss": 0.25790133476257326, "step": 169320 }, { "epoch": 0.7269690803087676, "grad_norm": 0.01841481775045395, "learning_rate": 2.7421246432051605e-05, "loss": 0.3726177215576172, "step": 169330 }, { "epoch": 0.7270120123987875, "grad_norm": 7.7407331466674805, "learning_rate": 2.741693471193398e-05, "loss": 0.2069004774093628, "step": 169340 }, { "epoch": 0.7270549444888076, "grad_norm": 0.041998837143182755, "learning_rate": 2.7412622991816356e-05, "loss": 0.18190546035766603, "step": 169350 }, { "epoch": 0.7270978765788276, "grad_norm": 0.0035823362413793802, "learning_rate": 2.7408311271698733e-05, "loss": 0.27233123779296875, "step": 169360 }, { "epoch": 0.7271408086688477, "grad_norm": 0.00309250270947814, "learning_rate": 2.740399955158111e-05, "loss": 0.19596065282821656, "step": 169370 }, { "epoch": 0.7271837407588676, "grad_norm": 0.059527233242988586, "learning_rate": 2.7399687831463485e-05, "loss": 0.22426025867462157, "step": 169380 }, { "epoch": 0.7272266728488876, "grad_norm": 2.7782046794891357, "learning_rate": 2.7395376111345862e-05, "loss": 0.24270076751708985, "step": 169390 }, { "epoch": 0.7272696049389077, "grad_norm": 2.5395593643188477, "learning_rate": 2.739106439122824e-05, "loss": 0.22292840480804443, "step": 169400 }, { "epoch": 0.7273125370289276, "grad_norm": 0.021359330043196678, "learning_rate": 2.7386752671110617e-05, "loss": 0.1687622547149658, "step": 169410 }, { "epoch": 0.7273554691189477, "grad_norm": 2.1432902812957764, "learning_rate": 2.7382440950992987e-05, "loss": 0.22134754657745362, "step": 169420 }, { "epoch": 0.7273984012089677, "grad_norm": 0.15447072684764862, "learning_rate": 2.7378129230875365e-05, "loss": 0.1350387692451477, "step": 169430 }, { "epoch": 0.7274413332989876, "grad_norm": 1.2907360792160034, "learning_rate": 2.7373817510757742e-05, "loss": 0.3004169940948486, "step": 169440 }, { "epoch": 0.7274842653890077, "grad_norm": 0.00447465293109417, "learning_rate": 2.736950579064012e-05, "loss": 0.055487924814224245, "step": 169450 }, { "epoch": 0.7275271974790277, "grad_norm": 0.01885574497282505, "learning_rate": 2.7365194070522493e-05, "loss": 0.17447357177734374, "step": 169460 }, { "epoch": 0.7275701295690477, "grad_norm": 1.8977580070495605, "learning_rate": 2.736088235040487e-05, "loss": 0.18592686653137208, "step": 169470 }, { "epoch": 0.7276130616590677, "grad_norm": 0.006628870032727718, "learning_rate": 2.7356570630287248e-05, "loss": 0.2765743494033813, "step": 169480 }, { "epoch": 0.7276559937490877, "grad_norm": 0.015173074789345264, "learning_rate": 2.7352258910169625e-05, "loss": 0.12751307487487792, "step": 169490 }, { "epoch": 0.7276989258391077, "grad_norm": 0.012416012585163116, "learning_rate": 2.7347947190052e-05, "loss": 0.1876566767692566, "step": 169500 }, { "epoch": 0.7277418579291277, "grad_norm": 1.2336422204971313, "learning_rate": 2.7343635469934376e-05, "loss": 0.3533480167388916, "step": 169510 }, { "epoch": 0.7277847900191478, "grad_norm": 0.02001671865582466, "learning_rate": 2.7339323749816754e-05, "loss": 0.2230234146118164, "step": 169520 }, { "epoch": 0.7278277221091677, "grad_norm": 0.015594366006553173, "learning_rate": 2.733501202969913e-05, "loss": 0.06503039002418518, "step": 169530 }, { "epoch": 0.7278706541991877, "grad_norm": 2.3339343070983887, "learning_rate": 2.733070030958151e-05, "loss": 0.19949530363082885, "step": 169540 }, { "epoch": 0.7279135862892078, "grad_norm": 0.90096515417099, "learning_rate": 2.732638858946388e-05, "loss": 0.0720227301120758, "step": 169550 }, { "epoch": 0.7279565183792277, "grad_norm": 0.0013229718897491693, "learning_rate": 2.7322076869346256e-05, "loss": 0.2935648441314697, "step": 169560 }, { "epoch": 0.7279994504692477, "grad_norm": 3.1632344722747803, "learning_rate": 2.7317765149228637e-05, "loss": 0.2547586917877197, "step": 169570 }, { "epoch": 0.7280423825592678, "grad_norm": 0.9867843389511108, "learning_rate": 2.7313453429111015e-05, "loss": 0.10192888975143433, "step": 169580 }, { "epoch": 0.7280853146492877, "grad_norm": 3.412651538848877, "learning_rate": 2.7309141708993385e-05, "loss": 0.14853811264038086, "step": 169590 }, { "epoch": 0.7281282467393078, "grad_norm": 0.13131414353847504, "learning_rate": 2.7304829988875762e-05, "loss": 0.16117234230041505, "step": 169600 }, { "epoch": 0.7281711788293278, "grad_norm": 0.20439204573631287, "learning_rate": 2.730051826875814e-05, "loss": 0.30737872123718263, "step": 169610 }, { "epoch": 0.7282141109193477, "grad_norm": 0.0034942487254738808, "learning_rate": 2.7296206548640517e-05, "loss": 0.1933616042137146, "step": 169620 }, { "epoch": 0.7282570430093678, "grad_norm": 0.039384450763463974, "learning_rate": 2.729189482852289e-05, "loss": 0.3517719507217407, "step": 169630 }, { "epoch": 0.7282999750993878, "grad_norm": 1.3371518850326538, "learning_rate": 2.728758310840527e-05, "loss": 0.2886610984802246, "step": 169640 }, { "epoch": 0.7283429071894078, "grad_norm": 0.1878964751958847, "learning_rate": 2.7283271388287646e-05, "loss": 0.18123259544372558, "step": 169650 }, { "epoch": 0.7283858392794278, "grad_norm": 1.6017755270004272, "learning_rate": 2.7278959668170023e-05, "loss": 0.2711747884750366, "step": 169660 }, { "epoch": 0.7284287713694478, "grad_norm": 1.121159315109253, "learning_rate": 2.7274647948052394e-05, "loss": 0.141208279132843, "step": 169670 }, { "epoch": 0.7284717034594678, "grad_norm": 1.4669911861419678, "learning_rate": 2.7270336227934774e-05, "loss": 0.34694585800170896, "step": 169680 }, { "epoch": 0.7285146355494878, "grad_norm": 0.0026642445009201765, "learning_rate": 2.7266024507817152e-05, "loss": 0.30149426460266116, "step": 169690 }, { "epoch": 0.7285575676395079, "grad_norm": 0.01654047705233097, "learning_rate": 2.726171278769953e-05, "loss": 0.07679366469383239, "step": 169700 }, { "epoch": 0.7286004997295278, "grad_norm": 3.939174175262451, "learning_rate": 2.72574010675819e-05, "loss": 0.1391007423400879, "step": 169710 }, { "epoch": 0.7286434318195478, "grad_norm": 1.1632599830627441, "learning_rate": 2.7253089347464277e-05, "loss": 0.3620645046234131, "step": 169720 }, { "epoch": 0.7286863639095679, "grad_norm": 0.03564436361193657, "learning_rate": 2.7248777627346654e-05, "loss": 0.1843175172805786, "step": 169730 }, { "epoch": 0.7287292959995878, "grad_norm": 2.9193012714385986, "learning_rate": 2.724446590722903e-05, "loss": 0.28504557609558107, "step": 169740 }, { "epoch": 0.7287722280896078, "grad_norm": 6.900730133056641, "learning_rate": 2.7240154187111406e-05, "loss": 0.29606239795684813, "step": 169750 }, { "epoch": 0.7288151601796279, "grad_norm": 0.05998038873076439, "learning_rate": 2.7235842466993783e-05, "loss": 0.34577767848968505, "step": 169760 }, { "epoch": 0.7288580922696478, "grad_norm": 5.61971378326416, "learning_rate": 2.723153074687616e-05, "loss": 0.25440452098846433, "step": 169770 }, { "epoch": 0.7289010243596679, "grad_norm": 0.690499484539032, "learning_rate": 2.7227219026758538e-05, "loss": 0.11958892345428467, "step": 169780 }, { "epoch": 0.7289439564496879, "grad_norm": 3.1090917587280273, "learning_rate": 2.722290730664091e-05, "loss": 0.14645473957061766, "step": 169790 }, { "epoch": 0.728986888539708, "grad_norm": 0.10904362052679062, "learning_rate": 2.721859558652329e-05, "loss": 0.0882343590259552, "step": 169800 }, { "epoch": 0.7290298206297279, "grad_norm": 6.914793491363525, "learning_rate": 2.7214283866405666e-05, "loss": 0.38003613948822024, "step": 169810 }, { "epoch": 0.7290727527197479, "grad_norm": 0.009238614700734615, "learning_rate": 2.7209972146288044e-05, "loss": 0.21988511085510254, "step": 169820 }, { "epoch": 0.729115684809768, "grad_norm": 3.774786949157715, "learning_rate": 2.7205660426170414e-05, "loss": 0.1955350399017334, "step": 169830 }, { "epoch": 0.7291586168997879, "grad_norm": 3.135042667388916, "learning_rate": 2.720134870605279e-05, "loss": 0.17978460788726808, "step": 169840 }, { "epoch": 0.7292015489898079, "grad_norm": 0.557732343673706, "learning_rate": 2.719703698593517e-05, "loss": 0.09407221078872681, "step": 169850 }, { "epoch": 0.729244481079828, "grad_norm": 0.01777493953704834, "learning_rate": 2.719272526581755e-05, "loss": 0.15389621257781982, "step": 169860 }, { "epoch": 0.7292874131698479, "grad_norm": 0.0050742123275995255, "learning_rate": 2.7188413545699927e-05, "loss": 0.28021841049194335, "step": 169870 }, { "epoch": 0.729330345259868, "grad_norm": 0.051354601979255676, "learning_rate": 2.7184101825582298e-05, "loss": 0.13149229288101197, "step": 169880 }, { "epoch": 0.729373277349888, "grad_norm": 17.105356216430664, "learning_rate": 2.7179790105464675e-05, "loss": 0.16971458196640016, "step": 169890 }, { "epoch": 0.7294162094399079, "grad_norm": 0.1747300624847412, "learning_rate": 2.7175478385347052e-05, "loss": 0.21624934673309326, "step": 169900 }, { "epoch": 0.729459141529928, "grad_norm": 0.04679642617702484, "learning_rate": 2.717116666522943e-05, "loss": 0.09935680627822877, "step": 169910 }, { "epoch": 0.729502073619948, "grad_norm": 0.0205056332051754, "learning_rate": 2.7166854945111804e-05, "loss": 0.15621496438980104, "step": 169920 }, { "epoch": 0.7295450057099679, "grad_norm": 0.07464340329170227, "learning_rate": 2.716254322499418e-05, "loss": 0.27371673583984374, "step": 169930 }, { "epoch": 0.729587937799988, "grad_norm": 5.811140537261963, "learning_rate": 2.7158231504876558e-05, "loss": 0.22537665367126464, "step": 169940 }, { "epoch": 0.729630869890008, "grad_norm": 1.5015976428985596, "learning_rate": 2.7153919784758936e-05, "loss": 0.26116361618041994, "step": 169950 }, { "epoch": 0.729673801980028, "grad_norm": 1.3956966400146484, "learning_rate": 2.7149608064641306e-05, "loss": 0.3448702573776245, "step": 169960 }, { "epoch": 0.729716734070048, "grad_norm": 0.0037657865323126316, "learning_rate": 2.7145296344523687e-05, "loss": 0.23841521739959717, "step": 169970 }, { "epoch": 0.729759666160068, "grad_norm": 0.27306094765663147, "learning_rate": 2.7140984624406064e-05, "loss": 0.22840194702148436, "step": 169980 }, { "epoch": 0.729802598250088, "grad_norm": 0.36141231656074524, "learning_rate": 2.713667290428844e-05, "loss": 0.29284093379974363, "step": 169990 }, { "epoch": 0.729845530340108, "grad_norm": 0.011245034635066986, "learning_rate": 2.7132361184170812e-05, "loss": 0.18197616338729858, "step": 170000 }, { "epoch": 0.729845530340108, "eval_loss": 0.3767232596874237, "eval_runtime": 27.4074, "eval_samples_per_second": 3.649, "eval_steps_per_second": 3.649, "step": 170000 }, { "epoch": 0.7298884624301281, "grad_norm": 0.0031025889329612255, "learning_rate": 2.712804946405319e-05, "loss": 0.23458943367004395, "step": 170010 }, { "epoch": 0.729931394520148, "grad_norm": 0.17620614171028137, "learning_rate": 2.7123737743935567e-05, "loss": 0.29802589416503905, "step": 170020 }, { "epoch": 0.729974326610168, "grad_norm": 0.017262430861592293, "learning_rate": 2.7119426023817944e-05, "loss": 0.03175511360168457, "step": 170030 }, { "epoch": 0.7300172587001881, "grad_norm": 0.025158502161502838, "learning_rate": 2.7115114303700318e-05, "loss": 0.26365683078765867, "step": 170040 }, { "epoch": 0.730060190790208, "grad_norm": 2.2967610359191895, "learning_rate": 2.7110802583582695e-05, "loss": 0.32564468383789064, "step": 170050 }, { "epoch": 0.730103122880228, "grad_norm": 1.5477705001831055, "learning_rate": 2.7106490863465073e-05, "loss": 0.4748509407043457, "step": 170060 }, { "epoch": 0.7301460549702481, "grad_norm": 0.09828320890665054, "learning_rate": 2.710217914334745e-05, "loss": 0.136482310295105, "step": 170070 }, { "epoch": 0.730188987060268, "grad_norm": 0.8780837655067444, "learning_rate": 2.7097867423229824e-05, "loss": 0.2198232650756836, "step": 170080 }, { "epoch": 0.7302319191502881, "grad_norm": 1.8543720245361328, "learning_rate": 2.70935557031122e-05, "loss": 0.18494052886962892, "step": 170090 }, { "epoch": 0.7302748512403081, "grad_norm": 0.2783847451210022, "learning_rate": 2.708924398299458e-05, "loss": 0.053096276521682736, "step": 170100 }, { "epoch": 0.730317783330328, "grad_norm": 0.0045045046135783195, "learning_rate": 2.7084932262876956e-05, "loss": 0.3136617660522461, "step": 170110 }, { "epoch": 0.7303607154203481, "grad_norm": 0.3513777256011963, "learning_rate": 2.7080620542759327e-05, "loss": 0.12250692844390869, "step": 170120 }, { "epoch": 0.7304036475103681, "grad_norm": 2.300067663192749, "learning_rate": 2.7076308822641704e-05, "loss": 0.07853416204452515, "step": 170130 }, { "epoch": 0.7304465796003881, "grad_norm": 0.018564164638519287, "learning_rate": 2.707199710252408e-05, "loss": 0.12981711626052855, "step": 170140 }, { "epoch": 0.7304895116904081, "grad_norm": 0.6602355241775513, "learning_rate": 2.706768538240646e-05, "loss": 0.4602662086486816, "step": 170150 }, { "epoch": 0.7305324437804281, "grad_norm": 0.04296904429793358, "learning_rate": 2.7063373662288833e-05, "loss": 0.024672232568264008, "step": 170160 }, { "epoch": 0.7305753758704481, "grad_norm": 0.08935589343309402, "learning_rate": 2.705906194217121e-05, "loss": 0.05151203870773315, "step": 170170 }, { "epoch": 0.7306183079604681, "grad_norm": 1.2046279907226562, "learning_rate": 2.7054750222053587e-05, "loss": 0.11995961666107177, "step": 170180 }, { "epoch": 0.7306612400504882, "grad_norm": 0.007651221007108688, "learning_rate": 2.7050438501935965e-05, "loss": 0.17691102027893066, "step": 170190 }, { "epoch": 0.7307041721405081, "grad_norm": 0.016610870137810707, "learning_rate": 2.704612678181834e-05, "loss": 0.23532278537750245, "step": 170200 }, { "epoch": 0.7307471042305281, "grad_norm": 0.010207589715719223, "learning_rate": 2.7041815061700716e-05, "loss": 0.19673173427581786, "step": 170210 }, { "epoch": 0.7307900363205482, "grad_norm": 2.705111503601074, "learning_rate": 2.7037503341583093e-05, "loss": 0.15226796865463257, "step": 170220 }, { "epoch": 0.7308329684105682, "grad_norm": 0.0040432969108223915, "learning_rate": 2.703319162146547e-05, "loss": 0.5022855758666992, "step": 170230 }, { "epoch": 0.7308759005005881, "grad_norm": 0.009334494359791279, "learning_rate": 2.7028879901347848e-05, "loss": 0.08118436336517335, "step": 170240 }, { "epoch": 0.7309188325906082, "grad_norm": 1.2461107969284058, "learning_rate": 2.702456818123022e-05, "loss": 0.3561758041381836, "step": 170250 }, { "epoch": 0.7309617646806282, "grad_norm": 0.1575508862733841, "learning_rate": 2.7020256461112596e-05, "loss": 0.3139577627182007, "step": 170260 }, { "epoch": 0.7310046967706482, "grad_norm": 0.013691013678908348, "learning_rate": 2.7015944740994977e-05, "loss": 0.15196446180343628, "step": 170270 }, { "epoch": 0.7310476288606682, "grad_norm": 0.03952275961637497, "learning_rate": 2.7011633020877354e-05, "loss": 0.2643216371536255, "step": 170280 }, { "epoch": 0.7310905609506883, "grad_norm": 0.08539864420890808, "learning_rate": 2.7007321300759725e-05, "loss": 0.2871419906616211, "step": 170290 }, { "epoch": 0.7311334930407082, "grad_norm": 0.0039503430016338825, "learning_rate": 2.7003009580642102e-05, "loss": 0.19403316974639892, "step": 170300 }, { "epoch": 0.7311764251307282, "grad_norm": 4.306076526641846, "learning_rate": 2.699869786052448e-05, "loss": 0.43187770843505857, "step": 170310 }, { "epoch": 0.7312193572207483, "grad_norm": 0.0012534123379737139, "learning_rate": 2.6994386140406857e-05, "loss": 0.2351520299911499, "step": 170320 }, { "epoch": 0.7312622893107682, "grad_norm": 1.3385637998580933, "learning_rate": 2.699007442028923e-05, "loss": 0.1445598840713501, "step": 170330 }, { "epoch": 0.7313052214007882, "grad_norm": 0.6701360940933228, "learning_rate": 2.6985762700171608e-05, "loss": 0.12373219728469849, "step": 170340 }, { "epoch": 0.7313481534908083, "grad_norm": 0.5813913345336914, "learning_rate": 2.6981450980053985e-05, "loss": 0.12824459075927735, "step": 170350 }, { "epoch": 0.7313910855808282, "grad_norm": 0.10391009598970413, "learning_rate": 2.6977139259936363e-05, "loss": 0.3668433904647827, "step": 170360 }, { "epoch": 0.7314340176708483, "grad_norm": 0.47627198696136475, "learning_rate": 2.6972827539818733e-05, "loss": 0.1359683632850647, "step": 170370 }, { "epoch": 0.7314769497608683, "grad_norm": 0.030919019132852554, "learning_rate": 2.6968515819701114e-05, "loss": 0.2007523775100708, "step": 170380 }, { "epoch": 0.7315198818508882, "grad_norm": 1.0183385610580444, "learning_rate": 2.696420409958349e-05, "loss": 0.1809418797492981, "step": 170390 }, { "epoch": 0.7315628139409083, "grad_norm": 0.40025752782821655, "learning_rate": 2.695989237946587e-05, "loss": 0.1921942114830017, "step": 170400 }, { "epoch": 0.7316057460309283, "grad_norm": 1.7565090656280518, "learning_rate": 2.695558065934824e-05, "loss": 0.1617509365081787, "step": 170410 }, { "epoch": 0.7316486781209482, "grad_norm": 0.007016188930720091, "learning_rate": 2.6951268939230616e-05, "loss": 0.02748640179634094, "step": 170420 }, { "epoch": 0.7316916102109683, "grad_norm": 2.187925100326538, "learning_rate": 2.6946957219112994e-05, "loss": 0.30386340618133545, "step": 170430 }, { "epoch": 0.7317345423009883, "grad_norm": 2.4722328186035156, "learning_rate": 2.694264549899537e-05, "loss": 0.2640492916107178, "step": 170440 }, { "epoch": 0.7317774743910083, "grad_norm": 0.014905155636370182, "learning_rate": 2.6938333778877745e-05, "loss": 0.24466726779937745, "step": 170450 }, { "epoch": 0.7318204064810283, "grad_norm": 5.93389892578125, "learning_rate": 2.6934022058760122e-05, "loss": 0.4935251235961914, "step": 170460 }, { "epoch": 0.7318633385710483, "grad_norm": 1.4191564321517944, "learning_rate": 2.69297103386425e-05, "loss": 0.3071990728378296, "step": 170470 }, { "epoch": 0.7319062706610683, "grad_norm": 0.01276042778044939, "learning_rate": 2.6925398618524877e-05, "loss": 0.15078266859054565, "step": 170480 }, { "epoch": 0.7319492027510883, "grad_norm": 0.004253904335200787, "learning_rate": 2.692108689840725e-05, "loss": 0.1008331298828125, "step": 170490 }, { "epoch": 0.7319921348411084, "grad_norm": 0.9740898609161377, "learning_rate": 2.691677517828963e-05, "loss": 0.3161968231201172, "step": 170500 }, { "epoch": 0.7320350669311283, "grad_norm": 31.598270416259766, "learning_rate": 2.6912463458172006e-05, "loss": 0.20869870185852052, "step": 170510 }, { "epoch": 0.7320779990211483, "grad_norm": 0.015376714058220387, "learning_rate": 2.6908151738054383e-05, "loss": 0.09759688377380371, "step": 170520 }, { "epoch": 0.7321209311111684, "grad_norm": 0.013746547512710094, "learning_rate": 2.6903840017936754e-05, "loss": 0.05905347466468811, "step": 170530 }, { "epoch": 0.7321638632011883, "grad_norm": 1.2723731994628906, "learning_rate": 2.689952829781913e-05, "loss": 0.159501314163208, "step": 170540 }, { "epoch": 0.7322067952912084, "grad_norm": 3.711704730987549, "learning_rate": 2.689521657770151e-05, "loss": 0.30540282726287843, "step": 170550 }, { "epoch": 0.7322497273812284, "grad_norm": 1.8933461904525757, "learning_rate": 2.6890904857583886e-05, "loss": 0.17268972396850585, "step": 170560 }, { "epoch": 0.7322926594712483, "grad_norm": 0.024054287001490593, "learning_rate": 2.688659313746626e-05, "loss": 0.2180922269821167, "step": 170570 }, { "epoch": 0.7323355915612684, "grad_norm": 0.0017070991452783346, "learning_rate": 2.6882281417348637e-05, "loss": 0.3071908473968506, "step": 170580 }, { "epoch": 0.7323785236512884, "grad_norm": 0.015459275804460049, "learning_rate": 2.6877969697231014e-05, "loss": 0.10010931491851807, "step": 170590 }, { "epoch": 0.7324214557413083, "grad_norm": 0.25705915689468384, "learning_rate": 2.687365797711339e-05, "loss": 0.20313713550567628, "step": 170600 }, { "epoch": 0.7324643878313284, "grad_norm": 1.2373805046081543, "learning_rate": 2.686934625699577e-05, "loss": 0.12128767967224122, "step": 170610 }, { "epoch": 0.7325073199213484, "grad_norm": 0.06745709478855133, "learning_rate": 2.6865034536878143e-05, "loss": 0.10555492639541626, "step": 170620 }, { "epoch": 0.7325502520113684, "grad_norm": 0.22075150907039642, "learning_rate": 2.686072281676052e-05, "loss": 0.21621780395507811, "step": 170630 }, { "epoch": 0.7325931841013884, "grad_norm": 0.013154013082385063, "learning_rate": 2.6856411096642898e-05, "loss": 0.25960729122161863, "step": 170640 }, { "epoch": 0.7326361161914084, "grad_norm": 0.006061031948775053, "learning_rate": 2.6852099376525275e-05, "loss": 0.10240201950073242, "step": 170650 }, { "epoch": 0.7326790482814285, "grad_norm": 0.2058708518743515, "learning_rate": 2.6847787656407646e-05, "loss": 0.16383315324783326, "step": 170660 }, { "epoch": 0.7327219803714484, "grad_norm": 0.01506069302558899, "learning_rate": 2.6843475936290023e-05, "loss": 0.15489013195037843, "step": 170670 }, { "epoch": 0.7327649124614685, "grad_norm": 0.002717470284551382, "learning_rate": 2.6839164216172404e-05, "loss": 0.1274445176124573, "step": 170680 }, { "epoch": 0.7328078445514885, "grad_norm": 3.37857985496521, "learning_rate": 2.683485249605478e-05, "loss": 0.3458231210708618, "step": 170690 }, { "epoch": 0.7328507766415084, "grad_norm": 0.07261750847101212, "learning_rate": 2.683054077593715e-05, "loss": 0.05073615312576294, "step": 170700 }, { "epoch": 0.7328937087315285, "grad_norm": 0.009463918395340443, "learning_rate": 2.682622905581953e-05, "loss": 0.25365068912506106, "step": 170710 }, { "epoch": 0.7329366408215485, "grad_norm": 1.966654896736145, "learning_rate": 2.6821917335701906e-05, "loss": 0.1966190218925476, "step": 170720 }, { "epoch": 0.7329795729115685, "grad_norm": 0.0020303381606936455, "learning_rate": 2.6817605615584284e-05, "loss": 0.28710646629333497, "step": 170730 }, { "epoch": 0.7330225050015885, "grad_norm": 0.01680716685950756, "learning_rate": 2.6813293895466658e-05, "loss": 0.30534236431121825, "step": 170740 }, { "epoch": 0.7330654370916085, "grad_norm": 0.021328696981072426, "learning_rate": 2.6808982175349035e-05, "loss": 0.08837099671363831, "step": 170750 }, { "epoch": 0.7331083691816285, "grad_norm": 0.027719993144273758, "learning_rate": 2.6804670455231412e-05, "loss": 0.22146189212799072, "step": 170760 }, { "epoch": 0.7331513012716485, "grad_norm": 57.73662185668945, "learning_rate": 2.680035873511379e-05, "loss": 0.2809849977493286, "step": 170770 }, { "epoch": 0.7331942333616686, "grad_norm": 0.003101927461102605, "learning_rate": 2.679604701499616e-05, "loss": 0.16726034879684448, "step": 170780 }, { "epoch": 0.7332371654516885, "grad_norm": 0.00422689039260149, "learning_rate": 2.679173529487854e-05, "loss": 0.40579957962036134, "step": 170790 }, { "epoch": 0.7332800975417085, "grad_norm": 0.05328085273504257, "learning_rate": 2.6787423574760918e-05, "loss": 0.19401125907897948, "step": 170800 }, { "epoch": 0.7333230296317286, "grad_norm": 3.178964614868164, "learning_rate": 2.6783111854643296e-05, "loss": 0.33956422805786135, "step": 170810 }, { "epoch": 0.7333659617217485, "grad_norm": 0.07042407989501953, "learning_rate": 2.6778800134525666e-05, "loss": 0.18467453718185425, "step": 170820 }, { "epoch": 0.7334088938117685, "grad_norm": 2.205892324447632, "learning_rate": 2.6774488414408043e-05, "loss": 0.14611732959747314, "step": 170830 }, { "epoch": 0.7334518259017886, "grad_norm": 0.9493621587753296, "learning_rate": 2.677017669429042e-05, "loss": 0.26489179134368895, "step": 170840 }, { "epoch": 0.7334947579918085, "grad_norm": 0.11686643213033676, "learning_rate": 2.6765864974172798e-05, "loss": 0.03403322398662567, "step": 170850 }, { "epoch": 0.7335376900818286, "grad_norm": 0.1313803642988205, "learning_rate": 2.6761553254055172e-05, "loss": 0.14360102415084838, "step": 170860 }, { "epoch": 0.7335806221718486, "grad_norm": 0.22880996763706207, "learning_rate": 2.675724153393755e-05, "loss": 0.08690401315689086, "step": 170870 }, { "epoch": 0.7336235542618685, "grad_norm": 0.1637190282344818, "learning_rate": 2.6752929813819927e-05, "loss": 0.15708295106887818, "step": 170880 }, { "epoch": 0.7336664863518886, "grad_norm": 0.02968779020011425, "learning_rate": 2.6748618093702304e-05, "loss": 0.19256193637847902, "step": 170890 }, { "epoch": 0.7337094184419086, "grad_norm": 1.3331794738769531, "learning_rate": 2.6744306373584678e-05, "loss": 0.15403072834014891, "step": 170900 }, { "epoch": 0.7337523505319286, "grad_norm": 22.366769790649414, "learning_rate": 2.6739994653467055e-05, "loss": 0.08723581433296204, "step": 170910 }, { "epoch": 0.7337952826219486, "grad_norm": 2.587179660797119, "learning_rate": 2.6735682933349433e-05, "loss": 0.16030250787734984, "step": 170920 }, { "epoch": 0.7338382147119686, "grad_norm": 0.016467789188027382, "learning_rate": 2.673137121323181e-05, "loss": 0.20831329822540284, "step": 170930 }, { "epoch": 0.7338811468019886, "grad_norm": 1.1086673736572266, "learning_rate": 2.672705949311418e-05, "loss": 0.462873649597168, "step": 170940 }, { "epoch": 0.7339240788920086, "grad_norm": 0.0017751099076122046, "learning_rate": 2.6722747772996558e-05, "loss": 0.13645445108413695, "step": 170950 }, { "epoch": 0.7339670109820287, "grad_norm": 0.8315486907958984, "learning_rate": 2.6718436052878935e-05, "loss": 0.45193896293640134, "step": 170960 }, { "epoch": 0.7340099430720486, "grad_norm": 0.0014380532084032893, "learning_rate": 2.6714124332761316e-05, "loss": 0.3336472988128662, "step": 170970 }, { "epoch": 0.7340528751620686, "grad_norm": 0.3532212972640991, "learning_rate": 2.6709812612643693e-05, "loss": 0.1193724513053894, "step": 170980 }, { "epoch": 0.7340958072520887, "grad_norm": 0.023362183943390846, "learning_rate": 2.6705500892526064e-05, "loss": 0.30927510261535646, "step": 170990 }, { "epoch": 0.7341387393421086, "grad_norm": 0.012097061611711979, "learning_rate": 2.670118917240844e-05, "loss": 0.1073559045791626, "step": 171000 }, { "epoch": 0.7341387393421086, "eval_loss": 0.39417701959609985, "eval_runtime": 27.5821, "eval_samples_per_second": 3.626, "eval_steps_per_second": 3.626, "step": 171000 }, { "epoch": 0.7341816714321286, "grad_norm": 0.09236214309930801, "learning_rate": 2.669687745229082e-05, "loss": 0.31250853538513185, "step": 171010 }, { "epoch": 0.7342246035221487, "grad_norm": 0.05382629483938217, "learning_rate": 2.6692565732173196e-05, "loss": 0.048319220542907715, "step": 171020 }, { "epoch": 0.7342675356121686, "grad_norm": 0.6148742437362671, "learning_rate": 2.668825401205557e-05, "loss": 0.0924647569656372, "step": 171030 }, { "epoch": 0.7343104677021887, "grad_norm": 1.4304864406585693, "learning_rate": 2.6683942291937947e-05, "loss": 0.20467443466186525, "step": 171040 }, { "epoch": 0.7343533997922087, "grad_norm": 1.1778323650360107, "learning_rate": 2.6679630571820325e-05, "loss": 0.5647805690765381, "step": 171050 }, { "epoch": 0.7343963318822286, "grad_norm": 0.8232007026672363, "learning_rate": 2.6675318851702702e-05, "loss": 0.09155967235565185, "step": 171060 }, { "epoch": 0.7344392639722487, "grad_norm": 0.0669042244553566, "learning_rate": 2.6671007131585073e-05, "loss": 0.1859180212020874, "step": 171070 }, { "epoch": 0.7344821960622687, "grad_norm": 0.005053384695202112, "learning_rate": 2.6666695411467453e-05, "loss": 0.12264673709869385, "step": 171080 }, { "epoch": 0.7345251281522888, "grad_norm": 0.9383001327514648, "learning_rate": 2.666238369134983e-05, "loss": 0.3935052156448364, "step": 171090 }, { "epoch": 0.7345680602423087, "grad_norm": 0.0015224060043692589, "learning_rate": 2.6658071971232208e-05, "loss": 0.21541233062744142, "step": 171100 }, { "epoch": 0.7346109923323287, "grad_norm": 0.3329278826713562, "learning_rate": 2.665376025111458e-05, "loss": 0.3047566652297974, "step": 171110 }, { "epoch": 0.7346539244223488, "grad_norm": 0.7561929225921631, "learning_rate": 2.6649448530996956e-05, "loss": 0.30191969871520996, "step": 171120 }, { "epoch": 0.7346968565123687, "grad_norm": 0.03231775388121605, "learning_rate": 2.6645136810879333e-05, "loss": 0.10164375305175781, "step": 171130 }, { "epoch": 0.7347397886023888, "grad_norm": 0.06257763504981995, "learning_rate": 2.664082509076171e-05, "loss": 0.18539004325866698, "step": 171140 }, { "epoch": 0.7347827206924088, "grad_norm": 1.2411458492279053, "learning_rate": 2.6636513370644085e-05, "loss": 0.2938519477844238, "step": 171150 }, { "epoch": 0.7348256527824287, "grad_norm": 0.11264359205961227, "learning_rate": 2.6632201650526462e-05, "loss": 0.12792333364486694, "step": 171160 }, { "epoch": 0.7348685848724488, "grad_norm": 0.006919489707797766, "learning_rate": 2.662788993040884e-05, "loss": 0.4435697555541992, "step": 171170 }, { "epoch": 0.7349115169624688, "grad_norm": 0.014986931346356869, "learning_rate": 2.6623578210291217e-05, "loss": 0.0587616503238678, "step": 171180 }, { "epoch": 0.7349544490524887, "grad_norm": 0.00439242459833622, "learning_rate": 2.661926649017359e-05, "loss": 0.12553696632385253, "step": 171190 }, { "epoch": 0.7349973811425088, "grad_norm": 0.006219713948667049, "learning_rate": 2.6614954770055968e-05, "loss": 0.5123360633850098, "step": 171200 }, { "epoch": 0.7350403132325288, "grad_norm": 0.004228357691317797, "learning_rate": 2.6610643049938345e-05, "loss": 0.05139904618263245, "step": 171210 }, { "epoch": 0.7350832453225488, "grad_norm": 2.0546998977661133, "learning_rate": 2.6606331329820723e-05, "loss": 0.10472931861877441, "step": 171220 }, { "epoch": 0.7351261774125688, "grad_norm": 0.42057308554649353, "learning_rate": 2.6602019609703093e-05, "loss": 0.1874741792678833, "step": 171230 }, { "epoch": 0.7351691095025888, "grad_norm": 2.450517177581787, "learning_rate": 2.659770788958547e-05, "loss": 0.2614566802978516, "step": 171240 }, { "epoch": 0.7352120415926088, "grad_norm": 0.06705211102962494, "learning_rate": 2.6593396169467848e-05, "loss": 0.288163161277771, "step": 171250 }, { "epoch": 0.7352549736826288, "grad_norm": 0.570608377456665, "learning_rate": 2.6589084449350225e-05, "loss": 0.1233750820159912, "step": 171260 }, { "epoch": 0.7352979057726489, "grad_norm": 2.294008493423462, "learning_rate": 2.65847727292326e-05, "loss": 0.3664525508880615, "step": 171270 }, { "epoch": 0.7353408378626688, "grad_norm": 0.11099325120449066, "learning_rate": 2.6580461009114976e-05, "loss": 0.34699676036834715, "step": 171280 }, { "epoch": 0.7353837699526888, "grad_norm": 0.1224088966846466, "learning_rate": 2.6576149288997354e-05, "loss": 0.07544822692871093, "step": 171290 }, { "epoch": 0.7354267020427089, "grad_norm": 0.09361089766025543, "learning_rate": 2.657183756887973e-05, "loss": 0.44263458251953125, "step": 171300 }, { "epoch": 0.7354696341327288, "grad_norm": 4.612122535705566, "learning_rate": 2.6567525848762105e-05, "loss": 0.26591827869415285, "step": 171310 }, { "epoch": 0.7355125662227489, "grad_norm": 0.023752661421895027, "learning_rate": 2.6563214128644482e-05, "loss": 0.1700994849205017, "step": 171320 }, { "epoch": 0.7355554983127689, "grad_norm": 0.03229743242263794, "learning_rate": 2.655890240852686e-05, "loss": 0.3263807535171509, "step": 171330 }, { "epoch": 0.7355984304027888, "grad_norm": 0.003039776347577572, "learning_rate": 2.6554590688409237e-05, "loss": 0.2008406400680542, "step": 171340 }, { "epoch": 0.7356413624928089, "grad_norm": 0.18037524819374084, "learning_rate": 2.6550278968291614e-05, "loss": 0.212782883644104, "step": 171350 }, { "epoch": 0.7356842945828289, "grad_norm": 0.010732796974480152, "learning_rate": 2.6545967248173985e-05, "loss": 0.21220669746398926, "step": 171360 }, { "epoch": 0.7357272266728488, "grad_norm": 0.00419106800109148, "learning_rate": 2.6541655528056362e-05, "loss": 0.15579986572265625, "step": 171370 }, { "epoch": 0.7357701587628689, "grad_norm": 1.4872262477874756, "learning_rate": 2.6537343807938743e-05, "loss": 0.359185528755188, "step": 171380 }, { "epoch": 0.7358130908528889, "grad_norm": 0.031842347234487534, "learning_rate": 2.653303208782112e-05, "loss": 0.14681037664413452, "step": 171390 }, { "epoch": 0.7358560229429089, "grad_norm": 0.03504836559295654, "learning_rate": 2.652872036770349e-05, "loss": 0.18587688207626343, "step": 171400 }, { "epoch": 0.7358989550329289, "grad_norm": 1.9147745370864868, "learning_rate": 2.652440864758587e-05, "loss": 0.15949387550354005, "step": 171410 }, { "epoch": 0.735941887122949, "grad_norm": 0.004897118546068668, "learning_rate": 2.6520096927468246e-05, "loss": 0.2251277208328247, "step": 171420 }, { "epoch": 0.7359848192129689, "grad_norm": 0.6692392826080322, "learning_rate": 2.6515785207350623e-05, "loss": 0.19807039499282836, "step": 171430 }, { "epoch": 0.7360277513029889, "grad_norm": 0.005262289196252823, "learning_rate": 2.6511473487232997e-05, "loss": 0.23101203441619872, "step": 171440 }, { "epoch": 0.736070683393009, "grad_norm": 1.8423153162002563, "learning_rate": 2.6507161767115374e-05, "loss": 0.38815784454345703, "step": 171450 }, { "epoch": 0.7361136154830289, "grad_norm": 0.03533385694026947, "learning_rate": 2.650285004699775e-05, "loss": 0.1562308430671692, "step": 171460 }, { "epoch": 0.7361565475730489, "grad_norm": 0.010025018826127052, "learning_rate": 2.649853832688013e-05, "loss": 0.10853803157806396, "step": 171470 }, { "epoch": 0.736199479663069, "grad_norm": 0.0056663258001208305, "learning_rate": 2.64942266067625e-05, "loss": 0.18904753923416137, "step": 171480 }, { "epoch": 0.7362424117530889, "grad_norm": 0.09215538948774338, "learning_rate": 2.648991488664488e-05, "loss": 0.2665423393249512, "step": 171490 }, { "epoch": 0.736285343843109, "grad_norm": 1.4893046617507935, "learning_rate": 2.6485603166527258e-05, "loss": 0.17237144708633423, "step": 171500 }, { "epoch": 0.736328275933129, "grad_norm": 0.1289806365966797, "learning_rate": 2.6481291446409635e-05, "loss": 0.10626275539398193, "step": 171510 }, { "epoch": 0.736371208023149, "grad_norm": 0.02575540356338024, "learning_rate": 2.6476979726292006e-05, "loss": 0.1492064118385315, "step": 171520 }, { "epoch": 0.736414140113169, "grad_norm": 2.737734079360962, "learning_rate": 2.6472668006174383e-05, "loss": 0.3282522439956665, "step": 171530 }, { "epoch": 0.736457072203189, "grad_norm": 0.1628538817167282, "learning_rate": 2.646835628605676e-05, "loss": 0.011172150075435639, "step": 171540 }, { "epoch": 0.736500004293209, "grad_norm": 0.026045171543955803, "learning_rate": 2.6464044565939138e-05, "loss": 0.2815445899963379, "step": 171550 }, { "epoch": 0.736542936383229, "grad_norm": 5.577436923980713, "learning_rate": 2.645973284582151e-05, "loss": 0.16396323442459107, "step": 171560 }, { "epoch": 0.736585868473249, "grad_norm": 1.4694331884384155, "learning_rate": 2.645542112570389e-05, "loss": 0.10221610069274903, "step": 171570 }, { "epoch": 0.7366288005632691, "grad_norm": 2.34641432762146, "learning_rate": 2.6451109405586266e-05, "loss": 0.18182029724121093, "step": 171580 }, { "epoch": 0.736671732653289, "grad_norm": 0.022412387654185295, "learning_rate": 2.6446797685468644e-05, "loss": 0.26555168628692627, "step": 171590 }, { "epoch": 0.736714664743309, "grad_norm": 0.12042602151632309, "learning_rate": 2.6442485965351018e-05, "loss": 0.32265405654907225, "step": 171600 }, { "epoch": 0.7367575968333291, "grad_norm": 0.16809596121311188, "learning_rate": 2.6438174245233395e-05, "loss": 0.1988927125930786, "step": 171610 }, { "epoch": 0.736800528923349, "grad_norm": 0.004430527798831463, "learning_rate": 2.6433862525115772e-05, "loss": 0.32211244106292725, "step": 171620 }, { "epoch": 0.7368434610133691, "grad_norm": 0.04840118810534477, "learning_rate": 2.642955080499815e-05, "loss": 0.10191557407379151, "step": 171630 }, { "epoch": 0.7368863931033891, "grad_norm": 0.8072931170463562, "learning_rate": 2.642523908488052e-05, "loss": 0.24994516372680664, "step": 171640 }, { "epoch": 0.736929325193409, "grad_norm": 0.7125005722045898, "learning_rate": 2.6420927364762897e-05, "loss": 0.04099064767360687, "step": 171650 }, { "epoch": 0.7369722572834291, "grad_norm": 0.011734679341316223, "learning_rate": 2.6416615644645275e-05, "loss": 0.15249698162078856, "step": 171660 }, { "epoch": 0.7370151893734491, "grad_norm": 2.276210069656372, "learning_rate": 2.6412303924527652e-05, "loss": 0.25595483779907224, "step": 171670 }, { "epoch": 0.737058121463469, "grad_norm": 0.00881089735776186, "learning_rate": 2.6407992204410026e-05, "loss": 0.4060102939605713, "step": 171680 }, { "epoch": 0.7371010535534891, "grad_norm": 4.0960774421691895, "learning_rate": 2.6403680484292403e-05, "loss": 0.28471989631652833, "step": 171690 }, { "epoch": 0.7371439856435091, "grad_norm": 1.0635778903961182, "learning_rate": 2.639936876417478e-05, "loss": 0.30349667072296144, "step": 171700 }, { "epoch": 0.7371869177335291, "grad_norm": 2.140429973602295, "learning_rate": 2.6395057044057158e-05, "loss": 0.22982544898986818, "step": 171710 }, { "epoch": 0.7372298498235491, "grad_norm": 3.2402894496917725, "learning_rate": 2.6390745323939536e-05, "loss": 0.32609105110168457, "step": 171720 }, { "epoch": 0.7372727819135692, "grad_norm": 1.8764134645462036, "learning_rate": 2.638643360382191e-05, "loss": 0.28361532688140867, "step": 171730 }, { "epoch": 0.7373157140035891, "grad_norm": 0.014295602217316628, "learning_rate": 2.6382121883704287e-05, "loss": 0.1151078224182129, "step": 171740 }, { "epoch": 0.7373586460936091, "grad_norm": 0.0026767903473228216, "learning_rate": 2.6377810163586664e-05, "loss": 0.2339120626449585, "step": 171750 }, { "epoch": 0.7374015781836292, "grad_norm": 0.02163386344909668, "learning_rate": 2.637349844346904e-05, "loss": 0.19625715017318726, "step": 171760 }, { "epoch": 0.7374445102736491, "grad_norm": 0.07742445170879364, "learning_rate": 2.6369186723351412e-05, "loss": 0.05881250500679016, "step": 171770 }, { "epoch": 0.7374874423636691, "grad_norm": 1.3166648149490356, "learning_rate": 2.636487500323379e-05, "loss": 0.28442997932434083, "step": 171780 }, { "epoch": 0.7375303744536892, "grad_norm": 0.334449827671051, "learning_rate": 2.636056328311617e-05, "loss": 0.19606301784515381, "step": 171790 }, { "epoch": 0.7375733065437091, "grad_norm": 0.011855477467179298, "learning_rate": 2.6356251562998547e-05, "loss": 0.13084664344787597, "step": 171800 }, { "epoch": 0.7376162386337292, "grad_norm": 10.077703475952148, "learning_rate": 2.6351939842880918e-05, "loss": 0.3036245584487915, "step": 171810 }, { "epoch": 0.7376591707237492, "grad_norm": 1.4328504800796509, "learning_rate": 2.6347628122763295e-05, "loss": 0.23132739067077637, "step": 171820 }, { "epoch": 0.7377021028137691, "grad_norm": 0.11290911585092545, "learning_rate": 2.6343316402645673e-05, "loss": 0.15605651140213012, "step": 171830 }, { "epoch": 0.7377450349037892, "grad_norm": 0.06027461960911751, "learning_rate": 2.633900468252805e-05, "loss": 0.12230217456817627, "step": 171840 }, { "epoch": 0.7377879669938092, "grad_norm": 2.58107328414917, "learning_rate": 2.6334692962410424e-05, "loss": 0.10988858938217164, "step": 171850 }, { "epoch": 0.7378308990838292, "grad_norm": 1.151477575302124, "learning_rate": 2.63303812422928e-05, "loss": 0.3149649381637573, "step": 171860 }, { "epoch": 0.7378738311738492, "grad_norm": 1.9065663814544678, "learning_rate": 2.632606952217518e-05, "loss": 0.3236576318740845, "step": 171870 }, { "epoch": 0.7379167632638692, "grad_norm": 1.5041933059692383, "learning_rate": 2.6321757802057556e-05, "loss": 0.2537533283233643, "step": 171880 }, { "epoch": 0.7379596953538892, "grad_norm": 0.03302591294050217, "learning_rate": 2.6317446081939927e-05, "loss": 0.05842955112457275, "step": 171890 }, { "epoch": 0.7380026274439092, "grad_norm": 1.3000556230545044, "learning_rate": 2.6313134361822307e-05, "loss": 0.472438907623291, "step": 171900 }, { "epoch": 0.7380455595339293, "grad_norm": 0.2630452513694763, "learning_rate": 2.6308822641704685e-05, "loss": 0.1062608003616333, "step": 171910 }, { "epoch": 0.7380884916239492, "grad_norm": 2.648360013961792, "learning_rate": 2.6304510921587062e-05, "loss": 0.10505330562591553, "step": 171920 }, { "epoch": 0.7381314237139692, "grad_norm": 0.03722335025668144, "learning_rate": 2.6300199201469433e-05, "loss": 0.14387372732162476, "step": 171930 }, { "epoch": 0.7381743558039893, "grad_norm": 0.010214372538030148, "learning_rate": 2.629588748135181e-05, "loss": 0.052946603298187254, "step": 171940 }, { "epoch": 0.7382172878940093, "grad_norm": 0.016190167516469955, "learning_rate": 2.6291575761234187e-05, "loss": 0.11722316741943359, "step": 171950 }, { "epoch": 0.7382602199840292, "grad_norm": 0.11385954916477203, "learning_rate": 2.6287264041116565e-05, "loss": 0.4546807289123535, "step": 171960 }, { "epoch": 0.7383031520740493, "grad_norm": 0.030435390770435333, "learning_rate": 2.628295232099894e-05, "loss": 0.23353421688079834, "step": 171970 }, { "epoch": 0.7383460841640693, "grad_norm": 4.89837121963501, "learning_rate": 2.6278640600881316e-05, "loss": 0.15024209022521973, "step": 171980 }, { "epoch": 0.7383890162540893, "grad_norm": 0.031725313514471054, "learning_rate": 2.6274328880763693e-05, "loss": 0.3565608024597168, "step": 171990 }, { "epoch": 0.7384319483441093, "grad_norm": 0.5686355829238892, "learning_rate": 2.627001716064607e-05, "loss": 0.2020179271697998, "step": 172000 }, { "epoch": 0.7384319483441093, "eval_loss": 0.3913794457912445, "eval_runtime": 27.4732, "eval_samples_per_second": 3.64, "eval_steps_per_second": 3.64, "step": 172000 }, { "epoch": 0.7384748804341293, "grad_norm": 1.6541748046875, "learning_rate": 2.6265705440528445e-05, "loss": 0.05306849479675293, "step": 172010 }, { "epoch": 0.7385178125241493, "grad_norm": 5.37670373916626, "learning_rate": 2.6261393720410822e-05, "loss": 0.21729233264923095, "step": 172020 }, { "epoch": 0.7385607446141693, "grad_norm": 1.403529167175293, "learning_rate": 2.62570820002932e-05, "loss": 0.12173585891723633, "step": 172030 }, { "epoch": 0.7386036767041894, "grad_norm": 0.32816994190216064, "learning_rate": 2.6252770280175577e-05, "loss": 0.3592313289642334, "step": 172040 }, { "epoch": 0.7386466087942093, "grad_norm": 0.02111462503671646, "learning_rate": 2.6248458560057947e-05, "loss": 0.36513347625732423, "step": 172050 }, { "epoch": 0.7386895408842293, "grad_norm": 7.12018346786499, "learning_rate": 2.6244146839940325e-05, "loss": 0.2642231464385986, "step": 172060 }, { "epoch": 0.7387324729742494, "grad_norm": 0.02392994984984398, "learning_rate": 2.6239835119822702e-05, "loss": 0.2387470483779907, "step": 172070 }, { "epoch": 0.7387754050642693, "grad_norm": 0.04821072146296501, "learning_rate": 2.6235523399705083e-05, "loss": 0.24384143352508544, "step": 172080 }, { "epoch": 0.7388183371542894, "grad_norm": 0.27216729521751404, "learning_rate": 2.623121167958746e-05, "loss": 0.19975589513778685, "step": 172090 }, { "epoch": 0.7388612692443094, "grad_norm": 0.005782154388725758, "learning_rate": 2.622689995946983e-05, "loss": 0.21162703037261962, "step": 172100 }, { "epoch": 0.7389042013343293, "grad_norm": 0.018965255469083786, "learning_rate": 2.6222588239352208e-05, "loss": 0.21590452194213866, "step": 172110 }, { "epoch": 0.7389471334243494, "grad_norm": 0.0020858191419392824, "learning_rate": 2.6218276519234585e-05, "loss": 0.25055460929870604, "step": 172120 }, { "epoch": 0.7389900655143694, "grad_norm": 1.3369940519332886, "learning_rate": 2.6213964799116963e-05, "loss": 0.14633073806762695, "step": 172130 }, { "epoch": 0.7390329976043893, "grad_norm": 0.527765691280365, "learning_rate": 2.6209653078999336e-05, "loss": 0.2503513813018799, "step": 172140 }, { "epoch": 0.7390759296944094, "grad_norm": 1.8898754119873047, "learning_rate": 2.6205341358881714e-05, "loss": 0.23500373363494872, "step": 172150 }, { "epoch": 0.7391188617844294, "grad_norm": 0.10808947682380676, "learning_rate": 2.620102963876409e-05, "loss": 0.23570013046264648, "step": 172160 }, { "epoch": 0.7391617938744494, "grad_norm": 3.1415834426879883, "learning_rate": 2.619671791864647e-05, "loss": 0.13939026594161988, "step": 172170 }, { "epoch": 0.7392047259644694, "grad_norm": 4.802440166473389, "learning_rate": 2.619240619852884e-05, "loss": 0.150770902633667, "step": 172180 }, { "epoch": 0.7392476580544894, "grad_norm": 0.0116340983659029, "learning_rate": 2.618809447841122e-05, "loss": 0.4175867557525635, "step": 172190 }, { "epoch": 0.7392905901445094, "grad_norm": 1.3842447996139526, "learning_rate": 2.6183782758293597e-05, "loss": 0.05002044439315796, "step": 172200 }, { "epoch": 0.7393335222345294, "grad_norm": 1.6711241006851196, "learning_rate": 2.6179471038175974e-05, "loss": 0.23692212104797364, "step": 172210 }, { "epoch": 0.7393764543245495, "grad_norm": 1.484615683555603, "learning_rate": 2.6175159318058345e-05, "loss": 0.22095875740051268, "step": 172220 }, { "epoch": 0.7394193864145694, "grad_norm": 4.88292121887207, "learning_rate": 2.6170847597940722e-05, "loss": 0.21521258354187012, "step": 172230 }, { "epoch": 0.7394623185045894, "grad_norm": 0.0060545094311237335, "learning_rate": 2.61665358778231e-05, "loss": 0.2710393190383911, "step": 172240 }, { "epoch": 0.7395052505946095, "grad_norm": 0.011293796822428703, "learning_rate": 2.6162224157705477e-05, "loss": 0.08580965995788574, "step": 172250 }, { "epoch": 0.7395481826846294, "grad_norm": 5.334449768066406, "learning_rate": 2.615791243758785e-05, "loss": 0.3057300329208374, "step": 172260 }, { "epoch": 0.7395911147746494, "grad_norm": 20.33785629272461, "learning_rate": 2.615360071747023e-05, "loss": 0.23020739555358888, "step": 172270 }, { "epoch": 0.7396340468646695, "grad_norm": 0.0005580539582297206, "learning_rate": 2.6149288997352606e-05, "loss": 0.03917438983917236, "step": 172280 }, { "epoch": 0.7396769789546894, "grad_norm": 2.250082015991211, "learning_rate": 2.6144977277234983e-05, "loss": 0.3027928352355957, "step": 172290 }, { "epoch": 0.7397199110447095, "grad_norm": 0.008189544081687927, "learning_rate": 2.6140665557117354e-05, "loss": 0.14146639108657838, "step": 172300 }, { "epoch": 0.7397628431347295, "grad_norm": 15.595314979553223, "learning_rate": 2.6136353836999734e-05, "loss": 0.3388784885406494, "step": 172310 }, { "epoch": 0.7398057752247494, "grad_norm": 0.15983085334300995, "learning_rate": 2.6132042116882112e-05, "loss": 0.1603078603744507, "step": 172320 }, { "epoch": 0.7398487073147695, "grad_norm": 0.9329927563667297, "learning_rate": 2.612773039676449e-05, "loss": 0.13090095520019532, "step": 172330 }, { "epoch": 0.7398916394047895, "grad_norm": 0.0009728687582537532, "learning_rate": 2.612341867664686e-05, "loss": 0.1858871340751648, "step": 172340 }, { "epoch": 0.7399345714948095, "grad_norm": 1.6413109302520752, "learning_rate": 2.6119106956529237e-05, "loss": 0.29552493095397947, "step": 172350 }, { "epoch": 0.7399775035848295, "grad_norm": 1.6135348081588745, "learning_rate": 2.6114795236411614e-05, "loss": 0.1183046579360962, "step": 172360 }, { "epoch": 0.7400204356748495, "grad_norm": 0.00031716900411993265, "learning_rate": 2.611048351629399e-05, "loss": 0.1578521728515625, "step": 172370 }, { "epoch": 0.7400633677648696, "grad_norm": 0.02061157487332821, "learning_rate": 2.6106171796176366e-05, "loss": 0.001564457081258297, "step": 172380 }, { "epoch": 0.7401062998548895, "grad_norm": 2.053553342819214, "learning_rate": 2.6101860076058743e-05, "loss": 0.25558698177337646, "step": 172390 }, { "epoch": 0.7401492319449096, "grad_norm": 0.22899110615253448, "learning_rate": 2.609754835594112e-05, "loss": 0.2048067569732666, "step": 172400 }, { "epoch": 0.7401921640349296, "grad_norm": 3.2389774322509766, "learning_rate": 2.6093236635823498e-05, "loss": 0.16423354148864747, "step": 172410 }, { "epoch": 0.7402350961249495, "grad_norm": 7.045860290527344, "learning_rate": 2.6088924915705875e-05, "loss": 0.3069408893585205, "step": 172420 }, { "epoch": 0.7402780282149696, "grad_norm": 0.00262209540233016, "learning_rate": 2.608461319558825e-05, "loss": 0.09878330826759338, "step": 172430 }, { "epoch": 0.7403209603049896, "grad_norm": 0.008975930511951447, "learning_rate": 2.6080301475470626e-05, "loss": 0.16282857656478883, "step": 172440 }, { "epoch": 0.7403638923950095, "grad_norm": 0.006928627844899893, "learning_rate": 2.6075989755353004e-05, "loss": 0.060286080837249754, "step": 172450 }, { "epoch": 0.7404068244850296, "grad_norm": 0.003378543769940734, "learning_rate": 2.607167803523538e-05, "loss": 0.07941926717758178, "step": 172460 }, { "epoch": 0.7404497565750496, "grad_norm": 4.774534225463867, "learning_rate": 2.606736631511775e-05, "loss": 0.29351153373718264, "step": 172470 }, { "epoch": 0.7404926886650696, "grad_norm": 0.4744148254394531, "learning_rate": 2.606305459500013e-05, "loss": 0.09133726954460145, "step": 172480 }, { "epoch": 0.7405356207550896, "grad_norm": 6.601369857788086, "learning_rate": 2.605874287488251e-05, "loss": 0.190312922000885, "step": 172490 }, { "epoch": 0.7405785528451096, "grad_norm": 14.058046340942383, "learning_rate": 2.6054431154764887e-05, "loss": 0.3220758199691772, "step": 172500 }, { "epoch": 0.7406214849351296, "grad_norm": 0.09356804192066193, "learning_rate": 2.6050119434647258e-05, "loss": 0.3615183591842651, "step": 172510 }, { "epoch": 0.7406644170251496, "grad_norm": 0.052794020622968674, "learning_rate": 2.6045807714529635e-05, "loss": 0.10844634771347046, "step": 172520 }, { "epoch": 0.7407073491151697, "grad_norm": 0.1476939171552658, "learning_rate": 2.6041495994412012e-05, "loss": 0.19965894222259523, "step": 172530 }, { "epoch": 0.7407502812051896, "grad_norm": 2.120439052581787, "learning_rate": 2.603718427429439e-05, "loss": 0.19153574705123902, "step": 172540 }, { "epoch": 0.7407932132952096, "grad_norm": 0.0483546257019043, "learning_rate": 2.6032872554176763e-05, "loss": 0.28668229579925536, "step": 172550 }, { "epoch": 0.7408361453852297, "grad_norm": 0.01480165682733059, "learning_rate": 2.602856083405914e-05, "loss": 0.2542074918746948, "step": 172560 }, { "epoch": 0.7408790774752496, "grad_norm": 0.2001974880695343, "learning_rate": 2.6024249113941518e-05, "loss": 0.0713961124420166, "step": 172570 }, { "epoch": 0.7409220095652697, "grad_norm": 1.2386269569396973, "learning_rate": 2.6019937393823896e-05, "loss": 0.18896110057830812, "step": 172580 }, { "epoch": 0.7409649416552897, "grad_norm": 0.0029969087336212397, "learning_rate": 2.6015625673706266e-05, "loss": 0.10486527681350707, "step": 172590 }, { "epoch": 0.7410078737453096, "grad_norm": 0.015220149420201778, "learning_rate": 2.6011313953588647e-05, "loss": 0.3754743576049805, "step": 172600 }, { "epoch": 0.7410508058353297, "grad_norm": 4.762171268463135, "learning_rate": 2.6007002233471024e-05, "loss": 0.26454839706420896, "step": 172610 }, { "epoch": 0.7410937379253497, "grad_norm": 1.5890958309173584, "learning_rate": 2.60026905133534e-05, "loss": 0.16944000720977784, "step": 172620 }, { "epoch": 0.7411366700153696, "grad_norm": 0.0030514898244291544, "learning_rate": 2.5998378793235772e-05, "loss": 0.2463892698287964, "step": 172630 }, { "epoch": 0.7411796021053897, "grad_norm": 0.13802503049373627, "learning_rate": 2.599406707311815e-05, "loss": 0.32329416275024414, "step": 172640 }, { "epoch": 0.7412225341954097, "grad_norm": 0.013265220448374748, "learning_rate": 2.5989755353000527e-05, "loss": 0.28892490863800047, "step": 172650 }, { "epoch": 0.7412654662854297, "grad_norm": 0.48437264561653137, "learning_rate": 2.5985443632882904e-05, "loss": 0.2645559549331665, "step": 172660 }, { "epoch": 0.7413083983754497, "grad_norm": 0.04876817762851715, "learning_rate": 2.5981131912765278e-05, "loss": 0.24079082012176514, "step": 172670 }, { "epoch": 0.7413513304654697, "grad_norm": 0.001597659313119948, "learning_rate": 2.5976820192647655e-05, "loss": 0.22210347652435303, "step": 172680 }, { "epoch": 0.7413942625554897, "grad_norm": 0.1181657686829567, "learning_rate": 2.5972508472530033e-05, "loss": 0.14718619585037232, "step": 172690 }, { "epoch": 0.7414371946455097, "grad_norm": 1.3400815725326538, "learning_rate": 2.596819675241241e-05, "loss": 0.16132137775421143, "step": 172700 }, { "epoch": 0.7414801267355298, "grad_norm": 0.009908415377140045, "learning_rate": 2.5963885032294784e-05, "loss": 0.08111634254455566, "step": 172710 }, { "epoch": 0.7415230588255497, "grad_norm": 0.3089774250984192, "learning_rate": 2.595957331217716e-05, "loss": 0.14888973236083985, "step": 172720 }, { "epoch": 0.7415659909155697, "grad_norm": 0.2667434513568878, "learning_rate": 2.595526159205954e-05, "loss": 0.06582852005958557, "step": 172730 }, { "epoch": 0.7416089230055898, "grad_norm": 5.848819732666016, "learning_rate": 2.5950949871941916e-05, "loss": 0.08641493320465088, "step": 172740 }, { "epoch": 0.7416518550956097, "grad_norm": 0.0031941940542310476, "learning_rate": 2.5946638151824287e-05, "loss": 0.09354096055030822, "step": 172750 }, { "epoch": 0.7416947871856298, "grad_norm": 8.418753623962402, "learning_rate": 2.5942326431706664e-05, "loss": 0.15573036670684814, "step": 172760 }, { "epoch": 0.7417377192756498, "grad_norm": 0.03718193247914314, "learning_rate": 2.593801471158904e-05, "loss": 0.017639188468456267, "step": 172770 }, { "epoch": 0.7417806513656697, "grad_norm": 0.3647063672542572, "learning_rate": 2.593370299147142e-05, "loss": 0.30183801651000974, "step": 172780 }, { "epoch": 0.7418235834556898, "grad_norm": 0.01387582067400217, "learning_rate": 2.59293912713538e-05, "loss": 0.3928518533706665, "step": 172790 }, { "epoch": 0.7418665155457098, "grad_norm": 1.8043218851089478, "learning_rate": 2.592507955123617e-05, "loss": 0.13272377252578735, "step": 172800 }, { "epoch": 0.7419094476357299, "grad_norm": 0.012643926776945591, "learning_rate": 2.5920767831118547e-05, "loss": 0.2803585290908813, "step": 172810 }, { "epoch": 0.7419523797257498, "grad_norm": 0.015432733111083508, "learning_rate": 2.5916456111000925e-05, "loss": 0.10544899702072144, "step": 172820 }, { "epoch": 0.7419953118157698, "grad_norm": 0.8467946648597717, "learning_rate": 2.5912144390883302e-05, "loss": 0.37466087341308596, "step": 172830 }, { "epoch": 0.7420382439057899, "grad_norm": 0.026423417031764984, "learning_rate": 2.5907832670765676e-05, "loss": 0.11121004819869995, "step": 172840 }, { "epoch": 0.7420811759958098, "grad_norm": 2.762631416320801, "learning_rate": 2.5903520950648053e-05, "loss": 0.3465926170349121, "step": 172850 }, { "epoch": 0.7421241080858298, "grad_norm": 0.00443359324708581, "learning_rate": 2.589920923053043e-05, "loss": 0.3730604648590088, "step": 172860 }, { "epoch": 0.7421670401758499, "grad_norm": 0.0019174201879650354, "learning_rate": 2.5894897510412808e-05, "loss": 0.025777462124824523, "step": 172870 }, { "epoch": 0.7422099722658698, "grad_norm": 0.011310220696032047, "learning_rate": 2.589058579029518e-05, "loss": 0.2822575092315674, "step": 172880 }, { "epoch": 0.7422529043558899, "grad_norm": 0.02572781592607498, "learning_rate": 2.5886274070177556e-05, "loss": 0.1161012053489685, "step": 172890 }, { "epoch": 0.7422958364459099, "grad_norm": 21.611812591552734, "learning_rate": 2.5881962350059937e-05, "loss": 0.3096113681793213, "step": 172900 }, { "epoch": 0.7423387685359298, "grad_norm": 0.0002522652503103018, "learning_rate": 2.5877650629942314e-05, "loss": 0.3812889814376831, "step": 172910 }, { "epoch": 0.7423817006259499, "grad_norm": 1.2188701629638672, "learning_rate": 2.5873338909824685e-05, "loss": 0.25791704654693604, "step": 172920 }, { "epoch": 0.7424246327159699, "grad_norm": 0.6817674040794373, "learning_rate": 2.5869027189707062e-05, "loss": 0.24171240329742433, "step": 172930 }, { "epoch": 0.7424675648059899, "grad_norm": 0.0008015789790078998, "learning_rate": 2.586471546958944e-05, "loss": 0.061844897270202634, "step": 172940 }, { "epoch": 0.7425104968960099, "grad_norm": 4.590038776397705, "learning_rate": 2.5860403749471817e-05, "loss": 0.3130334377288818, "step": 172950 }, { "epoch": 0.7425534289860299, "grad_norm": 0.005684803705662489, "learning_rate": 2.585609202935419e-05, "loss": 0.11198042631149292, "step": 172960 }, { "epoch": 0.7425963610760499, "grad_norm": 0.6107711791992188, "learning_rate": 2.5851780309236568e-05, "loss": 0.31751389503479005, "step": 172970 }, { "epoch": 0.7426392931660699, "grad_norm": 0.00203691772185266, "learning_rate": 2.5847468589118945e-05, "loss": 0.14549793004989625, "step": 172980 }, { "epoch": 0.74268222525609, "grad_norm": 0.01611451804637909, "learning_rate": 2.5843156869001323e-05, "loss": 0.19413976669311522, "step": 172990 }, { "epoch": 0.7427251573461099, "grad_norm": 0.01514244545251131, "learning_rate": 2.5838845148883693e-05, "loss": 0.06386711597442626, "step": 173000 }, { "epoch": 0.7427251573461099, "eval_loss": 0.38922441005706787, "eval_runtime": 27.6049, "eval_samples_per_second": 3.623, "eval_steps_per_second": 3.623, "step": 173000 }, { "epoch": 0.7427680894361299, "grad_norm": 0.009602725505828857, "learning_rate": 2.5834533428766074e-05, "loss": 0.034127888083457944, "step": 173010 }, { "epoch": 0.74281102152615, "grad_norm": 1.5638679265975952, "learning_rate": 2.583022170864845e-05, "loss": 0.14553526639938355, "step": 173020 }, { "epoch": 0.7428539536161699, "grad_norm": 0.42116615176200867, "learning_rate": 2.582590998853083e-05, "loss": 0.3827761888504028, "step": 173030 }, { "epoch": 0.74289688570619, "grad_norm": 0.009471829980611801, "learning_rate": 2.58215982684132e-05, "loss": 0.13673275709152222, "step": 173040 }, { "epoch": 0.74293981779621, "grad_norm": 8.371386528015137, "learning_rate": 2.5817286548295576e-05, "loss": 0.36310031414031985, "step": 173050 }, { "epoch": 0.7429827498862299, "grad_norm": 1.1578912734985352, "learning_rate": 2.5812974828177954e-05, "loss": 0.18568694591522217, "step": 173060 }, { "epoch": 0.74302568197625, "grad_norm": 0.0743391215801239, "learning_rate": 2.580866310806033e-05, "loss": 0.16277066469192505, "step": 173070 }, { "epoch": 0.74306861406627, "grad_norm": 0.011781970970332623, "learning_rate": 2.5804351387942705e-05, "loss": 0.24296224117279053, "step": 173080 }, { "epoch": 0.7431115461562899, "grad_norm": 2.4936790466308594, "learning_rate": 2.5800039667825082e-05, "loss": 0.22644639015197754, "step": 173090 }, { "epoch": 0.74315447824631, "grad_norm": 0.39981764554977417, "learning_rate": 2.579572794770746e-05, "loss": 0.28062996864318845, "step": 173100 }, { "epoch": 0.74319741033633, "grad_norm": 5.6728644371032715, "learning_rate": 2.5791416227589837e-05, "loss": 0.3589806079864502, "step": 173110 }, { "epoch": 0.74324034242635, "grad_norm": 0.006206681486219168, "learning_rate": 2.578710450747221e-05, "loss": 0.11007034778594971, "step": 173120 }, { "epoch": 0.74328327451637, "grad_norm": 0.005777155049145222, "learning_rate": 2.578279278735459e-05, "loss": 0.04013732373714447, "step": 173130 }, { "epoch": 0.74332620660639, "grad_norm": 2.492429256439209, "learning_rate": 2.5778481067236966e-05, "loss": 0.2559037208557129, "step": 173140 }, { "epoch": 0.74336913869641, "grad_norm": 2.0569210052490234, "learning_rate": 2.5774169347119343e-05, "loss": 0.1842200756072998, "step": 173150 }, { "epoch": 0.74341207078643, "grad_norm": 0.05245661735534668, "learning_rate": 2.576985762700172e-05, "loss": 0.11454921960830688, "step": 173160 }, { "epoch": 0.74345500287645, "grad_norm": 0.0004969439469277859, "learning_rate": 2.576554590688409e-05, "loss": 0.36214404106140136, "step": 173170 }, { "epoch": 0.74349793496647, "grad_norm": 0.011017000302672386, "learning_rate": 2.576123418676647e-05, "loss": 0.34534130096435545, "step": 173180 }, { "epoch": 0.74354086705649, "grad_norm": 0.01024332270026207, "learning_rate": 2.5756922466648846e-05, "loss": 0.14188783168792723, "step": 173190 }, { "epoch": 0.7435837991465101, "grad_norm": 0.053222961723804474, "learning_rate": 2.5752610746531226e-05, "loss": 0.15369497537612914, "step": 173200 }, { "epoch": 0.74362673123653, "grad_norm": 1.4653114080429077, "learning_rate": 2.5748299026413597e-05, "loss": 0.2768231391906738, "step": 173210 }, { "epoch": 0.74366966332655, "grad_norm": 1.0309170484542847, "learning_rate": 2.5743987306295974e-05, "loss": 0.3903130292892456, "step": 173220 }, { "epoch": 0.7437125954165701, "grad_norm": 0.03295344114303589, "learning_rate": 2.573967558617835e-05, "loss": 0.09203203916549682, "step": 173230 }, { "epoch": 0.7437555275065901, "grad_norm": 1.7051453590393066, "learning_rate": 2.573536386606073e-05, "loss": 0.04676951467990875, "step": 173240 }, { "epoch": 0.7437984595966101, "grad_norm": 0.07142875343561172, "learning_rate": 2.5731052145943103e-05, "loss": 0.4174853801727295, "step": 173250 }, { "epoch": 0.7438413916866301, "grad_norm": 0.9815771579742432, "learning_rate": 2.572674042582548e-05, "loss": 0.12617738246917726, "step": 173260 }, { "epoch": 0.7438843237766501, "grad_norm": 0.4857109785079956, "learning_rate": 2.5722428705707858e-05, "loss": 0.11668617725372314, "step": 173270 }, { "epoch": 0.7439272558666701, "grad_norm": 0.07516008615493774, "learning_rate": 2.5718116985590235e-05, "loss": 0.04436193406581879, "step": 173280 }, { "epoch": 0.7439701879566901, "grad_norm": 0.07428845763206482, "learning_rate": 2.5713805265472606e-05, "loss": 0.03692366778850555, "step": 173290 }, { "epoch": 0.7440131200467102, "grad_norm": 0.0036778750363737345, "learning_rate": 2.5709493545354983e-05, "loss": 0.07944020628929138, "step": 173300 }, { "epoch": 0.7440560521367301, "grad_norm": 10.471997261047363, "learning_rate": 2.5705181825237364e-05, "loss": 0.2046130657196045, "step": 173310 }, { "epoch": 0.7440989842267501, "grad_norm": 0.05514159053564072, "learning_rate": 2.570087010511974e-05, "loss": 0.08784713745117187, "step": 173320 }, { "epoch": 0.7441419163167702, "grad_norm": 0.3847615420818329, "learning_rate": 2.569655838500211e-05, "loss": 0.163929283618927, "step": 173330 }, { "epoch": 0.7441848484067901, "grad_norm": 0.041439611464738846, "learning_rate": 2.569224666488449e-05, "loss": 0.08686239123344422, "step": 173340 }, { "epoch": 0.7442277804968102, "grad_norm": 1.8955515623092651, "learning_rate": 2.5687934944766866e-05, "loss": 0.14141509532928467, "step": 173350 }, { "epoch": 0.7442707125868302, "grad_norm": 2.777130603790283, "learning_rate": 2.5683623224649244e-05, "loss": 0.11059495210647582, "step": 173360 }, { "epoch": 0.7443136446768501, "grad_norm": 0.016122760251164436, "learning_rate": 2.5679311504531618e-05, "loss": 0.17072702646255494, "step": 173370 }, { "epoch": 0.7443565767668702, "grad_norm": 1.575832724571228, "learning_rate": 2.5674999784413995e-05, "loss": 0.31911892890930177, "step": 173380 }, { "epoch": 0.7443995088568902, "grad_norm": 1.009772777557373, "learning_rate": 2.5670688064296372e-05, "loss": 0.16791296005249023, "step": 173390 }, { "epoch": 0.7444424409469101, "grad_norm": 2.09256911277771, "learning_rate": 2.566637634417875e-05, "loss": 0.1358073353767395, "step": 173400 }, { "epoch": 0.7444853730369302, "grad_norm": 0.003986488562077284, "learning_rate": 2.566206462406112e-05, "loss": 0.30874583721160886, "step": 173410 }, { "epoch": 0.7445283051269502, "grad_norm": 0.0005260541802272201, "learning_rate": 2.56577529039435e-05, "loss": 0.12634888887405396, "step": 173420 }, { "epoch": 0.7445712372169702, "grad_norm": 0.09744524955749512, "learning_rate": 2.5653441183825878e-05, "loss": 0.1951184868812561, "step": 173430 }, { "epoch": 0.7446141693069902, "grad_norm": 2.7815160751342773, "learning_rate": 2.5649129463708256e-05, "loss": 0.13784483671188355, "step": 173440 }, { "epoch": 0.7446571013970102, "grad_norm": 1.0305331945419312, "learning_rate": 2.5644817743590626e-05, "loss": 0.30634360313415526, "step": 173450 }, { "epoch": 0.7447000334870302, "grad_norm": 2.2558400630950928, "learning_rate": 2.5640506023473003e-05, "loss": 0.06751551032066345, "step": 173460 }, { "epoch": 0.7447429655770502, "grad_norm": 1.7618029117584229, "learning_rate": 2.563619430335538e-05, "loss": 0.16229534149169922, "step": 173470 }, { "epoch": 0.7447858976670703, "grad_norm": 0.021817678585648537, "learning_rate": 2.5631882583237758e-05, "loss": 0.22537617683410643, "step": 173480 }, { "epoch": 0.7448288297570902, "grad_norm": 0.02080300636589527, "learning_rate": 2.5627570863120132e-05, "loss": 0.2169330358505249, "step": 173490 }, { "epoch": 0.7448717618471102, "grad_norm": 0.438027560710907, "learning_rate": 2.562325914300251e-05, "loss": 0.20829992294311522, "step": 173500 }, { "epoch": 0.7449146939371303, "grad_norm": 0.0014316404704004526, "learning_rate": 2.5618947422884887e-05, "loss": 0.045699626207351685, "step": 173510 }, { "epoch": 0.7449576260271502, "grad_norm": 0.029168443754315376, "learning_rate": 2.5614635702767264e-05, "loss": 0.22984709739685058, "step": 173520 }, { "epoch": 0.7450005581171703, "grad_norm": 0.024142567068338394, "learning_rate": 2.561032398264964e-05, "loss": 0.23821489810943602, "step": 173530 }, { "epoch": 0.7450434902071903, "grad_norm": 0.07180153578519821, "learning_rate": 2.5606012262532015e-05, "loss": 0.24222948551177978, "step": 173540 }, { "epoch": 0.7450864222972102, "grad_norm": 0.04247334226965904, "learning_rate": 2.5601700542414393e-05, "loss": 0.08847699761390686, "step": 173550 }, { "epoch": 0.7451293543872303, "grad_norm": 7.379086971282959, "learning_rate": 2.559738882229677e-05, "loss": 0.07075945138931275, "step": 173560 }, { "epoch": 0.7451722864772503, "grad_norm": 1.955690622329712, "learning_rate": 2.5593077102179147e-05, "loss": 0.2634904384613037, "step": 173570 }, { "epoch": 0.7452152185672702, "grad_norm": 0.0009114326676353812, "learning_rate": 2.5588765382061518e-05, "loss": 0.2877689599990845, "step": 173580 }, { "epoch": 0.7452581506572903, "grad_norm": 0.00039587041828781366, "learning_rate": 2.5584453661943895e-05, "loss": 0.20023708343505858, "step": 173590 }, { "epoch": 0.7453010827473103, "grad_norm": 0.005947303492575884, "learning_rate": 2.5580141941826276e-05, "loss": 0.20177597999572755, "step": 173600 }, { "epoch": 0.7453440148373303, "grad_norm": 0.9493522047996521, "learning_rate": 2.5575830221708653e-05, "loss": 0.041297358274459836, "step": 173610 }, { "epoch": 0.7453869469273503, "grad_norm": 0.00046072210534475744, "learning_rate": 2.5571518501591024e-05, "loss": 0.1452507734298706, "step": 173620 }, { "epoch": 0.7454298790173703, "grad_norm": 0.4799286127090454, "learning_rate": 2.55672067814734e-05, "loss": 0.35864949226379395, "step": 173630 }, { "epoch": 0.7454728111073903, "grad_norm": 2.4819602966308594, "learning_rate": 2.556289506135578e-05, "loss": 0.3039137363433838, "step": 173640 }, { "epoch": 0.7455157431974103, "grad_norm": 2.6950457096099854, "learning_rate": 2.5558583341238156e-05, "loss": 0.04391449689865112, "step": 173650 }, { "epoch": 0.7455586752874304, "grad_norm": 0.13231931626796722, "learning_rate": 2.555427162112053e-05, "loss": 0.3039705276489258, "step": 173660 }, { "epoch": 0.7456016073774504, "grad_norm": 0.13154283165931702, "learning_rate": 2.5549959901002907e-05, "loss": 0.05160494446754456, "step": 173670 }, { "epoch": 0.7456445394674703, "grad_norm": 0.18037918210029602, "learning_rate": 2.5545648180885285e-05, "loss": 0.12189091444015503, "step": 173680 }, { "epoch": 0.7456874715574904, "grad_norm": 1.9181801080703735, "learning_rate": 2.5541336460767662e-05, "loss": 0.1407124638557434, "step": 173690 }, { "epoch": 0.7457304036475104, "grad_norm": 0.001891557709313929, "learning_rate": 2.5537024740650033e-05, "loss": 0.1649843454360962, "step": 173700 }, { "epoch": 0.7457733357375304, "grad_norm": 0.009757429361343384, "learning_rate": 2.5532713020532413e-05, "loss": 0.3177161693572998, "step": 173710 }, { "epoch": 0.7458162678275504, "grad_norm": 1.442531943321228, "learning_rate": 2.552840130041479e-05, "loss": 0.36181912422180174, "step": 173720 }, { "epoch": 0.7458591999175704, "grad_norm": 0.07108304649591446, "learning_rate": 2.5524089580297168e-05, "loss": 0.13565880060195923, "step": 173730 }, { "epoch": 0.7459021320075904, "grad_norm": 7.040263652801514, "learning_rate": 2.551977786017954e-05, "loss": 0.20055036544799804, "step": 173740 }, { "epoch": 0.7459450640976104, "grad_norm": 4.002945423126221, "learning_rate": 2.5515466140061916e-05, "loss": 0.4659637451171875, "step": 173750 }, { "epoch": 0.7459879961876305, "grad_norm": 0.011049356311559677, "learning_rate": 2.5511154419944293e-05, "loss": 0.2700127124786377, "step": 173760 }, { "epoch": 0.7460309282776504, "grad_norm": 0.00305456412024796, "learning_rate": 2.550684269982667e-05, "loss": 0.21567902565002442, "step": 173770 }, { "epoch": 0.7460738603676704, "grad_norm": 0.7426313161849976, "learning_rate": 2.5502530979709045e-05, "loss": 0.20117294788360596, "step": 173780 }, { "epoch": 0.7461167924576905, "grad_norm": 0.27082696557044983, "learning_rate": 2.5498219259591422e-05, "loss": 0.10781660079956054, "step": 173790 }, { "epoch": 0.7461597245477104, "grad_norm": 0.006183996796607971, "learning_rate": 2.54939075394738e-05, "loss": 0.10997037887573242, "step": 173800 }, { "epoch": 0.7462026566377304, "grad_norm": 0.0367814339697361, "learning_rate": 2.5489595819356177e-05, "loss": 0.3016884088516235, "step": 173810 }, { "epoch": 0.7462455887277505, "grad_norm": 4.757618427276611, "learning_rate": 2.548528409923855e-05, "loss": 0.4883930206298828, "step": 173820 }, { "epoch": 0.7462885208177704, "grad_norm": 0.00017911156464833766, "learning_rate": 2.5480972379120928e-05, "loss": 0.28103320598602294, "step": 173830 }, { "epoch": 0.7463314529077905, "grad_norm": 0.10610644519329071, "learning_rate": 2.5476660659003305e-05, "loss": 0.11108193397521973, "step": 173840 }, { "epoch": 0.7463743849978105, "grad_norm": 0.007589709013700485, "learning_rate": 2.5472348938885683e-05, "loss": 0.47249712944030764, "step": 173850 }, { "epoch": 0.7464173170878304, "grad_norm": 2.555662155151367, "learning_rate": 2.5468037218768053e-05, "loss": 0.13019719123840331, "step": 173860 }, { "epoch": 0.7464602491778505, "grad_norm": 3.0848388671875, "learning_rate": 2.546372549865043e-05, "loss": 0.10937469005584717, "step": 173870 }, { "epoch": 0.7465031812678705, "grad_norm": 0.0011001820676028728, "learning_rate": 2.5459413778532808e-05, "loss": 0.06224575638771057, "step": 173880 }, { "epoch": 0.7465461133578905, "grad_norm": 1.4132946729660034, "learning_rate": 2.5455102058415185e-05, "loss": 0.3091392993927002, "step": 173890 }, { "epoch": 0.7465890454479105, "grad_norm": 3.6197102069854736, "learning_rate": 2.5450790338297566e-05, "loss": 0.28432717323303225, "step": 173900 }, { "epoch": 0.7466319775379305, "grad_norm": 0.5134070515632629, "learning_rate": 2.5446478618179936e-05, "loss": 0.21487205028533934, "step": 173910 }, { "epoch": 0.7466749096279505, "grad_norm": 1.3445314168930054, "learning_rate": 2.5442166898062314e-05, "loss": 0.08604246973991395, "step": 173920 }, { "epoch": 0.7467178417179705, "grad_norm": 0.9623371362686157, "learning_rate": 2.543785517794469e-05, "loss": 0.1378989338874817, "step": 173930 }, { "epoch": 0.7467607738079906, "grad_norm": 0.07971750944852829, "learning_rate": 2.543354345782707e-05, "loss": 0.136910879611969, "step": 173940 }, { "epoch": 0.7468037058980105, "grad_norm": 19.788955688476562, "learning_rate": 2.5429231737709442e-05, "loss": 0.07829828262329101, "step": 173950 }, { "epoch": 0.7468466379880305, "grad_norm": 0.132577046751976, "learning_rate": 2.542492001759182e-05, "loss": 0.16890411376953124, "step": 173960 }, { "epoch": 0.7468895700780506, "grad_norm": 6.434149265289307, "learning_rate": 2.5420608297474197e-05, "loss": 0.14480010271072388, "step": 173970 }, { "epoch": 0.7469325021680705, "grad_norm": 0.12347644567489624, "learning_rate": 2.5416296577356574e-05, "loss": 0.22138521671295167, "step": 173980 }, { "epoch": 0.7469754342580905, "grad_norm": 0.12491331994533539, "learning_rate": 2.5411984857238945e-05, "loss": 0.31520378589630127, "step": 173990 }, { "epoch": 0.7470183663481106, "grad_norm": 5.576565742492676, "learning_rate": 2.5407673137121322e-05, "loss": 0.18219449520111083, "step": 174000 }, { "epoch": 0.7470183663481106, "eval_loss": 0.38934630155563354, "eval_runtime": 27.4236, "eval_samples_per_second": 3.646, "eval_steps_per_second": 3.646, "step": 174000 }, { "epoch": 0.7470612984381305, "grad_norm": 1.1183409690856934, "learning_rate": 2.5403361417003703e-05, "loss": 0.33946449756622316, "step": 174010 }, { "epoch": 0.7471042305281506, "grad_norm": 0.02250983752310276, "learning_rate": 2.539904969688608e-05, "loss": 0.08952296376228333, "step": 174020 }, { "epoch": 0.7471471626181706, "grad_norm": 0.13014785945415497, "learning_rate": 2.539473797676845e-05, "loss": 0.18597830533981324, "step": 174030 }, { "epoch": 0.7471900947081905, "grad_norm": 1.9278321266174316, "learning_rate": 2.539042625665083e-05, "loss": 0.18685194253921508, "step": 174040 }, { "epoch": 0.7472330267982106, "grad_norm": 2.881211519241333, "learning_rate": 2.5386114536533206e-05, "loss": 0.18293185234069825, "step": 174050 }, { "epoch": 0.7472759588882306, "grad_norm": 0.12321528792381287, "learning_rate": 2.5381802816415583e-05, "loss": 0.19650537967681886, "step": 174060 }, { "epoch": 0.7473188909782505, "grad_norm": 0.0030896917451173067, "learning_rate": 2.5377491096297957e-05, "loss": 0.33722474575042727, "step": 174070 }, { "epoch": 0.7473618230682706, "grad_norm": 2.7675693035125732, "learning_rate": 2.5373179376180334e-05, "loss": 0.2696052551269531, "step": 174080 }, { "epoch": 0.7474047551582906, "grad_norm": 0.3029930591583252, "learning_rate": 2.536886765606271e-05, "loss": 0.16292864084243774, "step": 174090 }, { "epoch": 0.7474476872483107, "grad_norm": 0.000494702544528991, "learning_rate": 2.536455593594509e-05, "loss": 0.04459400177001953, "step": 174100 }, { "epoch": 0.7474906193383306, "grad_norm": 0.043872445821762085, "learning_rate": 2.536024421582746e-05, "loss": 0.009959495067596436, "step": 174110 }, { "epoch": 0.7475335514283507, "grad_norm": 0.008128165267407894, "learning_rate": 2.535593249570984e-05, "loss": 0.07839224338531495, "step": 174120 }, { "epoch": 0.7475764835183707, "grad_norm": 1.3136489391326904, "learning_rate": 2.5351620775592218e-05, "loss": 0.14459240436553955, "step": 174130 }, { "epoch": 0.7476194156083906, "grad_norm": 3.1148681640625, "learning_rate": 2.5347309055474595e-05, "loss": 0.28304076194763184, "step": 174140 }, { "epoch": 0.7476623476984107, "grad_norm": 0.47905516624450684, "learning_rate": 2.5342997335356966e-05, "loss": 0.1126200795173645, "step": 174150 }, { "epoch": 0.7477052797884307, "grad_norm": 0.06672538816928864, "learning_rate": 2.5338685615239343e-05, "loss": 0.16702120304107665, "step": 174160 }, { "epoch": 0.7477482118784506, "grad_norm": 0.004307939670979977, "learning_rate": 2.533437389512172e-05, "loss": 0.2730269908905029, "step": 174170 }, { "epoch": 0.7477911439684707, "grad_norm": 3.2004806995391846, "learning_rate": 2.5330062175004098e-05, "loss": 0.06000160574913025, "step": 174180 }, { "epoch": 0.7478340760584907, "grad_norm": 0.006153371185064316, "learning_rate": 2.532575045488647e-05, "loss": 0.04418157935142517, "step": 174190 }, { "epoch": 0.7478770081485107, "grad_norm": 0.005760138388723135, "learning_rate": 2.532143873476885e-05, "loss": 0.25185327529907225, "step": 174200 }, { "epoch": 0.7479199402385307, "grad_norm": 0.004189047031104565, "learning_rate": 2.5317127014651226e-05, "loss": 0.3935739755630493, "step": 174210 }, { "epoch": 0.7479628723285507, "grad_norm": 1.2200220823287964, "learning_rate": 2.5312815294533604e-05, "loss": 0.17278871536254883, "step": 174220 }, { "epoch": 0.7480058044185707, "grad_norm": 0.32186490297317505, "learning_rate": 2.5308503574415978e-05, "loss": 0.26594464778900145, "step": 174230 }, { "epoch": 0.7480487365085907, "grad_norm": 2.935149908065796, "learning_rate": 2.5304191854298355e-05, "loss": 0.10904730558395385, "step": 174240 }, { "epoch": 0.7480916685986108, "grad_norm": 3.964522361755371, "learning_rate": 2.5299880134180732e-05, "loss": 0.3033463001251221, "step": 174250 }, { "epoch": 0.7481346006886307, "grad_norm": 0.11229217797517776, "learning_rate": 2.529556841406311e-05, "loss": 0.2783724308013916, "step": 174260 }, { "epoch": 0.7481775327786507, "grad_norm": 1.392785668373108, "learning_rate": 2.5291256693945487e-05, "loss": 0.16704895496368408, "step": 174270 }, { "epoch": 0.7482204648686708, "grad_norm": 1.1950591802597046, "learning_rate": 2.5286944973827857e-05, "loss": 0.37365069389343264, "step": 174280 }, { "epoch": 0.7482633969586907, "grad_norm": 1.6007744073867798, "learning_rate": 2.5282633253710235e-05, "loss": 0.25082619190216066, "step": 174290 }, { "epoch": 0.7483063290487107, "grad_norm": 0.006831104401499033, "learning_rate": 2.5278321533592612e-05, "loss": 0.27920453548431395, "step": 174300 }, { "epoch": 0.7483492611387308, "grad_norm": 0.01827859692275524, "learning_rate": 2.5274009813474993e-05, "loss": 0.11764630079269409, "step": 174310 }, { "epoch": 0.7483921932287507, "grad_norm": 1.2253323793411255, "learning_rate": 2.5269698093357363e-05, "loss": 0.17802076339721679, "step": 174320 }, { "epoch": 0.7484351253187708, "grad_norm": 0.004427074920386076, "learning_rate": 2.526538637323974e-05, "loss": 0.1897179365158081, "step": 174330 }, { "epoch": 0.7484780574087908, "grad_norm": 0.019519807770848274, "learning_rate": 2.5261074653122118e-05, "loss": 0.2350210428237915, "step": 174340 }, { "epoch": 0.7485209894988107, "grad_norm": 46.768394470214844, "learning_rate": 2.5256762933004495e-05, "loss": 0.24312927722930908, "step": 174350 }, { "epoch": 0.7485639215888308, "grad_norm": 0.025334032252430916, "learning_rate": 2.525245121288687e-05, "loss": 0.20443146228790282, "step": 174360 }, { "epoch": 0.7486068536788508, "grad_norm": 0.11763063818216324, "learning_rate": 2.5248139492769247e-05, "loss": 0.19978156089782714, "step": 174370 }, { "epoch": 0.7486497857688708, "grad_norm": 6.262375354766846, "learning_rate": 2.5243827772651624e-05, "loss": 0.039094260334968566, "step": 174380 }, { "epoch": 0.7486927178588908, "grad_norm": 0.0023005977272987366, "learning_rate": 2.5239516052534e-05, "loss": 0.07948078513145447, "step": 174390 }, { "epoch": 0.7487356499489108, "grad_norm": 0.006748616229742765, "learning_rate": 2.5235204332416372e-05, "loss": 0.029941585659980775, "step": 174400 }, { "epoch": 0.7487785820389308, "grad_norm": 0.002810591831803322, "learning_rate": 2.523089261229875e-05, "loss": 0.4636706352233887, "step": 174410 }, { "epoch": 0.7488215141289508, "grad_norm": 1.3158349990844727, "learning_rate": 2.522658089218113e-05, "loss": 0.558734655380249, "step": 174420 }, { "epoch": 0.7488644462189709, "grad_norm": 0.004559780471026897, "learning_rate": 2.5222269172063507e-05, "loss": 0.14058899879455566, "step": 174430 }, { "epoch": 0.7489073783089908, "grad_norm": 1.9623855352401733, "learning_rate": 2.5217957451945878e-05, "loss": 0.16024988889694214, "step": 174440 }, { "epoch": 0.7489503103990108, "grad_norm": 0.013161673210561275, "learning_rate": 2.5213645731828255e-05, "loss": 0.2463479995727539, "step": 174450 }, { "epoch": 0.7489932424890309, "grad_norm": 0.013577003963291645, "learning_rate": 2.5209334011710633e-05, "loss": 0.2299262523651123, "step": 174460 }, { "epoch": 0.7490361745790508, "grad_norm": 0.0064038001000881195, "learning_rate": 2.520502229159301e-05, "loss": 0.16454423666000367, "step": 174470 }, { "epoch": 0.7490791066690708, "grad_norm": 0.006632882170379162, "learning_rate": 2.5200710571475384e-05, "loss": 0.15173077583312988, "step": 174480 }, { "epoch": 0.7491220387590909, "grad_norm": 1.0597169399261475, "learning_rate": 2.519639885135776e-05, "loss": 0.12685471773147583, "step": 174490 }, { "epoch": 0.7491649708491108, "grad_norm": 0.0004002380883321166, "learning_rate": 2.519208713124014e-05, "loss": 0.16025757789611816, "step": 174500 }, { "epoch": 0.7492079029391309, "grad_norm": 3.280585289001465, "learning_rate": 2.5187775411122516e-05, "loss": 0.09133055210113525, "step": 174510 }, { "epoch": 0.7492508350291509, "grad_norm": 0.008724031038582325, "learning_rate": 2.5183463691004887e-05, "loss": 0.21232659816741944, "step": 174520 }, { "epoch": 0.749293767119171, "grad_norm": 0.005878915078938007, "learning_rate": 2.5179151970887267e-05, "loss": 0.1962152123451233, "step": 174530 }, { "epoch": 0.7493366992091909, "grad_norm": 0.04787326976656914, "learning_rate": 2.5174840250769645e-05, "loss": 0.19027726650238036, "step": 174540 }, { "epoch": 0.7493796312992109, "grad_norm": 0.006681304890662432, "learning_rate": 2.5170528530652022e-05, "loss": 0.05747186541557312, "step": 174550 }, { "epoch": 0.749422563389231, "grad_norm": 0.04217128828167915, "learning_rate": 2.5166216810534393e-05, "loss": 0.18018451929092408, "step": 174560 }, { "epoch": 0.7494654954792509, "grad_norm": 7.066211700439453, "learning_rate": 2.516190509041677e-05, "loss": 0.18665244579315185, "step": 174570 }, { "epoch": 0.7495084275692709, "grad_norm": 3.013502597808838, "learning_rate": 2.5157593370299147e-05, "loss": 0.23242578506469727, "step": 174580 }, { "epoch": 0.749551359659291, "grad_norm": 1.7900302410125732, "learning_rate": 2.5153281650181525e-05, "loss": 0.18565467596054078, "step": 174590 }, { "epoch": 0.7495942917493109, "grad_norm": 1.183828592300415, "learning_rate": 2.5148969930063905e-05, "loss": 0.25924339294433596, "step": 174600 }, { "epoch": 0.749637223839331, "grad_norm": 3.1744091510772705, "learning_rate": 2.5144658209946276e-05, "loss": 0.19662506580352784, "step": 174610 }, { "epoch": 0.749680155929351, "grad_norm": 1.2753418684005737, "learning_rate": 2.5140346489828653e-05, "loss": 0.3594705820083618, "step": 174620 }, { "epoch": 0.7497230880193709, "grad_norm": 0.009313058108091354, "learning_rate": 2.513603476971103e-05, "loss": 0.10875067710876465, "step": 174630 }, { "epoch": 0.749766020109391, "grad_norm": 1.209693193435669, "learning_rate": 2.5131723049593408e-05, "loss": 0.1888060212135315, "step": 174640 }, { "epoch": 0.749808952199411, "grad_norm": 0.008648032322525978, "learning_rate": 2.5127411329475782e-05, "loss": 0.17343711853027344, "step": 174650 }, { "epoch": 0.749851884289431, "grad_norm": 0.12723885476589203, "learning_rate": 2.512309960935816e-05, "loss": 0.1848118543624878, "step": 174660 }, { "epoch": 0.749894816379451, "grad_norm": 0.01464887149631977, "learning_rate": 2.5118787889240537e-05, "loss": 0.050753462314605716, "step": 174670 }, { "epoch": 0.749937748469471, "grad_norm": 0.044885214418172836, "learning_rate": 2.5114476169122914e-05, "loss": 0.1882512927055359, "step": 174680 }, { "epoch": 0.749980680559491, "grad_norm": 1.5992013216018677, "learning_rate": 2.5110164449005284e-05, "loss": 0.14516894817352294, "step": 174690 }, { "epoch": 0.750023612649511, "grad_norm": 1.8369817733764648, "learning_rate": 2.5105852728887662e-05, "loss": 0.38750979900360105, "step": 174700 }, { "epoch": 0.750066544739531, "grad_norm": 0.6581123471260071, "learning_rate": 2.5101541008770043e-05, "loss": 0.28684771060943604, "step": 174710 }, { "epoch": 0.750109476829551, "grad_norm": 40.683799743652344, "learning_rate": 2.509722928865242e-05, "loss": 0.1460190534591675, "step": 174720 }, { "epoch": 0.750152408919571, "grad_norm": 0.03157550096511841, "learning_rate": 2.509291756853479e-05, "loss": 0.19834092855453492, "step": 174730 }, { "epoch": 0.7501953410095911, "grad_norm": 0.0021849041804671288, "learning_rate": 2.5088605848417168e-05, "loss": 0.1647346019744873, "step": 174740 }, { "epoch": 0.750238273099611, "grad_norm": 0.18896019458770752, "learning_rate": 2.5084294128299545e-05, "loss": 0.3278000593185425, "step": 174750 }, { "epoch": 0.750281205189631, "grad_norm": 0.932102620601654, "learning_rate": 2.5079982408181922e-05, "loss": 0.0883977472782135, "step": 174760 }, { "epoch": 0.7503241372796511, "grad_norm": 2.2175753116607666, "learning_rate": 2.5075670688064296e-05, "loss": 0.32731032371520996, "step": 174770 }, { "epoch": 0.750367069369671, "grad_norm": 0.004549882840365171, "learning_rate": 2.5071358967946674e-05, "loss": 0.17782050371170044, "step": 174780 }, { "epoch": 0.7504100014596911, "grad_norm": 2.5548672676086426, "learning_rate": 2.506704724782905e-05, "loss": 0.09278339147567749, "step": 174790 }, { "epoch": 0.7504529335497111, "grad_norm": 0.035067737102508545, "learning_rate": 2.506273552771143e-05, "loss": 0.01957797259092331, "step": 174800 }, { "epoch": 0.750495865639731, "grad_norm": 0.33087292313575745, "learning_rate": 2.50584238075938e-05, "loss": 0.4236994743347168, "step": 174810 }, { "epoch": 0.7505387977297511, "grad_norm": 0.002858347026631236, "learning_rate": 2.505411208747618e-05, "loss": 0.24580299854278564, "step": 174820 }, { "epoch": 0.7505817298197711, "grad_norm": 3.3408989906311035, "learning_rate": 2.5049800367358557e-05, "loss": 0.10669113397598266, "step": 174830 }, { "epoch": 0.750624661909791, "grad_norm": 0.4025706350803375, "learning_rate": 2.5045488647240934e-05, "loss": 0.19302643537521363, "step": 174840 }, { "epoch": 0.7506675939998111, "grad_norm": 0.33814287185668945, "learning_rate": 2.5041176927123305e-05, "loss": 0.1657320261001587, "step": 174850 }, { "epoch": 0.7507105260898311, "grad_norm": 0.5623843669891357, "learning_rate": 2.5036865207005682e-05, "loss": 0.030144301056861878, "step": 174860 }, { "epoch": 0.7507534581798511, "grad_norm": 0.4477728605270386, "learning_rate": 2.503255348688806e-05, "loss": 0.09932756423950195, "step": 174870 }, { "epoch": 0.7507963902698711, "grad_norm": 0.5520215034484863, "learning_rate": 2.5028241766770437e-05, "loss": 0.21815659999847412, "step": 174880 }, { "epoch": 0.7508393223598911, "grad_norm": 0.015766866505146027, "learning_rate": 2.502393004665281e-05, "loss": 0.17627745866775513, "step": 174890 }, { "epoch": 0.7508822544499111, "grad_norm": 0.22014105319976807, "learning_rate": 2.501961832653519e-05, "loss": 0.048931142687797545, "step": 174900 }, { "epoch": 0.7509251865399311, "grad_norm": 0.0031273365020751953, "learning_rate": 2.5015306606417566e-05, "loss": 0.2751063346862793, "step": 174910 }, { "epoch": 0.7509681186299512, "grad_norm": 0.4453137516975403, "learning_rate": 2.5010994886299943e-05, "loss": 0.32244696617126467, "step": 174920 }, { "epoch": 0.7510110507199711, "grad_norm": 2.4020941257476807, "learning_rate": 2.5006683166182317e-05, "loss": 0.15655696392059326, "step": 174930 }, { "epoch": 0.7510539828099911, "grad_norm": 2.225407361984253, "learning_rate": 2.5002371446064694e-05, "loss": 0.32480764389038086, "step": 174940 }, { "epoch": 0.7510969149000112, "grad_norm": 0.3002236783504486, "learning_rate": 2.499805972594707e-05, "loss": 0.14254547357559205, "step": 174950 }, { "epoch": 0.7511398469900312, "grad_norm": 1.3607335090637207, "learning_rate": 2.4993748005829446e-05, "loss": 0.3548447132110596, "step": 174960 }, { "epoch": 0.7511827790800512, "grad_norm": 10.988116264343262, "learning_rate": 2.4989436285711823e-05, "loss": 0.29602463245391847, "step": 174970 }, { "epoch": 0.7512257111700712, "grad_norm": 1.3006998300552368, "learning_rate": 2.49851245655942e-05, "loss": 0.15326464176177979, "step": 174980 }, { "epoch": 0.7512686432600912, "grad_norm": 0.04871809855103493, "learning_rate": 2.4980812845476574e-05, "loss": 0.08068002462387085, "step": 174990 }, { "epoch": 0.7513115753501112, "grad_norm": 0.0009460552246309817, "learning_rate": 2.497650112535895e-05, "loss": 0.1050765872001648, "step": 175000 }, { "epoch": 0.7513115753501112, "eval_loss": 0.39374786615371704, "eval_runtime": 27.3897, "eval_samples_per_second": 3.651, "eval_steps_per_second": 3.651, "step": 175000 }, { "epoch": 0.7513545074401312, "grad_norm": 0.04455732926726341, "learning_rate": 2.497218940524133e-05, "loss": 0.07660987973213196, "step": 175010 }, { "epoch": 0.7513974395301513, "grad_norm": 0.011786301620304585, "learning_rate": 2.4967877685123706e-05, "loss": 0.24636490345001222, "step": 175020 }, { "epoch": 0.7514403716201712, "grad_norm": 0.09234892576932907, "learning_rate": 2.496356596500608e-05, "loss": 0.35528485774993895, "step": 175030 }, { "epoch": 0.7514833037101912, "grad_norm": 0.13859136402606964, "learning_rate": 2.4959254244888458e-05, "loss": 0.2566323518753052, "step": 175040 }, { "epoch": 0.7515262358002113, "grad_norm": 0.0014120546402409673, "learning_rate": 2.495494252477083e-05, "loss": 0.06871371865272521, "step": 175050 }, { "epoch": 0.7515691678902312, "grad_norm": 0.20913314819335938, "learning_rate": 2.495063080465321e-05, "loss": 0.3590980052947998, "step": 175060 }, { "epoch": 0.7516120999802512, "grad_norm": 0.0026090750470757484, "learning_rate": 2.4946319084535586e-05, "loss": 0.10994521379470826, "step": 175070 }, { "epoch": 0.7516550320702713, "grad_norm": 0.018767498433589935, "learning_rate": 2.4942007364417964e-05, "loss": 0.1653411030769348, "step": 175080 }, { "epoch": 0.7516979641602912, "grad_norm": 0.011714949272572994, "learning_rate": 2.4937695644300338e-05, "loss": 0.17751481533050537, "step": 175090 }, { "epoch": 0.7517408962503113, "grad_norm": 0.042523354291915894, "learning_rate": 2.4933383924182715e-05, "loss": 0.19038463830947877, "step": 175100 }, { "epoch": 0.7517838283403313, "grad_norm": 1.2998957633972168, "learning_rate": 2.492907220406509e-05, "loss": 0.10467228889465333, "step": 175110 }, { "epoch": 0.7518267604303512, "grad_norm": 1.3815051317214966, "learning_rate": 2.492476048394747e-05, "loss": 0.15653225183486938, "step": 175120 }, { "epoch": 0.7518696925203713, "grad_norm": 9.51794719696045, "learning_rate": 2.4920448763829844e-05, "loss": 0.30117287635803225, "step": 175130 }, { "epoch": 0.7519126246103913, "grad_norm": 0.8320513367652893, "learning_rate": 2.491613704371222e-05, "loss": 0.19667335748672485, "step": 175140 }, { "epoch": 0.7519555567004113, "grad_norm": 1.9889274835586548, "learning_rate": 2.4911825323594595e-05, "loss": 0.20154480934143065, "step": 175150 }, { "epoch": 0.7519984887904313, "grad_norm": 0.0013290152419358492, "learning_rate": 2.4907513603476972e-05, "loss": 0.22941241264343262, "step": 175160 }, { "epoch": 0.7520414208804513, "grad_norm": 0.02920190989971161, "learning_rate": 2.4903201883359346e-05, "loss": 0.13015002012252808, "step": 175170 }, { "epoch": 0.7520843529704713, "grad_norm": 9.561676979064941, "learning_rate": 2.4898890163241727e-05, "loss": 0.31256589889526365, "step": 175180 }, { "epoch": 0.7521272850604913, "grad_norm": 1.7143646478652954, "learning_rate": 2.48945784431241e-05, "loss": 0.30118331909179685, "step": 175190 }, { "epoch": 0.7521702171505114, "grad_norm": 0.371857225894928, "learning_rate": 2.4890266723006478e-05, "loss": 0.24887962341308595, "step": 175200 }, { "epoch": 0.7522131492405313, "grad_norm": 1.154715657234192, "learning_rate": 2.4885955002888852e-05, "loss": 0.27854137420654296, "step": 175210 }, { "epoch": 0.7522560813305513, "grad_norm": 3.212376832962036, "learning_rate": 2.488164328277123e-05, "loss": 0.2116297721862793, "step": 175220 }, { "epoch": 0.7522990134205714, "grad_norm": 0.03922676295042038, "learning_rate": 2.4877331562653607e-05, "loss": 0.308078408241272, "step": 175230 }, { "epoch": 0.7523419455105913, "grad_norm": 0.01890389621257782, "learning_rate": 2.4873019842535984e-05, "loss": 0.2405827522277832, "step": 175240 }, { "epoch": 0.7523848776006113, "grad_norm": 6.3656487464904785, "learning_rate": 2.4868708122418358e-05, "loss": 0.36165671348571776, "step": 175250 }, { "epoch": 0.7524278096906314, "grad_norm": 0.003142143599689007, "learning_rate": 2.4864396402300735e-05, "loss": 0.30589354038238525, "step": 175260 }, { "epoch": 0.7524707417806513, "grad_norm": 0.10753688961267471, "learning_rate": 2.486008468218311e-05, "loss": 0.17673590183258056, "step": 175270 }, { "epoch": 0.7525136738706714, "grad_norm": 2.2089762687683105, "learning_rate": 2.4855772962065487e-05, "loss": 0.1999528169631958, "step": 175280 }, { "epoch": 0.7525566059606914, "grad_norm": 1.118449330329895, "learning_rate": 2.4851461241947864e-05, "loss": 0.15476460456848146, "step": 175290 }, { "epoch": 0.7525995380507113, "grad_norm": 1.8658347129821777, "learning_rate": 2.484714952183024e-05, "loss": 0.26465458869934083, "step": 175300 }, { "epoch": 0.7526424701407314, "grad_norm": 0.03414380922913551, "learning_rate": 2.4842837801712615e-05, "loss": 0.11501485109329224, "step": 175310 }, { "epoch": 0.7526854022307514, "grad_norm": 0.054499465972185135, "learning_rate": 2.4838526081594993e-05, "loss": 0.13327181339263916, "step": 175320 }, { "epoch": 0.7527283343207714, "grad_norm": 4.911616802215576, "learning_rate": 2.483421436147737e-05, "loss": 0.15141682624816893, "step": 175330 }, { "epoch": 0.7527712664107914, "grad_norm": 1.755698561668396, "learning_rate": 2.4829902641359744e-05, "loss": 0.1645986557006836, "step": 175340 }, { "epoch": 0.7528141985008114, "grad_norm": 1.3716384172439575, "learning_rate": 2.482559092124212e-05, "loss": 0.11155534982681274, "step": 175350 }, { "epoch": 0.7528571305908314, "grad_norm": 0.03115398809313774, "learning_rate": 2.48212792011245e-05, "loss": 0.13327916860580444, "step": 175360 }, { "epoch": 0.7529000626808514, "grad_norm": 1.6688318252563477, "learning_rate": 2.4816967481006876e-05, "loss": 0.15580270290374756, "step": 175370 }, { "epoch": 0.7529429947708715, "grad_norm": 0.20583480596542358, "learning_rate": 2.481265576088925e-05, "loss": 0.02036563605070114, "step": 175380 }, { "epoch": 0.7529859268608915, "grad_norm": 0.08086307346820831, "learning_rate": 2.4808344040771627e-05, "loss": 0.024845921993255617, "step": 175390 }, { "epoch": 0.7530288589509114, "grad_norm": 0.007556057535111904, "learning_rate": 2.4804032320654e-05, "loss": 0.17657938003540039, "step": 175400 }, { "epoch": 0.7530717910409315, "grad_norm": 0.0017575263045728207, "learning_rate": 2.479972060053638e-05, "loss": 0.186118745803833, "step": 175410 }, { "epoch": 0.7531147231309515, "grad_norm": 1.4013792276382446, "learning_rate": 2.4795408880418756e-05, "loss": 0.1842397928237915, "step": 175420 }, { "epoch": 0.7531576552209714, "grad_norm": 0.01723472774028778, "learning_rate": 2.4791097160301133e-05, "loss": 0.28156132698059083, "step": 175430 }, { "epoch": 0.7532005873109915, "grad_norm": 2.1313681602478027, "learning_rate": 2.4786785440183507e-05, "loss": 0.21020984649658203, "step": 175440 }, { "epoch": 0.7532435194010115, "grad_norm": 0.006951197050511837, "learning_rate": 2.4782473720065885e-05, "loss": 0.09738854169845582, "step": 175450 }, { "epoch": 0.7532864514910315, "grad_norm": 6.1560444831848145, "learning_rate": 2.477816199994826e-05, "loss": 0.23765075206756592, "step": 175460 }, { "epoch": 0.7533293835810515, "grad_norm": 0.4051508605480194, "learning_rate": 2.477385027983064e-05, "loss": 0.2988025665283203, "step": 175470 }, { "epoch": 0.7533723156710715, "grad_norm": 0.34460052847862244, "learning_rate": 2.4769538559713013e-05, "loss": 0.1418423533439636, "step": 175480 }, { "epoch": 0.7534152477610915, "grad_norm": 0.13387997448444366, "learning_rate": 2.476522683959539e-05, "loss": 0.10612796545028687, "step": 175490 }, { "epoch": 0.7534581798511115, "grad_norm": 1.4258102178573608, "learning_rate": 2.4760915119477765e-05, "loss": 0.141604745388031, "step": 175500 }, { "epoch": 0.7535011119411316, "grad_norm": 0.0010811506072059274, "learning_rate": 2.4756603399360142e-05, "loss": 0.2934016227722168, "step": 175510 }, { "epoch": 0.7535440440311515, "grad_norm": 0.0018770555034279823, "learning_rate": 2.4752291679242516e-05, "loss": 0.017631618678569792, "step": 175520 }, { "epoch": 0.7535869761211715, "grad_norm": 4.808864116668701, "learning_rate": 2.4747979959124897e-05, "loss": 0.3421001434326172, "step": 175530 }, { "epoch": 0.7536299082111916, "grad_norm": 0.003242551814764738, "learning_rate": 2.474366823900727e-05, "loss": 0.24067187309265137, "step": 175540 }, { "epoch": 0.7536728403012115, "grad_norm": 0.0072052436880767345, "learning_rate": 2.4739356518889648e-05, "loss": 0.08315362930297851, "step": 175550 }, { "epoch": 0.7537157723912316, "grad_norm": 0.00040326855378225446, "learning_rate": 2.4735044798772022e-05, "loss": 0.1443575143814087, "step": 175560 }, { "epoch": 0.7537587044812516, "grad_norm": 20.111753463745117, "learning_rate": 2.47307330786544e-05, "loss": 0.31186325550079347, "step": 175570 }, { "epoch": 0.7538016365712715, "grad_norm": 0.008096352219581604, "learning_rate": 2.4726421358536777e-05, "loss": 0.08084225654602051, "step": 175580 }, { "epoch": 0.7538445686612916, "grad_norm": 3.5794265270233154, "learning_rate": 2.4722109638419154e-05, "loss": 0.23514416217803955, "step": 175590 }, { "epoch": 0.7538875007513116, "grad_norm": 0.01341279223561287, "learning_rate": 2.4717797918301528e-05, "loss": 0.17565954923629762, "step": 175600 }, { "epoch": 0.7539304328413315, "grad_norm": 0.15792067348957062, "learning_rate": 2.4713486198183905e-05, "loss": 0.10981240272521972, "step": 175610 }, { "epoch": 0.7539733649313516, "grad_norm": 0.02763104997575283, "learning_rate": 2.470917447806628e-05, "loss": 0.10967028141021729, "step": 175620 }, { "epoch": 0.7540162970213716, "grad_norm": 3.09664249420166, "learning_rate": 2.4704862757948656e-05, "loss": 0.14928872585296632, "step": 175630 }, { "epoch": 0.7540592291113916, "grad_norm": 3.242180347442627, "learning_rate": 2.4700551037831034e-05, "loss": 0.19187842607498168, "step": 175640 }, { "epoch": 0.7541021612014116, "grad_norm": 0.0028528040274977684, "learning_rate": 2.469623931771341e-05, "loss": 0.005672257766127587, "step": 175650 }, { "epoch": 0.7541450932914316, "grad_norm": 0.5106819272041321, "learning_rate": 2.4691927597595785e-05, "loss": 0.16544620990753173, "step": 175660 }, { "epoch": 0.7541880253814516, "grad_norm": 1.5341259241104126, "learning_rate": 2.4687615877478162e-05, "loss": 0.2775939226150513, "step": 175670 }, { "epoch": 0.7542309574714716, "grad_norm": 0.32357120513916016, "learning_rate": 2.4683304157360536e-05, "loss": 0.1690353274345398, "step": 175680 }, { "epoch": 0.7542738895614917, "grad_norm": 0.04019639268517494, "learning_rate": 2.4678992437242914e-05, "loss": 0.21661920547485353, "step": 175690 }, { "epoch": 0.7543168216515116, "grad_norm": 0.0546407625079155, "learning_rate": 2.467468071712529e-05, "loss": 0.18709558248519897, "step": 175700 }, { "epoch": 0.7543597537415316, "grad_norm": 0.029099030420184135, "learning_rate": 2.467036899700767e-05, "loss": 0.17751796245574952, "step": 175710 }, { "epoch": 0.7544026858315517, "grad_norm": 0.00495111383497715, "learning_rate": 2.4666057276890046e-05, "loss": 0.14952001571655274, "step": 175720 }, { "epoch": 0.7544456179215716, "grad_norm": 2.223857879638672, "learning_rate": 2.466174555677242e-05, "loss": 0.34983224868774415, "step": 175730 }, { "epoch": 0.7544885500115917, "grad_norm": 0.016578232869505882, "learning_rate": 2.4657433836654797e-05, "loss": 0.15543704032897948, "step": 175740 }, { "epoch": 0.7545314821016117, "grad_norm": 0.9805734157562256, "learning_rate": 2.465312211653717e-05, "loss": 0.2730615854263306, "step": 175750 }, { "epoch": 0.7545744141916316, "grad_norm": 0.12643659114837646, "learning_rate": 2.464881039641955e-05, "loss": 0.07886726260185242, "step": 175760 }, { "epoch": 0.7546173462816517, "grad_norm": 2.201289176940918, "learning_rate": 2.4644498676301926e-05, "loss": 0.30348107814788816, "step": 175770 }, { "epoch": 0.7546602783716717, "grad_norm": 1.6394163370132446, "learning_rate": 2.4640186956184303e-05, "loss": 0.31897358894348143, "step": 175780 }, { "epoch": 0.7547032104616916, "grad_norm": 0.00793420895934105, "learning_rate": 2.4635875236066677e-05, "loss": 0.35818772315979003, "step": 175790 }, { "epoch": 0.7547461425517117, "grad_norm": 0.0008388396818190813, "learning_rate": 2.4631563515949054e-05, "loss": 0.1866925001144409, "step": 175800 }, { "epoch": 0.7547890746417317, "grad_norm": 0.001753473188728094, "learning_rate": 2.4627251795831428e-05, "loss": 0.368836498260498, "step": 175810 }, { "epoch": 0.7548320067317518, "grad_norm": 0.00452902726829052, "learning_rate": 2.462294007571381e-05, "loss": 0.1541559338569641, "step": 175820 }, { "epoch": 0.7548749388217717, "grad_norm": 0.17083846032619476, "learning_rate": 2.4618628355596183e-05, "loss": 0.12283452749252319, "step": 175830 }, { "epoch": 0.7549178709117917, "grad_norm": 7.021027088165283, "learning_rate": 2.461431663547856e-05, "loss": 0.34672794342041013, "step": 175840 }, { "epoch": 0.7549608030018118, "grad_norm": 0.008842471987009048, "learning_rate": 2.4610004915360934e-05, "loss": 0.4350734233856201, "step": 175850 }, { "epoch": 0.7550037350918317, "grad_norm": 2.1589739322662354, "learning_rate": 2.460569319524331e-05, "loss": 0.1623774290084839, "step": 175860 }, { "epoch": 0.7550466671818518, "grad_norm": 1.3766883611679077, "learning_rate": 2.4601381475125686e-05, "loss": 0.24213268756866455, "step": 175870 }, { "epoch": 0.7550895992718718, "grad_norm": 1.2221431732177734, "learning_rate": 2.4597069755008066e-05, "loss": 0.19608230590820314, "step": 175880 }, { "epoch": 0.7551325313618917, "grad_norm": 4.313803672790527, "learning_rate": 2.459275803489044e-05, "loss": 0.32244043350219725, "step": 175890 }, { "epoch": 0.7551754634519118, "grad_norm": 0.08472935110330582, "learning_rate": 2.4588446314772818e-05, "loss": 0.07666860222816467, "step": 175900 }, { "epoch": 0.7552183955419318, "grad_norm": 0.015484074130654335, "learning_rate": 2.458413459465519e-05, "loss": 0.17252891063690184, "step": 175910 }, { "epoch": 0.7552613276319518, "grad_norm": 0.014127151109278202, "learning_rate": 2.457982287453757e-05, "loss": 0.1840136766433716, "step": 175920 }, { "epoch": 0.7553042597219718, "grad_norm": 0.28970640897750854, "learning_rate": 2.4575511154419943e-05, "loss": 0.21439287662506104, "step": 175930 }, { "epoch": 0.7553471918119918, "grad_norm": 0.0020846251863986254, "learning_rate": 2.4571199434302324e-05, "loss": 0.13129196166992188, "step": 175940 }, { "epoch": 0.7553901239020118, "grad_norm": 0.0034930300898849964, "learning_rate": 2.4566887714184698e-05, "loss": 0.3986776828765869, "step": 175950 }, { "epoch": 0.7554330559920318, "grad_norm": 0.006673635449260473, "learning_rate": 2.4562575994067075e-05, "loss": 0.21093640327453614, "step": 175960 }, { "epoch": 0.7554759880820519, "grad_norm": 1.6050211191177368, "learning_rate": 2.455826427394945e-05, "loss": 0.15360102653503419, "step": 175970 }, { "epoch": 0.7555189201720718, "grad_norm": 2.7723329067230225, "learning_rate": 2.4553952553831826e-05, "loss": 0.11786471605300904, "step": 175980 }, { "epoch": 0.7555618522620918, "grad_norm": 0.004894792102277279, "learning_rate": 2.4549640833714204e-05, "loss": 0.11137468814849853, "step": 175990 }, { "epoch": 0.7556047843521119, "grad_norm": 0.0016606005374342203, "learning_rate": 2.454532911359658e-05, "loss": 0.2943387031555176, "step": 176000 }, { "epoch": 0.7556047843521119, "eval_loss": 0.3844696879386902, "eval_runtime": 27.4381, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 176000 }, { "epoch": 0.7556477164421318, "grad_norm": 3.118253469467163, "learning_rate": 2.4541017393478955e-05, "loss": 0.13530291318893434, "step": 176010 }, { "epoch": 0.7556906485321518, "grad_norm": 0.21326979994773865, "learning_rate": 2.4536705673361332e-05, "loss": 0.449766731262207, "step": 176020 }, { "epoch": 0.7557335806221719, "grad_norm": 0.07444790750741959, "learning_rate": 2.4532393953243706e-05, "loss": 0.0024720698595046995, "step": 176030 }, { "epoch": 0.7557765127121918, "grad_norm": 1.2973755598068237, "learning_rate": 2.4528082233126083e-05, "loss": 0.26403958797454835, "step": 176040 }, { "epoch": 0.7558194448022119, "grad_norm": 0.0019384416518732905, "learning_rate": 2.452377051300846e-05, "loss": 0.13067800998687745, "step": 176050 }, { "epoch": 0.7558623768922319, "grad_norm": 0.9657537937164307, "learning_rate": 2.4519458792890838e-05, "loss": 0.20171825885772704, "step": 176060 }, { "epoch": 0.7559053089822518, "grad_norm": 0.0036415038630366325, "learning_rate": 2.4515147072773216e-05, "loss": 0.2104336977005005, "step": 176070 }, { "epoch": 0.7559482410722719, "grad_norm": 3.9165921211242676, "learning_rate": 2.451083535265559e-05, "loss": 0.4046797752380371, "step": 176080 }, { "epoch": 0.7559911731622919, "grad_norm": 0.004895086400210857, "learning_rate": 2.4506523632537967e-05, "loss": 0.2396653413772583, "step": 176090 }, { "epoch": 0.7560341052523118, "grad_norm": 7.71179723739624, "learning_rate": 2.450221191242034e-05, "loss": 0.17154238224029542, "step": 176100 }, { "epoch": 0.7560770373423319, "grad_norm": 0.16281859576702118, "learning_rate": 2.4497900192302718e-05, "loss": 0.2616174936294556, "step": 176110 }, { "epoch": 0.7561199694323519, "grad_norm": 3.693711757659912, "learning_rate": 2.4493588472185095e-05, "loss": 0.32230591773986816, "step": 176120 }, { "epoch": 0.7561629015223719, "grad_norm": 0.003052955726161599, "learning_rate": 2.4489276752067473e-05, "loss": 0.24035139083862306, "step": 176130 }, { "epoch": 0.7562058336123919, "grad_norm": 5.051456451416016, "learning_rate": 2.4484965031949847e-05, "loss": 0.368407154083252, "step": 176140 }, { "epoch": 0.756248765702412, "grad_norm": 0.01023197453469038, "learning_rate": 2.4480653311832224e-05, "loss": 0.2336270809173584, "step": 176150 }, { "epoch": 0.7562916977924319, "grad_norm": 0.053483642637729645, "learning_rate": 2.4476341591714598e-05, "loss": 0.07102344632148742, "step": 176160 }, { "epoch": 0.7563346298824519, "grad_norm": 0.002425705548375845, "learning_rate": 2.4472029871596975e-05, "loss": 0.19212217330932618, "step": 176170 }, { "epoch": 0.756377561972472, "grad_norm": 0.0017519342945888638, "learning_rate": 2.4467718151479353e-05, "loss": 0.4195821285247803, "step": 176180 }, { "epoch": 0.7564204940624919, "grad_norm": 0.8289617896080017, "learning_rate": 2.446340643136173e-05, "loss": 0.2355353355407715, "step": 176190 }, { "epoch": 0.7564634261525119, "grad_norm": 0.9240083694458008, "learning_rate": 2.4459094711244104e-05, "loss": 0.11165834665298462, "step": 176200 }, { "epoch": 0.756506358242532, "grad_norm": 3.18680739402771, "learning_rate": 2.445478299112648e-05, "loss": 0.3855263710021973, "step": 176210 }, { "epoch": 0.7565492903325519, "grad_norm": 0.004274186212569475, "learning_rate": 2.4450471271008855e-05, "loss": 0.07496775388717651, "step": 176220 }, { "epoch": 0.756592222422572, "grad_norm": 0.28305789828300476, "learning_rate": 2.4446159550891236e-05, "loss": 0.11907908916473389, "step": 176230 }, { "epoch": 0.756635154512592, "grad_norm": 0.0008765619131736457, "learning_rate": 2.444184783077361e-05, "loss": 0.014611579477787018, "step": 176240 }, { "epoch": 0.756678086602612, "grad_norm": 0.030822429805994034, "learning_rate": 2.4437536110655987e-05, "loss": 0.17702211141586305, "step": 176250 }, { "epoch": 0.756721018692632, "grad_norm": 0.00023859924112912267, "learning_rate": 2.443322439053836e-05, "loss": 0.21805362701416015, "step": 176260 }, { "epoch": 0.756763950782652, "grad_norm": 0.06023327261209488, "learning_rate": 2.442891267042074e-05, "loss": 0.23725461959838867, "step": 176270 }, { "epoch": 0.7568068828726721, "grad_norm": 0.06006970629096031, "learning_rate": 2.4424600950303113e-05, "loss": 0.07275346517562867, "step": 176280 }, { "epoch": 0.756849814962692, "grad_norm": 2.060161590576172, "learning_rate": 2.4420289230185493e-05, "loss": 0.469630765914917, "step": 176290 }, { "epoch": 0.756892747052712, "grad_norm": 0.01709127053618431, "learning_rate": 2.4415977510067867e-05, "loss": 0.29026083946228026, "step": 176300 }, { "epoch": 0.7569356791427321, "grad_norm": 1.0979907512664795, "learning_rate": 2.4411665789950245e-05, "loss": 0.152878737449646, "step": 176310 }, { "epoch": 0.756978611232752, "grad_norm": 0.0037508816458284855, "learning_rate": 2.440735406983262e-05, "loss": 0.14382373094558715, "step": 176320 }, { "epoch": 0.757021543322772, "grad_norm": 3.9158923625946045, "learning_rate": 2.4403042349714996e-05, "loss": 0.27426795959472655, "step": 176330 }, { "epoch": 0.7570644754127921, "grad_norm": 0.7643011808395386, "learning_rate": 2.4398730629597373e-05, "loss": 0.14274754524230956, "step": 176340 }, { "epoch": 0.757107407502812, "grad_norm": 0.0008324419031850994, "learning_rate": 2.439441890947975e-05, "loss": 0.22440826892852783, "step": 176350 }, { "epoch": 0.7571503395928321, "grad_norm": 1.3607858419418335, "learning_rate": 2.4390107189362125e-05, "loss": 0.1967037558555603, "step": 176360 }, { "epoch": 0.7571932716828521, "grad_norm": 0.0033644582144916058, "learning_rate": 2.4385795469244502e-05, "loss": 0.12327347993850708, "step": 176370 }, { "epoch": 0.757236203772872, "grad_norm": 0.0029008015990257263, "learning_rate": 2.4381483749126876e-05, "loss": 0.21013979911804198, "step": 176380 }, { "epoch": 0.7572791358628921, "grad_norm": 0.00942598469555378, "learning_rate": 2.4377172029009253e-05, "loss": 0.0036503538489341737, "step": 176390 }, { "epoch": 0.7573220679529121, "grad_norm": 1.111317753791809, "learning_rate": 2.437286030889163e-05, "loss": 0.19102848768234254, "step": 176400 }, { "epoch": 0.7573650000429321, "grad_norm": 4.567811965942383, "learning_rate": 2.4368548588774008e-05, "loss": 0.16488993167877197, "step": 176410 }, { "epoch": 0.7574079321329521, "grad_norm": 0.011961126700043678, "learning_rate": 2.4364236868656385e-05, "loss": 0.15731921195983886, "step": 176420 }, { "epoch": 0.7574508642229721, "grad_norm": 0.000113897658593487, "learning_rate": 2.435992514853876e-05, "loss": 0.3114840030670166, "step": 176430 }, { "epoch": 0.7574937963129921, "grad_norm": 1.7821091413497925, "learning_rate": 2.4355613428421137e-05, "loss": 0.4081563949584961, "step": 176440 }, { "epoch": 0.7575367284030121, "grad_norm": 0.09143106639385223, "learning_rate": 2.435130170830351e-05, "loss": 0.18028711080551146, "step": 176450 }, { "epoch": 0.7575796604930322, "grad_norm": 1.0728000402450562, "learning_rate": 2.4346989988185888e-05, "loss": 0.25679965019226075, "step": 176460 }, { "epoch": 0.7576225925830521, "grad_norm": 1.9048360586166382, "learning_rate": 2.4342678268068265e-05, "loss": 0.1729556918144226, "step": 176470 }, { "epoch": 0.7576655246730721, "grad_norm": 1.2008455991744995, "learning_rate": 2.4338366547950643e-05, "loss": 0.2813676118850708, "step": 176480 }, { "epoch": 0.7577084567630922, "grad_norm": 0.7849253416061401, "learning_rate": 2.4334054827833016e-05, "loss": 0.24318392276763917, "step": 176490 }, { "epoch": 0.7577513888531121, "grad_norm": 0.015166381374001503, "learning_rate": 2.4329743107715394e-05, "loss": 0.14935698509216308, "step": 176500 }, { "epoch": 0.7577943209431321, "grad_norm": 0.3513484299182892, "learning_rate": 2.4325431387597768e-05, "loss": 0.09348582029342652, "step": 176510 }, { "epoch": 0.7578372530331522, "grad_norm": 2.0167341232299805, "learning_rate": 2.4321119667480145e-05, "loss": 0.15381017923355103, "step": 176520 }, { "epoch": 0.7578801851231721, "grad_norm": 2.2151646614074707, "learning_rate": 2.4316807947362522e-05, "loss": 0.25024800300598143, "step": 176530 }, { "epoch": 0.7579231172131922, "grad_norm": 4.011649131774902, "learning_rate": 2.43124962272449e-05, "loss": 0.31911654472351075, "step": 176540 }, { "epoch": 0.7579660493032122, "grad_norm": 0.0012101922184228897, "learning_rate": 2.4308184507127274e-05, "loss": 0.1742846131324768, "step": 176550 }, { "epoch": 0.7580089813932321, "grad_norm": 0.0010554291075095534, "learning_rate": 2.430387278700965e-05, "loss": 0.16264034509658815, "step": 176560 }, { "epoch": 0.7580519134832522, "grad_norm": 2.1255459785461426, "learning_rate": 2.4299561066892025e-05, "loss": 0.22510194778442383, "step": 176570 }, { "epoch": 0.7580948455732722, "grad_norm": 5.232020378112793, "learning_rate": 2.4295249346774406e-05, "loss": 0.4652894973754883, "step": 176580 }, { "epoch": 0.7581377776632922, "grad_norm": 0.01214161328971386, "learning_rate": 2.429093762665678e-05, "loss": 0.21678454875946046, "step": 176590 }, { "epoch": 0.7581807097533122, "grad_norm": 0.051134634763002396, "learning_rate": 2.4286625906539157e-05, "loss": 0.17276641130447387, "step": 176600 }, { "epoch": 0.7582236418433322, "grad_norm": 0.879493236541748, "learning_rate": 2.428231418642153e-05, "loss": 0.23750553131103516, "step": 176610 }, { "epoch": 0.7582665739333522, "grad_norm": 0.2266494333744049, "learning_rate": 2.427800246630391e-05, "loss": 0.14086905717849732, "step": 176620 }, { "epoch": 0.7583095060233722, "grad_norm": 5.996125221252441, "learning_rate": 2.4273690746186282e-05, "loss": 0.16965644359588622, "step": 176630 }, { "epoch": 0.7583524381133923, "grad_norm": 0.20658458769321442, "learning_rate": 2.4269379026068663e-05, "loss": 0.25354115962982177, "step": 176640 }, { "epoch": 0.7583953702034122, "grad_norm": 0.07418932020664215, "learning_rate": 2.4265067305951037e-05, "loss": 0.27120108604431153, "step": 176650 }, { "epoch": 0.7584383022934322, "grad_norm": 0.009839876554906368, "learning_rate": 2.4260755585833414e-05, "loss": 0.3132223844528198, "step": 176660 }, { "epoch": 0.7584812343834523, "grad_norm": 0.00416004192084074, "learning_rate": 2.425644386571579e-05, "loss": 0.35200812816619875, "step": 176670 }, { "epoch": 0.7585241664734723, "grad_norm": 0.0022613676264882088, "learning_rate": 2.4252132145598166e-05, "loss": 0.13670860528945922, "step": 176680 }, { "epoch": 0.7585670985634922, "grad_norm": 0.19223640859127045, "learning_rate": 2.4247820425480543e-05, "loss": 0.06737182736396789, "step": 176690 }, { "epoch": 0.7586100306535123, "grad_norm": 1.3617298603057861, "learning_rate": 2.424350870536292e-05, "loss": 0.0552653968334198, "step": 176700 }, { "epoch": 0.7586529627435323, "grad_norm": 0.01469328347593546, "learning_rate": 2.4239196985245294e-05, "loss": 0.16656371355056762, "step": 176710 }, { "epoch": 0.7586958948335523, "grad_norm": 0.01902511715888977, "learning_rate": 2.423488526512767e-05, "loss": 0.16330991983413695, "step": 176720 }, { "epoch": 0.7587388269235723, "grad_norm": 0.26943960785865784, "learning_rate": 2.4230573545010046e-05, "loss": 0.22292008399963378, "step": 176730 }, { "epoch": 0.7587817590135923, "grad_norm": 0.25879213213920593, "learning_rate": 2.4226261824892423e-05, "loss": 0.17576502561569213, "step": 176740 }, { "epoch": 0.7588246911036123, "grad_norm": 0.42930835485458374, "learning_rate": 2.42219501047748e-05, "loss": 0.15430418252944947, "step": 176750 }, { "epoch": 0.7588676231936323, "grad_norm": 1.0486270189285278, "learning_rate": 2.4217638384657178e-05, "loss": 0.4111494064331055, "step": 176760 }, { "epoch": 0.7589105552836524, "grad_norm": 1.5241907835006714, "learning_rate": 2.421332666453955e-05, "loss": 0.15125579833984376, "step": 176770 }, { "epoch": 0.7589534873736723, "grad_norm": 3.3031556606292725, "learning_rate": 2.420901494442193e-05, "loss": 0.05916283130645752, "step": 176780 }, { "epoch": 0.7589964194636923, "grad_norm": 0.0032096717040985823, "learning_rate": 2.4204703224304306e-05, "loss": 0.1436079502105713, "step": 176790 }, { "epoch": 0.7590393515537124, "grad_norm": 0.14260707795619965, "learning_rate": 2.420039150418668e-05, "loss": 0.3289307117462158, "step": 176800 }, { "epoch": 0.7590822836437323, "grad_norm": 4.995758533477783, "learning_rate": 2.4196079784069058e-05, "loss": 0.2021782875061035, "step": 176810 }, { "epoch": 0.7591252157337524, "grad_norm": 15.415989875793457, "learning_rate": 2.4191768063951435e-05, "loss": 0.3475226163864136, "step": 176820 }, { "epoch": 0.7591681478237724, "grad_norm": 2.6475751399993896, "learning_rate": 2.4187456343833812e-05, "loss": 0.22502753734588624, "step": 176830 }, { "epoch": 0.7592110799137923, "grad_norm": 5.432012557983398, "learning_rate": 2.4183144623716186e-05, "loss": 0.15463144779205323, "step": 176840 }, { "epoch": 0.7592540120038124, "grad_norm": 1.455808401107788, "learning_rate": 2.4178832903598564e-05, "loss": 0.3014279127120972, "step": 176850 }, { "epoch": 0.7592969440938324, "grad_norm": 0.05897454172372818, "learning_rate": 2.4174521183480938e-05, "loss": 0.22337071895599364, "step": 176860 }, { "epoch": 0.7593398761838523, "grad_norm": 1.421477198600769, "learning_rate": 2.4170209463363315e-05, "loss": 0.36787757873535154, "step": 176870 }, { "epoch": 0.7593828082738724, "grad_norm": 25.1231746673584, "learning_rate": 2.4165897743245692e-05, "loss": 0.10162899494171143, "step": 176880 }, { "epoch": 0.7594257403638924, "grad_norm": 0.4189836382865906, "learning_rate": 2.416158602312807e-05, "loss": 0.3461848258972168, "step": 176890 }, { "epoch": 0.7594686724539124, "grad_norm": 3.7396621704101562, "learning_rate": 2.4157274303010443e-05, "loss": 0.12798227071762086, "step": 176900 }, { "epoch": 0.7595116045439324, "grad_norm": 0.008923502638936043, "learning_rate": 2.415296258289282e-05, "loss": 0.15080041885375978, "step": 176910 }, { "epoch": 0.7595545366339524, "grad_norm": 0.0778871700167656, "learning_rate": 2.4148650862775195e-05, "loss": 0.04988257884979248, "step": 176920 }, { "epoch": 0.7595974687239724, "grad_norm": 0.15040385723114014, "learning_rate": 2.4144339142657572e-05, "loss": 0.3418047666549683, "step": 176930 }, { "epoch": 0.7596404008139924, "grad_norm": 54.44385528564453, "learning_rate": 2.414002742253995e-05, "loss": 0.30226004123687744, "step": 176940 }, { "epoch": 0.7596833329040125, "grad_norm": 3.170269250869751, "learning_rate": 2.4135715702422327e-05, "loss": 0.1979185461997986, "step": 176950 }, { "epoch": 0.7597262649940324, "grad_norm": 1.6455987691879272, "learning_rate": 2.41314039823047e-05, "loss": 0.18162251710891725, "step": 176960 }, { "epoch": 0.7597691970840524, "grad_norm": 1.4148166179656982, "learning_rate": 2.4127092262187078e-05, "loss": 0.16824384927749633, "step": 176970 }, { "epoch": 0.7598121291740725, "grad_norm": 0.18569713830947876, "learning_rate": 2.4122780542069452e-05, "loss": 0.12295417785644532, "step": 176980 }, { "epoch": 0.7598550612640924, "grad_norm": 2.242105007171631, "learning_rate": 2.4118468821951833e-05, "loss": 0.1427332878112793, "step": 176990 }, { "epoch": 0.7598979933541125, "grad_norm": 0.004524548072367907, "learning_rate": 2.4114157101834207e-05, "loss": 0.3069367647171021, "step": 177000 }, { "epoch": 0.7598979933541125, "eval_loss": 0.39005473256111145, "eval_runtime": 27.4316, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 177000 }, { "epoch": 0.7599409254441325, "grad_norm": 0.8188188672065735, "learning_rate": 2.4109845381716584e-05, "loss": 0.17687420845031737, "step": 177010 }, { "epoch": 0.7599838575341524, "grad_norm": 0.44165608286857605, "learning_rate": 2.4105533661598958e-05, "loss": 0.06316535472869873, "step": 177020 }, { "epoch": 0.7600267896241725, "grad_norm": 2.4210214614868164, "learning_rate": 2.4101221941481335e-05, "loss": 0.49973325729370116, "step": 177030 }, { "epoch": 0.7600697217141925, "grad_norm": 0.012840594165027142, "learning_rate": 2.409691022136371e-05, "loss": 0.06451746821403503, "step": 177040 }, { "epoch": 0.7601126538042124, "grad_norm": 0.003341269213706255, "learning_rate": 2.409259850124609e-05, "loss": 0.1332295298576355, "step": 177050 }, { "epoch": 0.7601555858942325, "grad_norm": 1.3087090253829956, "learning_rate": 2.4088286781128464e-05, "loss": 0.11503291130065918, "step": 177060 }, { "epoch": 0.7601985179842525, "grad_norm": 0.07258166372776031, "learning_rate": 2.408397506101084e-05, "loss": 0.18839584589004515, "step": 177070 }, { "epoch": 0.7602414500742725, "grad_norm": 0.07161829620599747, "learning_rate": 2.4079663340893215e-05, "loss": 0.07642164826393127, "step": 177080 }, { "epoch": 0.7602843821642925, "grad_norm": 0.0005039049428887665, "learning_rate": 2.4075351620775593e-05, "loss": 0.17287344932556153, "step": 177090 }, { "epoch": 0.7603273142543125, "grad_norm": 0.0029817174654453993, "learning_rate": 2.407103990065797e-05, "loss": 0.1694784641265869, "step": 177100 }, { "epoch": 0.7603702463443326, "grad_norm": 1.9690752029418945, "learning_rate": 2.4066728180540347e-05, "loss": 0.1936242938041687, "step": 177110 }, { "epoch": 0.7604131784343525, "grad_norm": 0.013945703394711018, "learning_rate": 2.406241646042272e-05, "loss": 0.18476022481918336, "step": 177120 }, { "epoch": 0.7604561105243726, "grad_norm": 0.012228801846504211, "learning_rate": 2.40581047403051e-05, "loss": 0.10598087310791016, "step": 177130 }, { "epoch": 0.7604990426143926, "grad_norm": 0.026704225689172745, "learning_rate": 2.4053793020187473e-05, "loss": 0.11414153575897217, "step": 177140 }, { "epoch": 0.7605419747044125, "grad_norm": 0.03746294975280762, "learning_rate": 2.404948130006985e-05, "loss": 0.07400650978088379, "step": 177150 }, { "epoch": 0.7605849067944326, "grad_norm": 2.7711026668548584, "learning_rate": 2.4045169579952227e-05, "loss": 0.19555963277816774, "step": 177160 }, { "epoch": 0.7606278388844526, "grad_norm": 3.9885945320129395, "learning_rate": 2.4040857859834605e-05, "loss": 0.20378420352935792, "step": 177170 }, { "epoch": 0.7606707709744726, "grad_norm": 0.01776500605046749, "learning_rate": 2.4036546139716982e-05, "loss": 0.2603089094161987, "step": 177180 }, { "epoch": 0.7607137030644926, "grad_norm": 0.15067414939403534, "learning_rate": 2.4032234419599356e-05, "loss": 0.2957641124725342, "step": 177190 }, { "epoch": 0.7607566351545126, "grad_norm": 1.563685655593872, "learning_rate": 2.4027922699481733e-05, "loss": 0.1789721965789795, "step": 177200 }, { "epoch": 0.7607995672445326, "grad_norm": 0.46982231736183167, "learning_rate": 2.4023610979364107e-05, "loss": 0.23951370716094972, "step": 177210 }, { "epoch": 0.7608424993345526, "grad_norm": 7.683855056762695, "learning_rate": 2.4019299259246485e-05, "loss": 0.35542027950286864, "step": 177220 }, { "epoch": 0.7608854314245727, "grad_norm": 0.038922760635614395, "learning_rate": 2.4014987539128862e-05, "loss": 0.05637596845626831, "step": 177230 }, { "epoch": 0.7609283635145926, "grad_norm": 0.03396385908126831, "learning_rate": 2.401067581901124e-05, "loss": 0.2034245252609253, "step": 177240 }, { "epoch": 0.7609712956046126, "grad_norm": 5.346138000488281, "learning_rate": 2.4006364098893613e-05, "loss": 0.10607466697692872, "step": 177250 }, { "epoch": 0.7610142276946327, "grad_norm": 0.0009848641930148005, "learning_rate": 2.400205237877599e-05, "loss": 0.23548879623413085, "step": 177260 }, { "epoch": 0.7610571597846526, "grad_norm": 0.8244641423225403, "learning_rate": 2.3997740658658365e-05, "loss": 0.13387619256973265, "step": 177270 }, { "epoch": 0.7611000918746726, "grad_norm": 0.0011648483341559768, "learning_rate": 2.3993428938540742e-05, "loss": 0.09976664781570435, "step": 177280 }, { "epoch": 0.7611430239646927, "grad_norm": 5.723977088928223, "learning_rate": 2.398911721842312e-05, "loss": 0.24518101215362548, "step": 177290 }, { "epoch": 0.7611859560547126, "grad_norm": 0.1946738213300705, "learning_rate": 2.3984805498305497e-05, "loss": 0.2515752077102661, "step": 177300 }, { "epoch": 0.7612288881447327, "grad_norm": 0.001265937928110361, "learning_rate": 2.398049377818787e-05, "loss": 0.16979835033416749, "step": 177310 }, { "epoch": 0.7612718202347527, "grad_norm": 0.04173870384693146, "learning_rate": 2.3976182058070248e-05, "loss": 0.35801122188568113, "step": 177320 }, { "epoch": 0.7613147523247726, "grad_norm": 1.011568546295166, "learning_rate": 2.3971870337952622e-05, "loss": 0.31412568092346194, "step": 177330 }, { "epoch": 0.7613576844147927, "grad_norm": 0.6044009923934937, "learning_rate": 2.3967558617835003e-05, "loss": 0.2454387664794922, "step": 177340 }, { "epoch": 0.7614006165048127, "grad_norm": 0.006332141347229481, "learning_rate": 2.3963246897717376e-05, "loss": 0.3005851984024048, "step": 177350 }, { "epoch": 0.7614435485948327, "grad_norm": 0.3464721739292145, "learning_rate": 2.3958935177599754e-05, "loss": 0.1406756043434143, "step": 177360 }, { "epoch": 0.7614864806848527, "grad_norm": 0.14981555938720703, "learning_rate": 2.3954623457482128e-05, "loss": 0.11905041933059693, "step": 177370 }, { "epoch": 0.7615294127748727, "grad_norm": 1.5472999811172485, "learning_rate": 2.3950311737364505e-05, "loss": 0.1065073013305664, "step": 177380 }, { "epoch": 0.7615723448648927, "grad_norm": 0.0009052807581610978, "learning_rate": 2.394600001724688e-05, "loss": 0.22083621025085448, "step": 177390 }, { "epoch": 0.7616152769549127, "grad_norm": 0.0028880308382213116, "learning_rate": 2.394168829712926e-05, "loss": 0.22731993198394776, "step": 177400 }, { "epoch": 0.7616582090449328, "grad_norm": 0.006786768790334463, "learning_rate": 2.3937376577011634e-05, "loss": 0.05157320499420166, "step": 177410 }, { "epoch": 0.7617011411349527, "grad_norm": 1.7782171964645386, "learning_rate": 2.393306485689401e-05, "loss": 0.28482072353363036, "step": 177420 }, { "epoch": 0.7617440732249727, "grad_norm": 0.010772820562124252, "learning_rate": 2.3928753136776385e-05, "loss": 0.15599217414855956, "step": 177430 }, { "epoch": 0.7617870053149928, "grad_norm": 0.3834504187107086, "learning_rate": 2.3924441416658762e-05, "loss": 0.06821857094764709, "step": 177440 }, { "epoch": 0.7618299374050127, "grad_norm": 10.938364028930664, "learning_rate": 2.392012969654114e-05, "loss": 0.11866967678070069, "step": 177450 }, { "epoch": 0.7618728694950327, "grad_norm": 7.032254219055176, "learning_rate": 2.3915817976423517e-05, "loss": 0.20671021938323975, "step": 177460 }, { "epoch": 0.7619158015850528, "grad_norm": 0.004301557317376137, "learning_rate": 2.391150625630589e-05, "loss": 0.2941230058670044, "step": 177470 }, { "epoch": 0.7619587336750727, "grad_norm": 1.5329325199127197, "learning_rate": 2.390719453618827e-05, "loss": 0.2828094244003296, "step": 177480 }, { "epoch": 0.7620016657650928, "grad_norm": 2.2881085872650146, "learning_rate": 2.3902882816070642e-05, "loss": 0.40318880081176756, "step": 177490 }, { "epoch": 0.7620445978551128, "grad_norm": 5.213508129119873, "learning_rate": 2.389857109595302e-05, "loss": 0.14141581058502198, "step": 177500 }, { "epoch": 0.7620875299451327, "grad_norm": 0.006306284107267857, "learning_rate": 2.3894259375835397e-05, "loss": 0.08049569725990295, "step": 177510 }, { "epoch": 0.7621304620351528, "grad_norm": 0.22153890132904053, "learning_rate": 2.3889947655717774e-05, "loss": 0.12354075908660889, "step": 177520 }, { "epoch": 0.7621733941251728, "grad_norm": 0.04059620201587677, "learning_rate": 2.3885635935600152e-05, "loss": 0.0005926693323999643, "step": 177530 }, { "epoch": 0.7622163262151929, "grad_norm": 0.004459694027900696, "learning_rate": 2.3881324215482526e-05, "loss": 0.2787043809890747, "step": 177540 }, { "epoch": 0.7622592583052128, "grad_norm": 0.11074524372816086, "learning_rate": 2.3877012495364903e-05, "loss": 0.25904710292816163, "step": 177550 }, { "epoch": 0.7623021903952328, "grad_norm": 0.013455307111144066, "learning_rate": 2.3872700775247277e-05, "loss": 0.3018791675567627, "step": 177560 }, { "epoch": 0.7623451224852529, "grad_norm": 1.0258920192718506, "learning_rate": 2.3868389055129654e-05, "loss": 0.06581991314888, "step": 177570 }, { "epoch": 0.7623880545752728, "grad_norm": 1.115838646888733, "learning_rate": 2.386407733501203e-05, "loss": 0.2682212829589844, "step": 177580 }, { "epoch": 0.7624309866652929, "grad_norm": 0.015212813392281532, "learning_rate": 2.385976561489441e-05, "loss": 0.2845586061477661, "step": 177590 }, { "epoch": 0.7624739187553129, "grad_norm": 0.15196259319782257, "learning_rate": 2.3855453894776783e-05, "loss": 0.2876842498779297, "step": 177600 }, { "epoch": 0.7625168508453328, "grad_norm": 0.07368028908967972, "learning_rate": 2.385114217465916e-05, "loss": 0.10164880752563477, "step": 177610 }, { "epoch": 0.7625597829353529, "grad_norm": 0.0007988035795278847, "learning_rate": 2.3846830454541534e-05, "loss": 0.09450948238372803, "step": 177620 }, { "epoch": 0.7626027150253729, "grad_norm": 0.010584760457277298, "learning_rate": 2.384251873442391e-05, "loss": 0.3373741626739502, "step": 177630 }, { "epoch": 0.7626456471153928, "grad_norm": 0.02437574975192547, "learning_rate": 2.383820701430629e-05, "loss": 0.23039453029632567, "step": 177640 }, { "epoch": 0.7626885792054129, "grad_norm": 0.001247620559297502, "learning_rate": 2.3833895294188666e-05, "loss": 0.27570860385894774, "step": 177650 }, { "epoch": 0.7627315112954329, "grad_norm": 2.2336456775665283, "learning_rate": 2.382958357407104e-05, "loss": 0.1201433539390564, "step": 177660 }, { "epoch": 0.7627744433854529, "grad_norm": 0.146112859249115, "learning_rate": 2.3825271853953418e-05, "loss": 0.21582226753234862, "step": 177670 }, { "epoch": 0.7628173754754729, "grad_norm": 14.193013191223145, "learning_rate": 2.382096013383579e-05, "loss": 0.397243595123291, "step": 177680 }, { "epoch": 0.762860307565493, "grad_norm": 0.22348107397556305, "learning_rate": 2.3816648413718172e-05, "loss": 0.21426901817321778, "step": 177690 }, { "epoch": 0.7629032396555129, "grad_norm": 4.388669967651367, "learning_rate": 2.3812336693600546e-05, "loss": 0.3503814697265625, "step": 177700 }, { "epoch": 0.7629461717455329, "grad_norm": 0.05588550865650177, "learning_rate": 2.3808024973482924e-05, "loss": 0.11665399074554443, "step": 177710 }, { "epoch": 0.762989103835553, "grad_norm": 0.00433525163680315, "learning_rate": 2.3803713253365298e-05, "loss": 0.2739673137664795, "step": 177720 }, { "epoch": 0.7630320359255729, "grad_norm": 0.035902559757232666, "learning_rate": 2.3799401533247675e-05, "loss": 0.300280237197876, "step": 177730 }, { "epoch": 0.7630749680155929, "grad_norm": 0.34329330921173096, "learning_rate": 2.379508981313005e-05, "loss": 0.3793229579925537, "step": 177740 }, { "epoch": 0.763117900105613, "grad_norm": 0.04771098494529724, "learning_rate": 2.379077809301243e-05, "loss": 0.15737433433532716, "step": 177750 }, { "epoch": 0.7631608321956329, "grad_norm": 0.0009794149082154036, "learning_rate": 2.3786466372894804e-05, "loss": 0.11524491310119629, "step": 177760 }, { "epoch": 0.763203764285653, "grad_norm": 0.09087666869163513, "learning_rate": 2.378215465277718e-05, "loss": 0.21049742698669432, "step": 177770 }, { "epoch": 0.763246696375673, "grad_norm": 0.0185843575745821, "learning_rate": 2.3777842932659555e-05, "loss": 0.24470274448394774, "step": 177780 }, { "epoch": 0.7632896284656929, "grad_norm": 0.0009324979619123042, "learning_rate": 2.3773531212541932e-05, "loss": 0.08331471681594849, "step": 177790 }, { "epoch": 0.763332560555713, "grad_norm": 0.2116546779870987, "learning_rate": 2.3769219492424306e-05, "loss": 0.37836732864379885, "step": 177800 }, { "epoch": 0.763375492645733, "grad_norm": 0.2698652446269989, "learning_rate": 2.3764907772306687e-05, "loss": 0.2392141342163086, "step": 177810 }, { "epoch": 0.7634184247357529, "grad_norm": 0.10046686977148056, "learning_rate": 2.376059605218906e-05, "loss": 0.02579231858253479, "step": 177820 }, { "epoch": 0.763461356825773, "grad_norm": 0.1942238211631775, "learning_rate": 2.3756284332071438e-05, "loss": 0.31575570106506345, "step": 177830 }, { "epoch": 0.763504288915793, "grad_norm": 0.0024036553222686052, "learning_rate": 2.3751972611953812e-05, "loss": 0.25040130615234374, "step": 177840 }, { "epoch": 0.763547221005813, "grad_norm": 8.009116172790527, "learning_rate": 2.374766089183619e-05, "loss": 0.21138277053833007, "step": 177850 }, { "epoch": 0.763590153095833, "grad_norm": 1.8110581636428833, "learning_rate": 2.3743349171718567e-05, "loss": 0.3237518310546875, "step": 177860 }, { "epoch": 0.763633085185853, "grad_norm": 1.874503254890442, "learning_rate": 2.3739037451600944e-05, "loss": 0.2857369422912598, "step": 177870 }, { "epoch": 0.763676017275873, "grad_norm": 1.8950402736663818, "learning_rate": 2.373472573148332e-05, "loss": 0.14816755056381226, "step": 177880 }, { "epoch": 0.763718949365893, "grad_norm": 1.536193609237671, "learning_rate": 2.3730414011365695e-05, "loss": 0.3412646293640137, "step": 177890 }, { "epoch": 0.7637618814559131, "grad_norm": 0.028296170756220818, "learning_rate": 2.3726102291248073e-05, "loss": 0.07663151621818542, "step": 177900 }, { "epoch": 0.763804813545933, "grad_norm": 0.1083562821149826, "learning_rate": 2.3721790571130447e-05, "loss": 0.4823110580444336, "step": 177910 }, { "epoch": 0.763847745635953, "grad_norm": 0.038458842784166336, "learning_rate": 2.3717478851012824e-05, "loss": 0.2413325786590576, "step": 177920 }, { "epoch": 0.7638906777259731, "grad_norm": 1.106160044670105, "learning_rate": 2.37131671308952e-05, "loss": 0.21324872970581055, "step": 177930 }, { "epoch": 0.763933609815993, "grad_norm": 1.998532772064209, "learning_rate": 2.370885541077758e-05, "loss": 0.14935051202774047, "step": 177940 }, { "epoch": 0.763976541906013, "grad_norm": 0.047563642263412476, "learning_rate": 2.3704543690659953e-05, "loss": 0.09624691009521484, "step": 177950 }, { "epoch": 0.7640194739960331, "grad_norm": 0.014821537770330906, "learning_rate": 2.370023197054233e-05, "loss": 0.05763006806373596, "step": 177960 }, { "epoch": 0.7640624060860531, "grad_norm": 2.6112759113311768, "learning_rate": 2.3695920250424704e-05, "loss": 0.23190555572509766, "step": 177970 }, { "epoch": 0.7641053381760731, "grad_norm": 0.008518679067492485, "learning_rate": 2.369160853030708e-05, "loss": 0.29477295875549314, "step": 177980 }, { "epoch": 0.7641482702660931, "grad_norm": 1.1182646751403809, "learning_rate": 2.368729681018946e-05, "loss": 0.19260014295578004, "step": 177990 }, { "epoch": 0.7641912023561132, "grad_norm": 8.637264251708984, "learning_rate": 2.3682985090071836e-05, "loss": 0.22437336444854736, "step": 178000 }, { "epoch": 0.7641912023561132, "eval_loss": 0.38519030809402466, "eval_runtime": 27.4348, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 178000 }, { "epoch": 0.7642341344461331, "grad_norm": 0.12673498690128326, "learning_rate": 2.367867336995421e-05, "loss": 0.05941138863563537, "step": 178010 }, { "epoch": 0.7642770665361531, "grad_norm": 0.019599629566073418, "learning_rate": 2.3674361649836587e-05, "loss": 0.048000562191009524, "step": 178020 }, { "epoch": 0.7643199986261732, "grad_norm": 1.9546705484390259, "learning_rate": 2.367004992971896e-05, "loss": 0.22433915138244628, "step": 178030 }, { "epoch": 0.7643629307161931, "grad_norm": 1.5347076654434204, "learning_rate": 2.366573820960134e-05, "loss": 0.28104162216186523, "step": 178040 }, { "epoch": 0.7644058628062131, "grad_norm": 0.35148516297340393, "learning_rate": 2.3661426489483716e-05, "loss": 0.35195064544677734, "step": 178050 }, { "epoch": 0.7644487948962332, "grad_norm": 0.2593337297439575, "learning_rate": 2.3657114769366093e-05, "loss": 0.15546387434005737, "step": 178060 }, { "epoch": 0.7644917269862531, "grad_norm": 1.2594435214996338, "learning_rate": 2.3652803049248467e-05, "loss": 0.2298504114151001, "step": 178070 }, { "epoch": 0.7645346590762732, "grad_norm": 0.08988411724567413, "learning_rate": 2.3648491329130845e-05, "loss": 0.4057119369506836, "step": 178080 }, { "epoch": 0.7645775911662932, "grad_norm": 1.5320706367492676, "learning_rate": 2.364417960901322e-05, "loss": 0.31486694812774657, "step": 178090 }, { "epoch": 0.7646205232563131, "grad_norm": 0.11509314924478531, "learning_rate": 2.36398678888956e-05, "loss": 0.35809285640716554, "step": 178100 }, { "epoch": 0.7646634553463332, "grad_norm": 0.002720639342442155, "learning_rate": 2.3635556168777973e-05, "loss": 0.14641090631484985, "step": 178110 }, { "epoch": 0.7647063874363532, "grad_norm": 0.6865724325180054, "learning_rate": 2.363124444866035e-05, "loss": 0.23801584243774415, "step": 178120 }, { "epoch": 0.7647493195263732, "grad_norm": 0.005314926616847515, "learning_rate": 2.3626932728542725e-05, "loss": 0.19524000883102416, "step": 178130 }, { "epoch": 0.7647922516163932, "grad_norm": 0.13690844178199768, "learning_rate": 2.3622621008425102e-05, "loss": 0.19549834728240967, "step": 178140 }, { "epoch": 0.7648351837064132, "grad_norm": 0.011456413194537163, "learning_rate": 2.3618309288307476e-05, "loss": 0.312131667137146, "step": 178150 }, { "epoch": 0.7648781157964332, "grad_norm": 0.0010632037883624434, "learning_rate": 2.3613997568189857e-05, "loss": 0.014251169562339783, "step": 178160 }, { "epoch": 0.7649210478864532, "grad_norm": 0.006948573049157858, "learning_rate": 2.360968584807223e-05, "loss": 0.26813473701477053, "step": 178170 }, { "epoch": 0.7649639799764733, "grad_norm": 0.06771288067102432, "learning_rate": 2.3605374127954608e-05, "loss": 0.15589020252227784, "step": 178180 }, { "epoch": 0.7650069120664932, "grad_norm": 0.03058125637471676, "learning_rate": 2.3601062407836982e-05, "loss": 0.13075129985809325, "step": 178190 }, { "epoch": 0.7650498441565132, "grad_norm": 0.023907043039798737, "learning_rate": 2.359675068771936e-05, "loss": 0.02735612988471985, "step": 178200 }, { "epoch": 0.7650927762465333, "grad_norm": 0.032358940690755844, "learning_rate": 2.3592438967601736e-05, "loss": 0.1290936827659607, "step": 178210 }, { "epoch": 0.7651357083365532, "grad_norm": 1.0458128452301025, "learning_rate": 2.3588127247484114e-05, "loss": 0.19473346471786498, "step": 178220 }, { "epoch": 0.7651786404265732, "grad_norm": 0.03492206335067749, "learning_rate": 2.3583815527366488e-05, "loss": 0.27468626499176024, "step": 178230 }, { "epoch": 0.7652215725165933, "grad_norm": 0.02318352460861206, "learning_rate": 2.3579503807248865e-05, "loss": 0.18723065853118898, "step": 178240 }, { "epoch": 0.7652645046066132, "grad_norm": 5.709285259246826, "learning_rate": 2.3575192087131242e-05, "loss": 0.11720331907272338, "step": 178250 }, { "epoch": 0.7653074366966333, "grad_norm": 1.5659072399139404, "learning_rate": 2.3570880367013616e-05, "loss": 0.16660025119781494, "step": 178260 }, { "epoch": 0.7653503687866533, "grad_norm": 0.004724172875285149, "learning_rate": 2.3566568646895994e-05, "loss": 0.14990419149398804, "step": 178270 }, { "epoch": 0.7653933008766732, "grad_norm": 0.00437846640124917, "learning_rate": 2.356225692677837e-05, "loss": 0.32151336669921876, "step": 178280 }, { "epoch": 0.7654362329666933, "grad_norm": 0.3685600459575653, "learning_rate": 2.355794520666075e-05, "loss": 0.09072909355163575, "step": 178290 }, { "epoch": 0.7654791650567133, "grad_norm": 0.0566171258687973, "learning_rate": 2.3553633486543122e-05, "loss": 0.23811821937561034, "step": 178300 }, { "epoch": 0.7655220971467332, "grad_norm": 3.1550590991973877, "learning_rate": 2.35493217664255e-05, "loss": 0.3866184949874878, "step": 178310 }, { "epoch": 0.7655650292367533, "grad_norm": 0.04981329292058945, "learning_rate": 2.3545010046307874e-05, "loss": 0.24460201263427733, "step": 178320 }, { "epoch": 0.7656079613267733, "grad_norm": 0.008631882257759571, "learning_rate": 2.354069832619025e-05, "loss": 0.14547290802001953, "step": 178330 }, { "epoch": 0.7656508934167933, "grad_norm": 0.17233169078826904, "learning_rate": 2.353638660607263e-05, "loss": 0.12638088464736938, "step": 178340 }, { "epoch": 0.7656938255068133, "grad_norm": 0.03211379051208496, "learning_rate": 2.3532074885955006e-05, "loss": 0.2023622751235962, "step": 178350 }, { "epoch": 0.7657367575968334, "grad_norm": 0.06673210114240646, "learning_rate": 2.352776316583738e-05, "loss": 0.2539444208145142, "step": 178360 }, { "epoch": 0.7657796896868533, "grad_norm": 2.9048259258270264, "learning_rate": 2.3523451445719757e-05, "loss": 0.1878517746925354, "step": 178370 }, { "epoch": 0.7658226217768733, "grad_norm": 0.19632039964199066, "learning_rate": 2.351913972560213e-05, "loss": 0.38019933700561526, "step": 178380 }, { "epoch": 0.7658655538668934, "grad_norm": 0.12534579634666443, "learning_rate": 2.351482800548451e-05, "loss": 0.39364616870880126, "step": 178390 }, { "epoch": 0.7659084859569134, "grad_norm": 0.040436238050460815, "learning_rate": 2.3510516285366886e-05, "loss": 0.1989324927330017, "step": 178400 }, { "epoch": 0.7659514180469333, "grad_norm": 0.3849923312664032, "learning_rate": 2.3506204565249263e-05, "loss": 0.229500150680542, "step": 178410 }, { "epoch": 0.7659943501369534, "grad_norm": 0.6619948148727417, "learning_rate": 2.3501892845131637e-05, "loss": 0.15900182723999023, "step": 178420 }, { "epoch": 0.7660372822269734, "grad_norm": 1.1308156251907349, "learning_rate": 2.3497581125014014e-05, "loss": 0.2050173759460449, "step": 178430 }, { "epoch": 0.7660802143169934, "grad_norm": 0.0016266998136416078, "learning_rate": 2.3493269404896388e-05, "loss": 0.2516968250274658, "step": 178440 }, { "epoch": 0.7661231464070134, "grad_norm": 0.3307945430278778, "learning_rate": 2.348895768477877e-05, "loss": 0.19992436170578004, "step": 178450 }, { "epoch": 0.7661660784970334, "grad_norm": 1.8131606578826904, "learning_rate": 2.3484645964661143e-05, "loss": 0.23348846435546874, "step": 178460 }, { "epoch": 0.7662090105870534, "grad_norm": 2.5765650272369385, "learning_rate": 2.348033424454352e-05, "loss": 0.14857852458953857, "step": 178470 }, { "epoch": 0.7662519426770734, "grad_norm": 0.001548268715851009, "learning_rate": 2.3476022524425894e-05, "loss": 0.32491796016693114, "step": 178480 }, { "epoch": 0.7662948747670935, "grad_norm": 0.004323096945881844, "learning_rate": 2.347171080430827e-05, "loss": 0.07983020544052125, "step": 178490 }, { "epoch": 0.7663378068571134, "grad_norm": 0.33664846420288086, "learning_rate": 2.3467399084190646e-05, "loss": 0.2000946044921875, "step": 178500 }, { "epoch": 0.7663807389471334, "grad_norm": 3.2330658435821533, "learning_rate": 2.3463087364073026e-05, "loss": 0.30185685157775877, "step": 178510 }, { "epoch": 0.7664236710371535, "grad_norm": 0.001679239678196609, "learning_rate": 2.34587756439554e-05, "loss": 0.11961598396301269, "step": 178520 }, { "epoch": 0.7664666031271734, "grad_norm": 0.001713413163088262, "learning_rate": 2.3454463923837778e-05, "loss": 0.26211502552032473, "step": 178530 }, { "epoch": 0.7665095352171934, "grad_norm": 1.5684071779251099, "learning_rate": 2.345015220372015e-05, "loss": 0.1904462218284607, "step": 178540 }, { "epoch": 0.7665524673072135, "grad_norm": 0.0025774992536753416, "learning_rate": 2.344584048360253e-05, "loss": 0.19834427833557128, "step": 178550 }, { "epoch": 0.7665953993972334, "grad_norm": 0.013022633269429207, "learning_rate": 2.3441528763484906e-05, "loss": 0.11189095973968506, "step": 178560 }, { "epoch": 0.7666383314872535, "grad_norm": 0.1410759687423706, "learning_rate": 2.3437217043367284e-05, "loss": 0.07068445682525634, "step": 178570 }, { "epoch": 0.7666812635772735, "grad_norm": 0.12566423416137695, "learning_rate": 2.3432905323249658e-05, "loss": 0.1137201189994812, "step": 178580 }, { "epoch": 0.7667241956672934, "grad_norm": 0.004756872076541185, "learning_rate": 2.3428593603132035e-05, "loss": 0.1246252179145813, "step": 178590 }, { "epoch": 0.7667671277573135, "grad_norm": 2.4529027938842773, "learning_rate": 2.342428188301441e-05, "loss": 0.11557846069335938, "step": 178600 }, { "epoch": 0.7668100598473335, "grad_norm": 0.004438826348632574, "learning_rate": 2.3419970162896786e-05, "loss": 0.25914185047149657, "step": 178610 }, { "epoch": 0.7668529919373535, "grad_norm": 4.537032127380371, "learning_rate": 2.3415658442779164e-05, "loss": 0.2596637010574341, "step": 178620 }, { "epoch": 0.7668959240273735, "grad_norm": 0.0016977523919194937, "learning_rate": 2.341134672266154e-05, "loss": 0.11433390378952027, "step": 178630 }, { "epoch": 0.7669388561173935, "grad_norm": 0.6404827237129211, "learning_rate": 2.3407035002543918e-05, "loss": 0.3891470193862915, "step": 178640 }, { "epoch": 0.7669817882074135, "grad_norm": 1.4652893543243408, "learning_rate": 2.3402723282426292e-05, "loss": 0.24137496948242188, "step": 178650 }, { "epoch": 0.7670247202974335, "grad_norm": 0.006902490276843309, "learning_rate": 2.339841156230867e-05, "loss": 0.2944831848144531, "step": 178660 }, { "epoch": 0.7670676523874536, "grad_norm": 2.101875066757202, "learning_rate": 2.3394099842191043e-05, "loss": 0.1436405062675476, "step": 178670 }, { "epoch": 0.7671105844774735, "grad_norm": 0.07013654708862305, "learning_rate": 2.338978812207342e-05, "loss": 0.13763973712921143, "step": 178680 }, { "epoch": 0.7671535165674935, "grad_norm": 0.21145546436309814, "learning_rate": 2.3385476401955798e-05, "loss": 0.12384222745895386, "step": 178690 }, { "epoch": 0.7671964486575136, "grad_norm": 1.3103197813034058, "learning_rate": 2.3381164681838175e-05, "loss": 0.16632239818572997, "step": 178700 }, { "epoch": 0.7672393807475335, "grad_norm": 0.02879696898162365, "learning_rate": 2.337685296172055e-05, "loss": 0.20476338863372803, "step": 178710 }, { "epoch": 0.7672823128375535, "grad_norm": 1.125725269317627, "learning_rate": 2.3372541241602927e-05, "loss": 0.3404285192489624, "step": 178720 }, { "epoch": 0.7673252449275736, "grad_norm": 0.10981670767068863, "learning_rate": 2.33682295214853e-05, "loss": 0.01966983377933502, "step": 178730 }, { "epoch": 0.7673681770175935, "grad_norm": 0.002532815095037222, "learning_rate": 2.3363917801367678e-05, "loss": 0.1262107491493225, "step": 178740 }, { "epoch": 0.7674111091076136, "grad_norm": 1.2392613887786865, "learning_rate": 2.3359606081250055e-05, "loss": 0.1985929489135742, "step": 178750 }, { "epoch": 0.7674540411976336, "grad_norm": 1.4831242561340332, "learning_rate": 2.3355294361132433e-05, "loss": 0.07167594432830811, "step": 178760 }, { "epoch": 0.7674969732876535, "grad_norm": 0.013040987774729729, "learning_rate": 2.3350982641014807e-05, "loss": 0.016568221151828766, "step": 178770 }, { "epoch": 0.7675399053776736, "grad_norm": 0.09118735790252686, "learning_rate": 2.3346670920897184e-05, "loss": 0.203193998336792, "step": 178780 }, { "epoch": 0.7675828374676936, "grad_norm": 2.505168914794922, "learning_rate": 2.3342359200779558e-05, "loss": 0.41781888008117674, "step": 178790 }, { "epoch": 0.7676257695577136, "grad_norm": 0.9963707327842712, "learning_rate": 2.3338047480661935e-05, "loss": 0.2993806838989258, "step": 178800 }, { "epoch": 0.7676687016477336, "grad_norm": 0.0019730194471776485, "learning_rate": 2.3333735760544313e-05, "loss": 0.2103712558746338, "step": 178810 }, { "epoch": 0.7677116337377536, "grad_norm": 0.0005218818550929427, "learning_rate": 2.332942404042669e-05, "loss": 0.1815126657485962, "step": 178820 }, { "epoch": 0.7677545658277737, "grad_norm": 12.162750244140625, "learning_rate": 2.3325112320309064e-05, "loss": 0.10047402381896972, "step": 178830 }, { "epoch": 0.7677974979177936, "grad_norm": 1.1582043170928955, "learning_rate": 2.332080060019144e-05, "loss": 0.12247195243835449, "step": 178840 }, { "epoch": 0.7678404300078137, "grad_norm": 0.007292480207979679, "learning_rate": 2.3316488880073815e-05, "loss": 0.1326340913772583, "step": 178850 }, { "epoch": 0.7678833620978337, "grad_norm": 0.11198096722364426, "learning_rate": 2.3312177159956196e-05, "loss": 0.14960025548934935, "step": 178860 }, { "epoch": 0.7679262941878536, "grad_norm": 0.0018669597338885069, "learning_rate": 2.330786543983857e-05, "loss": 0.08662062883377075, "step": 178870 }, { "epoch": 0.7679692262778737, "grad_norm": 0.0002546081959735602, "learning_rate": 2.3303553719720947e-05, "loss": 0.16194459199905395, "step": 178880 }, { "epoch": 0.7680121583678937, "grad_norm": 0.004007402341812849, "learning_rate": 2.329924199960332e-05, "loss": 0.20729336738586426, "step": 178890 }, { "epoch": 0.7680550904579136, "grad_norm": 0.017977435141801834, "learning_rate": 2.32949302794857e-05, "loss": 0.1353507161140442, "step": 178900 }, { "epoch": 0.7680980225479337, "grad_norm": 0.009154192171990871, "learning_rate": 2.3290618559368073e-05, "loss": 0.17526334524154663, "step": 178910 }, { "epoch": 0.7681409546379537, "grad_norm": 0.003638830967247486, "learning_rate": 2.3286306839250453e-05, "loss": 0.05437243580818176, "step": 178920 }, { "epoch": 0.7681838867279737, "grad_norm": 0.0003096268919762224, "learning_rate": 2.3281995119132827e-05, "loss": 0.12296555042266846, "step": 178930 }, { "epoch": 0.7682268188179937, "grad_norm": 0.004455664660781622, "learning_rate": 2.3277683399015205e-05, "loss": 0.03397051095962524, "step": 178940 }, { "epoch": 0.7682697509080137, "grad_norm": 0.08765816688537598, "learning_rate": 2.327337167889758e-05, "loss": 0.17958006858825684, "step": 178950 }, { "epoch": 0.7683126829980337, "grad_norm": 0.005225532688200474, "learning_rate": 2.3269059958779956e-05, "loss": 0.43357043266296386, "step": 178960 }, { "epoch": 0.7683556150880537, "grad_norm": 10.482624053955078, "learning_rate": 2.3264748238662333e-05, "loss": 0.30485365390777586, "step": 178970 }, { "epoch": 0.7683985471780738, "grad_norm": 0.008869537152349949, "learning_rate": 2.326043651854471e-05, "loss": 0.06108769774436951, "step": 178980 }, { "epoch": 0.7684414792680937, "grad_norm": 0.00020321154443081468, "learning_rate": 2.3256124798427088e-05, "loss": 0.2938662052154541, "step": 178990 }, { "epoch": 0.7684844113581137, "grad_norm": 0.019519424065947533, "learning_rate": 2.3251813078309462e-05, "loss": 0.08139798045158386, "step": 179000 }, { "epoch": 0.7684844113581137, "eval_loss": 0.3833867907524109, "eval_runtime": 27.4034, "eval_samples_per_second": 3.649, "eval_steps_per_second": 3.649, "step": 179000 }, { "epoch": 0.7685273434481338, "grad_norm": 0.006098241079598665, "learning_rate": 2.324750135819184e-05, "loss": 0.1604252576828003, "step": 179010 }, { "epoch": 0.7685702755381537, "grad_norm": 0.01255844533443451, "learning_rate": 2.3243189638074213e-05, "loss": 0.07683556675910949, "step": 179020 }, { "epoch": 0.7686132076281738, "grad_norm": 0.09523410350084305, "learning_rate": 2.323887791795659e-05, "loss": 0.24342586994171142, "step": 179030 }, { "epoch": 0.7686561397181938, "grad_norm": 3.1891558170318604, "learning_rate": 2.3234566197838968e-05, "loss": 0.3374955177307129, "step": 179040 }, { "epoch": 0.7686990718082137, "grad_norm": 4.197160720825195, "learning_rate": 2.3230254477721345e-05, "loss": 0.34609458446502683, "step": 179050 }, { "epoch": 0.7687420038982338, "grad_norm": 0.15234249830245972, "learning_rate": 2.322594275760372e-05, "loss": 0.25315892696380615, "step": 179060 }, { "epoch": 0.7687849359882538, "grad_norm": 0.11703097075223923, "learning_rate": 2.3221631037486097e-05, "loss": 0.021952293813228607, "step": 179070 }, { "epoch": 0.7688278680782737, "grad_norm": 0.0015012217918410897, "learning_rate": 2.321731931736847e-05, "loss": 0.08591393828392029, "step": 179080 }, { "epoch": 0.7688708001682938, "grad_norm": 0.0504002645611763, "learning_rate": 2.3213007597250848e-05, "loss": 0.20234971046447753, "step": 179090 }, { "epoch": 0.7689137322583138, "grad_norm": 1.278030276298523, "learning_rate": 2.3208695877133225e-05, "loss": 0.30761592388153075, "step": 179100 }, { "epoch": 0.7689566643483338, "grad_norm": 0.025709327310323715, "learning_rate": 2.3204384157015602e-05, "loss": 0.1279531955718994, "step": 179110 }, { "epoch": 0.7689995964383538, "grad_norm": 2.917078971862793, "learning_rate": 2.3200072436897976e-05, "loss": 0.4120755195617676, "step": 179120 }, { "epoch": 0.7690425285283738, "grad_norm": 0.0006804691511206329, "learning_rate": 2.3195760716780354e-05, "loss": 0.09456906318664551, "step": 179130 }, { "epoch": 0.7690854606183938, "grad_norm": 0.006927388720214367, "learning_rate": 2.3191448996662728e-05, "loss": 0.31485681533813475, "step": 179140 }, { "epoch": 0.7691283927084138, "grad_norm": 8.310165405273438, "learning_rate": 2.3187137276545105e-05, "loss": 0.40290584564208987, "step": 179150 }, { "epoch": 0.7691713247984339, "grad_norm": 1.0132633447647095, "learning_rate": 2.3182825556427482e-05, "loss": 0.326165771484375, "step": 179160 }, { "epoch": 0.7692142568884538, "grad_norm": 0.004784280899912119, "learning_rate": 2.317851383630986e-05, "loss": 0.3097829341888428, "step": 179170 }, { "epoch": 0.7692571889784738, "grad_norm": 0.0012556664878502488, "learning_rate": 2.3174202116192234e-05, "loss": 0.118812096118927, "step": 179180 }, { "epoch": 0.7693001210684939, "grad_norm": 2.7564048767089844, "learning_rate": 2.316989039607461e-05, "loss": 0.06245466470718384, "step": 179190 }, { "epoch": 0.7693430531585138, "grad_norm": 2.507434129714966, "learning_rate": 2.3165578675956985e-05, "loss": 0.1164400339126587, "step": 179200 }, { "epoch": 0.7693859852485339, "grad_norm": 0.6105691194534302, "learning_rate": 2.3161266955839366e-05, "loss": 0.20296921730041503, "step": 179210 }, { "epoch": 0.7694289173385539, "grad_norm": 2.032741069793701, "learning_rate": 2.315695523572174e-05, "loss": 0.28797986507415774, "step": 179220 }, { "epoch": 0.7694718494285738, "grad_norm": 0.03527984768152237, "learning_rate": 2.3152643515604117e-05, "loss": 0.10299174785614014, "step": 179230 }, { "epoch": 0.7695147815185939, "grad_norm": 0.0007198863895609975, "learning_rate": 2.314833179548649e-05, "loss": 0.13453786373138427, "step": 179240 }, { "epoch": 0.7695577136086139, "grad_norm": 0.7343387007713318, "learning_rate": 2.314402007536887e-05, "loss": 0.06165881752967835, "step": 179250 }, { "epoch": 0.769600645698634, "grad_norm": 0.2602272629737854, "learning_rate": 2.3139708355251242e-05, "loss": 0.2444288730621338, "step": 179260 }, { "epoch": 0.7696435777886539, "grad_norm": 3.470984935760498, "learning_rate": 2.3135396635133623e-05, "loss": 0.1751070499420166, "step": 179270 }, { "epoch": 0.7696865098786739, "grad_norm": 0.012675684876739979, "learning_rate": 2.3131084915015997e-05, "loss": 0.12060407400131226, "step": 179280 }, { "epoch": 0.769729441968694, "grad_norm": 8.048802375793457, "learning_rate": 2.3126773194898374e-05, "loss": 0.31142096519470214, "step": 179290 }, { "epoch": 0.7697723740587139, "grad_norm": 0.052960868924856186, "learning_rate": 2.3122461474780748e-05, "loss": 0.11074914932250976, "step": 179300 }, { "epoch": 0.769815306148734, "grad_norm": 0.0007027122192084789, "learning_rate": 2.3118149754663126e-05, "loss": 0.1947923183441162, "step": 179310 }, { "epoch": 0.769858238238754, "grad_norm": 0.8784052729606628, "learning_rate": 2.3113838034545503e-05, "loss": 0.30508151054382326, "step": 179320 }, { "epoch": 0.7699011703287739, "grad_norm": 0.06336329132318497, "learning_rate": 2.310952631442788e-05, "loss": 0.17809114456176758, "step": 179330 }, { "epoch": 0.769944102418794, "grad_norm": 0.02893325686454773, "learning_rate": 2.3105214594310258e-05, "loss": 0.21597938537597655, "step": 179340 }, { "epoch": 0.769987034508814, "grad_norm": 0.0019647746812552214, "learning_rate": 2.310090287419263e-05, "loss": 0.07605461478233337, "step": 179350 }, { "epoch": 0.7700299665988339, "grad_norm": 0.0028181010857224464, "learning_rate": 2.309659115407501e-05, "loss": 0.17536162137985228, "step": 179360 }, { "epoch": 0.770072898688854, "grad_norm": 1.347222924232483, "learning_rate": 2.3092279433957383e-05, "loss": 0.22230615615844726, "step": 179370 }, { "epoch": 0.770115830778874, "grad_norm": 0.07306050509214401, "learning_rate": 2.308796771383976e-05, "loss": 0.31887292861938477, "step": 179380 }, { "epoch": 0.770158762868894, "grad_norm": 2.1165270805358887, "learning_rate": 2.3083655993722138e-05, "loss": 0.34431474208831786, "step": 179390 }, { "epoch": 0.770201694958914, "grad_norm": 0.15936022996902466, "learning_rate": 2.3079344273604515e-05, "loss": 0.1585230588912964, "step": 179400 }, { "epoch": 0.770244627048934, "grad_norm": 1.2283518314361572, "learning_rate": 2.307503255348689e-05, "loss": 0.36916613578796387, "step": 179410 }, { "epoch": 0.770287559138954, "grad_norm": 0.015880318358540535, "learning_rate": 2.3070720833369266e-05, "loss": 0.3850353717803955, "step": 179420 }, { "epoch": 0.770330491228974, "grad_norm": 6.375476837158203, "learning_rate": 2.306640911325164e-05, "loss": 0.3537035703659058, "step": 179430 }, { "epoch": 0.770373423318994, "grad_norm": 2.0374484062194824, "learning_rate": 2.3062097393134018e-05, "loss": 0.46723246574401855, "step": 179440 }, { "epoch": 0.770416355409014, "grad_norm": 0.9383795857429504, "learning_rate": 2.3057785673016395e-05, "loss": 0.11651902198791504, "step": 179450 }, { "epoch": 0.770459287499034, "grad_norm": 0.7634946703910828, "learning_rate": 2.3053473952898772e-05, "loss": 0.1451859712600708, "step": 179460 }, { "epoch": 0.7705022195890541, "grad_norm": 6.315857887268066, "learning_rate": 2.3049162232781146e-05, "loss": 0.3340526342391968, "step": 179470 }, { "epoch": 0.770545151679074, "grad_norm": 1.4309190511703491, "learning_rate": 2.3044850512663524e-05, "loss": 0.12667789459228515, "step": 179480 }, { "epoch": 0.770588083769094, "grad_norm": 1.7753784656524658, "learning_rate": 2.3040538792545897e-05, "loss": 0.3001526355743408, "step": 179490 }, { "epoch": 0.7706310158591141, "grad_norm": 1.0547752380371094, "learning_rate": 2.3036227072428275e-05, "loss": 0.3086581230163574, "step": 179500 }, { "epoch": 0.770673947949134, "grad_norm": 3.6708216667175293, "learning_rate": 2.3031915352310652e-05, "loss": 0.4584041595458984, "step": 179510 }, { "epoch": 0.7707168800391541, "grad_norm": 1.5989513397216797, "learning_rate": 2.302760363219303e-05, "loss": 0.2776602506637573, "step": 179520 }, { "epoch": 0.7707598121291741, "grad_norm": 0.2675705850124359, "learning_rate": 2.3023291912075403e-05, "loss": 0.11531891822814941, "step": 179530 }, { "epoch": 0.770802744219194, "grad_norm": 0.14942918717861176, "learning_rate": 2.301898019195778e-05, "loss": 0.13127764463424682, "step": 179540 }, { "epoch": 0.7708456763092141, "grad_norm": 0.02816365659236908, "learning_rate": 2.3014668471840155e-05, "loss": 0.17667512893676757, "step": 179550 }, { "epoch": 0.7708886083992341, "grad_norm": 0.006725494284182787, "learning_rate": 2.3010356751722532e-05, "loss": 0.3269664287567139, "step": 179560 }, { "epoch": 0.770931540489254, "grad_norm": 0.016622474417090416, "learning_rate": 2.300604503160491e-05, "loss": 0.15728062391281128, "step": 179570 }, { "epoch": 0.7709744725792741, "grad_norm": 20.81556510925293, "learning_rate": 2.3001733311487287e-05, "loss": 0.17218732833862305, "step": 179580 }, { "epoch": 0.7710174046692941, "grad_norm": 0.0007839555619284511, "learning_rate": 2.299742159136966e-05, "loss": 0.01884029060602188, "step": 179590 }, { "epoch": 0.7710603367593141, "grad_norm": 0.43381252884864807, "learning_rate": 2.2993109871252038e-05, "loss": 0.18695248365402223, "step": 179600 }, { "epoch": 0.7711032688493341, "grad_norm": 5.024713039398193, "learning_rate": 2.2988798151134412e-05, "loss": 0.3955603361129761, "step": 179610 }, { "epoch": 0.7711462009393542, "grad_norm": 1.1479946374893188, "learning_rate": 2.2984486431016793e-05, "loss": 0.27515287399291993, "step": 179620 }, { "epoch": 0.7711891330293741, "grad_norm": 0.017682382836937904, "learning_rate": 2.2980174710899167e-05, "loss": 0.20232205390930175, "step": 179630 }, { "epoch": 0.7712320651193941, "grad_norm": 0.0059524052776396275, "learning_rate": 2.2975862990781544e-05, "loss": 0.2792728185653687, "step": 179640 }, { "epoch": 0.7712749972094142, "grad_norm": 0.006699263118207455, "learning_rate": 2.2971551270663918e-05, "loss": 0.16753827333450316, "step": 179650 }, { "epoch": 0.7713179292994341, "grad_norm": 0.029053257778286934, "learning_rate": 2.2967239550546295e-05, "loss": 0.2607213258743286, "step": 179660 }, { "epoch": 0.7713608613894541, "grad_norm": 1.745116114616394, "learning_rate": 2.296292783042867e-05, "loss": 0.11559852361679077, "step": 179670 }, { "epoch": 0.7714037934794742, "grad_norm": 1.3347855806350708, "learning_rate": 2.295861611031105e-05, "loss": 0.35223388671875, "step": 179680 }, { "epoch": 0.7714467255694942, "grad_norm": 2.656464099884033, "learning_rate": 2.2954304390193424e-05, "loss": 0.33100650310516355, "step": 179690 }, { "epoch": 0.7714896576595142, "grad_norm": 7.4256391525268555, "learning_rate": 2.29499926700758e-05, "loss": 0.35502123832702637, "step": 179700 }, { "epoch": 0.7715325897495342, "grad_norm": 0.007397874724119902, "learning_rate": 2.294568094995818e-05, "loss": 0.2101654291152954, "step": 179710 }, { "epoch": 0.7715755218395542, "grad_norm": 0.0042721061035990715, "learning_rate": 2.2941369229840553e-05, "loss": 0.009166765213012695, "step": 179720 }, { "epoch": 0.7716184539295742, "grad_norm": 0.07033202797174454, "learning_rate": 2.293705750972293e-05, "loss": 0.13368966579437255, "step": 179730 }, { "epoch": 0.7716613860195942, "grad_norm": 1.5344808101654053, "learning_rate": 2.2932745789605307e-05, "loss": 0.3444932222366333, "step": 179740 }, { "epoch": 0.7717043181096143, "grad_norm": 0.7983564138412476, "learning_rate": 2.2928434069487685e-05, "loss": 0.033365219831466675, "step": 179750 }, { "epoch": 0.7717472501996342, "grad_norm": 3.2751638889312744, "learning_rate": 2.292412234937006e-05, "loss": 0.20990748405456544, "step": 179760 }, { "epoch": 0.7717901822896542, "grad_norm": 1.1416313648223877, "learning_rate": 2.2919810629252436e-05, "loss": 0.15725255012512207, "step": 179770 }, { "epoch": 0.7718331143796743, "grad_norm": 8.257407188415527, "learning_rate": 2.291549890913481e-05, "loss": 0.17753384113311768, "step": 179780 }, { "epoch": 0.7718760464696942, "grad_norm": 0.5357545018196106, "learning_rate": 2.2911187189017187e-05, "loss": 0.14562582969665527, "step": 179790 }, { "epoch": 0.7719189785597143, "grad_norm": 1.7779492139816284, "learning_rate": 2.2906875468899565e-05, "loss": 0.13873924016952516, "step": 179800 }, { "epoch": 0.7719619106497343, "grad_norm": 1.9327869415283203, "learning_rate": 2.2902563748781942e-05, "loss": 0.37070517539978026, "step": 179810 }, { "epoch": 0.7720048427397542, "grad_norm": 2.53825306892395, "learning_rate": 2.2898252028664316e-05, "loss": 0.36956005096435546, "step": 179820 }, { "epoch": 0.7720477748297743, "grad_norm": 0.028061002492904663, "learning_rate": 2.2893940308546693e-05, "loss": 0.05154078602790833, "step": 179830 }, { "epoch": 0.7720907069197943, "grad_norm": 0.0300615057349205, "learning_rate": 2.2889628588429067e-05, "loss": 0.19126038551330565, "step": 179840 }, { "epoch": 0.7721336390098142, "grad_norm": 12.17878246307373, "learning_rate": 2.2885316868311445e-05, "loss": 0.2945890426635742, "step": 179850 }, { "epoch": 0.7721765710998343, "grad_norm": 5.995707035064697, "learning_rate": 2.2881005148193822e-05, "loss": 0.09305886626243591, "step": 179860 }, { "epoch": 0.7722195031898543, "grad_norm": 0.06677145510911942, "learning_rate": 2.28766934280762e-05, "loss": 0.3105530500411987, "step": 179870 }, { "epoch": 0.7722624352798743, "grad_norm": 2.9172847270965576, "learning_rate": 2.2872381707958573e-05, "loss": 0.13742368221282958, "step": 179880 }, { "epoch": 0.7723053673698943, "grad_norm": 0.015876207500696182, "learning_rate": 2.286806998784095e-05, "loss": 0.09454439282417297, "step": 179890 }, { "epoch": 0.7723482994599143, "grad_norm": 0.019737502560019493, "learning_rate": 2.2863758267723324e-05, "loss": 0.29710509777069094, "step": 179900 }, { "epoch": 0.7723912315499343, "grad_norm": 1.5522518157958984, "learning_rate": 2.2859446547605702e-05, "loss": 0.12715576887130736, "step": 179910 }, { "epoch": 0.7724341636399543, "grad_norm": 2.2886972427368164, "learning_rate": 2.285513482748808e-05, "loss": 0.41290721893310545, "step": 179920 }, { "epoch": 0.7724770957299744, "grad_norm": 0.04067216068506241, "learning_rate": 2.2850823107370457e-05, "loss": 0.11689453125, "step": 179930 }, { "epoch": 0.7725200278199943, "grad_norm": 0.002634049393236637, "learning_rate": 2.284651138725283e-05, "loss": 0.25855841636657717, "step": 179940 }, { "epoch": 0.7725629599100143, "grad_norm": 1.6041786670684814, "learning_rate": 2.2842199667135208e-05, "loss": 0.21327719688415528, "step": 179950 }, { "epoch": 0.7726058920000344, "grad_norm": 0.007944965735077858, "learning_rate": 2.2837887947017582e-05, "loss": 0.22927529811859132, "step": 179960 }, { "epoch": 0.7726488240900543, "grad_norm": 0.025026388466358185, "learning_rate": 2.2833576226899963e-05, "loss": 0.09137169122695923, "step": 179970 }, { "epoch": 0.7726917561800744, "grad_norm": 0.0005246539367362857, "learning_rate": 2.2829264506782336e-05, "loss": 0.24379174709320067, "step": 179980 }, { "epoch": 0.7727346882700944, "grad_norm": 0.014372066594660282, "learning_rate": 2.2824952786664714e-05, "loss": 0.286321234703064, "step": 179990 }, { "epoch": 0.7727776203601143, "grad_norm": 0.008647691458463669, "learning_rate": 2.2820641066547088e-05, "loss": 0.13580285310745238, "step": 180000 }, { "epoch": 0.7727776203601143, "eval_loss": 0.38957008719444275, "eval_runtime": 27.4406, "eval_samples_per_second": 3.644, "eval_steps_per_second": 3.644, "step": 180000 }, { "epoch": 0.7728205524501344, "grad_norm": 0.9259838461875916, "learning_rate": 2.2816329346429465e-05, "loss": 0.5427755832672119, "step": 180010 }, { "epoch": 0.7728634845401544, "grad_norm": 41.22263717651367, "learning_rate": 2.281201762631184e-05, "loss": 0.22616727352142335, "step": 180020 }, { "epoch": 0.7729064166301743, "grad_norm": 1.4104810953140259, "learning_rate": 2.280770590619422e-05, "loss": 0.31243743896484377, "step": 180030 }, { "epoch": 0.7729493487201944, "grad_norm": 2.4222309589385986, "learning_rate": 2.2803394186076594e-05, "loss": 0.30273444652557374, "step": 180040 }, { "epoch": 0.7729922808102144, "grad_norm": 0.28040480613708496, "learning_rate": 2.279908246595897e-05, "loss": 0.07433147430419922, "step": 180050 }, { "epoch": 0.7730352129002344, "grad_norm": 2.304126739501953, "learning_rate": 2.279477074584135e-05, "loss": 0.17433713674545287, "step": 180060 }, { "epoch": 0.7730781449902544, "grad_norm": 0.02427615225315094, "learning_rate": 2.2790459025723722e-05, "loss": 0.24402711391448975, "step": 180070 }, { "epoch": 0.7731210770802744, "grad_norm": 0.0444793738424778, "learning_rate": 2.27861473056061e-05, "loss": 0.014486879110336304, "step": 180080 }, { "epoch": 0.7731640091702944, "grad_norm": 0.5242799520492554, "learning_rate": 2.2781835585488477e-05, "loss": 0.18714048862457275, "step": 180090 }, { "epoch": 0.7732069412603144, "grad_norm": 2.1512036323547363, "learning_rate": 2.2777523865370854e-05, "loss": 0.14754347801208495, "step": 180100 }, { "epoch": 0.7732498733503345, "grad_norm": 0.0005996071267873049, "learning_rate": 2.277321214525323e-05, "loss": 0.17570135593414307, "step": 180110 }, { "epoch": 0.7732928054403545, "grad_norm": 5.650803565979004, "learning_rate": 2.2768900425135606e-05, "loss": 0.2217705011367798, "step": 180120 }, { "epoch": 0.7733357375303744, "grad_norm": 2.09537410736084, "learning_rate": 2.276458870501798e-05, "loss": 0.15717799663543702, "step": 180130 }, { "epoch": 0.7733786696203945, "grad_norm": 0.002615907695144415, "learning_rate": 2.2760276984900357e-05, "loss": 0.21549594402313232, "step": 180140 }, { "epoch": 0.7734216017104145, "grad_norm": 0.013990242965519428, "learning_rate": 2.2755965264782734e-05, "loss": 0.010225190967321395, "step": 180150 }, { "epoch": 0.7734645338004345, "grad_norm": 0.33775243163108826, "learning_rate": 2.2751653544665112e-05, "loss": 0.34395158290863037, "step": 180160 }, { "epoch": 0.7735074658904545, "grad_norm": 0.20428995788097382, "learning_rate": 2.2747341824547486e-05, "loss": 0.08059185743331909, "step": 180170 }, { "epoch": 0.7735503979804745, "grad_norm": 2.344538450241089, "learning_rate": 2.2743030104429863e-05, "loss": 0.4059558391571045, "step": 180180 }, { "epoch": 0.7735933300704945, "grad_norm": 2.440988540649414, "learning_rate": 2.2738718384312237e-05, "loss": 0.2932596206665039, "step": 180190 }, { "epoch": 0.7736362621605145, "grad_norm": 0.0016101880464702845, "learning_rate": 2.2734406664194614e-05, "loss": 0.2458946466445923, "step": 180200 }, { "epoch": 0.7736791942505346, "grad_norm": 0.2261928915977478, "learning_rate": 2.273009494407699e-05, "loss": 0.06624903678894042, "step": 180210 }, { "epoch": 0.7737221263405545, "grad_norm": 0.02708575315773487, "learning_rate": 2.272578322395937e-05, "loss": 0.11959649324417114, "step": 180220 }, { "epoch": 0.7737650584305745, "grad_norm": 11.702322959899902, "learning_rate": 2.2721471503841743e-05, "loss": 0.2761650323867798, "step": 180230 }, { "epoch": 0.7738079905205946, "grad_norm": 0.006343924440443516, "learning_rate": 2.271715978372412e-05, "loss": 0.16064642667770385, "step": 180240 }, { "epoch": 0.7738509226106145, "grad_norm": 0.14308874309062958, "learning_rate": 2.2712848063606494e-05, "loss": 0.23857133388519286, "step": 180250 }, { "epoch": 0.7738938547006345, "grad_norm": 8.168606758117676, "learning_rate": 2.270853634348887e-05, "loss": 0.18918099403381347, "step": 180260 }, { "epoch": 0.7739367867906546, "grad_norm": 0.03108370304107666, "learning_rate": 2.270422462337125e-05, "loss": 0.19499192237854004, "step": 180270 }, { "epoch": 0.7739797188806745, "grad_norm": 1.625861644744873, "learning_rate": 2.2699912903253626e-05, "loss": 0.39890251159667967, "step": 180280 }, { "epoch": 0.7740226509706946, "grad_norm": 0.5996838212013245, "learning_rate": 2.2695601183136e-05, "loss": 0.08134393692016602, "step": 180290 }, { "epoch": 0.7740655830607146, "grad_norm": 2.1445157527923584, "learning_rate": 2.2691289463018378e-05, "loss": 0.22284488677978515, "step": 180300 }, { "epoch": 0.7741085151507345, "grad_norm": 0.04387044161558151, "learning_rate": 2.268697774290075e-05, "loss": 0.25220816135406493, "step": 180310 }, { "epoch": 0.7741514472407546, "grad_norm": 1.3502665758132935, "learning_rate": 2.2682666022783132e-05, "loss": 0.2518911600112915, "step": 180320 }, { "epoch": 0.7741943793307746, "grad_norm": 2.0550084114074707, "learning_rate": 2.2678354302665506e-05, "loss": 0.1281597137451172, "step": 180330 }, { "epoch": 0.7742373114207945, "grad_norm": 2.0173451900482178, "learning_rate": 2.2674042582547884e-05, "loss": 0.07103241682052612, "step": 180340 }, { "epoch": 0.7742802435108146, "grad_norm": 1.8583507537841797, "learning_rate": 2.2669730862430257e-05, "loss": 0.2636585235595703, "step": 180350 }, { "epoch": 0.7743231756008346, "grad_norm": 0.00575015926733613, "learning_rate": 2.2665419142312635e-05, "loss": 0.3370833873748779, "step": 180360 }, { "epoch": 0.7743661076908546, "grad_norm": 1.0144314765930176, "learning_rate": 2.266110742219501e-05, "loss": 0.16626391410827637, "step": 180370 }, { "epoch": 0.7744090397808746, "grad_norm": 0.0012669252464547753, "learning_rate": 2.265679570207739e-05, "loss": 0.11038153171539307, "step": 180380 }, { "epoch": 0.7744519718708947, "grad_norm": 0.08255119621753693, "learning_rate": 2.2652483981959763e-05, "loss": 0.12617795467376708, "step": 180390 }, { "epoch": 0.7744949039609146, "grad_norm": 0.012879389338195324, "learning_rate": 2.264817226184214e-05, "loss": 0.3277858018875122, "step": 180400 }, { "epoch": 0.7745378360509346, "grad_norm": 9.729223251342773, "learning_rate": 2.2643860541724515e-05, "loss": 0.20663437843322754, "step": 180410 }, { "epoch": 0.7745807681409547, "grad_norm": 0.3886057734489441, "learning_rate": 2.2639548821606892e-05, "loss": 0.14941807985305786, "step": 180420 }, { "epoch": 0.7746237002309746, "grad_norm": 0.0036090456414967775, "learning_rate": 2.263523710148927e-05, "loss": 0.30705966949462893, "step": 180430 }, { "epoch": 0.7746666323209946, "grad_norm": 0.02179667167365551, "learning_rate": 2.2630925381371647e-05, "loss": 0.13955130577087402, "step": 180440 }, { "epoch": 0.7747095644110147, "grad_norm": 0.003469049697741866, "learning_rate": 2.2626613661254024e-05, "loss": 0.08940157890319825, "step": 180450 }, { "epoch": 0.7747524965010346, "grad_norm": 1.649953007698059, "learning_rate": 2.2622301941136398e-05, "loss": 0.2585058450698853, "step": 180460 }, { "epoch": 0.7747954285910547, "grad_norm": 0.0027396460063755512, "learning_rate": 2.2617990221018775e-05, "loss": 0.20617377758026123, "step": 180470 }, { "epoch": 0.7748383606810747, "grad_norm": 0.0049470034427940845, "learning_rate": 2.261367850090115e-05, "loss": 0.05881868600845337, "step": 180480 }, { "epoch": 0.7748812927710946, "grad_norm": 4.35863733291626, "learning_rate": 2.2609366780783527e-05, "loss": 0.24249038696289063, "step": 180490 }, { "epoch": 0.7749242248611147, "grad_norm": 2.722135305404663, "learning_rate": 2.2605055060665904e-05, "loss": 0.32448766231536863, "step": 180500 }, { "epoch": 0.7749671569511347, "grad_norm": 0.010333052836358547, "learning_rate": 2.260074334054828e-05, "loss": 0.32258646488189696, "step": 180510 }, { "epoch": 0.7750100890411546, "grad_norm": 0.0034809063654392958, "learning_rate": 2.2596431620430655e-05, "loss": 0.09903744459152222, "step": 180520 }, { "epoch": 0.7750530211311747, "grad_norm": 0.9046080112457275, "learning_rate": 2.2592119900313033e-05, "loss": 0.39251620769500734, "step": 180530 }, { "epoch": 0.7750959532211947, "grad_norm": 0.0639759823679924, "learning_rate": 2.2587808180195407e-05, "loss": 0.021153028309345245, "step": 180540 }, { "epoch": 0.7751388853112148, "grad_norm": 3.202190637588501, "learning_rate": 2.2583496460077784e-05, "loss": 0.14328465461730958, "step": 180550 }, { "epoch": 0.7751818174012347, "grad_norm": 2.1860907077789307, "learning_rate": 2.257918473996016e-05, "loss": 0.4133021831512451, "step": 180560 }, { "epoch": 0.7752247494912547, "grad_norm": 34.30323791503906, "learning_rate": 2.257487301984254e-05, "loss": 0.08191108703613281, "step": 180570 }, { "epoch": 0.7752676815812748, "grad_norm": 0.9189937114715576, "learning_rate": 2.2570561299724913e-05, "loss": 0.32755978107452394, "step": 180580 }, { "epoch": 0.7753106136712947, "grad_norm": 0.04545053094625473, "learning_rate": 2.256624957960729e-05, "loss": 0.07546036839485168, "step": 180590 }, { "epoch": 0.7753535457613148, "grad_norm": 0.4012831151485443, "learning_rate": 2.2561937859489664e-05, "loss": 0.2035548448562622, "step": 180600 }, { "epoch": 0.7753964778513348, "grad_norm": 0.5780662298202515, "learning_rate": 2.255762613937204e-05, "loss": 0.02327096462249756, "step": 180610 }, { "epoch": 0.7754394099413547, "grad_norm": 1.632012963294983, "learning_rate": 2.255331441925442e-05, "loss": 0.2648672580718994, "step": 180620 }, { "epoch": 0.7754823420313748, "grad_norm": 0.15308120846748352, "learning_rate": 2.2549002699136796e-05, "loss": 0.17612071037292482, "step": 180630 }, { "epoch": 0.7755252741213948, "grad_norm": 0.047732140868902206, "learning_rate": 2.254469097901917e-05, "loss": 0.08916597366333008, "step": 180640 }, { "epoch": 0.7755682062114148, "grad_norm": 5.136704444885254, "learning_rate": 2.2540379258901547e-05, "loss": 0.42125658988952636, "step": 180650 }, { "epoch": 0.7756111383014348, "grad_norm": 0.7114421725273132, "learning_rate": 2.253606753878392e-05, "loss": 0.37050158977508546, "step": 180660 }, { "epoch": 0.7756540703914548, "grad_norm": 0.008971183560788631, "learning_rate": 2.25317558186663e-05, "loss": 0.18429280519485475, "step": 180670 }, { "epoch": 0.7756970024814748, "grad_norm": 0.017602117732167244, "learning_rate": 2.2527444098548676e-05, "loss": 0.0649482250213623, "step": 180680 }, { "epoch": 0.7757399345714948, "grad_norm": 1.1628626585006714, "learning_rate": 2.2523132378431053e-05, "loss": 0.2136044979095459, "step": 180690 }, { "epoch": 0.7757828666615149, "grad_norm": 0.6008757948875427, "learning_rate": 2.2518820658313427e-05, "loss": 0.2509660243988037, "step": 180700 }, { "epoch": 0.7758257987515348, "grad_norm": 0.016465460881590843, "learning_rate": 2.2514508938195805e-05, "loss": 0.1221784234046936, "step": 180710 }, { "epoch": 0.7758687308415548, "grad_norm": 0.5145441889762878, "learning_rate": 2.251019721807818e-05, "loss": 0.22736752033233643, "step": 180720 }, { "epoch": 0.7759116629315749, "grad_norm": 3.371095895767212, "learning_rate": 2.250588549796056e-05, "loss": 0.3835928201675415, "step": 180730 }, { "epoch": 0.7759545950215948, "grad_norm": 3.624058485031128, "learning_rate": 2.2501573777842933e-05, "loss": 0.15203168392181396, "step": 180740 }, { "epoch": 0.7759975271116148, "grad_norm": 6.183959484100342, "learning_rate": 2.249726205772531e-05, "loss": 0.13569035530090331, "step": 180750 }, { "epoch": 0.7760404592016349, "grad_norm": 0.5464549660682678, "learning_rate": 2.2492950337607685e-05, "loss": 0.08163414001464844, "step": 180760 }, { "epoch": 0.7760833912916548, "grad_norm": 2.6773977279663086, "learning_rate": 2.2488638617490062e-05, "loss": 0.20119915008544922, "step": 180770 }, { "epoch": 0.7761263233816749, "grad_norm": 0.17884625494480133, "learning_rate": 2.2484326897372436e-05, "loss": 0.31024243831634524, "step": 180780 }, { "epoch": 0.7761692554716949, "grad_norm": 0.003842623671516776, "learning_rate": 2.2480015177254817e-05, "loss": 0.18495142459869385, "step": 180790 }, { "epoch": 0.7762121875617148, "grad_norm": 0.0021976360585540533, "learning_rate": 2.2475703457137194e-05, "loss": 0.15571430921554566, "step": 180800 }, { "epoch": 0.7762551196517349, "grad_norm": 0.04043019562959671, "learning_rate": 2.2471391737019568e-05, "loss": 0.12830135822296143, "step": 180810 }, { "epoch": 0.7762980517417549, "grad_norm": 0.7248302698135376, "learning_rate": 2.2467080016901945e-05, "loss": 0.2668464183807373, "step": 180820 }, { "epoch": 0.7763409838317749, "grad_norm": 0.02985418029129505, "learning_rate": 2.246276829678432e-05, "loss": 0.23797941207885742, "step": 180830 }, { "epoch": 0.7763839159217949, "grad_norm": 0.005406526383012533, "learning_rate": 2.2458456576666696e-05, "loss": 0.13979263305664064, "step": 180840 }, { "epoch": 0.7764268480118149, "grad_norm": 0.7003358602523804, "learning_rate": 2.2454144856549074e-05, "loss": 0.2024545907974243, "step": 180850 }, { "epoch": 0.7764697801018349, "grad_norm": 2.006132125854492, "learning_rate": 2.244983313643145e-05, "loss": 0.36295666694641116, "step": 180860 }, { "epoch": 0.7765127121918549, "grad_norm": 0.3677394986152649, "learning_rate": 2.2445521416313825e-05, "loss": 0.21423032283782958, "step": 180870 }, { "epoch": 0.776555644281875, "grad_norm": 0.0009157925960607827, "learning_rate": 2.2441209696196202e-05, "loss": 0.19082083702087402, "step": 180880 }, { "epoch": 0.7765985763718949, "grad_norm": 0.44521912932395935, "learning_rate": 2.2436897976078576e-05, "loss": 0.39336583614349363, "step": 180890 }, { "epoch": 0.7766415084619149, "grad_norm": 0.010876310989260674, "learning_rate": 2.2432586255960954e-05, "loss": 0.3106126308441162, "step": 180900 }, { "epoch": 0.776684440551935, "grad_norm": 2.960257053375244, "learning_rate": 2.242827453584333e-05, "loss": 0.27347769737243655, "step": 180910 }, { "epoch": 0.7767273726419549, "grad_norm": 0.3393663465976715, "learning_rate": 2.242396281572571e-05, "loss": 0.27551591396331787, "step": 180920 }, { "epoch": 0.776770304731975, "grad_norm": 1.4277352094650269, "learning_rate": 2.2419651095608082e-05, "loss": 0.12645928859710692, "step": 180930 }, { "epoch": 0.776813236821995, "grad_norm": 0.007761645596474409, "learning_rate": 2.241533937549046e-05, "loss": 0.20199737548828126, "step": 180940 }, { "epoch": 0.7768561689120149, "grad_norm": 0.00503843417391181, "learning_rate": 2.2411027655372834e-05, "loss": 0.05249316096305847, "step": 180950 }, { "epoch": 0.776899101002035, "grad_norm": 0.013779071159660816, "learning_rate": 2.240671593525521e-05, "loss": 0.0818875014781952, "step": 180960 }, { "epoch": 0.776942033092055, "grad_norm": 0.001981938723474741, "learning_rate": 2.240240421513759e-05, "loss": 0.12001742124557495, "step": 180970 }, { "epoch": 0.776984965182075, "grad_norm": 0.10726061463356018, "learning_rate": 2.2398092495019966e-05, "loss": 0.11583765745162963, "step": 180980 }, { "epoch": 0.777027897272095, "grad_norm": 0.0004066908441018313, "learning_rate": 2.239378077490234e-05, "loss": 0.07836989164352418, "step": 180990 }, { "epoch": 0.777070829362115, "grad_norm": 0.5633981227874756, "learning_rate": 2.2389469054784717e-05, "loss": 0.08607017993927002, "step": 181000 }, { "epoch": 0.777070829362115, "eval_loss": 0.38105422258377075, "eval_runtime": 27.5029, "eval_samples_per_second": 3.636, "eval_steps_per_second": 3.636, "step": 181000 }, { "epoch": 0.7771137614521351, "grad_norm": 0.2542254328727722, "learning_rate": 2.238515733466709e-05, "loss": 0.24805686473846436, "step": 181010 }, { "epoch": 0.777156693542155, "grad_norm": 2.633180856704712, "learning_rate": 2.238084561454947e-05, "loss": 0.12436635494232177, "step": 181020 }, { "epoch": 0.777199625632175, "grad_norm": 0.5145475268363953, "learning_rate": 2.2376533894431846e-05, "loss": 0.13324408531188964, "step": 181030 }, { "epoch": 0.7772425577221951, "grad_norm": 0.002931904746219516, "learning_rate": 2.2372222174314223e-05, "loss": 0.13255962133407592, "step": 181040 }, { "epoch": 0.777285489812215, "grad_norm": 0.047408297657966614, "learning_rate": 2.2367910454196597e-05, "loss": 0.13203340768814087, "step": 181050 }, { "epoch": 0.7773284219022351, "grad_norm": 3.426299810409546, "learning_rate": 2.2363598734078974e-05, "loss": 0.4311577796936035, "step": 181060 }, { "epoch": 0.7773713539922551, "grad_norm": 0.04267265275120735, "learning_rate": 2.2359287013961348e-05, "loss": 0.3033830404281616, "step": 181070 }, { "epoch": 0.777414286082275, "grad_norm": 1.0460861921310425, "learning_rate": 2.235497529384373e-05, "loss": 0.2784090995788574, "step": 181080 }, { "epoch": 0.7774572181722951, "grad_norm": 0.10757939517498016, "learning_rate": 2.2350663573726103e-05, "loss": 0.2997272491455078, "step": 181090 }, { "epoch": 0.7775001502623151, "grad_norm": 0.29957115650177, "learning_rate": 2.234635185360848e-05, "loss": 0.16533089876174928, "step": 181100 }, { "epoch": 0.777543082352335, "grad_norm": 0.01229582354426384, "learning_rate": 2.2342040133490854e-05, "loss": 0.14539920091629027, "step": 181110 }, { "epoch": 0.7775860144423551, "grad_norm": 5.837769508361816, "learning_rate": 2.233772841337323e-05, "loss": 0.22391026020050048, "step": 181120 }, { "epoch": 0.7776289465323751, "grad_norm": 0.005174816586077213, "learning_rate": 2.2333416693255606e-05, "loss": 0.1318308472633362, "step": 181130 }, { "epoch": 0.7776718786223951, "grad_norm": 1.294174075126648, "learning_rate": 2.2329104973137986e-05, "loss": 0.1779860258102417, "step": 181140 }, { "epoch": 0.7777148107124151, "grad_norm": 1.2440794706344604, "learning_rate": 2.2324793253020364e-05, "loss": 0.5334236145019531, "step": 181150 }, { "epoch": 0.7777577428024351, "grad_norm": 59.03296661376953, "learning_rate": 2.2320481532902738e-05, "loss": 0.30062379837036135, "step": 181160 }, { "epoch": 0.7778006748924551, "grad_norm": 2.072064161300659, "learning_rate": 2.2316169812785115e-05, "loss": 0.15776560306549073, "step": 181170 }, { "epoch": 0.7778436069824751, "grad_norm": 0.0024535886477679014, "learning_rate": 2.231185809266749e-05, "loss": 0.04311685562133789, "step": 181180 }, { "epoch": 0.7778865390724952, "grad_norm": 6.453382968902588, "learning_rate": 2.2307546372549866e-05, "loss": 0.30128116607666017, "step": 181190 }, { "epoch": 0.7779294711625151, "grad_norm": 0.003028257517144084, "learning_rate": 2.2303234652432244e-05, "loss": 0.24861340522766112, "step": 181200 }, { "epoch": 0.7779724032525351, "grad_norm": 0.5890544056892395, "learning_rate": 2.229892293231462e-05, "loss": 0.20256829261779785, "step": 181210 }, { "epoch": 0.7780153353425552, "grad_norm": 0.1589733362197876, "learning_rate": 2.2294611212196995e-05, "loss": 0.031123009324073792, "step": 181220 }, { "epoch": 0.7780582674325751, "grad_norm": 0.0035427564289420843, "learning_rate": 2.2290299492079372e-05, "loss": 0.1692456603050232, "step": 181230 }, { "epoch": 0.7781011995225952, "grad_norm": 1.0639280080795288, "learning_rate": 2.2285987771961746e-05, "loss": 0.19282352924346924, "step": 181240 }, { "epoch": 0.7781441316126152, "grad_norm": 0.019336223602294922, "learning_rate": 2.2281676051844123e-05, "loss": 0.11884394884109498, "step": 181250 }, { "epoch": 0.7781870637026351, "grad_norm": 3.3598930835723877, "learning_rate": 2.22773643317265e-05, "loss": 0.11887574195861816, "step": 181260 }, { "epoch": 0.7782299957926552, "grad_norm": 0.0136167136952281, "learning_rate": 2.2273052611608878e-05, "loss": 0.321718430519104, "step": 181270 }, { "epoch": 0.7782729278826752, "grad_norm": 0.06143486872315407, "learning_rate": 2.2268740891491252e-05, "loss": 0.01671365797519684, "step": 181280 }, { "epoch": 0.7783158599726951, "grad_norm": 0.07697362452745438, "learning_rate": 2.226442917137363e-05, "loss": 0.1757973313331604, "step": 181290 }, { "epoch": 0.7783587920627152, "grad_norm": 4.821767330169678, "learning_rate": 2.2260117451256003e-05, "loss": 0.3337341070175171, "step": 181300 }, { "epoch": 0.7784017241527352, "grad_norm": 0.8795390129089355, "learning_rate": 2.225580573113838e-05, "loss": 0.24267683029174805, "step": 181310 }, { "epoch": 0.7784446562427552, "grad_norm": 6.890802383422852, "learning_rate": 2.2251494011020758e-05, "loss": 0.19483572244644165, "step": 181320 }, { "epoch": 0.7784875883327752, "grad_norm": 1.7252296209335327, "learning_rate": 2.2247182290903135e-05, "loss": 0.3330058574676514, "step": 181330 }, { "epoch": 0.7785305204227952, "grad_norm": 0.007271029055118561, "learning_rate": 2.224287057078551e-05, "loss": 0.13872404098510743, "step": 181340 }, { "epoch": 0.7785734525128152, "grad_norm": 0.10837399959564209, "learning_rate": 2.2238558850667887e-05, "loss": 0.08872743844985961, "step": 181350 }, { "epoch": 0.7786163846028352, "grad_norm": 0.012545960955321789, "learning_rate": 2.223424713055026e-05, "loss": 0.1828855037689209, "step": 181360 }, { "epoch": 0.7786593166928553, "grad_norm": 1.8288440704345703, "learning_rate": 2.2229935410432638e-05, "loss": 0.32746448516845705, "step": 181370 }, { "epoch": 0.7787022487828752, "grad_norm": 0.5537493824958801, "learning_rate": 2.2225623690315015e-05, "loss": 0.37675645351409914, "step": 181380 }, { "epoch": 0.7787451808728952, "grad_norm": 0.10458727180957794, "learning_rate": 2.2221311970197393e-05, "loss": 0.12987738847732544, "step": 181390 }, { "epoch": 0.7787881129629153, "grad_norm": 0.005267029628157616, "learning_rate": 2.2217000250079767e-05, "loss": 0.10624191761016846, "step": 181400 }, { "epoch": 0.7788310450529353, "grad_norm": 13.237812995910645, "learning_rate": 2.2212688529962144e-05, "loss": 0.28482346534729003, "step": 181410 }, { "epoch": 0.7788739771429553, "grad_norm": 0.005658499430865049, "learning_rate": 2.2208376809844518e-05, "loss": 0.0005017845891416073, "step": 181420 }, { "epoch": 0.7789169092329753, "grad_norm": 0.040372833609580994, "learning_rate": 2.2204065089726895e-05, "loss": 0.0969342827796936, "step": 181430 }, { "epoch": 0.7789598413229953, "grad_norm": 0.20668336749076843, "learning_rate": 2.2199753369609273e-05, "loss": 0.12974029779434204, "step": 181440 }, { "epoch": 0.7790027734130153, "grad_norm": 0.0021882448345422745, "learning_rate": 2.219544164949165e-05, "loss": 0.07826072573661805, "step": 181450 }, { "epoch": 0.7790457055030353, "grad_norm": 0.03661274537444115, "learning_rate": 2.2191129929374024e-05, "loss": 0.1869038701057434, "step": 181460 }, { "epoch": 0.7790886375930554, "grad_norm": 0.0007741366280242801, "learning_rate": 2.21868182092564e-05, "loss": 0.369673752784729, "step": 181470 }, { "epoch": 0.7791315696830753, "grad_norm": 0.009072545915842056, "learning_rate": 2.2182506489138775e-05, "loss": 0.10426502227783203, "step": 181480 }, { "epoch": 0.7791745017730953, "grad_norm": 0.010089321993291378, "learning_rate": 2.2178194769021156e-05, "loss": 0.19302576780319214, "step": 181490 }, { "epoch": 0.7792174338631154, "grad_norm": 0.07819779217243195, "learning_rate": 2.217388304890353e-05, "loss": 0.1887624144554138, "step": 181500 }, { "epoch": 0.7792603659531353, "grad_norm": 1.0896580219268799, "learning_rate": 2.2169571328785907e-05, "loss": 0.17373030185699462, "step": 181510 }, { "epoch": 0.7793032980431553, "grad_norm": 9.74447830230929e-05, "learning_rate": 2.2165259608668285e-05, "loss": 0.0991288185119629, "step": 181520 }, { "epoch": 0.7793462301331754, "grad_norm": 0.029820624738931656, "learning_rate": 2.216094788855066e-05, "loss": 0.1576171636581421, "step": 181530 }, { "epoch": 0.7793891622231953, "grad_norm": 0.5509734153747559, "learning_rate": 2.2156636168433036e-05, "loss": 0.18902976512908937, "step": 181540 }, { "epoch": 0.7794320943132154, "grad_norm": 5.502527713775635, "learning_rate": 2.2152324448315413e-05, "loss": 0.3472958326339722, "step": 181550 }, { "epoch": 0.7794750264032354, "grad_norm": 3.362891435623169, "learning_rate": 2.214801272819779e-05, "loss": 0.3285074234008789, "step": 181560 }, { "epoch": 0.7795179584932553, "grad_norm": 0.019354185089468956, "learning_rate": 2.2143701008080165e-05, "loss": 0.11236345767974854, "step": 181570 }, { "epoch": 0.7795608905832754, "grad_norm": 0.0021464480087161064, "learning_rate": 2.2139389287962542e-05, "loss": 0.05739705562591553, "step": 181580 }, { "epoch": 0.7796038226732954, "grad_norm": 2.1688601970672607, "learning_rate": 2.2135077567844916e-05, "loss": 0.2590325832366943, "step": 181590 }, { "epoch": 0.7796467547633154, "grad_norm": 0.0046370062045753, "learning_rate": 2.2130765847727293e-05, "loss": 0.06834167838096619, "step": 181600 }, { "epoch": 0.7796896868533354, "grad_norm": 7.68601655960083, "learning_rate": 2.212645412760967e-05, "loss": 0.256929874420166, "step": 181610 }, { "epoch": 0.7797326189433554, "grad_norm": 0.034900128841400146, "learning_rate": 2.2122142407492048e-05, "loss": 0.2113412380218506, "step": 181620 }, { "epoch": 0.7797755510333754, "grad_norm": 0.0029798184987157583, "learning_rate": 2.2117830687374422e-05, "loss": 0.14006327390670775, "step": 181630 }, { "epoch": 0.7798184831233954, "grad_norm": 0.0022323820739984512, "learning_rate": 2.21135189672568e-05, "loss": 0.07900501489639282, "step": 181640 }, { "epoch": 0.7798614152134155, "grad_norm": 1.9272010326385498, "learning_rate": 2.2109207247139173e-05, "loss": 0.33671183586120607, "step": 181650 }, { "epoch": 0.7799043473034354, "grad_norm": 0.0035930187441408634, "learning_rate": 2.210489552702155e-05, "loss": 0.19001634120941163, "step": 181660 }, { "epoch": 0.7799472793934554, "grad_norm": 1.291403889656067, "learning_rate": 2.2100583806903928e-05, "loss": 0.39527206420898436, "step": 181670 }, { "epoch": 0.7799902114834755, "grad_norm": 0.18798917531967163, "learning_rate": 2.2096272086786305e-05, "loss": 0.2213963270187378, "step": 181680 }, { "epoch": 0.7800331435734954, "grad_norm": 4.843586444854736, "learning_rate": 2.209196036666868e-05, "loss": 0.2306389331817627, "step": 181690 }, { "epoch": 0.7800760756635154, "grad_norm": 0.06972838193178177, "learning_rate": 2.2087648646551056e-05, "loss": 0.17254316806793213, "step": 181700 }, { "epoch": 0.7801190077535355, "grad_norm": 0.00852520577609539, "learning_rate": 2.208333692643343e-05, "loss": 0.018256105482578278, "step": 181710 }, { "epoch": 0.7801619398435554, "grad_norm": 0.006862110458314419, "learning_rate": 2.2079025206315808e-05, "loss": 0.21431200504302977, "step": 181720 }, { "epoch": 0.7802048719335755, "grad_norm": 0.39576256275177, "learning_rate": 2.2074713486198185e-05, "loss": 0.31564376354217527, "step": 181730 }, { "epoch": 0.7802478040235955, "grad_norm": 3.400718927383423, "learning_rate": 2.2070401766080562e-05, "loss": 0.22937994003295897, "step": 181740 }, { "epoch": 0.7802907361136154, "grad_norm": 0.1543576568365097, "learning_rate": 2.2066090045962936e-05, "loss": 0.2346953868865967, "step": 181750 }, { "epoch": 0.7803336682036355, "grad_norm": 1.9610964059829712, "learning_rate": 2.2061778325845314e-05, "loss": 0.2245539665222168, "step": 181760 }, { "epoch": 0.7803766002936555, "grad_norm": 10.084091186523438, "learning_rate": 2.2057466605727688e-05, "loss": 0.3631103038787842, "step": 181770 }, { "epoch": 0.7804195323836755, "grad_norm": 0.3080121874809265, "learning_rate": 2.2053154885610065e-05, "loss": 0.2070967435836792, "step": 181780 }, { "epoch": 0.7804624644736955, "grad_norm": 0.010580839589238167, "learning_rate": 2.2048843165492442e-05, "loss": 0.3262079477310181, "step": 181790 }, { "epoch": 0.7805053965637155, "grad_norm": 0.000504847674164921, "learning_rate": 2.204453144537482e-05, "loss": 0.41712441444396975, "step": 181800 }, { "epoch": 0.7805483286537355, "grad_norm": 0.0038607046008110046, "learning_rate": 2.2040219725257194e-05, "loss": 0.15046488046646117, "step": 181810 }, { "epoch": 0.7805912607437555, "grad_norm": 0.027759751304984093, "learning_rate": 2.203590800513957e-05, "loss": 0.16718239784240724, "step": 181820 }, { "epoch": 0.7806341928337756, "grad_norm": 0.004910132847726345, "learning_rate": 2.2031596285021945e-05, "loss": 0.37359812259674074, "step": 181830 }, { "epoch": 0.7806771249237956, "grad_norm": 0.013715947978198528, "learning_rate": 2.2027284564904326e-05, "loss": 0.19829065799713136, "step": 181840 }, { "epoch": 0.7807200570138155, "grad_norm": 0.6874580979347229, "learning_rate": 2.20229728447867e-05, "loss": 0.15471491813659669, "step": 181850 }, { "epoch": 0.7807629891038356, "grad_norm": 0.02267182432115078, "learning_rate": 2.2018661124669077e-05, "loss": 0.12024468183517456, "step": 181860 }, { "epoch": 0.7808059211938556, "grad_norm": 0.0022773402743041515, "learning_rate": 2.201434940455145e-05, "loss": 0.1695494294166565, "step": 181870 }, { "epoch": 0.7808488532838755, "grad_norm": 1.056784749031067, "learning_rate": 2.201003768443383e-05, "loss": 0.1925033450126648, "step": 181880 }, { "epoch": 0.7808917853738956, "grad_norm": 0.05219695717096329, "learning_rate": 2.2005725964316206e-05, "loss": 0.20744595527648926, "step": 181890 }, { "epoch": 0.7809347174639156, "grad_norm": 1.8505367040634155, "learning_rate": 2.2001414244198583e-05, "loss": 0.38707144260406495, "step": 181900 }, { "epoch": 0.7809776495539356, "grad_norm": 0.035705603659152985, "learning_rate": 2.199710252408096e-05, "loss": 0.12455270290374756, "step": 181910 }, { "epoch": 0.7810205816439556, "grad_norm": 0.4392683804035187, "learning_rate": 2.1992790803963334e-05, "loss": 0.22299594879150392, "step": 181920 }, { "epoch": 0.7810635137339756, "grad_norm": 14.645524024963379, "learning_rate": 2.198847908384571e-05, "loss": 0.17089887857437133, "step": 181930 }, { "epoch": 0.7811064458239956, "grad_norm": 0.014898652210831642, "learning_rate": 2.1984167363728086e-05, "loss": 0.22336406707763673, "step": 181940 }, { "epoch": 0.7811493779140156, "grad_norm": 0.2620161771774292, "learning_rate": 2.1979855643610463e-05, "loss": 0.11998844146728516, "step": 181950 }, { "epoch": 0.7811923100040357, "grad_norm": 0.5950635075569153, "learning_rate": 2.197554392349284e-05, "loss": 0.09730787873268128, "step": 181960 }, { "epoch": 0.7812352420940556, "grad_norm": 0.036811236292123795, "learning_rate": 2.1971232203375218e-05, "loss": 0.20459918975830077, "step": 181970 }, { "epoch": 0.7812781741840756, "grad_norm": 1.3045154809951782, "learning_rate": 2.196692048325759e-05, "loss": 0.1823878765106201, "step": 181980 }, { "epoch": 0.7813211062740957, "grad_norm": 0.025310710072517395, "learning_rate": 2.196260876313997e-05, "loss": 0.11046243906021118, "step": 181990 }, { "epoch": 0.7813640383641156, "grad_norm": 0.0037307622842490673, "learning_rate": 2.1958297043022343e-05, "loss": 0.2978527307510376, "step": 182000 }, { "epoch": 0.7813640383641156, "eval_loss": 0.3825055658817291, "eval_runtime": 27.4746, "eval_samples_per_second": 3.64, "eval_steps_per_second": 3.64, "step": 182000 }, { "epoch": 0.7814069704541357, "grad_norm": 0.23781545460224152, "learning_rate": 2.195398532290472e-05, "loss": 0.09041933417320251, "step": 182010 }, { "epoch": 0.7814499025441557, "grad_norm": 0.013464709743857384, "learning_rate": 2.1949673602787098e-05, "loss": 0.08655680418014526, "step": 182020 }, { "epoch": 0.7814928346341756, "grad_norm": 0.0028776293620467186, "learning_rate": 2.1945361882669475e-05, "loss": 0.17070761919021607, "step": 182030 }, { "epoch": 0.7815357667241957, "grad_norm": 0.023576032370328903, "learning_rate": 2.194105016255185e-05, "loss": 0.1503755569458008, "step": 182040 }, { "epoch": 0.7815786988142157, "grad_norm": 4.2293853759765625, "learning_rate": 2.1936738442434226e-05, "loss": 0.24403553009033202, "step": 182050 }, { "epoch": 0.7816216309042356, "grad_norm": 8.121377944946289, "learning_rate": 2.19324267223166e-05, "loss": 0.396437668800354, "step": 182060 }, { "epoch": 0.7816645629942557, "grad_norm": 2.9311256408691406, "learning_rate": 2.1928115002198978e-05, "loss": 0.24542434215545655, "step": 182070 }, { "epoch": 0.7817074950842757, "grad_norm": 1.1242650747299194, "learning_rate": 2.1923803282081355e-05, "loss": 0.20290498733520507, "step": 182080 }, { "epoch": 0.7817504271742957, "grad_norm": 0.004131468012928963, "learning_rate": 2.1919491561963732e-05, "loss": 0.29813075065612793, "step": 182090 }, { "epoch": 0.7817933592643157, "grad_norm": 0.9652837514877319, "learning_rate": 2.1915179841846106e-05, "loss": 0.05880681276321411, "step": 182100 }, { "epoch": 0.7818362913543357, "grad_norm": 0.8078187704086304, "learning_rate": 2.1910868121728484e-05, "loss": 0.22447094917297364, "step": 182110 }, { "epoch": 0.7818792234443557, "grad_norm": 0.006744046695530415, "learning_rate": 2.1906556401610857e-05, "loss": 0.17717311382293702, "step": 182120 }, { "epoch": 0.7819221555343757, "grad_norm": 0.02256331779062748, "learning_rate": 2.1902244681493235e-05, "loss": 0.08970722556114197, "step": 182130 }, { "epoch": 0.7819650876243958, "grad_norm": 7.174070835113525, "learning_rate": 2.1897932961375612e-05, "loss": 0.3284448623657227, "step": 182140 }, { "epoch": 0.7820080197144157, "grad_norm": 0.005719190463423729, "learning_rate": 2.189362124125799e-05, "loss": 0.30143983364105226, "step": 182150 }, { "epoch": 0.7820509518044357, "grad_norm": 2.5708513259887695, "learning_rate": 2.1889309521140363e-05, "loss": 0.13481109142303466, "step": 182160 }, { "epoch": 0.7820938838944558, "grad_norm": 0.009437017142772675, "learning_rate": 2.188499780102274e-05, "loss": 0.07504384517669678, "step": 182170 }, { "epoch": 0.7821368159844757, "grad_norm": 0.4418356716632843, "learning_rate": 2.1880686080905115e-05, "loss": 0.18481459617614746, "step": 182180 }, { "epoch": 0.7821797480744958, "grad_norm": 0.9562405347824097, "learning_rate": 2.1876374360787495e-05, "loss": 0.2165064811706543, "step": 182190 }, { "epoch": 0.7822226801645158, "grad_norm": 1.1468602418899536, "learning_rate": 2.187206264066987e-05, "loss": 0.2354745388031006, "step": 182200 }, { "epoch": 0.7822656122545357, "grad_norm": 0.0384535938501358, "learning_rate": 2.1867750920552247e-05, "loss": 0.2891047477722168, "step": 182210 }, { "epoch": 0.7823085443445558, "grad_norm": 0.06366550922393799, "learning_rate": 2.186343920043462e-05, "loss": 0.1543177843093872, "step": 182220 }, { "epoch": 0.7823514764345758, "grad_norm": 0.36118966341018677, "learning_rate": 2.1859127480316998e-05, "loss": 0.16045292615890502, "step": 182230 }, { "epoch": 0.7823944085245957, "grad_norm": 0.000742246164008975, "learning_rate": 2.1854815760199372e-05, "loss": 0.18205565214157104, "step": 182240 }, { "epoch": 0.7824373406146158, "grad_norm": 0.21004563570022583, "learning_rate": 2.1850504040081753e-05, "loss": 0.17873560190200805, "step": 182250 }, { "epoch": 0.7824802727046358, "grad_norm": 0.27570030093193054, "learning_rate": 2.184619231996413e-05, "loss": 0.2319551467895508, "step": 182260 }, { "epoch": 0.7825232047946559, "grad_norm": 0.9478753805160522, "learning_rate": 2.1841880599846504e-05, "loss": 0.3653576374053955, "step": 182270 }, { "epoch": 0.7825661368846758, "grad_norm": 0.0772586390376091, "learning_rate": 2.183756887972888e-05, "loss": 0.256859564781189, "step": 182280 }, { "epoch": 0.7826090689746958, "grad_norm": 0.24786922335624695, "learning_rate": 2.1833257159611255e-05, "loss": 0.06968159675598144, "step": 182290 }, { "epoch": 0.7826520010647159, "grad_norm": 1.7795507907867432, "learning_rate": 2.1828945439493633e-05, "loss": 0.3153455018997192, "step": 182300 }, { "epoch": 0.7826949331547358, "grad_norm": 4.49236536026001, "learning_rate": 2.182463371937601e-05, "loss": 0.3862607002258301, "step": 182310 }, { "epoch": 0.7827378652447559, "grad_norm": 1.5710570812225342, "learning_rate": 2.1820321999258387e-05, "loss": 0.1828963041305542, "step": 182320 }, { "epoch": 0.7827807973347759, "grad_norm": 0.00569881172850728, "learning_rate": 2.181601027914076e-05, "loss": 0.0007293551228940487, "step": 182330 }, { "epoch": 0.7828237294247958, "grad_norm": 0.00949194747954607, "learning_rate": 2.181169855902314e-05, "loss": 0.16933388710021974, "step": 182340 }, { "epoch": 0.7828666615148159, "grad_norm": 0.008789801970124245, "learning_rate": 2.1807386838905513e-05, "loss": 0.23258440494537352, "step": 182350 }, { "epoch": 0.7829095936048359, "grad_norm": 2.179302453994751, "learning_rate": 2.180307511878789e-05, "loss": 0.19624111652374268, "step": 182360 }, { "epoch": 0.7829525256948558, "grad_norm": 4.362630844116211, "learning_rate": 2.1798763398670267e-05, "loss": 0.23598828315734863, "step": 182370 }, { "epoch": 0.7829954577848759, "grad_norm": 0.09223562479019165, "learning_rate": 2.1794451678552645e-05, "loss": 0.0645039439201355, "step": 182380 }, { "epoch": 0.7830383898748959, "grad_norm": 0.004430851899087429, "learning_rate": 2.179013995843502e-05, "loss": 0.1624495267868042, "step": 182390 }, { "epoch": 0.7830813219649159, "grad_norm": 0.00718493340536952, "learning_rate": 2.1785828238317396e-05, "loss": 0.20069551467895508, "step": 182400 }, { "epoch": 0.7831242540549359, "grad_norm": 3.395489454269409, "learning_rate": 2.178151651819977e-05, "loss": 0.17186505794525148, "step": 182410 }, { "epoch": 0.783167186144956, "grad_norm": 1.021973729133606, "learning_rate": 2.1777204798082147e-05, "loss": 0.1961965322494507, "step": 182420 }, { "epoch": 0.7832101182349759, "grad_norm": 0.0013628596207126975, "learning_rate": 2.1772893077964525e-05, "loss": 0.1489197015762329, "step": 182430 }, { "epoch": 0.7832530503249959, "grad_norm": 0.028677189722657204, "learning_rate": 2.1768581357846902e-05, "loss": 0.2267765998840332, "step": 182440 }, { "epoch": 0.783295982415016, "grad_norm": 1.3386242389678955, "learning_rate": 2.1764269637729276e-05, "loss": 0.35156545639038084, "step": 182450 }, { "epoch": 0.7833389145050359, "grad_norm": 0.007181175053119659, "learning_rate": 2.1759957917611653e-05, "loss": 0.2567557096481323, "step": 182460 }, { "epoch": 0.7833818465950559, "grad_norm": 0.3704771101474762, "learning_rate": 2.1755646197494027e-05, "loss": 0.0791439414024353, "step": 182470 }, { "epoch": 0.783424778685076, "grad_norm": 0.7793884873390198, "learning_rate": 2.1751334477376405e-05, "loss": 0.14850038290023804, "step": 182480 }, { "epoch": 0.7834677107750959, "grad_norm": 0.00027104900800623, "learning_rate": 2.1747022757258782e-05, "loss": 0.08147647380828857, "step": 182490 }, { "epoch": 0.783510642865116, "grad_norm": 0.00521089369431138, "learning_rate": 2.174271103714116e-05, "loss": 0.08911651372909546, "step": 182500 }, { "epoch": 0.783553574955136, "grad_norm": 0.01887751929461956, "learning_rate": 2.1738399317023533e-05, "loss": 0.304192852973938, "step": 182510 }, { "epoch": 0.7835965070451559, "grad_norm": 0.02301798388361931, "learning_rate": 2.173408759690591e-05, "loss": 0.22890405654907225, "step": 182520 }, { "epoch": 0.783639439135176, "grad_norm": 0.018653536215424538, "learning_rate": 2.1729775876788284e-05, "loss": 0.19519143104553222, "step": 182530 }, { "epoch": 0.783682371225196, "grad_norm": 0.030346734449267387, "learning_rate": 2.1725464156670662e-05, "loss": 0.41370511054992676, "step": 182540 }, { "epoch": 0.783725303315216, "grad_norm": 0.8571396470069885, "learning_rate": 2.172115243655304e-05, "loss": 0.38896913528442384, "step": 182550 }, { "epoch": 0.783768235405236, "grad_norm": 0.00412337388843298, "learning_rate": 2.1716840716435417e-05, "loss": 0.10326659679412842, "step": 182560 }, { "epoch": 0.783811167495256, "grad_norm": 0.6427186727523804, "learning_rate": 2.171252899631779e-05, "loss": 0.25321714878082274, "step": 182570 }, { "epoch": 0.783854099585276, "grad_norm": 0.001954421168193221, "learning_rate": 2.1708217276200168e-05, "loss": 0.05052712559700012, "step": 182580 }, { "epoch": 0.783897031675296, "grad_norm": 1.9638227224349976, "learning_rate": 2.1703905556082542e-05, "loss": 0.4849149227142334, "step": 182590 }, { "epoch": 0.783939963765316, "grad_norm": 0.020015276968479156, "learning_rate": 2.1699593835964922e-05, "loss": 0.04313889741897583, "step": 182600 }, { "epoch": 0.783982895855336, "grad_norm": 0.020334254950284958, "learning_rate": 2.16952821158473e-05, "loss": 0.15066235065460204, "step": 182610 }, { "epoch": 0.784025827945356, "grad_norm": 6.323507785797119, "learning_rate": 2.1690970395729674e-05, "loss": 0.1465320110321045, "step": 182620 }, { "epoch": 0.7840687600353761, "grad_norm": 0.030544867739081383, "learning_rate": 2.168665867561205e-05, "loss": 0.06789618730545044, "step": 182630 }, { "epoch": 0.784111692125396, "grad_norm": 0.03058551624417305, "learning_rate": 2.1682346955494425e-05, "loss": 0.20928442478179932, "step": 182640 }, { "epoch": 0.784154624215416, "grad_norm": 0.4168550372123718, "learning_rate": 2.1678035235376802e-05, "loss": 0.2982338428497314, "step": 182650 }, { "epoch": 0.7841975563054361, "grad_norm": 12.178466796875, "learning_rate": 2.167372351525918e-05, "loss": 0.27531182765960693, "step": 182660 }, { "epoch": 0.784240488395456, "grad_norm": 0.009957391768693924, "learning_rate": 2.1669411795141557e-05, "loss": 0.2303466320037842, "step": 182670 }, { "epoch": 0.7842834204854761, "grad_norm": 0.0028455080464482307, "learning_rate": 2.166510007502393e-05, "loss": 0.17376034259796141, "step": 182680 }, { "epoch": 0.7843263525754961, "grad_norm": 8.281001091003418, "learning_rate": 2.166078835490631e-05, "loss": 0.18689534664154053, "step": 182690 }, { "epoch": 0.7843692846655161, "grad_norm": 0.005231133662164211, "learning_rate": 2.1656476634788682e-05, "loss": 0.2655553102493286, "step": 182700 }, { "epoch": 0.7844122167555361, "grad_norm": 5.244409561157227, "learning_rate": 2.165216491467106e-05, "loss": 0.32235958576202395, "step": 182710 }, { "epoch": 0.7844551488455561, "grad_norm": 0.15298572182655334, "learning_rate": 2.1647853194553437e-05, "loss": 0.36844849586486816, "step": 182720 }, { "epoch": 0.7844980809355762, "grad_norm": 0.020973147824406624, "learning_rate": 2.1643541474435814e-05, "loss": 0.2034377336502075, "step": 182730 }, { "epoch": 0.7845410130255961, "grad_norm": 2.0955288410186768, "learning_rate": 2.163922975431819e-05, "loss": 0.3749516487121582, "step": 182740 }, { "epoch": 0.7845839451156161, "grad_norm": 0.053335897624492645, "learning_rate": 2.1634918034200566e-05, "loss": 0.22016558647155762, "step": 182750 }, { "epoch": 0.7846268772056362, "grad_norm": 10.606240272521973, "learning_rate": 2.163060631408294e-05, "loss": 0.2146967649459839, "step": 182760 }, { "epoch": 0.7846698092956561, "grad_norm": 1.1345289945602417, "learning_rate": 2.1626294593965317e-05, "loss": 0.4405350685119629, "step": 182770 }, { "epoch": 0.7847127413856761, "grad_norm": 0.028475617989897728, "learning_rate": 2.1621982873847694e-05, "loss": 0.1836310863494873, "step": 182780 }, { "epoch": 0.7847556734756962, "grad_norm": 0.7680911421775818, "learning_rate": 2.161767115373007e-05, "loss": 0.19354742765426636, "step": 182790 }, { "epoch": 0.7847986055657161, "grad_norm": 0.04010359197854996, "learning_rate": 2.1613359433612446e-05, "loss": 0.2988368272781372, "step": 182800 }, { "epoch": 0.7848415376557362, "grad_norm": 0.0036592287942767143, "learning_rate": 2.1609047713494823e-05, "loss": 0.17033002376556397, "step": 182810 }, { "epoch": 0.7848844697457562, "grad_norm": 0.014441991224884987, "learning_rate": 2.1604735993377197e-05, "loss": 0.11610093116760253, "step": 182820 }, { "epoch": 0.7849274018357761, "grad_norm": 0.3196814954280853, "learning_rate": 2.1600424273259574e-05, "loss": 0.23743658065795897, "step": 182830 }, { "epoch": 0.7849703339257962, "grad_norm": 1.738743782043457, "learning_rate": 2.159611255314195e-05, "loss": 0.36977529525756836, "step": 182840 }, { "epoch": 0.7850132660158162, "grad_norm": 0.5812845230102539, "learning_rate": 2.159180083302433e-05, "loss": 0.25585949420928955, "step": 182850 }, { "epoch": 0.7850561981058362, "grad_norm": 0.008618000894784927, "learning_rate": 2.1587489112906703e-05, "loss": 0.17306852340698242, "step": 182860 }, { "epoch": 0.7850991301958562, "grad_norm": 3.9255237579345703, "learning_rate": 2.158317739278908e-05, "loss": 0.20480165481567383, "step": 182870 }, { "epoch": 0.7851420622858762, "grad_norm": 0.004135298077017069, "learning_rate": 2.1578865672671454e-05, "loss": 0.37045023441314695, "step": 182880 }, { "epoch": 0.7851849943758962, "grad_norm": 0.5705690979957581, "learning_rate": 2.157455395255383e-05, "loss": 0.2988840818405151, "step": 182890 }, { "epoch": 0.7852279264659162, "grad_norm": 0.005285890772938728, "learning_rate": 2.157024223243621e-05, "loss": 0.12949843406677247, "step": 182900 }, { "epoch": 0.7852708585559363, "grad_norm": 0.09740712493658066, "learning_rate": 2.1565930512318586e-05, "loss": 0.05606417059898376, "step": 182910 }, { "epoch": 0.7853137906459562, "grad_norm": 0.09031404554843903, "learning_rate": 2.156161879220096e-05, "loss": 0.003790769726037979, "step": 182920 }, { "epoch": 0.7853567227359762, "grad_norm": 0.2656407356262207, "learning_rate": 2.1557307072083338e-05, "loss": 0.22708549499511718, "step": 182930 }, { "epoch": 0.7853996548259963, "grad_norm": 0.6343486309051514, "learning_rate": 2.155299535196571e-05, "loss": 0.14608529806137086, "step": 182940 }, { "epoch": 0.7854425869160162, "grad_norm": 0.0007862726924940944, "learning_rate": 2.1548683631848092e-05, "loss": 0.08486260771751404, "step": 182950 }, { "epoch": 0.7854855190060362, "grad_norm": 0.04844307526946068, "learning_rate": 2.1544371911730466e-05, "loss": 0.33439657688140867, "step": 182960 }, { "epoch": 0.7855284510960563, "grad_norm": 0.0017822891240939498, "learning_rate": 2.1540060191612844e-05, "loss": 0.04367310702800751, "step": 182970 }, { "epoch": 0.7855713831860762, "grad_norm": 0.01023387722671032, "learning_rate": 2.153574847149522e-05, "loss": 0.30081963539123535, "step": 182980 }, { "epoch": 0.7856143152760963, "grad_norm": 0.15752474963665009, "learning_rate": 2.1531436751377595e-05, "loss": 0.10231451988220215, "step": 182990 }, { "epoch": 0.7856572473661163, "grad_norm": 0.0028393957763910294, "learning_rate": 2.1527125031259972e-05, "loss": 0.18005791902542115, "step": 183000 }, { "epoch": 0.7856572473661163, "eval_loss": 0.3797743618488312, "eval_runtime": 27.4618, "eval_samples_per_second": 3.641, "eval_steps_per_second": 3.641, "step": 183000 }, { "epoch": 0.7857001794561362, "grad_norm": 0.007138405926525593, "learning_rate": 2.152281331114235e-05, "loss": 0.2358877420425415, "step": 183010 }, { "epoch": 0.7857431115461563, "grad_norm": 0.0003113711136393249, "learning_rate": 2.1518501591024727e-05, "loss": 0.1500988006591797, "step": 183020 }, { "epoch": 0.7857860436361763, "grad_norm": 0.007665345445275307, "learning_rate": 2.15141898709071e-05, "loss": 0.14550169706344604, "step": 183030 }, { "epoch": 0.7858289757261963, "grad_norm": 0.008687633089721203, "learning_rate": 2.1509878150789478e-05, "loss": 0.2093245267868042, "step": 183040 }, { "epoch": 0.7858719078162163, "grad_norm": 0.0004379673337098211, "learning_rate": 2.1505566430671852e-05, "loss": 0.21798477172851563, "step": 183050 }, { "epoch": 0.7859148399062363, "grad_norm": 0.05952138453722, "learning_rate": 2.150125471055423e-05, "loss": 0.2020576000213623, "step": 183060 }, { "epoch": 0.7859577719962563, "grad_norm": 3.7258222103118896, "learning_rate": 2.1496942990436607e-05, "loss": 0.3716374635696411, "step": 183070 }, { "epoch": 0.7860007040862763, "grad_norm": 0.0039495136588811874, "learning_rate": 2.1492631270318984e-05, "loss": 0.1307750701904297, "step": 183080 }, { "epoch": 0.7860436361762964, "grad_norm": 0.09609965980052948, "learning_rate": 2.1488319550201358e-05, "loss": 0.24323813915252684, "step": 183090 }, { "epoch": 0.7860865682663163, "grad_norm": 0.009046729654073715, "learning_rate": 2.1484007830083735e-05, "loss": 0.10413626432418824, "step": 183100 }, { "epoch": 0.7861295003563363, "grad_norm": 0.0018896028632298112, "learning_rate": 2.147969610996611e-05, "loss": 0.27177743911743163, "step": 183110 }, { "epoch": 0.7861724324463564, "grad_norm": 3.522463321685791, "learning_rate": 2.1475384389848487e-05, "loss": 0.24144039154052735, "step": 183120 }, { "epoch": 0.7862153645363763, "grad_norm": 11.466240882873535, "learning_rate": 2.1471072669730864e-05, "loss": 0.33338441848754885, "step": 183130 }, { "epoch": 0.7862582966263963, "grad_norm": 2.057114362716675, "learning_rate": 2.146676094961324e-05, "loss": 0.45223298072814944, "step": 183140 }, { "epoch": 0.7863012287164164, "grad_norm": 0.05960441380739212, "learning_rate": 2.1462449229495615e-05, "loss": 0.22052621841430664, "step": 183150 }, { "epoch": 0.7863441608064364, "grad_norm": 1.7557612657546997, "learning_rate": 2.1458137509377993e-05, "loss": 0.2731237173080444, "step": 183160 }, { "epoch": 0.7863870928964564, "grad_norm": 1.4419736862182617, "learning_rate": 2.1453825789260367e-05, "loss": 0.11441911458969116, "step": 183170 }, { "epoch": 0.7864300249864764, "grad_norm": 0.00411709351465106, "learning_rate": 2.1449514069142744e-05, "loss": 0.033696231245994565, "step": 183180 }, { "epoch": 0.7864729570764964, "grad_norm": 0.04816296696662903, "learning_rate": 2.144520234902512e-05, "loss": 0.16660306453704835, "step": 183190 }, { "epoch": 0.7865158891665164, "grad_norm": 0.9707366228103638, "learning_rate": 2.14408906289075e-05, "loss": 0.24993407726287842, "step": 183200 }, { "epoch": 0.7865588212565364, "grad_norm": 0.6969446539878845, "learning_rate": 2.1436578908789873e-05, "loss": 0.13606752157211305, "step": 183210 }, { "epoch": 0.7866017533465565, "grad_norm": 0.27022868394851685, "learning_rate": 2.143226718867225e-05, "loss": 0.13606159687042235, "step": 183220 }, { "epoch": 0.7866446854365764, "grad_norm": 1.8256360292434692, "learning_rate": 2.1427955468554624e-05, "loss": 0.17958106994628906, "step": 183230 }, { "epoch": 0.7866876175265964, "grad_norm": 0.04855459928512573, "learning_rate": 2.1423643748437e-05, "loss": 0.1294558048248291, "step": 183240 }, { "epoch": 0.7867305496166165, "grad_norm": 0.9606425166130066, "learning_rate": 2.141933202831938e-05, "loss": 0.21541991233825683, "step": 183250 }, { "epoch": 0.7867734817066364, "grad_norm": 0.0012368131428956985, "learning_rate": 2.1415020308201756e-05, "loss": 0.09298962354660034, "step": 183260 }, { "epoch": 0.7868164137966565, "grad_norm": 0.004385500680655241, "learning_rate": 2.141070858808413e-05, "loss": 0.11553781032562256, "step": 183270 }, { "epoch": 0.7868593458866765, "grad_norm": 0.003434704151004553, "learning_rate": 2.1406396867966507e-05, "loss": 0.16233421564102174, "step": 183280 }, { "epoch": 0.7869022779766964, "grad_norm": 0.028390705585479736, "learning_rate": 2.140208514784888e-05, "loss": 0.19993984699249268, "step": 183290 }, { "epoch": 0.7869452100667165, "grad_norm": 4.946071624755859, "learning_rate": 2.139777342773126e-05, "loss": 0.1852961778640747, "step": 183300 }, { "epoch": 0.7869881421567365, "grad_norm": 0.0032075869385153055, "learning_rate": 2.1393461707613636e-05, "loss": 0.1300378918647766, "step": 183310 }, { "epoch": 0.7870310742467564, "grad_norm": 0.0008391632582060993, "learning_rate": 2.1389149987496013e-05, "loss": 0.19580576419830323, "step": 183320 }, { "epoch": 0.7870740063367765, "grad_norm": 0.006768613122403622, "learning_rate": 2.1384838267378387e-05, "loss": 0.3424041271209717, "step": 183330 }, { "epoch": 0.7871169384267965, "grad_norm": 2.0839383602142334, "learning_rate": 2.1380526547260765e-05, "loss": 0.1107265830039978, "step": 183340 }, { "epoch": 0.7871598705168165, "grad_norm": 1.0905482769012451, "learning_rate": 2.1376214827143142e-05, "loss": 0.2123729705810547, "step": 183350 }, { "epoch": 0.7872028026068365, "grad_norm": 0.0029551531188189983, "learning_rate": 2.137190310702552e-05, "loss": 0.08570262193679809, "step": 183360 }, { "epoch": 0.7872457346968565, "grad_norm": 2.3354883193969727, "learning_rate": 2.1367591386907897e-05, "loss": 0.3122261047363281, "step": 183370 }, { "epoch": 0.7872886667868765, "grad_norm": 0.8970825672149658, "learning_rate": 2.136327966679027e-05, "loss": 0.1956690549850464, "step": 183380 }, { "epoch": 0.7873315988768965, "grad_norm": 5.209467887878418, "learning_rate": 2.1358967946672648e-05, "loss": 0.22057158946990968, "step": 183390 }, { "epoch": 0.7873745309669166, "grad_norm": 0.0026834437157958746, "learning_rate": 2.1354656226555022e-05, "loss": 0.13024570941925048, "step": 183400 }, { "epoch": 0.7874174630569365, "grad_norm": 0.0007249237387441099, "learning_rate": 2.13503445064374e-05, "loss": 0.08153098225593566, "step": 183410 }, { "epoch": 0.7874603951469565, "grad_norm": 1.7533392906188965, "learning_rate": 2.1346032786319777e-05, "loss": 0.275898814201355, "step": 183420 }, { "epoch": 0.7875033272369766, "grad_norm": 1.7578963041305542, "learning_rate": 2.1341721066202154e-05, "loss": 0.3079784393310547, "step": 183430 }, { "epoch": 0.7875462593269965, "grad_norm": 0.024534443393349648, "learning_rate": 2.1337409346084528e-05, "loss": 0.216939377784729, "step": 183440 }, { "epoch": 0.7875891914170166, "grad_norm": 0.002587397349998355, "learning_rate": 2.1333097625966905e-05, "loss": 0.11361846923828126, "step": 183450 }, { "epoch": 0.7876321235070366, "grad_norm": 0.0038166409358382225, "learning_rate": 2.132878590584928e-05, "loss": 0.07297162413597107, "step": 183460 }, { "epoch": 0.7876750555970565, "grad_norm": 0.0017480719834566116, "learning_rate": 2.1324474185731656e-05, "loss": 0.11760724782943725, "step": 183470 }, { "epoch": 0.7877179876870766, "grad_norm": 93.22285461425781, "learning_rate": 2.1320162465614034e-05, "loss": 0.36112000942230227, "step": 183480 }, { "epoch": 0.7877609197770966, "grad_norm": 0.9116643667221069, "learning_rate": 2.131585074549641e-05, "loss": 0.08869249820709228, "step": 183490 }, { "epoch": 0.7878038518671165, "grad_norm": 1.2129038572311401, "learning_rate": 2.1311539025378785e-05, "loss": 0.2812972545623779, "step": 183500 }, { "epoch": 0.7878467839571366, "grad_norm": 0.002802674425765872, "learning_rate": 2.1307227305261162e-05, "loss": 0.17642027139663696, "step": 183510 }, { "epoch": 0.7878897160471566, "grad_norm": 0.13792453706264496, "learning_rate": 2.1302915585143536e-05, "loss": 0.43254504203796384, "step": 183520 }, { "epoch": 0.7879326481371766, "grad_norm": 0.0027651230338960886, "learning_rate": 2.1298603865025914e-05, "loss": 0.09086210131645203, "step": 183530 }, { "epoch": 0.7879755802271966, "grad_norm": 0.8143436908721924, "learning_rate": 2.129429214490829e-05, "loss": 0.22465338706970214, "step": 183540 }, { "epoch": 0.7880185123172166, "grad_norm": 1.933145523071289, "learning_rate": 2.128998042479067e-05, "loss": 0.2547163486480713, "step": 183550 }, { "epoch": 0.7880614444072366, "grad_norm": 7.512142181396484, "learning_rate": 2.1285668704673042e-05, "loss": 0.5128835201263428, "step": 183560 }, { "epoch": 0.7881043764972566, "grad_norm": 5.832769870758057, "learning_rate": 2.128135698455542e-05, "loss": 0.16420665979385377, "step": 183570 }, { "epoch": 0.7881473085872767, "grad_norm": 1.8669154644012451, "learning_rate": 2.1277045264437794e-05, "loss": 0.41615657806396483, "step": 183580 }, { "epoch": 0.7881902406772967, "grad_norm": 0.020469048991799355, "learning_rate": 2.127273354432017e-05, "loss": 0.14705488681793213, "step": 183590 }, { "epoch": 0.7882331727673166, "grad_norm": 1.6848324537277222, "learning_rate": 2.126842182420255e-05, "loss": 0.34337496757507324, "step": 183600 }, { "epoch": 0.7882761048573367, "grad_norm": 0.003964392002671957, "learning_rate": 2.1264110104084926e-05, "loss": 0.2729069709777832, "step": 183610 }, { "epoch": 0.7883190369473567, "grad_norm": 2.254751205444336, "learning_rate": 2.12597983839673e-05, "loss": 0.2700617790222168, "step": 183620 }, { "epoch": 0.7883619690373767, "grad_norm": 0.0009603060898371041, "learning_rate": 2.1255486663849677e-05, "loss": 0.1181606411933899, "step": 183630 }, { "epoch": 0.7884049011273967, "grad_norm": 0.055945176631212234, "learning_rate": 2.125117494373205e-05, "loss": 0.20723607540130615, "step": 183640 }, { "epoch": 0.7884478332174167, "grad_norm": 0.18929432332515717, "learning_rate": 2.1246863223614428e-05, "loss": 0.05493208169937134, "step": 183650 }, { "epoch": 0.7884907653074367, "grad_norm": 0.05633874610066414, "learning_rate": 2.1242551503496806e-05, "loss": 0.1731419086456299, "step": 183660 }, { "epoch": 0.7885336973974567, "grad_norm": 0.02658155746757984, "learning_rate": 2.1238239783379183e-05, "loss": 0.22599170207977295, "step": 183670 }, { "epoch": 0.7885766294874768, "grad_norm": 0.3270963132381439, "learning_rate": 2.1233928063261557e-05, "loss": 0.09019438028335572, "step": 183680 }, { "epoch": 0.7886195615774967, "grad_norm": 0.005451427306979895, "learning_rate": 2.1229616343143934e-05, "loss": 0.11634416580200195, "step": 183690 }, { "epoch": 0.7886624936675167, "grad_norm": 1.4958854913711548, "learning_rate": 2.122530462302631e-05, "loss": 0.192735755443573, "step": 183700 }, { "epoch": 0.7887054257575368, "grad_norm": 0.0020073908381164074, "learning_rate": 2.122099290290869e-05, "loss": 0.05960498452186584, "step": 183710 }, { "epoch": 0.7887483578475567, "grad_norm": 1.2685023546218872, "learning_rate": 2.1216681182791066e-05, "loss": 0.23755450248718263, "step": 183720 }, { "epoch": 0.7887912899375767, "grad_norm": 0.039844296872615814, "learning_rate": 2.121236946267344e-05, "loss": 0.20982720851898193, "step": 183730 }, { "epoch": 0.7888342220275968, "grad_norm": 0.04778163507580757, "learning_rate": 2.1208057742555818e-05, "loss": 0.17580852508544922, "step": 183740 }, { "epoch": 0.7888771541176167, "grad_norm": 1.0229040384292603, "learning_rate": 2.120374602243819e-05, "loss": 0.30660076141357423, "step": 183750 }, { "epoch": 0.7889200862076368, "grad_norm": 0.021670211106538773, "learning_rate": 2.119943430232057e-05, "loss": 0.08490864038467408, "step": 183760 }, { "epoch": 0.7889630182976568, "grad_norm": 7.117254426702857e-05, "learning_rate": 2.1195122582202946e-05, "loss": 0.10266214609146118, "step": 183770 }, { "epoch": 0.7890059503876767, "grad_norm": 0.0019227155717089772, "learning_rate": 2.1190810862085324e-05, "loss": 0.008311575651168824, "step": 183780 }, { "epoch": 0.7890488824776968, "grad_norm": 8.819829940795898, "learning_rate": 2.1186499141967698e-05, "loss": 0.23230252265930176, "step": 183790 }, { "epoch": 0.7890918145677168, "grad_norm": 1.0761563777923584, "learning_rate": 2.1182187421850075e-05, "loss": 0.26760454177856446, "step": 183800 }, { "epoch": 0.7891347466577368, "grad_norm": 1.6653600931167603, "learning_rate": 2.117787570173245e-05, "loss": 0.2435081720352173, "step": 183810 }, { "epoch": 0.7891776787477568, "grad_norm": 3.4551095962524414, "learning_rate": 2.1173563981614826e-05, "loss": 0.425843334197998, "step": 183820 }, { "epoch": 0.7892206108377768, "grad_norm": 1.0570393800735474, "learning_rate": 2.1169252261497204e-05, "loss": 0.23131773471832276, "step": 183830 }, { "epoch": 0.7892635429277968, "grad_norm": 0.0038749901577830315, "learning_rate": 2.116494054137958e-05, "loss": 0.26228177547454834, "step": 183840 }, { "epoch": 0.7893064750178168, "grad_norm": 0.29532575607299805, "learning_rate": 2.1160628821261955e-05, "loss": 0.28396048545837405, "step": 183850 }, { "epoch": 0.7893494071078369, "grad_norm": 0.40147167444229126, "learning_rate": 2.1156317101144332e-05, "loss": 0.16280757188796996, "step": 183860 }, { "epoch": 0.7893923391978568, "grad_norm": 0.008659133687615395, "learning_rate": 2.1152005381026706e-05, "loss": 0.06703677773475647, "step": 183870 }, { "epoch": 0.7894352712878768, "grad_norm": 5.348660469055176, "learning_rate": 2.1147693660909083e-05, "loss": 0.14614660739898683, "step": 183880 }, { "epoch": 0.7894782033778969, "grad_norm": 2.7472760677337646, "learning_rate": 2.114338194079146e-05, "loss": 0.27347257137298586, "step": 183890 }, { "epoch": 0.7895211354679168, "grad_norm": 1.1811401844024658, "learning_rate": 2.1139070220673838e-05, "loss": 0.21110949516296387, "step": 183900 }, { "epoch": 0.7895640675579368, "grad_norm": 0.01332367118448019, "learning_rate": 2.1134758500556212e-05, "loss": 0.03304066956043243, "step": 183910 }, { "epoch": 0.7896069996479569, "grad_norm": 0.013469953089952469, "learning_rate": 2.113044678043859e-05, "loss": 0.1550193428993225, "step": 183920 }, { "epoch": 0.7896499317379768, "grad_norm": 0.001539850840345025, "learning_rate": 2.1126135060320963e-05, "loss": 0.10376846790313721, "step": 183930 }, { "epoch": 0.7896928638279969, "grad_norm": 0.7918428182601929, "learning_rate": 2.112182334020334e-05, "loss": 0.1784311294555664, "step": 183940 }, { "epoch": 0.7897357959180169, "grad_norm": 0.0009629686828702688, "learning_rate": 2.1117511620085718e-05, "loss": 0.1645200252532959, "step": 183950 }, { "epoch": 0.7897787280080368, "grad_norm": 1.0543266534805298, "learning_rate": 2.1113199899968095e-05, "loss": 0.14095029830932618, "step": 183960 }, { "epoch": 0.7898216600980569, "grad_norm": 4.017325401306152, "learning_rate": 2.110888817985047e-05, "loss": 0.24089696407318115, "step": 183970 }, { "epoch": 0.7898645921880769, "grad_norm": 0.9489465355873108, "learning_rate": 2.1104576459732847e-05, "loss": 0.5302737712860107, "step": 183980 }, { "epoch": 0.7899075242780969, "grad_norm": 0.004499488044530153, "learning_rate": 2.110026473961522e-05, "loss": 0.2945434808731079, "step": 183990 }, { "epoch": 0.7899504563681169, "grad_norm": 0.016302023082971573, "learning_rate": 2.1095953019497598e-05, "loss": 0.19677678346633912, "step": 184000 }, { "epoch": 0.7899504563681169, "eval_loss": 0.3860042691230774, "eval_runtime": 27.41, "eval_samples_per_second": 3.648, "eval_steps_per_second": 3.648, "step": 184000 }, { "epoch": 0.7899933884581369, "grad_norm": 0.002552248304709792, "learning_rate": 2.1091641299379975e-05, "loss": 0.1127932071685791, "step": 184010 }, { "epoch": 0.790036320548157, "grad_norm": 2.2927207946777344, "learning_rate": 2.1087329579262353e-05, "loss": 0.3413818359375, "step": 184020 }, { "epoch": 0.7900792526381769, "grad_norm": 2.3446779251098633, "learning_rate": 2.1083017859144727e-05, "loss": 0.1625274896621704, "step": 184030 }, { "epoch": 0.790122184728197, "grad_norm": 0.10824047774076462, "learning_rate": 2.1078706139027104e-05, "loss": 0.20035688877105712, "step": 184040 }, { "epoch": 0.790165116818217, "grad_norm": 0.07350625097751617, "learning_rate": 2.1074394418909478e-05, "loss": 0.26237874031066893, "step": 184050 }, { "epoch": 0.7902080489082369, "grad_norm": 0.0038361994083970785, "learning_rate": 2.107008269879186e-05, "loss": 0.12209371328353882, "step": 184060 }, { "epoch": 0.790250980998257, "grad_norm": 0.0032284085173159838, "learning_rate": 2.1065770978674236e-05, "loss": 0.20432050228118898, "step": 184070 }, { "epoch": 0.790293913088277, "grad_norm": 0.007364768069237471, "learning_rate": 2.106145925855661e-05, "loss": 0.05040127635002136, "step": 184080 }, { "epoch": 0.7903368451782969, "grad_norm": 0.07383093982934952, "learning_rate": 2.1057147538438987e-05, "loss": 0.08585337996482849, "step": 184090 }, { "epoch": 0.790379777268317, "grad_norm": 2.5571935176849365, "learning_rate": 2.105283581832136e-05, "loss": 0.13882994651794434, "step": 184100 }, { "epoch": 0.790422709358337, "grad_norm": 0.917072594165802, "learning_rate": 2.104852409820374e-05, "loss": 0.4125373363494873, "step": 184110 }, { "epoch": 0.790465641448357, "grad_norm": 0.0016729761846363544, "learning_rate": 2.1044212378086116e-05, "loss": 0.38989236354827883, "step": 184120 }, { "epoch": 0.790508573538377, "grad_norm": 0.15144653618335724, "learning_rate": 2.1039900657968493e-05, "loss": 0.23984091281890868, "step": 184130 }, { "epoch": 0.790551505628397, "grad_norm": 0.026266876608133316, "learning_rate": 2.1035588937850867e-05, "loss": 0.2721916437149048, "step": 184140 }, { "epoch": 0.790594437718417, "grad_norm": 0.0340094156563282, "learning_rate": 2.1031277217733245e-05, "loss": 0.10305203199386596, "step": 184150 }, { "epoch": 0.790637369808437, "grad_norm": 0.008664273656904697, "learning_rate": 2.102696549761562e-05, "loss": 0.2137474536895752, "step": 184160 }, { "epoch": 0.7906803018984571, "grad_norm": 0.029949234798550606, "learning_rate": 2.1022653777497996e-05, "loss": 0.11488361358642578, "step": 184170 }, { "epoch": 0.790723233988477, "grad_norm": 0.003333107102662325, "learning_rate": 2.1018342057380373e-05, "loss": 0.21800367832183837, "step": 184180 }, { "epoch": 0.790766166078497, "grad_norm": 0.007137620355933905, "learning_rate": 2.101403033726275e-05, "loss": 0.2649306058883667, "step": 184190 }, { "epoch": 0.7908090981685171, "grad_norm": 1.8278257846832275, "learning_rate": 2.1009718617145125e-05, "loss": 0.1403980016708374, "step": 184200 }, { "epoch": 0.790852030258537, "grad_norm": 0.0005646710051223636, "learning_rate": 2.1005406897027502e-05, "loss": 0.04360083937644958, "step": 184210 }, { "epoch": 0.790894962348557, "grad_norm": 0.802178680896759, "learning_rate": 2.1001095176909876e-05, "loss": 0.3105415105819702, "step": 184220 }, { "epoch": 0.7909378944385771, "grad_norm": 1.269438624382019, "learning_rate": 2.0996783456792253e-05, "loss": 0.14836931228637695, "step": 184230 }, { "epoch": 0.790980826528597, "grad_norm": 1.6588866710662842, "learning_rate": 2.099247173667463e-05, "loss": 0.5411814212799072, "step": 184240 }, { "epoch": 0.7910237586186171, "grad_norm": 3.9255900382995605, "learning_rate": 2.0988160016557008e-05, "loss": 0.03738404214382172, "step": 184250 }, { "epoch": 0.7910666907086371, "grad_norm": 2.704301595687866, "learning_rate": 2.0983848296439382e-05, "loss": 0.37105047702789307, "step": 184260 }, { "epoch": 0.791109622798657, "grad_norm": 0.024229900911450386, "learning_rate": 2.097953657632176e-05, "loss": 0.04928521811962128, "step": 184270 }, { "epoch": 0.7911525548886771, "grad_norm": 0.053558625280857086, "learning_rate": 2.0975224856204133e-05, "loss": 0.08247345685958862, "step": 184280 }, { "epoch": 0.7911954869786971, "grad_norm": 0.04510665684938431, "learning_rate": 2.097091313608651e-05, "loss": 0.08157772421836854, "step": 184290 }, { "epoch": 0.7912384190687171, "grad_norm": 0.0015568624949082732, "learning_rate": 2.0966601415968888e-05, "loss": 0.07714862227439881, "step": 184300 }, { "epoch": 0.7912813511587371, "grad_norm": 0.010974083095788956, "learning_rate": 2.0962289695851265e-05, "loss": 0.1628153681755066, "step": 184310 }, { "epoch": 0.7913242832487571, "grad_norm": 1.7074087858200073, "learning_rate": 2.095797797573364e-05, "loss": 0.1585795760154724, "step": 184320 }, { "epoch": 0.7913672153387771, "grad_norm": 0.026802221313118935, "learning_rate": 2.0953666255616016e-05, "loss": 0.08797412514686584, "step": 184330 }, { "epoch": 0.7914101474287971, "grad_norm": 1.5584458112716675, "learning_rate": 2.094935453549839e-05, "loss": 0.13183501958847046, "step": 184340 }, { "epoch": 0.7914530795188172, "grad_norm": 0.004854326136410236, "learning_rate": 2.0945042815380768e-05, "loss": 0.2509061574935913, "step": 184350 }, { "epoch": 0.7914960116088371, "grad_norm": 2.4723963737487793, "learning_rate": 2.0940731095263145e-05, "loss": 0.2575789451599121, "step": 184360 }, { "epoch": 0.7915389436988571, "grad_norm": 10.56302261352539, "learning_rate": 2.0936419375145522e-05, "loss": 0.27280776500701903, "step": 184370 }, { "epoch": 0.7915818757888772, "grad_norm": 0.034737855195999146, "learning_rate": 2.0932107655027896e-05, "loss": 0.020511318743228913, "step": 184380 }, { "epoch": 0.7916248078788971, "grad_norm": 8.125020027160645, "learning_rate": 2.0927795934910274e-05, "loss": 0.5077376365661621, "step": 184390 }, { "epoch": 0.7916677399689172, "grad_norm": 0.2507530450820923, "learning_rate": 2.0923484214792648e-05, "loss": 0.15186481475830077, "step": 184400 }, { "epoch": 0.7917106720589372, "grad_norm": 0.10388506948947906, "learning_rate": 2.0919172494675025e-05, "loss": 0.21182787418365479, "step": 184410 }, { "epoch": 0.7917536041489571, "grad_norm": 0.005574772600084543, "learning_rate": 2.0914860774557402e-05, "loss": 0.3856783628463745, "step": 184420 }, { "epoch": 0.7917965362389772, "grad_norm": 1.1669795513153076, "learning_rate": 2.091054905443978e-05, "loss": 0.29858415126800536, "step": 184430 }, { "epoch": 0.7918394683289972, "grad_norm": 3.4067726135253906, "learning_rate": 2.0906237334322157e-05, "loss": 0.2626990079879761, "step": 184440 }, { "epoch": 0.7918824004190173, "grad_norm": 0.049436818808317184, "learning_rate": 2.090192561420453e-05, "loss": 0.09174957871437073, "step": 184450 }, { "epoch": 0.7919253325090372, "grad_norm": 0.007332088891416788, "learning_rate": 2.089761389408691e-05, "loss": 0.17745136022567748, "step": 184460 }, { "epoch": 0.7919682645990572, "grad_norm": 1.9990302324295044, "learning_rate": 2.0893302173969286e-05, "loss": 0.3661717414855957, "step": 184470 }, { "epoch": 0.7920111966890773, "grad_norm": 0.04143689200282097, "learning_rate": 2.0888990453851663e-05, "loss": 0.31041841506958007, "step": 184480 }, { "epoch": 0.7920541287790972, "grad_norm": 0.012891510501503944, "learning_rate": 2.0884678733734037e-05, "loss": 0.3005017042160034, "step": 184490 }, { "epoch": 0.7920970608691172, "grad_norm": 4.862680912017822, "learning_rate": 2.0880367013616414e-05, "loss": 0.12409036159515381, "step": 184500 }, { "epoch": 0.7921399929591373, "grad_norm": 3.8281655311584473, "learning_rate": 2.087605529349879e-05, "loss": 0.41316800117492675, "step": 184510 }, { "epoch": 0.7921829250491572, "grad_norm": 0.008535422384738922, "learning_rate": 2.0871743573381166e-05, "loss": 0.27865374088287354, "step": 184520 }, { "epoch": 0.7922258571391773, "grad_norm": 0.008231055922806263, "learning_rate": 2.0867431853263543e-05, "loss": 0.3190565586090088, "step": 184530 }, { "epoch": 0.7922687892291973, "grad_norm": 1.9232498407363892, "learning_rate": 2.086312013314592e-05, "loss": 0.1800924062728882, "step": 184540 }, { "epoch": 0.7923117213192172, "grad_norm": 0.0029449264984577894, "learning_rate": 2.0858808413028294e-05, "loss": 0.28110227584838865, "step": 184550 }, { "epoch": 0.7923546534092373, "grad_norm": 0.4471541941165924, "learning_rate": 2.085449669291067e-05, "loss": 0.11063566207885742, "step": 184560 }, { "epoch": 0.7923975854992573, "grad_norm": 0.8572136163711548, "learning_rate": 2.0850184972793046e-05, "loss": 0.1575225830078125, "step": 184570 }, { "epoch": 0.7924405175892772, "grad_norm": 0.008033351972699165, "learning_rate": 2.0845873252675423e-05, "loss": 0.1626001477241516, "step": 184580 }, { "epoch": 0.7924834496792973, "grad_norm": 2.196404218673706, "learning_rate": 2.08415615325578e-05, "loss": 0.30686063766479493, "step": 184590 }, { "epoch": 0.7925263817693173, "grad_norm": 0.0012454588431864977, "learning_rate": 2.0837249812440178e-05, "loss": 0.14365462064743043, "step": 184600 }, { "epoch": 0.7925693138593373, "grad_norm": 0.08527855575084686, "learning_rate": 2.083293809232255e-05, "loss": 0.09844621419906616, "step": 184610 }, { "epoch": 0.7926122459493573, "grad_norm": 0.22390778362751007, "learning_rate": 2.082862637220493e-05, "loss": 0.11322391033172607, "step": 184620 }, { "epoch": 0.7926551780393774, "grad_norm": 1.483320951461792, "learning_rate": 2.0824314652087303e-05, "loss": 0.2445986747741699, "step": 184630 }, { "epoch": 0.7926981101293973, "grad_norm": 0.9645312428474426, "learning_rate": 2.082000293196968e-05, "loss": 0.21833391189575196, "step": 184640 }, { "epoch": 0.7927410422194173, "grad_norm": 3.154022455215454, "learning_rate": 2.0815691211852058e-05, "loss": 0.139963960647583, "step": 184650 }, { "epoch": 0.7927839743094374, "grad_norm": 0.00142047053668648, "learning_rate": 2.0811379491734435e-05, "loss": 0.16589174270629883, "step": 184660 }, { "epoch": 0.7928269063994573, "grad_norm": 0.013610593043267727, "learning_rate": 2.080706777161681e-05, "loss": 0.040987375378608706, "step": 184670 }, { "epoch": 0.7928698384894773, "grad_norm": 0.0035771746188402176, "learning_rate": 2.0802756051499186e-05, "loss": 0.175575852394104, "step": 184680 }, { "epoch": 0.7929127705794974, "grad_norm": 24.877038955688477, "learning_rate": 2.079844433138156e-05, "loss": 0.01081414669752121, "step": 184690 }, { "epoch": 0.7929557026695173, "grad_norm": 0.0351494662463665, "learning_rate": 2.0794132611263937e-05, "loss": 0.058581531047821045, "step": 184700 }, { "epoch": 0.7929986347595374, "grad_norm": 0.016925616189837456, "learning_rate": 2.0789820891146315e-05, "loss": 0.22958390712738036, "step": 184710 }, { "epoch": 0.7930415668495574, "grad_norm": 1.6746351718902588, "learning_rate": 2.0785509171028692e-05, "loss": 0.2561425924301147, "step": 184720 }, { "epoch": 0.7930844989395773, "grad_norm": 0.0013505280949175358, "learning_rate": 2.0781197450911066e-05, "loss": 0.09878464341163636, "step": 184730 }, { "epoch": 0.7931274310295974, "grad_norm": 0.46645739674568176, "learning_rate": 2.0776885730793443e-05, "loss": 0.16808898448944093, "step": 184740 }, { "epoch": 0.7931703631196174, "grad_norm": 0.3912404775619507, "learning_rate": 2.0772574010675817e-05, "loss": 0.27826414108276365, "step": 184750 }, { "epoch": 0.7932132952096373, "grad_norm": 0.05493396148085594, "learning_rate": 2.0768262290558195e-05, "loss": 0.11417219638824463, "step": 184760 }, { "epoch": 0.7932562272996574, "grad_norm": 3.7654097080230713, "learning_rate": 2.0763950570440572e-05, "loss": 0.10978877544403076, "step": 184770 }, { "epoch": 0.7932991593896774, "grad_norm": 0.39600008726119995, "learning_rate": 2.075963885032295e-05, "loss": 0.08345726132392883, "step": 184780 }, { "epoch": 0.7933420914796974, "grad_norm": 1.5910329818725586, "learning_rate": 2.0755327130205327e-05, "loss": 0.1979695200920105, "step": 184790 }, { "epoch": 0.7933850235697174, "grad_norm": 0.007868641056120396, "learning_rate": 2.07510154100877e-05, "loss": 0.16176763772964478, "step": 184800 }, { "epoch": 0.7934279556597374, "grad_norm": 0.05180217698216438, "learning_rate": 2.0746703689970078e-05, "loss": 0.5233680725097656, "step": 184810 }, { "epoch": 0.7934708877497574, "grad_norm": 0.008714994415640831, "learning_rate": 2.0742391969852455e-05, "loss": 0.08109192252159118, "step": 184820 }, { "epoch": 0.7935138198397774, "grad_norm": 0.012277199886739254, "learning_rate": 2.0738080249734833e-05, "loss": 0.30088798999786376, "step": 184830 }, { "epoch": 0.7935567519297975, "grad_norm": 0.17794494330883026, "learning_rate": 2.0733768529617207e-05, "loss": 0.15534228086471558, "step": 184840 }, { "epoch": 0.7935996840198174, "grad_norm": 0.005018056370317936, "learning_rate": 2.0729456809499584e-05, "loss": 0.06532618999481202, "step": 184850 }, { "epoch": 0.7936426161098374, "grad_norm": 0.010500622913241386, "learning_rate": 2.0725145089381958e-05, "loss": 0.2514193534851074, "step": 184860 }, { "epoch": 0.7936855481998575, "grad_norm": 11.70617389678955, "learning_rate": 2.0720833369264335e-05, "loss": 0.19119045734405518, "step": 184870 }, { "epoch": 0.7937284802898775, "grad_norm": 0.007109550293534994, "learning_rate": 2.0716521649146713e-05, "loss": 0.037236130237579344, "step": 184880 }, { "epoch": 0.7937714123798975, "grad_norm": 12.96017837524414, "learning_rate": 2.071220992902909e-05, "loss": 0.19494185447692872, "step": 184890 }, { "epoch": 0.7938143444699175, "grad_norm": 0.003496050601825118, "learning_rate": 2.0707898208911464e-05, "loss": 0.16270724534988404, "step": 184900 }, { "epoch": 0.7938572765599375, "grad_norm": 0.16476179659366608, "learning_rate": 2.070358648879384e-05, "loss": 0.2962705850601196, "step": 184910 }, { "epoch": 0.7939002086499575, "grad_norm": 2.141551971435547, "learning_rate": 2.0699274768676215e-05, "loss": 0.2920737981796265, "step": 184920 }, { "epoch": 0.7939431407399775, "grad_norm": 0.0014055408537387848, "learning_rate": 2.0694963048558593e-05, "loss": 0.21264221668243408, "step": 184930 }, { "epoch": 0.7939860728299976, "grad_norm": 0.008581023663282394, "learning_rate": 2.069065132844097e-05, "loss": 0.28402559757232665, "step": 184940 }, { "epoch": 0.7940290049200175, "grad_norm": 0.15683463215827942, "learning_rate": 2.0686339608323347e-05, "loss": 0.3171902418136597, "step": 184950 }, { "epoch": 0.7940719370100375, "grad_norm": 24.406858444213867, "learning_rate": 2.068202788820572e-05, "loss": 0.19824904203414917, "step": 184960 }, { "epoch": 0.7941148691000576, "grad_norm": 0.009375318884849548, "learning_rate": 2.06777161680881e-05, "loss": 0.37527921199798586, "step": 184970 }, { "epoch": 0.7941578011900775, "grad_norm": 0.0037970547564327717, "learning_rate": 2.0673404447970473e-05, "loss": 0.10710017681121826, "step": 184980 }, { "epoch": 0.7942007332800975, "grad_norm": 6.955389499664307, "learning_rate": 2.066909272785285e-05, "loss": 0.22487101554870606, "step": 184990 }, { "epoch": 0.7942436653701176, "grad_norm": 5.220800876617432, "learning_rate": 2.0664781007735227e-05, "loss": 0.08548426628112793, "step": 185000 }, { "epoch": 0.7942436653701176, "eval_loss": 0.3935047388076782, "eval_runtime": 27.4742, "eval_samples_per_second": 3.64, "eval_steps_per_second": 3.64, "step": 185000 }, { "epoch": 0.7942865974601375, "grad_norm": 0.014097069390118122, "learning_rate": 2.0660469287617605e-05, "loss": 0.1502092957496643, "step": 185010 }, { "epoch": 0.7943295295501576, "grad_norm": 3.219040632247925, "learning_rate": 2.065615756749998e-05, "loss": 0.1983199119567871, "step": 185020 }, { "epoch": 0.7943724616401776, "grad_norm": 0.0013536418555304408, "learning_rate": 2.0651845847382356e-05, "loss": 0.11366220712661743, "step": 185030 }, { "epoch": 0.7944153937301975, "grad_norm": 0.010713169351220131, "learning_rate": 2.064753412726473e-05, "loss": 0.2043184757232666, "step": 185040 }, { "epoch": 0.7944583258202176, "grad_norm": 0.22274163365364075, "learning_rate": 2.0643222407147107e-05, "loss": 0.19996603727340698, "step": 185050 }, { "epoch": 0.7945012579102376, "grad_norm": 0.02003192901611328, "learning_rate": 2.0638910687029485e-05, "loss": 0.23419508934020997, "step": 185060 }, { "epoch": 0.7945441900002576, "grad_norm": 0.2899033725261688, "learning_rate": 2.0634598966911862e-05, "loss": 0.3487301588058472, "step": 185070 }, { "epoch": 0.7945871220902776, "grad_norm": 0.01600206270813942, "learning_rate": 2.0630287246794236e-05, "loss": 0.2106010675430298, "step": 185080 }, { "epoch": 0.7946300541802976, "grad_norm": 0.014160433784127235, "learning_rate": 2.0625975526676613e-05, "loss": 0.15078703165054322, "step": 185090 }, { "epoch": 0.7946729862703176, "grad_norm": 2.848785877227783, "learning_rate": 2.0621663806558987e-05, "loss": 0.2850050926208496, "step": 185100 }, { "epoch": 0.7947159183603376, "grad_norm": 0.011155441403388977, "learning_rate": 2.0617352086441365e-05, "loss": 0.1666271209716797, "step": 185110 }, { "epoch": 0.7947588504503577, "grad_norm": 1.6798511743545532, "learning_rate": 2.0613040366323742e-05, "loss": 0.10716216564178467, "step": 185120 }, { "epoch": 0.7948017825403776, "grad_norm": 1.9502683877944946, "learning_rate": 2.060872864620612e-05, "loss": 0.1362156629562378, "step": 185130 }, { "epoch": 0.7948447146303976, "grad_norm": 0.025498563423752785, "learning_rate": 2.0604416926088493e-05, "loss": 0.2359158515930176, "step": 185140 }, { "epoch": 0.7948876467204177, "grad_norm": 1.1539736986160278, "learning_rate": 2.060010520597087e-05, "loss": 0.1627048969268799, "step": 185150 }, { "epoch": 0.7949305788104376, "grad_norm": 0.023429078981280327, "learning_rate": 2.0595793485853248e-05, "loss": 0.26216707229614256, "step": 185160 }, { "epoch": 0.7949735109004576, "grad_norm": 0.0036843300331383944, "learning_rate": 2.0591481765735622e-05, "loss": 0.21936185359954835, "step": 185170 }, { "epoch": 0.7950164429904777, "grad_norm": 0.011735700070858002, "learning_rate": 2.0587170045618003e-05, "loss": 0.21217544078826905, "step": 185180 }, { "epoch": 0.7950593750804976, "grad_norm": 1.6435482501983643, "learning_rate": 2.0582858325500376e-05, "loss": 0.24956004619598388, "step": 185190 }, { "epoch": 0.7951023071705177, "grad_norm": 0.008881629444658756, "learning_rate": 2.0578546605382754e-05, "loss": 0.18325870037078856, "step": 185200 }, { "epoch": 0.7951452392605377, "grad_norm": 0.00953991711139679, "learning_rate": 2.0574234885265128e-05, "loss": 0.2940868854522705, "step": 185210 }, { "epoch": 0.7951881713505576, "grad_norm": 0.005282262340188026, "learning_rate": 2.0569923165147505e-05, "loss": 0.11159526109695435, "step": 185220 }, { "epoch": 0.7952311034405777, "grad_norm": 0.004339796025305986, "learning_rate": 2.0565611445029882e-05, "loss": 0.17705767154693602, "step": 185230 }, { "epoch": 0.7952740355305977, "grad_norm": 0.23181919753551483, "learning_rate": 2.056129972491226e-05, "loss": 0.24660682678222656, "step": 185240 }, { "epoch": 0.7953169676206177, "grad_norm": 1.4440089464187622, "learning_rate": 2.0556988004794634e-05, "loss": 0.30808000564575194, "step": 185250 }, { "epoch": 0.7953598997106377, "grad_norm": 0.01273949071764946, "learning_rate": 2.055267628467701e-05, "loss": 0.02238984853029251, "step": 185260 }, { "epoch": 0.7954028318006577, "grad_norm": 0.07017716765403748, "learning_rate": 2.0548364564559385e-05, "loss": 0.08472115397453309, "step": 185270 }, { "epoch": 0.7954457638906777, "grad_norm": 0.0008051918121054769, "learning_rate": 2.0544052844441762e-05, "loss": 0.2069920063018799, "step": 185280 }, { "epoch": 0.7954886959806977, "grad_norm": 0.15069304406642914, "learning_rate": 2.053974112432414e-05, "loss": 0.17520055770874024, "step": 185290 }, { "epoch": 0.7955316280707178, "grad_norm": 7.917318820953369, "learning_rate": 2.0535429404206517e-05, "loss": 0.366973352432251, "step": 185300 }, { "epoch": 0.7955745601607378, "grad_norm": 0.0027544747572392225, "learning_rate": 2.053111768408889e-05, "loss": 0.1479700803756714, "step": 185310 }, { "epoch": 0.7956174922507577, "grad_norm": 0.7745925784111023, "learning_rate": 2.052680596397127e-05, "loss": 0.05805274844169617, "step": 185320 }, { "epoch": 0.7956604243407778, "grad_norm": 1.471014380455017, "learning_rate": 2.0522494243853642e-05, "loss": 0.2297032117843628, "step": 185330 }, { "epoch": 0.7957033564307978, "grad_norm": 0.4838404357433319, "learning_rate": 2.051818252373602e-05, "loss": 0.2138000726699829, "step": 185340 }, { "epoch": 0.7957462885208177, "grad_norm": 1.3738874197006226, "learning_rate": 2.0513870803618397e-05, "loss": 0.1628502607345581, "step": 185350 }, { "epoch": 0.7957892206108378, "grad_norm": 0.1752384454011917, "learning_rate": 2.0509559083500774e-05, "loss": 0.33957724571228026, "step": 185360 }, { "epoch": 0.7958321527008578, "grad_norm": 0.13606612384319305, "learning_rate": 2.050524736338315e-05, "loss": 0.14007097482681274, "step": 185370 }, { "epoch": 0.7958750847908778, "grad_norm": 1.536285638809204, "learning_rate": 2.0500935643265526e-05, "loss": 0.1621859550476074, "step": 185380 }, { "epoch": 0.7959180168808978, "grad_norm": 0.005415162071585655, "learning_rate": 2.04966239231479e-05, "loss": 0.07260684967041016, "step": 185390 }, { "epoch": 0.7959609489709178, "grad_norm": 3.600752353668213, "learning_rate": 2.0492312203030277e-05, "loss": 0.24089322090148926, "step": 185400 }, { "epoch": 0.7960038810609378, "grad_norm": 0.05691635608673096, "learning_rate": 2.0488000482912654e-05, "loss": 0.13588875532150269, "step": 185410 }, { "epoch": 0.7960468131509578, "grad_norm": 0.01960470899939537, "learning_rate": 2.048368876279503e-05, "loss": 0.1699157953262329, "step": 185420 }, { "epoch": 0.7960897452409779, "grad_norm": 0.011590172536671162, "learning_rate": 2.0479377042677406e-05, "loss": 0.024132239818572997, "step": 185430 }, { "epoch": 0.7961326773309978, "grad_norm": 0.003582247532904148, "learning_rate": 2.0475065322559783e-05, "loss": 0.25007989406585696, "step": 185440 }, { "epoch": 0.7961756094210178, "grad_norm": 2.1678757667541504, "learning_rate": 2.0470753602442157e-05, "loss": 0.2906757116317749, "step": 185450 }, { "epoch": 0.7962185415110379, "grad_norm": 0.019433652982115746, "learning_rate": 2.0466441882324534e-05, "loss": 0.05460849404335022, "step": 185460 }, { "epoch": 0.7962614736010578, "grad_norm": 0.0070192208513617516, "learning_rate": 2.046213016220691e-05, "loss": 0.18471946716308593, "step": 185470 }, { "epoch": 0.7963044056910779, "grad_norm": 3.4451112747192383, "learning_rate": 2.045781844208929e-05, "loss": 0.19607019424438477, "step": 185480 }, { "epoch": 0.7963473377810979, "grad_norm": 0.08804819732904434, "learning_rate": 2.0453506721971663e-05, "loss": 0.1398802638053894, "step": 185490 }, { "epoch": 0.7963902698711178, "grad_norm": 0.0010080928914248943, "learning_rate": 2.044919500185404e-05, "loss": 0.22251415252685547, "step": 185500 }, { "epoch": 0.7964332019611379, "grad_norm": 0.05793009698390961, "learning_rate": 2.0444883281736414e-05, "loss": 0.12232472896575927, "step": 185510 }, { "epoch": 0.7964761340511579, "grad_norm": 0.0047072130255401134, "learning_rate": 2.044057156161879e-05, "loss": 0.29519612789154054, "step": 185520 }, { "epoch": 0.7965190661411778, "grad_norm": 3.4594709873199463, "learning_rate": 2.0436259841501172e-05, "loss": 0.23673882484436035, "step": 185530 }, { "epoch": 0.7965619982311979, "grad_norm": 5.806293964385986, "learning_rate": 2.0431948121383546e-05, "loss": 0.2166839361190796, "step": 185540 }, { "epoch": 0.7966049303212179, "grad_norm": 0.005207410082221031, "learning_rate": 2.0427636401265924e-05, "loss": 0.09798675179481506, "step": 185550 }, { "epoch": 0.7966478624112379, "grad_norm": 0.0033032442443072796, "learning_rate": 2.0423324681148298e-05, "loss": 0.24011375904083251, "step": 185560 }, { "epoch": 0.7966907945012579, "grad_norm": 0.04708894342184067, "learning_rate": 2.0419012961030675e-05, "loss": 0.15701087713241577, "step": 185570 }, { "epoch": 0.796733726591278, "grad_norm": 2.4670138359069824, "learning_rate": 2.0414701240913052e-05, "loss": 0.061458778381347653, "step": 185580 }, { "epoch": 0.7967766586812979, "grad_norm": 0.0011994473170489073, "learning_rate": 2.041038952079543e-05, "loss": 0.21192600727081298, "step": 185590 }, { "epoch": 0.7968195907713179, "grad_norm": 7.147767543792725, "learning_rate": 2.0406077800677803e-05, "loss": 0.28967599868774413, "step": 185600 }, { "epoch": 0.796862522861338, "grad_norm": 3.6588809490203857, "learning_rate": 2.040176608056018e-05, "loss": 0.2007124900817871, "step": 185610 }, { "epoch": 0.7969054549513579, "grad_norm": 0.0008074496872723103, "learning_rate": 2.0397454360442555e-05, "loss": 0.27359185218811033, "step": 185620 }, { "epoch": 0.7969483870413779, "grad_norm": 0.04204615205526352, "learning_rate": 2.0393142640324932e-05, "loss": 0.15062224864959717, "step": 185630 }, { "epoch": 0.796991319131398, "grad_norm": 0.003354994347319007, "learning_rate": 2.038883092020731e-05, "loss": 0.16356064081192018, "step": 185640 }, { "epoch": 0.7970342512214179, "grad_norm": 0.9174707531929016, "learning_rate": 2.0384519200089687e-05, "loss": 0.4482748508453369, "step": 185650 }, { "epoch": 0.797077183311438, "grad_norm": 1.6391700506210327, "learning_rate": 2.038020747997206e-05, "loss": 0.2696021795272827, "step": 185660 }, { "epoch": 0.797120115401458, "grad_norm": 0.0044967783614993095, "learning_rate": 2.0375895759854438e-05, "loss": 0.09318976402282715, "step": 185670 }, { "epoch": 0.7971630474914779, "grad_norm": 2.8881003856658936, "learning_rate": 2.0371584039736812e-05, "loss": 0.1859677791595459, "step": 185680 }, { "epoch": 0.797205979581498, "grad_norm": 2.529358386993408, "learning_rate": 2.036727231961919e-05, "loss": 0.05045361518859863, "step": 185690 }, { "epoch": 0.797248911671518, "grad_norm": 2.6487114429473877, "learning_rate": 2.0362960599501567e-05, "loss": 0.23644933700561524, "step": 185700 }, { "epoch": 0.7972918437615379, "grad_norm": 0.009652736596763134, "learning_rate": 2.0358648879383944e-05, "loss": 0.31275110244750975, "step": 185710 }, { "epoch": 0.797334775851558, "grad_norm": 0.001559227122925222, "learning_rate": 2.0354337159266318e-05, "loss": 0.12550193071365356, "step": 185720 }, { "epoch": 0.797377707941578, "grad_norm": 0.02826523408293724, "learning_rate": 2.0350025439148695e-05, "loss": 0.30759053230285643, "step": 185730 }, { "epoch": 0.7974206400315981, "grad_norm": 3.503047227859497, "learning_rate": 2.034571371903107e-05, "loss": 0.2664123296737671, "step": 185740 }, { "epoch": 0.797463572121618, "grad_norm": 0.008845411241054535, "learning_rate": 2.0341401998913447e-05, "loss": 0.24550697803497315, "step": 185750 }, { "epoch": 0.797506504211638, "grad_norm": 0.24258193373680115, "learning_rate": 2.0337090278795824e-05, "loss": 0.18062769174575805, "step": 185760 }, { "epoch": 0.7975494363016581, "grad_norm": 0.05451073870062828, "learning_rate": 2.03327785586782e-05, "loss": 0.2377575159072876, "step": 185770 }, { "epoch": 0.797592368391678, "grad_norm": 0.006921617779880762, "learning_rate": 2.0328466838560575e-05, "loss": 0.004407922551035881, "step": 185780 }, { "epoch": 0.7976353004816981, "grad_norm": 0.395277202129364, "learning_rate": 2.0324155118442953e-05, "loss": 0.21247997283935546, "step": 185790 }, { "epoch": 0.7976782325717181, "grad_norm": 0.0005243797786533833, "learning_rate": 2.0319843398325327e-05, "loss": 0.20616483688354492, "step": 185800 }, { "epoch": 0.797721164661738, "grad_norm": 0.016554752364754677, "learning_rate": 2.0315531678207704e-05, "loss": 0.26093599796295164, "step": 185810 }, { "epoch": 0.7977640967517581, "grad_norm": 1.9969154596328735, "learning_rate": 2.031121995809008e-05, "loss": 0.20317411422729492, "step": 185820 }, { "epoch": 0.7978070288417781, "grad_norm": 0.2166758030653, "learning_rate": 2.030690823797246e-05, "loss": 0.1044198989868164, "step": 185830 }, { "epoch": 0.797849960931798, "grad_norm": 1.184893250465393, "learning_rate": 2.0302596517854833e-05, "loss": 0.30167474746704104, "step": 185840 }, { "epoch": 0.7978928930218181, "grad_norm": 0.007805278990417719, "learning_rate": 2.029828479773721e-05, "loss": 0.09440180659294128, "step": 185850 }, { "epoch": 0.7979358251118381, "grad_norm": 0.6124712228775024, "learning_rate": 2.0293973077619584e-05, "loss": 0.07313104271888733, "step": 185860 }, { "epoch": 0.7979787572018581, "grad_norm": 0.0013098136987537146, "learning_rate": 2.028966135750196e-05, "loss": 0.22837140560150146, "step": 185870 }, { "epoch": 0.7980216892918781, "grad_norm": 2.9226720333099365, "learning_rate": 2.0285349637384342e-05, "loss": 0.34854779243469236, "step": 185880 }, { "epoch": 0.7980646213818982, "grad_norm": 2.7549538612365723, "learning_rate": 2.0281037917266716e-05, "loss": 0.18072230815887452, "step": 185890 }, { "epoch": 0.7981075534719181, "grad_norm": 0.19986467063426971, "learning_rate": 2.0276726197149093e-05, "loss": 0.005468511208891869, "step": 185900 }, { "epoch": 0.7981504855619381, "grad_norm": 0.000706440070644021, "learning_rate": 2.0272414477031467e-05, "loss": 0.30676515102386476, "step": 185910 }, { "epoch": 0.7981934176519582, "grad_norm": 9.663665771484375, "learning_rate": 2.0268102756913845e-05, "loss": 0.34456477165222166, "step": 185920 }, { "epoch": 0.7982363497419781, "grad_norm": 0.51984041929245, "learning_rate": 2.0263791036796222e-05, "loss": 0.18212833404541015, "step": 185930 }, { "epoch": 0.7982792818319981, "grad_norm": 0.41934898495674133, "learning_rate": 2.02594793166786e-05, "loss": 0.05728796124458313, "step": 185940 }, { "epoch": 0.7983222139220182, "grad_norm": 0.3425225019454956, "learning_rate": 2.0255167596560973e-05, "loss": 0.223008131980896, "step": 185950 }, { "epoch": 0.7983651460120381, "grad_norm": 0.0046521383337676525, "learning_rate": 2.025085587644335e-05, "loss": 0.31833226680755616, "step": 185960 }, { "epoch": 0.7984080781020582, "grad_norm": 1.873736023902893, "learning_rate": 2.0246544156325725e-05, "loss": 0.2819650888442993, "step": 185970 }, { "epoch": 0.7984510101920782, "grad_norm": 0.053500592708587646, "learning_rate": 2.0242232436208102e-05, "loss": 0.15295974016189576, "step": 185980 }, { "epoch": 0.7984939422820981, "grad_norm": 2.2874643802642822, "learning_rate": 2.023792071609048e-05, "loss": 0.09558444619178771, "step": 185990 }, { "epoch": 0.7985368743721182, "grad_norm": 0.42870280146598816, "learning_rate": 2.0233608995972857e-05, "loss": 0.14517755508422853, "step": 186000 }, { "epoch": 0.7985368743721182, "eval_loss": 0.383025199174881, "eval_runtime": 27.3953, "eval_samples_per_second": 3.65, "eval_steps_per_second": 3.65, "step": 186000 }, { "epoch": 0.7985798064621382, "grad_norm": 0.0867031067609787, "learning_rate": 2.022929727585523e-05, "loss": 0.09830026030540466, "step": 186010 }, { "epoch": 0.7986227385521582, "grad_norm": 1.7900984287261963, "learning_rate": 2.0224985555737608e-05, "loss": 0.15121421813964844, "step": 186020 }, { "epoch": 0.7986656706421782, "grad_norm": 0.07163318246603012, "learning_rate": 2.0220673835619982e-05, "loss": 0.43063783645629883, "step": 186030 }, { "epoch": 0.7987086027321982, "grad_norm": 0.010341783985495567, "learning_rate": 2.021636211550236e-05, "loss": 0.17103235721588134, "step": 186040 }, { "epoch": 0.7987515348222182, "grad_norm": 9.039875984191895, "learning_rate": 2.0212050395384736e-05, "loss": 0.3425628185272217, "step": 186050 }, { "epoch": 0.7987944669122382, "grad_norm": 0.012275336310267448, "learning_rate": 2.0207738675267114e-05, "loss": 0.21186771392822265, "step": 186060 }, { "epoch": 0.7988373990022583, "grad_norm": 0.09523310512304306, "learning_rate": 2.0203426955149488e-05, "loss": 0.22170612812042237, "step": 186070 }, { "epoch": 0.7988803310922782, "grad_norm": 0.8266873955726624, "learning_rate": 2.0199115235031865e-05, "loss": 0.21422924995422363, "step": 186080 }, { "epoch": 0.7989232631822982, "grad_norm": 1.8576383590698242, "learning_rate": 2.019480351491424e-05, "loss": 0.17523318529129028, "step": 186090 }, { "epoch": 0.7989661952723183, "grad_norm": 2.276369333267212, "learning_rate": 2.0190491794796616e-05, "loss": 0.18289344310760497, "step": 186100 }, { "epoch": 0.7990091273623382, "grad_norm": 0.03286011144518852, "learning_rate": 2.0186180074678994e-05, "loss": 0.06531772613525391, "step": 186110 }, { "epoch": 0.7990520594523582, "grad_norm": 0.5831666588783264, "learning_rate": 2.018186835456137e-05, "loss": 0.22013142108917236, "step": 186120 }, { "epoch": 0.7990949915423783, "grad_norm": 3.7149910926818848, "learning_rate": 2.0177556634443745e-05, "loss": 0.2326035737991333, "step": 186130 }, { "epoch": 0.7991379236323982, "grad_norm": 2.462045907974243, "learning_rate": 2.0173244914326122e-05, "loss": 0.2145132303237915, "step": 186140 }, { "epoch": 0.7991808557224183, "grad_norm": 0.06917181611061096, "learning_rate": 2.0168933194208496e-05, "loss": 0.26174378395080566, "step": 186150 }, { "epoch": 0.7992237878124383, "grad_norm": 3.2315165996551514, "learning_rate": 2.0164621474090874e-05, "loss": 0.3010656118392944, "step": 186160 }, { "epoch": 0.7992667199024583, "grad_norm": 1.9338715076446533, "learning_rate": 2.016030975397325e-05, "loss": 0.10233894586563111, "step": 186170 }, { "epoch": 0.7993096519924783, "grad_norm": 0.004972547292709351, "learning_rate": 2.015599803385563e-05, "loss": 0.15367188453674316, "step": 186180 }, { "epoch": 0.7993525840824983, "grad_norm": 0.46371036767959595, "learning_rate": 2.0151686313738002e-05, "loss": 0.20945146083831787, "step": 186190 }, { "epoch": 0.7993955161725184, "grad_norm": 0.004476912785321474, "learning_rate": 2.014737459362038e-05, "loss": 0.002007796801626682, "step": 186200 }, { "epoch": 0.7994384482625383, "grad_norm": 0.006324408575892448, "learning_rate": 2.0143062873502754e-05, "loss": 0.22494516372680665, "step": 186210 }, { "epoch": 0.7994813803525583, "grad_norm": 0.007815426215529442, "learning_rate": 2.013875115338513e-05, "loss": 0.15336228609085084, "step": 186220 }, { "epoch": 0.7995243124425784, "grad_norm": 0.001811459893360734, "learning_rate": 2.013443943326751e-05, "loss": 0.31596338748931885, "step": 186230 }, { "epoch": 0.7995672445325983, "grad_norm": 0.00441304175183177, "learning_rate": 2.0130127713149886e-05, "loss": 0.0335442453622818, "step": 186240 }, { "epoch": 0.7996101766226184, "grad_norm": 0.0016258248360827565, "learning_rate": 2.0125815993032263e-05, "loss": 0.1325811982154846, "step": 186250 }, { "epoch": 0.7996531087126384, "grad_norm": 0.014790796674787998, "learning_rate": 2.0121504272914637e-05, "loss": 0.10485789775848389, "step": 186260 }, { "epoch": 0.7996960408026583, "grad_norm": 0.02397337555885315, "learning_rate": 2.0117192552797014e-05, "loss": 0.08896262049674988, "step": 186270 }, { "epoch": 0.7997389728926784, "grad_norm": 0.14343607425689697, "learning_rate": 2.0112880832679388e-05, "loss": 0.2278761386871338, "step": 186280 }, { "epoch": 0.7997819049826984, "grad_norm": 0.0002532574872020632, "learning_rate": 2.010856911256177e-05, "loss": 0.13488831520080566, "step": 186290 }, { "epoch": 0.7998248370727183, "grad_norm": 0.0023948042653501034, "learning_rate": 2.0104257392444143e-05, "loss": 0.21678998470306396, "step": 186300 }, { "epoch": 0.7998677691627384, "grad_norm": 0.10748161375522614, "learning_rate": 2.009994567232652e-05, "loss": 0.048986378312110904, "step": 186310 }, { "epoch": 0.7999107012527584, "grad_norm": 0.025953242555260658, "learning_rate": 2.0095633952208894e-05, "loss": 0.12238609790802002, "step": 186320 }, { "epoch": 0.7999536333427784, "grad_norm": 0.04679807275533676, "learning_rate": 2.009132223209127e-05, "loss": 0.2825323581695557, "step": 186330 }, { "epoch": 0.7999965654327984, "grad_norm": 0.0020087333396077156, "learning_rate": 2.008701051197365e-05, "loss": 0.14617905616760254, "step": 186340 }, { "epoch": 0.8000394975228184, "grad_norm": 0.02344387024641037, "learning_rate": 2.0082698791856026e-05, "loss": 0.11631090641021728, "step": 186350 }, { "epoch": 0.8000824296128384, "grad_norm": 2.009197950363159, "learning_rate": 2.00783870717384e-05, "loss": 0.17129077911376953, "step": 186360 }, { "epoch": 0.8001253617028584, "grad_norm": 0.011406494304537773, "learning_rate": 2.0074075351620778e-05, "loss": 0.07285622358322144, "step": 186370 }, { "epoch": 0.8001682937928785, "grad_norm": 0.0726480782032013, "learning_rate": 2.006976363150315e-05, "loss": 0.1225576400756836, "step": 186380 }, { "epoch": 0.8002112258828984, "grad_norm": 0.015434571541845798, "learning_rate": 2.006545191138553e-05, "loss": 0.05374734997749329, "step": 186390 }, { "epoch": 0.8002541579729184, "grad_norm": 0.10589390993118286, "learning_rate": 2.0061140191267906e-05, "loss": 0.16505370140075684, "step": 186400 }, { "epoch": 0.8002970900629385, "grad_norm": 0.01193720381706953, "learning_rate": 2.0056828471150284e-05, "loss": 0.17347509860992433, "step": 186410 }, { "epoch": 0.8003400221529584, "grad_norm": 0.02078915387392044, "learning_rate": 2.0052516751032658e-05, "loss": 0.15373240709304808, "step": 186420 }, { "epoch": 0.8003829542429785, "grad_norm": 0.003241701051592827, "learning_rate": 2.0048205030915035e-05, "loss": 0.15771273374557496, "step": 186430 }, { "epoch": 0.8004258863329985, "grad_norm": 0.03669734671711922, "learning_rate": 2.004389331079741e-05, "loss": 0.16545580625534057, "step": 186440 }, { "epoch": 0.8004688184230184, "grad_norm": 1.4811112880706787, "learning_rate": 2.0039581590679786e-05, "loss": 0.3451890468597412, "step": 186450 }, { "epoch": 0.8005117505130385, "grad_norm": 0.0017724055796861649, "learning_rate": 2.0035269870562164e-05, "loss": 0.14023756980895996, "step": 186460 }, { "epoch": 0.8005546826030585, "grad_norm": 0.17874978482723236, "learning_rate": 2.003095815044454e-05, "loss": 0.1794173836708069, "step": 186470 }, { "epoch": 0.8005976146930784, "grad_norm": 1.1632425785064697, "learning_rate": 2.0026646430326915e-05, "loss": 0.25994980335235596, "step": 186480 }, { "epoch": 0.8006405467830985, "grad_norm": 1.2274190187454224, "learning_rate": 2.0022334710209292e-05, "loss": 0.21253445148468017, "step": 186490 }, { "epoch": 0.8006834788731185, "grad_norm": 0.09271473437547684, "learning_rate": 2.0018022990091666e-05, "loss": 0.08092796206474304, "step": 186500 }, { "epoch": 0.8007264109631385, "grad_norm": 0.025559542700648308, "learning_rate": 2.0013711269974043e-05, "loss": 0.3423144817352295, "step": 186510 }, { "epoch": 0.8007693430531585, "grad_norm": 0.018450139090418816, "learning_rate": 2.000939954985642e-05, "loss": 0.2602881669998169, "step": 186520 }, { "epoch": 0.8008122751431785, "grad_norm": 0.0023915015626698732, "learning_rate": 2.0005087829738798e-05, "loss": 0.11772937774658203, "step": 186530 }, { "epoch": 0.8008552072331985, "grad_norm": 0.9385748505592346, "learning_rate": 2.0000776109621172e-05, "loss": 0.21836485862731933, "step": 186540 }, { "epoch": 0.8008981393232185, "grad_norm": 0.007567053660750389, "learning_rate": 1.999646438950355e-05, "loss": 0.1203912615776062, "step": 186550 }, { "epoch": 0.8009410714132386, "grad_norm": 0.0016851389082148671, "learning_rate": 1.9992152669385923e-05, "loss": 0.2831920623779297, "step": 186560 }, { "epoch": 0.8009840035032585, "grad_norm": 0.2422313392162323, "learning_rate": 1.99878409492683e-05, "loss": 0.11574677228927613, "step": 186570 }, { "epoch": 0.8010269355932785, "grad_norm": 0.00440719211474061, "learning_rate": 1.9983529229150678e-05, "loss": 0.19264068603515624, "step": 186580 }, { "epoch": 0.8010698676832986, "grad_norm": 0.12345883995294571, "learning_rate": 1.9979217509033055e-05, "loss": 0.2742461204528809, "step": 186590 }, { "epoch": 0.8011127997733186, "grad_norm": 0.0027105410117655993, "learning_rate": 1.997490578891543e-05, "loss": 0.19218816757202148, "step": 186600 }, { "epoch": 0.8011557318633385, "grad_norm": 0.3615066409111023, "learning_rate": 1.9970594068797807e-05, "loss": 0.1688565731048584, "step": 186610 }, { "epoch": 0.8011986639533586, "grad_norm": 4.431537628173828, "learning_rate": 1.9966282348680184e-05, "loss": 0.2278277635574341, "step": 186620 }, { "epoch": 0.8012415960433786, "grad_norm": 2.2264537811279297, "learning_rate": 1.9961970628562558e-05, "loss": 0.13971056938171386, "step": 186630 }, { "epoch": 0.8012845281333986, "grad_norm": 2.352553129196167, "learning_rate": 1.995765890844494e-05, "loss": 0.5238365173339844, "step": 186640 }, { "epoch": 0.8013274602234186, "grad_norm": 0.05882478877902031, "learning_rate": 1.9953347188327313e-05, "loss": 0.2581619739532471, "step": 186650 }, { "epoch": 0.8013703923134387, "grad_norm": 0.046139009296894073, "learning_rate": 1.994903546820969e-05, "loss": 0.07872686386108399, "step": 186660 }, { "epoch": 0.8014133244034586, "grad_norm": 0.03067977912724018, "learning_rate": 1.9944723748092064e-05, "loss": 0.34165639877319337, "step": 186670 }, { "epoch": 0.8014562564934786, "grad_norm": 0.010820715688169003, "learning_rate": 1.994041202797444e-05, "loss": 0.3078367471694946, "step": 186680 }, { "epoch": 0.8014991885834987, "grad_norm": 0.08466701209545135, "learning_rate": 1.993610030785682e-05, "loss": 0.23001487255096437, "step": 186690 }, { "epoch": 0.8015421206735186, "grad_norm": 0.0826585441827774, "learning_rate": 1.9931788587739196e-05, "loss": 0.2410907506942749, "step": 186700 }, { "epoch": 0.8015850527635386, "grad_norm": 2.120948314666748, "learning_rate": 1.992747686762157e-05, "loss": 0.1979671001434326, "step": 186710 }, { "epoch": 0.8016279848535587, "grad_norm": 0.002734254812821746, "learning_rate": 1.9923165147503947e-05, "loss": 0.36019628047943114, "step": 186720 }, { "epoch": 0.8016709169435786, "grad_norm": 1.2035413980484009, "learning_rate": 1.991885342738632e-05, "loss": 0.18466417789459227, "step": 186730 }, { "epoch": 0.8017138490335987, "grad_norm": 0.09759000688791275, "learning_rate": 1.99145417072687e-05, "loss": 0.1547027587890625, "step": 186740 }, { "epoch": 0.8017567811236187, "grad_norm": 0.01482541672885418, "learning_rate": 1.9910229987151076e-05, "loss": 0.30575516223907473, "step": 186750 }, { "epoch": 0.8017997132136386, "grad_norm": 0.03860694542527199, "learning_rate": 1.9905918267033453e-05, "loss": 0.33325586318969724, "step": 186760 }, { "epoch": 0.8018426453036587, "grad_norm": 0.00036169207305647433, "learning_rate": 1.9901606546915827e-05, "loss": 0.1686161994934082, "step": 186770 }, { "epoch": 0.8018855773936787, "grad_norm": 1.8329875469207764, "learning_rate": 1.9897294826798205e-05, "loss": 0.2805971622467041, "step": 186780 }, { "epoch": 0.8019285094836986, "grad_norm": 2.861274242401123, "learning_rate": 1.989298310668058e-05, "loss": 0.3051000118255615, "step": 186790 }, { "epoch": 0.8019714415737187, "grad_norm": 51.22409439086914, "learning_rate": 1.9888671386562956e-05, "loss": 0.158194899559021, "step": 186800 }, { "epoch": 0.8020143736637387, "grad_norm": 1.1815828084945679, "learning_rate": 1.9884359666445333e-05, "loss": 0.3367149353027344, "step": 186810 }, { "epoch": 0.8020573057537587, "grad_norm": 0.05270637199282646, "learning_rate": 1.988004794632771e-05, "loss": 0.3387028932571411, "step": 186820 }, { "epoch": 0.8021002378437787, "grad_norm": 0.08214150369167328, "learning_rate": 1.9875736226210085e-05, "loss": 0.18951514959335328, "step": 186830 }, { "epoch": 0.8021431699337987, "grad_norm": 1.066209077835083, "learning_rate": 1.9871424506092462e-05, "loss": 0.2531846046447754, "step": 186840 }, { "epoch": 0.8021861020238187, "grad_norm": 0.666606068611145, "learning_rate": 1.9867112785974836e-05, "loss": 0.14989641904830933, "step": 186850 }, { "epoch": 0.8022290341138387, "grad_norm": 0.18978695571422577, "learning_rate": 1.9862801065857213e-05, "loss": 0.20271751880645753, "step": 186860 }, { "epoch": 0.8022719662038588, "grad_norm": 0.2546761631965637, "learning_rate": 1.985848934573959e-05, "loss": 0.315608811378479, "step": 186870 }, { "epoch": 0.8023148982938787, "grad_norm": 0.004536745138466358, "learning_rate": 1.9854177625621968e-05, "loss": 0.20478010177612305, "step": 186880 }, { "epoch": 0.8023578303838987, "grad_norm": 0.11876603215932846, "learning_rate": 1.9849865905504342e-05, "loss": 0.03800502419471741, "step": 186890 }, { "epoch": 0.8024007624739188, "grad_norm": 2.519597053527832, "learning_rate": 1.984555418538672e-05, "loss": 0.2677901268005371, "step": 186900 }, { "epoch": 0.8024436945639387, "grad_norm": 0.019811009988188744, "learning_rate": 1.9841242465269093e-05, "loss": 0.27901058197021483, "step": 186910 }, { "epoch": 0.8024866266539588, "grad_norm": 0.23691457509994507, "learning_rate": 1.983693074515147e-05, "loss": 0.24831018447875977, "step": 186920 }, { "epoch": 0.8025295587439788, "grad_norm": 2.062676429748535, "learning_rate": 1.9832619025033848e-05, "loss": 0.18976922035217286, "step": 186930 }, { "epoch": 0.8025724908339987, "grad_norm": 2.7888567447662354, "learning_rate": 1.9828307304916225e-05, "loss": 0.1329728126525879, "step": 186940 }, { "epoch": 0.8026154229240188, "grad_norm": 0.053935494273900986, "learning_rate": 1.98239955847986e-05, "loss": 0.3322454929351807, "step": 186950 }, { "epoch": 0.8026583550140388, "grad_norm": 0.01403512991964817, "learning_rate": 1.9819683864680976e-05, "loss": 0.28524141311645507, "step": 186960 }, { "epoch": 0.8027012871040587, "grad_norm": 1.594042181968689, "learning_rate": 1.981537214456335e-05, "loss": 0.13451719284057617, "step": 186970 }, { "epoch": 0.8027442191940788, "grad_norm": 1.3952689170837402, "learning_rate": 1.9811060424445728e-05, "loss": 0.08733786344528198, "step": 186980 }, { "epoch": 0.8027871512840988, "grad_norm": 0.07780808955430984, "learning_rate": 1.980674870432811e-05, "loss": 0.1476304292678833, "step": 186990 }, { "epoch": 0.8028300833741188, "grad_norm": 1.0146738290786743, "learning_rate": 1.9802436984210482e-05, "loss": 0.11834737062454223, "step": 187000 }, { "epoch": 0.8028300833741188, "eval_loss": 0.3865559697151184, "eval_runtime": 27.422, "eval_samples_per_second": 3.647, "eval_steps_per_second": 3.647, "step": 187000 }, { "epoch": 0.8028730154641388, "grad_norm": 6.436366081237793, "learning_rate": 1.979812526409286e-05, "loss": 0.206487774848938, "step": 187010 }, { "epoch": 0.8029159475541588, "grad_norm": 0.007422825321555138, "learning_rate": 1.9793813543975234e-05, "loss": 0.18670098781585692, "step": 187020 }, { "epoch": 0.8029588796441789, "grad_norm": 0.001183122512884438, "learning_rate": 1.978950182385761e-05, "loss": 0.278668475151062, "step": 187030 }, { "epoch": 0.8030018117341988, "grad_norm": 11.426277160644531, "learning_rate": 1.9785190103739985e-05, "loss": 0.3917843818664551, "step": 187040 }, { "epoch": 0.8030447438242189, "grad_norm": 1.441265344619751, "learning_rate": 1.9780878383622366e-05, "loss": 0.06324902772903443, "step": 187050 }, { "epoch": 0.8030876759142389, "grad_norm": 0.1833934485912323, "learning_rate": 1.977656666350474e-05, "loss": 0.14141279458999634, "step": 187060 }, { "epoch": 0.8031306080042588, "grad_norm": 1.207807183265686, "learning_rate": 1.9772254943387117e-05, "loss": 0.15223703384399415, "step": 187070 }, { "epoch": 0.8031735400942789, "grad_norm": 0.013842697255313396, "learning_rate": 1.976794322326949e-05, "loss": 0.24720118045806885, "step": 187080 }, { "epoch": 0.8032164721842989, "grad_norm": 0.0008404516847804189, "learning_rate": 1.976363150315187e-05, "loss": 0.1951699137687683, "step": 187090 }, { "epoch": 0.8032594042743189, "grad_norm": 1.2827842235565186, "learning_rate": 1.9759319783034246e-05, "loss": 0.2023831605911255, "step": 187100 }, { "epoch": 0.8033023363643389, "grad_norm": 0.00881370808929205, "learning_rate": 1.9755008062916623e-05, "loss": 0.21812007427215577, "step": 187110 }, { "epoch": 0.8033452684543589, "grad_norm": 0.0040892851538956165, "learning_rate": 1.9750696342798997e-05, "loss": 0.13440535068511963, "step": 187120 }, { "epoch": 0.8033882005443789, "grad_norm": 4.08140754699707, "learning_rate": 1.9746384622681374e-05, "loss": 0.1526040554046631, "step": 187130 }, { "epoch": 0.8034311326343989, "grad_norm": 16.14797019958496, "learning_rate": 1.9742072902563748e-05, "loss": 0.19586637020111083, "step": 187140 }, { "epoch": 0.803474064724419, "grad_norm": 3.350125789642334, "learning_rate": 1.9737761182446126e-05, "loss": 0.10762113332748413, "step": 187150 }, { "epoch": 0.8035169968144389, "grad_norm": 0.20307861268520355, "learning_rate": 1.9733449462328503e-05, "loss": 0.1554766535758972, "step": 187160 }, { "epoch": 0.8035599289044589, "grad_norm": 0.5534129738807678, "learning_rate": 1.972913774221088e-05, "loss": 0.20793559551239013, "step": 187170 }, { "epoch": 0.803602860994479, "grad_norm": 0.3242033123970032, "learning_rate": 1.9724826022093254e-05, "loss": 0.2671414852142334, "step": 187180 }, { "epoch": 0.8036457930844989, "grad_norm": 4.204552173614502, "learning_rate": 1.972051430197563e-05, "loss": 0.1226415753364563, "step": 187190 }, { "epoch": 0.803688725174519, "grad_norm": 0.008825673721730709, "learning_rate": 1.9716202581858006e-05, "loss": 0.1894465208053589, "step": 187200 }, { "epoch": 0.803731657264539, "grad_norm": 0.07439620047807693, "learning_rate": 1.9711890861740383e-05, "loss": 0.03914215862751007, "step": 187210 }, { "epoch": 0.8037745893545589, "grad_norm": 0.0796755999326706, "learning_rate": 1.970757914162276e-05, "loss": 0.4778743267059326, "step": 187220 }, { "epoch": 0.803817521444579, "grad_norm": 0.4283967912197113, "learning_rate": 1.9703267421505138e-05, "loss": 0.0604019284248352, "step": 187230 }, { "epoch": 0.803860453534599, "grad_norm": 68.16015625, "learning_rate": 1.969895570138751e-05, "loss": 0.26521375179290774, "step": 187240 }, { "epoch": 0.8039033856246189, "grad_norm": 0.04783042520284653, "learning_rate": 1.969464398126989e-05, "loss": 0.35483057498931886, "step": 187250 }, { "epoch": 0.803946317714639, "grad_norm": 0.005563311744481325, "learning_rate": 1.9690332261152263e-05, "loss": 0.11798173189163208, "step": 187260 }, { "epoch": 0.803989249804659, "grad_norm": 0.029166478663682938, "learning_rate": 1.968602054103464e-05, "loss": 0.021522310376167298, "step": 187270 }, { "epoch": 0.804032181894679, "grad_norm": 0.005799205508083105, "learning_rate": 1.9681708820917018e-05, "loss": 0.297559928894043, "step": 187280 }, { "epoch": 0.804075113984699, "grad_norm": 2.34651517868042, "learning_rate": 1.9677397100799395e-05, "loss": 0.18867183923721315, "step": 187290 }, { "epoch": 0.804118046074719, "grad_norm": 0.002682226477190852, "learning_rate": 1.967308538068177e-05, "loss": 0.18364322185516357, "step": 187300 }, { "epoch": 0.804160978164739, "grad_norm": 0.08684167265892029, "learning_rate": 1.9668773660564146e-05, "loss": 0.21005053520202638, "step": 187310 }, { "epoch": 0.804203910254759, "grad_norm": 0.00973292626440525, "learning_rate": 1.966446194044652e-05, "loss": 0.2818763256072998, "step": 187320 }, { "epoch": 0.8042468423447791, "grad_norm": 0.16216319799423218, "learning_rate": 1.9660150220328897e-05, "loss": 0.3723745346069336, "step": 187330 }, { "epoch": 0.804289774434799, "grad_norm": 4.925543308258057, "learning_rate": 1.9655838500211278e-05, "loss": 0.2016061782836914, "step": 187340 }, { "epoch": 0.804332706524819, "grad_norm": 1.2502108812332153, "learning_rate": 1.9651526780093652e-05, "loss": 0.12285642623901367, "step": 187350 }, { "epoch": 0.8043756386148391, "grad_norm": 1.50983726978302, "learning_rate": 1.964721505997603e-05, "loss": 0.32901337146759035, "step": 187360 }, { "epoch": 0.804418570704859, "grad_norm": 0.02697085589170456, "learning_rate": 1.9642903339858403e-05, "loss": 0.0628973662853241, "step": 187370 }, { "epoch": 0.804461502794879, "grad_norm": 0.12497185170650482, "learning_rate": 1.963859161974078e-05, "loss": 0.022782574594020843, "step": 187380 }, { "epoch": 0.8045044348848991, "grad_norm": 2.2022886276245117, "learning_rate": 1.9634279899623155e-05, "loss": 0.3713336229324341, "step": 187390 }, { "epoch": 0.804547366974919, "grad_norm": 1.3997927904129028, "learning_rate": 1.9629968179505535e-05, "loss": 0.18089509010314941, "step": 187400 }, { "epoch": 0.8045902990649391, "grad_norm": 0.012205325998365879, "learning_rate": 1.962565645938791e-05, "loss": 0.17987641096115112, "step": 187410 }, { "epoch": 0.8046332311549591, "grad_norm": 0.6075366735458374, "learning_rate": 1.9621344739270287e-05, "loss": 0.3212277889251709, "step": 187420 }, { "epoch": 0.804676163244979, "grad_norm": 7.286690711975098, "learning_rate": 1.961703301915266e-05, "loss": 0.29967246055603025, "step": 187430 }, { "epoch": 0.8047190953349991, "grad_norm": 0.0046836817637085915, "learning_rate": 1.9612721299035038e-05, "loss": 0.05317643880844116, "step": 187440 }, { "epoch": 0.8047620274250191, "grad_norm": 0.4176381528377533, "learning_rate": 1.9608409578917415e-05, "loss": 0.23051493167877196, "step": 187450 }, { "epoch": 0.8048049595150392, "grad_norm": 0.037199754267930984, "learning_rate": 1.9604097858799793e-05, "loss": 0.2971806526184082, "step": 187460 }, { "epoch": 0.8048478916050591, "grad_norm": 0.19382435083389282, "learning_rate": 1.9599786138682167e-05, "loss": 0.1703810214996338, "step": 187470 }, { "epoch": 0.8048908236950791, "grad_norm": 0.015945645049214363, "learning_rate": 1.9595474418564544e-05, "loss": 0.18769537210464476, "step": 187480 }, { "epoch": 0.8049337557850992, "grad_norm": 0.009000780060887337, "learning_rate": 1.9591162698446918e-05, "loss": 0.11943295001983642, "step": 187490 }, { "epoch": 0.8049766878751191, "grad_norm": 1.8981505632400513, "learning_rate": 1.9586850978329295e-05, "loss": 0.27698822021484376, "step": 187500 }, { "epoch": 0.8050196199651392, "grad_norm": 0.04114925488829613, "learning_rate": 1.9582539258211673e-05, "loss": 0.16432760953903197, "step": 187510 }, { "epoch": 0.8050625520551592, "grad_norm": 0.04115109518170357, "learning_rate": 1.957822753809405e-05, "loss": 0.14959651231765747, "step": 187520 }, { "epoch": 0.8051054841451791, "grad_norm": 1.73521888256073, "learning_rate": 1.9573915817976424e-05, "loss": 0.11037262678146362, "step": 187530 }, { "epoch": 0.8051484162351992, "grad_norm": 0.05841144919395447, "learning_rate": 1.95696040978588e-05, "loss": 0.19754847288131713, "step": 187540 }, { "epoch": 0.8051913483252192, "grad_norm": 0.0010160811943933368, "learning_rate": 1.9565292377741175e-05, "loss": 0.04744173586368561, "step": 187550 }, { "epoch": 0.8052342804152391, "grad_norm": 0.029853256419301033, "learning_rate": 1.9560980657623553e-05, "loss": 0.19616028070449829, "step": 187560 }, { "epoch": 0.8052772125052592, "grad_norm": 1.63491690158844, "learning_rate": 1.955666893750593e-05, "loss": 0.34844026565551756, "step": 187570 }, { "epoch": 0.8053201445952792, "grad_norm": 0.16325415670871735, "learning_rate": 1.9552357217388307e-05, "loss": 0.09422051906585693, "step": 187580 }, { "epoch": 0.8053630766852992, "grad_norm": 1.076112985610962, "learning_rate": 1.954804549727068e-05, "loss": 0.1710420608520508, "step": 187590 }, { "epoch": 0.8054060087753192, "grad_norm": 1.230614423751831, "learning_rate": 1.954373377715306e-05, "loss": 0.36779584884643557, "step": 187600 }, { "epoch": 0.8054489408653392, "grad_norm": 1.0910813808441162, "learning_rate": 1.9539422057035433e-05, "loss": 0.15935710668563843, "step": 187610 }, { "epoch": 0.8054918729553592, "grad_norm": 0.0033349471632391214, "learning_rate": 1.953511033691781e-05, "loss": 0.2521167755126953, "step": 187620 }, { "epoch": 0.8055348050453792, "grad_norm": 0.13731007277965546, "learning_rate": 1.9530798616800187e-05, "loss": 0.3199592590332031, "step": 187630 }, { "epoch": 0.8055777371353993, "grad_norm": 0.5971689820289612, "learning_rate": 1.9526486896682565e-05, "loss": 0.2866949558258057, "step": 187640 }, { "epoch": 0.8056206692254192, "grad_norm": 0.01081762369722128, "learning_rate": 1.952217517656494e-05, "loss": 0.1536438822746277, "step": 187650 }, { "epoch": 0.8056636013154392, "grad_norm": 0.5095075368881226, "learning_rate": 1.9517863456447316e-05, "loss": 0.14425853490829468, "step": 187660 }, { "epoch": 0.8057065334054593, "grad_norm": 0.009507289156317711, "learning_rate": 1.951355173632969e-05, "loss": 0.028446447849273682, "step": 187670 }, { "epoch": 0.8057494654954792, "grad_norm": 0.30559566617012024, "learning_rate": 1.9509240016212067e-05, "loss": 0.2078967809677124, "step": 187680 }, { "epoch": 0.8057923975854993, "grad_norm": 15.7970609664917, "learning_rate": 1.9504928296094445e-05, "loss": 0.18506253957748414, "step": 187690 }, { "epoch": 0.8058353296755193, "grad_norm": 0.49444466829299927, "learning_rate": 1.9500616575976822e-05, "loss": 0.10527991056442261, "step": 187700 }, { "epoch": 0.8058782617655392, "grad_norm": 1.964054822921753, "learning_rate": 1.94963048558592e-05, "loss": 0.3385364770889282, "step": 187710 }, { "epoch": 0.8059211938555593, "grad_norm": 0.007375969551503658, "learning_rate": 1.9491993135741573e-05, "loss": 0.35996806621551514, "step": 187720 }, { "epoch": 0.8059641259455793, "grad_norm": 0.014366790652275085, "learning_rate": 1.948768141562395e-05, "loss": 0.1376686930656433, "step": 187730 }, { "epoch": 0.8060070580355992, "grad_norm": 0.9235551953315735, "learning_rate": 1.9483369695506324e-05, "loss": 0.08212153911590576, "step": 187740 }, { "epoch": 0.8060499901256193, "grad_norm": 1.7573333978652954, "learning_rate": 1.9479057975388705e-05, "loss": 0.15899146795272828, "step": 187750 }, { "epoch": 0.8060929222156393, "grad_norm": 1.1374258995056152, "learning_rate": 1.947474625527108e-05, "loss": 0.3773771047592163, "step": 187760 }, { "epoch": 0.8061358543056593, "grad_norm": 0.06261592358350754, "learning_rate": 1.9470434535153457e-05, "loss": 0.07383977174758911, "step": 187770 }, { "epoch": 0.8061787863956793, "grad_norm": 1.3004719018936157, "learning_rate": 1.946612281503583e-05, "loss": 0.18768155574798584, "step": 187780 }, { "epoch": 0.8062217184856993, "grad_norm": 0.0006582170026376843, "learning_rate": 1.9461811094918208e-05, "loss": 0.16706053018569947, "step": 187790 }, { "epoch": 0.8062646505757193, "grad_norm": 0.011071824468672276, "learning_rate": 1.9457499374800585e-05, "loss": 0.15217964649200438, "step": 187800 }, { "epoch": 0.8063075826657393, "grad_norm": 0.02180718258023262, "learning_rate": 1.9453187654682963e-05, "loss": 0.011281723529100418, "step": 187810 }, { "epoch": 0.8063505147557594, "grad_norm": 0.1587686687707901, "learning_rate": 1.9448875934565336e-05, "loss": 0.04129020571708679, "step": 187820 }, { "epoch": 0.8063934468457793, "grad_norm": 0.01058216579258442, "learning_rate": 1.9444564214447714e-05, "loss": 0.060017770528793334, "step": 187830 }, { "epoch": 0.8064363789357993, "grad_norm": 0.00888375099748373, "learning_rate": 1.9440252494330088e-05, "loss": 0.1673647165298462, "step": 187840 }, { "epoch": 0.8064793110258194, "grad_norm": 0.00042585397022776306, "learning_rate": 1.9435940774212465e-05, "loss": 0.11510329246520996, "step": 187850 }, { "epoch": 0.8065222431158393, "grad_norm": 0.004985545761883259, "learning_rate": 1.9431629054094842e-05, "loss": 0.20480480194091796, "step": 187860 }, { "epoch": 0.8065651752058594, "grad_norm": 0.010661100968718529, "learning_rate": 1.942731733397722e-05, "loss": 0.11720261573791504, "step": 187870 }, { "epoch": 0.8066081072958794, "grad_norm": 0.09563350677490234, "learning_rate": 1.9423005613859594e-05, "loss": 0.23833897113800048, "step": 187880 }, { "epoch": 0.8066510393858994, "grad_norm": 9.614541053771973, "learning_rate": 1.941869389374197e-05, "loss": 0.15743416547775269, "step": 187890 }, { "epoch": 0.8066939714759194, "grad_norm": 0.007036667782813311, "learning_rate": 1.9414382173624345e-05, "loss": 0.19440302848815919, "step": 187900 }, { "epoch": 0.8067369035659394, "grad_norm": 0.11198631674051285, "learning_rate": 1.9410070453506722e-05, "loss": 0.29474101066589353, "step": 187910 }, { "epoch": 0.8067798356559595, "grad_norm": 1.7913647890090942, "learning_rate": 1.94057587333891e-05, "loss": 0.19286303520202636, "step": 187920 }, { "epoch": 0.8068227677459794, "grad_norm": 0.38198572397232056, "learning_rate": 1.9401447013271477e-05, "loss": 0.114966881275177, "step": 187930 }, { "epoch": 0.8068656998359994, "grad_norm": 1.1205809116363525, "learning_rate": 1.939713529315385e-05, "loss": 0.2618088960647583, "step": 187940 }, { "epoch": 0.8069086319260195, "grad_norm": 0.11646323651075363, "learning_rate": 1.939282357303623e-05, "loss": 0.1016423225402832, "step": 187950 }, { "epoch": 0.8069515640160394, "grad_norm": 0.042885858565568924, "learning_rate": 1.9388511852918602e-05, "loss": 0.22202568054199218, "step": 187960 }, { "epoch": 0.8069944961060594, "grad_norm": 0.2142648547887802, "learning_rate": 1.938420013280098e-05, "loss": 0.09741895794868469, "step": 187970 }, { "epoch": 0.8070374281960795, "grad_norm": 0.0022309094201773405, "learning_rate": 1.9379888412683357e-05, "loss": 0.22670445442199708, "step": 187980 }, { "epoch": 0.8070803602860994, "grad_norm": 0.004053886979818344, "learning_rate": 1.9375576692565734e-05, "loss": 0.3911715269088745, "step": 187990 }, { "epoch": 0.8071232923761195, "grad_norm": 0.003097526729106903, "learning_rate": 1.9371264972448108e-05, "loss": 0.11114219427108765, "step": 188000 }, { "epoch": 0.8071232923761195, "eval_loss": 0.3755682110786438, "eval_runtime": 27.5879, "eval_samples_per_second": 3.625, "eval_steps_per_second": 3.625, "step": 188000 }, { "epoch": 0.8071662244661395, "grad_norm": 1.1802542209625244, "learning_rate": 1.9366953252330486e-05, "loss": 0.1982245683670044, "step": 188010 }, { "epoch": 0.8072091565561594, "grad_norm": 3.8327369689941406, "learning_rate": 1.936264153221286e-05, "loss": 0.18961336612701415, "step": 188020 }, { "epoch": 0.8072520886461795, "grad_norm": 0.017420997843146324, "learning_rate": 1.9358329812095237e-05, "loss": 0.1820637345314026, "step": 188030 }, { "epoch": 0.8072950207361995, "grad_norm": 2.7999138832092285, "learning_rate": 1.9354018091977614e-05, "loss": 0.36874754428863527, "step": 188040 }, { "epoch": 0.8073379528262195, "grad_norm": 8.329748153686523, "learning_rate": 1.934970637185999e-05, "loss": 0.1601884126663208, "step": 188050 }, { "epoch": 0.8073808849162395, "grad_norm": 0.3798222243785858, "learning_rate": 1.9345394651742366e-05, "loss": 0.24167709350585936, "step": 188060 }, { "epoch": 0.8074238170062595, "grad_norm": 2.208631753921509, "learning_rate": 1.9341082931624743e-05, "loss": 0.12863118648529054, "step": 188070 }, { "epoch": 0.8074667490962795, "grad_norm": 0.00045348392450250685, "learning_rate": 1.933677121150712e-05, "loss": 0.3917485952377319, "step": 188080 }, { "epoch": 0.8075096811862995, "grad_norm": 0.968796968460083, "learning_rate": 1.9332459491389494e-05, "loss": 0.11934627294540405, "step": 188090 }, { "epoch": 0.8075526132763196, "grad_norm": 0.0765795037150383, "learning_rate": 1.9328147771271875e-05, "loss": 0.5342819690704346, "step": 188100 }, { "epoch": 0.8075955453663395, "grad_norm": 1.340894341468811, "learning_rate": 1.932383605115425e-05, "loss": 0.33205289840698243, "step": 188110 }, { "epoch": 0.8076384774563595, "grad_norm": 0.06477386504411697, "learning_rate": 1.9319524331036626e-05, "loss": 0.20810203552246093, "step": 188120 }, { "epoch": 0.8076814095463796, "grad_norm": 0.41469255089759827, "learning_rate": 1.9315212610919e-05, "loss": 0.12332984209060668, "step": 188130 }, { "epoch": 0.8077243416363995, "grad_norm": 0.010204517282545567, "learning_rate": 1.9310900890801378e-05, "loss": 0.17447993755340577, "step": 188140 }, { "epoch": 0.8077672737264195, "grad_norm": 3.1295204162597656, "learning_rate": 1.930658917068375e-05, "loss": 0.06377485990524293, "step": 188150 }, { "epoch": 0.8078102058164396, "grad_norm": 10.702325820922852, "learning_rate": 1.9302277450566132e-05, "loss": 0.3309658050537109, "step": 188160 }, { "epoch": 0.8078531379064595, "grad_norm": 0.003194740740582347, "learning_rate": 1.9297965730448506e-05, "loss": 0.22385261058807374, "step": 188170 }, { "epoch": 0.8078960699964796, "grad_norm": 0.018159620463848114, "learning_rate": 1.9293654010330884e-05, "loss": 0.00289649311453104, "step": 188180 }, { "epoch": 0.8079390020864996, "grad_norm": 0.029929209500551224, "learning_rate": 1.9289342290213257e-05, "loss": 0.11803474426269531, "step": 188190 }, { "epoch": 0.8079819341765195, "grad_norm": 0.000580483756493777, "learning_rate": 1.9285030570095635e-05, "loss": 0.13644762039184571, "step": 188200 }, { "epoch": 0.8080248662665396, "grad_norm": 1.9198402166366577, "learning_rate": 1.9280718849978012e-05, "loss": 0.10175681114196777, "step": 188210 }, { "epoch": 0.8080677983565596, "grad_norm": 5.698566913604736, "learning_rate": 1.927640712986039e-05, "loss": 0.15870132446289062, "step": 188220 }, { "epoch": 0.8081107304465796, "grad_norm": 183.16192626953125, "learning_rate": 1.9272095409742763e-05, "loss": 0.22849838733673095, "step": 188230 }, { "epoch": 0.8081536625365996, "grad_norm": 0.007106813136488199, "learning_rate": 1.926778368962514e-05, "loss": 0.21907241344451905, "step": 188240 }, { "epoch": 0.8081965946266196, "grad_norm": 0.0013693399960175157, "learning_rate": 1.9263471969507515e-05, "loss": 0.1918390989303589, "step": 188250 }, { "epoch": 0.8082395267166396, "grad_norm": 0.0004091776499990374, "learning_rate": 1.9259160249389892e-05, "loss": 0.13080551624298095, "step": 188260 }, { "epoch": 0.8082824588066596, "grad_norm": 2.989788055419922, "learning_rate": 1.925484852927227e-05, "loss": 0.20496292114257814, "step": 188270 }, { "epoch": 0.8083253908966797, "grad_norm": 0.13609616458415985, "learning_rate": 1.9250536809154647e-05, "loss": 0.04478162825107575, "step": 188280 }, { "epoch": 0.8083683229866996, "grad_norm": 0.8474799394607544, "learning_rate": 1.924622508903702e-05, "loss": 0.11303468942642211, "step": 188290 }, { "epoch": 0.8084112550767196, "grad_norm": 0.9259027242660522, "learning_rate": 1.9241913368919398e-05, "loss": 0.10569697618484497, "step": 188300 }, { "epoch": 0.8084541871667397, "grad_norm": 0.005927898921072483, "learning_rate": 1.9237601648801772e-05, "loss": 0.16273220777511596, "step": 188310 }, { "epoch": 0.8084971192567597, "grad_norm": 0.05544566735625267, "learning_rate": 1.923328992868415e-05, "loss": 0.23240218162536622, "step": 188320 }, { "epoch": 0.8085400513467796, "grad_norm": 1.6250989437103271, "learning_rate": 1.9228978208566527e-05, "loss": 0.13480372428894044, "step": 188330 }, { "epoch": 0.8085829834367997, "grad_norm": 1.868417739868164, "learning_rate": 1.9224666488448904e-05, "loss": 0.21295087337493895, "step": 188340 }, { "epoch": 0.8086259155268197, "grad_norm": 0.011199853383004665, "learning_rate": 1.9220354768331278e-05, "loss": 0.314319372177124, "step": 188350 }, { "epoch": 0.8086688476168397, "grad_norm": 2.2199318408966064, "learning_rate": 1.9216043048213655e-05, "loss": 0.2382221221923828, "step": 188360 }, { "epoch": 0.8087117797068597, "grad_norm": 4.999961853027344, "learning_rate": 1.921173132809603e-05, "loss": 0.1867772102355957, "step": 188370 }, { "epoch": 0.8087547117968797, "grad_norm": 2.920680046081543, "learning_rate": 1.9207419607978407e-05, "loss": 0.20712087154388428, "step": 188380 }, { "epoch": 0.8087976438868997, "grad_norm": 0.06474506855010986, "learning_rate": 1.9203107887860784e-05, "loss": 0.16149909496307374, "step": 188390 }, { "epoch": 0.8088405759769197, "grad_norm": 1.951294183731079, "learning_rate": 1.919879616774316e-05, "loss": 0.2100539207458496, "step": 188400 }, { "epoch": 0.8088835080669398, "grad_norm": 1.694394826889038, "learning_rate": 1.9194484447625535e-05, "loss": 0.3848715782165527, "step": 188410 }, { "epoch": 0.8089264401569597, "grad_norm": 0.07759755849838257, "learning_rate": 1.9190172727507913e-05, "loss": 0.10030233860015869, "step": 188420 }, { "epoch": 0.8089693722469797, "grad_norm": 1.2198158502578735, "learning_rate": 1.918586100739029e-05, "loss": 0.20859754085540771, "step": 188430 }, { "epoch": 0.8090123043369998, "grad_norm": 6.012630462646484, "learning_rate": 1.9181549287272664e-05, "loss": 0.16353113651275636, "step": 188440 }, { "epoch": 0.8090552364270197, "grad_norm": 0.10439054667949677, "learning_rate": 1.9177237567155045e-05, "loss": 0.24715774059295653, "step": 188450 }, { "epoch": 0.8090981685170398, "grad_norm": 16.584360122680664, "learning_rate": 1.917292584703742e-05, "loss": 0.26418266296386717, "step": 188460 }, { "epoch": 0.8091411006070598, "grad_norm": 0.0019134439062327147, "learning_rate": 1.9168614126919796e-05, "loss": 0.10769253969192505, "step": 188470 }, { "epoch": 0.8091840326970797, "grad_norm": 0.37607043981552124, "learning_rate": 1.916430240680217e-05, "loss": 0.11277778148651123, "step": 188480 }, { "epoch": 0.8092269647870998, "grad_norm": 0.0044091567397117615, "learning_rate": 1.9159990686684547e-05, "loss": 0.12072253227233887, "step": 188490 }, { "epoch": 0.8092698968771198, "grad_norm": 0.018574392423033714, "learning_rate": 1.915567896656692e-05, "loss": 0.2124023199081421, "step": 188500 }, { "epoch": 0.8093128289671397, "grad_norm": 0.0848187655210495, "learning_rate": 1.9151367246449302e-05, "loss": 0.22679450511932372, "step": 188510 }, { "epoch": 0.8093557610571598, "grad_norm": 0.003953431732952595, "learning_rate": 1.9147055526331676e-05, "loss": 0.03668028116226196, "step": 188520 }, { "epoch": 0.8093986931471798, "grad_norm": 0.130598783493042, "learning_rate": 1.9142743806214053e-05, "loss": 0.3036438226699829, "step": 188530 }, { "epoch": 0.8094416252371998, "grad_norm": 0.002816180931404233, "learning_rate": 1.9138432086096427e-05, "loss": 0.1459151029586792, "step": 188540 }, { "epoch": 0.8094845573272198, "grad_norm": 0.04123365506529808, "learning_rate": 1.9134120365978805e-05, "loss": 0.12174561023712158, "step": 188550 }, { "epoch": 0.8095274894172398, "grad_norm": 3.4493002891540527, "learning_rate": 1.9129808645861182e-05, "loss": 0.2582594156265259, "step": 188560 }, { "epoch": 0.8095704215072598, "grad_norm": 3.131918430328369, "learning_rate": 1.912549692574356e-05, "loss": 0.19951019287109376, "step": 188570 }, { "epoch": 0.8096133535972798, "grad_norm": 0.002401916077360511, "learning_rate": 1.9121185205625933e-05, "loss": 0.1220403790473938, "step": 188580 }, { "epoch": 0.8096562856872999, "grad_norm": 0.11982887238264084, "learning_rate": 1.911687348550831e-05, "loss": 0.10341262817382812, "step": 188590 }, { "epoch": 0.8096992177773198, "grad_norm": 4.427649974822998, "learning_rate": 1.9112561765390685e-05, "loss": 0.267809534072876, "step": 188600 }, { "epoch": 0.8097421498673398, "grad_norm": 0.36299610137939453, "learning_rate": 1.9108250045273062e-05, "loss": 0.07970675826072693, "step": 188610 }, { "epoch": 0.8097850819573599, "grad_norm": 3.0310583114624023, "learning_rate": 1.910393832515544e-05, "loss": 0.28852736949920654, "step": 188620 }, { "epoch": 0.8098280140473798, "grad_norm": 0.010732367634773254, "learning_rate": 1.9099626605037817e-05, "loss": 0.14984482526779175, "step": 188630 }, { "epoch": 0.8098709461373998, "grad_norm": 0.003036505077034235, "learning_rate": 1.909531488492019e-05, "loss": 0.1123344898223877, "step": 188640 }, { "epoch": 0.8099138782274199, "grad_norm": 0.003406435251235962, "learning_rate": 1.9091003164802568e-05, "loss": 0.17016907930374145, "step": 188650 }, { "epoch": 0.8099568103174398, "grad_norm": 0.07481379061937332, "learning_rate": 1.9086691444684942e-05, "loss": 0.0838213324546814, "step": 188660 }, { "epoch": 0.8099997424074599, "grad_norm": 0.09323481470346451, "learning_rate": 1.908237972456732e-05, "loss": 0.019658631086349486, "step": 188670 }, { "epoch": 0.8100426744974799, "grad_norm": 0.0037671183235943317, "learning_rate": 1.9078068004449696e-05, "loss": 0.047585475444793704, "step": 188680 }, { "epoch": 0.8100856065874998, "grad_norm": 0.024669989943504333, "learning_rate": 1.9073756284332074e-05, "loss": 0.170916748046875, "step": 188690 }, { "epoch": 0.8101285386775199, "grad_norm": 7.089773654937744, "learning_rate": 1.9069444564214448e-05, "loss": 0.1541322350502014, "step": 188700 }, { "epoch": 0.8101714707675399, "grad_norm": 1.925876259803772, "learning_rate": 1.9065132844096825e-05, "loss": 0.3852232933044434, "step": 188710 }, { "epoch": 0.8102144028575599, "grad_norm": 0.03101220354437828, "learning_rate": 1.90608211239792e-05, "loss": 0.056364941596984866, "step": 188720 }, { "epoch": 0.8102573349475799, "grad_norm": 1.4782829284667969, "learning_rate": 1.9056509403861576e-05, "loss": 0.11494901180267333, "step": 188730 }, { "epoch": 0.8103002670375999, "grad_norm": 0.12165243923664093, "learning_rate": 1.9052197683743954e-05, "loss": 0.12389819622039795, "step": 188740 }, { "epoch": 0.81034319912762, "grad_norm": 4.626972675323486, "learning_rate": 1.904788596362633e-05, "loss": 0.124202561378479, "step": 188750 }, { "epoch": 0.8103861312176399, "grad_norm": 1.9608211517333984, "learning_rate": 1.9043574243508705e-05, "loss": 0.28276519775390624, "step": 188760 }, { "epoch": 0.81042906330766, "grad_norm": 0.014439301565289497, "learning_rate": 1.9039262523391082e-05, "loss": 0.3626107931137085, "step": 188770 }, { "epoch": 0.81047199539768, "grad_norm": 0.04803458973765373, "learning_rate": 1.9034950803273456e-05, "loss": 0.16060223579406738, "step": 188780 }, { "epoch": 0.8105149274876999, "grad_norm": 0.6107069849967957, "learning_rate": 1.9030639083155834e-05, "loss": 0.10360321998596192, "step": 188790 }, { "epoch": 0.81055785957772, "grad_norm": 0.05219374597072601, "learning_rate": 1.902632736303821e-05, "loss": 0.1694638252258301, "step": 188800 }, { "epoch": 0.81060079166774, "grad_norm": 0.4738638997077942, "learning_rate": 1.902201564292059e-05, "loss": 0.21596572399139405, "step": 188810 }, { "epoch": 0.81064372375776, "grad_norm": 0.43081095814704895, "learning_rate": 1.9017703922802966e-05, "loss": 0.1942771077156067, "step": 188820 }, { "epoch": 0.81068665584778, "grad_norm": 0.016578922048211098, "learning_rate": 1.901339220268534e-05, "loss": 0.1609804630279541, "step": 188830 }, { "epoch": 0.8107295879378, "grad_norm": 3.316399574279785, "learning_rate": 1.9009080482567717e-05, "loss": 0.16462740898132325, "step": 188840 }, { "epoch": 0.81077252002782, "grad_norm": 3.450151205062866, "learning_rate": 1.900476876245009e-05, "loss": 0.34598455429077146, "step": 188850 }, { "epoch": 0.81081545211784, "grad_norm": 0.0006724316626787186, "learning_rate": 1.9000457042332472e-05, "loss": 0.1317456841468811, "step": 188860 }, { "epoch": 0.81085838420786, "grad_norm": 0.0004210352199152112, "learning_rate": 1.8996145322214846e-05, "loss": 0.0758798897266388, "step": 188870 }, { "epoch": 0.81090131629788, "grad_norm": 0.001141284592449665, "learning_rate": 1.8991833602097223e-05, "loss": 0.1845982313156128, "step": 188880 }, { "epoch": 0.8109442483879, "grad_norm": 0.002732800552621484, "learning_rate": 1.8987521881979597e-05, "loss": 0.2241126298904419, "step": 188890 }, { "epoch": 0.8109871804779201, "grad_norm": 1.1904443502426147, "learning_rate": 1.8983210161861974e-05, "loss": 0.4464689254760742, "step": 188900 }, { "epoch": 0.81103011256794, "grad_norm": 0.06258974969387054, "learning_rate": 1.8978898441744348e-05, "loss": 0.06665477752685547, "step": 188910 }, { "epoch": 0.81107304465796, "grad_norm": 1.2643054723739624, "learning_rate": 1.897458672162673e-05, "loss": 0.18965506553649902, "step": 188920 }, { "epoch": 0.8111159767479801, "grad_norm": 0.11003031581640244, "learning_rate": 1.8970275001509103e-05, "loss": 0.0030824728310108185, "step": 188930 }, { "epoch": 0.811158908838, "grad_norm": 0.01650061085820198, "learning_rate": 1.896596328139148e-05, "loss": 0.16442786455154418, "step": 188940 }, { "epoch": 0.8112018409280201, "grad_norm": 0.006386617664247751, "learning_rate": 1.8961651561273854e-05, "loss": 0.18398728370666503, "step": 188950 }, { "epoch": 0.8112447730180401, "grad_norm": 2.380122661590576, "learning_rate": 1.895733984115623e-05, "loss": 0.14523251056671144, "step": 188960 }, { "epoch": 0.81128770510806, "grad_norm": 0.021122131496667862, "learning_rate": 1.895302812103861e-05, "loss": 0.22241947650909424, "step": 188970 }, { "epoch": 0.8113306371980801, "grad_norm": 4.08834171295166, "learning_rate": 1.8948716400920986e-05, "loss": 0.44264769554138184, "step": 188980 }, { "epoch": 0.8113735692881001, "grad_norm": 0.006722049321979284, "learning_rate": 1.894440468080336e-05, "loss": 0.298035717010498, "step": 188990 }, { "epoch": 0.81141650137812, "grad_norm": 1.6981254816055298, "learning_rate": 1.8940092960685738e-05, "loss": 0.14734795093536376, "step": 189000 }, { "epoch": 0.81141650137812, "eval_loss": 0.38009634613990784, "eval_runtime": 27.5037, "eval_samples_per_second": 3.636, "eval_steps_per_second": 3.636, "step": 189000 }, { "epoch": 0.8114594334681401, "grad_norm": 0.4784759283065796, "learning_rate": 1.893578124056811e-05, "loss": 0.2854416608810425, "step": 189010 }, { "epoch": 0.8115023655581601, "grad_norm": 1.5554957389831543, "learning_rate": 1.893146952045049e-05, "loss": 0.32027492523193357, "step": 189020 }, { "epoch": 0.8115452976481801, "grad_norm": 1.664131999015808, "learning_rate": 1.8927157800332866e-05, "loss": 0.16618586778640748, "step": 189030 }, { "epoch": 0.8115882297382001, "grad_norm": 0.34115976095199585, "learning_rate": 1.8922846080215244e-05, "loss": 0.15978493690490722, "step": 189040 }, { "epoch": 0.8116311618282201, "grad_norm": 1.0413432121276855, "learning_rate": 1.8918534360097618e-05, "loss": 0.26266343593597413, "step": 189050 }, { "epoch": 0.8116740939182401, "grad_norm": 0.5915712714195251, "learning_rate": 1.8914222639979995e-05, "loss": 0.0370695948600769, "step": 189060 }, { "epoch": 0.8117170260082601, "grad_norm": 0.01841799169778824, "learning_rate": 1.890991091986237e-05, "loss": 0.11278891563415527, "step": 189070 }, { "epoch": 0.8117599580982802, "grad_norm": 6.7392964363098145, "learning_rate": 1.8905599199744746e-05, "loss": 0.45865182876586913, "step": 189080 }, { "epoch": 0.8118028901883001, "grad_norm": 1.3006116151809692, "learning_rate": 1.8901287479627123e-05, "loss": 0.14712069034576417, "step": 189090 }, { "epoch": 0.8118458222783201, "grad_norm": 0.007229759357869625, "learning_rate": 1.88969757595095e-05, "loss": 0.07043436765670777, "step": 189100 }, { "epoch": 0.8118887543683402, "grad_norm": 0.012855586595833302, "learning_rate": 1.8892664039391875e-05, "loss": 0.13676822185516357, "step": 189110 }, { "epoch": 0.8119316864583601, "grad_norm": 8.256521224975586, "learning_rate": 1.8888352319274252e-05, "loss": 0.151786732673645, "step": 189120 }, { "epoch": 0.8119746185483802, "grad_norm": 0.0016009919345378876, "learning_rate": 1.8884040599156626e-05, "loss": 0.22647907733917236, "step": 189130 }, { "epoch": 0.8120175506384002, "grad_norm": 0.039503589272499084, "learning_rate": 1.8879728879039003e-05, "loss": 0.2703474283218384, "step": 189140 }, { "epoch": 0.8120604827284201, "grad_norm": 0.0002003997069550678, "learning_rate": 1.887541715892138e-05, "loss": 0.2675285577774048, "step": 189150 }, { "epoch": 0.8121034148184402, "grad_norm": 0.0024399063549935818, "learning_rate": 1.8871105438803758e-05, "loss": 0.10987521409988403, "step": 189160 }, { "epoch": 0.8121463469084602, "grad_norm": 9.465962409973145, "learning_rate": 1.8866793718686135e-05, "loss": 0.11177196502685546, "step": 189170 }, { "epoch": 0.8121892789984803, "grad_norm": 0.016127515584230423, "learning_rate": 1.886248199856851e-05, "loss": 0.1671124815940857, "step": 189180 }, { "epoch": 0.8122322110885002, "grad_norm": 0.6212892532348633, "learning_rate": 1.8858170278450887e-05, "loss": 0.2125856876373291, "step": 189190 }, { "epoch": 0.8122751431785202, "grad_norm": 0.16576534509658813, "learning_rate": 1.885385855833326e-05, "loss": 0.19760308265686036, "step": 189200 }, { "epoch": 0.8123180752685403, "grad_norm": 0.008632770739495754, "learning_rate": 1.884954683821564e-05, "loss": 0.17998934984207154, "step": 189210 }, { "epoch": 0.8123610073585602, "grad_norm": 0.004760857205837965, "learning_rate": 1.8845235118098015e-05, "loss": 0.10525168180465698, "step": 189220 }, { "epoch": 0.8124039394485802, "grad_norm": 7.089337348937988, "learning_rate": 1.8840923397980393e-05, "loss": 0.0909703254699707, "step": 189230 }, { "epoch": 0.8124468715386003, "grad_norm": 0.01754256896674633, "learning_rate": 1.8836611677862767e-05, "loss": 0.21712646484375, "step": 189240 }, { "epoch": 0.8124898036286202, "grad_norm": 1.2798113822937012, "learning_rate": 1.8832299957745144e-05, "loss": 0.2140897274017334, "step": 189250 }, { "epoch": 0.8125327357186403, "grad_norm": 25.59235191345215, "learning_rate": 1.8827988237627518e-05, "loss": 0.2592212915420532, "step": 189260 }, { "epoch": 0.8125756678086603, "grad_norm": 1.0722297430038452, "learning_rate": 1.88236765175099e-05, "loss": 0.20234456062316894, "step": 189270 }, { "epoch": 0.8126185998986802, "grad_norm": 0.06800615787506104, "learning_rate": 1.8819364797392273e-05, "loss": 0.17045010328292848, "step": 189280 }, { "epoch": 0.8126615319887003, "grad_norm": 1.14657723903656, "learning_rate": 1.881505307727465e-05, "loss": 0.167192280292511, "step": 189290 }, { "epoch": 0.8127044640787203, "grad_norm": 31.580127716064453, "learning_rate": 1.8810741357157024e-05, "loss": 0.2977290153503418, "step": 189300 }, { "epoch": 0.8127473961687403, "grad_norm": 2.1737892627716064, "learning_rate": 1.88064296370394e-05, "loss": 0.2161936044692993, "step": 189310 }, { "epoch": 0.8127903282587603, "grad_norm": 0.0015936638228595257, "learning_rate": 1.880211791692178e-05, "loss": 0.08559054732322693, "step": 189320 }, { "epoch": 0.8128332603487803, "grad_norm": 0.04431832209229469, "learning_rate": 1.8797806196804156e-05, "loss": 0.2089853048324585, "step": 189330 }, { "epoch": 0.8128761924388003, "grad_norm": 0.013524886220693588, "learning_rate": 1.879349447668653e-05, "loss": 0.10106925964355469, "step": 189340 }, { "epoch": 0.8129191245288203, "grad_norm": 0.21533319354057312, "learning_rate": 1.8789182756568907e-05, "loss": 0.1647628664970398, "step": 189350 }, { "epoch": 0.8129620566188404, "grad_norm": 0.007309638429433107, "learning_rate": 1.878487103645128e-05, "loss": 0.280776047706604, "step": 189360 }, { "epoch": 0.8130049887088603, "grad_norm": 0.030792390927672386, "learning_rate": 1.878055931633366e-05, "loss": 0.23691213130950928, "step": 189370 }, { "epoch": 0.8130479207988803, "grad_norm": 0.0014829770661890507, "learning_rate": 1.8776247596216036e-05, "loss": 0.06938903331756592, "step": 189380 }, { "epoch": 0.8130908528889004, "grad_norm": 1.4969149827957153, "learning_rate": 1.8771935876098413e-05, "loss": 0.23304481506347657, "step": 189390 }, { "epoch": 0.8131337849789203, "grad_norm": 5.537669658660889, "learning_rate": 1.8767624155980787e-05, "loss": 0.15186458826065063, "step": 189400 }, { "epoch": 0.8131767170689403, "grad_norm": 1.8411829471588135, "learning_rate": 1.8763312435863165e-05, "loss": 0.14897530078887938, "step": 189410 }, { "epoch": 0.8132196491589604, "grad_norm": 0.00035322734038345516, "learning_rate": 1.875900071574554e-05, "loss": 0.257629919052124, "step": 189420 }, { "epoch": 0.8132625812489803, "grad_norm": 0.004576101899147034, "learning_rate": 1.8754688995627916e-05, "loss": 0.1692986011505127, "step": 189430 }, { "epoch": 0.8133055133390004, "grad_norm": 1.1908814907073975, "learning_rate": 1.8750377275510293e-05, "loss": 0.1332242012023926, "step": 189440 }, { "epoch": 0.8133484454290204, "grad_norm": 1.6177889108657837, "learning_rate": 1.874606555539267e-05, "loss": 0.23381438255310058, "step": 189450 }, { "epoch": 0.8133913775190403, "grad_norm": 0.0006076296558603644, "learning_rate": 1.8741753835275045e-05, "loss": 0.35434651374816895, "step": 189460 }, { "epoch": 0.8134343096090604, "grad_norm": 0.008793370798230171, "learning_rate": 1.8737442115157422e-05, "loss": 0.2348698139190674, "step": 189470 }, { "epoch": 0.8134772416990804, "grad_norm": 0.10453282296657562, "learning_rate": 1.8733130395039796e-05, "loss": 0.16681669950485228, "step": 189480 }, { "epoch": 0.8135201737891004, "grad_norm": 0.01704632118344307, "learning_rate": 1.8728818674922173e-05, "loss": 0.23047642707824706, "step": 189490 }, { "epoch": 0.8135631058791204, "grad_norm": 0.005977040156722069, "learning_rate": 1.872450695480455e-05, "loss": 0.0015470744110643864, "step": 189500 }, { "epoch": 0.8136060379691404, "grad_norm": 0.014360605739057064, "learning_rate": 1.8720195234686928e-05, "loss": 0.03445011377334595, "step": 189510 }, { "epoch": 0.8136489700591604, "grad_norm": 0.002405626932159066, "learning_rate": 1.8715883514569305e-05, "loss": 0.06739037036895752, "step": 189520 }, { "epoch": 0.8136919021491804, "grad_norm": 0.00946758408099413, "learning_rate": 1.871157179445168e-05, "loss": 0.026663467288017273, "step": 189530 }, { "epoch": 0.8137348342392005, "grad_norm": 0.0020533869974315166, "learning_rate": 1.8707260074334056e-05, "loss": 0.10337278842926026, "step": 189540 }, { "epoch": 0.8137777663292204, "grad_norm": 1.2318427562713623, "learning_rate": 1.870294835421643e-05, "loss": 0.17301219701766968, "step": 189550 }, { "epoch": 0.8138206984192404, "grad_norm": 0.0025602129753679037, "learning_rate": 1.869863663409881e-05, "loss": 0.2267094373703003, "step": 189560 }, { "epoch": 0.8138636305092605, "grad_norm": 8.636144638061523, "learning_rate": 1.8694324913981185e-05, "loss": 0.6510757446289063, "step": 189570 }, { "epoch": 0.8139065625992804, "grad_norm": 16.425983428955078, "learning_rate": 1.8690013193863562e-05, "loss": 0.23902950286865235, "step": 189580 }, { "epoch": 0.8139494946893004, "grad_norm": 0.005090423859655857, "learning_rate": 1.8685701473745936e-05, "loss": 0.38922483921051027, "step": 189590 }, { "epoch": 0.8139924267793205, "grad_norm": 3.3574795722961426, "learning_rate": 1.8681389753628314e-05, "loss": 0.33330717086791994, "step": 189600 }, { "epoch": 0.8140353588693405, "grad_norm": 0.035569801926612854, "learning_rate": 1.8677078033510688e-05, "loss": 0.2678635835647583, "step": 189610 }, { "epoch": 0.8140782909593605, "grad_norm": 0.08557265251874924, "learning_rate": 1.867276631339307e-05, "loss": 0.10242055654525757, "step": 189620 }, { "epoch": 0.8141212230493805, "grad_norm": 6.003871440887451, "learning_rate": 1.8668454593275442e-05, "loss": 0.20186066627502441, "step": 189630 }, { "epoch": 0.8141641551394005, "grad_norm": 0.0007279837154783309, "learning_rate": 1.866414287315782e-05, "loss": 0.3290987730026245, "step": 189640 }, { "epoch": 0.8142070872294205, "grad_norm": 0.07561841607093811, "learning_rate": 1.8659831153040194e-05, "loss": 0.2522317409515381, "step": 189650 }, { "epoch": 0.8142500193194405, "grad_norm": 1.2994825839996338, "learning_rate": 1.865551943292257e-05, "loss": 0.13057563304901124, "step": 189660 }, { "epoch": 0.8142929514094606, "grad_norm": 1.358086347579956, "learning_rate": 1.865120771280495e-05, "loss": 0.27964789867401124, "step": 189670 }, { "epoch": 0.8143358834994805, "grad_norm": 2.5652987957000732, "learning_rate": 1.8646895992687326e-05, "loss": 0.4078275203704834, "step": 189680 }, { "epoch": 0.8143788155895005, "grad_norm": 1.2036330699920654, "learning_rate": 1.86425842725697e-05, "loss": 0.3373491048812866, "step": 189690 }, { "epoch": 0.8144217476795206, "grad_norm": 0.638670027256012, "learning_rate": 1.8638272552452077e-05, "loss": 0.10838404893875123, "step": 189700 }, { "epoch": 0.8144646797695405, "grad_norm": 2.393080472946167, "learning_rate": 1.863396083233445e-05, "loss": 0.23193233013153075, "step": 189710 }, { "epoch": 0.8145076118595606, "grad_norm": 0.7426817417144775, "learning_rate": 1.862964911221683e-05, "loss": 0.148024320602417, "step": 189720 }, { "epoch": 0.8145505439495806, "grad_norm": 0.6907213926315308, "learning_rate": 1.8625337392099206e-05, "loss": 0.13800256252288817, "step": 189730 }, { "epoch": 0.8145934760396005, "grad_norm": 2.134216547012329, "learning_rate": 1.8621025671981583e-05, "loss": 0.16400604248046874, "step": 189740 }, { "epoch": 0.8146364081296206, "grad_norm": 0.004457848146557808, "learning_rate": 1.8616713951863957e-05, "loss": 0.01569632887840271, "step": 189750 }, { "epoch": 0.8146793402196406, "grad_norm": 0.025374725461006165, "learning_rate": 1.8612402231746334e-05, "loss": 0.11625961065292359, "step": 189760 }, { "epoch": 0.8147222723096605, "grad_norm": 6.314393043518066, "learning_rate": 1.8608090511628708e-05, "loss": 0.3577938795089722, "step": 189770 }, { "epoch": 0.8147652043996806, "grad_norm": 0.018094485625624657, "learning_rate": 1.8603778791511086e-05, "loss": 0.19390039443969725, "step": 189780 }, { "epoch": 0.8148081364897006, "grad_norm": 0.022799383848905563, "learning_rate": 1.8599467071393463e-05, "loss": 0.1557063341140747, "step": 189790 }, { "epoch": 0.8148510685797206, "grad_norm": 0.0029004893731325865, "learning_rate": 1.859515535127584e-05, "loss": 0.024601130187511443, "step": 189800 }, { "epoch": 0.8148940006697406, "grad_norm": 0.009202632121741772, "learning_rate": 1.8590843631158214e-05, "loss": 0.27263336181640624, "step": 189810 }, { "epoch": 0.8149369327597606, "grad_norm": 0.00577400391921401, "learning_rate": 1.858653191104059e-05, "loss": 0.04384286105632782, "step": 189820 }, { "epoch": 0.8149798648497806, "grad_norm": 0.00864651519805193, "learning_rate": 1.8582220190922966e-05, "loss": 0.16352792978286743, "step": 189830 }, { "epoch": 0.8150227969398006, "grad_norm": 0.06453585624694824, "learning_rate": 1.8577908470805343e-05, "loss": 0.25619847774505616, "step": 189840 }, { "epoch": 0.8150657290298207, "grad_norm": 0.8390404582023621, "learning_rate": 1.857359675068772e-05, "loss": 0.1784263014793396, "step": 189850 }, { "epoch": 0.8151086611198406, "grad_norm": 1.5671316385269165, "learning_rate": 1.8569285030570098e-05, "loss": 0.1716221332550049, "step": 189860 }, { "epoch": 0.8151515932098606, "grad_norm": 0.026777099817991257, "learning_rate": 1.856497331045247e-05, "loss": 0.14569276571273804, "step": 189870 }, { "epoch": 0.8151945252998807, "grad_norm": 0.06430796533823013, "learning_rate": 1.856066159033485e-05, "loss": 0.15251225233078003, "step": 189880 }, { "epoch": 0.8152374573899006, "grad_norm": 2.121525764465332, "learning_rate": 1.8556349870217226e-05, "loss": 0.27808551788330077, "step": 189890 }, { "epoch": 0.8152803894799207, "grad_norm": 0.003509877948090434, "learning_rate": 1.85520381500996e-05, "loss": 0.18043378591537476, "step": 189900 }, { "epoch": 0.8153233215699407, "grad_norm": 1.6782100200653076, "learning_rate": 1.8547726429981978e-05, "loss": 0.2832813262939453, "step": 189910 }, { "epoch": 0.8153662536599606, "grad_norm": 0.006824735086411238, "learning_rate": 1.8543414709864355e-05, "loss": 0.35706157684326173, "step": 189920 }, { "epoch": 0.8154091857499807, "grad_norm": 0.0013534731697291136, "learning_rate": 1.8539102989746732e-05, "loss": 0.08758707642555237, "step": 189930 }, { "epoch": 0.8154521178400007, "grad_norm": 1.16303551197052, "learning_rate": 1.8534791269629106e-05, "loss": 0.41260361671447754, "step": 189940 }, { "epoch": 0.8154950499300206, "grad_norm": 0.008056594990193844, "learning_rate": 1.8530479549511483e-05, "loss": 0.2539925813674927, "step": 189950 }, { "epoch": 0.8155379820200407, "grad_norm": 2.1018102169036865, "learning_rate": 1.8526167829393857e-05, "loss": 0.25430138111114503, "step": 189960 }, { "epoch": 0.8155809141100607, "grad_norm": 2.4627227783203125, "learning_rate": 1.8521856109276238e-05, "loss": 0.29794859886169434, "step": 189970 }, { "epoch": 0.8156238462000807, "grad_norm": 0.7906169295310974, "learning_rate": 1.8517544389158612e-05, "loss": 0.17653955221176149, "step": 189980 }, { "epoch": 0.8156667782901007, "grad_norm": 0.1337890774011612, "learning_rate": 1.851323266904099e-05, "loss": 0.17861900329589844, "step": 189990 }, { "epoch": 0.8157097103801207, "grad_norm": 0.7123793363571167, "learning_rate": 1.8508920948923363e-05, "loss": 0.22133033275604247, "step": 190000 }, { "epoch": 0.8157097103801207, "eval_loss": 0.3748077154159546, "eval_runtime": 27.4732, "eval_samples_per_second": 3.64, "eval_steps_per_second": 3.64, "step": 190000 }, { "epoch": 0.8157526424701407, "grad_norm": 0.008327401243150234, "learning_rate": 1.850460922880574e-05, "loss": 0.14534246921539307, "step": 190010 }, { "epoch": 0.8157955745601607, "grad_norm": 0.032320182770490646, "learning_rate": 1.8500297508688115e-05, "loss": 0.28674242496490476, "step": 190020 }, { "epoch": 0.8158385066501808, "grad_norm": 0.01738484762609005, "learning_rate": 1.8495985788570495e-05, "loss": 0.03833106756210327, "step": 190030 }, { "epoch": 0.8158814387402008, "grad_norm": 0.005880672950297594, "learning_rate": 1.849167406845287e-05, "loss": 0.04911441206932068, "step": 190040 }, { "epoch": 0.8159243708302207, "grad_norm": 0.037115778774023056, "learning_rate": 1.8487362348335247e-05, "loss": 0.2223417043685913, "step": 190050 }, { "epoch": 0.8159673029202408, "grad_norm": 0.9087185859680176, "learning_rate": 1.848305062821762e-05, "loss": 0.12857097387313843, "step": 190060 }, { "epoch": 0.8160102350102608, "grad_norm": 0.000869360170327127, "learning_rate": 1.8478738908099998e-05, "loss": 0.22225430011749267, "step": 190070 }, { "epoch": 0.8160531671002808, "grad_norm": 0.011036857031285763, "learning_rate": 1.8474427187982375e-05, "loss": 0.1313633918762207, "step": 190080 }, { "epoch": 0.8160960991903008, "grad_norm": 0.19269496202468872, "learning_rate": 1.8470115467864753e-05, "loss": 0.038611036539077756, "step": 190090 }, { "epoch": 0.8161390312803208, "grad_norm": 1.4396089315414429, "learning_rate": 1.8465803747747127e-05, "loss": 0.14506794214248658, "step": 190100 }, { "epoch": 0.8161819633703408, "grad_norm": 0.0028870003297924995, "learning_rate": 1.8461492027629504e-05, "loss": 0.13069781064987182, "step": 190110 }, { "epoch": 0.8162248954603608, "grad_norm": 1.1406079530715942, "learning_rate": 1.8457180307511878e-05, "loss": 0.4691906929016113, "step": 190120 }, { "epoch": 0.8162678275503809, "grad_norm": 0.00298871798440814, "learning_rate": 1.8452868587394255e-05, "loss": 0.12180900573730469, "step": 190130 }, { "epoch": 0.8163107596404008, "grad_norm": 0.05643042176961899, "learning_rate": 1.8448556867276633e-05, "loss": 0.26197865009307864, "step": 190140 }, { "epoch": 0.8163536917304208, "grad_norm": 0.010706533677875996, "learning_rate": 1.844424514715901e-05, "loss": 0.05873526334762573, "step": 190150 }, { "epoch": 0.8163966238204409, "grad_norm": 2.701686143875122, "learning_rate": 1.8439933427041384e-05, "loss": 0.09920306205749511, "step": 190160 }, { "epoch": 0.8164395559104608, "grad_norm": 0.010354182682931423, "learning_rate": 1.843562170692376e-05, "loss": 0.23539597988128663, "step": 190170 }, { "epoch": 0.8164824880004808, "grad_norm": 3.4639413356781006, "learning_rate": 1.8431309986806135e-05, "loss": 0.2738173007965088, "step": 190180 }, { "epoch": 0.8165254200905009, "grad_norm": 0.8891342878341675, "learning_rate": 1.8426998266688513e-05, "loss": 0.0747622311115265, "step": 190190 }, { "epoch": 0.8165683521805208, "grad_norm": 3.6724092960357666, "learning_rate": 1.842268654657089e-05, "loss": 0.3093080520629883, "step": 190200 }, { "epoch": 0.8166112842705409, "grad_norm": 2.065101146697998, "learning_rate": 1.8418374826453267e-05, "loss": 0.24554228782653809, "step": 190210 }, { "epoch": 0.8166542163605609, "grad_norm": 0.7292916178703308, "learning_rate": 1.841406310633564e-05, "loss": 0.28256423473358155, "step": 190220 }, { "epoch": 0.8166971484505808, "grad_norm": 0.032051555812358856, "learning_rate": 1.840975138621802e-05, "loss": 0.16433157920837402, "step": 190230 }, { "epoch": 0.8167400805406009, "grad_norm": 0.0009634624002501369, "learning_rate": 1.8405439666100393e-05, "loss": 0.1607471823692322, "step": 190240 }, { "epoch": 0.8167830126306209, "grad_norm": 0.1358780860900879, "learning_rate": 1.840112794598277e-05, "loss": 0.19506868124008178, "step": 190250 }, { "epoch": 0.8168259447206409, "grad_norm": 0.05440731719136238, "learning_rate": 1.8396816225865147e-05, "loss": 0.10299488306045532, "step": 190260 }, { "epoch": 0.8168688768106609, "grad_norm": 0.0032140351831912994, "learning_rate": 1.8392504505747525e-05, "loss": 0.18394207954406738, "step": 190270 }, { "epoch": 0.8169118089006809, "grad_norm": 0.058177292346954346, "learning_rate": 1.8388192785629902e-05, "loss": 0.1681948184967041, "step": 190280 }, { "epoch": 0.8169547409907009, "grad_norm": 0.0030108222272247076, "learning_rate": 1.8383881065512276e-05, "loss": 0.2944432258605957, "step": 190290 }, { "epoch": 0.8169976730807209, "grad_norm": 0.32893991470336914, "learning_rate": 1.8379569345394653e-05, "loss": 0.10108593702316285, "step": 190300 }, { "epoch": 0.817040605170741, "grad_norm": 0.3538655936717987, "learning_rate": 1.8375257625277027e-05, "loss": 0.30429155826568605, "step": 190310 }, { "epoch": 0.8170835372607609, "grad_norm": 3.212498426437378, "learning_rate": 1.8370945905159408e-05, "loss": 0.06719596982002259, "step": 190320 }, { "epoch": 0.8171264693507809, "grad_norm": 0.026576699689030647, "learning_rate": 1.8366634185041782e-05, "loss": 0.09907512664794922, "step": 190330 }, { "epoch": 0.817169401440801, "grad_norm": 0.9718691110610962, "learning_rate": 1.836232246492416e-05, "loss": 0.1221091628074646, "step": 190340 }, { "epoch": 0.8172123335308209, "grad_norm": 0.8951011300086975, "learning_rate": 1.8358010744806533e-05, "loss": 0.13755905628204346, "step": 190350 }, { "epoch": 0.8172552656208409, "grad_norm": 0.9757001996040344, "learning_rate": 1.835369902468891e-05, "loss": 0.27019762992858887, "step": 190360 }, { "epoch": 0.817298197710861, "grad_norm": 0.0059048268012702465, "learning_rate": 1.8349387304571284e-05, "loss": 0.24034535884857178, "step": 190370 }, { "epoch": 0.8173411298008809, "grad_norm": 0.9905226230621338, "learning_rate": 1.8345075584453665e-05, "loss": 0.34694108963012693, "step": 190380 }, { "epoch": 0.817384061890901, "grad_norm": 3.6334996223449707, "learning_rate": 1.834076386433604e-05, "loss": 0.22657389640808107, "step": 190390 }, { "epoch": 0.817426993980921, "grad_norm": 1.3295059204101562, "learning_rate": 1.8336452144218416e-05, "loss": 0.12326008081436157, "step": 190400 }, { "epoch": 0.8174699260709409, "grad_norm": 0.014637632295489311, "learning_rate": 1.833214042410079e-05, "loss": 0.3915814161300659, "step": 190410 }, { "epoch": 0.817512858160961, "grad_norm": 0.007784396409988403, "learning_rate": 1.8327828703983168e-05, "loss": 0.11680706739425659, "step": 190420 }, { "epoch": 0.817555790250981, "grad_norm": 0.010571125894784927, "learning_rate": 1.8323516983865545e-05, "loss": 0.08030446767807006, "step": 190430 }, { "epoch": 0.817598722341001, "grad_norm": 0.01794949173927307, "learning_rate": 1.8319205263747922e-05, "loss": 0.3555665731430054, "step": 190440 }, { "epoch": 0.817641654431021, "grad_norm": 0.02172680012881756, "learning_rate": 1.8314893543630296e-05, "loss": 0.12549080848693847, "step": 190450 }, { "epoch": 0.817684586521041, "grad_norm": 0.057841021567583084, "learning_rate": 1.8310581823512674e-05, "loss": 0.27305548191070556, "step": 190460 }, { "epoch": 0.8177275186110611, "grad_norm": 0.006068886257708073, "learning_rate": 1.8306270103395048e-05, "loss": 0.20729308128356932, "step": 190470 }, { "epoch": 0.817770450701081, "grad_norm": 0.02753814309835434, "learning_rate": 1.8301958383277425e-05, "loss": 0.2326343536376953, "step": 190480 }, { "epoch": 0.817813382791101, "grad_norm": 0.08779291808605194, "learning_rate": 1.8297646663159802e-05, "loss": 0.05305823087692261, "step": 190490 }, { "epoch": 0.8178563148811211, "grad_norm": 1.9648560285568237, "learning_rate": 1.829333494304218e-05, "loss": 0.20017385482788086, "step": 190500 }, { "epoch": 0.817899246971141, "grad_norm": 0.003839155426248908, "learning_rate": 1.8289023222924554e-05, "loss": 0.2160346508026123, "step": 190510 }, { "epoch": 0.8179421790611611, "grad_norm": 1.4220077991485596, "learning_rate": 1.828471150280693e-05, "loss": 0.3003256320953369, "step": 190520 }, { "epoch": 0.8179851111511811, "grad_norm": 0.06661306321620941, "learning_rate": 1.8280399782689305e-05, "loss": 0.16826083660125732, "step": 190530 }, { "epoch": 0.818028043241201, "grad_norm": 1.7498247623443604, "learning_rate": 1.8276088062571682e-05, "loss": 0.14414217472076415, "step": 190540 }, { "epoch": 0.8180709753312211, "grad_norm": 0.0036016765516251326, "learning_rate": 1.827177634245406e-05, "loss": 0.2631456136703491, "step": 190550 }, { "epoch": 0.8181139074212411, "grad_norm": 0.011093534529209137, "learning_rate": 1.8267464622336437e-05, "loss": 0.0830884575843811, "step": 190560 }, { "epoch": 0.8181568395112611, "grad_norm": 0.008177113719284534, "learning_rate": 1.826315290221881e-05, "loss": 0.16356415748596193, "step": 190570 }, { "epoch": 0.8181997716012811, "grad_norm": 1.0167497396469116, "learning_rate": 1.825884118210119e-05, "loss": 0.2114093065261841, "step": 190580 }, { "epoch": 0.8182427036913011, "grad_norm": 1.0505751371383667, "learning_rate": 1.8254529461983562e-05, "loss": 0.09989173412322998, "step": 190590 }, { "epoch": 0.8182856357813211, "grad_norm": 0.003051972948014736, "learning_rate": 1.825021774186594e-05, "loss": 0.21981940269470215, "step": 190600 }, { "epoch": 0.8183285678713411, "grad_norm": 0.012448975816369057, "learning_rate": 1.8245906021748317e-05, "loss": 0.04102669060230255, "step": 190610 }, { "epoch": 0.8183714999613612, "grad_norm": 0.29564034938812256, "learning_rate": 1.8241594301630694e-05, "loss": 0.16563717126846314, "step": 190620 }, { "epoch": 0.8184144320513811, "grad_norm": 0.0020702462643384933, "learning_rate": 1.823728258151307e-05, "loss": 0.3559636116027832, "step": 190630 }, { "epoch": 0.8184573641414011, "grad_norm": 0.8853886723518372, "learning_rate": 1.8232970861395446e-05, "loss": 0.3207648754119873, "step": 190640 }, { "epoch": 0.8185002962314212, "grad_norm": 0.05311070755124092, "learning_rate": 1.8228659141277823e-05, "loss": 0.22375433444976806, "step": 190650 }, { "epoch": 0.8185432283214411, "grad_norm": 6.844063758850098, "learning_rate": 1.8224347421160197e-05, "loss": 0.32745258808135985, "step": 190660 }, { "epoch": 0.8185861604114612, "grad_norm": 0.017273802310228348, "learning_rate": 1.8220035701042574e-05, "loss": 0.34038119316101073, "step": 190670 }, { "epoch": 0.8186290925014812, "grad_norm": 0.05780120566487312, "learning_rate": 1.821572398092495e-05, "loss": 0.16594982147216797, "step": 190680 }, { "epoch": 0.8186720245915011, "grad_norm": 0.04340917989611626, "learning_rate": 1.821141226080733e-05, "loss": 0.23679041862487793, "step": 190690 }, { "epoch": 0.8187149566815212, "grad_norm": 2.4382174015045166, "learning_rate": 1.8207100540689703e-05, "loss": 0.21798980236053467, "step": 190700 }, { "epoch": 0.8187578887715412, "grad_norm": 0.02230762504041195, "learning_rate": 1.820278882057208e-05, "loss": 0.28459992408752444, "step": 190710 }, { "epoch": 0.8188008208615611, "grad_norm": 2.178717613220215, "learning_rate": 1.8198477100454454e-05, "loss": 0.29400684833526614, "step": 190720 }, { "epoch": 0.8188437529515812, "grad_norm": 0.004009547643363476, "learning_rate": 1.8194165380336835e-05, "loss": 0.06440883874893188, "step": 190730 }, { "epoch": 0.8188866850416012, "grad_norm": 0.9320793747901917, "learning_rate": 1.818985366021921e-05, "loss": 0.08484240174293518, "step": 190740 }, { "epoch": 0.8189296171316212, "grad_norm": 2.0037152767181396, "learning_rate": 1.8185541940101586e-05, "loss": 0.15250011682510375, "step": 190750 }, { "epoch": 0.8189725492216412, "grad_norm": 1.7748454809188843, "learning_rate": 1.818123021998396e-05, "loss": 0.17670098543167115, "step": 190760 }, { "epoch": 0.8190154813116612, "grad_norm": 0.17334459722042084, "learning_rate": 1.8176918499866338e-05, "loss": 0.30178070068359375, "step": 190770 }, { "epoch": 0.8190584134016812, "grad_norm": 0.11208771914243698, "learning_rate": 1.817260677974871e-05, "loss": 0.23756444454193115, "step": 190780 }, { "epoch": 0.8191013454917012, "grad_norm": 0.048918429762125015, "learning_rate": 1.8168295059631092e-05, "loss": 0.19704781770706176, "step": 190790 }, { "epoch": 0.8191442775817213, "grad_norm": 1.5001906156539917, "learning_rate": 1.8163983339513466e-05, "loss": 0.11418488025665283, "step": 190800 }, { "epoch": 0.8191872096717412, "grad_norm": 1.576877474784851, "learning_rate": 1.8159671619395844e-05, "loss": 0.1952579140663147, "step": 190810 }, { "epoch": 0.8192301417617612, "grad_norm": 0.02885427139699459, "learning_rate": 1.8155359899278217e-05, "loss": 0.21382324695587157, "step": 190820 }, { "epoch": 0.8192730738517813, "grad_norm": 4.801934242248535, "learning_rate": 1.8151048179160595e-05, "loss": 0.19394272565841675, "step": 190830 }, { "epoch": 0.8193160059418012, "grad_norm": 0.022001752629876137, "learning_rate": 1.8146736459042972e-05, "loss": 0.11965030431747437, "step": 190840 }, { "epoch": 0.8193589380318212, "grad_norm": 0.5711457133293152, "learning_rate": 1.814242473892535e-05, "loss": 0.2572173118591309, "step": 190850 }, { "epoch": 0.8194018701218413, "grad_norm": 0.10609620809555054, "learning_rate": 1.8138113018807723e-05, "loss": 0.19334434270858764, "step": 190860 }, { "epoch": 0.8194448022118612, "grad_norm": 0.18748600780963898, "learning_rate": 1.81338012986901e-05, "loss": 0.03161357641220093, "step": 190870 }, { "epoch": 0.8194877343018813, "grad_norm": 6.887355327606201, "learning_rate": 1.8129489578572475e-05, "loss": 0.4630523681640625, "step": 190880 }, { "epoch": 0.8195306663919013, "grad_norm": 0.8817146420478821, "learning_rate": 1.8125177858454852e-05, "loss": 0.2447819471359253, "step": 190890 }, { "epoch": 0.8195735984819214, "grad_norm": 1.8151564598083496, "learning_rate": 1.812086613833723e-05, "loss": 0.20715341567993165, "step": 190900 }, { "epoch": 0.8196165305719413, "grad_norm": 0.008164191618561745, "learning_rate": 1.8116554418219607e-05, "loss": 0.14329957962036133, "step": 190910 }, { "epoch": 0.8196594626619613, "grad_norm": 0.007723241113126278, "learning_rate": 1.811224269810198e-05, "loss": 0.2750748872756958, "step": 190920 }, { "epoch": 0.8197023947519814, "grad_norm": 0.2240218222141266, "learning_rate": 1.8107930977984358e-05, "loss": 0.11921135187149048, "step": 190930 }, { "epoch": 0.8197453268420013, "grad_norm": 0.17240864038467407, "learning_rate": 1.8103619257866732e-05, "loss": 0.34458763599395753, "step": 190940 }, { "epoch": 0.8197882589320213, "grad_norm": 0.014422965236008167, "learning_rate": 1.809930753774911e-05, "loss": 0.024203298985958098, "step": 190950 }, { "epoch": 0.8198311910220414, "grad_norm": 0.1409555822610855, "learning_rate": 1.8094995817631487e-05, "loss": 0.21380703449249266, "step": 190960 }, { "epoch": 0.8198741231120613, "grad_norm": 0.059442151337862015, "learning_rate": 1.8090684097513864e-05, "loss": 0.24995136260986328, "step": 190970 }, { "epoch": 0.8199170552020814, "grad_norm": 0.0010440107434988022, "learning_rate": 1.808637237739624e-05, "loss": 0.07371382713317871, "step": 190980 }, { "epoch": 0.8199599872921014, "grad_norm": 0.0026696210261434317, "learning_rate": 1.8082060657278615e-05, "loss": 0.09124609231948852, "step": 190990 }, { "epoch": 0.8200029193821213, "grad_norm": 1.8029237985610962, "learning_rate": 1.8077748937160993e-05, "loss": 0.11664470434188842, "step": 191000 }, { "epoch": 0.8200029193821213, "eval_loss": 0.38155049085617065, "eval_runtime": 27.5094, "eval_samples_per_second": 3.635, "eval_steps_per_second": 3.635, "step": 191000 }, { "epoch": 0.8200458514721414, "grad_norm": 0.22230297327041626, "learning_rate": 1.8073437217043367e-05, "loss": 0.24210076332092284, "step": 191010 }, { "epoch": 0.8200887835621614, "grad_norm": 0.12822383642196655, "learning_rate": 1.8069125496925744e-05, "loss": 0.22140724658966066, "step": 191020 }, { "epoch": 0.8201317156521813, "grad_norm": 0.13210682570934296, "learning_rate": 1.806481377680812e-05, "loss": 0.11168990135192872, "step": 191030 }, { "epoch": 0.8201746477422014, "grad_norm": 0.1504775583744049, "learning_rate": 1.80605020566905e-05, "loss": 0.26407132148742674, "step": 191040 }, { "epoch": 0.8202175798322214, "grad_norm": 1.0668368339538574, "learning_rate": 1.8056190336572873e-05, "loss": 0.27573869228363035, "step": 191050 }, { "epoch": 0.8202605119222414, "grad_norm": 1.6537013053894043, "learning_rate": 1.805187861645525e-05, "loss": 0.23031814098358155, "step": 191060 }, { "epoch": 0.8203034440122614, "grad_norm": 0.7904632091522217, "learning_rate": 1.8047566896337624e-05, "loss": 0.13049904108047486, "step": 191070 }, { "epoch": 0.8203463761022814, "grad_norm": 0.023739825934171677, "learning_rate": 1.8043255176220005e-05, "loss": 0.07214347720146179, "step": 191080 }, { "epoch": 0.8203893081923014, "grad_norm": 0.03254947438836098, "learning_rate": 1.803894345610238e-05, "loss": 0.29176888465881345, "step": 191090 }, { "epoch": 0.8204322402823214, "grad_norm": 4.219489574432373, "learning_rate": 1.8034631735984756e-05, "loss": 0.2843551397323608, "step": 191100 }, { "epoch": 0.8204751723723415, "grad_norm": 0.014289355836808681, "learning_rate": 1.803032001586713e-05, "loss": 0.18762102127075195, "step": 191110 }, { "epoch": 0.8205181044623614, "grad_norm": 2.8176822662353516, "learning_rate": 1.8026008295749507e-05, "loss": 0.15682103633880615, "step": 191120 }, { "epoch": 0.8205610365523814, "grad_norm": 41.578189849853516, "learning_rate": 1.802169657563188e-05, "loss": 0.29941635131835936, "step": 191130 }, { "epoch": 0.8206039686424015, "grad_norm": 0.001329735154286027, "learning_rate": 1.8017384855514262e-05, "loss": 0.23812174797058105, "step": 191140 }, { "epoch": 0.8206469007324214, "grad_norm": 1.0163871049880981, "learning_rate": 1.8013073135396636e-05, "loss": 0.17960180044174195, "step": 191150 }, { "epoch": 0.8206898328224415, "grad_norm": 0.07313176989555359, "learning_rate": 1.8008761415279013e-05, "loss": 0.19614267349243164, "step": 191160 }, { "epoch": 0.8207327649124615, "grad_norm": 0.6908106207847595, "learning_rate": 1.8004449695161387e-05, "loss": 0.09613000750541686, "step": 191170 }, { "epoch": 0.8207756970024814, "grad_norm": 0.036255963146686554, "learning_rate": 1.8000137975043765e-05, "loss": 0.19044407606124877, "step": 191180 }, { "epoch": 0.8208186290925015, "grad_norm": 1.8935225009918213, "learning_rate": 1.7995826254926142e-05, "loss": 0.06331070661544799, "step": 191190 }, { "epoch": 0.8208615611825215, "grad_norm": 0.046459510922431946, "learning_rate": 1.799151453480852e-05, "loss": 0.04903110563755035, "step": 191200 }, { "epoch": 0.8209044932725414, "grad_norm": 0.1087096557021141, "learning_rate": 1.7987202814690893e-05, "loss": 0.280530309677124, "step": 191210 }, { "epoch": 0.8209474253625615, "grad_norm": 1.053376317024231, "learning_rate": 1.798289109457327e-05, "loss": 0.38856456279754636, "step": 191220 }, { "epoch": 0.8209903574525815, "grad_norm": 0.001331243198364973, "learning_rate": 1.7978579374455644e-05, "loss": 0.23703274726867676, "step": 191230 }, { "epoch": 0.8210332895426015, "grad_norm": 2.703815460205078, "learning_rate": 1.7974267654338022e-05, "loss": 0.2822533130645752, "step": 191240 }, { "epoch": 0.8210762216326215, "grad_norm": 0.015311984345316887, "learning_rate": 1.79699559342204e-05, "loss": 0.4106620788574219, "step": 191250 }, { "epoch": 0.8211191537226415, "grad_norm": 50.689842224121094, "learning_rate": 1.7965644214102777e-05, "loss": 0.11393247842788697, "step": 191260 }, { "epoch": 0.8211620858126615, "grad_norm": 0.03459545969963074, "learning_rate": 1.796133249398515e-05, "loss": 0.10530201196670533, "step": 191270 }, { "epoch": 0.8212050179026815, "grad_norm": 1.1778295040130615, "learning_rate": 1.7957020773867528e-05, "loss": 0.16542747020721435, "step": 191280 }, { "epoch": 0.8212479499927016, "grad_norm": 0.05201715975999832, "learning_rate": 1.7952709053749902e-05, "loss": 0.12944258451461793, "step": 191290 }, { "epoch": 0.8212908820827215, "grad_norm": 0.005311310291290283, "learning_rate": 1.794839733363228e-05, "loss": 0.11700173616409301, "step": 191300 }, { "epoch": 0.8213338141727415, "grad_norm": 0.009006867185235023, "learning_rate": 1.7944085613514656e-05, "loss": 0.054128849506378175, "step": 191310 }, { "epoch": 0.8213767462627616, "grad_norm": 0.05511913821101189, "learning_rate": 1.7939773893397034e-05, "loss": 0.09677992463111877, "step": 191320 }, { "epoch": 0.8214196783527816, "grad_norm": 0.012460976839065552, "learning_rate": 1.7935462173279408e-05, "loss": 0.12651125192642212, "step": 191330 }, { "epoch": 0.8214626104428016, "grad_norm": 0.05311845242977142, "learning_rate": 1.7931150453161785e-05, "loss": 0.2608399152755737, "step": 191340 }, { "epoch": 0.8215055425328216, "grad_norm": 0.8644061088562012, "learning_rate": 1.7926838733044162e-05, "loss": 0.2154980182647705, "step": 191350 }, { "epoch": 0.8215484746228416, "grad_norm": 1.3083879947662354, "learning_rate": 1.7922527012926536e-05, "loss": 0.18745582103729247, "step": 191360 }, { "epoch": 0.8215914067128616, "grad_norm": 0.7998777627944946, "learning_rate": 1.7918215292808914e-05, "loss": 0.37282159328460696, "step": 191370 }, { "epoch": 0.8216343388028816, "grad_norm": 1.0937479734420776, "learning_rate": 1.791390357269129e-05, "loss": 0.28621206283569334, "step": 191380 }, { "epoch": 0.8216772708929017, "grad_norm": 0.003337725531309843, "learning_rate": 1.790959185257367e-05, "loss": 0.1956787347793579, "step": 191390 }, { "epoch": 0.8217202029829216, "grad_norm": 4.987943172454834, "learning_rate": 1.7905280132456042e-05, "loss": 0.2712759256362915, "step": 191400 }, { "epoch": 0.8217631350729416, "grad_norm": 0.0192168690264225, "learning_rate": 1.790096841233842e-05, "loss": 0.05187936425209046, "step": 191410 }, { "epoch": 0.8218060671629617, "grad_norm": 0.8645950555801392, "learning_rate": 1.7896656692220794e-05, "loss": 0.23125803470611572, "step": 191420 }, { "epoch": 0.8218489992529816, "grad_norm": 1.6332647800445557, "learning_rate": 1.7892344972103174e-05, "loss": 0.17718969583511351, "step": 191430 }, { "epoch": 0.8218919313430016, "grad_norm": 3.3261234760284424, "learning_rate": 1.788803325198555e-05, "loss": 0.24678752422332764, "step": 191440 }, { "epoch": 0.8219348634330217, "grad_norm": 3.7975666522979736, "learning_rate": 1.7883721531867926e-05, "loss": 0.19560292959213257, "step": 191450 }, { "epoch": 0.8219777955230416, "grad_norm": 0.3274807631969452, "learning_rate": 1.78794098117503e-05, "loss": 0.351909875869751, "step": 191460 }, { "epoch": 0.8220207276130617, "grad_norm": 0.0043863472528755665, "learning_rate": 1.7875098091632677e-05, "loss": 0.16393966674804689, "step": 191470 }, { "epoch": 0.8220636597030817, "grad_norm": 0.015406210906803608, "learning_rate": 1.787078637151505e-05, "loss": 0.22621474266052247, "step": 191480 }, { "epoch": 0.8221065917931016, "grad_norm": 0.07137417048215866, "learning_rate": 1.786647465139743e-05, "loss": 0.038229352235794066, "step": 191490 }, { "epoch": 0.8221495238831217, "grad_norm": 0.027764789760112762, "learning_rate": 1.7862162931279806e-05, "loss": 0.23064649105072021, "step": 191500 }, { "epoch": 0.8221924559731417, "grad_norm": 0.019897371530532837, "learning_rate": 1.7857851211162183e-05, "loss": 0.35613534450531004, "step": 191510 }, { "epoch": 0.8222353880631617, "grad_norm": 0.12119104713201523, "learning_rate": 1.7853539491044557e-05, "loss": 0.27148022651672366, "step": 191520 }, { "epoch": 0.8222783201531817, "grad_norm": 0.05415143072605133, "learning_rate": 1.7849227770926934e-05, "loss": 0.1662237048149109, "step": 191530 }, { "epoch": 0.8223212522432017, "grad_norm": 0.06889358162879944, "learning_rate": 1.784491605080931e-05, "loss": 0.05766690969467163, "step": 191540 }, { "epoch": 0.8223641843332217, "grad_norm": 12.351526260375977, "learning_rate": 1.784060433069169e-05, "loss": 0.15811610221862793, "step": 191550 }, { "epoch": 0.8224071164232417, "grad_norm": 0.13275538384914398, "learning_rate": 1.7836292610574063e-05, "loss": 0.27862823009490967, "step": 191560 }, { "epoch": 0.8224500485132618, "grad_norm": 0.003320979652926326, "learning_rate": 1.783198089045644e-05, "loss": 0.15656894445419312, "step": 191570 }, { "epoch": 0.8224929806032817, "grad_norm": 5.074235916137695, "learning_rate": 1.7827669170338814e-05, "loss": 0.307766056060791, "step": 191580 }, { "epoch": 0.8225359126933017, "grad_norm": 0.04263024777173996, "learning_rate": 1.782335745022119e-05, "loss": 0.14165412187576293, "step": 191590 }, { "epoch": 0.8225788447833218, "grad_norm": 0.048086926341056824, "learning_rate": 1.781904573010357e-05, "loss": 0.35209856033325193, "step": 191600 }, { "epoch": 0.8226217768733417, "grad_norm": 0.006095598451793194, "learning_rate": 1.7814734009985946e-05, "loss": 0.08997969627380371, "step": 191610 }, { "epoch": 0.8226647089633617, "grad_norm": 0.0008162250742316246, "learning_rate": 1.781042228986832e-05, "loss": 0.09934253096580506, "step": 191620 }, { "epoch": 0.8227076410533818, "grad_norm": 0.0008933839853852987, "learning_rate": 1.7806110569750698e-05, "loss": 0.1271510362625122, "step": 191630 }, { "epoch": 0.8227505731434017, "grad_norm": 0.10590466111898422, "learning_rate": 1.780179884963307e-05, "loss": 0.13586642742156982, "step": 191640 }, { "epoch": 0.8227935052334218, "grad_norm": 0.20287977159023285, "learning_rate": 1.779748712951545e-05, "loss": 0.2331789493560791, "step": 191650 }, { "epoch": 0.8228364373234418, "grad_norm": 0.017177434638142586, "learning_rate": 1.7793175409397826e-05, "loss": 0.24761927127838135, "step": 191660 }, { "epoch": 0.8228793694134617, "grad_norm": 1.3558788299560547, "learning_rate": 1.7788863689280204e-05, "loss": 0.19192906618118286, "step": 191670 }, { "epoch": 0.8229223015034818, "grad_norm": 0.019712621346116066, "learning_rate": 1.7784551969162577e-05, "loss": 0.22504265308380128, "step": 191680 }, { "epoch": 0.8229652335935018, "grad_norm": 1.1804683208465576, "learning_rate": 1.7780240249044955e-05, "loss": 0.5104588985443115, "step": 191690 }, { "epoch": 0.8230081656835218, "grad_norm": 0.34881842136383057, "learning_rate": 1.777592852892733e-05, "loss": 0.27373499870300294, "step": 191700 }, { "epoch": 0.8230510977735418, "grad_norm": 0.0014252919936552644, "learning_rate": 1.7771616808809706e-05, "loss": 0.12193593978881836, "step": 191710 }, { "epoch": 0.8230940298635618, "grad_norm": 0.12936106324195862, "learning_rate": 1.7767305088692083e-05, "loss": 0.16922520399093627, "step": 191720 }, { "epoch": 0.8231369619535818, "grad_norm": 2.7369914054870605, "learning_rate": 1.776299336857446e-05, "loss": 0.25319030284881594, "step": 191730 }, { "epoch": 0.8231798940436018, "grad_norm": 0.022860685363411903, "learning_rate": 1.7758681648456838e-05, "loss": 0.4412991046905518, "step": 191740 }, { "epoch": 0.8232228261336219, "grad_norm": 0.004503470379859209, "learning_rate": 1.7754369928339212e-05, "loss": 0.11105915307998657, "step": 191750 }, { "epoch": 0.8232657582236419, "grad_norm": 0.3987155854701996, "learning_rate": 1.775005820822159e-05, "loss": 0.20699021816253663, "step": 191760 }, { "epoch": 0.8233086903136618, "grad_norm": 0.003383493749424815, "learning_rate": 1.7745746488103963e-05, "loss": 0.1353330969810486, "step": 191770 }, { "epoch": 0.8233516224036819, "grad_norm": 0.006548069417476654, "learning_rate": 1.774143476798634e-05, "loss": 0.023429669439792633, "step": 191780 }, { "epoch": 0.8233945544937019, "grad_norm": 0.47888049483299255, "learning_rate": 1.7737123047868718e-05, "loss": 0.3590864181518555, "step": 191790 }, { "epoch": 0.8234374865837218, "grad_norm": 0.012283619493246078, "learning_rate": 1.7732811327751095e-05, "loss": 0.036898362636566165, "step": 191800 }, { "epoch": 0.8234804186737419, "grad_norm": 0.9226396083831787, "learning_rate": 1.772849960763347e-05, "loss": 0.10457895994186402, "step": 191810 }, { "epoch": 0.8235233507637619, "grad_norm": 0.03380728140473366, "learning_rate": 1.7724187887515847e-05, "loss": 0.49823312759399413, "step": 191820 }, { "epoch": 0.8235662828537819, "grad_norm": 0.031724244356155396, "learning_rate": 1.771987616739822e-05, "loss": 0.020426127314567565, "step": 191830 }, { "epoch": 0.8236092149438019, "grad_norm": 1.9001127481460571, "learning_rate": 1.77155644472806e-05, "loss": 0.2856115818023682, "step": 191840 }, { "epoch": 0.823652147033822, "grad_norm": 0.3387307822704315, "learning_rate": 1.7711252727162975e-05, "loss": 0.1957484722137451, "step": 191850 }, { "epoch": 0.8236950791238419, "grad_norm": 5.49625301361084, "learning_rate": 1.7706941007045353e-05, "loss": 0.24340589046478273, "step": 191860 }, { "epoch": 0.8237380112138619, "grad_norm": 0.005266428925096989, "learning_rate": 1.7702629286927727e-05, "loss": 0.25476138591766356, "step": 191870 }, { "epoch": 0.823780943303882, "grad_norm": 0.07123085111379623, "learning_rate": 1.7698317566810104e-05, "loss": 0.20363154411315917, "step": 191880 }, { "epoch": 0.8238238753939019, "grad_norm": 135.7645721435547, "learning_rate": 1.7694005846692478e-05, "loss": 0.16444821357727052, "step": 191890 }, { "epoch": 0.8238668074839219, "grad_norm": 1.4238746166229248, "learning_rate": 1.768969412657486e-05, "loss": 0.20836834907531737, "step": 191900 }, { "epoch": 0.823909739573942, "grad_norm": 0.1336083710193634, "learning_rate": 1.7685382406457233e-05, "loss": 0.11080396175384521, "step": 191910 }, { "epoch": 0.8239526716639619, "grad_norm": 3.034364700317383, "learning_rate": 1.768107068633961e-05, "loss": 0.34577069282531736, "step": 191920 }, { "epoch": 0.823995603753982, "grad_norm": 0.02131952904164791, "learning_rate": 1.7676758966221984e-05, "loss": 0.03955221474170685, "step": 191930 }, { "epoch": 0.824038535844002, "grad_norm": 0.005112878978252411, "learning_rate": 1.767244724610436e-05, "loss": 0.20619418621063232, "step": 191940 }, { "epoch": 0.8240814679340219, "grad_norm": 1.2098677158355713, "learning_rate": 1.766813552598674e-05, "loss": 0.15751324892044066, "step": 191950 }, { "epoch": 0.824124400024042, "grad_norm": 0.13064929842948914, "learning_rate": 1.7663823805869116e-05, "loss": 0.21526129245758058, "step": 191960 }, { "epoch": 0.824167332114062, "grad_norm": 0.6280670762062073, "learning_rate": 1.765951208575149e-05, "loss": 0.17393140792846679, "step": 191970 }, { "epoch": 0.8242102642040819, "grad_norm": 1.3640555143356323, "learning_rate": 1.7655200365633867e-05, "loss": 0.171847927570343, "step": 191980 }, { "epoch": 0.824253196294102, "grad_norm": 0.012612332589924335, "learning_rate": 1.765088864551624e-05, "loss": 0.17563261985778808, "step": 191990 }, { "epoch": 0.824296128384122, "grad_norm": 42.14128112792969, "learning_rate": 1.764657692539862e-05, "loss": 0.085991770029068, "step": 192000 }, { "epoch": 0.824296128384122, "eval_loss": 0.3825879991054535, "eval_runtime": 27.4797, "eval_samples_per_second": 3.639, "eval_steps_per_second": 3.639, "step": 192000 }, { "epoch": 0.824339060474142, "grad_norm": 0.01008710078895092, "learning_rate": 1.7642265205280996e-05, "loss": 0.37354443073272703, "step": 192010 }, { "epoch": 0.824381992564162, "grad_norm": 1.7483117580413818, "learning_rate": 1.7637953485163373e-05, "loss": 0.18292040824890138, "step": 192020 }, { "epoch": 0.824424924654182, "grad_norm": 0.012306587770581245, "learning_rate": 1.7633641765045747e-05, "loss": 0.36442830562591555, "step": 192030 }, { "epoch": 0.824467856744202, "grad_norm": 0.04286915063858032, "learning_rate": 1.7629330044928125e-05, "loss": 0.126639723777771, "step": 192040 }, { "epoch": 0.824510788834222, "grad_norm": 0.0028463948983699083, "learning_rate": 1.76250183248105e-05, "loss": 0.09038893580436706, "step": 192050 }, { "epoch": 0.8245537209242421, "grad_norm": 1.2837460041046143, "learning_rate": 1.7620706604692876e-05, "loss": 0.06621870994567872, "step": 192060 }, { "epoch": 0.824596653014262, "grad_norm": 0.050448864698410034, "learning_rate": 1.7616394884575253e-05, "loss": 0.1748013138771057, "step": 192070 }, { "epoch": 0.824639585104282, "grad_norm": 0.006843621842563152, "learning_rate": 1.761208316445763e-05, "loss": 0.021199363470077514, "step": 192080 }, { "epoch": 0.8246825171943021, "grad_norm": 0.28706303238868713, "learning_rate": 1.7607771444340008e-05, "loss": 0.1016458511352539, "step": 192090 }, { "epoch": 0.824725449284322, "grad_norm": 1.928757905960083, "learning_rate": 1.7603459724222382e-05, "loss": 0.15487730503082275, "step": 192100 }, { "epoch": 0.824768381374342, "grad_norm": 6.034005165100098, "learning_rate": 1.759914800410476e-05, "loss": 0.169073486328125, "step": 192110 }, { "epoch": 0.8248113134643621, "grad_norm": 0.748520016670227, "learning_rate": 1.7594836283987133e-05, "loss": 0.11985911130905151, "step": 192120 }, { "epoch": 0.824854245554382, "grad_norm": 0.25992897152900696, "learning_rate": 1.759052456386951e-05, "loss": 0.08560782074928283, "step": 192130 }, { "epoch": 0.8248971776444021, "grad_norm": 0.0937369093298912, "learning_rate": 1.7586212843751888e-05, "loss": 0.19405397176742553, "step": 192140 }, { "epoch": 0.8249401097344221, "grad_norm": 0.5325449109077454, "learning_rate": 1.7581901123634265e-05, "loss": 0.28080456256866454, "step": 192150 }, { "epoch": 0.824983041824442, "grad_norm": 6.978481769561768, "learning_rate": 1.757758940351664e-05, "loss": 0.19362866878509521, "step": 192160 }, { "epoch": 0.8250259739144621, "grad_norm": 0.017143480479717255, "learning_rate": 1.7573277683399016e-05, "loss": 0.2803755521774292, "step": 192170 }, { "epoch": 0.8250689060044821, "grad_norm": 0.14805008471012115, "learning_rate": 1.756896596328139e-05, "loss": 0.16662766933441162, "step": 192180 }, { "epoch": 0.8251118380945022, "grad_norm": 0.0492326021194458, "learning_rate": 1.756465424316377e-05, "loss": 0.09783921837806701, "step": 192190 }, { "epoch": 0.8251547701845221, "grad_norm": 3.8107030391693115, "learning_rate": 1.7560342523046145e-05, "loss": 0.29922664165496826, "step": 192200 }, { "epoch": 0.8251977022745421, "grad_norm": 0.06435045599937439, "learning_rate": 1.7556030802928522e-05, "loss": 0.07153820395469665, "step": 192210 }, { "epoch": 0.8252406343645622, "grad_norm": 0.0006460752920247614, "learning_rate": 1.7551719082810896e-05, "loss": 0.04712951183319092, "step": 192220 }, { "epoch": 0.8252835664545821, "grad_norm": 0.022561080753803253, "learning_rate": 1.7547407362693274e-05, "loss": 0.1923914670944214, "step": 192230 }, { "epoch": 0.8253264985446022, "grad_norm": 0.009855869226157665, "learning_rate": 1.7543095642575648e-05, "loss": 0.09128308892250062, "step": 192240 }, { "epoch": 0.8253694306346222, "grad_norm": 0.555938184261322, "learning_rate": 1.753878392245803e-05, "loss": 0.004356810450553894, "step": 192250 }, { "epoch": 0.8254123627246421, "grad_norm": 0.0011161682195961475, "learning_rate": 1.7534472202340402e-05, "loss": 0.22292764186859132, "step": 192260 }, { "epoch": 0.8254552948146622, "grad_norm": 1.2606300115585327, "learning_rate": 1.753016048222278e-05, "loss": 0.1448211669921875, "step": 192270 }, { "epoch": 0.8254982269046822, "grad_norm": 4.72949743270874, "learning_rate": 1.7525848762105154e-05, "loss": 0.29486114978790284, "step": 192280 }, { "epoch": 0.8255411589947022, "grad_norm": 5.2792558670043945, "learning_rate": 1.752153704198753e-05, "loss": 0.08456004858016967, "step": 192290 }, { "epoch": 0.8255840910847222, "grad_norm": 0.022953951731324196, "learning_rate": 1.751722532186991e-05, "loss": 0.2897635936737061, "step": 192300 }, { "epoch": 0.8256270231747422, "grad_norm": 0.0130073893815279, "learning_rate": 1.7512913601752286e-05, "loss": 0.19400113821029663, "step": 192310 }, { "epoch": 0.8256699552647622, "grad_norm": 0.009828859008848667, "learning_rate": 1.750860188163466e-05, "loss": 0.16359663009643555, "step": 192320 }, { "epoch": 0.8257128873547822, "grad_norm": 0.004650391638278961, "learning_rate": 1.7504290161517037e-05, "loss": 0.03614757657051086, "step": 192330 }, { "epoch": 0.8257558194448023, "grad_norm": 0.030460670590400696, "learning_rate": 1.749997844139941e-05, "loss": 0.06386544108390808, "step": 192340 }, { "epoch": 0.8257987515348222, "grad_norm": 0.016825038939714432, "learning_rate": 1.749566672128179e-05, "loss": 0.17280819416046142, "step": 192350 }, { "epoch": 0.8258416836248422, "grad_norm": 28.79928970336914, "learning_rate": 1.7491355001164166e-05, "loss": 0.08564205169677734, "step": 192360 }, { "epoch": 0.8258846157148623, "grad_norm": 7.042786598205566, "learning_rate": 1.7487043281046543e-05, "loss": 0.2968191385269165, "step": 192370 }, { "epoch": 0.8259275478048822, "grad_norm": 0.004526129458099604, "learning_rate": 1.7482731560928917e-05, "loss": 0.2754892587661743, "step": 192380 }, { "epoch": 0.8259704798949022, "grad_norm": 0.002219579881057143, "learning_rate": 1.7478419840811294e-05, "loss": 0.2458263635635376, "step": 192390 }, { "epoch": 0.8260134119849223, "grad_norm": 0.05032936483621597, "learning_rate": 1.7474108120693668e-05, "loss": 0.28162617683410646, "step": 192400 }, { "epoch": 0.8260563440749422, "grad_norm": 0.0014833813766017556, "learning_rate": 1.7469796400576046e-05, "loss": 0.2364175796508789, "step": 192410 }, { "epoch": 0.8260992761649623, "grad_norm": 0.0007317995768971741, "learning_rate": 1.7465484680458423e-05, "loss": 0.0849960207939148, "step": 192420 }, { "epoch": 0.8261422082549823, "grad_norm": 0.0030191184487193823, "learning_rate": 1.74611729603408e-05, "loss": 0.2047954559326172, "step": 192430 }, { "epoch": 0.8261851403450022, "grad_norm": 0.014762978069484234, "learning_rate": 1.7456861240223178e-05, "loss": 0.38550779819488523, "step": 192440 }, { "epoch": 0.8262280724350223, "grad_norm": 0.004658820573240519, "learning_rate": 1.745254952010555e-05, "loss": 0.1906131386756897, "step": 192450 }, { "epoch": 0.8262710045250423, "grad_norm": 0.12024617195129395, "learning_rate": 1.744823779998793e-05, "loss": 0.18920086622238158, "step": 192460 }, { "epoch": 0.8263139366150623, "grad_norm": 6.6206793785095215, "learning_rate": 1.7443926079870303e-05, "loss": 0.1918006420135498, "step": 192470 }, { "epoch": 0.8263568687050823, "grad_norm": 8.5894193649292, "learning_rate": 1.743961435975268e-05, "loss": 0.30832791328430176, "step": 192480 }, { "epoch": 0.8263998007951023, "grad_norm": 0.08169787377119064, "learning_rate": 1.7435302639635058e-05, "loss": 0.05640159249305725, "step": 192490 }, { "epoch": 0.8264427328851223, "grad_norm": 0.0032690681982785463, "learning_rate": 1.7430990919517435e-05, "loss": 0.07844080328941345, "step": 192500 }, { "epoch": 0.8264856649751423, "grad_norm": 1.843385934829712, "learning_rate": 1.742667919939981e-05, "loss": 0.20965149402618408, "step": 192510 }, { "epoch": 0.8265285970651624, "grad_norm": 0.04977283626794815, "learning_rate": 1.7422367479282186e-05, "loss": 0.3092090129852295, "step": 192520 }, { "epoch": 0.8265715291551823, "grad_norm": 0.057017020881175995, "learning_rate": 1.741805575916456e-05, "loss": 0.2586258888244629, "step": 192530 }, { "epoch": 0.8266144612452023, "grad_norm": 0.7330488562583923, "learning_rate": 1.7413744039046937e-05, "loss": 0.2805727481842041, "step": 192540 }, { "epoch": 0.8266573933352224, "grad_norm": 1.5467420816421509, "learning_rate": 1.7409432318929315e-05, "loss": 0.18074100017547606, "step": 192550 }, { "epoch": 0.8267003254252423, "grad_norm": 0.05255649983882904, "learning_rate": 1.7405120598811692e-05, "loss": 0.12051188945770264, "step": 192560 }, { "epoch": 0.8267432575152623, "grad_norm": 0.35828202962875366, "learning_rate": 1.7400808878694066e-05, "loss": 0.1667983651161194, "step": 192570 }, { "epoch": 0.8267861896052824, "grad_norm": 0.007995634339749813, "learning_rate": 1.7396497158576443e-05, "loss": 0.22869086265563965, "step": 192580 }, { "epoch": 0.8268291216953023, "grad_norm": 21.580411911010742, "learning_rate": 1.7392185438458817e-05, "loss": 0.21399738788604736, "step": 192590 }, { "epoch": 0.8268720537853224, "grad_norm": 0.02281232550740242, "learning_rate": 1.7387873718341198e-05, "loss": 0.19772342443466187, "step": 192600 }, { "epoch": 0.8269149858753424, "grad_norm": 0.05629677325487137, "learning_rate": 1.7383561998223572e-05, "loss": 0.23454837799072265, "step": 192610 }, { "epoch": 0.8269579179653624, "grad_norm": 2.705537796020508, "learning_rate": 1.737925027810595e-05, "loss": 0.21049823760986328, "step": 192620 }, { "epoch": 0.8270008500553824, "grad_norm": 0.03227739408612251, "learning_rate": 1.7374938557988323e-05, "loss": 0.15504707098007203, "step": 192630 }, { "epoch": 0.8270437821454024, "grad_norm": 0.056841857731342316, "learning_rate": 1.73706268378707e-05, "loss": 0.11248964071273804, "step": 192640 }, { "epoch": 0.8270867142354225, "grad_norm": 2.6003172397613525, "learning_rate": 1.7366315117753075e-05, "loss": 0.22536401748657225, "step": 192650 }, { "epoch": 0.8271296463254424, "grad_norm": 0.3005393445491791, "learning_rate": 1.7362003397635455e-05, "loss": 0.061086273193359374, "step": 192660 }, { "epoch": 0.8271725784154624, "grad_norm": 0.004778577946126461, "learning_rate": 1.735769167751783e-05, "loss": 0.17962511777877807, "step": 192670 }, { "epoch": 0.8272155105054825, "grad_norm": 2.5120809078216553, "learning_rate": 1.7353379957400207e-05, "loss": 0.12587801218032837, "step": 192680 }, { "epoch": 0.8272584425955024, "grad_norm": 0.020870067179203033, "learning_rate": 1.734906823728258e-05, "loss": 0.38633582592010496, "step": 192690 }, { "epoch": 0.8273013746855225, "grad_norm": 0.08555847406387329, "learning_rate": 1.7344756517164958e-05, "loss": 0.20045323371887208, "step": 192700 }, { "epoch": 0.8273443067755425, "grad_norm": 3.7902328968048096, "learning_rate": 1.7340444797047335e-05, "loss": 0.1515338897705078, "step": 192710 }, { "epoch": 0.8273872388655624, "grad_norm": 0.05745023488998413, "learning_rate": 1.7336133076929713e-05, "loss": 0.2779086589813232, "step": 192720 }, { "epoch": 0.8274301709555825, "grad_norm": 0.06869952380657196, "learning_rate": 1.7331821356812087e-05, "loss": 0.19615509510040283, "step": 192730 }, { "epoch": 0.8274731030456025, "grad_norm": 0.0012193419970571995, "learning_rate": 1.7327509636694464e-05, "loss": 0.14778521060943603, "step": 192740 }, { "epoch": 0.8275160351356224, "grad_norm": 0.010909829288721085, "learning_rate": 1.7323197916576838e-05, "loss": 0.1756802201271057, "step": 192750 }, { "epoch": 0.8275589672256425, "grad_norm": 6.0416765213012695, "learning_rate": 1.7318886196459215e-05, "loss": 0.26812071800231935, "step": 192760 }, { "epoch": 0.8276018993156625, "grad_norm": 3.5843048095703125, "learning_rate": 1.7314574476341593e-05, "loss": 0.26665773391723635, "step": 192770 }, { "epoch": 0.8276448314056825, "grad_norm": 5.423513889312744, "learning_rate": 1.731026275622397e-05, "loss": 0.22109003067016603, "step": 192780 }, { "epoch": 0.8276877634957025, "grad_norm": 0.18677234649658203, "learning_rate": 1.7305951036106344e-05, "loss": 0.1045659065246582, "step": 192790 }, { "epoch": 0.8277306955857225, "grad_norm": 0.008483259938657284, "learning_rate": 1.730163931598872e-05, "loss": 0.09153875708580017, "step": 192800 }, { "epoch": 0.8277736276757425, "grad_norm": 1.6661065816879272, "learning_rate": 1.72973275958711e-05, "loss": 0.17102301120758057, "step": 192810 }, { "epoch": 0.8278165597657625, "grad_norm": 1.7879948616027832, "learning_rate": 1.7293015875753473e-05, "loss": 0.1961473822593689, "step": 192820 }, { "epoch": 0.8278594918557826, "grad_norm": 2.1218552589416504, "learning_rate": 1.728870415563585e-05, "loss": 0.1817714214324951, "step": 192830 }, { "epoch": 0.8279024239458025, "grad_norm": 0.06027497723698616, "learning_rate": 1.7284392435518227e-05, "loss": 0.37011630535125734, "step": 192840 }, { "epoch": 0.8279453560358225, "grad_norm": 0.13712593913078308, "learning_rate": 1.7280080715400605e-05, "loss": 0.20809953212738036, "step": 192850 }, { "epoch": 0.8279882881258426, "grad_norm": 2.3398091793060303, "learning_rate": 1.727576899528298e-05, "loss": 0.32560951709747316, "step": 192860 }, { "epoch": 0.8280312202158625, "grad_norm": 0.34039562940597534, "learning_rate": 1.7271457275165356e-05, "loss": 0.16434768438339234, "step": 192870 }, { "epoch": 0.8280741523058825, "grad_norm": 0.008567468263208866, "learning_rate": 1.726714555504773e-05, "loss": 0.1836724042892456, "step": 192880 }, { "epoch": 0.8281170843959026, "grad_norm": 0.003646705998107791, "learning_rate": 1.7262833834930107e-05, "loss": 0.34908242225646974, "step": 192890 }, { "epoch": 0.8281600164859225, "grad_norm": 0.00493283849209547, "learning_rate": 1.7258522114812485e-05, "loss": 0.25686960220336913, "step": 192900 }, { "epoch": 0.8282029485759426, "grad_norm": 0.014279542490839958, "learning_rate": 1.7254210394694862e-05, "loss": 0.10751999616622925, "step": 192910 }, { "epoch": 0.8282458806659626, "grad_norm": 0.10793264210224152, "learning_rate": 1.7249898674577236e-05, "loss": 0.21126208305358887, "step": 192920 }, { "epoch": 0.8282888127559825, "grad_norm": 0.020320894196629524, "learning_rate": 1.7245586954459613e-05, "loss": 0.3169750452041626, "step": 192930 }, { "epoch": 0.8283317448460026, "grad_norm": 0.004445925354957581, "learning_rate": 1.7241275234341987e-05, "loss": 0.1835729718208313, "step": 192940 }, { "epoch": 0.8283746769360226, "grad_norm": 0.0913233682513237, "learning_rate": 1.7236963514224368e-05, "loss": 0.3368794918060303, "step": 192950 }, { "epoch": 0.8284176090260426, "grad_norm": 0.005395609885454178, "learning_rate": 1.7232651794106742e-05, "loss": 0.12072069644927978, "step": 192960 }, { "epoch": 0.8284605411160626, "grad_norm": 1.188323974609375, "learning_rate": 1.722834007398912e-05, "loss": 0.07599248290061951, "step": 192970 }, { "epoch": 0.8285034732060826, "grad_norm": 1.4815592765808105, "learning_rate": 1.7224028353871493e-05, "loss": 0.300715970993042, "step": 192980 }, { "epoch": 0.8285464052961026, "grad_norm": 0.013542281463742256, "learning_rate": 1.721971663375387e-05, "loss": 0.21341800689697266, "step": 192990 }, { "epoch": 0.8285893373861226, "grad_norm": 0.02348690666258335, "learning_rate": 1.7215404913636244e-05, "loss": 0.20570461750030516, "step": 193000 }, { "epoch": 0.8285893373861226, "eval_loss": 0.3693878948688507, "eval_runtime": 27.4324, "eval_samples_per_second": 3.645, "eval_steps_per_second": 3.645, "step": 193000 }, { "epoch": 0.8286322694761427, "grad_norm": 0.4562581181526184, "learning_rate": 1.7211093193518625e-05, "loss": 0.32135188579559326, "step": 193010 }, { "epoch": 0.8286752015661626, "grad_norm": 2.3221492767333984, "learning_rate": 1.7206781473401e-05, "loss": 0.19243701696395873, "step": 193020 }, { "epoch": 0.8287181336561826, "grad_norm": 1.3994622230529785, "learning_rate": 1.7202469753283376e-05, "loss": 0.14126864671707154, "step": 193030 }, { "epoch": 0.8287610657462027, "grad_norm": 0.6949800848960876, "learning_rate": 1.719815803316575e-05, "loss": 0.09074133634567261, "step": 193040 }, { "epoch": 0.8288039978362227, "grad_norm": 5.811275005340576, "learning_rate": 1.7193846313048128e-05, "loss": 0.3873776435852051, "step": 193050 }, { "epoch": 0.8288469299262426, "grad_norm": 0.003161689732223749, "learning_rate": 1.7189534592930505e-05, "loss": 0.2473665952682495, "step": 193060 }, { "epoch": 0.8288898620162627, "grad_norm": 0.01264791376888752, "learning_rate": 1.7185222872812882e-05, "loss": 0.15263274908065796, "step": 193070 }, { "epoch": 0.8289327941062827, "grad_norm": 2.3640644550323486, "learning_rate": 1.7180911152695256e-05, "loss": 0.43180265426635744, "step": 193080 }, { "epoch": 0.8289757261963027, "grad_norm": 0.015924545004963875, "learning_rate": 1.7176599432577634e-05, "loss": 0.2639934062957764, "step": 193090 }, { "epoch": 0.8290186582863227, "grad_norm": 2.248006582260132, "learning_rate": 1.7172287712460008e-05, "loss": 0.3951799631118774, "step": 193100 }, { "epoch": 0.8290615903763427, "grad_norm": 0.01186326239258051, "learning_rate": 1.7167975992342385e-05, "loss": 0.1399616003036499, "step": 193110 }, { "epoch": 0.8291045224663627, "grad_norm": 0.025430859997868538, "learning_rate": 1.7163664272224762e-05, "loss": 0.1432553768157959, "step": 193120 }, { "epoch": 0.8291474545563827, "grad_norm": 28.576847076416016, "learning_rate": 1.715935255210714e-05, "loss": 0.48826937675476073, "step": 193130 }, { "epoch": 0.8291903866464028, "grad_norm": 4.05294942855835, "learning_rate": 1.7155040831989514e-05, "loss": 0.41413297653198244, "step": 193140 }, { "epoch": 0.8292333187364227, "grad_norm": 0.5668222904205322, "learning_rate": 1.715072911187189e-05, "loss": 0.14205758571624755, "step": 193150 }, { "epoch": 0.8292762508264427, "grad_norm": 0.2314767837524414, "learning_rate": 1.714641739175427e-05, "loss": 0.17212096452713013, "step": 193160 }, { "epoch": 0.8293191829164628, "grad_norm": 7.710951805114746, "learning_rate": 1.7142105671636642e-05, "loss": 0.2715949773788452, "step": 193170 }, { "epoch": 0.8293621150064827, "grad_norm": 0.03295942768454552, "learning_rate": 1.713779395151902e-05, "loss": 0.17002564668655396, "step": 193180 }, { "epoch": 0.8294050470965028, "grad_norm": 2.842479944229126, "learning_rate": 1.7133482231401397e-05, "loss": 0.27584826946258545, "step": 193190 }, { "epoch": 0.8294479791865228, "grad_norm": 0.44256696105003357, "learning_rate": 1.7129170511283774e-05, "loss": 0.15299785137176514, "step": 193200 }, { "epoch": 0.8294909112765427, "grad_norm": 1.9088733196258545, "learning_rate": 1.712485879116615e-05, "loss": 0.27400665283203124, "step": 193210 }, { "epoch": 0.8295338433665628, "grad_norm": 0.3094813823699951, "learning_rate": 1.7120547071048526e-05, "loss": 0.06840046644210815, "step": 193220 }, { "epoch": 0.8295767754565828, "grad_norm": 0.250940203666687, "learning_rate": 1.71162353509309e-05, "loss": 0.14590828418731688, "step": 193230 }, { "epoch": 0.8296197075466027, "grad_norm": 0.02940000407397747, "learning_rate": 1.7111923630813277e-05, "loss": 0.0903593122959137, "step": 193240 }, { "epoch": 0.8296626396366228, "grad_norm": 0.004827784840017557, "learning_rate": 1.7107611910695654e-05, "loss": 0.12426952123641968, "step": 193250 }, { "epoch": 0.8297055717266428, "grad_norm": 0.0651189386844635, "learning_rate": 1.710330019057803e-05, "loss": 0.1715847373008728, "step": 193260 }, { "epoch": 0.8297485038166628, "grad_norm": 0.1649560183286667, "learning_rate": 1.7098988470460406e-05, "loss": 0.23831121921539306, "step": 193270 }, { "epoch": 0.8297914359066828, "grad_norm": 0.007706853561103344, "learning_rate": 1.7094676750342783e-05, "loss": 0.2472676992416382, "step": 193280 }, { "epoch": 0.8298343679967028, "grad_norm": 2.9978578090667725, "learning_rate": 1.7090365030225157e-05, "loss": 0.2791655778884888, "step": 193290 }, { "epoch": 0.8298773000867228, "grad_norm": 0.02437140978872776, "learning_rate": 1.7086053310107538e-05, "loss": 0.172445547580719, "step": 193300 }, { "epoch": 0.8299202321767428, "grad_norm": 0.02160993218421936, "learning_rate": 1.708174158998991e-05, "loss": 0.11930578947067261, "step": 193310 }, { "epoch": 0.8299631642667629, "grad_norm": 0.06851465255022049, "learning_rate": 1.707742986987229e-05, "loss": 0.17158340215682982, "step": 193320 }, { "epoch": 0.8300060963567828, "grad_norm": 0.004634546581655741, "learning_rate": 1.7073118149754663e-05, "loss": 0.004219388589262962, "step": 193330 }, { "epoch": 0.8300490284468028, "grad_norm": 0.07137849926948547, "learning_rate": 1.706880642963704e-05, "loss": 0.08486306667327881, "step": 193340 }, { "epoch": 0.8300919605368229, "grad_norm": 0.10204579681158066, "learning_rate": 1.7064494709519414e-05, "loss": 0.07411643266677856, "step": 193350 }, { "epoch": 0.8301348926268428, "grad_norm": 0.038510385900735855, "learning_rate": 1.7060182989401795e-05, "loss": 0.23881709575653076, "step": 193360 }, { "epoch": 0.8301778247168629, "grad_norm": 0.030046027153730392, "learning_rate": 1.705587126928417e-05, "loss": 0.07068468332290649, "step": 193370 }, { "epoch": 0.8302207568068829, "grad_norm": 0.5060645937919617, "learning_rate": 1.7051559549166546e-05, "loss": 0.33170912265777586, "step": 193380 }, { "epoch": 0.8302636888969028, "grad_norm": 0.07317323982715607, "learning_rate": 1.704724782904892e-05, "loss": 0.13146320581436158, "step": 193390 }, { "epoch": 0.8303066209869229, "grad_norm": 0.9889684319496155, "learning_rate": 1.7042936108931298e-05, "loss": 0.3599538803100586, "step": 193400 }, { "epoch": 0.8303495530769429, "grad_norm": 0.009405163116753101, "learning_rate": 1.7038624388813675e-05, "loss": 0.09876270294189453, "step": 193410 }, { "epoch": 0.8303924851669628, "grad_norm": 0.1360063999891281, "learning_rate": 1.7034312668696052e-05, "loss": 0.12787834405899048, "step": 193420 }, { "epoch": 0.8304354172569829, "grad_norm": 0.08976658433675766, "learning_rate": 1.7030000948578426e-05, "loss": 0.09479953050613403, "step": 193430 }, { "epoch": 0.8304783493470029, "grad_norm": 0.015315504744648933, "learning_rate": 1.7025689228460803e-05, "loss": 0.20027387142181396, "step": 193440 }, { "epoch": 0.8305212814370229, "grad_norm": 0.015854516997933388, "learning_rate": 1.7021377508343177e-05, "loss": 0.2696486711502075, "step": 193450 }, { "epoch": 0.8305642135270429, "grad_norm": 0.015382854267954826, "learning_rate": 1.7017065788225555e-05, "loss": 0.04382437169551849, "step": 193460 }, { "epoch": 0.830607145617063, "grad_norm": 1.2499767541885376, "learning_rate": 1.7012754068107932e-05, "loss": 0.2440438985824585, "step": 193470 }, { "epoch": 0.830650077707083, "grad_norm": 0.0012643365189433098, "learning_rate": 1.700844234799031e-05, "loss": 0.29134511947631836, "step": 193480 }, { "epoch": 0.8306930097971029, "grad_norm": 2.6301071643829346, "learning_rate": 1.7004130627872683e-05, "loss": 0.42733354568481446, "step": 193490 }, { "epoch": 0.830735941887123, "grad_norm": 0.018867680802941322, "learning_rate": 1.699981890775506e-05, "loss": 0.12103630304336548, "step": 193500 }, { "epoch": 0.830778873977143, "grad_norm": 0.5415545105934143, "learning_rate": 1.6995507187637435e-05, "loss": 0.18488938808441163, "step": 193510 }, { "epoch": 0.8308218060671629, "grad_norm": 10.19937801361084, "learning_rate": 1.6991195467519812e-05, "loss": 0.5037790775299072, "step": 193520 }, { "epoch": 0.830864738157183, "grad_norm": 0.039546895772218704, "learning_rate": 1.698688374740219e-05, "loss": 0.08394562602043151, "step": 193530 }, { "epoch": 0.830907670247203, "grad_norm": 0.06411205977201462, "learning_rate": 1.6982572027284567e-05, "loss": 0.19905096292495728, "step": 193540 }, { "epoch": 0.830950602337223, "grad_norm": 0.005737802479416132, "learning_rate": 1.6978260307166944e-05, "loss": 0.1987488627433777, "step": 193550 }, { "epoch": 0.830993534427243, "grad_norm": 0.017030853778123856, "learning_rate": 1.6973948587049318e-05, "loss": 0.21782851219177246, "step": 193560 }, { "epoch": 0.831036466517263, "grad_norm": 0.2145611047744751, "learning_rate": 1.6969636866931695e-05, "loss": 0.23176918029785157, "step": 193570 }, { "epoch": 0.831079398607283, "grad_norm": 0.0035232685040682554, "learning_rate": 1.696532514681407e-05, "loss": 0.004082301631569862, "step": 193580 }, { "epoch": 0.831122330697303, "grad_norm": 1.625364899635315, "learning_rate": 1.6961013426696447e-05, "loss": 0.23201777935028076, "step": 193590 }, { "epoch": 0.8311652627873231, "grad_norm": 0.777439534664154, "learning_rate": 1.6956701706578824e-05, "loss": 0.0941228985786438, "step": 193600 }, { "epoch": 0.831208194877343, "grad_norm": 0.0933847650885582, "learning_rate": 1.69523899864612e-05, "loss": 0.027557867765426635, "step": 193610 }, { "epoch": 0.831251126967363, "grad_norm": 0.3298414945602417, "learning_rate": 1.6948078266343575e-05, "loss": 0.3442185878753662, "step": 193620 }, { "epoch": 0.8312940590573831, "grad_norm": 0.032382167875766754, "learning_rate": 1.6943766546225953e-05, "loss": 0.14262547492980956, "step": 193630 }, { "epoch": 0.831336991147403, "grad_norm": 0.09118391573429108, "learning_rate": 1.6939454826108327e-05, "loss": 0.1025362491607666, "step": 193640 }, { "epoch": 0.831379923237423, "grad_norm": 0.010061063803732395, "learning_rate": 1.6935143105990704e-05, "loss": 0.08400711417198181, "step": 193650 }, { "epoch": 0.8314228553274431, "grad_norm": 1.2069307565689087, "learning_rate": 1.693083138587308e-05, "loss": 0.2209791660308838, "step": 193660 }, { "epoch": 0.831465787417463, "grad_norm": 2.28737735748291, "learning_rate": 1.692651966575546e-05, "loss": 0.1758479356765747, "step": 193670 }, { "epoch": 0.8315087195074831, "grad_norm": 1.1508358716964722, "learning_rate": 1.6922207945637833e-05, "loss": 0.13885608911514283, "step": 193680 }, { "epoch": 0.8315516515975031, "grad_norm": 0.05167385935783386, "learning_rate": 1.691789622552021e-05, "loss": 0.2271339178085327, "step": 193690 }, { "epoch": 0.831594583687523, "grad_norm": 0.10447119921445847, "learning_rate": 1.6913584505402584e-05, "loss": 0.08349364995956421, "step": 193700 }, { "epoch": 0.8316375157775431, "grad_norm": 0.01147378422319889, "learning_rate": 1.6909272785284965e-05, "loss": 0.09823180437088012, "step": 193710 }, { "epoch": 0.8316804478675631, "grad_norm": 1.3284432888031006, "learning_rate": 1.690496106516734e-05, "loss": 0.1961848258972168, "step": 193720 }, { "epoch": 0.831723379957583, "grad_norm": 5.696112155914307, "learning_rate": 1.6900649345049716e-05, "loss": 0.2415191650390625, "step": 193730 }, { "epoch": 0.8317663120476031, "grad_norm": 12.25600528717041, "learning_rate": 1.689633762493209e-05, "loss": 0.21733403205871582, "step": 193740 }, { "epoch": 0.8318092441376231, "grad_norm": 0.0029865370597690344, "learning_rate": 1.6892025904814467e-05, "loss": 0.2730604887008667, "step": 193750 }, { "epoch": 0.8318521762276431, "grad_norm": 0.19473698735237122, "learning_rate": 1.688771418469684e-05, "loss": 0.0037568826228380203, "step": 193760 }, { "epoch": 0.8318951083176631, "grad_norm": 0.04992964491248131, "learning_rate": 1.6883402464579222e-05, "loss": 0.19734233617782593, "step": 193770 }, { "epoch": 0.8319380404076832, "grad_norm": 0.1691906601190567, "learning_rate": 1.6879090744461596e-05, "loss": 0.10528583526611328, "step": 193780 }, { "epoch": 0.8319809724977031, "grad_norm": 0.0038565825670957565, "learning_rate": 1.6874779024343973e-05, "loss": 0.4071638107299805, "step": 193790 }, { "epoch": 0.8320239045877231, "grad_norm": 0.030594274401664734, "learning_rate": 1.6870467304226347e-05, "loss": 0.16759810447692872, "step": 193800 }, { "epoch": 0.8320668366777432, "grad_norm": 1.1685280799865723, "learning_rate": 1.6866155584108725e-05, "loss": 0.1373188853263855, "step": 193810 }, { "epoch": 0.8321097687677631, "grad_norm": 2.992621660232544, "learning_rate": 1.6861843863991102e-05, "loss": 0.22395699024200438, "step": 193820 }, { "epoch": 0.8321527008577831, "grad_norm": 0.08463391661643982, "learning_rate": 1.685753214387348e-05, "loss": 0.25692574977874755, "step": 193830 }, { "epoch": 0.8321956329478032, "grad_norm": 0.002346848836168647, "learning_rate": 1.6853220423755853e-05, "loss": 0.3371147394180298, "step": 193840 }, { "epoch": 0.8322385650378231, "grad_norm": 0.02107856795191765, "learning_rate": 1.684890870363823e-05, "loss": 0.0994668424129486, "step": 193850 }, { "epoch": 0.8322814971278432, "grad_norm": 0.007043390069156885, "learning_rate": 1.6844596983520604e-05, "loss": 0.26696507930755614, "step": 193860 }, { "epoch": 0.8323244292178632, "grad_norm": 2.924276351928711, "learning_rate": 1.6840285263402982e-05, "loss": 0.24864647388458253, "step": 193870 }, { "epoch": 0.8323673613078831, "grad_norm": 1.4473538398742676, "learning_rate": 1.683597354328536e-05, "loss": 0.18037652969360352, "step": 193880 }, { "epoch": 0.8324102933979032, "grad_norm": 2.4899935722351074, "learning_rate": 1.6831661823167736e-05, "loss": 0.14435986280441285, "step": 193890 }, { "epoch": 0.8324532254879232, "grad_norm": 0.00426316587254405, "learning_rate": 1.6827350103050114e-05, "loss": 0.39287576675415037, "step": 193900 }, { "epoch": 0.8324961575779433, "grad_norm": 0.003730037249624729, "learning_rate": 1.6823038382932488e-05, "loss": 0.06763601303100586, "step": 193910 }, { "epoch": 0.8325390896679632, "grad_norm": 0.04255102202296257, "learning_rate": 1.6818726662814865e-05, "loss": 0.20267326831817628, "step": 193920 }, { "epoch": 0.8325820217579832, "grad_norm": 0.03916976973414421, "learning_rate": 1.681441494269724e-05, "loss": 0.1570077896118164, "step": 193930 }, { "epoch": 0.8326249538480033, "grad_norm": 0.331269770860672, "learning_rate": 1.6810103222579616e-05, "loss": 0.21268806457519532, "step": 193940 }, { "epoch": 0.8326678859380232, "grad_norm": 0.013200412504374981, "learning_rate": 1.6805791502461994e-05, "loss": 0.24857487678527831, "step": 193950 }, { "epoch": 0.8327108180280433, "grad_norm": 0.13833187520503998, "learning_rate": 1.680147978234437e-05, "loss": 0.00927966982126236, "step": 193960 }, { "epoch": 0.8327537501180633, "grad_norm": 0.020017297938466072, "learning_rate": 1.6797168062226745e-05, "loss": 0.046907836198806764, "step": 193970 }, { "epoch": 0.8327966822080832, "grad_norm": 0.0073755038902163506, "learning_rate": 1.6792856342109122e-05, "loss": 0.27490532398223877, "step": 193980 }, { "epoch": 0.8328396142981033, "grad_norm": 0.07011353224515915, "learning_rate": 1.6788544621991496e-05, "loss": 0.30378525257110595, "step": 193990 }, { "epoch": 0.8328825463881233, "grad_norm": 0.0438222661614418, "learning_rate": 1.6784232901873874e-05, "loss": 0.0050745390355587, "step": 194000 }, { "epoch": 0.8328825463881233, "eval_loss": 0.3831603527069092, "eval_runtime": 27.471, "eval_samples_per_second": 3.64, "eval_steps_per_second": 3.64, "step": 194000 }, { "epoch": 0.8329254784781432, "grad_norm": 1.8510911464691162, "learning_rate": 1.677992118175625e-05, "loss": 0.2678192138671875, "step": 194010 }, { "epoch": 0.8329684105681633, "grad_norm": 0.7535163164138794, "learning_rate": 1.677560946163863e-05, "loss": 0.3478584289550781, "step": 194020 }, { "epoch": 0.8330113426581833, "grad_norm": 0.014061828143894672, "learning_rate": 1.6771297741521002e-05, "loss": 0.09568552970886231, "step": 194030 }, { "epoch": 0.8330542747482033, "grad_norm": 2.01088285446167, "learning_rate": 1.676698602140338e-05, "loss": 0.41285243034362795, "step": 194040 }, { "epoch": 0.8330972068382233, "grad_norm": 0.00961384829133749, "learning_rate": 1.6762674301285754e-05, "loss": 0.2360004425048828, "step": 194050 }, { "epoch": 0.8331401389282433, "grad_norm": 166.18780517578125, "learning_rate": 1.6758362581168134e-05, "loss": 0.2200385332107544, "step": 194060 }, { "epoch": 0.8331830710182633, "grad_norm": 0.002402801066637039, "learning_rate": 1.675405086105051e-05, "loss": 0.3726295709609985, "step": 194070 }, { "epoch": 0.8332260031082833, "grad_norm": 0.007039608899503946, "learning_rate": 1.6749739140932886e-05, "loss": 0.06041044592857361, "step": 194080 }, { "epoch": 0.8332689351983034, "grad_norm": 0.04404909536242485, "learning_rate": 1.674542742081526e-05, "loss": 0.10072480440139771, "step": 194090 }, { "epoch": 0.8333118672883233, "grad_norm": 0.13793088495731354, "learning_rate": 1.6741115700697637e-05, "loss": 0.053259152173995974, "step": 194100 }, { "epoch": 0.8333547993783433, "grad_norm": 0.02142341621220112, "learning_rate": 1.673680398058001e-05, "loss": 0.10477865934371948, "step": 194110 }, { "epoch": 0.8333977314683634, "grad_norm": 0.015102100558578968, "learning_rate": 1.673249226046239e-05, "loss": 0.21803498268127441, "step": 194120 }, { "epoch": 0.8334406635583833, "grad_norm": 0.39184778928756714, "learning_rate": 1.6728180540344766e-05, "loss": 0.15625743865966796, "step": 194130 }, { "epoch": 0.8334835956484034, "grad_norm": 0.16577816009521484, "learning_rate": 1.6723868820227143e-05, "loss": 0.1322509765625, "step": 194140 }, { "epoch": 0.8335265277384234, "grad_norm": 0.35797154903411865, "learning_rate": 1.6719557100109517e-05, "loss": 0.20927348136901855, "step": 194150 }, { "epoch": 0.8335694598284433, "grad_norm": 0.7359702587127686, "learning_rate": 1.6715245379991894e-05, "loss": 0.059522110223770144, "step": 194160 }, { "epoch": 0.8336123919184634, "grad_norm": 0.015525028109550476, "learning_rate": 1.671093365987427e-05, "loss": 0.23056964874267577, "step": 194170 }, { "epoch": 0.8336553240084834, "grad_norm": 0.009239768609404564, "learning_rate": 1.670662193975665e-05, "loss": 0.0353708952665329, "step": 194180 }, { "epoch": 0.8336982560985033, "grad_norm": 0.00043988885590806603, "learning_rate": 1.6702310219639023e-05, "loss": 0.2876828670501709, "step": 194190 }, { "epoch": 0.8337411881885234, "grad_norm": 0.0003478638536762446, "learning_rate": 1.66979984995214e-05, "loss": 0.12474907636642456, "step": 194200 }, { "epoch": 0.8337841202785434, "grad_norm": 1.505427360534668, "learning_rate": 1.6693686779403774e-05, "loss": 0.16469504833221435, "step": 194210 }, { "epoch": 0.8338270523685634, "grad_norm": 1.9175589084625244, "learning_rate": 1.668937505928615e-05, "loss": 0.3978353261947632, "step": 194220 }, { "epoch": 0.8338699844585834, "grad_norm": 0.22217345237731934, "learning_rate": 1.668506333916853e-05, "loss": 0.16778547763824464, "step": 194230 }, { "epoch": 0.8339129165486034, "grad_norm": 2.0455987453460693, "learning_rate": 1.6680751619050906e-05, "loss": 0.2525254011154175, "step": 194240 }, { "epoch": 0.8339558486386234, "grad_norm": 0.008584201335906982, "learning_rate": 1.6676439898933284e-05, "loss": 0.22424793243408203, "step": 194250 }, { "epoch": 0.8339987807286434, "grad_norm": 1.2639451026916504, "learning_rate": 1.6672128178815658e-05, "loss": 0.2725630283355713, "step": 194260 }, { "epoch": 0.8340417128186635, "grad_norm": 0.06333382427692413, "learning_rate": 1.6667816458698035e-05, "loss": 0.20261473655700685, "step": 194270 }, { "epoch": 0.8340846449086834, "grad_norm": 0.17632277309894562, "learning_rate": 1.666350473858041e-05, "loss": 0.06435710787773133, "step": 194280 }, { "epoch": 0.8341275769987034, "grad_norm": 0.01805727928876877, "learning_rate": 1.6659193018462786e-05, "loss": 0.1538480281829834, "step": 194290 }, { "epoch": 0.8341705090887235, "grad_norm": 0.0022346056066453457, "learning_rate": 1.6654881298345164e-05, "loss": 0.2566747426986694, "step": 194300 }, { "epoch": 0.8342134411787434, "grad_norm": 1.2149933576583862, "learning_rate": 1.665056957822754e-05, "loss": 0.15987224578857423, "step": 194310 }, { "epoch": 0.8342563732687635, "grad_norm": 0.003662853967398405, "learning_rate": 1.6646257858109915e-05, "loss": 0.2221593141555786, "step": 194320 }, { "epoch": 0.8342993053587835, "grad_norm": 1.8059449195861816, "learning_rate": 1.6641946137992292e-05, "loss": 0.10572590827941894, "step": 194330 }, { "epoch": 0.8343422374488035, "grad_norm": 0.08389879763126373, "learning_rate": 1.6637634417874666e-05, "loss": 0.24218251705169677, "step": 194340 }, { "epoch": 0.8343851695388235, "grad_norm": 0.18546342849731445, "learning_rate": 1.6633322697757043e-05, "loss": 0.15501357316970826, "step": 194350 }, { "epoch": 0.8344281016288435, "grad_norm": 0.1114467978477478, "learning_rate": 1.662901097763942e-05, "loss": 0.20605828762054443, "step": 194360 }, { "epoch": 0.8344710337188636, "grad_norm": 0.008445865474641323, "learning_rate": 1.6624699257521798e-05, "loss": 0.14372949600219725, "step": 194370 }, { "epoch": 0.8345139658088835, "grad_norm": 1.4896812438964844, "learning_rate": 1.6620387537404172e-05, "loss": 0.2524362564086914, "step": 194380 }, { "epoch": 0.8345568978989035, "grad_norm": 0.013691961765289307, "learning_rate": 1.661607581728655e-05, "loss": 0.14471871852874757, "step": 194390 }, { "epoch": 0.8345998299889236, "grad_norm": 4.92308235168457, "learning_rate": 1.6611764097168923e-05, "loss": 0.2559764862060547, "step": 194400 }, { "epoch": 0.8346427620789435, "grad_norm": 2.5394248962402344, "learning_rate": 1.66074523770513e-05, "loss": 0.24362993240356445, "step": 194410 }, { "epoch": 0.8346856941689635, "grad_norm": 0.277831107378006, "learning_rate": 1.6603140656933678e-05, "loss": 0.12817366123199464, "step": 194420 }, { "epoch": 0.8347286262589836, "grad_norm": 0.0360804982483387, "learning_rate": 1.6598828936816055e-05, "loss": 0.07550318837165833, "step": 194430 }, { "epoch": 0.8347715583490035, "grad_norm": 0.03386925160884857, "learning_rate": 1.659451721669843e-05, "loss": 0.06955471634864807, "step": 194440 }, { "epoch": 0.8348144904390236, "grad_norm": 0.004523274954408407, "learning_rate": 1.6590205496580807e-05, "loss": 0.12661986351013182, "step": 194450 }, { "epoch": 0.8348574225290436, "grad_norm": 0.06306543201208115, "learning_rate": 1.658589377646318e-05, "loss": 0.16889942884445192, "step": 194460 }, { "epoch": 0.8349003546190635, "grad_norm": 3.41239595413208, "learning_rate": 1.658158205634556e-05, "loss": 0.2694386959075928, "step": 194470 }, { "epoch": 0.8349432867090836, "grad_norm": 0.033036649227142334, "learning_rate": 1.6577270336227935e-05, "loss": 0.1256989598274231, "step": 194480 }, { "epoch": 0.8349862187991036, "grad_norm": 0.1092417761683464, "learning_rate": 1.6572958616110313e-05, "loss": 0.08331429362297058, "step": 194490 }, { "epoch": 0.8350291508891236, "grad_norm": 1.1124284267425537, "learning_rate": 1.6568646895992687e-05, "loss": 0.2619120359420776, "step": 194500 }, { "epoch": 0.8350720829791436, "grad_norm": 0.020524216815829277, "learning_rate": 1.6564335175875064e-05, "loss": 0.12931500673294066, "step": 194510 }, { "epoch": 0.8351150150691636, "grad_norm": 5.772107124328613, "learning_rate": 1.6560023455757438e-05, "loss": 0.1707077741622925, "step": 194520 }, { "epoch": 0.8351579471591836, "grad_norm": 1.7792444229125977, "learning_rate": 1.655571173563982e-05, "loss": 0.21941332817077636, "step": 194530 }, { "epoch": 0.8352008792492036, "grad_norm": 0.0069997734390199184, "learning_rate": 1.6551400015522193e-05, "loss": 0.29697065353393554, "step": 194540 }, { "epoch": 0.8352438113392237, "grad_norm": 0.35176554322242737, "learning_rate": 1.654708829540457e-05, "loss": 0.37339417934417723, "step": 194550 }, { "epoch": 0.8352867434292436, "grad_norm": 0.1942061185836792, "learning_rate": 1.6542776575286944e-05, "loss": 0.276427698135376, "step": 194560 }, { "epoch": 0.8353296755192636, "grad_norm": 1.3757528066635132, "learning_rate": 1.653846485516932e-05, "loss": 0.19914722442626953, "step": 194570 }, { "epoch": 0.8353726076092837, "grad_norm": 0.005043280776590109, "learning_rate": 1.65341531350517e-05, "loss": 0.24934778213500977, "step": 194580 }, { "epoch": 0.8354155396993036, "grad_norm": 0.06708847731351852, "learning_rate": 1.6529841414934076e-05, "loss": 0.16489659547805785, "step": 194590 }, { "epoch": 0.8354584717893236, "grad_norm": 2.7416841983795166, "learning_rate": 1.652552969481645e-05, "loss": 0.30127294063568116, "step": 194600 }, { "epoch": 0.8355014038793437, "grad_norm": 0.2182602882385254, "learning_rate": 1.6521217974698827e-05, "loss": 0.45470247268676756, "step": 194610 }, { "epoch": 0.8355443359693636, "grad_norm": 1.337004542350769, "learning_rate": 1.6516906254581205e-05, "loss": 0.27136123180389404, "step": 194620 }, { "epoch": 0.8355872680593837, "grad_norm": 0.018352841958403587, "learning_rate": 1.651259453446358e-05, "loss": 0.3394086122512817, "step": 194630 }, { "epoch": 0.8356302001494037, "grad_norm": 0.003774836892262101, "learning_rate": 1.6508282814345956e-05, "loss": 0.0814660906791687, "step": 194640 }, { "epoch": 0.8356731322394236, "grad_norm": 0.007637556176632643, "learning_rate": 1.6503971094228333e-05, "loss": 0.1790636420249939, "step": 194650 }, { "epoch": 0.8357160643294437, "grad_norm": 1.6730716228485107, "learning_rate": 1.649965937411071e-05, "loss": 0.29316766262054444, "step": 194660 }, { "epoch": 0.8357589964194637, "grad_norm": 0.005538949277251959, "learning_rate": 1.6495347653993085e-05, "loss": 0.16200257539749147, "step": 194670 }, { "epoch": 0.8358019285094836, "grad_norm": 1.8531112670898438, "learning_rate": 1.6491035933875462e-05, "loss": 0.3488029718399048, "step": 194680 }, { "epoch": 0.8358448605995037, "grad_norm": 1.9031802415847778, "learning_rate": 1.6486724213757836e-05, "loss": 0.4225339412689209, "step": 194690 }, { "epoch": 0.8358877926895237, "grad_norm": 0.03141864761710167, "learning_rate": 1.6482412493640213e-05, "loss": 0.07720988988876343, "step": 194700 }, { "epoch": 0.8359307247795437, "grad_norm": 4.636709690093994, "learning_rate": 1.647810077352259e-05, "loss": 0.2965707778930664, "step": 194710 }, { "epoch": 0.8359736568695637, "grad_norm": 0.011487384326756, "learning_rate": 1.6473789053404968e-05, "loss": 0.17508405447006226, "step": 194720 }, { "epoch": 0.8360165889595838, "grad_norm": 0.00859050638973713, "learning_rate": 1.6469477333287342e-05, "loss": 0.2810189962387085, "step": 194730 }, { "epoch": 0.8360595210496037, "grad_norm": 0.008925511501729488, "learning_rate": 1.646516561316972e-05, "loss": 0.01954289376735687, "step": 194740 }, { "epoch": 0.8361024531396237, "grad_norm": 4.719195365905762, "learning_rate": 1.6460853893052093e-05, "loss": 0.33624041080474854, "step": 194750 }, { "epoch": 0.8361453852296438, "grad_norm": 129.30929565429688, "learning_rate": 1.645654217293447e-05, "loss": 0.23399410247802735, "step": 194760 }, { "epoch": 0.8361883173196638, "grad_norm": 0.4405730068683624, "learning_rate": 1.6452230452816848e-05, "loss": 0.1191325068473816, "step": 194770 }, { "epoch": 0.8362312494096837, "grad_norm": 0.0006891182856634259, "learning_rate": 1.6447918732699225e-05, "loss": 0.3420095443725586, "step": 194780 }, { "epoch": 0.8362741814997038, "grad_norm": 0.7859971523284912, "learning_rate": 1.64436070125816e-05, "loss": 0.3719822645187378, "step": 194790 }, { "epoch": 0.8363171135897238, "grad_norm": 3.2730681896209717, "learning_rate": 1.6439295292463976e-05, "loss": 0.41228413581848145, "step": 194800 }, { "epoch": 0.8363600456797438, "grad_norm": 1.8419042825698853, "learning_rate": 1.643498357234635e-05, "loss": 0.18629003763198854, "step": 194810 }, { "epoch": 0.8364029777697638, "grad_norm": 0.25675979256629944, "learning_rate": 1.643067185222873e-05, "loss": 0.06031713485717773, "step": 194820 }, { "epoch": 0.8364459098597838, "grad_norm": 0.029397347941994667, "learning_rate": 1.6426360132111105e-05, "loss": 0.22633352279663085, "step": 194830 }, { "epoch": 0.8364888419498038, "grad_norm": 0.1383998543024063, "learning_rate": 1.6422048411993482e-05, "loss": 0.1639503836631775, "step": 194840 }, { "epoch": 0.8365317740398238, "grad_norm": 1.5284881591796875, "learning_rate": 1.6417736691875856e-05, "loss": 0.16716455221176146, "step": 194850 }, { "epoch": 0.8365747061298439, "grad_norm": 0.769423246383667, "learning_rate": 1.6413424971758234e-05, "loss": 0.2911246299743652, "step": 194860 }, { "epoch": 0.8366176382198638, "grad_norm": 0.06036985665559769, "learning_rate": 1.6409113251640608e-05, "loss": 0.16936222314834595, "step": 194870 }, { "epoch": 0.8366605703098838, "grad_norm": 4.0209879875183105, "learning_rate": 1.640480153152299e-05, "loss": 0.23061556816101075, "step": 194880 }, { "epoch": 0.8367035023999039, "grad_norm": 2.87886118888855, "learning_rate": 1.6400489811405362e-05, "loss": 0.1437964677810669, "step": 194890 }, { "epoch": 0.8367464344899238, "grad_norm": 0.0012091115349903703, "learning_rate": 1.639617809128774e-05, "loss": 0.10468262434005737, "step": 194900 }, { "epoch": 0.8367893665799438, "grad_norm": 6.849912643432617, "learning_rate": 1.6391866371170114e-05, "loss": 0.3797173500061035, "step": 194910 }, { "epoch": 0.8368322986699639, "grad_norm": 0.00793464481830597, "learning_rate": 1.638755465105249e-05, "loss": 0.31881911754608155, "step": 194920 }, { "epoch": 0.8368752307599838, "grad_norm": 0.00296783191151917, "learning_rate": 1.638324293093487e-05, "loss": 0.06018228530883789, "step": 194930 }, { "epoch": 0.8369181628500039, "grad_norm": 0.8054410219192505, "learning_rate": 1.6378931210817246e-05, "loss": 0.2937172412872314, "step": 194940 }, { "epoch": 0.8369610949400239, "grad_norm": 0.054855868220329285, "learning_rate": 1.637461949069962e-05, "loss": 0.28021395206451416, "step": 194950 }, { "epoch": 0.8370040270300438, "grad_norm": 0.05339659005403519, "learning_rate": 1.6370307770581997e-05, "loss": 0.2437211275100708, "step": 194960 }, { "epoch": 0.8370469591200639, "grad_norm": 0.013190588913857937, "learning_rate": 1.636599605046437e-05, "loss": 0.21684250831604004, "step": 194970 }, { "epoch": 0.8370898912100839, "grad_norm": 1.0553587675094604, "learning_rate": 1.6361684330346748e-05, "loss": 0.26926403045654296, "step": 194980 }, { "epoch": 0.8371328233001039, "grad_norm": 0.05917760357260704, "learning_rate": 1.6357372610229126e-05, "loss": 0.1761362671852112, "step": 194990 }, { "epoch": 0.8371757553901239, "grad_norm": 0.020496509969234467, "learning_rate": 1.6353060890111503e-05, "loss": 0.17508223056793212, "step": 195000 }, { "epoch": 0.8371757553901239, "eval_loss": 0.3825985789299011, "eval_runtime": 27.4396, "eval_samples_per_second": 3.644, "eval_steps_per_second": 3.644, "step": 195000 }, { "epoch": 0.8372186874801439, "grad_norm": 0.007341762073338032, "learning_rate": 1.634874916999388e-05, "loss": 0.17335011959075927, "step": 195010 }, { "epoch": 0.8372616195701639, "grad_norm": 2.7988274097442627, "learning_rate": 1.6344437449876254e-05, "loss": 0.09647920131683349, "step": 195020 }, { "epoch": 0.8373045516601839, "grad_norm": 0.16739942133426666, "learning_rate": 1.634012572975863e-05, "loss": 0.2648929595947266, "step": 195030 }, { "epoch": 0.837347483750204, "grad_norm": 0.8594076633453369, "learning_rate": 1.6335814009641006e-05, "loss": 0.121567702293396, "step": 195040 }, { "epoch": 0.8373904158402239, "grad_norm": 0.008997962810099125, "learning_rate": 1.6331502289523383e-05, "loss": 0.3291927814483643, "step": 195050 }, { "epoch": 0.8374333479302439, "grad_norm": 1.8808585405349731, "learning_rate": 1.632719056940576e-05, "loss": 0.2479541301727295, "step": 195060 }, { "epoch": 0.837476280020264, "grad_norm": 0.26992496848106384, "learning_rate": 1.6322878849288138e-05, "loss": 0.31995177268981934, "step": 195070 }, { "epoch": 0.8375192121102839, "grad_norm": 0.7552814483642578, "learning_rate": 1.631856712917051e-05, "loss": 0.37452163696289065, "step": 195080 }, { "epoch": 0.837562144200304, "grad_norm": 0.01972191222012043, "learning_rate": 1.631425540905289e-05, "loss": 0.27466640472412107, "step": 195090 }, { "epoch": 0.837605076290324, "grad_norm": 0.03341887891292572, "learning_rate": 1.6309943688935263e-05, "loss": 0.020095197856426238, "step": 195100 }, { "epoch": 0.8376480083803439, "grad_norm": 0.01714405231177807, "learning_rate": 1.630563196881764e-05, "loss": 0.2320852279663086, "step": 195110 }, { "epoch": 0.837690940470364, "grad_norm": 6.817076683044434, "learning_rate": 1.6301320248700018e-05, "loss": 0.33026838302612305, "step": 195120 }, { "epoch": 0.837733872560384, "grad_norm": 0.2995063364505768, "learning_rate": 1.6297008528582395e-05, "loss": 0.06860451102256775, "step": 195130 }, { "epoch": 0.8377768046504039, "grad_norm": 0.29282864928245544, "learning_rate": 1.629269680846477e-05, "loss": 0.08025183081626892, "step": 195140 }, { "epoch": 0.837819736740424, "grad_norm": 0.6161121726036072, "learning_rate": 1.6288385088347146e-05, "loss": 0.14255157709121705, "step": 195150 }, { "epoch": 0.837862668830444, "grad_norm": 0.008154317736625671, "learning_rate": 1.628407336822952e-05, "loss": 0.19751204252243043, "step": 195160 }, { "epoch": 0.837905600920464, "grad_norm": 0.030738957226276398, "learning_rate": 1.62797616481119e-05, "loss": 0.07990161180496216, "step": 195170 }, { "epoch": 0.837948533010484, "grad_norm": 0.06262635439634323, "learning_rate": 1.6275449927994275e-05, "loss": 0.10323004722595215, "step": 195180 }, { "epoch": 0.837991465100504, "grad_norm": 0.16967739164829254, "learning_rate": 1.6271138207876652e-05, "loss": 0.13956884145736695, "step": 195190 }, { "epoch": 0.8380343971905241, "grad_norm": 0.008536574430763721, "learning_rate": 1.6266826487759026e-05, "loss": 0.06692397594451904, "step": 195200 }, { "epoch": 0.838077329280544, "grad_norm": 0.1352798193693161, "learning_rate": 1.6262514767641403e-05, "loss": 0.40900468826293945, "step": 195210 }, { "epoch": 0.8381202613705641, "grad_norm": 3.675179958343506, "learning_rate": 1.6258203047523777e-05, "loss": 0.3779770374298096, "step": 195220 }, { "epoch": 0.8381631934605841, "grad_norm": 0.16516053676605225, "learning_rate": 1.6253891327406158e-05, "loss": 0.12357484102249146, "step": 195230 }, { "epoch": 0.838206125550604, "grad_norm": 0.1563853621482849, "learning_rate": 1.6249579607288532e-05, "loss": 0.013473853468894958, "step": 195240 }, { "epoch": 0.8382490576406241, "grad_norm": 0.005513192620128393, "learning_rate": 1.624526788717091e-05, "loss": 0.0922442376613617, "step": 195250 }, { "epoch": 0.8382919897306441, "grad_norm": 0.019567089155316353, "learning_rate": 1.6240956167053283e-05, "loss": 0.13599594831466674, "step": 195260 }, { "epoch": 0.838334921820664, "grad_norm": 0.07949855923652649, "learning_rate": 1.623664444693566e-05, "loss": 0.12350625991821289, "step": 195270 }, { "epoch": 0.8383778539106841, "grad_norm": 0.03716310113668442, "learning_rate": 1.6232332726818038e-05, "loss": 0.18347951173782348, "step": 195280 }, { "epoch": 0.8384207860007041, "grad_norm": 0.0044286614283919334, "learning_rate": 1.6228021006700415e-05, "loss": 0.08632364869117737, "step": 195290 }, { "epoch": 0.8384637180907241, "grad_norm": 1.718677043914795, "learning_rate": 1.622370928658279e-05, "loss": 0.31393024921417234, "step": 195300 }, { "epoch": 0.8385066501807441, "grad_norm": 0.00345993647351861, "learning_rate": 1.6219397566465167e-05, "loss": 0.2210599184036255, "step": 195310 }, { "epoch": 0.8385495822707641, "grad_norm": 0.14155049622058868, "learning_rate": 1.621508584634754e-05, "loss": 0.2827406644821167, "step": 195320 }, { "epoch": 0.8385925143607841, "grad_norm": 0.009309999644756317, "learning_rate": 1.6210774126229918e-05, "loss": 0.38733735084533694, "step": 195330 }, { "epoch": 0.8386354464508041, "grad_norm": 0.2578687071800232, "learning_rate": 1.6206462406112295e-05, "loss": 0.13358743190765382, "step": 195340 }, { "epoch": 0.8386783785408242, "grad_norm": 0.024788912385702133, "learning_rate": 1.6202150685994673e-05, "loss": 0.22722995281219482, "step": 195350 }, { "epoch": 0.8387213106308441, "grad_norm": 1.2188549041748047, "learning_rate": 1.619783896587705e-05, "loss": 0.18080921173095704, "step": 195360 }, { "epoch": 0.8387642427208641, "grad_norm": 0.03402552753686905, "learning_rate": 1.6193527245759424e-05, "loss": 0.16962271928787231, "step": 195370 }, { "epoch": 0.8388071748108842, "grad_norm": 1.3098325729370117, "learning_rate": 1.61892155256418e-05, "loss": 0.1280285358428955, "step": 195380 }, { "epoch": 0.8388501069009041, "grad_norm": 0.08495664596557617, "learning_rate": 1.6184903805524175e-05, "loss": 0.10631186962127685, "step": 195390 }, { "epoch": 0.8388930389909242, "grad_norm": 0.005879923235625029, "learning_rate": 1.6180592085406553e-05, "loss": 0.3474308967590332, "step": 195400 }, { "epoch": 0.8389359710809442, "grad_norm": 0.004986526444554329, "learning_rate": 1.617628036528893e-05, "loss": 0.02645334303379059, "step": 195410 }, { "epoch": 0.8389789031709641, "grad_norm": 0.0748465284705162, "learning_rate": 1.6171968645171307e-05, "loss": 0.09948440790176391, "step": 195420 }, { "epoch": 0.8390218352609842, "grad_norm": 0.00232899421826005, "learning_rate": 1.616765692505368e-05, "loss": 0.1367961049079895, "step": 195430 }, { "epoch": 0.8390647673510042, "grad_norm": 0.0858355164527893, "learning_rate": 1.616334520493606e-05, "loss": 0.1316709876060486, "step": 195440 }, { "epoch": 0.8391076994410241, "grad_norm": 0.024373479187488556, "learning_rate": 1.6159033484818433e-05, "loss": 0.35381669998168946, "step": 195450 }, { "epoch": 0.8391506315310442, "grad_norm": 0.007658019196242094, "learning_rate": 1.615472176470081e-05, "loss": 0.1145735502243042, "step": 195460 }, { "epoch": 0.8391935636210642, "grad_norm": 4.139742374420166, "learning_rate": 1.6150410044583187e-05, "loss": 0.20845775604248046, "step": 195470 }, { "epoch": 0.8392364957110842, "grad_norm": 0.07298199832439423, "learning_rate": 1.6146098324465565e-05, "loss": 0.14519520998001098, "step": 195480 }, { "epoch": 0.8392794278011042, "grad_norm": 0.04505956918001175, "learning_rate": 1.614178660434794e-05, "loss": 0.19180811643600465, "step": 195490 }, { "epoch": 0.8393223598911242, "grad_norm": 0.5236793160438538, "learning_rate": 1.6137474884230316e-05, "loss": 0.26501855850219724, "step": 195500 }, { "epoch": 0.8393652919811442, "grad_norm": 1.8800487518310547, "learning_rate": 1.613316316411269e-05, "loss": 0.19736210107803345, "step": 195510 }, { "epoch": 0.8394082240711642, "grad_norm": 0.018839063122868538, "learning_rate": 1.6128851443995067e-05, "loss": 0.20923397541046143, "step": 195520 }, { "epoch": 0.8394511561611843, "grad_norm": 0.009148648008704185, "learning_rate": 1.6124539723877445e-05, "loss": 0.21486268043518067, "step": 195530 }, { "epoch": 0.8394940882512042, "grad_norm": 0.010658112354576588, "learning_rate": 1.6120228003759822e-05, "loss": 0.24198806285858154, "step": 195540 }, { "epoch": 0.8395370203412242, "grad_norm": 2.198223352432251, "learning_rate": 1.6115916283642196e-05, "loss": 0.17943172454833983, "step": 195550 }, { "epoch": 0.8395799524312443, "grad_norm": 0.008235502988100052, "learning_rate": 1.6111604563524573e-05, "loss": 0.1194198489189148, "step": 195560 }, { "epoch": 0.8396228845212642, "grad_norm": 8.622540473937988, "learning_rate": 1.6107292843406947e-05, "loss": 0.2828612565994263, "step": 195570 }, { "epoch": 0.8396658166112843, "grad_norm": 0.01411815918982029, "learning_rate": 1.6102981123289328e-05, "loss": 0.20698959827423097, "step": 195580 }, { "epoch": 0.8397087487013043, "grad_norm": 0.015219231136143208, "learning_rate": 1.6098669403171702e-05, "loss": 0.04689061641693115, "step": 195590 }, { "epoch": 0.8397516807913242, "grad_norm": 7.192612171173096, "learning_rate": 1.609435768305408e-05, "loss": 0.4910551071166992, "step": 195600 }, { "epoch": 0.8397946128813443, "grad_norm": 0.047113098204135895, "learning_rate": 1.6090045962936453e-05, "loss": 0.0928109884262085, "step": 195610 }, { "epoch": 0.8398375449713643, "grad_norm": 2.3072288036346436, "learning_rate": 1.608573424281883e-05, "loss": 0.25140502452850344, "step": 195620 }, { "epoch": 0.8398804770613844, "grad_norm": 0.09699340909719467, "learning_rate": 1.6081422522701204e-05, "loss": 0.02875358462333679, "step": 195630 }, { "epoch": 0.8399234091514043, "grad_norm": 1.5584121942520142, "learning_rate": 1.6077110802583585e-05, "loss": 0.1507789373397827, "step": 195640 }, { "epoch": 0.8399663412414243, "grad_norm": 0.008640035055577755, "learning_rate": 1.607279908246596e-05, "loss": 0.31208076477050783, "step": 195650 }, { "epoch": 0.8400092733314444, "grad_norm": 0.27219441533088684, "learning_rate": 1.6068487362348336e-05, "loss": 0.15312207937240602, "step": 195660 }, { "epoch": 0.8400522054214643, "grad_norm": 0.002600613981485367, "learning_rate": 1.606417564223071e-05, "loss": 0.03642718493938446, "step": 195670 }, { "epoch": 0.8400951375114843, "grad_norm": 0.12953244149684906, "learning_rate": 1.6059863922113088e-05, "loss": 0.12930947542190552, "step": 195680 }, { "epoch": 0.8401380696015044, "grad_norm": 0.0030175873544067144, "learning_rate": 1.6055552201995465e-05, "loss": 0.459410572052002, "step": 195690 }, { "epoch": 0.8401810016915243, "grad_norm": 0.0005476188962347806, "learning_rate": 1.6051240481877842e-05, "loss": 0.2390958547592163, "step": 195700 }, { "epoch": 0.8402239337815444, "grad_norm": 1.5651413202285767, "learning_rate": 1.604692876176022e-05, "loss": 0.08409489393234253, "step": 195710 }, { "epoch": 0.8402668658715644, "grad_norm": 3.797452211380005, "learning_rate": 1.6042617041642594e-05, "loss": 0.22493295669555663, "step": 195720 }, { "epoch": 0.8403097979615843, "grad_norm": 5.24019718170166, "learning_rate": 1.603830532152497e-05, "loss": 0.2507028579711914, "step": 195730 }, { "epoch": 0.8403527300516044, "grad_norm": 0.00809667818248272, "learning_rate": 1.6033993601407345e-05, "loss": 0.20862414836883544, "step": 195740 }, { "epoch": 0.8403956621416244, "grad_norm": 0.08349917829036713, "learning_rate": 1.6029681881289722e-05, "loss": 0.0961777925491333, "step": 195750 }, { "epoch": 0.8404385942316444, "grad_norm": 0.03538598120212555, "learning_rate": 1.60253701611721e-05, "loss": 0.0851999580860138, "step": 195760 }, { "epoch": 0.8404815263216644, "grad_norm": 3.1396093368530273, "learning_rate": 1.6021058441054477e-05, "loss": 0.2021549701690674, "step": 195770 }, { "epoch": 0.8405244584116844, "grad_norm": 2.161872386932373, "learning_rate": 1.601674672093685e-05, "loss": 0.23405060768127442, "step": 195780 }, { "epoch": 0.8405673905017044, "grad_norm": 0.05371826887130737, "learning_rate": 1.601243500081923e-05, "loss": 0.1200188398361206, "step": 195790 }, { "epoch": 0.8406103225917244, "grad_norm": 0.006190289277583361, "learning_rate": 1.6008123280701602e-05, "loss": 0.30011117458343506, "step": 195800 }, { "epoch": 0.8406532546817445, "grad_norm": 0.002181611256673932, "learning_rate": 1.600381156058398e-05, "loss": 0.33170261383056643, "step": 195810 }, { "epoch": 0.8406961867717644, "grad_norm": 0.00914520863443613, "learning_rate": 1.5999499840466357e-05, "loss": 0.008389408886432647, "step": 195820 }, { "epoch": 0.8407391188617844, "grad_norm": 0.03632459416985512, "learning_rate": 1.5995188120348734e-05, "loss": 0.287453818321228, "step": 195830 }, { "epoch": 0.8407820509518045, "grad_norm": 0.5841567516326904, "learning_rate": 1.5990876400231108e-05, "loss": 0.12442935705184936, "step": 195840 }, { "epoch": 0.8408249830418244, "grad_norm": 0.05322708189487457, "learning_rate": 1.5986564680113486e-05, "loss": 0.1795598268508911, "step": 195850 }, { "epoch": 0.8408679151318444, "grad_norm": 0.018948446959257126, "learning_rate": 1.598225295999586e-05, "loss": 0.13776202201843263, "step": 195860 }, { "epoch": 0.8409108472218645, "grad_norm": 2.212151050567627, "learning_rate": 1.5977941239878237e-05, "loss": 0.1870142936706543, "step": 195870 }, { "epoch": 0.8409537793118844, "grad_norm": 1.0908949375152588, "learning_rate": 1.5973629519760614e-05, "loss": 0.12300001382827759, "step": 195880 }, { "epoch": 0.8409967114019045, "grad_norm": 2.6546292304992676, "learning_rate": 1.596931779964299e-05, "loss": 0.0752610445022583, "step": 195890 }, { "epoch": 0.8410396434919245, "grad_norm": 0.012228342704474926, "learning_rate": 1.5965006079525366e-05, "loss": 0.22464075088500976, "step": 195900 }, { "epoch": 0.8410825755819444, "grad_norm": 0.1338385045528412, "learning_rate": 1.5960694359407743e-05, "loss": 0.36805989742279055, "step": 195910 }, { "epoch": 0.8411255076719645, "grad_norm": 0.05296871438622475, "learning_rate": 1.5956382639290117e-05, "loss": 0.18148987293243407, "step": 195920 }, { "epoch": 0.8411684397619845, "grad_norm": 0.025313997641205788, "learning_rate": 1.5952070919172498e-05, "loss": 0.13553870916366578, "step": 195930 }, { "epoch": 0.8412113718520045, "grad_norm": 0.0009583273204043508, "learning_rate": 1.594775919905487e-05, "loss": 0.3775708436965942, "step": 195940 }, { "epoch": 0.8412543039420245, "grad_norm": 0.012733984738588333, "learning_rate": 1.594344747893725e-05, "loss": 0.10729289054870605, "step": 195950 }, { "epoch": 0.8412972360320445, "grad_norm": 0.0589638277888298, "learning_rate": 1.5939135758819623e-05, "loss": 0.29300096035003664, "step": 195960 }, { "epoch": 0.8413401681220645, "grad_norm": 47.52212905883789, "learning_rate": 1.5934824038702e-05, "loss": 0.11700329780578614, "step": 195970 }, { "epoch": 0.8413831002120845, "grad_norm": 0.08651348203420639, "learning_rate": 1.5930512318584374e-05, "loss": 0.119724440574646, "step": 195980 }, { "epoch": 0.8414260323021046, "grad_norm": 0.017822356894612312, "learning_rate": 1.5926200598466755e-05, "loss": 0.12555168867111205, "step": 195990 }, { "epoch": 0.8414689643921245, "grad_norm": 0.001900406088680029, "learning_rate": 1.592188887834913e-05, "loss": 0.24136440753936766, "step": 196000 }, { "epoch": 0.8414689643921245, "eval_loss": 0.3772696256637573, "eval_runtime": 27.5097, "eval_samples_per_second": 3.635, "eval_steps_per_second": 3.635, "step": 196000 }, { "epoch": 0.8415118964821445, "grad_norm": 0.20596669614315033, "learning_rate": 1.5917577158231506e-05, "loss": 0.12156057357788086, "step": 196010 }, { "epoch": 0.8415548285721646, "grad_norm": 0.022826924920082092, "learning_rate": 1.591326543811388e-05, "loss": 0.09349904656410217, "step": 196020 }, { "epoch": 0.8415977606621845, "grad_norm": 4.478949546813965, "learning_rate": 1.5908953717996257e-05, "loss": 0.16890153884887696, "step": 196030 }, { "epoch": 0.8416406927522045, "grad_norm": 0.10414303839206696, "learning_rate": 1.5904641997878635e-05, "loss": 0.25435285568237304, "step": 196040 }, { "epoch": 0.8416836248422246, "grad_norm": 18.744157791137695, "learning_rate": 1.5900330277761012e-05, "loss": 0.23109426498413085, "step": 196050 }, { "epoch": 0.8417265569322446, "grad_norm": 0.00422808388248086, "learning_rate": 1.5896018557643386e-05, "loss": 0.09207351803779602, "step": 196060 }, { "epoch": 0.8417694890222646, "grad_norm": 0.0252239890396595, "learning_rate": 1.5891706837525763e-05, "loss": 0.13819221258163453, "step": 196070 }, { "epoch": 0.8418124211122846, "grad_norm": 0.0008331200224347413, "learning_rate": 1.588739511740814e-05, "loss": 0.24390954971313478, "step": 196080 }, { "epoch": 0.8418553532023046, "grad_norm": 0.004637080244719982, "learning_rate": 1.5883083397290515e-05, "loss": 0.13876519203186036, "step": 196090 }, { "epoch": 0.8418982852923246, "grad_norm": 0.0025159171782433987, "learning_rate": 1.5878771677172892e-05, "loss": 0.3237619400024414, "step": 196100 }, { "epoch": 0.8419412173823446, "grad_norm": 1.4971562623977661, "learning_rate": 1.587445995705527e-05, "loss": 0.08215521574020386, "step": 196110 }, { "epoch": 0.8419841494723647, "grad_norm": 3.8147895336151123, "learning_rate": 1.5870148236937647e-05, "loss": 0.21302051544189454, "step": 196120 }, { "epoch": 0.8420270815623846, "grad_norm": 4.908078193664551, "learning_rate": 1.586583651682002e-05, "loss": 0.5922693252563477, "step": 196130 }, { "epoch": 0.8420700136524046, "grad_norm": 0.05550285801291466, "learning_rate": 1.5861524796702398e-05, "loss": 0.07058622241020203, "step": 196140 }, { "epoch": 0.8421129457424247, "grad_norm": 0.3030736744403839, "learning_rate": 1.5857213076584772e-05, "loss": 0.36227562427520754, "step": 196150 }, { "epoch": 0.8421558778324446, "grad_norm": 0.9248490929603577, "learning_rate": 1.585290135646715e-05, "loss": 0.18082973957061768, "step": 196160 }, { "epoch": 0.8421988099224647, "grad_norm": 0.6012828946113586, "learning_rate": 1.5848589636349527e-05, "loss": 0.18957573175430298, "step": 196170 }, { "epoch": 0.8422417420124847, "grad_norm": 0.40026846528053284, "learning_rate": 1.5844277916231904e-05, "loss": 0.1907409906387329, "step": 196180 }, { "epoch": 0.8422846741025046, "grad_norm": 4.090051651000977, "learning_rate": 1.5839966196114278e-05, "loss": 0.063755863904953, "step": 196190 }, { "epoch": 0.8423276061925247, "grad_norm": 0.009709478355944157, "learning_rate": 1.5835654475996655e-05, "loss": 0.3304955244064331, "step": 196200 }, { "epoch": 0.8423705382825447, "grad_norm": 0.017408445477485657, "learning_rate": 1.583134275587903e-05, "loss": 0.4167612552642822, "step": 196210 }, { "epoch": 0.8424134703725646, "grad_norm": 0.0060608345083892345, "learning_rate": 1.5827031035761407e-05, "loss": 0.1044541597366333, "step": 196220 }, { "epoch": 0.8424564024625847, "grad_norm": 0.011498616077005863, "learning_rate": 1.5822719315643784e-05, "loss": 0.20374600887298583, "step": 196230 }, { "epoch": 0.8424993345526047, "grad_norm": 0.00787246972322464, "learning_rate": 1.581840759552616e-05, "loss": 0.22661259174346923, "step": 196240 }, { "epoch": 0.8425422666426247, "grad_norm": 4.431751728057861, "learning_rate": 1.5814095875408535e-05, "loss": 0.37505383491516114, "step": 196250 }, { "epoch": 0.8425851987326447, "grad_norm": 0.006028730887919664, "learning_rate": 1.5809784155290913e-05, "loss": 0.19709838628768922, "step": 196260 }, { "epoch": 0.8426281308226647, "grad_norm": 0.3250470757484436, "learning_rate": 1.5805472435173287e-05, "loss": 0.2291292667388916, "step": 196270 }, { "epoch": 0.8426710629126847, "grad_norm": 0.46007418632507324, "learning_rate": 1.5801160715055664e-05, "loss": 0.19784101247787475, "step": 196280 }, { "epoch": 0.8427139950027047, "grad_norm": 0.928049623966217, "learning_rate": 1.579684899493804e-05, "loss": 0.10087544918060302, "step": 196290 }, { "epoch": 0.8427569270927248, "grad_norm": 0.02121254988014698, "learning_rate": 1.579253727482042e-05, "loss": 0.20783622264862062, "step": 196300 }, { "epoch": 0.8427998591827447, "grad_norm": 0.1715366542339325, "learning_rate": 1.5788225554702793e-05, "loss": 0.1536109209060669, "step": 196310 }, { "epoch": 0.8428427912727647, "grad_norm": 0.6590801477432251, "learning_rate": 1.578391383458517e-05, "loss": 0.1001272439956665, "step": 196320 }, { "epoch": 0.8428857233627848, "grad_norm": 7.680621147155762, "learning_rate": 1.5779602114467544e-05, "loss": 0.15085554122924805, "step": 196330 }, { "epoch": 0.8429286554528047, "grad_norm": 4.486323833465576, "learning_rate": 1.5775290394349925e-05, "loss": 0.27280516624450685, "step": 196340 }, { "epoch": 0.8429715875428248, "grad_norm": 1.990494728088379, "learning_rate": 1.57709786742323e-05, "loss": 0.11515017747879028, "step": 196350 }, { "epoch": 0.8430145196328448, "grad_norm": 1.1661758422851562, "learning_rate": 1.5766666954114676e-05, "loss": 0.23308405876159669, "step": 196360 }, { "epoch": 0.8430574517228647, "grad_norm": 1.5656630992889404, "learning_rate": 1.576235523399705e-05, "loss": 0.18918917179107667, "step": 196370 }, { "epoch": 0.8431003838128848, "grad_norm": 6.898838520050049, "learning_rate": 1.5758043513879427e-05, "loss": 0.1752528190612793, "step": 196380 }, { "epoch": 0.8431433159029048, "grad_norm": 0.010424407199025154, "learning_rate": 1.57537317937618e-05, "loss": 0.3139214277267456, "step": 196390 }, { "epoch": 0.8431862479929247, "grad_norm": 0.010615244507789612, "learning_rate": 1.5749420073644182e-05, "loss": 0.21709723472595216, "step": 196400 }, { "epoch": 0.8432291800829448, "grad_norm": 1.2221513986587524, "learning_rate": 1.5745108353526556e-05, "loss": 0.08752541542053223, "step": 196410 }, { "epoch": 0.8432721121729648, "grad_norm": 0.0036838948726654053, "learning_rate": 1.5740796633408933e-05, "loss": 0.15428515672683715, "step": 196420 }, { "epoch": 0.8433150442629848, "grad_norm": 1.6186450719833374, "learning_rate": 1.5736484913291307e-05, "loss": 0.019905810058116914, "step": 196430 }, { "epoch": 0.8433579763530048, "grad_norm": 0.002743236254900694, "learning_rate": 1.5732173193173684e-05, "loss": 0.03064911365509033, "step": 196440 }, { "epoch": 0.8434009084430248, "grad_norm": 0.009198924526572227, "learning_rate": 1.5727861473056062e-05, "loss": 0.030152544379234314, "step": 196450 }, { "epoch": 0.8434438405330448, "grad_norm": 0.9507923722267151, "learning_rate": 1.572354975293844e-05, "loss": 0.42665905952453614, "step": 196460 }, { "epoch": 0.8434867726230648, "grad_norm": 0.0016342259477823973, "learning_rate": 1.5719238032820817e-05, "loss": 0.21152596473693847, "step": 196470 }, { "epoch": 0.8435297047130849, "grad_norm": 0.17750652134418488, "learning_rate": 1.571492631270319e-05, "loss": 0.14839037656784057, "step": 196480 }, { "epoch": 0.8435726368031049, "grad_norm": 3.569044589996338, "learning_rate": 1.5710614592585568e-05, "loss": 0.19094862937927246, "step": 196490 }, { "epoch": 0.8436155688931248, "grad_norm": 0.27529528737068176, "learning_rate": 1.5706302872467942e-05, "loss": 0.2316378116607666, "step": 196500 }, { "epoch": 0.8436585009831449, "grad_norm": 0.0013704081065952778, "learning_rate": 1.570199115235032e-05, "loss": 0.03895947635173798, "step": 196510 }, { "epoch": 0.8437014330731649, "grad_norm": 0.020330656319856644, "learning_rate": 1.5697679432232696e-05, "loss": 0.04956190586090088, "step": 196520 }, { "epoch": 0.8437443651631849, "grad_norm": 5.464777946472168, "learning_rate": 1.5693367712115074e-05, "loss": 0.30693516731262205, "step": 196530 }, { "epoch": 0.8437872972532049, "grad_norm": 0.006584883667528629, "learning_rate": 1.5689055991997448e-05, "loss": 0.157961905002594, "step": 196540 }, { "epoch": 0.8438302293432249, "grad_norm": 0.004959666635841131, "learning_rate": 1.5684744271879825e-05, "loss": 0.1256941080093384, "step": 196550 }, { "epoch": 0.8438731614332449, "grad_norm": 0.0028481590561568737, "learning_rate": 1.56804325517622e-05, "loss": 0.011361487209796906, "step": 196560 }, { "epoch": 0.8439160935232649, "grad_norm": 0.01640136167407036, "learning_rate": 1.5676120831644576e-05, "loss": 0.04784930944442749, "step": 196570 }, { "epoch": 0.843959025613285, "grad_norm": 2.794304847717285, "learning_rate": 1.5671809111526954e-05, "loss": 0.22906594276428222, "step": 196580 }, { "epoch": 0.8440019577033049, "grad_norm": 2.3926756381988525, "learning_rate": 1.566749739140933e-05, "loss": 0.15934289693832399, "step": 196590 }, { "epoch": 0.8440448897933249, "grad_norm": 0.094965860247612, "learning_rate": 1.5663185671291705e-05, "loss": 0.13832385540008546, "step": 196600 }, { "epoch": 0.844087821883345, "grad_norm": 1.1906780004501343, "learning_rate": 1.5658873951174082e-05, "loss": 0.2520163536071777, "step": 196610 }, { "epoch": 0.8441307539733649, "grad_norm": 0.026045776903629303, "learning_rate": 1.5654562231056456e-05, "loss": 0.22062888145446777, "step": 196620 }, { "epoch": 0.8441736860633849, "grad_norm": 0.009660093113780022, "learning_rate": 1.5650250510938834e-05, "loss": 0.03899048864841461, "step": 196630 }, { "epoch": 0.844216618153405, "grad_norm": 0.9208100438117981, "learning_rate": 1.564593879082121e-05, "loss": 0.33248515129089357, "step": 196640 }, { "epoch": 0.8442595502434249, "grad_norm": 0.24865712225437164, "learning_rate": 1.564162707070359e-05, "loss": 0.16582545042037963, "step": 196650 }, { "epoch": 0.844302482333445, "grad_norm": 0.021726198494434357, "learning_rate": 1.5637315350585962e-05, "loss": 0.15123635530471802, "step": 196660 }, { "epoch": 0.844345414423465, "grad_norm": 0.06716220825910568, "learning_rate": 1.563300363046834e-05, "loss": 0.20972583293914795, "step": 196670 }, { "epoch": 0.8443883465134849, "grad_norm": 3.435544729232788, "learning_rate": 1.5628691910350714e-05, "loss": 0.07267772555351257, "step": 196680 }, { "epoch": 0.844431278603505, "grad_norm": 0.06330103427171707, "learning_rate": 1.5624380190233094e-05, "loss": 0.10410884618759156, "step": 196690 }, { "epoch": 0.844474210693525, "grad_norm": 0.06830105185508728, "learning_rate": 1.562006847011547e-05, "loss": 0.15068842172622682, "step": 196700 }, { "epoch": 0.844517142783545, "grad_norm": 7.13935661315918, "learning_rate": 1.5615756749997846e-05, "loss": 0.34758007526397705, "step": 196710 }, { "epoch": 0.844560074873565, "grad_norm": 0.0010626811999827623, "learning_rate": 1.561144502988022e-05, "loss": 0.22543506622314452, "step": 196720 }, { "epoch": 0.844603006963585, "grad_norm": 0.6307433843612671, "learning_rate": 1.5607133309762597e-05, "loss": 0.014995664358139038, "step": 196730 }, { "epoch": 0.844645939053605, "grad_norm": 14.586715698242188, "learning_rate": 1.560282158964497e-05, "loss": 0.27491576671600343, "step": 196740 }, { "epoch": 0.844688871143625, "grad_norm": 0.30766063928604126, "learning_rate": 1.559850986952735e-05, "loss": 0.27586333751678466, "step": 196750 }, { "epoch": 0.844731803233645, "grad_norm": 0.016385281458497047, "learning_rate": 1.5594198149409726e-05, "loss": 0.27432804107666015, "step": 196760 }, { "epoch": 0.844774735323665, "grad_norm": 0.1537727415561676, "learning_rate": 1.5589886429292103e-05, "loss": 0.1805056095123291, "step": 196770 }, { "epoch": 0.844817667413685, "grad_norm": 5.115199089050293, "learning_rate": 1.5585574709174477e-05, "loss": 0.2694607973098755, "step": 196780 }, { "epoch": 0.8448605995037051, "grad_norm": 0.0011792482109740376, "learning_rate": 1.5581262989056854e-05, "loss": 0.21872477531433104, "step": 196790 }, { "epoch": 0.844903531593725, "grad_norm": 0.26486822962760925, "learning_rate": 1.557695126893923e-05, "loss": 0.15273507833480834, "step": 196800 }, { "epoch": 0.844946463683745, "grad_norm": 0.014987935312092304, "learning_rate": 1.557263954882161e-05, "loss": 0.1194075584411621, "step": 196810 }, { "epoch": 0.8449893957737651, "grad_norm": 0.001741683459840715, "learning_rate": 1.5568327828703986e-05, "loss": 0.1621376872062683, "step": 196820 }, { "epoch": 0.845032327863785, "grad_norm": 0.01641698181629181, "learning_rate": 1.556401610858636e-05, "loss": 0.08461439609527588, "step": 196830 }, { "epoch": 0.8450752599538051, "grad_norm": 0.2613827884197235, "learning_rate": 1.5559704388468738e-05, "loss": 0.02929074168205261, "step": 196840 }, { "epoch": 0.8451181920438251, "grad_norm": 3.5483486652374268, "learning_rate": 1.555539266835111e-05, "loss": 0.24586338996887208, "step": 196850 }, { "epoch": 0.845161124133845, "grad_norm": 0.2233700454235077, "learning_rate": 1.555108094823349e-05, "loss": 0.5050179958343506, "step": 196860 }, { "epoch": 0.8452040562238651, "grad_norm": 0.036731135100126266, "learning_rate": 1.5546769228115866e-05, "loss": 0.17513363361358641, "step": 196870 }, { "epoch": 0.8452469883138851, "grad_norm": 0.8537135124206543, "learning_rate": 1.5542457507998244e-05, "loss": 0.10908613204956055, "step": 196880 }, { "epoch": 0.845289920403905, "grad_norm": 0.018275853246450424, "learning_rate": 1.5538145787880617e-05, "loss": 0.19336570501327516, "step": 196890 }, { "epoch": 0.8453328524939251, "grad_norm": 0.08329416811466217, "learning_rate": 1.5533834067762995e-05, "loss": 0.14187638759613036, "step": 196900 }, { "epoch": 0.8453757845839451, "grad_norm": 0.01114792563021183, "learning_rate": 1.552952234764537e-05, "loss": 0.10525352954864502, "step": 196910 }, { "epoch": 0.8454187166739652, "grad_norm": 0.3175894320011139, "learning_rate": 1.5525210627527746e-05, "loss": 0.05137323141098023, "step": 196920 }, { "epoch": 0.8454616487639851, "grad_norm": 0.008936109021306038, "learning_rate": 1.5520898907410123e-05, "loss": 0.08222769498825074, "step": 196930 }, { "epoch": 0.8455045808540051, "grad_norm": 7.562148094177246, "learning_rate": 1.55165871872925e-05, "loss": 0.30330629348754884, "step": 196940 }, { "epoch": 0.8455475129440252, "grad_norm": 0.37392285466194153, "learning_rate": 1.5512275467174875e-05, "loss": 0.12386249303817749, "step": 196950 }, { "epoch": 0.8455904450340451, "grad_norm": 7.58154296875, "learning_rate": 1.5507963747057252e-05, "loss": 0.15494595766067504, "step": 196960 }, { "epoch": 0.8456333771240652, "grad_norm": 3.248518466949463, "learning_rate": 1.5503652026939626e-05, "loss": 0.39366850852966306, "step": 196970 }, { "epoch": 0.8456763092140852, "grad_norm": 4.586677074432373, "learning_rate": 1.5499340306822003e-05, "loss": 0.1424519896507263, "step": 196980 }, { "epoch": 0.8457192413041051, "grad_norm": 3.7495882511138916, "learning_rate": 1.549502858670438e-05, "loss": 0.027593034505844116, "step": 196990 }, { "epoch": 0.8457621733941252, "grad_norm": 0.5563340783119202, "learning_rate": 1.5490716866586758e-05, "loss": 0.19862442016601561, "step": 197000 }, { "epoch": 0.8457621733941252, "eval_loss": 0.3774981200695038, "eval_runtime": 27.4864, "eval_samples_per_second": 3.638, "eval_steps_per_second": 3.638, "step": 197000 }, { "epoch": 0.8458051054841452, "grad_norm": 42.356590270996094, "learning_rate": 1.5486405146469132e-05, "loss": 0.38418533802032473, "step": 197010 }, { "epoch": 0.8458480375741652, "grad_norm": 0.005917608272284269, "learning_rate": 1.548209342635151e-05, "loss": 0.12943546772003173, "step": 197020 }, { "epoch": 0.8458909696641852, "grad_norm": 0.477541446685791, "learning_rate": 1.5477781706233883e-05, "loss": 0.1763970136642456, "step": 197030 }, { "epoch": 0.8459339017542052, "grad_norm": 1.4189867973327637, "learning_rate": 1.5473469986116264e-05, "loss": 0.27072913646698, "step": 197040 }, { "epoch": 0.8459768338442252, "grad_norm": 1.8773818016052246, "learning_rate": 1.5469158265998638e-05, "loss": 0.44829769134521485, "step": 197050 }, { "epoch": 0.8460197659342452, "grad_norm": 0.03911508619785309, "learning_rate": 1.5464846545881015e-05, "loss": 0.12318435907363892, "step": 197060 }, { "epoch": 0.8460626980242653, "grad_norm": 0.0011162409791722894, "learning_rate": 1.546053482576339e-05, "loss": 0.19681681394577027, "step": 197070 }, { "epoch": 0.8461056301142852, "grad_norm": 0.033100008964538574, "learning_rate": 1.5456223105645767e-05, "loss": 0.15288090705871582, "step": 197080 }, { "epoch": 0.8461485622043052, "grad_norm": 0.04976103827357292, "learning_rate": 1.545191138552814e-05, "loss": 0.14306716918945311, "step": 197090 }, { "epoch": 0.8461914942943253, "grad_norm": 1.2336324453353882, "learning_rate": 1.544759966541052e-05, "loss": 0.13660180568695068, "step": 197100 }, { "epoch": 0.8462344263843452, "grad_norm": 0.0031069640535861254, "learning_rate": 1.5443287945292895e-05, "loss": 0.1620272159576416, "step": 197110 }, { "epoch": 0.8462773584743652, "grad_norm": 0.04594505578279495, "learning_rate": 1.5438976225175273e-05, "loss": 0.0735178291797638, "step": 197120 }, { "epoch": 0.8463202905643853, "grad_norm": 3.394017219543457, "learning_rate": 1.5434664505057647e-05, "loss": 0.20956742763519287, "step": 197130 }, { "epoch": 0.8463632226544052, "grad_norm": 4.642988204956055, "learning_rate": 1.5430352784940024e-05, "loss": 0.24042127132415772, "step": 197140 }, { "epoch": 0.8464061547444253, "grad_norm": 1.316240668296814, "learning_rate": 1.54260410648224e-05, "loss": 0.13471782207489014, "step": 197150 }, { "epoch": 0.8464490868344453, "grad_norm": 1.475600004196167, "learning_rate": 1.542172934470478e-05, "loss": 0.2492267370223999, "step": 197160 }, { "epoch": 0.8464920189244652, "grad_norm": 0.02170061506330967, "learning_rate": 1.5417417624587156e-05, "loss": 0.2634519100189209, "step": 197170 }, { "epoch": 0.8465349510144853, "grad_norm": 5.417356491088867, "learning_rate": 1.541310590446953e-05, "loss": 0.12121155261993408, "step": 197180 }, { "epoch": 0.8465778831045053, "grad_norm": 0.09285827726125717, "learning_rate": 1.5408794184351907e-05, "loss": 0.12122727632522583, "step": 197190 }, { "epoch": 0.8466208151945253, "grad_norm": 0.007813221774995327, "learning_rate": 1.540448246423428e-05, "loss": 0.098856121301651, "step": 197200 }, { "epoch": 0.8466637472845453, "grad_norm": 0.2953546643257141, "learning_rate": 1.540017074411666e-05, "loss": 0.10793371200561523, "step": 197210 }, { "epoch": 0.8467066793745653, "grad_norm": 1.193633794784546, "learning_rate": 1.5395859023999036e-05, "loss": 0.30551395416259763, "step": 197220 }, { "epoch": 0.8467496114645853, "grad_norm": 0.010783948004245758, "learning_rate": 1.5391547303881413e-05, "loss": 0.23938562870025634, "step": 197230 }, { "epoch": 0.8467925435546053, "grad_norm": 0.0030303297098726034, "learning_rate": 1.5387235583763787e-05, "loss": 0.17464864253997803, "step": 197240 }, { "epoch": 0.8468354756446254, "grad_norm": 2.3788888454437256, "learning_rate": 1.5382923863646165e-05, "loss": 0.23440778255462646, "step": 197250 }, { "epoch": 0.8468784077346453, "grad_norm": 0.002660320606082678, "learning_rate": 1.537861214352854e-05, "loss": 0.23806724548339844, "step": 197260 }, { "epoch": 0.8469213398246653, "grad_norm": 0.014182024635374546, "learning_rate": 1.5374300423410916e-05, "loss": 0.10502651929855347, "step": 197270 }, { "epoch": 0.8469642719146854, "grad_norm": 0.03121148981153965, "learning_rate": 1.5369988703293293e-05, "loss": 0.2541842222213745, "step": 197280 }, { "epoch": 0.8470072040047053, "grad_norm": 1.580581545829773, "learning_rate": 1.536567698317567e-05, "loss": 0.22803409099578859, "step": 197290 }, { "epoch": 0.8470501360947253, "grad_norm": 0.006081053521484137, "learning_rate": 1.5361365263058045e-05, "loss": 0.1805238366127014, "step": 197300 }, { "epoch": 0.8470930681847454, "grad_norm": 0.1482483446598053, "learning_rate": 1.5357053542940422e-05, "loss": 0.24855918884277345, "step": 197310 }, { "epoch": 0.8471360002747653, "grad_norm": 0.010404618456959724, "learning_rate": 1.5352741822822796e-05, "loss": 0.1581823706626892, "step": 197320 }, { "epoch": 0.8471789323647854, "grad_norm": 0.5020700097084045, "learning_rate": 1.5348430102705173e-05, "loss": 0.2955003023147583, "step": 197330 }, { "epoch": 0.8472218644548054, "grad_norm": 0.04875797778367996, "learning_rate": 1.534411838258755e-05, "loss": 0.17217621803283692, "step": 197340 }, { "epoch": 0.8472647965448254, "grad_norm": 0.12947072088718414, "learning_rate": 1.5339806662469928e-05, "loss": 0.23299505710601806, "step": 197350 }, { "epoch": 0.8473077286348454, "grad_norm": 0.00724436342716217, "learning_rate": 1.5335494942352302e-05, "loss": 0.16534049510955812, "step": 197360 }, { "epoch": 0.8473506607248654, "grad_norm": 0.017508773133158684, "learning_rate": 1.533118322223468e-05, "loss": 0.15059289932250977, "step": 197370 }, { "epoch": 0.8473935928148855, "grad_norm": 0.17189420759677887, "learning_rate": 1.5326871502117053e-05, "loss": 0.2993730306625366, "step": 197380 }, { "epoch": 0.8474365249049054, "grad_norm": 0.10093765705823898, "learning_rate": 1.532255978199943e-05, "loss": 0.15821362733840943, "step": 197390 }, { "epoch": 0.8474794569949254, "grad_norm": 0.009381674230098724, "learning_rate": 1.5318248061881808e-05, "loss": 0.11810331344604492, "step": 197400 }, { "epoch": 0.8475223890849455, "grad_norm": 2.480515956878662, "learning_rate": 1.5313936341764185e-05, "loss": 0.15523053407669068, "step": 197410 }, { "epoch": 0.8475653211749654, "grad_norm": 3.614999532699585, "learning_rate": 1.530962462164656e-05, "loss": 0.15576525926589965, "step": 197420 }, { "epoch": 0.8476082532649855, "grad_norm": 65.64250183105469, "learning_rate": 1.5305312901528936e-05, "loss": 0.19414908885955812, "step": 197430 }, { "epoch": 0.8476511853550055, "grad_norm": 0.489206999540329, "learning_rate": 1.530100118141131e-05, "loss": 0.32179012298583987, "step": 197440 }, { "epoch": 0.8476941174450254, "grad_norm": 0.11385396867990494, "learning_rate": 1.529668946129369e-05, "loss": 0.20681419372558593, "step": 197450 }, { "epoch": 0.8477370495350455, "grad_norm": 0.11933526396751404, "learning_rate": 1.5292377741176065e-05, "loss": 0.14109406471252442, "step": 197460 }, { "epoch": 0.8477799816250655, "grad_norm": 0.002649921691045165, "learning_rate": 1.5288066021058442e-05, "loss": 0.17448320388793945, "step": 197470 }, { "epoch": 0.8478229137150854, "grad_norm": 2.262589931488037, "learning_rate": 1.5283754300940816e-05, "loss": 0.07583979964256286, "step": 197480 }, { "epoch": 0.8478658458051055, "grad_norm": 0.14377081394195557, "learning_rate": 1.5279442580823194e-05, "loss": 0.3324470043182373, "step": 197490 }, { "epoch": 0.8479087778951255, "grad_norm": 0.023049483075737953, "learning_rate": 1.5275130860705568e-05, "loss": 0.09699010848999023, "step": 197500 }, { "epoch": 0.8479517099851455, "grad_norm": 0.047775398939847946, "learning_rate": 1.527081914058795e-05, "loss": 0.13095303773880004, "step": 197510 }, { "epoch": 0.8479946420751655, "grad_norm": 1.631107211112976, "learning_rate": 1.5266507420470322e-05, "loss": 0.29105587005615235, "step": 197520 }, { "epoch": 0.8480375741651855, "grad_norm": 0.03031880035996437, "learning_rate": 1.52621957003527e-05, "loss": 0.2267350435256958, "step": 197530 }, { "epoch": 0.8480805062552055, "grad_norm": 0.4916745722293854, "learning_rate": 1.5257883980235077e-05, "loss": 0.14272670745849608, "step": 197540 }, { "epoch": 0.8481234383452255, "grad_norm": 0.009396664798259735, "learning_rate": 1.5253572260117453e-05, "loss": 0.03404759168624878, "step": 197550 }, { "epoch": 0.8481663704352456, "grad_norm": 6.043698310852051, "learning_rate": 1.524926053999983e-05, "loss": 0.2500048875808716, "step": 197560 }, { "epoch": 0.8482093025252655, "grad_norm": 0.02857845462858677, "learning_rate": 1.5244948819882204e-05, "loss": 0.08116910457611085, "step": 197570 }, { "epoch": 0.8482522346152855, "grad_norm": 4.6327128410339355, "learning_rate": 1.5240637099764581e-05, "loss": 0.3209220886230469, "step": 197580 }, { "epoch": 0.8482951667053056, "grad_norm": 0.36840423941612244, "learning_rate": 1.5236325379646957e-05, "loss": 0.18857355117797853, "step": 197590 }, { "epoch": 0.8483380987953255, "grad_norm": 0.011004694737493992, "learning_rate": 1.5232013659529334e-05, "loss": 0.0036482542753219606, "step": 197600 }, { "epoch": 0.8483810308853456, "grad_norm": 0.09082172065973282, "learning_rate": 1.522770193941171e-05, "loss": 0.2099597215652466, "step": 197610 }, { "epoch": 0.8484239629753656, "grad_norm": 1.6527332067489624, "learning_rate": 1.5223390219294087e-05, "loss": 0.40575323104858396, "step": 197620 }, { "epoch": 0.8484668950653855, "grad_norm": 3.2181715965270996, "learning_rate": 1.5219078499176461e-05, "loss": 0.11115305423736573, "step": 197630 }, { "epoch": 0.8485098271554056, "grad_norm": 0.1894989311695099, "learning_rate": 1.5214766779058839e-05, "loss": 0.2039348840713501, "step": 197640 }, { "epoch": 0.8485527592454256, "grad_norm": 1.8448779582977295, "learning_rate": 1.5210455058941214e-05, "loss": 0.2965463399887085, "step": 197650 }, { "epoch": 0.8485956913354455, "grad_norm": 0.028072591871023178, "learning_rate": 1.5206143338823592e-05, "loss": 0.144870388507843, "step": 197660 }, { "epoch": 0.8486386234254656, "grad_norm": 0.006217554677277803, "learning_rate": 1.5201831618705967e-05, "loss": 0.03407878577709198, "step": 197670 }, { "epoch": 0.8486815555154856, "grad_norm": 0.029794985428452492, "learning_rate": 1.5197519898588345e-05, "loss": 0.1508237600326538, "step": 197680 }, { "epoch": 0.8487244876055056, "grad_norm": 1.7843852043151855, "learning_rate": 1.5193208178470719e-05, "loss": 0.20659241676330567, "step": 197690 }, { "epoch": 0.8487674196955256, "grad_norm": 0.2533365488052368, "learning_rate": 1.5188896458353098e-05, "loss": 0.22586512565612793, "step": 197700 }, { "epoch": 0.8488103517855456, "grad_norm": 0.00228360784240067, "learning_rate": 1.5184584738235472e-05, "loss": 0.3032339572906494, "step": 197710 }, { "epoch": 0.8488532838755656, "grad_norm": 0.0023056359495967627, "learning_rate": 1.5180273018117849e-05, "loss": 0.15963205099105834, "step": 197720 }, { "epoch": 0.8488962159655856, "grad_norm": 26.590858459472656, "learning_rate": 1.5175961298000225e-05, "loss": 0.18662004470825194, "step": 197730 }, { "epoch": 0.8489391480556057, "grad_norm": 0.003507897723466158, "learning_rate": 1.5171649577882602e-05, "loss": 0.13641457557678222, "step": 197740 }, { "epoch": 0.8489820801456256, "grad_norm": 0.008930052630603313, "learning_rate": 1.5167337857764976e-05, "loss": 0.23413097858428955, "step": 197750 }, { "epoch": 0.8490250122356456, "grad_norm": 0.057356588542461395, "learning_rate": 1.5163026137647355e-05, "loss": 0.04057014882564545, "step": 197760 }, { "epoch": 0.8490679443256657, "grad_norm": 1.1264686584472656, "learning_rate": 1.5158714417529729e-05, "loss": 0.11325465440750122, "step": 197770 }, { "epoch": 0.8491108764156857, "grad_norm": 0.25341594219207764, "learning_rate": 1.5154402697412106e-05, "loss": 0.09531864523887634, "step": 197780 }, { "epoch": 0.8491538085057057, "grad_norm": 0.05748600512742996, "learning_rate": 1.5150090977294482e-05, "loss": 0.17363308668136596, "step": 197790 }, { "epoch": 0.8491967405957257, "grad_norm": 0.07798365503549576, "learning_rate": 1.5145779257176859e-05, "loss": 0.1146618366241455, "step": 197800 }, { "epoch": 0.8492396726857457, "grad_norm": 0.015463123098015785, "learning_rate": 1.5141467537059235e-05, "loss": 0.0030902113765478136, "step": 197810 }, { "epoch": 0.8492826047757657, "grad_norm": 0.07422202825546265, "learning_rate": 1.5137155816941612e-05, "loss": 0.23043811321258545, "step": 197820 }, { "epoch": 0.8493255368657857, "grad_norm": 0.3136723041534424, "learning_rate": 1.5132844096823986e-05, "loss": 0.10654685497283936, "step": 197830 }, { "epoch": 0.8493684689558058, "grad_norm": 0.011924277059733868, "learning_rate": 1.5128532376706363e-05, "loss": 0.32079641819000243, "step": 197840 }, { "epoch": 0.8494114010458257, "grad_norm": 0.002160630887374282, "learning_rate": 1.5124220656588739e-05, "loss": 0.09690849184989929, "step": 197850 }, { "epoch": 0.8494543331358457, "grad_norm": 4.282308101654053, "learning_rate": 1.5119908936471116e-05, "loss": 0.3771207809448242, "step": 197860 }, { "epoch": 0.8494972652258658, "grad_norm": 0.034167733043432236, "learning_rate": 1.5115597216353492e-05, "loss": 0.05973265767097473, "step": 197870 }, { "epoch": 0.8495401973158857, "grad_norm": 2.4957375526428223, "learning_rate": 1.511128549623587e-05, "loss": 0.18202605247497558, "step": 197880 }, { "epoch": 0.8495831294059057, "grad_norm": 0.14000332355499268, "learning_rate": 1.5106973776118247e-05, "loss": 0.16483761072158815, "step": 197890 }, { "epoch": 0.8496260614959258, "grad_norm": 6.224600791931152, "learning_rate": 1.5102662056000622e-05, "loss": 0.4294785499572754, "step": 197900 }, { "epoch": 0.8496689935859457, "grad_norm": 0.027004102244973183, "learning_rate": 1.5098350335883e-05, "loss": 0.0069320306181907656, "step": 197910 }, { "epoch": 0.8497119256759658, "grad_norm": 0.08216096460819244, "learning_rate": 1.5094038615765374e-05, "loss": 0.0045673668384552, "step": 197920 }, { "epoch": 0.8497548577659858, "grad_norm": 0.07297103106975555, "learning_rate": 1.5089726895647751e-05, "loss": 0.26889212131500245, "step": 197930 }, { "epoch": 0.8497977898560057, "grad_norm": 0.006628489587455988, "learning_rate": 1.5085415175530127e-05, "loss": 0.2143918514251709, "step": 197940 }, { "epoch": 0.8498407219460258, "grad_norm": 0.0073399050161242485, "learning_rate": 1.5081103455412504e-05, "loss": 0.1867246985435486, "step": 197950 }, { "epoch": 0.8498836540360458, "grad_norm": 1.6105217933654785, "learning_rate": 1.507679173529488e-05, "loss": 0.10490700006484985, "step": 197960 }, { "epoch": 0.8499265861260658, "grad_norm": 1.9887235164642334, "learning_rate": 1.5072480015177257e-05, "loss": 0.30768733024597167, "step": 197970 }, { "epoch": 0.8499695182160858, "grad_norm": 1.3068478107452393, "learning_rate": 1.5068168295059631e-05, "loss": 0.2877668857574463, "step": 197980 }, { "epoch": 0.8500124503061058, "grad_norm": 0.037830330431461334, "learning_rate": 1.5063856574942008e-05, "loss": 0.40204415321350095, "step": 197990 }, { "epoch": 0.8500553823961258, "grad_norm": 0.6198349595069885, "learning_rate": 1.5059544854824384e-05, "loss": 0.21968297958374022, "step": 198000 }, { "epoch": 0.8500553823961258, "eval_loss": 0.37510281801223755, "eval_runtime": 27.474, "eval_samples_per_second": 3.64, "eval_steps_per_second": 3.64, "step": 198000 }, { "epoch": 0.8500983144861458, "grad_norm": 1.1435520648956299, "learning_rate": 1.5055233134706761e-05, "loss": 0.0789073646068573, "step": 198010 }, { "epoch": 0.8501412465761659, "grad_norm": 0.03565061092376709, "learning_rate": 1.5050921414589137e-05, "loss": 0.3191929578781128, "step": 198020 }, { "epoch": 0.8501841786661858, "grad_norm": 7.397432804107666, "learning_rate": 1.5046609694471514e-05, "loss": 0.2238842487335205, "step": 198030 }, { "epoch": 0.8502271107562058, "grad_norm": 0.06873110681772232, "learning_rate": 1.5042297974353888e-05, "loss": 0.21624131202697755, "step": 198040 }, { "epoch": 0.8502700428462259, "grad_norm": 2.253124952316284, "learning_rate": 1.5037986254236267e-05, "loss": 0.28505237102508546, "step": 198050 }, { "epoch": 0.8503129749362458, "grad_norm": 4.617161273956299, "learning_rate": 1.5033674534118641e-05, "loss": 0.041191473603248596, "step": 198060 }, { "epoch": 0.8503559070262658, "grad_norm": 0.4110824167728424, "learning_rate": 1.5029362814001019e-05, "loss": 0.1780386209487915, "step": 198070 }, { "epoch": 0.8503988391162859, "grad_norm": 0.0656173974275589, "learning_rate": 1.5025051093883394e-05, "loss": 0.01941981315612793, "step": 198080 }, { "epoch": 0.8504417712063058, "grad_norm": 0.017897628247737885, "learning_rate": 1.5020739373765772e-05, "loss": 0.27413904666900635, "step": 198090 }, { "epoch": 0.8504847032963259, "grad_norm": 0.0807955265045166, "learning_rate": 1.5016427653648146e-05, "loss": 0.46311440467834475, "step": 198100 }, { "epoch": 0.8505276353863459, "grad_norm": 0.0031225387938320637, "learning_rate": 1.5012115933530525e-05, "loss": 0.28832767009735105, "step": 198110 }, { "epoch": 0.8505705674763658, "grad_norm": 0.026935061439871788, "learning_rate": 1.5007804213412899e-05, "loss": 0.025250527262687682, "step": 198120 }, { "epoch": 0.8506134995663859, "grad_norm": 0.037563394755125046, "learning_rate": 1.5003492493295276e-05, "loss": 0.3608541965484619, "step": 198130 }, { "epoch": 0.8506564316564059, "grad_norm": 0.007384961470961571, "learning_rate": 1.4999180773177652e-05, "loss": 0.16276904344558715, "step": 198140 }, { "epoch": 0.8506993637464259, "grad_norm": 1.0036953687667847, "learning_rate": 1.4994869053060029e-05, "loss": 0.1697608470916748, "step": 198150 }, { "epoch": 0.8507422958364459, "grad_norm": 4.348733425140381, "learning_rate": 1.4990557332942405e-05, "loss": 0.33487553596496583, "step": 198160 }, { "epoch": 0.8507852279264659, "grad_norm": 0.004956691060215235, "learning_rate": 1.4986245612824782e-05, "loss": 0.12451618909835815, "step": 198170 }, { "epoch": 0.8508281600164859, "grad_norm": 2.82716703414917, "learning_rate": 1.4981933892707156e-05, "loss": 0.16169651746749877, "step": 198180 }, { "epoch": 0.8508710921065059, "grad_norm": 0.003215742064639926, "learning_rate": 1.4977622172589533e-05, "loss": 0.22183990478515625, "step": 198190 }, { "epoch": 0.850914024196526, "grad_norm": 0.011257747188210487, "learning_rate": 1.4973310452471909e-05, "loss": 0.14878798723220826, "step": 198200 }, { "epoch": 0.850956956286546, "grad_norm": 0.20254521071910858, "learning_rate": 1.4968998732354286e-05, "loss": 0.0998440444469452, "step": 198210 }, { "epoch": 0.8509998883765659, "grad_norm": 6.641183853149414, "learning_rate": 1.4964687012236662e-05, "loss": 0.3686415910720825, "step": 198220 }, { "epoch": 0.851042820466586, "grad_norm": 3.061818838119507, "learning_rate": 1.4960375292119039e-05, "loss": 0.2695192575454712, "step": 198230 }, { "epoch": 0.851085752556606, "grad_norm": 0.0106343450024724, "learning_rate": 1.4956063572001413e-05, "loss": 0.10752819776535034, "step": 198240 }, { "epoch": 0.8511286846466259, "grad_norm": 1.9127508401870728, "learning_rate": 1.495175185188379e-05, "loss": 0.13210554122924806, "step": 198250 }, { "epoch": 0.851171616736646, "grad_norm": 4.323821544647217, "learning_rate": 1.494744013176617e-05, "loss": 0.21595339775085448, "step": 198260 }, { "epoch": 0.851214548826666, "grad_norm": 0.07402490079402924, "learning_rate": 1.4943128411648543e-05, "loss": 0.14881352186203003, "step": 198270 }, { "epoch": 0.851257480916686, "grad_norm": 0.0036409071180969477, "learning_rate": 1.493881669153092e-05, "loss": 0.1895911693572998, "step": 198280 }, { "epoch": 0.851300413006706, "grad_norm": 1.7480568885803223, "learning_rate": 1.4934504971413296e-05, "loss": 0.10597457885742187, "step": 198290 }, { "epoch": 0.851343345096726, "grad_norm": 0.02202913723886013, "learning_rate": 1.4930193251295674e-05, "loss": 0.2131192207336426, "step": 198300 }, { "epoch": 0.851386277186746, "grad_norm": 0.06853848695755005, "learning_rate": 1.492588153117805e-05, "loss": 0.12416889667510986, "step": 198310 }, { "epoch": 0.851429209276766, "grad_norm": 0.20226770639419556, "learning_rate": 1.4921569811060427e-05, "loss": 0.18529151678085326, "step": 198320 }, { "epoch": 0.8514721413667861, "grad_norm": 0.012098453938961029, "learning_rate": 1.49172580909428e-05, "loss": 0.18486225605010986, "step": 198330 }, { "epoch": 0.851515073456806, "grad_norm": 0.000547365692909807, "learning_rate": 1.4912946370825178e-05, "loss": 0.2261587142944336, "step": 198340 }, { "epoch": 0.851558005546826, "grad_norm": 52.2000732421875, "learning_rate": 1.4908634650707554e-05, "loss": 0.17255141735076904, "step": 198350 }, { "epoch": 0.8516009376368461, "grad_norm": 0.004955257289111614, "learning_rate": 1.4904322930589931e-05, "loss": 0.14709478616714478, "step": 198360 }, { "epoch": 0.851643869726866, "grad_norm": 0.029430586844682693, "learning_rate": 1.4900011210472307e-05, "loss": 0.019669802486896516, "step": 198370 }, { "epoch": 0.851686801816886, "grad_norm": 0.7717126607894897, "learning_rate": 1.4895699490354684e-05, "loss": 0.2101571798324585, "step": 198380 }, { "epoch": 0.8517297339069061, "grad_norm": 0.08160284161567688, "learning_rate": 1.4891387770237058e-05, "loss": 0.01767835021018982, "step": 198390 }, { "epoch": 0.851772665996926, "grad_norm": 3.979954481124878, "learning_rate": 1.4887076050119435e-05, "loss": 0.1902117371559143, "step": 198400 }, { "epoch": 0.8518155980869461, "grad_norm": 0.8650118708610535, "learning_rate": 1.4882764330001811e-05, "loss": 0.1752025842666626, "step": 198410 }, { "epoch": 0.8518585301769661, "grad_norm": 0.02981482818722725, "learning_rate": 1.4878452609884188e-05, "loss": 0.17029629945755004, "step": 198420 }, { "epoch": 0.851901462266986, "grad_norm": 2.2017018795013428, "learning_rate": 1.4874140889766564e-05, "loss": 0.14863049983978271, "step": 198430 }, { "epoch": 0.8519443943570061, "grad_norm": 0.03577551618218422, "learning_rate": 1.4869829169648941e-05, "loss": 0.09861577153205872, "step": 198440 }, { "epoch": 0.8519873264470261, "grad_norm": 5.863005638122559, "learning_rate": 1.4865517449531315e-05, "loss": 0.32581243515014646, "step": 198450 }, { "epoch": 0.8520302585370461, "grad_norm": 0.10977106541395187, "learning_rate": 1.4861205729413694e-05, "loss": 0.2411884069442749, "step": 198460 }, { "epoch": 0.8520731906270661, "grad_norm": 0.026395481079816818, "learning_rate": 1.4856894009296068e-05, "loss": 0.11193904876708985, "step": 198470 }, { "epoch": 0.8521161227170861, "grad_norm": 0.003131694160401821, "learning_rate": 1.4852582289178446e-05, "loss": 0.09634979963302612, "step": 198480 }, { "epoch": 0.8521590548071061, "grad_norm": 0.011490268632769585, "learning_rate": 1.4848270569060821e-05, "loss": 0.12840532064437865, "step": 198490 }, { "epoch": 0.8522019868971261, "grad_norm": 0.004589975345879793, "learning_rate": 1.4843958848943199e-05, "loss": 0.2682207107543945, "step": 198500 }, { "epoch": 0.8522449189871462, "grad_norm": 0.00972757488489151, "learning_rate": 1.4839647128825573e-05, "loss": 0.24504990577697755, "step": 198510 }, { "epoch": 0.8522878510771661, "grad_norm": 0.8272261023521423, "learning_rate": 1.4835335408707952e-05, "loss": 0.10439317226409912, "step": 198520 }, { "epoch": 0.8523307831671861, "grad_norm": 0.005756685975939035, "learning_rate": 1.4831023688590326e-05, "loss": 0.1679734468460083, "step": 198530 }, { "epoch": 0.8523737152572062, "grad_norm": 0.1370045691728592, "learning_rate": 1.4826711968472703e-05, "loss": 0.06637945771217346, "step": 198540 }, { "epoch": 0.8524166473472261, "grad_norm": 1.1877647638320923, "learning_rate": 1.4822400248355079e-05, "loss": 0.23604612350463866, "step": 198550 }, { "epoch": 0.8524595794372462, "grad_norm": 0.03956817835569382, "learning_rate": 1.4818088528237456e-05, "loss": 0.2815361976623535, "step": 198560 }, { "epoch": 0.8525025115272662, "grad_norm": 0.0008984607411548495, "learning_rate": 1.4813776808119832e-05, "loss": 0.13040565252304076, "step": 198570 }, { "epoch": 0.8525454436172861, "grad_norm": 0.013303420506417751, "learning_rate": 1.4809465088002209e-05, "loss": 0.07282218933105469, "step": 198580 }, { "epoch": 0.8525883757073062, "grad_norm": 0.01912931352853775, "learning_rate": 1.4805153367884583e-05, "loss": 0.1441921591758728, "step": 198590 }, { "epoch": 0.8526313077973262, "grad_norm": 0.0023365512024611235, "learning_rate": 1.480084164776696e-05, "loss": 0.038869994878768924, "step": 198600 }, { "epoch": 0.8526742398873461, "grad_norm": 0.0236685611307621, "learning_rate": 1.4796529927649336e-05, "loss": 0.14604955911636353, "step": 198610 }, { "epoch": 0.8527171719773662, "grad_norm": 0.8849748373031616, "learning_rate": 1.4792218207531713e-05, "loss": 0.24399905204772948, "step": 198620 }, { "epoch": 0.8527601040673862, "grad_norm": 0.007544425316154957, "learning_rate": 1.478790648741409e-05, "loss": 0.1654476284980774, "step": 198630 }, { "epoch": 0.8528030361574063, "grad_norm": 0.0008877692162059247, "learning_rate": 1.4783594767296466e-05, "loss": 0.2518858671188354, "step": 198640 }, { "epoch": 0.8528459682474262, "grad_norm": 5.671640396118164, "learning_rate": 1.4779283047178844e-05, "loss": 0.06790667772293091, "step": 198650 }, { "epoch": 0.8528889003374462, "grad_norm": 0.04101986810564995, "learning_rate": 1.477497132706122e-05, "loss": 0.274362587928772, "step": 198660 }, { "epoch": 0.8529318324274663, "grad_norm": 0.4285285174846649, "learning_rate": 1.4770659606943596e-05, "loss": 0.3526389837265015, "step": 198670 }, { "epoch": 0.8529747645174862, "grad_norm": 4.997021198272705, "learning_rate": 1.476634788682597e-05, "loss": 0.2588140249252319, "step": 198680 }, { "epoch": 0.8530176966075063, "grad_norm": 0.00464608846232295, "learning_rate": 1.4762036166708348e-05, "loss": 0.2015136480331421, "step": 198690 }, { "epoch": 0.8530606286975263, "grad_norm": 3.5841684341430664, "learning_rate": 1.4757724446590723e-05, "loss": 0.2638310432434082, "step": 198700 }, { "epoch": 0.8531035607875462, "grad_norm": 0.02887623757123947, "learning_rate": 1.47534127264731e-05, "loss": 0.19619773626327514, "step": 198710 }, { "epoch": 0.8531464928775663, "grad_norm": 0.12220575660467148, "learning_rate": 1.4749101006355476e-05, "loss": 0.05668666362762451, "step": 198720 }, { "epoch": 0.8531894249675863, "grad_norm": 0.0056074149906635284, "learning_rate": 1.4744789286237854e-05, "loss": 0.3229382038116455, "step": 198730 }, { "epoch": 0.8532323570576062, "grad_norm": 0.0034133633598685265, "learning_rate": 1.4740477566120228e-05, "loss": 0.11227246522903442, "step": 198740 }, { "epoch": 0.8532752891476263, "grad_norm": 0.008565911091864109, "learning_rate": 1.4736165846002605e-05, "loss": 0.30050177574157716, "step": 198750 }, { "epoch": 0.8533182212376463, "grad_norm": 0.010548289865255356, "learning_rate": 1.473185412588498e-05, "loss": 0.15760529041290283, "step": 198760 }, { "epoch": 0.8533611533276663, "grad_norm": 1.4700676202774048, "learning_rate": 1.4727542405767358e-05, "loss": 0.2501633644104004, "step": 198770 }, { "epoch": 0.8534040854176863, "grad_norm": 1.133919358253479, "learning_rate": 1.4723230685649734e-05, "loss": 0.09911720156669616, "step": 198780 }, { "epoch": 0.8534470175077064, "grad_norm": 0.002514589112251997, "learning_rate": 1.4718918965532111e-05, "loss": 0.11620491743087769, "step": 198790 }, { "epoch": 0.8534899495977263, "grad_norm": 0.2662227749824524, "learning_rate": 1.4714607245414485e-05, "loss": 0.24606781005859374, "step": 198800 }, { "epoch": 0.8535328816877463, "grad_norm": 0.05281221121549606, "learning_rate": 1.4710295525296864e-05, "loss": 0.1803385615348816, "step": 198810 }, { "epoch": 0.8535758137777664, "grad_norm": 0.2812144160270691, "learning_rate": 1.4705983805179238e-05, "loss": 0.06128525733947754, "step": 198820 }, { "epoch": 0.8536187458677863, "grad_norm": 3.4682204723358154, "learning_rate": 1.4701672085061615e-05, "loss": 0.4475576877593994, "step": 198830 }, { "epoch": 0.8536616779578063, "grad_norm": 11.337523460388184, "learning_rate": 1.4697360364943991e-05, "loss": 0.5344314575195312, "step": 198840 }, { "epoch": 0.8537046100478264, "grad_norm": 1.6709545850753784, "learning_rate": 1.4693048644826368e-05, "loss": 0.28226404190063475, "step": 198850 }, { "epoch": 0.8537475421378463, "grad_norm": 0.10255688428878784, "learning_rate": 1.4688736924708742e-05, "loss": 0.2390998363494873, "step": 198860 }, { "epoch": 0.8537904742278664, "grad_norm": 0.021323187276721, "learning_rate": 1.4684425204591121e-05, "loss": 0.24051153659820557, "step": 198870 }, { "epoch": 0.8538334063178864, "grad_norm": 0.011119546368718147, "learning_rate": 1.4680113484473495e-05, "loss": 0.09393274188041686, "step": 198880 }, { "epoch": 0.8538763384079063, "grad_norm": 0.03298373520374298, "learning_rate": 1.4675801764355873e-05, "loss": 0.135442852973938, "step": 198890 }, { "epoch": 0.8539192704979264, "grad_norm": 2.304260730743408, "learning_rate": 1.4671490044238248e-05, "loss": 0.1495063066482544, "step": 198900 }, { "epoch": 0.8539622025879464, "grad_norm": 0.033370859920978546, "learning_rate": 1.4667178324120626e-05, "loss": 0.17575643062591553, "step": 198910 }, { "epoch": 0.8540051346779663, "grad_norm": 0.0040658460929989815, "learning_rate": 1.4662866604003001e-05, "loss": 0.21572141647338866, "step": 198920 }, { "epoch": 0.8540480667679864, "grad_norm": 1.2997663021087646, "learning_rate": 1.4658554883885379e-05, "loss": 0.12388513088226319, "step": 198930 }, { "epoch": 0.8540909988580064, "grad_norm": 0.35280415415763855, "learning_rate": 1.4654243163767753e-05, "loss": 0.11193270683288574, "step": 198940 }, { "epoch": 0.8541339309480264, "grad_norm": 1.490394949913025, "learning_rate": 1.464993144365013e-05, "loss": 0.309222149848938, "step": 198950 }, { "epoch": 0.8541768630380464, "grad_norm": 0.7588453888893127, "learning_rate": 1.4645619723532506e-05, "loss": 0.28592557907104493, "step": 198960 }, { "epoch": 0.8542197951280665, "grad_norm": 0.002865720773115754, "learning_rate": 1.4641308003414883e-05, "loss": 0.11334433555603027, "step": 198970 }, { "epoch": 0.8542627272180864, "grad_norm": 17.505897521972656, "learning_rate": 1.463699628329726e-05, "loss": 0.2242356061935425, "step": 198980 }, { "epoch": 0.8543056593081064, "grad_norm": 0.6268243193626404, "learning_rate": 1.4632684563179636e-05, "loss": 0.3863025665283203, "step": 198990 }, { "epoch": 0.8543485913981265, "grad_norm": 0.0017697836738079786, "learning_rate": 1.4628372843062013e-05, "loss": 0.10508785247802735, "step": 199000 }, { "epoch": 0.8543485913981265, "eval_loss": 0.3737829625606537, "eval_runtime": 27.4522, "eval_samples_per_second": 3.643, "eval_steps_per_second": 3.643, "step": 199000 }, { "epoch": 0.8543915234881464, "grad_norm": 21.080825805664062, "learning_rate": 1.4624061122944387e-05, "loss": 0.2644538640975952, "step": 199010 }, { "epoch": 0.8544344555781664, "grad_norm": 0.2038983702659607, "learning_rate": 1.4619749402826766e-05, "loss": 0.24054296016693116, "step": 199020 }, { "epoch": 0.8544773876681865, "grad_norm": 0.03463335707783699, "learning_rate": 1.461543768270914e-05, "loss": 0.2102268934249878, "step": 199030 }, { "epoch": 0.8545203197582064, "grad_norm": 0.0042945947498083115, "learning_rate": 1.4611125962591518e-05, "loss": 0.09614935517311096, "step": 199040 }, { "epoch": 0.8545632518482265, "grad_norm": 0.13123205304145813, "learning_rate": 1.4606814242473893e-05, "loss": 0.3031400203704834, "step": 199050 }, { "epoch": 0.8546061839382465, "grad_norm": 0.06705895811319351, "learning_rate": 1.460250252235627e-05, "loss": 0.1421942949295044, "step": 199060 }, { "epoch": 0.8546491160282665, "grad_norm": 4.683314323425293, "learning_rate": 1.4598190802238646e-05, "loss": 0.08168305158615112, "step": 199070 }, { "epoch": 0.8546920481182865, "grad_norm": 0.0422859787940979, "learning_rate": 1.4593879082121024e-05, "loss": 0.16412352323532103, "step": 199080 }, { "epoch": 0.8547349802083065, "grad_norm": 0.021981369704008102, "learning_rate": 1.4589567362003397e-05, "loss": 0.20852310657501222, "step": 199090 }, { "epoch": 0.8547779122983266, "grad_norm": 0.1515505313873291, "learning_rate": 1.4585255641885775e-05, "loss": 0.06142481565475464, "step": 199100 }, { "epoch": 0.8548208443883465, "grad_norm": 0.13333864510059357, "learning_rate": 1.458094392176815e-05, "loss": 0.2553966283798218, "step": 199110 }, { "epoch": 0.8548637764783665, "grad_norm": 26.86813735961914, "learning_rate": 1.4576632201650528e-05, "loss": 0.36869144439697266, "step": 199120 }, { "epoch": 0.8549067085683866, "grad_norm": 5.093358516693115, "learning_rate": 1.4572320481532903e-05, "loss": 0.32376537322998045, "step": 199130 }, { "epoch": 0.8549496406584065, "grad_norm": 0.03296341374516487, "learning_rate": 1.456800876141528e-05, "loss": 0.3048763036727905, "step": 199140 }, { "epoch": 0.8549925727484265, "grad_norm": 0.14229708909988403, "learning_rate": 1.4563697041297655e-05, "loss": 0.1860854983329773, "step": 199150 }, { "epoch": 0.8550355048384466, "grad_norm": 0.0028889018576592207, "learning_rate": 1.4559385321180034e-05, "loss": 0.3693838357925415, "step": 199160 }, { "epoch": 0.8550784369284665, "grad_norm": 0.8938426971435547, "learning_rate": 1.4555073601062408e-05, "loss": 0.3041311979293823, "step": 199170 }, { "epoch": 0.8551213690184866, "grad_norm": 0.0019801489543169737, "learning_rate": 1.4550761880944785e-05, "loss": 0.2032299041748047, "step": 199180 }, { "epoch": 0.8551643011085066, "grad_norm": 5.202205181121826, "learning_rate": 1.454645016082716e-05, "loss": 0.09197419881820679, "step": 199190 }, { "epoch": 0.8552072331985265, "grad_norm": 3.7978355884552, "learning_rate": 1.4542138440709538e-05, "loss": 0.38236684799194337, "step": 199200 }, { "epoch": 0.8552501652885466, "grad_norm": 0.003671700833365321, "learning_rate": 1.4537826720591912e-05, "loss": 0.17069878578186035, "step": 199210 }, { "epoch": 0.8552930973785666, "grad_norm": 0.001484514563344419, "learning_rate": 1.4533515000474291e-05, "loss": 0.15370936393737794, "step": 199220 }, { "epoch": 0.8553360294685866, "grad_norm": 3.502985715866089, "learning_rate": 1.4529203280356665e-05, "loss": 0.3548114776611328, "step": 199230 }, { "epoch": 0.8553789615586066, "grad_norm": 0.02675688825547695, "learning_rate": 1.4524891560239042e-05, "loss": 0.45171194076538085, "step": 199240 }, { "epoch": 0.8554218936486266, "grad_norm": 4.345065593719482, "learning_rate": 1.4520579840121418e-05, "loss": 0.3299868583679199, "step": 199250 }, { "epoch": 0.8554648257386466, "grad_norm": 0.011832942254841328, "learning_rate": 1.4516268120003795e-05, "loss": 0.1040421962738037, "step": 199260 }, { "epoch": 0.8555077578286666, "grad_norm": 0.05655861273407936, "learning_rate": 1.4511956399886171e-05, "loss": 0.10234776735305787, "step": 199270 }, { "epoch": 0.8555506899186867, "grad_norm": 0.03904902935028076, "learning_rate": 1.4507644679768548e-05, "loss": 0.20289580821990966, "step": 199280 }, { "epoch": 0.8555936220087066, "grad_norm": 1.6829935312271118, "learning_rate": 1.4503332959650922e-05, "loss": 0.1187442421913147, "step": 199290 }, { "epoch": 0.8556365540987266, "grad_norm": 0.07314160466194153, "learning_rate": 1.44990212395333e-05, "loss": 0.23276002407073976, "step": 199300 }, { "epoch": 0.8556794861887467, "grad_norm": 0.026286179199814796, "learning_rate": 1.4494709519415675e-05, "loss": 0.424951171875, "step": 199310 }, { "epoch": 0.8557224182787666, "grad_norm": 0.01814284734427929, "learning_rate": 1.4490397799298053e-05, "loss": 0.35801832675933837, "step": 199320 }, { "epoch": 0.8557653503687866, "grad_norm": 0.002590248826891184, "learning_rate": 1.4486086079180428e-05, "loss": 0.25980291366577146, "step": 199330 }, { "epoch": 0.8558082824588067, "grad_norm": 0.009577231481671333, "learning_rate": 1.4481774359062806e-05, "loss": 0.09345943331718445, "step": 199340 }, { "epoch": 0.8558512145488266, "grad_norm": 0.005946278106421232, "learning_rate": 1.4477462638945183e-05, "loss": 0.21709318161010743, "step": 199350 }, { "epoch": 0.8558941466388467, "grad_norm": 0.15229612588882446, "learning_rate": 1.4473150918827557e-05, "loss": 0.14111752510070802, "step": 199360 }, { "epoch": 0.8559370787288667, "grad_norm": 0.004969291388988495, "learning_rate": 1.4468839198709936e-05, "loss": 0.14079365730285645, "step": 199370 }, { "epoch": 0.8559800108188866, "grad_norm": 0.028083153069019318, "learning_rate": 1.446452747859231e-05, "loss": 0.11227288246154785, "step": 199380 }, { "epoch": 0.8560229429089067, "grad_norm": 0.001869790954515338, "learning_rate": 1.4460215758474687e-05, "loss": 0.054939448833465576, "step": 199390 }, { "epoch": 0.8560658749989267, "grad_norm": 0.37315037846565247, "learning_rate": 1.4455904038357063e-05, "loss": 0.14655609130859376, "step": 199400 }, { "epoch": 0.8561088070889467, "grad_norm": 3.289761543273926, "learning_rate": 1.445159231823944e-05, "loss": 0.14338996410369872, "step": 199410 }, { "epoch": 0.8561517391789667, "grad_norm": 42.340110778808594, "learning_rate": 1.4447280598121816e-05, "loss": 0.036475923657417295, "step": 199420 }, { "epoch": 0.8561946712689867, "grad_norm": 0.0018733445322141051, "learning_rate": 1.4442968878004193e-05, "loss": 0.039892581105232236, "step": 199430 }, { "epoch": 0.8562376033590067, "grad_norm": 0.045775555074214935, "learning_rate": 1.4438657157886567e-05, "loss": 0.03090730607509613, "step": 199440 }, { "epoch": 0.8562805354490267, "grad_norm": 1.3289633989334106, "learning_rate": 1.4434345437768945e-05, "loss": 0.22715282440185547, "step": 199450 }, { "epoch": 0.8563234675390468, "grad_norm": 0.033413052558898926, "learning_rate": 1.443003371765132e-05, "loss": 0.17703909873962403, "step": 199460 }, { "epoch": 0.8563663996290667, "grad_norm": 0.0026612640358507633, "learning_rate": 1.4425721997533698e-05, "loss": 0.09658971428871155, "step": 199470 }, { "epoch": 0.8564093317190867, "grad_norm": 0.0032949293963611126, "learning_rate": 1.4421410277416073e-05, "loss": 0.08444079160690307, "step": 199480 }, { "epoch": 0.8564522638091068, "grad_norm": 2.955509662628174, "learning_rate": 1.441709855729845e-05, "loss": 0.3362619161605835, "step": 199490 }, { "epoch": 0.8564951958991268, "grad_norm": 1.2076606750488281, "learning_rate": 1.4412786837180824e-05, "loss": 0.15357297658920288, "step": 199500 }, { "epoch": 0.8565381279891467, "grad_norm": 1.189163327217102, "learning_rate": 1.4408475117063202e-05, "loss": 0.20367789268493652, "step": 199510 }, { "epoch": 0.8565810600791668, "grad_norm": 3.673931360244751, "learning_rate": 1.4404163396945577e-05, "loss": 0.3991194486618042, "step": 199520 }, { "epoch": 0.8566239921691868, "grad_norm": 0.24287502467632294, "learning_rate": 1.4399851676827955e-05, "loss": 0.20523061752319335, "step": 199530 }, { "epoch": 0.8566669242592068, "grad_norm": 0.8159131407737732, "learning_rate": 1.439553995671033e-05, "loss": 0.16643912792205812, "step": 199540 }, { "epoch": 0.8567098563492268, "grad_norm": 0.0028872189577668905, "learning_rate": 1.4391228236592708e-05, "loss": 0.22258753776550294, "step": 199550 }, { "epoch": 0.8567527884392468, "grad_norm": 1.2108325958251953, "learning_rate": 1.4386916516475082e-05, "loss": 0.13343702554702758, "step": 199560 }, { "epoch": 0.8567957205292668, "grad_norm": 1.2748582363128662, "learning_rate": 1.438260479635746e-05, "loss": 0.1584118366241455, "step": 199570 }, { "epoch": 0.8568386526192868, "grad_norm": 3.328965663909912, "learning_rate": 1.4378293076239835e-05, "loss": 0.08251501321792602, "step": 199580 }, { "epoch": 0.8568815847093069, "grad_norm": 9.528289794921875, "learning_rate": 1.4373981356122212e-05, "loss": 0.3464500904083252, "step": 199590 }, { "epoch": 0.8569245167993268, "grad_norm": 0.027622880414128304, "learning_rate": 1.4369669636004588e-05, "loss": 0.11451354026794433, "step": 199600 }, { "epoch": 0.8569674488893468, "grad_norm": 0.0027303590904921293, "learning_rate": 1.4365357915886965e-05, "loss": 0.24467782974243163, "step": 199610 }, { "epoch": 0.8570103809793669, "grad_norm": 0.02862054668366909, "learning_rate": 1.4361046195769339e-05, "loss": 0.1920831799507141, "step": 199620 }, { "epoch": 0.8570533130693868, "grad_norm": 0.0019593604374676943, "learning_rate": 1.4356734475651718e-05, "loss": 0.11015578508377075, "step": 199630 }, { "epoch": 0.8570962451594069, "grad_norm": 3.255344867706299, "learning_rate": 1.4352422755534092e-05, "loss": 0.3502398729324341, "step": 199640 }, { "epoch": 0.8571391772494269, "grad_norm": 2.490980625152588, "learning_rate": 1.434811103541647e-05, "loss": 0.4833946704864502, "step": 199650 }, { "epoch": 0.8571821093394468, "grad_norm": 0.00246992614120245, "learning_rate": 1.4343799315298845e-05, "loss": 0.1826173782348633, "step": 199660 }, { "epoch": 0.8572250414294669, "grad_norm": 0.002251984551548958, "learning_rate": 1.4339487595181222e-05, "loss": 0.5154530048370362, "step": 199670 }, { "epoch": 0.8572679735194869, "grad_norm": 5.490510940551758, "learning_rate": 1.4335175875063598e-05, "loss": 0.29611093997955323, "step": 199680 }, { "epoch": 0.8573109056095068, "grad_norm": 2.570019006729126, "learning_rate": 1.4330864154945975e-05, "loss": 0.40025644302368163, "step": 199690 }, { "epoch": 0.8573538376995269, "grad_norm": 0.5635516047477722, "learning_rate": 1.432655243482835e-05, "loss": 0.06518831253051757, "step": 199700 }, { "epoch": 0.8573967697895469, "grad_norm": 2.1924285888671875, "learning_rate": 1.4322240714710727e-05, "loss": 0.15234854221343994, "step": 199710 }, { "epoch": 0.8574397018795669, "grad_norm": 3.579240083694458, "learning_rate": 1.4317928994593106e-05, "loss": 0.17781139612197877, "step": 199720 }, { "epoch": 0.8574826339695869, "grad_norm": 0.04103695973753929, "learning_rate": 1.431361727447548e-05, "loss": 0.41463704109191896, "step": 199730 }, { "epoch": 0.857525566059607, "grad_norm": 1.245309829711914, "learning_rate": 1.4309305554357857e-05, "loss": 0.1456066131591797, "step": 199740 }, { "epoch": 0.8575684981496269, "grad_norm": 0.6712180972099304, "learning_rate": 1.4304993834240233e-05, "loss": 0.40964322090148925, "step": 199750 }, { "epoch": 0.8576114302396469, "grad_norm": 1.6160268783569336, "learning_rate": 1.430068211412261e-05, "loss": 0.27961690425872804, "step": 199760 }, { "epoch": 0.857654362329667, "grad_norm": 0.030963778495788574, "learning_rate": 1.4296370394004986e-05, "loss": 0.19356164932250977, "step": 199770 }, { "epoch": 0.8576972944196869, "grad_norm": 0.06203527748584747, "learning_rate": 1.4292058673887363e-05, "loss": 0.1277173399925232, "step": 199780 }, { "epoch": 0.8577402265097069, "grad_norm": 0.07475373893976212, "learning_rate": 1.4287746953769737e-05, "loss": 0.13933945894241334, "step": 199790 }, { "epoch": 0.857783158599727, "grad_norm": 2.2970597743988037, "learning_rate": 1.4283435233652114e-05, "loss": 0.343434739112854, "step": 199800 }, { "epoch": 0.8578260906897469, "grad_norm": 1.9501216411590576, "learning_rate": 1.427912351353449e-05, "loss": 0.17411470413208008, "step": 199810 }, { "epoch": 0.857869022779767, "grad_norm": 1.5812982320785522, "learning_rate": 1.4274811793416867e-05, "loss": 0.12971644401550292, "step": 199820 }, { "epoch": 0.857911954869787, "grad_norm": 0.2392256110906601, "learning_rate": 1.4270500073299243e-05, "loss": 0.07452273964881898, "step": 199830 }, { "epoch": 0.8579548869598069, "grad_norm": 2.668260097503662, "learning_rate": 1.426618835318162e-05, "loss": 0.1514972448348999, "step": 199840 }, { "epoch": 0.857997819049827, "grad_norm": 0.029095986858010292, "learning_rate": 1.4261876633063994e-05, "loss": 0.06277897953987122, "step": 199850 }, { "epoch": 0.858040751139847, "grad_norm": 0.001128542353399098, "learning_rate": 1.4257564912946372e-05, "loss": 0.24682888984680176, "step": 199860 }, { "epoch": 0.858083683229867, "grad_norm": 0.061851732432842255, "learning_rate": 1.4253253192828747e-05, "loss": 0.12122787237167358, "step": 199870 }, { "epoch": 0.858126615319887, "grad_norm": 0.0018141150940209627, "learning_rate": 1.4248941472711125e-05, "loss": 0.37261626720428465, "step": 199880 }, { "epoch": 0.858169547409907, "grad_norm": 0.0010228273458778858, "learning_rate": 1.42446297525935e-05, "loss": 0.10979610681533813, "step": 199890 }, { "epoch": 0.858212479499927, "grad_norm": 0.2523162364959717, "learning_rate": 1.4240318032475878e-05, "loss": 0.22949092388153075, "step": 199900 }, { "epoch": 0.858255411589947, "grad_norm": 0.06273109465837479, "learning_rate": 1.4236006312358251e-05, "loss": 0.28017995357513426, "step": 199910 }, { "epoch": 0.858298343679967, "grad_norm": 0.008993618190288544, "learning_rate": 1.423169459224063e-05, "loss": 0.09785916209220887, "step": 199920 }, { "epoch": 0.8583412757699871, "grad_norm": 0.0179828479886055, "learning_rate": 1.4227382872123004e-05, "loss": 0.4375472545623779, "step": 199930 }, { "epoch": 0.858384207860007, "grad_norm": 0.003786655142903328, "learning_rate": 1.4223071152005382e-05, "loss": 0.28068861961364744, "step": 199940 }, { "epoch": 0.8584271399500271, "grad_norm": 1.220051884651184, "learning_rate": 1.4218759431887757e-05, "loss": 0.3362873077392578, "step": 199950 }, { "epoch": 0.8584700720400471, "grad_norm": 0.004382483195513487, "learning_rate": 1.4214447711770135e-05, "loss": 0.1859076976776123, "step": 199960 }, { "epoch": 0.858513004130067, "grad_norm": 0.008440139703452587, "learning_rate": 1.4210135991652509e-05, "loss": 0.2096766471862793, "step": 199970 }, { "epoch": 0.8585559362200871, "grad_norm": 0.004920396488159895, "learning_rate": 1.4205824271534888e-05, "loss": 0.23950378894805907, "step": 199980 }, { "epoch": 0.8585988683101071, "grad_norm": 0.0025335282552987337, "learning_rate": 1.4201512551417262e-05, "loss": 0.14211657047271728, "step": 199990 }, { "epoch": 0.858641800400127, "grad_norm": 0.011228648945689201, "learning_rate": 1.4197200831299639e-05, "loss": 0.16281137466430665, "step": 200000 }, { "epoch": 0.858641800400127, "eval_loss": 0.3807581961154938, "eval_runtime": 27.6023, "eval_samples_per_second": 3.623, "eval_steps_per_second": 3.623, "step": 200000 }, { "epoch": 0.8586847324901471, "grad_norm": 0.020021537318825722, "learning_rate": 1.4192889111182015e-05, "loss": 0.3246197462081909, "step": 200010 }, { "epoch": 0.8587276645801671, "grad_norm": 0.00552589725703001, "learning_rate": 1.4188577391064392e-05, "loss": 0.325114107131958, "step": 200020 }, { "epoch": 0.8587705966701871, "grad_norm": 1.4242668151855469, "learning_rate": 1.4184265670946768e-05, "loss": 0.1461607336997986, "step": 200030 }, { "epoch": 0.8588135287602071, "grad_norm": 2.1981289386749268, "learning_rate": 1.4179953950829145e-05, "loss": 0.23893659114837645, "step": 200040 }, { "epoch": 0.8588564608502272, "grad_norm": 0.003543504746630788, "learning_rate": 1.4175642230711519e-05, "loss": 0.17610794305801392, "step": 200050 }, { "epoch": 0.8588993929402471, "grad_norm": 7.391210079193115, "learning_rate": 1.4171330510593896e-05, "loss": 0.28483357429504397, "step": 200060 }, { "epoch": 0.8589423250302671, "grad_norm": 0.03855060786008835, "learning_rate": 1.4167018790476272e-05, "loss": 0.28767685890197753, "step": 200070 }, { "epoch": 0.8589852571202872, "grad_norm": 4.486928939819336, "learning_rate": 1.416270707035865e-05, "loss": 0.1376192569732666, "step": 200080 }, { "epoch": 0.8590281892103071, "grad_norm": 0.01081493217498064, "learning_rate": 1.4158395350241027e-05, "loss": 0.2854840040206909, "step": 200090 }, { "epoch": 0.8590711213003271, "grad_norm": 0.015720579773187637, "learning_rate": 1.4154083630123402e-05, "loss": 0.15743573904037475, "step": 200100 }, { "epoch": 0.8591140533903472, "grad_norm": 0.38807976245880127, "learning_rate": 1.414977191000578e-05, "loss": 0.11960583925247192, "step": 200110 }, { "epoch": 0.8591569854803671, "grad_norm": 0.2119477093219757, "learning_rate": 1.4145460189888154e-05, "loss": 0.09136803150177002, "step": 200120 }, { "epoch": 0.8591999175703872, "grad_norm": 1.8273348808288574, "learning_rate": 1.4141148469770533e-05, "loss": 0.1844263792037964, "step": 200130 }, { "epoch": 0.8592428496604072, "grad_norm": 6.799977779388428, "learning_rate": 1.4136836749652907e-05, "loss": 0.2549062013626099, "step": 200140 }, { "epoch": 0.8592857817504271, "grad_norm": 0.05221116542816162, "learning_rate": 1.4132525029535284e-05, "loss": 0.01656073033809662, "step": 200150 }, { "epoch": 0.8593287138404472, "grad_norm": 0.002522778697311878, "learning_rate": 1.412821330941766e-05, "loss": 0.18575385808944703, "step": 200160 }, { "epoch": 0.8593716459304672, "grad_norm": 2.425380229949951, "learning_rate": 1.4123901589300037e-05, "loss": 0.3281693458557129, "step": 200170 }, { "epoch": 0.8594145780204872, "grad_norm": 1.184431791305542, "learning_rate": 1.4119589869182413e-05, "loss": 0.16849026679992676, "step": 200180 }, { "epoch": 0.8594575101105072, "grad_norm": 0.27626046538352966, "learning_rate": 1.411527814906479e-05, "loss": 0.2617574453353882, "step": 200190 }, { "epoch": 0.8595004422005272, "grad_norm": 0.0017152708023786545, "learning_rate": 1.4110966428947164e-05, "loss": 0.2841400146484375, "step": 200200 }, { "epoch": 0.8595433742905472, "grad_norm": 0.006438401993364096, "learning_rate": 1.4106654708829541e-05, "loss": 0.4257059097290039, "step": 200210 }, { "epoch": 0.8595863063805672, "grad_norm": 0.039594002068042755, "learning_rate": 1.4102342988711917e-05, "loss": 0.16782171726226808, "step": 200220 }, { "epoch": 0.8596292384705873, "grad_norm": 0.28560030460357666, "learning_rate": 1.4098031268594294e-05, "loss": 0.22525856494903565, "step": 200230 }, { "epoch": 0.8596721705606072, "grad_norm": 2.2180888652801514, "learning_rate": 1.409371954847667e-05, "loss": 0.1681581974029541, "step": 200240 }, { "epoch": 0.8597151026506272, "grad_norm": 0.059903547167778015, "learning_rate": 1.4089407828359047e-05, "loss": 0.11498527526855469, "step": 200250 }, { "epoch": 0.8597580347406473, "grad_norm": 0.017579248175024986, "learning_rate": 1.4085096108241421e-05, "loss": 0.2624624252319336, "step": 200260 }, { "epoch": 0.8598009668306672, "grad_norm": 0.0018392846686765552, "learning_rate": 1.4080784388123799e-05, "loss": 0.20496695041656493, "step": 200270 }, { "epoch": 0.8598438989206872, "grad_norm": 0.022280734032392502, "learning_rate": 1.4076472668006174e-05, "loss": 0.22362060546875, "step": 200280 }, { "epoch": 0.8598868310107073, "grad_norm": 0.04711763188242912, "learning_rate": 1.4072160947888552e-05, "loss": 0.25614199638366697, "step": 200290 }, { "epoch": 0.8599297631007272, "grad_norm": 0.9575517177581787, "learning_rate": 1.4067849227770927e-05, "loss": 0.3862587928771973, "step": 200300 }, { "epoch": 0.8599726951907473, "grad_norm": 1.2002527713775635, "learning_rate": 1.4063537507653305e-05, "loss": 0.29901714324951173, "step": 200310 }, { "epoch": 0.8600156272807673, "grad_norm": 3.8473565578460693, "learning_rate": 1.4059225787535679e-05, "loss": 0.2574320793151855, "step": 200320 }, { "epoch": 0.8600585593707872, "grad_norm": 0.025791872292757034, "learning_rate": 1.4054914067418058e-05, "loss": 0.22598130702972413, "step": 200330 }, { "epoch": 0.8601014914608073, "grad_norm": 0.2701096534729004, "learning_rate": 1.4050602347300432e-05, "loss": 0.3798185348510742, "step": 200340 }, { "epoch": 0.8601444235508273, "grad_norm": 0.2829890251159668, "learning_rate": 1.4046290627182809e-05, "loss": 0.21899137496948243, "step": 200350 }, { "epoch": 0.8601873556408474, "grad_norm": 0.024702927097678185, "learning_rate": 1.4041978907065184e-05, "loss": 0.16811333894729613, "step": 200360 }, { "epoch": 0.8602302877308673, "grad_norm": 0.07576318085193634, "learning_rate": 1.4037667186947562e-05, "loss": 0.00663701742887497, "step": 200370 }, { "epoch": 0.8602732198208873, "grad_norm": 1.4503016471862793, "learning_rate": 1.4033355466829936e-05, "loss": 0.12653093338012694, "step": 200380 }, { "epoch": 0.8603161519109074, "grad_norm": 1.278931975364685, "learning_rate": 1.4029043746712315e-05, "loss": 0.149183988571167, "step": 200390 }, { "epoch": 0.8603590840009273, "grad_norm": 2.7294182777404785, "learning_rate": 1.4024732026594689e-05, "loss": 0.2316087007522583, "step": 200400 }, { "epoch": 0.8604020160909474, "grad_norm": 0.0018006651662290096, "learning_rate": 1.4020420306477066e-05, "loss": 0.16761742830276488, "step": 200410 }, { "epoch": 0.8604449481809674, "grad_norm": 0.011157185770571232, "learning_rate": 1.4016108586359442e-05, "loss": 0.16158027648925782, "step": 200420 }, { "epoch": 0.8604878802709873, "grad_norm": 2.3285841941833496, "learning_rate": 1.4011796866241819e-05, "loss": 0.2909983158111572, "step": 200430 }, { "epoch": 0.8605308123610074, "grad_norm": 4.663473129272461, "learning_rate": 1.4007485146124196e-05, "loss": 0.20800578594207764, "step": 200440 }, { "epoch": 0.8605737444510274, "grad_norm": 0.18279698491096497, "learning_rate": 1.4003173426006572e-05, "loss": 0.1953489065170288, "step": 200450 }, { "epoch": 0.8606166765410473, "grad_norm": 0.009925964288413525, "learning_rate": 1.399886170588895e-05, "loss": 0.2169330358505249, "step": 200460 }, { "epoch": 0.8606596086310674, "grad_norm": 0.19705329835414886, "learning_rate": 1.3994549985771323e-05, "loss": 0.0679843246936798, "step": 200470 }, { "epoch": 0.8607025407210874, "grad_norm": 0.15052109956741333, "learning_rate": 1.3990238265653702e-05, "loss": 0.24635510444641112, "step": 200480 }, { "epoch": 0.8607454728111074, "grad_norm": 0.002157369162887335, "learning_rate": 1.3985926545536076e-05, "loss": 0.1807490348815918, "step": 200490 }, { "epoch": 0.8607884049011274, "grad_norm": 0.0005508503527380526, "learning_rate": 1.3981614825418454e-05, "loss": 0.06903789043426514, "step": 200500 }, { "epoch": 0.8608313369911474, "grad_norm": 1.3312430381774902, "learning_rate": 1.397730310530083e-05, "loss": 0.12055761814117431, "step": 200510 }, { "epoch": 0.8608742690811674, "grad_norm": 0.02469063363969326, "learning_rate": 1.3972991385183207e-05, "loss": 0.02727437615394592, "step": 200520 }, { "epoch": 0.8609172011711874, "grad_norm": 2.345923662185669, "learning_rate": 1.3968679665065582e-05, "loss": 0.20277533531188965, "step": 200530 }, { "epoch": 0.8609601332612075, "grad_norm": 0.0974428579211235, "learning_rate": 1.396436794494796e-05, "loss": 0.17809678316116334, "step": 200540 }, { "epoch": 0.8610030653512274, "grad_norm": 1.2723183631896973, "learning_rate": 1.3960056224830334e-05, "loss": 0.1863088607788086, "step": 200550 }, { "epoch": 0.8610459974412474, "grad_norm": 1.1321065425872803, "learning_rate": 1.3955744504712711e-05, "loss": 0.2558783292770386, "step": 200560 }, { "epoch": 0.8610889295312675, "grad_norm": 1.367501974105835, "learning_rate": 1.3951432784595087e-05, "loss": 0.21771454811096191, "step": 200570 }, { "epoch": 0.8611318616212874, "grad_norm": 0.05725770443677902, "learning_rate": 1.3947121064477464e-05, "loss": 0.08010371923446655, "step": 200580 }, { "epoch": 0.8611747937113075, "grad_norm": 1.8050976991653442, "learning_rate": 1.394280934435984e-05, "loss": 0.32913758754730227, "step": 200590 }, { "epoch": 0.8612177258013275, "grad_norm": 0.08833785355091095, "learning_rate": 1.3938497624242217e-05, "loss": 0.03277966380119324, "step": 200600 }, { "epoch": 0.8612606578913474, "grad_norm": 0.005714171566069126, "learning_rate": 1.3934185904124591e-05, "loss": 0.18971192836761475, "step": 200610 }, { "epoch": 0.8613035899813675, "grad_norm": 0.010824470780789852, "learning_rate": 1.3929874184006968e-05, "loss": 0.2813425540924072, "step": 200620 }, { "epoch": 0.8613465220713875, "grad_norm": 0.00637517124414444, "learning_rate": 1.3925562463889344e-05, "loss": 0.2274622440338135, "step": 200630 }, { "epoch": 0.8613894541614074, "grad_norm": 0.015382036566734314, "learning_rate": 1.3921250743771721e-05, "loss": 0.05090780854225159, "step": 200640 }, { "epoch": 0.8614323862514275, "grad_norm": 2.6729819774627686, "learning_rate": 1.3916939023654097e-05, "loss": 0.24126005172729492, "step": 200650 }, { "epoch": 0.8614753183414475, "grad_norm": 1.538490653038025, "learning_rate": 1.3912627303536474e-05, "loss": 0.21481027603149414, "step": 200660 }, { "epoch": 0.8615182504314675, "grad_norm": 0.002245868556201458, "learning_rate": 1.3908315583418848e-05, "loss": 0.23646047115325927, "step": 200670 }, { "epoch": 0.8615611825214875, "grad_norm": 0.0007779735024087131, "learning_rate": 1.3904003863301227e-05, "loss": 0.1696911096572876, "step": 200680 }, { "epoch": 0.8616041146115075, "grad_norm": 2.3345389366149902, "learning_rate": 1.3899692143183601e-05, "loss": 0.24698200225830078, "step": 200690 }, { "epoch": 0.8616470467015275, "grad_norm": 0.010887118056416512, "learning_rate": 1.3895380423065979e-05, "loss": 0.11975333690643311, "step": 200700 }, { "epoch": 0.8616899787915475, "grad_norm": 0.03210204094648361, "learning_rate": 1.3891068702948354e-05, "loss": 0.12684575319290162, "step": 200710 }, { "epoch": 0.8617329108815676, "grad_norm": 2.8229777812957764, "learning_rate": 1.3886756982830732e-05, "loss": 0.33409197330474855, "step": 200720 }, { "epoch": 0.8617758429715875, "grad_norm": 1.6647875308990479, "learning_rate": 1.3882445262713106e-05, "loss": 0.16415317058563234, "step": 200730 }, { "epoch": 0.8618187750616075, "grad_norm": 1.3204957246780396, "learning_rate": 1.3878133542595485e-05, "loss": 0.17294979095458984, "step": 200740 }, { "epoch": 0.8618617071516276, "grad_norm": 0.0013664969010278583, "learning_rate": 1.3873821822477859e-05, "loss": 0.1268696069717407, "step": 200750 }, { "epoch": 0.8619046392416475, "grad_norm": 0.16938357055187225, "learning_rate": 1.3869510102360236e-05, "loss": 0.06001962423324585, "step": 200760 }, { "epoch": 0.8619475713316676, "grad_norm": 0.8504765629768372, "learning_rate": 1.3865198382242612e-05, "loss": 0.48522701263427737, "step": 200770 }, { "epoch": 0.8619905034216876, "grad_norm": 0.0021308199502527714, "learning_rate": 1.3860886662124989e-05, "loss": 0.2758405447006226, "step": 200780 }, { "epoch": 0.8620334355117076, "grad_norm": 0.004662891384214163, "learning_rate": 1.3856574942007365e-05, "loss": 0.3536546230316162, "step": 200790 }, { "epoch": 0.8620763676017276, "grad_norm": 5.162191390991211, "learning_rate": 1.3852263221889742e-05, "loss": 0.12391103506088257, "step": 200800 }, { "epoch": 0.8621192996917476, "grad_norm": 0.003000795841217041, "learning_rate": 1.384795150177212e-05, "loss": 0.08238120079040527, "step": 200810 }, { "epoch": 0.8621622317817677, "grad_norm": 0.6597919464111328, "learning_rate": 1.3843639781654493e-05, "loss": 0.1471768617630005, "step": 200820 }, { "epoch": 0.8622051638717876, "grad_norm": 3.253000020980835, "learning_rate": 1.3839328061536872e-05, "loss": 0.3409099817276001, "step": 200830 }, { "epoch": 0.8622480959618076, "grad_norm": 0.0024019486736506224, "learning_rate": 1.3835016341419246e-05, "loss": 0.3075373888015747, "step": 200840 }, { "epoch": 0.8622910280518277, "grad_norm": 0.12324788421392441, "learning_rate": 1.3830704621301623e-05, "loss": 0.05642509460449219, "step": 200850 }, { "epoch": 0.8623339601418476, "grad_norm": 1.4376167058944702, "learning_rate": 1.3826392901183999e-05, "loss": 0.11846233606338501, "step": 200860 }, { "epoch": 0.8623768922318676, "grad_norm": 0.025254063308238983, "learning_rate": 1.3822081181066376e-05, "loss": 0.17070431709289552, "step": 200870 }, { "epoch": 0.8624198243218877, "grad_norm": 0.9955533146858215, "learning_rate": 1.381776946094875e-05, "loss": 0.34915981292724607, "step": 200880 }, { "epoch": 0.8624627564119076, "grad_norm": 0.008249502629041672, "learning_rate": 1.381345774083113e-05, "loss": 0.23022923469543458, "step": 200890 }, { "epoch": 0.8625056885019277, "grad_norm": 1.0851255655288696, "learning_rate": 1.3809146020713503e-05, "loss": 0.2843886375427246, "step": 200900 }, { "epoch": 0.8625486205919477, "grad_norm": 2.4486043453216553, "learning_rate": 1.380483430059588e-05, "loss": 0.2356062650680542, "step": 200910 }, { "epoch": 0.8625915526819676, "grad_norm": 0.03232686221599579, "learning_rate": 1.3800522580478256e-05, "loss": 0.13071533441543579, "step": 200920 }, { "epoch": 0.8626344847719877, "grad_norm": 1.3295395374298096, "learning_rate": 1.3796210860360634e-05, "loss": 0.07537164092063904, "step": 200930 }, { "epoch": 0.8626774168620077, "grad_norm": 0.08242210745811462, "learning_rate": 1.379189914024301e-05, "loss": 0.10701709985733032, "step": 200940 }, { "epoch": 0.8627203489520276, "grad_norm": 0.059901103377342224, "learning_rate": 1.3787587420125387e-05, "loss": 0.23902592658996583, "step": 200950 }, { "epoch": 0.8627632810420477, "grad_norm": 2.502207040786743, "learning_rate": 1.378327570000776e-05, "loss": 0.16078464984893798, "step": 200960 }, { "epoch": 0.8628062131320677, "grad_norm": 0.0037575415335595608, "learning_rate": 1.3778963979890138e-05, "loss": 0.040537270903587344, "step": 200970 }, { "epoch": 0.8628491452220877, "grad_norm": 0.0015732988249510527, "learning_rate": 1.3774652259772514e-05, "loss": 0.24941112995147705, "step": 200980 }, { "epoch": 0.8628920773121077, "grad_norm": 0.08129372447729111, "learning_rate": 1.3770340539654891e-05, "loss": 0.05371713638305664, "step": 200990 }, { "epoch": 0.8629350094021278, "grad_norm": 0.0015491340309381485, "learning_rate": 1.3766028819537267e-05, "loss": 0.32582085132598876, "step": 201000 }, { "epoch": 0.8629350094021278, "eval_loss": 0.3771650195121765, "eval_runtime": 27.4127, "eval_samples_per_second": 3.648, "eval_steps_per_second": 3.648, "step": 201000 }, { "epoch": 0.8629779414921477, "grad_norm": 1.424601674079895, "learning_rate": 1.3761717099419644e-05, "loss": 0.027511578798294068, "step": 201010 }, { "epoch": 0.8630208735821677, "grad_norm": 0.012731004506349564, "learning_rate": 1.3757405379302018e-05, "loss": 0.20147929191589356, "step": 201020 }, { "epoch": 0.8630638056721878, "grad_norm": 0.1110619604587555, "learning_rate": 1.3753093659184397e-05, "loss": 0.026392871141433717, "step": 201030 }, { "epoch": 0.8631067377622077, "grad_norm": 1.733137845993042, "learning_rate": 1.3748781939066771e-05, "loss": 0.16961859464645385, "step": 201040 }, { "epoch": 0.8631496698522277, "grad_norm": 0.0012216472532600164, "learning_rate": 1.3744470218949148e-05, "loss": 0.23530104160308837, "step": 201050 }, { "epoch": 0.8631926019422478, "grad_norm": 0.03294944763183594, "learning_rate": 1.3740158498831524e-05, "loss": 0.08938648104667664, "step": 201060 }, { "epoch": 0.8632355340322677, "grad_norm": 6.165449619293213, "learning_rate": 1.3735846778713901e-05, "loss": 0.2767988681793213, "step": 201070 }, { "epoch": 0.8632784661222878, "grad_norm": 0.006369173992425203, "learning_rate": 1.3731535058596275e-05, "loss": 0.22666561603546143, "step": 201080 }, { "epoch": 0.8633213982123078, "grad_norm": 123.1529312133789, "learning_rate": 1.3727223338478654e-05, "loss": 0.2879739046096802, "step": 201090 }, { "epoch": 0.8633643303023277, "grad_norm": 1.1861544847488403, "learning_rate": 1.3722911618361028e-05, "loss": 0.29244720935821533, "step": 201100 }, { "epoch": 0.8634072623923478, "grad_norm": 0.008288329467177391, "learning_rate": 1.3718599898243406e-05, "loss": 0.2790151834487915, "step": 201110 }, { "epoch": 0.8634501944823678, "grad_norm": 0.02679705061018467, "learning_rate": 1.3714288178125781e-05, "loss": 0.2626842975616455, "step": 201120 }, { "epoch": 0.8634931265723877, "grad_norm": 0.5207473039627075, "learning_rate": 1.3709976458008159e-05, "loss": 0.24442975521087645, "step": 201130 }, { "epoch": 0.8635360586624078, "grad_norm": 0.0031045994255691767, "learning_rate": 1.3705664737890534e-05, "loss": 0.2642416000366211, "step": 201140 }, { "epoch": 0.8635789907524278, "grad_norm": 0.01354249194264412, "learning_rate": 1.3701353017772912e-05, "loss": 0.023589606583118438, "step": 201150 }, { "epoch": 0.8636219228424478, "grad_norm": 0.0018465573666617274, "learning_rate": 1.3697041297655286e-05, "loss": 0.2037327527999878, "step": 201160 }, { "epoch": 0.8636648549324678, "grad_norm": 0.01761949621140957, "learning_rate": 1.3692729577537663e-05, "loss": 0.27513961791992186, "step": 201170 }, { "epoch": 0.8637077870224878, "grad_norm": 0.001405532006174326, "learning_rate": 1.3688417857420042e-05, "loss": 0.16688908338546754, "step": 201180 }, { "epoch": 0.8637507191125078, "grad_norm": 0.0007753579411655664, "learning_rate": 1.3684106137302416e-05, "loss": 0.23070762157440186, "step": 201190 }, { "epoch": 0.8637936512025278, "grad_norm": 0.2772015929222107, "learning_rate": 1.3679794417184793e-05, "loss": 0.04422442317008972, "step": 201200 }, { "epoch": 0.8638365832925479, "grad_norm": 0.03911440819501877, "learning_rate": 1.3675482697067169e-05, "loss": 0.06600427627563477, "step": 201210 }, { "epoch": 0.8638795153825679, "grad_norm": 0.004431838635355234, "learning_rate": 1.3671170976949546e-05, "loss": 0.20836050510406495, "step": 201220 }, { "epoch": 0.8639224474725878, "grad_norm": 0.0669967457652092, "learning_rate": 1.366685925683192e-05, "loss": 0.09750092029571533, "step": 201230 }, { "epoch": 0.8639653795626079, "grad_norm": 2.275888681411743, "learning_rate": 1.36625475367143e-05, "loss": 0.33345661163330076, "step": 201240 }, { "epoch": 0.8640083116526279, "grad_norm": 1.3841379880905151, "learning_rate": 1.3658235816596673e-05, "loss": 0.2437295913696289, "step": 201250 }, { "epoch": 0.8640512437426479, "grad_norm": 0.0015755087370052934, "learning_rate": 1.365392409647905e-05, "loss": 0.34356591701507566, "step": 201260 }, { "epoch": 0.8640941758326679, "grad_norm": 8.029579162597656, "learning_rate": 1.3649612376361426e-05, "loss": 0.13912639617919922, "step": 201270 }, { "epoch": 0.8641371079226879, "grad_norm": 0.06813203543424606, "learning_rate": 1.3645300656243803e-05, "loss": 0.1133497714996338, "step": 201280 }, { "epoch": 0.8641800400127079, "grad_norm": 7.3910980224609375, "learning_rate": 1.3640988936126179e-05, "loss": 0.22454369068145752, "step": 201290 }, { "epoch": 0.8642229721027279, "grad_norm": 8.968598365783691, "learning_rate": 1.3636677216008556e-05, "loss": 0.2147754192352295, "step": 201300 }, { "epoch": 0.864265904192748, "grad_norm": 0.0024705410469323397, "learning_rate": 1.363236549589093e-05, "loss": 0.3113959789276123, "step": 201310 }, { "epoch": 0.8643088362827679, "grad_norm": 1.973299503326416, "learning_rate": 1.3628053775773308e-05, "loss": 0.23185880184173585, "step": 201320 }, { "epoch": 0.8643517683727879, "grad_norm": 0.5137335658073425, "learning_rate": 1.3623742055655683e-05, "loss": 0.2932513475418091, "step": 201330 }, { "epoch": 0.864394700462808, "grad_norm": 0.0013410047395154834, "learning_rate": 1.361943033553806e-05, "loss": 0.05563706159591675, "step": 201340 }, { "epoch": 0.8644376325528279, "grad_norm": 2.642397403717041, "learning_rate": 1.3615118615420436e-05, "loss": 0.22566254138946534, "step": 201350 }, { "epoch": 0.864480564642848, "grad_norm": 0.027849914506077766, "learning_rate": 1.3610806895302814e-05, "loss": 0.12023868560791015, "step": 201360 }, { "epoch": 0.864523496732868, "grad_norm": 0.0002782710362225771, "learning_rate": 1.3606495175185188e-05, "loss": 0.40762104988098147, "step": 201370 }, { "epoch": 0.8645664288228879, "grad_norm": 0.00015609625552315265, "learning_rate": 1.3602183455067565e-05, "loss": 0.3876370906829834, "step": 201380 }, { "epoch": 0.864609360912908, "grad_norm": 8.169439315795898, "learning_rate": 1.359787173494994e-05, "loss": 0.14945834875106812, "step": 201390 }, { "epoch": 0.864652293002928, "grad_norm": 0.005248130764812231, "learning_rate": 1.3593560014832318e-05, "loss": 0.04767285883426666, "step": 201400 }, { "epoch": 0.8646952250929479, "grad_norm": 2.542904853820801, "learning_rate": 1.3589248294714694e-05, "loss": 0.10348002910614014, "step": 201410 }, { "epoch": 0.864738157182968, "grad_norm": 0.03276196867227554, "learning_rate": 1.3584936574597071e-05, "loss": 0.11932778358459473, "step": 201420 }, { "epoch": 0.864781089272988, "grad_norm": 3.358114719390869, "learning_rate": 1.3580624854479445e-05, "loss": 0.08350310325622559, "step": 201430 }, { "epoch": 0.864824021363008, "grad_norm": 1.9119728803634644, "learning_rate": 1.3576313134361824e-05, "loss": 0.13522932529449463, "step": 201440 }, { "epoch": 0.864866953453028, "grad_norm": 1.0199462175369263, "learning_rate": 1.3572001414244198e-05, "loss": 0.38537781238555907, "step": 201450 }, { "epoch": 0.864909885543048, "grad_norm": 0.4397680461406708, "learning_rate": 1.3567689694126575e-05, "loss": 0.20663084983825683, "step": 201460 }, { "epoch": 0.864952817633068, "grad_norm": 0.4000920355319977, "learning_rate": 1.3563377974008951e-05, "loss": 0.16495121717453004, "step": 201470 }, { "epoch": 0.864995749723088, "grad_norm": 0.001823501312173903, "learning_rate": 1.3559066253891328e-05, "loss": 0.28188743591308596, "step": 201480 }, { "epoch": 0.8650386818131081, "grad_norm": 0.06819120794534683, "learning_rate": 1.3554754533773702e-05, "loss": 0.12166712284088135, "step": 201490 }, { "epoch": 0.865081613903128, "grad_norm": 0.011608834378421307, "learning_rate": 1.3550442813656081e-05, "loss": 0.13515074253082277, "step": 201500 }, { "epoch": 0.865124545993148, "grad_norm": 6.033400535583496, "learning_rate": 1.3546131093538455e-05, "loss": 0.3407582759857178, "step": 201510 }, { "epoch": 0.8651674780831681, "grad_norm": 2.359403610229492, "learning_rate": 1.3541819373420833e-05, "loss": 0.1246711015701294, "step": 201520 }, { "epoch": 0.865210410173188, "grad_norm": 5.6313276290893555, "learning_rate": 1.3537507653303212e-05, "loss": 0.06634917855262756, "step": 201530 }, { "epoch": 0.865253342263208, "grad_norm": 0.011722094379365444, "learning_rate": 1.3533195933185586e-05, "loss": 0.08681342005729675, "step": 201540 }, { "epoch": 0.8652962743532281, "grad_norm": 0.0036464305594563484, "learning_rate": 1.3528884213067963e-05, "loss": 0.0781008780002594, "step": 201550 }, { "epoch": 0.865339206443248, "grad_norm": 0.17769843339920044, "learning_rate": 1.3524572492950339e-05, "loss": 0.20671448707580567, "step": 201560 }, { "epoch": 0.8653821385332681, "grad_norm": 2.0480868816375732, "learning_rate": 1.3520260772832716e-05, "loss": 0.1721822738647461, "step": 201570 }, { "epoch": 0.8654250706232881, "grad_norm": 0.23706549406051636, "learning_rate": 1.351594905271509e-05, "loss": 0.22920043468475343, "step": 201580 }, { "epoch": 0.865468002713308, "grad_norm": 0.0014394361060112715, "learning_rate": 1.3511637332597469e-05, "loss": 0.16023410558700563, "step": 201590 }, { "epoch": 0.8655109348033281, "grad_norm": 1.2048903703689575, "learning_rate": 1.3507325612479843e-05, "loss": 0.226576828956604, "step": 201600 }, { "epoch": 0.8655538668933481, "grad_norm": 0.035271406173706055, "learning_rate": 1.350301389236222e-05, "loss": 0.1346900224685669, "step": 201610 }, { "epoch": 0.865596798983368, "grad_norm": 0.0051587289199233055, "learning_rate": 1.3498702172244596e-05, "loss": 0.3614091157913208, "step": 201620 }, { "epoch": 0.8656397310733881, "grad_norm": 0.08925247192382812, "learning_rate": 1.3494390452126973e-05, "loss": 0.3365633964538574, "step": 201630 }, { "epoch": 0.8656826631634081, "grad_norm": 0.009239203296601772, "learning_rate": 1.3490078732009349e-05, "loss": 0.1582499384880066, "step": 201640 }, { "epoch": 0.8657255952534282, "grad_norm": 0.5474352836608887, "learning_rate": 1.3485767011891726e-05, "loss": 0.06596941947937011, "step": 201650 }, { "epoch": 0.8657685273434481, "grad_norm": 0.7350939512252808, "learning_rate": 1.34814552917741e-05, "loss": 0.290863037109375, "step": 201660 }, { "epoch": 0.8658114594334682, "grad_norm": 0.005053548142313957, "learning_rate": 1.3477143571656478e-05, "loss": 0.12502760887145997, "step": 201670 }, { "epoch": 0.8658543915234882, "grad_norm": 3.777261734008789, "learning_rate": 1.3472831851538853e-05, "loss": 0.07586979866027832, "step": 201680 }, { "epoch": 0.8658973236135081, "grad_norm": 2.031341791152954, "learning_rate": 1.346852013142123e-05, "loss": 0.24181299209594725, "step": 201690 }, { "epoch": 0.8659402557035282, "grad_norm": 5.771152973175049, "learning_rate": 1.3464208411303606e-05, "loss": 0.08921363353729247, "step": 201700 }, { "epoch": 0.8659831877935482, "grad_norm": 0.04179826006293297, "learning_rate": 1.3459896691185983e-05, "loss": 0.327194881439209, "step": 201710 }, { "epoch": 0.8660261198835681, "grad_norm": 3.3760650157928467, "learning_rate": 1.3455584971068357e-05, "loss": 0.27189462184906005, "step": 201720 }, { "epoch": 0.8660690519735882, "grad_norm": 0.0028545090463012457, "learning_rate": 1.3451273250950735e-05, "loss": 0.2069005250930786, "step": 201730 }, { "epoch": 0.8661119840636082, "grad_norm": 2.0396742820739746, "learning_rate": 1.344696153083311e-05, "loss": 0.2259754419326782, "step": 201740 }, { "epoch": 0.8661549161536282, "grad_norm": 0.8559262156486511, "learning_rate": 1.3442649810715488e-05, "loss": 0.34209017753601073, "step": 201750 }, { "epoch": 0.8661978482436482, "grad_norm": 0.007572118658572435, "learning_rate": 1.3438338090597863e-05, "loss": 0.2585746765136719, "step": 201760 }, { "epoch": 0.8662407803336682, "grad_norm": 0.5755453705787659, "learning_rate": 1.343402637048024e-05, "loss": 0.07282045483589172, "step": 201770 }, { "epoch": 0.8662837124236882, "grad_norm": 0.0012946148635819554, "learning_rate": 1.3429714650362615e-05, "loss": 0.2800379753112793, "step": 201780 }, { "epoch": 0.8663266445137082, "grad_norm": 0.019003266468644142, "learning_rate": 1.3425402930244994e-05, "loss": 0.2469172716140747, "step": 201790 }, { "epoch": 0.8663695766037283, "grad_norm": 0.0037993162404745817, "learning_rate": 1.3421091210127368e-05, "loss": 0.20482521057128905, "step": 201800 }, { "epoch": 0.8664125086937482, "grad_norm": 0.042166516184806824, "learning_rate": 1.3416779490009745e-05, "loss": 0.15784236192703247, "step": 201810 }, { "epoch": 0.8664554407837682, "grad_norm": 4.952692985534668, "learning_rate": 1.341246776989212e-05, "loss": 0.10906834602355957, "step": 201820 }, { "epoch": 0.8664983728737883, "grad_norm": 0.1707507222890854, "learning_rate": 1.3408156049774498e-05, "loss": 0.10929840803146362, "step": 201830 }, { "epoch": 0.8665413049638082, "grad_norm": 2.6976828575134277, "learning_rate": 1.3403844329656872e-05, "loss": 0.10269479751586914, "step": 201840 }, { "epoch": 0.8665842370538283, "grad_norm": 2.0937705039978027, "learning_rate": 1.3399532609539251e-05, "loss": 0.18717145919799805, "step": 201850 }, { "epoch": 0.8666271691438483, "grad_norm": 1.711639642715454, "learning_rate": 1.3395220889421625e-05, "loss": 0.22680439949035644, "step": 201860 }, { "epoch": 0.8666701012338682, "grad_norm": 1.208844780921936, "learning_rate": 1.3390909169304002e-05, "loss": 0.2210688829421997, "step": 201870 }, { "epoch": 0.8667130333238883, "grad_norm": 0.007113473489880562, "learning_rate": 1.3386597449186378e-05, "loss": 0.19343369007110595, "step": 201880 }, { "epoch": 0.8667559654139083, "grad_norm": 0.32166412472724915, "learning_rate": 1.3382285729068755e-05, "loss": 0.2312183380126953, "step": 201890 }, { "epoch": 0.8667988975039282, "grad_norm": 0.40931782126426697, "learning_rate": 1.3377974008951133e-05, "loss": 0.24105579853057862, "step": 201900 }, { "epoch": 0.8668418295939483, "grad_norm": 0.032758649438619614, "learning_rate": 1.3373662288833508e-05, "loss": 0.07932702898979187, "step": 201910 }, { "epoch": 0.8668847616839683, "grad_norm": 0.058970626443624496, "learning_rate": 1.3369350568715886e-05, "loss": 0.2714974403381348, "step": 201920 }, { "epoch": 0.8669276937739883, "grad_norm": 0.2553642988204956, "learning_rate": 1.336503884859826e-05, "loss": 0.1437745451927185, "step": 201930 }, { "epoch": 0.8669706258640083, "grad_norm": 0.010126570239663124, "learning_rate": 1.3360727128480639e-05, "loss": 0.1601130485534668, "step": 201940 }, { "epoch": 0.8670135579540283, "grad_norm": 0.04212572053074837, "learning_rate": 1.3356415408363013e-05, "loss": 0.34910471439361573, "step": 201950 }, { "epoch": 0.8670564900440483, "grad_norm": 0.01635109633207321, "learning_rate": 1.335210368824539e-05, "loss": 0.04124734103679657, "step": 201960 }, { "epoch": 0.8670994221340683, "grad_norm": 0.22834289073944092, "learning_rate": 1.3347791968127766e-05, "loss": 0.09849913120269775, "step": 201970 }, { "epoch": 0.8671423542240884, "grad_norm": 3.879984140396118, "learning_rate": 1.3343480248010143e-05, "loss": 0.08664489984512329, "step": 201980 }, { "epoch": 0.8671852863141083, "grad_norm": 3.0354487895965576, "learning_rate": 1.3339168527892517e-05, "loss": 0.2665108680725098, "step": 201990 }, { "epoch": 0.8672282184041283, "grad_norm": 0.0027560857124626637, "learning_rate": 1.3334856807774896e-05, "loss": 0.14189740419387817, "step": 202000 }, { "epoch": 0.8672282184041283, "eval_loss": 0.38031283020973206, "eval_runtime": 27.421, "eval_samples_per_second": 3.647, "eval_steps_per_second": 3.647, "step": 202000 }, { "epoch": 0.8672711504941484, "grad_norm": 0.028320224955677986, "learning_rate": 1.333054508765727e-05, "loss": 0.23645930290222167, "step": 202010 }, { "epoch": 0.8673140825841683, "grad_norm": 2.802095651626587, "learning_rate": 1.3326233367539647e-05, "loss": 0.34255218505859375, "step": 202020 }, { "epoch": 0.8673570146741884, "grad_norm": 5.453604221343994, "learning_rate": 1.3321921647422023e-05, "loss": 0.20399942398071289, "step": 202030 }, { "epoch": 0.8673999467642084, "grad_norm": 0.44003716111183167, "learning_rate": 1.33176099273044e-05, "loss": 0.2527095079421997, "step": 202040 }, { "epoch": 0.8674428788542283, "grad_norm": 0.10289528965950012, "learning_rate": 1.3313298207186776e-05, "loss": 0.06380432844161987, "step": 202050 }, { "epoch": 0.8674858109442484, "grad_norm": 1.4219003915786743, "learning_rate": 1.3308986487069153e-05, "loss": 0.22886343002319337, "step": 202060 }, { "epoch": 0.8675287430342684, "grad_norm": 0.4398294687271118, "learning_rate": 1.3304674766951527e-05, "loss": 0.1260904312133789, "step": 202070 }, { "epoch": 0.8675716751242885, "grad_norm": 0.013393580913543701, "learning_rate": 1.3300363046833905e-05, "loss": 0.366540789604187, "step": 202080 }, { "epoch": 0.8676146072143084, "grad_norm": 0.033431414514780045, "learning_rate": 1.329605132671628e-05, "loss": 0.34116013050079347, "step": 202090 }, { "epoch": 0.8676575393043284, "grad_norm": 0.003720491658896208, "learning_rate": 1.3291739606598658e-05, "loss": 0.1548625946044922, "step": 202100 }, { "epoch": 0.8677004713943485, "grad_norm": 1.0757026672363281, "learning_rate": 1.3287427886481033e-05, "loss": 0.43051700592041015, "step": 202110 }, { "epoch": 0.8677434034843684, "grad_norm": 7.948141574859619, "learning_rate": 1.328311616636341e-05, "loss": 0.2517849445343018, "step": 202120 }, { "epoch": 0.8677863355743884, "grad_norm": 7.49459171295166, "learning_rate": 1.3278804446245784e-05, "loss": 0.18098866939544678, "step": 202130 }, { "epoch": 0.8678292676644085, "grad_norm": 2.5668296813964844, "learning_rate": 1.3274492726128162e-05, "loss": 0.2603480577468872, "step": 202140 }, { "epoch": 0.8678721997544284, "grad_norm": 0.0003951344988308847, "learning_rate": 1.3270181006010537e-05, "loss": 0.15042036771774292, "step": 202150 }, { "epoch": 0.8679151318444485, "grad_norm": 2.0018789768218994, "learning_rate": 1.3265869285892915e-05, "loss": 0.2826423406600952, "step": 202160 }, { "epoch": 0.8679580639344685, "grad_norm": 0.21826449036598206, "learning_rate": 1.326155756577529e-05, "loss": 0.17833973169326783, "step": 202170 }, { "epoch": 0.8680009960244884, "grad_norm": 0.00463878596201539, "learning_rate": 1.3257245845657668e-05, "loss": 0.16179614067077636, "step": 202180 }, { "epoch": 0.8680439281145085, "grad_norm": 4.259723663330078, "learning_rate": 1.3252934125540042e-05, "loss": 0.19961766004562378, "step": 202190 }, { "epoch": 0.8680868602045285, "grad_norm": 3.2912721633911133, "learning_rate": 1.324862240542242e-05, "loss": 0.24763474464416504, "step": 202200 }, { "epoch": 0.8681297922945485, "grad_norm": 1.176949143409729, "learning_rate": 1.3244310685304795e-05, "loss": 0.19105069637298583, "step": 202210 }, { "epoch": 0.8681727243845685, "grad_norm": 0.02803204394876957, "learning_rate": 1.3239998965187172e-05, "loss": 0.31554696559906004, "step": 202220 }, { "epoch": 0.8682156564745885, "grad_norm": 3.055330991744995, "learning_rate": 1.3235687245069548e-05, "loss": 0.1737144708633423, "step": 202230 }, { "epoch": 0.8682585885646085, "grad_norm": 0.016064148396253586, "learning_rate": 1.3231375524951925e-05, "loss": 0.1376855969429016, "step": 202240 }, { "epoch": 0.8683015206546285, "grad_norm": 0.05431525409221649, "learning_rate": 1.3227063804834299e-05, "loss": 0.19808392524719237, "step": 202250 }, { "epoch": 0.8683444527446486, "grad_norm": 0.003191595897078514, "learning_rate": 1.3222752084716678e-05, "loss": 0.12034578323364258, "step": 202260 }, { "epoch": 0.8683873848346685, "grad_norm": 0.004532682243734598, "learning_rate": 1.3218440364599055e-05, "loss": 0.10594029426574707, "step": 202270 }, { "epoch": 0.8684303169246885, "grad_norm": 0.002449595369398594, "learning_rate": 1.321412864448143e-05, "loss": 0.29216201305389405, "step": 202280 }, { "epoch": 0.8684732490147086, "grad_norm": 0.0005997202824801207, "learning_rate": 1.3209816924363808e-05, "loss": 0.3863027095794678, "step": 202290 }, { "epoch": 0.8685161811047285, "grad_norm": 3.148550271987915, "learning_rate": 1.3205505204246182e-05, "loss": 0.11254967451095581, "step": 202300 }, { "epoch": 0.8685591131947485, "grad_norm": 2.766737461090088, "learning_rate": 1.320119348412856e-05, "loss": 0.12313051223754883, "step": 202310 }, { "epoch": 0.8686020452847686, "grad_norm": 1.5454360246658325, "learning_rate": 1.3196881764010935e-05, "loss": 0.1295259952545166, "step": 202320 }, { "epoch": 0.8686449773747885, "grad_norm": 6.207314491271973, "learning_rate": 1.3192570043893313e-05, "loss": 0.17339755296707154, "step": 202330 }, { "epoch": 0.8686879094648086, "grad_norm": 1.4427553415298462, "learning_rate": 1.3188258323775687e-05, "loss": 0.18795595169067383, "step": 202340 }, { "epoch": 0.8687308415548286, "grad_norm": 0.0015251105651259422, "learning_rate": 1.3183946603658066e-05, "loss": 0.06718888878822327, "step": 202350 }, { "epoch": 0.8687737736448485, "grad_norm": 1.752411961555481, "learning_rate": 1.317963488354044e-05, "loss": 0.2728855848312378, "step": 202360 }, { "epoch": 0.8688167057348686, "grad_norm": 0.49718743562698364, "learning_rate": 1.3175323163422817e-05, "loss": 0.18400404453277588, "step": 202370 }, { "epoch": 0.8688596378248886, "grad_norm": 0.0021883861627429724, "learning_rate": 1.3171011443305193e-05, "loss": 0.06978545188903809, "step": 202380 }, { "epoch": 0.8689025699149086, "grad_norm": 0.030992716550827026, "learning_rate": 1.316669972318757e-05, "loss": 0.1565848708152771, "step": 202390 }, { "epoch": 0.8689455020049286, "grad_norm": 0.025812238454818726, "learning_rate": 1.3162388003069946e-05, "loss": 0.08083627223968506, "step": 202400 }, { "epoch": 0.8689884340949486, "grad_norm": 0.0011363154044374824, "learning_rate": 1.3158076282952323e-05, "loss": 0.15595753192901612, "step": 202410 }, { "epoch": 0.8690313661849686, "grad_norm": 2.2198967933654785, "learning_rate": 1.3153764562834697e-05, "loss": 0.23197317123413086, "step": 202420 }, { "epoch": 0.8690742982749886, "grad_norm": 0.9877627491950989, "learning_rate": 1.3149452842717074e-05, "loss": 0.13361701965332032, "step": 202430 }, { "epoch": 0.8691172303650087, "grad_norm": 0.1605759710073471, "learning_rate": 1.314514112259945e-05, "loss": 0.13915138244628905, "step": 202440 }, { "epoch": 0.8691601624550286, "grad_norm": 0.05874808132648468, "learning_rate": 1.3140829402481827e-05, "loss": 0.5318547248840332, "step": 202450 }, { "epoch": 0.8692030945450486, "grad_norm": 1.9270455837249756, "learning_rate": 1.3136517682364203e-05, "loss": 0.2182586669921875, "step": 202460 }, { "epoch": 0.8692460266350687, "grad_norm": 0.002081893617287278, "learning_rate": 1.313220596224658e-05, "loss": 0.1298648715019226, "step": 202470 }, { "epoch": 0.8692889587250886, "grad_norm": 0.01326664723455906, "learning_rate": 1.3127894242128954e-05, "loss": 0.18864309787750244, "step": 202480 }, { "epoch": 0.8693318908151086, "grad_norm": 0.02235686592757702, "learning_rate": 1.3123582522011332e-05, "loss": 0.16074566841125487, "step": 202490 }, { "epoch": 0.8693748229051287, "grad_norm": 0.0038623339496552944, "learning_rate": 1.3119270801893707e-05, "loss": 0.16679761409759522, "step": 202500 }, { "epoch": 0.8694177549951487, "grad_norm": 0.008476986549794674, "learning_rate": 1.3114959081776085e-05, "loss": 0.10375416278839111, "step": 202510 }, { "epoch": 0.8694606870851687, "grad_norm": 0.03022216260433197, "learning_rate": 1.311064736165846e-05, "loss": 0.11786410808563233, "step": 202520 }, { "epoch": 0.8695036191751887, "grad_norm": 2.8388748168945312, "learning_rate": 1.3106335641540838e-05, "loss": 0.26753456592559816, "step": 202530 }, { "epoch": 0.8695465512652087, "grad_norm": 2.872877836227417, "learning_rate": 1.3102023921423211e-05, "loss": 0.2310659408569336, "step": 202540 }, { "epoch": 0.8695894833552287, "grad_norm": 9.196786880493164, "learning_rate": 1.309771220130559e-05, "loss": 0.21355884075164794, "step": 202550 }, { "epoch": 0.8696324154452487, "grad_norm": 25.724925994873047, "learning_rate": 1.3093400481187964e-05, "loss": 0.15160589218139647, "step": 202560 }, { "epoch": 0.8696753475352688, "grad_norm": 0.0017343858489766717, "learning_rate": 1.3089088761070342e-05, "loss": 0.38728699684143064, "step": 202570 }, { "epoch": 0.8697182796252887, "grad_norm": 1.2419610023498535, "learning_rate": 1.3084777040952717e-05, "loss": 0.3935434818267822, "step": 202580 }, { "epoch": 0.8697612117153087, "grad_norm": 1.0418486595153809, "learning_rate": 1.3080465320835095e-05, "loss": 0.1654996395111084, "step": 202590 }, { "epoch": 0.8698041438053288, "grad_norm": 0.12601947784423828, "learning_rate": 1.3076153600717469e-05, "loss": 0.13802404403686525, "step": 202600 }, { "epoch": 0.8698470758953487, "grad_norm": 0.09665412455797195, "learning_rate": 1.3071841880599848e-05, "loss": 0.08990532755851746, "step": 202610 }, { "epoch": 0.8698900079853688, "grad_norm": 0.5923460125923157, "learning_rate": 1.3067530160482225e-05, "loss": 0.08348230719566345, "step": 202620 }, { "epoch": 0.8699329400753888, "grad_norm": 3.084900140762329, "learning_rate": 1.3063218440364599e-05, "loss": 0.16990399360656738, "step": 202630 }, { "epoch": 0.8699758721654087, "grad_norm": 0.005899806506931782, "learning_rate": 1.3058906720246976e-05, "loss": 0.18719682693481446, "step": 202640 }, { "epoch": 0.8700188042554288, "grad_norm": 3.733660936355591, "learning_rate": 1.3054595000129352e-05, "loss": 0.10188864469528199, "step": 202650 }, { "epoch": 0.8700617363454488, "grad_norm": 0.015237463638186455, "learning_rate": 1.305028328001173e-05, "loss": 0.10797481536865235, "step": 202660 }, { "epoch": 0.8701046684354687, "grad_norm": 0.6044527292251587, "learning_rate": 1.3045971559894105e-05, "loss": 0.19505347013473512, "step": 202670 }, { "epoch": 0.8701476005254888, "grad_norm": 0.004703771322965622, "learning_rate": 1.3041659839776482e-05, "loss": 0.0875515103340149, "step": 202680 }, { "epoch": 0.8701905326155088, "grad_norm": 0.9704412221908569, "learning_rate": 1.3037348119658856e-05, "loss": 0.4309795379638672, "step": 202690 }, { "epoch": 0.8702334647055288, "grad_norm": 0.047677453607320786, "learning_rate": 1.3033036399541235e-05, "loss": 0.04325348734855652, "step": 202700 }, { "epoch": 0.8702763967955488, "grad_norm": 4.24571418762207, "learning_rate": 1.302872467942361e-05, "loss": 0.10959751605987549, "step": 202710 }, { "epoch": 0.8703193288855688, "grad_norm": 2.259910821914673, "learning_rate": 1.3024412959305987e-05, "loss": 0.13984992504119872, "step": 202720 }, { "epoch": 0.8703622609755888, "grad_norm": 0.022040946409106255, "learning_rate": 1.3020101239188362e-05, "loss": 0.12837090492248535, "step": 202730 }, { "epoch": 0.8704051930656088, "grad_norm": 0.005013573449105024, "learning_rate": 1.301578951907074e-05, "loss": 0.02592419385910034, "step": 202740 }, { "epoch": 0.8704481251556289, "grad_norm": 0.007160715758800507, "learning_rate": 1.3011477798953114e-05, "loss": 0.03959584832191467, "step": 202750 }, { "epoch": 0.8704910572456488, "grad_norm": 0.22778935730457306, "learning_rate": 1.3007166078835493e-05, "loss": 0.14829691648483276, "step": 202760 }, { "epoch": 0.8705339893356688, "grad_norm": 0.0009955308632925153, "learning_rate": 1.3002854358717867e-05, "loss": 0.19827834367752076, "step": 202770 }, { "epoch": 0.8705769214256889, "grad_norm": 0.011871743947267532, "learning_rate": 1.2998542638600244e-05, "loss": 0.18226596117019653, "step": 202780 }, { "epoch": 0.8706198535157088, "grad_norm": 0.030095193535089493, "learning_rate": 1.299423091848262e-05, "loss": 0.11577250957489013, "step": 202790 }, { "epoch": 0.8706627856057289, "grad_norm": 1.2848188877105713, "learning_rate": 1.2989919198364997e-05, "loss": 0.29182541370391846, "step": 202800 }, { "epoch": 0.8707057176957489, "grad_norm": 0.03173932060599327, "learning_rate": 1.2985607478247373e-05, "loss": 0.08262947797775269, "step": 202810 }, { "epoch": 0.8707486497857688, "grad_norm": 3.3625714778900146, "learning_rate": 1.298129575812975e-05, "loss": 0.36866700649261475, "step": 202820 }, { "epoch": 0.8707915818757889, "grad_norm": 0.07918155938386917, "learning_rate": 1.2976984038012124e-05, "loss": 0.21780040264129638, "step": 202830 }, { "epoch": 0.8708345139658089, "grad_norm": 2.23683500289917, "learning_rate": 1.2972672317894501e-05, "loss": 0.05871073007583618, "step": 202840 }, { "epoch": 0.8708774460558288, "grad_norm": 1.5801154375076294, "learning_rate": 1.2968360597776877e-05, "loss": 0.1820102572441101, "step": 202850 }, { "epoch": 0.8709203781458489, "grad_norm": 0.0043759336695075035, "learning_rate": 1.2964048877659254e-05, "loss": 0.2038339853286743, "step": 202860 }, { "epoch": 0.8709633102358689, "grad_norm": 0.19102871417999268, "learning_rate": 1.295973715754163e-05, "loss": 0.3390310525894165, "step": 202870 }, { "epoch": 0.8710062423258889, "grad_norm": 1.966539978981018, "learning_rate": 1.2955425437424007e-05, "loss": 0.11660757064819335, "step": 202880 }, { "epoch": 0.8710491744159089, "grad_norm": 0.1771833449602127, "learning_rate": 1.2951113717306381e-05, "loss": 0.1814139485359192, "step": 202890 }, { "epoch": 0.8710921065059289, "grad_norm": 0.0787118449807167, "learning_rate": 1.294680199718876e-05, "loss": 0.37035412788391114, "step": 202900 }, { "epoch": 0.8711350385959489, "grad_norm": 0.6835107803344727, "learning_rate": 1.2942490277071134e-05, "loss": 0.13978594541549683, "step": 202910 }, { "epoch": 0.8711779706859689, "grad_norm": 9.422897338867188, "learning_rate": 1.2938178556953512e-05, "loss": 0.27941164970397947, "step": 202920 }, { "epoch": 0.871220902775989, "grad_norm": 0.5118245482444763, "learning_rate": 1.2933866836835887e-05, "loss": 0.12363041639328003, "step": 202930 }, { "epoch": 0.871263834866009, "grad_norm": 2.3696482181549072, "learning_rate": 1.2929555116718265e-05, "loss": 0.38212246894836427, "step": 202940 }, { "epoch": 0.8713067669560289, "grad_norm": 0.0280147772282362, "learning_rate": 1.2925243396600638e-05, "loss": 0.09542213678359986, "step": 202950 }, { "epoch": 0.871349699046049, "grad_norm": 2.087315082550049, "learning_rate": 1.2920931676483018e-05, "loss": 0.2335756540298462, "step": 202960 }, { "epoch": 0.871392631136069, "grad_norm": 3.394486904144287, "learning_rate": 1.2916619956365391e-05, "loss": 0.2918868541717529, "step": 202970 }, { "epoch": 0.871435563226089, "grad_norm": 0.46176642179489136, "learning_rate": 1.2912308236247769e-05, "loss": 0.030417990684509278, "step": 202980 }, { "epoch": 0.871478495316109, "grad_norm": 0.37049049139022827, "learning_rate": 1.2907996516130146e-05, "loss": 0.1741700291633606, "step": 202990 }, { "epoch": 0.871521427406129, "grad_norm": 0.10045011341571808, "learning_rate": 1.2903684796012522e-05, "loss": 0.30179901123046876, "step": 203000 }, { "epoch": 0.871521427406129, "eval_loss": 0.37936079502105713, "eval_runtime": 27.6142, "eval_samples_per_second": 3.621, "eval_steps_per_second": 3.621, "step": 203000 }, { "epoch": 0.871564359496149, "grad_norm": 2.178699254989624, "learning_rate": 1.28993730758949e-05, "loss": 0.18508745431900026, "step": 203010 }, { "epoch": 0.871607291586169, "grad_norm": 0.003770484123378992, "learning_rate": 1.2895061355777275e-05, "loss": 0.2334007740020752, "step": 203020 }, { "epoch": 0.871650223676189, "grad_norm": 0.01089702919125557, "learning_rate": 1.2890749635659652e-05, "loss": 0.21399302482604982, "step": 203030 }, { "epoch": 0.871693155766209, "grad_norm": 0.7750707268714905, "learning_rate": 1.2886437915542026e-05, "loss": 0.08867501616477966, "step": 203040 }, { "epoch": 0.871736087856229, "grad_norm": 0.03722615912556648, "learning_rate": 1.2882126195424405e-05, "loss": 0.17752952575683595, "step": 203050 }, { "epoch": 0.8717790199462491, "grad_norm": 0.010971063748002052, "learning_rate": 1.2877814475306779e-05, "loss": 0.22754526138305664, "step": 203060 }, { "epoch": 0.871821952036269, "grad_norm": 0.010999973863363266, "learning_rate": 1.2873502755189156e-05, "loss": 0.21283633708953859, "step": 203070 }, { "epoch": 0.871864884126289, "grad_norm": 0.09963197261095047, "learning_rate": 1.2869191035071532e-05, "loss": 0.3332798719406128, "step": 203080 }, { "epoch": 0.8719078162163091, "grad_norm": 0.012807992286980152, "learning_rate": 1.286487931495391e-05, "loss": 0.07219233512878417, "step": 203090 }, { "epoch": 0.871950748306329, "grad_norm": 60.12035369873047, "learning_rate": 1.2860567594836283e-05, "loss": 0.11430882215499878, "step": 203100 }, { "epoch": 0.8719936803963491, "grad_norm": 2.6776928901672363, "learning_rate": 1.2856255874718662e-05, "loss": 0.2610363483428955, "step": 203110 }, { "epoch": 0.8720366124863691, "grad_norm": 0.25831401348114014, "learning_rate": 1.2851944154601036e-05, "loss": 0.12929258346557618, "step": 203120 }, { "epoch": 0.872079544576389, "grad_norm": 1.7765833139419556, "learning_rate": 1.2847632434483414e-05, "loss": 0.2976083755493164, "step": 203130 }, { "epoch": 0.8721224766664091, "grad_norm": 0.03122456930577755, "learning_rate": 1.284332071436579e-05, "loss": 0.12493312358856201, "step": 203140 }, { "epoch": 0.8721654087564291, "grad_norm": 2.952826499938965, "learning_rate": 1.2839008994248167e-05, "loss": 0.3075044870376587, "step": 203150 }, { "epoch": 0.872208340846449, "grad_norm": 0.12431207299232483, "learning_rate": 1.2834697274130542e-05, "loss": 0.13878283500671387, "step": 203160 }, { "epoch": 0.8722512729364691, "grad_norm": 1.12087881565094, "learning_rate": 1.283038555401292e-05, "loss": 0.11472923755645752, "step": 203170 }, { "epoch": 0.8722942050264891, "grad_norm": 1.5404136180877686, "learning_rate": 1.2826073833895294e-05, "loss": 0.29040989875793455, "step": 203180 }, { "epoch": 0.8723371371165091, "grad_norm": 0.06525274366140366, "learning_rate": 1.2821762113777671e-05, "loss": 0.2566260814666748, "step": 203190 }, { "epoch": 0.8723800692065291, "grad_norm": 0.001346416654996574, "learning_rate": 1.2817450393660047e-05, "loss": 0.13974400758743286, "step": 203200 }, { "epoch": 0.8724230012965491, "grad_norm": 0.10522188991308212, "learning_rate": 1.2813138673542424e-05, "loss": 0.22829935550689698, "step": 203210 }, { "epoch": 0.8724659333865691, "grad_norm": 0.21968132257461548, "learning_rate": 1.28088269534248e-05, "loss": 0.0917892575263977, "step": 203220 }, { "epoch": 0.8725088654765891, "grad_norm": 1.885111927986145, "learning_rate": 1.2804515233307177e-05, "loss": 0.2599085092544556, "step": 203230 }, { "epoch": 0.8725517975666092, "grad_norm": 3.5005955696105957, "learning_rate": 1.2800203513189551e-05, "loss": 0.5183819770812989, "step": 203240 }, { "epoch": 0.8725947296566291, "grad_norm": 0.003291301429271698, "learning_rate": 1.2795891793071928e-05, "loss": 0.10750803947448731, "step": 203250 }, { "epoch": 0.8726376617466491, "grad_norm": 2.1088061332702637, "learning_rate": 1.2791580072954304e-05, "loss": 0.3345699071884155, "step": 203260 }, { "epoch": 0.8726805938366692, "grad_norm": 0.0036086903419345617, "learning_rate": 1.2787268352836681e-05, "loss": 0.35092260837554934, "step": 203270 }, { "epoch": 0.8727235259266891, "grad_norm": 0.058109551668167114, "learning_rate": 1.2782956632719057e-05, "loss": 0.3079172134399414, "step": 203280 }, { "epoch": 0.8727664580167092, "grad_norm": 0.005001907702535391, "learning_rate": 1.2778644912601434e-05, "loss": 0.05406479239463806, "step": 203290 }, { "epoch": 0.8728093901067292, "grad_norm": 1.243675708770752, "learning_rate": 1.2774333192483808e-05, "loss": 0.3355956792831421, "step": 203300 }, { "epoch": 0.8728523221967491, "grad_norm": 0.004065210931003094, "learning_rate": 1.2770021472366187e-05, "loss": 0.14164478778839112, "step": 203310 }, { "epoch": 0.8728952542867692, "grad_norm": 1.4152636528015137, "learning_rate": 1.2765709752248561e-05, "loss": 0.21300029754638672, "step": 203320 }, { "epoch": 0.8729381863767892, "grad_norm": 0.019872266799211502, "learning_rate": 1.2761398032130939e-05, "loss": 0.24908978939056398, "step": 203330 }, { "epoch": 0.8729811184668091, "grad_norm": 3.7372965812683105, "learning_rate": 1.2757086312013314e-05, "loss": 0.20218095779418946, "step": 203340 }, { "epoch": 0.8730240505568292, "grad_norm": 0.014889443293213844, "learning_rate": 1.2752774591895692e-05, "loss": 0.14196691513061524, "step": 203350 }, { "epoch": 0.8730669826468492, "grad_norm": 0.0060808053240180016, "learning_rate": 1.2748462871778069e-05, "loss": 0.22262256145477294, "step": 203360 }, { "epoch": 0.8731099147368693, "grad_norm": 0.009378614835441113, "learning_rate": 1.2744151151660445e-05, "loss": 0.10588387250900269, "step": 203370 }, { "epoch": 0.8731528468268892, "grad_norm": 0.8276627063751221, "learning_rate": 1.2739839431542822e-05, "loss": 0.09532456994056701, "step": 203380 }, { "epoch": 0.8731957789169092, "grad_norm": 1.4195549488067627, "learning_rate": 1.2735527711425196e-05, "loss": 0.16106001138687134, "step": 203390 }, { "epoch": 0.8732387110069293, "grad_norm": 0.006768465042114258, "learning_rate": 1.2731215991307575e-05, "loss": 0.11826251745223999, "step": 203400 }, { "epoch": 0.8732816430969492, "grad_norm": 0.013329996727406979, "learning_rate": 1.2726904271189949e-05, "loss": 0.20020360946655275, "step": 203410 }, { "epoch": 0.8733245751869693, "grad_norm": 1.7608102560043335, "learning_rate": 1.2722592551072326e-05, "loss": 0.29365689754486085, "step": 203420 }, { "epoch": 0.8733675072769893, "grad_norm": 1.9444129467010498, "learning_rate": 1.2718280830954702e-05, "loss": 0.19343478679656984, "step": 203430 }, { "epoch": 0.8734104393670092, "grad_norm": 2.3005573749542236, "learning_rate": 1.271396911083708e-05, "loss": 0.26775202751159666, "step": 203440 }, { "epoch": 0.8734533714570293, "grad_norm": 3.412327527999878, "learning_rate": 1.2709657390719453e-05, "loss": 0.24732573032379152, "step": 203450 }, { "epoch": 0.8734963035470493, "grad_norm": 0.0005757113103754818, "learning_rate": 1.2705345670601832e-05, "loss": 0.13644465208053588, "step": 203460 }, { "epoch": 0.8735392356370693, "grad_norm": 1.6123828887939453, "learning_rate": 1.2701033950484206e-05, "loss": 0.22131829261779784, "step": 203470 }, { "epoch": 0.8735821677270893, "grad_norm": 0.026981748640537262, "learning_rate": 1.2696722230366583e-05, "loss": 0.13728641271591185, "step": 203480 }, { "epoch": 0.8736250998171093, "grad_norm": 2.542603015899658, "learning_rate": 1.2692410510248959e-05, "loss": 0.24295291900634766, "step": 203490 }, { "epoch": 0.8736680319071293, "grad_norm": 0.00556629803031683, "learning_rate": 1.2688098790131336e-05, "loss": 0.2404294490814209, "step": 203500 }, { "epoch": 0.8737109639971493, "grad_norm": 0.724449098110199, "learning_rate": 1.268378707001371e-05, "loss": 0.09477731585502625, "step": 203510 }, { "epoch": 0.8737538960871694, "grad_norm": 2.029515027999878, "learning_rate": 1.267947534989609e-05, "loss": 0.27462499141693114, "step": 203520 }, { "epoch": 0.8737968281771893, "grad_norm": 2.440943479537964, "learning_rate": 1.2675163629778463e-05, "loss": 0.16564322710037233, "step": 203530 }, { "epoch": 0.8738397602672093, "grad_norm": 0.08441468328237534, "learning_rate": 1.267085190966084e-05, "loss": 0.2753757476806641, "step": 203540 }, { "epoch": 0.8738826923572294, "grad_norm": 1.7377372980117798, "learning_rate": 1.2666540189543216e-05, "loss": 0.22013463973999023, "step": 203550 }, { "epoch": 0.8739256244472493, "grad_norm": 1.6985223293304443, "learning_rate": 1.2662228469425594e-05, "loss": 0.11333926916122436, "step": 203560 }, { "epoch": 0.8739685565372693, "grad_norm": 1.478522777557373, "learning_rate": 1.265791674930797e-05, "loss": 0.3162508010864258, "step": 203570 }, { "epoch": 0.8740114886272894, "grad_norm": 0.9837937951087952, "learning_rate": 1.2653605029190347e-05, "loss": 0.18049405813217162, "step": 203580 }, { "epoch": 0.8740544207173093, "grad_norm": 0.0009803579887375236, "learning_rate": 1.264929330907272e-05, "loss": 0.10179203748703003, "step": 203590 }, { "epoch": 0.8740973528073294, "grad_norm": 2.1010031700134277, "learning_rate": 1.2644981588955098e-05, "loss": 0.197291100025177, "step": 203600 }, { "epoch": 0.8741402848973494, "grad_norm": 0.005292298272252083, "learning_rate": 1.2640669868837474e-05, "loss": 0.2430145263671875, "step": 203610 }, { "epoch": 0.8741832169873693, "grad_norm": 0.004215996712446213, "learning_rate": 1.2636358148719851e-05, "loss": 0.18761887550354003, "step": 203620 }, { "epoch": 0.8742261490773894, "grad_norm": 0.00525982491672039, "learning_rate": 1.2632046428602227e-05, "loss": 0.31954474449157716, "step": 203630 }, { "epoch": 0.8742690811674094, "grad_norm": 2.0667223930358887, "learning_rate": 1.2627734708484604e-05, "loss": 0.2860520362854004, "step": 203640 }, { "epoch": 0.8743120132574294, "grad_norm": 2.997504711151123, "learning_rate": 1.2623422988366978e-05, "loss": 0.13857897520065307, "step": 203650 }, { "epoch": 0.8743549453474494, "grad_norm": 6.365592956542969, "learning_rate": 1.2619111268249357e-05, "loss": 0.18075789213180543, "step": 203660 }, { "epoch": 0.8743978774374694, "grad_norm": 0.13455304503440857, "learning_rate": 1.2614799548131731e-05, "loss": 0.1939695358276367, "step": 203670 }, { "epoch": 0.8744408095274894, "grad_norm": 1.9599144458770752, "learning_rate": 1.2610487828014108e-05, "loss": 0.11105598211288452, "step": 203680 }, { "epoch": 0.8744837416175094, "grad_norm": 8.12281608581543, "learning_rate": 1.2606176107896484e-05, "loss": 0.1515058159828186, "step": 203690 }, { "epoch": 0.8745266737075295, "grad_norm": 0.005965727381408215, "learning_rate": 1.2601864387778861e-05, "loss": 0.2970985651016235, "step": 203700 }, { "epoch": 0.8745696057975494, "grad_norm": 2.7481706142425537, "learning_rate": 1.2597552667661239e-05, "loss": 0.22161006927490234, "step": 203710 }, { "epoch": 0.8746125378875694, "grad_norm": 0.00433462206274271, "learning_rate": 1.2593240947543614e-05, "loss": 0.2382342576980591, "step": 203720 }, { "epoch": 0.8746554699775895, "grad_norm": 0.04546516761183739, "learning_rate": 1.2588929227425992e-05, "loss": 0.3762409210205078, "step": 203730 }, { "epoch": 0.8746984020676094, "grad_norm": 1.7207748889923096, "learning_rate": 1.2584617507308366e-05, "loss": 0.2575163602828979, "step": 203740 }, { "epoch": 0.8747413341576294, "grad_norm": 1.7368102073669434, "learning_rate": 1.2580305787190743e-05, "loss": 0.05370763540267944, "step": 203750 }, { "epoch": 0.8747842662476495, "grad_norm": 0.006718280725181103, "learning_rate": 1.2575994067073119e-05, "loss": 0.15436301231384278, "step": 203760 }, { "epoch": 0.8748271983376694, "grad_norm": 0.3010559678077698, "learning_rate": 1.2571682346955496e-05, "loss": 0.1848167061805725, "step": 203770 }, { "epoch": 0.8748701304276895, "grad_norm": 1.1568249464035034, "learning_rate": 1.2567370626837872e-05, "loss": 0.24243266582489015, "step": 203780 }, { "epoch": 0.8749130625177095, "grad_norm": 0.07093426585197449, "learning_rate": 1.2563058906720249e-05, "loss": 0.11582286357879638, "step": 203790 }, { "epoch": 0.8749559946077295, "grad_norm": 0.01068392489105463, "learning_rate": 1.2558747186602623e-05, "loss": 0.29853043556213377, "step": 203800 }, { "epoch": 0.8749989266977495, "grad_norm": 0.0020451010204851627, "learning_rate": 1.2554435466485002e-05, "loss": 0.38561258316040037, "step": 203810 }, { "epoch": 0.8750418587877695, "grad_norm": 0.0017150049097836018, "learning_rate": 1.2550123746367376e-05, "loss": 0.12902814149856567, "step": 203820 }, { "epoch": 0.8750847908777896, "grad_norm": 0.10915122926235199, "learning_rate": 1.2545812026249753e-05, "loss": 0.08772753477096558, "step": 203830 }, { "epoch": 0.8751277229678095, "grad_norm": 2.126547336578369, "learning_rate": 1.2541500306132129e-05, "loss": 0.29000654220581057, "step": 203840 }, { "epoch": 0.8751706550578295, "grad_norm": 0.010257849469780922, "learning_rate": 1.2537188586014506e-05, "loss": 0.040891838073730466, "step": 203850 }, { "epoch": 0.8752135871478496, "grad_norm": 0.006925768218934536, "learning_rate": 1.253287686589688e-05, "loss": 0.18998026847839355, "step": 203860 }, { "epoch": 0.8752565192378695, "grad_norm": 1.4014134407043457, "learning_rate": 1.252856514577926e-05, "loss": 0.17409541606903076, "step": 203870 }, { "epoch": 0.8752994513278896, "grad_norm": 1.6496857404708862, "learning_rate": 1.2524253425661633e-05, "loss": 0.23374514579772948, "step": 203880 }, { "epoch": 0.8753423834179096, "grad_norm": 2.2921431064605713, "learning_rate": 1.251994170554401e-05, "loss": 0.1461879253387451, "step": 203890 }, { "epoch": 0.8753853155079295, "grad_norm": 0.2534264624118805, "learning_rate": 1.2515629985426386e-05, "loss": 0.19159697294235228, "step": 203900 }, { "epoch": 0.8754282475979496, "grad_norm": 1.1164767742156982, "learning_rate": 1.2511318265308763e-05, "loss": 0.1750656008720398, "step": 203910 }, { "epoch": 0.8754711796879696, "grad_norm": 0.02214355207979679, "learning_rate": 1.2507006545191139e-05, "loss": 0.18490495681762695, "step": 203920 }, { "epoch": 0.8755141117779895, "grad_norm": 0.09135203063488007, "learning_rate": 1.2502694825073516e-05, "loss": 0.13188610076904297, "step": 203930 }, { "epoch": 0.8755570438680096, "grad_norm": 0.01683737337589264, "learning_rate": 1.2498383104955892e-05, "loss": 0.06789767146110534, "step": 203940 }, { "epoch": 0.8755999759580296, "grad_norm": 8.750083923339844, "learning_rate": 1.2494071384838268e-05, "loss": 0.31661880016326904, "step": 203950 }, { "epoch": 0.8756429080480496, "grad_norm": 2.9150664806365967, "learning_rate": 1.2489759664720645e-05, "loss": 0.1383286952972412, "step": 203960 }, { "epoch": 0.8756858401380696, "grad_norm": 0.008152827620506287, "learning_rate": 1.248544794460302e-05, "loss": 0.14235267639160157, "step": 203970 }, { "epoch": 0.8757287722280896, "grad_norm": 0.06607159972190857, "learning_rate": 1.2481136224485396e-05, "loss": 0.26250791549682617, "step": 203980 }, { "epoch": 0.8757717043181096, "grad_norm": 0.08335408568382263, "learning_rate": 1.2476824504367774e-05, "loss": 0.42444653511047364, "step": 203990 }, { "epoch": 0.8758146364081296, "grad_norm": 0.0037135977763682604, "learning_rate": 1.247251278425015e-05, "loss": 0.0763792335987091, "step": 204000 }, { "epoch": 0.8758146364081296, "eval_loss": 0.3746587336063385, "eval_runtime": 27.3661, "eval_samples_per_second": 3.654, "eval_steps_per_second": 3.654, "step": 204000 }, { "epoch": 0.8758575684981497, "grad_norm": 0.09524694830179214, "learning_rate": 1.2468201064132525e-05, "loss": 0.019021494686603545, "step": 204010 }, { "epoch": 0.8759005005881696, "grad_norm": 0.05071650445461273, "learning_rate": 1.2463889344014902e-05, "loss": 0.4243476390838623, "step": 204020 }, { "epoch": 0.8759434326781896, "grad_norm": 1.4659315347671509, "learning_rate": 1.2459577623897278e-05, "loss": 0.1035999059677124, "step": 204030 }, { "epoch": 0.8759863647682097, "grad_norm": 1.1015068292617798, "learning_rate": 1.2455265903779655e-05, "loss": 0.06280344724655151, "step": 204040 }, { "epoch": 0.8760292968582296, "grad_norm": 7.034282207489014, "learning_rate": 1.2450954183662031e-05, "loss": 0.29902329444885256, "step": 204050 }, { "epoch": 0.8760722289482497, "grad_norm": 1.9581341743469238, "learning_rate": 1.2446642463544407e-05, "loss": 0.37990517616271974, "step": 204060 }, { "epoch": 0.8761151610382697, "grad_norm": 0.0010750473011285067, "learning_rate": 1.2442330743426784e-05, "loss": 0.22955756187438964, "step": 204070 }, { "epoch": 0.8761580931282896, "grad_norm": 0.11583001166582108, "learning_rate": 1.243801902330916e-05, "loss": 0.11538155078887939, "step": 204080 }, { "epoch": 0.8762010252183097, "grad_norm": 0.027170462533831596, "learning_rate": 1.2433707303191535e-05, "loss": 0.13683617115020752, "step": 204090 }, { "epoch": 0.8762439573083297, "grad_norm": 0.025869445875287056, "learning_rate": 1.2429395583073913e-05, "loss": 0.21306424140930175, "step": 204100 }, { "epoch": 0.8762868893983496, "grad_norm": 6.191617488861084, "learning_rate": 1.2425083862956288e-05, "loss": 0.41753354072570803, "step": 204110 }, { "epoch": 0.8763298214883697, "grad_norm": 1.990005373954773, "learning_rate": 1.2420772142838664e-05, "loss": 0.3240317106246948, "step": 204120 }, { "epoch": 0.8763727535783897, "grad_norm": 40.56575393676758, "learning_rate": 1.2416460422721041e-05, "loss": 0.28506391048431395, "step": 204130 }, { "epoch": 0.8764156856684097, "grad_norm": 2.0711939334869385, "learning_rate": 1.2412148702603417e-05, "loss": 0.29656229019165037, "step": 204140 }, { "epoch": 0.8764586177584297, "grad_norm": 2.7142903804779053, "learning_rate": 1.2407836982485793e-05, "loss": 0.22822027206420897, "step": 204150 }, { "epoch": 0.8765015498484497, "grad_norm": 0.010692157782614231, "learning_rate": 1.240352526236817e-05, "loss": 0.14877686500549317, "step": 204160 }, { "epoch": 0.8765444819384697, "grad_norm": 0.011316151358187199, "learning_rate": 1.2399213542250546e-05, "loss": 0.31983926296234133, "step": 204170 }, { "epoch": 0.8765874140284897, "grad_norm": 0.02851453237235546, "learning_rate": 1.2394901822132921e-05, "loss": 0.2908674716949463, "step": 204180 }, { "epoch": 0.8766303461185098, "grad_norm": 0.03520075976848602, "learning_rate": 1.2390590102015299e-05, "loss": 0.09001702666282654, "step": 204190 }, { "epoch": 0.8766732782085297, "grad_norm": 0.028541959822177887, "learning_rate": 1.2386278381897674e-05, "loss": 0.1605126976966858, "step": 204200 }, { "epoch": 0.8767162102985497, "grad_norm": 0.47786813974380493, "learning_rate": 1.238196666178005e-05, "loss": 0.07542160153388977, "step": 204210 }, { "epoch": 0.8767591423885698, "grad_norm": 0.0031443950720131397, "learning_rate": 1.2377654941662427e-05, "loss": 0.07932630181312561, "step": 204220 }, { "epoch": 0.8768020744785898, "grad_norm": 0.08970996737480164, "learning_rate": 1.2373343221544803e-05, "loss": 0.2293954610824585, "step": 204230 }, { "epoch": 0.8768450065686098, "grad_norm": 0.015377390198409557, "learning_rate": 1.2369031501427179e-05, "loss": 0.03166041374206543, "step": 204240 }, { "epoch": 0.8768879386586298, "grad_norm": 0.0009102340554818511, "learning_rate": 1.2364719781309556e-05, "loss": 0.17050247192382811, "step": 204250 }, { "epoch": 0.8769308707486498, "grad_norm": 2.917968511581421, "learning_rate": 1.2360408061191933e-05, "loss": 0.34798197746276854, "step": 204260 }, { "epoch": 0.8769738028386698, "grad_norm": 0.18242336809635162, "learning_rate": 1.2356096341074309e-05, "loss": 0.12703689336776733, "step": 204270 }, { "epoch": 0.8770167349286898, "grad_norm": 0.12199801206588745, "learning_rate": 1.2351784620956686e-05, "loss": 0.17488703727722169, "step": 204280 }, { "epoch": 0.8770596670187099, "grad_norm": 0.022362831979990005, "learning_rate": 1.2347472900839062e-05, "loss": 0.2593738079071045, "step": 204290 }, { "epoch": 0.8771025991087298, "grad_norm": 0.08351355046033859, "learning_rate": 1.2343161180721437e-05, "loss": 0.08270981311798095, "step": 204300 }, { "epoch": 0.8771455311987498, "grad_norm": 1.2442940473556519, "learning_rate": 1.2338849460603815e-05, "loss": 0.18074887990951538, "step": 204310 }, { "epoch": 0.8771884632887699, "grad_norm": 1.6640584468841553, "learning_rate": 1.233453774048619e-05, "loss": 0.30208401679992675, "step": 204320 }, { "epoch": 0.8772313953787898, "grad_norm": 0.015673568472266197, "learning_rate": 1.2330226020368566e-05, "loss": 0.10918002128601074, "step": 204330 }, { "epoch": 0.8772743274688098, "grad_norm": 0.13469268381595612, "learning_rate": 1.2325914300250943e-05, "loss": 0.0036360248923301697, "step": 204340 }, { "epoch": 0.8773172595588299, "grad_norm": 7.4689764976501465, "learning_rate": 1.2321602580133319e-05, "loss": 0.22000887393951415, "step": 204350 }, { "epoch": 0.8773601916488498, "grad_norm": 0.24244554340839386, "learning_rate": 1.2317290860015695e-05, "loss": 0.09282507896423339, "step": 204360 }, { "epoch": 0.8774031237388699, "grad_norm": 0.7553274631500244, "learning_rate": 1.2312979139898072e-05, "loss": 0.3855604648590088, "step": 204370 }, { "epoch": 0.8774460558288899, "grad_norm": 0.07456060498952866, "learning_rate": 1.2308667419780448e-05, "loss": 0.14472362995147706, "step": 204380 }, { "epoch": 0.8774889879189098, "grad_norm": 1.7638195753097534, "learning_rate": 1.2304355699662825e-05, "loss": 0.09877208471298218, "step": 204390 }, { "epoch": 0.8775319200089299, "grad_norm": 0.023816445842385292, "learning_rate": 1.23000439795452e-05, "loss": 0.14562071561813356, "step": 204400 }, { "epoch": 0.8775748520989499, "grad_norm": 0.5713729858398438, "learning_rate": 1.2295732259427576e-05, "loss": 0.14468239545822142, "step": 204410 }, { "epoch": 0.8776177841889699, "grad_norm": 0.041681379079818726, "learning_rate": 1.2291420539309954e-05, "loss": 0.12000983953475952, "step": 204420 }, { "epoch": 0.8776607162789899, "grad_norm": 0.02087230794131756, "learning_rate": 1.228710881919233e-05, "loss": 0.410722541809082, "step": 204430 }, { "epoch": 0.8777036483690099, "grad_norm": 0.0039020644035190344, "learning_rate": 1.2282797099074705e-05, "loss": 0.027035737037658693, "step": 204440 }, { "epoch": 0.8777465804590299, "grad_norm": 0.07208481431007385, "learning_rate": 1.2278485378957082e-05, "loss": 0.1252536416053772, "step": 204450 }, { "epoch": 0.8777895125490499, "grad_norm": 0.02558734640479088, "learning_rate": 1.2274173658839458e-05, "loss": 0.14597811698913574, "step": 204460 }, { "epoch": 0.87783244463907, "grad_norm": 0.006909816525876522, "learning_rate": 1.2269861938721834e-05, "loss": 0.2177650213241577, "step": 204470 }, { "epoch": 0.8778753767290899, "grad_norm": 0.0023716725409030914, "learning_rate": 1.2265550218604211e-05, "loss": 0.16003108024597168, "step": 204480 }, { "epoch": 0.8779183088191099, "grad_norm": 0.0030329038854688406, "learning_rate": 1.2261238498486587e-05, "loss": 0.06489881873130798, "step": 204490 }, { "epoch": 0.87796124090913, "grad_norm": 1.8486450910568237, "learning_rate": 1.2256926778368962e-05, "loss": 0.19034510850906372, "step": 204500 }, { "epoch": 0.8780041729991499, "grad_norm": 0.08171185106039047, "learning_rate": 1.225261505825134e-05, "loss": 0.148298442363739, "step": 204510 }, { "epoch": 0.8780471050891699, "grad_norm": 0.00731939310207963, "learning_rate": 1.2248303338133715e-05, "loss": 0.12092814445495606, "step": 204520 }, { "epoch": 0.87809003717919, "grad_norm": 2.0191352367401123, "learning_rate": 1.2243991618016091e-05, "loss": 0.23588323593139648, "step": 204530 }, { "epoch": 0.8781329692692099, "grad_norm": 0.01967192441225052, "learning_rate": 1.2239679897898468e-05, "loss": 0.2921638488769531, "step": 204540 }, { "epoch": 0.87817590135923, "grad_norm": 2.9450790882110596, "learning_rate": 1.2235368177780844e-05, "loss": 0.18513665199279786, "step": 204550 }, { "epoch": 0.87821883344925, "grad_norm": 0.6629845499992371, "learning_rate": 1.223105645766322e-05, "loss": 0.26061363220214845, "step": 204560 }, { "epoch": 0.8782617655392699, "grad_norm": 3.5497055053710938, "learning_rate": 1.2226744737545597e-05, "loss": 0.27632803916931153, "step": 204570 }, { "epoch": 0.87830469762929, "grad_norm": 0.026790611445903778, "learning_rate": 1.2222433017427973e-05, "loss": 0.2509056329727173, "step": 204580 }, { "epoch": 0.87834762971931, "grad_norm": 0.017180632799863815, "learning_rate": 1.2218121297310348e-05, "loss": 0.15285614728927613, "step": 204590 }, { "epoch": 0.87839056180933, "grad_norm": 0.002091821050271392, "learning_rate": 1.2213809577192726e-05, "loss": 0.3005859136581421, "step": 204600 }, { "epoch": 0.87843349389935, "grad_norm": 0.010604831390082836, "learning_rate": 1.2209497857075101e-05, "loss": 0.16785238981246947, "step": 204610 }, { "epoch": 0.87847642598937, "grad_norm": 7.6037468910217285, "learning_rate": 1.2205186136957479e-05, "loss": 0.281678295135498, "step": 204620 }, { "epoch": 0.87851935807939, "grad_norm": 0.0036886732559651136, "learning_rate": 1.2200874416839856e-05, "loss": 0.0634632170200348, "step": 204630 }, { "epoch": 0.87856229016941, "grad_norm": 0.044636115431785583, "learning_rate": 1.2196562696722232e-05, "loss": 0.26335391998291013, "step": 204640 }, { "epoch": 0.87860522225943, "grad_norm": 0.009788943454623222, "learning_rate": 1.2192250976604607e-05, "loss": 0.20432703495025634, "step": 204650 }, { "epoch": 0.8786481543494501, "grad_norm": 2.5842597484588623, "learning_rate": 1.2187939256486985e-05, "loss": 0.2513580322265625, "step": 204660 }, { "epoch": 0.87869108643947, "grad_norm": 4.397493839263916, "learning_rate": 1.218362753636936e-05, "loss": 0.24058179855346679, "step": 204670 }, { "epoch": 0.8787340185294901, "grad_norm": 0.0031634909100830555, "learning_rate": 1.2179315816251736e-05, "loss": 0.09127176403999329, "step": 204680 }, { "epoch": 0.8787769506195101, "grad_norm": 0.06499703228473663, "learning_rate": 1.2175004096134113e-05, "loss": 0.10204474925994873, "step": 204690 }, { "epoch": 0.87881988270953, "grad_norm": 0.9429930448532104, "learning_rate": 1.2170692376016489e-05, "loss": 0.17954732179641725, "step": 204700 }, { "epoch": 0.8788628147995501, "grad_norm": 0.002309724921360612, "learning_rate": 1.2166380655898864e-05, "loss": 0.337858247756958, "step": 204710 }, { "epoch": 0.8789057468895701, "grad_norm": 0.00705463532358408, "learning_rate": 1.2162068935781242e-05, "loss": 0.30138473510742186, "step": 204720 }, { "epoch": 0.8789486789795901, "grad_norm": 0.021212739869952202, "learning_rate": 1.2157757215663617e-05, "loss": 0.16993517875671388, "step": 204730 }, { "epoch": 0.8789916110696101, "grad_norm": 0.004442409612238407, "learning_rate": 1.2153445495545993e-05, "loss": 0.30782337188720704, "step": 204740 }, { "epoch": 0.8790345431596301, "grad_norm": 0.022244207561016083, "learning_rate": 1.214913377542837e-05, "loss": 0.20544323921203614, "step": 204750 }, { "epoch": 0.8790774752496501, "grad_norm": 4.958491325378418, "learning_rate": 1.2144822055310746e-05, "loss": 0.09967964887619019, "step": 204760 }, { "epoch": 0.8791204073396701, "grad_norm": 0.07501170039176941, "learning_rate": 1.2140510335193123e-05, "loss": 0.16576032638549804, "step": 204770 }, { "epoch": 0.8791633394296902, "grad_norm": 0.012495110742747784, "learning_rate": 1.2136198615075499e-05, "loss": 0.006057353690266609, "step": 204780 }, { "epoch": 0.8792062715197101, "grad_norm": 0.05930882692337036, "learning_rate": 1.2131886894957875e-05, "loss": 0.1349782109260559, "step": 204790 }, { "epoch": 0.8792492036097301, "grad_norm": 0.00040715167415328324, "learning_rate": 1.2127575174840252e-05, "loss": 0.09597882628440857, "step": 204800 }, { "epoch": 0.8792921356997502, "grad_norm": 0.31938889622688293, "learning_rate": 1.2123263454722628e-05, "loss": 0.11955327987670898, "step": 204810 }, { "epoch": 0.8793350677897701, "grad_norm": 0.5764264464378357, "learning_rate": 1.2118951734605003e-05, "loss": 0.13746170997619628, "step": 204820 }, { "epoch": 0.8793779998797902, "grad_norm": 0.010667004622519016, "learning_rate": 1.211464001448738e-05, "loss": 0.05581216812133789, "step": 204830 }, { "epoch": 0.8794209319698102, "grad_norm": 0.3324984610080719, "learning_rate": 1.2110328294369756e-05, "loss": 0.16010618209838867, "step": 204840 }, { "epoch": 0.8794638640598301, "grad_norm": 0.04684317484498024, "learning_rate": 1.2106016574252132e-05, "loss": 0.18404499292373658, "step": 204850 }, { "epoch": 0.8795067961498502, "grad_norm": 0.135576069355011, "learning_rate": 1.210170485413451e-05, "loss": 0.22846548557281493, "step": 204860 }, { "epoch": 0.8795497282398702, "grad_norm": 0.21464096009731293, "learning_rate": 1.2097393134016885e-05, "loss": 0.09315667152404786, "step": 204870 }, { "epoch": 0.8795926603298901, "grad_norm": 1.5333083868026733, "learning_rate": 1.209308141389926e-05, "loss": 0.18947761058807372, "step": 204880 }, { "epoch": 0.8796355924199102, "grad_norm": 0.037847794592380524, "learning_rate": 1.2088769693781638e-05, "loss": 0.10737059116363526, "step": 204890 }, { "epoch": 0.8796785245099302, "grad_norm": 0.03366300091147423, "learning_rate": 1.2084457973664014e-05, "loss": 0.08891225457191468, "step": 204900 }, { "epoch": 0.8797214565999502, "grad_norm": 0.09716782718896866, "learning_rate": 1.208014625354639e-05, "loss": 0.11172068119049072, "step": 204910 }, { "epoch": 0.8797643886899702, "grad_norm": 6.475722312927246, "learning_rate": 1.2075834533428767e-05, "loss": 0.27301716804504395, "step": 204920 }, { "epoch": 0.8798073207799902, "grad_norm": 1.4842710494995117, "learning_rate": 1.2071522813311142e-05, "loss": 0.14906420707702636, "step": 204930 }, { "epoch": 0.8798502528700102, "grad_norm": 0.01469878014177084, "learning_rate": 1.2067211093193518e-05, "loss": 0.16778630018234253, "step": 204940 }, { "epoch": 0.8798931849600302, "grad_norm": 0.05601226165890694, "learning_rate": 1.2062899373075895e-05, "loss": 0.30366196632385256, "step": 204950 }, { "epoch": 0.8799361170500503, "grad_norm": 0.02477823570370674, "learning_rate": 1.2058587652958271e-05, "loss": 0.05100439190864563, "step": 204960 }, { "epoch": 0.8799790491400702, "grad_norm": 0.0359761118888855, "learning_rate": 1.2054275932840647e-05, "loss": 0.2062239408493042, "step": 204970 }, { "epoch": 0.8800219812300902, "grad_norm": 5.375646591186523, "learning_rate": 1.2049964212723024e-05, "loss": 0.17303190231323243, "step": 204980 }, { "epoch": 0.8800649133201103, "grad_norm": 0.01700400933623314, "learning_rate": 1.2045652492605401e-05, "loss": 0.2812765836715698, "step": 204990 }, { "epoch": 0.8801078454101302, "grad_norm": 0.07763543725013733, "learning_rate": 1.2041340772487777e-05, "loss": 0.20389330387115479, "step": 205000 }, { "epoch": 0.8801078454101302, "eval_loss": 0.37906956672668457, "eval_runtime": 27.4916, "eval_samples_per_second": 3.637, "eval_steps_per_second": 3.637, "step": 205000 }, { "epoch": 0.8801507775001502, "grad_norm": 0.5729861855506897, "learning_rate": 1.2037029052370154e-05, "loss": 0.2167433738708496, "step": 205010 }, { "epoch": 0.8801937095901703, "grad_norm": 0.0022135234903544188, "learning_rate": 1.203271733225253e-05, "loss": 0.1361485242843628, "step": 205020 }, { "epoch": 0.8802366416801902, "grad_norm": 3.3651068210601807, "learning_rate": 1.2028405612134906e-05, "loss": 0.25268898010253904, "step": 205030 }, { "epoch": 0.8802795737702103, "grad_norm": 2.356663465499878, "learning_rate": 1.2024093892017283e-05, "loss": 0.22077784538269044, "step": 205040 }, { "epoch": 0.8803225058602303, "grad_norm": 0.0038632149808108807, "learning_rate": 1.2019782171899659e-05, "loss": 0.3469837665557861, "step": 205050 }, { "epoch": 0.8803654379502502, "grad_norm": 0.06996764987707138, "learning_rate": 1.2015470451782034e-05, "loss": 0.31807005405426025, "step": 205060 }, { "epoch": 0.8804083700402703, "grad_norm": 0.45123931765556335, "learning_rate": 1.2011158731664412e-05, "loss": 0.07297312021255493, "step": 205070 }, { "epoch": 0.8804513021302903, "grad_norm": 0.17306841909885406, "learning_rate": 1.2006847011546787e-05, "loss": 0.18359951972961425, "step": 205080 }, { "epoch": 0.8804942342203104, "grad_norm": 0.001680709421634674, "learning_rate": 1.2002535291429163e-05, "loss": 0.08356087803840637, "step": 205090 }, { "epoch": 0.8805371663103303, "grad_norm": 1.4745081663131714, "learning_rate": 1.199822357131154e-05, "loss": 0.30778489112854, "step": 205100 }, { "epoch": 0.8805800984003503, "grad_norm": 0.49757787585258484, "learning_rate": 1.1993911851193916e-05, "loss": 0.19059680700302123, "step": 205110 }, { "epoch": 0.8806230304903704, "grad_norm": 0.7082939147949219, "learning_rate": 1.1989600131076292e-05, "loss": 0.26523666381835936, "step": 205120 }, { "epoch": 0.8806659625803903, "grad_norm": 3.5472426414489746, "learning_rate": 1.1985288410958669e-05, "loss": 0.12654772996902466, "step": 205130 }, { "epoch": 0.8807088946704104, "grad_norm": 0.035611625760793686, "learning_rate": 1.1980976690841045e-05, "loss": 0.1481760025024414, "step": 205140 }, { "epoch": 0.8807518267604304, "grad_norm": 0.024171432480216026, "learning_rate": 1.1976664970723422e-05, "loss": 0.11591238975524902, "step": 205150 }, { "epoch": 0.8807947588504503, "grad_norm": 5.451761245727539, "learning_rate": 1.1972353250605797e-05, "loss": 0.23077406883239746, "step": 205160 }, { "epoch": 0.8808376909404704, "grad_norm": 0.3710680305957794, "learning_rate": 1.1968041530488173e-05, "loss": 0.21281032562255858, "step": 205170 }, { "epoch": 0.8808806230304904, "grad_norm": 0.9765964150428772, "learning_rate": 1.196372981037055e-05, "loss": 0.1998102068901062, "step": 205180 }, { "epoch": 0.8809235551205103, "grad_norm": 0.42906275391578674, "learning_rate": 1.1959418090252926e-05, "loss": 0.16494978666305543, "step": 205190 }, { "epoch": 0.8809664872105304, "grad_norm": 0.009923688136041164, "learning_rate": 1.1955106370135302e-05, "loss": 0.1205499529838562, "step": 205200 }, { "epoch": 0.8810094193005504, "grad_norm": 0.004118237178772688, "learning_rate": 1.1950794650017679e-05, "loss": 0.11395120620727539, "step": 205210 }, { "epoch": 0.8810523513905704, "grad_norm": 0.128899484872818, "learning_rate": 1.1946482929900055e-05, "loss": 0.2525834083557129, "step": 205220 }, { "epoch": 0.8810952834805904, "grad_norm": 0.007528554182499647, "learning_rate": 1.194217120978243e-05, "loss": 0.2052382230758667, "step": 205230 }, { "epoch": 0.8811382155706105, "grad_norm": 0.9687526822090149, "learning_rate": 1.1937859489664808e-05, "loss": 0.4134235382080078, "step": 205240 }, { "epoch": 0.8811811476606304, "grad_norm": 0.0006184322410263121, "learning_rate": 1.1933547769547183e-05, "loss": 0.42797160148620605, "step": 205250 }, { "epoch": 0.8812240797506504, "grad_norm": 1.1402137279510498, "learning_rate": 1.1929236049429559e-05, "loss": 0.4267298698425293, "step": 205260 }, { "epoch": 0.8812670118406705, "grad_norm": 0.17912384867668152, "learning_rate": 1.1924924329311936e-05, "loss": 0.09835328459739685, "step": 205270 }, { "epoch": 0.8813099439306904, "grad_norm": 4.9136061668396, "learning_rate": 1.1920612609194312e-05, "loss": 0.0916410207748413, "step": 205280 }, { "epoch": 0.8813528760207104, "grad_norm": 0.00036412899498827755, "learning_rate": 1.1916300889076688e-05, "loss": 0.29571709632873533, "step": 205290 }, { "epoch": 0.8813958081107305, "grad_norm": 0.12275026738643646, "learning_rate": 1.1911989168959065e-05, "loss": 0.13231356143951417, "step": 205300 }, { "epoch": 0.8814387402007504, "grad_norm": 0.013888251036405563, "learning_rate": 1.190767744884144e-05, "loss": 0.0009010875597596169, "step": 205310 }, { "epoch": 0.8814816722907705, "grad_norm": 0.5148999691009521, "learning_rate": 1.1903365728723816e-05, "loss": 0.33445086479187014, "step": 205320 }, { "epoch": 0.8815246043807905, "grad_norm": 7.188920974731445, "learning_rate": 1.1899054008606194e-05, "loss": 0.5070527076721192, "step": 205330 }, { "epoch": 0.8815675364708104, "grad_norm": 0.6675366163253784, "learning_rate": 1.189474228848857e-05, "loss": 0.24539120197296144, "step": 205340 }, { "epoch": 0.8816104685608305, "grad_norm": 1.1379445791244507, "learning_rate": 1.1890430568370947e-05, "loss": 0.20651309490203856, "step": 205350 }, { "epoch": 0.8816534006508505, "grad_norm": 0.5539625883102417, "learning_rate": 1.1886118848253324e-05, "loss": 0.07830089330673218, "step": 205360 }, { "epoch": 0.8816963327408704, "grad_norm": 1.806531310081482, "learning_rate": 1.18818071281357e-05, "loss": 0.2538323879241943, "step": 205370 }, { "epoch": 0.8817392648308905, "grad_norm": 0.011052205227315426, "learning_rate": 1.1877495408018075e-05, "loss": 0.07202536463737488, "step": 205380 }, { "epoch": 0.8817821969209105, "grad_norm": 0.9535105228424072, "learning_rate": 1.1873183687900453e-05, "loss": 0.11274955272674561, "step": 205390 }, { "epoch": 0.8818251290109305, "grad_norm": 1.3573006391525269, "learning_rate": 1.1868871967782828e-05, "loss": 0.12120436429977417, "step": 205400 }, { "epoch": 0.8818680611009505, "grad_norm": 0.2514362037181854, "learning_rate": 1.1864560247665204e-05, "loss": 0.20958714485168456, "step": 205410 }, { "epoch": 0.8819109931909705, "grad_norm": 0.6128543019294739, "learning_rate": 1.1860248527547581e-05, "loss": 0.21837210655212402, "step": 205420 }, { "epoch": 0.8819539252809905, "grad_norm": 0.18143728375434875, "learning_rate": 1.1855936807429957e-05, "loss": 0.052649658918380735, "step": 205430 }, { "epoch": 0.8819968573710105, "grad_norm": 5.726690769195557, "learning_rate": 1.1851625087312333e-05, "loss": 0.14876792430877686, "step": 205440 }, { "epoch": 0.8820397894610306, "grad_norm": 1.3325129747390747, "learning_rate": 1.184731336719471e-05, "loss": 0.20912702083587648, "step": 205450 }, { "epoch": 0.8820827215510505, "grad_norm": 0.056446004658937454, "learning_rate": 1.1843001647077086e-05, "loss": 0.10956629514694213, "step": 205460 }, { "epoch": 0.8821256536410705, "grad_norm": 14.242410659790039, "learning_rate": 1.1838689926959461e-05, "loss": 0.1361212968826294, "step": 205470 }, { "epoch": 0.8821685857310906, "grad_norm": 0.010048151947557926, "learning_rate": 1.1834378206841839e-05, "loss": 0.2654492616653442, "step": 205480 }, { "epoch": 0.8822115178211105, "grad_norm": 0.0009726140415295959, "learning_rate": 1.1830066486724214e-05, "loss": 0.32817275524139405, "step": 205490 }, { "epoch": 0.8822544499111306, "grad_norm": 7.682567596435547, "learning_rate": 1.182575476660659e-05, "loss": 0.18321444988250732, "step": 205500 }, { "epoch": 0.8822973820011506, "grad_norm": 2.296447992324829, "learning_rate": 1.1821443046488967e-05, "loss": 0.14441272020339965, "step": 205510 }, { "epoch": 0.8823403140911706, "grad_norm": 1.1311460733413696, "learning_rate": 1.1817131326371343e-05, "loss": 0.46404194831848145, "step": 205520 }, { "epoch": 0.8823832461811906, "grad_norm": 0.00633773161098361, "learning_rate": 1.181281960625372e-05, "loss": 0.1516602635383606, "step": 205530 }, { "epoch": 0.8824261782712106, "grad_norm": 0.0009929277002811432, "learning_rate": 1.1808507886136096e-05, "loss": 0.1654080033302307, "step": 205540 }, { "epoch": 0.8824691103612307, "grad_norm": 0.9732330441474915, "learning_rate": 1.1804196166018472e-05, "loss": 0.38271920680999755, "step": 205550 }, { "epoch": 0.8825120424512506, "grad_norm": 0.018041890114545822, "learning_rate": 1.1799884445900849e-05, "loss": 0.1938990592956543, "step": 205560 }, { "epoch": 0.8825549745412706, "grad_norm": 0.025509031489491463, "learning_rate": 1.1795572725783225e-05, "loss": 0.11931205987930298, "step": 205570 }, { "epoch": 0.8825979066312907, "grad_norm": 0.07069827616214752, "learning_rate": 1.17912610056656e-05, "loss": 0.26412503719329833, "step": 205580 }, { "epoch": 0.8826408387213106, "grad_norm": 0.0899178609251976, "learning_rate": 1.1786949285547978e-05, "loss": 0.27751870155334474, "step": 205590 }, { "epoch": 0.8826837708113306, "grad_norm": 0.0004098423523828387, "learning_rate": 1.1782637565430353e-05, "loss": 0.12910977602005005, "step": 205600 }, { "epoch": 0.8827267029013507, "grad_norm": 0.006932375021278858, "learning_rate": 1.1778325845312729e-05, "loss": 0.1931472659111023, "step": 205610 }, { "epoch": 0.8827696349913706, "grad_norm": 7.9753851890563965, "learning_rate": 1.1774014125195106e-05, "loss": 0.33435921669006347, "step": 205620 }, { "epoch": 0.8828125670813907, "grad_norm": 2.1303913593292236, "learning_rate": 1.1769702405077482e-05, "loss": 0.3566124439239502, "step": 205630 }, { "epoch": 0.8828554991714107, "grad_norm": 0.0010813261615112424, "learning_rate": 1.1765390684959857e-05, "loss": 0.19597339630126953, "step": 205640 }, { "epoch": 0.8828984312614306, "grad_norm": 0.012582486495375633, "learning_rate": 1.1761078964842235e-05, "loss": 0.22391045093536377, "step": 205650 }, { "epoch": 0.8829413633514507, "grad_norm": 0.01926124095916748, "learning_rate": 1.175676724472461e-05, "loss": 0.1478889226913452, "step": 205660 }, { "epoch": 0.8829842954414707, "grad_norm": 0.0006865571485832334, "learning_rate": 1.1752455524606986e-05, "loss": 0.2585289001464844, "step": 205670 }, { "epoch": 0.8830272275314907, "grad_norm": 0.01835837960243225, "learning_rate": 1.1748143804489363e-05, "loss": 0.03371688425540924, "step": 205680 }, { "epoch": 0.8830701596215107, "grad_norm": 3.0662288665771484, "learning_rate": 1.1743832084371739e-05, "loss": 0.34697909355163575, "step": 205690 }, { "epoch": 0.8831130917115307, "grad_norm": 0.016853464767336845, "learning_rate": 1.1739520364254115e-05, "loss": 0.19272353649139404, "step": 205700 }, { "epoch": 0.8831560238015507, "grad_norm": 0.43144023418426514, "learning_rate": 1.1735208644136492e-05, "loss": 0.1649027109146118, "step": 205710 }, { "epoch": 0.8831989558915707, "grad_norm": 0.008879882283508778, "learning_rate": 1.173089692401887e-05, "loss": 0.20535609722137452, "step": 205720 }, { "epoch": 0.8832418879815908, "grad_norm": 0.6299045085906982, "learning_rate": 1.1726585203901245e-05, "loss": 0.23428215980529785, "step": 205730 }, { "epoch": 0.8832848200716107, "grad_norm": 0.10132008045911789, "learning_rate": 1.1722273483783622e-05, "loss": 0.3938116550445557, "step": 205740 }, { "epoch": 0.8833277521616307, "grad_norm": 5.267863750457764, "learning_rate": 1.1717961763665998e-05, "loss": 0.25835647583007815, "step": 205750 }, { "epoch": 0.8833706842516508, "grad_norm": 1.3534311056137085, "learning_rate": 1.1713650043548374e-05, "loss": 0.26612701416015627, "step": 205760 }, { "epoch": 0.8834136163416707, "grad_norm": 4.357753753662109, "learning_rate": 1.1709338323430751e-05, "loss": 0.26286730766296384, "step": 205770 }, { "epoch": 0.8834565484316907, "grad_norm": 1.7099212408065796, "learning_rate": 1.1705026603313127e-05, "loss": 0.39784243106842043, "step": 205780 }, { "epoch": 0.8834994805217108, "grad_norm": 0.8876595497131348, "learning_rate": 1.1700714883195502e-05, "loss": 0.2518022060394287, "step": 205790 }, { "epoch": 0.8835424126117307, "grad_norm": 0.0008838191861286759, "learning_rate": 1.169640316307788e-05, "loss": 0.11853432655334473, "step": 205800 }, { "epoch": 0.8835853447017508, "grad_norm": 2.1805479526519775, "learning_rate": 1.1692091442960255e-05, "loss": 0.19648573398590088, "step": 205810 }, { "epoch": 0.8836282767917708, "grad_norm": 0.008458657190203667, "learning_rate": 1.1687779722842631e-05, "loss": 0.14152785539627075, "step": 205820 }, { "epoch": 0.8836712088817907, "grad_norm": 8.96318244934082, "learning_rate": 1.1683468002725008e-05, "loss": 0.1602509140968323, "step": 205830 }, { "epoch": 0.8837141409718108, "grad_norm": 0.2068006992340088, "learning_rate": 1.1679156282607384e-05, "loss": 0.3406901597976685, "step": 205840 }, { "epoch": 0.8837570730618308, "grad_norm": 0.8047705292701721, "learning_rate": 1.167484456248976e-05, "loss": 0.11222801208496094, "step": 205850 }, { "epoch": 0.8838000051518508, "grad_norm": 0.01122902799397707, "learning_rate": 1.1670532842372137e-05, "loss": 0.11741714477539063, "step": 205860 }, { "epoch": 0.8838429372418708, "grad_norm": 0.18358559906482697, "learning_rate": 1.1666221122254513e-05, "loss": 0.08803790211677551, "step": 205870 }, { "epoch": 0.8838858693318908, "grad_norm": 0.005441271234303713, "learning_rate": 1.1661909402136888e-05, "loss": 0.1691023588180542, "step": 205880 }, { "epoch": 0.8839288014219108, "grad_norm": 0.003943410702049732, "learning_rate": 1.1657597682019266e-05, "loss": 0.1766132116317749, "step": 205890 }, { "epoch": 0.8839717335119308, "grad_norm": 0.026890434324741364, "learning_rate": 1.1653285961901641e-05, "loss": 0.20017850399017334, "step": 205900 }, { "epoch": 0.8840146656019509, "grad_norm": 0.0028352816589176655, "learning_rate": 1.1648974241784019e-05, "loss": 0.10084348917007446, "step": 205910 }, { "epoch": 0.8840575976919708, "grad_norm": 1.1735332012176514, "learning_rate": 1.1644662521666394e-05, "loss": 0.43771066665649416, "step": 205920 }, { "epoch": 0.8841005297819908, "grad_norm": 0.0006905131740495563, "learning_rate": 1.164035080154877e-05, "loss": 0.2366577386856079, "step": 205930 }, { "epoch": 0.8841434618720109, "grad_norm": 3.063614845275879, "learning_rate": 1.1636039081431147e-05, "loss": 0.11916078329086303, "step": 205940 }, { "epoch": 0.8841863939620309, "grad_norm": 1.343090295791626, "learning_rate": 1.1631727361313523e-05, "loss": 0.2360858917236328, "step": 205950 }, { "epoch": 0.8842293260520508, "grad_norm": 0.03908395394682884, "learning_rate": 1.1627415641195899e-05, "loss": 0.06640787124633789, "step": 205960 }, { "epoch": 0.8842722581420709, "grad_norm": 1.5124359130859375, "learning_rate": 1.1623103921078276e-05, "loss": 0.3608893394470215, "step": 205970 }, { "epoch": 0.8843151902320909, "grad_norm": 0.008707244880497456, "learning_rate": 1.1618792200960652e-05, "loss": 0.18862802982330323, "step": 205980 }, { "epoch": 0.8843581223221109, "grad_norm": 0.005090769380331039, "learning_rate": 1.1614480480843027e-05, "loss": 0.22757859230041505, "step": 205990 }, { "epoch": 0.8844010544121309, "grad_norm": 0.019245756790041924, "learning_rate": 1.1610168760725405e-05, "loss": 0.12244853973388672, "step": 206000 }, { "epoch": 0.8844010544121309, "eval_loss": 0.37726035714149475, "eval_runtime": 27.513, "eval_samples_per_second": 3.635, "eval_steps_per_second": 3.635, "step": 206000 }, { "epoch": 0.884443986502151, "grad_norm": 0.016391757875680923, "learning_rate": 1.160585704060778e-05, "loss": 0.33532443046569826, "step": 206010 }, { "epoch": 0.8844869185921709, "grad_norm": 0.0012011586222797632, "learning_rate": 1.1601545320490156e-05, "loss": 0.0573084831237793, "step": 206020 }, { "epoch": 0.8845298506821909, "grad_norm": 0.030159875750541687, "learning_rate": 1.1597233600372533e-05, "loss": 0.12054593563079834, "step": 206030 }, { "epoch": 0.884572782772211, "grad_norm": 0.03495888411998749, "learning_rate": 1.1592921880254909e-05, "loss": 0.13690305948257447, "step": 206040 }, { "epoch": 0.8846157148622309, "grad_norm": 0.02454495243728161, "learning_rate": 1.1588610160137284e-05, "loss": 0.023973910510540007, "step": 206050 }, { "epoch": 0.8846586469522509, "grad_norm": 0.003935964312404394, "learning_rate": 1.1584298440019662e-05, "loss": 0.31903369426727296, "step": 206060 }, { "epoch": 0.884701579042271, "grad_norm": 0.007954302243888378, "learning_rate": 1.1579986719902037e-05, "loss": 0.10175185203552246, "step": 206070 }, { "epoch": 0.8847445111322909, "grad_norm": 1.4729177951812744, "learning_rate": 1.1575674999784415e-05, "loss": 0.2837111711502075, "step": 206080 }, { "epoch": 0.884787443222311, "grad_norm": 0.010839829221367836, "learning_rate": 1.1571363279666792e-05, "loss": 0.10710896253585815, "step": 206090 }, { "epoch": 0.884830375312331, "grad_norm": 0.6962838172912598, "learning_rate": 1.1567051559549168e-05, "loss": 0.41863112449645995, "step": 206100 }, { "epoch": 0.8848733074023509, "grad_norm": 4.106805324554443, "learning_rate": 1.1562739839431543e-05, "loss": 0.16724135875701904, "step": 206110 }, { "epoch": 0.884916239492371, "grad_norm": 0.00478664506226778, "learning_rate": 1.155842811931392e-05, "loss": 0.199246084690094, "step": 206120 }, { "epoch": 0.884959171582391, "grad_norm": 0.021048052236437798, "learning_rate": 1.1554116399196296e-05, "loss": 0.14670164585113527, "step": 206130 }, { "epoch": 0.885002103672411, "grad_norm": 1.4622784852981567, "learning_rate": 1.1549804679078672e-05, "loss": 0.17531541585922242, "step": 206140 }, { "epoch": 0.885045035762431, "grad_norm": 2.0894620418548584, "learning_rate": 1.154549295896105e-05, "loss": 0.2334801435470581, "step": 206150 }, { "epoch": 0.885087967852451, "grad_norm": 0.1139649897813797, "learning_rate": 1.1541181238843425e-05, "loss": 0.15220773220062256, "step": 206160 }, { "epoch": 0.885130899942471, "grad_norm": 0.0686078742146492, "learning_rate": 1.15368695187258e-05, "loss": 0.08027942180633545, "step": 206170 }, { "epoch": 0.885173832032491, "grad_norm": 1.212646722793579, "learning_rate": 1.1532557798608178e-05, "loss": 0.3642886638641357, "step": 206180 }, { "epoch": 0.885216764122511, "grad_norm": 0.38929513096809387, "learning_rate": 1.1528246078490554e-05, "loss": 0.1459059476852417, "step": 206190 }, { "epoch": 0.885259696212531, "grad_norm": 0.5141419768333435, "learning_rate": 1.152393435837293e-05, "loss": 0.21101291179656984, "step": 206200 }, { "epoch": 0.885302628302551, "grad_norm": 0.023184938356280327, "learning_rate": 1.1519622638255307e-05, "loss": 0.12943390607833863, "step": 206210 }, { "epoch": 0.8853455603925711, "grad_norm": 0.010622471570968628, "learning_rate": 1.1515310918137682e-05, "loss": 0.1018107533454895, "step": 206220 }, { "epoch": 0.885388492482591, "grad_norm": 0.05979568138718605, "learning_rate": 1.1510999198020058e-05, "loss": 0.017998765408992767, "step": 206230 }, { "epoch": 0.885431424572611, "grad_norm": 0.03342049568891525, "learning_rate": 1.1506687477902435e-05, "loss": 0.32997517585754393, "step": 206240 }, { "epoch": 0.8854743566626311, "grad_norm": 1.3702374696731567, "learning_rate": 1.1502375757784811e-05, "loss": 0.33046650886535645, "step": 206250 }, { "epoch": 0.885517288752651, "grad_norm": 1.2958252429962158, "learning_rate": 1.1498064037667187e-05, "loss": 0.2511059522628784, "step": 206260 }, { "epoch": 0.885560220842671, "grad_norm": 0.011626111343502998, "learning_rate": 1.1493752317549564e-05, "loss": 0.14074764251708985, "step": 206270 }, { "epoch": 0.8856031529326911, "grad_norm": 0.7215688228607178, "learning_rate": 1.148944059743194e-05, "loss": 0.11617779731750488, "step": 206280 }, { "epoch": 0.885646085022711, "grad_norm": 3.217930316925049, "learning_rate": 1.1485128877314317e-05, "loss": 0.07321255207061768, "step": 206290 }, { "epoch": 0.8856890171127311, "grad_norm": 0.014127644710242748, "learning_rate": 1.1480817157196693e-05, "loss": 0.002158048748970032, "step": 206300 }, { "epoch": 0.8857319492027511, "grad_norm": 0.008127226494252682, "learning_rate": 1.1476505437079068e-05, "loss": 0.20068845748901368, "step": 206310 }, { "epoch": 0.885774881292771, "grad_norm": 0.0022074533626437187, "learning_rate": 1.1472193716961446e-05, "loss": 0.36427195072174073, "step": 206320 }, { "epoch": 0.8858178133827911, "grad_norm": 0.003371837781742215, "learning_rate": 1.1467881996843821e-05, "loss": 0.2856950044631958, "step": 206330 }, { "epoch": 0.8858607454728111, "grad_norm": 0.054005175828933716, "learning_rate": 1.1463570276726197e-05, "loss": 0.1003882884979248, "step": 206340 }, { "epoch": 0.8859036775628311, "grad_norm": 0.008997203782200813, "learning_rate": 1.1459258556608574e-05, "loss": 0.07773151993751526, "step": 206350 }, { "epoch": 0.8859466096528511, "grad_norm": 0.004392173606902361, "learning_rate": 1.145494683649095e-05, "loss": 0.22653238773345946, "step": 206360 }, { "epoch": 0.8859895417428711, "grad_norm": 0.18398471176624298, "learning_rate": 1.1450635116373326e-05, "loss": 0.07900636792182922, "step": 206370 }, { "epoch": 0.8860324738328912, "grad_norm": 0.023309681564569473, "learning_rate": 1.1446323396255703e-05, "loss": 0.1441459536552429, "step": 206380 }, { "epoch": 0.8860754059229111, "grad_norm": 0.2616986036300659, "learning_rate": 1.1442011676138079e-05, "loss": 0.0349333643913269, "step": 206390 }, { "epoch": 0.8861183380129312, "grad_norm": 0.15079903602600098, "learning_rate": 1.1437699956020454e-05, "loss": 0.14630622863769532, "step": 206400 }, { "epoch": 0.8861612701029512, "grad_norm": 1.4602254629135132, "learning_rate": 1.1433388235902832e-05, "loss": 0.15438275337219237, "step": 206410 }, { "epoch": 0.8862042021929711, "grad_norm": 0.011119797825813293, "learning_rate": 1.1429076515785207e-05, "loss": 0.12381850481033325, "step": 206420 }, { "epoch": 0.8862471342829912, "grad_norm": 0.019156260415911674, "learning_rate": 1.1424764795667583e-05, "loss": 0.29505267143249514, "step": 206430 }, { "epoch": 0.8862900663730112, "grad_norm": 1.6167775392532349, "learning_rate": 1.1420453075549962e-05, "loss": 0.32378649711608887, "step": 206440 }, { "epoch": 0.8863329984630312, "grad_norm": 0.0651068389415741, "learning_rate": 1.1416141355432338e-05, "loss": 0.08244922161102294, "step": 206450 }, { "epoch": 0.8863759305530512, "grad_norm": 4.568314552307129, "learning_rate": 1.1411829635314713e-05, "loss": 0.15970878601074218, "step": 206460 }, { "epoch": 0.8864188626430712, "grad_norm": 0.5490240454673767, "learning_rate": 1.140751791519709e-05, "loss": 0.18453452587127686, "step": 206470 }, { "epoch": 0.8864617947330912, "grad_norm": 0.026446960866451263, "learning_rate": 1.1403206195079466e-05, "loss": 0.1261613130569458, "step": 206480 }, { "epoch": 0.8865047268231112, "grad_norm": 0.05164752155542374, "learning_rate": 1.1398894474961842e-05, "loss": 0.05536790490150452, "step": 206490 }, { "epoch": 0.8865476589131313, "grad_norm": 0.013431284576654434, "learning_rate": 1.1394582754844219e-05, "loss": 0.005943649634718895, "step": 206500 }, { "epoch": 0.8865905910031512, "grad_norm": 0.007348786108195782, "learning_rate": 1.1390271034726595e-05, "loss": 0.10574545860290527, "step": 206510 }, { "epoch": 0.8866335230931712, "grad_norm": 2.562957525253296, "learning_rate": 1.138595931460897e-05, "loss": 0.3311743259429932, "step": 206520 }, { "epoch": 0.8866764551831913, "grad_norm": 0.6146745085716248, "learning_rate": 1.1381647594491348e-05, "loss": 0.27206060886383054, "step": 206530 }, { "epoch": 0.8867193872732112, "grad_norm": 0.12736624479293823, "learning_rate": 1.1377335874373723e-05, "loss": 0.12166001796722412, "step": 206540 }, { "epoch": 0.8867623193632312, "grad_norm": 0.02262021414935589, "learning_rate": 1.1373024154256099e-05, "loss": 0.1580941915512085, "step": 206550 }, { "epoch": 0.8868052514532513, "grad_norm": 3.1980385780334473, "learning_rate": 1.1368712434138476e-05, "loss": 0.0834578275680542, "step": 206560 }, { "epoch": 0.8868481835432712, "grad_norm": 0.7768859267234802, "learning_rate": 1.1364400714020852e-05, "loss": 0.19401477575302123, "step": 206570 }, { "epoch": 0.8868911156332913, "grad_norm": 1.658769965171814, "learning_rate": 1.1360088993903228e-05, "loss": 0.27375426292419436, "step": 206580 }, { "epoch": 0.8869340477233113, "grad_norm": 0.08683007210493088, "learning_rate": 1.1355777273785605e-05, "loss": 0.06745948195457459, "step": 206590 }, { "epoch": 0.8869769798133312, "grad_norm": 3.6138076782226562, "learning_rate": 1.135146555366798e-05, "loss": 0.14725286960601808, "step": 206600 }, { "epoch": 0.8870199119033513, "grad_norm": 0.0008120244019664824, "learning_rate": 1.1347153833550356e-05, "loss": 0.05979236364364624, "step": 206610 }, { "epoch": 0.8870628439933713, "grad_norm": 0.33934497833251953, "learning_rate": 1.1342842113432734e-05, "loss": 0.2910740375518799, "step": 206620 }, { "epoch": 0.8871057760833913, "grad_norm": 0.019929246976971626, "learning_rate": 1.133853039331511e-05, "loss": 0.050988197326660156, "step": 206630 }, { "epoch": 0.8871487081734113, "grad_norm": 0.017778104171156883, "learning_rate": 1.1334218673197487e-05, "loss": 0.257867693901062, "step": 206640 }, { "epoch": 0.8871916402634313, "grad_norm": 0.01611817069351673, "learning_rate": 1.1329906953079862e-05, "loss": 0.2223726272583008, "step": 206650 }, { "epoch": 0.8872345723534513, "grad_norm": 11.27148151397705, "learning_rate": 1.1325595232962238e-05, "loss": 0.1859837532043457, "step": 206660 }, { "epoch": 0.8872775044434713, "grad_norm": 0.07977546006441116, "learning_rate": 1.1321283512844615e-05, "loss": 0.1255193829536438, "step": 206670 }, { "epoch": 0.8873204365334914, "grad_norm": 3.2276129722595215, "learning_rate": 1.1316971792726991e-05, "loss": 0.07970626354217529, "step": 206680 }, { "epoch": 0.8873633686235113, "grad_norm": 3.979701042175293, "learning_rate": 1.1312660072609367e-05, "loss": 0.22427015304565429, "step": 206690 }, { "epoch": 0.8874063007135313, "grad_norm": 2.818377733230591, "learning_rate": 1.1308348352491744e-05, "loss": 0.47805194854736327, "step": 206700 }, { "epoch": 0.8874492328035514, "grad_norm": 0.0031118851620703936, "learning_rate": 1.130403663237412e-05, "loss": 0.06364747881889343, "step": 206710 }, { "epoch": 0.8874921648935713, "grad_norm": 0.010256568901240826, "learning_rate": 1.1299724912256495e-05, "loss": 0.26986443996429443, "step": 206720 }, { "epoch": 0.8875350969835913, "grad_norm": 1.0608967542648315, "learning_rate": 1.1295413192138873e-05, "loss": 0.15425010919570922, "step": 206730 }, { "epoch": 0.8875780290736114, "grad_norm": 0.5740428566932678, "learning_rate": 1.1291101472021248e-05, "loss": 0.16769996881484986, "step": 206740 }, { "epoch": 0.8876209611636313, "grad_norm": 1.803846001625061, "learning_rate": 1.1286789751903624e-05, "loss": 0.167777681350708, "step": 206750 }, { "epoch": 0.8876638932536514, "grad_norm": 0.028705893084406853, "learning_rate": 1.1282478031786001e-05, "loss": 0.10598093271255493, "step": 206760 }, { "epoch": 0.8877068253436714, "grad_norm": 0.013095211237668991, "learning_rate": 1.1278166311668377e-05, "loss": 0.3958571910858154, "step": 206770 }, { "epoch": 0.8877497574336913, "grad_norm": 1.313372015953064, "learning_rate": 1.1273854591550753e-05, "loss": 0.45798640251159667, "step": 206780 }, { "epoch": 0.8877926895237114, "grad_norm": 0.05569923669099808, "learning_rate": 1.126954287143313e-05, "loss": 0.18811848163604736, "step": 206790 }, { "epoch": 0.8878356216137314, "grad_norm": 0.0055416957475245, "learning_rate": 1.1265231151315506e-05, "loss": 0.22820720672607422, "step": 206800 }, { "epoch": 0.8878785537037515, "grad_norm": 0.001364398980513215, "learning_rate": 1.1260919431197883e-05, "loss": 0.15015835762023927, "step": 206810 }, { "epoch": 0.8879214857937714, "grad_norm": 2.2645983695983887, "learning_rate": 1.125660771108026e-05, "loss": 0.2777566432952881, "step": 206820 }, { "epoch": 0.8879644178837914, "grad_norm": 2.002868413925171, "learning_rate": 1.1252295990962636e-05, "loss": 0.20114946365356445, "step": 206830 }, { "epoch": 0.8880073499738115, "grad_norm": 0.0017295647412538528, "learning_rate": 1.1247984270845012e-05, "loss": 0.24505915641784667, "step": 206840 }, { "epoch": 0.8880502820638314, "grad_norm": 0.03503365069627762, "learning_rate": 1.1243672550727389e-05, "loss": 0.13042792081832885, "step": 206850 }, { "epoch": 0.8880932141538515, "grad_norm": 0.08353490382432938, "learning_rate": 1.1239360830609765e-05, "loss": 0.22364115715026855, "step": 206860 }, { "epoch": 0.8881361462438715, "grad_norm": 0.9352854490280151, "learning_rate": 1.123504911049214e-05, "loss": 0.1533583402633667, "step": 206870 }, { "epoch": 0.8881790783338914, "grad_norm": 0.19706374406814575, "learning_rate": 1.1230737390374518e-05, "loss": 0.20712320804595946, "step": 206880 }, { "epoch": 0.8882220104239115, "grad_norm": 0.04519926756620407, "learning_rate": 1.1226425670256893e-05, "loss": 0.3285240173339844, "step": 206890 }, { "epoch": 0.8882649425139315, "grad_norm": 6.046555995941162, "learning_rate": 1.1222113950139269e-05, "loss": 0.07612007856369019, "step": 206900 }, { "epoch": 0.8883078746039514, "grad_norm": 1.6961110830307007, "learning_rate": 1.1217802230021646e-05, "loss": 0.2191645622253418, "step": 206910 }, { "epoch": 0.8883508066939715, "grad_norm": 0.0015003492590039968, "learning_rate": 1.1213490509904022e-05, "loss": 0.3165169954299927, "step": 206920 }, { "epoch": 0.8883937387839915, "grad_norm": 0.7118803858757019, "learning_rate": 1.1209178789786397e-05, "loss": 0.14242100715637207, "step": 206930 }, { "epoch": 0.8884366708740115, "grad_norm": 0.0008318256586790085, "learning_rate": 1.1204867069668775e-05, "loss": 0.15933756828308104, "step": 206940 }, { "epoch": 0.8884796029640315, "grad_norm": 0.40112194418907166, "learning_rate": 1.120055534955115e-05, "loss": 0.15181329250335693, "step": 206950 }, { "epoch": 0.8885225350540515, "grad_norm": 0.003861044766381383, "learning_rate": 1.1196243629433526e-05, "loss": 0.14451063871383668, "step": 206960 }, { "epoch": 0.8885654671440715, "grad_norm": 0.0033035404048860073, "learning_rate": 1.1191931909315903e-05, "loss": 0.17607152462005615, "step": 206970 }, { "epoch": 0.8886083992340915, "grad_norm": 0.004272112622857094, "learning_rate": 1.1187620189198279e-05, "loss": 0.30315728187561036, "step": 206980 }, { "epoch": 0.8886513313241116, "grad_norm": 0.6283832788467407, "learning_rate": 1.1183308469080655e-05, "loss": 0.15836750268936156, "step": 206990 }, { "epoch": 0.8886942634141315, "grad_norm": 0.1068863794207573, "learning_rate": 1.1178996748963032e-05, "loss": 0.19578337669372559, "step": 207000 }, { "epoch": 0.8886942634141315, "eval_loss": 0.37495312094688416, "eval_runtime": 27.3443, "eval_samples_per_second": 3.657, "eval_steps_per_second": 3.657, "step": 207000 }, { "epoch": 0.8887371955041515, "grad_norm": 1.5647714138031006, "learning_rate": 1.1174685028845408e-05, "loss": 0.14250789880752562, "step": 207010 }, { "epoch": 0.8887801275941716, "grad_norm": 0.289928138256073, "learning_rate": 1.1170373308727785e-05, "loss": 0.34872961044311523, "step": 207020 }, { "epoch": 0.8888230596841915, "grad_norm": 0.03888840228319168, "learning_rate": 1.116606158861016e-05, "loss": 0.1511203169822693, "step": 207030 }, { "epoch": 0.8888659917742116, "grad_norm": 1.086020827293396, "learning_rate": 1.1161749868492536e-05, "loss": 0.17135267257690429, "step": 207040 }, { "epoch": 0.8889089238642316, "grad_norm": 0.035367049276828766, "learning_rate": 1.1157438148374914e-05, "loss": 0.2833311319351196, "step": 207050 }, { "epoch": 0.8889518559542515, "grad_norm": 0.39125993847846985, "learning_rate": 1.115312642825729e-05, "loss": 0.3519448280334473, "step": 207060 }, { "epoch": 0.8889947880442716, "grad_norm": 1.0476738214492798, "learning_rate": 1.1148814708139665e-05, "loss": 0.28065240383148193, "step": 207070 }, { "epoch": 0.8890377201342916, "grad_norm": 2.892183303833008, "learning_rate": 1.1144502988022042e-05, "loss": 0.29977333545684814, "step": 207080 }, { "epoch": 0.8890806522243115, "grad_norm": 0.21420015394687653, "learning_rate": 1.1140191267904418e-05, "loss": 0.17063156366348267, "step": 207090 }, { "epoch": 0.8891235843143316, "grad_norm": 1.2925959825515747, "learning_rate": 1.1135879547786794e-05, "loss": 0.2224264621734619, "step": 207100 }, { "epoch": 0.8891665164043516, "grad_norm": 0.03948453813791275, "learning_rate": 1.1131567827669171e-05, "loss": 0.1689953088760376, "step": 207110 }, { "epoch": 0.8892094484943716, "grad_norm": 1.0124317407608032, "learning_rate": 1.1127256107551547e-05, "loss": 0.16605607271194459, "step": 207120 }, { "epoch": 0.8892523805843916, "grad_norm": 0.6965276002883911, "learning_rate": 1.1122944387433922e-05, "loss": 0.22367043495178224, "step": 207130 }, { "epoch": 0.8892953126744116, "grad_norm": 0.02940557524561882, "learning_rate": 1.11186326673163e-05, "loss": 0.2905860424041748, "step": 207140 }, { "epoch": 0.8893382447644316, "grad_norm": 0.003474108874797821, "learning_rate": 1.1114320947198675e-05, "loss": 0.060731494426727296, "step": 207150 }, { "epoch": 0.8893811768544516, "grad_norm": 0.02715556137263775, "learning_rate": 1.1110009227081051e-05, "loss": 0.08220630884170532, "step": 207160 }, { "epoch": 0.8894241089444717, "grad_norm": 0.05262776091694832, "learning_rate": 1.110569750696343e-05, "loss": 0.022940675914287566, "step": 207170 }, { "epoch": 0.8894670410344916, "grad_norm": 0.09899931401014328, "learning_rate": 1.1101385786845806e-05, "loss": 0.18971925973892212, "step": 207180 }, { "epoch": 0.8895099731245116, "grad_norm": 0.6711128354072571, "learning_rate": 1.1097074066728181e-05, "loss": 0.13049598932266235, "step": 207190 }, { "epoch": 0.8895529052145317, "grad_norm": 0.6323684453964233, "learning_rate": 1.1092762346610559e-05, "loss": 0.2400278091430664, "step": 207200 }, { "epoch": 0.8895958373045516, "grad_norm": 2.824448823928833, "learning_rate": 1.1088450626492934e-05, "loss": 0.387871241569519, "step": 207210 }, { "epoch": 0.8896387693945716, "grad_norm": 0.0035468984860926867, "learning_rate": 1.108413890637531e-05, "loss": 0.08591824173927307, "step": 207220 }, { "epoch": 0.8896817014845917, "grad_norm": 0.00021397089585661888, "learning_rate": 1.1079827186257687e-05, "loss": 0.16451675891876222, "step": 207230 }, { "epoch": 0.8897246335746117, "grad_norm": 3.756059169769287, "learning_rate": 1.1075515466140063e-05, "loss": 0.3345004081726074, "step": 207240 }, { "epoch": 0.8897675656646317, "grad_norm": 0.030414363369345665, "learning_rate": 1.1071203746022439e-05, "loss": 0.18696179389953613, "step": 207250 }, { "epoch": 0.8898104977546517, "grad_norm": 1.4751427173614502, "learning_rate": 1.1066892025904816e-05, "loss": 0.0779729425907135, "step": 207260 }, { "epoch": 0.8898534298446718, "grad_norm": 0.0060063861310482025, "learning_rate": 1.1062580305787192e-05, "loss": 0.06680658459663391, "step": 207270 }, { "epoch": 0.8898963619346917, "grad_norm": 0.002740606665611267, "learning_rate": 1.1058268585669567e-05, "loss": 0.22256386280059814, "step": 207280 }, { "epoch": 0.8899392940247117, "grad_norm": 1.8619122505187988, "learning_rate": 1.1053956865551945e-05, "loss": 0.08997167348861694, "step": 207290 }, { "epoch": 0.8899822261147318, "grad_norm": 0.28505250811576843, "learning_rate": 1.104964514543432e-05, "loss": 0.2243257999420166, "step": 207300 }, { "epoch": 0.8900251582047517, "grad_norm": 0.00235749501734972, "learning_rate": 1.1045333425316696e-05, "loss": 0.12256654500961303, "step": 207310 }, { "epoch": 0.8900680902947717, "grad_norm": 4.915469646453857, "learning_rate": 1.1041021705199073e-05, "loss": 0.4654879570007324, "step": 207320 }, { "epoch": 0.8901110223847918, "grad_norm": 2.4316060543060303, "learning_rate": 1.1036709985081449e-05, "loss": 0.3756813287734985, "step": 207330 }, { "epoch": 0.8901539544748117, "grad_norm": 0.8675051331520081, "learning_rate": 1.1032398264963824e-05, "loss": 0.3095096588134766, "step": 207340 }, { "epoch": 0.8901968865648318, "grad_norm": 0.29333287477493286, "learning_rate": 1.1028086544846202e-05, "loss": 0.31053957939147947, "step": 207350 }, { "epoch": 0.8902398186548518, "grad_norm": 0.033922795206308365, "learning_rate": 1.1023774824728577e-05, "loss": 0.2523657321929932, "step": 207360 }, { "epoch": 0.8902827507448717, "grad_norm": 0.062226712703704834, "learning_rate": 1.1019463104610953e-05, "loss": 0.2878741979598999, "step": 207370 }, { "epoch": 0.8903256828348918, "grad_norm": 0.1028006374835968, "learning_rate": 1.101515138449333e-05, "loss": 0.19018113613128662, "step": 207380 }, { "epoch": 0.8903686149249118, "grad_norm": 2.000626802444458, "learning_rate": 1.1010839664375706e-05, "loss": 0.24007706642150878, "step": 207390 }, { "epoch": 0.8904115470149317, "grad_norm": 0.6968321800231934, "learning_rate": 1.1006527944258083e-05, "loss": 0.29462342262268065, "step": 207400 }, { "epoch": 0.8904544791049518, "grad_norm": 0.012917857617139816, "learning_rate": 1.1002216224140459e-05, "loss": 0.1639692783355713, "step": 207410 }, { "epoch": 0.8904974111949718, "grad_norm": 21.311800003051758, "learning_rate": 1.0997904504022835e-05, "loss": 0.2652838706970215, "step": 207420 }, { "epoch": 0.8905403432849918, "grad_norm": 0.28604063391685486, "learning_rate": 1.0993592783905212e-05, "loss": 0.0822974681854248, "step": 207430 }, { "epoch": 0.8905832753750118, "grad_norm": 0.0694635659456253, "learning_rate": 1.0989281063787588e-05, "loss": 0.05964369773864746, "step": 207440 }, { "epoch": 0.8906262074650318, "grad_norm": 0.04713095724582672, "learning_rate": 1.0984969343669963e-05, "loss": 0.20178651809692383, "step": 207450 }, { "epoch": 0.8906691395550518, "grad_norm": 0.023525338619947433, "learning_rate": 1.098065762355234e-05, "loss": 0.19469608068466188, "step": 207460 }, { "epoch": 0.8907120716450718, "grad_norm": 6.417396545410156, "learning_rate": 1.0976345903434716e-05, "loss": 0.24268784523010253, "step": 207470 }, { "epoch": 0.8907550037350919, "grad_norm": 0.015842819586396217, "learning_rate": 1.0972034183317092e-05, "loss": 0.09640743136405945, "step": 207480 }, { "epoch": 0.8907979358251118, "grad_norm": 0.0036308271810412407, "learning_rate": 1.096772246319947e-05, "loss": 0.028143587708473205, "step": 207490 }, { "epoch": 0.8908408679151318, "grad_norm": 0.8678433895111084, "learning_rate": 1.0963410743081845e-05, "loss": 0.03296833634376526, "step": 207500 }, { "epoch": 0.8908838000051519, "grad_norm": 0.0009780797408893704, "learning_rate": 1.095909902296422e-05, "loss": 0.04464678466320038, "step": 207510 }, { "epoch": 0.8909267320951718, "grad_norm": 3.87165904045105, "learning_rate": 1.0954787302846598e-05, "loss": 0.3635662317276001, "step": 207520 }, { "epoch": 0.8909696641851919, "grad_norm": 0.0022383269388228655, "learning_rate": 1.0950475582728975e-05, "loss": 0.001991683803498745, "step": 207530 }, { "epoch": 0.8910125962752119, "grad_norm": 0.037035245448350906, "learning_rate": 1.0946163862611351e-05, "loss": 0.16891145706176758, "step": 207540 }, { "epoch": 0.8910555283652318, "grad_norm": 1.5228081941604614, "learning_rate": 1.0941852142493728e-05, "loss": 0.15557491779327393, "step": 207550 }, { "epoch": 0.8910984604552519, "grad_norm": 0.03345203027129173, "learning_rate": 1.0937540422376104e-05, "loss": 0.18587608337402345, "step": 207560 }, { "epoch": 0.8911413925452719, "grad_norm": 0.016381222754716873, "learning_rate": 1.093322870225848e-05, "loss": 0.03826359212398529, "step": 207570 }, { "epoch": 0.8911843246352918, "grad_norm": 7.473484516143799, "learning_rate": 1.0928916982140857e-05, "loss": 0.09259976148605346, "step": 207580 }, { "epoch": 0.8912272567253119, "grad_norm": 0.07392115145921707, "learning_rate": 1.0924605262023233e-05, "loss": 0.21983919143676758, "step": 207590 }, { "epoch": 0.8912701888153319, "grad_norm": 0.028260618448257446, "learning_rate": 1.0920293541905608e-05, "loss": 0.1875922679901123, "step": 207600 }, { "epoch": 0.8913131209053519, "grad_norm": 1.9657236337661743, "learning_rate": 1.0915981821787986e-05, "loss": 0.30432312488555907, "step": 207610 }, { "epoch": 0.8913560529953719, "grad_norm": 0.05618586018681526, "learning_rate": 1.0911670101670361e-05, "loss": 0.025335219502449036, "step": 207620 }, { "epoch": 0.891398985085392, "grad_norm": 1.6123510599136353, "learning_rate": 1.0907358381552737e-05, "loss": 0.15341012477874755, "step": 207630 }, { "epoch": 0.8914419171754119, "grad_norm": 4.691909313201904, "learning_rate": 1.0903046661435114e-05, "loss": 0.24398250579833985, "step": 207640 }, { "epoch": 0.8914848492654319, "grad_norm": 2.1337225437164307, "learning_rate": 1.089873494131749e-05, "loss": 0.5305446147918701, "step": 207650 }, { "epoch": 0.891527781355452, "grad_norm": 0.0023214505054056644, "learning_rate": 1.0894423221199866e-05, "loss": 0.05925256609916687, "step": 207660 }, { "epoch": 0.891570713445472, "grad_norm": 2.3543307781219482, "learning_rate": 1.0890111501082243e-05, "loss": 0.350689959526062, "step": 207670 }, { "epoch": 0.8916136455354919, "grad_norm": 0.0017880657687783241, "learning_rate": 1.0885799780964619e-05, "loss": 0.16659703254699706, "step": 207680 }, { "epoch": 0.891656577625512, "grad_norm": 2.1300582885742188, "learning_rate": 1.0881488060846994e-05, "loss": 0.38850128650665283, "step": 207690 }, { "epoch": 0.891699509715532, "grad_norm": 0.002209410071372986, "learning_rate": 1.0877176340729372e-05, "loss": 0.18450218439102173, "step": 207700 }, { "epoch": 0.891742441805552, "grad_norm": 1.4890061616897583, "learning_rate": 1.0872864620611747e-05, "loss": 0.13996822834014894, "step": 207710 }, { "epoch": 0.891785373895572, "grad_norm": 0.8673306107521057, "learning_rate": 1.0868552900494123e-05, "loss": 0.2520700454711914, "step": 207720 }, { "epoch": 0.891828305985592, "grad_norm": 0.0014024653937667608, "learning_rate": 1.08642411803765e-05, "loss": 0.1759459137916565, "step": 207730 }, { "epoch": 0.891871238075612, "grad_norm": 0.02599116787314415, "learning_rate": 1.0859929460258876e-05, "loss": 0.059044384956359865, "step": 207740 }, { "epoch": 0.891914170165632, "grad_norm": 0.46189165115356445, "learning_rate": 1.0855617740141251e-05, "loss": 0.09625746607780457, "step": 207750 }, { "epoch": 0.8919571022556521, "grad_norm": 6.523098468780518, "learning_rate": 1.0851306020023629e-05, "loss": 0.20586519241333007, "step": 207760 }, { "epoch": 0.892000034345672, "grad_norm": 0.0002252467820653692, "learning_rate": 1.0846994299906004e-05, "loss": 0.14474284648895264, "step": 207770 }, { "epoch": 0.892042966435692, "grad_norm": 1.4413135051727295, "learning_rate": 1.0842682579788382e-05, "loss": 0.18990509510040282, "step": 207780 }, { "epoch": 0.8920858985257121, "grad_norm": 0.4706697165966034, "learning_rate": 1.0838370859670757e-05, "loss": 0.1626267671585083, "step": 207790 }, { "epoch": 0.892128830615732, "grad_norm": 1.6966557502746582, "learning_rate": 1.0834059139553133e-05, "loss": 0.03528638184070587, "step": 207800 }, { "epoch": 0.892171762705752, "grad_norm": 0.001013890141621232, "learning_rate": 1.082974741943551e-05, "loss": 0.3139493942260742, "step": 207810 }, { "epoch": 0.8922146947957721, "grad_norm": 0.04221909120678902, "learning_rate": 1.0825435699317886e-05, "loss": 0.21665351390838622, "step": 207820 }, { "epoch": 0.892257626885792, "grad_norm": 0.04389188066124916, "learning_rate": 1.0821123979200262e-05, "loss": 0.12443130016326905, "step": 207830 }, { "epoch": 0.8923005589758121, "grad_norm": 0.0007254289230331779, "learning_rate": 1.0816812259082639e-05, "loss": 0.07733795642852784, "step": 207840 }, { "epoch": 0.8923434910658321, "grad_norm": 0.002695313189178705, "learning_rate": 1.0812500538965015e-05, "loss": 0.20181338787078856, "step": 207850 }, { "epoch": 0.892386423155852, "grad_norm": 1.518416166305542, "learning_rate": 1.080818881884739e-05, "loss": 0.35289008617401124, "step": 207860 }, { "epoch": 0.8924293552458721, "grad_norm": 0.09882375597953796, "learning_rate": 1.0803877098729768e-05, "loss": 0.084866863489151, "step": 207870 }, { "epoch": 0.8924722873358921, "grad_norm": 0.07617539167404175, "learning_rate": 1.0799565378612143e-05, "loss": 0.25410733222961424, "step": 207880 }, { "epoch": 0.892515219425912, "grad_norm": 0.005790808238089085, "learning_rate": 1.0795253658494519e-05, "loss": 0.09027189016342163, "step": 207890 }, { "epoch": 0.8925581515159321, "grad_norm": 0.0614863745868206, "learning_rate": 1.0790941938376898e-05, "loss": 0.02990849018096924, "step": 207900 }, { "epoch": 0.8926010836059521, "grad_norm": 1.8789814710617065, "learning_rate": 1.0786630218259274e-05, "loss": 0.2544694423675537, "step": 207910 }, { "epoch": 0.8926440156959721, "grad_norm": 1.8106611967086792, "learning_rate": 1.078231849814165e-05, "loss": 0.1708831787109375, "step": 207920 }, { "epoch": 0.8926869477859921, "grad_norm": 0.44066721200942993, "learning_rate": 1.0778006778024027e-05, "loss": 0.3115999698638916, "step": 207930 }, { "epoch": 0.8927298798760122, "grad_norm": 0.7485963702201843, "learning_rate": 1.0773695057906402e-05, "loss": 0.10244852304458618, "step": 207940 }, { "epoch": 0.8927728119660321, "grad_norm": 0.002917962847277522, "learning_rate": 1.0769383337788778e-05, "loss": 0.08186596035957336, "step": 207950 }, { "epoch": 0.8928157440560521, "grad_norm": 0.25023436546325684, "learning_rate": 1.0765071617671155e-05, "loss": 0.2952109336853027, "step": 207960 }, { "epoch": 0.8928586761460722, "grad_norm": 0.0062995050102472305, "learning_rate": 1.0760759897553531e-05, "loss": 0.13027873039245605, "step": 207970 }, { "epoch": 0.8929016082360921, "grad_norm": 0.4849872589111328, "learning_rate": 1.0756448177435907e-05, "loss": 0.08536691069602967, "step": 207980 }, { "epoch": 0.8929445403261121, "grad_norm": 0.04339861497282982, "learning_rate": 1.0752136457318284e-05, "loss": 0.1852457642555237, "step": 207990 }, { "epoch": 0.8929874724161322, "grad_norm": 0.7880849838256836, "learning_rate": 1.074782473720066e-05, "loss": 0.15559797286987304, "step": 208000 }, { "epoch": 0.8929874724161322, "eval_loss": 0.3726086914539337, "eval_runtime": 27.5821, "eval_samples_per_second": 3.626, "eval_steps_per_second": 3.626, "step": 208000 }, { "epoch": 0.8930304045061521, "grad_norm": 0.05981164053082466, "learning_rate": 1.0743513017083035e-05, "loss": 0.1855257749557495, "step": 208010 }, { "epoch": 0.8930733365961722, "grad_norm": 2.3654754161834717, "learning_rate": 1.0739201296965413e-05, "loss": 0.2869313478469849, "step": 208020 }, { "epoch": 0.8931162686861922, "grad_norm": 0.8840840458869934, "learning_rate": 1.0734889576847788e-05, "loss": 0.18838484287261964, "step": 208030 }, { "epoch": 0.8931592007762121, "grad_norm": 0.018801458179950714, "learning_rate": 1.0730577856730164e-05, "loss": 0.0698580026626587, "step": 208040 }, { "epoch": 0.8932021328662322, "grad_norm": 0.7063366770744324, "learning_rate": 1.0726266136612541e-05, "loss": 0.03508914709091186, "step": 208050 }, { "epoch": 0.8932450649562522, "grad_norm": 0.012687929905951023, "learning_rate": 1.0721954416494917e-05, "loss": 0.3084988594055176, "step": 208060 }, { "epoch": 0.8932879970462722, "grad_norm": 0.000954024086240679, "learning_rate": 1.0717642696377293e-05, "loss": 0.4633500576019287, "step": 208070 }, { "epoch": 0.8933309291362922, "grad_norm": 0.08769218623638153, "learning_rate": 1.071333097625967e-05, "loss": 0.05249186158180237, "step": 208080 }, { "epoch": 0.8933738612263122, "grad_norm": 0.07643234729766846, "learning_rate": 1.0709019256142046e-05, "loss": 0.12548816204071045, "step": 208090 }, { "epoch": 0.8934167933163323, "grad_norm": 0.32027769088745117, "learning_rate": 1.0704707536024421e-05, "loss": 0.10189043283462525, "step": 208100 }, { "epoch": 0.8934597254063522, "grad_norm": 0.014366092160344124, "learning_rate": 1.0700395815906799e-05, "loss": 0.04180750846862793, "step": 208110 }, { "epoch": 0.8935026574963723, "grad_norm": 0.028544161468744278, "learning_rate": 1.0696084095789174e-05, "loss": 0.025064852833747864, "step": 208120 }, { "epoch": 0.8935455895863923, "grad_norm": 0.4280235171318054, "learning_rate": 1.069177237567155e-05, "loss": 0.0633910834789276, "step": 208130 }, { "epoch": 0.8935885216764122, "grad_norm": 0.09054075181484222, "learning_rate": 1.0687460655553927e-05, "loss": 0.1530647873878479, "step": 208140 }, { "epoch": 0.8936314537664323, "grad_norm": 0.03635436296463013, "learning_rate": 1.0683148935436303e-05, "loss": 0.32849485874176027, "step": 208150 }, { "epoch": 0.8936743858564523, "grad_norm": 0.02411261945962906, "learning_rate": 1.067883721531868e-05, "loss": 0.2045769453048706, "step": 208160 }, { "epoch": 0.8937173179464722, "grad_norm": 0.014103651978075504, "learning_rate": 1.0674525495201056e-05, "loss": 0.288296103477478, "step": 208170 }, { "epoch": 0.8937602500364923, "grad_norm": 4.653670787811279, "learning_rate": 1.0670213775083431e-05, "loss": 0.28550784587860106, "step": 208180 }, { "epoch": 0.8938031821265123, "grad_norm": 0.015572491101920605, "learning_rate": 1.0665902054965809e-05, "loss": 0.08331742286682128, "step": 208190 }, { "epoch": 0.8938461142165323, "grad_norm": 0.33414509892463684, "learning_rate": 1.0661590334848184e-05, "loss": 0.12296531200408936, "step": 208200 }, { "epoch": 0.8938890463065523, "grad_norm": 3.6274185180664062, "learning_rate": 1.065727861473056e-05, "loss": 0.28098833560943604, "step": 208210 }, { "epoch": 0.8939319783965723, "grad_norm": 0.025328239426016808, "learning_rate": 1.0652966894612937e-05, "loss": 0.1296234130859375, "step": 208220 }, { "epoch": 0.8939749104865923, "grad_norm": 1.3595960140228271, "learning_rate": 1.0648655174495313e-05, "loss": 0.323833966255188, "step": 208230 }, { "epoch": 0.8940178425766123, "grad_norm": 3.208756685256958, "learning_rate": 1.0644343454377689e-05, "loss": 0.33398821353912356, "step": 208240 }, { "epoch": 0.8940607746666324, "grad_norm": 1.5382039546966553, "learning_rate": 1.0640031734260066e-05, "loss": 0.17398445606231688, "step": 208250 }, { "epoch": 0.8941037067566523, "grad_norm": 0.04903976246714592, "learning_rate": 1.0635720014142443e-05, "loss": 0.044875967502594, "step": 208260 }, { "epoch": 0.8941466388466723, "grad_norm": 22.92563247680664, "learning_rate": 1.0631408294024819e-05, "loss": 0.321004581451416, "step": 208270 }, { "epoch": 0.8941895709366924, "grad_norm": 0.141753152012825, "learning_rate": 1.0627096573907196e-05, "loss": 0.2542982578277588, "step": 208280 }, { "epoch": 0.8942325030267123, "grad_norm": 0.01915988139808178, "learning_rate": 1.0622784853789572e-05, "loss": 0.19424819946289062, "step": 208290 }, { "epoch": 0.8942754351167324, "grad_norm": 0.2076554298400879, "learning_rate": 1.0618473133671948e-05, "loss": 0.23011200428009032, "step": 208300 }, { "epoch": 0.8943183672067524, "grad_norm": 1.0068254470825195, "learning_rate": 1.0614161413554325e-05, "loss": 0.3052649974822998, "step": 208310 }, { "epoch": 0.8943612992967723, "grad_norm": 0.005863682366907597, "learning_rate": 1.06098496934367e-05, "loss": 0.06528003215789795, "step": 208320 }, { "epoch": 0.8944042313867924, "grad_norm": 1.8667118549346924, "learning_rate": 1.0605537973319076e-05, "loss": 0.3344250679016113, "step": 208330 }, { "epoch": 0.8944471634768124, "grad_norm": 0.011455858126282692, "learning_rate": 1.0601226253201454e-05, "loss": 0.29726667404174806, "step": 208340 }, { "epoch": 0.8944900955668323, "grad_norm": 1.6951905488967896, "learning_rate": 1.059691453308383e-05, "loss": 0.12936344146728515, "step": 208350 }, { "epoch": 0.8945330276568524, "grad_norm": 2.5541203022003174, "learning_rate": 1.0592602812966205e-05, "loss": 0.12269244194030762, "step": 208360 }, { "epoch": 0.8945759597468724, "grad_norm": 0.7697427272796631, "learning_rate": 1.0588291092848582e-05, "loss": 0.07301286458969117, "step": 208370 }, { "epoch": 0.8946188918368924, "grad_norm": 0.27730637788772583, "learning_rate": 1.0583979372730958e-05, "loss": 0.0558599591255188, "step": 208380 }, { "epoch": 0.8946618239269124, "grad_norm": 0.004843627102673054, "learning_rate": 1.0579667652613334e-05, "loss": 0.017573785781860352, "step": 208390 }, { "epoch": 0.8947047560169324, "grad_norm": 0.006611781660467386, "learning_rate": 1.0575355932495711e-05, "loss": 0.13027034997940062, "step": 208400 }, { "epoch": 0.8947476881069524, "grad_norm": 0.008200961165130138, "learning_rate": 1.0571044212378087e-05, "loss": 0.4753425121307373, "step": 208410 }, { "epoch": 0.8947906201969724, "grad_norm": 0.02584204636514187, "learning_rate": 1.0566732492260462e-05, "loss": 0.09846428036689758, "step": 208420 }, { "epoch": 0.8948335522869925, "grad_norm": 0.9147371649742126, "learning_rate": 1.056242077214284e-05, "loss": 0.18001638650894164, "step": 208430 }, { "epoch": 0.8948764843770124, "grad_norm": 1.8289741277694702, "learning_rate": 1.0558109052025215e-05, "loss": 0.24366312026977538, "step": 208440 }, { "epoch": 0.8949194164670324, "grad_norm": 5.123477458953857, "learning_rate": 1.0553797331907591e-05, "loss": 0.2094179391860962, "step": 208450 }, { "epoch": 0.8949623485570525, "grad_norm": 0.017321955412626266, "learning_rate": 1.0549485611789968e-05, "loss": 0.09925681352615356, "step": 208460 }, { "epoch": 0.8950052806470724, "grad_norm": 0.020616520196199417, "learning_rate": 1.0545173891672344e-05, "loss": 0.28967747688293455, "step": 208470 }, { "epoch": 0.8950482127370925, "grad_norm": 1.7180979251861572, "learning_rate": 1.054086217155472e-05, "loss": 0.15444715023040773, "step": 208480 }, { "epoch": 0.8950911448271125, "grad_norm": 0.021595356985926628, "learning_rate": 1.0536550451437097e-05, "loss": 0.14605026245117186, "step": 208490 }, { "epoch": 0.8951340769171324, "grad_norm": 0.1364668309688568, "learning_rate": 1.0532238731319473e-05, "loss": 0.16470075845718385, "step": 208500 }, { "epoch": 0.8951770090071525, "grad_norm": 0.002942474326118827, "learning_rate": 1.052792701120185e-05, "loss": 0.21846382617950438, "step": 208510 }, { "epoch": 0.8952199410971725, "grad_norm": 0.008422262966632843, "learning_rate": 1.0523615291084226e-05, "loss": 0.002869504317641258, "step": 208520 }, { "epoch": 0.8952628731871926, "grad_norm": 5.913967132568359, "learning_rate": 1.0519303570966601e-05, "loss": 0.43233413696289064, "step": 208530 }, { "epoch": 0.8953058052772125, "grad_norm": 0.0015407500322908163, "learning_rate": 1.0514991850848979e-05, "loss": 0.17136805057525634, "step": 208540 }, { "epoch": 0.8953487373672325, "grad_norm": 1.3527334928512573, "learning_rate": 1.0510680130731354e-05, "loss": 0.2805386781692505, "step": 208550 }, { "epoch": 0.8953916694572526, "grad_norm": 7.946166038513184, "learning_rate": 1.050636841061373e-05, "loss": 0.1672977924346924, "step": 208560 }, { "epoch": 0.8954346015472725, "grad_norm": 0.03738050535321236, "learning_rate": 1.0502056690496107e-05, "loss": 0.22332425117492677, "step": 208570 }, { "epoch": 0.8954775336372925, "grad_norm": 0.008645240217447281, "learning_rate": 1.0497744970378483e-05, "loss": 0.25367834568023684, "step": 208580 }, { "epoch": 0.8955204657273126, "grad_norm": 1.5730016231536865, "learning_rate": 1.0493433250260859e-05, "loss": 0.20938484668731688, "step": 208590 }, { "epoch": 0.8955633978173325, "grad_norm": 4.809794902801514, "learning_rate": 1.0489121530143236e-05, "loss": 0.0948636293411255, "step": 208600 }, { "epoch": 0.8956063299073526, "grad_norm": 0.0076020825654268265, "learning_rate": 1.0484809810025612e-05, "loss": 0.11845968961715699, "step": 208610 }, { "epoch": 0.8956492619973726, "grad_norm": 0.049854811280965805, "learning_rate": 1.0480498089907987e-05, "loss": 0.070538729429245, "step": 208620 }, { "epoch": 0.8956921940873925, "grad_norm": 0.04433738440275192, "learning_rate": 1.0476186369790364e-05, "loss": 0.13895654678344727, "step": 208630 }, { "epoch": 0.8957351261774126, "grad_norm": 0.3433583676815033, "learning_rate": 1.0471874649672742e-05, "loss": 0.10408983230590821, "step": 208640 }, { "epoch": 0.8957780582674326, "grad_norm": 1.599429965019226, "learning_rate": 1.0467562929555117e-05, "loss": 0.2278376340866089, "step": 208650 }, { "epoch": 0.8958209903574526, "grad_norm": 0.6149447560310364, "learning_rate": 1.0463251209437495e-05, "loss": 0.1790841341018677, "step": 208660 }, { "epoch": 0.8958639224474726, "grad_norm": 0.06281206011772156, "learning_rate": 1.045893948931987e-05, "loss": 0.1051477074623108, "step": 208670 }, { "epoch": 0.8959068545374926, "grad_norm": 6.033206462860107, "learning_rate": 1.0454627769202246e-05, "loss": 0.17541825771331787, "step": 208680 }, { "epoch": 0.8959497866275126, "grad_norm": 0.015856314450502396, "learning_rate": 1.0450316049084623e-05, "loss": 0.07973178029060364, "step": 208690 }, { "epoch": 0.8959927187175326, "grad_norm": 0.0016035563312470913, "learning_rate": 1.0446004328966999e-05, "loss": 0.13870831727981567, "step": 208700 }, { "epoch": 0.8960356508075527, "grad_norm": 8.894113540649414, "learning_rate": 1.0441692608849375e-05, "loss": 0.3145705223083496, "step": 208710 }, { "epoch": 0.8960785828975726, "grad_norm": 0.20434823632240295, "learning_rate": 1.0437380888731752e-05, "loss": 0.17712016105651857, "step": 208720 }, { "epoch": 0.8961215149875926, "grad_norm": 0.5490210056304932, "learning_rate": 1.0433069168614128e-05, "loss": 0.22801218032836915, "step": 208730 }, { "epoch": 0.8961644470776127, "grad_norm": 1.5750205516815186, "learning_rate": 1.0428757448496503e-05, "loss": 0.16703424453735352, "step": 208740 }, { "epoch": 0.8962073791676326, "grad_norm": 1.4231799840927124, "learning_rate": 1.042444572837888e-05, "loss": 0.22915937900543212, "step": 208750 }, { "epoch": 0.8962503112576526, "grad_norm": 0.0203181691467762, "learning_rate": 1.0420134008261256e-05, "loss": 0.1963837265968323, "step": 208760 }, { "epoch": 0.8962932433476727, "grad_norm": 0.01275878306478262, "learning_rate": 1.0415822288143632e-05, "loss": 0.14313185214996338, "step": 208770 }, { "epoch": 0.8963361754376926, "grad_norm": 4.891313076019287, "learning_rate": 1.041151056802601e-05, "loss": 0.2985722064971924, "step": 208780 }, { "epoch": 0.8963791075277127, "grad_norm": 11.108732223510742, "learning_rate": 1.0407198847908385e-05, "loss": 0.07313599586486816, "step": 208790 }, { "epoch": 0.8964220396177327, "grad_norm": 0.8342699408531189, "learning_rate": 1.040288712779076e-05, "loss": 0.19693690538406372, "step": 208800 }, { "epoch": 0.8964649717077526, "grad_norm": 0.0011004661209881306, "learning_rate": 1.0398575407673138e-05, "loss": 0.091287761926651, "step": 208810 }, { "epoch": 0.8965079037977727, "grad_norm": 0.12642458081245422, "learning_rate": 1.0394263687555514e-05, "loss": 0.2455592632293701, "step": 208820 }, { "epoch": 0.8965508358877927, "grad_norm": 1.3348584175109863, "learning_rate": 1.038995196743789e-05, "loss": 0.1055836796760559, "step": 208830 }, { "epoch": 0.8965937679778127, "grad_norm": 0.014487615786492825, "learning_rate": 1.0385640247320267e-05, "loss": 0.10463262796401977, "step": 208840 }, { "epoch": 0.8966367000678327, "grad_norm": 0.07032379508018494, "learning_rate": 1.0381328527202642e-05, "loss": 0.13996758460998535, "step": 208850 }, { "epoch": 0.8966796321578527, "grad_norm": 2.5771610736846924, "learning_rate": 1.0377016807085018e-05, "loss": 0.287884259223938, "step": 208860 }, { "epoch": 0.8967225642478727, "grad_norm": 0.010654770769178867, "learning_rate": 1.0372705086967395e-05, "loss": 0.19098736047744752, "step": 208870 }, { "epoch": 0.8967654963378927, "grad_norm": 0.006161325611174107, "learning_rate": 1.0368393366849771e-05, "loss": 0.15580826997756958, "step": 208880 }, { "epoch": 0.8968084284279128, "grad_norm": 1.6130363941192627, "learning_rate": 1.0364081646732148e-05, "loss": 0.10374679565429687, "step": 208890 }, { "epoch": 0.8968513605179327, "grad_norm": 0.014442683197557926, "learning_rate": 1.0359769926614524e-05, "loss": 0.26409647464752195, "step": 208900 }, { "epoch": 0.8968942926079527, "grad_norm": 0.0012823556317016482, "learning_rate": 1.03554582064969e-05, "loss": 0.16642024517059326, "step": 208910 }, { "epoch": 0.8969372246979728, "grad_norm": 0.2990255057811737, "learning_rate": 1.0351146486379277e-05, "loss": 0.31660277843475343, "step": 208920 }, { "epoch": 0.8969801567879927, "grad_norm": 0.025890527293086052, "learning_rate": 1.0346834766261653e-05, "loss": 0.27528440952301025, "step": 208930 }, { "epoch": 0.8970230888780127, "grad_norm": 1.0845426321029663, "learning_rate": 1.0342523046144028e-05, "loss": 0.32809431552886964, "step": 208940 }, { "epoch": 0.8970660209680328, "grad_norm": 0.003917692694813013, "learning_rate": 1.0338211326026406e-05, "loss": 0.2939892768859863, "step": 208950 }, { "epoch": 0.8971089530580528, "grad_norm": 0.002184345619753003, "learning_rate": 1.0333899605908781e-05, "loss": 0.21517295837402345, "step": 208960 }, { "epoch": 0.8971518851480728, "grad_norm": 2.200176239013672, "learning_rate": 1.0329587885791157e-05, "loss": 0.45783376693725586, "step": 208970 }, { "epoch": 0.8971948172380928, "grad_norm": 0.03883425518870354, "learning_rate": 1.0325276165673534e-05, "loss": 0.12506805658340453, "step": 208980 }, { "epoch": 0.8972377493281128, "grad_norm": 0.08557221293449402, "learning_rate": 1.0320964445555912e-05, "loss": 0.2575039863586426, "step": 208990 }, { "epoch": 0.8972806814181328, "grad_norm": 0.6455543637275696, "learning_rate": 1.0316652725438287e-05, "loss": 0.22111158370971679, "step": 209000 }, { "epoch": 0.8972806814181328, "eval_loss": 0.37228667736053467, "eval_runtime": 27.3584, "eval_samples_per_second": 3.655, "eval_steps_per_second": 3.655, "step": 209000 }, { "epoch": 0.8973236135081528, "grad_norm": 0.00818126555532217, "learning_rate": 1.0312341005320663e-05, "loss": 0.13509939908981322, "step": 209010 }, { "epoch": 0.8973665455981729, "grad_norm": 0.07581299543380737, "learning_rate": 1.030802928520304e-05, "loss": 0.25842645168304446, "step": 209020 }, { "epoch": 0.8974094776881928, "grad_norm": 0.009542779996991158, "learning_rate": 1.0303717565085416e-05, "loss": 0.24384644031524658, "step": 209030 }, { "epoch": 0.8974524097782128, "grad_norm": 1.313149333000183, "learning_rate": 1.0299405844967793e-05, "loss": 0.12051091194152833, "step": 209040 }, { "epoch": 0.8974953418682329, "grad_norm": 0.1645997017621994, "learning_rate": 1.0295094124850169e-05, "loss": 0.24603877067565919, "step": 209050 }, { "epoch": 0.8975382739582528, "grad_norm": 1.1862980127334595, "learning_rate": 1.0290782404732545e-05, "loss": 0.18507742881774902, "step": 209060 }, { "epoch": 0.8975812060482729, "grad_norm": 0.012851928360760212, "learning_rate": 1.0286470684614922e-05, "loss": 0.16939222812652588, "step": 209070 }, { "epoch": 0.8976241381382929, "grad_norm": 0.10280074924230576, "learning_rate": 1.0282158964497297e-05, "loss": 0.1931609630584717, "step": 209080 }, { "epoch": 0.8976670702283128, "grad_norm": 0.09403973817825317, "learning_rate": 1.0277847244379673e-05, "loss": 0.31662075519561766, "step": 209090 }, { "epoch": 0.8977100023183329, "grad_norm": 0.09060845524072647, "learning_rate": 1.027353552426205e-05, "loss": 0.2866966724395752, "step": 209100 }, { "epoch": 0.8977529344083529, "grad_norm": 2.3493640422821045, "learning_rate": 1.0269223804144426e-05, "loss": 0.2028360605239868, "step": 209110 }, { "epoch": 0.8977958664983728, "grad_norm": 2.533294439315796, "learning_rate": 1.0264912084026802e-05, "loss": 0.2530220985412598, "step": 209120 }, { "epoch": 0.8978387985883929, "grad_norm": 0.041833195835351944, "learning_rate": 1.0260600363909179e-05, "loss": 0.3378680944442749, "step": 209130 }, { "epoch": 0.8978817306784129, "grad_norm": 2.1199188232421875, "learning_rate": 1.0256288643791555e-05, "loss": 0.20430638790130615, "step": 209140 }, { "epoch": 0.8979246627684329, "grad_norm": 0.4007624387741089, "learning_rate": 1.025197692367393e-05, "loss": 0.10560801029205322, "step": 209150 }, { "epoch": 0.8979675948584529, "grad_norm": 3.4828829765319824, "learning_rate": 1.0247665203556308e-05, "loss": 0.1327006459236145, "step": 209160 }, { "epoch": 0.8980105269484729, "grad_norm": 0.8840770721435547, "learning_rate": 1.0243353483438683e-05, "loss": 0.2786088466644287, "step": 209170 }, { "epoch": 0.8980534590384929, "grad_norm": 3.045027494430542, "learning_rate": 1.0239041763321059e-05, "loss": 0.33854291439056394, "step": 209180 }, { "epoch": 0.8980963911285129, "grad_norm": 0.05625593662261963, "learning_rate": 1.0234730043203436e-05, "loss": 0.2884523868560791, "step": 209190 }, { "epoch": 0.898139323218533, "grad_norm": 0.054473888128995895, "learning_rate": 1.0230418323085812e-05, "loss": 0.16954405307769777, "step": 209200 }, { "epoch": 0.8981822553085529, "grad_norm": 0.9436574578285217, "learning_rate": 1.0226106602968188e-05, "loss": 0.17064402103424073, "step": 209210 }, { "epoch": 0.8982251873985729, "grad_norm": 0.5394967198371887, "learning_rate": 1.0221794882850565e-05, "loss": 0.2750392913818359, "step": 209220 }, { "epoch": 0.898268119488593, "grad_norm": 0.009204737842082977, "learning_rate": 1.021748316273294e-05, "loss": 0.07998819351196289, "step": 209230 }, { "epoch": 0.8983110515786129, "grad_norm": 1.3306227922439575, "learning_rate": 1.0213171442615316e-05, "loss": 0.1779198169708252, "step": 209240 }, { "epoch": 0.898353983668633, "grad_norm": 0.01629238948225975, "learning_rate": 1.0208859722497694e-05, "loss": 0.39297688007354736, "step": 209250 }, { "epoch": 0.898396915758653, "grad_norm": 0.01804766058921814, "learning_rate": 1.020454800238007e-05, "loss": 0.18484385013580323, "step": 209260 }, { "epoch": 0.8984398478486729, "grad_norm": 0.0010492533911019564, "learning_rate": 1.0200236282262447e-05, "loss": 0.36370325088500977, "step": 209270 }, { "epoch": 0.898482779938693, "grad_norm": 1.7016007900238037, "learning_rate": 1.0195924562144822e-05, "loss": 0.33094918727874756, "step": 209280 }, { "epoch": 0.898525712028713, "grad_norm": 0.005374387372285128, "learning_rate": 1.0191612842027198e-05, "loss": 0.009152711182832719, "step": 209290 }, { "epoch": 0.8985686441187329, "grad_norm": 0.2994307279586792, "learning_rate": 1.0187301121909575e-05, "loss": 0.32453336715698244, "step": 209300 }, { "epoch": 0.898611576208753, "grad_norm": 0.13801302015781403, "learning_rate": 1.0182989401791951e-05, "loss": 0.19398313760757446, "step": 209310 }, { "epoch": 0.898654508298773, "grad_norm": 2.9953441619873047, "learning_rate": 1.0178677681674327e-05, "loss": 0.18454222679138182, "step": 209320 }, { "epoch": 0.898697440388793, "grad_norm": 0.0009266235865652561, "learning_rate": 1.0174365961556704e-05, "loss": 0.056571030616760255, "step": 209330 }, { "epoch": 0.898740372478813, "grad_norm": 0.0011994290398433805, "learning_rate": 1.017005424143908e-05, "loss": 0.1038630723953247, "step": 209340 }, { "epoch": 0.898783304568833, "grad_norm": 0.3412676155567169, "learning_rate": 1.0165742521321457e-05, "loss": 0.1812652587890625, "step": 209350 }, { "epoch": 0.898826236658853, "grad_norm": 0.03324000537395477, "learning_rate": 1.0161430801203833e-05, "loss": 0.24029359817504883, "step": 209360 }, { "epoch": 0.898869168748873, "grad_norm": 0.03261468559503555, "learning_rate": 1.015711908108621e-05, "loss": 0.057292830944061277, "step": 209370 }, { "epoch": 0.8989121008388931, "grad_norm": 0.714746356010437, "learning_rate": 1.0152807360968586e-05, "loss": 0.026148182153701783, "step": 209380 }, { "epoch": 0.8989550329289131, "grad_norm": 0.6475493311882019, "learning_rate": 1.0148495640850963e-05, "loss": 0.23983364105224608, "step": 209390 }, { "epoch": 0.898997965018933, "grad_norm": 0.5558874607086182, "learning_rate": 1.0144183920733339e-05, "loss": 0.2596889972686768, "step": 209400 }, { "epoch": 0.8990408971089531, "grad_norm": 0.0003981745394412428, "learning_rate": 1.0139872200615714e-05, "loss": 0.09359182119369507, "step": 209410 }, { "epoch": 0.8990838291989731, "grad_norm": 1.4816875457763672, "learning_rate": 1.0135560480498092e-05, "loss": 0.3855120658874512, "step": 209420 }, { "epoch": 0.899126761288993, "grad_norm": 2.404386281967163, "learning_rate": 1.0131248760380467e-05, "loss": 0.21844077110290527, "step": 209430 }, { "epoch": 0.8991696933790131, "grad_norm": 5.189345359802246, "learning_rate": 1.0126937040262843e-05, "loss": 0.3697656154632568, "step": 209440 }, { "epoch": 0.8992126254690331, "grad_norm": 6.110927581787109, "learning_rate": 1.012262532014522e-05, "loss": 0.415781831741333, "step": 209450 }, { "epoch": 0.8992555575590531, "grad_norm": 2.683704376220703, "learning_rate": 1.0118313600027596e-05, "loss": 0.2659297943115234, "step": 209460 }, { "epoch": 0.8992984896490731, "grad_norm": 1.1903349161148071, "learning_rate": 1.0114001879909972e-05, "loss": 0.17215741872787477, "step": 209470 }, { "epoch": 0.8993414217390931, "grad_norm": 0.04750831425189972, "learning_rate": 1.0109690159792349e-05, "loss": 0.25689048767089845, "step": 209480 }, { "epoch": 0.8993843538291131, "grad_norm": 2.1213433742523193, "learning_rate": 1.0105378439674725e-05, "loss": 0.3482259511947632, "step": 209490 }, { "epoch": 0.8994272859191331, "grad_norm": 5.7372565269470215, "learning_rate": 1.01010667195571e-05, "loss": 0.16127365827560425, "step": 209500 }, { "epoch": 0.8994702180091532, "grad_norm": 0.12518486380577087, "learning_rate": 1.0096754999439478e-05, "loss": 0.11983962059020996, "step": 209510 }, { "epoch": 0.8995131500991731, "grad_norm": 8.179588317871094, "learning_rate": 1.0092443279321853e-05, "loss": 0.2647406578063965, "step": 209520 }, { "epoch": 0.8995560821891931, "grad_norm": 0.024799851700663567, "learning_rate": 1.0088131559204229e-05, "loss": 0.0050085954368114475, "step": 209530 }, { "epoch": 0.8995990142792132, "grad_norm": 0.0018341508693993092, "learning_rate": 1.0083819839086606e-05, "loss": 0.22504839897155762, "step": 209540 }, { "epoch": 0.8996419463692331, "grad_norm": 1.1719921827316284, "learning_rate": 1.0079508118968982e-05, "loss": 0.20294253826141356, "step": 209550 }, { "epoch": 0.8996848784592532, "grad_norm": 0.005458078347146511, "learning_rate": 1.0075196398851357e-05, "loss": 0.1725464105606079, "step": 209560 }, { "epoch": 0.8997278105492732, "grad_norm": 0.2729476988315582, "learning_rate": 1.0070884678733735e-05, "loss": 0.1303679823875427, "step": 209570 }, { "epoch": 0.8997707426392931, "grad_norm": 0.017656253650784492, "learning_rate": 1.006657295861611e-05, "loss": 0.31265428066253664, "step": 209580 }, { "epoch": 0.8998136747293132, "grad_norm": 1.6056455373764038, "learning_rate": 1.0062261238498486e-05, "loss": 0.26979079246521, "step": 209590 }, { "epoch": 0.8998566068193332, "grad_norm": 1.8567376136779785, "learning_rate": 1.0057949518380863e-05, "loss": 0.40013885498046875, "step": 209600 }, { "epoch": 0.8998995389093531, "grad_norm": 0.03454438969492912, "learning_rate": 1.0053637798263239e-05, "loss": 0.18960126638412475, "step": 209610 }, { "epoch": 0.8999424709993732, "grad_norm": 0.03088500164449215, "learning_rate": 1.0049326078145615e-05, "loss": 0.06449623107910156, "step": 209620 }, { "epoch": 0.8999854030893932, "grad_norm": 0.0003505227214191109, "learning_rate": 1.0045014358027992e-05, "loss": 0.1564359188079834, "step": 209630 }, { "epoch": 0.9000283351794132, "grad_norm": 0.01360271405428648, "learning_rate": 1.0040702637910368e-05, "loss": 0.21048970222473146, "step": 209640 }, { "epoch": 0.9000712672694332, "grad_norm": 0.07835225015878677, "learning_rate": 1.0036390917792745e-05, "loss": 0.1049225926399231, "step": 209650 }, { "epoch": 0.9001141993594532, "grad_norm": 2.60602068901062, "learning_rate": 1.003207919767512e-05, "loss": 0.10012807846069335, "step": 209660 }, { "epoch": 0.9001571314494732, "grad_norm": 0.1185954362154007, "learning_rate": 1.0027767477557496e-05, "loss": 0.2436140775680542, "step": 209670 }, { "epoch": 0.9002000635394932, "grad_norm": 0.005014302209019661, "learning_rate": 1.0023455757439874e-05, "loss": 0.14921751022338867, "step": 209680 }, { "epoch": 0.9002429956295133, "grad_norm": 3.485819101333618, "learning_rate": 1.001914403732225e-05, "loss": 0.26135404109954835, "step": 209690 }, { "epoch": 0.9002859277195332, "grad_norm": 0.08411333709955215, "learning_rate": 1.0014832317204625e-05, "loss": 0.14293501377105713, "step": 209700 }, { "epoch": 0.9003288598095532, "grad_norm": 0.004507078789174557, "learning_rate": 1.0010520597087002e-05, "loss": 0.33165478706359863, "step": 209710 }, { "epoch": 0.9003717918995733, "grad_norm": 0.027253407984972, "learning_rate": 1.000620887696938e-05, "loss": 0.3246056079864502, "step": 209720 }, { "epoch": 0.9004147239895932, "grad_norm": 0.01824076473712921, "learning_rate": 1.0001897156851755e-05, "loss": 0.42398953437805176, "step": 209730 }, { "epoch": 0.9004576560796133, "grad_norm": 5.792724132537842, "learning_rate": 9.997585436734131e-06, "loss": 0.418946361541748, "step": 209740 }, { "epoch": 0.9005005881696333, "grad_norm": 1.4856534004211426, "learning_rate": 9.993273716616508e-06, "loss": 0.3223897457122803, "step": 209750 }, { "epoch": 0.9005435202596532, "grad_norm": 0.013289397582411766, "learning_rate": 9.988961996498884e-06, "loss": 0.3965138912200928, "step": 209760 }, { "epoch": 0.9005864523496733, "grad_norm": 0.04032348468899727, "learning_rate": 9.984650276381261e-06, "loss": 0.034949111938476565, "step": 209770 }, { "epoch": 0.9006293844396933, "grad_norm": 0.0031058243475854397, "learning_rate": 9.980338556263637e-06, "loss": 0.0654286801815033, "step": 209780 }, { "epoch": 0.9006723165297132, "grad_norm": 2.0327398777008057, "learning_rate": 9.976026836146013e-06, "loss": 0.0602993369102478, "step": 209790 }, { "epoch": 0.9007152486197333, "grad_norm": 0.06954237818717957, "learning_rate": 9.97171511602839e-06, "loss": 0.11823323965072632, "step": 209800 }, { "epoch": 0.9007581807097533, "grad_norm": 1.5096991062164307, "learning_rate": 9.967403395910766e-06, "loss": 0.0629551887512207, "step": 209810 }, { "epoch": 0.9008011127997734, "grad_norm": 0.5546233654022217, "learning_rate": 9.963091675793141e-06, "loss": 0.12162181138992309, "step": 209820 }, { "epoch": 0.9008440448897933, "grad_norm": 0.009273167699575424, "learning_rate": 9.958779955675519e-06, "loss": 0.1455420136451721, "step": 209830 }, { "epoch": 0.9008869769798133, "grad_norm": 1.0391302108764648, "learning_rate": 9.954468235557894e-06, "loss": 0.04995992183685303, "step": 209840 }, { "epoch": 0.9009299090698334, "grad_norm": 0.0062095304019749165, "learning_rate": 9.95015651544027e-06, "loss": 0.05889610648155212, "step": 209850 }, { "epoch": 0.9009728411598533, "grad_norm": 0.008527030237019062, "learning_rate": 9.945844795322647e-06, "loss": 0.15333669185638427, "step": 209860 }, { "epoch": 0.9010157732498734, "grad_norm": 0.010419109836220741, "learning_rate": 9.941533075205023e-06, "loss": 0.18245283365249634, "step": 209870 }, { "epoch": 0.9010587053398934, "grad_norm": 1.6431654691696167, "learning_rate": 9.937221355087399e-06, "loss": 0.2896125793457031, "step": 209880 }, { "epoch": 0.9011016374299133, "grad_norm": 0.1526016741991043, "learning_rate": 9.932909634969776e-06, "loss": 0.002722269482910633, "step": 209890 }, { "epoch": 0.9011445695199334, "grad_norm": 0.023740194737911224, "learning_rate": 9.928597914852152e-06, "loss": 0.09291549921035766, "step": 209900 }, { "epoch": 0.9011875016099534, "grad_norm": 0.02442866750061512, "learning_rate": 9.924286194734527e-06, "loss": 0.3072220325469971, "step": 209910 }, { "epoch": 0.9012304336999734, "grad_norm": 0.11724057048559189, "learning_rate": 9.919974474616905e-06, "loss": 0.10614768266677857, "step": 209920 }, { "epoch": 0.9012733657899934, "grad_norm": 0.6770175695419312, "learning_rate": 9.91566275449928e-06, "loss": 0.09750730395317078, "step": 209930 }, { "epoch": 0.9013162978800134, "grad_norm": 9.686930656433105, "learning_rate": 9.911351034381656e-06, "loss": 0.3479560136795044, "step": 209940 }, { "epoch": 0.9013592299700334, "grad_norm": 1.8891156911849976, "learning_rate": 9.907039314264033e-06, "loss": 0.1551816463470459, "step": 209950 }, { "epoch": 0.9014021620600534, "grad_norm": 0.15253959596157074, "learning_rate": 9.902727594146409e-06, "loss": 0.11488509178161621, "step": 209960 }, { "epoch": 0.9014450941500735, "grad_norm": 1.0364890098571777, "learning_rate": 9.898415874028784e-06, "loss": 0.26020739078521726, "step": 209970 }, { "epoch": 0.9014880262400934, "grad_norm": 0.0004387570661492646, "learning_rate": 9.894104153911162e-06, "loss": 0.333538818359375, "step": 209980 }, { "epoch": 0.9015309583301134, "grad_norm": 3.0880167484283447, "learning_rate": 9.889792433793537e-06, "loss": 0.2603978872299194, "step": 209990 }, { "epoch": 0.9015738904201335, "grad_norm": 1.891642451286316, "learning_rate": 9.885480713675913e-06, "loss": 0.2281651735305786, "step": 210000 }, { "epoch": 0.9015738904201335, "eval_loss": 0.3739822506904602, "eval_runtime": 27.5102, "eval_samples_per_second": 3.635, "eval_steps_per_second": 3.635, "step": 210000 }, { "epoch": 0.9016168225101534, "grad_norm": 1.4768375158309937, "learning_rate": 9.88116899355829e-06, "loss": 0.15798479318618774, "step": 210010 }, { "epoch": 0.9016597546001734, "grad_norm": 0.018229328095912933, "learning_rate": 9.876857273440666e-06, "loss": 0.0977150857448578, "step": 210020 }, { "epoch": 0.9017026866901935, "grad_norm": 0.001907564583234489, "learning_rate": 9.872545553323043e-06, "loss": 0.04974755644798279, "step": 210030 }, { "epoch": 0.9017456187802134, "grad_norm": 0.035649724304676056, "learning_rate": 9.868233833205419e-06, "loss": 0.1306628942489624, "step": 210040 }, { "epoch": 0.9017885508702335, "grad_norm": 3.1833529472351074, "learning_rate": 9.863922113087795e-06, "loss": 0.14224931001663207, "step": 210050 }, { "epoch": 0.9018314829602535, "grad_norm": 2.376718282699585, "learning_rate": 9.859610392970172e-06, "loss": 0.17341439723968505, "step": 210060 }, { "epoch": 0.9018744150502734, "grad_norm": 2.405601739883423, "learning_rate": 9.855298672852548e-06, "loss": 0.1730472445487976, "step": 210070 }, { "epoch": 0.9019173471402935, "grad_norm": 3.975780487060547, "learning_rate": 9.850986952734925e-06, "loss": 0.18226372003555297, "step": 210080 }, { "epoch": 0.9019602792303135, "grad_norm": 0.012735238298773766, "learning_rate": 9.8466752326173e-06, "loss": 0.2455826759338379, "step": 210090 }, { "epoch": 0.9020032113203335, "grad_norm": 3.667410135269165, "learning_rate": 9.842363512499678e-06, "loss": 0.20534975528717042, "step": 210100 }, { "epoch": 0.9020461434103535, "grad_norm": 0.0019849713426083326, "learning_rate": 9.838051792382054e-06, "loss": 0.18063912391662598, "step": 210110 }, { "epoch": 0.9020890755003735, "grad_norm": 0.21961329877376556, "learning_rate": 9.83374007226443e-06, "loss": 0.23487327098846436, "step": 210120 }, { "epoch": 0.9021320075903935, "grad_norm": 3.4133858680725098, "learning_rate": 9.829428352146807e-06, "loss": 0.20193076133728027, "step": 210130 }, { "epoch": 0.9021749396804135, "grad_norm": 0.020273273810744286, "learning_rate": 9.825116632029182e-06, "loss": 0.18437005281448365, "step": 210140 }, { "epoch": 0.9022178717704336, "grad_norm": 0.00016728635819163173, "learning_rate": 9.82080491191156e-06, "loss": 0.29014551639556885, "step": 210150 }, { "epoch": 0.9022608038604535, "grad_norm": 0.03587735816836357, "learning_rate": 9.816493191793935e-06, "loss": 0.3731184959411621, "step": 210160 }, { "epoch": 0.9023037359504735, "grad_norm": 1.1249128580093384, "learning_rate": 9.812181471676311e-06, "loss": 0.18152229785919188, "step": 210170 }, { "epoch": 0.9023466680404936, "grad_norm": 0.5035809278488159, "learning_rate": 9.807869751558688e-06, "loss": 0.20793728828430175, "step": 210180 }, { "epoch": 0.9023896001305135, "grad_norm": 17.78410530090332, "learning_rate": 9.803558031441064e-06, "loss": 0.21700966358184814, "step": 210190 }, { "epoch": 0.9024325322205335, "grad_norm": 2.60648775100708, "learning_rate": 9.79924631132344e-06, "loss": 0.3313996076583862, "step": 210200 }, { "epoch": 0.9024754643105536, "grad_norm": 0.9339752197265625, "learning_rate": 9.794934591205817e-06, "loss": 0.23482167720794678, "step": 210210 }, { "epoch": 0.9025183964005735, "grad_norm": 0.0010642610723152757, "learning_rate": 9.790622871088193e-06, "loss": 0.12294025421142578, "step": 210220 }, { "epoch": 0.9025613284905936, "grad_norm": 0.041032496839761734, "learning_rate": 9.786311150970568e-06, "loss": 0.06782339811325074, "step": 210230 }, { "epoch": 0.9026042605806136, "grad_norm": 9.69448184967041, "learning_rate": 9.781999430852946e-06, "loss": 0.19246318340301513, "step": 210240 }, { "epoch": 0.9026471926706336, "grad_norm": 0.236909881234169, "learning_rate": 9.777687710735321e-06, "loss": 0.004842896386981011, "step": 210250 }, { "epoch": 0.9026901247606536, "grad_norm": 1.3408589363098145, "learning_rate": 9.773375990617697e-06, "loss": 0.13109498023986815, "step": 210260 }, { "epoch": 0.9027330568506736, "grad_norm": 0.00923833530396223, "learning_rate": 9.769064270500074e-06, "loss": 0.17234526872634887, "step": 210270 }, { "epoch": 0.9027759889406937, "grad_norm": 0.0002061129198409617, "learning_rate": 9.76475255038245e-06, "loss": 0.12138881683349609, "step": 210280 }, { "epoch": 0.9028189210307136, "grad_norm": 0.00944637693464756, "learning_rate": 9.760440830264826e-06, "loss": 0.2013624429702759, "step": 210290 }, { "epoch": 0.9028618531207336, "grad_norm": 1.6908912658691406, "learning_rate": 9.756129110147203e-06, "loss": 0.40554208755493165, "step": 210300 }, { "epoch": 0.9029047852107537, "grad_norm": 0.00027729306020773947, "learning_rate": 9.751817390029579e-06, "loss": 0.25098981857299807, "step": 210310 }, { "epoch": 0.9029477173007736, "grad_norm": 2.2863595485687256, "learning_rate": 9.747505669911954e-06, "loss": 0.36655559539794924, "step": 210320 }, { "epoch": 0.9029906493907937, "grad_norm": 0.005614591762423515, "learning_rate": 9.743193949794332e-06, "loss": 0.18383264541625977, "step": 210330 }, { "epoch": 0.9030335814808137, "grad_norm": 0.0846930667757988, "learning_rate": 9.738882229676707e-06, "loss": 0.17577136754989625, "step": 210340 }, { "epoch": 0.9030765135708336, "grad_norm": 0.006774316541850567, "learning_rate": 9.734570509559083e-06, "loss": 0.10552970170974732, "step": 210350 }, { "epoch": 0.9031194456608537, "grad_norm": 4.395589828491211, "learning_rate": 9.73025878944146e-06, "loss": 0.22056665420532226, "step": 210360 }, { "epoch": 0.9031623777508737, "grad_norm": 0.7510838508605957, "learning_rate": 9.725947069323836e-06, "loss": 0.3048895835876465, "step": 210370 }, { "epoch": 0.9032053098408936, "grad_norm": 10.735414505004883, "learning_rate": 9.721635349206213e-06, "loss": 0.2514230728149414, "step": 210380 }, { "epoch": 0.9032482419309137, "grad_norm": 9.217434883117676, "learning_rate": 9.717323629088589e-06, "loss": 0.20851621627807618, "step": 210390 }, { "epoch": 0.9032911740209337, "grad_norm": 3.4000942707061768, "learning_rate": 9.713011908970964e-06, "loss": 0.10003012418746948, "step": 210400 }, { "epoch": 0.9033341061109537, "grad_norm": 1.2130926847457886, "learning_rate": 9.708700188853342e-06, "loss": 0.1683989644050598, "step": 210410 }, { "epoch": 0.9033770382009737, "grad_norm": 0.11750691384077072, "learning_rate": 9.704388468735717e-06, "loss": 0.06318738460540771, "step": 210420 }, { "epoch": 0.9034199702909937, "grad_norm": 0.0006076234858483076, "learning_rate": 9.700076748618093e-06, "loss": 0.2571662187576294, "step": 210430 }, { "epoch": 0.9034629023810137, "grad_norm": 1.1019152402877808, "learning_rate": 9.69576502850047e-06, "loss": 0.1616765022277832, "step": 210440 }, { "epoch": 0.9035058344710337, "grad_norm": 4.9832892417907715, "learning_rate": 9.691453308382848e-06, "loss": 0.22566719055175782, "step": 210450 }, { "epoch": 0.9035487665610538, "grad_norm": 41.49699783325195, "learning_rate": 9.687141588265223e-06, "loss": 0.3436073064804077, "step": 210460 }, { "epoch": 0.9035916986510737, "grad_norm": 0.0022543699014931917, "learning_rate": 9.682829868147599e-06, "loss": 0.052913957834243776, "step": 210470 }, { "epoch": 0.9036346307410937, "grad_norm": 0.04838306084275246, "learning_rate": 9.678518148029976e-06, "loss": 0.16422059535980224, "step": 210480 }, { "epoch": 0.9036775628311138, "grad_norm": 2.2827861309051514, "learning_rate": 9.674206427912352e-06, "loss": 0.22558994293212892, "step": 210490 }, { "epoch": 0.9037204949211337, "grad_norm": 0.0007248317124322057, "learning_rate": 9.669894707794728e-06, "loss": 0.16456080675125123, "step": 210500 }, { "epoch": 0.9037634270111538, "grad_norm": 0.001483508967794478, "learning_rate": 9.665582987677105e-06, "loss": 0.05367158055305481, "step": 210510 }, { "epoch": 0.9038063591011738, "grad_norm": 0.0016992673045024276, "learning_rate": 9.66127126755948e-06, "loss": 0.32336156368255614, "step": 210520 }, { "epoch": 0.9038492911911937, "grad_norm": 0.013364183716475964, "learning_rate": 9.656959547441858e-06, "loss": 0.18762919902801514, "step": 210530 }, { "epoch": 0.9038922232812138, "grad_norm": 0.0003909058286808431, "learning_rate": 9.652647827324234e-06, "loss": 0.0872824728488922, "step": 210540 }, { "epoch": 0.9039351553712338, "grad_norm": 0.04263646900653839, "learning_rate": 9.64833610720661e-06, "loss": 0.23260953426361083, "step": 210550 }, { "epoch": 0.9039780874612537, "grad_norm": 0.14210690557956696, "learning_rate": 9.644024387088987e-06, "loss": 0.16822166442871095, "step": 210560 }, { "epoch": 0.9040210195512738, "grad_norm": 0.18358734250068665, "learning_rate": 9.639712666971362e-06, "loss": 0.05378770232200623, "step": 210570 }, { "epoch": 0.9040639516412938, "grad_norm": 0.019276317209005356, "learning_rate": 9.635400946853738e-06, "loss": 0.16520183086395263, "step": 210580 }, { "epoch": 0.9041068837313138, "grad_norm": 1.3517718315124512, "learning_rate": 9.631089226736115e-06, "loss": 0.22130036354064941, "step": 210590 }, { "epoch": 0.9041498158213338, "grad_norm": 0.06858087331056595, "learning_rate": 9.626777506618491e-06, "loss": 0.20281364917755126, "step": 210600 }, { "epoch": 0.9041927479113538, "grad_norm": 0.004181146156042814, "learning_rate": 9.622465786500867e-06, "loss": 0.33087265491485596, "step": 210610 }, { "epoch": 0.9042356800013738, "grad_norm": 0.023237429559230804, "learning_rate": 9.618154066383244e-06, "loss": 0.043955230712890626, "step": 210620 }, { "epoch": 0.9042786120913938, "grad_norm": 6.121235370635986, "learning_rate": 9.61384234626562e-06, "loss": 0.1775528073310852, "step": 210630 }, { "epoch": 0.9043215441814139, "grad_norm": 0.0028020916506648064, "learning_rate": 9.609530626147995e-06, "loss": 0.22397143840789796, "step": 210640 }, { "epoch": 0.9043644762714338, "grad_norm": 0.005821366794407368, "learning_rate": 9.605218906030373e-06, "loss": 0.19856249094009398, "step": 210650 }, { "epoch": 0.9044074083614538, "grad_norm": 1.4921270608901978, "learning_rate": 9.600907185912748e-06, "loss": 0.31009862422943113, "step": 210660 }, { "epoch": 0.9044503404514739, "grad_norm": 2.3841261863708496, "learning_rate": 9.596595465795124e-06, "loss": 0.17027231454849243, "step": 210670 }, { "epoch": 0.9044932725414939, "grad_norm": 6.284855365753174, "learning_rate": 9.592283745677501e-06, "loss": 0.1498428463935852, "step": 210680 }, { "epoch": 0.9045362046315139, "grad_norm": 8.141776925185695e-05, "learning_rate": 9.587972025559877e-06, "loss": 0.29921822547912597, "step": 210690 }, { "epoch": 0.9045791367215339, "grad_norm": 1.557852029800415, "learning_rate": 9.583660305442253e-06, "loss": 0.11124341487884522, "step": 210700 }, { "epoch": 0.9046220688115539, "grad_norm": 0.002186469966545701, "learning_rate": 9.57934858532463e-06, "loss": 0.057540792226791385, "step": 210710 }, { "epoch": 0.9046650009015739, "grad_norm": 0.014679819345474243, "learning_rate": 9.575036865207006e-06, "loss": 0.07504054307937622, "step": 210720 }, { "epoch": 0.9047079329915939, "grad_norm": 3.3283891677856445, "learning_rate": 9.570725145089381e-06, "loss": 0.3486778974533081, "step": 210730 }, { "epoch": 0.904750865081614, "grad_norm": 3.6053216457366943, "learning_rate": 9.566413424971759e-06, "loss": 0.08856486082077027, "step": 210740 }, { "epoch": 0.9047937971716339, "grad_norm": 0.024385172873735428, "learning_rate": 9.562101704854134e-06, "loss": 0.17349275350570678, "step": 210750 }, { "epoch": 0.9048367292616539, "grad_norm": 2.2132294178009033, "learning_rate": 9.557789984736512e-06, "loss": 0.123107647895813, "step": 210760 }, { "epoch": 0.904879661351674, "grad_norm": 0.9034229516983032, "learning_rate": 9.553478264618887e-06, "loss": 0.22174947261810302, "step": 210770 }, { "epoch": 0.9049225934416939, "grad_norm": 0.03812728449702263, "learning_rate": 9.549166544501263e-06, "loss": 0.11002792119979858, "step": 210780 }, { "epoch": 0.9049655255317139, "grad_norm": 0.6964645385742188, "learning_rate": 9.54485482438364e-06, "loss": 0.30134100914001466, "step": 210790 }, { "epoch": 0.905008457621734, "grad_norm": 1.9698841571807861, "learning_rate": 9.540543104266016e-06, "loss": 0.11750224828720093, "step": 210800 }, { "epoch": 0.9050513897117539, "grad_norm": 0.021683400496840477, "learning_rate": 9.536231384148393e-06, "loss": 0.157227087020874, "step": 210810 }, { "epoch": 0.905094321801774, "grad_norm": 1.2105274200439453, "learning_rate": 9.531919664030769e-06, "loss": 0.11554534435272217, "step": 210820 }, { "epoch": 0.905137253891794, "grad_norm": 0.0017459297087043524, "learning_rate": 9.527607943913146e-06, "loss": 0.08362597823143006, "step": 210830 }, { "epoch": 0.9051801859818139, "grad_norm": 1.1244165897369385, "learning_rate": 9.523296223795522e-06, "loss": 0.2152784824371338, "step": 210840 }, { "epoch": 0.905223118071834, "grad_norm": 0.003775811055675149, "learning_rate": 9.518984503677897e-06, "loss": 0.1833783507347107, "step": 210850 }, { "epoch": 0.905266050161854, "grad_norm": 2.9542245864868164, "learning_rate": 9.514672783560275e-06, "loss": 0.30322258472442626, "step": 210860 }, { "epoch": 0.905308982251874, "grad_norm": 0.00222378084436059, "learning_rate": 9.51036106344265e-06, "loss": 0.3002403020858765, "step": 210870 }, { "epoch": 0.905351914341894, "grad_norm": 0.02239265665411949, "learning_rate": 9.506049343325026e-06, "loss": 0.027442681789398193, "step": 210880 }, { "epoch": 0.905394846431914, "grad_norm": 0.09150674194097519, "learning_rate": 9.501737623207403e-06, "loss": 0.17352490425109862, "step": 210890 }, { "epoch": 0.905437778521934, "grad_norm": 2.7677178382873535, "learning_rate": 9.497425903089779e-06, "loss": 0.22967958450317383, "step": 210900 }, { "epoch": 0.905480710611954, "grad_norm": 0.7799781560897827, "learning_rate": 9.493114182972156e-06, "loss": 0.14056270122528075, "step": 210910 }, { "epoch": 0.905523642701974, "grad_norm": 1.656139612197876, "learning_rate": 9.488802462854532e-06, "loss": 0.21149537563323975, "step": 210920 }, { "epoch": 0.905566574791994, "grad_norm": 0.04829051345586777, "learning_rate": 9.484490742736908e-06, "loss": 0.2642256259918213, "step": 210930 }, { "epoch": 0.905609506882014, "grad_norm": 0.02896782010793686, "learning_rate": 9.480179022619285e-06, "loss": 0.37795684337615965, "step": 210940 }, { "epoch": 0.9056524389720341, "grad_norm": 0.1285865157842636, "learning_rate": 9.47586730250166e-06, "loss": 0.13206915855407714, "step": 210950 }, { "epoch": 0.905695371062054, "grad_norm": 0.0013368797954171896, "learning_rate": 9.471555582384036e-06, "loss": 0.45220026969909666, "step": 210960 }, { "epoch": 0.905738303152074, "grad_norm": 3.8433640003204346, "learning_rate": 9.467243862266414e-06, "loss": 0.2438734531402588, "step": 210970 }, { "epoch": 0.9057812352420941, "grad_norm": 0.7117074728012085, "learning_rate": 9.46293214214879e-06, "loss": 0.15083757638931275, "step": 210980 }, { "epoch": 0.905824167332114, "grad_norm": 1.0885868072509766, "learning_rate": 9.458620422031165e-06, "loss": 0.39199352264404297, "step": 210990 }, { "epoch": 0.9058670994221341, "grad_norm": 0.20401152968406677, "learning_rate": 9.454308701913542e-06, "loss": 0.21293511390686035, "step": 211000 }, { "epoch": 0.9058670994221341, "eval_loss": 0.37001845240592957, "eval_runtime": 27.3722, "eval_samples_per_second": 3.653, "eval_steps_per_second": 3.653, "step": 211000 }, { "epoch": 0.9059100315121541, "grad_norm": 1.95425546169281, "learning_rate": 9.449996981795918e-06, "loss": 0.20344960689544678, "step": 211010 }, { "epoch": 0.905952963602174, "grad_norm": 0.04089081287384033, "learning_rate": 9.445685261678294e-06, "loss": 0.12624008655548097, "step": 211020 }, { "epoch": 0.9059958956921941, "grad_norm": 0.2765030264854431, "learning_rate": 9.441373541560671e-06, "loss": 0.35709438323974607, "step": 211030 }, { "epoch": 0.9060388277822141, "grad_norm": 0.0029109471943229437, "learning_rate": 9.437061821443047e-06, "loss": 0.086311936378479, "step": 211040 }, { "epoch": 0.906081759872234, "grad_norm": 0.1786552518606186, "learning_rate": 9.432750101325422e-06, "loss": 0.19341384172439574, "step": 211050 }, { "epoch": 0.9061246919622541, "grad_norm": 6.021338939666748, "learning_rate": 9.4284383812078e-06, "loss": 0.10097607374191284, "step": 211060 }, { "epoch": 0.9061676240522741, "grad_norm": 0.04982906952500343, "learning_rate": 9.424126661090175e-06, "loss": 0.04032069146633148, "step": 211070 }, { "epoch": 0.9062105561422941, "grad_norm": 0.0056654238142073154, "learning_rate": 9.419814940972551e-06, "loss": 0.07370581030845642, "step": 211080 }, { "epoch": 0.9062534882323141, "grad_norm": 0.0005656041321344674, "learning_rate": 9.415503220854928e-06, "loss": 0.35269713401794434, "step": 211090 }, { "epoch": 0.9062964203223342, "grad_norm": 0.04776056110858917, "learning_rate": 9.411191500737304e-06, "loss": 0.2374497890472412, "step": 211100 }, { "epoch": 0.9063393524123542, "grad_norm": 0.15905514359474182, "learning_rate": 9.40687978061968e-06, "loss": 0.17020422220230103, "step": 211110 }, { "epoch": 0.9063822845023741, "grad_norm": 0.006161512341350317, "learning_rate": 9.402568060502057e-06, "loss": 0.1747220277786255, "step": 211120 }, { "epoch": 0.9064252165923942, "grad_norm": 6.532177925109863, "learning_rate": 9.398256340384433e-06, "loss": 0.2477626085281372, "step": 211130 }, { "epoch": 0.9064681486824142, "grad_norm": 0.21067188680171967, "learning_rate": 9.39394462026681e-06, "loss": 0.07029619812965393, "step": 211140 }, { "epoch": 0.9065110807724341, "grad_norm": 3.6878209114074707, "learning_rate": 9.389632900149186e-06, "loss": 0.22565534114837646, "step": 211150 }, { "epoch": 0.9065540128624542, "grad_norm": 0.0373564250767231, "learning_rate": 9.385321180031561e-06, "loss": 0.38879761695861814, "step": 211160 }, { "epoch": 0.9065969449524742, "grad_norm": 2.5509607791900635, "learning_rate": 9.381009459913939e-06, "loss": 0.28637146949768066, "step": 211170 }, { "epoch": 0.9066398770424942, "grad_norm": 1.6686248779296875, "learning_rate": 9.376697739796316e-06, "loss": 0.28002395629882815, "step": 211180 }, { "epoch": 0.9066828091325142, "grad_norm": 0.0021680134814232588, "learning_rate": 9.372386019678692e-06, "loss": 0.445535945892334, "step": 211190 }, { "epoch": 0.9067257412225342, "grad_norm": 0.0014827148988842964, "learning_rate": 9.368074299561067e-06, "loss": 0.09994487166404724, "step": 211200 }, { "epoch": 0.9067686733125542, "grad_norm": 0.12028845399618149, "learning_rate": 9.363762579443445e-06, "loss": 0.26585865020751953, "step": 211210 }, { "epoch": 0.9068116054025742, "grad_norm": 0.941667914390564, "learning_rate": 9.35945085932582e-06, "loss": 0.39935662746429446, "step": 211220 }, { "epoch": 0.9068545374925943, "grad_norm": 3.0790231227874756, "learning_rate": 9.355139139208196e-06, "loss": 0.1506575345993042, "step": 211230 }, { "epoch": 0.9068974695826142, "grad_norm": 2.296433687210083, "learning_rate": 9.350827419090573e-06, "loss": 0.15209686756134033, "step": 211240 }, { "epoch": 0.9069404016726342, "grad_norm": 0.02622704952955246, "learning_rate": 9.346515698972949e-06, "loss": 0.11668375730514527, "step": 211250 }, { "epoch": 0.9069833337626543, "grad_norm": 0.0014148653717711568, "learning_rate": 9.342203978855326e-06, "loss": 0.27220356464385986, "step": 211260 }, { "epoch": 0.9070262658526742, "grad_norm": 0.12697049975395203, "learning_rate": 9.337892258737702e-06, "loss": 0.23106029033660888, "step": 211270 }, { "epoch": 0.9070691979426942, "grad_norm": 0.04394913837313652, "learning_rate": 9.333580538620077e-06, "loss": 0.15961995124816894, "step": 211280 }, { "epoch": 0.9071121300327143, "grad_norm": 3.6546521186828613, "learning_rate": 9.329268818502455e-06, "loss": 0.2648077249526978, "step": 211290 }, { "epoch": 0.9071550621227342, "grad_norm": 2.3274571895599365, "learning_rate": 9.32495709838483e-06, "loss": 0.2588292837142944, "step": 211300 }, { "epoch": 0.9071979942127543, "grad_norm": 0.2122945338487625, "learning_rate": 9.320645378267206e-06, "loss": 0.2704058885574341, "step": 211310 }, { "epoch": 0.9072409263027743, "grad_norm": 0.0172574520111084, "learning_rate": 9.316333658149583e-06, "loss": 0.34654572010040285, "step": 211320 }, { "epoch": 0.9072838583927942, "grad_norm": 0.005942110437899828, "learning_rate": 9.312021938031959e-06, "loss": 0.14186539649963378, "step": 211330 }, { "epoch": 0.9073267904828143, "grad_norm": 0.016240037977695465, "learning_rate": 9.307710217914335e-06, "loss": 0.07858587503433227, "step": 211340 }, { "epoch": 0.9073697225728343, "grad_norm": 0.010405509732663631, "learning_rate": 9.303398497796712e-06, "loss": 0.18252902030944823, "step": 211350 }, { "epoch": 0.9074126546628543, "grad_norm": 0.0003283452242612839, "learning_rate": 9.299086777679088e-06, "loss": 0.21496164798736572, "step": 211360 }, { "epoch": 0.9074555867528743, "grad_norm": 0.1149737760424614, "learning_rate": 9.294775057561463e-06, "loss": 0.21276600360870362, "step": 211370 }, { "epoch": 0.9074985188428943, "grad_norm": 1.8674908876419067, "learning_rate": 9.29046333744384e-06, "loss": 0.2720707178115845, "step": 211380 }, { "epoch": 0.9075414509329143, "grad_norm": 0.16619011759757996, "learning_rate": 9.286151617326216e-06, "loss": 0.14984419345855712, "step": 211390 }, { "epoch": 0.9075843830229343, "grad_norm": 2.006049156188965, "learning_rate": 9.281839897208592e-06, "loss": 0.11598966121673585, "step": 211400 }, { "epoch": 0.9076273151129544, "grad_norm": 0.18155886232852936, "learning_rate": 9.27752817709097e-06, "loss": 0.24428968429565429, "step": 211410 }, { "epoch": 0.9076702472029743, "grad_norm": 0.004149468149989843, "learning_rate": 9.273216456973345e-06, "loss": 0.16037646532058716, "step": 211420 }, { "epoch": 0.9077131792929943, "grad_norm": 0.004715626128017902, "learning_rate": 9.26890473685572e-06, "loss": 0.13179171085357666, "step": 211430 }, { "epoch": 0.9077561113830144, "grad_norm": 0.010400134138762951, "learning_rate": 9.264593016738098e-06, "loss": 0.03884969651699066, "step": 211440 }, { "epoch": 0.9077990434730343, "grad_norm": 0.022108979523181915, "learning_rate": 9.260281296620474e-06, "loss": 0.3082466125488281, "step": 211450 }, { "epoch": 0.9078419755630543, "grad_norm": 3.668081045150757, "learning_rate": 9.25596957650285e-06, "loss": 0.19297831058502196, "step": 211460 }, { "epoch": 0.9078849076530744, "grad_norm": 1.232636570930481, "learning_rate": 9.251657856385227e-06, "loss": 0.372821044921875, "step": 211470 }, { "epoch": 0.9079278397430943, "grad_norm": 1.6673029661178589, "learning_rate": 9.247346136267602e-06, "loss": 0.19048078060150148, "step": 211480 }, { "epoch": 0.9079707718331144, "grad_norm": 0.03407606855034828, "learning_rate": 9.243034416149978e-06, "loss": 0.17276411056518554, "step": 211490 }, { "epoch": 0.9080137039231344, "grad_norm": 0.004653999116271734, "learning_rate": 9.238722696032355e-06, "loss": 0.18268084526062012, "step": 211500 }, { "epoch": 0.9080566360131543, "grad_norm": 0.010216489434242249, "learning_rate": 9.234410975914731e-06, "loss": 0.2223001480102539, "step": 211510 }, { "epoch": 0.9080995681031744, "grad_norm": 0.30999666452407837, "learning_rate": 9.230099255797108e-06, "loss": 0.3233329296112061, "step": 211520 }, { "epoch": 0.9081425001931944, "grad_norm": 0.0008284636423923075, "learning_rate": 9.225787535679484e-06, "loss": 0.05378011465072632, "step": 211530 }, { "epoch": 0.9081854322832145, "grad_norm": 0.007144168484956026, "learning_rate": 9.221475815561861e-06, "loss": 0.09841606616973878, "step": 211540 }, { "epoch": 0.9082283643732344, "grad_norm": 0.050571274012327194, "learning_rate": 9.217164095444237e-06, "loss": 0.162974750995636, "step": 211550 }, { "epoch": 0.9082712964632544, "grad_norm": 0.004725494422018528, "learning_rate": 9.212852375326614e-06, "loss": 0.2045898199081421, "step": 211560 }, { "epoch": 0.9083142285532745, "grad_norm": 0.09760285913944244, "learning_rate": 9.20854065520899e-06, "loss": 0.1423335075378418, "step": 211570 }, { "epoch": 0.9083571606432944, "grad_norm": 1.1642075777053833, "learning_rate": 9.204228935091366e-06, "loss": 0.06477875113487244, "step": 211580 }, { "epoch": 0.9084000927333145, "grad_norm": 0.21785888075828552, "learning_rate": 9.199917214973743e-06, "loss": 0.22252657413482665, "step": 211590 }, { "epoch": 0.9084430248233345, "grad_norm": 0.015162704512476921, "learning_rate": 9.195605494856119e-06, "loss": 0.2382740259170532, "step": 211600 }, { "epoch": 0.9084859569133544, "grad_norm": 0.0008446506108157337, "learning_rate": 9.191293774738494e-06, "loss": 0.16340986490249634, "step": 211610 }, { "epoch": 0.9085288890033745, "grad_norm": 0.006048992741852999, "learning_rate": 9.186982054620872e-06, "loss": 0.09929171800613404, "step": 211620 }, { "epoch": 0.9085718210933945, "grad_norm": 0.021925954148173332, "learning_rate": 9.182670334503247e-06, "loss": 0.12175835371017456, "step": 211630 }, { "epoch": 0.9086147531834144, "grad_norm": 0.0023411933798342943, "learning_rate": 9.178358614385625e-06, "loss": 0.1825516939163208, "step": 211640 }, { "epoch": 0.9086576852734345, "grad_norm": 0.113210529088974, "learning_rate": 9.174046894268e-06, "loss": 0.23193886280059814, "step": 211650 }, { "epoch": 0.9087006173634545, "grad_norm": 0.00030858899117447436, "learning_rate": 9.169735174150376e-06, "loss": 0.22229115962982177, "step": 211660 }, { "epoch": 0.9087435494534745, "grad_norm": 0.0011662208708003163, "learning_rate": 9.165423454032753e-06, "loss": 0.15364946126937867, "step": 211670 }, { "epoch": 0.9087864815434945, "grad_norm": 4.0690460205078125, "learning_rate": 9.161111733915129e-06, "loss": 0.18665337562561035, "step": 211680 }, { "epoch": 0.9088294136335145, "grad_norm": 0.00017444766126573086, "learning_rate": 9.156800013797504e-06, "loss": 0.1485775351524353, "step": 211690 }, { "epoch": 0.9088723457235345, "grad_norm": 0.768441379070282, "learning_rate": 9.152488293679882e-06, "loss": 0.14072189331054688, "step": 211700 }, { "epoch": 0.9089152778135545, "grad_norm": 0.07776512950658798, "learning_rate": 9.148176573562257e-06, "loss": 0.25699448585510254, "step": 211710 }, { "epoch": 0.9089582099035746, "grad_norm": 0.013703049160540104, "learning_rate": 9.143864853444633e-06, "loss": 0.21622865200042723, "step": 211720 }, { "epoch": 0.9090011419935945, "grad_norm": 0.17358940839767456, "learning_rate": 9.13955313332701e-06, "loss": 0.10401270389556885, "step": 211730 }, { "epoch": 0.9090440740836145, "grad_norm": 0.0002648585650604218, "learning_rate": 9.135241413209386e-06, "loss": 0.1151541829109192, "step": 211740 }, { "epoch": 0.9090870061736346, "grad_norm": 4.667215824127197, "learning_rate": 9.130929693091762e-06, "loss": 0.23117105960845946, "step": 211750 }, { "epoch": 0.9091299382636545, "grad_norm": 1.013177514076233, "learning_rate": 9.126617972974139e-06, "loss": 0.23333847522735596, "step": 211760 }, { "epoch": 0.9091728703536746, "grad_norm": 1.5600218772888184, "learning_rate": 9.122306252856515e-06, "loss": 0.15483250617980956, "step": 211770 }, { "epoch": 0.9092158024436946, "grad_norm": 0.0004753020766656846, "learning_rate": 9.11799453273889e-06, "loss": 0.2777076244354248, "step": 211780 }, { "epoch": 0.9092587345337145, "grad_norm": 0.014906748197972775, "learning_rate": 9.113682812621268e-06, "loss": 0.14254034757614137, "step": 211790 }, { "epoch": 0.9093016666237346, "grad_norm": 0.22179877758026123, "learning_rate": 9.109371092503643e-06, "loss": 0.14603278636932374, "step": 211800 }, { "epoch": 0.9093445987137546, "grad_norm": 0.06097375229001045, "learning_rate": 9.105059372386019e-06, "loss": 0.18807802200317383, "step": 211810 }, { "epoch": 0.9093875308037745, "grad_norm": 0.002239939058199525, "learning_rate": 9.100747652268396e-06, "loss": 0.15530016422271728, "step": 211820 }, { "epoch": 0.9094304628937946, "grad_norm": 51.984317779541016, "learning_rate": 9.096435932150772e-06, "loss": 0.12407512664794922, "step": 211830 }, { "epoch": 0.9094733949838146, "grad_norm": 7.28548526763916, "learning_rate": 9.092124212033148e-06, "loss": 0.21014230251312255, "step": 211840 }, { "epoch": 0.9095163270738346, "grad_norm": 0.004259404726326466, "learning_rate": 9.087812491915525e-06, "loss": 0.1882183074951172, "step": 211850 }, { "epoch": 0.9095592591638546, "grad_norm": 0.014772419817745686, "learning_rate": 9.0835007717979e-06, "loss": 0.147011661529541, "step": 211860 }, { "epoch": 0.9096021912538746, "grad_norm": 0.49753573536872864, "learning_rate": 9.079189051680276e-06, "loss": 0.29521842002868653, "step": 211870 }, { "epoch": 0.9096451233438946, "grad_norm": 0.015458498150110245, "learning_rate": 9.074877331562654e-06, "loss": 0.1639685869216919, "step": 211880 }, { "epoch": 0.9096880554339146, "grad_norm": 2.9252068996429443, "learning_rate": 9.07056561144503e-06, "loss": 0.18361132144927977, "step": 211890 }, { "epoch": 0.9097309875239347, "grad_norm": 0.1760420799255371, "learning_rate": 9.066253891327407e-06, "loss": 0.11042402982711792, "step": 211900 }, { "epoch": 0.9097739196139546, "grad_norm": 0.00045009804307483137, "learning_rate": 9.061942171209784e-06, "loss": 0.2616447925567627, "step": 211910 }, { "epoch": 0.9098168517039746, "grad_norm": 0.006046078633517027, "learning_rate": 9.05763045109216e-06, "loss": 0.06336274743080139, "step": 211920 }, { "epoch": 0.9098597837939947, "grad_norm": 0.051460232585668564, "learning_rate": 9.053318730974535e-06, "loss": 0.18316928148269654, "step": 211930 }, { "epoch": 0.9099027158840146, "grad_norm": 2.8884639739990234, "learning_rate": 9.049007010856913e-06, "loss": 0.13370351791381835, "step": 211940 }, { "epoch": 0.9099456479740347, "grad_norm": 0.01390179991722107, "learning_rate": 9.044695290739288e-06, "loss": 0.3180847644805908, "step": 211950 }, { "epoch": 0.9099885800640547, "grad_norm": 1.4461475610733032, "learning_rate": 9.040383570621664e-06, "loss": 0.12614543437957765, "step": 211960 }, { "epoch": 0.9100315121540747, "grad_norm": 0.0034605904947966337, "learning_rate": 9.036071850504041e-06, "loss": 0.2389918804168701, "step": 211970 }, { "epoch": 0.9100744442440947, "grad_norm": 0.00030550433439202607, "learning_rate": 9.031760130386417e-06, "loss": 0.04597398638725281, "step": 211980 }, { "epoch": 0.9101173763341147, "grad_norm": 0.091501384973526, "learning_rate": 9.027448410268793e-06, "loss": 0.023420873284339904, "step": 211990 }, { "epoch": 0.9101603084241348, "grad_norm": 0.005600926466286182, "learning_rate": 9.02313669015117e-06, "loss": 0.07838650941848754, "step": 212000 }, { "epoch": 0.9101603084241348, "eval_loss": 0.37726593017578125, "eval_runtime": 27.4014, "eval_samples_per_second": 3.649, "eval_steps_per_second": 3.649, "step": 212000 }, { "epoch": 0.9102032405141547, "grad_norm": 0.11374276131391525, "learning_rate": 9.018824970033546e-06, "loss": 0.15132025480270386, "step": 212010 }, { "epoch": 0.9102461726041747, "grad_norm": 2.1611249446868896, "learning_rate": 9.014513249915923e-06, "loss": 0.21653666496276855, "step": 212020 }, { "epoch": 0.9102891046941948, "grad_norm": 0.1119178906083107, "learning_rate": 9.010201529798299e-06, "loss": 0.19596610069274903, "step": 212030 }, { "epoch": 0.9103320367842147, "grad_norm": 0.03157566860318184, "learning_rate": 9.005889809680674e-06, "loss": 0.17952189445495606, "step": 212040 }, { "epoch": 0.9103749688742347, "grad_norm": 0.029103923588991165, "learning_rate": 9.001578089563052e-06, "loss": 0.17300734519958497, "step": 212050 }, { "epoch": 0.9104179009642548, "grad_norm": 0.058678023517131805, "learning_rate": 8.997266369445427e-06, "loss": 0.05254848003387451, "step": 212060 }, { "epoch": 0.9104608330542747, "grad_norm": 2.070000648498535, "learning_rate": 8.992954649327803e-06, "loss": 0.2808130502700806, "step": 212070 }, { "epoch": 0.9105037651442948, "grad_norm": 0.09030576795339584, "learning_rate": 8.98864292921018e-06, "loss": 0.17629364728927613, "step": 212080 }, { "epoch": 0.9105466972343148, "grad_norm": 1.5905768871307373, "learning_rate": 8.984331209092556e-06, "loss": 0.3039500951766968, "step": 212090 }, { "epoch": 0.9105896293243347, "grad_norm": 0.016770660877227783, "learning_rate": 8.980019488974931e-06, "loss": 0.12196837663650513, "step": 212100 }, { "epoch": 0.9106325614143548, "grad_norm": 0.0024727019481360912, "learning_rate": 8.975707768857309e-06, "loss": 0.18381850719451903, "step": 212110 }, { "epoch": 0.9106754935043748, "grad_norm": 0.027349425479769707, "learning_rate": 8.971396048739684e-06, "loss": 0.16867611408233643, "step": 212120 }, { "epoch": 0.9107184255943948, "grad_norm": 0.14424623548984528, "learning_rate": 8.96708432862206e-06, "loss": 0.18001662492752074, "step": 212130 }, { "epoch": 0.9107613576844148, "grad_norm": 0.02970905415713787, "learning_rate": 8.962772608504437e-06, "loss": 0.21140286922454835, "step": 212140 }, { "epoch": 0.9108042897744348, "grad_norm": 1.0737354755401611, "learning_rate": 8.958460888386813e-06, "loss": 0.2667649269104004, "step": 212150 }, { "epoch": 0.9108472218644548, "grad_norm": 0.9957414269447327, "learning_rate": 8.954149168269189e-06, "loss": 0.2296832323074341, "step": 212160 }, { "epoch": 0.9108901539544748, "grad_norm": 0.07176550477743149, "learning_rate": 8.949837448151566e-06, "loss": 0.07566173076629638, "step": 212170 }, { "epoch": 0.9109330860444949, "grad_norm": 28.415855407714844, "learning_rate": 8.945525728033942e-06, "loss": 0.10729323625564575, "step": 212180 }, { "epoch": 0.9109760181345148, "grad_norm": 0.0053515927866101265, "learning_rate": 8.941214007916317e-06, "loss": 0.031461399793624875, "step": 212190 }, { "epoch": 0.9110189502245348, "grad_norm": 3.31968092918396, "learning_rate": 8.936902287798695e-06, "loss": 0.0983917772769928, "step": 212200 }, { "epoch": 0.9110618823145549, "grad_norm": 0.0015259032370522618, "learning_rate": 8.93259056768107e-06, "loss": 0.049478965997695926, "step": 212210 }, { "epoch": 0.9111048144045748, "grad_norm": 7.493229866027832, "learning_rate": 8.928278847563446e-06, "loss": 0.11486443281173705, "step": 212220 }, { "epoch": 0.9111477464945948, "grad_norm": 0.7252117991447449, "learning_rate": 8.923967127445823e-06, "loss": 0.25599918365478513, "step": 212230 }, { "epoch": 0.9111906785846149, "grad_norm": 0.6110628843307495, "learning_rate": 8.919655407328199e-06, "loss": 0.2323148250579834, "step": 212240 }, { "epoch": 0.9112336106746348, "grad_norm": 15.844795227050781, "learning_rate": 8.915343687210576e-06, "loss": 0.20794005393981935, "step": 212250 }, { "epoch": 0.9112765427646549, "grad_norm": 1.656082034111023, "learning_rate": 8.911031967092954e-06, "loss": 0.14436640739440917, "step": 212260 }, { "epoch": 0.9113194748546749, "grad_norm": 0.0013086560647934675, "learning_rate": 8.90672024697533e-06, "loss": 0.35257253646850584, "step": 212270 }, { "epoch": 0.9113624069446948, "grad_norm": 0.011829572729766369, "learning_rate": 8.902408526857705e-06, "loss": 0.13194645643234254, "step": 212280 }, { "epoch": 0.9114053390347149, "grad_norm": 3.3504648208618164, "learning_rate": 8.898096806740082e-06, "loss": 0.27801511287689207, "step": 212290 }, { "epoch": 0.9114482711247349, "grad_norm": 0.08066578209400177, "learning_rate": 8.893785086622458e-06, "loss": 0.1585480809211731, "step": 212300 }, { "epoch": 0.9114912032147549, "grad_norm": 0.1084263026714325, "learning_rate": 8.889473366504834e-06, "loss": 0.1131742000579834, "step": 212310 }, { "epoch": 0.9115341353047749, "grad_norm": 0.308685302734375, "learning_rate": 8.885161646387211e-06, "loss": 0.1919371724128723, "step": 212320 }, { "epoch": 0.9115770673947949, "grad_norm": 0.10141505300998688, "learning_rate": 8.880849926269587e-06, "loss": 0.11400158405303955, "step": 212330 }, { "epoch": 0.9116199994848149, "grad_norm": 0.0039230696856975555, "learning_rate": 8.876538206151962e-06, "loss": 0.05479676723480224, "step": 212340 }, { "epoch": 0.9116629315748349, "grad_norm": 0.024995839223265648, "learning_rate": 8.87222648603434e-06, "loss": 0.15745952129364013, "step": 212350 }, { "epoch": 0.911705863664855, "grad_norm": 0.0010867923265323043, "learning_rate": 8.867914765916715e-06, "loss": 0.3819491624832153, "step": 212360 }, { "epoch": 0.9117487957548749, "grad_norm": 1.1967419385910034, "learning_rate": 8.863603045799091e-06, "loss": 0.475917911529541, "step": 212370 }, { "epoch": 0.9117917278448949, "grad_norm": 1.2156577110290527, "learning_rate": 8.859291325681468e-06, "loss": 0.24176313877105712, "step": 212380 }, { "epoch": 0.911834659934915, "grad_norm": 0.0021465690806508064, "learning_rate": 8.854979605563844e-06, "loss": 0.23308084011077881, "step": 212390 }, { "epoch": 0.911877592024935, "grad_norm": 2.2919228076934814, "learning_rate": 8.850667885446221e-06, "loss": 0.11274752616882325, "step": 212400 }, { "epoch": 0.911920524114955, "grad_norm": 3.1902475357055664, "learning_rate": 8.846356165328597e-06, "loss": 0.16625409126281737, "step": 212410 }, { "epoch": 0.911963456204975, "grad_norm": 0.0027463510632514954, "learning_rate": 8.842044445210973e-06, "loss": 0.1264907717704773, "step": 212420 }, { "epoch": 0.912006388294995, "grad_norm": 1.7046873569488525, "learning_rate": 8.83773272509335e-06, "loss": 0.22326078414916992, "step": 212430 }, { "epoch": 0.912049320385015, "grad_norm": 0.041218217462301254, "learning_rate": 8.833421004975726e-06, "loss": 0.26831223964691164, "step": 212440 }, { "epoch": 0.912092252475035, "grad_norm": 6.816025733947754, "learning_rate": 8.829109284858101e-06, "loss": 0.36359567642211915, "step": 212450 }, { "epoch": 0.912135184565055, "grad_norm": 0.5391436815261841, "learning_rate": 8.824797564740479e-06, "loss": 0.19195263385772704, "step": 212460 }, { "epoch": 0.912178116655075, "grad_norm": 1.583741307258606, "learning_rate": 8.820485844622854e-06, "loss": 0.35937676429748533, "step": 212470 }, { "epoch": 0.912221048745095, "grad_norm": 2.529594898223877, "learning_rate": 8.81617412450523e-06, "loss": 0.18012168407440185, "step": 212480 }, { "epoch": 0.9122639808351151, "grad_norm": 3.7486445903778076, "learning_rate": 8.811862404387607e-06, "loss": 0.21492357254028321, "step": 212490 }, { "epoch": 0.912306912925135, "grad_norm": 0.006154716946184635, "learning_rate": 8.807550684269983e-06, "loss": 0.16618551015853883, "step": 212500 }, { "epoch": 0.912349845015155, "grad_norm": 0.2932658791542053, "learning_rate": 8.803238964152359e-06, "loss": 0.08020783066749573, "step": 212510 }, { "epoch": 0.9123927771051751, "grad_norm": 0.908278226852417, "learning_rate": 8.798927244034736e-06, "loss": 0.0777955710887909, "step": 212520 }, { "epoch": 0.912435709195195, "grad_norm": 0.0029801647178828716, "learning_rate": 8.794615523917112e-06, "loss": 0.02414112240076065, "step": 212530 }, { "epoch": 0.912478641285215, "grad_norm": 0.0029607617761939764, "learning_rate": 8.790303803799487e-06, "loss": 0.24954428672790527, "step": 212540 }, { "epoch": 0.9125215733752351, "grad_norm": 1.418501377105713, "learning_rate": 8.785992083681864e-06, "loss": 0.06055132150650024, "step": 212550 }, { "epoch": 0.912564505465255, "grad_norm": 1.9168803691864014, "learning_rate": 8.78168036356424e-06, "loss": 0.17470144033432006, "step": 212560 }, { "epoch": 0.9126074375552751, "grad_norm": 0.02751356177031994, "learning_rate": 8.777368643446616e-06, "loss": 0.0252609521150589, "step": 212570 }, { "epoch": 0.9126503696452951, "grad_norm": 4.568458080291748, "learning_rate": 8.773056923328993e-06, "loss": 0.14840620756149292, "step": 212580 }, { "epoch": 0.912693301735315, "grad_norm": 2.3897361755371094, "learning_rate": 8.768745203211369e-06, "loss": 0.18637797832489014, "step": 212590 }, { "epoch": 0.9127362338253351, "grad_norm": 0.04232500493526459, "learning_rate": 8.764433483093744e-06, "loss": 0.16503369808197021, "step": 212600 }, { "epoch": 0.9127791659153551, "grad_norm": 1.7149666547775269, "learning_rate": 8.760121762976122e-06, "loss": 0.13525526523590087, "step": 212610 }, { "epoch": 0.9128220980053751, "grad_norm": 2.110562324523926, "learning_rate": 8.755810042858497e-06, "loss": 0.13837047815322875, "step": 212620 }, { "epoch": 0.9128650300953951, "grad_norm": 9.753104209899902, "learning_rate": 8.751498322740875e-06, "loss": 0.30054826736450196, "step": 212630 }, { "epoch": 0.9129079621854151, "grad_norm": 3.0043880939483643, "learning_rate": 8.747186602623252e-06, "loss": 0.11299515962600708, "step": 212640 }, { "epoch": 0.9129508942754351, "grad_norm": 0.38159698247909546, "learning_rate": 8.742874882505628e-06, "loss": 0.06615483164787292, "step": 212650 }, { "epoch": 0.9129938263654551, "grad_norm": 0.0005800298531539738, "learning_rate": 8.738563162388003e-06, "loss": 0.18061435222625732, "step": 212660 }, { "epoch": 0.9130367584554752, "grad_norm": 0.24667766690254211, "learning_rate": 8.73425144227038e-06, "loss": 0.1562572717666626, "step": 212670 }, { "epoch": 0.9130796905454951, "grad_norm": 0.057288434356451035, "learning_rate": 8.729939722152756e-06, "loss": 0.33345324993133546, "step": 212680 }, { "epoch": 0.9131226226355151, "grad_norm": 1.1795369386672974, "learning_rate": 8.725628002035132e-06, "loss": 0.3631131172180176, "step": 212690 }, { "epoch": 0.9131655547255352, "grad_norm": 0.0925176739692688, "learning_rate": 8.72131628191751e-06, "loss": 0.06134541034698486, "step": 212700 }, { "epoch": 0.9132084868155551, "grad_norm": 2.638174057006836, "learning_rate": 8.717004561799885e-06, "loss": 0.14020242691040039, "step": 212710 }, { "epoch": 0.9132514189055752, "grad_norm": 0.9038133025169373, "learning_rate": 8.71269284168226e-06, "loss": 0.08528336286544799, "step": 212720 }, { "epoch": 0.9132943509955952, "grad_norm": 0.0008694896241649985, "learning_rate": 8.708381121564638e-06, "loss": 0.053201431035995485, "step": 212730 }, { "epoch": 0.9133372830856151, "grad_norm": 2.8368773460388184, "learning_rate": 8.704069401447014e-06, "loss": 0.29605350494384763, "step": 212740 }, { "epoch": 0.9133802151756352, "grad_norm": 0.001284956349991262, "learning_rate": 8.69975768132939e-06, "loss": 0.17406622171401978, "step": 212750 }, { "epoch": 0.9134231472656552, "grad_norm": 0.0011331437854096293, "learning_rate": 8.695445961211767e-06, "loss": 0.20874719619750975, "step": 212760 }, { "epoch": 0.9134660793556751, "grad_norm": 0.1996951401233673, "learning_rate": 8.691134241094142e-06, "loss": 0.13332525491714478, "step": 212770 }, { "epoch": 0.9135090114456952, "grad_norm": 0.0020294704008847475, "learning_rate": 8.68682252097652e-06, "loss": 0.10547182559967042, "step": 212780 }, { "epoch": 0.9135519435357152, "grad_norm": 0.0018016091780737042, "learning_rate": 8.682510800858895e-06, "loss": 0.2505123376846313, "step": 212790 }, { "epoch": 0.9135948756257352, "grad_norm": 1.4915944337844849, "learning_rate": 8.678199080741271e-06, "loss": 0.20534911155700683, "step": 212800 }, { "epoch": 0.9136378077157552, "grad_norm": 0.02371162362396717, "learning_rate": 8.673887360623648e-06, "loss": 0.0034074489027261733, "step": 212810 }, { "epoch": 0.9136807398057752, "grad_norm": 0.0006841256399638951, "learning_rate": 8.669575640506024e-06, "loss": 0.19988794326782228, "step": 212820 }, { "epoch": 0.9137236718957953, "grad_norm": 0.1977357715368271, "learning_rate": 8.6652639203884e-06, "loss": 0.1504676103591919, "step": 212830 }, { "epoch": 0.9137666039858152, "grad_norm": 1.7436408996582031, "learning_rate": 8.660952200270777e-06, "loss": 0.2765189170837402, "step": 212840 }, { "epoch": 0.9138095360758353, "grad_norm": 0.013714958913624287, "learning_rate": 8.656640480153153e-06, "loss": 0.12662038803100586, "step": 212850 }, { "epoch": 0.9138524681658553, "grad_norm": 5.23076868057251, "learning_rate": 8.652328760035528e-06, "loss": 0.1594509482383728, "step": 212860 }, { "epoch": 0.9138954002558752, "grad_norm": 0.027277108281850815, "learning_rate": 8.648017039917906e-06, "loss": 0.1075689435005188, "step": 212870 }, { "epoch": 0.9139383323458953, "grad_norm": 1.4043325185775757, "learning_rate": 8.643705319800281e-06, "loss": 0.25166323184967043, "step": 212880 }, { "epoch": 0.9139812644359153, "grad_norm": 0.04628121107816696, "learning_rate": 8.639393599682657e-06, "loss": 0.004422901198267937, "step": 212890 }, { "epoch": 0.9140241965259353, "grad_norm": 0.02353578992187977, "learning_rate": 8.635081879565034e-06, "loss": 0.2293551445007324, "step": 212900 }, { "epoch": 0.9140671286159553, "grad_norm": 3.4110593795776367, "learning_rate": 8.63077015944741e-06, "loss": 0.29389586448669436, "step": 212910 }, { "epoch": 0.9141100607059753, "grad_norm": 0.4436527192592621, "learning_rate": 8.626458439329786e-06, "loss": 0.06104323863983154, "step": 212920 }, { "epoch": 0.9141529927959953, "grad_norm": 0.06245763599872589, "learning_rate": 8.622146719212163e-06, "loss": 0.18613338470458984, "step": 212930 }, { "epoch": 0.9141959248860153, "grad_norm": 1.623071551322937, "learning_rate": 8.617834999094539e-06, "loss": 0.1850024938583374, "step": 212940 }, { "epoch": 0.9142388569760354, "grad_norm": 0.0883312001824379, "learning_rate": 8.613523278976914e-06, "loss": 0.1974207043647766, "step": 212950 }, { "epoch": 0.9142817890660553, "grad_norm": 0.5963457822799683, "learning_rate": 8.609211558859292e-06, "loss": 0.24655821323394775, "step": 212960 }, { "epoch": 0.9143247211560753, "grad_norm": 1.2836028337478638, "learning_rate": 8.604899838741667e-06, "loss": 0.23074700832366943, "step": 212970 }, { "epoch": 0.9143676532460954, "grad_norm": 1.9236855506896973, "learning_rate": 8.600588118624043e-06, "loss": 0.16037309169769287, "step": 212980 }, { "epoch": 0.9144105853361153, "grad_norm": 0.0057564787566661835, "learning_rate": 8.596276398506422e-06, "loss": 0.31025793552398684, "step": 212990 }, { "epoch": 0.9144535174261353, "grad_norm": 2.740952968597412, "learning_rate": 8.591964678388797e-06, "loss": 0.2665108680725098, "step": 213000 }, { "epoch": 0.9144535174261353, "eval_loss": 0.37361475825309753, "eval_runtime": 27.4134, "eval_samples_per_second": 3.648, "eval_steps_per_second": 3.648, "step": 213000 }, { "epoch": 0.9144964495161554, "grad_norm": 1.782199501991272, "learning_rate": 8.587652958271173e-06, "loss": 0.2488037109375, "step": 213010 }, { "epoch": 0.9145393816061753, "grad_norm": 1.5992531776428223, "learning_rate": 8.58334123815355e-06, "loss": 0.34330708980560304, "step": 213020 }, { "epoch": 0.9145823136961954, "grad_norm": 0.001826804713346064, "learning_rate": 8.579029518035926e-06, "loss": 0.2511431217193604, "step": 213030 }, { "epoch": 0.9146252457862154, "grad_norm": 0.2493457943201065, "learning_rate": 8.574717797918302e-06, "loss": 0.2527246713638306, "step": 213040 }, { "epoch": 0.9146681778762353, "grad_norm": 0.4726060628890991, "learning_rate": 8.570406077800679e-06, "loss": 0.12096805572509765, "step": 213050 }, { "epoch": 0.9147111099662554, "grad_norm": 0.033890970051288605, "learning_rate": 8.566094357683055e-06, "loss": 0.3322614192962646, "step": 213060 }, { "epoch": 0.9147540420562754, "grad_norm": 0.0006106910877861083, "learning_rate": 8.56178263756543e-06, "loss": 0.1534043788909912, "step": 213070 }, { "epoch": 0.9147969741462953, "grad_norm": 0.5903043746948242, "learning_rate": 8.557470917447808e-06, "loss": 0.12563472986221313, "step": 213080 }, { "epoch": 0.9148399062363154, "grad_norm": 0.07464814186096191, "learning_rate": 8.553159197330183e-06, "loss": 0.08452232480049134, "step": 213090 }, { "epoch": 0.9148828383263354, "grad_norm": 0.003437488107010722, "learning_rate": 8.548847477212559e-06, "loss": 0.2705679416656494, "step": 213100 }, { "epoch": 0.9149257704163554, "grad_norm": 0.009765159338712692, "learning_rate": 8.544535757094936e-06, "loss": 0.28933393955230713, "step": 213110 }, { "epoch": 0.9149687025063754, "grad_norm": 0.007637848611921072, "learning_rate": 8.540224036977312e-06, "loss": 0.19045697450637816, "step": 213120 }, { "epoch": 0.9150116345963955, "grad_norm": 0.000521529174875468, "learning_rate": 8.53591231685969e-06, "loss": 0.1336083769798279, "step": 213130 }, { "epoch": 0.9150545666864154, "grad_norm": 0.30744844675064087, "learning_rate": 8.531600596742065e-06, "loss": 0.14940725564956664, "step": 213140 }, { "epoch": 0.9150974987764354, "grad_norm": 0.007774029858410358, "learning_rate": 8.52728887662444e-06, "loss": 0.01716921925544739, "step": 213150 }, { "epoch": 0.9151404308664555, "grad_norm": 0.33261045813560486, "learning_rate": 8.522977156506818e-06, "loss": 0.10560462474822999, "step": 213160 }, { "epoch": 0.9151833629564754, "grad_norm": 2.7580161094665527, "learning_rate": 8.518665436389194e-06, "loss": 0.22219655513763428, "step": 213170 }, { "epoch": 0.9152262950464954, "grad_norm": 0.004856944549828768, "learning_rate": 8.51435371627157e-06, "loss": 0.22760391235351562, "step": 213180 }, { "epoch": 0.9152692271365155, "grad_norm": 0.029608484357595444, "learning_rate": 8.510041996153947e-06, "loss": 0.11115431785583496, "step": 213190 }, { "epoch": 0.9153121592265354, "grad_norm": 0.2841481566429138, "learning_rate": 8.505730276036322e-06, "loss": 0.07824562788009644, "step": 213200 }, { "epoch": 0.9153550913165555, "grad_norm": 1.3372067213058472, "learning_rate": 8.501418555918698e-06, "loss": 0.2558911323547363, "step": 213210 }, { "epoch": 0.9153980234065755, "grad_norm": 0.07914111018180847, "learning_rate": 8.497106835801075e-06, "loss": 0.09136658310890197, "step": 213220 }, { "epoch": 0.9154409554965954, "grad_norm": 1.8780505657196045, "learning_rate": 8.492795115683451e-06, "loss": 0.22362115383148193, "step": 213230 }, { "epoch": 0.9154838875866155, "grad_norm": 2.8011960983276367, "learning_rate": 8.488483395565827e-06, "loss": 0.1213125467300415, "step": 213240 }, { "epoch": 0.9155268196766355, "grad_norm": 0.8596304655075073, "learning_rate": 8.484171675448204e-06, "loss": 0.2691220998764038, "step": 213250 }, { "epoch": 0.9155697517666556, "grad_norm": 0.019585467875003815, "learning_rate": 8.47985995533058e-06, "loss": 0.36429500579833984, "step": 213260 }, { "epoch": 0.9156126838566755, "grad_norm": 0.10184433311223984, "learning_rate": 8.475548235212955e-06, "loss": 0.1306537866592407, "step": 213270 }, { "epoch": 0.9156556159466955, "grad_norm": 1.1802695989608765, "learning_rate": 8.471236515095333e-06, "loss": 0.26048238277435304, "step": 213280 }, { "epoch": 0.9156985480367156, "grad_norm": 0.00236449739895761, "learning_rate": 8.466924794977708e-06, "loss": 0.23241877555847168, "step": 213290 }, { "epoch": 0.9157414801267355, "grad_norm": 5.898766040802002, "learning_rate": 8.462613074860084e-06, "loss": 0.2835911989212036, "step": 213300 }, { "epoch": 0.9157844122167556, "grad_norm": 0.004197239875793457, "learning_rate": 8.458301354742461e-06, "loss": 0.25936548709869384, "step": 213310 }, { "epoch": 0.9158273443067756, "grad_norm": 2.1925132274627686, "learning_rate": 8.453989634624837e-06, "loss": 0.3117018699645996, "step": 213320 }, { "epoch": 0.9158702763967955, "grad_norm": 0.0032696540001779795, "learning_rate": 8.449677914507213e-06, "loss": 0.18839519023895263, "step": 213330 }, { "epoch": 0.9159132084868156, "grad_norm": 0.3618191182613373, "learning_rate": 8.44536619438959e-06, "loss": 0.4135741233825684, "step": 213340 }, { "epoch": 0.9159561405768356, "grad_norm": 0.6629675626754761, "learning_rate": 8.441054474271966e-06, "loss": 0.19146194458007812, "step": 213350 }, { "epoch": 0.9159990726668555, "grad_norm": 0.027104495093226433, "learning_rate": 8.436742754154343e-06, "loss": 0.1926203966140747, "step": 213360 }, { "epoch": 0.9160420047568756, "grad_norm": 1.6082515716552734, "learning_rate": 8.43243103403672e-06, "loss": 0.29182188510894774, "step": 213370 }, { "epoch": 0.9160849368468956, "grad_norm": 2.3720266819000244, "learning_rate": 8.428119313919096e-06, "loss": 0.16708863973617555, "step": 213380 }, { "epoch": 0.9161278689369156, "grad_norm": 0.0009071618551388383, "learning_rate": 8.423807593801472e-06, "loss": 0.04892894625663757, "step": 213390 }, { "epoch": 0.9161708010269356, "grad_norm": 0.02186976745724678, "learning_rate": 8.419495873683849e-06, "loss": 0.07931110262870789, "step": 213400 }, { "epoch": 0.9162137331169556, "grad_norm": 0.006246891804039478, "learning_rate": 8.415184153566225e-06, "loss": 0.1462794780731201, "step": 213410 }, { "epoch": 0.9162566652069756, "grad_norm": 1.6635063886642456, "learning_rate": 8.4108724334486e-06, "loss": 0.11009447574615479, "step": 213420 }, { "epoch": 0.9162995972969956, "grad_norm": 2.642993927001953, "learning_rate": 8.406560713330977e-06, "loss": 0.1289622187614441, "step": 213430 }, { "epoch": 0.9163425293870157, "grad_norm": 0.02965669333934784, "learning_rate": 8.402248993213353e-06, "loss": 0.15729275941848755, "step": 213440 }, { "epoch": 0.9163854614770356, "grad_norm": 0.612890362739563, "learning_rate": 8.397937273095729e-06, "loss": 0.3066643476486206, "step": 213450 }, { "epoch": 0.9164283935670556, "grad_norm": 0.0016727737383916974, "learning_rate": 8.393625552978106e-06, "loss": 0.08483893871307373, "step": 213460 }, { "epoch": 0.9164713256570757, "grad_norm": 0.00839283224195242, "learning_rate": 8.389313832860482e-06, "loss": 0.254620099067688, "step": 213470 }, { "epoch": 0.9165142577470956, "grad_norm": 1.7350776195526123, "learning_rate": 8.385002112742857e-06, "loss": 0.15120675563812255, "step": 213480 }, { "epoch": 0.9165571898371156, "grad_norm": 0.0003176078025717288, "learning_rate": 8.380690392625235e-06, "loss": 0.13280483484268188, "step": 213490 }, { "epoch": 0.9166001219271357, "grad_norm": 0.0025800876319408417, "learning_rate": 8.37637867250761e-06, "loss": 0.17722119092941285, "step": 213500 }, { "epoch": 0.9166430540171556, "grad_norm": 5.274077892303467, "learning_rate": 8.372066952389988e-06, "loss": 0.2644465208053589, "step": 213510 }, { "epoch": 0.9166859861071757, "grad_norm": 0.2625701129436493, "learning_rate": 8.367755232272363e-06, "loss": 0.14736897945404054, "step": 213520 }, { "epoch": 0.9167289181971957, "grad_norm": 0.0007383037591353059, "learning_rate": 8.363443512154739e-06, "loss": 0.1682689070701599, "step": 213530 }, { "epoch": 0.9167718502872156, "grad_norm": 0.07749518007040024, "learning_rate": 8.359131792037116e-06, "loss": 0.07301062941551209, "step": 213540 }, { "epoch": 0.9168147823772357, "grad_norm": 0.08707591146230698, "learning_rate": 8.354820071919492e-06, "loss": 0.35566532611846924, "step": 213550 }, { "epoch": 0.9168577144672557, "grad_norm": 0.023812185972929, "learning_rate": 8.350508351801868e-06, "loss": 0.09307337403297425, "step": 213560 }, { "epoch": 0.9169006465572757, "grad_norm": 2.4213881492614746, "learning_rate": 8.346196631684245e-06, "loss": 0.27511775493621826, "step": 213570 }, { "epoch": 0.9169435786472957, "grad_norm": 0.014306395314633846, "learning_rate": 8.34188491156662e-06, "loss": 0.28723247051239015, "step": 213580 }, { "epoch": 0.9169865107373157, "grad_norm": 0.6624493598937988, "learning_rate": 8.337573191448996e-06, "loss": 0.19444373846054078, "step": 213590 }, { "epoch": 0.9170294428273357, "grad_norm": 2.9697694778442383, "learning_rate": 8.333261471331374e-06, "loss": 0.43126649856567384, "step": 213600 }, { "epoch": 0.9170723749173557, "grad_norm": 1.9498356580734253, "learning_rate": 8.32894975121375e-06, "loss": 0.12668828964233397, "step": 213610 }, { "epoch": 0.9171153070073758, "grad_norm": 0.39234861731529236, "learning_rate": 8.324638031096125e-06, "loss": 0.05853158235549927, "step": 213620 }, { "epoch": 0.9171582390973957, "grad_norm": 0.03262830153107643, "learning_rate": 8.320326310978502e-06, "loss": 0.13207764625549318, "step": 213630 }, { "epoch": 0.9172011711874157, "grad_norm": 0.10223422944545746, "learning_rate": 8.316014590860878e-06, "loss": 0.20672743320465087, "step": 213640 }, { "epoch": 0.9172441032774358, "grad_norm": 6.9421563148498535, "learning_rate": 8.311702870743254e-06, "loss": 0.41819748878479, "step": 213650 }, { "epoch": 0.9172870353674557, "grad_norm": 0.09261999279260635, "learning_rate": 8.307391150625631e-06, "loss": 0.12242704629898071, "step": 213660 }, { "epoch": 0.9173299674574757, "grad_norm": 0.21561941504478455, "learning_rate": 8.303079430508007e-06, "loss": 0.26964023113250735, "step": 213670 }, { "epoch": 0.9173728995474958, "grad_norm": 0.001588730257935822, "learning_rate": 8.298767710390382e-06, "loss": 0.03283386826515198, "step": 213680 }, { "epoch": 0.9174158316375158, "grad_norm": 2.3190999031066895, "learning_rate": 8.29445599027276e-06, "loss": 0.10395605564117431, "step": 213690 }, { "epoch": 0.9174587637275358, "grad_norm": 0.015156721696257591, "learning_rate": 8.290144270155135e-06, "loss": 0.28372209072113036, "step": 213700 }, { "epoch": 0.9175016958175558, "grad_norm": 44.24710464477539, "learning_rate": 8.285832550037511e-06, "loss": 0.07365514039993286, "step": 213710 }, { "epoch": 0.9175446279075758, "grad_norm": 55.237640380859375, "learning_rate": 8.28152082991989e-06, "loss": 0.2189706325531006, "step": 213720 }, { "epoch": 0.9175875599975958, "grad_norm": 6.306714057922363, "learning_rate": 8.277209109802266e-06, "loss": 0.30330231189727785, "step": 213730 }, { "epoch": 0.9176304920876158, "grad_norm": 0.6346426010131836, "learning_rate": 8.272897389684641e-06, "loss": 0.2297675848007202, "step": 213740 }, { "epoch": 0.9176734241776359, "grad_norm": 0.011375557631254196, "learning_rate": 8.268585669567019e-06, "loss": 0.13121119737625123, "step": 213750 }, { "epoch": 0.9177163562676558, "grad_norm": 0.029359858483076096, "learning_rate": 8.264273949449394e-06, "loss": 0.410442590713501, "step": 213760 }, { "epoch": 0.9177592883576758, "grad_norm": 0.004748243372887373, "learning_rate": 8.25996222933177e-06, "loss": 0.24686429500579835, "step": 213770 }, { "epoch": 0.9178022204476959, "grad_norm": 2.400439977645874, "learning_rate": 8.255650509214147e-06, "loss": 0.04432217478752136, "step": 213780 }, { "epoch": 0.9178451525377158, "grad_norm": 1.4817440509796143, "learning_rate": 8.251338789096523e-06, "loss": 0.24401164054870605, "step": 213790 }, { "epoch": 0.9178880846277359, "grad_norm": 2.9294021129608154, "learning_rate": 8.247027068978899e-06, "loss": 0.1489182472229004, "step": 213800 }, { "epoch": 0.9179310167177559, "grad_norm": 0.0016216447111219168, "learning_rate": 8.242715348861276e-06, "loss": 0.08440933227539063, "step": 213810 }, { "epoch": 0.9179739488077758, "grad_norm": 1.4032232761383057, "learning_rate": 8.238403628743652e-06, "loss": 0.4191108226776123, "step": 213820 }, { "epoch": 0.9180168808977959, "grad_norm": 0.0279683880507946, "learning_rate": 8.234091908626027e-06, "loss": 0.07173965573310852, "step": 213830 }, { "epoch": 0.9180598129878159, "grad_norm": 4.024061679840088, "learning_rate": 8.229780188508405e-06, "loss": 0.2750051975250244, "step": 213840 }, { "epoch": 0.9181027450778358, "grad_norm": 1.4007704257965088, "learning_rate": 8.22546846839078e-06, "loss": 0.2678218364715576, "step": 213850 }, { "epoch": 0.9181456771678559, "grad_norm": 0.30023008584976196, "learning_rate": 8.221156748273156e-06, "loss": 0.06759833097457886, "step": 213860 }, { "epoch": 0.9181886092578759, "grad_norm": 0.004864270333200693, "learning_rate": 8.216845028155533e-06, "loss": 0.11008800268173217, "step": 213870 }, { "epoch": 0.9182315413478959, "grad_norm": 0.0029974612407386303, "learning_rate": 8.212533308037909e-06, "loss": 0.15078630447387695, "step": 213880 }, { "epoch": 0.9182744734379159, "grad_norm": 0.035421110689640045, "learning_rate": 8.208221587920286e-06, "loss": 0.16177096366882324, "step": 213890 }, { "epoch": 0.918317405527936, "grad_norm": 0.7298688292503357, "learning_rate": 8.203909867802662e-06, "loss": 0.256973123550415, "step": 213900 }, { "epoch": 0.9183603376179559, "grad_norm": 0.05107526481151581, "learning_rate": 8.199598147685037e-06, "loss": 0.2563158988952637, "step": 213910 }, { "epoch": 0.9184032697079759, "grad_norm": 2.8571629524230957, "learning_rate": 8.195286427567415e-06, "loss": 0.1808464288711548, "step": 213920 }, { "epoch": 0.918446201797996, "grad_norm": 0.28787219524383545, "learning_rate": 8.19097470744979e-06, "loss": 0.20286407470703124, "step": 213930 }, { "epoch": 0.9184891338880159, "grad_norm": 0.031048133969306946, "learning_rate": 8.186662987332166e-06, "loss": 0.14505958557128906, "step": 213940 }, { "epoch": 0.9185320659780359, "grad_norm": 0.037190306931734085, "learning_rate": 8.182351267214543e-06, "loss": 0.3253729581832886, "step": 213950 }, { "epoch": 0.918574998068056, "grad_norm": 0.3285282254219055, "learning_rate": 8.178039547096919e-06, "loss": 0.325065016746521, "step": 213960 }, { "epoch": 0.9186179301580759, "grad_norm": 2.641339063644409, "learning_rate": 8.173727826979295e-06, "loss": 0.23930034637451172, "step": 213970 }, { "epoch": 0.918660862248096, "grad_norm": 0.017421351745724678, "learning_rate": 8.169416106861672e-06, "loss": 0.1706032156944275, "step": 213980 }, { "epoch": 0.918703794338116, "grad_norm": 0.0013588599395006895, "learning_rate": 8.165104386744048e-06, "loss": 0.301548171043396, "step": 213990 }, { "epoch": 0.9187467264281359, "grad_norm": 0.003222766565158963, "learning_rate": 8.160792666626423e-06, "loss": 0.13970067501068115, "step": 214000 }, { "epoch": 0.9187467264281359, "eval_loss": 0.3728339672088623, "eval_runtime": 27.3708, "eval_samples_per_second": 3.654, "eval_steps_per_second": 3.654, "step": 214000 }, { "epoch": 0.918789658518156, "grad_norm": 0.0069166203029453754, "learning_rate": 8.1564809465088e-06, "loss": 0.16746548414230347, "step": 214010 }, { "epoch": 0.918832590608176, "grad_norm": 0.002220205496996641, "learning_rate": 8.152169226391176e-06, "loss": 0.19638227224349974, "step": 214020 }, { "epoch": 0.918875522698196, "grad_norm": 0.009522872976958752, "learning_rate": 8.147857506273552e-06, "loss": 0.10982517004013062, "step": 214030 }, { "epoch": 0.918918454788216, "grad_norm": 0.005404068157076836, "learning_rate": 8.14354578615593e-06, "loss": 0.11145150661468506, "step": 214040 }, { "epoch": 0.918961386878236, "grad_norm": 1.6319386959075928, "learning_rate": 8.139234066038305e-06, "loss": 0.2857020854949951, "step": 214050 }, { "epoch": 0.919004318968256, "grad_norm": 0.04213089123368263, "learning_rate": 8.13492234592068e-06, "loss": 0.0028133489191532134, "step": 214060 }, { "epoch": 0.919047251058276, "grad_norm": 0.0002146908809663728, "learning_rate": 8.130610625803058e-06, "loss": 0.13747214078903197, "step": 214070 }, { "epoch": 0.919090183148296, "grad_norm": 8.343812942504883, "learning_rate": 8.126298905685435e-06, "loss": 0.2809786081314087, "step": 214080 }, { "epoch": 0.919133115238316, "grad_norm": 0.0025600444059818983, "learning_rate": 8.121987185567811e-06, "loss": 0.05838137865066528, "step": 214090 }, { "epoch": 0.919176047328336, "grad_norm": 1.5793315172195435, "learning_rate": 8.117675465450188e-06, "loss": 0.02491525709629059, "step": 214100 }, { "epoch": 0.9192189794183561, "grad_norm": 2.205817699432373, "learning_rate": 8.113363745332564e-06, "loss": 0.18225139379501343, "step": 214110 }, { "epoch": 0.9192619115083761, "grad_norm": 0.12144787609577179, "learning_rate": 8.10905202521494e-06, "loss": 0.41785888671875, "step": 214120 }, { "epoch": 0.919304843598396, "grad_norm": 0.005557660944759846, "learning_rate": 8.104740305097317e-06, "loss": 0.12414391040802002, "step": 214130 }, { "epoch": 0.9193477756884161, "grad_norm": 0.032746944576501846, "learning_rate": 8.100428584979693e-06, "loss": 0.21280176639556886, "step": 214140 }, { "epoch": 0.9193907077784361, "grad_norm": 2.1349384784698486, "learning_rate": 8.096116864862068e-06, "loss": 0.2097855806350708, "step": 214150 }, { "epoch": 0.919433639868456, "grad_norm": 28.290878295898438, "learning_rate": 8.091805144744446e-06, "loss": 0.26921648979187013, "step": 214160 }, { "epoch": 0.9194765719584761, "grad_norm": 1.0186740159988403, "learning_rate": 8.087493424626821e-06, "loss": 0.056221646070480344, "step": 214170 }, { "epoch": 0.9195195040484961, "grad_norm": 0.0006930052768439054, "learning_rate": 8.083181704509197e-06, "loss": 0.1876317262649536, "step": 214180 }, { "epoch": 0.9195624361385161, "grad_norm": 0.01069891732186079, "learning_rate": 8.078869984391574e-06, "loss": 0.1312105655670166, "step": 214190 }, { "epoch": 0.9196053682285361, "grad_norm": 3.0067901611328125, "learning_rate": 8.07455826427395e-06, "loss": 0.2393186092376709, "step": 214200 }, { "epoch": 0.9196483003185562, "grad_norm": 1.284667730331421, "learning_rate": 8.070246544156326e-06, "loss": 0.33020362854003904, "step": 214210 }, { "epoch": 0.9196912324085761, "grad_norm": 1.096384882926941, "learning_rate": 8.065934824038703e-06, "loss": 0.2953479290008545, "step": 214220 }, { "epoch": 0.9197341644985961, "grad_norm": 0.004542557522654533, "learning_rate": 8.061623103921079e-06, "loss": 0.24383957386016847, "step": 214230 }, { "epoch": 0.9197770965886162, "grad_norm": 2.143012046813965, "learning_rate": 8.057311383803454e-06, "loss": 0.19743248224258422, "step": 214240 }, { "epoch": 0.9198200286786361, "grad_norm": 0.0006490990635938942, "learning_rate": 8.052999663685832e-06, "loss": 0.04032123982906342, "step": 214250 }, { "epoch": 0.9198629607686561, "grad_norm": 0.03700326010584831, "learning_rate": 8.048687943568207e-06, "loss": 0.06131017804145813, "step": 214260 }, { "epoch": 0.9199058928586762, "grad_norm": 1.601779580116272, "learning_rate": 8.044376223450585e-06, "loss": 0.314433217048645, "step": 214270 }, { "epoch": 0.9199488249486961, "grad_norm": 0.9620373845100403, "learning_rate": 8.04006450333296e-06, "loss": 0.17828409671783446, "step": 214280 }, { "epoch": 0.9199917570387162, "grad_norm": 1.0853599309921265, "learning_rate": 8.035752783215336e-06, "loss": 0.24211184978485106, "step": 214290 }, { "epoch": 0.9200346891287362, "grad_norm": 0.02577628381550312, "learning_rate": 8.031441063097713e-06, "loss": 0.13259075880050658, "step": 214300 }, { "epoch": 0.9200776212187561, "grad_norm": 5.1638264656066895, "learning_rate": 8.027129342980089e-06, "loss": 0.12507407665252684, "step": 214310 }, { "epoch": 0.9201205533087762, "grad_norm": 0.04012976959347725, "learning_rate": 8.022817622862464e-06, "loss": 0.2570314645767212, "step": 214320 }, { "epoch": 0.9201634853987962, "grad_norm": 0.6823280453681946, "learning_rate": 8.018505902744842e-06, "loss": 0.16146708726882936, "step": 214330 }, { "epoch": 0.9202064174888162, "grad_norm": 0.0014941110275685787, "learning_rate": 8.014194182627217e-06, "loss": 0.05990640521049499, "step": 214340 }, { "epoch": 0.9202493495788362, "grad_norm": 1.3181439638137817, "learning_rate": 8.009882462509593e-06, "loss": 0.17725783586502075, "step": 214350 }, { "epoch": 0.9202922816688562, "grad_norm": 22.59912872314453, "learning_rate": 8.00557074239197e-06, "loss": 0.11575099229812622, "step": 214360 }, { "epoch": 0.9203352137588762, "grad_norm": 0.00027134406263940036, "learning_rate": 8.001259022274346e-06, "loss": 0.18448431491851808, "step": 214370 }, { "epoch": 0.9203781458488962, "grad_norm": 1.813732385635376, "learning_rate": 7.996947302156722e-06, "loss": 0.3346815347671509, "step": 214380 }, { "epoch": 0.9204210779389163, "grad_norm": 0.0324367992579937, "learning_rate": 7.992635582039099e-06, "loss": 0.4010632991790771, "step": 214390 }, { "epoch": 0.9204640100289362, "grad_norm": 0.10355591773986816, "learning_rate": 7.988323861921475e-06, "loss": 0.3059864521026611, "step": 214400 }, { "epoch": 0.9205069421189562, "grad_norm": 0.03525923192501068, "learning_rate": 7.98401214180385e-06, "loss": 0.25398414134979247, "step": 214410 }, { "epoch": 0.9205498742089763, "grad_norm": 2.2439262866973877, "learning_rate": 7.979700421686228e-06, "loss": 0.1350804328918457, "step": 214420 }, { "epoch": 0.9205928062989962, "grad_norm": 0.00023116929514799267, "learning_rate": 7.975388701568603e-06, "loss": 0.06914361715316772, "step": 214430 }, { "epoch": 0.9206357383890162, "grad_norm": 0.04269478842616081, "learning_rate": 7.971076981450979e-06, "loss": 0.17834771871566774, "step": 214440 }, { "epoch": 0.9206786704790363, "grad_norm": 0.006393686402589083, "learning_rate": 7.966765261333358e-06, "loss": 0.4067357063293457, "step": 214450 }, { "epoch": 0.9207216025690562, "grad_norm": 0.16539409756660461, "learning_rate": 7.962453541215734e-06, "loss": 0.19677144289016724, "step": 214460 }, { "epoch": 0.9207645346590763, "grad_norm": 0.006637761369347572, "learning_rate": 7.95814182109811e-06, "loss": 0.07061156630516052, "step": 214470 }, { "epoch": 0.9208074667490963, "grad_norm": 1.6709380149841309, "learning_rate": 7.953830100980487e-06, "loss": 0.03651362359523773, "step": 214480 }, { "epoch": 0.9208503988391162, "grad_norm": 0.00023092412448022515, "learning_rate": 7.949518380862862e-06, "loss": 0.1736156463623047, "step": 214490 }, { "epoch": 0.9208933309291363, "grad_norm": 0.40267854928970337, "learning_rate": 7.945206660745238e-06, "loss": 0.19352984428405762, "step": 214500 }, { "epoch": 0.9209362630191563, "grad_norm": 0.08892907202243805, "learning_rate": 7.940894940627615e-06, "loss": 0.0169476717710495, "step": 214510 }, { "epoch": 0.9209791951091763, "grad_norm": 0.001397549407556653, "learning_rate": 7.936583220509991e-06, "loss": 0.08612239956855774, "step": 214520 }, { "epoch": 0.9210221271991963, "grad_norm": 0.6343229413032532, "learning_rate": 7.932271500392367e-06, "loss": 0.2680309534072876, "step": 214530 }, { "epoch": 0.9210650592892163, "grad_norm": 0.7043375372886658, "learning_rate": 7.927959780274744e-06, "loss": 0.29093358516693113, "step": 214540 }, { "epoch": 0.9211079913792364, "grad_norm": 0.3087158501148224, "learning_rate": 7.92364806015712e-06, "loss": 0.06735055446624756, "step": 214550 }, { "epoch": 0.9211509234692563, "grad_norm": 0.0004326922935433686, "learning_rate": 7.919336340039495e-06, "loss": 0.17999672889709473, "step": 214560 }, { "epoch": 0.9211938555592764, "grad_norm": 0.15494950115680695, "learning_rate": 7.915024619921873e-06, "loss": 0.21485633850097657, "step": 214570 }, { "epoch": 0.9212367876492964, "grad_norm": 2.401089668273926, "learning_rate": 7.910712899804248e-06, "loss": 0.4662174701690674, "step": 214580 }, { "epoch": 0.9212797197393163, "grad_norm": 2.7063958644866943, "learning_rate": 7.906401179686624e-06, "loss": 0.2256403923034668, "step": 214590 }, { "epoch": 0.9213226518293364, "grad_norm": 2.5228281021118164, "learning_rate": 7.902089459569001e-06, "loss": 0.17687809467315674, "step": 214600 }, { "epoch": 0.9213655839193564, "grad_norm": 0.2116834670305252, "learning_rate": 7.897777739451377e-06, "loss": 0.17992725372314453, "step": 214610 }, { "epoch": 0.9214085160093763, "grad_norm": 0.0003231070004403591, "learning_rate": 7.893466019333753e-06, "loss": 0.16799609661102294, "step": 214620 }, { "epoch": 0.9214514480993964, "grad_norm": 0.004379116464406252, "learning_rate": 7.88915429921613e-06, "loss": 0.030523082613945006, "step": 214630 }, { "epoch": 0.9214943801894164, "grad_norm": 0.00718501815572381, "learning_rate": 7.884842579098506e-06, "loss": 0.10228891372680664, "step": 214640 }, { "epoch": 0.9215373122794364, "grad_norm": 0.015222213231027126, "learning_rate": 7.880530858980883e-06, "loss": 0.21992430686950684, "step": 214650 }, { "epoch": 0.9215802443694564, "grad_norm": 0.0776410922408104, "learning_rate": 7.876219138863259e-06, "loss": 0.3099169969558716, "step": 214660 }, { "epoch": 0.9216231764594764, "grad_norm": 1.4122614860534668, "learning_rate": 7.871907418745634e-06, "loss": 0.17398278713226317, "step": 214670 }, { "epoch": 0.9216661085494964, "grad_norm": 7.875330924987793, "learning_rate": 7.867595698628012e-06, "loss": 0.12187422513961792, "step": 214680 }, { "epoch": 0.9217090406395164, "grad_norm": 0.010840149596333504, "learning_rate": 7.863283978510387e-06, "loss": 0.1018634557723999, "step": 214690 }, { "epoch": 0.9217519727295365, "grad_norm": 0.007989337667822838, "learning_rate": 7.858972258392763e-06, "loss": 0.0946840524673462, "step": 214700 }, { "epoch": 0.9217949048195564, "grad_norm": 0.00837134663015604, "learning_rate": 7.85466053827514e-06, "loss": 0.1308300495147705, "step": 214710 }, { "epoch": 0.9218378369095764, "grad_norm": 0.5216271877288818, "learning_rate": 7.850348818157516e-06, "loss": 0.08474286198616028, "step": 214720 }, { "epoch": 0.9218807689995965, "grad_norm": 2.1394126415252686, "learning_rate": 7.846037098039891e-06, "loss": 0.32121779918670657, "step": 214730 }, { "epoch": 0.9219237010896164, "grad_norm": 3.283982276916504, "learning_rate": 7.841725377922269e-06, "loss": 0.16682465076446534, "step": 214740 }, { "epoch": 0.9219666331796365, "grad_norm": 0.014272511005401611, "learning_rate": 7.837413657804644e-06, "loss": 0.19908100366592407, "step": 214750 }, { "epoch": 0.9220095652696565, "grad_norm": 0.00569815281778574, "learning_rate": 7.83310193768702e-06, "loss": 0.10329477787017823, "step": 214760 }, { "epoch": 0.9220524973596764, "grad_norm": 1.111350178718567, "learning_rate": 7.828790217569397e-06, "loss": 0.24649744033813475, "step": 214770 }, { "epoch": 0.9220954294496965, "grad_norm": 0.0014714060816913843, "learning_rate": 7.824478497451773e-06, "loss": 0.17052639722824098, "step": 214780 }, { "epoch": 0.9221383615397165, "grad_norm": 2.4729766845703125, "learning_rate": 7.820166777334149e-06, "loss": 0.21881551742553712, "step": 214790 }, { "epoch": 0.9221812936297364, "grad_norm": 4.458625316619873, "learning_rate": 7.815855057216526e-06, "loss": 0.19693403244018554, "step": 214800 }, { "epoch": 0.9222242257197565, "grad_norm": 0.16447418928146362, "learning_rate": 7.811543337098903e-06, "loss": 0.3553196907043457, "step": 214810 }, { "epoch": 0.9222671578097765, "grad_norm": 0.015636222437024117, "learning_rate": 7.807231616981279e-06, "loss": 0.003960480913519859, "step": 214820 }, { "epoch": 0.9223100898997965, "grad_norm": 0.11809345334768295, "learning_rate": 7.802919896863656e-06, "loss": 0.2305279016494751, "step": 214830 }, { "epoch": 0.9223530219898165, "grad_norm": 1.4532008171081543, "learning_rate": 7.798608176746032e-06, "loss": 0.26950161457061766, "step": 214840 }, { "epoch": 0.9223959540798365, "grad_norm": 0.292232871055603, "learning_rate": 7.794296456628408e-06, "loss": 0.019086624681949615, "step": 214850 }, { "epoch": 0.9224388861698565, "grad_norm": 0.0011943303979933262, "learning_rate": 7.789984736510785e-06, "loss": 0.21599533557891845, "step": 214860 }, { "epoch": 0.9224818182598765, "grad_norm": 2.3715226650238037, "learning_rate": 7.78567301639316e-06, "loss": 0.17667001485824585, "step": 214870 }, { "epoch": 0.9225247503498966, "grad_norm": 6.8032708168029785, "learning_rate": 7.781361296275536e-06, "loss": 0.16944377422332763, "step": 214880 }, { "epoch": 0.9225676824399165, "grad_norm": 0.09619058668613434, "learning_rate": 7.777049576157914e-06, "loss": 0.060175222158432004, "step": 214890 }, { "epoch": 0.9226106145299365, "grad_norm": 3.406337022781372, "learning_rate": 7.77273785604029e-06, "loss": 0.11395597457885742, "step": 214900 }, { "epoch": 0.9226535466199566, "grad_norm": 0.000977289048023522, "learning_rate": 7.768426135922665e-06, "loss": 0.16869571208953857, "step": 214910 }, { "epoch": 0.9226964787099765, "grad_norm": 0.4416428804397583, "learning_rate": 7.764114415805042e-06, "loss": 0.2359858751296997, "step": 214920 }, { "epoch": 0.9227394107999966, "grad_norm": 2.120130777359009, "learning_rate": 7.759802695687418e-06, "loss": 0.3166823148727417, "step": 214930 }, { "epoch": 0.9227823428900166, "grad_norm": 0.9974570870399475, "learning_rate": 7.755490975569794e-06, "loss": 0.2767601013183594, "step": 214940 }, { "epoch": 0.9228252749800365, "grad_norm": 0.011383699253201485, "learning_rate": 7.751179255452171e-06, "loss": 0.14973424673080443, "step": 214950 }, { "epoch": 0.9228682070700566, "grad_norm": 1.8270457983016968, "learning_rate": 7.746867535334547e-06, "loss": 0.16595523357391356, "step": 214960 }, { "epoch": 0.9229111391600766, "grad_norm": 0.05330152064561844, "learning_rate": 7.742555815216922e-06, "loss": 0.36078336238861086, "step": 214970 }, { "epoch": 0.9229540712500967, "grad_norm": 0.0003709446464199573, "learning_rate": 7.7382440950993e-06, "loss": 0.2640532493591309, "step": 214980 }, { "epoch": 0.9229970033401166, "grad_norm": 0.1887034922838211, "learning_rate": 7.733932374981675e-06, "loss": 0.08925374746322631, "step": 214990 }, { "epoch": 0.9230399354301366, "grad_norm": 0.0009779626270756125, "learning_rate": 7.729620654864053e-06, "loss": 0.19787448644638062, "step": 215000 }, { "epoch": 0.9230399354301366, "eval_loss": 0.3707399368286133, "eval_runtime": 27.4681, "eval_samples_per_second": 3.641, "eval_steps_per_second": 3.641, "step": 215000 }, { "epoch": 0.9230828675201567, "grad_norm": 0.06447288393974304, "learning_rate": 7.725308934746428e-06, "loss": 0.16723594665527344, "step": 215010 }, { "epoch": 0.9231257996101766, "grad_norm": 0.1130533218383789, "learning_rate": 7.720997214628804e-06, "loss": 0.13752557039260865, "step": 215020 }, { "epoch": 0.9231687317001966, "grad_norm": 1.3311164379119873, "learning_rate": 7.716685494511181e-06, "loss": 0.24227960109710694, "step": 215030 }, { "epoch": 0.9232116637902167, "grad_norm": 0.46791529655456543, "learning_rate": 7.712373774393557e-06, "loss": 0.011401008814573288, "step": 215040 }, { "epoch": 0.9232545958802366, "grad_norm": 0.005901793949306011, "learning_rate": 7.708062054275933e-06, "loss": 0.2547999143600464, "step": 215050 }, { "epoch": 0.9232975279702567, "grad_norm": 4.491530418395996, "learning_rate": 7.70375033415831e-06, "loss": 0.10067328214645385, "step": 215060 }, { "epoch": 0.9233404600602767, "grad_norm": 5.371583938598633, "learning_rate": 7.699438614040686e-06, "loss": 0.1953263521194458, "step": 215070 }, { "epoch": 0.9233833921502966, "grad_norm": 5.961281776428223, "learning_rate": 7.695126893923061e-06, "loss": 0.291353178024292, "step": 215080 }, { "epoch": 0.9234263242403167, "grad_norm": 2.469714641571045, "learning_rate": 7.690815173805439e-06, "loss": 0.3257584571838379, "step": 215090 }, { "epoch": 0.9234692563303367, "grad_norm": 0.29294028878211975, "learning_rate": 7.686503453687814e-06, "loss": 0.08499239087104797, "step": 215100 }, { "epoch": 0.9235121884203567, "grad_norm": 0.0017936037620529532, "learning_rate": 7.68219173357019e-06, "loss": 0.08098435401916504, "step": 215110 }, { "epoch": 0.9235551205103767, "grad_norm": 1.058728575706482, "learning_rate": 7.677880013452567e-06, "loss": 0.15216209888458251, "step": 215120 }, { "epoch": 0.9235980526003967, "grad_norm": 0.08794575929641724, "learning_rate": 7.673568293334943e-06, "loss": 0.11532053947448731, "step": 215130 }, { "epoch": 0.9236409846904167, "grad_norm": 0.0029542662668973207, "learning_rate": 7.669256573217318e-06, "loss": 0.403619384765625, "step": 215140 }, { "epoch": 0.9236839167804367, "grad_norm": 1.159416913986206, "learning_rate": 7.664944853099696e-06, "loss": 0.3979302406311035, "step": 215150 }, { "epoch": 0.9237268488704568, "grad_norm": 1.725462794303894, "learning_rate": 7.660633132982071e-06, "loss": 0.12816778421401978, "step": 215160 }, { "epoch": 0.9237697809604767, "grad_norm": 4.982163429260254, "learning_rate": 7.656321412864447e-06, "loss": 0.2503523826599121, "step": 215170 }, { "epoch": 0.9238127130504967, "grad_norm": 0.055415526032447815, "learning_rate": 7.652009692746826e-06, "loss": 0.18151148557662963, "step": 215180 }, { "epoch": 0.9238556451405168, "grad_norm": 0.009110411629080772, "learning_rate": 7.647697972629202e-06, "loss": 0.22747561931610108, "step": 215190 }, { "epoch": 0.9238985772305367, "grad_norm": 1.5314021110534668, "learning_rate": 7.643386252511577e-06, "loss": 0.19716296195983887, "step": 215200 }, { "epoch": 0.9239415093205567, "grad_norm": 6.504369258880615, "learning_rate": 7.639074532393955e-06, "loss": 0.41256489753723147, "step": 215210 }, { "epoch": 0.9239844414105768, "grad_norm": 0.0012986245565116405, "learning_rate": 7.63476281227633e-06, "loss": 0.05647019147872925, "step": 215220 }, { "epoch": 0.9240273735005967, "grad_norm": 0.0007943433593027294, "learning_rate": 7.630451092158706e-06, "loss": 0.14794392585754396, "step": 215230 }, { "epoch": 0.9240703055906168, "grad_norm": 0.0049696327187120914, "learning_rate": 7.626139372041083e-06, "loss": 0.32319116592407227, "step": 215240 }, { "epoch": 0.9241132376806368, "grad_norm": 0.005273050162941217, "learning_rate": 7.621827651923459e-06, "loss": 0.24418747425079346, "step": 215250 }, { "epoch": 0.9241561697706567, "grad_norm": 1.32347571849823, "learning_rate": 7.617515931805836e-06, "loss": 0.14303951263427733, "step": 215260 }, { "epoch": 0.9241991018606768, "grad_norm": 0.21632139384746552, "learning_rate": 7.613204211688211e-06, "loss": 0.20365605354309083, "step": 215270 }, { "epoch": 0.9242420339506968, "grad_norm": 0.5117451548576355, "learning_rate": 7.608892491570588e-06, "loss": 0.17990248203277587, "step": 215280 }, { "epoch": 0.9242849660407167, "grad_norm": 0.011012107133865356, "learning_rate": 7.604580771452964e-06, "loss": 0.1717315912246704, "step": 215290 }, { "epoch": 0.9243278981307368, "grad_norm": 0.00873053353279829, "learning_rate": 7.60026905133534e-06, "loss": 0.04388104975223541, "step": 215300 }, { "epoch": 0.9243708302207568, "grad_norm": 0.017410963773727417, "learning_rate": 7.595957331217716e-06, "loss": 0.12251949310302734, "step": 215310 }, { "epoch": 0.9244137623107768, "grad_norm": 0.015072675421833992, "learning_rate": 7.591645611100093e-06, "loss": 0.21252987384796143, "step": 215320 }, { "epoch": 0.9244566944007968, "grad_norm": 0.0012947338400408626, "learning_rate": 7.587333890982469e-06, "loss": 0.2460631847381592, "step": 215330 }, { "epoch": 0.9244996264908169, "grad_norm": 1.0608649253845215, "learning_rate": 7.583022170864845e-06, "loss": 0.22779808044433594, "step": 215340 }, { "epoch": 0.9245425585808368, "grad_norm": 0.015276042744517326, "learning_rate": 7.5787104507472215e-06, "loss": 0.2625183343887329, "step": 215350 }, { "epoch": 0.9245854906708568, "grad_norm": 1.360752820968628, "learning_rate": 7.574398730629598e-06, "loss": 0.4349686622619629, "step": 215360 }, { "epoch": 0.9246284227608769, "grad_norm": 5.221940994262695, "learning_rate": 7.570087010511974e-06, "loss": 0.34817028045654297, "step": 215370 }, { "epoch": 0.9246713548508968, "grad_norm": 7.083820819854736, "learning_rate": 7.56577529039435e-06, "loss": 0.17298378944396972, "step": 215380 }, { "epoch": 0.9247142869409168, "grad_norm": 0.016590062528848648, "learning_rate": 7.561463570276727e-06, "loss": 0.0039948180317878725, "step": 215390 }, { "epoch": 0.9247572190309369, "grad_norm": 0.0004913709126412868, "learning_rate": 7.557151850159102e-06, "loss": 0.2017059087753296, "step": 215400 }, { "epoch": 0.9248001511209569, "grad_norm": 7.199446201324463, "learning_rate": 7.552840130041479e-06, "loss": 0.2697106599807739, "step": 215410 }, { "epoch": 0.9248430832109769, "grad_norm": 0.28717929124832153, "learning_rate": 7.548528409923855e-06, "loss": 0.3328535556793213, "step": 215420 }, { "epoch": 0.9248860153009969, "grad_norm": 0.053416259586811066, "learning_rate": 7.544216689806232e-06, "loss": 0.22627270221710205, "step": 215430 }, { "epoch": 0.9249289473910169, "grad_norm": 0.006245059426873922, "learning_rate": 7.5399049696886074e-06, "loss": 0.10136998891830444, "step": 215440 }, { "epoch": 0.9249718794810369, "grad_norm": 1.9855570793151855, "learning_rate": 7.535593249570984e-06, "loss": 0.1648913860321045, "step": 215450 }, { "epoch": 0.9250148115710569, "grad_norm": 5.2693939208984375, "learning_rate": 7.5312815294533604e-06, "loss": 0.23091514110565187, "step": 215460 }, { "epoch": 0.925057743661077, "grad_norm": 0.22706347703933716, "learning_rate": 7.526969809335736e-06, "loss": 0.27173595428466796, "step": 215470 }, { "epoch": 0.9251006757510969, "grad_norm": 0.03872816637158394, "learning_rate": 7.5226580892181126e-06, "loss": 0.14449009895324708, "step": 215480 }, { "epoch": 0.9251436078411169, "grad_norm": 0.07340923696756363, "learning_rate": 7.518346369100489e-06, "loss": 0.1235128402709961, "step": 215490 }, { "epoch": 0.925186539931137, "grad_norm": 0.16422735154628754, "learning_rate": 7.514034648982865e-06, "loss": 0.04489589631557465, "step": 215500 }, { "epoch": 0.9252294720211569, "grad_norm": 5.788188457489014, "learning_rate": 7.509722928865241e-06, "loss": 0.2109375, "step": 215510 }, { "epoch": 0.925272404111177, "grad_norm": 0.025441322475671768, "learning_rate": 7.505411208747618e-06, "loss": 0.1279160737991333, "step": 215520 }, { "epoch": 0.925315336201197, "grad_norm": 1.7489529848098755, "learning_rate": 7.501099488629993e-06, "loss": 0.3018485546112061, "step": 215530 }, { "epoch": 0.9253582682912169, "grad_norm": 0.13211052119731903, "learning_rate": 7.4967877685123715e-06, "loss": 0.15862523317337035, "step": 215540 }, { "epoch": 0.925401200381237, "grad_norm": 0.043895423412323, "learning_rate": 7.492476048394747e-06, "loss": 0.17331376075744628, "step": 215550 }, { "epoch": 0.925444132471257, "grad_norm": 1.541989803314209, "learning_rate": 7.488164328277124e-06, "loss": 0.2534844636917114, "step": 215560 }, { "epoch": 0.9254870645612769, "grad_norm": 1.3819447755813599, "learning_rate": 7.4838526081595e-06, "loss": 0.0953163206577301, "step": 215570 }, { "epoch": 0.925529996651297, "grad_norm": 0.17048867046833038, "learning_rate": 7.479540888041877e-06, "loss": 0.20968384742736818, "step": 215580 }, { "epoch": 0.925572928741317, "grad_norm": 0.2957654297351837, "learning_rate": 7.475229167924252e-06, "loss": 0.1413059115409851, "step": 215590 }, { "epoch": 0.925615860831337, "grad_norm": 1.288763165473938, "learning_rate": 7.470917447806629e-06, "loss": 0.180072557926178, "step": 215600 }, { "epoch": 0.925658792921357, "grad_norm": 0.014453163370490074, "learning_rate": 7.466605727689005e-06, "loss": 0.37809994220733645, "step": 215610 }, { "epoch": 0.925701725011377, "grad_norm": 8.156563758850098, "learning_rate": 7.462294007571381e-06, "loss": 0.23681471347808838, "step": 215620 }, { "epoch": 0.925744657101397, "grad_norm": 0.010253574699163437, "learning_rate": 7.4579822874537575e-06, "loss": 0.24417309761047362, "step": 215630 }, { "epoch": 0.925787589191417, "grad_norm": 6.712070941925049, "learning_rate": 7.453670567336134e-06, "loss": 0.2375593900680542, "step": 215640 }, { "epoch": 0.9258305212814371, "grad_norm": 0.011438658460974693, "learning_rate": 7.44935884721851e-06, "loss": 0.15096044540405273, "step": 215650 }, { "epoch": 0.925873453371457, "grad_norm": 0.025541523471474648, "learning_rate": 7.445047127100886e-06, "loss": 0.11792056560516358, "step": 215660 }, { "epoch": 0.925916385461477, "grad_norm": 2.1013917922973633, "learning_rate": 7.440735406983263e-06, "loss": 0.24362952709198, "step": 215670 }, { "epoch": 0.9259593175514971, "grad_norm": 0.01850118488073349, "learning_rate": 7.436423686865638e-06, "loss": 0.1713258981704712, "step": 215680 }, { "epoch": 0.926002249641517, "grad_norm": 0.047367800027132034, "learning_rate": 7.432111966748015e-06, "loss": 0.22293555736541748, "step": 215690 }, { "epoch": 0.926045181731537, "grad_norm": 3.175168037414551, "learning_rate": 7.427800246630391e-06, "loss": 0.10906111001968384, "step": 215700 }, { "epoch": 0.9260881138215571, "grad_norm": 0.006182890385389328, "learning_rate": 7.423488526512768e-06, "loss": 0.24358677864074707, "step": 215710 }, { "epoch": 0.926131045911577, "grad_norm": 0.00020513370691332966, "learning_rate": 7.419176806395143e-06, "loss": 0.03684086203575134, "step": 215720 }, { "epoch": 0.9261739780015971, "grad_norm": 0.010843836702406406, "learning_rate": 7.41486508627752e-06, "loss": 0.024738329648971557, "step": 215730 }, { "epoch": 0.9262169100916171, "grad_norm": 0.07120528817176819, "learning_rate": 7.410553366159896e-06, "loss": 0.07912518382072449, "step": 215740 }, { "epoch": 0.926259842181637, "grad_norm": 0.012355622835457325, "learning_rate": 7.406241646042272e-06, "loss": 0.1800381064414978, "step": 215750 }, { "epoch": 0.9263027742716571, "grad_norm": 0.040920548141002655, "learning_rate": 7.4019299259246485e-06, "loss": 0.3121651649475098, "step": 215760 }, { "epoch": 0.9263457063616771, "grad_norm": 0.15170609951019287, "learning_rate": 7.397618205807025e-06, "loss": 0.3069912433624268, "step": 215770 }, { "epoch": 0.926388638451697, "grad_norm": 1.5117013454437256, "learning_rate": 7.393306485689401e-06, "loss": 0.4376046180725098, "step": 215780 }, { "epoch": 0.9264315705417171, "grad_norm": 6.3107733726501465, "learning_rate": 7.388994765571777e-06, "loss": 0.19311146736145018, "step": 215790 }, { "epoch": 0.9264745026317371, "grad_norm": 0.010845409706234932, "learning_rate": 7.384683045454154e-06, "loss": 0.11981412172317504, "step": 215800 }, { "epoch": 0.9265174347217571, "grad_norm": 0.029103947803378105, "learning_rate": 7.38037132533653e-06, "loss": 0.17413549423217772, "step": 215810 }, { "epoch": 0.9265603668117771, "grad_norm": 6.210660457611084, "learning_rate": 7.376059605218906e-06, "loss": 0.37948288917541506, "step": 215820 }, { "epoch": 0.9266032989017972, "grad_norm": 3.099215269088745, "learning_rate": 7.371747885101282e-06, "loss": 0.2719104766845703, "step": 215830 }, { "epoch": 0.9266462309918172, "grad_norm": 0.1082659363746643, "learning_rate": 7.367436164983659e-06, "loss": 0.13931306600570678, "step": 215840 }, { "epoch": 0.9266891630818371, "grad_norm": 0.8399651050567627, "learning_rate": 7.3631244448660345e-06, "loss": 0.5135842323303222, "step": 215850 }, { "epoch": 0.9267320951718572, "grad_norm": 0.02767796255648136, "learning_rate": 7.358812724748411e-06, "loss": 0.35433938503265383, "step": 215860 }, { "epoch": 0.9267750272618772, "grad_norm": 0.08824355900287628, "learning_rate": 7.3545010046307874e-06, "loss": 0.14087847471237183, "step": 215870 }, { "epoch": 0.9268179593518971, "grad_norm": 0.05289234593510628, "learning_rate": 7.350189284513163e-06, "loss": 0.09670596718788146, "step": 215880 }, { "epoch": 0.9268608914419172, "grad_norm": 0.4825235605239868, "learning_rate": 7.34587756439554e-06, "loss": 0.23260304927825928, "step": 215890 }, { "epoch": 0.9269038235319372, "grad_norm": 1.406697154045105, "learning_rate": 7.341565844277917e-06, "loss": 0.13096822500228883, "step": 215900 }, { "epoch": 0.9269467556219572, "grad_norm": 0.16242437064647675, "learning_rate": 7.3372541241602934e-06, "loss": 0.17526451349258423, "step": 215910 }, { "epoch": 0.9269896877119772, "grad_norm": 1.7749894857406616, "learning_rate": 7.33294240404267e-06, "loss": 0.19636834859848024, "step": 215920 }, { "epoch": 0.9270326198019972, "grad_norm": 0.00374089227989316, "learning_rate": 7.3286306839250456e-06, "loss": 0.07796565294265748, "step": 215930 }, { "epoch": 0.9270755518920172, "grad_norm": 5.736209869384766, "learning_rate": 7.324318963807422e-06, "loss": 0.4099263668060303, "step": 215940 }, { "epoch": 0.9271184839820372, "grad_norm": 0.009525381959974766, "learning_rate": 7.3200072436897986e-06, "loss": 0.09380664825439453, "step": 215950 }, { "epoch": 0.9271614160720573, "grad_norm": 0.018462950363755226, "learning_rate": 7.315695523572175e-06, "loss": 0.17773728370666503, "step": 215960 }, { "epoch": 0.9272043481620772, "grad_norm": 2.080399751663208, "learning_rate": 7.311383803454551e-06, "loss": 0.15385993719100952, "step": 215970 }, { "epoch": 0.9272472802520972, "grad_norm": 0.0018877952825278044, "learning_rate": 7.307072083336927e-06, "loss": 0.1597719669342041, "step": 215980 }, { "epoch": 0.9272902123421173, "grad_norm": 0.12085002660751343, "learning_rate": 7.302760363219304e-06, "loss": 0.14481079578399658, "step": 215990 }, { "epoch": 0.9273331444321372, "grad_norm": 0.006337421014904976, "learning_rate": 7.298448643101679e-06, "loss": 0.24477388858795165, "step": 216000 }, { "epoch": 0.9273331444321372, "eval_loss": 0.37125617265701294, "eval_runtime": 27.3992, "eval_samples_per_second": 3.65, "eval_steps_per_second": 3.65, "step": 216000 }, { "epoch": 0.9273760765221573, "grad_norm": 0.004524619318544865, "learning_rate": 7.294136922984056e-06, "loss": 0.3099964618682861, "step": 216010 }, { "epoch": 0.9274190086121773, "grad_norm": 0.6417093276977539, "learning_rate": 7.289825202866432e-06, "loss": 0.1063919186592102, "step": 216020 }, { "epoch": 0.9274619407021972, "grad_norm": 0.003405363531783223, "learning_rate": 7.285513482748808e-06, "loss": 0.08869165778160096, "step": 216030 }, { "epoch": 0.9275048727922173, "grad_norm": 1.994133710861206, "learning_rate": 7.2812017626311845e-06, "loss": 0.09733704328536988, "step": 216040 }, { "epoch": 0.9275478048822373, "grad_norm": 1.5306503772735596, "learning_rate": 7.276890042513561e-06, "loss": 0.4169147491455078, "step": 216050 }, { "epoch": 0.9275907369722572, "grad_norm": 0.00030547488131560385, "learning_rate": 7.2725783223959375e-06, "loss": 0.38906643390655515, "step": 216060 }, { "epoch": 0.9276336690622773, "grad_norm": 0.1737918257713318, "learning_rate": 7.268266602278313e-06, "loss": 0.01735979914665222, "step": 216070 }, { "epoch": 0.9276766011522973, "grad_norm": 0.006991616450250149, "learning_rate": 7.26395488216069e-06, "loss": 0.10484539270401001, "step": 216080 }, { "epoch": 0.9277195332423173, "grad_norm": 1.4587398767471313, "learning_rate": 7.259643162043066e-06, "loss": 0.16874505281448365, "step": 216090 }, { "epoch": 0.9277624653323373, "grad_norm": 0.03317570313811302, "learning_rate": 7.255331441925442e-06, "loss": 0.19244474172592163, "step": 216100 }, { "epoch": 0.9278053974223573, "grad_norm": 0.0003741345426533371, "learning_rate": 7.251019721807818e-06, "loss": 0.14304815530776976, "step": 216110 }, { "epoch": 0.9278483295123773, "grad_norm": 0.0023389640264213085, "learning_rate": 7.246708001690195e-06, "loss": 0.1601564407348633, "step": 216120 }, { "epoch": 0.9278912616023973, "grad_norm": 0.7748071551322937, "learning_rate": 7.24239628157257e-06, "loss": 0.24553565979003905, "step": 216130 }, { "epoch": 0.9279341936924174, "grad_norm": 1.804882287979126, "learning_rate": 7.238084561454947e-06, "loss": 0.08244321942329406, "step": 216140 }, { "epoch": 0.9279771257824373, "grad_norm": 0.0013793292455375195, "learning_rate": 7.233772841337323e-06, "loss": 0.3597370386123657, "step": 216150 }, { "epoch": 0.9280200578724573, "grad_norm": 0.009140110574662685, "learning_rate": 7.229461121219699e-06, "loss": 0.1765925645828247, "step": 216160 }, { "epoch": 0.9280629899624774, "grad_norm": 0.007054975721985102, "learning_rate": 7.2251494011020755e-06, "loss": 0.26095626354217527, "step": 216170 }, { "epoch": 0.9281059220524973, "grad_norm": 0.9550222158432007, "learning_rate": 7.220837680984452e-06, "loss": 0.127244234085083, "step": 216180 }, { "epoch": 0.9281488541425174, "grad_norm": 5.590497970581055, "learning_rate": 7.2165259608668285e-06, "loss": 0.250270938873291, "step": 216190 }, { "epoch": 0.9281917862325374, "grad_norm": 2.962364912033081, "learning_rate": 7.212214240749204e-06, "loss": 0.4075340270996094, "step": 216200 }, { "epoch": 0.9282347183225573, "grad_norm": 0.0032614811789244413, "learning_rate": 7.207902520631581e-06, "loss": 0.16378134489059448, "step": 216210 }, { "epoch": 0.9282776504125774, "grad_norm": 1.4757379293441772, "learning_rate": 7.203590800513957e-06, "loss": 0.27907600402832033, "step": 216220 }, { "epoch": 0.9283205825025974, "grad_norm": 0.00793127715587616, "learning_rate": 7.199279080396333e-06, "loss": 0.22693383693695068, "step": 216230 }, { "epoch": 0.9283635145926173, "grad_norm": 0.08194581419229507, "learning_rate": 7.194967360278709e-06, "loss": 0.06598881483078003, "step": 216240 }, { "epoch": 0.9284064466826374, "grad_norm": 0.02816123329102993, "learning_rate": 7.190655640161086e-06, "loss": 0.027814167737960815, "step": 216250 }, { "epoch": 0.9284493787726574, "grad_norm": 3.995386838912964, "learning_rate": 7.1863439200434615e-06, "loss": 0.3064185857772827, "step": 216260 }, { "epoch": 0.9284923108626775, "grad_norm": 0.08040156215429306, "learning_rate": 7.18203219992584e-06, "loss": 0.32654943466186526, "step": 216270 }, { "epoch": 0.9285352429526974, "grad_norm": 0.0028185301925987005, "learning_rate": 7.177720479808215e-06, "loss": 0.22416894435882567, "step": 216280 }, { "epoch": 0.9285781750427174, "grad_norm": 0.0008616048726253211, "learning_rate": 7.173408759690592e-06, "loss": 0.336730432510376, "step": 216290 }, { "epoch": 0.9286211071327375, "grad_norm": 0.0030813466291874647, "learning_rate": 7.169097039572968e-06, "loss": 0.07731318473815918, "step": 216300 }, { "epoch": 0.9286640392227574, "grad_norm": 0.003057356458157301, "learning_rate": 7.164785319455345e-06, "loss": 0.08529521822929383, "step": 216310 }, { "epoch": 0.9287069713127775, "grad_norm": 0.0021866008173674345, "learning_rate": 7.1604735993377204e-06, "loss": 0.13397653102874757, "step": 216320 }, { "epoch": 0.9287499034027975, "grad_norm": 10.670136451721191, "learning_rate": 7.156161879220097e-06, "loss": 0.11311818361282348, "step": 216330 }, { "epoch": 0.9287928354928174, "grad_norm": 0.053423043340444565, "learning_rate": 7.1518501591024734e-06, "loss": 0.1762098789215088, "step": 216340 }, { "epoch": 0.9288357675828375, "grad_norm": 0.12246271222829819, "learning_rate": 7.147538438984849e-06, "loss": 0.15174304246902465, "step": 216350 }, { "epoch": 0.9288786996728575, "grad_norm": 3.375256061553955, "learning_rate": 7.143226718867226e-06, "loss": 0.21079769134521484, "step": 216360 }, { "epoch": 0.9289216317628775, "grad_norm": 0.113634392619133, "learning_rate": 7.138914998749602e-06, "loss": 0.2766488313674927, "step": 216370 }, { "epoch": 0.9289645638528975, "grad_norm": 0.8795444965362549, "learning_rate": 7.134603278631978e-06, "loss": 0.3603955268859863, "step": 216380 }, { "epoch": 0.9290074959429175, "grad_norm": 1.135464072227478, "learning_rate": 7.130291558514354e-06, "loss": 0.22582101821899414, "step": 216390 }, { "epoch": 0.9290504280329375, "grad_norm": 0.5949857831001282, "learning_rate": 7.125979838396731e-06, "loss": 0.16929233074188232, "step": 216400 }, { "epoch": 0.9290933601229575, "grad_norm": 0.012552267871797085, "learning_rate": 7.121668118279106e-06, "loss": 0.16296029090881348, "step": 216410 }, { "epoch": 0.9291362922129776, "grad_norm": 0.01894237846136093, "learning_rate": 7.117356398161483e-06, "loss": 0.017162826657295228, "step": 216420 }, { "epoch": 0.9291792243029975, "grad_norm": 0.01409218367189169, "learning_rate": 7.113044678043859e-06, "loss": 0.6158824443817139, "step": 216430 }, { "epoch": 0.9292221563930175, "grad_norm": 0.5395675897598267, "learning_rate": 7.108732957926236e-06, "loss": 0.20108966827392577, "step": 216440 }, { "epoch": 0.9292650884830376, "grad_norm": 0.006730252411216497, "learning_rate": 7.1044212378086115e-06, "loss": 0.1388014554977417, "step": 216450 }, { "epoch": 0.9293080205730575, "grad_norm": 0.005381924100220203, "learning_rate": 7.100109517690988e-06, "loss": 0.3470167875289917, "step": 216460 }, { "epoch": 0.9293509526630775, "grad_norm": 0.001024061581119895, "learning_rate": 7.0957977975733645e-06, "loss": 0.207639741897583, "step": 216470 }, { "epoch": 0.9293938847530976, "grad_norm": 0.5139259696006775, "learning_rate": 7.09148607745574e-06, "loss": 0.3020778179168701, "step": 216480 }, { "epoch": 0.9294368168431175, "grad_norm": 0.3843024969100952, "learning_rate": 7.087174357338117e-06, "loss": 0.2944885015487671, "step": 216490 }, { "epoch": 0.9294797489331376, "grad_norm": 1.2467830181121826, "learning_rate": 7.082862637220493e-06, "loss": 0.15704236030578614, "step": 216500 }, { "epoch": 0.9295226810231576, "grad_norm": 0.022638993337750435, "learning_rate": 7.078550917102869e-06, "loss": 0.12156269550323487, "step": 216510 }, { "epoch": 0.9295656131131775, "grad_norm": 0.007260969839990139, "learning_rate": 7.074239196985245e-06, "loss": 0.20155694484710693, "step": 216520 }, { "epoch": 0.9296085452031976, "grad_norm": 6.707982063293457, "learning_rate": 7.069927476867622e-06, "loss": 0.4008821964263916, "step": 216530 }, { "epoch": 0.9296514772932176, "grad_norm": 0.00419988576322794, "learning_rate": 7.065615756749997e-06, "loss": 0.10957802534103393, "step": 216540 }, { "epoch": 0.9296944093832376, "grad_norm": 7.6871209144592285, "learning_rate": 7.061304036632374e-06, "loss": 0.22321274280548095, "step": 216550 }, { "epoch": 0.9297373414732576, "grad_norm": 0.021390387788414955, "learning_rate": 7.05699231651475e-06, "loss": 0.07745559811592102, "step": 216560 }, { "epoch": 0.9297802735632776, "grad_norm": 0.015582584775984287, "learning_rate": 7.052680596397127e-06, "loss": 0.12427722215652466, "step": 216570 }, { "epoch": 0.9298232056532976, "grad_norm": 0.43360817432403564, "learning_rate": 7.0483688762795026e-06, "loss": 0.01620929390192032, "step": 216580 }, { "epoch": 0.9298661377433176, "grad_norm": 0.48537853360176086, "learning_rate": 7.044057156161879e-06, "loss": 0.20732901096343995, "step": 216590 }, { "epoch": 0.9299090698333377, "grad_norm": 2.50187611579895, "learning_rate": 7.0397454360442555e-06, "loss": 0.3742201805114746, "step": 216600 }, { "epoch": 0.9299520019233576, "grad_norm": 0.06702170521020889, "learning_rate": 7.035433715926631e-06, "loss": 0.23646984100341797, "step": 216610 }, { "epoch": 0.9299949340133776, "grad_norm": 0.4540206789970398, "learning_rate": 7.031121995809008e-06, "loss": 0.40461249351501466, "step": 216620 }, { "epoch": 0.9300378661033977, "grad_norm": 0.0032786328811198473, "learning_rate": 7.026810275691385e-06, "loss": 0.25916366577148436, "step": 216630 }, { "epoch": 0.9300807981934176, "grad_norm": 0.002468528226017952, "learning_rate": 7.0224985555737615e-06, "loss": 0.19205652475357055, "step": 216640 }, { "epoch": 0.9301237302834376, "grad_norm": 0.09234610944986343, "learning_rate": 7.018186835456138e-06, "loss": 0.16778699159622193, "step": 216650 }, { "epoch": 0.9301666623734577, "grad_norm": 0.00125981867313385, "learning_rate": 7.013875115338514e-06, "loss": 0.10867637395858765, "step": 216660 }, { "epoch": 0.9302095944634776, "grad_norm": 0.0003518553567118943, "learning_rate": 7.00956339522089e-06, "loss": 0.2450389862060547, "step": 216670 }, { "epoch": 0.9302525265534977, "grad_norm": 0.21603882312774658, "learning_rate": 7.005251675103267e-06, "loss": 0.15238502025604247, "step": 216680 }, { "epoch": 0.9302954586435177, "grad_norm": 0.00265440228395164, "learning_rate": 7.000939954985643e-06, "loss": 0.0186956986784935, "step": 216690 }, { "epoch": 0.9303383907335376, "grad_norm": 0.00983706209808588, "learning_rate": 6.996628234868019e-06, "loss": 0.13357884883880616, "step": 216700 }, { "epoch": 0.9303813228235577, "grad_norm": 0.0024289621505886316, "learning_rate": 6.992316514750395e-06, "loss": 0.21640503406524658, "step": 216710 }, { "epoch": 0.9304242549135777, "grad_norm": 0.15975762903690338, "learning_rate": 6.988004794632772e-06, "loss": 0.10567178726196289, "step": 216720 }, { "epoch": 0.9304671870035978, "grad_norm": 0.03177304193377495, "learning_rate": 6.9836930745151475e-06, "loss": 0.16201092004776002, "step": 216730 }, { "epoch": 0.9305101190936177, "grad_norm": 0.022674480453133583, "learning_rate": 6.979381354397524e-06, "loss": 0.26315600872039796, "step": 216740 }, { "epoch": 0.9305530511836377, "grad_norm": 0.019809991121292114, "learning_rate": 6.9750696342799004e-06, "loss": 0.1809520125389099, "step": 216750 }, { "epoch": 0.9305959832736578, "grad_norm": 1.3690739870071411, "learning_rate": 6.970757914162276e-06, "loss": 0.21216487884521484, "step": 216760 }, { "epoch": 0.9306389153636777, "grad_norm": 0.0006519387243315578, "learning_rate": 6.966446194044653e-06, "loss": 0.23284423351287842, "step": 216770 }, { "epoch": 0.9306818474536978, "grad_norm": 1.3061208724975586, "learning_rate": 6.962134473927029e-06, "loss": 0.04781084656715393, "step": 216780 }, { "epoch": 0.9307247795437178, "grad_norm": 0.007215921767055988, "learning_rate": 6.957822753809405e-06, "loss": 0.16829094886779786, "step": 216790 }, { "epoch": 0.9307677116337377, "grad_norm": 4.176065921783447, "learning_rate": 6.953511033691781e-06, "loss": 0.22234597206115722, "step": 216800 }, { "epoch": 0.9308106437237578, "grad_norm": 0.19154906272888184, "learning_rate": 6.949199313574158e-06, "loss": 0.12716169357299806, "step": 216810 }, { "epoch": 0.9308535758137778, "grad_norm": 5.86802339553833, "learning_rate": 6.944887593456534e-06, "loss": 0.29127726554870603, "step": 216820 }, { "epoch": 0.9308965079037977, "grad_norm": 3.3478128910064697, "learning_rate": 6.94057587333891e-06, "loss": 0.3179521083831787, "step": 216830 }, { "epoch": 0.9309394399938178, "grad_norm": 0.002998100593686104, "learning_rate": 6.936264153221286e-06, "loss": 0.0034219883382320404, "step": 216840 }, { "epoch": 0.9309823720838378, "grad_norm": 2.541491746902466, "learning_rate": 6.931952433103663e-06, "loss": 0.23211936950683593, "step": 216850 }, { "epoch": 0.9310253041738578, "grad_norm": 0.2962930500507355, "learning_rate": 6.9276407129860385e-06, "loss": 0.11564910411834717, "step": 216860 }, { "epoch": 0.9310682362638778, "grad_norm": 2.9043209552764893, "learning_rate": 6.923328992868415e-06, "loss": 0.15632420778274536, "step": 216870 }, { "epoch": 0.9311111683538978, "grad_norm": 3.168398380279541, "learning_rate": 6.9190172727507915e-06, "loss": 0.11619545221328735, "step": 216880 }, { "epoch": 0.9311541004439178, "grad_norm": 0.0030346859712153673, "learning_rate": 6.914705552633167e-06, "loss": 0.1562952995300293, "step": 216890 }, { "epoch": 0.9311970325339378, "grad_norm": 5.6514692306518555, "learning_rate": 6.910393832515544e-06, "loss": 0.2962817192077637, "step": 216900 }, { "epoch": 0.9312399646239579, "grad_norm": 40.44093322753906, "learning_rate": 6.90608211239792e-06, "loss": 0.14009404182434082, "step": 216910 }, { "epoch": 0.9312828967139778, "grad_norm": 0.12538249790668488, "learning_rate": 6.901770392280296e-06, "loss": 0.09896424412727356, "step": 216920 }, { "epoch": 0.9313258288039978, "grad_norm": 4.35367488861084, "learning_rate": 6.897458672162672e-06, "loss": 0.2702718019485474, "step": 216930 }, { "epoch": 0.9313687608940179, "grad_norm": 0.004443527199327946, "learning_rate": 6.893146952045049e-06, "loss": 0.1984582781791687, "step": 216940 }, { "epoch": 0.9314116929840378, "grad_norm": 0.7379357814788818, "learning_rate": 6.888835231927425e-06, "loss": 0.1050255537033081, "step": 216950 }, { "epoch": 0.9314546250740579, "grad_norm": 1.528487205505371, "learning_rate": 6.884523511809801e-06, "loss": 0.26220476627349854, "step": 216960 }, { "epoch": 0.9314975571640779, "grad_norm": 0.0015041300794109702, "learning_rate": 6.8802117916921774e-06, "loss": 0.19335163831710817, "step": 216970 }, { "epoch": 0.9315404892540978, "grad_norm": 3.583026170730591, "learning_rate": 6.875900071574554e-06, "loss": 0.4297220230102539, "step": 216980 }, { "epoch": 0.9315834213441179, "grad_norm": 2.160015344619751, "learning_rate": 6.871588351456931e-06, "loss": 0.41837215423583984, "step": 216990 }, { "epoch": 0.9316263534341379, "grad_norm": 0.005593061912804842, "learning_rate": 6.867276631339308e-06, "loss": 0.2308417320251465, "step": 217000 }, { "epoch": 0.9316263534341379, "eval_loss": 0.3718287944793701, "eval_runtime": 27.3656, "eval_samples_per_second": 3.654, "eval_steps_per_second": 3.654, "step": 217000 }, { "epoch": 0.9316692855241578, "grad_norm": 0.00907901581376791, "learning_rate": 6.862964911221683e-06, "loss": 0.24105193614959716, "step": 217010 }, { "epoch": 0.9317122176141779, "grad_norm": 0.15819871425628662, "learning_rate": 6.85865319110406e-06, "loss": 0.13366590738296508, "step": 217020 }, { "epoch": 0.9317551497041979, "grad_norm": 1.8451436758041382, "learning_rate": 6.854341470986436e-06, "loss": 0.17509522438049316, "step": 217030 }, { "epoch": 0.9317980817942179, "grad_norm": 1.5129737854003906, "learning_rate": 6.850029750868812e-06, "loss": 0.3355816125869751, "step": 217040 }, { "epoch": 0.9318410138842379, "grad_norm": 3.076338052749634, "learning_rate": 6.8457180307511885e-06, "loss": 0.3039119005203247, "step": 217050 }, { "epoch": 0.9318839459742579, "grad_norm": 0.12309716641902924, "learning_rate": 6.841406310633565e-06, "loss": 0.1814027786254883, "step": 217060 }, { "epoch": 0.9319268780642779, "grad_norm": 0.8530434966087341, "learning_rate": 6.8370945905159415e-06, "loss": 0.2380002737045288, "step": 217070 }, { "epoch": 0.9319698101542979, "grad_norm": 0.05917308107018471, "learning_rate": 6.832782870398317e-06, "loss": 0.06543290615081787, "step": 217080 }, { "epoch": 0.932012742244318, "grad_norm": 0.012692649848759174, "learning_rate": 6.828471150280694e-06, "loss": 0.24887619018554688, "step": 217090 }, { "epoch": 0.9320556743343379, "grad_norm": 0.019568748772144318, "learning_rate": 6.82415943016307e-06, "loss": 0.1854008674621582, "step": 217100 }, { "epoch": 0.9320986064243579, "grad_norm": 0.0013383131008595228, "learning_rate": 6.819847710045446e-06, "loss": 0.32312760353088377, "step": 217110 }, { "epoch": 0.932141538514378, "grad_norm": 0.027715496718883514, "learning_rate": 6.815535989927822e-06, "loss": 0.04742673635482788, "step": 217120 }, { "epoch": 0.9321844706043979, "grad_norm": 0.6883767247200012, "learning_rate": 6.811224269810199e-06, "loss": 0.44977903366088867, "step": 217130 }, { "epoch": 0.932227402694418, "grad_norm": 0.014982739463448524, "learning_rate": 6.8069125496925745e-06, "loss": 0.01991720199584961, "step": 217140 }, { "epoch": 0.932270334784438, "grad_norm": 1.052545428276062, "learning_rate": 6.802600829574951e-06, "loss": 0.3021081447601318, "step": 217150 }, { "epoch": 0.932313266874458, "grad_norm": 0.022616246715188026, "learning_rate": 6.7982891094573275e-06, "loss": 0.1141018033027649, "step": 217160 }, { "epoch": 0.932356198964478, "grad_norm": 0.8831282258033752, "learning_rate": 6.793977389339703e-06, "loss": 0.28502347469329836, "step": 217170 }, { "epoch": 0.932399131054498, "grad_norm": 0.5590686798095703, "learning_rate": 6.78966566922208e-06, "loss": 0.12975300550460817, "step": 217180 }, { "epoch": 0.932442063144518, "grad_norm": 2.844775915145874, "learning_rate": 6.785353949104456e-06, "loss": 0.29583823680877686, "step": 217190 }, { "epoch": 0.932484995234538, "grad_norm": 0.0781247541308403, "learning_rate": 6.781042228986833e-06, "loss": 0.1255470871925354, "step": 217200 }, { "epoch": 0.932527927324558, "grad_norm": 3.2160632610321045, "learning_rate": 6.776730508869208e-06, "loss": 0.1684964656829834, "step": 217210 }, { "epoch": 0.9325708594145781, "grad_norm": 3.4206385612487793, "learning_rate": 6.772418788751585e-06, "loss": 0.37236800193786623, "step": 217220 }, { "epoch": 0.932613791504598, "grad_norm": 0.0038313406985253096, "learning_rate": 6.768107068633961e-06, "loss": 0.17248566150665284, "step": 217230 }, { "epoch": 0.932656723594618, "grad_norm": 0.038741614669561386, "learning_rate": 6.763795348516337e-06, "loss": 0.12119134664535522, "step": 217240 }, { "epoch": 0.9326996556846381, "grad_norm": 6.805099010467529, "learning_rate": 6.759483628398713e-06, "loss": 0.4294279098510742, "step": 217250 }, { "epoch": 0.932742587774658, "grad_norm": 1.4378046989440918, "learning_rate": 6.75517190828109e-06, "loss": 0.3188670873641968, "step": 217260 }, { "epoch": 0.9327855198646781, "grad_norm": 0.9875854849815369, "learning_rate": 6.7508601881634655e-06, "loss": 0.10516676902770997, "step": 217270 }, { "epoch": 0.9328284519546981, "grad_norm": 0.013433823361992836, "learning_rate": 6.746548468045842e-06, "loss": 0.25795345306396483, "step": 217280 }, { "epoch": 0.932871384044718, "grad_norm": 0.016448192298412323, "learning_rate": 6.7422367479282185e-06, "loss": 0.08310087323188782, "step": 217290 }, { "epoch": 0.9329143161347381, "grad_norm": 0.01219375804066658, "learning_rate": 6.737925027810594e-06, "loss": 0.22616422176361084, "step": 217300 }, { "epoch": 0.9329572482247581, "grad_norm": 3.024327278137207, "learning_rate": 6.733613307692971e-06, "loss": 0.13399930000305177, "step": 217310 }, { "epoch": 0.933000180314778, "grad_norm": 2.9267966747283936, "learning_rate": 6.729301587575347e-06, "loss": 0.32572245597839355, "step": 217320 }, { "epoch": 0.9330431124047981, "grad_norm": 0.055614180862903595, "learning_rate": 6.724989867457724e-06, "loss": 0.402554988861084, "step": 217330 }, { "epoch": 0.9330860444948181, "grad_norm": 0.0038208523765206337, "learning_rate": 6.720678147340099e-06, "loss": 0.08428694009780884, "step": 217340 }, { "epoch": 0.9331289765848381, "grad_norm": 0.004386112093925476, "learning_rate": 6.716366427222476e-06, "loss": 0.26659092903137205, "step": 217350 }, { "epoch": 0.9331719086748581, "grad_norm": 0.0014016431523486972, "learning_rate": 6.712054707104853e-06, "loss": 0.006140134483575821, "step": 217360 }, { "epoch": 0.9332148407648782, "grad_norm": 0.3096982538700104, "learning_rate": 6.70774298698723e-06, "loss": 0.11977262496948242, "step": 217370 }, { "epoch": 0.9332577728548981, "grad_norm": 0.05670047178864479, "learning_rate": 6.703431266869606e-06, "loss": 0.22703907489776612, "step": 217380 }, { "epoch": 0.9333007049449181, "grad_norm": 0.04537387937307358, "learning_rate": 6.699119546751982e-06, "loss": 0.07578898072242737, "step": 217390 }, { "epoch": 0.9333436370349382, "grad_norm": 0.06389743089675903, "learning_rate": 6.694807826634358e-06, "loss": 0.010492034256458282, "step": 217400 }, { "epoch": 0.9333865691249581, "grad_norm": 0.02293463796377182, "learning_rate": 6.690496106516735e-06, "loss": 0.438470458984375, "step": 217410 }, { "epoch": 0.9334295012149781, "grad_norm": 0.003789474256336689, "learning_rate": 6.6861843863991104e-06, "loss": 0.23822336196899413, "step": 217420 }, { "epoch": 0.9334724333049982, "grad_norm": 0.2004413604736328, "learning_rate": 6.681872666281487e-06, "loss": 0.12822024822235106, "step": 217430 }, { "epoch": 0.9335153653950181, "grad_norm": 0.3013432025909424, "learning_rate": 6.677560946163863e-06, "loss": 0.16031705141067504, "step": 217440 }, { "epoch": 0.9335582974850382, "grad_norm": 1.5371167659759521, "learning_rate": 6.67324922604624e-06, "loss": 0.17891354560852052, "step": 217450 }, { "epoch": 0.9336012295750582, "grad_norm": 0.011646891944110394, "learning_rate": 6.6689375059286156e-06, "loss": 0.05217450261116028, "step": 217460 }, { "epoch": 0.9336441616650781, "grad_norm": 0.01779901422560215, "learning_rate": 6.664625785810992e-06, "loss": 0.06180897355079651, "step": 217470 }, { "epoch": 0.9336870937550982, "grad_norm": 0.04970623552799225, "learning_rate": 6.6603140656933686e-06, "loss": 0.16953123807907106, "step": 217480 }, { "epoch": 0.9337300258451182, "grad_norm": 3.2656664848327637, "learning_rate": 6.656002345575744e-06, "loss": 0.13938552141189575, "step": 217490 }, { "epoch": 0.9337729579351381, "grad_norm": 0.09889588505029678, "learning_rate": 6.651690625458121e-06, "loss": 0.20841660499572753, "step": 217500 }, { "epoch": 0.9338158900251582, "grad_norm": 3.853638172149658, "learning_rate": 6.647378905340497e-06, "loss": 0.2557401180267334, "step": 217510 }, { "epoch": 0.9338588221151782, "grad_norm": 0.19543249905109406, "learning_rate": 6.643067185222873e-06, "loss": 0.15189332962036134, "step": 217520 }, { "epoch": 0.9339017542051982, "grad_norm": 0.22307856380939484, "learning_rate": 6.638755465105249e-06, "loss": 0.2102879285812378, "step": 217530 }, { "epoch": 0.9339446862952182, "grad_norm": 0.026279602199792862, "learning_rate": 6.634443744987626e-06, "loss": 0.23108108043670655, "step": 217540 }, { "epoch": 0.9339876183852382, "grad_norm": 0.03201638534665108, "learning_rate": 6.6301320248700015e-06, "loss": 0.0656770944595337, "step": 217550 }, { "epoch": 0.9340305504752582, "grad_norm": 0.008588535711169243, "learning_rate": 6.625820304752378e-06, "loss": 0.1322205901145935, "step": 217560 }, { "epoch": 0.9340734825652782, "grad_norm": 0.015069599263370037, "learning_rate": 6.6215085846347545e-06, "loss": 0.14335529804229735, "step": 217570 }, { "epoch": 0.9341164146552983, "grad_norm": 0.47126927971839905, "learning_rate": 6.617196864517131e-06, "loss": 0.18532544374465942, "step": 217580 }, { "epoch": 0.9341593467453183, "grad_norm": 2.1417133808135986, "learning_rate": 6.612885144399507e-06, "loss": 0.14605796337127686, "step": 217590 }, { "epoch": 0.9342022788353382, "grad_norm": 2.728137254714966, "learning_rate": 6.608573424281883e-06, "loss": 0.21810925006866455, "step": 217600 }, { "epoch": 0.9342452109253583, "grad_norm": 8.483184814453125, "learning_rate": 6.60426170416426e-06, "loss": 0.20435698032379152, "step": 217610 }, { "epoch": 0.9342881430153783, "grad_norm": 3.666672468185425, "learning_rate": 6.599949984046635e-06, "loss": 0.027203971147537233, "step": 217620 }, { "epoch": 0.9343310751053983, "grad_norm": 0.0007382782059721649, "learning_rate": 6.595638263929012e-06, "loss": 0.13797013759613036, "step": 217630 }, { "epoch": 0.9343740071954183, "grad_norm": 0.0005268286331556737, "learning_rate": 6.591326543811388e-06, "loss": 0.16217823028564454, "step": 217640 }, { "epoch": 0.9344169392854383, "grad_norm": 0.2898944914340973, "learning_rate": 6.587014823693764e-06, "loss": 0.050319230556488036, "step": 217650 }, { "epoch": 0.9344598713754583, "grad_norm": 0.010052835568785667, "learning_rate": 6.58270310357614e-06, "loss": 0.12320134639739991, "step": 217660 }, { "epoch": 0.9345028034654783, "grad_norm": 0.016634009778499603, "learning_rate": 6.578391383458517e-06, "loss": 0.12773616313934327, "step": 217670 }, { "epoch": 0.9345457355554984, "grad_norm": 0.08195490390062332, "learning_rate": 6.574079663340893e-06, "loss": 0.29748404026031494, "step": 217680 }, { "epoch": 0.9345886676455183, "grad_norm": 0.0030808262526988983, "learning_rate": 6.569767943223269e-06, "loss": 0.2741116762161255, "step": 217690 }, { "epoch": 0.9346315997355383, "grad_norm": 0.03463468328118324, "learning_rate": 6.5654562231056455e-06, "loss": 0.2992996692657471, "step": 217700 }, { "epoch": 0.9346745318255584, "grad_norm": 0.057465892285108566, "learning_rate": 6.561144502988022e-06, "loss": 0.10853420495986939, "step": 217710 }, { "epoch": 0.9347174639155783, "grad_norm": 0.011129839345812798, "learning_rate": 6.556832782870399e-06, "loss": 0.13864043951034546, "step": 217720 }, { "epoch": 0.9347603960055983, "grad_norm": 0.3891150653362274, "learning_rate": 6.552521062752776e-06, "loss": 0.20633718967437745, "step": 217730 }, { "epoch": 0.9348033280956184, "grad_norm": 0.05311836674809456, "learning_rate": 6.5482093426351515e-06, "loss": 0.2821415185928345, "step": 217740 }, { "epoch": 0.9348462601856383, "grad_norm": 1.0979710817337036, "learning_rate": 6.543897622517528e-06, "loss": 0.0567243218421936, "step": 217750 }, { "epoch": 0.9348891922756584, "grad_norm": 5.553037166595459, "learning_rate": 6.5395859023999045e-06, "loss": 0.18742778301239013, "step": 217760 }, { "epoch": 0.9349321243656784, "grad_norm": 0.0048981523141264915, "learning_rate": 6.53527418228228e-06, "loss": 0.06455135345458984, "step": 217770 }, { "epoch": 0.9349750564556983, "grad_norm": 0.0630481168627739, "learning_rate": 6.530962462164657e-06, "loss": 0.14230971336364745, "step": 217780 }, { "epoch": 0.9350179885457184, "grad_norm": 1.3087913990020752, "learning_rate": 6.526650742047033e-06, "loss": 0.24478435516357422, "step": 217790 }, { "epoch": 0.9350609206357384, "grad_norm": 3.037684679031372, "learning_rate": 6.522339021929409e-06, "loss": 0.23640942573547363, "step": 217800 }, { "epoch": 0.9351038527257584, "grad_norm": 0.005896865390241146, "learning_rate": 6.518027301811785e-06, "loss": 0.14557818174362183, "step": 217810 }, { "epoch": 0.9351467848157784, "grad_norm": 0.0034972967114299536, "learning_rate": 6.513715581694162e-06, "loss": 0.27066123485565186, "step": 217820 }, { "epoch": 0.9351897169057984, "grad_norm": 0.002648388734087348, "learning_rate": 6.509403861576538e-06, "loss": 0.280427360534668, "step": 217830 }, { "epoch": 0.9352326489958184, "grad_norm": 2.0864877700805664, "learning_rate": 6.505092141458914e-06, "loss": 0.3587048530578613, "step": 217840 }, { "epoch": 0.9352755810858384, "grad_norm": 0.04708682373166084, "learning_rate": 6.5007804213412904e-06, "loss": 0.07716538310050965, "step": 217850 }, { "epoch": 0.9353185131758585, "grad_norm": 0.012469529174268246, "learning_rate": 6.496468701223667e-06, "loss": 0.17058430910110473, "step": 217860 }, { "epoch": 0.9353614452658784, "grad_norm": 4.872241497039795, "learning_rate": 6.492156981106043e-06, "loss": 0.1434084177017212, "step": 217870 }, { "epoch": 0.9354043773558984, "grad_norm": 0.02215704135596752, "learning_rate": 6.487845260988419e-06, "loss": 0.31536402702331545, "step": 217880 }, { "epoch": 0.9354473094459185, "grad_norm": 0.04922785237431526, "learning_rate": 6.4835335408707956e-06, "loss": 0.29095261096954345, "step": 217890 }, { "epoch": 0.9354902415359384, "grad_norm": 0.4923323094844818, "learning_rate": 6.479221820753171e-06, "loss": 0.08270606994628907, "step": 217900 }, { "epoch": 0.9355331736259584, "grad_norm": 0.005486680194735527, "learning_rate": 6.474910100635548e-06, "loss": 0.14887460470199584, "step": 217910 }, { "epoch": 0.9355761057159785, "grad_norm": 3.3651132583618164, "learning_rate": 6.470598380517924e-06, "loss": 0.24405162334442138, "step": 217920 }, { "epoch": 0.9356190378059984, "grad_norm": 3.6582298278808594, "learning_rate": 6.466286660400301e-06, "loss": 0.18124510049819947, "step": 217930 }, { "epoch": 0.9356619698960185, "grad_norm": 2.6301400661468506, "learning_rate": 6.461974940282676e-06, "loss": 0.24226765632629393, "step": 217940 }, { "epoch": 0.9357049019860385, "grad_norm": 3.168323278427124, "learning_rate": 6.457663220165053e-06, "loss": 0.13127228021621704, "step": 217950 }, { "epoch": 0.9357478340760584, "grad_norm": 0.9777452349662781, "learning_rate": 6.453351500047429e-06, "loss": 0.24676942825317383, "step": 217960 }, { "epoch": 0.9357907661660785, "grad_norm": 0.0015889370115473866, "learning_rate": 6.449039779929805e-06, "loss": 0.16644636392593384, "step": 217970 }, { "epoch": 0.9358336982560985, "grad_norm": 0.017067328095436096, "learning_rate": 6.4447280598121815e-06, "loss": 0.14611515998840333, "step": 217980 }, { "epoch": 0.9358766303461185, "grad_norm": 0.0011988900369033217, "learning_rate": 6.440416339694558e-06, "loss": 0.022569629549980163, "step": 217990 }, { "epoch": 0.9359195624361385, "grad_norm": 1.1535048484802246, "learning_rate": 6.436104619576934e-06, "loss": 0.13392226696014403, "step": 218000 }, { "epoch": 0.9359195624361385, "eval_loss": 0.37059035897254944, "eval_runtime": 27.4273, "eval_samples_per_second": 3.646, "eval_steps_per_second": 3.646, "step": 218000 }, { "epoch": 0.9359624945261585, "grad_norm": 0.2708398699760437, "learning_rate": 6.43179289945931e-06, "loss": 0.11112178564071655, "step": 218010 }, { "epoch": 0.9360054266161786, "grad_norm": 0.07791769504547119, "learning_rate": 6.427481179341687e-06, "loss": 0.01833338290452957, "step": 218020 }, { "epoch": 0.9360483587061985, "grad_norm": 0.03304653614759445, "learning_rate": 6.423169459224062e-06, "loss": 0.20075433254241942, "step": 218030 }, { "epoch": 0.9360912907962186, "grad_norm": 2.0760416984558105, "learning_rate": 6.418857739106439e-06, "loss": 0.3484358549118042, "step": 218040 }, { "epoch": 0.9361342228862386, "grad_norm": 0.0009414487867616117, "learning_rate": 6.414546018988815e-06, "loss": 0.08126358389854431, "step": 218050 }, { "epoch": 0.9361771549762585, "grad_norm": 0.0022511922288686037, "learning_rate": 6.410234298871192e-06, "loss": 0.21538751125335692, "step": 218060 }, { "epoch": 0.9362200870662786, "grad_norm": 6.507908821105957, "learning_rate": 6.405922578753567e-06, "loss": 0.12897765636444092, "step": 218070 }, { "epoch": 0.9362630191562986, "grad_norm": 0.005039474926888943, "learning_rate": 6.401610858635944e-06, "loss": 0.26041815280914304, "step": 218080 }, { "epoch": 0.9363059512463185, "grad_norm": 0.013466509990394115, "learning_rate": 6.397299138518321e-06, "loss": 0.1639467239379883, "step": 218090 }, { "epoch": 0.9363488833363386, "grad_norm": 0.13806986808776855, "learning_rate": 6.392987418400698e-06, "loss": 0.15577703714370728, "step": 218100 }, { "epoch": 0.9363918154263586, "grad_norm": 0.0029524166602641344, "learning_rate": 6.388675698283074e-06, "loss": 0.2313316822052002, "step": 218110 }, { "epoch": 0.9364347475163786, "grad_norm": 0.38946065306663513, "learning_rate": 6.38436397816545e-06, "loss": 0.048220235109329226, "step": 218120 }, { "epoch": 0.9364776796063986, "grad_norm": 0.1648869812488556, "learning_rate": 6.380052258047826e-06, "loss": 0.14529719352722167, "step": 218130 }, { "epoch": 0.9365206116964186, "grad_norm": 0.9979525208473206, "learning_rate": 6.375740537930203e-06, "loss": 0.3048782587051392, "step": 218140 }, { "epoch": 0.9365635437864386, "grad_norm": 16.755258560180664, "learning_rate": 6.3714288178125785e-06, "loss": 0.2275531768798828, "step": 218150 }, { "epoch": 0.9366064758764586, "grad_norm": 0.02060013823211193, "learning_rate": 6.367117097694955e-06, "loss": 0.19351630210876464, "step": 218160 }, { "epoch": 0.9366494079664787, "grad_norm": 0.009929434396326542, "learning_rate": 6.3628053775773315e-06, "loss": 0.046320590376853946, "step": 218170 }, { "epoch": 0.9366923400564986, "grad_norm": 1.319644570350647, "learning_rate": 6.358493657459708e-06, "loss": 0.15600812435150146, "step": 218180 }, { "epoch": 0.9367352721465186, "grad_norm": 0.17497789859771729, "learning_rate": 6.354181937342084e-06, "loss": 0.21659915447235106, "step": 218190 }, { "epoch": 0.9367782042365387, "grad_norm": 3.353944778442383, "learning_rate": 6.34987021722446e-06, "loss": 0.1580977439880371, "step": 218200 }, { "epoch": 0.9368211363265586, "grad_norm": 1.8000859022140503, "learning_rate": 6.345558497106837e-06, "loss": 0.15619392395019532, "step": 218210 }, { "epoch": 0.9368640684165787, "grad_norm": 0.007903819903731346, "learning_rate": 6.341246776989212e-06, "loss": 0.37516734600067136, "step": 218220 }, { "epoch": 0.9369070005065987, "grad_norm": 2.224149227142334, "learning_rate": 6.336935056871589e-06, "loss": 0.07544257044792176, "step": 218230 }, { "epoch": 0.9369499325966186, "grad_norm": 0.026319283992052078, "learning_rate": 6.332623336753965e-06, "loss": 0.0577617883682251, "step": 218240 }, { "epoch": 0.9369928646866387, "grad_norm": 0.0009843171574175358, "learning_rate": 6.328311616636341e-06, "loss": 0.2975840330123901, "step": 218250 }, { "epoch": 0.9370357967766587, "grad_norm": 0.189020037651062, "learning_rate": 6.3239998965187174e-06, "loss": 0.38624231815338134, "step": 218260 }, { "epoch": 0.9370787288666786, "grad_norm": 0.8112937808036804, "learning_rate": 6.319688176401094e-06, "loss": 0.2941300392150879, "step": 218270 }, { "epoch": 0.9371216609566987, "grad_norm": 1.3181637525558472, "learning_rate": 6.31537645628347e-06, "loss": 0.49183197021484376, "step": 218280 }, { "epoch": 0.9371645930467187, "grad_norm": 1.5222159624099731, "learning_rate": 6.311064736165846e-06, "loss": 0.16584717035293578, "step": 218290 }, { "epoch": 0.9372075251367387, "grad_norm": 1.103300929069519, "learning_rate": 6.306753016048223e-06, "loss": 0.1373578667640686, "step": 218300 }, { "epoch": 0.9372504572267587, "grad_norm": 0.04471690580248833, "learning_rate": 6.302441295930599e-06, "loss": 0.3182393789291382, "step": 218310 }, { "epoch": 0.9372933893167787, "grad_norm": 0.02034154161810875, "learning_rate": 6.298129575812975e-06, "loss": 0.09224039316177368, "step": 218320 }, { "epoch": 0.9373363214067987, "grad_norm": 15.507043838500977, "learning_rate": 6.293817855695351e-06, "loss": 0.44158592224121096, "step": 218330 }, { "epoch": 0.9373792534968187, "grad_norm": 0.013069471344351768, "learning_rate": 6.289506135577728e-06, "loss": 0.15449692010879518, "step": 218340 }, { "epoch": 0.9374221855868388, "grad_norm": 0.2247493863105774, "learning_rate": 6.285194415460103e-06, "loss": 0.2751898288726807, "step": 218350 }, { "epoch": 0.9374651176768587, "grad_norm": 0.8723449110984802, "learning_rate": 6.28088269534248e-06, "loss": 0.17945202589035034, "step": 218360 }, { "epoch": 0.9375080497668787, "grad_norm": 0.0049847024492919445, "learning_rate": 6.276570975224856e-06, "loss": 0.1884321689605713, "step": 218370 }, { "epoch": 0.9375509818568988, "grad_norm": 0.017331691458821297, "learning_rate": 6.272259255107232e-06, "loss": 0.029934722185134887, "step": 218380 }, { "epoch": 0.9375939139469187, "grad_norm": 3.178102970123291, "learning_rate": 6.2679475349896085e-06, "loss": 0.1653683066368103, "step": 218390 }, { "epoch": 0.9376368460369388, "grad_norm": 0.003745247842743993, "learning_rate": 6.263635814871985e-06, "loss": 0.14961315393447877, "step": 218400 }, { "epoch": 0.9376797781269588, "grad_norm": 0.30254513025283813, "learning_rate": 6.259324094754361e-06, "loss": 0.11315090656280517, "step": 218410 }, { "epoch": 0.9377227102169787, "grad_norm": 0.0023749074898660183, "learning_rate": 6.255012374636737e-06, "loss": 0.2886801242828369, "step": 218420 }, { "epoch": 0.9377656423069988, "grad_norm": 0.0015023527666926384, "learning_rate": 6.250700654519114e-06, "loss": 0.17715065479278563, "step": 218430 }, { "epoch": 0.9378085743970188, "grad_norm": 0.017936963587999344, "learning_rate": 6.24638893440149e-06, "loss": 0.32670762538909914, "step": 218440 }, { "epoch": 0.9378515064870389, "grad_norm": 2.004991292953491, "learning_rate": 6.242077214283867e-06, "loss": 0.2062148094177246, "step": 218450 }, { "epoch": 0.9378944385770588, "grad_norm": 0.18925458192825317, "learning_rate": 6.237765494166243e-06, "loss": 0.08423059582710266, "step": 218460 }, { "epoch": 0.9379373706670788, "grad_norm": 6.76282262802124, "learning_rate": 6.233453774048619e-06, "loss": 0.1460658073425293, "step": 218470 }, { "epoch": 0.9379803027570989, "grad_norm": 1.7375794649124146, "learning_rate": 6.229142053930995e-06, "loss": 0.2405299186706543, "step": 218480 }, { "epoch": 0.9380232348471188, "grad_norm": 0.9361600279808044, "learning_rate": 6.224830333813372e-06, "loss": 0.31464076042175293, "step": 218490 }, { "epoch": 0.9380661669371388, "grad_norm": 1.7604894638061523, "learning_rate": 6.220518613695748e-06, "loss": 0.16823394298553468, "step": 218500 }, { "epoch": 0.9381090990271589, "grad_norm": 0.03205284848809242, "learning_rate": 6.216206893578124e-06, "loss": 0.17398911714553833, "step": 218510 }, { "epoch": 0.9381520311171788, "grad_norm": 0.051615796983242035, "learning_rate": 6.2118951734605e-06, "loss": 0.1916268587112427, "step": 218520 }, { "epoch": 0.9381949632071989, "grad_norm": 1.9579862356185913, "learning_rate": 6.207583453342877e-06, "loss": 0.16121180057525636, "step": 218530 }, { "epoch": 0.9382378952972189, "grad_norm": 1.5483946800231934, "learning_rate": 6.203271733225253e-06, "loss": 0.06491850018501281, "step": 218540 }, { "epoch": 0.9382808273872388, "grad_norm": 0.05101202428340912, "learning_rate": 6.19896001310763e-06, "loss": 0.08455324172973633, "step": 218550 }, { "epoch": 0.9383237594772589, "grad_norm": 0.040585048496723175, "learning_rate": 6.194648292990006e-06, "loss": 0.1799705743789673, "step": 218560 }, { "epoch": 0.9383666915672789, "grad_norm": 0.016378004103899002, "learning_rate": 6.190336572872382e-06, "loss": 0.134379780292511, "step": 218570 }, { "epoch": 0.9384096236572989, "grad_norm": 3.475637674331665, "learning_rate": 6.1860248527547585e-06, "loss": 0.15294344425201417, "step": 218580 }, { "epoch": 0.9384525557473189, "grad_norm": 0.05142517387866974, "learning_rate": 6.181713132637135e-06, "loss": 0.13303122520446778, "step": 218590 }, { "epoch": 0.9384954878373389, "grad_norm": 1.9796829223632812, "learning_rate": 6.177401412519511e-06, "loss": 0.3248765468597412, "step": 218600 }, { "epoch": 0.9385384199273589, "grad_norm": 0.013521007262170315, "learning_rate": 6.173089692401887e-06, "loss": 0.24125297069549562, "step": 218610 }, { "epoch": 0.9385813520173789, "grad_norm": 0.02028888463973999, "learning_rate": 6.168777972284264e-06, "loss": 0.10118316411972046, "step": 218620 }, { "epoch": 0.938624284107399, "grad_norm": 4.58914852142334, "learning_rate": 6.164466252166639e-06, "loss": 0.1950531005859375, "step": 218630 }, { "epoch": 0.9386672161974189, "grad_norm": 1.3460999727249146, "learning_rate": 6.160154532049016e-06, "loss": 0.19600038528442382, "step": 218640 }, { "epoch": 0.9387101482874389, "grad_norm": 0.016167912632226944, "learning_rate": 6.155842811931392e-06, "loss": 0.0983378529548645, "step": 218650 }, { "epoch": 0.938753080377459, "grad_norm": 1.4347484111785889, "learning_rate": 6.151531091813768e-06, "loss": 0.3592548370361328, "step": 218660 }, { "epoch": 0.9387960124674789, "grad_norm": 9.225719451904297, "learning_rate": 6.1472193716961445e-06, "loss": 0.2684290409088135, "step": 218670 }, { "epoch": 0.9388389445574989, "grad_norm": 0.01551085989922285, "learning_rate": 6.142907651578521e-06, "loss": 0.17376840114593506, "step": 218680 }, { "epoch": 0.938881876647519, "grad_norm": 0.1393050104379654, "learning_rate": 6.1385959314608975e-06, "loss": 0.3194669246673584, "step": 218690 }, { "epoch": 0.9389248087375389, "grad_norm": 3.2130279541015625, "learning_rate": 6.134284211343273e-06, "loss": 0.3998244047164917, "step": 218700 }, { "epoch": 0.938967740827559, "grad_norm": 14.124472618103027, "learning_rate": 6.12997249122565e-06, "loss": 0.11968344449996948, "step": 218710 }, { "epoch": 0.939010672917579, "grad_norm": 0.0034347360488027334, "learning_rate": 6.125660771108026e-06, "loss": 0.1548219919204712, "step": 218720 }, { "epoch": 0.9390536050075989, "grad_norm": 3.031308174133301, "learning_rate": 6.121349050990403e-06, "loss": 0.2987856388092041, "step": 218730 }, { "epoch": 0.939096537097619, "grad_norm": 0.6183398365974426, "learning_rate": 6.117037330872779e-06, "loss": 0.12947933673858641, "step": 218740 }, { "epoch": 0.939139469187639, "grad_norm": 1.35626220703125, "learning_rate": 6.112725610755156e-06, "loss": 0.2367931127548218, "step": 218750 }, { "epoch": 0.939182401277659, "grad_norm": 0.009454138576984406, "learning_rate": 6.108413890637531e-06, "loss": 0.2076733112335205, "step": 218760 }, { "epoch": 0.939225333367679, "grad_norm": 0.2160428762435913, "learning_rate": 6.104102170519908e-06, "loss": 0.010231452435255051, "step": 218770 }, { "epoch": 0.939268265457699, "grad_norm": 2.0446693897247314, "learning_rate": 6.099790450402284e-06, "loss": 0.34381661415100095, "step": 218780 }, { "epoch": 0.939311197547719, "grad_norm": 1.378746509552002, "learning_rate": 6.09547873028466e-06, "loss": 0.18798928260803222, "step": 218790 }, { "epoch": 0.939354129637739, "grad_norm": 1.4301226139068604, "learning_rate": 6.091167010167036e-06, "loss": 0.0660437822341919, "step": 218800 }, { "epoch": 0.939397061727759, "grad_norm": 0.016804933547973633, "learning_rate": 6.086855290049413e-06, "loss": 0.3777721643447876, "step": 218810 }, { "epoch": 0.939439993817779, "grad_norm": 1.1796166896820068, "learning_rate": 6.0825435699317885e-06, "loss": 0.1267245888710022, "step": 218820 }, { "epoch": 0.939482925907799, "grad_norm": 0.44042861461639404, "learning_rate": 6.078231849814165e-06, "loss": 0.1943804383277893, "step": 218830 }, { "epoch": 0.9395258579978191, "grad_norm": 1.6625487804412842, "learning_rate": 6.0739201296965415e-06, "loss": 0.25074880123138427, "step": 218840 }, { "epoch": 0.939568790087839, "grad_norm": 0.15306060016155243, "learning_rate": 6.069608409578917e-06, "loss": 0.41699953079223634, "step": 218850 }, { "epoch": 0.939611722177859, "grad_norm": 1.8667495250701904, "learning_rate": 6.065296689461294e-06, "loss": 0.3992176532745361, "step": 218860 }, { "epoch": 0.9396546542678791, "grad_norm": 0.0020866908598691225, "learning_rate": 6.06098496934367e-06, "loss": 0.14581599235534667, "step": 218870 }, { "epoch": 0.9396975863578991, "grad_norm": 0.02238316461443901, "learning_rate": 6.056673249226047e-06, "loss": 0.33009798526763917, "step": 218880 }, { "epoch": 0.9397405184479191, "grad_norm": 2.0602495670318604, "learning_rate": 6.052361529108422e-06, "loss": 0.12349958419799804, "step": 218890 }, { "epoch": 0.9397834505379391, "grad_norm": 0.0704239159822464, "learning_rate": 6.048049808990799e-06, "loss": 0.2625364542007446, "step": 218900 }, { "epoch": 0.9398263826279591, "grad_norm": 0.006151742767542601, "learning_rate": 6.043738088873175e-06, "loss": 0.28477323055267334, "step": 218910 }, { "epoch": 0.9398693147179791, "grad_norm": 1.7099026441574097, "learning_rate": 6.039426368755552e-06, "loss": 0.17000820636749267, "step": 218920 }, { "epoch": 0.9399122468079991, "grad_norm": 0.0685584619641304, "learning_rate": 6.035114648637928e-06, "loss": 0.2855337142944336, "step": 218930 }, { "epoch": 0.9399551788980192, "grad_norm": 0.1191517561674118, "learning_rate": 6.030802928520305e-06, "loss": 0.13334267139434813, "step": 218940 }, { "epoch": 0.9399981109880391, "grad_norm": 0.5394452214241028, "learning_rate": 6.02649120840268e-06, "loss": 0.3404886722564697, "step": 218950 }, { "epoch": 0.9400410430780591, "grad_norm": 0.04119795933365822, "learning_rate": 6.022179488285057e-06, "loss": 0.044364386796951295, "step": 218960 }, { "epoch": 0.9400839751680792, "grad_norm": 0.1636960208415985, "learning_rate": 6.017867768167433e-06, "loss": 0.1932442307472229, "step": 218970 }, { "epoch": 0.9401269072580991, "grad_norm": 0.033029649406671524, "learning_rate": 6.013556048049809e-06, "loss": 0.3541710376739502, "step": 218980 }, { "epoch": 0.9401698393481192, "grad_norm": 0.0074260905385017395, "learning_rate": 6.0092443279321856e-06, "loss": 0.07902343273162842, "step": 218990 }, { "epoch": 0.9402127714381392, "grad_norm": 5.728902816772461, "learning_rate": 6.004932607814562e-06, "loss": 0.2292637825012207, "step": 219000 }, { "epoch": 0.9402127714381392, "eval_loss": 0.3688412010669708, "eval_runtime": 27.5737, "eval_samples_per_second": 3.627, "eval_steps_per_second": 3.627, "step": 219000 }, { "epoch": 0.9402557035281591, "grad_norm": 1.7974296808242798, "learning_rate": 6.000620887696938e-06, "loss": 0.25763325691223143, "step": 219010 }, { "epoch": 0.9402986356181792, "grad_norm": 0.029638810083270073, "learning_rate": 5.996309167579314e-06, "loss": 0.15889897346496581, "step": 219020 }, { "epoch": 0.9403415677081992, "grad_norm": 0.028355387970805168, "learning_rate": 5.991997447461691e-06, "loss": 0.19087129831314087, "step": 219030 }, { "epoch": 0.9403844997982191, "grad_norm": 0.08307473361492157, "learning_rate": 5.987685727344066e-06, "loss": 0.11460771560668945, "step": 219040 }, { "epoch": 0.9404274318882392, "grad_norm": 0.028220640495419502, "learning_rate": 5.983374007226443e-06, "loss": 0.14903123378753663, "step": 219050 }, { "epoch": 0.9404703639782592, "grad_norm": 0.7593523859977722, "learning_rate": 5.979062287108819e-06, "loss": 0.23282818794250487, "step": 219060 }, { "epoch": 0.9405132960682792, "grad_norm": 0.014977425336837769, "learning_rate": 5.974750566991196e-06, "loss": 0.04835158586502075, "step": 219070 }, { "epoch": 0.9405562281582992, "grad_norm": 0.0452410951256752, "learning_rate": 5.9704388468735715e-06, "loss": 0.15343317985534669, "step": 219080 }, { "epoch": 0.9405991602483192, "grad_norm": 2.069753408432007, "learning_rate": 5.966127126755949e-06, "loss": 0.11665205955505371, "step": 219090 }, { "epoch": 0.9406420923383392, "grad_norm": 0.006946403067559004, "learning_rate": 5.9618154066383245e-06, "loss": 0.1817198634147644, "step": 219100 }, { "epoch": 0.9406850244283592, "grad_norm": 0.010719393379986286, "learning_rate": 5.957503686520701e-06, "loss": 0.22847046852111816, "step": 219110 }, { "epoch": 0.9407279565183793, "grad_norm": 0.008265381678938866, "learning_rate": 5.9531919664030775e-06, "loss": 0.3102251052856445, "step": 219120 }, { "epoch": 0.9407708886083992, "grad_norm": 1.2220699787139893, "learning_rate": 5.948880246285454e-06, "loss": 0.12826883792877197, "step": 219130 }, { "epoch": 0.9408138206984192, "grad_norm": 7.854069232940674, "learning_rate": 5.94456852616783e-06, "loss": 0.25547878742218016, "step": 219140 }, { "epoch": 0.9408567527884393, "grad_norm": 0.1460963934659958, "learning_rate": 5.940256806050206e-06, "loss": 0.10857096910476685, "step": 219150 }, { "epoch": 0.9408996848784592, "grad_norm": 0.013315374962985516, "learning_rate": 5.935945085932583e-06, "loss": 0.23141663074493407, "step": 219160 }, { "epoch": 0.9409426169684793, "grad_norm": 0.04174807667732239, "learning_rate": 5.931633365814958e-06, "loss": 0.15095480680465698, "step": 219170 }, { "epoch": 0.9409855490584993, "grad_norm": 2.723499059677124, "learning_rate": 5.927321645697335e-06, "loss": 0.0929310142993927, "step": 219180 }, { "epoch": 0.9410284811485192, "grad_norm": 0.0006184586673043668, "learning_rate": 5.923009925579711e-06, "loss": 0.15547598600387574, "step": 219190 }, { "epoch": 0.9410714132385393, "grad_norm": 0.8751519322395325, "learning_rate": 5.918698205462087e-06, "loss": 0.4736207962036133, "step": 219200 }, { "epoch": 0.9411143453285593, "grad_norm": 0.007214980665594339, "learning_rate": 5.914386485344463e-06, "loss": 0.09388966560363769, "step": 219210 }, { "epoch": 0.9411572774185792, "grad_norm": 0.007233398500829935, "learning_rate": 5.91007476522684e-06, "loss": 0.1745733141899109, "step": 219220 }, { "epoch": 0.9412002095085993, "grad_norm": 2.5610218048095703, "learning_rate": 5.9057630451092155e-06, "loss": 0.28267683982849123, "step": 219230 }, { "epoch": 0.9412431415986193, "grad_norm": 0.031056322157382965, "learning_rate": 5.901451324991592e-06, "loss": 0.09716549515724182, "step": 219240 }, { "epoch": 0.9412860736886393, "grad_norm": 0.07894471287727356, "learning_rate": 5.8971396048739685e-06, "loss": 0.02060786187648773, "step": 219250 }, { "epoch": 0.9413290057786593, "grad_norm": 0.019434994086623192, "learning_rate": 5.892827884756345e-06, "loss": 0.10689753293991089, "step": 219260 }, { "epoch": 0.9413719378686793, "grad_norm": 0.003029686165973544, "learning_rate": 5.8885161646387215e-06, "loss": 0.19583780765533448, "step": 219270 }, { "epoch": 0.9414148699586993, "grad_norm": 0.08387192338705063, "learning_rate": 5.884204444521098e-06, "loss": 0.0790525197982788, "step": 219280 }, { "epoch": 0.9414578020487193, "grad_norm": 1.89656662940979, "learning_rate": 5.879892724403474e-06, "loss": 0.19452766180038453, "step": 219290 }, { "epoch": 0.9415007341387394, "grad_norm": 0.09110067039728165, "learning_rate": 5.87558100428585e-06, "loss": 0.19205337762832642, "step": 219300 }, { "epoch": 0.9415436662287594, "grad_norm": 0.6194562911987305, "learning_rate": 5.871269284168227e-06, "loss": 0.1736610174179077, "step": 219310 }, { "epoch": 0.9415865983187793, "grad_norm": 0.30323392152786255, "learning_rate": 5.866957564050603e-06, "loss": 0.2051142454147339, "step": 219320 }, { "epoch": 0.9416295304087994, "grad_norm": 0.006133268587291241, "learning_rate": 5.862645843932979e-06, "loss": 0.24879255294799804, "step": 219330 }, { "epoch": 0.9416724624988194, "grad_norm": 0.033949390053749084, "learning_rate": 5.858334123815355e-06, "loss": 0.17589467763900757, "step": 219340 }, { "epoch": 0.9417153945888393, "grad_norm": 0.23580507934093475, "learning_rate": 5.854022403697732e-06, "loss": 0.1860203981399536, "step": 219350 }, { "epoch": 0.9417583266788594, "grad_norm": 0.953391969203949, "learning_rate": 5.8497106835801074e-06, "loss": 0.31667141914367675, "step": 219360 }, { "epoch": 0.9418012587688794, "grad_norm": 0.9369455575942993, "learning_rate": 5.845398963462484e-06, "loss": 0.08720980882644654, "step": 219370 }, { "epoch": 0.9418441908588994, "grad_norm": 0.8098713755607605, "learning_rate": 5.8410872433448604e-06, "loss": 0.23913743495941162, "step": 219380 }, { "epoch": 0.9418871229489194, "grad_norm": 1.5632972717285156, "learning_rate": 5.836775523227236e-06, "loss": 0.24591350555419922, "step": 219390 }, { "epoch": 0.9419300550389395, "grad_norm": 0.00022845991770736873, "learning_rate": 5.8324638031096126e-06, "loss": 0.11978145837783813, "step": 219400 }, { "epoch": 0.9419729871289594, "grad_norm": 0.6672672629356384, "learning_rate": 5.828152082991989e-06, "loss": 0.11547610759735108, "step": 219410 }, { "epoch": 0.9420159192189794, "grad_norm": 0.001994553254917264, "learning_rate": 5.823840362874365e-06, "loss": 0.2555203914642334, "step": 219420 }, { "epoch": 0.9420588513089995, "grad_norm": 0.18988437950611115, "learning_rate": 5.819528642756741e-06, "loss": 0.026738014817237855, "step": 219430 }, { "epoch": 0.9421017833990194, "grad_norm": 1.5366977453231812, "learning_rate": 5.815216922639118e-06, "loss": 0.2509934425354004, "step": 219440 }, { "epoch": 0.9421447154890394, "grad_norm": 0.04574510082602501, "learning_rate": 5.810905202521494e-06, "loss": 0.04108910858631134, "step": 219450 }, { "epoch": 0.9421876475790595, "grad_norm": 0.04924973472952843, "learning_rate": 5.806593482403871e-06, "loss": 0.4201669216156006, "step": 219460 }, { "epoch": 0.9422305796690794, "grad_norm": 0.005704942625015974, "learning_rate": 5.802281762286247e-06, "loss": 0.1917565107345581, "step": 219470 }, { "epoch": 0.9422735117590995, "grad_norm": 0.005896027199923992, "learning_rate": 5.797970042168623e-06, "loss": 0.154584801197052, "step": 219480 }, { "epoch": 0.9423164438491195, "grad_norm": 0.0035282974131405354, "learning_rate": 5.793658322050999e-06, "loss": 0.17236467599868774, "step": 219490 }, { "epoch": 0.9423593759391394, "grad_norm": 2.3953702449798584, "learning_rate": 5.789346601933376e-06, "loss": 0.4158484935760498, "step": 219500 }, { "epoch": 0.9424023080291595, "grad_norm": 0.0037427491042762995, "learning_rate": 5.785034881815752e-06, "loss": 0.14870126247406007, "step": 219510 }, { "epoch": 0.9424452401191795, "grad_norm": 0.0012468050699681044, "learning_rate": 5.780723161698128e-06, "loss": 0.11983139514923095, "step": 219520 }, { "epoch": 0.9424881722091994, "grad_norm": 0.008860177360475063, "learning_rate": 5.7764114415805045e-06, "loss": 0.2827944278717041, "step": 219530 }, { "epoch": 0.9425311042992195, "grad_norm": 0.1442103087902069, "learning_rate": 5.772099721462881e-06, "loss": 0.3095766305923462, "step": 219540 }, { "epoch": 0.9425740363892395, "grad_norm": 6.474790096282959, "learning_rate": 5.767788001345257e-06, "loss": 0.34821128845214844, "step": 219550 }, { "epoch": 0.9426169684792595, "grad_norm": 0.05650155246257782, "learning_rate": 5.763476281227633e-06, "loss": 0.08767632246017457, "step": 219560 }, { "epoch": 0.9426599005692795, "grad_norm": 0.001218812307342887, "learning_rate": 5.75916456111001e-06, "loss": 0.26210243701934816, "step": 219570 }, { "epoch": 0.9427028326592995, "grad_norm": 1.0307174921035767, "learning_rate": 5.754852840992385e-06, "loss": 0.20877382755279542, "step": 219580 }, { "epoch": 0.9427457647493195, "grad_norm": 1.477104663848877, "learning_rate": 5.750541120874762e-06, "loss": 0.1944859504699707, "step": 219590 }, { "epoch": 0.9427886968393395, "grad_norm": 0.008503386750817299, "learning_rate": 5.746229400757138e-06, "loss": 0.1722312092781067, "step": 219600 }, { "epoch": 0.9428316289293596, "grad_norm": 0.2559939920902252, "learning_rate": 5.741917680639514e-06, "loss": 0.2364104986190796, "step": 219610 }, { "epoch": 0.9428745610193795, "grad_norm": 1.0935306549072266, "learning_rate": 5.73760596052189e-06, "loss": 0.18122740983963012, "step": 219620 }, { "epoch": 0.9429174931093995, "grad_norm": 0.06634160131216049, "learning_rate": 5.733294240404268e-06, "loss": 0.03658463954925537, "step": 219630 }, { "epoch": 0.9429604251994196, "grad_norm": 0.009046138264238834, "learning_rate": 5.728982520286643e-06, "loss": 0.09423674941062928, "step": 219640 }, { "epoch": 0.9430033572894395, "grad_norm": 1.5938408374786377, "learning_rate": 5.72467080016902e-06, "loss": 0.16401240825653077, "step": 219650 }, { "epoch": 0.9430462893794596, "grad_norm": 0.20236362516880035, "learning_rate": 5.720359080051396e-06, "loss": 0.2952629327774048, "step": 219660 }, { "epoch": 0.9430892214694796, "grad_norm": 0.6822418570518494, "learning_rate": 5.716047359933772e-06, "loss": 0.19778774976730346, "step": 219670 }, { "epoch": 0.9431321535594995, "grad_norm": 0.00200895918533206, "learning_rate": 5.7117356398161485e-06, "loss": 0.10837646722793579, "step": 219680 }, { "epoch": 0.9431750856495196, "grad_norm": 0.0010093733435496688, "learning_rate": 5.707423919698525e-06, "loss": 0.16900849342346191, "step": 219690 }, { "epoch": 0.9432180177395396, "grad_norm": 1.047550916671753, "learning_rate": 5.7031121995809015e-06, "loss": 0.16191253662109376, "step": 219700 }, { "epoch": 0.9432609498295595, "grad_norm": 1.4691581726074219, "learning_rate": 5.698800479463277e-06, "loss": 0.27159914970397947, "step": 219710 }, { "epoch": 0.9433038819195796, "grad_norm": 0.006137867458164692, "learning_rate": 5.694488759345654e-06, "loss": 0.12011933326721191, "step": 219720 }, { "epoch": 0.9433468140095996, "grad_norm": 3.10224986076355, "learning_rate": 5.69017703922803e-06, "loss": 0.12906384468078613, "step": 219730 }, { "epoch": 0.9433897460996197, "grad_norm": 1.1125973463058472, "learning_rate": 5.685865319110406e-06, "loss": 0.11585037708282471, "step": 219740 }, { "epoch": 0.9434326781896396, "grad_norm": 1.7985775470733643, "learning_rate": 5.681553598992782e-06, "loss": 0.15065838098526002, "step": 219750 }, { "epoch": 0.9434756102796596, "grad_norm": 1.7841936349868774, "learning_rate": 5.677241878875159e-06, "loss": 0.29679064750671386, "step": 219760 }, { "epoch": 0.9435185423696797, "grad_norm": 0.0036705408710986376, "learning_rate": 5.6729301587575344e-06, "loss": 0.08027640581130982, "step": 219770 }, { "epoch": 0.9435614744596996, "grad_norm": 0.0015720472438260913, "learning_rate": 5.668618438639911e-06, "loss": 0.13819996118545533, "step": 219780 }, { "epoch": 0.9436044065497197, "grad_norm": 0.1628006100654602, "learning_rate": 5.6643067185222874e-06, "loss": 0.13903387784957885, "step": 219790 }, { "epoch": 0.9436473386397397, "grad_norm": 0.23969070613384247, "learning_rate": 5.659994998404664e-06, "loss": 0.07040133476257324, "step": 219800 }, { "epoch": 0.9436902707297596, "grad_norm": 5.787487030029297, "learning_rate": 5.6556832782870404e-06, "loss": 0.36126892566680907, "step": 219810 }, { "epoch": 0.9437332028197797, "grad_norm": 0.8496485352516174, "learning_rate": 5.651371558169417e-06, "loss": 0.3064922332763672, "step": 219820 }, { "epoch": 0.9437761349097997, "grad_norm": 2.6420037746429443, "learning_rate": 5.6470598380517926e-06, "loss": 0.23954577445983888, "step": 219830 }, { "epoch": 0.9438190669998197, "grad_norm": 1.964925765991211, "learning_rate": 5.642748117934169e-06, "loss": 0.47455859184265137, "step": 219840 }, { "epoch": 0.9438619990898397, "grad_norm": 0.029651742428541183, "learning_rate": 5.6384363978165456e-06, "loss": 0.178091561794281, "step": 219850 }, { "epoch": 0.9439049311798597, "grad_norm": 0.015751611441373825, "learning_rate": 5.634124677698921e-06, "loss": 0.17995089292526245, "step": 219860 }, { "epoch": 0.9439478632698797, "grad_norm": 0.011860656552016735, "learning_rate": 5.629812957581298e-06, "loss": 0.15687016248703003, "step": 219870 }, { "epoch": 0.9439907953598997, "grad_norm": 0.003596897004172206, "learning_rate": 5.625501237463674e-06, "loss": 0.04495824873447418, "step": 219880 }, { "epoch": 0.9440337274499198, "grad_norm": 0.0010537290945649147, "learning_rate": 5.621189517346051e-06, "loss": 0.1898065447807312, "step": 219890 }, { "epoch": 0.9440766595399397, "grad_norm": 4.584033012390137, "learning_rate": 5.616877797228426e-06, "loss": 0.2350625991821289, "step": 219900 }, { "epoch": 0.9441195916299597, "grad_norm": 0.002993101254105568, "learning_rate": 5.612566077110803e-06, "loss": 0.1021914005279541, "step": 219910 }, { "epoch": 0.9441625237199798, "grad_norm": 0.31038209795951843, "learning_rate": 5.608254356993179e-06, "loss": 0.06648457646369935, "step": 219920 }, { "epoch": 0.9442054558099997, "grad_norm": 0.01189296692609787, "learning_rate": 5.603942636875555e-06, "loss": 0.05628054738044739, "step": 219930 }, { "epoch": 0.9442483879000197, "grad_norm": 0.018562033772468567, "learning_rate": 5.5996309167579315e-06, "loss": 0.37505056858062746, "step": 219940 }, { "epoch": 0.9442913199900398, "grad_norm": 0.02130456268787384, "learning_rate": 5.595319196640308e-06, "loss": 0.24764158725738525, "step": 219950 }, { "epoch": 0.9443342520800597, "grad_norm": 0.0013278445694595575, "learning_rate": 5.591007476522684e-06, "loss": 0.061684155464172365, "step": 219960 }, { "epoch": 0.9443771841700798, "grad_norm": 0.012180428020656109, "learning_rate": 5.58669575640506e-06, "loss": 0.03911663293838501, "step": 219970 }, { "epoch": 0.9444201162600998, "grad_norm": 0.10410662740468979, "learning_rate": 5.582384036287437e-06, "loss": 0.24889121055603028, "step": 219980 }, { "epoch": 0.9444630483501197, "grad_norm": 2.865203380584717, "learning_rate": 5.578072316169813e-06, "loss": 0.5030947685241699, "step": 219990 }, { "epoch": 0.9445059804401398, "grad_norm": 9.011594772338867, "learning_rate": 5.57376059605219e-06, "loss": 0.14479026794433594, "step": 220000 }, { "epoch": 0.9445059804401398, "eval_loss": 0.36936265230178833, "eval_runtime": 27.4733, "eval_samples_per_second": 3.64, "eval_steps_per_second": 3.64, "step": 220000 }, { "epoch": 0.9445489125301598, "grad_norm": 0.02283742092549801, "learning_rate": 5.569448875934566e-06, "loss": 0.2979933977127075, "step": 220010 }, { "epoch": 0.9445918446201798, "grad_norm": 0.8922110795974731, "learning_rate": 5.565137155816942e-06, "loss": 0.21819326877593995, "step": 220020 }, { "epoch": 0.9446347767101998, "grad_norm": 0.0026191927026957273, "learning_rate": 5.560825435699318e-06, "loss": 0.12958024740219115, "step": 220030 }, { "epoch": 0.9446777088002198, "grad_norm": 0.0004326049529481679, "learning_rate": 5.556513715581695e-06, "loss": 0.13836138248443602, "step": 220040 }, { "epoch": 0.9447206408902398, "grad_norm": 1.2779291868209839, "learning_rate": 5.55220199546407e-06, "loss": 0.4020243167877197, "step": 220050 }, { "epoch": 0.9447635729802598, "grad_norm": 0.005593777634203434, "learning_rate": 5.547890275346447e-06, "loss": 0.11673593521118164, "step": 220060 }, { "epoch": 0.9448065050702799, "grad_norm": 0.13254858553409576, "learning_rate": 5.543578555228823e-06, "loss": 0.15964832305908203, "step": 220070 }, { "epoch": 0.9448494371602998, "grad_norm": 4.318567752838135, "learning_rate": 5.5392668351112e-06, "loss": 0.2629995584487915, "step": 220080 }, { "epoch": 0.9448923692503198, "grad_norm": 0.009927893988788128, "learning_rate": 5.5349551149935755e-06, "loss": 0.2225257158279419, "step": 220090 }, { "epoch": 0.9449353013403399, "grad_norm": 0.00321480305865407, "learning_rate": 5.530643394875952e-06, "loss": 0.13898024559020997, "step": 220100 }, { "epoch": 0.9449782334303598, "grad_norm": 5.857727527618408, "learning_rate": 5.5263316747583285e-06, "loss": 0.08403302431106567, "step": 220110 }, { "epoch": 0.9450211655203798, "grad_norm": 1.4161524772644043, "learning_rate": 5.522019954640704e-06, "loss": 0.23849353790283204, "step": 220120 }, { "epoch": 0.9450640976103999, "grad_norm": 0.0011095688678324223, "learning_rate": 5.517708234523081e-06, "loss": 0.18472713232040405, "step": 220130 }, { "epoch": 0.9451070297004198, "grad_norm": 0.8806191682815552, "learning_rate": 5.513396514405457e-06, "loss": 0.18616429567337037, "step": 220140 }, { "epoch": 0.9451499617904399, "grad_norm": 2.297152042388916, "learning_rate": 5.509084794287833e-06, "loss": 0.3651312828063965, "step": 220150 }, { "epoch": 0.9451928938804599, "grad_norm": 0.0011765279341489077, "learning_rate": 5.504773074170209e-06, "loss": 0.31837148666381837, "step": 220160 }, { "epoch": 0.94523582597048, "grad_norm": 0.9704437255859375, "learning_rate": 5.500461354052586e-06, "loss": 0.1830833673477173, "step": 220170 }, { "epoch": 0.9452787580604999, "grad_norm": 1.3971502780914307, "learning_rate": 5.496149633934962e-06, "loss": 0.3164043664932251, "step": 220180 }, { "epoch": 0.9453216901505199, "grad_norm": 6.114679336547852, "learning_rate": 5.491837913817339e-06, "loss": 0.0872846245765686, "step": 220190 }, { "epoch": 0.94536462224054, "grad_norm": 0.43857166171073914, "learning_rate": 5.487526193699715e-06, "loss": 0.28688344955444334, "step": 220200 }, { "epoch": 0.9454075543305599, "grad_norm": 0.9512658715248108, "learning_rate": 5.483214473582091e-06, "loss": 0.34722816944122314, "step": 220210 }, { "epoch": 0.9454504864205799, "grad_norm": 0.20257291197776794, "learning_rate": 5.4789027534644674e-06, "loss": 0.22685813903808594, "step": 220220 }, { "epoch": 0.9454934185106, "grad_norm": 0.0031715496443212032, "learning_rate": 5.474591033346844e-06, "loss": 0.026157155632972717, "step": 220230 }, { "epoch": 0.9455363506006199, "grad_norm": 0.053399458527565, "learning_rate": 5.4702793132292204e-06, "loss": 0.18398308753967285, "step": 220240 }, { "epoch": 0.94557928269064, "grad_norm": 0.3509232997894287, "learning_rate": 5.465967593111596e-06, "loss": 0.005688534304499626, "step": 220250 }, { "epoch": 0.94562221478066, "grad_norm": 0.003000019583851099, "learning_rate": 5.461655872993973e-06, "loss": 0.2526733875274658, "step": 220260 }, { "epoch": 0.9456651468706799, "grad_norm": 0.8390291929244995, "learning_rate": 5.457344152876349e-06, "loss": 0.293306303024292, "step": 220270 }, { "epoch": 0.9457080789607, "grad_norm": 0.4303343892097473, "learning_rate": 5.453032432758725e-06, "loss": 0.2933788299560547, "step": 220280 }, { "epoch": 0.94575101105072, "grad_norm": 2.3883321285247803, "learning_rate": 5.448720712641101e-06, "loss": 0.4658994197845459, "step": 220290 }, { "epoch": 0.94579394314074, "grad_norm": 0.0033390114549547434, "learning_rate": 5.444408992523478e-06, "loss": 0.3251925468444824, "step": 220300 }, { "epoch": 0.94583687523076, "grad_norm": 0.006041026208549738, "learning_rate": 5.440097272405853e-06, "loss": 0.05924082398414612, "step": 220310 }, { "epoch": 0.94587980732078, "grad_norm": 1.786308765411377, "learning_rate": 5.43578555228823e-06, "loss": 0.15548089742660523, "step": 220320 }, { "epoch": 0.9459227394108, "grad_norm": 29.166587829589844, "learning_rate": 5.431473832170606e-06, "loss": 0.2582669734954834, "step": 220330 }, { "epoch": 0.94596567150082, "grad_norm": 0.40726613998413086, "learning_rate": 5.427162112052982e-06, "loss": 0.19830280542373657, "step": 220340 }, { "epoch": 0.94600860359084, "grad_norm": 0.1460336297750473, "learning_rate": 5.4228503919353585e-06, "loss": 0.2719969987869263, "step": 220350 }, { "epoch": 0.94605153568086, "grad_norm": 0.004638573620468378, "learning_rate": 5.418538671817736e-06, "loss": 0.287063455581665, "step": 220360 }, { "epoch": 0.94609446777088, "grad_norm": 0.43055668473243713, "learning_rate": 5.4142269517001115e-06, "loss": 0.11195619106292724, "step": 220370 }, { "epoch": 0.9461373998609001, "grad_norm": 0.0006677526980638504, "learning_rate": 5.409915231582488e-06, "loss": 0.2058488368988037, "step": 220380 }, { "epoch": 0.94618033195092, "grad_norm": 0.010899543762207031, "learning_rate": 5.4056035114648645e-06, "loss": 0.1301390767097473, "step": 220390 }, { "epoch": 0.94622326404094, "grad_norm": 0.5160672664642334, "learning_rate": 5.40129179134724e-06, "loss": 0.15139005184173585, "step": 220400 }, { "epoch": 0.9462661961309601, "grad_norm": 0.4792172610759735, "learning_rate": 5.396980071229617e-06, "loss": 0.03056088089942932, "step": 220410 }, { "epoch": 0.94630912822098, "grad_norm": 0.006284055765718222, "learning_rate": 5.392668351111993e-06, "loss": 0.0028323063626885412, "step": 220420 }, { "epoch": 0.946352060311, "grad_norm": 0.10239158570766449, "learning_rate": 5.38835663099437e-06, "loss": 0.15772597789764403, "step": 220430 }, { "epoch": 0.9463949924010201, "grad_norm": 0.047647874802351, "learning_rate": 5.384044910876745e-06, "loss": 0.19255368709564208, "step": 220440 }, { "epoch": 0.94643792449104, "grad_norm": 0.02043193392455578, "learning_rate": 5.379733190759122e-06, "loss": 0.05766103863716125, "step": 220450 }, { "epoch": 0.9464808565810601, "grad_norm": 0.2842939496040344, "learning_rate": 5.375421470641498e-06, "loss": 0.13044567108154298, "step": 220460 }, { "epoch": 0.9465237886710801, "grad_norm": 0.002361274790018797, "learning_rate": 5.371109750523874e-06, "loss": 0.10269827842712402, "step": 220470 }, { "epoch": 0.9465667207611, "grad_norm": 0.015903867781162262, "learning_rate": 5.36679803040625e-06, "loss": 0.45228824615478513, "step": 220480 }, { "epoch": 0.9466096528511201, "grad_norm": 0.014387888833880424, "learning_rate": 5.362486310288627e-06, "loss": 0.09613164663314819, "step": 220490 }, { "epoch": 0.9466525849411401, "grad_norm": 0.0561012402176857, "learning_rate": 5.3581745901710026e-06, "loss": 0.09313885569572448, "step": 220500 }, { "epoch": 0.9466955170311601, "grad_norm": 0.1349417120218277, "learning_rate": 5.353862870053379e-06, "loss": 0.0629725456237793, "step": 220510 }, { "epoch": 0.9467384491211801, "grad_norm": 1.3292343616485596, "learning_rate": 5.3495511499357555e-06, "loss": 0.13882611989974974, "step": 220520 }, { "epoch": 0.9467813812112001, "grad_norm": 0.27240267395973206, "learning_rate": 5.345239429818131e-06, "loss": 0.3212329149246216, "step": 220530 }, { "epoch": 0.9468243133012201, "grad_norm": 0.0015581254847347736, "learning_rate": 5.3409277097005085e-06, "loss": 0.08865725994110107, "step": 220540 }, { "epoch": 0.9468672453912401, "grad_norm": 0.024519003927707672, "learning_rate": 5.336615989582885e-06, "loss": 0.010979881882667542, "step": 220550 }, { "epoch": 0.9469101774812602, "grad_norm": 6.637258529663086, "learning_rate": 5.332304269465261e-06, "loss": 0.10928783416748047, "step": 220560 }, { "epoch": 0.9469531095712801, "grad_norm": 0.15391530096530914, "learning_rate": 5.327992549347637e-06, "loss": 0.22134747505187988, "step": 220570 }, { "epoch": 0.9469960416613001, "grad_norm": 3.40328049659729, "learning_rate": 5.323680829230014e-06, "loss": 0.32368927001953124, "step": 220580 }, { "epoch": 0.9470389737513202, "grad_norm": 1.0748347043991089, "learning_rate": 5.319369109112389e-06, "loss": 0.30531432628631594, "step": 220590 }, { "epoch": 0.9470819058413402, "grad_norm": 0.009988274425268173, "learning_rate": 5.315057388994766e-06, "loss": 0.07916906476020813, "step": 220600 }, { "epoch": 0.9471248379313602, "grad_norm": 0.06923183798789978, "learning_rate": 5.310745668877142e-06, "loss": 0.2095344305038452, "step": 220610 }, { "epoch": 0.9471677700213802, "grad_norm": 0.7235842347145081, "learning_rate": 5.306433948759519e-06, "loss": 0.2882643938064575, "step": 220620 }, { "epoch": 0.9472107021114002, "grad_norm": 0.03255331888794899, "learning_rate": 5.3021222286418945e-06, "loss": 0.14885547161102294, "step": 220630 }, { "epoch": 0.9472536342014202, "grad_norm": 0.00038300984306260943, "learning_rate": 5.297810508524271e-06, "loss": 0.11864241361618041, "step": 220640 }, { "epoch": 0.9472965662914402, "grad_norm": 1.5155200958251953, "learning_rate": 5.2934987884066475e-06, "loss": 0.17059919834136963, "step": 220650 }, { "epoch": 0.9473394983814603, "grad_norm": 0.0005635952693410218, "learning_rate": 5.289187068289023e-06, "loss": 0.19965181350708008, "step": 220660 }, { "epoch": 0.9473824304714802, "grad_norm": 0.0028896143194288015, "learning_rate": 5.2848753481714e-06, "loss": 0.11965832710266114, "step": 220670 }, { "epoch": 0.9474253625615002, "grad_norm": 0.004104061983525753, "learning_rate": 5.280563628053776e-06, "loss": 0.14908111095428467, "step": 220680 }, { "epoch": 0.9474682946515203, "grad_norm": 1.0493465662002563, "learning_rate": 5.276251907936152e-06, "loss": 0.15110547542572023, "step": 220690 }, { "epoch": 0.9475112267415402, "grad_norm": 3.4508376121520996, "learning_rate": 5.271940187818528e-06, "loss": 0.20434744358062745, "step": 220700 }, { "epoch": 0.9475541588315602, "grad_norm": 0.0009196574683301151, "learning_rate": 5.267628467700905e-06, "loss": 0.26406259536743165, "step": 220710 }, { "epoch": 0.9475970909215803, "grad_norm": 0.0016605377895757556, "learning_rate": 5.263316747583281e-06, "loss": 0.14054032564163207, "step": 220720 }, { "epoch": 0.9476400230116002, "grad_norm": 0.4991012513637543, "learning_rate": 5.259005027465658e-06, "loss": 0.1640143871307373, "step": 220730 }, { "epoch": 0.9476829551016203, "grad_norm": 0.18163564801216125, "learning_rate": 5.254693307348034e-06, "loss": 0.2269913911819458, "step": 220740 }, { "epoch": 0.9477258871916403, "grad_norm": 0.13701975345611572, "learning_rate": 5.25038158723041e-06, "loss": 0.1771934747695923, "step": 220750 }, { "epoch": 0.9477688192816602, "grad_norm": 0.009738288819789886, "learning_rate": 5.246069867112786e-06, "loss": 0.12999645471572877, "step": 220760 }, { "epoch": 0.9478117513716803, "grad_norm": 0.004294713959097862, "learning_rate": 5.241758146995163e-06, "loss": 0.21597652435302733, "step": 220770 }, { "epoch": 0.9478546834617003, "grad_norm": 0.0014341874048113823, "learning_rate": 5.2374464268775385e-06, "loss": 0.1990837574005127, "step": 220780 }, { "epoch": 0.9478976155517203, "grad_norm": 0.0011496704537421465, "learning_rate": 5.233134706759915e-06, "loss": 0.2995701789855957, "step": 220790 }, { "epoch": 0.9479405476417403, "grad_norm": 0.32946211099624634, "learning_rate": 5.2288229866422915e-06, "loss": 0.08457562923431397, "step": 220800 }, { "epoch": 0.9479834797317603, "grad_norm": 2.4673047065734863, "learning_rate": 5.224511266524668e-06, "loss": 0.3051250457763672, "step": 220810 }, { "epoch": 0.9480264118217803, "grad_norm": 1.6087555885314941, "learning_rate": 5.220199546407044e-06, "loss": 0.11010923385620117, "step": 220820 }, { "epoch": 0.9480693439118003, "grad_norm": 2.3338229656219482, "learning_rate": 5.21588782628942e-06, "loss": 0.2138131618499756, "step": 220830 }, { "epoch": 0.9481122760018204, "grad_norm": 5.679168224334717, "learning_rate": 5.211576106171797e-06, "loss": 0.17699214220046997, "step": 220840 }, { "epoch": 0.9481552080918403, "grad_norm": 0.005776789505034685, "learning_rate": 5.207264386054172e-06, "loss": 0.10060790777206421, "step": 220850 }, { "epoch": 0.9481981401818603, "grad_norm": 0.015451615676283836, "learning_rate": 5.202952665936549e-06, "loss": 0.37086925506591795, "step": 220860 }, { "epoch": 0.9482410722718804, "grad_norm": 1.4263032674789429, "learning_rate": 5.198640945818925e-06, "loss": 0.2528158903121948, "step": 220870 }, { "epoch": 0.9482840043619003, "grad_norm": 0.03226279467344284, "learning_rate": 5.194329225701301e-06, "loss": 0.08974118828773499, "step": 220880 }, { "epoch": 0.9483269364519203, "grad_norm": 4.70692777633667, "learning_rate": 5.1900175055836774e-06, "loss": 0.2029585361480713, "step": 220890 }, { "epoch": 0.9483698685419404, "grad_norm": 0.20702318847179413, "learning_rate": 5.185705785466054e-06, "loss": 0.2841609001159668, "step": 220900 }, { "epoch": 0.9484128006319603, "grad_norm": 0.005325347185134888, "learning_rate": 5.18139406534843e-06, "loss": 0.17788323163986205, "step": 220910 }, { "epoch": 0.9484557327219804, "grad_norm": 1.380802869796753, "learning_rate": 5.177082345230807e-06, "loss": 0.1807429552078247, "step": 220920 }, { "epoch": 0.9484986648120004, "grad_norm": 0.017762403935194016, "learning_rate": 5.172770625113183e-06, "loss": 0.12475830316543579, "step": 220930 }, { "epoch": 0.9485415969020203, "grad_norm": 1.45126211643219, "learning_rate": 5.168458904995559e-06, "loss": 0.06799777746200561, "step": 220940 }, { "epoch": 0.9485845289920404, "grad_norm": 0.7015895843505859, "learning_rate": 5.1641471848779356e-06, "loss": 0.2524296760559082, "step": 220950 }, { "epoch": 0.9486274610820604, "grad_norm": 0.03411026671528816, "learning_rate": 5.159835464760312e-06, "loss": 0.1578918218612671, "step": 220960 }, { "epoch": 0.9486703931720804, "grad_norm": 0.09593575447797775, "learning_rate": 5.155523744642688e-06, "loss": 0.3851247072219849, "step": 220970 }, { "epoch": 0.9487133252621004, "grad_norm": 0.019091414287686348, "learning_rate": 5.151212024525064e-06, "loss": 0.14782023429870605, "step": 220980 }, { "epoch": 0.9487562573521204, "grad_norm": 0.10230414569377899, "learning_rate": 5.146900304407441e-06, "loss": 0.17959287166595458, "step": 220990 }, { "epoch": 0.9487991894421404, "grad_norm": 1.1342577934265137, "learning_rate": 5.142588584289817e-06, "loss": 0.2069911003112793, "step": 221000 }, { "epoch": 0.9487991894421404, "eval_loss": 0.37033191323280334, "eval_runtime": 27.4293, "eval_samples_per_second": 3.646, "eval_steps_per_second": 3.646, "step": 221000 }, { "epoch": 0.9488421215321604, "grad_norm": 0.01214311458170414, "learning_rate": 5.138276864172193e-06, "loss": 0.07155942916870117, "step": 221010 }, { "epoch": 0.9488850536221805, "grad_norm": 0.0067489249631762505, "learning_rate": 5.133965144054569e-06, "loss": 0.14288434982299805, "step": 221020 }, { "epoch": 0.9489279857122005, "grad_norm": 0.021981053054332733, "learning_rate": 5.129653423936946e-06, "loss": 0.08851742744445801, "step": 221030 }, { "epoch": 0.9489709178022204, "grad_norm": 0.016317633911967278, "learning_rate": 5.1253417038193215e-06, "loss": 0.13080765008926393, "step": 221040 }, { "epoch": 0.9490138498922405, "grad_norm": 7.504158973693848, "learning_rate": 5.121029983701698e-06, "loss": 0.29133789539337157, "step": 221050 }, { "epoch": 0.9490567819822605, "grad_norm": 0.020063180476427078, "learning_rate": 5.1167182635840745e-06, "loss": 0.11202926635742187, "step": 221060 }, { "epoch": 0.9490997140722804, "grad_norm": 0.18971426784992218, "learning_rate": 5.11240654346645e-06, "loss": 0.18990329504013062, "step": 221070 }, { "epoch": 0.9491426461623005, "grad_norm": 0.0023197117261588573, "learning_rate": 5.108094823348827e-06, "loss": 0.24900193214416505, "step": 221080 }, { "epoch": 0.9491855782523205, "grad_norm": 0.003275302704423666, "learning_rate": 5.103783103231204e-06, "loss": 0.1832704186439514, "step": 221090 }, { "epoch": 0.9492285103423405, "grad_norm": 0.011212700977921486, "learning_rate": 5.09947138311358e-06, "loss": 0.12993799448013305, "step": 221100 }, { "epoch": 0.9492714424323605, "grad_norm": 0.09696463495492935, "learning_rate": 5.095159662995956e-06, "loss": 0.1601085901260376, "step": 221110 }, { "epoch": 0.9493143745223805, "grad_norm": 0.07188393920660019, "learning_rate": 5.090847942878333e-06, "loss": 0.192316997051239, "step": 221120 }, { "epoch": 0.9493573066124005, "grad_norm": 0.055957090109586716, "learning_rate": 5.086536222760708e-06, "loss": 0.15709545612335205, "step": 221130 }, { "epoch": 0.9494002387024205, "grad_norm": 0.9220591187477112, "learning_rate": 5.082224502643085e-06, "loss": 0.24518163204193116, "step": 221140 }, { "epoch": 0.9494431707924406, "grad_norm": 1.6489856243133545, "learning_rate": 5.077912782525461e-06, "loss": 0.218973708152771, "step": 221150 }, { "epoch": 0.9494861028824605, "grad_norm": 0.0015439556445926428, "learning_rate": 5.073601062407837e-06, "loss": 0.19692326784133912, "step": 221160 }, { "epoch": 0.9495290349724805, "grad_norm": 0.1865650713443756, "learning_rate": 5.069289342290213e-06, "loss": 0.16926788091659545, "step": 221170 }, { "epoch": 0.9495719670625006, "grad_norm": 1.5441687107086182, "learning_rate": 5.06497762217259e-06, "loss": 0.17856531143188475, "step": 221180 }, { "epoch": 0.9496148991525205, "grad_norm": 0.028018776327371597, "learning_rate": 5.060665902054966e-06, "loss": 0.20565359592437743, "step": 221190 }, { "epoch": 0.9496578312425406, "grad_norm": 2.1190805435180664, "learning_rate": 5.056354181937342e-06, "loss": 0.15518250465393066, "step": 221200 }, { "epoch": 0.9497007633325606, "grad_norm": 1.3152310848236084, "learning_rate": 5.0520424618197185e-06, "loss": 0.25301418304443357, "step": 221210 }, { "epoch": 0.9497436954225805, "grad_norm": 1.5250507593154907, "learning_rate": 5.047730741702095e-06, "loss": 0.20646183490753173, "step": 221220 }, { "epoch": 0.9497866275126006, "grad_norm": 0.0007811338873580098, "learning_rate": 5.043419021584471e-06, "loss": 0.17377300262451173, "step": 221230 }, { "epoch": 0.9498295596026206, "grad_norm": 1.1598446369171143, "learning_rate": 5.039107301466847e-06, "loss": 0.2322542667388916, "step": 221240 }, { "epoch": 0.9498724916926405, "grad_norm": 0.009316137060523033, "learning_rate": 5.034795581349224e-06, "loss": 0.07732200622558594, "step": 221250 }, { "epoch": 0.9499154237826606, "grad_norm": 0.407930850982666, "learning_rate": 5.030483861231599e-06, "loss": 0.24793801307678223, "step": 221260 }, { "epoch": 0.9499583558726806, "grad_norm": 0.048544321209192276, "learning_rate": 5.026172141113977e-06, "loss": 0.07298610806465149, "step": 221270 }, { "epoch": 0.9500012879627006, "grad_norm": 1.4429339170455933, "learning_rate": 5.021860420996353e-06, "loss": 0.23808639049530028, "step": 221280 }, { "epoch": 0.9500442200527206, "grad_norm": 0.14647912979125977, "learning_rate": 5.017548700878729e-06, "loss": 0.09555991888046264, "step": 221290 }, { "epoch": 0.9500871521427406, "grad_norm": 3.288055658340454, "learning_rate": 5.013236980761105e-06, "loss": 0.18354005813598634, "step": 221300 }, { "epoch": 0.9501300842327606, "grad_norm": 0.00022563387756235898, "learning_rate": 5.008925260643482e-06, "loss": 0.196072518825531, "step": 221310 }, { "epoch": 0.9501730163227806, "grad_norm": 1.5810396671295166, "learning_rate": 5.0046135405258574e-06, "loss": 0.21989688873291016, "step": 221320 }, { "epoch": 0.9502159484128007, "grad_norm": 1.266875147819519, "learning_rate": 5.000301820408234e-06, "loss": 0.13136966228485109, "step": 221330 }, { "epoch": 0.9502588805028206, "grad_norm": 0.001413840800523758, "learning_rate": 4.99599010029061e-06, "loss": 0.159348464012146, "step": 221340 }, { "epoch": 0.9503018125928406, "grad_norm": 0.009680492803454399, "learning_rate": 4.991678380172986e-06, "loss": 0.18243454694747924, "step": 221350 }, { "epoch": 0.9503447446828607, "grad_norm": 0.009590870700776577, "learning_rate": 4.9873666600553626e-06, "loss": 0.4021789073944092, "step": 221360 }, { "epoch": 0.9503876767728806, "grad_norm": 0.002812737599015236, "learning_rate": 4.983054939937739e-06, "loss": 0.34322056770324705, "step": 221370 }, { "epoch": 0.9504306088629006, "grad_norm": 0.021498220041394234, "learning_rate": 4.9787432198201156e-06, "loss": 0.1618666648864746, "step": 221380 }, { "epoch": 0.9504735409529207, "grad_norm": 0.000619508558884263, "learning_rate": 4.974431499702491e-06, "loss": 0.4161521911621094, "step": 221390 }, { "epoch": 0.9505164730429406, "grad_norm": 0.015014680102467537, "learning_rate": 4.970119779584868e-06, "loss": 0.1755547881126404, "step": 221400 }, { "epoch": 0.9505594051329607, "grad_norm": 0.9173482060432434, "learning_rate": 4.965808059467244e-06, "loss": 0.4441547870635986, "step": 221410 }, { "epoch": 0.9506023372229807, "grad_norm": 0.007916325703263283, "learning_rate": 4.96149633934962e-06, "loss": 0.05237484574317932, "step": 221420 }, { "epoch": 0.9506452693130006, "grad_norm": 0.14865094423294067, "learning_rate": 4.957184619231996e-06, "loss": 0.20918598175048828, "step": 221430 }, { "epoch": 0.9506882014030207, "grad_norm": 3.4020707607269287, "learning_rate": 4.952872899114373e-06, "loss": 0.18745356798171997, "step": 221440 }, { "epoch": 0.9507311334930407, "grad_norm": 0.0011085561709478498, "learning_rate": 4.948561178996749e-06, "loss": 0.17667560577392577, "step": 221450 }, { "epoch": 0.9507740655830608, "grad_norm": 8.518034934997559, "learning_rate": 4.944249458879126e-06, "loss": 0.09774234294891357, "step": 221460 }, { "epoch": 0.9508169976730807, "grad_norm": 0.3491690754890442, "learning_rate": 4.939937738761502e-06, "loss": 0.2363292694091797, "step": 221470 }, { "epoch": 0.9508599297631007, "grad_norm": 0.011719225905835629, "learning_rate": 4.935626018643878e-06, "loss": 0.18856680393218994, "step": 221480 }, { "epoch": 0.9509028618531208, "grad_norm": 0.00425582705065608, "learning_rate": 4.9313142985262545e-06, "loss": 0.1619241714477539, "step": 221490 }, { "epoch": 0.9509457939431407, "grad_norm": 0.0060163987800478935, "learning_rate": 4.927002578408631e-06, "loss": 0.16561635732650756, "step": 221500 }, { "epoch": 0.9509887260331608, "grad_norm": 0.01624118909239769, "learning_rate": 4.922690858291007e-06, "loss": 0.04563118517398834, "step": 221510 }, { "epoch": 0.9510316581231808, "grad_norm": 1.32929265499115, "learning_rate": 4.918379138173383e-06, "loss": 0.24808201789855958, "step": 221520 }, { "epoch": 0.9510745902132007, "grad_norm": 0.03225236386060715, "learning_rate": 4.91406741805576e-06, "loss": 0.24988224506378173, "step": 221530 }, { "epoch": 0.9511175223032208, "grad_norm": 2.835340738296509, "learning_rate": 4.909755697938135e-06, "loss": 0.15641406774520875, "step": 221540 }, { "epoch": 0.9511604543932408, "grad_norm": 0.08538325875997543, "learning_rate": 4.905443977820512e-06, "loss": 0.10083311796188354, "step": 221550 }, { "epoch": 0.9512033864832607, "grad_norm": 3.253385305404663, "learning_rate": 4.901132257702888e-06, "loss": 0.12257604598999024, "step": 221560 }, { "epoch": 0.9512463185732808, "grad_norm": 0.0689903274178505, "learning_rate": 4.896820537585265e-06, "loss": 0.2482537269592285, "step": 221570 }, { "epoch": 0.9512892506633008, "grad_norm": 0.9589329361915588, "learning_rate": 4.89250881746764e-06, "loss": 0.14661691188812256, "step": 221580 }, { "epoch": 0.9513321827533208, "grad_norm": 0.8014940023422241, "learning_rate": 4.888197097350017e-06, "loss": 0.15701075792312622, "step": 221590 }, { "epoch": 0.9513751148433408, "grad_norm": 0.0076012699864804745, "learning_rate": 4.883885377232393e-06, "loss": 0.20025289058685303, "step": 221600 }, { "epoch": 0.9514180469333609, "grad_norm": 0.9554777145385742, "learning_rate": 4.879573657114769e-06, "loss": 0.21109647750854493, "step": 221610 }, { "epoch": 0.9514609790233808, "grad_norm": 0.03870271146297455, "learning_rate": 4.8752619369971455e-06, "loss": 0.151367712020874, "step": 221620 }, { "epoch": 0.9515039111134008, "grad_norm": 12.868548393249512, "learning_rate": 4.870950216879523e-06, "loss": 0.17155493497848512, "step": 221630 }, { "epoch": 0.9515468432034209, "grad_norm": 1.6246249675750732, "learning_rate": 4.8666384967618985e-06, "loss": 0.3529273509979248, "step": 221640 }, { "epoch": 0.9515897752934408, "grad_norm": 1.4000318050384521, "learning_rate": 4.862326776644275e-06, "loss": 0.4355010509490967, "step": 221650 }, { "epoch": 0.9516327073834608, "grad_norm": 4.835864543914795, "learning_rate": 4.8580150565266515e-06, "loss": 0.22297344207763672, "step": 221660 }, { "epoch": 0.9516756394734809, "grad_norm": 0.011678424663841724, "learning_rate": 4.853703336409027e-06, "loss": 0.10040615797042847, "step": 221670 }, { "epoch": 0.9517185715635008, "grad_norm": 0.04759407415986061, "learning_rate": 4.849391616291404e-06, "loss": 0.20213911533355713, "step": 221680 }, { "epoch": 0.9517615036535209, "grad_norm": 0.0031532905995845795, "learning_rate": 4.84507989617378e-06, "loss": 0.26140198707580564, "step": 221690 }, { "epoch": 0.9518044357435409, "grad_norm": 0.004247985314577818, "learning_rate": 4.840768176056156e-06, "loss": 0.2709981441497803, "step": 221700 }, { "epoch": 0.9518473678335608, "grad_norm": 0.08644208312034607, "learning_rate": 4.836456455938532e-06, "loss": 0.15383058786392212, "step": 221710 }, { "epoch": 0.9518902999235809, "grad_norm": 0.18716543912887573, "learning_rate": 4.832144735820909e-06, "loss": 0.13800834417343139, "step": 221720 }, { "epoch": 0.9519332320136009, "grad_norm": 0.020547056570649147, "learning_rate": 4.8278330157032844e-06, "loss": 0.22123284339904786, "step": 221730 }, { "epoch": 0.9519761641036208, "grad_norm": 0.0005546602769754827, "learning_rate": 4.823521295585661e-06, "loss": 0.05822961926460266, "step": 221740 }, { "epoch": 0.9520190961936409, "grad_norm": 0.007388788275420666, "learning_rate": 4.8192095754680374e-06, "loss": 0.07534821033477783, "step": 221750 }, { "epoch": 0.9520620282836609, "grad_norm": 0.06283167749643326, "learning_rate": 4.814897855350414e-06, "loss": 0.05532159805297852, "step": 221760 }, { "epoch": 0.9521049603736809, "grad_norm": 0.12734369933605194, "learning_rate": 4.81058613523279e-06, "loss": 0.06152777075767517, "step": 221770 }, { "epoch": 0.9521478924637009, "grad_norm": 0.018678177148103714, "learning_rate": 4.806274415115166e-06, "loss": 0.1855409026145935, "step": 221780 }, { "epoch": 0.952190824553721, "grad_norm": 0.007766157388687134, "learning_rate": 4.8019626949975426e-06, "loss": 0.13776013851165772, "step": 221790 }, { "epoch": 0.9522337566437409, "grad_norm": 0.0021806948352605104, "learning_rate": 4.797650974879918e-06, "loss": 0.24061529636383056, "step": 221800 }, { "epoch": 0.9522766887337609, "grad_norm": 0.0725255087018013, "learning_rate": 4.793339254762295e-06, "loss": 0.002632809802889824, "step": 221810 }, { "epoch": 0.952319620823781, "grad_norm": 0.023199887946248055, "learning_rate": 4.789027534644672e-06, "loss": 0.05077831149101257, "step": 221820 }, { "epoch": 0.9523625529138009, "grad_norm": 1.4895657300949097, "learning_rate": 4.784715814527048e-06, "loss": 0.10186721086502075, "step": 221830 }, { "epoch": 0.9524054850038209, "grad_norm": 2.4794552326202393, "learning_rate": 4.780404094409424e-06, "loss": 0.31405203342437743, "step": 221840 }, { "epoch": 0.952448417093841, "grad_norm": 0.003952878527343273, "learning_rate": 4.776092374291801e-06, "loss": 0.2233206510543823, "step": 221850 }, { "epoch": 0.9524913491838609, "grad_norm": 0.006409044843167067, "learning_rate": 4.771780654174176e-06, "loss": 0.20069022178649903, "step": 221860 }, { "epoch": 0.952534281273881, "grad_norm": 8.640118598937988, "learning_rate": 4.767468934056553e-06, "loss": 0.11834697723388672, "step": 221870 }, { "epoch": 0.952577213363901, "grad_norm": 0.11279051005840302, "learning_rate": 4.763157213938929e-06, "loss": 0.1271551489830017, "step": 221880 }, { "epoch": 0.952620145453921, "grad_norm": 1.8554179668426514, "learning_rate": 4.758845493821305e-06, "loss": 0.1396311640739441, "step": 221890 }, { "epoch": 0.952663077543941, "grad_norm": 0.005013489164412022, "learning_rate": 4.7545337737036815e-06, "loss": 0.08952710628509522, "step": 221900 }, { "epoch": 0.952706009633961, "grad_norm": 0.006020084954798222, "learning_rate": 4.750222053586058e-06, "loss": 0.20054419040679933, "step": 221910 }, { "epoch": 0.9527489417239811, "grad_norm": 0.019753310829401016, "learning_rate": 4.745910333468434e-06, "loss": 0.23398704528808595, "step": 221920 }, { "epoch": 0.952791873814001, "grad_norm": 0.00018748146248981357, "learning_rate": 4.74159861335081e-06, "loss": 0.15820237398147582, "step": 221930 }, { "epoch": 0.952834805904021, "grad_norm": 0.02594497613608837, "learning_rate": 4.737286893233187e-06, "loss": 0.09363011121749878, "step": 221940 }, { "epoch": 0.9528777379940411, "grad_norm": 0.0038654941599816084, "learning_rate": 4.732975173115563e-06, "loss": 0.21626200675964355, "step": 221950 }, { "epoch": 0.952920670084061, "grad_norm": 0.00581236369907856, "learning_rate": 4.728663452997939e-06, "loss": 0.19654461145401, "step": 221960 }, { "epoch": 0.952963602174081, "grad_norm": 6.517816543579102, "learning_rate": 4.724351732880315e-06, "loss": 0.4622176170349121, "step": 221970 }, { "epoch": 0.9530065342641011, "grad_norm": 0.012695305980741978, "learning_rate": 4.720040012762692e-06, "loss": 0.21787841320037843, "step": 221980 }, { "epoch": 0.953049466354121, "grad_norm": 0.3617287576198578, "learning_rate": 4.715728292645067e-06, "loss": 0.25056912899017336, "step": 221990 }, { "epoch": 0.9530923984441411, "grad_norm": 0.04657085984945297, "learning_rate": 4.711416572527445e-06, "loss": 0.17787359952926635, "step": 222000 }, { "epoch": 0.9530923984441411, "eval_loss": 0.3706425726413727, "eval_runtime": 27.4304, "eval_samples_per_second": 3.646, "eval_steps_per_second": 3.646, "step": 222000 }, { "epoch": 0.9531353305341611, "grad_norm": 0.03316396474838257, "learning_rate": 4.707104852409821e-06, "loss": 0.15991002321243286, "step": 222010 }, { "epoch": 0.953178262624181, "grad_norm": 0.36852920055389404, "learning_rate": 4.702793132292197e-06, "loss": 0.1532452940940857, "step": 222020 }, { "epoch": 0.9532211947142011, "grad_norm": 4.372796535491943, "learning_rate": 4.698481412174573e-06, "loss": 0.2997922897338867, "step": 222030 }, { "epoch": 0.9532641268042211, "grad_norm": 2.077420711517334, "learning_rate": 4.69416969205695e-06, "loss": 0.3332381248474121, "step": 222040 }, { "epoch": 0.953307058894241, "grad_norm": 0.002367915352806449, "learning_rate": 4.6898579719393255e-06, "loss": 0.16526131629943847, "step": 222050 }, { "epoch": 0.9533499909842611, "grad_norm": 7.411321640014648, "learning_rate": 4.685546251821702e-06, "loss": 0.24013891220092773, "step": 222060 }, { "epoch": 0.9533929230742811, "grad_norm": 0.02261611446738243, "learning_rate": 4.6812345317040785e-06, "loss": 0.09087812900543213, "step": 222070 }, { "epoch": 0.9534358551643011, "grad_norm": 0.01428727526217699, "learning_rate": 4.676922811586454e-06, "loss": 0.21020045280456542, "step": 222080 }, { "epoch": 0.9534787872543211, "grad_norm": 0.25873035192489624, "learning_rate": 4.672611091468831e-06, "loss": 0.07374967336654663, "step": 222090 }, { "epoch": 0.9535217193443412, "grad_norm": 0.0773739367723465, "learning_rate": 4.668299371351207e-06, "loss": 0.3371073007583618, "step": 222100 }, { "epoch": 0.9535646514343611, "grad_norm": 0.024157024919986725, "learning_rate": 4.663987651233584e-06, "loss": 0.1477481961250305, "step": 222110 }, { "epoch": 0.9536075835243811, "grad_norm": 0.0007937061600387096, "learning_rate": 4.659675931115959e-06, "loss": 0.1839459180831909, "step": 222120 }, { "epoch": 0.9536505156144012, "grad_norm": 0.010143321007490158, "learning_rate": 4.655364210998336e-06, "loss": 0.3554100275039673, "step": 222130 }, { "epoch": 0.9536934477044211, "grad_norm": 0.01476583257317543, "learning_rate": 4.651052490880712e-06, "loss": 0.16000083684921265, "step": 222140 }, { "epoch": 0.9537363797944411, "grad_norm": 0.0036839002277702093, "learning_rate": 4.646740770763088e-06, "loss": 0.12175587415695191, "step": 222150 }, { "epoch": 0.9537793118844612, "grad_norm": 0.023834414780139923, "learning_rate": 4.6424290506454645e-06, "loss": 0.19544967412948608, "step": 222160 }, { "epoch": 0.9538222439744811, "grad_norm": 0.0008877997170202434, "learning_rate": 4.638117330527841e-06, "loss": 0.21831424236297609, "step": 222170 }, { "epoch": 0.9538651760645012, "grad_norm": 0.518056333065033, "learning_rate": 4.6338056104102174e-06, "loss": 0.277052640914917, "step": 222180 }, { "epoch": 0.9539081081545212, "grad_norm": 2.052628517150879, "learning_rate": 4.629493890292594e-06, "loss": 0.18043670654296876, "step": 222190 }, { "epoch": 0.9539510402445411, "grad_norm": 2.4332070350646973, "learning_rate": 4.6251821701749704e-06, "loss": 0.37254397869110106, "step": 222200 }, { "epoch": 0.9539939723345612, "grad_norm": 2.9976701736450195, "learning_rate": 4.620870450057346e-06, "loss": 0.2258861780166626, "step": 222210 }, { "epoch": 0.9540369044245812, "grad_norm": 4.1985344886779785, "learning_rate": 4.616558729939723e-06, "loss": 0.3831270694732666, "step": 222220 }, { "epoch": 0.9540798365146012, "grad_norm": 1.095831274986267, "learning_rate": 4.612247009822099e-06, "loss": 0.2485506296157837, "step": 222230 }, { "epoch": 0.9541227686046212, "grad_norm": 0.0008200284210033715, "learning_rate": 4.607935289704475e-06, "loss": 0.12095870971679687, "step": 222240 }, { "epoch": 0.9541657006946412, "grad_norm": 0.001585979014635086, "learning_rate": 4.603623569586851e-06, "loss": 0.11065644025802612, "step": 222250 }, { "epoch": 0.9542086327846612, "grad_norm": 1.3344343900680542, "learning_rate": 4.599311849469228e-06, "loss": 0.2467266798019409, "step": 222260 }, { "epoch": 0.9542515648746812, "grad_norm": 0.8917443156242371, "learning_rate": 4.595000129351603e-06, "loss": 0.3195840358734131, "step": 222270 }, { "epoch": 0.9542944969647013, "grad_norm": 0.0020116260275244713, "learning_rate": 4.59068840923398e-06, "loss": 0.24985339641571044, "step": 222280 }, { "epoch": 0.9543374290547212, "grad_norm": 1.8657126426696777, "learning_rate": 4.586376689116356e-06, "loss": 0.19289579391479492, "step": 222290 }, { "epoch": 0.9543803611447412, "grad_norm": 0.014383583329617977, "learning_rate": 4.582064968998733e-06, "loss": 0.17697054147720337, "step": 222300 }, { "epoch": 0.9544232932347613, "grad_norm": 0.055165525525808334, "learning_rate": 4.5777532488811085e-06, "loss": 0.2178732395172119, "step": 222310 }, { "epoch": 0.9544662253247813, "grad_norm": 0.20588642358779907, "learning_rate": 4.573441528763485e-06, "loss": 0.34146618843078613, "step": 222320 }, { "epoch": 0.9545091574148012, "grad_norm": 1.7712666988372803, "learning_rate": 4.5691298086458615e-06, "loss": 0.22836158275604249, "step": 222330 }, { "epoch": 0.9545520895048213, "grad_norm": 0.004596407525241375, "learning_rate": 4.564818088528237e-06, "loss": 0.016224928200244904, "step": 222340 }, { "epoch": 0.9545950215948413, "grad_norm": 0.006730203051120043, "learning_rate": 4.560506368410614e-06, "loss": 0.07292388081550598, "step": 222350 }, { "epoch": 0.9546379536848613, "grad_norm": 0.009615895338356495, "learning_rate": 4.55619464829299e-06, "loss": 0.09958805441856385, "step": 222360 }, { "epoch": 0.9546808857748813, "grad_norm": 0.8345091342926025, "learning_rate": 4.551882928175367e-06, "loss": 0.3748096704483032, "step": 222370 }, { "epoch": 0.9547238178649013, "grad_norm": 9.58636474609375, "learning_rate": 4.547571208057743e-06, "loss": 0.40106916427612305, "step": 222380 }, { "epoch": 0.9547667499549213, "grad_norm": 0.22981485724449158, "learning_rate": 4.54325948794012e-06, "loss": 0.0750343382358551, "step": 222390 }, { "epoch": 0.9548096820449413, "grad_norm": 1.1611047983169556, "learning_rate": 4.538947767822495e-06, "loss": 0.15348081588745116, "step": 222400 }, { "epoch": 0.9548526141349614, "grad_norm": 0.0849420502781868, "learning_rate": 4.534636047704872e-06, "loss": 0.2569085359573364, "step": 222410 }, { "epoch": 0.9548955462249813, "grad_norm": 0.020023846998810768, "learning_rate": 4.530324327587248e-06, "loss": 0.2325721263885498, "step": 222420 }, { "epoch": 0.9549384783150013, "grad_norm": 1.6331207752227783, "learning_rate": 4.526012607469624e-06, "loss": 0.1936333417892456, "step": 222430 }, { "epoch": 0.9549814104050214, "grad_norm": 0.06379146873950958, "learning_rate": 4.521700887352e-06, "loss": 0.12804726362228394, "step": 222440 }, { "epoch": 0.9550243424950413, "grad_norm": 0.0015627113170921803, "learning_rate": 4.517389167234377e-06, "loss": 0.10898298025131226, "step": 222450 }, { "epoch": 0.9550672745850614, "grad_norm": 5.185602188110352, "learning_rate": 4.5130774471167526e-06, "loss": 0.24877891540527344, "step": 222460 }, { "epoch": 0.9551102066750814, "grad_norm": 0.001561668235808611, "learning_rate": 4.508765726999129e-06, "loss": 0.3175127744674683, "step": 222470 }, { "epoch": 0.9551531387651013, "grad_norm": 0.04387206211686134, "learning_rate": 4.5044540068815055e-06, "loss": 0.14225680828094484, "step": 222480 }, { "epoch": 0.9551960708551214, "grad_norm": 2.1273081302642822, "learning_rate": 4.500142286763882e-06, "loss": 0.20350861549377441, "step": 222490 }, { "epoch": 0.9552390029451414, "grad_norm": 3.505652904510498, "learning_rate": 4.495830566646258e-06, "loss": 0.10865819454193115, "step": 222500 }, { "epoch": 0.9552819350351613, "grad_norm": 0.023028582334518433, "learning_rate": 4.491518846528634e-06, "loss": 0.2138669490814209, "step": 222510 }, { "epoch": 0.9553248671251814, "grad_norm": 0.03329871594905853, "learning_rate": 4.487207126411011e-06, "loss": 0.02992744743824005, "step": 222520 }, { "epoch": 0.9553677992152014, "grad_norm": 0.124062679708004, "learning_rate": 4.482895406293386e-06, "loss": 0.2770715236663818, "step": 222530 }, { "epoch": 0.9554107313052214, "grad_norm": 33.47114181518555, "learning_rate": 4.478583686175764e-06, "loss": 0.0699992299079895, "step": 222540 }, { "epoch": 0.9554536633952414, "grad_norm": 0.008653360418975353, "learning_rate": 4.47427196605814e-06, "loss": 0.11284812688827514, "step": 222550 }, { "epoch": 0.9554965954852614, "grad_norm": 2.8744757175445557, "learning_rate": 4.469960245940516e-06, "loss": 0.12507638931274415, "step": 222560 }, { "epoch": 0.9555395275752814, "grad_norm": 5.595198154449463, "learning_rate": 4.465648525822892e-06, "loss": 0.347485089302063, "step": 222570 }, { "epoch": 0.9555824596653014, "grad_norm": 0.33337855339050293, "learning_rate": 4.461336805705269e-06, "loss": 0.18478089570999146, "step": 222580 }, { "epoch": 0.9556253917553215, "grad_norm": 0.020518135279417038, "learning_rate": 4.4570250855876445e-06, "loss": 0.3763843536376953, "step": 222590 }, { "epoch": 0.9556683238453414, "grad_norm": 0.012903768569231033, "learning_rate": 4.452713365470021e-06, "loss": 0.140981125831604, "step": 222600 }, { "epoch": 0.9557112559353614, "grad_norm": 1.3483617305755615, "learning_rate": 4.4484016453523975e-06, "loss": 0.16890804767608641, "step": 222610 }, { "epoch": 0.9557541880253815, "grad_norm": 0.01349811814725399, "learning_rate": 4.444089925234773e-06, "loss": 0.18624815940856934, "step": 222620 }, { "epoch": 0.9557971201154014, "grad_norm": 0.0021429001353681087, "learning_rate": 4.43977820511715e-06, "loss": 0.017654465138912202, "step": 222630 }, { "epoch": 0.9558400522054215, "grad_norm": 68.33997344970703, "learning_rate": 4.435466484999526e-06, "loss": 0.11674298048019409, "step": 222640 }, { "epoch": 0.9558829842954415, "grad_norm": 65.13331604003906, "learning_rate": 4.431154764881902e-06, "loss": 0.32192862033843994, "step": 222650 }, { "epoch": 0.9559259163854614, "grad_norm": 0.043820153921842575, "learning_rate": 4.426843044764278e-06, "loss": 0.4090623378753662, "step": 222660 }, { "epoch": 0.9559688484754815, "grad_norm": 0.0010055521270260215, "learning_rate": 4.422531324646655e-06, "loss": 0.28583674430847167, "step": 222670 }, { "epoch": 0.9560117805655015, "grad_norm": 1.6560944318771362, "learning_rate": 4.418219604529031e-06, "loss": 0.21666302680969238, "step": 222680 }, { "epoch": 0.9560547126555214, "grad_norm": 0.0015214415034279227, "learning_rate": 4.413907884411407e-06, "loss": 0.12489688396453857, "step": 222690 }, { "epoch": 0.9560976447455415, "grad_norm": 0.20916807651519775, "learning_rate": 4.409596164293783e-06, "loss": 0.23923103809356688, "step": 222700 }, { "epoch": 0.9561405768355615, "grad_norm": 0.029555104672908783, "learning_rate": 4.40528444417616e-06, "loss": 0.1707392930984497, "step": 222710 }, { "epoch": 0.9561835089255815, "grad_norm": 0.004484428558498621, "learning_rate": 4.4009727240585355e-06, "loss": 0.06875411272048951, "step": 222720 }, { "epoch": 0.9562264410156015, "grad_norm": 1.1642791032791138, "learning_rate": 4.396661003940913e-06, "loss": 0.33511579036712646, "step": 222730 }, { "epoch": 0.9562693731056215, "grad_norm": 0.017194673418998718, "learning_rate": 4.392349283823289e-06, "loss": 0.13210066556930541, "step": 222740 }, { "epoch": 0.9563123051956416, "grad_norm": 0.002511198166757822, "learning_rate": 4.388037563705665e-06, "loss": 0.24065072536468507, "step": 222750 }, { "epoch": 0.9563552372856615, "grad_norm": 0.0038884340319782495, "learning_rate": 4.3837258435880415e-06, "loss": 0.3353621006011963, "step": 222760 }, { "epoch": 0.9563981693756816, "grad_norm": 0.8704876899719238, "learning_rate": 4.379414123470418e-06, "loss": 0.20678813457489015, "step": 222770 }, { "epoch": 0.9564411014657016, "grad_norm": 0.02445058897137642, "learning_rate": 4.375102403352794e-06, "loss": 0.15792833566665648, "step": 222780 }, { "epoch": 0.9564840335557215, "grad_norm": 0.11866077035665512, "learning_rate": 4.37079068323517e-06, "loss": 0.21752674579620362, "step": 222790 }, { "epoch": 0.9565269656457416, "grad_norm": 0.3736298084259033, "learning_rate": 4.366478963117547e-06, "loss": 0.1535860538482666, "step": 222800 }, { "epoch": 0.9565698977357616, "grad_norm": 2.179549217224121, "learning_rate": 4.362167242999922e-06, "loss": 0.2848331928253174, "step": 222810 }, { "epoch": 0.9566128298257816, "grad_norm": 1.7065210342407227, "learning_rate": 4.357855522882299e-06, "loss": 0.11012871265411377, "step": 222820 }, { "epoch": 0.9566557619158016, "grad_norm": 4.045711517333984, "learning_rate": 4.353543802764675e-06, "loss": 0.2910693407058716, "step": 222830 }, { "epoch": 0.9566986940058216, "grad_norm": 0.0030369660817086697, "learning_rate": 4.349232082647051e-06, "loss": 0.07584235668182374, "step": 222840 }, { "epoch": 0.9567416260958416, "grad_norm": 0.20301000773906708, "learning_rate": 4.344920362529427e-06, "loss": 0.15026159286499025, "step": 222850 }, { "epoch": 0.9567845581858616, "grad_norm": 2.4597878456115723, "learning_rate": 4.340608642411804e-06, "loss": 0.16425200700759887, "step": 222860 }, { "epoch": 0.9568274902758817, "grad_norm": 0.026504697278141975, "learning_rate": 4.33629692229418e-06, "loss": 0.37502100467681887, "step": 222870 }, { "epoch": 0.9568704223659016, "grad_norm": 0.7608723044395447, "learning_rate": 4.331985202176556e-06, "loss": 0.17374789714813232, "step": 222880 }, { "epoch": 0.9569133544559216, "grad_norm": 0.26648297905921936, "learning_rate": 4.3276734820589326e-06, "loss": 0.19921503067016602, "step": 222890 }, { "epoch": 0.9569562865459417, "grad_norm": 1.9915199279785156, "learning_rate": 4.323361761941309e-06, "loss": 0.25008134841918944, "step": 222900 }, { "epoch": 0.9569992186359616, "grad_norm": 0.9826081395149231, "learning_rate": 4.3190500418236856e-06, "loss": 0.3138739824295044, "step": 222910 }, { "epoch": 0.9570421507259816, "grad_norm": 0.041718751192092896, "learning_rate": 4.314738321706062e-06, "loss": 0.0015770439058542252, "step": 222920 }, { "epoch": 0.9570850828160017, "grad_norm": 1.591940999031067, "learning_rate": 4.3104266015884385e-06, "loss": 0.1963658571243286, "step": 222930 }, { "epoch": 0.9571280149060216, "grad_norm": 2.0966057777404785, "learning_rate": 4.306114881470814e-06, "loss": 0.42662744522094725, "step": 222940 }, { "epoch": 0.9571709469960417, "grad_norm": 0.013677514158189297, "learning_rate": 4.301803161353191e-06, "loss": 0.40020246505737306, "step": 222950 }, { "epoch": 0.9572138790860617, "grad_norm": 2.0066123008728027, "learning_rate": 4.297491441235567e-06, "loss": 0.15436534881591796, "step": 222960 }, { "epoch": 0.9572568111760816, "grad_norm": 21.6331729888916, "learning_rate": 4.293179721117943e-06, "loss": 0.24480834007263183, "step": 222970 }, { "epoch": 0.9572997432661017, "grad_norm": 4.920427322387695, "learning_rate": 4.288868001000319e-06, "loss": 0.27158007621765134, "step": 222980 }, { "epoch": 0.9573426753561217, "grad_norm": 48.925804138183594, "learning_rate": 4.284556280882696e-06, "loss": 0.2037571907043457, "step": 222990 }, { "epoch": 0.9573856074461417, "grad_norm": 0.005673081614077091, "learning_rate": 4.2802445607650715e-06, "loss": 0.19406213760375976, "step": 223000 }, { "epoch": 0.9573856074461417, "eval_loss": 0.36744725704193115, "eval_runtime": 27.5922, "eval_samples_per_second": 3.624, "eval_steps_per_second": 3.624, "step": 223000 }, { "epoch": 0.9574285395361617, "grad_norm": 2.244419574737549, "learning_rate": 4.275932840647448e-06, "loss": 0.15834920406341552, "step": 223010 }, { "epoch": 0.9574714716261817, "grad_norm": 0.05991288647055626, "learning_rate": 4.2716211205298245e-06, "loss": 0.3904329061508179, "step": 223020 }, { "epoch": 0.9575144037162017, "grad_norm": 0.5041655898094177, "learning_rate": 4.2673094004122e-06, "loss": 0.15431556701660157, "step": 223030 }, { "epoch": 0.9575573358062217, "grad_norm": 0.8797579407691956, "learning_rate": 4.262997680294577e-06, "loss": 0.28423237800598145, "step": 223040 }, { "epoch": 0.9576002678962418, "grad_norm": 0.947993814945221, "learning_rate": 4.258685960176953e-06, "loss": 0.14467850923538209, "step": 223050 }, { "epoch": 0.9576431999862617, "grad_norm": 1.3202130794525146, "learning_rate": 4.25437424005933e-06, "loss": 0.2532930612564087, "step": 223060 }, { "epoch": 0.9576861320762817, "grad_norm": 0.4150962233543396, "learning_rate": 4.250062519941705e-06, "loss": 0.10975308418273926, "step": 223070 }, { "epoch": 0.9577290641663018, "grad_norm": 0.031912241131067276, "learning_rate": 4.245750799824082e-06, "loss": 0.08250230550765991, "step": 223080 }, { "epoch": 0.9577719962563217, "grad_norm": 0.004118985962122679, "learning_rate": 4.241439079706458e-06, "loss": 0.13309333324432374, "step": 223090 }, { "epoch": 0.9578149283463417, "grad_norm": 0.004930357448756695, "learning_rate": 4.237127359588835e-06, "loss": 0.11556535959243774, "step": 223100 }, { "epoch": 0.9578578604363618, "grad_norm": 16.858257293701172, "learning_rate": 4.232815639471211e-06, "loss": 0.26987152099609374, "step": 223110 }, { "epoch": 0.9579007925263817, "grad_norm": 0.012583344243466854, "learning_rate": 4.228503919353588e-06, "loss": 0.2191478729248047, "step": 223120 }, { "epoch": 0.9579437246164018, "grad_norm": 1.26869535446167, "learning_rate": 4.224192199235963e-06, "loss": 0.24034445285797118, "step": 223130 }, { "epoch": 0.9579866567064218, "grad_norm": 1.5905871391296387, "learning_rate": 4.21988047911834e-06, "loss": 0.24000306129455568, "step": 223140 }, { "epoch": 0.9580295887964417, "grad_norm": 1.7036107778549194, "learning_rate": 4.215568759000716e-06, "loss": 0.10172114372253419, "step": 223150 }, { "epoch": 0.9580725208864618, "grad_norm": 0.03540444001555443, "learning_rate": 4.211257038883092e-06, "loss": 0.09440221190452576, "step": 223160 }, { "epoch": 0.9581154529764818, "grad_norm": 1.9591245651245117, "learning_rate": 4.2069453187654685e-06, "loss": 0.21786725521087646, "step": 223170 }, { "epoch": 0.9581583850665019, "grad_norm": 1.5034193992614746, "learning_rate": 4.202633598647845e-06, "loss": 0.25783910751342776, "step": 223180 }, { "epoch": 0.9582013171565218, "grad_norm": 0.06697037070989609, "learning_rate": 4.198321878530221e-06, "loss": 0.1745791792869568, "step": 223190 }, { "epoch": 0.9582442492465418, "grad_norm": 0.010242069140076637, "learning_rate": 4.194010158412597e-06, "loss": 0.38800320625305174, "step": 223200 }, { "epoch": 0.9582871813365619, "grad_norm": 1.9304941892623901, "learning_rate": 4.189698438294974e-06, "loss": 0.5144011974334717, "step": 223210 }, { "epoch": 0.9583301134265818, "grad_norm": 0.24945738911628723, "learning_rate": 4.185386718177349e-06, "loss": 0.1446653962135315, "step": 223220 }, { "epoch": 0.9583730455166019, "grad_norm": 0.09023217856884003, "learning_rate": 4.181074998059726e-06, "loss": 0.10228971242904664, "step": 223230 }, { "epoch": 0.9584159776066219, "grad_norm": 0.42283934354782104, "learning_rate": 4.176763277942102e-06, "loss": 0.19438369274139405, "step": 223240 }, { "epoch": 0.9584589096966418, "grad_norm": 0.002236647065728903, "learning_rate": 4.172451557824479e-06, "loss": 0.2513619899749756, "step": 223250 }, { "epoch": 0.9585018417866619, "grad_norm": 3.2790701389312744, "learning_rate": 4.1681398377068544e-06, "loss": 0.5641608238220215, "step": 223260 }, { "epoch": 0.9585447738766819, "grad_norm": 1.5386213064193726, "learning_rate": 4.163828117589232e-06, "loss": 0.23426687717437744, "step": 223270 }, { "epoch": 0.9585877059667018, "grad_norm": 0.5800334811210632, "learning_rate": 4.1595163974716074e-06, "loss": 0.12683658599853515, "step": 223280 }, { "epoch": 0.9586306380567219, "grad_norm": 0.014845290221273899, "learning_rate": 4.155204677353984e-06, "loss": 0.07357310652732849, "step": 223290 }, { "epoch": 0.9586735701467419, "grad_norm": 0.0030349476728588343, "learning_rate": 4.15089295723636e-06, "loss": 0.14453049898147582, "step": 223300 }, { "epoch": 0.9587165022367619, "grad_norm": 0.0032269014045596123, "learning_rate": 4.146581237118737e-06, "loss": 0.120048987865448, "step": 223310 }, { "epoch": 0.9587594343267819, "grad_norm": 0.016529643908143044, "learning_rate": 4.1422695170011126e-06, "loss": 0.11575425863265991, "step": 223320 }, { "epoch": 0.9588023664168019, "grad_norm": 2.5239858627319336, "learning_rate": 4.137957796883489e-06, "loss": 0.2145371913909912, "step": 223330 }, { "epoch": 0.9588452985068219, "grad_norm": 1.4063358306884766, "learning_rate": 4.1336460767658656e-06, "loss": 0.25419816970825193, "step": 223340 }, { "epoch": 0.9588882305968419, "grad_norm": 1.2896349430084229, "learning_rate": 4.129334356648241e-06, "loss": 0.1854541301727295, "step": 223350 }, { "epoch": 0.958931162686862, "grad_norm": 45.9904670715332, "learning_rate": 4.125022636530618e-06, "loss": 0.1698264718055725, "step": 223360 }, { "epoch": 0.9589740947768819, "grad_norm": 0.11553294956684113, "learning_rate": 4.120710916412994e-06, "loss": 0.307094144821167, "step": 223370 }, { "epoch": 0.9590170268669019, "grad_norm": 0.1716066598892212, "learning_rate": 4.11639919629537e-06, "loss": 0.09169681668281555, "step": 223380 }, { "epoch": 0.959059958956922, "grad_norm": 0.0001855223235907033, "learning_rate": 4.112087476177746e-06, "loss": 0.07396456003189086, "step": 223390 }, { "epoch": 0.9591028910469419, "grad_norm": 0.0005895741633139551, "learning_rate": 4.107775756060123e-06, "loss": 0.20217819213867189, "step": 223400 }, { "epoch": 0.959145823136962, "grad_norm": 0.0007842977647669613, "learning_rate": 4.1034640359424985e-06, "loss": 0.3810096502304077, "step": 223410 }, { "epoch": 0.959188755226982, "grad_norm": 0.07560084015130997, "learning_rate": 4.099152315824875e-06, "loss": 0.15365735292434693, "step": 223420 }, { "epoch": 0.9592316873170019, "grad_norm": 2.457524061203003, "learning_rate": 4.0948405957072515e-06, "loss": 0.2020556926727295, "step": 223430 }, { "epoch": 0.959274619407022, "grad_norm": 0.02854176051914692, "learning_rate": 4.090528875589628e-06, "loss": 0.2204937219619751, "step": 223440 }, { "epoch": 0.959317551497042, "grad_norm": 1.960445523262024, "learning_rate": 4.0862171554720045e-06, "loss": 0.28523755073547363, "step": 223450 }, { "epoch": 0.9593604835870619, "grad_norm": 3.8729004859924316, "learning_rate": 4.081905435354381e-06, "loss": 0.3639643669128418, "step": 223460 }, { "epoch": 0.959403415677082, "grad_norm": 2.3094053268432617, "learning_rate": 4.077593715236757e-06, "loss": 0.18904602527618408, "step": 223470 }, { "epoch": 0.959446347767102, "grad_norm": 0.011753720231354237, "learning_rate": 4.073281995119133e-06, "loss": 0.21364269256591797, "step": 223480 }, { "epoch": 0.959489279857122, "grad_norm": 0.005512732081115246, "learning_rate": 4.06897027500151e-06, "loss": 0.09811052083969116, "step": 223490 }, { "epoch": 0.959532211947142, "grad_norm": 2.5590529441833496, "learning_rate": 4.064658554883886e-06, "loss": 0.12399983406066895, "step": 223500 }, { "epoch": 0.959575144037162, "grad_norm": 0.13864728808403015, "learning_rate": 4.060346834766262e-06, "loss": 0.24441328048706054, "step": 223510 }, { "epoch": 0.959618076127182, "grad_norm": 0.00043273199116811156, "learning_rate": 4.056035114648638e-06, "loss": 0.21513741016387938, "step": 223520 }, { "epoch": 0.959661008217202, "grad_norm": 4.524774074554443, "learning_rate": 4.051723394531015e-06, "loss": 0.2797492504119873, "step": 223530 }, { "epoch": 0.9597039403072221, "grad_norm": 1.4905755519866943, "learning_rate": 4.04741167441339e-06, "loss": 0.3571767330169678, "step": 223540 }, { "epoch": 0.959746872397242, "grad_norm": 0.12812760472297668, "learning_rate": 4.043099954295767e-06, "loss": 0.04512379467487335, "step": 223550 }, { "epoch": 0.959789804487262, "grad_norm": 1.246614694595337, "learning_rate": 4.038788234178143e-06, "loss": 0.08907458782196045, "step": 223560 }, { "epoch": 0.9598327365772821, "grad_norm": 0.001846889266744256, "learning_rate": 4.034476514060519e-06, "loss": 0.17755554914474486, "step": 223570 }, { "epoch": 0.959875668667302, "grad_norm": 0.11436885595321655, "learning_rate": 4.0301647939428955e-06, "loss": 0.12528493404388427, "step": 223580 }, { "epoch": 0.959918600757322, "grad_norm": 0.002565787872299552, "learning_rate": 4.025853073825272e-06, "loss": 0.06130242347717285, "step": 223590 }, { "epoch": 0.9599615328473421, "grad_norm": 1.4368469715118408, "learning_rate": 4.021541353707648e-06, "loss": 0.2077068567276001, "step": 223600 }, { "epoch": 0.9600044649373621, "grad_norm": 0.19996072351932526, "learning_rate": 4.017229633590024e-06, "loss": 0.20007028579711914, "step": 223610 }, { "epoch": 0.9600473970273821, "grad_norm": 11.39544677734375, "learning_rate": 4.012917913472401e-06, "loss": 0.1851373314857483, "step": 223620 }, { "epoch": 0.9600903291174021, "grad_norm": 0.0127298878505826, "learning_rate": 4.008606193354777e-06, "loss": 0.17328726053237914, "step": 223630 }, { "epoch": 0.9601332612074222, "grad_norm": 0.060895953327417374, "learning_rate": 4.004294473237154e-06, "loss": 0.3138414859771729, "step": 223640 }, { "epoch": 0.9601761932974421, "grad_norm": 0.009014183655381203, "learning_rate": 3.99998275311953e-06, "loss": 0.19367181062698363, "step": 223650 }, { "epoch": 0.9602191253874621, "grad_norm": 2.9843554496765137, "learning_rate": 3.995671033001906e-06, "loss": 0.3071397304534912, "step": 223660 }, { "epoch": 0.9602620574774822, "grad_norm": 0.016983680427074432, "learning_rate": 3.991359312884282e-06, "loss": 0.210011625289917, "step": 223670 }, { "epoch": 0.9603049895675021, "grad_norm": 0.7923820614814758, "learning_rate": 3.987047592766659e-06, "loss": 0.3432178020477295, "step": 223680 }, { "epoch": 0.9603479216575221, "grad_norm": 1.8482073545455933, "learning_rate": 3.982735872649035e-06, "loss": 0.4096959114074707, "step": 223690 }, { "epoch": 0.9603908537475422, "grad_norm": 0.057525552809238434, "learning_rate": 3.978424152531411e-06, "loss": 0.10442249774932862, "step": 223700 }, { "epoch": 0.9604337858375621, "grad_norm": 1.0683971643447876, "learning_rate": 3.9741124324137874e-06, "loss": 0.5931124687194824, "step": 223710 }, { "epoch": 0.9604767179275822, "grad_norm": 0.08928456157445908, "learning_rate": 3.969800712296164e-06, "loss": 0.2801962375640869, "step": 223720 }, { "epoch": 0.9605196500176022, "grad_norm": 0.08087646961212158, "learning_rate": 3.96548899217854e-06, "loss": 0.20012621879577636, "step": 223730 }, { "epoch": 0.9605625821076221, "grad_norm": 0.01258047018200159, "learning_rate": 3.961177272060916e-06, "loss": 0.15536543130874633, "step": 223740 }, { "epoch": 0.9606055141976422, "grad_norm": 1.7692468166351318, "learning_rate": 3.9568655519432926e-06, "loss": 0.2260221004486084, "step": 223750 }, { "epoch": 0.9606484462876622, "grad_norm": 0.01273643970489502, "learning_rate": 3.952553831825668e-06, "loss": 0.2594797134399414, "step": 223760 }, { "epoch": 0.9606913783776821, "grad_norm": 1.927130937576294, "learning_rate": 3.948242111708045e-06, "loss": 0.18560283184051513, "step": 223770 }, { "epoch": 0.9607343104677022, "grad_norm": 1.519124150276184, "learning_rate": 3.943930391590421e-06, "loss": 0.11475672721862792, "step": 223780 }, { "epoch": 0.9607772425577222, "grad_norm": 0.023710211738944054, "learning_rate": 3.939618671472797e-06, "loss": 0.2021331548690796, "step": 223790 }, { "epoch": 0.9608201746477422, "grad_norm": 0.036301396787166595, "learning_rate": 3.935306951355173e-06, "loss": 0.1153161883354187, "step": 223800 }, { "epoch": 0.9608631067377622, "grad_norm": 0.016259074211120605, "learning_rate": 3.93099523123755e-06, "loss": 0.1772770404815674, "step": 223810 }, { "epoch": 0.9609060388277822, "grad_norm": 1.032292366027832, "learning_rate": 3.926683511119926e-06, "loss": 0.06174069046974182, "step": 223820 }, { "epoch": 0.9609489709178022, "grad_norm": 0.0033859829418361187, "learning_rate": 3.922371791002303e-06, "loss": 0.014263886213302612, "step": 223830 }, { "epoch": 0.9609919030078222, "grad_norm": 8.165559768676758, "learning_rate": 3.918060070884679e-06, "loss": 0.40694475173950195, "step": 223840 }, { "epoch": 0.9610348350978423, "grad_norm": 0.003001274075359106, "learning_rate": 3.913748350767055e-06, "loss": 0.09890034198760986, "step": 223850 }, { "epoch": 0.9610777671878622, "grad_norm": 5.088690280914307, "learning_rate": 3.9094366306494315e-06, "loss": 0.09827024936676025, "step": 223860 }, { "epoch": 0.9611206992778822, "grad_norm": 3.8880326747894287, "learning_rate": 3.905124910531808e-06, "loss": 0.40003390312194825, "step": 223870 }, { "epoch": 0.9611636313679023, "grad_norm": 0.08274823427200317, "learning_rate": 3.9008131904141845e-06, "loss": 0.1569799542427063, "step": 223880 }, { "epoch": 0.9612065634579222, "grad_norm": 0.0208677276968956, "learning_rate": 3.89650147029656e-06, "loss": 0.07912614941596985, "step": 223890 }, { "epoch": 0.9612494955479423, "grad_norm": 0.006856684572994709, "learning_rate": 3.892189750178937e-06, "loss": 0.1457221269607544, "step": 223900 }, { "epoch": 0.9612924276379623, "grad_norm": 0.0018087215721607208, "learning_rate": 3.887878030061313e-06, "loss": 0.0596615195274353, "step": 223910 }, { "epoch": 0.9613353597279822, "grad_norm": 2.4244134426116943, "learning_rate": 3.883566309943689e-06, "loss": 0.07604253888130189, "step": 223920 }, { "epoch": 0.9613782918180023, "grad_norm": 1.4929369688034058, "learning_rate": 3.879254589826065e-06, "loss": 0.24013571739196776, "step": 223930 }, { "epoch": 0.9614212239080223, "grad_norm": 0.011781311593949795, "learning_rate": 3.874942869708442e-06, "loss": 0.20817484855651855, "step": 223940 }, { "epoch": 0.9614641559980422, "grad_norm": 0.06727916747331619, "learning_rate": 3.870631149590817e-06, "loss": 0.003799154236912727, "step": 223950 }, { "epoch": 0.9615070880880623, "grad_norm": 3.397916316986084, "learning_rate": 3.866319429473194e-06, "loss": 0.049684774875640866, "step": 223960 }, { "epoch": 0.9615500201780823, "grad_norm": 0.789635181427002, "learning_rate": 3.86200770935557e-06, "loss": 0.1622435212135315, "step": 223970 }, { "epoch": 0.9615929522681023, "grad_norm": 0.0496024563908577, "learning_rate": 3.857695989237947e-06, "loss": 0.28729078769683836, "step": 223980 }, { "epoch": 0.9616358843581223, "grad_norm": 0.3095422089099884, "learning_rate": 3.8533842691203225e-06, "loss": 0.21515951156616211, "step": 223990 }, { "epoch": 0.9616788164481423, "grad_norm": 0.009204866364598274, "learning_rate": 3.8490725490027e-06, "loss": 0.12539956569671631, "step": 224000 }, { "epoch": 0.9616788164481423, "eval_loss": 0.3682700991630554, "eval_runtime": 27.4644, "eval_samples_per_second": 3.641, "eval_steps_per_second": 3.641, "step": 224000 }, { "epoch": 0.9617217485381623, "grad_norm": 2.353421449661255, "learning_rate": 3.8447608288850755e-06, "loss": 0.21565442085266112, "step": 224010 }, { "epoch": 0.9617646806281823, "grad_norm": 0.01283611822873354, "learning_rate": 3.840449108767452e-06, "loss": 0.202630352973938, "step": 224020 }, { "epoch": 0.9618076127182024, "grad_norm": 0.0016208564629778266, "learning_rate": 3.8361373886498285e-06, "loss": 0.16334728002548218, "step": 224030 }, { "epoch": 0.9618505448082224, "grad_norm": 0.1348349153995514, "learning_rate": 3.831825668532204e-06, "loss": 0.13696320056915284, "step": 224040 }, { "epoch": 0.9618934768982423, "grad_norm": 0.009350267238914967, "learning_rate": 3.827513948414581e-06, "loss": 0.20097155570983888, "step": 224050 }, { "epoch": 0.9619364089882624, "grad_norm": 0.005436836276203394, "learning_rate": 3.823202228296957e-06, "loss": 0.016508430242538452, "step": 224060 }, { "epoch": 0.9619793410782824, "grad_norm": 5.0872883796691895, "learning_rate": 3.818890508179334e-06, "loss": 0.09875186681747436, "step": 224070 }, { "epoch": 0.9620222731683024, "grad_norm": 0.0039020462427288294, "learning_rate": 3.8145787880617097e-06, "loss": 0.12000479698181152, "step": 224080 }, { "epoch": 0.9620652052583224, "grad_norm": 6.722840785980225, "learning_rate": 3.810267067944086e-06, "loss": 0.4321133613586426, "step": 224090 }, { "epoch": 0.9621081373483424, "grad_norm": 0.001120436587370932, "learning_rate": 3.805955347826462e-06, "loss": 0.21240513324737548, "step": 224100 }, { "epoch": 0.9621510694383624, "grad_norm": 0.03145579993724823, "learning_rate": 3.8016436277088384e-06, "loss": 0.19584509134292602, "step": 224110 }, { "epoch": 0.9621940015283824, "grad_norm": 2.7041938304901123, "learning_rate": 3.7973319075912145e-06, "loss": 0.2914386510848999, "step": 224120 }, { "epoch": 0.9622369336184025, "grad_norm": 0.040991175919771194, "learning_rate": 3.7930201874735905e-06, "loss": 0.1548812985420227, "step": 224130 }, { "epoch": 0.9622798657084224, "grad_norm": 0.7310913801193237, "learning_rate": 3.788708467355967e-06, "loss": 0.0296543151140213, "step": 224140 }, { "epoch": 0.9623227977984424, "grad_norm": 2.342799425125122, "learning_rate": 3.784396747238343e-06, "loss": 0.14656716585159302, "step": 224150 }, { "epoch": 0.9623657298884625, "grad_norm": 2.133543014526367, "learning_rate": 3.7800850271207196e-06, "loss": 0.1531446695327759, "step": 224160 }, { "epoch": 0.9624086619784824, "grad_norm": 3.5911266803741455, "learning_rate": 3.7757733070030957e-06, "loss": 0.3761185884475708, "step": 224170 }, { "epoch": 0.9624515940685024, "grad_norm": 1.0610127449035645, "learning_rate": 3.7714615868854726e-06, "loss": 0.16036534309387207, "step": 224180 }, { "epoch": 0.9624945261585225, "grad_norm": 0.003502331208437681, "learning_rate": 3.7671498667678487e-06, "loss": 0.13411206007003784, "step": 224190 }, { "epoch": 0.9625374582485424, "grad_norm": 0.00993330217897892, "learning_rate": 3.762838146650225e-06, "loss": 0.37001564502716067, "step": 224200 }, { "epoch": 0.9625803903385625, "grad_norm": 1.013261079788208, "learning_rate": 3.7585264265326012e-06, "loss": 0.3331045627593994, "step": 224210 }, { "epoch": 0.9626233224285825, "grad_norm": 0.04176459461450577, "learning_rate": 3.7542147064149777e-06, "loss": 0.04846885502338409, "step": 224220 }, { "epoch": 0.9626662545186024, "grad_norm": 0.0031787080224603415, "learning_rate": 3.749902986297354e-06, "loss": 0.18654595613479613, "step": 224230 }, { "epoch": 0.9627091866086225, "grad_norm": 0.7569457292556763, "learning_rate": 3.74559126617973e-06, "loss": 0.18365461826324464, "step": 224240 }, { "epoch": 0.9627521186986425, "grad_norm": 0.019828135147690773, "learning_rate": 3.7412795460621064e-06, "loss": 0.06259853839874267, "step": 224250 }, { "epoch": 0.9627950507886625, "grad_norm": 0.0054312837310135365, "learning_rate": 3.7369678259444824e-06, "loss": 0.17033387422561647, "step": 224260 }, { "epoch": 0.9628379828786825, "grad_norm": 2.7154340744018555, "learning_rate": 3.732656105826859e-06, "loss": 0.042243891954422, "step": 224270 }, { "epoch": 0.9628809149687025, "grad_norm": 0.9128412008285522, "learning_rate": 3.728344385709235e-06, "loss": 0.05694934129714966, "step": 224280 }, { "epoch": 0.9629238470587225, "grad_norm": 1.8611356019973755, "learning_rate": 3.724032665591611e-06, "loss": 0.32024905681610105, "step": 224290 }, { "epoch": 0.9629667791487425, "grad_norm": 0.0045630838721990585, "learning_rate": 3.7197209454739876e-06, "loss": 0.14713085889816285, "step": 224300 }, { "epoch": 0.9630097112387626, "grad_norm": 0.05896364524960518, "learning_rate": 3.7154092253563636e-06, "loss": 0.11309947967529296, "step": 224310 }, { "epoch": 0.9630526433287825, "grad_norm": 3.036715507507324, "learning_rate": 3.7110975052387397e-06, "loss": 0.13869774341583252, "step": 224320 }, { "epoch": 0.9630955754188025, "grad_norm": 0.044732533395290375, "learning_rate": 3.706785785121116e-06, "loss": 0.1689812183380127, "step": 224330 }, { "epoch": 0.9631385075088226, "grad_norm": 0.0014035659842193127, "learning_rate": 3.7024740650034923e-06, "loss": 0.0963472604751587, "step": 224340 }, { "epoch": 0.9631814395988425, "grad_norm": 1.8297466039657593, "learning_rate": 3.6981623448858688e-06, "loss": 0.31110439300537107, "step": 224350 }, { "epoch": 0.9632243716888625, "grad_norm": 0.05191795900464058, "learning_rate": 3.6938506247682457e-06, "loss": 0.10177983045578003, "step": 224360 }, { "epoch": 0.9632673037788826, "grad_norm": 1.1834299564361572, "learning_rate": 3.6895389046506218e-06, "loss": 0.1933537244796753, "step": 224370 }, { "epoch": 0.9633102358689025, "grad_norm": 0.16819176077842712, "learning_rate": 3.685227184532998e-06, "loss": 0.1773150682449341, "step": 224380 }, { "epoch": 0.9633531679589226, "grad_norm": 0.02834729105234146, "learning_rate": 3.6809154644153743e-06, "loss": 0.13921488523483277, "step": 224390 }, { "epoch": 0.9633961000489426, "grad_norm": 0.4217594861984253, "learning_rate": 3.6766037442977504e-06, "loss": 0.24627528190612794, "step": 224400 }, { "epoch": 0.9634390321389625, "grad_norm": 0.05298386886715889, "learning_rate": 3.672292024180127e-06, "loss": 0.12269526720046997, "step": 224410 }, { "epoch": 0.9634819642289826, "grad_norm": 2.3080687522888184, "learning_rate": 3.667980304062503e-06, "loss": 0.16422743797302247, "step": 224420 }, { "epoch": 0.9635248963190026, "grad_norm": 4.178096771240234, "learning_rate": 3.663668583944879e-06, "loss": 0.17461690902709961, "step": 224430 }, { "epoch": 0.9635678284090226, "grad_norm": 2.450517177581787, "learning_rate": 3.6593568638272555e-06, "loss": 0.18653972148895265, "step": 224440 }, { "epoch": 0.9636107604990426, "grad_norm": 0.036486510187387466, "learning_rate": 3.6550451437096316e-06, "loss": 0.19325416088104247, "step": 224450 }, { "epoch": 0.9636536925890626, "grad_norm": 0.03563200309872627, "learning_rate": 3.650733423592008e-06, "loss": 0.1459072709083557, "step": 224460 }, { "epoch": 0.9636966246790827, "grad_norm": 0.25156867504119873, "learning_rate": 3.646421703474384e-06, "loss": 0.1999955177307129, "step": 224470 }, { "epoch": 0.9637395567691026, "grad_norm": 8.842079162597656, "learning_rate": 3.6421099833567603e-06, "loss": 0.19552001953125, "step": 224480 }, { "epoch": 0.9637824888591227, "grad_norm": 0.0028808624483644962, "learning_rate": 3.6377982632391368e-06, "loss": 0.04963212013244629, "step": 224490 }, { "epoch": 0.9638254209491427, "grad_norm": 0.001806130982004106, "learning_rate": 3.633486543121513e-06, "loss": 0.15328739881515502, "step": 224500 }, { "epoch": 0.9638683530391626, "grad_norm": 0.007961916737258434, "learning_rate": 3.6291748230038893e-06, "loss": 0.24264960289001464, "step": 224510 }, { "epoch": 0.9639112851291827, "grad_norm": 1.3244423866271973, "learning_rate": 3.6248631028862654e-06, "loss": 0.28087844848632815, "step": 224520 }, { "epoch": 0.9639542172192027, "grad_norm": 0.01275936421006918, "learning_rate": 3.6205513827686415e-06, "loss": 0.3498584032058716, "step": 224530 }, { "epoch": 0.9639971493092226, "grad_norm": 0.0005850349552929401, "learning_rate": 3.6162396626510184e-06, "loss": 0.2803433656692505, "step": 224540 }, { "epoch": 0.9640400813992427, "grad_norm": 0.00403195945546031, "learning_rate": 3.611927942533395e-06, "loss": 0.1901412010192871, "step": 224550 }, { "epoch": 0.9640830134892627, "grad_norm": 0.9117140769958496, "learning_rate": 3.607616222415771e-06, "loss": 0.09261349439620972, "step": 224560 }, { "epoch": 0.9641259455792827, "grad_norm": 7.377902030944824, "learning_rate": 3.603304502298147e-06, "loss": 0.39287283420562746, "step": 224570 }, { "epoch": 0.9641688776693027, "grad_norm": 1.6168286800384521, "learning_rate": 3.5989927821805235e-06, "loss": 0.25367326736450196, "step": 224580 }, { "epoch": 0.9642118097593227, "grad_norm": 3.83086895942688, "learning_rate": 3.5946810620628996e-06, "loss": 0.20805747509002687, "step": 224590 }, { "epoch": 0.9642547418493427, "grad_norm": 0.015164944343268871, "learning_rate": 3.590369341945276e-06, "loss": 0.12288215160369872, "step": 224600 }, { "epoch": 0.9642976739393627, "grad_norm": 0.22293995320796967, "learning_rate": 3.586057621827652e-06, "loss": 0.04461737871170044, "step": 224610 }, { "epoch": 0.9643406060293828, "grad_norm": 3.296658992767334, "learning_rate": 3.5817459017100282e-06, "loss": 0.2322096824645996, "step": 224620 }, { "epoch": 0.9643835381194027, "grad_norm": 5.496777057647705, "learning_rate": 3.5774341815924047e-06, "loss": 0.2097012996673584, "step": 224630 }, { "epoch": 0.9644264702094227, "grad_norm": 3.9928760528564453, "learning_rate": 3.573122461474781e-06, "loss": 0.4075942516326904, "step": 224640 }, { "epoch": 0.9644694022994428, "grad_norm": 0.23641102015972137, "learning_rate": 3.5688107413571573e-06, "loss": 0.16521319150924682, "step": 224650 }, { "epoch": 0.9645123343894627, "grad_norm": 2.510756731033325, "learning_rate": 3.5644990212395334e-06, "loss": 0.35691146850585936, "step": 224660 }, { "epoch": 0.9645552664794828, "grad_norm": 3.972184896469116, "learning_rate": 3.5601873011219094e-06, "loss": 0.17900394201278685, "step": 224670 }, { "epoch": 0.9645981985695028, "grad_norm": 0.0042314473539590836, "learning_rate": 3.555875581004286e-06, "loss": 0.11929336786270142, "step": 224680 }, { "epoch": 0.9646411306595227, "grad_norm": 6.589748859405518, "learning_rate": 3.551563860886662e-06, "loss": 0.2866029739379883, "step": 224690 }, { "epoch": 0.9646840627495428, "grad_norm": 0.02086971141397953, "learning_rate": 3.5472521407690385e-06, "loss": 0.17334396839141847, "step": 224700 }, { "epoch": 0.9647269948395628, "grad_norm": 2.366854190826416, "learning_rate": 3.5429404206514146e-06, "loss": 0.33515105247497556, "step": 224710 }, { "epoch": 0.9647699269295827, "grad_norm": 0.06535419821739197, "learning_rate": 3.5386287005337907e-06, "loss": 0.1478777050971985, "step": 224720 }, { "epoch": 0.9648128590196028, "grad_norm": 0.06608917564153671, "learning_rate": 3.5343169804161676e-06, "loss": 0.2516626358032227, "step": 224730 }, { "epoch": 0.9648557911096228, "grad_norm": 0.3839847147464752, "learning_rate": 3.530005260298544e-06, "loss": 0.07610129714012145, "step": 224740 }, { "epoch": 0.9648987231996428, "grad_norm": 0.0032831490971148014, "learning_rate": 3.52569354018092e-06, "loss": 0.2178466796875, "step": 224750 }, { "epoch": 0.9649416552896628, "grad_norm": 1.180791974067688, "learning_rate": 3.5213818200632962e-06, "loss": 0.18217480182647705, "step": 224760 }, { "epoch": 0.9649845873796828, "grad_norm": 0.0028181334491819143, "learning_rate": 3.5170700999456727e-06, "loss": 0.052039462327957156, "step": 224770 }, { "epoch": 0.9650275194697028, "grad_norm": 0.01064180489629507, "learning_rate": 3.5127583798280488e-06, "loss": 0.0493350625038147, "step": 224780 }, { "epoch": 0.9650704515597228, "grad_norm": 5.400768756866455, "learning_rate": 3.5084466597104253e-06, "loss": 0.3771092176437378, "step": 224790 }, { "epoch": 0.9651133836497429, "grad_norm": 1.3995170593261719, "learning_rate": 3.5041349395928013e-06, "loss": 0.5951415538787842, "step": 224800 }, { "epoch": 0.9651563157397628, "grad_norm": 0.006610509008169174, "learning_rate": 3.4998232194751774e-06, "loss": 0.1903270959854126, "step": 224810 }, { "epoch": 0.9651992478297828, "grad_norm": 0.007922952994704247, "learning_rate": 3.495511499357554e-06, "loss": 0.17838799953460693, "step": 224820 }, { "epoch": 0.9652421799198029, "grad_norm": 2.0023837089538574, "learning_rate": 3.49119977923993e-06, "loss": 0.14361563920974732, "step": 224830 }, { "epoch": 0.9652851120098228, "grad_norm": 0.020767943933606148, "learning_rate": 3.4868880591223065e-06, "loss": 0.23522424697875977, "step": 224840 }, { "epoch": 0.9653280440998429, "grad_norm": 0.014713380485773087, "learning_rate": 3.4825763390046826e-06, "loss": 0.19255516529083253, "step": 224850 }, { "epoch": 0.9653709761898629, "grad_norm": 0.006427861750125885, "learning_rate": 3.4782646188870586e-06, "loss": 0.12834020853042602, "step": 224860 }, { "epoch": 0.9654139082798828, "grad_norm": 0.9519116282463074, "learning_rate": 3.473952898769435e-06, "loss": 0.3461976289749146, "step": 224870 }, { "epoch": 0.9654568403699029, "grad_norm": 0.017268039286136627, "learning_rate": 3.469641178651811e-06, "loss": 0.21045241355895997, "step": 224880 }, { "epoch": 0.9654997724599229, "grad_norm": 0.013922857120633125, "learning_rate": 3.4653294585341877e-06, "loss": 0.0968497633934021, "step": 224890 }, { "epoch": 0.965542704549943, "grad_norm": 0.28565648198127747, "learning_rate": 3.4610177384165638e-06, "loss": 0.06030838489532471, "step": 224900 }, { "epoch": 0.9655856366399629, "grad_norm": 0.003438707906752825, "learning_rate": 3.4567060182989407e-06, "loss": 0.06618756055831909, "step": 224910 }, { "epoch": 0.9656285687299829, "grad_norm": 0.03495679795742035, "learning_rate": 3.4523942981813168e-06, "loss": 0.2902189254760742, "step": 224920 }, { "epoch": 0.965671500820003, "grad_norm": 1.1262989044189453, "learning_rate": 3.4480825780636933e-06, "loss": 0.17496834993362426, "step": 224930 }, { "epoch": 0.9657144329100229, "grad_norm": 0.047534435987472534, "learning_rate": 3.4437708579460693e-06, "loss": 0.09073890447616577, "step": 224940 }, { "epoch": 0.9657573650000429, "grad_norm": 1.0417522192001343, "learning_rate": 3.439459137828446e-06, "loss": 0.16527436971664428, "step": 224950 }, { "epoch": 0.965800297090063, "grad_norm": 0.9508054256439209, "learning_rate": 3.435147417710822e-06, "loss": 0.22022783756256104, "step": 224960 }, { "epoch": 0.9658432291800829, "grad_norm": 0.0005445060087367892, "learning_rate": 3.430835697593198e-06, "loss": 0.2978523731231689, "step": 224970 }, { "epoch": 0.965886161270103, "grad_norm": 17.093509674072266, "learning_rate": 3.4265239774755745e-06, "loss": 0.05229206085205078, "step": 224980 }, { "epoch": 0.965929093360123, "grad_norm": 0.005673188250511885, "learning_rate": 3.4222122573579505e-06, "loss": 0.1114910364151001, "step": 224990 }, { "epoch": 0.9659720254501429, "grad_norm": 1.3230369091033936, "learning_rate": 3.4179005372403266e-06, "loss": 0.18673681020736693, "step": 225000 }, { "epoch": 0.9659720254501429, "eval_loss": 0.3696173131465912, "eval_runtime": 27.4602, "eval_samples_per_second": 3.642, "eval_steps_per_second": 3.642, "step": 225000 }, { "epoch": 0.966014957540163, "grad_norm": 2.1282687187194824, "learning_rate": 3.413588817122703e-06, "loss": 0.20100622177124022, "step": 225010 }, { "epoch": 0.966057889630183, "grad_norm": 3.6257035732269287, "learning_rate": 3.409277097005079e-06, "loss": 0.299635124206543, "step": 225020 }, { "epoch": 0.966100821720203, "grad_norm": 2.9898667335510254, "learning_rate": 3.4049653768874557e-06, "loss": 0.14922019243240356, "step": 225030 }, { "epoch": 0.966143753810223, "grad_norm": 0.0014072444755584002, "learning_rate": 3.4006536567698317e-06, "loss": 0.14536370038986207, "step": 225040 }, { "epoch": 0.966186685900243, "grad_norm": 2.570472478866577, "learning_rate": 3.396341936652208e-06, "loss": 0.3303256034851074, "step": 225050 }, { "epoch": 0.966229617990263, "grad_norm": 0.4072743356227875, "learning_rate": 3.3920302165345843e-06, "loss": 0.228645920753479, "step": 225060 }, { "epoch": 0.966272550080283, "grad_norm": 0.035002633929252625, "learning_rate": 3.3877184964169604e-06, "loss": 0.25193569660186765, "step": 225070 }, { "epoch": 0.966315482170303, "grad_norm": 2.990675687789917, "learning_rate": 3.383406776299337e-06, "loss": 0.23985731601715088, "step": 225080 }, { "epoch": 0.966358414260323, "grad_norm": 0.006291983183473349, "learning_rate": 3.379095056181714e-06, "loss": 0.28874037265777586, "step": 225090 }, { "epoch": 0.966401346350343, "grad_norm": 4.065964698791504, "learning_rate": 3.37478333606409e-06, "loss": 0.26277947425842285, "step": 225100 }, { "epoch": 0.9664442784403631, "grad_norm": 0.004781167954206467, "learning_rate": 3.370471615946466e-06, "loss": 0.33129491806030276, "step": 225110 }, { "epoch": 0.966487210530383, "grad_norm": 0.10862305015325546, "learning_rate": 3.3661598958288424e-06, "loss": 0.32240264415740966, "step": 225120 }, { "epoch": 0.966530142620403, "grad_norm": 0.06308849155902863, "learning_rate": 3.3618481757112185e-06, "loss": 0.17908284664154053, "step": 225130 }, { "epoch": 0.9665730747104231, "grad_norm": 1.10364830493927, "learning_rate": 3.357536455593595e-06, "loss": 0.3277503252029419, "step": 225140 }, { "epoch": 0.966616006800443, "grad_norm": 6.852668762207031, "learning_rate": 3.353224735475971e-06, "loss": 0.23288826942443847, "step": 225150 }, { "epoch": 0.9666589388904631, "grad_norm": 4.911060333251953, "learning_rate": 3.348913015358347e-06, "loss": 0.12526233196258546, "step": 225160 }, { "epoch": 0.9667018709804831, "grad_norm": 0.010838578455150127, "learning_rate": 3.3446012952407237e-06, "loss": 0.17414969205856323, "step": 225170 }, { "epoch": 0.966744803070503, "grad_norm": 0.016853701323270798, "learning_rate": 3.3402895751230997e-06, "loss": 0.3909905195236206, "step": 225180 }, { "epoch": 0.9667877351605231, "grad_norm": 7.306344509124756, "learning_rate": 3.335977855005476e-06, "loss": 0.2299267292022705, "step": 225190 }, { "epoch": 0.9668306672505431, "grad_norm": 0.004599553067237139, "learning_rate": 3.3316661348878523e-06, "loss": 0.21219370365142823, "step": 225200 }, { "epoch": 0.966873599340563, "grad_norm": 1.972286343574524, "learning_rate": 3.3273544147702284e-06, "loss": 0.18740177154541016, "step": 225210 }, { "epoch": 0.9669165314305831, "grad_norm": 2.4764668941497803, "learning_rate": 3.323042694652605e-06, "loss": 0.21578762531280518, "step": 225220 }, { "epoch": 0.9669594635206031, "grad_norm": 0.6758376359939575, "learning_rate": 3.318730974534981e-06, "loss": 0.2908621788024902, "step": 225230 }, { "epoch": 0.9670023956106231, "grad_norm": 0.005352508742362261, "learning_rate": 3.314419254417357e-06, "loss": 0.23948318958282472, "step": 225240 }, { "epoch": 0.9670453277006431, "grad_norm": 5.595332622528076, "learning_rate": 3.3101075342997335e-06, "loss": 0.15925862789154052, "step": 225250 }, { "epoch": 0.9670882597906632, "grad_norm": 0.034605976194143295, "learning_rate": 3.3057958141821096e-06, "loss": 0.02419239580631256, "step": 225260 }, { "epoch": 0.9671311918806831, "grad_norm": 4.634537696838379, "learning_rate": 3.3014840940644865e-06, "loss": 0.15210497379302979, "step": 225270 }, { "epoch": 0.9671741239707031, "grad_norm": 0.0017427064012736082, "learning_rate": 3.297172373946863e-06, "loss": 0.09682173728942871, "step": 225280 }, { "epoch": 0.9672170560607232, "grad_norm": 0.02096271514892578, "learning_rate": 3.292860653829239e-06, "loss": 0.18717020750045776, "step": 225290 }, { "epoch": 0.9672599881507431, "grad_norm": 0.21846190094947815, "learning_rate": 3.288548933711615e-06, "loss": 0.21091217994689943, "step": 225300 }, { "epoch": 0.9673029202407631, "grad_norm": 0.03409862890839577, "learning_rate": 3.2842372135939916e-06, "loss": 0.2435748815536499, "step": 225310 }, { "epoch": 0.9673458523307832, "grad_norm": 1.015627384185791, "learning_rate": 3.2799254934763677e-06, "loss": 0.2684969186782837, "step": 225320 }, { "epoch": 0.9673887844208032, "grad_norm": 0.0285097174346447, "learning_rate": 3.275613773358744e-06, "loss": 0.06942356228828431, "step": 225330 }, { "epoch": 0.9674317165108232, "grad_norm": 0.07428458333015442, "learning_rate": 3.2713020532411203e-06, "loss": 0.17607952356338502, "step": 225340 }, { "epoch": 0.9674746486008432, "grad_norm": 0.018062138929963112, "learning_rate": 3.2669903331234963e-06, "loss": 0.19157010316848755, "step": 225350 }, { "epoch": 0.9675175806908632, "grad_norm": 0.4948386251926422, "learning_rate": 3.262678613005873e-06, "loss": 0.15009280443191528, "step": 225360 }, { "epoch": 0.9675605127808832, "grad_norm": 0.09620340168476105, "learning_rate": 3.258366892888249e-06, "loss": 0.2818382978439331, "step": 225370 }, { "epoch": 0.9676034448709032, "grad_norm": 0.001367167104035616, "learning_rate": 3.254055172770625e-06, "loss": 0.2431161642074585, "step": 225380 }, { "epoch": 0.9676463769609233, "grad_norm": 0.32261449098587036, "learning_rate": 3.2497434526530015e-06, "loss": 0.2842667818069458, "step": 225390 }, { "epoch": 0.9676893090509432, "grad_norm": 1.915399432182312, "learning_rate": 3.2454317325353776e-06, "loss": 0.17604867219924927, "step": 225400 }, { "epoch": 0.9677322411409632, "grad_norm": 0.08497922867536545, "learning_rate": 3.241120012417754e-06, "loss": 0.11107239723205567, "step": 225410 }, { "epoch": 0.9677751732309833, "grad_norm": 2.0568411350250244, "learning_rate": 3.23680829230013e-06, "loss": 0.27031285762786866, "step": 225420 }, { "epoch": 0.9678181053210032, "grad_norm": 0.01337936706840992, "learning_rate": 3.232496572182506e-06, "loss": 0.12987562417984008, "step": 225430 }, { "epoch": 0.9678610374110233, "grad_norm": 3.4393444061279297, "learning_rate": 3.2281848520648827e-06, "loss": 0.13027408123016357, "step": 225440 }, { "epoch": 0.9679039695010433, "grad_norm": 2.069347858428955, "learning_rate": 3.2238731319472596e-06, "loss": 0.15661873817443847, "step": 225450 }, { "epoch": 0.9679469015910632, "grad_norm": 0.00041850711568258703, "learning_rate": 3.2195614118296357e-06, "loss": 0.15425716638565062, "step": 225460 }, { "epoch": 0.9679898336810833, "grad_norm": 3.110551357269287, "learning_rate": 3.215249691712012e-06, "loss": 0.03208427131175995, "step": 225470 }, { "epoch": 0.9680327657711033, "grad_norm": 0.019477995112538338, "learning_rate": 3.2109379715943882e-06, "loss": 0.15079782009124756, "step": 225480 }, { "epoch": 0.9680756978611232, "grad_norm": 0.09103574603796005, "learning_rate": 3.2066262514767643e-06, "loss": 0.1436993360519409, "step": 225490 }, { "epoch": 0.9681186299511433, "grad_norm": 1.1488316059112549, "learning_rate": 3.202314531359141e-06, "loss": 0.4554173946380615, "step": 225500 }, { "epoch": 0.9681615620411633, "grad_norm": 8.552606582641602, "learning_rate": 3.198002811241517e-06, "loss": 0.25870747566223146, "step": 225510 }, { "epoch": 0.9682044941311833, "grad_norm": 1.9646648168563843, "learning_rate": 3.1936910911238934e-06, "loss": 0.3105947732925415, "step": 225520 }, { "epoch": 0.9682474262212033, "grad_norm": 0.2984846830368042, "learning_rate": 3.1893793710062695e-06, "loss": 0.12478234767913818, "step": 225530 }, { "epoch": 0.9682903583112233, "grad_norm": 0.0017521742265671492, "learning_rate": 3.1850676508886455e-06, "loss": 0.1251183867454529, "step": 225540 }, { "epoch": 0.9683332904012433, "grad_norm": 0.008583495393395424, "learning_rate": 3.180755930771022e-06, "loss": 0.08604643344879151, "step": 225550 }, { "epoch": 0.9683762224912633, "grad_norm": 0.0018312825122848153, "learning_rate": 3.176444210653398e-06, "loss": 0.10744818449020385, "step": 225560 }, { "epoch": 0.9684191545812834, "grad_norm": 0.009577479213476181, "learning_rate": 3.172132490535774e-06, "loss": 0.12377842664718627, "step": 225570 }, { "epoch": 0.9684620866713033, "grad_norm": 0.0017507995944470167, "learning_rate": 3.1678207704181507e-06, "loss": 0.3090746641159058, "step": 225580 }, { "epoch": 0.9685050187613233, "grad_norm": 0.9828161597251892, "learning_rate": 3.1635090503005267e-06, "loss": 0.2902642250061035, "step": 225590 }, { "epoch": 0.9685479508513434, "grad_norm": 0.14380550384521484, "learning_rate": 3.1591973301829032e-06, "loss": 0.11983634233474731, "step": 225600 }, { "epoch": 0.9685908829413633, "grad_norm": 1.3398834466934204, "learning_rate": 3.1548856100652793e-06, "loss": 0.23884208202362062, "step": 225610 }, { "epoch": 0.9686338150313833, "grad_norm": 0.012832066975533962, "learning_rate": 3.1505738899476554e-06, "loss": 0.19006496667861938, "step": 225620 }, { "epoch": 0.9686767471214034, "grad_norm": 0.0011093963403254747, "learning_rate": 3.146262169830032e-06, "loss": 0.20199911594390868, "step": 225630 }, { "epoch": 0.9687196792114233, "grad_norm": 1.6886579990386963, "learning_rate": 3.141950449712409e-06, "loss": 0.19745049476623536, "step": 225640 }, { "epoch": 0.9687626113014434, "grad_norm": 2.032439708709717, "learning_rate": 3.137638729594785e-06, "loss": 0.24782955646514893, "step": 225650 }, { "epoch": 0.9688055433914634, "grad_norm": 0.0008506453596055508, "learning_rate": 3.1333270094771614e-06, "loss": 0.5238415718078613, "step": 225660 }, { "epoch": 0.9688484754814833, "grad_norm": 0.043741144239902496, "learning_rate": 3.1290152893595374e-06, "loss": 0.3239001274108887, "step": 225670 }, { "epoch": 0.9688914075715034, "grad_norm": 0.0012500104494392872, "learning_rate": 3.1247035692419135e-06, "loss": 0.12101260423660279, "step": 225680 }, { "epoch": 0.9689343396615234, "grad_norm": 0.009424582123756409, "learning_rate": 3.12039184912429e-06, "loss": 0.17357531785964966, "step": 225690 }, { "epoch": 0.9689772717515434, "grad_norm": 0.06987378001213074, "learning_rate": 3.116080129006666e-06, "loss": 0.5514712810516358, "step": 225700 }, { "epoch": 0.9690202038415634, "grad_norm": 0.8753553032875061, "learning_rate": 3.1117684088890426e-06, "loss": 0.1353633165359497, "step": 225710 }, { "epoch": 0.9690631359315834, "grad_norm": 3.089796304702759, "learning_rate": 3.1074566887714186e-06, "loss": 0.14911720752716065, "step": 225720 }, { "epoch": 0.9691060680216034, "grad_norm": 2.0691325664520264, "learning_rate": 3.1031449686537947e-06, "loss": 0.17857154607772827, "step": 225730 }, { "epoch": 0.9691490001116234, "grad_norm": 0.24299030005931854, "learning_rate": 3.0988332485361712e-06, "loss": 0.1547597885131836, "step": 225740 }, { "epoch": 0.9691919322016435, "grad_norm": 0.003923803102225065, "learning_rate": 3.0945215284185473e-06, "loss": 0.07332241535186768, "step": 225750 }, { "epoch": 0.9692348642916635, "grad_norm": 0.0067015704698860645, "learning_rate": 3.0902098083009238e-06, "loss": 0.17883477210998536, "step": 225760 }, { "epoch": 0.9692777963816834, "grad_norm": 0.0028638611547648907, "learning_rate": 3.0858980881833003e-06, "loss": 0.003787020221352577, "step": 225770 }, { "epoch": 0.9693207284717035, "grad_norm": 1.8772034645080566, "learning_rate": 3.0815863680656763e-06, "loss": 0.15486459732055663, "step": 225780 }, { "epoch": 0.9693636605617235, "grad_norm": 0.00863329041749239, "learning_rate": 3.077274647948053e-06, "loss": 0.0408756822347641, "step": 225790 }, { "epoch": 0.9694065926517434, "grad_norm": 3.4329917430877686, "learning_rate": 3.072962927830429e-06, "loss": 0.3295733451843262, "step": 225800 }, { "epoch": 0.9694495247417635, "grad_norm": 2.6693780422210693, "learning_rate": 3.068651207712805e-06, "loss": 0.273480224609375, "step": 225810 }, { "epoch": 0.9694924568317835, "grad_norm": 0.059428147971630096, "learning_rate": 3.0643394875951815e-06, "loss": 0.11988484859466553, "step": 225820 }, { "epoch": 0.9695353889218035, "grad_norm": 0.3197462260723114, "learning_rate": 3.0600277674775576e-06, "loss": 0.008120565116405487, "step": 225830 }, { "epoch": 0.9695783210118235, "grad_norm": 0.00911969318985939, "learning_rate": 3.0557160473599336e-06, "loss": 0.04398435056209564, "step": 225840 }, { "epoch": 0.9696212531018435, "grad_norm": 0.040632810443639755, "learning_rate": 3.05140432724231e-06, "loss": 0.04892999529838562, "step": 225850 }, { "epoch": 0.9696641851918635, "grad_norm": 1.2151085138320923, "learning_rate": 3.0470926071246866e-06, "loss": 0.2384195327758789, "step": 225860 }, { "epoch": 0.9697071172818835, "grad_norm": 4.104531288146973, "learning_rate": 3.0427808870070627e-06, "loss": 0.11756811141967774, "step": 225870 }, { "epoch": 0.9697500493719036, "grad_norm": 0.016218269243836403, "learning_rate": 3.038469166889439e-06, "loss": 0.1457047700881958, "step": 225880 }, { "epoch": 0.9697929814619235, "grad_norm": 0.11366493999958038, "learning_rate": 3.0341574467718153e-06, "loss": 0.09916544556617737, "step": 225890 }, { "epoch": 0.9698359135519435, "grad_norm": 0.08160996437072754, "learning_rate": 3.0298457266541918e-06, "loss": 0.15691580772399902, "step": 225900 }, { "epoch": 0.9698788456419636, "grad_norm": 0.02445426769554615, "learning_rate": 3.025534006536568e-06, "loss": 0.19571220874786377, "step": 225910 }, { "epoch": 0.9699217777319835, "grad_norm": 0.006836262531578541, "learning_rate": 3.021222286418944e-06, "loss": 0.17478055953979493, "step": 225920 }, { "epoch": 0.9699647098220036, "grad_norm": 0.005737704690545797, "learning_rate": 3.0169105663013204e-06, "loss": 0.18237990140914917, "step": 225930 }, { "epoch": 0.9700076419120236, "grad_norm": 0.008860369212925434, "learning_rate": 3.0125988461836965e-06, "loss": 0.2603444576263428, "step": 225940 }, { "epoch": 0.9700505740020435, "grad_norm": 0.04425879567861557, "learning_rate": 3.008287126066073e-06, "loss": 0.20395264625549317, "step": 225950 }, { "epoch": 0.9700935060920636, "grad_norm": 0.02337292581796646, "learning_rate": 3.0039754059484495e-06, "loss": 0.021476563811302186, "step": 225960 }, { "epoch": 0.9701364381820836, "grad_norm": 0.08876360952854156, "learning_rate": 2.9996636858308255e-06, "loss": 0.2368551015853882, "step": 225970 }, { "epoch": 0.9701793702721035, "grad_norm": 0.5836232304573059, "learning_rate": 2.995351965713202e-06, "loss": 0.10550501346588134, "step": 225980 }, { "epoch": 0.9702223023621236, "grad_norm": 0.042574867606163025, "learning_rate": 2.991040245595578e-06, "loss": 0.07009645700454711, "step": 225990 }, { "epoch": 0.9702652344521436, "grad_norm": 0.003975628409534693, "learning_rate": 2.986728525477954e-06, "loss": 0.13062582015991211, "step": 226000 }, { "epoch": 0.9702652344521436, "eval_loss": 0.3667832612991333, "eval_runtime": 27.4415, "eval_samples_per_second": 3.644, "eval_steps_per_second": 3.644, "step": 226000 }, { "epoch": 0.9703081665421636, "grad_norm": 0.0030697945039719343, "learning_rate": 2.9824168053603307e-06, "loss": 0.3693444013595581, "step": 226010 }, { "epoch": 0.9703510986321836, "grad_norm": 0.6716877818107605, "learning_rate": 2.9781050852427067e-06, "loss": 0.25938024520874026, "step": 226020 }, { "epoch": 0.9703940307222036, "grad_norm": 1.5804414749145508, "learning_rate": 2.973793365125083e-06, "loss": 0.2481531620025635, "step": 226030 }, { "epoch": 0.9704369628122236, "grad_norm": 1.3616093397140503, "learning_rate": 2.9694816450074593e-06, "loss": 0.18937134742736816, "step": 226040 }, { "epoch": 0.9704798949022436, "grad_norm": 1.3570085763931274, "learning_rate": 2.965169924889836e-06, "loss": 0.12422878742218017, "step": 226050 }, { "epoch": 0.9705228269922637, "grad_norm": 0.004477211739867926, "learning_rate": 2.960858204772212e-06, "loss": 0.014526186883449555, "step": 226060 }, { "epoch": 0.9705657590822836, "grad_norm": 0.3260514438152313, "learning_rate": 2.9565464846545884e-06, "loss": 0.05073114037513733, "step": 226070 }, { "epoch": 0.9706086911723036, "grad_norm": 0.0009509851224720478, "learning_rate": 2.9522347645369644e-06, "loss": 0.16182929277420044, "step": 226080 }, { "epoch": 0.9706516232623237, "grad_norm": 2.467970371246338, "learning_rate": 2.947923044419341e-06, "loss": 0.3950214862823486, "step": 226090 }, { "epoch": 0.9706945553523436, "grad_norm": 0.006497784983366728, "learning_rate": 2.943611324301717e-06, "loss": 0.15148746967315674, "step": 226100 }, { "epoch": 0.9707374874423637, "grad_norm": 0.019508181139826775, "learning_rate": 2.939299604184093e-06, "loss": 0.02425907403230667, "step": 226110 }, { "epoch": 0.9707804195323837, "grad_norm": 0.0019436703296378255, "learning_rate": 2.9349878840664696e-06, "loss": 0.20029797554016113, "step": 226120 }, { "epoch": 0.9708233516224036, "grad_norm": 4.597479343414307, "learning_rate": 2.9306761639488457e-06, "loss": 0.3057328939437866, "step": 226130 }, { "epoch": 0.9708662837124237, "grad_norm": 0.0020158360712230206, "learning_rate": 2.926364443831222e-06, "loss": 0.1163069725036621, "step": 226140 }, { "epoch": 0.9709092158024437, "grad_norm": 0.00654413690790534, "learning_rate": 2.9220527237135987e-06, "loss": 0.2541393995285034, "step": 226150 }, { "epoch": 0.9709521478924636, "grad_norm": 0.08558319509029388, "learning_rate": 2.9177410035959747e-06, "loss": 0.19713094234466552, "step": 226160 }, { "epoch": 0.9709950799824837, "grad_norm": 0.7128730416297913, "learning_rate": 2.9134292834783512e-06, "loss": 0.4300590991973877, "step": 226170 }, { "epoch": 0.9710380120725037, "grad_norm": 0.0020223883911967278, "learning_rate": 2.9091175633607273e-06, "loss": 0.01936231702566147, "step": 226180 }, { "epoch": 0.9710809441625238, "grad_norm": 0.005232381168752909, "learning_rate": 2.9048058432431034e-06, "loss": 0.1344476342201233, "step": 226190 }, { "epoch": 0.9711238762525437, "grad_norm": 0.4355989694595337, "learning_rate": 2.90049412312548e-06, "loss": 0.25427677631378176, "step": 226200 }, { "epoch": 0.9711668083425637, "grad_norm": 0.0015556018333882093, "learning_rate": 2.896182403007856e-06, "loss": 0.26048619747161866, "step": 226210 }, { "epoch": 0.9712097404325838, "grad_norm": 0.005317199043929577, "learning_rate": 2.891870682890232e-06, "loss": 0.30444252490997314, "step": 226220 }, { "epoch": 0.9712526725226037, "grad_norm": 0.013098231516778469, "learning_rate": 2.887558962772609e-06, "loss": 0.1752004861831665, "step": 226230 }, { "epoch": 0.9712956046126238, "grad_norm": 0.014951915480196476, "learning_rate": 2.883247242654985e-06, "loss": 0.18930820226669312, "step": 226240 }, { "epoch": 0.9713385367026438, "grad_norm": 0.002639220794662833, "learning_rate": 2.878935522537361e-06, "loss": 0.17423256635665893, "step": 226250 }, { "epoch": 0.9713814687926637, "grad_norm": 0.0012302837567403913, "learning_rate": 2.8746238024197376e-06, "loss": 0.23043808937072754, "step": 226260 }, { "epoch": 0.9714244008826838, "grad_norm": 3.3449714183807373, "learning_rate": 2.8703120823021136e-06, "loss": 0.15526468753814698, "step": 226270 }, { "epoch": 0.9714673329727038, "grad_norm": 1.695763349533081, "learning_rate": 2.86600036218449e-06, "loss": 0.29962406158447263, "step": 226280 }, { "epoch": 0.9715102650627238, "grad_norm": 0.01537360530346632, "learning_rate": 2.861688642066866e-06, "loss": 0.03783855736255646, "step": 226290 }, { "epoch": 0.9715531971527438, "grad_norm": 0.000410413253121078, "learning_rate": 2.8573769219492423e-06, "loss": 0.3130494117736816, "step": 226300 }, { "epoch": 0.9715961292427638, "grad_norm": 2.2895946502685547, "learning_rate": 2.8530652018316188e-06, "loss": 0.3380448579788208, "step": 226310 }, { "epoch": 0.9716390613327838, "grad_norm": 2.4340476989746094, "learning_rate": 2.8487534817139953e-06, "loss": 0.35978193283081056, "step": 226320 }, { "epoch": 0.9716819934228038, "grad_norm": 0.40139251947402954, "learning_rate": 2.8444417615963713e-06, "loss": 0.18451598882675171, "step": 226330 }, { "epoch": 0.9717249255128239, "grad_norm": 3.936011552810669, "learning_rate": 2.840130041478748e-06, "loss": 0.40130367279052737, "step": 226340 }, { "epoch": 0.9717678576028438, "grad_norm": 0.03930728882551193, "learning_rate": 2.835818321361124e-06, "loss": 0.14985283613204955, "step": 226350 }, { "epoch": 0.9718107896928638, "grad_norm": 0.5176249146461487, "learning_rate": 2.8315066012435004e-06, "loss": 0.19097598791122436, "step": 226360 }, { "epoch": 0.9718537217828839, "grad_norm": 0.0014861667295917869, "learning_rate": 2.8271948811258765e-06, "loss": 0.24988765716552735, "step": 226370 }, { "epoch": 0.9718966538729038, "grad_norm": 1.4453141689300537, "learning_rate": 2.8228831610082526e-06, "loss": 0.28491060733795165, "step": 226380 }, { "epoch": 0.9719395859629238, "grad_norm": 0.6912074685096741, "learning_rate": 2.818571440890629e-06, "loss": 0.022906261682510375, "step": 226390 }, { "epoch": 0.9719825180529439, "grad_norm": 2.4349849224090576, "learning_rate": 2.814259720773005e-06, "loss": 0.24235982894897462, "step": 226400 }, { "epoch": 0.9720254501429638, "grad_norm": 4.375247001647949, "learning_rate": 2.8099480006553816e-06, "loss": 0.43722686767578123, "step": 226410 }, { "epoch": 0.9720683822329839, "grad_norm": 0.750277042388916, "learning_rate": 2.805636280537758e-06, "loss": 0.11602920293807983, "step": 226420 }, { "epoch": 0.9721113143230039, "grad_norm": 0.5917320251464844, "learning_rate": 2.801324560420134e-06, "loss": 0.2671214580535889, "step": 226430 }, { "epoch": 0.9721542464130238, "grad_norm": 1.1064698696136475, "learning_rate": 2.7970128403025103e-06, "loss": 0.16829732656478882, "step": 226440 }, { "epoch": 0.9721971785030439, "grad_norm": 3.341284990310669, "learning_rate": 2.7927011201848868e-06, "loss": 0.13691198825836182, "step": 226450 }, { "epoch": 0.9722401105930639, "grad_norm": 1.4227256774902344, "learning_rate": 2.788389400067263e-06, "loss": 0.20174951553344728, "step": 226460 }, { "epoch": 0.9722830426830839, "grad_norm": 0.05399510636925697, "learning_rate": 2.7840776799496393e-06, "loss": 0.25884032249450684, "step": 226470 }, { "epoch": 0.9723259747731039, "grad_norm": 1.700989842414856, "learning_rate": 2.7797659598320154e-06, "loss": 0.15701980590820314, "step": 226480 }, { "epoch": 0.9723689068631239, "grad_norm": 0.06168566271662712, "learning_rate": 2.7754542397143915e-06, "loss": 0.3472371816635132, "step": 226490 }, { "epoch": 0.9724118389531439, "grad_norm": 0.029171306639909744, "learning_rate": 2.7711425195967684e-06, "loss": 0.2525274991989136, "step": 226500 }, { "epoch": 0.9724547710431639, "grad_norm": 0.006166656501591206, "learning_rate": 2.7668307994791445e-06, "loss": 0.41352314949035646, "step": 226510 }, { "epoch": 0.972497703133184, "grad_norm": 0.18927429616451263, "learning_rate": 2.7625190793615205e-06, "loss": 0.17671269178390503, "step": 226520 }, { "epoch": 0.9725406352232039, "grad_norm": 1.8159795999526978, "learning_rate": 2.758207359243897e-06, "loss": 0.15529967546463014, "step": 226530 }, { "epoch": 0.9725835673132239, "grad_norm": 0.02586592175066471, "learning_rate": 2.753895639126273e-06, "loss": 0.15995392799377442, "step": 226540 }, { "epoch": 0.972626499403244, "grad_norm": 0.007591897621750832, "learning_rate": 2.7495839190086496e-06, "loss": 0.04184904992580414, "step": 226550 }, { "epoch": 0.9726694314932639, "grad_norm": 0.012123005464673042, "learning_rate": 2.7452721988910257e-06, "loss": 0.21659538745880128, "step": 226560 }, { "epoch": 0.972712363583284, "grad_norm": 1.6355416774749756, "learning_rate": 2.7409604787734017e-06, "loss": 0.35333163738250734, "step": 226570 }, { "epoch": 0.972755295673304, "grad_norm": 0.010789229534566402, "learning_rate": 2.7366487586557782e-06, "loss": 0.12032248973846435, "step": 226580 }, { "epoch": 0.9727982277633239, "grad_norm": 1.9976438283920288, "learning_rate": 2.7323370385381547e-06, "loss": 0.05439336895942688, "step": 226590 }, { "epoch": 0.972841159853344, "grad_norm": 0.013741575181484222, "learning_rate": 2.728025318420531e-06, "loss": 0.15803490877151488, "step": 226600 }, { "epoch": 0.972884091943364, "grad_norm": 0.2421882599592209, "learning_rate": 2.7237135983029073e-06, "loss": 0.2856144905090332, "step": 226610 }, { "epoch": 0.972927024033384, "grad_norm": 1.9955962896347046, "learning_rate": 2.7194018781852834e-06, "loss": 0.16014361381530762, "step": 226620 }, { "epoch": 0.972969956123404, "grad_norm": 0.28946375846862793, "learning_rate": 2.7150901580676594e-06, "loss": 0.2100440740585327, "step": 226630 }, { "epoch": 0.973012888213424, "grad_norm": 1.82895827293396, "learning_rate": 2.710778437950036e-06, "loss": 0.2709784746170044, "step": 226640 }, { "epoch": 0.9730558203034441, "grad_norm": 0.0039507439360022545, "learning_rate": 2.706466717832412e-06, "loss": 0.15208420753479004, "step": 226650 }, { "epoch": 0.973098752393464, "grad_norm": 8.616528511047363, "learning_rate": 2.7021549977147885e-06, "loss": 0.31345291137695314, "step": 226660 }, { "epoch": 0.973141684483484, "grad_norm": 0.6658341884613037, "learning_rate": 2.6978432775971646e-06, "loss": 0.21907491683959962, "step": 226670 }, { "epoch": 0.9731846165735041, "grad_norm": 0.02232818678021431, "learning_rate": 2.693531557479541e-06, "loss": 0.201385498046875, "step": 226680 }, { "epoch": 0.973227548663524, "grad_norm": 1.584527850151062, "learning_rate": 2.6892198373619176e-06, "loss": 0.4038440704345703, "step": 226690 }, { "epoch": 0.973270480753544, "grad_norm": 0.014107631519436836, "learning_rate": 2.6849081172442936e-06, "loss": 0.2761273145675659, "step": 226700 }, { "epoch": 0.9733134128435641, "grad_norm": 0.002957344288006425, "learning_rate": 2.6805963971266697e-06, "loss": 0.12850207090377808, "step": 226710 }, { "epoch": 0.973356344933584, "grad_norm": 0.0005787658737972379, "learning_rate": 2.676284677009046e-06, "loss": 0.14453613758087158, "step": 226720 }, { "epoch": 0.9733992770236041, "grad_norm": 4.067680835723877, "learning_rate": 2.6719729568914223e-06, "loss": 0.20334532260894775, "step": 226730 }, { "epoch": 0.9734422091136241, "grad_norm": 7.407779216766357, "learning_rate": 2.6676612367737988e-06, "loss": 0.35728869438171384, "step": 226740 }, { "epoch": 0.973485141203644, "grad_norm": 3.8809151649475098, "learning_rate": 2.663349516656175e-06, "loss": 0.23047690391540526, "step": 226750 }, { "epoch": 0.9735280732936641, "grad_norm": 0.03422745689749718, "learning_rate": 2.659037796538551e-06, "loss": 0.2741354703903198, "step": 226760 }, { "epoch": 0.9735710053836841, "grad_norm": 4.280041217803955, "learning_rate": 2.654726076420928e-06, "loss": 0.04992449879646301, "step": 226770 }, { "epoch": 0.9736139374737041, "grad_norm": 0.03290945664048195, "learning_rate": 2.650414356303304e-06, "loss": 0.07728307247161866, "step": 226780 }, { "epoch": 0.9736568695637241, "grad_norm": 0.003999393433332443, "learning_rate": 2.64610263618568e-06, "loss": 0.1892857313156128, "step": 226790 }, { "epoch": 0.9736998016537441, "grad_norm": 1.8522354364395142, "learning_rate": 2.6417909160680565e-06, "loss": 0.37737085819244387, "step": 226800 }, { "epoch": 0.9737427337437641, "grad_norm": 0.1001518964767456, "learning_rate": 2.6374791959504326e-06, "loss": 0.12852399349212645, "step": 226810 }, { "epoch": 0.9737856658337841, "grad_norm": 0.04786868020892143, "learning_rate": 2.633167475832809e-06, "loss": 0.09738991856575012, "step": 226820 }, { "epoch": 0.9738285979238042, "grad_norm": 0.3759821951389313, "learning_rate": 2.628855755715185e-06, "loss": 0.18861787319183348, "step": 226830 }, { "epoch": 0.9738715300138241, "grad_norm": 0.0015561191830784082, "learning_rate": 2.624544035597561e-06, "loss": 0.1046902894973755, "step": 226840 }, { "epoch": 0.9739144621038441, "grad_norm": 0.010539585724473, "learning_rate": 2.6202323154799377e-06, "loss": 0.14529746770858765, "step": 226850 }, { "epoch": 0.9739573941938642, "grad_norm": 0.002079649828374386, "learning_rate": 2.615920595362314e-06, "loss": 0.1505010724067688, "step": 226860 }, { "epoch": 0.9740003262838841, "grad_norm": 0.0004063397354912013, "learning_rate": 2.6116088752446903e-06, "loss": 0.07104050517082214, "step": 226870 }, { "epoch": 0.9740432583739042, "grad_norm": 0.649005651473999, "learning_rate": 2.6072971551270668e-06, "loss": 0.12537972927093505, "step": 226880 }, { "epoch": 0.9740861904639242, "grad_norm": 2.9869272708892822, "learning_rate": 2.602985435009443e-06, "loss": 0.4363424301147461, "step": 226890 }, { "epoch": 0.9741291225539441, "grad_norm": 0.004428323358297348, "learning_rate": 2.598673714891819e-06, "loss": 0.268056321144104, "step": 226900 }, { "epoch": 0.9741720546439642, "grad_norm": 0.025846082717180252, "learning_rate": 2.5943619947741954e-06, "loss": 0.17284902334213256, "step": 226910 }, { "epoch": 0.9742149867339842, "grad_norm": 0.07464879006147385, "learning_rate": 2.5900502746565715e-06, "loss": 0.35149922370910647, "step": 226920 }, { "epoch": 0.9742579188240041, "grad_norm": 0.003758594859391451, "learning_rate": 2.585738554538948e-06, "loss": 0.08602445125579834, "step": 226930 }, { "epoch": 0.9743008509140242, "grad_norm": 0.0022107944823801517, "learning_rate": 2.581426834421324e-06, "loss": 0.3697013854980469, "step": 226940 }, { "epoch": 0.9743437830040442, "grad_norm": 1.706981897354126, "learning_rate": 2.5771151143037e-06, "loss": 0.309721565246582, "step": 226950 }, { "epoch": 0.9743867150940642, "grad_norm": 1.509405255317688, "learning_rate": 2.572803394186077e-06, "loss": 0.12059307098388672, "step": 226960 }, { "epoch": 0.9744296471840842, "grad_norm": 0.014540722593665123, "learning_rate": 2.568491674068453e-06, "loss": 0.13089817762374878, "step": 226970 }, { "epoch": 0.9744725792741042, "grad_norm": 5.593906402587891, "learning_rate": 2.564179953950829e-06, "loss": 0.12888550758361816, "step": 226980 }, { "epoch": 0.9745155113641242, "grad_norm": 0.0023993600625544786, "learning_rate": 2.5598682338332057e-06, "loss": 0.14289275407791138, "step": 226990 }, { "epoch": 0.9745584434541442, "grad_norm": 0.0118110878393054, "learning_rate": 2.5555565137155817e-06, "loss": 0.43169412612915037, "step": 227000 }, { "epoch": 0.9745584434541442, "eval_loss": 0.36603018641471863, "eval_runtime": 27.3928, "eval_samples_per_second": 3.651, "eval_steps_per_second": 3.651, "step": 227000 }, { "epoch": 0.9746013755441643, "grad_norm": 0.03272734209895134, "learning_rate": 2.5512447935979582e-06, "loss": 0.18798859119415284, "step": 227010 }, { "epoch": 0.9746443076341842, "grad_norm": 0.07965968549251556, "learning_rate": 2.5469330734803343e-06, "loss": 0.1925390124320984, "step": 227020 }, { "epoch": 0.9746872397242042, "grad_norm": 6.077420234680176, "learning_rate": 2.5426213533627104e-06, "loss": 0.19045372009277345, "step": 227030 }, { "epoch": 0.9747301718142243, "grad_norm": 0.2418179214000702, "learning_rate": 2.538309633245087e-06, "loss": 0.1367825984954834, "step": 227040 }, { "epoch": 0.9747731039042443, "grad_norm": 0.0008199986768886447, "learning_rate": 2.5339979131274634e-06, "loss": 0.15820411443710328, "step": 227050 }, { "epoch": 0.9748160359942643, "grad_norm": 0.24902009963989258, "learning_rate": 2.5296861930098394e-06, "loss": 0.08510852456092835, "step": 227060 }, { "epoch": 0.9748589680842843, "grad_norm": 0.8076011538505554, "learning_rate": 2.525374472892216e-06, "loss": 0.16344135999679565, "step": 227070 }, { "epoch": 0.9749019001743043, "grad_norm": 0.015640171244740486, "learning_rate": 2.521062752774592e-06, "loss": 0.07457131147384644, "step": 227080 }, { "epoch": 0.9749448322643243, "grad_norm": 0.001444989233277738, "learning_rate": 2.516751032656968e-06, "loss": 0.1386529803276062, "step": 227090 }, { "epoch": 0.9749877643543443, "grad_norm": 0.039012517780065536, "learning_rate": 2.5124393125393446e-06, "loss": 0.002665388956665993, "step": 227100 }, { "epoch": 0.9750306964443644, "grad_norm": 0.15500108897686005, "learning_rate": 2.5081275924217207e-06, "loss": 0.33515994548797606, "step": 227110 }, { "epoch": 0.9750736285343843, "grad_norm": 0.0024809353053569794, "learning_rate": 2.503815872304097e-06, "loss": 0.06954295039176941, "step": 227120 }, { "epoch": 0.9751165606244043, "grad_norm": 0.0008949413313530385, "learning_rate": 2.4995041521864732e-06, "loss": 0.20227794647216796, "step": 227130 }, { "epoch": 0.9751594927144244, "grad_norm": 0.0061431932263076305, "learning_rate": 2.4951924320688497e-06, "loss": 0.07348456382751464, "step": 227140 }, { "epoch": 0.9752024248044443, "grad_norm": 0.039200812578201294, "learning_rate": 2.4908807119512262e-06, "loss": 0.1787712812423706, "step": 227150 }, { "epoch": 0.9752453568944643, "grad_norm": 0.2038622498512268, "learning_rate": 2.4865689918336023e-06, "loss": 0.3955040454864502, "step": 227160 }, { "epoch": 0.9752882889844844, "grad_norm": 0.0030347283463925123, "learning_rate": 2.4822572717159784e-06, "loss": 0.3216248989105225, "step": 227170 }, { "epoch": 0.9753312210745043, "grad_norm": 0.23773819208145142, "learning_rate": 2.477945551598355e-06, "loss": 0.0572589635848999, "step": 227180 }, { "epoch": 0.9753741531645244, "grad_norm": 4.1003546714782715, "learning_rate": 2.473633831480731e-06, "loss": 0.3647440433502197, "step": 227190 }, { "epoch": 0.9754170852545444, "grad_norm": 0.010763336904346943, "learning_rate": 2.4693221113631074e-06, "loss": 0.07364763617515564, "step": 227200 }, { "epoch": 0.9754600173445643, "grad_norm": 0.05231078341603279, "learning_rate": 2.4650103912454835e-06, "loss": 0.21562786102294923, "step": 227210 }, { "epoch": 0.9755029494345844, "grad_norm": 0.0015636775642633438, "learning_rate": 2.4606986711278596e-06, "loss": 0.1701973557472229, "step": 227220 }, { "epoch": 0.9755458815246044, "grad_norm": 0.0006244481191970408, "learning_rate": 2.4563869510102365e-06, "loss": 0.22180135250091554, "step": 227230 }, { "epoch": 0.9755888136146244, "grad_norm": 1.9431523084640503, "learning_rate": 2.4520752308926126e-06, "loss": 0.3102797269821167, "step": 227240 }, { "epoch": 0.9756317457046444, "grad_norm": 4.471474647521973, "learning_rate": 2.4477635107749886e-06, "loss": 0.47100114822387695, "step": 227250 }, { "epoch": 0.9756746777946644, "grad_norm": 0.007208989933133125, "learning_rate": 2.443451790657365e-06, "loss": 0.06431352496147155, "step": 227260 }, { "epoch": 0.9757176098846844, "grad_norm": 2.231614589691162, "learning_rate": 2.439140070539741e-06, "loss": 0.3845525741577148, "step": 227270 }, { "epoch": 0.9757605419747044, "grad_norm": 0.024662956595420837, "learning_rate": 2.4348283504221173e-06, "loss": 0.1768028736114502, "step": 227280 }, { "epoch": 0.9758034740647245, "grad_norm": 0.19310876727104187, "learning_rate": 2.4305166303044938e-06, "loss": 0.12158677577972413, "step": 227290 }, { "epoch": 0.9758464061547444, "grad_norm": 1.1528043746948242, "learning_rate": 2.42620491018687e-06, "loss": 0.1612181305885315, "step": 227300 }, { "epoch": 0.9758893382447644, "grad_norm": 4.803441047668457, "learning_rate": 2.4218931900692463e-06, "loss": 0.2455395221710205, "step": 227310 }, { "epoch": 0.9759322703347845, "grad_norm": 0.8248661160469055, "learning_rate": 2.417581469951623e-06, "loss": 0.1723708391189575, "step": 227320 }, { "epoch": 0.9759752024248044, "grad_norm": 1.202093243598938, "learning_rate": 2.413269749833999e-06, "loss": 0.15180646181106566, "step": 227330 }, { "epoch": 0.9760181345148244, "grad_norm": 3.625997304916382, "learning_rate": 2.4089580297163754e-06, "loss": 0.24789741039276122, "step": 227340 }, { "epoch": 0.9760610666048445, "grad_norm": 0.038008879870176315, "learning_rate": 2.4046463095987515e-06, "loss": 0.01338741034269333, "step": 227350 }, { "epoch": 0.9761039986948644, "grad_norm": 0.2671995759010315, "learning_rate": 2.4003345894811275e-06, "loss": 0.18389822244644166, "step": 227360 }, { "epoch": 0.9761469307848845, "grad_norm": 0.003803535597398877, "learning_rate": 2.396022869363504e-06, "loss": 0.0031682711094617845, "step": 227370 }, { "epoch": 0.9761898628749045, "grad_norm": 0.008440026082098484, "learning_rate": 2.39171114924588e-06, "loss": 0.207161021232605, "step": 227380 }, { "epoch": 0.9762327949649244, "grad_norm": 112.1073989868164, "learning_rate": 2.3873994291282566e-06, "loss": 0.3629873275756836, "step": 227390 }, { "epoch": 0.9762757270549445, "grad_norm": 0.013895579613745213, "learning_rate": 2.3830877090106327e-06, "loss": 0.09868028163909912, "step": 227400 }, { "epoch": 0.9763186591449645, "grad_norm": 0.8464810848236084, "learning_rate": 2.378775988893009e-06, "loss": 0.2205869197845459, "step": 227410 }, { "epoch": 0.9763615912349844, "grad_norm": 0.12771189212799072, "learning_rate": 2.3744642687753857e-06, "loss": 0.07797903418540955, "step": 227420 }, { "epoch": 0.9764045233250045, "grad_norm": 0.3783540427684784, "learning_rate": 2.3701525486577618e-06, "loss": 0.1723588228225708, "step": 227430 }, { "epoch": 0.9764474554150245, "grad_norm": 0.02006363309919834, "learning_rate": 2.365840828540138e-06, "loss": 0.2192901849746704, "step": 227440 }, { "epoch": 0.9764903875050445, "grad_norm": 0.024119436740875244, "learning_rate": 2.3615291084225143e-06, "loss": 0.14305353164672852, "step": 227450 }, { "epoch": 0.9765333195950645, "grad_norm": 7.444068908691406, "learning_rate": 2.3572173883048904e-06, "loss": 0.31871623992919923, "step": 227460 }, { "epoch": 0.9765762516850846, "grad_norm": 0.6991757154464722, "learning_rate": 2.3529056681872665e-06, "loss": 0.10647947788238525, "step": 227470 }, { "epoch": 0.9766191837751046, "grad_norm": 1.1979470252990723, "learning_rate": 2.348593948069643e-06, "loss": 0.17139954566955568, "step": 227480 }, { "epoch": 0.9766621158651245, "grad_norm": 0.047781504690647125, "learning_rate": 2.344282227952019e-06, "loss": 0.24302124977111816, "step": 227490 }, { "epoch": 0.9767050479551446, "grad_norm": 43.781436920166016, "learning_rate": 2.3399705078343955e-06, "loss": 0.09933934807777405, "step": 227500 }, { "epoch": 0.9767479800451646, "grad_norm": 0.06575685739517212, "learning_rate": 2.335658787716772e-06, "loss": 0.3872169017791748, "step": 227510 }, { "epoch": 0.9767909121351845, "grad_norm": 1.0957386493682861, "learning_rate": 2.331347067599148e-06, "loss": 0.17452040910720826, "step": 227520 }, { "epoch": 0.9768338442252046, "grad_norm": 1.1414989233016968, "learning_rate": 2.3270353474815246e-06, "loss": 0.32037715911865233, "step": 227530 }, { "epoch": 0.9768767763152246, "grad_norm": 0.021362558007240295, "learning_rate": 2.3227236273639007e-06, "loss": 0.15376040935516358, "step": 227540 }, { "epoch": 0.9769197084052446, "grad_norm": 3.3690531253814697, "learning_rate": 2.3184119072462767e-06, "loss": 0.35323572158813477, "step": 227550 }, { "epoch": 0.9769626404952646, "grad_norm": 0.03152088448405266, "learning_rate": 2.3141001871286532e-06, "loss": 0.15891146659851074, "step": 227560 }, { "epoch": 0.9770055725852846, "grad_norm": 0.018579134717583656, "learning_rate": 2.3097884670110293e-06, "loss": 0.06911247372627258, "step": 227570 }, { "epoch": 0.9770485046753046, "grad_norm": 0.0024237283505499363, "learning_rate": 2.305476746893406e-06, "loss": 0.02136930972337723, "step": 227580 }, { "epoch": 0.9770914367653246, "grad_norm": 4.892970085144043, "learning_rate": 2.3011650267757823e-06, "loss": 0.24032959938049317, "step": 227590 }, { "epoch": 0.9771343688553447, "grad_norm": 0.0077386279590427876, "learning_rate": 2.2968533066581584e-06, "loss": 0.12323408126831055, "step": 227600 }, { "epoch": 0.9771773009453646, "grad_norm": 4.715496063232422, "learning_rate": 2.292541586540535e-06, "loss": 0.2505172252655029, "step": 227610 }, { "epoch": 0.9772202330353846, "grad_norm": 0.002098772209137678, "learning_rate": 2.288229866422911e-06, "loss": 0.08634217381477356, "step": 227620 }, { "epoch": 0.9772631651254047, "grad_norm": 0.9451601505279541, "learning_rate": 2.283918146305287e-06, "loss": 0.23014225959777831, "step": 227630 }, { "epoch": 0.9773060972154246, "grad_norm": 0.08155321329832077, "learning_rate": 2.2796064261876635e-06, "loss": 0.15814539194107055, "step": 227640 }, { "epoch": 0.9773490293054446, "grad_norm": 1.6224653720855713, "learning_rate": 2.2752947060700396e-06, "loss": 0.2659270763397217, "step": 227650 }, { "epoch": 0.9773919613954647, "grad_norm": 0.10807585716247559, "learning_rate": 2.2709829859524157e-06, "loss": 0.17804590463638306, "step": 227660 }, { "epoch": 0.9774348934854846, "grad_norm": 0.5821875929832458, "learning_rate": 2.266671265834792e-06, "loss": 0.368826699256897, "step": 227670 }, { "epoch": 0.9774778255755047, "grad_norm": 0.0009552693809382617, "learning_rate": 2.2623595457171686e-06, "loss": 0.22152063846588135, "step": 227680 }, { "epoch": 0.9775207576655247, "grad_norm": 0.00434734346345067, "learning_rate": 2.2580478255995447e-06, "loss": 0.22306718826293945, "step": 227690 }, { "epoch": 0.9775636897555446, "grad_norm": 6.497489929199219, "learning_rate": 2.253736105481921e-06, "loss": 0.2369317054748535, "step": 227700 }, { "epoch": 0.9776066218455647, "grad_norm": 0.027641694992780685, "learning_rate": 2.2494243853642973e-06, "loss": 0.22835359573364258, "step": 227710 }, { "epoch": 0.9776495539355847, "grad_norm": 0.12441679835319519, "learning_rate": 2.2451126652466738e-06, "loss": 0.06525906324386596, "step": 227720 }, { "epoch": 0.9776924860256047, "grad_norm": 1.1552482843399048, "learning_rate": 2.24080094512905e-06, "loss": 0.24290194511413574, "step": 227730 }, { "epoch": 0.9777354181156247, "grad_norm": 0.45403793454170227, "learning_rate": 2.236489225011426e-06, "loss": 0.17993674278259278, "step": 227740 }, { "epoch": 0.9777783502056447, "grad_norm": 2.896038293838501, "learning_rate": 2.2321775048938024e-06, "loss": 0.2308863639831543, "step": 227750 }, { "epoch": 0.9778212822956647, "grad_norm": 0.5456775426864624, "learning_rate": 2.2278657847761785e-06, "loss": 0.20217106342315674, "step": 227760 }, { "epoch": 0.9778642143856847, "grad_norm": 3.1207029819488525, "learning_rate": 2.223554064658555e-06, "loss": 0.33245410919189455, "step": 227770 }, { "epoch": 0.9779071464757048, "grad_norm": 2.0536296367645264, "learning_rate": 2.2192423445409315e-06, "loss": 0.14470397233963012, "step": 227780 }, { "epoch": 0.9779500785657247, "grad_norm": 0.0007920139469206333, "learning_rate": 2.2149306244233076e-06, "loss": 0.20721204280853273, "step": 227790 }, { "epoch": 0.9779930106557447, "grad_norm": 0.09720521420240402, "learning_rate": 2.210618904305684e-06, "loss": 0.14968944787979127, "step": 227800 }, { "epoch": 0.9780359427457648, "grad_norm": 0.5558373332023621, "learning_rate": 2.20630718418806e-06, "loss": 0.08571412563323974, "step": 227810 }, { "epoch": 0.9780788748357847, "grad_norm": 0.005012707784771919, "learning_rate": 2.201995464070436e-06, "loss": 0.23334116935729982, "step": 227820 }, { "epoch": 0.9781218069258047, "grad_norm": 0.002742292359471321, "learning_rate": 2.1976837439528127e-06, "loss": 0.3009469032287598, "step": 227830 }, { "epoch": 0.9781647390158248, "grad_norm": 0.18742695450782776, "learning_rate": 2.1933720238351888e-06, "loss": 0.1338501214981079, "step": 227840 }, { "epoch": 0.9782076711058447, "grad_norm": 0.001777024706825614, "learning_rate": 2.1890603037175653e-06, "loss": 0.2050083875656128, "step": 227850 }, { "epoch": 0.9782506031958648, "grad_norm": 1.8684906959533691, "learning_rate": 2.1847485835999413e-06, "loss": 0.44011287689208983, "step": 227860 }, { "epoch": 0.9782935352858848, "grad_norm": 0.13816608488559723, "learning_rate": 2.180436863482318e-06, "loss": 0.023053205013275145, "step": 227870 }, { "epoch": 0.9783364673759047, "grad_norm": 0.063438281416893, "learning_rate": 2.176125143364694e-06, "loss": 0.32267377376556394, "step": 227880 }, { "epoch": 0.9783793994659248, "grad_norm": 0.0014309908729046583, "learning_rate": 2.1718134232470704e-06, "loss": 0.10749776363372802, "step": 227890 }, { "epoch": 0.9784223315559448, "grad_norm": 0.02877630479633808, "learning_rate": 2.1675017031294465e-06, "loss": 0.2096189498901367, "step": 227900 }, { "epoch": 0.9784652636459649, "grad_norm": 0.0027715996839106083, "learning_rate": 2.163189983011823e-06, "loss": 0.17635757923126222, "step": 227910 }, { "epoch": 0.9785081957359848, "grad_norm": 0.06941622495651245, "learning_rate": 2.158878262894199e-06, "loss": 0.006739767640829087, "step": 227920 }, { "epoch": 0.9785511278260048, "grad_norm": 0.0025414933916181326, "learning_rate": 2.154566542776575e-06, "loss": 0.002655784413218498, "step": 227930 }, { "epoch": 0.9785940599160249, "grad_norm": 3.310178518295288, "learning_rate": 2.1502548226589516e-06, "loss": 0.30617356300354004, "step": 227940 }, { "epoch": 0.9786369920060448, "grad_norm": 1.4081175327301025, "learning_rate": 2.1459431025413277e-06, "loss": 0.2300107955932617, "step": 227950 }, { "epoch": 0.9786799240960649, "grad_norm": 0.009740284644067287, "learning_rate": 2.141631382423704e-06, "loss": 0.07422532439231873, "step": 227960 }, { "epoch": 0.9787228561860849, "grad_norm": 1.0734556913375854, "learning_rate": 2.1373196623060807e-06, "loss": 0.21981630325317383, "step": 227970 }, { "epoch": 0.9787657882761048, "grad_norm": 0.22914394736289978, "learning_rate": 2.1330079421884567e-06, "loss": 0.11649729013442993, "step": 227980 }, { "epoch": 0.9788087203661249, "grad_norm": 0.034725673496723175, "learning_rate": 2.1286962220708332e-06, "loss": 0.2758263349533081, "step": 227990 }, { "epoch": 0.9788516524561449, "grad_norm": 0.03354437276721001, "learning_rate": 2.1243845019532093e-06, "loss": 0.18928390741348267, "step": 228000 }, { "epoch": 0.9788516524561449, "eval_loss": 0.3664787709712982, "eval_runtime": 27.507, "eval_samples_per_second": 3.635, "eval_steps_per_second": 3.635, "step": 228000 }, { "epoch": 0.9788945845461648, "grad_norm": 0.3679724335670471, "learning_rate": 2.1200727818355854e-06, "loss": 0.09740736484527587, "step": 228010 }, { "epoch": 0.9789375166361849, "grad_norm": 0.17959082126617432, "learning_rate": 2.115761061717962e-06, "loss": 0.11371631622314453, "step": 228020 }, { "epoch": 0.9789804487262049, "grad_norm": 0.14090149104595184, "learning_rate": 2.111449341600338e-06, "loss": 0.1154598593711853, "step": 228030 }, { "epoch": 0.9790233808162249, "grad_norm": 0.14479191601276398, "learning_rate": 2.1071376214827144e-06, "loss": 0.2554008960723877, "step": 228040 }, { "epoch": 0.9790663129062449, "grad_norm": 0.08767344802618027, "learning_rate": 2.102825901365091e-06, "loss": 0.17707765102386475, "step": 228050 }, { "epoch": 0.979109244996265, "grad_norm": 0.00018606445519253612, "learning_rate": 2.098514181247467e-06, "loss": 0.0368975430727005, "step": 228060 }, { "epoch": 0.9791521770862849, "grad_norm": 0.026990529149770737, "learning_rate": 2.0942024611298435e-06, "loss": 0.16851993799209594, "step": 228070 }, { "epoch": 0.9791951091763049, "grad_norm": 0.02671569027006626, "learning_rate": 2.0898907410122196e-06, "loss": 0.19040234088897706, "step": 228080 }, { "epoch": 0.979238041266325, "grad_norm": 0.0703616812825203, "learning_rate": 2.0855790208945957e-06, "loss": 0.14678169488906861, "step": 228090 }, { "epoch": 0.9792809733563449, "grad_norm": 0.011678442358970642, "learning_rate": 2.081267300776972e-06, "loss": 0.14489219188690186, "step": 228100 }, { "epoch": 0.9793239054463649, "grad_norm": 0.03233587369322777, "learning_rate": 2.0769555806593482e-06, "loss": 0.0989188551902771, "step": 228110 }, { "epoch": 0.979366837536385, "grad_norm": 0.010937056504189968, "learning_rate": 2.0726438605417243e-06, "loss": 0.2196566343307495, "step": 228120 }, { "epoch": 0.9794097696264049, "grad_norm": 0.43799400329589844, "learning_rate": 2.068332140424101e-06, "loss": 0.43077888488769533, "step": 228130 }, { "epoch": 0.979452701716425, "grad_norm": 0.2214488834142685, "learning_rate": 2.0640204203064773e-06, "loss": 0.22788968086242675, "step": 228140 }, { "epoch": 0.979495633806445, "grad_norm": 0.004069837741553783, "learning_rate": 2.0597087001888534e-06, "loss": 0.12345916032791138, "step": 228150 }, { "epoch": 0.9795385658964649, "grad_norm": 0.025346960872411728, "learning_rate": 2.05539698007123e-06, "loss": 0.2612889051437378, "step": 228160 }, { "epoch": 0.979581497986485, "grad_norm": 0.016129495576024055, "learning_rate": 2.051085259953606e-06, "loss": 0.3327603816986084, "step": 228170 }, { "epoch": 0.979624430076505, "grad_norm": 0.668687105178833, "learning_rate": 2.0467735398359824e-06, "loss": 0.2924813270568848, "step": 228180 }, { "epoch": 0.979667362166525, "grad_norm": 3.980806827545166, "learning_rate": 2.0424618197183585e-06, "loss": 0.27712819576263426, "step": 228190 }, { "epoch": 0.979710294256545, "grad_norm": 4.9260783195495605, "learning_rate": 2.0381500996007346e-06, "loss": 0.18192075490951537, "step": 228200 }, { "epoch": 0.979753226346565, "grad_norm": 5.185247898101807, "learning_rate": 2.033838379483111e-06, "loss": 0.22626786231994628, "step": 228210 }, { "epoch": 0.979796158436585, "grad_norm": 0.0023593876976519823, "learning_rate": 2.029526659365487e-06, "loss": 0.22910573482513427, "step": 228220 }, { "epoch": 0.979839090526605, "grad_norm": 0.22327831387519836, "learning_rate": 2.0252149392478636e-06, "loss": 0.16981350183486937, "step": 228230 }, { "epoch": 0.979882022616625, "grad_norm": 0.478909432888031, "learning_rate": 2.02090321913024e-06, "loss": 0.12508680820465087, "step": 228240 }, { "epoch": 0.979924954706645, "grad_norm": 0.007253910880535841, "learning_rate": 2.016591499012616e-06, "loss": 0.34746356010437013, "step": 228250 }, { "epoch": 0.979967886796665, "grad_norm": 0.684379518032074, "learning_rate": 2.0122797788949927e-06, "loss": 0.25901849269866944, "step": 228260 }, { "epoch": 0.9800108188866851, "grad_norm": 0.6113347411155701, "learning_rate": 2.0079680587773688e-06, "loss": 0.04099811613559723, "step": 228270 }, { "epoch": 0.980053750976705, "grad_norm": 2.26021409034729, "learning_rate": 2.003656338659745e-06, "loss": 0.1390989303588867, "step": 228280 }, { "epoch": 0.980096683066725, "grad_norm": 0.06597410887479782, "learning_rate": 1.9993446185421213e-06, "loss": 0.04822684526443481, "step": 228290 }, { "epoch": 0.9801396151567451, "grad_norm": 0.3486667573451996, "learning_rate": 1.9950328984244974e-06, "loss": 0.27595837116241456, "step": 228300 }, { "epoch": 0.980182547246765, "grad_norm": 0.01786203868687153, "learning_rate": 1.9907211783068735e-06, "loss": 0.1529044032096863, "step": 228310 }, { "epoch": 0.980225479336785, "grad_norm": 2.636488437652588, "learning_rate": 1.9864094581892504e-06, "loss": 0.15176565647125245, "step": 228320 }, { "epoch": 0.9802684114268051, "grad_norm": 0.136933371424675, "learning_rate": 1.9820977380716265e-06, "loss": 0.23685121536254883, "step": 228330 }, { "epoch": 0.9803113435168251, "grad_norm": 0.007709937170147896, "learning_rate": 1.9777860179540025e-06, "loss": 0.11736712455749512, "step": 228340 }, { "epoch": 0.9803542756068451, "grad_norm": 0.028216032311320305, "learning_rate": 1.973474297836379e-06, "loss": 0.2329657793045044, "step": 228350 }, { "epoch": 0.9803972076968651, "grad_norm": 0.03977706655859947, "learning_rate": 1.969162577718755e-06, "loss": 0.24770543575286866, "step": 228360 }, { "epoch": 0.9804401397868852, "grad_norm": 0.0018368628807365894, "learning_rate": 1.9648508576011316e-06, "loss": 0.1355770230293274, "step": 228370 }, { "epoch": 0.9804830718769051, "grad_norm": 1.5883880853652954, "learning_rate": 1.9605391374835077e-06, "loss": 0.3594334840774536, "step": 228380 }, { "epoch": 0.9805260039669251, "grad_norm": 0.007805908564478159, "learning_rate": 1.9562274173658838e-06, "loss": 0.3174657583236694, "step": 228390 }, { "epoch": 0.9805689360569452, "grad_norm": 0.01015984546393156, "learning_rate": 1.9519156972482603e-06, "loss": 0.08694056272506714, "step": 228400 }, { "epoch": 0.9806118681469651, "grad_norm": 1.0115506649017334, "learning_rate": 1.9476039771306367e-06, "loss": 0.08374444246292115, "step": 228410 }, { "epoch": 0.9806548002369851, "grad_norm": 0.0012809137115254998, "learning_rate": 1.943292257013013e-06, "loss": 0.2278204917907715, "step": 228420 }, { "epoch": 0.9806977323270052, "grad_norm": 0.02701873891055584, "learning_rate": 1.9389805368953893e-06, "loss": 0.21419262886047363, "step": 228430 }, { "epoch": 0.9807406644170251, "grad_norm": 0.000725551275536418, "learning_rate": 1.9346688167777654e-06, "loss": 0.38900730609893797, "step": 228440 }, { "epoch": 0.9807835965070452, "grad_norm": 0.09149477630853653, "learning_rate": 1.930357096660142e-06, "loss": 0.13757355213165284, "step": 228450 }, { "epoch": 0.9808265285970652, "grad_norm": 0.22916379570960999, "learning_rate": 1.926045376542518e-06, "loss": 0.18153246641159057, "step": 228460 }, { "epoch": 0.9808694606870851, "grad_norm": 0.0007235017255879939, "learning_rate": 1.921733656424894e-06, "loss": 0.12875158786773683, "step": 228470 }, { "epoch": 0.9809123927771052, "grad_norm": 0.01834966614842415, "learning_rate": 1.9174219363072705e-06, "loss": 0.1255298972129822, "step": 228480 }, { "epoch": 0.9809553248671252, "grad_norm": 0.016327425837516785, "learning_rate": 1.9131102161896466e-06, "loss": 0.17698729038238525, "step": 228490 }, { "epoch": 0.9809982569571452, "grad_norm": 0.001244712620973587, "learning_rate": 1.908798496072023e-06, "loss": 0.14560694694519044, "step": 228500 }, { "epoch": 0.9810411890471652, "grad_norm": 0.011022629216313362, "learning_rate": 1.9044867759543994e-06, "loss": 0.4523012638092041, "step": 228510 }, { "epoch": 0.9810841211371852, "grad_norm": 2.6876344680786133, "learning_rate": 1.9001750558367757e-06, "loss": 0.14019935131072997, "step": 228520 }, { "epoch": 0.9811270532272052, "grad_norm": 0.038009800016880035, "learning_rate": 1.895863335719152e-06, "loss": 0.22825298309326172, "step": 228530 }, { "epoch": 0.9811699853172252, "grad_norm": 0.0013615781208500266, "learning_rate": 1.8915516156015282e-06, "loss": 0.29034783840179446, "step": 228540 }, { "epoch": 0.9812129174072453, "grad_norm": 1.2209573984146118, "learning_rate": 1.8872398954839045e-06, "loss": 0.12078677415847779, "step": 228550 }, { "epoch": 0.9812558494972652, "grad_norm": 4.229518890380859, "learning_rate": 1.8829281753662806e-06, "loss": 0.276714563369751, "step": 228560 }, { "epoch": 0.9812987815872852, "grad_norm": 0.014234269969165325, "learning_rate": 1.8786164552486569e-06, "loss": 0.1266363263130188, "step": 228570 }, { "epoch": 0.9813417136773053, "grad_norm": 0.09993533790111542, "learning_rate": 1.8743047351310332e-06, "loss": 0.23038718700408936, "step": 228580 }, { "epoch": 0.9813846457673252, "grad_norm": 19.95162582397461, "learning_rate": 1.8699930150134097e-06, "loss": 0.045977193117141726, "step": 228590 }, { "epoch": 0.9814275778573452, "grad_norm": 66.88458251953125, "learning_rate": 1.865681294895786e-06, "loss": 0.17263211011886598, "step": 228600 }, { "epoch": 0.9814705099473653, "grad_norm": 0.0014390175929293036, "learning_rate": 1.8613695747781622e-06, "loss": 0.004979272186756134, "step": 228610 }, { "epoch": 0.9815134420373852, "grad_norm": 0.035006795078516006, "learning_rate": 1.8570578546605385e-06, "loss": 0.08537711501121521, "step": 228620 }, { "epoch": 0.9815563741274053, "grad_norm": 1.645871639251709, "learning_rate": 1.8527461345429146e-06, "loss": 0.09488987922668457, "step": 228630 }, { "epoch": 0.9815993062174253, "grad_norm": 0.05962035059928894, "learning_rate": 1.8484344144252909e-06, "loss": 0.3014033794403076, "step": 228640 }, { "epoch": 0.9816422383074452, "grad_norm": 6.166053295135498, "learning_rate": 1.8441226943076671e-06, "loss": 0.152974534034729, "step": 228650 }, { "epoch": 0.9816851703974653, "grad_norm": 4.5735392570495605, "learning_rate": 1.8398109741900434e-06, "loss": 0.43787474632263185, "step": 228660 }, { "epoch": 0.9817281024874853, "grad_norm": 1.1520566940307617, "learning_rate": 1.8354992540724195e-06, "loss": 0.27316927909851074, "step": 228670 }, { "epoch": 0.9817710345775053, "grad_norm": 0.025254230946302414, "learning_rate": 1.8311875339547962e-06, "loss": 0.08152788877487183, "step": 228680 }, { "epoch": 0.9818139666675253, "grad_norm": 0.04058744013309479, "learning_rate": 1.8268758138371725e-06, "loss": 0.022492873668670654, "step": 228690 }, { "epoch": 0.9818568987575453, "grad_norm": 0.07055263966321945, "learning_rate": 1.8225640937195486e-06, "loss": 0.1144339919090271, "step": 228700 }, { "epoch": 0.9818998308475653, "grad_norm": 0.009441941976547241, "learning_rate": 1.8182523736019249e-06, "loss": 0.22424943447113038, "step": 228710 }, { "epoch": 0.9819427629375853, "grad_norm": 0.001161554828286171, "learning_rate": 1.8139406534843011e-06, "loss": 0.21555452346801757, "step": 228720 }, { "epoch": 0.9819856950276054, "grad_norm": 0.0002816063060890883, "learning_rate": 1.8096289333666774e-06, "loss": 0.18701348304748536, "step": 228730 }, { "epoch": 0.9820286271176253, "grad_norm": 0.5413615703582764, "learning_rate": 1.8053172132490537e-06, "loss": 0.15520825386047363, "step": 228740 }, { "epoch": 0.9820715592076453, "grad_norm": 0.20203712582588196, "learning_rate": 1.8010054931314298e-06, "loss": 0.1745295763015747, "step": 228750 }, { "epoch": 0.9821144912976654, "grad_norm": 0.0021156531292945147, "learning_rate": 1.796693773013806e-06, "loss": 0.055399179458618164, "step": 228760 }, { "epoch": 0.9821574233876854, "grad_norm": 0.01750045455992222, "learning_rate": 1.7923820528961828e-06, "loss": 0.18583918809890748, "step": 228770 }, { "epoch": 0.9822003554777053, "grad_norm": 0.007187838666141033, "learning_rate": 1.7880703327785588e-06, "loss": 0.060408055782318115, "step": 228780 }, { "epoch": 0.9822432875677254, "grad_norm": 0.003980322275310755, "learning_rate": 1.7837586126609351e-06, "loss": 0.1356638789176941, "step": 228790 }, { "epoch": 0.9822862196577454, "grad_norm": 1.3486459255218506, "learning_rate": 1.7794468925433114e-06, "loss": 0.15132099390029907, "step": 228800 }, { "epoch": 0.9823291517477654, "grad_norm": 0.3155653178691864, "learning_rate": 1.7751351724256877e-06, "loss": 0.1574857473373413, "step": 228810 }, { "epoch": 0.9823720838377854, "grad_norm": 0.0023163023870438337, "learning_rate": 1.7708234523080638e-06, "loss": 0.24329051971435547, "step": 228820 }, { "epoch": 0.9824150159278054, "grad_norm": 0.0015276200138032436, "learning_rate": 1.76651173219044e-06, "loss": 0.3124807357788086, "step": 228830 }, { "epoch": 0.9824579480178254, "grad_norm": 0.006269668694585562, "learning_rate": 1.7622000120728163e-06, "loss": 0.1125043511390686, "step": 228840 }, { "epoch": 0.9825008801078454, "grad_norm": 0.7968956232070923, "learning_rate": 1.7578882919551926e-06, "loss": 0.14964487552642822, "step": 228850 }, { "epoch": 0.9825438121978655, "grad_norm": 0.000462773023173213, "learning_rate": 1.7535765718375687e-06, "loss": 0.08822548389434814, "step": 228860 }, { "epoch": 0.9825867442878854, "grad_norm": 0.04455536976456642, "learning_rate": 1.7492648517199454e-06, "loss": 0.21988744735717775, "step": 228870 }, { "epoch": 0.9826296763779054, "grad_norm": 0.14884206652641296, "learning_rate": 1.7449531316023217e-06, "loss": 0.06686034798622131, "step": 228880 }, { "epoch": 0.9826726084679255, "grad_norm": 0.18106015026569366, "learning_rate": 1.7406414114846978e-06, "loss": 0.0585746705532074, "step": 228890 }, { "epoch": 0.9827155405579454, "grad_norm": 1.4197570085525513, "learning_rate": 1.736329691367074e-06, "loss": 0.045840752124786374, "step": 228900 }, { "epoch": 0.9827584726479655, "grad_norm": 1.01323401927948, "learning_rate": 1.7320179712494503e-06, "loss": 0.13620872497558595, "step": 228910 }, { "epoch": 0.9828014047379855, "grad_norm": 1.0370769500732422, "learning_rate": 1.7277062511318266e-06, "loss": 0.32361865043640137, "step": 228920 }, { "epoch": 0.9828443368280054, "grad_norm": 0.024156205356121063, "learning_rate": 1.7233945310142029e-06, "loss": 0.13605793714523315, "step": 228930 }, { "epoch": 0.9828872689180255, "grad_norm": 0.034660086035728455, "learning_rate": 1.719082810896579e-06, "loss": 0.27935500144958497, "step": 228940 }, { "epoch": 0.9829302010080455, "grad_norm": 3.552581548690796, "learning_rate": 1.7147710907789552e-06, "loss": 0.2805205821990967, "step": 228950 }, { "epoch": 0.9829731330980654, "grad_norm": 0.0014244000194594264, "learning_rate": 1.710459370661332e-06, "loss": 0.1224864363670349, "step": 228960 }, { "epoch": 0.9830160651880855, "grad_norm": 0.1946384459733963, "learning_rate": 1.706147650543708e-06, "loss": 0.24968397617340088, "step": 228970 }, { "epoch": 0.9830589972781055, "grad_norm": 0.009285945445299149, "learning_rate": 1.7018359304260843e-06, "loss": 0.023589310050010682, "step": 228980 }, { "epoch": 0.9831019293681255, "grad_norm": 0.004341872408986092, "learning_rate": 1.6975242103084606e-06, "loss": 0.08136585950851441, "step": 228990 }, { "epoch": 0.9831448614581455, "grad_norm": 0.016660042107105255, "learning_rate": 1.6932124901908369e-06, "loss": 0.2626636266708374, "step": 229000 }, { "epoch": 0.9831448614581455, "eval_loss": 0.36609160900115967, "eval_runtime": 27.3907, "eval_samples_per_second": 3.651, "eval_steps_per_second": 3.651, "step": 229000 }, { "epoch": 0.9831877935481655, "grad_norm": 2.750297784805298, "learning_rate": 1.688900770073213e-06, "loss": 0.2714789152145386, "step": 229010 }, { "epoch": 0.9832307256381855, "grad_norm": 1.1807098388671875, "learning_rate": 1.6845890499555892e-06, "loss": 0.09504563212394715, "step": 229020 }, { "epoch": 0.9832736577282055, "grad_norm": 0.00848210509866476, "learning_rate": 1.6802773298379655e-06, "loss": 0.20887374877929688, "step": 229030 }, { "epoch": 0.9833165898182256, "grad_norm": 0.053657423704862595, "learning_rate": 1.6759656097203418e-06, "loss": 0.31155006885528563, "step": 229040 }, { "epoch": 0.9833595219082455, "grad_norm": 0.0011517629027366638, "learning_rate": 1.6716538896027183e-06, "loss": 0.4373622894287109, "step": 229050 }, { "epoch": 0.9834024539982655, "grad_norm": 0.021499576047062874, "learning_rate": 1.6673421694850946e-06, "loss": 0.28340489864349366, "step": 229060 }, { "epoch": 0.9834453860882856, "grad_norm": 1.2539126873016357, "learning_rate": 1.6630304493674709e-06, "loss": 0.29106993675231935, "step": 229070 }, { "epoch": 0.9834883181783055, "grad_norm": 3.4739508628845215, "learning_rate": 1.658718729249847e-06, "loss": 0.0644143283367157, "step": 229080 }, { "epoch": 0.9835312502683256, "grad_norm": 3.182511806488037, "learning_rate": 1.6544070091322232e-06, "loss": 0.3640718936920166, "step": 229090 }, { "epoch": 0.9835741823583456, "grad_norm": 1.5243083238601685, "learning_rate": 1.6500952890145995e-06, "loss": 0.20260207653045653, "step": 229100 }, { "epoch": 0.9836171144483655, "grad_norm": 0.06290149688720703, "learning_rate": 1.6457835688969758e-06, "loss": 0.32620935440063475, "step": 229110 }, { "epoch": 0.9836600465383856, "grad_norm": 0.03638211637735367, "learning_rate": 1.641471848779352e-06, "loss": 0.06037415862083435, "step": 229120 }, { "epoch": 0.9837029786284056, "grad_norm": 0.030279604718089104, "learning_rate": 1.6371601286617281e-06, "loss": 0.17438408136367797, "step": 229130 }, { "epoch": 0.9837459107184255, "grad_norm": 0.0020007449202239513, "learning_rate": 1.6328484085441049e-06, "loss": 0.220800518989563, "step": 229140 }, { "epoch": 0.9837888428084456, "grad_norm": 0.115732841193676, "learning_rate": 1.6285366884264811e-06, "loss": 0.22637467384338378, "step": 229150 }, { "epoch": 0.9838317748984656, "grad_norm": 0.05772421509027481, "learning_rate": 1.6242249683088572e-06, "loss": 0.13500807285308838, "step": 229160 }, { "epoch": 0.9838747069884856, "grad_norm": 0.0166893620043993, "learning_rate": 1.6199132481912335e-06, "loss": 0.1712018847465515, "step": 229170 }, { "epoch": 0.9839176390785056, "grad_norm": 0.01135605201125145, "learning_rate": 1.6156015280736098e-06, "loss": 0.06851279139518737, "step": 229180 }, { "epoch": 0.9839605711685256, "grad_norm": 0.06863057613372803, "learning_rate": 1.611289807955986e-06, "loss": 0.10661604404449462, "step": 229190 }, { "epoch": 0.9840035032585457, "grad_norm": 3.3983232975006104, "learning_rate": 1.6069780878383621e-06, "loss": 0.21318387985229492, "step": 229200 }, { "epoch": 0.9840464353485656, "grad_norm": 0.024844232946634293, "learning_rate": 1.6026663677207384e-06, "loss": 0.1229941725730896, "step": 229210 }, { "epoch": 0.9840893674385857, "grad_norm": 2.1055259704589844, "learning_rate": 1.5983546476031147e-06, "loss": 0.276519250869751, "step": 229220 }, { "epoch": 0.9841322995286057, "grad_norm": 0.04002037271857262, "learning_rate": 1.5940429274854912e-06, "loss": 0.17145614624023436, "step": 229230 }, { "epoch": 0.9841752316186256, "grad_norm": 0.00037594526656903327, "learning_rate": 1.5897312073678675e-06, "loss": 0.22023613452911378, "step": 229240 }, { "epoch": 0.9842181637086457, "grad_norm": 1.376724362373352, "learning_rate": 1.5854194872502438e-06, "loss": 0.19156415462493898, "step": 229250 }, { "epoch": 0.9842610957986657, "grad_norm": 0.0014392342418432236, "learning_rate": 1.58110776713262e-06, "loss": 0.21174421310424804, "step": 229260 }, { "epoch": 0.9843040278886857, "grad_norm": 0.0014310575788840652, "learning_rate": 1.5767960470149963e-06, "loss": 0.021212969720363618, "step": 229270 }, { "epoch": 0.9843469599787057, "grad_norm": 0.0008379554492421448, "learning_rate": 1.5724843268973724e-06, "loss": 0.16715192794799805, "step": 229280 }, { "epoch": 0.9843898920687257, "grad_norm": 38.737857818603516, "learning_rate": 1.5681726067797487e-06, "loss": 0.12215352058410645, "step": 229290 }, { "epoch": 0.9844328241587457, "grad_norm": 2.1133065223693848, "learning_rate": 1.563860886662125e-06, "loss": 0.20123109817504883, "step": 229300 }, { "epoch": 0.9844757562487657, "grad_norm": 0.1494905948638916, "learning_rate": 1.5595491665445013e-06, "loss": 0.11521637439727783, "step": 229310 }, { "epoch": 0.9845186883387858, "grad_norm": 0.08978375047445297, "learning_rate": 1.5552374464268775e-06, "loss": 0.17968294620513917, "step": 229320 }, { "epoch": 0.9845616204288057, "grad_norm": 0.14778830111026764, "learning_rate": 1.5509257263092538e-06, "loss": 0.09077887535095215, "step": 229330 }, { "epoch": 0.9846045525188257, "grad_norm": 1.6615095138549805, "learning_rate": 1.5466140061916303e-06, "loss": 0.10497822761535644, "step": 229340 }, { "epoch": 0.9846474846088458, "grad_norm": 0.014423206448554993, "learning_rate": 1.5423022860740064e-06, "loss": 0.005706658959388733, "step": 229350 }, { "epoch": 0.9846904166988657, "grad_norm": 0.8330245018005371, "learning_rate": 1.5379905659563827e-06, "loss": 0.1308761477470398, "step": 229360 }, { "epoch": 0.9847333487888857, "grad_norm": 3.2889111042022705, "learning_rate": 1.533678845838759e-06, "loss": 0.43849334716796873, "step": 229370 }, { "epoch": 0.9847762808789058, "grad_norm": 0.02888214774429798, "learning_rate": 1.5293671257211353e-06, "loss": 0.26519927978515623, "step": 229380 }, { "epoch": 0.9848192129689257, "grad_norm": 0.22269928455352783, "learning_rate": 1.5250554056035115e-06, "loss": 0.19621092081069946, "step": 229390 }, { "epoch": 0.9848621450589458, "grad_norm": 0.007023406680673361, "learning_rate": 1.5207436854858878e-06, "loss": 0.14593169689178467, "step": 229400 }, { "epoch": 0.9849050771489658, "grad_norm": 0.22836387157440186, "learning_rate": 1.516431965368264e-06, "loss": 0.19318276643753052, "step": 229410 }, { "epoch": 0.9849480092389857, "grad_norm": 0.010965757071971893, "learning_rate": 1.5121202452506404e-06, "loss": 0.08530879020690918, "step": 229420 }, { "epoch": 0.9849909413290058, "grad_norm": 0.0280422605574131, "learning_rate": 1.5078085251330167e-06, "loss": 0.22887613773345947, "step": 229430 }, { "epoch": 0.9850338734190258, "grad_norm": 0.0066929347813129425, "learning_rate": 1.503496805015393e-06, "loss": 0.3554296255111694, "step": 229440 }, { "epoch": 0.9850768055090457, "grad_norm": 3.0852301120758057, "learning_rate": 1.4991850848977692e-06, "loss": 0.18785364627838136, "step": 229450 }, { "epoch": 0.9851197375990658, "grad_norm": 6.435519218444824, "learning_rate": 1.4948733647801455e-06, "loss": 0.25583548545837403, "step": 229460 }, { "epoch": 0.9851626696890858, "grad_norm": 6.700213432312012, "learning_rate": 1.4905616446625216e-06, "loss": 0.28615341186523435, "step": 229470 }, { "epoch": 0.9852056017791058, "grad_norm": 1.2810556888580322, "learning_rate": 1.486249924544898e-06, "loss": 0.17413702011108398, "step": 229480 }, { "epoch": 0.9852485338691258, "grad_norm": 0.018091721460223198, "learning_rate": 1.4819382044272744e-06, "loss": 0.12498599290847778, "step": 229490 }, { "epoch": 0.9852914659591459, "grad_norm": 0.043885841965675354, "learning_rate": 1.4776264843096505e-06, "loss": 0.09390342235565186, "step": 229500 }, { "epoch": 0.9853343980491658, "grad_norm": 7.9579176902771, "learning_rate": 1.4733147641920267e-06, "loss": 0.286728048324585, "step": 229510 }, { "epoch": 0.9853773301391858, "grad_norm": 0.016106192022562027, "learning_rate": 1.469003044074403e-06, "loss": 0.05692702531814575, "step": 229520 }, { "epoch": 0.9854202622292059, "grad_norm": 6.578338623046875, "learning_rate": 1.4646913239567795e-06, "loss": 0.2311037540435791, "step": 229530 }, { "epoch": 0.9854631943192258, "grad_norm": 0.002258319640532136, "learning_rate": 1.4603796038391556e-06, "loss": 0.2164003372192383, "step": 229540 }, { "epoch": 0.9855061264092458, "grad_norm": 0.015184340067207813, "learning_rate": 1.4560678837215319e-06, "loss": 0.05240732431411743, "step": 229550 }, { "epoch": 0.9855490584992659, "grad_norm": 0.1849614530801773, "learning_rate": 1.4517561636039082e-06, "loss": 0.07531896233558655, "step": 229560 }, { "epoch": 0.9855919905892858, "grad_norm": 0.0011321872007101774, "learning_rate": 1.4474444434862847e-06, "loss": 0.3143985509872437, "step": 229570 }, { "epoch": 0.9856349226793059, "grad_norm": 0.0027777322102338076, "learning_rate": 1.4431327233686607e-06, "loss": 0.17318645715713502, "step": 229580 }, { "epoch": 0.9856778547693259, "grad_norm": 4.1844611167907715, "learning_rate": 1.438821003251037e-06, "loss": 0.09338077902793884, "step": 229590 }, { "epoch": 0.9857207868593458, "grad_norm": 0.7365671992301941, "learning_rate": 1.4345092831334133e-06, "loss": 0.12292720079421997, "step": 229600 }, { "epoch": 0.9857637189493659, "grad_norm": 0.04432155191898346, "learning_rate": 1.4301975630157896e-06, "loss": 0.1919556140899658, "step": 229610 }, { "epoch": 0.9858066510393859, "grad_norm": 0.1292286068201065, "learning_rate": 1.4258858428981659e-06, "loss": 0.2025829315185547, "step": 229620 }, { "epoch": 0.985849583129406, "grad_norm": 0.43458276987075806, "learning_rate": 1.4215741227805421e-06, "loss": 0.19809385538101196, "step": 229630 }, { "epoch": 0.9858925152194259, "grad_norm": 4.930924892425537, "learning_rate": 1.4172624026629184e-06, "loss": 0.3780056953430176, "step": 229640 }, { "epoch": 0.9859354473094459, "grad_norm": 0.002644116058945656, "learning_rate": 1.4129506825452947e-06, "loss": 0.12666265964508056, "step": 229650 }, { "epoch": 0.985978379399466, "grad_norm": 0.002665426814928651, "learning_rate": 1.408638962427671e-06, "loss": 0.025838717818260193, "step": 229660 }, { "epoch": 0.9860213114894859, "grad_norm": 0.9424791932106018, "learning_rate": 1.4043272423100473e-06, "loss": 0.07721037864685058, "step": 229670 }, { "epoch": 0.986064243579506, "grad_norm": 0.03978941589593887, "learning_rate": 1.4000155221924236e-06, "loss": 0.10961095094680787, "step": 229680 }, { "epoch": 0.986107175669526, "grad_norm": 0.0040367403998970985, "learning_rate": 1.3957038020747999e-06, "loss": 0.13419954776763915, "step": 229690 }, { "epoch": 0.9861501077595459, "grad_norm": 2.451991081237793, "learning_rate": 1.391392081957176e-06, "loss": 0.31821794509887696, "step": 229700 }, { "epoch": 0.986193039849566, "grad_norm": 0.006343350745737553, "learning_rate": 1.3870803618395524e-06, "loss": 0.11663144826889038, "step": 229710 }, { "epoch": 0.986235971939586, "grad_norm": 1.4940195083618164, "learning_rate": 1.3827686417219287e-06, "loss": 0.21371042728424072, "step": 229720 }, { "epoch": 0.9862789040296059, "grad_norm": 0.7261769771575928, "learning_rate": 1.3784569216043048e-06, "loss": 0.19989676475524903, "step": 229730 }, { "epoch": 0.986321836119626, "grad_norm": 1.1416183710098267, "learning_rate": 1.374145201486681e-06, "loss": 0.05739124417304993, "step": 229740 }, { "epoch": 0.986364768209646, "grad_norm": 1.768046498298645, "learning_rate": 1.3698334813690576e-06, "loss": 0.16595814228057862, "step": 229750 }, { "epoch": 0.986407700299666, "grad_norm": 0.37908732891082764, "learning_rate": 1.3655217612514338e-06, "loss": 0.5042348861694336, "step": 229760 }, { "epoch": 0.986450632389686, "grad_norm": 0.004125974606722593, "learning_rate": 1.36121004113381e-06, "loss": 0.26359896659851073, "step": 229770 }, { "epoch": 0.986493564479706, "grad_norm": 0.008460349403321743, "learning_rate": 1.3568983210161862e-06, "loss": 0.14525082111358642, "step": 229780 }, { "epoch": 0.986536496569726, "grad_norm": 1.253076195716858, "learning_rate": 1.3525866008985625e-06, "loss": 0.16380696296691893, "step": 229790 }, { "epoch": 0.986579428659746, "grad_norm": 0.3819549083709717, "learning_rate": 1.348274880780939e-06, "loss": 0.2786677598953247, "step": 229800 }, { "epoch": 0.9866223607497661, "grad_norm": 0.02352546714246273, "learning_rate": 1.343963160663315e-06, "loss": 0.08861920833587647, "step": 229810 }, { "epoch": 0.986665292839786, "grad_norm": 0.006553748622536659, "learning_rate": 1.3396514405456913e-06, "loss": 0.09064736366271972, "step": 229820 }, { "epoch": 0.986708224929806, "grad_norm": 0.03975367173552513, "learning_rate": 1.3353397204280676e-06, "loss": 0.08393974304199218, "step": 229830 }, { "epoch": 0.9867511570198261, "grad_norm": 0.01190192997455597, "learning_rate": 1.331028000310444e-06, "loss": 0.16109168529510498, "step": 229840 }, { "epoch": 0.986794089109846, "grad_norm": 0.017632009461522102, "learning_rate": 1.3267162801928202e-06, "loss": 0.02562235891819, "step": 229850 }, { "epoch": 0.986837021199866, "grad_norm": 0.003476449754089117, "learning_rate": 1.3224045600751965e-06, "loss": 0.04230686128139496, "step": 229860 }, { "epoch": 0.9868799532898861, "grad_norm": 0.005417051259428263, "learning_rate": 1.3180928399575728e-06, "loss": 0.07968264818191528, "step": 229870 }, { "epoch": 0.986922885379906, "grad_norm": 0.0005414534243755043, "learning_rate": 1.313781119839949e-06, "loss": 0.09873697757720948, "step": 229880 }, { "epoch": 0.9869658174699261, "grad_norm": 3.2428600788116455, "learning_rate": 1.3094693997223253e-06, "loss": 0.11447465419769287, "step": 229890 }, { "epoch": 0.9870087495599461, "grad_norm": 0.9381573796272278, "learning_rate": 1.3051576796047016e-06, "loss": 0.2896867036819458, "step": 229900 }, { "epoch": 0.987051681649966, "grad_norm": 0.0015979417366907, "learning_rate": 1.3008459594870779e-06, "loss": 0.16397885084152222, "step": 229910 }, { "epoch": 0.9870946137399861, "grad_norm": 2.4915566444396973, "learning_rate": 1.296534239369454e-06, "loss": 0.4369049549102783, "step": 229920 }, { "epoch": 0.9871375458300061, "grad_norm": 1.179463267326355, "learning_rate": 1.2922225192518305e-06, "loss": 0.1902614951133728, "step": 229930 }, { "epoch": 0.9871804779200261, "grad_norm": 0.12325243651866913, "learning_rate": 1.2879107991342067e-06, "loss": 0.14833759069442748, "step": 229940 }, { "epoch": 0.9872234100100461, "grad_norm": 0.050647057592868805, "learning_rate": 1.283599079016583e-06, "loss": 0.1878619074821472, "step": 229950 }, { "epoch": 0.9872663421000661, "grad_norm": 0.003908630460500717, "learning_rate": 1.279287358898959e-06, "loss": 0.300618839263916, "step": 229960 }, { "epoch": 0.9873092741900861, "grad_norm": 0.04351991042494774, "learning_rate": 1.2749756387813354e-06, "loss": 0.16139765977859497, "step": 229970 }, { "epoch": 0.9873522062801061, "grad_norm": 0.992188036441803, "learning_rate": 1.2706639186637119e-06, "loss": 0.22291405200958253, "step": 229980 }, { "epoch": 0.9873951383701262, "grad_norm": 6.605251789093018, "learning_rate": 1.2663521985460882e-06, "loss": 0.0996120810508728, "step": 229990 }, { "epoch": 0.9874380704601461, "grad_norm": 0.0715533122420311, "learning_rate": 1.2620404784284642e-06, "loss": 0.14207704067230226, "step": 230000 }, { "epoch": 0.9874380704601461, "eval_loss": 0.367033451795578, "eval_runtime": 27.4805, "eval_samples_per_second": 3.639, "eval_steps_per_second": 3.639, "step": 230000 }, { "epoch": 0.9874810025501661, "grad_norm": 0.060485634952783585, "learning_rate": 1.2577287583108405e-06, "loss": 0.0019446693360805512, "step": 230010 }, { "epoch": 0.9875239346401862, "grad_norm": 0.0023204952012747526, "learning_rate": 1.2534170381932168e-06, "loss": 0.31025264263153074, "step": 230020 }, { "epoch": 0.9875668667302061, "grad_norm": 2.3061840534210205, "learning_rate": 1.249105318075593e-06, "loss": 0.17957751750946044, "step": 230030 }, { "epoch": 0.9876097988202261, "grad_norm": 0.0011746891541406512, "learning_rate": 1.2447935979579694e-06, "loss": 0.2284639835357666, "step": 230040 }, { "epoch": 0.9876527309102462, "grad_norm": 0.05280338600277901, "learning_rate": 1.2404818778403457e-06, "loss": 0.30267183780670165, "step": 230050 }, { "epoch": 0.9876956630002662, "grad_norm": 0.1548868864774704, "learning_rate": 1.236170157722722e-06, "loss": 0.11789828538894653, "step": 230060 }, { "epoch": 0.9877385950902862, "grad_norm": 0.7587932348251343, "learning_rate": 1.2318584376050982e-06, "loss": 0.07684867978096008, "step": 230070 }, { "epoch": 0.9877815271803062, "grad_norm": 1.2028552293777466, "learning_rate": 1.2275467174874745e-06, "loss": 0.24567055702209473, "step": 230080 }, { "epoch": 0.9878244592703262, "grad_norm": 0.00883434247225523, "learning_rate": 1.2232349973698508e-06, "loss": 0.10406320095062256, "step": 230090 }, { "epoch": 0.9878673913603462, "grad_norm": 0.02592717483639717, "learning_rate": 1.218923277252227e-06, "loss": 0.25481562614440917, "step": 230100 }, { "epoch": 0.9879103234503662, "grad_norm": 6.862185001373291, "learning_rate": 1.2146115571346034e-06, "loss": 0.3252408981323242, "step": 230110 }, { "epoch": 0.9879532555403863, "grad_norm": 0.0014842419186607003, "learning_rate": 1.2102998370169796e-06, "loss": 0.19278960227966307, "step": 230120 }, { "epoch": 0.9879961876304062, "grad_norm": 0.38045448064804077, "learning_rate": 1.205988116899356e-06, "loss": 0.09647968411445618, "step": 230130 }, { "epoch": 0.9880391197204262, "grad_norm": 0.0013819060986861587, "learning_rate": 1.2016763967817322e-06, "loss": 0.30128400325775145, "step": 230140 }, { "epoch": 0.9880820518104463, "grad_norm": 0.012147623114287853, "learning_rate": 1.1973646766641083e-06, "loss": 0.24420483112335206, "step": 230150 }, { "epoch": 0.9881249839004662, "grad_norm": 0.0804034173488617, "learning_rate": 1.1930529565464848e-06, "loss": 0.31123244762420654, "step": 230160 }, { "epoch": 0.9881679159904863, "grad_norm": 0.006644760724157095, "learning_rate": 1.188741236428861e-06, "loss": 0.18235220909118652, "step": 230170 }, { "epoch": 0.9882108480805063, "grad_norm": 0.8137630224227905, "learning_rate": 1.1844295163112373e-06, "loss": 0.10330394506454468, "step": 230180 }, { "epoch": 0.9882537801705262, "grad_norm": 0.013571128249168396, "learning_rate": 1.1801177961936134e-06, "loss": 0.08659087419509888, "step": 230190 }, { "epoch": 0.9882967122605463, "grad_norm": 0.011523943394422531, "learning_rate": 1.1758060760759897e-06, "loss": 0.2964245557785034, "step": 230200 }, { "epoch": 0.9883396443505663, "grad_norm": 0.1999071091413498, "learning_rate": 1.1714943559583662e-06, "loss": 0.27459328174591063, "step": 230210 }, { "epoch": 0.9883825764405862, "grad_norm": 0.0024741236120462418, "learning_rate": 1.1671826358407425e-06, "loss": 0.03986948430538177, "step": 230220 }, { "epoch": 0.9884255085306063, "grad_norm": 0.007317548152059317, "learning_rate": 1.1628709157231186e-06, "loss": 0.1312456488609314, "step": 230230 }, { "epoch": 0.9884684406206263, "grad_norm": 0.020278507843613625, "learning_rate": 1.1585591956054948e-06, "loss": 0.07149394154548645, "step": 230240 }, { "epoch": 0.9885113727106463, "grad_norm": 0.03085501119494438, "learning_rate": 1.1542474754878713e-06, "loss": 0.30056056976318357, "step": 230250 }, { "epoch": 0.9885543048006663, "grad_norm": 2.383174180984497, "learning_rate": 1.1499357553702474e-06, "loss": 0.19107441902160643, "step": 230260 }, { "epoch": 0.9885972368906863, "grad_norm": 0.20591601729393005, "learning_rate": 1.1456240352526237e-06, "loss": 0.028360658884048463, "step": 230270 }, { "epoch": 0.9886401689807063, "grad_norm": 0.023873161524534225, "learning_rate": 1.141312315135e-06, "loss": 0.24436991214752196, "step": 230280 }, { "epoch": 0.9886831010707263, "grad_norm": 1.3845298290252686, "learning_rate": 1.1370005950173763e-06, "loss": 0.3007499217987061, "step": 230290 }, { "epoch": 0.9887260331607464, "grad_norm": 0.015084992162883282, "learning_rate": 1.1326888748997525e-06, "loss": 0.22297992706298828, "step": 230300 }, { "epoch": 0.9887689652507663, "grad_norm": 0.5837798714637756, "learning_rate": 1.1283771547821288e-06, "loss": 0.26232342720031737, "step": 230310 }, { "epoch": 0.9888118973407863, "grad_norm": 0.14470583200454712, "learning_rate": 1.1240654346645051e-06, "loss": 0.19923378229141236, "step": 230320 }, { "epoch": 0.9888548294308064, "grad_norm": 0.2220803201198578, "learning_rate": 1.1197537145468814e-06, "loss": 0.1936778426170349, "step": 230330 }, { "epoch": 0.9888977615208263, "grad_norm": 3.847043752670288, "learning_rate": 1.1154419944292577e-06, "loss": 0.07521622180938721, "step": 230340 }, { "epoch": 0.9889406936108464, "grad_norm": 0.016686517745256424, "learning_rate": 1.111130274311634e-06, "loss": 0.11862159967422485, "step": 230350 }, { "epoch": 0.9889836257008664, "grad_norm": 3.9269278049468994, "learning_rate": 1.1068185541940103e-06, "loss": 0.2966684579849243, "step": 230360 }, { "epoch": 0.9890265577908863, "grad_norm": 0.10296718031167984, "learning_rate": 1.1025068340763865e-06, "loss": 0.16811978816986084, "step": 230370 }, { "epoch": 0.9890694898809064, "grad_norm": 0.07834142446517944, "learning_rate": 1.0981951139587626e-06, "loss": 0.07888695001602172, "step": 230380 }, { "epoch": 0.9891124219709264, "grad_norm": 1.7769652605056763, "learning_rate": 1.093883393841139e-06, "loss": 0.287352180480957, "step": 230390 }, { "epoch": 0.9891553540609463, "grad_norm": 0.0602647066116333, "learning_rate": 1.0895716737235154e-06, "loss": 0.16694862842559816, "step": 230400 }, { "epoch": 0.9891982861509664, "grad_norm": 0.7788509726524353, "learning_rate": 1.0852599536058917e-06, "loss": 0.18424661159515382, "step": 230410 }, { "epoch": 0.9892412182409864, "grad_norm": 2.877631664276123, "learning_rate": 1.0809482334882677e-06, "loss": 0.12469482421875, "step": 230420 }, { "epoch": 0.9892841503310064, "grad_norm": 0.00023763379431329668, "learning_rate": 1.0766365133706442e-06, "loss": 0.10847523212432861, "step": 230430 }, { "epoch": 0.9893270824210264, "grad_norm": 4.226632595062256, "learning_rate": 1.0723247932530205e-06, "loss": 0.08445930480957031, "step": 230440 }, { "epoch": 0.9893700145110464, "grad_norm": 0.3350447118282318, "learning_rate": 1.0680130731353966e-06, "loss": 0.006454658508300781, "step": 230450 }, { "epoch": 0.9894129466010664, "grad_norm": 6.135351657867432, "learning_rate": 1.0637013530177729e-06, "loss": 0.22950005531311035, "step": 230460 }, { "epoch": 0.9894558786910864, "grad_norm": 0.987662672996521, "learning_rate": 1.0593896329001492e-06, "loss": 0.12434533834457398, "step": 230470 }, { "epoch": 0.9894988107811065, "grad_norm": 1.4658321142196655, "learning_rate": 1.0550779127825257e-06, "loss": 0.2138460874557495, "step": 230480 }, { "epoch": 0.9895417428711265, "grad_norm": 0.0016754432581365108, "learning_rate": 1.0507661926649017e-06, "loss": 0.3065653324127197, "step": 230490 }, { "epoch": 0.9895846749611464, "grad_norm": 0.002386566484346986, "learning_rate": 1.046454472547278e-06, "loss": 0.04717585146427154, "step": 230500 }, { "epoch": 0.9896276070511665, "grad_norm": 0.03728202357888222, "learning_rate": 1.0421427524296543e-06, "loss": 0.026825031638145445, "step": 230510 }, { "epoch": 0.9896705391411865, "grad_norm": 0.03018287941813469, "learning_rate": 1.0378310323120306e-06, "loss": 0.09836741089820862, "step": 230520 }, { "epoch": 0.9897134712312065, "grad_norm": 0.006733125075697899, "learning_rate": 1.0335193121944069e-06, "loss": 0.3137779951095581, "step": 230530 }, { "epoch": 0.9897564033212265, "grad_norm": 0.040674638003110886, "learning_rate": 1.0292075920767832e-06, "loss": 0.03551376461982727, "step": 230540 }, { "epoch": 0.9897993354112465, "grad_norm": 0.3072684705257416, "learning_rate": 1.0248958719591594e-06, "loss": 0.2572211265563965, "step": 230550 }, { "epoch": 0.9898422675012665, "grad_norm": 2.135777473449707, "learning_rate": 1.0205841518415357e-06, "loss": 0.06069689989089966, "step": 230560 }, { "epoch": 0.9898851995912865, "grad_norm": 3.5530755519866943, "learning_rate": 1.016272431723912e-06, "loss": 0.1646146297454834, "step": 230570 }, { "epoch": 0.9899281316813066, "grad_norm": 2.017244815826416, "learning_rate": 1.0119607116062883e-06, "loss": 0.17254488468170165, "step": 230580 }, { "epoch": 0.9899710637713265, "grad_norm": 0.00539855333045125, "learning_rate": 1.0076489914886646e-06, "loss": 0.24719760417938233, "step": 230590 }, { "epoch": 0.9900139958613465, "grad_norm": 2.330122947692871, "learning_rate": 1.0033372713710409e-06, "loss": 0.15231788158416748, "step": 230600 }, { "epoch": 0.9900569279513666, "grad_norm": 0.00920083187520504, "learning_rate": 9.99025551253417e-07, "loss": 0.3541694641113281, "step": 230610 }, { "epoch": 0.9900998600413865, "grad_norm": 2.9414072036743164, "learning_rate": 9.947138311357934e-07, "loss": 0.0661549985408783, "step": 230620 }, { "epoch": 0.9901427921314065, "grad_norm": 1.7807673215866089, "learning_rate": 9.904021110181697e-07, "loss": 0.20318183898925782, "step": 230630 }, { "epoch": 0.9901857242214266, "grad_norm": 0.013886654749512672, "learning_rate": 9.860903909005458e-07, "loss": 0.14823532104492188, "step": 230640 }, { "epoch": 0.9902286563114465, "grad_norm": 0.048982731997966766, "learning_rate": 9.81778670782922e-07, "loss": 0.0046006467193365095, "step": 230650 }, { "epoch": 0.9902715884014666, "grad_norm": 0.2466362863779068, "learning_rate": 9.774669506652986e-07, "loss": 0.23479697704315186, "step": 230660 }, { "epoch": 0.9903145204914866, "grad_norm": 0.19999799132347107, "learning_rate": 9.731552305476748e-07, "loss": 0.1353333353996277, "step": 230670 }, { "epoch": 0.9903574525815065, "grad_norm": 0.005991742480546236, "learning_rate": 9.68843510430051e-07, "loss": 0.07191218137741089, "step": 230680 }, { "epoch": 0.9904003846715266, "grad_norm": 0.3562621474266052, "learning_rate": 9.645317903124272e-07, "loss": 0.3172381162643433, "step": 230690 }, { "epoch": 0.9904433167615466, "grad_norm": 0.008976894430816174, "learning_rate": 9.602200701948035e-07, "loss": 0.1105201005935669, "step": 230700 }, { "epoch": 0.9904862488515666, "grad_norm": 35.98250198364258, "learning_rate": 9.5590835007718e-07, "loss": 0.2461772680282593, "step": 230710 }, { "epoch": 0.9905291809415866, "grad_norm": 0.4796089231967926, "learning_rate": 9.515966299595562e-07, "loss": 0.07134444117546082, "step": 230720 }, { "epoch": 0.9905721130316066, "grad_norm": 0.3961131274700165, "learning_rate": 9.472849098419323e-07, "loss": 0.16740727424621582, "step": 230730 }, { "epoch": 0.9906150451216266, "grad_norm": 0.03454848751425743, "learning_rate": 9.429731897243086e-07, "loss": 0.20739531517028809, "step": 230740 }, { "epoch": 0.9906579772116466, "grad_norm": 0.015018555335700512, "learning_rate": 9.38661469606685e-07, "loss": 0.2724655866622925, "step": 230750 }, { "epoch": 0.9907009093016667, "grad_norm": 1.8921265602111816, "learning_rate": 9.343497494890613e-07, "loss": 0.3332388162612915, "step": 230760 }, { "epoch": 0.9907438413916866, "grad_norm": 0.10715219378471375, "learning_rate": 9.300380293714375e-07, "loss": 0.004315024986863136, "step": 230770 }, { "epoch": 0.9907867734817066, "grad_norm": 4.257637023925781, "learning_rate": 9.257263092538138e-07, "loss": 0.09003528356552123, "step": 230780 }, { "epoch": 0.9908297055717267, "grad_norm": 1.0070799589157104, "learning_rate": 9.214145891361899e-07, "loss": 0.1680148720741272, "step": 230790 }, { "epoch": 0.9908726376617466, "grad_norm": 1.6183319091796875, "learning_rate": 9.171028690185663e-07, "loss": 0.1481905460357666, "step": 230800 }, { "epoch": 0.9909155697517666, "grad_norm": 0.005772717762738466, "learning_rate": 9.127911489009426e-07, "loss": 0.05922789573669433, "step": 230810 }, { "epoch": 0.9909585018417867, "grad_norm": 0.2136821448802948, "learning_rate": 9.084794287833189e-07, "loss": 0.143665611743927, "step": 230820 }, { "epoch": 0.9910014339318066, "grad_norm": 0.7301328778266907, "learning_rate": 9.041677086656951e-07, "loss": 0.03996670842170715, "step": 230830 }, { "epoch": 0.9910443660218267, "grad_norm": 0.0018787942826747894, "learning_rate": 8.998559885480715e-07, "loss": 0.22617673873901367, "step": 230840 }, { "epoch": 0.9910872981118467, "grad_norm": 0.10825284570455551, "learning_rate": 8.955442684304478e-07, "loss": 0.1180370569229126, "step": 230850 }, { "epoch": 0.9911302302018666, "grad_norm": 16.12891387939453, "learning_rate": 8.912325483128239e-07, "loss": 0.2612591743469238, "step": 230860 }, { "epoch": 0.9911731622918867, "grad_norm": 0.0020049717277288437, "learning_rate": 8.869208281952002e-07, "loss": 0.09563700556755066, "step": 230870 }, { "epoch": 0.9912160943819067, "grad_norm": 0.002001575892791152, "learning_rate": 8.826091080775764e-07, "loss": 0.39770505428314207, "step": 230880 }, { "epoch": 0.9912590264719267, "grad_norm": 0.7403496503829956, "learning_rate": 8.782973879599529e-07, "loss": 0.019684380292892455, "step": 230890 }, { "epoch": 0.9913019585619467, "grad_norm": 0.001754825352691114, "learning_rate": 8.739856678423291e-07, "loss": 0.16134670972824097, "step": 230900 }, { "epoch": 0.9913448906519667, "grad_norm": 0.3392312824726105, "learning_rate": 8.696739477247054e-07, "loss": 0.16810760498046876, "step": 230910 }, { "epoch": 0.9913878227419868, "grad_norm": 3.1423470973968506, "learning_rate": 8.653622276070815e-07, "loss": 0.24040436744689941, "step": 230920 }, { "epoch": 0.9914307548320067, "grad_norm": 0.0010441187769174576, "learning_rate": 8.610505074894578e-07, "loss": 0.1794663906097412, "step": 230930 }, { "epoch": 0.9914736869220268, "grad_norm": 1.1992433071136475, "learning_rate": 8.567387873718342e-07, "loss": 0.13717471361160277, "step": 230940 }, { "epoch": 0.9915166190120468, "grad_norm": 0.11305972188711166, "learning_rate": 8.524270672542105e-07, "loss": 0.1720863938331604, "step": 230950 }, { "epoch": 0.9915595511020667, "grad_norm": 0.01105725672096014, "learning_rate": 8.481153471365867e-07, "loss": 0.16816022396087646, "step": 230960 }, { "epoch": 0.9916024831920868, "grad_norm": 6.651047229766846, "learning_rate": 8.43803627018963e-07, "loss": 0.22323966026306152, "step": 230970 }, { "epoch": 0.9916454152821068, "grad_norm": 0.0015010848874226213, "learning_rate": 8.394919069013393e-07, "loss": 0.2742466926574707, "step": 230980 }, { "epoch": 0.9916883473721267, "grad_norm": 0.009254826232790947, "learning_rate": 8.351801867837155e-07, "loss": 0.19576044082641603, "step": 230990 }, { "epoch": 0.9917312794621468, "grad_norm": 0.001263033365830779, "learning_rate": 8.308684666660918e-07, "loss": 0.09472488164901734, "step": 231000 }, { "epoch": 0.9917312794621468, "eval_loss": 0.36783263087272644, "eval_runtime": 27.4534, "eval_samples_per_second": 3.643, "eval_steps_per_second": 3.643, "step": 231000 }, { "epoch": 0.9917742115521668, "grad_norm": 1.9732149839401245, "learning_rate": 8.265567465484681e-07, "loss": 0.23163745403289795, "step": 231010 }, { "epoch": 0.9918171436421868, "grad_norm": 7.527266025543213, "learning_rate": 8.222450264308443e-07, "loss": 0.36402325630187987, "step": 231020 }, { "epoch": 0.9918600757322068, "grad_norm": 0.02747558429837227, "learning_rate": 8.179333063132207e-07, "loss": 0.16112041473388672, "step": 231030 }, { "epoch": 0.9919030078222268, "grad_norm": 0.014340748079121113, "learning_rate": 8.136215861955969e-07, "loss": 0.22366724014282227, "step": 231040 }, { "epoch": 0.9919459399122468, "grad_norm": 1.6919987201690674, "learning_rate": 8.093098660779731e-07, "loss": 0.3583649158477783, "step": 231050 }, { "epoch": 0.9919888720022668, "grad_norm": 0.006823307368904352, "learning_rate": 8.049981459603494e-07, "loss": 0.30974841117858887, "step": 231060 }, { "epoch": 0.9920318040922869, "grad_norm": 0.005828527733683586, "learning_rate": 8.006864258427258e-07, "loss": 0.1370411992073059, "step": 231070 }, { "epoch": 0.9920747361823068, "grad_norm": 0.008763202466070652, "learning_rate": 7.963747057251021e-07, "loss": 0.1355002760887146, "step": 231080 }, { "epoch": 0.9921176682723268, "grad_norm": 0.003385061165317893, "learning_rate": 7.920629856074783e-07, "loss": 0.25493133068084717, "step": 231090 }, { "epoch": 0.9921606003623469, "grad_norm": 0.03868903964757919, "learning_rate": 7.877512654898545e-07, "loss": 0.22841250896453857, "step": 231100 }, { "epoch": 0.9922035324523668, "grad_norm": 4.405091285705566, "learning_rate": 7.834395453722307e-07, "loss": 0.14411439895629882, "step": 231110 }, { "epoch": 0.9922464645423869, "grad_norm": 0.006356885191053152, "learning_rate": 7.791278252546071e-07, "loss": 0.24147932529449462, "step": 231120 }, { "epoch": 0.9922893966324069, "grad_norm": 0.943030059337616, "learning_rate": 7.748161051369834e-07, "loss": 0.2868966579437256, "step": 231130 }, { "epoch": 0.9923323287224268, "grad_norm": 4.7073588371276855, "learning_rate": 7.705043850193597e-07, "loss": 0.22108216285705568, "step": 231140 }, { "epoch": 0.9923752608124469, "grad_norm": 0.01346675492823124, "learning_rate": 7.66192664901736e-07, "loss": 0.22234489917755126, "step": 231150 }, { "epoch": 0.9924181929024669, "grad_norm": 1.4010380506515503, "learning_rate": 7.618809447841122e-07, "loss": 0.23675973415374757, "step": 231160 }, { "epoch": 0.9924611249924868, "grad_norm": 1.4407527446746826, "learning_rate": 7.575692246664885e-07, "loss": 0.21478147506713868, "step": 231170 }, { "epoch": 0.9925040570825069, "grad_norm": 0.014923127368092537, "learning_rate": 7.532575045488648e-07, "loss": 0.018306614458560945, "step": 231180 }, { "epoch": 0.9925469891725269, "grad_norm": 0.010917030274868011, "learning_rate": 7.48945784431241e-07, "loss": 0.20243346691131592, "step": 231190 }, { "epoch": 0.9925899212625469, "grad_norm": 5.202339172363281, "learning_rate": 7.446340643136173e-07, "loss": 0.48285846710205077, "step": 231200 }, { "epoch": 0.9926328533525669, "grad_norm": 0.31020039319992065, "learning_rate": 7.403223441959936e-07, "loss": 0.2021394968032837, "step": 231210 }, { "epoch": 0.9926757854425869, "grad_norm": 0.019344815984368324, "learning_rate": 7.360106240783698e-07, "loss": 0.21105799674987794, "step": 231220 }, { "epoch": 0.9927187175326069, "grad_norm": 0.8539208173751831, "learning_rate": 7.316989039607461e-07, "loss": 0.2706784725189209, "step": 231230 }, { "epoch": 0.9927616496226269, "grad_norm": 1.9695597887039185, "learning_rate": 7.273871838431224e-07, "loss": 0.15657505989074708, "step": 231240 }, { "epoch": 0.992804581712647, "grad_norm": 7.32958459854126, "learning_rate": 7.230754637254987e-07, "loss": 0.22953457832336427, "step": 231250 }, { "epoch": 0.9928475138026669, "grad_norm": 0.0042612794786691666, "learning_rate": 7.187637436078749e-07, "loss": 0.026990744471549987, "step": 231260 }, { "epoch": 0.9928904458926869, "grad_norm": 0.039048999547958374, "learning_rate": 7.144520234902513e-07, "loss": 0.10248461961746216, "step": 231270 }, { "epoch": 0.992933377982707, "grad_norm": 1.1401162147521973, "learning_rate": 7.101403033726274e-07, "loss": 0.2755697965621948, "step": 231280 }, { "epoch": 0.9929763100727269, "grad_norm": 0.04793756455183029, "learning_rate": 7.058285832550038e-07, "loss": 0.0071813158690929415, "step": 231290 }, { "epoch": 0.993019242162747, "grad_norm": 0.010070315562188625, "learning_rate": 7.0151686313738e-07, "loss": 0.14669071435928344, "step": 231300 }, { "epoch": 0.993062174252767, "grad_norm": 1.1468803882598877, "learning_rate": 6.972051430197564e-07, "loss": 0.057636570930480954, "step": 231310 }, { "epoch": 0.9931051063427869, "grad_norm": 0.005894109606742859, "learning_rate": 6.928934229021326e-07, "loss": 0.3529577016830444, "step": 231320 }, { "epoch": 0.993148038432807, "grad_norm": 0.027687201276421547, "learning_rate": 6.88581702784509e-07, "loss": 0.13618324995040892, "step": 231330 }, { "epoch": 0.993190970522827, "grad_norm": 0.02283935621380806, "learning_rate": 6.842699826668851e-07, "loss": 0.17922990322113036, "step": 231340 }, { "epoch": 0.993233902612847, "grad_norm": 0.06553700566291809, "learning_rate": 6.799582625492614e-07, "loss": 0.05254532098770141, "step": 231350 }, { "epoch": 0.993276834702867, "grad_norm": 0.2980232238769531, "learning_rate": 6.756465424316377e-07, "loss": 0.3425051927566528, "step": 231360 }, { "epoch": 0.993319766792887, "grad_norm": 1.3741534948349, "learning_rate": 6.71334822314014e-07, "loss": 0.32896277904510496, "step": 231370 }, { "epoch": 0.9933626988829071, "grad_norm": 2.6923539638519287, "learning_rate": 6.670231021963903e-07, "loss": 0.19083750247955322, "step": 231380 }, { "epoch": 0.993405630972927, "grad_norm": 4.866817474365234, "learning_rate": 6.627113820787666e-07, "loss": 0.23523907661437987, "step": 231390 }, { "epoch": 0.993448563062947, "grad_norm": 0.19903796911239624, "learning_rate": 6.583996619611429e-07, "loss": 0.12940009832382202, "step": 231400 }, { "epoch": 0.9934914951529671, "grad_norm": 1.1251431703567505, "learning_rate": 6.54087941843519e-07, "loss": 0.13517963886260986, "step": 231410 }, { "epoch": 0.993534427242987, "grad_norm": 0.09284210950136185, "learning_rate": 6.497762217258954e-07, "loss": 0.019798481464385988, "step": 231420 }, { "epoch": 0.9935773593330071, "grad_norm": 16.369192123413086, "learning_rate": 6.454645016082716e-07, "loss": 0.2830683708190918, "step": 231430 }, { "epoch": 0.9936202914230271, "grad_norm": 0.0024391154292970896, "learning_rate": 6.411527814906479e-07, "loss": 0.17113139629364013, "step": 231440 }, { "epoch": 0.993663223513047, "grad_norm": 0.0015551660908386111, "learning_rate": 6.368410613730242e-07, "loss": 0.10649877786636353, "step": 231450 }, { "epoch": 0.9937061556030671, "grad_norm": 5.168276309967041, "learning_rate": 6.325293412554004e-07, "loss": 0.19731935262680053, "step": 231460 }, { "epoch": 0.9937490876930871, "grad_norm": 1.3026635646820068, "learning_rate": 6.282176211377767e-07, "loss": 0.24371423721313476, "step": 231470 }, { "epoch": 0.993792019783107, "grad_norm": 4.633128643035889, "learning_rate": 6.23905901020153e-07, "loss": 0.21536602973937988, "step": 231480 }, { "epoch": 0.9938349518731271, "grad_norm": 1.4385215044021606, "learning_rate": 6.195941809025293e-07, "loss": 0.21898224353790283, "step": 231490 }, { "epoch": 0.9938778839631471, "grad_norm": 3.3495383262634277, "learning_rate": 6.152824607849056e-07, "loss": 0.14156938791275026, "step": 231500 }, { "epoch": 0.9939208160531671, "grad_norm": 0.0007417344022542238, "learning_rate": 6.109707406672818e-07, "loss": 0.11812833547592164, "step": 231510 }, { "epoch": 0.9939637481431871, "grad_norm": 0.034221477806568146, "learning_rate": 6.066590205496582e-07, "loss": 0.18498988151550294, "step": 231520 }, { "epoch": 0.9940066802332072, "grad_norm": 0.01323069166392088, "learning_rate": 6.023473004320343e-07, "loss": 0.194827401638031, "step": 231530 }, { "epoch": 0.9940496123232271, "grad_norm": 0.571534276008606, "learning_rate": 5.980355803144107e-07, "loss": 0.19657092094421386, "step": 231540 }, { "epoch": 0.9940925444132471, "grad_norm": 0.00028535982710309327, "learning_rate": 5.937238601967869e-07, "loss": 0.10385684967041016, "step": 231550 }, { "epoch": 0.9941354765032672, "grad_norm": 0.05803811177611351, "learning_rate": 5.894121400791632e-07, "loss": 0.29644711017608644, "step": 231560 }, { "epoch": 0.9941784085932871, "grad_norm": 0.004579696338623762, "learning_rate": 5.851004199615395e-07, "loss": 0.20959279537200928, "step": 231570 }, { "epoch": 0.9942213406833071, "grad_norm": 0.0018657728796824813, "learning_rate": 5.807886998439158e-07, "loss": 0.08454373478889465, "step": 231580 }, { "epoch": 0.9942642727733272, "grad_norm": 0.008013848215341568, "learning_rate": 5.76476979726292e-07, "loss": 0.28561060428619384, "step": 231590 }, { "epoch": 0.9943072048633471, "grad_norm": 4.572896957397461, "learning_rate": 5.721652596086683e-07, "loss": 0.20241074562072753, "step": 231600 }, { "epoch": 0.9943501369533672, "grad_norm": 0.0008746059611439705, "learning_rate": 5.678535394910446e-07, "loss": 0.16773425340652465, "step": 231610 }, { "epoch": 0.9943930690433872, "grad_norm": 0.04221741482615471, "learning_rate": 5.635418193734208e-07, "loss": 0.07259194850921631, "step": 231620 }, { "epoch": 0.9944360011334071, "grad_norm": 2.6986231803894043, "learning_rate": 5.592300992557972e-07, "loss": 0.19206438064575196, "step": 231630 }, { "epoch": 0.9944789332234272, "grad_norm": 9.255887985229492, "learning_rate": 5.549183791381734e-07, "loss": 0.5112367630004883, "step": 231640 }, { "epoch": 0.9945218653134472, "grad_norm": 0.061653558164834976, "learning_rate": 5.506066590205497e-07, "loss": 0.09895297288894653, "step": 231650 }, { "epoch": 0.9945647974034671, "grad_norm": 5.419751167297363, "learning_rate": 5.462949389029259e-07, "loss": 0.07865924835205078, "step": 231660 }, { "epoch": 0.9946077294934872, "grad_norm": 1.7274482250213623, "learning_rate": 5.419832187853023e-07, "loss": 0.09330617189407349, "step": 231670 }, { "epoch": 0.9946506615835072, "grad_norm": 0.0029562627896666527, "learning_rate": 5.376714986676785e-07, "loss": 0.2507413625717163, "step": 231680 }, { "epoch": 0.9946935936735272, "grad_norm": 1.1139379739761353, "learning_rate": 5.333597785500548e-07, "loss": 0.3691600799560547, "step": 231690 }, { "epoch": 0.9947365257635472, "grad_norm": 0.1695391833782196, "learning_rate": 5.290480584324311e-07, "loss": 0.22681543827056885, "step": 231700 }, { "epoch": 0.9947794578535673, "grad_norm": 3.3068721294403076, "learning_rate": 5.247363383148073e-07, "loss": 0.23673622608184813, "step": 231710 }, { "epoch": 0.9948223899435872, "grad_norm": 1.692142128944397, "learning_rate": 5.204246181971836e-07, "loss": 0.11870994567871093, "step": 231720 }, { "epoch": 0.9948653220336072, "grad_norm": 0.03946472331881523, "learning_rate": 5.161128980795599e-07, "loss": 0.0988048791885376, "step": 231730 }, { "epoch": 0.9949082541236273, "grad_norm": 2.345656394958496, "learning_rate": 5.118011779619362e-07, "loss": 0.12542537450790406, "step": 231740 }, { "epoch": 0.9949511862136472, "grad_norm": 0.2746301591396332, "learning_rate": 5.074894578443125e-07, "loss": 0.18555349111557007, "step": 231750 }, { "epoch": 0.9949941183036672, "grad_norm": 1.1292948722839355, "learning_rate": 5.031777377266887e-07, "loss": 0.3993506908416748, "step": 231760 }, { "epoch": 0.9950370503936873, "grad_norm": 0.00699152797460556, "learning_rate": 4.988660176090649e-07, "loss": 0.12862871885299682, "step": 231770 }, { "epoch": 0.9950799824837073, "grad_norm": 0.029387371614575386, "learning_rate": 4.945542974914412e-07, "loss": 0.33222131729125975, "step": 231780 }, { "epoch": 0.9951229145737273, "grad_norm": 0.010533314198255539, "learning_rate": 4.902425773738175e-07, "loss": 0.37362515926361084, "step": 231790 }, { "epoch": 0.9951658466637473, "grad_norm": 0.024894610047340393, "learning_rate": 4.859308572561938e-07, "loss": 0.18944785594940186, "step": 231800 }, { "epoch": 0.9952087787537673, "grad_norm": 0.07286173105239868, "learning_rate": 4.816191371385701e-07, "loss": 0.14887828826904298, "step": 231810 }, { "epoch": 0.9952517108437873, "grad_norm": 0.009799705818295479, "learning_rate": 4.773074170209464e-07, "loss": 0.08595054745674133, "step": 231820 }, { "epoch": 0.9952946429338073, "grad_norm": 0.010909819975495338, "learning_rate": 4.729956969033227e-07, "loss": 0.13564144372940062, "step": 231830 }, { "epoch": 0.9953375750238274, "grad_norm": 0.21695959568023682, "learning_rate": 4.6868397678569893e-07, "loss": 0.207657790184021, "step": 231840 }, { "epoch": 0.9953805071138473, "grad_norm": 0.007266578730195761, "learning_rate": 4.6437225666807516e-07, "loss": 0.15762712955474853, "step": 231850 }, { "epoch": 0.9954234392038673, "grad_norm": 0.08585427701473236, "learning_rate": 4.6006053655045145e-07, "loss": 0.1914979934692383, "step": 231860 }, { "epoch": 0.9954663712938874, "grad_norm": 9.776519775390625, "learning_rate": 4.5574881643282773e-07, "loss": 0.3182793617248535, "step": 231870 }, { "epoch": 0.9955093033839073, "grad_norm": 0.026343174278736115, "learning_rate": 4.51437096315204e-07, "loss": 0.19399741888046265, "step": 231880 }, { "epoch": 0.9955522354739273, "grad_norm": 11.97410774230957, "learning_rate": 4.4712537619758024e-07, "loss": 0.3325981616973877, "step": 231890 }, { "epoch": 0.9955951675639474, "grad_norm": 8.708888053894043, "learning_rate": 4.428136560799566e-07, "loss": 0.21990721225738524, "step": 231900 }, { "epoch": 0.9956380996539673, "grad_norm": 0.031915098428726196, "learning_rate": 4.385019359623328e-07, "loss": 0.12815059423446656, "step": 231910 }, { "epoch": 0.9956810317439874, "grad_norm": 0.05291389301419258, "learning_rate": 4.3419021584470915e-07, "loss": 0.1420881152153015, "step": 231920 }, { "epoch": 0.9957239638340074, "grad_norm": 0.519956111907959, "learning_rate": 4.298784957270854e-07, "loss": 0.014936311542987824, "step": 231930 }, { "epoch": 0.9957668959240273, "grad_norm": 0.05501473695039749, "learning_rate": 4.255667756094616e-07, "loss": 0.18154823780059814, "step": 231940 }, { "epoch": 0.9958098280140474, "grad_norm": 0.0032672970555722713, "learning_rate": 4.2125505549183795e-07, "loss": 0.12591198682785035, "step": 231950 }, { "epoch": 0.9958527601040674, "grad_norm": 1.8218486309051514, "learning_rate": 4.169433353742142e-07, "loss": 0.18262457847595215, "step": 231960 }, { "epoch": 0.9958956921940874, "grad_norm": 0.014850892126560211, "learning_rate": 4.126316152565905e-07, "loss": 0.241205096244812, "step": 231970 }, { "epoch": 0.9959386242841074, "grad_norm": 0.00398431159555912, "learning_rate": 4.0831989513896675e-07, "loss": 0.22642951011657714, "step": 231980 }, { "epoch": 0.9959815563741274, "grad_norm": 0.25197991728782654, "learning_rate": 4.040081750213431e-07, "loss": 0.17500852346420287, "step": 231990 }, { "epoch": 0.9960244884641474, "grad_norm": 0.0007317436393350363, "learning_rate": 3.996964549037193e-07, "loss": 0.2329272985458374, "step": 232000 }, { "epoch": 0.9960244884641474, "eval_loss": 0.36665475368499756, "eval_runtime": 27.639, "eval_samples_per_second": 3.618, "eval_steps_per_second": 3.618, "step": 232000 }, { "epoch": 0.9960674205541674, "grad_norm": 1.9117028713226318, "learning_rate": 3.9538473478609555e-07, "loss": 0.25112009048461914, "step": 232010 }, { "epoch": 0.9961103526441875, "grad_norm": 0.007310639601200819, "learning_rate": 3.910730146684719e-07, "loss": 0.11872807741165162, "step": 232020 }, { "epoch": 0.9961532847342074, "grad_norm": 1.4803849458694458, "learning_rate": 3.8676129455084817e-07, "loss": 0.15340189933776854, "step": 232030 }, { "epoch": 0.9961962168242274, "grad_norm": 2.3438141345977783, "learning_rate": 3.824495744332244e-07, "loss": 0.12148548364639282, "step": 232040 }, { "epoch": 0.9962391489142475, "grad_norm": 1.9131057262420654, "learning_rate": 3.781378543156007e-07, "loss": 0.2916119575500488, "step": 232050 }, { "epoch": 0.9962820810042674, "grad_norm": 2.943357467651367, "learning_rate": 3.7382613419797697e-07, "loss": 0.12406005859375, "step": 232060 }, { "epoch": 0.9963250130942874, "grad_norm": 0.8212069272994995, "learning_rate": 3.695144140803532e-07, "loss": 0.09711835384368897, "step": 232070 }, { "epoch": 0.9963679451843075, "grad_norm": 0.00522095849737525, "learning_rate": 3.652026939627295e-07, "loss": 0.4548477649688721, "step": 232080 }, { "epoch": 0.9964108772743274, "grad_norm": 7.26854944229126, "learning_rate": 3.6089097384510577e-07, "loss": 0.2613101005554199, "step": 232090 }, { "epoch": 0.9964538093643475, "grad_norm": 0.9109877347946167, "learning_rate": 3.5657925372748205e-07, "loss": 0.2800257444381714, "step": 232100 }, { "epoch": 0.9964967414543675, "grad_norm": 1.436590313911438, "learning_rate": 3.5226753360985834e-07, "loss": 0.1499798536300659, "step": 232110 }, { "epoch": 0.9965396735443874, "grad_norm": 0.006864710710942745, "learning_rate": 3.479558134922346e-07, "loss": 0.196934974193573, "step": 232120 }, { "epoch": 0.9965826056344075, "grad_norm": 0.13871745765209198, "learning_rate": 3.436440933746109e-07, "loss": 0.1414160370826721, "step": 232130 }, { "epoch": 0.9966255377244275, "grad_norm": 0.7631210684776306, "learning_rate": 3.3933237325698714e-07, "loss": 0.2055798053741455, "step": 232140 }, { "epoch": 0.9966684698144475, "grad_norm": 1.24130380153656, "learning_rate": 3.350206531393634e-07, "loss": 0.16898894309997559, "step": 232150 }, { "epoch": 0.9967114019044675, "grad_norm": 1.4094983339309692, "learning_rate": 3.307089330217397e-07, "loss": 0.18343677520751953, "step": 232160 }, { "epoch": 0.9967543339944875, "grad_norm": 0.2644950747489929, "learning_rate": 3.26397212904116e-07, "loss": 0.1670290470123291, "step": 232170 }, { "epoch": 0.9967972660845075, "grad_norm": 1.0128084421157837, "learning_rate": 3.2208549278649227e-07, "loss": 0.17235329151153564, "step": 232180 }, { "epoch": 0.9968401981745275, "grad_norm": 21.385019302368164, "learning_rate": 3.1777377266886856e-07, "loss": 0.2850952625274658, "step": 232190 }, { "epoch": 0.9968831302645476, "grad_norm": 0.0020975386723876, "learning_rate": 3.1346205255124484e-07, "loss": 0.24136075973510743, "step": 232200 }, { "epoch": 0.9969260623545676, "grad_norm": 2.3244147300720215, "learning_rate": 3.091503324336211e-07, "loss": 0.1292970895767212, "step": 232210 }, { "epoch": 0.9969689944445875, "grad_norm": 0.013622512109577656, "learning_rate": 3.0483861231599736e-07, "loss": 0.2299511432647705, "step": 232220 }, { "epoch": 0.9970119265346076, "grad_norm": 0.010262306779623032, "learning_rate": 3.0052689219837364e-07, "loss": 0.3331043004989624, "step": 232230 }, { "epoch": 0.9970548586246276, "grad_norm": 0.4883683919906616, "learning_rate": 2.962151720807499e-07, "loss": 0.11569994688034058, "step": 232240 }, { "epoch": 0.9970977907146475, "grad_norm": 1.1489534378051758, "learning_rate": 2.9190345196312616e-07, "loss": 0.2363152265548706, "step": 232250 }, { "epoch": 0.9971407228046676, "grad_norm": 0.018088897690176964, "learning_rate": 2.8759173184550244e-07, "loss": 0.27835333347320557, "step": 232260 }, { "epoch": 0.9971836548946876, "grad_norm": 0.0005512001807801425, "learning_rate": 2.832800117278787e-07, "loss": 0.14523568153381347, "step": 232270 }, { "epoch": 0.9972265869847076, "grad_norm": 2.278998851776123, "learning_rate": 2.78968291610255e-07, "loss": 0.04154463410377503, "step": 232280 }, { "epoch": 0.9972695190747276, "grad_norm": 0.003172652330249548, "learning_rate": 2.746565714926313e-07, "loss": 0.12838269472122193, "step": 232290 }, { "epoch": 0.9973124511647476, "grad_norm": 2.7414071559906006, "learning_rate": 2.703448513750075e-07, "loss": 0.2711420297622681, "step": 232300 }, { "epoch": 0.9973553832547676, "grad_norm": 0.043592531234025955, "learning_rate": 2.660331312573838e-07, "loss": 0.2562830448150635, "step": 232310 }, { "epoch": 0.9973983153447876, "grad_norm": 0.039814338088035583, "learning_rate": 2.617214111397601e-07, "loss": 0.1167643666267395, "step": 232320 }, { "epoch": 0.9974412474348077, "grad_norm": 0.021249786019325256, "learning_rate": 2.574096910221364e-07, "loss": 0.14535123109817505, "step": 232330 }, { "epoch": 0.9974841795248276, "grad_norm": 0.017002159729599953, "learning_rate": 2.5309797090451266e-07, "loss": 0.1427559733390808, "step": 232340 }, { "epoch": 0.9975271116148476, "grad_norm": 0.10289645195007324, "learning_rate": 2.4878625078688894e-07, "loss": 0.3648236751556396, "step": 232350 }, { "epoch": 0.9975700437048677, "grad_norm": 0.5845692753791809, "learning_rate": 2.4447453066926523e-07, "loss": 0.13263989686965943, "step": 232360 }, { "epoch": 0.9976129757948876, "grad_norm": 0.176754891872406, "learning_rate": 2.401628105516415e-07, "loss": 0.09868491888046264, "step": 232370 }, { "epoch": 0.9976559078849077, "grad_norm": 1.6485669612884521, "learning_rate": 2.3585109043401774e-07, "loss": 0.36487441062927245, "step": 232380 }, { "epoch": 0.9976988399749277, "grad_norm": 0.001132033416070044, "learning_rate": 2.3153937031639403e-07, "loss": 0.24000678062438965, "step": 232390 }, { "epoch": 0.9977417720649476, "grad_norm": 0.039538487792015076, "learning_rate": 2.2722765019877029e-07, "loss": 0.19298700094223023, "step": 232400 }, { "epoch": 0.9977847041549677, "grad_norm": 0.7006238102912903, "learning_rate": 2.2291593008114657e-07, "loss": 0.10505164861679077, "step": 232410 }, { "epoch": 0.9978276362449877, "grad_norm": 0.03369970619678497, "learning_rate": 2.1860420996352285e-07, "loss": 0.22971062660217284, "step": 232420 }, { "epoch": 0.9978705683350076, "grad_norm": 0.20301634073257446, "learning_rate": 2.1429248984589914e-07, "loss": 0.32322068214416505, "step": 232430 }, { "epoch": 0.9979135004250277, "grad_norm": 0.1694352924823761, "learning_rate": 2.0998076972827542e-07, "loss": 0.27818710803985597, "step": 232440 }, { "epoch": 0.9979564325150477, "grad_norm": 0.4383462071418762, "learning_rate": 2.056690496106517e-07, "loss": 0.08511244058609009, "step": 232450 }, { "epoch": 0.9979993646050677, "grad_norm": 0.9433944821357727, "learning_rate": 2.01357329493028e-07, "loss": 0.06735751032829285, "step": 232460 }, { "epoch": 0.9980422966950877, "grad_norm": 0.004472394939512014, "learning_rate": 1.9704560937540422e-07, "loss": 0.06144242286682129, "step": 232470 }, { "epoch": 0.9980852287851077, "grad_norm": 0.040545202791690826, "learning_rate": 1.927338892577805e-07, "loss": 0.1260249972343445, "step": 232480 }, { "epoch": 0.9981281608751277, "grad_norm": 0.5861721038818359, "learning_rate": 1.8842216914015676e-07, "loss": 0.164767849445343, "step": 232490 }, { "epoch": 0.9981710929651477, "grad_norm": 1.4621071815490723, "learning_rate": 1.8411044902253305e-07, "loss": 0.16802266836166382, "step": 232500 }, { "epoch": 0.9982140250551678, "grad_norm": 0.0011348744155839086, "learning_rate": 1.7979872890490933e-07, "loss": 0.312008261680603, "step": 232510 }, { "epoch": 0.9982569571451877, "grad_norm": 7.14293098449707, "learning_rate": 1.7548700878728562e-07, "loss": 0.4737683296203613, "step": 232520 }, { "epoch": 0.9982998892352077, "grad_norm": 0.03787514939904213, "learning_rate": 1.7117528866966187e-07, "loss": 0.19208227396011351, "step": 232530 }, { "epoch": 0.9983428213252278, "grad_norm": 0.021616164594888687, "learning_rate": 1.6686356855203816e-07, "loss": 0.3106940269470215, "step": 232540 }, { "epoch": 0.9983857534152477, "grad_norm": 0.21478670835494995, "learning_rate": 1.6255184843441444e-07, "loss": 0.1717553734779358, "step": 232550 }, { "epoch": 0.9984286855052678, "grad_norm": 0.005685772746801376, "learning_rate": 1.5824012831679073e-07, "loss": 0.13391354084014892, "step": 232560 }, { "epoch": 0.9984716175952878, "grad_norm": 2.9434475898742676, "learning_rate": 1.5392840819916698e-07, "loss": 0.19125187397003174, "step": 232570 }, { "epoch": 0.9985145496853077, "grad_norm": 0.04271689057350159, "learning_rate": 1.4961668808154324e-07, "loss": 0.19992996454238893, "step": 232580 }, { "epoch": 0.9985574817753278, "grad_norm": 0.047845933586359024, "learning_rate": 1.4530496796391953e-07, "loss": 0.08684280514717102, "step": 232590 }, { "epoch": 0.9986004138653478, "grad_norm": 2.6441221237182617, "learning_rate": 1.409932478462958e-07, "loss": 0.2548548698425293, "step": 232600 }, { "epoch": 0.9986433459553677, "grad_norm": 0.08814195543527603, "learning_rate": 1.3668152772867207e-07, "loss": 0.09096928238868714, "step": 232610 }, { "epoch": 0.9986862780453878, "grad_norm": 0.9759665131568909, "learning_rate": 1.3236980761104835e-07, "loss": 0.058413803577423096, "step": 232620 }, { "epoch": 0.9987292101354078, "grad_norm": 1.652724027633667, "learning_rate": 1.2805808749342464e-07, "loss": 0.41336398124694823, "step": 232630 }, { "epoch": 0.9987721422254279, "grad_norm": 0.04262397810816765, "learning_rate": 1.2374636737580092e-07, "loss": 0.2795504093170166, "step": 232640 }, { "epoch": 0.9988150743154478, "grad_norm": 0.0015567491063848138, "learning_rate": 1.1943464725817718e-07, "loss": 0.15489885807037354, "step": 232650 }, { "epoch": 0.9988580064054678, "grad_norm": 0.030675673857331276, "learning_rate": 1.1512292714055345e-07, "loss": 0.1793353796005249, "step": 232660 }, { "epoch": 0.9989009384954879, "grad_norm": 1.6193761825561523, "learning_rate": 1.1081120702292973e-07, "loss": 0.20298526287078858, "step": 232670 }, { "epoch": 0.9989438705855078, "grad_norm": 0.36367109417915344, "learning_rate": 1.0649948690530602e-07, "loss": 0.0037272989749908446, "step": 232680 }, { "epoch": 0.9989868026755279, "grad_norm": 2.131378650665283, "learning_rate": 1.0218776678768229e-07, "loss": 0.14402194023132325, "step": 232690 }, { "epoch": 0.9990297347655479, "grad_norm": 1.7471225261688232, "learning_rate": 9.787604667005855e-08, "loss": 0.16321039199829102, "step": 232700 }, { "epoch": 0.9990726668555678, "grad_norm": 1.7944810390472412, "learning_rate": 9.356432655243483e-08, "loss": 0.12686684131622314, "step": 232710 }, { "epoch": 0.9991155989455879, "grad_norm": 3.471855401992798, "learning_rate": 8.925260643481111e-08, "loss": 0.16587791442871094, "step": 232720 }, { "epoch": 0.9991585310356079, "grad_norm": 0.028937939554452896, "learning_rate": 8.494088631718738e-08, "loss": 0.2687675952911377, "step": 232730 }, { "epoch": 0.9992014631256279, "grad_norm": 6.40365743637085, "learning_rate": 8.062916619956366e-08, "loss": 0.36156454086303713, "step": 232740 }, { "epoch": 0.9992443952156479, "grad_norm": 0.044099632650613785, "learning_rate": 7.631744608193993e-08, "loss": 0.2157670736312866, "step": 232750 }, { "epoch": 0.9992873273056679, "grad_norm": 0.016408240422606468, "learning_rate": 7.200572596431621e-08, "loss": 0.08841435313224792, "step": 232760 }, { "epoch": 0.9993302593956879, "grad_norm": 2.911112070083618, "learning_rate": 6.769400584669248e-08, "loss": 0.061773651838302614, "step": 232770 }, { "epoch": 0.9993731914857079, "grad_norm": 0.005322351586073637, "learning_rate": 6.338228572906877e-08, "loss": 0.16446354389190673, "step": 232780 }, { "epoch": 0.999416123575728, "grad_norm": 1.429836630821228, "learning_rate": 5.907056561144503e-08, "loss": 0.1530466079711914, "step": 232790 }, { "epoch": 0.9994590556657479, "grad_norm": 0.12732602655887604, "learning_rate": 5.475884549382131e-08, "loss": 0.2938541412353516, "step": 232800 }, { "epoch": 0.9995019877557679, "grad_norm": 0.004034379031509161, "learning_rate": 5.044712537619758e-08, "loss": 0.22802155017852782, "step": 232810 }, { "epoch": 0.999544919845788, "grad_norm": 0.01561362762004137, "learning_rate": 4.613540525857386e-08, "loss": 0.1871453642845154, "step": 232820 }, { "epoch": 0.9995878519358079, "grad_norm": 0.09907402098178864, "learning_rate": 4.1823685140950134e-08, "loss": 0.15518252849578856, "step": 232830 }, { "epoch": 0.999630784025828, "grad_norm": 0.3256697654724121, "learning_rate": 3.751196502332641e-08, "loss": 0.34419205188751223, "step": 232840 }, { "epoch": 0.999673716115848, "grad_norm": 0.03469710424542427, "learning_rate": 3.320024490570268e-08, "loss": 0.13642860651016236, "step": 232850 }, { "epoch": 0.9997166482058679, "grad_norm": 0.0023571979254484177, "learning_rate": 2.888852478807896e-08, "loss": 0.2521304368972778, "step": 232860 }, { "epoch": 0.999759580295888, "grad_norm": 0.0008532292558811605, "learning_rate": 2.4576804670455234e-08, "loss": 0.34290714263916017, "step": 232870 }, { "epoch": 0.999802512385908, "grad_norm": 0.10287713259458542, "learning_rate": 2.0265084552831505e-08, "loss": 0.23883533477783203, "step": 232880 }, { "epoch": 0.9998454444759279, "grad_norm": 8.222575187683105, "learning_rate": 1.5953364435207782e-08, "loss": 0.26848292350769043, "step": 232890 }, { "epoch": 0.999888376565948, "grad_norm": 1.8974652290344238, "learning_rate": 1.1641644317584057e-08, "loss": 0.2564098596572876, "step": 232900 }, { "epoch": 0.999931308655968, "grad_norm": 0.014630243182182312, "learning_rate": 7.329924199960332e-09, "loss": 0.2814777851104736, "step": 232910 }, { "epoch": 0.999974240745988, "grad_norm": 0.0010633636265993118, "learning_rate": 3.0182040823366075e-09, "loss": 0.13390289545059203, "step": 232920 } ], "logging_steps": 10, "max_steps": 232926, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5803244156712313e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }