| { | |
| "best_metric": 0.080692358314991, | |
| "best_model_checkpoint": "autotrain-sec4/checkpoint-10605", | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 10605, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.007072135785007072, | |
| "grad_norm": 75.34838104248047, | |
| "learning_rate": 6.504524886877829e-07, | |
| "loss": 3.7557, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.014144271570014143, | |
| "grad_norm": 15.420210838317871, | |
| "learning_rate": 1.3574660633484164e-06, | |
| "loss": 2.4958, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.021216407355021217, | |
| "grad_norm": 4.017354965209961, | |
| "learning_rate": 2.0361990950226245e-06, | |
| "loss": 1.1167, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.028288543140028287, | |
| "grad_norm": 3.0722529888153076, | |
| "learning_rate": 2.743212669683258e-06, | |
| "loss": 0.7577, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03536067892503536, | |
| "grad_norm": 4.105304718017578, | |
| "learning_rate": 3.450226244343892e-06, | |
| "loss": 0.6, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.042432814710042434, | |
| "grad_norm": 2.796954870223999, | |
| "learning_rate": 4.157239819004525e-06, | |
| "loss": 0.5213, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04950495049504951, | |
| "grad_norm": 2.0742604732513428, | |
| "learning_rate": 4.864253393665159e-06, | |
| "loss": 0.4437, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.056577086280056574, | |
| "grad_norm": 1.6229099035263062, | |
| "learning_rate": 5.5712669683257925e-06, | |
| "loss": 0.3827, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.06364922206506365, | |
| "grad_norm": 2.0883893966674805, | |
| "learning_rate": 6.278280542986425e-06, | |
| "loss": 0.3385, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.07072135785007072, | |
| "grad_norm": 2.5166773796081543, | |
| "learning_rate": 6.985294117647059e-06, | |
| "loss": 0.2949, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.07779349363507779, | |
| "grad_norm": 2.4907472133636475, | |
| "learning_rate": 7.692307692307694e-06, | |
| "loss": 0.2776, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.08486562942008487, | |
| "grad_norm": 2.4089417457580566, | |
| "learning_rate": 8.399321266968327e-06, | |
| "loss": 0.2867, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.09193776520509193, | |
| "grad_norm": 1.0878061056137085, | |
| "learning_rate": 9.10633484162896e-06, | |
| "loss": 0.2567, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.09900990099009901, | |
| "grad_norm": 1.353794813156128, | |
| "learning_rate": 9.813348416289593e-06, | |
| "loss": 0.2377, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.10608203677510608, | |
| "grad_norm": 1.3143301010131836, | |
| "learning_rate": 1.0520361990950227e-05, | |
| "loss": 0.2446, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.11315417256011315, | |
| "grad_norm": 1.1061075925827026, | |
| "learning_rate": 1.122737556561086e-05, | |
| "loss": 0.2136, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.12022630834512023, | |
| "grad_norm": 4.132507801055908, | |
| "learning_rate": 1.1934389140271494e-05, | |
| "loss": 0.2089, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.1272984441301273, | |
| "grad_norm": 2.015286922454834, | |
| "learning_rate": 1.2641402714932126e-05, | |
| "loss": 0.206, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.13437057991513437, | |
| "grad_norm": 1.3202295303344727, | |
| "learning_rate": 1.3348416289592761e-05, | |
| "loss": 0.2135, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.14144271570014144, | |
| "grad_norm": 1.005441665649414, | |
| "learning_rate": 1.4055429864253395e-05, | |
| "loss": 0.198, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1485148514851485, | |
| "grad_norm": 1.0634604692459106, | |
| "learning_rate": 1.4762443438914029e-05, | |
| "loss": 0.1869, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.15558698727015557, | |
| "grad_norm": 1.5153967142105103, | |
| "learning_rate": 1.5469457013574662e-05, | |
| "loss": 0.1938, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.16265912305516267, | |
| "grad_norm": 1.5015569925308228, | |
| "learning_rate": 1.6176470588235296e-05, | |
| "loss": 0.1916, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.16973125884016974, | |
| "grad_norm": 1.3801521062850952, | |
| "learning_rate": 1.688348416289593e-05, | |
| "loss": 0.1859, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.1768033946251768, | |
| "grad_norm": 1.4514083862304688, | |
| "learning_rate": 1.7590497737556563e-05, | |
| "loss": 0.1794, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.18387553041018387, | |
| "grad_norm": 1.1229180097579956, | |
| "learning_rate": 1.8297511312217194e-05, | |
| "loss": 0.181, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.19094766619519093, | |
| "grad_norm": 0.9584263563156128, | |
| "learning_rate": 1.9004524886877827e-05, | |
| "loss": 0.1702, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.19801980198019803, | |
| "grad_norm": 1.5446968078613281, | |
| "learning_rate": 1.971153846153846e-05, | |
| "loss": 0.1655, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.2050919377652051, | |
| "grad_norm": 1.180188775062561, | |
| "learning_rate": 2.0418552036199095e-05, | |
| "loss": 0.1783, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.21216407355021216, | |
| "grad_norm": 1.4047558307647705, | |
| "learning_rate": 2.112556561085973e-05, | |
| "loss": 0.1654, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.21923620933521923, | |
| "grad_norm": 1.6405079364776611, | |
| "learning_rate": 2.1832579185520362e-05, | |
| "loss": 0.1676, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.2263083451202263, | |
| "grad_norm": 0.799575924873352, | |
| "learning_rate": 2.2539592760180996e-05, | |
| "loss": 0.1529, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.2333804809052334, | |
| "grad_norm": 1.0612698793411255, | |
| "learning_rate": 2.324660633484163e-05, | |
| "loss": 0.158, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.24045261669024046, | |
| "grad_norm": 0.8970231413841248, | |
| "learning_rate": 2.3953619909502263e-05, | |
| "loss": 0.1556, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.24752475247524752, | |
| "grad_norm": 1.1954729557037354, | |
| "learning_rate": 2.4660633484162897e-05, | |
| "loss": 0.1512, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.2545968882602546, | |
| "grad_norm": 2.2156217098236084, | |
| "learning_rate": 2.536764705882353e-05, | |
| "loss": 0.1528, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.26166902404526166, | |
| "grad_norm": 1.4057120084762573, | |
| "learning_rate": 2.6074660633484164e-05, | |
| "loss": 0.1562, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.26874115983026875, | |
| "grad_norm": 1.9392261505126953, | |
| "learning_rate": 2.6781674208144798e-05, | |
| "loss": 0.1601, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.2758132956152758, | |
| "grad_norm": 1.114762306213379, | |
| "learning_rate": 2.7488687782805435e-05, | |
| "loss": 0.1616, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.2828854314002829, | |
| "grad_norm": 0.6488351225852966, | |
| "learning_rate": 2.8195701357466065e-05, | |
| "loss": 0.1564, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.28995756718529, | |
| "grad_norm": 0.7562068700790405, | |
| "learning_rate": 2.8902714932126696e-05, | |
| "loss": 0.1447, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.297029702970297, | |
| "grad_norm": 1.5005004405975342, | |
| "learning_rate": 2.9609728506787333e-05, | |
| "loss": 0.1411, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.3041018387553041, | |
| "grad_norm": 0.9700536131858826, | |
| "learning_rate": 3.0316742081447963e-05, | |
| "loss": 0.1628, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.31117397454031115, | |
| "grad_norm": 0.9135770797729492, | |
| "learning_rate": 3.10237556561086e-05, | |
| "loss": 0.1429, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.31824611032531824, | |
| "grad_norm": 0.5135190486907959, | |
| "learning_rate": 3.1730769230769234e-05, | |
| "loss": 0.1349, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.32531824611032534, | |
| "grad_norm": 0.7440194487571716, | |
| "learning_rate": 3.243778280542987e-05, | |
| "loss": 0.1511, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.3323903818953324, | |
| "grad_norm": 0.774247407913208, | |
| "learning_rate": 3.31447963800905e-05, | |
| "loss": 0.1433, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.33946251768033947, | |
| "grad_norm": 0.9245631098747253, | |
| "learning_rate": 3.3851809954751135e-05, | |
| "loss": 0.136, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3465346534653465, | |
| "grad_norm": 0.5323517918586731, | |
| "learning_rate": 3.455882352941177e-05, | |
| "loss": 0.1402, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.3536067892503536, | |
| "grad_norm": 0.5974646210670471, | |
| "learning_rate": 3.52658371040724e-05, | |
| "loss": 0.139, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.3606789250353607, | |
| "grad_norm": 0.7609033584594727, | |
| "learning_rate": 3.5972850678733036e-05, | |
| "loss": 0.1414, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.36775106082036774, | |
| "grad_norm": 0.6598523259162903, | |
| "learning_rate": 3.667986425339366e-05, | |
| "loss": 0.1279, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.37482319660537483, | |
| "grad_norm": 0.646514892578125, | |
| "learning_rate": 3.73868778280543e-05, | |
| "loss": 0.1318, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.38189533239038187, | |
| "grad_norm": 0.6801839470863342, | |
| "learning_rate": 3.809389140271493e-05, | |
| "loss": 0.135, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.38896746817538896, | |
| "grad_norm": 0.43455860018730164, | |
| "learning_rate": 3.880090497737557e-05, | |
| "loss": 0.1229, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.39603960396039606, | |
| "grad_norm": 0.7492242455482483, | |
| "learning_rate": 3.95079185520362e-05, | |
| "loss": 0.1336, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.4031117397454031, | |
| "grad_norm": 0.7798008918762207, | |
| "learning_rate": 4.021493212669684e-05, | |
| "loss": 0.1278, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.4101838755304102, | |
| "grad_norm": 0.5796477794647217, | |
| "learning_rate": 4.0921945701357465e-05, | |
| "loss": 0.1231, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.41725601131541723, | |
| "grad_norm": 0.8398697376251221, | |
| "learning_rate": 4.1628959276018105e-05, | |
| "loss": 0.1305, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.4243281471004243, | |
| "grad_norm": 1.1518373489379883, | |
| "learning_rate": 4.233597285067873e-05, | |
| "loss": 0.1345, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.4314002828854314, | |
| "grad_norm": 0.6339196562767029, | |
| "learning_rate": 4.304298642533937e-05, | |
| "loss": 0.1388, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.43847241867043846, | |
| "grad_norm": 0.6023873090744019, | |
| "learning_rate": 4.375e-05, | |
| "loss": 0.1343, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.44554455445544555, | |
| "grad_norm": 0.6470553874969482, | |
| "learning_rate": 4.445701357466063e-05, | |
| "loss": 0.1353, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.4526166902404526, | |
| "grad_norm": 0.6824638843536377, | |
| "learning_rate": 4.516402714932127e-05, | |
| "loss": 0.1233, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.4596888260254597, | |
| "grad_norm": 0.7944373488426208, | |
| "learning_rate": 4.58710407239819e-05, | |
| "loss": 0.1235, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.4667609618104668, | |
| "grad_norm": 0.576715350151062, | |
| "learning_rate": 4.6578054298642534e-05, | |
| "loss": 0.1225, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.4738330975954738, | |
| "grad_norm": 0.5373973846435547, | |
| "learning_rate": 4.728506787330317e-05, | |
| "loss": 0.1379, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.4809052333804809, | |
| "grad_norm": 0.6271287798881531, | |
| "learning_rate": 4.79920814479638e-05, | |
| "loss": 0.1295, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.48797736916548795, | |
| "grad_norm": 0.776996374130249, | |
| "learning_rate": 4.8699095022624435e-05, | |
| "loss": 0.1284, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.49504950495049505, | |
| "grad_norm": 0.7526723742485046, | |
| "learning_rate": 4.940610859728507e-05, | |
| "loss": 0.1215, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.5021216407355021, | |
| "grad_norm": 0.5230354070663452, | |
| "learning_rate": 4.99999921989483e-05, | |
| "loss": 0.1198, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.5091937765205092, | |
| "grad_norm": 0.9118756651878357, | |
| "learning_rate": 4.999958995831941e-05, | |
| "loss": 0.134, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5162659123055162, | |
| "grad_norm": 0.7880003452301025, | |
| "learning_rate": 4.9998578271728684e-05, | |
| "loss": 0.1271, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.5233380480905233, | |
| "grad_norm": 0.47186046838760376, | |
| "learning_rate": 4.999695716383928e-05, | |
| "loss": 0.1188, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5304101838755304, | |
| "grad_norm": 0.5088236331939697, | |
| "learning_rate": 4.9994726674170947e-05, | |
| "loss": 0.1179, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.5374823196605375, | |
| "grad_norm": 0.6230493187904358, | |
| "learning_rate": 4.999188685709908e-05, | |
| "loss": 0.1262, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5445544554455446, | |
| "grad_norm": 0.9874520301818848, | |
| "learning_rate": 4.998843778185343e-05, | |
| "loss": 0.1244, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.5516265912305516, | |
| "grad_norm": 0.5979165434837341, | |
| "learning_rate": 4.998437953251637e-05, | |
| "loss": 0.1291, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5586987270155587, | |
| "grad_norm": 0.8219141364097595, | |
| "learning_rate": 4.997971220802088e-05, | |
| "loss": 0.1155, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.5657708628005658, | |
| "grad_norm": 0.41394418478012085, | |
| "learning_rate": 4.997443592214809e-05, | |
| "loss": 0.1198, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5728429985855729, | |
| "grad_norm": 1.253273844718933, | |
| "learning_rate": 4.996855080352457e-05, | |
| "loss": 0.1191, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.57991513437058, | |
| "grad_norm": 0.9971447587013245, | |
| "learning_rate": 4.9962056995619135e-05, | |
| "loss": 0.1328, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5869872701555869, | |
| "grad_norm": 0.49998825788497925, | |
| "learning_rate": 4.995495465673939e-05, | |
| "loss": 0.115, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.594059405940594, | |
| "grad_norm": 0.9231402277946472, | |
| "learning_rate": 4.994724396002783e-05, | |
| "loss": 0.1131, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.6011315417256011, | |
| "grad_norm": 0.5812481641769409, | |
| "learning_rate": 4.9938925093457684e-05, | |
| "loss": 0.103, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.6082036775106082, | |
| "grad_norm": 0.602626383304596, | |
| "learning_rate": 4.992999825982824e-05, | |
| "loss": 0.1097, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6152758132956153, | |
| "grad_norm": 0.4413115084171295, | |
| "learning_rate": 4.9920463676759975e-05, | |
| "loss": 0.1081, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.6223479490806223, | |
| "grad_norm": 0.796340823173523, | |
| "learning_rate": 4.991032157668924e-05, | |
| "loss": 0.1134, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.6294200848656294, | |
| "grad_norm": 0.8647779822349548, | |
| "learning_rate": 4.989957220686257e-05, | |
| "loss": 0.1169, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.6364922206506365, | |
| "grad_norm": 0.5049439072608948, | |
| "learning_rate": 4.988821582933065e-05, | |
| "loss": 0.1155, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.6435643564356436, | |
| "grad_norm": 0.5249194502830505, | |
| "learning_rate": 4.9876252720941974e-05, | |
| "loss": 0.1076, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.6506364922206507, | |
| "grad_norm": 0.9044466018676758, | |
| "learning_rate": 4.986368317333603e-05, | |
| "loss": 0.1171, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6577086280056577, | |
| "grad_norm": 0.6795557141304016, | |
| "learning_rate": 4.985050749293626e-05, | |
| "loss": 0.1099, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.6647807637906648, | |
| "grad_norm": 0.5548400282859802, | |
| "learning_rate": 4.983672600094253e-05, | |
| "loss": 0.1145, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6718528995756718, | |
| "grad_norm": 0.8753097057342529, | |
| "learning_rate": 4.982233903332335e-05, | |
| "loss": 0.1086, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.6789250353606789, | |
| "grad_norm": 0.7015976309776306, | |
| "learning_rate": 4.980734694080763e-05, | |
| "loss": 0.1134, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.685997171145686, | |
| "grad_norm": 0.3631691336631775, | |
| "learning_rate": 4.979175008887619e-05, | |
| "loss": 0.1102, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.693069306930693, | |
| "grad_norm": 0.512153685092926, | |
| "learning_rate": 4.977554885775278e-05, | |
| "loss": 0.1135, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.7001414427157001, | |
| "grad_norm": 0.62693190574646, | |
| "learning_rate": 4.975874364239491e-05, | |
| "loss": 0.1104, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.7072135785007072, | |
| "grad_norm": 0.8053261637687683, | |
| "learning_rate": 4.974133485248409e-05, | |
| "loss": 0.1092, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.8354981541633606, | |
| "learning_rate": 4.972332291241597e-05, | |
| "loss": 0.1155, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.7213578500707214, | |
| "grad_norm": 1.4328135251998901, | |
| "learning_rate": 4.97047082612899e-05, | |
| "loss": 0.1071, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.7284299858557284, | |
| "grad_norm": 0.5748372077941895, | |
| "learning_rate": 4.96854913528983e-05, | |
| "loss": 0.105, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.7355021216407355, | |
| "grad_norm": 0.5140411257743835, | |
| "learning_rate": 4.966567265571553e-05, | |
| "loss": 0.1076, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.7425742574257426, | |
| "grad_norm": 0.42150813341140747, | |
| "learning_rate": 4.9645252652886524e-05, | |
| "loss": 0.1076, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.7496463932107497, | |
| "grad_norm": 0.7284517288208008, | |
| "learning_rate": 4.9624231842214966e-05, | |
| "loss": 0.1147, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.7567185289957568, | |
| "grad_norm": 0.3435399830341339, | |
| "learning_rate": 4.960261073615119e-05, | |
| "loss": 0.117, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.7637906647807637, | |
| "grad_norm": 0.5726285576820374, | |
| "learning_rate": 4.9580389861779676e-05, | |
| "loss": 0.1028, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.7708628005657708, | |
| "grad_norm": 0.6155900955200195, | |
| "learning_rate": 4.955756976080619e-05, | |
| "loss": 0.1059, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.7779349363507779, | |
| "grad_norm": 0.5489698052406311, | |
| "learning_rate": 4.95341509895446e-05, | |
| "loss": 0.1085, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.785007072135785, | |
| "grad_norm": 0.4485224783420563, | |
| "learning_rate": 4.951013411890329e-05, | |
| "loss": 0.1046, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.7920792079207921, | |
| "grad_norm": 1.4330599308013916, | |
| "learning_rate": 4.9485519734371254e-05, | |
| "loss": 0.1066, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.7991513437057991, | |
| "grad_norm": 0.5611838102340698, | |
| "learning_rate": 4.946030843600382e-05, | |
| "loss": 0.108, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.8062234794908062, | |
| "grad_norm": 0.44172587990760803, | |
| "learning_rate": 4.943450083840804e-05, | |
| "loss": 0.1115, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8132956152758133, | |
| "grad_norm": 0.5805100798606873, | |
| "learning_rate": 4.940809757072767e-05, | |
| "loss": 0.1017, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.8203677510608204, | |
| "grad_norm": 0.42033523321151733, | |
| "learning_rate": 4.938109927662786e-05, | |
| "loss": 0.0987, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.8274398868458275, | |
| "grad_norm": 0.519852876663208, | |
| "learning_rate": 4.9353506614279465e-05, | |
| "loss": 0.1085, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.8345120226308345, | |
| "grad_norm": 0.41062799096107483, | |
| "learning_rate": 4.932532025634298e-05, | |
| "loss": 0.1122, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.8415841584158416, | |
| "grad_norm": 0.4441488981246948, | |
| "learning_rate": 4.929654088995216e-05, | |
| "loss": 0.1075, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.8486562942008486, | |
| "grad_norm": 0.5983813405036926, | |
| "learning_rate": 4.926716921669724e-05, | |
| "loss": 0.105, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.8557284299858557, | |
| "grad_norm": 0.4088096022605896, | |
| "learning_rate": 4.92372059526079e-05, | |
| "loss": 0.1055, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.8628005657708628, | |
| "grad_norm": 0.3722389042377472, | |
| "learning_rate": 4.92066518281357e-05, | |
| "loss": 0.1016, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.8698727015558698, | |
| "grad_norm": 0.3515225946903229, | |
| "learning_rate": 4.9175507588136395e-05, | |
| "loss": 0.1061, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.8769448373408769, | |
| "grad_norm": 0.6641696691513062, | |
| "learning_rate": 4.914377399185167e-05, | |
| "loss": 0.0976, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.884016973125884, | |
| "grad_norm": 1.0824599266052246, | |
| "learning_rate": 4.911145181289072e-05, | |
| "loss": 0.0993, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.8910891089108911, | |
| "grad_norm": 0.9281112551689148, | |
| "learning_rate": 4.90785418392113e-05, | |
| "loss": 0.1049, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.8981612446958982, | |
| "grad_norm": 0.6602789163589478, | |
| "learning_rate": 4.904504487310061e-05, | |
| "loss": 0.1105, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.9052333804809052, | |
| "grad_norm": 0.522657036781311, | |
| "learning_rate": 4.901096173115567e-05, | |
| "loss": 0.098, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9123055162659123, | |
| "grad_norm": 0.38893359899520874, | |
| "learning_rate": 4.897629324426343e-05, | |
| "loss": 0.0992, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.9193776520509194, | |
| "grad_norm": 0.9329886436462402, | |
| "learning_rate": 4.894104025758054e-05, | |
| "loss": 0.1048, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.9264497878359265, | |
| "grad_norm": 0.4844260811805725, | |
| "learning_rate": 4.890520363051269e-05, | |
| "loss": 0.1019, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.9335219236209336, | |
| "grad_norm": 0.39830756187438965, | |
| "learning_rate": 4.886878423669373e-05, | |
| "loss": 0.0996, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.9405940594059405, | |
| "grad_norm": 0.3621814548969269, | |
| "learning_rate": 4.8831782963964314e-05, | |
| "loss": 0.0959, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.9476661951909476, | |
| "grad_norm": 0.43947598338127136, | |
| "learning_rate": 4.87942007143503e-05, | |
| "loss": 0.1028, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.9547383309759547, | |
| "grad_norm": 0.4322707951068878, | |
| "learning_rate": 4.87560384040407e-05, | |
| "loss": 0.0957, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.9618104667609618, | |
| "grad_norm": 0.7152990102767944, | |
| "learning_rate": 4.8717296963365446e-05, | |
| "loss": 0.0983, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.9688826025459689, | |
| "grad_norm": 0.36375129222869873, | |
| "learning_rate": 4.8677977336772576e-05, | |
| "loss": 0.0986, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.9759547383309759, | |
| "grad_norm": 0.3541475534439087, | |
| "learning_rate": 4.8638080482805334e-05, | |
| "loss": 0.1039, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.983026874115983, | |
| "grad_norm": 0.5277055501937866, | |
| "learning_rate": 4.8597607374078734e-05, | |
| "loss": 0.1009, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 0.9900990099009901, | |
| "grad_norm": 0.32760316133499146, | |
| "learning_rate": 4.855655899725587e-05, | |
| "loss": 0.0967, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.9971711456859972, | |
| "grad_norm": 0.4430937170982361, | |
| "learning_rate": 4.851493635302384e-05, | |
| "loss": 0.0959, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9641816937160642, | |
| "eval_f1": 0.855812468110255, | |
| "eval_loss": 0.10302519798278809, | |
| "eval_precision": 0.8459347469557771, | |
| "eval_recall": 0.8659235929015533, | |
| "eval_runtime": 65.6322, | |
| "eval_samples_per_second": 765.966, | |
| "eval_steps_per_second": 3.002, | |
| "step": 3535 | |
| }, | |
| { | |
| "epoch": 1.0042432814710043, | |
| "grad_norm": 0.5774176120758057, | |
| "learning_rate": 4.8472740456069404e-05, | |
| "loss": 0.0883, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.0113154172560113, | |
| "grad_norm": 0.6957634687423706, | |
| "learning_rate": 4.842997233505418e-05, | |
| "loss": 0.0774, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 1.0183875530410185, | |
| "grad_norm": 0.3464818000793457, | |
| "learning_rate": 4.838663303258961e-05, | |
| "loss": 0.0823, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.0254596888260255, | |
| "grad_norm": 0.6941576600074768, | |
| "learning_rate": 4.8342723605211556e-05, | |
| "loss": 0.0826, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 1.0325318246110324, | |
| "grad_norm": 0.44520071148872375, | |
| "learning_rate": 4.829824512335449e-05, | |
| "loss": 0.0867, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.0396039603960396, | |
| "grad_norm": 0.3814884126186371, | |
| "learning_rate": 4.825319867132545e-05, | |
| "loss": 0.0869, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 1.0466760961810466, | |
| "grad_norm": 0.44684168696403503, | |
| "learning_rate": 4.8207585347277574e-05, | |
| "loss": 0.0842, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.0537482319660538, | |
| "grad_norm": 0.7008658051490784, | |
| "learning_rate": 4.816140626318334e-05, | |
| "loss": 0.0864, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 1.0608203677510608, | |
| "grad_norm": 0.45034533739089966, | |
| "learning_rate": 4.8114662544807476e-05, | |
| "loss": 0.0801, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.0678925035360678, | |
| "grad_norm": 0.6562340259552002, | |
| "learning_rate": 4.8067355331679485e-05, | |
| "loss": 0.0814, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 1.074964639321075, | |
| "grad_norm": 0.7696852684020996, | |
| "learning_rate": 4.801948577706588e-05, | |
| "loss": 0.0837, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.082036775106082, | |
| "grad_norm": 0.38005831837654114, | |
| "learning_rate": 4.7971055047942074e-05, | |
| "loss": 0.0836, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 1.0891089108910892, | |
| "grad_norm": 0.5856048464775085, | |
| "learning_rate": 4.792206432496392e-05, | |
| "loss": 0.0776, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.0961810466760962, | |
| "grad_norm": 0.3622016906738281, | |
| "learning_rate": 4.787251480243895e-05, | |
| "loss": 0.084, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 1.1032531824611032, | |
| "grad_norm": 0.3826795816421509, | |
| "learning_rate": 4.782240768829722e-05, | |
| "loss": 0.0822, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.1103253182461104, | |
| "grad_norm": 0.4321337938308716, | |
| "learning_rate": 4.77717442040619e-05, | |
| "loss": 0.0797, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 1.1173974540311173, | |
| "grad_norm": 0.3895370364189148, | |
| "learning_rate": 4.772052558481949e-05, | |
| "loss": 0.0808, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.1244695898161245, | |
| "grad_norm": 0.3963691294193268, | |
| "learning_rate": 4.766875307918969e-05, | |
| "loss": 0.0829, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 1.1315417256011315, | |
| "grad_norm": 1.1154179573059082, | |
| "learning_rate": 4.7616427949294985e-05, | |
| "loss": 0.0864, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.1386138613861387, | |
| "grad_norm": 0.6903427839279175, | |
| "learning_rate": 4.756355147072986e-05, | |
| "loss": 0.0816, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 1.1456859971711457, | |
| "grad_norm": 0.3236379325389862, | |
| "learning_rate": 4.751012493252971e-05, | |
| "loss": 0.085, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.1527581329561527, | |
| "grad_norm": 0.31708064675331116, | |
| "learning_rate": 4.745614963713941e-05, | |
| "loss": 0.0824, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 1.15983026874116, | |
| "grad_norm": 0.47363677620887756, | |
| "learning_rate": 4.740162690038159e-05, | |
| "loss": 0.08, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.166902404526167, | |
| "grad_norm": 0.5583330988883972, | |
| "learning_rate": 4.7346558051424505e-05, | |
| "loss": 0.0864, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 1.1739745403111739, | |
| "grad_norm": 0.4270670413970947, | |
| "learning_rate": 4.729094443274969e-05, | |
| "loss": 0.0774, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.181046676096181, | |
| "grad_norm": 0.4069669544696808, | |
| "learning_rate": 4.7234787400119176e-05, | |
| "loss": 0.0856, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 1.188118811881188, | |
| "grad_norm": 0.3747076988220215, | |
| "learning_rate": 4.717808832254251e-05, | |
| "loss": 0.09, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.1951909476661953, | |
| "grad_norm": 0.45713725686073303, | |
| "learning_rate": 4.71208485822433e-05, | |
| "loss": 0.0758, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 1.2022630834512023, | |
| "grad_norm": 0.41270920634269714, | |
| "learning_rate": 4.7063069574625595e-05, | |
| "loss": 0.0849, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.2093352192362095, | |
| "grad_norm": 0.7367244362831116, | |
| "learning_rate": 4.70047527082398e-05, | |
| "loss": 0.0938, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 1.2164073550212164, | |
| "grad_norm": 0.8561830520629883, | |
| "learning_rate": 4.6945899404748376e-05, | |
| "loss": 0.0822, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.2234794908062234, | |
| "grad_norm": 0.8913972973823547, | |
| "learning_rate": 4.6886511098891194e-05, | |
| "loss": 0.0898, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 1.2305516265912306, | |
| "grad_norm": 2.0065510272979736, | |
| "learning_rate": 4.682658923845052e-05, | |
| "loss": 0.0784, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.2376237623762376, | |
| "grad_norm": 0.36617517471313477, | |
| "learning_rate": 4.676613528421575e-05, | |
| "loss": 0.0775, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 1.2446958981612446, | |
| "grad_norm": 0.3758735954761505, | |
| "learning_rate": 4.67051507099478e-05, | |
| "loss": 0.081, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.2517680339462518, | |
| "grad_norm": 0.4398355782032013, | |
| "learning_rate": 4.664363700234312e-05, | |
| "loss": 0.0847, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 1.2588401697312588, | |
| "grad_norm": 0.4554755687713623, | |
| "learning_rate": 4.658159566099757e-05, | |
| "loss": 0.0828, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.265912305516266, | |
| "grad_norm": 0.3456956446170807, | |
| "learning_rate": 4.651902819836974e-05, | |
| "loss": 0.0825, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 1.272984441301273, | |
| "grad_norm": 0.38322913646698, | |
| "learning_rate": 4.645593613974415e-05, | |
| "loss": 0.0812, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.2800565770862802, | |
| "grad_norm": 0.5935035943984985, | |
| "learning_rate": 4.639232102319403e-05, | |
| "loss": 0.0813, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 1.2871287128712872, | |
| "grad_norm": 0.39350178837776184, | |
| "learning_rate": 4.632818439954386e-05, | |
| "loss": 0.0828, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.2942008486562941, | |
| "grad_norm": 0.36052772402763367, | |
| "learning_rate": 4.626352783233154e-05, | |
| "loss": 0.0826, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 1.3012729844413014, | |
| "grad_norm": 0.3978725075721741, | |
| "learning_rate": 4.619835289777027e-05, | |
| "loss": 0.0831, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.3083451202263083, | |
| "grad_norm": 0.4236008822917938, | |
| "learning_rate": 4.6132661184710137e-05, | |
| "loss": 0.0895, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 1.3154172560113153, | |
| "grad_norm": 0.4994755983352661, | |
| "learning_rate": 4.606645429459937e-05, | |
| "loss": 0.0882, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.3224893917963225, | |
| "grad_norm": 1.0472633838653564, | |
| "learning_rate": 4.5999733841445325e-05, | |
| "loss": 0.0751, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 1.3295615275813295, | |
| "grad_norm": 0.29904720187187195, | |
| "learning_rate": 4.593250145177511e-05, | |
| "loss": 0.0859, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.3366336633663367, | |
| "grad_norm": 0.3832816779613495, | |
| "learning_rate": 4.5864758764595927e-05, | |
| "loss": 0.0785, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 1.3437057991513437, | |
| "grad_norm": 0.49846336245536804, | |
| "learning_rate": 4.5796507431355174e-05, | |
| "loss": 0.075, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.350777934936351, | |
| "grad_norm": 0.379282146692276, | |
| "learning_rate": 4.5727749115900094e-05, | |
| "loss": 0.0858, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 1.3578500707213579, | |
| "grad_norm": 0.5583311915397644, | |
| "learning_rate": 4.56584854944373e-05, | |
| "loss": 0.0951, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.3649222065063649, | |
| "grad_norm": 0.48655304312705994, | |
| "learning_rate": 4.5588718255491865e-05, | |
| "loss": 0.0884, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 1.371994342291372, | |
| "grad_norm": 0.6549155116081238, | |
| "learning_rate": 4.5518449099866155e-05, | |
| "loss": 0.0858, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.379066478076379, | |
| "grad_norm": 0.3174663186073303, | |
| "learning_rate": 4.5447679740598403e-05, | |
| "loss": 0.0837, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 1.386138613861386, | |
| "grad_norm": 0.36421680450439453, | |
| "learning_rate": 4.537641190292091e-05, | |
| "loss": 0.0841, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.3932107496463932, | |
| "grad_norm": 0.3484024107456207, | |
| "learning_rate": 4.530464732421801e-05, | |
| "loss": 0.0886, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 1.4002828854314002, | |
| "grad_norm": 0.7408681511878967, | |
| "learning_rate": 4.523238775398371e-05, | |
| "loss": 0.0813, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.4073550212164074, | |
| "grad_norm": 0.35419762134552, | |
| "learning_rate": 4.515963495377902e-05, | |
| "loss": 0.0768, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 1.4144271570014144, | |
| "grad_norm": 0.3253655731678009, | |
| "learning_rate": 4.508639069718904e-05, | |
| "loss": 0.0776, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.4214992927864216, | |
| "grad_norm": 0.3978010416030884, | |
| "learning_rate": 4.501265676977974e-05, | |
| "loss": 0.077, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 1.4285714285714286, | |
| "grad_norm": 0.2976684868335724, | |
| "learning_rate": 4.493843496905434e-05, | |
| "loss": 0.0727, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.4356435643564356, | |
| "grad_norm": 0.3433378040790558, | |
| "learning_rate": 4.486372710440961e-05, | |
| "loss": 0.0804, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 1.4427157001414428, | |
| "grad_norm": 0.5507691502571106, | |
| "learning_rate": 4.478853499709167e-05, | |
| "loss": 0.0852, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.4497878359264498, | |
| "grad_norm": 0.36808139085769653, | |
| "learning_rate": 4.471286048015163e-05, | |
| "loss": 0.0832, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 1.4568599717114568, | |
| "grad_norm": 0.3539692163467407, | |
| "learning_rate": 4.46367053984009e-05, | |
| "loss": 0.0817, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.463932107496464, | |
| "grad_norm": 0.8253048658370972, | |
| "learning_rate": 4.4560071608366216e-05, | |
| "loss": 0.0826, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 1.471004243281471, | |
| "grad_norm": 0.4476601779460907, | |
| "learning_rate": 4.448296097824437e-05, | |
| "loss": 0.088, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.4780763790664782, | |
| "grad_norm": 1.5917980670928955, | |
| "learning_rate": 4.440537538785668e-05, | |
| "loss": 0.0828, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 1.4851485148514851, | |
| "grad_norm": 0.6799459457397461, | |
| "learning_rate": 4.432731672860316e-05, | |
| "loss": 0.0786, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.4922206506364923, | |
| "grad_norm": 0.34660467505455017, | |
| "learning_rate": 4.424878690341642e-05, | |
| "loss": 0.0749, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 1.4992927864214993, | |
| "grad_norm": 0.45074212551116943, | |
| "learning_rate": 4.4169787826715225e-05, | |
| "loss": 0.0904, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.5063649222065063, | |
| "grad_norm": 0.41240620613098145, | |
| "learning_rate": 4.4090321424357914e-05, | |
| "loss": 0.078, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 1.5134370579915135, | |
| "grad_norm": 0.3282380998134613, | |
| "learning_rate": 4.401038963359538e-05, | |
| "loss": 0.0835, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.5205091937765205, | |
| "grad_norm": 0.3657848536968231, | |
| "learning_rate": 4.3929994403023865e-05, | |
| "loss": 0.0801, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 1.5275813295615275, | |
| "grad_norm": 0.3543812930583954, | |
| "learning_rate": 4.384913769253746e-05, | |
| "loss": 0.081, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.5346534653465347, | |
| "grad_norm": 0.8204165697097778, | |
| "learning_rate": 4.376782147328031e-05, | |
| "loss": 0.0757, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 1.541725601131542, | |
| "grad_norm": 0.4716034233570099, | |
| "learning_rate": 4.368604772759861e-05, | |
| "loss": 0.0818, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.5487977369165487, | |
| "grad_norm": 0.3865686357021332, | |
| "learning_rate": 4.360381844899221e-05, | |
| "loss": 0.076, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 1.5558698727015559, | |
| "grad_norm": 0.5982063412666321, | |
| "learning_rate": 4.352113564206606e-05, | |
| "loss": 0.0861, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.562942008486563, | |
| "grad_norm": 0.4285977780818939, | |
| "learning_rate": 4.343800132248132e-05, | |
| "loss": 0.0767, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 1.57001414427157, | |
| "grad_norm": 0.542254626750946, | |
| "learning_rate": 4.3354417516906274e-05, | |
| "loss": 0.0814, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.577086280056577, | |
| "grad_norm": 0.273899644613266, | |
| "learning_rate": 4.327038626296682e-05, | |
| "loss": 0.0759, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 1.5841584158415842, | |
| "grad_norm": 0.3528795540332794, | |
| "learning_rate": 4.318590960919692e-05, | |
| "loss": 0.0785, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.5912305516265912, | |
| "grad_norm": 0.4583662748336792, | |
| "learning_rate": 4.3100989614988566e-05, | |
| "loss": 0.0831, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 1.5983026874115982, | |
| "grad_norm": 0.4005572497844696, | |
| "learning_rate": 4.3015628350541595e-05, | |
| "loss": 0.0781, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.6053748231966054, | |
| "grad_norm": 0.3729841411113739, | |
| "learning_rate": 4.2929827896813274e-05, | |
| "loss": 0.0849, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 1.6124469589816126, | |
| "grad_norm": 0.37740039825439453, | |
| "learning_rate": 4.284359034546751e-05, | |
| "loss": 0.0819, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.6195190947666194, | |
| "grad_norm": 0.45638662576675415, | |
| "learning_rate": 4.2756917798823874e-05, | |
| "loss": 0.0749, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 1.6265912305516266, | |
| "grad_norm": 0.3588665723800659, | |
| "learning_rate": 4.2669812369806374e-05, | |
| "loss": 0.0715, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.6336633663366338, | |
| "grad_norm": 0.3274244964122772, | |
| "learning_rate": 4.258227618189191e-05, | |
| "loss": 0.0707, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 1.6407355021216408, | |
| "grad_norm": 0.36558204889297485, | |
| "learning_rate": 4.249431136905853e-05, | |
| "loss": 0.0732, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.6478076379066477, | |
| "grad_norm": 0.32020628452301025, | |
| "learning_rate": 4.240592007573341e-05, | |
| "loss": 0.0787, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 1.654879773691655, | |
| "grad_norm": 0.4697955846786499, | |
| "learning_rate": 4.231710445674056e-05, | |
| "loss": 0.0707, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.661951909476662, | |
| "grad_norm": 0.3986113965511322, | |
| "learning_rate": 4.222786667724832e-05, | |
| "loss": 0.0746, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 1.669024045261669, | |
| "grad_norm": 0.35733580589294434, | |
| "learning_rate": 4.213820891271652e-05, | |
| "loss": 0.0787, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.6760961810466761, | |
| "grad_norm": 0.6812814474105835, | |
| "learning_rate": 4.204813334884353e-05, | |
| "loss": 0.0921, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 1.6831683168316833, | |
| "grad_norm": 0.3964853584766388, | |
| "learning_rate": 4.19576421815129e-05, | |
| "loss": 0.0786, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.69024045261669, | |
| "grad_norm": 0.4422667324542999, | |
| "learning_rate": 4.186673761673989e-05, | |
| "loss": 0.0752, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 1.6973125884016973, | |
| "grad_norm": 0.5387901663780212, | |
| "learning_rate": 4.177542187061763e-05, | |
| "loss": 0.0829, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.7043847241867045, | |
| "grad_norm": 0.30730950832366943, | |
| "learning_rate": 4.168369716926315e-05, | |
| "loss": 0.0782, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 1.7114568599717115, | |
| "grad_norm": 0.3543797731399536, | |
| "learning_rate": 4.159156574876306e-05, | |
| "loss": 0.0842, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.7185289957567185, | |
| "grad_norm": 0.44837215542793274, | |
| "learning_rate": 4.1499029855119106e-05, | |
| "loss": 0.0754, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 1.7256011315417257, | |
| "grad_norm": 0.4181993007659912, | |
| "learning_rate": 4.1406091744193354e-05, | |
| "loss": 0.0833, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.7326732673267327, | |
| "grad_norm": 0.47836604714393616, | |
| "learning_rate": 4.1312753681653213e-05, | |
| "loss": 0.0819, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 1.7397454031117396, | |
| "grad_norm": 0.42279157042503357, | |
| "learning_rate": 4.121901794291622e-05, | |
| "loss": 0.0748, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.7468175388967468, | |
| "grad_norm": 0.40630263090133667, | |
| "learning_rate": 4.112488681309455e-05, | |
| "loss": 0.0812, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 1.753889674681754, | |
| "grad_norm": 0.414064884185791, | |
| "learning_rate": 4.103036258693932e-05, | |
| "loss": 0.0815, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.7609618104667608, | |
| "grad_norm": 0.3953958749771118, | |
| "learning_rate": 4.0935447568784646e-05, | |
| "loss": 0.0839, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 1.768033946251768, | |
| "grad_norm": 0.4421181082725525, | |
| "learning_rate": 4.084014407249145e-05, | |
| "loss": 0.0784, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.7751060820367752, | |
| "grad_norm": 0.6101165413856506, | |
| "learning_rate": 4.0744454421391095e-05, | |
| "loss": 0.084, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 1.7821782178217822, | |
| "grad_norm": 0.8179222941398621, | |
| "learning_rate": 4.064838094822868e-05, | |
| "loss": 0.0792, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.7892503536067892, | |
| "grad_norm": 0.40432754158973694, | |
| "learning_rate": 4.055192599510624e-05, | |
| "loss": 0.0769, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 1.7963224893917964, | |
| "grad_norm": 0.36589494347572327, | |
| "learning_rate": 4.0455091913425606e-05, | |
| "loss": 0.0783, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.8033946251768034, | |
| "grad_norm": 0.3545513451099396, | |
| "learning_rate": 4.0357881063831105e-05, | |
| "loss": 0.0723, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 1.8104667609618104, | |
| "grad_norm": 0.5380228161811829, | |
| "learning_rate": 4.026029581615198e-05, | |
| "loss": 0.0771, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.8175388967468176, | |
| "grad_norm": 0.6094365119934082, | |
| "learning_rate": 4.016233854934468e-05, | |
| "loss": 0.084, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 1.8246110325318248, | |
| "grad_norm": 0.6324317455291748, | |
| "learning_rate": 4.0064011651434776e-05, | |
| "loss": 0.0794, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.8316831683168315, | |
| "grad_norm": 0.2819110155105591, | |
| "learning_rate": 3.996531751945883e-05, | |
| "loss": 0.0853, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 1.8387553041018387, | |
| "grad_norm": 0.3535131514072418, | |
| "learning_rate": 3.986625855940591e-05, | |
| "loss": 0.0712, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.845827439886846, | |
| "grad_norm": 0.32027262449264526, | |
| "learning_rate": 3.9766837186158944e-05, | |
| "loss": 0.0751, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 1.852899575671853, | |
| "grad_norm": 0.4550008475780487, | |
| "learning_rate": 3.9667055823435885e-05, | |
| "loss": 0.0744, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.85997171145686, | |
| "grad_norm": 0.30715903639793396, | |
| "learning_rate": 3.956691690373055e-05, | |
| "loss": 0.0745, | |
| "step": 6575 | |
| }, | |
| { | |
| "epoch": 1.8670438472418671, | |
| "grad_norm": 0.5177515745162964, | |
| "learning_rate": 3.946642286825339e-05, | |
| "loss": 0.0715, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.874115983026874, | |
| "grad_norm": 0.5210005640983582, | |
| "learning_rate": 3.9365576166871964e-05, | |
| "loss": 0.0696, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 1.881188118811881, | |
| "grad_norm": 0.28931137919425964, | |
| "learning_rate": 3.926437925805118e-05, | |
| "loss": 0.0749, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.8882602545968883, | |
| "grad_norm": 0.5680564641952515, | |
| "learning_rate": 3.916283460879341e-05, | |
| "loss": 0.074, | |
| "step": 6675 | |
| }, | |
| { | |
| "epoch": 1.8953323903818955, | |
| "grad_norm": 0.44310295581817627, | |
| "learning_rate": 3.90609446945783e-05, | |
| "loss": 0.0805, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.9024045261669023, | |
| "grad_norm": 0.6425299644470215, | |
| "learning_rate": 3.8958711999302475e-05, | |
| "loss": 0.0842, | |
| "step": 6725 | |
| }, | |
| { | |
| "epoch": 1.9094766619519095, | |
| "grad_norm": 0.300752729177475, | |
| "learning_rate": 3.885613901521893e-05, | |
| "loss": 0.0698, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.9165487977369167, | |
| "grad_norm": 0.43727684020996094, | |
| "learning_rate": 3.875322824287633e-05, | |
| "loss": 0.0767, | |
| "step": 6775 | |
| }, | |
| { | |
| "epoch": 1.9236209335219236, | |
| "grad_norm": 0.41746985912323, | |
| "learning_rate": 3.8649982191058e-05, | |
| "loss": 0.0795, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.9306930693069306, | |
| "grad_norm": 0.35224172472953796, | |
| "learning_rate": 3.8546403376720786e-05, | |
| "loss": 0.0765, | |
| "step": 6825 | |
| }, | |
| { | |
| "epoch": 1.9377652050919378, | |
| "grad_norm": 0.295906662940979, | |
| "learning_rate": 3.8442494324933724e-05, | |
| "loss": 0.076, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.9448373408769448, | |
| "grad_norm": 0.2558325529098511, | |
| "learning_rate": 3.833825756881645e-05, | |
| "loss": 0.0807, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 1.9519094766619518, | |
| "grad_norm": 0.3434934616088867, | |
| "learning_rate": 3.8233695649477426e-05, | |
| "loss": 0.0686, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.958981612446959, | |
| "grad_norm": 0.23843282461166382, | |
| "learning_rate": 3.8128811115952044e-05, | |
| "loss": 0.0795, | |
| "step": 6925 | |
| }, | |
| { | |
| "epoch": 1.9660537482319662, | |
| "grad_norm": 0.7551613450050354, | |
| "learning_rate": 3.802360652514047e-05, | |
| "loss": 0.0681, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.973125884016973, | |
| "grad_norm": 0.4647892713546753, | |
| "learning_rate": 3.7918084441745275e-05, | |
| "loss": 0.0798, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 1.9801980198019802, | |
| "grad_norm": 0.3910640478134155, | |
| "learning_rate": 3.781224743820897e-05, | |
| "loss": 0.077, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.9872701555869874, | |
| "grad_norm": 0.3788031339645386, | |
| "learning_rate": 3.770609809465124e-05, | |
| "loss": 0.0805, | |
| "step": 7025 | |
| }, | |
| { | |
| "epoch": 1.9943422913719944, | |
| "grad_norm": 0.6797524690628052, | |
| "learning_rate": 3.759963899880609e-05, | |
| "loss": 0.0726, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9707863671603164, | |
| "eval_f1": 0.8914057811793163, | |
| "eval_loss": 0.08405517786741257, | |
| "eval_precision": 0.8863308401537767, | |
| "eval_recall": 0.8965391729373575, | |
| "eval_runtime": 54.724, | |
| "eval_samples_per_second": 918.646, | |
| "eval_steps_per_second": 3.6, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 2.0014144271570014, | |
| "grad_norm": 0.22099077701568604, | |
| "learning_rate": 3.749287274595872e-05, | |
| "loss": 0.0755, | |
| "step": 7075 | |
| }, | |
| { | |
| "epoch": 2.0084865629420086, | |
| "grad_norm": 0.4921034872531891, | |
| "learning_rate": 3.738580193888228e-05, | |
| "loss": 0.0501, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.0155586987270158, | |
| "grad_norm": 0.23433761298656464, | |
| "learning_rate": 3.7278429187774436e-05, | |
| "loss": 0.0501, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 2.0226308345120225, | |
| "grad_norm": 0.4263274371623993, | |
| "learning_rate": 3.7170757110193697e-05, | |
| "loss": 0.0513, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.0297029702970297, | |
| "grad_norm": 0.2480274736881256, | |
| "learning_rate": 3.7062788330995635e-05, | |
| "loss": 0.0509, | |
| "step": 7175 | |
| }, | |
| { | |
| "epoch": 2.036775106082037, | |
| "grad_norm": 0.5820468664169312, | |
| "learning_rate": 3.695452548226887e-05, | |
| "loss": 0.0573, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.0438472418670437, | |
| "grad_norm": 0.42300423979759216, | |
| "learning_rate": 3.684597120327094e-05, | |
| "loss": 0.0558, | |
| "step": 7225 | |
| }, | |
| { | |
| "epoch": 2.050919377652051, | |
| "grad_norm": 0.41470280289649963, | |
| "learning_rate": 3.673712814036392e-05, | |
| "loss": 0.0519, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.057991513437058, | |
| "grad_norm": 0.2917075753211975, | |
| "learning_rate": 3.662799894694995e-05, | |
| "loss": 0.0518, | |
| "step": 7275 | |
| }, | |
| { | |
| "epoch": 2.065063649222065, | |
| "grad_norm": 0.41207775473594666, | |
| "learning_rate": 3.651858628340649e-05, | |
| "loss": 0.0474, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.072135785007072, | |
| "grad_norm": 0.31983888149261475, | |
| "learning_rate": 3.640889281702155e-05, | |
| "loss": 0.0583, | |
| "step": 7325 | |
| }, | |
| { | |
| "epoch": 2.0792079207920793, | |
| "grad_norm": 0.65561842918396, | |
| "learning_rate": 3.629892122192859e-05, | |
| "loss": 0.057, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.0862800565770865, | |
| "grad_norm": 1.253188133239746, | |
| "learning_rate": 3.6188674179041356e-05, | |
| "loss": 0.0524, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 2.0933521923620932, | |
| "grad_norm": 0.34525951743125916, | |
| "learning_rate": 3.607815437598853e-05, | |
| "loss": 0.0577, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.1004243281471005, | |
| "grad_norm": 0.5648230910301208, | |
| "learning_rate": 3.5967364507048226e-05, | |
| "loss": 0.0564, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 2.1074964639321077, | |
| "grad_norm": 0.2504655420780182, | |
| "learning_rate": 3.585630727308227e-05, | |
| "loss": 0.0476, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 2.1145685997171144, | |
| "grad_norm": 0.3219181299209595, | |
| "learning_rate": 3.574498538147035e-05, | |
| "loss": 0.0538, | |
| "step": 7475 | |
| }, | |
| { | |
| "epoch": 2.1216407355021216, | |
| "grad_norm": 0.36033445596694946, | |
| "learning_rate": 3.563340154604411e-05, | |
| "loss": 0.0576, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.128712871287129, | |
| "grad_norm": 0.3835320472717285, | |
| "learning_rate": 3.552155848702086e-05, | |
| "loss": 0.0529, | |
| "step": 7525 | |
| }, | |
| { | |
| "epoch": 2.1357850070721356, | |
| "grad_norm": 0.40226078033447266, | |
| "learning_rate": 3.5409458930937346e-05, | |
| "loss": 0.0507, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 2.142857142857143, | |
| "grad_norm": 0.34018775820732117, | |
| "learning_rate": 3.5297105610583265e-05, | |
| "loss": 0.0514, | |
| "step": 7575 | |
| }, | |
| { | |
| "epoch": 2.14992927864215, | |
| "grad_norm": 0.7404220700263977, | |
| "learning_rate": 3.518450126493464e-05, | |
| "loss": 0.0511, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.157001414427157, | |
| "grad_norm": 0.2098894715309143, | |
| "learning_rate": 3.507164863908704e-05, | |
| "loss": 0.0543, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 2.164073550212164, | |
| "grad_norm": 0.43998876214027405, | |
| "learning_rate": 3.4958550484188675e-05, | |
| "loss": 0.0547, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 2.171145685997171, | |
| "grad_norm": 0.32174915075302124, | |
| "learning_rate": 3.4845209557373315e-05, | |
| "loss": 0.0528, | |
| "step": 7675 | |
| }, | |
| { | |
| "epoch": 2.1782178217821784, | |
| "grad_norm": 0.8568967580795288, | |
| "learning_rate": 3.4731628621693094e-05, | |
| "loss": 0.0511, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 2.185289957567185, | |
| "grad_norm": 0.39218711853027344, | |
| "learning_rate": 3.461781044605112e-05, | |
| "loss": 0.0556, | |
| "step": 7725 | |
| }, | |
| { | |
| "epoch": 2.1923620933521923, | |
| "grad_norm": 0.2832966446876526, | |
| "learning_rate": 3.4503757805134004e-05, | |
| "loss": 0.0553, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 2.1994342291371995, | |
| "grad_norm": 0.3094783425331116, | |
| "learning_rate": 3.438947347934421e-05, | |
| "loss": 0.0482, | |
| "step": 7775 | |
| }, | |
| { | |
| "epoch": 2.2065063649222063, | |
| "grad_norm": 0.40016797184944153, | |
| "learning_rate": 3.427496025473227e-05, | |
| "loss": 0.0508, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.2135785007072135, | |
| "grad_norm": 0.3726707398891449, | |
| "learning_rate": 3.416022092292887e-05, | |
| "loss": 0.0507, | |
| "step": 7825 | |
| }, | |
| { | |
| "epoch": 2.2206506364922207, | |
| "grad_norm": 0.4792294502258301, | |
| "learning_rate": 3.4045258281076766e-05, | |
| "loss": 0.0549, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 2.227722772277228, | |
| "grad_norm": 1.009504795074463, | |
| "learning_rate": 3.393007513176265e-05, | |
| "loss": 0.0558, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 2.2347949080622347, | |
| "grad_norm": 0.40913182497024536, | |
| "learning_rate": 3.38146742829488e-05, | |
| "loss": 0.058, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 2.241867043847242, | |
| "grad_norm": 0.436496376991272, | |
| "learning_rate": 3.369905854790458e-05, | |
| "loss": 0.0546, | |
| "step": 7925 | |
| }, | |
| { | |
| "epoch": 2.248939179632249, | |
| "grad_norm": 0.2754940986633301, | |
| "learning_rate": 3.358323074513795e-05, | |
| "loss": 0.0498, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 2.256011315417256, | |
| "grad_norm": 0.7202574610710144, | |
| "learning_rate": 3.3467193698326696e-05, | |
| "loss": 0.0542, | |
| "step": 7975 | |
| }, | |
| { | |
| "epoch": 2.263083451202263, | |
| "grad_norm": 0.3375069200992584, | |
| "learning_rate": 3.335095023624958e-05, | |
| "loss": 0.0501, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.2701555869872703, | |
| "grad_norm": 0.34498855471611023, | |
| "learning_rate": 3.323450319271744e-05, | |
| "loss": 0.0551, | |
| "step": 8025 | |
| }, | |
| { | |
| "epoch": 2.2772277227722775, | |
| "grad_norm": 0.427866131067276, | |
| "learning_rate": 3.311785540650405e-05, | |
| "loss": 0.0517, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 2.2842998585572842, | |
| "grad_norm": 0.2636614739894867, | |
| "learning_rate": 3.300100972127694e-05, | |
| "loss": 0.0503, | |
| "step": 8075 | |
| }, | |
| { | |
| "epoch": 2.2913719943422914, | |
| "grad_norm": 0.34367385506629944, | |
| "learning_rate": 3.288396898552807e-05, | |
| "loss": 0.0512, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 2.298444130127298, | |
| "grad_norm": 0.27327075600624084, | |
| "learning_rate": 3.2766736052504385e-05, | |
| "loss": 0.0539, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 2.3055162659123054, | |
| "grad_norm": 0.3980356454849243, | |
| "learning_rate": 3.264931378013827e-05, | |
| "loss": 0.0583, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 2.3125884016973126, | |
| "grad_norm": 0.24006566405296326, | |
| "learning_rate": 3.2531705030977864e-05, | |
| "loss": 0.0511, | |
| "step": 8175 | |
| }, | |
| { | |
| "epoch": 2.31966053748232, | |
| "grad_norm": 0.5592175722122192, | |
| "learning_rate": 3.241391267211729e-05, | |
| "loss": 0.0502, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 2.3267326732673266, | |
| "grad_norm": 1.0215811729431152, | |
| "learning_rate": 3.229593957512672e-05, | |
| "loss": 0.0554, | |
| "step": 8225 | |
| }, | |
| { | |
| "epoch": 2.333804809052334, | |
| "grad_norm": 0.27189400792121887, | |
| "learning_rate": 3.2177788615982454e-05, | |
| "loss": 0.0501, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 2.340876944837341, | |
| "grad_norm": 0.3555835485458374, | |
| "learning_rate": 3.205946267499672e-05, | |
| "loss": 0.0563, | |
| "step": 8275 | |
| }, | |
| { | |
| "epoch": 2.3479490806223478, | |
| "grad_norm": 0.36138617992401123, | |
| "learning_rate": 3.19409646367475e-05, | |
| "loss": 0.0567, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 2.355021216407355, | |
| "grad_norm": 0.8795189261436462, | |
| "learning_rate": 3.182229739000822e-05, | |
| "loss": 0.0599, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 2.362093352192362, | |
| "grad_norm": 0.26847535371780396, | |
| "learning_rate": 3.170346382767731e-05, | |
| "loss": 0.0496, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 2.3691654879773694, | |
| "grad_norm": 0.29600900411605835, | |
| "learning_rate": 3.158446684670765e-05, | |
| "loss": 0.0485, | |
| "step": 8375 | |
| }, | |
| { | |
| "epoch": 2.376237623762376, | |
| "grad_norm": 0.2916370630264282, | |
| "learning_rate": 3.1465309348036016e-05, | |
| "loss": 0.0499, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 2.3833097595473833, | |
| "grad_norm": 0.5006932616233826, | |
| "learning_rate": 3.1345994236512305e-05, | |
| "loss": 0.0485, | |
| "step": 8425 | |
| }, | |
| { | |
| "epoch": 2.3903818953323905, | |
| "grad_norm": 0.3413617014884949, | |
| "learning_rate": 3.122652442082874e-05, | |
| "loss": 0.0506, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 2.3974540311173973, | |
| "grad_norm": 0.3974776268005371, | |
| "learning_rate": 3.1106902813448956e-05, | |
| "loss": 0.05, | |
| "step": 8475 | |
| }, | |
| { | |
| "epoch": 2.4045261669024045, | |
| "grad_norm": 0.3416425883769989, | |
| "learning_rate": 3.098713233053702e-05, | |
| "loss": 0.0532, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.4115983026874117, | |
| "grad_norm": 0.5350253582000732, | |
| "learning_rate": 3.086721589188628e-05, | |
| "loss": 0.0505, | |
| "step": 8525 | |
| }, | |
| { | |
| "epoch": 2.418670438472419, | |
| "grad_norm": 0.6094595789909363, | |
| "learning_rate": 3.0747156420848275e-05, | |
| "loss": 0.0488, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 2.4257425742574257, | |
| "grad_norm": 0.2579383850097656, | |
| "learning_rate": 3.0626956844261404e-05, | |
| "loss": 0.0531, | |
| "step": 8575 | |
| }, | |
| { | |
| "epoch": 2.432814710042433, | |
| "grad_norm": 0.3322567939758301, | |
| "learning_rate": 3.050662009237959e-05, | |
| "loss": 0.0521, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 2.4398868458274396, | |
| "grad_norm": 0.2817617654800415, | |
| "learning_rate": 3.038614909880084e-05, | |
| "loss": 0.0583, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 2.446958981612447, | |
| "grad_norm": 0.36568090319633484, | |
| "learning_rate": 3.026554680039575e-05, | |
| "loss": 0.0505, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 2.454031117397454, | |
| "grad_norm": 0.32663673162460327, | |
| "learning_rate": 3.0144816137235892e-05, | |
| "loss": 0.0521, | |
| "step": 8675 | |
| }, | |
| { | |
| "epoch": 2.4611032531824613, | |
| "grad_norm": 0.4173208475112915, | |
| "learning_rate": 3.0023960052522148e-05, | |
| "loss": 0.0586, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 2.468175388967468, | |
| "grad_norm": 0.2725406289100647, | |
| "learning_rate": 2.9902981492512945e-05, | |
| "loss": 0.0484, | |
| "step": 8725 | |
| }, | |
| { | |
| "epoch": 2.4752475247524752, | |
| "grad_norm": 0.4088152348995209, | |
| "learning_rate": 2.9781883406452453e-05, | |
| "loss": 0.0477, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 2.4823196605374824, | |
| "grad_norm": 0.35629716515541077, | |
| "learning_rate": 2.966066874649869e-05, | |
| "loss": 0.0484, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 2.489391796322489, | |
| "grad_norm": 0.4508310854434967, | |
| "learning_rate": 2.9539340467651494e-05, | |
| "loss": 0.0525, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 2.4964639321074964, | |
| "grad_norm": 0.49331003427505493, | |
| "learning_rate": 2.9417901527680582e-05, | |
| "loss": 0.049, | |
| "step": 8825 | |
| }, | |
| { | |
| "epoch": 2.5035360678925036, | |
| "grad_norm": 0.3239150941371918, | |
| "learning_rate": 2.9296354887053367e-05, | |
| "loss": 0.0522, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 2.510608203677511, | |
| "grad_norm": 0.2788864076137543, | |
| "learning_rate": 2.9174703508862828e-05, | |
| "loss": 0.0549, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 2.5176803394625176, | |
| "grad_norm": 0.3619994819164276, | |
| "learning_rate": 2.9057822401556116e-05, | |
| "loss": 0.0525, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 2.5247524752475248, | |
| "grad_norm": 0.25908371806144714, | |
| "learning_rate": 2.89359743427919e-05, | |
| "loss": 0.057, | |
| "step": 8925 | |
| }, | |
| { | |
| "epoch": 2.531824611032532, | |
| "grad_norm": 0.3243955373764038, | |
| "learning_rate": 2.8814030331907134e-05, | |
| "loss": 0.052, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 2.5388967468175387, | |
| "grad_norm": 0.3788680136203766, | |
| "learning_rate": 2.8691993341681998e-05, | |
| "loss": 0.0526, | |
| "step": 8975 | |
| }, | |
| { | |
| "epoch": 2.545968882602546, | |
| "grad_norm": 0.27258631587028503, | |
| "learning_rate": 2.856986634716332e-05, | |
| "loss": 0.0555, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.553041018387553, | |
| "grad_norm": 1.2522977590560913, | |
| "learning_rate": 2.8447652325592066e-05, | |
| "loss": 0.0574, | |
| "step": 9025 | |
| }, | |
| { | |
| "epoch": 2.5601131541725604, | |
| "grad_norm": 0.23333418369293213, | |
| "learning_rate": 2.8325354256330787e-05, | |
| "loss": 0.0531, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 2.567185289957567, | |
| "grad_norm": 0.32750052213668823, | |
| "learning_rate": 2.8202975120790963e-05, | |
| "loss": 0.0505, | |
| "step": 9075 | |
| }, | |
| { | |
| "epoch": 2.5742574257425743, | |
| "grad_norm": 0.3817848861217499, | |
| "learning_rate": 2.8080517902360316e-05, | |
| "loss": 0.0533, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 2.581329561527581, | |
| "grad_norm": 1.9594416618347168, | |
| "learning_rate": 2.7957985586330106e-05, | |
| "loss": 0.0499, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 2.5884016973125883, | |
| "grad_norm": 0.3222751021385193, | |
| "learning_rate": 2.7835381159822336e-05, | |
| "loss": 0.0553, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 2.5954738330975955, | |
| "grad_norm": 0.3441489040851593, | |
| "learning_rate": 2.7712707611716938e-05, | |
| "loss": 0.0533, | |
| "step": 9175 | |
| }, | |
| { | |
| "epoch": 2.6025459688826027, | |
| "grad_norm": 0.39582306146621704, | |
| "learning_rate": 2.75899679325789e-05, | |
| "loss": 0.0516, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.6096181046676095, | |
| "grad_norm": 0.32057181000709534, | |
| "learning_rate": 2.7467165114585358e-05, | |
| "loss": 0.0573, | |
| "step": 9225 | |
| }, | |
| { | |
| "epoch": 2.6166902404526167, | |
| "grad_norm": 0.49784281849861145, | |
| "learning_rate": 2.734430215145269e-05, | |
| "loss": 0.0508, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 2.623762376237624, | |
| "grad_norm": 0.3396826386451721, | |
| "learning_rate": 2.7221382038363492e-05, | |
| "loss": 0.053, | |
| "step": 9275 | |
| }, | |
| { | |
| "epoch": 2.6308345120226306, | |
| "grad_norm": 0.30675604939460754, | |
| "learning_rate": 2.7098407771893563e-05, | |
| "loss": 0.0527, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 2.637906647807638, | |
| "grad_norm": 0.7486620545387268, | |
| "learning_rate": 2.6975382349938892e-05, | |
| "loss": 0.0466, | |
| "step": 9325 | |
| }, | |
| { | |
| "epoch": 2.644978783592645, | |
| "grad_norm": 0.2760941684246063, | |
| "learning_rate": 2.685230877164253e-05, | |
| "loss": 0.0544, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 2.6520509193776522, | |
| "grad_norm": 0.4320007264614105, | |
| "learning_rate": 2.672919003732149e-05, | |
| "loss": 0.0516, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 2.659123055162659, | |
| "grad_norm": 0.390225350856781, | |
| "learning_rate": 2.6606029148393625e-05, | |
| "loss": 0.0491, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.666195190947666, | |
| "grad_norm": 0.3701136112213135, | |
| "learning_rate": 2.648282910730444e-05, | |
| "loss": 0.0531, | |
| "step": 9425 | |
| }, | |
| { | |
| "epoch": 2.6732673267326734, | |
| "grad_norm": 0.554530918598175, | |
| "learning_rate": 2.6359592917453897e-05, | |
| "loss": 0.0514, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 2.68033946251768, | |
| "grad_norm": 0.7218428254127502, | |
| "learning_rate": 2.623632358312319e-05, | |
| "loss": 0.0531, | |
| "step": 9475 | |
| }, | |
| { | |
| "epoch": 2.6874115983026874, | |
| "grad_norm": 0.4088691174983978, | |
| "learning_rate": 2.6113024109401547e-05, | |
| "loss": 0.0517, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.6944837340876946, | |
| "grad_norm": 0.2585553228855133, | |
| "learning_rate": 2.5989697502112903e-05, | |
| "loss": 0.0495, | |
| "step": 9525 | |
| }, | |
| { | |
| "epoch": 2.701555869872702, | |
| "grad_norm": 0.3183114528656006, | |
| "learning_rate": 2.58663467677427e-05, | |
| "loss": 0.0488, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 2.7086280056577086, | |
| "grad_norm": 0.35523074865341187, | |
| "learning_rate": 2.574297491336452e-05, | |
| "loss": 0.0507, | |
| "step": 9575 | |
| }, | |
| { | |
| "epoch": 2.7157001414427158, | |
| "grad_norm": 0.3662826120853424, | |
| "learning_rate": 2.5619584946566844e-05, | |
| "loss": 0.0492, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.7227722772277225, | |
| "grad_norm": 0.5980022549629211, | |
| "learning_rate": 2.549617987537968e-05, | |
| "loss": 0.056, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 2.7298444130127297, | |
| "grad_norm": 0.3375484347343445, | |
| "learning_rate": 2.537276270820127e-05, | |
| "loss": 0.0545, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 2.736916548797737, | |
| "grad_norm": 0.2702608108520508, | |
| "learning_rate": 2.524933645372472e-05, | |
| "loss": 0.0483, | |
| "step": 9675 | |
| }, | |
| { | |
| "epoch": 2.743988684582744, | |
| "grad_norm": 0.20587654411792755, | |
| "learning_rate": 2.512590412086468e-05, | |
| "loss": 0.048, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 2.751060820367751, | |
| "grad_norm": 0.40114825963974, | |
| "learning_rate": 2.5002468718683985e-05, | |
| "loss": 0.0478, | |
| "step": 9725 | |
| }, | |
| { | |
| "epoch": 2.758132956152758, | |
| "grad_norm": 0.25009649991989136, | |
| "learning_rate": 2.4879033256320268e-05, | |
| "loss": 0.0528, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 2.7652050919377653, | |
| "grad_norm": 0.4038143455982208, | |
| "learning_rate": 2.475560074291266e-05, | |
| "loss": 0.0536, | |
| "step": 9775 | |
| }, | |
| { | |
| "epoch": 2.772277227722772, | |
| "grad_norm": 0.3600768744945526, | |
| "learning_rate": 2.463217418752838e-05, | |
| "loss": 0.0513, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.7793493635077793, | |
| "grad_norm": 0.2662784457206726, | |
| "learning_rate": 2.4508756599089423e-05, | |
| "loss": 0.0528, | |
| "step": 9825 | |
| }, | |
| { | |
| "epoch": 2.7864214992927865, | |
| "grad_norm": 0.4926321506500244, | |
| "learning_rate": 2.4385350986299162e-05, | |
| "loss": 0.0502, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 2.7934936350777937, | |
| "grad_norm": 0.3003157675266266, | |
| "learning_rate": 2.4261960357569036e-05, | |
| "loss": 0.0491, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 2.8005657708628005, | |
| "grad_norm": 0.3073439598083496, | |
| "learning_rate": 2.413858772094521e-05, | |
| "loss": 0.051, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.8076379066478077, | |
| "grad_norm": 0.4092470109462738, | |
| "learning_rate": 2.4015236084035204e-05, | |
| "loss": 0.0474, | |
| "step": 9925 | |
| }, | |
| { | |
| "epoch": 2.814710042432815, | |
| "grad_norm": 0.5402696132659912, | |
| "learning_rate": 2.389190845393463e-05, | |
| "loss": 0.0492, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 2.8217821782178216, | |
| "grad_norm": 0.3162129521369934, | |
| "learning_rate": 2.376860783715384e-05, | |
| "loss": 0.0509, | |
| "step": 9975 | |
| }, | |
| { | |
| "epoch": 2.828854314002829, | |
| "grad_norm": 0.3184404671192169, | |
| "learning_rate": 2.3645337239544658e-05, | |
| "loss": 0.0493, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.835926449787836, | |
| "grad_norm": 0.33763962984085083, | |
| "learning_rate": 2.3522099666227087e-05, | |
| "loss": 0.0489, | |
| "step": 10025 | |
| }, | |
| { | |
| "epoch": 2.8429985855728432, | |
| "grad_norm": 0.4636186361312866, | |
| "learning_rate": 2.339889812151606e-05, | |
| "loss": 0.0545, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 2.85007072135785, | |
| "grad_norm": 0.3926536738872528, | |
| "learning_rate": 2.3275735608848208e-05, | |
| "loss": 0.0501, | |
| "step": 10075 | |
| }, | |
| { | |
| "epoch": 2.857142857142857, | |
| "grad_norm": 0.4206957519054413, | |
| "learning_rate": 2.3152615130708592e-05, | |
| "loss": 0.0502, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 2.864214992927864, | |
| "grad_norm": 0.729640781879425, | |
| "learning_rate": 2.302953968855759e-05, | |
| "loss": 0.0469, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 2.871287128712871, | |
| "grad_norm": 0.37660130858421326, | |
| "learning_rate": 2.2906512282757644e-05, | |
| "loss": 0.0524, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 2.8783592644978784, | |
| "grad_norm": 0.5026111602783203, | |
| "learning_rate": 2.2783535912500187e-05, | |
| "loss": 0.0587, | |
| "step": 10175 | |
| }, | |
| { | |
| "epoch": 2.8854314002828856, | |
| "grad_norm": 0.3521290123462677, | |
| "learning_rate": 2.266061357573248e-05, | |
| "loss": 0.0507, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.8925035360678923, | |
| "grad_norm": 0.32747042179107666, | |
| "learning_rate": 2.253774826908453e-05, | |
| "loss": 0.0531, | |
| "step": 10225 | |
| }, | |
| { | |
| "epoch": 2.8995756718528995, | |
| "grad_norm": 0.3167445659637451, | |
| "learning_rate": 2.2414942987796084e-05, | |
| "loss": 0.0468, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 2.9066478076379068, | |
| "grad_norm": 0.30890071392059326, | |
| "learning_rate": 2.2292200725643534e-05, | |
| "loss": 0.0535, | |
| "step": 10275 | |
| }, | |
| { | |
| "epoch": 2.9137199434229135, | |
| "grad_norm": 0.4126497209072113, | |
| "learning_rate": 2.216952447486701e-05, | |
| "loss": 0.0491, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 2.9207920792079207, | |
| "grad_norm": 1.0146815776824951, | |
| "learning_rate": 2.2046917226097387e-05, | |
| "loss": 0.0507, | |
| "step": 10325 | |
| }, | |
| { | |
| "epoch": 2.927864214992928, | |
| "grad_norm": 0.3795275390148163, | |
| "learning_rate": 2.1924381968283394e-05, | |
| "loss": 0.0501, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 2.934936350777935, | |
| "grad_norm": 0.3325127065181732, | |
| "learning_rate": 2.1801921688618758e-05, | |
| "loss": 0.0438, | |
| "step": 10375 | |
| }, | |
| { | |
| "epoch": 2.942008486562942, | |
| "grad_norm": 0.358235239982605, | |
| "learning_rate": 2.167953937246934e-05, | |
| "loss": 0.0519, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 2.949080622347949, | |
| "grad_norm": 0.37195077538490295, | |
| "learning_rate": 2.1557238003300438e-05, | |
| "loss": 0.0472, | |
| "step": 10425 | |
| }, | |
| { | |
| "epoch": 2.9561527581329563, | |
| "grad_norm": 0.3947463929653168, | |
| "learning_rate": 2.1435020562603944e-05, | |
| "loss": 0.0469, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 2.963224893917963, | |
| "grad_norm": 0.3517344892024994, | |
| "learning_rate": 2.1312890029825765e-05, | |
| "loss": 0.0451, | |
| "step": 10475 | |
| }, | |
| { | |
| "epoch": 2.9702970297029703, | |
| "grad_norm": 0.4573589861392975, | |
| "learning_rate": 2.1190849382293153e-05, | |
| "loss": 0.0478, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.9773691654879775, | |
| "grad_norm": 0.4348020553588867, | |
| "learning_rate": 2.1068901595142076e-05, | |
| "loss": 0.0471, | |
| "step": 10525 | |
| }, | |
| { | |
| "epoch": 2.9844413012729847, | |
| "grad_norm": 0.27447789907455444, | |
| "learning_rate": 2.0947049641244763e-05, | |
| "loss": 0.0525, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 2.9915134370579914, | |
| "grad_norm": 0.5210450291633606, | |
| "learning_rate": 2.0825296491137178e-05, | |
| "loss": 0.049, | |
| "step": 10575 | |
| }, | |
| { | |
| "epoch": 2.9985855728429986, | |
| "grad_norm": 0.4803789556026459, | |
| "learning_rate": 2.0703645112946632e-05, | |
| "loss": 0.048, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9731636716504077, | |
| "eval_f1": 0.9040792973909716, | |
| "eval_loss": 0.080692358314991, | |
| "eval_precision": 0.8956920811279763, | |
| "eval_recall": 0.9126250733540829, | |
| "eval_runtime": 54.8402, | |
| "eval_samples_per_second": 916.701, | |
| "eval_steps_per_second": 3.592, | |
| "step": 10605 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 17675, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 5, | |
| "early_stopping_threshold": 0.01 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 1 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.589143261681162e+17, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |