| [ | |
| { | |
| "step": 100, | |
| "loss": 7.657252192497253, | |
| "perplexity": 2115.935250373028, | |
| "learning_rate": 0.00015150000000000019, | |
| "step_time": 9.368009328842163, | |
| "tokens_per_second": 109.30817466708918, | |
| "memory_mb": 756.6015625 | |
| }, | |
| { | |
| "step": 200, | |
| "loss": 7.487581491470337, | |
| "perplexity": 1785.7280665447065, | |
| "learning_rate": 0.0002999999999999999, | |
| "step_time": 10.94300365447998, | |
| "tokens_per_second": 93.57577063229641, | |
| "memory_mb": 730.99609375 | |
| }, | |
| { | |
| "step": 300, | |
| "loss": 7.163171410560608, | |
| "perplexity": 1290.9987344513497, | |
| "learning_rate": 0.00029794904665665113, | |
| "step_time": 8.949450016021729, | |
| "tokens_per_second": 114.42043904002891, | |
| "memory_mb": 733.89453125 | |
| }, | |
| { | |
| "step": 400, | |
| "loss": 6.677380561828613, | |
| "perplexity": 794.2359329078598, | |
| "learning_rate": 0.00029185850380610337, | |
| "step_time": 8.572781324386597, | |
| "tokens_per_second": 119.44781527169884, | |
| "memory_mb": 827.69140625 | |
| }, | |
| { | |
| "step": 500, | |
| "loss": 6.8126842975616455, | |
| "perplexity": 909.3083881764071, | |
| "learning_rate": 0.0002819134295109075, | |
| "step_time": 8.801953315734863, | |
| "tokens_per_second": 116.33781312716582, | |
| "memory_mb": 849.21484375 | |
| }, | |
| { | |
| "step": 600, | |
| "loss": 6.901162624359131, | |
| "perplexity": 993.4290292468936, | |
| "learning_rate": 0.0002684159998210713, | |
| "step_time": 8.660848379135132, | |
| "tokens_per_second": 118.23322094714423, | |
| "memory_mb": 734.16015625 | |
| }, | |
| { | |
| "step": 700, | |
| "loss": 6.6995872259140015, | |
| "perplexity": 812.0705542959239, | |
| "learning_rate": 0.0002517763273076916, | |
| "step_time": 9.10423469543457, | |
| "tokens_per_second": 112.47513209578146, | |
| "memory_mb": 734.64453125 | |
| }, | |
| { | |
| "step": 800, | |
| "loss": 6.729204297065735, | |
| "perplexity": 836.4814103649661, | |
| "learning_rate": 0.00023250000000000793, | |
| "step_time": 8.646829605102539, | |
| "tokens_per_second": 118.42490794495734, | |
| "memory_mb": 733.84765625 | |
| }, | |
| { | |
| "step": 900, | |
| "loss": 6.801577568054199, | |
| "perplexity": 899.2648246887062, | |
| "learning_rate": 0.00021117271934897237, | |
| "step_time": 8.66753602027893, | |
| "tokens_per_second": 118.14199532649263, | |
| "memory_mb": 839.50390625 | |
| }, | |
| { | |
| "step": 1000, | |
| "loss": 6.5149312019348145, | |
| "perplexity": 675.1475110828569, | |
| "learning_rate": 0.00018844250398504186, | |
| "step_time": 9.189276933670044, | |
| "tokens_per_second": 111.43423006961567, | |
| "memory_mb": 734.55078125 | |
| }, | |
| { | |
| "step": 1100, | |
| "loss": 6.551132082939148, | |
| "perplexity": 700.0362244642821, | |
| "learning_rate": 0.00016500000000000537, | |
| "step_time": 8.517168760299683, | |
| "tokens_per_second": 120.22774572379963, | |
| "memory_mb": 913.80078125 | |
| }, | |
| { | |
| "step": 1200, | |
| "loss": 6.6684452295303345, | |
| "perplexity": 787.170782663513, | |
| "learning_rate": 0.00014155749601496882, | |
| "step_time": 9.400139570236206, | |
| "tokens_per_second": 108.93455276369572, | |
| "memory_mb": 968.5859375 | |
| }, | |
| { | |
| "step": 1300, | |
| "loss": 6.406905889511108, | |
| "perplexity": 606.0156976890383, | |
| "learning_rate": 0.00011882728065103813, | |
| "step_time": 8.490540981292725, | |
| "tokens_per_second": 120.6048003603289, | |
| "memory_mb": 979.359375 | |
| }, | |
| { | |
| "step": 1400, | |
| "loss": 6.3421419858932495, | |
| "perplexity": 568.011682273887, | |
| "learning_rate": 9.750000000000261e-05, | |
| "step_time": 10.675879955291748, | |
| "tokens_per_second": 95.91715196201983, | |
| "memory_mb": 733.6328125 | |
| }, | |
| { | |
| "step": 1500, | |
| "loss": 6.335531115531921, | |
| "perplexity": 564.2690154518974, | |
| "learning_rate": 7.822367269231907e-05, | |
| "step_time": 8.945564270019531, | |
| "tokens_per_second": 114.47014062957085, | |
| "memory_mb": 994.3515625 | |
| }, | |
| { | |
| "step": 1600, | |
| "loss": 6.629261136054993, | |
| "perplexity": 756.9227010883292, | |
| "learning_rate": 6.158400017893925e-05, | |
| "step_time": 10.422486782073975, | |
| "tokens_per_second": 98.2491051714468, | |
| "memory_mb": 730.56640625 | |
| }, | |
| { | |
| "step": 1700, | |
| "loss": 6.302608251571655, | |
| "perplexity": 545.9941446328027, | |
| "learning_rate": 4.808657048910149e-05, | |
| "step_time": 9.476832628250122, | |
| "tokens_per_second": 108.05297932006208, | |
| "memory_mb": 745.43359375 | |
| }, | |
| { | |
| "step": 1800, | |
| "loss": 6.266754984855652, | |
| "perplexity": 526.765240241726, | |
| "learning_rate": 3.8141496193902704e-05, | |
| "step_time": 8.648972511291504, | |
| "tokens_per_second": 118.3955664864394, | |
| "memory_mb": 1009.6484375 | |
| }, | |
| { | |
| "step": 1900, | |
| "loss": 6.480456352233887, | |
| "perplexity": 652.268542568135, | |
| "learning_rate": 3.2050953343351995e-05, | |
| "step_time": 8.593879699707031, | |
| "tokens_per_second": 119.15456531639704, | |
| "memory_mb": 735.796875 | |
| }, | |
| { | |
| "step": 2000, | |
| "loss": 5.975515604019165, | |
| "perplexity": 393.6710271356782, | |
| "learning_rate": 2.9999999999999997e-05, | |
| "step_time": 8.94594669342041, | |
| "tokens_per_second": 114.46524723349116, | |
| "memory_mb": 729.84375 | |
| }, | |
| { | |
| "step": 2100, | |
| "loss": 6.554613709449768, | |
| "perplexity": 702.4777368926921, | |
| "learning_rate": 3.205095334333285e-05, | |
| "step_time": 9.199656009674072, | |
| "tokens_per_second": 111.3085096794047, | |
| "memory_mb": 752.64453125 | |
| }, | |
| { | |
| "step": 2200, | |
| "loss": 6.471360206604004, | |
| "perplexity": 646.3623155888199, | |
| "learning_rate": 3.814149619382671e-05, | |
| "step_time": 8.654725313186646, | |
| "tokens_per_second": 118.3168688715975, | |
| "memory_mb": 735.9375 | |
| }, | |
| { | |
| "step": 2300, | |
| "loss": 6.382450699806213, | |
| "perplexity": 591.3752163574818, | |
| "learning_rate": 4.808657048893273e-05, | |
| "step_time": 8.75070834159851, | |
| "tokens_per_second": 117.01909834340836, | |
| "memory_mb": 776.53125 | |
| }, | |
| { | |
| "step": 2400, | |
| "loss": 5.957324385643005, | |
| "perplexity": 386.5744152212925, | |
| "learning_rate": 6.158400017864459e-05, | |
| "step_time": 8.642782926559448, | |
| "tokens_per_second": 118.4803562349376, | |
| "memory_mb": 923.15234375 | |
| }, | |
| { | |
| "step": 2500, | |
| "loss": 6.218142509460449, | |
| "perplexity": 501.7703322170689, | |
| "learning_rate": 7.822367269186924e-05, | |
| "step_time": 9.567925691604614, | |
| "tokens_per_second": 107.0242425585004, | |
| "memory_mb": 1050.1875 | |
| }, | |
| { | |
| "step": 2600, | |
| "loss": 6.314482092857361, | |
| "perplexity": 552.515834581567, | |
| "learning_rate": 9.749999999937299e-05, | |
| "step_time": 8.212730169296265, | |
| "tokens_per_second": 124.68448115199003, | |
| "memory_mb": 738.69921875 | |
| }, | |
| { | |
| "step": 2700, | |
| "loss": 6.240906238555908, | |
| "perplexity": 513.3234937600264, | |
| "learning_rate": 0.00011882728065020969, | |
| "step_time": 8.88506555557251, | |
| "tokens_per_second": 115.24957172181702, | |
| "memory_mb": 739.125 | |
| }, | |
| { | |
| "step": 2800, | |
| "loss": 6.308051347732544, | |
| "perplexity": 548.9741461252174, | |
| "learning_rate": 0.0001415574960139285, | |
| "step_time": 9.43014669418335, | |
| "tokens_per_second": 108.58791842884247, | |
| "memory_mb": 732.40234375 | |
| }, | |
| { | |
| "step": 2900, | |
| "loss": 6.410398960113525, | |
| "perplexity": 608.136254778883, | |
| "learning_rate": 0.00016499999999874617, | |
| "step_time": 8.674461603164673, | |
| "tokens_per_second": 118.0476722182294, | |
| "memory_mb": 733.71484375 | |
| }, | |
| { | |
| "step": 3000, | |
| "loss": 6.160744071006775, | |
| "perplexity": 473.7804700263767, | |
| "learning_rate": 0.0001884425039835638, | |
| "step_time": 9.056138277053833, | |
| "tokens_per_second": 113.07247843096432, | |
| "memory_mb": 803.75390625 | |
| }, | |
| { | |
| "step": 3100, | |
| "loss": 6.270610332489014, | |
| "perplexity": 528.8000232415734, | |
| "learning_rate": 0.0002111727193472824, | |
| "step_time": 9.650378227233887, | |
| "tokens_per_second": 106.10983071214939, | |
| "memory_mb": 790.328125 | |
| }, | |
| { | |
| "step": 3200, | |
| "loss": 6.4413875341415405, | |
| "perplexity": 627.2765639068664, | |
| "learning_rate": 0.00023249999999811922, | |
| "step_time": 8.436410665512085, | |
| "tokens_per_second": 121.37863371043517, | |
| "memory_mb": 940.3671875 | |
| }, | |
| { | |
| "step": 3300, | |
| "loss": 6.163665413856506, | |
| "perplexity": 475.1665688640195, | |
| "learning_rate": 0.0002517763273056231, | |
| "step_time": 10.011188507080078, | |
| "tokens_per_second": 102.28555773131335, | |
| "memory_mb": 747.43359375 | |
| }, | |
| { | |
| "step": 3400, | |
| "loss": 6.067382216453552, | |
| "perplexity": 431.5494984476535, | |
| "learning_rate": 0.00026841599981884787, | |
| "step_time": 8.671327114105225, | |
| "tokens_per_second": 118.0903437876665, | |
| "memory_mb": 970.37109375 | |
| }, | |
| { | |
| "step": 3500, | |
| "loss": 6.14486300945282, | |
| "perplexity": 466.3157638357051, | |
| "learning_rate": 0.0002819134295085615, | |
| "step_time": 8.194751024246216, | |
| "tokens_per_second": 124.95803679333764, | |
| "memory_mb": 736.890625 | |
| }, | |
| { | |
| "step": 3600, | |
| "loss": 6.448354721069336, | |
| "perplexity": 631.6621769355031, | |
| "learning_rate": 0.00029185850380367053, | |
| "step_time": 8.76004934310913, | |
| "tokens_per_second": 116.89431872955184, | |
| "memory_mb": 781.22265625 | |
| }, | |
| { | |
| "step": 3700, | |
| "loss": 6.149544358253479, | |
| "perplexity": 468.5038682213576, | |
| "learning_rate": 0.00029794904665416755, | |
| "step_time": 11.995165824890137, | |
| "tokens_per_second": 85.36772354369505, | |
| "memory_mb": 836.3046875 | |
| }, | |
| { | |
| "step": 3800, | |
| "loss": 6.083824634552002, | |
| "perplexity": 438.70387214992155, | |
| "learning_rate": 0.0002999999999975028, | |
| "step_time": 8.194983720779419, | |
| "tokens_per_second": 124.95448861033346, | |
| "memory_mb": 766.9453125 | |
| }, | |
| { | |
| "step": 3900, | |
| "loss": 6.295181512832642, | |
| "perplexity": 541.954209109435, | |
| "learning_rate": 0.0002979490466541723, | |
| "step_time": 9.617033004760742, | |
| "tokens_per_second": 106.47774625428518, | |
| "memory_mb": 837.29296875 | |
| }, | |
| { | |
| "step": 4000, | |
| "loss": 5.752694249153137, | |
| "perplexity": 315.038309582537, | |
| "learning_rate": 0.0002918585038036804, | |
| "step_time": 10.153574228286743, | |
| "tokens_per_second": 100.85118569845567, | |
| "memory_mb": 729.46484375 | |
| }, | |
| { | |
| "step": 4100, | |
| "loss": 6.129071950912476, | |
| "perplexity": 459.00997915951336, | |
| "learning_rate": 0.00028191342950857645, | |
| "step_time": 16.923099994659424, | |
| "tokens_per_second": 121.01801683180422, | |
| "memory_mb": 866.20703125 | |
| }, | |
| { | |
| "step": 4200, | |
| "loss": 5.725140452384949, | |
| "perplexity": 306.4763075489217, | |
| "learning_rate": 0.0002684159998188649, | |
| "step_time": 16.08098530769348, | |
| "tokens_per_second": 127.35538033357905, | |
| "memory_mb": 856.5625 | |
| }, | |
| { | |
| "step": 4300, | |
| "loss": 5.917757749557495, | |
| "perplexity": 371.57760903139615, | |
| "learning_rate": 0.00025177632730563935, | |
| "step_time": 16.5029239654541, | |
| "tokens_per_second": 124.09922049493284, | |
| "memory_mb": 856.58203125 | |
| }, | |
| { | |
| "step": 4400, | |
| "loss": 5.828692674636841, | |
| "perplexity": 339.9140102647636, | |
| "learning_rate": 0.00023249999999813424, | |
| "step_time": 15.03031873703003, | |
| "tokens_per_second": 136.25792212605347, | |
| "memory_mb": 856.58203125 | |
| }, | |
| { | |
| "step": 4500, | |
| "loss": 5.768530249595642, | |
| "perplexity": 320.0669682230316, | |
| "learning_rate": 0.0002111727193472959, | |
| "step_time": 15.095525026321411, | |
| "tokens_per_second": 135.66934548013344, | |
| "memory_mb": 856.58203125 | |
| }, | |
| { | |
| "step": 4600, | |
| "loss": 5.824233174324036, | |
| "perplexity": 338.40153857021943, | |
| "learning_rate": 0.00018844250398357548, | |
| "step_time": 15.770112752914429, | |
| "tokens_per_second": 129.86590724416445, | |
| "memory_mb": 856.6796875 | |
| }, | |
| { | |
| "step": 4700, | |
| "loss": 5.592358708381653, | |
| "perplexity": 268.3678753241069, | |
| "learning_rate": 0.00016499999999875585, | |
| "step_time": 15.393234729766846, | |
| "tokens_per_second": 133.04546029169921, | |
| "memory_mb": 856.6015625 | |
| }, | |
| { | |
| "step": 4800, | |
| "loss": 5.626398682594299, | |
| "perplexity": 277.6603717835579, | |
| "learning_rate": 0.0001415574960139364, | |
| "step_time": 15.054797887802124, | |
| "tokens_per_second": 136.03636629750804, | |
| "memory_mb": 856.6796875 | |
| }, | |
| { | |
| "step": 4900, | |
| "loss": 5.718711733818054, | |
| "perplexity": 304.5123771619807, | |
| "learning_rate": 0.00011882728065021638, | |
| "step_time": 16.42769432067871, | |
| "tokens_per_second": 124.66752546168553, | |
| "memory_mb": 856.62890625 | |
| }, | |
| { | |
| "step": 5000, | |
| "loss": 5.441234588623047, | |
| "perplexity": 230.7268604519411, | |
| "learning_rate": 9.749999999937817e-05, | |
| "step_time": 15.495960474014282, | |
| "tokens_per_second": 132.16347598681364, | |
| "memory_mb": 856.71875 | |
| }, | |
| { | |
| "step": 5100, | |
| "loss": 5.574064016342163, | |
| "perplexity": 263.50280585799544, | |
| "learning_rate": 7.822367269187295e-05, | |
| "step_time": 15.050865411758423, | |
| "tokens_per_second": 136.07190975211358, | |
| "memory_mb": 856.67578125 | |
| }, | |
| { | |
| "step": 5200, | |
| "loss": 5.486729264259338, | |
| "perplexity": 241.4661419404157, | |
| "learning_rate": 6.158400017864684e-05, | |
| "step_time": 14.95394515991211, | |
| "tokens_per_second": 136.95382576968316, | |
| "memory_mb": 856.74609375 | |
| }, | |
| { | |
| "step": 5300, | |
| "loss": 5.643607139587402, | |
| "perplexity": 282.47982711304775, | |
| "learning_rate": 4.808657048893407e-05, | |
| "step_time": 14.74146294593811, | |
| "tokens_per_second": 138.927866759948, | |
| "memory_mb": 856.828125 | |
| }, | |
| { | |
| "step": 5400, | |
| "loss": 5.773634672164917, | |
| "perplexity": 321.7049020761921, | |
| "learning_rate": 3.8141496193827305e-05, | |
| "step_time": 14.770179033279419, | |
| "tokens_per_second": 138.6577640924697, | |
| "memory_mb": 856.73046875 | |
| }, | |
| { | |
| "step": 5500, | |
| "loss": 5.430219531059265, | |
| "perplexity": 228.19933676762415, | |
| "learning_rate": 3.2050953343333e-05, | |
| "step_time": 17.394481897354126, | |
| "tokens_per_second": 117.7384881070543, | |
| "memory_mb": 856.4453125 | |
| }, | |
| { | |
| "step": 5600, | |
| "loss": 5.905840158462524, | |
| "perplexity": 367.17558190769864, | |
| "learning_rate": 2.9999999999999997e-05, | |
| "step_time": 14.9873046875, | |
| "tokens_per_second": 136.64898677265916, | |
| "memory_mb": 856.71875 | |
| }, | |
| { | |
| "step": 5700, | |
| "loss": 5.61777663230896, | |
| "perplexity": 275.27666109950576, | |
| "learning_rate": 3.205095334333272e-05, | |
| "step_time": 16.493826627731323, | |
| "tokens_per_second": 124.16766868136386, | |
| "memory_mb": 856.71875 | |
| }, | |
| { | |
| "step": 5800, | |
| "loss": 5.823711514472961, | |
| "perplexity": 338.2250541104363, | |
| "learning_rate": 3.814149619382621e-05, | |
| "step_time": 15.767404556274414, | |
| "tokens_per_second": 129.88821290724272, | |
| "memory_mb": 856.7265625 | |
| }, | |
| { | |
| "step": 5900, | |
| "loss": 5.767146706581116, | |
| "perplexity": 319.6244479984378, | |
| "learning_rate": 4.808657048893161e-05, | |
| "step_time": 14.995726346969604, | |
| "tokens_per_second": 136.57224415900788, | |
| "memory_mb": 857.28125 | |
| }, | |
| { | |
| "step": 6000, | |
| "loss": 5.632080435752869, | |
| "perplexity": 279.242459738446, | |
| "learning_rate": 6.158400017864261e-05, | |
| "step_time": 15.532944679260254, | |
| "tokens_per_second": 131.84879250452173, | |
| "memory_mb": 857.25 | |
| }, | |
| { | |
| "step": 6100, | |
| "loss": 5.641765594482422, | |
| "perplexity": 281.9601064615597, | |
| "learning_rate": 7.822367269186639e-05, | |
| "step_time": 13.042147397994995, | |
| "tokens_per_second": 157.02935548135613, | |
| "memory_mb": 3038.3984375 | |
| }, | |
| { | |
| "step": 6200, | |
| "loss": 5.216195583343506, | |
| "perplexity": 184.23195401529154, | |
| "learning_rate": 9.749999999936928e-05, | |
| "step_time": 13.716249227523804, | |
| "tokens_per_second": 149.3119559165174, | |
| "memory_mb": 3027.03515625 | |
| }, | |
| { | |
| "step": 6300, | |
| "loss": 5.537067532539368, | |
| "perplexity": 253.93225847719265, | |
| "learning_rate": 0.00011882728065020474, | |
| "step_time": 13.56373405456543, | |
| "tokens_per_second": 150.99086960575298, | |
| "memory_mb": 3029.5546875 | |
| }, | |
| { | |
| "step": 6400, | |
| "loss": 5.462532162666321, | |
| "perplexity": 235.69348362718267, | |
| "learning_rate": 0.0001415574960139221, | |
| "step_time": 15.235360145568848, | |
| "tokens_per_second": 134.4241278468008, | |
| "memory_mb": 3029.3984375 | |
| }, | |
| { | |
| "step": 6500, | |
| "loss": 5.461255669593811, | |
| "perplexity": 235.39281446997174, | |
| "learning_rate": 0.00016499999999873858, | |
| "step_time": 14.625237941741943, | |
| "tokens_per_second": 140.03190978211683, | |
| "memory_mb": 3029.7421875 | |
| }, | |
| { | |
| "step": 6600, | |
| "loss": 5.543500542640686, | |
| "perplexity": 255.571072864111, | |
| "learning_rate": 0.00018844250398355512, | |
| "step_time": 13.293677568435669, | |
| "tokens_per_second": 154.0581971735755, | |
| "memory_mb": 3025.90234375 | |
| }, | |
| { | |
| "step": 6700, | |
| "loss": 5.326942205429077, | |
| "perplexity": 205.80769336539043, | |
| "learning_rate": 0.0002111727193472727, | |
| "step_time": 13.689483880996704, | |
| "tokens_per_second": 149.60388702768896, | |
| "memory_mb": 3032.14453125 | |
| }, | |
| { | |
| "step": 6800, | |
| "loss": 5.4400506019592285, | |
| "perplexity": 230.4538445816494, | |
| "learning_rate": 0.00023249999999810797, | |
| "step_time": 13.920767545700073, | |
| "tokens_per_second": 147.11832470994733, | |
| "memory_mb": 3033.22265625 | |
| }, | |
| { | |
| "step": 6900, | |
| "loss": 5.562488794326782, | |
| "perplexity": 260.47028727589134, | |
| "learning_rate": 0.0002517763273056109, | |
| "step_time": 13.11566162109375, | |
| "tokens_per_second": 156.1491946930247, | |
| "memory_mb": 3030.31640625 | |
| }, | |
| { | |
| "step": 7000, | |
| "loss": 5.337615728378296, | |
| "perplexity": 208.01615155187662, | |
| "learning_rate": 0.00026841599981883453, | |
| "step_time": 13.812754154205322, | |
| "tokens_per_second": 148.2687650222517, | |
| "memory_mb": 3030.01171875 | |
| }, | |
| { | |
| "step": 7100, | |
| "loss": 5.591769337654114, | |
| "perplexity": 268.20975375486825, | |
| "learning_rate": 4.0863750705554176e-05, | |
| "step_time": 22.385777473449707, | |
| "tokens_per_second": 91.48665943941405, | |
| "memory_mb": 778.03515625 | |
| }, | |
| { | |
| "step": 7200, | |
| "loss": 5.491156101226807, | |
| "perplexity": 242.5374426712762, | |
| "learning_rate": 3.860829246363548e-05, | |
| "step_time": 23.257835626602173, | |
| "tokens_per_second": 88.0563450907491, | |
| "memory_mb": 766.17578125 | |
| }, | |
| { | |
| "step": 7300, | |
| "loss": 5.590375304222107, | |
| "perplexity": 267.83612088021056, | |
| "learning_rate": 3.660737030015427e-05, | |
| "step_time": 22.15459370613098, | |
| "tokens_per_second": 92.4413251339944, | |
| "memory_mb": 765.515625 | |
| }, | |
| { | |
| "step": 7400, | |
| "loss": 5.408857345581055, | |
| "perplexity": 223.37619999638744, | |
| "learning_rate": 3.486501380605981e-05, | |
| "step_time": 22.82973575592041, | |
| "tokens_per_second": 89.70756481353028, | |
| "memory_mb": 762.5078125 | |
| }, | |
| { | |
| "step": 7500, | |
| "loss": 5.681139707565308, | |
| "perplexity": 293.2834969372769, | |
| "learning_rate": 3.338473185545381e-05, | |
| "step_time": 23.581167221069336, | |
| "tokens_per_second": 86.848966414612, | |
| "memory_mb": 762.328125 | |
| }, | |
| { | |
| "step": 7600, | |
| "loss": 5.239741921424866, | |
| "perplexity": 188.6214169770699, | |
| "learning_rate": 3.2169505539184994e-05, | |
| "step_time": 24.760926008224487, | |
| "tokens_per_second": 82.71096159003685, | |
| "memory_mb": 765.65625 | |
| }, | |
| { | |
| "step": 7700, | |
| "loss": 5.552804112434387, | |
| "perplexity": 257.95989121628156, | |
| "learning_rate": 3.122178216132881e-05, | |
| "step_time": 21.644299030303955, | |
| "tokens_per_second": 94.62075889510751, | |
| "memory_mb": 766.9921875 | |
| }, | |
| { | |
| "step": 7800, | |
| "loss": 5.3391993045806885, | |
| "perplexity": 208.34582193938436, | |
| "learning_rate": 3.054347031064272e-05, | |
| "step_time": 21.552024602890015, | |
| "tokens_per_second": 95.02587518971994, | |
| "memory_mb": 768.30078125 | |
| }, | |
| { | |
| "step": 7900, | |
| "loss": 5.07136607170105, | |
| "perplexity": 159.39191947680405, | |
| "learning_rate": 3.0135936016922528e-05, | |
| "step_time": 25.795836448669434, | |
| "tokens_per_second": 79.3926571861808, | |
| "memory_mb": 766.03125 | |
| }, | |
| { | |
| "step": 8000, | |
| "loss": 5.289915561676025, | |
| "perplexity": 198.32667833002583, | |
| "learning_rate": 2.9999999999999997e-05, | |
| "step_time": 20.64373207092285, | |
| "tokens_per_second": 99.20686787466366, | |
| "memory_mb": 770.73046875 | |
| } | |
| ] |