[ { "step": 100, "loss": 7.657252192497253, "perplexity": 2115.935250373028, "learning_rate": 0.00015150000000000019, "step_time": 9.368009328842163, "tokens_per_second": 109.30817466708918, "memory_mb": 756.6015625 }, { "step": 200, "loss": 7.487581491470337, "perplexity": 1785.7280665447065, "learning_rate": 0.0002999999999999999, "step_time": 10.94300365447998, "tokens_per_second": 93.57577063229641, "memory_mb": 730.99609375 }, { "step": 300, "loss": 7.163171410560608, "perplexity": 1290.9987344513497, "learning_rate": 0.00029794904665665113, "step_time": 8.949450016021729, "tokens_per_second": 114.42043904002891, "memory_mb": 733.89453125 }, { "step": 400, "loss": 6.677380561828613, "perplexity": 794.2359329078598, "learning_rate": 0.00029185850380610337, "step_time": 8.572781324386597, "tokens_per_second": 119.44781527169884, "memory_mb": 827.69140625 }, { "step": 500, "loss": 6.8126842975616455, "perplexity": 909.3083881764071, "learning_rate": 0.0002819134295109075, "step_time": 8.801953315734863, "tokens_per_second": 116.33781312716582, "memory_mb": 849.21484375 }, { "step": 600, "loss": 6.901162624359131, "perplexity": 993.4290292468936, "learning_rate": 0.0002684159998210713, "step_time": 8.660848379135132, "tokens_per_second": 118.23322094714423, "memory_mb": 734.16015625 }, { "step": 700, "loss": 6.6995872259140015, "perplexity": 812.0705542959239, "learning_rate": 0.0002517763273076916, "step_time": 9.10423469543457, "tokens_per_second": 112.47513209578146, "memory_mb": 734.64453125 }, { "step": 800, "loss": 6.729204297065735, "perplexity": 836.4814103649661, "learning_rate": 0.00023250000000000793, "step_time": 8.646829605102539, "tokens_per_second": 118.42490794495734, "memory_mb": 733.84765625 }, { "step": 900, "loss": 6.801577568054199, "perplexity": 899.2648246887062, "learning_rate": 0.00021117271934897237, "step_time": 8.66753602027893, "tokens_per_second": 118.14199532649263, "memory_mb": 839.50390625 }, { "step": 1000, "loss": 6.5149312019348145, "perplexity": 675.1475110828569, "learning_rate": 0.00018844250398504186, "step_time": 9.189276933670044, "tokens_per_second": 111.43423006961567, "memory_mb": 734.55078125 }, { "step": 1100, "loss": 6.551132082939148, "perplexity": 700.0362244642821, "learning_rate": 0.00016500000000000537, "step_time": 8.517168760299683, "tokens_per_second": 120.22774572379963, "memory_mb": 913.80078125 }, { "step": 1200, "loss": 6.6684452295303345, "perplexity": 787.170782663513, "learning_rate": 0.00014155749601496882, "step_time": 9.400139570236206, "tokens_per_second": 108.93455276369572, "memory_mb": 968.5859375 }, { "step": 1300, "loss": 6.406905889511108, "perplexity": 606.0156976890383, "learning_rate": 0.00011882728065103813, "step_time": 8.490540981292725, "tokens_per_second": 120.6048003603289, "memory_mb": 979.359375 }, { "step": 1400, "loss": 6.3421419858932495, "perplexity": 568.011682273887, "learning_rate": 9.750000000000261e-05, "step_time": 10.675879955291748, "tokens_per_second": 95.91715196201983, "memory_mb": 733.6328125 }, { "step": 1500, "loss": 6.335531115531921, "perplexity": 564.2690154518974, "learning_rate": 7.822367269231907e-05, "step_time": 8.945564270019531, "tokens_per_second": 114.47014062957085, "memory_mb": 994.3515625 }, { "step": 1600, "loss": 6.629261136054993, "perplexity": 756.9227010883292, "learning_rate": 6.158400017893925e-05, "step_time": 10.422486782073975, "tokens_per_second": 98.2491051714468, "memory_mb": 730.56640625 }, { "step": 1700, "loss": 6.302608251571655, "perplexity": 545.9941446328027, "learning_rate": 4.808657048910149e-05, "step_time": 9.476832628250122, "tokens_per_second": 108.05297932006208, "memory_mb": 745.43359375 }, { "step": 1800, "loss": 6.266754984855652, "perplexity": 526.765240241726, "learning_rate": 3.8141496193902704e-05, "step_time": 8.648972511291504, "tokens_per_second": 118.3955664864394, "memory_mb": 1009.6484375 }, { "step": 1900, "loss": 6.480456352233887, "perplexity": 652.268542568135, "learning_rate": 3.2050953343351995e-05, "step_time": 8.593879699707031, "tokens_per_second": 119.15456531639704, "memory_mb": 735.796875 }, { "step": 2000, "loss": 5.975515604019165, "perplexity": 393.6710271356782, "learning_rate": 2.9999999999999997e-05, "step_time": 8.94594669342041, "tokens_per_second": 114.46524723349116, "memory_mb": 729.84375 }, { "step": 2100, "loss": 6.554613709449768, "perplexity": 702.4777368926921, "learning_rate": 3.205095334333285e-05, "step_time": 9.199656009674072, "tokens_per_second": 111.3085096794047, "memory_mb": 752.64453125 }, { "step": 2200, "loss": 6.471360206604004, "perplexity": 646.3623155888199, "learning_rate": 3.814149619382671e-05, "step_time": 8.654725313186646, "tokens_per_second": 118.3168688715975, "memory_mb": 735.9375 }, { "step": 2300, "loss": 6.382450699806213, "perplexity": 591.3752163574818, "learning_rate": 4.808657048893273e-05, "step_time": 8.75070834159851, "tokens_per_second": 117.01909834340836, "memory_mb": 776.53125 }, { "step": 2400, "loss": 5.957324385643005, "perplexity": 386.5744152212925, "learning_rate": 6.158400017864459e-05, "step_time": 8.642782926559448, "tokens_per_second": 118.4803562349376, "memory_mb": 923.15234375 }, { "step": 2500, "loss": 6.218142509460449, "perplexity": 501.7703322170689, "learning_rate": 7.822367269186924e-05, "step_time": 9.567925691604614, "tokens_per_second": 107.0242425585004, "memory_mb": 1050.1875 }, { "step": 2600, "loss": 6.314482092857361, "perplexity": 552.515834581567, "learning_rate": 9.749999999937299e-05, "step_time": 8.212730169296265, "tokens_per_second": 124.68448115199003, "memory_mb": 738.69921875 }, { "step": 2700, "loss": 6.240906238555908, "perplexity": 513.3234937600264, "learning_rate": 0.00011882728065020969, "step_time": 8.88506555557251, "tokens_per_second": 115.24957172181702, "memory_mb": 739.125 }, { "step": 2800, "loss": 6.308051347732544, "perplexity": 548.9741461252174, "learning_rate": 0.0001415574960139285, "step_time": 9.43014669418335, "tokens_per_second": 108.58791842884247, "memory_mb": 732.40234375 }, { "step": 2900, "loss": 6.410398960113525, "perplexity": 608.136254778883, "learning_rate": 0.00016499999999874617, "step_time": 8.674461603164673, "tokens_per_second": 118.0476722182294, "memory_mb": 733.71484375 }, { "step": 3000, "loss": 6.160744071006775, "perplexity": 473.7804700263767, "learning_rate": 0.0001884425039835638, "step_time": 9.056138277053833, "tokens_per_second": 113.07247843096432, "memory_mb": 803.75390625 }, { "step": 3100, "loss": 6.270610332489014, "perplexity": 528.8000232415734, "learning_rate": 0.0002111727193472824, "step_time": 9.650378227233887, "tokens_per_second": 106.10983071214939, "memory_mb": 790.328125 }, { "step": 3200, "loss": 6.4413875341415405, "perplexity": 627.2765639068664, "learning_rate": 0.00023249999999811922, "step_time": 8.436410665512085, "tokens_per_second": 121.37863371043517, "memory_mb": 940.3671875 }, { "step": 3300, "loss": 6.163665413856506, "perplexity": 475.1665688640195, "learning_rate": 0.0002517763273056231, "step_time": 10.011188507080078, "tokens_per_second": 102.28555773131335, "memory_mb": 747.43359375 }, { "step": 3400, "loss": 6.067382216453552, "perplexity": 431.5494984476535, "learning_rate": 0.00026841599981884787, "step_time": 8.671327114105225, "tokens_per_second": 118.0903437876665, "memory_mb": 970.37109375 }, { "step": 3500, "loss": 6.14486300945282, "perplexity": 466.3157638357051, "learning_rate": 0.0002819134295085615, "step_time": 8.194751024246216, "tokens_per_second": 124.95803679333764, "memory_mb": 736.890625 }, { "step": 3600, "loss": 6.448354721069336, "perplexity": 631.6621769355031, "learning_rate": 0.00029185850380367053, "step_time": 8.76004934310913, "tokens_per_second": 116.89431872955184, "memory_mb": 781.22265625 }, { "step": 3700, "loss": 6.149544358253479, "perplexity": 468.5038682213576, "learning_rate": 0.00029794904665416755, "step_time": 11.995165824890137, "tokens_per_second": 85.36772354369505, "memory_mb": 836.3046875 }, { "step": 3800, "loss": 6.083824634552002, "perplexity": 438.70387214992155, "learning_rate": 0.0002999999999975028, "step_time": 8.194983720779419, "tokens_per_second": 124.95448861033346, "memory_mb": 766.9453125 }, { "step": 3900, "loss": 6.295181512832642, "perplexity": 541.954209109435, "learning_rate": 0.0002979490466541723, "step_time": 9.617033004760742, "tokens_per_second": 106.47774625428518, "memory_mb": 837.29296875 }, { "step": 4000, "loss": 5.752694249153137, "perplexity": 315.038309582537, "learning_rate": 0.0002918585038036804, "step_time": 10.153574228286743, "tokens_per_second": 100.85118569845567, "memory_mb": 729.46484375 } ]