aria-1b-chat / training_logs /train_log.jsonl
dkumar15's picture
Upload training_logs/train_log.jsonl with huggingface_hub
11cd458 verified
{"step": 10, "loss": 10.3607, "lr": 2.6999999999999996e-06, "tps": 277501, "tokens": 10485760, "gpu_gb": 72.1, "elapsed_s": 37.8}
{"step": 20, "loss": 8.8964, "lr": 5.7e-06, "tps": 339197, "tokens": 20971520, "gpu_gb": 72.1, "elapsed_s": 68.7}
{"step": 30, "loss": 8.2883, "lr": 8.7e-06, "tps": 338562, "tokens": 31457280, "gpu_gb": 72.1, "elapsed_s": 99.7}
{"step": 40, "loss": 7.9897, "lr": 1.1699999999999998e-05, "tps": 338458, "tokens": 41943040, "gpu_gb": 72.1, "elapsed_s": 130.7}
{"step": 50, "loss": 7.6261, "lr": 1.47e-05, "tps": 338491, "tokens": 52428800, "gpu_gb": 72.1, "elapsed_s": 161.6}
{"step": 60, "loss": 7.3566, "lr": 1.7699999999999997e-05, "tps": 338245, "tokens": 62914560, "gpu_gb": 72.1, "elapsed_s": 192.6}
{"step": 70, "loss": 7.0665, "lr": 2.07e-05, "tps": 338120, "tokens": 73400320, "gpu_gb": 72.1, "elapsed_s": 223.6}
{"step": 80, "loss": 6.8306, "lr": 2.37e-05, "tps": 338186, "tokens": 83886080, "gpu_gb": 72.1, "elapsed_s": 254.7}
{"step": 90, "loss": 6.6047, "lr": 2.67e-05, "tps": 337906, "tokens": 94371840, "gpu_gb": 72.1, "elapsed_s": 285.7}
{"step": 100, "loss": 6.4162, "lr": 2.9699999999999997e-05, "tps": 337947, "tokens": 104857600, "gpu_gb": 72.1, "elapsed_s": 316.7}
{"step": 110, "loss": 6.2156, "lr": 3.27e-05, "tps": 337815, "tokens": 115343360, "gpu_gb": 72.1, "elapsed_s": 347.8}
{"step": 120, "loss": 6.0289, "lr": 3.5699999999999994e-05, "tps": 337887, "tokens": 125829120, "gpu_gb": 72.1, "elapsed_s": 378.8}
{"step": 130, "loss": 5.8996, "lr": 3.87e-05, "tps": 337996, "tokens": 136314880, "gpu_gb": 72.1, "elapsed_s": 409.8}
{"step": 140, "loss": 5.8069, "lr": 4.17e-05, "tps": 337826, "tokens": 146800640, "gpu_gb": 72.1, "elapsed_s": 440.8}
{"step": 150, "loss": 5.6835, "lr": 4.4699999999999996e-05, "tps": 338010, "tokens": 157286400, "gpu_gb": 72.1, "elapsed_s": 471.9}
{"step": 160, "loss": 5.5708, "lr": 4.7699999999999994e-05, "tps": 337835, "tokens": 167772160, "gpu_gb": 72.1, "elapsed_s": 502.9}
{"step": 170, "loss": 5.5253, "lr": 5.069999999999999e-05, "tps": 337712, "tokens": 178257920, "gpu_gb": 72.1, "elapsed_s": 534.0}
{"step": 180, "loss": 5.4963, "lr": 5.37e-05, "tps": 337967, "tokens": 188743680, "gpu_gb": 72.1, "elapsed_s": 565.0}
{"step": 190, "loss": 5.3931, "lr": 5.6699999999999996e-05, "tps": 338082, "tokens": 199229440, "gpu_gb": 72.1, "elapsed_s": 596.0}
{"step": 200, "loss": 5.2958, "lr": 5.9699999999999994e-05, "tps": 337891, "tokens": 209715200, "gpu_gb": 72.1, "elapsed_s": 627.0}
{"step": 210, "loss": 5.2475, "lr": 6.269999999999999e-05, "tps": 338177, "tokens": 220200960, "gpu_gb": 72.1, "elapsed_s": 658.0}
{"step": 220, "loss": 5.2241, "lr": 6.57e-05, "tps": 337887, "tokens": 230686720, "gpu_gb": 72.1, "elapsed_s": 689.1}
{"step": 230, "loss": 5.1156, "lr": 6.87e-05, "tps": 338051, "tokens": 241172480, "gpu_gb": 72.1, "elapsed_s": 720.1}
{"step": 240, "loss": 5.0763, "lr": 7.17e-05, "tps": 338018, "tokens": 251658240, "gpu_gb": 72.1, "elapsed_s": 751.1}
{"step": 250, "loss": 5.0011, "lr": 7.469999999999999e-05, "tps": 338144, "tokens": 262144000, "gpu_gb": 72.1, "elapsed_s": 782.1}
{"step": 260, "loss": 4.9508, "lr": 7.769999999999999e-05, "tps": 337870, "tokens": 272629760, "gpu_gb": 72.1, "elapsed_s": 813.2}
{"step": 270, "loss": 4.8888, "lr": 8.07e-05, "tps": 338316, "tokens": 283115520, "gpu_gb": 72.1, "elapsed_s": 844.2}
{"step": 280, "loss": 4.8232, "lr": 8.37e-05, "tps": 338059, "tokens": 293601280, "gpu_gb": 72.1, "elapsed_s": 875.2}
{"step": 290, "loss": 4.757, "lr": 8.669999999999999e-05, "tps": 338073, "tokens": 304087040, "gpu_gb": 72.1, "elapsed_s": 906.2}
{"step": 300, "loss": 4.7798, "lr": 8.969999999999998e-05, "tps": 337901, "tokens": 314572800, "gpu_gb": 72.1, "elapsed_s": 937.2}
{"step": 310, "loss": 4.6782, "lr": 9.269999999999999e-05, "tps": 338178, "tokens": 325058560, "gpu_gb": 72.1, "elapsed_s": 968.2}
{"step": 320, "loss": 4.6397, "lr": 9.57e-05, "tps": 338131, "tokens": 335544320, "gpu_gb": 72.1, "elapsed_s": 999.2}
{"step": 330, "loss": 4.5985, "lr": 9.87e-05, "tps": 337935, "tokens": 346030080, "gpu_gb": 72.1, "elapsed_s": 1030.3}
{"step": 340, "loss": 4.5419, "lr": 0.00010169999999999998, "tps": 337932, "tokens": 356515840, "gpu_gb": 72.1, "elapsed_s": 1061.3}
{"step": 350, "loss": 4.4853, "lr": 0.00010469999999999998, "tps": 337852, "tokens": 367001600, "gpu_gb": 72.1, "elapsed_s": 1092.3}
{"step": 360, "loss": 4.4338, "lr": 0.00010769999999999999, "tps": 338013, "tokens": 377487360, "gpu_gb": 72.1, "elapsed_s": 1123.4}
{"step": 370, "loss": 4.3868, "lr": 0.0001107, "tps": 337940, "tokens": 387973120, "gpu_gb": 72.1, "elapsed_s": 1154.4}
{"step": 380, "loss": 4.3558, "lr": 0.0001137, "tps": 337995, "tokens": 398458880, "gpu_gb": 72.1, "elapsed_s": 1185.4}
{"step": 390, "loss": 4.349, "lr": 0.00011669999999999999, "tps": 338120, "tokens": 408944640, "gpu_gb": 72.1, "elapsed_s": 1216.4}
{"step": 400, "loss": 4.2988, "lr": 0.00011969999999999998, "tps": 337749, "tokens": 419430400, "gpu_gb": 72.1, "elapsed_s": 1247.5}
{"step": 410, "loss": 4.2473, "lr": 0.0001227, "tps": 337685, "tokens": 429916160, "gpu_gb": 72.1, "elapsed_s": 1278.5}
{"step": 420, "loss": 4.2146, "lr": 0.00012569999999999997, "tps": 337840, "tokens": 440401920, "gpu_gb": 72.1, "elapsed_s": 1309.6}
{"step": 430, "loss": 4.182, "lr": 0.00012869999999999998, "tps": 337631, "tokens": 450887680, "gpu_gb": 72.1, "elapsed_s": 1340.6}
{"step": 440, "loss": 4.1326, "lr": 0.00013169999999999998, "tps": 337926, "tokens": 461373440, "gpu_gb": 72.1, "elapsed_s": 1371.6}
{"step": 450, "loss": 4.1462, "lr": 0.0001347, "tps": 337996, "tokens": 471859200, "gpu_gb": 72.1, "elapsed_s": 1402.7}
{"step": 460, "loss": 4.1261, "lr": 0.00013769999999999999, "tps": 338029, "tokens": 482344960, "gpu_gb": 72.1, "elapsed_s": 1433.7}
{"step": 470, "loss": 4.0357, "lr": 0.0001407, "tps": 337718, "tokens": 492830720, "gpu_gb": 72.1, "elapsed_s": 1464.7}
{"step": 480, "loss": 4.0278, "lr": 0.0001437, "tps": 338129, "tokens": 503316480, "gpu_gb": 72.1, "elapsed_s": 1495.7}
{"step": 490, "loss": 4.0152, "lr": 0.0001467, "tps": 337520, "tokens": 513802240, "gpu_gb": 72.1, "elapsed_s": 1526.8}
{"step": 500, "loss": 3.9752, "lr": 0.0001497, "tps": 337775, "tokens": 524288000, "gpu_gb": 72.1, "elapsed_s": 1557.9}
{"step": 510, "loss": 3.938, "lr": 0.00015269999999999997, "tps": 338051, "tokens": 534773760, "gpu_gb": 72.1, "elapsed_s": 1588.9}
{"step": 520, "loss": 3.9036, "lr": 0.0001557, "tps": 337835, "tokens": 545259520, "gpu_gb": 72.1, "elapsed_s": 1619.9}
{"step": 530, "loss": 3.8506, "lr": 0.00015869999999999998, "tps": 338056, "tokens": 555745280, "gpu_gb": 72.1, "elapsed_s": 1650.9}
{"step": 540, "loss": 3.8862, "lr": 0.00016169999999999997, "tps": 338112, "tokens": 566231040, "gpu_gb": 72.1, "elapsed_s": 1681.9}
{"step": 550, "loss": 3.8731, "lr": 0.0001647, "tps": 338124, "tokens": 576716800, "gpu_gb": 72.1, "elapsed_s": 1713.0}
{"step": 560, "loss": 3.812, "lr": 0.00016769999999999998, "tps": 338349, "tokens": 587202560, "gpu_gb": 72.1, "elapsed_s": 1743.9}
{"step": 570, "loss": 3.8264, "lr": 0.0001707, "tps": 338069, "tokens": 597688320, "gpu_gb": 72.1, "elapsed_s": 1775.0}
{"step": 580, "loss": 3.7331, "lr": 0.0001737, "tps": 337942, "tokens": 608174080, "gpu_gb": 72.1, "elapsed_s": 1806.0}
{"step": 590, "loss": 3.7047, "lr": 0.00017669999999999999, "tps": 337828, "tokens": 618659840, "gpu_gb": 72.1, "elapsed_s": 1837.0}
{"step": 600, "loss": 3.7206, "lr": 0.00017969999999999998, "tps": 337819, "tokens": 629145600, "gpu_gb": 72.1, "elapsed_s": 1868.1}
{"step": 610, "loss": 3.7448, "lr": 0.00018269999999999997, "tps": 338088, "tokens": 639631360, "gpu_gb": 72.1, "elapsed_s": 1899.1}
{"step": 620, "loss": 3.7058, "lr": 0.0001857, "tps": 337822, "tokens": 650117120, "gpu_gb": 72.1, "elapsed_s": 1930.1}
{"step": 630, "loss": 3.6843, "lr": 0.00018869999999999998, "tps": 337929, "tokens": 660602880, "gpu_gb": 72.1, "elapsed_s": 1961.2}
{"step": 640, "loss": 3.6939, "lr": 0.00019169999999999997, "tps": 337641, "tokens": 671088640, "gpu_gb": 72.1, "elapsed_s": 1992.2}
{"step": 650, "loss": 3.736, "lr": 0.0001947, "tps": 338221, "tokens": 681574400, "gpu_gb": 72.1, "elapsed_s": 2023.2}
{"step": 660, "loss": 3.6514, "lr": 0.00019769999999999998, "tps": 338229, "tokens": 692060160, "gpu_gb": 72.1, "elapsed_s": 2054.2}
{"step": 670, "loss": 3.6638, "lr": 0.0002007, "tps": 338125, "tokens": 702545920, "gpu_gb": 72.1, "elapsed_s": 2085.2}
{"step": 680, "loss": 3.6125, "lr": 0.0002037, "tps": 338435, "tokens": 713031680, "gpu_gb": 72.1, "elapsed_s": 2116.2}
{"step": 690, "loss": 3.6003, "lr": 0.00020669999999999998, "tps": 338256, "tokens": 723517440, "gpu_gb": 72.1, "elapsed_s": 2147.2}
{"step": 700, "loss": 3.5904, "lr": 0.00020969999999999997, "tps": 338258, "tokens": 734003200, "gpu_gb": 72.1, "elapsed_s": 2178.2}
{"step": 710, "loss": 3.5695, "lr": 0.00021269999999999997, "tps": 338250, "tokens": 744488960, "gpu_gb": 72.1, "elapsed_s": 2209.2}
{"step": 720, "loss": 3.5191, "lr": 0.00021569999999999998, "tps": 338222, "tokens": 754974720, "gpu_gb": 72.1, "elapsed_s": 2240.2}
{"step": 730, "loss": 3.4846, "lr": 0.00021869999999999998, "tps": 338218, "tokens": 765460480, "gpu_gb": 72.1, "elapsed_s": 2271.2}
{"step": 740, "loss": 3.514, "lr": 0.00022169999999999997, "tps": 338343, "tokens": 775946240, "gpu_gb": 72.1, "elapsed_s": 2302.2}
{"step": 750, "loss": 3.5441, "lr": 0.0002247, "tps": 338097, "tokens": 786432000, "gpu_gb": 72.1, "elapsed_s": 2333.2}
{"step": 760, "loss": 3.5615, "lr": 0.00022769999999999998, "tps": 338351, "tokens": 796917760, "gpu_gb": 72.1, "elapsed_s": 2364.2}
{"step": 770, "loss": 3.5083, "lr": 0.0002307, "tps": 338296, "tokens": 807403520, "gpu_gb": 72.1, "elapsed_s": 2395.2}
{"step": 780, "loss": 3.4712, "lr": 0.0002337, "tps": 338280, "tokens": 817889280, "gpu_gb": 72.1, "elapsed_s": 2426.2}
{"step": 790, "loss": 3.4273, "lr": 0.00023669999999999995, "tps": 338757, "tokens": 828375040, "gpu_gb": 72.1, "elapsed_s": 2457.2}
{"step": 800, "loss": 3.4286, "lr": 0.00023969999999999997, "tps": 338241, "tokens": 838860800, "gpu_gb": 72.1, "elapsed_s": 2488.2}
{"step": 810, "loss": 3.4875, "lr": 0.00024269999999999996, "tps": 338204, "tokens": 849346560, "gpu_gb": 72.1, "elapsed_s": 2519.2}
{"step": 820, "loss": 3.4334, "lr": 0.00024569999999999995, "tps": 338359, "tokens": 859832320, "gpu_gb": 72.1, "elapsed_s": 2550.2}
{"step": 830, "loss": 3.4144, "lr": 0.0002487, "tps": 338364, "tokens": 870318080, "gpu_gb": 72.1, "elapsed_s": 2581.1}
{"step": 840, "loss": 3.3745, "lr": 0.0002517, "tps": 338038, "tokens": 880803840, "gpu_gb": 72.1, "elapsed_s": 2612.2}
{"step": 850, "loss": 3.3452, "lr": 0.00025469999999999996, "tps": 338174, "tokens": 891289600, "gpu_gb": 72.1, "elapsed_s": 2643.2}
{"step": 860, "loss": 3.4124, "lr": 0.0002577, "tps": 338120, "tokens": 901775360, "gpu_gb": 72.1, "elapsed_s": 2674.2}
{"step": 870, "loss": 3.3429, "lr": 0.0002607, "tps": 337981, "tokens": 912261120, "gpu_gb": 72.1, "elapsed_s": 2705.2}
{"step": 880, "loss": 3.3011, "lr": 0.0002637, "tps": 337680, "tokens": 922746880, "gpu_gb": 72.1, "elapsed_s": 2736.3}
{"step": 890, "loss": 3.3417, "lr": 0.0002667, "tps": 337641, "tokens": 933232640, "gpu_gb": 72.1, "elapsed_s": 2767.3}
{"step": 900, "loss": 3.4644, "lr": 0.0002697, "tps": 337972, "tokens": 943718400, "gpu_gb": 72.1, "elapsed_s": 2798.3}
{"step": 910, "loss": 3.3903, "lr": 0.0002727, "tps": 337830, "tokens": 954204160, "gpu_gb": 72.1, "elapsed_s": 2829.4}
{"step": 920, "loss": 3.2915, "lr": 0.0002757, "tps": 337785, "tokens": 964689920, "gpu_gb": 72.1, "elapsed_s": 2860.4}
{"step": 930, "loss": 3.2839, "lr": 0.0002787, "tps": 337828, "tokens": 975175680, "gpu_gb": 72.1, "elapsed_s": 2891.5}
{"step": 940, "loss": 3.2583, "lr": 0.00028169999999999996, "tps": 337773, "tokens": 985661440, "gpu_gb": 72.1, "elapsed_s": 2922.5}
{"step": 950, "loss": 3.2841, "lr": 0.0002846999999999999, "tps": 338391, "tokens": 996147200, "gpu_gb": 72.1, "elapsed_s": 2953.5}
{"step": 960, "loss": 3.2582, "lr": 0.00028769999999999995, "tps": 337559, "tokens": 1006632960, "gpu_gb": 72.1, "elapsed_s": 2984.6}
{"step": 970, "loss": 3.3341, "lr": 0.00029069999999999996, "tps": 337931, "tokens": 1017118720, "gpu_gb": 72.1, "elapsed_s": 3015.6}
{"step": 980, "loss": 3.2309, "lr": 0.0002937, "tps": 337696, "tokens": 1027604480, "gpu_gb": 72.1, "elapsed_s": 3046.6}
{"step": 990, "loss": 3.2629, "lr": 0.00029669999999999995, "tps": 337599, "tokens": 1038090240, "gpu_gb": 72.1, "elapsed_s": 3077.7}
{"step": 1000, "loss": 3.2596, "lr": 0.00029969999999999997, "tps": 337745, "tokens": 1048576000, "gpu_gb": 72.1, "elapsed_s": 3108.8}
{"step": 1010, "loss": 3.2616, "lr": 0.0003, "tps": 143749, "tokens": 1059061760, "gpu_gb": 72.1, "elapsed_s": 3181.7}
{"step": 1020, "loss": 3.1904, "lr": 0.0003, "tps": 337809, "tokens": 1069547520, "gpu_gb": 72.1, "elapsed_s": 3212.7}
{"step": 1030, "loss": 3.2038, "lr": 0.0003, "tps": 337754, "tokens": 1080033280, "gpu_gb": 72.1, "elapsed_s": 3243.8}
{"step": 1040, "loss": 3.2201, "lr": 0.0003, "tps": 337688, "tokens": 1090519040, "gpu_gb": 72.1, "elapsed_s": 3274.8}
{"step": 1050, "loss": 3.1965, "lr": 0.0003, "tps": 337742, "tokens": 1101004800, "gpu_gb": 72.1, "elapsed_s": 3305.9}
{"step": 1060, "loss": 3.1839, "lr": 0.0003, "tps": 337780, "tokens": 1111490560, "gpu_gb": 72.1, "elapsed_s": 3336.9}
{"step": 1070, "loss": 3.1744, "lr": 0.0003, "tps": 337809, "tokens": 1121976320, "gpu_gb": 72.1, "elapsed_s": 3368.0}
{"step": 1080, "loss": 3.1328, "lr": 0.0003, "tps": 337642, "tokens": 1132462080, "gpu_gb": 72.1, "elapsed_s": 3399.0}
{"step": 1090, "loss": 3.1862, "lr": 0.0003, "tps": 337525, "tokens": 1142947840, "gpu_gb": 72.1, "elapsed_s": 3430.1}
{"step": 1100, "loss": 3.2287, "lr": 0.0003, "tps": 337658, "tokens": 1153433600, "gpu_gb": 72.1, "elapsed_s": 3461.1}
{"step": 1110, "loss": 3.087, "lr": 0.0003, "tps": 337500, "tokens": 1163919360, "gpu_gb": 72.1, "elapsed_s": 3492.2}
{"step": 1120, "loss": 3.0762, "lr": 0.0003, "tps": 338131, "tokens": 1174405120, "gpu_gb": 72.1, "elapsed_s": 3523.2}
{"step": 1130, "loss": 3.0911, "lr": 0.0003, "tps": 337471, "tokens": 1184890880, "gpu_gb": 72.1, "elapsed_s": 3554.3}
{"step": 1140, "loss": 3.1268, "lr": 0.0003, "tps": 337735, "tokens": 1195376640, "gpu_gb": 72.1, "elapsed_s": 3585.3}
{"step": 1150, "loss": 3.0484, "lr": 0.0003, "tps": 337787, "tokens": 1205862400, "gpu_gb": 72.1, "elapsed_s": 3616.4}
{"step": 1160, "loss": 3.0923, "lr": 0.0003, "tps": 337936, "tokens": 1216348160, "gpu_gb": 72.1, "elapsed_s": 3647.4}
{"step": 1170, "loss": 3.0839, "lr": 0.0003, "tps": 337549, "tokens": 1226833920, "gpu_gb": 72.1, "elapsed_s": 3678.5}
{"step": 1180, "loss": 3.086, "lr": 0.0003, "tps": 337683, "tokens": 1237319680, "gpu_gb": 72.1, "elapsed_s": 3709.5}
{"step": 1190, "loss": 3.1308, "lr": 0.0003, "tps": 337728, "tokens": 1247805440, "gpu_gb": 72.1, "elapsed_s": 3740.6}
{"step": 1200, "loss": 3.0694, "lr": 0.0003, "tps": 337549, "tokens": 1258291200, "gpu_gb": 72.1, "elapsed_s": 3771.6}
{"step": 1210, "loss": 3.086, "lr": 0.0003, "tps": 337753, "tokens": 1268776960, "gpu_gb": 72.1, "elapsed_s": 3802.7}
{"step": 1220, "loss": 3.0563, "lr": 0.0003, "tps": 337935, "tokens": 1279262720, "gpu_gb": 72.1, "elapsed_s": 3833.7}
{"step": 1230, "loss": 3.0831, "lr": 0.0003, "tps": 337705, "tokens": 1289748480, "gpu_gb": 72.1, "elapsed_s": 3864.8}
{"step": 1240, "loss": 3.0273, "lr": 0.0003, "tps": 337440, "tokens": 1300234240, "gpu_gb": 72.1, "elapsed_s": 3895.8}
{"step": 1250, "loss": 3.0422, "lr": 0.0003, "tps": 337525, "tokens": 1310720000, "gpu_gb": 72.1, "elapsed_s": 3926.9}
{"step": 1260, "loss": 3.0678, "lr": 0.0003, "tps": 337741, "tokens": 1321205760, "gpu_gb": 72.1, "elapsed_s": 3958.0}
{"step": 1270, "loss": 3.0452, "lr": 0.0003, "tps": 337573, "tokens": 1331691520, "gpu_gb": 72.1, "elapsed_s": 3989.0}
{"step": 1280, "loss": 3.022, "lr": 0.0003, "tps": 337966, "tokens": 1342177280, "gpu_gb": 72.1, "elapsed_s": 4020.1}
{"step": 1290, "loss": 2.9902, "lr": 0.0003, "tps": 337639, "tokens": 1352663040, "gpu_gb": 72.1, "elapsed_s": 4051.1}
{"step": 1300, "loss": 3.0195, "lr": 0.0003, "tps": 337706, "tokens": 1363148800, "gpu_gb": 72.1, "elapsed_s": 4082.2}
{"step": 1310, "loss": 3.0243, "lr": 0.0003, "tps": 337257, "tokens": 1373634560, "gpu_gb": 72.1, "elapsed_s": 4113.2}
{"step": 1320, "loss": 2.9802, "lr": 0.0003, "tps": 337687, "tokens": 1384120320, "gpu_gb": 72.1, "elapsed_s": 4144.3}
{"step": 1330, "loss": 2.9939, "lr": 0.0003, "tps": 337334, "tokens": 1394606080, "gpu_gb": 72.1, "elapsed_s": 4175.4}
{"step": 1340, "loss": 3.0147, "lr": 0.0003, "tps": 337945, "tokens": 1405091840, "gpu_gb": 72.1, "elapsed_s": 4206.4}
{"step": 1350, "loss": 3.0119, "lr": 0.0003, "tps": 337681, "tokens": 1415577600, "gpu_gb": 72.1, "elapsed_s": 4237.5}
{"step": 1360, "loss": 2.9667, "lr": 0.0003, "tps": 337695, "tokens": 1426063360, "gpu_gb": 72.1, "elapsed_s": 4268.5}
{"step": 1370, "loss": 2.9969, "lr": 0.0003, "tps": 337628, "tokens": 1436549120, "gpu_gb": 72.1, "elapsed_s": 4299.6}
{"step": 1380, "loss": 2.9876, "lr": 0.0003, "tps": 337848, "tokens": 1447034880, "gpu_gb": 72.1, "elapsed_s": 4330.6}
{"step": 1390, "loss": 2.9825, "lr": 0.0003, "tps": 337570, "tokens": 1457520640, "gpu_gb": 72.1, "elapsed_s": 4361.7}
{"step": 1400, "loss": 2.9958, "lr": 0.0003, "tps": 337662, "tokens": 1468006400, "gpu_gb": 72.1, "elapsed_s": 4392.7}
{"step": 1410, "loss": 2.9418, "lr": 0.0003, "tps": 337788, "tokens": 1478492160, "gpu_gb": 72.1, "elapsed_s": 4423.8}
{"step": 1420, "loss": 2.9457, "lr": 0.0003, "tps": 337628, "tokens": 1488977920, "gpu_gb": 72.1, "elapsed_s": 4454.8}
{"step": 1430, "loss": 3.0208, "lr": 0.0003, "tps": 337270, "tokens": 1499463680, "gpu_gb": 72.1, "elapsed_s": 4485.9}
{"step": 1440, "loss": 2.9686, "lr": 0.0003, "tps": 337800, "tokens": 1509949440, "gpu_gb": 72.1, "elapsed_s": 4517.0}
{"step": 1450, "loss": 2.9496, "lr": 0.0003, "tps": 337275, "tokens": 1520435200, "gpu_gb": 72.1, "elapsed_s": 4548.1}
{"step": 1460, "loss": 2.9548, "lr": 0.0003, "tps": 337804, "tokens": 1530920960, "gpu_gb": 72.1, "elapsed_s": 4579.1}
{"step": 1470, "loss": 2.9631, "lr": 0.0003, "tps": 337747, "tokens": 1541406720, "gpu_gb": 72.1, "elapsed_s": 4610.1}
{"step": 1480, "loss": 2.9061, "lr": 0.0003, "tps": 337839, "tokens": 1551892480, "gpu_gb": 72.1, "elapsed_s": 4641.2}
{"step": 1490, "loss": 2.9457, "lr": 0.0003, "tps": 337516, "tokens": 1562378240, "gpu_gb": 72.1, "elapsed_s": 4672.2}
{"step": 1500, "loss": 2.9158, "lr": 0.0003, "tps": 337715, "tokens": 1572864000, "gpu_gb": 72.1, "elapsed_s": 4703.3}
{"step": 1510, "loss": 2.9082, "lr": 0.0003, "tps": 337724, "tokens": 1583349760, "gpu_gb": 72.1, "elapsed_s": 4734.3}
{"step": 1520, "loss": 2.891, "lr": 0.0003, "tps": 337518, "tokens": 1593835520, "gpu_gb": 72.1, "elapsed_s": 4765.4}
{"step": 1530, "loss": 2.9157, "lr": 0.0003, "tps": 337831, "tokens": 1604321280, "gpu_gb": 72.1, "elapsed_s": 4796.4}
{"step": 1540, "loss": 2.9304, "lr": 0.0003, "tps": 337528, "tokens": 1614807040, "gpu_gb": 72.1, "elapsed_s": 4827.5}
{"step": 1550, "loss": 2.9203, "lr": 0.0003, "tps": 337590, "tokens": 1625292800, "gpu_gb": 72.1, "elapsed_s": 4858.6}
{"step": 1560, "loss": 2.9285, "lr": 0.0003, "tps": 337913, "tokens": 1635778560, "gpu_gb": 72.1, "elapsed_s": 4889.6}
{"step": 1570, "loss": 2.8671, "lr": 0.0003, "tps": 337538, "tokens": 1646264320, "gpu_gb": 72.1, "elapsed_s": 4920.7}
{"step": 1580, "loss": 2.8861, "lr": 0.0003, "tps": 337620, "tokens": 1656750080, "gpu_gb": 72.1, "elapsed_s": 4951.7}
{"step": 1590, "loss": 2.9128, "lr": 0.0003, "tps": 337484, "tokens": 1667235840, "gpu_gb": 72.1, "elapsed_s": 4982.8}
{"step": 1600, "loss": 2.9115, "lr": 0.0003, "tps": 337490, "tokens": 1677721600, "gpu_gb": 72.1, "elapsed_s": 5013.9}
{"step": 1610, "loss": 2.8903, "lr": 0.0003, "tps": 337730, "tokens": 1688207360, "gpu_gb": 72.1, "elapsed_s": 5044.9}
{"step": 1620, "loss": 2.8658, "lr": 0.0003, "tps": 337863, "tokens": 1698693120, "gpu_gb": 72.1, "elapsed_s": 5076.0}
{"step": 1630, "loss": 2.9027, "lr": 0.0003, "tps": 337856, "tokens": 1709178880, "gpu_gb": 72.1, "elapsed_s": 5107.0}
{"step": 1640, "loss": 2.8842, "lr": 0.0003, "tps": 337675, "tokens": 1719664640, "gpu_gb": 72.1, "elapsed_s": 5138.0}
{"step": 1650, "loss": 2.894, "lr": 0.0003, "tps": 337836, "tokens": 1730150400, "gpu_gb": 72.1, "elapsed_s": 5169.1}
{"step": 1660, "loss": 2.8836, "lr": 0.0003, "tps": 337548, "tokens": 1740636160, "gpu_gb": 72.1, "elapsed_s": 5200.1}
{"step": 1670, "loss": 2.8902, "lr": 0.0003, "tps": 337662, "tokens": 1751121920, "gpu_gb": 72.1, "elapsed_s": 5231.2}
{"step": 1680, "loss": 2.8719, "lr": 0.0003, "tps": 337686, "tokens": 1761607680, "gpu_gb": 72.1, "elapsed_s": 5262.3}
{"step": 1690, "loss": 2.8742, "lr": 0.0003, "tps": 337716, "tokens": 1772093440, "gpu_gb": 72.1, "elapsed_s": 5293.3}
{"step": 1700, "loss": 2.858, "lr": 0.0003, "tps": 337814, "tokens": 1782579200, "gpu_gb": 72.1, "elapsed_s": 5324.3}
{"step": 1710, "loss": 2.9222, "lr": 0.0003, "tps": 337563, "tokens": 1793064960, "gpu_gb": 72.1, "elapsed_s": 5355.4}
{"step": 1720, "loss": 2.8431, "lr": 0.0003, "tps": 337502, "tokens": 1803550720, "gpu_gb": 72.1, "elapsed_s": 5386.5}
{"step": 1730, "loss": 2.8559, "lr": 0.0003, "tps": 337462, "tokens": 1814036480, "gpu_gb": 72.1, "elapsed_s": 5417.5}
{"step": 1740, "loss": 2.874, "lr": 0.0003, "tps": 337648, "tokens": 1824522240, "gpu_gb": 72.1, "elapsed_s": 5448.6}
{"step": 1750, "loss": 2.8356, "lr": 0.0003, "tps": 337738, "tokens": 1835008000, "gpu_gb": 72.1, "elapsed_s": 5479.7}
{"step": 1760, "loss": 2.8684, "lr": 0.0003, "tps": 337720, "tokens": 1845493760, "gpu_gb": 72.1, "elapsed_s": 5510.7}
{"step": 1770, "loss": 2.8664, "lr": 0.0003, "tps": 337471, "tokens": 1855979520, "gpu_gb": 72.1, "elapsed_s": 5541.8}
{"step": 1780, "loss": 2.8539, "lr": 0.0003, "tps": 337865, "tokens": 1866465280, "gpu_gb": 72.1, "elapsed_s": 5572.8}
{"step": 1790, "loss": 2.8336, "lr": 0.0003, "tps": 337679, "tokens": 1876951040, "gpu_gb": 72.1, "elapsed_s": 5603.9}
{"step": 1800, "loss": 2.8886, "lr": 0.0003, "tps": 338123, "tokens": 1887436800, "gpu_gb": 72.1, "elapsed_s": 5634.9}
{"step": 1810, "loss": 2.8994, "lr": 0.0003, "tps": 337568, "tokens": 1897922560, "gpu_gb": 72.1, "elapsed_s": 5665.9}
{"step": 1820, "loss": 2.8435, "lr": 0.0003, "tps": 337583, "tokens": 1908408320, "gpu_gb": 72.1, "elapsed_s": 5697.0}
{"step": 1830, "loss": 2.8561, "lr": 0.0003, "tps": 337604, "tokens": 1918894080, "gpu_gb": 72.1, "elapsed_s": 5728.1}
{"step": 1840, "loss": 2.8222, "lr": 0.0003, "tps": 337671, "tokens": 1929379840, "gpu_gb": 72.1, "elapsed_s": 5759.1}
{"step": 1850, "loss": 2.8507, "lr": 0.0003, "tps": 337813, "tokens": 1939865600, "gpu_gb": 72.1, "elapsed_s": 5790.2}
{"step": 1860, "loss": 2.8311, "lr": 0.0003, "tps": 337510, "tokens": 1950351360, "gpu_gb": 72.1, "elapsed_s": 5821.2}
{"step": 1870, "loss": 2.8509, "lr": 0.0003, "tps": 337698, "tokens": 1960837120, "gpu_gb": 72.1, "elapsed_s": 5852.3}
{"step": 1880, "loss": 2.818, "lr": 0.0003, "tps": 337626, "tokens": 1971322880, "gpu_gb": 72.1, "elapsed_s": 5883.3}
{"step": 1890, "loss": 2.8317, "lr": 0.0003, "tps": 337960, "tokens": 1981808640, "gpu_gb": 72.1, "elapsed_s": 5914.4}
{"step": 1900, "loss": 2.8199, "lr": 0.0003, "tps": 337644, "tokens": 1992294400, "gpu_gb": 72.1, "elapsed_s": 5945.4}
{"step": 1910, "loss": 2.867, "lr": 0.0003, "tps": 337366, "tokens": 2002780160, "gpu_gb": 72.1, "elapsed_s": 5976.5}
{"step": 1920, "loss": 2.805, "lr": 0.0003, "tps": 337817, "tokens": 2013265920, "gpu_gb": 72.1, "elapsed_s": 6007.5}
{"step": 1930, "loss": 2.7798, "lr": 0.0003, "tps": 337828, "tokens": 2023751680, "gpu_gb": 72.1, "elapsed_s": 6038.6}
{"step": 1940, "loss": 2.7985, "lr": 0.0003, "tps": 337747, "tokens": 2034237440, "gpu_gb": 72.1, "elapsed_s": 6069.6}
{"step": 1950, "loss": 2.8329, "lr": 0.0003, "tps": 337720, "tokens": 2044723200, "gpu_gb": 72.1, "elapsed_s": 6100.7}
{"step": 1960, "loss": 2.8554, "lr": 0.0003, "tps": 337750, "tokens": 2055208960, "gpu_gb": 72.1, "elapsed_s": 6131.7}
{"step": 1970, "loss": 2.8135, "lr": 0.0003, "tps": 337858, "tokens": 2065694720, "gpu_gb": 72.1, "elapsed_s": 6162.7}
{"step": 1980, "loss": 2.8514, "lr": 0.0003, "tps": 337572, "tokens": 2076180480, "gpu_gb": 72.1, "elapsed_s": 6193.8}
{"step": 1990, "loss": 2.8521, "lr": 0.0003, "tps": 337474, "tokens": 2086666240, "gpu_gb": 72.1, "elapsed_s": 6224.9}
{"step": 2000, "loss": 2.7694, "lr": 0.0003, "tps": 337412, "tokens": 2097152000, "gpu_gb": 72.1, "elapsed_s": 6256.0}
{"step": 2010, "loss": 2.8209, "lr": 0.0003, "tps": 148844, "tokens": 2107637760, "gpu_gb": 72.1, "elapsed_s": 6326.4}
{"step": 2020, "loss": 2.7727, "lr": 0.0003, "tps": 337702, "tokens": 2118123520, "gpu_gb": 72.1, "elapsed_s": 6357.5}
{"step": 2030, "loss": 2.7783, "lr": 0.0003, "tps": 337892, "tokens": 2128609280, "gpu_gb": 72.1, "elapsed_s": 6388.5}
{"step": 2040, "loss": 2.7768, "lr": 0.0003, "tps": 337427, "tokens": 2139095040, "gpu_gb": 72.1, "elapsed_s": 6419.6}
{"step": 2050, "loss": 2.788, "lr": 0.0003, "tps": 337517, "tokens": 2149580800, "gpu_gb": 72.1, "elapsed_s": 6450.6}
{"step": 2060, "loss": 2.7512, "lr": 0.0003, "tps": 337628, "tokens": 2160066560, "gpu_gb": 72.1, "elapsed_s": 6481.7}
{"step": 2070, "loss": 2.7701, "lr": 0.0003, "tps": 337588, "tokens": 2170552320, "gpu_gb": 72.1, "elapsed_s": 6512.8}
{"step": 2080, "loss": 2.7239, "lr": 0.0003, "tps": 337632, "tokens": 2181038080, "gpu_gb": 72.1, "elapsed_s": 6543.8}
{"step": 2090, "loss": 2.752, "lr": 0.0003, "tps": 337749, "tokens": 2191523840, "gpu_gb": 72.1, "elapsed_s": 6574.9}
{"step": 2100, "loss": 2.7798, "lr": 0.0003, "tps": 337302, "tokens": 2202009600, "gpu_gb": 72.1, "elapsed_s": 6605.9}
{"step": 2110, "loss": 2.7465, "lr": 0.0003, "tps": 337428, "tokens": 2212495360, "gpu_gb": 72.1, "elapsed_s": 6637.0}
{"step": 2120, "loss": 2.7743, "lr": 0.0003, "tps": 337768, "tokens": 2222981120, "gpu_gb": 72.1, "elapsed_s": 6668.1}
{"step": 2130, "loss": 2.7738, "lr": 0.0003, "tps": 337739, "tokens": 2233466880, "gpu_gb": 72.1, "elapsed_s": 6699.1}
{"step": 2140, "loss": 2.7569, "lr": 0.0003, "tps": 337634, "tokens": 2243952640, "gpu_gb": 72.1, "elapsed_s": 6730.2}
{"step": 2150, "loss": 2.8012, "lr": 0.0003, "tps": 337662, "tokens": 2254438400, "gpu_gb": 72.1, "elapsed_s": 6761.2}
{"step": 2160, "loss": 2.7563, "lr": 0.0003, "tps": 337381, "tokens": 2264924160, "gpu_gb": 72.1, "elapsed_s": 6792.3}
{"step": 2170, "loss": 2.7568, "lr": 0.0003, "tps": 337521, "tokens": 2275409920, "gpu_gb": 72.1, "elapsed_s": 6823.4}
{"step": 2180, "loss": 2.7538, "lr": 0.0003, "tps": 337525, "tokens": 2285895680, "gpu_gb": 72.1, "elapsed_s": 6854.4}
{"step": 2190, "loss": 2.8106, "lr": 0.0003, "tps": 337510, "tokens": 2296381440, "gpu_gb": 72.1, "elapsed_s": 6885.5}
{"step": 2200, "loss": 2.6628, "lr": 0.0003, "tps": 337385, "tokens": 2306867200, "gpu_gb": 72.1, "elapsed_s": 6916.6}
{"step": 2210, "loss": 2.7014, "lr": 0.0003, "tps": 337430, "tokens": 2317352960, "gpu_gb": 72.1, "elapsed_s": 6947.7}
{"step": 2220, "loss": 2.7307, "lr": 0.0003, "tps": 337463, "tokens": 2327838720, "gpu_gb": 72.1, "elapsed_s": 6978.7}
{"step": 2230, "loss": 2.7327, "lr": 0.0003, "tps": 337440, "tokens": 2338324480, "gpu_gb": 72.1, "elapsed_s": 7009.8}
{"step": 2240, "loss": 2.785, "lr": 0.0003, "tps": 337469, "tokens": 2348810240, "gpu_gb": 72.1, "elapsed_s": 7040.9}
{"step": 2250, "loss": 2.7329, "lr": 0.0003, "tps": 337639, "tokens": 2359296000, "gpu_gb": 72.1, "elapsed_s": 7071.9}
{"step": 2260, "loss": 2.7351, "lr": 0.0003, "tps": 337644, "tokens": 2369781760, "gpu_gb": 72.1, "elapsed_s": 7103.0}
{"step": 2270, "loss": 2.69, "lr": 0.0003, "tps": 337442, "tokens": 2380267520, "gpu_gb": 72.1, "elapsed_s": 7134.1}
{"step": 2280, "loss": 2.7336, "lr": 0.0003, "tps": 337373, "tokens": 2390753280, "gpu_gb": 72.1, "elapsed_s": 7165.2}
{"step": 2290, "loss": 2.756, "lr": 0.0003, "tps": 337232, "tokens": 2401239040, "gpu_gb": 72.1, "elapsed_s": 7196.2}
{"step": 2300, "loss": 2.7134, "lr": 0.0003, "tps": 337575, "tokens": 2411724800, "gpu_gb": 72.1, "elapsed_s": 7227.3}
{"step": 2310, "loss": 2.7403, "lr": 0.0003, "tps": 337523, "tokens": 2422210560, "gpu_gb": 72.1, "elapsed_s": 7258.4}
{"step": 2320, "loss": 2.7371, "lr": 0.0003, "tps": 337572, "tokens": 2432696320, "gpu_gb": 72.1, "elapsed_s": 7289.4}
{"step": 2330, "loss": 2.7039, "lr": 0.0003, "tps": 337702, "tokens": 2443182080, "gpu_gb": 72.1, "elapsed_s": 7320.5}
{"step": 2340, "loss": 2.6991, "lr": 0.0003, "tps": 337671, "tokens": 2453667840, "gpu_gb": 72.1, "elapsed_s": 7351.5}
{"step": 2350, "loss": 2.7573, "lr": 0.0003, "tps": 337463, "tokens": 2464153600, "gpu_gb": 72.1, "elapsed_s": 7382.6}
{"step": 2360, "loss": 2.7449, "lr": 0.0003, "tps": 337427, "tokens": 2474639360, "gpu_gb": 72.1, "elapsed_s": 7413.7}
{"step": 2370, "loss": 2.7163, "lr": 0.0003, "tps": 337243, "tokens": 2485125120, "gpu_gb": 72.1, "elapsed_s": 7444.8}
{"step": 2380, "loss": 2.7419, "lr": 0.0003, "tps": 337661, "tokens": 2495610880, "gpu_gb": 72.1, "elapsed_s": 7475.8}
{"step": 2390, "loss": 2.7461, "lr": 0.0003, "tps": 337784, "tokens": 2506096640, "gpu_gb": 72.1, "elapsed_s": 7506.9}
{"step": 2400, "loss": 2.7353, "lr": 0.0003, "tps": 337439, "tokens": 2516582400, "gpu_gb": 72.1, "elapsed_s": 7538.0}
{"step": 2410, "loss": 2.6917, "lr": 0.0003, "tps": 337734, "tokens": 2527068160, "gpu_gb": 72.1, "elapsed_s": 7569.0}
{"step": 2420, "loss": 2.7321, "lr": 0.0003, "tps": 337454, "tokens": 2537553920, "gpu_gb": 72.1, "elapsed_s": 7600.1}
{"step": 2430, "loss": 2.7214, "lr": 0.0003, "tps": 337576, "tokens": 2548039680, "gpu_gb": 72.1, "elapsed_s": 7631.1}
{"step": 2440, "loss": 2.7282, "lr": 0.0003, "tps": 337108, "tokens": 2558525440, "gpu_gb": 72.1, "elapsed_s": 7662.2}
{"step": 2450, "loss": 2.7719, "lr": 0.0003, "tps": 337398, "tokens": 2569011200, "gpu_gb": 72.1, "elapsed_s": 7693.3}
{"step": 2460, "loss": 2.7202, "lr": 0.0003, "tps": 337389, "tokens": 2579496960, "gpu_gb": 72.1, "elapsed_s": 7724.4}
{"step": 2470, "loss": 2.7131, "lr": 0.0003, "tps": 337692, "tokens": 2589982720, "gpu_gb": 72.1, "elapsed_s": 7755.5}
{"step": 2480, "loss": 2.7159, "lr": 0.0003, "tps": 337631, "tokens": 2600468480, "gpu_gb": 72.1, "elapsed_s": 7786.5}
{"step": 2490, "loss": 2.7145, "lr": 0.0003, "tps": 337445, "tokens": 2610954240, "gpu_gb": 72.1, "elapsed_s": 7817.6}
{"step": 2500, "loss": 2.7106, "lr": 0.0003, "tps": 337381, "tokens": 2621440000, "gpu_gb": 72.1, "elapsed_s": 7848.7}
{"step": 2510, "loss": 2.7215, "lr": 0.0003, "tps": 337390, "tokens": 2631925760, "gpu_gb": 72.1, "elapsed_s": 7879.7}
{"step": 2520, "loss": 2.7288, "lr": 0.0003, "tps": 337474, "tokens": 2642411520, "gpu_gb": 72.1, "elapsed_s": 7910.8}
{"step": 2530, "loss": 2.7071, "lr": 0.0003, "tps": 337630, "tokens": 2652897280, "gpu_gb": 72.1, "elapsed_s": 7941.9}
{"step": 2540, "loss": 2.6876, "lr": 0.0003, "tps": 337497, "tokens": 2663383040, "gpu_gb": 72.1, "elapsed_s": 7972.9}
{"step": 2550, "loss": 2.6851, "lr": 0.0003, "tps": 337788, "tokens": 2673868800, "gpu_gb": 72.1, "elapsed_s": 8004.0}
{"step": 2560, "loss": 2.6703, "lr": 0.0003, "tps": 337471, "tokens": 2684354560, "gpu_gb": 72.1, "elapsed_s": 8035.1}
{"step": 2570, "loss": 2.7032, "lr": 0.0003, "tps": 337498, "tokens": 2694840320, "gpu_gb": 72.1, "elapsed_s": 8066.1}
{"step": 2580, "loss": 2.7158, "lr": 0.0003, "tps": 337730, "tokens": 2705326080, "gpu_gb": 72.1, "elapsed_s": 8097.2}
{"step": 2590, "loss": 2.6782, "lr": 0.0003, "tps": 337339, "tokens": 2715811840, "gpu_gb": 72.1, "elapsed_s": 8128.3}
{"step": 2600, "loss": 2.7377, "lr": 0.0003, "tps": 337567, "tokens": 2726297600, "gpu_gb": 72.1, "elapsed_s": 8159.3}
{"step": 2610, "loss": 2.7146, "lr": 0.0003, "tps": 337563, "tokens": 2736783360, "gpu_gb": 72.1, "elapsed_s": 8190.4}
{"step": 2620, "loss": 2.6989, "lr": 0.0003, "tps": 337496, "tokens": 2747269120, "gpu_gb": 72.1, "elapsed_s": 8221.5}
{"step": 2630, "loss": 2.7155, "lr": 0.0003, "tps": 337525, "tokens": 2757754880, "gpu_gb": 72.1, "elapsed_s": 8252.5}
{"step": 2640, "loss": 2.7081, "lr": 0.0003, "tps": 337513, "tokens": 2768240640, "gpu_gb": 72.1, "elapsed_s": 8283.6}
{"step": 2650, "loss": 2.7105, "lr": 0.0003, "tps": 337857, "tokens": 2778726400, "gpu_gb": 72.1, "elapsed_s": 8314.6}
{"step": 2660, "loss": 2.6405, "lr": 0.0003, "tps": 337354, "tokens": 2789212160, "gpu_gb": 72.1, "elapsed_s": 8345.7}
{"step": 2670, "loss": 2.6994, "lr": 0.0003, "tps": 337365, "tokens": 2799697920, "gpu_gb": 72.1, "elapsed_s": 8376.8}
{"step": 2680, "loss": 2.7194, "lr": 0.0003, "tps": 337143, "tokens": 2810183680, "gpu_gb": 72.1, "elapsed_s": 8407.9}
{"step": 2690, "loss": 2.7013, "lr": 0.0003, "tps": 337553, "tokens": 2820669440, "gpu_gb": 72.1, "elapsed_s": 8439.0}
{"step": 2700, "loss": 2.6697, "lr": 0.0003, "tps": 337479, "tokens": 2831155200, "gpu_gb": 72.1, "elapsed_s": 8470.0}
{"step": 2710, "loss": 2.663, "lr": 0.0003, "tps": 337581, "tokens": 2841640960, "gpu_gb": 72.1, "elapsed_s": 8501.1}
{"step": 2720, "loss": 2.6602, "lr": 0.0003, "tps": 337573, "tokens": 2852126720, "gpu_gb": 72.1, "elapsed_s": 8532.2}
{"step": 2730, "loss": 2.6886, "lr": 0.0003, "tps": 337688, "tokens": 2862612480, "gpu_gb": 72.1, "elapsed_s": 8563.2}
{"step": 2740, "loss": 2.6623, "lr": 0.0003, "tps": 337718, "tokens": 2873098240, "gpu_gb": 72.1, "elapsed_s": 8594.3}
{"step": 2750, "loss": 2.6623, "lr": 0.0003, "tps": 337548, "tokens": 2883584000, "gpu_gb": 72.1, "elapsed_s": 8625.3}
{"step": 2760, "loss": 2.6478, "lr": 0.0003, "tps": 337844, "tokens": 2894069760, "gpu_gb": 72.1, "elapsed_s": 8656.4}
{"step": 2770, "loss": 2.6929, "lr": 0.0003, "tps": 337450, "tokens": 2904555520, "gpu_gb": 72.1, "elapsed_s": 8687.4}
{"step": 2780, "loss": 2.7051, "lr": 0.0003, "tps": 337607, "tokens": 2915041280, "gpu_gb": 72.1, "elapsed_s": 8718.5}
{"step": 2790, "loss": 2.6803, "lr": 0.0003, "tps": 337385, "tokens": 2925527040, "gpu_gb": 72.1, "elapsed_s": 8749.6}
{"step": 2800, "loss": 2.6846, "lr": 0.0003, "tps": 337645, "tokens": 2936012800, "gpu_gb": 72.1, "elapsed_s": 8780.6}
{"step": 2810, "loss": 2.7239, "lr": 0.0003, "tps": 337624, "tokens": 2946498560, "gpu_gb": 72.1, "elapsed_s": 8811.7}
{"step": 2820, "loss": 2.7077, "lr": 0.0003, "tps": 337945, "tokens": 2956984320, "gpu_gb": 72.1, "elapsed_s": 8842.7}
{"step": 2830, "loss": 2.6419, "lr": 0.0003, "tps": 337444, "tokens": 2967470080, "gpu_gb": 72.1, "elapsed_s": 8873.8}
{"step": 2840, "loss": 2.6381, "lr": 0.0003, "tps": 337467, "tokens": 2977955840, "gpu_gb": 72.1, "elapsed_s": 8904.9}
{"step": 2850, "loss": 2.6325, "lr": 0.0003, "tps": 337664, "tokens": 2988441600, "gpu_gb": 72.1, "elapsed_s": 8935.9}
{"step": 2860, "loss": 2.6561, "lr": 0.0003, "tps": 337423, "tokens": 2998927360, "gpu_gb": 72.1, "elapsed_s": 8967.0}
{"step": 2870, "loss": 2.6655, "lr": 0.0003, "tps": 337505, "tokens": 3009413120, "gpu_gb": 72.1, "elapsed_s": 8998.1}
{"step": 2880, "loss": 2.6223, "lr": 0.0003, "tps": 337460, "tokens": 3019898880, "gpu_gb": 72.1, "elapsed_s": 9029.1}
{"step": 2890, "loss": 2.6552, "lr": 0.0003, "tps": 337644, "tokens": 3030384640, "gpu_gb": 72.1, "elapsed_s": 9060.2}
{"step": 2900, "loss": 2.6251, "lr": 0.0003, "tps": 337465, "tokens": 3040870400, "gpu_gb": 72.1, "elapsed_s": 9091.3}
{"step": 2910, "loss": 2.6517, "lr": 0.0003, "tps": 337353, "tokens": 3051356160, "gpu_gb": 72.1, "elapsed_s": 9122.3}
{"step": 2920, "loss": 2.6914, "lr": 0.0003, "tps": 337569, "tokens": 3061841920, "gpu_gb": 72.1, "elapsed_s": 9153.4}
{"step": 2930, "loss": 2.6208, "lr": 0.0003, "tps": 337484, "tokens": 3072327680, "gpu_gb": 72.1, "elapsed_s": 9184.5}
{"step": 2940, "loss": 2.7086, "lr": 0.0003, "tps": 337503, "tokens": 3082813440, "gpu_gb": 72.1, "elapsed_s": 9215.5}
{"step": 2950, "loss": 2.6339, "lr": 0.0003, "tps": 337703, "tokens": 3093299200, "gpu_gb": 72.1, "elapsed_s": 9246.6}
{"step": 2960, "loss": 2.6655, "lr": 0.0003, "tps": 337630, "tokens": 3103784960, "gpu_gb": 72.1, "elapsed_s": 9277.6}
{"step": 2970, "loss": 2.6395, "lr": 0.0003, "tps": 337813, "tokens": 3114270720, "gpu_gb": 72.1, "elapsed_s": 9308.7}
{"step": 2980, "loss": 2.6356, "lr": 0.0003, "tps": 337601, "tokens": 3124756480, "gpu_gb": 72.1, "elapsed_s": 9339.7}
{"step": 2990, "loss": 2.7013, "lr": 0.0003, "tps": 337698, "tokens": 3135242240, "gpu_gb": 72.1, "elapsed_s": 9370.8}
{"step": 3000, "loss": 2.6204, "lr": 0.0003, "tps": 337738, "tokens": 3145728000, "gpu_gb": 72.1, "elapsed_s": 9401.8}
{"step": 3010, "loss": 2.6464, "lr": 0.0003, "tps": 164373, "tokens": 3156213760, "gpu_gb": 72.1, "elapsed_s": 9465.6}
{"step": 3020, "loss": 2.6526, "lr": 0.0003, "tps": 337449, "tokens": 3166699520, "gpu_gb": 72.1, "elapsed_s": 9496.7}
{"step": 3030, "loss": 2.6484, "lr": 0.0003, "tps": 337470, "tokens": 3177185280, "gpu_gb": 72.1, "elapsed_s": 9527.8}
{"step": 3040, "loss": 2.6621, "lr": 0.0003, "tps": 337688, "tokens": 3187671040, "gpu_gb": 72.1, "elapsed_s": 9558.8}
{"step": 3050, "loss": 2.6249, "lr": 0.0003, "tps": 337730, "tokens": 3198156800, "gpu_gb": 72.1, "elapsed_s": 9589.9}
{"step": 3060, "loss": 2.6468, "lr": 0.0003, "tps": 337536, "tokens": 3208642560, "gpu_gb": 72.1, "elapsed_s": 9621.0}
{"step": 3070, "loss": 2.7034, "lr": 0.0003, "tps": 337624, "tokens": 3219128320, "gpu_gb": 72.1, "elapsed_s": 9652.0}
{"step": 3080, "loss": 2.7049, "lr": 0.0003, "tps": 337509, "tokens": 3229614080, "gpu_gb": 72.1, "elapsed_s": 9683.1}
{"step": 3090, "loss": 2.6466, "lr": 0.0003, "tps": 337621, "tokens": 3240099840, "gpu_gb": 72.1, "elapsed_s": 9714.1}
{"step": 3100, "loss": 2.5909, "lr": 0.0003, "tps": 338064, "tokens": 3250585600, "gpu_gb": 72.1, "elapsed_s": 9745.2}
{"step": 3110, "loss": 2.7079, "lr": 0.0003, "tps": 337903, "tokens": 3261071360, "gpu_gb": 72.1, "elapsed_s": 9776.2}
{"step": 3120, "loss": 2.6119, "lr": 0.0003, "tps": 338167, "tokens": 3271557120, "gpu_gb": 72.1, "elapsed_s": 9807.2}
{"step": 3130, "loss": 2.6788, "lr": 0.0003, "tps": 338188, "tokens": 3282042880, "gpu_gb": 72.1, "elapsed_s": 9838.2}
{"step": 3140, "loss": 2.6582, "lr": 0.0003, "tps": 338067, "tokens": 3292528640, "gpu_gb": 72.1, "elapsed_s": 9869.2}
{"step": 3150, "loss": 2.6527, "lr": 0.0003, "tps": 337977, "tokens": 3303014400, "gpu_gb": 72.1, "elapsed_s": 9900.2}
{"step": 3160, "loss": 2.6547, "lr": 0.0003, "tps": 337537, "tokens": 3313500160, "gpu_gb": 72.1, "elapsed_s": 9931.3}
{"step": 3170, "loss": 2.6331, "lr": 0.0003, "tps": 337606, "tokens": 3323985920, "gpu_gb": 72.1, "elapsed_s": 9962.4}
{"step": 3180, "loss": 2.6393, "lr": 0.0003, "tps": 337434, "tokens": 3334471680, "gpu_gb": 72.1, "elapsed_s": 9993.4}
{"step": 3190, "loss": 2.6034, "lr": 0.0003, "tps": 337686, "tokens": 3344957440, "gpu_gb": 72.1, "elapsed_s": 10024.5}
{"step": 3200, "loss": 2.6009, "lr": 0.0003, "tps": 337311, "tokens": 3355443200, "gpu_gb": 72.1, "elapsed_s": 10055.6}
{"step": 3210, "loss": 2.6328, "lr": 0.0003, "tps": 337632, "tokens": 3365928960, "gpu_gb": 72.1, "elapsed_s": 10086.6}
{"step": 3220, "loss": 2.6452, "lr": 0.0003, "tps": 337786, "tokens": 3376414720, "gpu_gb": 72.1, "elapsed_s": 10117.7}
{"step": 3230, "loss": 2.6676, "lr": 0.0003, "tps": 337466, "tokens": 3386900480, "gpu_gb": 72.1, "elapsed_s": 10148.8}
{"step": 3240, "loss": 2.6291, "lr": 0.0003, "tps": 337747, "tokens": 3397386240, "gpu_gb": 72.1, "elapsed_s": 10179.8}
{"step": 3250, "loss": 2.6189, "lr": 0.0003, "tps": 337532, "tokens": 3407872000, "gpu_gb": 72.1, "elapsed_s": 10210.9}
{"step": 3260, "loss": 2.6058, "lr": 0.0003, "tps": 337603, "tokens": 3418357760, "gpu_gb": 72.1, "elapsed_s": 10241.9}
{"step": 3270, "loss": 2.6175, "lr": 0.0003, "tps": 337503, "tokens": 3428843520, "gpu_gb": 72.1, "elapsed_s": 10273.0}
{"step": 3280, "loss": 2.6447, "lr": 0.0003, "tps": 337600, "tokens": 3439329280, "gpu_gb": 72.1, "elapsed_s": 10304.1}
{"step": 3290, "loss": 2.6004, "lr": 0.0003, "tps": 337789, "tokens": 3449815040, "gpu_gb": 72.1, "elapsed_s": 10335.1}
{"step": 3300, "loss": 2.6404, "lr": 0.0003, "tps": 337577, "tokens": 3460300800, "gpu_gb": 72.1, "elapsed_s": 10366.2}
{"step": 3310, "loss": 2.6517, "lr": 0.0003, "tps": 337546, "tokens": 3470786560, "gpu_gb": 72.1, "elapsed_s": 10397.2}
{"step": 3320, "loss": 2.6481, "lr": 0.0003, "tps": 337403, "tokens": 3481272320, "gpu_gb": 72.1, "elapsed_s": 10428.3}
{"step": 3330, "loss": 2.628, "lr": 0.0003, "tps": 337721, "tokens": 3491758080, "gpu_gb": 72.1, "elapsed_s": 10459.4}
{"step": 3340, "loss": 2.6305, "lr": 0.0003, "tps": 337592, "tokens": 3502243840, "gpu_gb": 72.1, "elapsed_s": 10490.4}
{"step": 3350, "loss": 2.641, "lr": 0.0003, "tps": 337607, "tokens": 3512729600, "gpu_gb": 72.1, "elapsed_s": 10521.5}
{"step": 3360, "loss": 2.5945, "lr": 0.0003, "tps": 337921, "tokens": 3523215360, "gpu_gb": 72.1, "elapsed_s": 10552.5}
{"step": 3370, "loss": 2.6655, "lr": 0.0003, "tps": 337627, "tokens": 3533701120, "gpu_gb": 72.1, "elapsed_s": 10583.6}
{"step": 3380, "loss": 2.6196, "lr": 0.0003, "tps": 337912, "tokens": 3544186880, "gpu_gb": 72.1, "elapsed_s": 10614.6}
{"step": 3390, "loss": 2.5826, "lr": 0.0003, "tps": 337705, "tokens": 3554672640, "gpu_gb": 72.1, "elapsed_s": 10645.6}
{"step": 3400, "loss": 2.7116, "lr": 0.0003, "tps": 337580, "tokens": 3565158400, "gpu_gb": 72.1, "elapsed_s": 10676.7}
{"step": 3410, "loss": 2.6541, "lr": 0.0003, "tps": 337575, "tokens": 3575644160, "gpu_gb": 72.1, "elapsed_s": 10707.8}
{"step": 3420, "loss": 2.6236, "lr": 0.0003, "tps": 337362, "tokens": 3586129920, "gpu_gb": 72.1, "elapsed_s": 10738.8}
{"step": 3430, "loss": 2.6686, "lr": 0.0003, "tps": 337789, "tokens": 3596615680, "gpu_gb": 72.1, "elapsed_s": 10769.9}
{"step": 3440, "loss": 2.656, "lr": 0.0003, "tps": 337796, "tokens": 3607101440, "gpu_gb": 72.1, "elapsed_s": 10800.9}
{"step": 3450, "loss": 2.6449, "lr": 0.0003, "tps": 337744, "tokens": 3617587200, "gpu_gb": 72.1, "elapsed_s": 10832.0}
{"step": 3460, "loss": 2.6482, "lr": 0.0003, "tps": 337516, "tokens": 3628072960, "gpu_gb": 72.1, "elapsed_s": 10863.0}
{"step": 3470, "loss": 2.5858, "lr": 0.0003, "tps": 337859, "tokens": 3638558720, "gpu_gb": 72.1, "elapsed_s": 10894.1}
{"step": 3480, "loss": 2.6516, "lr": 0.0003, "tps": 337514, "tokens": 3649044480, "gpu_gb": 72.1, "elapsed_s": 10925.1}
{"step": 3490, "loss": 2.6322, "lr": 0.0003, "tps": 337474, "tokens": 3659530240, "gpu_gb": 72.1, "elapsed_s": 10956.2}
{"step": 3500, "loss": 2.6456, "lr": 0.0003, "tps": 337762, "tokens": 3670016000, "gpu_gb": 72.1, "elapsed_s": 10987.3}
{"step": 3510, "loss": 2.6517, "lr": 0.0003, "tps": 337363, "tokens": 3680501760, "gpu_gb": 72.1, "elapsed_s": 11018.3}
{"step": 3520, "loss": 2.5923, "lr": 0.0003, "tps": 337738, "tokens": 3690987520, "gpu_gb": 72.1, "elapsed_s": 11049.4}
{"step": 3530, "loss": 2.6046, "lr": 0.0003, "tps": 337624, "tokens": 3701473280, "gpu_gb": 72.1, "elapsed_s": 11080.5}
{"step": 3540, "loss": 2.5807, "lr": 0.0003, "tps": 337681, "tokens": 3711959040, "gpu_gb": 72.1, "elapsed_s": 11111.5}
{"step": 3550, "loss": 2.6158, "lr": 0.0003, "tps": 337656, "tokens": 3722444800, "gpu_gb": 72.1, "elapsed_s": 11142.6}
{"step": 3560, "loss": 2.6053, "lr": 0.0003, "tps": 337412, "tokens": 3732930560, "gpu_gb": 72.1, "elapsed_s": 11173.6}
{"step": 3570, "loss": 2.5928, "lr": 0.0003, "tps": 337865, "tokens": 3743416320, "gpu_gb": 72.1, "elapsed_s": 11204.7}
{"step": 3580, "loss": 2.5679, "lr": 0.0003, "tps": 337509, "tokens": 3753902080, "gpu_gb": 72.1, "elapsed_s": 11235.7}
{"step": 3590, "loss": 2.6285, "lr": 0.0003, "tps": 337448, "tokens": 3764387840, "gpu_gb": 72.1, "elapsed_s": 11266.8}
{"step": 3600, "loss": 2.5625, "lr": 0.0003, "tps": 337691, "tokens": 3774873600, "gpu_gb": 72.1, "elapsed_s": 11297.9}
{"step": 3610, "loss": 2.5906, "lr": 0.0003, "tps": 337749, "tokens": 3785359360, "gpu_gb": 72.1, "elapsed_s": 11328.9}
{"step": 3620, "loss": 2.6156, "lr": 0.0003, "tps": 337632, "tokens": 3795845120, "gpu_gb": 72.1, "elapsed_s": 11360.0}
{"step": 3630, "loss": 2.5852, "lr": 0.0003, "tps": 337855, "tokens": 3806330880, "gpu_gb": 72.1, "elapsed_s": 11391.0}
{"step": 3640, "loss": 2.599, "lr": 0.0003, "tps": 337838, "tokens": 3816816640, "gpu_gb": 72.1, "elapsed_s": 11422.0}
{"step": 3650, "loss": 2.5944, "lr": 0.0003, "tps": 337965, "tokens": 3827302400, "gpu_gb": 72.1, "elapsed_s": 11453.1}
{"step": 3660, "loss": 2.6205, "lr": 0.0003, "tps": 337744, "tokens": 3837788160, "gpu_gb": 72.1, "elapsed_s": 11484.1}
{"step": 3670, "loss": 2.5996, "lr": 0.0003, "tps": 337366, "tokens": 3848273920, "gpu_gb": 72.1, "elapsed_s": 11515.2}
{"step": 3680, "loss": 2.5911, "lr": 0.0003, "tps": 337520, "tokens": 3858759680, "gpu_gb": 72.1, "elapsed_s": 11546.3}
{"step": 3690, "loss": 2.585, "lr": 0.0003, "tps": 337820, "tokens": 3869245440, "gpu_gb": 72.1, "elapsed_s": 11577.3}
{"step": 3700, "loss": 2.6261, "lr": 0.0003, "tps": 337483, "tokens": 3879731200, "gpu_gb": 72.1, "elapsed_s": 11608.4}
{"step": 3710, "loss": 2.5715, "lr": 0.0003, "tps": 337535, "tokens": 3890216960, "gpu_gb": 72.1, "elapsed_s": 11639.4}
{"step": 3720, "loss": 2.6006, "lr": 0.0003, "tps": 338020, "tokens": 3900702720, "gpu_gb": 72.1, "elapsed_s": 11670.5}
{"step": 3730, "loss": 2.6168, "lr": 0.0003, "tps": 337548, "tokens": 3911188480, "gpu_gb": 72.1, "elapsed_s": 11701.5}
{"step": 3740, "loss": 2.5848, "lr": 0.0003, "tps": 337735, "tokens": 3921674240, "gpu_gb": 72.1, "elapsed_s": 11732.6}
{"step": 3750, "loss": 2.5683, "lr": 0.0003, "tps": 337742, "tokens": 3932160000, "gpu_gb": 72.1, "elapsed_s": 11763.6}
{"step": 3760, "loss": 2.6049, "lr": 0.0003, "tps": 337655, "tokens": 3942645760, "gpu_gb": 72.1, "elapsed_s": 11794.7}
{"step": 3770, "loss": 2.5922, "lr": 0.0003, "tps": 337867, "tokens": 3953131520, "gpu_gb": 72.1, "elapsed_s": 11825.7}
{"step": 3780, "loss": 2.5864, "lr": 0.0003, "tps": 337532, "tokens": 3963617280, "gpu_gb": 72.1, "elapsed_s": 11856.8}
{"step": 3790, "loss": 2.5699, "lr": 0.0003, "tps": 337987, "tokens": 3974103040, "gpu_gb": 72.1, "elapsed_s": 11887.8}
{"step": 3800, "loss": 2.5878, "lr": 0.0003, "tps": 337747, "tokens": 3984588800, "gpu_gb": 72.1, "elapsed_s": 11918.9}
{"step": 3810, "loss": 2.5911, "lr": 0.0003, "tps": 337859, "tokens": 3995074560, "gpu_gb": 72.1, "elapsed_s": 11949.9}
{"step": 3820, "loss": 2.5905, "lr": 0.0003, "tps": 337644, "tokens": 4005560320, "gpu_gb": 72.1, "elapsed_s": 11980.9}
{"step": 3830, "loss": 2.5713, "lr": 0.0003, "tps": 337668, "tokens": 4016046080, "gpu_gb": 72.1, "elapsed_s": 12012.0}
{"step": 3840, "loss": 2.5364, "lr": 0.0003, "tps": 337552, "tokens": 4026531840, "gpu_gb": 72.1, "elapsed_s": 12043.1}
{"step": 3850, "loss": 2.6053, "lr": 0.0003, "tps": 337502, "tokens": 4037017600, "gpu_gb": 72.1, "elapsed_s": 12074.1}
{"step": 3860, "loss": 2.592, "lr": 0.0003, "tps": 337902, "tokens": 4047503360, "gpu_gb": 72.1, "elapsed_s": 12105.2}
{"step": 3870, "loss": 2.5767, "lr": 0.0003, "tps": 337669, "tokens": 4057989120, "gpu_gb": 72.1, "elapsed_s": 12136.2}
{"step": 3880, "loss": 2.5745, "lr": 0.0003, "tps": 337617, "tokens": 4068474880, "gpu_gb": 72.1, "elapsed_s": 12167.3}
{"step": 3890, "loss": 2.6142, "lr": 0.0003, "tps": 337714, "tokens": 4078960640, "gpu_gb": 72.1, "elapsed_s": 12198.3}
{"step": 3900, "loss": 2.6187, "lr": 0.0003, "tps": 337771, "tokens": 4089446400, "gpu_gb": 72.1, "elapsed_s": 12229.4}
{"step": 3910, "loss": 2.5859, "lr": 0.0003, "tps": 337642, "tokens": 4099932160, "gpu_gb": 72.1, "elapsed_s": 12260.4}
{"step": 3920, "loss": 2.5556, "lr": 0.0003, "tps": 337720, "tokens": 4110417920, "gpu_gb": 72.1, "elapsed_s": 12291.5}
{"step": 3930, "loss": 2.5431, "lr": 0.0003, "tps": 337942, "tokens": 4120903680, "gpu_gb": 72.1, "elapsed_s": 12322.5}
{"step": 3940, "loss": 2.5953, "lr": 0.0003, "tps": 337839, "tokens": 4131389440, "gpu_gb": 72.1, "elapsed_s": 12353.5}
{"step": 3950, "loss": 2.5908, "lr": 0.0003, "tps": 337555, "tokens": 4141875200, "gpu_gb": 72.1, "elapsed_s": 12384.6}
{"step": 3960, "loss": 2.6054, "lr": 0.0003, "tps": 338033, "tokens": 4152360960, "gpu_gb": 72.1, "elapsed_s": 12415.6}
{"step": 3970, "loss": 2.5285, "lr": 0.0003, "tps": 337595, "tokens": 4162846720, "gpu_gb": 72.1, "elapsed_s": 12446.7}
{"step": 3980, "loss": 2.5894, "lr": 0.0003, "tps": 337355, "tokens": 4173332480, "gpu_gb": 72.1, "elapsed_s": 12477.8}
{"step": 3990, "loss": 2.5877, "lr": 0.0003, "tps": 337493, "tokens": 4183818240, "gpu_gb": 72.1, "elapsed_s": 12508.8}
{"step": 4000, "loss": 2.5556, "lr": 0.0003, "tps": 337724, "tokens": 4194304000, "gpu_gb": 72.1, "elapsed_s": 12539.9}
{"step": 4010, "loss": 2.5411, "lr": 0.0003, "tps": 147785, "tokens": 4204789760, "gpu_gb": 72.1, "elapsed_s": 12610.8}
{"step": 4020, "loss": 2.6612, "lr": 0.0003, "tps": 337598, "tokens": 4215275520, "gpu_gb": 72.1, "elapsed_s": 12641.9}
{"step": 4030, "loss": 2.5523, "lr": 0.0003, "tps": 337709, "tokens": 4225761280, "gpu_gb": 72.1, "elapsed_s": 12673.0}
{"step": 4040, "loss": 2.5812, "lr": 0.0003, "tps": 337767, "tokens": 4236247040, "gpu_gb": 72.1, "elapsed_s": 12704.0}
{"step": 4050, "loss": 2.5466, "lr": 0.0003, "tps": 337712, "tokens": 4246732800, "gpu_gb": 72.1, "elapsed_s": 12735.0}
{"step": 4060, "loss": 2.5981, "lr": 0.0003, "tps": 337871, "tokens": 4257218560, "gpu_gb": 72.1, "elapsed_s": 12766.1}
{"step": 4070, "loss": 2.571, "lr": 0.0003, "tps": 337772, "tokens": 4267704320, "gpu_gb": 72.1, "elapsed_s": 12797.1}
{"step": 4080, "loss": 2.5869, "lr": 0.0003, "tps": 337822, "tokens": 4278190080, "gpu_gb": 72.1, "elapsed_s": 12828.2}
{"step": 4090, "loss": 2.5913, "lr": 0.0003, "tps": 337838, "tokens": 4288675840, "gpu_gb": 72.1, "elapsed_s": 12859.2}
{"step": 4100, "loss": 2.5863, "lr": 0.0003, "tps": 337692, "tokens": 4299161600, "gpu_gb": 72.1, "elapsed_s": 12890.3}
{"step": 4110, "loss": 2.5862, "lr": 0.0003, "tps": 337990, "tokens": 4309647360, "gpu_gb": 72.1, "elapsed_s": 12921.3}
{"step": 4120, "loss": 2.5889, "lr": 0.0003, "tps": 337758, "tokens": 4320133120, "gpu_gb": 72.1, "elapsed_s": 12952.3}
{"step": 4130, "loss": 2.5421, "lr": 0.0003, "tps": 337792, "tokens": 4330618880, "gpu_gb": 72.1, "elapsed_s": 12983.4}
{"step": 4140, "loss": 2.5036, "lr": 0.0003, "tps": 337737, "tokens": 4341104640, "gpu_gb": 72.1, "elapsed_s": 13014.4}
{"step": 4150, "loss": 2.5193, "lr": 0.0003, "tps": 337667, "tokens": 4351590400, "gpu_gb": 72.1, "elapsed_s": 13045.5}
{"step": 4160, "loss": 2.5604, "lr": 0.0003, "tps": 337580, "tokens": 4362076160, "gpu_gb": 72.1, "elapsed_s": 13076.5}
{"step": 4170, "loss": 2.5305, "lr": 0.0003, "tps": 337381, "tokens": 4372561920, "gpu_gb": 72.1, "elapsed_s": 13107.6}
{"step": 4180, "loss": 2.5787, "lr": 0.0003, "tps": 337935, "tokens": 4383047680, "gpu_gb": 72.1, "elapsed_s": 13138.6}
{"step": 4190, "loss": 2.5673, "lr": 0.0003, "tps": 337915, "tokens": 4393533440, "gpu_gb": 72.1, "elapsed_s": 13169.7}
{"step": 4200, "loss": 2.5664, "lr": 0.0003, "tps": 337841, "tokens": 4404019200, "gpu_gb": 72.1, "elapsed_s": 13200.7}
{"step": 4210, "loss": 2.5606, "lr": 0.0003, "tps": 337760, "tokens": 4414504960, "gpu_gb": 72.1, "elapsed_s": 13231.8}
{"step": 4220, "loss": 2.5826, "lr": 0.0003, "tps": 337700, "tokens": 4424990720, "gpu_gb": 72.1, "elapsed_s": 13262.8}
{"step": 4230, "loss": 2.5886, "lr": 0.0003, "tps": 337896, "tokens": 4435476480, "gpu_gb": 72.1, "elapsed_s": 13293.8}
{"step": 4240, "loss": 2.5528, "lr": 0.0003, "tps": 337707, "tokens": 4445962240, "gpu_gb": 72.1, "elapsed_s": 13324.9}
{"step": 4250, "loss": 2.5739, "lr": 0.0003, "tps": 337966, "tokens": 4456448000, "gpu_gb": 72.1, "elapsed_s": 13355.9}
{"step": 4260, "loss": 2.5612, "lr": 0.0003, "tps": 337818, "tokens": 4466933760, "gpu_gb": 72.1, "elapsed_s": 13387.0}
{"step": 4270, "loss": 2.5661, "lr": 0.0003, "tps": 337732, "tokens": 4477419520, "gpu_gb": 72.1, "elapsed_s": 13418.0}
{"step": 4280, "loss": 2.5676, "lr": 0.0003, "tps": 337753, "tokens": 4487905280, "gpu_gb": 72.1, "elapsed_s": 13449.0}
{"step": 4290, "loss": 2.5376, "lr": 0.0003, "tps": 337850, "tokens": 4498391040, "gpu_gb": 72.1, "elapsed_s": 13480.1}
{"step": 4300, "loss": 2.5677, "lr": 0.0003, "tps": 337683, "tokens": 4508876800, "gpu_gb": 72.1, "elapsed_s": 13511.1}
{"step": 4310, "loss": 2.5739, "lr": 0.0003, "tps": 337967, "tokens": 4519362560, "gpu_gb": 72.1, "elapsed_s": 13542.2}
{"step": 4320, "loss": 2.556, "lr": 0.0003, "tps": 337654, "tokens": 4529848320, "gpu_gb": 72.1, "elapsed_s": 13573.2}
{"step": 4330, "loss": 2.5797, "lr": 0.0003, "tps": 338000, "tokens": 4540334080, "gpu_gb": 72.1, "elapsed_s": 13604.2}
{"step": 4340, "loss": 2.5734, "lr": 0.0003, "tps": 338042, "tokens": 4550819840, "gpu_gb": 72.1, "elapsed_s": 13635.3}
{"step": 4350, "loss": 2.562, "lr": 0.0003, "tps": 337803, "tokens": 4561305600, "gpu_gb": 72.1, "elapsed_s": 13666.3}
{"step": 4360, "loss": 2.5436, "lr": 0.0003, "tps": 337602, "tokens": 4571791360, "gpu_gb": 72.1, "elapsed_s": 13697.4}
{"step": 4370, "loss": 2.5909, "lr": 0.0003, "tps": 337984, "tokens": 4582277120, "gpu_gb": 72.1, "elapsed_s": 13728.4}
{"step": 4380, "loss": 2.5682, "lr": 0.0003, "tps": 337919, "tokens": 4592762880, "gpu_gb": 72.1, "elapsed_s": 13759.4}
{"step": 4390, "loss": 2.5361, "lr": 0.0003, "tps": 337766, "tokens": 4603248640, "gpu_gb": 72.1, "elapsed_s": 13790.5}
{"step": 4400, "loss": 2.5468, "lr": 0.0003, "tps": 337463, "tokens": 4613734400, "gpu_gb": 72.1, "elapsed_s": 13821.5}
{"step": 4410, "loss": 2.5461, "lr": 0.0003, "tps": 337605, "tokens": 4624220160, "gpu_gb": 72.1, "elapsed_s": 13852.6}
{"step": 4420, "loss": 2.5927, "lr": 0.0003, "tps": 337893, "tokens": 4634705920, "gpu_gb": 72.1, "elapsed_s": 13883.6}
{"step": 4430, "loss": 2.5951, "lr": 0.0003, "tps": 337748, "tokens": 4645191680, "gpu_gb": 72.1, "elapsed_s": 13914.7}
{"step": 4440, "loss": 2.5364, "lr": 0.0003, "tps": 337714, "tokens": 4655677440, "gpu_gb": 72.1, "elapsed_s": 13945.7}
{"step": 4450, "loss": 2.536, "lr": 0.0003, "tps": 337885, "tokens": 4666163200, "gpu_gb": 72.1, "elapsed_s": 13976.8}
{"step": 4460, "loss": 2.5677, "lr": 0.0003, "tps": 337973, "tokens": 4676648960, "gpu_gb": 72.1, "elapsed_s": 14007.8}
{"step": 4470, "loss": 2.5444, "lr": 0.0003, "tps": 337879, "tokens": 4687134720, "gpu_gb": 72.1, "elapsed_s": 14038.8}
{"step": 4480, "loss": 2.5839, "lr": 0.0003, "tps": 337695, "tokens": 4697620480, "gpu_gb": 72.1, "elapsed_s": 14069.9}
{"step": 4490, "loss": 2.5841, "lr": 0.0003, "tps": 337706, "tokens": 4708106240, "gpu_gb": 72.1, "elapsed_s": 14100.9}
{"step": 4500, "loss": 2.5672, "lr": 0.0003, "tps": 337626, "tokens": 4718592000, "gpu_gb": 72.1, "elapsed_s": 14132.0}
{"step": 4510, "loss": 2.5324, "lr": 0.0003, "tps": 337961, "tokens": 4729077760, "gpu_gb": 72.1, "elapsed_s": 14163.0}
{"step": 4520, "loss": 2.5517, "lr": 0.0003, "tps": 335503, "tokens": 4739563520, "gpu_gb": 72.1, "elapsed_s": 14194.3}
{"step": 4530, "loss": 2.5905, "lr": 0.0003, "tps": 337605, "tokens": 4750049280, "gpu_gb": 72.1, "elapsed_s": 14225.3}
{"step": 4540, "loss": 2.5433, "lr": 0.0003, "tps": 337648, "tokens": 4760535040, "gpu_gb": 72.1, "elapsed_s": 14256.4}
{"step": 4550, "loss": 2.5335, "lr": 0.0003, "tps": 337947, "tokens": 4771020800, "gpu_gb": 72.1, "elapsed_s": 14287.4}
{"step": 4560, "loss": 2.5216, "lr": 0.0003, "tps": 337822, "tokens": 4781506560, "gpu_gb": 72.1, "elapsed_s": 14318.4}
{"step": 4570, "loss": 2.5193, "lr": 0.0003, "tps": 337999, "tokens": 4791992320, "gpu_gb": 72.1, "elapsed_s": 14349.5}
{"step": 4580, "loss": 2.5363, "lr": 0.0003, "tps": 337633, "tokens": 4802478080, "gpu_gb": 72.1, "elapsed_s": 14380.5}
{"step": 4590, "loss": 2.5553, "lr": 0.0003, "tps": 337932, "tokens": 4812963840, "gpu_gb": 72.1, "elapsed_s": 14411.5}
{"step": 4600, "loss": 2.5579, "lr": 0.0003, "tps": 337667, "tokens": 4823449600, "gpu_gb": 72.1, "elapsed_s": 14442.6}
{"step": 4610, "loss": 2.5742, "lr": 0.0003, "tps": 338028, "tokens": 4833935360, "gpu_gb": 72.1, "elapsed_s": 14473.6}
{"step": 4620, "loss": 2.5384, "lr": 0.0003, "tps": 337933, "tokens": 4844421120, "gpu_gb": 72.1, "elapsed_s": 14504.7}
{"step": 4630, "loss": 2.5126, "lr": 0.0003, "tps": 337962, "tokens": 4854906880, "gpu_gb": 72.1, "elapsed_s": 14535.7}
{"step": 4640, "loss": 2.5432, "lr": 0.0003, "tps": 337831, "tokens": 4865392640, "gpu_gb": 72.1, "elapsed_s": 14566.7}
{"step": 4650, "loss": 2.4881, "lr": 0.0003, "tps": 337669, "tokens": 4875878400, "gpu_gb": 72.1, "elapsed_s": 14597.8}
{"step": 4660, "loss": 2.5438, "lr": 0.0003, "tps": 338017, "tokens": 4886364160, "gpu_gb": 72.1, "elapsed_s": 14628.8}
{"step": 4670, "loss": 2.5089, "lr": 0.0003, "tps": 337776, "tokens": 4896849920, "gpu_gb": 72.1, "elapsed_s": 14659.8}
{"step": 4680, "loss": 2.5586, "lr": 0.0003, "tps": 337827, "tokens": 4907335680, "gpu_gb": 72.1, "elapsed_s": 14690.9}
{"step": 4690, "loss": 2.5329, "lr": 0.0003, "tps": 338042, "tokens": 4917821440, "gpu_gb": 72.1, "elapsed_s": 14721.9}
{"step": 4700, "loss": 2.5521, "lr": 0.0003, "tps": 337787, "tokens": 4928307200, "gpu_gb": 72.1, "elapsed_s": 14752.9}
{"step": 4710, "loss": 2.5162, "lr": 0.0003, "tps": 337964, "tokens": 4938792960, "gpu_gb": 72.1, "elapsed_s": 14784.0}
{"step": 4720, "loss": 2.5317, "lr": 0.0003, "tps": 337960, "tokens": 4949278720, "gpu_gb": 72.1, "elapsed_s": 14815.0}
{"step": 4730, "loss": 2.53, "lr": 0.0003, "tps": 338187, "tokens": 4959764480, "gpu_gb": 72.1, "elapsed_s": 14846.0}
{"step": 4740, "loss": 2.5241, "lr": 0.0003, "tps": 337793, "tokens": 4970250240, "gpu_gb": 72.1, "elapsed_s": 14877.0}
{"step": 4750, "loss": 2.5324, "lr": 0.0003, "tps": 337838, "tokens": 4980736000, "gpu_gb": 72.1, "elapsed_s": 14908.1}
{"step": 4760, "loss": 2.5216, "lr": 0.0003, "tps": 337990, "tokens": 4991221760, "gpu_gb": 72.1, "elapsed_s": 14939.1}
{"step": 4770, "loss": 2.5279, "lr": 0.0003, "tps": 337857, "tokens": 5001707520, "gpu_gb": 72.1, "elapsed_s": 14970.1}
{"step": 4780, "loss": 2.5729, "lr": 0.0003, "tps": 337877, "tokens": 5012193280, "gpu_gb": 72.1, "elapsed_s": 15001.2}
{"step": 4790, "loss": 2.5177, "lr": 0.0003, "tps": 337866, "tokens": 5022679040, "gpu_gb": 72.1, "elapsed_s": 15032.2}
{"step": 4800, "loss": 2.539, "lr": 0.0003, "tps": 337814, "tokens": 5033164800, "gpu_gb": 72.1, "elapsed_s": 15063.2}
{"step": 4810, "loss": 2.5037, "lr": 0.0003, "tps": 337945, "tokens": 5043650560, "gpu_gb": 72.1, "elapsed_s": 15094.3}
{"step": 4820, "loss": 2.5126, "lr": 0.0003, "tps": 338144, "tokens": 5054136320, "gpu_gb": 72.1, "elapsed_s": 15125.3}
{"step": 4830, "loss": 2.5538, "lr": 0.0003, "tps": 337810, "tokens": 5064622080, "gpu_gb": 72.1, "elapsed_s": 15156.3}
{"step": 4840, "loss": 2.5435, "lr": 0.0003, "tps": 338174, "tokens": 5075107840, "gpu_gb": 72.1, "elapsed_s": 15187.3}
{"step": 4850, "loss": 2.4871, "lr": 0.0003, "tps": 337707, "tokens": 5085593600, "gpu_gb": 72.1, "elapsed_s": 15218.4}
{"step": 4860, "loss": 2.4867, "lr": 0.0003, "tps": 337809, "tokens": 5096079360, "gpu_gb": 72.1, "elapsed_s": 15249.4}
{"step": 4870, "loss": 2.5245, "lr": 0.0003, "tps": 337992, "tokens": 5106565120, "gpu_gb": 72.1, "elapsed_s": 15280.4}
{"step": 4880, "loss": 2.546, "lr": 0.0003, "tps": 337716, "tokens": 5117050880, "gpu_gb": 72.1, "elapsed_s": 15311.5}
{"step": 4890, "loss": 2.5548, "lr": 0.0003, "tps": 337791, "tokens": 5127536640, "gpu_gb": 72.1, "elapsed_s": 15342.5}
{"step": 4900, "loss": 2.5322, "lr": 0.0003, "tps": 338030, "tokens": 5138022400, "gpu_gb": 72.1, "elapsed_s": 15373.6}
{"step": 4910, "loss": 2.5564, "lr": 0.0003, "tps": 337849, "tokens": 5148508160, "gpu_gb": 72.1, "elapsed_s": 15404.6}
{"step": 4920, "loss": 2.5758, "lr": 0.0003, "tps": 337820, "tokens": 5158993920, "gpu_gb": 72.1, "elapsed_s": 15435.6}
{"step": 4930, "loss": 2.5226, "lr": 0.0003, "tps": 337863, "tokens": 5169479680, "gpu_gb": 72.1, "elapsed_s": 15466.7}
{"step": 4940, "loss": 2.5504, "lr": 0.0003, "tps": 338000, "tokens": 5179965440, "gpu_gb": 72.1, "elapsed_s": 15497.7}
{"step": 4950, "loss": 2.5756, "lr": 0.0003, "tps": 337748, "tokens": 5190451200, "gpu_gb": 72.1, "elapsed_s": 15528.7}
{"step": 4960, "loss": 2.538, "lr": 0.0003, "tps": 337946, "tokens": 5200936960, "gpu_gb": 72.1, "elapsed_s": 15559.8}
{"step": 4970, "loss": 2.5267, "lr": 0.0003, "tps": 337848, "tokens": 5211422720, "gpu_gb": 72.1, "elapsed_s": 15590.8}
{"step": 4980, "loss": 2.5336, "lr": 0.0003, "tps": 337821, "tokens": 5221908480, "gpu_gb": 72.1, "elapsed_s": 15621.8}
{"step": 4990, "loss": 2.5416, "lr": 0.0003, "tps": 337884, "tokens": 5232394240, "gpu_gb": 72.1, "elapsed_s": 15652.9}
{"step": 5000, "loss": 2.5545, "lr": 0.0003, "tps": 337762, "tokens": 5242880000, "gpu_gb": 72.1, "elapsed_s": 15683.9}
{"step": 5010, "loss": 2.5568, "lr": 0.0003, "tps": 133573, "tokens": 5253365760, "gpu_gb": 72.1, "elapsed_s": 15762.4}
{"step": 5020, "loss": 2.5374, "lr": 0.0003, "tps": 337309, "tokens": 5263851520, "gpu_gb": 72.1, "elapsed_s": 15793.5}
{"step": 5030, "loss": 2.5171, "lr": 0.0003, "tps": 337199, "tokens": 5274337280, "gpu_gb": 72.1, "elapsed_s": 15824.6}
{"step": 5040, "loss": 2.5395, "lr": 0.0003, "tps": 337600, "tokens": 5284823040, "gpu_gb": 72.1, "elapsed_s": 15855.7}
{"step": 5050, "loss": 2.5123, "lr": 0.0003, "tps": 337242, "tokens": 5295308800, "gpu_gb": 72.1, "elapsed_s": 15886.8}
{"step": 5060, "loss": 2.5658, "lr": 0.0003, "tps": 337387, "tokens": 5305794560, "gpu_gb": 72.1, "elapsed_s": 15917.8}
{"step": 5070, "loss": 2.5499, "lr": 0.0003, "tps": 337245, "tokens": 5316280320, "gpu_gb": 72.1, "elapsed_s": 15948.9}
{"step": 5080, "loss": 2.5015, "lr": 0.0003, "tps": 337351, "tokens": 5326766080, "gpu_gb": 72.1, "elapsed_s": 15980.0}
{"step": 5090, "loss": 2.5627, "lr": 0.0003, "tps": 337177, "tokens": 5337251840, "gpu_gb": 72.1, "elapsed_s": 16011.1}
{"step": 5100, "loss": 2.5624, "lr": 0.0003, "tps": 334918, "tokens": 5347737600, "gpu_gb": 72.1, "elapsed_s": 16042.4}
{"step": 5110, "loss": 2.5013, "lr": 0.0003, "tps": 337367, "tokens": 5358223360, "gpu_gb": 72.1, "elapsed_s": 16073.5}
{"step": 5120, "loss": 2.4917, "lr": 0.0003, "tps": 337305, "tokens": 5368709120, "gpu_gb": 72.1, "elapsed_s": 16104.6}
{"step": 5130, "loss": 2.4839, "lr": 0.0003, "tps": 337874, "tokens": 5379194880, "gpu_gb": 72.1, "elapsed_s": 16135.6}
{"step": 5140, "loss": 2.5804, "lr": 0.0003, "tps": 337724, "tokens": 5389680640, "gpu_gb": 72.1, "elapsed_s": 16166.7}
{"step": 5150, "loss": 2.5554, "lr": 0.0003, "tps": 337913, "tokens": 5400166400, "gpu_gb": 72.1, "elapsed_s": 16197.7}
{"step": 5160, "loss": 2.5272, "lr": 0.0003, "tps": 338072, "tokens": 5410652160, "gpu_gb": 72.1, "elapsed_s": 16228.7}
{"step": 5170, "loss": 2.5529, "lr": 0.0003, "tps": 337859, "tokens": 5421137920, "gpu_gb": 72.1, "elapsed_s": 16259.8}
{"step": 5180, "loss": 2.5629, "lr": 0.0003, "tps": 337657, "tokens": 5431623680, "gpu_gb": 72.1, "elapsed_s": 16290.8}
{"step": 5190, "loss": 2.5816, "lr": 0.0003, "tps": 337370, "tokens": 5442109440, "gpu_gb": 72.1, "elapsed_s": 16321.9}
{"step": 5200, "loss": 2.5452, "lr": 0.0003, "tps": 337542, "tokens": 5452595200, "gpu_gb": 72.1, "elapsed_s": 16353.0}
{"step": 5210, "loss": 2.5133, "lr": 0.0003, "tps": 337610, "tokens": 5463080960, "gpu_gb": 72.1, "elapsed_s": 16384.0}
{"step": 5220, "loss": 2.4899, "lr": 0.0003, "tps": 337885, "tokens": 5473566720, "gpu_gb": 72.1, "elapsed_s": 16415.1}
{"step": 5230, "loss": 2.5105, "lr": 0.0003, "tps": 337964, "tokens": 5484052480, "gpu_gb": 72.1, "elapsed_s": 16446.1}
{"step": 5240, "loss": 2.5086, "lr": 0.0003, "tps": 338054, "tokens": 5494538240, "gpu_gb": 72.1, "elapsed_s": 16477.1}
{"step": 5250, "loss": 2.4937, "lr": 0.0003, "tps": 337995, "tokens": 5505024000, "gpu_gb": 72.1, "elapsed_s": 16508.1}
{"step": 5260, "loss": 2.5259, "lr": 0.0003, "tps": 337647, "tokens": 5515509760, "gpu_gb": 72.1, "elapsed_s": 16539.2}
{"step": 5270, "loss": 2.5178, "lr": 0.0003, "tps": 337882, "tokens": 5525995520, "gpu_gb": 72.1, "elapsed_s": 16570.2}
{"step": 5280, "loss": 2.468, "lr": 0.0003, "tps": 337979, "tokens": 5536481280, "gpu_gb": 72.1, "elapsed_s": 16601.2}
{"step": 5290, "loss": 2.5239, "lr": 0.0003, "tps": 337775, "tokens": 5546967040, "gpu_gb": 72.1, "elapsed_s": 16632.3}
{"step": 5300, "loss": 2.5277, "lr": 0.0003, "tps": 338074, "tokens": 5557452800, "gpu_gb": 72.1, "elapsed_s": 16663.3}
{"step": 5310, "loss": 2.4806, "lr": 0.0003, "tps": 337841, "tokens": 5567938560, "gpu_gb": 72.1, "elapsed_s": 16694.3}
{"step": 5320, "loss": 2.5073, "lr": 0.0003, "tps": 338122, "tokens": 5578424320, "gpu_gb": 72.1, "elapsed_s": 16725.4}
{"step": 5330, "loss": 2.5185, "lr": 0.0003, "tps": 337818, "tokens": 5588910080, "gpu_gb": 72.1, "elapsed_s": 16756.4}
{"step": 5340, "loss": 2.5538, "lr": 0.0003, "tps": 338151, "tokens": 5599395840, "gpu_gb": 72.1, "elapsed_s": 16787.4}
{"step": 5350, "loss": 2.5224, "lr": 0.0003, "tps": 337939, "tokens": 5609881600, "gpu_gb": 72.1, "elapsed_s": 16818.4}
{"step": 5360, "loss": 2.5138, "lr": 0.0003, "tps": 337561, "tokens": 5620367360, "gpu_gb": 72.1, "elapsed_s": 16849.5}
{"step": 5370, "loss": 2.5092, "lr": 0.0003, "tps": 337621, "tokens": 5630853120, "gpu_gb": 72.1, "elapsed_s": 16880.6}
{"step": 5380, "loss": 2.4538, "lr": 0.0003, "tps": 337400, "tokens": 5641338880, "gpu_gb": 72.1, "elapsed_s": 16911.6}
{"step": 5390, "loss": 2.5077, "lr": 0.0003, "tps": 337762, "tokens": 5651824640, "gpu_gb": 72.1, "elapsed_s": 16942.7}
{"step": 5400, "loss": 2.5441, "lr": 0.0003, "tps": 337393, "tokens": 5662310400, "gpu_gb": 72.1, "elapsed_s": 16973.8}
{"step": 5410, "loss": 2.4921, "lr": 0.0003, "tps": 337893, "tokens": 5672796160, "gpu_gb": 72.1, "elapsed_s": 17004.8}
{"step": 5420, "loss": 2.5372, "lr": 0.0003, "tps": 337864, "tokens": 5683281920, "gpu_gb": 72.1, "elapsed_s": 17035.8}
{"step": 5430, "loss": 2.4936, "lr": 0.0003, "tps": 338043, "tokens": 5693767680, "gpu_gb": 72.1, "elapsed_s": 17066.8}
{"step": 5440, "loss": 2.4938, "lr": 0.0003, "tps": 337607, "tokens": 5704253440, "gpu_gb": 72.1, "elapsed_s": 17097.9}
{"step": 5450, "loss": 2.5105, "lr": 0.0003, "tps": 338112, "tokens": 5714739200, "gpu_gb": 72.1, "elapsed_s": 17128.9}
{"step": 5460, "loss": 2.5084, "lr": 0.0003, "tps": 337889, "tokens": 5725224960, "gpu_gb": 72.1, "elapsed_s": 17159.9}
{"step": 5470, "loss": 2.4988, "lr": 0.0003, "tps": 338040, "tokens": 5735710720, "gpu_gb": 72.1, "elapsed_s": 17191.0}
{"step": 5480, "loss": 2.4789, "lr": 0.0003, "tps": 337771, "tokens": 5746196480, "gpu_gb": 72.1, "elapsed_s": 17222.0}
{"step": 5490, "loss": 2.4941, "lr": 0.0003, "tps": 337741, "tokens": 5756682240, "gpu_gb": 72.1, "elapsed_s": 17253.1}
{"step": 5500, "loss": 2.5377, "lr": 0.0003, "tps": 337659, "tokens": 5767168000, "gpu_gb": 72.1, "elapsed_s": 17284.1}
{"step": 5510, "loss": 2.4705, "lr": 0.0003, "tps": 337552, "tokens": 5777653760, "gpu_gb": 72.1, "elapsed_s": 17315.2}
{"step": 5520, "loss": 2.5094, "lr": 0.0003, "tps": 338016, "tokens": 5788139520, "gpu_gb": 72.1, "elapsed_s": 17346.2}
{"step": 5530, "loss": 2.4688, "lr": 0.0003, "tps": 337497, "tokens": 5798625280, "gpu_gb": 72.1, "elapsed_s": 17377.3}
{"step": 5540, "loss": 2.539, "lr": 0.0003, "tps": 337812, "tokens": 5809111040, "gpu_gb": 72.1, "elapsed_s": 17408.3}
{"step": 5550, "loss": 2.4728, "lr": 0.0003, "tps": 337299, "tokens": 5819596800, "gpu_gb": 72.1, "elapsed_s": 17439.4}
{"step": 5560, "loss": 2.4703, "lr": 0.0003, "tps": 337504, "tokens": 5830082560, "gpu_gb": 72.1, "elapsed_s": 17470.5}
{"step": 5570, "loss": 2.4934, "lr": 0.0003, "tps": 337722, "tokens": 5840568320, "gpu_gb": 72.1, "elapsed_s": 17501.5}
{"step": 5580, "loss": 2.4913, "lr": 0.0003, "tps": 337577, "tokens": 5851054080, "gpu_gb": 72.1, "elapsed_s": 17532.6}
{"step": 5590, "loss": 2.5104, "lr": 0.0003, "tps": 337484, "tokens": 5861539840, "gpu_gb": 72.1, "elapsed_s": 17563.6}
{"step": 5010, "loss": 2.7123, "lr": 0.0003, "tps": 278239, "tokens": 5253365760, "gpu_gb": 72.2, "elapsed_s": 37.7}
{"step": 5020, "loss": 2.6968, "lr": 0.0003, "tps": 338004, "tokens": 5263851520, "gpu_gb": 72.2, "elapsed_s": 68.7}
{"step": 5030, "loss": 2.7305, "lr": 0.0003, "tps": 338326, "tokens": 5274337280, "gpu_gb": 72.2, "elapsed_s": 99.7}
{"step": 5040, "loss": 2.6541, "lr": 0.0003, "tps": 338092, "tokens": 5284823040, "gpu_gb": 72.2, "elapsed_s": 130.7}
{"step": 5050, "loss": 2.6708, "lr": 0.0003, "tps": 338307, "tokens": 5295308800, "gpu_gb": 72.2, "elapsed_s": 161.7}
{"step": 5060, "loss": 2.6341, "lr": 0.0003, "tps": 338186, "tokens": 5305794560, "gpu_gb": 72.2, "elapsed_s": 192.7}
{"step": 5070, "loss": 2.6449, "lr": 0.0003, "tps": 338245, "tokens": 5316280320, "gpu_gb": 72.2, "elapsed_s": 223.7}
{"step": 5080, "loss": 2.6087, "lr": 0.0003, "tps": 337928, "tokens": 5326766080, "gpu_gb": 72.2, "elapsed_s": 254.7}
{"step": 5090, "loss": 2.6166, "lr": 0.0003, "tps": 337840, "tokens": 5337251840, "gpu_gb": 72.2, "elapsed_s": 285.8}
{"step": 5100, "loss": 2.6278, "lr": 0.0003, "tps": 338134, "tokens": 5347737600, "gpu_gb": 72.2, "elapsed_s": 316.8}
{"step": 5110, "loss": 2.6426, "lr": 0.0003, "tps": 338139, "tokens": 5358223360, "gpu_gb": 72.2, "elapsed_s": 347.8}
{"step": 5120, "loss": 2.5946, "lr": 0.0003, "tps": 338007, "tokens": 5368709120, "gpu_gb": 72.2, "elapsed_s": 378.8}
{"step": 5130, "loss": 2.6048, "lr": 0.0003, "tps": 338350, "tokens": 5379194880, "gpu_gb": 72.2, "elapsed_s": 409.8}
{"step": 5140, "loss": 2.5503, "lr": 0.0003, "tps": 338091, "tokens": 5389680640, "gpu_gb": 72.2, "elapsed_s": 440.8}
{"step": 5150, "loss": 2.612, "lr": 0.0003, "tps": 338155, "tokens": 5400166400, "gpu_gb": 72.2, "elapsed_s": 471.8}
{"step": 5160, "loss": 2.571, "lr": 0.0003, "tps": 337996, "tokens": 5410652160, "gpu_gb": 72.2, "elapsed_s": 502.9}
{"step": 5170, "loss": 2.5867, "lr": 0.0003, "tps": 338098, "tokens": 5421137920, "gpu_gb": 72.2, "elapsed_s": 533.9}
{"step": 5180, "loss": 2.5537, "lr": 0.0003, "tps": 338109, "tokens": 5431623680, "gpu_gb": 72.2, "elapsed_s": 564.9}
{"step": 5190, "loss": 2.5985, "lr": 0.0003, "tps": 338110, "tokens": 5442109440, "gpu_gb": 72.2, "elapsed_s": 595.9}
{"step": 5200, "loss": 2.5783, "lr": 0.0003, "tps": 338339, "tokens": 5452595200, "gpu_gb": 72.2, "elapsed_s": 626.9}
{"step": 5210, "loss": 2.5492, "lr": 0.0003, "tps": 338074, "tokens": 5463080960, "gpu_gb": 72.2, "elapsed_s": 657.9}
{"step": 5220, "loss": 2.595, "lr": 0.0003, "tps": 338232, "tokens": 5473566720, "gpu_gb": 72.2, "elapsed_s": 688.9}
{"step": 5230, "loss": 2.5698, "lr": 0.0003, "tps": 337932, "tokens": 5484052480, "gpu_gb": 72.2, "elapsed_s": 719.9}
{"step": 5240, "loss": 2.5751, "lr": 0.0003, "tps": 338300, "tokens": 5494538240, "gpu_gb": 72.2, "elapsed_s": 750.9}
{"step": 5250, "loss": 2.5329, "lr": 0.0003, "tps": 338269, "tokens": 5505024000, "gpu_gb": 72.2, "elapsed_s": 781.9}
{"step": 5260, "loss": 2.5191, "lr": 0.0003, "tps": 337989, "tokens": 5515509760, "gpu_gb": 72.2, "elapsed_s": 813.0}
{"step": 5270, "loss": 2.5908, "lr": 0.0003, "tps": 338075, "tokens": 5525995520, "gpu_gb": 72.2, "elapsed_s": 844.0}
{"step": 5280, "loss": 2.5338, "lr": 0.0003, "tps": 337818, "tokens": 5536481280, "gpu_gb": 72.2, "elapsed_s": 875.0}
{"step": 5290, "loss": 2.542, "lr": 0.0003, "tps": 337972, "tokens": 5546967040, "gpu_gb": 72.2, "elapsed_s": 906.0}
{"step": 5300, "loss": 2.545, "lr": 0.0003, "tps": 337827, "tokens": 5557452800, "gpu_gb": 72.2, "elapsed_s": 937.1}
{"step": 5310, "loss": 2.5654, "lr": 0.0003, "tps": 338054, "tokens": 5567938560, "gpu_gb": 72.2, "elapsed_s": 968.1}
{"step": 5320, "loss": 2.5315, "lr": 0.0003, "tps": 338359, "tokens": 5578424320, "gpu_gb": 72.2, "elapsed_s": 999.1}
{"step": 5330, "loss": 2.5302, "lr": 0.0003, "tps": 338483, "tokens": 5588910080, "gpu_gb": 72.2, "elapsed_s": 1030.1}
{"step": 5340, "loss": 2.5505, "lr": 0.0003, "tps": 337925, "tokens": 5599395840, "gpu_gb": 72.2, "elapsed_s": 1061.1}
{"step": 5350, "loss": 2.5622, "lr": 0.0003, "tps": 338758, "tokens": 5609881600, "gpu_gb": 72.2, "elapsed_s": 1092.1}
{"step": 5360, "loss": 2.5356, "lr": 0.0003, "tps": 338370, "tokens": 5620367360, "gpu_gb": 72.2, "elapsed_s": 1123.0}
{"step": 5370, "loss": 2.5368, "lr": 0.0003, "tps": 338273, "tokens": 5630853120, "gpu_gb": 72.2, "elapsed_s": 1154.0}
{"step": 5380, "loss": 2.5133, "lr": 0.0003, "tps": 338423, "tokens": 5641338880, "gpu_gb": 72.2, "elapsed_s": 1185.0}
{"step": 5390, "loss": 2.4983, "lr": 0.0003, "tps": 338033, "tokens": 5651824640, "gpu_gb": 72.2, "elapsed_s": 1216.1}
{"step": 5400, "loss": 2.5231, "lr": 0.0003, "tps": 338130, "tokens": 5662310400, "gpu_gb": 72.2, "elapsed_s": 1247.1}
{"step": 5410, "loss": 2.5236, "lr": 0.0003, "tps": 338284, "tokens": 5672796160, "gpu_gb": 72.2, "elapsed_s": 1278.1}
{"step": 5420, "loss": 2.4786, "lr": 0.0003, "tps": 338047, "tokens": 5683281920, "gpu_gb": 72.2, "elapsed_s": 1309.1}
{"step": 5430, "loss": 2.5229, "lr": 0.0003, "tps": 338241, "tokens": 5693767680, "gpu_gb": 72.2, "elapsed_s": 1340.1}
{"step": 5440, "loss": 2.5665, "lr": 0.0003, "tps": 338120, "tokens": 5704253440, "gpu_gb": 72.2, "elapsed_s": 1371.1}
{"step": 5450, "loss": 2.5129, "lr": 0.0003, "tps": 338319, "tokens": 5714739200, "gpu_gb": 72.2, "elapsed_s": 1402.1}
{"step": 5460, "loss": 2.5205, "lr": 0.0003, "tps": 338070, "tokens": 5725224960, "gpu_gb": 72.2, "elapsed_s": 1433.1}
{"step": 5470, "loss": 2.5075, "lr": 0.0003, "tps": 338016, "tokens": 5735710720, "gpu_gb": 72.2, "elapsed_s": 1464.1}
{"step": 5480, "loss": 2.4844, "lr": 0.0003, "tps": 338023, "tokens": 5746196480, "gpu_gb": 72.2, "elapsed_s": 1495.1}
{"step": 5490, "loss": 2.5115, "lr": 0.0003, "tps": 338285, "tokens": 5756682240, "gpu_gb": 72.2, "elapsed_s": 1526.1}
{"step": 5500, "loss": 2.5846, "lr": 0.0003, "tps": 338020, "tokens": 5767168000, "gpu_gb": 72.2, "elapsed_s": 1557.2}
{"step": 5510, "loss": 2.5365, "lr": 0.0003, "tps": 338105, "tokens": 5777653760, "gpu_gb": 72.2, "elapsed_s": 1588.2}
{"step": 5520, "loss": 2.5589, "lr": 0.0003, "tps": 338010, "tokens": 5788139520, "gpu_gb": 72.2, "elapsed_s": 1619.2}
{"step": 5530, "loss": 2.5428, "lr": 0.0003, "tps": 337947, "tokens": 5798625280, "gpu_gb": 72.2, "elapsed_s": 1650.2}
{"step": 5540, "loss": 2.5518, "lr": 0.0003, "tps": 338479, "tokens": 5809111040, "gpu_gb": 72.2, "elapsed_s": 1681.2}
{"step": 5550, "loss": 2.4893, "lr": 0.0003, "tps": 338322, "tokens": 5819596800, "gpu_gb": 72.2, "elapsed_s": 1712.2}
{"step": 5560, "loss": 2.5375, "lr": 0.0003, "tps": 337999, "tokens": 5830082560, "gpu_gb": 72.2, "elapsed_s": 1743.2}
{"step": 5570, "loss": 2.5275, "lr": 0.0003, "tps": 338330, "tokens": 5840568320, "gpu_gb": 72.2, "elapsed_s": 1774.2}
{"step": 5580, "loss": 2.5272, "lr": 0.0003, "tps": 338011, "tokens": 5851054080, "gpu_gb": 72.2, "elapsed_s": 1805.2}
{"step": 5590, "loss": 2.5691, "lr": 0.0003, "tps": 338522, "tokens": 5861539840, "gpu_gb": 72.2, "elapsed_s": 1836.2}
{"step": 5600, "loss": 2.5811, "lr": 0.0003, "tps": 338217, "tokens": 5872025600, "gpu_gb": 72.2, "elapsed_s": 1867.2}
{"step": 5610, "loss": 2.5461, "lr": 0.0003, "tps": 338240, "tokens": 5882511360, "gpu_gb": 72.2, "elapsed_s": 1898.2}
{"step": 5620, "loss": 2.5417, "lr": 0.0003, "tps": 337991, "tokens": 5892997120, "gpu_gb": 72.2, "elapsed_s": 1929.2}
{"step": 5630, "loss": 2.5171, "lr": 0.0003, "tps": 338096, "tokens": 5903482880, "gpu_gb": 72.2, "elapsed_s": 1960.3}
{"step": 5640, "loss": 2.5373, "lr": 0.0003, "tps": 338207, "tokens": 5913968640, "gpu_gb": 72.2, "elapsed_s": 1991.3}
{"step": 5650, "loss": 2.5145, "lr": 0.0003, "tps": 338257, "tokens": 5924454400, "gpu_gb": 72.2, "elapsed_s": 2022.3}
{"step": 5660, "loss": 2.4825, "lr": 0.0003, "tps": 338626, "tokens": 5934940160, "gpu_gb": 72.2, "elapsed_s": 2053.2}
{"step": 5670, "loss": 2.5356, "lr": 0.0003, "tps": 338025, "tokens": 5945425920, "gpu_gb": 72.2, "elapsed_s": 2084.2}
{"step": 5680, "loss": 2.494, "lr": 0.0003, "tps": 338093, "tokens": 5955911680, "gpu_gb": 72.2, "elapsed_s": 2115.3}
{"step": 5690, "loss": 2.5269, "lr": 0.0003, "tps": 338155, "tokens": 5966397440, "gpu_gb": 72.2, "elapsed_s": 2146.3}
{"step": 5700, "loss": 2.5304, "lr": 0.0003, "tps": 338536, "tokens": 5976883200, "gpu_gb": 72.2, "elapsed_s": 2177.2}
{"step": 5710, "loss": 2.5467, "lr": 0.0003, "tps": 338304, "tokens": 5987368960, "gpu_gb": 72.2, "elapsed_s": 2208.2}
{"step": 5720, "loss": 2.5167, "lr": 0.0003, "tps": 338053, "tokens": 5997854720, "gpu_gb": 72.2, "elapsed_s": 2239.3}
{"step": 5730, "loss": 2.5488, "lr": 0.0003, "tps": 337850, "tokens": 6008340480, "gpu_gb": 72.2, "elapsed_s": 2270.3}
{"step": 5740, "loss": 2.5165, "lr": 0.0003, "tps": 338353, "tokens": 6018826240, "gpu_gb": 72.2, "elapsed_s": 2301.3}
{"step": 5750, "loss": 2.4784, "lr": 0.0003, "tps": 338043, "tokens": 6029312000, "gpu_gb": 72.2, "elapsed_s": 2332.3}
{"step": 5760, "loss": 2.5241, "lr": 0.0003, "tps": 338226, "tokens": 6039797760, "gpu_gb": 72.2, "elapsed_s": 2363.3}
{"step": 5770, "loss": 2.5621, "lr": 0.0003, "tps": 337969, "tokens": 6050283520, "gpu_gb": 72.2, "elapsed_s": 2394.3}
{"step": 5780, "loss": 2.5338, "lr": 0.0003, "tps": 338560, "tokens": 6060769280, "gpu_gb": 72.2, "elapsed_s": 2425.3}
{"step": 5790, "loss": 2.4892, "lr": 0.0003, "tps": 338358, "tokens": 6071255040, "gpu_gb": 72.2, "elapsed_s": 2456.3}
{"step": 5800, "loss": 2.5158, "lr": 0.0003, "tps": 338322, "tokens": 6081740800, "gpu_gb": 72.2, "elapsed_s": 2487.3}
{"step": 5810, "loss": 2.5022, "lr": 0.0003, "tps": 338378, "tokens": 6092226560, "gpu_gb": 72.2, "elapsed_s": 2518.3}
{"step": 5820, "loss": 2.55, "lr": 0.0003, "tps": 338336, "tokens": 6102712320, "gpu_gb": 72.2, "elapsed_s": 2549.3}
{"step": 5830, "loss": 2.5598, "lr": 0.0003, "tps": 338762, "tokens": 6113198080, "gpu_gb": 72.2, "elapsed_s": 2580.2}
{"step": 5840, "loss": 2.5358, "lr": 0.0003, "tps": 338333, "tokens": 6123683840, "gpu_gb": 72.2, "elapsed_s": 2611.2}
{"step": 5850, "loss": 2.5335, "lr": 0.0003, "tps": 338631, "tokens": 6134169600, "gpu_gb": 72.2, "elapsed_s": 2642.2}
{"step": 5860, "loss": 2.5006, "lr": 0.0003, "tps": 338457, "tokens": 6144655360, "gpu_gb": 72.2, "elapsed_s": 2673.2}
{"step": 5870, "loss": 2.5547, "lr": 0.0003, "tps": 338308, "tokens": 6155141120, "gpu_gb": 72.2, "elapsed_s": 2704.2}
{"step": 5880, "loss": 2.5389, "lr": 0.0003, "tps": 338073, "tokens": 6165626880, "gpu_gb": 72.2, "elapsed_s": 2735.2}
{"step": 5890, "loss": 2.5584, "lr": 0.0003, "tps": 338524, "tokens": 6176112640, "gpu_gb": 72.2, "elapsed_s": 2766.2}
{"step": 5900, "loss": 2.5435, "lr": 0.0003, "tps": 338228, "tokens": 6186598400, "gpu_gb": 72.2, "elapsed_s": 2797.2}
{"step": 5910, "loss": 2.5287, "lr": 0.0003, "tps": 338556, "tokens": 6197084160, "gpu_gb": 72.2, "elapsed_s": 2828.1}
{"step": 5920, "loss": 2.5219, "lr": 0.0003, "tps": 338675, "tokens": 6207569920, "gpu_gb": 72.2, "elapsed_s": 2859.1}
{"step": 5930, "loss": 2.5619, "lr": 0.0003, "tps": 338258, "tokens": 6218055680, "gpu_gb": 72.2, "elapsed_s": 2890.1}
{"step": 5940, "loss": 2.5678, "lr": 0.0003, "tps": 338240, "tokens": 6228541440, "gpu_gb": 72.2, "elapsed_s": 2921.1}
{"step": 5950, "loss": 2.5204, "lr": 0.0003, "tps": 338301, "tokens": 6239027200, "gpu_gb": 72.2, "elapsed_s": 2952.1}
{"step": 5960, "loss": 2.4949, "lr": 0.0003, "tps": 337979, "tokens": 6249512960, "gpu_gb": 72.2, "elapsed_s": 2983.1}
{"step": 5970, "loss": 2.5149, "lr": 0.0003, "tps": 338316, "tokens": 6259998720, "gpu_gb": 72.2, "elapsed_s": 3014.1}
{"step": 5980, "loss": 2.5334, "lr": 0.0003, "tps": 338205, "tokens": 6270484480, "gpu_gb": 72.2, "elapsed_s": 3045.1}
{"step": 5990, "loss": 2.5047, "lr": 0.0003, "tps": 338382, "tokens": 6280970240, "gpu_gb": 72.2, "elapsed_s": 3076.1}
{"step": 6000, "loss": 2.5456, "lr": 0.0003, "tps": 338545, "tokens": 6291456000, "gpu_gb": 72.2, "elapsed_s": 3107.1}
{"step": 6010, "loss": 2.5297, "lr": 0.0003, "tps": 154340, "tokens": 6301941760, "gpu_gb": 72.2, "elapsed_s": 3175.0}
{"step": 6020, "loss": 2.5251, "lr": 0.0003, "tps": 337963, "tokens": 6312427520, "gpu_gb": 72.2, "elapsed_s": 3206.0}
{"step": 6030, "loss": 2.4916, "lr": 0.0003, "tps": 338194, "tokens": 6322913280, "gpu_gb": 72.2, "elapsed_s": 3237.0}
{"step": 6040, "loss": 2.557, "lr": 0.0003, "tps": 337905, "tokens": 6333399040, "gpu_gb": 72.2, "elapsed_s": 3268.1}
{"step": 6050, "loss": 2.4992, "lr": 0.0003, "tps": 338023, "tokens": 6343884800, "gpu_gb": 72.2, "elapsed_s": 3299.1}
{"step": 6060, "loss": 2.5144, "lr": 0.0003, "tps": 338255, "tokens": 6354370560, "gpu_gb": 72.2, "elapsed_s": 3330.1}
{"step": 6070, "loss": 2.5575, "lr": 0.0003, "tps": 337684, "tokens": 6364856320, "gpu_gb": 72.2, "elapsed_s": 3361.1}
{"step": 6080, "loss": 2.531, "lr": 0.0003, "tps": 337797, "tokens": 6375342080, "gpu_gb": 72.2, "elapsed_s": 3392.2}
{"step": 6090, "loss": 2.5113, "lr": 0.0003, "tps": 338183, "tokens": 6385827840, "gpu_gb": 72.2, "elapsed_s": 3423.2}
{"step": 6100, "loss": 2.4932, "lr": 0.0003, "tps": 338048, "tokens": 6396313600, "gpu_gb": 72.2, "elapsed_s": 3454.2}
{"step": 6110, "loss": 2.5538, "lr": 0.0003, "tps": 337997, "tokens": 6406799360, "gpu_gb": 72.2, "elapsed_s": 3485.2}
{"step": 6120, "loss": 2.5074, "lr": 0.0003, "tps": 337917, "tokens": 6417285120, "gpu_gb": 72.2, "elapsed_s": 3516.3}
{"step": 6130, "loss": 2.5343, "lr": 0.0003, "tps": 338032, "tokens": 6427770880, "gpu_gb": 72.2, "elapsed_s": 3547.3}
{"step": 6140, "loss": 2.5083, "lr": 0.0003, "tps": 338154, "tokens": 6438256640, "gpu_gb": 72.2, "elapsed_s": 3578.3}
{"step": 6150, "loss": 2.4912, "lr": 0.0003, "tps": 338033, "tokens": 6448742400, "gpu_gb": 72.2, "elapsed_s": 3609.3}
{"step": 6160, "loss": 2.5188, "lr": 0.0003, "tps": 338017, "tokens": 6459228160, "gpu_gb": 72.2, "elapsed_s": 3640.3}
{"step": 6170, "loss": 2.5416, "lr": 0.0003, "tps": 338255, "tokens": 6469713920, "gpu_gb": 72.2, "elapsed_s": 3671.3}
{"step": 6180, "loss": 2.4985, "lr": 0.0003, "tps": 337767, "tokens": 6480199680, "gpu_gb": 72.2, "elapsed_s": 3702.4}
{"step": 6190, "loss": 2.4683, "lr": 0.0003, "tps": 338132, "tokens": 6490685440, "gpu_gb": 72.2, "elapsed_s": 3733.4}
{"step": 6200, "loss": 2.472, "lr": 0.0003, "tps": 338108, "tokens": 6501171200, "gpu_gb": 72.2, "elapsed_s": 3764.4}
{"step": 6210, "loss": 2.5042, "lr": 0.0003, "tps": 337872, "tokens": 6511656960, "gpu_gb": 72.2, "elapsed_s": 3795.4}
{"step": 6220, "loss": 2.5114, "lr": 0.0003, "tps": 338091, "tokens": 6522142720, "gpu_gb": 72.2, "elapsed_s": 3826.5}
{"step": 6230, "loss": 2.4984, "lr": 0.0003, "tps": 338033, "tokens": 6532628480, "gpu_gb": 72.2, "elapsed_s": 3857.5}
{"step": 6240, "loss": 2.5322, "lr": 0.0003, "tps": 337899, "tokens": 6543114240, "gpu_gb": 72.2, "elapsed_s": 3888.5}
{"step": 6250, "loss": 2.496, "lr": 0.0003, "tps": 338076, "tokens": 6553600000, "gpu_gb": 72.2, "elapsed_s": 3919.5}
{"step": 6260, "loss": 2.4936, "lr": 0.0003, "tps": 338249, "tokens": 6564085760, "gpu_gb": 72.2, "elapsed_s": 3950.5}
{"step": 6270, "loss": 2.5404, "lr": 0.0003, "tps": 338059, "tokens": 6574571520, "gpu_gb": 72.2, "elapsed_s": 3981.5}
{"step": 6280, "loss": 2.5324, "lr": 0.0003, "tps": 338192, "tokens": 6585057280, "gpu_gb": 72.2, "elapsed_s": 4012.6}
{"step": 6290, "loss": 2.4764, "lr": 0.0003, "tps": 337935, "tokens": 6595543040, "gpu_gb": 72.2, "elapsed_s": 4043.6}
{"step": 6300, "loss": 2.4411, "lr": 0.0003, "tps": 337969, "tokens": 6606028800, "gpu_gb": 72.2, "elapsed_s": 4074.6}
{"step": 6310, "loss": 2.4904, "lr": 0.0003, "tps": 337916, "tokens": 6616514560, "gpu_gb": 72.2, "elapsed_s": 4105.6}
{"step": 6320, "loss": 2.518, "lr": 0.0003, "tps": 338621, "tokens": 6627000320, "gpu_gb": 72.2, "elapsed_s": 4136.6}
{"step": 6330, "loss": 2.5174, "lr": 0.0003, "tps": 338287, "tokens": 6637486080, "gpu_gb": 72.2, "elapsed_s": 4167.6}
{"step": 6340, "loss": 2.5053, "lr": 0.0003, "tps": 337982, "tokens": 6647971840, "gpu_gb": 72.2, "elapsed_s": 4198.6}
{"step": 6350, "loss": 2.497, "lr": 0.0003, "tps": 338130, "tokens": 6658457600, "gpu_gb": 72.2, "elapsed_s": 4229.6}
{"step": 6360, "loss": 2.4685, "lr": 0.0003, "tps": 338007, "tokens": 6668943360, "gpu_gb": 72.2, "elapsed_s": 4260.7}
{"step": 6370, "loss": 2.4936, "lr": 0.0003, "tps": 337946, "tokens": 6679429120, "gpu_gb": 72.2, "elapsed_s": 4291.7}
{"step": 6380, "loss": 2.5441, "lr": 0.0003, "tps": 337903, "tokens": 6689914880, "gpu_gb": 72.2, "elapsed_s": 4322.7}
{"step": 6390, "loss": 2.5194, "lr": 0.0003, "tps": 338227, "tokens": 6700400640, "gpu_gb": 72.2, "elapsed_s": 4353.7}
{"step": 6400, "loss": 2.4773, "lr": 0.0003, "tps": 338196, "tokens": 6710886400, "gpu_gb": 72.2, "elapsed_s": 4384.7}
{"step": 6410, "loss": 2.5463, "lr": 0.0003, "tps": 338155, "tokens": 6721372160, "gpu_gb": 72.2, "elapsed_s": 4415.7}
{"step": 6420, "loss": 2.5266, "lr": 0.0003, "tps": 338187, "tokens": 6731857920, "gpu_gb": 72.2, "elapsed_s": 4446.7}
{"step": 6430, "loss": 2.4581, "lr": 0.0003, "tps": 338261, "tokens": 6742343680, "gpu_gb": 72.2, "elapsed_s": 4477.7}
{"step": 6440, "loss": 2.515, "lr": 0.0003, "tps": 338160, "tokens": 6752829440, "gpu_gb": 72.2, "elapsed_s": 4508.7}
{"step": 6450, "loss": 2.5358, "lr": 0.0003, "tps": 338376, "tokens": 6763315200, "gpu_gb": 72.2, "elapsed_s": 4539.7}
{"step": 6460, "loss": 2.4727, "lr": 0.0003, "tps": 338082, "tokens": 6773800960, "gpu_gb": 72.2, "elapsed_s": 4570.8}
{"step": 6470, "loss": 2.5022, "lr": 0.0003, "tps": 338432, "tokens": 6784286720, "gpu_gb": 72.2, "elapsed_s": 4601.7}
{"step": 6480, "loss": 2.4994, "lr": 0.0003, "tps": 338052, "tokens": 6794772480, "gpu_gb": 72.2, "elapsed_s": 4632.8}
{"step": 6490, "loss": 2.4936, "lr": 0.0003, "tps": 338026, "tokens": 6805258240, "gpu_gb": 72.2, "elapsed_s": 4663.8}
{"step": 6500, "loss": 2.4815, "lr": 0.0003, "tps": 338159, "tokens": 6815744000, "gpu_gb": 72.2, "elapsed_s": 4694.8}
{"step": 6510, "loss": 2.515, "lr": 0.0003, "tps": 338154, "tokens": 6826229760, "gpu_gb": 72.2, "elapsed_s": 4725.8}
{"step": 6520, "loss": 2.5372, "lr": 0.0003, "tps": 338327, "tokens": 6836715520, "gpu_gb": 72.2, "elapsed_s": 4756.8}
{"step": 6530, "loss": 2.4863, "lr": 0.0003, "tps": 338030, "tokens": 6847201280, "gpu_gb": 72.2, "elapsed_s": 4787.8}
{"step": 6540, "loss": 2.5011, "lr": 0.0003, "tps": 338290, "tokens": 6857687040, "gpu_gb": 72.2, "elapsed_s": 4818.8}
{"step": 6550, "loss": 2.4899, "lr": 0.0003, "tps": 338353, "tokens": 6868172800, "gpu_gb": 72.2, "elapsed_s": 4849.8}
{"step": 6560, "loss": 2.5489, "lr": 0.0003, "tps": 338024, "tokens": 6878658560, "gpu_gb": 72.2, "elapsed_s": 4880.8}
{"step": 6570, "loss": 2.4481, "lr": 0.0003, "tps": 338365, "tokens": 6889144320, "gpu_gb": 72.2, "elapsed_s": 4911.8}
{"step": 6580, "loss": 2.5256, "lr": 0.0003, "tps": 337764, "tokens": 6899630080, "gpu_gb": 72.2, "elapsed_s": 4942.9}
{"step": 6590, "loss": 2.5093, "lr": 0.0003, "tps": 338196, "tokens": 6910115840, "gpu_gb": 72.2, "elapsed_s": 4973.9}
{"step": 6600, "loss": 2.4496, "lr": 0.0003, "tps": 338245, "tokens": 6920601600, "gpu_gb": 72.2, "elapsed_s": 5004.9}
{"step": 6610, "loss": 2.489, "lr": 0.0003, "tps": 338026, "tokens": 6931087360, "gpu_gb": 72.2, "elapsed_s": 5035.9}
{"step": 6620, "loss": 2.4823, "lr": 0.0003, "tps": 338044, "tokens": 6941573120, "gpu_gb": 72.2, "elapsed_s": 5066.9}
{"step": 6630, "loss": 2.5008, "lr": 0.0003, "tps": 338016, "tokens": 6952058880, "gpu_gb": 72.2, "elapsed_s": 5097.9}
{"step": 6640, "loss": 2.4984, "lr": 0.0003, "tps": 338120, "tokens": 6962544640, "gpu_gb": 72.2, "elapsed_s": 5128.9}
{"step": 6650, "loss": 2.5171, "lr": 0.0003, "tps": 338451, "tokens": 6973030400, "gpu_gb": 72.2, "elapsed_s": 5159.9}
{"step": 6660, "loss": 2.5067, "lr": 0.0003, "tps": 338012, "tokens": 6983516160, "gpu_gb": 72.2, "elapsed_s": 5190.9}
{"step": 6670, "loss": 2.4384, "lr": 0.0003, "tps": 338281, "tokens": 6994001920, "gpu_gb": 72.2, "elapsed_s": 5221.9}
{"step": 6680, "loss": 2.4706, "lr": 0.0003, "tps": 337901, "tokens": 7004487680, "gpu_gb": 72.2, "elapsed_s": 5253.0}
{"step": 6690, "loss": 2.5169, "lr": 0.0003, "tps": 338225, "tokens": 7014973440, "gpu_gb": 72.2, "elapsed_s": 5284.0}
{"step": 6700, "loss": 2.4722, "lr": 0.0003, "tps": 338246, "tokens": 7025459200, "gpu_gb": 72.2, "elapsed_s": 5315.0}
{"step": 6710, "loss": 2.5735, "lr": 0.0003, "tps": 338264, "tokens": 7035944960, "gpu_gb": 72.2, "elapsed_s": 5346.0}
{"step": 6720, "loss": 2.4857, "lr": 0.0003, "tps": 337774, "tokens": 7046430720, "gpu_gb": 72.2, "elapsed_s": 5377.0}
{"step": 6730, "loss": 2.4759, "lr": 0.0003, "tps": 338071, "tokens": 7056916480, "gpu_gb": 72.2, "elapsed_s": 5408.0}
{"step": 6740, "loss": 2.52, "lr": 0.0003, "tps": 338225, "tokens": 7067402240, "gpu_gb": 72.2, "elapsed_s": 5439.0}
{"step": 6750, "loss": 2.4928, "lr": 0.0003, "tps": 338229, "tokens": 7077888000, "gpu_gb": 72.2, "elapsed_s": 5470.0}
{"step": 6760, "loss": 2.4826, "lr": 0.0003, "tps": 338374, "tokens": 7088373760, "gpu_gb": 72.2, "elapsed_s": 5501.0}
{"step": 6770, "loss": 2.5128, "lr": 0.0003, "tps": 338573, "tokens": 7098859520, "gpu_gb": 72.2, "elapsed_s": 5532.0}
{"step": 6780, "loss": 2.5022, "lr": 0.0003, "tps": 338181, "tokens": 7109345280, "gpu_gb": 72.2, "elapsed_s": 5563.0}
{"step": 6790, "loss": 2.464, "lr": 0.0003, "tps": 337916, "tokens": 7119831040, "gpu_gb": 72.2, "elapsed_s": 5594.0}
{"step": 6800, "loss": 2.5032, "lr": 0.0003, "tps": 338079, "tokens": 7130316800, "gpu_gb": 72.2, "elapsed_s": 5625.0}
{"step": 6810, "loss": 2.5488, "lr": 0.0003, "tps": 337819, "tokens": 7140802560, "gpu_gb": 72.2, "elapsed_s": 5656.1}
{"step": 6820, "loss": 2.4962, "lr": 0.0003, "tps": 338182, "tokens": 7151288320, "gpu_gb": 72.2, "elapsed_s": 5687.1}
{"step": 6830, "loss": 2.4942, "lr": 0.0003, "tps": 337980, "tokens": 7161774080, "gpu_gb": 72.2, "elapsed_s": 5718.1}
{"step": 6840, "loss": 2.5257, "lr": 0.0003, "tps": 338163, "tokens": 7172259840, "gpu_gb": 72.2, "elapsed_s": 5749.1}
{"step": 6850, "loss": 2.4866, "lr": 0.0003, "tps": 338344, "tokens": 7182745600, "gpu_gb": 72.2, "elapsed_s": 5780.1}
{"step": 6860, "loss": 2.5432, "lr": 0.0003, "tps": 338104, "tokens": 7193231360, "gpu_gb": 72.2, "elapsed_s": 5811.1}
{"step": 6870, "loss": 2.4898, "lr": 0.0003, "tps": 338095, "tokens": 7203717120, "gpu_gb": 72.2, "elapsed_s": 5842.1}
{"step": 6880, "loss": 2.4686, "lr": 0.0003, "tps": 338248, "tokens": 7214202880, "gpu_gb": 72.2, "elapsed_s": 5873.1}
{"step": 6890, "loss": 2.5326, "lr": 0.0003, "tps": 338091, "tokens": 7224688640, "gpu_gb": 72.2, "elapsed_s": 5904.2}
{"step": 6900, "loss": 2.4856, "lr": 0.0003, "tps": 338297, "tokens": 7235174400, "gpu_gb": 72.2, "elapsed_s": 5935.2}
{"step": 6910, "loss": 2.4512, "lr": 0.0003, "tps": 338071, "tokens": 7245660160, "gpu_gb": 72.2, "elapsed_s": 5966.2}
{"step": 6920, "loss": 2.4868, "lr": 0.0003, "tps": 338224, "tokens": 7256145920, "gpu_gb": 72.2, "elapsed_s": 5997.2}
{"step": 6930, "loss": 2.4651, "lr": 0.0003, "tps": 338302, "tokens": 7266631680, "gpu_gb": 72.2, "elapsed_s": 6028.2}
{"step": 6940, "loss": 2.5154, "lr": 0.0003, "tps": 338261, "tokens": 7277117440, "gpu_gb": 72.2, "elapsed_s": 6059.2}
{"step": 6950, "loss": 2.5464, "lr": 0.0003, "tps": 338081, "tokens": 7287603200, "gpu_gb": 72.2, "elapsed_s": 6090.2}
{"step": 6960, "loss": 2.48, "lr": 0.0003, "tps": 338191, "tokens": 7298088960, "gpu_gb": 72.2, "elapsed_s": 6121.2}
{"step": 6970, "loss": 2.4658, "lr": 0.0003, "tps": 338216, "tokens": 7308574720, "gpu_gb": 72.2, "elapsed_s": 6152.2}
{"step": 6980, "loss": 2.5041, "lr": 0.0003, "tps": 337946, "tokens": 7319060480, "gpu_gb": 72.2, "elapsed_s": 6183.2}
{"step": 6990, "loss": 2.4588, "lr": 0.0003, "tps": 337991, "tokens": 7329546240, "gpu_gb": 72.2, "elapsed_s": 6214.2}
{"step": 7000, "loss": 2.4528, "lr": 0.0003, "tps": 337954, "tokens": 7340032000, "gpu_gb": 72.2, "elapsed_s": 6245.3}
{"step": 7010, "loss": 2.4847, "lr": 0.0003, "tps": 132582, "tokens": 7350517760, "gpu_gb": 72.2, "elapsed_s": 6324.4}
{"step": 7020, "loss": 2.4928, "lr": 0.0003, "tps": 338048, "tokens": 7361003520, "gpu_gb": 72.2, "elapsed_s": 6355.4}
{"step": 7030, "loss": 2.4767, "lr": 0.0003, "tps": 338267, "tokens": 7371489280, "gpu_gb": 72.2, "elapsed_s": 6386.4}
{"step": 7040, "loss": 2.5185, "lr": 0.0003, "tps": 338287, "tokens": 7381975040, "gpu_gb": 72.2, "elapsed_s": 6417.4}
{"step": 7050, "loss": 2.4851, "lr": 0.0003, "tps": 338492, "tokens": 7392460800, "gpu_gb": 72.2, "elapsed_s": 6448.4}
{"step": 7060, "loss": 2.4712, "lr": 0.0003, "tps": 338255, "tokens": 7402946560, "gpu_gb": 72.2, "elapsed_s": 6479.4}
{"step": 7070, "loss": 2.475, "lr": 0.0003, "tps": 338321, "tokens": 7413432320, "gpu_gb": 72.2, "elapsed_s": 6510.4}
{"step": 7080, "loss": 2.4742, "lr": 0.0003, "tps": 338385, "tokens": 7423918080, "gpu_gb": 72.2, "elapsed_s": 6541.3}
{"step": 7090, "loss": 2.4782, "lr": 0.0003, "tps": 338309, "tokens": 7434403840, "gpu_gb": 72.2, "elapsed_s": 6572.3}
{"step": 7100, "loss": 2.5042, "lr": 0.0003, "tps": 337993, "tokens": 7444889600, "gpu_gb": 72.2, "elapsed_s": 6603.4}
{"step": 7110, "loss": 2.4646, "lr": 0.0003, "tps": 337845, "tokens": 7455375360, "gpu_gb": 72.2, "elapsed_s": 6634.4}
{"step": 7120, "loss": 2.4686, "lr": 0.0003, "tps": 338056, "tokens": 7465861120, "gpu_gb": 72.2, "elapsed_s": 6665.4}
{"step": 7130, "loss": 2.5098, "lr": 0.0003, "tps": 338121, "tokens": 7476346880, "gpu_gb": 72.2, "elapsed_s": 6696.4}
{"step": 7140, "loss": 2.4872, "lr": 0.0003, "tps": 338148, "tokens": 7486832640, "gpu_gb": 72.2, "elapsed_s": 6727.4}
{"step": 7150, "loss": 2.4519, "lr": 0.0003, "tps": 337787, "tokens": 7497318400, "gpu_gb": 72.2, "elapsed_s": 6758.5}
{"step": 7160, "loss": 2.4597, "lr": 0.0003, "tps": 337631, "tokens": 7507804160, "gpu_gb": 72.2, "elapsed_s": 6789.5}
{"step": 7170, "loss": 2.5499, "lr": 0.0003, "tps": 337532, "tokens": 7518289920, "gpu_gb": 72.2, "elapsed_s": 6820.6}
{"step": 7180, "loss": 2.4644, "lr": 0.0003, "tps": 337972, "tokens": 7528775680, "gpu_gb": 72.2, "elapsed_s": 6851.6}
{"step": 7190, "loss": 2.4453, "lr": 0.0003, "tps": 338022, "tokens": 7539261440, "gpu_gb": 72.2, "elapsed_s": 6882.6}
{"step": 7200, "loss": 2.5205, "lr": 0.0003, "tps": 338122, "tokens": 7549747200, "gpu_gb": 72.2, "elapsed_s": 6913.7}
{"step": 7210, "loss": 2.4233, "lr": 0.0003, "tps": 337904, "tokens": 7560232960, "gpu_gb": 72.2, "elapsed_s": 6944.7}
{"step": 7220, "loss": 2.4688, "lr": 0.0003, "tps": 337863, "tokens": 7570718720, "gpu_gb": 72.2, "elapsed_s": 6975.7}
{"step": 7230, "loss": 2.4572, "lr": 0.0003, "tps": 338199, "tokens": 7581204480, "gpu_gb": 72.2, "elapsed_s": 7006.7}
{"step": 7240, "loss": 2.4747, "lr": 0.0003, "tps": 338041, "tokens": 7591690240, "gpu_gb": 72.2, "elapsed_s": 7037.8}
{"step": 7250, "loss": 2.465, "lr": 0.0003, "tps": 338213, "tokens": 7602176000, "gpu_gb": 72.2, "elapsed_s": 7068.8}
{"step": 7260, "loss": 2.5087, "lr": 0.0003, "tps": 338238, "tokens": 7612661760, "gpu_gb": 72.2, "elapsed_s": 7099.8}
{"step": 7270, "loss": 2.5377, "lr": 0.0003, "tps": 338395, "tokens": 7623147520, "gpu_gb": 72.2, "elapsed_s": 7130.7}
{"step": 7280, "loss": 2.4866, "lr": 0.0003, "tps": 337917, "tokens": 7633633280, "gpu_gb": 72.2, "elapsed_s": 7161.8}
{"step": 7290, "loss": 2.4921, "lr": 0.0003, "tps": 338193, "tokens": 7644119040, "gpu_gb": 72.2, "elapsed_s": 7192.8}
{"step": 7300, "loss": 2.4643, "lr": 0.0003, "tps": 337694, "tokens": 7654604800, "gpu_gb": 72.2, "elapsed_s": 7223.8}
{"step": 7310, "loss": 2.4602, "lr": 0.0003, "tps": 337548, "tokens": 7665090560, "gpu_gb": 72.2, "elapsed_s": 7254.9}
{"step": 7320, "loss": 2.472, "lr": 0.0003, "tps": 337567, "tokens": 7675576320, "gpu_gb": 72.2, "elapsed_s": 7286.0}
{"step": 7330, "loss": 2.4942, "lr": 0.0003, "tps": 337778, "tokens": 7686062080, "gpu_gb": 72.2, "elapsed_s": 7317.0}
{"step": 7340, "loss": 2.3894, "lr": 0.0003, "tps": 338054, "tokens": 7696547840, "gpu_gb": 72.2, "elapsed_s": 7348.0}
{"step": 7350, "loss": 2.456, "lr": 0.0003, "tps": 337850, "tokens": 7707033600, "gpu_gb": 72.2, "elapsed_s": 7379.1}
{"step": 7360, "loss": 2.4347, "lr": 0.0003, "tps": 338262, "tokens": 7717519360, "gpu_gb": 72.2, "elapsed_s": 7410.1}
{"step": 7370, "loss": 2.4905, "lr": 0.0003, "tps": 337963, "tokens": 7728005120, "gpu_gb": 72.2, "elapsed_s": 7441.1}
{"step": 7380, "loss": 2.4543, "lr": 0.0003, "tps": 338092, "tokens": 7738490880, "gpu_gb": 72.2, "elapsed_s": 7472.1}
{"step": 7390, "loss": 2.4657, "lr": 0.0003, "tps": 338290, "tokens": 7748976640, "gpu_gb": 72.2, "elapsed_s": 7503.1}
{"step": 7400, "loss": 2.4685, "lr": 0.0003, "tps": 337868, "tokens": 7759462400, "gpu_gb": 72.2, "elapsed_s": 7534.1}
{"step": 7410, "loss": 2.4637, "lr": 0.0003, "tps": 338248, "tokens": 7769948160, "gpu_gb": 72.2, "elapsed_s": 7565.1}
{"step": 7420, "loss": 2.5103, "lr": 0.0003, "tps": 338211, "tokens": 7780433920, "gpu_gb": 72.2, "elapsed_s": 7596.1}
{"step": 7430, "loss": 2.4563, "lr": 0.0003, "tps": 338043, "tokens": 7790919680, "gpu_gb": 72.2, "elapsed_s": 7627.2}
{"step": 7440, "loss": 2.4728, "lr": 0.0003, "tps": 338109, "tokens": 7801405440, "gpu_gb": 72.2, "elapsed_s": 7658.2}
{"step": 7450, "loss": 2.4545, "lr": 0.0003, "tps": 338230, "tokens": 7811891200, "gpu_gb": 72.2, "elapsed_s": 7689.2}
{"step": 7460, "loss": 2.4632, "lr": 0.0003, "tps": 338041, "tokens": 7822376960, "gpu_gb": 72.2, "elapsed_s": 7720.2}
{"step": 7470, "loss": 2.481, "lr": 0.0003, "tps": 338021, "tokens": 7832862720, "gpu_gb": 72.2, "elapsed_s": 7751.2}
{"step": 7480, "loss": 2.4756, "lr": 0.0003, "tps": 337972, "tokens": 7843348480, "gpu_gb": 72.2, "elapsed_s": 7782.2}
{"step": 7490, "loss": 2.4654, "lr": 0.0003, "tps": 338066, "tokens": 7853834240, "gpu_gb": 72.2, "elapsed_s": 7813.3}
{"step": 7500, "loss": 2.5212, "lr": 0.0003, "tps": 338246, "tokens": 7864320000, "gpu_gb": 72.2, "elapsed_s": 7844.3}
{"step": 7510, "loss": 2.4575, "lr": 0.0003, "tps": 337904, "tokens": 7874805760, "gpu_gb": 72.2, "elapsed_s": 7875.3}
{"step": 7520, "loss": 2.4858, "lr": 0.0003, "tps": 338382, "tokens": 7885291520, "gpu_gb": 72.2, "elapsed_s": 7906.3}
{"step": 7530, "loss": 2.436, "lr": 0.0003, "tps": 338384, "tokens": 7895777280, "gpu_gb": 72.2, "elapsed_s": 7937.3}
{"step": 7540, "loss": 2.4726, "lr": 0.0003, "tps": 338435, "tokens": 7906263040, "gpu_gb": 72.2, "elapsed_s": 7968.2}
{"step": 7550, "loss": 2.4561, "lr": 0.0003, "tps": 338230, "tokens": 7916748800, "gpu_gb": 72.2, "elapsed_s": 7999.3}
{"step": 7560, "loss": 2.418, "lr": 0.0003, "tps": 338386, "tokens": 7927234560, "gpu_gb": 72.2, "elapsed_s": 8030.2}
{"step": 7570, "loss": 2.4819, "lr": 0.0003, "tps": 338149, "tokens": 7937720320, "gpu_gb": 72.2, "elapsed_s": 8061.2}
{"step": 7580, "loss": 2.4728, "lr": 0.0003, "tps": 338144, "tokens": 7948206080, "gpu_gb": 72.2, "elapsed_s": 8092.3}
{"step": 7590, "loss": 2.4916, "lr": 0.0003, "tps": 338301, "tokens": 7958691840, "gpu_gb": 72.2, "elapsed_s": 8123.3}
{"step": 7600, "loss": 2.4405, "lr": 0.0003, "tps": 338245, "tokens": 7969177600, "gpu_gb": 72.2, "elapsed_s": 8154.3}
{"step": 7610, "loss": 2.4806, "lr": 0.0003, "tps": 337893, "tokens": 7979663360, "gpu_gb": 72.2, "elapsed_s": 8185.3}
{"step": 7620, "loss": 2.468, "lr": 0.0003, "tps": 338275, "tokens": 7990149120, "gpu_gb": 72.2, "elapsed_s": 8216.3}
{"step": 7630, "loss": 2.5064, "lr": 0.0003, "tps": 338204, "tokens": 8000634880, "gpu_gb": 72.2, "elapsed_s": 8247.3}
{"step": 7640, "loss": 2.4687, "lr": 0.0003, "tps": 338396, "tokens": 8011120640, "gpu_gb": 72.2, "elapsed_s": 8278.3}
{"step": 7650, "loss": 2.4775, "lr": 0.0003, "tps": 338428, "tokens": 8021606400, "gpu_gb": 72.2, "elapsed_s": 8309.3}
{"step": 7660, "loss": 2.4693, "lr": 0.0003, "tps": 338279, "tokens": 8032092160, "gpu_gb": 72.2, "elapsed_s": 8340.3}
{"step": 7670, "loss": 2.4631, "lr": 0.0003, "tps": 338143, "tokens": 8042577920, "gpu_gb": 72.2, "elapsed_s": 8371.3}
{"step": 7680, "loss": 2.4483, "lr": 0.0003, "tps": 338212, "tokens": 8053063680, "gpu_gb": 72.2, "elapsed_s": 8402.3}
{"step": 7690, "loss": 2.4733, "lr": 0.0003, "tps": 338407, "tokens": 8063549440, "gpu_gb": 72.2, "elapsed_s": 8433.3}
{"step": 7700, "loss": 2.4473, "lr": 0.0003, "tps": 338239, "tokens": 8074035200, "gpu_gb": 72.2, "elapsed_s": 8464.3}
{"step": 7710, "loss": 2.4895, "lr": 0.0003, "tps": 338414, "tokens": 8084520960, "gpu_gb": 72.2, "elapsed_s": 8495.2}
{"step": 7720, "loss": 2.4317, "lr": 0.0003, "tps": 338204, "tokens": 8095006720, "gpu_gb": 72.2, "elapsed_s": 8526.2}
{"step": 7730, "loss": 2.4857, "lr": 0.0003, "tps": 338459, "tokens": 8105492480, "gpu_gb": 72.2, "elapsed_s": 8557.2}
{"step": 7740, "loss": 2.4762, "lr": 0.0003, "tps": 338307, "tokens": 8115978240, "gpu_gb": 72.2, "elapsed_s": 8588.2}
{"step": 7750, "loss": 2.4403, "lr": 0.0003, "tps": 337705, "tokens": 8126464000, "gpu_gb": 72.2, "elapsed_s": 8619.3}
{"step": 7760, "loss": 2.4558, "lr": 0.0003, "tps": 337979, "tokens": 8136949760, "gpu_gb": 72.2, "elapsed_s": 8650.3}
{"step": 7770, "loss": 2.482, "lr": 0.0003, "tps": 337974, "tokens": 8147435520, "gpu_gb": 72.2, "elapsed_s": 8681.3}
{"step": 7780, "loss": 2.4753, "lr": 0.0003, "tps": 337915, "tokens": 8157921280, "gpu_gb": 72.2, "elapsed_s": 8712.4}
{"step": 7790, "loss": 2.5055, "lr": 0.0003, "tps": 338026, "tokens": 8168407040, "gpu_gb": 72.2, "elapsed_s": 8743.4}
{"step": 7800, "loss": 2.4444, "lr": 0.0003, "tps": 337704, "tokens": 8178892800, "gpu_gb": 72.2, "elapsed_s": 8774.4}
{"step": 7810, "loss": 2.4028, "lr": 0.0003, "tps": 338007, "tokens": 8189378560, "gpu_gb": 72.2, "elapsed_s": 8805.5}
{"step": 7820, "loss": 2.4543, "lr": 0.0003, "tps": 337856, "tokens": 8199864320, "gpu_gb": 72.2, "elapsed_s": 8836.5}
{"step": 7830, "loss": 2.4148, "lr": 0.0003, "tps": 337659, "tokens": 8210350080, "gpu_gb": 72.2, "elapsed_s": 8867.5}
{"step": 7840, "loss": 2.4586, "lr": 0.0003, "tps": 337884, "tokens": 8220835840, "gpu_gb": 72.2, "elapsed_s": 8898.6}
{"step": 7850, "loss": 2.4817, "lr": 0.0003, "tps": 337769, "tokens": 8231321600, "gpu_gb": 72.2, "elapsed_s": 8929.6}
{"step": 7860, "loss": 2.4328, "lr": 0.0003, "tps": 338117, "tokens": 8241807360, "gpu_gb": 72.2, "elapsed_s": 8960.6}
{"step": 7870, "loss": 2.5177, "lr": 0.0003, "tps": 337800, "tokens": 8252293120, "gpu_gb": 72.2, "elapsed_s": 8991.7}
{"step": 7880, "loss": 2.4782, "lr": 0.0003, "tps": 337810, "tokens": 8262778880, "gpu_gb": 72.2, "elapsed_s": 9022.7}
{"step": 7890, "loss": 2.459, "lr": 0.0003, "tps": 337481, "tokens": 8273264640, "gpu_gb": 72.2, "elapsed_s": 9053.8}
{"step": 7900, "loss": 2.4585, "lr": 0.0003, "tps": 338288, "tokens": 8283750400, "gpu_gb": 72.2, "elapsed_s": 9084.8}
{"step": 7910, "loss": 2.4342, "lr": 0.0003, "tps": 337787, "tokens": 8294236160, "gpu_gb": 72.2, "elapsed_s": 9115.8}
{"step": 7920, "loss": 2.4584, "lr": 0.0003, "tps": 338259, "tokens": 8304721920, "gpu_gb": 72.2, "elapsed_s": 9146.8}
{"step": 7930, "loss": 2.4576, "lr": 0.0003, "tps": 338255, "tokens": 8315207680, "gpu_gb": 72.2, "elapsed_s": 9177.8}
{"step": 7940, "loss": 2.4488, "lr": 0.0003, "tps": 337828, "tokens": 8325693440, "gpu_gb": 72.2, "elapsed_s": 9208.9}
{"step": 7950, "loss": 2.4451, "lr": 0.0003, "tps": 337776, "tokens": 8336179200, "gpu_gb": 72.2, "elapsed_s": 9239.9}
{"step": 7960, "loss": 2.4523, "lr": 0.0003, "tps": 337686, "tokens": 8346664960, "gpu_gb": 72.2, "elapsed_s": 9271.0}
{"step": 7970, "loss": 2.448, "lr": 0.0003, "tps": 337667, "tokens": 8357150720, "gpu_gb": 72.2, "elapsed_s": 9302.0}
{"step": 7980, "loss": 2.4987, "lr": 0.0003, "tps": 338012, "tokens": 8367636480, "gpu_gb": 72.2, "elapsed_s": 9333.0}
{"step": 7990, "loss": 2.4628, "lr": 0.0003, "tps": 338099, "tokens": 8378122240, "gpu_gb": 72.2, "elapsed_s": 9364.1}
{"step": 8000, "loss": 2.4656, "lr": 0.0003, "tps": 338234, "tokens": 8388608000, "gpu_gb": 72.2, "elapsed_s": 9395.1}
{"step": 8010, "loss": 2.4232, "lr": 0.0003, "tps": 141170, "tokens": 8399093760, "gpu_gb": 72.2, "elapsed_s": 9469.3}
{"step": 8020, "loss": 2.456, "lr": 0.0003, "tps": 338030, "tokens": 8409579520, "gpu_gb": 72.2, "elapsed_s": 9500.4}
{"step": 8030, "loss": 2.48, "lr": 0.0003, "tps": 338108, "tokens": 8420065280, "gpu_gb": 72.2, "elapsed_s": 9531.4}
{"step": 8040, "loss": 2.4648, "lr": 0.0003, "tps": 338090, "tokens": 8430551040, "gpu_gb": 72.2, "elapsed_s": 9562.4}
{"step": 8050, "loss": 2.4508, "lr": 0.0003, "tps": 337916, "tokens": 8441036800, "gpu_gb": 72.2, "elapsed_s": 9593.4}
{"step": 8060, "loss": 2.4658, "lr": 0.0003, "tps": 338346, "tokens": 8451522560, "gpu_gb": 72.2, "elapsed_s": 9624.4}
{"step": 8070, "loss": 2.4272, "lr": 0.0003, "tps": 338034, "tokens": 8462008320, "gpu_gb": 72.2, "elapsed_s": 9655.4}
{"step": 8080, "loss": 2.4167, "lr": 0.0003, "tps": 338156, "tokens": 8472494080, "gpu_gb": 72.2, "elapsed_s": 9686.4}
{"step": 8090, "loss": 2.5192, "lr": 0.0003, "tps": 337966, "tokens": 8482979840, "gpu_gb": 72.2, "elapsed_s": 9717.5}
{"step": 8100, "loss": 2.452, "lr": 0.0003, "tps": 338308, "tokens": 8493465600, "gpu_gb": 72.2, "elapsed_s": 9748.5}
{"step": 8110, "loss": 2.4238, "lr": 0.0003, "tps": 338041, "tokens": 8503951360, "gpu_gb": 72.2, "elapsed_s": 9779.5}
{"step": 8120, "loss": 2.4469, "lr": 0.0003, "tps": 338104, "tokens": 8514437120, "gpu_gb": 72.2, "elapsed_s": 9810.5}
{"step": 8130, "loss": 2.4563, "lr": 0.0003, "tps": 338387, "tokens": 8524922880, "gpu_gb": 72.2, "elapsed_s": 9841.5}
{"step": 8140, "loss": 2.4298, "lr": 0.0003, "tps": 337836, "tokens": 8535408640, "gpu_gb": 72.2, "elapsed_s": 9872.5}
{"step": 8150, "loss": 2.4482, "lr": 0.0003, "tps": 338238, "tokens": 8545894400, "gpu_gb": 72.2, "elapsed_s": 9903.5}
{"step": 8160, "loss": 2.4561, "lr": 0.0003, "tps": 337953, "tokens": 8556380160, "gpu_gb": 72.2, "elapsed_s": 9934.5}
{"step": 8170, "loss": 2.4685, "lr": 0.0003, "tps": 338261, "tokens": 8566865920, "gpu_gb": 72.2, "elapsed_s": 9965.5}
{"step": 8180, "loss": 2.4587, "lr": 0.0003, "tps": 338172, "tokens": 8577351680, "gpu_gb": 72.2, "elapsed_s": 9996.5}
{"step": 8190, "loss": 2.5186, "lr": 0.0003, "tps": 338010, "tokens": 8587837440, "gpu_gb": 72.2, "elapsed_s": 10027.6}
{"step": 8200, "loss": 2.4155, "lr": 0.0003, "tps": 338105, "tokens": 8598323200, "gpu_gb": 72.2, "elapsed_s": 10058.6}
{"step": 8210, "loss": 2.4568, "lr": 0.0003, "tps": 338143, "tokens": 8608808960, "gpu_gb": 72.2, "elapsed_s": 10089.6}
{"step": 8220, "loss": 2.4564, "lr": 0.0003, "tps": 338221, "tokens": 8619294720, "gpu_gb": 72.2, "elapsed_s": 10120.6}
{"step": 8230, "loss": 2.4896, "lr": 0.0003, "tps": 338310, "tokens": 8629780480, "gpu_gb": 72.2, "elapsed_s": 10151.6}
{"step": 8240, "loss": 2.4707, "lr": 0.0003, "tps": 338004, "tokens": 8640266240, "gpu_gb": 72.2, "elapsed_s": 10182.6}
{"step": 8250, "loss": 2.4862, "lr": 0.0003, "tps": 338211, "tokens": 8650752000, "gpu_gb": 72.2, "elapsed_s": 10213.6}
{"step": 8260, "loss": 2.4534, "lr": 0.0003, "tps": 338069, "tokens": 8661237760, "gpu_gb": 72.2, "elapsed_s": 10244.6}
{"step": 8270, "loss": 2.4566, "lr": 0.0003, "tps": 338034, "tokens": 8671723520, "gpu_gb": 72.2, "elapsed_s": 10275.7}
{"step": 8280, "loss": 2.4372, "lr": 0.0003, "tps": 338095, "tokens": 8682209280, "gpu_gb": 72.2, "elapsed_s": 10306.7}
{"step": 8290, "loss": 2.4762, "lr": 0.0003, "tps": 337951, "tokens": 8692695040, "gpu_gb": 72.2, "elapsed_s": 10337.7}
{"step": 8300, "loss": 2.4266, "lr": 0.0003, "tps": 338039, "tokens": 8703180800, "gpu_gb": 72.2, "elapsed_s": 10368.7}
{"step": 8310, "loss": 2.4199, "lr": 0.0003, "tps": 338306, "tokens": 8713666560, "gpu_gb": 72.2, "elapsed_s": 10399.7}
{"step": 8320, "loss": 2.5136, "lr": 0.0003, "tps": 338167, "tokens": 8724152320, "gpu_gb": 72.2, "elapsed_s": 10430.7}
{"step": 8330, "loss": 2.4635, "lr": 0.0003, "tps": 338244, "tokens": 8734638080, "gpu_gb": 72.2, "elapsed_s": 10461.7}
{"step": 8340, "loss": 2.4373, "lr": 0.0003, "tps": 338220, "tokens": 8745123840, "gpu_gb": 72.2, "elapsed_s": 10492.7}
{"step": 8350, "loss": 2.3727, "lr": 0.0003, "tps": 338197, "tokens": 8755609600, "gpu_gb": 72.2, "elapsed_s": 10523.7}
{"step": 8360, "loss": 2.4635, "lr": 0.0003, "tps": 338136, "tokens": 8766095360, "gpu_gb": 72.2, "elapsed_s": 10554.7}
{"step": 8370, "loss": 2.4686, "lr": 0.0003, "tps": 338377, "tokens": 8776581120, "gpu_gb": 72.2, "elapsed_s": 10585.7}
{"step": 8380, "loss": 2.4503, "lr": 0.0003, "tps": 338343, "tokens": 8787066880, "gpu_gb": 72.2, "elapsed_s": 10616.7}
{"step": 8390, "loss": 2.4532, "lr": 0.0003, "tps": 338074, "tokens": 8797552640, "gpu_gb": 72.2, "elapsed_s": 10647.7}
{"step": 8400, "loss": 2.4672, "lr": 0.0003, "tps": 338014, "tokens": 8808038400, "gpu_gb": 72.2, "elapsed_s": 10678.8}
{"step": 8410, "loss": 2.4596, "lr": 0.0003, "tps": 338249, "tokens": 8818524160, "gpu_gb": 72.2, "elapsed_s": 10709.8}
{"step": 8420, "loss": 2.489, "lr": 0.0003, "tps": 338297, "tokens": 8829009920, "gpu_gb": 72.2, "elapsed_s": 10740.8}
{"step": 8430, "loss": 2.4897, "lr": 0.0003, "tps": 338421, "tokens": 8839495680, "gpu_gb": 72.2, "elapsed_s": 10771.7}
{"step": 8440, "loss": 2.4304, "lr": 0.0003, "tps": 337950, "tokens": 8849981440, "gpu_gb": 72.2, "elapsed_s": 10802.8}
{"step": 8450, "loss": 2.4514, "lr": 0.0003, "tps": 338246, "tokens": 8860467200, "gpu_gb": 72.2, "elapsed_s": 10833.8}
{"step": 8460, "loss": 2.4339, "lr": 0.0003, "tps": 338207, "tokens": 8870952960, "gpu_gb": 72.2, "elapsed_s": 10864.8}
{"step": 8470, "loss": 2.4531, "lr": 0.0003, "tps": 338224, "tokens": 8881438720, "gpu_gb": 72.2, "elapsed_s": 10895.8}
{"step": 8480, "loss": 2.4529, "lr": 0.0003, "tps": 338333, "tokens": 8891924480, "gpu_gb": 72.2, "elapsed_s": 10926.8}
{"step": 8490, "loss": 2.438, "lr": 0.0003, "tps": 337981, "tokens": 8902410240, "gpu_gb": 72.2, "elapsed_s": 10957.8}
{"step": 8500, "loss": 2.4618, "lr": 0.0003, "tps": 338097, "tokens": 8912896000, "gpu_gb": 72.2, "elapsed_s": 10988.8}
{"step": 8510, "loss": 2.4618, "lr": 0.0003, "tps": 338177, "tokens": 8923381760, "gpu_gb": 72.2, "elapsed_s": 11019.8}
{"step": 8520, "loss": 2.4215, "lr": 0.0003, "tps": 338184, "tokens": 8933867520, "gpu_gb": 72.2, "elapsed_s": 11050.8}
{"step": 8530, "loss": 2.4226, "lr": 0.0003, "tps": 338426, "tokens": 8944353280, "gpu_gb": 72.2, "elapsed_s": 11081.8}
{"step": 8540, "loss": 2.4541, "lr": 0.0003, "tps": 338409, "tokens": 8954839040, "gpu_gb": 72.2, "elapsed_s": 11112.8}
{"step": 8550, "loss": 2.4967, "lr": 0.0003, "tps": 338167, "tokens": 8965324800, "gpu_gb": 72.2, "elapsed_s": 11143.8}
{"step": 8560, "loss": 2.4494, "lr": 0.0003, "tps": 338098, "tokens": 8975810560, "gpu_gb": 72.2, "elapsed_s": 11174.8}
{"step": 8570, "loss": 2.4734, "lr": 0.0003, "tps": 338359, "tokens": 8986296320, "gpu_gb": 72.2, "elapsed_s": 11205.8}
{"step": 8580, "loss": 2.473, "lr": 0.0003, "tps": 338343, "tokens": 8996782080, "gpu_gb": 72.2, "elapsed_s": 11236.8}
{"step": 8590, "loss": 2.4509, "lr": 0.0003, "tps": 337955, "tokens": 9007267840, "gpu_gb": 72.2, "elapsed_s": 11267.8}
{"step": 8600, "loss": 2.4839, "lr": 0.0003, "tps": 338092, "tokens": 9017753600, "gpu_gb": 72.2, "elapsed_s": 11298.8}
{"step": 8610, "loss": 2.4657, "lr": 0.0003, "tps": 338298, "tokens": 9028239360, "gpu_gb": 72.2, "elapsed_s": 11329.8}
{"step": 8620, "loss": 2.4406, "lr": 0.0003, "tps": 338156, "tokens": 9038725120, "gpu_gb": 72.2, "elapsed_s": 11360.8}
{"step": 8630, "loss": 2.4004, "lr": 0.0003, "tps": 338562, "tokens": 9049210880, "gpu_gb": 72.2, "elapsed_s": 11391.8}
{"step": 8640, "loss": 2.4424, "lr": 0.0003, "tps": 338432, "tokens": 9059696640, "gpu_gb": 72.2, "elapsed_s": 11422.8}
{"step": 8650, "loss": 2.4534, "lr": 0.0003, "tps": 338231, "tokens": 9070182400, "gpu_gb": 72.2, "elapsed_s": 11453.8}
{"step": 8660, "loss": 2.4565, "lr": 0.0003, "tps": 337976, "tokens": 9080668160, "gpu_gb": 72.2, "elapsed_s": 11484.8}
{"step": 8670, "loss": 2.4475, "lr": 0.0003, "tps": 338158, "tokens": 9091153920, "gpu_gb": 72.2, "elapsed_s": 11515.8}
{"step": 8680, "loss": 2.4552, "lr": 0.0003, "tps": 338280, "tokens": 9101639680, "gpu_gb": 72.2, "elapsed_s": 11546.8}
{"step": 8690, "loss": 2.4173, "lr": 0.0003, "tps": 338294, "tokens": 9112125440, "gpu_gb": 72.2, "elapsed_s": 11577.8}
{"step": 8700, "loss": 2.447, "lr": 0.0003, "tps": 338433, "tokens": 9122611200, "gpu_gb": 72.2, "elapsed_s": 11608.8}
{"step": 8710, "loss": 2.4796, "lr": 0.0003, "tps": 338136, "tokens": 9133096960, "gpu_gb": 72.2, "elapsed_s": 11639.8}
{"step": 8720, "loss": 2.4378, "lr": 0.0003, "tps": 338312, "tokens": 9143582720, "gpu_gb": 72.2, "elapsed_s": 11670.8}
{"step": 8730, "loss": 2.4412, "lr": 0.0003, "tps": 338200, "tokens": 9154068480, "gpu_gb": 72.2, "elapsed_s": 11701.8}
{"step": 8740, "loss": 2.4153, "lr": 0.0003, "tps": 338079, "tokens": 9164554240, "gpu_gb": 72.2, "elapsed_s": 11732.8}
{"step": 8750, "loss": 2.4452, "lr": 0.0003, "tps": 338385, "tokens": 9175040000, "gpu_gb": 72.2, "elapsed_s": 11763.8}
{"step": 8760, "loss": 2.448, "lr": 0.0003, "tps": 338016, "tokens": 9185525760, "gpu_gb": 72.2, "elapsed_s": 11794.8}
{"step": 8770, "loss": 2.4726, "lr": 0.0003, "tps": 337981, "tokens": 9196011520, "gpu_gb": 72.2, "elapsed_s": 11825.9}
{"step": 8780, "loss": 2.4167, "lr": 0.0003, "tps": 338001, "tokens": 9206497280, "gpu_gb": 72.2, "elapsed_s": 11856.9}
{"step": 8790, "loss": 2.4437, "lr": 0.0003, "tps": 338320, "tokens": 9216983040, "gpu_gb": 72.2, "elapsed_s": 11887.9}
{"step": 8800, "loss": 2.4581, "lr": 0.0003, "tps": 338056, "tokens": 9227468800, "gpu_gb": 72.2, "elapsed_s": 11918.9}
{"step": 8810, "loss": 2.4567, "lr": 0.0003, "tps": 338264, "tokens": 9237954560, "gpu_gb": 72.2, "elapsed_s": 11949.9}
{"step": 8820, "loss": 2.4304, "lr": 0.0003, "tps": 338082, "tokens": 9248440320, "gpu_gb": 72.2, "elapsed_s": 11980.9}
{"step": 8830, "loss": 2.4512, "lr": 0.0003, "tps": 337991, "tokens": 9258926080, "gpu_gb": 72.2, "elapsed_s": 12011.9}
{"step": 8840, "loss": 2.4321, "lr": 0.0003, "tps": 338072, "tokens": 9269411840, "gpu_gb": 72.2, "elapsed_s": 12043.0}
{"step": 8850, "loss": 2.4577, "lr": 0.0003, "tps": 338402, "tokens": 9279897600, "gpu_gb": 72.2, "elapsed_s": 12073.9}
{"step": 8860, "loss": 2.4736, "lr": 0.0003, "tps": 338057, "tokens": 9290383360, "gpu_gb": 72.2, "elapsed_s": 12105.0}
{"step": 8870, "loss": 2.4995, "lr": 0.0003, "tps": 338382, "tokens": 9300869120, "gpu_gb": 72.2, "elapsed_s": 12136.0}
{"step": 8880, "loss": 2.4414, "lr": 0.0003, "tps": 338241, "tokens": 9311354880, "gpu_gb": 72.2, "elapsed_s": 12167.0}
{"step": 8890, "loss": 2.4537, "lr": 0.0003, "tps": 338161, "tokens": 9321840640, "gpu_gb": 72.2, "elapsed_s": 12198.0}
{"step": 8900, "loss": 2.4349, "lr": 0.0003, "tps": 338426, "tokens": 9332326400, "gpu_gb": 72.2, "elapsed_s": 12228.9}
{"step": 8910, "loss": 2.4181, "lr": 0.0003, "tps": 338547, "tokens": 9342812160, "gpu_gb": 72.2, "elapsed_s": 12259.9}
{"step": 8920, "loss": 2.4595, "lr": 0.0003, "tps": 338012, "tokens": 9353297920, "gpu_gb": 72.2, "elapsed_s": 12290.9}
{"step": 8930, "loss": 2.4425, "lr": 0.0003, "tps": 338326, "tokens": 9363783680, "gpu_gb": 72.2, "elapsed_s": 12321.9}
{"step": 8940, "loss": 2.4006, "lr": 0.0003, "tps": 338120, "tokens": 9374269440, "gpu_gb": 72.2, "elapsed_s": 12352.9}
{"step": 8950, "loss": 2.4478, "lr": 0.0003, "tps": 338047, "tokens": 9384755200, "gpu_gb": 72.2, "elapsed_s": 12384.0}
{"step": 8960, "loss": 2.445, "lr": 0.0003, "tps": 338355, "tokens": 9395240960, "gpu_gb": 72.2, "elapsed_s": 12415.0}
{"step": 8970, "loss": 2.4636, "lr": 0.0003, "tps": 338296, "tokens": 9405726720, "gpu_gb": 72.2, "elapsed_s": 12446.0}
{"step": 8980, "loss": 2.4507, "lr": 0.0003, "tps": 338201, "tokens": 9416212480, "gpu_gb": 72.2, "elapsed_s": 12477.0}
{"step": 8990, "loss": 2.42, "lr": 0.0003, "tps": 338286, "tokens": 9426698240, "gpu_gb": 72.2, "elapsed_s": 12508.0}
{"step": 9000, "loss": 2.4675, "lr": 0.0003, "tps": 338487, "tokens": 9437184000, "gpu_gb": 72.2, "elapsed_s": 12538.9}
{"step": 9010, "loss": 2.442, "lr": 0.0003, "tps": 126815, "tokens": 9447669760, "gpu_gb": 72.2, "elapsed_s": 12621.6}
{"step": 9020, "loss": 2.4275, "lr": 0.0003, "tps": 338530, "tokens": 9458155520, "gpu_gb": 72.2, "elapsed_s": 12652.6}
{"step": 9030, "loss": 2.4154, "lr": 0.0003, "tps": 337736, "tokens": 9468641280, "gpu_gb": 72.2, "elapsed_s": 12683.6}
{"step": 9040, "loss": 2.4105, "lr": 0.0003, "tps": 338153, "tokens": 9479127040, "gpu_gb": 72.2, "elapsed_s": 12714.7}
{"step": 9050, "loss": 2.4042, "lr": 0.0003, "tps": 338166, "tokens": 9489612800, "gpu_gb": 72.2, "elapsed_s": 12745.7}
{"step": 9060, "loss": 2.4575, "lr": 0.0003, "tps": 337789, "tokens": 9500098560, "gpu_gb": 72.2, "elapsed_s": 12776.7}
{"step": 9070, "loss": 2.4542, "lr": 0.0003, "tps": 338375, "tokens": 9510584320, "gpu_gb": 72.2, "elapsed_s": 12807.7}
{"step": 9080, "loss": 2.3721, "lr": 0.0003, "tps": 338185, "tokens": 9521070080, "gpu_gb": 72.2, "elapsed_s": 12838.7}
{"step": 9090, "loss": 2.446, "lr": 0.0003, "tps": 338407, "tokens": 9531555840, "gpu_gb": 72.2, "elapsed_s": 12869.7}
{"step": 9100, "loss": 2.4117, "lr": 0.0003, "tps": 338436, "tokens": 9542041600, "gpu_gb": 72.2, "elapsed_s": 12900.7}
{"step": 9110, "loss": 2.3748, "lr": 0.0003, "tps": 338080, "tokens": 9552527360, "gpu_gb": 72.2, "elapsed_s": 12931.7}
{"step": 9120, "loss": 2.427, "lr": 0.0003, "tps": 338124, "tokens": 9563013120, "gpu_gb": 72.2, "elapsed_s": 12962.7}
{"step": 9130, "loss": 2.3993, "lr": 0.0003, "tps": 338331, "tokens": 9573498880, "gpu_gb": 72.2, "elapsed_s": 12993.7}
{"step": 9140, "loss": 2.3935, "lr": 0.0003, "tps": 338246, "tokens": 9583984640, "gpu_gb": 72.2, "elapsed_s": 13024.7}
{"step": 9150, "loss": 2.475, "lr": 0.0003, "tps": 338195, "tokens": 9594470400, "gpu_gb": 72.2, "elapsed_s": 13055.7}
{"step": 9160, "loss": 2.4813, "lr": 0.0003, "tps": 338109, "tokens": 9604956160, "gpu_gb": 72.2, "elapsed_s": 13086.7}
{"step": 9170, "loss": 2.4348, "lr": 0.0003, "tps": 338304, "tokens": 9615441920, "gpu_gb": 72.2, "elapsed_s": 13117.7}
{"step": 9180, "loss": 2.4625, "lr": 0.0003, "tps": 338222, "tokens": 9625927680, "gpu_gb": 72.2, "elapsed_s": 13148.7}
{"step": 9190, "loss": 2.4584, "lr": 0.0003, "tps": 338306, "tokens": 9636413440, "gpu_gb": 72.2, "elapsed_s": 13179.7}
{"step": 9200, "loss": 2.4096, "lr": 0.0003, "tps": 338224, "tokens": 9646899200, "gpu_gb": 72.2, "elapsed_s": 13210.7}
{"step": 9210, "loss": 2.426, "lr": 0.0003, "tps": 338128, "tokens": 9657384960, "gpu_gb": 72.2, "elapsed_s": 13241.7}
{"step": 9220, "loss": 2.4309, "lr": 0.0003, "tps": 338160, "tokens": 9667870720, "gpu_gb": 72.2, "elapsed_s": 13272.7}
{"step": 9230, "loss": 2.4154, "lr": 0.0003, "tps": 338621, "tokens": 9678356480, "gpu_gb": 72.2, "elapsed_s": 13303.7}
{"step": 9240, "loss": 2.4008, "lr": 0.0003, "tps": 338098, "tokens": 9688842240, "gpu_gb": 72.2, "elapsed_s": 13334.7}
{"step": 9250, "loss": 2.4203, "lr": 0.0003, "tps": 338376, "tokens": 9699328000, "gpu_gb": 72.2, "elapsed_s": 13365.7}
{"step": 9260, "loss": 2.437, "lr": 0.0003, "tps": 338313, "tokens": 9709813760, "gpu_gb": 72.2, "elapsed_s": 13396.7}
{"step": 9270, "loss": 2.4501, "lr": 0.0003, "tps": 338618, "tokens": 9720299520, "gpu_gb": 72.2, "elapsed_s": 13427.7}
{"step": 9280, "loss": 2.3993, "lr": 0.0003, "tps": 338492, "tokens": 9730785280, "gpu_gb": 72.2, "elapsed_s": 13458.6}
{"step": 9290, "loss": 2.3937, "lr": 0.0003, "tps": 338566, "tokens": 9741271040, "gpu_gb": 72.2, "elapsed_s": 13489.6}
{"step": 9300, "loss": 2.3956, "lr": 0.0003, "tps": 338242, "tokens": 9751756800, "gpu_gb": 72.2, "elapsed_s": 13520.6}
{"step": 9310, "loss": 2.4646, "lr": 0.0003, "tps": 338448, "tokens": 9762242560, "gpu_gb": 72.2, "elapsed_s": 13551.6}
{"step": 9320, "loss": 2.3666, "lr": 0.0003, "tps": 338497, "tokens": 9772728320, "gpu_gb": 72.2, "elapsed_s": 13582.6}
{"step": 9330, "loss": 2.4746, "lr": 0.0003, "tps": 338485, "tokens": 9783214080, "gpu_gb": 72.2, "elapsed_s": 13613.5}
{"step": 9340, "loss": 2.4122, "lr": 0.0003, "tps": 338301, "tokens": 9793699840, "gpu_gb": 72.2, "elapsed_s": 13644.5}
{"step": 9350, "loss": 2.4453, "lr": 0.0003, "tps": 338589, "tokens": 9804185600, "gpu_gb": 72.2, "elapsed_s": 13675.5}
{"step": 9360, "loss": 2.48, "lr": 0.0003, "tps": 338161, "tokens": 9814671360, "gpu_gb": 72.2, "elapsed_s": 13706.5}
{"step": 9370, "loss": 2.4701, "lr": 0.0003, "tps": 338577, "tokens": 9825157120, "gpu_gb": 72.2, "elapsed_s": 13737.5}
{"step": 9380, "loss": 2.4218, "lr": 0.0003, "tps": 338432, "tokens": 9835642880, "gpu_gb": 72.2, "elapsed_s": 13768.5}
{"step": 9390, "loss": 2.442, "lr": 0.0003, "tps": 338586, "tokens": 9846128640, "gpu_gb": 72.2, "elapsed_s": 13799.4}
{"step": 9400, "loss": 2.3994, "lr": 0.0003, "tps": 338231, "tokens": 9856614400, "gpu_gb": 72.2, "elapsed_s": 13830.4}
{"step": 9410, "loss": 2.4197, "lr": 0.0003, "tps": 338479, "tokens": 9867100160, "gpu_gb": 72.2, "elapsed_s": 13861.4}
{"step": 9420, "loss": 2.4221, "lr": 0.0003, "tps": 338412, "tokens": 9877585920, "gpu_gb": 72.2, "elapsed_s": 13892.4}
{"step": 9430, "loss": 2.483, "lr": 0.0003, "tps": 338548, "tokens": 9888071680, "gpu_gb": 72.2, "elapsed_s": 13923.4}
{"step": 9440, "loss": 2.4572, "lr": 0.0003, "tps": 338787, "tokens": 9898557440, "gpu_gb": 72.2, "elapsed_s": 13954.3}
{"step": 9450, "loss": 2.4644, "lr": 0.0003, "tps": 338495, "tokens": 9909043200, "gpu_gb": 72.2, "elapsed_s": 13985.3}
{"step": 9460, "loss": 2.4399, "lr": 0.0003, "tps": 338376, "tokens": 9919528960, "gpu_gb": 72.2, "elapsed_s": 14016.3}
{"step": 9470, "loss": 2.3801, "lr": 0.0003, "tps": 338368, "tokens": 9930014720, "gpu_gb": 72.2, "elapsed_s": 14047.3}
{"step": 9480, "loss": 2.4606, "lr": 0.0003, "tps": 338255, "tokens": 9940500480, "gpu_gb": 72.2, "elapsed_s": 14078.3}
{"step": 9490, "loss": 2.4278, "lr": 0.0003, "tps": 338366, "tokens": 9950986240, "gpu_gb": 72.2, "elapsed_s": 14109.3}
{"step": 9500, "loss": 2.4575, "lr": 0.0003, "tps": 338357, "tokens": 9961472000, "gpu_gb": 72.2, "elapsed_s": 14140.3}
{"step": 9510, "loss": 2.3895, "lr": 0.0003, "tps": 338436, "tokens": 9971957760, "gpu_gb": 72.2, "elapsed_s": 14171.2}
{"step": 9520, "loss": 2.4084, "lr": 0.0003, "tps": 338408, "tokens": 9982443520, "gpu_gb": 72.2, "elapsed_s": 14202.2}
{"step": 9530, "loss": 2.4169, "lr": 0.0003, "tps": 338458, "tokens": 9992929280, "gpu_gb": 72.2, "elapsed_s": 14233.2}
{"step": 9540, "loss": 2.4284, "lr": 0.0003, "tps": 338591, "tokens": 10003415040, "gpu_gb": 72.2, "elapsed_s": 14264.2}
{"step": 9550, "loss": 2.4422, "lr": 0.0003, "tps": 338256, "tokens": 10013900800, "gpu_gb": 72.2, "elapsed_s": 14295.2}
{"step": 9560, "loss": 2.4412, "lr": 0.0003, "tps": 338076, "tokens": 10024386560, "gpu_gb": 72.2, "elapsed_s": 14326.2}
{"step": 9570, "loss": 2.4557, "lr": 0.0003, "tps": 338244, "tokens": 10034872320, "gpu_gb": 72.2, "elapsed_s": 14357.2}
{"step": 9580, "loss": 2.443, "lr": 0.0003, "tps": 338417, "tokens": 10045358080, "gpu_gb": 72.2, "elapsed_s": 14388.2}
{"step": 9590, "loss": 2.4139, "lr": 0.0003, "tps": 338586, "tokens": 10055843840, "gpu_gb": 72.2, "elapsed_s": 14419.2}
{"step": 9600, "loss": 2.4079, "lr": 0.0003, "tps": 338566, "tokens": 10066329600, "gpu_gb": 72.2, "elapsed_s": 14450.1}
{"step": 9610, "loss": 2.4447, "lr": 0.0003, "tps": 338461, "tokens": 10076815360, "gpu_gb": 72.2, "elapsed_s": 14481.1}
{"step": 9620, "loss": 2.4263, "lr": 0.0003, "tps": 338519, "tokens": 10087301120, "gpu_gb": 72.2, "elapsed_s": 14512.1}
{"step": 9630, "loss": 2.4728, "lr": 0.0003, "tps": 338527, "tokens": 10097786880, "gpu_gb": 72.2, "elapsed_s": 14543.1}
{"step": 9640, "loss": 2.4406, "lr": 0.0003, "tps": 338409, "tokens": 10108272640, "gpu_gb": 72.2, "elapsed_s": 14574.0}
{"step": 9650, "loss": 2.407, "lr": 0.0003, "tps": 338196, "tokens": 10118758400, "gpu_gb": 72.2, "elapsed_s": 14605.1}
{"step": 9660, "loss": 2.4009, "lr": 0.0003, "tps": 338568, "tokens": 10129244160, "gpu_gb": 72.2, "elapsed_s": 14636.0}
{"step": 9670, "loss": 2.4206, "lr": 0.0003, "tps": 338213, "tokens": 10139729920, "gpu_gb": 72.2, "elapsed_s": 14667.0}
{"step": 9680, "loss": 2.4462, "lr": 0.0003, "tps": 338601, "tokens": 10150215680, "gpu_gb": 72.2, "elapsed_s": 14698.0}
{"step": 9690, "loss": 2.4238, "lr": 0.0003, "tps": 338374, "tokens": 10160701440, "gpu_gb": 72.2, "elapsed_s": 14729.0}
{"step": 9700, "loss": 2.4148, "lr": 0.0003, "tps": 338058, "tokens": 10171187200, "gpu_gb": 72.2, "elapsed_s": 14760.0}
{"step": 9710, "loss": 2.444, "lr": 0.0003, "tps": 338484, "tokens": 10181672960, "gpu_gb": 72.2, "elapsed_s": 14791.0}
{"step": 9720, "loss": 2.4163, "lr": 0.0003, "tps": 338455, "tokens": 10192158720, "gpu_gb": 72.2, "elapsed_s": 14822.0}
{"step": 9730, "loss": 2.4501, "lr": 0.0003, "tps": 338569, "tokens": 10202644480, "gpu_gb": 72.2, "elapsed_s": 14852.9}
{"step": 9740, "loss": 2.4254, "lr": 0.0003, "tps": 338486, "tokens": 10213130240, "gpu_gb": 72.2, "elapsed_s": 14883.9}
{"step": 9750, "loss": 2.4275, "lr": 0.0003, "tps": 338348, "tokens": 10223616000, "gpu_gb": 72.2, "elapsed_s": 14914.9}
{"step": 9760, "loss": 2.4279, "lr": 0.0003, "tps": 337043, "tokens": 10234101760, "gpu_gb": 72.2, "elapsed_s": 14946.0}
{"step": 9770, "loss": 2.4236, "lr": 0.0003, "tps": 338088, "tokens": 10244587520, "gpu_gb": 72.2, "elapsed_s": 14977.0}
{"step": 9780, "loss": 2.4643, "lr": 0.0003, "tps": 338203, "tokens": 10255073280, "gpu_gb": 72.2, "elapsed_s": 15008.0}
{"step": 9790, "loss": 2.4104, "lr": 0.0003, "tps": 338219, "tokens": 10265559040, "gpu_gb": 72.2, "elapsed_s": 15039.0}
{"step": 9800, "loss": 2.3795, "lr": 0.0003, "tps": 338289, "tokens": 10276044800, "gpu_gb": 72.2, "elapsed_s": 15070.0}
{"step": 9810, "loss": 2.4091, "lr": 0.0003, "tps": 337951, "tokens": 10286530560, "gpu_gb": 72.2, "elapsed_s": 15101.1}
{"step": 9820, "loss": 2.4086, "lr": 0.0003, "tps": 338277, "tokens": 10297016320, "gpu_gb": 72.2, "elapsed_s": 15132.1}
{"step": 9830, "loss": 2.3981, "lr": 0.0003, "tps": 338092, "tokens": 10307502080, "gpu_gb": 72.2, "elapsed_s": 15163.1}
{"step": 9840, "loss": 2.3926, "lr": 0.0003, "tps": 338010, "tokens": 10317987840, "gpu_gb": 72.2, "elapsed_s": 15194.1}
{"step": 9850, "loss": 2.4277, "lr": 0.0003, "tps": 338215, "tokens": 10328473600, "gpu_gb": 72.2, "elapsed_s": 15225.1}
{"step": 9860, "loss": 2.3758, "lr": 0.0003, "tps": 337979, "tokens": 10338959360, "gpu_gb": 72.2, "elapsed_s": 15256.1}
{"step": 9870, "loss": 2.3967, "lr": 0.0003, "tps": 338219, "tokens": 10349445120, "gpu_gb": 72.2, "elapsed_s": 15287.1}
{"step": 9880, "loss": 2.4129, "lr": 0.0003, "tps": 338178, "tokens": 10359930880, "gpu_gb": 72.2, "elapsed_s": 15318.1}
{"step": 9890, "loss": 2.3913, "lr": 0.0003, "tps": 337854, "tokens": 10370416640, "gpu_gb": 72.2, "elapsed_s": 15349.2}
{"step": 9900, "loss": 2.4738, "lr": 0.0003, "tps": 338279, "tokens": 10380902400, "gpu_gb": 72.2, "elapsed_s": 15380.2}
{"step": 9910, "loss": 2.4208, "lr": 0.0003, "tps": 338166, "tokens": 10391388160, "gpu_gb": 72.2, "elapsed_s": 15411.2}
{"step": 9920, "loss": 2.4414, "lr": 0.0003, "tps": 337934, "tokens": 10401873920, "gpu_gb": 72.2, "elapsed_s": 15442.2}
{"step": 9930, "loss": 2.4706, "lr": 0.0003, "tps": 337920, "tokens": 10412359680, "gpu_gb": 72.2, "elapsed_s": 15473.2}
{"step": 9940, "loss": 2.4099, "lr": 0.0003, "tps": 338288, "tokens": 10422845440, "gpu_gb": 72.2, "elapsed_s": 15504.2}
{"step": 9950, "loss": 2.4108, "lr": 0.0003, "tps": 337891, "tokens": 10433331200, "gpu_gb": 72.2, "elapsed_s": 15535.3}
{"step": 9960, "loss": 2.4065, "lr": 0.0003, "tps": 338215, "tokens": 10443816960, "gpu_gb": 72.2, "elapsed_s": 15566.3}
{"step": 9970, "loss": 2.441, "lr": 0.0003, "tps": 337994, "tokens": 10454302720, "gpu_gb": 72.2, "elapsed_s": 15597.3}
{"step": 9980, "loss": 2.4234, "lr": 0.0003, "tps": 338085, "tokens": 10464788480, "gpu_gb": 72.2, "elapsed_s": 15628.3}
{"step": 9990, "loss": 2.4611, "lr": 0.0003, "tps": 338117, "tokens": 10475274240, "gpu_gb": 72.2, "elapsed_s": 15659.3}
{"step": 10000, "loss": 2.4619, "lr": 0.0003, "tps": 338012, "tokens": 10485760000, "gpu_gb": 72.2, "elapsed_s": 15690.3}
{"step": 10010, "loss": 2.387, "lr": 0.0003, "tps": 140994, "tokens": 10496245760, "gpu_gb": 72.2, "elapsed_s": 15764.7}
{"step": 10020, "loss": 2.4048, "lr": 0.0003, "tps": 338393, "tokens": 10506731520, "gpu_gb": 72.2, "elapsed_s": 15795.7}
{"step": 10030, "loss": 2.4355, "lr": 0.0003, "tps": 338054, "tokens": 10517217280, "gpu_gb": 72.2, "elapsed_s": 15826.7}
{"step": 10040, "loss": 2.4068, "lr": 0.0003, "tps": 338140, "tokens": 10527703040, "gpu_gb": 72.2, "elapsed_s": 15857.7}
{"step": 10050, "loss": 2.4075, "lr": 0.0003, "tps": 338238, "tokens": 10538188800, "gpu_gb": 72.2, "elapsed_s": 15888.7}
{"step": 10060, "loss": 2.4714, "lr": 0.0003, "tps": 338196, "tokens": 10548674560, "gpu_gb": 72.2, "elapsed_s": 15919.7}
{"step": 10070, "loss": 2.4462, "lr": 0.0003, "tps": 338380, "tokens": 10559160320, "gpu_gb": 72.2, "elapsed_s": 15950.7}
{"step": 10080, "loss": 2.4507, "lr": 0.0003, "tps": 338400, "tokens": 10569646080, "gpu_gb": 72.2, "elapsed_s": 15981.7}
{"step": 10090, "loss": 2.4487, "lr": 0.0003, "tps": 338228, "tokens": 10580131840, "gpu_gb": 72.2, "elapsed_s": 16012.7}
{"step": 10100, "loss": 2.4378, "lr": 0.0003, "tps": 338752, "tokens": 10590617600, "gpu_gb": 72.2, "elapsed_s": 16043.7}
{"step": 10110, "loss": 2.4156, "lr": 0.0003, "tps": 338770, "tokens": 10601103360, "gpu_gb": 72.2, "elapsed_s": 16074.6}
{"step": 10120, "loss": 2.4102, "lr": 0.0003, "tps": 338795, "tokens": 10611589120, "gpu_gb": 72.2, "elapsed_s": 16105.6}
{"step": 10130, "loss": 2.4744, "lr": 0.0003, "tps": 338819, "tokens": 10622074880, "gpu_gb": 72.2, "elapsed_s": 16136.5}
{"step": 10140, "loss": 2.4978, "lr": 0.0003, "tps": 338807, "tokens": 10632560640, "gpu_gb": 72.2, "elapsed_s": 16167.5}
{"step": 10150, "loss": 2.4423, "lr": 0.0003, "tps": 338436, "tokens": 10643046400, "gpu_gb": 72.2, "elapsed_s": 16198.5}
{"step": 10160, "loss": 2.4175, "lr": 0.0003, "tps": 338574, "tokens": 10653532160, "gpu_gb": 72.2, "elapsed_s": 16229.4}
{"step": 10170, "loss": 2.4907, "lr": 0.0003, "tps": 338440, "tokens": 10664017920, "gpu_gb": 72.2, "elapsed_s": 16260.4}
{"step": 10180, "loss": 2.4887, "lr": 0.0003, "tps": 338571, "tokens": 10674503680, "gpu_gb": 72.2, "elapsed_s": 16291.4}
{"step": 10190, "loss": 2.4766, "lr": 0.0003, "tps": 338442, "tokens": 10684989440, "gpu_gb": 72.2, "elapsed_s": 16322.4}
{"step": 10200, "loss": 2.4712, "lr": 0.0003, "tps": 338227, "tokens": 10695475200, "gpu_gb": 72.2, "elapsed_s": 16353.4}
{"step": 10210, "loss": 2.4262, "lr": 0.0003, "tps": 338481, "tokens": 10705960960, "gpu_gb": 72.2, "elapsed_s": 16384.3}
{"step": 10220, "loss": 2.4247, "lr": 0.0003, "tps": 338414, "tokens": 10716446720, "gpu_gb": 72.2, "elapsed_s": 16415.3}
{"step": 10230, "loss": 2.4556, "lr": 0.0003, "tps": 338541, "tokens": 10726932480, "gpu_gb": 72.2, "elapsed_s": 16446.3}
{"step": 10240, "loss": 2.4637, "lr": 0.0003, "tps": 338456, "tokens": 10737418240, "gpu_gb": 72.2, "elapsed_s": 16477.3}
{"step": 10250, "loss": 2.4527, "lr": 0.0003, "tps": 338220, "tokens": 10747904000, "gpu_gb": 72.2, "elapsed_s": 16508.3}
{"step": 10260, "loss": 2.4185, "lr": 0.0003, "tps": 338406, "tokens": 10758389760, "gpu_gb": 72.2, "elapsed_s": 16539.3}
{"step": 10270, "loss": 2.4376, "lr": 0.0003, "tps": 338470, "tokens": 10768875520, "gpu_gb": 72.2, "elapsed_s": 16570.3}
{"step": 10280, "loss": 2.4599, "lr": 0.0003, "tps": 338230, "tokens": 10779361280, "gpu_gb": 72.2, "elapsed_s": 16601.3}
{"step": 10290, "loss": 2.4572, "lr": 0.0003, "tps": 338513, "tokens": 10789847040, "gpu_gb": 72.2, "elapsed_s": 16632.2}
{"step": 10300, "loss": 2.3966, "lr": 0.0003, "tps": 338569, "tokens": 10800332800, "gpu_gb": 72.2, "elapsed_s": 16663.2}
{"step": 10310, "loss": 2.466, "lr": 0.0003, "tps": 338309, "tokens": 10810818560, "gpu_gb": 72.2, "elapsed_s": 16694.2}
{"step": 10320, "loss": 2.434, "lr": 0.0003, "tps": 338210, "tokens": 10821304320, "gpu_gb": 72.2, "elapsed_s": 16725.2}
{"step": 10330, "loss": 2.4047, "lr": 0.0003, "tps": 338202, "tokens": 10831790080, "gpu_gb": 72.2, "elapsed_s": 16756.2}
{"step": 10340, "loss": 2.5395, "lr": 0.0003, "tps": 338541, "tokens": 10842275840, "gpu_gb": 72.2, "elapsed_s": 16787.2}
{"step": 10350, "loss": 2.4614, "lr": 0.0003, "tps": 338332, "tokens": 10852761600, "gpu_gb": 72.2, "elapsed_s": 16818.2}
{"step": 10360, "loss": 2.4354, "lr": 0.0003, "tps": 338489, "tokens": 10863247360, "gpu_gb": 72.2, "elapsed_s": 16849.1}
{"step": 10370, "loss": 2.4616, "lr": 0.0003, "tps": 338323, "tokens": 10873733120, "gpu_gb": 72.2, "elapsed_s": 16880.1}
{"step": 10380, "loss": 2.4423, "lr": 0.0003, "tps": 338395, "tokens": 10884218880, "gpu_gb": 72.2, "elapsed_s": 16911.1}
{"step": 10390, "loss": 2.4378, "lr": 0.0003, "tps": 338492, "tokens": 10894704640, "gpu_gb": 72.2, "elapsed_s": 16942.1}
{"step": 10400, "loss": 2.4735, "lr": 0.0003, "tps": 338276, "tokens": 10905190400, "gpu_gb": 72.2, "elapsed_s": 16973.1}
{"step": 10410, "loss": 2.4209, "lr": 0.0003, "tps": 338447, "tokens": 10915676160, "gpu_gb": 72.2, "elapsed_s": 17004.1}
{"step": 10420, "loss": 2.4674, "lr": 0.0003, "tps": 338160, "tokens": 10926161920, "gpu_gb": 72.2, "elapsed_s": 17035.1}
{"step": 10430, "loss": 2.4622, "lr": 0.0003, "tps": 338415, "tokens": 10936647680, "gpu_gb": 72.2, "elapsed_s": 17066.1}
{"step": 10440, "loss": 2.3867, "lr": 0.0003, "tps": 338397, "tokens": 10947133440, "gpu_gb": 72.2, "elapsed_s": 17097.1}
{"step": 10450, "loss": 2.4918, "lr": 0.0003, "tps": 338304, "tokens": 10957619200, "gpu_gb": 72.2, "elapsed_s": 17128.1}
{"step": 10460, "loss": 2.4213, "lr": 0.0003, "tps": 338723, "tokens": 10968104960, "gpu_gb": 72.2, "elapsed_s": 17159.0}
{"step": 10470, "loss": 2.4271, "lr": 0.0003, "tps": 338456, "tokens": 10978590720, "gpu_gb": 72.2, "elapsed_s": 17190.0}
{"step": 10480, "loss": 2.371, "lr": 0.0003, "tps": 338667, "tokens": 10989076480, "gpu_gb": 72.2, "elapsed_s": 17221.0}
{"step": 10490, "loss": 2.4322, "lr": 0.0003, "tps": 338306, "tokens": 10999562240, "gpu_gb": 72.2, "elapsed_s": 17252.0}
{"step": 10500, "loss": 2.4418, "lr": 0.0003, "tps": 338471, "tokens": 11010048000, "gpu_gb": 72.2, "elapsed_s": 17282.9}
{"step": 10510, "loss": 2.4303, "lr": 0.0003, "tps": 338683, "tokens": 11020533760, "gpu_gb": 72.2, "elapsed_s": 17313.9}
{"step": 10520, "loss": 2.4623, "lr": 0.0003, "tps": 338552, "tokens": 11031019520, "gpu_gb": 72.2, "elapsed_s": 17344.9}
{"step": 10530, "loss": 2.4732, "lr": 0.0003, "tps": 338557, "tokens": 11041505280, "gpu_gb": 72.2, "elapsed_s": 17375.8}
{"step": 10540, "loss": 2.4397, "lr": 0.0003, "tps": 338283, "tokens": 11051991040, "gpu_gb": 72.2, "elapsed_s": 17406.8}
{"step": 10550, "loss": 2.4615, "lr": 0.0003, "tps": 338277, "tokens": 11062476800, "gpu_gb": 72.2, "elapsed_s": 17437.8}
{"step": 10560, "loss": 2.4518, "lr": 0.0003, "tps": 338480, "tokens": 11072962560, "gpu_gb": 72.2, "elapsed_s": 17468.8}
{"step": 10570, "loss": 2.4681, "lr": 0.0003, "tps": 338460, "tokens": 11083448320, "gpu_gb": 72.2, "elapsed_s": 17499.8}
{"step": 10580, "loss": 2.4557, "lr": 0.0003, "tps": 338722, "tokens": 11093934080, "gpu_gb": 72.2, "elapsed_s": 17530.8}
{"step": 10590, "loss": 2.4441, "lr": 0.0003, "tps": 338230, "tokens": 11104419840, "gpu_gb": 72.2, "elapsed_s": 17561.8}
{"step": 10600, "loss": 2.4625, "lr": 0.0003, "tps": 338555, "tokens": 11114905600, "gpu_gb": 72.2, "elapsed_s": 17592.7}
{"step": 10610, "loss": 2.3645, "lr": 0.0003, "tps": 338608, "tokens": 11125391360, "gpu_gb": 72.2, "elapsed_s": 17623.7}
{"step": 10620, "loss": 2.4254, "lr": 0.0003, "tps": 338463, "tokens": 11135877120, "gpu_gb": 72.2, "elapsed_s": 17654.7}
{"step": 10630, "loss": 2.4599, "lr": 0.0003, "tps": 338511, "tokens": 11146362880, "gpu_gb": 72.2, "elapsed_s": 17685.7}
{"step": 10640, "loss": 2.4218, "lr": 0.0003, "tps": 338283, "tokens": 11156848640, "gpu_gb": 72.2, "elapsed_s": 17716.7}
{"step": 10650, "loss": 2.463, "lr": 0.0003, "tps": 338788, "tokens": 11167334400, "gpu_gb": 72.2, "elapsed_s": 17747.6}
{"step": 10660, "loss": 2.4947, "lr": 0.0003, "tps": 338710, "tokens": 11177820160, "gpu_gb": 72.2, "elapsed_s": 17778.6}
{"step": 10670, "loss": 2.413, "lr": 0.0003, "tps": 338332, "tokens": 11188305920, "gpu_gb": 72.2, "elapsed_s": 17809.6}
{"step": 10680, "loss": 2.443, "lr": 0.0003, "tps": 338577, "tokens": 11198791680, "gpu_gb": 72.2, "elapsed_s": 17840.5}
{"step": 10690, "loss": 2.4756, "lr": 0.0003, "tps": 338344, "tokens": 11209277440, "gpu_gb": 72.2, "elapsed_s": 17871.5}
{"step": 10700, "loss": 2.4497, "lr": 0.0003, "tps": 338663, "tokens": 11219763200, "gpu_gb": 72.2, "elapsed_s": 17902.5}
{"step": 10710, "loss": 2.4549, "lr": 0.0003, "tps": 338763, "tokens": 11230248960, "gpu_gb": 72.2, "elapsed_s": 17933.4}
{"step": 10720, "loss": 2.4102, "lr": 0.0003, "tps": 338107, "tokens": 11240734720, "gpu_gb": 72.2, "elapsed_s": 17964.4}
{"step": 10730, "loss": 2.4197, "lr": 0.0003, "tps": 338432, "tokens": 11251220480, "gpu_gb": 72.2, "elapsed_s": 17995.4}
{"step": 10740, "loss": 2.4231, "lr": 0.0003, "tps": 338165, "tokens": 11261706240, "gpu_gb": 72.2, "elapsed_s": 18026.4}
{"step": 10750, "loss": 2.438, "lr": 0.0003, "tps": 338554, "tokens": 11272192000, "gpu_gb": 72.2, "elapsed_s": 18057.4}
{"step": 10760, "loss": 2.4501, "lr": 0.0003, "tps": 338423, "tokens": 11282677760, "gpu_gb": 72.2, "elapsed_s": 18088.4}
{"step": 10770, "loss": 2.4263, "lr": 0.0003, "tps": 338351, "tokens": 11293163520, "gpu_gb": 72.2, "elapsed_s": 18119.4}
{"step": 10780, "loss": 2.4337, "lr": 0.0003, "tps": 338338, "tokens": 11303649280, "gpu_gb": 72.2, "elapsed_s": 18150.4}
{"step": 10790, "loss": 2.4504, "lr": 0.0003, "tps": 338510, "tokens": 11314135040, "gpu_gb": 72.2, "elapsed_s": 18181.4}
{"step": 10800, "loss": 2.4431, "lr": 0.0003, "tps": 338693, "tokens": 11324620800, "gpu_gb": 72.2, "elapsed_s": 18212.3}
{"step": 10810, "loss": 2.3984, "lr": 0.0003, "tps": 338391, "tokens": 11335106560, "gpu_gb": 72.2, "elapsed_s": 18243.3}
{"step": 10820, "loss": 2.511, "lr": 0.0003, "tps": 338181, "tokens": 11345592320, "gpu_gb": 72.2, "elapsed_s": 18274.3}
{"step": 10830, "loss": 2.4179, "lr": 0.0003, "tps": 338651, "tokens": 11356078080, "gpu_gb": 72.2, "elapsed_s": 18305.3}
{"step": 10840, "loss": 2.4802, "lr": 0.0003, "tps": 338654, "tokens": 11366563840, "gpu_gb": 72.2, "elapsed_s": 18336.2}
{"step": 10850, "loss": 2.4437, "lr": 0.0003, "tps": 338340, "tokens": 11377049600, "gpu_gb": 72.2, "elapsed_s": 18367.2}
{"step": 10860, "loss": 2.4091, "lr": 0.0003, "tps": 338591, "tokens": 11387535360, "gpu_gb": 72.2, "elapsed_s": 18398.2}
{"step": 10870, "loss": 2.4559, "lr": 0.0003, "tps": 338422, "tokens": 11398021120, "gpu_gb": 72.2, "elapsed_s": 18429.2}
{"step": 10880, "loss": 2.441, "lr": 0.0003, "tps": 338385, "tokens": 11408506880, "gpu_gb": 72.2, "elapsed_s": 18460.2}
{"step": 10890, "loss": 2.4578, "lr": 0.0003, "tps": 338603, "tokens": 11418992640, "gpu_gb": 72.2, "elapsed_s": 18491.1}
{"step": 10900, "loss": 2.4516, "lr": 0.0003, "tps": 338407, "tokens": 11429478400, "gpu_gb": 72.2, "elapsed_s": 18522.1}
{"step": 10910, "loss": 2.4458, "lr": 0.0003, "tps": 338548, "tokens": 11439964160, "gpu_gb": 72.2, "elapsed_s": 18553.1}
{"step": 10920, "loss": 2.4566, "lr": 0.0003, "tps": 338321, "tokens": 11450449920, "gpu_gb": 72.2, "elapsed_s": 18584.1}
{"step": 10930, "loss": 2.4641, "lr": 0.0003, "tps": 338249, "tokens": 11460935680, "gpu_gb": 72.2, "elapsed_s": 18615.1}
{"step": 10940, "loss": 2.5163, "lr": 0.0003, "tps": 338442, "tokens": 11471421440, "gpu_gb": 72.2, "elapsed_s": 18646.1}
{"step": 10950, "loss": 2.454, "lr": 0.0003, "tps": 338520, "tokens": 11481907200, "gpu_gb": 72.2, "elapsed_s": 18677.0}
{"step": 10960, "loss": 2.4238, "lr": 0.0003, "tps": 338387, "tokens": 11492392960, "gpu_gb": 72.2, "elapsed_s": 18708.0}
{"step": 10970, "loss": 2.4229, "lr": 0.0003, "tps": 338577, "tokens": 11502878720, "gpu_gb": 72.2, "elapsed_s": 18739.0}
{"step": 10980, "loss": 2.3748, "lr": 0.0003, "tps": 338404, "tokens": 11513364480, "gpu_gb": 72.2, "elapsed_s": 18770.0}
{"step": 10990, "loss": 2.4646, "lr": 0.0003, "tps": 338264, "tokens": 11523850240, "gpu_gb": 72.2, "elapsed_s": 18801.0}
{"step": 11000, "loss": 2.437, "lr": 0.0003, "tps": 338497, "tokens": 11534336000, "gpu_gb": 72.2, "elapsed_s": 18832.0}
{"step": 11010, "loss": 2.4665, "lr": 0.0003, "tps": 144230, "tokens": 11544821760, "gpu_gb": 72.2, "elapsed_s": 18904.7}
{"step": 11020, "loss": 2.4624, "lr": 0.0003, "tps": 337633, "tokens": 11555307520, "gpu_gb": 72.2, "elapsed_s": 18935.7}
{"step": 11030, "loss": 2.4537, "lr": 0.0003, "tps": 337794, "tokens": 11565793280, "gpu_gb": 72.2, "elapsed_s": 18966.8}
{"step": 11040, "loss": 2.4209, "lr": 0.0003, "tps": 337856, "tokens": 11576279040, "gpu_gb": 72.2, "elapsed_s": 18997.8}
{"step": 11050, "loss": 2.4151, "lr": 0.0003, "tps": 337798, "tokens": 11586764800, "gpu_gb": 72.2, "elapsed_s": 19028.8}
{"step": 11060, "loss": 2.4396, "lr": 0.0003, "tps": 338087, "tokens": 11597250560, "gpu_gb": 72.2, "elapsed_s": 19059.9}
{"step": 11070, "loss": 2.4689, "lr": 0.0003, "tps": 337992, "tokens": 11607736320, "gpu_gb": 72.2, "elapsed_s": 19090.9}
{"step": 11080, "loss": 2.4406, "lr": 0.0003, "tps": 338295, "tokens": 11618222080, "gpu_gb": 72.2, "elapsed_s": 19121.9}
{"step": 11090, "loss": 2.4318, "lr": 0.0003, "tps": 338035, "tokens": 11628707840, "gpu_gb": 72.2, "elapsed_s": 19152.9}
{"step": 11100, "loss": 2.4384, "lr": 0.0003, "tps": 338038, "tokens": 11639193600, "gpu_gb": 72.2, "elapsed_s": 19183.9}
{"step": 11110, "loss": 2.4381, "lr": 0.0003, "tps": 337969, "tokens": 11649679360, "gpu_gb": 72.2, "elapsed_s": 19215.0}
{"step": 11120, "loss": 2.4421, "lr": 0.0003, "tps": 337959, "tokens": 11660165120, "gpu_gb": 72.2, "elapsed_s": 19246.0}
{"step": 11130, "loss": 2.44, "lr": 0.0003, "tps": 337947, "tokens": 11670650880, "gpu_gb": 72.2, "elapsed_s": 19277.0}
{"step": 11140, "loss": 2.4267, "lr": 0.0003, "tps": 337997, "tokens": 11681136640, "gpu_gb": 72.2, "elapsed_s": 19308.0}
{"step": 11150, "loss": 2.4382, "lr": 0.0003, "tps": 337873, "tokens": 11691622400, "gpu_gb": 72.2, "elapsed_s": 19339.1}
{"step": 11160, "loss": 2.4057, "lr": 0.0003, "tps": 338020, "tokens": 11702108160, "gpu_gb": 72.2, "elapsed_s": 19370.1}
{"step": 11170, "loss": 2.4262, "lr": 0.0003, "tps": 338230, "tokens": 11712593920, "gpu_gb": 72.2, "elapsed_s": 19401.1}
{"step": 11180, "loss": 2.4434, "lr": 0.0003, "tps": 338009, "tokens": 11723079680, "gpu_gb": 72.2, "elapsed_s": 19432.1}
{"step": 11190, "loss": 2.489, "lr": 0.0003, "tps": 338056, "tokens": 11733565440, "gpu_gb": 72.2, "elapsed_s": 19463.1}
{"step": 11200, "loss": 2.4516, "lr": 0.0003, "tps": 338122, "tokens": 11744051200, "gpu_gb": 72.2, "elapsed_s": 19494.1}
{"step": 11210, "loss": 2.4313, "lr": 0.0003, "tps": 338109, "tokens": 11754536960, "gpu_gb": 72.2, "elapsed_s": 19525.2}
{"step": 11220, "loss": 2.4286, "lr": 0.0003, "tps": 338054, "tokens": 11765022720, "gpu_gb": 72.2, "elapsed_s": 19556.2}
{"step": 11230, "loss": 2.4332, "lr": 0.0003, "tps": 337702, "tokens": 11775508480, "gpu_gb": 72.2, "elapsed_s": 19587.2}
{"step": 11240, "loss": 2.4047, "lr": 0.0003, "tps": 337959, "tokens": 11785994240, "gpu_gb": 72.2, "elapsed_s": 19618.2}
{"step": 11250, "loss": 2.3928, "lr": 0.0003, "tps": 338057, "tokens": 11796480000, "gpu_gb": 72.2, "elapsed_s": 19649.3}
{"step": 11260, "loss": 2.4453, "lr": 0.0003, "tps": 338047, "tokens": 11806965760, "gpu_gb": 72.2, "elapsed_s": 19680.3}
{"step": 11270, "loss": 2.3942, "lr": 0.0003, "tps": 338023, "tokens": 11817451520, "gpu_gb": 72.2, "elapsed_s": 19711.3}
{"step": 11280, "loss": 2.4335, "lr": 0.0003, "tps": 338212, "tokens": 11827937280, "gpu_gb": 72.2, "elapsed_s": 19742.3}
{"step": 11290, "loss": 2.4471, "lr": 0.0003, "tps": 338053, "tokens": 11838423040, "gpu_gb": 72.2, "elapsed_s": 19773.3}
{"step": 11300, "loss": 2.4497, "lr": 0.0003, "tps": 338222, "tokens": 11848908800, "gpu_gb": 72.2, "elapsed_s": 19804.3}
{"step": 11310, "loss": 2.4115, "lr": 0.0003, "tps": 337847, "tokens": 11859394560, "gpu_gb": 72.2, "elapsed_s": 19835.4}
{"step": 11320, "loss": 2.4264, "lr": 0.0003, "tps": 338053, "tokens": 11869880320, "gpu_gb": 72.2, "elapsed_s": 19866.4}
{"step": 11330, "loss": 2.3983, "lr": 0.0003, "tps": 337944, "tokens": 11880366080, "gpu_gb": 72.2, "elapsed_s": 19897.4}
{"step": 11340, "loss": 2.4073, "lr": 0.0003, "tps": 338306, "tokens": 11890851840, "gpu_gb": 72.2, "elapsed_s": 19928.4}
{"step": 11350, "loss": 2.4321, "lr": 0.0003, "tps": 337970, "tokens": 11901337600, "gpu_gb": 72.2, "elapsed_s": 19959.4}
{"step": 11360, "loss": 2.3948, "lr": 0.0003, "tps": 338178, "tokens": 11911823360, "gpu_gb": 72.2, "elapsed_s": 19990.4}
{"step": 11370, "loss": 2.4132, "lr": 0.0003, "tps": 338030, "tokens": 11922309120, "gpu_gb": 72.2, "elapsed_s": 20021.5}
{"step": 11380, "loss": 2.4347, "lr": 0.0003, "tps": 337906, "tokens": 11932794880, "gpu_gb": 72.2, "elapsed_s": 20052.5}
{"step": 11390, "loss": 2.3297, "lr": 0.0003, "tps": 337939, "tokens": 11943280640, "gpu_gb": 72.2, "elapsed_s": 20083.5}
{"step": 11400, "loss": 2.4661, "lr": 0.0003, "tps": 337982, "tokens": 11953766400, "gpu_gb": 72.2, "elapsed_s": 20114.6}
{"step": 11410, "loss": 2.3954, "lr": 0.0003, "tps": 338233, "tokens": 11964252160, "gpu_gb": 72.2, "elapsed_s": 20145.6}
{"step": 11420, "loss": 2.4521, "lr": 0.0003, "tps": 337956, "tokens": 11974737920, "gpu_gb": 72.2, "elapsed_s": 20176.6}
{"step": 11430, "loss": 2.4624, "lr": 0.0003, "tps": 337733, "tokens": 11985223680, "gpu_gb": 72.2, "elapsed_s": 20207.6}
{"step": 11440, "loss": 2.4288, "lr": 0.0003, "tps": 337935, "tokens": 11995709440, "gpu_gb": 72.2, "elapsed_s": 20238.7}
{"step": 11450, "loss": 2.4019, "lr": 0.0003, "tps": 338027, "tokens": 12006195200, "gpu_gb": 72.2, "elapsed_s": 20269.7}
{"step": 11460, "loss": 2.4248, "lr": 0.0003, "tps": 338139, "tokens": 12016680960, "gpu_gb": 72.2, "elapsed_s": 20300.7}
{"step": 11470, "loss": 2.4452, "lr": 0.0003, "tps": 338216, "tokens": 12027166720, "gpu_gb": 72.2, "elapsed_s": 20331.7}
{"step": 11480, "loss": 2.4479, "lr": 0.0003, "tps": 338184, "tokens": 12037652480, "gpu_gb": 72.2, "elapsed_s": 20362.7}
{"step": 11490, "loss": 2.4396, "lr": 0.0003, "tps": 338081, "tokens": 12048138240, "gpu_gb": 72.2, "elapsed_s": 20393.7}
{"step": 11500, "loss": 2.453, "lr": 0.0003, "tps": 338096, "tokens": 12058624000, "gpu_gb": 72.2, "elapsed_s": 20424.7}
{"step": 11510, "loss": 2.3964, "lr": 0.0003, "tps": 338125, "tokens": 12069109760, "gpu_gb": 72.2, "elapsed_s": 20455.7}
{"step": 11520, "loss": 2.4359, "lr": 0.0003, "tps": 338097, "tokens": 12079595520, "gpu_gb": 72.2, "elapsed_s": 20486.8}
{"step": 11530, "loss": 2.4183, "lr": 0.0003, "tps": 338178, "tokens": 12090081280, "gpu_gb": 72.2, "elapsed_s": 20517.8}
{"step": 11540, "loss": 2.4253, "lr": 0.0003, "tps": 337887, "tokens": 12100567040, "gpu_gb": 72.2, "elapsed_s": 20548.8}
{"step": 11550, "loss": 2.4422, "lr": 0.0003, "tps": 338298, "tokens": 12111052800, "gpu_gb": 72.2, "elapsed_s": 20579.8}
{"step": 11560, "loss": 2.4057, "lr": 0.0003, "tps": 338175, "tokens": 12121538560, "gpu_gb": 72.2, "elapsed_s": 20610.8}
{"step": 11570, "loss": 2.4062, "lr": 0.0003, "tps": 338062, "tokens": 12132024320, "gpu_gb": 72.2, "elapsed_s": 20641.8}
{"step": 11580, "loss": 2.4205, "lr": 0.0003, "tps": 338046, "tokens": 12142510080, "gpu_gb": 72.2, "elapsed_s": 20672.8}
{"step": 11590, "loss": 2.426, "lr": 0.0003, "tps": 338258, "tokens": 12152995840, "gpu_gb": 72.2, "elapsed_s": 20703.8}
{"step": 11600, "loss": 2.4565, "lr": 0.0003, "tps": 338138, "tokens": 12163481600, "gpu_gb": 72.2, "elapsed_s": 20734.8}
{"step": 11610, "loss": 2.4313, "lr": 0.0003, "tps": 337787, "tokens": 12173967360, "gpu_gb": 72.2, "elapsed_s": 20765.9}
{"step": 11620, "loss": 2.4074, "lr": 0.0003, "tps": 337842, "tokens": 12184453120, "gpu_gb": 72.2, "elapsed_s": 20796.9}
{"step": 11630, "loss": 2.4478, "lr": 0.0003, "tps": 337884, "tokens": 12194938880, "gpu_gb": 72.2, "elapsed_s": 20828.0}
{"step": 11640, "loss": 2.4286, "lr": 0.0003, "tps": 337963, "tokens": 12205424640, "gpu_gb": 72.2, "elapsed_s": 20859.0}
{"step": 11650, "loss": 2.3782, "lr": 0.0003, "tps": 338093, "tokens": 12215910400, "gpu_gb": 72.2, "elapsed_s": 20890.0}
{"step": 11660, "loss": 2.4806, "lr": 0.0003, "tps": 338064, "tokens": 12226396160, "gpu_gb": 72.2, "elapsed_s": 20921.0}
{"step": 11670, "loss": 2.4433, "lr": 0.0003, "tps": 338069, "tokens": 12236881920, "gpu_gb": 72.2, "elapsed_s": 20952.0}
{"step": 11680, "loss": 2.4092, "lr": 0.0003, "tps": 338306, "tokens": 12247367680, "gpu_gb": 72.2, "elapsed_s": 20983.0}
{"step": 11690, "loss": 2.4123, "lr": 0.0003, "tps": 337954, "tokens": 12257853440, "gpu_gb": 72.2, "elapsed_s": 21014.1}
{"step": 11700, "loss": 2.3873, "lr": 0.0003, "tps": 338011, "tokens": 12268339200, "gpu_gb": 72.2, "elapsed_s": 21045.1}
{"step": 11710, "loss": 2.4722, "lr": 0.0003, "tps": 338240, "tokens": 12278824960, "gpu_gb": 72.2, "elapsed_s": 21076.1}
{"step": 11720, "loss": 2.419, "lr": 0.0003, "tps": 337799, "tokens": 12289310720, "gpu_gb": 72.2, "elapsed_s": 21107.1}
{"step": 11730, "loss": 2.3849, "lr": 0.0003, "tps": 337757, "tokens": 12299796480, "gpu_gb": 72.2, "elapsed_s": 21138.2}
{"step": 11740, "loss": 2.4384, "lr": 0.0003, "tps": 338103, "tokens": 12310282240, "gpu_gb": 72.2, "elapsed_s": 21169.2}
{"step": 11750, "loss": 2.4459, "lr": 0.0003, "tps": 338432, "tokens": 12320768000, "gpu_gb": 72.2, "elapsed_s": 21200.2}
{"step": 11760, "loss": 2.4205, "lr": 0.0003, "tps": 338048, "tokens": 12331253760, "gpu_gb": 72.2, "elapsed_s": 21231.2}
{"step": 11770, "loss": 2.4443, "lr": 0.0003, "tps": 338200, "tokens": 12341739520, "gpu_gb": 72.2, "elapsed_s": 21262.2}
{"step": 11780, "loss": 2.4055, "lr": 0.0003, "tps": 337964, "tokens": 12352225280, "gpu_gb": 72.2, "elapsed_s": 21293.2}
{"step": 11790, "loss": 2.4215, "lr": 0.0003, "tps": 338213, "tokens": 12362711040, "gpu_gb": 72.2, "elapsed_s": 21324.2}
{"step": 11800, "loss": 2.4407, "lr": 0.0003, "tps": 338188, "tokens": 12373196800, "gpu_gb": 72.2, "elapsed_s": 21355.2}
{"step": 11810, "loss": 2.4444, "lr": 0.0003, "tps": 337951, "tokens": 12383682560, "gpu_gb": 72.2, "elapsed_s": 21386.3}
{"step": 11820, "loss": 2.4889, "lr": 0.0003, "tps": 337968, "tokens": 12394168320, "gpu_gb": 72.2, "elapsed_s": 21417.3}
{"step": 11830, "loss": 2.4631, "lr": 0.0003, "tps": 337788, "tokens": 12404654080, "gpu_gb": 72.2, "elapsed_s": 21448.3}
{"step": 11840, "loss": 2.4104, "lr": 0.0003, "tps": 337848, "tokens": 12415139840, "gpu_gb": 72.2, "elapsed_s": 21479.4}
{"step": 11850, "loss": 2.4099, "lr": 0.0003, "tps": 337994, "tokens": 12425625600, "gpu_gb": 72.2, "elapsed_s": 21510.4}
{"step": 11860, "loss": 2.4614, "lr": 0.0003, "tps": 337961, "tokens": 12436111360, "gpu_gb": 72.2, "elapsed_s": 21541.4}
{"step": 11870, "loss": 2.429, "lr": 0.0003, "tps": 337889, "tokens": 12446597120, "gpu_gb": 72.2, "elapsed_s": 21572.4}
{"step": 11880, "loss": 2.4349, "lr": 0.0003, "tps": 337821, "tokens": 12457082880, "gpu_gb": 72.2, "elapsed_s": 21603.5}
{"step": 11890, "loss": 2.4102, "lr": 0.0003, "tps": 338037, "tokens": 12467568640, "gpu_gb": 72.2, "elapsed_s": 21634.5}
{"step": 11900, "loss": 2.4086, "lr": 0.0003, "tps": 338043, "tokens": 12478054400, "gpu_gb": 72.2, "elapsed_s": 21665.5}
{"step": 11910, "loss": 2.4034, "lr": 0.0003, "tps": 338115, "tokens": 12488540160, "gpu_gb": 72.2, "elapsed_s": 21696.5}
{"step": 11920, "loss": 2.408, "lr": 0.0003, "tps": 337712, "tokens": 12499025920, "gpu_gb": 72.2, "elapsed_s": 21727.6}
{"step": 11930, "loss": 2.4368, "lr": 0.0003, "tps": 338066, "tokens": 12509511680, "gpu_gb": 72.2, "elapsed_s": 21758.6}
{"step": 11940, "loss": 2.4403, "lr": 0.0003, "tps": 338144, "tokens": 12519997440, "gpu_gb": 72.2, "elapsed_s": 21789.6}
{"step": 11950, "loss": 2.3924, "lr": 0.0003, "tps": 338055, "tokens": 12530483200, "gpu_gb": 72.2, "elapsed_s": 21820.6}
{"step": 11960, "loss": 2.4108, "lr": 0.0003, "tps": 338014, "tokens": 12540968960, "gpu_gb": 72.2, "elapsed_s": 21851.7}
{"step": 11970, "loss": 2.4229, "lr": 0.0003, "tps": 338168, "tokens": 12551454720, "gpu_gb": 72.2, "elapsed_s": 21882.7}
{"step": 11980, "loss": 2.4066, "lr": 0.0003, "tps": 338352, "tokens": 12561940480, "gpu_gb": 72.2, "elapsed_s": 21913.6}
{"step": 11990, "loss": 2.4215, "lr": 0.0003, "tps": 338086, "tokens": 12572426240, "gpu_gb": 72.2, "elapsed_s": 21944.7}
{"step": 12000, "loss": 2.4133, "lr": 0.0003, "tps": 338007, "tokens": 12582912000, "gpu_gb": 72.2, "elapsed_s": 21975.7}
{"step": 12010, "loss": 2.4033, "lr": 0.0003, "tps": 156937, "tokens": 12593397760, "gpu_gb": 72.2, "elapsed_s": 22042.5}
{"step": 12020, "loss": 2.4341, "lr": 0.0003, "tps": 338132, "tokens": 12603883520, "gpu_gb": 72.2, "elapsed_s": 22073.5}
{"step": 12030, "loss": 2.3699, "lr": 0.0003, "tps": 338125, "tokens": 12614369280, "gpu_gb": 72.2, "elapsed_s": 22104.5}
{"step": 12040, "loss": 2.461, "lr": 0.0003, "tps": 338334, "tokens": 12624855040, "gpu_gb": 72.2, "elapsed_s": 22135.5}
{"step": 12050, "loss": 2.3941, "lr": 0.0003, "tps": 338248, "tokens": 12635340800, "gpu_gb": 72.2, "elapsed_s": 22166.5}
{"step": 12060, "loss": 2.4055, "lr": 0.0003, "tps": 338429, "tokens": 12645826560, "gpu_gb": 72.2, "elapsed_s": 22197.5}
{"step": 12070, "loss": 2.4307, "lr": 0.0003, "tps": 338450, "tokens": 12656312320, "gpu_gb": 72.2, "elapsed_s": 22228.5}
{"step": 12080, "loss": 2.4212, "lr": 0.0003, "tps": 338401, "tokens": 12666798080, "gpu_gb": 72.2, "elapsed_s": 22259.5}
{"step": 12090, "loss": 2.4195, "lr": 0.0003, "tps": 338379, "tokens": 12677283840, "gpu_gb": 72.2, "elapsed_s": 22290.5}
{"step": 12100, "loss": 2.37, "lr": 0.0003, "tps": 338037, "tokens": 12687769600, "gpu_gb": 72.2, "elapsed_s": 22321.5}
{"step": 12110, "loss": 2.4418, "lr": 0.0003, "tps": 338310, "tokens": 12698255360, "gpu_gb": 72.2, "elapsed_s": 22352.5}
{"step": 12120, "loss": 2.3555, "lr": 0.0003, "tps": 338075, "tokens": 12708741120, "gpu_gb": 72.2, "elapsed_s": 22383.5}
{"step": 12130, "loss": 2.3926, "lr": 0.0003, "tps": 338039, "tokens": 12719226880, "gpu_gb": 72.2, "elapsed_s": 22414.5}
{"step": 12140, "loss": 2.4467, "lr": 0.0003, "tps": 338162, "tokens": 12729712640, "gpu_gb": 72.2, "elapsed_s": 22445.5}
{"step": 12150, "loss": 2.3514, "lr": 0.0003, "tps": 338024, "tokens": 12740198400, "gpu_gb": 72.2, "elapsed_s": 22476.5}
{"step": 12160, "loss": 2.4652, "lr": 0.0003, "tps": 338238, "tokens": 12750684160, "gpu_gb": 72.2, "elapsed_s": 22507.5}
{"step": 12170, "loss": 2.3992, "lr": 0.0003, "tps": 337876, "tokens": 12761169920, "gpu_gb": 72.2, "elapsed_s": 22538.6}
{"step": 12180, "loss": 2.3904, "lr": 0.0003, "tps": 338050, "tokens": 12771655680, "gpu_gb": 72.2, "elapsed_s": 22569.6}
{"step": 12190, "loss": 2.384, "lr": 0.0003, "tps": 337945, "tokens": 12782141440, "gpu_gb": 72.2, "elapsed_s": 22600.6}
{"step": 12200, "loss": 2.4099, "lr": 0.0003, "tps": 338211, "tokens": 12792627200, "gpu_gb": 72.2, "elapsed_s": 22631.6}
{"step": 12210, "loss": 2.4155, "lr": 0.0003, "tps": 338173, "tokens": 12803112960, "gpu_gb": 72.2, "elapsed_s": 22662.6}
{"step": 12220, "loss": 2.4885, "lr": 0.0003, "tps": 337941, "tokens": 12813598720, "gpu_gb": 72.2, "elapsed_s": 22693.7}
{"step": 12230, "loss": 2.4481, "lr": 0.0003, "tps": 337968, "tokens": 12824084480, "gpu_gb": 72.2, "elapsed_s": 22724.7}
{"step": 12240, "loss": 2.454, "lr": 0.0003, "tps": 337829, "tokens": 12834570240, "gpu_gb": 72.2, "elapsed_s": 22755.7}
{"step": 12250, "loss": 2.4339, "lr": 0.0003, "tps": 338464, "tokens": 12845056000, "gpu_gb": 72.2, "elapsed_s": 22786.7}
{"step": 12260, "loss": 2.4132, "lr": 0.0003, "tps": 337967, "tokens": 12855541760, "gpu_gb": 72.2, "elapsed_s": 22817.7}
{"step": 12270, "loss": 2.4131, "lr": 0.0003, "tps": 338206, "tokens": 12866027520, "gpu_gb": 72.2, "elapsed_s": 22848.7}
{"step": 12280, "loss": 2.4156, "lr": 0.0003, "tps": 337986, "tokens": 12876513280, "gpu_gb": 72.2, "elapsed_s": 22879.8}
{"step": 12290, "loss": 2.4222, "lr": 0.0003, "tps": 338125, "tokens": 12886999040, "gpu_gb": 72.2, "elapsed_s": 22910.8}
{"step": 12300, "loss": 2.4169, "lr": 0.0003, "tps": 337873, "tokens": 12897484800, "gpu_gb": 72.2, "elapsed_s": 22941.8}
{"step": 12310, "loss": 2.4323, "lr": 0.0003, "tps": 338293, "tokens": 12907970560, "gpu_gb": 72.2, "elapsed_s": 22972.8}
{"step": 12320, "loss": 2.3942, "lr": 0.0003, "tps": 338279, "tokens": 12918456320, "gpu_gb": 72.2, "elapsed_s": 23003.8}
{"step": 12330, "loss": 2.4286, "lr": 0.0003, "tps": 337820, "tokens": 12928942080, "gpu_gb": 72.2, "elapsed_s": 23034.8}
{"step": 12340, "loss": 2.452, "lr": 0.0003, "tps": 338405, "tokens": 12939427840, "gpu_gb": 72.2, "elapsed_s": 23065.8}
{"step": 12350, "loss": 2.4251, "lr": 0.0003, "tps": 337922, "tokens": 12949913600, "gpu_gb": 72.2, "elapsed_s": 23096.9}
{"step": 12360, "loss": 2.4528, "lr": 0.0003, "tps": 338012, "tokens": 12960399360, "gpu_gb": 72.2, "elapsed_s": 23127.9}
{"step": 12370, "loss": 2.3964, "lr": 0.0003, "tps": 338008, "tokens": 12970885120, "gpu_gb": 72.2, "elapsed_s": 23158.9}
{"step": 12380, "loss": 2.4145, "lr": 0.0003, "tps": 338044, "tokens": 12981370880, "gpu_gb": 72.2, "elapsed_s": 23189.9}
{"step": 12390, "loss": 2.407, "lr": 0.0003, "tps": 338364, "tokens": 12991856640, "gpu_gb": 72.2, "elapsed_s": 23220.9}
{"step": 12400, "loss": 2.3821, "lr": 0.0003, "tps": 338351, "tokens": 13002342400, "gpu_gb": 72.2, "elapsed_s": 23251.9}
{"step": 12410, "loss": 2.4085, "lr": 0.0003, "tps": 338291, "tokens": 13012828160, "gpu_gb": 72.2, "elapsed_s": 23282.9}
{"step": 12420, "loss": 2.4209, "lr": 0.0003, "tps": 338052, "tokens": 13023313920, "gpu_gb": 72.2, "elapsed_s": 23313.9}
{"step": 12430, "loss": 2.4295, "lr": 0.0003, "tps": 338175, "tokens": 13033799680, "gpu_gb": 72.2, "elapsed_s": 23344.9}
{"step": 12440, "loss": 2.415, "lr": 0.0003, "tps": 338179, "tokens": 13044285440, "gpu_gb": 72.2, "elapsed_s": 23375.9}
{"step": 12450, "loss": 2.425, "lr": 0.0003, "tps": 338125, "tokens": 13054771200, "gpu_gb": 72.2, "elapsed_s": 23406.9}
{"step": 12460, "loss": 2.4106, "lr": 0.0003, "tps": 338165, "tokens": 13065256960, "gpu_gb": 72.2, "elapsed_s": 23438.0}
{"step": 12470, "loss": 2.42, "lr": 0.0003, "tps": 338259, "tokens": 13075742720, "gpu_gb": 72.2, "elapsed_s": 23469.0}
{"step": 12480, "loss": 2.4276, "lr": 0.0003, "tps": 338201, "tokens": 13086228480, "gpu_gb": 72.2, "elapsed_s": 23500.0}
{"step": 12490, "loss": 2.4597, "lr": 0.0003, "tps": 338191, "tokens": 13096714240, "gpu_gb": 72.2, "elapsed_s": 23531.0}
{"step": 12500, "loss": 2.4257, "lr": 0.0003, "tps": 338236, "tokens": 13107200000, "gpu_gb": 72.2, "elapsed_s": 23562.0}
{"step": 12510, "loss": 2.3793, "lr": 0.0003, "tps": 338190, "tokens": 13117685760, "gpu_gb": 72.2, "elapsed_s": 23593.0}
{"step": 12520, "loss": 2.4237, "lr": 0.0003, "tps": 337947, "tokens": 13128171520, "gpu_gb": 72.2, "elapsed_s": 23624.0}
{"step": 12530, "loss": 2.4296, "lr": 0.0003, "tps": 338278, "tokens": 13138657280, "gpu_gb": 72.2, "elapsed_s": 23655.0}
{"step": 12540, "loss": 2.4346, "lr": 0.0003, "tps": 338177, "tokens": 13149143040, "gpu_gb": 72.2, "elapsed_s": 23686.0}
{"step": 12550, "loss": 2.4271, "lr": 0.0003, "tps": 338347, "tokens": 13159628800, "gpu_gb": 72.2, "elapsed_s": 23717.0}
{"step": 12560, "loss": 2.4642, "lr": 0.0003, "tps": 338168, "tokens": 13170114560, "gpu_gb": 72.2, "elapsed_s": 23748.0}
{"step": 12570, "loss": 2.4167, "lr": 0.0003, "tps": 338357, "tokens": 13180600320, "gpu_gb": 72.2, "elapsed_s": 23779.0}
{"step": 12580, "loss": 2.3853, "lr": 0.0003, "tps": 338418, "tokens": 13191086080, "gpu_gb": 72.2, "elapsed_s": 23810.0}
{"step": 12590, "loss": 2.4249, "lr": 0.0003, "tps": 338295, "tokens": 13201571840, "gpu_gb": 72.2, "elapsed_s": 23841.0}
{"step": 12600, "loss": 2.4512, "lr": 0.0003, "tps": 338324, "tokens": 13212057600, "gpu_gb": 72.2, "elapsed_s": 23872.0}
{"step": 12610, "loss": 2.3968, "lr": 0.0003, "tps": 338388, "tokens": 13222543360, "gpu_gb": 72.2, "elapsed_s": 23903.0}
{"step": 12620, "loss": 2.3893, "lr": 0.0003, "tps": 338339, "tokens": 13233029120, "gpu_gb": 72.2, "elapsed_s": 23933.9}
{"step": 12630, "loss": 2.4139, "lr": 0.0003, "tps": 338518, "tokens": 13243514880, "gpu_gb": 72.2, "elapsed_s": 23964.9}
{"step": 12640, "loss": 2.4195, "lr": 0.0003, "tps": 338010, "tokens": 13254000640, "gpu_gb": 72.2, "elapsed_s": 23995.9}
{"step": 12650, "loss": 2.4358, "lr": 0.0003, "tps": 338134, "tokens": 13264486400, "gpu_gb": 72.2, "elapsed_s": 24027.0}
{"step": 12660, "loss": 2.4118, "lr": 0.0003, "tps": 338312, "tokens": 13274972160, "gpu_gb": 72.2, "elapsed_s": 24058.0}
{"step": 12670, "loss": 2.4135, "lr": 0.0003, "tps": 338289, "tokens": 13285457920, "gpu_gb": 72.2, "elapsed_s": 24088.9}
{"step": 12680, "loss": 2.3585, "lr": 0.0003, "tps": 338294, "tokens": 13295943680, "gpu_gb": 72.2, "elapsed_s": 24119.9}
{"step": 12690, "loss": 2.3967, "lr": 0.0003, "tps": 338518, "tokens": 13306429440, "gpu_gb": 72.2, "elapsed_s": 24150.9}
{"step": 12700, "loss": 2.4567, "lr": 0.0003, "tps": 338078, "tokens": 13316915200, "gpu_gb": 72.2, "elapsed_s": 24181.9}
{"step": 12710, "loss": 2.4135, "lr": 0.0003, "tps": 338185, "tokens": 13327400960, "gpu_gb": 72.2, "elapsed_s": 24212.9}
{"step": 12720, "loss": 2.3909, "lr": 0.0003, "tps": 338079, "tokens": 13337886720, "gpu_gb": 72.2, "elapsed_s": 24244.0}
{"step": 12730, "loss": 2.4075, "lr": 0.0003, "tps": 338036, "tokens": 13348372480, "gpu_gb": 72.2, "elapsed_s": 24275.0}
{"step": 12740, "loss": 2.4011, "lr": 0.0003, "tps": 338124, "tokens": 13358858240, "gpu_gb": 72.2, "elapsed_s": 24306.0}
{"step": 12750, "loss": 2.4144, "lr": 0.0003, "tps": 338129, "tokens": 13369344000, "gpu_gb": 72.2, "elapsed_s": 24337.0}
{"step": 12760, "loss": 2.3764, "lr": 0.0003, "tps": 338115, "tokens": 13379829760, "gpu_gb": 72.2, "elapsed_s": 24368.0}
{"step": 12770, "loss": 2.4147, "lr": 0.0003, "tps": 338400, "tokens": 13390315520, "gpu_gb": 72.2, "elapsed_s": 24399.0}
{"step": 12780, "loss": 2.4087, "lr": 0.0003, "tps": 338190, "tokens": 13400801280, "gpu_gb": 72.2, "elapsed_s": 24430.0}
{"step": 12790, "loss": 2.3817, "lr": 0.0003, "tps": 338245, "tokens": 13411287040, "gpu_gb": 72.2, "elapsed_s": 24461.0}
{"step": 12800, "loss": 2.4006, "lr": 0.0003, "tps": 338263, "tokens": 13421772800, "gpu_gb": 72.2, "elapsed_s": 24492.0}
{"step": 12810, "loss": 2.4211, "lr": 0.0003, "tps": 338154, "tokens": 13432258560, "gpu_gb": 72.2, "elapsed_s": 24523.0}
{"step": 12820, "loss": 2.4022, "lr": 0.0003, "tps": 338332, "tokens": 13442744320, "gpu_gb": 72.2, "elapsed_s": 24554.0}
{"step": 12830, "loss": 2.4214, "lr": 0.0003, "tps": 338151, "tokens": 13453230080, "gpu_gb": 72.2, "elapsed_s": 24585.0}
{"step": 12840, "loss": 2.4112, "lr": 0.0003, "tps": 337722, "tokens": 13463715840, "gpu_gb": 72.2, "elapsed_s": 24616.1}
{"step": 12850, "loss": 2.43, "lr": 0.0003, "tps": 338247, "tokens": 13474201600, "gpu_gb": 72.2, "elapsed_s": 24647.1}
{"step": 12860, "loss": 2.3573, "lr": 0.0003, "tps": 337891, "tokens": 13484687360, "gpu_gb": 72.2, "elapsed_s": 24678.1}
{"step": 12870, "loss": 2.4385, "lr": 0.0003, "tps": 338245, "tokens": 13495173120, "gpu_gb": 72.2, "elapsed_s": 24709.1}
{"step": 12880, "loss": 2.3931, "lr": 0.0003, "tps": 338374, "tokens": 13505658880, "gpu_gb": 72.2, "elapsed_s": 24740.1}
{"step": 12890, "loss": 2.3799, "lr": 0.0003, "tps": 338193, "tokens": 13516144640, "gpu_gb": 72.2, "elapsed_s": 24771.1}
{"step": 12900, "loss": 2.3917, "lr": 0.0003, "tps": 338194, "tokens": 13526630400, "gpu_gb": 72.2, "elapsed_s": 24802.1}
{"step": 12910, "loss": 2.3932, "lr": 0.0003, "tps": 338248, "tokens": 13537116160, "gpu_gb": 72.2, "elapsed_s": 24833.1}
{"step": 12920, "loss": 2.3842, "lr": 0.0003, "tps": 338462, "tokens": 13547601920, "gpu_gb": 72.2, "elapsed_s": 24864.1}
{"step": 12930, "loss": 2.3987, "lr": 0.0003, "tps": 338038, "tokens": 13558087680, "gpu_gb": 72.2, "elapsed_s": 24895.1}
{"step": 12940, "loss": 2.3813, "lr": 0.0003, "tps": 338225, "tokens": 13568573440, "gpu_gb": 72.2, "elapsed_s": 24926.1}
{"step": 12950, "loss": 2.3857, "lr": 0.0003, "tps": 338171, "tokens": 13579059200, "gpu_gb": 72.2, "elapsed_s": 24957.1}
{"step": 12960, "loss": 2.4188, "lr": 0.0003, "tps": 338234, "tokens": 13589544960, "gpu_gb": 72.2, "elapsed_s": 24988.1}
{"step": 12970, "loss": 2.4205, "lr": 0.0003, "tps": 338154, "tokens": 13600030720, "gpu_gb": 72.2, "elapsed_s": 25019.1}
{"step": 12980, "loss": 2.4327, "lr": 0.0003, "tps": 338189, "tokens": 13610516480, "gpu_gb": 72.2, "elapsed_s": 25050.1}
{"step": 12990, "loss": 2.3797, "lr": 0.0003, "tps": 338034, "tokens": 13621002240, "gpu_gb": 72.2, "elapsed_s": 25081.1}
{"step": 13000, "loss": 2.4328, "lr": 0.0003, "tps": 338260, "tokens": 13631488000, "gpu_gb": 72.2, "elapsed_s": 25112.1}
{"step": 13010, "loss": 2.4326, "lr": 0.0003, "tps": 152817, "tokens": 13641973760, "gpu_gb": 72.2, "elapsed_s": 25180.8}
{"step": 13020, "loss": 2.4198, "lr": 0.0003, "tps": 338162, "tokens": 13652459520, "gpu_gb": 72.2, "elapsed_s": 25211.8}
{"step": 13030, "loss": 2.4423, "lr": 0.0003, "tps": 337986, "tokens": 13662945280, "gpu_gb": 72.2, "elapsed_s": 25242.8}
{"step": 13040, "loss": 2.4235, "lr": 0.0003, "tps": 338164, "tokens": 13673431040, "gpu_gb": 72.2, "elapsed_s": 25273.8}
{"step": 13050, "loss": 2.3832, "lr": 0.0003, "tps": 338604, "tokens": 13683916800, "gpu_gb": 72.2, "elapsed_s": 25304.8}
{"step": 13060, "loss": 2.3592, "lr": 0.0003, "tps": 338027, "tokens": 13694402560, "gpu_gb": 72.2, "elapsed_s": 25335.8}
{"step": 13070, "loss": 2.4007, "lr": 0.0003, "tps": 338066, "tokens": 13704888320, "gpu_gb": 72.2, "elapsed_s": 25366.8}
{"step": 13080, "loss": 2.3771, "lr": 0.0003, "tps": 338127, "tokens": 13715374080, "gpu_gb": 72.2, "elapsed_s": 25397.8}
{"step": 13090, "loss": 2.421, "lr": 0.0003, "tps": 338328, "tokens": 13725859840, "gpu_gb": 72.2, "elapsed_s": 25428.8}
{"step": 13100, "loss": 2.3932, "lr": 0.0003, "tps": 338208, "tokens": 13736345600, "gpu_gb": 72.2, "elapsed_s": 25459.8}
{"step": 13110, "loss": 2.4217, "lr": 0.0003, "tps": 337955, "tokens": 13746831360, "gpu_gb": 72.2, "elapsed_s": 25490.8}
{"step": 13120, "loss": 2.3806, "lr": 0.0003, "tps": 337773, "tokens": 13757317120, "gpu_gb": 72.2, "elapsed_s": 25521.9}
{"step": 13130, "loss": 2.3806, "lr": 0.0003, "tps": 338192, "tokens": 13767802880, "gpu_gb": 72.2, "elapsed_s": 25552.9}
{"step": 13140, "loss": 2.3934, "lr": 0.0003, "tps": 338134, "tokens": 13778288640, "gpu_gb": 72.2, "elapsed_s": 25583.9}
{"step": 13150, "loss": 2.3864, "lr": 0.0003, "tps": 338361, "tokens": 13788774400, "gpu_gb": 72.2, "elapsed_s": 25614.9}
{"step": 13160, "loss": 2.3955, "lr": 0.0003, "tps": 338230, "tokens": 13799260160, "gpu_gb": 72.2, "elapsed_s": 25645.9}
{"step": 13170, "loss": 2.4244, "lr": 0.0003, "tps": 338106, "tokens": 13809745920, "gpu_gb": 72.2, "elapsed_s": 25676.9}
{"step": 13180, "loss": 2.3787, "lr": 0.0003, "tps": 338116, "tokens": 13820231680, "gpu_gb": 72.2, "elapsed_s": 25707.9}
{"step": 13190, "loss": 2.3783, "lr": 0.0003, "tps": 338044, "tokens": 13830717440, "gpu_gb": 72.2, "elapsed_s": 25738.9}
{"step": 13200, "loss": 2.4258, "lr": 0.0003, "tps": 337754, "tokens": 13841203200, "gpu_gb": 72.2, "elapsed_s": 25770.0}
{"step": 13210, "loss": 2.3971, "lr": 0.0003, "tps": 338136, "tokens": 13851688960, "gpu_gb": 72.2, "elapsed_s": 25801.0}
{"step": 13220, "loss": 2.38, "lr": 0.0003, "tps": 338174, "tokens": 13862174720, "gpu_gb": 72.2, "elapsed_s": 25832.0}
{"step": 13230, "loss": 2.4075, "lr": 0.0003, "tps": 338018, "tokens": 13872660480, "gpu_gb": 72.2, "elapsed_s": 25863.0}
{"step": 13240, "loss": 2.4228, "lr": 0.0003, "tps": 337599, "tokens": 13883146240, "gpu_gb": 72.2, "elapsed_s": 25894.1}
{"step": 13250, "loss": 2.3623, "lr": 0.0003, "tps": 337831, "tokens": 13893632000, "gpu_gb": 72.2, "elapsed_s": 25925.1}
{"step": 13260, "loss": 2.4038, "lr": 0.0003, "tps": 338101, "tokens": 13904117760, "gpu_gb": 72.2, "elapsed_s": 25956.1}
{"step": 13270, "loss": 2.3973, "lr": 0.0003, "tps": 337993, "tokens": 13914603520, "gpu_gb": 72.2, "elapsed_s": 25987.2}
{"step": 13280, "loss": 2.4176, "lr": 0.0003, "tps": 337683, "tokens": 13925089280, "gpu_gb": 72.2, "elapsed_s": 26018.2}
{"step": 13290, "loss": 2.3924, "lr": 0.0003, "tps": 338196, "tokens": 13935575040, "gpu_gb": 72.2, "elapsed_s": 26049.2}
{"step": 13300, "loss": 2.3903, "lr": 0.0003, "tps": 338337, "tokens": 13946060800, "gpu_gb": 72.2, "elapsed_s": 26080.2}
{"step": 13310, "loss": 2.4369, "lr": 0.0003, "tps": 337695, "tokens": 13956546560, "gpu_gb": 72.2, "elapsed_s": 26111.3}
{"step": 13320, "loss": 2.4065, "lr": 0.0003, "tps": 338101, "tokens": 13967032320, "gpu_gb": 72.2, "elapsed_s": 26142.3}
{"step": 13330, "loss": 2.4406, "lr": 0.0003, "tps": 338123, "tokens": 13977518080, "gpu_gb": 72.2, "elapsed_s": 26173.3}
{"step": 13340, "loss": 2.3938, "lr": 0.0003, "tps": 337782, "tokens": 13988003840, "gpu_gb": 72.2, "elapsed_s": 26204.3}
{"step": 13350, "loss": 2.4082, "lr": 0.0003, "tps": 338078, "tokens": 13998489600, "gpu_gb": 72.2, "elapsed_s": 26235.4}
{"step": 13360, "loss": 2.3989, "lr": 0.0003, "tps": 337909, "tokens": 14008975360, "gpu_gb": 72.2, "elapsed_s": 26266.4}
{"step": 13370, "loss": 2.4038, "lr": 0.0003, "tps": 338254, "tokens": 14019461120, "gpu_gb": 72.2, "elapsed_s": 26297.4}
{"step": 13380, "loss": 2.3918, "lr": 0.0003, "tps": 338009, "tokens": 14029946880, "gpu_gb": 72.2, "elapsed_s": 26328.4}
{"step": 13390, "loss": 2.3797, "lr": 0.0003, "tps": 338055, "tokens": 14040432640, "gpu_gb": 72.2, "elapsed_s": 26359.4}
{"step": 13400, "loss": 2.4646, "lr": 0.0003, "tps": 337921, "tokens": 14050918400, "gpu_gb": 72.2, "elapsed_s": 26390.5}
{"step": 13410, "loss": 2.4075, "lr": 0.0003, "tps": 338269, "tokens": 14061404160, "gpu_gb": 72.2, "elapsed_s": 26421.5}
{"step": 13420, "loss": 2.4089, "lr": 0.0003, "tps": 338023, "tokens": 14071889920, "gpu_gb": 72.2, "elapsed_s": 26452.5}
{"step": 13430, "loss": 2.3808, "lr": 0.0003, "tps": 338131, "tokens": 14082375680, "gpu_gb": 72.2, "elapsed_s": 26483.5}
{"step": 13440, "loss": 2.411, "lr": 0.0003, "tps": 338263, "tokens": 14092861440, "gpu_gb": 72.2, "elapsed_s": 26514.5}
{"step": 13450, "loss": 2.3607, "lr": 0.0003, "tps": 337899, "tokens": 14103347200, "gpu_gb": 72.2, "elapsed_s": 26545.5}
{"step": 13460, "loss": 2.37, "lr": 0.0003, "tps": 338357, "tokens": 14113832960, "gpu_gb": 72.2, "elapsed_s": 26576.5}
{"step": 13470, "loss": 2.4527, "lr": 0.0003, "tps": 337875, "tokens": 14124318720, "gpu_gb": 72.2, "elapsed_s": 26607.5}
{"step": 13480, "loss": 2.3229, "lr": 0.0003, "tps": 338097, "tokens": 14134804480, "gpu_gb": 72.2, "elapsed_s": 26638.6}
{"step": 13490, "loss": 2.45, "lr": 0.0003, "tps": 338125, "tokens": 14145290240, "gpu_gb": 72.2, "elapsed_s": 26669.6}
{"step": 13500, "loss": 2.3902, "lr": 0.0003, "tps": 338394, "tokens": 14155776000, "gpu_gb": 72.2, "elapsed_s": 26700.6}
{"step": 13510, "loss": 2.3976, "lr": 0.0003, "tps": 337901, "tokens": 14166261760, "gpu_gb": 72.2, "elapsed_s": 26731.6}
{"step": 13520, "loss": 2.4106, "lr": 0.0003, "tps": 338135, "tokens": 14176747520, "gpu_gb": 72.2, "elapsed_s": 26762.6}
{"step": 13530, "loss": 2.404, "lr": 0.0003, "tps": 337964, "tokens": 14187233280, "gpu_gb": 72.2, "elapsed_s": 26793.6}
{"step": 13540, "loss": 2.3781, "lr": 0.0003, "tps": 337945, "tokens": 14197719040, "gpu_gb": 72.2, "elapsed_s": 26824.7}
{"step": 13550, "loss": 2.3789, "lr": 0.0003, "tps": 338098, "tokens": 14208204800, "gpu_gb": 72.2, "elapsed_s": 26855.7}
{"step": 13560, "loss": 2.4338, "lr": 0.0003, "tps": 338241, "tokens": 14218690560, "gpu_gb": 72.2, "elapsed_s": 26886.7}
{"step": 13570, "loss": 2.4512, "lr": 0.0003, "tps": 337914, "tokens": 14229176320, "gpu_gb": 72.2, "elapsed_s": 26917.7}
{"step": 13580, "loss": 2.423, "lr": 0.0003, "tps": 337814, "tokens": 14239662080, "gpu_gb": 72.2, "elapsed_s": 26948.7}
{"step": 13590, "loss": 2.4245, "lr": 0.0003, "tps": 337918, "tokens": 14250147840, "gpu_gb": 72.2, "elapsed_s": 26979.8}
{"step": 13600, "loss": 2.4179, "lr": 0.0003, "tps": 337870, "tokens": 14260633600, "gpu_gb": 72.2, "elapsed_s": 27010.8}
{"step": 13610, "loss": 2.3654, "lr": 0.0003, "tps": 337975, "tokens": 14271119360, "gpu_gb": 72.2, "elapsed_s": 27041.8}
{"step": 13620, "loss": 2.3892, "lr": 0.0003, "tps": 337830, "tokens": 14281605120, "gpu_gb": 72.2, "elapsed_s": 27072.9}
{"step": 13630, "loss": 2.4281, "lr": 0.0003, "tps": 338131, "tokens": 14292090880, "gpu_gb": 72.2, "elapsed_s": 27103.9}
{"step": 13640, "loss": 2.4241, "lr": 0.0003, "tps": 338109, "tokens": 14302576640, "gpu_gb": 72.2, "elapsed_s": 27134.9}
{"step": 13650, "loss": 2.3796, "lr": 0.0003, "tps": 338001, "tokens": 14313062400, "gpu_gb": 72.2, "elapsed_s": 27165.9}
{"step": 13660, "loss": 2.3637, "lr": 0.0003, "tps": 337982, "tokens": 14323548160, "gpu_gb": 72.2, "elapsed_s": 27196.9}
{"step": 13670, "loss": 2.4002, "lr": 0.0003, "tps": 338425, "tokens": 14334033920, "gpu_gb": 72.2, "elapsed_s": 27227.9}
{"step": 13680, "loss": 2.404, "lr": 0.0003, "tps": 338111, "tokens": 14344519680, "gpu_gb": 72.2, "elapsed_s": 27258.9}
{"step": 13690, "loss": 2.3976, "lr": 0.0003, "tps": 338186, "tokens": 14355005440, "gpu_gb": 72.2, "elapsed_s": 27289.9}
{"step": 13700, "loss": 2.398, "lr": 0.0003, "tps": 337846, "tokens": 14365491200, "gpu_gb": 72.2, "elapsed_s": 27321.0}
{"step": 13710, "loss": 2.3787, "lr": 0.0003, "tps": 338220, "tokens": 14375976960, "gpu_gb": 72.2, "elapsed_s": 27352.0}
{"step": 13720, "loss": 2.3951, "lr": 0.0003, "tps": 338186, "tokens": 14386462720, "gpu_gb": 72.2, "elapsed_s": 27383.0}
{"step": 13730, "loss": 2.3992, "lr": 0.0003, "tps": 338412, "tokens": 14396948480, "gpu_gb": 72.2, "elapsed_s": 27414.0}
{"step": 13740, "loss": 2.4027, "lr": 0.0003, "tps": 338090, "tokens": 14407434240, "gpu_gb": 72.2, "elapsed_s": 27445.0}
{"step": 13750, "loss": 2.3829, "lr": 0.0003, "tps": 338129, "tokens": 14417920000, "gpu_gb": 72.2, "elapsed_s": 27476.0}
{"step": 13760, "loss": 2.4161, "lr": 0.0003, "tps": 338168, "tokens": 14428405760, "gpu_gb": 72.2, "elapsed_s": 27507.0}
{"step": 13770, "loss": 2.3976, "lr": 0.0003, "tps": 338031, "tokens": 14438891520, "gpu_gb": 72.2, "elapsed_s": 27538.0}
{"step": 13780, "loss": 2.3889, "lr": 0.0003, "tps": 337780, "tokens": 14449377280, "gpu_gb": 72.2, "elapsed_s": 27569.1}
{"step": 13790, "loss": 2.3918, "lr": 0.0003, "tps": 337973, "tokens": 14459863040, "gpu_gb": 72.2, "elapsed_s": 27600.1}
{"step": 13800, "loss": 2.3704, "lr": 0.0003, "tps": 337891, "tokens": 14470348800, "gpu_gb": 72.2, "elapsed_s": 27631.1}
{"step": 13810, "loss": 2.3646, "lr": 0.0003, "tps": 338352, "tokens": 14480834560, "gpu_gb": 72.2, "elapsed_s": 27662.1}
{"step": 13820, "loss": 2.4193, "lr": 0.0003, "tps": 338153, "tokens": 14491320320, "gpu_gb": 72.2, "elapsed_s": 27693.1}
{"step": 13830, "loss": 2.3944, "lr": 0.0003, "tps": 337928, "tokens": 14501806080, "gpu_gb": 72.2, "elapsed_s": 27724.2}
{"step": 13840, "loss": 2.3892, "lr": 0.0003, "tps": 338377, "tokens": 14512291840, "gpu_gb": 72.2, "elapsed_s": 27755.2}
{"step": 13850, "loss": 2.3803, "lr": 0.0003, "tps": 337991, "tokens": 14522777600, "gpu_gb": 72.2, "elapsed_s": 27786.2}
{"step": 13860, "loss": 2.3906, "lr": 0.0003, "tps": 338170, "tokens": 14533263360, "gpu_gb": 72.2, "elapsed_s": 27817.2}
{"step": 13870, "loss": 2.4041, "lr": 0.0003, "tps": 337948, "tokens": 14543749120, "gpu_gb": 72.2, "elapsed_s": 27848.2}
{"step": 13880, "loss": 2.3714, "lr": 0.0003, "tps": 338151, "tokens": 14554234880, "gpu_gb": 72.2, "elapsed_s": 27879.2}
{"step": 13890, "loss": 2.3631, "lr": 0.0003, "tps": 337921, "tokens": 14564720640, "gpu_gb": 72.2, "elapsed_s": 27910.3}
{"step": 13900, "loss": 2.4128, "lr": 0.0003, "tps": 337688, "tokens": 14575206400, "gpu_gb": 72.2, "elapsed_s": 27941.3}
{"step": 13910, "loss": 2.4023, "lr": 0.0003, "tps": 338126, "tokens": 14585692160, "gpu_gb": 72.2, "elapsed_s": 27972.3}
{"step": 13920, "loss": 2.3972, "lr": 0.0003, "tps": 337869, "tokens": 14596177920, "gpu_gb": 72.2, "elapsed_s": 28003.4}
{"step": 13930, "loss": 2.3717, "lr": 0.0003, "tps": 337904, "tokens": 14606663680, "gpu_gb": 72.2, "elapsed_s": 28034.4}
{"step": 13940, "loss": 2.3699, "lr": 0.0003, "tps": 338286, "tokens": 14617149440, "gpu_gb": 72.2, "elapsed_s": 28065.4}
{"step": 13950, "loss": 2.3978, "lr": 0.0003, "tps": 337914, "tokens": 14627635200, "gpu_gb": 72.2, "elapsed_s": 28096.4}
{"step": 13960, "loss": 2.3763, "lr": 0.0003, "tps": 337989, "tokens": 14638120960, "gpu_gb": 72.2, "elapsed_s": 28127.4}
{"step": 13970, "loss": 2.3928, "lr": 0.0003, "tps": 338350, "tokens": 14648606720, "gpu_gb": 72.2, "elapsed_s": 28158.4}
{"step": 13980, "loss": 2.4489, "lr": 0.0003, "tps": 338017, "tokens": 14659092480, "gpu_gb": 72.2, "elapsed_s": 28189.5}
{"step": 13990, "loss": 2.43, "lr": 0.0003, "tps": 338245, "tokens": 14669578240, "gpu_gb": 72.2, "elapsed_s": 28220.5}
{"step": 14000, "loss": 2.4122, "lr": 0.0003, "tps": 338049, "tokens": 14680064000, "gpu_gb": 72.2, "elapsed_s": 28251.5}
{"step": 14010, "loss": 2.3695, "lr": 0.0003, "tps": 153562, "tokens": 14690549760, "gpu_gb": 72.2, "elapsed_s": 28319.8}
{"step": 14020, "loss": 2.4103, "lr": 0.0003, "tps": 338214, "tokens": 14701035520, "gpu_gb": 72.2, "elapsed_s": 28350.8}
{"step": 14030, "loss": 2.4145, "lr": 0.0003, "tps": 338370, "tokens": 14711521280, "gpu_gb": 72.2, "elapsed_s": 28381.7}
{"step": 14040, "loss": 2.3512, "lr": 0.0003, "tps": 338344, "tokens": 14722007040, "gpu_gb": 72.2, "elapsed_s": 28412.7}
{"step": 14050, "loss": 2.3445, "lr": 0.0003, "tps": 338112, "tokens": 14732492800, "gpu_gb": 72.2, "elapsed_s": 28443.8}
{"step": 14060, "loss": 2.399, "lr": 0.0003, "tps": 338230, "tokens": 14742978560, "gpu_gb": 72.2, "elapsed_s": 28474.8}
{"step": 14070, "loss": 2.4091, "lr": 0.0003, "tps": 338185, "tokens": 14753464320, "gpu_gb": 72.2, "elapsed_s": 28505.8}
{"step": 14080, "loss": 2.4174, "lr": 0.0003, "tps": 338231, "tokens": 14763950080, "gpu_gb": 72.2, "elapsed_s": 28536.8}
{"step": 14090, "loss": 2.3629, "lr": 0.0003, "tps": 338229, "tokens": 14774435840, "gpu_gb": 72.2, "elapsed_s": 28567.8}
{"step": 14100, "loss": 2.39, "lr": 0.0003, "tps": 338068, "tokens": 14784921600, "gpu_gb": 72.2, "elapsed_s": 28598.8}
{"step": 14110, "loss": 2.4044, "lr": 0.0003, "tps": 338051, "tokens": 14795407360, "gpu_gb": 72.2, "elapsed_s": 28629.8}
{"step": 14120, "loss": 2.4507, "lr": 0.0003, "tps": 338351, "tokens": 14805893120, "gpu_gb": 72.2, "elapsed_s": 28660.8}
{"step": 14130, "loss": 2.3797, "lr": 0.0003, "tps": 338052, "tokens": 14816378880, "gpu_gb": 72.2, "elapsed_s": 28691.8}
{"step": 14140, "loss": 2.4007, "lr": 0.0003, "tps": 338115, "tokens": 14826864640, "gpu_gb": 72.2, "elapsed_s": 28722.8}
{"step": 14150, "loss": 2.4137, "lr": 0.0003, "tps": 338110, "tokens": 14837350400, "gpu_gb": 72.2, "elapsed_s": 28753.8}
{"step": 14160, "loss": 2.3779, "lr": 0.0003, "tps": 338127, "tokens": 14847836160, "gpu_gb": 72.2, "elapsed_s": 28784.8}
{"step": 14170, "loss": 2.4555, "lr": 0.0003, "tps": 337868, "tokens": 14858321920, "gpu_gb": 72.2, "elapsed_s": 28815.9}
{"step": 14180, "loss": 2.3582, "lr": 0.0003, "tps": 338381, "tokens": 14868807680, "gpu_gb": 72.2, "elapsed_s": 28846.9}
{"step": 14190, "loss": 2.4094, "lr": 0.0003, "tps": 338267, "tokens": 14879293440, "gpu_gb": 72.2, "elapsed_s": 28877.9}
{"step": 14200, "loss": 2.3892, "lr": 0.0003, "tps": 338378, "tokens": 14889779200, "gpu_gb": 72.2, "elapsed_s": 28908.9}
{"step": 14210, "loss": 2.4276, "lr": 0.0003, "tps": 338141, "tokens": 14900264960, "gpu_gb": 72.2, "elapsed_s": 28939.9}
{"step": 14220, "loss": 2.3901, "lr": 0.0003, "tps": 338372, "tokens": 14910750720, "gpu_gb": 72.2, "elapsed_s": 28970.9}
{"step": 14230, "loss": 2.3898, "lr": 0.0003, "tps": 338304, "tokens": 14921236480, "gpu_gb": 72.2, "elapsed_s": 29001.9}
{"step": 14240, "loss": 2.4074, "lr": 0.0003, "tps": 338230, "tokens": 14931722240, "gpu_gb": 72.2, "elapsed_s": 29032.9}
{"step": 14250, "loss": 2.4087, "lr": 0.0003, "tps": 338209, "tokens": 14942208000, "gpu_gb": 72.2, "elapsed_s": 29063.9}
{"step": 14260, "loss": 2.394, "lr": 0.0003, "tps": 338171, "tokens": 14952693760, "gpu_gb": 72.2, "elapsed_s": 29094.9}
{"step": 14270, "loss": 2.4084, "lr": 0.0003, "tps": 338134, "tokens": 14963179520, "gpu_gb": 72.2, "elapsed_s": 29125.9}
{"step": 14280, "loss": 2.4463, "lr": 0.0003, "tps": 338358, "tokens": 14973665280, "gpu_gb": 72.2, "elapsed_s": 29156.9}
{"step": 14290, "loss": 2.4421, "lr": 0.0003, "tps": 338132, "tokens": 14984151040, "gpu_gb": 72.2, "elapsed_s": 29187.9}
{"step": 14300, "loss": 2.375, "lr": 0.0003, "tps": 338202, "tokens": 14994636800, "gpu_gb": 72.2, "elapsed_s": 29218.9}
{"step": 14310, "loss": 2.3911, "lr": 0.0003, "tps": 338279, "tokens": 15005122560, "gpu_gb": 72.2, "elapsed_s": 29249.9}
{"step": 14320, "loss": 2.4072, "lr": 0.0003, "tps": 338073, "tokens": 15015608320, "gpu_gb": 72.2, "elapsed_s": 29280.9}
{"step": 14330, "loss": 2.4003, "lr": 0.0003, "tps": 338479, "tokens": 15026094080, "gpu_gb": 72.2, "elapsed_s": 29311.9}
{"step": 14340, "loss": 2.3968, "lr": 0.0003, "tps": 338292, "tokens": 15036579840, "gpu_gb": 72.2, "elapsed_s": 29342.9}
{"step": 14350, "loss": 2.4047, "lr": 0.0003, "tps": 338297, "tokens": 15047065600, "gpu_gb": 72.2, "elapsed_s": 29373.9}
{"step": 14360, "loss": 2.4261, "lr": 0.0003, "tps": 338223, "tokens": 15057551360, "gpu_gb": 72.2, "elapsed_s": 29404.9}
{"step": 14370, "loss": 2.4297, "lr": 0.0003, "tps": 338142, "tokens": 15068037120, "gpu_gb": 72.2, "elapsed_s": 29435.9}
{"step": 14380, "loss": 2.3585, "lr": 0.0003, "tps": 338182, "tokens": 15078522880, "gpu_gb": 72.2, "elapsed_s": 29466.9}
{"step": 14390, "loss": 2.391, "lr": 0.0003, "tps": 338049, "tokens": 15089008640, "gpu_gb": 72.2, "elapsed_s": 29497.9}
{"step": 14400, "loss": 2.3808, "lr": 0.0003, "tps": 338296, "tokens": 15099494400, "gpu_gb": 72.2, "elapsed_s": 29528.9}
{"step": 14410, "loss": 2.3911, "lr": 0.0003, "tps": 338196, "tokens": 15109980160, "gpu_gb": 72.2, "elapsed_s": 29559.9}
{"step": 14420, "loss": 2.3824, "lr": 0.0003, "tps": 338215, "tokens": 15120465920, "gpu_gb": 72.2, "elapsed_s": 29590.9}
{"step": 14430, "loss": 2.4139, "lr": 0.0003, "tps": 338406, "tokens": 15130951680, "gpu_gb": 72.2, "elapsed_s": 29621.9}
{"step": 14440, "loss": 2.4192, "lr": 0.0003, "tps": 338228, "tokens": 15141437440, "gpu_gb": 72.2, "elapsed_s": 29652.9}
{"step": 14450, "loss": 2.3894, "lr": 0.0003, "tps": 338293, "tokens": 15151923200, "gpu_gb": 72.2, "elapsed_s": 29683.9}
{"step": 14460, "loss": 2.4162, "lr": 0.0003, "tps": 338696, "tokens": 15162408960, "gpu_gb": 72.2, "elapsed_s": 29714.9}
{"step": 14470, "loss": 2.3388, "lr": 0.0003, "tps": 338078, "tokens": 15172894720, "gpu_gb": 72.2, "elapsed_s": 29745.9}
{"step": 14480, "loss": 2.3905, "lr": 0.0003, "tps": 338482, "tokens": 15183380480, "gpu_gb": 72.2, "elapsed_s": 29776.9}
{"step": 14490, "loss": 2.3525, "lr": 0.0003, "tps": 338371, "tokens": 15193866240, "gpu_gb": 72.2, "elapsed_s": 29807.8}
{"step": 14500, "loss": 2.3804, "lr": 0.0003, "tps": 338402, "tokens": 15204352000, "gpu_gb": 72.2, "elapsed_s": 29838.8}
{"step": 14510, "loss": 2.3678, "lr": 0.0003, "tps": 338447, "tokens": 15214837760, "gpu_gb": 72.2, "elapsed_s": 29869.8}
{"step": 14520, "loss": 2.389, "lr": 0.0003, "tps": 338288, "tokens": 15225323520, "gpu_gb": 72.2, "elapsed_s": 29900.8}
{"step": 14530, "loss": 2.3549, "lr": 0.0003, "tps": 338401, "tokens": 15235809280, "gpu_gb": 72.2, "elapsed_s": 29931.8}
{"step": 14540, "loss": 2.4077, "lr": 0.0003, "tps": 338054, "tokens": 15246295040, "gpu_gb": 72.2, "elapsed_s": 29962.8}
{"step": 14550, "loss": 2.4214, "lr": 0.0003, "tps": 338424, "tokens": 15256780800, "gpu_gb": 72.2, "elapsed_s": 29993.8}
{"step": 14560, "loss": 2.3536, "lr": 0.0003, "tps": 338162, "tokens": 15267266560, "gpu_gb": 72.2, "elapsed_s": 30024.8}
{"step": 14570, "loss": 2.3846, "lr": 0.0003, "tps": 338023, "tokens": 15277752320, "gpu_gb": 72.2, "elapsed_s": 30055.8}
{"step": 14580, "loss": 2.4028, "lr": 0.0003, "tps": 338161, "tokens": 15288238080, "gpu_gb": 72.2, "elapsed_s": 30086.8}
{"step": 14590, "loss": 2.3499, "lr": 0.0003, "tps": 338088, "tokens": 15298723840, "gpu_gb": 72.2, "elapsed_s": 30117.8}
{"step": 14600, "loss": 2.3747, "lr": 0.0003, "tps": 338469, "tokens": 15309209600, "gpu_gb": 72.2, "elapsed_s": 30148.8}
{"step": 14610, "loss": 2.4255, "lr": 0.0003, "tps": 338038, "tokens": 15319695360, "gpu_gb": 72.2, "elapsed_s": 30179.8}
{"step": 14620, "loss": 2.3705, "lr": 0.0003, "tps": 338219, "tokens": 15330181120, "gpu_gb": 72.2, "elapsed_s": 30210.9}
{"step": 14630, "loss": 2.3779, "lr": 0.0003, "tps": 338305, "tokens": 15340666880, "gpu_gb": 72.2, "elapsed_s": 30241.8}
{"step": 14640, "loss": 2.4046, "lr": 0.0003, "tps": 338222, "tokens": 15351152640, "gpu_gb": 72.2, "elapsed_s": 30272.9}
{"step": 14650, "loss": 2.3663, "lr": 0.0003, "tps": 338442, "tokens": 15361638400, "gpu_gb": 72.2, "elapsed_s": 30303.8}
{"step": 14660, "loss": 2.3759, "lr": 0.0003, "tps": 337848, "tokens": 15372124160, "gpu_gb": 72.2, "elapsed_s": 30334.9}
{"step": 14670, "loss": 2.3842, "lr": 0.0003, "tps": 338223, "tokens": 15382609920, "gpu_gb": 72.2, "elapsed_s": 30365.9}
{"step": 14680, "loss": 2.4338, "lr": 0.0003, "tps": 338026, "tokens": 15393095680, "gpu_gb": 72.2, "elapsed_s": 30396.9}
{"step": 14690, "loss": 2.3679, "lr": 0.0003, "tps": 337935, "tokens": 15403581440, "gpu_gb": 72.2, "elapsed_s": 30427.9}
{"step": 14700, "loss": 2.3933, "lr": 0.0003, "tps": 338155, "tokens": 15414067200, "gpu_gb": 72.2, "elapsed_s": 30458.9}
{"step": 14710, "loss": 2.4196, "lr": 0.0003, "tps": 338196, "tokens": 15424552960, "gpu_gb": 72.2, "elapsed_s": 30489.9}
{"step": 14720, "loss": 2.4037, "lr": 0.0003, "tps": 338044, "tokens": 15435038720, "gpu_gb": 72.2, "elapsed_s": 30521.0}
{"step": 14730, "loss": 2.3757, "lr": 0.0003, "tps": 338344, "tokens": 15445524480, "gpu_gb": 72.2, "elapsed_s": 30551.9}
{"step": 14740, "loss": 2.3866, "lr": 0.0003, "tps": 338201, "tokens": 15456010240, "gpu_gb": 72.2, "elapsed_s": 30583.0}
{"step": 14750, "loss": 2.3928, "lr": 0.0003, "tps": 338046, "tokens": 15466496000, "gpu_gb": 72.2, "elapsed_s": 30614.0}
{"step": 14760, "loss": 2.4228, "lr": 0.0003, "tps": 338353, "tokens": 15476981760, "gpu_gb": 72.2, "elapsed_s": 30645.0}
{"step": 14770, "loss": 2.4102, "lr": 0.0003, "tps": 338194, "tokens": 15487467520, "gpu_gb": 72.2, "elapsed_s": 30676.0}
{"step": 14780, "loss": 2.3913, "lr": 0.0003, "tps": 338162, "tokens": 15497953280, "gpu_gb": 72.2, "elapsed_s": 30707.0}
{"step": 14790, "loss": 2.3528, "lr": 0.0003, "tps": 338142, "tokens": 15508439040, "gpu_gb": 72.2, "elapsed_s": 30738.0}
{"step": 14800, "loss": 2.4074, "lr": 0.0003, "tps": 337870, "tokens": 15518924800, "gpu_gb": 72.2, "elapsed_s": 30769.0}
{"step": 14810, "loss": 2.4074, "lr": 0.0003, "tps": 338263, "tokens": 15529410560, "gpu_gb": 72.2, "elapsed_s": 30800.0}
{"step": 14820, "loss": 2.3887, "lr": 0.0003, "tps": 338224, "tokens": 15539896320, "gpu_gb": 72.2, "elapsed_s": 30831.0}
{"step": 14830, "loss": 2.4014, "lr": 0.0003, "tps": 338295, "tokens": 15550382080, "gpu_gb": 72.2, "elapsed_s": 30862.0}
{"step": 14840, "loss": 2.3552, "lr": 0.0003, "tps": 338135, "tokens": 15560867840, "gpu_gb": 72.2, "elapsed_s": 30893.0}
{"step": 14850, "loss": 2.3711, "lr": 0.0003, "tps": 338004, "tokens": 15571353600, "gpu_gb": 72.2, "elapsed_s": 30924.1}
{"step": 14860, "loss": 2.4149, "lr": 0.0003, "tps": 337944, "tokens": 15581839360, "gpu_gb": 72.2, "elapsed_s": 30955.1}
{"step": 14870, "loss": 2.4043, "lr": 0.0003, "tps": 338287, "tokens": 15592325120, "gpu_gb": 72.2, "elapsed_s": 30986.1}
{"step": 14880, "loss": 2.3495, "lr": 0.0003, "tps": 338347, "tokens": 15602810880, "gpu_gb": 72.2, "elapsed_s": 31017.1}
{"step": 14890, "loss": 2.382, "lr": 0.0003, "tps": 338144, "tokens": 15613296640, "gpu_gb": 72.2, "elapsed_s": 31048.1}
{"step": 14900, "loss": 2.3896, "lr": 0.0003, "tps": 338296, "tokens": 15623782400, "gpu_gb": 72.2, "elapsed_s": 31079.1}
{"step": 14910, "loss": 2.3954, "lr": 0.0003, "tps": 338317, "tokens": 15634268160, "gpu_gb": 72.2, "elapsed_s": 31110.1}
{"step": 14920, "loss": 2.3438, "lr": 0.0003, "tps": 338135, "tokens": 15644753920, "gpu_gb": 72.2, "elapsed_s": 31141.1}
{"step": 14930, "loss": 2.3825, "lr": 0.0003, "tps": 338145, "tokens": 15655239680, "gpu_gb": 72.2, "elapsed_s": 31172.1}
{"step": 14940, "loss": 2.4063, "lr": 0.0003, "tps": 337959, "tokens": 15665725440, "gpu_gb": 72.2, "elapsed_s": 31203.1}
{"step": 14950, "loss": 2.4002, "lr": 0.0003, "tps": 338324, "tokens": 15676211200, "gpu_gb": 72.2, "elapsed_s": 31234.1}
{"step": 14960, "loss": 2.3927, "lr": 0.0003, "tps": 338265, "tokens": 15686696960, "gpu_gb": 72.2, "elapsed_s": 31265.1}
{"step": 14970, "loss": 2.3464, "lr": 0.0003, "tps": 338348, "tokens": 15697182720, "gpu_gb": 72.2, "elapsed_s": 31296.1}
{"step": 14980, "loss": 2.4162, "lr": 0.0003, "tps": 338158, "tokens": 15707668480, "gpu_gb": 72.2, "elapsed_s": 31327.1}
{"step": 14990, "loss": 2.3715, "lr": 0.0003, "tps": 338368, "tokens": 15718154240, "gpu_gb": 72.2, "elapsed_s": 31358.1}
{"step": 15000, "loss": 2.3489, "lr": 0.0003, "tps": 338056, "tokens": 15728640000, "gpu_gb": 72.2, "elapsed_s": 31389.1}
{"step": 15010, "loss": 2.3684, "lr": 0.0003, "tps": 146418, "tokens": 15739125760, "gpu_gb": 72.2, "elapsed_s": 31460.7}
{"step": 15020, "loss": 2.4, "lr": 0.0003, "tps": 337835, "tokens": 15749611520, "gpu_gb": 72.2, "elapsed_s": 31491.8}
{"step": 15030, "loss": 2.3992, "lr": 0.0003, "tps": 337813, "tokens": 15760097280, "gpu_gb": 72.2, "elapsed_s": 31522.8}
{"step": 15040, "loss": 2.4008, "lr": 0.0003, "tps": 338134, "tokens": 15770583040, "gpu_gb": 72.2, "elapsed_s": 31553.8}
{"step": 15050, "loss": 2.4145, "lr": 0.0003, "tps": 337960, "tokens": 15781068800, "gpu_gb": 72.2, "elapsed_s": 31584.9}
{"step": 15060, "loss": 2.4084, "lr": 0.0003, "tps": 338058, "tokens": 15791554560, "gpu_gb": 72.2, "elapsed_s": 31615.9}
{"step": 15070, "loss": 2.384, "lr": 0.0003, "tps": 338134, "tokens": 15802040320, "gpu_gb": 72.2, "elapsed_s": 31646.9}
{"step": 15080, "loss": 2.3773, "lr": 0.0003, "tps": 338222, "tokens": 15812526080, "gpu_gb": 72.2, "elapsed_s": 31677.9}
{"step": 15090, "loss": 2.3893, "lr": 0.0003, "tps": 338098, "tokens": 15823011840, "gpu_gb": 72.2, "elapsed_s": 31708.9}
{"step": 15100, "loss": 2.3916, "lr": 0.0003, "tps": 338000, "tokens": 15833497600, "gpu_gb": 72.2, "elapsed_s": 31739.9}
{"step": 15110, "loss": 2.3774, "lr": 0.0003, "tps": 338101, "tokens": 15843983360, "gpu_gb": 72.2, "elapsed_s": 31770.9}
{"step": 15120, "loss": 2.3797, "lr": 0.0003, "tps": 338254, "tokens": 15854469120, "gpu_gb": 72.2, "elapsed_s": 31801.9}
{"step": 15130, "loss": 2.2797, "lr": 0.0003, "tps": 338050, "tokens": 15864954880, "gpu_gb": 72.2, "elapsed_s": 31833.0}
{"step": 15140, "loss": 2.3899, "lr": 0.0003, "tps": 338002, "tokens": 15875440640, "gpu_gb": 72.2, "elapsed_s": 31864.0}
{"step": 15150, "loss": 2.3663, "lr": 0.0003, "tps": 337988, "tokens": 15885926400, "gpu_gb": 72.2, "elapsed_s": 31895.0}
{"step": 15160, "loss": 2.3788, "lr": 0.0003, "tps": 338111, "tokens": 15896412160, "gpu_gb": 72.2, "elapsed_s": 31926.0}
{"step": 15170, "loss": 2.3953, "lr": 0.0003, "tps": 338012, "tokens": 15906897920, "gpu_gb": 72.2, "elapsed_s": 31957.0}
{"step": 15180, "loss": 2.3439, "lr": 0.0003, "tps": 338353, "tokens": 15917383680, "gpu_gb": 72.2, "elapsed_s": 31988.0}
{"step": 15190, "loss": 2.4668, "lr": 0.0003, "tps": 338230, "tokens": 15927869440, "gpu_gb": 72.2, "elapsed_s": 32019.0}
{"step": 15200, "loss": 2.3954, "lr": 0.0003, "tps": 338317, "tokens": 15938355200, "gpu_gb": 72.2, "elapsed_s": 32050.0}
{"step": 15210, "loss": 2.352, "lr": 0.0003, "tps": 337933, "tokens": 15948840960, "gpu_gb": 72.2, "elapsed_s": 32081.1}
{"step": 15220, "loss": 2.3244, "lr": 0.0003, "tps": 338000, "tokens": 15959326720, "gpu_gb": 72.2, "elapsed_s": 32112.1}
{"step": 15230, "loss": 2.4005, "lr": 0.0003, "tps": 338157, "tokens": 15969812480, "gpu_gb": 72.2, "elapsed_s": 32143.1}
{"step": 15240, "loss": 2.3725, "lr": 0.0003, "tps": 337947, "tokens": 15980298240, "gpu_gb": 72.2, "elapsed_s": 32174.1}
{"step": 15250, "loss": 2.393, "lr": 0.0003, "tps": 338028, "tokens": 15990784000, "gpu_gb": 72.2, "elapsed_s": 32205.1}
{"step": 15260, "loss": 2.3885, "lr": 0.0002999999542264696, "tps": 338349, "tokens": 16001269760, "gpu_gb": 72.2, "elapsed_s": 32236.1}
{"step": 15270, "loss": 2.3922, "lr": 0.000299994461440389, "tps": 337980, "tokens": 16011755520, "gpu_gb": 72.2, "elapsed_s": 32267.1}
{"step": 15280, "loss": 2.4054, "lr": 0.0002999798143750414, "tps": 337828, "tokens": 16022241280, "gpu_gb": 72.2, "elapsed_s": 32298.2}
{"step": 15290, "loss": 2.3809, "lr": 0.00029995601402367744, "tps": 338019, "tokens": 16032727040, "gpu_gb": 72.2, "elapsed_s": 32329.2}
{"step": 15300, "loss": 2.3951, "lr": 0.0002999230620002527, "tps": 338139, "tokens": 16043212800, "gpu_gb": 72.2, "elapsed_s": 32360.2}
{"step": 15310, "loss": 2.3756, "lr": 0.00029988096053931837, "tps": 338068, "tokens": 16053698560, "gpu_gb": 72.2, "elapsed_s": 32391.2}
{"step": 15320, "loss": 2.3595, "lr": 0.0002998297124958694, "tps": 338274, "tokens": 16064184320, "gpu_gb": 72.2, "elapsed_s": 32422.2}
{"step": 15330, "loss": 2.4314, "lr": 0.0002997693213451516, "tps": 337943, "tokens": 16074670080, "gpu_gb": 72.2, "elapsed_s": 32453.3}
{"step": 15340, "loss": 2.4171, "lr": 0.000299699791182425, "tps": 337932, "tokens": 16085155840, "gpu_gb": 72.2, "elapsed_s": 32484.3}
{"step": 15350, "loss": 2.3992, "lr": 0.00029962112672268716, "tps": 338071, "tokens": 16095641600, "gpu_gb": 72.2, "elapsed_s": 32515.3}
{"step": 15360, "loss": 2.3269, "lr": 0.00029953333330035264, "tps": 338005, "tokens": 16106127360, "gpu_gb": 72.2, "elapsed_s": 32546.3}
{"step": 15370, "loss": 2.38, "lr": 0.00029943641686889166, "tps": 338132, "tokens": 16116613120, "gpu_gb": 72.2, "elapsed_s": 32577.3}
{"step": 15380, "loss": 2.4132, "lr": 0.0002993303840004263, "tps": 338098, "tokens": 16127098880, "gpu_gb": 72.2, "elapsed_s": 32608.4}
{"step": 15390, "loss": 2.3876, "lr": 0.00029921524188528483, "tps": 338213, "tokens": 16137584640, "gpu_gb": 72.2, "elapsed_s": 32639.4}
{"step": 15400, "loss": 2.3593, "lr": 0.000299090998331514, "tps": 338224, "tokens": 16148070400, "gpu_gb": 72.2, "elapsed_s": 32670.4}
{"step": 15410, "loss": 2.3769, "lr": 0.00029895766176434984, "tps": 338195, "tokens": 16158556160, "gpu_gb": 72.2, "elapsed_s": 32701.4}
{"step": 15420, "loss": 2.3525, "lr": 0.000298815241225646, "tps": 338395, "tokens": 16169041920, "gpu_gb": 72.2, "elapsed_s": 32732.4}
{"step": 15430, "loss": 2.4009, "lr": 0.00029866374637326084, "tps": 337950, "tokens": 16179527680, "gpu_gb": 72.2, "elapsed_s": 32763.4}
{"step": 15440, "loss": 2.377, "lr": 0.00029850318748040244, "tps": 338094, "tokens": 16190013440, "gpu_gb": 72.2, "elapsed_s": 32794.4}
{"step": 15450, "loss": 2.3718, "lr": 0.0002983335754349318, "tps": 337899, "tokens": 16200499200, "gpu_gb": 72.2, "elapsed_s": 32825.4}
{"step": 15460, "loss": 2.3704, "lr": 0.0002981549217386249, "tps": 337912, "tokens": 16210984960, "gpu_gb": 72.2, "elapsed_s": 32856.5}
{"step": 15470, "loss": 2.3574, "lr": 0.0002979672385063921, "tps": 337998, "tokens": 16221470720, "gpu_gb": 72.2, "elapsed_s": 32887.5}
{"step": 15480, "loss": 2.3565, "lr": 0.00029777053846545727, "tps": 337894, "tokens": 16231956480, "gpu_gb": 72.2, "elapsed_s": 32918.5}
{"step": 15490, "loss": 2.4019, "lr": 0.00029756483495449443, "tps": 337886, "tokens": 16242442240, "gpu_gb": 72.2, "elapsed_s": 32949.6}
{"step": 15500, "loss": 2.3773, "lr": 0.00029735014192272313, "tps": 337924, "tokens": 16252928000, "gpu_gb": 72.2, "elapsed_s": 32980.6}
{"step": 15510, "loss": 2.4053, "lr": 0.00029712647392896263, "tps": 337719, "tokens": 16263413760, "gpu_gb": 72.2, "elapsed_s": 33011.6}
{"step": 15520, "loss": 2.3695, "lr": 0.00029689384614064464, "tps": 337707, "tokens": 16273899520, "gpu_gb": 72.2, "elapsed_s": 33042.7}
{"step": 15530, "loss": 2.3626, "lr": 0.00029665227433278485, "tps": 337954, "tokens": 16284385280, "gpu_gb": 72.2, "elapsed_s": 33073.7}
{"step": 15540, "loss": 2.3229, "lr": 0.00029640177488691305, "tps": 337711, "tokens": 16294871040, "gpu_gb": 72.2, "elapsed_s": 33104.8}
{"step": 15550, "loss": 2.3786, "lr": 0.00029614236478996234, "tps": 338067, "tokens": 16305356800, "gpu_gb": 72.2, "elapsed_s": 33135.8}
{"step": 15560, "loss": 2.3977, "lr": 0.00029587406163311713, "tps": 338134, "tokens": 16315842560, "gpu_gb": 72.2, "elapsed_s": 33166.8}
{"step": 15570, "loss": 2.352, "lr": 0.00029559688361062035, "tps": 337989, "tokens": 16326328320, "gpu_gb": 72.2, "elapsed_s": 33197.8}
{"step": 15580, "loss": 2.3803, "lr": 0.0002953108495185397, "tps": 338013, "tokens": 16336814080, "gpu_gb": 72.2, "elapsed_s": 33228.8}
{"step": 15590, "loss": 2.3889, "lr": 0.00029501597875349273, "tps": 338164, "tokens": 16347299840, "gpu_gb": 72.2, "elapsed_s": 33259.8}
{"step": 15600, "loss": 2.3763, "lr": 0.000294712291311332, "tps": 338036, "tokens": 16357785600, "gpu_gb": 72.2, "elapsed_s": 33290.9}
{"step": 15610, "loss": 2.3901, "lr": 0.0002943998077857887, "tps": 337911, "tokens": 16368271360, "gpu_gb": 72.2, "elapsed_s": 33321.9}
{"step": 15620, "loss": 2.387, "lr": 0.0002940785493670763, "tps": 338342, "tokens": 16378757120, "gpu_gb": 72.2, "elapsed_s": 33352.9}
{"step": 15630, "loss": 2.3615, "lr": 0.00029374853784045363, "tps": 338077, "tokens": 16389242880, "gpu_gb": 72.2, "elapsed_s": 33383.9}
{"step": 15640, "loss": 2.4258, "lr": 0.00029340979558474756, "tps": 337994, "tokens": 16399728640, "gpu_gb": 72.2, "elapsed_s": 33414.9}
{"step": 15650, "loss": 2.3581, "lr": 0.0002930623455708353, "tps": 338190, "tokens": 16410214400, "gpu_gb": 72.2, "elapsed_s": 33445.9}
{"step": 15660, "loss": 2.3563, "lr": 0.000292706211360087, "tps": 337524, "tokens": 16420700160, "gpu_gb": 72.2, "elapsed_s": 33477.0}
{"step": 15670, "loss": 2.3395, "lr": 0.0002923414171027676, "tps": 338087, "tokens": 16431185920, "gpu_gb": 72.2, "elapsed_s": 33508.0}
{"step": 15680, "loss": 2.3985, "lr": 0.00029196798753639946, "tps": 337936, "tokens": 16441671680, "gpu_gb": 72.2, "elapsed_s": 33539.0}
{"step": 15690, "loss": 2.3836, "lr": 0.0002915859479840847, "tps": 338086, "tokens": 16452157440, "gpu_gb": 72.2, "elapsed_s": 33570.1}
{"step": 15700, "loss": 2.3493, "lr": 0.0002911953243527881, "tps": 337999, "tokens": 16462643200, "gpu_gb": 72.2, "elapsed_s": 33601.1}
{"step": 15710, "loss": 2.361, "lr": 0.0002907961431315803, "tps": 337857, "tokens": 16473128960, "gpu_gb": 72.2, "elapsed_s": 33632.1}
{"step": 15720, "loss": 2.386, "lr": 0.00029038843138984115, "tps": 338050, "tokens": 16483614720, "gpu_gb": 72.2, "elapsed_s": 33663.1}
{"step": 15730, "loss": 2.4312, "lr": 0.00028997221677542454, "tps": 338010, "tokens": 16494100480, "gpu_gb": 72.2, "elapsed_s": 33694.2}
{"step": 15740, "loss": 2.3646, "lr": 0.0002895475275127834, "tps": 337810, "tokens": 16504586240, "gpu_gb": 72.2, "elapsed_s": 33725.2}
{"step": 15750, "loss": 2.3371, "lr": 0.0002891143924010557, "tps": 337946, "tokens": 16515072000, "gpu_gb": 72.2, "elapsed_s": 33756.2}
{"step": 15760, "loss": 2.3931, "lr": 0.0002886728408121113, "tps": 338022, "tokens": 16525557760, "gpu_gb": 72.2, "elapsed_s": 33787.2}
{"step": 15770, "loss": 2.3622, "lr": 0.00028822290268856065, "tps": 337855, "tokens": 16536043520, "gpu_gb": 72.2, "elapsed_s": 33818.3}
{"step": 15780, "loss": 2.4067, "lr": 0.0002877646085417239, "tps": 337789, "tokens": 16546529280, "gpu_gb": 72.2, "elapsed_s": 33849.3}
{"step": 15790, "loss": 2.38, "lr": 0.00028729798944956194, "tps": 337941, "tokens": 16557015040, "gpu_gb": 72.2, "elapsed_s": 33880.4}
{"step": 15800, "loss": 2.3748, "lr": 0.00028682307705456923, "tps": 338208, "tokens": 16567500800, "gpu_gb": 72.2, "elapsed_s": 33911.4}
{"step": 15810, "loss": 2.4189, "lr": 0.0002863399035616277, "tps": 337995, "tokens": 16577986560, "gpu_gb": 72.2, "elapsed_s": 33942.4}
{"step": 15820, "loss": 2.3859, "lr": 0.0002858485017358229, "tps": 338020, "tokens": 16588472320, "gpu_gb": 72.2, "elapsed_s": 33973.4}
{"step": 15830, "loss": 2.3711, "lr": 0.00028534890490022235, "tps": 337895, "tokens": 16598958080, "gpu_gb": 72.2, "elapsed_s": 34004.4}
{"step": 15840, "loss": 2.3871, "lr": 0.0002848411469336156, "tps": 338186, "tokens": 16609443840, "gpu_gb": 72.2, "elapsed_s": 34035.4}
{"step": 15850, "loss": 2.3635, "lr": 0.00028432526226821695, "tps": 337717, "tokens": 16619929600, "gpu_gb": 72.2, "elapsed_s": 34066.5}
{"step": 15860, "loss": 2.3014, "lr": 0.00028380128588733055, "tps": 338294, "tokens": 16630415360, "gpu_gb": 72.2, "elapsed_s": 34097.5}
{"step": 15870, "loss": 2.3887, "lr": 0.00028326925332297794, "tps": 337964, "tokens": 16640901120, "gpu_gb": 72.2, "elapsed_s": 34128.5}
{"step": 15880, "loss": 2.3692, "lr": 0.00028272920065348873, "tps": 338208, "tokens": 16651386880, "gpu_gb": 72.2, "elapsed_s": 34159.5}
{"step": 15890, "loss": 2.3494, "lr": 0.000282181164501054, "tps": 338316, "tokens": 16661872640, "gpu_gb": 72.2, "elapsed_s": 34190.5}
{"step": 15900, "loss": 2.3825, "lr": 0.00028162518202924273, "tps": 338137, "tokens": 16672358400, "gpu_gb": 72.2, "elapsed_s": 34221.5}
{"step": 15910, "loss": 2.4257, "lr": 0.00028106129094048187, "tps": 337807, "tokens": 16682844160, "gpu_gb": 72.2, "elapsed_s": 34252.6}
{"step": 15920, "loss": 2.4275, "lr": 0.00028048952947349947, "tps": 338253, "tokens": 16693329920, "gpu_gb": 72.2, "elapsed_s": 34283.6}
{"step": 15930, "loss": 2.3814, "lr": 0.0002799099364007317, "tps": 337864, "tokens": 16703815680, "gpu_gb": 72.2, "elapsed_s": 34314.6}
{"step": 15940, "loss": 2.3911, "lr": 0.0002793225510256937, "tps": 337942, "tokens": 16714301440, "gpu_gb": 72.2, "elapsed_s": 34345.6}
{"step": 15950, "loss": 2.4027, "lr": 0.00027872741318031406, "tps": 338314, "tokens": 16724787200, "gpu_gb": 72.2, "elapsed_s": 34376.6}
{"step": 15960, "loss": 2.3721, "lr": 0.000278124563222234, "tps": 338119, "tokens": 16735272960, "gpu_gb": 72.2, "elapsed_s": 34407.6}
{"step": 15970, "loss": 2.4187, "lr": 0.0002775140420320706, "tps": 338264, "tokens": 16745758720, "gpu_gb": 72.2, "elapsed_s": 34438.6}
{"step": 15980, "loss": 2.4035, "lr": 0.00027689589101064426, "tps": 338274, "tokens": 16756244480, "gpu_gb": 72.2, "elapsed_s": 34469.6}
{"step": 15990, "loss": 2.3958, "lr": 0.00027627015207617176, "tps": 338174, "tokens": 16766730240, "gpu_gb": 72.2, "elapsed_s": 34500.6}
{"step": 16000, "loss": 2.3925, "lr": 0.0002756368676614231, "tps": 338152, "tokens": 16777216000, "gpu_gb": 72.2, "elapsed_s": 34531.7}
{"step": 16010, "loss": 2.3956, "lr": 0.00027499608071084465, "tps": 163299, "tokens": 16787701760, "gpu_gb": 72.2, "elapsed_s": 34595.9}
{"step": 16020, "loss": 2.4183, "lr": 0.0002743478346776464, "tps": 337914, "tokens": 16798187520, "gpu_gb": 72.2, "elapsed_s": 34626.9}
{"step": 16030, "loss": 2.3929, "lr": 0.00027369217352085575, "tps": 338052, "tokens": 16808673280, "gpu_gb": 72.2, "elapsed_s": 34657.9}
{"step": 16040, "loss": 2.3696, "lr": 0.00027302914170233627, "tps": 338158, "tokens": 16819159040, "gpu_gb": 72.2, "elapsed_s": 34688.9}
{"step": 16050, "loss": 2.3587, "lr": 0.00027235878418377287, "tps": 338424, "tokens": 16829644800, "gpu_gb": 72.2, "elapsed_s": 34719.9}
{"step": 16060, "loss": 2.378, "lr": 0.00027168114642362254, "tps": 338272, "tokens": 16840130560, "gpu_gb": 72.2, "elapsed_s": 34750.9}
{"step": 16070, "loss": 2.3411, "lr": 0.0002709962743740321, "tps": 338458, "tokens": 16850616320, "gpu_gb": 72.2, "elapsed_s": 34781.9}
{"step": 16080, "loss": 2.3604, "lr": 0.0002703042144777217, "tps": 338338, "tokens": 16861102080, "gpu_gb": 72.2, "elapsed_s": 34812.9}
{"step": 16090, "loss": 2.3803, "lr": 0.0002696050136648357, "tps": 338248, "tokens": 16871587840, "gpu_gb": 72.2, "elapsed_s": 34843.9}
{"step": 16100, "loss": 2.3461, "lr": 0.00026889871934976015, "tps": 338002, "tokens": 16882073600, "gpu_gb": 72.2, "elapsed_s": 34874.9}
{"step": 16110, "loss": 2.3919, "lr": 0.0002681853794279075, "tps": 337928, "tokens": 16892559360, "gpu_gb": 72.2, "elapsed_s": 34905.9}
{"step": 16120, "loss": 2.3952, "lr": 0.0002674650422724685, "tps": 338168, "tokens": 16903045120, "gpu_gb": 72.2, "elapsed_s": 34936.9}
{"step": 16130, "loss": 2.3486, "lr": 0.0002667377567311324, "tps": 338085, "tokens": 16913530880, "gpu_gb": 72.2, "elapsed_s": 34968.0}
{"step": 16140, "loss": 2.3609, "lr": 0.0002660035721227742, "tps": 338111, "tokens": 16924016640, "gpu_gb": 72.2, "elapsed_s": 34999.0}
{"step": 16150, "loss": 2.3485, "lr": 0.00026526253823410984, "tps": 338278, "tokens": 16934502400, "gpu_gb": 72.2, "elapsed_s": 35030.0}
{"step": 16160, "loss": 2.307, "lr": 0.0002645147053163208, "tps": 338076, "tokens": 16944988160, "gpu_gb": 72.2, "elapsed_s": 35061.0}
{"step": 16170, "loss": 2.3886, "lr": 0.000263760124081646, "tps": 338312, "tokens": 16955473920, "gpu_gb": 72.2, "elapsed_s": 35092.0}
{"step": 16180, "loss": 2.4065, "lr": 0.00026299884569994277, "tps": 337917, "tokens": 16965959680, "gpu_gb": 72.2, "elapsed_s": 35123.0}
{"step": 16190, "loss": 2.3609, "lr": 0.0002622309217952173, "tps": 338262, "tokens": 16976445440, "gpu_gb": 72.2, "elapsed_s": 35154.0}
{"step": 16200, "loss": 2.3723, "lr": 0.0002614564044421238, "tps": 338344, "tokens": 16986931200, "gpu_gb": 72.2, "elapsed_s": 35185.0}
{"step": 16210, "loss": 2.3279, "lr": 0.00026067534616243274, "tps": 338330, "tokens": 16997416960, "gpu_gb": 72.2, "elapsed_s": 35216.0}
{"step": 16220, "loss": 2.3673, "lr": 0.0002598877999214699, "tps": 337873, "tokens": 17007902720, "gpu_gb": 72.2, "elapsed_s": 35247.0}
{"step": 16230, "loss": 2.3532, "lr": 0.00025909381912452435, "tps": 338516, "tokens": 17018388480, "gpu_gb": 72.2, "elapsed_s": 35278.0}
{"step": 16240, "loss": 2.3681, "lr": 0.0002582934576132267, "tps": 338352, "tokens": 17028874240, "gpu_gb": 72.2, "elapsed_s": 35309.0}
{"step": 16250, "loss": 2.3644, "lr": 0.0002574867696618984, "tps": 338082, "tokens": 17039360000, "gpu_gb": 72.2, "elapsed_s": 35340.0}
{"step": 16260, "loss": 2.3488, "lr": 0.00025667380997387114, "tps": 338241, "tokens": 17049845760, "gpu_gb": 72.2, "elapsed_s": 35371.0}
{"step": 16270, "loss": 2.3778, "lr": 0.0002558546336777771, "tps": 338000, "tokens": 17060331520, "gpu_gb": 72.2, "elapsed_s": 35402.0}
{"step": 16280, "loss": 2.3675, "lr": 0.00025502929632381064, "tps": 338476, "tokens": 17070817280, "gpu_gb": 72.2, "elapsed_s": 35433.0}
{"step": 16290, "loss": 2.3467, "lr": 0.0002541978538799616, "tps": 338183, "tokens": 17081303040, "gpu_gb": 72.2, "elapsed_s": 35464.0}
{"step": 16300, "loss": 2.3926, "lr": 0.00025336036272821953, "tps": 338155, "tokens": 17091788800, "gpu_gb": 72.2, "elapsed_s": 35495.0}
{"step": 16310, "loss": 2.3775, "lr": 0.0002525168796607506, "tps": 338158, "tokens": 17102274560, "gpu_gb": 72.2, "elapsed_s": 35526.0}
{"step": 16320, "loss": 2.3278, "lr": 0.0002516674618760463, "tps": 338229, "tokens": 17112760320, "gpu_gb": 72.2, "elapsed_s": 35557.0}
{"step": 16330, "loss": 2.3895, "lr": 0.00025081216697504484, "tps": 337733, "tokens": 17123246080, "gpu_gb": 72.2, "elapsed_s": 35588.1}
{"step": 16340, "loss": 2.349, "lr": 0.00024995105295722475, "tps": 338145, "tokens": 17133731840, "gpu_gb": 72.2, "elapsed_s": 35619.1}
{"step": 16350, "loss": 2.3492, "lr": 0.00024908417821667205, "tps": 337748, "tokens": 17144217600, "gpu_gb": 72.2, "elapsed_s": 35650.1}
{"step": 16360, "loss": 2.3338, "lr": 0.00024821160153812045, "tps": 338059, "tokens": 17154703360, "gpu_gb": 72.2, "elapsed_s": 35681.2}
{"step": 16370, "loss": 2.3504, "lr": 0.00024733338209296504, "tps": 337990, "tokens": 17165189120, "gpu_gb": 72.2, "elapsed_s": 35712.2}
{"step": 16380, "loss": 2.3434, "lr": 0.0002464495794352494, "tps": 338228, "tokens": 17175674880, "gpu_gb": 72.2, "elapsed_s": 35743.2}
{"step": 16390, "loss": 2.3605, "lr": 0.00024556025349762754, "tps": 338169, "tokens": 17186160640, "gpu_gb": 72.2, "elapsed_s": 35774.2}
{"step": 16400, "loss": 2.322, "lr": 0.00024466546458729954, "tps": 337815, "tokens": 17196646400, "gpu_gb": 72.2, "elapsed_s": 35805.2}
{"step": 16410, "loss": 2.3422, "lr": 0.0002437652733819219, "tps": 338024, "tokens": 17207132160, "gpu_gb": 72.2, "elapsed_s": 35836.3}
{"step": 16420, "loss": 2.3748, "lr": 0.0002428597409254931, "tps": 337945, "tokens": 17217617920, "gpu_gb": 72.2, "elapsed_s": 35867.3}
{"step": 16430, "loss": 2.3564, "lr": 0.00024194892862421382, "tps": 338458, "tokens": 17228103680, "gpu_gb": 72.2, "elapsed_s": 35898.3}
{"step": 16440, "loss": 2.3708, "lr": 0.00024103289824232288, "tps": 338131, "tokens": 17238589440, "gpu_gb": 72.2, "elapsed_s": 35929.3}
{"step": 16450, "loss": 2.46, "lr": 0.00024011171189790908, "tps": 338208, "tokens": 17249075200, "gpu_gb": 72.2, "elapsed_s": 35960.3}
{"step": 16460, "loss": 2.3518, "lr": 0.00023918543205869857, "tps": 338459, "tokens": 17259560960, "gpu_gb": 72.2, "elapsed_s": 35991.3}
{"step": 16470, "loss": 2.3754, "lr": 0.00023825412153781895, "tps": 338046, "tokens": 17270046720, "gpu_gb": 72.2, "elapsed_s": 36022.3}
{"step": 16480, "loss": 2.2953, "lr": 0.00023731784348953965, "tps": 338085, "tokens": 17280532480, "gpu_gb": 72.2, "elapsed_s": 36053.3}
{"step": 16490, "loss": 2.3999, "lr": 0.00023637666140498937, "tps": 337898, "tokens": 17291018240, "gpu_gb": 72.2, "elapsed_s": 36084.3}
{"step": 16500, "loss": 2.3562, "lr": 0.00023543063910785056, "tps": 338302, "tokens": 17301504000, "gpu_gb": 72.2, "elapsed_s": 36115.3}
{"step": 16510, "loss": 2.3847, "lr": 0.00023447984075003153, "tps": 337904, "tokens": 17311989760, "gpu_gb": 72.2, "elapsed_s": 36146.4}
{"step": 16520, "loss": 2.3626, "lr": 0.00023352433080731582, "tps": 338090, "tokens": 17322475520, "gpu_gb": 72.2, "elapsed_s": 36177.4}
{"step": 16530, "loss": 2.406, "lr": 0.00023256417407499044, "tps": 338466, "tokens": 17332961280, "gpu_gb": 72.2, "elapsed_s": 36208.4}
{"step": 16540, "loss": 2.3845, "lr": 0.00023159943566345158, "tps": 338252, "tokens": 17343447040, "gpu_gb": 72.2, "elapsed_s": 36239.4}
{"step": 16550, "loss": 2.3697, "lr": 0.00023063018099378935, "tps": 337997, "tokens": 17353932800, "gpu_gb": 72.2, "elapsed_s": 36270.4}
{"step": 16560, "loss": 2.4066, "lr": 0.0002296564757933517, "tps": 337849, "tokens": 17364418560, "gpu_gb": 72.2, "elapsed_s": 36301.4}
{"step": 16570, "loss": 2.3659, "lr": 0.000228678386091287, "tps": 338165, "tokens": 17374904320, "gpu_gb": 72.2, "elapsed_s": 36332.4}
{"step": 16580, "loss": 2.3589, "lr": 0.0002276959782140666, "tps": 338187, "tokens": 17385390080, "gpu_gb": 72.2, "elapsed_s": 36363.4}
{"step": 16590, "loss": 2.3825, "lr": 0.0002267093187809871, "tps": 338246, "tokens": 17395875840, "gpu_gb": 72.2, "elapsed_s": 36394.4}
{"step": 16600, "loss": 2.3303, "lr": 0.00022571847469965263, "tps": 338388, "tokens": 17406361600, "gpu_gb": 72.2, "elapsed_s": 36425.4}
{"step": 16610, "loss": 2.3536, "lr": 0.0002247235131614379, "tps": 338064, "tokens": 17416847360, "gpu_gb": 72.2, "elapsed_s": 36456.4}
{"step": 16620, "loss": 2.3738, "lr": 0.00022372450163693153, "tps": 337632, "tokens": 17427333120, "gpu_gb": 72.2, "elapsed_s": 36487.5}
{"step": 16630, "loss": 2.348, "lr": 0.00022272150787136082, "tps": 338388, "tokens": 17437818880, "gpu_gb": 72.2, "elapsed_s": 36518.5}
{"step": 16640, "loss": 2.3866, "lr": 0.00022171459987999808, "tps": 337739, "tokens": 17448304640, "gpu_gb": 72.2, "elapsed_s": 36549.5}
{"step": 16650, "loss": 2.3907, "lr": 0.00022070384594354781, "tps": 338086, "tokens": 17458790400, "gpu_gb": 72.2, "elapsed_s": 36580.5}
{"step": 16660, "loss": 2.3422, "lr": 0.000219689314603517, "tps": 337988, "tokens": 17469276160, "gpu_gb": 72.2, "elapsed_s": 36611.6}
{"step": 16670, "loss": 2.3797, "lr": 0.00021867107465756665, "tps": 337901, "tokens": 17479761920, "gpu_gb": 72.2, "elapsed_s": 36642.6}
{"step": 16680, "loss": 2.3993, "lr": 0.0002176491951548468, "tps": 338264, "tokens": 17490247680, "gpu_gb": 72.2, "elapsed_s": 36673.6}
{"step": 16690, "loss": 2.3594, "lr": 0.00021662374539131423, "tps": 337789, "tokens": 17500733440, "gpu_gb": 72.2, "elapsed_s": 36704.6}
{"step": 16700, "loss": 2.3308, "lr": 0.00021559479490503288, "tps": 338096, "tokens": 17511219200, "gpu_gb": 72.2, "elapsed_s": 36735.7}
{"step": 16710, "loss": 2.413, "lr": 0.00021456241347145882, "tps": 338245, "tokens": 17521704960, "gpu_gb": 72.2, "elapsed_s": 36766.7}
{"step": 16720, "loss": 2.3835, "lr": 0.0002135266710987083, "tps": 338437, "tokens": 17532190720, "gpu_gb": 72.2, "elapsed_s": 36797.6}
{"step": 16730, "loss": 2.3935, "lr": 0.00021248763802281047, "tps": 338109, "tokens": 17542676480, "gpu_gb": 72.2, "elapsed_s": 36828.6}
{"step": 16740, "loss": 2.3459, "lr": 0.0002114453847029445, "tps": 338103, "tokens": 17553162240, "gpu_gb": 72.2, "elapsed_s": 36859.7}
{"step": 16750, "loss": 2.3366, "lr": 0.00021039998181666153, "tps": 338250, "tokens": 17563648000, "gpu_gb": 72.2, "elapsed_s": 36890.7}
{"step": 16760, "loss": 2.3104, "lr": 0.00020935150025509194, "tps": 337899, "tokens": 17574133760, "gpu_gb": 72.2, "elapsed_s": 36921.7}
{"step": 16770, "loss": 2.3695, "lr": 0.00020830001111813812, "tps": 338183, "tokens": 17584619520, "gpu_gb": 72.2, "elapsed_s": 36952.7}
{"step": 16780, "loss": 2.3846, "lr": 0.0002072455857096528, "tps": 338296, "tokens": 17595105280, "gpu_gb": 72.2, "elapsed_s": 36983.7}
{"step": 16790, "loss": 2.3578, "lr": 0.00020618829553260395, "tps": 338085, "tokens": 17605591040, "gpu_gb": 72.2, "elapsed_s": 37014.7}
{"step": 16800, "loss": 2.3734, "lr": 0.00020512821228422604, "tps": 337874, "tokens": 17616076800, "gpu_gb": 72.2, "elapsed_s": 37045.7}
{"step": 16810, "loss": 2.3406, "lr": 0.00020406540785115798, "tps": 338317, "tokens": 17626562560, "gpu_gb": 72.2, "elapsed_s": 37076.7}
{"step": 16820, "loss": 2.371, "lr": 0.00020299995430456824, "tps": 337916, "tokens": 17637048320, "gpu_gb": 72.2, "elapsed_s": 37107.8}
{"step": 16830, "loss": 2.3275, "lr": 0.0002019319238952678, "tps": 338069, "tokens": 17647534080, "gpu_gb": 72.2, "elapsed_s": 37138.8}
{"step": 16840, "loss": 2.3691, "lr": 0.0002008613890488106, "tps": 338257, "tokens": 17658019840, "gpu_gb": 72.2, "elapsed_s": 37169.8}
{"step": 16850, "loss": 2.3566, "lr": 0.00019978842236058182, "tps": 338250, "tokens": 17668505600, "gpu_gb": 72.2, "elapsed_s": 37200.8}
{"step": 16860, "loss": 2.3755, "lr": 0.0001987130965908756, "tps": 338262, "tokens": 17678991360, "gpu_gb": 72.2, "elapsed_s": 37231.8}
{"step": 16870, "loss": 2.373, "lr": 0.0001976354846599606, "tps": 338050, "tokens": 17689477120, "gpu_gb": 72.2, "elapsed_s": 37262.8}
{"step": 16880, "loss": 2.3759, "lr": 0.00019655565964313518, "tps": 337969, "tokens": 17699962880, "gpu_gb": 72.2, "elapsed_s": 37293.8}
{"step": 16890, "loss": 2.3278, "lr": 0.00019547369476577225, "tps": 338039, "tokens": 17710448640, "gpu_gb": 72.2, "elapsed_s": 37324.9}
{"step": 16900, "loss": 2.3418, "lr": 0.00019438966339835337, "tps": 337899, "tokens": 17720934400, "gpu_gb": 72.2, "elapsed_s": 37355.9}
{"step": 16910, "loss": 2.3677, "lr": 0.00019330363905149337, "tps": 338038, "tokens": 17731420160, "gpu_gb": 72.2, "elapsed_s": 37386.9}
{"step": 16920, "loss": 2.3789, "lr": 0.00019221569537095567, "tps": 338230, "tokens": 17741905920, "gpu_gb": 72.2, "elapsed_s": 37417.9}
{"step": 16930, "loss": 2.379, "lr": 0.00019112590613265792, "tps": 337991, "tokens": 17752391680, "gpu_gb": 72.2, "elapsed_s": 37448.9}
{"step": 16940, "loss": 2.3843, "lr": 0.0001900343452376694, "tps": 338133, "tokens": 17762877440, "gpu_gb": 72.2, "elapsed_s": 37479.9}
{"step": 16950, "loss": 2.3537, "lr": 0.00018894108670719915, "tps": 338149, "tokens": 17773363200, "gpu_gb": 72.2, "elapsed_s": 37511.0}
{"step": 16960, "loss": 2.3526, "lr": 0.0001878462046775769, "tps": 338032, "tokens": 17783848960, "gpu_gb": 72.2, "elapsed_s": 37542.0}
{"step": 16970, "loss": 2.3781, "lr": 0.00018674977339522552, "tps": 338016, "tokens": 17794334720, "gpu_gb": 72.2, "elapsed_s": 37573.0}
{"step": 16980, "loss": 2.3725, "lr": 0.0001856518672116261, "tps": 338211, "tokens": 17804820480, "gpu_gb": 72.2, "elapsed_s": 37604.0}
{"step": 16990, "loss": 2.3606, "lr": 0.00018455256057827628, "tps": 338163, "tokens": 17815306240, "gpu_gb": 72.2, "elapsed_s": 37635.0}
{"step": 17000, "loss": 2.3006, "lr": 0.00018345192804164115, "tps": 337929, "tokens": 17825792000, "gpu_gb": 72.2, "elapsed_s": 37666.0}
{"step": 17010, "loss": 2.3225, "lr": 0.00018235004423809844, "tps": 155786, "tokens": 17836277760, "gpu_gb": 72.2, "elapsed_s": 37733.3}
{"step": 17020, "loss": 2.3811, "lr": 0.00018124698388887711, "tps": 337998, "tokens": 17846763520, "gpu_gb": 72.2, "elapsed_s": 37764.4}
{"step": 17030, "loss": 2.3371, "lr": 0.00018014282179499024, "tps": 338442, "tokens": 17857249280, "gpu_gb": 72.2, "elapsed_s": 37795.4}
{"step": 17040, "loss": 2.3968, "lr": 0.0001790376328321628, "tps": 338457, "tokens": 17867735040, "gpu_gb": 72.2, "elapsed_s": 37826.3}
{"step": 17050, "loss": 2.3421, "lr": 0.0001779314919457541, "tps": 338216, "tokens": 17878220800, "gpu_gb": 72.2, "elapsed_s": 37857.3}
{"step": 17060, "loss": 2.3866, "lr": 0.00017682447414567537, "tps": 338265, "tokens": 17888706560, "gpu_gb": 72.2, "elapsed_s": 37888.3}
{"step": 17070, "loss": 2.3224, "lr": 0.00017571665450130352, "tps": 338050, "tokens": 17899192320, "gpu_gb": 72.2, "elapsed_s": 37919.4}
{"step": 17080, "loss": 2.3705, "lr": 0.00017460810813639017, "tps": 338562, "tokens": 17909678080, "gpu_gb": 72.2, "elapsed_s": 37950.3}
{"step": 17090, "loss": 2.4074, "lr": 0.0001734989102239677, "tps": 338185, "tokens": 17920163840, "gpu_gb": 72.2, "elapsed_s": 37981.3}
{"step": 17100, "loss": 2.3211, "lr": 0.00017238913598125109, "tps": 338363, "tokens": 17930649600, "gpu_gb": 72.2, "elapsed_s": 38012.3}
{"step": 17110, "loss": 2.3347, "lr": 0.0001712788606645379, "tps": 338532, "tokens": 17941135360, "gpu_gb": 72.2, "elapsed_s": 38043.3}
{"step": 17120, "loss": 2.3783, "lr": 0.00017016815956410452, "tps": 338232, "tokens": 17951621120, "gpu_gb": 72.2, "elapsed_s": 38074.3}
{"step": 17130, "loss": 2.3856, "lr": 0.00016905710799910067, "tps": 338083, "tokens": 17962106880, "gpu_gb": 72.2, "elapsed_s": 38105.3}
{"step": 17140, "loss": 2.3599, "lr": 0.00016794578131244185, "tps": 338454, "tokens": 17972592640, "gpu_gb": 72.2, "elapsed_s": 38136.3}
{"step": 17150, "loss": 2.3432, "lr": 0.00016683425486570025, "tps": 338117, "tokens": 17983078400, "gpu_gb": 72.2, "elapsed_s": 38167.3}
{"step": 17160, "loss": 2.3542, "lr": 0.0001657226040339942, "tps": 338226, "tokens": 17993564160, "gpu_gb": 72.2, "elapsed_s": 38198.3}
{"step": 17170, "loss": 2.3437, "lr": 0.0001646109042008769, "tps": 338279, "tokens": 18004049920, "gpu_gb": 72.2, "elapsed_s": 38229.3}
{"step": 17180, "loss": 2.3643, "lr": 0.00016349923075322428, "tps": 338175, "tokens": 18014535680, "gpu_gb": 72.2, "elapsed_s": 38260.3}
{"step": 17190, "loss": 2.3957, "lr": 0.00016238765907612332, "tps": 338263, "tokens": 18025021440, "gpu_gb": 72.2, "elapsed_s": 38291.3}
{"step": 17200, "loss": 2.344, "lr": 0.00016127626454775937, "tps": 338256, "tokens": 18035507200, "gpu_gb": 72.2, "elapsed_s": 38322.3}
{"step": 17210, "loss": 2.3758, "lr": 0.0001601651225343052, "tps": 338222, "tokens": 18045992960, "gpu_gb": 72.2, "elapsed_s": 38353.3}
{"step": 17220, "loss": 2.335, "lr": 0.00015905430838480982, "tps": 338205, "tokens": 18056478720, "gpu_gb": 72.2, "elapsed_s": 38384.3}
{"step": 17230, "loss": 2.3816, "lr": 0.00015794389742608908, "tps": 338235, "tokens": 18066964480, "gpu_gb": 72.2, "elapsed_s": 38415.3}
{"step": 17240, "loss": 2.3241, "lr": 0.00015683396495761758, "tps": 338355, "tokens": 18077450240, "gpu_gb": 72.2, "elapsed_s": 38446.3}
{"step": 17250, "loss": 2.2859, "lr": 0.00015572458624642233, "tps": 338167, "tokens": 18087936000, "gpu_gb": 72.2, "elapsed_s": 38477.3}
{"step": 17260, "loss": 2.3559, "lr": 0.00015461583652197895, "tps": 338061, "tokens": 18098421760, "gpu_gb": 72.2, "elapsed_s": 38508.3}
{"step": 17270, "loss": 2.3586, "lr": 0.0001535077909711099, "tps": 338142, "tokens": 18108907520, "gpu_gb": 72.2, "elapsed_s": 38539.3}
{"step": 17280, "loss": 2.3344, "lr": 0.00015240052473288612, "tps": 338331, "tokens": 18119393280, "gpu_gb": 72.2, "elapsed_s": 38570.3}
{"step": 17290, "loss": 2.3072, "lr": 0.00015129411289353163, "tps": 338497, "tokens": 18129879040, "gpu_gb": 72.2, "elapsed_s": 38601.3}
{"step": 17300, "loss": 2.3804, "lr": 0.00015018863048133166, "tps": 338276, "tokens": 18140364800, "gpu_gb": 72.2, "elapsed_s": 38632.3}
{"step": 17310, "loss": 2.3718, "lr": 0.00014908415246154495, "tps": 338380, "tokens": 18150850560, "gpu_gb": 72.2, "elapsed_s": 38663.3}
{"step": 17320, "loss": 2.3124, "lr": 0.00014798075373132004, "tps": 338530, "tokens": 18161336320, "gpu_gb": 72.2, "elapsed_s": 38694.3}
{"step": 17330, "loss": 2.2931, "lr": 0.00014687850911461654, "tps": 338394, "tokens": 18171822080, "gpu_gb": 72.2, "elapsed_s": 38725.3}
{"step": 17340, "loss": 2.3605, "lr": 0.00014577749335713077, "tps": 338371, "tokens": 18182307840, "gpu_gb": 72.2, "elapsed_s": 38756.3}
{"step": 17350, "loss": 2.3185, "lr": 0.00014467778112122766, "tps": 338436, "tokens": 18192793600, "gpu_gb": 72.2, "elapsed_s": 38787.2}
{"step": 17360, "loss": 2.3745, "lr": 0.0001435794469808772, "tps": 338452, "tokens": 18203279360, "gpu_gb": 72.2, "elapsed_s": 38818.2}
{"step": 17370, "loss": 2.3844, "lr": 0.00014248256541659753, "tps": 338413, "tokens": 18213765120, "gpu_gb": 72.2, "elapsed_s": 38849.2}
{"step": 17380, "loss": 2.3673, "lr": 0.00014138721081040465, "tps": 338191, "tokens": 18224250880, "gpu_gb": 72.2, "elapsed_s": 38880.2}
{"step": 17390, "loss": 2.325, "lr": 0.0001402934574407676, "tps": 338593, "tokens": 18234736640, "gpu_gb": 72.2, "elapsed_s": 38911.2}
{"step": 17400, "loss": 2.3323, "lr": 0.0001392013794775725, "tps": 338024, "tokens": 18245222400, "gpu_gb": 72.2, "elapsed_s": 38942.2}
{"step": 17410, "loss": 2.3124, "lr": 0.0001381110509770919, "tps": 338800, "tokens": 18255708160, "gpu_gb": 72.2, "elapsed_s": 38973.2}
{"step": 17420, "loss": 2.3431, "lr": 0.00013702254587696358, "tps": 338440, "tokens": 18266193920, "gpu_gb": 72.2, "elapsed_s": 39004.1}
{"step": 17430, "loss": 2.3636, "lr": 0.0001359359379911765, "tps": 338217, "tokens": 18276679680, "gpu_gb": 72.2, "elapsed_s": 39035.1}
{"step": 17440, "loss": 2.345, "lr": 0.00013485130100506494, "tps": 338292, "tokens": 18287165440, "gpu_gb": 72.2, "elapsed_s": 39066.1}
{"step": 17450, "loss": 2.3565, "lr": 0.0001337687084703123, "tps": 338212, "tokens": 18297651200, "gpu_gb": 72.2, "elapsed_s": 39097.1}
{"step": 17460, "loss": 2.3727, "lr": 0.00013268823379996295, "tps": 338132, "tokens": 18308136960, "gpu_gb": 72.2, "elapsed_s": 39128.2}
{"step": 17470, "loss": 2.3304, "lr": 0.00013160995026344422, "tps": 338193, "tokens": 18318622720, "gpu_gb": 72.2, "elapsed_s": 39159.2}
{"step": 17480, "loss": 2.3428, "lr": 0.00013053393098159756, "tps": 338425, "tokens": 18329108480, "gpu_gb": 72.2, "elapsed_s": 39190.1}
{"step": 17490, "loss": 2.3746, "lr": 0.00012946024892172034, "tps": 338200, "tokens": 18339594240, "gpu_gb": 72.2, "elapsed_s": 39221.1}
{"step": 17500, "loss": 2.3852, "lr": 0.0001283889768926175, "tps": 338393, "tokens": 18350080000, "gpu_gb": 72.2, "elapsed_s": 39252.1}
{"step": 17510, "loss": 2.3546, "lr": 0.00012732018753966446, "tps": 338302, "tokens": 18360565760, "gpu_gb": 72.2, "elapsed_s": 39283.1}
{"step": 17520, "loss": 2.3672, "lr": 0.00012625395333988075, "tps": 338249, "tokens": 18371051520, "gpu_gb": 72.2, "elapsed_s": 39314.1}
{"step": 17530, "loss": 2.399, "lr": 0.0001251903465970151, "tps": 338271, "tokens": 18381537280, "gpu_gb": 72.2, "elapsed_s": 39345.1}
{"step": 17540, "loss": 2.3774, "lr": 0.0001241294394366426, "tps": 338264, "tokens": 18392023040, "gpu_gb": 72.2, "elapsed_s": 39376.1}
{"step": 17550, "loss": 2.3572, "lr": 0.00012307130380127338, "tps": 338295, "tokens": 18402508800, "gpu_gb": 72.2, "elapsed_s": 39407.1}
{"step": 17560, "loss": 2.3612, "lr": 0.0001220160114454744, "tps": 338563, "tokens": 18412994560, "gpu_gb": 72.2, "elapsed_s": 39438.1}
{"step": 17570, "loss": 2.3733, "lr": 0.00012096363393100318, "tps": 338010, "tokens": 18423480320, "gpu_gb": 72.2, "elapsed_s": 39469.1}
{"step": 17580, "loss": 2.3242, "lr": 0.00011991424262195548, "tps": 338404, "tokens": 18433966080, "gpu_gb": 72.2, "elapsed_s": 39500.1}
{"step": 17590, "loss": 2.3119, "lr": 0.00011886790867992564, "tps": 338437, "tokens": 18444451840, "gpu_gb": 72.2, "elapsed_s": 39531.1}
{"step": 17600, "loss": 2.4039, "lr": 0.000117824703059181, "tps": 338349, "tokens": 18454937600, "gpu_gb": 72.2, "elapsed_s": 39562.1}
{"step": 17610, "loss": 2.376, "lr": 0.0001167846965018505, "tps": 338468, "tokens": 18465423360, "gpu_gb": 72.2, "elapsed_s": 39593.1}
{"step": 17620, "loss": 2.3178, "lr": 0.00011574795953312711, "tps": 338051, "tokens": 18475909120, "gpu_gb": 72.2, "elapsed_s": 39624.1}
{"step": 17630, "loss": 2.3479, "lr": 0.00011471456245648597, "tps": 338313, "tokens": 18486394880, "gpu_gb": 72.2, "elapsed_s": 39655.1}
{"step": 17640, "loss": 2.3525, "lr": 0.00011368457534891623, "tps": 338358, "tokens": 18496880640, "gpu_gb": 72.2, "elapsed_s": 39686.1}
{"step": 17650, "loss": 2.3838, "lr": 0.00011265806805616959, "tps": 338327, "tokens": 18507366400, "gpu_gb": 72.2, "elapsed_s": 39717.1}
{"step": 17660, "loss": 2.3209, "lr": 0.00011163511018802365, "tps": 338660, "tokens": 18517852160, "gpu_gb": 72.2, "elapsed_s": 39748.0}
{"step": 17670, "loss": 2.3006, "lr": 0.0001106157711135613, "tps": 338044, "tokens": 18528337920, "gpu_gb": 72.2, "elapsed_s": 39779.0}
{"step": 17680, "loss": 2.3598, "lr": 0.00010960011995646705, "tps": 338137, "tokens": 18538823680, "gpu_gb": 72.2, "elapsed_s": 39810.0}
{"step": 17690, "loss": 2.395, "lr": 0.00010858822559033931, "tps": 338244, "tokens": 18549309440, "gpu_gb": 72.2, "elapsed_s": 39841.0}
{"step": 17700, "loss": 2.3515, "lr": 0.00010758015663402018, "tps": 338470, "tokens": 18559795200, "gpu_gb": 72.2, "elapsed_s": 39872.0}
{"step": 17710, "loss": 2.3875, "lr": 0.00010657598144694174, "tps": 338117, "tokens": 18570280960, "gpu_gb": 72.2, "elapsed_s": 39903.0}
{"step": 17720, "loss": 2.3577, "lr": 0.00010557576812449113, "tps": 338233, "tokens": 18580766720, "gpu_gb": 72.2, "elapsed_s": 39934.0}
{"step": 17730, "loss": 2.3379, "lr": 0.00010457958449339238, "tps": 338531, "tokens": 18591252480, "gpu_gb": 72.2, "elapsed_s": 39965.0}
{"step": 17740, "loss": 2.354, "lr": 0.000103587498107107, "tps": 338499, "tokens": 18601738240, "gpu_gb": 72.2, "elapsed_s": 39996.0}
{"step": 17750, "loss": 2.3493, "lr": 0.00010259957624125313, "tps": 338591, "tokens": 18612224000, "gpu_gb": 72.2, "elapsed_s": 40027.0}
{"step": 17760, "loss": 2.4328, "lr": 0.00010161588588904331, "tps": 338507, "tokens": 18622709760, "gpu_gb": 72.2, "elapsed_s": 40057.9}
{"step": 17770, "loss": 2.3621, "lr": 0.00010063649375674178, "tps": 338174, "tokens": 18633195520, "gpu_gb": 72.2, "elapsed_s": 40088.9}
{"step": 17780, "loss": 2.3512, "lr": 9.966146625914042e-05, "tps": 338328, "tokens": 18643681280, "gpu_gb": 72.2, "elapsed_s": 40119.9}
{"step": 17790, "loss": 2.3518, "lr": 9.869086951505572e-05, "tps": 337797, "tokens": 18654167040, "gpu_gb": 72.2, "elapsed_s": 40151.0}
{"step": 17800, "loss": 2.3671, "lr": 9.772476934284472e-05, "tps": 338513, "tokens": 18664652800, "gpu_gb": 72.2, "elapsed_s": 40182.0}
{"step": 17810, "loss": 2.3623, "lr": 9.676323125594149e-05, "tps": 338246, "tokens": 18675138560, "gpu_gb": 72.2, "elapsed_s": 40213.0}
{"step": 17820, "loss": 2.3593, "lr": 9.580632045841517e-05, "tps": 338227, "tokens": 18685624320, "gpu_gb": 72.2, "elapsed_s": 40244.0}
{"step": 17830, "loss": 2.3632, "lr": 9.485410184054771e-05, "tps": 338436, "tokens": 18696110080, "gpu_gb": 72.2, "elapsed_s": 40274.9}
{"step": 17840, "loss": 2.3479, "lr": 9.390663997443383e-05, "tps": 338386, "tokens": 18706595840, "gpu_gb": 72.2, "elapsed_s": 40305.9}
{"step": 17850, "loss": 2.368, "lr": 9.296399910960214e-05, "tps": 337881, "tokens": 18717081600, "gpu_gb": 72.2, "elapsed_s": 40337.0}
{"step": 17860, "loss": 2.3386, "lr": 9.202624316865838e-05, "tps": 338222, "tokens": 18727567360, "gpu_gb": 72.2, "elapsed_s": 40368.0}
{"step": 17870, "loss": 2.3659, "lr": 9.109343574295045e-05, "tps": 337926, "tokens": 18738053120, "gpu_gb": 72.2, "elapsed_s": 40399.0}
{"step": 17880, "loss": 2.3744, "lr": 9.016564008825624e-05, "tps": 338173, "tokens": 18748538880, "gpu_gb": 72.2, "elapsed_s": 40430.0}
{"step": 17890, "loss": 2.332, "lr": 8.924291912049432e-05, "tps": 338342, "tokens": 18759024640, "gpu_gb": 72.2, "elapsed_s": 40461.0}
{"step": 17900, "loss": 2.395, "lr": 8.832533541145706e-05, "tps": 338202, "tokens": 18769510400, "gpu_gb": 72.2, "elapsed_s": 40492.0}
{"step": 17910, "loss": 2.3879, "lr": 8.741295118456785e-05, "tps": 338209, "tokens": 18779996160, "gpu_gb": 72.2, "elapsed_s": 40523.0}
{"step": 17920, "loss": 2.3721, "lr": 8.65058283106613e-05, "tps": 338214, "tokens": 18790481920, "gpu_gb": 72.2, "elapsed_s": 40554.0}
{"step": 17930, "loss": 2.3263, "lr": 8.560402830378805e-05, "tps": 337943, "tokens": 18800967680, "gpu_gb": 72.2, "elapsed_s": 40585.0}
{"step": 17940, "loss": 2.3734, "lr": 8.47076123170429e-05, "tps": 337979, "tokens": 18811453440, "gpu_gb": 72.2, "elapsed_s": 40616.1}
{"step": 17950, "loss": 2.3962, "lr": 8.381664113841807e-05, "tps": 338246, "tokens": 18821939200, "gpu_gb": 72.2, "elapsed_s": 40647.1}
{"step": 17960, "loss": 2.4011, "lr": 8.293117518668122e-05, "tps": 338343, "tokens": 18832424960, "gpu_gb": 72.2, "elapsed_s": 40678.1}
{"step": 17970, "loss": 2.3429, "lr": 8.205127450727804e-05, "tps": 338039, "tokens": 18842910720, "gpu_gb": 72.2, "elapsed_s": 40709.1}
{"step": 17980, "loss": 2.3598, "lr": 8.117699876826052e-05, "tps": 338499, "tokens": 18853396480, "gpu_gb": 72.2, "elapsed_s": 40740.1}
{"step": 17990, "loss": 2.3628, "lr": 8.03084072562407e-05, "tps": 338119, "tokens": 18863882240, "gpu_gb": 72.2, "elapsed_s": 40771.1}
{"step": 18000, "loss": 2.3447, "lr": 7.94455588723705e-05, "tps": 338139, "tokens": 18874368000, "gpu_gb": 72.2, "elapsed_s": 40802.1}
{"step": 18010, "loss": 2.3784, "lr": 7.858851212834724e-05, "tps": 150167, "tokens": 18884853760, "gpu_gb": 72.2, "elapsed_s": 40871.9}
{"step": 18020, "loss": 2.3322, "lr": 7.773732514244592e-05, "tps": 338393, "tokens": 18895339520, "gpu_gb": 72.2, "elapsed_s": 40902.9}
{"step": 18030, "loss": 2.3448, "lr": 7.689205563557829e-05, "tps": 338151, "tokens": 18905825280, "gpu_gb": 72.2, "elapsed_s": 40933.9}
{"step": 18040, "loss": 2.3664, "lr": 7.605276092737815e-05, "tps": 338368, "tokens": 18916311040, "gpu_gb": 72.2, "elapsed_s": 40964.9}
{"step": 18050, "loss": 2.3046, "lr": 7.521949793231506e-05, "tps": 338378, "tokens": 18926796800, "gpu_gb": 72.2, "elapsed_s": 40995.9}
{"step": 18060, "loss": 2.344, "lr": 7.439232315583429e-05, "tps": 338406, "tokens": 18937282560, "gpu_gb": 72.2, "elapsed_s": 41026.9}
{"step": 18070, "loss": 2.354, "lr": 7.357129269052529e-05, "tps": 338475, "tokens": 18947768320, "gpu_gb": 72.2, "elapsed_s": 41057.8}
{"step": 18080, "loss": 2.3253, "lr": 7.275646221231796e-05, "tps": 338486, "tokens": 18958254080, "gpu_gb": 72.2, "elapsed_s": 41088.8}
{"step": 18090, "loss": 2.3472, "lr": 7.194788697670703e-05, "tps": 338228, "tokens": 18968739840, "gpu_gb": 72.2, "elapsed_s": 41119.8}
{"step": 18100, "loss": 2.3199, "lr": 7.114562181500507e-05, "tps": 338517, "tokens": 18979225600, "gpu_gb": 72.2, "elapsed_s": 41150.8}
{"step": 18110, "loss": 2.3452, "lr": 7.03497211306243e-05, "tps": 338976, "tokens": 18989711360, "gpu_gb": 72.2, "elapsed_s": 41181.7}
{"step": 18120, "loss": 2.4253, "lr": 6.956023889538747e-05, "tps": 338791, "tokens": 19000197120, "gpu_gb": 72.2, "elapsed_s": 41212.7}
{"step": 18130, "loss": 2.3858, "lr": 6.877722864586767e-05, "tps": 338817, "tokens": 19010682880, "gpu_gb": 72.2, "elapsed_s": 41243.6}
{"step": 18140, "loss": 2.3884, "lr": 6.800074347975801e-05, "tps": 338714, "tokens": 19021168640, "gpu_gb": 72.2, "elapsed_s": 41274.6}
{"step": 18150, "loss": 2.3666, "lr": 6.723083605227113e-05, "tps": 338836, "tokens": 19031654400, "gpu_gb": 72.2, "elapsed_s": 41305.5}
{"step": 18160, "loss": 2.35, "lr": 6.646755857256819e-05, "tps": 338651, "tokens": 19042140160, "gpu_gb": 72.2, "elapsed_s": 41336.5}
{"step": 18170, "loss": 2.3693, "lr": 6.571096280021867e-05, "tps": 338195, "tokens": 19052625920, "gpu_gb": 72.2, "elapsed_s": 41367.5}
{"step": 18180, "loss": 2.345, "lr": 6.496110004169036e-05, "tps": 338339, "tokens": 19063111680, "gpu_gb": 72.2, "elapsed_s": 41398.5}
{"step": 18190, "loss": 2.3654, "lr": 6.421802114687031e-05, "tps": 338222, "tokens": 19073597440, "gpu_gb": 72.2, "elapsed_s": 41429.5}
{"step": 18200, "loss": 2.397, "lr": 6.348177650561629e-05, "tps": 338192, "tokens": 19084083200, "gpu_gb": 72.2, "elapsed_s": 41460.5}
{"step": 18210, "loss": 2.343, "lr": 6.275241604433994e-05, "tps": 338389, "tokens": 19094568960, "gpu_gb": 72.2, "elapsed_s": 41491.5}
{"step": 18220, "loss": 2.3043, "lr": 6.202998922262123e-05, "tps": 338495, "tokens": 19105054720, "gpu_gb": 72.2, "elapsed_s": 41522.5}
{"step": 18230, "loss": 2.3385, "lr": 6.13145450298542e-05, "tps": 338212, "tokens": 19115540480, "gpu_gb": 72.2, "elapsed_s": 41553.5}
{"step": 18240, "loss": 2.3839, "lr": 6.0606131981925194e-05, "tps": 338442, "tokens": 19126026240, "gpu_gb": 72.2, "elapsed_s": 41584.5}
{"step": 18250, "loss": 2.364, "lr": 5.990479811792261e-05, "tps": 338197, "tokens": 19136512000, "gpu_gb": 72.2, "elapsed_s": 41615.5}
{"step": 18260, "loss": 2.389, "lr": 5.921059099687961e-05, "tps": 338545, "tokens": 19146997760, "gpu_gb": 72.2, "elapsed_s": 41646.4}
{"step": 18270, "loss": 2.4065, "lr": 5.852355769454852e-05, "tps": 338349, "tokens": 19157483520, "gpu_gb": 72.2, "elapsed_s": 41677.4}
{"step": 18280, "loss": 2.366, "lr": 5.7843744800209105e-05, "tps": 338515, "tokens": 19167969280, "gpu_gb": 72.2, "elapsed_s": 41708.4}
{"step": 18290, "loss": 2.3706, "lr": 5.7171198413508874e-05, "tps": 338209, "tokens": 19178455040, "gpu_gb": 72.2, "elapsed_s": 41739.4}
{"step": 18300, "loss": 2.3741, "lr": 5.650596414133689e-05, "tps": 338634, "tokens": 19188940800, "gpu_gb": 72.2, "elapsed_s": 41770.4}
{"step": 18310, "loss": 2.3838, "lr": 5.5848087094731436e-05, "tps": 338263, "tokens": 19199426560, "gpu_gb": 72.2, "elapsed_s": 41801.4}
{"step": 18320, "loss": 2.33, "lr": 5.519761188582061e-05, "tps": 338250, "tokens": 19209912320, "gpu_gb": 72.2, "elapsed_s": 41832.4}
{"step": 18330, "loss": 2.506, "lr": 5.4554582624797176e-05, "tps": 338355, "tokens": 19220398080, "gpu_gb": 72.2, "elapsed_s": 41863.4}
{"step": 18340, "loss": 2.4293, "lr": 5.3919042916927346e-05, "tps": 338528, "tokens": 19230883840, "gpu_gb": 72.2, "elapsed_s": 41894.3}
{"step": 18350, "loss": 2.3696, "lr": 5.329103585959389e-05, "tps": 338337, "tokens": 19241369600, "gpu_gb": 72.2, "elapsed_s": 41925.3}
{"step": 18360, "loss": 2.4253, "lr": 5.2670604039373555e-05, "tps": 338375, "tokens": 19251855360, "gpu_gb": 72.2, "elapsed_s": 41956.3}
{"step": 18370, "loss": 2.3733, "lr": 5.205778952914894e-05, "tps": 338162, "tokens": 19262341120, "gpu_gb": 72.2, "elapsed_s": 41987.3}
{"step": 18380, "loss": 2.3672, "lr": 5.145263388525593e-05, "tps": 338175, "tokens": 19272826880, "gpu_gb": 72.2, "elapsed_s": 42018.3}
{"step": 18390, "loss": 2.3381, "lr": 5.085517814466524e-05, "tps": 338433, "tokens": 19283312640, "gpu_gb": 72.2, "elapsed_s": 42049.3}
{"step": 18400, "loss": 2.3752, "lr": 5.026546282219975e-05, "tps": 338435, "tokens": 19293798400, "gpu_gb": 72.2, "elapsed_s": 42080.3}
{"step": 18410, "loss": 2.3963, "lr": 4.968352790778713e-05, "tps": 338471, "tokens": 19304284160, "gpu_gb": 72.2, "elapsed_s": 42111.3}
{"step": 18420, "loss": 2.4049, "lr": 4.910941286374807e-05, "tps": 338018, "tokens": 19314769920, "gpu_gb": 72.2, "elapsed_s": 42142.3}
{"step": 18430, "loss": 2.3269, "lr": 4.8543156622120125e-05, "tps": 338310, "tokens": 19325255680, "gpu_gb": 72.2, "elapsed_s": 42173.3}
{"step": 18440, "loss": 2.4158, "lr": 4.798479758201766e-05, "tps": 338383, "tokens": 19335741440, "gpu_gb": 72.2, "elapsed_s": 42204.3}
{"step": 18450, "loss": 2.397, "lr": 4.743437360702813e-05, "tps": 338124, "tokens": 19346227200, "gpu_gb": 72.2, "elapsed_s": 42235.3}
{"step": 18460, "loss": 2.4011, "lr": 4.689192202264418e-05, "tps": 338375, "tokens": 19356712960, "gpu_gb": 72.2, "elapsed_s": 42266.3}
{"step": 18470, "loss": 2.3974, "lr": 4.635747961373266e-05, "tps": 338226, "tokens": 19367198720, "gpu_gb": 72.2, "elapsed_s": 42297.3}
{"step": 18480, "loss": 2.3838, "lr": 4.5831082622040166e-05, "tps": 338196, "tokens": 19377684480, "gpu_gb": 72.2, "elapsed_s": 42328.3}
{"step": 18490, "loss": 2.3673, "lr": 4.531276674373541e-05, "tps": 338388, "tokens": 19388170240, "gpu_gb": 72.2, "elapsed_s": 42359.3}
{"step": 18500, "loss": 2.3978, "lr": 4.480256712698853e-05, "tps": 338152, "tokens": 19398656000, "gpu_gb": 72.2, "elapsed_s": 42390.3}
{"step": 18510, "loss": 2.3718, "lr": 4.43005183695876e-05, "tps": 338238, "tokens": 19409141760, "gpu_gb": 72.2, "elapsed_s": 42421.3}
{"step": 18520, "loss": 2.3944, "lr": 4.380665451659264e-05, "tps": 338283, "tokens": 19419627520, "gpu_gb": 72.2, "elapsed_s": 42452.3}
{"step": 18530, "loss": 2.3955, "lr": 4.332100905802663e-05, "tps": 338403, "tokens": 19430113280, "gpu_gb": 72.2, "elapsed_s": 42483.3}
{"step": 18540, "loss": 2.3849, "lr": 4.2843614926604844e-05, "tps": 338445, "tokens": 19440599040, "gpu_gb": 72.2, "elapsed_s": 42514.3}
{"step": 18550, "loss": 2.4186, "lr": 4.2374504495501314e-05, "tps": 338226, "tokens": 19451084800, "gpu_gb": 72.2, "elapsed_s": 42545.3}
{"step": 18560, "loss": 2.3752, "lr": 4.191370957615375e-05, "tps": 338213, "tokens": 19461570560, "gpu_gb": 72.2, "elapsed_s": 42576.3}
{"step": 18570, "loss": 2.358, "lr": 4.146126141610611e-05, "tps": 338484, "tokens": 19472056320, "gpu_gb": 72.2, "elapsed_s": 42607.2}
{"step": 18580, "loss": 2.3803, "lr": 4.101719069688983e-05, "tps": 337998, "tokens": 19482542080, "gpu_gb": 72.2, "elapsed_s": 42638.3}
{"step": 18590, "loss": 2.3967, "lr": 4.058152753194323e-05, "tps": 338408, "tokens": 19493027840, "gpu_gb": 72.2, "elapsed_s": 42669.3}
{"step": 18600, "loss": 2.3546, "lr": 4.0154301464569205e-05, "tps": 338533, "tokens": 19503513600, "gpu_gb": 72.2, "elapsed_s": 42700.2}
{"step": 18610, "loss": 2.3606, "lr": 3.973554146593218e-05, "tps": 338194, "tokens": 19513999360, "gpu_gb": 72.2, "elapsed_s": 42731.2}
{"step": 18620, "loss": 2.351, "lr": 3.932527593309329e-05, "tps": 338045, "tokens": 19524485120, "gpu_gb": 72.2, "elapsed_s": 42762.3}
{"step": 18630, "loss": 2.3546, "lr": 3.892353268708474e-05, "tps": 338190, "tokens": 19534970880, "gpu_gb": 72.2, "elapsed_s": 42793.3}
{"step": 18640, "loss": 2.3586, "lr": 3.853033897102319e-05, "tps": 338084, "tokens": 19545456640, "gpu_gb": 72.2, "elapsed_s": 42824.3}
{"step": 18650, "loss": 2.3747, "lr": 3.8145721448262494e-05, "tps": 338331, "tokens": 19555942400, "gpu_gb": 72.2, "elapsed_s": 42855.3}
{"step": 18660, "loss": 2.3712, "lr": 3.776970620058536e-05, "tps": 338392, "tokens": 19566428160, "gpu_gb": 72.2, "elapsed_s": 42886.3}
{"step": 18670, "loss": 2.3904, "lr": 3.7402318726434783e-05, "tps": 338432, "tokens": 19576913920, "gpu_gb": 72.2, "elapsed_s": 42917.2}
{"step": 18680, "loss": 2.4238, "lr": 3.704358393918507e-05, "tps": 338488, "tokens": 19587399680, "gpu_gb": 72.2, "elapsed_s": 42948.2}
{"step": 18690, "loss": 2.3803, "lr": 3.669352616545219e-05, "tps": 338434, "tokens": 19597885440, "gpu_gb": 72.2, "elapsed_s": 42979.2}
{"step": 18700, "loss": 2.3628, "lr": 3.6352169143444246e-05, "tps": 338339, "tokens": 19608371200, "gpu_gb": 72.2, "elapsed_s": 43010.2}
{"step": 18710, "loss": 2.34, "lr": 3.6019536021351723e-05, "tps": 338077, "tokens": 19618856960, "gpu_gb": 72.2, "elapsed_s": 43041.2}
{"step": 18720, "loss": 2.3937, "lr": 3.5695649355777804e-05, "tps": 338268, "tokens": 19629342720, "gpu_gb": 72.2, "elapsed_s": 43072.2}
{"step": 18730, "loss": 2.3559, "lr": 3.538053111020864e-05, "tps": 338744, "tokens": 19639828480, "gpu_gb": 72.2, "elapsed_s": 43103.2}
{"step": 18740, "loss": 2.3874, "lr": 3.507420265352403e-05, "tps": 338286, "tokens": 19650314240, "gpu_gb": 72.2, "elapsed_s": 43134.2}
{"step": 18750, "loss": 2.3819, "lr": 3.477668475854841e-05, "tps": 338139, "tokens": 19660800000, "gpu_gb": 72.2, "elapsed_s": 43165.2}
{"step": 18760, "loss": 2.3761, "lr": 3.4487997600642004e-05, "tps": 338377, "tokens": 19671285760, "gpu_gb": 72.2, "elapsed_s": 43196.2}
{"step": 18770, "loss": 2.3846, "lr": 3.4208160756332937e-05, "tps": 338160, "tokens": 19681771520, "gpu_gb": 72.2, "elapsed_s": 43227.2}
{"step": 18780, "loss": 2.3959, "lr": 3.39371932019895e-05, "tps": 338543, "tokens": 19692257280, "gpu_gb": 72.2, "elapsed_s": 43258.1}
{"step": 18790, "loss": 2.3729, "lr": 3.367511331253347e-05, "tps": 338166, "tokens": 19702743040, "gpu_gb": 72.2, "elapsed_s": 43289.2}
{"step": 18800, "loss": 2.3777, "lr": 3.342193886019393e-05, "tps": 338692, "tokens": 19713228800, "gpu_gb": 72.2, "elapsed_s": 43320.1}
{"step": 18810, "loss": 2.3701, "lr": 3.317768701330222e-05, "tps": 338469, "tokens": 19723714560, "gpu_gb": 72.2, "elapsed_s": 43351.1}
{"step": 18820, "loss": 2.3691, "lr": 3.2942374335127644e-05, "tps": 338495, "tokens": 19734200320, "gpu_gb": 72.2, "elapsed_s": 43382.1}
{"step": 18830, "loss": 2.3561, "lr": 3.271601678275424e-05, "tps": 338338, "tokens": 19744686080, "gpu_gb": 72.2, "elapsed_s": 43413.1}
{"step": 18840, "loss": 2.3854, "lr": 3.249862970599882e-05, "tps": 338345, "tokens": 19755171840, "gpu_gb": 72.2, "elapsed_s": 43444.1}
{"step": 18850, "loss": 2.375, "lr": 3.2290227846369915e-05, "tps": 338531, "tokens": 19765657600, "gpu_gb": 72.2, "elapsed_s": 43475.0}
{"step": 18860, "loss": 2.3838, "lr": 3.209082533606824e-05, "tps": 338219, "tokens": 19776143360, "gpu_gb": 72.2, "elapsed_s": 43506.0}
{"step": 18870, "loss": 2.4076, "lr": 3.1900435697028325e-05, "tps": 338383, "tokens": 19786629120, "gpu_gb": 72.2, "elapsed_s": 43537.0}
{"step": 18880, "loss": 2.4093, "lr": 3.171907184000148e-05, "tps": 338578, "tokens": 19797114880, "gpu_gb": 72.2, "elapsed_s": 43568.0}
{"step": 18890, "loss": 2.3756, "lr": 3.154674606368043e-05, "tps": 338098, "tokens": 19807600640, "gpu_gb": 72.2, "elapsed_s": 43599.0}
{"step": 18900, "loss": 2.3592, "lr": 3.1383470053865176e-05, "tps": 338160, "tokens": 19818086400, "gpu_gb": 72.2, "elapsed_s": 43630.0}
{"step": 18910, "loss": 2.3528, "lr": 3.1229254882670656e-05, "tps": 338396, "tokens": 19828572160, "gpu_gb": 72.2, "elapsed_s": 43661.0}
{"step": 18920, "loss": 2.3912, "lr": 3.1084111007775864e-05, "tps": 338270, "tokens": 19839057920, "gpu_gb": 72.2, "elapsed_s": 43692.0}
{"step": 18930, "loss": 2.4154, "lr": 3.094804827171472e-05, "tps": 338294, "tokens": 19849543680, "gpu_gb": 72.2, "elapsed_s": 43723.0}
{"step": 18940, "loss": 2.3957, "lr": 3.08210759012086e-05, "tps": 338399, "tokens": 19860029440, "gpu_gb": 72.2, "elapsed_s": 43754.0}
{"step": 18950, "loss": 2.3436, "lr": 3.0703202506540664e-05, "tps": 338390, "tokens": 19870515200, "gpu_gb": 72.2, "elapsed_s": 43785.0}
{"step": 18960, "loss": 2.3854, "lr": 3.0594436080972004e-05, "tps": 338465, "tokens": 19881000960, "gpu_gb": 72.2, "elapsed_s": 43815.9}
{"step": 18970, "loss": 2.3862, "lr": 3.0494784000199504e-05, "tps": 338358, "tokens": 19891486720, "gpu_gb": 72.2, "elapsed_s": 43846.9}
{"step": 18980, "loss": 2.3666, "lr": 3.0404253021855836e-05, "tps": 338541, "tokens": 19901972480, "gpu_gb": 72.2, "elapsed_s": 43877.9}
{"step": 18990, "loss": 2.417, "lr": 3.0322849285051037e-05, "tps": 338553, "tokens": 19912458240, "gpu_gb": 72.2, "elapsed_s": 43908.9}
{"step": 19000, "loss": 2.3804, "lr": 3.0250578309956336e-05, "tps": 338514, "tokens": 19922944000, "gpu_gb": 72.2, "elapsed_s": 43939.9}
{"step": 19010, "loss": 2.3622, "lr": 3.018744499742977e-05, "tps": 153977, "tokens": 19933429760, "gpu_gb": 72.2, "elapsed_s": 44008.0}
{"step": 19020, "loss": 2.3729, "lr": 3.0133453628683788e-05, "tps": 338176, "tokens": 19943915520, "gpu_gb": 72.2, "elapsed_s": 44039.0}
{"step": 19030, "loss": 2.3704, "lr": 3.0088607864995058e-05, "tps": 338273, "tokens": 19954401280, "gpu_gb": 72.2, "elapsed_s": 44070.0}
{"step": 19040, "loss": 2.4077, "lr": 3.005291074745606e-05, "tps": 338259, "tokens": 19964887040, "gpu_gb": 72.2, "elapsed_s": 44101.0}
{"step": 19050, "loss": 2.4399, "lr": 3.002636469676897e-05, "tps": 338320, "tokens": 19975372800, "gpu_gb": 72.2, "elapsed_s": 44132.0}
{"step": 19060, "loss": 2.3831, "lr": 3.0008971513081396e-05, "tps": 338314, "tokens": 19985858560, "gpu_gb": 72.2, "elapsed_s": 44163.0}
{"step": 19070, "loss": 2.4315, "lr": 3.0000732375864436e-05, "tps": 338176, "tokens": 19996344320, "gpu_gb": 72.2, "elapsed_s": 44194.0}