| {"current_steps": 5, "total_steps": 2500, "loss": 0.4815, "lr": 0.00048, "epoch": 0.04, "percentage": 0.2, "elapsed_time": "0:00:02", "remaining_time": "0:18:41", "throughput": 782.82, "total_tokens": 1760} |
| {"current_steps": 10, "total_steps": 2500, "loss": 0.4473, "lr": 0.0010799999999999998, "epoch": 0.08, "percentage": 0.4, "elapsed_time": "0:00:03", "remaining_time": "0:12:50", "throughput": 1138.17, "total_tokens": 3520} |
| {"current_steps": 15, "total_steps": 2500, "loss": 2.002, "lr": 0.00168, "epoch": 0.12, "percentage": 0.6, "elapsed_time": "0:00:03", "remaining_time": "0:10:57", "throughput": 1354.18, "total_tokens": 5376} |
| {"current_steps": 20, "total_steps": 2500, "loss": 6.1825, "lr": 0.00228, "epoch": 0.16, "percentage": 0.8, "elapsed_time": "0:00:04", "remaining_time": "0:10:01", "throughput": 1497.85, "total_tokens": 7264} |
| {"current_steps": 25, "total_steps": 2500, "loss": 1.4082, "lr": 0.0028799999999999997, "epoch": 0.2, "percentage": 1.0, "elapsed_time": "0:00:05", "remaining_time": "0:09:33", "throughput": 1634.22, "total_tokens": 9472} |
| {"current_steps": 30, "total_steps": 2500, "loss": 0.7726, "lr": 0.00348, "epoch": 0.24, "percentage": 1.2, "elapsed_time": "0:00:06", "remaining_time": "0:09:08", "throughput": 1676.19, "total_tokens": 11168} |
| {"current_steps": 35, "total_steps": 2500, "loss": 1.0988, "lr": 0.00408, "epoch": 0.28, "percentage": 1.4, "elapsed_time": "0:00:07", "remaining_time": "0:08:50", "throughput": 1723.86, "total_tokens": 12992} |
| {"current_steps": 40, "total_steps": 2500, "loss": 0.4399, "lr": 0.00468, "epoch": 0.32, "percentage": 1.6, "elapsed_time": "0:00:08", "remaining_time": "0:08:35", "throughput": 1756.9, "total_tokens": 14720} |
| {"current_steps": 45, "total_steps": 2500, "loss": 0.5133, "lr": 0.005279999999999999, "epoch": 0.36, "percentage": 1.8, "elapsed_time": "0:00:09", "remaining_time": "0:08:28", "throughput": 1796.96, "total_tokens": 16736} |
| {"current_steps": 50, "total_steps": 2500, "loss": 0.4874, "lr": 0.00588, "epoch": 0.4, "percentage": 2.0, "elapsed_time": "0:00:10", "remaining_time": "0:08:22", "throughput": 1836.88, "total_tokens": 18848} |
| {"current_steps": 55, "total_steps": 2500, "loss": 0.5731, "lr": 0.00648, "epoch": 0.44, "percentage": 2.2, "elapsed_time": "0:00:11", "remaining_time": "0:08:18", "throughput": 1870.99, "total_tokens": 20960} |
| {"current_steps": 60, "total_steps": 2500, "loss": 0.8152, "lr": 0.0070799999999999995, "epoch": 0.48, "percentage": 2.4, "elapsed_time": "0:00:12", "remaining_time": "0:08:12", "throughput": 1887.14, "total_tokens": 22848} |
| {"current_steps": 65, "total_steps": 2500, "loss": 0.7077, "lr": 0.00768, "epoch": 0.52, "percentage": 2.6, "elapsed_time": "0:00:13", "remaining_time": "0:08:07", "throughput": 1916.77, "total_tokens": 24960} |
| {"current_steps": 70, "total_steps": 2500, "loss": 0.4441, "lr": 0.008280000000000001, "epoch": 0.56, "percentage": 2.8, "elapsed_time": "0:00:13", "remaining_time": "0:08:04", "throughput": 1931.96, "total_tokens": 26976} |
| {"current_steps": 75, "total_steps": 2500, "loss": 0.5596, "lr": 0.008879999999999999, "epoch": 0.6, "percentage": 3.0, "elapsed_time": "0:00:14", "remaining_time": "0:08:00", "throughput": 1947.51, "total_tokens": 28960} |
| {"current_steps": 80, "total_steps": 2500, "loss": 0.5017, "lr": 0.00948, "epoch": 0.64, "percentage": 3.2, "elapsed_time": "0:00:15", "remaining_time": "0:07:57", "throughput": 1960.79, "total_tokens": 30944} |
| {"current_steps": 85, "total_steps": 2500, "loss": 0.5735, "lr": 0.01008, "epoch": 0.68, "percentage": 3.4, "elapsed_time": "0:00:16", "remaining_time": "0:07:54", "throughput": 1967.86, "total_tokens": 32896} |
| {"current_steps": 90, "total_steps": 2500, "loss": 0.4116, "lr": 0.010679999999999999, "epoch": 0.72, "percentage": 3.6, "elapsed_time": "0:00:17", "remaining_time": "0:07:52", "throughput": 1987.48, "total_tokens": 35104} |
| {"current_steps": 95, "total_steps": 2500, "loss": 0.3721, "lr": 0.01128, "epoch": 0.76, "percentage": 3.8, "elapsed_time": "0:00:18", "remaining_time": "0:07:48", "throughput": 1987.01, "total_tokens": 36768} |
| {"current_steps": 100, "total_steps": 2500, "loss": 0.2954, "lr": 0.01188, "epoch": 0.8, "percentage": 4.0, "elapsed_time": "0:00:19", "remaining_time": "0:07:48", "throughput": 2005.56, "total_tokens": 39136} |
| {"current_steps": 105, "total_steps": 2500, "loss": 0.5687, "lr": 0.01248, "epoch": 0.84, "percentage": 4.2, "elapsed_time": "0:00:20", "remaining_time": "0:07:45", "throughput": 2010.24, "total_tokens": 41056} |
| {"current_steps": 110, "total_steps": 2500, "loss": 9.1598, "lr": 0.01308, "epoch": 0.88, "percentage": 4.4, "elapsed_time": "0:00:21", "remaining_time": "0:07:43", "throughput": 2020.13, "total_tokens": 43104} |
| {"current_steps": 115, "total_steps": 2500, "loss": 9.5938, "lr": 0.01368, "epoch": 0.92, "percentage": 4.6, "elapsed_time": "0:00:22", "remaining_time": "0:07:41", "throughput": 2024.01, "total_tokens": 45056} |
| {"current_steps": 120, "total_steps": 2500, "loss": 8.1106, "lr": 0.01428, "epoch": 0.96, "percentage": 4.8, "elapsed_time": "0:00:23", "remaining_time": "0:07:40", "throughput": 2029.97, "total_tokens": 47168} |
| {"current_steps": 125, "total_steps": 2500, "loss": 6.1999, "lr": 0.014879999999999999, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:24", "remaining_time": "0:07:38", "throughput": 2037.01, "total_tokens": 49104} |
| {"current_steps": 125, "total_steps": 2500, "eval_loss": 5.093884468078613, "epoch": 1.0, "percentage": 5.0, "elapsed_time": "0:00:25", "remaining_time": "0:08:00", "throughput": 1940.07, "total_tokens": 49104} |
| {"current_steps": 130, "total_steps": 2500, "loss": 3.8922, "lr": 0.01548, "epoch": 1.04, "percentage": 5.2, "elapsed_time": "0:00:27", "remaining_time": "0:08:15", "throughput": 1892.28, "total_tokens": 51440} |
| {"current_steps": 135, "total_steps": 2500, "loss": 2.5933, "lr": 0.01608, "epoch": 1.08, "percentage": 5.4, "elapsed_time": "0:00:28", "remaining_time": "0:08:12", "throughput": 1898.22, "total_tokens": 53328} |
| {"current_steps": 140, "total_steps": 2500, "loss": 2.0065, "lr": 0.01668, "epoch": 1.12, "percentage": 5.6, "elapsed_time": "0:00:29", "remaining_time": "0:08:09", "throughput": 1905.94, "total_tokens": 55344} |
| {"current_steps": 145, "total_steps": 2500, "loss": 1.5715, "lr": 0.017279999999999997, "epoch": 1.16, "percentage": 5.8, "elapsed_time": "0:00:29", "remaining_time": "0:08:05", "throughput": 1908.12, "total_tokens": 57008} |
| {"current_steps": 150, "total_steps": 2500, "loss": 1.1723, "lr": 0.01788, "epoch": 1.2, "percentage": 6.0, "elapsed_time": "0:00:30", "remaining_time": "0:08:01", "throughput": 1910.38, "total_tokens": 58736} |
| {"current_steps": 155, "total_steps": 2500, "loss": 0.7969, "lr": 0.01848, "epoch": 1.24, "percentage": 6.2, "elapsed_time": "0:00:31", "remaining_time": "0:07:59", "throughput": 1923.17, "total_tokens": 61008} |
| {"current_steps": 160, "total_steps": 2500, "loss": 0.6049, "lr": 0.01908, "epoch": 1.28, "percentage": 6.4, "elapsed_time": "0:00:32", "remaining_time": "0:07:57", "throughput": 1927.4, "total_tokens": 62896} |
| {"current_steps": 165, "total_steps": 2500, "loss": 0.6311, "lr": 0.01968, "epoch": 1.32, "percentage": 6.6, "elapsed_time": "0:00:33", "remaining_time": "0:07:54", "throughput": 1933.87, "total_tokens": 64880} |
| {"current_steps": 170, "total_steps": 2500, "loss": 0.4201, "lr": 0.02028, "epoch": 1.3599999999999999, "percentage": 6.8, "elapsed_time": "0:00:34", "remaining_time": "0:07:52", "throughput": 1942.69, "total_tokens": 67024} |
| {"current_steps": 175, "total_steps": 2500, "loss": 0.4441, "lr": 0.02088, "epoch": 1.4, "percentage": 7.0, "elapsed_time": "0:00:35", "remaining_time": "0:07:50", "throughput": 1949.8, "total_tokens": 69040} |
| {"current_steps": 180, "total_steps": 2500, "loss": 0.4058, "lr": 0.02148, "epoch": 1.44, "percentage": 7.2, "elapsed_time": "0:00:36", "remaining_time": "0:07:48", "throughput": 1952.39, "total_tokens": 70896} |
| {"current_steps": 185, "total_steps": 2500, "loss": 0.4367, "lr": 0.02208, "epoch": 1.48, "percentage": 7.4, "elapsed_time": "0:00:37", "remaining_time": "0:07:45", "throughput": 1955.49, "total_tokens": 72784} |
| {"current_steps": 190, "total_steps": 2500, "loss": 0.3878, "lr": 0.02268, "epoch": 1.52, "percentage": 7.6, "elapsed_time": "0:00:38", "remaining_time": "0:07:44", "throughput": 1967.49, "total_tokens": 75152} |
| {"current_steps": 195, "total_steps": 2500, "loss": 0.3705, "lr": 0.02328, "epoch": 1.56, "percentage": 7.8, "elapsed_time": "0:00:39", "remaining_time": "0:07:41", "throughput": 1969.38, "total_tokens": 76944} |
| {"current_steps": 200, "total_steps": 2500, "loss": 0.4667, "lr": 0.023880000000000002, "epoch": 1.6, "percentage": 8.0, "elapsed_time": "0:00:39", "remaining_time": "0:07:39", "throughput": 1973.42, "total_tokens": 78896} |
| {"current_steps": 205, "total_steps": 2500, "loss": 0.335, "lr": 0.02448, "epoch": 1.6400000000000001, "percentage": 8.2, "elapsed_time": "0:00:40", "remaining_time": "0:07:37", "throughput": 1975.55, "total_tokens": 80656} |
| {"current_steps": 210, "total_steps": 2500, "loss": 0.5123, "lr": 0.025079999999999998, "epoch": 1.6800000000000002, "percentage": 8.4, "elapsed_time": "0:00:41", "remaining_time": "0:07:35", "throughput": 1981.48, "total_tokens": 82704} |
| {"current_steps": 215, "total_steps": 2500, "loss": 0.4403, "lr": 0.025679999999999998, "epoch": 1.72, "percentage": 8.6, "elapsed_time": "0:00:42", "remaining_time": "0:07:33", "throughput": 1986.15, "total_tokens": 84720} |
| {"current_steps": 220, "total_steps": 2500, "loss": 0.5363, "lr": 0.026279999999999998, "epoch": 1.76, "percentage": 8.8, "elapsed_time": "0:00:43", "remaining_time": "0:07:31", "throughput": 1986.83, "total_tokens": 86480} |
| {"current_steps": 225, "total_steps": 2500, "loss": 0.4979, "lr": 0.02688, "epoch": 1.8, "percentage": 9.0, "elapsed_time": "0:00:44", "remaining_time": "0:07:29", "throughput": 1991.05, "total_tokens": 88528} |
| {"current_steps": 230, "total_steps": 2500, "loss": 0.6169, "lr": 0.02748, "epoch": 1.8399999999999999, "percentage": 9.2, "elapsed_time": "0:00:45", "remaining_time": "0:07:27", "throughput": 1992.94, "total_tokens": 90352} |
| {"current_steps": 235, "total_steps": 2500, "loss": 0.4457, "lr": 0.02808, "epoch": 1.88, "percentage": 9.4, "elapsed_time": "0:00:46", "remaining_time": "0:07:25", "throughput": 1994.78, "total_tokens": 92240} |
| {"current_steps": 240, "total_steps": 2500, "loss": 0.6444, "lr": 0.028679999999999997, "epoch": 1.92, "percentage": 9.6, "elapsed_time": "0:00:47", "remaining_time": "0:07:24", "throughput": 2001.69, "total_tokens": 94480} |
| {"current_steps": 245, "total_steps": 2500, "loss": 0.5899, "lr": 0.029279999999999997, "epoch": 1.96, "percentage": 9.8, "elapsed_time": "0:00:48", "remaining_time": "0:07:23", "throughput": 2007.77, "total_tokens": 96784} |
| {"current_steps": 250, "total_steps": 2500, "loss": 0.5007, "lr": 0.02988, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:49", "remaining_time": "0:07:21", "throughput": 2005.0, "total_tokens": 98400} |
| {"current_steps": 250, "total_steps": 2500, "eval_loss": 0.5213586688041687, "epoch": 2.0, "percentage": 10.0, "elapsed_time": "0:00:50", "remaining_time": "0:07:32", "throughput": 1956.76, "total_tokens": 98400} |
| {"current_steps": 255, "total_steps": 2500, "loss": 0.6757, "lr": 0.029999766054429722, "epoch": 2.04, "percentage": 10.2, "elapsed_time": "0:00:52", "remaining_time": "0:07:39", "throughput": 1929.67, "total_tokens": 100608} |
| {"current_steps": 260, "total_steps": 2500, "loss": 0.6589, "lr": 0.029998815663057243, "epoch": 2.08, "percentage": 10.4, "elapsed_time": "0:00:53", "remaining_time": "0:07:37", "throughput": 1932.24, "total_tokens": 102528} |
| {"current_steps": 265, "total_steps": 2500, "loss": 0.5708, "lr": 0.029997134250569515, "epoch": 2.12, "percentage": 10.6, "elapsed_time": "0:00:53", "remaining_time": "0:07:34", "throughput": 1936.13, "total_tokens": 104448} |
| {"current_steps": 270, "total_steps": 2500, "loss": 0.6053, "lr": 0.029994721898916218, "epoch": 2.16, "percentage": 10.8, "elapsed_time": "0:00:54", "remaining_time": "0:07:32", "throughput": 1939.88, "total_tokens": 106304} |
| {"current_steps": 275, "total_steps": 2500, "loss": 0.4722, "lr": 0.029991578725671965, "epoch": 2.2, "percentage": 11.0, "elapsed_time": "0:00:55", "remaining_time": "0:07:30", "throughput": 1942.93, "total_tokens": 108192} |
| {"current_steps": 280, "total_steps": 2500, "loss": 0.3821, "lr": 0.029987704884030585, "epoch": 2.24, "percentage": 11.2, "elapsed_time": "0:00:56", "remaining_time": "0:07:29", "throughput": 1948.17, "total_tokens": 110400} |
| {"current_steps": 285, "total_steps": 2500, "loss": 0.3932, "lr": 0.029983100562797654, "epoch": 2.2800000000000002, "percentage": 11.4, "elapsed_time": "0:00:57", "remaining_time": "0:07:26", "throughput": 1948.61, "total_tokens": 112064} |
| {"current_steps": 290, "total_steps": 2500, "loss": 0.4333, "lr": 0.0299777659863813, "epoch": 2.32, "percentage": 11.6, "elapsed_time": "0:00:58", "remaining_time": "0:07:25", "throughput": 1953.25, "total_tokens": 114240} |
| {"current_steps": 295, "total_steps": 2500, "loss": 0.4133, "lr": 0.02997170141478124, "epoch": 2.36, "percentage": 11.8, "elapsed_time": "0:00:59", "remaining_time": "0:07:23", "throughput": 1956.34, "total_tokens": 116192} |
| {"current_steps": 300, "total_steps": 2500, "loss": 0.6054, "lr": 0.029964907143576146, "epoch": 2.4, "percentage": 12.0, "elapsed_time": "0:01:00", "remaining_time": "0:07:21", "throughput": 1956.74, "total_tokens": 117856} |
| {"current_steps": 305, "total_steps": 2500, "loss": 0.4193, "lr": 0.029957383503909207, "epoch": 2.44, "percentage": 12.2, "elapsed_time": "0:01:01", "remaining_time": "0:07:19", "throughput": 1956.64, "total_tokens": 119552} |
| {"current_steps": 310, "total_steps": 2500, "loss": 0.3931, "lr": 0.02994913086247201, "epoch": 2.48, "percentage": 12.4, "elapsed_time": "0:01:01", "remaining_time": "0:07:17", "throughput": 1957.29, "total_tokens": 121312} |
| {"current_steps": 315, "total_steps": 2500, "loss": 0.4105, "lr": 0.02994014962148666, "epoch": 2.52, "percentage": 12.6, "elapsed_time": "0:01:02", "remaining_time": "0:07:16", "throughput": 1960.47, "total_tokens": 123360} |
| {"current_steps": 320, "total_steps": 2500, "loss": 0.3718, "lr": 0.02993044021868616, "epoch": 2.56, "percentage": 12.8, "elapsed_time": "0:01:03", "remaining_time": "0:07:15", "throughput": 1964.45, "total_tokens": 125472} |
| {"current_steps": 325, "total_steps": 2500, "loss": 0.4553, "lr": 0.02992000312729311, "epoch": 2.6, "percentage": 13.0, "elapsed_time": "0:01:04", "remaining_time": "0:07:13", "throughput": 1966.95, "total_tokens": 127360} |
| {"current_steps": 330, "total_steps": 2500, "loss": 0.4259, "lr": 0.02990883885599662, "epoch": 2.64, "percentage": 13.2, "elapsed_time": "0:01:05", "remaining_time": "0:07:12", "throughput": 1970.44, "total_tokens": 129504} |
| {"current_steps": 335, "total_steps": 2500, "loss": 0.433, "lr": 0.02989694794892753, "epoch": 2.68, "percentage": 13.4, "elapsed_time": "0:01:06", "remaining_time": "0:07:11", "throughput": 1975.22, "total_tokens": 131744} |
| {"current_steps": 340, "total_steps": 2500, "loss": 0.5377, "lr": 0.029884330985631865, "epoch": 2.7199999999999998, "percentage": 13.6, "elapsed_time": "0:01:07", "remaining_time": "0:07:09", "throughput": 1979.94, "total_tokens": 133920} |
| {"current_steps": 345, "total_steps": 2500, "loss": 0.3742, "lr": 0.029870988581042618, "epoch": 2.76, "percentage": 13.8, "elapsed_time": "0:01:08", "remaining_time": "0:07:07", "throughput": 1980.47, "total_tokens": 135680} |
| {"current_steps": 350, "total_steps": 2500, "loss": 0.5017, "lr": 0.029856921385449764, "epoch": 2.8, "percentage": 14.0, "elapsed_time": "0:01:09", "remaining_time": "0:07:06", "throughput": 1980.99, "total_tokens": 137440} |
| {"current_steps": 355, "total_steps": 2500, "loss": 0.4367, "lr": 0.02984213008446858, "epoch": 2.84, "percentage": 14.2, "elapsed_time": "0:01:10", "remaining_time": "0:07:05", "throughput": 1986.4, "total_tokens": 139808} |
| {"current_steps": 360, "total_steps": 2500, "loss": 0.5944, "lr": 0.0298266153990062, "epoch": 2.88, "percentage": 14.4, "elapsed_time": "0:01:11", "remaining_time": "0:07:03", "throughput": 1987.97, "total_tokens": 141600} |
| {"current_steps": 365, "total_steps": 2500, "loss": 0.4197, "lr": 0.029810378085226505, "epoch": 2.92, "percentage": 14.6, "elapsed_time": "0:01:12", "remaining_time": "0:07:01", "throughput": 1988.73, "total_tokens": 143328} |
| {"current_steps": 370, "total_steps": 2500, "loss": 0.4708, "lr": 0.02979341893451326, "epoch": 2.96, "percentage": 14.8, "elapsed_time": "0:01:13", "remaining_time": "0:07:00", "throughput": 1992.1, "total_tokens": 145504} |
| {"current_steps": 375, "total_steps": 2500, "loss": 0.4231, "lr": 0.029775738773431554, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:01:13", "remaining_time": "0:06:59", "throughput": 1996.77, "total_tokens": 147712} |
| {"current_steps": 375, "total_steps": 2500, "eval_loss": 0.3933355212211609, "epoch": 3.0, "percentage": 15.0, "elapsed_time": "0:01:15", "remaining_time": "0:07:06", "throughput": 1964.68, "total_tokens": 147712} |
| {"current_steps": 380, "total_steps": 2500, "loss": 0.4493, "lr": 0.02975733846368749, "epoch": 3.04, "percentage": 15.2, "elapsed_time": "0:01:17", "remaining_time": "0:07:10", "throughput": 1943.11, "total_tokens": 149952} |
| {"current_steps": 385, "total_steps": 2500, "loss": 0.4871, "lr": 0.029738218902086198, "epoch": 3.08, "percentage": 15.4, "elapsed_time": "0:01:18", "remaining_time": "0:07:08", "throughput": 1946.09, "total_tokens": 151968} |
| {"current_steps": 390, "total_steps": 2500, "loss": 0.3498, "lr": 0.02971838102048815, "epoch": 3.12, "percentage": 15.6, "elapsed_time": "0:01:18", "remaining_time": "0:07:07", "throughput": 1948.14, "total_tokens": 153792} |
| {"current_steps": 395, "total_steps": 2500, "loss": 0.4336, "lr": 0.0296978257857637, "epoch": 3.16, "percentage": 15.8, "elapsed_time": "0:01:19", "remaining_time": "0:07:05", "throughput": 1950.46, "total_tokens": 155808} |
| {"current_steps": 400, "total_steps": 2500, "loss": 0.5013, "lr": 0.029676554199745995, "epoch": 3.2, "percentage": 16.0, "elapsed_time": "0:01:20", "remaining_time": "0:07:03", "throughput": 1950.82, "total_tokens": 157536} |
| {"current_steps": 405, "total_steps": 2500, "loss": 0.5216, "lr": 0.02965456729918212, "epoch": 3.24, "percentage": 16.2, "elapsed_time": "0:01:21", "remaining_time": "0:07:02", "throughput": 1954.17, "total_tokens": 159712} |
| {"current_steps": 410, "total_steps": 2500, "loss": 0.4967, "lr": 0.0296318661556826, "epoch": 3.2800000000000002, "percentage": 16.4, "elapsed_time": "0:01:22", "remaining_time": "0:07:01", "throughput": 1958.42, "total_tokens": 162048} |
| {"current_steps": 415, "total_steps": 2500, "loss": 0.3734, "lr": 0.029608451875669125, "epoch": 3.32, "percentage": 16.6, "elapsed_time": "0:01:23", "remaining_time": "0:07:00", "throughput": 1961.04, "total_tokens": 164064} |
| {"current_steps": 420, "total_steps": 2500, "loss": 0.3588, "lr": 0.02958432560032069, "epoch": 3.36, "percentage": 16.8, "elapsed_time": "0:01:24", "remaining_time": "0:06:58", "throughput": 1962.94, "total_tokens": 166016} |
| {"current_steps": 425, "total_steps": 2500, "loss": 0.3583, "lr": 0.029559488505517907, "epoch": 3.4, "percentage": 17.0, "elapsed_time": "0:01:25", "remaining_time": "0:06:57", "throughput": 1964.47, "total_tokens": 167808} |
| {"current_steps": 430, "total_steps": 2500, "loss": 0.3988, "lr": 0.029533941801785743, "epoch": 3.44, "percentage": 17.2, "elapsed_time": "0:01:26", "remaining_time": "0:06:55", "throughput": 1965.76, "total_tokens": 169760} |
| {"current_steps": 435, "total_steps": 2500, "loss": 0.3622, "lr": 0.02950768673423449, "epoch": 3.48, "percentage": 17.4, "elapsed_time": "0:01:27", "remaining_time": "0:06:54", "throughput": 1967.96, "total_tokens": 171744} |
| {"current_steps": 440, "total_steps": 2500, "loss": 0.3738, "lr": 0.029480724582499108, "epoch": 3.52, "percentage": 17.6, "elapsed_time": "0:01:28", "remaining_time": "0:06:52", "throughput": 1969.11, "total_tokens": 173568} |
| {"current_steps": 445, "total_steps": 2500, "loss": 0.4709, "lr": 0.02945305666067683, "epoch": 3.56, "percentage": 17.8, "elapsed_time": "0:01:29", "remaining_time": "0:06:51", "throughput": 1971.1, "total_tokens": 175648} |
| {"current_steps": 450, "total_steps": 2500, "loss": 0.4137, "lr": 0.02942468431726313, "epoch": 3.6, "percentage": 18.0, "elapsed_time": "0:01:30", "remaining_time": "0:06:50", "throughput": 1973.68, "total_tokens": 177728} |
| {"current_steps": 455, "total_steps": 2500, "loss": 0.4186, "lr": 0.029395608935086002, "epoch": 3.64, "percentage": 18.2, "elapsed_time": "0:01:30", "remaining_time": "0:06:48", "throughput": 1974.41, "total_tokens": 179520} |
| {"current_steps": 460, "total_steps": 2500, "loss": 0.3622, "lr": 0.029365831931238547, "epoch": 3.68, "percentage": 18.4, "elapsed_time": "0:01:31", "remaining_time": "0:06:47", "throughput": 1976.14, "total_tokens": 181440} |
| {"current_steps": 465, "total_steps": 2500, "loss": 0.3629, "lr": 0.029335354757009942, "epoch": 3.7199999999999998, "percentage": 18.6, "elapsed_time": "0:01:32", "remaining_time": "0:06:45", "throughput": 1976.2, "total_tokens": 183104} |
| {"current_steps": 470, "total_steps": 2500, "loss": 0.3548, "lr": 0.02930417889781464, "epoch": 3.76, "percentage": 18.8, "elapsed_time": "0:01:33", "remaining_time": "0:06:44", "throughput": 1978.56, "total_tokens": 185120} |
| {"current_steps": 475, "total_steps": 2500, "loss": 0.373, "lr": 0.029272305873120043, "epoch": 3.8, "percentage": 19.0, "elapsed_time": "0:01:34", "remaining_time": "0:06:42", "throughput": 1978.36, "total_tokens": 186816} |
| {"current_steps": 480, "total_steps": 2500, "loss": 0.3954, "lr": 0.02923973723637242, "epoch": 3.84, "percentage": 19.2, "elapsed_time": "0:01:35", "remaining_time": "0:06:41", "throughput": 1981.65, "total_tokens": 189056} |
| {"current_steps": 485, "total_steps": 2500, "loss": 0.3703, "lr": 0.029206474574921162, "epoch": 3.88, "percentage": 19.4, "elapsed_time": "0:01:36", "remaining_time": "0:06:40", "throughput": 1984.57, "total_tokens": 191264} |
| {"current_steps": 490, "total_steps": 2500, "loss": 0.3829, "lr": 0.029172519509941472, "epoch": 3.92, "percentage": 19.6, "elapsed_time": "0:01:37", "remaining_time": "0:06:38", "throughput": 1984.92, "total_tokens": 193024} |
| {"current_steps": 495, "total_steps": 2500, "loss": 0.3821, "lr": 0.029137873696355312, "epoch": 3.96, "percentage": 19.8, "elapsed_time": "0:01:38", "remaining_time": "0:06:37", "throughput": 1985.28, "total_tokens": 194784} |
| {"current_steps": 500, "total_steps": 2500, "loss": 0.371, "lr": 0.029102538822750754, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:38", "remaining_time": "0:06:35", "throughput": 1984.71, "total_tokens": 196320} |
| {"current_steps": 500, "total_steps": 2500, "eval_loss": 0.36149507761001587, "epoch": 4.0, "percentage": 20.0, "elapsed_time": "0:01:40", "remaining_time": "0:06:40", "throughput": 1960.8, "total_tokens": 196320} |
| {"current_steps": 505, "total_steps": 2500, "loss": 0.3628, "lr": 0.02906651661129969, "epoch": 4.04, "percentage": 20.2, "elapsed_time": "0:01:41", "remaining_time": "0:06:42", "throughput": 1947.44, "total_tokens": 198304} |
| {"current_steps": 510, "total_steps": 2500, "loss": 0.3315, "lr": 0.029029808817673884, "epoch": 4.08, "percentage": 20.4, "elapsed_time": "0:01:42", "remaining_time": "0:06:40", "throughput": 1948.83, "total_tokens": 200224} |
| {"current_steps": 515, "total_steps": 2500, "loss": 0.3816, "lr": 0.028992417230959418, "epoch": 4.12, "percentage": 20.6, "elapsed_time": "0:01:43", "remaining_time": "0:06:39", "throughput": 1951.46, "total_tokens": 202336} |
| {"current_steps": 520, "total_steps": 2500, "loss": 0.5441, "lr": 0.02895434367356947, "epoch": 4.16, "percentage": 20.8, "elapsed_time": "0:01:44", "remaining_time": "0:06:38", "throughput": 1953.7, "total_tokens": 204320} |
| {"current_steps": 525, "total_steps": 2500, "loss": 0.4703, "lr": 0.02891559000115551, "epoch": 4.2, "percentage": 21.0, "elapsed_time": "0:01:45", "remaining_time": "0:06:36", "throughput": 1956.27, "total_tokens": 206368} |
| {"current_steps": 530, "total_steps": 2500, "loss": 0.4003, "lr": 0.02887615810251687, "epoch": 4.24, "percentage": 21.2, "elapsed_time": "0:01:46", "remaining_time": "0:06:35", "throughput": 1958.25, "total_tokens": 208352} |
| {"current_steps": 535, "total_steps": 2500, "loss": 0.5362, "lr": 0.02883604989950865, "epoch": 4.28, "percentage": 21.4, "elapsed_time": "0:01:47", "remaining_time": "0:06:34", "throughput": 1961.96, "total_tokens": 210656} |
| {"current_steps": 540, "total_steps": 2500, "loss": 0.3997, "lr": 0.028795267346948084, "epoch": 4.32, "percentage": 21.6, "elapsed_time": "0:01:48", "remaining_time": "0:06:33", "throughput": 1964.25, "total_tokens": 212704} |
| {"current_steps": 545, "total_steps": 2500, "loss": 0.4328, "lr": 0.028753812432519246, "epoch": 4.36, "percentage": 21.8, "elapsed_time": "0:01:49", "remaining_time": "0:06:31", "throughput": 1964.7, "total_tokens": 214464} |
| {"current_steps": 550, "total_steps": 2500, "loss": 0.3991, "lr": 0.028711687176676184, "epoch": 4.4, "percentage": 22.0, "elapsed_time": "0:01:50", "remaining_time": "0:06:30", "throughput": 1965.02, "total_tokens": 216160} |
| {"current_steps": 555, "total_steps": 2500, "loss": 0.3542, "lr": 0.02866889363254443, "epoch": 4.44, "percentage": 22.2, "elapsed_time": "0:01:50", "remaining_time": "0:06:28", "throughput": 1966.54, "total_tokens": 218176} |
| {"current_steps": 560, "total_steps": 2500, "loss": 0.3607, "lr": 0.028625433885820964, "epoch": 4.48, "percentage": 22.4, "elapsed_time": "0:01:51", "remaining_time": "0:06:27", "throughput": 1968.81, "total_tokens": 220288} |
| {"current_steps": 565, "total_steps": 2500, "loss": 0.3832, "lr": 0.028581310054672526, "epoch": 4.52, "percentage": 22.6, "elapsed_time": "0:01:52", "remaining_time": "0:06:26", "throughput": 1971.06, "total_tokens": 222400} |
| {"current_steps": 570, "total_steps": 2500, "loss": 0.3502, "lr": 0.028536524289632388, "epoch": 4.5600000000000005, "percentage": 22.8, "elapsed_time": "0:01:53", "remaining_time": "0:06:25", "throughput": 1973.22, "total_tokens": 224512} |
| {"current_steps": 575, "total_steps": 2500, "loss": 0.3473, "lr": 0.028491078773495566, "epoch": 4.6, "percentage": 23.0, "elapsed_time": "0:01:54", "remaining_time": "0:06:23", "throughput": 1973.3, "total_tokens": 226176} |
| {"current_steps": 580, "total_steps": 2500, "loss": 0.4022, "lr": 0.028444975721212393, "epoch": 4.64, "percentage": 23.2, "elapsed_time": "0:01:55", "remaining_time": "0:06:22", "throughput": 1974.46, "total_tokens": 228160} |
| {"current_steps": 585, "total_steps": 2500, "loss": 0.3993, "lr": 0.028398217379780606, "epoch": 4.68, "percentage": 23.4, "elapsed_time": "0:01:56", "remaining_time": "0:06:21", "throughput": 1977.12, "total_tokens": 230336} |
| {"current_steps": 590, "total_steps": 2500, "loss": 0.323, "lr": 0.028350806028135785, "epoch": 4.72, "percentage": 23.6, "elapsed_time": "0:01:57", "remaining_time": "0:06:20", "throughput": 1978.23, "total_tokens": 232320} |
| {"current_steps": 595, "total_steps": 2500, "loss": 0.3817, "lr": 0.028302743977040336, "epoch": 4.76, "percentage": 23.8, "elapsed_time": "0:01:58", "remaining_time": "0:06:18", "throughput": 1979.89, "total_tokens": 234368} |
| {"current_steps": 600, "total_steps": 2500, "loss": 0.4572, "lr": 0.02825403356897082, "epoch": 4.8, "percentage": 24.0, "elapsed_time": "0:01:59", "remaining_time": "0:06:17", "throughput": 1980.91, "total_tokens": 236288} |
| {"current_steps": 605, "total_steps": 2500, "loss": 0.4331, "lr": 0.028204677178003804, "epoch": 4.84, "percentage": 24.2, "elapsed_time": "0:02:00", "remaining_time": "0:06:16", "throughput": 1982.99, "total_tokens": 238400} |
| {"current_steps": 610, "total_steps": 2500, "loss": 0.3884, "lr": 0.02815467720970015, "epoch": 4.88, "percentage": 24.4, "elapsed_time": "0:02:01", "remaining_time": "0:06:15", "throughput": 1983.95, "total_tokens": 240320} |
| {"current_steps": 615, "total_steps": 2500, "loss": 0.3402, "lr": 0.02810403610098778, "epoch": 4.92, "percentage": 24.6, "elapsed_time": "0:02:01", "remaining_time": "0:06:13", "throughput": 1983.94, "total_tokens": 241984} |
| {"current_steps": 620, "total_steps": 2500, "loss": 0.3685, "lr": 0.028052756320042883, "epoch": 4.96, "percentage": 24.8, "elapsed_time": "0:02:02", "remaining_time": "0:06:12", "throughput": 1985.03, "total_tokens": 243808} |
| {"current_steps": 625, "total_steps": 2500, "loss": 0.3531, "lr": 0.028000840366169646, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:02:03", "remaining_time": "0:06:11", "throughput": 1985.02, "total_tokens": 245520} |
| {"current_steps": 625, "total_steps": 2500, "eval_loss": 0.4567475914955139, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:02:04", "remaining_time": "0:06:14", "throughput": 1965.85, "total_tokens": 245520} |
| {"current_steps": 630, "total_steps": 2500, "loss": 0.4107, "lr": 0.027948290769678405, "epoch": 5.04, "percentage": 25.2, "elapsed_time": "0:02:06", "remaining_time": "0:06:15", "throughput": 1953.59, "total_tokens": 247344} |
| {"current_steps": 635, "total_steps": 2500, "loss": 0.3786, "lr": 0.027895110091762366, "epoch": 5.08, "percentage": 25.4, "elapsed_time": "0:02:07", "remaining_time": "0:06:14", "throughput": 1955.2, "total_tokens": 249392} |
| {"current_steps": 640, "total_steps": 2500, "loss": 0.3097, "lr": 0.02784130092437274, "epoch": 5.12, "percentage": 25.6, "elapsed_time": "0:02:08", "remaining_time": "0:06:13", "throughput": 1956.45, "total_tokens": 251280} |
| {"current_steps": 645, "total_steps": 2500, "loss": 0.4259, "lr": 0.027786865890092438, "epoch": 5.16, "percentage": 25.8, "elapsed_time": "0:02:09", "remaining_time": "0:06:11", "throughput": 1956.87, "total_tokens": 253040} |
| {"current_steps": 650, "total_steps": 2500, "loss": 0.4008, "lr": 0.02773180764200823, "epoch": 5.2, "percentage": 26.0, "elapsed_time": "0:02:10", "remaining_time": "0:06:10", "throughput": 1957.07, "total_tokens": 254768} |
| {"current_steps": 655, "total_steps": 2500, "loss": 0.3608, "lr": 0.027676128863581447, "epoch": 5.24, "percentage": 26.2, "elapsed_time": "0:02:11", "remaining_time": "0:06:09", "throughput": 1959.89, "total_tokens": 257040} |
| {"current_steps": 660, "total_steps": 2500, "loss": 0.3699, "lr": 0.027619832268517205, "epoch": 5.28, "percentage": 26.4, "elapsed_time": "0:02:12", "remaining_time": "0:06:08", "throughput": 1960.94, "total_tokens": 258960} |
| {"current_steps": 665, "total_steps": 2500, "loss": 0.3644, "lr": 0.02756292060063213, "epoch": 5.32, "percentage": 26.6, "elapsed_time": "0:02:12", "remaining_time": "0:06:06", "throughput": 1961.74, "total_tokens": 260720} |
| {"current_steps": 670, "total_steps": 2500, "loss": 0.3283, "lr": 0.02750539663372061, "epoch": 5.36, "percentage": 26.8, "elapsed_time": "0:02:13", "remaining_time": "0:06:05", "throughput": 1964.4, "total_tokens": 262992} |
| {"current_steps": 675, "total_steps": 2500, "loss": 0.4639, "lr": 0.027447263171419645, "epoch": 5.4, "percentage": 27.0, "elapsed_time": "0:02:14", "remaining_time": "0:06:04", "throughput": 1965.41, "total_tokens": 264912} |
| {"current_steps": 680, "total_steps": 2500, "loss": 0.438, "lr": 0.027388523047072177, "epoch": 5.44, "percentage": 27.2, "elapsed_time": "0:02:15", "remaining_time": "0:06:03", "throughput": 1965.76, "total_tokens": 266672} |
| {"current_steps": 685, "total_steps": 2500, "loss": 0.3646, "lr": 0.027329179123588995, "epoch": 5.48, "percentage": 27.4, "elapsed_time": "0:02:16", "remaining_time": "0:06:01", "throughput": 1967.37, "total_tokens": 268688} |
| {"current_steps": 690, "total_steps": 2500, "loss": 0.5465, "lr": 0.027269234293309207, "epoch": 5.52, "percentage": 27.6, "elapsed_time": "0:02:17", "remaining_time": "0:06:00", "throughput": 1968.27, "total_tokens": 270544} |
| {"current_steps": 695, "total_steps": 2500, "loss": 0.4467, "lr": 0.027208691477859273, "epoch": 5.5600000000000005, "percentage": 27.8, "elapsed_time": "0:02:18", "remaining_time": "0:05:59", "throughput": 1970.37, "total_tokens": 272656} |
| {"current_steps": 700, "total_steps": 2500, "loss": 0.4854, "lr": 0.027147553628010596, "epoch": 5.6, "percentage": 28.0, "elapsed_time": "0:02:19", "remaining_time": "0:05:58", "throughput": 1972.15, "total_tokens": 274768} |
| {"current_steps": 705, "total_steps": 2500, "loss": 0.4681, "lr": 0.027085823723535737, "epoch": 5.64, "percentage": 28.2, "elapsed_time": "0:02:20", "remaining_time": "0:05:57", "throughput": 1974.15, "total_tokens": 276976} |
| {"current_steps": 710, "total_steps": 2500, "loss": 0.4246, "lr": 0.02702350477306315, "epoch": 5.68, "percentage": 28.4, "elapsed_time": "0:02:21", "remaining_time": "0:05:56", "throughput": 1976.27, "total_tokens": 279152} |
| {"current_steps": 715, "total_steps": 2500, "loss": 0.4257, "lr": 0.026960599813930553, "epoch": 5.72, "percentage": 28.6, "elapsed_time": "0:02:22", "remaining_time": "0:05:55", "throughput": 1978.6, "total_tokens": 281360} |
| {"current_steps": 720, "total_steps": 2500, "loss": 0.3381, "lr": 0.02689711191203692, "epoch": 5.76, "percentage": 28.8, "elapsed_time": "0:02:23", "remaining_time": "0:05:53", "throughput": 1979.21, "total_tokens": 283248} |
| {"current_steps": 725, "total_steps": 2500, "loss": 0.3955, "lr": 0.02683304416169301, "epoch": 5.8, "percentage": 29.0, "elapsed_time": "0:02:24", "remaining_time": "0:05:52", "throughput": 1980.24, "total_tokens": 285200} |
| {"current_steps": 730, "total_steps": 2500, "loss": 0.3434, "lr": 0.0267683996854706, "epoch": 5.84, "percentage": 29.2, "elapsed_time": "0:02:25", "remaining_time": "0:05:51", "throughput": 1982.78, "total_tokens": 287568} |
| {"current_steps": 735, "total_steps": 2500, "loss": 0.3918, "lr": 0.026703181634050253, "epoch": 5.88, "percentage": 29.4, "elapsed_time": "0:02:25", "remaining_time": "0:05:50", "throughput": 1984.25, "total_tokens": 289584} |
| {"current_steps": 740, "total_steps": 2500, "loss": 0.3845, "lr": 0.026637393186067792, "epoch": 5.92, "percentage": 29.6, "elapsed_time": "0:02:26", "remaining_time": "0:05:49", "throughput": 1984.28, "total_tokens": 291312} |
| {"current_steps": 745, "total_steps": 2500, "loss": 0.3726, "lr": 0.026571037547959354, "epoch": 5.96, "percentage": 29.8, "elapsed_time": "0:02:27", "remaining_time": "0:05:47", "throughput": 1985.9, "total_tokens": 293360} |
| {"current_steps": 750, "total_steps": 2500, "loss": 0.3797, "lr": 0.026504117953805126, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:02:28", "remaining_time": "0:05:46", "throughput": 1985.76, "total_tokens": 294976} |
| {"current_steps": 750, "total_steps": 2500, "eval_loss": 0.4272903800010681, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:02:29", "remaining_time": "0:05:49", "throughput": 1969.66, "total_tokens": 294976} |
| {"current_steps": 755, "total_steps": 2500, "loss": 0.3748, "lr": 0.02643663766517172, "epoch": 6.04, "percentage": 30.2, "elapsed_time": "0:02:31", "remaining_time": "0:05:50", "throughput": 1960.43, "total_tokens": 297024} |
| {"current_steps": 760, "total_steps": 2500, "loss": 0.405, "lr": 0.0263685999709532, "epoch": 6.08, "percentage": 30.4, "elapsed_time": "0:02:32", "remaining_time": "0:05:48", "throughput": 1961.51, "total_tokens": 298976} |
| {"current_steps": 765, "total_steps": 2500, "loss": 0.3635, "lr": 0.026300008187210802, "epoch": 6.12, "percentage": 30.6, "elapsed_time": "0:02:33", "remaining_time": "0:05:47", "throughput": 1962.53, "total_tokens": 300928} |
| {"current_steps": 770, "total_steps": 2500, "loss": 0.4392, "lr": 0.02623086565701129, "epoch": 6.16, "percentage": 30.8, "elapsed_time": "0:02:34", "remaining_time": "0:05:46", "throughput": 1962.62, "total_tokens": 302592} |
| {"current_steps": 775, "total_steps": 2500, "loss": 0.3332, "lr": 0.026161175750264047, "epoch": 6.2, "percentage": 31.0, "elapsed_time": "0:02:35", "remaining_time": "0:05:45", "throughput": 1964.31, "total_tokens": 304704} |
| {"current_steps": 780, "total_steps": 2500, "loss": 0.4074, "lr": 0.026090941863556805, "epoch": 6.24, "percentage": 31.2, "elapsed_time": "0:02:36", "remaining_time": "0:05:44", "throughput": 1967.8, "total_tokens": 307232} |
| {"current_steps": 785, "total_steps": 2500, "loss": 0.3029, "lr": 0.026020167419990123, "epoch": 6.28, "percentage": 31.4, "elapsed_time": "0:02:37", "remaining_time": "0:05:43", "throughput": 1968.29, "total_tokens": 309024} |
| {"current_steps": 790, "total_steps": 2500, "loss": 0.41, "lr": 0.025948855869010523, "epoch": 6.32, "percentage": 31.6, "elapsed_time": "0:02:37", "remaining_time": "0:05:41", "throughput": 1969.85, "total_tokens": 311072} |
| {"current_steps": 795, "total_steps": 2500, "loss": 0.3748, "lr": 0.0258770106862424, "epoch": 6.36, "percentage": 31.8, "elapsed_time": "0:02:38", "remaining_time": "0:05:40", "throughput": 1971.24, "total_tokens": 313152} |
| {"current_steps": 800, "total_steps": 2500, "loss": 0.3625, "lr": 0.025804635373318605, "epoch": 6.4, "percentage": 32.0, "elapsed_time": "0:02:39", "remaining_time": "0:05:39", "throughput": 1972.41, "total_tokens": 315136} |
| {"current_steps": 805, "total_steps": 2500, "loss": 0.379, "lr": 0.025731733457709776, "epoch": 6.44, "percentage": 32.2, "elapsed_time": "0:02:40", "remaining_time": "0:05:38", "throughput": 1972.3, "total_tokens": 316768} |
| {"current_steps": 810, "total_steps": 2500, "loss": 0.3968, "lr": 0.02565830849255244, "epoch": 6.48, "percentage": 32.4, "elapsed_time": "0:02:41", "remaining_time": "0:05:36", "throughput": 1973.21, "total_tokens": 318592} |
| {"current_steps": 815, "total_steps": 2500, "loss": 0.3851, "lr": 0.025584364056475812, "epoch": 6.52, "percentage": 32.6, "elapsed_time": "0:02:42", "remaining_time": "0:05:35", "throughput": 1974.5, "total_tokens": 320608} |
| {"current_steps": 820, "total_steps": 2500, "loss": 0.3533, "lr": 0.025509903753427376, "epoch": 6.5600000000000005, "percentage": 32.8, "elapsed_time": "0:02:43", "remaining_time": "0:05:34", "throughput": 1976.77, "total_tokens": 322912} |
| {"current_steps": 825, "total_steps": 2500, "loss": 0.3553, "lr": 0.02543493121249726, "epoch": 6.6, "percentage": 33.0, "elapsed_time": "0:02:44", "remaining_time": "0:05:33", "throughput": 1979.22, "total_tokens": 325248} |
| {"current_steps": 830, "total_steps": 2500, "loss": 0.3626, "lr": 0.02535945008774134, "epoch": 6.64, "percentage": 33.2, "elapsed_time": "0:02:45", "remaining_time": "0:05:32", "throughput": 1979.99, "total_tokens": 327296} |
| {"current_steps": 835, "total_steps": 2500, "loss": 0.2508, "lr": 0.025283464058003153, "epoch": 6.68, "percentage": 33.4, "elapsed_time": "0:02:46", "remaining_time": "0:05:31", "throughput": 1980.88, "total_tokens": 329280} |
| {"current_steps": 840, "total_steps": 2500, "loss": 0.3576, "lr": 0.02520697682673458, "epoch": 6.72, "percentage": 33.6, "elapsed_time": "0:02:47", "remaining_time": "0:05:30", "throughput": 1982.24, "total_tokens": 331392} |
| {"current_steps": 845, "total_steps": 2500, "loss": 0.4182, "lr": 0.025129992121815364, "epoch": 6.76, "percentage": 33.8, "elapsed_time": "0:02:48", "remaining_time": "0:05:29", "throughput": 1982.88, "total_tokens": 333184} |
| {"current_steps": 850, "total_steps": 2500, "loss": 0.3747, "lr": 0.025052513695371426, "epoch": 6.8, "percentage": 34.0, "elapsed_time": "0:02:48", "remaining_time": "0:05:28", "throughput": 1983.78, "total_tokens": 335200} |
| {"current_steps": 855, "total_steps": 2500, "loss": 0.3889, "lr": 0.02497454532359195, "epoch": 6.84, "percentage": 34.2, "elapsed_time": "0:02:49", "remaining_time": "0:05:26", "throughput": 1984.64, "total_tokens": 337088} |
| {"current_steps": 860, "total_steps": 2500, "loss": 0.3811, "lr": 0.024896090806545392, "epoch": 6.88, "percentage": 34.4, "elapsed_time": "0:02:50", "remaining_time": "0:05:25", "throughput": 1985.02, "total_tokens": 338944} |
| {"current_steps": 865, "total_steps": 2500, "loss": 0.3808, "lr": 0.02481715396799422, "epoch": 6.92, "percentage": 34.6, "elapsed_time": "0:02:51", "remaining_time": "0:05:24", "throughput": 1985.39, "total_tokens": 340800} |
| {"current_steps": 870, "total_steps": 2500, "loss": 0.3825, "lr": 0.02473773865520859, "epoch": 6.96, "percentage": 34.8, "elapsed_time": "0:02:52", "remaining_time": "0:05:23", "throughput": 1986.1, "total_tokens": 342720} |
| {"current_steps": 875, "total_steps": 2500, "loss": 0.3769, "lr": 0.024657848738778805, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:02:53", "remaining_time": "0:05:22", "throughput": 1985.79, "total_tokens": 344320} |
| {"current_steps": 875, "total_steps": 2500, "eval_loss": 0.38088172674179077, "epoch": 7.0, "percentage": 35.0, "elapsed_time": "0:02:54", "remaining_time": "0:05:24", "throughput": 1972.05, "total_tokens": 344320} |
| {"current_steps": 880, "total_steps": 2500, "loss": 0.2859, "lr": 0.024577488112426684, "epoch": 7.04, "percentage": 35.2, "elapsed_time": "0:02:56", "remaining_time": "0:05:24", "throughput": 1963.23, "total_tokens": 346144} |
| {"current_steps": 885, "total_steps": 2500, "loss": 0.3787, "lr": 0.024496660692815784, "epoch": 7.08, "percentage": 35.4, "elapsed_time": "0:02:57", "remaining_time": "0:05:23", "throughput": 1965.23, "total_tokens": 348480} |
| {"current_steps": 890, "total_steps": 2500, "loss": 0.3581, "lr": 0.024415370419360507, "epoch": 7.12, "percentage": 35.6, "elapsed_time": "0:02:58", "remaining_time": "0:05:22", "throughput": 1965.99, "total_tokens": 350464} |
| {"current_steps": 895, "total_steps": 2500, "loss": 0.4492, "lr": 0.024333621254034096, "epoch": 7.16, "percentage": 35.8, "elapsed_time": "0:02:59", "remaining_time": "0:05:21", "throughput": 1966.71, "total_tokens": 352320} |
| {"current_steps": 900, "total_steps": 2500, "loss": 0.3571, "lr": 0.024251417181175537, "epoch": 7.2, "percentage": 36.0, "elapsed_time": "0:02:59", "remaining_time": "0:05:19", "throughput": 1966.64, "total_tokens": 353952} |
| {"current_steps": 905, "total_steps": 2500, "loss": 0.4173, "lr": 0.024168762207295375, "epoch": 7.24, "percentage": 36.2, "elapsed_time": "0:03:00", "remaining_time": "0:05:18", "throughput": 1967.57, "total_tokens": 355968} |
| {"current_steps": 910, "total_steps": 2500, "loss": 0.3965, "lr": 0.02408566036088042, "epoch": 7.28, "percentage": 36.4, "elapsed_time": "0:03:01", "remaining_time": "0:05:17", "throughput": 1967.97, "total_tokens": 357824} |
| {"current_steps": 915, "total_steps": 2500, "loss": 0.3916, "lr": 0.024002115692197432, "epoch": 7.32, "percentage": 36.6, "elapsed_time": "0:03:02", "remaining_time": "0:05:16", "throughput": 1970.06, "total_tokens": 360128} |
| {"current_steps": 920, "total_steps": 2500, "loss": 0.3612, "lr": 0.0239181322730957, "epoch": 7.36, "percentage": 36.8, "elapsed_time": "0:03:03", "remaining_time": "0:05:15", "throughput": 1971.31, "total_tokens": 362272} |
| {"current_steps": 925, "total_steps": 2500, "loss": 0.3478, "lr": 0.02383371419680858, "epoch": 7.4, "percentage": 37.0, "elapsed_time": "0:03:04", "remaining_time": "0:05:14", "throughput": 1972.5, "total_tokens": 364384} |
| {"current_steps": 930, "total_steps": 2500, "loss": 0.356, "lr": 0.02374886557775402, "epoch": 7.44, "percentage": 37.2, "elapsed_time": "0:03:05", "remaining_time": "0:05:13", "throughput": 1973.71, "total_tokens": 366464} |
| {"current_steps": 935, "total_steps": 2500, "loss": 0.3398, "lr": 0.02366359055133401, "epoch": 7.48, "percentage": 37.4, "elapsed_time": "0:03:06", "remaining_time": "0:05:12", "throughput": 1973.91, "total_tokens": 368160} |
| {"current_steps": 940, "total_steps": 2500, "loss": 0.3823, "lr": 0.023577893273733035, "epoch": 7.52, "percentage": 37.6, "elapsed_time": "0:03:07", "remaining_time": "0:05:11", "throughput": 1976.71, "total_tokens": 370688} |
| {"current_steps": 945, "total_steps": 2500, "loss": 0.3708, "lr": 0.023491777921715497, "epoch": 7.5600000000000005, "percentage": 37.8, "elapsed_time": "0:03:08", "remaining_time": "0:05:10", "throughput": 1977.5, "total_tokens": 372576} |
| {"current_steps": 950, "total_steps": 2500, "loss": 0.4326, "lr": 0.023405248692422168, "epoch": 7.6, "percentage": 38.0, "elapsed_time": "0:03:09", "remaining_time": "0:05:08", "throughput": 1978.3, "total_tokens": 374464} |
| {"current_steps": 955, "total_steps": 2500, "loss": 0.3874, "lr": 0.02331830980316561, "epoch": 7.64, "percentage": 38.2, "elapsed_time": "0:03:10", "remaining_time": "0:05:07", "throughput": 1979.58, "total_tokens": 376576} |
| {"current_steps": 960, "total_steps": 2500, "loss": 0.3745, "lr": 0.02323096549122463, "epoch": 7.68, "percentage": 38.4, "elapsed_time": "0:03:11", "remaining_time": "0:05:06", "throughput": 1980.36, "total_tokens": 378464} |
| {"current_steps": 965, "total_steps": 2500, "loss": 0.4957, "lr": 0.023143220013637764, "epoch": 7.72, "percentage": 38.6, "elapsed_time": "0:03:11", "remaining_time": "0:05:05", "throughput": 1980.82, "total_tokens": 380224} |
| {"current_steps": 970, "total_steps": 2500, "loss": 0.3984, "lr": 0.023055077646995804, "epoch": 7.76, "percentage": 38.8, "elapsed_time": "0:03:12", "remaining_time": "0:05:04", "throughput": 1982.58, "total_tokens": 382560} |
| {"current_steps": 975, "total_steps": 2500, "loss": 0.3417, "lr": 0.02296654268723335, "epoch": 7.8, "percentage": 39.0, "elapsed_time": "0:03:13", "remaining_time": "0:05:03", "throughput": 1983.82, "total_tokens": 384672} |
| {"current_steps": 980, "total_steps": 2500, "loss": 0.3955, "lr": 0.022877619449419438, "epoch": 7.84, "percentage": 39.2, "elapsed_time": "0:03:14", "remaining_time": "0:05:02", "throughput": 1984.51, "total_tokens": 386560} |
| {"current_steps": 985, "total_steps": 2500, "loss": 0.3893, "lr": 0.022788312267547222, "epoch": 7.88, "percentage": 39.4, "elapsed_time": "0:03:15", "remaining_time": "0:05:00", "throughput": 1984.61, "total_tokens": 388256} |
| {"current_steps": 990, "total_steps": 2500, "loss": 0.3035, "lr": 0.022698625494322753, "epoch": 7.92, "percentage": 39.6, "elapsed_time": "0:03:16", "remaining_time": "0:04:59", "throughput": 1984.43, "total_tokens": 389888} |
| {"current_steps": 995, "total_steps": 2500, "loss": 0.5096, "lr": 0.022608563500952836, "epoch": 7.96, "percentage": 39.8, "elapsed_time": "0:03:17", "remaining_time": "0:04:58", "throughput": 1985.51, "total_tokens": 391968} |
| {"current_steps": 1000, "total_steps": 2500, "loss": 0.3729, "lr": 0.022518130676931963, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:03:18", "remaining_time": "0:04:57", "throughput": 1985.8, "total_tokens": 393840} |
| {"current_steps": 1000, "total_steps": 2500, "eval_loss": 0.3906479477882385, "epoch": 8.0, "percentage": 40.0, "elapsed_time": "0:03:19", "remaining_time": "0:04:59", "throughput": 1973.77, "total_tokens": 393840} |
| {"current_steps": 1005, "total_steps": 2500, "loss": 0.4498, "lr": 0.022427331429828397, "epoch": 8.04, "percentage": 40.2, "elapsed_time": "0:03:21", "remaining_time": "0:04:59", "throughput": 1966.86, "total_tokens": 395920} |
| {"current_steps": 1010, "total_steps": 2500, "loss": 0.4517, "lr": 0.022336170185069344, "epoch": 8.08, "percentage": 40.4, "elapsed_time": "0:03:22", "remaining_time": "0:04:58", "throughput": 1967.19, "total_tokens": 397648} |
| {"current_steps": 1015, "total_steps": 2500, "loss": 0.4058, "lr": 0.022244651385725258, "epoch": 8.12, "percentage": 40.6, "elapsed_time": "0:03:22", "remaining_time": "0:04:56", "throughput": 1967.0, "total_tokens": 399280} |
| {"current_steps": 1020, "total_steps": 2500, "loss": 0.3791, "lr": 0.022152779492293318, "epoch": 8.16, "percentage": 40.8, "elapsed_time": "0:03:23", "remaining_time": "0:04:55", "throughput": 1969.28, "total_tokens": 401680} |
| {"current_steps": 1025, "total_steps": 2500, "loss": 0.3628, "lr": 0.02206055898247999, "epoch": 8.2, "percentage": 41.0, "elapsed_time": "0:03:24", "remaining_time": "0:04:54", "throughput": 1969.18, "total_tokens": 403312} |
| {"current_steps": 1030, "total_steps": 2500, "loss": 0.3646, "lr": 0.021967994350982817, "epoch": 8.24, "percentage": 41.2, "elapsed_time": "0:03:25", "remaining_time": "0:04:53", "throughput": 1969.96, "total_tokens": 405264} |
| {"current_steps": 1035, "total_steps": 2500, "loss": 0.3564, "lr": 0.021875090109271345, "epoch": 8.28, "percentage": 41.4, "elapsed_time": "0:03:26", "remaining_time": "0:04:52", "throughput": 1970.46, "total_tokens": 407088} |
| {"current_steps": 1040, "total_steps": 2500, "loss": 0.3978, "lr": 0.02178185078536726, "epoch": 8.32, "percentage": 41.6, "elapsed_time": "0:03:27", "remaining_time": "0:04:51", "throughput": 1971.21, "total_tokens": 408976} |
| {"current_steps": 1045, "total_steps": 2500, "loss": 0.3665, "lr": 0.02168828092362366, "epoch": 8.36, "percentage": 41.8, "elapsed_time": "0:03:28", "remaining_time": "0:04:50", "throughput": 1972.85, "total_tokens": 411184} |
| {"current_steps": 1050, "total_steps": 2500, "loss": 0.3755, "lr": 0.021594385084503596, "epoch": 8.4, "percentage": 42.0, "elapsed_time": "0:03:29", "remaining_time": "0:04:49", "throughput": 1974.22, "total_tokens": 413392} |
| {"current_steps": 1055, "total_steps": 2500, "loss": 0.3469, "lr": 0.02150016784435781, "epoch": 8.44, "percentage": 42.2, "elapsed_time": "0:03:30", "remaining_time": "0:04:48", "throughput": 1974.73, "total_tokens": 415344} |
| {"current_steps": 1060, "total_steps": 2500, "loss": 0.3571, "lr": 0.021405633795201648, "epoch": 8.48, "percentage": 42.4, "elapsed_time": "0:03:31", "remaining_time": "0:04:46", "throughput": 1975.6, "total_tokens": 417328} |
| {"current_steps": 1065, "total_steps": 2500, "loss": 0.3823, "lr": 0.02131078754449132, "epoch": 8.52, "percentage": 42.6, "elapsed_time": "0:03:32", "remaining_time": "0:04:45", "throughput": 1976.24, "total_tokens": 419280} |
| {"current_steps": 1070, "total_steps": 2500, "loss": 0.3575, "lr": 0.021215633714899262, "epoch": 8.56, "percentage": 42.8, "elapsed_time": "0:03:33", "remaining_time": "0:04:44", "throughput": 1977.85, "total_tokens": 421552} |
| {"current_steps": 1075, "total_steps": 2500, "loss": 0.3409, "lr": 0.02112017694408889, "epoch": 8.6, "percentage": 43.0, "elapsed_time": "0:03:33", "remaining_time": "0:04:43", "throughput": 1977.85, "total_tokens": 423152} |
| {"current_steps": 1080, "total_steps": 2500, "loss": 0.3388, "lr": 0.02102442188448855, "epoch": 8.64, "percentage": 43.2, "elapsed_time": "0:03:34", "remaining_time": "0:04:42", "throughput": 1979.03, "total_tokens": 425328} |
| {"current_steps": 1085, "total_steps": 2500, "loss": 0.3537, "lr": 0.02092837320306475, "epoch": 8.68, "percentage": 43.4, "elapsed_time": "0:03:35", "remaining_time": "0:04:41", "throughput": 1980.03, "total_tokens": 427472} |
| {"current_steps": 1090, "total_steps": 2500, "loss": 0.381, "lr": 0.020832035581094713, "epoch": 8.72, "percentage": 43.6, "elapsed_time": "0:03:36", "remaining_time": "0:04:40", "throughput": 1981.18, "total_tokens": 429584} |
| {"current_steps": 1095, "total_steps": 2500, "loss": 0.3491, "lr": 0.020735413713938236, "epoch": 8.76, "percentage": 43.8, "elapsed_time": "0:03:37", "remaining_time": "0:04:39", "throughput": 1982.22, "total_tokens": 431728} |
| {"current_steps": 1100, "total_steps": 2500, "loss": 0.3556, "lr": 0.0206385123108088, "epoch": 8.8, "percentage": 44.0, "elapsed_time": "0:03:38", "remaining_time": "0:04:38", "throughput": 1982.8, "total_tokens": 433712} |
| {"current_steps": 1105, "total_steps": 2500, "loss": 0.3446, "lr": 0.02054133609454409, "epoch": 8.84, "percentage": 44.2, "elapsed_time": "0:03:39", "remaining_time": "0:04:37", "throughput": 1983.44, "total_tokens": 435600} |
| {"current_steps": 1110, "total_steps": 2500, "loss": 0.3657, "lr": 0.020443889801375792, "epoch": 8.88, "percentage": 44.4, "elapsed_time": "0:03:40", "remaining_time": "0:04:36", "throughput": 1984.38, "total_tokens": 437680} |
| {"current_steps": 1115, "total_steps": 2500, "loss": 0.3817, "lr": 0.020346178180698758, "epoch": 8.92, "percentage": 44.6, "elapsed_time": "0:03:41", "remaining_time": "0:04:35", "throughput": 1984.89, "total_tokens": 439600} |
| {"current_steps": 1120, "total_steps": 2500, "loss": 0.3395, "lr": 0.020248205994839532, "epoch": 8.96, "percentage": 44.8, "elapsed_time": "0:03:42", "remaining_time": "0:04:33", "throughput": 1985.11, "total_tokens": 441392} |
| {"current_steps": 1125, "total_steps": 2500, "loss": 0.3446, "lr": 0.02014997801882423, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:03:43", "remaining_time": "0:04:33", "throughput": 1983.29, "total_tokens": 443168} |
| {"current_steps": 1125, "total_steps": 2500, "eval_loss": 0.35526347160339355, "epoch": 9.0, "percentage": 45.0, "elapsed_time": "0:03:44", "remaining_time": "0:04:34", "throughput": 1972.53, "total_tokens": 443168} |
| {"current_steps": 1130, "total_steps": 2500, "loss": 0.3573, "lr": 0.020051499040145825, "epoch": 9.04, "percentage": 45.2, "elapsed_time": "0:03:46", "remaining_time": "0:04:34", "throughput": 1966.17, "total_tokens": 445248} |
| {"current_steps": 1135, "total_steps": 2500, "loss": 0.341, "lr": 0.019952773858530802, "epoch": 9.08, "percentage": 45.4, "elapsed_time": "0:03:47", "remaining_time": "0:04:33", "throughput": 1966.87, "total_tokens": 447200} |
| {"current_steps": 1140, "total_steps": 2500, "loss": 0.3652, "lr": 0.019853807285705242, "epoch": 9.12, "percentage": 45.6, "elapsed_time": "0:03:48", "remaining_time": "0:04:32", "throughput": 1967.16, "total_tokens": 448992} |
| {"current_steps": 1145, "total_steps": 2500, "loss": 0.3719, "lr": 0.019754604145160278, "epoch": 9.16, "percentage": 45.8, "elapsed_time": "0:03:49", "remaining_time": "0:04:31", "throughput": 1967.85, "total_tokens": 450944} |
| {"current_steps": 1150, "total_steps": 2500, "loss": 0.3531, "lr": 0.01965516927191702, "epoch": 9.2, "percentage": 46.0, "elapsed_time": "0:03:50", "remaining_time": "0:04:30", "throughput": 1968.29, "total_tokens": 452832} |
| {"current_steps": 1155, "total_steps": 2500, "loss": 0.3589, "lr": 0.019555507512290914, "epoch": 9.24, "percentage": 46.2, "elapsed_time": "0:03:50", "remaining_time": "0:04:28", "throughput": 1968.66, "total_tokens": 454752} |
| {"current_steps": 1160, "total_steps": 2500, "loss": 0.3404, "lr": 0.019455623723655523, "epoch": 9.28, "percentage": 46.4, "elapsed_time": "0:03:51", "remaining_time": "0:04:27", "throughput": 1969.33, "total_tokens": 456640} |
| {"current_steps": 1165, "total_steps": 2500, "loss": 0.4698, "lr": 0.01935552277420579, "epoch": 9.32, "percentage": 46.6, "elapsed_time": "0:03:52", "remaining_time": "0:04:26", "throughput": 1969.64, "total_tokens": 458432} |
| {"current_steps": 1170, "total_steps": 2500, "loss": 0.3506, "lr": 0.019255209542720766, "epoch": 9.36, "percentage": 46.8, "elapsed_time": "0:03:53", "remaining_time": "0:04:25", "throughput": 1969.96, "total_tokens": 460224} |
| {"current_steps": 1175, "total_steps": 2500, "loss": 0.4111, "lr": 0.019154688918325835, "epoch": 9.4, "percentage": 47.0, "elapsed_time": "0:03:54", "remaining_time": "0:04:24", "throughput": 1970.95, "total_tokens": 462304} |
| {"current_steps": 1180, "total_steps": 2500, "loss": 0.3975, "lr": 0.019053965800254417, "epoch": 9.44, "percentage": 47.2, "elapsed_time": "0:03:55", "remaining_time": "0:04:23", "throughput": 1972.17, "total_tokens": 464448} |
| {"current_steps": 1185, "total_steps": 2500, "loss": 0.4151, "lr": 0.01895304509760919, "epoch": 9.48, "percentage": 47.4, "elapsed_time": "0:03:56", "remaining_time": "0:04:22", "throughput": 1973.27, "total_tokens": 466624} |
| {"current_steps": 1190, "total_steps": 2500, "loss": 0.3907, "lr": 0.018851931729122817, "epoch": 9.52, "percentage": 47.6, "elapsed_time": "0:03:57", "remaining_time": "0:04:21", "throughput": 1974.87, "total_tokens": 468928} |
| {"current_steps": 1195, "total_steps": 2500, "loss": 0.4194, "lr": 0.01875063062291823, "epoch": 9.56, "percentage": 47.8, "elapsed_time": "0:03:58", "remaining_time": "0:04:20", "throughput": 1975.02, "total_tokens": 470560} |
| {"current_steps": 1200, "total_steps": 2500, "loss": 0.3334, "lr": 0.01864914671626844, "epoch": 9.6, "percentage": 48.0, "elapsed_time": "0:03:59", "remaining_time": "0:04:19", "throughput": 1975.31, "total_tokens": 472352} |
| {"current_steps": 1205, "total_steps": 2500, "loss": 0.3885, "lr": 0.01854748495535587, "epoch": 9.64, "percentage": 48.2, "elapsed_time": "0:03:59", "remaining_time": "0:04:17", "throughput": 1975.35, "total_tokens": 474016} |
| {"current_steps": 1210, "total_steps": 2500, "loss": 0.3544, "lr": 0.018445650295031334, "epoch": 9.68, "percentage": 48.4, "elapsed_time": "0:04:00", "remaining_time": "0:04:16", "throughput": 1976.51, "total_tokens": 476160} |
| {"current_steps": 1215, "total_steps": 2500, "loss": 0.368, "lr": 0.018343647698572507, "epoch": 9.72, "percentage": 48.6, "elapsed_time": "0:04:01", "remaining_time": "0:04:15", "throughput": 1978.16, "total_tokens": 478496} |
| {"current_steps": 1220, "total_steps": 2500, "loss": 0.3938, "lr": 0.018241482137442033, "epoch": 9.76, "percentage": 48.8, "elapsed_time": "0:04:02", "remaining_time": "0:04:14", "throughput": 1979.11, "total_tokens": 480544} |
| {"current_steps": 1225, "total_steps": 2500, "loss": 0.3316, "lr": 0.018139158591045226, "epoch": 9.8, "percentage": 49.0, "elapsed_time": "0:04:03", "remaining_time": "0:04:13", "throughput": 1980.58, "total_tokens": 482848} |
| {"current_steps": 1230, "total_steps": 2500, "loss": 0.5302, "lr": 0.01803668204648739, "epoch": 9.84, "percentage": 49.2, "elapsed_time": "0:04:04", "remaining_time": "0:04:12", "throughput": 1981.13, "total_tokens": 484672} |
| {"current_steps": 1235, "total_steps": 2500, "loss": 0.3523, "lr": 0.017934057498330724, "epoch": 9.88, "percentage": 49.4, "elapsed_time": "0:04:05", "remaining_time": "0:04:11", "throughput": 1981.15, "total_tokens": 486400} |
| {"current_steps": 1240, "total_steps": 2500, "loss": 0.3899, "lr": 0.01783128994835093, "epoch": 9.92, "percentage": 49.6, "elapsed_time": "0:04:06", "remaining_time": "0:04:10", "throughput": 1981.89, "total_tokens": 488448} |
| {"current_steps": 1245, "total_steps": 2500, "loss": 0.3537, "lr": 0.017728384405293403, "epoch": 9.96, "percentage": 49.8, "elapsed_time": "0:04:07", "remaining_time": "0:04:09", "throughput": 1982.16, "total_tokens": 490304} |
| {"current_steps": 1250, "total_steps": 2500, "loss": 0.3702, "lr": 0.01762534588462914, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:04:08", "remaining_time": "0:04:08", "throughput": 1983.0, "total_tokens": 492304} |
| {"current_steps": 1250, "total_steps": 2500, "eval_loss": 0.3585449159145355, "epoch": 10.0, "percentage": 50.0, "elapsed_time": "0:04:09", "remaining_time": "0:04:09", "throughput": 1972.12, "total_tokens": 492304} |
| {"current_steps": 1255, "total_steps": 2500, "loss": 0.3596, "lr": 0.017522179408310273, "epoch": 10.04, "percentage": 50.2, "elapsed_time": "0:04:11", "remaining_time": "0:04:09", "throughput": 1966.44, "total_tokens": 494384} |
| {"current_steps": 1260, "total_steps": 2500, "loss": 0.3653, "lr": 0.01741889000452531, "epoch": 10.08, "percentage": 50.4, "elapsed_time": "0:04:12", "remaining_time": "0:04:08", "throughput": 1967.32, "total_tokens": 496528} |
| {"current_steps": 1265, "total_steps": 2500, "loss": 0.3874, "lr": 0.017315482707454068, "epoch": 10.12, "percentage": 50.6, "elapsed_time": "0:04:13", "remaining_time": "0:04:07", "throughput": 1967.47, "total_tokens": 498224} |
| {"current_steps": 1270, "total_steps": 2500, "loss": 0.2909, "lr": 0.017211962557022325, "epoch": 10.16, "percentage": 50.8, "elapsed_time": "0:04:14", "remaining_time": "0:04:06", "throughput": 1968.42, "total_tokens": 500336} |
| {"current_steps": 1275, "total_steps": 2500, "loss": 0.4151, "lr": 0.017108334598656162, "epoch": 10.2, "percentage": 51.0, "elapsed_time": "0:04:15", "remaining_time": "0:04:05", "throughput": 1969.41, "total_tokens": 502384} |
| {"current_steps": 1280, "total_steps": 2500, "loss": 0.3641, "lr": 0.01700460388303609, "epoch": 10.24, "percentage": 51.2, "elapsed_time": "0:04:15", "remaining_time": "0:04:03", "throughput": 1970.16, "total_tokens": 504304} |
| {"current_steps": 1285, "total_steps": 2500, "loss": 0.3855, "lr": 0.01690077546585082, "epoch": 10.28, "percentage": 51.4, "elapsed_time": "0:04:16", "remaining_time": "0:04:02", "throughput": 1970.45, "total_tokens": 506160} |
| {"current_steps": 1290, "total_steps": 2500, "loss": 0.3549, "lr": 0.016796854407550944, "epoch": 10.32, "percentage": 51.6, "elapsed_time": "0:04:17", "remaining_time": "0:04:01", "throughput": 1971.19, "total_tokens": 508208} |
| {"current_steps": 1295, "total_steps": 2500, "loss": 0.3401, "lr": 0.016692845773102222, "epoch": 10.36, "percentage": 51.8, "elapsed_time": "0:04:18", "remaining_time": "0:04:00", "throughput": 1972.51, "total_tokens": 510416} |
| {"current_steps": 1300, "total_steps": 2500, "loss": 0.385, "lr": 0.01658875463173878, "epoch": 10.4, "percentage": 52.0, "elapsed_time": "0:04:19", "remaining_time": "0:03:59", "throughput": 1973.04, "total_tokens": 512336} |
| {"current_steps": 1305, "total_steps": 2500, "loss": 0.3577, "lr": 0.016484586056715975, "epoch": 10.44, "percentage": 52.2, "elapsed_time": "0:04:20", "remaining_time": "0:03:58", "throughput": 1974.1, "total_tokens": 514480} |
| {"current_steps": 1310, "total_steps": 2500, "loss": 0.3635, "lr": 0.016380345125063207, "epoch": 10.48, "percentage": 52.4, "elapsed_time": "0:04:21", "remaining_time": "0:03:57", "throughput": 1974.46, "total_tokens": 516304} |
| {"current_steps": 1315, "total_steps": 2500, "loss": 0.3632, "lr": 0.01627603691733642, "epoch": 10.52, "percentage": 52.6, "elapsed_time": "0:04:22", "remaining_time": "0:03:56", "throughput": 1976.1, "total_tokens": 518736} |
| {"current_steps": 1320, "total_steps": 2500, "loss": 0.3636, "lr": 0.016171666517370502, "epoch": 10.56, "percentage": 52.8, "elapsed_time": "0:04:23", "remaining_time": "0:03:55", "throughput": 1976.56, "total_tokens": 520592} |
| {"current_steps": 1325, "total_steps": 2500, "loss": 0.3695, "lr": 0.016067239012031505, "epoch": 10.6, "percentage": 53.0, "elapsed_time": "0:04:24", "remaining_time": "0:03:54", "throughput": 1976.78, "total_tokens": 522384} |
| {"current_steps": 1330, "total_steps": 2500, "loss": 0.383, "lr": 0.015962759490968707, "epoch": 10.64, "percentage": 53.2, "elapsed_time": "0:04:25", "remaining_time": "0:03:53", "throughput": 1977.71, "total_tokens": 524496} |
| {"current_steps": 1335, "total_steps": 2500, "loss": 0.3311, "lr": 0.015858233046366573, "epoch": 10.68, "percentage": 53.4, "elapsed_time": "0:04:26", "remaining_time": "0:03:52", "throughput": 1977.93, "total_tokens": 526224} |
| {"current_steps": 1340, "total_steps": 2500, "loss": 0.2714, "lr": 0.015753664772696543, "epoch": 10.72, "percentage": 53.6, "elapsed_time": "0:04:26", "remaining_time": "0:03:51", "throughput": 1978.51, "total_tokens": 528176} |
| {"current_steps": 1345, "total_steps": 2500, "loss": 0.3212, "lr": 0.015649059766468756, "epoch": 10.76, "percentage": 53.8, "elapsed_time": "0:04:27", "remaining_time": "0:03:49", "throughput": 1978.65, "total_tokens": 529872} |
| {"current_steps": 1350, "total_steps": 2500, "loss": 0.3847, "lr": 0.015544423125983637, "epoch": 10.8, "percentage": 54.0, "elapsed_time": "0:04:28", "remaining_time": "0:03:48", "throughput": 1978.66, "total_tokens": 531536} |
| {"current_steps": 1355, "total_steps": 2500, "loss": 0.3618, "lr": 0.015439759951083426, "epoch": 10.84, "percentage": 54.2, "elapsed_time": "0:04:29", "remaining_time": "0:03:47", "throughput": 1979.8, "total_tokens": 533712} |
| {"current_steps": 1360, "total_steps": 2500, "loss": 0.3421, "lr": 0.015335075342903605, "epoch": 10.88, "percentage": 54.4, "elapsed_time": "0:04:30", "remaining_time": "0:03:46", "throughput": 1980.05, "total_tokens": 535568} |
| {"current_steps": 1365, "total_steps": 2500, "loss": 0.486, "lr": 0.015230374403624295, "epoch": 10.92, "percentage": 54.6, "elapsed_time": "0:04:31", "remaining_time": "0:03:45", "throughput": 1980.69, "total_tokens": 537488} |
| {"current_steps": 1370, "total_steps": 2500, "loss": 0.3626, "lr": 0.01512566223622156, "epoch": 10.96, "percentage": 54.8, "elapsed_time": "0:04:32", "remaining_time": "0:03:44", "throughput": 1981.91, "total_tokens": 539760} |
| {"current_steps": 1375, "total_steps": 2500, "loss": 0.4168, "lr": 0.015020943944218717, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:04:33", "remaining_time": "0:03:43", "throughput": 1981.86, "total_tokens": 541504} |
| {"current_steps": 1375, "total_steps": 2500, "eval_loss": 0.35911640524864197, "epoch": 11.0, "percentage": 55.0, "elapsed_time": "0:04:34", "remaining_time": "0:03:44", "throughput": 1973.13, "total_tokens": 541504} |
| {"current_steps": 1380, "total_steps": 2500, "loss": 0.3453, "lr": 0.014916224631437578, "epoch": 11.04, "percentage": 55.2, "elapsed_time": "0:04:36", "remaining_time": "0:03:44", "throughput": 1967.66, "total_tokens": 543520} |
| {"current_steps": 1385, "total_steps": 2500, "loss": 0.386, "lr": 0.014811509401749709, "epoch": 11.08, "percentage": 55.4, "elapsed_time": "0:04:37", "remaining_time": "0:03:43", "throughput": 1967.8, "total_tokens": 545280} |
| {"current_steps": 1390, "total_steps": 2500, "loss": 0.3797, "lr": 0.014706803358827684, "epoch": 11.12, "percentage": 55.6, "elapsed_time": "0:04:37", "remaining_time": "0:03:41", "throughput": 1967.86, "total_tokens": 547008} |
| {"current_steps": 1395, "total_steps": 2500, "loss": 0.3048, "lr": 0.014602111605896308, "epoch": 11.16, "percentage": 55.8, "elapsed_time": "0:04:38", "remaining_time": "0:03:40", "throughput": 1968.15, "total_tokens": 548832} |
| {"current_steps": 1400, "total_steps": 2500, "loss": 0.3428, "lr": 0.014497439245483927, "epoch": 11.2, "percentage": 56.0, "elapsed_time": "0:04:39", "remaining_time": "0:03:39", "throughput": 1969.13, "total_tokens": 551040} |
| {"current_steps": 1405, "total_steps": 2500, "loss": 0.3655, "lr": 0.014392791379173714, "epoch": 11.24, "percentage": 56.2, "elapsed_time": "0:04:40", "remaining_time": "0:03:38", "throughput": 1969.55, "total_tokens": 552832} |
| {"current_steps": 1410, "total_steps": 2500, "loss": 0.3799, "lr": 0.014288173107355036, "epoch": 11.28, "percentage": 56.4, "elapsed_time": "0:04:41", "remaining_time": "0:03:37", "throughput": 1969.49, "total_tokens": 554464} |
| {"current_steps": 1415, "total_steps": 2500, "loss": 0.4039, "lr": 0.014183589528974866, "epoch": 11.32, "percentage": 56.6, "elapsed_time": "0:04:42", "remaining_time": "0:03:36", "throughput": 1970.62, "total_tokens": 556672} |
| {"current_steps": 1420, "total_steps": 2500, "loss": 0.3737, "lr": 0.01407904574128927, "epoch": 11.36, "percentage": 56.8, "elapsed_time": "0:04:43", "remaining_time": "0:03:35", "throughput": 1971.16, "total_tokens": 558560} |
| {"current_steps": 1425, "total_steps": 2500, "loss": 0.3554, "lr": 0.01397454683961497, "epoch": 11.4, "percentage": 57.0, "elapsed_time": "0:04:44", "remaining_time": "0:03:34", "throughput": 1971.59, "total_tokens": 560480} |
| {"current_steps": 1430, "total_steps": 2500, "loss": 0.3556, "lr": 0.013870097917081011, "epoch": 11.44, "percentage": 57.2, "elapsed_time": "0:04:45", "remaining_time": "0:03:33", "throughput": 1971.82, "total_tokens": 562336} |
| {"current_steps": 1435, "total_steps": 2500, "loss": 0.3533, "lr": 0.013765704064380513, "epoch": 11.48, "percentage": 57.4, "elapsed_time": "0:04:46", "remaining_time": "0:03:32", "throughput": 1972.29, "total_tokens": 564320} |
| {"current_steps": 1440, "total_steps": 2500, "loss": 0.3656, "lr": 0.013661370369522592, "epoch": 11.52, "percentage": 57.6, "elapsed_time": "0:04:47", "remaining_time": "0:03:31", "throughput": 1973.08, "total_tokens": 566336} |
| {"current_steps": 1445, "total_steps": 2500, "loss": 0.3966, "lr": 0.01355710191758434, "epoch": 11.56, "percentage": 57.8, "elapsed_time": "0:04:47", "remaining_time": "0:03:30", "throughput": 1973.44, "total_tokens": 568224} |
| {"current_steps": 1450, "total_steps": 2500, "loss": 0.3779, "lr": 0.013452903790462992, "epoch": 11.6, "percentage": 58.0, "elapsed_time": "0:04:48", "remaining_time": "0:03:29", "throughput": 1973.67, "total_tokens": 570016} |
| {"current_steps": 1455, "total_steps": 2500, "loss": 0.4176, "lr": 0.01334878106662827, "epoch": 11.64, "percentage": 58.2, "elapsed_time": "0:04:49", "remaining_time": "0:03:28", "throughput": 1975.63, "total_tokens": 572640} |
| {"current_steps": 1460, "total_steps": 2500, "loss": 0.3374, "lr": 0.01324473882087484, "epoch": 11.68, "percentage": 58.4, "elapsed_time": "0:04:50", "remaining_time": "0:03:27", "throughput": 1975.78, "total_tokens": 574464} |
| {"current_steps": 1465, "total_steps": 2500, "loss": 0.3622, "lr": 0.013140782124074973, "epoch": 11.72, "percentage": 58.6, "elapsed_time": "0:04:51", "remaining_time": "0:03:26", "throughput": 1976.11, "total_tokens": 576288} |
| {"current_steps": 1470, "total_steps": 2500, "loss": 0.3691, "lr": 0.013036916042931416, "epoch": 11.76, "percentage": 58.8, "elapsed_time": "0:04:52", "remaining_time": "0:03:24", "throughput": 1977.2, "total_tokens": 578464} |
| {"current_steps": 1475, "total_steps": 2500, "loss": 0.3356, "lr": 0.01293314563973043, "epoch": 11.8, "percentage": 59.0, "elapsed_time": "0:04:53", "remaining_time": "0:03:24", "throughput": 1978.46, "total_tokens": 580832} |
| {"current_steps": 1480, "total_steps": 2500, "loss": 0.3673, "lr": 0.012829475972095062, "epoch": 11.84, "percentage": 59.2, "elapsed_time": "0:04:54", "remaining_time": "0:03:22", "throughput": 1979.35, "total_tokens": 582912} |
| {"current_steps": 1485, "total_steps": 2500, "loss": 0.342, "lr": 0.01272591209273867, "epoch": 11.88, "percentage": 59.4, "elapsed_time": "0:04:55", "remaining_time": "0:03:21", "throughput": 1980.01, "total_tokens": 584960} |
| {"current_steps": 1490, "total_steps": 2500, "loss": 0.355, "lr": 0.012622459049218613, "epoch": 11.92, "percentage": 59.6, "elapsed_time": "0:04:56", "remaining_time": "0:03:20", "throughput": 1980.93, "total_tokens": 587104} |
| {"current_steps": 1495, "total_steps": 2500, "loss": 0.3679, "lr": 0.012519121883690285, "epoch": 11.96, "percentage": 59.8, "elapsed_time": "0:04:57", "remaining_time": "0:03:19", "throughput": 1981.34, "total_tokens": 589024} |
| {"current_steps": 1500, "total_steps": 2500, "loss": 0.3828, "lr": 0.012415905632661352, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:04:58", "remaining_time": "0:03:18", "throughput": 1981.54, "total_tokens": 590864} |
| {"current_steps": 1500, "total_steps": 2500, "eval_loss": 0.3548784554004669, "epoch": 12.0, "percentage": 60.0, "elapsed_time": "0:04:59", "remaining_time": "0:03:19", "throughput": 1973.54, "total_tokens": 590864} |
| {"current_steps": 1505, "total_steps": 2500, "loss": 0.3504, "lr": 0.012312815326746265, "epoch": 12.04, "percentage": 60.2, "elapsed_time": "0:05:01", "remaining_time": "0:03:19", "throughput": 1968.19, "total_tokens": 592592} |
| {"current_steps": 1510, "total_steps": 2500, "loss": 0.3501, "lr": 0.012209855990421101, "epoch": 12.08, "percentage": 60.4, "elapsed_time": "0:05:01", "remaining_time": "0:03:17", "throughput": 1968.53, "total_tokens": 594480} |
| {"current_steps": 1515, "total_steps": 2500, "loss": 0.3366, "lr": 0.012107032641778662, "epoch": 12.12, "percentage": 60.6, "elapsed_time": "0:05:02", "remaining_time": "0:03:16", "throughput": 1969.77, "total_tokens": 596784} |
| {"current_steps": 1520, "total_steps": 2500, "loss": 0.3403, "lr": 0.012004350292283895, "epoch": 12.16, "percentage": 60.8, "elapsed_time": "0:05:03", "remaining_time": "0:03:15", "throughput": 1970.0, "total_tokens": 598640} |
| {"current_steps": 1525, "total_steps": 2500, "loss": 0.3801, "lr": 0.01190181394652965, "epoch": 12.2, "percentage": 61.0, "elapsed_time": "0:05:04", "remaining_time": "0:03:14", "throughput": 1970.12, "total_tokens": 600400} |
| {"current_steps": 1530, "total_steps": 2500, "loss": 0.3541, "lr": 0.01179942860199276, "epoch": 12.24, "percentage": 61.2, "elapsed_time": "0:05:05", "remaining_time": "0:03:13", "throughput": 1970.87, "total_tokens": 602448} |
| {"current_steps": 1535, "total_steps": 2500, "loss": 0.3558, "lr": 0.011697199248790476, "epoch": 12.28, "percentage": 61.4, "elapsed_time": "0:05:06", "remaining_time": "0:03:12", "throughput": 1971.86, "total_tokens": 604624} |
| {"current_steps": 1540, "total_steps": 2500, "loss": 0.346, "lr": 0.011595130869437252, "epoch": 12.32, "percentage": 61.6, "elapsed_time": "0:05:07", "remaining_time": "0:03:11", "throughput": 1972.86, "total_tokens": 606800} |
| {"current_steps": 1545, "total_steps": 2500, "loss": 0.3362, "lr": 0.011493228438601896, "epoch": 12.36, "percentage": 61.8, "elapsed_time": "0:05:08", "remaining_time": "0:03:10", "throughput": 1973.08, "total_tokens": 608592} |
| {"current_steps": 1550, "total_steps": 2500, "loss": 0.366, "lr": 0.011391496922865122, "epoch": 12.4, "percentage": 62.0, "elapsed_time": "0:05:09", "remaining_time": "0:03:09", "throughput": 1973.99, "total_tokens": 610736} |
| {"current_steps": 1555, "total_steps": 2500, "loss": 0.3446, "lr": 0.011289941280477493, "epoch": 12.44, "percentage": 62.2, "elapsed_time": "0:05:10", "remaining_time": "0:03:08", "throughput": 1974.12, "total_tokens": 612496} |
| {"current_steps": 1560, "total_steps": 2500, "loss": 0.3491, "lr": 0.011188566461117747, "epoch": 12.48, "percentage": 62.4, "elapsed_time": "0:05:11", "remaining_time": "0:03:07", "throughput": 1975.32, "total_tokens": 614800} |
| {"current_steps": 1565, "total_steps": 2500, "loss": 0.3577, "lr": 0.01108737740565155, "epoch": 12.52, "percentage": 62.6, "elapsed_time": "0:05:12", "remaining_time": "0:03:06", "throughput": 1975.34, "total_tokens": 616464} |
| {"current_steps": 1570, "total_steps": 2500, "loss": 0.3834, "lr": 0.01098637904589071, "epoch": 12.56, "percentage": 62.8, "elapsed_time": "0:05:13", "remaining_time": "0:03:05", "throughput": 1975.93, "total_tokens": 618512} |
| {"current_steps": 1575, "total_steps": 2500, "loss": 0.3602, "lr": 0.010885576304352803, "epoch": 12.6, "percentage": 63.0, "elapsed_time": "0:05:13", "remaining_time": "0:03:04", "throughput": 1976.34, "total_tokens": 620432} |
| {"current_steps": 1580, "total_steps": 2500, "loss": 0.3469, "lr": 0.010784974094021234, "epoch": 12.64, "percentage": 63.2, "elapsed_time": "0:05:14", "remaining_time": "0:03:03", "throughput": 1977.26, "total_tokens": 622704} |
| {"current_steps": 1585, "total_steps": 2500, "loss": 0.3565, "lr": 0.01068457731810581, "epoch": 12.68, "percentage": 63.4, "elapsed_time": "0:05:15", "remaining_time": "0:03:02", "throughput": 1977.96, "total_tokens": 624720} |
| {"current_steps": 1590, "total_steps": 2500, "loss": 0.3329, "lr": 0.010584390869803747, "epoch": 12.72, "percentage": 63.6, "elapsed_time": "0:05:16", "remaining_time": "0:03:01", "throughput": 1978.66, "total_tokens": 626800} |
| {"current_steps": 1595, "total_steps": 2500, "loss": 0.3686, "lr": 0.010484419632061193, "epoch": 12.76, "percentage": 63.8, "elapsed_time": "0:05:17", "remaining_time": "0:03:00", "throughput": 1979.94, "total_tokens": 629168} |
| {"current_steps": 1600, "total_steps": 2500, "loss": 0.3333, "lr": 0.010384668477335244, "epoch": 12.8, "percentage": 64.0, "elapsed_time": "0:05:18", "remaining_time": "0:02:59", "throughput": 1980.6, "total_tokens": 631248} |
| {"current_steps": 1605, "total_steps": 2500, "loss": 0.366, "lr": 0.010285142267356443, "epoch": 12.84, "percentage": 64.2, "elapsed_time": "0:05:19", "remaining_time": "0:02:58", "throughput": 1980.88, "total_tokens": 633008} |
| {"current_steps": 1610, "total_steps": 2500, "loss": 0.334, "lr": 0.010185845852891858, "epoch": 12.88, "percentage": 64.4, "elapsed_time": "0:05:20", "remaining_time": "0:02:57", "throughput": 1980.99, "total_tokens": 634768} |
| {"current_steps": 1615, "total_steps": 2500, "loss": 0.3646, "lr": 0.010086784073508647, "epoch": 12.92, "percentage": 64.6, "elapsed_time": "0:05:21", "remaining_time": "0:02:56", "throughput": 1981.68, "total_tokens": 636848} |
| {"current_steps": 1620, "total_steps": 2500, "loss": 0.359, "lr": 0.009987961757338177, "epoch": 12.96, "percentage": 64.8, "elapsed_time": "0:05:22", "remaining_time": "0:02:55", "throughput": 1982.07, "total_tokens": 638832} |
| {"current_steps": 1625, "total_steps": 2500, "loss": 0.3636, "lr": 0.009889383720840726, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:05:23", "remaining_time": "0:02:54", "throughput": 1982.31, "total_tokens": 640656} |
| {"current_steps": 1625, "total_steps": 2500, "eval_loss": 0.35853561758995056, "epoch": 13.0, "percentage": 65.0, "elapsed_time": "0:05:24", "remaining_time": "0:02:54", "throughput": 1974.9, "total_tokens": 640656} |
| {"current_steps": 1630, "total_steps": 2500, "loss": 0.3676, "lr": 0.009791054768570714, "epoch": 13.04, "percentage": 65.2, "elapsed_time": "0:05:26", "remaining_time": "0:02:54", "throughput": 1970.41, "total_tokens": 642480} |
| {"current_steps": 1635, "total_steps": 2500, "loss": 0.3359, "lr": 0.009692979692942557, "epoch": 13.08, "percentage": 65.4, "elapsed_time": "0:05:26", "remaining_time": "0:02:52", "throughput": 1970.44, "total_tokens": 644144} |
| {"current_steps": 1640, "total_steps": 2500, "loss": 0.3544, "lr": 0.00959516327399708, "epoch": 13.12, "percentage": 65.6, "elapsed_time": "0:05:27", "remaining_time": "0:02:51", "throughput": 1972.06, "total_tokens": 646736} |
| {"current_steps": 1645, "total_steps": 2500, "loss": 0.3524, "lr": 0.009497610279168539, "epoch": 13.16, "percentage": 65.8, "elapsed_time": "0:05:28", "remaining_time": "0:02:50", "throughput": 1973.16, "total_tokens": 648976} |
| {"current_steps": 1650, "total_steps": 2500, "loss": 0.3502, "lr": 0.009400325463052278, "epoch": 13.2, "percentage": 66.0, "elapsed_time": "0:05:29", "remaining_time": "0:02:49", "throughput": 1973.55, "total_tokens": 650960} |
| {"current_steps": 1655, "total_steps": 2500, "loss": 0.3469, "lr": 0.009303313567172984, "epoch": 13.24, "percentage": 66.2, "elapsed_time": "0:05:30", "remaining_time": "0:02:48", "throughput": 1974.37, "total_tokens": 653040} |
| {"current_steps": 1660, "total_steps": 2500, "loss": 0.3764, "lr": 0.0092065793197536, "epoch": 13.28, "percentage": 66.4, "elapsed_time": "0:05:31", "remaining_time": "0:02:47", "throughput": 1975.29, "total_tokens": 655280} |
| {"current_steps": 1665, "total_steps": 2500, "loss": 0.4314, "lr": 0.009110127435484875, "epoch": 13.32, "percentage": 66.6, "elapsed_time": "0:05:32", "remaining_time": "0:02:46", "throughput": 1976.59, "total_tokens": 657712} |
| {"current_steps": 1670, "total_steps": 2500, "loss": 0.3531, "lr": 0.00901396261529557, "epoch": 13.36, "percentage": 66.8, "elapsed_time": "0:05:33", "remaining_time": "0:02:45", "throughput": 1976.93, "total_tokens": 659504} |
| {"current_steps": 1675, "total_steps": 2500, "loss": 0.37, "lr": 0.008918089546123364, "epoch": 13.4, "percentage": 67.0, "elapsed_time": "0:05:34", "remaining_time": "0:02:44", "throughput": 1977.28, "total_tokens": 661424} |
| {"current_steps": 1680, "total_steps": 2500, "loss": 0.3335, "lr": 0.008822512900686379, "epoch": 13.44, "percentage": 67.2, "elapsed_time": "0:05:35", "remaining_time": "0:02:43", "throughput": 1977.92, "total_tokens": 663568} |
| {"current_steps": 1685, "total_steps": 2500, "loss": 0.341, "lr": 0.00872723733725548, "epoch": 13.48, "percentage": 67.4, "elapsed_time": "0:05:36", "remaining_time": "0:02:42", "throughput": 1978.03, "total_tokens": 665328} |
| {"current_steps": 1690, "total_steps": 2500, "loss": 0.3613, "lr": 0.008632267499427216, "epoch": 13.52, "percentage": 67.6, "elapsed_time": "0:05:37", "remaining_time": "0:02:41", "throughput": 1978.13, "total_tokens": 667216} |
| {"current_steps": 1695, "total_steps": 2500, "loss": 0.3376, "lr": 0.008537608015897499, "epoch": 13.56, "percentage": 67.8, "elapsed_time": "0:05:38", "remaining_time": "0:02:40", "throughput": 1978.83, "total_tokens": 669264} |
| {"current_steps": 1700, "total_steps": 2500, "loss": 0.3513, "lr": 0.00844326350023601, "epoch": 13.6, "percentage": 68.0, "elapsed_time": "0:05:39", "remaining_time": "0:02:39", "throughput": 1978.92, "total_tokens": 670960} |
| {"current_steps": 1705, "total_steps": 2500, "loss": 0.3506, "lr": 0.008349238550661338, "epoch": 13.64, "percentage": 68.2, "elapsed_time": "0:05:39", "remaining_time": "0:02:38", "throughput": 1980.02, "total_tokens": 673200} |
| {"current_steps": 1710, "total_steps": 2500, "loss": 0.3407, "lr": 0.00825553774981689, "epoch": 13.68, "percentage": 68.4, "elapsed_time": "0:05:40", "remaining_time": "0:02:37", "throughput": 1980.29, "total_tokens": 675088} |
| {"current_steps": 1715, "total_steps": 2500, "loss": 0.3082, "lr": 0.008162165664547503, "epoch": 13.72, "percentage": 68.6, "elapsed_time": "0:05:41", "remaining_time": "0:02:36", "throughput": 1980.72, "total_tokens": 676976} |
| {"current_steps": 1720, "total_steps": 2500, "loss": 0.4216, "lr": 0.008069126845676873, "epoch": 13.76, "percentage": 68.8, "elapsed_time": "0:05:42", "remaining_time": "0:02:35", "throughput": 1980.99, "total_tokens": 678800} |
| {"current_steps": 1725, "total_steps": 2500, "loss": 0.3673, "lr": 0.007976425827785783, "epoch": 13.8, "percentage": 69.0, "elapsed_time": "0:05:43", "remaining_time": "0:02:34", "throughput": 1981.33, "total_tokens": 680656} |
| {"current_steps": 1730, "total_steps": 2500, "loss": 0.3805, "lr": 0.007884067128991062, "epoch": 13.84, "percentage": 69.2, "elapsed_time": "0:05:44", "remaining_time": "0:02:33", "throughput": 1981.68, "total_tokens": 682640} |
| {"current_steps": 1735, "total_steps": 2500, "loss": 0.4055, "lr": 0.007792055250725389, "epoch": 13.88, "percentage": 69.4, "elapsed_time": "0:05:45", "remaining_time": "0:02:32", "throughput": 1981.77, "total_tokens": 684464} |
| {"current_steps": 1740, "total_steps": 2500, "loss": 0.3635, "lr": 0.007700394677517904, "epoch": 13.92, "percentage": 69.6, "elapsed_time": "0:05:46", "remaining_time": "0:02:31", "throughput": 1982.38, "total_tokens": 686480} |
| {"current_steps": 1745, "total_steps": 2500, "loss": 0.3882, "lr": 0.007609089876775628, "epoch": 13.96, "percentage": 69.8, "elapsed_time": "0:05:47", "remaining_time": "0:02:30", "throughput": 1982.56, "total_tokens": 688272} |
| {"current_steps": 1750, "total_steps": 2500, "loss": 0.3249, "lr": 0.00751814529856574, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:05:47", "remaining_time": "0:02:29", "throughput": 1982.33, "total_tokens": 689776} |
| {"current_steps": 1750, "total_steps": 2500, "eval_loss": 0.3582947552204132, "epoch": 14.0, "percentage": 70.0, "elapsed_time": "0:05:49", "remaining_time": "0:02:29", "throughput": 1975.48, "total_tokens": 689776} |
| {"current_steps": 1755, "total_steps": 2500, "loss": 0.3594, "lr": 0.007427565375398675, "epoch": 14.04, "percentage": 70.2, "elapsed_time": "0:05:50", "remaining_time": "0:02:28", "throughput": 1971.73, "total_tokens": 691952} |
| {"current_steps": 1760, "total_steps": 2500, "loss": 0.3237, "lr": 0.007337354522012103, "epoch": 14.08, "percentage": 70.4, "elapsed_time": "0:05:51", "remaining_time": "0:02:27", "throughput": 1972.17, "total_tokens": 693904} |
| {"current_steps": 1765, "total_steps": 2500, "loss": 0.365, "lr": 0.007247517135155747, "epoch": 14.12, "percentage": 70.6, "elapsed_time": "0:05:52", "remaining_time": "0:02:26", "throughput": 1972.3, "total_tokens": 695728} |
| {"current_steps": 1770, "total_steps": 2500, "loss": 0.3534, "lr": 0.007158057593377102, "epoch": 14.16, "percentage": 70.8, "elapsed_time": "0:05:53", "remaining_time": "0:02:25", "throughput": 1972.4, "total_tokens": 697424} |
| {"current_steps": 1775, "total_steps": 2500, "loss": 0.3457, "lr": 0.007068980256808024, "epoch": 14.2, "percentage": 71.0, "elapsed_time": "0:05:54", "remaining_time": "0:02:24", "throughput": 1972.65, "total_tokens": 699312} |
| {"current_steps": 1780, "total_steps": 2500, "loss": 0.346, "lr": 0.006980289466952232, "epoch": 14.24, "percentage": 71.2, "elapsed_time": "0:05:55", "remaining_time": "0:02:23", "throughput": 1973.25, "total_tokens": 701328} |
| {"current_steps": 1785, "total_steps": 2500, "loss": 0.3529, "lr": 0.006891989546473694, "epoch": 14.28, "percentage": 71.4, "elapsed_time": "0:05:56", "remaining_time": "0:02:22", "throughput": 1974.03, "total_tokens": 703536} |
| {"current_steps": 1790, "total_steps": 2500, "loss": 0.3274, "lr": 0.006804084798985964, "epoch": 14.32, "percentage": 71.6, "elapsed_time": "0:05:57", "remaining_time": "0:02:21", "throughput": 1974.23, "total_tokens": 705392} |
| {"current_steps": 1795, "total_steps": 2500, "loss": 0.3727, "lr": 0.006716579508842421, "epoch": 14.36, "percentage": 71.8, "elapsed_time": "0:05:58", "remaining_time": "0:02:20", "throughput": 1974.82, "total_tokens": 707408} |
| {"current_steps": 1800, "total_steps": 2500, "loss": 0.3559, "lr": 0.006629477940927452, "epoch": 14.4, "percentage": 72.0, "elapsed_time": "0:05:59", "remaining_time": "0:02:19", "throughput": 1975.59, "total_tokens": 709616} |
| {"current_steps": 1805, "total_steps": 2500, "loss": 0.3668, "lr": 0.006542784340448598, "epoch": 14.44, "percentage": 72.2, "elapsed_time": "0:06:00", "remaining_time": "0:02:18", "throughput": 1975.66, "total_tokens": 711248} |
| {"current_steps": 1810, "total_steps": 2500, "loss": 0.3606, "lr": 0.0064565029327296386, "epoch": 14.48, "percentage": 72.4, "elapsed_time": "0:06:00", "remaining_time": "0:02:17", "throughput": 1975.84, "total_tokens": 713104} |
| {"current_steps": 1815, "total_steps": 2500, "loss": 0.3606, "lr": 0.006370637923004662, "epoch": 14.52, "percentage": 72.6, "elapsed_time": "0:06:01", "remaining_time": "0:02:16", "throughput": 1975.99, "total_tokens": 714896} |
| {"current_steps": 1820, "total_steps": 2500, "loss": 0.3563, "lr": 0.006285193496213102, "epoch": 14.56, "percentage": 72.8, "elapsed_time": "0:06:02", "remaining_time": "0:02:15", "throughput": 1976.74, "total_tokens": 716976} |
| {"current_steps": 1825, "total_steps": 2500, "loss": 0.3463, "lr": 0.006200173816795781, "epoch": 14.6, "percentage": 73.0, "elapsed_time": "0:06:03", "remaining_time": "0:02:14", "throughput": 1976.99, "total_tokens": 718800} |
| {"current_steps": 1830, "total_steps": 2500, "loss": 0.348, "lr": 0.006115583028491929, "epoch": 14.64, "percentage": 73.2, "elapsed_time": "0:06:04", "remaining_time": "0:02:13", "throughput": 1977.31, "total_tokens": 720720} |
| {"current_steps": 1835, "total_steps": 2500, "loss": 0.3758, "lr": 0.0060314252541372225, "epoch": 14.68, "percentage": 73.4, "elapsed_time": "0:06:05", "remaining_time": "0:02:12", "throughput": 1977.57, "total_tokens": 722608} |
| {"current_steps": 1840, "total_steps": 2500, "loss": 0.3687, "lr": 0.005947704595462851, "epoch": 14.72, "percentage": 73.6, "elapsed_time": "0:06:06", "remaining_time": "0:02:11", "throughput": 1978.96, "total_tokens": 725136} |
| {"current_steps": 1845, "total_steps": 2500, "loss": 0.3528, "lr": 0.005864425132895618, "epoch": 14.76, "percentage": 73.8, "elapsed_time": "0:06:07", "remaining_time": "0:02:10", "throughput": 1980.01, "total_tokens": 727472} |
| {"current_steps": 1850, "total_steps": 2500, "loss": 0.4763, "lr": 0.005781590925359033, "epoch": 14.8, "percentage": 74.0, "elapsed_time": "0:06:08", "remaining_time": "0:02:09", "throughput": 1980.34, "total_tokens": 729392} |
| {"current_steps": 1855, "total_steps": 2500, "loss": 0.3847, "lr": 0.00569920601007551, "epoch": 14.84, "percentage": 74.2, "elapsed_time": "0:06:09", "remaining_time": "0:02:08", "throughput": 1981.17, "total_tokens": 731632} |
| {"current_steps": 1860, "total_steps": 2500, "loss": 0.3347, "lr": 0.005617274402369594, "epoch": 14.88, "percentage": 74.4, "elapsed_time": "0:06:10", "remaining_time": "0:02:07", "throughput": 1981.5, "total_tokens": 733520} |
| {"current_steps": 1865, "total_steps": 2500, "loss": 0.3472, "lr": 0.005535800095472255, "epoch": 14.92, "percentage": 74.6, "elapsed_time": "0:06:11", "remaining_time": "0:02:06", "throughput": 1982.07, "total_tokens": 735472} |
| {"current_steps": 1870, "total_steps": 2500, "loss": 0.3477, "lr": 0.005454787060326272, "epoch": 14.96, "percentage": 74.8, "elapsed_time": "0:06:11", "remaining_time": "0:02:05", "throughput": 1982.08, "total_tokens": 737136} |
| {"current_steps": 1875, "total_steps": 2500, "loss": 0.3689, "lr": 0.005374239245392678, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:06:12", "remaining_time": "0:02:04", "throughput": 1982.38, "total_tokens": 739024} |
| {"current_steps": 1875, "total_steps": 2500, "eval_loss": 0.3553641736507416, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "0:06:14", "remaining_time": "0:02:04", "throughput": 1975.97, "total_tokens": 739024} |
| {"current_steps": 1880, "total_steps": 2500, "loss": 0.3244, "lr": 0.005294160576458339, "epoch": 15.04, "percentage": 75.2, "elapsed_time": "0:06:15", "remaining_time": "0:02:03", "throughput": 1971.95, "total_tokens": 741072} |
| {"current_steps": 1885, "total_steps": 2500, "loss": 0.3495, "lr": 0.005214554956444606, "epoch": 15.08, "percentage": 75.4, "elapsed_time": "0:06:16", "remaining_time": "0:02:02", "throughput": 1972.43, "total_tokens": 743056} |
| {"current_steps": 1890, "total_steps": 2500, "loss": 0.3542, "lr": 0.005135426265217089, "epoch": 15.12, "percentage": 75.6, "elapsed_time": "0:06:17", "remaining_time": "0:02:01", "throughput": 1973.63, "total_tokens": 745456} |
| {"current_steps": 1895, "total_steps": 2500, "loss": 0.3522, "lr": 0.005056778359396572, "epoch": 15.16, "percentage": 75.8, "elapsed_time": "0:06:18", "remaining_time": "0:02:00", "throughput": 1973.89, "total_tokens": 747344} |
| {"current_steps": 1900, "total_steps": 2500, "loss": 0.3609, "lr": 0.004978615072171023, "epoch": 15.2, "percentage": 76.0, "elapsed_time": "0:06:19", "remaining_time": "0:01:59", "throughput": 1974.54, "total_tokens": 749392} |
| {"current_steps": 1905, "total_steps": 2500, "loss": 0.3523, "lr": 0.004900940213108792, "epoch": 15.24, "percentage": 76.2, "elapsed_time": "0:06:20", "remaining_time": "0:01:58", "throughput": 1975.56, "total_tokens": 751792} |
| {"current_steps": 1910, "total_steps": 2500, "loss": 0.3646, "lr": 0.00482375756797293, "epoch": 15.28, "percentage": 76.4, "elapsed_time": "0:06:21", "remaining_time": "0:01:57", "throughput": 1976.59, "total_tokens": 754128} |
| {"current_steps": 1915, "total_steps": 2500, "loss": 0.3585, "lr": 0.004747070898536663, "epoch": 15.32, "percentage": 76.6, "elapsed_time": "0:06:22", "remaining_time": "0:01:56", "throughput": 1977.06, "total_tokens": 756112} |
| {"current_steps": 1920, "total_steps": 2500, "loss": 0.3669, "lr": 0.004670883942400076, "epoch": 15.36, "percentage": 76.8, "elapsed_time": "0:06:23", "remaining_time": "0:01:55", "throughput": 1977.3, "total_tokens": 757936} |
| {"current_steps": 1925, "total_steps": 2500, "loss": 0.3497, "lr": 0.004595200412807927, "epoch": 15.4, "percentage": 77.0, "elapsed_time": "0:06:24", "remaining_time": "0:01:54", "throughput": 1977.8, "total_tokens": 759920} |
| {"current_steps": 1930, "total_steps": 2500, "loss": 0.3572, "lr": 0.0045200239984686755, "epoch": 15.44, "percentage": 77.2, "elapsed_time": "0:06:25", "remaining_time": "0:01:53", "throughput": 1977.95, "total_tokens": 761776} |
| {"current_steps": 1935, "total_steps": 2500, "loss": 0.3559, "lr": 0.004445358363374698, "epoch": 15.48, "percentage": 77.4, "elapsed_time": "0:06:26", "remaining_time": "0:01:52", "throughput": 1978.25, "total_tokens": 763696} |
| {"current_steps": 1940, "total_steps": 2500, "loss": 0.3421, "lr": 0.004371207146623706, "epoch": 15.52, "percentage": 77.6, "elapsed_time": "0:06:26", "remaining_time": "0:01:51", "throughput": 1978.57, "total_tokens": 765680} |
| {"current_steps": 1945, "total_steps": 2500, "loss": 0.3484, "lr": 0.004297573962241401, "epoch": 15.56, "percentage": 77.8, "elapsed_time": "0:06:27", "remaining_time": "0:01:50", "throughput": 1978.79, "total_tokens": 767504} |
| {"current_steps": 1950, "total_steps": 2500, "loss": 0.3604, "lr": 0.0042244623990053114, "epoch": 15.6, "percentage": 78.0, "elapsed_time": "0:06:28", "remaining_time": "0:01:49", "throughput": 1979.01, "total_tokens": 769264} |
| {"current_steps": 1955, "total_steps": 2500, "loss": 0.3577, "lr": 0.004151876020269871, "epoch": 15.64, "percentage": 78.2, "elapsed_time": "0:06:29", "remaining_time": "0:01:48", "throughput": 1979.46, "total_tokens": 771184} |
| {"current_steps": 1960, "total_steps": 2500, "loss": 0.3553, "lr": 0.004079818363792786, "epoch": 15.68, "percentage": 78.4, "elapsed_time": "0:06:30", "remaining_time": "0:01:47", "throughput": 1980.13, "total_tokens": 773296} |
| {"current_steps": 1965, "total_steps": 2500, "loss": 0.3353, "lr": 0.004008292941562568, "epoch": 15.72, "percentage": 78.6, "elapsed_time": "0:06:31", "remaining_time": "0:01:46", "throughput": 1980.5, "total_tokens": 775248} |
| {"current_steps": 1970, "total_steps": 2500, "loss": 0.3478, "lr": 0.003937303239627392, "epoch": 15.76, "percentage": 78.8, "elapsed_time": "0:06:32", "remaining_time": "0:01:45", "throughput": 1981.32, "total_tokens": 777456} |
| {"current_steps": 1975, "total_steps": 2500, "loss": 0.3542, "lr": 0.0038668527179251807, "epoch": 15.8, "percentage": 79.0, "elapsed_time": "0:06:33", "remaining_time": "0:01:44", "throughput": 1981.25, "total_tokens": 779088} |
| {"current_steps": 1980, "total_steps": 2500, "loss": 0.3684, "lr": 0.0037969448101149708, "epoch": 15.84, "percentage": 79.2, "elapsed_time": "0:06:34", "remaining_time": "0:01:43", "throughput": 1981.5, "total_tokens": 781040} |
| {"current_steps": 1985, "total_steps": 2500, "loss": 0.3445, "lr": 0.0037275829234095594, "epoch": 15.88, "percentage": 79.4, "elapsed_time": "0:06:35", "remaining_time": "0:01:42", "throughput": 1982.13, "total_tokens": 783152} |
| {"current_steps": 1990, "total_steps": 2500, "loss": 0.3385, "lr": 0.003658770438409455, "epoch": 15.92, "percentage": 79.6, "elapsed_time": "0:06:35", "remaining_time": "0:01:41", "throughput": 1982.28, "total_tokens": 784944} |
| {"current_steps": 1995, "total_steps": 2500, "loss": 0.3619, "lr": 0.003590510708938092, "epoch": 15.96, "percentage": 79.8, "elapsed_time": "0:06:36", "remaining_time": "0:01:40", "throughput": 1982.9, "total_tokens": 786992} |
| {"current_steps": 2000, "total_steps": 2500, "loss": 0.3544, "lr": 0.003522807061878387, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:06:37", "remaining_time": "0:01:39", "throughput": 1982.66, "total_tokens": 788480} |
| {"current_steps": 2000, "total_steps": 2500, "eval_loss": 0.36209720373153687, "epoch": 16.0, "percentage": 80.0, "elapsed_time": "0:06:38", "remaining_time": "0:01:39", "throughput": 1976.47, "total_tokens": 788480} |
| {"current_steps": 2005, "total_steps": 2500, "loss": 0.342, "lr": 0.003455662797010583, "epoch": 16.04, "percentage": 80.2, "elapsed_time": "0:06:41", "remaining_time": "0:01:39", "throughput": 1970.82, "total_tokens": 790720} |
| {"current_steps": 2010, "total_steps": 2500, "loss": 0.3688, "lr": 0.0033890811868514214, "epoch": 16.08, "percentage": 80.4, "elapsed_time": "0:06:42", "remaining_time": "0:01:38", "throughput": 1971.28, "total_tokens": 792704} |
| {"current_steps": 2015, "total_steps": 2500, "loss": 0.3483, "lr": 0.0033230654764946498, "epoch": 16.12, "percentage": 80.6, "elapsed_time": "0:06:43", "remaining_time": "0:01:37", "throughput": 1972.12, "total_tokens": 794912} |
| {"current_steps": 2020, "total_steps": 2500, "loss": 0.3435, "lr": 0.0032576188834528546, "epoch": 16.16, "percentage": 80.8, "elapsed_time": "0:06:44", "remaining_time": "0:01:36", "throughput": 1973.12, "total_tokens": 797248} |
| {"current_steps": 2025, "total_steps": 2500, "loss": 0.3494, "lr": 0.003192744597500652, "epoch": 16.2, "percentage": 81.0, "elapsed_time": "0:06:44", "remaining_time": "0:01:34", "throughput": 1973.49, "total_tokens": 799200} |
| {"current_steps": 2030, "total_steps": 2500, "loss": 0.3478, "lr": 0.003128445780519215, "epoch": 16.24, "percentage": 81.2, "elapsed_time": "0:06:45", "remaining_time": "0:01:33", "throughput": 1973.62, "total_tokens": 800928} |
| {"current_steps": 2035, "total_steps": 2500, "loss": 0.364, "lr": 0.0030647255663421684, "epoch": 16.28, "percentage": 81.4, "elapsed_time": "0:06:46", "remaining_time": "0:01:32", "throughput": 1974.09, "total_tokens": 802976} |
| {"current_steps": 2040, "total_steps": 2500, "loss": 0.3546, "lr": 0.003001587060602859, "epoch": 16.32, "percentage": 81.6, "elapsed_time": "0:06:47", "remaining_time": "0:01:31", "throughput": 1974.19, "total_tokens": 804736} |
| {"current_steps": 2045, "total_steps": 2500, "loss": 0.353, "lr": 0.0029390333405829804, "epoch": 16.36, "percentage": 81.8, "elapsed_time": "0:06:48", "remaining_time": "0:01:30", "throughput": 1974.48, "total_tokens": 806592} |
| {"current_steps": 2050, "total_steps": 2500, "loss": 0.3401, "lr": 0.0028770674550625936, "epoch": 16.4, "percentage": 82.0, "elapsed_time": "0:06:49", "remaining_time": "0:01:29", "throughput": 1974.39, "total_tokens": 808224} |
| {"current_steps": 2055, "total_steps": 2500, "loss": 0.3514, "lr": 0.0028156924241715415, "epoch": 16.44, "percentage": 82.2, "elapsed_time": "0:06:50", "remaining_time": "0:01:28", "throughput": 1974.48, "total_tokens": 809984} |
| {"current_steps": 2060, "total_steps": 2500, "loss": 0.3483, "lr": 0.0027549112392422407, "epoch": 16.48, "percentage": 82.4, "elapsed_time": "0:06:51", "remaining_time": "0:01:27", "throughput": 1975.24, "total_tokens": 812288} |
| {"current_steps": 2065, "total_steps": 2500, "loss": 0.3481, "lr": 0.0026947268626638992, "epoch": 16.52, "percentage": 82.6, "elapsed_time": "0:06:52", "remaining_time": "0:01:26", "throughput": 1975.81, "total_tokens": 814400} |
| {"current_steps": 2070, "total_steps": 2500, "loss": 0.3426, "lr": 0.002635142227738116, "epoch": 16.56, "percentage": 82.8, "elapsed_time": "0:06:53", "remaining_time": "0:01:25", "throughput": 1976.54, "total_tokens": 816576} |
| {"current_steps": 2075, "total_steps": 2500, "loss": 0.3544, "lr": 0.0025761602385359376, "epoch": 16.6, "percentage": 83.0, "elapsed_time": "0:06:54", "remaining_time": "0:01:24", "throughput": 1976.64, "total_tokens": 818400} |
| {"current_steps": 2080, "total_steps": 2500, "loss": 0.3659, "lr": 0.002517783769756312, "epoch": 16.64, "percentage": 83.2, "elapsed_time": "0:06:54", "remaining_time": "0:01:23", "throughput": 1976.79, "total_tokens": 820192} |
| {"current_steps": 2085, "total_steps": 2500, "loss": 0.3485, "lr": 0.002460015666585969, "epoch": 16.68, "percentage": 83.4, "elapsed_time": "0:06:55", "remaining_time": "0:01:22", "throughput": 1977.33, "total_tokens": 822240} |
| {"current_steps": 2090, "total_steps": 2500, "loss": 0.3328, "lr": 0.0024028587445607563, "epoch": 16.72, "percentage": 83.6, "elapsed_time": "0:06:56", "remaining_time": "0:01:21", "throughput": 1977.65, "total_tokens": 824128} |
| {"current_steps": 2095, "total_steps": 2500, "loss": 0.3547, "lr": 0.002346315789428418, "epoch": 16.76, "percentage": 83.8, "elapsed_time": "0:06:57", "remaining_time": "0:01:20", "throughput": 1977.82, "total_tokens": 825952} |
| {"current_steps": 2100, "total_steps": 2500, "loss": 0.367, "lr": 0.002290389557012817, "epoch": 16.8, "percentage": 84.0, "elapsed_time": "0:06:58", "remaining_time": "0:01:19", "throughput": 1978.3, "total_tokens": 828032} |
| {"current_steps": 2105, "total_steps": 2500, "loss": 0.3627, "lr": 0.002235082773079624, "epoch": 16.84, "percentage": 84.2, "elapsed_time": "0:06:59", "remaining_time": "0:01:18", "throughput": 1978.3, "total_tokens": 829760} |
| {"current_steps": 2110, "total_steps": 2500, "loss": 0.3639, "lr": 0.0021803981332034566, "epoch": 16.88, "percentage": 84.4, "elapsed_time": "0:07:00", "remaining_time": "0:01:17", "throughput": 1978.58, "total_tokens": 831744} |
| {"current_steps": 2115, "total_steps": 2500, "loss": 0.3497, "lr": 0.0021263383026365167, "epoch": 16.92, "percentage": 84.6, "elapsed_time": "0:07:01", "remaining_time": "0:01:16", "throughput": 1979.24, "total_tokens": 833984} |
| {"current_steps": 2120, "total_steps": 2500, "loss": 0.3329, "lr": 0.002072905916178676, "epoch": 16.96, "percentage": 84.8, "elapsed_time": "0:07:02", "remaining_time": "0:01:15", "throughput": 1979.3, "total_tokens": 835680} |
| {"current_steps": 2125, "total_steps": 2500, "loss": 0.3224, "lr": 0.00202010357804907, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:07:03", "remaining_time": "0:01:14", "throughput": 1979.62, "total_tokens": 837600} |
| {"current_steps": 2125, "total_steps": 2500, "eval_loss": 0.3574245870113373, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "0:07:04", "remaining_time": "0:01:14", "throughput": 1973.96, "total_tokens": 837600} |
| {"current_steps": 2130, "total_steps": 2500, "loss": 0.3508, "lr": 0.001967933861759169, "epoch": 17.04, "percentage": 85.2, "elapsed_time": "0:07:06", "remaining_time": "0:01:14", "throughput": 1970.76, "total_tokens": 839744} |
| {"current_steps": 2135, "total_steps": 2500, "loss": 0.3481, "lr": 0.0019163993099873444, "epoch": 17.08, "percentage": 85.4, "elapsed_time": "0:07:07", "remaining_time": "0:01:13", "throughput": 1971.54, "total_tokens": 841952} |
| {"current_steps": 2140, "total_steps": 2500, "loss": 0.3222, "lr": 0.0018655024344549419, "epoch": 17.12, "percentage": 85.6, "elapsed_time": "0:07:07", "remaining_time": "0:01:11", "throughput": 1971.89, "total_tokens": 843904} |
| {"current_steps": 2145, "total_steps": 2500, "loss": 0.3889, "lr": 0.0018152457158038754, "epoch": 17.16, "percentage": 85.8, "elapsed_time": "0:07:08", "remaining_time": "0:01:10", "throughput": 1972.05, "total_tokens": 845760} |
| {"current_steps": 2150, "total_steps": 2500, "loss": 0.3317, "lr": 0.0017656316034757025, "epoch": 17.2, "percentage": 86.0, "elapsed_time": "0:07:09", "remaining_time": "0:01:09", "throughput": 1972.53, "total_tokens": 847840} |
| {"current_steps": 2155, "total_steps": 2500, "loss": 0.354, "lr": 0.0017166625155922675, "epoch": 17.24, "percentage": 86.2, "elapsed_time": "0:07:10", "remaining_time": "0:01:08", "throughput": 1973.21, "total_tokens": 850016} |
| {"current_steps": 2160, "total_steps": 2500, "loss": 0.3368, "lr": 0.0016683408388378294, "epoch": 17.28, "percentage": 86.4, "elapsed_time": "0:07:11", "remaining_time": "0:01:07", "throughput": 1973.35, "total_tokens": 851872} |
| {"current_steps": 2165, "total_steps": 2500, "loss": 0.3385, "lr": 0.0016206689283427389, "epoch": 17.32, "percentage": 86.6, "elapsed_time": "0:07:12", "remaining_time": "0:01:06", "throughput": 1973.55, "total_tokens": 853760} |
| {"current_steps": 2170, "total_steps": 2500, "loss": 0.3592, "lr": 0.001573649107568662, "epoch": 17.36, "percentage": 86.8, "elapsed_time": "0:07:13", "remaining_time": "0:01:05", "throughput": 1974.25, "total_tokens": 856000} |
| {"current_steps": 2175, "total_steps": 2500, "loss": 0.3485, "lr": 0.0015272836681953217, "epoch": 17.4, "percentage": 87.0, "elapsed_time": "0:07:14", "remaining_time": "0:01:04", "throughput": 1974.47, "total_tokens": 857888} |
| {"current_steps": 2180, "total_steps": 2500, "loss": 0.3517, "lr": 0.001481574870008831, "epoch": 17.44, "percentage": 87.2, "elapsed_time": "0:07:15", "remaining_time": "0:01:03", "throughput": 1974.73, "total_tokens": 859744} |
| {"current_steps": 2185, "total_steps": 2500, "loss": 0.333, "lr": 0.0014365249407915204, "epoch": 17.48, "percentage": 87.4, "elapsed_time": "0:07:16", "remaining_time": "0:01:02", "throughput": 1975.27, "total_tokens": 861792} |
| {"current_steps": 2190, "total_steps": 2500, "loss": 0.3322, "lr": 0.0013921360762133871, "epoch": 17.52, "percentage": 87.6, "elapsed_time": "0:07:17", "remaining_time": "0:01:01", "throughput": 1975.74, "total_tokens": 863936} |
| {"current_steps": 2195, "total_steps": 2500, "loss": 0.3483, "lr": 0.0013484104397250651, "epoch": 17.56, "percentage": 87.8, "elapsed_time": "0:07:18", "remaining_time": "0:01:00", "throughput": 1975.85, "total_tokens": 865760} |
| {"current_steps": 2200, "total_steps": 2500, "loss": 0.3639, "lr": 0.0013053501624523916, "epoch": 17.6, "percentage": 88.0, "elapsed_time": "0:07:19", "remaining_time": "0:00:59", "throughput": 1976.68, "total_tokens": 868000} |
| {"current_steps": 2205, "total_steps": 2500, "loss": 0.3518, "lr": 0.001262957343092531, "epoch": 17.64, "percentage": 88.2, "elapsed_time": "0:07:19", "remaining_time": "0:00:58", "throughput": 1976.81, "total_tokens": 869664} |
| {"current_steps": 2210, "total_steps": 2500, "loss": 0.3713, "lr": 0.00122123404781169, "epoch": 17.68, "percentage": 88.4, "elapsed_time": "0:07:20", "remaining_time": "0:00:57", "throughput": 1977.01, "total_tokens": 871552} |
| {"current_steps": 2215, "total_steps": 2500, "loss": 0.3415, "lr": 0.0011801823101444209, "epoch": 17.72, "percentage": 88.6, "elapsed_time": "0:07:21", "remaining_time": "0:00:56", "throughput": 1977.28, "total_tokens": 873536} |
| {"current_steps": 2220, "total_steps": 2500, "loss": 0.3397, "lr": 0.0011398041308944994, "epoch": 17.76, "percentage": 88.8, "elapsed_time": "0:07:22", "remaining_time": "0:00:55", "throughput": 1977.48, "total_tokens": 875424} |
| {"current_steps": 2225, "total_steps": 2500, "loss": 0.3462, "lr": 0.0011001014780374185, "epoch": 17.8, "percentage": 89.0, "elapsed_time": "0:07:23", "remaining_time": "0:00:54", "throughput": 1978.09, "total_tokens": 877632} |
| {"current_steps": 2230, "total_steps": 2500, "loss": 0.3419, "lr": 0.0010610762866244655, "epoch": 17.84, "percentage": 89.2, "elapsed_time": "0:07:24", "remaining_time": "0:00:53", "throughput": 1978.14, "total_tokens": 879392} |
| {"current_steps": 2235, "total_steps": 2500, "loss": 0.366, "lr": 0.0010227304586884133, "epoch": 17.88, "percentage": 89.4, "elapsed_time": "0:07:25", "remaining_time": "0:00:52", "throughput": 1977.98, "total_tokens": 880992} |
| {"current_steps": 2240, "total_steps": 2500, "loss": 0.3624, "lr": 0.0009850658631508197, "epoch": 17.92, "percentage": 89.6, "elapsed_time": "0:07:26", "remaining_time": "0:00:51", "throughput": 1978.92, "total_tokens": 883360} |
| {"current_steps": 2245, "total_steps": 2500, "loss": 0.3447, "lr": 0.0009480843357309326, "epoch": 17.96, "percentage": 89.8, "elapsed_time": "0:07:27", "remaining_time": "0:00:50", "throughput": 1978.99, "total_tokens": 885120} |
| {"current_steps": 2250, "total_steps": 2500, "loss": 0.3574, "lr": 0.0009117876788562262, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:07:28", "remaining_time": "0:00:49", "throughput": 1979.41, "total_tokens": 887088} |
| {"current_steps": 2250, "total_steps": 2500, "eval_loss": 0.358134925365448, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "0:07:29", "remaining_time": "0:00:49", "throughput": 1974.07, "total_tokens": 887088} |
| {"current_steps": 2255, "total_steps": 2500, "loss": 0.3473, "lr": 0.0008761776615745486, "epoch": 18.04, "percentage": 90.2, "elapsed_time": "0:07:31", "remaining_time": "0:00:49", "throughput": 1971.11, "total_tokens": 889104} |
| {"current_steps": 2260, "total_steps": 2500, "loss": 0.3498, "lr": 0.0008412560194679031, "epoch": 18.08, "percentage": 90.4, "elapsed_time": "0:07:31", "remaining_time": "0:00:47", "throughput": 1971.5, "total_tokens": 891088} |
| {"current_steps": 2265, "total_steps": 2500, "loss": 0.3264, "lr": 0.0008070244545678617, "epoch": 18.12, "percentage": 90.6, "elapsed_time": "0:07:32", "remaining_time": "0:00:46", "throughput": 1971.6, "total_tokens": 892848} |
| {"current_steps": 2270, "total_steps": 2500, "loss": 0.3628, "lr": 0.0007734846352726027, "epoch": 18.16, "percentage": 90.8, "elapsed_time": "0:07:33", "remaining_time": "0:00:45", "throughput": 1971.82, "total_tokens": 894800} |
| {"current_steps": 2275, "total_steps": 2500, "loss": 0.3148, "lr": 0.0007406381962655994, "epoch": 18.2, "percentage": 91.0, "elapsed_time": "0:07:34", "remaining_time": "0:00:44", "throughput": 1972.18, "total_tokens": 896880} |
| {"current_steps": 2280, "total_steps": 2500, "loss": 0.3517, "lr": 0.0007084867384359533, "epoch": 18.24, "percentage": 91.2, "elapsed_time": "0:07:35", "remaining_time": "0:00:43", "throughput": 1972.21, "total_tokens": 898608} |
| {"current_steps": 2285, "total_steps": 2500, "loss": 0.3433, "lr": 0.0006770318288003557, "epoch": 18.28, "percentage": 91.4, "elapsed_time": "0:07:36", "remaining_time": "0:00:42", "throughput": 1972.51, "total_tokens": 900560} |
| {"current_steps": 2290, "total_steps": 2500, "loss": 0.3576, "lr": 0.0006462750004267293, "epoch": 18.32, "percentage": 91.6, "elapsed_time": "0:07:37", "remaining_time": "0:00:41", "throughput": 1973.28, "total_tokens": 902800} |
| {"current_steps": 2295, "total_steps": 2500, "loss": 0.3645, "lr": 0.0006162177523594958, "epoch": 18.36, "percentage": 91.8, "elapsed_time": "0:07:38", "remaining_time": "0:00:40", "throughput": 1973.57, "total_tokens": 904752} |
| {"current_steps": 2300, "total_steps": 2500, "loss": 0.3302, "lr": 0.0005868615495465235, "epoch": 18.4, "percentage": 92.0, "elapsed_time": "0:07:39", "remaining_time": "0:00:39", "throughput": 1973.95, "total_tokens": 906800} |
| {"current_steps": 2305, "total_steps": 2500, "loss": 0.3453, "lr": 0.0005582078227677183, "epoch": 18.44, "percentage": 92.2, "elapsed_time": "0:07:40", "remaining_time": "0:00:38", "throughput": 1974.08, "total_tokens": 908720} |
| {"current_steps": 2310, "total_steps": 2500, "loss": 0.3552, "lr": 0.0005302579685653008, "epoch": 18.48, "percentage": 92.4, "elapsed_time": "0:07:41", "remaining_time": "0:00:37", "throughput": 1974.91, "total_tokens": 911056} |
| {"current_steps": 2315, "total_steps": 2500, "loss": 0.3429, "lr": 0.000503013349175736, "epoch": 18.52, "percentage": 92.6, "elapsed_time": "0:07:42", "remaining_time": "0:00:36", "throughput": 1975.01, "total_tokens": 912784} |
| {"current_steps": 2320, "total_steps": 2500, "loss": 0.3472, "lr": 0.00047647529246333417, "epoch": 18.56, "percentage": 92.8, "elapsed_time": "0:07:43", "remaining_time": "0:00:35", "throughput": 1975.92, "total_tokens": 915216} |
| {"current_steps": 2325, "total_steps": 2500, "loss": 0.3565, "lr": 0.00045064509185554043, "epoch": 18.6, "percentage": 93.0, "elapsed_time": "0:07:44", "remaining_time": "0:00:34", "throughput": 1976.62, "total_tokens": 917424} |
| {"current_steps": 2330, "total_steps": 2500, "loss": 0.3475, "lr": 0.0004255240062798904, "epoch": 18.64, "percentage": 93.2, "elapsed_time": "0:07:45", "remaining_time": "0:00:33", "throughput": 1976.93, "total_tokens": 919376} |
| {"current_steps": 2335, "total_steps": 2500, "loss": 0.315, "lr": 0.00040111326010265733, "epoch": 18.68, "percentage": 93.4, "elapsed_time": "0:07:45", "remaining_time": "0:00:32", "throughput": 1977.17, "total_tokens": 921296} |
| {"current_steps": 2340, "total_steps": 2500, "loss": 0.338, "lr": 0.0003774140430691686, "epoch": 18.72, "percentage": 93.6, "elapsed_time": "0:07:46", "remaining_time": "0:00:31", "throughput": 1977.3, "total_tokens": 923088} |
| {"current_steps": 2345, "total_steps": 2500, "loss": 0.3616, "lr": 0.00035442751024582827, "epoch": 18.76, "percentage": 93.8, "elapsed_time": "0:07:47", "remaining_time": "0:00:30", "throughput": 1977.8, "total_tokens": 925136} |
| {"current_steps": 2350, "total_steps": 2500, "loss": 0.359, "lr": 0.0003321547819638176, "epoch": 18.8, "percentage": 94.0, "elapsed_time": "0:07:48", "remaining_time": "0:00:29", "throughput": 1978.2, "total_tokens": 927216} |
| {"current_steps": 2355, "total_steps": 2500, "loss": 0.3599, "lr": 0.00031059694376449007, "epoch": 18.84, "percentage": 94.2, "elapsed_time": "0:07:49", "remaining_time": "0:00:28", "throughput": 1978.83, "total_tokens": 929456} |
| {"current_steps": 2360, "total_steps": 2500, "loss": 0.3421, "lr": 0.0002897550463464654, "epoch": 18.88, "percentage": 94.4, "elapsed_time": "0:07:50", "remaining_time": "0:00:27", "throughput": 1978.77, "total_tokens": 931088} |
| {"current_steps": 2365, "total_steps": 2500, "loss": 0.3532, "lr": 0.000269630105514424, "epoch": 18.92, "percentage": 94.6, "elapsed_time": "0:07:51", "remaining_time": "0:00:26", "throughput": 1979.28, "total_tokens": 933136} |
| {"current_steps": 2370, "total_steps": 2500, "loss": 0.3481, "lr": 0.0002502231021295864, "epoch": 18.96, "percentage": 94.8, "elapsed_time": "0:07:52", "remaining_time": "0:00:25", "throughput": 1979.84, "total_tokens": 935280} |
| {"current_steps": 2375, "total_steps": 2500, "loss": 0.3582, "lr": 0.00023153498206192002, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:07:53", "remaining_time": "0:00:24", "throughput": 1979.63, "total_tokens": 936768} |
| {"current_steps": 2375, "total_steps": 2500, "eval_loss": 0.3589184880256653, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:07:54", "remaining_time": "0:00:24", "throughput": 1974.6, "total_tokens": 936768} |
| {"current_steps": 2380, "total_steps": 2500, "loss": 0.3552, "lr": 0.0002135666561440308, "epoch": 19.04, "percentage": 95.2, "elapsed_time": "0:07:56", "remaining_time": "0:00:24", "throughput": 1971.61, "total_tokens": 938912} |
| {"current_steps": 2385, "total_steps": 2500, "loss": 0.3653, "lr": 0.00019631900012677395, "epoch": 19.08, "percentage": 95.4, "elapsed_time": "0:07:57", "remaining_time": "0:00:23", "throughput": 1972.58, "total_tokens": 941376} |
| {"current_steps": 2390, "total_steps": 2500, "loss": 0.3566, "lr": 0.0001797928546365729, "epoch": 19.12, "percentage": 95.6, "elapsed_time": "0:07:58", "remaining_time": "0:00:22", "throughput": 1972.69, "total_tokens": 943264} |
| {"current_steps": 2395, "total_steps": 2500, "loss": 0.3373, "lr": 0.00016398902513444392, "epoch": 19.16, "percentage": 95.8, "elapsed_time": "0:07:59", "remaining_time": "0:00:21", "throughput": 1973.01, "total_tokens": 945152} |
| {"current_steps": 2400, "total_steps": 2500, "loss": 0.3389, "lr": 0.000148908281876739, "epoch": 19.2, "percentage": 96.0, "elapsed_time": "0:07:59", "remaining_time": "0:00:19", "throughput": 1973.33, "total_tokens": 947104} |
| {"current_steps": 2405, "total_steps": 2500, "loss": 0.3475, "lr": 0.00013455135987760935, "epoch": 19.24, "percentage": 96.2, "elapsed_time": "0:08:00", "remaining_time": "0:00:18", "throughput": 1973.57, "total_tokens": 948896} |
| {"current_steps": 2410, "total_steps": 2500, "loss": 0.349, "lr": 0.00012091895887317904, "epoch": 19.28, "percentage": 96.4, "elapsed_time": "0:08:01", "remaining_time": "0:00:17", "throughput": 1973.57, "total_tokens": 950624} |
| {"current_steps": 2415, "total_steps": 2500, "loss": 0.3533, "lr": 0.00010801174328744222, "epoch": 19.32, "percentage": 96.6, "elapsed_time": "0:08:02", "remaining_time": "0:00:16", "throughput": 1973.76, "total_tokens": 952512} |
| {"current_steps": 2420, "total_steps": 2500, "loss": 0.3325, "lr": 9.583034219987406e-05, "epoch": 19.36, "percentage": 96.8, "elapsed_time": "0:08:03", "remaining_time": "0:00:15", "throughput": 1974.1, "total_tokens": 954592} |
| {"current_steps": 2425, "total_steps": 2500, "loss": 0.3343, "lr": 8.43753493147803e-05, "epoch": 19.4, "percentage": 97.0, "elapsed_time": "0:08:04", "remaining_time": "0:00:14", "throughput": 1974.73, "total_tokens": 956768} |
| {"current_steps": 2430, "total_steps": 2500, "loss": 0.3502, "lr": 7.364732293235365e-05, "epoch": 19.44, "percentage": 97.2, "elapsed_time": "0:08:05", "remaining_time": "0:00:13", "throughput": 1975.11, "total_tokens": 958816} |
| {"current_steps": 2435, "total_steps": 2500, "loss": 0.3544, "lr": 6.3646785921464e-05, "epoch": 19.48, "percentage": 97.4, "elapsed_time": "0:08:06", "remaining_time": "0:00:12", "throughput": 1975.91, "total_tokens": 961088} |
| {"current_steps": 2440, "total_steps": 2500, "loss": 0.336, "lr": 5.437422569417538e-05, "epoch": 19.52, "percentage": 97.6, "elapsed_time": "0:08:07", "remaining_time": "0:00:11", "throughput": 1976.3, "total_tokens": 963072} |
| {"current_steps": 2445, "total_steps": 2500, "loss": 0.3564, "lr": 4.5830094181995014e-05, "epoch": 19.56, "percentage": 97.8, "elapsed_time": "0:08:08", "remaining_time": "0:00:10", "throughput": 1976.55, "total_tokens": 964928} |
| {"current_steps": 2450, "total_steps": 2500, "loss": 0.3477, "lr": 3.801480781383759e-05, "epoch": 19.6, "percentage": 98.0, "elapsed_time": "0:08:09", "remaining_time": "0:00:09", "throughput": 1976.76, "total_tokens": 966784} |
| {"current_steps": 2455, "total_steps": 2500, "loss": 0.3511, "lr": 3.092874749573316e-05, "epoch": 19.64, "percentage": 98.2, "elapsed_time": "0:08:09", "remaining_time": "0:00:08", "throughput": 1976.96, "total_tokens": 968608} |
| {"current_steps": 2460, "total_steps": 2500, "loss": 0.3504, "lr": 2.4572258592265305e-05, "epoch": 19.68, "percentage": 98.4, "elapsed_time": "0:08:10", "remaining_time": "0:00:07", "throughput": 1977.37, "total_tokens": 970688} |
| {"current_steps": 2465, "total_steps": 2500, "loss": 0.3493, "lr": 1.8945650909737987e-05, "epoch": 19.72, "percentage": 98.6, "elapsed_time": "0:08:11", "remaining_time": "0:00:06", "throughput": 1977.45, "total_tokens": 972384} |
| {"current_steps": 2470, "total_steps": 2500, "loss": 0.3366, "lr": 1.404919868106924e-05, "epoch": 19.76, "percentage": 98.8, "elapsed_time": "0:08:12", "remaining_time": "0:00:05", "throughput": 1977.76, "total_tokens": 974400} |
| {"current_steps": 2475, "total_steps": 2500, "loss": 0.3203, "lr": 9.88314055243189e-06, "epoch": 19.8, "percentage": 99.0, "elapsed_time": "0:08:13", "remaining_time": "0:00:04", "throughput": 1978.39, "total_tokens": 976640} |
| {"current_steps": 2480, "total_steps": 2500, "loss": 0.3479, "lr": 6.44767957162451e-06, "epoch": 19.84, "percentage": 99.2, "elapsed_time": "0:08:14", "remaining_time": "0:00:03", "throughput": 1979.21, "total_tokens": 978976} |
| {"current_steps": 2485, "total_steps": 2500, "loss": 0.3666, "lr": 3.74298317816768e-06, "epoch": 19.88, "percentage": 99.4, "elapsed_time": "0:08:15", "remaining_time": "0:00:02", "throughput": 1979.03, "total_tokens": 980480} |
| {"current_steps": 2490, "total_steps": 2500, "loss": 0.3634, "lr": 1.7691831951471837e-06, "epoch": 19.92, "percentage": 99.6, "elapsed_time": "0:08:16", "remaining_time": "0:00:01", "throughput": 1979.71, "total_tokens": 982752} |
| {"current_steps": 2495, "total_steps": 2500, "loss": 0.3563, "lr": 5.263758227908033e-07, "epoch": 19.96, "percentage": 99.8, "elapsed_time": "0:08:17", "remaining_time": "0:00:00", "throughput": 1979.59, "total_tokens": 984288} |
| {"current_steps": 2500, "total_steps": 2500, "loss": 0.3613, "lr": 1.4621633773748144e-08, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:08:18", "remaining_time": "0:00:00", "throughput": 1979.54, "total_tokens": 985952} |
| {"current_steps": 2500, "total_steps": 2500, "eval_loss": 0.3601417541503906, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:08:19", "remaining_time": "0:00:00", "throughput": 1974.71, "total_tokens": 985952} |
| {"current_steps": 2500, "total_steps": 2500, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:08:20", "remaining_time": "0:00:00", "throughput": 1969.96, "total_tokens": 985952} |
|
|