Mistral-EN-Part-3 / trainer_log.jsonl
SamChen888's picture
Upload folder using huggingface_hub
7632848 verified
{"current_steps": 10, "total_steps": 1044, "loss": 0.8011, "lr": 2.9993209101500904e-05, "epoch": 0.028694404591104734, "percentage": 0.96, "elapsed_time": "0:00:11", "remaining_time": "0:19:44", "throughput": 2310.83, "total_tokens": 26464}
{"current_steps": 20, "total_steps": 1044, "loss": 0.1799, "lr": 2.997284255484393e-05, "epoch": 0.05738880918220947, "percentage": 1.92, "elapsed_time": "0:00:20", "remaining_time": "0:17:29", "throughput": 2613.08, "total_tokens": 53568}
{"current_steps": 30, "total_steps": 1044, "loss": 0.1418, "lr": 2.9938918800982563e-05, "epoch": 0.08608321377331421, "percentage": 2.87, "elapsed_time": "0:00:29", "remaining_time": "0:16:29", "throughput": 2728.5, "total_tokens": 79840}
{"current_steps": 40, "total_steps": 1044, "loss": 0.1428, "lr": 2.9891468556286034e-05, "epoch": 0.11477761836441894, "percentage": 3.83, "elapsed_time": "0:00:38", "remaining_time": "0:16:01", "throughput": 2788.33, "total_tokens": 106848}
{"current_steps": 50, "total_steps": 1044, "loss": 0.1297, "lr": 2.983053478472707e-05, "epoch": 0.14347202295552366, "percentage": 4.79, "elapsed_time": "0:00:47", "remaining_time": "0:15:39", "throughput": 2825.55, "total_tokens": 133600}
{"current_steps": 60, "total_steps": 1044, "loss": 0.1392, "lr": 2.975617265898004e-05, "epoch": 0.17216642754662842, "percentage": 5.75, "elapsed_time": "0:00:56", "remaining_time": "0:15:20", "throughput": 2847.44, "total_tokens": 159872}
{"current_steps": 70, "total_steps": 1044, "loss": 0.1418, "lr": 2.9668449510464707e-05, "epoch": 0.20086083213773315, "percentage": 6.7, "elapsed_time": "0:01:05", "remaining_time": "0:15:07", "throughput": 2866.12, "total_tokens": 186912}
{"current_steps": 80, "total_steps": 1044, "loss": 0.1336, "lr": 2.9567444768380745e-05, "epoch": 0.22955523672883787, "percentage": 7.66, "elapsed_time": "0:01:14", "remaining_time": "0:14:54", "throughput": 2879.15, "total_tokens": 213696}
{"current_steps": 90, "total_steps": 1044, "loss": 0.0996, "lr": 2.9453249887788343e-05, "epoch": 0.2582496413199426, "percentage": 8.62, "elapsed_time": "0:01:23", "remaining_time": "0:14:43", "throughput": 2889.53, "total_tokens": 240736}
{"current_steps": 100, "total_steps": 1044, "loss": 0.1102, "lr": 2.9325968266799934e-05, "epoch": 0.28694404591104733, "percentage": 9.58, "elapsed_time": "0:01:32", "remaining_time": "0:14:29", "throughput": 2895.23, "total_tokens": 266816}
{"current_steps": 110, "total_steps": 1044, "loss": 0.1443, "lr": 2.918571515295803e-05, "epoch": 0.31563845050215206, "percentage": 10.54, "elapsed_time": "0:01:41", "remaining_time": "0:14:18", "throughput": 2901.53, "total_tokens": 293504}
{"current_steps": 120, "total_steps": 1044, "loss": 0.1224, "lr": 2.9032617538884018e-05, "epoch": 0.34433285509325684, "percentage": 11.49, "elapsed_time": "0:01:50", "remaining_time": "0:14:07", "throughput": 2904.71, "total_tokens": 319872}
{"current_steps": 130, "total_steps": 1044, "loss": 0.1678, "lr": 2.8866814047292232e-05, "epoch": 0.37302725968436157, "percentage": 12.45, "elapsed_time": "0:01:59", "remaining_time": "0:13:58", "throughput": 2909.19, "total_tokens": 346816}
{"current_steps": 140, "total_steps": 1044, "loss": 0.1154, "lr": 2.8688454805473647e-05, "epoch": 0.4017216642754663, "percentage": 13.41, "elapsed_time": "0:02:08", "remaining_time": "0:13:49", "throughput": 2913.25, "total_tokens": 374304}
{"current_steps": 150, "total_steps": 1044, "loss": 0.1285, "lr": 2.84977013093626e-05, "epoch": 0.430416068866571, "percentage": 14.37, "elapsed_time": "0:02:17", "remaining_time": "0:13:40", "throughput": 2916.45, "total_tokens": 401568}
{"current_steps": 160, "total_steps": 1044, "loss": 0.1144, "lr": 2.8294726277309815e-05, "epoch": 0.45911047345767575, "percentage": 15.33, "elapsed_time": "0:02:26", "remaining_time": "0:13:31", "throughput": 2917.95, "total_tokens": 428384}
{"current_steps": 170, "total_steps": 1044, "loss": 0.0875, "lr": 2.8079713493694024e-05, "epoch": 0.4878048780487805, "percentage": 16.28, "elapsed_time": "0:02:35", "remaining_time": "0:13:21", "throughput": 2918.36, "total_tokens": 454880}
{"current_steps": 180, "total_steps": 1044, "loss": 0.151, "lr": 2.7852857642513838e-05, "epoch": 0.5164992826398852, "percentage": 17.24, "elapsed_time": "0:02:45", "remaining_time": "0:13:12", "throughput": 2920.04, "total_tokens": 481856}
{"current_steps": 190, "total_steps": 1044, "loss": 0.1153, "lr": 2.7614364131110498e-05, "epoch": 0.5451936872309899, "percentage": 18.2, "elapsed_time": "0:02:54", "remaining_time": "0:13:02", "throughput": 2920.27, "total_tokens": 508192}
{"current_steps": 200, "total_steps": 1044, "loss": 0.0971, "lr": 2.7364448904181152e-05, "epoch": 0.5738880918220947, "percentage": 19.16, "elapsed_time": "0:03:02", "remaining_time": "0:12:52", "throughput": 2919.37, "total_tokens": 534240}
{"current_steps": 210, "total_steps": 1044, "loss": 0.1035, "lr": 2.7103338248251055e-05, "epoch": 0.6025824964131994, "percentage": 20.11, "elapsed_time": "0:03:12", "remaining_time": "0:12:43", "throughput": 2919.77, "total_tokens": 561664}
{"current_steps": 220, "total_steps": 1044, "loss": 0.0908, "lr": 2.6831268586781746e-05, "epoch": 0.6312769010043041, "percentage": 21.07, "elapsed_time": "0:03:21", "remaining_time": "0:12:35", "throughput": 2917.05, "total_tokens": 588128}
{"current_steps": 230, "total_steps": 1044, "loss": 0.1216, "lr": 2.6548486266100645e-05, "epoch": 0.6599713055954088, "percentage": 22.03, "elapsed_time": "0:03:30", "remaining_time": "0:12:26", "throughput": 2913.81, "total_tokens": 614432}
{"current_steps": 240, "total_steps": 1044, "loss": 0.0952, "lr": 2.6255247332346036e-05, "epoch": 0.6886657101865137, "percentage": 22.99, "elapsed_time": "0:03:40", "remaining_time": "0:12:17", "throughput": 2911.39, "total_tokens": 640832}
{"current_steps": 250, "total_steps": 1044, "loss": 0.1073, "lr": 2.5951817299629266e-05, "epoch": 0.7173601147776184, "percentage": 23.95, "elapsed_time": "0:03:49", "remaining_time": "0:12:08", "throughput": 2912.33, "total_tokens": 668448}
{"current_steps": 260, "total_steps": 1044, "loss": 0.1048, "lr": 2.5638470909624166e-05, "epoch": 0.7460545193687231, "percentage": 24.9, "elapsed_time": "0:03:58", "remaining_time": "0:11:59", "throughput": 2912.19, "total_tokens": 695008}
{"current_steps": 270, "total_steps": 1044, "loss": 0.1015, "lr": 2.531549188280135e-05, "epoch": 0.7747489239598279, "percentage": 25.86, "elapsed_time": "0:04:07", "remaining_time": "0:11:50", "throughput": 2913.22, "total_tokens": 722208}
{"current_steps": 280, "total_steps": 1044, "loss": 0.0815, "lr": 2.498317266153262e-05, "epoch": 0.8034433285509326, "percentage": 26.82, "elapsed_time": "0:04:17", "remaining_time": "0:11:41", "throughput": 2913.78, "total_tokens": 749216}
{"current_steps": 290, "total_steps": 1044, "loss": 0.0843, "lr": 2.464181414529809e-05, "epoch": 0.8321377331420373, "percentage": 27.78, "elapsed_time": "0:04:26", "remaining_time": "0:11:32", "throughput": 2914.27, "total_tokens": 776000}
{"current_steps": 300, "total_steps": 1044, "loss": 0.1321, "lr": 2.4291725418235848e-05, "epoch": 0.860832137733142, "percentage": 28.74, "elapsed_time": "0:04:35", "remaining_time": "0:11:22", "throughput": 2914.48, "total_tokens": 802560}
{"current_steps": 310, "total_steps": 1044, "loss": 0.0868, "lr": 2.3933223469280704e-05, "epoch": 0.8895265423242468, "percentage": 29.69, "elapsed_time": "0:04:44", "remaining_time": "0:11:13", "throughput": 2914.26, "total_tokens": 828704}
{"current_steps": 320, "total_steps": 1044, "loss": 0.1012, "lr": 2.3566632905145604e-05, "epoch": 0.9182209469153515, "percentage": 30.65, "elapsed_time": "0:04:53", "remaining_time": "0:11:04", "throughput": 2914.84, "total_tokens": 855616}
{"current_steps": 330, "total_steps": 1044, "loss": 0.1139, "lr": 2.3192285656405456e-05, "epoch": 0.9469153515064562, "percentage": 31.61, "elapsed_time": "0:05:02", "remaining_time": "0:10:54", "throughput": 2915.2, "total_tokens": 882400}
{"current_steps": 340, "total_steps": 1044, "loss": 0.1004, "lr": 2.2810520676949537e-05, "epoch": 0.975609756097561, "percentage": 32.57, "elapsed_time": "0:05:11", "remaining_time": "0:10:45", "throughput": 2916.17, "total_tokens": 909760}
{"current_steps": 350, "total_steps": 1044, "loss": 0.1131, "lr": 2.2421683637074648e-05, "epoch": 1.0028694404591105, "percentage": 33.52, "elapsed_time": "0:05:20", "remaining_time": "0:10:35", "throughput": 2915.36, "total_tokens": 934112}
{"current_steps": 360, "total_steps": 1044, "loss": 0.0896, "lr": 2.2026126610496852e-05, "epoch": 1.0315638450502151, "percentage": 34.48, "elapsed_time": "0:05:29", "remaining_time": "0:10:25", "throughput": 2914.97, "total_tokens": 960320}
{"current_steps": 370, "total_steps": 1044, "loss": 0.0629, "lr": 2.1624207755565232e-05, "epoch": 1.06025824964132, "percentage": 35.44, "elapsed_time": "0:05:38", "remaining_time": "0:10:16", "throughput": 2914.99, "total_tokens": 986688}
{"current_steps": 380, "total_steps": 1044, "loss": 0.0658, "lr": 2.121629099096628e-05, "epoch": 1.0889526542324246, "percentage": 36.4, "elapsed_time": "0:05:47", "remaining_time": "0:10:07", "throughput": 2914.65, "total_tokens": 1013088}
{"current_steps": 390, "total_steps": 1044, "loss": 0.0777, "lr": 2.0802745666212592e-05, "epoch": 1.1176470588235294, "percentage": 37.36, "elapsed_time": "0:05:56", "remaining_time": "0:09:58", "throughput": 2914.48, "total_tokens": 1039808}
{"current_steps": 400, "total_steps": 1044, "loss": 0.0908, "lr": 2.0383946227214188e-05, "epoch": 1.146341463414634, "percentage": 38.31, "elapsed_time": "0:06:05", "remaining_time": "0:09:49", "throughput": 2913.58, "total_tokens": 1066144}
{"current_steps": 410, "total_steps": 1044, "loss": 0.0731, "lr": 1.9960271877235306e-05, "epoch": 1.175035868005739, "percentage": 39.27, "elapsed_time": "0:06:15", "remaining_time": "0:09:40", "throughput": 2912.62, "total_tokens": 1092576}
{"current_steps": 420, "total_steps": 1044, "loss": 0.0661, "lr": 1.953210623354359e-05, "epoch": 1.2037302725968435, "percentage": 40.23, "elapsed_time": "0:06:24", "remaining_time": "0:09:31", "throughput": 2911.15, "total_tokens": 1119104}
{"current_steps": 430, "total_steps": 1044, "loss": 0.0391, "lr": 1.909983698006266e-05, "epoch": 1.2324246771879483, "percentage": 41.19, "elapsed_time": "0:06:33", "remaining_time": "0:09:22", "throughput": 2909.7, "total_tokens": 1145376}
{"current_steps": 440, "total_steps": 1044, "loss": 0.0594, "lr": 1.8663855516342468e-05, "epoch": 1.2611190817790532, "percentage": 42.15, "elapsed_time": "0:06:43", "remaining_time": "0:09:13", "throughput": 2909.62, "total_tokens": 1172736}
{"current_steps": 450, "total_steps": 1044, "loss": 0.0492, "lr": 1.8224556603165363e-05, "epoch": 1.2898134863701578, "percentage": 43.1, "elapsed_time": "0:06:52", "remaining_time": "0:09:04", "throughput": 2909.1, "total_tokens": 1199296}
{"current_steps": 460, "total_steps": 1044, "loss": 0.0465, "lr": 1.7782338005108694e-05, "epoch": 1.3185078909612624, "percentage": 44.06, "elapsed_time": "0:07:01", "remaining_time": "0:08:55", "throughput": 2908.83, "total_tokens": 1226368}
{"current_steps": 470, "total_steps": 1044, "loss": 0.1173, "lr": 1.733760013038765e-05, "epoch": 1.3472022955523673, "percentage": 45.02, "elapsed_time": "0:07:10", "remaining_time": "0:08:45", "throughput": 2908.02, "total_tokens": 1252352}
{"current_steps": 480, "total_steps": 1044, "loss": 0.0721, "lr": 1.689074566830434e-05, "epoch": 1.375896700143472, "percentage": 45.98, "elapsed_time": "0:07:19", "remaining_time": "0:08:36", "throughput": 2907.48, "total_tokens": 1278912}
{"current_steps": 490, "total_steps": 1044, "loss": 0.061, "lr": 1.6442179224631558e-05, "epoch": 1.4045911047345767, "percentage": 46.93, "elapsed_time": "0:07:29", "remaining_time": "0:08:27", "throughput": 2907.21, "total_tokens": 1306176}
{"current_steps": 500, "total_steps": 1044, "loss": 0.0599, "lr": 1.5992306955261175e-05, "epoch": 1.4332855093256816, "percentage": 47.89, "elapsed_time": "0:07:38", "remaining_time": "0:08:18", "throughput": 2906.43, "total_tokens": 1332608}
{"current_steps": 510, "total_steps": 1044, "loss": 0.0721, "lr": 1.5541536198449044e-05, "epoch": 1.4619799139167862, "percentage": 48.85, "elapsed_time": "0:07:47", "remaining_time": "0:08:09", "throughput": 2905.58, "total_tokens": 1359520}
{"current_steps": 520, "total_steps": 1044, "loss": 0.0595, "lr": 1.5090275105989284e-05, "epoch": 1.490674318507891, "percentage": 49.81, "elapsed_time": "0:07:57", "remaining_time": "0:08:01", "throughput": 2904.37, "total_tokens": 1386368}
{"current_steps": 530, "total_steps": 1044, "loss": 0.0519, "lr": 1.463893227365195e-05, "epoch": 1.5193687230989958, "percentage": 50.77, "elapsed_time": "0:08:06", "remaining_time": "0:07:51", "throughput": 2903.71, "total_tokens": 1412992}
{"current_steps": 540, "total_steps": 1044, "loss": 0.0752, "lr": 1.4187916371218739e-05, "epoch": 1.5480631276901005, "percentage": 51.72, "elapsed_time": "0:08:15", "remaining_time": "0:07:42", "throughput": 2903.58, "total_tokens": 1439616}
{"current_steps": 550, "total_steps": 1044, "loss": 0.0895, "lr": 1.3737635772451642e-05, "epoch": 1.576757532281205, "percentage": 52.68, "elapsed_time": "0:08:25", "remaining_time": "0:07:33", "throughput": 2903.36, "total_tokens": 1466464}
{"current_steps": 560, "total_steps": 1044, "loss": 0.1118, "lr": 1.328849818532963e-05, "epoch": 1.60545193687231, "percentage": 53.64, "elapsed_time": "0:08:34", "remaining_time": "0:07:24", "throughput": 2903.32, "total_tokens": 1493440}
{"current_steps": 570, "total_steps": 1044, "loss": 0.0917, "lr": 1.2840910282888211e-05, "epoch": 1.6341463414634148, "percentage": 54.6, "elapsed_time": "0:08:43", "remaining_time": "0:07:15", "throughput": 2903.26, "total_tokens": 1520256}
{"current_steps": 580, "total_steps": 1044, "loss": 0.0797, "lr": 1.2395277334996045e-05, "epoch": 1.6628407460545194, "percentage": 55.56, "elapsed_time": "0:08:53", "remaining_time": "0:07:06", "throughput": 2903.26, "total_tokens": 1547712}
{"current_steps": 590, "total_steps": 1044, "loss": 0.0609, "lr": 1.1952002841402057e-05, "epoch": 1.691535150645624, "percentage": 56.51, "elapsed_time": "0:09:02", "remaining_time": "0:06:57", "throughput": 2903.13, "total_tokens": 1574528}
{"current_steps": 600, "total_steps": 1044, "loss": 0.0602, "lr": 1.1511488166385349e-05, "epoch": 1.7202295552367288, "percentage": 57.47, "elapsed_time": "0:09:11", "remaining_time": "0:06:48", "throughput": 2902.48, "total_tokens": 1600768}
{"current_steps": 610, "total_steps": 1044, "loss": 0.0714, "lr": 1.107413217533863e-05, "epoch": 1.7489239598278337, "percentage": 58.43, "elapsed_time": "0:09:20", "remaining_time": "0:06:39", "throughput": 2902.3, "total_tokens": 1627744}
{"current_steps": 620, "total_steps": 1044, "loss": 0.0559, "lr": 1.0640330873614336e-05, "epoch": 1.7776183644189383, "percentage": 59.39, "elapsed_time": "0:09:30", "remaining_time": "0:06:29", "throughput": 2902.19, "total_tokens": 1654784}
{"current_steps": 630, "total_steps": 1044, "loss": 0.1277, "lr": 1.0210477047960303e-05, "epoch": 1.806312769010043, "percentage": 60.34, "elapsed_time": "0:09:39", "remaining_time": "0:06:20", "throughput": 2901.29, "total_tokens": 1680928}
{"current_steps": 640, "total_steps": 1044, "loss": 0.0817, "lr": 9.78495991086979e-06, "epoch": 1.8350071736011477, "percentage": 61.3, "elapsed_time": "0:09:48", "remaining_time": "0:06:11", "throughput": 2900.05, "total_tokens": 1707232}
{"current_steps": 650, "total_steps": 1044, "loss": 0.073, "lr": 9.364164748167806e-06, "epoch": 1.8637015781922526, "percentage": 62.26, "elapsed_time": "0:09:57", "remaining_time": "0:06:02", "throughput": 2899.16, "total_tokens": 1733568}
{"current_steps": 660, "total_steps": 1044, "loss": 0.0637, "lr": 8.948472570152874e-06, "epoch": 1.8923959827833574, "percentage": 63.22, "elapsed_time": "0:10:07", "remaining_time": "0:05:53", "throughput": 2898.71, "total_tokens": 1760608}
{"current_steps": 670, "total_steps": 1044, "loss": 0.0512, "lr": 8.538259766610019e-06, "epoch": 1.921090387374462, "percentage": 64.18, "elapsed_time": "0:10:16", "remaining_time": "0:05:44", "throughput": 2898.43, "total_tokens": 1788064}
{"current_steps": 680, "total_steps": 1044, "loss": 0.0608, "lr": 8.133897766007499e-06, "epoch": 1.9497847919655666, "percentage": 65.13, "elapsed_time": "0:10:26", "remaining_time": "0:05:35", "throughput": 2897.74, "total_tokens": 1814688}
{"current_steps": 690, "total_steps": 1044, "loss": 0.0596, "lr": 7.735752699185711e-06, "epoch": 1.9784791965566715, "percentage": 66.09, "elapsed_time": "0:10:35", "remaining_time": "0:05:26", "throughput": 2896.83, "total_tokens": 1841056}
{"current_steps": 700, "total_steps": 1044, "loss": 0.059, "lr": 7.344185067842878e-06, "epoch": 2.005738880918221, "percentage": 67.05, "elapsed_time": "0:10:44", "remaining_time": "0:05:16", "throughput": 2895.85, "total_tokens": 1865472}
{"current_steps": 710, "total_steps": 1044, "loss": 0.0411, "lr": 6.959549418117669e-06, "epoch": 2.034433285509326, "percentage": 68.01, "elapsed_time": "0:10:53", "remaining_time": "0:05:07", "throughput": 2895.61, "total_tokens": 1891968}
{"current_steps": 720, "total_steps": 1044, "loss": 0.0296, "lr": 6.582194019564266e-06, "epoch": 2.0631276901004303, "percentage": 68.97, "elapsed_time": "0:11:02", "remaining_time": "0:04:58", "throughput": 2895.66, "total_tokens": 1918912}
{"current_steps": 730, "total_steps": 1044, "loss": 0.0433, "lr": 6.2124605498106336e-06, "epoch": 2.091822094691535, "percentage": 69.92, "elapsed_time": "0:11:11", "remaining_time": "0:04:48", "throughput": 2895.56, "total_tokens": 1945312}
{"current_steps": 740, "total_steps": 1044, "loss": 0.0283, "lr": 5.850683785185409e-06, "epoch": 2.12051649928264, "percentage": 70.88, "elapsed_time": "0:11:20", "remaining_time": "0:04:39", "throughput": 2895.45, "total_tokens": 1971520}
{"current_steps": 750, "total_steps": 1044, "loss": 0.0419, "lr": 5.497191297593647e-06, "epoch": 2.149210903873745, "percentage": 71.84, "elapsed_time": "0:11:30", "remaining_time": "0:04:30", "throughput": 2895.61, "total_tokens": 1998528}
{"current_steps": 760, "total_steps": 1044, "loss": 0.0601, "lr": 5.1523031579157994e-06, "epoch": 2.177905308464849, "percentage": 72.8, "elapsed_time": "0:11:39", "remaining_time": "0:04:21", "throughput": 2895.39, "total_tokens": 2025088}
{"current_steps": 770, "total_steps": 1044, "loss": 0.0368, "lr": 4.816331646198556e-06, "epoch": 2.206599713055954, "percentage": 73.75, "elapsed_time": "0:11:48", "remaining_time": "0:04:12", "throughput": 2895.28, "total_tokens": 2052032}
{"current_steps": 780, "total_steps": 1044, "loss": 0.0474, "lr": 4.4895809688998655e-06, "epoch": 2.235294117647059, "percentage": 74.71, "elapsed_time": "0:11:58", "remaining_time": "0:04:03", "throughput": 2894.91, "total_tokens": 2079104}
{"current_steps": 790, "total_steps": 1044, "loss": 0.0498, "lr": 4.172346983444269e-06, "epoch": 2.2639885222381637, "percentage": 75.67, "elapsed_time": "0:12:07", "remaining_time": "0:03:53", "throughput": 2894.49, "total_tokens": 2106496}
{"current_steps": 800, "total_steps": 1044, "loss": 0.0389, "lr": 3.864916930337852e-06, "epoch": 2.292682926829268, "percentage": 76.63, "elapsed_time": "0:12:17", "remaining_time": "0:03:44", "throughput": 2894.21, "total_tokens": 2133248}
{"current_steps": 810, "total_steps": 1044, "loss": 0.0275, "lr": 3.567569173085455e-06, "epoch": 2.321377331420373, "percentage": 77.59, "elapsed_time": "0:12:26", "remaining_time": "0:03:35", "throughput": 2894.2, "total_tokens": 2159840}
{"current_steps": 820, "total_steps": 1044, "loss": 0.0269, "lr": 3.2805729461455307e-06, "epoch": 2.350071736011478, "percentage": 78.54, "elapsed_time": "0:12:35", "remaining_time": "0:03:26", "throughput": 2894.51, "total_tokens": 2186528}
{"current_steps": 830, "total_steps": 1044, "loss": 0.0586, "lr": 3.0041881111509783e-06, "epoch": 2.3787661406025826, "percentage": 79.5, "elapsed_time": "0:12:44", "remaining_time": "0:03:17", "throughput": 2894.84, "total_tokens": 2213344}
{"current_steps": 840, "total_steps": 1044, "loss": 0.046, "lr": 2.7386649216166233e-06, "epoch": 2.407460545193687, "percentage": 80.46, "elapsed_time": "0:12:53", "remaining_time": "0:03:07", "throughput": 2894.88, "total_tokens": 2239648}
{"current_steps": 850, "total_steps": 1044, "loss": 0.0414, "lr": 2.484243796346367e-06, "epoch": 2.436154949784792, "percentage": 81.42, "elapsed_time": "0:13:02", "remaining_time": "0:02:58", "throughput": 2895.33, "total_tokens": 2267008}
{"current_steps": 860, "total_steps": 1044, "loss": 0.0375, "lr": 2.241155101745242e-06, "epoch": 2.4648493543758967, "percentage": 82.38, "elapsed_time": "0:13:12", "remaining_time": "0:02:49", "throughput": 2895.48, "total_tokens": 2293664}
{"current_steps": 870, "total_steps": 1044, "loss": 0.0541, "lr": 2.0096189432334194e-06, "epoch": 2.4935437589670015, "percentage": 83.33, "elapsed_time": "0:13:21", "remaining_time": "0:02:40", "throughput": 2895.65, "total_tokens": 2320032}
{"current_steps": 880, "total_steps": 1044, "loss": 0.0324, "lr": 1.7898449659510841e-06, "epoch": 2.5222381635581064, "percentage": 84.29, "elapsed_time": "0:13:30", "remaining_time": "0:02:31", "throughput": 2895.8, "total_tokens": 2346848}
{"current_steps": 890, "total_steps": 1044, "loss": 0.0704, "lr": 1.5820321649345582e-06, "epoch": 2.5509325681492108, "percentage": 85.25, "elapsed_time": "0:13:39", "remaining_time": "0:02:21", "throughput": 2895.58, "total_tokens": 2373408}
{"current_steps": 900, "total_steps": 1044, "loss": 0.0376, "lr": 1.3863687049356465e-06, "epoch": 2.5796269727403156, "percentage": 86.21, "elapsed_time": "0:13:48", "remaining_time": "0:02:12", "throughput": 2895.37, "total_tokens": 2400192}
{"current_steps": 910, "total_steps": 1044, "loss": 0.0377, "lr": 1.2030317500472572e-06, "epoch": 2.6083213773314204, "percentage": 87.16, "elapsed_time": "0:13:58", "remaining_time": "0:02:03", "throughput": 2894.87, "total_tokens": 2426720}
{"current_steps": 920, "total_steps": 1044, "loss": 0.0459, "lr": 1.0321873032896328e-06, "epoch": 2.637015781922525, "percentage": 88.12, "elapsed_time": "0:14:07", "remaining_time": "0:01:54", "throughput": 2894.63, "total_tokens": 2453504}
{"current_steps": 930, "total_steps": 1044, "loss": 0.0352, "lr": 8.7399005630238e-07, "epoch": 2.6657101865136297, "percentage": 89.08, "elapsed_time": "0:14:16", "remaining_time": "0:01:45", "throughput": 2894.4, "total_tokens": 2480064}
{"current_steps": 940, "total_steps": 1044, "loss": 0.0308, "lr": 7.285832492784456e-07, "epoch": 2.6944045911047345, "percentage": 90.04, "elapsed_time": "0:14:26", "remaining_time": "0:01:35", "throughput": 2894.92, "total_tokens": 2507904}
{"current_steps": 950, "total_steps": 1044, "loss": 0.0359, "lr": 5.960985412668457e-07, "epoch": 2.7230989956958394, "percentage": 91.0, "elapsed_time": "0:14:35", "remaining_time": "0:01:26", "throughput": 2894.9, "total_tokens": 2534624}
{"current_steps": 960, "total_steps": 1044, "loss": 0.0251, "lr": 4.766558909615504e-07, "epoch": 2.751793400286944, "percentage": 91.95, "elapsed_time": "0:14:44", "remaining_time": "0:01:17", "throughput": 2894.74, "total_tokens": 2560384}
{"current_steps": 970, "total_steps": 1044, "loss": 0.0278, "lr": 3.703634480845175e-07, "epoch": 2.7804878048780486, "percentage": 92.91, "elapsed_time": "0:14:53", "remaining_time": "0:01:08", "throughput": 2894.73, "total_tokens": 2586976}
{"current_steps": 980, "total_steps": 1044, "loss": 0.0337, "lr": 2.7731745546118295e-07, "epoch": 2.8091822094691534, "percentage": 93.87, "elapsed_time": "0:15:02", "remaining_time": "0:00:58", "throughput": 2894.66, "total_tokens": 2613568}
{"current_steps": 990, "total_steps": 1044, "loss": 0.0418, "lr": 1.9760216187710788e-07, "epoch": 2.8378766140602583, "percentage": 94.83, "elapsed_time": "0:15:12", "remaining_time": "0:00:49", "throughput": 2894.68, "total_tokens": 2640128}
{"current_steps": 1000, "total_steps": 1044, "loss": 0.042, "lr": 1.3128974579462771e-07, "epoch": 2.866571018651363, "percentage": 95.79, "elapsed_time": "0:15:21", "remaining_time": "0:00:40", "throughput": 2894.81, "total_tokens": 2666880}
{"current_steps": 1010, "total_steps": 1044, "loss": 0.043, "lr": 7.844024999865806e-08, "epoch": 2.895265423242468, "percentage": 96.74, "elapsed_time": "0:15:31", "remaining_time": "0:00:31", "throughput": 2892.97, "total_tokens": 2694272}
{"current_steps": 1020, "total_steps": 1044, "loss": 0.0282, "lr": 3.910152723075322e-08, "epoch": 2.9239598278335723, "percentage": 97.7, "elapsed_time": "0:15:40", "remaining_time": "0:00:22", "throughput": 2892.54, "total_tokens": 2721024}
{"current_steps": 1030, "total_steps": 1044, "loss": 0.049, "lr": 1.3309196860701867e-08, "epoch": 2.952654232424677, "percentage": 98.66, "elapsed_time": "0:15:49", "remaining_time": "0:00:12", "throughput": 2891.88, "total_tokens": 2746976}
{"current_steps": 1040, "total_steps": 1044, "loss": 0.0376, "lr": 1.0866126348507699e-09, "epoch": 2.981348637015782, "percentage": 99.62, "elapsed_time": "0:15:59", "remaining_time": "0:00:03", "throughput": 2890.86, "total_tokens": 2773152}
{"current_steps": 1044, "total_steps": 1044, "epoch": 2.9928263988522237, "percentage": 100.0, "elapsed_time": "0:16:03", "remaining_time": "0:00:00", "throughput": 2888.84, "total_tokens": 2783872}