Modelo-treinado / trainer_log.jsonl
alexiaassis's picture
Upload folder using huggingface_hub
6ce1151 verified
{"current_steps": 10, "total_steps": 1395, "loss": 0.8869, "lr": 2.9996919063517692e-05, "epoch": 0.021528525296017224, "percentage": 0.72, "elapsed_time": "0:00:21", "remaining_time": "0:49:04", "throughput": 1715.83, "total_tokens": 36480}
{"current_steps": 20, "total_steps": 1395, "loss": 0.199, "lr": 2.9986270537572827e-05, "epoch": 0.04305705059203445, "percentage": 1.43, "elapsed_time": "0:00:33", "remaining_time": "0:38:50", "throughput": 2143.92, "total_tokens": 72672}
{"current_steps": 30, "total_steps": 1395, "loss": 0.1809, "lr": 2.9968021784983436e-05, "epoch": 0.06458557588805167, "percentage": 2.15, "elapsed_time": "0:00:47", "remaining_time": "0:35:47", "throughput": 2319.02, "total_tokens": 109472}
{"current_steps": 40, "total_steps": 1395, "loss": 0.1766, "lr": 2.994218206053077e-05, "epoch": 0.0861141011840689, "percentage": 2.87, "elapsed_time": "0:01:00", "remaining_time": "0:33:52", "throughput": 2443.83, "total_tokens": 146656}
{"current_steps": 50, "total_steps": 1395, "loss": 0.1784, "lr": 2.9908764468726846e-05, "epoch": 0.10764262648008611, "percentage": 3.58, "elapsed_time": "0:01:12", "remaining_time": "0:32:35", "throughput": 2519.74, "total_tokens": 183200}
{"current_steps": 60, "total_steps": 1395, "loss": 0.165, "lr": 2.986778595716853e-05, "epoch": 0.12917115177610333, "percentage": 4.3, "elapsed_time": "0:01:25", "remaining_time": "0:31:43", "throughput": 2576.27, "total_tokens": 220352}
{"current_steps": 70, "total_steps": 1395, "loss": 0.1884, "lr": 2.981926730794264e-05, "epoch": 0.15069967707212056, "percentage": 5.02, "elapsed_time": "0:01:38", "remaining_time": "0:31:00", "throughput": 2613.67, "total_tokens": 256864}
{"current_steps": 80, "total_steps": 1395, "loss": 0.1668, "lr": 2.9763233127086394e-05, "epoch": 0.1722282023681378, "percentage": 5.73, "elapsed_time": "0:01:50", "remaining_time": "0:30:23", "throughput": 2644.76, "total_tokens": 293408}
{"current_steps": 90, "total_steps": 1395, "loss": 0.1538, "lr": 2.9699711832108555e-05, "epoch": 0.193756727664155, "percentage": 6.45, "elapsed_time": "0:02:03", "remaining_time": "0:29:48", "throughput": 2667.87, "total_tokens": 329152}
{"current_steps": 100, "total_steps": 1395, "loss": 0.1729, "lr": 2.9628735637577612e-05, "epoch": 0.21528525296017223, "percentage": 7.17, "elapsed_time": "0:02:16", "remaining_time": "0:29:21", "throughput": 2688.46, "total_tokens": 365632}
{"current_steps": 110, "total_steps": 1395, "loss": 0.1346, "lr": 2.955034053878428e-05, "epoch": 0.23681377825618946, "percentage": 7.89, "elapsed_time": "0:02:28", "remaining_time": "0:28:55", "throughput": 2704.64, "total_tokens": 401888}
{"current_steps": 120, "total_steps": 1395, "loss": 0.1622, "lr": 2.9464566293486653e-05, "epoch": 0.25834230355220666, "percentage": 8.6, "elapsed_time": "0:02:41", "remaining_time": "0:28:34", "throughput": 2719.24, "total_tokens": 438816}
{"current_steps": 130, "total_steps": 1395, "loss": 0.1357, "lr": 2.9371456401747193e-05, "epoch": 0.2798708288482239, "percentage": 9.32, "elapsed_time": "0:02:54", "remaining_time": "0:28:14", "throughput": 2731.14, "total_tokens": 475648}
{"current_steps": 140, "total_steps": 1395, "loss": 0.1537, "lr": 2.9271058083871864e-05, "epoch": 0.3013993541442411, "percentage": 10.04, "elapsed_time": "0:03:06", "remaining_time": "0:27:54", "throughput": 2741.53, "total_tokens": 512256}
{"current_steps": 150, "total_steps": 1395, "loss": 0.1183, "lr": 2.916342225646254e-05, "epoch": 0.32292787944025836, "percentage": 10.75, "elapsed_time": "0:03:19", "remaining_time": "0:27:36", "throughput": 2750.14, "total_tokens": 548992}
{"current_steps": 160, "total_steps": 1395, "loss": 0.1336, "lr": 2.904860350659487e-05, "epoch": 0.3444564047362756, "percentage": 11.47, "elapsed_time": "0:03:32", "remaining_time": "0:27:19", "throughput": 2757.92, "total_tokens": 585792}
{"current_steps": 170, "total_steps": 1395, "loss": 0.1661, "lr": 2.8926660064134637e-05, "epoch": 0.36598493003229277, "percentage": 12.19, "elapsed_time": "0:03:45", "remaining_time": "0:27:01", "throughput": 2763.58, "total_tokens": 621952}
{"current_steps": 180, "total_steps": 1395, "loss": 0.1405, "lr": 2.8797653772206726e-05, "epoch": 0.38751345532831, "percentage": 12.9, "elapsed_time": "0:03:57", "remaining_time": "0:26:44", "throughput": 2769.62, "total_tokens": 658240}
{"current_steps": 190, "total_steps": 1395, "loss": 0.1456, "lr": 2.8661650055831645e-05, "epoch": 0.40904198062432723, "percentage": 13.62, "elapsed_time": "0:04:10", "remaining_time": "0:26:27", "throughput": 2776.17, "total_tokens": 694880}
{"current_steps": 200, "total_steps": 1395, "loss": 0.1158, "lr": 2.8518717888745486e-05, "epoch": 0.43057050592034446, "percentage": 14.34, "elapsed_time": "0:04:23", "remaining_time": "0:26:12", "throughput": 2781.89, "total_tokens": 731968}
{"current_steps": 210, "total_steps": 1395, "loss": 0.1293, "lr": 2.836892975842015e-05, "epoch": 0.4520990312163617, "percentage": 15.05, "elapsed_time": "0:04:35", "remaining_time": "0:25:55", "throughput": 2786.89, "total_tokens": 768416}
{"current_steps": 220, "total_steps": 1395, "loss": 0.1456, "lr": 2.8212361629301655e-05, "epoch": 0.4736275565123789, "percentage": 15.77, "elapsed_time": "0:04:48", "remaining_time": "0:25:40", "throughput": 2791.68, "total_tokens": 805376}
{"current_steps": 230, "total_steps": 1395, "loss": 0.1428, "lr": 2.8049092904285033e-05, "epoch": 0.4951560818083961, "percentage": 16.49, "elapsed_time": "0:05:01", "remaining_time": "0:25:24", "throughput": 2795.54, "total_tokens": 841472}
{"current_steps": 240, "total_steps": 1395, "loss": 0.1334, "lr": 2.7879206384445526e-05, "epoch": 0.5166846071044133, "percentage": 17.2, "elapsed_time": "0:05:13", "remaining_time": "0:25:09", "throughput": 2800.11, "total_tokens": 878464}
{"current_steps": 250, "total_steps": 1395, "loss": 0.1414, "lr": 2.7702788227046286e-05, "epoch": 0.5382131324004306, "percentage": 17.92, "elapsed_time": "0:05:26", "remaining_time": "0:24:53", "throughput": 2803.4, "total_tokens": 914432}
{"current_steps": 260, "total_steps": 1395, "loss": 0.1098, "lr": 2.751992790184413e-05, "epoch": 0.5597416576964478, "percentage": 18.64, "elapsed_time": "0:05:39", "remaining_time": "0:24:40", "throughput": 2808.2, "total_tokens": 952320}
{"current_steps": 270, "total_steps": 1395, "loss": 0.1643, "lr": 2.733071814571527e-05, "epoch": 0.581270182992465, "percentage": 19.35, "elapsed_time": "0:05:51", "remaining_time": "0:24:25", "throughput": 2810.83, "total_tokens": 988800}
{"current_steps": 280, "total_steps": 1395, "loss": 0.1222, "lr": 2.7135254915624213e-05, "epoch": 0.6027987082884823, "percentage": 20.07, "elapsed_time": "0:06:04", "remaining_time": "0:24:11", "throughput": 2813.81, "total_tokens": 1025952}
{"current_steps": 290, "total_steps": 1395, "loss": 0.1489, "lr": 2.693363733995954e-05, "epoch": 0.6243272335844995, "percentage": 20.79, "elapsed_time": "0:06:17", "remaining_time": "0:23:58", "throughput": 2817.18, "total_tokens": 1063360}
{"current_steps": 300, "total_steps": 1395, "loss": 0.1207, "lr": 2.672596766826138e-05, "epoch": 0.6458557588805167, "percentage": 21.51, "elapsed_time": "0:06:30", "remaining_time": "0:23:43", "throughput": 2818.65, "total_tokens": 1099648}
{"current_steps": 310, "total_steps": 1395, "loss": 0.1319, "lr": 2.6512351219365927e-05, "epoch": 0.667384284176534, "percentage": 22.22, "elapsed_time": "0:06:42", "remaining_time": "0:23:29", "throughput": 2819.84, "total_tokens": 1135712}
{"current_steps": 320, "total_steps": 1395, "loss": 0.1453, "lr": 2.6292896327993467e-05, "epoch": 0.6889128094725512, "percentage": 22.94, "elapsed_time": "0:06:55", "remaining_time": "0:23:15", "throughput": 2822.06, "total_tokens": 1172384}
{"current_steps": 330, "total_steps": 1395, "loss": 0.1202, "lr": 2.606771428980681e-05, "epoch": 0.7104413347685683, "percentage": 23.66, "elapsed_time": "0:07:08", "remaining_time": "0:23:02", "throughput": 2824.06, "total_tokens": 1209440}
{"current_steps": 340, "total_steps": 1395, "loss": 0.1283, "lr": 2.5836919304968184e-05, "epoch": 0.7319698600645855, "percentage": 24.37, "elapsed_time": "0:07:20", "remaining_time": "0:22:48", "throughput": 2825.64, "total_tokens": 1245920}
{"current_steps": 350, "total_steps": 1395, "loss": 0.1374, "lr": 2.5600628420223057e-05, "epoch": 0.7534983853606028, "percentage": 25.09, "elapsed_time": "0:07:34", "remaining_time": "0:22:35", "throughput": 2828.62, "total_tokens": 1284288}
{"current_steps": 360, "total_steps": 1395, "loss": 0.1355, "lr": 2.535896146954037e-05, "epoch": 0.77502691065662, "percentage": 25.81, "elapsed_time": "0:07:46", "remaining_time": "0:22:21", "throughput": 2830.17, "total_tokens": 1320896}
{"current_steps": 370, "total_steps": 1395, "loss": 0.131, "lr": 2.5112041013339212e-05, "epoch": 0.7965554359526372, "percentage": 26.52, "elapsed_time": "0:07:59", "remaining_time": "0:22:09", "throughput": 2832.7, "total_tokens": 1359264}
{"current_steps": 380, "total_steps": 1395, "loss": 0.1034, "lr": 2.48599922763328e-05, "epoch": 0.8180839612486545, "percentage": 27.24, "elapsed_time": "0:08:12", "remaining_time": "0:21:55", "throughput": 2834.57, "total_tokens": 1396032}
{"current_steps": 390, "total_steps": 1395, "loss": 0.1465, "lr": 2.4602943084021286e-05, "epoch": 0.8396124865446717, "percentage": 27.96, "elapsed_time": "0:08:25", "remaining_time": "0:21:41", "throughput": 2835.08, "total_tokens": 1432224}
{"current_steps": 400, "total_steps": 1395, "loss": 0.1049, "lr": 2.434102379786553e-05, "epoch": 0.8611410118406889, "percentage": 28.67, "elapsed_time": "0:08:37", "remaining_time": "0:21:27", "throughput": 2835.78, "total_tokens": 1468224}
{"current_steps": 410, "total_steps": 1395, "loss": 0.1024, "lr": 2.4074367249174864e-05, "epoch": 0.8826695371367062, "percentage": 29.39, "elapsed_time": "0:08:50", "remaining_time": "0:21:14", "throughput": 2837.37, "total_tokens": 1505088}
{"current_steps": 420, "total_steps": 1395, "loss": 0.1155, "lr": 2.380310867174219e-05, "epoch": 0.9041980624327234, "percentage": 30.11, "elapsed_time": "0:09:03", "remaining_time": "0:21:01", "throughput": 2838.63, "total_tokens": 1542048}
{"current_steps": 430, "total_steps": 1395, "loss": 0.117, "lr": 2.3527385633260724e-05, "epoch": 0.9257265877287406, "percentage": 30.82, "elapsed_time": "0:09:15", "remaining_time": "0:20:47", "throughput": 2840.25, "total_tokens": 1578496}
{"current_steps": 440, "total_steps": 1395, "loss": 0.1023, "lr": 2.3247337965557097e-05, "epoch": 0.9472551130247578, "percentage": 31.54, "elapsed_time": "0:09:28", "remaining_time": "0:20:32", "throughput": 2840.35, "total_tokens": 1613440}
{"current_steps": 450, "total_steps": 1395, "loss": 0.1046, "lr": 2.2963107693676192e-05, "epoch": 0.9687836383207751, "percentage": 32.26, "elapsed_time": "0:09:40", "remaining_time": "0:20:19", "throughput": 2840.69, "total_tokens": 1649760}
{"current_steps": 460, "total_steps": 1395, "loss": 0.1106, "lr": 2.2674838963853738e-05, "epoch": 0.9903121636167922, "percentage": 32.97, "elapsed_time": "0:09:53", "remaining_time": "0:20:06", "throughput": 2841.62, "total_tokens": 1686016}
{"current_steps": 470, "total_steps": 1395, "loss": 0.0859, "lr": 2.2382677970413097e-05, "epoch": 1.0107642626480087, "percentage": 33.69, "elapsed_time": "0:10:05", "remaining_time": "0:19:51", "throughput": 2842.72, "total_tokens": 1721152}
{"current_steps": 480, "total_steps": 1395, "loss": 0.1089, "lr": 2.2086772881623455e-05, "epoch": 1.0322927879440258, "percentage": 34.41, "elapsed_time": "0:10:18", "remaining_time": "0:19:38", "throughput": 2843.73, "total_tokens": 1757568}
{"current_steps": 490, "total_steps": 1395, "loss": 0.0852, "lr": 2.1787273764556827e-05, "epoch": 1.0538213132400431, "percentage": 35.13, "elapsed_time": "0:10:30", "remaining_time": "0:19:25", "throughput": 2845.12, "total_tokens": 1794848}
{"current_steps": 500, "total_steps": 1395, "loss": 0.0988, "lr": 2.1484332508982167e-05, "epoch": 1.0753498385360603, "percentage": 35.84, "elapsed_time": "0:10:43", "remaining_time": "0:19:12", "throughput": 2846.63, "total_tokens": 1832256}
{"current_steps": 510, "total_steps": 1395, "loss": 0.1001, "lr": 2.11781027503351e-05, "epoch": 1.0968783638320776, "percentage": 36.56, "elapsed_time": "0:10:56", "remaining_time": "0:18:58", "throughput": 2847.5, "total_tokens": 1868928}
{"current_steps": 520, "total_steps": 1395, "loss": 0.1107, "lr": 2.0868739791802312e-05, "epoch": 1.1184068891280947, "percentage": 37.28, "elapsed_time": "0:11:09", "remaining_time": "0:18:45", "throughput": 2848.51, "total_tokens": 1905824}
{"current_steps": 530, "total_steps": 1395, "loss": 0.0894, "lr": 2.0556400525560168e-05, "epoch": 1.1399354144241118, "percentage": 37.99, "elapsed_time": "0:11:21", "remaining_time": "0:18:32", "throughput": 2848.98, "total_tokens": 1941760}
{"current_steps": 540, "total_steps": 1395, "loss": 0.1016, "lr": 2.0241243353207484e-05, "epoch": 1.1614639397201292, "percentage": 38.71, "elapsed_time": "0:11:34", "remaining_time": "0:18:19", "throughput": 2849.25, "total_tokens": 1978208}
{"current_steps": 550, "total_steps": 1395, "loss": 0.0932, "lr": 1.9923428105432796e-05, "epoch": 1.1829924650161463, "percentage": 39.43, "elapsed_time": "0:11:47", "remaining_time": "0:18:06", "throughput": 2850.47, "total_tokens": 2015904}
{"current_steps": 560, "total_steps": 1395, "loss": 0.1056, "lr": 1.9603115960956852e-05, "epoch": 1.2045209903121636, "percentage": 40.14, "elapsed_time": "0:11:59", "remaining_time": "0:17:53", "throughput": 2851.01, "total_tokens": 2052672}
{"current_steps": 570, "total_steps": 1395, "loss": 0.1033, "lr": 1.9280469364791505e-05, "epoch": 1.2260495156081808, "percentage": 40.86, "elapsed_time": "0:12:12", "remaining_time": "0:17:40", "throughput": 2851.17, "total_tokens": 2088736}
{"current_steps": 580, "total_steps": 1395, "loss": 0.1035, "lr": 1.8955651945856354e-05, "epoch": 1.247578040904198, "percentage": 41.58, "elapsed_time": "0:12:25", "remaining_time": "0:17:27", "throughput": 2851.42, "total_tokens": 2125600}
{"current_steps": 590, "total_steps": 1395, "loss": 0.0811, "lr": 1.8628828433995013e-05, "epoch": 1.2691065662002152, "percentage": 42.29, "elapsed_time": "0:12:38", "remaining_time": "0:17:14", "throughput": 2851.58, "total_tokens": 2161792}
{"current_steps": 600, "total_steps": 1395, "loss": 0.0888, "lr": 1.8300164576433035e-05, "epoch": 1.2906350914962326, "percentage": 43.01, "elapsed_time": "0:12:50", "remaining_time": "0:17:01", "throughput": 2851.53, "total_tokens": 2197920}
{"current_steps": 610, "total_steps": 1395, "loss": 0.0968, "lr": 1.7969827053719862e-05, "epoch": 1.3121636167922497, "percentage": 43.73, "elapsed_time": "0:13:03", "remaining_time": "0:16:48", "throughput": 2852.2, "total_tokens": 2235168}
{"current_steps": 620, "total_steps": 1395, "loss": 0.1311, "lr": 1.7637983395197465e-05, "epoch": 1.333692142088267, "percentage": 44.44, "elapsed_time": "0:13:16", "remaining_time": "0:16:35", "throughput": 2852.02, "total_tokens": 2270816}
{"current_steps": 630, "total_steps": 1395, "loss": 0.1001, "lr": 1.7304801894038522e-05, "epoch": 1.3552206673842842, "percentage": 45.16, "elapsed_time": "0:13:29", "remaining_time": "0:16:22", "throughput": 2852.57, "total_tokens": 2308128}
{"current_steps": 640, "total_steps": 1395, "loss": 0.0887, "lr": 1.6970451521897198e-05, "epoch": 1.3767491926803013, "percentage": 45.88, "elapsed_time": "0:13:41", "remaining_time": "0:16:09", "throughput": 2852.79, "total_tokens": 2344992}
{"current_steps": 650, "total_steps": 1395, "loss": 0.1041, "lr": 1.663510184321587e-05, "epoch": 1.3982777179763186, "percentage": 46.59, "elapsed_time": "0:13:54", "remaining_time": "0:15:56", "throughput": 2852.95, "total_tokens": 2381056}
{"current_steps": 660, "total_steps": 1395, "loss": 0.1022, "lr": 1.6298922929231194e-05, "epoch": 1.419806243272336, "percentage": 47.31, "elapsed_time": "0:14:07", "remaining_time": "0:15:43", "throughput": 2852.78, "total_tokens": 2417152}
{"current_steps": 670, "total_steps": 1395, "loss": 0.0831, "lr": 1.5962085271723158e-05, "epoch": 1.441334768568353, "percentage": 48.03, "elapsed_time": "0:14:20", "remaining_time": "0:15:30", "throughput": 2853.25, "total_tokens": 2454048}
{"current_steps": 680, "total_steps": 1395, "loss": 0.1055, "lr": 1.5624759696550874e-05, "epoch": 1.4628632938643702, "percentage": 48.75, "elapsed_time": "0:14:33", "remaining_time": "0:15:18", "throughput": 2853.64, "total_tokens": 2491552}
{"current_steps": 690, "total_steps": 1395, "loss": 0.0812, "lr": 1.528711727701893e-05, "epoch": 1.4843918191603875, "percentage": 49.46, "elapsed_time": "0:14:45", "remaining_time": "0:15:05", "throughput": 2853.75, "total_tokens": 2528128}
{"current_steps": 700, "total_steps": 1395, "loss": 0.1016, "lr": 1.4949329247118258e-05, "epoch": 1.5059203444564049, "percentage": 50.18, "elapsed_time": "0:14:58", "remaining_time": "0:14:52", "throughput": 2854.04, "total_tokens": 2564448}
{"current_steps": 710, "total_steps": 1395, "loss": 0.0956, "lr": 1.4611566914685489e-05, "epoch": 1.527448869752422, "percentage": 50.9, "elapsed_time": "0:15:11", "remaining_time": "0:14:39", "throughput": 2854.16, "total_tokens": 2601056}
{"current_steps": 720, "total_steps": 1395, "loss": 0.0936, "lr": 1.4274001574524894e-05, "epoch": 1.5489773950484391, "percentage": 51.61, "elapsed_time": "0:15:24", "remaining_time": "0:14:26", "throughput": 2854.51, "total_tokens": 2637888}
{"current_steps": 730, "total_steps": 1395, "loss": 0.0728, "lr": 1.3936804421536906e-05, "epoch": 1.5705059203444565, "percentage": 52.33, "elapsed_time": "0:15:37", "remaining_time": "0:14:13", "throughput": 2852.88, "total_tokens": 2673312}
{"current_steps": 740, "total_steps": 1395, "loss": 0.0839, "lr": 1.3600146463897327e-05, "epoch": 1.5920344456404736, "percentage": 53.05, "elapsed_time": "0:15:49", "remaining_time": "0:14:00", "throughput": 2852.96, "total_tokens": 2709696}
{"current_steps": 750, "total_steps": 1395, "loss": 0.1308, "lr": 1.3264198436331193e-05, "epoch": 1.6135629709364907, "percentage": 53.76, "elapsed_time": "0:16:02", "remaining_time": "0:13:47", "throughput": 2853.61, "total_tokens": 2746528}
{"current_steps": 760, "total_steps": 1395, "loss": 0.0927, "lr": 1.2929130713525396e-05, "epoch": 1.635091496232508, "percentage": 54.48, "elapsed_time": "0:16:15", "remaining_time": "0:13:34", "throughput": 2853.76, "total_tokens": 2782656}
{"current_steps": 770, "total_steps": 1395, "loss": 0.1135, "lr": 1.2595113223723788e-05, "epoch": 1.6566200215285254, "percentage": 55.2, "elapsed_time": "0:16:27", "remaining_time": "0:13:21", "throughput": 2854.08, "total_tokens": 2819744}
{"current_steps": 780, "total_steps": 1395, "loss": 0.0813, "lr": 1.2262315362548825e-05, "epoch": 1.6781485468245425, "percentage": 55.91, "elapsed_time": "0:16:41", "remaining_time": "0:13:09", "throughput": 2854.35, "total_tokens": 2857216}
{"current_steps": 790, "total_steps": 1395, "loss": 0.0933, "lr": 1.1930905907093196e-05, "epoch": 1.6996770721205596, "percentage": 56.63, "elapsed_time": "0:16:53", "remaining_time": "0:12:56", "throughput": 2853.72, "total_tokens": 2892000}
{"current_steps": 800, "total_steps": 1395, "loss": 0.0881, "lr": 1.1601052930325236e-05, "epoch": 1.721205597416577, "percentage": 57.35, "elapsed_time": "0:17:06", "remaining_time": "0:12:43", "throughput": 2853.73, "total_tokens": 2928832}
{"current_steps": 810, "total_steps": 1395, "loss": 0.079, "lr": 1.127292371585141e-05, "epoch": 1.7427341227125943, "percentage": 58.06, "elapsed_time": "0:17:19", "remaining_time": "0:12:30", "throughput": 2853.98, "total_tokens": 2965632}
{"current_steps": 820, "total_steps": 1395, "loss": 0.1132, "lr": 1.0946684673079074e-05, "epoch": 1.7642626480086114, "percentage": 58.78, "elapsed_time": "0:17:31", "remaining_time": "0:12:17", "throughput": 2854.14, "total_tokens": 3002496}
{"current_steps": 830, "total_steps": 1395, "loss": 0.0793, "lr": 1.0622501252822628e-05, "epoch": 1.7857911733046286, "percentage": 59.5, "elapsed_time": "0:17:44", "remaining_time": "0:12:04", "throughput": 2854.34, "total_tokens": 3038944}
{"current_steps": 840, "total_steps": 1395, "loss": 0.0918, "lr": 1.0300537863395823e-05, "epoch": 1.807319698600646, "percentage": 60.22, "elapsed_time": "0:17:57", "remaining_time": "0:11:51", "throughput": 2854.57, "total_tokens": 3076032}
{"current_steps": 850, "total_steps": 1395, "loss": 0.093, "lr": 9.98095778723271e-06, "epoch": 1.8288482238966632, "percentage": 60.93, "elapsed_time": "0:18:10", "remaining_time": "0:11:39", "throughput": 2854.67, "total_tokens": 3112448}
{"current_steps": 860, "total_steps": 1395, "loss": 0.0973, "lr": 9.663923098079656e-06, "epoch": 1.8503767491926801, "percentage": 61.65, "elapsed_time": "0:18:23", "remaining_time": "0:11:26", "throughput": 2854.95, "total_tokens": 3149376}
{"current_steps": 870, "total_steps": 1395, "loss": 0.0995, "lr": 9.349594578800259e-06, "epoch": 1.8719052744886975, "percentage": 62.37, "elapsed_time": "0:18:36", "remaining_time": "0:11:13", "throughput": 2854.97, "total_tokens": 3186304}
{"current_steps": 880, "total_steps": 1395, "loss": 0.1238, "lr": 9.038131639834977e-06, "epoch": 1.8934337997847148, "percentage": 63.08, "elapsed_time": "0:18:48", "remaining_time": "0:11:00", "throughput": 2855.09, "total_tokens": 3222752}
{"current_steps": 890, "total_steps": 1395, "loss": 0.0912, "lr": 8.7296922383568e-06, "epoch": 1.914962325080732, "percentage": 63.8, "elapsed_time": "0:19:01", "remaining_time": "0:10:47", "throughput": 2854.97, "total_tokens": 3258976}
{"current_steps": 900, "total_steps": 1395, "loss": 0.0935, "lr": 8.424432798163838e-06, "epoch": 1.936490850376749, "percentage": 64.52, "elapsed_time": "0:19:14", "remaining_time": "0:10:34", "throughput": 2855.05, "total_tokens": 3295040}
{"current_steps": 910, "total_steps": 1395, "loss": 0.0722, "lr": 8.122508130349711e-06, "epoch": 1.9580193756727664, "percentage": 65.23, "elapsed_time": "0:19:26", "remaining_time": "0:10:21", "throughput": 2855.32, "total_tokens": 3331392}
{"current_steps": 920, "total_steps": 1395, "loss": 0.1001, "lr": 7.82407135479164e-06, "epoch": 1.9795479009687837, "percentage": 65.95, "elapsed_time": "0:19:39", "remaining_time": "0:10:09", "throughput": 2855.37, "total_tokens": 3368032}
{"current_steps": 930, "total_steps": 1395, "loss": 0.0647, "lr": 7.529273822496379e-06, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:19:51", "remaining_time": "0:09:55", "throughput": 2855.74, "total_tokens": 3403584}
{"current_steps": 940, "total_steps": 1395, "loss": 0.0881, "lr": 7.238265038843143e-06, "epoch": 2.0215285252960173, "percentage": 67.38, "elapsed_time": "0:20:04", "remaining_time": "0:09:43", "throughput": 2855.93, "total_tokens": 3440000}
{"current_steps": 950, "total_steps": 1395, "loss": 0.0573, "lr": 6.951192587762555e-06, "epoch": 2.0430570505920342, "percentage": 68.1, "elapsed_time": "0:20:17", "remaining_time": "0:09:30", "throughput": 2856.07, "total_tokens": 3476640}
{"current_steps": 960, "total_steps": 1395, "loss": 0.0646, "lr": 6.668202056890052e-06, "epoch": 2.0645855758880516, "percentage": 68.82, "elapsed_time": "0:20:29", "remaining_time": "0:09:17", "throughput": 2856.18, "total_tokens": 3512992}
{"current_steps": 970, "total_steps": 1395, "loss": 0.0682, "lr": 6.389436963731707e-06, "epoch": 2.086114101184069, "percentage": 69.53, "elapsed_time": "0:20:42", "remaining_time": "0:09:04", "throughput": 2856.35, "total_tokens": 3549504}
{"current_steps": 980, "total_steps": 1395, "loss": 0.0931, "lr": 6.115038682879904e-06, "epoch": 2.1076426264800863, "percentage": 70.25, "elapsed_time": "0:20:55", "remaining_time": "0:08:51", "throughput": 2856.6, "total_tokens": 3586592}
{"current_steps": 990, "total_steps": 1395, "loss": 0.0686, "lr": 5.845146374315781e-06, "epoch": 2.129171151776103, "percentage": 70.97, "elapsed_time": "0:21:08", "remaining_time": "0:08:38", "throughput": 2856.7, "total_tokens": 3622944}
{"current_steps": 1000, "total_steps": 1395, "loss": 0.0917, "lr": 5.579896912834807e-06, "epoch": 2.1506996770721205, "percentage": 71.68, "elapsed_time": "0:21:21", "remaining_time": "0:08:26", "throughput": 2856.96, "total_tokens": 3660096}
{"current_steps": 1010, "total_steps": 1395, "loss": 0.0931, "lr": 5.319424818631293e-06, "epoch": 2.172228202368138, "percentage": 72.4, "elapsed_time": "0:21:34", "remaining_time": "0:08:13", "throughput": 2854.9, "total_tokens": 3697024}
{"current_steps": 1020, "total_steps": 1395, "loss": 0.0855, "lr": 5.063862189076988e-06, "epoch": 2.193756727664155, "percentage": 73.12, "elapsed_time": "0:21:47", "remaining_time": "0:08:00", "throughput": 2855.44, "total_tokens": 3734240}
{"current_steps": 1030, "total_steps": 1395, "loss": 0.0909, "lr": 4.813338631728471e-06, "epoch": 2.215285252960172, "percentage": 73.84, "elapsed_time": "0:22:00", "remaining_time": "0:07:47", "throughput": 2855.84, "total_tokens": 3771232}
{"current_steps": 1040, "total_steps": 1395, "loss": 0.0817, "lr": 4.567981198597168e-06, "epoch": 2.2368137782561894, "percentage": 74.55, "elapsed_time": "0:22:13", "remaining_time": "0:07:35", "throughput": 2856.28, "total_tokens": 3808416}
{"current_steps": 1050, "total_steps": 1395, "loss": 0.0599, "lr": 4.327914321715459e-06, "epoch": 2.2583423035522068, "percentage": 75.27, "elapsed_time": "0:22:25", "remaining_time": "0:07:22", "throughput": 2856.56, "total_tokens": 3844768}
{"current_steps": 1060, "total_steps": 1395, "loss": 0.0711, "lr": 4.09325975003146e-06, "epoch": 2.2798708288482237, "percentage": 75.99, "elapsed_time": "0:22:38", "remaining_time": "0:07:09", "throughput": 2856.69, "total_tokens": 3881088}
{"current_steps": 1070, "total_steps": 1395, "loss": 0.0723, "lr": 3.864136487664538e-06, "epoch": 2.301399354144241, "percentage": 76.7, "elapsed_time": "0:22:51", "remaining_time": "0:06:56", "throughput": 2856.61, "total_tokens": 3916928}
{"current_steps": 1080, "total_steps": 1395, "loss": 0.0524, "lr": 3.640660733552863e-06, "epoch": 2.3229278794402584, "percentage": 77.42, "elapsed_time": "0:23:03", "remaining_time": "0:06:43", "throughput": 2856.61, "total_tokens": 3952928}
{"current_steps": 1090, "total_steps": 1395, "loss": 0.0513, "lr": 3.4229458225235696e-06, "epoch": 2.3444564047362757, "percentage": 78.14, "elapsed_time": "0:23:16", "remaining_time": "0:06:30", "throughput": 2856.7, "total_tokens": 3990720}
{"current_steps": 1100, "total_steps": 1395, "loss": 0.0773, "lr": 3.211102167815469e-06, "epoch": 2.3659849300322926, "percentage": 78.85, "elapsed_time": "0:23:29", "remaining_time": "0:06:18", "throughput": 2856.67, "total_tokens": 4026656}
{"current_steps": 1110, "total_steps": 1395, "loss": 0.0896, "lr": 3.0052372050834025e-06, "epoch": 2.38751345532831, "percentage": 79.57, "elapsed_time": "0:23:42", "remaining_time": "0:06:05", "throughput": 2857.07, "total_tokens": 4064192}
{"current_steps": 1120, "total_steps": 1395, "loss": 0.0719, "lr": 2.805455337912722e-06, "epoch": 2.4090419806243273, "percentage": 80.29, "elapsed_time": "0:23:55", "remaining_time": "0:05:52", "throughput": 2857.52, "total_tokens": 4101632}
{"current_steps": 1130, "total_steps": 1395, "loss": 0.0801, "lr": 2.611857884871379e-06, "epoch": 2.4305705059203446, "percentage": 81.0, "elapsed_time": "0:24:08", "remaining_time": "0:05:39", "throughput": 2857.87, "total_tokens": 4138208}
{"current_steps": 1140, "total_steps": 1395, "loss": 0.0821, "lr": 2.424543028126661e-06, "epoch": 2.4520990312163615, "percentage": 81.72, "elapsed_time": "0:24:20", "remaining_time": "0:05:26", "throughput": 2857.95, "total_tokens": 4174592}
{"current_steps": 1150, "total_steps": 1395, "loss": 0.0654, "lr": 2.243605763652489e-06, "epoch": 2.473627556512379, "percentage": 82.44, "elapsed_time": "0:24:33", "remaining_time": "0:05:13", "throughput": 2858.19, "total_tokens": 4211616}
{"current_steps": 1160, "total_steps": 1395, "loss": 0.0767, "lr": 2.0691378530525933e-06, "epoch": 2.495156081808396, "percentage": 83.15, "elapsed_time": "0:24:46", "remaining_time": "0:05:01", "throughput": 2858.28, "total_tokens": 4247744}
{"current_steps": 1170, "total_steps": 1395, "loss": 0.0655, "lr": 1.9012277770239829e-06, "epoch": 2.5166846071044136, "percentage": 83.87, "elapsed_time": "0:24:58", "remaining_time": "0:04:48", "throughput": 2858.65, "total_tokens": 4284224}
{"current_steps": 1180, "total_steps": 1395, "loss": 0.0584, "lr": 1.7399606904843224e-06, "epoch": 2.5382131324004304, "percentage": 84.59, "elapsed_time": "0:25:11", "remaining_time": "0:04:35", "throughput": 2858.64, "total_tokens": 4320672}
{"current_steps": 1190, "total_steps": 1395, "loss": 0.0601, "lr": 1.5854183793859577e-06, "epoch": 2.559741657696448, "percentage": 85.3, "elapsed_time": "0:25:24", "remaining_time": "0:04:22", "throughput": 2858.97, "total_tokens": 4358080}
{"current_steps": 1200, "total_steps": 1395, "loss": 0.0665, "lr": 1.437679219238498e-06, "epoch": 2.581270182992465, "percentage": 86.02, "elapsed_time": "0:25:37", "remaining_time": "0:04:09", "throughput": 2859.24, "total_tokens": 4394752}
{"current_steps": 1210, "total_steps": 1395, "loss": 0.0658, "lr": 1.2968181353609854e-06, "epoch": 2.602798708288482, "percentage": 86.74, "elapsed_time": "0:25:49", "remaining_time": "0:03:56", "throughput": 2859.51, "total_tokens": 4431808}
{"current_steps": 1220, "total_steps": 1395, "loss": 0.0898, "lr": 1.1629065648838495e-06, "epoch": 2.6243272335844994, "percentage": 87.46, "elapsed_time": "0:26:02", "remaining_time": "0:03:44", "throughput": 2859.39, "total_tokens": 4467328}
{"current_steps": 1230, "total_steps": 1395, "loss": 0.0619, "lr": 1.0360124205198102e-06, "epoch": 2.6458557588805167, "percentage": 88.17, "elapsed_time": "0:26:15", "remaining_time": "0:03:31", "throughput": 2859.53, "total_tokens": 4503808}
{"current_steps": 1240, "total_steps": 1395, "loss": 0.0763, "lr": 9.162000561222672e-07, "epoch": 2.667384284176534, "percentage": 88.89, "elapsed_time": "0:26:28", "remaining_time": "0:03:18", "throughput": 2859.88, "total_tokens": 4541536}
{"current_steps": 1250, "total_steps": 1395, "loss": 0.0909, "lr": 8.035302340484646e-07, "epoch": 2.6889128094725514, "percentage": 89.61, "elapsed_time": "0:26:40", "remaining_time": "0:03:05", "throughput": 2859.98, "total_tokens": 4577664}
{"current_steps": 1260, "total_steps": 1395, "loss": 0.0683, "lr": 6.980600943441273e-07, "epoch": 2.7104413347685683, "percentage": 90.32, "elapsed_time": "0:26:53", "remaining_time": "0:02:52", "throughput": 2860.22, "total_tokens": 4614176}
{"current_steps": 1270, "total_steps": 1395, "loss": 0.0867, "lr": 5.998431257651132e-07, "epoch": 2.7319698600645856, "percentage": 91.04, "elapsed_time": "0:27:05", "remaining_time": "0:02:40", "throughput": 2860.43, "total_tokens": 4650848}
{"current_steps": 1280, "total_steps": 1395, "loss": 0.0648, "lr": 5.089291386508055e-07, "epoch": 2.7534983853606025, "percentage": 91.76, "elapsed_time": "0:27:18", "remaining_time": "0:02:27", "throughput": 2860.58, "total_tokens": 4687456}
{"current_steps": 1290, "total_steps": 1395, "loss": 0.0672, "lr": 4.2536423966299256e-07, "epoch": 2.77502691065662, "percentage": 92.47, "elapsed_time": "0:27:31", "remaining_time": "0:02:14", "throughput": 2860.66, "total_tokens": 4723840}
{"current_steps": 1300, "total_steps": 1395, "loss": 0.089, "lr": 3.491908084030776e-07, "epoch": 2.7965554359526372, "percentage": 93.19, "elapsed_time": "0:27:44", "remaining_time": "0:02:01", "throughput": 2861.01, "total_tokens": 4761056}
{"current_steps": 1310, "total_steps": 1395, "loss": 0.07, "lr": 2.804474759194176e-07, "epoch": 2.8180839612486546, "percentage": 93.91, "elapsed_time": "0:27:56", "remaining_time": "0:01:48", "throughput": 2861.4, "total_tokens": 4798240}
{"current_steps": 1320, "total_steps": 1395, "loss": 0.0807, "lr": 2.1916910511575627e-07, "epoch": 2.839612486544672, "percentage": 94.62, "elapsed_time": "0:28:09", "remaining_time": "0:01:35", "throughput": 2861.57, "total_tokens": 4834560}
{"current_steps": 1330, "total_steps": 1395, "loss": 0.0669, "lr": 1.6538677307062933e-07, "epoch": 2.861141011840689, "percentage": 95.34, "elapsed_time": "0:28:22", "remaining_time": "0:01:23", "throughput": 2861.67, "total_tokens": 4871328}
{"current_steps": 1340, "total_steps": 1395, "loss": 0.0552, "lr": 1.1912775527676278e-07, "epoch": 2.882669537136706, "percentage": 96.06, "elapsed_time": "0:28:34", "remaining_time": "0:01:10", "throughput": 2861.89, "total_tokens": 4907744}
{"current_steps": 1350, "total_steps": 1395, "loss": 0.0917, "lr": 8.04155118084099e-08, "epoch": 2.9041980624327235, "percentage": 96.77, "elapsed_time": "0:28:47", "remaining_time": "0:00:57", "throughput": 2861.9, "total_tokens": 4943712}
{"current_steps": 1360, "total_steps": 1395, "loss": 0.074, "lr": 4.9269675423671556e-08, "epoch": 2.9257265877287404, "percentage": 97.49, "elapsed_time": "0:29:00", "remaining_time": "0:00:44", "throughput": 2861.97, "total_tokens": 4980480}
{"current_steps": 1370, "total_steps": 1395, "loss": 0.0599, "lr": 2.570604160781753e-08, "epoch": 2.9472551130247577, "percentage": 98.21, "elapsed_time": "0:29:12", "remaining_time": "0:00:31", "throughput": 2862.0, "total_tokens": 5016800}
{"current_steps": 1380, "total_steps": 1395, "loss": 0.0593, "lr": 9.736560562675912e-09, "epoch": 2.968783638320775, "percentage": 98.92, "elapsed_time": "0:29:25", "remaining_time": "0:00:19", "throughput": 2862.02, "total_tokens": 5053440}
{"current_steps": 1390, "total_steps": 1395, "loss": 0.0763, "lr": 1.3693311461326908e-09, "epoch": 2.9903121636167924, "percentage": 99.64, "elapsed_time": "0:29:38", "remaining_time": "0:00:06", "throughput": 2862.27, "total_tokens": 5090592}
{"current_steps": 1395, "total_steps": 1395, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:29:45", "remaining_time": "0:00:00", "throughput": 2860.57, "total_tokens": 5107072}