Training in progress, step 19080
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +190 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 58745928
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6b648d5b0889f1a56d081695ded6da2225e1ad398557e28778803760c33558a
|
| 3 |
size 58745928
|
trainer_log.jsonl
CHANGED
|
@@ -3645,3 +3645,193 @@
|
|
| 3645 |
{"current_steps": 18130, "total_steps": 19080, "loss": 0.2177, "lr": 3.774272317146277e-07, "epoch": 9.5020964360587, "percentage": 95.02, "elapsed_time": "1:24:01", "remaining_time": "0:04:24", "throughput": 2350.03, "total_tokens": 11847912}
|
| 3646 |
{"current_steps": 18135, "total_steps": 19080, "loss": 0.1504, "lr": 3.7347880953662597e-07, "epoch": 9.504716981132075, "percentage": 95.05, "elapsed_time": "1:24:03", "remaining_time": "0:04:22", "throughput": 2350.18, "total_tokens": 11853192}
|
| 3647 |
{"current_steps": 18140, "total_steps": 19080, "loss": 0.2218, "lr": 3.6955099370666045e-07, "epoch": 9.50733752620545, "percentage": 95.07, "elapsed_time": "1:24:04", "remaining_time": "0:04:21", "throughput": 2350.25, "total_tokens": 11856808}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3645 |
{"current_steps": 18130, "total_steps": 19080, "loss": 0.2177, "lr": 3.774272317146277e-07, "epoch": 9.5020964360587, "percentage": 95.02, "elapsed_time": "1:24:01", "remaining_time": "0:04:24", "throughput": 2350.03, "total_tokens": 11847912}
|
| 3646 |
{"current_steps": 18135, "total_steps": 19080, "loss": 0.1504, "lr": 3.7347880953662597e-07, "epoch": 9.504716981132075, "percentage": 95.05, "elapsed_time": "1:24:03", "remaining_time": "0:04:22", "throughput": 2350.18, "total_tokens": 11853192}
|
| 3647 |
{"current_steps": 18140, "total_steps": 19080, "loss": 0.2218, "lr": 3.6955099370666045e-07, "epoch": 9.50733752620545, "percentage": 95.07, "elapsed_time": "1:24:04", "remaining_time": "0:04:21", "throughput": 2350.25, "total_tokens": 11856808}
|
| 3648 |
+
{"current_steps": 18145, "total_steps": 19080, "loss": 0.395, "lr": 3.656437875113522e-07, "epoch": 9.509958071278826, "percentage": 95.1, "elapsed_time": "1:24:06", "remaining_time": "0:04:20", "throughput": 2350.32, "total_tokens": 11860360}
|
| 3649 |
+
{"current_steps": 18150, "total_steps": 19080, "loss": 0.1542, "lr": 3.617571942200693e-07, "epoch": 9.5125786163522, "percentage": 95.13, "elapsed_time": "1:24:07", "remaining_time": "0:04:18", "throughput": 2350.32, "total_tokens": 11863048}
|
| 3650 |
+
{"current_steps": 18155, "total_steps": 19080, "loss": 0.2159, "lr": 3.5789121708493523e-07, "epoch": 9.515199161425576, "percentage": 95.15, "elapsed_time": "1:24:08", "remaining_time": "0:04:17", "throughput": 2350.41, "total_tokens": 11867016}
|
| 3651 |
+
{"current_steps": 18160, "total_steps": 19080, "loss": 0.1665, "lr": 3.5404585934082635e-07, "epoch": 9.517819706498951, "percentage": 95.18, "elapsed_time": "1:24:10", "remaining_time": "0:04:15", "throughput": 2350.5, "total_tokens": 11870728}
|
| 3652 |
+
{"current_steps": 18165, "total_steps": 19080, "loss": 0.1925, "lr": 3.502211242053577e-07, "epoch": 9.520440251572326, "percentage": 95.2, "elapsed_time": "1:24:11", "remaining_time": "0:04:14", "throughput": 2350.54, "total_tokens": 11873928}
|
| 3653 |
+
{"current_steps": 18170, "total_steps": 19080, "loss": 0.1691, "lr": 3.4641701487889697e-07, "epoch": 9.523060796645701, "percentage": 95.23, "elapsed_time": "1:24:12", "remaining_time": "0:04:13", "throughput": 2350.57, "total_tokens": 11876968}
|
| 3654 |
+
{"current_steps": 18175, "total_steps": 19080, "loss": 0.2048, "lr": 3.4263353454454806e-07, "epoch": 9.525681341719078, "percentage": 95.26, "elapsed_time": "1:24:14", "remaining_time": "0:04:11", "throughput": 2350.65, "total_tokens": 11880648}
|
| 3655 |
+
{"current_steps": 18180, "total_steps": 19080, "loss": 0.2346, "lr": 3.3887068636815346e-07, "epoch": 9.528301886792454, "percentage": 95.28, "elapsed_time": "1:24:15", "remaining_time": "0:04:10", "throughput": 2350.68, "total_tokens": 11883560}
|
| 3656 |
+
{"current_steps": 18185, "total_steps": 19080, "loss": 0.2058, "lr": 3.351284734982918e-07, "epoch": 9.530922431865829, "percentage": 95.31, "elapsed_time": "1:24:17", "remaining_time": "0:04:08", "throughput": 2350.82, "total_tokens": 11888296}
|
| 3657 |
+
{"current_steps": 18190, "total_steps": 19080, "loss": 0.211, "lr": 3.3140689906628054e-07, "epoch": 9.533542976939204, "percentage": 95.34, "elapsed_time": "1:24:18", "remaining_time": "0:04:07", "throughput": 2350.93, "total_tokens": 11892872}
|
| 3658 |
+
{"current_steps": 18195, "total_steps": 19080, "loss": 0.1548, "lr": 3.2770596618615645e-07, "epoch": 9.536163522012579, "percentage": 95.36, "elapsed_time": "1:24:20", "remaining_time": "0:04:06", "throughput": 2350.95, "total_tokens": 11895944}
|
| 3659 |
+
{"current_steps": 18200, "total_steps": 19080, "loss": 0.1328, "lr": 3.240256779546952e-07, "epoch": 9.538784067085954, "percentage": 95.39, "elapsed_time": "1:24:21", "remaining_time": "0:04:04", "throughput": 2350.97, "total_tokens": 11898696}
|
| 3660 |
+
{"current_steps": 18205, "total_steps": 19080, "loss": 0.1451, "lr": 3.2036603745139447e-07, "epoch": 9.54140461215933, "percentage": 95.41, "elapsed_time": "1:24:23", "remaining_time": "0:04:03", "throughput": 2351.17, "total_tokens": 11904744}
|
| 3661 |
+
{"current_steps": 18210, "total_steps": 19080, "loss": 0.1608, "lr": 3.167270477384743e-07, "epoch": 9.544025157232705, "percentage": 95.44, "elapsed_time": "1:24:24", "remaining_time": "0:04:01", "throughput": 2351.22, "total_tokens": 11907656}
|
| 3662 |
+
{"current_steps": 18215, "total_steps": 19080, "loss": 0.2666, "lr": 3.1310871186086834e-07, "epoch": 9.54664570230608, "percentage": 95.47, "elapsed_time": "1:24:25", "remaining_time": "0:04:00", "throughput": 2351.23, "total_tokens": 11910504}
|
| 3663 |
+
{"current_steps": 18220, "total_steps": 19080, "loss": 0.1847, "lr": 3.095110328462464e-07, "epoch": 9.549266247379455, "percentage": 95.49, "elapsed_time": "1:24:26", "remaining_time": "0:03:59", "throughput": 2351.21, "total_tokens": 11913192}
|
| 3664 |
+
{"current_steps": 18225, "total_steps": 19080, "loss": 0.3092, "lr": 3.0593401370497264e-07, "epoch": 9.55188679245283, "percentage": 95.52, "elapsed_time": "1:24:28", "remaining_time": "0:03:57", "throughput": 2351.27, "total_tokens": 11916680}
|
| 3665 |
+
{"current_steps": 18230, "total_steps": 19080, "loss": 0.1675, "lr": 3.0237765743013626e-07, "epoch": 9.554507337526205, "percentage": 95.55, "elapsed_time": "1:24:29", "remaining_time": "0:03:56", "throughput": 2351.29, "total_tokens": 11919656}
|
| 3666 |
+
{"current_steps": 18235, "total_steps": 19080, "loss": 0.2035, "lr": 2.9884196699753453e-07, "epoch": 9.55712788259958, "percentage": 95.57, "elapsed_time": "1:24:30", "remaining_time": "0:03:54", "throughput": 2351.31, "total_tokens": 11922632}
|
| 3667 |
+
{"current_steps": 18240, "total_steps": 19080, "loss": 0.194, "lr": 2.953269453656704e-07, "epoch": 9.559748427672956, "percentage": 95.6, "elapsed_time": "1:24:32", "remaining_time": "0:03:53", "throughput": 2351.37, "total_tokens": 11926280}
|
| 3668 |
+
{"current_steps": 18245, "total_steps": 19080, "loss": 0.1185, "lr": 2.9183259547575504e-07, "epoch": 9.56236897274633, "percentage": 95.62, "elapsed_time": "1:24:33", "remaining_time": "0:03:52", "throughput": 2351.42, "total_tokens": 11929768}
|
| 3669 |
+
{"current_steps": 18250, "total_steps": 19080, "loss": 0.2141, "lr": 2.883589202517023e-07, "epoch": 9.564989517819706, "percentage": 95.65, "elapsed_time": "1:24:34", "remaining_time": "0:03:50", "throughput": 2351.49, "total_tokens": 11933480}
|
| 3670 |
+
{"current_steps": 18255, "total_steps": 19080, "loss": 0.1422, "lr": 2.849059226001177e-07, "epoch": 9.567610062893081, "percentage": 95.68, "elapsed_time": "1:24:35", "remaining_time": "0:03:49", "throughput": 2351.51, "total_tokens": 11936200}
|
| 3671 |
+
{"current_steps": 18260, "total_steps": 19080, "loss": 0.2477, "lr": 2.8147360541032065e-07, "epoch": 9.570230607966456, "percentage": 95.7, "elapsed_time": "1:24:36", "remaining_time": "0:03:47", "throughput": 2351.49, "total_tokens": 11938472}
|
| 3672 |
+
{"current_steps": 18265, "total_steps": 19080, "loss": 0.2637, "lr": 2.780619715543109e-07, "epoch": 9.572851153039831, "percentage": 95.73, "elapsed_time": "1:24:38", "remaining_time": "0:03:46", "throughput": 2351.57, "total_tokens": 11942280}
|
| 3673 |
+
{"current_steps": 18270, "total_steps": 19080, "loss": 0.166, "lr": 2.746710238867911e-07, "epoch": 9.575471698113208, "percentage": 95.75, "elapsed_time": "1:24:39", "remaining_time": "0:03:45", "throughput": 2351.65, "total_tokens": 11945800}
|
| 3674 |
+
{"current_steps": 18275, "total_steps": 19080, "loss": 0.1826, "lr": 2.713007652451499e-07, "epoch": 9.578092243186584, "percentage": 95.78, "elapsed_time": "1:24:40", "remaining_time": "0:03:43", "throughput": 2351.64, "total_tokens": 11948200}
|
| 3675 |
+
{"current_steps": 18280, "total_steps": 19080, "loss": 0.2232, "lr": 2.6795119844946757e-07, "epoch": 9.580712788259959, "percentage": 95.81, "elapsed_time": "1:24:42", "remaining_time": "0:03:42", "throughput": 2351.69, "total_tokens": 11951656}
|
| 3676 |
+
{"current_steps": 18285, "total_steps": 19080, "loss": 0.1707, "lr": 2.646223263025077e-07, "epoch": 9.583333333333334, "percentage": 95.83, "elapsed_time": "1:24:43", "remaining_time": "0:03:41", "throughput": 2351.76, "total_tokens": 11955208}
|
| 3677 |
+
{"current_steps": 18290, "total_steps": 19080, "loss": 0.1431, "lr": 2.6131415158971993e-07, "epoch": 9.585953878406709, "percentage": 95.86, "elapsed_time": "1:24:44", "remaining_time": "0:03:39", "throughput": 2351.75, "total_tokens": 11957768}
|
| 3678 |
+
{"current_steps": 18295, "total_steps": 19080, "loss": 0.204, "lr": 2.5802667707922887e-07, "epoch": 9.588574423480084, "percentage": 95.89, "elapsed_time": "1:24:45", "remaining_time": "0:03:38", "throughput": 2351.78, "total_tokens": 11960552}
|
| 3679 |
+
{"current_steps": 18300, "total_steps": 19080, "loss": 0.2429, "lr": 2.54759905521848e-07, "epoch": 9.59119496855346, "percentage": 95.91, "elapsed_time": "1:24:47", "remaining_time": "0:03:36", "throughput": 2351.88, "total_tokens": 11964552}
|
| 3680 |
+
{"current_steps": 18305, "total_steps": 19080, "loss": 0.1847, "lr": 2.51513839651063e-07, "epoch": 9.593815513626835, "percentage": 95.94, "elapsed_time": "1:24:48", "remaining_time": "0:03:35", "throughput": 2351.84, "total_tokens": 11966824}
|
| 3681 |
+
{"current_steps": 18310, "total_steps": 19080, "loss": 0.1888, "lr": 2.4828848218302615e-07, "epoch": 9.59643605870021, "percentage": 95.96, "elapsed_time": "1:24:49", "remaining_time": "0:03:34", "throughput": 2351.87, "total_tokens": 11969832}
|
| 3682 |
+
{"current_steps": 18315, "total_steps": 19080, "loss": 0.149, "lr": 2.450838358165786e-07, "epoch": 9.599056603773585, "percentage": 95.99, "elapsed_time": "1:24:50", "remaining_time": "0:03:32", "throughput": 2351.87, "total_tokens": 11972424}
|
| 3683 |
+
{"current_steps": 18320, "total_steps": 19080, "loss": 0.2274, "lr": 2.41899903233217e-07, "epoch": 9.60167714884696, "percentage": 96.02, "elapsed_time": "1:24:52", "remaining_time": "0:03:31", "throughput": 2351.98, "total_tokens": 11976552}
|
| 3684 |
+
{"current_steps": 18325, "total_steps": 19080, "loss": 0.2065, "lr": 2.387366870971103e-07, "epoch": 9.604297693920335, "percentage": 96.04, "elapsed_time": "1:24:53", "remaining_time": "0:03:29", "throughput": 2352.03, "total_tokens": 11979720}
|
| 3685 |
+
{"current_steps": 18330, "total_steps": 19080, "loss": 0.2473, "lr": 2.3559419005509675e-07, "epoch": 9.60691823899371, "percentage": 96.07, "elapsed_time": "1:24:54", "remaining_time": "0:03:28", "throughput": 2352.06, "total_tokens": 11982536}
|
| 3686 |
+
{"current_steps": 18335, "total_steps": 19080, "loss": 0.136, "lr": 2.3247241473667026e-07, "epoch": 9.609538784067086, "percentage": 96.1, "elapsed_time": "1:24:55", "remaining_time": "0:03:27", "throughput": 2352.07, "total_tokens": 11985384}
|
| 3687 |
+
{"current_steps": 18340, "total_steps": 19080, "loss": 0.1659, "lr": 2.2937136375399126e-07, "epoch": 9.61215932914046, "percentage": 96.12, "elapsed_time": "1:24:57", "remaining_time": "0:03:25", "throughput": 2352.1, "total_tokens": 11988712}
|
| 3688 |
+
{"current_steps": 18345, "total_steps": 19080, "loss": 0.1984, "lr": 2.2629103970188137e-07, "epoch": 9.614779874213836, "percentage": 96.15, "elapsed_time": "1:24:58", "remaining_time": "0:03:24", "throughput": 2352.23, "total_tokens": 11993352}
|
| 3689 |
+
{"current_steps": 18350, "total_steps": 19080, "loss": 0.2087, "lr": 2.2323144515780935e-07, "epoch": 9.617400419287211, "percentage": 96.17, "elapsed_time": "1:24:59", "remaining_time": "0:03:22", "throughput": 2352.21, "total_tokens": 11995848}
|
| 3690 |
+
{"current_steps": 18355, "total_steps": 19080, "loss": 0.2814, "lr": 2.201925826819079e-07, "epoch": 9.620020964360586, "percentage": 96.2, "elapsed_time": "1:25:01", "remaining_time": "0:03:21", "throughput": 2352.27, "total_tokens": 11999336}
|
| 3691 |
+
{"current_steps": 18360, "total_steps": 19080, "loss": 0.2234, "lr": 2.1717445481695408e-07, "epoch": 9.622641509433961, "percentage": 96.23, "elapsed_time": "1:25:02", "remaining_time": "0:03:20", "throughput": 2352.35, "total_tokens": 12003080}
|
| 3692 |
+
{"current_steps": 18365, "total_steps": 19080, "loss": 0.1069, "lr": 2.1417706408838333e-07, "epoch": 9.625262054507338, "percentage": 96.25, "elapsed_time": "1:25:04", "remaining_time": "0:03:18", "throughput": 2352.46, "total_tokens": 12007240}
|
| 3693 |
+
{"current_steps": 18370, "total_steps": 19080, "loss": 0.2357, "lr": 2.112004130042755e-07, "epoch": 9.627882599580714, "percentage": 96.28, "elapsed_time": "1:25:05", "remaining_time": "0:03:17", "throughput": 2352.46, "total_tokens": 12009928}
|
| 3694 |
+
{"current_steps": 18375, "total_steps": 19080, "loss": 0.1974, "lr": 2.082445040553549e-07, "epoch": 9.630503144654089, "percentage": 96.31, "elapsed_time": "1:25:06", "remaining_time": "0:03:15", "throughput": 2352.52, "total_tokens": 12013384}
|
| 3695 |
+
{"current_steps": 18380, "total_steps": 19080, "loss": 0.1731, "lr": 2.053093397149902e-07, "epoch": 9.633123689727464, "percentage": 96.33, "elapsed_time": "1:25:07", "remaining_time": "0:03:14", "throughput": 2352.55, "total_tokens": 12016520}
|
| 3696 |
+
{"current_steps": 18385, "total_steps": 19080, "loss": 0.1849, "lr": 2.0239492243919467e-07, "epoch": 9.635744234800839, "percentage": 96.36, "elapsed_time": "1:25:09", "remaining_time": "0:03:13", "throughput": 2352.6, "total_tokens": 12020040}
|
| 3697 |
+
{"current_steps": 18390, "total_steps": 19080, "loss": 0.2053, "lr": 1.9950125466662028e-07, "epoch": 9.638364779874214, "percentage": 96.38, "elapsed_time": "1:25:10", "remaining_time": "0:03:11", "throughput": 2352.66, "total_tokens": 12023464}
|
| 3698 |
+
{"current_steps": 18395, "total_steps": 19080, "loss": 0.1342, "lr": 1.9662833881855248e-07, "epoch": 9.64098532494759, "percentage": 96.41, "elapsed_time": "1:25:11", "remaining_time": "0:03:10", "throughput": 2352.71, "total_tokens": 12026664}
|
| 3699 |
+
{"current_steps": 18400, "total_steps": 19080, "loss": 0.1883, "lr": 1.9377617729891828e-07, "epoch": 9.643605870020965, "percentage": 96.44, "elapsed_time": "1:25:13", "remaining_time": "0:03:08", "throughput": 2352.8, "total_tokens": 12030440}
|
| 3700 |
+
{"current_steps": 18405, "total_steps": 19080, "loss": 0.2082, "lr": 1.9094477249427534e-07, "epoch": 9.64622641509434, "percentage": 96.46, "elapsed_time": "1:25:14", "remaining_time": "0:03:07", "throughput": 2352.79, "total_tokens": 12032872}
|
| 3701 |
+
{"current_steps": 18410, "total_steps": 19080, "loss": 0.17, "lr": 1.8813412677381737e-07, "epoch": 9.648846960167715, "percentage": 96.49, "elapsed_time": "1:25:16", "remaining_time": "0:03:06", "throughput": 2352.94, "total_tokens": 12037864}
|
| 3702 |
+
{"current_steps": 18415, "total_steps": 19080, "loss": 0.1484, "lr": 1.8534424248935756e-07, "epoch": 9.65146750524109, "percentage": 96.51, "elapsed_time": "1:25:17", "remaining_time": "0:03:04", "throughput": 2352.97, "total_tokens": 12040904}
|
| 3703 |
+
{"current_steps": 18420, "total_steps": 19080, "loss": 0.1732, "lr": 1.8257512197535076e-07, "epoch": 9.654088050314465, "percentage": 96.54, "elapsed_time": "1:25:18", "remaining_time": "0:03:03", "throughput": 2353.02, "total_tokens": 12044296}
|
| 3704 |
+
{"current_steps": 18425, "total_steps": 19080, "loss": 0.1678, "lr": 1.7982676754886574e-07, "epoch": 9.65670859538784, "percentage": 96.57, "elapsed_time": "1:25:19", "remaining_time": "0:03:02", "throughput": 2353.0, "total_tokens": 12047208}
|
| 3705 |
+
{"current_steps": 18430, "total_steps": 19080, "loss": 0.1896, "lr": 1.7709918150959904e-07, "epoch": 9.659329140461216, "percentage": 96.59, "elapsed_time": "1:25:21", "remaining_time": "0:03:00", "throughput": 2353.07, "total_tokens": 12050696}
|
| 3706 |
+
{"current_steps": 18435, "total_steps": 19080, "loss": 0.2246, "lr": 1.7439236613987775e-07, "epoch": 9.66194968553459, "percentage": 96.62, "elapsed_time": "1:25:22", "remaining_time": "0:02:59", "throughput": 2353.14, "total_tokens": 12054536}
|
| 3707 |
+
{"current_steps": 18440, "total_steps": 19080, "loss": 0.1987, "lr": 1.717063237046318e-07, "epoch": 9.664570230607966, "percentage": 96.65, "elapsed_time": "1:25:23", "remaining_time": "0:02:57", "throughput": 2353.11, "total_tokens": 12056776}
|
| 3708 |
+
{"current_steps": 18445, "total_steps": 19080, "loss": 0.2037, "lr": 1.6904105645142444e-07, "epoch": 9.667190775681341, "percentage": 96.67, "elapsed_time": "1:25:24", "remaining_time": "0:02:56", "throughput": 2353.15, "total_tokens": 12059720}
|
| 3709 |
+
{"current_steps": 18450, "total_steps": 19080, "loss": 0.2111, "lr": 1.6639656661043e-07, "epoch": 9.669811320754716, "percentage": 96.7, "elapsed_time": "1:25:26", "remaining_time": "0:02:55", "throughput": 2353.2, "total_tokens": 12063304}
|
| 3710 |
+
{"current_steps": 18455, "total_steps": 19080, "loss": 0.2242, "lr": 1.6377285639443407e-07, "epoch": 9.672431865828091, "percentage": 96.72, "elapsed_time": "1:25:27", "remaining_time": "0:02:53", "throughput": 2353.3, "total_tokens": 12067592}
|
| 3711 |
+
{"current_steps": 18460, "total_steps": 19080, "loss": 0.2144, "lr": 1.61169927998836e-07, "epoch": 9.675052410901468, "percentage": 96.75, "elapsed_time": "1:25:29", "remaining_time": "0:02:52", "throughput": 2353.34, "total_tokens": 12070856}
|
| 3712 |
+
{"current_steps": 18465, "total_steps": 19080, "loss": 0.2126, "lr": 1.5858778360165195e-07, "epoch": 9.677672955974844, "percentage": 96.78, "elapsed_time": "1:25:30", "remaining_time": "0:02:50", "throughput": 2353.39, "total_tokens": 12074280}
|
| 3713 |
+
{"current_steps": 18470, "total_steps": 19080, "loss": 0.1615, "lr": 1.5602642536350075e-07, "epoch": 9.680293501048219, "percentage": 96.8, "elapsed_time": "1:25:31", "remaining_time": "0:02:49", "throughput": 2353.42, "total_tokens": 12077288}
|
| 3714 |
+
{"current_steps": 18475, "total_steps": 19080, "loss": 0.1603, "lr": 1.5348585542760974e-07, "epoch": 9.682914046121594, "percentage": 96.83, "elapsed_time": "1:25:33", "remaining_time": "0:02:48", "throughput": 2353.54, "total_tokens": 12082056}
|
| 3715 |
+
{"current_steps": 18480, "total_steps": 19080, "loss": 0.2342, "lr": 1.5096607591980894e-07, "epoch": 9.685534591194969, "percentage": 96.86, "elapsed_time": "1:25:34", "remaining_time": "0:02:46", "throughput": 2353.56, "total_tokens": 12085128}
|
| 3716 |
+
{"current_steps": 18485, "total_steps": 19080, "loss": 0.2181, "lr": 1.4846708894853955e-07, "epoch": 9.688155136268344, "percentage": 96.88, "elapsed_time": "1:25:36", "remaining_time": "0:02:45", "throughput": 2353.64, "total_tokens": 12089032}
|
| 3717 |
+
{"current_steps": 18490, "total_steps": 19080, "loss": 0.1645, "lr": 1.459888966048373e-07, "epoch": 9.69077568134172, "percentage": 96.91, "elapsed_time": "1:25:37", "remaining_time": "0:02:43", "throughput": 2353.62, "total_tokens": 12091496}
|
| 3718 |
+
{"current_steps": 18495, "total_steps": 19080, "loss": 0.2559, "lr": 1.4353150096234058e-07, "epoch": 9.693396226415095, "percentage": 96.93, "elapsed_time": "1:25:38", "remaining_time": "0:02:42", "throughput": 2353.7, "total_tokens": 12095208}
|
| 3719 |
+
{"current_steps": 18500, "total_steps": 19080, "loss": 0.2692, "lr": 1.410949040772852e-07, "epoch": 9.69601677148847, "percentage": 96.96, "elapsed_time": "1:25:40", "remaining_time": "0:02:41", "throughput": 2353.79, "total_tokens": 12099176}
|
| 3720 |
+
{"current_steps": 18505, "total_steps": 19080, "loss": 0.2361, "lr": 1.3867910798850692e-07, "epoch": 9.698637316561845, "percentage": 96.99, "elapsed_time": "1:25:41", "remaining_time": "0:02:39", "throughput": 2353.84, "total_tokens": 12102408}
|
| 3721 |
+
{"current_steps": 18510, "total_steps": 19080, "loss": 0.2525, "lr": 1.3628411471742764e-07, "epoch": 9.70125786163522, "percentage": 97.01, "elapsed_time": "1:25:42", "remaining_time": "0:02:38", "throughput": 2353.89, "total_tokens": 12105704}
|
| 3722 |
+
{"current_steps": 18515, "total_steps": 19080, "loss": 0.2855, "lr": 1.3390992626807485e-07, "epoch": 9.703878406708595, "percentage": 97.04, "elapsed_time": "1:25:44", "remaining_time": "0:02:36", "throughput": 2353.89, "total_tokens": 12108520}
|
| 3723 |
+
{"current_steps": 18520, "total_steps": 19080, "loss": 0.1961, "lr": 1.315565446270567e-07, "epoch": 9.70649895178197, "percentage": 97.06, "elapsed_time": "1:25:45", "remaining_time": "0:02:35", "throughput": 2353.95, "total_tokens": 12111912}
|
| 3724 |
+
{"current_steps": 18525, "total_steps": 19080, "loss": 0.1868, "lr": 1.292239717635785e-07, "epoch": 9.709119496855346, "percentage": 97.09, "elapsed_time": "1:25:46", "remaining_time": "0:02:34", "throughput": 2354.02, "total_tokens": 12116040}
|
| 3725 |
+
{"current_steps": 18530, "total_steps": 19080, "loss": 0.1718, "lr": 1.269122096294262e-07, "epoch": 9.71174004192872, "percentage": 97.12, "elapsed_time": "1:25:48", "remaining_time": "0:02:32", "throughput": 2354.02, "total_tokens": 12118632}
|
| 3726 |
+
{"current_steps": 18535, "total_steps": 19080, "loss": 0.1279, "lr": 1.24621260158983e-07, "epoch": 9.714360587002096, "percentage": 97.14, "elapsed_time": "1:25:49", "remaining_time": "0:02:31", "throughput": 2354.03, "total_tokens": 12121192}
|
| 3727 |
+
{"current_steps": 18540, "total_steps": 19080, "loss": 0.1686, "lr": 1.2235112526920723e-07, "epoch": 9.716981132075471, "percentage": 97.17, "elapsed_time": "1:25:50", "remaining_time": "0:02:30", "throughput": 2354.06, "total_tokens": 12123976}
|
| 3728 |
+
{"current_steps": 18545, "total_steps": 19080, "loss": 0.2811, "lr": 1.2010180685964324e-07, "epoch": 9.719601677148846, "percentage": 97.2, "elapsed_time": "1:25:51", "remaining_time": "0:02:28", "throughput": 2354.13, "total_tokens": 12127816}
|
| 3729 |
+
{"current_steps": 18550, "total_steps": 19080, "loss": 0.1819, "lr": 1.1787330681241881e-07, "epoch": 9.722222222222221, "percentage": 97.22, "elapsed_time": "1:25:53", "remaining_time": "0:02:27", "throughput": 2354.21, "total_tokens": 12131848}
|
| 3730 |
+
{"current_steps": 18555, "total_steps": 19080, "loss": 0.207, "lr": 1.156656269922396e-07, "epoch": 9.724842767295598, "percentage": 97.25, "elapsed_time": "1:25:54", "remaining_time": "0:02:25", "throughput": 2354.27, "total_tokens": 12135432}
|
| 3731 |
+
{"current_steps": 18560, "total_steps": 19080, "loss": 0.2295, "lr": 1.1347876924639455e-07, "epoch": 9.727463312368974, "percentage": 97.27, "elapsed_time": "1:25:55", "remaining_time": "0:02:24", "throughput": 2354.31, "total_tokens": 12138376}
|
| 3732 |
+
{"current_steps": 18565, "total_steps": 19080, "loss": 0.2558, "lr": 1.1131273540474496e-07, "epoch": 9.730083857442349, "percentage": 97.3, "elapsed_time": "1:25:57", "remaining_time": "0:02:23", "throughput": 2354.36, "total_tokens": 12141480}
|
| 3733 |
+
{"current_steps": 18570, "total_steps": 19080, "loss": 0.1591, "lr": 1.091675272797299e-07, "epoch": 9.732704402515724, "percentage": 97.33, "elapsed_time": "1:25:58", "remaining_time": "0:02:21", "throughput": 2354.36, "total_tokens": 12144168}
|
| 3734 |
+
{"current_steps": 18575, "total_steps": 19080, "loss": 0.2554, "lr": 1.0704314666635795e-07, "epoch": 9.735324947589099, "percentage": 97.35, "elapsed_time": "1:25:59", "remaining_time": "0:02:20", "throughput": 2354.45, "total_tokens": 12148168}
|
| 3735 |
+
{"current_steps": 18580, "total_steps": 19080, "loss": 0.144, "lr": 1.0493959534221832e-07, "epoch": 9.737945492662474, "percentage": 97.38, "elapsed_time": "1:26:00", "remaining_time": "0:02:18", "throughput": 2354.45, "total_tokens": 12150696}
|
| 3736 |
+
{"current_steps": 18585, "total_steps": 19080, "loss": 0.1574, "lr": 1.0285687506746133e-07, "epoch": 9.74056603773585, "percentage": 97.41, "elapsed_time": "1:26:02", "remaining_time": "0:02:17", "throughput": 2354.48, "total_tokens": 12153928}
|
| 3737 |
+
{"current_steps": 18590, "total_steps": 19080, "loss": 0.1859, "lr": 1.0079498758481798e-07, "epoch": 9.743186582809225, "percentage": 97.43, "elapsed_time": "1:26:03", "remaining_time": "0:02:16", "throughput": 2354.53, "total_tokens": 12157384}
|
| 3738 |
+
{"current_steps": 18595, "total_steps": 19080, "loss": 0.1942, "lr": 9.87539346195776e-08, "epoch": 9.7458071278826, "percentage": 97.46, "elapsed_time": "1:26:04", "remaining_time": "0:02:14", "throughput": 2354.53, "total_tokens": 12160200}
|
| 3739 |
+
{"current_steps": 18600, "total_steps": 19080, "loss": 0.256, "lr": 9.673371787960183e-08, "epoch": 9.748427672955975, "percentage": 97.48, "elapsed_time": "1:26:05", "remaining_time": "0:02:13", "throughput": 2354.5, "total_tokens": 12162504}
|
| 3740 |
+
{"current_steps": 18605, "total_steps": 19080, "loss": 0.2052, "lr": 9.473433905531626e-08, "epoch": 9.75104821802935, "percentage": 97.51, "elapsed_time": "1:26:06", "remaining_time": "0:02:11", "throughput": 2354.52, "total_tokens": 12165288}
|
| 3741 |
+
{"current_steps": 18610, "total_steps": 19080, "loss": 0.1821, "lr": 9.275579981970483e-08, "epoch": 9.753668763102725, "percentage": 97.54, "elapsed_time": "1:26:07", "remaining_time": "0:02:10", "throughput": 2354.53, "total_tokens": 12167912}
|
| 3742 |
+
{"current_steps": 18615, "total_steps": 19080, "loss": 0.2621, "lr": 9.07981018283266e-08, "epoch": 9.7562893081761, "percentage": 97.56, "elapsed_time": "1:26:09", "remaining_time": "0:02:09", "throughput": 2354.61, "total_tokens": 12171624}
|
| 3743 |
+
{"current_steps": 18620, "total_steps": 19080, "loss": 0.1644, "lr": 8.886124671928786e-08, "epoch": 9.758909853249476, "percentage": 97.59, "elapsed_time": "1:26:10", "remaining_time": "0:02:07", "throughput": 2354.65, "total_tokens": 12174632}
|
| 3744 |
+
{"current_steps": 18625, "total_steps": 19080, "loss": 0.3071, "lr": 8.694523611326444e-08, "epoch": 9.76153039832285, "percentage": 97.62, "elapsed_time": "1:26:11", "remaining_time": "0:02:06", "throughput": 2354.69, "total_tokens": 12177896}
|
| 3745 |
+
{"current_steps": 18630, "total_steps": 19080, "loss": 0.1994, "lr": 8.505007161348222e-08, "epoch": 9.764150943396226, "percentage": 97.64, "elapsed_time": "1:26:13", "remaining_time": "0:02:04", "throughput": 2354.75, "total_tokens": 12181256}
|
| 3746 |
+
{"current_steps": 18635, "total_steps": 19080, "loss": 0.2394, "lr": 8.31757548057338e-08, "epoch": 9.766771488469601, "percentage": 97.67, "elapsed_time": "1:26:14", "remaining_time": "0:02:03", "throughput": 2354.76, "total_tokens": 12183848}
|
| 3747 |
+
{"current_steps": 18640, "total_steps": 19080, "loss": 0.2171, "lr": 8.132228725835634e-08, "epoch": 9.769392033542976, "percentage": 97.69, "elapsed_time": "1:26:15", "remaining_time": "0:02:02", "throughput": 2354.81, "total_tokens": 12187208}
|
| 3748 |
+
{"current_steps": 18645, "total_steps": 19080, "loss": 0.2163, "lr": 7.948967052225087e-08, "epoch": 9.772012578616351, "percentage": 97.72, "elapsed_time": "1:26:16", "remaining_time": "0:02:00", "throughput": 2354.86, "total_tokens": 12190568}
|
| 3749 |
+
{"current_steps": 18650, "total_steps": 19080, "loss": 0.1506, "lr": 7.767790613086301e-08, "epoch": 9.774633123689728, "percentage": 97.75, "elapsed_time": "1:26:17", "remaining_time": "0:01:59", "throughput": 2354.87, "total_tokens": 12193224}
|
| 3750 |
+
{"current_steps": 18655, "total_steps": 19080, "loss": 0.1625, "lr": 7.588699560019952e-08, "epoch": 9.777253668763104, "percentage": 97.77, "elapsed_time": "1:26:19", "remaining_time": "0:01:57", "throughput": 2354.84, "total_tokens": 12195816}
|
| 3751 |
+
{"current_steps": 18660, "total_steps": 19080, "loss": 0.219, "lr": 7.411694042881168e-08, "epoch": 9.779874213836479, "percentage": 97.8, "elapsed_time": "1:26:20", "remaining_time": "0:01:56", "throughput": 2354.87, "total_tokens": 12198856}
|
| 3752 |
+
{"current_steps": 18665, "total_steps": 19080, "loss": 0.2299, "lr": 7.23677420977953e-08, "epoch": 9.782494758909854, "percentage": 97.82, "elapsed_time": "1:26:21", "remaining_time": "0:01:55", "throughput": 2354.91, "total_tokens": 12201992}
|
| 3753 |
+
{"current_steps": 18670, "total_steps": 19080, "loss": 0.2013, "lr": 7.063940207080733e-08, "epoch": 9.785115303983229, "percentage": 97.85, "elapsed_time": "1:26:22", "remaining_time": "0:01:53", "throughput": 2355.0, "total_tokens": 12205608}
|
| 3754 |
+
{"current_steps": 18675, "total_steps": 19080, "loss": 0.2355, "lr": 6.893192179403817e-08, "epoch": 9.787735849056604, "percentage": 97.88, "elapsed_time": "1:26:24", "remaining_time": "0:01:52", "throughput": 2355.07, "total_tokens": 12209352}
|
| 3755 |
+
{"current_steps": 18680, "total_steps": 19080, "loss": 0.1831, "lr": 6.724530269623108e-08, "epoch": 9.79035639412998, "percentage": 97.9, "elapsed_time": "1:26:25", "remaining_time": "0:01:51", "throughput": 2355.18, "total_tokens": 12213768}
|
| 3756 |
+
{"current_steps": 18685, "total_steps": 19080, "loss": 0.1776, "lr": 6.557954618867102e-08, "epoch": 9.792976939203355, "percentage": 97.93, "elapsed_time": "1:26:27", "remaining_time": "0:01:49", "throughput": 2355.21, "total_tokens": 12216776}
|
| 3757 |
+
{"current_steps": 18690, "total_steps": 19080, "loss": 0.176, "lr": 6.393465366519024e-08, "epoch": 9.79559748427673, "percentage": 97.96, "elapsed_time": "1:26:28", "remaining_time": "0:01:48", "throughput": 2355.3, "total_tokens": 12221000}
|
| 3758 |
+
{"current_steps": 18695, "total_steps": 19080, "loss": 0.222, "lr": 6.231062650215724e-08, "epoch": 9.798218029350105, "percentage": 97.98, "elapsed_time": "1:26:30", "remaining_time": "0:01:46", "throughput": 2355.33, "total_tokens": 12224200}
|
| 3759 |
+
{"current_steps": 18700, "total_steps": 19080, "loss": 0.1646, "lr": 6.070746605848221e-08, "epoch": 9.80083857442348, "percentage": 98.01, "elapsed_time": "1:26:31", "remaining_time": "0:01:45", "throughput": 2355.35, "total_tokens": 12226920}
|
| 3760 |
+
{"current_steps": 18705, "total_steps": 19080, "loss": 0.2519, "lr": 5.912517367561987e-08, "epoch": 9.803459119496855, "percentage": 98.03, "elapsed_time": "1:26:32", "remaining_time": "0:01:44", "throughput": 2355.37, "total_tokens": 12229960}
|
| 3761 |
+
{"current_steps": 18710, "total_steps": 19080, "loss": 0.1821, "lr": 5.756375067755837e-08, "epoch": 9.80607966457023, "percentage": 98.06, "elapsed_time": "1:26:33", "remaining_time": "0:01:42", "throughput": 2355.31, "total_tokens": 12232040}
|
| 3762 |
+
{"current_steps": 18715, "total_steps": 19080, "loss": 0.1456, "lr": 5.602319837082481e-08, "epoch": 9.808700209643606, "percentage": 98.09, "elapsed_time": "1:26:34", "remaining_time": "0:01:41", "throughput": 2355.3, "total_tokens": 12234472}
|
| 3763 |
+
{"current_steps": 18720, "total_steps": 19080, "loss": 0.1499, "lr": 5.450351804448528e-08, "epoch": 9.81132075471698, "percentage": 98.11, "elapsed_time": "1:26:35", "remaining_time": "0:01:39", "throughput": 2355.31, "total_tokens": 12237448}
|
| 3764 |
+
{"current_steps": 18725, "total_steps": 19080, "loss": 0.2284, "lr": 5.3004710970133705e-08, "epoch": 9.813941299790356, "percentage": 98.14, "elapsed_time": "1:26:36", "remaining_time": "0:01:38", "throughput": 2355.33, "total_tokens": 12240264}
|
| 3765 |
+
{"current_steps": 18730, "total_steps": 19080, "loss": 0.1809, "lr": 5.1526778401911334e-08, "epoch": 9.816561844863731, "percentage": 98.17, "elapsed_time": "1:26:38", "remaining_time": "0:01:37", "throughput": 2355.35, "total_tokens": 12243176}
|
| 3766 |
+
{"current_steps": 18735, "total_steps": 19080, "loss": 0.1922, "lr": 5.0069721576476156e-08, "epoch": 9.819182389937106, "percentage": 98.19, "elapsed_time": "1:26:39", "remaining_time": "0:01:35", "throughput": 2355.35, "total_tokens": 12245864}
|
| 3767 |
+
{"current_steps": 18740, "total_steps": 19080, "loss": 0.2115, "lr": 4.863354171303347e-08, "epoch": 9.821802935010481, "percentage": 98.22, "elapsed_time": "1:26:40", "remaining_time": "0:01:34", "throughput": 2355.38, "total_tokens": 12248712}
|
| 3768 |
+
{"current_steps": 18745, "total_steps": 19080, "loss": 0.1998, "lr": 4.72182400133081e-08, "epoch": 9.824423480083858, "percentage": 98.24, "elapsed_time": "1:26:41", "remaining_time": "0:01:32", "throughput": 2355.37, "total_tokens": 12251144}
|
| 3769 |
+
{"current_steps": 18750, "total_steps": 19080, "loss": 0.2153, "lr": 4.582381766156385e-08, "epoch": 9.827044025157234, "percentage": 98.27, "elapsed_time": "1:26:42", "remaining_time": "0:01:31", "throughput": 2355.45, "total_tokens": 12255336}
|
| 3770 |
+
{"current_steps": 18755, "total_steps": 19080, "loss": 0.1861, "lr": 4.445027582458683e-08, "epoch": 9.829664570230609, "percentage": 98.3, "elapsed_time": "1:26:44", "remaining_time": "0:01:30", "throughput": 2355.43, "total_tokens": 12257672}
|
| 3771 |
+
{"current_steps": 18760, "total_steps": 19080, "loss": 0.1773, "lr": 4.309761565169379e-08, "epoch": 9.832285115303984, "percentage": 98.32, "elapsed_time": "1:26:45", "remaining_time": "0:01:28", "throughput": 2355.49, "total_tokens": 12261032}
|
| 3772 |
+
{"current_steps": 18765, "total_steps": 19080, "loss": 0.1952, "lr": 4.1765838274732125e-08, "epoch": 9.834905660377359, "percentage": 98.35, "elapsed_time": "1:26:46", "remaining_time": "0:01:27", "throughput": 2355.54, "total_tokens": 12264488}
|
| 3773 |
+
{"current_steps": 18770, "total_steps": 19080, "loss": 0.1964, "lr": 4.045494480807155e-08, "epoch": 9.837526205450734, "percentage": 98.38, "elapsed_time": "1:26:47", "remaining_time": "0:01:26", "throughput": 2355.59, "total_tokens": 12267432}
|
| 3774 |
+
{"current_steps": 18775, "total_steps": 19080, "loss": 0.1542, "lr": 3.916493634860407e-08, "epoch": 9.84014675052411, "percentage": 98.4, "elapsed_time": "1:26:49", "remaining_time": "0:01:24", "throughput": 2355.65, "total_tokens": 12270888}
|
| 3775 |
+
{"current_steps": 18780, "total_steps": 19080, "loss": 0.1844, "lr": 3.789581397575515e-08, "epoch": 9.842767295597485, "percentage": 98.43, "elapsed_time": "1:26:50", "remaining_time": "0:01:23", "throughput": 2355.55, "total_tokens": 12273896}
|
| 3776 |
+
{"current_steps": 18785, "total_steps": 19080, "loss": 0.2003, "lr": 3.664757875146418e-08, "epoch": 9.84538784067086, "percentage": 98.45, "elapsed_time": "1:26:51", "remaining_time": "0:01:21", "throughput": 2355.54, "total_tokens": 12276328}
|
| 3777 |
+
{"current_steps": 18790, "total_steps": 19080, "loss": 0.1712, "lr": 3.5420231720198485e-08, "epoch": 9.848008385744235, "percentage": 98.48, "elapsed_time": "1:26:52", "remaining_time": "0:01:20", "throughput": 2355.56, "total_tokens": 12279016}
|
| 3778 |
+
{"current_steps": 18795, "total_steps": 19080, "loss": 0.1335, "lr": 3.421377390894764e-08, "epoch": 9.85062893081761, "percentage": 98.51, "elapsed_time": "1:26:53", "remaining_time": "0:01:19", "throughput": 2355.54, "total_tokens": 12281512}
|
| 3779 |
+
{"current_steps": 18800, "total_steps": 19080, "loss": 0.2554, "lr": 3.3028206327218035e-08, "epoch": 9.853249475890985, "percentage": 98.53, "elapsed_time": "1:26:55", "remaining_time": "0:01:17", "throughput": 2355.6, "total_tokens": 12285160}
|
| 3780 |
+
{"current_steps": 18805, "total_steps": 19080, "loss": 0.2177, "lr": 3.1863529967041117e-08, "epoch": 9.85587002096436, "percentage": 98.56, "elapsed_time": "1:26:56", "remaining_time": "0:01:16", "throughput": 2355.67, "total_tokens": 12288616}
|
| 3781 |
+
{"current_steps": 18810, "total_steps": 19080, "loss": 0.3209, "lr": 3.071974580296233e-08, "epoch": 9.858490566037736, "percentage": 98.58, "elapsed_time": "1:26:58", "remaining_time": "0:01:14", "throughput": 2355.74, "total_tokens": 12292680}
|
| 3782 |
+
{"current_steps": 18815, "total_steps": 19080, "loss": 0.1889, "lr": 2.9596854792052207e-08, "epoch": 9.86111111111111, "percentage": 98.61, "elapsed_time": "1:26:59", "remaining_time": "0:01:13", "throughput": 2355.87, "total_tokens": 12297160}
|
| 3783 |
+
{"current_steps": 18820, "total_steps": 19080, "loss": 0.2506, "lr": 2.8494857873889724e-08, "epoch": 9.863731656184486, "percentage": 98.64, "elapsed_time": "1:27:01", "remaining_time": "0:01:12", "throughput": 2355.91, "total_tokens": 12300520}
|
| 3784 |
+
{"current_steps": 18825, "total_steps": 19080, "loss": 0.2419, "lr": 2.741375597057616e-08, "epoch": 9.866352201257861, "percentage": 98.66, "elapsed_time": "1:27:02", "remaining_time": "0:01:10", "throughput": 2355.94, "total_tokens": 12303496}
|
| 3785 |
+
{"current_steps": 18830, "total_steps": 19080, "loss": 0.2213, "lr": 2.6353549986729566e-08, "epoch": 9.868972746331236, "percentage": 98.69, "elapsed_time": "1:27:03", "remaining_time": "0:01:09", "throughput": 2355.96, "total_tokens": 12306568}
|
| 3786 |
+
{"current_steps": 18835, "total_steps": 19080, "loss": 0.2368, "lr": 2.531424080948197e-08, "epoch": 9.871593291404611, "percentage": 98.72, "elapsed_time": "1:27:04", "remaining_time": "0:01:07", "throughput": 2355.97, "total_tokens": 12309160}
|
| 3787 |
+
{"current_steps": 18840, "total_steps": 19080, "loss": 0.2018, "lr": 2.4295829308482176e-08, "epoch": 9.874213836477988, "percentage": 98.74, "elapsed_time": "1:27:06", "remaining_time": "0:01:06", "throughput": 2356.04, "total_tokens": 12312776}
|
| 3788 |
+
{"current_steps": 18845, "total_steps": 19080, "loss": 0.153, "lr": 2.329831633588464e-08, "epoch": 9.876834381551364, "percentage": 98.77, "elapsed_time": "1:27:07", "remaining_time": "0:01:05", "throughput": 2356.09, "total_tokens": 12316104}
|
| 3789 |
+
{"current_steps": 18850, "total_steps": 19080, "loss": 0.2396, "lr": 2.232170272636891e-08, "epoch": 9.879454926624739, "percentage": 98.79, "elapsed_time": "1:27:08", "remaining_time": "0:01:03", "throughput": 2356.09, "total_tokens": 12318568}
|
| 3790 |
+
{"current_steps": 18855, "total_steps": 19080, "loss": 0.2184, "lr": 2.136598929711464e-08, "epoch": 9.882075471698114, "percentage": 98.82, "elapsed_time": "1:27:09", "remaining_time": "0:01:02", "throughput": 2356.14, "total_tokens": 12321736}
|
| 3791 |
+
{"current_steps": 18860, "total_steps": 19080, "loss": 0.2428, "lr": 2.0431176847823807e-08, "epoch": 9.884696016771489, "percentage": 98.85, "elapsed_time": "1:27:10", "remaining_time": "0:01:01", "throughput": 2356.19, "total_tokens": 12325032}
|
| 3792 |
+
{"current_steps": 18865, "total_steps": 19080, "loss": 0.2541, "lr": 1.9517266160704038e-08, "epoch": 9.887316561844864, "percentage": 98.87, "elapsed_time": "1:27:12", "remaining_time": "0:00:59", "throughput": 2356.28, "total_tokens": 12329160}
|
| 3793 |
+
{"current_steps": 18870, "total_steps": 19080, "loss": 0.3971, "lr": 1.8624258000471405e-08, "epoch": 9.88993710691824, "percentage": 98.9, "elapsed_time": "1:27:13", "remaining_time": "0:00:58", "throughput": 2356.33, "total_tokens": 12332392}
|
| 3794 |
+
{"current_steps": 18875, "total_steps": 19080, "loss": 0.2291, "lr": 1.7752153114358737e-08, "epoch": 9.892557651991615, "percentage": 98.93, "elapsed_time": "1:27:15", "remaining_time": "0:00:56", "throughput": 2356.41, "total_tokens": 12336456}
|
| 3795 |
+
{"current_steps": 18880, "total_steps": 19080, "loss": 0.1979, "lr": 1.6900952232098977e-08, "epoch": 9.89517819706499, "percentage": 98.95, "elapsed_time": "1:27:16", "remaining_time": "0:00:55", "throughput": 2356.4, "total_tokens": 12339080}
|
| 3796 |
+
{"current_steps": 18885, "total_steps": 19080, "loss": 0.1701, "lr": 1.6070656065939048e-08, "epoch": 9.897798742138365, "percentage": 98.98, "elapsed_time": "1:27:17", "remaining_time": "0:00:54", "throughput": 2356.39, "total_tokens": 12341768}
|
| 3797 |
+
{"current_steps": 18890, "total_steps": 19080, "loss": 0.1987, "lr": 1.526126531063432e-08, "epoch": 9.90041928721174, "percentage": 99.0, "elapsed_time": "1:27:18", "remaining_time": "0:00:52", "throughput": 2356.43, "total_tokens": 12344936}
|
| 3798 |
+
{"current_steps": 18895, "total_steps": 19080, "loss": 0.1666, "lr": 1.4472780643445817e-08, "epoch": 9.903039832285115, "percentage": 99.03, "elapsed_time": "1:27:20", "remaining_time": "0:00:51", "throughput": 2356.56, "total_tokens": 12349416}
|
| 3799 |
+
{"current_steps": 18900, "total_steps": 19080, "loss": 0.2051, "lr": 1.3705202724142996e-08, "epoch": 9.90566037735849, "percentage": 99.06, "elapsed_time": "1:27:21", "remaining_time": "0:00:49", "throughput": 2356.57, "total_tokens": 12352360}
|
| 3800 |
+
{"current_steps": 18905, "total_steps": 19080, "loss": 0.2463, "lr": 1.2958532194995432e-08, "epoch": 9.908280922431866, "percentage": 99.08, "elapsed_time": "1:27:22", "remaining_time": "0:00:48", "throughput": 2356.62, "total_tokens": 12355688}
|
| 3801 |
+
{"current_steps": 18910, "total_steps": 19080, "loss": 0.2172, "lr": 1.2232769680789457e-08, "epoch": 9.91090146750524, "percentage": 99.11, "elapsed_time": "1:27:24", "remaining_time": "0:00:47", "throughput": 2356.7, "total_tokens": 12359560}
|
| 3802 |
+
{"current_steps": 18915, "total_steps": 19080, "loss": 0.2712, "lr": 1.152791578880319e-08, "epoch": 9.913522012578616, "percentage": 99.14, "elapsed_time": "1:27:25", "remaining_time": "0:00:45", "throughput": 2356.8, "total_tokens": 12363656}
|
| 3803 |
+
{"current_steps": 18920, "total_steps": 19080, "loss": 0.173, "lr": 1.0843971108828732e-08, "epoch": 9.916142557651991, "percentage": 99.16, "elapsed_time": "1:27:27", "remaining_time": "0:00:44", "throughput": 2356.87, "total_tokens": 12367688}
|
| 3804 |
+
{"current_steps": 18925, "total_steps": 19080, "loss": 0.207, "lr": 1.018093621316385e-08, "epoch": 9.918763102725366, "percentage": 99.19, "elapsed_time": "1:27:28", "remaining_time": "0:00:42", "throughput": 2356.95, "total_tokens": 12371400}
|
| 3805 |
+
{"current_steps": 18930, "total_steps": 19080, "loss": 0.1976, "lr": 9.53881165659809e-09, "epoch": 9.921383647798741, "percentage": 99.21, "elapsed_time": "1:27:30", "remaining_time": "0:00:41", "throughput": 2356.97, "total_tokens": 12374152}
|
| 3806 |
+
{"current_steps": 18935, "total_steps": 19080, "loss": 0.1312, "lr": 8.91759797644054e-09, "epoch": 9.924004192872118, "percentage": 99.24, "elapsed_time": "1:27:31", "remaining_time": "0:00:40", "throughput": 2357.08, "total_tokens": 12378472}
|
| 3807 |
+
{"current_steps": 18940, "total_steps": 19080, "loss": 0.2356, "lr": 8.317295692486516e-09, "epoch": 9.926624737945493, "percentage": 99.27, "elapsed_time": "1:27:32", "remaining_time": "0:00:38", "throughput": 2357.1, "total_tokens": 12381480}
|
| 3808 |
+
{"current_steps": 18945, "total_steps": 19080, "loss": 0.2243, "lr": 7.737905307045323e-09, "epoch": 9.929245283018869, "percentage": 99.29, "elapsed_time": "1:27:34", "remaining_time": "0:00:37", "throughput": 2357.14, "total_tokens": 12384648}
|
| 3809 |
+
{"current_steps": 18950, "total_steps": 19080, "loss": 0.167, "lr": 7.179427304926378e-09, "epoch": 9.931865828092244, "percentage": 99.32, "elapsed_time": "1:27:35", "remaining_time": "0:00:36", "throughput": 2357.17, "total_tokens": 12387432}
|
| 3810 |
+
{"current_steps": 18955, "total_steps": 19080, "loss": 0.2268, "lr": 6.641862153433653e-09, "epoch": 9.934486373165619, "percentage": 99.34, "elapsed_time": "1:27:36", "remaining_time": "0:00:34", "throughput": 2357.21, "total_tokens": 12390984}
|
| 3811 |
+
{"current_steps": 18960, "total_steps": 19080, "loss": 0.216, "lr": 6.125210302382333e-09, "epoch": 9.937106918238994, "percentage": 99.37, "elapsed_time": "1:27:38", "remaining_time": "0:00:33", "throughput": 2357.28, "total_tokens": 12394760}
|
| 3812 |
+
{"current_steps": 18965, "total_steps": 19080, "loss": 0.2906, "lr": 5.629472184079387e-09, "epoch": 9.93972746331237, "percentage": 99.4, "elapsed_time": "1:27:39", "remaining_time": "0:00:31", "throughput": 2357.32, "total_tokens": 12397768}
|
| 3813 |
+
{"current_steps": 18970, "total_steps": 19080, "loss": 0.1896, "lr": 5.154648213334668e-09, "epoch": 9.942348008385745, "percentage": 99.42, "elapsed_time": "1:27:40", "remaining_time": "0:00:30", "throughput": 2357.36, "total_tokens": 12400968}
|
| 3814 |
+
{"current_steps": 18975, "total_steps": 19080, "loss": 0.2504, "lr": 4.700738787466463e-09, "epoch": 9.94496855345912, "percentage": 99.45, "elapsed_time": "1:27:42", "remaining_time": "0:00:29", "throughput": 2357.45, "total_tokens": 12406664}
|
| 3815 |
+
{"current_steps": 18980, "total_steps": 19080, "loss": 0.166, "lr": 4.26774428627652e-09, "epoch": 9.947589098532495, "percentage": 99.48, "elapsed_time": "1:27:43", "remaining_time": "0:00:27", "throughput": 2357.47, "total_tokens": 12409448}
|
| 3816 |
+
{"current_steps": 18985, "total_steps": 19080, "loss": 0.1027, "lr": 3.855665072080572e-09, "epoch": 9.95020964360587, "percentage": 99.5, "elapsed_time": "1:27:45", "remaining_time": "0:00:26", "throughput": 2357.52, "total_tokens": 12412744}
|
| 3817 |
+
{"current_steps": 18990, "total_steps": 19080, "loss": 0.3511, "lr": 3.464501489683358e-09, "epoch": 9.952830188679245, "percentage": 99.53, "elapsed_time": "1:27:46", "remaining_time": "0:00:24", "throughput": 2357.54, "total_tokens": 12415656}
|
| 3818 |
+
{"current_steps": 18995, "total_steps": 19080, "loss": 0.2944, "lr": 3.094253866398056e-09, "epoch": 9.95545073375262, "percentage": 99.55, "elapsed_time": "1:27:47", "remaining_time": "0:00:23", "throughput": 2357.55, "total_tokens": 12418472}
|
| 3819 |
+
{"current_steps": 19000, "total_steps": 19080, "loss": 0.1688, "lr": 2.7449225120268484e-09, "epoch": 9.958071278825996, "percentage": 99.58, "elapsed_time": "1:27:48", "remaining_time": "0:00:22", "throughput": 2357.6, "total_tokens": 12421768}
|
| 3820 |
+
{"current_steps": 19005, "total_steps": 19080, "loss": 0.2327, "lr": 2.416507718877581e-09, "epoch": 9.96069182389937, "percentage": 99.61, "elapsed_time": "1:27:50", "remaining_time": "0:00:20", "throughput": 2357.69, "total_tokens": 12425800}
|
| 3821 |
+
{"current_steps": 19010, "total_steps": 19080, "loss": 0.2447, "lr": 2.109009761747105e-09, "epoch": 9.963312368972746, "percentage": 99.63, "elapsed_time": "1:27:51", "remaining_time": "0:00:19", "throughput": 2357.75, "total_tokens": 12429288}
|
| 3822 |
+
{"current_steps": 19015, "total_steps": 19080, "loss": 0.1465, "lr": 1.8224288979434844e-09, "epoch": 9.965932914046121, "percentage": 99.66, "elapsed_time": "1:27:53", "remaining_time": "0:00:18", "throughput": 2357.84, "total_tokens": 12433160}
|
| 3823 |
+
{"current_steps": 19020, "total_steps": 19080, "loss": 0.1619, "lr": 1.5567653672554638e-09, "epoch": 9.968553459119496, "percentage": 99.69, "elapsed_time": "1:27:54", "remaining_time": "0:00:16", "throughput": 2357.82, "total_tokens": 12435944}
|
| 3824 |
+
{"current_steps": 19025, "total_steps": 19080, "loss": 0.1746, "lr": 1.3120193919857748e-09, "epoch": 9.971174004192871, "percentage": 99.71, "elapsed_time": "1:27:55", "remaining_time": "0:00:15", "throughput": 2357.79, "total_tokens": 12438216}
|
| 3825 |
+
{"current_steps": 19030, "total_steps": 19080, "loss": 0.1743, "lr": 1.0881911769261565e-09, "epoch": 9.973794549266248, "percentage": 99.74, "elapsed_time": "1:27:56", "remaining_time": "0:00:13", "throughput": 2357.82, "total_tokens": 12440904}
|
| 3826 |
+
{"current_steps": 19035, "total_steps": 19080, "loss": 0.219, "lr": 8.852809093601311e-10, "epoch": 9.976415094339622, "percentage": 99.76, "elapsed_time": "1:27:57", "remaining_time": "0:00:12", "throughput": 2357.81, "total_tokens": 12443752}
|
| 3827 |
+
{"current_steps": 19040, "total_steps": 19080, "loss": 0.2648, "lr": 7.03288759076881e-10, "epoch": 9.979035639412999, "percentage": 99.79, "elapsed_time": "1:27:58", "remaining_time": "0:00:11", "throughput": 2357.79, "total_tokens": 12446152}
|
| 3828 |
+
{"current_steps": 19045, "total_steps": 19080, "loss": 0.1536, "lr": 5.422148783629233e-10, "epoch": 9.981656184486374, "percentage": 99.82, "elapsed_time": "1:27:59", "remaining_time": "0:00:09", "throughput": 2357.81, "total_tokens": 12449160}
|
| 3829 |
+
{"current_steps": 19050, "total_steps": 19080, "loss": 0.2432, "lr": 4.0205940199100623e-10, "epoch": 9.984276729559749, "percentage": 99.84, "elapsed_time": "1:28:01", "remaining_time": "0:00:08", "throughput": 2357.9, "total_tokens": 12453064}
|
| 3830 |
+
{"current_steps": 19055, "total_steps": 19080, "loss": 0.164, "lr": 2.828224472395391e-10, "epoch": 9.986897274633124, "percentage": 99.87, "elapsed_time": "1:28:02", "remaining_time": "0:00:06", "throughput": 2357.92, "total_tokens": 12455944}
|
| 3831 |
+
{"current_steps": 19060, "total_steps": 19080, "loss": 0.2456, "lr": 1.8450411388426515e-10, "epoch": 9.9895178197065, "percentage": 99.9, "elapsed_time": "1:28:03", "remaining_time": "0:00:05", "throughput": 2357.98, "total_tokens": 12459528}
|
| 3832 |
+
{"current_steps": 19065, "total_steps": 19080, "loss": 0.2544, "lr": 1.0710448418715935e-10, "epoch": 9.992138364779874, "percentage": 99.92, "elapsed_time": "1:28:05", "remaining_time": "0:00:04", "throughput": 2358.05, "total_tokens": 12463048}
|
| 3833 |
+
{"current_steps": 19070, "total_steps": 19080, "loss": 0.1928, "lr": 5.062362291585743e-11, "epoch": 9.99475890985325, "percentage": 99.95, "elapsed_time": "1:28:06", "remaining_time": "0:00:02", "throughput": 2358.15, "total_tokens": 12467240}
|
| 3834 |
+
{"current_steps": 19075, "total_steps": 19080, "loss": 0.3794, "lr": 1.5061577329777976e-11, "epoch": 9.997379454926625, "percentage": 99.97, "elapsed_time": "1:28:08", "remaining_time": "0:00:01", "throughput": 2358.19, "total_tokens": 12470216}
|
| 3835 |
+
{"current_steps": 19080, "total_steps": 19080, "loss": 0.277, "lr": 4.183771884491705e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:28:09", "remaining_time": "0:00:00", "throughput": 2358.13, "total_tokens": 12472912}
|
| 3836 |
+
{"current_steps": 19080, "total_steps": 19080, "eval_loss": 0.6786032915115356, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:28:25", "remaining_time": "0:00:00", "throughput": 2351.05, "total_tokens": 12472912}
|
| 3837 |
+
{"current_steps": 19080, "total_steps": 19080, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "1:28:26", "remaining_time": "0:00:00", "throughput": 2350.44, "total_tokens": 12472912}
|