Training in progress, step 25880
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +252 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 798032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24b99a9bafe855a8da49c0abd73817da2b559ff779b052493b944153fd1cb88b
|
| 3 |
size 798032
|
trainer_log.jsonl
CHANGED
|
@@ -4943,3 +4943,255 @@
|
|
| 4943 |
{"current_steps": 24620, "total_steps": 25880, "loss": 0.6505, "lr": 3.607274418129969e-07, "epoch": 19.026275115919628, "percentage": 95.13, "elapsed_time": "1:01:19", "remaining_time": "0:03:08", "throughput": 2253.11, "total_tokens": 8291376}
|
| 4944 |
{"current_steps": 24625, "total_steps": 25880, "loss": 0.4318, "lr": 3.5787929858073777e-07, "epoch": 19.03013910355487, "percentage": 95.15, "elapsed_time": "1:01:20", "remaining_time": "0:03:07", "throughput": 2253.15, "total_tokens": 8293168}
|
| 4945 |
{"current_steps": 24630, "total_steps": 25880, "loss": 0.4026, "lr": 3.5504236273254943e-07, "epoch": 19.034003091190108, "percentage": 95.17, "elapsed_time": "1:01:21", "remaining_time": "0:03:06", "throughput": 2253.04, "total_tokens": 8294832}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4943 |
{"current_steps": 24620, "total_steps": 25880, "loss": 0.6505, "lr": 3.607274418129969e-07, "epoch": 19.026275115919628, "percentage": 95.13, "elapsed_time": "1:01:19", "remaining_time": "0:03:08", "throughput": 2253.11, "total_tokens": 8291376}
|
| 4944 |
{"current_steps": 24625, "total_steps": 25880, "loss": 0.4318, "lr": 3.5787929858073777e-07, "epoch": 19.03013910355487, "percentage": 95.15, "elapsed_time": "1:01:20", "remaining_time": "0:03:07", "throughput": 2253.15, "total_tokens": 8293168}
|
| 4945 |
{"current_steps": 24630, "total_steps": 25880, "loss": 0.4026, "lr": 3.5504236273254943e-07, "epoch": 19.034003091190108, "percentage": 95.17, "elapsed_time": "1:01:21", "remaining_time": "0:03:06", "throughput": 2253.04, "total_tokens": 8294832}
|
| 4946 |
+
{"current_steps": 24635, "total_steps": 25880, "loss": 0.4684, "lr": 3.5221663555868587e-07, "epoch": 19.037867078825347, "percentage": 95.19, "elapsed_time": "1:01:22", "remaining_time": "0:03:06", "throughput": 2253.08, "total_tokens": 8296624}
|
| 4947 |
+
{"current_steps": 24640, "total_steps": 25880, "loss": 0.4608, "lr": 3.4940211834430804e-07, "epoch": 19.04173106646059, "percentage": 95.21, "elapsed_time": "1:01:23", "remaining_time": "0:03:05", "throughput": 2253.11, "total_tokens": 8298512}
|
| 4948 |
+
{"current_steps": 24645, "total_steps": 25880, "loss": 0.4861, "lr": 3.4659881236947246e-07, "epoch": 19.045595054095827, "percentage": 95.23, "elapsed_time": "1:01:23", "remaining_time": "0:03:04", "throughput": 2253.11, "total_tokens": 8300112}
|
| 4949 |
+
{"current_steps": 24650, "total_steps": 25880, "loss": 0.4152, "lr": 3.4380671890913985e-07, "epoch": 19.049459041731065, "percentage": 95.25, "elapsed_time": "1:01:24", "remaining_time": "0:03:03", "throughput": 2253.13, "total_tokens": 8301872}
|
| 4950 |
+
{"current_steps": 24655, "total_steps": 25880, "loss": 0.3847, "lr": 3.410258392331722e-07, "epoch": 19.053323029366307, "percentage": 95.27, "elapsed_time": "1:01:25", "remaining_time": "0:03:03", "throughput": 2253.17, "total_tokens": 8303632}
|
| 4951 |
+
{"current_steps": 24660, "total_steps": 25880, "loss": 0.4412, "lr": 3.3825617460633006e-07, "epoch": 19.057187017001546, "percentage": 95.29, "elapsed_time": "1:01:26", "remaining_time": "0:03:02", "throughput": 2253.21, "total_tokens": 8305488}
|
| 4952 |
+
{"current_steps": 24665, "total_steps": 25880, "loss": 0.4968, "lr": 3.3549772628827524e-07, "epoch": 19.061051004636784, "percentage": 95.31, "elapsed_time": "1:01:26", "remaining_time": "0:03:01", "throughput": 2253.23, "total_tokens": 8307184}
|
| 4953 |
+
{"current_steps": 24670, "total_steps": 25880, "loss": 0.5806, "lr": 3.327504955335625e-07, "epoch": 19.064914992272026, "percentage": 95.32, "elapsed_time": "1:01:27", "remaining_time": "0:03:00", "throughput": 2253.25, "total_tokens": 8308816}
|
| 4954 |
+
{"current_steps": 24675, "total_steps": 25880, "loss": 0.7481, "lr": 3.30014483591648e-07, "epoch": 19.068778979907265, "percentage": 95.34, "elapsed_time": "1:01:28", "remaining_time": "0:03:00", "throughput": 2253.28, "total_tokens": 8310640}
|
| 4955 |
+
{"current_steps": 24680, "total_steps": 25880, "loss": 0.4375, "lr": 3.2728969170689183e-07, "epoch": 19.072642967542503, "percentage": 95.36, "elapsed_time": "1:01:28", "remaining_time": "0:02:59", "throughput": 2253.29, "total_tokens": 8312208}
|
| 4956 |
+
{"current_steps": 24685, "total_steps": 25880, "loss": 0.3744, "lr": 3.2457612111854165e-07, "epoch": 19.076506955177745, "percentage": 95.38, "elapsed_time": "1:01:29", "remaining_time": "0:02:58", "throughput": 2253.26, "total_tokens": 8313744}
|
| 4957 |
+
{"current_steps": 24690, "total_steps": 25880, "loss": 0.3691, "lr": 3.218737730607491e-07, "epoch": 19.080370942812984, "percentage": 95.4, "elapsed_time": "1:01:30", "remaining_time": "0:02:57", "throughput": 2253.28, "total_tokens": 8315440}
|
| 4958 |
+
{"current_steps": 24695, "total_steps": 25880, "loss": 0.6245, "lr": 3.191826487625532e-07, "epoch": 19.084234930448222, "percentage": 95.42, "elapsed_time": "1:01:31", "remaining_time": "0:02:57", "throughput": 2253.3, "total_tokens": 8317200}
|
| 4959 |
+
{"current_steps": 24700, "total_steps": 25880, "loss": 0.3481, "lr": 3.1650274944790004e-07, "epoch": 19.08809891808346, "percentage": 95.44, "elapsed_time": "1:01:31", "remaining_time": "0:02:56", "throughput": 2253.26, "total_tokens": 8318704}
|
| 4960 |
+
{"current_steps": 24705, "total_steps": 25880, "loss": 0.3801, "lr": 3.1383407633561734e-07, "epoch": 19.091962905718702, "percentage": 95.46, "elapsed_time": "1:01:32", "remaining_time": "0:02:55", "throughput": 2253.25, "total_tokens": 8320304}
|
| 4961 |
+
{"current_steps": 24710, "total_steps": 25880, "loss": 0.4514, "lr": 3.1117663063943705e-07, "epoch": 19.09582689335394, "percentage": 95.48, "elapsed_time": "1:01:33", "remaining_time": "0:02:54", "throughput": 2253.19, "total_tokens": 8321616}
|
| 4962 |
+
{"current_steps": 24715, "total_steps": 25880, "loss": 0.5577, "lr": 3.0853041356798116e-07, "epoch": 19.09969088098918, "percentage": 95.5, "elapsed_time": "1:01:33", "remaining_time": "0:02:54", "throughput": 2253.22, "total_tokens": 8323344}
|
| 4963 |
+
{"current_steps": 24720, "total_steps": 25880, "loss": 0.3641, "lr": 3.058954263247621e-07, "epoch": 19.10355486862442, "percentage": 95.52, "elapsed_time": "1:01:34", "remaining_time": "0:02:53", "throughput": 2253.23, "total_tokens": 8325104}
|
| 4964 |
+
{"current_steps": 24725, "total_steps": 25880, "loss": 0.4778, "lr": 3.0327167010819333e-07, "epoch": 19.10741885625966, "percentage": 95.54, "elapsed_time": "1:01:35", "remaining_time": "0:02:52", "throughput": 2253.29, "total_tokens": 8326928}
|
| 4965 |
+
{"current_steps": 24730, "total_steps": 25880, "loss": 0.4224, "lr": 3.006591461115704e-07, "epoch": 19.111282843894898, "percentage": 95.56, "elapsed_time": "1:01:36", "remaining_time": "0:02:51", "throughput": 2253.33, "total_tokens": 8328848}
|
| 4966 |
+
{"current_steps": 24735, "total_steps": 25880, "loss": 0.3609, "lr": 2.9805785552308727e-07, "epoch": 19.11514683153014, "percentage": 95.58, "elapsed_time": "1:01:36", "remaining_time": "0:02:51", "throughput": 2253.29, "total_tokens": 8330288}
|
| 4967 |
+
{"current_steps": 24740, "total_steps": 25880, "loss": 0.4791, "lr": 2.954677995258254e-07, "epoch": 19.11901081916538, "percentage": 95.6, "elapsed_time": "1:01:37", "remaining_time": "0:02:50", "throughput": 2253.27, "total_tokens": 8331792}
|
| 4968 |
+
{"current_steps": 24745, "total_steps": 25880, "loss": 0.5538, "lr": 2.9288897929775905e-07, "epoch": 19.122874806800617, "percentage": 95.61, "elapsed_time": "1:01:38", "remaining_time": "0:02:49", "throughput": 2253.32, "total_tokens": 8333616}
|
| 4969 |
+
{"current_steps": 24750, "total_steps": 25880, "loss": 0.5134, "lr": 2.9032139601174734e-07, "epoch": 19.12673879443586, "percentage": 95.63, "elapsed_time": "1:01:39", "remaining_time": "0:02:48", "throughput": 2253.35, "total_tokens": 8335280}
|
| 4970 |
+
{"current_steps": 24755, "total_steps": 25880, "loss": 0.3791, "lr": 2.8776505083554504e-07, "epoch": 19.130602782071097, "percentage": 95.65, "elapsed_time": "1:01:39", "remaining_time": "0:02:48", "throughput": 2253.34, "total_tokens": 8336752}
|
| 4971 |
+
{"current_steps": 24760, "total_steps": 25880, "loss": 0.4347, "lr": 2.852199449317944e-07, "epoch": 19.134466769706336, "percentage": 95.67, "elapsed_time": "1:01:40", "remaining_time": "0:02:47", "throughput": 2253.31, "total_tokens": 8338288}
|
| 4972 |
+
{"current_steps": 24765, "total_steps": 25880, "loss": 0.3888, "lr": 2.8268607945802493e-07, "epoch": 19.138330757341578, "percentage": 95.69, "elapsed_time": "1:01:41", "remaining_time": "0:02:46", "throughput": 2253.35, "total_tokens": 8340176}
|
| 4973 |
+
{"current_steps": 24770, "total_steps": 25880, "loss": 0.4404, "lr": 2.801634555666538e-07, "epoch": 19.142194744976816, "percentage": 95.71, "elapsed_time": "1:01:41", "remaining_time": "0:02:45", "throughput": 2253.33, "total_tokens": 8341616}
|
| 4974 |
+
{"current_steps": 24775, "total_steps": 25880, "loss": 0.4172, "lr": 2.7765207440498266e-07, "epoch": 19.146058732612055, "percentage": 95.73, "elapsed_time": "1:01:42", "remaining_time": "0:02:45", "throughput": 2253.36, "total_tokens": 8343408}
|
| 4975 |
+
{"current_steps": 24780, "total_steps": 25880, "loss": 0.4195, "lr": 2.751519371152034e-07, "epoch": 19.149922720247297, "percentage": 95.75, "elapsed_time": "1:01:43", "remaining_time": "0:02:44", "throughput": 2253.38, "total_tokens": 8345104}
|
| 4976 |
+
{"current_steps": 24785, "total_steps": 25880, "loss": 0.4461, "lr": 2.726630448343953e-07, "epoch": 19.153786707882535, "percentage": 95.77, "elapsed_time": "1:01:44", "remaining_time": "0:02:43", "throughput": 2253.41, "total_tokens": 8346736}
|
| 4977 |
+
{"current_steps": 24790, "total_steps": 25880, "loss": 0.4406, "lr": 2.7018539869451963e-07, "epoch": 19.157650695517773, "percentage": 95.79, "elapsed_time": "1:01:44", "remaining_time": "0:02:42", "throughput": 2253.42, "total_tokens": 8348304}
|
| 4978 |
+
{"current_steps": 24795, "total_steps": 25880, "loss": 0.4654, "lr": 2.6771899982242774e-07, "epoch": 19.161514683153015, "percentage": 95.81, "elapsed_time": "1:01:45", "remaining_time": "0:02:42", "throughput": 2253.42, "total_tokens": 8349872}
|
| 4979 |
+
{"current_steps": 24800, "total_steps": 25880, "loss": 0.3819, "lr": 2.6526384933984737e-07, "epoch": 19.165378670788254, "percentage": 95.83, "elapsed_time": "1:01:46", "remaining_time": "0:02:41", "throughput": 2253.45, "total_tokens": 8351568}
|
| 4980 |
+
{"current_steps": 24805, "total_steps": 25880, "loss": 0.5031, "lr": 2.6281994836340195e-07, "epoch": 19.169242658423492, "percentage": 95.85, "elapsed_time": "1:01:46", "remaining_time": "0:02:40", "throughput": 2253.48, "total_tokens": 8353296}
|
| 4981 |
+
{"current_steps": 24810, "total_steps": 25880, "loss": 0.3461, "lr": 2.603872980045885e-07, "epoch": 19.173106646058734, "percentage": 95.87, "elapsed_time": "1:01:47", "remaining_time": "0:02:39", "throughput": 2253.56, "total_tokens": 8355248}
|
| 4982 |
+
{"current_steps": 24815, "total_steps": 25880, "loss": 0.3764, "lr": 2.5796589936979423e-07, "epoch": 19.176970633693973, "percentage": 95.88, "elapsed_time": "1:01:48", "remaining_time": "0:02:39", "throughput": 2253.57, "total_tokens": 8356848}
|
| 4983 |
+
{"current_steps": 24820, "total_steps": 25880, "loss": 0.3569, "lr": 2.5555575356027703e-07, "epoch": 19.18083462132921, "percentage": 95.9, "elapsed_time": "1:01:49", "remaining_time": "0:02:38", "throughput": 2253.61, "total_tokens": 8358736}
|
| 4984 |
+
{"current_steps": 24825, "total_steps": 25880, "loss": 0.3349, "lr": 2.531568616721963e-07, "epoch": 19.18469860896445, "percentage": 95.92, "elapsed_time": "1:01:49", "remaining_time": "0:02:37", "throughput": 2253.64, "total_tokens": 8360496}
|
| 4985 |
+
{"current_steps": 24830, "total_steps": 25880, "loss": 0.4136, "lr": 2.5076922479657647e-07, "epoch": 19.18856259659969, "percentage": 95.94, "elapsed_time": "1:01:50", "remaining_time": "0:02:36", "throughput": 2253.64, "total_tokens": 8362032}
|
| 4986 |
+
{"current_steps": 24835, "total_steps": 25880, "loss": 0.694, "lr": 2.483928440193295e-07, "epoch": 19.19242658423493, "percentage": 95.96, "elapsed_time": "1:01:51", "remaining_time": "0:02:36", "throughput": 2253.63, "total_tokens": 8363536}
|
| 4987 |
+
{"current_steps": 24840, "total_steps": 25880, "loss": 0.5105, "lr": 2.460277204212519e-07, "epoch": 19.19629057187017, "percentage": 95.98, "elapsed_time": "1:01:51", "remaining_time": "0:02:35", "throughput": 2253.64, "total_tokens": 8365136}
|
| 4988 |
+
{"current_steps": 24845, "total_steps": 25880, "loss": 0.5209, "lr": 2.43673855078011e-07, "epoch": 19.20015455950541, "percentage": 96.0, "elapsed_time": "1:01:52", "remaining_time": "0:02:34", "throughput": 2253.67, "total_tokens": 8366928}
|
| 4989 |
+
{"current_steps": 24850, "total_steps": 25880, "loss": 0.5336, "lr": 2.413312490601588e-07, "epoch": 19.20401854714065, "percentage": 96.02, "elapsed_time": "1:01:53", "remaining_time": "0:02:33", "throughput": 2253.73, "total_tokens": 8368944}
|
| 4990 |
+
{"current_steps": 24855, "total_steps": 25880, "loss": 0.4757, "lr": 2.3899990343312916e-07, "epoch": 19.207882534775887, "percentage": 96.04, "elapsed_time": "1:01:54", "remaining_time": "0:02:33", "throughput": 2253.71, "total_tokens": 8370480}
|
| 4991 |
+
{"current_steps": 24860, "total_steps": 25880, "loss": 0.3979, "lr": 2.3667981925723226e-07, "epoch": 19.21174652241113, "percentage": 96.06, "elapsed_time": "1:01:54", "remaining_time": "0:02:32", "throughput": 2253.71, "total_tokens": 8372080}
|
| 4992 |
+
{"current_steps": 24865, "total_steps": 25880, "loss": 0.4274, "lr": 2.3437099758765734e-07, "epoch": 19.215610510046368, "percentage": 96.08, "elapsed_time": "1:01:55", "remaining_time": "0:02:31", "throughput": 2253.78, "total_tokens": 8373968}
|
| 4993 |
+
{"current_steps": 24870, "total_steps": 25880, "loss": 0.3993, "lr": 2.3207343947446447e-07, "epoch": 19.219474497681606, "percentage": 96.1, "elapsed_time": "1:01:56", "remaining_time": "0:02:30", "throughput": 2253.73, "total_tokens": 8375344}
|
| 4994 |
+
{"current_steps": 24875, "total_steps": 25880, "loss": 0.4176, "lr": 2.2978714596260108e-07, "epoch": 19.223338485316848, "percentage": 96.12, "elapsed_time": "1:01:56", "remaining_time": "0:02:30", "throughput": 2253.75, "total_tokens": 8377040}
|
| 4995 |
+
{"current_steps": 24880, "total_steps": 25880, "loss": 0.424, "lr": 2.275121180918882e-07, "epoch": 19.227202472952087, "percentage": 96.14, "elapsed_time": "1:01:57", "remaining_time": "0:02:29", "throughput": 2253.78, "total_tokens": 8378800}
|
| 4996 |
+
{"current_steps": 24885, "total_steps": 25880, "loss": 0.4446, "lr": 2.2524835689702316e-07, "epoch": 19.231066460587325, "percentage": 96.16, "elapsed_time": "1:01:58", "remaining_time": "0:02:28", "throughput": 2253.82, "total_tokens": 8380560}
|
| 4997 |
+
{"current_steps": 24890, "total_steps": 25880, "loss": 0.4194, "lr": 2.229958634075713e-07, "epoch": 19.234930448222567, "percentage": 96.17, "elapsed_time": "1:01:59", "remaining_time": "0:02:27", "throughput": 2253.81, "total_tokens": 8382096}
|
| 4998 |
+
{"current_steps": 24895, "total_steps": 25880, "loss": 0.4505, "lr": 2.207546386479853e-07, "epoch": 19.238794435857805, "percentage": 96.19, "elapsed_time": "1:01:59", "remaining_time": "0:02:27", "throughput": 2253.86, "total_tokens": 8384016}
|
| 4999 |
+
{"current_steps": 24900, "total_steps": 25880, "loss": 0.4803, "lr": 2.1852468363758594e-07, "epoch": 19.242658423493044, "percentage": 96.21, "elapsed_time": "1:02:00", "remaining_time": "0:02:26", "throughput": 2253.93, "total_tokens": 8386000}
|
| 5000 |
+
{"current_steps": 24905, "total_steps": 25880, "loss": 0.6431, "lr": 2.1630599939057306e-07, "epoch": 19.246522411128286, "percentage": 96.23, "elapsed_time": "1:02:01", "remaining_time": "0:02:25", "throughput": 2253.96, "total_tokens": 8387728}
|
| 5001 |
+
{"current_steps": 24910, "total_steps": 25880, "loss": 0.4076, "lr": 2.140985869160145e-07, "epoch": 19.250386398763524, "percentage": 96.25, "elapsed_time": "1:02:02", "remaining_time": "0:02:24", "throughput": 2253.99, "total_tokens": 8389456}
|
| 5002 |
+
{"current_steps": 24915, "total_steps": 25880, "loss": 0.5268, "lr": 2.1190244721785435e-07, "epoch": 19.254250386398763, "percentage": 96.27, "elapsed_time": "1:02:02", "remaining_time": "0:02:24", "throughput": 2253.99, "total_tokens": 8391088}
|
| 5003 |
+
{"current_steps": 24920, "total_steps": 25880, "loss": 0.4544, "lr": 2.0971758129491314e-07, "epoch": 19.258114374034005, "percentage": 96.29, "elapsed_time": "1:02:03", "remaining_time": "0:02:23", "throughput": 2254.01, "total_tokens": 8392784}
|
| 5004 |
+
{"current_steps": 24925, "total_steps": 25880, "loss": 0.3726, "lr": 2.0754399014087933e-07, "epoch": 19.261978361669243, "percentage": 96.31, "elapsed_time": "1:02:04", "remaining_time": "0:02:22", "throughput": 2254.06, "total_tokens": 8394512}
|
| 5005 |
+
{"current_steps": 24930, "total_steps": 25880, "loss": 0.3624, "lr": 2.0538167474431214e-07, "epoch": 19.26584234930448, "percentage": 96.33, "elapsed_time": "1:02:04", "remaining_time": "0:02:21", "throughput": 2254.11, "total_tokens": 8396400}
|
| 5006 |
+
{"current_steps": 24935, "total_steps": 25880, "loss": 0.4928, "lr": 2.0323063608865267e-07, "epoch": 19.269706336939723, "percentage": 96.35, "elapsed_time": "1:02:05", "remaining_time": "0:02:21", "throughput": 2254.15, "total_tokens": 8398320}
|
| 5007 |
+
{"current_steps": 24940, "total_steps": 25880, "loss": 0.4937, "lr": 2.0109087515219894e-07, "epoch": 19.273570324574962, "percentage": 96.37, "elapsed_time": "1:02:06", "remaining_time": "0:02:20", "throughput": 2254.12, "total_tokens": 8399728}
|
| 5008 |
+
{"current_steps": 24945, "total_steps": 25880, "loss": 0.5363, "lr": 1.9896239290813078e-07, "epoch": 19.2774343122102, "percentage": 96.39, "elapsed_time": "1:02:07", "remaining_time": "0:02:19", "throughput": 2254.16, "total_tokens": 8401584}
|
| 5009 |
+
{"current_steps": 24950, "total_steps": 25880, "loss": 0.3615, "lr": 1.9684519032449333e-07, "epoch": 19.28129829984544, "percentage": 96.41, "elapsed_time": "1:02:07", "remaining_time": "0:02:18", "throughput": 2254.22, "total_tokens": 8403344}
|
| 5010 |
+
{"current_steps": 24955, "total_steps": 25880, "loss": 0.5165, "lr": 1.947392683642052e-07, "epoch": 19.28516228748068, "percentage": 96.43, "elapsed_time": "1:02:08", "remaining_time": "0:02:18", "throughput": 2254.22, "total_tokens": 8404912}
|
| 5011 |
+
{"current_steps": 24960, "total_steps": 25880, "loss": 0.4414, "lr": 1.9264462798505023e-07, "epoch": 19.28902627511592, "percentage": 96.45, "elapsed_time": "1:02:09", "remaining_time": "0:02:17", "throughput": 2254.28, "total_tokens": 8406736}
|
| 5012 |
+
{"current_steps": 24965, "total_steps": 25880, "loss": 0.4552, "lr": 1.905612701396803e-07, "epoch": 19.292890262751158, "percentage": 96.46, "elapsed_time": "1:02:09", "remaining_time": "0:02:16", "throughput": 2254.31, "total_tokens": 8408560}
|
| 5013 |
+
{"current_steps": 24970, "total_steps": 25880, "loss": 0.5652, "lr": 1.884891957756263e-07, "epoch": 19.2967542503864, "percentage": 96.48, "elapsed_time": "1:02:10", "remaining_time": "0:02:15", "throughput": 2254.32, "total_tokens": 8410192}
|
| 5014 |
+
{"current_steps": 24975, "total_steps": 25880, "loss": 0.3761, "lr": 1.864284058352761e-07, "epoch": 19.300618238021638, "percentage": 96.5, "elapsed_time": "1:02:11", "remaining_time": "0:02:15", "throughput": 2254.34, "total_tokens": 8411824}
|
| 5015 |
+
{"current_steps": 24980, "total_steps": 25880, "loss": 0.3893, "lr": 1.8437890125589109e-07, "epoch": 19.304482225656876, "percentage": 96.52, "elapsed_time": "1:02:12", "remaining_time": "0:02:14", "throughput": 2254.36, "total_tokens": 8413584}
|
| 5016 |
+
{"current_steps": 24985, "total_steps": 25880, "loss": 0.7046, "lr": 1.8234068296959506e-07, "epoch": 19.30834621329212, "percentage": 96.54, "elapsed_time": "1:02:12", "remaining_time": "0:02:13", "throughput": 2254.33, "total_tokens": 8415088}
|
| 5017 |
+
{"current_steps": 24990, "total_steps": 25880, "loss": 0.4265, "lr": 1.8031375190338261e-07, "epoch": 19.312210200927357, "percentage": 96.56, "elapsed_time": "1:02:13", "remaining_time": "0:02:12", "throughput": 2254.39, "total_tokens": 8416912}
|
| 5018 |
+
{"current_steps": 24995, "total_steps": 25880, "loss": 0.4514, "lr": 1.782981089791136e-07, "epoch": 19.316074188562595, "percentage": 96.58, "elapsed_time": "1:02:14", "remaining_time": "0:02:12", "throughput": 2254.37, "total_tokens": 8418384}
|
| 5019 |
+
{"current_steps": 25000, "total_steps": 25880, "loss": 0.4552, "lr": 1.7629375511351852e-07, "epoch": 19.319938176197837, "percentage": 96.6, "elapsed_time": "1:02:15", "remaining_time": "0:02:11", "throughput": 2254.38, "total_tokens": 8420112}
|
| 5020 |
+
{"current_steps": 25005, "total_steps": 25880, "loss": 0.3544, "lr": 1.7430069121818492e-07, "epoch": 19.323802163833076, "percentage": 96.62, "elapsed_time": "1:02:15", "remaining_time": "0:02:10", "throughput": 2254.36, "total_tokens": 8421680}
|
| 5021 |
+
{"current_steps": 25010, "total_steps": 25880, "loss": 0.4154, "lr": 1.7231891819957657e-07, "epoch": 19.327666151468314, "percentage": 96.64, "elapsed_time": "1:02:16", "remaining_time": "0:02:09", "throughput": 2254.39, "total_tokens": 8423376}
|
| 5022 |
+
{"current_steps": 25015, "total_steps": 25880, "loss": 0.3883, "lr": 1.703484369590086e-07, "epoch": 19.331530139103556, "percentage": 96.66, "elapsed_time": "1:02:17", "remaining_time": "0:02:09", "throughput": 2254.36, "total_tokens": 8424816}
|
| 5023 |
+
{"current_steps": 25020, "total_steps": 25880, "loss": 0.4847, "lr": 1.6838924839266966e-07, "epoch": 19.335394126738795, "percentage": 96.68, "elapsed_time": "1:02:17", "remaining_time": "0:02:08", "throughput": 2254.43, "total_tokens": 8426640}
|
| 5024 |
+
{"current_steps": 25025, "total_steps": 25880, "loss": 0.4769, "lr": 1.664413533916137e-07, "epoch": 19.339258114374033, "percentage": 96.7, "elapsed_time": "1:02:18", "remaining_time": "0:02:07", "throughput": 2254.46, "total_tokens": 8428464}
|
| 5025 |
+
{"current_steps": 25030, "total_steps": 25880, "loss": 0.307, "lr": 1.645047528417487e-07, "epoch": 19.343122102009275, "percentage": 96.72, "elapsed_time": "1:02:19", "remaining_time": "0:02:06", "throughput": 2254.45, "total_tokens": 8430032}
|
| 5026 |
+
{"current_steps": 25035, "total_steps": 25880, "loss": 0.4919, "lr": 1.62579447623859e-07, "epoch": 19.346986089644513, "percentage": 96.73, "elapsed_time": "1:02:20", "remaining_time": "0:02:06", "throughput": 2254.44, "total_tokens": 8431632}
|
| 5027 |
+
{"current_steps": 25040, "total_steps": 25880, "loss": 0.3638, "lr": 1.606654386135803e-07, "epoch": 19.350850077279752, "percentage": 96.75, "elapsed_time": "1:02:20", "remaining_time": "0:02:05", "throughput": 2254.49, "total_tokens": 8433584}
|
| 5028 |
+
{"current_steps": 25045, "total_steps": 25880, "loss": 0.3291, "lr": 1.5876272668141902e-07, "epoch": 19.354714064914994, "percentage": 96.77, "elapsed_time": "1:02:21", "remaining_time": "0:02:04", "throughput": 2254.47, "total_tokens": 8435152}
|
| 5029 |
+
{"current_steps": 25050, "total_steps": 25880, "loss": 0.5148, "lr": 1.568713126927357e-07, "epoch": 19.358578052550232, "percentage": 96.79, "elapsed_time": "1:02:22", "remaining_time": "0:02:03", "throughput": 2254.49, "total_tokens": 8436848}
|
| 5030 |
+
{"current_steps": 25055, "total_steps": 25880, "loss": 0.4066, "lr": 1.549911975077617e-07, "epoch": 19.36244204018547, "percentage": 96.81, "elapsed_time": "1:02:22", "remaining_time": "0:02:03", "throughput": 2254.54, "total_tokens": 8438704}
|
| 5031 |
+
{"current_steps": 25060, "total_steps": 25880, "loss": 0.4281, "lr": 1.5312238198157968e-07, "epoch": 19.366306027820713, "percentage": 96.83, "elapsed_time": "1:02:23", "remaining_time": "0:02:02", "throughput": 2254.55, "total_tokens": 8440368}
|
| 5032 |
+
{"current_steps": 25065, "total_steps": 25880, "loss": 0.4067, "lr": 1.5126486696414032e-07, "epoch": 19.37017001545595, "percentage": 96.85, "elapsed_time": "1:02:24", "remaining_time": "0:02:01", "throughput": 2254.58, "total_tokens": 8442160}
|
| 5033 |
+
{"current_steps": 25070, "total_steps": 25880, "loss": 0.4194, "lr": 1.4941865330025394e-07, "epoch": 19.37403400309119, "percentage": 96.87, "elapsed_time": "1:02:25", "remaining_time": "0:02:01", "throughput": 2254.61, "total_tokens": 8443920}
|
| 5034 |
+
{"current_steps": 25075, "total_steps": 25880, "loss": 0.4347, "lr": 1.475837418295878e-07, "epoch": 19.377897990726428, "percentage": 96.89, "elapsed_time": "1:02:25", "remaining_time": "0:02:00", "throughput": 2254.57, "total_tokens": 8445328}
|
| 5035 |
+
{"current_steps": 25080, "total_steps": 25880, "loss": 0.496, "lr": 1.457601333866715e-07, "epoch": 19.38176197836167, "percentage": 96.91, "elapsed_time": "1:02:26", "remaining_time": "0:01:59", "throughput": 2254.54, "total_tokens": 8446768}
|
| 5036 |
+
{"current_steps": 25085, "total_steps": 25880, "loss": 0.3709, "lr": 1.4394782880089443e-07, "epoch": 19.38562596599691, "percentage": 96.93, "elapsed_time": "1:02:27", "remaining_time": "0:01:58", "throughput": 2254.52, "total_tokens": 8448240}
|
| 5037 |
+
{"current_steps": 25090, "total_steps": 25880, "loss": 0.4135, "lr": 1.4214682889649998e-07, "epoch": 19.389489953632147, "percentage": 96.95, "elapsed_time": "1:02:27", "remaining_time": "0:01:58", "throughput": 2254.54, "total_tokens": 8449936}
|
| 5038 |
+
{"current_steps": 25095, "total_steps": 25880, "loss": 0.3946, "lr": 1.403571344925969e-07, "epoch": 19.39335394126739, "percentage": 96.97, "elapsed_time": "1:02:28", "remaining_time": "0:01:57", "throughput": 2254.57, "total_tokens": 8451664}
|
| 5039 |
+
{"current_steps": 25100, "total_steps": 25880, "loss": 0.4288, "lr": 1.3857874640314516e-07, "epoch": 19.397217928902627, "percentage": 96.99, "elapsed_time": "1:02:29", "remaining_time": "0:01:56", "throughput": 2254.56, "total_tokens": 8453232}
|
| 5040 |
+
{"current_steps": 25105, "total_steps": 25880, "loss": 0.4396, "lr": 1.3681166543697e-07, "epoch": 19.401081916537866, "percentage": 97.01, "elapsed_time": "1:02:30", "remaining_time": "0:01:55", "throughput": 2254.61, "total_tokens": 8455120}
|
| 5041 |
+
{"current_steps": 25110, "total_steps": 25880, "loss": 0.5157, "lr": 1.3505589239775073e-07, "epoch": 19.404945904173108, "percentage": 97.02, "elapsed_time": "1:02:30", "remaining_time": "0:01:55", "throughput": 2254.6, "total_tokens": 8456656}
|
| 5042 |
+
{"current_steps": 25115, "total_steps": 25880, "loss": 0.6977, "lr": 1.3331142808401808e-07, "epoch": 19.408809891808346, "percentage": 97.04, "elapsed_time": "1:02:31", "remaining_time": "0:01:54", "throughput": 2254.6, "total_tokens": 8458192}
|
| 5043 |
+
{"current_steps": 25120, "total_steps": 25880, "loss": 0.4514, "lr": 1.315782732891735e-07, "epoch": 19.412673879443584, "percentage": 97.06, "elapsed_time": "1:02:32", "remaining_time": "0:01:53", "throughput": 2254.62, "total_tokens": 8459984}
|
| 5044 |
+
{"current_steps": 25125, "total_steps": 25880, "loss": 0.3581, "lr": 1.2985642880145864e-07, "epoch": 19.416537867078826, "percentage": 97.08, "elapsed_time": "1:02:32", "remaining_time": "0:01:52", "throughput": 2254.59, "total_tokens": 8461424}
|
| 5045 |
+
{"current_steps": 25130, "total_steps": 25880, "loss": 0.3975, "lr": 1.2814589540398048e-07, "epoch": 19.420401854714065, "percentage": 97.1, "elapsed_time": "1:02:33", "remaining_time": "0:01:52", "throughput": 2254.59, "total_tokens": 8463024}
|
| 5046 |
+
{"current_steps": 25135, "total_steps": 25880, "loss": 0.3898, "lr": 1.2644667387470276e-07, "epoch": 19.424265842349303, "percentage": 97.12, "elapsed_time": "1:02:34", "remaining_time": "0:01:51", "throughput": 2254.59, "total_tokens": 8464720}
|
| 5047 |
+
{"current_steps": 25140, "total_steps": 25880, "loss": 0.7687, "lr": 1.247587649864379e-07, "epoch": 19.428129829984545, "percentage": 97.14, "elapsed_time": "1:02:35", "remaining_time": "0:01:50", "throughput": 2254.55, "total_tokens": 8466192}
|
| 5048 |
+
{"current_steps": 25145, "total_steps": 25880, "loss": 0.562, "lr": 1.230821695068607e-07, "epoch": 19.431993817619784, "percentage": 97.16, "elapsed_time": "1:02:35", "remaining_time": "0:01:49", "throughput": 2254.59, "total_tokens": 8467888}
|
| 5049 |
+
{"current_steps": 25150, "total_steps": 25880, "loss": 0.565, "lr": 1.214168881984945e-07, "epoch": 19.435857805255022, "percentage": 97.18, "elapsed_time": "1:02:36", "remaining_time": "0:01:49", "throughput": 2254.61, "total_tokens": 8469680}
|
| 5050 |
+
{"current_steps": 25155, "total_steps": 25880, "loss": 0.3662, "lr": 1.1976292181871684e-07, "epoch": 19.439721792890264, "percentage": 97.2, "elapsed_time": "1:02:37", "remaining_time": "0:01:48", "throughput": 2254.62, "total_tokens": 8471408}
|
| 5051 |
+
{"current_steps": 25160, "total_steps": 25880, "loss": 0.5288, "lr": 1.1812027111976764e-07, "epoch": 19.443585780525503, "percentage": 97.22, "elapsed_time": "1:02:38", "remaining_time": "0:01:47", "throughput": 2254.63, "total_tokens": 8473072}
|
| 5052 |
+
{"current_steps": 25165, "total_steps": 25880, "loss": 0.4789, "lr": 1.1648893684872986e-07, "epoch": 19.44744976816074, "percentage": 97.24, "elapsed_time": "1:02:38", "remaining_time": "0:01:46", "throughput": 2254.66, "total_tokens": 8474832}
|
| 5053 |
+
{"current_steps": 25170, "total_steps": 25880, "loss": 0.3657, "lr": 1.1486891974754332e-07, "epoch": 19.451313755795983, "percentage": 97.26, "elapsed_time": "1:02:39", "remaining_time": "0:01:46", "throughput": 2254.68, "total_tokens": 8476528}
|
| 5054 |
+
{"current_steps": 25175, "total_steps": 25880, "loss": 0.4136, "lr": 1.1326022055300478e-07, "epoch": 19.45517774343122, "percentage": 97.28, "elapsed_time": "1:02:40", "remaining_time": "0:01:45", "throughput": 2254.7, "total_tokens": 8478224}
|
| 5055 |
+
{"current_steps": 25180, "total_steps": 25880, "loss": 0.4305, "lr": 1.1166283999675953e-07, "epoch": 19.45904173106646, "percentage": 97.3, "elapsed_time": "1:02:40", "remaining_time": "0:01:44", "throughput": 2254.69, "total_tokens": 8479760}
|
| 5056 |
+
{"current_steps": 25185, "total_steps": 25880, "loss": 0.4258, "lr": 1.100767788053042e-07, "epoch": 19.462905718701702, "percentage": 97.31, "elapsed_time": "1:02:41", "remaining_time": "0:01:43", "throughput": 2254.75, "total_tokens": 8481584}
|
| 5057 |
+
{"current_steps": 25190, "total_steps": 25880, "loss": 0.5385, "lr": 1.0850203769998957e-07, "epoch": 19.46676970633694, "percentage": 97.33, "elapsed_time": "1:02:42", "remaining_time": "0:01:43", "throughput": 2254.81, "total_tokens": 8483440}
|
| 5058 |
+
{"current_steps": 25195, "total_steps": 25880, "loss": 0.3794, "lr": 1.0693861739701771e-07, "epoch": 19.47063369397218, "percentage": 97.35, "elapsed_time": "1:02:43", "remaining_time": "0:01:42", "throughput": 2254.84, "total_tokens": 8485200}
|
| 5059 |
+
{"current_steps": 25200, "total_steps": 25880, "loss": 0.3755, "lr": 1.0538651860744208e-07, "epoch": 19.474497681607417, "percentage": 97.37, "elapsed_time": "1:02:43", "remaining_time": "0:01:41", "throughput": 2254.85, "total_tokens": 8486928}
|
| 5060 |
+
{"current_steps": 25205, "total_steps": 25880, "loss": 0.3554, "lr": 1.0384574203716469e-07, "epoch": 19.47836166924266, "percentage": 97.39, "elapsed_time": "1:02:44", "remaining_time": "0:01:40", "throughput": 2254.88, "total_tokens": 8488592}
|
| 5061 |
+
{"current_steps": 25210, "total_steps": 25880, "loss": 0.4432, "lr": 1.0231628838694163e-07, "epoch": 19.482225656877898, "percentage": 97.41, "elapsed_time": "1:02:45", "remaining_time": "0:01:40", "throughput": 2254.85, "total_tokens": 8490064}
|
| 5062 |
+
{"current_steps": 25215, "total_steps": 25880, "loss": 0.4153, "lr": 1.0079815835237761e-07, "epoch": 19.486089644513136, "percentage": 97.43, "elapsed_time": "1:02:45", "remaining_time": "0:01:39", "throughput": 2254.85, "total_tokens": 8491632}
|
| 5063 |
+
{"current_steps": 25220, "total_steps": 25880, "loss": 0.7196, "lr": 9.929135262392586e-08, "epoch": 19.489953632148378, "percentage": 97.45, "elapsed_time": "1:02:46", "remaining_time": "0:01:38", "throughput": 2254.88, "total_tokens": 8493360}
|
| 5064 |
+
{"current_steps": 25225, "total_steps": 25880, "loss": 0.3707, "lr": 9.779587188689099e-08, "epoch": 19.493817619783616, "percentage": 97.47, "elapsed_time": "1:02:47", "remaining_time": "0:01:37", "throughput": 2254.91, "total_tokens": 8495088}
|
| 5065 |
+
{"current_steps": 25230, "total_steps": 25880, "loss": 0.4924, "lr": 9.631171682142893e-08, "epoch": 19.497681607418855, "percentage": 97.49, "elapsed_time": "1:02:48", "remaining_time": "0:01:37", "throughput": 2254.9, "total_tokens": 8496592}
|
| 5066 |
+
{"current_steps": 25235, "total_steps": 25880, "loss": 0.365, "lr": 9.483888810253582e-08, "epoch": 19.501545595054097, "percentage": 97.51, "elapsed_time": "1:02:48", "remaining_time": "0:01:36", "throughput": 2254.9, "total_tokens": 8498288}
|
| 5067 |
+
{"current_steps": 25240, "total_steps": 25880, "loss": 0.4979, "lr": 9.337738640007032e-08, "epoch": 19.505409582689335, "percentage": 97.53, "elapsed_time": "1:02:49", "remaining_time": "0:01:35", "throughput": 2254.94, "total_tokens": 8500016}
|
| 5068 |
+
{"current_steps": 25245, "total_steps": 25880, "loss": 0.3902, "lr": 9.192721237873125e-08, "epoch": 19.509273570324574, "percentage": 97.55, "elapsed_time": "1:02:50", "remaining_time": "0:01:34", "throughput": 2255.0, "total_tokens": 8501872}
|
| 5069 |
+
{"current_steps": 25250, "total_steps": 25880, "loss": 0.3695, "lr": 9.048836669806326e-08, "epoch": 19.513137557959816, "percentage": 97.57, "elapsed_time": "1:02:50", "remaining_time": "0:01:34", "throughput": 2254.98, "total_tokens": 8503344}
|
| 5070 |
+
{"current_steps": 25255, "total_steps": 25880, "loss": 0.5319, "lr": 8.906085001246233e-08, "epoch": 19.517001545595054, "percentage": 97.59, "elapsed_time": "1:02:51", "remaining_time": "0:01:33", "throughput": 2255.02, "total_tokens": 8505104}
|
| 5071 |
+
{"current_steps": 25260, "total_steps": 25880, "loss": 0.4431, "lr": 8.764466297117302e-08, "epoch": 19.520865533230292, "percentage": 97.6, "elapsed_time": "1:02:52", "remaining_time": "0:01:32", "throughput": 2255.03, "total_tokens": 8506768}
|
| 5072 |
+
{"current_steps": 25265, "total_steps": 25880, "loss": 0.6339, "lr": 8.623980621828842e-08, "epoch": 19.524729520865534, "percentage": 97.62, "elapsed_time": "1:02:53", "remaining_time": "0:01:31", "throughput": 2255.02, "total_tokens": 8508368}
|
| 5073 |
+
{"current_steps": 25270, "total_steps": 25880, "loss": 0.427, "lr": 8.484628039273912e-08, "epoch": 19.528593508500773, "percentage": 97.64, "elapsed_time": "1:02:53", "remaining_time": "0:01:31", "throughput": 2255.05, "total_tokens": 8510096}
|
| 5074 |
+
{"current_steps": 25275, "total_steps": 25880, "loss": 0.4375, "lr": 8.34640861283098e-08, "epoch": 19.53245749613601, "percentage": 97.66, "elapsed_time": "1:02:54", "remaining_time": "0:01:30", "throughput": 2255.04, "total_tokens": 8511600}
|
| 5075 |
+
{"current_steps": 25280, "total_steps": 25880, "loss": 0.362, "lr": 8.209322405363929e-08, "epoch": 19.536321483771253, "percentage": 97.68, "elapsed_time": "1:02:55", "remaining_time": "0:01:29", "throughput": 2255.03, "total_tokens": 8513072}
|
| 5076 |
+
{"current_steps": 25285, "total_steps": 25880, "loss": 0.4037, "lr": 8.073369479219551e-08, "epoch": 19.54018547140649, "percentage": 97.7, "elapsed_time": "1:02:55", "remaining_time": "0:01:28", "throughput": 2255.07, "total_tokens": 8514960}
|
| 5077 |
+
{"current_steps": 25290, "total_steps": 25880, "loss": 0.4023, "lr": 7.938549896230329e-08, "epoch": 19.54404945904173, "percentage": 97.72, "elapsed_time": "1:02:56", "remaining_time": "0:01:28", "throughput": 2255.11, "total_tokens": 8516784}
|
| 5078 |
+
{"current_steps": 25295, "total_steps": 25880, "loss": 0.3816, "lr": 7.804863717712774e-08, "epoch": 19.547913446676972, "percentage": 97.74, "elapsed_time": "1:02:57", "remaining_time": "0:01:27", "throughput": 2255.09, "total_tokens": 8518256}
|
| 5079 |
+
{"current_steps": 25300, "total_steps": 25880, "loss": 0.4206, "lr": 7.672311004468802e-08, "epoch": 19.55177743431221, "percentage": 97.76, "elapsed_time": "1:02:58", "remaining_time": "0:01:26", "throughput": 2255.13, "total_tokens": 8519952}
|
| 5080 |
+
{"current_steps": 25305, "total_steps": 25880, "loss": 0.4609, "lr": 7.540891816783246e-08, "epoch": 19.55564142194745, "percentage": 97.78, "elapsed_time": "1:02:58", "remaining_time": "0:01:25", "throughput": 2255.15, "total_tokens": 8521680}
|
| 5081 |
+
{"current_steps": 25310, "total_steps": 25880, "loss": 0.411, "lr": 7.410606214427185e-08, "epoch": 19.55950540958269, "percentage": 97.8, "elapsed_time": "1:02:59", "remaining_time": "0:01:25", "throughput": 2255.13, "total_tokens": 8523280}
|
| 5082 |
+
{"current_steps": 25315, "total_steps": 25880, "loss": 0.3241, "lr": 7.281454256654885e-08, "epoch": 19.56336939721793, "percentage": 97.82, "elapsed_time": "1:03:00", "remaining_time": "0:01:24", "throughput": 2255.09, "total_tokens": 8524656}
|
| 5083 |
+
{"current_steps": 25320, "total_steps": 25880, "loss": 0.3593, "lr": 7.153436002205472e-08, "epoch": 19.567233384853168, "percentage": 97.84, "elapsed_time": "1:03:00", "remaining_time": "0:01:23", "throughput": 2255.11, "total_tokens": 8526384}
|
| 5084 |
+
{"current_steps": 25325, "total_steps": 25880, "loss": 0.3394, "lr": 7.02655150930237e-08, "epoch": 19.57109737248841, "percentage": 97.86, "elapsed_time": "1:03:01", "remaining_time": "0:01:22", "throughput": 2255.12, "total_tokens": 8527984}
|
| 5085 |
+
{"current_steps": 25330, "total_steps": 25880, "loss": 0.3329, "lr": 6.900800835653587e-08, "epoch": 19.57496136012365, "percentage": 97.87, "elapsed_time": "1:03:02", "remaining_time": "0:01:22", "throughput": 2255.13, "total_tokens": 8529552}
|
| 5086 |
+
{"current_steps": 25335, "total_steps": 25880, "loss": 0.3933, "lr": 6.77618403845115e-08, "epoch": 19.578825347758887, "percentage": 97.89, "elapsed_time": "1:03:03", "remaining_time": "0:01:21", "throughput": 2255.2, "total_tokens": 8531440}
|
| 5087 |
+
{"current_steps": 25340, "total_steps": 25880, "loss": 0.5348, "lr": 6.652701174371389e-08, "epoch": 19.582689335394125, "percentage": 97.91, "elapsed_time": "1:03:03", "remaining_time": "0:01:20", "throughput": 2255.21, "total_tokens": 8533104}
|
| 5088 |
+
{"current_steps": 25345, "total_steps": 25880, "loss": 0.5721, "lr": 6.530352299575215e-08, "epoch": 19.586553323029367, "percentage": 97.93, "elapsed_time": "1:03:04", "remaining_time": "0:01:19", "throughput": 2255.25, "total_tokens": 8534896}
|
| 5089 |
+
{"current_steps": 25350, "total_steps": 25880, "loss": 0.4204, "lr": 6.409137469707837e-08, "epoch": 19.590417310664606, "percentage": 97.95, "elapsed_time": "1:03:05", "remaining_time": "0:01:19", "throughput": 2255.26, "total_tokens": 8536464}
|
| 5090 |
+
{"current_steps": 25355, "total_steps": 25880, "loss": 0.6522, "lr": 6.289056739898213e-08, "epoch": 19.594281298299844, "percentage": 97.97, "elapsed_time": "1:03:05", "remaining_time": "0:01:18", "throughput": 2255.24, "total_tokens": 8538032}
|
| 5091 |
+
{"current_steps": 25360, "total_steps": 25880, "loss": 0.4902, "lr": 6.170110164759879e-08, "epoch": 19.598145285935086, "percentage": 97.99, "elapsed_time": "1:03:06", "remaining_time": "0:01:17", "throughput": 2255.3, "total_tokens": 8540016}
|
| 5092 |
+
{"current_steps": 25365, "total_steps": 25880, "loss": 0.4824, "lr": 6.052297798390116e-08, "epoch": 19.602009273570324, "percentage": 98.01, "elapsed_time": "1:03:07", "remaining_time": "0:01:16", "throughput": 2255.37, "total_tokens": 8541968}
|
| 5093 |
+
{"current_steps": 25370, "total_steps": 25880, "loss": 0.5451, "lr": 5.9356196943713415e-08, "epoch": 19.605873261205563, "percentage": 98.03, "elapsed_time": "1:03:08", "remaining_time": "0:01:16", "throughput": 2255.41, "total_tokens": 8543696}
|
| 5094 |
+
{"current_steps": 25375, "total_steps": 25880, "loss": 0.331, "lr": 5.8200759057688845e-08, "epoch": 19.609737248840805, "percentage": 98.05, "elapsed_time": "1:03:08", "remaining_time": "0:01:15", "throughput": 2255.43, "total_tokens": 8545456}
|
| 5095 |
+
{"current_steps": 25380, "total_steps": 25880, "loss": 0.4092, "lr": 5.705666485132932e-08, "epoch": 19.613601236476043, "percentage": 98.07, "elapsed_time": "1:03:09", "remaining_time": "0:01:14", "throughput": 2255.42, "total_tokens": 8546960}
|
| 5096 |
+
{"current_steps": 25385, "total_steps": 25880, "loss": 0.389, "lr": 5.5923914844976944e-08, "epoch": 19.61746522411128, "percentage": 98.09, "elapsed_time": "1:03:10", "remaining_time": "0:01:13", "throughput": 2255.38, "total_tokens": 8548368}
|
| 5097 |
+
{"current_steps": 25390, "total_steps": 25880, "loss": 0.4914, "lr": 5.4802509553811274e-08, "epoch": 19.621329211746524, "percentage": 98.11, "elapsed_time": "1:03:10", "remaining_time": "0:01:13", "throughput": 2255.4, "total_tokens": 8550128}
|
| 5098 |
+
{"current_steps": 25395, "total_steps": 25880, "loss": 0.5197, "lr": 5.3692449487857675e-08, "epoch": 19.625193199381762, "percentage": 98.13, "elapsed_time": "1:03:11", "remaining_time": "0:01:12", "throughput": 2255.37, "total_tokens": 8551600}
|
| 5099 |
+
{"current_steps": 25400, "total_steps": 25880, "loss": 0.4063, "lr": 5.259373515197341e-08, "epoch": 19.629057187017, "percentage": 98.15, "elapsed_time": "1:03:12", "remaining_time": "0:01:11", "throughput": 2255.39, "total_tokens": 8553232}
|
| 5100 |
+
{"current_steps": 25405, "total_steps": 25880, "loss": 0.4002, "lr": 5.150636704586431e-08, "epoch": 19.632921174652243, "percentage": 98.16, "elapsed_time": "1:03:13", "remaining_time": "0:01:10", "throughput": 2255.37, "total_tokens": 8554768}
|
| 5101 |
+
{"current_steps": 25410, "total_steps": 25880, "loss": 0.5945, "lr": 5.043034566406812e-08, "epoch": 19.63678516228748, "percentage": 98.18, "elapsed_time": "1:03:13", "remaining_time": "0:01:10", "throughput": 2255.33, "total_tokens": 8556176}
|
| 5102 |
+
{"current_steps": 25415, "total_steps": 25880, "loss": 0.4335, "lr": 4.936567149596838e-08, "epoch": 19.64064914992272, "percentage": 98.2, "elapsed_time": "1:03:14", "remaining_time": "0:01:09", "throughput": 2255.33, "total_tokens": 8557872}
|
| 5103 |
+
{"current_steps": 25420, "total_steps": 25880, "loss": 0.4408, "lr": 4.8312345025786075e-08, "epoch": 19.64451313755796, "percentage": 98.22, "elapsed_time": "1:03:15", "remaining_time": "0:01:08", "throughput": 2255.38, "total_tokens": 8559760}
|
| 5104 |
+
{"current_steps": 25425, "total_steps": 25880, "loss": 0.3892, "lr": 4.7270366732576896e-08, "epoch": 19.6483771251932, "percentage": 98.24, "elapsed_time": "1:03:15", "remaining_time": "0:01:07", "throughput": 2255.35, "total_tokens": 8561296}
|
| 5105 |
+
{"current_steps": 25430, "total_steps": 25880, "loss": 0.8623, "lr": 4.6239737090242316e-08, "epoch": 19.652241112828438, "percentage": 98.26, "elapsed_time": "1:03:16", "remaining_time": "0:01:07", "throughput": 2255.44, "total_tokens": 8563376}
|
| 5106 |
+
{"current_steps": 25435, "total_steps": 25880, "loss": 0.55, "lr": 4.5220456567515725e-08, "epoch": 19.65610510046368, "percentage": 98.28, "elapsed_time": "1:03:17", "remaining_time": "0:01:06", "throughput": 2255.47, "total_tokens": 8565040}
|
| 5107 |
+
{"current_steps": 25440, "total_steps": 25880, "loss": 0.4622, "lr": 4.421252562797629e-08, "epoch": 19.65996908809892, "percentage": 98.3, "elapsed_time": "1:03:18", "remaining_time": "0:01:05", "throughput": 2255.47, "total_tokens": 8566608}
|
| 5108 |
+
{"current_steps": 25445, "total_steps": 25880, "loss": 0.3754, "lr": 4.321594473003232e-08, "epoch": 19.663833075734157, "percentage": 98.32, "elapsed_time": "1:03:18", "remaining_time": "0:01:04", "throughput": 2255.52, "total_tokens": 8568400}
|
| 5109 |
+
{"current_steps": 25450, "total_steps": 25880, "loss": 0.564, "lr": 4.22307143269407e-08, "epoch": 19.667697063369395, "percentage": 98.34, "elapsed_time": "1:03:19", "remaining_time": "0:01:04", "throughput": 2255.55, "total_tokens": 8570160}
|
| 5110 |
+
{"current_steps": 25455, "total_steps": 25880, "loss": 0.4374, "lr": 4.125683486678189e-08, "epoch": 19.671561051004637, "percentage": 98.36, "elapsed_time": "1:03:20", "remaining_time": "0:01:03", "throughput": 2255.58, "total_tokens": 8571824}
|
| 5111 |
+
{"current_steps": 25460, "total_steps": 25880, "loss": 0.3965, "lr": 4.0294306792490466e-08, "epoch": 19.675425038639876, "percentage": 98.38, "elapsed_time": "1:03:20", "remaining_time": "0:01:02", "throughput": 2255.6, "total_tokens": 8573520}
|
| 5112 |
+
{"current_steps": 25465, "total_steps": 25880, "loss": 0.3932, "lr": 3.934313054182459e-08, "epoch": 19.679289026275114, "percentage": 98.4, "elapsed_time": "1:03:21", "remaining_time": "0:01:01", "throughput": 2255.63, "total_tokens": 8575184}
|
| 5113 |
+
{"current_steps": 25470, "total_steps": 25880, "loss": 0.4097, "lr": 3.840330654738544e-08, "epoch": 19.683153013910356, "percentage": 98.42, "elapsed_time": "1:03:22", "remaining_time": "0:01:01", "throughput": 2255.67, "total_tokens": 8576976}
|
| 5114 |
+
{"current_steps": 25475, "total_steps": 25880, "loss": 0.4515, "lr": 3.747483523661166e-08, "epoch": 19.687017001545595, "percentage": 98.44, "elapsed_time": "1:03:23", "remaining_time": "0:01:00", "throughput": 2255.68, "total_tokens": 8578704}
|
| 5115 |
+
{"current_steps": 25480, "total_steps": 25880, "loss": 0.5981, "lr": 3.655771703177935e-08, "epoch": 19.690880989180833, "percentage": 98.45, "elapsed_time": "1:03:23", "remaining_time": "0:00:59", "throughput": 2255.67, "total_tokens": 8580272}
|
| 5116 |
+
{"current_steps": 25485, "total_steps": 25880, "loss": 0.3607, "lr": 3.565195234999652e-08, "epoch": 19.694744976816075, "percentage": 98.47, "elapsed_time": "1:03:24", "remaining_time": "0:00:58", "throughput": 2255.66, "total_tokens": 8581872}
|
| 5117 |
+
{"current_steps": 25490, "total_steps": 25880, "loss": 0.583, "lr": 3.475754160321143e-08, "epoch": 19.698608964451314, "percentage": 98.49, "elapsed_time": "1:03:25", "remaining_time": "0:00:58", "throughput": 2255.61, "total_tokens": 8583248}
|
| 5118 |
+
{"current_steps": 25495, "total_steps": 25880, "loss": 0.5216, "lr": 3.3874485198207015e-08, "epoch": 19.702472952086552, "percentage": 98.51, "elapsed_time": "1:03:26", "remaining_time": "0:00:57", "throughput": 2255.57, "total_tokens": 8584688}
|
| 5119 |
+
{"current_steps": 25500, "total_steps": 25880, "loss": 0.4673, "lr": 3.3002783536603685e-08, "epoch": 19.706336939721794, "percentage": 98.53, "elapsed_time": "1:03:26", "remaining_time": "0:00:56", "throughput": 2255.55, "total_tokens": 8586256}
|
| 5120 |
+
{"current_steps": 25505, "total_steps": 25880, "loss": 0.4213, "lr": 3.214243701485653e-08, "epoch": 19.710200927357032, "percentage": 98.55, "elapsed_time": "1:03:27", "remaining_time": "0:00:55", "throughput": 2255.56, "total_tokens": 8587920}
|
| 5121 |
+
{"current_steps": 25510, "total_steps": 25880, "loss": 0.4262, "lr": 3.129344602425255e-08, "epoch": 19.71406491499227, "percentage": 98.57, "elapsed_time": "1:03:28", "remaining_time": "0:00:55", "throughput": 2255.58, "total_tokens": 8589520}
|
| 5122 |
+
{"current_steps": 25515, "total_steps": 25880, "loss": 0.3381, "lr": 3.045581095092453e-08, "epoch": 19.717928902627513, "percentage": 98.59, "elapsed_time": "1:03:28", "remaining_time": "0:00:54", "throughput": 2255.6, "total_tokens": 8591216}
|
| 5123 |
+
{"current_steps": 25520, "total_steps": 25880, "loss": 0.3205, "lr": 2.9629532175828867e-08, "epoch": 19.72179289026275, "percentage": 98.61, "elapsed_time": "1:03:29", "remaining_time": "0:00:53", "throughput": 2255.67, "total_tokens": 8593104}
|
| 5124 |
+
{"current_steps": 25525, "total_steps": 25880, "loss": 0.4164, "lr": 2.881461007476216e-08, "epoch": 19.72565687789799, "percentage": 98.63, "elapsed_time": "1:03:30", "remaining_time": "0:00:52", "throughput": 2255.68, "total_tokens": 8594704}
|
| 5125 |
+
{"current_steps": 25530, "total_steps": 25880, "loss": 0.5573, "lr": 2.8011045018361272e-08, "epoch": 19.72952086553323, "percentage": 98.65, "elapsed_time": "1:03:30", "remaining_time": "0:00:52", "throughput": 2255.68, "total_tokens": 8596336}
|
| 5126 |
+
{"current_steps": 25535, "total_steps": 25880, "loss": 0.4151, "lr": 2.7218837372086636e-08, "epoch": 19.73338485316847, "percentage": 98.67, "elapsed_time": "1:03:31", "remaining_time": "0:00:51", "throughput": 2255.67, "total_tokens": 8597840}
|
| 5127 |
+
{"current_steps": 25540, "total_steps": 25880, "loss": 0.3644, "lr": 2.6437987496238935e-08, "epoch": 19.73724884080371, "percentage": 98.69, "elapsed_time": "1:03:32", "remaining_time": "0:00:50", "throughput": 2255.69, "total_tokens": 8599536}
|
| 5128 |
+
{"current_steps": 25545, "total_steps": 25880, "loss": 0.4367, "lr": 2.566849574595631e-08, "epoch": 19.74111282843895, "percentage": 98.71, "elapsed_time": "1:03:33", "remaining_time": "0:00:50", "throughput": 2255.71, "total_tokens": 8601296}
|
| 5129 |
+
{"current_steps": 25550, "total_steps": 25880, "loss": 0.5413, "lr": 2.4910362471208815e-08, "epoch": 19.74497681607419, "percentage": 98.72, "elapsed_time": "1:03:33", "remaining_time": "0:00:49", "throughput": 2255.73, "total_tokens": 8602960}
|
| 5130 |
+
{"current_steps": 25555, "total_steps": 25880, "loss": 0.3322, "lr": 2.4163588016795636e-08, "epoch": 19.748840803709427, "percentage": 98.74, "elapsed_time": "1:03:34", "remaining_time": "0:00:48", "throughput": 2255.7, "total_tokens": 8604368}
|
| 5131 |
+
{"current_steps": 25560, "total_steps": 25880, "loss": 0.4798, "lr": 2.3428172722358977e-08, "epoch": 19.75270479134467, "percentage": 98.76, "elapsed_time": "1:03:35", "remaining_time": "0:00:47", "throughput": 2255.67, "total_tokens": 8605904}
|
| 5132 |
+
{"current_steps": 25565, "total_steps": 25880, "loss": 0.4875, "lr": 2.270411692237018e-08, "epoch": 19.756568778979908, "percentage": 98.78, "elapsed_time": "1:03:35", "remaining_time": "0:00:47", "throughput": 2255.7, "total_tokens": 8607632}
|
| 5133 |
+
{"current_steps": 25570, "total_steps": 25880, "loss": 0.4446, "lr": 2.1991420946129714e-08, "epoch": 19.760432766615146, "percentage": 98.8, "elapsed_time": "1:03:36", "remaining_time": "0:00:46", "throughput": 2255.7, "total_tokens": 8609200}
|
| 5134 |
+
{"current_steps": 25575, "total_steps": 25880, "loss": 0.4243, "lr": 2.12900851177783e-08, "epoch": 19.764296754250385, "percentage": 98.82, "elapsed_time": "1:03:37", "remaining_time": "0:00:45", "throughput": 2255.74, "total_tokens": 8610960}
|
| 5135 |
+
{"current_steps": 25580, "total_steps": 25880, "loss": 0.387, "lr": 2.0600109756288565e-08, "epoch": 19.768160741885627, "percentage": 98.84, "elapsed_time": "1:03:38", "remaining_time": "0:00:44", "throughput": 2255.8, "total_tokens": 8612848}
|
| 5136 |
+
{"current_steps": 25585, "total_steps": 25880, "loss": 0.4638, "lr": 1.992149517546227e-08, "epoch": 19.772024729520865, "percentage": 98.86, "elapsed_time": "1:03:38", "remaining_time": "0:00:44", "throughput": 2255.79, "total_tokens": 8614352}
|
| 5137 |
+
{"current_steps": 25590, "total_steps": 25880, "loss": 0.3641, "lr": 1.925424168394141e-08, "epoch": 19.775888717156104, "percentage": 98.88, "elapsed_time": "1:03:39", "remaining_time": "0:00:43", "throughput": 2255.85, "total_tokens": 8616272}
|
| 5138 |
+
{"current_steps": 25595, "total_steps": 25880, "loss": 0.4065, "lr": 1.8598349585197128e-08, "epoch": 19.779752704791346, "percentage": 98.9, "elapsed_time": "1:03:40", "remaining_time": "0:00:42", "throughput": 2255.89, "total_tokens": 8618032}
|
| 5139 |
+
{"current_steps": 25600, "total_steps": 25880, "loss": 0.3933, "lr": 1.7953819177529697e-08, "epoch": 19.783616692426584, "percentage": 98.92, "elapsed_time": "1:03:40", "remaining_time": "0:00:41", "throughput": 2255.87, "total_tokens": 8619504}
|
| 5140 |
+
{"current_steps": 25605, "total_steps": 25880, "loss": 0.4531, "lr": 1.732065075407685e-08, "epoch": 19.787480680061822, "percentage": 98.94, "elapsed_time": "1:03:41", "remaining_time": "0:00:41", "throughput": 2255.85, "total_tokens": 8620976}
|
| 5141 |
+
{"current_steps": 25610, "total_steps": 25880, "loss": 0.6506, "lr": 1.6698844602808238e-08, "epoch": 19.791344667697064, "percentage": 98.96, "elapsed_time": "1:03:42", "remaining_time": "0:00:40", "throughput": 2255.83, "total_tokens": 8622448}
|
| 5142 |
+
{"current_steps": 25615, "total_steps": 25880, "loss": 0.4041, "lr": 1.6088401006522647e-08, "epoch": 19.795208655332303, "percentage": 98.98, "elapsed_time": "1:03:42", "remaining_time": "0:00:39", "throughput": 2255.8, "total_tokens": 8623856}
|
| 5143 |
+
{"current_steps": 25620, "total_steps": 25880, "loss": 0.4735, "lr": 1.548932024285632e-08, "epoch": 19.79907264296754, "percentage": 99.0, "elapsed_time": "1:03:43", "remaining_time": "0:00:38", "throughput": 2255.8, "total_tokens": 8625456}
|
| 5144 |
+
{"current_steps": 25625, "total_steps": 25880, "loss": 0.5244, "lr": 1.4901602584271868e-08, "epoch": 19.802936630602783, "percentage": 99.01, "elapsed_time": "1:03:44", "remaining_time": "0:00:38", "throughput": 2255.89, "total_tokens": 8627408}
|
| 5145 |
+
{"current_steps": 25630, "total_steps": 25880, "loss": 0.357, "lr": 1.4325248298069361e-08, "epoch": 19.80680061823802, "percentage": 99.03, "elapsed_time": "1:03:45", "remaining_time": "0:00:37", "throughput": 2255.88, "total_tokens": 8628944}
|
| 5146 |
+
{"current_steps": 25635, "total_steps": 25880, "loss": 0.3306, "lr": 1.3760257646378005e-08, "epoch": 19.81066460587326, "percentage": 99.05, "elapsed_time": "1:03:45", "remaining_time": "0:00:36", "throughput": 2255.91, "total_tokens": 8630672}
|
| 5147 |
+
{"current_steps": 25640, "total_steps": 25880, "loss": 0.502, "lr": 1.3206630886158921e-08, "epoch": 19.814528593508502, "percentage": 99.07, "elapsed_time": "1:03:46", "remaining_time": "0:00:35", "throughput": 2255.92, "total_tokens": 8632336}
|
| 5148 |
+
{"current_steps": 25645, "total_steps": 25880, "loss": 0.3848, "lr": 1.2664368269202365e-08, "epoch": 19.81839258114374, "percentage": 99.09, "elapsed_time": "1:03:47", "remaining_time": "0:00:35", "throughput": 2255.91, "total_tokens": 8633840}
|
| 5149 |
+
{"current_steps": 25650, "total_steps": 25880, "loss": 0.342, "lr": 1.2133470042136052e-08, "epoch": 19.82225656877898, "percentage": 99.11, "elapsed_time": "1:03:47", "remaining_time": "0:00:34", "throughput": 2255.98, "total_tokens": 8635792}
|
| 5150 |
+
{"current_steps": 25655, "total_steps": 25880, "loss": 0.4559, "lr": 1.161393644641129e-08, "epoch": 19.82612055641422, "percentage": 99.13, "elapsed_time": "1:03:48", "remaining_time": "0:00:33", "throughput": 2256.07, "total_tokens": 8637904}
|
| 5151 |
+
{"current_steps": 25660, "total_steps": 25880, "loss": 0.524, "lr": 1.1105767718319614e-08, "epoch": 19.82998454404946, "percentage": 99.15, "elapsed_time": "1:03:49", "remaining_time": "0:00:32", "throughput": 2256.13, "total_tokens": 8639696}
|
| 5152 |
+
{"current_steps": 25665, "total_steps": 25880, "loss": 0.4749, "lr": 1.0608964088978934e-08, "epoch": 19.833848531684698, "percentage": 99.17, "elapsed_time": "1:03:50", "remaining_time": "0:00:32", "throughput": 2256.13, "total_tokens": 8641360}
|
| 5153 |
+
{"current_steps": 25670, "total_steps": 25880, "loss": 0.52, "lr": 1.012352578433351e-08, "epoch": 19.83771251931994, "percentage": 99.19, "elapsed_time": "1:03:50", "remaining_time": "0:00:31", "throughput": 2256.15, "total_tokens": 8643216}
|
| 5154 |
+
{"current_steps": 25675, "total_steps": 25880, "loss": 0.5659, "lr": 9.649453025170618e-09, "epoch": 19.841576506955178, "percentage": 99.21, "elapsed_time": "1:03:51", "remaining_time": "0:00:30", "throughput": 2256.15, "total_tokens": 8644816}
|
| 5155 |
+
{"current_steps": 25680, "total_steps": 25880, "loss": 0.6191, "lr": 9.186746027095571e-09, "epoch": 19.845440494590417, "percentage": 99.23, "elapsed_time": "1:03:52", "remaining_time": "0:00:29", "throughput": 2256.15, "total_tokens": 8646352}
|
| 5156 |
+
{"current_steps": 25685, "total_steps": 25880, "loss": 0.4028, "lr": 8.73540500055392e-09, "epoch": 19.84930448222566, "percentage": 99.25, "elapsed_time": "1:03:53", "remaining_time": "0:00:29", "throughput": 2256.17, "total_tokens": 8648048}
|
| 5157 |
+
{"current_steps": 25690, "total_steps": 25880, "loss": 0.423, "lr": 8.295430150814798e-09, "epoch": 19.853168469860897, "percentage": 99.27, "elapsed_time": "1:03:53", "remaining_time": "0:00:28", "throughput": 2256.17, "total_tokens": 8649584}
|
| 5158 |
+
{"current_steps": 25695, "total_steps": 25880, "loss": 0.4027, "lr": 7.866821677984804e-09, "epoch": 19.857032457496135, "percentage": 99.29, "elapsed_time": "1:03:54", "remaining_time": "0:00:27", "throughput": 2256.15, "total_tokens": 8651152}
|
| 5159 |
+
{"current_steps": 25700, "total_steps": 25880, "loss": 0.4079, "lr": 7.449579776996895e-09, "epoch": 19.860896445131374, "percentage": 99.3, "elapsed_time": "1:03:55", "remaining_time": "0:00:26", "throughput": 2256.12, "total_tokens": 8652560}
|
| 5160 |
+
{"current_steps": 25705, "total_steps": 25880, "loss": 0.5352, "lr": 7.043704637613169e-09, "epoch": 19.864760432766616, "percentage": 99.32, "elapsed_time": "1:03:55", "remaining_time": "0:00:26", "throughput": 2256.16, "total_tokens": 8654448}
|
| 5161 |
+
{"current_steps": 25710, "total_steps": 25880, "loss": 0.3745, "lr": 6.6491964444304054e-09, "epoch": 19.868624420401854, "percentage": 99.34, "elapsed_time": "1:03:56", "remaining_time": "0:00:25", "throughput": 2256.18, "total_tokens": 8656144}
|
| 5162 |
+
{"current_steps": 25715, "total_steps": 25880, "loss": 0.3659, "lr": 6.266055376871749e-09, "epoch": 19.872488408037093, "percentage": 99.36, "elapsed_time": "1:03:57", "remaining_time": "0:00:24", "throughput": 2256.18, "total_tokens": 8657776}
|
| 5163 |
+
{"current_steps": 25720, "total_steps": 25880, "loss": 0.4095, "lr": 5.894281609195029e-09, "epoch": 19.876352395672335, "percentage": 99.38, "elapsed_time": "1:03:58", "remaining_time": "0:00:23", "throughput": 2256.2, "total_tokens": 8659472}
|
| 5164 |
+
{"current_steps": 25725, "total_steps": 25880, "loss": 0.514, "lr": 5.533875310478886e-09, "epoch": 19.880216383307573, "percentage": 99.4, "elapsed_time": "1:03:58", "remaining_time": "0:00:23", "throughput": 2256.23, "total_tokens": 8661136}
|
| 5165 |
+
{"current_steps": 25730, "total_steps": 25880, "loss": 0.5502, "lr": 5.184836644644975e-09, "epoch": 19.88408037094281, "percentage": 99.42, "elapsed_time": "1:03:59", "remaining_time": "0:00:22", "throughput": 2256.24, "total_tokens": 8662800}
|
| 5166 |
+
{"current_steps": 25735, "total_steps": 25880, "loss": 0.4169, "lr": 4.847165770435758e-09, "epoch": 19.887944358578054, "percentage": 99.44, "elapsed_time": "1:04:00", "remaining_time": "0:00:21", "throughput": 2256.28, "total_tokens": 8664592}
|
| 5167 |
+
{"current_steps": 25740, "total_steps": 25880, "loss": 0.5899, "lr": 4.52086284142561e-09, "epoch": 19.891808346213292, "percentage": 99.46, "elapsed_time": "1:04:00", "remaining_time": "0:00:20", "throughput": 2256.32, "total_tokens": 8666480}
|
| 5168 |
+
{"current_steps": 25745, "total_steps": 25880, "loss": 0.415, "lr": 4.205928006018045e-09, "epoch": 19.89567233384853, "percentage": 99.48, "elapsed_time": "1:04:01", "remaining_time": "0:00:20", "throughput": 2256.32, "total_tokens": 8668112}
|
| 5169 |
+
{"current_steps": 25750, "total_steps": 25880, "loss": 0.533, "lr": 3.9023614074484845e-09, "epoch": 19.899536321483772, "percentage": 99.5, "elapsed_time": "1:04:02", "remaining_time": "0:00:19", "throughput": 2256.33, "total_tokens": 8669776}
|
| 5170 |
+
{"current_steps": 25755, "total_steps": 25880, "loss": 0.5479, "lr": 3.6101631837814896e-09, "epoch": 19.90340030911901, "percentage": 99.52, "elapsed_time": "1:04:03", "remaining_time": "0:00:18", "throughput": 2256.39, "total_tokens": 8671696}
|
| 5171 |
+
{"current_steps": 25760, "total_steps": 25880, "loss": 0.5905, "lr": 3.32933346790798e-09, "epoch": 19.90726429675425, "percentage": 99.54, "elapsed_time": "1:04:03", "remaining_time": "0:00:17", "throughput": 2256.45, "total_tokens": 8673616}
|
| 5172 |
+
{"current_steps": 25765, "total_steps": 25880, "loss": 0.4874, "lr": 3.059872387553564e-09, "epoch": 19.91112828438949, "percentage": 99.56, "elapsed_time": "1:04:04", "remaining_time": "0:00:17", "throughput": 2256.47, "total_tokens": 8675312}
|
| 5173 |
+
{"current_steps": 25770, "total_steps": 25880, "loss": 0.3755, "lr": 2.8017800652702097e-09, "epoch": 19.91499227202473, "percentage": 99.57, "elapsed_time": "1:04:05", "remaining_time": "0:00:16", "throughput": 2256.47, "total_tokens": 8676976}
|
| 5174 |
+
{"current_steps": 25775, "total_steps": 25880, "loss": 0.4936, "lr": 2.5550566184390224e-09, "epoch": 19.918856259659968, "percentage": 99.59, "elapsed_time": "1:04:06", "remaining_time": "0:00:15", "throughput": 2256.54, "total_tokens": 8678960}
|
| 5175 |
+
{"current_steps": 25780, "total_steps": 25880, "loss": 0.5284, "lr": 2.3197021592730185e-09, "epoch": 19.92272024729521, "percentage": 99.61, "elapsed_time": "1:04:06", "remaining_time": "0:00:14", "throughput": 2256.58, "total_tokens": 8680720}
|
| 5176 |
+
{"current_steps": 25785, "total_steps": 25880, "loss": 0.4439, "lr": 2.095716794811575e-09, "epoch": 19.92658423493045, "percentage": 99.63, "elapsed_time": "1:04:07", "remaining_time": "0:00:14", "throughput": 2256.58, "total_tokens": 8682352}
|
| 5177 |
+
{"current_steps": 25790, "total_steps": 25880, "loss": 0.6204, "lr": 1.883100626925982e-09, "epoch": 19.930448222565687, "percentage": 99.65, "elapsed_time": "1:04:08", "remaining_time": "0:00:13", "throughput": 2256.59, "total_tokens": 8683952}
|
| 5178 |
+
{"current_steps": 25795, "total_steps": 25880, "loss": 0.325, "lr": 1.6818537523111134e-09, "epoch": 19.93431221020093, "percentage": 99.67, "elapsed_time": "1:04:08", "remaining_time": "0:00:12", "throughput": 2256.57, "total_tokens": 8685392}
|
| 5179 |
+
{"current_steps": 25800, "total_steps": 25880, "loss": 0.3567, "lr": 1.491976262499306e-09, "epoch": 19.938176197836167, "percentage": 99.69, "elapsed_time": "1:04:09", "remaining_time": "0:00:11", "throughput": 2256.59, "total_tokens": 8687088}
|
| 5180 |
+
{"current_steps": 25805, "total_steps": 25880, "loss": 0.347, "lr": 1.3134682438492585e-09, "epoch": 19.942040185471406, "percentage": 99.71, "elapsed_time": "1:04:10", "remaining_time": "0:00:11", "throughput": 2256.6, "total_tokens": 8688752}
|
| 5181 |
+
{"current_steps": 25810, "total_steps": 25880, "loss": 0.5814, "lr": 1.1463297775432535e-09, "epoch": 19.945904173106648, "percentage": 99.73, "elapsed_time": "1:04:11", "remaining_time": "0:00:10", "throughput": 2256.61, "total_tokens": 8690384}
|
| 5182 |
+
{"current_steps": 25815, "total_steps": 25880, "loss": 0.4264, "lr": 9.905609395982617e-10, "epoch": 19.949768160741886, "percentage": 99.75, "elapsed_time": "1:04:11", "remaining_time": "0:00:09", "throughput": 2256.67, "total_tokens": 8692208}
|
| 5183 |
+
{"current_steps": 25820, "total_steps": 25880, "loss": 0.4662, "lr": 8.4616180086039e-10, "epoch": 19.953632148377125, "percentage": 99.77, "elapsed_time": "1:04:12", "remaining_time": "0:00:08", "throughput": 2256.68, "total_tokens": 8693808}
|
| 5184 |
+
{"current_steps": 25825, "total_steps": 25880, "loss": 0.4275, "lr": 7.1313242699933e-10, "epoch": 19.957496136012363, "percentage": 99.79, "elapsed_time": "1:04:13", "remaining_time": "0:00:08", "throughput": 2256.69, "total_tokens": 8695536}
|
| 5185 |
+
{"current_steps": 25830, "total_steps": 25880, "loss": 0.4044, "lr": 5.914728785250123e-10, "epoch": 19.961360123647605, "percentage": 99.81, "elapsed_time": "1:04:13", "remaining_time": "0:00:07", "throughput": 2256.74, "total_tokens": 8697392}
|
| 5186 |
+
{"current_steps": 25835, "total_steps": 25880, "loss": 0.3563, "lr": 4.811832107598502e-10, "epoch": 19.965224111282843, "percentage": 99.83, "elapsed_time": "1:04:14", "remaining_time": "0:00:06", "throughput": 2256.79, "total_tokens": 8699216}
|
| 5187 |
+
{"current_steps": 25840, "total_steps": 25880, "loss": 0.5002, "lr": 3.8226347387204654e-10, "epoch": 19.969088098918082, "percentage": 99.85, "elapsed_time": "1:04:15", "remaining_time": "0:00:05", "throughput": 2256.84, "total_tokens": 8701168}
|
| 5188 |
+
{"current_steps": 25845, "total_steps": 25880, "loss": 0.3505, "lr": 2.9471371284783834e-10, "epoch": 19.972952086553324, "percentage": 99.86, "elapsed_time": "1:04:16", "remaining_time": "0:00:05", "throughput": 2256.85, "total_tokens": 8702800}
|
| 5189 |
+
{"current_steps": 25850, "total_steps": 25880, "loss": 0.4798, "lr": 2.185339675025988e-10, "epoch": 19.976816074188562, "percentage": 99.88, "elapsed_time": "1:04:16", "remaining_time": "0:00:04", "throughput": 2256.85, "total_tokens": 8704400}
|
| 5190 |
+
{"current_steps": 25855, "total_steps": 25880, "loss": 0.5805, "lr": 1.5372427248638853e-10, "epoch": 19.9806800618238, "percentage": 99.9, "elapsed_time": "1:04:17", "remaining_time": "0:00:03", "throughput": 2256.89, "total_tokens": 8706096}
|
| 5191 |
+
{"current_steps": 25860, "total_steps": 25880, "loss": 0.4725, "lr": 1.0028465727562885e-10, "epoch": 19.984544049459043, "percentage": 99.92, "elapsed_time": "1:04:18", "remaining_time": "0:00:02", "throughput": 2256.95, "total_tokens": 8707984}
|
| 5192 |
+
{"current_steps": 25865, "total_steps": 25880, "loss": 0.3305, "lr": 5.821514617587731e-11, "epoch": 19.98840803709428, "percentage": 99.94, "elapsed_time": "1:04:18", "remaining_time": "0:00:02", "throughput": 2256.96, "total_tokens": 8709552}
|
| 5193 |
+
{"current_steps": 25870, "total_steps": 25880, "loss": 0.3717, "lr": 2.751575831627662e-11, "epoch": 19.99227202472952, "percentage": 99.96, "elapsed_time": "1:04:19", "remaining_time": "0:00:01", "throughput": 2257.02, "total_tokens": 8711472}
|
| 5194 |
+
{"current_steps": 25875, "total_steps": 25880, "loss": 0.3542, "lr": 8.186507660656873e-12, "epoch": 19.99613601236476, "percentage": 99.98, "elapsed_time": "1:04:20", "remaining_time": "0:00:00", "throughput": 2257.07, "total_tokens": 8713296}
|
| 5195 |
+
{"current_steps": 25880, "total_steps": 25880, "loss": 0.5089, "lr": 2.2740300198442753e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:04:21", "remaining_time": "0:00:00", "throughput": 2257.0, "total_tokens": 8714656}
|
| 5196 |
+
{"current_steps": 25880, "total_steps": 25880, "eval_loss": 0.4453426003456116, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:04:27", "remaining_time": "0:00:00", "throughput": 2253.36, "total_tokens": 8714656}
|
| 5197 |
+
{"current_steps": 25880, "total_steps": 25880, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:04:28", "remaining_time": "0:00:00", "throughput": 2252.85, "total_tokens": 8714656}
|