Training in progress, step 25880
Browse files- trainer_log.jsonl +253 -0
trainer_log.jsonl
CHANGED
|
@@ -4942,3 +4942,256 @@
|
|
| 4942 |
{"current_steps": 24615, "total_steps": 25880, "loss": 4.8735, "lr": 3.635867911339741e-07, "epoch": 19.02241112828439, "percentage": 95.11, "elapsed_time": "0:55:53", "remaining_time": "0:02:52", "throughput": 2472.34, "total_tokens": 8289936}
|
| 4943 |
{"current_steps": 24620, "total_steps": 25880, "loss": 5.2248, "lr": 3.607274418129969e-07, "epoch": 19.026275115919628, "percentage": 95.13, "elapsed_time": "0:55:53", "remaining_time": "0:02:51", "throughput": 2472.24, "total_tokens": 8291376}
|
| 4944 |
{"current_steps": 24625, "total_steps": 25880, "loss": 4.7807, "lr": 3.5787929858073777e-07, "epoch": 19.03013910355487, "percentage": 95.15, "elapsed_time": "0:55:54", "remaining_time": "0:02:50", "throughput": 2472.29, "total_tokens": 8293168}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4942 |
{"current_steps": 24615, "total_steps": 25880, "loss": 4.8735, "lr": 3.635867911339741e-07, "epoch": 19.02241112828439, "percentage": 95.11, "elapsed_time": "0:55:53", "remaining_time": "0:02:52", "throughput": 2472.34, "total_tokens": 8289936}
|
| 4943 |
{"current_steps": 24620, "total_steps": 25880, "loss": 5.2248, "lr": 3.607274418129969e-07, "epoch": 19.026275115919628, "percentage": 95.13, "elapsed_time": "0:55:53", "remaining_time": "0:02:51", "throughput": 2472.24, "total_tokens": 8291376}
|
| 4944 |
{"current_steps": 24625, "total_steps": 25880, "loss": 4.7807, "lr": 3.5787929858073777e-07, "epoch": 19.03013910355487, "percentage": 95.15, "elapsed_time": "0:55:54", "remaining_time": "0:02:50", "throughput": 2472.29, "total_tokens": 8293168}
|
| 4945 |
+
{"current_steps": 24630, "total_steps": 25880, "loss": 4.6992, "lr": 3.5504236273254943e-07, "epoch": 19.034003091190108, "percentage": 95.17, "elapsed_time": "0:55:55", "remaining_time": "0:02:50", "throughput": 2472.32, "total_tokens": 8294832}
|
| 4946 |
+
{"current_steps": 24635, "total_steps": 25880, "loss": 4.725, "lr": 3.5221663555868587e-07, "epoch": 19.037867078825347, "percentage": 95.19, "elapsed_time": "0:55:55", "remaining_time": "0:02:49", "throughput": 2472.36, "total_tokens": 8296624}
|
| 4947 |
+
{"current_steps": 24640, "total_steps": 25880, "loss": 4.7338, "lr": 3.4940211834430804e-07, "epoch": 19.04173106646059, "percentage": 95.21, "elapsed_time": "0:55:56", "remaining_time": "0:02:48", "throughput": 2472.39, "total_tokens": 8298512}
|
| 4948 |
+
{"current_steps": 24645, "total_steps": 25880, "loss": 4.3167, "lr": 3.4659881236947246e-07, "epoch": 19.045595054095827, "percentage": 95.23, "elapsed_time": "0:55:57", "remaining_time": "0:02:48", "throughput": 2472.39, "total_tokens": 8300112}
|
| 4949 |
+
{"current_steps": 24650, "total_steps": 25880, "loss": 4.8095, "lr": 3.4380671890913985e-07, "epoch": 19.049459041731065, "percentage": 95.25, "elapsed_time": "0:55:57", "remaining_time": "0:02:47", "throughput": 2472.4, "total_tokens": 8301872}
|
| 4950 |
+
{"current_steps": 24655, "total_steps": 25880, "loss": 4.9236, "lr": 3.410258392331722e-07, "epoch": 19.053323029366307, "percentage": 95.27, "elapsed_time": "0:55:58", "remaining_time": "0:02:46", "throughput": 2472.44, "total_tokens": 8303632}
|
| 4951 |
+
{"current_steps": 24660, "total_steps": 25880, "loss": 4.5406, "lr": 3.3825617460633006e-07, "epoch": 19.057187017001546, "percentage": 95.29, "elapsed_time": "0:55:59", "remaining_time": "0:02:46", "throughput": 2472.48, "total_tokens": 8305488}
|
| 4952 |
+
{"current_steps": 24665, "total_steps": 25880, "loss": 4.5301, "lr": 3.3549772628827524e-07, "epoch": 19.061051004636784, "percentage": 95.31, "elapsed_time": "0:55:59", "remaining_time": "0:02:45", "throughput": 2472.5, "total_tokens": 8307184}
|
| 4953 |
+
{"current_steps": 24670, "total_steps": 25880, "loss": 4.5087, "lr": 3.327504955335625e-07, "epoch": 19.064914992272026, "percentage": 95.32, "elapsed_time": "0:56:00", "remaining_time": "0:02:44", "throughput": 2472.53, "total_tokens": 8308816}
|
| 4954 |
+
{"current_steps": 24675, "total_steps": 25880, "loss": 4.8807, "lr": 3.30014483591648e-07, "epoch": 19.068778979907265, "percentage": 95.34, "elapsed_time": "0:56:01", "remaining_time": "0:02:44", "throughput": 2472.56, "total_tokens": 8310640}
|
| 4955 |
+
{"current_steps": 24680, "total_steps": 25880, "loss": 4.7589, "lr": 3.2728969170689183e-07, "epoch": 19.072642967542503, "percentage": 95.36, "elapsed_time": "0:56:01", "remaining_time": "0:02:43", "throughput": 2472.57, "total_tokens": 8312208}
|
| 4956 |
+
{"current_steps": 24685, "total_steps": 25880, "loss": 4.716, "lr": 3.2457612111854165e-07, "epoch": 19.076506955177745, "percentage": 95.38, "elapsed_time": "0:56:02", "remaining_time": "0:02:42", "throughput": 2472.55, "total_tokens": 8313744}
|
| 4957 |
+
{"current_steps": 24690, "total_steps": 25880, "loss": 4.9407, "lr": 3.218737730607491e-07, "epoch": 19.080370942812984, "percentage": 95.4, "elapsed_time": "0:56:03", "remaining_time": "0:02:42", "throughput": 2472.57, "total_tokens": 8315440}
|
| 4958 |
+
{"current_steps": 24695, "total_steps": 25880, "loss": 4.9422, "lr": 3.191826487625532e-07, "epoch": 19.084234930448222, "percentage": 95.42, "elapsed_time": "0:56:03", "remaining_time": "0:02:41", "throughput": 2472.58, "total_tokens": 8317200}
|
| 4959 |
+
{"current_steps": 24700, "total_steps": 25880, "loss": 4.6116, "lr": 3.1650274944790004e-07, "epoch": 19.08809891808346, "percentage": 95.44, "elapsed_time": "0:56:04", "remaining_time": "0:02:40", "throughput": 2472.56, "total_tokens": 8318704}
|
| 4960 |
+
{"current_steps": 24705, "total_steps": 25880, "loss": 4.4894, "lr": 3.1383407633561734e-07, "epoch": 19.091962905718702, "percentage": 95.46, "elapsed_time": "0:56:05", "remaining_time": "0:02:40", "throughput": 2472.55, "total_tokens": 8320304}
|
| 4961 |
+
{"current_steps": 24710, "total_steps": 25880, "loss": 5.0603, "lr": 3.1117663063943705e-07, "epoch": 19.09582689335394, "percentage": 95.48, "elapsed_time": "0:56:05", "remaining_time": "0:02:39", "throughput": 2472.49, "total_tokens": 8321616}
|
| 4962 |
+
{"current_steps": 24715, "total_steps": 25880, "loss": 4.6278, "lr": 3.0853041356798116e-07, "epoch": 19.09969088098918, "percentage": 95.5, "elapsed_time": "0:56:06", "remaining_time": "0:02:38", "throughput": 2472.53, "total_tokens": 8323344}
|
| 4963 |
+
{"current_steps": 24720, "total_steps": 25880, "loss": 4.5231, "lr": 3.058954263247621e-07, "epoch": 19.10355486862442, "percentage": 95.52, "elapsed_time": "0:56:07", "remaining_time": "0:02:37", "throughput": 2472.54, "total_tokens": 8325104}
|
| 4964 |
+
{"current_steps": 24725, "total_steps": 25880, "loss": 4.7902, "lr": 3.0327167010819333e-07, "epoch": 19.10741885625966, "percentage": 95.54, "elapsed_time": "0:56:07", "remaining_time": "0:02:37", "throughput": 2472.59, "total_tokens": 8326928}
|
| 4965 |
+
{"current_steps": 24730, "total_steps": 25880, "loss": 4.755, "lr": 3.006591461115704e-07, "epoch": 19.111282843894898, "percentage": 95.56, "elapsed_time": "0:56:08", "remaining_time": "0:02:36", "throughput": 2472.63, "total_tokens": 8328848}
|
| 4966 |
+
{"current_steps": 24735, "total_steps": 25880, "loss": 4.5882, "lr": 2.9805785552308727e-07, "epoch": 19.11514683153014, "percentage": 95.58, "elapsed_time": "0:56:09", "remaining_time": "0:02:35", "throughput": 2472.6, "total_tokens": 8330288}
|
| 4967 |
+
{"current_steps": 24740, "total_steps": 25880, "loss": 4.9121, "lr": 2.954677995258254e-07, "epoch": 19.11901081916538, "percentage": 95.6, "elapsed_time": "0:56:09", "remaining_time": "0:02:35", "throughput": 2472.57, "total_tokens": 8331792}
|
| 4968 |
+
{"current_steps": 24745, "total_steps": 25880, "loss": 4.8616, "lr": 2.9288897929775905e-07, "epoch": 19.122874806800617, "percentage": 95.61, "elapsed_time": "0:56:10", "remaining_time": "0:02:34", "throughput": 2472.46, "total_tokens": 8333616}
|
| 4969 |
+
{"current_steps": 24750, "total_steps": 25880, "loss": 4.8233, "lr": 2.9032139601174734e-07, "epoch": 19.12673879443586, "percentage": 95.63, "elapsed_time": "0:56:11", "remaining_time": "0:02:33", "throughput": 2472.49, "total_tokens": 8335280}
|
| 4970 |
+
{"current_steps": 24755, "total_steps": 25880, "loss": 4.632, "lr": 2.8776505083554504e-07, "epoch": 19.130602782071097, "percentage": 95.65, "elapsed_time": "0:56:11", "remaining_time": "0:02:33", "throughput": 2472.48, "total_tokens": 8336752}
|
| 4971 |
+
{"current_steps": 24760, "total_steps": 25880, "loss": 4.5785, "lr": 2.852199449317944e-07, "epoch": 19.134466769706336, "percentage": 95.67, "elapsed_time": "0:56:12", "remaining_time": "0:02:32", "throughput": 2472.46, "total_tokens": 8338288}
|
| 4972 |
+
{"current_steps": 24765, "total_steps": 25880, "loss": 4.367, "lr": 2.8268607945802493e-07, "epoch": 19.138330757341578, "percentage": 95.69, "elapsed_time": "0:56:13", "remaining_time": "0:02:31", "throughput": 2472.5, "total_tokens": 8340176}
|
| 4973 |
+
{"current_steps": 24770, "total_steps": 25880, "loss": 5.0026, "lr": 2.801634555666538e-07, "epoch": 19.142194744976816, "percentage": 95.71, "elapsed_time": "0:56:13", "remaining_time": "0:02:31", "throughput": 2472.48, "total_tokens": 8341616}
|
| 4974 |
+
{"current_steps": 24775, "total_steps": 25880, "loss": 4.7076, "lr": 2.7765207440498266e-07, "epoch": 19.146058732612055, "percentage": 95.73, "elapsed_time": "0:56:14", "remaining_time": "0:02:30", "throughput": 2472.5, "total_tokens": 8343408}
|
| 4975 |
+
{"current_steps": 24780, "total_steps": 25880, "loss": 4.6129, "lr": 2.751519371152034e-07, "epoch": 19.149922720247297, "percentage": 95.75, "elapsed_time": "0:56:15", "remaining_time": "0:02:29", "throughput": 2472.52, "total_tokens": 8345104}
|
| 4976 |
+
{"current_steps": 24785, "total_steps": 25880, "loss": 4.6593, "lr": 2.726630448343953e-07, "epoch": 19.153786707882535, "percentage": 95.77, "elapsed_time": "0:56:15", "remaining_time": "0:02:29", "throughput": 2472.55, "total_tokens": 8346736}
|
| 4977 |
+
{"current_steps": 24790, "total_steps": 25880, "loss": 4.9427, "lr": 2.7018539869451963e-07, "epoch": 19.157650695517773, "percentage": 95.79, "elapsed_time": "0:56:16", "remaining_time": "0:02:28", "throughput": 2472.57, "total_tokens": 8348304}
|
| 4978 |
+
{"current_steps": 24795, "total_steps": 25880, "loss": 4.9864, "lr": 2.6771899982242774e-07, "epoch": 19.161514683153015, "percentage": 95.81, "elapsed_time": "0:56:16", "remaining_time": "0:02:27", "throughput": 2472.58, "total_tokens": 8349872}
|
| 4979 |
+
{"current_steps": 24800, "total_steps": 25880, "loss": 4.9151, "lr": 2.6526384933984737e-07, "epoch": 19.165378670788254, "percentage": 95.83, "elapsed_time": "0:56:17", "remaining_time": "0:02:27", "throughput": 2472.6, "total_tokens": 8351568}
|
| 4980 |
+
{"current_steps": 24805, "total_steps": 25880, "loss": 4.5711, "lr": 2.6281994836340195e-07, "epoch": 19.169242658423492, "percentage": 95.85, "elapsed_time": "0:56:18", "remaining_time": "0:02:26", "throughput": 2472.64, "total_tokens": 8353296}
|
| 4981 |
+
{"current_steps": 24810, "total_steps": 25880, "loss": 4.4554, "lr": 2.603872980045885e-07, "epoch": 19.173106646058734, "percentage": 95.87, "elapsed_time": "0:56:18", "remaining_time": "0:02:25", "throughput": 2472.71, "total_tokens": 8355248}
|
| 4982 |
+
{"current_steps": 24815, "total_steps": 25880, "loss": 4.6305, "lr": 2.5796589936979423e-07, "epoch": 19.176970633693973, "percentage": 95.88, "elapsed_time": "0:56:19", "remaining_time": "0:02:25", "throughput": 2472.73, "total_tokens": 8356848}
|
| 4983 |
+
{"current_steps": 24820, "total_steps": 25880, "loss": 4.235, "lr": 2.5555575356027703e-07, "epoch": 19.18083462132921, "percentage": 95.9, "elapsed_time": "0:56:20", "remaining_time": "0:02:24", "throughput": 2472.76, "total_tokens": 8358736}
|
| 4984 |
+
{"current_steps": 24825, "total_steps": 25880, "loss": 4.7284, "lr": 2.531568616721963e-07, "epoch": 19.18469860896445, "percentage": 95.92, "elapsed_time": "0:56:20", "remaining_time": "0:02:23", "throughput": 2472.8, "total_tokens": 8360496}
|
| 4985 |
+
{"current_steps": 24830, "total_steps": 25880, "loss": 4.6712, "lr": 2.5076922479657647e-07, "epoch": 19.18856259659969, "percentage": 95.94, "elapsed_time": "0:56:21", "remaining_time": "0:02:22", "throughput": 2472.8, "total_tokens": 8362032}
|
| 4986 |
+
{"current_steps": 24835, "total_steps": 25880, "loss": 4.7377, "lr": 2.483928440193295e-07, "epoch": 19.19242658423493, "percentage": 95.96, "elapsed_time": "0:56:22", "remaining_time": "0:02:22", "throughput": 2472.8, "total_tokens": 8363536}
|
| 4987 |
+
{"current_steps": 24840, "total_steps": 25880, "loss": 4.7276, "lr": 2.460277204212519e-07, "epoch": 19.19629057187017, "percentage": 95.98, "elapsed_time": "0:56:22", "remaining_time": "0:02:21", "throughput": 2472.82, "total_tokens": 8365136}
|
| 4988 |
+
{"current_steps": 24845, "total_steps": 25880, "loss": 4.5494, "lr": 2.43673855078011e-07, "epoch": 19.20015455950541, "percentage": 96.0, "elapsed_time": "0:56:23", "remaining_time": "0:02:20", "throughput": 2472.85, "total_tokens": 8366928}
|
| 4989 |
+
{"current_steps": 24850, "total_steps": 25880, "loss": 4.5512, "lr": 2.413312490601588e-07, "epoch": 19.20401854714065, "percentage": 96.02, "elapsed_time": "0:56:24", "remaining_time": "0:02:20", "throughput": 2472.91, "total_tokens": 8368944}
|
| 4990 |
+
{"current_steps": 24855, "total_steps": 25880, "loss": 4.7035, "lr": 2.3899990343312916e-07, "epoch": 19.207882534775887, "percentage": 96.04, "elapsed_time": "0:56:24", "remaining_time": "0:02:19", "throughput": 2472.9, "total_tokens": 8370480}
|
| 4991 |
+
{"current_steps": 24860, "total_steps": 25880, "loss": 4.4601, "lr": 2.3667981925723226e-07, "epoch": 19.21174652241113, "percentage": 96.06, "elapsed_time": "0:56:25", "remaining_time": "0:02:18", "throughput": 2472.9, "total_tokens": 8372080}
|
| 4992 |
+
{"current_steps": 24865, "total_steps": 25880, "loss": 4.345, "lr": 2.3437099758765734e-07, "epoch": 19.215610510046368, "percentage": 96.08, "elapsed_time": "0:56:26", "remaining_time": "0:02:18", "throughput": 2472.97, "total_tokens": 8373968}
|
| 4993 |
+
{"current_steps": 24870, "total_steps": 25880, "loss": 4.3322, "lr": 2.3207343947446447e-07, "epoch": 19.219474497681606, "percentage": 96.1, "elapsed_time": "0:56:26", "remaining_time": "0:02:17", "throughput": 2472.93, "total_tokens": 8375344}
|
| 4994 |
+
{"current_steps": 24875, "total_steps": 25880, "loss": 4.4538, "lr": 2.2978714596260108e-07, "epoch": 19.223338485316848, "percentage": 96.12, "elapsed_time": "0:56:27", "remaining_time": "0:02:16", "throughput": 2472.95, "total_tokens": 8377040}
|
| 4995 |
+
{"current_steps": 24880, "total_steps": 25880, "loss": 4.6324, "lr": 2.275121180918882e-07, "epoch": 19.227202472952087, "percentage": 96.14, "elapsed_time": "0:56:28", "remaining_time": "0:02:16", "throughput": 2472.99, "total_tokens": 8378800}
|
| 4996 |
+
{"current_steps": 24885, "total_steps": 25880, "loss": 4.7279, "lr": 2.2524835689702316e-07, "epoch": 19.231066460587325, "percentage": 96.16, "elapsed_time": "0:56:28", "remaining_time": "0:02:15", "throughput": 2473.03, "total_tokens": 8380560}
|
| 4997 |
+
{"current_steps": 24890, "total_steps": 25880, "loss": 4.9099, "lr": 2.229958634075713e-07, "epoch": 19.234930448222567, "percentage": 96.17, "elapsed_time": "0:56:29", "remaining_time": "0:02:14", "throughput": 2473.03, "total_tokens": 8382096}
|
| 4998 |
+
{"current_steps": 24895, "total_steps": 25880, "loss": 4.4389, "lr": 2.207546386479853e-07, "epoch": 19.238794435857805, "percentage": 96.19, "elapsed_time": "0:56:30", "remaining_time": "0:02:14", "throughput": 2473.07, "total_tokens": 8384016}
|
| 4999 |
+
{"current_steps": 24900, "total_steps": 25880, "loss": 5.0261, "lr": 2.1852468363758594e-07, "epoch": 19.242658423493044, "percentage": 96.21, "elapsed_time": "0:56:30", "remaining_time": "0:02:13", "throughput": 2473.14, "total_tokens": 8386000}
|
| 5000 |
+
{"current_steps": 24905, "total_steps": 25880, "loss": 5.1382, "lr": 2.1630599939057306e-07, "epoch": 19.246522411128286, "percentage": 96.23, "elapsed_time": "0:56:31", "remaining_time": "0:02:12", "throughput": 2473.17, "total_tokens": 8387728}
|
| 5001 |
+
{"current_steps": 24910, "total_steps": 25880, "loss": 5.043, "lr": 2.140985869160145e-07, "epoch": 19.250386398763524, "percentage": 96.25, "elapsed_time": "0:56:32", "remaining_time": "0:02:12", "throughput": 2473.2, "total_tokens": 8389456}
|
| 5002 |
+
{"current_steps": 24915, "total_steps": 25880, "loss": 4.4127, "lr": 2.1190244721785435e-07, "epoch": 19.254250386398763, "percentage": 96.27, "elapsed_time": "0:56:32", "remaining_time": "0:02:11", "throughput": 2473.21, "total_tokens": 8391088}
|
| 5003 |
+
{"current_steps": 24920, "total_steps": 25880, "loss": 5.1753, "lr": 2.0971758129491314e-07, "epoch": 19.258114374034005, "percentage": 96.29, "elapsed_time": "0:56:33", "remaining_time": "0:02:10", "throughput": 2473.23, "total_tokens": 8392784}
|
| 5004 |
+
{"current_steps": 24925, "total_steps": 25880, "loss": 4.6386, "lr": 2.0754399014087933e-07, "epoch": 19.261978361669243, "percentage": 96.31, "elapsed_time": "0:56:34", "remaining_time": "0:02:10", "throughput": 2473.28, "total_tokens": 8394512}
|
| 5005 |
+
{"current_steps": 24930, "total_steps": 25880, "loss": 4.4931, "lr": 2.0538167474431214e-07, "epoch": 19.26584234930448, "percentage": 96.33, "elapsed_time": "0:56:34", "remaining_time": "0:02:09", "throughput": 2473.34, "total_tokens": 8396400}
|
| 5006 |
+
{"current_steps": 24935, "total_steps": 25880, "loss": 4.1899, "lr": 2.0323063608865267e-07, "epoch": 19.269706336939723, "percentage": 96.35, "elapsed_time": "0:56:35", "remaining_time": "0:02:08", "throughput": 2473.37, "total_tokens": 8398320}
|
| 5007 |
+
{"current_steps": 24940, "total_steps": 25880, "loss": 4.9734, "lr": 2.0109087515219894e-07, "epoch": 19.273570324574962, "percentage": 96.37, "elapsed_time": "0:56:36", "remaining_time": "0:02:08", "throughput": 2473.34, "total_tokens": 8399728}
|
| 5008 |
+
{"current_steps": 24945, "total_steps": 25880, "loss": 4.837, "lr": 1.9896239290813078e-07, "epoch": 19.2774343122102, "percentage": 96.39, "elapsed_time": "0:56:36", "remaining_time": "0:02:07", "throughput": 2473.39, "total_tokens": 8401584}
|
| 5009 |
+
{"current_steps": 24950, "total_steps": 25880, "loss": 4.7942, "lr": 1.9684519032449333e-07, "epoch": 19.28129829984544, "percentage": 96.41, "elapsed_time": "0:56:37", "remaining_time": "0:02:06", "throughput": 2473.45, "total_tokens": 8403344}
|
| 5010 |
+
{"current_steps": 24955, "total_steps": 25880, "loss": 4.7497, "lr": 1.947392683642052e-07, "epoch": 19.28516228748068, "percentage": 96.43, "elapsed_time": "0:56:38", "remaining_time": "0:02:05", "throughput": 2473.46, "total_tokens": 8404912}
|
| 5011 |
+
{"current_steps": 24960, "total_steps": 25880, "loss": 4.8864, "lr": 1.9264462798505023e-07, "epoch": 19.28902627511592, "percentage": 96.45, "elapsed_time": "0:56:38", "remaining_time": "0:02:05", "throughput": 2473.52, "total_tokens": 8406736}
|
| 5012 |
+
{"current_steps": 24965, "total_steps": 25880, "loss": 4.701, "lr": 1.905612701396803e-07, "epoch": 19.292890262751158, "percentage": 96.46, "elapsed_time": "0:56:39", "remaining_time": "0:02:04", "throughput": 2473.55, "total_tokens": 8408560}
|
| 5013 |
+
{"current_steps": 24970, "total_steps": 25880, "loss": 4.4763, "lr": 1.884891957756263e-07, "epoch": 19.2967542503864, "percentage": 96.48, "elapsed_time": "0:56:40", "remaining_time": "0:02:03", "throughput": 2473.56, "total_tokens": 8410192}
|
| 5014 |
+
{"current_steps": 24975, "total_steps": 25880, "loss": 4.5756, "lr": 1.864284058352761e-07, "epoch": 19.300618238021638, "percentage": 96.5, "elapsed_time": "0:56:40", "remaining_time": "0:02:03", "throughput": 2473.59, "total_tokens": 8411824}
|
| 5015 |
+
{"current_steps": 24980, "total_steps": 25880, "loss": 4.855, "lr": 1.8437890125589109e-07, "epoch": 19.304482225656876, "percentage": 96.52, "elapsed_time": "0:56:41", "remaining_time": "0:02:02", "throughput": 2473.61, "total_tokens": 8413584}
|
| 5016 |
+
{"current_steps": 24985, "total_steps": 25880, "loss": 4.9789, "lr": 1.8234068296959506e-07, "epoch": 19.30834621329212, "percentage": 96.54, "elapsed_time": "0:56:41", "remaining_time": "0:02:01", "throughput": 2473.58, "total_tokens": 8415088}
|
| 5017 |
+
{"current_steps": 24990, "total_steps": 25880, "loss": 4.3625, "lr": 1.8031375190338261e-07, "epoch": 19.312210200927357, "percentage": 96.56, "elapsed_time": "0:56:42", "remaining_time": "0:02:01", "throughput": 2473.64, "total_tokens": 8416912}
|
| 5018 |
+
{"current_steps": 24995, "total_steps": 25880, "loss": 5.148, "lr": 1.782981089791136e-07, "epoch": 19.316074188562595, "percentage": 96.58, "elapsed_time": "0:56:43", "remaining_time": "0:02:00", "throughput": 2473.63, "total_tokens": 8418384}
|
| 5019 |
+
{"current_steps": 25000, "total_steps": 25880, "loss": 4.405, "lr": 1.7629375511351852e-07, "epoch": 19.319938176197837, "percentage": 96.6, "elapsed_time": "0:56:43", "remaining_time": "0:01:59", "throughput": 2473.63, "total_tokens": 8420112}
|
| 5020 |
+
{"current_steps": 25005, "total_steps": 25880, "loss": 4.9424, "lr": 1.7430069121818492e-07, "epoch": 19.323802163833076, "percentage": 96.62, "elapsed_time": "0:56:44", "remaining_time": "0:01:59", "throughput": 2473.62, "total_tokens": 8421680}
|
| 5021 |
+
{"current_steps": 25010, "total_steps": 25880, "loss": 4.8256, "lr": 1.7231891819957657e-07, "epoch": 19.327666151468314, "percentage": 96.64, "elapsed_time": "0:56:45", "remaining_time": "0:01:58", "throughput": 2473.65, "total_tokens": 8423376}
|
| 5022 |
+
{"current_steps": 25015, "total_steps": 25880, "loss": 4.6178, "lr": 1.703484369590086e-07, "epoch": 19.331530139103556, "percentage": 96.66, "elapsed_time": "0:56:45", "remaining_time": "0:01:57", "throughput": 2473.62, "total_tokens": 8424816}
|
| 5023 |
+
{"current_steps": 25020, "total_steps": 25880, "loss": 4.5928, "lr": 1.6838924839266966e-07, "epoch": 19.335394126738795, "percentage": 96.68, "elapsed_time": "0:56:46", "remaining_time": "0:01:57", "throughput": 2473.7, "total_tokens": 8426640}
|
| 5024 |
+
{"current_steps": 25025, "total_steps": 25880, "loss": 4.8195, "lr": 1.664413533916137e-07, "epoch": 19.339258114374033, "percentage": 96.7, "elapsed_time": "0:56:47", "remaining_time": "0:01:56", "throughput": 2473.74, "total_tokens": 8428464}
|
| 5025 |
+
{"current_steps": 25030, "total_steps": 25880, "loss": 4.6938, "lr": 1.645047528417487e-07, "epoch": 19.343122102009275, "percentage": 96.72, "elapsed_time": "0:56:47", "remaining_time": "0:01:55", "throughput": 2473.72, "total_tokens": 8430032}
|
| 5026 |
+
{"current_steps": 25035, "total_steps": 25880, "loss": 5.0042, "lr": 1.62579447623859e-07, "epoch": 19.346986089644513, "percentage": 96.73, "elapsed_time": "0:56:48", "remaining_time": "0:01:55", "throughput": 2473.72, "total_tokens": 8431632}
|
| 5027 |
+
{"current_steps": 25040, "total_steps": 25880, "loss": 4.7787, "lr": 1.606654386135803e-07, "epoch": 19.350850077279752, "percentage": 96.75, "elapsed_time": "0:56:49", "remaining_time": "0:01:54", "throughput": 2473.77, "total_tokens": 8433584}
|
| 5028 |
+
{"current_steps": 25045, "total_steps": 25880, "loss": 4.3847, "lr": 1.5876272668141902e-07, "epoch": 19.354714064914994, "percentage": 96.77, "elapsed_time": "0:56:49", "remaining_time": "0:01:53", "throughput": 2473.76, "total_tokens": 8435152}
|
| 5029 |
+
{"current_steps": 25050, "total_steps": 25880, "loss": 4.8811, "lr": 1.568713126927357e-07, "epoch": 19.358578052550232, "percentage": 96.79, "elapsed_time": "0:56:50", "remaining_time": "0:01:53", "throughput": 2473.78, "total_tokens": 8436848}
|
| 5030 |
+
{"current_steps": 25055, "total_steps": 25880, "loss": 4.5374, "lr": 1.549911975077617e-07, "epoch": 19.36244204018547, "percentage": 96.81, "elapsed_time": "0:56:51", "remaining_time": "0:01:52", "throughput": 2473.82, "total_tokens": 8438704}
|
| 5031 |
+
{"current_steps": 25060, "total_steps": 25880, "loss": 4.7832, "lr": 1.5312238198157968e-07, "epoch": 19.366306027820713, "percentage": 96.83, "elapsed_time": "0:56:51", "remaining_time": "0:01:51", "throughput": 2473.84, "total_tokens": 8440368}
|
| 5032 |
+
{"current_steps": 25065, "total_steps": 25880, "loss": 4.7192, "lr": 1.5126486696414032e-07, "epoch": 19.37017001545595, "percentage": 96.85, "elapsed_time": "0:56:52", "remaining_time": "0:01:50", "throughput": 2473.87, "total_tokens": 8442160}
|
| 5033 |
+
{"current_steps": 25070, "total_steps": 25880, "loss": 4.7678, "lr": 1.4941865330025394e-07, "epoch": 19.37403400309119, "percentage": 96.87, "elapsed_time": "0:56:53", "remaining_time": "0:01:50", "throughput": 2473.88, "total_tokens": 8443920}
|
| 5034 |
+
{"current_steps": 25075, "total_steps": 25880, "loss": 4.8926, "lr": 1.475837418295878e-07, "epoch": 19.377897990726428, "percentage": 96.89, "elapsed_time": "0:56:53", "remaining_time": "0:01:49", "throughput": 2473.85, "total_tokens": 8445328}
|
| 5035 |
+
{"current_steps": 25080, "total_steps": 25880, "loss": 4.6268, "lr": 1.457601333866715e-07, "epoch": 19.38176197836167, "percentage": 96.91, "elapsed_time": "0:56:54", "remaining_time": "0:01:48", "throughput": 2473.83, "total_tokens": 8446768}
|
| 5036 |
+
{"current_steps": 25085, "total_steps": 25880, "loss": 4.3151, "lr": 1.4394782880089443e-07, "epoch": 19.38562596599691, "percentage": 96.93, "elapsed_time": "0:56:55", "remaining_time": "0:01:48", "throughput": 2473.82, "total_tokens": 8448240}
|
| 5037 |
+
{"current_steps": 25090, "total_steps": 25880, "loss": 4.6398, "lr": 1.4214682889649998e-07, "epoch": 19.389489953632147, "percentage": 96.95, "elapsed_time": "0:56:55", "remaining_time": "0:01:47", "throughput": 2473.85, "total_tokens": 8449936}
|
| 5038 |
+
{"current_steps": 25095, "total_steps": 25880, "loss": 4.7292, "lr": 1.403571344925969e-07, "epoch": 19.39335394126739, "percentage": 96.97, "elapsed_time": "0:56:56", "remaining_time": "0:01:46", "throughput": 2473.88, "total_tokens": 8451664}
|
| 5039 |
+
{"current_steps": 25100, "total_steps": 25880, "loss": 4.7279, "lr": 1.3857874640314516e-07, "epoch": 19.397217928902627, "percentage": 96.99, "elapsed_time": "0:56:57", "remaining_time": "0:01:46", "throughput": 2473.87, "total_tokens": 8453232}
|
| 5040 |
+
{"current_steps": 25105, "total_steps": 25880, "loss": 4.9964, "lr": 1.3681166543697e-07, "epoch": 19.401081916537866, "percentage": 97.01, "elapsed_time": "0:56:57", "remaining_time": "0:01:45", "throughput": 2473.92, "total_tokens": 8455120}
|
| 5041 |
+
{"current_steps": 25110, "total_steps": 25880, "loss": 4.6791, "lr": 1.3505589239775073e-07, "epoch": 19.404945904173108, "percentage": 97.02, "elapsed_time": "0:56:58", "remaining_time": "0:01:44", "throughput": 2473.92, "total_tokens": 8456656}
|
| 5042 |
+
{"current_steps": 25115, "total_steps": 25880, "loss": 5.0818, "lr": 1.3331142808401808e-07, "epoch": 19.408809891808346, "percentage": 97.04, "elapsed_time": "0:56:58", "remaining_time": "0:01:44", "throughput": 2473.92, "total_tokens": 8458192}
|
| 5043 |
+
{"current_steps": 25120, "total_steps": 25880, "loss": 4.7076, "lr": 1.315782732891735e-07, "epoch": 19.412673879443584, "percentage": 97.06, "elapsed_time": "0:56:59", "remaining_time": "0:01:43", "throughput": 2473.95, "total_tokens": 8459984}
|
| 5044 |
+
{"current_steps": 25125, "total_steps": 25880, "loss": 4.5308, "lr": 1.2985642880145864e-07, "epoch": 19.416537867078826, "percentage": 97.08, "elapsed_time": "0:57:00", "remaining_time": "0:01:42", "throughput": 2473.93, "total_tokens": 8461424}
|
| 5045 |
+
{"current_steps": 25130, "total_steps": 25880, "loss": 4.786, "lr": 1.2814589540398048e-07, "epoch": 19.420401854714065, "percentage": 97.1, "elapsed_time": "0:57:00", "remaining_time": "0:01:42", "throughput": 2473.92, "total_tokens": 8463024}
|
| 5046 |
+
{"current_steps": 25135, "total_steps": 25880, "loss": 4.5477, "lr": 1.2644667387470276e-07, "epoch": 19.424265842349303, "percentage": 97.12, "elapsed_time": "0:57:01", "remaining_time": "0:01:41", "throughput": 2473.93, "total_tokens": 8464720}
|
| 5047 |
+
{"current_steps": 25140, "total_steps": 25880, "loss": 4.8459, "lr": 1.247587649864379e-07, "epoch": 19.428129829984545, "percentage": 97.14, "elapsed_time": "0:57:02", "remaining_time": "0:01:40", "throughput": 2473.89, "total_tokens": 8466192}
|
| 5048 |
+
{"current_steps": 25145, "total_steps": 25880, "loss": 4.5866, "lr": 1.230821695068607e-07, "epoch": 19.431993817619784, "percentage": 97.16, "elapsed_time": "0:57:02", "remaining_time": "0:01:40", "throughput": 2473.94, "total_tokens": 8467888}
|
| 5049 |
+
{"current_steps": 25150, "total_steps": 25880, "loss": 4.7842, "lr": 1.214168881984945e-07, "epoch": 19.435857805255022, "percentage": 97.18, "elapsed_time": "0:57:03", "remaining_time": "0:01:39", "throughput": 2473.96, "total_tokens": 8469680}
|
| 5050 |
+
{"current_steps": 25155, "total_steps": 25880, "loss": 4.6052, "lr": 1.1976292181871684e-07, "epoch": 19.439721792890264, "percentage": 97.2, "elapsed_time": "0:57:04", "remaining_time": "0:01:38", "throughput": 2473.97, "total_tokens": 8471408}
|
| 5051 |
+
{"current_steps": 25160, "total_steps": 25880, "loss": 4.9042, "lr": 1.1812027111976764e-07, "epoch": 19.443585780525503, "percentage": 97.22, "elapsed_time": "0:57:04", "remaining_time": "0:01:38", "throughput": 2473.98, "total_tokens": 8473072}
|
| 5052 |
+
{"current_steps": 25165, "total_steps": 25880, "loss": 4.4013, "lr": 1.1648893684872986e-07, "epoch": 19.44744976816074, "percentage": 97.24, "elapsed_time": "0:57:05", "remaining_time": "0:01:37", "throughput": 2474.02, "total_tokens": 8474832}
|
| 5053 |
+
{"current_steps": 25170, "total_steps": 25880, "loss": 5.0108, "lr": 1.1486891974754332e-07, "epoch": 19.451313755795983, "percentage": 97.26, "elapsed_time": "0:57:06", "remaining_time": "0:01:36", "throughput": 2474.04, "total_tokens": 8476528}
|
| 5054 |
+
{"current_steps": 25175, "total_steps": 25880, "loss": 4.5667, "lr": 1.1326022055300478e-07, "epoch": 19.45517774343122, "percentage": 97.28, "elapsed_time": "0:57:06", "remaining_time": "0:01:35", "throughput": 2474.06, "total_tokens": 8478224}
|
| 5055 |
+
{"current_steps": 25180, "total_steps": 25880, "loss": 4.3968, "lr": 1.1166283999675953e-07, "epoch": 19.45904173106646, "percentage": 97.3, "elapsed_time": "0:57:07", "remaining_time": "0:01:35", "throughput": 2474.07, "total_tokens": 8479760}
|
| 5056 |
+
{"current_steps": 25185, "total_steps": 25880, "loss": 4.2887, "lr": 1.100767788053042e-07, "epoch": 19.462905718701702, "percentage": 97.31, "elapsed_time": "0:57:08", "remaining_time": "0:01:34", "throughput": 2474.12, "total_tokens": 8481584}
|
| 5057 |
+
{"current_steps": 25190, "total_steps": 25880, "loss": 4.8165, "lr": 1.0850203769998957e-07, "epoch": 19.46676970633694, "percentage": 97.33, "elapsed_time": "0:57:08", "remaining_time": "0:01:33", "throughput": 2474.19, "total_tokens": 8483440}
|
| 5058 |
+
{"current_steps": 25195, "total_steps": 25880, "loss": 4.5053, "lr": 1.0693861739701771e-07, "epoch": 19.47063369397218, "percentage": 97.35, "elapsed_time": "0:57:09", "remaining_time": "0:01:33", "throughput": 2474.23, "total_tokens": 8485200}
|
| 5059 |
+
{"current_steps": 25200, "total_steps": 25880, "loss": 4.5498, "lr": 1.0538651860744208e-07, "epoch": 19.474497681607417, "percentage": 97.37, "elapsed_time": "0:57:10", "remaining_time": "0:01:32", "throughput": 2474.24, "total_tokens": 8486928}
|
| 5060 |
+
{"current_steps": 25205, "total_steps": 25880, "loss": 4.5936, "lr": 1.0384574203716469e-07, "epoch": 19.47836166924266, "percentage": 97.39, "elapsed_time": "0:57:10", "remaining_time": "0:01:31", "throughput": 2474.27, "total_tokens": 8488592}
|
| 5061 |
+
{"current_steps": 25210, "total_steps": 25880, "loss": 4.741, "lr": 1.0231628838694163e-07, "epoch": 19.482225656877898, "percentage": 97.41, "elapsed_time": "0:57:11", "remaining_time": "0:01:31", "throughput": 2474.26, "total_tokens": 8490064}
|
| 5062 |
+
{"current_steps": 25215, "total_steps": 25880, "loss": 4.304, "lr": 1.0079815835237761e-07, "epoch": 19.486089644513136, "percentage": 97.43, "elapsed_time": "0:57:11", "remaining_time": "0:01:30", "throughput": 2474.28, "total_tokens": 8491632}
|
| 5063 |
+
{"current_steps": 25220, "total_steps": 25880, "loss": 4.9541, "lr": 9.929135262392586e-08, "epoch": 19.489953632148378, "percentage": 97.45, "elapsed_time": "0:57:12", "remaining_time": "0:01:29", "throughput": 2474.31, "total_tokens": 8493360}
|
| 5064 |
+
{"current_steps": 25225, "total_steps": 25880, "loss": 4.4758, "lr": 9.779587188689099e-08, "epoch": 19.493817619783616, "percentage": 97.47, "elapsed_time": "0:57:13", "remaining_time": "0:01:29", "throughput": 2474.34, "total_tokens": 8495088}
|
| 5065 |
+
{"current_steps": 25230, "total_steps": 25880, "loss": 4.5531, "lr": 9.631171682142893e-08, "epoch": 19.497681607418855, "percentage": 97.49, "elapsed_time": "0:57:13", "remaining_time": "0:01:28", "throughput": 2474.33, "total_tokens": 8496592}
|
| 5066 |
+
{"current_steps": 25235, "total_steps": 25880, "loss": 4.6447, "lr": 9.483888810253582e-08, "epoch": 19.501545595054097, "percentage": 97.51, "elapsed_time": "0:57:14", "remaining_time": "0:01:27", "throughput": 2474.34, "total_tokens": 8498288}
|
| 5067 |
+
{"current_steps": 25240, "total_steps": 25880, "loss": 4.628, "lr": 9.337738640007032e-08, "epoch": 19.505409582689335, "percentage": 97.53, "elapsed_time": "0:57:15", "remaining_time": "0:01:27", "throughput": 2474.39, "total_tokens": 8500016}
|
| 5068 |
+
{"current_steps": 25245, "total_steps": 25880, "loss": 4.5168, "lr": 9.192721237873125e-08, "epoch": 19.509273570324574, "percentage": 97.55, "elapsed_time": "0:57:15", "remaining_time": "0:01:26", "throughput": 2474.46, "total_tokens": 8501872}
|
| 5069 |
+
{"current_steps": 25250, "total_steps": 25880, "loss": 4.3126, "lr": 9.048836669806326e-08, "epoch": 19.513137557959816, "percentage": 97.57, "elapsed_time": "0:57:16", "remaining_time": "0:01:25", "throughput": 2474.44, "total_tokens": 8503344}
|
| 5070 |
+
{"current_steps": 25255, "total_steps": 25880, "loss": 4.8032, "lr": 8.906085001246233e-08, "epoch": 19.517001545595054, "percentage": 97.59, "elapsed_time": "0:57:17", "remaining_time": "0:01:25", "throughput": 2474.48, "total_tokens": 8505104}
|
| 5071 |
+
{"current_steps": 25260, "total_steps": 25880, "loss": 4.763, "lr": 8.764466297117302e-08, "epoch": 19.520865533230292, "percentage": 97.6, "elapsed_time": "0:57:17", "remaining_time": "0:01:24", "throughput": 2474.5, "total_tokens": 8506768}
|
| 5072 |
+
{"current_steps": 25265, "total_steps": 25880, "loss": 4.5707, "lr": 8.623980621828842e-08, "epoch": 19.524729520865534, "percentage": 97.62, "elapsed_time": "0:57:18", "remaining_time": "0:01:23", "throughput": 2474.49, "total_tokens": 8508368}
|
| 5073 |
+
{"current_steps": 25270, "total_steps": 25880, "loss": 4.4598, "lr": 8.484628039273912e-08, "epoch": 19.528593508500773, "percentage": 97.64, "elapsed_time": "0:57:19", "remaining_time": "0:01:23", "throughput": 2474.53, "total_tokens": 8510096}
|
| 5074 |
+
{"current_steps": 25275, "total_steps": 25880, "loss": 4.4292, "lr": 8.34640861283098e-08, "epoch": 19.53245749613601, "percentage": 97.66, "elapsed_time": "0:57:19", "remaining_time": "0:01:22", "throughput": 2474.52, "total_tokens": 8511600}
|
| 5075 |
+
{"current_steps": 25280, "total_steps": 25880, "loss": 4.6996, "lr": 8.209322405363929e-08, "epoch": 19.536321483771253, "percentage": 97.68, "elapsed_time": "0:57:20", "remaining_time": "0:01:21", "throughput": 2474.51, "total_tokens": 8513072}
|
| 5076 |
+
{"current_steps": 25285, "total_steps": 25880, "loss": 4.6504, "lr": 8.073369479219551e-08, "epoch": 19.54018547140649, "percentage": 97.7, "elapsed_time": "0:57:21", "remaining_time": "0:01:20", "throughput": 2474.56, "total_tokens": 8514960}
|
| 5077 |
+
{"current_steps": 25290, "total_steps": 25880, "loss": 4.3792, "lr": 7.938549896230329e-08, "epoch": 19.54404945904173, "percentage": 97.72, "elapsed_time": "0:57:21", "remaining_time": "0:01:20", "throughput": 2474.59, "total_tokens": 8516784}
|
| 5078 |
+
{"current_steps": 25295, "total_steps": 25880, "loss": 4.5252, "lr": 7.804863717712774e-08, "epoch": 19.547913446676972, "percentage": 97.74, "elapsed_time": "0:57:22", "remaining_time": "0:01:19", "throughput": 2474.57, "total_tokens": 8518256}
|
| 5079 |
+
{"current_steps": 25300, "total_steps": 25880, "loss": 4.6907, "lr": 7.672311004468802e-08, "epoch": 19.55177743431221, "percentage": 97.76, "elapsed_time": "0:57:22", "remaining_time": "0:01:18", "throughput": 2474.62, "total_tokens": 8519952}
|
| 5080 |
+
{"current_steps": 25305, "total_steps": 25880, "loss": 4.5198, "lr": 7.540891816783246e-08, "epoch": 19.55564142194745, "percentage": 97.78, "elapsed_time": "0:57:23", "remaining_time": "0:01:18", "throughput": 2474.65, "total_tokens": 8521680}
|
| 5081 |
+
{"current_steps": 25310, "total_steps": 25880, "loss": 4.4537, "lr": 7.410606214427185e-08, "epoch": 19.55950540958269, "percentage": 97.8, "elapsed_time": "0:57:24", "remaining_time": "0:01:17", "throughput": 2474.62, "total_tokens": 8523280}
|
| 5082 |
+
{"current_steps": 25315, "total_steps": 25880, "loss": 4.5475, "lr": 7.281454256654885e-08, "epoch": 19.56336939721793, "percentage": 97.82, "elapsed_time": "0:57:24", "remaining_time": "0:01:16", "throughput": 2474.59, "total_tokens": 8524656}
|
| 5083 |
+
{"current_steps": 25320, "total_steps": 25880, "loss": 4.3069, "lr": 7.153436002205472e-08, "epoch": 19.567233384853168, "percentage": 97.84, "elapsed_time": "0:57:25", "remaining_time": "0:01:16", "throughput": 2474.62, "total_tokens": 8526384}
|
| 5084 |
+
{"current_steps": 25325, "total_steps": 25880, "loss": 4.6221, "lr": 7.02655150930237e-08, "epoch": 19.57109737248841, "percentage": 97.86, "elapsed_time": "0:57:26", "remaining_time": "0:01:15", "throughput": 2474.64, "total_tokens": 8527984}
|
| 5085 |
+
{"current_steps": 25330, "total_steps": 25880, "loss": 4.5771, "lr": 6.900800835653587e-08, "epoch": 19.57496136012365, "percentage": 97.87, "elapsed_time": "0:57:26", "remaining_time": "0:01:14", "throughput": 2474.65, "total_tokens": 8529552}
|
| 5086 |
+
{"current_steps": 25335, "total_steps": 25880, "loss": 4.5336, "lr": 6.77618403845115e-08, "epoch": 19.578825347758887, "percentage": 97.89, "elapsed_time": "0:57:27", "remaining_time": "0:01:14", "throughput": 2474.72, "total_tokens": 8531440}
|
| 5087 |
+
{"current_steps": 25340, "total_steps": 25880, "loss": 4.6725, "lr": 6.652701174371389e-08, "epoch": 19.582689335394125, "percentage": 97.91, "elapsed_time": "0:57:28", "remaining_time": "0:01:13", "throughput": 2474.74, "total_tokens": 8533104}
|
| 5088 |
+
{"current_steps": 25345, "total_steps": 25880, "loss": 4.7208, "lr": 6.530352299575215e-08, "epoch": 19.586553323029367, "percentage": 97.93, "elapsed_time": "0:57:28", "remaining_time": "0:01:12", "throughput": 2474.78, "total_tokens": 8534896}
|
| 5089 |
+
{"current_steps": 25350, "total_steps": 25880, "loss": 4.4622, "lr": 6.409137469707837e-08, "epoch": 19.590417310664606, "percentage": 97.95, "elapsed_time": "0:57:29", "remaining_time": "0:01:12", "throughput": 2474.79, "total_tokens": 8536464}
|
| 5090 |
+
{"current_steps": 25355, "total_steps": 25880, "loss": 4.9091, "lr": 6.289056739898213e-08, "epoch": 19.594281298299844, "percentage": 97.97, "elapsed_time": "0:57:30", "remaining_time": "0:01:11", "throughput": 2474.78, "total_tokens": 8538032}
|
| 5091 |
+
{"current_steps": 25360, "total_steps": 25880, "loss": 4.6437, "lr": 6.170110164759879e-08, "epoch": 19.598145285935086, "percentage": 97.99, "elapsed_time": "0:57:30", "remaining_time": "0:01:10", "throughput": 2474.83, "total_tokens": 8540016}
|
| 5092 |
+
{"current_steps": 25365, "total_steps": 25880, "loss": 4.8041, "lr": 6.052297798390116e-08, "epoch": 19.602009273570324, "percentage": 98.01, "elapsed_time": "0:57:31", "remaining_time": "0:01:10", "throughput": 2474.89, "total_tokens": 8541968}
|
| 5093 |
+
{"current_steps": 25370, "total_steps": 25880, "loss": 4.552, "lr": 5.9356196943713415e-08, "epoch": 19.605873261205563, "percentage": 98.03, "elapsed_time": "0:57:32", "remaining_time": "0:01:09", "throughput": 2474.95, "total_tokens": 8543696}
|
| 5094 |
+
{"current_steps": 25375, "total_steps": 25880, "loss": 4.6653, "lr": 5.8200759057688845e-08, "epoch": 19.609737248840805, "percentage": 98.05, "elapsed_time": "0:57:32", "remaining_time": "0:01:08", "throughput": 2474.96, "total_tokens": 8545456}
|
| 5095 |
+
{"current_steps": 25380, "total_steps": 25880, "loss": 4.532, "lr": 5.705666485132932e-08, "epoch": 19.613601236476043, "percentage": 98.07, "elapsed_time": "0:57:33", "remaining_time": "0:01:08", "throughput": 2474.96, "total_tokens": 8546960}
|
| 5096 |
+
{"current_steps": 25385, "total_steps": 25880, "loss": 4.6456, "lr": 5.5923914844976944e-08, "epoch": 19.61746522411128, "percentage": 98.09, "elapsed_time": "0:57:33", "remaining_time": "0:01:07", "throughput": 2474.93, "total_tokens": 8548368}
|
| 5097 |
+
{"current_steps": 25390, "total_steps": 25880, "loss": 4.7773, "lr": 5.4802509553811274e-08, "epoch": 19.621329211746524, "percentage": 98.11, "elapsed_time": "0:57:34", "remaining_time": "0:01:06", "throughput": 2474.94, "total_tokens": 8550128}
|
| 5098 |
+
{"current_steps": 25395, "total_steps": 25880, "loss": 4.8517, "lr": 5.3692449487857675e-08, "epoch": 19.625193199381762, "percentage": 98.13, "elapsed_time": "0:57:35", "remaining_time": "0:01:05", "throughput": 2474.93, "total_tokens": 8551600}
|
| 5099 |
+
{"current_steps": 25400, "total_steps": 25880, "loss": 4.4443, "lr": 5.259373515197341e-08, "epoch": 19.629057187017, "percentage": 98.15, "elapsed_time": "0:57:35", "remaining_time": "0:01:05", "throughput": 2474.96, "total_tokens": 8553232}
|
| 5100 |
+
{"current_steps": 25405, "total_steps": 25880, "loss": 4.5971, "lr": 5.150636704586431e-08, "epoch": 19.632921174652243, "percentage": 98.16, "elapsed_time": "0:57:36", "remaining_time": "0:01:04", "throughput": 2474.94, "total_tokens": 8554768}
|
| 5101 |
+
{"current_steps": 25410, "total_steps": 25880, "loss": 4.6934, "lr": 5.043034566406812e-08, "epoch": 19.63678516228748, "percentage": 98.18, "elapsed_time": "0:57:37", "remaining_time": "0:01:03", "throughput": 2474.92, "total_tokens": 8556176}
|
| 5102 |
+
{"current_steps": 25415, "total_steps": 25880, "loss": 4.5773, "lr": 4.936567149596838e-08, "epoch": 19.64064914992272, "percentage": 98.2, "elapsed_time": "0:57:37", "remaining_time": "0:01:03", "throughput": 2474.91, "total_tokens": 8557872}
|
| 5103 |
+
{"current_steps": 25420, "total_steps": 25880, "loss": 4.5794, "lr": 4.8312345025786075e-08, "epoch": 19.64451313755796, "percentage": 98.22, "elapsed_time": "0:57:38", "remaining_time": "0:01:02", "throughput": 2474.96, "total_tokens": 8559760}
|
| 5104 |
+
{"current_steps": 25425, "total_steps": 25880, "loss": 4.6888, "lr": 4.7270366732576896e-08, "epoch": 19.6483771251932, "percentage": 98.24, "elapsed_time": "0:57:39", "remaining_time": "0:01:01", "throughput": 2474.94, "total_tokens": 8561296}
|
| 5105 |
+
{"current_steps": 25430, "total_steps": 25880, "loss": 4.8577, "lr": 4.6239737090242316e-08, "epoch": 19.652241112828438, "percentage": 98.26, "elapsed_time": "0:57:39", "remaining_time": "0:01:01", "throughput": 2475.02, "total_tokens": 8563376}
|
| 5106 |
+
{"current_steps": 25435, "total_steps": 25880, "loss": 4.9786, "lr": 4.5220456567515725e-08, "epoch": 19.65610510046368, "percentage": 98.28, "elapsed_time": "0:57:40", "remaining_time": "0:01:00", "throughput": 2475.05, "total_tokens": 8565040}
|
| 5107 |
+
{"current_steps": 25440, "total_steps": 25880, "loss": 4.6461, "lr": 4.421252562797629e-08, "epoch": 19.65996908809892, "percentage": 98.3, "elapsed_time": "0:57:41", "remaining_time": "0:00:59", "throughput": 2475.07, "total_tokens": 8566608}
|
| 5108 |
+
{"current_steps": 25445, "total_steps": 25880, "loss": 4.6452, "lr": 4.321594473003232e-08, "epoch": 19.663833075734157, "percentage": 98.32, "elapsed_time": "0:57:41", "remaining_time": "0:00:59", "throughput": 2475.11, "total_tokens": 8568400}
|
| 5109 |
+
{"current_steps": 25450, "total_steps": 25880, "loss": 4.7552, "lr": 4.22307143269407e-08, "epoch": 19.667697063369395, "percentage": 98.34, "elapsed_time": "0:57:42", "remaining_time": "0:00:58", "throughput": 2475.15, "total_tokens": 8570160}
|
| 5110 |
+
{"current_steps": 25455, "total_steps": 25880, "loss": 4.6023, "lr": 4.125683486678189e-08, "epoch": 19.671561051004637, "percentage": 98.36, "elapsed_time": "0:57:43", "remaining_time": "0:00:57", "throughput": 2475.19, "total_tokens": 8571824}
|
| 5111 |
+
{"current_steps": 25460, "total_steps": 25880, "loss": 4.4633, "lr": 4.0294306792490466e-08, "epoch": 19.675425038639876, "percentage": 98.38, "elapsed_time": "0:57:43", "remaining_time": "0:00:57", "throughput": 2475.21, "total_tokens": 8573520}
|
| 5112 |
+
{"current_steps": 25465, "total_steps": 25880, "loss": 4.585, "lr": 3.934313054182459e-08, "epoch": 19.679289026275114, "percentage": 98.4, "elapsed_time": "0:57:44", "remaining_time": "0:00:56", "throughput": 2475.24, "total_tokens": 8575184}
|
| 5113 |
+
{"current_steps": 25470, "total_steps": 25880, "loss": 4.7982, "lr": 3.840330654738544e-08, "epoch": 19.683153013910356, "percentage": 98.42, "elapsed_time": "0:57:45", "remaining_time": "0:00:55", "throughput": 2475.29, "total_tokens": 8576976}
|
| 5114 |
+
{"current_steps": 25475, "total_steps": 25880, "loss": 4.6574, "lr": 3.747483523661166e-08, "epoch": 19.687017001545595, "percentage": 98.44, "elapsed_time": "0:57:45", "remaining_time": "0:00:55", "throughput": 2475.3, "total_tokens": 8578704}
|
| 5115 |
+
{"current_steps": 25480, "total_steps": 25880, "loss": 4.6828, "lr": 3.655771703177935e-08, "epoch": 19.690880989180833, "percentage": 98.45, "elapsed_time": "0:57:46", "remaining_time": "0:00:54", "throughput": 2475.29, "total_tokens": 8580272}
|
| 5116 |
+
{"current_steps": 25485, "total_steps": 25880, "loss": 4.6214, "lr": 3.565195234999652e-08, "epoch": 19.694744976816075, "percentage": 98.47, "elapsed_time": "0:57:47", "remaining_time": "0:00:53", "throughput": 2475.28, "total_tokens": 8581872}
|
| 5117 |
+
{"current_steps": 25490, "total_steps": 25880, "loss": 4.8455, "lr": 3.475754160321143e-08, "epoch": 19.698608964451314, "percentage": 98.49, "elapsed_time": "0:57:47", "remaining_time": "0:00:53", "throughput": 2475.25, "total_tokens": 8583248}
|
| 5118 |
+
{"current_steps": 25495, "total_steps": 25880, "loss": 4.4429, "lr": 3.3874485198207015e-08, "epoch": 19.702472952086552, "percentage": 98.51, "elapsed_time": "0:57:48", "remaining_time": "0:00:52", "throughput": 2475.2, "total_tokens": 8584688}
|
| 5119 |
+
{"current_steps": 25500, "total_steps": 25880, "loss": 4.8567, "lr": 3.3002783536603685e-08, "epoch": 19.706336939721794, "percentage": 98.53, "elapsed_time": "0:57:48", "remaining_time": "0:00:51", "throughput": 2475.19, "total_tokens": 8586256}
|
| 5120 |
+
{"current_steps": 25505, "total_steps": 25880, "loss": 4.6935, "lr": 3.214243701485653e-08, "epoch": 19.710200927357032, "percentage": 98.55, "elapsed_time": "0:57:49", "remaining_time": "0:00:51", "throughput": 2475.21, "total_tokens": 8587920}
|
| 5121 |
+
{"current_steps": 25510, "total_steps": 25880, "loss": 4.4071, "lr": 3.129344602425255e-08, "epoch": 19.71406491499227, "percentage": 98.57, "elapsed_time": "0:57:50", "remaining_time": "0:00:50", "throughput": 2475.23, "total_tokens": 8589520}
|
| 5122 |
+
{"current_steps": 25515, "total_steps": 25880, "loss": 4.4654, "lr": 3.045581095092453e-08, "epoch": 19.717928902627513, "percentage": 98.59, "elapsed_time": "0:57:50", "remaining_time": "0:00:49", "throughput": 2475.25, "total_tokens": 8591216}
|
| 5123 |
+
{"current_steps": 25520, "total_steps": 25880, "loss": 4.2649, "lr": 2.9629532175828867e-08, "epoch": 19.72179289026275, "percentage": 98.61, "elapsed_time": "0:57:51", "remaining_time": "0:00:48", "throughput": 2475.32, "total_tokens": 8593104}
|
| 5124 |
+
{"current_steps": 25525, "total_steps": 25880, "loss": 4.5236, "lr": 2.881461007476216e-08, "epoch": 19.72565687789799, "percentage": 98.63, "elapsed_time": "0:57:52", "remaining_time": "0:00:48", "throughput": 2475.34, "total_tokens": 8594704}
|
| 5125 |
+
{"current_steps": 25530, "total_steps": 25880, "loss": 4.7453, "lr": 2.8011045018361272e-08, "epoch": 19.72952086553323, "percentage": 98.65, "elapsed_time": "0:57:52", "remaining_time": "0:00:47", "throughput": 2475.35, "total_tokens": 8596336}
|
| 5126 |
+
{"current_steps": 25535, "total_steps": 25880, "loss": 4.64, "lr": 2.7218837372086636e-08, "epoch": 19.73338485316847, "percentage": 98.67, "elapsed_time": "0:57:53", "remaining_time": "0:00:46", "throughput": 2475.34, "total_tokens": 8597840}
|
| 5127 |
+
{"current_steps": 25540, "total_steps": 25880, "loss": 4.4822, "lr": 2.6437987496238935e-08, "epoch": 19.73724884080371, "percentage": 98.69, "elapsed_time": "0:57:54", "remaining_time": "0:00:46", "throughput": 2475.36, "total_tokens": 8599536}
|
| 5128 |
+
{"current_steps": 25545, "total_steps": 25880, "loss": 4.7857, "lr": 2.566849574595631e-08, "epoch": 19.74111282843895, "percentage": 98.71, "elapsed_time": "0:57:54", "remaining_time": "0:00:45", "throughput": 2475.38, "total_tokens": 8601296}
|
| 5129 |
+
{"current_steps": 25550, "total_steps": 25880, "loss": 5.041, "lr": 2.4910362471208815e-08, "epoch": 19.74497681607419, "percentage": 98.72, "elapsed_time": "0:57:55", "remaining_time": "0:00:44", "throughput": 2475.42, "total_tokens": 8602960}
|
| 5130 |
+
{"current_steps": 25555, "total_steps": 25880, "loss": 4.6241, "lr": 2.4163588016795636e-08, "epoch": 19.748840803709427, "percentage": 98.74, "elapsed_time": "0:57:55", "remaining_time": "0:00:44", "throughput": 2475.39, "total_tokens": 8604368}
|
| 5131 |
+
{"current_steps": 25560, "total_steps": 25880, "loss": 4.8678, "lr": 2.3428172722358977e-08, "epoch": 19.75270479134467, "percentage": 98.76, "elapsed_time": "0:57:56", "remaining_time": "0:00:43", "throughput": 2475.37, "total_tokens": 8605904}
|
| 5132 |
+
{"current_steps": 25565, "total_steps": 25880, "loss": 4.8362, "lr": 2.270411692237018e-08, "epoch": 19.756568778979908, "percentage": 98.78, "elapsed_time": "0:57:57", "remaining_time": "0:00:42", "throughput": 2475.4, "total_tokens": 8607632}
|
| 5133 |
+
{"current_steps": 25570, "total_steps": 25880, "loss": 4.7657, "lr": 2.1991420946129714e-08, "epoch": 19.760432766615146, "percentage": 98.8, "elapsed_time": "0:57:57", "remaining_time": "0:00:42", "throughput": 2475.41, "total_tokens": 8609200}
|
| 5134 |
+
{"current_steps": 25575, "total_steps": 25880, "loss": 4.4421, "lr": 2.12900851177783e-08, "epoch": 19.764296754250385, "percentage": 98.82, "elapsed_time": "0:57:58", "remaining_time": "0:00:41", "throughput": 2475.45, "total_tokens": 8610960}
|
| 5135 |
+
{"current_steps": 25580, "total_steps": 25880, "loss": 4.3709, "lr": 2.0600109756288565e-08, "epoch": 19.768160741885627, "percentage": 98.84, "elapsed_time": "0:57:59", "remaining_time": "0:00:40", "throughput": 2475.51, "total_tokens": 8612848}
|
| 5136 |
+
{"current_steps": 25585, "total_steps": 25880, "loss": 4.962, "lr": 1.992149517546227e-08, "epoch": 19.772024729520865, "percentage": 98.86, "elapsed_time": "0:57:59", "remaining_time": "0:00:40", "throughput": 2475.5, "total_tokens": 8614352}
|
| 5137 |
+
{"current_steps": 25590, "total_steps": 25880, "loss": 4.4487, "lr": 1.925424168394141e-08, "epoch": 19.775888717156104, "percentage": 98.88, "elapsed_time": "0:58:00", "remaining_time": "0:00:39", "throughput": 2475.56, "total_tokens": 8616272}
|
| 5138 |
+
{"current_steps": 25595, "total_steps": 25880, "loss": 4.7292, "lr": 1.8598349585197128e-08, "epoch": 19.779752704791346, "percentage": 98.9, "elapsed_time": "0:58:01", "remaining_time": "0:00:38", "throughput": 2475.59, "total_tokens": 8618032}
|
| 5139 |
+
{"current_steps": 25600, "total_steps": 25880, "loss": 4.5363, "lr": 1.7953819177529697e-08, "epoch": 19.783616692426584, "percentage": 98.92, "elapsed_time": "0:58:01", "remaining_time": "0:00:38", "throughput": 2475.58, "total_tokens": 8619504}
|
| 5140 |
+
{"current_steps": 25605, "total_steps": 25880, "loss": 4.5782, "lr": 1.732065075407685e-08, "epoch": 19.787480680061822, "percentage": 98.94, "elapsed_time": "0:58:02", "remaining_time": "0:00:37", "throughput": 2475.57, "total_tokens": 8620976}
|
| 5141 |
+
{"current_steps": 25610, "total_steps": 25880, "loss": 4.8115, "lr": 1.6698844602808238e-08, "epoch": 19.791344667697064, "percentage": 98.96, "elapsed_time": "0:58:03", "remaining_time": "0:00:36", "throughput": 2475.56, "total_tokens": 8622448}
|
| 5142 |
+
{"current_steps": 25615, "total_steps": 25880, "loss": 4.6706, "lr": 1.6088401006522647e-08, "epoch": 19.795208655332303, "percentage": 98.98, "elapsed_time": "0:58:03", "remaining_time": "0:00:36", "throughput": 2475.53, "total_tokens": 8623856}
|
| 5143 |
+
{"current_steps": 25620, "total_steps": 25880, "loss": 4.6016, "lr": 1.548932024285632e-08, "epoch": 19.79907264296754, "percentage": 99.0, "elapsed_time": "0:58:04", "remaining_time": "0:00:35", "throughput": 2475.53, "total_tokens": 8625456}
|
| 5144 |
+
{"current_steps": 25625, "total_steps": 25880, "loss": 4.6804, "lr": 1.4901602584271868e-08, "epoch": 19.802936630602783, "percentage": 99.01, "elapsed_time": "0:58:04", "remaining_time": "0:00:34", "throughput": 2475.61, "total_tokens": 8627408}
|
| 5145 |
+
{"current_steps": 25630, "total_steps": 25880, "loss": 4.6745, "lr": 1.4325248298069361e-08, "epoch": 19.80680061823802, "percentage": 99.03, "elapsed_time": "0:58:05", "remaining_time": "0:00:33", "throughput": 2475.61, "total_tokens": 8628944}
|
| 5146 |
+
{"current_steps": 25635, "total_steps": 25880, "loss": 4.8013, "lr": 1.3760257646378005e-08, "epoch": 19.81066460587326, "percentage": 99.05, "elapsed_time": "0:58:06", "remaining_time": "0:00:33", "throughput": 2475.64, "total_tokens": 8630672}
|
| 5147 |
+
{"current_steps": 25640, "total_steps": 25880, "loss": 4.9742, "lr": 1.3206630886158921e-08, "epoch": 19.814528593508502, "percentage": 99.07, "elapsed_time": "0:58:06", "remaining_time": "0:00:32", "throughput": 2475.66, "total_tokens": 8632336}
|
| 5148 |
+
{"current_steps": 25645, "total_steps": 25880, "loss": 4.7525, "lr": 1.2664368269202365e-08, "epoch": 19.81839258114374, "percentage": 99.09, "elapsed_time": "0:58:07", "remaining_time": "0:00:31", "throughput": 2475.65, "total_tokens": 8633840}
|
| 5149 |
+
{"current_steps": 25650, "total_steps": 25880, "loss": 4.6583, "lr": 1.2133470042136052e-08, "epoch": 19.82225656877898, "percentage": 99.11, "elapsed_time": "0:58:08", "remaining_time": "0:00:31", "throughput": 2475.71, "total_tokens": 8635792}
|
| 5150 |
+
{"current_steps": 25655, "total_steps": 25880, "loss": 4.9399, "lr": 1.161393644641129e-08, "epoch": 19.82612055641422, "percentage": 99.13, "elapsed_time": "0:58:08", "remaining_time": "0:00:30", "throughput": 2475.79, "total_tokens": 8637904}
|
| 5151 |
+
{"current_steps": 25660, "total_steps": 25880, "loss": 4.5152, "lr": 1.1105767718319614e-08, "epoch": 19.82998454404946, "percentage": 99.15, "elapsed_time": "0:58:09", "remaining_time": "0:00:29", "throughput": 2475.86, "total_tokens": 8639696}
|
| 5152 |
+
{"current_steps": 25665, "total_steps": 25880, "loss": 4.5038, "lr": 1.0608964088978934e-08, "epoch": 19.833848531684698, "percentage": 99.17, "elapsed_time": "0:58:10", "remaining_time": "0:00:29", "throughput": 2475.85, "total_tokens": 8641360}
|
| 5153 |
+
{"current_steps": 25670, "total_steps": 25880, "loss": 4.7485, "lr": 1.012352578433351e-08, "epoch": 19.83771251931994, "percentage": 99.19, "elapsed_time": "0:58:10", "remaining_time": "0:00:28", "throughput": 2475.87, "total_tokens": 8643216}
|
| 5154 |
+
{"current_steps": 25675, "total_steps": 25880, "loss": 5.2262, "lr": 9.649453025170618e-09, "epoch": 19.841576506955178, "percentage": 99.21, "elapsed_time": "0:58:11", "remaining_time": "0:00:27", "throughput": 2475.87, "total_tokens": 8644816}
|
| 5155 |
+
{"current_steps": 25680, "total_steps": 25880, "loss": 4.9672, "lr": 9.186746027095571e-09, "epoch": 19.845440494590417, "percentage": 99.23, "elapsed_time": "0:58:12", "remaining_time": "0:00:27", "throughput": 2475.87, "total_tokens": 8646352}
|
| 5156 |
+
{"current_steps": 25685, "total_steps": 25880, "loss": 4.4626, "lr": 8.73540500055392e-09, "epoch": 19.84930448222566, "percentage": 99.25, "elapsed_time": "0:58:12", "remaining_time": "0:00:26", "throughput": 2475.89, "total_tokens": 8648048}
|
| 5157 |
+
{"current_steps": 25690, "total_steps": 25880, "loss": 4.7713, "lr": 8.295430150814798e-09, "epoch": 19.853168469860897, "percentage": 99.27, "elapsed_time": "0:58:13", "remaining_time": "0:00:25", "throughput": 2475.9, "total_tokens": 8649584}
|
| 5158 |
+
{"current_steps": 25695, "total_steps": 25880, "loss": 4.7273, "lr": 7.866821677984804e-09, "epoch": 19.857032457496135, "percentage": 99.29, "elapsed_time": "0:58:14", "remaining_time": "0:00:25", "throughput": 2475.89, "total_tokens": 8651152}
|
| 5159 |
+
{"current_steps": 25700, "total_steps": 25880, "loss": 4.9045, "lr": 7.449579776996895e-09, "epoch": 19.860896445131374, "percentage": 99.3, "elapsed_time": "0:58:14", "remaining_time": "0:00:24", "throughput": 2475.86, "total_tokens": 8652560}
|
| 5160 |
+
{"current_steps": 25705, "total_steps": 25880, "loss": 4.5137, "lr": 7.043704637613169e-09, "epoch": 19.864760432766616, "percentage": 99.32, "elapsed_time": "0:58:15", "remaining_time": "0:00:23", "throughput": 2475.89, "total_tokens": 8654448}
|
| 5161 |
+
{"current_steps": 25710, "total_steps": 25880, "loss": 4.5018, "lr": 6.6491964444304054e-09, "epoch": 19.868624420401854, "percentage": 99.34, "elapsed_time": "0:58:16", "remaining_time": "0:00:23", "throughput": 2475.91, "total_tokens": 8656144}
|
| 5162 |
+
{"current_steps": 25715, "total_steps": 25880, "loss": 4.3331, "lr": 6.266055376871749e-09, "epoch": 19.872488408037093, "percentage": 99.36, "elapsed_time": "0:58:16", "remaining_time": "0:00:22", "throughput": 2475.91, "total_tokens": 8657776}
|
| 5163 |
+
{"current_steps": 25720, "total_steps": 25880, "loss": 5.0436, "lr": 5.894281609195029e-09, "epoch": 19.876352395672335, "percentage": 99.38, "elapsed_time": "0:58:17", "remaining_time": "0:00:21", "throughput": 2475.93, "total_tokens": 8659472}
|
| 5164 |
+
{"current_steps": 25725, "total_steps": 25880, "loss": 4.819, "lr": 5.533875310478886e-09, "epoch": 19.880216383307573, "percentage": 99.4, "elapsed_time": "0:58:18", "remaining_time": "0:00:21", "throughput": 2475.97, "total_tokens": 8661136}
|
| 5165 |
+
{"current_steps": 25730, "total_steps": 25880, "loss": 4.9444, "lr": 5.184836644644975e-09, "epoch": 19.88408037094281, "percentage": 99.42, "elapsed_time": "0:58:18", "remaining_time": "0:00:20", "throughput": 2475.98, "total_tokens": 8662800}
|
| 5166 |
+
{"current_steps": 25735, "total_steps": 25880, "loss": 4.7833, "lr": 4.847165770435758e-09, "epoch": 19.887944358578054, "percentage": 99.44, "elapsed_time": "0:58:19", "remaining_time": "0:00:19", "throughput": 2476.03, "total_tokens": 8664592}
|
| 5167 |
+
{"current_steps": 25740, "total_steps": 25880, "loss": 5.0547, "lr": 4.52086284142561e-09, "epoch": 19.891808346213292, "percentage": 99.46, "elapsed_time": "0:58:20", "remaining_time": "0:00:19", "throughput": 2476.05, "total_tokens": 8666480}
|
| 5168 |
+
{"current_steps": 25745, "total_steps": 25880, "loss": 4.6184, "lr": 4.205928006018045e-09, "epoch": 19.89567233384853, "percentage": 99.48, "elapsed_time": "0:58:20", "remaining_time": "0:00:18", "throughput": 2476.06, "total_tokens": 8668112}
|
| 5169 |
+
{"current_steps": 25750, "total_steps": 25880, "loss": 4.4488, "lr": 3.9023614074484845e-09, "epoch": 19.899536321483772, "percentage": 99.5, "elapsed_time": "0:58:21", "remaining_time": "0:00:17", "throughput": 2476.07, "total_tokens": 8669776}
|
| 5170 |
+
{"current_steps": 25755, "total_steps": 25880, "loss": 4.7974, "lr": 3.6101631837814896e-09, "epoch": 19.90340030911901, "percentage": 99.52, "elapsed_time": "0:58:22", "remaining_time": "0:00:16", "throughput": 2476.12, "total_tokens": 8671696}
|
| 5171 |
+
{"current_steps": 25760, "total_steps": 25880, "loss": 5.0934, "lr": 3.32933346790798e-09, "epoch": 19.90726429675425, "percentage": 99.54, "elapsed_time": "0:58:22", "remaining_time": "0:00:16", "throughput": 2476.18, "total_tokens": 8673616}
|
| 5172 |
+
{"current_steps": 25765, "total_steps": 25880, "loss": 4.6054, "lr": 3.059872387553564e-09, "epoch": 19.91112828438949, "percentage": 99.56, "elapsed_time": "0:58:23", "remaining_time": "0:00:15", "throughput": 2476.2, "total_tokens": 8675312}
|
| 5173 |
+
{"current_steps": 25770, "total_steps": 25880, "loss": 4.6071, "lr": 2.8017800652702097e-09, "epoch": 19.91499227202473, "percentage": 99.57, "elapsed_time": "0:58:24", "remaining_time": "0:00:14", "throughput": 2476.2, "total_tokens": 8676976}
|
| 5174 |
+
{"current_steps": 25775, "total_steps": 25880, "loss": 4.6747, "lr": 2.5550566184390224e-09, "epoch": 19.918856259659968, "percentage": 99.59, "elapsed_time": "0:58:24", "remaining_time": "0:00:14", "throughput": 2476.27, "total_tokens": 8678960}
|
| 5175 |
+
{"current_steps": 25780, "total_steps": 25880, "loss": 4.7489, "lr": 2.3197021592730185e-09, "epoch": 19.92272024729521, "percentage": 99.61, "elapsed_time": "0:58:25", "remaining_time": "0:00:13", "throughput": 2476.31, "total_tokens": 8680720}
|
| 5176 |
+
{"current_steps": 25785, "total_steps": 25880, "loss": 4.5177, "lr": 2.095716794811575e-09, "epoch": 19.92658423493045, "percentage": 99.63, "elapsed_time": "0:58:26", "remaining_time": "0:00:12", "throughput": 2476.31, "total_tokens": 8682352}
|
| 5177 |
+
{"current_steps": 25790, "total_steps": 25880, "loss": 5.0301, "lr": 1.883100626925982e-09, "epoch": 19.930448222565687, "percentage": 99.65, "elapsed_time": "0:58:26", "remaining_time": "0:00:12", "throughput": 2476.33, "total_tokens": 8683952}
|
| 5178 |
+
{"current_steps": 25795, "total_steps": 25880, "loss": 4.7109, "lr": 1.6818537523111134e-09, "epoch": 19.93431221020093, "percentage": 99.67, "elapsed_time": "0:58:27", "remaining_time": "0:00:11", "throughput": 2476.31, "total_tokens": 8685392}
|
| 5179 |
+
{"current_steps": 25800, "total_steps": 25880, "loss": 4.5536, "lr": 1.491976262499306e-09, "epoch": 19.938176197836167, "percentage": 99.69, "elapsed_time": "0:58:28", "remaining_time": "0:00:10", "throughput": 2476.33, "total_tokens": 8687088}
|
| 5180 |
+
{"current_steps": 25805, "total_steps": 25880, "loss": 4.5708, "lr": 1.3134682438492585e-09, "epoch": 19.942040185471406, "percentage": 99.71, "elapsed_time": "0:58:28", "remaining_time": "0:00:10", "throughput": 2476.35, "total_tokens": 8688752}
|
| 5181 |
+
{"current_steps": 25810, "total_steps": 25880, "loss": 4.8619, "lr": 1.1463297775432535e-09, "epoch": 19.945904173106648, "percentage": 99.73, "elapsed_time": "0:58:29", "remaining_time": "0:00:09", "throughput": 2476.35, "total_tokens": 8690384}
|
| 5182 |
+
{"current_steps": 25815, "total_steps": 25880, "loss": 4.6382, "lr": 9.905609395982617e-10, "epoch": 19.949768160741886, "percentage": 99.75, "elapsed_time": "0:58:30", "remaining_time": "0:00:08", "throughput": 2476.41, "total_tokens": 8692208}
|
| 5183 |
+
{"current_steps": 25820, "total_steps": 25880, "loss": 4.6968, "lr": 8.4616180086039e-10, "epoch": 19.953632148377125, "percentage": 99.77, "elapsed_time": "0:58:30", "remaining_time": "0:00:08", "throughput": 2476.42, "total_tokens": 8693808}
|
| 5184 |
+
{"current_steps": 25825, "total_steps": 25880, "loss": 4.7443, "lr": 7.1313242699933e-10, "epoch": 19.957496136012363, "percentage": 99.79, "elapsed_time": "0:58:31", "remaining_time": "0:00:07", "throughput": 2476.43, "total_tokens": 8695536}
|
| 5185 |
+
{"current_steps": 25830, "total_steps": 25880, "loss": 4.6115, "lr": 5.914728785250123e-10, "epoch": 19.961360123647605, "percentage": 99.81, "elapsed_time": "0:58:32", "remaining_time": "0:00:06", "throughput": 2476.47, "total_tokens": 8697392}
|
| 5186 |
+
{"current_steps": 25835, "total_steps": 25880, "loss": 4.7411, "lr": 4.811832107598502e-10, "epoch": 19.965224111282843, "percentage": 99.83, "elapsed_time": "0:58:32", "remaining_time": "0:00:06", "throughput": 2476.52, "total_tokens": 8699216}
|
| 5187 |
+
{"current_steps": 25840, "total_steps": 25880, "loss": 4.9582, "lr": 3.8226347387204654e-10, "epoch": 19.969088098918082, "percentage": 99.85, "elapsed_time": "0:58:33", "remaining_time": "0:00:05", "throughput": 2476.57, "total_tokens": 8701168}
|
| 5188 |
+
{"current_steps": 25845, "total_steps": 25880, "loss": 4.5971, "lr": 2.9471371284783834e-10, "epoch": 19.972952086553324, "percentage": 99.86, "elapsed_time": "0:58:34", "remaining_time": "0:00:04", "throughput": 2476.58, "total_tokens": 8702800}
|
| 5189 |
+
{"current_steps": 25850, "total_steps": 25880, "loss": 4.4756, "lr": 2.185339675025988e-10, "epoch": 19.976816074188562, "percentage": 99.88, "elapsed_time": "0:58:34", "remaining_time": "0:00:04", "throughput": 2476.58, "total_tokens": 8704400}
|
| 5190 |
+
{"current_steps": 25855, "total_steps": 25880, "loss": 4.9561, "lr": 1.5372427248638853e-10, "epoch": 19.9806800618238, "percentage": 99.9, "elapsed_time": "0:58:35", "remaining_time": "0:00:03", "throughput": 2476.62, "total_tokens": 8706096}
|
| 5191 |
+
{"current_steps": 25860, "total_steps": 25880, "loss": 4.6325, "lr": 1.0028465727562885e-10, "epoch": 19.984544049459043, "percentage": 99.92, "elapsed_time": "0:58:35", "remaining_time": "0:00:02", "throughput": 2476.69, "total_tokens": 8707984}
|
| 5192 |
+
{"current_steps": 25865, "total_steps": 25880, "loss": 4.6068, "lr": 5.821514617587731e-11, "epoch": 19.98840803709428, "percentage": 99.94, "elapsed_time": "0:58:36", "remaining_time": "0:00:02", "throughput": 2476.7, "total_tokens": 8709552}
|
| 5193 |
+
{"current_steps": 25870, "total_steps": 25880, "loss": 4.6132, "lr": 2.751575831627662e-11, "epoch": 19.99227202472952, "percentage": 99.96, "elapsed_time": "0:58:37", "remaining_time": "0:00:01", "throughput": 2476.76, "total_tokens": 8711472}
|
| 5194 |
+
{"current_steps": 25875, "total_steps": 25880, "loss": 4.7441, "lr": 8.186507660656873e-12, "epoch": 19.99613601236476, "percentage": 99.98, "elapsed_time": "0:58:37", "remaining_time": "0:00:00", "throughput": 2476.81, "total_tokens": 8713296}
|
| 5195 |
+
{"current_steps": 25880, "total_steps": 25880, "loss": 4.6684, "lr": 2.2740300198442753e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:58:38", "remaining_time": "0:00:00", "throughput": 2476.73, "total_tokens": 8714656}
|
| 5196 |
+
{"current_steps": 25880, "total_steps": 25880, "eval_loss": 4.679266929626465, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:58:44", "remaining_time": "0:00:00", "throughput": 2472.52, "total_tokens": 8714656}
|
| 5197 |
+
{"current_steps": 25880, "total_steps": 25880, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:58:45", "remaining_time": "0:00:00", "throughput": 2471.91, "total_tokens": 8714656}
|