Training in progress, step 25880
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +258 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1638528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55df5c6271b3476be5c491bc75fa96d20102c4cccf66729cb422d249fe762208
|
| 3 |
size 1638528
|
trainer_log.jsonl
CHANGED
|
@@ -4937,3 +4937,261 @@
|
|
| 4937 |
{"current_steps": 24590, "total_steps": 25880, "loss": 0.3014, "lr": 7.561031667511431e-06, "epoch": 19.00309119010819, "percentage": 95.02, "elapsed_time": "1:12:31", "remaining_time": "0:03:48", "throughput": 1903.08, "total_tokens": 8281424}
|
| 4938 |
{"current_steps": 24595, "total_steps": 25880, "loss": 0.1992, "lr": 7.502724464392985e-06, "epoch": 19.006955177743432, "percentage": 95.03, "elapsed_time": "1:12:32", "remaining_time": "0:03:47", "throughput": 1903.1, "total_tokens": 8283120}
|
| 4939 |
{"current_steps": 24600, "total_steps": 25880, "loss": 0.1696, "lr": 7.444641251984963e-06, "epoch": 19.01081916537867, "percentage": 95.05, "elapsed_time": "1:12:33", "remaining_time": "0:03:46", "throughput": 1903.12, "total_tokens": 8284848}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4937 |
{"current_steps": 24590, "total_steps": 25880, "loss": 0.3014, "lr": 7.561031667511431e-06, "epoch": 19.00309119010819, "percentage": 95.02, "elapsed_time": "1:12:31", "remaining_time": "0:03:48", "throughput": 1903.08, "total_tokens": 8281424}
|
| 4938 |
{"current_steps": 24595, "total_steps": 25880, "loss": 0.1992, "lr": 7.502724464392985e-06, "epoch": 19.006955177743432, "percentage": 95.03, "elapsed_time": "1:12:32", "remaining_time": "0:03:47", "throughput": 1903.1, "total_tokens": 8283120}
|
| 4939 |
{"current_steps": 24600, "total_steps": 25880, "loss": 0.1696, "lr": 7.444641251984963e-06, "epoch": 19.01081916537867, "percentage": 95.05, "elapsed_time": "1:12:33", "remaining_time": "0:03:46", "throughput": 1903.12, "total_tokens": 8284848}
|
| 4940 |
+
{"current_steps": 24605, "total_steps": 25880, "loss": 0.3481, "lr": 7.3867820567039e-06, "epoch": 19.01468315301391, "percentage": 95.07, "elapsed_time": "1:12:34", "remaining_time": "0:03:45", "throughput": 1903.14, "total_tokens": 8286576}
|
| 4941 |
+
{"current_steps": 24610, "total_steps": 25880, "loss": 0.2268, "lr": 7.32914690486447e-06, "epoch": 19.01854714064915, "percentage": 95.09, "elapsed_time": "1:12:34", "remaining_time": "0:03:44", "throughput": 1903.13, "total_tokens": 8288080}
|
| 4942 |
+
{"current_steps": 24615, "total_steps": 25880, "loss": 0.2297, "lr": 7.271735822679481e-06, "epoch": 19.02241112828439, "percentage": 95.11, "elapsed_time": "1:12:35", "remaining_time": "0:03:43", "throughput": 1903.18, "total_tokens": 8289936}
|
| 4943 |
+
{"current_steps": 24620, "total_steps": 25880, "loss": 0.3214, "lr": 7.214548836259937e-06, "epoch": 19.026275115919628, "percentage": 95.13, "elapsed_time": "1:12:36", "remaining_time": "0:03:42", "throughput": 1903.15, "total_tokens": 8291376}
|
| 4944 |
+
{"current_steps": 24625, "total_steps": 25880, "loss": 0.266, "lr": 7.157585971614755e-06, "epoch": 19.03013910355487, "percentage": 95.15, "elapsed_time": "1:12:37", "remaining_time": "0:03:42", "throughput": 1903.18, "total_tokens": 8293168}
|
| 4945 |
+
{"current_steps": 24630, "total_steps": 25880, "loss": 0.2628, "lr": 7.100847254650988e-06, "epoch": 19.034003091190108, "percentage": 95.17, "elapsed_time": "1:12:38", "remaining_time": "0:03:41", "throughput": 1903.2, "total_tokens": 8294832}
|
| 4946 |
+
{"current_steps": 24635, "total_steps": 25880, "loss": 0.3181, "lr": 7.044332711173717e-06, "epoch": 19.037867078825347, "percentage": 95.19, "elapsed_time": "1:12:39", "remaining_time": "0:03:40", "throughput": 1903.24, "total_tokens": 8296624}
|
| 4947 |
+
{"current_steps": 24640, "total_steps": 25880, "loss": 0.2975, "lr": 6.98804236688616e-06, "epoch": 19.04173106646059, "percentage": 95.21, "elapsed_time": "1:12:40", "remaining_time": "0:03:39", "throughput": 1903.28, "total_tokens": 8298512}
|
| 4948 |
+
{"current_steps": 24645, "total_steps": 25880, "loss": 0.2826, "lr": 6.931976247389449e-06, "epoch": 19.045595054095827, "percentage": 95.23, "elapsed_time": "1:12:40", "remaining_time": "0:03:38", "throughput": 1903.29, "total_tokens": 8300112}
|
| 4949 |
+
{"current_steps": 24650, "total_steps": 25880, "loss": 0.2521, "lr": 6.876134378182797e-06, "epoch": 19.049459041731065, "percentage": 95.25, "elapsed_time": "1:12:41", "remaining_time": "0:03:37", "throughput": 1903.32, "total_tokens": 8301872}
|
| 4950 |
+
{"current_steps": 24655, "total_steps": 25880, "loss": 0.2006, "lr": 6.8205167846634446e-06, "epoch": 19.053323029366307, "percentage": 95.27, "elapsed_time": "1:12:42", "remaining_time": "0:03:36", "throughput": 1903.35, "total_tokens": 8303632}
|
| 4951 |
+
{"current_steps": 24660, "total_steps": 25880, "loss": 0.3292, "lr": 6.765123492126602e-06, "epoch": 19.057187017001546, "percentage": 95.29, "elapsed_time": "1:12:43", "remaining_time": "0:03:35", "throughput": 1903.39, "total_tokens": 8305488}
|
| 4952 |
+
{"current_steps": 24665, "total_steps": 25880, "loss": 0.2695, "lr": 6.7099545257655046e-06, "epoch": 19.061051004636784, "percentage": 95.31, "elapsed_time": "1:12:44", "remaining_time": "0:03:34", "throughput": 1903.41, "total_tokens": 8307184}
|
| 4953 |
+
{"current_steps": 24670, "total_steps": 25880, "loss": 0.3267, "lr": 6.65500991067125e-06, "epoch": 19.064914992272026, "percentage": 95.32, "elapsed_time": "1:12:45", "remaining_time": "0:03:34", "throughput": 1903.42, "total_tokens": 8308816}
|
| 4954 |
+
{"current_steps": 24675, "total_steps": 25880, "loss": 0.3311, "lr": 6.60028967183296e-06, "epoch": 19.068778979907265, "percentage": 95.34, "elapsed_time": "1:12:46", "remaining_time": "0:03:33", "throughput": 1903.45, "total_tokens": 8310640}
|
| 4955 |
+
{"current_steps": 24680, "total_steps": 25880, "loss": 0.2718, "lr": 6.545793834137836e-06, "epoch": 19.072642967542503, "percentage": 95.36, "elapsed_time": "1:12:46", "remaining_time": "0:03:32", "throughput": 1903.45, "total_tokens": 8312208}
|
| 4956 |
+
{"current_steps": 24685, "total_steps": 25880, "loss": 0.2284, "lr": 6.4915224223708325e-06, "epoch": 19.076506955177745, "percentage": 95.38, "elapsed_time": "1:12:47", "remaining_time": "0:03:31", "throughput": 1903.45, "total_tokens": 8313744}
|
| 4957 |
+
{"current_steps": 24690, "total_steps": 25880, "loss": 0.1863, "lr": 6.437475461214981e-06, "epoch": 19.080370942812984, "percentage": 95.4, "elapsed_time": "1:12:48", "remaining_time": "0:03:30", "throughput": 1903.47, "total_tokens": 8315440}
|
| 4958 |
+
{"current_steps": 24695, "total_steps": 25880, "loss": 0.2865, "lr": 6.3836529752510645e-06, "epoch": 19.084234930448222, "percentage": 95.42, "elapsed_time": "1:12:49", "remaining_time": "0:03:29", "throughput": 1903.48, "total_tokens": 8317200}
|
| 4959 |
+
{"current_steps": 24700, "total_steps": 25880, "loss": 0.2595, "lr": 6.3300549889580006e-06, "epoch": 19.08809891808346, "percentage": 95.44, "elapsed_time": "1:12:50", "remaining_time": "0:03:28", "throughput": 1903.47, "total_tokens": 8318704}
|
| 4960 |
+
{"current_steps": 24705, "total_steps": 25880, "loss": 0.2323, "lr": 6.276681526712347e-06, "epoch": 19.091962905718702, "percentage": 95.46, "elapsed_time": "1:12:51", "remaining_time": "0:03:27", "throughput": 1903.48, "total_tokens": 8320304}
|
| 4961 |
+
{"current_steps": 24710, "total_steps": 25880, "loss": 0.2538, "lr": 6.22353261278874e-06, "epoch": 19.09582689335394, "percentage": 95.48, "elapsed_time": "1:12:51", "remaining_time": "0:03:27", "throughput": 1903.45, "total_tokens": 8321616}
|
| 4962 |
+
{"current_steps": 24715, "total_steps": 25880, "loss": 0.296, "lr": 6.170608271359624e-06, "epoch": 19.09969088098918, "percentage": 95.5, "elapsed_time": "1:12:52", "remaining_time": "0:03:26", "throughput": 1903.47, "total_tokens": 8323344}
|
| 4963 |
+
{"current_steps": 24720, "total_steps": 25880, "loss": 0.2549, "lr": 6.117908526495242e-06, "epoch": 19.10355486862442, "percentage": 95.52, "elapsed_time": "1:12:53", "remaining_time": "0:03:25", "throughput": 1903.5, "total_tokens": 8325104}
|
| 4964 |
+
{"current_steps": 24725, "total_steps": 25880, "loss": 0.2946, "lr": 6.065433402163867e-06, "epoch": 19.10741885625966, "percentage": 95.54, "elapsed_time": "1:12:54", "remaining_time": "0:03:24", "throughput": 1903.53, "total_tokens": 8326928}
|
| 4965 |
+
{"current_steps": 24730, "total_steps": 25880, "loss": 0.2684, "lr": 6.0131829222314085e-06, "epoch": 19.111282843894898, "percentage": 95.56, "elapsed_time": "1:12:55", "remaining_time": "0:03:23", "throughput": 1903.58, "total_tokens": 8328848}
|
| 4966 |
+
{"current_steps": 24735, "total_steps": 25880, "loss": 0.2197, "lr": 5.961157110461746e-06, "epoch": 19.11514683153014, "percentage": 95.58, "elapsed_time": "1:12:56", "remaining_time": "0:03:22", "throughput": 1903.56, "total_tokens": 8330288}
|
| 4967 |
+
{"current_steps": 24740, "total_steps": 25880, "loss": 0.387, "lr": 5.9093559905165075e-06, "epoch": 19.11901081916538, "percentage": 95.6, "elapsed_time": "1:12:56", "remaining_time": "0:03:21", "throughput": 1903.55, "total_tokens": 8331792}
|
| 4968 |
+
{"current_steps": 24745, "total_steps": 25880, "loss": 0.2024, "lr": 5.8577795859551804e-06, "epoch": 19.122874806800617, "percentage": 95.61, "elapsed_time": "1:12:57", "remaining_time": "0:03:20", "throughput": 1903.59, "total_tokens": 8333616}
|
| 4969 |
+
{"current_steps": 24750, "total_steps": 25880, "loss": 0.2509, "lr": 5.806427920234947e-06, "epoch": 19.12673879443586, "percentage": 95.63, "elapsed_time": "1:12:58", "remaining_time": "0:03:19", "throughput": 1903.6, "total_tokens": 8335280}
|
| 4970 |
+
{"current_steps": 24755, "total_steps": 25880, "loss": 0.2029, "lr": 5.755301016710901e-06, "epoch": 19.130602782071097, "percentage": 95.65, "elapsed_time": "1:12:59", "remaining_time": "0:03:19", "throughput": 1903.6, "total_tokens": 8336752}
|
| 4971 |
+
{"current_steps": 24760, "total_steps": 25880, "loss": 0.2315, "lr": 5.704398898635887e-06, "epoch": 19.134466769706336, "percentage": 95.67, "elapsed_time": "1:13:00", "remaining_time": "0:03:18", "throughput": 1903.6, "total_tokens": 8338288}
|
| 4972 |
+
{"current_steps": 24765, "total_steps": 25880, "loss": 0.2472, "lr": 5.653721589160498e-06, "epoch": 19.138330757341578, "percentage": 95.69, "elapsed_time": "1:13:01", "remaining_time": "0:03:17", "throughput": 1903.65, "total_tokens": 8340176}
|
| 4973 |
+
{"current_steps": 24770, "total_steps": 25880, "loss": 0.2866, "lr": 5.603269111333076e-06, "epoch": 19.142194744976816, "percentage": 95.71, "elapsed_time": "1:13:01", "remaining_time": "0:03:16", "throughput": 1903.64, "total_tokens": 8341616}
|
| 4974 |
+
{"current_steps": 24775, "total_steps": 25880, "loss": 0.253, "lr": 5.553041488099652e-06, "epoch": 19.146058732612055, "percentage": 95.73, "elapsed_time": "1:13:02", "remaining_time": "0:03:15", "throughput": 1903.67, "total_tokens": 8343408}
|
| 4975 |
+
{"current_steps": 24780, "total_steps": 25880, "loss": 0.2324, "lr": 5.503038742304068e-06, "epoch": 19.149922720247297, "percentage": 95.75, "elapsed_time": "1:13:03", "remaining_time": "0:03:14", "throughput": 1903.7, "total_tokens": 8345104}
|
| 4976 |
+
{"current_steps": 24785, "total_steps": 25880, "loss": 0.2661, "lr": 5.453260896687906e-06, "epoch": 19.153786707882535, "percentage": 95.77, "elapsed_time": "1:13:04", "remaining_time": "0:03:13", "throughput": 1903.72, "total_tokens": 8346736}
|
| 4977 |
+
{"current_steps": 24790, "total_steps": 25880, "loss": 0.3231, "lr": 5.403707973890393e-06, "epoch": 19.157650695517773, "percentage": 95.79, "elapsed_time": "1:13:05", "remaining_time": "0:03:12", "throughput": 1903.72, "total_tokens": 8348304}
|
| 4978 |
+
{"current_steps": 24795, "total_steps": 25880, "loss": 0.2402, "lr": 5.354379996448555e-06, "epoch": 19.161514683153015, "percentage": 95.81, "elapsed_time": "1:13:06", "remaining_time": "0:03:11", "throughput": 1903.74, "total_tokens": 8349872}
|
| 4979 |
+
{"current_steps": 24800, "total_steps": 25880, "loss": 0.2942, "lr": 5.305276986796948e-06, "epoch": 19.165378670788254, "percentage": 95.83, "elapsed_time": "1:13:06", "remaining_time": "0:03:11", "throughput": 1903.77, "total_tokens": 8351568}
|
| 4980 |
+
{"current_steps": 24805, "total_steps": 25880, "loss": 0.2532, "lr": 5.256398967268039e-06, "epoch": 19.169242658423492, "percentage": 95.85, "elapsed_time": "1:13:07", "remaining_time": "0:03:10", "throughput": 1903.8, "total_tokens": 8353296}
|
| 4981 |
+
{"current_steps": 24810, "total_steps": 25880, "loss": 0.2317, "lr": 5.20774596009177e-06, "epoch": 19.173106646058734, "percentage": 95.87, "elapsed_time": "1:13:08", "remaining_time": "0:03:09", "throughput": 1903.87, "total_tokens": 8355248}
|
| 4982 |
+
{"current_steps": 24815, "total_steps": 25880, "loss": 0.2807, "lr": 5.159317987395884e-06, "epoch": 19.176970633693973, "percentage": 95.88, "elapsed_time": "1:13:09", "remaining_time": "0:03:08", "throughput": 1903.89, "total_tokens": 8356848}
|
| 4983 |
+
{"current_steps": 24820, "total_steps": 25880, "loss": 0.2401, "lr": 5.111115071205541e-06, "epoch": 19.18083462132921, "percentage": 95.9, "elapsed_time": "1:13:10", "remaining_time": "0:03:07", "throughput": 1903.93, "total_tokens": 8358736}
|
| 4984 |
+
{"current_steps": 24825, "total_steps": 25880, "loss": 0.2296, "lr": 5.063137233443926e-06, "epoch": 19.18469860896445, "percentage": 95.92, "elapsed_time": "1:13:11", "remaining_time": "0:03:06", "throughput": 1903.97, "total_tokens": 8360496}
|
| 4985 |
+
{"current_steps": 24830, "total_steps": 25880, "loss": 0.2323, "lr": 5.01538449593153e-06, "epoch": 19.18856259659969, "percentage": 95.94, "elapsed_time": "1:13:11", "remaining_time": "0:03:05", "throughput": 1903.98, "total_tokens": 8362032}
|
| 4986 |
+
{"current_steps": 24835, "total_steps": 25880, "loss": 0.2867, "lr": 4.96785688038659e-06, "epoch": 19.19242658423493, "percentage": 95.96, "elapsed_time": "1:13:12", "remaining_time": "0:03:04", "throughput": 1903.98, "total_tokens": 8363536}
|
| 4987 |
+
{"current_steps": 24840, "total_steps": 25880, "loss": 0.2794, "lr": 4.9205544084250375e-06, "epoch": 19.19629057187017, "percentage": 95.98, "elapsed_time": "1:13:13", "remaining_time": "0:03:03", "throughput": 1904.01, "total_tokens": 8365136}
|
| 4988 |
+
{"current_steps": 24845, "total_steps": 25880, "loss": 0.325, "lr": 4.87347710156022e-06, "epoch": 19.20015455950541, "percentage": 96.0, "elapsed_time": "1:13:14", "remaining_time": "0:03:03", "throughput": 1904.04, "total_tokens": 8366928}
|
| 4989 |
+
{"current_steps": 24850, "total_steps": 25880, "loss": 0.2654, "lr": 4.826624981203176e-06, "epoch": 19.20401854714065, "percentage": 96.02, "elapsed_time": "1:13:15", "remaining_time": "0:03:02", "throughput": 1904.11, "total_tokens": 8368944}
|
| 4990 |
+
{"current_steps": 24855, "total_steps": 25880, "loss": 0.2401, "lr": 4.779998068662583e-06, "epoch": 19.207882534775887, "percentage": 96.04, "elapsed_time": "1:13:16", "remaining_time": "0:03:01", "throughput": 1904.1, "total_tokens": 8370480}
|
| 4991 |
+
{"current_steps": 24860, "total_steps": 25880, "loss": 0.2461, "lr": 4.733596385144645e-06, "epoch": 19.21174652241113, "percentage": 96.06, "elapsed_time": "1:13:16", "remaining_time": "0:03:00", "throughput": 1904.11, "total_tokens": 8372080}
|
| 4992 |
+
{"current_steps": 24865, "total_steps": 25880, "loss": 0.3016, "lr": 4.687419951753147e-06, "epoch": 19.215610510046368, "percentage": 96.08, "elapsed_time": "1:13:17", "remaining_time": "0:02:59", "throughput": 1904.16, "total_tokens": 8373968}
|
| 4993 |
+
{"current_steps": 24870, "total_steps": 25880, "loss": 0.2153, "lr": 4.641468789489289e-06, "epoch": 19.219474497681606, "percentage": 96.1, "elapsed_time": "1:13:18", "remaining_time": "0:02:58", "throughput": 1904.14, "total_tokens": 8375344}
|
| 4994 |
+
{"current_steps": 24875, "total_steps": 25880, "loss": 0.2375, "lr": 4.595742919252021e-06, "epoch": 19.223338485316848, "percentage": 96.12, "elapsed_time": "1:13:19", "remaining_time": "0:02:57", "throughput": 1904.15, "total_tokens": 8377040}
|
| 4995 |
+
{"current_steps": 24880, "total_steps": 25880, "loss": 0.2399, "lr": 4.550242361837765e-06, "epoch": 19.227202472952087, "percentage": 96.14, "elapsed_time": "1:13:20", "remaining_time": "0:02:56", "throughput": 1904.19, "total_tokens": 8378800}
|
| 4996 |
+
{"current_steps": 24885, "total_steps": 25880, "loss": 0.2457, "lr": 4.504967137940463e-06, "epoch": 19.231066460587325, "percentage": 96.16, "elapsed_time": "1:13:21", "remaining_time": "0:02:55", "throughput": 1904.23, "total_tokens": 8380560}
|
| 4997 |
+
{"current_steps": 24890, "total_steps": 25880, "loss": 0.2075, "lr": 4.459917268151426e-06, "epoch": 19.234930448222567, "percentage": 96.17, "elapsed_time": "1:13:21", "remaining_time": "0:02:55", "throughput": 1904.24, "total_tokens": 8382096}
|
| 4998 |
+
{"current_steps": 24895, "total_steps": 25880, "loss": 0.232, "lr": 4.415092772959706e-06, "epoch": 19.238794435857805, "percentage": 96.19, "elapsed_time": "1:13:22", "remaining_time": "0:02:54", "throughput": 1904.29, "total_tokens": 8384016}
|
| 4999 |
+
{"current_steps": 24900, "total_steps": 25880, "loss": 0.3156, "lr": 4.370493672751719e-06, "epoch": 19.242658423493044, "percentage": 96.21, "elapsed_time": "1:13:23", "remaining_time": "0:02:53", "throughput": 1904.36, "total_tokens": 8386000}
|
| 5000 |
+
{"current_steps": 24905, "total_steps": 25880, "loss": 0.2854, "lr": 4.326119987811461e-06, "epoch": 19.246522411128286, "percentage": 96.23, "elapsed_time": "1:13:24", "remaining_time": "0:02:52", "throughput": 1904.37, "total_tokens": 8387728}
|
| 5001 |
+
{"current_steps": 24910, "total_steps": 25880, "loss": 0.2184, "lr": 4.281971738320289e-06, "epoch": 19.250386398763524, "percentage": 96.25, "elapsed_time": "1:13:25", "remaining_time": "0:02:51", "throughput": 1904.4, "total_tokens": 8389456}
|
| 5002 |
+
{"current_steps": 24915, "total_steps": 25880, "loss": 0.2505, "lr": 4.238048944357087e-06, "epoch": 19.254250386398763, "percentage": 96.27, "elapsed_time": "1:13:26", "remaining_time": "0:02:50", "throughput": 1904.41, "total_tokens": 8391088}
|
| 5003 |
+
{"current_steps": 24920, "total_steps": 25880, "loss": 0.2523, "lr": 4.1943516258982625e-06, "epoch": 19.258114374034005, "percentage": 96.29, "elapsed_time": "1:13:26", "remaining_time": "0:02:49", "throughput": 1904.44, "total_tokens": 8392784}
|
| 5004 |
+
{"current_steps": 24925, "total_steps": 25880, "loss": 0.1885, "lr": 4.150879802817586e-06, "epoch": 19.261978361669243, "percentage": 96.31, "elapsed_time": "1:13:27", "remaining_time": "0:02:48", "throughput": 1904.47, "total_tokens": 8394512}
|
| 5005 |
+
{"current_steps": 24930, "total_steps": 25880, "loss": 0.2871, "lr": 4.107633494886243e-06, "epoch": 19.26584234930448, "percentage": 96.33, "elapsed_time": "1:13:28", "remaining_time": "0:02:47", "throughput": 1904.53, "total_tokens": 8396400}
|
| 5006 |
+
{"current_steps": 24935, "total_steps": 25880, "loss": 0.2383, "lr": 4.064612721773053e-06, "epoch": 19.269706336939723, "percentage": 96.35, "elapsed_time": "1:13:29", "remaining_time": "0:02:47", "throughput": 1904.59, "total_tokens": 8398320}
|
| 5007 |
+
{"current_steps": 24940, "total_steps": 25880, "loss": 0.2516, "lr": 4.021817503043979e-06, "epoch": 19.273570324574962, "percentage": 96.37, "elapsed_time": "1:13:30", "remaining_time": "0:02:46", "throughput": 1904.57, "total_tokens": 8399728}
|
| 5008 |
+
{"current_steps": 24945, "total_steps": 25880, "loss": 0.3058, "lr": 3.979247858162616e-06, "epoch": 19.2774343122102, "percentage": 96.39, "elapsed_time": "1:13:31", "remaining_time": "0:02:45", "throughput": 1904.61, "total_tokens": 8401584}
|
| 5009 |
+
{"current_steps": 24950, "total_steps": 25880, "loss": 0.2123, "lr": 3.936903806489866e-06, "epoch": 19.28129829984544, "percentage": 96.41, "elapsed_time": "1:13:32", "remaining_time": "0:02:44", "throughput": 1904.64, "total_tokens": 8403344}
|
| 5010 |
+
{"current_steps": 24955, "total_steps": 25880, "loss": 0.2276, "lr": 3.894785367284104e-06, "epoch": 19.28516228748068, "percentage": 96.43, "elapsed_time": "1:13:32", "remaining_time": "0:02:43", "throughput": 1904.64, "total_tokens": 8404912}
|
| 5011 |
+
{"current_steps": 24960, "total_steps": 25880, "loss": 0.299, "lr": 3.852892559701004e-06, "epoch": 19.28902627511592, "percentage": 96.45, "elapsed_time": "1:13:33", "remaining_time": "0:02:42", "throughput": 1904.67, "total_tokens": 8406736}
|
| 5012 |
+
{"current_steps": 24965, "total_steps": 25880, "loss": 0.2631, "lr": 3.8112254027936056e-06, "epoch": 19.292890262751158, "percentage": 96.46, "elapsed_time": "1:13:34", "remaining_time": "0:02:41", "throughput": 1904.72, "total_tokens": 8408560}
|
| 5013 |
+
{"current_steps": 24970, "total_steps": 25880, "loss": 0.2103, "lr": 3.7697839155125258e-06, "epoch": 19.2967542503864, "percentage": 96.48, "elapsed_time": "1:13:35", "remaining_time": "0:02:40", "throughput": 1904.73, "total_tokens": 8410192}
|
| 5014 |
+
{"current_steps": 24975, "total_steps": 25880, "loss": 0.2164, "lr": 3.728568116705522e-06, "epoch": 19.300618238021638, "percentage": 96.5, "elapsed_time": "1:13:36", "remaining_time": "0:02:40", "throughput": 1904.75, "total_tokens": 8411824}
|
| 5015 |
+
{"current_steps": 24980, "total_steps": 25880, "loss": 0.2508, "lr": 3.6875780251178213e-06, "epoch": 19.304482225656876, "percentage": 96.52, "elapsed_time": "1:13:37", "remaining_time": "0:02:39", "throughput": 1904.77, "total_tokens": 8413584}
|
| 5016 |
+
{"current_steps": 24985, "total_steps": 25880, "loss": 0.3537, "lr": 3.646813659391901e-06, "epoch": 19.30834621329212, "percentage": 96.54, "elapsed_time": "1:13:37", "remaining_time": "0:02:38", "throughput": 1904.77, "total_tokens": 8415088}
|
| 5017 |
+
{"current_steps": 24990, "total_steps": 25880, "loss": 0.2991, "lr": 3.6062750380676525e-06, "epoch": 19.312210200927357, "percentage": 96.56, "elapsed_time": "1:13:38", "remaining_time": "0:02:37", "throughput": 1904.8, "total_tokens": 8416912}
|
| 5018 |
+
{"current_steps": 24995, "total_steps": 25880, "loss": 0.3076, "lr": 3.5659621795822716e-06, "epoch": 19.316074188562595, "percentage": 96.58, "elapsed_time": "1:13:39", "remaining_time": "0:02:36", "throughput": 1904.8, "total_tokens": 8418384}
|
| 5019 |
+
{"current_steps": 25000, "total_steps": 25880, "loss": 0.211, "lr": 3.5258751022703705e-06, "epoch": 19.319938176197837, "percentage": 96.6, "elapsed_time": "1:13:40", "remaining_time": "0:02:35", "throughput": 1904.82, "total_tokens": 8420112}
|
| 5020 |
+
{"current_steps": 25005, "total_steps": 25880, "loss": 0.193, "lr": 3.4860138243636985e-06, "epoch": 19.323802163833076, "percentage": 96.62, "elapsed_time": "1:13:41", "remaining_time": "0:02:34", "throughput": 1904.82, "total_tokens": 8421680}
|
| 5021 |
+
{"current_steps": 25010, "total_steps": 25880, "loss": 0.3145, "lr": 3.4463783639915314e-06, "epoch": 19.327666151468314, "percentage": 96.64, "elapsed_time": "1:13:42", "remaining_time": "0:02:33", "throughput": 1904.85, "total_tokens": 8423376}
|
| 5022 |
+
{"current_steps": 25015, "total_steps": 25880, "loss": 0.2569, "lr": 3.406968739180172e-06, "epoch": 19.331530139103556, "percentage": 96.66, "elapsed_time": "1:13:42", "remaining_time": "0:02:32", "throughput": 1904.83, "total_tokens": 8424816}
|
| 5023 |
+
{"current_steps": 25020, "total_steps": 25880, "loss": 0.3233, "lr": 3.3677849678533933e-06, "epoch": 19.335394126738795, "percentage": 96.68, "elapsed_time": "1:13:43", "remaining_time": "0:02:32", "throughput": 1904.87, "total_tokens": 8426640}
|
| 5024 |
+
{"current_steps": 25025, "total_steps": 25880, "loss": 0.2554, "lr": 3.328827067832274e-06, "epoch": 19.339258114374033, "percentage": 96.7, "elapsed_time": "1:13:44", "remaining_time": "0:02:31", "throughput": 1904.9, "total_tokens": 8428464}
|
| 5025 |
+
{"current_steps": 25030, "total_steps": 25880, "loss": 0.2241, "lr": 3.290095056834974e-06, "epoch": 19.343122102009275, "percentage": 96.72, "elapsed_time": "1:13:45", "remaining_time": "0:02:30", "throughput": 1904.9, "total_tokens": 8430032}
|
| 5026 |
+
{"current_steps": 25035, "total_steps": 25880, "loss": 0.2678, "lr": 3.2515889524771803e-06, "epoch": 19.346986089644513, "percentage": 96.73, "elapsed_time": "1:13:46", "remaining_time": "0:02:29", "throughput": 1904.89, "total_tokens": 8431632}
|
| 5027 |
+
{"current_steps": 25040, "total_steps": 25880, "loss": 0.2261, "lr": 3.213308772271606e-06, "epoch": 19.350850077279752, "percentage": 96.75, "elapsed_time": "1:13:47", "remaining_time": "0:02:28", "throughput": 1904.95, "total_tokens": 8433584}
|
| 5028 |
+
{"current_steps": 25045, "total_steps": 25880, "loss": 0.1831, "lr": 3.17525453362838e-06, "epoch": 19.354714064914994, "percentage": 96.77, "elapsed_time": "1:13:48", "remaining_time": "0:02:27", "throughput": 1904.96, "total_tokens": 8435152}
|
| 5029 |
+
{"current_steps": 25050, "total_steps": 25880, "loss": 0.3181, "lr": 3.137426253854714e-06, "epoch": 19.358578052550232, "percentage": 96.79, "elapsed_time": "1:13:48", "remaining_time": "0:02:26", "throughput": 1904.99, "total_tokens": 8436848}
|
| 5030 |
+
{"current_steps": 25055, "total_steps": 25880, "loss": 0.2128, "lr": 3.099823950155234e-06, "epoch": 19.36244204018547, "percentage": 96.81, "elapsed_time": "1:13:49", "remaining_time": "0:02:25", "throughput": 1905.01, "total_tokens": 8438704}
|
| 5031 |
+
{"current_steps": 25060, "total_steps": 25880, "loss": 0.2723, "lr": 3.0624476396315937e-06, "epoch": 19.366306027820713, "percentage": 96.83, "elapsed_time": "1:13:50", "remaining_time": "0:02:24", "throughput": 1905.04, "total_tokens": 8440368}
|
| 5032 |
+
{"current_steps": 25065, "total_steps": 25880, "loss": 0.2889, "lr": 3.0252973392828064e-06, "epoch": 19.37017001545595, "percentage": 96.85, "elapsed_time": "1:13:51", "remaining_time": "0:02:24", "throughput": 1905.07, "total_tokens": 8442160}
|
| 5033 |
+
{"current_steps": 25070, "total_steps": 25880, "loss": 0.2569, "lr": 2.9883730660050788e-06, "epoch": 19.37403400309119, "percentage": 96.87, "elapsed_time": "1:13:52", "remaining_time": "0:02:23", "throughput": 1905.1, "total_tokens": 8443920}
|
| 5034 |
+
{"current_steps": 25075, "total_steps": 25880, "loss": 0.2329, "lr": 2.9516748365917557e-06, "epoch": 19.377897990726428, "percentage": 96.89, "elapsed_time": "1:13:53", "remaining_time": "0:02:22", "throughput": 1905.09, "total_tokens": 8445328}
|
| 5035 |
+
{"current_steps": 25080, "total_steps": 25880, "loss": 0.2374, "lr": 2.91520266773343e-06, "epoch": 19.38176197836167, "percentage": 96.91, "elapsed_time": "1:13:53", "remaining_time": "0:02:21", "throughput": 1905.07, "total_tokens": 8446768}
|
| 5036 |
+
{"current_steps": 25085, "total_steps": 25880, "loss": 0.1862, "lr": 2.8789565760178884e-06, "epoch": 19.38562596599691, "percentage": 96.93, "elapsed_time": "1:13:54", "remaining_time": "0:02:20", "throughput": 1905.07, "total_tokens": 8448240}
|
| 5037 |
+
{"current_steps": 25090, "total_steps": 25880, "loss": 0.2371, "lr": 2.8429365779299998e-06, "epoch": 19.389489953632147, "percentage": 96.95, "elapsed_time": "1:13:55", "remaining_time": "0:02:19", "throughput": 1905.09, "total_tokens": 8449936}
|
| 5038 |
+
{"current_steps": 25095, "total_steps": 25880, "loss": 0.2064, "lr": 2.807142689851938e-06, "epoch": 19.39335394126739, "percentage": 96.97, "elapsed_time": "1:13:56", "remaining_time": "0:02:18", "throughput": 1905.11, "total_tokens": 8451664}
|
| 5039 |
+
{"current_steps": 25100, "total_steps": 25880, "loss": 0.2121, "lr": 2.771574928062903e-06, "epoch": 19.397217928902627, "percentage": 96.99, "elapsed_time": "1:13:57", "remaining_time": "0:02:17", "throughput": 1905.11, "total_tokens": 8453232}
|
| 5040 |
+
{"current_steps": 25105, "total_steps": 25880, "loss": 0.3435, "lr": 2.7362333087394e-06, "epoch": 19.401081916537866, "percentage": 97.01, "elapsed_time": "1:13:57", "remaining_time": "0:02:17", "throughput": 1905.17, "total_tokens": 8455120}
|
| 5041 |
+
{"current_steps": 25110, "total_steps": 25880, "loss": 0.2272, "lr": 2.7011178479550147e-06, "epoch": 19.404945904173108, "percentage": 97.02, "elapsed_time": "1:13:58", "remaining_time": "0:02:16", "throughput": 1905.18, "total_tokens": 8456656}
|
| 5042 |
+
{"current_steps": 25115, "total_steps": 25880, "loss": 0.3517, "lr": 2.6662285616803616e-06, "epoch": 19.408809891808346, "percentage": 97.04, "elapsed_time": "1:13:59", "remaining_time": "0:02:15", "throughput": 1905.17, "total_tokens": 8458192}
|
| 5043 |
+
{"current_steps": 25120, "total_steps": 25880, "loss": 0.3331, "lr": 2.63156546578347e-06, "epoch": 19.412673879443584, "percentage": 97.06, "elapsed_time": "1:14:00", "remaining_time": "0:02:14", "throughput": 1905.2, "total_tokens": 8459984}
|
| 5044 |
+
{"current_steps": 25125, "total_steps": 25880, "loss": 0.2177, "lr": 2.597128576029173e-06, "epoch": 19.416537867078826, "percentage": 97.08, "elapsed_time": "1:14:01", "remaining_time": "0:02:13", "throughput": 1905.19, "total_tokens": 8461424}
|
| 5045 |
+
{"current_steps": 25130, "total_steps": 25880, "loss": 0.2596, "lr": 2.5629179080796095e-06, "epoch": 19.420401854714065, "percentage": 97.1, "elapsed_time": "1:14:02", "remaining_time": "0:02:12", "throughput": 1905.2, "total_tokens": 8463024}
|
| 5046 |
+
{"current_steps": 25135, "total_steps": 25880, "loss": 0.2654, "lr": 2.5289334774940555e-06, "epoch": 19.424265842349303, "percentage": 97.12, "elapsed_time": "1:14:02", "remaining_time": "0:02:11", "throughput": 1905.21, "total_tokens": 8464720}
|
| 5047 |
+
{"current_steps": 25140, "total_steps": 25880, "loss": 0.357, "lr": 2.495175299728758e-06, "epoch": 19.428129829984545, "percentage": 97.14, "elapsed_time": "1:14:03", "remaining_time": "0:02:10", "throughput": 1905.21, "total_tokens": 8466192}
|
| 5048 |
+
{"current_steps": 25145, "total_steps": 25880, "loss": 0.2874, "lr": 2.4616433901372138e-06, "epoch": 19.431993817619784, "percentage": 97.16, "elapsed_time": "1:14:04", "remaining_time": "0:02:09", "throughput": 1905.23, "total_tokens": 8467888}
|
| 5049 |
+
{"current_steps": 25150, "total_steps": 25880, "loss": 0.3277, "lr": 2.42833776396989e-06, "epoch": 19.435857805255022, "percentage": 97.18, "elapsed_time": "1:14:05", "remaining_time": "0:02:09", "throughput": 1905.26, "total_tokens": 8469680}
|
| 5050 |
+
{"current_steps": 25155, "total_steps": 25880, "loss": 0.2421, "lr": 2.3952584363743366e-06, "epoch": 19.439721792890264, "percentage": 97.2, "elapsed_time": "1:14:06", "remaining_time": "0:02:08", "throughput": 1905.28, "total_tokens": 8471408}
|
| 5051 |
+
{"current_steps": 25160, "total_steps": 25880, "loss": 0.2987, "lr": 2.3624054223953526e-06, "epoch": 19.443585780525503, "percentage": 97.22, "elapsed_time": "1:14:07", "remaining_time": "0:02:07", "throughput": 1905.31, "total_tokens": 8473072}
|
| 5052 |
+
{"current_steps": 25165, "total_steps": 25880, "loss": 0.4216, "lr": 2.329778736974597e-06, "epoch": 19.44744976816074, "percentage": 97.24, "elapsed_time": "1:14:07", "remaining_time": "0:02:06", "throughput": 1905.35, "total_tokens": 8474832}
|
| 5053 |
+
{"current_steps": 25170, "total_steps": 25880, "loss": 0.1953, "lr": 2.2973783949508665e-06, "epoch": 19.451313755795983, "percentage": 97.26, "elapsed_time": "1:14:08", "remaining_time": "0:02:05", "throughput": 1905.38, "total_tokens": 8476528}
|
| 5054 |
+
{"current_steps": 25175, "total_steps": 25880, "loss": 0.253, "lr": 2.2652044110600957e-06, "epoch": 19.45517774343122, "percentage": 97.28, "elapsed_time": "1:14:09", "remaining_time": "0:02:04", "throughput": 1905.41, "total_tokens": 8478224}
|
| 5055 |
+
{"current_steps": 25180, "total_steps": 25880, "loss": 0.2652, "lr": 2.2332567999351904e-06, "epoch": 19.45904173106646, "percentage": 97.3, "elapsed_time": "1:14:10", "remaining_time": "0:02:03", "throughput": 1905.42, "total_tokens": 8479760}
|
| 5056 |
+
{"current_steps": 25185, "total_steps": 25880, "loss": 0.229, "lr": 2.201535576106084e-06, "epoch": 19.462905718701702, "percentage": 97.31, "elapsed_time": "1:14:11", "remaining_time": "0:02:02", "throughput": 1905.45, "total_tokens": 8481584}
|
| 5057 |
+
{"current_steps": 25190, "total_steps": 25880, "loss": 0.3058, "lr": 2.1700407539997914e-06, "epoch": 19.46676970633694, "percentage": 97.33, "elapsed_time": "1:14:12", "remaining_time": "0:02:01", "throughput": 1905.49, "total_tokens": 8483440}
|
| 5058 |
+
{"current_steps": 25195, "total_steps": 25880, "loss": 0.1929, "lr": 2.1387723479403543e-06, "epoch": 19.47063369397218, "percentage": 97.35, "elapsed_time": "1:14:12", "remaining_time": "0:02:01", "throughput": 1905.52, "total_tokens": 8485200}
|
| 5059 |
+
{"current_steps": 25200, "total_steps": 25880, "loss": 0.2361, "lr": 2.1077303721488417e-06, "epoch": 19.474497681607417, "percentage": 97.37, "elapsed_time": "1:14:13", "remaining_time": "0:02:00", "throughput": 1905.54, "total_tokens": 8486928}
|
| 5060 |
+
{"current_steps": 25205, "total_steps": 25880, "loss": 0.2179, "lr": 2.0769148407432934e-06, "epoch": 19.47836166924266, "percentage": 97.39, "elapsed_time": "1:14:14", "remaining_time": "0:01:59", "throughput": 1905.56, "total_tokens": 8488592}
|
| 5061 |
+
{"current_steps": 25210, "total_steps": 25880, "loss": 0.2366, "lr": 2.0463257677388323e-06, "epoch": 19.482225656877898, "percentage": 97.41, "elapsed_time": "1:14:15", "remaining_time": "0:01:58", "throughput": 1905.55, "total_tokens": 8490064}
|
| 5062 |
+
{"current_steps": 25215, "total_steps": 25880, "loss": 0.2245, "lr": 2.015963167047552e-06, "epoch": 19.486089644513136, "percentage": 97.43, "elapsed_time": "1:14:16", "remaining_time": "0:01:57", "throughput": 1905.54, "total_tokens": 8491632}
|
| 5063 |
+
{"current_steps": 25220, "total_steps": 25880, "loss": 0.31, "lr": 1.9858270524785173e-06, "epoch": 19.489953632148378, "percentage": 97.45, "elapsed_time": "1:14:17", "remaining_time": "0:01:56", "throughput": 1905.56, "total_tokens": 8493360}
|
| 5064 |
+
{"current_steps": 25225, "total_steps": 25880, "loss": 0.2114, "lr": 1.9559174377378197e-06, "epoch": 19.493817619783616, "percentage": 97.47, "elapsed_time": "1:14:17", "remaining_time": "0:01:55", "throughput": 1905.59, "total_tokens": 8495088}
|
| 5065 |
+
{"current_steps": 25230, "total_steps": 25880, "loss": 0.3529, "lr": 1.9262343364285783e-06, "epoch": 19.497681607418855, "percentage": 97.49, "elapsed_time": "1:14:18", "remaining_time": "0:01:54", "throughput": 1905.59, "total_tokens": 8496592}
|
| 5066 |
+
{"current_steps": 25235, "total_steps": 25880, "loss": 0.2732, "lr": 1.8967777620507165e-06, "epoch": 19.501545595054097, "percentage": 97.51, "elapsed_time": "1:14:19", "remaining_time": "0:01:53", "throughput": 1905.61, "total_tokens": 8498288}
|
| 5067 |
+
{"current_steps": 25240, "total_steps": 25880, "loss": 0.2658, "lr": 1.8675477280014063e-06, "epoch": 19.505409582689335, "percentage": 97.53, "elapsed_time": "1:14:20", "remaining_time": "0:01:53", "throughput": 1905.63, "total_tokens": 8500016}
|
| 5068 |
+
{"current_steps": 25245, "total_steps": 25880, "loss": 0.2651, "lr": 1.838544247574625e-06, "epoch": 19.509273570324574, "percentage": 97.55, "elapsed_time": "1:14:21", "remaining_time": "0:01:52", "throughput": 1905.67, "total_tokens": 8501872}
|
| 5069 |
+
{"current_steps": 25250, "total_steps": 25880, "loss": 0.2371, "lr": 1.8097673339612653e-06, "epoch": 19.513137557959816, "percentage": 97.57, "elapsed_time": "1:14:22", "remaining_time": "0:01:51", "throughput": 1905.65, "total_tokens": 8503344}
|
| 5070 |
+
{"current_steps": 25255, "total_steps": 25880, "loss": 0.338, "lr": 1.7812170002492467e-06, "epoch": 19.517001545595054, "percentage": 97.59, "elapsed_time": "1:14:23", "remaining_time": "0:01:50", "throughput": 1905.67, "total_tokens": 8505104}
|
| 5071 |
+
{"current_steps": 25260, "total_steps": 25880, "loss": 0.2754, "lr": 1.7528932594234603e-06, "epoch": 19.520865533230292, "percentage": 97.6, "elapsed_time": "1:14:23", "remaining_time": "0:01:49", "throughput": 1905.69, "total_tokens": 8506768}
|
| 5072 |
+
{"current_steps": 25265, "total_steps": 25880, "loss": 0.2607, "lr": 1.7247961243657684e-06, "epoch": 19.524729520865534, "percentage": 97.62, "elapsed_time": "1:14:24", "remaining_time": "0:01:48", "throughput": 1905.69, "total_tokens": 8508368}
|
| 5073 |
+
{"current_steps": 25270, "total_steps": 25880, "loss": 0.2354, "lr": 1.6969256078547823e-06, "epoch": 19.528593508500773, "percentage": 97.64, "elapsed_time": "1:14:25", "remaining_time": "0:01:47", "throughput": 1905.72, "total_tokens": 8510096}
|
| 5074 |
+
{"current_steps": 25275, "total_steps": 25880, "loss": 0.2131, "lr": 1.669281722566196e-06, "epoch": 19.53245749613601, "percentage": 97.66, "elapsed_time": "1:14:26", "remaining_time": "0:01:46", "throughput": 1905.73, "total_tokens": 8511600}
|
| 5075 |
+
{"current_steps": 25280, "total_steps": 25880, "loss": 0.2601, "lr": 1.6418644810727857e-06, "epoch": 19.536321483771253, "percentage": 97.68, "elapsed_time": "1:14:27", "remaining_time": "0:01:46", "throughput": 1905.72, "total_tokens": 8513072}
|
| 5076 |
+
{"current_steps": 25285, "total_steps": 25880, "loss": 0.2577, "lr": 1.61467389584391e-06, "epoch": 19.54018547140649, "percentage": 97.7, "elapsed_time": "1:14:27", "remaining_time": "0:01:45", "throughput": 1905.77, "total_tokens": 8514960}
|
| 5077 |
+
{"current_steps": 25290, "total_steps": 25880, "loss": 0.2586, "lr": 1.5877099792460658e-06, "epoch": 19.54404945904173, "percentage": 97.72, "elapsed_time": "1:14:28", "remaining_time": "0:01:44", "throughput": 1905.8, "total_tokens": 8516784}
|
| 5078 |
+
{"current_steps": 25295, "total_steps": 25880, "loss": 0.2115, "lr": 1.5609727435425548e-06, "epoch": 19.547913446676972, "percentage": 97.74, "elapsed_time": "1:14:29", "remaining_time": "0:01:43", "throughput": 1905.8, "total_tokens": 8518256}
|
| 5079 |
+
{"current_steps": 25300, "total_steps": 25880, "loss": 0.2587, "lr": 1.5344622008937603e-06, "epoch": 19.55177743431221, "percentage": 97.76, "elapsed_time": "1:14:30", "remaining_time": "0:01:42", "throughput": 1905.82, "total_tokens": 8519952}
|
| 5080 |
+
{"current_steps": 25305, "total_steps": 25880, "loss": 0.2449, "lr": 1.5081783633566494e-06, "epoch": 19.55564142194745, "percentage": 97.78, "elapsed_time": "1:14:31", "remaining_time": "0:01:41", "throughput": 1905.86, "total_tokens": 8521680}
|
| 5081 |
+
{"current_steps": 25310, "total_steps": 25880, "loss": 0.2081, "lr": 1.482121242885437e-06, "epoch": 19.55950540958269, "percentage": 97.8, "elapsed_time": "1:14:32", "remaining_time": "0:01:40", "throughput": 1905.86, "total_tokens": 8523280}
|
| 5082 |
+
{"current_steps": 25315, "total_steps": 25880, "loss": 0.1849, "lr": 1.456290851330977e-06, "epoch": 19.56336939721793, "percentage": 97.82, "elapsed_time": "1:14:32", "remaining_time": "0:01:39", "throughput": 1905.84, "total_tokens": 8524656}
|
| 5083 |
+
{"current_steps": 25320, "total_steps": 25880, "loss": 0.2235, "lr": 1.4306872004410942e-06, "epoch": 19.567233384853168, "percentage": 97.84, "elapsed_time": "1:14:33", "remaining_time": "0:01:38", "throughput": 1905.87, "total_tokens": 8526384}
|
| 5084 |
+
{"current_steps": 25325, "total_steps": 25880, "loss": 0.1987, "lr": 1.405310301860474e-06, "epoch": 19.57109737248841, "percentage": 97.86, "elapsed_time": "1:14:34", "remaining_time": "0:01:38", "throughput": 1905.88, "total_tokens": 8527984}
|
| 5085 |
+
{"current_steps": 25330, "total_steps": 25880, "loss": 0.1912, "lr": 1.3801601671307173e-06, "epoch": 19.57496136012365, "percentage": 97.87, "elapsed_time": "1:14:35", "remaining_time": "0:01:37", "throughput": 1905.89, "total_tokens": 8529552}
|
| 5086 |
+
{"current_steps": 25335, "total_steps": 25880, "loss": 0.2569, "lr": 1.35523680769023e-06, "epoch": 19.578825347758887, "percentage": 97.89, "elapsed_time": "1:14:36", "remaining_time": "0:01:36", "throughput": 1905.93, "total_tokens": 8531440}
|
| 5087 |
+
{"current_steps": 25340, "total_steps": 25880, "loss": 0.3012, "lr": 1.3305402348742778e-06, "epoch": 19.582689335394125, "percentage": 97.91, "elapsed_time": "1:14:37", "remaining_time": "0:01:35", "throughput": 1905.94, "total_tokens": 8533104}
|
| 5088 |
+
{"current_steps": 25345, "total_steps": 25880, "loss": 0.2643, "lr": 1.306070459915043e-06, "epoch": 19.586553323029367, "percentage": 97.93, "elapsed_time": "1:14:37", "remaining_time": "0:01:34", "throughput": 1905.97, "total_tokens": 8534896}
|
| 5089 |
+
{"current_steps": 25350, "total_steps": 25880, "loss": 0.2512, "lr": 1.2818274939415676e-06, "epoch": 19.590417310664606, "percentage": 97.95, "elapsed_time": "1:14:38", "remaining_time": "0:01:33", "throughput": 1905.97, "total_tokens": 8536464}
|
| 5090 |
+
{"current_steps": 25355, "total_steps": 25880, "loss": 0.2426, "lr": 1.2578113479796428e-06, "epoch": 19.594281298299844, "percentage": 97.97, "elapsed_time": "1:14:39", "remaining_time": "0:01:32", "throughput": 1905.97, "total_tokens": 8538032}
|
| 5091 |
+
{"current_steps": 25360, "total_steps": 25880, "loss": 0.2209, "lr": 1.2340220329519757e-06, "epoch": 19.598145285935086, "percentage": 97.99, "elapsed_time": "1:14:40", "remaining_time": "0:01:31", "throughput": 1906.03, "total_tokens": 8540016}
|
| 5092 |
+
{"current_steps": 25365, "total_steps": 25880, "loss": 0.2797, "lr": 1.2104595596780232e-06, "epoch": 19.602009273570324, "percentage": 98.01, "elapsed_time": "1:14:41", "remaining_time": "0:01:30", "throughput": 1906.08, "total_tokens": 8541968}
|
| 5093 |
+
{"current_steps": 25370, "total_steps": 25880, "loss": 0.3018, "lr": 1.1871239388742683e-06, "epoch": 19.605873261205563, "percentage": 98.03, "elapsed_time": "1:14:42", "remaining_time": "0:01:30", "throughput": 1906.11, "total_tokens": 8543696}
|
| 5094 |
+
{"current_steps": 25375, "total_steps": 25880, "loss": 0.2138, "lr": 1.1640151811537768e-06, "epoch": 19.609737248840805, "percentage": 98.05, "elapsed_time": "1:14:43", "remaining_time": "0:01:29", "throughput": 1906.12, "total_tokens": 8545456}
|
| 5095 |
+
{"current_steps": 25380, "total_steps": 25880, "loss": 0.2372, "lr": 1.1411332970265864e-06, "epoch": 19.613601236476043, "percentage": 98.07, "elapsed_time": "1:14:43", "remaining_time": "0:01:28", "throughput": 1906.12, "total_tokens": 8546960}
|
| 5096 |
+
{"current_steps": 25385, "total_steps": 25880, "loss": 0.2401, "lr": 1.1184782968995388e-06, "epoch": 19.61746522411128, "percentage": 98.09, "elapsed_time": "1:14:44", "remaining_time": "0:01:27", "throughput": 1906.11, "total_tokens": 8548368}
|
| 5097 |
+
{"current_steps": 25390, "total_steps": 25880, "loss": 0.2438, "lr": 1.0960501910762256e-06, "epoch": 19.621329211746524, "percentage": 98.11, "elapsed_time": "1:14:45", "remaining_time": "0:01:26", "throughput": 1906.14, "total_tokens": 8550128}
|
| 5098 |
+
{"current_steps": 25395, "total_steps": 25880, "loss": 0.296, "lr": 1.0738489897571536e-06, "epoch": 19.625193199381762, "percentage": 98.13, "elapsed_time": "1:14:46", "remaining_time": "0:01:25", "throughput": 1906.13, "total_tokens": 8551600}
|
| 5099 |
+
{"current_steps": 25400, "total_steps": 25880, "loss": 0.2869, "lr": 1.0518747030394682e-06, "epoch": 19.629057187017, "percentage": 98.15, "elapsed_time": "1:14:47", "remaining_time": "0:01:24", "throughput": 1906.16, "total_tokens": 8553232}
|
| 5100 |
+
{"current_steps": 25405, "total_steps": 25880, "loss": 0.2646, "lr": 1.0301273409172862e-06, "epoch": 19.632921174652243, "percentage": 98.16, "elapsed_time": "1:14:47", "remaining_time": "0:01:23", "throughput": 1906.16, "total_tokens": 8554768}
|
| 5101 |
+
{"current_steps": 25410, "total_steps": 25880, "loss": 0.3, "lr": 1.0086069132813625e-06, "epoch": 19.63678516228748, "percentage": 98.18, "elapsed_time": "1:14:48", "remaining_time": "0:01:23", "throughput": 1906.14, "total_tokens": 8556176}
|
| 5102 |
+
{"current_steps": 25415, "total_steps": 25880, "loss": 0.2662, "lr": 9.873134299193676e-07, "epoch": 19.64064914992272, "percentage": 98.2, "elapsed_time": "1:14:49", "remaining_time": "0:01:22", "throughput": 1906.16, "total_tokens": 8557872}
|
| 5103 |
+
{"current_steps": 25420, "total_steps": 25880, "loss": 0.2017, "lr": 9.662469005157216e-07, "epoch": 19.64451313755796, "percentage": 98.22, "elapsed_time": "1:14:50", "remaining_time": "0:01:21", "throughput": 1906.21, "total_tokens": 8559760}
|
| 5104 |
+
{"current_steps": 25425, "total_steps": 25880, "loss": 0.2428, "lr": 9.45407334651538e-07, "epoch": 19.6483771251932, "percentage": 98.24, "elapsed_time": "1:14:51", "remaining_time": "0:01:20", "throughput": 1906.21, "total_tokens": 8561296}
|
| 5105 |
+
{"current_steps": 25430, "total_steps": 25880, "loss": 0.3148, "lr": 9.247947418048464e-07, "epoch": 19.652241112828438, "percentage": 98.26, "elapsed_time": "1:14:52", "remaining_time": "0:01:19", "throughput": 1906.27, "total_tokens": 8563376}
|
| 5106 |
+
{"current_steps": 25435, "total_steps": 25880, "loss": 0.3044, "lr": 9.044091313503145e-07, "epoch": 19.65610510046368, "percentage": 98.28, "elapsed_time": "1:14:53", "remaining_time": "0:01:18", "throughput": 1906.28, "total_tokens": 8565040}
|
| 5107 |
+
{"current_steps": 25440, "total_steps": 25880, "loss": 0.2553, "lr": 8.842505125595257e-07, "epoch": 19.65996908809892, "percentage": 98.3, "elapsed_time": "1:14:53", "remaining_time": "0:01:17", "throughput": 1906.29, "total_tokens": 8566608}
|
| 5108 |
+
{"current_steps": 25445, "total_steps": 25880, "loss": 0.2234, "lr": 8.643188946006464e-07, "epoch": 19.663833075734157, "percentage": 98.32, "elapsed_time": "1:14:54", "remaining_time": "0:01:16", "throughput": 1906.33, "total_tokens": 8568400}
|
| 5109 |
+
{"current_steps": 25450, "total_steps": 25880, "loss": 0.2635, "lr": 8.44614286538814e-07, "epoch": 19.667697063369395, "percentage": 98.34, "elapsed_time": "1:14:55", "remaining_time": "0:01:15", "throughput": 1906.36, "total_tokens": 8570160}
|
| 5110 |
+
{"current_steps": 25455, "total_steps": 25880, "loss": 0.2147, "lr": 8.251366973356378e-07, "epoch": 19.671561051004637, "percentage": 98.36, "elapsed_time": "1:14:56", "remaining_time": "0:01:15", "throughput": 1906.38, "total_tokens": 8571824}
|
| 5111 |
+
{"current_steps": 25460, "total_steps": 25880, "loss": 0.3175, "lr": 8.058861358498093e-07, "epoch": 19.675425038639876, "percentage": 98.38, "elapsed_time": "1:14:57", "remaining_time": "0:01:14", "throughput": 1906.4, "total_tokens": 8573520}
|
| 5112 |
+
{"current_steps": 25465, "total_steps": 25880, "loss": 0.2109, "lr": 7.868626108364918e-07, "epoch": 19.679289026275114, "percentage": 98.4, "elapsed_time": "1:14:58", "remaining_time": "0:01:13", "throughput": 1906.43, "total_tokens": 8575184}
|
| 5113 |
+
{"current_steps": 25470, "total_steps": 25880, "loss": 0.3219, "lr": 7.680661309477088e-07, "epoch": 19.683153013910356, "percentage": 98.42, "elapsed_time": "1:14:58", "remaining_time": "0:01:12", "throughput": 1906.45, "total_tokens": 8576976}
|
| 5114 |
+
{"current_steps": 25475, "total_steps": 25880, "loss": 0.2874, "lr": 7.494967047322332e-07, "epoch": 19.687017001545595, "percentage": 98.44, "elapsed_time": "1:14:59", "remaining_time": "0:01:11", "throughput": 1906.47, "total_tokens": 8578704}
|
| 5115 |
+
{"current_steps": 25480, "total_steps": 25880, "loss": 0.3256, "lr": 7.311543406355869e-07, "epoch": 19.690880989180833, "percentage": 98.45, "elapsed_time": "1:15:00", "remaining_time": "0:01:10", "throughput": 1906.48, "total_tokens": 8580272}
|
| 5116 |
+
{"current_steps": 25485, "total_steps": 25880, "loss": 0.2296, "lr": 7.130390469999304e-07, "epoch": 19.694744976816075, "percentage": 98.47, "elapsed_time": "1:15:01", "remaining_time": "0:01:09", "throughput": 1906.49, "total_tokens": 8581872}
|
| 5117 |
+
{"current_steps": 25490, "total_steps": 25880, "loss": 0.2513, "lr": 6.951508320642286e-07, "epoch": 19.698608964451314, "percentage": 98.49, "elapsed_time": "1:15:02", "remaining_time": "0:01:08", "throughput": 1906.46, "total_tokens": 8583248}
|
| 5118 |
+
{"current_steps": 25495, "total_steps": 25880, "loss": 0.2879, "lr": 6.774897039641403e-07, "epoch": 19.702472952086552, "percentage": 98.51, "elapsed_time": "1:15:02", "remaining_time": "0:01:07", "throughput": 1906.45, "total_tokens": 8584688}
|
| 5119 |
+
{"current_steps": 25500, "total_steps": 25880, "loss": 0.2178, "lr": 6.600556707320737e-07, "epoch": 19.706336939721794, "percentage": 98.53, "elapsed_time": "1:15:03", "remaining_time": "0:01:07", "throughput": 1906.45, "total_tokens": 8586256}
|
| 5120 |
+
{"current_steps": 25505, "total_steps": 25880, "loss": 0.2962, "lr": 6.428487402971306e-07, "epoch": 19.710200927357032, "percentage": 98.55, "elapsed_time": "1:15:04", "remaining_time": "0:01:06", "throughput": 1906.46, "total_tokens": 8587920}
|
| 5121 |
+
{"current_steps": 25510, "total_steps": 25880, "loss": 0.2556, "lr": 6.258689204850509e-07, "epoch": 19.71406491499227, "percentage": 98.57, "elapsed_time": "1:15:05", "remaining_time": "0:01:05", "throughput": 1906.48, "total_tokens": 8589520}
|
| 5122 |
+
{"current_steps": 25515, "total_steps": 25880, "loss": 0.2169, "lr": 6.091162190184907e-07, "epoch": 19.717928902627513, "percentage": 98.59, "elapsed_time": "1:15:06", "remaining_time": "0:01:04", "throughput": 1906.5, "total_tokens": 8591216}
|
| 5123 |
+
{"current_steps": 25520, "total_steps": 25880, "loss": 0.1746, "lr": 5.925906435165773e-07, "epoch": 19.72179289026275, "percentage": 98.61, "elapsed_time": "1:15:07", "remaining_time": "0:01:03", "throughput": 1906.54, "total_tokens": 8593104}
|
| 5124 |
+
{"current_steps": 25525, "total_steps": 25880, "loss": 0.2531, "lr": 5.762922014952432e-07, "epoch": 19.72565687789799, "percentage": 98.63, "elapsed_time": "1:15:07", "remaining_time": "0:01:02", "throughput": 1906.55, "total_tokens": 8594704}
|
| 5125 |
+
{"current_steps": 25530, "total_steps": 25880, "loss": 0.2787, "lr": 5.602209003672254e-07, "epoch": 19.72952086553323, "percentage": 98.65, "elapsed_time": "1:15:08", "remaining_time": "0:01:01", "throughput": 1906.56, "total_tokens": 8596336}
|
| 5126 |
+
{"current_steps": 25535, "total_steps": 25880, "loss": 0.2595, "lr": 5.443767474417327e-07, "epoch": 19.73338485316847, "percentage": 98.67, "elapsed_time": "1:15:09", "remaining_time": "0:01:00", "throughput": 1906.56, "total_tokens": 8597840}
|
| 5127 |
+
{"current_steps": 25540, "total_steps": 25880, "loss": 0.2639, "lr": 5.287597499247787e-07, "epoch": 19.73724884080371, "percentage": 98.69, "elapsed_time": "1:15:10", "remaining_time": "0:01:00", "throughput": 1906.59, "total_tokens": 8599536}
|
| 5128 |
+
{"current_steps": 25545, "total_steps": 25880, "loss": 0.2118, "lr": 5.133699149191262e-07, "epoch": 19.74111282843895, "percentage": 98.71, "elapsed_time": "1:15:11", "remaining_time": "0:00:59", "throughput": 1906.62, "total_tokens": 8601296}
|
| 5129 |
+
{"current_steps": 25550, "total_steps": 25880, "loss": 0.3445, "lr": 4.982072494241763e-07, "epoch": 19.74497681607419, "percentage": 98.72, "elapsed_time": "1:15:12", "remaining_time": "0:00:58", "throughput": 1906.63, "total_tokens": 8602960}
|
| 5130 |
+
{"current_steps": 25555, "total_steps": 25880, "loss": 0.1912, "lr": 4.832717603359127e-07, "epoch": 19.748840803709427, "percentage": 98.74, "elapsed_time": "1:15:12", "remaining_time": "0:00:57", "throughput": 1906.61, "total_tokens": 8604368}
|
| 5131 |
+
{"current_steps": 25560, "total_steps": 25880, "loss": 0.2919, "lr": 4.685634544471795e-07, "epoch": 19.75270479134467, "percentage": 98.76, "elapsed_time": "1:15:13", "remaining_time": "0:00:56", "throughput": 1906.61, "total_tokens": 8605904}
|
| 5132 |
+
{"current_steps": 25565, "total_steps": 25880, "loss": 0.2896, "lr": 4.5408233844740355e-07, "epoch": 19.756568778979908, "percentage": 98.78, "elapsed_time": "1:15:14", "remaining_time": "0:00:55", "throughput": 1906.64, "total_tokens": 8607632}
|
| 5133 |
+
{"current_steps": 25570, "total_steps": 25880, "loss": 0.236, "lr": 4.398284189225943e-07, "epoch": 19.760432766615146, "percentage": 98.8, "elapsed_time": "1:15:15", "remaining_time": "0:00:54", "throughput": 1906.65, "total_tokens": 8609200}
|
| 5134 |
+
{"current_steps": 25575, "total_steps": 25880, "loss": 0.2684, "lr": 4.2580170235556606e-07, "epoch": 19.764296754250385, "percentage": 98.82, "elapsed_time": "1:15:16", "remaining_time": "0:00:53", "throughput": 1906.66, "total_tokens": 8610960}
|
| 5135 |
+
{"current_steps": 25580, "total_steps": 25880, "loss": 0.2947, "lr": 4.120021951257713e-07, "epoch": 19.768160741885627, "percentage": 98.84, "elapsed_time": "1:15:17", "remaining_time": "0:00:52", "throughput": 1906.71, "total_tokens": 8612848}
|
| 5136 |
+
{"current_steps": 25585, "total_steps": 25880, "loss": 0.2606, "lr": 3.9842990350924533e-07, "epoch": 19.772024729520865, "percentage": 98.86, "elapsed_time": "1:15:17", "remaining_time": "0:00:52", "throughput": 1906.71, "total_tokens": 8614352}
|
| 5137 |
+
{"current_steps": 25590, "total_steps": 25880, "loss": 0.2145, "lr": 3.850848336788282e-07, "epoch": 19.775888717156104, "percentage": 98.88, "elapsed_time": "1:15:18", "remaining_time": "0:00:51", "throughput": 1906.76, "total_tokens": 8616272}
|
| 5138 |
+
{"current_steps": 25595, "total_steps": 25880, "loss": 0.2581, "lr": 3.7196699170394256e-07, "epoch": 19.779752704791346, "percentage": 98.9, "elapsed_time": "1:15:19", "remaining_time": "0:00:50", "throughput": 1906.78, "total_tokens": 8618032}
|
| 5139 |
+
{"current_steps": 25600, "total_steps": 25880, "loss": 0.2338, "lr": 3.5907638355059393e-07, "epoch": 19.783616692426584, "percentage": 98.92, "elapsed_time": "1:15:20", "remaining_time": "0:00:49", "throughput": 1906.77, "total_tokens": 8619504}
|
| 5140 |
+
{"current_steps": 25605, "total_steps": 25880, "loss": 0.2247, "lr": 3.46413015081537e-07, "epoch": 19.787480680061822, "percentage": 98.94, "elapsed_time": "1:15:21", "remaining_time": "0:00:48", "throughput": 1906.77, "total_tokens": 8620976}
|
| 5141 |
+
{"current_steps": 25610, "total_steps": 25880, "loss": 0.3114, "lr": 3.339768920561648e-07, "epoch": 19.791344667697064, "percentage": 98.96, "elapsed_time": "1:15:22", "remaining_time": "0:00:47", "throughput": 1906.76, "total_tokens": 8622448}
|
| 5142 |
+
{"current_steps": 25615, "total_steps": 25880, "loss": 0.2078, "lr": 3.2176802013045294e-07, "epoch": 19.795208655332303, "percentage": 98.98, "elapsed_time": "1:15:22", "remaining_time": "0:00:46", "throughput": 1906.75, "total_tokens": 8623856}
|
| 5143 |
+
{"current_steps": 25620, "total_steps": 25880, "loss": 0.2255, "lr": 3.097864048571264e-07, "epoch": 19.79907264296754, "percentage": 99.0, "elapsed_time": "1:15:23", "remaining_time": "0:00:45", "throughput": 1906.76, "total_tokens": 8625456}
|
| 5144 |
+
{"current_steps": 25625, "total_steps": 25880, "loss": 0.3276, "lr": 2.9803205168543733e-07, "epoch": 19.802936630602783, "percentage": 99.01, "elapsed_time": "1:15:24", "remaining_time": "0:00:45", "throughput": 1906.8, "total_tokens": 8627408}
|
| 5145 |
+
{"current_steps": 25630, "total_steps": 25880, "loss": 0.2197, "lr": 2.865049659613872e-07, "epoch": 19.80680061823802, "percentage": 99.03, "elapsed_time": "1:15:25", "remaining_time": "0:00:44", "throughput": 1906.8, "total_tokens": 8628944}
|
| 5146 |
+
{"current_steps": 25635, "total_steps": 25880, "loss": 0.2383, "lr": 2.752051529275601e-07, "epoch": 19.81066460587326, "percentage": 99.05, "elapsed_time": "1:15:26", "remaining_time": "0:00:43", "throughput": 1906.82, "total_tokens": 8630672}
|
| 5147 |
+
{"current_steps": 25640, "total_steps": 25880, "loss": 0.3242, "lr": 2.641326177231784e-07, "epoch": 19.814528593508502, "percentage": 99.07, "elapsed_time": "1:15:27", "remaining_time": "0:00:42", "throughput": 1906.84, "total_tokens": 8632336}
|
| 5148 |
+
{"current_steps": 25645, "total_steps": 25880, "loss": 0.2273, "lr": 2.5328736538404726e-07, "epoch": 19.81839258114374, "percentage": 99.09, "elapsed_time": "1:15:27", "remaining_time": "0:00:41", "throughput": 1906.84, "total_tokens": 8633840}
|
| 5149 |
+
{"current_steps": 25650, "total_steps": 25880, "loss": 0.2253, "lr": 2.4266940084272106e-07, "epoch": 19.82225656877898, "percentage": 99.11, "elapsed_time": "1:15:28", "remaining_time": "0:00:40", "throughput": 1906.9, "total_tokens": 8635792}
|
| 5150 |
+
{"current_steps": 25655, "total_steps": 25880, "loss": 0.2686, "lr": 2.3227872892822577e-07, "epoch": 19.82612055641422, "percentage": 99.13, "elapsed_time": "1:15:29", "remaining_time": "0:00:39", "throughput": 1906.97, "total_tokens": 8637904}
|
| 5151 |
+
{"current_steps": 25660, "total_steps": 25880, "loss": 0.2238, "lr": 2.221153543663923e-07, "epoch": 19.82998454404946, "percentage": 99.15, "elapsed_time": "1:15:30", "remaining_time": "0:00:38", "throughput": 1907.0, "total_tokens": 8639696}
|
| 5152 |
+
{"current_steps": 25665, "total_steps": 25880, "loss": 0.2412, "lr": 2.1217928177957868e-07, "epoch": 19.833848531684698, "percentage": 99.17, "elapsed_time": "1:15:31", "remaining_time": "0:00:37", "throughput": 1907.01, "total_tokens": 8641360}
|
| 5153 |
+
{"current_steps": 25670, "total_steps": 25880, "loss": 0.2869, "lr": 2.0247051568667018e-07, "epoch": 19.83771251931994, "percentage": 99.19, "elapsed_time": "1:15:32", "remaining_time": "0:00:37", "throughput": 1907.04, "total_tokens": 8643216}
|
| 5154 |
+
{"current_steps": 25675, "total_steps": 25880, "loss": 0.3423, "lr": 1.9298906050341237e-07, "epoch": 19.841576506955178, "percentage": 99.21, "elapsed_time": "1:15:33", "remaining_time": "0:00:36", "throughput": 1907.05, "total_tokens": 8644816}
|
| 5155 |
+
{"current_steps": 25680, "total_steps": 25880, "loss": 0.35, "lr": 1.8373492054191143e-07, "epoch": 19.845440494590417, "percentage": 99.23, "elapsed_time": "1:15:33", "remaining_time": "0:00:35", "throughput": 1907.05, "total_tokens": 8646352}
|
| 5156 |
+
{"current_steps": 25685, "total_steps": 25880, "loss": 0.2459, "lr": 1.747081000110784e-07, "epoch": 19.84930448222566, "percentage": 99.25, "elapsed_time": "1:15:34", "remaining_time": "0:00:34", "throughput": 1907.06, "total_tokens": 8648048}
|
| 5157 |
+
{"current_steps": 25690, "total_steps": 25880, "loss": 0.233, "lr": 1.6590860301629596e-07, "epoch": 19.853168469860897, "percentage": 99.27, "elapsed_time": "1:15:35", "remaining_time": "0:00:33", "throughput": 1907.07, "total_tokens": 8649584}
|
| 5158 |
+
{"current_steps": 25695, "total_steps": 25880, "loss": 0.2723, "lr": 1.5733643355969606e-07, "epoch": 19.857032457496135, "percentage": 99.29, "elapsed_time": "1:15:36", "remaining_time": "0:00:32", "throughput": 1907.07, "total_tokens": 8651152}
|
| 5159 |
+
{"current_steps": 25700, "total_steps": 25880, "loss": 0.2473, "lr": 1.489915955399379e-07, "epoch": 19.860896445131374, "percentage": 99.3, "elapsed_time": "1:15:37", "remaining_time": "0:00:31", "throughput": 1907.06, "total_tokens": 8652560}
|
| 5160 |
+
{"current_steps": 25705, "total_steps": 25880, "loss": 0.3147, "lr": 1.4087409275226336e-07, "epoch": 19.864760432766616, "percentage": 99.32, "elapsed_time": "1:15:38", "remaining_time": "0:00:30", "throughput": 1907.1, "total_tokens": 8654448}
|
| 5161 |
+
{"current_steps": 25710, "total_steps": 25880, "loss": 0.2636, "lr": 1.329839288886081e-07, "epoch": 19.868624420401854, "percentage": 99.34, "elapsed_time": "1:15:38", "remaining_time": "0:00:30", "throughput": 1907.12, "total_tokens": 8656144}
|
| 5162 |
+
{"current_steps": 25715, "total_steps": 25880, "loss": 0.228, "lr": 1.2532110753743497e-07, "epoch": 19.872488408037093, "percentage": 99.36, "elapsed_time": "1:15:39", "remaining_time": "0:00:29", "throughput": 1907.12, "total_tokens": 8657776}
|
| 5163 |
+
{"current_steps": 25720, "total_steps": 25880, "loss": 0.2332, "lr": 1.1788563218390058e-07, "epoch": 19.876352395672335, "percentage": 99.38, "elapsed_time": "1:15:40", "remaining_time": "0:00:28", "throughput": 1907.14, "total_tokens": 8659472}
|
| 5164 |
+
{"current_steps": 25725, "total_steps": 25880, "loss": 0.2738, "lr": 1.1067750620957772e-07, "epoch": 19.880216383307573, "percentage": 99.4, "elapsed_time": "1:15:41", "remaining_time": "0:00:27", "throughput": 1907.15, "total_tokens": 8661136}
|
| 5165 |
+
{"current_steps": 25730, "total_steps": 25880, "loss": 0.2732, "lr": 1.0369673289289949e-07, "epoch": 19.88408037094281, "percentage": 99.42, "elapsed_time": "1:15:42", "remaining_time": "0:00:26", "throughput": 1907.17, "total_tokens": 8662800}
|
| 5166 |
+
{"current_steps": 25735, "total_steps": 25880, "loss": 0.2265, "lr": 9.694331540871514e-08, "epoch": 19.887944358578054, "percentage": 99.44, "elapsed_time": "1:15:43", "remaining_time": "0:00:25", "throughput": 1907.2, "total_tokens": 8664592}
|
| 5167 |
+
{"current_steps": 25740, "total_steps": 25880, "loss": 0.2897, "lr": 9.041725682851221e-08, "epoch": 19.891808346213292, "percentage": 99.46, "elapsed_time": "1:15:43", "remaining_time": "0:00:24", "throughput": 1907.24, "total_tokens": 8666480}
|
| 5168 |
+
{"current_steps": 25745, "total_steps": 25880, "loss": 0.2704, "lr": 8.41185601203609e-08, "epoch": 19.89567233384853, "percentage": 99.48, "elapsed_time": "1:15:44", "remaining_time": "0:00:23", "throughput": 1907.25, "total_tokens": 8668112}
|
| 5169 |
+
{"current_steps": 25750, "total_steps": 25880, "loss": 0.2464, "lr": 7.80472281489697e-08, "epoch": 19.899536321483772, "percentage": 99.5, "elapsed_time": "1:15:45", "remaining_time": "0:00:22", "throughput": 1907.26, "total_tokens": 8669776}
|
| 5170 |
+
{"current_steps": 25755, "total_steps": 25880, "loss": 0.335, "lr": 7.22032636756298e-08, "epoch": 19.90340030911901, "percentage": 99.52, "elapsed_time": "1:15:46", "remaining_time": "0:00:22", "throughput": 1907.3, "total_tokens": 8671696}
|
| 5171 |
+
{"current_steps": 25760, "total_steps": 25880, "loss": 0.3334, "lr": 6.65866693581596e-08, "epoch": 19.90726429675425, "percentage": 99.54, "elapsed_time": "1:15:47", "remaining_time": "0:00:21", "throughput": 1907.34, "total_tokens": 8673616}
|
| 5172 |
+
{"current_steps": 25765, "total_steps": 25880, "loss": 0.2924, "lr": 6.119744775107128e-08, "epoch": 19.91112828438949, "percentage": 99.56, "elapsed_time": "1:15:48", "remaining_time": "0:00:20", "throughput": 1907.36, "total_tokens": 8675312}
|
| 5173 |
+
{"current_steps": 25770, "total_steps": 25880, "loss": 0.2185, "lr": 5.6035601305404196e-08, "epoch": 19.91499227202473, "percentage": 99.57, "elapsed_time": "1:15:49", "remaining_time": "0:00:19", "throughput": 1907.37, "total_tokens": 8676976}
|
| 5174 |
+
{"current_steps": 25775, "total_steps": 25880, "loss": 0.2769, "lr": 5.110113236878045e-08, "epoch": 19.918856259659968, "percentage": 99.59, "elapsed_time": "1:15:50", "remaining_time": "0:00:18", "throughput": 1907.43, "total_tokens": 8678960}
|
| 5175 |
+
{"current_steps": 25780, "total_steps": 25880, "loss": 0.2002, "lr": 4.639404318546037e-08, "epoch": 19.92272024729521, "percentage": 99.61, "elapsed_time": "1:15:50", "remaining_time": "0:00:17", "throughput": 1907.46, "total_tokens": 8680720}
|
| 5176 |
+
{"current_steps": 25785, "total_steps": 25880, "loss": 0.241, "lr": 4.1914335896231504e-08, "epoch": 19.92658423493045, "percentage": 99.63, "elapsed_time": "1:15:51", "remaining_time": "0:00:16", "throughput": 1907.47, "total_tokens": 8682352}
|
| 5177 |
+
{"current_steps": 25790, "total_steps": 25880, "loss": 0.4027, "lr": 3.766201253851964e-08, "epoch": 19.930448222565687, "percentage": 99.65, "elapsed_time": "1:15:52", "remaining_time": "0:00:15", "throughput": 1907.47, "total_tokens": 8683952}
|
| 5178 |
+
{"current_steps": 25795, "total_steps": 25880, "loss": 0.232, "lr": 3.363707504622226e-08, "epoch": 19.93431221020093, "percentage": 99.67, "elapsed_time": "1:15:53", "remaining_time": "0:00:15", "throughput": 1907.46, "total_tokens": 8685392}
|
| 5179 |
+
{"current_steps": 25800, "total_steps": 25880, "loss": 0.2095, "lr": 2.983952524998612e-08, "epoch": 19.938176197836167, "percentage": 99.69, "elapsed_time": "1:15:54", "remaining_time": "0:00:14", "throughput": 1907.48, "total_tokens": 8687088}
|
| 5180 |
+
{"current_steps": 25805, "total_steps": 25880, "loss": 0.1986, "lr": 2.626936487698517e-08, "epoch": 19.942040185471406, "percentage": 99.71, "elapsed_time": "1:15:55", "remaining_time": "0:00:13", "throughput": 1907.49, "total_tokens": 8688752}
|
| 5181 |
+
{"current_steps": 25810, "total_steps": 25880, "loss": 0.3602, "lr": 2.292659555086507e-08, "epoch": 19.945904173106648, "percentage": 99.73, "elapsed_time": "1:15:55", "remaining_time": "0:00:12", "throughput": 1907.5, "total_tokens": 8690384}
|
| 5182 |
+
{"current_steps": 25815, "total_steps": 25880, "loss": 0.2312, "lr": 1.9811218791965235e-08, "epoch": 19.949768160741886, "percentage": 99.75, "elapsed_time": "1:15:56", "remaining_time": "0:00:11", "throughput": 1907.52, "total_tokens": 8692208}
|
| 5183 |
+
{"current_steps": 25820, "total_steps": 25880, "loss": 0.2492, "lr": 1.69232360172078e-08, "epoch": 19.953632148377125, "percentage": 99.77, "elapsed_time": "1:15:57", "remaining_time": "0:00:10", "throughput": 1907.53, "total_tokens": 8693808}
|
| 5184 |
+
{"current_steps": 25825, "total_steps": 25880, "loss": 0.321, "lr": 1.42626485399866e-08, "epoch": 19.957496136012363, "percentage": 99.79, "elapsed_time": "1:15:58", "remaining_time": "0:00:09", "throughput": 1907.55, "total_tokens": 8695536}
|
| 5185 |
+
{"current_steps": 25830, "total_steps": 25880, "loss": 0.2694, "lr": 1.1829457570500246e-08, "epoch": 19.961360123647605, "percentage": 99.81, "elapsed_time": "1:15:59", "remaining_time": "0:00:08", "throughput": 1907.58, "total_tokens": 8697392}
|
| 5186 |
+
{"current_steps": 25835, "total_steps": 25880, "loss": 0.1778, "lr": 9.623664215197003e-09, "epoch": 19.965224111282843, "percentage": 99.83, "elapsed_time": "1:16:00", "remaining_time": "0:00:07", "throughput": 1907.61, "total_tokens": 8699216}
|
| 5187 |
+
{"current_steps": 25840, "total_steps": 25880, "loss": 0.3286, "lr": 7.64526947744093e-09, "epoch": 19.969088098918082, "percentage": 99.85, "elapsed_time": "1:16:01", "remaining_time": "0:00:07", "throughput": 1907.66, "total_tokens": 8701168}
|
| 5188 |
+
{"current_steps": 25845, "total_steps": 25880, "loss": 0.2206, "lr": 5.894274256956766e-09, "epoch": 19.972952086553324, "percentage": 99.86, "elapsed_time": "1:16:01", "remaining_time": "0:00:06", "throughput": 1907.68, "total_tokens": 8702800}
|
| 5189 |
+
{"current_steps": 25850, "total_steps": 25880, "loss": 0.2749, "lr": 4.370679350051976e-09, "epoch": 19.976816074188562, "percentage": 99.88, "elapsed_time": "1:16:02", "remaining_time": "0:00:05", "throughput": 1907.69, "total_tokens": 8704400}
|
| 5190 |
+
{"current_steps": 25855, "total_steps": 25880, "loss": 0.3259, "lr": 3.0744854497277707e-09, "epoch": 19.9806800618238, "percentage": 99.9, "elapsed_time": "1:16:03", "remaining_time": "0:00:04", "throughput": 1907.7, "total_tokens": 8706096}
|
| 5191 |
+
{"current_steps": 25860, "total_steps": 25880, "loss": 0.2801, "lr": 2.005693145512577e-09, "epoch": 19.984544049459043, "percentage": 99.92, "elapsed_time": "1:16:04", "remaining_time": "0:00:03", "throughput": 1907.75, "total_tokens": 8707984}
|
| 5192 |
+
{"current_steps": 25865, "total_steps": 25880, "loss": 0.1941, "lr": 1.1643029235175462e-09, "epoch": 19.98840803709428, "percentage": 99.94, "elapsed_time": "1:16:05", "remaining_time": "0:00:02", "throughput": 1907.76, "total_tokens": 8709552}
|
| 5193 |
+
{"current_steps": 25870, "total_steps": 25880, "loss": 0.1942, "lr": 5.503151663255323e-10, "epoch": 19.99227202472952, "percentage": 99.96, "elapsed_time": "1:16:06", "remaining_time": "0:00:01", "throughput": 1907.8, "total_tokens": 8711472}
|
| 5194 |
+
{"current_steps": 25875, "total_steps": 25880, "loss": 0.1894, "lr": 1.6373015321313745e-10, "epoch": 19.99613601236476, "percentage": 99.98, "elapsed_time": "1:16:07", "remaining_time": "0:00:00", "throughput": 1907.82, "total_tokens": 8713296}
|
| 5195 |
+
{"current_steps": 25880, "total_steps": 25880, "loss": 0.2855, "lr": 4.548060039688551e-12, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:16:07", "remaining_time": "0:00:00", "throughput": 1907.8, "total_tokens": 8714656}
|
| 5196 |
+
{"current_steps": 25880, "total_steps": 25880, "eval_loss": 0.555719792842865, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:16:18", "remaining_time": "0:00:00", "throughput": 1903.22, "total_tokens": 8714656}
|
| 5197 |
+
{"current_steps": 25880, "total_steps": 25880, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:16:19", "remaining_time": "0:00:00", "throughput": 1902.84, "total_tokens": 8714656}
|