Training in progress, step 38160
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +381 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1074144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74f4fd6e7c677a53fc86051671cfb1d696dc6e0175705968eeb2ecb7f35f78b6
|
| 3 |
size 1074144
|
trainer_log.jsonl
CHANGED
|
@@ -7270,3 +7270,384 @@
|
|
| 7270 |
{"current_steps": 36255, "total_steps": 38160, "loss": 0.4316, "lr": 3.7901236962379627e-07, "epoch": 19.001572327044027, "percentage": 95.01, "elapsed_time": "1:42:31", "remaining_time": "0:05:23", "throughput": 3855.46, "total_tokens": 23716768}
|
| 7271 |
{"current_steps": 36260, "total_steps": 38160, "loss": 0.4629, "lr": 3.7703146230519126e-07, "epoch": 19.0041928721174, "percentage": 95.02, "elapsed_time": "1:42:32", "remaining_time": "0:05:22", "throughput": 3855.52, "total_tokens": 23720384}
|
| 7272 |
{"current_steps": 36265, "total_steps": 38160, "loss": 0.5606, "lr": 3.7505570583069306e-07, "epoch": 19.006813417190777, "percentage": 95.03, "elapsed_time": "1:42:33", "remaining_time": "0:05:21", "throughput": 3855.61, "total_tokens": 23725024}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7270 |
{"current_steps": 36255, "total_steps": 38160, "loss": 0.4316, "lr": 3.7901236962379627e-07, "epoch": 19.001572327044027, "percentage": 95.01, "elapsed_time": "1:42:31", "remaining_time": "0:05:23", "throughput": 3855.46, "total_tokens": 23716768}
|
| 7271 |
{"current_steps": 36260, "total_steps": 38160, "loss": 0.4629, "lr": 3.7703146230519126e-07, "epoch": 19.0041928721174, "percentage": 95.02, "elapsed_time": "1:42:32", "remaining_time": "0:05:22", "throughput": 3855.52, "total_tokens": 23720384}
|
| 7272 |
{"current_steps": 36265, "total_steps": 38160, "loss": 0.5606, "lr": 3.7505570583069306e-07, "epoch": 19.006813417190777, "percentage": 95.03, "elapsed_time": "1:42:33", "remaining_time": "0:05:21", "throughput": 3855.61, "total_tokens": 23725024}
|
| 7273 |
+
{"current_steps": 36270, "total_steps": 38160, "loss": 0.3078, "lr": 3.7308510061360435e-07, "epoch": 19.00943396226415, "percentage": 95.05, "elapsed_time": "1:42:34", "remaining_time": "0:05:20", "throughput": 3855.66, "total_tokens": 23728640}
|
| 7274 |
+
{"current_steps": 36275, "total_steps": 38160, "loss": 0.7057, "lr": 3.7111964706615374e-07, "epoch": 19.012054507337528, "percentage": 95.06, "elapsed_time": "1:42:35", "remaining_time": "0:05:19", "throughput": 3855.6, "total_tokens": 23731392}
|
| 7275 |
+
{"current_steps": 36280, "total_steps": 38160, "loss": 0.5054, "lr": 3.691593455994929e-07, "epoch": 19.0146750524109, "percentage": 95.07, "elapsed_time": "1:42:35", "remaining_time": "0:05:18", "throughput": 3855.61, "total_tokens": 23734912}
|
| 7276 |
+
{"current_steps": 36285, "total_steps": 38160, "loss": 0.4447, "lr": 3.672041966236911e-07, "epoch": 19.017295597484278, "percentage": 95.09, "elapsed_time": "1:42:36", "remaining_time": "0:05:18", "throughput": 3855.62, "total_tokens": 23737856}
|
| 7277 |
+
{"current_steps": 36290, "total_steps": 38160, "loss": 0.4631, "lr": 3.6525420054774884e-07, "epoch": 19.01991614255765, "percentage": 95.1, "elapsed_time": "1:42:37", "remaining_time": "0:05:17", "throughput": 3855.69, "total_tokens": 23741408}
|
| 7278 |
+
{"current_steps": 36295, "total_steps": 38160, "loss": 0.4966, "lr": 3.6330935777957597e-07, "epoch": 19.02253668763103, "percentage": 95.11, "elapsed_time": "1:42:38", "remaining_time": "0:05:16", "throughput": 3855.67, "total_tokens": 23744448}
|
| 7279 |
+
{"current_steps": 36300, "total_steps": 38160, "loss": 0.259, "lr": 3.613696687260193e-07, "epoch": 19.0251572327044, "percentage": 95.13, "elapsed_time": "1:42:39", "remaining_time": "0:05:15", "throughput": 3855.75, "total_tokens": 23748128}
|
| 7280 |
+
{"current_steps": 36305, "total_steps": 38160, "loss": 0.4289, "lr": 3.59435133792832e-07, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "1:42:39", "remaining_time": "0:05:14", "throughput": 3855.71, "total_tokens": 23750880}
|
| 7281 |
+
{"current_steps": 36310, "total_steps": 38160, "loss": 0.3776, "lr": 3.575057533847043e-07, "epoch": 19.030398322851152, "percentage": 95.15, "elapsed_time": "1:42:40", "remaining_time": "0:05:13", "throughput": 3855.75, "total_tokens": 23754112}
|
| 7282 |
+
{"current_steps": 36315, "total_steps": 38160, "loss": 0.4467, "lr": 3.555815279052327e-07, "epoch": 19.03301886792453, "percentage": 95.17, "elapsed_time": "1:42:41", "remaining_time": "0:05:13", "throughput": 3855.85, "total_tokens": 23757792}
|
| 7283 |
+
{"current_steps": 36320, "total_steps": 38160, "loss": 0.4926, "lr": 3.5366245775694805e-07, "epoch": 19.035639412997902, "percentage": 95.18, "elapsed_time": "1:42:42", "remaining_time": "0:05:12", "throughput": 3855.99, "total_tokens": 23762336}
|
| 7284 |
+
{"current_steps": 36325, "total_steps": 38160, "loss": 0.3254, "lr": 3.517485433412987e-07, "epoch": 19.03825995807128, "percentage": 95.19, "elapsed_time": "1:42:43", "remaining_time": "0:05:11", "throughput": 3855.91, "total_tokens": 23764800}
|
| 7285 |
+
{"current_steps": 36330, "total_steps": 38160, "loss": 0.4941, "lr": 3.4983978505865044e-07, "epoch": 19.040880503144653, "percentage": 95.2, "elapsed_time": "1:42:44", "remaining_time": "0:05:10", "throughput": 3855.97, "total_tokens": 23768288}
|
| 7286 |
+
{"current_steps": 36335, "total_steps": 38160, "loss": 0.3915, "lr": 3.47936183308295e-07, "epoch": 19.04350104821803, "percentage": 95.22, "elapsed_time": "1:42:44", "remaining_time": "0:05:09", "throughput": 3856.0, "total_tokens": 23771456}
|
| 7287 |
+
{"current_steps": 36340, "total_steps": 38160, "loss": 0.4568, "lr": 3.460377384884472e-07, "epoch": 19.046121593291403, "percentage": 95.23, "elapsed_time": "1:42:45", "remaining_time": "0:05:08", "throughput": 3855.93, "total_tokens": 23774144}
|
| 7288 |
+
{"current_steps": 36345, "total_steps": 38160, "loss": 0.398, "lr": 3.4414445099623384e-07, "epoch": 19.04874213836478, "percentage": 95.24, "elapsed_time": "1:42:46", "remaining_time": "0:05:07", "throughput": 3855.97, "total_tokens": 23777280}
|
| 7289 |
+
{"current_steps": 36350, "total_steps": 38160, "loss": 0.4234, "lr": 3.42256321227713e-07, "epoch": 19.051362683438157, "percentage": 95.26, "elapsed_time": "1:42:47", "remaining_time": "0:05:07", "throughput": 3855.97, "total_tokens": 23780672}
|
| 7290 |
+
{"current_steps": 36355, "total_steps": 38160, "loss": 0.5268, "lr": 3.403733495778605e-07, "epoch": 19.05398322851153, "percentage": 95.27, "elapsed_time": "1:42:48", "remaining_time": "0:05:06", "throughput": 3856.05, "total_tokens": 23784224}
|
| 7291 |
+
{"current_steps": 36360, "total_steps": 38160, "loss": 0.3707, "lr": 3.3849553644056955e-07, "epoch": 19.056603773584907, "percentage": 95.28, "elapsed_time": "1:42:48", "remaining_time": "0:05:05", "throughput": 3856.09, "total_tokens": 23787904}
|
| 7292 |
+
{"current_steps": 36365, "total_steps": 38160, "loss": 0.4894, "lr": 3.36622882208662e-07, "epoch": 19.05922431865828, "percentage": 95.3, "elapsed_time": "1:42:49", "remaining_time": "0:05:04", "throughput": 3856.1, "total_tokens": 23790784}
|
| 7293 |
+
{"current_steps": 36370, "total_steps": 38160, "loss": 0.7455, "lr": 3.347553872738718e-07, "epoch": 19.061844863731658, "percentage": 95.31, "elapsed_time": "1:42:50", "remaining_time": "0:05:03", "throughput": 3856.13, "total_tokens": 23794048}
|
| 7294 |
+
{"current_steps": 36375, "total_steps": 38160, "loss": 0.475, "lr": 3.3289305202685583e-07, "epoch": 19.06446540880503, "percentage": 95.32, "elapsed_time": "1:42:51", "remaining_time": "0:05:02", "throughput": 3856.19, "total_tokens": 23797792}
|
| 7295 |
+
{"current_steps": 36380, "total_steps": 38160, "loss": 0.4381, "lr": 3.310358768571997e-07, "epoch": 19.067085953878408, "percentage": 95.34, "elapsed_time": "1:42:52", "remaining_time": "0:05:01", "throughput": 3856.19, "total_tokens": 23800768}
|
| 7296 |
+
{"current_steps": 36385, "total_steps": 38160, "loss": 0.6044, "lr": 3.2918386215339827e-07, "epoch": 19.06970649895178, "percentage": 95.35, "elapsed_time": "1:42:52", "remaining_time": "0:05:01", "throughput": 3856.23, "total_tokens": 23803968}
|
| 7297 |
+
{"current_steps": 36390, "total_steps": 38160, "loss": 0.4925, "lr": 3.2733700830287205e-07, "epoch": 19.072327044025158, "percentage": 95.36, "elapsed_time": "1:42:53", "remaining_time": "0:05:00", "throughput": 3856.24, "total_tokens": 23807136}
|
| 7298 |
+
{"current_steps": 36395, "total_steps": 38160, "loss": 0.4779, "lr": 3.254953156919649e-07, "epoch": 19.07494758909853, "percentage": 95.37, "elapsed_time": "1:42:54", "remaining_time": "0:04:59", "throughput": 3856.37, "total_tokens": 23811520}
|
| 7299 |
+
{"current_steps": 36400, "total_steps": 38160, "loss": 0.4026, "lr": 3.2365878470593527e-07, "epoch": 19.07756813417191, "percentage": 95.39, "elapsed_time": "1:42:55", "remaining_time": "0:04:58", "throughput": 3856.41, "total_tokens": 23814624}
|
| 7300 |
+
{"current_steps": 36405, "total_steps": 38160, "loss": 0.4567, "lr": 3.2182741572896747e-07, "epoch": 19.080188679245282, "percentage": 95.4, "elapsed_time": "1:42:56", "remaining_time": "0:04:57", "throughput": 3856.46, "total_tokens": 23817856}
|
| 7301 |
+
{"current_steps": 36410, "total_steps": 38160, "loss": 0.4509, "lr": 3.2000120914415786e-07, "epoch": 19.08280922431866, "percentage": 95.41, "elapsed_time": "1:42:56", "remaining_time": "0:04:56", "throughput": 3856.52, "total_tokens": 23821472}
|
| 7302 |
+
{"current_steps": 36415, "total_steps": 38160, "loss": 0.4643, "lr": 3.181801653335315e-07, "epoch": 19.085429769392032, "percentage": 95.43, "elapsed_time": "1:42:57", "remaining_time": "0:04:56", "throughput": 3856.52, "total_tokens": 23824512}
|
| 7303 |
+
{"current_steps": 36420, "total_steps": 38160, "loss": 0.3752, "lr": 3.163642846780307e-07, "epoch": 19.08805031446541, "percentage": 95.44, "elapsed_time": "1:42:58", "remaining_time": "0:04:55", "throughput": 3856.55, "total_tokens": 23827776}
|
| 7304 |
+
{"current_steps": 36425, "total_steps": 38160, "loss": 0.4701, "lr": 3.1455356755751565e-07, "epoch": 19.090670859538783, "percentage": 95.45, "elapsed_time": "1:42:59", "remaining_time": "0:04:54", "throughput": 3856.53, "total_tokens": 23830688}
|
| 7305 |
+
{"current_steps": 36430, "total_steps": 38160, "loss": 0.3674, "lr": 3.127480143507666e-07, "epoch": 19.09329140461216, "percentage": 95.47, "elapsed_time": "1:43:00", "remaining_time": "0:04:53", "throughput": 3856.53, "total_tokens": 23833536}
|
| 7306 |
+
{"current_steps": 36435, "total_steps": 38160, "loss": 0.3868, "lr": 3.1094762543548704e-07, "epoch": 19.095911949685533, "percentage": 95.48, "elapsed_time": "1:43:01", "remaining_time": "0:04:52", "throughput": 3856.64, "total_tokens": 23838272}
|
| 7307 |
+
{"current_steps": 36440, "total_steps": 38160, "loss": 0.5286, "lr": 3.091524011882951e-07, "epoch": 19.09853249475891, "percentage": 95.49, "elapsed_time": "1:43:01", "remaining_time": "0:04:51", "throughput": 3856.71, "total_tokens": 23841600}
|
| 7308 |
+
{"current_steps": 36445, "total_steps": 38160, "loss": 0.4666, "lr": 3.073623419847349e-07, "epoch": 19.101153039832287, "percentage": 95.51, "elapsed_time": "1:43:02", "remaining_time": "0:04:50", "throughput": 3856.77, "total_tokens": 23845120}
|
| 7309 |
+
{"current_steps": 36450, "total_steps": 38160, "loss": 0.4753, "lr": 3.0557744819926235e-07, "epoch": 19.10377358490566, "percentage": 95.52, "elapsed_time": "1:43:03", "remaining_time": "0:04:50", "throughput": 3856.78, "total_tokens": 23848480}
|
| 7310 |
+
{"current_steps": 36455, "total_steps": 38160, "loss": 0.3858, "lr": 3.037977202052594e-07, "epoch": 19.106394129979037, "percentage": 95.53, "elapsed_time": "1:43:04", "remaining_time": "0:04:49", "throughput": 3856.82, "total_tokens": 23852000}
|
| 7311 |
+
{"current_steps": 36460, "total_steps": 38160, "loss": 0.3711, "lr": 3.020231583750255e-07, "epoch": 19.10901467505241, "percentage": 95.55, "elapsed_time": "1:43:05", "remaining_time": "0:04:48", "throughput": 3856.9, "total_tokens": 23856224}
|
| 7312 |
+
{"current_steps": 36465, "total_steps": 38160, "loss": 0.3429, "lr": 3.002537630797747e-07, "epoch": 19.111635220125788, "percentage": 95.56, "elapsed_time": "1:43:06", "remaining_time": "0:04:47", "throughput": 3856.96, "total_tokens": 23859680}
|
| 7313 |
+
{"current_steps": 36470, "total_steps": 38160, "loss": 0.3925, "lr": 2.984895346896499e-07, "epoch": 19.11425576519916, "percentage": 95.57, "elapsed_time": "1:43:06", "remaining_time": "0:04:46", "throughput": 3857.03, "total_tokens": 23863168}
|
| 7314 |
+
{"current_steps": 36475, "total_steps": 38160, "loss": 0.5003, "lr": 2.967304735737031e-07, "epoch": 19.116876310272538, "percentage": 95.58, "elapsed_time": "1:43:07", "remaining_time": "0:04:45", "throughput": 3857.07, "total_tokens": 23866208}
|
| 7315 |
+
{"current_steps": 36480, "total_steps": 38160, "loss": 0.363, "lr": 2.9497658009991214e-07, "epoch": 19.11949685534591, "percentage": 95.6, "elapsed_time": "1:43:08", "remaining_time": "0:04:44", "throughput": 3857.04, "total_tokens": 23869152}
|
| 7316 |
+
{"current_steps": 36485, "total_steps": 38160, "loss": 0.3183, "lr": 2.932278546351724e-07, "epoch": 19.122117400419288, "percentage": 95.61, "elapsed_time": "1:43:09", "remaining_time": "0:04:44", "throughput": 3857.1, "total_tokens": 23872768}
|
| 7317 |
+
{"current_steps": 36490, "total_steps": 38160, "loss": 0.3949, "lr": 2.9148429754529137e-07, "epoch": 19.12473794549266, "percentage": 95.62, "elapsed_time": "1:43:10", "remaining_time": "0:04:43", "throughput": 3857.07, "total_tokens": 23875744}
|
| 7318 |
+
{"current_steps": 36495, "total_steps": 38160, "loss": 0.3055, "lr": 2.897459091950078e-07, "epoch": 19.12735849056604, "percentage": 95.64, "elapsed_time": "1:43:10", "remaining_time": "0:04:42", "throughput": 3856.98, "total_tokens": 23877984}
|
| 7319 |
+
{"current_steps": 36500, "total_steps": 38160, "loss": 0.6469, "lr": 2.880126899479724e-07, "epoch": 19.129979035639412, "percentage": 95.65, "elapsed_time": "1:43:11", "remaining_time": "0:04:41", "throughput": 3857.0, "total_tokens": 23881504}
|
| 7320 |
+
{"current_steps": 36505, "total_steps": 38160, "loss": 0.3519, "lr": 2.862846401667507e-07, "epoch": 19.13259958071279, "percentage": 95.66, "elapsed_time": "1:43:12", "remaining_time": "0:04:40", "throughput": 3857.02, "total_tokens": 23884864}
|
| 7321 |
+
{"current_steps": 36510, "total_steps": 38160, "loss": 0.5442, "lr": 2.8456176021283697e-07, "epoch": 19.135220125786162, "percentage": 95.68, "elapsed_time": "1:43:13", "remaining_time": "0:04:39", "throughput": 3857.02, "total_tokens": 23888000}
|
| 7322 |
+
{"current_steps": 36515, "total_steps": 38160, "loss": 0.5383, "lr": 2.8284405044662897e-07, "epoch": 19.13784067085954, "percentage": 95.69, "elapsed_time": "1:43:14", "remaining_time": "0:04:39", "throughput": 3856.88, "total_tokens": 23890528}
|
| 7323 |
+
{"current_steps": 36520, "total_steps": 38160, "loss": 0.3584, "lr": 2.8113151122745874e-07, "epoch": 19.140461215932913, "percentage": 95.7, "elapsed_time": "1:43:15", "remaining_time": "0:04:38", "throughput": 3856.78, "total_tokens": 23893184}
|
| 7324 |
+
{"current_steps": 36525, "total_steps": 38160, "loss": 0.4723, "lr": 2.7942414291357023e-07, "epoch": 19.14308176100629, "percentage": 95.72, "elapsed_time": "1:43:15", "remaining_time": "0:04:37", "throughput": 3856.76, "total_tokens": 23895968}
|
| 7325 |
+
{"current_steps": 36530, "total_steps": 38160, "loss": 0.4229, "lr": 2.777219458621222e-07, "epoch": 19.145702306079663, "percentage": 95.73, "elapsed_time": "1:43:16", "remaining_time": "0:04:36", "throughput": 3856.71, "total_tokens": 23898880}
|
| 7326 |
+
{"current_steps": 36535, "total_steps": 38160, "loss": 0.4625, "lr": 2.7602492042919923e-07, "epoch": 19.14832285115304, "percentage": 95.74, "elapsed_time": "1:43:17", "remaining_time": "0:04:35", "throughput": 3856.65, "total_tokens": 23901504}
|
| 7327 |
+
{"current_steps": 36540, "total_steps": 38160, "loss": 0.4526, "lr": 2.7433306696979523e-07, "epoch": 19.150943396226417, "percentage": 95.75, "elapsed_time": "1:43:18", "remaining_time": "0:04:34", "throughput": 3856.68, "total_tokens": 23904768}
|
| 7328 |
+
{"current_steps": 36545, "total_steps": 38160, "loss": 0.4994, "lr": 2.7264638583782973e-07, "epoch": 19.15356394129979, "percentage": 95.77, "elapsed_time": "1:43:19", "remaining_time": "0:04:33", "throughput": 3856.67, "total_tokens": 23907872}
|
| 7329 |
+
{"current_steps": 36550, "total_steps": 38160, "loss": 0.3813, "lr": 2.7096487738613177e-07, "epoch": 19.156184486373167, "percentage": 95.78, "elapsed_time": "1:43:19", "remaining_time": "0:04:33", "throughput": 3856.69, "total_tokens": 23910816}
|
| 7330 |
+
{"current_steps": 36555, "total_steps": 38160, "loss": 0.4918, "lr": 2.6928854196646147e-07, "epoch": 19.15880503144654, "percentage": 95.79, "elapsed_time": "1:43:20", "remaining_time": "0:04:32", "throughput": 3856.7, "total_tokens": 23914048}
|
| 7331 |
+
{"current_steps": 36560, "total_steps": 38160, "loss": 0.3547, "lr": 2.6761737992948565e-07, "epoch": 19.161425576519918, "percentage": 95.81, "elapsed_time": "1:43:21", "remaining_time": "0:04:31", "throughput": 3856.78, "total_tokens": 23918016}
|
| 7332 |
+
{"current_steps": 36565, "total_steps": 38160, "loss": 0.371, "lr": 2.6595139162479134e-07, "epoch": 19.16404612159329, "percentage": 95.82, "elapsed_time": "1:43:22", "remaining_time": "0:04:30", "throughput": 3856.81, "total_tokens": 23921056}
|
| 7333 |
+
{"current_steps": 36570, "total_steps": 38160, "loss": 0.5106, "lr": 2.6429057740088303e-07, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "1:43:23", "remaining_time": "0:04:29", "throughput": 3856.76, "total_tokens": 23923648}
|
| 7334 |
+
{"current_steps": 36575, "total_steps": 38160, "loss": 0.3711, "lr": 2.6263493760518564e-07, "epoch": 19.16928721174004, "percentage": 95.85, "elapsed_time": "1:43:23", "remaining_time": "0:04:28", "throughput": 3856.78, "total_tokens": 23926592}
|
| 7335 |
+
{"current_steps": 36580, "total_steps": 38160, "loss": 0.4643, "lr": 2.6098447258404157e-07, "epoch": 19.171907756813418, "percentage": 95.86, "elapsed_time": "1:43:24", "remaining_time": "0:04:27", "throughput": 3856.8, "total_tokens": 23929856}
|
| 7336 |
+
{"current_steps": 36585, "total_steps": 38160, "loss": 0.3838, "lr": 2.593391826827107e-07, "epoch": 19.17452830188679, "percentage": 95.87, "elapsed_time": "1:43:25", "remaining_time": "0:04:27", "throughput": 3856.77, "total_tokens": 23932608}
|
| 7337 |
+
{"current_steps": 36590, "total_steps": 38160, "loss": 0.5844, "lr": 2.5769906824536225e-07, "epoch": 19.17714884696017, "percentage": 95.89, "elapsed_time": "1:43:26", "remaining_time": "0:04:26", "throughput": 3856.71, "total_tokens": 23935488}
|
| 7338 |
+
{"current_steps": 36595, "total_steps": 38160, "loss": 0.4753, "lr": 2.560641296150967e-07, "epoch": 19.179769392033542, "percentage": 95.9, "elapsed_time": "1:43:26", "remaining_time": "0:04:25", "throughput": 3856.72, "total_tokens": 23938432}
|
| 7339 |
+
{"current_steps": 36600, "total_steps": 38160, "loss": 0.4992, "lr": 2.544343671339211e-07, "epoch": 19.18238993710692, "percentage": 95.91, "elapsed_time": "1:43:27", "remaining_time": "0:04:24", "throughput": 3856.69, "total_tokens": 23941120}
|
| 7340 |
+
{"current_steps": 36605, "total_steps": 38160, "loss": 0.4446, "lr": 2.5280978114276264e-07, "epoch": 19.185010482180292, "percentage": 95.93, "elapsed_time": "1:43:28", "remaining_time": "0:04:23", "throughput": 3856.71, "total_tokens": 23944320}
|
| 7341 |
+
{"current_steps": 36610, "total_steps": 38160, "loss": 0.5439, "lr": 2.511903719814662e-07, "epoch": 19.18763102725367, "percentage": 95.94, "elapsed_time": "1:43:29", "remaining_time": "0:04:22", "throughput": 3856.62, "total_tokens": 23946624}
|
| 7342 |
+
{"current_steps": 36615, "total_steps": 38160, "loss": 0.3723, "lr": 2.4957613998879695e-07, "epoch": 19.190251572327043, "percentage": 95.95, "elapsed_time": "1:43:29", "remaining_time": "0:04:22", "throughput": 3856.61, "total_tokens": 23949440}
|
| 7343 |
+
{"current_steps": 36620, "total_steps": 38160, "loss": 0.3203, "lr": 2.479670855024291e-07, "epoch": 19.19287211740042, "percentage": 95.96, "elapsed_time": "1:43:30", "remaining_time": "0:04:21", "throughput": 3856.58, "total_tokens": 23952224}
|
| 7344 |
+
{"current_steps": 36625, "total_steps": 38160, "loss": 0.3747, "lr": 2.463632088589629e-07, "epoch": 19.195492662473793, "percentage": 95.98, "elapsed_time": "1:43:31", "remaining_time": "0:04:20", "throughput": 3856.56, "total_tokens": 23955072}
|
| 7345 |
+
{"current_steps": 36630, "total_steps": 38160, "loss": 0.4266, "lr": 2.447645103939078e-07, "epoch": 19.19811320754717, "percentage": 95.99, "elapsed_time": "1:43:32", "remaining_time": "0:04:19", "throughput": 3856.55, "total_tokens": 23958336}
|
| 7346 |
+
{"current_steps": 36635, "total_steps": 38160, "loss": 0.3012, "lr": 2.431709904416962e-07, "epoch": 19.200733752620547, "percentage": 96.0, "elapsed_time": "1:43:33", "remaining_time": "0:04:18", "throughput": 3856.56, "total_tokens": 23961504}
|
| 7347 |
+
{"current_steps": 36640, "total_steps": 38160, "loss": 0.6953, "lr": 2.4158264933567264e-07, "epoch": 19.20335429769392, "percentage": 96.02, "elapsed_time": "1:43:33", "remaining_time": "0:04:17", "throughput": 3856.53, "total_tokens": 23964288}
|
| 7348 |
+
{"current_steps": 36645, "total_steps": 38160, "loss": 0.4647, "lr": 2.399994874080963e-07, "epoch": 19.205974842767297, "percentage": 96.03, "elapsed_time": "1:43:34", "remaining_time": "0:04:16", "throughput": 3856.57, "total_tokens": 23967776}
|
| 7349 |
+
{"current_steps": 36650, "total_steps": 38160, "loss": 0.3073, "lr": 2.3842150499015504e-07, "epoch": 19.20859538784067, "percentage": 96.04, "elapsed_time": "1:43:35", "remaining_time": "0:04:16", "throughput": 3856.58, "total_tokens": 23970688}
|
| 7350 |
+
{"current_steps": 36655, "total_steps": 38160, "loss": 0.4254, "lr": 2.368487024119348e-07, "epoch": 19.211215932914047, "percentage": 96.06, "elapsed_time": "1:43:36", "remaining_time": "0:04:15", "throughput": 3856.64, "total_tokens": 23974208}
|
| 7351 |
+
{"current_steps": 36660, "total_steps": 38160, "loss": 0.4739, "lr": 2.3528108000245573e-07, "epoch": 19.21383647798742, "percentage": 96.07, "elapsed_time": "1:43:37", "remaining_time": "0:04:14", "throughput": 3856.72, "total_tokens": 23978080}
|
| 7352 |
+
{"current_steps": 36665, "total_steps": 38160, "loss": 0.4931, "lr": 2.3371863808964445e-07, "epoch": 19.216457023060798, "percentage": 96.08, "elapsed_time": "1:43:38", "remaining_time": "0:04:13", "throughput": 3856.88, "total_tokens": 23982976}
|
| 7353 |
+
{"current_steps": 36670, "total_steps": 38160, "loss": 0.4427, "lr": 2.32161377000345e-07, "epoch": 19.21907756813417, "percentage": 96.1, "elapsed_time": "1:43:39", "remaining_time": "0:04:12", "throughput": 3856.89, "total_tokens": 23986528}
|
| 7354 |
+
{"current_steps": 36675, "total_steps": 38160, "loss": 0.3904, "lr": 2.306092970603163e-07, "epoch": 19.221698113207548, "percentage": 96.11, "elapsed_time": "1:43:39", "remaining_time": "0:04:11", "throughput": 3856.9, "total_tokens": 23989376}
|
| 7355 |
+
{"current_steps": 36680, "total_steps": 38160, "loss": 0.4462, "lr": 2.2906239859424306e-07, "epoch": 19.22431865828092, "percentage": 96.12, "elapsed_time": "1:43:40", "remaining_time": "0:04:10", "throughput": 3856.76, "total_tokens": 23991648}
|
| 7356 |
+
{"current_steps": 36685, "total_steps": 38160, "loss": 0.4378, "lr": 2.2752068192571084e-07, "epoch": 19.2269392033543, "percentage": 96.13, "elapsed_time": "1:43:41", "remaining_time": "0:04:10", "throughput": 3856.7, "total_tokens": 23994336}
|
| 7357 |
+
{"current_steps": 36690, "total_steps": 38160, "loss": 0.4283, "lr": 2.2598414737723396e-07, "epoch": 19.229559748427672, "percentage": 96.15, "elapsed_time": "1:43:42", "remaining_time": "0:04:09", "throughput": 3856.72, "total_tokens": 23997632}
|
| 7358 |
+
{"current_steps": 36695, "total_steps": 38160, "loss": 0.3862, "lr": 2.2445279527023578e-07, "epoch": 19.23218029350105, "percentage": 96.16, "elapsed_time": "1:43:43", "remaining_time": "0:04:08", "throughput": 3856.71, "total_tokens": 24000384}
|
| 7359 |
+
{"current_steps": 36700, "total_steps": 38160, "loss": 0.4977, "lr": 2.2292662592506007e-07, "epoch": 19.234800838574422, "percentage": 96.17, "elapsed_time": "1:43:43", "remaining_time": "0:04:07", "throughput": 3856.74, "total_tokens": 24003616}
|
| 7360 |
+
{"current_steps": 36705, "total_steps": 38160, "loss": 0.4244, "lr": 2.2140563966095985e-07, "epoch": 19.2374213836478, "percentage": 96.19, "elapsed_time": "1:43:44", "remaining_time": "0:04:06", "throughput": 3856.7, "total_tokens": 24006336}
|
| 7361 |
+
{"current_steps": 36710, "total_steps": 38160, "loss": 0.3558, "lr": 2.1988983679611386e-07, "epoch": 19.240041928721173, "percentage": 96.2, "elapsed_time": "1:43:45", "remaining_time": "0:04:05", "throughput": 3856.86, "total_tokens": 24011168}
|
| 7362 |
+
{"current_steps": 36715, "total_steps": 38160, "loss": 0.3466, "lr": 2.1837921764760462e-07, "epoch": 19.24266247379455, "percentage": 96.21, "elapsed_time": "1:43:46", "remaining_time": "0:04:05", "throughput": 3856.88, "total_tokens": 24014240}
|
| 7363 |
+
{"current_steps": 36720, "total_steps": 38160, "loss": 0.4825, "lr": 2.1687378253144043e-07, "epoch": 19.245283018867923, "percentage": 96.23, "elapsed_time": "1:43:47", "remaining_time": "0:04:04", "throughput": 3856.91, "total_tokens": 24017568}
|
| 7364 |
+
{"current_steps": 36725, "total_steps": 38160, "loss": 0.5081, "lr": 2.153735317625416e-07, "epoch": 19.2479035639413, "percentage": 96.24, "elapsed_time": "1:43:48", "remaining_time": "0:04:03", "throughput": 3856.97, "total_tokens": 24021376}
|
| 7365 |
+
{"current_steps": 36730, "total_steps": 38160, "loss": 0.3901, "lr": 2.1387846565474045e-07, "epoch": 19.250524109014677, "percentage": 96.25, "elapsed_time": "1:43:48", "remaining_time": "0:04:02", "throughput": 3857.01, "total_tokens": 24024928}
|
| 7366 |
+
{"current_steps": 36735, "total_steps": 38160, "loss": 0.4165, "lr": 2.123885845207896e-07, "epoch": 19.25314465408805, "percentage": 96.27, "elapsed_time": "1:43:49", "remaining_time": "0:04:01", "throughput": 3857.04, "total_tokens": 24028128}
|
| 7367 |
+
{"current_steps": 36740, "total_steps": 38160, "loss": 0.3669, "lr": 2.1090388867235356e-07, "epoch": 19.255765199161427, "percentage": 96.28, "elapsed_time": "1:43:50", "remaining_time": "0:04:00", "throughput": 3857.07, "total_tokens": 24031584}
|
| 7368 |
+
{"current_steps": 36745, "total_steps": 38160, "loss": 0.4897, "lr": 2.0942437842001728e-07, "epoch": 19.2583857442348, "percentage": 96.29, "elapsed_time": "1:43:51", "remaining_time": "0:03:59", "throughput": 3857.05, "total_tokens": 24034336}
|
| 7369 |
+
{"current_steps": 36750, "total_steps": 38160, "loss": 0.5133, "lr": 2.0795005407327483e-07, "epoch": 19.261006289308177, "percentage": 96.31, "elapsed_time": "1:43:52", "remaining_time": "0:03:59", "throughput": 3857.07, "total_tokens": 24037408}
|
| 7370 |
+
{"current_steps": 36755, "total_steps": 38160, "loss": 0.4559, "lr": 2.0648091594053508e-07, "epoch": 19.26362683438155, "percentage": 96.32, "elapsed_time": "1:43:52", "remaining_time": "0:03:58", "throughput": 3857.04, "total_tokens": 24040128}
|
| 7371 |
+
{"current_steps": 36760, "total_steps": 38160, "loss": 0.4295, "lr": 2.0501696432913275e-07, "epoch": 19.266247379454928, "percentage": 96.33, "elapsed_time": "1:43:53", "remaining_time": "0:03:57", "throughput": 3857.01, "total_tokens": 24042944}
|
| 7372 |
+
{"current_steps": 36765, "total_steps": 38160, "loss": 0.4759, "lr": 2.035581995453034e-07, "epoch": 19.2688679245283, "percentage": 96.34, "elapsed_time": "1:43:54", "remaining_time": "0:03:56", "throughput": 3856.95, "total_tokens": 24045952}
|
| 7373 |
+
{"current_steps": 36770, "total_steps": 38160, "loss": 0.5413, "lr": 2.0210462189420577e-07, "epoch": 19.271488469601678, "percentage": 96.36, "elapsed_time": "1:43:55", "remaining_time": "0:03:55", "throughput": 3856.96, "total_tokens": 24048768}
|
| 7374 |
+
{"current_steps": 36775, "total_steps": 38160, "loss": 0.4359, "lr": 2.006562316799132e-07, "epoch": 19.27410901467505, "percentage": 96.37, "elapsed_time": "1:43:56", "remaining_time": "0:03:54", "throughput": 3857.03, "total_tokens": 24052704}
|
| 7375 |
+
{"current_steps": 36780, "total_steps": 38160, "loss": 0.5778, "lr": 1.992130292054084e-07, "epoch": 19.27672955974843, "percentage": 96.38, "elapsed_time": "1:43:57", "remaining_time": "0:03:54", "throughput": 3857.1, "total_tokens": 24056896}
|
| 7376 |
+
{"current_steps": 36785, "total_steps": 38160, "loss": 0.5186, "lr": 1.9777501477259708e-07, "epoch": 19.279350104821802, "percentage": 96.4, "elapsed_time": "1:43:57", "remaining_time": "0:03:53", "throughput": 3857.03, "total_tokens": 24059168}
|
| 7377 |
+
{"current_steps": 36790, "total_steps": 38160, "loss": 0.3696, "lr": 1.9634218868229414e-07, "epoch": 19.28197064989518, "percentage": 96.41, "elapsed_time": "1:43:58", "remaining_time": "0:03:52", "throughput": 3856.99, "total_tokens": 24062176}
|
| 7378 |
+
{"current_steps": 36795, "total_steps": 38160, "loss": 0.3988, "lr": 1.9491455123423207e-07, "epoch": 19.284591194968552, "percentage": 96.42, "elapsed_time": "1:43:59", "remaining_time": "0:03:51", "throughput": 3857.05, "total_tokens": 24065536}
|
| 7379 |
+
{"current_steps": 36800, "total_steps": 38160, "loss": 0.4452, "lr": 1.9349210272705253e-07, "epoch": 19.28721174004193, "percentage": 96.44, "elapsed_time": "1:44:00", "remaining_time": "0:03:50", "throughput": 3857.06, "total_tokens": 24068608}
|
| 7380 |
+
{"current_steps": 36805, "total_steps": 38160, "loss": 0.4093, "lr": 1.920748434583175e-07, "epoch": 19.289832285115303, "percentage": 96.45, "elapsed_time": "1:44:01", "remaining_time": "0:03:49", "throughput": 3857.13, "total_tokens": 24072928}
|
| 7381 |
+
{"current_steps": 36810, "total_steps": 38160, "loss": 0.3872, "lr": 1.906627737245009e-07, "epoch": 19.29245283018868, "percentage": 96.46, "elapsed_time": "1:44:02", "remaining_time": "0:03:48", "throughput": 3857.26, "total_tokens": 24078176}
|
| 7382 |
+
{"current_steps": 36815, "total_steps": 38160, "loss": 0.4003, "lr": 1.8925589382099152e-07, "epoch": 19.295073375262053, "percentage": 96.48, "elapsed_time": "1:44:03", "remaining_time": "0:03:48", "throughput": 3857.23, "total_tokens": 24080768}
|
| 7383 |
+
{"current_steps": 36820, "total_steps": 38160, "loss": 0.476, "lr": 1.8785420404209553e-07, "epoch": 19.29769392033543, "percentage": 96.49, "elapsed_time": "1:44:03", "remaining_time": "0:03:47", "throughput": 3857.24, "total_tokens": 24083936}
|
| 7384 |
+
{"current_steps": 36825, "total_steps": 38160, "loss": 0.3738, "lr": 1.864577046810284e-07, "epoch": 19.300314465408803, "percentage": 96.5, "elapsed_time": "1:44:04", "remaining_time": "0:03:46", "throughput": 3857.25, "total_tokens": 24087200}
|
| 7385 |
+
{"current_steps": 36830, "total_steps": 38160, "loss": 0.4097, "lr": 1.8506639602992038e-07, "epoch": 19.30293501048218, "percentage": 96.51, "elapsed_time": "1:44:05", "remaining_time": "0:03:45", "throughput": 3857.29, "total_tokens": 24090432}
|
| 7386 |
+
{"current_steps": 36835, "total_steps": 38160, "loss": 0.4577, "lr": 1.836802783798164e-07, "epoch": 19.305555555555557, "percentage": 96.53, "elapsed_time": "1:44:06", "remaining_time": "0:03:44", "throughput": 3857.27, "total_tokens": 24093504}
|
| 7387 |
+
{"current_steps": 36840, "total_steps": 38160, "loss": 0.4908, "lr": 1.822993520206817e-07, "epoch": 19.30817610062893, "percentage": 96.54, "elapsed_time": "1:44:07", "remaining_time": "0:03:43", "throughput": 3857.33, "total_tokens": 24097088}
|
| 7388 |
+
{"current_steps": 36845, "total_steps": 38160, "loss": 0.3628, "lr": 1.8092361724138528e-07, "epoch": 19.310796645702307, "percentage": 96.55, "elapsed_time": "1:44:07", "remaining_time": "0:03:42", "throughput": 3857.28, "total_tokens": 24099552}
|
| 7389 |
+
{"current_steps": 36850, "total_steps": 38160, "loss": 0.4481, "lr": 1.7955307432971914e-07, "epoch": 19.31341719077568, "percentage": 96.57, "elapsed_time": "1:44:08", "remaining_time": "0:03:42", "throughput": 3857.29, "total_tokens": 24102816}
|
| 7390 |
+
{"current_steps": 36855, "total_steps": 38160, "loss": 0.4627, "lr": 1.7818772357238445e-07, "epoch": 19.316037735849058, "percentage": 96.58, "elapsed_time": "1:44:09", "remaining_time": "0:03:41", "throughput": 3857.3, "total_tokens": 24105856}
|
| 7391 |
+
{"current_steps": 36860, "total_steps": 38160, "loss": 0.4023, "lr": 1.7682756525499166e-07, "epoch": 19.31865828092243, "percentage": 96.59, "elapsed_time": "1:44:10", "remaining_time": "0:03:40", "throughput": 3857.42, "total_tokens": 24110848}
|
| 7392 |
+
{"current_steps": 36865, "total_steps": 38160, "loss": 0.3612, "lr": 1.7547259966207708e-07, "epoch": 19.321278825995808, "percentage": 96.61, "elapsed_time": "1:44:11", "remaining_time": "0:03:39", "throughput": 3857.47, "total_tokens": 24114144}
|
| 7393 |
+
{"current_steps": 36870, "total_steps": 38160, "loss": 0.3335, "lr": 1.7412282707708062e-07, "epoch": 19.32389937106918, "percentage": 96.62, "elapsed_time": "1:44:12", "remaining_time": "0:03:38", "throughput": 3857.54, "total_tokens": 24117472}
|
| 7394 |
+
{"current_steps": 36875, "total_steps": 38160, "loss": 0.3743, "lr": 1.7277824778235974e-07, "epoch": 19.32651991614256, "percentage": 96.63, "elapsed_time": "1:44:12", "remaining_time": "0:03:37", "throughput": 3857.53, "total_tokens": 24120576}
|
| 7395 |
+
{"current_steps": 36880, "total_steps": 38160, "loss": 0.3992, "lr": 1.714388620591867e-07, "epoch": 19.329140461215932, "percentage": 96.65, "elapsed_time": "1:44:13", "remaining_time": "0:03:37", "throughput": 3857.47, "total_tokens": 24123072}
|
| 7396 |
+
{"current_steps": 36885, "total_steps": 38160, "loss": 0.3997, "lr": 1.701046701877429e-07, "epoch": 19.33176100628931, "percentage": 96.66, "elapsed_time": "1:44:14", "remaining_time": "0:03:36", "throughput": 3857.43, "total_tokens": 24125952}
|
| 7397 |
+
{"current_steps": 36890, "total_steps": 38160, "loss": 0.4973, "lr": 1.6877567244713011e-07, "epoch": 19.334381551362682, "percentage": 96.67, "elapsed_time": "1:44:15", "remaining_time": "0:03:35", "throughput": 3857.43, "total_tokens": 24128896}
|
| 7398 |
+
{"current_steps": 36895, "total_steps": 38160, "loss": 0.3958, "lr": 1.6745186911535372e-07, "epoch": 19.33700209643606, "percentage": 96.69, "elapsed_time": "1:44:15", "remaining_time": "0:03:34", "throughput": 3857.44, "total_tokens": 24131936}
|
| 7399 |
+
{"current_steps": 36900, "total_steps": 38160, "loss": 0.5364, "lr": 1.6613326046934497e-07, "epoch": 19.339622641509433, "percentage": 96.7, "elapsed_time": "1:44:16", "remaining_time": "0:03:33", "throughput": 3857.43, "total_tokens": 24134912}
|
| 7400 |
+
{"current_steps": 36905, "total_steps": 38160, "loss": 0.4722, "lr": 1.64819846784936e-07, "epoch": 19.34224318658281, "percentage": 96.71, "elapsed_time": "1:44:17", "remaining_time": "0:03:32", "throughput": 3857.33, "total_tokens": 24137312}
|
| 7401 |
+
{"current_steps": 36910, "total_steps": 38160, "loss": 0.3753, "lr": 1.6351162833688204e-07, "epoch": 19.344863731656183, "percentage": 96.72, "elapsed_time": "1:44:18", "remaining_time": "0:03:31", "throughput": 3857.39, "total_tokens": 24141024}
|
| 7402 |
+
{"current_steps": 36915, "total_steps": 38160, "loss": 0.3245, "lr": 1.6220860539884475e-07, "epoch": 19.34748427672956, "percentage": 96.74, "elapsed_time": "1:44:19", "remaining_time": "0:03:31", "throughput": 3857.41, "total_tokens": 24144512}
|
| 7403 |
+
{"current_steps": 36920, "total_steps": 38160, "loss": 0.3536, "lr": 1.6091077824340327e-07, "epoch": 19.350104821802937, "percentage": 96.75, "elapsed_time": "1:44:20", "remaining_time": "0:03:30", "throughput": 3857.41, "total_tokens": 24147392}
|
| 7404 |
+
{"current_steps": 36925, "total_steps": 38160, "loss": 0.464, "lr": 1.5961814714204605e-07, "epoch": 19.35272536687631, "percentage": 96.76, "elapsed_time": "1:44:20", "remaining_time": "0:03:29", "throughput": 3857.36, "total_tokens": 24149824}
|
| 7405 |
+
{"current_steps": 36930, "total_steps": 38160, "loss": 0.4991, "lr": 1.5833071236518172e-07, "epoch": 19.355345911949687, "percentage": 96.78, "elapsed_time": "1:44:21", "remaining_time": "0:03:28", "throughput": 3857.33, "total_tokens": 24152768}
|
| 7406 |
+
{"current_steps": 36935, "total_steps": 38160, "loss": 0.4341, "lr": 1.5704847418212266e-07, "epoch": 19.35796645702306, "percentage": 96.79, "elapsed_time": "1:44:22", "remaining_time": "0:03:27", "throughput": 3857.41, "total_tokens": 24156480}
|
| 7407 |
+
{"current_steps": 36940, "total_steps": 38160, "loss": 0.4639, "lr": 1.5577143286109875e-07, "epoch": 19.360587002096437, "percentage": 96.8, "elapsed_time": "1:44:23", "remaining_time": "0:03:26", "throughput": 3857.38, "total_tokens": 24159136}
|
| 7408 |
+
{"current_steps": 36945, "total_steps": 38160, "loss": 0.3521, "lr": 1.5449958866925185e-07, "epoch": 19.36320754716981, "percentage": 96.82, "elapsed_time": "1:44:23", "remaining_time": "0:03:26", "throughput": 3857.5, "total_tokens": 24163232}
|
| 7409 |
+
{"current_steps": 36950, "total_steps": 38160, "loss": 0.405, "lr": 1.532329418726386e-07, "epoch": 19.365828092243188, "percentage": 96.83, "elapsed_time": "1:44:24", "remaining_time": "0:03:25", "throughput": 3857.55, "total_tokens": 24166624}
|
| 7410 |
+
{"current_steps": 36955, "total_steps": 38160, "loss": 0.4153, "lr": 1.5197149273623036e-07, "epoch": 19.36844863731656, "percentage": 96.84, "elapsed_time": "1:44:25", "remaining_time": "0:03:24", "throughput": 3857.55, "total_tokens": 24169440}
|
| 7411 |
+
{"current_steps": 36960, "total_steps": 38160, "loss": 0.3208, "lr": 1.5071524152389936e-07, "epoch": 19.371069182389938, "percentage": 96.86, "elapsed_time": "1:44:26", "remaining_time": "0:03:23", "throughput": 3857.59, "total_tokens": 24172960}
|
| 7412 |
+
{"current_steps": 36965, "total_steps": 38160, "loss": 0.3425, "lr": 1.4946418849844934e-07, "epoch": 19.37368972746331, "percentage": 96.87, "elapsed_time": "1:44:27", "remaining_time": "0:03:22", "throughput": 3857.66, "total_tokens": 24177152}
|
| 7413 |
+
{"current_steps": 36970, "total_steps": 38160, "loss": 0.5224, "lr": 1.4821833392157923e-07, "epoch": 19.37631027253669, "percentage": 96.88, "elapsed_time": "1:44:28", "remaining_time": "0:03:21", "throughput": 3857.72, "total_tokens": 24180672}
|
| 7414 |
+
{"current_steps": 36975, "total_steps": 38160, "loss": 0.4383, "lr": 1.4697767805390838e-07, "epoch": 19.378930817610062, "percentage": 96.89, "elapsed_time": "1:44:28", "remaining_time": "0:03:20", "throughput": 3857.81, "total_tokens": 24184256}
|
| 7415 |
+
{"current_steps": 36980, "total_steps": 38160, "loss": 0.3218, "lr": 1.4574222115497084e-07, "epoch": 19.38155136268344, "percentage": 96.91, "elapsed_time": "1:44:30", "remaining_time": "0:03:20", "throughput": 3857.89, "total_tokens": 24189568}
|
| 7416 |
+
{"current_steps": 36985, "total_steps": 38160, "loss": 0.443, "lr": 1.4451196348320706e-07, "epoch": 19.384171907756812, "percentage": 96.92, "elapsed_time": "1:44:30", "remaining_time": "0:03:19", "throughput": 3857.86, "total_tokens": 24192288}
|
| 7417 |
+
{"current_steps": 36990, "total_steps": 38160, "loss": 0.4322, "lr": 1.4328690529597232e-07, "epoch": 19.38679245283019, "percentage": 96.93, "elapsed_time": "1:44:31", "remaining_time": "0:03:18", "throughput": 3857.9, "total_tokens": 24195584}
|
| 7418 |
+
{"current_steps": 36995, "total_steps": 38160, "loss": 0.4421, "lr": 1.4206704684953943e-07, "epoch": 19.389412997903563, "percentage": 96.95, "elapsed_time": "1:44:32", "remaining_time": "0:03:17", "throughput": 3857.83, "total_tokens": 24198048}
|
| 7419 |
+
{"current_steps": 37000, "total_steps": 38160, "loss": 0.3641, "lr": 1.4085238839908476e-07, "epoch": 19.39203354297694, "percentage": 96.96, "elapsed_time": "1:44:33", "remaining_time": "0:03:16", "throughput": 3857.83, "total_tokens": 24200896}
|
| 7420 |
+
{"current_steps": 37005, "total_steps": 38160, "loss": 0.4058, "lr": 1.3964293019870235e-07, "epoch": 19.394654088050313, "percentage": 96.97, "elapsed_time": "1:44:33", "remaining_time": "0:03:15", "throughput": 3857.87, "total_tokens": 24203968}
|
| 7421 |
+
{"current_steps": 37010, "total_steps": 38160, "loss": 0.3643, "lr": 1.3843867250139807e-07, "epoch": 19.39727463312369, "percentage": 96.99, "elapsed_time": "1:44:34", "remaining_time": "0:03:14", "throughput": 3857.92, "total_tokens": 24207712}
|
| 7422 |
+
{"current_steps": 37015, "total_steps": 38160, "loss": 0.4022, "lr": 1.3723961555908715e-07, "epoch": 19.399895178197063, "percentage": 97.0, "elapsed_time": "1:44:35", "remaining_time": "0:03:14", "throughput": 3857.94, "total_tokens": 24211040}
|
| 7423 |
+
{"current_steps": 37020, "total_steps": 38160, "loss": 0.442, "lr": 1.3604575962259947e-07, "epoch": 19.40251572327044, "percentage": 97.01, "elapsed_time": "1:44:36", "remaining_time": "0:03:13", "throughput": 3857.96, "total_tokens": 24214592}
|
| 7424 |
+
{"current_steps": 37025, "total_steps": 38160, "loss": 0.4486, "lr": 1.3485710494167692e-07, "epoch": 19.405136268343817, "percentage": 97.03, "elapsed_time": "1:44:37", "remaining_time": "0:03:12", "throughput": 3857.99, "total_tokens": 24217952}
|
| 7425 |
+
{"current_steps": 37030, "total_steps": 38160, "loss": 0.4513, "lr": 1.3367365176496783e-07, "epoch": 19.40775681341719, "percentage": 97.04, "elapsed_time": "1:44:38", "remaining_time": "0:03:11", "throughput": 3858.06, "total_tokens": 24221344}
|
| 7426 |
+
{"current_steps": 37035, "total_steps": 38160, "loss": 0.6703, "lr": 1.324954003400436e-07, "epoch": 19.410377358490567, "percentage": 97.05, "elapsed_time": "1:44:38", "remaining_time": "0:03:10", "throughput": 3858.1, "total_tokens": 24224640}
|
| 7427 |
+
{"current_steps": 37040, "total_steps": 38160, "loss": 0.4939, "lr": 1.313223509133793e-07, "epoch": 19.41299790356394, "percentage": 97.06, "elapsed_time": "1:44:39", "remaining_time": "0:03:09", "throughput": 3858.08, "total_tokens": 24227424}
|
| 7428 |
+
{"current_steps": 37045, "total_steps": 38160, "loss": 0.4171, "lr": 1.301545037303592e-07, "epoch": 19.415618448637318, "percentage": 97.08, "elapsed_time": "1:44:40", "remaining_time": "0:03:09", "throughput": 3858.15, "total_tokens": 24231424}
|
| 7429 |
+
{"current_steps": 37050, "total_steps": 38160, "loss": 0.5492, "lr": 1.2899185903529066e-07, "epoch": 19.41823899371069, "percentage": 97.09, "elapsed_time": "1:44:41", "remaining_time": "0:03:08", "throughput": 3858.11, "total_tokens": 24234176}
|
| 7430 |
+
{"current_steps": 37055, "total_steps": 38160, "loss": 0.4597, "lr": 1.278344170713791e-07, "epoch": 19.420859538784068, "percentage": 97.1, "elapsed_time": "1:44:42", "remaining_time": "0:03:07", "throughput": 3858.08, "total_tokens": 24236992}
|
| 7431 |
+
{"current_steps": 37060, "total_steps": 38160, "loss": 0.3967, "lr": 1.2668217808075033e-07, "epoch": 19.42348008385744, "percentage": 97.12, "elapsed_time": "1:44:42", "remaining_time": "0:03:06", "throughput": 3858.06, "total_tokens": 24239936}
|
| 7432 |
+
{"current_steps": 37065, "total_steps": 38160, "loss": 0.4004, "lr": 1.255351423044393e-07, "epoch": 19.42610062893082, "percentage": 97.13, "elapsed_time": "1:44:43", "remaining_time": "0:03:05", "throughput": 3858.08, "total_tokens": 24242976}
|
| 7433 |
+
{"current_steps": 37070, "total_steps": 38160, "loss": 0.3966, "lr": 1.2439330998239572e-07, "epoch": 19.428721174004192, "percentage": 97.14, "elapsed_time": "1:44:44", "remaining_time": "0:03:04", "throughput": 3858.03, "total_tokens": 24245504}
|
| 7434 |
+
{"current_steps": 37075, "total_steps": 38160, "loss": 0.5393, "lr": 1.2325668135347578e-07, "epoch": 19.43134171907757, "percentage": 97.16, "elapsed_time": "1:44:45", "remaining_time": "0:03:03", "throughput": 3858.09, "total_tokens": 24248704}
|
| 7435 |
+
{"current_steps": 37080, "total_steps": 38160, "loss": 0.4592, "lr": 1.221252566554476e-07, "epoch": 19.433962264150942, "percentage": 97.17, "elapsed_time": "1:44:46", "remaining_time": "0:03:03", "throughput": 3858.14, "total_tokens": 24252288}
|
| 7436 |
+
{"current_steps": 37085, "total_steps": 38160, "loss": 0.4229, "lr": 1.2099903612499685e-07, "epoch": 19.43658280922432, "percentage": 97.18, "elapsed_time": "1:44:46", "remaining_time": "0:03:02", "throughput": 3858.23, "total_tokens": 24255776}
|
| 7437 |
+
{"current_steps": 37090, "total_steps": 38160, "loss": 0.4564, "lr": 1.198780199977101e-07, "epoch": 19.439203354297693, "percentage": 97.2, "elapsed_time": "1:44:47", "remaining_time": "0:03:01", "throughput": 3858.19, "total_tokens": 24258624}
|
| 7438 |
+
{"current_steps": 37095, "total_steps": 38160, "loss": 0.4707, "lr": 1.1876220850809416e-07, "epoch": 19.44182389937107, "percentage": 97.21, "elapsed_time": "1:44:48", "remaining_time": "0:03:00", "throughput": 3858.15, "total_tokens": 24261120}
|
| 7439 |
+
{"current_steps": 37100, "total_steps": 38160, "loss": 0.3749, "lr": 1.1765160188956514e-07, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "1:44:49", "remaining_time": "0:02:59", "throughput": 3858.15, "total_tokens": 24264032}
|
| 7440 |
+
{"current_steps": 37105, "total_steps": 38160, "loss": 0.2814, "lr": 1.1654620037445107e-07, "epoch": 19.44706498951782, "percentage": 97.24, "elapsed_time": "1:44:49", "remaining_time": "0:02:58", "throughput": 3858.21, "total_tokens": 24267424}
|
| 7441 |
+
{"current_steps": 37110, "total_steps": 38160, "loss": 0.5264, "lr": 1.1544600419398088e-07, "epoch": 19.449685534591197, "percentage": 97.25, "elapsed_time": "1:44:50", "remaining_time": "0:02:57", "throughput": 3858.26, "total_tokens": 24270976}
|
| 7442 |
+
{"current_steps": 37115, "total_steps": 38160, "loss": 0.4682, "lr": 1.1435101357831213e-07, "epoch": 19.45230607966457, "percentage": 97.26, "elapsed_time": "1:44:51", "remaining_time": "0:02:57", "throughput": 3858.23, "total_tokens": 24273856}
|
| 7443 |
+
{"current_steps": 37120, "total_steps": 38160, "loss": 0.4744, "lr": 1.1326122875650047e-07, "epoch": 19.454926624737947, "percentage": 97.27, "elapsed_time": "1:44:52", "remaining_time": "0:02:56", "throughput": 3858.27, "total_tokens": 24277536}
|
| 7444 |
+
{"current_steps": 37125, "total_steps": 38160, "loss": 0.4871, "lr": 1.1217664995651633e-07, "epoch": 19.45754716981132, "percentage": 97.29, "elapsed_time": "1:44:53", "remaining_time": "0:02:55", "throughput": 3858.38, "total_tokens": 24283520}
|
| 7445 |
+
{"current_steps": 37130, "total_steps": 38160, "loss": 0.4595, "lr": 1.1109727740523935e-07, "epoch": 19.460167714884697, "percentage": 97.3, "elapsed_time": "1:44:54", "remaining_time": "0:02:54", "throughput": 3858.39, "total_tokens": 24286592}
|
| 7446 |
+
{"current_steps": 37135, "total_steps": 38160, "loss": 0.4411, "lr": 1.1002311132846944e-07, "epoch": 19.46278825995807, "percentage": 97.31, "elapsed_time": "1:44:55", "remaining_time": "0:02:53", "throughput": 3858.49, "total_tokens": 24290624}
|
| 7447 |
+
{"current_steps": 37140, "total_steps": 38160, "loss": 0.4525, "lr": 1.0895415195090186e-07, "epoch": 19.465408805031448, "percentage": 97.33, "elapsed_time": "1:44:56", "remaining_time": "0:02:52", "throughput": 3858.49, "total_tokens": 24293568}
|
| 7448 |
+
{"current_steps": 37145, "total_steps": 38160, "loss": 0.5063, "lr": 1.0789039949615221e-07, "epoch": 19.46802935010482, "percentage": 97.34, "elapsed_time": "1:44:56", "remaining_time": "0:02:52", "throughput": 3858.46, "total_tokens": 24296288}
|
| 7449 |
+
{"current_steps": 37150, "total_steps": 38160, "loss": 0.4339, "lr": 1.0683185418674801e-07, "epoch": 19.470649895178198, "percentage": 97.35, "elapsed_time": "1:44:57", "remaining_time": "0:02:51", "throughput": 3858.37, "total_tokens": 24298592}
|
| 7450 |
+
{"current_steps": 37155, "total_steps": 38160, "loss": 0.421, "lr": 1.0577851624412604e-07, "epoch": 19.47327044025157, "percentage": 97.37, "elapsed_time": "1:44:58", "remaining_time": "0:02:50", "throughput": 3858.4, "total_tokens": 24301856}
|
| 7451 |
+
{"current_steps": 37160, "total_steps": 38160, "loss": 0.3413, "lr": 1.0473038588862672e-07, "epoch": 19.47589098532495, "percentage": 97.38, "elapsed_time": "1:44:59", "remaining_time": "0:02:49", "throughput": 3858.41, "total_tokens": 24304832}
|
| 7452 |
+
{"current_steps": 37165, "total_steps": 38160, "loss": 0.4129, "lr": 1.0368746333951351e-07, "epoch": 19.478511530398322, "percentage": 97.39, "elapsed_time": "1:45:00", "remaining_time": "0:02:48", "throughput": 3858.43, "total_tokens": 24308480}
|
| 7453 |
+
{"current_steps": 37170, "total_steps": 38160, "loss": 0.4514, "lr": 1.0264974881494805e-07, "epoch": 19.4811320754717, "percentage": 97.41, "elapsed_time": "1:45:00", "remaining_time": "0:02:47", "throughput": 3858.45, "total_tokens": 24311552}
|
| 7454 |
+
{"current_steps": 37175, "total_steps": 38160, "loss": 0.5717, "lr": 1.0161724253201222e-07, "epoch": 19.483752620545072, "percentage": 97.42, "elapsed_time": "1:45:01", "remaining_time": "0:02:46", "throughput": 3858.43, "total_tokens": 24314336}
|
| 7455 |
+
{"current_steps": 37180, "total_steps": 38160, "loss": 0.5203, "lr": 1.0058994470669436e-07, "epoch": 19.48637316561845, "percentage": 97.43, "elapsed_time": "1:45:02", "remaining_time": "0:02:46", "throughput": 3858.42, "total_tokens": 24317184}
|
| 7456 |
+
{"current_steps": 37185, "total_steps": 38160, "loss": 0.4698, "lr": 9.956785555388926e-08, "epoch": 19.488993710691823, "percentage": 97.44, "elapsed_time": "1:45:03", "remaining_time": "0:02:45", "throughput": 3858.29, "total_tokens": 24320544}
|
| 7457 |
+
{"current_steps": 37190, "total_steps": 38160, "loss": 0.4878, "lr": 9.855097528741475e-08, "epoch": 19.4916142557652, "percentage": 97.46, "elapsed_time": "1:45:04", "remaining_time": "0:02:44", "throughput": 3858.37, "total_tokens": 24324192}
|
| 7458 |
+
{"current_steps": 37195, "total_steps": 38160, "loss": 0.423, "lr": 9.753930411998402e-08, "epoch": 19.494234800838573, "percentage": 97.47, "elapsed_time": "1:45:05", "remaining_time": "0:02:43", "throughput": 3858.46, "total_tokens": 24328000}
|
| 7459 |
+
{"current_steps": 37200, "total_steps": 38160, "loss": 0.5097, "lr": 9.653284226322501e-08, "epoch": 19.49685534591195, "percentage": 97.48, "elapsed_time": "1:45:05", "remaining_time": "0:02:42", "throughput": 3858.49, "total_tokens": 24331136}
|
| 7460 |
+
{"current_steps": 37205, "total_steps": 38160, "loss": 0.4127, "lr": 9.553158992768318e-08, "epoch": 19.499475890985323, "percentage": 97.5, "elapsed_time": "1:45:06", "remaining_time": "0:02:41", "throughput": 3858.52, "total_tokens": 24334368}
|
| 7461 |
+
{"current_steps": 37210, "total_steps": 38160, "loss": 0.4585, "lr": 9.453554732281044e-08, "epoch": 19.5020964360587, "percentage": 97.51, "elapsed_time": "1:45:07", "remaining_time": "0:02:41", "throughput": 3858.53, "total_tokens": 24337248}
|
| 7462 |
+
{"current_steps": 37215, "total_steps": 38160, "loss": 0.4618, "lr": 9.354471465696236e-08, "epoch": 19.504716981132077, "percentage": 97.52, "elapsed_time": "1:45:08", "remaining_time": "0:02:40", "throughput": 3858.54, "total_tokens": 24340352}
|
| 7463 |
+
{"current_steps": 37220, "total_steps": 38160, "loss": 0.6399, "lr": 9.255909213741199e-08, "epoch": 19.50733752620545, "percentage": 97.54, "elapsed_time": "1:45:08", "remaining_time": "0:02:39", "throughput": 3858.56, "total_tokens": 24343584}
|
| 7464 |
+
{"current_steps": 37225, "total_steps": 38160, "loss": 0.484, "lr": 9.157867997034164e-08, "epoch": 19.509958071278827, "percentage": 97.55, "elapsed_time": "1:45:09", "remaining_time": "0:02:38", "throughput": 3858.59, "total_tokens": 24346880}
|
| 7465 |
+
{"current_steps": 37230, "total_steps": 38160, "loss": 0.2973, "lr": 9.060347836084004e-08, "epoch": 19.5125786163522, "percentage": 97.56, "elapsed_time": "1:45:10", "remaining_time": "0:02:37", "throughput": 3858.72, "total_tokens": 24351552}
|
| 7466 |
+
{"current_steps": 37235, "total_steps": 38160, "loss": 0.4056, "lr": 8.96334875129079e-08, "epoch": 19.515199161425578, "percentage": 97.58, "elapsed_time": "1:45:11", "remaining_time": "0:02:36", "throughput": 3858.76, "total_tokens": 24355360}
|
| 7467 |
+
{"current_steps": 37240, "total_steps": 38160, "loss": 0.3697, "lr": 8.866870762946067e-08, "epoch": 19.51781970649895, "percentage": 97.59, "elapsed_time": "1:45:12", "remaining_time": "0:02:35", "throughput": 3858.76, "total_tokens": 24358336}
|
| 7468 |
+
{"current_steps": 37245, "total_steps": 38160, "loss": 0.4712, "lr": 8.77091389123147e-08, "epoch": 19.520440251572328, "percentage": 97.6, "elapsed_time": "1:45:13", "remaining_time": "0:02:35", "throughput": 3858.72, "total_tokens": 24360992}
|
| 7469 |
+
{"current_steps": 37250, "total_steps": 38160, "loss": 0.5191, "lr": 8.675478156220385e-08, "epoch": 19.5230607966457, "percentage": 97.62, "elapsed_time": "1:45:13", "remaining_time": "0:02:34", "throughput": 3858.65, "total_tokens": 24363296}
|
| 7470 |
+
{"current_steps": 37255, "total_steps": 38160, "loss": 0.3216, "lr": 8.580563577876566e-08, "epoch": 19.52568134171908, "percentage": 97.63, "elapsed_time": "1:45:14", "remaining_time": "0:02:33", "throughput": 3858.54, "total_tokens": 24365568}
|
| 7471 |
+
{"current_steps": 37260, "total_steps": 38160, "loss": 0.3845, "lr": 8.486170176055242e-08, "epoch": 19.528301886792452, "percentage": 97.64, "elapsed_time": "1:45:15", "remaining_time": "0:02:32", "throughput": 3858.55, "total_tokens": 24368672}
|
| 7472 |
+
{"current_steps": 37265, "total_steps": 38160, "loss": 0.3525, "lr": 8.392297970502282e-08, "epoch": 19.53092243186583, "percentage": 97.65, "elapsed_time": "1:45:16", "remaining_time": "0:02:31", "throughput": 3858.53, "total_tokens": 24371456}
|
| 7473 |
+
{"current_steps": 37270, "total_steps": 38160, "loss": 0.4334, "lr": 8.298946980855315e-08, "epoch": 19.533542976939202, "percentage": 97.67, "elapsed_time": "1:45:17", "remaining_time": "0:02:30", "throughput": 3858.57, "total_tokens": 24375200}
|
| 7474 |
+
{"current_steps": 37275, "total_steps": 38160, "loss": 0.4631, "lr": 8.206117226641497e-08, "epoch": 19.53616352201258, "percentage": 97.68, "elapsed_time": "1:45:17", "remaining_time": "0:02:30", "throughput": 3858.54, "total_tokens": 24377824}
|
| 7475 |
+
{"current_steps": 37280, "total_steps": 38160, "loss": 0.3837, "lr": 8.113808727280015e-08, "epoch": 19.538784067085953, "percentage": 97.69, "elapsed_time": "1:45:18", "remaining_time": "0:02:29", "throughput": 3858.55, "total_tokens": 24381184}
|
| 7476 |
+
{"current_steps": 37285, "total_steps": 38160, "loss": 0.3452, "lr": 8.022021502080979e-08, "epoch": 19.54140461215933, "percentage": 97.71, "elapsed_time": "1:45:19", "remaining_time": "0:02:28", "throughput": 3858.61, "total_tokens": 24384448}
|
| 7477 |
+
{"current_steps": 37290, "total_steps": 38160, "loss": 0.3095, "lr": 7.930755570244863e-08, "epoch": 19.544025157232703, "percentage": 97.72, "elapsed_time": "1:45:20", "remaining_time": "0:02:27", "throughput": 3858.59, "total_tokens": 24387104}
|
| 7478 |
+
{"current_steps": 37295, "total_steps": 38160, "loss": 0.4715, "lr": 7.840010950863619e-08, "epoch": 19.54664570230608, "percentage": 97.73, "elapsed_time": "1:45:20", "remaining_time": "0:02:26", "throughput": 3858.56, "total_tokens": 24389792}
|
| 7479 |
+
{"current_steps": 37300, "total_steps": 38160, "loss": 0.4978, "lr": 7.749787662920116e-08, "epoch": 19.549266247379457, "percentage": 97.75, "elapsed_time": "1:45:21", "remaining_time": "0:02:25", "throughput": 3858.57, "total_tokens": 24393632}
|
| 7480 |
+
{"current_steps": 37305, "total_steps": 38160, "loss": 0.3695, "lr": 7.660085725288146e-08, "epoch": 19.55188679245283, "percentage": 97.76, "elapsed_time": "1:45:22", "remaining_time": "0:02:24", "throughput": 3858.63, "total_tokens": 24397152}
|
| 7481 |
+
{"current_steps": 37310, "total_steps": 38160, "loss": 0.4756, "lr": 7.570905156731867e-08, "epoch": 19.554507337526207, "percentage": 97.77, "elapsed_time": "1:45:23", "remaining_time": "0:02:24", "throughput": 3858.74, "total_tokens": 24401152}
|
| 7482 |
+
{"current_steps": 37315, "total_steps": 38160, "loss": 0.453, "lr": 7.482245975907188e-08, "epoch": 19.55712788259958, "percentage": 97.79, "elapsed_time": "1:45:24", "remaining_time": "0:02:23", "throughput": 3858.78, "total_tokens": 24404896}
|
| 7483 |
+
{"current_steps": 37320, "total_steps": 38160, "loss": 0.4291, "lr": 7.394108201360383e-08, "epoch": 19.559748427672957, "percentage": 97.8, "elapsed_time": "1:45:25", "remaining_time": "0:02:22", "throughput": 3858.78, "total_tokens": 24407712}
|
| 7484 |
+
{"current_steps": 37325, "total_steps": 38160, "loss": 0.4296, "lr": 7.306491851529206e-08, "epoch": 19.56236897274633, "percentage": 97.81, "elapsed_time": "1:45:26", "remaining_time": "0:02:21", "throughput": 3858.84, "total_tokens": 24411232}
|
| 7485 |
+
{"current_steps": 37330, "total_steps": 38160, "loss": 0.3686, "lr": 7.219396944741774e-08, "epoch": 19.564989517819708, "percentage": 97.82, "elapsed_time": "1:45:26", "remaining_time": "0:02:20", "throughput": 3858.85, "total_tokens": 24414432}
|
| 7486 |
+
{"current_steps": 37335, "total_steps": 38160, "loss": 0.4256, "lr": 7.132823499217123e-08, "epoch": 19.56761006289308, "percentage": 97.84, "elapsed_time": "1:45:27", "remaining_time": "0:02:19", "throughput": 3858.86, "total_tokens": 24417600}
|
| 7487 |
+
{"current_steps": 37340, "total_steps": 38160, "loss": 0.3906, "lr": 7.046771533065766e-08, "epoch": 19.570230607966458, "percentage": 97.85, "elapsed_time": "1:45:28", "remaining_time": "0:02:18", "throughput": 3858.86, "total_tokens": 24420416}
|
| 7488 |
+
{"current_steps": 37345, "total_steps": 38160, "loss": 0.4479, "lr": 6.961241064288859e-08, "epoch": 19.57285115303983, "percentage": 97.86, "elapsed_time": "1:45:29", "remaining_time": "0:02:18", "throughput": 3858.93, "total_tokens": 24424000}
|
| 7489 |
+
{"current_steps": 37350, "total_steps": 38160, "loss": 0.3888, "lr": 6.876232110778202e-08, "epoch": 19.57547169811321, "percentage": 97.88, "elapsed_time": "1:45:30", "remaining_time": "0:02:17", "throughput": 3859.0, "total_tokens": 24427552}
|
| 7490 |
+
{"current_steps": 37355, "total_steps": 38160, "loss": 0.4454, "lr": 6.791744690316515e-08, "epoch": 19.578092243186582, "percentage": 97.89, "elapsed_time": "1:45:30", "remaining_time": "0:02:16", "throughput": 3858.97, "total_tokens": 24430176}
|
| 7491 |
+
{"current_steps": 37360, "total_steps": 38160, "loss": 0.4247, "lr": 6.707778820577715e-08, "epoch": 19.58071278825996, "percentage": 97.9, "elapsed_time": "1:45:31", "remaining_time": "0:02:15", "throughput": 3858.98, "total_tokens": 24433312}
|
| 7492 |
+
{"current_steps": 37365, "total_steps": 38160, "loss": 0.3052, "lr": 6.624334519126919e-08, "epoch": 19.583333333333332, "percentage": 97.92, "elapsed_time": "1:45:32", "remaining_time": "0:02:14", "throughput": 3859.03, "total_tokens": 24436608}
|
| 7493 |
+
{"current_steps": 37370, "total_steps": 38160, "loss": 0.5175, "lr": 6.541411803419329e-08, "epoch": 19.58595387840671, "percentage": 97.93, "elapsed_time": "1:45:33", "remaining_time": "0:02:13", "throughput": 3858.99, "total_tokens": 24439136}
|
| 7494 |
+
{"current_steps": 37375, "total_steps": 38160, "loss": 0.3319, "lr": 6.459010690801626e-08, "epoch": 19.588574423480082, "percentage": 97.94, "elapsed_time": "1:45:33", "remaining_time": "0:02:13", "throughput": 3859.0, "total_tokens": 24442880}
|
| 7495 |
+
{"current_steps": 37380, "total_steps": 38160, "loss": 0.3913, "lr": 6.377131198510855e-08, "epoch": 19.59119496855346, "percentage": 97.96, "elapsed_time": "1:45:34", "remaining_time": "0:02:12", "throughput": 3859.04, "total_tokens": 24446240}
|
| 7496 |
+
{"current_steps": 37385, "total_steps": 38160, "loss": 0.5192, "lr": 6.295773343675537e-08, "epoch": 19.593815513626833, "percentage": 97.97, "elapsed_time": "1:45:35", "remaining_time": "0:02:11", "throughput": 3859.06, "total_tokens": 24449376}
|
| 7497 |
+
{"current_steps": 37390, "total_steps": 38160, "loss": 0.5417, "lr": 6.214937143314837e-08, "epoch": 19.59643605870021, "percentage": 97.98, "elapsed_time": "1:45:36", "remaining_time": "0:02:10", "throughput": 3859.08, "total_tokens": 24452608}
|
| 7498 |
+
{"current_steps": 37395, "total_steps": 38160, "loss": 0.5083, "lr": 6.134622614338559e-08, "epoch": 19.599056603773583, "percentage": 98.0, "elapsed_time": "1:45:37", "remaining_time": "0:02:09", "throughput": 3859.01, "total_tokens": 24455008}
|
| 7499 |
+
{"current_steps": 37400, "total_steps": 38160, "loss": 0.5119, "lr": 6.05482977354771e-08, "epoch": 19.60167714884696, "percentage": 98.01, "elapsed_time": "1:45:38", "remaining_time": "0:02:08", "throughput": 3859.13, "total_tokens": 24459488}
|
| 7500 |
+
{"current_steps": 37405, "total_steps": 38160, "loss": 0.5157, "lr": 5.975558637634215e-08, "epoch": 19.604297693920337, "percentage": 98.02, "elapsed_time": "1:45:39", "remaining_time": "0:02:07", "throughput": 3859.14, "total_tokens": 24463136}
|
| 7501 |
+
{"current_steps": 37410, "total_steps": 38160, "loss": 0.4454, "lr": 5.896809223180366e-08, "epoch": 19.60691823899371, "percentage": 98.03, "elapsed_time": "1:45:39", "remaining_time": "0:02:07", "throughput": 3859.15, "total_tokens": 24466240}
|
| 7502 |
+
{"current_steps": 37415, "total_steps": 38160, "loss": 0.425, "lr": 5.8185815466596516e-08, "epoch": 19.609538784067087, "percentage": 98.05, "elapsed_time": "1:45:40", "remaining_time": "0:02:06", "throughput": 3859.14, "total_tokens": 24469344}
|
| 7503 |
+
{"current_steps": 37420, "total_steps": 38160, "loss": 0.4923, "lr": 5.7408756244370365e-08, "epoch": 19.61215932914046, "percentage": 98.06, "elapsed_time": "1:45:41", "remaining_time": "0:02:05", "throughput": 3859.09, "total_tokens": 24471872}
|
| 7504 |
+
{"current_steps": 37425, "total_steps": 38160, "loss": 0.4626, "lr": 5.663691472766741e-08, "epoch": 19.614779874213838, "percentage": 98.07, "elapsed_time": "1:45:42", "remaining_time": "0:02:04", "throughput": 3859.08, "total_tokens": 24474752}
|
| 7505 |
+
{"current_steps": 37430, "total_steps": 38160, "loss": 0.5325, "lr": 5.587029107795572e-08, "epoch": 19.61740041928721, "percentage": 98.09, "elapsed_time": "1:45:43", "remaining_time": "0:02:03", "throughput": 3859.25, "total_tokens": 24480000}
|
| 7506 |
+
{"current_steps": 37435, "total_steps": 38160, "loss": 0.4392, "lr": 5.510888545559867e-08, "epoch": 19.620020964360588, "percentage": 98.1, "elapsed_time": "1:45:43", "remaining_time": "0:02:02", "throughput": 3859.24, "total_tokens": 24482656}
|
| 7507 |
+
{"current_steps": 37440, "total_steps": 38160, "loss": 0.4486, "lr": 5.435269801987997e-08, "epoch": 19.62264150943396, "percentage": 98.11, "elapsed_time": "1:45:44", "remaining_time": "0:02:02", "throughput": 3859.27, "total_tokens": 24485856}
|
| 7508 |
+
{"current_steps": 37445, "total_steps": 38160, "loss": 0.4782, "lr": 5.360172892897863e-08, "epoch": 19.62526205450734, "percentage": 98.13, "elapsed_time": "1:45:45", "remaining_time": "0:02:01", "throughput": 3859.33, "total_tokens": 24489600}
|
| 7509 |
+
{"current_steps": 37450, "total_steps": 38160, "loss": 0.357, "lr": 5.285597833999401e-08, "epoch": 19.627882599580712, "percentage": 98.14, "elapsed_time": "1:45:46", "remaining_time": "0:02:00", "throughput": 3859.3, "total_tokens": 24492160}
|
| 7510 |
+
{"current_steps": 37455, "total_steps": 38160, "loss": 0.435, "lr": 5.2115446408926295e-08, "epoch": 19.63050314465409, "percentage": 98.15, "elapsed_time": "1:45:47", "remaining_time": "0:01:59", "throughput": 3859.29, "total_tokens": 24495200}
|
| 7511 |
+
{"current_steps": 37460, "total_steps": 38160, "loss": 0.3938, "lr": 5.138013329068769e-08, "epoch": 19.633123689727462, "percentage": 98.17, "elapsed_time": "1:45:47", "remaining_time": "0:01:58", "throughput": 3859.3, "total_tokens": 24498624}
|
| 7512 |
+
{"current_steps": 37465, "total_steps": 38160, "loss": 0.4888, "lr": 5.065003913909405e-08, "epoch": 19.63574423480084, "percentage": 98.18, "elapsed_time": "1:45:48", "remaining_time": "0:01:57", "throughput": 3859.32, "total_tokens": 24501760}
|
| 7513 |
+
{"current_steps": 37470, "total_steps": 38160, "loss": 0.4499, "lr": 4.9925164106875975e-08, "epoch": 19.638364779874212, "percentage": 98.19, "elapsed_time": "1:45:49", "remaining_time": "0:01:56", "throughput": 3859.46, "total_tokens": 24506560}
|
| 7514 |
+
{"current_steps": 37475, "total_steps": 38160, "loss": 0.4016, "lr": 4.9205508345670506e-08, "epoch": 19.64098532494759, "percentage": 98.2, "elapsed_time": "1:45:50", "remaining_time": "0:01:56", "throughput": 3859.46, "total_tokens": 24509472}
|
| 7515 |
+
{"current_steps": 37480, "total_steps": 38160, "loss": 0.3965, "lr": 4.8491072006015546e-08, "epoch": 19.643605870020963, "percentage": 98.22, "elapsed_time": "1:45:51", "remaining_time": "0:01:55", "throughput": 3859.49, "total_tokens": 24512608}
|
| 7516 |
+
{"current_steps": 37485, "total_steps": 38160, "loss": 0.4034, "lr": 4.778185523736933e-08, "epoch": 19.64622641509434, "percentage": 98.23, "elapsed_time": "1:45:52", "remaining_time": "0:01:54", "throughput": 3859.56, "total_tokens": 24516064}
|
| 7517 |
+
{"current_steps": 37490, "total_steps": 38160, "loss": 0.3198, "lr": 4.7077858188085414e-08, "epoch": 19.648846960167717, "percentage": 98.24, "elapsed_time": "1:45:52", "remaining_time": "0:01:53", "throughput": 3859.63, "total_tokens": 24519744}
|
| 7518 |
+
{"current_steps": 37495, "total_steps": 38160, "loss": 0.4499, "lr": 4.6379081005437644e-08, "epoch": 19.65146750524109, "percentage": 98.26, "elapsed_time": "1:45:53", "remaining_time": "0:01:52", "throughput": 3859.69, "total_tokens": 24523072}
|
| 7519 |
+
{"current_steps": 37500, "total_steps": 38160, "loss": 0.3766, "lr": 4.568552383560076e-08, "epoch": 19.654088050314467, "percentage": 98.27, "elapsed_time": "1:45:54", "remaining_time": "0:01:51", "throughput": 3859.76, "total_tokens": 24526752}
|
| 7520 |
+
{"current_steps": 37505, "total_steps": 38160, "loss": 0.4869, "lr": 4.4997186823655945e-08, "epoch": 19.65670859538784, "percentage": 98.28, "elapsed_time": "1:45:55", "remaining_time": "0:01:50", "throughput": 3859.71, "total_tokens": 24529184}
|
| 7521 |
+
{"current_steps": 37510, "total_steps": 38160, "loss": 0.4611, "lr": 4.431407011360189e-08, "epoch": 19.659329140461217, "percentage": 98.3, "elapsed_time": "1:45:55", "remaining_time": "0:01:50", "throughput": 3859.77, "total_tokens": 24532576}
|
| 7522 |
+
{"current_steps": 37515, "total_steps": 38160, "loss": 0.349, "lr": 4.3636173848332627e-08, "epoch": 19.66194968553459, "percentage": 98.31, "elapsed_time": "1:45:56", "remaining_time": "0:01:49", "throughput": 3859.76, "total_tokens": 24535648}
|
| 7523 |
+
{"current_steps": 37520, "total_steps": 38160, "loss": 0.3963, "lr": 4.2963498169656946e-08, "epoch": 19.664570230607968, "percentage": 98.32, "elapsed_time": "1:45:57", "remaining_time": "0:01:48", "throughput": 3859.8, "total_tokens": 24539136}
|
| 7524 |
+
{"current_steps": 37525, "total_steps": 38160, "loss": 0.4328, "lr": 4.229604321829561e-08, "epoch": 19.66719077568134, "percentage": 98.34, "elapsed_time": "1:45:58", "remaining_time": "0:01:47", "throughput": 3859.88, "total_tokens": 24543008}
|
| 7525 |
+
{"current_steps": 37530, "total_steps": 38160, "loss": 0.4128, "lr": 4.16338091338675e-08, "epoch": 19.669811320754718, "percentage": 98.35, "elapsed_time": "1:45:59", "remaining_time": "0:01:46", "throughput": 3860.04, "total_tokens": 24549408}
|
| 7526 |
+
{"current_steps": 37535, "total_steps": 38160, "loss": 0.3934, "lr": 4.097679605490623e-08, "epoch": 19.67243186582809, "percentage": 98.36, "elapsed_time": "1:46:00", "remaining_time": "0:01:45", "throughput": 3860.09, "total_tokens": 24552960}
|
| 7527 |
+
{"current_steps": 37540, "total_steps": 38160, "loss": 0.4795, "lr": 4.032500411885465e-08, "epoch": 19.67505241090147, "percentage": 98.38, "elapsed_time": "1:46:01", "remaining_time": "0:01:45", "throughput": 3860.05, "total_tokens": 24555648}
|
| 7528 |
+
{"current_steps": 37545, "total_steps": 38160, "loss": 0.4223, "lr": 3.967843346205369e-08, "epoch": 19.677672955974842, "percentage": 98.39, "elapsed_time": "1:46:02", "remaining_time": "0:01:44", "throughput": 3860.08, "total_tokens": 24558976}
|
| 7529 |
+
{"current_steps": 37550, "total_steps": 38160, "loss": 0.4861, "lr": 3.903708421976182e-08, "epoch": 19.68029350104822, "percentage": 98.4, "elapsed_time": "1:46:03", "remaining_time": "0:01:43", "throughput": 3860.12, "total_tokens": 24562560}
|
| 7530 |
+
{"current_steps": 37555, "total_steps": 38160, "loss": 0.5609, "lr": 3.840095652614395e-08, "epoch": 19.682914046121592, "percentage": 98.41, "elapsed_time": "1:46:03", "remaining_time": "0:01:42", "throughput": 3860.14, "total_tokens": 24565664}
|
| 7531 |
+
{"current_steps": 37560, "total_steps": 38160, "loss": 0.3807, "lr": 3.777005051426863e-08, "epoch": 19.68553459119497, "percentage": 98.43, "elapsed_time": "1:46:04", "remaining_time": "0:01:41", "throughput": 3860.21, "total_tokens": 24569120}
|
| 7532 |
+
{"current_steps": 37565, "total_steps": 38160, "loss": 0.3511, "lr": 3.71443663161164e-08, "epoch": 19.688155136268342, "percentage": 98.44, "elapsed_time": "1:46:05", "remaining_time": "0:01:40", "throughput": 3860.24, "total_tokens": 24572320}
|
| 7533 |
+
{"current_steps": 37570, "total_steps": 38160, "loss": 0.503, "lr": 3.652390406256867e-08, "epoch": 19.69077568134172, "percentage": 98.45, "elapsed_time": "1:46:06", "remaining_time": "0:01:39", "throughput": 3860.27, "total_tokens": 24575808}
|
| 7534 |
+
{"current_steps": 37575, "total_steps": 38160, "loss": 0.3868, "lr": 3.5908663883421624e-08, "epoch": 19.693396226415093, "percentage": 98.47, "elapsed_time": "1:46:07", "remaining_time": "0:01:39", "throughput": 3860.31, "total_tokens": 24579072}
|
| 7535 |
+
{"current_steps": 37580, "total_steps": 38160, "loss": 0.3664, "lr": 3.529864590737786e-08, "epoch": 19.69601677148847, "percentage": 98.48, "elapsed_time": "1:46:07", "remaining_time": "0:01:38", "throughput": 3860.21, "total_tokens": 24581536}
|
| 7536 |
+
{"current_steps": 37585, "total_steps": 38160, "loss": 0.347, "lr": 3.4693850262046415e-08, "epoch": 19.698637316561843, "percentage": 98.49, "elapsed_time": "1:46:08", "remaining_time": "0:01:37", "throughput": 3860.26, "total_tokens": 24585472}
|
| 7537 |
+
{"current_steps": 37590, "total_steps": 38160, "loss": 0.6014, "lr": 3.409427707394275e-08, "epoch": 19.70125786163522, "percentage": 98.51, "elapsed_time": "1:46:09", "remaining_time": "0:01:36", "throughput": 3860.26, "total_tokens": 24588320}
|
| 7538 |
+
{"current_steps": 37595, "total_steps": 38160, "loss": 0.3378, "lr": 3.349992646848599e-08, "epoch": 19.703878406708597, "percentage": 98.52, "elapsed_time": "1:46:10", "remaining_time": "0:01:35", "throughput": 3860.27, "total_tokens": 24591328}
|
| 7539 |
+
{"current_steps": 37600, "total_steps": 38160, "loss": 0.4491, "lr": 3.291079857001556e-08, "epoch": 19.70649895178197, "percentage": 98.53, "elapsed_time": "1:46:11", "remaining_time": "0:01:34", "throughput": 3860.34, "total_tokens": 24594976}
|
| 7540 |
+
{"current_steps": 37605, "total_steps": 38160, "loss": 0.3862, "lr": 3.2326893501763436e-08, "epoch": 19.709119496855347, "percentage": 98.55, "elapsed_time": "1:46:11", "remaining_time": "0:01:34", "throughput": 3860.28, "total_tokens": 24597504}
|
| 7541 |
+
{"current_steps": 37610, "total_steps": 38160, "loss": 0.4758, "lr": 3.174821138588191e-08, "epoch": 19.71174004192872, "percentage": 98.56, "elapsed_time": "1:46:12", "remaining_time": "0:01:33", "throughput": 3860.23, "total_tokens": 24600128}
|
| 7542 |
+
{"current_steps": 37615, "total_steps": 38160, "loss": 0.3378, "lr": 3.11747523434186e-08, "epoch": 19.714360587002098, "percentage": 98.57, "elapsed_time": "1:46:13", "remaining_time": "0:01:32", "throughput": 3860.3, "total_tokens": 24603648}
|
| 7543 |
+
{"current_steps": 37620, "total_steps": 38160, "loss": 0.5153, "lr": 3.060651649434143e-08, "epoch": 19.71698113207547, "percentage": 98.58, "elapsed_time": "1:46:14", "remaining_time": "0:01:31", "throughput": 3860.32, "total_tokens": 24606688}
|
| 7544 |
+
{"current_steps": 37625, "total_steps": 38160, "loss": 0.4182, "lr": 3.004350395751088e-08, "epoch": 19.719601677148848, "percentage": 98.6, "elapsed_time": "1:46:15", "remaining_time": "0:01:30", "throughput": 3860.42, "total_tokens": 24610688}
|
| 7545 |
+
{"current_steps": 37630, "total_steps": 38160, "loss": 0.3324, "lr": 2.94857148507105e-08, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "1:46:16", "remaining_time": "0:01:29", "throughput": 3860.53, "total_tokens": 24615456}
|
| 7546 |
+
{"current_steps": 37635, "total_steps": 38160, "loss": 0.3822, "lr": 2.893314929061641e-08, "epoch": 19.7248427672956, "percentage": 98.62, "elapsed_time": "1:46:16", "remaining_time": "0:01:28", "throughput": 3860.55, "total_tokens": 24618720}
|
| 7547 |
+
{"current_steps": 37640, "total_steps": 38160, "loss": 0.4018, "lr": 2.8385807392825037e-08, "epoch": 19.72746331236897, "percentage": 98.64, "elapsed_time": "1:46:17", "remaining_time": "0:01:28", "throughput": 3860.49, "total_tokens": 24621184}
|
| 7548 |
+
{"current_steps": 37645, "total_steps": 38160, "loss": 0.6109, "lr": 2.7843689271830897e-08, "epoch": 19.73008385744235, "percentage": 98.65, "elapsed_time": "1:46:18", "remaining_time": "0:01:27", "throughput": 3860.52, "total_tokens": 24624512}
|
| 7549 |
+
{"current_steps": 37650, "total_steps": 38160, "loss": 0.5068, "lr": 2.7306795041040502e-08, "epoch": 19.732704402515722, "percentage": 98.66, "elapsed_time": "1:46:19", "remaining_time": "0:01:26", "throughput": 3860.46, "total_tokens": 24626880}
|
| 7550 |
+
{"current_steps": 37655, "total_steps": 38160, "loss": 0.5578, "lr": 2.677512481276401e-08, "epoch": 19.7353249475891, "percentage": 98.68, "elapsed_time": "1:46:20", "remaining_time": "0:01:25", "throughput": 3860.47, "total_tokens": 24629888}
|
| 7551 |
+
{"current_steps": 37660, "total_steps": 38160, "loss": 0.361, "lr": 2.6248678698220785e-08, "epoch": 19.737945492662472, "percentage": 98.69, "elapsed_time": "1:46:20", "remaining_time": "0:01:24", "throughput": 3860.51, "total_tokens": 24633408}
|
| 7552 |
+
{"current_steps": 37665, "total_steps": 38160, "loss": 0.5313, "lr": 2.5727456807542183e-08, "epoch": 19.74056603773585, "percentage": 98.7, "elapsed_time": "1:46:21", "remaining_time": "0:01:23", "throughput": 3860.46, "total_tokens": 24636096}
|
| 7553 |
+
{"current_steps": 37670, "total_steps": 38160, "loss": 0.4393, "lr": 2.5211459249754875e-08, "epoch": 19.743186582809223, "percentage": 98.72, "elapsed_time": "1:46:22", "remaining_time": "0:01:23", "throughput": 3860.55, "total_tokens": 24639872}
|
| 7554 |
+
{"current_steps": 37675, "total_steps": 38160, "loss": 0.3972, "lr": 2.4700686132803076e-08, "epoch": 19.7458071278826, "percentage": 98.73, "elapsed_time": "1:46:23", "remaining_time": "0:01:22", "throughput": 3860.56, "total_tokens": 24642816}
|
| 7555 |
+
{"current_steps": 37680, "total_steps": 38160, "loss": 0.356, "lr": 2.4195137563534643e-08, "epoch": 19.748427672955973, "percentage": 98.74, "elapsed_time": "1:46:23", "remaining_time": "0:01:21", "throughput": 3860.51, "total_tokens": 24645312}
|
| 7556 |
+
{"current_steps": 37685, "total_steps": 38160, "loss": 0.5479, "lr": 2.369481364770387e-08, "epoch": 19.75104821802935, "percentage": 98.76, "elapsed_time": "1:46:24", "remaining_time": "0:01:20", "throughput": 3860.56, "total_tokens": 24648416}
|
| 7557 |
+
{"current_steps": 37690, "total_steps": 38160, "loss": 0.4444, "lr": 2.3199714489974266e-08, "epoch": 19.753668763102727, "percentage": 98.77, "elapsed_time": "1:46:25", "remaining_time": "0:01:19", "throughput": 3860.61, "total_tokens": 24651808}
|
| 7558 |
+
{"current_steps": 37695, "total_steps": 38160, "loss": 0.3974, "lr": 2.2709840193912978e-08, "epoch": 19.7562893081761, "percentage": 98.78, "elapsed_time": "1:46:26", "remaining_time": "0:01:18", "throughput": 3860.64, "total_tokens": 24655232}
|
| 7559 |
+
{"current_steps": 37700, "total_steps": 38160, "loss": 0.8525, "lr": 2.2225190861999146e-08, "epoch": 19.758909853249477, "percentage": 98.79, "elapsed_time": "1:46:27", "remaining_time": "0:01:17", "throughput": 3860.68, "total_tokens": 24658208}
|
| 7560 |
+
{"current_steps": 37705, "total_steps": 38160, "loss": 0.4997, "lr": 2.1745766595612783e-08, "epoch": 19.76153039832285, "percentage": 98.81, "elapsed_time": "1:46:27", "remaining_time": "0:01:17", "throughput": 3860.75, "total_tokens": 24661536}
|
| 7561 |
+
{"current_steps": 37710, "total_steps": 38160, "loss": 0.4736, "lr": 2.1271567495045886e-08, "epoch": 19.764150943396228, "percentage": 98.82, "elapsed_time": "1:46:28", "remaining_time": "0:01:16", "throughput": 3860.8, "total_tokens": 24664832}
|
| 7562 |
+
{"current_steps": 37715, "total_steps": 38160, "loss": 0.4546, "lr": 2.0802593659491333e-08, "epoch": 19.7667714884696, "percentage": 98.83, "elapsed_time": "1:46:29", "remaining_time": "0:01:15", "throughput": 3860.83, "total_tokens": 24667872}
|
| 7563 |
+
{"current_steps": 37720, "total_steps": 38160, "loss": 0.3681, "lr": 2.0338845187059528e-08, "epoch": 19.769392033542978, "percentage": 98.85, "elapsed_time": "1:46:30", "remaining_time": "0:01:14", "throughput": 3860.85, "total_tokens": 24670976}
|
| 7564 |
+
{"current_steps": 37725, "total_steps": 38160, "loss": 0.2281, "lr": 1.988032217475899e-08, "epoch": 19.77201257861635, "percentage": 98.86, "elapsed_time": "1:46:31", "remaining_time": "0:01:13", "throughput": 3861.04, "total_tokens": 24677568}
|
| 7565 |
+
{"current_steps": 37730, "total_steps": 38160, "loss": 0.4212, "lr": 1.942702471850466e-08, "epoch": 19.77463312368973, "percentage": 98.87, "elapsed_time": "1:46:32", "remaining_time": "0:01:12", "throughput": 3861.07, "total_tokens": 24680768}
|
| 7566 |
+
{"current_steps": 37735, "total_steps": 38160, "loss": 0.4854, "lr": 1.897895291312346e-08, "epoch": 19.7772536687631, "percentage": 98.89, "elapsed_time": "1:46:32", "remaining_time": "0:01:12", "throughput": 3861.11, "total_tokens": 24684032}
|
| 7567 |
+
{"current_steps": 37740, "total_steps": 38160, "loss": 0.549, "lr": 1.8536106852348746e-08, "epoch": 19.77987421383648, "percentage": 98.9, "elapsed_time": "1:46:33", "remaining_time": "0:01:11", "throughput": 3861.12, "total_tokens": 24686976}
|
| 7568 |
+
{"current_steps": 37745, "total_steps": 38160, "loss": 0.3716, "lr": 1.8098486628814748e-08, "epoch": 19.782494758909852, "percentage": 98.91, "elapsed_time": "1:46:34", "remaining_time": "0:01:10", "throughput": 3861.2, "total_tokens": 24690560}
|
| 7569 |
+
{"current_steps": 37750, "total_steps": 38160, "loss": 0.4198, "lr": 1.7666092334067684e-08, "epoch": 19.78511530398323, "percentage": 98.93, "elapsed_time": "1:46:35", "remaining_time": "0:01:09", "throughput": 3861.2, "total_tokens": 24693344}
|
| 7570 |
+
{"current_steps": 37755, "total_steps": 38160, "loss": 0.4256, "lr": 1.7238924058562977e-08, "epoch": 19.787735849056602, "percentage": 98.94, "elapsed_time": "1:46:35", "remaining_time": "0:01:08", "throughput": 3861.21, "total_tokens": 24696160}
|
| 7571 |
+
{"current_steps": 37760, "total_steps": 38160, "loss": 0.4344, "lr": 1.6816981891656924e-08, "epoch": 19.79035639412998, "percentage": 98.95, "elapsed_time": "1:46:36", "remaining_time": "0:01:07", "throughput": 3861.24, "total_tokens": 24699232}
|
| 7572 |
+
{"current_steps": 37765, "total_steps": 38160, "loss": 0.3842, "lr": 1.6400265921615032e-08, "epoch": 19.792976939203353, "percentage": 98.96, "elapsed_time": "1:46:37", "remaining_time": "0:01:06", "throughput": 3861.35, "total_tokens": 24703104}
|
| 7573 |
+
{"current_steps": 37770, "total_steps": 38160, "loss": 0.3831, "lr": 1.598877623560924e-08, "epoch": 19.79559748427673, "percentage": 98.98, "elapsed_time": "1:46:38", "remaining_time": "0:01:06", "throughput": 3861.43, "total_tokens": 24706816}
|
| 7574 |
+
{"current_steps": 37775, "total_steps": 38160, "loss": 0.4829, "lr": 1.5582512919717908e-08, "epoch": 19.798218029350103, "percentage": 98.99, "elapsed_time": "1:46:39", "remaining_time": "0:01:05", "throughput": 3861.44, "total_tokens": 24709696}
|
| 7575 |
+
{"current_steps": 37780, "total_steps": 38160, "loss": 0.5272, "lr": 1.5181476058928613e-08, "epoch": 19.80083857442348, "percentage": 99.0, "elapsed_time": "1:46:39", "remaining_time": "0:01:04", "throughput": 3861.55, "total_tokens": 24713696}
|
| 7576 |
+
{"current_steps": 37785, "total_steps": 38160, "loss": 0.4084, "lr": 1.4785665737129805e-08, "epoch": 19.803459119496857, "percentage": 99.02, "elapsed_time": "1:46:40", "remaining_time": "0:01:03", "throughput": 3861.59, "total_tokens": 24716768}
|
| 7577 |
+
{"current_steps": 37790, "total_steps": 38160, "loss": 0.406, "lr": 1.4395082037127471e-08, "epoch": 19.80607966457023, "percentage": 99.03, "elapsed_time": "1:46:41", "remaining_time": "0:01:02", "throughput": 3861.65, "total_tokens": 24720448}
|
| 7578 |
+
{"current_steps": 37795, "total_steps": 38160, "loss": 0.407, "lr": 1.4009725040620148e-08, "epoch": 19.808700209643607, "percentage": 99.04, "elapsed_time": "1:46:42", "remaining_time": "0:01:01", "throughput": 3861.69, "total_tokens": 24723680}
|
| 7579 |
+
{"current_steps": 37800, "total_steps": 38160, "loss": 0.4353, "lr": 1.3629594828223902e-08, "epoch": 19.81132075471698, "percentage": 99.06, "elapsed_time": "1:46:43", "remaining_time": "0:01:00", "throughput": 3861.69, "total_tokens": 24726848}
|
| 7580 |
+
{"current_steps": 37805, "total_steps": 38160, "loss": 0.4587, "lr": 1.3254691479458458e-08, "epoch": 19.813941299790358, "percentage": 99.07, "elapsed_time": "1:46:43", "remaining_time": "0:01:00", "throughput": 3861.7, "total_tokens": 24730016}
|
| 7581 |
+
{"current_steps": 37810, "total_steps": 38160, "loss": 0.5497, "lr": 1.2885015072747197e-08, "epoch": 19.81656184486373, "percentage": 99.08, "elapsed_time": "1:46:44", "remaining_time": "0:00:59", "throughput": 3861.64, "total_tokens": 24732480}
|
| 7582 |
+
{"current_steps": 37815, "total_steps": 38160, "loss": 0.5174, "lr": 1.2520565685419927e-08, "epoch": 19.819182389937108, "percentage": 99.1, "elapsed_time": "1:46:45", "remaining_time": "0:00:58", "throughput": 3861.69, "total_tokens": 24735712}
|
| 7583 |
+
{"current_steps": 37820, "total_steps": 38160, "loss": 0.4136, "lr": 1.2161343393721214e-08, "epoch": 19.82180293501048, "percentage": 99.11, "elapsed_time": "1:46:46", "remaining_time": "0:00:57", "throughput": 3861.73, "total_tokens": 24739008}
|
| 7584 |
+
{"current_steps": 37825, "total_steps": 38160, "loss": 0.5309, "lr": 1.1807348272790952e-08, "epoch": 19.82442348008386, "percentage": 99.12, "elapsed_time": "1:46:46", "remaining_time": "0:00:56", "throughput": 3861.78, "total_tokens": 24742432}
|
| 7585 |
+
{"current_steps": 37830, "total_steps": 38160, "loss": 0.448, "lr": 1.1458580396683794e-08, "epoch": 19.82704402515723, "percentage": 99.14, "elapsed_time": "1:46:47", "remaining_time": "0:00:55", "throughput": 3861.78, "total_tokens": 24745632}
|
| 7586 |
+
{"current_steps": 37835, "total_steps": 38160, "loss": 0.3141, "lr": 1.1115039838358044e-08, "epoch": 19.82966457023061, "percentage": 99.15, "elapsed_time": "1:46:48", "remaining_time": "0:00:55", "throughput": 3861.82, "total_tokens": 24748960}
|
| 7587 |
+
{"current_steps": 37840, "total_steps": 38160, "loss": 0.6287, "lr": 1.0776726669678439e-08, "epoch": 19.832285115303982, "percentage": 99.16, "elapsed_time": "1:46:49", "remaining_time": "0:00:54", "throughput": 3861.83, "total_tokens": 24751872}
|
| 7588 |
+
{"current_steps": 37845, "total_steps": 38160, "loss": 0.4791, "lr": 1.044364096141337e-08, "epoch": 19.83490566037736, "percentage": 99.17, "elapsed_time": "1:46:50", "remaining_time": "0:00:53", "throughput": 3862.0, "total_tokens": 24759232}
|
| 7589 |
+
{"current_steps": 37850, "total_steps": 38160, "loss": 0.5423, "lr": 1.0115782783243211e-08, "epoch": 19.837526205450732, "percentage": 99.19, "elapsed_time": "1:46:51", "remaining_time": "0:00:52", "throughput": 3862.06, "total_tokens": 24762528}
|
| 7590 |
+
{"current_steps": 37855, "total_steps": 38160, "loss": 0.4671, "lr": 9.793152203751988e-09, "epoch": 19.84014675052411, "percentage": 99.2, "elapsed_time": "1:46:52", "remaining_time": "0:00:51", "throughput": 3862.0, "total_tokens": 24765344}
|
| 7591 |
+
{"current_steps": 37860, "total_steps": 38160, "loss": 0.5153, "lr": 9.475749290430158e-09, "epoch": 19.842767295597483, "percentage": 99.21, "elapsed_time": "1:46:53", "remaining_time": "0:00:50", "throughput": 3862.03, "total_tokens": 24768512}
|
| 7592 |
+
{"current_steps": 37865, "total_steps": 38160, "loss": 0.3324, "lr": 9.16357410967461e-09, "epoch": 19.84538784067086, "percentage": 99.23, "elapsed_time": "1:46:54", "remaining_time": "0:00:49", "throughput": 3862.09, "total_tokens": 24771776}
|
| 7593 |
+
{"current_steps": 37870, "total_steps": 38160, "loss": 0.3825, "lr": 8.856626726788664e-09, "epoch": 19.848008385744233, "percentage": 99.24, "elapsed_time": "1:46:54", "remaining_time": "0:00:49", "throughput": 3862.1, "total_tokens": 24774656}
|
| 7594 |
+
{"current_steps": 37875, "total_steps": 38160, "loss": 0.4231, "lr": 8.554907205982066e-09, "epoch": 19.85062893081761, "percentage": 99.25, "elapsed_time": "1:46:55", "remaining_time": "0:00:48", "throughput": 3862.11, "total_tokens": 24777568}
|
| 7595 |
+
{"current_steps": 37880, "total_steps": 38160, "loss": 0.3107, "lr": 8.258415610373772e-09, "epoch": 19.853249475890987, "percentage": 99.27, "elapsed_time": "1:46:56", "remaining_time": "0:00:47", "throughput": 3862.13, "total_tokens": 24780864}
|
| 7596 |
+
{"current_steps": 37885, "total_steps": 38160, "loss": 0.3606, "lr": 7.967152001980838e-09, "epoch": 19.85587002096436, "percentage": 99.28, "elapsed_time": "1:46:57", "remaining_time": "0:00:46", "throughput": 3862.21, "total_tokens": 24784256}
|
| 7597 |
+
{"current_steps": 37890, "total_steps": 38160, "loss": 0.4603, "lr": 7.681116441737856e-09, "epoch": 19.858490566037737, "percentage": 99.29, "elapsed_time": "1:46:57", "remaining_time": "0:00:45", "throughput": 3862.22, "total_tokens": 24787296}
|
| 7598 |
+
{"current_steps": 37895, "total_steps": 38160, "loss": 0.4697, "lr": 7.4003089894747464e-09, "epoch": 19.86111111111111, "percentage": 99.31, "elapsed_time": "1:46:58", "remaining_time": "0:00:44", "throughput": 3862.34, "total_tokens": 24792320}
|
| 7599 |
+
{"current_steps": 37900, "total_steps": 38160, "loss": 0.3557, "lr": 7.12472970393896e-09, "epoch": 19.863731656184488, "percentage": 99.32, "elapsed_time": "1:46:59", "remaining_time": "0:00:44", "throughput": 3862.3, "total_tokens": 24795072}
|
| 7600 |
+
{"current_steps": 37905, "total_steps": 38160, "loss": 0.5307, "lr": 6.854378642776049e-09, "epoch": 19.86635220125786, "percentage": 99.33, "elapsed_time": "1:47:00", "remaining_time": "0:00:43", "throughput": 3862.3, "total_tokens": 24798016}
|
| 7601 |
+
{"current_steps": 37910, "total_steps": 38160, "loss": 0.4849, "lr": 6.589255862538002e-09, "epoch": 19.868972746331238, "percentage": 99.34, "elapsed_time": "1:47:01", "remaining_time": "0:00:42", "throughput": 3862.24, "total_tokens": 24800544}
|
| 7602 |
+
{"current_steps": 37915, "total_steps": 38160, "loss": 0.4809, "lr": 6.32936141869156e-09, "epoch": 19.87159329140461, "percentage": 99.36, "elapsed_time": "1:47:02", "remaining_time": "0:00:41", "throughput": 3862.29, "total_tokens": 24803872}
|
| 7603 |
+
{"current_steps": 37920, "total_steps": 38160, "loss": 0.4666, "lr": 6.074695365596017e-09, "epoch": 19.87421383647799, "percentage": 99.37, "elapsed_time": "1:47:02", "remaining_time": "0:00:40", "throughput": 3862.24, "total_tokens": 24806464}
|
| 7604 |
+
{"current_steps": 37925, "total_steps": 38160, "loss": 0.4322, "lr": 5.8252577565309775e-09, "epoch": 19.87683438155136, "percentage": 99.38, "elapsed_time": "1:47:04", "remaining_time": "0:00:39", "throughput": 3862.42, "total_tokens": 24814080}
|
| 7605 |
+
{"current_steps": 37930, "total_steps": 38160, "loss": 0.4693, "lr": 5.581048643671371e-09, "epoch": 19.87945492662474, "percentage": 99.4, "elapsed_time": "1:47:05", "remaining_time": "0:00:38", "throughput": 3862.46, "total_tokens": 24817248}
|
| 7606 |
+
{"current_steps": 37935, "total_steps": 38160, "loss": 0.3075, "lr": 5.342068078106888e-09, "epoch": 19.882075471698112, "percentage": 99.41, "elapsed_time": "1:47:06", "remaining_time": "0:00:38", "throughput": 3862.47, "total_tokens": 24820480}
|
| 7607 |
+
{"current_steps": 37940, "total_steps": 38160, "loss": 0.4287, "lr": 5.1083161098253175e-09, "epoch": 19.88469601677149, "percentage": 99.42, "elapsed_time": "1:47:06", "remaining_time": "0:00:37", "throughput": 3862.46, "total_tokens": 24823200}
|
| 7608 |
+
{"current_steps": 37945, "total_steps": 38160, "loss": 0.3767, "lr": 4.87979278772921e-09, "epoch": 19.887316561844862, "percentage": 99.44, "elapsed_time": "1:47:07", "remaining_time": "0:00:36", "throughput": 3862.45, "total_tokens": 24826112}
|
| 7609 |
+
{"current_steps": 37950, "total_steps": 38160, "loss": 0.4473, "lr": 4.656498159619216e-09, "epoch": 19.88993710691824, "percentage": 99.45, "elapsed_time": "1:47:08", "remaining_time": "0:00:35", "throughput": 3862.41, "total_tokens": 24828576}
|
| 7610 |
+
{"current_steps": 37955, "total_steps": 38160, "loss": 0.3315, "lr": 4.438432272210746e-09, "epoch": 19.892557651991613, "percentage": 99.46, "elapsed_time": "1:47:09", "remaining_time": "0:00:34", "throughput": 3862.54, "total_tokens": 24832672}
|
| 7611 |
+
{"current_steps": 37960, "total_steps": 38160, "loss": 0.4523, "lr": 4.225595171114538e-09, "epoch": 19.89517819706499, "percentage": 99.48, "elapsed_time": "1:47:09", "remaining_time": "0:00:33", "throughput": 3862.55, "total_tokens": 24835616}
|
| 7612 |
+
{"current_steps": 37965, "total_steps": 38160, "loss": 0.3921, "lr": 4.017986900858861e-09, "epoch": 19.897798742138363, "percentage": 99.49, "elapsed_time": "1:47:10", "remaining_time": "0:00:33", "throughput": 3862.63, "total_tokens": 24839328}
|
| 7613 |
+
{"current_steps": 37970, "total_steps": 38160, "loss": 0.3565, "lr": 3.815607504870089e-09, "epoch": 19.90041928721174, "percentage": 99.5, "elapsed_time": "1:47:11", "remaining_time": "0:00:32", "throughput": 3862.63, "total_tokens": 24842176}
|
| 7614 |
+
{"current_steps": 37975, "total_steps": 38160, "loss": 0.6305, "lr": 3.618457025486577e-09, "epoch": 19.903039832285117, "percentage": 99.52, "elapsed_time": "1:47:12", "remaining_time": "0:00:31", "throughput": 3862.62, "total_tokens": 24845088}
|
| 7615 |
+
{"current_steps": 37980, "total_steps": 38160, "loss": 0.389, "lr": 3.426535503947559e-09, "epoch": 19.90566037735849, "percentage": 99.53, "elapsed_time": "1:47:12", "remaining_time": "0:00:30", "throughput": 3862.63, "total_tokens": 24848032}
|
| 7616 |
+
{"current_steps": 37985, "total_steps": 38160, "loss": 0.481, "lr": 3.2398429803986997e-09, "epoch": 19.908280922431867, "percentage": 99.54, "elapsed_time": "1:47:13", "remaining_time": "0:00:29", "throughput": 3862.69, "total_tokens": 24851584}
|
| 7617 |
+
{"current_steps": 37990, "total_steps": 38160, "loss": 0.4883, "lr": 3.058379493900421e-09, "epoch": 19.91090146750524, "percentage": 99.55, "elapsed_time": "1:47:14", "remaining_time": "0:00:28", "throughput": 3862.7, "total_tokens": 24854464}
|
| 7618 |
+
{"current_steps": 37995, "total_steps": 38160, "loss": 0.4539, "lr": 2.882145082405696e-09, "epoch": 19.913522012578618, "percentage": 99.57, "elapsed_time": "1:47:15", "remaining_time": "0:00:27", "throughput": 3862.67, "total_tokens": 24857152}
|
| 7619 |
+
{"current_steps": 38000, "total_steps": 38160, "loss": 0.4841, "lr": 2.711139782785033e-09, "epoch": 19.91614255765199, "percentage": 99.58, "elapsed_time": "1:47:16", "remaining_time": "0:00:27", "throughput": 3862.7, "total_tokens": 24860480}
|
| 7620 |
+
{"current_steps": 38005, "total_steps": 38160, "loss": 0.4375, "lr": 2.5453636308098163e-09, "epoch": 19.918763102725368, "percentage": 99.59, "elapsed_time": "1:47:16", "remaining_time": "0:00:26", "throughput": 3862.72, "total_tokens": 24863680}
|
| 7621 |
+
{"current_steps": 38010, "total_steps": 38160, "loss": 0.4825, "lr": 2.3848166611606383e-09, "epoch": 19.92138364779874, "percentage": 99.61, "elapsed_time": "1:47:17", "remaining_time": "0:00:25", "throughput": 3862.77, "total_tokens": 24866976}
|
| 7622 |
+
{"current_steps": 38015, "total_steps": 38160, "loss": 0.4309, "lr": 2.2294989074161943e-09, "epoch": 19.92400419287212, "percentage": 99.62, "elapsed_time": "1:47:18", "remaining_time": "0:00:24", "throughput": 3862.81, "total_tokens": 24870240}
|
| 7623 |
+
{"current_steps": 38020, "total_steps": 38160, "loss": 0.6158, "lr": 2.0794104020727125e-09, "epoch": 19.92662473794549, "percentage": 99.63, "elapsed_time": "1:47:19", "remaining_time": "0:00:23", "throughput": 3862.92, "total_tokens": 24874400}
|
| 7624 |
+
{"current_steps": 38025, "total_steps": 38160, "loss": 0.4865, "lr": 1.9345511765272993e-09, "epoch": 19.92924528301887, "percentage": 99.65, "elapsed_time": "1:47:20", "remaining_time": "0:00:22", "throughput": 3862.98, "total_tokens": 24878368}
|
| 7625 |
+
{"current_steps": 38030, "total_steps": 38160, "loss": 0.357, "lr": 1.794921261077942e-09, "epoch": 19.931865828092242, "percentage": 99.66, "elapsed_time": "1:47:20", "remaining_time": "0:00:22", "throughput": 3862.95, "total_tokens": 24881184}
|
| 7626 |
+
{"current_steps": 38035, "total_steps": 38160, "loss": 0.3929, "lr": 1.6605206849373833e-09, "epoch": 19.93448637316562, "percentage": 99.67, "elapsed_time": "1:47:21", "remaining_time": "0:00:21", "throughput": 3863.02, "total_tokens": 24884704}
|
| 7627 |
+
{"current_steps": 38040, "total_steps": 38160, "loss": 0.6, "lr": 1.531349476219246e-09, "epoch": 19.937106918238992, "percentage": 99.69, "elapsed_time": "1:47:22", "remaining_time": "0:00:20", "throughput": 3863.01, "total_tokens": 24887456}
|
| 7628 |
+
{"current_steps": 38045, "total_steps": 38160, "loss": 0.4109, "lr": 1.4074076619463584e-09, "epoch": 19.93972746331237, "percentage": 99.7, "elapsed_time": "1:47:23", "remaining_time": "0:00:19", "throughput": 3863.03, "total_tokens": 24890592}
|
| 7629 |
+
{"current_steps": 38050, "total_steps": 38160, "loss": 0.4513, "lr": 1.2886952680452036e-09, "epoch": 19.942348008385743, "percentage": 99.71, "elapsed_time": "1:47:24", "remaining_time": "0:00:18", "throughput": 3863.08, "total_tokens": 24893856}
|
| 7630 |
+
{"current_steps": 38055, "total_steps": 38160, "loss": 0.2896, "lr": 1.1752123193459197e-09, "epoch": 19.94496855345912, "percentage": 99.72, "elapsed_time": "1:47:24", "remaining_time": "0:00:17", "throughput": 3863.06, "total_tokens": 24896544}
|
| 7631 |
+
{"current_steps": 38060, "total_steps": 38160, "loss": 0.3627, "lr": 1.0669588395934017e-09, "epoch": 19.947589098532493, "percentage": 99.74, "elapsed_time": "1:47:25", "remaining_time": "0:00:16", "throughput": 3862.99, "total_tokens": 24898848}
|
| 7632 |
+
{"current_steps": 38065, "total_steps": 38160, "loss": 0.3229, "lr": 9.639348514278722e-10, "epoch": 19.95020964360587, "percentage": 99.75, "elapsed_time": "1:47:26", "remaining_time": "0:00:16", "throughput": 3862.97, "total_tokens": 24901472}
|
| 7633 |
+
{"current_steps": 38070, "total_steps": 38160, "loss": 0.3999, "lr": 8.661403764043119e-10, "epoch": 19.952830188679247, "percentage": 99.76, "elapsed_time": "1:47:26", "remaining_time": "0:00:15", "throughput": 3863.02, "total_tokens": 24904768}
|
| 7634 |
+
{"current_steps": 38075, "total_steps": 38160, "loss": 0.4709, "lr": 7.735754349785795e-10, "epoch": 19.95545073375262, "percentage": 99.78, "elapsed_time": "1:47:27", "remaining_time": "0:00:14", "throughput": 3863.09, "total_tokens": 24908512}
|
| 7635 |
+
{"current_steps": 38080, "total_steps": 38160, "loss": 0.5359, "lr": 6.862400465157403e-10, "epoch": 19.958071278825997, "percentage": 99.79, "elapsed_time": "1:47:28", "remaining_time": "0:00:13", "throughput": 3863.12, "total_tokens": 24911488}
|
| 7636 |
+
{"current_steps": 38085, "total_steps": 38160, "loss": 0.3978, "lr": 6.041342292817387e-10, "epoch": 19.96069182389937, "percentage": 99.8, "elapsed_time": "1:47:29", "remaining_time": "0:00:12", "throughput": 3863.13, "total_tokens": 24914624}
|
| 7637 |
+
{"current_steps": 38090, "total_steps": 38160, "loss": 0.4801, "lr": 5.272580004572758e-10, "epoch": 19.963312368972748, "percentage": 99.82, "elapsed_time": "1:47:30", "remaining_time": "0:00:11", "throughput": 3863.1, "total_tokens": 24917632}
|
| 7638 |
+
{"current_steps": 38095, "total_steps": 38160, "loss": 0.459, "lr": 4.556113761211567e-10, "epoch": 19.96593291404612, "percentage": 99.83, "elapsed_time": "1:47:30", "remaining_time": "0:00:11", "throughput": 3863.14, "total_tokens": 24920768}
|
| 7639 |
+
{"current_steps": 38100, "total_steps": 38160, "loss": 0.6119, "lr": 3.8919437125861657e-10, "epoch": 19.968553459119498, "percentage": 99.84, "elapsed_time": "1:47:31", "remaining_time": "0:00:10", "throughput": 3863.13, "total_tokens": 24923936}
|
| 7640 |
+
{"current_steps": 38105, "total_steps": 38160, "loss": 0.354, "lr": 3.280069997696478e-10, "epoch": 19.97117400419287, "percentage": 99.86, "elapsed_time": "1:47:32", "remaining_time": "0:00:09", "throughput": 3863.19, "total_tokens": 24927424}
|
| 7641 |
+
{"current_steps": 38110, "total_steps": 38160, "loss": 0.5323, "lr": 2.720492744467951e-10, "epoch": 19.97379454926625, "percentage": 99.87, "elapsed_time": "1:47:33", "remaining_time": "0:00:08", "throughput": 3863.16, "total_tokens": 24930080}
|
| 7642 |
+
{"current_steps": 38115, "total_steps": 38160, "loss": 0.4419, "lr": 2.2132120700291136e-10, "epoch": 19.97641509433962, "percentage": 99.88, "elapsed_time": "1:47:34", "remaining_time": "0:00:07", "throughput": 3863.21, "total_tokens": 24933664}
|
| 7643 |
+
{"current_steps": 38120, "total_steps": 38160, "loss": 0.4462, "lr": 1.7582280804340213e-10, "epoch": 19.979035639413, "percentage": 99.9, "elapsed_time": "1:47:34", "remaining_time": "0:00:06", "throughput": 3863.24, "total_tokens": 24936800}
|
| 7644 |
+
{"current_steps": 38125, "total_steps": 38160, "loss": 0.5421, "lr": 1.3555408708842977e-10, "epoch": 19.981656184486372, "percentage": 99.91, "elapsed_time": "1:47:35", "remaining_time": "0:00:05", "throughput": 3863.28, "total_tokens": 24940352}
|
| 7645 |
+
{"current_steps": 38130, "total_steps": 38160, "loss": 0.4668, "lr": 1.0051505256458704e-10, "epoch": 19.98427672955975, "percentage": 99.92, "elapsed_time": "1:47:36", "remaining_time": "0:00:05", "throughput": 3863.35, "total_tokens": 24943808}
|
| 7646 |
+
{"current_steps": 38135, "total_steps": 38160, "loss": 0.4371, "lr": 7.070571179657037e-11, "epoch": 19.986897274633122, "percentage": 99.93, "elapsed_time": "1:47:37", "remaining_time": "0:00:04", "throughput": 3863.3, "total_tokens": 24946304}
|
| 7647 |
+
{"current_steps": 38140, "total_steps": 38160, "loss": 0.4312, "lr": 4.612607102383315e-11, "epoch": 19.9895178197065, "percentage": 99.95, "elapsed_time": "1:47:37", "remaining_time": "0:00:03", "throughput": 3863.35, "total_tokens": 24949440}
|
| 7648 |
+
{"current_steps": 38145, "total_steps": 38160, "loss": 0.3517, "lr": 2.677613538670798e-11, "epoch": 19.992138364779873, "percentage": 99.96, "elapsed_time": "1:47:38", "remaining_time": "0:00:02", "throughput": 3863.42, "total_tokens": 24953376}
|
| 7649 |
+
{"current_steps": 38150, "total_steps": 38160, "loss": 0.4133, "lr": 1.2655908931957782e-11, "epoch": 19.99475890985325, "percentage": 99.97, "elapsed_time": "1:47:39", "remaining_time": "0:00:01", "throughput": 3863.53, "total_tokens": 24957984}
|
| 7650 |
+
{"current_steps": 38155, "total_steps": 38160, "loss": 0.4096, "lr": 3.765394615551366e-12, "epoch": 19.997379454926623, "percentage": 99.99, "elapsed_time": "1:47:40", "remaining_time": "0:00:00", "throughput": 3863.51, "total_tokens": 24960832}
|
| 7651 |
+
{"current_steps": 38160, "total_steps": 38160, "loss": 0.6148, "lr": 1.0459429711229262e-13, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:47:41", "remaining_time": "0:00:00", "throughput": 3863.41, "total_tokens": 24964664}
|
| 7652 |
+
{"current_steps": 38160, "total_steps": 38160, "eval_loss": 0.47683560848236084, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:47:51", "remaining_time": "0:00:00", "throughput": 3857.66, "total_tokens": 24964664}
|
| 7653 |
+
{"current_steps": 38160, "total_steps": 38160, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:47:52", "remaining_time": "0:00:00", "throughput": 3856.78, "total_tokens": 24964664}
|