Training in progress, step 28620
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +382 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 798032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aff5331418018b5af86863868fce500c5fef81640686d50ae9ff06bf608be477
|
| 3 |
size 798032
|
trainer_log.jsonl
CHANGED
|
@@ -5357,3 +5357,385 @@
|
|
| 5357 |
{"current_steps": 26715, "total_steps": 38160, "loss": 0.4798, "lr": 1.2496039256720665e-05, "epoch": 14.001572327044025, "percentage": 70.01, "elapsed_time": "1:20:48", "remaining_time": "0:34:37", "throughput": 3594.3, "total_tokens": 17427448}
|
| 5358 |
{"current_steps": 26720, "total_steps": 38160, "loss": 0.5229, "lr": 1.2486139229792452e-05, "epoch": 14.0041928721174, "percentage": 70.02, "elapsed_time": "1:20:49", "remaining_time": "0:34:36", "throughput": 3594.25, "total_tokens": 17430008}
|
| 5359 |
{"current_steps": 26725, "total_steps": 38160, "loss": 0.5574, "lr": 1.2476241820621152e-05, "epoch": 14.006813417190775, "percentage": 70.03, "elapsed_time": "1:20:50", "remaining_time": "0:34:35", "throughput": 3594.28, "total_tokens": 17433080}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5357 |
{"current_steps": 26715, "total_steps": 38160, "loss": 0.4798, "lr": 1.2496039256720665e-05, "epoch": 14.001572327044025, "percentage": 70.01, "elapsed_time": "1:20:48", "remaining_time": "0:34:37", "throughput": 3594.3, "total_tokens": 17427448}
|
| 5358 |
{"current_steps": 26720, "total_steps": 38160, "loss": 0.5229, "lr": 1.2486139229792452e-05, "epoch": 14.0041928721174, "percentage": 70.02, "elapsed_time": "1:20:49", "remaining_time": "0:34:36", "throughput": 3594.25, "total_tokens": 17430008}
|
| 5359 |
{"current_steps": 26725, "total_steps": 38160, "loss": 0.5574, "lr": 1.2476241820621152e-05, "epoch": 14.006813417190775, "percentage": 70.03, "elapsed_time": "1:20:50", "remaining_time": "0:34:35", "throughput": 3594.28, "total_tokens": 17433080}
|
| 5360 |
+
{"current_steps": 26730, "total_steps": 38160, "loss": 0.5177, "lr": 1.2466347031277176e-05, "epoch": 14.00943396226415, "percentage": 70.05, "elapsed_time": "1:20:51", "remaining_time": "0:34:34", "throughput": 3594.28, "total_tokens": 17436088}
|
| 5361 |
+
{"current_steps": 26735, "total_steps": 38160, "loss": 0.4347, "lr": 1.2456454863830411e-05, "epoch": 14.012054507337526, "percentage": 70.06, "elapsed_time": "1:20:51", "remaining_time": "0:34:33", "throughput": 3594.21, "total_tokens": 17438680}
|
| 5362 |
+
{"current_steps": 26740, "total_steps": 38160, "loss": 0.4651, "lr": 1.2446565320350185e-05, "epoch": 14.014675052410901, "percentage": 70.07, "elapsed_time": "1:20:52", "remaining_time": "0:34:32", "throughput": 3594.37, "total_tokens": 17442424}
|
| 5363 |
+
{"current_steps": 26745, "total_steps": 38160, "loss": 0.448, "lr": 1.2436678402905283e-05, "epoch": 14.017295597484276, "percentage": 70.09, "elapsed_time": "1:20:53", "remaining_time": "0:34:31", "throughput": 3594.42, "total_tokens": 17445784}
|
| 5364 |
+
{"current_steps": 26750, "total_steps": 38160, "loss": 0.3438, "lr": 1.2426794113563933e-05, "epoch": 14.019916142557651, "percentage": 70.1, "elapsed_time": "1:20:54", "remaining_time": "0:34:30", "throughput": 3594.34, "total_tokens": 17448280}
|
| 5365 |
+
{"current_steps": 26755, "total_steps": 38160, "loss": 0.5127, "lr": 1.24169124543938e-05, "epoch": 14.022536687631026, "percentage": 70.11, "elapsed_time": "1:20:55", "remaining_time": "0:34:29", "throughput": 3594.42, "total_tokens": 17452248}
|
| 5366 |
+
{"current_steps": 26760, "total_steps": 38160, "loss": 0.3977, "lr": 1.2407033427462026e-05, "epoch": 14.025157232704403, "percentage": 70.13, "elapsed_time": "1:20:56", "remaining_time": "0:34:28", "throughput": 3594.51, "total_tokens": 17455672}
|
| 5367 |
+
{"current_steps": 26765, "total_steps": 38160, "loss": 0.3616, "lr": 1.2397157034835192e-05, "epoch": 14.027777777777779, "percentage": 70.14, "elapsed_time": "1:20:56", "remaining_time": "0:34:27", "throughput": 3594.48, "total_tokens": 17458328}
|
| 5368 |
+
{"current_steps": 26770, "total_steps": 38160, "loss": 0.5559, "lr": 1.2387283278579325e-05, "epoch": 14.030398322851154, "percentage": 70.15, "elapsed_time": "1:20:57", "remaining_time": "0:34:26", "throughput": 3594.4, "total_tokens": 17460856}
|
| 5369 |
+
{"current_steps": 26775, "total_steps": 38160, "loss": 0.4243, "lr": 1.237741216075991e-05, "epoch": 14.033018867924529, "percentage": 70.17, "elapsed_time": "1:20:58", "remaining_time": "0:34:25", "throughput": 3594.47, "total_tokens": 17464440}
|
| 5370 |
+
{"current_steps": 26780, "total_steps": 38160, "loss": 0.5593, "lr": 1.2367543683441868e-05, "epoch": 14.035639412997904, "percentage": 70.18, "elapsed_time": "1:20:59", "remaining_time": "0:34:25", "throughput": 3594.5, "total_tokens": 17467512}
|
| 5371 |
+
{"current_steps": 26785, "total_steps": 38160, "loss": 0.4676, "lr": 1.2357677848689558e-05, "epoch": 14.03825995807128, "percentage": 70.19, "elapsed_time": "1:21:00", "remaining_time": "0:34:24", "throughput": 3594.57, "total_tokens": 17471224}
|
| 5372 |
+
{"current_steps": 26790, "total_steps": 38160, "loss": 0.3628, "lr": 1.2347814658566811e-05, "epoch": 14.040880503144654, "percentage": 70.2, "elapsed_time": "1:21:01", "remaining_time": "0:34:23", "throughput": 3594.67, "total_tokens": 17474616}
|
| 5373 |
+
{"current_steps": 26795, "total_steps": 38160, "loss": 0.4378, "lr": 1.2337954115136894e-05, "epoch": 14.04350104821803, "percentage": 70.22, "elapsed_time": "1:21:02", "remaining_time": "0:34:22", "throughput": 3594.72, "total_tokens": 17477880}
|
| 5374 |
+
{"current_steps": 26800, "total_steps": 38160, "loss": 0.6105, "lr": 1.2328096220462529e-05, "epoch": 14.046121593291405, "percentage": 70.23, "elapsed_time": "1:21:02", "remaining_time": "0:34:21", "throughput": 3594.78, "total_tokens": 17481016}
|
| 5375 |
+
{"current_steps": 26805, "total_steps": 38160, "loss": 0.4106, "lr": 1.2318240976605865e-05, "epoch": 14.04874213836478, "percentage": 70.24, "elapsed_time": "1:21:03", "remaining_time": "0:34:20", "throughput": 3594.82, "total_tokens": 17484568}
|
| 5376 |
+
{"current_steps": 26810, "total_steps": 38160, "loss": 0.4308, "lr": 1.23083883856285e-05, "epoch": 14.051362683438155, "percentage": 70.26, "elapsed_time": "1:21:05", "remaining_time": "0:34:19", "throughput": 3595.11, "total_tokens": 17491032}
|
| 5377 |
+
{"current_steps": 26815, "total_steps": 38160, "loss": 0.3972, "lr": 1.229853844959149e-05, "epoch": 14.05398322851153, "percentage": 70.27, "elapsed_time": "1:21:06", "remaining_time": "0:34:18", "throughput": 3595.13, "total_tokens": 17494136}
|
| 5378 |
+
{"current_steps": 26820, "total_steps": 38160, "loss": 0.5983, "lr": 1.228869117055533e-05, "epoch": 14.056603773584905, "percentage": 70.28, "elapsed_time": "1:21:06", "remaining_time": "0:34:17", "throughput": 3595.12, "total_tokens": 17496920}
|
| 5379 |
+
{"current_steps": 26825, "total_steps": 38160, "loss": 0.3583, "lr": 1.227884655057997e-05, "epoch": 14.05922431865828, "percentage": 70.3, "elapsed_time": "1:21:07", "remaining_time": "0:34:16", "throughput": 3595.17, "total_tokens": 17500024}
|
| 5380 |
+
{"current_steps": 26830, "total_steps": 38160, "loss": 0.4695, "lr": 1.2269004591724777e-05, "epoch": 14.061844863731656, "percentage": 70.31, "elapsed_time": "1:21:08", "remaining_time": "0:34:15", "throughput": 3595.24, "total_tokens": 17503416}
|
| 5381 |
+
{"current_steps": 26835, "total_steps": 38160, "loss": 0.3922, "lr": 1.2259165296048575e-05, "epoch": 14.064465408805031, "percentage": 70.32, "elapsed_time": "1:21:09", "remaining_time": "0:34:14", "throughput": 3595.26, "total_tokens": 17506648}
|
| 5382 |
+
{"current_steps": 26840, "total_steps": 38160, "loss": 0.4426, "lr": 1.2249328665609638e-05, "epoch": 14.067085953878406, "percentage": 70.34, "elapsed_time": "1:21:10", "remaining_time": "0:34:14", "throughput": 3595.21, "total_tokens": 17509496}
|
| 5383 |
+
{"current_steps": 26845, "total_steps": 38160, "loss": 0.4513, "lr": 1.2239494702465676e-05, "epoch": 14.069706498951781, "percentage": 70.35, "elapsed_time": "1:21:11", "remaining_time": "0:34:13", "throughput": 3595.3, "total_tokens": 17513464}
|
| 5384 |
+
{"current_steps": 26850, "total_steps": 38160, "loss": 0.3363, "lr": 1.2229663408673842e-05, "epoch": 14.072327044025156, "percentage": 70.36, "elapsed_time": "1:21:12", "remaining_time": "0:34:12", "throughput": 3595.27, "total_tokens": 17516568}
|
| 5385 |
+
{"current_steps": 26855, "total_steps": 38160, "loss": 0.5668, "lr": 1.221983478629074e-05, "epoch": 14.074947589098532, "percentage": 70.37, "elapsed_time": "1:21:13", "remaining_time": "0:34:11", "throughput": 3595.39, "total_tokens": 17520440}
|
| 5386 |
+
{"current_steps": 26860, "total_steps": 38160, "loss": 0.4696, "lr": 1.22100088373724e-05, "epoch": 14.077568134171909, "percentage": 70.39, "elapsed_time": "1:21:13", "remaining_time": "0:34:10", "throughput": 3595.35, "total_tokens": 17523224}
|
| 5387 |
+
{"current_steps": 26865, "total_steps": 38160, "loss": 0.4725, "lr": 1.2200185563974283e-05, "epoch": 14.080188679245284, "percentage": 70.4, "elapsed_time": "1:21:15", "remaining_time": "0:34:09", "throughput": 3595.48, "total_tokens": 17530008}
|
| 5388 |
+
{"current_steps": 26870, "total_steps": 38160, "loss": 0.4733, "lr": 1.219036496815132e-05, "epoch": 14.082809224318659, "percentage": 70.41, "elapsed_time": "1:21:16", "remaining_time": "0:34:08", "throughput": 3595.52, "total_tokens": 17533048}
|
| 5389 |
+
{"current_steps": 26875, "total_steps": 38160, "loss": 0.3845, "lr": 1.2180547051957863e-05, "epoch": 14.085429769392034, "percentage": 70.43, "elapsed_time": "1:21:17", "remaining_time": "0:34:07", "throughput": 3595.47, "total_tokens": 17535608}
|
| 5390 |
+
{"current_steps": 26880, "total_steps": 38160, "loss": 0.4333, "lr": 1.2170731817447723e-05, "epoch": 14.08805031446541, "percentage": 70.44, "elapsed_time": "1:21:17", "remaining_time": "0:34:06", "throughput": 3595.6, "total_tokens": 17539096}
|
| 5391 |
+
{"current_steps": 26885, "total_steps": 38160, "loss": 0.5927, "lr": 1.216091926667412e-05, "epoch": 14.090670859538784, "percentage": 70.45, "elapsed_time": "1:21:18", "remaining_time": "0:34:06", "throughput": 3595.76, "total_tokens": 17542936}
|
| 5392 |
+
{"current_steps": 26890, "total_steps": 38160, "loss": 0.6104, "lr": 1.2151109401689723e-05, "epoch": 14.09329140461216, "percentage": 70.47, "elapsed_time": "1:21:19", "remaining_time": "0:34:05", "throughput": 3595.73, "total_tokens": 17545688}
|
| 5393 |
+
{"current_steps": 26895, "total_steps": 38160, "loss": 0.4965, "lr": 1.214130222454665e-05, "epoch": 14.095911949685535, "percentage": 70.48, "elapsed_time": "1:21:20", "remaining_time": "0:34:04", "throughput": 3595.71, "total_tokens": 17548536}
|
| 5394 |
+
{"current_steps": 26900, "total_steps": 38160, "loss": 0.4254, "lr": 1.2131497737296452e-05, "epoch": 14.09853249475891, "percentage": 70.49, "elapsed_time": "1:21:21", "remaining_time": "0:34:03", "throughput": 3595.77, "total_tokens": 17552056}
|
| 5395 |
+
{"current_steps": 26905, "total_steps": 38160, "loss": 0.3902, "lr": 1.2121695941990124e-05, "epoch": 14.101153039832285, "percentage": 70.51, "elapsed_time": "1:21:22", "remaining_time": "0:34:02", "throughput": 3595.86, "total_tokens": 17555512}
|
| 5396 |
+
{"current_steps": 26910, "total_steps": 38160, "loss": 0.4541, "lr": 1.2111896840678075e-05, "epoch": 14.10377358490566, "percentage": 70.52, "elapsed_time": "1:21:23", "remaining_time": "0:34:01", "throughput": 3595.93, "total_tokens": 17559320}
|
| 5397 |
+
{"current_steps": 26915, "total_steps": 38160, "loss": 0.3563, "lr": 1.210210043541018e-05, "epoch": 14.106394129979035, "percentage": 70.53, "elapsed_time": "1:21:23", "remaining_time": "0:34:00", "throughput": 3595.98, "total_tokens": 17562616}
|
| 5398 |
+
{"current_steps": 26920, "total_steps": 38160, "loss": 0.5239, "lr": 1.2092306728235717e-05, "epoch": 14.10901467505241, "percentage": 70.55, "elapsed_time": "1:21:24", "remaining_time": "0:33:59", "throughput": 3596.07, "total_tokens": 17566200}
|
| 5399 |
+
{"current_steps": 26925, "total_steps": 38160, "loss": 0.408, "lr": 1.2082515721203427e-05, "epoch": 14.111635220125786, "percentage": 70.56, "elapsed_time": "1:21:25", "remaining_time": "0:33:58", "throughput": 3596.13, "total_tokens": 17569496}
|
| 5400 |
+
{"current_steps": 26930, "total_steps": 38160, "loss": 0.4996, "lr": 1.2072727416361479e-05, "epoch": 14.114255765199161, "percentage": 70.57, "elapsed_time": "1:21:26", "remaining_time": "0:33:57", "throughput": 3595.98, "total_tokens": 17571832}
|
| 5401 |
+
{"current_steps": 26935, "total_steps": 38160, "loss": 0.4787, "lr": 1.2062941815757484e-05, "epoch": 14.116876310272536, "percentage": 70.58, "elapsed_time": "1:21:27", "remaining_time": "0:33:56", "throughput": 3595.97, "total_tokens": 17574744}
|
| 5402 |
+
{"current_steps": 26940, "total_steps": 38160, "loss": 0.4924, "lr": 1.2053158921438454e-05, "epoch": 14.119496855345911, "percentage": 70.6, "elapsed_time": "1:21:28", "remaining_time": "0:33:55", "throughput": 3596.06, "total_tokens": 17578232}
|
| 5403 |
+
{"current_steps": 26945, "total_steps": 38160, "loss": 0.5384, "lr": 1.2043378735450885e-05, "epoch": 14.122117400419286, "percentage": 70.61, "elapsed_time": "1:21:29", "remaining_time": "0:33:54", "throughput": 3596.12, "total_tokens": 17581688}
|
| 5404 |
+
{"current_steps": 26950, "total_steps": 38160, "loss": 0.4563, "lr": 1.2033601259840657e-05, "epoch": 14.124737945492662, "percentage": 70.62, "elapsed_time": "1:21:29", "remaining_time": "0:33:54", "throughput": 3596.19, "total_tokens": 17585336}
|
| 5405 |
+
{"current_steps": 26955, "total_steps": 38160, "loss": 0.4621, "lr": 1.2023826496653118e-05, "epoch": 14.127358490566039, "percentage": 70.64, "elapsed_time": "1:21:30", "remaining_time": "0:33:53", "throughput": 3596.1, "total_tokens": 17588120}
|
| 5406 |
+
{"current_steps": 26960, "total_steps": 38160, "loss": 0.4822, "lr": 1.2014054447933044e-05, "epoch": 14.129979035639414, "percentage": 70.65, "elapsed_time": "1:21:31", "remaining_time": "0:33:52", "throughput": 3596.24, "total_tokens": 17591928}
|
| 5407 |
+
{"current_steps": 26965, "total_steps": 38160, "loss": 0.3565, "lr": 1.200428511572462e-05, "epoch": 14.132599580712789, "percentage": 70.66, "elapsed_time": "1:21:32", "remaining_time": "0:33:51", "throughput": 3596.43, "total_tokens": 17596120}
|
| 5408 |
+
{"current_steps": 26970, "total_steps": 38160, "loss": 0.5425, "lr": 1.1994518502071484e-05, "epoch": 14.135220125786164, "percentage": 70.68, "elapsed_time": "1:21:33", "remaining_time": "0:33:50", "throughput": 3596.5, "total_tokens": 17599416}
|
| 5409 |
+
{"current_steps": 26975, "total_steps": 38160, "loss": 0.5256, "lr": 1.1984754609016716e-05, "epoch": 14.13784067085954, "percentage": 70.69, "elapsed_time": "1:21:34", "remaining_time": "0:33:49", "throughput": 3596.61, "total_tokens": 17603192}
|
| 5410 |
+
{"current_steps": 26980, "total_steps": 38160, "loss": 0.4889, "lr": 1.1974993438602785e-05, "epoch": 14.140461215932914, "percentage": 70.7, "elapsed_time": "1:21:35", "remaining_time": "0:33:48", "throughput": 3596.67, "total_tokens": 17606520}
|
| 5411 |
+
{"current_steps": 26985, "total_steps": 38160, "loss": 0.4347, "lr": 1.1965234992871641e-05, "epoch": 14.14308176100629, "percentage": 70.72, "elapsed_time": "1:21:36", "remaining_time": "0:33:47", "throughput": 3596.61, "total_tokens": 17609112}
|
| 5412 |
+
{"current_steps": 26990, "total_steps": 38160, "loss": 0.5326, "lr": 1.195547927386462e-05, "epoch": 14.145702306079665, "percentage": 70.73, "elapsed_time": "1:21:36", "remaining_time": "0:33:46", "throughput": 3596.65, "total_tokens": 17612440}
|
| 5413 |
+
{"current_steps": 26995, "total_steps": 38160, "loss": 0.4099, "lr": 1.1945726283622514e-05, "epoch": 14.14832285115304, "percentage": 70.74, "elapsed_time": "1:21:37", "remaining_time": "0:33:45", "throughput": 3596.65, "total_tokens": 17615384}
|
| 5414 |
+
{"current_steps": 27000, "total_steps": 38160, "loss": 0.3836, "lr": 1.1935976024185547e-05, "epoch": 14.150943396226415, "percentage": 70.75, "elapsed_time": "1:21:38", "remaining_time": "0:33:44", "throughput": 3596.63, "total_tokens": 17618456}
|
| 5415 |
+
{"current_steps": 27005, "total_steps": 38160, "loss": 0.7536, "lr": 1.1926228497593341e-05, "epoch": 14.15356394129979, "percentage": 70.77, "elapsed_time": "1:21:39", "remaining_time": "0:33:43", "throughput": 3596.66, "total_tokens": 17621720}
|
| 5416 |
+
{"current_steps": 27010, "total_steps": 38160, "loss": 0.5639, "lr": 1.1916483705884985e-05, "epoch": 14.156184486373165, "percentage": 70.78, "elapsed_time": "1:21:40", "remaining_time": "0:33:42", "throughput": 3596.71, "total_tokens": 17625112}
|
| 5417 |
+
{"current_steps": 27015, "total_steps": 38160, "loss": 0.6401, "lr": 1.1906741651098983e-05, "epoch": 14.15880503144654, "percentage": 70.79, "elapsed_time": "1:21:41", "remaining_time": "0:33:41", "throughput": 3596.82, "total_tokens": 17628824}
|
| 5418 |
+
{"current_steps": 27020, "total_steps": 38160, "loss": 0.3731, "lr": 1.1897002335273239e-05, "epoch": 14.161425576519916, "percentage": 70.81, "elapsed_time": "1:21:42", "remaining_time": "0:33:41", "throughput": 3596.91, "total_tokens": 17632568}
|
| 5419 |
+
{"current_steps": 27025, "total_steps": 38160, "loss": 0.4079, "lr": 1.1887265760445121e-05, "epoch": 14.164046121593291, "percentage": 70.82, "elapsed_time": "1:21:42", "remaining_time": "0:33:40", "throughput": 3596.99, "total_tokens": 17635928}
|
| 5420 |
+
{"current_steps": 27030, "total_steps": 38160, "loss": 0.4962, "lr": 1.1877531928651417e-05, "epoch": 14.166666666666666, "percentage": 70.83, "elapsed_time": "1:21:43", "remaining_time": "0:33:39", "throughput": 3597.11, "total_tokens": 17639832}
|
| 5421 |
+
{"current_steps": 27035, "total_steps": 38160, "loss": 0.4608, "lr": 1.1867800841928313e-05, "epoch": 14.169287211740041, "percentage": 70.85, "elapsed_time": "1:21:44", "remaining_time": "0:33:38", "throughput": 3597.15, "total_tokens": 17643160}
|
| 5422 |
+
{"current_steps": 27040, "total_steps": 38160, "loss": 0.568, "lr": 1.1858072502311465e-05, "epoch": 14.171907756813416, "percentage": 70.86, "elapsed_time": "1:21:45", "remaining_time": "0:33:37", "throughput": 3597.15, "total_tokens": 17646040}
|
| 5423 |
+
{"current_steps": 27045, "total_steps": 38160, "loss": 0.4826, "lr": 1.1848346911835908e-05, "epoch": 14.174528301886792, "percentage": 70.87, "elapsed_time": "1:21:46", "remaining_time": "0:33:36", "throughput": 3597.1, "total_tokens": 17648600}
|
| 5424 |
+
{"current_steps": 27050, "total_steps": 38160, "loss": 0.4842, "lr": 1.1838624072536137e-05, "epoch": 14.177148846960169, "percentage": 70.89, "elapsed_time": "1:21:47", "remaining_time": "0:33:35", "throughput": 3597.18, "total_tokens": 17652024}
|
| 5425 |
+
{"current_steps": 27055, "total_steps": 38160, "loss": 0.4488, "lr": 1.1828903986446055e-05, "epoch": 14.179769392033544, "percentage": 70.9, "elapsed_time": "1:21:48", "remaining_time": "0:33:34", "throughput": 3597.21, "total_tokens": 17655160}
|
| 5426 |
+
{"current_steps": 27060, "total_steps": 38160, "loss": 0.4967, "lr": 1.1819186655599007e-05, "epoch": 14.182389937106919, "percentage": 70.91, "elapsed_time": "1:21:48", "remaining_time": "0:33:33", "throughput": 3597.29, "total_tokens": 17658712}
|
| 5427 |
+
{"current_steps": 27065, "total_steps": 38160, "loss": 0.4014, "lr": 1.1809472082027734e-05, "epoch": 14.185010482180294, "percentage": 70.93, "elapsed_time": "1:21:49", "remaining_time": "0:33:32", "throughput": 3597.27, "total_tokens": 17661432}
|
| 5428 |
+
{"current_steps": 27070, "total_steps": 38160, "loss": 0.5947, "lr": 1.1799760267764407e-05, "epoch": 14.18763102725367, "percentage": 70.94, "elapsed_time": "1:21:50", "remaining_time": "0:33:31", "throughput": 3597.29, "total_tokens": 17664408}
|
| 5429 |
+
{"current_steps": 27075, "total_steps": 38160, "loss": 0.404, "lr": 1.1790051214840634e-05, "epoch": 14.190251572327044, "percentage": 70.95, "elapsed_time": "1:21:51", "remaining_time": "0:33:30", "throughput": 3597.37, "total_tokens": 17667608}
|
| 5430 |
+
{"current_steps": 27080, "total_steps": 38160, "loss": 0.4953, "lr": 1.1780344925287443e-05, "epoch": 14.19287211740042, "percentage": 70.96, "elapsed_time": "1:21:52", "remaining_time": "0:33:29", "throughput": 3597.45, "total_tokens": 17670872}
|
| 5431 |
+
{"current_steps": 27085, "total_steps": 38160, "loss": 0.5749, "lr": 1.1770641401135282e-05, "epoch": 14.195492662473795, "percentage": 70.98, "elapsed_time": "1:21:52", "remaining_time": "0:33:28", "throughput": 3597.38, "total_tokens": 17673368}
|
| 5432 |
+
{"current_steps": 27090, "total_steps": 38160, "loss": 0.454, "lr": 1.1760940644414003e-05, "epoch": 14.19811320754717, "percentage": 70.99, "elapsed_time": "1:21:53", "remaining_time": "0:33:27", "throughput": 3597.31, "total_tokens": 17675800}
|
| 5433 |
+
{"current_steps": 27095, "total_steps": 38160, "loss": 0.3812, "lr": 1.1751242657152914e-05, "epoch": 14.200733752620545, "percentage": 71.0, "elapsed_time": "1:21:54", "remaining_time": "0:33:26", "throughput": 3597.42, "total_tokens": 17679288}
|
| 5434 |
+
{"current_steps": 27100, "total_steps": 38160, "loss": 0.5524, "lr": 1.1741547441380698e-05, "epoch": 14.20335429769392, "percentage": 71.02, "elapsed_time": "1:21:55", "remaining_time": "0:33:26", "throughput": 3597.46, "total_tokens": 17682392}
|
| 5435 |
+
{"current_steps": 27105, "total_steps": 38160, "loss": 0.5387, "lr": 1.17318549991255e-05, "epoch": 14.205974842767295, "percentage": 71.03, "elapsed_time": "1:21:56", "remaining_time": "0:33:25", "throughput": 3597.42, "total_tokens": 17685176}
|
| 5436 |
+
{"current_steps": 27110, "total_steps": 38160, "loss": 0.4265, "lr": 1.1722165332414863e-05, "epoch": 14.20859538784067, "percentage": 71.04, "elapsed_time": "1:21:56", "remaining_time": "0:33:24", "throughput": 3597.31, "total_tokens": 17687768}
|
| 5437 |
+
{"current_steps": 27115, "total_steps": 38160, "loss": 0.3654, "lr": 1.1712478443275768e-05, "epoch": 14.211215932914046, "percentage": 71.06, "elapsed_time": "1:21:57", "remaining_time": "0:33:23", "throughput": 3597.28, "total_tokens": 17690712}
|
| 5438 |
+
{"current_steps": 27120, "total_steps": 38160, "loss": 0.7315, "lr": 1.170279433373459e-05, "epoch": 14.213836477987421, "percentage": 71.07, "elapsed_time": "1:21:58", "remaining_time": "0:33:22", "throughput": 3597.26, "total_tokens": 17693944}
|
| 5439 |
+
{"current_steps": 27125, "total_steps": 38160, "loss": 0.4078, "lr": 1.1693113005817125e-05, "epoch": 14.216457023060796, "percentage": 71.08, "elapsed_time": "1:21:59", "remaining_time": "0:33:21", "throughput": 3597.35, "total_tokens": 17697528}
|
| 5440 |
+
{"current_steps": 27130, "total_steps": 38160, "loss": 0.5082, "lr": 1.1683434461548606e-05, "epoch": 14.219077568134171, "percentage": 71.1, "elapsed_time": "1:22:00", "remaining_time": "0:33:20", "throughput": 3597.3, "total_tokens": 17700472}
|
| 5441 |
+
{"current_steps": 27135, "total_steps": 38160, "loss": 0.4353, "lr": 1.1673758702953675e-05, "epoch": 14.221698113207546, "percentage": 71.11, "elapsed_time": "1:22:01", "remaining_time": "0:33:19", "throughput": 3597.35, "total_tokens": 17704024}
|
| 5442 |
+
{"current_steps": 27140, "total_steps": 38160, "loss": 0.4177, "lr": 1.1664085732056398e-05, "epoch": 14.224318658280922, "percentage": 71.12, "elapsed_time": "1:22:02", "remaining_time": "0:33:18", "throughput": 3597.34, "total_tokens": 17706840}
|
| 5443 |
+
{"current_steps": 27145, "total_steps": 38160, "loss": 0.64, "lr": 1.1654415550880243e-05, "epoch": 14.226939203354299, "percentage": 71.13, "elapsed_time": "1:22:03", "remaining_time": "0:33:17", "throughput": 3597.31, "total_tokens": 17709592}
|
| 5444 |
+
{"current_steps": 27150, "total_steps": 38160, "loss": 0.5086, "lr": 1.1644748161448088e-05, "epoch": 14.229559748427674, "percentage": 71.15, "elapsed_time": "1:22:03", "remaining_time": "0:33:16", "throughput": 3597.41, "total_tokens": 17713112}
|
| 5445 |
+
{"current_steps": 27155, "total_steps": 38160, "loss": 0.5487, "lr": 1.1635083565782257e-05, "epoch": 14.232180293501049, "percentage": 71.16, "elapsed_time": "1:22:04", "remaining_time": "0:33:15", "throughput": 3597.41, "total_tokens": 17716056}
|
| 5446 |
+
{"current_steps": 27160, "total_steps": 38160, "loss": 0.4876, "lr": 1.1625421765904465e-05, "epoch": 14.234800838574424, "percentage": 71.17, "elapsed_time": "1:22:05", "remaining_time": "0:33:14", "throughput": 3597.51, "total_tokens": 17719704}
|
| 5447 |
+
{"current_steps": 27165, "total_steps": 38160, "loss": 0.4936, "lr": 1.1615762763835858e-05, "epoch": 14.2374213836478, "percentage": 71.19, "elapsed_time": "1:22:06", "remaining_time": "0:33:13", "throughput": 3597.46, "total_tokens": 17722456}
|
| 5448 |
+
{"current_steps": 27170, "total_steps": 38160, "loss": 0.4507, "lr": 1.1606106561596993e-05, "epoch": 14.240041928721174, "percentage": 71.2, "elapsed_time": "1:22:07", "remaining_time": "0:33:12", "throughput": 3597.47, "total_tokens": 17725400}
|
| 5449 |
+
{"current_steps": 27175, "total_steps": 38160, "loss": 0.6135, "lr": 1.1596453161207829e-05, "epoch": 14.24266247379455, "percentage": 71.21, "elapsed_time": "1:22:08", "remaining_time": "0:33:12", "throughput": 3597.46, "total_tokens": 17728440}
|
| 5450 |
+
{"current_steps": 27180, "total_steps": 38160, "loss": 0.4478, "lr": 1.1586802564687741e-05, "epoch": 14.245283018867925, "percentage": 71.23, "elapsed_time": "1:22:08", "remaining_time": "0:33:11", "throughput": 3597.45, "total_tokens": 17731192}
|
| 5451 |
+
{"current_steps": 27185, "total_steps": 38160, "loss": 0.3672, "lr": 1.1577154774055528e-05, "epoch": 14.2479035639413, "percentage": 71.24, "elapsed_time": "1:22:09", "remaining_time": "0:33:10", "throughput": 3597.58, "total_tokens": 17735096}
|
| 5452 |
+
{"current_steps": 27190, "total_steps": 38160, "loss": 0.5081, "lr": 1.1567509791329401e-05, "epoch": 14.250524109014675, "percentage": 71.25, "elapsed_time": "1:22:10", "remaining_time": "0:33:09", "throughput": 3597.72, "total_tokens": 17739000}
|
| 5453 |
+
{"current_steps": 27195, "total_steps": 38160, "loss": 0.4312, "lr": 1.155786761852699e-05, "epoch": 14.25314465408805, "percentage": 71.27, "elapsed_time": "1:22:11", "remaining_time": "0:33:08", "throughput": 3597.71, "total_tokens": 17741784}
|
| 5454 |
+
{"current_steps": 27200, "total_steps": 38160, "loss": 0.5759, "lr": 1.1548228257665317e-05, "epoch": 14.255765199161425, "percentage": 71.28, "elapsed_time": "1:22:12", "remaining_time": "0:33:07", "throughput": 3597.67, "total_tokens": 17744440}
|
| 5455 |
+
{"current_steps": 27205, "total_steps": 38160, "loss": 0.6579, "lr": 1.1538591710760815e-05, "epoch": 14.2583857442348, "percentage": 71.29, "elapsed_time": "1:22:12", "remaining_time": "0:33:06", "throughput": 3597.69, "total_tokens": 17747352}
|
| 5456 |
+
{"current_steps": 27210, "total_steps": 38160, "loss": 0.4462, "lr": 1.1528957979829355e-05, "epoch": 14.261006289308176, "percentage": 71.31, "elapsed_time": "1:22:13", "remaining_time": "0:33:05", "throughput": 3597.61, "total_tokens": 17750104}
|
| 5457 |
+
{"current_steps": 27215, "total_steps": 38160, "loss": 0.4298, "lr": 1.1519327066886198e-05, "epoch": 14.26362683438155, "percentage": 71.32, "elapsed_time": "1:22:14", "remaining_time": "0:33:04", "throughput": 3597.56, "total_tokens": 17752728}
|
| 5458 |
+
{"current_steps": 27220, "total_steps": 38160, "loss": 0.4034, "lr": 1.1509698973946032e-05, "epoch": 14.266247379454926, "percentage": 71.33, "elapsed_time": "1:22:15", "remaining_time": "0:33:03", "throughput": 3597.61, "total_tokens": 17756248}
|
| 5459 |
+
{"current_steps": 27225, "total_steps": 38160, "loss": 0.4432, "lr": 1.1500073703022926e-05, "epoch": 14.268867924528301, "percentage": 71.34, "elapsed_time": "1:22:16", "remaining_time": "0:33:02", "throughput": 3597.71, "total_tokens": 17759704}
|
| 5460 |
+
{"current_steps": 27230, "total_steps": 38160, "loss": 0.479, "lr": 1.1490451256130394e-05, "epoch": 14.271488469601676, "percentage": 71.36, "elapsed_time": "1:22:17", "remaining_time": "0:33:01", "throughput": 3597.76, "total_tokens": 17763064}
|
| 5461 |
+
{"current_steps": 27235, "total_steps": 38160, "loss": 0.5374, "lr": 1.1480831635281328e-05, "epoch": 14.274109014675052, "percentage": 71.37, "elapsed_time": "1:22:18", "remaining_time": "0:33:00", "throughput": 3597.92, "total_tokens": 17767384}
|
| 5462 |
+
{"current_steps": 27240, "total_steps": 38160, "loss": 0.5481, "lr": 1.1471214842488046e-05, "epoch": 14.276729559748428, "percentage": 71.38, "elapsed_time": "1:22:19", "remaining_time": "0:32:59", "throughput": 3597.94, "total_tokens": 17770328}
|
| 5463 |
+
{"current_steps": 27245, "total_steps": 38160, "loss": 0.4438, "lr": 1.1461600879762277e-05, "epoch": 14.279350104821804, "percentage": 71.4, "elapsed_time": "1:22:19", "remaining_time": "0:32:59", "throughput": 3598.01, "total_tokens": 17774072}
|
| 5464 |
+
{"current_steps": 27250, "total_steps": 38160, "loss": 0.3538, "lr": 1.1451989749115157e-05, "epoch": 14.281970649895179, "percentage": 71.41, "elapsed_time": "1:22:20", "remaining_time": "0:32:58", "throughput": 3597.9, "total_tokens": 17776344}
|
| 5465 |
+
{"current_steps": 27255, "total_steps": 38160, "loss": 0.538, "lr": 1.1442381452557219e-05, "epoch": 14.284591194968554, "percentage": 71.42, "elapsed_time": "1:22:21", "remaining_time": "0:32:57", "throughput": 3598.07, "total_tokens": 17780536}
|
| 5466 |
+
{"current_steps": 27260, "total_steps": 38160, "loss": 0.4957, "lr": 1.1432775992098397e-05, "epoch": 14.28721174004193, "percentage": 71.44, "elapsed_time": "1:22:22", "remaining_time": "0:32:56", "throughput": 3598.15, "total_tokens": 17784024}
|
| 5467 |
+
{"current_steps": 27265, "total_steps": 38160, "loss": 0.6406, "lr": 1.1423173369748056e-05, "epoch": 14.289832285115304, "percentage": 71.45, "elapsed_time": "1:22:23", "remaining_time": "0:32:55", "throughput": 3598.25, "total_tokens": 17788248}
|
| 5468 |
+
{"current_steps": 27270, "total_steps": 38160, "loss": 0.4756, "lr": 1.1413573587514952e-05, "epoch": 14.29245283018868, "percentage": 71.46, "elapsed_time": "1:22:24", "remaining_time": "0:32:54", "throughput": 3598.22, "total_tokens": 17790872}
|
| 5469 |
+
{"current_steps": 27275, "total_steps": 38160, "loss": 0.4658, "lr": 1.140397664740726e-05, "epoch": 14.295073375262055, "percentage": 71.48, "elapsed_time": "1:22:25", "remaining_time": "0:32:53", "throughput": 3598.25, "total_tokens": 17793912}
|
| 5470 |
+
{"current_steps": 27280, "total_steps": 38160, "loss": 0.4448, "lr": 1.1394382551432534e-05, "epoch": 14.29769392033543, "percentage": 71.49, "elapsed_time": "1:22:25", "remaining_time": "0:32:52", "throughput": 3598.38, "total_tokens": 17797336}
|
| 5471 |
+
{"current_steps": 27285, "total_steps": 38160, "loss": 0.5729, "lr": 1.1384791301597767e-05, "epoch": 14.300314465408805, "percentage": 71.5, "elapsed_time": "1:22:26", "remaining_time": "0:32:51", "throughput": 3598.44, "total_tokens": 17800728}
|
| 5472 |
+
{"current_steps": 27290, "total_steps": 38160, "loss": 0.6729, "lr": 1.1375202899909317e-05, "epoch": 14.30293501048218, "percentage": 71.51, "elapsed_time": "1:22:27", "remaining_time": "0:32:50", "throughput": 3598.42, "total_tokens": 17803448}
|
| 5473 |
+
{"current_steps": 27295, "total_steps": 38160, "loss": 0.3995, "lr": 1.1365617348372984e-05, "epoch": 14.305555555555555, "percentage": 71.53, "elapsed_time": "1:22:28", "remaining_time": "0:32:49", "throughput": 3598.57, "total_tokens": 17807896}
|
| 5474 |
+
{"current_steps": 27300, "total_steps": 38160, "loss": 0.4616, "lr": 1.135603464899396e-05, "epoch": 14.30817610062893, "percentage": 71.54, "elapsed_time": "1:22:29", "remaining_time": "0:32:48", "throughput": 3598.62, "total_tokens": 17811032}
|
| 5475 |
+
{"current_steps": 27305, "total_steps": 38160, "loss": 0.5094, "lr": 1.1346454803776821e-05, "epoch": 14.310796645702306, "percentage": 71.55, "elapsed_time": "1:22:30", "remaining_time": "0:32:48", "throughput": 3598.78, "total_tokens": 17815640}
|
| 5476 |
+
{"current_steps": 27310, "total_steps": 38160, "loss": 0.433, "lr": 1.1336877814725566e-05, "epoch": 14.31341719077568, "percentage": 71.57, "elapsed_time": "1:22:31", "remaining_time": "0:32:47", "throughput": 3598.82, "total_tokens": 17818744}
|
| 5477 |
+
{"current_steps": 27315, "total_steps": 38160, "loss": 0.4326, "lr": 1.1327303683843607e-05, "epoch": 14.316037735849056, "percentage": 71.58, "elapsed_time": "1:22:32", "remaining_time": "0:32:46", "throughput": 3598.81, "total_tokens": 17821432}
|
| 5478 |
+
{"current_steps": 27320, "total_steps": 38160, "loss": 0.4928, "lr": 1.1317732413133723e-05, "epoch": 14.318658280922431, "percentage": 71.59, "elapsed_time": "1:22:32", "remaining_time": "0:32:45", "throughput": 3598.82, "total_tokens": 17824312}
|
| 5479 |
+
{"current_steps": 27325, "total_steps": 38160, "loss": 0.3465, "lr": 1.1308164004598118e-05, "epoch": 14.321278825995806, "percentage": 71.61, "elapsed_time": "1:22:33", "remaining_time": "0:32:44", "throughput": 3598.86, "total_tokens": 17827288}
|
| 5480 |
+
{"current_steps": 27330, "total_steps": 38160, "loss": 0.4405, "lr": 1.129859846023841e-05, "epoch": 14.323899371069182, "percentage": 71.62, "elapsed_time": "1:22:34", "remaining_time": "0:32:43", "throughput": 3598.91, "total_tokens": 17830392}
|
| 5481 |
+
{"current_steps": 27335, "total_steps": 38160, "loss": 0.4541, "lr": 1.1289035782055582e-05, "epoch": 14.326519916142558, "percentage": 71.63, "elapsed_time": "1:22:35", "remaining_time": "0:32:42", "throughput": 3598.96, "total_tokens": 17833432}
|
| 5482 |
+
{"current_steps": 27340, "total_steps": 38160, "loss": 0.469, "lr": 1.1279475972050044e-05, "epoch": 14.329140461215934, "percentage": 71.65, "elapsed_time": "1:22:35", "remaining_time": "0:32:41", "throughput": 3599.12, "total_tokens": 17837112}
|
| 5483 |
+
{"current_steps": 27345, "total_steps": 38160, "loss": 0.4075, "lr": 1.1269919032221612e-05, "epoch": 14.331761006289309, "percentage": 71.66, "elapsed_time": "1:22:36", "remaining_time": "0:32:40", "throughput": 3599.13, "total_tokens": 17840088}
|
| 5484 |
+
{"current_steps": 27350, "total_steps": 38160, "loss": 0.4232, "lr": 1.1260364964569466e-05, "epoch": 14.334381551362684, "percentage": 71.67, "elapsed_time": "1:22:37", "remaining_time": "0:32:39", "throughput": 3599.1, "total_tokens": 17843000}
|
| 5485 |
+
{"current_steps": 27355, "total_steps": 38160, "loss": 0.5046, "lr": 1.125081377109223e-05, "epoch": 14.33700209643606, "percentage": 71.69, "elapsed_time": "1:22:38", "remaining_time": "0:32:38", "throughput": 3599.18, "total_tokens": 17846872}
|
| 5486 |
+
{"current_steps": 27360, "total_steps": 38160, "loss": 0.4825, "lr": 1.1241265453787882e-05, "epoch": 14.339622641509434, "percentage": 71.7, "elapsed_time": "1:22:39", "remaining_time": "0:32:37", "throughput": 3599.26, "total_tokens": 17850232}
|
| 5487 |
+
{"current_steps": 27365, "total_steps": 38160, "loss": 0.6739, "lr": 1.1231720014653837e-05, "epoch": 14.34224318658281, "percentage": 71.71, "elapsed_time": "1:22:40", "remaining_time": "0:32:36", "throughput": 3599.38, "total_tokens": 17854360}
|
| 5488 |
+
{"current_steps": 27370, "total_steps": 38160, "loss": 0.4535, "lr": 1.1222177455686896e-05, "epoch": 14.344863731656185, "percentage": 71.72, "elapsed_time": "1:22:41", "remaining_time": "0:32:35", "throughput": 3599.51, "total_tokens": 17858040}
|
| 5489 |
+
{"current_steps": 27375, "total_steps": 38160, "loss": 0.5824, "lr": 1.1212637778883237e-05, "epoch": 14.34748427672956, "percentage": 71.74, "elapsed_time": "1:22:42", "remaining_time": "0:32:34", "throughput": 3599.53, "total_tokens": 17861400}
|
| 5490 |
+
{"current_steps": 27380, "total_steps": 38160, "loss": 0.6932, "lr": 1.1203100986238468e-05, "epoch": 14.350104821802935, "percentage": 71.75, "elapsed_time": "1:22:43", "remaining_time": "0:32:34", "throughput": 3599.66, "total_tokens": 17865176}
|
| 5491 |
+
{"current_steps": 27385, "total_steps": 38160, "loss": 0.5618, "lr": 1.1193567079747561e-05, "epoch": 14.35272536687631, "percentage": 71.76, "elapsed_time": "1:22:43", "remaining_time": "0:32:33", "throughput": 3599.57, "total_tokens": 17867544}
|
| 5492 |
+
{"current_steps": 27390, "total_steps": 38160, "loss": 0.3933, "lr": 1.1184036061404907e-05, "epoch": 14.355345911949685, "percentage": 71.78, "elapsed_time": "1:22:44", "remaining_time": "0:32:32", "throughput": 3599.6, "total_tokens": 17870584}
|
| 5493 |
+
{"current_steps": 27395, "total_steps": 38160, "loss": 0.4638, "lr": 1.117450793320429e-05, "epoch": 14.35796645702306, "percentage": 71.79, "elapsed_time": "1:22:45", "remaining_time": "0:32:31", "throughput": 3599.63, "total_tokens": 17873560}
|
| 5494 |
+
{"current_steps": 27400, "total_steps": 38160, "loss": 0.5704, "lr": 1.1164982697138893e-05, "epoch": 14.360587002096436, "percentage": 71.8, "elapsed_time": "1:22:46", "remaining_time": "0:32:30", "throughput": 3599.71, "total_tokens": 17876824}
|
| 5495 |
+
{"current_steps": 27405, "total_steps": 38160, "loss": 0.5084, "lr": 1.1155460355201267e-05, "epoch": 14.36320754716981, "percentage": 71.82, "elapsed_time": "1:22:46", "remaining_time": "0:32:29", "throughput": 3599.76, "total_tokens": 17879928}
|
| 5496 |
+
{"current_steps": 27410, "total_steps": 38160, "loss": 0.5551, "lr": 1.1145940909383399e-05, "epoch": 14.365828092243186, "percentage": 71.83, "elapsed_time": "1:22:47", "remaining_time": "0:32:28", "throughput": 3599.88, "total_tokens": 17883672}
|
| 5497 |
+
{"current_steps": 27415, "total_steps": 38160, "loss": 0.4223, "lr": 1.1136424361676626e-05, "epoch": 14.368448637316561, "percentage": 71.84, "elapsed_time": "1:22:48", "remaining_time": "0:32:27", "throughput": 3599.96, "total_tokens": 17887032}
|
| 5498 |
+
{"current_steps": 27420, "total_steps": 38160, "loss": 0.4802, "lr": 1.1126910714071711e-05, "epoch": 14.371069182389936, "percentage": 71.86, "elapsed_time": "1:22:49", "remaining_time": "0:32:26", "throughput": 3599.93, "total_tokens": 17889816}
|
| 5499 |
+
{"current_steps": 27425, "total_steps": 38160, "loss": 0.5154, "lr": 1.1117399968558806e-05, "epoch": 14.373689727463312, "percentage": 71.87, "elapsed_time": "1:22:50", "remaining_time": "0:32:25", "throughput": 3599.95, "total_tokens": 17893016}
|
| 5500 |
+
{"current_steps": 27430, "total_steps": 38160, "loss": 0.424, "lr": 1.1107892127127454e-05, "epoch": 14.376310272536688, "percentage": 71.88, "elapsed_time": "1:22:51", "remaining_time": "0:32:24", "throughput": 3599.95, "total_tokens": 17896056}
|
| 5501 |
+
{"current_steps": 27435, "total_steps": 38160, "loss": 0.5874, "lr": 1.109838719176658e-05, "epoch": 14.378930817610064, "percentage": 71.89, "elapsed_time": "1:22:51", "remaining_time": "0:32:23", "throughput": 3600.01, "total_tokens": 17899224}
|
| 5502 |
+
{"current_steps": 27440, "total_steps": 38160, "loss": 0.5802, "lr": 1.1088885164464497e-05, "epoch": 14.381551362683439, "percentage": 71.91, "elapsed_time": "1:22:52", "remaining_time": "0:32:22", "throughput": 3600.14, "total_tokens": 17902840}
|
| 5503 |
+
{"current_steps": 27445, "total_steps": 38160, "loss": 0.5712, "lr": 1.1079386047208932e-05, "epoch": 14.384171907756814, "percentage": 71.92, "elapsed_time": "1:22:53", "remaining_time": "0:32:21", "throughput": 3599.96, "total_tokens": 17905272}
|
| 5504 |
+
{"current_steps": 27450, "total_steps": 38160, "loss": 0.5047, "lr": 1.1069889841986991e-05, "epoch": 14.38679245283019, "percentage": 71.93, "elapsed_time": "1:22:54", "remaining_time": "0:32:20", "throughput": 3599.93, "total_tokens": 17907992}
|
| 5505 |
+
{"current_steps": 27455, "total_steps": 38160, "loss": 0.4101, "lr": 1.1060396550785182e-05, "epoch": 14.389412997903564, "percentage": 71.95, "elapsed_time": "1:22:55", "remaining_time": "0:32:19", "throughput": 3599.91, "total_tokens": 17910904}
|
| 5506 |
+
{"current_steps": 27460, "total_steps": 38160, "loss": 0.4109, "lr": 1.1050906175589382e-05, "epoch": 14.39203354297694, "percentage": 71.96, "elapsed_time": "1:22:56", "remaining_time": "0:32:19", "throughput": 3600.08, "total_tokens": 17914904}
|
| 5507 |
+
{"current_steps": 27465, "total_steps": 38160, "loss": 0.5032, "lr": 1.1041418718384863e-05, "epoch": 14.394654088050315, "percentage": 71.97, "elapsed_time": "1:22:57", "remaining_time": "0:32:18", "throughput": 3600.11, "total_tokens": 17918104}
|
| 5508 |
+
{"current_steps": 27470, "total_steps": 38160, "loss": 0.4833, "lr": 1.1031934181156298e-05, "epoch": 14.39727463312369, "percentage": 71.99, "elapsed_time": "1:22:57", "remaining_time": "0:32:17", "throughput": 3600.23, "total_tokens": 17921720}
|
| 5509 |
+
{"current_steps": 27475, "total_steps": 38160, "loss": 0.5163, "lr": 1.1022452565887747e-05, "epoch": 14.399895178197065, "percentage": 72.0, "elapsed_time": "1:22:58", "remaining_time": "0:32:16", "throughput": 3600.28, "total_tokens": 17924984}
|
| 5510 |
+
{"current_steps": 27480, "total_steps": 38160, "loss": 0.4665, "lr": 1.1012973874562657e-05, "epoch": 14.40251572327044, "percentage": 72.01, "elapsed_time": "1:22:59", "remaining_time": "0:32:15", "throughput": 3600.24, "total_tokens": 17927672}
|
| 5511 |
+
{"current_steps": 27485, "total_steps": 38160, "loss": 0.5932, "lr": 1.1003498109163865e-05, "epoch": 14.405136268343815, "percentage": 72.03, "elapsed_time": "1:23:00", "remaining_time": "0:32:14", "throughput": 3600.25, "total_tokens": 17931128}
|
| 5512 |
+
{"current_steps": 27490, "total_steps": 38160, "loss": 0.4745, "lr": 1.0994025271673592e-05, "epoch": 14.40775681341719, "percentage": 72.04, "elapsed_time": "1:23:01", "remaining_time": "0:32:13", "throughput": 3600.4, "total_tokens": 17934936}
|
| 5513 |
+
{"current_steps": 27495, "total_steps": 38160, "loss": 0.3548, "lr": 1.0984555364073431e-05, "epoch": 14.410377358490566, "percentage": 72.05, "elapsed_time": "1:23:02", "remaining_time": "0:32:12", "throughput": 3600.34, "total_tokens": 17937432}
|
| 5514 |
+
{"current_steps": 27500, "total_steps": 38160, "loss": 0.4789, "lr": 1.0975088388344393e-05, "epoch": 14.41299790356394, "percentage": 72.06, "elapsed_time": "1:23:02", "remaining_time": "0:32:11", "throughput": 3600.23, "total_tokens": 17939800}
|
| 5515 |
+
{"current_steps": 27505, "total_steps": 38160, "loss": 0.5087, "lr": 1.096562434646686e-05, "epoch": 14.415618448637316, "percentage": 72.08, "elapsed_time": "1:23:03", "remaining_time": "0:32:10", "throughput": 3600.22, "total_tokens": 17942808}
|
| 5516 |
+
{"current_steps": 27510, "total_steps": 38160, "loss": 0.4903, "lr": 1.0956163240420611e-05, "epoch": 14.418238993710691, "percentage": 72.09, "elapsed_time": "1:23:04", "remaining_time": "0:32:09", "throughput": 3600.21, "total_tokens": 17945592}
|
| 5517 |
+
{"current_steps": 27515, "total_steps": 38160, "loss": 0.5865, "lr": 1.094670507218479e-05, "epoch": 14.420859538784066, "percentage": 72.1, "elapsed_time": "1:23:05", "remaining_time": "0:32:08", "throughput": 3600.22, "total_tokens": 17948536}
|
| 5518 |
+
{"current_steps": 27520, "total_steps": 38160, "loss": 0.4966, "lr": 1.093724984373793e-05, "epoch": 14.423480083857442, "percentage": 72.12, "elapsed_time": "1:23:06", "remaining_time": "0:32:07", "throughput": 3600.35, "total_tokens": 17952408}
|
| 5519 |
+
{"current_steps": 27525, "total_steps": 38160, "loss": 0.4699, "lr": 1.0927797557057968e-05, "epoch": 14.426100628930818, "percentage": 72.13, "elapsed_time": "1:23:07", "remaining_time": "0:32:06", "throughput": 3600.47, "total_tokens": 17955896}
|
| 5520 |
+
{"current_steps": 27530, "total_steps": 38160, "loss": 0.5405, "lr": 1.0918348214122209e-05, "epoch": 14.428721174004194, "percentage": 72.14, "elapsed_time": "1:23:08", "remaining_time": "0:32:06", "throughput": 3600.76, "total_tokens": 17963128}
|
| 5521 |
+
{"current_steps": 27535, "total_steps": 38160, "loss": 0.5467, "lr": 1.0908901816907366e-05, "epoch": 14.431341719077569, "percentage": 72.16, "elapsed_time": "1:23:09", "remaining_time": "0:32:05", "throughput": 3600.71, "total_tokens": 17965688}
|
| 5522 |
+
{"current_steps": 27540, "total_steps": 38160, "loss": 0.3718, "lr": 1.08994583673895e-05, "epoch": 14.433962264150944, "percentage": 72.17, "elapsed_time": "1:23:10", "remaining_time": "0:32:04", "throughput": 3600.73, "total_tokens": 17968888}
|
| 5523 |
+
{"current_steps": 27545, "total_steps": 38160, "loss": 0.4322, "lr": 1.0890017867544068e-05, "epoch": 14.43658280922432, "percentage": 72.18, "elapsed_time": "1:23:11", "remaining_time": "0:32:03", "throughput": 3600.79, "total_tokens": 17972120}
|
| 5524 |
+
{"current_steps": 27550, "total_steps": 38160, "loss": 0.4319, "lr": 1.088058031934592e-05, "epoch": 14.439203354297694, "percentage": 72.2, "elapsed_time": "1:23:11", "remaining_time": "0:32:02", "throughput": 3600.89, "total_tokens": 17975608}
|
| 5525 |
+
{"current_steps": 27555, "total_steps": 38160, "loss": 0.6057, "lr": 1.087114572476929e-05, "epoch": 14.44182389937107, "percentage": 72.21, "elapsed_time": "1:23:12", "remaining_time": "0:32:01", "throughput": 3600.98, "total_tokens": 17979128}
|
| 5526 |
+
{"current_steps": 27560, "total_steps": 38160, "loss": 0.5323, "lr": 1.0861714085787783e-05, "epoch": 14.444444444444445, "percentage": 72.22, "elapsed_time": "1:23:13", "remaining_time": "0:32:00", "throughput": 3601.06, "total_tokens": 17982456}
|
| 5527 |
+
{"current_steps": 27565, "total_steps": 38160, "loss": 0.5648, "lr": 1.0852285404374402e-05, "epoch": 14.44706498951782, "percentage": 72.24, "elapsed_time": "1:23:14", "remaining_time": "0:31:59", "throughput": 3601.14, "total_tokens": 17985720}
|
| 5528 |
+
{"current_steps": 27570, "total_steps": 38160, "loss": 0.4558, "lr": 1.0842859682501505e-05, "epoch": 14.449685534591195, "percentage": 72.25, "elapsed_time": "1:23:15", "remaining_time": "0:31:58", "throughput": 3601.26, "total_tokens": 17989272}
|
| 5529 |
+
{"current_steps": 27575, "total_steps": 38160, "loss": 0.5733, "lr": 1.0833436922140841e-05, "epoch": 14.45230607966457, "percentage": 72.26, "elapsed_time": "1:23:16", "remaining_time": "0:31:57", "throughput": 3601.29, "total_tokens": 17992408}
|
| 5530 |
+
{"current_steps": 27580, "total_steps": 38160, "loss": 0.4404, "lr": 1.0824017125263555e-05, "epoch": 14.454926624737945, "percentage": 72.27, "elapsed_time": "1:23:17", "remaining_time": "0:31:56", "throughput": 3601.44, "total_tokens": 17996568}
|
| 5531 |
+
{"current_steps": 27585, "total_steps": 38160, "loss": 0.4486, "lr": 1.0814600293840154e-05, "epoch": 14.45754716981132, "percentage": 72.29, "elapsed_time": "1:23:17", "remaining_time": "0:31:55", "throughput": 3601.49, "total_tokens": 17999832}
|
| 5532 |
+
{"current_steps": 27590, "total_steps": 38160, "loss": 0.5814, "lr": 1.080518642984055e-05, "epoch": 14.460167714884696, "percentage": 72.3, "elapsed_time": "1:23:18", "remaining_time": "0:31:55", "throughput": 3601.63, "total_tokens": 18003704}
|
| 5533 |
+
{"current_steps": 27595, "total_steps": 38160, "loss": 0.4145, "lr": 1.0795775535233988e-05, "epoch": 14.46278825995807, "percentage": 72.31, "elapsed_time": "1:23:19", "remaining_time": "0:31:54", "throughput": 3601.59, "total_tokens": 18006328}
|
| 5534 |
+
{"current_steps": 27600, "total_steps": 38160, "loss": 0.4985, "lr": 1.0786367611989145e-05, "epoch": 14.465408805031446, "percentage": 72.33, "elapsed_time": "1:23:20", "remaining_time": "0:31:53", "throughput": 3601.67, "total_tokens": 18010008}
|
| 5535 |
+
{"current_steps": 27605, "total_steps": 38160, "loss": 0.5201, "lr": 1.0776962662074031e-05, "epoch": 14.468029350104821, "percentage": 72.34, "elapsed_time": "1:23:21", "remaining_time": "0:31:52", "throughput": 3601.81, "total_tokens": 18013880}
|
| 5536 |
+
{"current_steps": 27610, "total_steps": 38160, "loss": 0.4775, "lr": 1.0767560687456062e-05, "epoch": 14.470649895178196, "percentage": 72.35, "elapsed_time": "1:23:22", "remaining_time": "0:31:51", "throughput": 3601.84, "total_tokens": 18016824}
|
| 5537 |
+
{"current_steps": 27615, "total_steps": 38160, "loss": 0.4711, "lr": 1.0758161690102036e-05, "epoch": 14.473270440251572, "percentage": 72.37, "elapsed_time": "1:23:22", "remaining_time": "0:31:50", "throughput": 3601.95, "total_tokens": 18020472}
|
| 5538 |
+
{"current_steps": 27620, "total_steps": 38160, "loss": 0.4178, "lr": 1.0748765671978095e-05, "epoch": 14.475890985324948, "percentage": 72.38, "elapsed_time": "1:23:23", "remaining_time": "0:31:49", "throughput": 3601.89, "total_tokens": 18023128}
|
| 5539 |
+
{"current_steps": 27625, "total_steps": 38160, "loss": 0.5346, "lr": 1.0739372635049797e-05, "epoch": 14.478511530398324, "percentage": 72.39, "elapsed_time": "1:23:24", "remaining_time": "0:31:48", "throughput": 3602.0, "total_tokens": 18026872}
|
| 5540 |
+
{"current_steps": 27630, "total_steps": 38160, "loss": 0.5774, "lr": 1.0729982581282042e-05, "epoch": 14.481132075471699, "percentage": 72.41, "elapsed_time": "1:23:25", "remaining_time": "0:31:47", "throughput": 3602.07, "total_tokens": 18030200}
|
| 5541 |
+
{"current_steps": 27635, "total_steps": 38160, "loss": 0.3695, "lr": 1.0720595512639129e-05, "epoch": 14.483752620545074, "percentage": 72.42, "elapsed_time": "1:23:26", "remaining_time": "0:31:46", "throughput": 3602.14, "total_tokens": 18033528}
|
| 5542 |
+
{"current_steps": 27640, "total_steps": 38160, "loss": 0.5077, "lr": 1.0711211431084728e-05, "epoch": 14.48637316561845, "percentage": 72.43, "elapsed_time": "1:23:27", "remaining_time": "0:31:45", "throughput": 3602.08, "total_tokens": 18036088}
|
| 5543 |
+
{"current_steps": 27645, "total_steps": 38160, "loss": 0.5077, "lr": 1.0701830338581887e-05, "epoch": 14.488993710691824, "percentage": 72.44, "elapsed_time": "1:23:28", "remaining_time": "0:31:44", "throughput": 3602.21, "total_tokens": 18040408}
|
| 5544 |
+
{"current_steps": 27650, "total_steps": 38160, "loss": 0.3787, "lr": 1.069245223709301e-05, "epoch": 14.4916142557652, "percentage": 72.46, "elapsed_time": "1:23:28", "remaining_time": "0:31:43", "throughput": 3602.19, "total_tokens": 18043096}
|
| 5545 |
+
{"current_steps": 27655, "total_steps": 38160, "loss": 0.4883, "lr": 1.0683077128579904e-05, "epoch": 14.494234800838575, "percentage": 72.47, "elapsed_time": "1:23:29", "remaining_time": "0:31:42", "throughput": 3602.24, "total_tokens": 18046232}
|
| 5546 |
+
{"current_steps": 27660, "total_steps": 38160, "loss": 0.5254, "lr": 1.0673705015003718e-05, "epoch": 14.49685534591195, "percentage": 72.48, "elapsed_time": "1:23:30", "remaining_time": "0:31:42", "throughput": 3602.2, "total_tokens": 18048792}
|
| 5547 |
+
{"current_steps": 27665, "total_steps": 38160, "loss": 0.4617, "lr": 1.0664335898325002e-05, "epoch": 14.499475890985325, "percentage": 72.5, "elapsed_time": "1:23:31", "remaining_time": "0:31:41", "throughput": 3602.35, "total_tokens": 18052696}
|
| 5548 |
+
{"current_steps": 27670, "total_steps": 38160, "loss": 0.3751, "lr": 1.065496978050367e-05, "epoch": 14.5020964360587, "percentage": 72.51, "elapsed_time": "1:23:32", "remaining_time": "0:31:40", "throughput": 3602.32, "total_tokens": 18055256}
|
| 5549 |
+
{"current_steps": 27675, "total_steps": 38160, "loss": 0.4766, "lr": 1.0645606663499e-05, "epoch": 14.504716981132075, "percentage": 72.52, "elapsed_time": "1:23:32", "remaining_time": "0:31:39", "throughput": 3602.27, "total_tokens": 18057784}
|
| 5550 |
+
{"current_steps": 27680, "total_steps": 38160, "loss": 0.6035, "lr": 1.0636246549269649e-05, "epoch": 14.50733752620545, "percentage": 72.54, "elapsed_time": "1:23:33", "remaining_time": "0:31:38", "throughput": 3602.34, "total_tokens": 18060920}
|
| 5551 |
+
{"current_steps": 27685, "total_steps": 38160, "loss": 0.4702, "lr": 1.062688943977366e-05, "epoch": 14.509958071278826, "percentage": 72.55, "elapsed_time": "1:23:34", "remaining_time": "0:31:37", "throughput": 3602.42, "total_tokens": 18064184}
|
| 5552 |
+
{"current_steps": 27690, "total_steps": 38160, "loss": 0.5578, "lr": 1.0617535336968413e-05, "epoch": 14.5125786163522, "percentage": 72.56, "elapsed_time": "1:23:35", "remaining_time": "0:31:36", "throughput": 3602.5, "total_tokens": 18067672}
|
| 5553 |
+
{"current_steps": 27695, "total_steps": 38160, "loss": 0.4604, "lr": 1.0608184242810698e-05, "epoch": 14.515199161425576, "percentage": 72.58, "elapsed_time": "1:23:36", "remaining_time": "0:31:35", "throughput": 3602.56, "total_tokens": 18071032}
|
| 5554 |
+
{"current_steps": 27700, "total_steps": 38160, "loss": 0.7236, "lr": 1.0598836159256642e-05, "epoch": 14.517819706498951, "percentage": 72.59, "elapsed_time": "1:23:36", "remaining_time": "0:31:34", "throughput": 3602.64, "total_tokens": 18074200}
|
| 5555 |
+
{"current_steps": 27705, "total_steps": 38160, "loss": 0.4836, "lr": 1.0589491088261763e-05, "epoch": 14.520440251572326, "percentage": 72.6, "elapsed_time": "1:23:37", "remaining_time": "0:31:33", "throughput": 3602.62, "total_tokens": 18076824}
|
| 5556 |
+
{"current_steps": 27710, "total_steps": 38160, "loss": 0.5219, "lr": 1.0580149031780945e-05, "epoch": 14.523060796645701, "percentage": 72.62, "elapsed_time": "1:23:38", "remaining_time": "0:31:32", "throughput": 3602.59, "total_tokens": 18079512}
|
| 5557 |
+
{"current_steps": 27715, "total_steps": 38160, "loss": 0.4436, "lr": 1.0570809991768447e-05, "epoch": 14.525681341719078, "percentage": 72.63, "elapsed_time": "1:23:39", "remaining_time": "0:31:31", "throughput": 3602.62, "total_tokens": 18082424}
|
| 5558 |
+
{"current_steps": 27720, "total_steps": 38160, "loss": 0.4275, "lr": 1.0561473970177873e-05, "epoch": 14.528301886792454, "percentage": 72.64, "elapsed_time": "1:23:40", "remaining_time": "0:31:30", "throughput": 3602.67, "total_tokens": 18085400}
|
| 5559 |
+
{"current_steps": 27725, "total_steps": 38160, "loss": 0.6617, "lr": 1.0552140968962232e-05, "epoch": 14.530922431865829, "percentage": 72.65, "elapsed_time": "1:23:40", "remaining_time": "0:31:29", "throughput": 3602.75, "total_tokens": 18088664}
|
| 5560 |
+
{"current_steps": 27730, "total_steps": 38160, "loss": 0.4776, "lr": 1.054281099007386e-05, "epoch": 14.533542976939204, "percentage": 72.67, "elapsed_time": "1:23:41", "remaining_time": "0:31:28", "throughput": 3602.75, "total_tokens": 18091512}
|
| 5561 |
+
{"current_steps": 27735, "total_steps": 38160, "loss": 0.6628, "lr": 1.0533484035464492e-05, "epoch": 14.536163522012579, "percentage": 72.68, "elapsed_time": "1:23:42", "remaining_time": "0:31:27", "throughput": 3602.65, "total_tokens": 18093752}
|
| 5562 |
+
{"current_steps": 27740, "total_steps": 38160, "loss": 0.5627, "lr": 1.0524160107085231e-05, "epoch": 14.538784067085954, "percentage": 72.69, "elapsed_time": "1:23:43", "remaining_time": "0:31:26", "throughput": 3602.64, "total_tokens": 18096440}
|
| 5563 |
+
{"current_steps": 27745, "total_steps": 38160, "loss": 0.4193, "lr": 1.0514839206886518e-05, "epoch": 14.54140461215933, "percentage": 72.71, "elapsed_time": "1:23:43", "remaining_time": "0:31:25", "throughput": 3602.64, "total_tokens": 18099384}
|
| 5564 |
+
{"current_steps": 27750, "total_steps": 38160, "loss": 0.4487, "lr": 1.0505521336818195e-05, "epoch": 14.544025157232705, "percentage": 72.72, "elapsed_time": "1:23:44", "remaining_time": "0:31:24", "throughput": 3602.68, "total_tokens": 18102744}
|
| 5565 |
+
{"current_steps": 27755, "total_steps": 38160, "loss": 0.5449, "lr": 1.0496206498829438e-05, "epoch": 14.54664570230608, "percentage": 72.73, "elapsed_time": "1:23:45", "remaining_time": "0:31:24", "throughput": 3602.7, "total_tokens": 18105720}
|
| 5566 |
+
{"current_steps": 27760, "total_steps": 38160, "loss": 0.3779, "lr": 1.0486894694868814e-05, "epoch": 14.549266247379455, "percentage": 72.75, "elapsed_time": "1:23:46", "remaining_time": "0:31:23", "throughput": 3602.72, "total_tokens": 18108536}
|
| 5567 |
+
{"current_steps": 27765, "total_steps": 38160, "loss": 0.547, "lr": 1.0477585926884246e-05, "epoch": 14.55188679245283, "percentage": 72.76, "elapsed_time": "1:23:47", "remaining_time": "0:31:22", "throughput": 3602.65, "total_tokens": 18110904}
|
| 5568 |
+
{"current_steps": 27770, "total_steps": 38160, "loss": 0.4181, "lr": 1.046828019682303e-05, "epoch": 14.554507337526205, "percentage": 72.77, "elapsed_time": "1:23:47", "remaining_time": "0:31:21", "throughput": 3602.69, "total_tokens": 18113944}
|
| 5569 |
+
{"current_steps": 27775, "total_steps": 38160, "loss": 0.5888, "lr": 1.0458977506631808e-05, "epoch": 14.55712788259958, "percentage": 72.79, "elapsed_time": "1:23:48", "remaining_time": "0:31:20", "throughput": 3602.76, "total_tokens": 18117304}
|
| 5570 |
+
{"current_steps": 27780, "total_steps": 38160, "loss": 0.5965, "lr": 1.0449677858256593e-05, "epoch": 14.559748427672956, "percentage": 72.8, "elapsed_time": "1:23:49", "remaining_time": "0:31:19", "throughput": 3602.69, "total_tokens": 18119704}
|
| 5571 |
+
{"current_steps": 27785, "total_steps": 38160, "loss": 0.5782, "lr": 1.0440381253642773e-05, "epoch": 14.56236897274633, "percentage": 72.81, "elapsed_time": "1:23:50", "remaining_time": "0:31:18", "throughput": 3602.9, "total_tokens": 18124312}
|
| 5572 |
+
{"current_steps": 27790, "total_steps": 38160, "loss": 0.4305, "lr": 1.0431087694735087e-05, "epoch": 14.564989517819706, "percentage": 72.82, "elapsed_time": "1:23:51", "remaining_time": "0:31:17", "throughput": 3603.01, "total_tokens": 18128408}
|
| 5573 |
+
{"current_steps": 27795, "total_steps": 38160, "loss": 0.4148, "lr": 1.0421797183477644e-05, "epoch": 14.567610062893081, "percentage": 72.84, "elapsed_time": "1:23:52", "remaining_time": "0:31:16", "throughput": 3603.05, "total_tokens": 18131416}
|
| 5574 |
+
{"current_steps": 27800, "total_steps": 38160, "loss": 0.5576, "lr": 1.041250972181392e-05, "epoch": 14.570230607966456, "percentage": 72.85, "elapsed_time": "1:23:53", "remaining_time": "0:31:15", "throughput": 3603.14, "total_tokens": 18134872}
|
| 5575 |
+
{"current_steps": 27805, "total_steps": 38160, "loss": 0.3418, "lr": 1.0403225311686743e-05, "epoch": 14.572851153039831, "percentage": 72.86, "elapsed_time": "1:23:53", "remaining_time": "0:31:14", "throughput": 3603.29, "total_tokens": 18138776}
|
| 5576 |
+
{"current_steps": 27810, "total_steps": 38160, "loss": 0.5111, "lr": 1.0393943955038293e-05, "epoch": 14.575471698113208, "percentage": 72.88, "elapsed_time": "1:23:54", "remaining_time": "0:31:13", "throughput": 3603.32, "total_tokens": 18141752}
|
| 5577 |
+
{"current_steps": 27815, "total_steps": 38160, "loss": 0.3797, "lr": 1.038466565381013e-05, "epoch": 14.578092243186584, "percentage": 72.89, "elapsed_time": "1:23:55", "remaining_time": "0:31:12", "throughput": 3603.25, "total_tokens": 18144152}
|
| 5578 |
+
{"current_steps": 27820, "total_steps": 38160, "loss": 0.5053, "lr": 1.0375390409943175e-05, "epoch": 14.580712788259959, "percentage": 72.9, "elapsed_time": "1:23:56", "remaining_time": "0:31:11", "throughput": 3603.27, "total_tokens": 18147192}
|
| 5579 |
+
{"current_steps": 27825, "total_steps": 38160, "loss": 0.4198, "lr": 1.0366118225377708e-05, "epoch": 14.583333333333334, "percentage": 72.92, "elapsed_time": "1:23:57", "remaining_time": "0:31:10", "throughput": 3603.34, "total_tokens": 18150456}
|
| 5580 |
+
{"current_steps": 27830, "total_steps": 38160, "loss": 0.4498, "lr": 1.0356849102053353e-05, "epoch": 14.585953878406709, "percentage": 72.93, "elapsed_time": "1:23:57", "remaining_time": "0:31:09", "throughput": 3603.37, "total_tokens": 18153464}
|
| 5581 |
+
{"current_steps": 27835, "total_steps": 38160, "loss": 0.5747, "lr": 1.0347583041909104e-05, "epoch": 14.588574423480084, "percentage": 72.94, "elapsed_time": "1:23:58", "remaining_time": "0:31:09", "throughput": 3603.49, "total_tokens": 18157336}
|
| 5582 |
+
{"current_steps": 27840, "total_steps": 38160, "loss": 0.3795, "lr": 1.0338320046883315e-05, "epoch": 14.59119496855346, "percentage": 72.96, "elapsed_time": "1:23:59", "remaining_time": "0:31:08", "throughput": 3603.61, "total_tokens": 18161016}
|
| 5583 |
+
{"current_steps": 27845, "total_steps": 38160, "loss": 0.5705, "lr": 1.0329060118913705e-05, "epoch": 14.593815513626835, "percentage": 72.97, "elapsed_time": "1:24:00", "remaining_time": "0:31:07", "throughput": 3603.42, "total_tokens": 18163480}
|
| 5584 |
+
{"current_steps": 27850, "total_steps": 38160, "loss": 0.451, "lr": 1.0319803259937353e-05, "epoch": 14.59643605870021, "percentage": 72.98, "elapsed_time": "1:24:01", "remaining_time": "0:31:06", "throughput": 3603.44, "total_tokens": 18166584}
|
| 5585 |
+
{"current_steps": 27855, "total_steps": 38160, "loss": 0.4171, "lr": 1.0310549471890676e-05, "epoch": 14.599056603773585, "percentage": 73.0, "elapsed_time": "1:24:02", "remaining_time": "0:31:05", "throughput": 3603.31, "total_tokens": 18169144}
|
| 5586 |
+
{"current_steps": 27860, "total_steps": 38160, "loss": 0.5581, "lr": 1.0301298756709454e-05, "epoch": 14.60167714884696, "percentage": 73.01, "elapsed_time": "1:24:03", "remaining_time": "0:31:04", "throughput": 3603.25, "total_tokens": 18172184}
|
| 5587 |
+
{"current_steps": 27865, "total_steps": 38160, "loss": 0.4517, "lr": 1.0292051116328844e-05, "epoch": 14.604297693920335, "percentage": 73.02, "elapsed_time": "1:24:04", "remaining_time": "0:31:03", "throughput": 3603.21, "total_tokens": 18174840}
|
| 5588 |
+
{"current_steps": 27870, "total_steps": 38160, "loss": 0.4112, "lr": 1.028280655268334e-05, "epoch": 14.60691823899371, "percentage": 73.03, "elapsed_time": "1:24:04", "remaining_time": "0:31:02", "throughput": 3603.31, "total_tokens": 18178584}
|
| 5589 |
+
{"current_steps": 27875, "total_steps": 38160, "loss": 0.4616, "lr": 1.0273565067706806e-05, "epoch": 14.609538784067086, "percentage": 73.05, "elapsed_time": "1:24:06", "remaining_time": "0:31:01", "throughput": 3603.54, "total_tokens": 18184792}
|
| 5590 |
+
{"current_steps": 27880, "total_steps": 38160, "loss": 0.5201, "lr": 1.0264326663332457e-05, "epoch": 14.61215932914046, "percentage": 73.06, "elapsed_time": "1:24:07", "remaining_time": "0:31:01", "throughput": 3603.58, "total_tokens": 18188248}
|
| 5591 |
+
{"current_steps": 27885, "total_steps": 38160, "loss": 0.3949, "lr": 1.0255091341492859e-05, "epoch": 14.614779874213836, "percentage": 73.07, "elapsed_time": "1:24:08", "remaining_time": "0:31:00", "throughput": 3603.55, "total_tokens": 18191160}
|
| 5592 |
+
{"current_steps": 27890, "total_steps": 38160, "loss": 0.399, "lr": 1.0245859104119923e-05, "epoch": 14.617400419287211, "percentage": 73.09, "elapsed_time": "1:24:08", "remaining_time": "0:30:59", "throughput": 3603.68, "total_tokens": 18195000}
|
| 5593 |
+
{"current_steps": 27895, "total_steps": 38160, "loss": 0.4123, "lr": 1.0236629953144938e-05, "epoch": 14.620020964360586, "percentage": 73.1, "elapsed_time": "1:24:09", "remaining_time": "0:30:58", "throughput": 3603.71, "total_tokens": 18198232}
|
| 5594 |
+
{"current_steps": 27900, "total_steps": 38160, "loss": 0.6141, "lr": 1.0227403890498537e-05, "epoch": 14.622641509433961, "percentage": 73.11, "elapsed_time": "1:24:10", "remaining_time": "0:30:57", "throughput": 3603.58, "total_tokens": 18200856}
|
| 5595 |
+
{"current_steps": 27905, "total_steps": 38160, "loss": 0.4611, "lr": 1.021818091811072e-05, "epoch": 14.625262054507338, "percentage": 73.13, "elapsed_time": "1:24:11", "remaining_time": "0:30:56", "throughput": 3603.45, "total_tokens": 18203256}
|
| 5596 |
+
{"current_steps": 27910, "total_steps": 38160, "loss": 0.5915, "lr": 1.0208961037910813e-05, "epoch": 14.627882599580714, "percentage": 73.14, "elapsed_time": "1:24:12", "remaining_time": "0:30:55", "throughput": 3603.4, "total_tokens": 18206008}
|
| 5597 |
+
{"current_steps": 27915, "total_steps": 38160, "loss": 0.5298, "lr": 1.0199744251827503e-05, "epoch": 14.630503144654089, "percentage": 73.15, "elapsed_time": "1:24:13", "remaining_time": "0:30:54", "throughput": 3603.44, "total_tokens": 18208984}
|
| 5598 |
+
{"current_steps": 27920, "total_steps": 38160, "loss": 0.3362, "lr": 1.0190530561788845e-05, "epoch": 14.633123689727464, "percentage": 73.17, "elapsed_time": "1:24:14", "remaining_time": "0:30:53", "throughput": 3603.56, "total_tokens": 18213848}
|
| 5599 |
+
{"current_steps": 27925, "total_steps": 38160, "loss": 0.4819, "lr": 1.0181319969722236e-05, "epoch": 14.635744234800839, "percentage": 73.18, "elapsed_time": "1:24:15", "remaining_time": "0:30:52", "throughput": 3603.72, "total_tokens": 18217496}
|
| 5600 |
+
{"current_steps": 27930, "total_steps": 38160, "loss": 0.4817, "lr": 1.0172112477554441e-05, "epoch": 14.638364779874214, "percentage": 73.19, "elapsed_time": "1:24:15", "remaining_time": "0:30:51", "throughput": 3603.76, "total_tokens": 18220536}
|
| 5601 |
+
{"current_steps": 27935, "total_steps": 38160, "loss": 0.5959, "lr": 1.016290808721154e-05, "epoch": 14.64098532494759, "percentage": 73.2, "elapsed_time": "1:24:17", "remaining_time": "0:30:51", "throughput": 3603.89, "total_tokens": 18224888}
|
| 5602 |
+
{"current_steps": 27940, "total_steps": 38160, "loss": 0.3299, "lr": 1.0153706800619007e-05, "epoch": 14.643605870020965, "percentage": 73.22, "elapsed_time": "1:24:17", "remaining_time": "0:30:50", "throughput": 3604.09, "total_tokens": 18229368}
|
| 5603 |
+
{"current_steps": 27945, "total_steps": 38160, "loss": 0.481, "lr": 1.0144508619701623e-05, "epoch": 14.64622641509434, "percentage": 73.23, "elapsed_time": "1:24:18", "remaining_time": "0:30:49", "throughput": 3604.15, "total_tokens": 18232760}
|
| 5604 |
+
{"current_steps": 27950, "total_steps": 38160, "loss": 0.504, "lr": 1.013531354638356e-05, "epoch": 14.648846960167715, "percentage": 73.24, "elapsed_time": "1:24:19", "remaining_time": "0:30:48", "throughput": 3604.15, "total_tokens": 18235576}
|
| 5605 |
+
{"current_steps": 27955, "total_steps": 38160, "loss": 0.5102, "lr": 1.0126121582588316e-05, "epoch": 14.65146750524109, "percentage": 73.26, "elapsed_time": "1:24:20", "remaining_time": "0:30:47", "throughput": 3604.26, "total_tokens": 18239224}
|
| 5606 |
+
{"current_steps": 27960, "total_steps": 38160, "loss": 0.4263, "lr": 1.0116932730238754e-05, "epoch": 14.654088050314465, "percentage": 73.27, "elapsed_time": "1:24:21", "remaining_time": "0:30:46", "throughput": 3604.24, "total_tokens": 18241912}
|
| 5607 |
+
{"current_steps": 27965, "total_steps": 38160, "loss": 0.4056, "lr": 1.0107746991257063e-05, "epoch": 14.65670859538784, "percentage": 73.28, "elapsed_time": "1:24:22", "remaining_time": "0:30:45", "throughput": 3604.19, "total_tokens": 18244632}
|
| 5608 |
+
{"current_steps": 27970, "total_steps": 38160, "loss": 0.4752, "lr": 1.0098564367564811e-05, "epoch": 14.659329140461216, "percentage": 73.3, "elapsed_time": "1:24:22", "remaining_time": "0:30:44", "throughput": 3604.21, "total_tokens": 18247768}
|
| 5609 |
+
{"current_steps": 27975, "total_steps": 38160, "loss": 0.4207, "lr": 1.0089384861082876e-05, "epoch": 14.66194968553459, "percentage": 73.31, "elapsed_time": "1:24:23", "remaining_time": "0:30:43", "throughput": 3604.2, "total_tokens": 18250456}
|
| 5610 |
+
{"current_steps": 27980, "total_steps": 38160, "loss": 0.4778, "lr": 1.0080208473731523e-05, "epoch": 14.664570230607966, "percentage": 73.32, "elapsed_time": "1:24:24", "remaining_time": "0:30:42", "throughput": 3604.26, "total_tokens": 18253688}
|
| 5611 |
+
{"current_steps": 27985, "total_steps": 38160, "loss": 0.3989, "lr": 1.0071035207430352e-05, "epoch": 14.667190775681341, "percentage": 73.34, "elapsed_time": "1:24:25", "remaining_time": "0:30:41", "throughput": 3604.31, "total_tokens": 18256952}
|
| 5612 |
+
{"current_steps": 27990, "total_steps": 38160, "loss": 0.4066, "lr": 1.0061865064098285e-05, "epoch": 14.669811320754716, "percentage": 73.35, "elapsed_time": "1:24:26", "remaining_time": "0:30:40", "throughput": 3604.53, "total_tokens": 18263064}
|
| 5613 |
+
{"current_steps": 27995, "total_steps": 38160, "loss": 0.4123, "lr": 1.0052698045653625e-05, "epoch": 14.672431865828091, "percentage": 73.36, "elapsed_time": "1:24:27", "remaining_time": "0:30:40", "throughput": 3604.52, "total_tokens": 18265848}
|
| 5614 |
+
{"current_steps": 28000, "total_steps": 38160, "loss": 0.6411, "lr": 1.0043534154014015e-05, "epoch": 14.675052410901468, "percentage": 73.38, "elapsed_time": "1:24:28", "remaining_time": "0:30:39", "throughput": 3604.58, "total_tokens": 18269208}
|
| 5615 |
+
{"current_steps": 28005, "total_steps": 38160, "loss": 0.4222, "lr": 1.003437339109642e-05, "epoch": 14.677672955974844, "percentage": 73.39, "elapsed_time": "1:24:29", "remaining_time": "0:30:38", "throughput": 3604.8, "total_tokens": 18274136}
|
| 5616 |
+
{"current_steps": 28010, "total_steps": 38160, "loss": 0.3748, "lr": 1.0025215758817183e-05, "epoch": 14.680293501048219, "percentage": 73.4, "elapsed_time": "1:24:30", "remaining_time": "0:30:37", "throughput": 3604.75, "total_tokens": 18277016}
|
| 5617 |
+
{"current_steps": 28015, "total_steps": 38160, "loss": 0.4113, "lr": 1.0016061259091958e-05, "epoch": 14.682914046121594, "percentage": 73.41, "elapsed_time": "1:24:31", "remaining_time": "0:30:36", "throughput": 3604.66, "total_tokens": 18279640}
|
| 5618 |
+
{"current_steps": 28020, "total_steps": 38160, "loss": 0.4551, "lr": 1.000690989383577e-05, "epoch": 14.685534591194969, "percentage": 73.43, "elapsed_time": "1:24:31", "remaining_time": "0:30:35", "throughput": 3604.53, "total_tokens": 18281816}
|
| 5619 |
+
{"current_steps": 28025, "total_steps": 38160, "loss": 0.6174, "lr": 9.997761664962993e-06, "epoch": 14.688155136268344, "percentage": 73.44, "elapsed_time": "1:24:32", "remaining_time": "0:30:34", "throughput": 3604.47, "total_tokens": 18284568}
|
| 5620 |
+
{"current_steps": 28030, "total_steps": 38160, "loss": 0.4417, "lr": 9.988616574387314e-06, "epoch": 14.69077568134172, "percentage": 73.45, "elapsed_time": "1:24:33", "remaining_time": "0:30:33", "throughput": 3604.57, "total_tokens": 18288792}
|
| 5621 |
+
{"current_steps": 28035, "total_steps": 38160, "loss": 0.4595, "lr": 9.979474624021787e-06, "epoch": 14.693396226415095, "percentage": 73.47, "elapsed_time": "1:24:34", "remaining_time": "0:30:32", "throughput": 3604.66, "total_tokens": 18292344}
|
| 5622 |
+
{"current_steps": 28040, "total_steps": 38160, "loss": 0.4387, "lr": 9.970335815778815e-06, "epoch": 14.69601677148847, "percentage": 73.48, "elapsed_time": "1:24:35", "remaining_time": "0:30:31", "throughput": 3604.64, "total_tokens": 18296184}
|
| 5623 |
+
{"current_steps": 28045, "total_steps": 38160, "loss": 0.5302, "lr": 9.96120015157011e-06, "epoch": 14.698637316561845, "percentage": 73.49, "elapsed_time": "1:24:36", "remaining_time": "0:30:31", "throughput": 3604.69, "total_tokens": 18299960}
|
| 5624 |
+
{"current_steps": 28050, "total_steps": 38160, "loss": 0.4858, "lr": 9.952067633306767e-06, "epoch": 14.70125786163522, "percentage": 73.51, "elapsed_time": "1:24:37", "remaining_time": "0:30:30", "throughput": 3604.76, "total_tokens": 18304024}
|
| 5625 |
+
{"current_steps": 28055, "total_steps": 38160, "loss": 0.5138, "lr": 9.942938262899204e-06, "epoch": 14.703878406708595, "percentage": 73.52, "elapsed_time": "1:24:38", "remaining_time": "0:30:29", "throughput": 3604.7, "total_tokens": 18306808}
|
| 5626 |
+
{"current_steps": 28060, "total_steps": 38160, "loss": 0.4626, "lr": 9.93381204225717e-06, "epoch": 14.70649895178197, "percentage": 73.53, "elapsed_time": "1:24:39", "remaining_time": "0:30:28", "throughput": 3604.52, "total_tokens": 18308984}
|
| 5627 |
+
{"current_steps": 28065, "total_steps": 38160, "loss": 0.469, "lr": 9.924688973289782e-06, "epoch": 14.709119496855346, "percentage": 73.55, "elapsed_time": "1:24:40", "remaining_time": "0:30:27", "throughput": 3604.41, "total_tokens": 18311416}
|
| 5628 |
+
{"current_steps": 28070, "total_steps": 38160, "loss": 0.5896, "lr": 9.915569057905463e-06, "epoch": 14.71174004192872, "percentage": 73.56, "elapsed_time": "1:24:41", "remaining_time": "0:30:26", "throughput": 3604.4, "total_tokens": 18314552}
|
| 5629 |
+
{"current_steps": 28075, "total_steps": 38160, "loss": 0.4175, "lr": 9.906452298012003e-06, "epoch": 14.714360587002096, "percentage": 73.57, "elapsed_time": "1:24:42", "remaining_time": "0:30:25", "throughput": 3604.4, "total_tokens": 18317752}
|
| 5630 |
+
{"current_steps": 28080, "total_steps": 38160, "loss": 0.5003, "lr": 9.897338695516529e-06, "epoch": 14.716981132075471, "percentage": 73.58, "elapsed_time": "1:24:42", "remaining_time": "0:30:24", "throughput": 3604.41, "total_tokens": 18320888}
|
| 5631 |
+
{"current_steps": 28085, "total_steps": 38160, "loss": 0.6368, "lr": 9.888228252325507e-06, "epoch": 14.719601677148846, "percentage": 73.6, "elapsed_time": "1:24:43", "remaining_time": "0:30:23", "throughput": 3604.48, "total_tokens": 18324600}
|
| 5632 |
+
{"current_steps": 28090, "total_steps": 38160, "loss": 0.4087, "lr": 9.87912097034473e-06, "epoch": 14.722222222222221, "percentage": 73.61, "elapsed_time": "1:24:44", "remaining_time": "0:30:22", "throughput": 3604.54, "total_tokens": 18328184}
|
| 5633 |
+
{"current_steps": 28095, "total_steps": 38160, "loss": 0.4387, "lr": 9.870016851479327e-06, "epoch": 14.724842767295598, "percentage": 73.62, "elapsed_time": "1:24:45", "remaining_time": "0:30:21", "throughput": 3604.43, "total_tokens": 18330680}
|
| 5634 |
+
{"current_steps": 28100, "total_steps": 38160, "loss": 0.5121, "lr": 9.860915897633791e-06, "epoch": 14.727463312368974, "percentage": 73.64, "elapsed_time": "1:24:46", "remaining_time": "0:30:20", "throughput": 3604.58, "total_tokens": 18334552}
|
| 5635 |
+
{"current_steps": 28105, "total_steps": 38160, "loss": 0.4833, "lr": 9.85181811071193e-06, "epoch": 14.730083857442349, "percentage": 73.65, "elapsed_time": "1:24:47", "remaining_time": "0:30:20", "throughput": 3604.69, "total_tokens": 18338168}
|
| 5636 |
+
{"current_steps": 28110, "total_steps": 38160, "loss": 0.4353, "lr": 9.842723492616913e-06, "epoch": 14.732704402515724, "percentage": 73.66, "elapsed_time": "1:24:48", "remaining_time": "0:30:19", "throughput": 3604.72, "total_tokens": 18341528}
|
| 5637 |
+
{"current_steps": 28115, "total_steps": 38160, "loss": 0.4827, "lr": 9.833632045251207e-06, "epoch": 14.735324947589099, "percentage": 73.68, "elapsed_time": "1:24:49", "remaining_time": "0:30:18", "throughput": 3604.77, "total_tokens": 18344920}
|
| 5638 |
+
{"current_steps": 28120, "total_steps": 38160, "loss": 0.3774, "lr": 9.824543770516656e-06, "epoch": 14.737945492662474, "percentage": 73.69, "elapsed_time": "1:24:49", "remaining_time": "0:30:17", "throughput": 3604.82, "total_tokens": 18348312}
|
| 5639 |
+
{"current_steps": 28125, "total_steps": 38160, "loss": 0.4323, "lr": 9.815458670314412e-06, "epoch": 14.74056603773585, "percentage": 73.7, "elapsed_time": "1:24:50", "remaining_time": "0:30:16", "throughput": 3604.82, "total_tokens": 18351160}
|
| 5640 |
+
{"current_steps": 28130, "total_steps": 38160, "loss": 0.5661, "lr": 9.80637674654498e-06, "epoch": 14.743186582809225, "percentage": 73.72, "elapsed_time": "1:24:51", "remaining_time": "0:30:15", "throughput": 3604.8, "total_tokens": 18354104}
|
| 5641 |
+
{"current_steps": 28135, "total_steps": 38160, "loss": 0.4767, "lr": 9.797298001108193e-06, "epoch": 14.7458071278826, "percentage": 73.73, "elapsed_time": "1:24:52", "remaining_time": "0:30:14", "throughput": 3604.84, "total_tokens": 18357784}
|
| 5642 |
+
{"current_steps": 28140, "total_steps": 38160, "loss": 0.5926, "lr": 9.788222435903232e-06, "epoch": 14.748427672955975, "percentage": 73.74, "elapsed_time": "1:24:53", "remaining_time": "0:30:13", "throughput": 3604.88, "total_tokens": 18360984}
|
| 5643 |
+
{"current_steps": 28145, "total_steps": 38160, "loss": 0.4681, "lr": 9.779150052828596e-06, "epoch": 14.75104821802935, "percentage": 73.76, "elapsed_time": "1:24:54", "remaining_time": "0:30:12", "throughput": 3605.06, "total_tokens": 18365080}
|
| 5644 |
+
{"current_steps": 28150, "total_steps": 38160, "loss": 0.5487, "lr": 9.770080853782108e-06, "epoch": 14.753668763102725, "percentage": 73.77, "elapsed_time": "1:24:55", "remaining_time": "0:30:11", "throughput": 3605.01, "total_tokens": 18367800}
|
| 5645 |
+
{"current_steps": 28155, "total_steps": 38160, "loss": 0.6872, "lr": 9.76101484066095e-06, "epoch": 14.7562893081761, "percentage": 73.78, "elapsed_time": "1:24:55", "remaining_time": "0:30:10", "throughput": 3604.93, "total_tokens": 18370392}
|
| 5646 |
+
{"current_steps": 28160, "total_steps": 38160, "loss": 0.5066, "lr": 9.751952015361637e-06, "epoch": 14.758909853249476, "percentage": 73.79, "elapsed_time": "1:24:56", "remaining_time": "0:30:09", "throughput": 3604.91, "total_tokens": 18373560}
|
| 5647 |
+
{"current_steps": 28165, "total_steps": 38160, "loss": 0.4072, "lr": 9.742892379780003e-06, "epoch": 14.76153039832285, "percentage": 73.81, "elapsed_time": "1:24:57", "remaining_time": "0:30:09", "throughput": 3605.1, "total_tokens": 18378296}
|
| 5648 |
+
{"current_steps": 28170, "total_steps": 38160, "loss": 0.4853, "lr": 9.733835935811237e-06, "epoch": 14.764150943396226, "percentage": 73.82, "elapsed_time": "1:24:58", "remaining_time": "0:30:08", "throughput": 3605.06, "total_tokens": 18381016}
|
| 5649 |
+
{"current_steps": 28175, "total_steps": 38160, "loss": 0.5303, "lr": 9.724782685349809e-06, "epoch": 14.766771488469601, "percentage": 73.83, "elapsed_time": "1:24:59", "remaining_time": "0:30:07", "throughput": 3605.12, "total_tokens": 18384248}
|
| 5650 |
+
{"current_steps": 28180, "total_steps": 38160, "loss": 0.5695, "lr": 9.715732630289576e-06, "epoch": 14.769392033542976, "percentage": 73.85, "elapsed_time": "1:25:00", "remaining_time": "0:30:06", "throughput": 3605.02, "total_tokens": 18386648}
|
| 5651 |
+
{"current_steps": 28185, "total_steps": 38160, "loss": 0.4269, "lr": 9.706685772523701e-06, "epoch": 14.772012578616351, "percentage": 73.86, "elapsed_time": "1:25:01", "remaining_time": "0:30:05", "throughput": 3605.05, "total_tokens": 18390232}
|
| 5652 |
+
{"current_steps": 28190, "total_steps": 38160, "loss": 0.5245, "lr": 9.697642113944692e-06, "epoch": 14.774633123689728, "percentage": 73.87, "elapsed_time": "1:25:02", "remaining_time": "0:30:04", "throughput": 3605.07, "total_tokens": 18393624}
|
| 5653 |
+
{"current_steps": 28195, "total_steps": 38160, "loss": 0.4081, "lr": 9.688601656444381e-06, "epoch": 14.777253668763104, "percentage": 73.89, "elapsed_time": "1:25:03", "remaining_time": "0:30:03", "throughput": 3605.16, "total_tokens": 18397176}
|
| 5654 |
+
{"current_steps": 28200, "total_steps": 38160, "loss": 0.5023, "lr": 9.679564401913921e-06, "epoch": 14.779874213836479, "percentage": 73.9, "elapsed_time": "1:25:03", "remaining_time": "0:30:02", "throughput": 3605.2, "total_tokens": 18400472}
|
| 5655 |
+
{"current_steps": 28205, "total_steps": 38160, "loss": 0.4381, "lr": 9.670530352243798e-06, "epoch": 14.782494758909854, "percentage": 73.91, "elapsed_time": "1:25:04", "remaining_time": "0:30:01", "throughput": 3605.18, "total_tokens": 18403224}
|
| 5656 |
+
{"current_steps": 28210, "total_steps": 38160, "loss": 0.3643, "lr": 9.661499509323835e-06, "epoch": 14.785115303983229, "percentage": 73.93, "elapsed_time": "1:25:05", "remaining_time": "0:30:00", "throughput": 3605.12, "total_tokens": 18406200}
|
| 5657 |
+
{"current_steps": 28215, "total_steps": 38160, "loss": 0.4978, "lr": 9.652471875043182e-06, "epoch": 14.787735849056604, "percentage": 73.94, "elapsed_time": "1:25:06", "remaining_time": "0:29:59", "throughput": 3605.16, "total_tokens": 18409432}
|
| 5658 |
+
{"current_steps": 28220, "total_steps": 38160, "loss": 0.4572, "lr": 9.64344745129033e-06, "epoch": 14.79035639412998, "percentage": 73.95, "elapsed_time": "1:25:07", "remaining_time": "0:29:58", "throughput": 3605.04, "total_tokens": 18411736}
|
| 5659 |
+
{"current_steps": 28225, "total_steps": 38160, "loss": 0.4872, "lr": 9.634426239953073e-06, "epoch": 14.792976939203355, "percentage": 73.96, "elapsed_time": "1:25:08", "remaining_time": "0:29:58", "throughput": 3605.17, "total_tokens": 18415448}
|
| 5660 |
+
{"current_steps": 28230, "total_steps": 38160, "loss": 0.5652, "lr": 9.62540824291854e-06, "epoch": 14.79559748427673, "percentage": 73.98, "elapsed_time": "1:25:08", "remaining_time": "0:29:57", "throughput": 3605.3, "total_tokens": 18419032}
|
| 5661 |
+
{"current_steps": 28235, "total_steps": 38160, "loss": 0.5169, "lr": 9.616393462073197e-06, "epoch": 14.798218029350105, "percentage": 73.99, "elapsed_time": "1:25:09", "remaining_time": "0:29:56", "throughput": 3605.36, "total_tokens": 18422136}
|
| 5662 |
+
{"current_steps": 28240, "total_steps": 38160, "loss": 0.6716, "lr": 9.607381899302839e-06, "epoch": 14.80083857442348, "percentage": 74.0, "elapsed_time": "1:25:10", "remaining_time": "0:29:55", "throughput": 3605.3, "total_tokens": 18424696}
|
| 5663 |
+
{"current_steps": 28245, "total_steps": 38160, "loss": 0.5665, "lr": 9.59837355649258e-06, "epoch": 14.803459119496855, "percentage": 74.02, "elapsed_time": "1:25:11", "remaining_time": "0:29:54", "throughput": 3605.31, "total_tokens": 18427704}
|
| 5664 |
+
{"current_steps": 28250, "total_steps": 38160, "loss": 0.3895, "lr": 9.589368435526868e-06, "epoch": 14.80607966457023, "percentage": 74.03, "elapsed_time": "1:25:12", "remaining_time": "0:29:53", "throughput": 3605.27, "total_tokens": 18430328}
|
| 5665 |
+
{"current_steps": 28255, "total_steps": 38160, "loss": 0.5497, "lr": 9.580366538289468e-06, "epoch": 14.808700209643606, "percentage": 74.04, "elapsed_time": "1:25:12", "remaining_time": "0:29:52", "throughput": 3605.22, "total_tokens": 18432920}
|
| 5666 |
+
{"current_steps": 28260, "total_steps": 38160, "loss": 0.5826, "lr": 9.57136786666346e-06, "epoch": 14.81132075471698, "percentage": 74.06, "elapsed_time": "1:25:13", "remaining_time": "0:29:51", "throughput": 3605.19, "total_tokens": 18435896}
|
| 5667 |
+
{"current_steps": 28265, "total_steps": 38160, "loss": 0.4616, "lr": 9.562372422531276e-06, "epoch": 14.813941299790356, "percentage": 74.07, "elapsed_time": "1:25:14", "remaining_time": "0:29:50", "throughput": 3605.11, "total_tokens": 18438712}
|
| 5668 |
+
{"current_steps": 28270, "total_steps": 38160, "loss": 0.5051, "lr": 9.553380207774662e-06, "epoch": 14.816561844863731, "percentage": 74.08, "elapsed_time": "1:25:15", "remaining_time": "0:29:49", "throughput": 3605.08, "total_tokens": 18441592}
|
| 5669 |
+
{"current_steps": 28275, "total_steps": 38160, "loss": 0.4499, "lr": 9.54439122427469e-06, "epoch": 14.819182389937106, "percentage": 74.1, "elapsed_time": "1:25:16", "remaining_time": "0:29:48", "throughput": 3604.96, "total_tokens": 18444120}
|
| 5670 |
+
{"current_steps": 28280, "total_steps": 38160, "loss": 0.4131, "lr": 9.535405473911748e-06, "epoch": 14.821802935010481, "percentage": 74.11, "elapsed_time": "1:25:17", "remaining_time": "0:29:47", "throughput": 3605.06, "total_tokens": 18447640}
|
| 5671 |
+
{"current_steps": 28285, "total_steps": 38160, "loss": 0.5234, "lr": 9.526422958565546e-06, "epoch": 14.824423480083858, "percentage": 74.12, "elapsed_time": "1:25:18", "remaining_time": "0:29:46", "throughput": 3605.24, "total_tokens": 18452312}
|
| 5672 |
+
{"current_steps": 28290, "total_steps": 38160, "loss": 0.4706, "lr": 9.517443680115124e-06, "epoch": 14.827044025157234, "percentage": 74.14, "elapsed_time": "1:25:19", "remaining_time": "0:29:45", "throughput": 3605.25, "total_tokens": 18455704}
|
| 5673 |
+
{"current_steps": 28295, "total_steps": 38160, "loss": 0.431, "lr": 9.50846764043885e-06, "epoch": 14.829664570230609, "percentage": 74.15, "elapsed_time": "1:25:20", "remaining_time": "0:29:45", "throughput": 3605.33, "total_tokens": 18459384}
|
| 5674 |
+
{"current_steps": 28300, "total_steps": 38160, "loss": 0.5952, "lr": 9.499494841414422e-06, "epoch": 14.832285115303984, "percentage": 74.16, "elapsed_time": "1:25:21", "remaining_time": "0:29:44", "throughput": 3605.44, "total_tokens": 18463512}
|
| 5675 |
+
{"current_steps": 28305, "total_steps": 38160, "loss": 0.5969, "lr": 9.490525284918822e-06, "epoch": 14.834905660377359, "percentage": 74.17, "elapsed_time": "1:25:21", "remaining_time": "0:29:43", "throughput": 3605.4, "total_tokens": 18466520}
|
| 5676 |
+
{"current_steps": 28310, "total_steps": 38160, "loss": 0.4351, "lr": 9.4815589728284e-06, "epoch": 14.837526205450734, "percentage": 74.19, "elapsed_time": "1:25:22", "remaining_time": "0:29:42", "throughput": 3605.36, "total_tokens": 18469240}
|
| 5677 |
+
{"current_steps": 28315, "total_steps": 38160, "loss": 0.4981, "lr": 9.472595907018788e-06, "epoch": 14.84014675052411, "percentage": 74.2, "elapsed_time": "1:25:23", "remaining_time": "0:29:41", "throughput": 3605.44, "total_tokens": 18473272}
|
| 5678 |
+
{"current_steps": 28320, "total_steps": 38160, "loss": 0.5009, "lr": 9.463636089364968e-06, "epoch": 14.842767295597485, "percentage": 74.21, "elapsed_time": "1:25:24", "remaining_time": "0:29:40", "throughput": 3605.65, "total_tokens": 18478392}
|
| 5679 |
+
{"current_steps": 28325, "total_steps": 38160, "loss": 0.4972, "lr": 9.454679521741239e-06, "epoch": 14.84538784067086, "percentage": 74.23, "elapsed_time": "1:25:25", "remaining_time": "0:29:39", "throughput": 3605.61, "total_tokens": 18481048}
|
| 5680 |
+
{"current_steps": 28330, "total_steps": 38160, "loss": 0.5404, "lr": 9.445726206021194e-06, "epoch": 14.848008385744235, "percentage": 74.24, "elapsed_time": "1:25:26", "remaining_time": "0:29:38", "throughput": 3605.65, "total_tokens": 18484376}
|
| 5681 |
+
{"current_steps": 28335, "total_steps": 38160, "loss": 0.3504, "lr": 9.436776144077772e-06, "epoch": 14.85062893081761, "percentage": 74.25, "elapsed_time": "1:25:27", "remaining_time": "0:29:37", "throughput": 3605.74, "total_tokens": 18487928}
|
| 5682 |
+
{"current_steps": 28340, "total_steps": 38160, "loss": 0.6011, "lr": 9.427829337783237e-06, "epoch": 14.853249475890985, "percentage": 74.27, "elapsed_time": "1:25:28", "remaining_time": "0:29:36", "throughput": 3605.79, "total_tokens": 18491192}
|
| 5683 |
+
{"current_steps": 28345, "total_steps": 38160, "loss": 0.4092, "lr": 9.418885789009138e-06, "epoch": 14.85587002096436, "percentage": 74.28, "elapsed_time": "1:25:29", "remaining_time": "0:29:36", "throughput": 3605.82, "total_tokens": 18494744}
|
| 5684 |
+
{"current_steps": 28350, "total_steps": 38160, "loss": 0.5226, "lr": 9.409945499626374e-06, "epoch": 14.858490566037736, "percentage": 74.29, "elapsed_time": "1:25:30", "remaining_time": "0:29:35", "throughput": 3605.92, "total_tokens": 18498584}
|
| 5685 |
+
{"current_steps": 28355, "total_steps": 38160, "loss": 0.551, "lr": 9.40100847150516e-06, "epoch": 14.86111111111111, "percentage": 74.31, "elapsed_time": "1:25:30", "remaining_time": "0:29:34", "throughput": 3605.92, "total_tokens": 18501400}
|
| 5686 |
+
{"current_steps": 28360, "total_steps": 38160, "loss": 0.4703, "lr": 9.392074706515003e-06, "epoch": 14.863731656184486, "percentage": 74.32, "elapsed_time": "1:25:31", "remaining_time": "0:29:33", "throughput": 3605.77, "total_tokens": 18503864}
|
| 5687 |
+
{"current_steps": 28365, "total_steps": 38160, "loss": 0.4288, "lr": 9.383144206524753e-06, "epoch": 14.866352201257861, "percentage": 74.33, "elapsed_time": "1:25:32", "remaining_time": "0:29:32", "throughput": 3605.75, "total_tokens": 18506712}
|
| 5688 |
+
{"current_steps": 28370, "total_steps": 38160, "loss": 0.5198, "lr": 9.374216973402575e-06, "epoch": 14.868972746331236, "percentage": 74.34, "elapsed_time": "1:25:33", "remaining_time": "0:29:31", "throughput": 3605.85, "total_tokens": 18510360}
|
| 5689 |
+
{"current_steps": 28375, "total_steps": 38160, "loss": 0.4895, "lr": 9.365293009015926e-06, "epoch": 14.871593291404611, "percentage": 74.36, "elapsed_time": "1:25:34", "remaining_time": "0:29:30", "throughput": 3605.84, "total_tokens": 18513432}
|
| 5690 |
+
{"current_steps": 28380, "total_steps": 38160, "loss": 0.5016, "lr": 9.356372315231623e-06, "epoch": 14.874213836477988, "percentage": 74.37, "elapsed_time": "1:25:35", "remaining_time": "0:29:29", "throughput": 3605.85, "total_tokens": 18516632}
|
| 5691 |
+
{"current_steps": 28385, "total_steps": 38160, "loss": 0.5516, "lr": 9.347454893915749e-06, "epoch": 14.876834381551364, "percentage": 74.38, "elapsed_time": "1:25:36", "remaining_time": "0:29:28", "throughput": 3606.07, "total_tokens": 18521624}
|
| 5692 |
+
{"current_steps": 28390, "total_steps": 38160, "loss": 0.5321, "lr": 9.338540746933741e-06, "epoch": 14.879454926624739, "percentage": 74.4, "elapsed_time": "1:25:37", "remaining_time": "0:29:27", "throughput": 3606.1, "total_tokens": 18524760}
|
| 5693 |
+
{"current_steps": 28395, "total_steps": 38160, "loss": 0.4164, "lr": 9.329629876150339e-06, "epoch": 14.882075471698114, "percentage": 74.41, "elapsed_time": "1:25:37", "remaining_time": "0:29:26", "throughput": 3606.02, "total_tokens": 18527416}
|
| 5694 |
+
{"current_steps": 28400, "total_steps": 38160, "loss": 0.5463, "lr": 9.320722283429581e-06, "epoch": 14.884696016771489, "percentage": 74.42, "elapsed_time": "1:25:38", "remaining_time": "0:29:26", "throughput": 3606.05, "total_tokens": 18531032}
|
| 5695 |
+
{"current_steps": 28405, "total_steps": 38160, "loss": 0.4871, "lr": 9.311817970634854e-06, "epoch": 14.887316561844864, "percentage": 74.44, "elapsed_time": "1:25:39", "remaining_time": "0:29:25", "throughput": 3606.13, "total_tokens": 18534744}
|
| 5696 |
+
{"current_steps": 28410, "total_steps": 38160, "loss": 0.5121, "lr": 9.302916939628814e-06, "epoch": 14.88993710691824, "percentage": 74.45, "elapsed_time": "1:25:40", "remaining_time": "0:29:24", "throughput": 3606.17, "total_tokens": 18538392}
|
| 5697 |
+
{"current_steps": 28415, "total_steps": 38160, "loss": 0.4251, "lr": 9.29401919227347e-06, "epoch": 14.892557651991615, "percentage": 74.46, "elapsed_time": "1:25:41", "remaining_time": "0:29:23", "throughput": 3606.19, "total_tokens": 18541400}
|
| 5698 |
+
{"current_steps": 28420, "total_steps": 38160, "loss": 0.4843, "lr": 9.28512473043013e-06, "epoch": 14.89517819706499, "percentage": 74.48, "elapsed_time": "1:25:42", "remaining_time": "0:29:22", "throughput": 3606.16, "total_tokens": 18544696}
|
| 5699 |
+
{"current_steps": 28425, "total_steps": 38160, "loss": 0.3485, "lr": 9.276233555959419e-06, "epoch": 14.897798742138365, "percentage": 74.49, "elapsed_time": "1:25:43", "remaining_time": "0:29:21", "throughput": 3606.18, "total_tokens": 18547864}
|
| 5700 |
+
{"current_steps": 28430, "total_steps": 38160, "loss": 0.473, "lr": 9.267345670721254e-06, "epoch": 14.90041928721174, "percentage": 74.5, "elapsed_time": "1:25:44", "remaining_time": "0:29:20", "throughput": 3606.11, "total_tokens": 18550584}
|
| 5701 |
+
{"current_steps": 28435, "total_steps": 38160, "loss": 0.571, "lr": 9.258461076574895e-06, "epoch": 14.903039832285115, "percentage": 74.52, "elapsed_time": "1:25:45", "remaining_time": "0:29:19", "throughput": 3606.1, "total_tokens": 18553624}
|
| 5702 |
+
{"current_steps": 28440, "total_steps": 38160, "loss": 0.5692, "lr": 9.24957977537888e-06, "epoch": 14.90566037735849, "percentage": 74.53, "elapsed_time": "1:25:45", "remaining_time": "0:29:18", "throughput": 3606.1, "total_tokens": 18556472}
|
| 5703 |
+
{"current_steps": 28445, "total_steps": 38160, "loss": 0.3624, "lr": 9.240701768991086e-06, "epoch": 14.908280922431866, "percentage": 74.54, "elapsed_time": "1:25:46", "remaining_time": "0:29:17", "throughput": 3605.93, "total_tokens": 18558840}
|
| 5704 |
+
{"current_steps": 28450, "total_steps": 38160, "loss": 0.4717, "lr": 9.231827059268692e-06, "epoch": 14.91090146750524, "percentage": 74.55, "elapsed_time": "1:25:47", "remaining_time": "0:29:16", "throughput": 3605.96, "total_tokens": 18562008}
|
| 5705 |
+
{"current_steps": 28455, "total_steps": 38160, "loss": 0.5585, "lr": 9.222955648068193e-06, "epoch": 14.913522012578616, "percentage": 74.57, "elapsed_time": "1:25:48", "remaining_time": "0:29:15", "throughput": 3605.86, "total_tokens": 18564952}
|
| 5706 |
+
{"current_steps": 28460, "total_steps": 38160, "loss": 0.4559, "lr": 9.214087537245378e-06, "epoch": 14.916142557651991, "percentage": 74.58, "elapsed_time": "1:25:49", "remaining_time": "0:29:15", "throughput": 3605.73, "total_tokens": 18567800}
|
| 5707 |
+
{"current_steps": 28465, "total_steps": 38160, "loss": 0.5358, "lr": 9.205222728655344e-06, "epoch": 14.918763102725366, "percentage": 74.59, "elapsed_time": "1:25:50", "remaining_time": "0:29:14", "throughput": 3605.78, "total_tokens": 18571096}
|
| 5708 |
+
{"current_steps": 28470, "total_steps": 38160, "loss": 0.3719, "lr": 9.196361224152522e-06, "epoch": 14.921383647798741, "percentage": 74.61, "elapsed_time": "1:25:51", "remaining_time": "0:29:13", "throughput": 3605.8, "total_tokens": 18574136}
|
| 5709 |
+
{"current_steps": 28475, "total_steps": 38160, "loss": 0.5039, "lr": 9.187503025590632e-06, "epoch": 14.924004192872118, "percentage": 74.62, "elapsed_time": "1:25:51", "remaining_time": "0:29:12", "throughput": 3605.72, "total_tokens": 18576664}
|
| 5710 |
+
{"current_steps": 28480, "total_steps": 38160, "loss": 0.454, "lr": 9.17864813482272e-06, "epoch": 14.926624737945493, "percentage": 74.63, "elapsed_time": "1:25:52", "remaining_time": "0:29:11", "throughput": 3605.87, "total_tokens": 18580824}
|
| 5711 |
+
{"current_steps": 28485, "total_steps": 38160, "loss": 0.4312, "lr": 9.169796553701122e-06, "epoch": 14.929245283018869, "percentage": 74.65, "elapsed_time": "1:25:54", "remaining_time": "0:29:10", "throughput": 3605.7, "total_tokens": 18583864}
|
| 5712 |
+
{"current_steps": 28490, "total_steps": 38160, "loss": 0.4572, "lr": 9.160948284077473e-06, "epoch": 14.931865828092244, "percentage": 74.66, "elapsed_time": "1:25:54", "remaining_time": "0:29:09", "throughput": 3605.68, "total_tokens": 18586680}
|
| 5713 |
+
{"current_steps": 28495, "total_steps": 38160, "loss": 0.3857, "lr": 9.152103327802738e-06, "epoch": 14.934486373165619, "percentage": 74.67, "elapsed_time": "1:25:55", "remaining_time": "0:29:08", "throughput": 3605.81, "total_tokens": 18590776}
|
| 5714 |
+
{"current_steps": 28500, "total_steps": 38160, "loss": 0.4059, "lr": 9.143261686727189e-06, "epoch": 14.937106918238994, "percentage": 74.69, "elapsed_time": "1:25:56", "remaining_time": "0:29:07", "throughput": 3605.92, "total_tokens": 18594616}
|
| 5715 |
+
{"current_steps": 28505, "total_steps": 38160, "loss": 0.5877, "lr": 9.134423362700392e-06, "epoch": 14.93972746331237, "percentage": 74.7, "elapsed_time": "1:25:57", "remaining_time": "0:29:06", "throughput": 3605.86, "total_tokens": 18597528}
|
| 5716 |
+
{"current_steps": 28510, "total_steps": 38160, "loss": 0.2877, "lr": 9.12558835757123e-06, "epoch": 14.942348008385745, "percentage": 74.71, "elapsed_time": "1:25:58", "remaining_time": "0:29:05", "throughput": 3605.79, "total_tokens": 18599992}
|
| 5717 |
+
{"current_steps": 28515, "total_steps": 38160, "loss": 0.4815, "lr": 9.116756673187878e-06, "epoch": 14.94496855345912, "percentage": 74.72, "elapsed_time": "1:25:59", "remaining_time": "0:29:05", "throughput": 3605.78, "total_tokens": 18602872}
|
| 5718 |
+
{"current_steps": 28520, "total_steps": 38160, "loss": 0.4604, "lr": 9.107928311397817e-06, "epoch": 14.947589098532495, "percentage": 74.74, "elapsed_time": "1:26:00", "remaining_time": "0:29:04", "throughput": 3605.86, "total_tokens": 18606360}
|
| 5719 |
+
{"current_steps": 28525, "total_steps": 38160, "loss": 0.4012, "lr": 9.09910327404785e-06, "epoch": 14.95020964360587, "percentage": 74.75, "elapsed_time": "1:26:00", "remaining_time": "0:29:03", "throughput": 3605.74, "total_tokens": 18608824}
|
| 5720 |
+
{"current_steps": 28530, "total_steps": 38160, "loss": 0.4689, "lr": 9.090281562984068e-06, "epoch": 14.952830188679245, "percentage": 74.76, "elapsed_time": "1:26:01", "remaining_time": "0:29:02", "throughput": 3605.67, "total_tokens": 18611864}
|
| 5721 |
+
{"current_steps": 28535, "total_steps": 38160, "loss": 0.581, "lr": 9.081463180051883e-06, "epoch": 14.95545073375262, "percentage": 74.78, "elapsed_time": "1:26:02", "remaining_time": "0:29:01", "throughput": 3605.73, "total_tokens": 18615480}
|
| 5722 |
+
{"current_steps": 28540, "total_steps": 38160, "loss": 0.514, "lr": 9.072648127095996e-06, "epoch": 14.958071278825996, "percentage": 74.79, "elapsed_time": "1:26:03", "remaining_time": "0:29:00", "throughput": 3605.74, "total_tokens": 18618392}
|
| 5723 |
+
{"current_steps": 28545, "total_steps": 38160, "loss": 0.5559, "lr": 9.0638364059604e-06, "epoch": 14.96069182389937, "percentage": 74.8, "elapsed_time": "1:26:04", "remaining_time": "0:28:59", "throughput": 3605.79, "total_tokens": 18622200}
|
| 5724 |
+
{"current_steps": 28550, "total_steps": 38160, "loss": 0.482, "lr": 9.055028018488417e-06, "epoch": 14.963312368972746, "percentage": 74.82, "elapsed_time": "1:26:05", "remaining_time": "0:28:58", "throughput": 3605.7, "total_tokens": 18625080}
|
| 5725 |
+
{"current_steps": 28555, "total_steps": 38160, "loss": 0.5187, "lr": 9.046222966522665e-06, "epoch": 14.965932914046121, "percentage": 74.83, "elapsed_time": "1:26:06", "remaining_time": "0:28:57", "throughput": 3605.68, "total_tokens": 18627992}
|
| 5726 |
+
{"current_steps": 28560, "total_steps": 38160, "loss": 0.4561, "lr": 9.037421251905059e-06, "epoch": 14.968553459119496, "percentage": 74.84, "elapsed_time": "1:26:07", "remaining_time": "0:28:56", "throughput": 3605.7, "total_tokens": 18631192}
|
| 5727 |
+
{"current_steps": 28565, "total_steps": 38160, "loss": 0.4237, "lr": 9.028622876476833e-06, "epoch": 14.971174004192871, "percentage": 74.86, "elapsed_time": "1:26:08", "remaining_time": "0:28:55", "throughput": 3605.79, "total_tokens": 18634904}
|
| 5728 |
+
{"current_steps": 28570, "total_steps": 38160, "loss": 0.5324, "lr": 9.019827842078474e-06, "epoch": 14.973794549266248, "percentage": 74.87, "elapsed_time": "1:26:08", "remaining_time": "0:28:55", "throughput": 3605.8, "total_tokens": 18637784}
|
| 5729 |
+
{"current_steps": 28575, "total_steps": 38160, "loss": 0.4964, "lr": 9.011036150549817e-06, "epoch": 14.976415094339622, "percentage": 74.88, "elapsed_time": "1:26:09", "remaining_time": "0:28:54", "throughput": 3605.63, "total_tokens": 18640120}
|
| 5730 |
+
{"current_steps": 28580, "total_steps": 38160, "loss": 0.6224, "lr": 9.002247803729987e-06, "epoch": 14.979035639412999, "percentage": 74.9, "elapsed_time": "1:26:10", "remaining_time": "0:28:53", "throughput": 3605.68, "total_tokens": 18643672}
|
| 5731 |
+
{"current_steps": 28585, "total_steps": 38160, "loss": 0.5889, "lr": 8.993462803457404e-06, "epoch": 14.981656184486374, "percentage": 74.91, "elapsed_time": "1:26:11", "remaining_time": "0:28:52", "throughput": 3605.71, "total_tokens": 18646872}
|
| 5732 |
+
{"current_steps": 28590, "total_steps": 38160, "loss": 0.3633, "lr": 8.984681151569799e-06, "epoch": 14.984276729559749, "percentage": 74.92, "elapsed_time": "1:26:12", "remaining_time": "0:28:51", "throughput": 3605.8, "total_tokens": 18650744}
|
| 5733 |
+
{"current_steps": 28595, "total_steps": 38160, "loss": 0.5029, "lr": 8.975902849904183e-06, "epoch": 14.986897274633124, "percentage": 74.93, "elapsed_time": "1:26:13", "remaining_time": "0:28:50", "throughput": 3605.86, "total_tokens": 18654296}
|
| 5734 |
+
{"current_steps": 28600, "total_steps": 38160, "loss": 0.4494, "lr": 8.967127900296874e-06, "epoch": 14.9895178197065, "percentage": 74.95, "elapsed_time": "1:26:14", "remaining_time": "0:28:49", "throughput": 3605.9, "total_tokens": 18657560}
|
| 5735 |
+
{"current_steps": 28605, "total_steps": 38160, "loss": 0.4689, "lr": 8.95835630458349e-06, "epoch": 14.992138364779874, "percentage": 74.96, "elapsed_time": "1:26:15", "remaining_time": "0:28:48", "throughput": 3605.9, "total_tokens": 18660728}
|
| 5736 |
+
{"current_steps": 28610, "total_steps": 38160, "loss": 0.46, "lr": 8.949588064598958e-06, "epoch": 14.99475890985325, "percentage": 74.97, "elapsed_time": "1:26:15", "remaining_time": "0:28:47", "throughput": 3605.85, "total_tokens": 18663352}
|
| 5737 |
+
{"current_steps": 28615, "total_steps": 38160, "loss": 0.4868, "lr": 8.940823182177499e-06, "epoch": 14.997379454926625, "percentage": 74.99, "elapsed_time": "1:26:16", "remaining_time": "0:28:46", "throughput": 3605.72, "total_tokens": 18665816}
|
| 5738 |
+
{"current_steps": 28620, "total_steps": 38160, "loss": 0.3951, "lr": 8.932061659152607e-06, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "1:26:17", "remaining_time": "0:28:45", "throughput": 3605.58, "total_tokens": 18668536}
|
| 5739 |
+
{"current_steps": 28620, "total_steps": 38160, "eval_loss": 0.4691649079322815, "epoch": 15.0, "percentage": 75.0, "elapsed_time": "1:26:27", "remaining_time": "0:28:49", "throughput": 3598.63, "total_tokens": 18668536}
|
| 5740 |
+
{"current_steps": 28625, "total_steps": 38160, "loss": 0.7674, "lr": 8.92330349735711e-06, "epoch": 15.002620545073375, "percentage": 75.01, "elapsed_time": "1:26:30", "remaining_time": "0:28:48", "throughput": 3597.57, "total_tokens": 18672024}
|
| 5741 |
+
{"current_steps": 28630, "total_steps": 38160, "loss": 0.6076, "lr": 8.914548698623104e-06, "epoch": 15.00524109014675, "percentage": 75.03, "elapsed_time": "1:26:31", "remaining_time": "0:28:47", "throughput": 3597.49, "total_tokens": 18674712}
|