Training in progress, step 18126
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +191 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 58745928
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:794ed34b0c89c92f05947c26380617333fba8e2e3d667f6bf6dfb6128d61fd20
|
| 3 |
size 58745928
|
trainer_log.jsonl
CHANGED
|
@@ -3454,3 +3454,194 @@
|
|
| 3454 |
{"current_steps": 17180, "total_steps": 19080, "loss": 0.1861, "lr": 1.4967536313353237e-06, "epoch": 9.0041928721174, "percentage": 90.04, "elapsed_time": "1:19:38", "remaining_time": "0:08:48", "throughput": 2350.15, "total_tokens": 11229544}
|
| 3455 |
{"current_steps": 17185, "total_steps": 19080, "loss": 0.2323, "lr": 1.4889694807246779e-06, "epoch": 9.006813417190775, "percentage": 90.07, "elapsed_time": "1:19:39", "remaining_time": "0:08:47", "throughput": 2350.24, "total_tokens": 11233640}
|
| 3456 |
{"current_steps": 17190, "total_steps": 19080, "loss": 0.1362, "lr": 1.481205003070424e-06, "epoch": 9.00943396226415, "percentage": 90.09, "elapsed_time": "1:19:41", "remaining_time": "0:08:45", "throughput": 2350.24, "total_tokens": 11236552}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3454 |
{"current_steps": 17180, "total_steps": 19080, "loss": 0.1861, "lr": 1.4967536313353237e-06, "epoch": 9.0041928721174, "percentage": 90.04, "elapsed_time": "1:19:38", "remaining_time": "0:08:48", "throughput": 2350.15, "total_tokens": 11229544}
|
| 3455 |
{"current_steps": 17185, "total_steps": 19080, "loss": 0.2323, "lr": 1.4889694807246779e-06, "epoch": 9.006813417190775, "percentage": 90.07, "elapsed_time": "1:19:39", "remaining_time": "0:08:47", "throughput": 2350.24, "total_tokens": 11233640}
|
| 3456 |
{"current_steps": 17190, "total_steps": 19080, "loss": 0.1362, "lr": 1.481205003070424e-06, "epoch": 9.00943396226415, "percentage": 90.09, "elapsed_time": "1:19:41", "remaining_time": "0:08:45", "throughput": 2350.24, "total_tokens": 11236552}
|
| 3457 |
+
{"current_steps": 17195, "total_steps": 19080, "loss": 0.1822, "lr": 1.4734602048695312e-06, "epoch": 9.012054507337526, "percentage": 90.12, "elapsed_time": "1:19:42", "remaining_time": "0:08:44", "throughput": 2350.32, "total_tokens": 11240008}
|
| 3458 |
+
{"current_steps": 17200, "total_steps": 19080, "loss": 0.2346, "lr": 1.465735092602491e-06, "epoch": 9.014675052410901, "percentage": 90.15, "elapsed_time": "1:19:43", "remaining_time": "0:08:42", "throughput": 2350.37, "total_tokens": 11243176}
|
| 3459 |
+
{"current_steps": 17205, "total_steps": 19080, "loss": 0.2101, "lr": 1.4580296727333187e-06, "epoch": 9.017295597484276, "percentage": 90.17, "elapsed_time": "1:19:44", "remaining_time": "0:08:41", "throughput": 2350.44, "total_tokens": 11246568}
|
| 3460 |
+
{"current_steps": 17210, "total_steps": 19080, "loss": 0.1574, "lr": 1.450343951709568e-06, "epoch": 9.019916142557651, "percentage": 90.2, "elapsed_time": "1:19:46", "remaining_time": "0:08:40", "throughput": 2350.56, "total_tokens": 11251080}
|
| 3461 |
+
{"current_steps": 17215, "total_steps": 19080, "loss": 0.2614, "lr": 1.4426779359622916e-06, "epoch": 9.022536687631026, "percentage": 90.23, "elapsed_time": "1:19:47", "remaining_time": "0:08:38", "throughput": 2350.49, "total_tokens": 11253256}
|
| 3462 |
+
{"current_steps": 17220, "total_steps": 19080, "loss": 0.203, "lr": 1.4350316319060585e-06, "epoch": 9.025157232704403, "percentage": 90.25, "elapsed_time": "1:19:49", "remaining_time": "0:08:37", "throughput": 2350.57, "total_tokens": 11256936}
|
| 3463 |
+
{"current_steps": 17225, "total_steps": 19080, "loss": 0.1817, "lr": 1.4274050459389594e-06, "epoch": 9.027777777777779, "percentage": 90.28, "elapsed_time": "1:19:50", "remaining_time": "0:08:35", "throughput": 2350.58, "total_tokens": 11259784}
|
| 3464 |
+
{"current_steps": 17230, "total_steps": 19080, "loss": 0.2137, "lr": 1.4197981844425583e-06, "epoch": 9.030398322851154, "percentage": 90.3, "elapsed_time": "1:19:51", "remaining_time": "0:08:34", "throughput": 2350.63, "total_tokens": 11262728}
|
| 3465 |
+
{"current_steps": 17235, "total_steps": 19080, "loss": 0.1827, "lr": 1.4122110537819365e-06, "epoch": 9.033018867924529, "percentage": 90.33, "elapsed_time": "1:19:52", "remaining_time": "0:08:33", "throughput": 2350.64, "total_tokens": 11265640}
|
| 3466 |
+
{"current_steps": 17240, "total_steps": 19080, "loss": 0.1757, "lr": 1.4046436603056601e-06, "epoch": 9.035639412997904, "percentage": 90.36, "elapsed_time": "1:19:54", "remaining_time": "0:08:31", "throughput": 2350.76, "total_tokens": 11270344}
|
| 3467 |
+
{"current_steps": 17245, "total_steps": 19080, "loss": 0.2245, "lr": 1.397096010345772e-06, "epoch": 9.03825995807128, "percentage": 90.38, "elapsed_time": "1:19:55", "remaining_time": "0:08:30", "throughput": 2350.71, "total_tokens": 11272584}
|
| 3468 |
+
{"current_steps": 17250, "total_steps": 19080, "loss": 0.1936, "lr": 1.3895681102178094e-06, "epoch": 9.040880503144654, "percentage": 90.41, "elapsed_time": "1:19:57", "remaining_time": "0:08:28", "throughput": 2350.81, "total_tokens": 11276872}
|
| 3469 |
+
{"current_steps": 17255, "total_steps": 19080, "loss": 0.2209, "lr": 1.3820599662207695e-06, "epoch": 9.04350104821803, "percentage": 90.44, "elapsed_time": "1:19:58", "remaining_time": "0:08:27", "throughput": 2350.81, "total_tokens": 11279688}
|
| 3470 |
+
{"current_steps": 17260, "total_steps": 19080, "loss": 0.1744, "lr": 1.3745715846371244e-06, "epoch": 9.046121593291405, "percentage": 90.46, "elapsed_time": "1:19:59", "remaining_time": "0:08:26", "throughput": 2350.87, "total_tokens": 11282888}
|
| 3471 |
+
{"current_steps": 17265, "total_steps": 19080, "loss": 0.3085, "lr": 1.3671029717328142e-06, "epoch": 9.04874213836478, "percentage": 90.49, "elapsed_time": "1:20:00", "remaining_time": "0:08:24", "throughput": 2350.91, "total_tokens": 11285928}
|
| 3472 |
+
{"current_steps": 17270, "total_steps": 19080, "loss": 0.1979, "lr": 1.3596541337572265e-06, "epoch": 9.051362683438155, "percentage": 90.51, "elapsed_time": "1:20:02", "remaining_time": "0:08:23", "throughput": 2350.95, "total_tokens": 11289288}
|
| 3473 |
+
{"current_steps": 17275, "total_steps": 19080, "loss": 0.2229, "lr": 1.3522250769432115e-06, "epoch": 9.05398322851153, "percentage": 90.54, "elapsed_time": "1:20:04", "remaining_time": "0:08:21", "throughput": 2351.15, "total_tokens": 11295976}
|
| 3474 |
+
{"current_steps": 17280, "total_steps": 19080, "loss": 0.2252, "lr": 1.3448158075070687e-06, "epoch": 9.056603773584905, "percentage": 90.57, "elapsed_time": "1:20:05", "remaining_time": "0:08:20", "throughput": 2351.23, "total_tokens": 11299816}
|
| 3475 |
+
{"current_steps": 17285, "total_steps": 19080, "loss": 0.2149, "lr": 1.337426331648528e-06, "epoch": 9.05922431865828, "percentage": 90.59, "elapsed_time": "1:20:07", "remaining_time": "0:08:19", "throughput": 2351.28, "total_tokens": 11303176}
|
| 3476 |
+
{"current_steps": 17290, "total_steps": 19080, "loss": 0.1377, "lr": 1.3300566555507709e-06, "epoch": 9.061844863731656, "percentage": 90.62, "elapsed_time": "1:20:08", "remaining_time": "0:08:17", "throughput": 2351.34, "total_tokens": 11306376}
|
| 3477 |
+
{"current_steps": 17295, "total_steps": 19080, "loss": 0.1683, "lr": 1.3227067853804065e-06, "epoch": 9.064465408805031, "percentage": 90.64, "elapsed_time": "1:20:10", "remaining_time": "0:08:16", "throughput": 2351.45, "total_tokens": 11310696}
|
| 3478 |
+
{"current_steps": 17300, "total_steps": 19080, "loss": 0.1812, "lr": 1.315376727287465e-06, "epoch": 9.067085953878406, "percentage": 90.67, "elapsed_time": "1:20:11", "remaining_time": "0:08:15", "throughput": 2351.54, "total_tokens": 11314888}
|
| 3479 |
+
{"current_steps": 17305, "total_steps": 19080, "loss": 0.1949, "lr": 1.3080664874054127e-06, "epoch": 9.069706498951781, "percentage": 90.7, "elapsed_time": "1:20:12", "remaining_time": "0:08:13", "throughput": 2351.56, "total_tokens": 11317576}
|
| 3480 |
+
{"current_steps": 17310, "total_steps": 19080, "loss": 0.2285, "lr": 1.3007760718511176e-06, "epoch": 9.072327044025156, "percentage": 90.72, "elapsed_time": "1:20:14", "remaining_time": "0:08:12", "throughput": 2351.61, "total_tokens": 11320840}
|
| 3481 |
+
{"current_steps": 17315, "total_steps": 19080, "loss": 0.1425, "lr": 1.2935054867248692e-06, "epoch": 9.074947589098532, "percentage": 90.75, "elapsed_time": "1:20:15", "remaining_time": "0:08:10", "throughput": 2351.65, "total_tokens": 11324040}
|
| 3482 |
+
{"current_steps": 17320, "total_steps": 19080, "loss": 0.2331, "lr": 1.2862547381103567e-06, "epoch": 9.077568134171909, "percentage": 90.78, "elapsed_time": "1:20:16", "remaining_time": "0:08:09", "throughput": 2351.69, "total_tokens": 11327784}
|
| 3483 |
+
{"current_steps": 17325, "total_steps": 19080, "loss": 0.175, "lr": 1.2790238320746827e-06, "epoch": 9.080188679245284, "percentage": 90.8, "elapsed_time": "1:20:18", "remaining_time": "0:08:08", "throughput": 2351.76, "total_tokens": 11331304}
|
| 3484 |
+
{"current_steps": 17330, "total_steps": 19080, "loss": 0.265, "lr": 1.271812774668335e-06, "epoch": 9.082809224318659, "percentage": 90.83, "elapsed_time": "1:20:19", "remaining_time": "0:08:06", "throughput": 2351.79, "total_tokens": 11334312}
|
| 3485 |
+
{"current_steps": 17335, "total_steps": 19080, "loss": 0.2298, "lr": 1.2646215719251952e-06, "epoch": 9.085429769392034, "percentage": 90.85, "elapsed_time": "1:20:21", "remaining_time": "0:08:05", "throughput": 2351.92, "total_tokens": 11339144}
|
| 3486 |
+
{"current_steps": 17340, "total_steps": 19080, "loss": 0.1504, "lr": 1.2574502298625334e-06, "epoch": 9.08805031446541, "percentage": 90.88, "elapsed_time": "1:20:22", "remaining_time": "0:08:03", "throughput": 2351.98, "total_tokens": 11342664}
|
| 3487 |
+
{"current_steps": 17345, "total_steps": 19080, "loss": 0.1871, "lr": 1.250298754481008e-06, "epoch": 9.090670859538784, "percentage": 90.91, "elapsed_time": "1:20:23", "remaining_time": "0:08:02", "throughput": 2352.01, "total_tokens": 11345768}
|
| 3488 |
+
{"current_steps": 17350, "total_steps": 19080, "loss": 0.2855, "lr": 1.2431671517646403e-06, "epoch": 9.09329140461216, "percentage": 90.93, "elapsed_time": "1:20:25", "remaining_time": "0:08:01", "throughput": 2352.1, "total_tokens": 11349448}
|
| 3489 |
+
{"current_steps": 17355, "total_steps": 19080, "loss": 0.1549, "lr": 1.2360554276808295e-06, "epoch": 9.095911949685535, "percentage": 90.96, "elapsed_time": "1:20:26", "remaining_time": "0:07:59", "throughput": 2352.17, "total_tokens": 11352744}
|
| 3490 |
+
{"current_steps": 17360, "total_steps": 19080, "loss": 0.1963, "lr": 1.228963588180343e-06, "epoch": 9.09853249475891, "percentage": 90.99, "elapsed_time": "1:20:27", "remaining_time": "0:07:58", "throughput": 2352.12, "total_tokens": 11354920}
|
| 3491 |
+
{"current_steps": 17365, "total_steps": 19080, "loss": 0.1791, "lr": 1.2218916391973118e-06, "epoch": 9.101153039832285, "percentage": 91.01, "elapsed_time": "1:20:29", "remaining_time": "0:07:56", "throughput": 2352.23, "total_tokens": 11359240}
|
| 3492 |
+
{"current_steps": 17370, "total_steps": 19080, "loss": 0.2646, "lr": 1.2148395866492135e-06, "epoch": 9.10377358490566, "percentage": 91.04, "elapsed_time": "1:20:30", "remaining_time": "0:07:55", "throughput": 2352.24, "total_tokens": 11362056}
|
| 3493 |
+
{"current_steps": 17375, "total_steps": 19080, "loss": 0.1106, "lr": 1.2078074364368862e-06, "epoch": 9.106394129979035, "percentage": 91.06, "elapsed_time": "1:20:31", "remaining_time": "0:07:54", "throughput": 2352.23, "total_tokens": 11364648}
|
| 3494 |
+
{"current_steps": 17380, "total_steps": 19080, "loss": 0.1846, "lr": 1.2007951944445122e-06, "epoch": 9.10901467505241, "percentage": 91.09, "elapsed_time": "1:20:32", "remaining_time": "0:07:52", "throughput": 2352.3, "total_tokens": 11368168}
|
| 3495 |
+
{"current_steps": 17385, "total_steps": 19080, "loss": 0.196, "lr": 1.1938028665396173e-06, "epoch": 9.111635220125786, "percentage": 91.12, "elapsed_time": "1:20:33", "remaining_time": "0:07:51", "throughput": 2352.31, "total_tokens": 11370856}
|
| 3496 |
+
{"current_steps": 17390, "total_steps": 19080, "loss": 0.2205, "lr": 1.1868304585730571e-06, "epoch": 9.114255765199161, "percentage": 91.14, "elapsed_time": "1:20:35", "remaining_time": "0:07:49", "throughput": 2352.36, "total_tokens": 11373768}
|
| 3497 |
+
{"current_steps": 17395, "total_steps": 19080, "loss": 0.2296, "lr": 1.1798779763790346e-06, "epoch": 9.116876310272536, "percentage": 91.17, "elapsed_time": "1:20:36", "remaining_time": "0:07:48", "throughput": 2352.42, "total_tokens": 11377192}
|
| 3498 |
+
{"current_steps": 17400, "total_steps": 19080, "loss": 0.1757, "lr": 1.1729454257750544e-06, "epoch": 9.119496855345911, "percentage": 91.19, "elapsed_time": "1:20:37", "remaining_time": "0:07:47", "throughput": 2352.43, "total_tokens": 11379912}
|
| 3499 |
+
{"current_steps": 17405, "total_steps": 19080, "loss": 0.2089, "lr": 1.1660328125619652e-06, "epoch": 9.122117400419286, "percentage": 91.22, "elapsed_time": "1:20:38", "remaining_time": "0:07:45", "throughput": 2352.51, "total_tokens": 11383496}
|
| 3500 |
+
{"current_steps": 17410, "total_steps": 19080, "loss": 0.2251, "lr": 1.1591401425239318e-06, "epoch": 9.124737945492662, "percentage": 91.25, "elapsed_time": "1:20:40", "remaining_time": "0:07:44", "throughput": 2352.53, "total_tokens": 11386504}
|
| 3501 |
+
{"current_steps": 17415, "total_steps": 19080, "loss": 0.1412, "lr": 1.1522674214284158e-06, "epoch": 9.127358490566039, "percentage": 91.27, "elapsed_time": "1:20:41", "remaining_time": "0:07:42", "throughput": 2352.5, "total_tokens": 11388776}
|
| 3502 |
+
{"current_steps": 17420, "total_steps": 19080, "loss": 0.2454, "lr": 1.145414655026203e-06, "epoch": 9.129979035639414, "percentage": 91.3, "elapsed_time": "1:20:42", "remaining_time": "0:07:41", "throughput": 2352.54, "total_tokens": 11391976}
|
| 3503 |
+
{"current_steps": 17425, "total_steps": 19080, "loss": 0.2052, "lr": 1.1385818490513733e-06, "epoch": 9.132599580712789, "percentage": 91.33, "elapsed_time": "1:20:43", "remaining_time": "0:07:40", "throughput": 2352.58, "total_tokens": 11394856}
|
| 3504 |
+
{"current_steps": 17430, "total_steps": 19080, "loss": 0.2314, "lr": 1.1317690092213007e-06, "epoch": 9.135220125786164, "percentage": 91.35, "elapsed_time": "1:20:44", "remaining_time": "0:07:38", "throughput": 2352.61, "total_tokens": 11397896}
|
| 3505 |
+
{"current_steps": 17435, "total_steps": 19080, "loss": 0.2037, "lr": 1.124976141236675e-06, "epoch": 9.13784067085954, "percentage": 91.38, "elapsed_time": "1:20:46", "remaining_time": "0:07:37", "throughput": 2352.64, "total_tokens": 11401096}
|
| 3506 |
+
{"current_steps": 17440, "total_steps": 19080, "loss": 0.2057, "lr": 1.1182032507814354e-06, "epoch": 9.140461215932914, "percentage": 91.4, "elapsed_time": "1:20:47", "remaining_time": "0:07:35", "throughput": 2352.7, "total_tokens": 11404616}
|
| 3507 |
+
{"current_steps": 17445, "total_steps": 19080, "loss": 0.1735, "lr": 1.1114503435228434e-06, "epoch": 9.14308176100629, "percentage": 91.43, "elapsed_time": "1:20:48", "remaining_time": "0:07:34", "throughput": 2352.74, "total_tokens": 11407848}
|
| 3508 |
+
{"current_steps": 17450, "total_steps": 19080, "loss": 0.1843, "lr": 1.1047174251114234e-06, "epoch": 9.145702306079665, "percentage": 91.46, "elapsed_time": "1:20:49", "remaining_time": "0:07:33", "throughput": 2352.76, "total_tokens": 11410760}
|
| 3509 |
+
{"current_steps": 17455, "total_steps": 19080, "loss": 0.2563, "lr": 1.0980045011809604e-06, "epoch": 9.14832285115304, "percentage": 91.48, "elapsed_time": "1:20:51", "remaining_time": "0:07:31", "throughput": 2352.78, "total_tokens": 11413864}
|
| 3510 |
+
{"current_steps": 17460, "total_steps": 19080, "loss": 0.1969, "lr": 1.0913115773485388e-06, "epoch": 9.150943396226415, "percentage": 91.51, "elapsed_time": "1:20:52", "remaining_time": "0:07:30", "throughput": 2352.81, "total_tokens": 11416808}
|
| 3511 |
+
{"current_steps": 17465, "total_steps": 19080, "loss": 0.2888, "lr": 1.084638659214482e-06, "epoch": 9.15356394129979, "percentage": 91.54, "elapsed_time": "1:20:53", "remaining_time": "0:07:28", "throughput": 2352.86, "total_tokens": 11419944}
|
| 3512 |
+
{"current_steps": 17470, "total_steps": 19080, "loss": 0.1907, "lr": 1.0779857523623815e-06, "epoch": 9.156184486373165, "percentage": 91.56, "elapsed_time": "1:20:54", "remaining_time": "0:07:27", "throughput": 2352.86, "total_tokens": 11422728}
|
| 3513 |
+
{"current_steps": 17475, "total_steps": 19080, "loss": 0.2466, "lr": 1.071352862359093e-06, "epoch": 9.15880503144654, "percentage": 91.59, "elapsed_time": "1:20:57", "remaining_time": "0:07:26", "throughput": 2353.14, "total_tokens": 11430984}
|
| 3514 |
+
{"current_steps": 17480, "total_steps": 19080, "loss": 0.1729, "lr": 1.0647399947547127e-06, "epoch": 9.161425576519916, "percentage": 91.61, "elapsed_time": "1:20:58", "remaining_time": "0:07:24", "throughput": 2353.15, "total_tokens": 11433672}
|
| 3515 |
+
{"current_steps": 17485, "total_steps": 19080, "loss": 0.2152, "lr": 1.0581471550825812e-06, "epoch": 9.164046121593291, "percentage": 91.64, "elapsed_time": "1:20:59", "remaining_time": "0:07:23", "throughput": 2353.15, "total_tokens": 11436168}
|
| 3516 |
+
{"current_steps": 17490, "total_steps": 19080, "loss": 0.16, "lr": 1.0515743488592939e-06, "epoch": 9.166666666666666, "percentage": 91.67, "elapsed_time": "1:21:01", "remaining_time": "0:07:21", "throughput": 2353.2, "total_tokens": 11439528}
|
| 3517 |
+
{"current_steps": 17495, "total_steps": 19080, "loss": 0.2509, "lr": 1.0450215815846736e-06, "epoch": 9.169287211740041, "percentage": 91.69, "elapsed_time": "1:21:02", "remaining_time": "0:07:20", "throughput": 2353.22, "total_tokens": 11442312}
|
| 3518 |
+
{"current_steps": 17500, "total_steps": 19080, "loss": 0.187, "lr": 1.0384888587417736e-06, "epoch": 9.171907756813416, "percentage": 91.72, "elapsed_time": "1:21:03", "remaining_time": "0:07:19", "throughput": 2353.26, "total_tokens": 11445416}
|
| 3519 |
+
{"current_steps": 17505, "total_steps": 19080, "loss": 0.272, "lr": 1.0319761857968735e-06, "epoch": 9.174528301886792, "percentage": 91.75, "elapsed_time": "1:21:04", "remaining_time": "0:07:17", "throughput": 2353.31, "total_tokens": 11448712}
|
| 3520 |
+
{"current_steps": 17510, "total_steps": 19080, "loss": 0.1971, "lr": 1.0254835681994895e-06, "epoch": 9.177148846960169, "percentage": 91.77, "elapsed_time": "1:21:06", "remaining_time": "0:07:16", "throughput": 2353.36, "total_tokens": 11452104}
|
| 3521 |
+
{"current_steps": 17515, "total_steps": 19080, "loss": 0.1401, "lr": 1.0190110113823426e-06, "epoch": 9.179769392033544, "percentage": 91.8, "elapsed_time": "1:21:07", "remaining_time": "0:07:14", "throughput": 2353.46, "total_tokens": 11456136}
|
| 3522 |
+
{"current_steps": 17520, "total_steps": 19080, "loss": 0.2005, "lr": 1.0125585207613752e-06, "epoch": 9.182389937106919, "percentage": 91.82, "elapsed_time": "1:21:09", "remaining_time": "0:07:13", "throughput": 2353.51, "total_tokens": 11459272}
|
| 3523 |
+
{"current_steps": 17525, "total_steps": 19080, "loss": 0.1893, "lr": 1.0061261017357327e-06, "epoch": 9.185010482180294, "percentage": 91.85, "elapsed_time": "1:21:10", "remaining_time": "0:07:12", "throughput": 2353.61, "total_tokens": 11463208}
|
| 3524 |
+
{"current_steps": 17530, "total_steps": 19080, "loss": 0.1934, "lr": 9.997137596877732e-07, "epoch": 9.18763102725367, "percentage": 91.88, "elapsed_time": "1:21:11", "remaining_time": "0:07:10", "throughput": 2353.66, "total_tokens": 11466472}
|
| 3525 |
+
{"current_steps": 17535, "total_steps": 19080, "loss": 0.232, "lr": 9.93321499983052e-07, "epoch": 9.190251572327044, "percentage": 91.9, "elapsed_time": "1:21:12", "remaining_time": "0:07:09", "throughput": 2353.68, "total_tokens": 11469192}
|
| 3526 |
+
{"current_steps": 17540, "total_steps": 19080, "loss": 0.1902, "lr": 9.869493279703158e-07, "epoch": 9.19287211740042, "percentage": 91.93, "elapsed_time": "1:21:14", "remaining_time": "0:07:07", "throughput": 2353.71, "total_tokens": 11472232}
|
| 3527 |
+
{"current_steps": 17545, "total_steps": 19080, "loss": 0.249, "lr": 9.805972489815102e-07, "epoch": 9.195492662473795, "percentage": 91.95, "elapsed_time": "1:21:15", "remaining_time": "0:07:06", "throughput": 2353.78, "total_tokens": 11476040}
|
| 3528 |
+
{"current_steps": 17550, "total_steps": 19080, "loss": 0.186, "lr": 9.742652683317643e-07, "epoch": 9.19811320754717, "percentage": 91.98, "elapsed_time": "1:21:16", "remaining_time": "0:07:05", "throughput": 2353.76, "total_tokens": 11478728}
|
| 3529 |
+
{"current_steps": 17555, "total_steps": 19080, "loss": 0.2088, "lr": 9.679533913193927e-07, "epoch": 9.200733752620545, "percentage": 92.01, "elapsed_time": "1:21:17", "remaining_time": "0:07:03", "throughput": 2353.78, "total_tokens": 11481448}
|
| 3530 |
+
{"current_steps": 17560, "total_steps": 19080, "loss": 0.1713, "lr": 9.61661623225879e-07, "epoch": 9.20335429769392, "percentage": 92.03, "elapsed_time": "1:21:19", "remaining_time": "0:07:02", "throughput": 2353.82, "total_tokens": 11484424}
|
| 3531 |
+
{"current_steps": 17565, "total_steps": 19080, "loss": 0.2067, "lr": 9.553899693158951e-07, "epoch": 9.205974842767295, "percentage": 92.06, "elapsed_time": "1:21:20", "remaining_time": "0:07:00", "throughput": 2353.87, "total_tokens": 11487560}
|
| 3532 |
+
{"current_steps": 17570, "total_steps": 19080, "loss": 0.1901, "lr": 9.491384348372684e-07, "epoch": 9.20859538784067, "percentage": 92.09, "elapsed_time": "1:21:21", "remaining_time": "0:06:59", "throughput": 2353.9, "total_tokens": 11490632}
|
| 3533 |
+
{"current_steps": 17575, "total_steps": 19080, "loss": 0.1972, "lr": 9.429070250210004e-07, "epoch": 9.211215932914046, "percentage": 92.11, "elapsed_time": "1:21:22", "remaining_time": "0:06:58", "throughput": 2353.9, "total_tokens": 11493032}
|
| 3534 |
+
{"current_steps": 17580, "total_steps": 19080, "loss": 0.1555, "lr": 9.366957450812535e-07, "epoch": 9.213836477987421, "percentage": 92.14, "elapsed_time": "1:21:23", "remaining_time": "0:06:56", "throughput": 2353.92, "total_tokens": 11496200}
|
| 3535 |
+
{"current_steps": 17585, "total_steps": 19080, "loss": 0.2612, "lr": 9.305046002153345e-07, "epoch": 9.216457023060796, "percentage": 92.16, "elapsed_time": "1:21:24", "remaining_time": "0:06:55", "throughput": 2353.92, "total_tokens": 11498696}
|
| 3536 |
+
{"current_steps": 17590, "total_steps": 19080, "loss": 0.1617, "lr": 9.243335956037186e-07, "epoch": 9.219077568134171, "percentage": 92.19, "elapsed_time": "1:21:26", "remaining_time": "0:06:53", "throughput": 2353.99, "total_tokens": 11502312}
|
| 3537 |
+
{"current_steps": 17595, "total_steps": 19080, "loss": 0.193, "lr": 9.181827364100171e-07, "epoch": 9.221698113207546, "percentage": 92.22, "elapsed_time": "1:21:27", "remaining_time": "0:06:52", "throughput": 2354.01, "total_tokens": 11505160}
|
| 3538 |
+
{"current_steps": 17600, "total_steps": 19080, "loss": 0.1312, "lr": 9.120520277809852e-07, "epoch": 9.224318658280922, "percentage": 92.24, "elapsed_time": "1:21:28", "remaining_time": "0:06:51", "throughput": 2354.04, "total_tokens": 11508456}
|
| 3539 |
+
{"current_steps": 17605, "total_steps": 19080, "loss": 0.2016, "lr": 9.059414748465278e-07, "epoch": 9.226939203354299, "percentage": 92.27, "elapsed_time": "1:21:30", "remaining_time": "0:06:49", "throughput": 2354.11, "total_tokens": 11511816}
|
| 3540 |
+
{"current_steps": 17610, "total_steps": 19080, "loss": 0.1919, "lr": 8.998510827196715e-07, "epoch": 9.229559748427674, "percentage": 92.3, "elapsed_time": "1:21:31", "remaining_time": "0:06:48", "throughput": 2354.09, "total_tokens": 11514088}
|
| 3541 |
+
{"current_steps": 17615, "total_steps": 19080, "loss": 0.1706, "lr": 8.937808564965733e-07, "epoch": 9.232180293501049, "percentage": 92.32, "elapsed_time": "1:21:32", "remaining_time": "0:06:46", "throughput": 2354.12, "total_tokens": 11517096}
|
| 3542 |
+
{"current_steps": 17620, "total_steps": 19080, "loss": 0.1835, "lr": 8.877308012565339e-07, "epoch": 9.234800838574424, "percentage": 92.35, "elapsed_time": "1:21:33", "remaining_time": "0:06:45", "throughput": 2354.16, "total_tokens": 11520168}
|
| 3543 |
+
{"current_steps": 17625, "total_steps": 19080, "loss": 0.3027, "lr": 8.817009220619482e-07, "epoch": 9.2374213836478, "percentage": 92.37, "elapsed_time": "1:21:34", "remaining_time": "0:06:44", "throughput": 2354.24, "total_tokens": 11523944}
|
| 3544 |
+
{"current_steps": 17630, "total_steps": 19080, "loss": 0.2684, "lr": 8.756912239583554e-07, "epoch": 9.240041928721174, "percentage": 92.4, "elapsed_time": "1:21:36", "remaining_time": "0:06:42", "throughput": 2354.32, "total_tokens": 11527720}
|
| 3545 |
+
{"current_steps": 17635, "total_steps": 19080, "loss": 0.2851, "lr": 8.697017119743911e-07, "epoch": 9.24266247379455, "percentage": 92.43, "elapsed_time": "1:21:37", "remaining_time": "0:06:41", "throughput": 2354.39, "total_tokens": 11531304}
|
| 3546 |
+
{"current_steps": 17640, "total_steps": 19080, "loss": 0.1969, "lr": 8.637323911218048e-07, "epoch": 9.245283018867925, "percentage": 92.45, "elapsed_time": "1:21:39", "remaining_time": "0:06:39", "throughput": 2354.47, "total_tokens": 11535176}
|
| 3547 |
+
{"current_steps": 17645, "total_steps": 19080, "loss": 0.2768, "lr": 8.577832663954538e-07, "epoch": 9.2479035639413, "percentage": 92.48, "elapsed_time": "1:21:40", "remaining_time": "0:06:38", "throughput": 2354.55, "total_tokens": 11539016}
|
| 3548 |
+
{"current_steps": 17650, "total_steps": 19080, "loss": 0.1551, "lr": 8.51854342773295e-07, "epoch": 9.250524109014675, "percentage": 92.51, "elapsed_time": "1:21:42", "remaining_time": "0:06:37", "throughput": 2354.69, "total_tokens": 11543752}
|
| 3549 |
+
{"current_steps": 17655, "total_steps": 19080, "loss": 0.23, "lr": 8.459456252163739e-07, "epoch": 9.25314465408805, "percentage": 92.53, "elapsed_time": "1:21:43", "remaining_time": "0:06:35", "throughput": 2354.71, "total_tokens": 11546664}
|
| 3550 |
+
{"current_steps": 17660, "total_steps": 19080, "loss": 0.1992, "lr": 8.400571186688466e-07, "epoch": 9.255765199161425, "percentage": 92.56, "elapsed_time": "1:21:44", "remaining_time": "0:06:34", "throughput": 2354.7, "total_tokens": 11549032}
|
| 3551 |
+
{"current_steps": 17665, "total_steps": 19080, "loss": 0.3095, "lr": 8.341888280579386e-07, "epoch": 9.2583857442348, "percentage": 92.58, "elapsed_time": "1:21:45", "remaining_time": "0:06:32", "throughput": 2354.76, "total_tokens": 11552328}
|
| 3552 |
+
{"current_steps": 17670, "total_steps": 19080, "loss": 0.284, "lr": 8.283407582939689e-07, "epoch": 9.261006289308176, "percentage": 92.61, "elapsed_time": "1:21:47", "remaining_time": "0:06:31", "throughput": 2354.79, "total_tokens": 11555464}
|
| 3553 |
+
{"current_steps": 17675, "total_steps": 19080, "loss": 0.1909, "lr": 8.22512914270332e-07, "epoch": 9.26362683438155, "percentage": 92.64, "elapsed_time": "1:21:48", "remaining_time": "0:06:30", "throughput": 2354.78, "total_tokens": 11558088}
|
| 3554 |
+
{"current_steps": 17680, "total_steps": 19080, "loss": 0.2348, "lr": 8.167053008635101e-07, "epoch": 9.266247379454926, "percentage": 92.66, "elapsed_time": "1:21:49", "remaining_time": "0:06:28", "throughput": 2354.8, "total_tokens": 11560872}
|
| 3555 |
+
{"current_steps": 17685, "total_steps": 19080, "loss": 0.1645, "lr": 8.109179229330438e-07, "epoch": 9.268867924528301, "percentage": 92.69, "elapsed_time": "1:21:50", "remaining_time": "0:06:27", "throughput": 2354.86, "total_tokens": 11564264}
|
| 3556 |
+
{"current_steps": 17690, "total_steps": 19080, "loss": 0.1806, "lr": 8.051507853215401e-07, "epoch": 9.271488469601676, "percentage": 92.71, "elapsed_time": "1:21:52", "remaining_time": "0:06:25", "throughput": 2354.93, "total_tokens": 11567656}
|
| 3557 |
+
{"current_steps": 17695, "total_steps": 19080, "loss": 0.1994, "lr": 7.994038928546887e-07, "epoch": 9.274109014675052, "percentage": 92.74, "elapsed_time": "1:21:53", "remaining_time": "0:06:24", "throughput": 2354.97, "total_tokens": 11571176}
|
| 3558 |
+
{"current_steps": 17700, "total_steps": 19080, "loss": 0.262, "lr": 7.93677250341221e-07, "epoch": 9.276729559748428, "percentage": 92.77, "elapsed_time": "1:21:54", "remaining_time": "0:06:23", "throughput": 2355.01, "total_tokens": 11574216}
|
| 3559 |
+
{"current_steps": 17705, "total_steps": 19080, "loss": 0.168, "lr": 7.879708625729287e-07, "epoch": 9.279350104821804, "percentage": 92.79, "elapsed_time": "1:21:56", "remaining_time": "0:06:21", "throughput": 2355.05, "total_tokens": 11577608}
|
| 3560 |
+
{"current_steps": 17710, "total_steps": 19080, "loss": 0.2086, "lr": 7.822847343246564e-07, "epoch": 9.281970649895179, "percentage": 92.82, "elapsed_time": "1:21:57", "remaining_time": "0:06:20", "throughput": 2355.1, "total_tokens": 11581000}
|
| 3561 |
+
{"current_steps": 17715, "total_steps": 19080, "loss": 0.2397, "lr": 7.766188703542954e-07, "epoch": 9.284591194968554, "percentage": 92.85, "elapsed_time": "1:21:58", "remaining_time": "0:06:19", "throughput": 2355.16, "total_tokens": 11584840}
|
| 3562 |
+
{"current_steps": 17720, "total_steps": 19080, "loss": 0.2179, "lr": 7.709732754027866e-07, "epoch": 9.28721174004193, "percentage": 92.87, "elapsed_time": "1:22:00", "remaining_time": "0:06:17", "throughput": 2355.21, "total_tokens": 11587912}
|
| 3563 |
+
{"current_steps": 17725, "total_steps": 19080, "loss": 0.1985, "lr": 7.653479541941038e-07, "epoch": 9.289832285115304, "percentage": 92.9, "elapsed_time": "1:22:01", "remaining_time": "0:06:16", "throughput": 2355.31, "total_tokens": 11591752}
|
| 3564 |
+
{"current_steps": 17730, "total_steps": 19080, "loss": 0.2611, "lr": 7.597429114352572e-07, "epoch": 9.29245283018868, "percentage": 92.92, "elapsed_time": "1:22:02", "remaining_time": "0:06:14", "throughput": 2355.29, "total_tokens": 11594248}
|
| 3565 |
+
{"current_steps": 17735, "total_steps": 19080, "loss": 0.1854, "lr": 7.541581518162922e-07, "epoch": 9.295073375262055, "percentage": 92.95, "elapsed_time": "1:22:03", "remaining_time": "0:06:13", "throughput": 2355.31, "total_tokens": 11597448}
|
| 3566 |
+
{"current_steps": 17740, "total_steps": 19080, "loss": 0.3018, "lr": 7.485936800102788e-07, "epoch": 9.29769392033543, "percentage": 92.98, "elapsed_time": "1:22:05", "remaining_time": "0:06:12", "throughput": 2355.35, "total_tokens": 11600360}
|
| 3567 |
+
{"current_steps": 17745, "total_steps": 19080, "loss": 0.2788, "lr": 7.430495006733152e-07, "epoch": 9.300314465408805, "percentage": 93.0, "elapsed_time": "1:22:06", "remaining_time": "0:06:10", "throughput": 2355.38, "total_tokens": 11603528}
|
| 3568 |
+
{"current_steps": 17750, "total_steps": 19080, "loss": 0.2482, "lr": 7.375256184445178e-07, "epoch": 9.30293501048218, "percentage": 93.03, "elapsed_time": "1:22:07", "remaining_time": "0:06:09", "throughput": 2355.41, "total_tokens": 11606600}
|
| 3569 |
+
{"current_steps": 17755, "total_steps": 19080, "loss": 0.1853, "lr": 7.320220379460146e-07, "epoch": 9.305555555555555, "percentage": 93.06, "elapsed_time": "1:22:08", "remaining_time": "0:06:07", "throughput": 2355.4, "total_tokens": 11609064}
|
| 3570 |
+
{"current_steps": 17760, "total_steps": 19080, "loss": 0.2002, "lr": 7.265387637829524e-07, "epoch": 9.30817610062893, "percentage": 93.08, "elapsed_time": "1:22:09", "remaining_time": "0:06:06", "throughput": 2355.46, "total_tokens": 11612328}
|
| 3571 |
+
{"current_steps": 17765, "total_steps": 19080, "loss": 0.2481, "lr": 7.210758005434887e-07, "epoch": 9.310796645702306, "percentage": 93.11, "elapsed_time": "1:22:11", "remaining_time": "0:06:05", "throughput": 2355.51, "total_tokens": 11615912}
|
| 3572 |
+
{"current_steps": 17770, "total_steps": 19080, "loss": 0.2542, "lr": 7.156331527987753e-07, "epoch": 9.31341719077568, "percentage": 93.13, "elapsed_time": "1:22:12", "remaining_time": "0:06:03", "throughput": 2355.54, "total_tokens": 11618888}
|
| 3573 |
+
{"current_steps": 17775, "total_steps": 19080, "loss": 0.3932, "lr": 7.102108251029777e-07, "epoch": 9.316037735849056, "percentage": 93.16, "elapsed_time": "1:22:13", "remaining_time": "0:06:02", "throughput": 2355.55, "total_tokens": 11621544}
|
| 3574 |
+
{"current_steps": 17780, "total_steps": 19080, "loss": 0.1779, "lr": 7.04808821993247e-07, "epoch": 9.318658280922431, "percentage": 93.19, "elapsed_time": "1:22:14", "remaining_time": "0:06:00", "throughput": 2355.56, "total_tokens": 11624040}
|
| 3575 |
+
{"current_steps": 17785, "total_steps": 19080, "loss": 0.1761, "lr": 6.994271479897314e-07, "epoch": 9.321278825995806, "percentage": 93.21, "elapsed_time": "1:22:15", "remaining_time": "0:05:59", "throughput": 2355.56, "total_tokens": 11626728}
|
| 3576 |
+
{"current_steps": 17790, "total_steps": 19080, "loss": 0.1322, "lr": 6.940658075955759e-07, "epoch": 9.323899371069182, "percentage": 93.24, "elapsed_time": "1:22:17", "remaining_time": "0:05:58", "throughput": 2355.6, "total_tokens": 11629832}
|
| 3577 |
+
{"current_steps": 17795, "total_steps": 19080, "loss": 0.1471, "lr": 6.887248052969003e-07, "epoch": 9.326519916142558, "percentage": 93.27, "elapsed_time": "1:22:18", "remaining_time": "0:05:56", "throughput": 2355.66, "total_tokens": 11633320}
|
| 3578 |
+
{"current_steps": 17800, "total_steps": 19080, "loss": 0.2028, "lr": 6.834041455628104e-07, "epoch": 9.329140461215934, "percentage": 93.29, "elapsed_time": "1:22:19", "remaining_time": "0:05:55", "throughput": 2355.66, "total_tokens": 11636104}
|
| 3579 |
+
{"current_steps": 17805, "total_steps": 19080, "loss": 0.2262, "lr": 6.781038328454003e-07, "epoch": 9.331761006289309, "percentage": 93.32, "elapsed_time": "1:22:21", "remaining_time": "0:05:53", "throughput": 2355.73, "total_tokens": 11639752}
|
| 3580 |
+
{"current_steps": 17810, "total_steps": 19080, "loss": 0.2166, "lr": 6.728238715797169e-07, "epoch": 9.334381551362684, "percentage": 93.34, "elapsed_time": "1:22:22", "remaining_time": "0:05:52", "throughput": 2355.74, "total_tokens": 11642664}
|
| 3581 |
+
{"current_steps": 17815, "total_steps": 19080, "loss": 0.2155, "lr": 6.675642661838011e-07, "epoch": 9.33700209643606, "percentage": 93.37, "elapsed_time": "1:22:23", "remaining_time": "0:05:51", "throughput": 2355.79, "total_tokens": 11646024}
|
| 3582 |
+
{"current_steps": 17820, "total_steps": 19080, "loss": 0.1963, "lr": 6.623250210586463e-07, "epoch": 9.339622641509434, "percentage": 93.4, "elapsed_time": "1:22:24", "remaining_time": "0:05:49", "throughput": 2355.84, "total_tokens": 11649192}
|
| 3583 |
+
{"current_steps": 17825, "total_steps": 19080, "loss": 0.2791, "lr": 6.571061405882095e-07, "epoch": 9.34224318658281, "percentage": 93.42, "elapsed_time": "1:22:26", "remaining_time": "0:05:48", "throughput": 2355.94, "total_tokens": 11653192}
|
| 3584 |
+
{"current_steps": 17830, "total_steps": 19080, "loss": 0.187, "lr": 6.519076291394172e-07, "epoch": 9.344863731656185, "percentage": 93.45, "elapsed_time": "1:22:27", "remaining_time": "0:05:46", "throughput": 2356.02, "total_tokens": 11657224}
|
| 3585 |
+
{"current_steps": 17835, "total_steps": 19080, "loss": 0.3241, "lr": 6.467294910621452e-07, "epoch": 9.34748427672956, "percentage": 93.47, "elapsed_time": "1:22:29", "remaining_time": "0:05:45", "throughput": 2356.05, "total_tokens": 11660328}
|
| 3586 |
+
{"current_steps": 17840, "total_steps": 19080, "loss": 0.2311, "lr": 6.415717306892193e-07, "epoch": 9.350104821802935, "percentage": 93.5, "elapsed_time": "1:22:30", "remaining_time": "0:05:44", "throughput": 2356.09, "total_tokens": 11663400}
|
| 3587 |
+
{"current_steps": 17845, "total_steps": 19080, "loss": 0.2316, "lr": 6.364343523364263e-07, "epoch": 9.35272536687631, "percentage": 93.53, "elapsed_time": "1:22:31", "remaining_time": "0:05:42", "throughput": 2356.1, "total_tokens": 11665992}
|
| 3588 |
+
{"current_steps": 17850, "total_steps": 19080, "loss": 0.1911, "lr": 6.313173603024802e-07, "epoch": 9.355345911949685, "percentage": 93.55, "elapsed_time": "1:22:32", "remaining_time": "0:05:41", "throughput": 2356.12, "total_tokens": 11669000}
|
| 3589 |
+
{"current_steps": 17855, "total_steps": 19080, "loss": 0.1792, "lr": 6.262207588690533e-07, "epoch": 9.35796645702306, "percentage": 93.58, "elapsed_time": "1:22:33", "remaining_time": "0:05:39", "throughput": 2356.15, "total_tokens": 11671976}
|
| 3590 |
+
{"current_steps": 17860, "total_steps": 19080, "loss": 0.1825, "lr": 6.211445523007398e-07, "epoch": 9.360587002096436, "percentage": 93.61, "elapsed_time": "1:22:35", "remaining_time": "0:05:38", "throughput": 2356.18, "total_tokens": 11674984}
|
| 3591 |
+
{"current_steps": 17865, "total_steps": 19080, "loss": 0.1849, "lr": 6.160887448450892e-07, "epoch": 9.36320754716981, "percentage": 93.63, "elapsed_time": "1:22:36", "remaining_time": "0:05:37", "throughput": 2356.2, "total_tokens": 11677864}
|
| 3592 |
+
{"current_steps": 17870, "total_steps": 19080, "loss": 0.2051, "lr": 6.11053340732562e-07, "epoch": 9.365828092243186, "percentage": 93.66, "elapsed_time": "1:22:37", "remaining_time": "0:05:35", "throughput": 2356.23, "total_tokens": 11681128}
|
| 3593 |
+
{"current_steps": 17875, "total_steps": 19080, "loss": 0.1878, "lr": 6.060383441765544e-07, "epoch": 9.368448637316561, "percentage": 93.68, "elapsed_time": "1:22:38", "remaining_time": "0:05:34", "throughput": 2356.23, "total_tokens": 11683880}
|
| 3594 |
+
{"current_steps": 17880, "total_steps": 19080, "loss": 0.1804, "lr": 6.01043759373393e-07, "epoch": 9.371069182389936, "percentage": 93.71, "elapsed_time": "1:22:40", "remaining_time": "0:05:32", "throughput": 2356.29, "total_tokens": 11687496}
|
| 3595 |
+
{"current_steps": 17885, "total_steps": 19080, "loss": 0.2486, "lr": 5.960695905023128e-07, "epoch": 9.373689727463312, "percentage": 93.74, "elapsed_time": "1:22:41", "remaining_time": "0:05:31", "throughput": 2356.32, "total_tokens": 11690696}
|
| 3596 |
+
{"current_steps": 17890, "total_steps": 19080, "loss": 0.1964, "lr": 5.91115841725473e-07, "epoch": 9.376310272536688, "percentage": 93.76, "elapsed_time": "1:22:42", "remaining_time": "0:05:30", "throughput": 2356.34, "total_tokens": 11693544}
|
| 3597 |
+
{"current_steps": 17895, "total_steps": 19080, "loss": 0.2301, "lr": 5.861825171879415e-07, "epoch": 9.378930817610064, "percentage": 93.79, "elapsed_time": "1:22:43", "remaining_time": "0:05:28", "throughput": 2356.38, "total_tokens": 11696840}
|
| 3598 |
+
{"current_steps": 17900, "total_steps": 19080, "loss": 0.2105, "lr": 5.812696210177021e-07, "epoch": 9.381551362683439, "percentage": 93.82, "elapsed_time": "1:22:45", "remaining_time": "0:05:27", "throughput": 2356.41, "total_tokens": 11700264}
|
| 3599 |
+
{"current_steps": 17905, "total_steps": 19080, "loss": 0.1389, "lr": 5.763771573256415e-07, "epoch": 9.384171907756814, "percentage": 93.84, "elapsed_time": "1:22:46", "remaining_time": "0:05:25", "throughput": 2356.46, "total_tokens": 11703496}
|
| 3600 |
+
{"current_steps": 17910, "total_steps": 19080, "loss": 0.2668, "lr": 5.715051302055491e-07, "epoch": 9.38679245283019, "percentage": 93.87, "elapsed_time": "1:22:47", "remaining_time": "0:05:24", "throughput": 2356.5, "total_tokens": 11706664}
|
| 3601 |
+
{"current_steps": 17915, "total_steps": 19080, "loss": 0.2157, "lr": 5.666535437341108e-07, "epoch": 9.389412997903564, "percentage": 93.89, "elapsed_time": "1:22:48", "remaining_time": "0:05:23", "throughput": 2356.51, "total_tokens": 11709480}
|
| 3602 |
+
{"current_steps": 17920, "total_steps": 19080, "loss": 0.1569, "lr": 5.618224019709212e-07, "epoch": 9.39203354297694, "percentage": 93.92, "elapsed_time": "1:22:50", "remaining_time": "0:05:21", "throughput": 2356.54, "total_tokens": 11712712}
|
| 3603 |
+
{"current_steps": 17925, "total_steps": 19080, "loss": 0.2357, "lr": 5.570117089584548e-07, "epoch": 9.394654088050315, "percentage": 93.95, "elapsed_time": "1:22:51", "remaining_time": "0:05:20", "throughput": 2356.56, "total_tokens": 11715464}
|
| 3604 |
+
{"current_steps": 17930, "total_steps": 19080, "loss": 0.1958, "lr": 5.522214687220751e-07, "epoch": 9.39727463312369, "percentage": 93.97, "elapsed_time": "1:22:52", "remaining_time": "0:05:18", "throughput": 2356.56, "total_tokens": 11718120}
|
| 3605 |
+
{"current_steps": 17935, "total_steps": 19080, "loss": 0.2373, "lr": 5.474516852700451e-07, "epoch": 9.399895178197065, "percentage": 94.0, "elapsed_time": "1:22:53", "remaining_time": "0:05:17", "throughput": 2356.62, "total_tokens": 11721512}
|
| 3606 |
+
{"current_steps": 17940, "total_steps": 19080, "loss": 0.1557, "lr": 5.427023625934946e-07, "epoch": 9.40251572327044, "percentage": 94.03, "elapsed_time": "1:22:54", "remaining_time": "0:05:16", "throughput": 2356.59, "total_tokens": 11724008}
|
| 3607 |
+
{"current_steps": 17945, "total_steps": 19080, "loss": 0.1922, "lr": 5.379735046664419e-07, "epoch": 9.405136268343815, "percentage": 94.05, "elapsed_time": "1:22:56", "remaining_time": "0:05:14", "throughput": 2356.6, "total_tokens": 11726696}
|
| 3608 |
+
{"current_steps": 17950, "total_steps": 19080, "loss": 0.1791, "lr": 5.33265115445783e-07, "epoch": 9.40775681341719, "percentage": 94.08, "elapsed_time": "1:22:57", "remaining_time": "0:05:13", "throughput": 2356.67, "total_tokens": 11730408}
|
| 3609 |
+
{"current_steps": 17955, "total_steps": 19080, "loss": 0.2329, "lr": 5.285771988712746e-07, "epoch": 9.410377358490566, "percentage": 94.1, "elapsed_time": "1:22:58", "remaining_time": "0:05:11", "throughput": 2356.69, "total_tokens": 11733320}
|
| 3610 |
+
{"current_steps": 17960, "total_steps": 19080, "loss": 0.2134, "lr": 5.239097588655595e-07, "epoch": 9.41299790356394, "percentage": 94.13, "elapsed_time": "1:23:00", "remaining_time": "0:05:10", "throughput": 2356.84, "total_tokens": 11738472}
|
| 3611 |
+
{"current_steps": 17965, "total_steps": 19080, "loss": 0.2347, "lr": 5.192627993341359e-07, "epoch": 9.415618448637316, "percentage": 94.16, "elapsed_time": "1:23:01", "remaining_time": "0:05:09", "throughput": 2356.83, "total_tokens": 11741032}
|
| 3612 |
+
{"current_steps": 17970, "total_steps": 19080, "loss": 0.1797, "lr": 5.146363241653657e-07, "epoch": 9.418238993710691, "percentage": 94.18, "elapsed_time": "1:23:03", "remaining_time": "0:05:07", "throughput": 2356.88, "total_tokens": 11744328}
|
| 3613 |
+
{"current_steps": 17975, "total_steps": 19080, "loss": 0.1522, "lr": 5.100303372304716e-07, "epoch": 9.420859538784066, "percentage": 94.21, "elapsed_time": "1:23:04", "remaining_time": "0:05:06", "throughput": 2356.94, "total_tokens": 11747976}
|
| 3614 |
+
{"current_steps": 17980, "total_steps": 19080, "loss": 0.3175, "lr": 5.054448423835373e-07, "epoch": 9.423480083857442, "percentage": 94.23, "elapsed_time": "1:23:05", "remaining_time": "0:05:05", "throughput": 2356.98, "total_tokens": 11751144}
|
| 3615 |
+
{"current_steps": 17985, "total_steps": 19080, "loss": 0.2523, "lr": 5.008798434614908e-07, "epoch": 9.426100628930818, "percentage": 94.26, "elapsed_time": "1:23:06", "remaining_time": "0:05:03", "throughput": 2357.02, "total_tokens": 11754312}
|
| 3616 |
+
{"current_steps": 17990, "total_steps": 19080, "loss": 0.1624, "lr": 4.963353442841156e-07, "epoch": 9.428721174004194, "percentage": 94.29, "elapsed_time": "1:23:08", "remaining_time": "0:05:02", "throughput": 2357.09, "total_tokens": 11757896}
|
| 3617 |
+
{"current_steps": 17995, "total_steps": 19080, "loss": 0.1476, "lr": 4.918113486540393e-07, "epoch": 9.431341719077569, "percentage": 94.31, "elapsed_time": "1:23:09", "remaining_time": "0:05:00", "throughput": 2357.13, "total_tokens": 11760840}
|
| 3618 |
+
{"current_steps": 18000, "total_steps": 19080, "loss": 0.1717, "lr": 4.873078603567421e-07, "epoch": 9.433962264150944, "percentage": 94.34, "elapsed_time": "1:23:10", "remaining_time": "0:04:59", "throughput": 2357.18, "total_tokens": 11764136}
|
| 3619 |
+
{"current_steps": 18005, "total_steps": 19080, "loss": 0.2438, "lr": 4.828248831605292e-07, "epoch": 9.43658280922432, "percentage": 94.37, "elapsed_time": "1:23:11", "remaining_time": "0:04:58", "throughput": 2357.19, "total_tokens": 11766696}
|
| 3620 |
+
{"current_steps": 18010, "total_steps": 19080, "loss": 0.2162, "lr": 4.783624208165554e-07, "epoch": 9.439203354297694, "percentage": 94.39, "elapsed_time": "1:23:12", "remaining_time": "0:04:56", "throughput": 2357.17, "total_tokens": 11769160}
|
| 3621 |
+
{"current_steps": 18015, "total_steps": 19080, "loss": 0.2251, "lr": 4.739204770588035e-07, "epoch": 9.44182389937107, "percentage": 94.42, "elapsed_time": "1:23:14", "remaining_time": "0:04:55", "throughput": 2357.2, "total_tokens": 11772360}
|
| 3622 |
+
{"current_steps": 18020, "total_steps": 19080, "loss": 0.2422, "lr": 4.694990556040918e-07, "epoch": 9.444444444444445, "percentage": 94.44, "elapsed_time": "1:23:15", "remaining_time": "0:04:53", "throughput": 2357.19, "total_tokens": 11774984}
|
| 3623 |
+
{"current_steps": 18025, "total_steps": 19080, "loss": 0.1911, "lr": 4.65098160152061e-07, "epoch": 9.44706498951782, "percentage": 94.47, "elapsed_time": "1:23:16", "remaining_time": "0:04:52", "throughput": 2357.2, "total_tokens": 11777928}
|
| 3624 |
+
{"current_steps": 18030, "total_steps": 19080, "loss": 0.1798, "lr": 4.6071779438517924e-07, "epoch": 9.449685534591195, "percentage": 94.5, "elapsed_time": "1:23:17", "remaining_time": "0:04:51", "throughput": 2357.25, "total_tokens": 11781096}
|
| 3625 |
+
{"current_steps": 18035, "total_steps": 19080, "loss": 0.1613, "lr": 4.563579619687369e-07, "epoch": 9.45230607966457, "percentage": 94.52, "elapsed_time": "1:23:19", "remaining_time": "0:04:49", "throughput": 2357.27, "total_tokens": 11784104}
|
| 3626 |
+
{"current_steps": 18040, "total_steps": 19080, "loss": 0.2106, "lr": 4.5201866655084636e-07, "epoch": 9.454926624737945, "percentage": 94.55, "elapsed_time": "1:23:20", "remaining_time": "0:04:48", "throughput": 2357.31, "total_tokens": 11787304}
|
| 3627 |
+
{"current_steps": 18045, "total_steps": 19080, "loss": 0.1358, "lr": 4.4769991176242533e-07, "epoch": 9.45754716981132, "percentage": 94.58, "elapsed_time": "1:23:21", "remaining_time": "0:04:46", "throughput": 2357.33, "total_tokens": 11790024}
|
| 3628 |
+
{"current_steps": 18050, "total_steps": 19080, "loss": 0.144, "lr": 4.4340170121721645e-07, "epoch": 9.460167714884696, "percentage": 94.6, "elapsed_time": "1:23:22", "remaining_time": "0:04:45", "throughput": 2357.33, "total_tokens": 11792840}
|
| 3629 |
+
{"current_steps": 18055, "total_steps": 19080, "loss": 0.1601, "lr": 4.3912403851176234e-07, "epoch": 9.46278825995807, "percentage": 94.63, "elapsed_time": "1:23:23", "remaining_time": "0:04:44", "throughput": 2357.35, "total_tokens": 11795720}
|
| 3630 |
+
{"current_steps": 18060, "total_steps": 19080, "loss": 0.176, "lr": 4.348669272254163e-07, "epoch": 9.465408805031446, "percentage": 94.65, "elapsed_time": "1:23:24", "remaining_time": "0:04:42", "throughput": 2357.36, "total_tokens": 11798472}
|
| 3631 |
+
{"current_steps": 18065, "total_steps": 19080, "loss": 0.1684, "lr": 4.306303709203374e-07, "epoch": 9.468029350104821, "percentage": 94.68, "elapsed_time": "1:23:26", "remaining_time": "0:04:41", "throughput": 2357.42, "total_tokens": 11801800}
|
| 3632 |
+
{"current_steps": 18070, "total_steps": 19080, "loss": 0.2118, "lr": 4.264143731414788e-07, "epoch": 9.470649895178196, "percentage": 94.71, "elapsed_time": "1:23:27", "remaining_time": "0:04:39", "throughput": 2357.47, "total_tokens": 11805160}
|
| 3633 |
+
{"current_steps": 18075, "total_steps": 19080, "loss": 0.2005, "lr": 4.2221893741659636e-07, "epoch": 9.473270440251572, "percentage": 94.73, "elapsed_time": "1:23:30", "remaining_time": "0:04:38", "throughput": 2357.62, "total_tokens": 11811816}
|
| 3634 |
+
{"current_steps": 18080, "total_steps": 19080, "loss": 0.2876, "lr": 4.180440672562402e-07, "epoch": 9.475890985324948, "percentage": 94.76, "elapsed_time": "1:23:31", "remaining_time": "0:04:37", "throughput": 2357.72, "total_tokens": 11816040}
|
| 3635 |
+
{"current_steps": 18085, "total_steps": 19080, "loss": 0.2507, "lr": 4.1388976615374665e-07, "epoch": 9.478511530398324, "percentage": 94.79, "elapsed_time": "1:23:32", "remaining_time": "0:04:35", "throughput": 2357.76, "total_tokens": 11818920}
|
| 3636 |
+
{"current_steps": 18090, "total_steps": 19080, "loss": 0.2246, "lr": 4.097560375852516e-07, "epoch": 9.481132075471699, "percentage": 94.81, "elapsed_time": "1:23:33", "remaining_time": "0:04:34", "throughput": 2357.78, "total_tokens": 11821736}
|
| 3637 |
+
{"current_steps": 18095, "total_steps": 19080, "loss": 0.2317, "lr": 4.056428850096661e-07, "epoch": 9.483752620545074, "percentage": 94.84, "elapsed_time": "1:23:35", "remaining_time": "0:04:33", "throughput": 2357.82, "total_tokens": 11825256}
|
| 3638 |
+
{"current_steps": 18100, "total_steps": 19080, "loss": 0.2561, "lr": 4.01550311868687e-07, "epoch": 9.48637316561845, "percentage": 94.86, "elapsed_time": "1:23:36", "remaining_time": "0:04:31", "throughput": 2357.92, "total_tokens": 11829416}
|
| 3639 |
+
{"current_steps": 18105, "total_steps": 19080, "loss": 0.267, "lr": 3.974783215867972e-07, "epoch": 9.488993710691824, "percentage": 94.89, "elapsed_time": "1:23:38", "remaining_time": "0:04:30", "throughput": 2357.93, "total_tokens": 11832200}
|
| 3640 |
+
{"current_steps": 18110, "total_steps": 19080, "loss": 0.2254, "lr": 3.9342691757124626e-07, "epoch": 9.4916142557652, "percentage": 94.92, "elapsed_time": "1:23:39", "remaining_time": "0:04:28", "throughput": 2357.99, "total_tokens": 11835592}
|
| 3641 |
+
{"current_steps": 18115, "total_steps": 19080, "loss": 0.2257, "lr": 3.8939610321206966e-07, "epoch": 9.494234800838575, "percentage": 94.94, "elapsed_time": "1:23:40", "remaining_time": "0:04:27", "throughput": 2358.05, "total_tokens": 11839400}
|
| 3642 |
+
{"current_steps": 18120, "total_steps": 19080, "loss": 0.2322, "lr": 3.853858818820694e-07, "epoch": 9.49685534591195, "percentage": 94.97, "elapsed_time": "1:23:41", "remaining_time": "0:04:26", "throughput": 2358.08, "total_tokens": 11842216}
|
| 3643 |
+
{"current_steps": 18125, "total_steps": 19080, "loss": 0.152, "lr": 3.8139625693680847e-07, "epoch": 9.499475890985325, "percentage": 94.99, "elapsed_time": "1:23:43", "remaining_time": "0:04:24", "throughput": 2358.09, "total_tokens": 11844936}
|
| 3644 |
+
{"current_steps": 18126, "total_steps": 19080, "eval_loss": 0.6764101982116699, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "1:23:59", "remaining_time": "0:04:25", "throughput": 2350.64, "total_tokens": 11845704}
|
| 3645 |
+
{"current_steps": 18130, "total_steps": 19080, "loss": 0.2177, "lr": 3.774272317146277e-07, "epoch": 9.5020964360587, "percentage": 95.02, "elapsed_time": "1:24:01", "remaining_time": "0:04:24", "throughput": 2350.03, "total_tokens": 11847912}
|
| 3646 |
+
{"current_steps": 18135, "total_steps": 19080, "loss": 0.1504, "lr": 3.7347880953662597e-07, "epoch": 9.504716981132075, "percentage": 95.05, "elapsed_time": "1:24:03", "remaining_time": "0:04:22", "throughput": 2350.18, "total_tokens": 11853192}
|
| 3647 |
+
{"current_steps": 18140, "total_steps": 19080, "loss": 0.2218, "lr": 3.6955099370666045e-07, "epoch": 9.50733752620545, "percentage": 95.07, "elapsed_time": "1:24:04", "remaining_time": "0:04:21", "throughput": 2350.25, "total_tokens": 11856808}
|