Training in progress, step 18126
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +191 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 26214528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98c2691d2278d3d8d70be3d74d21d5346cd2e1dc3ccd52b8ca3dc30ee249fdb3
|
| 3 |
size 26214528
|
trainer_log.jsonl
CHANGED
|
@@ -3453,3 +3453,194 @@
|
|
| 3453 |
{"current_steps": 17175, "total_steps": 19080, "loss": 5.4825, "lr": 1.5045574483889463e-06, "epoch": 9.001572327044025, "percentage": 90.02, "elapsed_time": "0:52:26", "remaining_time": "0:05:48", "throughput": 3568.22, "total_tokens": 11227112}
|
| 3454 |
{"current_steps": 17180, "total_steps": 19080, "loss": 4.8449, "lr": 1.4967536313353237e-06, "epoch": 9.0041928721174, "percentage": 90.04, "elapsed_time": "0:52:27", "remaining_time": "0:05:48", "throughput": 3568.25, "total_tokens": 11229544}
|
| 3455 |
{"current_steps": 17185, "total_steps": 19080, "loss": 5.1874, "lr": 1.4889694807246779e-06, "epoch": 9.006813417190775, "percentage": 90.07, "elapsed_time": "0:52:28", "remaining_time": "0:05:47", "throughput": 3568.41, "total_tokens": 11233640}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3453 |
{"current_steps": 17175, "total_steps": 19080, "loss": 5.4825, "lr": 1.5045574483889463e-06, "epoch": 9.001572327044025, "percentage": 90.02, "elapsed_time": "0:52:26", "remaining_time": "0:05:48", "throughput": 3568.22, "total_tokens": 11227112}
|
| 3454 |
{"current_steps": 17180, "total_steps": 19080, "loss": 4.8449, "lr": 1.4967536313353237e-06, "epoch": 9.0041928721174, "percentage": 90.04, "elapsed_time": "0:52:27", "remaining_time": "0:05:48", "throughput": 3568.25, "total_tokens": 11229544}
|
| 3455 |
{"current_steps": 17185, "total_steps": 19080, "loss": 5.1874, "lr": 1.4889694807246779e-06, "epoch": 9.006813417190775, "percentage": 90.07, "elapsed_time": "0:52:28", "remaining_time": "0:05:47", "throughput": 3568.41, "total_tokens": 11233640}
|
| 3456 |
+
{"current_steps": 17190, "total_steps": 19080, "loss": 4.7053, "lr": 1.481205003070424e-06, "epoch": 9.00943396226415, "percentage": 90.09, "elapsed_time": "0:52:28", "remaining_time": "0:05:46", "throughput": 3568.47, "total_tokens": 11236552}
|
| 3457 |
+
{"current_steps": 17195, "total_steps": 19080, "loss": 5.2198, "lr": 1.4734602048695312e-06, "epoch": 9.012054507337526, "percentage": 90.12, "elapsed_time": "0:52:29", "remaining_time": "0:05:45", "throughput": 3568.61, "total_tokens": 11240008}
|
| 3458 |
+
{"current_steps": 17200, "total_steps": 19080, "loss": 5.1301, "lr": 1.465735092602491e-06, "epoch": 9.014675052410901, "percentage": 90.15, "elapsed_time": "0:52:30", "remaining_time": "0:05:44", "throughput": 3568.7, "total_tokens": 11243176}
|
| 3459 |
+
{"current_steps": 17205, "total_steps": 19080, "loss": 4.8785, "lr": 1.4580296727333187e-06, "epoch": 9.017295597484276, "percentage": 90.17, "elapsed_time": "0:52:31", "remaining_time": "0:05:43", "throughput": 3568.83, "total_tokens": 11246568}
|
| 3460 |
+
{"current_steps": 17210, "total_steps": 19080, "loss": 4.8614, "lr": 1.450343951709568e-06, "epoch": 9.019916142557651, "percentage": 90.2, "elapsed_time": "0:52:32", "remaining_time": "0:05:42", "throughput": 3569.05, "total_tokens": 11251080}
|
| 3461 |
+
{"current_steps": 17215, "total_steps": 19080, "loss": 4.8627, "lr": 1.4426779359622916e-06, "epoch": 9.022536687631026, "percentage": 90.23, "elapsed_time": "0:52:33", "remaining_time": "0:05:41", "throughput": 3569.06, "total_tokens": 11253256}
|
| 3462 |
+
{"current_steps": 17220, "total_steps": 19080, "loss": 4.9337, "lr": 1.4350316319060585e-06, "epoch": 9.025157232704403, "percentage": 90.25, "elapsed_time": "0:52:33", "remaining_time": "0:05:40", "throughput": 3569.2, "total_tokens": 11256936}
|
| 3463 |
+
{"current_steps": 17225, "total_steps": 19080, "loss": 4.2928, "lr": 1.4274050459389594e-06, "epoch": 9.027777777777779, "percentage": 90.28, "elapsed_time": "0:52:34", "remaining_time": "0:05:39", "throughput": 3569.26, "total_tokens": 11259784}
|
| 3464 |
+
{"current_steps": 17230, "total_steps": 19080, "loss": 4.5473, "lr": 1.4197981844425583e-06, "epoch": 9.030398322851154, "percentage": 90.3, "elapsed_time": "0:52:35", "remaining_time": "0:05:38", "throughput": 3569.35, "total_tokens": 11262728}
|
| 3465 |
+
{"current_steps": 17235, "total_steps": 19080, "loss": 4.9116, "lr": 1.4122110537819365e-06, "epoch": 9.033018867924529, "percentage": 90.33, "elapsed_time": "0:52:36", "remaining_time": "0:05:37", "throughput": 3569.42, "total_tokens": 11265640}
|
| 3466 |
+
{"current_steps": 17240, "total_steps": 19080, "loss": 4.6044, "lr": 1.4046436603056601e-06, "epoch": 9.035639412997904, "percentage": 90.36, "elapsed_time": "0:52:37", "remaining_time": "0:05:36", "throughput": 3569.61, "total_tokens": 11270344}
|
| 3467 |
+
{"current_steps": 17245, "total_steps": 19080, "loss": 4.3928, "lr": 1.397096010345772e-06, "epoch": 9.03825995807128, "percentage": 90.38, "elapsed_time": "0:52:37", "remaining_time": "0:05:36", "throughput": 3569.61, "total_tokens": 11272584}
|
| 3468 |
+
{"current_steps": 17250, "total_steps": 19080, "loss": 4.9733, "lr": 1.3895681102178094e-06, "epoch": 9.040880503144654, "percentage": 90.41, "elapsed_time": "0:52:38", "remaining_time": "0:05:35", "throughput": 3569.78, "total_tokens": 11276872}
|
| 3469 |
+
{"current_steps": 17255, "total_steps": 19080, "loss": 4.564, "lr": 1.3820599662207695e-06, "epoch": 9.04350104821803, "percentage": 90.44, "elapsed_time": "0:52:39", "remaining_time": "0:05:34", "throughput": 3569.84, "total_tokens": 11279688}
|
| 3470 |
+
{"current_steps": 17260, "total_steps": 19080, "loss": 4.4587, "lr": 1.3745715846371244e-06, "epoch": 9.046121593291405, "percentage": 90.46, "elapsed_time": "0:52:40", "remaining_time": "0:05:33", "throughput": 3569.94, "total_tokens": 11282888}
|
| 3471 |
+
{"current_steps": 17265, "total_steps": 19080, "loss": 4.5152, "lr": 1.3671029717328142e-06, "epoch": 9.04874213836478, "percentage": 90.49, "elapsed_time": "0:52:41", "remaining_time": "0:05:32", "throughput": 3570.02, "total_tokens": 11285928}
|
| 3472 |
+
{"current_steps": 17270, "total_steps": 19080, "loss": 4.7721, "lr": 1.3596541337572265e-06, "epoch": 9.051362683438155, "percentage": 90.51, "elapsed_time": "0:52:42", "remaining_time": "0:05:31", "throughput": 3570.14, "total_tokens": 11289288}
|
| 3473 |
+
{"current_steps": 17275, "total_steps": 19080, "loss": 5.4317, "lr": 1.3522250769432115e-06, "epoch": 9.05398322851153, "percentage": 90.54, "elapsed_time": "0:52:43", "remaining_time": "0:05:30", "throughput": 3570.5, "total_tokens": 11295976}
|
| 3474 |
+
{"current_steps": 17280, "total_steps": 19080, "loss": 5.0974, "lr": 1.3448158075070687e-06, "epoch": 9.056603773584905, "percentage": 90.57, "elapsed_time": "0:52:44", "remaining_time": "0:05:29", "throughput": 3570.63, "total_tokens": 11299816}
|
| 3475 |
+
{"current_steps": 17285, "total_steps": 19080, "loss": 5.1409, "lr": 1.337426331648528e-06, "epoch": 9.05922431865828, "percentage": 90.59, "elapsed_time": "0:52:45", "remaining_time": "0:05:28", "throughput": 3570.73, "total_tokens": 11303176}
|
| 3476 |
+
{"current_steps": 17290, "total_steps": 19080, "loss": 4.0771, "lr": 1.3300566555507709e-06, "epoch": 9.061844863731656, "percentage": 90.62, "elapsed_time": "0:52:46", "remaining_time": "0:05:27", "throughput": 3570.85, "total_tokens": 11306376}
|
| 3477 |
+
{"current_steps": 17295, "total_steps": 19080, "loss": 4.7883, "lr": 1.3227067853804065e-06, "epoch": 9.064465408805031, "percentage": 90.64, "elapsed_time": "0:52:47", "remaining_time": "0:05:26", "throughput": 3571.04, "total_tokens": 11310696}
|
| 3478 |
+
{"current_steps": 17300, "total_steps": 19080, "loss": 4.5838, "lr": 1.315376727287465e-06, "epoch": 9.067085953878406, "percentage": 90.67, "elapsed_time": "0:52:48", "remaining_time": "0:05:25", "throughput": 3571.23, "total_tokens": 11314888}
|
| 3479 |
+
{"current_steps": 17305, "total_steps": 19080, "loss": 4.4454, "lr": 1.3080664874054127e-06, "epoch": 9.069706498951781, "percentage": 90.7, "elapsed_time": "0:52:49", "remaining_time": "0:05:25", "throughput": 3571.31, "total_tokens": 11317576}
|
| 3480 |
+
{"current_steps": 17310, "total_steps": 19080, "loss": 5.3457, "lr": 1.3007760718511176e-06, "epoch": 9.072327044025156, "percentage": 90.72, "elapsed_time": "0:52:49", "remaining_time": "0:05:24", "throughput": 3571.41, "total_tokens": 11320840}
|
| 3481 |
+
{"current_steps": 17315, "total_steps": 19080, "loss": 4.1925, "lr": 1.2935054867248692e-06, "epoch": 9.074947589098532, "percentage": 90.75, "elapsed_time": "0:52:50", "remaining_time": "0:05:23", "throughput": 3571.49, "total_tokens": 11324040}
|
| 3482 |
+
{"current_steps": 17320, "total_steps": 19080, "loss": 4.8688, "lr": 1.2862547381103567e-06, "epoch": 9.077568134171909, "percentage": 90.78, "elapsed_time": "0:52:51", "remaining_time": "0:05:22", "throughput": 3571.64, "total_tokens": 11327784}
|
| 3483 |
+
{"current_steps": 17325, "total_steps": 19080, "loss": 5.0509, "lr": 1.2790238320746827e-06, "epoch": 9.080188679245284, "percentage": 90.8, "elapsed_time": "0:52:52", "remaining_time": "0:05:21", "throughput": 3571.77, "total_tokens": 11331304}
|
| 3484 |
+
{"current_steps": 17330, "total_steps": 19080, "loss": 4.9781, "lr": 1.271812774668335e-06, "epoch": 9.082809224318659, "percentage": 90.83, "elapsed_time": "0:52:53", "remaining_time": "0:05:20", "throughput": 3571.86, "total_tokens": 11334312}
|
| 3485 |
+
{"current_steps": 17335, "total_steps": 19080, "loss": 5.2861, "lr": 1.2646215719251952e-06, "epoch": 9.085429769392034, "percentage": 90.85, "elapsed_time": "0:52:54", "remaining_time": "0:05:19", "throughput": 3572.08, "total_tokens": 11339144}
|
| 3486 |
+
{"current_steps": 17340, "total_steps": 19080, "loss": 4.8642, "lr": 1.2574502298625334e-06, "epoch": 9.08805031446541, "percentage": 90.88, "elapsed_time": "0:52:55", "remaining_time": "0:05:18", "throughput": 3572.21, "total_tokens": 11342664}
|
| 3487 |
+
{"current_steps": 17345, "total_steps": 19080, "loss": 4.6836, "lr": 1.250298754481008e-06, "epoch": 9.090670859538784, "percentage": 90.91, "elapsed_time": "0:52:56", "remaining_time": "0:05:17", "throughput": 3572.3, "total_tokens": 11345768}
|
| 3488 |
+
{"current_steps": 17350, "total_steps": 19080, "loss": 5.2562, "lr": 1.2431671517646403e-06, "epoch": 9.09329140461216, "percentage": 90.93, "elapsed_time": "0:52:56", "remaining_time": "0:05:16", "throughput": 3572.47, "total_tokens": 11349448}
|
| 3489 |
+
{"current_steps": 17355, "total_steps": 19080, "loss": 4.4867, "lr": 1.2360554276808295e-06, "epoch": 9.095911949685535, "percentage": 90.96, "elapsed_time": "0:52:57", "remaining_time": "0:05:15", "throughput": 3572.58, "total_tokens": 11352744}
|
| 3490 |
+
{"current_steps": 17360, "total_steps": 19080, "loss": 4.8949, "lr": 1.228963588180343e-06, "epoch": 9.09853249475891, "percentage": 90.99, "elapsed_time": "0:52:58", "remaining_time": "0:05:14", "throughput": 3572.59, "total_tokens": 11354920}
|
| 3491 |
+
{"current_steps": 17365, "total_steps": 19080, "loss": 5.2611, "lr": 1.2218916391973118e-06, "epoch": 9.101153039832285, "percentage": 91.01, "elapsed_time": "0:52:59", "remaining_time": "0:05:14", "throughput": 3572.78, "total_tokens": 11359240}
|
| 3492 |
+
{"current_steps": 17370, "total_steps": 19080, "loss": 4.7021, "lr": 1.2148395866492135e-06, "epoch": 9.10377358490566, "percentage": 91.04, "elapsed_time": "0:53:00", "remaining_time": "0:05:13", "throughput": 3572.85, "total_tokens": 11362056}
|
| 3493 |
+
{"current_steps": 17375, "total_steps": 19080, "loss": 4.271, "lr": 1.2078074364368862e-06, "epoch": 9.106394129979035, "percentage": 91.06, "elapsed_time": "0:53:00", "remaining_time": "0:05:12", "throughput": 3572.89, "total_tokens": 11364648}
|
| 3494 |
+
{"current_steps": 17380, "total_steps": 19080, "loss": 4.9505, "lr": 1.2007951944445122e-06, "epoch": 9.10901467505241, "percentage": 91.09, "elapsed_time": "0:53:01", "remaining_time": "0:05:11", "throughput": 3573.03, "total_tokens": 11368168}
|
| 3495 |
+
{"current_steps": 17385, "total_steps": 19080, "loss": 4.5032, "lr": 1.1938028665396173e-06, "epoch": 9.111635220125786, "percentage": 91.12, "elapsed_time": "0:53:02", "remaining_time": "0:05:10", "throughput": 3573.09, "total_tokens": 11370856}
|
| 3496 |
+
{"current_steps": 17390, "total_steps": 19080, "loss": 4.5134, "lr": 1.1868304585730571e-06, "epoch": 9.114255765199161, "percentage": 91.14, "elapsed_time": "0:53:03", "remaining_time": "0:05:09", "throughput": 3573.18, "total_tokens": 11373768}
|
| 3497 |
+
{"current_steps": 17395, "total_steps": 19080, "loss": 5.4472, "lr": 1.1798779763790346e-06, "epoch": 9.116876310272536, "percentage": 91.17, "elapsed_time": "0:53:03", "remaining_time": "0:05:08", "throughput": 3573.31, "total_tokens": 11377192}
|
| 3498 |
+
{"current_steps": 17400, "total_steps": 19080, "loss": 4.7259, "lr": 1.1729454257750544e-06, "epoch": 9.119496855345911, "percentage": 91.19, "elapsed_time": "0:53:04", "remaining_time": "0:05:07", "throughput": 3573.37, "total_tokens": 11379912}
|
| 3499 |
+
{"current_steps": 17405, "total_steps": 19080, "loss": 5.1425, "lr": 1.1660328125619652e-06, "epoch": 9.122117400419286, "percentage": 91.22, "elapsed_time": "0:53:05", "remaining_time": "0:05:06", "throughput": 3573.51, "total_tokens": 11383496}
|
| 3500 |
+
{"current_steps": 17410, "total_steps": 19080, "loss": 5.006, "lr": 1.1591401425239318e-06, "epoch": 9.124737945492662, "percentage": 91.25, "elapsed_time": "0:53:06", "remaining_time": "0:05:05", "throughput": 3573.57, "total_tokens": 11386504}
|
| 3501 |
+
{"current_steps": 17415, "total_steps": 19080, "loss": 4.7213, "lr": 1.1522674214284158e-06, "epoch": 9.127358490566039, "percentage": 91.27, "elapsed_time": "0:53:06", "remaining_time": "0:05:04", "throughput": 3573.59, "total_tokens": 11388776}
|
| 3502 |
+
{"current_steps": 17420, "total_steps": 19080, "loss": 5.4081, "lr": 1.145414655026203e-06, "epoch": 9.129979035639414, "percentage": 91.3, "elapsed_time": "0:53:07", "remaining_time": "0:05:03", "throughput": 3573.69, "total_tokens": 11391976}
|
| 3503 |
+
{"current_steps": 17425, "total_steps": 19080, "loss": 4.7462, "lr": 1.1385818490513733e-06, "epoch": 9.132599580712789, "percentage": 91.33, "elapsed_time": "0:53:08", "remaining_time": "0:05:02", "throughput": 3573.77, "total_tokens": 11394856}
|
| 3504 |
+
{"current_steps": 17430, "total_steps": 19080, "loss": 4.6352, "lr": 1.1317690092213007e-06, "epoch": 9.135220125786164, "percentage": 91.35, "elapsed_time": "0:53:09", "remaining_time": "0:05:01", "throughput": 3573.87, "total_tokens": 11397896}
|
| 3505 |
+
{"current_steps": 17435, "total_steps": 19080, "loss": 4.9955, "lr": 1.124976141236675e-06, "epoch": 9.13784067085954, "percentage": 91.38, "elapsed_time": "0:53:10", "remaining_time": "0:05:00", "throughput": 3573.94, "total_tokens": 11401096}
|
| 3506 |
+
{"current_steps": 17440, "total_steps": 19080, "loss": 4.6331, "lr": 1.1182032507814354e-06, "epoch": 9.140461215932914, "percentage": 91.4, "elapsed_time": "0:53:10", "remaining_time": "0:05:00", "throughput": 3574.06, "total_tokens": 11404616}
|
| 3507 |
+
{"current_steps": 17445, "total_steps": 19080, "loss": 4.3902, "lr": 1.1114503435228434e-06, "epoch": 9.14308176100629, "percentage": 91.43, "elapsed_time": "0:53:11", "remaining_time": "0:04:59", "throughput": 3574.15, "total_tokens": 11407848}
|
| 3508 |
+
{"current_steps": 17450, "total_steps": 19080, "loss": 4.9676, "lr": 1.1047174251114234e-06, "epoch": 9.145702306079665, "percentage": 91.46, "elapsed_time": "0:53:12", "remaining_time": "0:04:58", "throughput": 3574.19, "total_tokens": 11410760}
|
| 3509 |
+
{"current_steps": 17455, "total_steps": 19080, "loss": 4.6585, "lr": 1.0980045011809604e-06, "epoch": 9.14832285115304, "percentage": 91.48, "elapsed_time": "0:53:13", "remaining_time": "0:04:57", "throughput": 3574.27, "total_tokens": 11413864}
|
| 3510 |
+
{"current_steps": 17460, "total_steps": 19080, "loss": 4.3511, "lr": 1.0913115773485388e-06, "epoch": 9.150943396226415, "percentage": 91.51, "elapsed_time": "0:53:14", "remaining_time": "0:04:56", "throughput": 3574.37, "total_tokens": 11416808}
|
| 3511 |
+
{"current_steps": 17465, "total_steps": 19080, "loss": 4.8272, "lr": 1.084638659214482e-06, "epoch": 9.15356394129979, "percentage": 91.54, "elapsed_time": "0:53:14", "remaining_time": "0:04:55", "throughput": 3574.47, "total_tokens": 11419944}
|
| 3512 |
+
{"current_steps": 17470, "total_steps": 19080, "loss": 4.485, "lr": 1.0779857523623815e-06, "epoch": 9.156184486373165, "percentage": 91.56, "elapsed_time": "0:53:15", "remaining_time": "0:04:54", "throughput": 3574.5, "total_tokens": 11422728}
|
| 3513 |
+
{"current_steps": 17475, "total_steps": 19080, "loss": 5.3814, "lr": 1.071352862359093e-06, "epoch": 9.15880503144654, "percentage": 91.59, "elapsed_time": "0:53:17", "remaining_time": "0:04:53", "throughput": 3574.96, "total_tokens": 11430984}
|
| 3514 |
+
{"current_steps": 17480, "total_steps": 19080, "loss": 4.9479, "lr": 1.0647399947547127e-06, "epoch": 9.161425576519916, "percentage": 91.61, "elapsed_time": "0:53:18", "remaining_time": "0:04:52", "throughput": 3575.03, "total_tokens": 11433672}
|
| 3515 |
+
{"current_steps": 17485, "total_steps": 19080, "loss": 4.6195, "lr": 1.0581471550825812e-06, "epoch": 9.164046121593291, "percentage": 91.64, "elapsed_time": "0:53:18", "remaining_time": "0:04:51", "throughput": 3575.06, "total_tokens": 11436168}
|
| 3516 |
+
{"current_steps": 17490, "total_steps": 19080, "loss": 4.1144, "lr": 1.0515743488592939e-06, "epoch": 9.166666666666666, "percentage": 91.67, "elapsed_time": "0:53:19", "remaining_time": "0:04:50", "throughput": 3575.19, "total_tokens": 11439528}
|
| 3517 |
+
{"current_steps": 17495, "total_steps": 19080, "loss": 4.5112, "lr": 1.0450215815846736e-06, "epoch": 9.169287211740041, "percentage": 91.69, "elapsed_time": "0:53:20", "remaining_time": "0:04:49", "throughput": 3575.25, "total_tokens": 11442312}
|
| 3518 |
+
{"current_steps": 17500, "total_steps": 19080, "loss": 4.8115, "lr": 1.0384888587417736e-06, "epoch": 9.171907756813416, "percentage": 91.72, "elapsed_time": "0:53:21", "remaining_time": "0:04:49", "throughput": 3575.34, "total_tokens": 11445416}
|
| 3519 |
+
{"current_steps": 17505, "total_steps": 19080, "loss": 4.4227, "lr": 1.0319761857968735e-06, "epoch": 9.174528301886792, "percentage": 91.75, "elapsed_time": "0:53:22", "remaining_time": "0:04:48", "throughput": 3575.47, "total_tokens": 11448712}
|
| 3520 |
+
{"current_steps": 17510, "total_steps": 19080, "loss": 4.5735, "lr": 1.0254835681994895e-06, "epoch": 9.177148846960169, "percentage": 91.77, "elapsed_time": "0:53:22", "remaining_time": "0:04:47", "throughput": 3575.57, "total_tokens": 11452104}
|
| 3521 |
+
{"current_steps": 17515, "total_steps": 19080, "loss": 4.9944, "lr": 1.0190110113823426e-06, "epoch": 9.179769392033544, "percentage": 91.8, "elapsed_time": "0:53:23", "remaining_time": "0:04:46", "throughput": 3575.75, "total_tokens": 11456136}
|
| 3522 |
+
{"current_steps": 17520, "total_steps": 19080, "loss": 4.1644, "lr": 1.0125585207613752e-06, "epoch": 9.182389937106919, "percentage": 91.82, "elapsed_time": "0:53:24", "remaining_time": "0:04:45", "throughput": 3575.85, "total_tokens": 11459272}
|
| 3523 |
+
{"current_steps": 17525, "total_steps": 19080, "loss": 4.7309, "lr": 1.0061261017357327e-06, "epoch": 9.185010482180294, "percentage": 91.85, "elapsed_time": "0:53:25", "remaining_time": "0:04:44", "throughput": 3576.02, "total_tokens": 11463208}
|
| 3524 |
+
{"current_steps": 17530, "total_steps": 19080, "loss": 4.6128, "lr": 9.997137596877732e-07, "epoch": 9.18763102725367, "percentage": 91.88, "elapsed_time": "0:53:26", "remaining_time": "0:04:43", "throughput": 3576.13, "total_tokens": 11466472}
|
| 3525 |
+
{"current_steps": 17535, "total_steps": 19080, "loss": 5.3524, "lr": 9.93321499983052e-07, "epoch": 9.190251572327044, "percentage": 91.9, "elapsed_time": "0:53:27", "remaining_time": "0:04:42", "throughput": 3576.22, "total_tokens": 11469192}
|
| 3526 |
+
{"current_steps": 17540, "total_steps": 19080, "loss": 4.379, "lr": 9.869493279703158e-07, "epoch": 9.19287211740042, "percentage": 91.93, "elapsed_time": "0:53:27", "remaining_time": "0:04:41", "throughput": 3576.29, "total_tokens": 11472232}
|
| 3527 |
+
{"current_steps": 17545, "total_steps": 19080, "loss": 4.436, "lr": 9.805972489815102e-07, "epoch": 9.195492662473795, "percentage": 91.95, "elapsed_time": "0:53:28", "remaining_time": "0:04:40", "throughput": 3576.43, "total_tokens": 11476040}
|
| 3528 |
+
{"current_steps": 17550, "total_steps": 19080, "loss": 4.5329, "lr": 9.742652683317643e-07, "epoch": 9.19811320754717, "percentage": 91.98, "elapsed_time": "0:53:29", "remaining_time": "0:04:39", "throughput": 3576.47, "total_tokens": 11478728}
|
| 3529 |
+
{"current_steps": 17555, "total_steps": 19080, "loss": 4.7362, "lr": 9.679533913193927e-07, "epoch": 9.200733752620545, "percentage": 92.01, "elapsed_time": "0:53:30", "remaining_time": "0:04:38", "throughput": 3576.52, "total_tokens": 11481448}
|
| 3530 |
+
{"current_steps": 17560, "total_steps": 19080, "loss": 4.3449, "lr": 9.61661623225879e-07, "epoch": 9.20335429769392, "percentage": 92.03, "elapsed_time": "0:53:30", "remaining_time": "0:04:37", "throughput": 3576.62, "total_tokens": 11484424}
|
| 3531 |
+
{"current_steps": 17565, "total_steps": 19080, "loss": 4.8636, "lr": 9.553899693158951e-07, "epoch": 9.205974842767295, "percentage": 92.06, "elapsed_time": "0:53:31", "remaining_time": "0:04:37", "throughput": 3576.71, "total_tokens": 11487560}
|
| 3532 |
+
{"current_steps": 17570, "total_steps": 19080, "loss": 5.0432, "lr": 9.491384348372684e-07, "epoch": 9.20859538784067, "percentage": 92.09, "elapsed_time": "0:53:32", "remaining_time": "0:04:36", "throughput": 3576.79, "total_tokens": 11490632}
|
| 3533 |
+
{"current_steps": 17575, "total_steps": 19080, "loss": 4.439, "lr": 9.429070250210004e-07, "epoch": 9.211215932914046, "percentage": 92.11, "elapsed_time": "0:53:33", "remaining_time": "0:04:35", "throughput": 3576.83, "total_tokens": 11493032}
|
| 3534 |
+
{"current_steps": 17580, "total_steps": 19080, "loss": 5.0909, "lr": 9.366957450812535e-07, "epoch": 9.213836477987421, "percentage": 92.14, "elapsed_time": "0:53:33", "remaining_time": "0:04:34", "throughput": 3576.92, "total_tokens": 11496200}
|
| 3535 |
+
{"current_steps": 17585, "total_steps": 19080, "loss": 4.0487, "lr": 9.305046002153345e-07, "epoch": 9.216457023060796, "percentage": 92.16, "elapsed_time": "0:53:34", "remaining_time": "0:04:33", "throughput": 3576.96, "total_tokens": 11498696}
|
| 3536 |
+
{"current_steps": 17590, "total_steps": 19080, "loss": 4.8605, "lr": 9.243335956037186e-07, "epoch": 9.219077568134171, "percentage": 92.19, "elapsed_time": "0:53:35", "remaining_time": "0:04:32", "throughput": 3577.06, "total_tokens": 11502312}
|
| 3537 |
+
{"current_steps": 17595, "total_steps": 19080, "loss": 4.4524, "lr": 9.181827364100171e-07, "epoch": 9.221698113207546, "percentage": 92.22, "elapsed_time": "0:53:36", "remaining_time": "0:04:31", "throughput": 3577.12, "total_tokens": 11505160}
|
| 3538 |
+
{"current_steps": 17600, "total_steps": 19080, "loss": 4.4516, "lr": 9.120520277809852e-07, "epoch": 9.224318658280922, "percentage": 92.24, "elapsed_time": "0:53:37", "remaining_time": "0:04:30", "throughput": 3577.18, "total_tokens": 11508456}
|
| 3539 |
+
{"current_steps": 17605, "total_steps": 19080, "loss": 4.9652, "lr": 9.059414748465278e-07, "epoch": 9.226939203354299, "percentage": 92.27, "elapsed_time": "0:53:38", "remaining_time": "0:04:29", "throughput": 3577.3, "total_tokens": 11511816}
|
| 3540 |
+
{"current_steps": 17610, "total_steps": 19080, "loss": 5.1351, "lr": 8.998510827196715e-07, "epoch": 9.229559748427674, "percentage": 92.3, "elapsed_time": "0:53:38", "remaining_time": "0:04:28", "throughput": 3577.3, "total_tokens": 11514088}
|
| 3541 |
+
{"current_steps": 17615, "total_steps": 19080, "loss": 4.6463, "lr": 8.937808564965733e-07, "epoch": 9.232180293501049, "percentage": 92.32, "elapsed_time": "0:53:39", "remaining_time": "0:04:27", "throughput": 3577.38, "total_tokens": 11517096}
|
| 3542 |
+
{"current_steps": 17620, "total_steps": 19080, "loss": 4.1463, "lr": 8.877308012565339e-07, "epoch": 9.234800838574424, "percentage": 92.35, "elapsed_time": "0:53:40", "remaining_time": "0:04:26", "throughput": 3577.46, "total_tokens": 11520168}
|
| 3543 |
+
{"current_steps": 17625, "total_steps": 19080, "loss": 5.0439, "lr": 8.817009220619482e-07, "epoch": 9.2374213836478, "percentage": 92.37, "elapsed_time": "0:53:41", "remaining_time": "0:04:25", "throughput": 3577.63, "total_tokens": 11523944}
|
| 3544 |
+
{"current_steps": 17630, "total_steps": 19080, "loss": 5.4636, "lr": 8.756912239583554e-07, "epoch": 9.240041928721174, "percentage": 92.4, "elapsed_time": "0:53:42", "remaining_time": "0:04:24", "throughput": 3577.78, "total_tokens": 11527720}
|
| 3545 |
+
{"current_steps": 17635, "total_steps": 19080, "loss": 4.8167, "lr": 8.697017119743911e-07, "epoch": 9.24266247379455, "percentage": 92.43, "elapsed_time": "0:53:42", "remaining_time": "0:04:24", "throughput": 3577.91, "total_tokens": 11531304}
|
| 3546 |
+
{"current_steps": 17640, "total_steps": 19080, "loss": 4.9497, "lr": 8.637323911218048e-07, "epoch": 9.245283018867925, "percentage": 92.45, "elapsed_time": "0:53:43", "remaining_time": "0:04:23", "throughput": 3578.05, "total_tokens": 11535176}
|
| 3547 |
+
{"current_steps": 17645, "total_steps": 19080, "loss": 4.1837, "lr": 8.577832663954538e-07, "epoch": 9.2479035639413, "percentage": 92.48, "elapsed_time": "0:53:44", "remaining_time": "0:04:22", "throughput": 3578.21, "total_tokens": 11539016}
|
| 3548 |
+
{"current_steps": 17650, "total_steps": 19080, "loss": 4.5938, "lr": 8.51854342773295e-07, "epoch": 9.250524109014675, "percentage": 92.51, "elapsed_time": "0:53:45", "remaining_time": "0:04:21", "throughput": 3578.45, "total_tokens": 11543752}
|
| 3549 |
+
{"current_steps": 17655, "total_steps": 19080, "loss": 4.908, "lr": 8.459456252163739e-07, "epoch": 9.25314465408805, "percentage": 92.53, "elapsed_time": "0:53:46", "remaining_time": "0:04:20", "throughput": 3578.52, "total_tokens": 11546664}
|
| 3550 |
+
{"current_steps": 17660, "total_steps": 19080, "loss": 4.61, "lr": 8.400571186688466e-07, "epoch": 9.255765199161425, "percentage": 92.56, "elapsed_time": "0:53:47", "remaining_time": "0:04:19", "throughput": 3578.55, "total_tokens": 11549032}
|
| 3551 |
+
{"current_steps": 17665, "total_steps": 19080, "loss": 5.1138, "lr": 8.341888280579386e-07, "epoch": 9.2583857442348, "percentage": 92.58, "elapsed_time": "0:53:48", "remaining_time": "0:04:18", "throughput": 3578.67, "total_tokens": 11552328}
|
| 3552 |
+
{"current_steps": 17670, "total_steps": 19080, "loss": 4.7185, "lr": 8.283407582939689e-07, "epoch": 9.261006289308176, "percentage": 92.61, "elapsed_time": "0:53:48", "remaining_time": "0:04:17", "throughput": 3578.74, "total_tokens": 11555464}
|
| 3553 |
+
{"current_steps": 17675, "total_steps": 19080, "loss": 4.7221, "lr": 8.22512914270332e-07, "epoch": 9.26362683438155, "percentage": 92.64, "elapsed_time": "0:53:49", "remaining_time": "0:04:16", "throughput": 3578.78, "total_tokens": 11558088}
|
| 3554 |
+
{"current_steps": 17680, "total_steps": 19080, "loss": 4.4545, "lr": 8.167053008635101e-07, "epoch": 9.266247379454926, "percentage": 92.66, "elapsed_time": "0:53:50", "remaining_time": "0:04:15", "throughput": 3578.85, "total_tokens": 11560872}
|
| 3555 |
+
{"current_steps": 17685, "total_steps": 19080, "loss": 4.2644, "lr": 8.109179229330438e-07, "epoch": 9.268867924528301, "percentage": 92.69, "elapsed_time": "0:53:51", "remaining_time": "0:04:14", "throughput": 3578.99, "total_tokens": 11564264}
|
| 3556 |
+
{"current_steps": 17690, "total_steps": 19080, "loss": 5.0274, "lr": 8.051507853215401e-07, "epoch": 9.271488469601676, "percentage": 92.71, "elapsed_time": "0:53:51", "remaining_time": "0:04:13", "throughput": 3579.11, "total_tokens": 11567656}
|
| 3557 |
+
{"current_steps": 17695, "total_steps": 19080, "loss": 4.6356, "lr": 7.994038928546887e-07, "epoch": 9.274109014675052, "percentage": 92.74, "elapsed_time": "0:53:52", "remaining_time": "0:04:13", "throughput": 3579.23, "total_tokens": 11571176}
|
| 3558 |
+
{"current_steps": 17700, "total_steps": 19080, "loss": 4.8219, "lr": 7.93677250341221e-07, "epoch": 9.276729559748428, "percentage": 92.77, "elapsed_time": "0:53:53", "remaining_time": "0:04:12", "throughput": 3579.31, "total_tokens": 11574216}
|
| 3559 |
+
{"current_steps": 17705, "total_steps": 19080, "loss": 5.4325, "lr": 7.879708625729287e-07, "epoch": 9.279350104821804, "percentage": 92.79, "elapsed_time": "0:53:54", "remaining_time": "0:04:11", "throughput": 3579.4, "total_tokens": 11577608}
|
| 3560 |
+
{"current_steps": 17710, "total_steps": 19080, "loss": 5.1345, "lr": 7.822847343246564e-07, "epoch": 9.281970649895179, "percentage": 92.82, "elapsed_time": "0:53:55", "remaining_time": "0:04:10", "throughput": 3579.51, "total_tokens": 11581000}
|
| 3561 |
+
{"current_steps": 17715, "total_steps": 19080, "loss": 4.9794, "lr": 7.766188703542954e-07, "epoch": 9.284591194968554, "percentage": 92.85, "elapsed_time": "0:53:56", "remaining_time": "0:04:09", "throughput": 3579.64, "total_tokens": 11584840}
|
| 3562 |
+
{"current_steps": 17720, "total_steps": 19080, "loss": 4.4785, "lr": 7.709732754027866e-07, "epoch": 9.28721174004193, "percentage": 92.87, "elapsed_time": "0:53:57", "remaining_time": "0:04:08", "throughput": 3579.74, "total_tokens": 11587912}
|
| 3563 |
+
{"current_steps": 17725, "total_steps": 19080, "loss": 4.7322, "lr": 7.653479541941038e-07, "epoch": 9.289832285115304, "percentage": 92.9, "elapsed_time": "0:53:58", "remaining_time": "0:04:07", "throughput": 3579.91, "total_tokens": 11591752}
|
| 3564 |
+
{"current_steps": 17730, "total_steps": 19080, "loss": 4.8923, "lr": 7.597429114352572e-07, "epoch": 9.29245283018868, "percentage": 92.92, "elapsed_time": "0:53:58", "remaining_time": "0:04:06", "throughput": 3579.94, "total_tokens": 11594248}
|
| 3565 |
+
{"current_steps": 17735, "total_steps": 19080, "loss": 4.5835, "lr": 7.541581518162922e-07, "epoch": 9.295073375262055, "percentage": 92.95, "elapsed_time": "0:53:59", "remaining_time": "0:04:05", "throughput": 3580.01, "total_tokens": 11597448}
|
| 3566 |
+
{"current_steps": 17740, "total_steps": 19080, "loss": 4.8804, "lr": 7.485936800102788e-07, "epoch": 9.29769392033543, "percentage": 92.98, "elapsed_time": "0:54:00", "remaining_time": "0:04:04", "throughput": 3580.08, "total_tokens": 11600360}
|
| 3567 |
+
{"current_steps": 17745, "total_steps": 19080, "loss": 4.2884, "lr": 7.430495006733152e-07, "epoch": 9.300314465408805, "percentage": 93.0, "elapsed_time": "0:54:01", "remaining_time": "0:04:03", "throughput": 3580.17, "total_tokens": 11603528}
|
| 3568 |
+
{"current_steps": 17750, "total_steps": 19080, "loss": 4.9263, "lr": 7.375256184445178e-07, "epoch": 9.30293501048218, "percentage": 93.03, "elapsed_time": "0:54:01", "remaining_time": "0:04:02", "throughput": 3580.24, "total_tokens": 11606600}
|
| 3569 |
+
{"current_steps": 17755, "total_steps": 19080, "loss": 4.2094, "lr": 7.320220379460146e-07, "epoch": 9.305555555555555, "percentage": 93.06, "elapsed_time": "0:54:02", "remaining_time": "0:04:01", "throughput": 3580.28, "total_tokens": 11609064}
|
| 3570 |
+
{"current_steps": 17760, "total_steps": 19080, "loss": 4.515, "lr": 7.265387637829524e-07, "epoch": 9.30817610062893, "percentage": 93.08, "elapsed_time": "0:54:03", "remaining_time": "0:04:01", "throughput": 3580.37, "total_tokens": 11612328}
|
| 3571 |
+
{"current_steps": 17765, "total_steps": 19080, "loss": 4.9506, "lr": 7.210758005434887e-07, "epoch": 9.310796645702306, "percentage": 93.11, "elapsed_time": "0:54:04", "remaining_time": "0:04:00", "throughput": 3580.47, "total_tokens": 11615912}
|
| 3572 |
+
{"current_steps": 17770, "total_steps": 19080, "loss": 4.9355, "lr": 7.156331527987753e-07, "epoch": 9.31341719077568, "percentage": 93.13, "elapsed_time": "0:54:04", "remaining_time": "0:03:59", "throughput": 3580.56, "total_tokens": 11618888}
|
| 3573 |
+
{"current_steps": 17775, "total_steps": 19080, "loss": 4.873, "lr": 7.102108251029777e-07, "epoch": 9.316037735849056, "percentage": 93.16, "elapsed_time": "0:54:05", "remaining_time": "0:03:58", "throughput": 3580.6, "total_tokens": 11621544}
|
| 3574 |
+
{"current_steps": 17780, "total_steps": 19080, "loss": 4.7155, "lr": 7.04808821993247e-07, "epoch": 9.318658280922431, "percentage": 93.19, "elapsed_time": "0:54:06", "remaining_time": "0:03:57", "throughput": 3580.63, "total_tokens": 11624040}
|
| 3575 |
+
{"current_steps": 17785, "total_steps": 19080, "loss": 4.9008, "lr": 6.994271479897314e-07, "epoch": 9.321278825995806, "percentage": 93.21, "elapsed_time": "0:54:07", "remaining_time": "0:03:56", "throughput": 3580.68, "total_tokens": 11626728}
|
| 3576 |
+
{"current_steps": 17790, "total_steps": 19080, "loss": 4.632, "lr": 6.940658075955759e-07, "epoch": 9.323899371069182, "percentage": 93.24, "elapsed_time": "0:54:07", "remaining_time": "0:03:55", "throughput": 3580.77, "total_tokens": 11629832}
|
| 3577 |
+
{"current_steps": 17795, "total_steps": 19080, "loss": 4.7675, "lr": 6.887248052969003e-07, "epoch": 9.326519916142558, "percentage": 93.27, "elapsed_time": "0:54:08", "remaining_time": "0:03:54", "throughput": 3580.91, "total_tokens": 11633320}
|
| 3578 |
+
{"current_steps": 17800, "total_steps": 19080, "loss": 4.7699, "lr": 6.834041455628104e-07, "epoch": 9.329140461215934, "percentage": 93.29, "elapsed_time": "0:54:09", "remaining_time": "0:03:53", "throughput": 3580.98, "total_tokens": 11636104}
|
| 3579 |
+
{"current_steps": 17805, "total_steps": 19080, "loss": 4.4308, "lr": 6.781038328454003e-07, "epoch": 9.331761006289309, "percentage": 93.32, "elapsed_time": "0:54:10", "remaining_time": "0:03:52", "throughput": 3581.1, "total_tokens": 11639752}
|
| 3580 |
+
{"current_steps": 17810, "total_steps": 19080, "loss": 4.8927, "lr": 6.728238715797169e-07, "epoch": 9.334381551362684, "percentage": 93.34, "elapsed_time": "0:54:11", "remaining_time": "0:03:51", "throughput": 3581.16, "total_tokens": 11642664}
|
| 3581 |
+
{"current_steps": 17815, "total_steps": 19080, "loss": 5.0511, "lr": 6.675642661838011e-07, "epoch": 9.33700209643606, "percentage": 93.37, "elapsed_time": "0:54:11", "remaining_time": "0:03:50", "throughput": 3581.26, "total_tokens": 11646024}
|
| 3582 |
+
{"current_steps": 17820, "total_steps": 19080, "loss": 4.9122, "lr": 6.623250210586463e-07, "epoch": 9.339622641509434, "percentage": 93.4, "elapsed_time": "0:54:12", "remaining_time": "0:03:49", "throughput": 3581.35, "total_tokens": 11649192}
|
| 3583 |
+
{"current_steps": 17825, "total_steps": 19080, "loss": 5.2047, "lr": 6.571061405882095e-07, "epoch": 9.34224318658281, "percentage": 93.42, "elapsed_time": "0:54:13", "remaining_time": "0:03:49", "throughput": 3581.51, "total_tokens": 11653192}
|
| 3584 |
+
{"current_steps": 17830, "total_steps": 19080, "loss": 4.767, "lr": 6.519076291394172e-07, "epoch": 9.344863731656185, "percentage": 93.45, "elapsed_time": "0:54:14", "remaining_time": "0:03:48", "throughput": 3581.69, "total_tokens": 11657224}
|
| 3585 |
+
{"current_steps": 17835, "total_steps": 19080, "loss": 4.8745, "lr": 6.467294910621452e-07, "epoch": 9.34748427672956, "percentage": 93.47, "elapsed_time": "0:54:15", "remaining_time": "0:03:47", "throughput": 3581.77, "total_tokens": 11660328}
|
| 3586 |
+
{"current_steps": 17840, "total_steps": 19080, "loss": 4.826, "lr": 6.415717306892193e-07, "epoch": 9.350104821802935, "percentage": 93.5, "elapsed_time": "0:54:16", "remaining_time": "0:03:46", "throughput": 3581.86, "total_tokens": 11663400}
|
| 3587 |
+
{"current_steps": 17845, "total_steps": 19080, "loss": 4.678, "lr": 6.364343523364263e-07, "epoch": 9.35272536687631, "percentage": 93.53, "elapsed_time": "0:54:16", "remaining_time": "0:03:45", "throughput": 3581.93, "total_tokens": 11665992}
|
| 3588 |
+
{"current_steps": 17850, "total_steps": 19080, "loss": 4.2315, "lr": 6.313173603024802e-07, "epoch": 9.355345911949685, "percentage": 93.55, "elapsed_time": "0:54:17", "remaining_time": "0:03:44", "throughput": 3582.01, "total_tokens": 11669000}
|
| 3589 |
+
{"current_steps": 17855, "total_steps": 19080, "loss": 4.5663, "lr": 6.262207588690533e-07, "epoch": 9.35796645702306, "percentage": 93.58, "elapsed_time": "0:54:18", "remaining_time": "0:03:43", "throughput": 3582.08, "total_tokens": 11671976}
|
| 3590 |
+
{"current_steps": 17860, "total_steps": 19080, "loss": 4.8545, "lr": 6.211445523007398e-07, "epoch": 9.360587002096436, "percentage": 93.61, "elapsed_time": "0:54:19", "remaining_time": "0:03:42", "throughput": 3582.14, "total_tokens": 11674984}
|
| 3591 |
+
{"current_steps": 17865, "total_steps": 19080, "loss": 4.4912, "lr": 6.160887448450892e-07, "epoch": 9.36320754716981, "percentage": 93.63, "elapsed_time": "0:54:19", "remaining_time": "0:03:41", "throughput": 3582.19, "total_tokens": 11677864}
|
| 3592 |
+
{"current_steps": 17870, "total_steps": 19080, "loss": 4.12, "lr": 6.11053340732562e-07, "epoch": 9.365828092243186, "percentage": 93.66, "elapsed_time": "0:54:20", "remaining_time": "0:03:40", "throughput": 3582.31, "total_tokens": 11681128}
|
| 3593 |
+
{"current_steps": 17875, "total_steps": 19080, "loss": 4.7106, "lr": 6.060383441765544e-07, "epoch": 9.368448637316561, "percentage": 93.68, "elapsed_time": "0:54:21", "remaining_time": "0:03:39", "throughput": 3582.37, "total_tokens": 11683880}
|
| 3594 |
+
{"current_steps": 17880, "total_steps": 19080, "loss": 4.8576, "lr": 6.01043759373393e-07, "epoch": 9.371069182389936, "percentage": 93.71, "elapsed_time": "0:54:22", "remaining_time": "0:03:38", "throughput": 3582.49, "total_tokens": 11687496}
|
| 3595 |
+
{"current_steps": 17885, "total_steps": 19080, "loss": 4.9831, "lr": 5.960695905023128e-07, "epoch": 9.373689727463312, "percentage": 93.74, "elapsed_time": "0:54:23", "remaining_time": "0:03:38", "throughput": 3582.58, "total_tokens": 11690696}
|
| 3596 |
+
{"current_steps": 17890, "total_steps": 19080, "loss": 4.3041, "lr": 5.91115841725473e-07, "epoch": 9.376310272536688, "percentage": 93.76, "elapsed_time": "0:54:23", "remaining_time": "0:03:37", "throughput": 3582.64, "total_tokens": 11693544}
|
| 3597 |
+
{"current_steps": 17895, "total_steps": 19080, "loss": 4.5929, "lr": 5.861825171879415e-07, "epoch": 9.378930817610064, "percentage": 93.79, "elapsed_time": "0:54:24", "remaining_time": "0:03:36", "throughput": 3582.72, "total_tokens": 11696840}
|
| 3598 |
+
{"current_steps": 17900, "total_steps": 19080, "loss": 4.9579, "lr": 5.812696210177021e-07, "epoch": 9.381551362683439, "percentage": 93.82, "elapsed_time": "0:54:25", "remaining_time": "0:03:35", "throughput": 3582.8, "total_tokens": 11700264}
|
| 3599 |
+
{"current_steps": 17905, "total_steps": 19080, "loss": 4.0998, "lr": 5.763771573256415e-07, "epoch": 9.384171907756814, "percentage": 93.84, "elapsed_time": "0:54:26", "remaining_time": "0:03:34", "throughput": 3582.89, "total_tokens": 11703496}
|
| 3600 |
+
{"current_steps": 17910, "total_steps": 19080, "loss": 5.0112, "lr": 5.715051302055491e-07, "epoch": 9.38679245283019, "percentage": 93.87, "elapsed_time": "0:54:27", "remaining_time": "0:03:33", "throughput": 3582.98, "total_tokens": 11706664}
|
| 3601 |
+
{"current_steps": 17915, "total_steps": 19080, "loss": 4.8623, "lr": 5.666535437341108e-07, "epoch": 9.389412997903564, "percentage": 93.89, "elapsed_time": "0:54:28", "remaining_time": "0:03:32", "throughput": 3583.05, "total_tokens": 11709480}
|
| 3602 |
+
{"current_steps": 17920, "total_steps": 19080, "loss": 4.7276, "lr": 5.618224019709212e-07, "epoch": 9.39203354297694, "percentage": 93.92, "elapsed_time": "0:54:28", "remaining_time": "0:03:31", "throughput": 3583.15, "total_tokens": 11712712}
|
| 3603 |
+
{"current_steps": 17925, "total_steps": 19080, "loss": 5.2687, "lr": 5.570117089584548e-07, "epoch": 9.394654088050315, "percentage": 93.95, "elapsed_time": "0:54:29", "remaining_time": "0:03:30", "throughput": 3583.18, "total_tokens": 11715464}
|
| 3604 |
+
{"current_steps": 17930, "total_steps": 19080, "loss": 5.3163, "lr": 5.522214687220751e-07, "epoch": 9.39727463312369, "percentage": 93.97, "elapsed_time": "0:54:30", "remaining_time": "0:03:29", "throughput": 3583.25, "total_tokens": 11718120}
|
| 3605 |
+
{"current_steps": 17935, "total_steps": 19080, "loss": 4.6542, "lr": 5.474516852700451e-07, "epoch": 9.399895178197065, "percentage": 94.0, "elapsed_time": "0:54:31", "remaining_time": "0:03:28", "throughput": 3583.36, "total_tokens": 11721512}
|
| 3606 |
+
{"current_steps": 17940, "total_steps": 19080, "loss": 4.0778, "lr": 5.427023625934946e-07, "epoch": 9.40251572327044, "percentage": 94.03, "elapsed_time": "0:54:31", "remaining_time": "0:03:27", "throughput": 3583.39, "total_tokens": 11724008}
|
| 3607 |
+
{"current_steps": 17945, "total_steps": 19080, "loss": 4.6752, "lr": 5.379735046664419e-07, "epoch": 9.405136268343815, "percentage": 94.05, "elapsed_time": "0:54:32", "remaining_time": "0:03:26", "throughput": 3583.42, "total_tokens": 11726696}
|
| 3608 |
+
{"current_steps": 17950, "total_steps": 19080, "loss": 5.0386, "lr": 5.33265115445783e-07, "epoch": 9.40775681341719, "percentage": 94.08, "elapsed_time": "0:54:33", "remaining_time": "0:03:26", "throughput": 3583.56, "total_tokens": 11730408}
|
| 3609 |
+
{"current_steps": 17955, "total_steps": 19080, "loss": 5.1352, "lr": 5.285771988712746e-07, "epoch": 9.410377358490566, "percentage": 94.1, "elapsed_time": "0:54:34", "remaining_time": "0:03:25", "throughput": 3583.61, "total_tokens": 11733320}
|
| 3610 |
+
{"current_steps": 17960, "total_steps": 19080, "loss": 4.9984, "lr": 5.239097588655595e-07, "epoch": 9.41299790356394, "percentage": 94.13, "elapsed_time": "0:54:35", "remaining_time": "0:03:24", "throughput": 3583.86, "total_tokens": 11738472}
|
| 3611 |
+
{"current_steps": 17965, "total_steps": 19080, "loss": 5.0667, "lr": 5.192627993341359e-07, "epoch": 9.415618448637316, "percentage": 94.16, "elapsed_time": "0:54:36", "remaining_time": "0:03:23", "throughput": 3583.9, "total_tokens": 11741032}
|
| 3612 |
+
{"current_steps": 17970, "total_steps": 19080, "loss": 5.2111, "lr": 5.146363241653657e-07, "epoch": 9.418238993710691, "percentage": 94.18, "elapsed_time": "0:54:36", "remaining_time": "0:03:22", "throughput": 3584.0, "total_tokens": 11744328}
|
| 3613 |
+
{"current_steps": 17975, "total_steps": 19080, "loss": 5.1963, "lr": 5.100303372304716e-07, "epoch": 9.420859538784066, "percentage": 94.21, "elapsed_time": "0:54:37", "remaining_time": "0:03:21", "throughput": 3584.11, "total_tokens": 11747976}
|
| 3614 |
+
{"current_steps": 17980, "total_steps": 19080, "loss": 4.7959, "lr": 5.054448423835373e-07, "epoch": 9.423480083857442, "percentage": 94.23, "elapsed_time": "0:54:38", "remaining_time": "0:03:20", "throughput": 3584.22, "total_tokens": 11751144}
|
| 3615 |
+
{"current_steps": 17985, "total_steps": 19080, "loss": 4.6962, "lr": 5.008798434614908e-07, "epoch": 9.426100628930818, "percentage": 94.26, "elapsed_time": "0:54:39", "remaining_time": "0:03:19", "throughput": 3584.3, "total_tokens": 11754312}
|
| 3616 |
+
{"current_steps": 17990, "total_steps": 19080, "loss": 4.2372, "lr": 4.963353442841156e-07, "epoch": 9.428721174004194, "percentage": 94.29, "elapsed_time": "0:54:40", "remaining_time": "0:03:18", "throughput": 3584.44, "total_tokens": 11757896}
|
| 3617 |
+
{"current_steps": 17995, "total_steps": 19080, "loss": 4.6567, "lr": 4.918113486540393e-07, "epoch": 9.431341719077569, "percentage": 94.31, "elapsed_time": "0:54:41", "remaining_time": "0:03:17", "throughput": 3584.51, "total_tokens": 11760840}
|
| 3618 |
+
{"current_steps": 18000, "total_steps": 19080, "loss": 5.1522, "lr": 4.873078603567421e-07, "epoch": 9.433962264150944, "percentage": 94.34, "elapsed_time": "0:54:41", "remaining_time": "0:03:16", "throughput": 3584.61, "total_tokens": 11764136}
|
| 3619 |
+
{"current_steps": 18005, "total_steps": 19080, "loss": 4.5166, "lr": 4.828248831605292e-07, "epoch": 9.43658280922432, "percentage": 94.37, "elapsed_time": "0:54:42", "remaining_time": "0:03:15", "throughput": 3584.67, "total_tokens": 11766696}
|
| 3620 |
+
{"current_steps": 18010, "total_steps": 19080, "loss": 5.0052, "lr": 4.783624208165554e-07, "epoch": 9.439203354297694, "percentage": 94.39, "elapsed_time": "0:54:43", "remaining_time": "0:03:15", "throughput": 3584.73, "total_tokens": 11769160}
|
| 3621 |
+
{"current_steps": 18015, "total_steps": 19080, "loss": 4.9698, "lr": 4.739204770588035e-07, "epoch": 9.44182389937107, "percentage": 94.42, "elapsed_time": "0:54:43", "remaining_time": "0:03:14", "throughput": 3584.83, "total_tokens": 11772360}
|
| 3622 |
+
{"current_steps": 18020, "total_steps": 19080, "loss": 4.476, "lr": 4.694990556040918e-07, "epoch": 9.444444444444445, "percentage": 94.44, "elapsed_time": "0:54:44", "remaining_time": "0:03:13", "throughput": 3584.89, "total_tokens": 11774984}
|
| 3623 |
+
{"current_steps": 18025, "total_steps": 19080, "loss": 4.7578, "lr": 4.65098160152061e-07, "epoch": 9.44706498951782, "percentage": 94.47, "elapsed_time": "0:54:45", "remaining_time": "0:03:12", "throughput": 3584.93, "total_tokens": 11777928}
|
| 3624 |
+
{"current_steps": 18030, "total_steps": 19080, "loss": 5.0103, "lr": 4.6071779438517924e-07, "epoch": 9.449685534591195, "percentage": 94.5, "elapsed_time": "0:54:46", "remaining_time": "0:03:11", "throughput": 3585.04, "total_tokens": 11781096}
|
| 3625 |
+
{"current_steps": 18035, "total_steps": 19080, "loss": 4.8236, "lr": 4.563579619687369e-07, "epoch": 9.45230607966457, "percentage": 94.52, "elapsed_time": "0:54:46", "remaining_time": "0:03:10", "throughput": 3585.1, "total_tokens": 11784104}
|
| 3626 |
+
{"current_steps": 18040, "total_steps": 19080, "loss": 5.1027, "lr": 4.5201866655084636e-07, "epoch": 9.454926624737945, "percentage": 94.55, "elapsed_time": "0:54:47", "remaining_time": "0:03:09", "throughput": 3585.21, "total_tokens": 11787304}
|
| 3627 |
+
{"current_steps": 18045, "total_steps": 19080, "loss": 4.3268, "lr": 4.4769991176242533e-07, "epoch": 9.45754716981132, "percentage": 94.58, "elapsed_time": "0:54:48", "remaining_time": "0:03:08", "throughput": 3585.25, "total_tokens": 11790024}
|
| 3628 |
+
{"current_steps": 18050, "total_steps": 19080, "loss": 4.7543, "lr": 4.4340170121721645e-07, "epoch": 9.460167714884696, "percentage": 94.6, "elapsed_time": "0:54:49", "remaining_time": "0:03:07", "throughput": 3585.3, "total_tokens": 11792840}
|
| 3629 |
+
{"current_steps": 18055, "total_steps": 19080, "loss": 4.5631, "lr": 4.3912403851176234e-07, "epoch": 9.46278825995807, "percentage": 94.63, "elapsed_time": "0:54:49", "remaining_time": "0:03:06", "throughput": 3585.35, "total_tokens": 11795720}
|
| 3630 |
+
{"current_steps": 18060, "total_steps": 19080, "loss": 4.4589, "lr": 4.348669272254163e-07, "epoch": 9.465408805031446, "percentage": 94.65, "elapsed_time": "0:54:50", "remaining_time": "0:03:05", "throughput": 3585.38, "total_tokens": 11798472}
|
| 3631 |
+
{"current_steps": 18065, "total_steps": 19080, "loss": 4.4908, "lr": 4.306303709203374e-07, "epoch": 9.468029350104821, "percentage": 94.68, "elapsed_time": "0:54:51", "remaining_time": "0:03:04", "throughput": 3585.48, "total_tokens": 11801800}
|
| 3632 |
+
{"current_steps": 18070, "total_steps": 19080, "loss": 4.9893, "lr": 4.264143731414788e-07, "epoch": 9.470649895178196, "percentage": 94.71, "elapsed_time": "0:54:52", "remaining_time": "0:03:04", "throughput": 3585.58, "total_tokens": 11805160}
|
| 3633 |
+
{"current_steps": 18075, "total_steps": 19080, "loss": 5.0055, "lr": 4.2221893741659636e-07, "epoch": 9.473270440251572, "percentage": 94.73, "elapsed_time": "0:54:54", "remaining_time": "0:03:03", "throughput": 3585.84, "total_tokens": 11811816}
|
| 3634 |
+
{"current_steps": 18080, "total_steps": 19080, "loss": 5.0781, "lr": 4.180440672562402e-07, "epoch": 9.475890985324948, "percentage": 94.76, "elapsed_time": "0:54:55", "remaining_time": "0:03:02", "throughput": 3586.02, "total_tokens": 11816040}
|
| 3635 |
+
{"current_steps": 18085, "total_steps": 19080, "loss": 4.8746, "lr": 4.1388976615374665e-07, "epoch": 9.478511530398324, "percentage": 94.79, "elapsed_time": "0:54:55", "remaining_time": "0:03:01", "throughput": 3586.11, "total_tokens": 11818920}
|
| 3636 |
+
{"current_steps": 18090, "total_steps": 19080, "loss": 4.7548, "lr": 4.097560375852516e-07, "epoch": 9.481132075471699, "percentage": 94.81, "elapsed_time": "0:54:56", "remaining_time": "0:03:00", "throughput": 3586.18, "total_tokens": 11821736}
|
| 3637 |
+
{"current_steps": 18095, "total_steps": 19080, "loss": 4.7153, "lr": 4.056428850096661e-07, "epoch": 9.483752620545074, "percentage": 94.84, "elapsed_time": "0:54:57", "remaining_time": "0:02:59", "throughput": 3586.28, "total_tokens": 11825256}
|
| 3638 |
+
{"current_steps": 18100, "total_steps": 19080, "loss": 4.9875, "lr": 4.01550311868687e-07, "epoch": 9.48637316561845, "percentage": 94.86, "elapsed_time": "0:54:58", "remaining_time": "0:02:58", "throughput": 3586.47, "total_tokens": 11829416}
|
| 3639 |
+
{"current_steps": 18105, "total_steps": 19080, "loss": 4.4875, "lr": 3.974783215867972e-07, "epoch": 9.488993710691824, "percentage": 94.89, "elapsed_time": "0:54:59", "remaining_time": "0:02:57", "throughput": 3586.53, "total_tokens": 11832200}
|
| 3640 |
+
{"current_steps": 18110, "total_steps": 19080, "loss": 5.4377, "lr": 3.9342691757124626e-07, "epoch": 9.4916142557652, "percentage": 94.92, "elapsed_time": "0:54:59", "remaining_time": "0:02:56", "throughput": 3586.64, "total_tokens": 11835592}
|
| 3641 |
+
{"current_steps": 18115, "total_steps": 19080, "loss": 5.156, "lr": 3.8939610321206966e-07, "epoch": 9.494234800838575, "percentage": 94.94, "elapsed_time": "0:55:00", "remaining_time": "0:02:55", "throughput": 3586.76, "total_tokens": 11839400}
|
| 3642 |
+
{"current_steps": 18120, "total_steps": 19080, "loss": 5.4055, "lr": 3.853858818820694e-07, "epoch": 9.49685534591195, "percentage": 94.97, "elapsed_time": "0:55:01", "remaining_time": "0:02:54", "throughput": 3586.83, "total_tokens": 11842216}
|
| 3643 |
+
{"current_steps": 18125, "total_steps": 19080, "loss": 4.5529, "lr": 3.8139625693680847e-07, "epoch": 9.499475890985325, "percentage": 94.99, "elapsed_time": "0:55:02", "remaining_time": "0:02:53", "throughput": 3586.89, "total_tokens": 11844936}
|
| 3644 |
+
{"current_steps": 18126, "total_steps": 19080, "eval_loss": 4.819365978240967, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:55:18", "remaining_time": "0:02:54", "throughput": 3569.49, "total_tokens": 11845704}
|
| 3645 |
+
{"current_steps": 18130, "total_steps": 19080, "loss": 5.0405, "lr": 3.774272317146277e-07, "epoch": 9.5020964360587, "percentage": 95.02, "elapsed_time": "0:55:20", "remaining_time": "0:02:54", "throughput": 3567.84, "total_tokens": 11847912}
|
| 3646 |
+
{"current_steps": 18135, "total_steps": 19080, "loss": 4.7468, "lr": 3.7347880953662597e-07, "epoch": 9.504716981132075, "percentage": 95.05, "elapsed_time": "0:55:22", "remaining_time": "0:02:53", "throughput": 3568.09, "total_tokens": 11853192}
|