Training in progress, step 34344
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +382 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 798032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b156c6200cedebb9c7dc55af9637f4fe70f98e002e578f83f8c3c753f0a3b695
|
| 3 |
size 798032
|
trainer_log.jsonl
CHANGED
|
@@ -6505,3 +6505,385 @@
|
|
| 6505 |
{"current_steps": 32440, "total_steps": 38160, "loss": 0.3522, "lr": 3.345935435184408e-06, "epoch": 17.0020964360587, "percentage": 85.01, "elapsed_time": "1:38:02", "remaining_time": "0:17:17", "throughput": 3597.15, "total_tokens": 21160712}
|
| 6506 |
{"current_steps": 32445, "total_steps": 38160, "loss": 0.4398, "lr": 3.3402232754641576e-06, "epoch": 17.004716981132077, "percentage": 85.02, "elapsed_time": "1:38:03", "remaining_time": "0:17:16", "throughput": 3597.06, "total_tokens": 21163528}
|
| 6507 |
{"current_steps": 32450, "total_steps": 38160, "loss": 0.573, "lr": 3.334515646722089e-06, "epoch": 17.00733752620545, "percentage": 85.04, "elapsed_time": "1:38:04", "remaining_time": "0:17:15", "throughput": 3597.18, "total_tokens": 21167304}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6505 |
{"current_steps": 32440, "total_steps": 38160, "loss": 0.3522, "lr": 3.345935435184408e-06, "epoch": 17.0020964360587, "percentage": 85.01, "elapsed_time": "1:38:02", "remaining_time": "0:17:17", "throughput": 3597.15, "total_tokens": 21160712}
|
| 6506 |
{"current_steps": 32445, "total_steps": 38160, "loss": 0.4398, "lr": 3.3402232754641576e-06, "epoch": 17.004716981132077, "percentage": 85.02, "elapsed_time": "1:38:03", "remaining_time": "0:17:16", "throughput": 3597.06, "total_tokens": 21163528}
|
| 6507 |
{"current_steps": 32450, "total_steps": 38160, "loss": 0.573, "lr": 3.334515646722089e-06, "epoch": 17.00733752620545, "percentage": 85.04, "elapsed_time": "1:38:04", "remaining_time": "0:17:15", "throughput": 3597.18, "total_tokens": 21167304}
|
| 6508 |
+
{"current_steps": 32455, "total_steps": 38160, "loss": 0.4873, "lr": 3.32881255015218e-06, "epoch": 17.009958071278827, "percentage": 85.05, "elapsed_time": "1:38:05", "remaining_time": "0:17:14", "throughput": 3597.22, "total_tokens": 21170792}
|
| 6509 |
+
{"current_steps": 32460, "total_steps": 38160, "loss": 0.4323, "lr": 3.3231139869474433e-06, "epoch": 17.0125786163522, "percentage": 85.06, "elapsed_time": "1:38:06", "remaining_time": "0:17:13", "throughput": 3597.31, "total_tokens": 21174408}
|
| 6510 |
+
{"current_steps": 32465, "total_steps": 38160, "loss": 0.5118, "lr": 3.317419958299961e-06, "epoch": 17.015199161425578, "percentage": 85.08, "elapsed_time": "1:38:07", "remaining_time": "0:17:12", "throughput": 3597.4, "total_tokens": 21178248}
|
| 6511 |
+
{"current_steps": 32470, "total_steps": 38160, "loss": 0.3483, "lr": 3.31173046540085e-06, "epoch": 17.01781970649895, "percentage": 85.09, "elapsed_time": "1:38:07", "remaining_time": "0:17:11", "throughput": 3597.38, "total_tokens": 21181064}
|
| 6512 |
+
{"current_steps": 32475, "total_steps": 38160, "loss": 0.5371, "lr": 3.306045509440295e-06, "epoch": 17.020440251572328, "percentage": 85.1, "elapsed_time": "1:38:08", "remaining_time": "0:17:10", "throughput": 3597.42, "total_tokens": 21184296}
|
| 6513 |
+
{"current_steps": 32480, "total_steps": 38160, "loss": 0.5935, "lr": 3.300365091607521e-06, "epoch": 17.0230607966457, "percentage": 85.12, "elapsed_time": "1:38:09", "remaining_time": "0:17:09", "throughput": 3597.42, "total_tokens": 21187176}
|
| 6514 |
+
{"current_steps": 32485, "total_steps": 38160, "loss": 0.3982, "lr": 3.2946892130908127e-06, "epoch": 17.02568134171908, "percentage": 85.13, "elapsed_time": "1:38:10", "remaining_time": "0:17:09", "throughput": 3597.4, "total_tokens": 21189800}
|
| 6515 |
+
{"current_steps": 32490, "total_steps": 38160, "loss": 0.5546, "lr": 3.289017875077488e-06, "epoch": 17.028301886792452, "percentage": 85.14, "elapsed_time": "1:38:11", "remaining_time": "0:17:08", "throughput": 3597.64, "total_tokens": 21196328}
|
| 6516 |
+
{"current_steps": 32495, "total_steps": 38160, "loss": 0.5017, "lr": 3.283351078753938e-06, "epoch": 17.03092243186583, "percentage": 85.15, "elapsed_time": "1:38:12", "remaining_time": "0:17:07", "throughput": 3597.7, "total_tokens": 21199400}
|
| 6517 |
+
{"current_steps": 32500, "total_steps": 38160, "loss": 0.4501, "lr": 3.2776888253055777e-06, "epoch": 17.033542976939202, "percentage": 85.17, "elapsed_time": "1:38:13", "remaining_time": "0:17:06", "throughput": 3597.71, "total_tokens": 21202248}
|
| 6518 |
+
{"current_steps": 32505, "total_steps": 38160, "loss": 0.5686, "lr": 3.272031115916896e-06, "epoch": 17.03616352201258, "percentage": 85.18, "elapsed_time": "1:38:14", "remaining_time": "0:17:05", "throughput": 3597.76, "total_tokens": 21205448}
|
| 6519 |
+
{"current_steps": 32510, "total_steps": 38160, "loss": 0.3927, "lr": 3.2663779517714238e-06, "epoch": 17.038784067085953, "percentage": 85.19, "elapsed_time": "1:38:15", "remaining_time": "0:17:04", "throughput": 3597.89, "total_tokens": 21209576}
|
| 6520 |
+
{"current_steps": 32515, "total_steps": 38160, "loss": 0.5228, "lr": 3.260729334051729e-06, "epoch": 17.04140461215933, "percentage": 85.21, "elapsed_time": "1:38:15", "remaining_time": "0:17:03", "throughput": 3597.94, "total_tokens": 21212904}
|
| 6521 |
+
{"current_steps": 32520, "total_steps": 38160, "loss": 0.4718, "lr": 3.2550852639394446e-06, "epoch": 17.044025157232703, "percentage": 85.22, "elapsed_time": "1:38:16", "remaining_time": "0:17:02", "throughput": 3597.97, "total_tokens": 21216104}
|
| 6522 |
+
{"current_steps": 32525, "total_steps": 38160, "loss": 0.5436, "lr": 3.2494457426152452e-06, "epoch": 17.04664570230608, "percentage": 85.23, "elapsed_time": "1:38:17", "remaining_time": "0:17:01", "throughput": 3598.02, "total_tokens": 21220104}
|
| 6523 |
+
{"current_steps": 32530, "total_steps": 38160, "loss": 0.5516, "lr": 3.243810771258851e-06, "epoch": 17.049266247379453, "percentage": 85.25, "elapsed_time": "1:38:18", "remaining_time": "0:17:00", "throughput": 3598.01, "total_tokens": 21222920}
|
| 6524 |
+
{"current_steps": 32535, "total_steps": 38160, "loss": 0.4719, "lr": 3.2381803510490387e-06, "epoch": 17.05188679245283, "percentage": 85.26, "elapsed_time": "1:38:19", "remaining_time": "0:16:59", "throughput": 3598.05, "total_tokens": 21226408}
|
| 6525 |
+
{"current_steps": 32540, "total_steps": 38160, "loss": 0.5707, "lr": 3.2325544831636217e-06, "epoch": 17.054507337526207, "percentage": 85.27, "elapsed_time": "1:38:20", "remaining_time": "0:16:59", "throughput": 3598.16, "total_tokens": 21230184}
|
| 6526 |
+
{"current_steps": 32545, "total_steps": 38160, "loss": 0.4434, "lr": 3.22693316877947e-06, "epoch": 17.05712788259958, "percentage": 85.29, "elapsed_time": "1:38:21", "remaining_time": "0:16:58", "throughput": 3598.16, "total_tokens": 21233288}
|
| 6527 |
+
{"current_steps": 32550, "total_steps": 38160, "loss": 0.4873, "lr": 3.2213164090725057e-06, "epoch": 17.059748427672957, "percentage": 85.3, "elapsed_time": "1:38:21", "remaining_time": "0:16:57", "throughput": 3598.1, "total_tokens": 21235720}
|
| 6528 |
+
{"current_steps": 32555, "total_steps": 38160, "loss": 0.5171, "lr": 3.2157042052176763e-06, "epoch": 17.06236897274633, "percentage": 85.31, "elapsed_time": "1:38:22", "remaining_time": "0:16:56", "throughput": 3598.08, "total_tokens": 21238408}
|
| 6529 |
+
{"current_steps": 32560, "total_steps": 38160, "loss": 0.4237, "lr": 3.2100965583890003e-06, "epoch": 17.064989517819708, "percentage": 85.32, "elapsed_time": "1:38:23", "remaining_time": "0:16:55", "throughput": 3598.26, "total_tokens": 21242632}
|
| 6530 |
+
{"current_steps": 32565, "total_steps": 38160, "loss": 0.4397, "lr": 3.204493469759537e-06, "epoch": 17.06761006289308, "percentage": 85.34, "elapsed_time": "1:38:24", "remaining_time": "0:16:54", "throughput": 3598.31, "total_tokens": 21246056}
|
| 6531 |
+
{"current_steps": 32570, "total_steps": 38160, "loss": 0.6085, "lr": 3.1988949405013764e-06, "epoch": 17.070230607966458, "percentage": 85.35, "elapsed_time": "1:38:25", "remaining_time": "0:16:53", "throughput": 3598.25, "total_tokens": 21249032}
|
| 6532 |
+
{"current_steps": 32575, "total_steps": 38160, "loss": 0.4659, "lr": 3.193300971785676e-06, "epoch": 17.07285115303983, "percentage": 85.36, "elapsed_time": "1:38:26", "remaining_time": "0:16:52", "throughput": 3598.21, "total_tokens": 21251816}
|
| 6533 |
+
{"current_steps": 32580, "total_steps": 38160, "loss": 0.4104, "lr": 3.1877115647826335e-06, "epoch": 17.07547169811321, "percentage": 85.38, "elapsed_time": "1:38:27", "remaining_time": "0:16:51", "throughput": 3598.33, "total_tokens": 21256200}
|
| 6534 |
+
{"current_steps": 32585, "total_steps": 38160, "loss": 0.4841, "lr": 3.1821267206614797e-06, "epoch": 17.078092243186582, "percentage": 85.39, "elapsed_time": "1:38:28", "remaining_time": "0:16:50", "throughput": 3598.41, "total_tokens": 21260392}
|
| 6535 |
+
{"current_steps": 32590, "total_steps": 38160, "loss": 0.5239, "lr": 3.176546440590508e-06, "epoch": 17.08071278825996, "percentage": 85.4, "elapsed_time": "1:38:29", "remaining_time": "0:16:49", "throughput": 3598.46, "total_tokens": 21264296}
|
| 6536 |
+
{"current_steps": 32595, "total_steps": 38160, "loss": 0.5133, "lr": 3.170970725737041e-06, "epoch": 17.083333333333332, "percentage": 85.42, "elapsed_time": "1:38:30", "remaining_time": "0:16:49", "throughput": 3598.51, "total_tokens": 21267848}
|
| 6537 |
+
{"current_steps": 32600, "total_steps": 38160, "loss": 0.4057, "lr": 3.165399577267461e-06, "epoch": 17.08595387840671, "percentage": 85.43, "elapsed_time": "1:38:31", "remaining_time": "0:16:48", "throughput": 3598.63, "total_tokens": 21271560}
|
| 6538 |
+
{"current_steps": 32605, "total_steps": 38160, "loss": 0.5204, "lr": 3.159832996347184e-06, "epoch": 17.088574423480082, "percentage": 85.44, "elapsed_time": "1:38:31", "remaining_time": "0:16:47", "throughput": 3598.67, "total_tokens": 21274760}
|
| 6539 |
+
{"current_steps": 32610, "total_steps": 38160, "loss": 0.4791, "lr": 3.1542709841406865e-06, "epoch": 17.09119496855346, "percentage": 85.46, "elapsed_time": "1:38:32", "remaining_time": "0:16:46", "throughput": 3598.58, "total_tokens": 21277352}
|
| 6540 |
+
{"current_steps": 32615, "total_steps": 38160, "loss": 0.6631, "lr": 3.148713541811468e-06, "epoch": 17.093815513626833, "percentage": 85.47, "elapsed_time": "1:38:33", "remaining_time": "0:16:45", "throughput": 3598.53, "total_tokens": 21280168}
|
| 6541 |
+
{"current_steps": 32620, "total_steps": 38160, "loss": 0.4825, "lr": 3.1431606705220803e-06, "epoch": 17.09643605870021, "percentage": 85.48, "elapsed_time": "1:38:34", "remaining_time": "0:16:44", "throughput": 3598.61, "total_tokens": 21283688}
|
| 6542 |
+
{"current_steps": 32625, "total_steps": 38160, "loss": 0.4855, "lr": 3.1376123714341234e-06, "epoch": 17.099056603773583, "percentage": 85.5, "elapsed_time": "1:38:35", "remaining_time": "0:16:43", "throughput": 3598.64, "total_tokens": 21286920}
|
| 6543 |
+
{"current_steps": 32630, "total_steps": 38160, "loss": 0.4949, "lr": 3.1320686457082387e-06, "epoch": 17.10167714884696, "percentage": 85.51, "elapsed_time": "1:38:36", "remaining_time": "0:16:42", "throughput": 3598.73, "total_tokens": 21290600}
|
| 6544 |
+
{"current_steps": 32635, "total_steps": 38160, "loss": 0.5186, "lr": 3.126529494504113e-06, "epoch": 17.104297693920337, "percentage": 85.52, "elapsed_time": "1:38:36", "remaining_time": "0:16:41", "throughput": 3598.66, "total_tokens": 21293032}
|
| 6545 |
+
{"current_steps": 32640, "total_steps": 38160, "loss": 0.5242, "lr": 3.1209949189804744e-06, "epoch": 17.10691823899371, "percentage": 85.53, "elapsed_time": "1:38:37", "remaining_time": "0:16:40", "throughput": 3598.61, "total_tokens": 21295976}
|
| 6546 |
+
{"current_steps": 32645, "total_steps": 38160, "loss": 0.4951, "lr": 3.115464920295089e-06, "epoch": 17.109538784067087, "percentage": 85.55, "elapsed_time": "1:38:38", "remaining_time": "0:16:39", "throughput": 3598.67, "total_tokens": 21299816}
|
| 6547 |
+
{"current_steps": 32650, "total_steps": 38160, "loss": 0.4755, "lr": 3.1099394996047666e-06, "epoch": 17.11215932914046, "percentage": 85.56, "elapsed_time": "1:38:39", "remaining_time": "0:16:38", "throughput": 3598.64, "total_tokens": 21302696}
|
| 6548 |
+
{"current_steps": 32655, "total_steps": 38160, "loss": 0.4744, "lr": 3.104418658065367e-06, "epoch": 17.114779874213838, "percentage": 85.57, "elapsed_time": "1:38:40", "remaining_time": "0:16:38", "throughput": 3598.53, "total_tokens": 21305288}
|
| 6549 |
+
{"current_steps": 32660, "total_steps": 38160, "loss": 0.4163, "lr": 3.098902396831782e-06, "epoch": 17.11740041928721, "percentage": 85.59, "elapsed_time": "1:38:41", "remaining_time": "0:16:37", "throughput": 3598.57, "total_tokens": 21308808}
|
| 6550 |
+
{"current_steps": 32665, "total_steps": 38160, "loss": 0.4723, "lr": 3.0933907170579646e-06, "epoch": 17.120020964360588, "percentage": 85.6, "elapsed_time": "1:38:42", "remaining_time": "0:16:36", "throughput": 3598.61, "total_tokens": 21312008}
|
| 6551 |
+
{"current_steps": 32670, "total_steps": 38160, "loss": 0.4619, "lr": 3.0878836198968835e-06, "epoch": 17.12264150943396, "percentage": 85.61, "elapsed_time": "1:38:43", "remaining_time": "0:16:35", "throughput": 3598.42, "total_tokens": 21314280}
|
| 6552 |
+
{"current_steps": 32675, "total_steps": 38160, "loss": 0.4534, "lr": 3.0823811065005564e-06, "epoch": 17.12526205450734, "percentage": 85.63, "elapsed_time": "1:38:44", "remaining_time": "0:16:34", "throughput": 3598.31, "total_tokens": 21316808}
|
| 6553 |
+
{"current_steps": 32680, "total_steps": 38160, "loss": 0.487, "lr": 3.0768831780200542e-06, "epoch": 17.127882599580712, "percentage": 85.64, "elapsed_time": "1:38:45", "remaining_time": "0:16:33", "throughput": 3598.4, "total_tokens": 21320712}
|
| 6554 |
+
{"current_steps": 32685, "total_steps": 38160, "loss": 0.4411, "lr": 3.071389835605476e-06, "epoch": 17.13050314465409, "percentage": 85.65, "elapsed_time": "1:38:45", "remaining_time": "0:16:32", "throughput": 3598.55, "total_tokens": 21324968}
|
| 6555 |
+
{"current_steps": 32690, "total_steps": 38160, "loss": 0.4907, "lr": 3.065901080405978e-06, "epoch": 17.133123689727462, "percentage": 85.67, "elapsed_time": "1:38:46", "remaining_time": "0:16:31", "throughput": 3598.57, "total_tokens": 21328360}
|
| 6556 |
+
{"current_steps": 32695, "total_steps": 38160, "loss": 0.5046, "lr": 3.060416913569733e-06, "epoch": 17.13574423480084, "percentage": 85.68, "elapsed_time": "1:38:47", "remaining_time": "0:16:30", "throughput": 3598.57, "total_tokens": 21331336}
|
| 6557 |
+
{"current_steps": 32700, "total_steps": 38160, "loss": 0.6091, "lr": 3.054937336243968e-06, "epoch": 17.138364779874212, "percentage": 85.69, "elapsed_time": "1:38:48", "remaining_time": "0:16:29", "throughput": 3598.58, "total_tokens": 21334344}
|
| 6558 |
+
{"current_steps": 32705, "total_steps": 38160, "loss": 0.5072, "lr": 3.049462349574947e-06, "epoch": 17.14098532494759, "percentage": 85.7, "elapsed_time": "1:38:49", "remaining_time": "0:16:29", "throughput": 3598.82, "total_tokens": 21340360}
|
| 6559 |
+
{"current_steps": 32710, "total_steps": 38160, "loss": 0.4603, "lr": 3.043991954707981e-06, "epoch": 17.143605870020963, "percentage": 85.72, "elapsed_time": "1:38:50", "remaining_time": "0:16:28", "throughput": 3598.82, "total_tokens": 21343176}
|
| 6560 |
+
{"current_steps": 32715, "total_steps": 38160, "loss": 0.4718, "lr": 3.0385261527874075e-06, "epoch": 17.14622641509434, "percentage": 85.73, "elapsed_time": "1:38:51", "remaining_time": "0:16:27", "throughput": 3598.82, "total_tokens": 21346024}
|
| 6561 |
+
{"current_steps": 32720, "total_steps": 38160, "loss": 0.4518, "lr": 3.0330649449566205e-06, "epoch": 17.148846960167713, "percentage": 85.74, "elapsed_time": "1:38:52", "remaining_time": "0:16:26", "throughput": 3598.92, "total_tokens": 21350120}
|
| 6562 |
+
{"current_steps": 32725, "total_steps": 38160, "loss": 0.5962, "lr": 3.027608332358034e-06, "epoch": 17.15146750524109, "percentage": 85.76, "elapsed_time": "1:38:53", "remaining_time": "0:16:25", "throughput": 3598.98, "total_tokens": 21353608}
|
| 6563 |
+
{"current_steps": 32730, "total_steps": 38160, "loss": 0.4793, "lr": 3.022156316133104e-06, "epoch": 17.154088050314467, "percentage": 85.77, "elapsed_time": "1:38:54", "remaining_time": "0:16:24", "throughput": 3599.17, "total_tokens": 21358376}
|
| 6564 |
+
{"current_steps": 32735, "total_steps": 38160, "loss": 0.4201, "lr": 3.016708897422335e-06, "epoch": 17.15670859538784, "percentage": 85.78, "elapsed_time": "1:38:55", "remaining_time": "0:16:23", "throughput": 3599.16, "total_tokens": 21361128}
|
| 6565 |
+
{"current_steps": 32740, "total_steps": 38160, "loss": 0.4895, "lr": 3.0112660773652685e-06, "epoch": 17.159329140461217, "percentage": 85.8, "elapsed_time": "1:38:55", "remaining_time": "0:16:22", "throughput": 3599.26, "total_tokens": 21364584}
|
| 6566 |
+
{"current_steps": 32745, "total_steps": 38160, "loss": 0.4631, "lr": 3.005827857100482e-06, "epoch": 17.16194968553459, "percentage": 85.81, "elapsed_time": "1:38:56", "remaining_time": "0:16:21", "throughput": 3599.33, "total_tokens": 21368168}
|
| 6567 |
+
{"current_steps": 32750, "total_steps": 38160, "loss": 0.4135, "lr": 3.000394237765586e-06, "epoch": 17.164570230607968, "percentage": 85.82, "elapsed_time": "1:38:57", "remaining_time": "0:16:20", "throughput": 3599.38, "total_tokens": 21371752}
|
| 6568 |
+
{"current_steps": 32755, "total_steps": 38160, "loss": 0.5783, "lr": 2.9949652204972254e-06, "epoch": 17.16719077568134, "percentage": 85.84, "elapsed_time": "1:38:58", "remaining_time": "0:16:19", "throughput": 3599.4, "total_tokens": 21374632}
|
| 6569 |
+
{"current_steps": 32760, "total_steps": 38160, "loss": 0.4884, "lr": 2.989540806431093e-06, "epoch": 17.169811320754718, "percentage": 85.85, "elapsed_time": "1:38:59", "remaining_time": "0:16:18", "throughput": 3599.4, "total_tokens": 21377608}
|
| 6570 |
+
{"current_steps": 32765, "total_steps": 38160, "loss": 0.539, "lr": 2.984120996701917e-06, "epoch": 17.17243186582809, "percentage": 85.86, "elapsed_time": "1:39:00", "remaining_time": "0:16:18", "throughput": 3599.48, "total_tokens": 21381192}
|
| 6571 |
+
{"current_steps": 32770, "total_steps": 38160, "loss": 0.6583, "lr": 2.9787057924434618e-06, "epoch": 17.17505241090147, "percentage": 85.88, "elapsed_time": "1:39:00", "remaining_time": "0:16:17", "throughput": 3599.51, "total_tokens": 21384168}
|
| 6572 |
+
{"current_steps": 32775, "total_steps": 38160, "loss": 0.4712, "lr": 2.9732951947885166e-06, "epoch": 17.177672955974842, "percentage": 85.89, "elapsed_time": "1:39:01", "remaining_time": "0:16:16", "throughput": 3599.48, "total_tokens": 21387240}
|
| 6573 |
+
{"current_steps": 32780, "total_steps": 38160, "loss": 0.424, "lr": 2.967889204868929e-06, "epoch": 17.18029350104822, "percentage": 85.9, "elapsed_time": "1:39:02", "remaining_time": "0:16:15", "throughput": 3599.46, "total_tokens": 21390216}
|
| 6574 |
+
{"current_steps": 32785, "total_steps": 38160, "loss": 0.4879, "lr": 2.96248782381556e-06, "epoch": 17.182914046121592, "percentage": 85.91, "elapsed_time": "1:39:03", "remaining_time": "0:16:14", "throughput": 3599.53, "total_tokens": 21393544}
|
| 6575 |
+
{"current_steps": 32790, "total_steps": 38160, "loss": 0.4914, "lr": 2.957091052758321e-06, "epoch": 17.18553459119497, "percentage": 85.93, "elapsed_time": "1:39:04", "remaining_time": "0:16:13", "throughput": 3599.6, "total_tokens": 21397288}
|
| 6576 |
+
{"current_steps": 32795, "total_steps": 38160, "loss": 0.4078, "lr": 2.9516988928261517e-06, "epoch": 17.188155136268342, "percentage": 85.94, "elapsed_time": "1:39:05", "remaining_time": "0:16:12", "throughput": 3599.57, "total_tokens": 21399912}
|
| 6577 |
+
{"current_steps": 32800, "total_steps": 38160, "loss": 0.4594, "lr": 2.9463113451470403e-06, "epoch": 17.19077568134172, "percentage": 85.95, "elapsed_time": "1:39:05", "remaining_time": "0:16:11", "throughput": 3599.51, "total_tokens": 21402472}
|
| 6578 |
+
{"current_steps": 32805, "total_steps": 38160, "loss": 0.4221, "lr": 2.94092841084799e-06, "epoch": 17.193396226415093, "percentage": 85.97, "elapsed_time": "1:39:06", "remaining_time": "0:16:10", "throughput": 3599.51, "total_tokens": 21405352}
|
| 6579 |
+
{"current_steps": 32810, "total_steps": 38160, "loss": 0.4218, "lr": 2.9355500910550586e-06, "epoch": 17.19601677148847, "percentage": 85.98, "elapsed_time": "1:39:07", "remaining_time": "0:16:09", "throughput": 3599.47, "total_tokens": 21408008}
|
| 6580 |
+
{"current_steps": 32815, "total_steps": 38160, "loss": 0.5928, "lr": 2.9301763868933157e-06, "epoch": 17.198637316561843, "percentage": 85.99, "elapsed_time": "1:39:08", "remaining_time": "0:16:08", "throughput": 3599.52, "total_tokens": 21411880}
|
| 6581 |
+
{"current_steps": 32820, "total_steps": 38160, "loss": 0.4633, "lr": 2.9248072994868896e-06, "epoch": 17.20125786163522, "percentage": 86.01, "elapsed_time": "1:39:09", "remaining_time": "0:16:08", "throughput": 3599.4, "total_tokens": 21414504}
|
| 6582 |
+
{"current_steps": 32825, "total_steps": 38160, "loss": 0.4472, "lr": 2.919442829958932e-06, "epoch": 17.203878406708597, "percentage": 86.02, "elapsed_time": "1:39:10", "remaining_time": "0:16:07", "throughput": 3599.37, "total_tokens": 21417608}
|
| 6583 |
+
{"current_steps": 32830, "total_steps": 38160, "loss": 0.5183, "lr": 2.9140829794316234e-06, "epoch": 17.20649895178197, "percentage": 86.03, "elapsed_time": "1:39:11", "remaining_time": "0:16:06", "throughput": 3599.4, "total_tokens": 21420712}
|
| 6584 |
+
{"current_steps": 32835, "total_steps": 38160, "loss": 0.6786, "lr": 2.908727749026191e-06, "epoch": 17.209119496855347, "percentage": 86.05, "elapsed_time": "1:39:11", "remaining_time": "0:16:05", "throughput": 3599.38, "total_tokens": 21423400}
|
| 6585 |
+
{"current_steps": 32840, "total_steps": 38160, "loss": 0.5395, "lr": 2.9033771398628756e-06, "epoch": 17.21174004192872, "percentage": 86.06, "elapsed_time": "1:39:12", "remaining_time": "0:16:04", "throughput": 3599.42, "total_tokens": 21426888}
|
| 6586 |
+
{"current_steps": 32845, "total_steps": 38160, "loss": 0.507, "lr": 2.898031153060973e-06, "epoch": 17.214360587002098, "percentage": 86.07, "elapsed_time": "1:39:13", "remaining_time": "0:16:03", "throughput": 3599.43, "total_tokens": 21429736}
|
| 6587 |
+
{"current_steps": 32850, "total_steps": 38160, "loss": 0.502, "lr": 2.8926897897388033e-06, "epoch": 17.21698113207547, "percentage": 86.08, "elapsed_time": "1:39:14", "remaining_time": "0:16:02", "throughput": 3599.53, "total_tokens": 21433320}
|
| 6588 |
+
{"current_steps": 32855, "total_steps": 38160, "loss": 0.4289, "lr": 2.8873530510137136e-06, "epoch": 17.219601677148848, "percentage": 86.1, "elapsed_time": "1:39:15", "remaining_time": "0:16:01", "throughput": 3599.58, "total_tokens": 21436584}
|
| 6589 |
+
{"current_steps": 32860, "total_steps": 38160, "loss": 0.598, "lr": 2.8820209380020892e-06, "epoch": 17.22222222222222, "percentage": 86.11, "elapsed_time": "1:39:16", "remaining_time": "0:16:00", "throughput": 3599.59, "total_tokens": 21439528}
|
| 6590 |
+
{"current_steps": 32865, "total_steps": 38160, "loss": 0.4365, "lr": 2.8766934518193566e-06, "epoch": 17.2248427672956, "percentage": 86.12, "elapsed_time": "1:39:16", "remaining_time": "0:15:59", "throughput": 3599.65, "total_tokens": 21442824}
|
| 6591 |
+
{"current_steps": 32870, "total_steps": 38160, "loss": 0.5414, "lr": 2.87137059357995e-06, "epoch": 17.22746331236897, "percentage": 86.14, "elapsed_time": "1:39:17", "remaining_time": "0:15:58", "throughput": 3599.67, "total_tokens": 21446024}
|
| 6592 |
+
{"current_steps": 32875, "total_steps": 38160, "loss": 0.3994, "lr": 2.8660523643973618e-06, "epoch": 17.23008385744235, "percentage": 86.15, "elapsed_time": "1:39:18", "remaining_time": "0:15:57", "throughput": 3599.61, "total_tokens": 21448744}
|
| 6593 |
+
{"current_steps": 32880, "total_steps": 38160, "loss": 0.5104, "lr": 2.860738765384105e-06, "epoch": 17.232704402515722, "percentage": 86.16, "elapsed_time": "1:39:19", "remaining_time": "0:15:56", "throughput": 3599.6, "total_tokens": 21451688}
|
| 6594 |
+
{"current_steps": 32885, "total_steps": 38160, "loss": 0.5131, "lr": 2.8554297976517154e-06, "epoch": 17.2353249475891, "percentage": 86.18, "elapsed_time": "1:39:20", "remaining_time": "0:15:56", "throughput": 3599.48, "total_tokens": 21453864}
|
| 6595 |
+
{"current_steps": 32890, "total_steps": 38160, "loss": 0.5059, "lr": 2.8501254623107707e-06, "epoch": 17.237945492662472, "percentage": 86.19, "elapsed_time": "1:39:21", "remaining_time": "0:15:55", "throughput": 3599.51, "total_tokens": 21456936}
|
| 6596 |
+
{"current_steps": 32895, "total_steps": 38160, "loss": 0.4502, "lr": 2.84482576047089e-06, "epoch": 17.24056603773585, "percentage": 86.2, "elapsed_time": "1:39:21", "remaining_time": "0:15:54", "throughput": 3599.59, "total_tokens": 21460424}
|
| 6597 |
+
{"current_steps": 32900, "total_steps": 38160, "loss": 0.4898, "lr": 2.839530693240694e-06, "epoch": 17.243186582809223, "percentage": 86.22, "elapsed_time": "1:39:22", "remaining_time": "0:15:53", "throughput": 3599.54, "total_tokens": 21463016}
|
| 6598 |
+
{"current_steps": 32905, "total_steps": 38160, "loss": 0.4411, "lr": 2.8342402617278606e-06, "epoch": 17.2458071278826, "percentage": 86.23, "elapsed_time": "1:39:23", "remaining_time": "0:15:52", "throughput": 3599.65, "total_tokens": 21466632}
|
| 6599 |
+
{"current_steps": 32910, "total_steps": 38160, "loss": 0.4835, "lr": 2.82895446703908e-06, "epoch": 17.248427672955973, "percentage": 86.24, "elapsed_time": "1:39:24", "remaining_time": "0:15:51", "throughput": 3599.62, "total_tokens": 21469544}
|
| 6600 |
+
{"current_steps": 32915, "total_steps": 38160, "loss": 0.4119, "lr": 2.823673310280084e-06, "epoch": 17.25104821802935, "percentage": 86.26, "elapsed_time": "1:39:25", "remaining_time": "0:15:50", "throughput": 3599.54, "total_tokens": 21472040}
|
| 6601 |
+
{"current_steps": 32920, "total_steps": 38160, "loss": 0.404, "lr": 2.8183967925556305e-06, "epoch": 17.253668763102727, "percentage": 86.27, "elapsed_time": "1:39:26", "remaining_time": "0:15:49", "throughput": 3599.66, "total_tokens": 21475784}
|
| 6602 |
+
{"current_steps": 32925, "total_steps": 38160, "loss": 0.4212, "lr": 2.8131249149695115e-06, "epoch": 17.2562893081761, "percentage": 86.28, "elapsed_time": "1:39:27", "remaining_time": "0:15:48", "throughput": 3599.7, "total_tokens": 21479656}
|
| 6603 |
+
{"current_steps": 32930, "total_steps": 38160, "loss": 0.4358, "lr": 2.8078576786245397e-06, "epoch": 17.258909853249477, "percentage": 86.29, "elapsed_time": "1:39:27", "remaining_time": "0:15:47", "throughput": 3599.73, "total_tokens": 21482824}
|
| 6604 |
+
{"current_steps": 32935, "total_steps": 38160, "loss": 0.4492, "lr": 2.8025950846225557e-06, "epoch": 17.26153039832285, "percentage": 86.31, "elapsed_time": "1:39:28", "remaining_time": "0:15:46", "throughput": 3599.73, "total_tokens": 21485672}
|
| 6605 |
+
{"current_steps": 32940, "total_steps": 38160, "loss": 0.437, "lr": 2.7973371340644373e-06, "epoch": 17.264150943396228, "percentage": 86.32, "elapsed_time": "1:39:29", "remaining_time": "0:15:45", "throughput": 3599.73, "total_tokens": 21488616}
|
| 6606 |
+
{"current_steps": 32945, "total_steps": 38160, "loss": 0.4407, "lr": 2.7920838280500907e-06, "epoch": 17.2667714884696, "percentage": 86.33, "elapsed_time": "1:39:30", "remaining_time": "0:15:45", "throughput": 3599.64, "total_tokens": 21490952}
|
| 6607 |
+
{"current_steps": 32950, "total_steps": 38160, "loss": 0.4713, "lr": 2.7868351676784504e-06, "epoch": 17.269392033542978, "percentage": 86.35, "elapsed_time": "1:39:31", "remaining_time": "0:15:44", "throughput": 3599.6, "total_tokens": 21493832}
|
| 6608 |
+
{"current_steps": 32955, "total_steps": 38160, "loss": 0.4671, "lr": 2.7815911540474665e-06, "epoch": 17.27201257861635, "percentage": 86.36, "elapsed_time": "1:39:32", "remaining_time": "0:15:43", "throughput": 3599.48, "total_tokens": 21496136}
|
| 6609 |
+
{"current_steps": 32960, "total_steps": 38160, "loss": 0.5205, "lr": 2.7763517882541364e-06, "epoch": 17.27463312368973, "percentage": 86.37, "elapsed_time": "1:39:32", "remaining_time": "0:15:42", "throughput": 3599.48, "total_tokens": 21499432}
|
| 6610 |
+
{"current_steps": 32965, "total_steps": 38160, "loss": 0.6076, "lr": 2.771117071394466e-06, "epoch": 17.2772536687631, "percentage": 86.39, "elapsed_time": "1:39:33", "remaining_time": "0:15:41", "throughput": 3599.46, "total_tokens": 21502472}
|
| 6611 |
+
{"current_steps": 32970, "total_steps": 38160, "loss": 0.4077, "lr": 2.7658870045635055e-06, "epoch": 17.27987421383648, "percentage": 86.4, "elapsed_time": "1:39:34", "remaining_time": "0:15:40", "throughput": 3599.55, "total_tokens": 21506088}
|
| 6612 |
+
{"current_steps": 32975, "total_steps": 38160, "loss": 0.4359, "lr": 2.7606615888553234e-06, "epoch": 17.282494758909852, "percentage": 86.41, "elapsed_time": "1:39:35", "remaining_time": "0:15:39", "throughput": 3599.52, "total_tokens": 21508904}
|
| 6613 |
+
{"current_steps": 32980, "total_steps": 38160, "loss": 0.4101, "lr": 2.755440825363023e-06, "epoch": 17.28511530398323, "percentage": 86.43, "elapsed_time": "1:39:36", "remaining_time": "0:15:38", "throughput": 3599.41, "total_tokens": 21511624}
|
| 6614 |
+
{"current_steps": 32985, "total_steps": 38160, "loss": 0.4075, "lr": 2.75022471517872e-06, "epoch": 17.287735849056602, "percentage": 86.44, "elapsed_time": "1:39:37", "remaining_time": "0:15:37", "throughput": 3599.37, "total_tokens": 21514504}
|
| 6615 |
+
{"current_steps": 32990, "total_steps": 38160, "loss": 0.5743, "lr": 2.745013259393564e-06, "epoch": 17.29035639412998, "percentage": 86.45, "elapsed_time": "1:39:38", "remaining_time": "0:15:36", "throughput": 3599.32, "total_tokens": 21517256}
|
| 6616 |
+
{"current_steps": 32995, "total_steps": 38160, "loss": 0.4372, "lr": 2.739806459097735e-06, "epoch": 17.292976939203353, "percentage": 86.46, "elapsed_time": "1:39:38", "remaining_time": "0:15:35", "throughput": 3599.38, "total_tokens": 21520648}
|
| 6617 |
+
{"current_steps": 33000, "total_steps": 38160, "loss": 0.4252, "lr": 2.7346043153804403e-06, "epoch": 17.29559748427673, "percentage": 86.48, "elapsed_time": "1:39:39", "remaining_time": "0:15:35", "throughput": 3599.4, "total_tokens": 21523688}
|
| 6618 |
+
{"current_steps": 33005, "total_steps": 38160, "loss": 0.5022, "lr": 2.729406829329903e-06, "epoch": 17.298218029350103, "percentage": 86.49, "elapsed_time": "1:39:40", "remaining_time": "0:15:34", "throughput": 3599.48, "total_tokens": 21527016}
|
| 6619 |
+
{"current_steps": 33010, "total_steps": 38160, "loss": 0.4201, "lr": 2.72421400203339e-06, "epoch": 17.30083857442348, "percentage": 86.5, "elapsed_time": "1:39:41", "remaining_time": "0:15:33", "throughput": 3599.55, "total_tokens": 21530408}
|
| 6620 |
+
{"current_steps": 33015, "total_steps": 38160, "loss": 0.5594, "lr": 2.719025834577163e-06, "epoch": 17.303459119496857, "percentage": 86.52, "elapsed_time": "1:39:42", "remaining_time": "0:15:32", "throughput": 3599.56, "total_tokens": 21533288}
|
| 6621 |
+
{"current_steps": 33020, "total_steps": 38160, "loss": 0.4528, "lr": 2.713842328046537e-06, "epoch": 17.30607966457023, "percentage": 86.53, "elapsed_time": "1:39:43", "remaining_time": "0:15:31", "throughput": 3599.59, "total_tokens": 21536520}
|
| 6622 |
+
{"current_steps": 33025, "total_steps": 38160, "loss": 0.5409, "lr": 2.7086634835258416e-06, "epoch": 17.308700209643607, "percentage": 86.54, "elapsed_time": "1:39:43", "remaining_time": "0:15:30", "throughput": 3599.56, "total_tokens": 21539304}
|
| 6623 |
+
{"current_steps": 33030, "total_steps": 38160, "loss": 0.4324, "lr": 2.7034893020984333e-06, "epoch": 17.31132075471698, "percentage": 86.56, "elapsed_time": "1:39:44", "remaining_time": "0:15:29", "throughput": 3599.59, "total_tokens": 21542472}
|
| 6624 |
+
{"current_steps": 33035, "total_steps": 38160, "loss": 0.5162, "lr": 2.6983197848466952e-06, "epoch": 17.313941299790358, "percentage": 86.57, "elapsed_time": "1:39:45", "remaining_time": "0:15:28", "throughput": 3599.57, "total_tokens": 21545736}
|
| 6625 |
+
{"current_steps": 33040, "total_steps": 38160, "loss": 0.5722, "lr": 2.6931549328520265e-06, "epoch": 17.31656184486373, "percentage": 86.58, "elapsed_time": "1:39:46", "remaining_time": "0:15:27", "throughput": 3599.57, "total_tokens": 21548744}
|
| 6626 |
+
{"current_steps": 33045, "total_steps": 38160, "loss": 0.6005, "lr": 2.6879947471948523e-06, "epoch": 17.319182389937108, "percentage": 86.6, "elapsed_time": "1:39:47", "remaining_time": "0:15:26", "throughput": 3599.57, "total_tokens": 21551592}
|
| 6627 |
+
{"current_steps": 33050, "total_steps": 38160, "loss": 0.4786, "lr": 2.6828392289546285e-06, "epoch": 17.32180293501048, "percentage": 86.61, "elapsed_time": "1:39:48", "remaining_time": "0:15:25", "throughput": 3599.49, "total_tokens": 21554216}
|
| 6628 |
+
{"current_steps": 33055, "total_steps": 38160, "loss": 0.3486, "lr": 2.677688379209828e-06, "epoch": 17.32442348008386, "percentage": 86.62, "elapsed_time": "1:39:48", "remaining_time": "0:15:24", "throughput": 3599.44, "total_tokens": 21556776}
|
| 6629 |
+
{"current_steps": 33060, "total_steps": 38160, "loss": 0.5042, "lr": 2.6725421990379586e-06, "epoch": 17.32704402515723, "percentage": 86.64, "elapsed_time": "1:39:49", "remaining_time": "0:15:24", "throughput": 3599.44, "total_tokens": 21559976}
|
| 6630 |
+
{"current_steps": 33065, "total_steps": 38160, "loss": 0.5009, "lr": 2.667400689515537e-06, "epoch": 17.32966457023061, "percentage": 86.65, "elapsed_time": "1:39:50", "remaining_time": "0:15:23", "throughput": 3599.44, "total_tokens": 21562792}
|
| 6631 |
+
{"current_steps": 33070, "total_steps": 38160, "loss": 0.5848, "lr": 2.6622638517181e-06, "epoch": 17.332285115303982, "percentage": 86.66, "elapsed_time": "1:39:51", "remaining_time": "0:15:22", "throughput": 3599.46, "total_tokens": 21565736}
|
| 6632 |
+
{"current_steps": 33075, "total_steps": 38160, "loss": 0.3852, "lr": 2.657131686720224e-06, "epoch": 17.33490566037736, "percentage": 86.67, "elapsed_time": "1:39:52", "remaining_time": "0:15:21", "throughput": 3599.59, "total_tokens": 21569864}
|
| 6633 |
+
{"current_steps": 33080, "total_steps": 38160, "loss": 0.3087, "lr": 2.652004195595498e-06, "epoch": 17.337526205450732, "percentage": 86.69, "elapsed_time": "1:39:53", "remaining_time": "0:15:20", "throughput": 3599.58, "total_tokens": 21572776}
|
| 6634 |
+
{"current_steps": 33085, "total_steps": 38160, "loss": 0.651, "lr": 2.6468813794165356e-06, "epoch": 17.34014675052411, "percentage": 86.7, "elapsed_time": "1:39:54", "remaining_time": "0:15:19", "throughput": 3599.69, "total_tokens": 21576584}
|
| 6635 |
+
{"current_steps": 33090, "total_steps": 38160, "loss": 0.4693, "lr": 2.641763239254977e-06, "epoch": 17.342767295597483, "percentage": 86.71, "elapsed_time": "1:39:54", "remaining_time": "0:15:18", "throughput": 3599.68, "total_tokens": 21579496}
|
| 6636 |
+
{"current_steps": 33095, "total_steps": 38160, "loss": 0.574, "lr": 2.6366497761814713e-06, "epoch": 17.34538784067086, "percentage": 86.73, "elapsed_time": "1:39:55", "remaining_time": "0:15:17", "throughput": 3599.64, "total_tokens": 21582120}
|
| 6637 |
+
{"current_steps": 33100, "total_steps": 38160, "loss": 0.5489, "lr": 2.631540991265691e-06, "epoch": 17.348008385744233, "percentage": 86.74, "elapsed_time": "1:39:56", "remaining_time": "0:15:16", "throughput": 3599.62, "total_tokens": 21585224}
|
| 6638 |
+
{"current_steps": 33105, "total_steps": 38160, "loss": 0.3733, "lr": 2.6264368855763443e-06, "epoch": 17.35062893081761, "percentage": 86.75, "elapsed_time": "1:39:57", "remaining_time": "0:15:15", "throughput": 3599.62, "total_tokens": 21588264}
|
| 6639 |
+
{"current_steps": 33110, "total_steps": 38160, "loss": 0.5491, "lr": 2.6213374601811504e-06, "epoch": 17.353249475890987, "percentage": 86.77, "elapsed_time": "1:39:58", "remaining_time": "0:15:14", "throughput": 3599.66, "total_tokens": 21591464}
|
| 6640 |
+
{"current_steps": 33115, "total_steps": 38160, "loss": 0.4755, "lr": 2.616242716146855e-06, "epoch": 17.35587002096436, "percentage": 86.78, "elapsed_time": "1:39:58", "remaining_time": "0:15:13", "throughput": 3599.62, "total_tokens": 21594024}
|
| 6641 |
+
{"current_steps": 33120, "total_steps": 38160, "loss": 0.5546, "lr": 2.6111526545392145e-06, "epoch": 17.358490566037737, "percentage": 86.79, "elapsed_time": "1:39:59", "remaining_time": "0:15:13", "throughput": 3599.56, "total_tokens": 21596776}
|
| 6642 |
+
{"current_steps": 33125, "total_steps": 38160, "loss": 0.3488, "lr": 2.6060672764230106e-06, "epoch": 17.36111111111111, "percentage": 86.81, "elapsed_time": "1:40:00", "remaining_time": "0:15:12", "throughput": 3599.64, "total_tokens": 21600392}
|
| 6643 |
+
{"current_steps": 33130, "total_steps": 38160, "loss": 0.6513, "lr": 2.600986582862047e-06, "epoch": 17.363731656184488, "percentage": 86.82, "elapsed_time": "1:40:01", "remaining_time": "0:15:11", "throughput": 3599.69, "total_tokens": 21603880}
|
| 6644 |
+
{"current_steps": 33135, "total_steps": 38160, "loss": 0.4904, "lr": 2.5959105749191498e-06, "epoch": 17.36635220125786, "percentage": 86.83, "elapsed_time": "1:40:02", "remaining_time": "0:15:10", "throughput": 3599.63, "total_tokens": 21606600}
|
| 6645 |
+
{"current_steps": 33140, "total_steps": 38160, "loss": 0.5663, "lr": 2.590839253656166e-06, "epoch": 17.368972746331238, "percentage": 86.84, "elapsed_time": "1:40:03", "remaining_time": "0:15:09", "throughput": 3599.75, "total_tokens": 21610856}
|
| 6646 |
+
{"current_steps": 33145, "total_steps": 38160, "loss": 0.4115, "lr": 2.5857726201339473e-06, "epoch": 17.37159329140461, "percentage": 86.86, "elapsed_time": "1:40:04", "remaining_time": "0:15:08", "throughput": 3599.88, "total_tokens": 21615016}
|
| 6647 |
+
{"current_steps": 33150, "total_steps": 38160, "loss": 0.5147, "lr": 2.5807106754123873e-06, "epoch": 17.37421383647799, "percentage": 86.87, "elapsed_time": "1:40:05", "remaining_time": "0:15:07", "throughput": 3599.96, "total_tokens": 21618920}
|
| 6648 |
+
{"current_steps": 33155, "total_steps": 38160, "loss": 0.3616, "lr": 2.575653420550378e-06, "epoch": 17.37683438155136, "percentage": 86.88, "elapsed_time": "1:40:06", "remaining_time": "0:15:06", "throughput": 3599.99, "total_tokens": 21621960}
|
| 6649 |
+
{"current_steps": 33160, "total_steps": 38160, "loss": 0.4921, "lr": 2.57060085660584e-06, "epoch": 17.37945492662474, "percentage": 86.9, "elapsed_time": "1:40:06", "remaining_time": "0:15:05", "throughput": 3599.94, "total_tokens": 21624584}
|
| 6650 |
+
{"current_steps": 33165, "total_steps": 38160, "loss": 0.451, "lr": 2.565552984635716e-06, "epoch": 17.382075471698112, "percentage": 86.91, "elapsed_time": "1:40:07", "remaining_time": "0:15:04", "throughput": 3600.05, "total_tokens": 21628200}
|
| 6651 |
+
{"current_steps": 33170, "total_steps": 38160, "loss": 0.3943, "lr": 2.5605098056959652e-06, "epoch": 17.38469601677149, "percentage": 86.92, "elapsed_time": "1:40:08", "remaining_time": "0:15:03", "throughput": 3600.01, "total_tokens": 21630792}
|
| 6652 |
+
{"current_steps": 33175, "total_steps": 38160, "loss": 0.5641, "lr": 2.555471320841557e-06, "epoch": 17.387316561844862, "percentage": 86.94, "elapsed_time": "1:40:09", "remaining_time": "0:15:02", "throughput": 3599.96, "total_tokens": 21633480}
|
| 6653 |
+
{"current_steps": 33180, "total_steps": 38160, "loss": 0.4654, "lr": 2.55043753112649e-06, "epoch": 17.38993710691824, "percentage": 86.95, "elapsed_time": "1:40:10", "remaining_time": "0:15:02", "throughput": 3599.99, "total_tokens": 21636584}
|
| 6654 |
+
{"current_steps": 33185, "total_steps": 38160, "loss": 0.4981, "lr": 2.5454084376037717e-06, "epoch": 17.392557651991613, "percentage": 86.96, "elapsed_time": "1:40:10", "remaining_time": "0:15:01", "throughput": 3600.01, "total_tokens": 21639624}
|
| 6655 |
+
{"current_steps": 33190, "total_steps": 38160, "loss": 0.6054, "lr": 2.5403840413254304e-06, "epoch": 17.39517819706499, "percentage": 86.98, "elapsed_time": "1:40:11", "remaining_time": "0:15:00", "throughput": 3599.98, "total_tokens": 21642248}
|
| 6656 |
+
{"current_steps": 33195, "total_steps": 38160, "loss": 0.6262, "lr": 2.5353643433425196e-06, "epoch": 17.397798742138363, "percentage": 86.99, "elapsed_time": "1:40:12", "remaining_time": "0:14:59", "throughput": 3600.0, "total_tokens": 21645576}
|
| 6657 |
+
{"current_steps": 33200, "total_steps": 38160, "loss": 0.5517, "lr": 2.5303493447050965e-06, "epoch": 17.40041928721174, "percentage": 87.0, "elapsed_time": "1:40:13", "remaining_time": "0:14:58", "throughput": 3599.93, "total_tokens": 21648712}
|
| 6658 |
+
{"current_steps": 33205, "total_steps": 38160, "loss": 0.5286, "lr": 2.525339046462244e-06, "epoch": 17.403039832285117, "percentage": 87.02, "elapsed_time": "1:40:14", "remaining_time": "0:14:57", "throughput": 3599.85, "total_tokens": 21651624}
|
| 6659 |
+
{"current_steps": 33210, "total_steps": 38160, "loss": 0.5743, "lr": 2.5203334496620544e-06, "epoch": 17.40566037735849, "percentage": 87.03, "elapsed_time": "1:40:15", "remaining_time": "0:14:56", "throughput": 3599.97, "total_tokens": 21655624}
|
| 6660 |
+
{"current_steps": 33215, "total_steps": 38160, "loss": 0.5223, "lr": 2.515332555351646e-06, "epoch": 17.408280922431867, "percentage": 87.04, "elapsed_time": "1:40:16", "remaining_time": "0:14:55", "throughput": 3599.96, "total_tokens": 21658632}
|
| 6661 |
+
{"current_steps": 33220, "total_steps": 38160, "loss": 0.3889, "lr": 2.510336364577154e-06, "epoch": 17.41090146750524, "percentage": 87.05, "elapsed_time": "1:40:17", "remaining_time": "0:14:54", "throughput": 3600.0, "total_tokens": 21661736}
|
| 6662 |
+
{"current_steps": 33225, "total_steps": 38160, "loss": 0.5215, "lr": 2.5053448783837143e-06, "epoch": 17.413522012578618, "percentage": 87.07, "elapsed_time": "1:40:18", "remaining_time": "0:14:53", "throughput": 3599.99, "total_tokens": 21664776}
|
| 6663 |
+
{"current_steps": 33230, "total_steps": 38160, "loss": 0.4628, "lr": 2.5003580978154917e-06, "epoch": 17.41614255765199, "percentage": 87.08, "elapsed_time": "1:40:18", "remaining_time": "0:14:52", "throughput": 3599.99, "total_tokens": 21667752}
|
| 6664 |
+
{"current_steps": 33235, "total_steps": 38160, "loss": 0.5853, "lr": 2.4953760239156716e-06, "epoch": 17.418763102725368, "percentage": 87.09, "elapsed_time": "1:40:19", "remaining_time": "0:14:52", "throughput": 3600.13, "total_tokens": 21672168}
|
| 6665 |
+
{"current_steps": 33240, "total_steps": 38160, "loss": 0.5654, "lr": 2.4903986577264366e-06, "epoch": 17.42138364779874, "percentage": 87.11, "elapsed_time": "1:40:20", "remaining_time": "0:14:51", "throughput": 3600.04, "total_tokens": 21674472}
|
| 6666 |
+
{"current_steps": 33245, "total_steps": 38160, "loss": 0.7179, "lr": 2.4854260002889986e-06, "epoch": 17.42400419287212, "percentage": 87.12, "elapsed_time": "1:40:21", "remaining_time": "0:14:50", "throughput": 3600.05, "total_tokens": 21677608}
|
| 6667 |
+
{"current_steps": 33250, "total_steps": 38160, "loss": 0.4742, "lr": 2.480458052643586e-06, "epoch": 17.42662473794549, "percentage": 87.13, "elapsed_time": "1:40:22", "remaining_time": "0:14:49", "throughput": 3600.05, "total_tokens": 21680552}
|
| 6668 |
+
{"current_steps": 33255, "total_steps": 38160, "loss": 0.5419, "lr": 2.4754948158294293e-06, "epoch": 17.42924528301887, "percentage": 87.15, "elapsed_time": "1:40:23", "remaining_time": "0:14:48", "throughput": 3599.86, "total_tokens": 21682696}
|
| 6669 |
+
{"current_steps": 33260, "total_steps": 38160, "loss": 0.5415, "lr": 2.4705362908847814e-06, "epoch": 17.431865828092242, "percentage": 87.16, "elapsed_time": "1:40:24", "remaining_time": "0:14:47", "throughput": 3599.97, "total_tokens": 21686920}
|
| 6670 |
+
{"current_steps": 33265, "total_steps": 38160, "loss": 0.3544, "lr": 2.4655824788469172e-06, "epoch": 17.43448637316562, "percentage": 87.17, "elapsed_time": "1:40:25", "remaining_time": "0:14:46", "throughput": 3599.9, "total_tokens": 21689416}
|
| 6671 |
+
{"current_steps": 33270, "total_steps": 38160, "loss": 0.3189, "lr": 2.4606333807521087e-06, "epoch": 17.437106918238992, "percentage": 87.19, "elapsed_time": "1:40:25", "remaining_time": "0:14:45", "throughput": 3600.03, "total_tokens": 21693480}
|
| 6672 |
+
{"current_steps": 33275, "total_steps": 38160, "loss": 0.4898, "lr": 2.455688997635658e-06, "epoch": 17.43972746331237, "percentage": 87.2, "elapsed_time": "1:40:26", "remaining_time": "0:14:44", "throughput": 3600.0, "total_tokens": 21696296}
|
| 6673 |
+
{"current_steps": 33280, "total_steps": 38160, "loss": 0.3999, "lr": 2.4507493305318652e-06, "epoch": 17.442348008385743, "percentage": 87.21, "elapsed_time": "1:40:28", "remaining_time": "0:14:43", "throughput": 3600.09, "total_tokens": 21702088}
|
| 6674 |
+
{"current_steps": 33285, "total_steps": 38160, "loss": 0.5512, "lr": 2.445814380474057e-06, "epoch": 17.44496855345912, "percentage": 87.22, "elapsed_time": "1:40:29", "remaining_time": "0:14:43", "throughput": 3600.12, "total_tokens": 21705160}
|
| 6675 |
+
{"current_steps": 33290, "total_steps": 38160, "loss": 0.52, "lr": 2.4408841484945684e-06, "epoch": 17.447589098532493, "percentage": 87.24, "elapsed_time": "1:40:29", "remaining_time": "0:14:42", "throughput": 3600.17, "total_tokens": 21708328}
|
| 6676 |
+
{"current_steps": 33295, "total_steps": 38160, "loss": 0.6114, "lr": 2.435958635624755e-06, "epoch": 17.45020964360587, "percentage": 87.25, "elapsed_time": "1:40:30", "remaining_time": "0:14:41", "throughput": 3600.41, "total_tokens": 21713768}
|
| 6677 |
+
{"current_steps": 33300, "total_steps": 38160, "loss": 0.46, "lr": 2.4310378428949678e-06, "epoch": 17.452830188679247, "percentage": 87.26, "elapsed_time": "1:40:31", "remaining_time": "0:14:40", "throughput": 3600.45, "total_tokens": 21717192}
|
| 6678 |
+
{"current_steps": 33305, "total_steps": 38160, "loss": 0.5117, "lr": 2.4261217713345804e-06, "epoch": 17.45545073375262, "percentage": 87.28, "elapsed_time": "1:40:32", "remaining_time": "0:14:39", "throughput": 3600.54, "total_tokens": 21721384}
|
| 6679 |
+
{"current_steps": 33310, "total_steps": 38160, "loss": 0.4571, "lr": 2.421210421971981e-06, "epoch": 17.458071278825997, "percentage": 87.29, "elapsed_time": "1:40:33", "remaining_time": "0:14:38", "throughput": 3600.55, "total_tokens": 21724392}
|
| 6680 |
+
{"current_steps": 33315, "total_steps": 38160, "loss": 0.5663, "lr": 2.416303795834571e-06, "epoch": 17.46069182389937, "percentage": 87.3, "elapsed_time": "1:40:34", "remaining_time": "0:14:37", "throughput": 3600.51, "total_tokens": 21727208}
|
| 6681 |
+
{"current_steps": 33320, "total_steps": 38160, "loss": 0.4108, "lr": 2.4114018939487615e-06, "epoch": 17.463312368972748, "percentage": 87.32, "elapsed_time": "1:40:35", "remaining_time": "0:14:36", "throughput": 3600.48, "total_tokens": 21729960}
|
| 6682 |
+
{"current_steps": 33325, "total_steps": 38160, "loss": 0.4723, "lr": 2.4065047173399662e-06, "epoch": 17.46593291404612, "percentage": 87.33, "elapsed_time": "1:40:36", "remaining_time": "0:14:35", "throughput": 3600.63, "total_tokens": 21734216}
|
| 6683 |
+
{"current_steps": 33330, "total_steps": 38160, "loss": 0.605, "lr": 2.4016122670326295e-06, "epoch": 17.468553459119498, "percentage": 87.34, "elapsed_time": "1:40:37", "remaining_time": "0:14:34", "throughput": 3600.7, "total_tokens": 21738152}
|
| 6684 |
+
{"current_steps": 33335, "total_steps": 38160, "loss": 0.5456, "lr": 2.396724544050186e-06, "epoch": 17.47117400419287, "percentage": 87.36, "elapsed_time": "1:40:38", "remaining_time": "0:14:33", "throughput": 3600.72, "total_tokens": 21741576}
|
| 6685 |
+
{"current_steps": 33340, "total_steps": 38160, "loss": 0.3893, "lr": 2.3918415494150944e-06, "epoch": 17.47379454926625, "percentage": 87.37, "elapsed_time": "1:40:39", "remaining_time": "0:14:33", "throughput": 3600.79, "total_tokens": 21745224}
|
| 6686 |
+
{"current_steps": 33345, "total_steps": 38160, "loss": 0.6033, "lr": 2.3869632841488245e-06, "epoch": 17.47641509433962, "percentage": 87.38, "elapsed_time": "1:40:39", "remaining_time": "0:14:32", "throughput": 3600.81, "total_tokens": 21748424}
|
| 6687 |
+
{"current_steps": 33350, "total_steps": 38160, "loss": 0.4866, "lr": 2.382089749271857e-06, "epoch": 17.479035639413, "percentage": 87.4, "elapsed_time": "1:40:40", "remaining_time": "0:14:31", "throughput": 3600.77, "total_tokens": 21751336}
|
| 6688 |
+
{"current_steps": 33355, "total_steps": 38160, "loss": 0.4127, "lr": 2.3772209458036737e-06, "epoch": 17.481656184486372, "percentage": 87.41, "elapsed_time": "1:40:41", "remaining_time": "0:14:30", "throughput": 3600.77, "total_tokens": 21754568}
|
| 6689 |
+
{"current_steps": 33360, "total_steps": 38160, "loss": 0.4299, "lr": 2.372356874762771e-06, "epoch": 17.48427672955975, "percentage": 87.42, "elapsed_time": "1:40:42", "remaining_time": "0:14:29", "throughput": 3600.75, "total_tokens": 21757832}
|
| 6690 |
+
{"current_steps": 33365, "total_steps": 38160, "loss": 0.3885, "lr": 2.36749753716666e-06, "epoch": 17.486897274633122, "percentage": 87.43, "elapsed_time": "1:40:43", "remaining_time": "0:14:28", "throughput": 3600.75, "total_tokens": 21760968}
|
| 6691 |
+
{"current_steps": 33370, "total_steps": 38160, "loss": 0.4008, "lr": 2.3626429340318613e-06, "epoch": 17.4895178197065, "percentage": 87.45, "elapsed_time": "1:40:44", "remaining_time": "0:14:27", "throughput": 3600.84, "total_tokens": 21765160}
|
| 6692 |
+
{"current_steps": 33375, "total_steps": 38160, "loss": 0.6543, "lr": 2.357793066373901e-06, "epoch": 17.492138364779873, "percentage": 87.46, "elapsed_time": "1:40:45", "remaining_time": "0:14:26", "throughput": 3600.86, "total_tokens": 21768584}
|
| 6693 |
+
{"current_steps": 33380, "total_steps": 38160, "loss": 0.4788, "lr": 2.352947935207317e-06, "epoch": 17.49475890985325, "percentage": 87.47, "elapsed_time": "1:40:46", "remaining_time": "0:14:25", "throughput": 3600.83, "total_tokens": 21771304}
|
| 6694 |
+
{"current_steps": 33385, "total_steps": 38160, "loss": 0.5827, "lr": 2.3481075415456495e-06, "epoch": 17.497379454926623, "percentage": 87.49, "elapsed_time": "1:40:47", "remaining_time": "0:14:24", "throughput": 3600.86, "total_tokens": 21774568}
|
| 6695 |
+
{"current_steps": 33390, "total_steps": 38160, "loss": 0.5791, "lr": 2.3432718864014576e-06, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "1:40:47", "remaining_time": "0:14:23", "throughput": 3600.88, "total_tokens": 21777608}
|
| 6696 |
+
{"current_steps": 33395, "total_steps": 38160, "loss": 0.4255, "lr": 2.338440970786304e-06, "epoch": 17.502620545073377, "percentage": 87.51, "elapsed_time": "1:40:48", "remaining_time": "0:14:23", "throughput": 3600.97, "total_tokens": 21781288}
|
| 6697 |
+
{"current_steps": 33400, "total_steps": 38160, "loss": 0.5877, "lr": 2.3336147957107644e-06, "epoch": 17.50524109014675, "percentage": 87.53, "elapsed_time": "1:40:49", "remaining_time": "0:14:22", "throughput": 3600.94, "total_tokens": 21784168}
|
| 6698 |
+
{"current_steps": 33405, "total_steps": 38160, "loss": 0.5042, "lr": 2.3287933621844227e-06, "epoch": 17.507861635220127, "percentage": 87.54, "elapsed_time": "1:40:50", "remaining_time": "0:14:21", "throughput": 3600.9, "total_tokens": 21786760}
|
| 6699 |
+
{"current_steps": 33410, "total_steps": 38160, "loss": 0.445, "lr": 2.3239766712158633e-06, "epoch": 17.5104821802935, "percentage": 87.55, "elapsed_time": "1:40:51", "remaining_time": "0:14:20", "throughput": 3600.86, "total_tokens": 21789352}
|
| 6700 |
+
{"current_steps": 33415, "total_steps": 38160, "loss": 0.5253, "lr": 2.319164723812675e-06, "epoch": 17.513102725366878, "percentage": 87.57, "elapsed_time": "1:40:51", "remaining_time": "0:14:19", "throughput": 3600.86, "total_tokens": 21792296}
|
| 6701 |
+
{"current_steps": 33420, "total_steps": 38160, "loss": 0.4326, "lr": 2.314357520981472e-06, "epoch": 17.51572327044025, "percentage": 87.58, "elapsed_time": "1:40:52", "remaining_time": "0:14:18", "throughput": 3600.76, "total_tokens": 21794824}
|
| 6702 |
+
{"current_steps": 33425, "total_steps": 38160, "loss": 0.4891, "lr": 2.309555063727864e-06, "epoch": 17.518343815513628, "percentage": 87.59, "elapsed_time": "1:40:53", "remaining_time": "0:14:17", "throughput": 3600.79, "total_tokens": 21797928}
|
| 6703 |
+
{"current_steps": 33430, "total_steps": 38160, "loss": 0.6761, "lr": 2.304757353056475e-06, "epoch": 17.520964360587, "percentage": 87.6, "elapsed_time": "1:40:54", "remaining_time": "0:14:16", "throughput": 3600.83, "total_tokens": 21801352}
|
| 6704 |
+
{"current_steps": 33435, "total_steps": 38160, "loss": 0.4508, "lr": 2.2999643899709267e-06, "epoch": 17.52358490566038, "percentage": 87.62, "elapsed_time": "1:40:55", "remaining_time": "0:14:15", "throughput": 3600.96, "total_tokens": 21805768}
|
| 6705 |
+
{"current_steps": 33440, "total_steps": 38160, "loss": 0.3899, "lr": 2.2951761754738456e-06, "epoch": 17.52620545073375, "percentage": 87.63, "elapsed_time": "1:40:56", "remaining_time": "0:14:14", "throughput": 3600.96, "total_tokens": 21808648}
|
| 6706 |
+
{"current_steps": 33445, "total_steps": 38160, "loss": 0.5373, "lr": 2.29039271056688e-06, "epoch": 17.52882599580713, "percentage": 87.64, "elapsed_time": "1:40:57", "remaining_time": "0:14:13", "throughput": 3600.88, "total_tokens": 21811080}
|
| 6707 |
+
{"current_steps": 33450, "total_steps": 38160, "loss": 0.3959, "lr": 2.285613996250674e-06, "epoch": 17.531446540880502, "percentage": 87.66, "elapsed_time": "1:40:58", "remaining_time": "0:14:13", "throughput": 3600.92, "total_tokens": 21814568}
|
| 6708 |
+
{"current_steps": 33455, "total_steps": 38160, "loss": 0.4666, "lr": 2.2808400335248865e-06, "epoch": 17.53406708595388, "percentage": 87.67, "elapsed_time": "1:40:58", "remaining_time": "0:14:12", "throughput": 3600.9, "total_tokens": 21817416}
|
| 6709 |
+
{"current_steps": 33460, "total_steps": 38160, "loss": 0.4622, "lr": 2.2760708233881654e-06, "epoch": 17.536687631027252, "percentage": 87.68, "elapsed_time": "1:40:59", "remaining_time": "0:14:11", "throughput": 3600.91, "total_tokens": 21820648}
|
| 6710 |
+
{"current_steps": 33465, "total_steps": 38160, "loss": 0.5378, "lr": 2.271306366838183e-06, "epoch": 17.53930817610063, "percentage": 87.7, "elapsed_time": "1:41:00", "remaining_time": "0:14:10", "throughput": 3600.89, "total_tokens": 21823560}
|
| 6711 |
+
{"current_steps": 33470, "total_steps": 38160, "loss": 0.5766, "lr": 2.266546664871602e-06, "epoch": 17.541928721174003, "percentage": 87.71, "elapsed_time": "1:41:01", "remaining_time": "0:14:09", "throughput": 3600.98, "total_tokens": 21827208}
|
| 6712 |
+
{"current_steps": 33475, "total_steps": 38160, "loss": 0.5802, "lr": 2.261791718484102e-06, "epoch": 17.54454926624738, "percentage": 87.72, "elapsed_time": "1:41:02", "remaining_time": "0:14:08", "throughput": 3600.96, "total_tokens": 21830120}
|
| 6713 |
+
{"current_steps": 33480, "total_steps": 38160, "loss": 0.4967, "lr": 2.257041528670362e-06, "epoch": 17.547169811320753, "percentage": 87.74, "elapsed_time": "1:41:03", "remaining_time": "0:14:07", "throughput": 3600.95, "total_tokens": 21833064}
|
| 6714 |
+
{"current_steps": 33485, "total_steps": 38160, "loss": 0.5094, "lr": 2.2522960964240753e-06, "epoch": 17.54979035639413, "percentage": 87.75, "elapsed_time": "1:41:03", "remaining_time": "0:14:06", "throughput": 3600.93, "total_tokens": 21835752}
|
| 6715 |
+
{"current_steps": 33490, "total_steps": 38160, "loss": 0.6131, "lr": 2.247555422737924e-06, "epoch": 17.552410901467507, "percentage": 87.76, "elapsed_time": "1:41:04", "remaining_time": "0:14:05", "throughput": 3600.91, "total_tokens": 21838664}
|
| 6716 |
+
{"current_steps": 33495, "total_steps": 38160, "loss": 0.5196, "lr": 2.2428195086036024e-06, "epoch": 17.55503144654088, "percentage": 87.78, "elapsed_time": "1:41:05", "remaining_time": "0:14:04", "throughput": 3600.98, "total_tokens": 21842184}
|
| 6717 |
+
{"current_steps": 33500, "total_steps": 38160, "loss": 0.4855, "lr": 2.238088355011811e-06, "epoch": 17.557651991614257, "percentage": 87.79, "elapsed_time": "1:41:06", "remaining_time": "0:14:03", "throughput": 3601.0, "total_tokens": 21845224}
|
| 6718 |
+
{"current_steps": 33505, "total_steps": 38160, "loss": 0.72, "lr": 2.2333619629522518e-06, "epoch": 17.56027253668763, "percentage": 87.8, "elapsed_time": "1:41:07", "remaining_time": "0:14:02", "throughput": 3601.07, "total_tokens": 21848552}
|
| 6719 |
+
{"current_steps": 33510, "total_steps": 38160, "loss": 0.505, "lr": 2.2286403334136424e-06, "epoch": 17.562893081761008, "percentage": 87.81, "elapsed_time": "1:41:08", "remaining_time": "0:14:02", "throughput": 3600.99, "total_tokens": 21850856}
|
| 6720 |
+
{"current_steps": 33515, "total_steps": 38160, "loss": 0.4252, "lr": 2.223923467383679e-06, "epoch": 17.56551362683438, "percentage": 87.83, "elapsed_time": "1:41:09", "remaining_time": "0:14:01", "throughput": 3601.03, "total_tokens": 21856104}
|
| 6721 |
+
{"current_steps": 33520, "total_steps": 38160, "loss": 0.3913, "lr": 2.2192113658490886e-06, "epoch": 17.568134171907758, "percentage": 87.84, "elapsed_time": "1:41:10", "remaining_time": "0:14:00", "throughput": 3600.92, "total_tokens": 21858440}
|
| 6722 |
+
{"current_steps": 33525, "total_steps": 38160, "loss": 0.7861, "lr": 2.2145040297955776e-06, "epoch": 17.57075471698113, "percentage": 87.85, "elapsed_time": "1:41:11", "remaining_time": "0:13:59", "throughput": 3600.89, "total_tokens": 21861064}
|
| 6723 |
+
{"current_steps": 33530, "total_steps": 38160, "loss": 0.3039, "lr": 2.209801460207875e-06, "epoch": 17.57337526205451, "percentage": 87.87, "elapsed_time": "1:41:11", "remaining_time": "0:13:58", "throughput": 3600.88, "total_tokens": 21863848}
|
| 6724 |
+
{"current_steps": 33535, "total_steps": 38160, "loss": 0.4574, "lr": 2.2051036580697042e-06, "epoch": 17.57599580712788, "percentage": 87.88, "elapsed_time": "1:41:12", "remaining_time": "0:13:57", "throughput": 3600.92, "total_tokens": 21867208}
|
| 6725 |
+
{"current_steps": 33540, "total_steps": 38160, "loss": 0.4362, "lr": 2.2004106243637885e-06, "epoch": 17.57861635220126, "percentage": 87.89, "elapsed_time": "1:41:13", "remaining_time": "0:13:56", "throughput": 3601.04, "total_tokens": 21871944}
|
| 6726 |
+
{"current_steps": 33545, "total_steps": 38160, "loss": 0.2899, "lr": 2.1957223600718587e-06, "epoch": 17.581236897274632, "percentage": 87.91, "elapsed_time": "1:41:14", "remaining_time": "0:13:55", "throughput": 3600.96, "total_tokens": 21874728}
|
| 6727 |
+
{"current_steps": 33550, "total_steps": 38160, "loss": 0.4304, "lr": 2.1910388661746493e-06, "epoch": 17.58385744234801, "percentage": 87.92, "elapsed_time": "1:41:15", "remaining_time": "0:13:54", "throughput": 3600.85, "total_tokens": 21877320}
|
| 6728 |
+
{"current_steps": 33555, "total_steps": 38160, "loss": 0.4383, "lr": 2.186360143651886e-06, "epoch": 17.586477987421382, "percentage": 87.93, "elapsed_time": "1:41:16", "remaining_time": "0:13:53", "throughput": 3600.81, "total_tokens": 21880488}
|
| 6729 |
+
{"current_steps": 33560, "total_steps": 38160, "loss": 0.5395, "lr": 2.1816861934823103e-06, "epoch": 17.58909853249476, "percentage": 87.95, "elapsed_time": "1:41:17", "remaining_time": "0:13:53", "throughput": 3600.77, "total_tokens": 21883080}
|
| 6730 |
+
{"current_steps": 33565, "total_steps": 38160, "loss": 0.6013, "lr": 2.1770170166436595e-06, "epoch": 17.591719077568133, "percentage": 87.96, "elapsed_time": "1:41:18", "remaining_time": "0:13:52", "throughput": 3600.84, "total_tokens": 21886536}
|
| 6731 |
+
{"current_steps": 33570, "total_steps": 38160, "loss": 0.4787, "lr": 2.172352614112669e-06, "epoch": 17.59433962264151, "percentage": 87.97, "elapsed_time": "1:41:19", "remaining_time": "0:13:51", "throughput": 3600.89, "total_tokens": 21889896}
|
| 6732 |
+
{"current_steps": 33575, "total_steps": 38160, "loss": 0.4666, "lr": 2.167692986865083e-06, "epoch": 17.596960167714883, "percentage": 87.98, "elapsed_time": "1:41:19", "remaining_time": "0:13:50", "throughput": 3600.93, "total_tokens": 21893256}
|
| 6733 |
+
{"current_steps": 33580, "total_steps": 38160, "loss": 0.4885, "lr": 2.1630381358756362e-06, "epoch": 17.59958071278826, "percentage": 88.0, "elapsed_time": "1:41:20", "remaining_time": "0:13:49", "throughput": 3600.94, "total_tokens": 21896328}
|
| 6734 |
+
{"current_steps": 33585, "total_steps": 38160, "loss": 0.5558, "lr": 2.158388062118072e-06, "epoch": 17.602201257861637, "percentage": 88.01, "elapsed_time": "1:41:21", "remaining_time": "0:13:48", "throughput": 3600.87, "total_tokens": 21898856}
|
| 6735 |
+
{"current_steps": 33590, "total_steps": 38160, "loss": 0.4691, "lr": 2.1537427665651393e-06, "epoch": 17.60482180293501, "percentage": 88.02, "elapsed_time": "1:41:22", "remaining_time": "0:13:47", "throughput": 3600.88, "total_tokens": 21901992}
|
| 6736 |
+
{"current_steps": 33595, "total_steps": 38160, "loss": 0.3822, "lr": 2.1491022501885726e-06, "epoch": 17.607442348008387, "percentage": 88.04, "elapsed_time": "1:41:23", "remaining_time": "0:13:46", "throughput": 3600.83, "total_tokens": 21904904}
|
| 6737 |
+
{"current_steps": 33600, "total_steps": 38160, "loss": 0.3941, "lr": 2.1444665139591174e-06, "epoch": 17.61006289308176, "percentage": 88.05, "elapsed_time": "1:41:24", "remaining_time": "0:13:45", "throughput": 3600.91, "total_tokens": 21908808}
|
| 6738 |
+
{"current_steps": 33605, "total_steps": 38160, "loss": 0.5005, "lr": 2.139835558846523e-06, "epoch": 17.612683438155138, "percentage": 88.06, "elapsed_time": "1:41:25", "remaining_time": "0:13:44", "throughput": 3600.92, "total_tokens": 21911720}
|
| 6739 |
+
{"current_steps": 33610, "total_steps": 38160, "loss": 0.4342, "lr": 2.1352093858195225e-06, "epoch": 17.61530398322851, "percentage": 88.08, "elapsed_time": "1:41:25", "remaining_time": "0:13:43", "throughput": 3600.83, "total_tokens": 21914024}
|
| 6740 |
+
{"current_steps": 33615, "total_steps": 38160, "loss": 0.3942, "lr": 2.1305879958458676e-06, "epoch": 17.617924528301888, "percentage": 88.09, "elapsed_time": "1:41:26", "remaining_time": "0:13:42", "throughput": 3600.75, "total_tokens": 21916648}
|
| 6741 |
+
{"current_steps": 33620, "total_steps": 38160, "loss": 0.4795, "lr": 2.1259713898922894e-06, "epoch": 17.62054507337526, "percentage": 88.1, "elapsed_time": "1:41:27", "remaining_time": "0:13:42", "throughput": 3600.82, "total_tokens": 21920136}
|
| 6742 |
+
{"current_steps": 33625, "total_steps": 38160, "loss": 0.4281, "lr": 2.1213595689245386e-06, "epoch": 17.62316561844864, "percentage": 88.12, "elapsed_time": "1:41:28", "remaining_time": "0:13:41", "throughput": 3600.95, "total_tokens": 21924168}
|
| 6743 |
+
{"current_steps": 33630, "total_steps": 38160, "loss": 0.4812, "lr": 2.1167525339073513e-06, "epoch": 17.62578616352201, "percentage": 88.13, "elapsed_time": "1:41:30", "remaining_time": "0:13:40", "throughput": 3601.19, "total_tokens": 21931656}
|
| 6744 |
+
{"current_steps": 33635, "total_steps": 38160, "loss": 0.4448, "lr": 2.112150285804473e-06, "epoch": 17.62840670859539, "percentage": 88.14, "elapsed_time": "1:41:30", "remaining_time": "0:13:39", "throughput": 3601.11, "total_tokens": 21934184}
|
| 6745 |
+
{"current_steps": 33640, "total_steps": 38160, "loss": 0.4861, "lr": 2.107552825578632e-06, "epoch": 17.631027253668762, "percentage": 88.16, "elapsed_time": "1:41:31", "remaining_time": "0:13:38", "throughput": 3601.31, "total_tokens": 21938856}
|
| 6746 |
+
{"current_steps": 33645, "total_steps": 38160, "loss": 0.4627, "lr": 2.102960154191572e-06, "epoch": 17.63364779874214, "percentage": 88.17, "elapsed_time": "1:41:32", "remaining_time": "0:13:37", "throughput": 3601.31, "total_tokens": 21941736}
|
| 6747 |
+
{"current_steps": 33650, "total_steps": 38160, "loss": 0.5445, "lr": 2.098372272604021e-06, "epoch": 17.636268343815512, "percentage": 88.18, "elapsed_time": "1:41:33", "remaining_time": "0:13:36", "throughput": 3601.38, "total_tokens": 21945256}
|
| 6748 |
+
{"current_steps": 33655, "total_steps": 38160, "loss": 0.5287, "lr": 2.0937891817757155e-06, "epoch": 17.63888888888889, "percentage": 88.19, "elapsed_time": "1:41:34", "remaining_time": "0:13:35", "throughput": 3601.22, "total_tokens": 21947432}
|
| 6749 |
+
{"current_steps": 33660, "total_steps": 38160, "loss": 0.5614, "lr": 2.089210882665385e-06, "epoch": 17.641509433962263, "percentage": 88.21, "elapsed_time": "1:41:35", "remaining_time": "0:13:34", "throughput": 3601.2, "total_tokens": 21950664}
|
| 6750 |
+
{"current_steps": 33665, "total_steps": 38160, "loss": 0.5185, "lr": 2.0846373762307635e-06, "epoch": 17.64412997903564, "percentage": 88.22, "elapsed_time": "1:41:36", "remaining_time": "0:13:33", "throughput": 3601.21, "total_tokens": 21953704}
|
| 6751 |
+
{"current_steps": 33670, "total_steps": 38160, "loss": 0.4006, "lr": 2.080068663428569e-06, "epoch": 17.646750524109013, "percentage": 88.23, "elapsed_time": "1:41:37", "remaining_time": "0:13:33", "throughput": 3601.22, "total_tokens": 21956936}
|
| 6752 |
+
{"current_steps": 33675, "total_steps": 38160, "loss": 0.5072, "lr": 2.075504745214521e-06, "epoch": 17.64937106918239, "percentage": 88.25, "elapsed_time": "1:41:37", "remaining_time": "0:13:32", "throughput": 3601.23, "total_tokens": 21960200}
|
| 6753 |
+
{"current_steps": 33680, "total_steps": 38160, "loss": 0.4324, "lr": 2.0709456225433437e-06, "epoch": 17.651991614255767, "percentage": 88.26, "elapsed_time": "1:41:38", "remaining_time": "0:13:31", "throughput": 3601.12, "total_tokens": 21962824}
|
| 6754 |
+
{"current_steps": 33685, "total_steps": 38160, "loss": 0.455, "lr": 2.0663912963687554e-06, "epoch": 17.65461215932914, "percentage": 88.27, "elapsed_time": "1:41:39", "remaining_time": "0:13:30", "throughput": 3601.19, "total_tokens": 21966504}
|
| 6755 |
+
{"current_steps": 33690, "total_steps": 38160, "loss": 0.4409, "lr": 2.0618417676434713e-06, "epoch": 17.657232704402517, "percentage": 88.29, "elapsed_time": "1:41:40", "remaining_time": "0:13:29", "throughput": 3601.32, "total_tokens": 21970664}
|
| 6756 |
+
{"current_steps": 33695, "total_steps": 38160, "loss": 0.3885, "lr": 2.0572970373191975e-06, "epoch": 17.65985324947589, "percentage": 88.3, "elapsed_time": "1:41:41", "remaining_time": "0:13:28", "throughput": 3601.34, "total_tokens": 21974312}
|
| 6757 |
+
{"current_steps": 33700, "total_steps": 38160, "loss": 0.543, "lr": 2.052757106346634e-06, "epoch": 17.662473794549268, "percentage": 88.31, "elapsed_time": "1:41:42", "remaining_time": "0:13:27", "throughput": 3601.36, "total_tokens": 21977288}
|
| 6758 |
+
{"current_steps": 33705, "total_steps": 38160, "loss": 0.5245, "lr": 2.048221975675488e-06, "epoch": 17.66509433962264, "percentage": 88.33, "elapsed_time": "1:41:43", "remaining_time": "0:13:26", "throughput": 3601.28, "total_tokens": 21979720}
|
| 6759 |
+
{"current_steps": 33710, "total_steps": 38160, "loss": 0.51, "lr": 2.043691646254456e-06, "epoch": 17.667714884696018, "percentage": 88.34, "elapsed_time": "1:41:44", "remaining_time": "0:13:25", "throughput": 3601.24, "total_tokens": 21982600}
|
| 6760 |
+
{"current_steps": 33715, "total_steps": 38160, "loss": 0.4966, "lr": 2.039166119031233e-06, "epoch": 17.67033542976939, "percentage": 88.35, "elapsed_time": "1:41:44", "remaining_time": "0:13:24", "throughput": 3601.34, "total_tokens": 21986184}
|
| 6761 |
+
{"current_steps": 33720, "total_steps": 38160, "loss": 0.5412, "lr": 2.034645394952511e-06, "epoch": 17.67295597484277, "percentage": 88.36, "elapsed_time": "1:41:45", "remaining_time": "0:13:23", "throughput": 3601.37, "total_tokens": 21989352}
|
| 6762 |
+
{"current_steps": 33725, "total_steps": 38160, "loss": 0.4796, "lr": 2.030129474963971e-06, "epoch": 17.67557651991614, "percentage": 88.38, "elapsed_time": "1:41:46", "remaining_time": "0:13:23", "throughput": 3601.34, "total_tokens": 21992072}
|
| 6763 |
+
{"current_steps": 33730, "total_steps": 38160, "loss": 0.5184, "lr": 2.025618360010284e-06, "epoch": 17.67819706498952, "percentage": 88.39, "elapsed_time": "1:41:47", "remaining_time": "0:13:22", "throughput": 3601.46, "total_tokens": 21996200}
|
| 6764 |
+
{"current_steps": 33735, "total_steps": 38160, "loss": 0.4852, "lr": 2.0211120510351317e-06, "epoch": 17.680817610062892, "percentage": 88.4, "elapsed_time": "1:41:48", "remaining_time": "0:13:21", "throughput": 3601.39, "total_tokens": 21998728}
|
| 6765 |
+
{"current_steps": 33740, "total_steps": 38160, "loss": 0.4872, "lr": 2.0166105489811822e-06, "epoch": 17.68343815513627, "percentage": 88.42, "elapsed_time": "1:41:49", "remaining_time": "0:13:20", "throughput": 3601.36, "total_tokens": 22001480}
|
| 6766 |
+
{"current_steps": 33745, "total_steps": 38160, "loss": 0.6122, "lr": 2.0121138547901016e-06, "epoch": 17.686058700209642, "percentage": 88.43, "elapsed_time": "1:41:50", "remaining_time": "0:13:19", "throughput": 3601.46, "total_tokens": 22005800}
|
| 6767 |
+
{"current_steps": 33750, "total_steps": 38160, "loss": 0.3757, "lr": 2.0076219694025427e-06, "epoch": 17.68867924528302, "percentage": 88.44, "elapsed_time": "1:41:51", "remaining_time": "0:13:18", "throughput": 3601.5, "total_tokens": 22009096}
|
| 6768 |
+
{"current_steps": 33755, "total_steps": 38160, "loss": 0.4698, "lr": 2.003134893758152e-06, "epoch": 17.691299790356393, "percentage": 88.46, "elapsed_time": "1:41:51", "remaining_time": "0:13:17", "throughput": 3601.5, "total_tokens": 22012072}
|
| 6769 |
+
{"current_steps": 33760, "total_steps": 38160, "loss": 0.5796, "lr": 1.9986526287955805e-06, "epoch": 17.69392033542977, "percentage": 88.47, "elapsed_time": "1:41:52", "remaining_time": "0:13:16", "throughput": 3601.56, "total_tokens": 22015304}
|
| 6770 |
+
{"current_steps": 33765, "total_steps": 38160, "loss": 0.4235, "lr": 1.9941751754524644e-06, "epoch": 17.696540880503143, "percentage": 88.48, "elapsed_time": "1:41:53", "remaining_time": "0:13:15", "throughput": 3601.65, "total_tokens": 22019016}
|
| 6771 |
+
{"current_steps": 33770, "total_steps": 38160, "loss": 0.5145, "lr": 1.9897025346654412e-06, "epoch": 17.69916142557652, "percentage": 88.5, "elapsed_time": "1:41:54", "remaining_time": "0:13:14", "throughput": 3601.66, "total_tokens": 22021992}
|
| 6772 |
+
{"current_steps": 33775, "total_steps": 38160, "loss": 0.4086, "lr": 1.985234707370134e-06, "epoch": 17.701781970649897, "percentage": 88.51, "elapsed_time": "1:41:55", "remaining_time": "0:13:13", "throughput": 3601.65, "total_tokens": 22024808}
|
| 6773 |
+
{"current_steps": 33780, "total_steps": 38160, "loss": 0.5306, "lr": 1.9807716945011544e-06, "epoch": 17.70440251572327, "percentage": 88.52, "elapsed_time": "1:41:55", "remaining_time": "0:13:13", "throughput": 3601.59, "total_tokens": 22027336}
|
| 6774 |
+
{"current_steps": 33785, "total_steps": 38160, "loss": 0.4462, "lr": 1.9763134969921215e-06, "epoch": 17.707023060796647, "percentage": 88.54, "elapsed_time": "1:41:56", "remaining_time": "0:13:12", "throughput": 3601.6, "total_tokens": 22030440}
|
| 6775 |
+
{"current_steps": 33790, "total_steps": 38160, "loss": 0.6431, "lr": 1.971860115775634e-06, "epoch": 17.70964360587002, "percentage": 88.55, "elapsed_time": "1:41:57", "remaining_time": "0:13:11", "throughput": 3601.5, "total_tokens": 22033160}
|
| 6776 |
+
{"current_steps": 33795, "total_steps": 38160, "loss": 0.3032, "lr": 1.967411551783294e-06, "epoch": 17.712264150943398, "percentage": 88.56, "elapsed_time": "1:41:58", "remaining_time": "0:13:10", "throughput": 3601.42, "total_tokens": 22035720}
|
| 6777 |
+
{"current_steps": 33800, "total_steps": 38160, "loss": 0.581, "lr": 1.9629678059456895e-06, "epoch": 17.71488469601677, "percentage": 88.57, "elapsed_time": "1:41:59", "remaining_time": "0:13:09", "throughput": 3601.42, "total_tokens": 22038664}
|
| 6778 |
+
{"current_steps": 33805, "total_steps": 38160, "loss": 0.4235, "lr": 1.9585288791924004e-06, "epoch": 17.717505241090148, "percentage": 88.59, "elapsed_time": "1:42:00", "remaining_time": "0:13:08", "throughput": 3601.38, "total_tokens": 22041512}
|
| 6779 |
+
{"current_steps": 33810, "total_steps": 38160, "loss": 0.4483, "lr": 1.9540947724519947e-06, "epoch": 17.72012578616352, "percentage": 88.6, "elapsed_time": "1:42:01", "remaining_time": "0:13:07", "throughput": 3601.47, "total_tokens": 22045320}
|
| 6780 |
+
{"current_steps": 33815, "total_steps": 38160, "loss": 0.3538, "lr": 1.9496654866520414e-06, "epoch": 17.7227463312369, "percentage": 88.61, "elapsed_time": "1:42:02", "remaining_time": "0:13:06", "throughput": 3601.46, "total_tokens": 22048232}
|
| 6781 |
+
{"current_steps": 33820, "total_steps": 38160, "loss": 0.4471, "lr": 1.9452410227190937e-06, "epoch": 17.72536687631027, "percentage": 88.63, "elapsed_time": "1:42:02", "remaining_time": "0:13:05", "throughput": 3601.54, "total_tokens": 22051528}
|
| 6782 |
+
{"current_steps": 33825, "total_steps": 38160, "loss": 0.5463, "lr": 1.9408213815787087e-06, "epoch": 17.72798742138365, "percentage": 88.64, "elapsed_time": "1:42:03", "remaining_time": "0:13:04", "throughput": 3601.49, "total_tokens": 22054248}
|
| 6783 |
+
{"current_steps": 33830, "total_steps": 38160, "loss": 0.3771, "lr": 1.936406564155413e-06, "epoch": 17.730607966457022, "percentage": 88.65, "elapsed_time": "1:42:04", "remaining_time": "0:13:03", "throughput": 3601.58, "total_tokens": 22058600}
|
| 6784 |
+
{"current_steps": 33835, "total_steps": 38160, "loss": 0.4283, "lr": 1.9319965713727435e-06, "epoch": 17.7332285115304, "percentage": 88.67, "elapsed_time": "1:42:05", "remaining_time": "0:13:03", "throughput": 3601.63, "total_tokens": 22061896}
|
| 6785 |
+
{"current_steps": 33840, "total_steps": 38160, "loss": 0.583, "lr": 1.9275914041532144e-06, "epoch": 17.735849056603772, "percentage": 88.68, "elapsed_time": "1:42:06", "remaining_time": "0:13:02", "throughput": 3601.67, "total_tokens": 22065192}
|
| 6786 |
+
{"current_steps": 33845, "total_steps": 38160, "loss": 0.5481, "lr": 1.9231910634183394e-06, "epoch": 17.73846960167715, "percentage": 88.69, "elapsed_time": "1:42:07", "remaining_time": "0:13:01", "throughput": 3601.68, "total_tokens": 22068296}
|
| 6787 |
+
{"current_steps": 33850, "total_steps": 38160, "loss": 0.5508, "lr": 1.918795550088623e-06, "epoch": 17.741090146750523, "percentage": 88.71, "elapsed_time": "1:42:08", "remaining_time": "0:13:00", "throughput": 3601.74, "total_tokens": 22071976}
|
| 6788 |
+
{"current_steps": 33855, "total_steps": 38160, "loss": 0.5956, "lr": 1.9144048650835496e-06, "epoch": 17.7437106918239, "percentage": 88.72, "elapsed_time": "1:42:09", "remaining_time": "0:12:59", "throughput": 3601.82, "total_tokens": 22075656}
|
| 6789 |
+
{"current_steps": 33860, "total_steps": 38160, "loss": 0.4204, "lr": 1.9100190093216093e-06, "epoch": 17.746331236897273, "percentage": 88.73, "elapsed_time": "1:42:10", "remaining_time": "0:12:58", "throughput": 3601.92, "total_tokens": 22079816}
|
| 6790 |
+
{"current_steps": 33865, "total_steps": 38160, "loss": 0.514, "lr": 1.9056379837202627e-06, "epoch": 17.74895178197065, "percentage": 88.74, "elapsed_time": "1:42:10", "remaining_time": "0:12:57", "throughput": 3601.87, "total_tokens": 22082472}
|
| 6791 |
+
{"current_steps": 33870, "total_steps": 38160, "loss": 0.5106, "lr": 1.9012617891959766e-06, "epoch": 17.751572327044027, "percentage": 88.76, "elapsed_time": "1:42:11", "remaining_time": "0:12:56", "throughput": 3601.92, "total_tokens": 22086024}
|
| 6792 |
+
{"current_steps": 33875, "total_steps": 38160, "loss": 0.4902, "lr": 1.8968904266641996e-06, "epoch": 17.7541928721174, "percentage": 88.77, "elapsed_time": "1:42:12", "remaining_time": "0:12:55", "throughput": 3601.97, "total_tokens": 22090024}
|
| 6793 |
+
{"current_steps": 33880, "total_steps": 38160, "loss": 0.4669, "lr": 1.892523897039372e-06, "epoch": 17.756813417190777, "percentage": 88.78, "elapsed_time": "1:42:13", "remaining_time": "0:12:54", "throughput": 3601.93, "total_tokens": 22092680}
|
| 6794 |
+
{"current_steps": 33885, "total_steps": 38160, "loss": 0.5655, "lr": 1.888162201234922e-06, "epoch": 17.75943396226415, "percentage": 88.8, "elapsed_time": "1:42:14", "remaining_time": "0:12:53", "throughput": 3601.87, "total_tokens": 22095112}
|
| 6795 |
+
{"current_steps": 33890, "total_steps": 38160, "loss": 0.5076, "lr": 1.8838053401632666e-06, "epoch": 17.762054507337528, "percentage": 88.81, "elapsed_time": "1:42:15", "remaining_time": "0:12:53", "throughput": 3601.96, "total_tokens": 22098696}
|
| 6796 |
+
{"current_steps": 33895, "total_steps": 38160, "loss": 0.4934, "lr": 1.8794533147358074e-06, "epoch": 17.7646750524109, "percentage": 88.82, "elapsed_time": "1:42:16", "remaining_time": "0:12:52", "throughput": 3602.0, "total_tokens": 22102024}
|
| 6797 |
+
{"current_steps": 33900, "total_steps": 38160, "loss": 0.464, "lr": 1.875106125862941e-06, "epoch": 17.767295597484278, "percentage": 88.84, "elapsed_time": "1:42:16", "remaining_time": "0:12:51", "throughput": 3602.05, "total_tokens": 22105064}
|
| 6798 |
+
{"current_steps": 33905, "total_steps": 38160, "loss": 0.608, "lr": 1.870763774454057e-06, "epoch": 17.76991614255765, "percentage": 88.85, "elapsed_time": "1:42:17", "remaining_time": "0:12:50", "throughput": 3602.15, "total_tokens": 22108968}
|
| 6799 |
+
{"current_steps": 33910, "total_steps": 38160, "loss": 0.3978, "lr": 1.8664262614175115e-06, "epoch": 17.77253668763103, "percentage": 88.86, "elapsed_time": "1:42:18", "remaining_time": "0:12:49", "throughput": 3602.28, "total_tokens": 22113288}
|
| 6800 |
+
{"current_steps": 33915, "total_steps": 38160, "loss": 0.5235, "lr": 1.862093587660671e-06, "epoch": 17.7751572327044, "percentage": 88.88, "elapsed_time": "1:42:19", "remaining_time": "0:12:48", "throughput": 3602.45, "total_tokens": 22117768}
|
| 6801 |
+
{"current_steps": 33920, "total_steps": 38160, "loss": 0.4626, "lr": 1.8577657540898851e-06, "epoch": 17.77777777777778, "percentage": 88.89, "elapsed_time": "1:42:20", "remaining_time": "0:12:47", "throughput": 3602.47, "total_tokens": 22120936}
|
| 6802 |
+
{"current_steps": 33925, "total_steps": 38160, "loss": 0.3822, "lr": 1.8534427616104798e-06, "epoch": 17.780398322851152, "percentage": 88.9, "elapsed_time": "1:42:21", "remaining_time": "0:12:46", "throughput": 3602.52, "total_tokens": 22124616}
|
| 6803 |
+
{"current_steps": 33930, "total_steps": 38160, "loss": 0.4538, "lr": 1.8491246111267846e-06, "epoch": 17.78301886792453, "percentage": 88.92, "elapsed_time": "1:42:22", "remaining_time": "0:12:45", "throughput": 3602.56, "total_tokens": 22128072}
|
| 6804 |
+
{"current_steps": 33935, "total_steps": 38160, "loss": 0.4893, "lr": 1.8448113035420934e-06, "epoch": 17.785639412997902, "percentage": 88.93, "elapsed_time": "1:42:23", "remaining_time": "0:12:44", "throughput": 3602.55, "total_tokens": 22131272}
|
| 6805 |
+
{"current_steps": 33940, "total_steps": 38160, "loss": 0.3879, "lr": 1.840502839758712e-06, "epoch": 17.78825995807128, "percentage": 88.94, "elapsed_time": "1:42:24", "remaining_time": "0:12:43", "throughput": 3602.62, "total_tokens": 22134888}
|
| 6806 |
+
{"current_steps": 33945, "total_steps": 38160, "loss": 0.3929, "lr": 1.8361992206779226e-06, "epoch": 17.790880503144653, "percentage": 88.95, "elapsed_time": "1:42:24", "remaining_time": "0:12:43", "throughput": 3602.59, "total_tokens": 22137480}
|
| 6807 |
+
{"current_steps": 33950, "total_steps": 38160, "loss": 0.4689, "lr": 1.8319004471999856e-06, "epoch": 17.79350104821803, "percentage": 88.97, "elapsed_time": "1:42:25", "remaining_time": "0:12:42", "throughput": 3602.58, "total_tokens": 22141000}
|
| 6808 |
+
{"current_steps": 33955, "total_steps": 38160, "loss": 0.3617, "lr": 1.8276065202241592e-06, "epoch": 17.796121593291403, "percentage": 88.98, "elapsed_time": "1:42:26", "remaining_time": "0:12:41", "throughput": 3602.57, "total_tokens": 22143752}
|
| 6809 |
+
{"current_steps": 33960, "total_steps": 38160, "loss": 0.4516, "lr": 1.8233174406486863e-06, "epoch": 17.79874213836478, "percentage": 88.99, "elapsed_time": "1:42:27", "remaining_time": "0:12:40", "throughput": 3602.63, "total_tokens": 22147272}
|
| 6810 |
+
{"current_steps": 33965, "total_steps": 38160, "loss": 0.3478, "lr": 1.8190332093707907e-06, "epoch": 17.801362683438157, "percentage": 89.01, "elapsed_time": "1:42:28", "remaining_time": "0:12:39", "throughput": 3602.66, "total_tokens": 22151080}
|
| 6811 |
+
{"current_steps": 33970, "total_steps": 38160, "loss": 0.4744, "lr": 1.8147538272866833e-06, "epoch": 17.80398322851153, "percentage": 89.02, "elapsed_time": "1:42:29", "remaining_time": "0:12:38", "throughput": 3602.7, "total_tokens": 22154664}
|
| 6812 |
+
{"current_steps": 33975, "total_steps": 38160, "loss": 0.4671, "lr": 1.8104792952915672e-06, "epoch": 17.806603773584907, "percentage": 89.03, "elapsed_time": "1:42:30", "remaining_time": "0:12:37", "throughput": 3602.67, "total_tokens": 22157608}
|
| 6813 |
+
{"current_steps": 33980, "total_steps": 38160, "loss": 0.5613, "lr": 1.8062096142796192e-06, "epoch": 17.80922431865828, "percentage": 89.05, "elapsed_time": "1:42:31", "remaining_time": "0:12:36", "throughput": 3602.61, "total_tokens": 22160136}
|
| 6814 |
+
{"current_steps": 33985, "total_steps": 38160, "loss": 0.4612, "lr": 1.8019447851440163e-06, "epoch": 17.811844863731658, "percentage": 89.06, "elapsed_time": "1:42:31", "remaining_time": "0:12:35", "throughput": 3602.48, "total_tokens": 22162376}
|
| 6815 |
+
{"current_steps": 33990, "total_steps": 38160, "loss": 0.6725, "lr": 1.7976848087769032e-06, "epoch": 17.81446540880503, "percentage": 89.07, "elapsed_time": "1:42:32", "remaining_time": "0:12:34", "throughput": 3602.59, "total_tokens": 22166376}
|
| 6816 |
+
{"current_steps": 33995, "total_steps": 38160, "loss": 0.4309, "lr": 1.79342968606942e-06, "epoch": 17.817085953878408, "percentage": 89.09, "elapsed_time": "1:42:33", "remaining_time": "0:12:33", "throughput": 3602.57, "total_tokens": 22169128}
|
| 6817 |
+
{"current_steps": 34000, "total_steps": 38160, "loss": 0.5307, "lr": 1.7891794179116933e-06, "epoch": 17.81970649895178, "percentage": 89.1, "elapsed_time": "1:42:34", "remaining_time": "0:12:33", "throughput": 3602.73, "total_tokens": 22173544}
|
| 6818 |
+
{"current_steps": 34005, "total_steps": 38160, "loss": 0.4501, "lr": 1.7849340051928316e-06, "epoch": 17.822327044025158, "percentage": 89.11, "elapsed_time": "1:42:35", "remaining_time": "0:12:32", "throughput": 3602.75, "total_tokens": 22176712}
|
| 6819 |
+
{"current_steps": 34010, "total_steps": 38160, "loss": 0.4724, "lr": 1.780693448800927e-06, "epoch": 17.82494758909853, "percentage": 89.12, "elapsed_time": "1:42:36", "remaining_time": "0:12:31", "throughput": 3602.74, "total_tokens": 22179816}
|
| 6820 |
+
{"current_steps": 34015, "total_steps": 38160, "loss": 0.4685, "lr": 1.7764577496230478e-06, "epoch": 17.82756813417191, "percentage": 89.14, "elapsed_time": "1:42:37", "remaining_time": "0:12:30", "throughput": 3602.77, "total_tokens": 22182888}
|
| 6821 |
+
{"current_steps": 34020, "total_steps": 38160, "loss": 0.512, "lr": 1.7722269085452602e-06, "epoch": 17.830188679245282, "percentage": 89.15, "elapsed_time": "1:42:38", "remaining_time": "0:12:29", "throughput": 3602.84, "total_tokens": 22186472}
|
| 6822 |
+
{"current_steps": 34025, "total_steps": 38160, "loss": 0.4562, "lr": 1.7680009264526038e-06, "epoch": 17.83280922431866, "percentage": 89.16, "elapsed_time": "1:42:38", "remaining_time": "0:12:28", "throughput": 3602.78, "total_tokens": 22189320}
|
| 6823 |
+
{"current_steps": 34030, "total_steps": 38160, "loss": 0.4923, "lr": 1.7637798042291126e-06, "epoch": 17.835429769392032, "percentage": 89.18, "elapsed_time": "1:42:39", "remaining_time": "0:12:27", "throughput": 3602.77, "total_tokens": 22192200}
|
| 6824 |
+
{"current_steps": 34035, "total_steps": 38160, "loss": 0.4097, "lr": 1.7595635427577944e-06, "epoch": 17.83805031446541, "percentage": 89.19, "elapsed_time": "1:42:40", "remaining_time": "0:12:26", "throughput": 3602.69, "total_tokens": 22194696}
|
| 6825 |
+
{"current_steps": 34040, "total_steps": 38160, "loss": 0.4175, "lr": 1.7553521429206465e-06, "epoch": 17.840670859538783, "percentage": 89.2, "elapsed_time": "1:42:41", "remaining_time": "0:12:25", "throughput": 3602.56, "total_tokens": 22197192}
|
| 6826 |
+
{"current_steps": 34045, "total_steps": 38160, "loss": 0.4355, "lr": 1.7511456055986335e-06, "epoch": 17.84329140461216, "percentage": 89.22, "elapsed_time": "1:42:42", "remaining_time": "0:12:24", "throughput": 3602.5, "total_tokens": 22199976}
|
| 6827 |
+
{"current_steps": 34050, "total_steps": 38160, "loss": 0.4755, "lr": 1.746943931671724e-06, "epoch": 17.845911949685533, "percentage": 89.23, "elapsed_time": "1:42:43", "remaining_time": "0:12:23", "throughput": 3602.47, "total_tokens": 22203176}
|
| 6828 |
+
{"current_steps": 34055, "total_steps": 38160, "loss": 0.3447, "lr": 1.742747122018859e-06, "epoch": 17.84853249475891, "percentage": 89.24, "elapsed_time": "1:42:44", "remaining_time": "0:12:23", "throughput": 3602.58, "total_tokens": 22207368}
|
| 6829 |
+
{"current_steps": 34060, "total_steps": 38160, "loss": 0.512, "lr": 1.738555177517967e-06, "epoch": 17.851153039832283, "percentage": 89.26, "elapsed_time": "1:42:45", "remaining_time": "0:12:22", "throughput": 3602.59, "total_tokens": 22210376}
|
| 6830 |
+
{"current_steps": 34065, "total_steps": 38160, "loss": 0.4234, "lr": 1.734368099045952e-06, "epoch": 17.85377358490566, "percentage": 89.27, "elapsed_time": "1:42:46", "remaining_time": "0:12:21", "throughput": 3602.71, "total_tokens": 22214856}
|
| 6831 |
+
{"current_steps": 34070, "total_steps": 38160, "loss": 0.407, "lr": 1.730185887478697e-06, "epoch": 17.856394129979037, "percentage": 89.28, "elapsed_time": "1:42:47", "remaining_time": "0:12:20", "throughput": 3602.79, "total_tokens": 22218504}
|
| 6832 |
+
{"current_steps": 34075, "total_steps": 38160, "loss": 0.4682, "lr": 1.7260085436910794e-06, "epoch": 17.85901467505241, "percentage": 89.3, "elapsed_time": "1:42:47", "remaining_time": "0:12:19", "throughput": 3602.77, "total_tokens": 22221384}
|
| 6833 |
+
{"current_steps": 34080, "total_steps": 38160, "loss": 0.5701, "lr": 1.7218360685569506e-06, "epoch": 17.861635220125788, "percentage": 89.31, "elapsed_time": "1:42:48", "remaining_time": "0:12:18", "throughput": 3602.84, "total_tokens": 22225480}
|
| 6834 |
+
{"current_steps": 34085, "total_steps": 38160, "loss": 0.5544, "lr": 1.7176684629491513e-06, "epoch": 17.86425576519916, "percentage": 89.32, "elapsed_time": "1:42:49", "remaining_time": "0:12:17", "throughput": 3602.87, "total_tokens": 22228488}
|
| 6835 |
+
{"current_steps": 34090, "total_steps": 38160, "loss": 0.4888, "lr": 1.7135057277394868e-06, "epoch": 17.866876310272538, "percentage": 89.33, "elapsed_time": "1:42:50", "remaining_time": "0:12:16", "throughput": 3602.93, "total_tokens": 22231912}
|
| 6836 |
+
{"current_steps": 34095, "total_steps": 38160, "loss": 0.5131, "lr": 1.7093478637987548e-06, "epoch": 17.86949685534591, "percentage": 89.35, "elapsed_time": "1:42:51", "remaining_time": "0:12:15", "throughput": 3602.98, "total_tokens": 22235144}
|
| 6837 |
+
{"current_steps": 34100, "total_steps": 38160, "loss": 0.6006, "lr": 1.7051948719967375e-06, "epoch": 17.872117400419288, "percentage": 89.36, "elapsed_time": "1:42:52", "remaining_time": "0:12:14", "throughput": 3602.97, "total_tokens": 22238088}
|
| 6838 |
+
{"current_steps": 34105, "total_steps": 38160, "loss": 0.5534, "lr": 1.7010467532021901e-06, "epoch": 17.87473794549266, "percentage": 89.37, "elapsed_time": "1:42:52", "remaining_time": "0:12:13", "throughput": 3603.08, "total_tokens": 22241800}
|
| 6839 |
+
{"current_steps": 34110, "total_steps": 38160, "loss": 0.5092, "lr": 1.6969035082828544e-06, "epoch": 17.87735849056604, "percentage": 89.39, "elapsed_time": "1:42:53", "remaining_time": "0:12:13", "throughput": 3603.17, "total_tokens": 22245512}
|
| 6840 |
+
{"current_steps": 34115, "total_steps": 38160, "loss": 0.5512, "lr": 1.692765138105451e-06, "epoch": 17.879979035639412, "percentage": 89.4, "elapsed_time": "1:42:54", "remaining_time": "0:12:12", "throughput": 3603.15, "total_tokens": 22248328}
|
| 6841 |
+
{"current_steps": 34120, "total_steps": 38160, "loss": 0.5438, "lr": 1.6886316435356792e-06, "epoch": 17.88259958071279, "percentage": 89.41, "elapsed_time": "1:42:55", "remaining_time": "0:12:11", "throughput": 3603.27, "total_tokens": 22252744}
|
| 6842 |
+
{"current_steps": 34125, "total_steps": 38160, "loss": 0.3738, "lr": 1.6845030254382138e-06, "epoch": 17.885220125786162, "percentage": 89.43, "elapsed_time": "1:42:56", "remaining_time": "0:12:10", "throughput": 3603.45, "total_tokens": 22257544}
|
| 6843 |
+
{"current_steps": 34130, "total_steps": 38160, "loss": 0.4576, "lr": 1.6803792846767197e-06, "epoch": 17.88784067085954, "percentage": 89.44, "elapsed_time": "1:42:57", "remaining_time": "0:12:09", "throughput": 3603.42, "total_tokens": 22260456}
|
| 6844 |
+
{"current_steps": 34135, "total_steps": 38160, "loss": 0.495, "lr": 1.6762604221138316e-06, "epoch": 17.890461215932913, "percentage": 89.45, "elapsed_time": "1:42:58", "remaining_time": "0:12:08", "throughput": 3603.48, "total_tokens": 22264328}
|
| 6845 |
+
{"current_steps": 34140, "total_steps": 38160, "loss": 0.4184, "lr": 1.6721464386111774e-06, "epoch": 17.89308176100629, "percentage": 89.47, "elapsed_time": "1:42:59", "remaining_time": "0:12:07", "throughput": 3603.41, "total_tokens": 22267080}
|
| 6846 |
+
{"current_steps": 34145, "total_steps": 38160, "loss": 0.3733, "lr": 1.6680373350293516e-06, "epoch": 17.895702306079663, "percentage": 89.48, "elapsed_time": "1:43:00", "remaining_time": "0:12:06", "throughput": 3603.46, "total_tokens": 22270248}
|
| 6847 |
+
{"current_steps": 34150, "total_steps": 38160, "loss": 0.4705, "lr": 1.6639331122279223e-06, "epoch": 17.89832285115304, "percentage": 89.49, "elapsed_time": "1:43:01", "remaining_time": "0:12:05", "throughput": 3603.54, "total_tokens": 22273960}
|
| 6848 |
+
{"current_steps": 34155, "total_steps": 38160, "loss": 0.4713, "lr": 1.6598337710654583e-06, "epoch": 17.900943396226417, "percentage": 89.5, "elapsed_time": "1:43:02", "remaining_time": "0:12:04", "throughput": 3603.54, "total_tokens": 22277256}
|
| 6849 |
+
{"current_steps": 34160, "total_steps": 38160, "loss": 0.5347, "lr": 1.6557393123994874e-06, "epoch": 17.90356394129979, "percentage": 89.52, "elapsed_time": "1:43:02", "remaining_time": "0:12:03", "throughput": 3603.55, "total_tokens": 22280424}
|
| 6850 |
+
{"current_steps": 34165, "total_steps": 38160, "loss": 0.4096, "lr": 1.651649737086533e-06, "epoch": 17.906184486373167, "percentage": 89.53, "elapsed_time": "1:43:03", "remaining_time": "0:12:03", "throughput": 3603.65, "total_tokens": 22284808}
|
| 6851 |
+
{"current_steps": 34170, "total_steps": 38160, "loss": 0.4566, "lr": 1.6475650459820774e-06, "epoch": 17.90880503144654, "percentage": 89.54, "elapsed_time": "1:43:04", "remaining_time": "0:12:02", "throughput": 3603.65, "total_tokens": 22287912}
|
| 6852 |
+
{"current_steps": 34175, "total_steps": 38160, "loss": 0.4943, "lr": 1.643485239940598e-06, "epoch": 17.911425576519918, "percentage": 89.56, "elapsed_time": "1:43:05", "remaining_time": "0:12:01", "throughput": 3603.67, "total_tokens": 22291112}
|
| 6853 |
+
{"current_steps": 34180, "total_steps": 38160, "loss": 0.5058, "lr": 1.6394103198155375e-06, "epoch": 17.91404612159329, "percentage": 89.57, "elapsed_time": "1:43:06", "remaining_time": "0:12:00", "throughput": 3603.7, "total_tokens": 22294280}
|
| 6854 |
+
{"current_steps": 34185, "total_steps": 38160, "loss": 0.3745, "lr": 1.6353402864593276e-06, "epoch": 17.916666666666668, "percentage": 89.58, "elapsed_time": "1:43:07", "remaining_time": "0:11:59", "throughput": 3603.77, "total_tokens": 22297640}
|
| 6855 |
+
{"current_steps": 34190, "total_steps": 38160, "loss": 0.3968, "lr": 1.6312751407233706e-06, "epoch": 17.91928721174004, "percentage": 89.6, "elapsed_time": "1:43:08", "remaining_time": "0:11:58", "throughput": 3603.81, "total_tokens": 22300712}
|
| 6856 |
+
{"current_steps": 34195, "total_steps": 38160, "loss": 0.4146, "lr": 1.6272148834580559e-06, "epoch": 17.921907756813418, "percentage": 89.61, "elapsed_time": "1:43:09", "remaining_time": "0:11:57", "throughput": 3603.71, "total_tokens": 22304072}
|
| 6857 |
+
{"current_steps": 34200, "total_steps": 38160, "loss": 0.6271, "lr": 1.6231595155127317e-06, "epoch": 17.92452830188679, "percentage": 89.62, "elapsed_time": "1:43:10", "remaining_time": "0:11:56", "throughput": 3603.72, "total_tokens": 22307336}
|
| 6858 |
+
{"current_steps": 34205, "total_steps": 38160, "loss": 0.4476, "lr": 1.6191090377357414e-06, "epoch": 17.92714884696017, "percentage": 89.64, "elapsed_time": "1:43:10", "remaining_time": "0:11:55", "throughput": 3603.7, "total_tokens": 22310216}
|
| 6859 |
+
{"current_steps": 34210, "total_steps": 38160, "loss": 0.418, "lr": 1.6150634509743939e-06, "epoch": 17.929769392033542, "percentage": 89.65, "elapsed_time": "1:43:11", "remaining_time": "0:11:54", "throughput": 3603.79, "total_tokens": 22314216}
|
| 6860 |
+
{"current_steps": 34215, "total_steps": 38160, "loss": 0.4202, "lr": 1.611022756074984e-06, "epoch": 17.93238993710692, "percentage": 89.66, "elapsed_time": "1:43:12", "remaining_time": "0:11:54", "throughput": 3603.8, "total_tokens": 22317416}
|
| 6861 |
+
{"current_steps": 34220, "total_steps": 38160, "loss": 0.4695, "lr": 1.606986953882783e-06, "epoch": 17.935010482180292, "percentage": 89.68, "elapsed_time": "1:43:13", "remaining_time": "0:11:53", "throughput": 3603.83, "total_tokens": 22320456}
|
| 6862 |
+
{"current_steps": 34225, "total_steps": 38160, "loss": 0.357, "lr": 1.6029560452420238e-06, "epoch": 17.93763102725367, "percentage": 89.69, "elapsed_time": "1:43:14", "remaining_time": "0:11:52", "throughput": 3603.8, "total_tokens": 22323048}
|
| 6863 |
+
{"current_steps": 34230, "total_steps": 38160, "loss": 0.4182, "lr": 1.5989300309959377e-06, "epoch": 17.940251572327043, "percentage": 89.7, "elapsed_time": "1:43:15", "remaining_time": "0:11:51", "throughput": 3603.86, "total_tokens": 22326568}
|
| 6864 |
+
{"current_steps": 34235, "total_steps": 38160, "loss": 0.4774, "lr": 1.5949089119867116e-06, "epoch": 17.94287211740042, "percentage": 89.71, "elapsed_time": "1:43:16", "remaining_time": "0:11:50", "throughput": 3603.89, "total_tokens": 22329864}
|
| 6865 |
+
{"current_steps": 34240, "total_steps": 38160, "loss": 0.6854, "lr": 1.5908926890555204e-06, "epoch": 17.945492662473793, "percentage": 89.73, "elapsed_time": "1:43:16", "remaining_time": "0:11:49", "throughput": 3603.9, "total_tokens": 22333000}
|
| 6866 |
+
{"current_steps": 34245, "total_steps": 38160, "loss": 0.396, "lr": 1.5868813630425167e-06, "epoch": 17.94811320754717, "percentage": 89.74, "elapsed_time": "1:43:17", "remaining_time": "0:11:48", "throughput": 3603.96, "total_tokens": 22336328}
|
| 6867 |
+
{"current_steps": 34250, "total_steps": 38160, "loss": 0.3635, "lr": 1.5828749347868183e-06, "epoch": 17.950733752620543, "percentage": 89.75, "elapsed_time": "1:43:18", "remaining_time": "0:11:47", "throughput": 3603.88, "total_tokens": 22338920}
|
| 6868 |
+
{"current_steps": 34255, "total_steps": 38160, "loss": 0.7104, "lr": 1.5788734051265268e-06, "epoch": 17.95335429769392, "percentage": 89.77, "elapsed_time": "1:43:19", "remaining_time": "0:11:46", "throughput": 3603.91, "total_tokens": 22342568}
|
| 6869 |
+
{"current_steps": 34260, "total_steps": 38160, "loss": 0.483, "lr": 1.5748767748987231e-06, "epoch": 17.955974842767297, "percentage": 89.78, "elapsed_time": "1:43:20", "remaining_time": "0:11:45", "throughput": 3603.9, "total_tokens": 22345512}
|
| 6870 |
+
{"current_steps": 34265, "total_steps": 38160, "loss": 0.6076, "lr": 1.5708850449394436e-06, "epoch": 17.95859538784067, "percentage": 89.79, "elapsed_time": "1:43:21", "remaining_time": "0:11:44", "throughput": 3603.9, "total_tokens": 22348488}
|
| 6871 |
+
{"current_steps": 34270, "total_steps": 38160, "loss": 0.398, "lr": 1.5668982160837176e-06, "epoch": 17.961215932914047, "percentage": 89.81, "elapsed_time": "1:43:22", "remaining_time": "0:11:44", "throughput": 3603.93, "total_tokens": 22351848}
|
| 6872 |
+
{"current_steps": 34275, "total_steps": 38160, "loss": 0.4573, "lr": 1.562916289165553e-06, "epoch": 17.96383647798742, "percentage": 89.82, "elapsed_time": "1:43:22", "remaining_time": "0:11:43", "throughput": 3603.93, "total_tokens": 22354856}
|
| 6873 |
+
{"current_steps": 34280, "total_steps": 38160, "loss": 0.3305, "lr": 1.5589392650179114e-06, "epoch": 17.966457023060798, "percentage": 89.83, "elapsed_time": "1:43:23", "remaining_time": "0:11:42", "throughput": 3604.01, "total_tokens": 22358152}
|
| 6874 |
+
{"current_steps": 34285, "total_steps": 38160, "loss": 0.5364, "lr": 1.5549671444727437e-06, "epoch": 17.96907756813417, "percentage": 89.85, "elapsed_time": "1:43:24", "remaining_time": "0:11:41", "throughput": 3604.09, "total_tokens": 22361704}
|
| 6875 |
+
{"current_steps": 34290, "total_steps": 38160, "loss": 0.5924, "lr": 1.55099992836098e-06, "epoch": 17.971698113207548, "percentage": 89.86, "elapsed_time": "1:43:25", "remaining_time": "0:11:40", "throughput": 3604.06, "total_tokens": 22364680}
|
| 6876 |
+
{"current_steps": 34295, "total_steps": 38160, "loss": 0.4144, "lr": 1.5470376175125061e-06, "epoch": 17.97431865828092, "percentage": 89.87, "elapsed_time": "1:43:26", "remaining_time": "0:11:39", "throughput": 3604.07, "total_tokens": 22367752}
|
| 6877 |
+
{"current_steps": 34300, "total_steps": 38160, "loss": 0.454, "lr": 1.5430802127562038e-06, "epoch": 17.9769392033543, "percentage": 89.88, "elapsed_time": "1:43:27", "remaining_time": "0:11:38", "throughput": 3604.09, "total_tokens": 22370632}
|
| 6878 |
+
{"current_steps": 34305, "total_steps": 38160, "loss": 0.452, "lr": 1.5391277149199023e-06, "epoch": 17.979559748427672, "percentage": 89.9, "elapsed_time": "1:43:27", "remaining_time": "0:11:37", "throughput": 3604.02, "total_tokens": 22373224}
|
| 6879 |
+
{"current_steps": 34310, "total_steps": 38160, "loss": 0.4848, "lr": 1.5351801248304292e-06, "epoch": 17.98218029350105, "percentage": 89.91, "elapsed_time": "1:43:29", "remaining_time": "0:11:36", "throughput": 3604.15, "total_tokens": 22379816}
|
| 6880 |
+
{"current_steps": 34315, "total_steps": 38160, "loss": 0.3859, "lr": 1.5312374433135767e-06, "epoch": 17.984800838574422, "percentage": 89.92, "elapsed_time": "1:43:30", "remaining_time": "0:11:35", "throughput": 3604.17, "total_tokens": 22383112}
|
| 6881 |
+
{"current_steps": 34320, "total_steps": 38160, "loss": 0.5888, "lr": 1.5272996711941018e-06, "epoch": 17.9874213836478, "percentage": 89.94, "elapsed_time": "1:43:31", "remaining_time": "0:11:34", "throughput": 3604.23, "total_tokens": 22386472}
|
| 6882 |
+
{"current_steps": 34325, "total_steps": 38160, "loss": 0.6009, "lr": 1.5233668092957482e-06, "epoch": 17.990041928721173, "percentage": 89.95, "elapsed_time": "1:43:31", "remaining_time": "0:11:34", "throughput": 3604.16, "total_tokens": 22388968}
|
| 6883 |
+
{"current_steps": 34330, "total_steps": 38160, "loss": 0.4155, "lr": 1.5194388584412217e-06, "epoch": 17.99266247379455, "percentage": 89.96, "elapsed_time": "1:43:32", "remaining_time": "0:11:33", "throughput": 3604.16, "total_tokens": 22392008}
|
| 6884 |
+
{"current_steps": 34335, "total_steps": 38160, "loss": 0.4204, "lr": 1.5155158194522013e-06, "epoch": 17.995283018867923, "percentage": 89.98, "elapsed_time": "1:43:33", "remaining_time": "0:11:32", "throughput": 3604.13, "total_tokens": 22394984}
|
| 6885 |
+
{"current_steps": 34340, "total_steps": 38160, "loss": 0.5018, "lr": 1.5115976931493498e-06, "epoch": 17.9979035639413, "percentage": 89.99, "elapsed_time": "1:43:34", "remaining_time": "0:11:31", "throughput": 3604.16, "total_tokens": 22398376}
|
| 6886 |
+
{"current_steps": 34344, "total_steps": 38160, "eval_loss": 0.46863794326782227, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "1:43:45", "remaining_time": "0:11:31", "throughput": 3598.26, "total_tokens": 22400368}
|
| 6887 |
+
{"current_steps": 34345, "total_steps": 38160, "loss": 0.7199, "lr": 1.5076844803522922e-06, "epoch": 18.000524109014677, "percentage": 90.0, "elapsed_time": "1:43:47", "remaining_time": "0:11:31", "throughput": 3597.32, "total_tokens": 22400816}
|
| 6888 |
+
{"current_steps": 34350, "total_steps": 38160, "loss": 0.4802, "lr": 1.5037761818796236e-06, "epoch": 18.00314465408805, "percentage": 90.02, "elapsed_time": "1:43:48", "remaining_time": "0:11:30", "throughput": 3597.28, "total_tokens": 22403888}
|
| 6889 |
+
{"current_steps": 34355, "total_steps": 38160, "loss": 0.5429, "lr": 1.4998727985489235e-06, "epoch": 18.005765199161427, "percentage": 90.03, "elapsed_time": "1:43:48", "remaining_time": "0:11:29", "throughput": 3597.43, "total_tokens": 22408208}
|