Training in progress, step 38160
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +379 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1638528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb5f64cbde87f31bacfcbbcff70d85a63078dc375d015116fbce3feac7584e0d
|
| 3 |
size 1638528
|
trainer_log.jsonl
CHANGED
|
@@ -7272,3 +7272,382 @@
|
|
| 7272 |
{"current_steps": 36265, "total_steps": 38160, "loss": 0.3946, "lr": 7.501114116613861e-06, "epoch": 19.006813417190777, "percentage": 95.03, "elapsed_time": "1:32:46", "remaining_time": "0:04:50", "throughput": 4262.91, "total_tokens": 23730696}
|
| 7273 |
{"current_steps": 36270, "total_steps": 38160, "loss": 0.3599, "lr": 7.461702012272087e-06, "epoch": 19.00943396226415, "percentage": 95.05, "elapsed_time": "1:32:47", "remaining_time": "0:04:50", "throughput": 4262.94, "total_tokens": 23733960}
|
| 7274 |
{"current_steps": 36275, "total_steps": 38160, "loss": 0.456, "lr": 7.422392941323075e-06, "epoch": 19.012054507337528, "percentage": 95.06, "elapsed_time": "1:32:48", "remaining_time": "0:04:49", "throughput": 4262.92, "total_tokens": 23736712}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7272 |
{"current_steps": 36265, "total_steps": 38160, "loss": 0.3946, "lr": 7.501114116613861e-06, "epoch": 19.006813417190777, "percentage": 95.03, "elapsed_time": "1:32:46", "remaining_time": "0:04:50", "throughput": 4262.91, "total_tokens": 23730696}
|
| 7273 |
{"current_steps": 36270, "total_steps": 38160, "loss": 0.3599, "lr": 7.461702012272087e-06, "epoch": 19.00943396226415, "percentage": 95.05, "elapsed_time": "1:32:47", "remaining_time": "0:04:50", "throughput": 4262.94, "total_tokens": 23733960}
|
| 7274 |
{"current_steps": 36275, "total_steps": 38160, "loss": 0.456, "lr": 7.422392941323075e-06, "epoch": 19.012054507337528, "percentage": 95.06, "elapsed_time": "1:32:48", "remaining_time": "0:04:49", "throughput": 4262.92, "total_tokens": 23736712}
|
| 7275 |
+
{"current_steps": 36280, "total_steps": 38160, "loss": 0.5568, "lr": 7.383186911989858e-06, "epoch": 19.0146750524109, "percentage": 95.07, "elapsed_time": "1:32:48", "remaining_time": "0:04:48", "throughput": 4262.91, "total_tokens": 23739624}
|
| 7276 |
+
{"current_steps": 36285, "total_steps": 38160, "loss": 0.3488, "lr": 7.344083932473822e-06, "epoch": 19.017295597484278, "percentage": 95.09, "elapsed_time": "1:32:49", "remaining_time": "0:04:47", "throughput": 4262.93, "total_tokens": 23742696}
|
| 7277 |
+
{"current_steps": 36290, "total_steps": 38160, "loss": 0.5117, "lr": 7.305084010954976e-06, "epoch": 19.01991614255765, "percentage": 95.1, "elapsed_time": "1:32:50", "remaining_time": "0:04:47", "throughput": 4263.01, "total_tokens": 23746472}
|
| 7278 |
+
{"current_steps": 36295, "total_steps": 38160, "loss": 0.3209, "lr": 7.26618715559152e-06, "epoch": 19.02253668763103, "percentage": 95.11, "elapsed_time": "1:32:51", "remaining_time": "0:04:46", "throughput": 4263.03, "total_tokens": 23749640}
|
| 7279 |
+
{"current_steps": 36300, "total_steps": 38160, "loss": 0.4649, "lr": 7.227393374520386e-06, "epoch": 19.0251572327044, "percentage": 95.13, "elapsed_time": "1:32:51", "remaining_time": "0:04:45", "throughput": 4263.0, "total_tokens": 23752328}
|
| 7280 |
+
{"current_steps": 36305, "total_steps": 38160, "loss": 0.4101, "lr": 7.18870267585664e-06, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "1:32:52", "remaining_time": "0:04:44", "throughput": 4263.11, "total_tokens": 23756392}
|
| 7281 |
+
{"current_steps": 36310, "total_steps": 38160, "loss": 0.4994, "lr": 7.150115067694085e-06, "epoch": 19.030398322851152, "percentage": 95.15, "elapsed_time": "1:32:53", "remaining_time": "0:04:43", "throughput": 4263.15, "total_tokens": 23759784}
|
| 7282 |
+
{"current_steps": 36315, "total_steps": 38160, "loss": 0.4266, "lr": 7.111630558104653e-06, "epoch": 19.03301886792453, "percentage": 95.17, "elapsed_time": "1:32:53", "remaining_time": "0:04:43", "throughput": 4263.13, "total_tokens": 23762408}
|
| 7283 |
+
{"current_steps": 36320, "total_steps": 38160, "loss": 0.4445, "lr": 7.073249155138961e-06, "epoch": 19.035639412997902, "percentage": 95.18, "elapsed_time": "1:32:54", "remaining_time": "0:04:42", "throughput": 4263.19, "total_tokens": 23765960}
|
| 7284 |
+
{"current_steps": 36325, "total_steps": 38160, "loss": 0.4899, "lr": 7.0349708668259736e-06, "epoch": 19.03825995807128, "percentage": 95.19, "elapsed_time": "1:32:55", "remaining_time": "0:04:41", "throughput": 4263.21, "total_tokens": 23769032}
|
| 7285 |
+
{"current_steps": 36330, "total_steps": 38160, "loss": 0.3604, "lr": 6.996795701173009e-06, "epoch": 19.040880503144653, "percentage": 95.2, "elapsed_time": "1:32:56", "remaining_time": "0:04:40", "throughput": 4263.17, "total_tokens": 23771528}
|
| 7286 |
+
{"current_steps": 36335, "total_steps": 38160, "loss": 0.5086, "lr": 6.958723666165901e-06, "epoch": 19.04350104821803, "percentage": 95.22, "elapsed_time": "1:32:56", "remaining_time": "0:04:40", "throughput": 4263.25, "total_tokens": 23775400}
|
| 7287 |
+
{"current_steps": 36340, "total_steps": 38160, "loss": 0.3599, "lr": 6.9207547697689446e-06, "epoch": 19.046121593291403, "percentage": 95.23, "elapsed_time": "1:32:57", "remaining_time": "0:04:39", "throughput": 4263.28, "total_tokens": 23778536}
|
| 7288 |
+
{"current_steps": 36345, "total_steps": 38160, "loss": 0.4171, "lr": 6.882889019924676e-06, "epoch": 19.04874213836478, "percentage": 95.24, "elapsed_time": "1:32:58", "remaining_time": "0:04:38", "throughput": 4263.32, "total_tokens": 23781832}
|
| 7289 |
+
{"current_steps": 36350, "total_steps": 38160, "loss": 0.5045, "lr": 6.84512642455426e-06, "epoch": 19.051362683438157, "percentage": 95.26, "elapsed_time": "1:32:59", "remaining_time": "0:04:37", "throughput": 4263.65, "total_tokens": 23789288}
|
| 7290 |
+
{"current_steps": 36355, "total_steps": 38160, "loss": 0.4291, "lr": 6.8074669915572095e-06, "epoch": 19.05398322851153, "percentage": 95.27, "elapsed_time": "1:33:00", "remaining_time": "0:04:37", "throughput": 4263.67, "total_tokens": 23792296}
|
| 7291 |
+
{"current_steps": 36360, "total_steps": 38160, "loss": 0.4297, "lr": 6.769910728811391e-06, "epoch": 19.056603773584907, "percentage": 95.28, "elapsed_time": "1:33:00", "remaining_time": "0:04:36", "throughput": 4263.69, "total_tokens": 23795464}
|
| 7292 |
+
{"current_steps": 36365, "total_steps": 38160, "loss": 0.5291, "lr": 6.73245764417324e-06, "epoch": 19.05922431865828, "percentage": 95.3, "elapsed_time": "1:33:01", "remaining_time": "0:04:35", "throughput": 4263.69, "total_tokens": 23798504}
|
| 7293 |
+
{"current_steps": 36370, "total_steps": 38160, "loss": 0.491, "lr": 6.695107745477435e-06, "epoch": 19.061844863731658, "percentage": 95.31, "elapsed_time": "1:33:02", "remaining_time": "0:04:34", "throughput": 4263.75, "total_tokens": 23802024}
|
| 7294 |
+
{"current_steps": 36375, "total_steps": 38160, "loss": 0.3151, "lr": 6.657861040537117e-06, "epoch": 19.06446540880503, "percentage": 95.32, "elapsed_time": "1:33:03", "remaining_time": "0:04:33", "throughput": 4263.8, "total_tokens": 23805512}
|
| 7295 |
+
{"current_steps": 36380, "total_steps": 38160, "loss": 0.3581, "lr": 6.620717537143994e-06, "epoch": 19.067085953878408, "percentage": 95.34, "elapsed_time": "1:33:03", "remaining_time": "0:04:33", "throughput": 4263.83, "total_tokens": 23808712}
|
| 7296 |
+
{"current_steps": 36385, "total_steps": 38160, "loss": 0.3813, "lr": 6.583677243067965e-06, "epoch": 19.06970649895178, "percentage": 95.35, "elapsed_time": "1:33:04", "remaining_time": "0:04:32", "throughput": 4264.04, "total_tokens": 23814280}
|
| 7297 |
+
{"current_steps": 36390, "total_steps": 38160, "loss": 0.424, "lr": 6.546740166057441e-06, "epoch": 19.072327044025158, "percentage": 95.36, "elapsed_time": "1:33:05", "remaining_time": "0:04:31", "throughput": 4264.06, "total_tokens": 23817448}
|
| 7298 |
+
{"current_steps": 36395, "total_steps": 38160, "loss": 0.5021, "lr": 6.5099063138392975e-06, "epoch": 19.07494758909853, "percentage": 95.37, "elapsed_time": "1:33:06", "remaining_time": "0:04:30", "throughput": 4264.07, "total_tokens": 23820520}
|
| 7299 |
+
{"current_steps": 36400, "total_steps": 38160, "loss": 0.3026, "lr": 6.473175694118705e-06, "epoch": 19.07756813417191, "percentage": 95.39, "elapsed_time": "1:33:07", "remaining_time": "0:04:30", "throughput": 4264.3, "total_tokens": 23826664}
|
| 7300 |
+
{"current_steps": 36405, "total_steps": 38160, "loss": 0.3529, "lr": 6.436548314579349e-06, "epoch": 19.080188679245282, "percentage": 95.4, "elapsed_time": "1:33:08", "remaining_time": "0:04:29", "throughput": 4264.35, "total_tokens": 23830120}
|
| 7301 |
+
{"current_steps": 36410, "total_steps": 38160, "loss": 0.5579, "lr": 6.400024182883158e-06, "epoch": 19.08280922431866, "percentage": 95.41, "elapsed_time": "1:33:08", "remaining_time": "0:04:28", "throughput": 4264.39, "total_tokens": 23833480}
|
| 7302 |
+
{"current_steps": 36415, "total_steps": 38160, "loss": 0.4919, "lr": 6.363603306670629e-06, "epoch": 19.085429769392032, "percentage": 95.43, "elapsed_time": "1:33:09", "remaining_time": "0:04:27", "throughput": 4264.41, "total_tokens": 23836712}
|
| 7303 |
+
{"current_steps": 36420, "total_steps": 38160, "loss": 0.5601, "lr": 6.327285693560614e-06, "epoch": 19.08805031446541, "percentage": 95.44, "elapsed_time": "1:33:10", "remaining_time": "0:04:27", "throughput": 4264.45, "total_tokens": 23840072}
|
| 7304 |
+
{"current_steps": 36425, "total_steps": 38160, "loss": 0.4627, "lr": 6.2910713511503125e-06, "epoch": 19.090670859538783, "percentage": 95.45, "elapsed_time": "1:33:11", "remaining_time": "0:04:26", "throughput": 4264.47, "total_tokens": 23843176}
|
| 7305 |
+
{"current_steps": 36430, "total_steps": 38160, "loss": 0.3909, "lr": 6.254960287015332e-06, "epoch": 19.09329140461216, "percentage": 95.47, "elapsed_time": "1:33:11", "remaining_time": "0:04:25", "throughput": 4264.48, "total_tokens": 23846344}
|
| 7306 |
+
{"current_steps": 36435, "total_steps": 38160, "loss": 0.395, "lr": 6.218952508709741e-06, "epoch": 19.095911949685533, "percentage": 95.48, "elapsed_time": "1:33:12", "remaining_time": "0:04:24", "throughput": 4264.56, "total_tokens": 23850216}
|
| 7307 |
+
{"current_steps": 36440, "total_steps": 38160, "loss": 0.5637, "lr": 6.183048023765903e-06, "epoch": 19.09853249475891, "percentage": 95.49, "elapsed_time": "1:33:13", "remaining_time": "0:04:24", "throughput": 4264.58, "total_tokens": 23853352}
|
| 7308 |
+
{"current_steps": 36445, "total_steps": 38160, "loss": 0.419, "lr": 6.147246839694698e-06, "epoch": 19.101153039832287, "percentage": 95.51, "elapsed_time": "1:33:14", "remaining_time": "0:04:23", "throughput": 4264.55, "total_tokens": 23855912}
|
| 7309 |
+
{"current_steps": 36450, "total_steps": 38160, "loss": 0.2892, "lr": 6.111548963985247e-06, "epoch": 19.10377358490566, "percentage": 95.52, "elapsed_time": "1:33:14", "remaining_time": "0:04:22", "throughput": 4264.49, "total_tokens": 23858120}
|
| 7310 |
+
{"current_steps": 36455, "total_steps": 38160, "loss": 0.4418, "lr": 6.075954404105188e-06, "epoch": 19.106394129979037, "percentage": 95.53, "elapsed_time": "1:33:15", "remaining_time": "0:04:21", "throughput": 4264.54, "total_tokens": 23861736}
|
| 7311 |
+
{"current_steps": 36460, "total_steps": 38160, "loss": 0.3188, "lr": 6.040463167500509e-06, "epoch": 19.10901467505241, "percentage": 95.55, "elapsed_time": "1:33:16", "remaining_time": "0:04:20", "throughput": 4264.64, "total_tokens": 23865864}
|
| 7312 |
+
{"current_steps": 36465, "total_steps": 38160, "loss": 0.4292, "lr": 6.005075261595494e-06, "epoch": 19.111635220125788, "percentage": 95.56, "elapsed_time": "1:33:16", "remaining_time": "0:04:20", "throughput": 4264.67, "total_tokens": 23869160}
|
| 7313 |
+
{"current_steps": 36470, "total_steps": 38160, "loss": 0.3705, "lr": 5.969790693792998e-06, "epoch": 19.11425576519916, "percentage": 95.57, "elapsed_time": "1:33:17", "remaining_time": "0:04:19", "throughput": 4264.75, "total_tokens": 23873064}
|
| 7314 |
+
{"current_steps": 36475, "total_steps": 38160, "loss": 0.4185, "lr": 5.9346094714740615e-06, "epoch": 19.116876310272538, "percentage": 95.58, "elapsed_time": "1:33:18", "remaining_time": "0:04:18", "throughput": 4264.78, "total_tokens": 23876328}
|
| 7315 |
+
{"current_steps": 36480, "total_steps": 38160, "loss": 0.4035, "lr": 5.8995316019982425e-06, "epoch": 19.11949685534591, "percentage": 95.6, "elapsed_time": "1:33:19", "remaining_time": "0:04:17", "throughput": 4264.76, "total_tokens": 23878984}
|
| 7316 |
+
{"current_steps": 36485, "total_steps": 38160, "loss": 0.2954, "lr": 5.8645570927034485e-06, "epoch": 19.122117400419288, "percentage": 95.61, "elapsed_time": "1:33:19", "remaining_time": "0:04:17", "throughput": 4264.75, "total_tokens": 23881800}
|
| 7317 |
+
{"current_steps": 36490, "total_steps": 38160, "loss": 0.4055, "lr": 5.8296859509058275e-06, "epoch": 19.12473794549266, "percentage": 95.62, "elapsed_time": "1:33:20", "remaining_time": "0:04:16", "throughput": 4264.72, "total_tokens": 23884296}
|
| 7318 |
+
{"current_steps": 36495, "total_steps": 38160, "loss": 0.6559, "lr": 5.794918183900155e-06, "epoch": 19.12735849056604, "percentage": 95.64, "elapsed_time": "1:33:21", "remaining_time": "0:04:15", "throughput": 4264.75, "total_tokens": 23887560}
|
| 7319 |
+
{"current_steps": 36500, "total_steps": 38160, "loss": 0.3457, "lr": 5.760253798959447e-06, "epoch": 19.129979035639412, "percentage": 95.65, "elapsed_time": "1:33:21", "remaining_time": "0:04:14", "throughput": 4264.75, "total_tokens": 23890408}
|
| 7320 |
+
{"current_steps": 36505, "total_steps": 38160, "loss": 0.4794, "lr": 5.725692803335015e-06, "epoch": 19.13259958071279, "percentage": 95.66, "elapsed_time": "1:33:22", "remaining_time": "0:04:13", "throughput": 4264.75, "total_tokens": 23893288}
|
| 7321 |
+
{"current_steps": 36510, "total_steps": 38160, "loss": 0.625, "lr": 5.691235204256739e-06, "epoch": 19.135220125786162, "percentage": 95.68, "elapsed_time": "1:33:23", "remaining_time": "0:04:13", "throughput": 4264.79, "total_tokens": 23897000}
|
| 7322 |
+
{"current_steps": 36515, "total_steps": 38160, "loss": 0.4295, "lr": 5.65688100893258e-06, "epoch": 19.13784067085954, "percentage": 95.69, "elapsed_time": "1:33:24", "remaining_time": "0:04:12", "throughput": 4264.83, "total_tokens": 23900392}
|
| 7323 |
+
{"current_steps": 36520, "total_steps": 38160, "loss": 0.3439, "lr": 5.622630224549174e-06, "epoch": 19.140461215932913, "percentage": 95.7, "elapsed_time": "1:33:24", "remaining_time": "0:04:11", "throughput": 4264.94, "total_tokens": 23904520}
|
| 7324 |
+
{"current_steps": 36525, "total_steps": 38160, "loss": 0.4478, "lr": 5.588482858271404e-06, "epoch": 19.14308176100629, "percentage": 95.72, "elapsed_time": "1:33:25", "remaining_time": "0:04:10", "throughput": 4264.95, "total_tokens": 23907560}
|
| 7325 |
+
{"current_steps": 36530, "total_steps": 38160, "loss": 0.482, "lr": 5.554438917242444e-06, "epoch": 19.145702306079663, "percentage": 95.73, "elapsed_time": "1:33:26", "remaining_time": "0:04:10", "throughput": 4265.06, "total_tokens": 23911784}
|
| 7326 |
+
{"current_steps": 36535, "total_steps": 38160, "loss": 0.3855, "lr": 5.520498408583985e-06, "epoch": 19.14832285115304, "percentage": 95.74, "elapsed_time": "1:33:27", "remaining_time": "0:04:09", "throughput": 4265.08, "total_tokens": 23914952}
|
| 7327 |
+
{"current_steps": 36540, "total_steps": 38160, "loss": 0.4677, "lr": 5.486661339395904e-06, "epoch": 19.150943396226417, "percentage": 95.75, "elapsed_time": "1:33:27", "remaining_time": "0:04:08", "throughput": 4265.13, "total_tokens": 23918408}
|
| 7328 |
+
{"current_steps": 36545, "total_steps": 38160, "loss": 0.4616, "lr": 5.452927716756595e-06, "epoch": 19.15356394129979, "percentage": 95.77, "elapsed_time": "1:33:28", "remaining_time": "0:04:07", "throughput": 4265.18, "total_tokens": 23921896}
|
| 7329 |
+
{"current_steps": 36550, "total_steps": 38160, "loss": 0.4296, "lr": 5.419297547722635e-06, "epoch": 19.156184486373167, "percentage": 95.78, "elapsed_time": "1:33:29", "remaining_time": "0:04:07", "throughput": 4265.24, "total_tokens": 23925512}
|
| 7330 |
+
{"current_steps": 36555, "total_steps": 38160, "loss": 0.432, "lr": 5.385770839329229e-06, "epoch": 19.15880503144654, "percentage": 95.79, "elapsed_time": "1:33:30", "remaining_time": "0:04:06", "throughput": 4265.21, "total_tokens": 23928072}
|
| 7331 |
+
{"current_steps": 36560, "total_steps": 38160, "loss": 0.3988, "lr": 5.352347598589713e-06, "epoch": 19.161425576519918, "percentage": 95.81, "elapsed_time": "1:33:30", "remaining_time": "0:04:05", "throughput": 4265.19, "total_tokens": 23930632}
|
| 7332 |
+
{"current_steps": 36565, "total_steps": 38160, "loss": 0.3938, "lr": 5.319027832495826e-06, "epoch": 19.16404612159329, "percentage": 95.82, "elapsed_time": "1:33:31", "remaining_time": "0:04:04", "throughput": 4265.26, "total_tokens": 23934408}
|
| 7333 |
+
{"current_steps": 36570, "total_steps": 38160, "loss": 0.2814, "lr": 5.285811548017661e-06, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "1:33:32", "remaining_time": "0:04:04", "throughput": 4265.3, "total_tokens": 23937768}
|
| 7334 |
+
{"current_steps": 36575, "total_steps": 38160, "loss": 0.3245, "lr": 5.252698752103713e-06, "epoch": 19.16928721174004, "percentage": 95.85, "elapsed_time": "1:33:32", "remaining_time": "0:04:03", "throughput": 4265.36, "total_tokens": 23941288}
|
| 7335 |
+
{"current_steps": 36580, "total_steps": 38160, "loss": 0.347, "lr": 5.219689451680832e-06, "epoch": 19.171907756813418, "percentage": 95.86, "elapsed_time": "1:33:33", "remaining_time": "0:04:02", "throughput": 4265.46, "total_tokens": 23945384}
|
| 7336 |
+
{"current_steps": 36585, "total_steps": 38160, "loss": 0.3566, "lr": 5.186783653654214e-06, "epoch": 19.17452830188679, "percentage": 95.87, "elapsed_time": "1:33:34", "remaining_time": "0:04:01", "throughput": 4265.49, "total_tokens": 23948680}
|
| 7337 |
+
{"current_steps": 36590, "total_steps": 38160, "loss": 0.4363, "lr": 5.153981364907245e-06, "epoch": 19.17714884696017, "percentage": 95.89, "elapsed_time": "1:33:35", "remaining_time": "0:04:00", "throughput": 4265.52, "total_tokens": 23951912}
|
| 7338 |
+
{"current_steps": 36595, "total_steps": 38160, "loss": 0.4407, "lr": 5.121282592301935e-06, "epoch": 19.179769392033542, "percentage": 95.9, "elapsed_time": "1:33:35", "remaining_time": "0:04:00", "throughput": 4265.5, "total_tokens": 23954568}
|
| 7339 |
+
{"current_steps": 36600, "total_steps": 38160, "loss": 0.5874, "lr": 5.088687342678422e-06, "epoch": 19.18238993710692, "percentage": 95.91, "elapsed_time": "1:33:36", "remaining_time": "0:03:59", "throughput": 4265.57, "total_tokens": 23958376}
|
| 7340 |
+
{"current_steps": 36605, "total_steps": 38160, "loss": 0.4996, "lr": 5.056195622855253e-06, "epoch": 19.185010482180292, "percentage": 95.93, "elapsed_time": "1:33:37", "remaining_time": "0:03:58", "throughput": 4265.61, "total_tokens": 23961768}
|
| 7341 |
+
{"current_steps": 36610, "total_steps": 38160, "loss": 0.4894, "lr": 5.023807439629324e-06, "epoch": 19.18763102725367, "percentage": 95.94, "elapsed_time": "1:33:38", "remaining_time": "0:03:57", "throughput": 4265.63, "total_tokens": 23964936}
|
| 7342 |
+
{"current_steps": 36615, "total_steps": 38160, "loss": 0.5153, "lr": 4.991522799775938e-06, "epoch": 19.190251572327043, "percentage": 95.95, "elapsed_time": "1:33:38", "remaining_time": "0:03:57", "throughput": 4265.64, "total_tokens": 23967976}
|
| 7343 |
+
{"current_steps": 36620, "total_steps": 38160, "loss": 0.4312, "lr": 4.9593417100485816e-06, "epoch": 19.19287211740042, "percentage": 95.96, "elapsed_time": "1:33:39", "remaining_time": "0:03:56", "throughput": 4265.65, "total_tokens": 23970984}
|
| 7344 |
+
{"current_steps": 36625, "total_steps": 38160, "loss": 0.3569, "lr": 4.927264177179258e-06, "epoch": 19.195492662473793, "percentage": 95.98, "elapsed_time": "1:33:40", "remaining_time": "0:03:55", "throughput": 4265.77, "total_tokens": 23975400}
|
| 7345 |
+
{"current_steps": 36630, "total_steps": 38160, "loss": 0.5337, "lr": 4.895290207878156e-06, "epoch": 19.19811320754717, "percentage": 95.99, "elapsed_time": "1:33:41", "remaining_time": "0:03:54", "throughput": 4265.78, "total_tokens": 23978504}
|
| 7346 |
+
{"current_steps": 36635, "total_steps": 38160, "loss": 0.3495, "lr": 4.863419808833924e-06, "epoch": 19.200733752620547, "percentage": 96.0, "elapsed_time": "1:33:41", "remaining_time": "0:03:54", "throughput": 4265.84, "total_tokens": 23981992}
|
| 7347 |
+
{"current_steps": 36640, "total_steps": 38160, "loss": 0.4149, "lr": 4.831652986713453e-06, "epoch": 19.20335429769392, "percentage": 96.02, "elapsed_time": "1:33:42", "remaining_time": "0:03:53", "throughput": 4265.88, "total_tokens": 23985416}
|
| 7348 |
+
{"current_steps": 36645, "total_steps": 38160, "loss": 0.4525, "lr": 4.799989748161926e-06, "epoch": 19.205974842767297, "percentage": 96.03, "elapsed_time": "1:33:43", "remaining_time": "0:03:52", "throughput": 4265.89, "total_tokens": 23988328}
|
| 7349 |
+
{"current_steps": 36650, "total_steps": 38160, "loss": 0.5068, "lr": 4.768430099803101e-06, "epoch": 19.20859538784067, "percentage": 96.04, "elapsed_time": "1:33:44", "remaining_time": "0:03:51", "throughput": 4265.91, "total_tokens": 23991464}
|
| 7350 |
+
{"current_steps": 36655, "total_steps": 38160, "loss": 0.4332, "lr": 4.736974048238696e-06, "epoch": 19.211215932914047, "percentage": 96.06, "elapsed_time": "1:33:44", "remaining_time": "0:03:50", "throughput": 4265.93, "total_tokens": 23994536}
|
| 7351 |
+
{"current_steps": 36660, "total_steps": 38160, "loss": 0.3574, "lr": 4.705621600049115e-06, "epoch": 19.21383647798742, "percentage": 96.07, "elapsed_time": "1:33:45", "remaining_time": "0:03:50", "throughput": 4266.01, "total_tokens": 23998472}
|
| 7352 |
+
{"current_steps": 36665, "total_steps": 38160, "loss": 0.426, "lr": 4.674372761792889e-06, "epoch": 19.216457023060798, "percentage": 96.08, "elapsed_time": "1:33:46", "remaining_time": "0:03:49", "throughput": 4266.02, "total_tokens": 24001384}
|
| 7353 |
+
{"current_steps": 36670, "total_steps": 38160, "loss": 0.5026, "lr": 4.6432275400069e-06, "epoch": 19.21907756813417, "percentage": 96.1, "elapsed_time": "1:33:46", "remaining_time": "0:03:48", "throughput": 4266.05, "total_tokens": 24004712}
|
| 7354 |
+
{"current_steps": 36675, "total_steps": 38160, "loss": 0.4015, "lr": 4.6121859412063264e-06, "epoch": 19.221698113207548, "percentage": 96.11, "elapsed_time": "1:33:47", "remaining_time": "0:03:47", "throughput": 4266.01, "total_tokens": 24007144}
|
| 7355 |
+
{"current_steps": 36680, "total_steps": 38160, "loss": 0.4105, "lr": 4.581247971884861e-06, "epoch": 19.22431865828092, "percentage": 96.12, "elapsed_time": "1:33:48", "remaining_time": "0:03:47", "throughput": 4266.02, "total_tokens": 24010056}
|
| 7356 |
+
{"current_steps": 36685, "total_steps": 38160, "loss": 0.3935, "lr": 4.550413638514217e-06, "epoch": 19.2269392033543, "percentage": 96.13, "elapsed_time": "1:33:48", "remaining_time": "0:03:46", "throughput": 4266.05, "total_tokens": 24013352}
|
| 7357 |
+
{"current_steps": 36690, "total_steps": 38160, "loss": 0.4173, "lr": 4.519682947544679e-06, "epoch": 19.229559748427672, "percentage": 96.15, "elapsed_time": "1:33:49", "remaining_time": "0:03:45", "throughput": 4266.07, "total_tokens": 24016488}
|
| 7358 |
+
{"current_steps": 36695, "total_steps": 38160, "loss": 0.5951, "lr": 4.489055905404715e-06, "epoch": 19.23218029350105, "percentage": 96.16, "elapsed_time": "1:33:50", "remaining_time": "0:03:44", "throughput": 4266.11, "total_tokens": 24020008}
|
| 7359 |
+
{"current_steps": 36700, "total_steps": 38160, "loss": 0.4567, "lr": 4.4585325185012014e-06, "epoch": 19.234800838574422, "percentage": 96.17, "elapsed_time": "1:33:51", "remaining_time": "0:03:44", "throughput": 4266.08, "total_tokens": 24022472}
|
| 7360 |
+
{"current_steps": 36705, "total_steps": 38160, "loss": 0.5066, "lr": 4.428112793219197e-06, "epoch": 19.2374213836478, "percentage": 96.19, "elapsed_time": "1:33:51", "remaining_time": "0:03:43", "throughput": 4266.2, "total_tokens": 24026856}
|
| 7361 |
+
{"current_steps": 36710, "total_steps": 38160, "loss": 0.4603, "lr": 4.397796735922277e-06, "epoch": 19.240041928721173, "percentage": 96.2, "elapsed_time": "1:33:52", "remaining_time": "0:03:42", "throughput": 4266.25, "total_tokens": 24030376}
|
| 7362 |
+
{"current_steps": 36715, "total_steps": 38160, "loss": 0.394, "lr": 4.367584352952092e-06, "epoch": 19.24266247379455, "percentage": 96.21, "elapsed_time": "1:33:53", "remaining_time": "0:03:41", "throughput": 4266.26, "total_tokens": 24033384}
|
| 7363 |
+
{"current_steps": 36720, "total_steps": 38160, "loss": 0.3476, "lr": 4.337475650628808e-06, "epoch": 19.245283018867923, "percentage": 96.23, "elapsed_time": "1:33:54", "remaining_time": "0:03:40", "throughput": 4266.47, "total_tokens": 24039144}
|
| 7364 |
+
{"current_steps": 36725, "total_steps": 38160, "loss": 0.3537, "lr": 4.307470635250832e-06, "epoch": 19.2479035639413, "percentage": 96.24, "elapsed_time": "1:33:55", "remaining_time": "0:03:40", "throughput": 4266.56, "total_tokens": 24043112}
|
| 7365 |
+
{"current_steps": 36730, "total_steps": 38160, "loss": 0.5064, "lr": 4.277569313094809e-06, "epoch": 19.250524109014677, "percentage": 96.25, "elapsed_time": "1:33:56", "remaining_time": "0:03:39", "throughput": 4266.63, "total_tokens": 24046824}
|
| 7366 |
+
{"current_steps": 36735, "total_steps": 38160, "loss": 0.4852, "lr": 4.247771690415791e-06, "epoch": 19.25314465408805, "percentage": 96.27, "elapsed_time": "1:33:56", "remaining_time": "0:03:38", "throughput": 4266.62, "total_tokens": 24049576}
|
| 7367 |
+
{"current_steps": 36740, "total_steps": 38160, "loss": 0.5094, "lr": 4.218077773447071e-06, "epoch": 19.255765199161427, "percentage": 96.28, "elapsed_time": "1:33:57", "remaining_time": "0:03:37", "throughput": 4266.62, "total_tokens": 24052456}
|
| 7368 |
+
{"current_steps": 36745, "total_steps": 38160, "loss": 0.334, "lr": 4.1884875684003455e-06, "epoch": 19.2583857442348, "percentage": 96.29, "elapsed_time": "1:33:58", "remaining_time": "0:03:37", "throughput": 4266.61, "total_tokens": 24055272}
|
| 7369 |
+
{"current_steps": 36750, "total_steps": 38160, "loss": 0.379, "lr": 4.159001081465497e-06, "epoch": 19.261006289308177, "percentage": 96.31, "elapsed_time": "1:33:58", "remaining_time": "0:03:36", "throughput": 4266.65, "total_tokens": 24058728}
|
| 7370 |
+
{"current_steps": 36755, "total_steps": 38160, "loss": 0.4646, "lr": 4.129618318810702e-06, "epoch": 19.26362683438155, "percentage": 96.32, "elapsed_time": "1:33:59", "remaining_time": "0:03:35", "throughput": 4266.66, "total_tokens": 24061896}
|
| 7371 |
+
{"current_steps": 36760, "total_steps": 38160, "loss": 0.5274, "lr": 4.100339286582655e-06, "epoch": 19.266247379454928, "percentage": 96.33, "elapsed_time": "1:34:00", "remaining_time": "0:03:34", "throughput": 4266.67, "total_tokens": 24065000}
|
| 7372 |
+
{"current_steps": 36765, "total_steps": 38160, "loss": 0.4222, "lr": 4.071163990906068e-06, "epoch": 19.2688679245283, "percentage": 96.34, "elapsed_time": "1:34:00", "remaining_time": "0:03:34", "throughput": 4266.71, "total_tokens": 24068264}
|
| 7373 |
+
{"current_steps": 36770, "total_steps": 38160, "loss": 0.4088, "lr": 4.042092437884115e-06, "epoch": 19.271488469601678, "percentage": 96.36, "elapsed_time": "1:34:01", "remaining_time": "0:03:33", "throughput": 4266.79, "total_tokens": 24072008}
|
| 7374 |
+
{"current_steps": 36775, "total_steps": 38160, "loss": 0.45, "lr": 4.013124633598264e-06, "epoch": 19.27410901467505, "percentage": 96.37, "elapsed_time": "1:34:02", "remaining_time": "0:03:32", "throughput": 4266.85, "total_tokens": 24075752}
|
| 7375 |
+
{"current_steps": 36780, "total_steps": 38160, "loss": 0.3928, "lr": 3.984260584108168e-06, "epoch": 19.27672955974843, "percentage": 96.38, "elapsed_time": "1:34:03", "remaining_time": "0:03:31", "throughput": 4266.93, "total_tokens": 24079528}
|
| 7376 |
+
{"current_steps": 36785, "total_steps": 38160, "loss": 0.4117, "lr": 3.955500295451942e-06, "epoch": 19.279350104821802, "percentage": 96.4, "elapsed_time": "1:34:04", "remaining_time": "0:03:30", "throughput": 4267.18, "total_tokens": 24085544}
|
| 7377 |
+
{"current_steps": 36790, "total_steps": 38160, "loss": 0.4485, "lr": 3.926843773645883e-06, "epoch": 19.28197064989518, "percentage": 96.41, "elapsed_time": "1:34:05", "remaining_time": "0:03:30", "throughput": 4267.25, "total_tokens": 24089480}
|
| 7378 |
+
{"current_steps": 36795, "total_steps": 38160, "loss": 0.3593, "lr": 3.8982910246846415e-06, "epoch": 19.284591194968552, "percentage": 96.42, "elapsed_time": "1:34:05", "remaining_time": "0:03:29", "throughput": 4267.31, "total_tokens": 24092968}
|
| 7379 |
+
{"current_steps": 36800, "total_steps": 38160, "loss": 0.3343, "lr": 3.869842054541051e-06, "epoch": 19.28721174004193, "percentage": 96.44, "elapsed_time": "1:34:06", "remaining_time": "0:03:28", "throughput": 4267.38, "total_tokens": 24096680}
|
| 7380 |
+
{"current_steps": 36805, "total_steps": 38160, "loss": 0.4174, "lr": 3.84149686916635e-06, "epoch": 19.289832285115303, "percentage": 96.45, "elapsed_time": "1:34:07", "remaining_time": "0:03:27", "throughput": 4267.46, "total_tokens": 24100456}
|
| 7381 |
+
{"current_steps": 36810, "total_steps": 38160, "loss": 0.4574, "lr": 3.8132554744900183e-06, "epoch": 19.29245283018868, "percentage": 96.46, "elapsed_time": "1:34:08", "remaining_time": "0:03:27", "throughput": 4267.49, "total_tokens": 24103816}
|
| 7382 |
+
{"current_steps": 36815, "total_steps": 38160, "loss": 0.4151, "lr": 3.7851178764198302e-06, "epoch": 19.295073375262053, "percentage": 96.48, "elapsed_time": "1:34:09", "remaining_time": "0:03:26", "throughput": 4267.58, "total_tokens": 24107752}
|
| 7383 |
+
{"current_steps": 36820, "total_steps": 38160, "loss": 0.4456, "lr": 3.7570840808419104e-06, "epoch": 19.29769392033543, "percentage": 96.49, "elapsed_time": "1:34:09", "remaining_time": "0:03:25", "throughput": 4267.59, "total_tokens": 24110856}
|
| 7384 |
+
{"current_steps": 36825, "total_steps": 38160, "loss": 0.559, "lr": 3.729154093620568e-06, "epoch": 19.300314465408803, "percentage": 96.5, "elapsed_time": "1:34:10", "remaining_time": "0:03:24", "throughput": 4267.6, "total_tokens": 24113896}
|
| 7385 |
+
{"current_steps": 36830, "total_steps": 38160, "loss": 0.4685, "lr": 3.7013279205984073e-06, "epoch": 19.30293501048218, "percentage": 96.51, "elapsed_time": "1:34:11", "remaining_time": "0:03:24", "throughput": 4267.61, "total_tokens": 24116872}
|
| 7386 |
+
{"current_steps": 36835, "total_steps": 38160, "loss": 0.3459, "lr": 3.6736055675963275e-06, "epoch": 19.305555555555557, "percentage": 96.53, "elapsed_time": "1:34:11", "remaining_time": "0:03:23", "throughput": 4267.63, "total_tokens": 24120072}
|
| 7387 |
+
{"current_steps": 36840, "total_steps": 38160, "loss": 0.3513, "lr": 3.645987040413634e-06, "epoch": 19.30817610062893, "percentage": 96.54, "elapsed_time": "1:34:12", "remaining_time": "0:03:22", "throughput": 4267.73, "total_tokens": 24124232}
|
| 7388 |
+
{"current_steps": 36845, "total_steps": 38160, "loss": 0.3954, "lr": 3.6184723448277056e-06, "epoch": 19.310796645702307, "percentage": 96.55, "elapsed_time": "1:34:13", "remaining_time": "0:03:21", "throughput": 4267.77, "total_tokens": 24127720}
|
| 7389 |
+
{"current_steps": 36850, "total_steps": 38160, "loss": 0.4764, "lr": 3.5910614865943826e-06, "epoch": 19.31341719077568, "percentage": 96.57, "elapsed_time": "1:34:14", "remaining_time": "0:03:21", "throughput": 4267.85, "total_tokens": 24131624}
|
| 7390 |
+
{"current_steps": 36855, "total_steps": 38160, "loss": 0.3507, "lr": 3.563754471447689e-06, "epoch": 19.316037735849058, "percentage": 96.58, "elapsed_time": "1:34:14", "remaining_time": "0:03:20", "throughput": 4267.85, "total_tokens": 24134568}
|
| 7391 |
+
{"current_steps": 36860, "total_steps": 38160, "loss": 0.3472, "lr": 3.5365513050998334e-06, "epoch": 19.31865828092243, "percentage": 96.59, "elapsed_time": "1:34:15", "remaining_time": "0:03:19", "throughput": 4267.87, "total_tokens": 24137768}
|
| 7392 |
+
{"current_steps": 36865, "total_steps": 38160, "loss": 0.4402, "lr": 3.5094519932415414e-06, "epoch": 19.321278825995808, "percentage": 96.61, "elapsed_time": "1:34:16", "remaining_time": "0:03:18", "throughput": 4268.0, "total_tokens": 24142184}
|
| 7393 |
+
{"current_steps": 36870, "total_steps": 38160, "loss": 0.4072, "lr": 3.4824565415416123e-06, "epoch": 19.32389937106918, "percentage": 96.62, "elapsed_time": "1:34:17", "remaining_time": "0:03:17", "throughput": 4268.01, "total_tokens": 24145192}
|
| 7394 |
+
{"current_steps": 36875, "total_steps": 38160, "loss": 0.4438, "lr": 3.4555649556471946e-06, "epoch": 19.32651991614256, "percentage": 96.63, "elapsed_time": "1:34:17", "remaining_time": "0:03:17", "throughput": 4268.06, "total_tokens": 24148648}
|
| 7395 |
+
{"current_steps": 36880, "total_steps": 38160, "loss": 0.3228, "lr": 3.4287772411837338e-06, "epoch": 19.329140461215932, "percentage": 96.65, "elapsed_time": "1:34:18", "remaining_time": "0:03:16", "throughput": 4268.14, "total_tokens": 24152616}
|
| 7396 |
+
{"current_steps": 36885, "total_steps": 38160, "loss": 0.4953, "lr": 3.402093403754858e-06, "epoch": 19.33176100628931, "percentage": 96.66, "elapsed_time": "1:34:19", "remaining_time": "0:03:15", "throughput": 4268.13, "total_tokens": 24155272}
|
| 7397 |
+
{"current_steps": 36890, "total_steps": 38160, "loss": 0.4446, "lr": 3.375513448942602e-06, "epoch": 19.334381551362682, "percentage": 96.67, "elapsed_time": "1:34:20", "remaining_time": "0:03:14", "throughput": 4268.15, "total_tokens": 24158536}
|
| 7398 |
+
{"current_steps": 36895, "total_steps": 38160, "loss": 0.4675, "lr": 3.349037382307074e-06, "epoch": 19.33700209643606, "percentage": 96.69, "elapsed_time": "1:34:21", "remaining_time": "0:03:14", "throughput": 4268.29, "total_tokens": 24163144}
|
| 7399 |
+
{"current_steps": 36900, "total_steps": 38160, "loss": 0.385, "lr": 3.322665209386899e-06, "epoch": 19.339622641509433, "percentage": 96.7, "elapsed_time": "1:34:21", "remaining_time": "0:03:13", "throughput": 4268.28, "total_tokens": 24165864}
|
| 7400 |
+
{"current_steps": 36905, "total_steps": 38160, "loss": 0.4168, "lr": 3.29639693569872e-06, "epoch": 19.34224318658281, "percentage": 96.71, "elapsed_time": "1:34:22", "remaining_time": "0:03:12", "throughput": 4268.31, "total_tokens": 24169032}
|
| 7401 |
+
{"current_steps": 36910, "total_steps": 38160, "loss": 0.482, "lr": 3.270232566737641e-06, "epoch": 19.344863731656183, "percentage": 96.72, "elapsed_time": "1:34:23", "remaining_time": "0:03:11", "throughput": 4268.28, "total_tokens": 24171592}
|
| 7402 |
+
{"current_steps": 36915, "total_steps": 38160, "loss": 0.457, "lr": 3.244172107976895e-06, "epoch": 19.34748427672956, "percentage": 96.74, "elapsed_time": "1:34:23", "remaining_time": "0:03:11", "throughput": 4268.3, "total_tokens": 24174824}
|
| 7403 |
+
{"current_steps": 36920, "total_steps": 38160, "loss": 0.5649, "lr": 3.2182155648680657e-06, "epoch": 19.350104821802937, "percentage": 96.75, "elapsed_time": "1:34:24", "remaining_time": "0:03:10", "throughput": 4268.31, "total_tokens": 24177864}
|
| 7404 |
+
{"current_steps": 36925, "total_steps": 38160, "loss": 0.408, "lr": 3.1923629428409205e-06, "epoch": 19.35272536687631, "percentage": 96.76, "elapsed_time": "1:34:25", "remaining_time": "0:03:09", "throughput": 4268.36, "total_tokens": 24181288}
|
| 7405 |
+
{"current_steps": 36930, "total_steps": 38160, "loss": 0.567, "lr": 3.166614247303634e-06, "epoch": 19.355345911949687, "percentage": 96.78, "elapsed_time": "1:34:25", "remaining_time": "0:03:08", "throughput": 4268.4, "total_tokens": 24184680}
|
| 7406 |
+
{"current_steps": 36935, "total_steps": 38160, "loss": 0.3087, "lr": 3.140969483642453e-06, "epoch": 19.35796645702306, "percentage": 96.79, "elapsed_time": "1:34:26", "remaining_time": "0:03:07", "throughput": 4268.44, "total_tokens": 24188072}
|
| 7407 |
+
{"current_steps": 36940, "total_steps": 38160, "loss": 0.389, "lr": 3.1154286572219747e-06, "epoch": 19.360587002096437, "percentage": 96.8, "elapsed_time": "1:34:27", "remaining_time": "0:03:07", "throughput": 4268.42, "total_tokens": 24190664}
|
| 7408 |
+
{"current_steps": 36945, "total_steps": 38160, "loss": 0.235, "lr": 3.089991773385037e-06, "epoch": 19.36320754716981, "percentage": 96.82, "elapsed_time": "1:34:28", "remaining_time": "0:03:06", "throughput": 4268.45, "total_tokens": 24193832}
|
| 7409 |
+
{"current_steps": 36950, "total_steps": 38160, "loss": 0.4542, "lr": 3.064658837452772e-06, "epoch": 19.365828092243188, "percentage": 96.83, "elapsed_time": "1:34:28", "remaining_time": "0:03:05", "throughput": 4268.49, "total_tokens": 24197256}
|
| 7410 |
+
{"current_steps": 36955, "total_steps": 38160, "loss": 0.3604, "lr": 3.0394298547246068e-06, "epoch": 19.36844863731656, "percentage": 96.84, "elapsed_time": "1:34:29", "remaining_time": "0:03:04", "throughput": 4268.47, "total_tokens": 24199816}
|
| 7411 |
+
{"current_steps": 36960, "total_steps": 38160, "loss": 0.51, "lr": 3.0143048304779875e-06, "epoch": 19.371069182389938, "percentage": 96.86, "elapsed_time": "1:34:30", "remaining_time": "0:03:04", "throughput": 4268.52, "total_tokens": 24203272}
|
| 7412 |
+
{"current_steps": 36965, "total_steps": 38160, "loss": 0.3529, "lr": 2.989283769968987e-06, "epoch": 19.37368972746331, "percentage": 96.87, "elapsed_time": "1:34:30", "remaining_time": "0:03:03", "throughput": 4268.56, "total_tokens": 24206728}
|
| 7413 |
+
{"current_steps": 36970, "total_steps": 38160, "loss": 0.3437, "lr": 2.964366678431585e-06, "epoch": 19.37631027253669, "percentage": 96.88, "elapsed_time": "1:34:31", "remaining_time": "0:03:02", "throughput": 4268.55, "total_tokens": 24209416}
|
| 7414 |
+
{"current_steps": 36975, "total_steps": 38160, "loss": 0.4875, "lr": 2.9395535610781678e-06, "epoch": 19.378930817610062, "percentage": 96.89, "elapsed_time": "1:34:32", "remaining_time": "0:03:01", "throughput": 4268.57, "total_tokens": 24212456}
|
| 7415 |
+
{"current_steps": 36980, "total_steps": 38160, "loss": 0.4403, "lr": 2.9148444230994166e-06, "epoch": 19.38155136268344, "percentage": 96.91, "elapsed_time": "1:34:32", "remaining_time": "0:03:01", "throughput": 4268.52, "total_tokens": 24214856}
|
| 7416 |
+
{"current_steps": 36985, "total_steps": 38160, "loss": 0.385, "lr": 2.890239269664141e-06, "epoch": 19.384171907756812, "percentage": 96.92, "elapsed_time": "1:34:33", "remaining_time": "0:03:00", "throughput": 4268.55, "total_tokens": 24218024}
|
| 7417 |
+
{"current_steps": 36990, "total_steps": 38160, "loss": 0.4213, "lr": 2.8657381059194466e-06, "epoch": 19.38679245283019, "percentage": 96.93, "elapsed_time": "1:34:34", "remaining_time": "0:02:59", "throughput": 4268.54, "total_tokens": 24220808}
|
| 7418 |
+
{"current_steps": 36995, "total_steps": 38160, "loss": 0.5437, "lr": 2.8413409369907885e-06, "epoch": 19.389412997903563, "percentage": 96.95, "elapsed_time": "1:34:34", "remaining_time": "0:02:58", "throughput": 4268.54, "total_tokens": 24223816}
|
| 7419 |
+
{"current_steps": 37000, "total_steps": 38160, "loss": 0.4089, "lr": 2.817047767981695e-06, "epoch": 19.39203354297694, "percentage": 96.96, "elapsed_time": "1:34:35", "remaining_time": "0:02:57", "throughput": 4268.61, "total_tokens": 24227464}
|
| 7420 |
+
{"current_steps": 37005, "total_steps": 38160, "loss": 0.488, "lr": 2.7928586039740466e-06, "epoch": 19.394654088050313, "percentage": 96.97, "elapsed_time": "1:34:36", "remaining_time": "0:02:57", "throughput": 4268.61, "total_tokens": 24230376}
|
| 7421 |
+
{"current_steps": 37010, "total_steps": 38160, "loss": 0.3644, "lr": 2.7687734500279615e-06, "epoch": 19.39727463312369, "percentage": 96.99, "elapsed_time": "1:34:37", "remaining_time": "0:02:56", "throughput": 4268.57, "total_tokens": 24232744}
|
| 7422 |
+
{"current_steps": 37015, "total_steps": 38160, "loss": 0.4025, "lr": 2.744792311181743e-06, "epoch": 19.399895178197063, "percentage": 97.0, "elapsed_time": "1:34:37", "remaining_time": "0:02:55", "throughput": 4268.58, "total_tokens": 24235848}
|
| 7423 |
+
{"current_steps": 37020, "total_steps": 38160, "loss": 0.4432, "lr": 2.720915192451989e-06, "epoch": 19.40251572327044, "percentage": 97.01, "elapsed_time": "1:34:38", "remaining_time": "0:02:54", "throughput": 4268.67, "total_tokens": 24239912}
|
| 7424 |
+
{"current_steps": 37025, "total_steps": 38160, "loss": 0.6125, "lr": 2.697142098833538e-06, "epoch": 19.405136268343817, "percentage": 97.03, "elapsed_time": "1:34:39", "remaining_time": "0:02:54", "throughput": 4268.75, "total_tokens": 24243784}
|
| 7425 |
+
{"current_steps": 37030, "total_steps": 38160, "loss": 0.3584, "lr": 2.6734730352993563e-06, "epoch": 19.40775681341719, "percentage": 97.04, "elapsed_time": "1:34:40", "remaining_time": "0:02:53", "throughput": 4268.76, "total_tokens": 24246792}
|
| 7426 |
+
{"current_steps": 37035, "total_steps": 38160, "loss": 0.3658, "lr": 2.649908006800872e-06, "epoch": 19.410377358490567, "percentage": 97.05, "elapsed_time": "1:34:40", "remaining_time": "0:02:52", "throughput": 4268.76, "total_tokens": 24249704}
|
| 7427 |
+
{"current_steps": 37040, "total_steps": 38160, "loss": 0.4016, "lr": 2.626447018267586e-06, "epoch": 19.41299790356394, "percentage": 97.06, "elapsed_time": "1:34:41", "remaining_time": "0:02:51", "throughput": 4268.76, "total_tokens": 24252616}
|
| 7428 |
+
{"current_steps": 37045, "total_steps": 38160, "loss": 0.3515, "lr": 2.603090074607184e-06, "epoch": 19.415618448637318, "percentage": 97.08, "elapsed_time": "1:34:42", "remaining_time": "0:02:51", "throughput": 4268.82, "total_tokens": 24256136}
|
| 7429 |
+
{"current_steps": 37050, "total_steps": 38160, "loss": 0.438, "lr": 2.579837180705813e-06, "epoch": 19.41823899371069, "percentage": 97.09, "elapsed_time": "1:34:42", "remaining_time": "0:02:50", "throughput": 4268.85, "total_tokens": 24259400}
|
| 7430 |
+
{"current_steps": 37055, "total_steps": 38160, "loss": 0.3684, "lr": 2.556688341427582e-06, "epoch": 19.420859538784068, "percentage": 97.1, "elapsed_time": "1:34:43", "remaining_time": "0:02:49", "throughput": 4268.99, "total_tokens": 24263880}
|
| 7431 |
+
{"current_steps": 37060, "total_steps": 38160, "loss": 0.4759, "lr": 2.5336435616150066e-06, "epoch": 19.42348008385744, "percentage": 97.12, "elapsed_time": "1:34:44", "remaining_time": "0:02:48", "throughput": 4269.0, "total_tokens": 24266888}
|
| 7432 |
+
{"current_steps": 37065, "total_steps": 38160, "loss": 0.4622, "lr": 2.510702846088786e-06, "epoch": 19.42610062893082, "percentage": 97.13, "elapsed_time": "1:34:45", "remaining_time": "0:02:47", "throughput": 4269.01, "total_tokens": 24269768}
|
| 7433 |
+
{"current_steps": 37070, "total_steps": 38160, "loss": 0.3513, "lr": 2.487866199647915e-06, "epoch": 19.428721174004192, "percentage": 97.14, "elapsed_time": "1:34:45", "remaining_time": "0:02:47", "throughput": 4269.02, "total_tokens": 24272808}
|
| 7434 |
+
{"current_steps": 37075, "total_steps": 38160, "loss": 0.3723, "lr": 2.4651336270695156e-06, "epoch": 19.43134171907757, "percentage": 97.16, "elapsed_time": "1:34:46", "remaining_time": "0:02:46", "throughput": 4269.01, "total_tokens": 24275592}
|
| 7435 |
+
{"current_steps": 37080, "total_steps": 38160, "loss": 0.4447, "lr": 2.442505133108952e-06, "epoch": 19.433962264150942, "percentage": 97.17, "elapsed_time": "1:34:47", "remaining_time": "0:02:45", "throughput": 4269.06, "total_tokens": 24278984}
|
| 7436 |
+
{"current_steps": 37085, "total_steps": 38160, "loss": 0.5551, "lr": 2.419980722499937e-06, "epoch": 19.43658280922432, "percentage": 97.18, "elapsed_time": "1:34:47", "remaining_time": "0:02:44", "throughput": 4269.09, "total_tokens": 24282248}
|
| 7437 |
+
{"current_steps": 37090, "total_steps": 38160, "loss": 0.3893, "lr": 2.397560399954202e-06, "epoch": 19.439203354297693, "percentage": 97.2, "elapsed_time": "1:34:48", "remaining_time": "0:02:44", "throughput": 4269.15, "total_tokens": 24285960}
|
| 7438 |
+
{"current_steps": 37095, "total_steps": 38160, "loss": 0.3355, "lr": 2.3752441701618833e-06, "epoch": 19.44182389937107, "percentage": 97.21, "elapsed_time": "1:34:49", "remaining_time": "0:02:43", "throughput": 4269.11, "total_tokens": 24288392}
|
| 7439 |
+
{"current_steps": 37100, "total_steps": 38160, "loss": 0.3952, "lr": 2.3530320377913027e-06, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "1:34:50", "remaining_time": "0:02:42", "throughput": 4269.16, "total_tokens": 24291880}
|
| 7440 |
+
{"current_steps": 37105, "total_steps": 38160, "loss": 0.3708, "lr": 2.3309240074890213e-06, "epoch": 19.44706498951782, "percentage": 97.24, "elapsed_time": "1:34:50", "remaining_time": "0:02:41", "throughput": 4269.19, "total_tokens": 24295112}
|
| 7441 |
+
{"current_steps": 37110, "total_steps": 38160, "loss": 0.4273, "lr": 2.3089200838796176e-06, "epoch": 19.449685534591197, "percentage": 97.25, "elapsed_time": "1:34:51", "remaining_time": "0:02:41", "throughput": 4269.21, "total_tokens": 24298152}
|
| 7442 |
+
{"current_steps": 37115, "total_steps": 38160, "loss": 0.3506, "lr": 2.2870202715662426e-06, "epoch": 19.45230607966457, "percentage": 97.26, "elapsed_time": "1:34:52", "remaining_time": "0:02:40", "throughput": 4269.29, "total_tokens": 24302024}
|
| 7443 |
+
{"current_steps": 37120, "total_steps": 38160, "loss": 0.4843, "lr": 2.265224575130009e-06, "epoch": 19.454926624737947, "percentage": 97.27, "elapsed_time": "1:34:53", "remaining_time": "0:02:39", "throughput": 4269.33, "total_tokens": 24305480}
|
| 7444 |
+
{"current_steps": 37125, "total_steps": 38160, "loss": 0.3979, "lr": 2.2435329991303268e-06, "epoch": 19.45754716981132, "percentage": 97.29, "elapsed_time": "1:34:53", "remaining_time": "0:02:38", "throughput": 4269.44, "total_tokens": 24309800}
|
| 7445 |
+
{"current_steps": 37130, "total_steps": 38160, "loss": 0.4346, "lr": 2.2219455481047868e-06, "epoch": 19.460167714884697, "percentage": 97.3, "elapsed_time": "1:34:54", "remaining_time": "0:02:37", "throughput": 4269.47, "total_tokens": 24313096}
|
| 7446 |
+
{"current_steps": 37135, "total_steps": 38160, "loss": 0.513, "lr": 2.2004622265693886e-06, "epoch": 19.46278825995807, "percentage": 97.31, "elapsed_time": "1:34:55", "remaining_time": "0:02:37", "throughput": 4269.46, "total_tokens": 24315784}
|
| 7447 |
+
{"current_steps": 37140, "total_steps": 38160, "loss": 0.4727, "lr": 2.179083039018037e-06, "epoch": 19.465408805031448, "percentage": 97.33, "elapsed_time": "1:34:55", "remaining_time": "0:02:36", "throughput": 4269.44, "total_tokens": 24318440}
|
| 7448 |
+
{"current_steps": 37145, "total_steps": 38160, "loss": 0.5015, "lr": 2.157807989923044e-06, "epoch": 19.46802935010482, "percentage": 97.34, "elapsed_time": "1:34:56", "remaining_time": "0:02:35", "throughput": 4269.46, "total_tokens": 24321544}
|
| 7449 |
+
{"current_steps": 37150, "total_steps": 38160, "loss": 0.4193, "lr": 2.1366370837349603e-06, "epoch": 19.470649895178198, "percentage": 97.35, "elapsed_time": "1:34:57", "remaining_time": "0:02:34", "throughput": 4269.52, "total_tokens": 24325192}
|
| 7450 |
+
{"current_steps": 37155, "total_steps": 38160, "loss": 0.4596, "lr": 2.1155703248825207e-06, "epoch": 19.47327044025157, "percentage": 97.37, "elapsed_time": "1:34:58", "remaining_time": "0:02:34", "throughput": 4269.52, "total_tokens": 24328072}
|
| 7451 |
+
{"current_steps": 37160, "total_steps": 38160, "loss": 0.2957, "lr": 2.094607717772534e-06, "epoch": 19.47589098532495, "percentage": 97.38, "elapsed_time": "1:34:58", "remaining_time": "0:02:33", "throughput": 4269.54, "total_tokens": 24331208}
|
| 7452 |
+
{"current_steps": 37165, "total_steps": 38160, "loss": 0.3368, "lr": 2.0737492667902702e-06, "epoch": 19.478511530398322, "percentage": 97.39, "elapsed_time": "1:34:59", "remaining_time": "0:02:32", "throughput": 4269.57, "total_tokens": 24334600}
|
| 7453 |
+
{"current_steps": 37170, "total_steps": 38160, "loss": 0.5772, "lr": 2.0529949762989608e-06, "epoch": 19.4811320754717, "percentage": 97.41, "elapsed_time": "1:35:00", "remaining_time": "0:02:31", "throughput": 4269.63, "total_tokens": 24338120}
|
| 7454 |
+
{"current_steps": 37175, "total_steps": 38160, "loss": 0.4277, "lr": 2.032344850640244e-06, "epoch": 19.483752620545072, "percentage": 97.42, "elapsed_time": "1:35:01", "remaining_time": "0:02:31", "throughput": 4269.68, "total_tokens": 24341672}
|
| 7455 |
+
{"current_steps": 37180, "total_steps": 38160, "loss": 0.5127, "lr": 2.011798894133887e-06, "epoch": 19.48637316561845, "percentage": 97.43, "elapsed_time": "1:35:01", "remaining_time": "0:02:30", "throughput": 4269.75, "total_tokens": 24345512}
|
| 7456 |
+
{"current_steps": 37185, "total_steps": 38160, "loss": 0.4416, "lr": 1.9913571110777852e-06, "epoch": 19.488993710691823, "percentage": 97.44, "elapsed_time": "1:35:02", "remaining_time": "0:02:29", "throughput": 4269.78, "total_tokens": 24348616}
|
| 7457 |
+
{"current_steps": 37190, "total_steps": 38160, "loss": 0.3604, "lr": 1.971019505748295e-06, "epoch": 19.4916142557652, "percentage": 97.46, "elapsed_time": "1:35:03", "remaining_time": "0:02:28", "throughput": 4269.77, "total_tokens": 24351496}
|
| 7458 |
+
{"current_steps": 37195, "total_steps": 38160, "loss": 0.4258, "lr": 1.9507860823996803e-06, "epoch": 19.494234800838573, "percentage": 97.47, "elapsed_time": "1:35:03", "remaining_time": "0:02:27", "throughput": 4269.76, "total_tokens": 24354248}
|
| 7459 |
+
{"current_steps": 37200, "total_steps": 38160, "loss": 0.5794, "lr": 1.9306568452645e-06, "epoch": 19.49685534591195, "percentage": 97.48, "elapsed_time": "1:35:04", "remaining_time": "0:02:27", "throughput": 4269.78, "total_tokens": 24357320}
|
| 7460 |
+
{"current_steps": 37205, "total_steps": 38160, "loss": 0.5295, "lr": 1.910631798553664e-06, "epoch": 19.499475890985323, "percentage": 97.5, "elapsed_time": "1:35:05", "remaining_time": "0:02:26", "throughput": 4269.81, "total_tokens": 24360520}
|
| 7461 |
+
{"current_steps": 37210, "total_steps": 38160, "loss": 0.3993, "lr": 1.8907109464562088e-06, "epoch": 19.5020964360587, "percentage": 97.51, "elapsed_time": "1:35:06", "remaining_time": "0:02:25", "throughput": 4269.9, "total_tokens": 24364552}
|
| 7462 |
+
{"current_steps": 37215, "total_steps": 38160, "loss": 0.3855, "lr": 1.870894293139247e-06, "epoch": 19.504716981132077, "percentage": 97.52, "elapsed_time": "1:35:06", "remaining_time": "0:02:24", "throughput": 4269.89, "total_tokens": 24367464}
|
| 7463 |
+
{"current_steps": 37220, "total_steps": 38160, "loss": 0.473, "lr": 1.8511818427482396e-06, "epoch": 19.50733752620545, "percentage": 97.54, "elapsed_time": "1:35:07", "remaining_time": "0:02:24", "throughput": 4269.91, "total_tokens": 24370536}
|
| 7464 |
+
{"current_steps": 37225, "total_steps": 38160, "loss": 0.5735, "lr": 1.8315735994068327e-06, "epoch": 19.509958071278827, "percentage": 97.55, "elapsed_time": "1:35:08", "remaining_time": "0:02:23", "throughput": 4270.01, "total_tokens": 24374760}
|
| 7465 |
+
{"current_steps": 37230, "total_steps": 38160, "loss": 0.4576, "lr": 1.8120695672168009e-06, "epoch": 19.5125786163522, "percentage": 97.56, "elapsed_time": "1:35:09", "remaining_time": "0:02:22", "throughput": 4270.03, "total_tokens": 24377896}
|
| 7466 |
+
{"current_steps": 37235, "total_steps": 38160, "loss": 0.4837, "lr": 1.792669750258158e-06, "epoch": 19.515199161425578, "percentage": 97.58, "elapsed_time": "1:35:09", "remaining_time": "0:02:21", "throughput": 4270.0, "total_tokens": 24380392}
|
| 7467 |
+
{"current_steps": 37240, "total_steps": 38160, "loss": 0.431, "lr": 1.7733741525892134e-06, "epoch": 19.51781970649895, "percentage": 97.59, "elapsed_time": "1:35:10", "remaining_time": "0:02:21", "throughput": 4270.0, "total_tokens": 24383272}
|
| 7468 |
+
{"current_steps": 37245, "total_steps": 38160, "loss": 0.5583, "lr": 1.7541827782462937e-06, "epoch": 19.520440251572328, "percentage": 97.6, "elapsed_time": "1:35:11", "remaining_time": "0:02:20", "throughput": 4270.03, "total_tokens": 24386600}
|
| 7469 |
+
{"current_steps": 37250, "total_steps": 38160, "loss": 0.4206, "lr": 1.7350956312440768e-06, "epoch": 19.5230607966457, "percentage": 97.62, "elapsed_time": "1:35:11", "remaining_time": "0:02:19", "throughput": 4270.06, "total_tokens": 24389832}
|
| 7470 |
+
{"current_steps": 37255, "total_steps": 38160, "loss": 0.4374, "lr": 1.716112715575313e-06, "epoch": 19.52568134171908, "percentage": 97.63, "elapsed_time": "1:35:12", "remaining_time": "0:02:18", "throughput": 4270.08, "total_tokens": 24393096}
|
| 7471 |
+
{"current_steps": 37260, "total_steps": 38160, "loss": 0.3978, "lr": 1.6972340352110481e-06, "epoch": 19.528301886792452, "percentage": 97.64, "elapsed_time": "1:35:13", "remaining_time": "0:02:18", "throughput": 4270.08, "total_tokens": 24396008}
|
| 7472 |
+
{"current_steps": 37265, "total_steps": 38160, "loss": 0.4823, "lr": 1.6784595941004565e-06, "epoch": 19.53092243186583, "percentage": 97.65, "elapsed_time": "1:35:13", "remaining_time": "0:02:17", "throughput": 4270.05, "total_tokens": 24398632}
|
| 7473 |
+
{"current_steps": 37270, "total_steps": 38160, "loss": 0.4804, "lr": 1.659789396171063e-06, "epoch": 19.533542976939202, "percentage": 97.67, "elapsed_time": "1:35:14", "remaining_time": "0:02:16", "throughput": 4270.09, "total_tokens": 24401992}
|
| 7474 |
+
{"current_steps": 37275, "total_steps": 38160, "loss": 0.4836, "lr": 1.6412234453282993e-06, "epoch": 19.53616352201258, "percentage": 97.68, "elapsed_time": "1:35:15", "remaining_time": "0:02:15", "throughput": 4270.13, "total_tokens": 24405384}
|
| 7475 |
+
{"current_steps": 37280, "total_steps": 38160, "loss": 0.4954, "lr": 1.622761745456003e-06, "epoch": 19.538784067085953, "percentage": 97.69, "elapsed_time": "1:35:16", "remaining_time": "0:02:14", "throughput": 4270.12, "total_tokens": 24408136}
|
| 7476 |
+
{"current_steps": 37285, "total_steps": 38160, "loss": 0.5673, "lr": 1.6044043004161958e-06, "epoch": 19.54140461215933, "percentage": 97.71, "elapsed_time": "1:35:16", "remaining_time": "0:02:14", "throughput": 4270.2, "total_tokens": 24412040}
|
| 7477 |
+
{"current_steps": 37290, "total_steps": 38160, "loss": 0.4569, "lr": 1.5861511140489725e-06, "epoch": 19.544025157232703, "percentage": 97.72, "elapsed_time": "1:35:17", "remaining_time": "0:02:13", "throughput": 4270.2, "total_tokens": 24415080}
|
| 7478 |
+
{"current_steps": 37295, "total_steps": 38160, "loss": 0.4394, "lr": 1.5680021901727237e-06, "epoch": 19.54664570230608, "percentage": 97.73, "elapsed_time": "1:35:18", "remaining_time": "0:02:12", "throughput": 4270.18, "total_tokens": 24417736}
|
| 7479 |
+
{"current_steps": 37300, "total_steps": 38160, "loss": 0.5185, "lr": 1.5499575325840232e-06, "epoch": 19.549266247379457, "percentage": 97.75, "elapsed_time": "1:35:18", "remaining_time": "0:02:11", "throughput": 4270.23, "total_tokens": 24421224}
|
| 7480 |
+
{"current_steps": 37305, "total_steps": 38160, "loss": 0.4291, "lr": 1.5320171450576293e-06, "epoch": 19.55188679245283, "percentage": 97.76, "elapsed_time": "1:35:20", "remaining_time": "0:02:11", "throughput": 4270.47, "total_tokens": 24427304}
|
| 7481 |
+
{"current_steps": 37310, "total_steps": 38160, "loss": 0.4493, "lr": 1.5141810313463733e-06, "epoch": 19.554507337526207, "percentage": 97.77, "elapsed_time": "1:35:20", "remaining_time": "0:02:10", "throughput": 4270.55, "total_tokens": 24431080}
|
| 7482 |
+
{"current_steps": 37315, "total_steps": 38160, "loss": 0.4943, "lr": 1.4964491951814374e-06, "epoch": 19.55712788259958, "percentage": 97.79, "elapsed_time": "1:35:21", "remaining_time": "0:02:09", "throughput": 4270.56, "total_tokens": 24433960}
|
| 7483 |
+
{"current_steps": 37320, "total_steps": 38160, "loss": 0.3514, "lr": 1.4788216402720766e-06, "epoch": 19.559748427672957, "percentage": 97.8, "elapsed_time": "1:35:22", "remaining_time": "0:02:08", "throughput": 4270.54, "total_tokens": 24436616}
|
| 7484 |
+
{"current_steps": 37325, "total_steps": 38160, "loss": 0.4767, "lr": 1.4612983703058413e-06, "epoch": 19.56236897274633, "percentage": 97.81, "elapsed_time": "1:35:22", "remaining_time": "0:02:08", "throughput": 4270.53, "total_tokens": 24439240}
|
| 7485 |
+
{"current_steps": 37330, "total_steps": 38160, "loss": 0.4667, "lr": 1.4438793889483549e-06, "epoch": 19.564989517819708, "percentage": 97.82, "elapsed_time": "1:35:23", "remaining_time": "0:02:07", "throughput": 4270.52, "total_tokens": 24441992}
|
| 7486 |
+
{"current_steps": 37335, "total_steps": 38160, "loss": 0.3988, "lr": 1.4265646998434246e-06, "epoch": 19.56761006289308, "percentage": 97.84, "elapsed_time": "1:35:24", "remaining_time": "0:02:06", "throughput": 4270.51, "total_tokens": 24444776}
|
| 7487 |
+
{"current_steps": 37340, "total_steps": 38160, "loss": 0.4323, "lr": 1.409354306613153e-06, "epoch": 19.570230607966458, "percentage": 97.85, "elapsed_time": "1:35:24", "remaining_time": "0:02:05", "throughput": 4270.51, "total_tokens": 24447720}
|
| 7488 |
+
{"current_steps": 37345, "total_steps": 38160, "loss": 0.4273, "lr": 1.3922482128577718e-06, "epoch": 19.57285115303983, "percentage": 97.86, "elapsed_time": "1:35:25", "remaining_time": "0:02:04", "throughput": 4270.65, "total_tokens": 24452520}
|
| 7489 |
+
{"current_steps": 37350, "total_steps": 38160, "loss": 0.3804, "lr": 1.3752464221556404e-06, "epoch": 19.57547169811321, "percentage": 97.88, "elapsed_time": "1:35:26", "remaining_time": "0:02:04", "throughput": 4270.61, "total_tokens": 24454888}
|
| 7490 |
+
{"current_steps": 37355, "total_steps": 38160, "loss": 0.3629, "lr": 1.358348938063303e-06, "epoch": 19.578092243186582, "percentage": 97.89, "elapsed_time": "1:35:27", "remaining_time": "0:02:03", "throughput": 4270.64, "total_tokens": 24458152}
|
| 7491 |
+
{"current_steps": 37360, "total_steps": 38160, "loss": 0.4616, "lr": 1.341555764115543e-06, "epoch": 19.58071278825996, "percentage": 97.9, "elapsed_time": "1:35:27", "remaining_time": "0:02:02", "throughput": 4270.63, "total_tokens": 24460904}
|
| 7492 |
+
{"current_steps": 37365, "total_steps": 38160, "loss": 0.4003, "lr": 1.3248669038253835e-06, "epoch": 19.583333333333332, "percentage": 97.92, "elapsed_time": "1:35:28", "remaining_time": "0:02:01", "throughput": 4270.67, "total_tokens": 24464328}
|
| 7493 |
+
{"current_steps": 37370, "total_steps": 38160, "loss": 0.5159, "lr": 1.3082823606838656e-06, "epoch": 19.58595387840671, "percentage": 97.93, "elapsed_time": "1:35:29", "remaining_time": "0:02:01", "throughput": 4270.73, "total_tokens": 24468072}
|
| 7494 |
+
{"current_steps": 37375, "total_steps": 38160, "loss": 0.5159, "lr": 1.2918021381603251e-06, "epoch": 19.588574423480082, "percentage": 97.94, "elapsed_time": "1:35:30", "remaining_time": "0:02:00", "throughput": 4270.84, "total_tokens": 24472424}
|
| 7495 |
+
{"current_steps": 37380, "total_steps": 38160, "loss": 0.2853, "lr": 1.275426239702171e-06, "epoch": 19.59119496855346, "percentage": 97.96, "elapsed_time": "1:35:30", "remaining_time": "0:01:59", "throughput": 4270.86, "total_tokens": 24475656}
|
| 7496 |
+
{"current_steps": 37385, "total_steps": 38160, "loss": 0.3604, "lr": 1.2591546687351073e-06, "epoch": 19.593815513626833, "percentage": 97.97, "elapsed_time": "1:35:31", "remaining_time": "0:01:58", "throughput": 4270.91, "total_tokens": 24479368}
|
| 7497 |
+
{"current_steps": 37390, "total_steps": 38160, "loss": 0.3641, "lr": 1.2429874286629673e-06, "epoch": 19.59643605870021, "percentage": 97.98, "elapsed_time": "1:35:32", "remaining_time": "0:01:58", "throughput": 4270.97, "total_tokens": 24483048}
|
| 7498 |
+
{"current_steps": 37395, "total_steps": 38160, "loss": 0.4306, "lr": 1.2269245228677116e-06, "epoch": 19.599056603773583, "percentage": 98.0, "elapsed_time": "1:35:33", "remaining_time": "0:01:57", "throughput": 4270.98, "total_tokens": 24486152}
|
| 7499 |
+
{"current_steps": 37400, "total_steps": 38160, "loss": 0.3954, "lr": 1.210965954709542e-06, "epoch": 19.60167714884696, "percentage": 98.01, "elapsed_time": "1:35:33", "remaining_time": "0:01:56", "throughput": 4271.0, "total_tokens": 24489192}
|
| 7500 |
+
{"current_steps": 37405, "total_steps": 38160, "loss": 0.507, "lr": 1.1951117275268431e-06, "epoch": 19.604297693920337, "percentage": 98.02, "elapsed_time": "1:35:34", "remaining_time": "0:01:55", "throughput": 4271.02, "total_tokens": 24492392}
|
| 7501 |
+
{"current_steps": 37410, "total_steps": 38160, "loss": 0.3951, "lr": 1.1793618446360732e-06, "epoch": 19.60691823899371, "percentage": 98.03, "elapsed_time": "1:35:35", "remaining_time": "0:01:54", "throughput": 4270.98, "total_tokens": 24494760}
|
| 7502 |
+
{"current_steps": 37415, "total_steps": 38160, "loss": 0.4343, "lr": 1.1637163093319303e-06, "epoch": 19.609538784067087, "percentage": 98.05, "elapsed_time": "1:35:35", "remaining_time": "0:01:54", "throughput": 4270.98, "total_tokens": 24497672}
|
| 7503 |
+
{"current_steps": 37420, "total_steps": 38160, "loss": 0.4443, "lr": 1.1481751248874072e-06, "epoch": 19.61215932914046, "percentage": 98.06, "elapsed_time": "1:35:36", "remaining_time": "0:01:53", "throughput": 4271.06, "total_tokens": 24501704}
|
| 7504 |
+
{"current_steps": 37425, "total_steps": 38160, "loss": 0.3846, "lr": 1.1327382945533482e-06, "epoch": 19.614779874213838, "percentage": 98.07, "elapsed_time": "1:35:37", "remaining_time": "0:01:52", "throughput": 4271.13, "total_tokens": 24505576}
|
| 7505 |
+
{"current_steps": 37430, "total_steps": 38160, "loss": 0.4744, "lr": 1.1174058215591143e-06, "epoch": 19.61740041928721, "percentage": 98.09, "elapsed_time": "1:35:38", "remaining_time": "0:01:51", "throughput": 4271.14, "total_tokens": 24508616}
|
| 7506 |
+
{"current_steps": 37435, "total_steps": 38160, "loss": 0.436, "lr": 1.1021777091119732e-06, "epoch": 19.620020964360588, "percentage": 98.1, "elapsed_time": "1:35:38", "remaining_time": "0:01:51", "throughput": 4271.14, "total_tokens": 24511528}
|
| 7507 |
+
{"current_steps": 37440, "total_steps": 38160, "loss": 0.526, "lr": 1.0870539603975994e-06, "epoch": 19.62264150943396, "percentage": 98.11, "elapsed_time": "1:35:39", "remaining_time": "0:01:50", "throughput": 4271.17, "total_tokens": 24514632}
|
| 7508 |
+
{"current_steps": 37445, "total_steps": 38160, "loss": 0.4723, "lr": 1.0720345785795727e-06, "epoch": 19.62526205450734, "percentage": 98.13, "elapsed_time": "1:35:40", "remaining_time": "0:01:49", "throughput": 4271.21, "total_tokens": 24518088}
|
| 7509 |
+
{"current_steps": 37450, "total_steps": 38160, "loss": 0.4334, "lr": 1.0571195667998802e-06, "epoch": 19.627882599580712, "percentage": 98.14, "elapsed_time": "1:35:40", "remaining_time": "0:01:48", "throughput": 4271.21, "total_tokens": 24520904}
|
| 7510 |
+
{"current_steps": 37455, "total_steps": 38160, "loss": 0.4279, "lr": 1.042308928178526e-06, "epoch": 19.63050314465409, "percentage": 98.15, "elapsed_time": "1:35:41", "remaining_time": "0:01:48", "throughput": 4271.24, "total_tokens": 24524104}
|
| 7511 |
+
{"current_steps": 37460, "total_steps": 38160, "loss": 0.4793, "lr": 1.0276026658137538e-06, "epoch": 19.633123689727462, "percentage": 98.17, "elapsed_time": "1:35:42", "remaining_time": "0:01:47", "throughput": 4271.29, "total_tokens": 24527624}
|
| 7512 |
+
{"current_steps": 37465, "total_steps": 38160, "loss": 0.4408, "lr": 1.013000782781881e-06, "epoch": 19.63574423480084, "percentage": 98.18, "elapsed_time": "1:35:43", "remaining_time": "0:01:46", "throughput": 4271.25, "total_tokens": 24529960}
|
| 7513 |
+
{"current_steps": 37470, "total_steps": 38160, "loss": 0.7454, "lr": 9.985032821375195e-07, "epoch": 19.638364779874212, "percentage": 98.19, "elapsed_time": "1:35:43", "remaining_time": "0:01:45", "throughput": 4271.22, "total_tokens": 24532616}
|
| 7514 |
+
{"current_steps": 37475, "total_steps": 38160, "loss": 0.332, "lr": 9.841101669134101e-07, "epoch": 19.64098532494759, "percentage": 98.2, "elapsed_time": "1:35:44", "remaining_time": "0:01:45", "throughput": 4271.25, "total_tokens": 24535880}
|
| 7515 |
+
{"current_steps": 37480, "total_steps": 38160, "loss": 0.4529, "lr": 9.69821440120311e-07, "epoch": 19.643605870020963, "percentage": 98.22, "elapsed_time": "1:35:45", "remaining_time": "0:01:44", "throughput": 4271.37, "total_tokens": 24540296}
|
| 7516 |
+
{"current_steps": 37485, "total_steps": 38160, "loss": 0.3966, "lr": 9.556371047473866e-07, "epoch": 19.64622641509434, "percentage": 98.23, "elapsed_time": "1:35:46", "remaining_time": "0:01:43", "throughput": 4271.39, "total_tokens": 24543496}
|
| 7517 |
+
{"current_steps": 37490, "total_steps": 38160, "loss": 0.3647, "lr": 9.415571637617082e-07, "epoch": 19.648846960167717, "percentage": 98.24, "elapsed_time": "1:35:47", "remaining_time": "0:01:42", "throughput": 4271.64, "total_tokens": 24550056}
|
| 7518 |
+
{"current_steps": 37495, "total_steps": 38160, "loss": 0.4293, "lr": 9.275816201087528e-07, "epoch": 19.65146750524109, "percentage": 98.26, "elapsed_time": "1:35:47", "remaining_time": "0:01:41", "throughput": 4271.69, "total_tokens": 24553608}
|
| 7519 |
+
{"current_steps": 37500, "total_steps": 38160, "loss": 0.5931, "lr": 9.137104767120153e-07, "epoch": 19.654088050314467, "percentage": 98.27, "elapsed_time": "1:35:48", "remaining_time": "0:01:41", "throughput": 4271.75, "total_tokens": 24557224}
|
| 7520 |
+
{"current_steps": 37505, "total_steps": 38160, "loss": 0.6089, "lr": 8.999437364731189e-07, "epoch": 19.65670859538784, "percentage": 98.28, "elapsed_time": "1:35:49", "remaining_time": "0:01:40", "throughput": 4271.8, "total_tokens": 24560616}
|
| 7521 |
+
{"current_steps": 37510, "total_steps": 38160, "loss": 0.7024, "lr": 8.862814022720378e-07, "epoch": 19.659329140461217, "percentage": 98.3, "elapsed_time": "1:35:50", "remaining_time": "0:01:39", "throughput": 4271.82, "total_tokens": 24563656}
|
| 7522 |
+
{"current_steps": 37515, "total_steps": 38160, "loss": 0.4006, "lr": 8.727234769666526e-07, "epoch": 19.66194968553459, "percentage": 98.31, "elapsed_time": "1:35:50", "remaining_time": "0:01:38", "throughput": 4271.79, "total_tokens": 24566312}
|
| 7523 |
+
{"current_steps": 37520, "total_steps": 38160, "loss": 0.5473, "lr": 8.592699633931389e-07, "epoch": 19.664570230607968, "percentage": 98.32, "elapsed_time": "1:35:51", "remaining_time": "0:01:38", "throughput": 4271.83, "total_tokens": 24569640}
|
| 7524 |
+
{"current_steps": 37525, "total_steps": 38160, "loss": 0.5136, "lr": 8.459208643659122e-07, "epoch": 19.66719077568134, "percentage": 98.34, "elapsed_time": "1:35:52", "remaining_time": "0:01:37", "throughput": 4271.8, "total_tokens": 24572264}
|
| 7525 |
+
{"current_steps": 37530, "total_steps": 38160, "loss": 0.4166, "lr": 8.326761826773499e-07, "epoch": 19.669811320754718, "percentage": 98.35, "elapsed_time": "1:35:52", "remaining_time": "0:01:36", "throughput": 4271.8, "total_tokens": 24575048}
|
| 7526 |
+
{"current_steps": 37535, "total_steps": 38160, "loss": 0.4091, "lr": 8.195359210981246e-07, "epoch": 19.67243186582809, "percentage": 98.36, "elapsed_time": "1:35:53", "remaining_time": "0:01:35", "throughput": 4271.79, "total_tokens": 24577704}
|
| 7527 |
+
{"current_steps": 37540, "total_steps": 38160, "loss": 0.4396, "lr": 8.065000823770929e-07, "epoch": 19.67505241090147, "percentage": 98.38, "elapsed_time": "1:35:54", "remaining_time": "0:01:35", "throughput": 4271.85, "total_tokens": 24581352}
|
| 7528 |
+
{"current_steps": 37545, "total_steps": 38160, "loss": 0.4178, "lr": 7.935686692410737e-07, "epoch": 19.677672955974842, "percentage": 98.39, "elapsed_time": "1:35:54", "remaining_time": "0:01:34", "throughput": 4271.85, "total_tokens": 24584168}
|
| 7529 |
+
{"current_steps": 37550, "total_steps": 38160, "loss": 0.3323, "lr": 7.807416843952364e-07, "epoch": 19.68029350104822, "percentage": 98.4, "elapsed_time": "1:35:55", "remaining_time": "0:01:33", "throughput": 4271.94, "total_tokens": 24588200}
|
| 7530 |
+
{"current_steps": 37555, "total_steps": 38160, "loss": 0.5631, "lr": 7.68019130522879e-07, "epoch": 19.682914046121592, "percentage": 98.41, "elapsed_time": "1:35:56", "remaining_time": "0:01:32", "throughput": 4271.9, "total_tokens": 24590568}
|
| 7531 |
+
{"current_steps": 37560, "total_steps": 38160, "loss": 0.394, "lr": 7.554010102853726e-07, "epoch": 19.68553459119497, "percentage": 98.43, "elapsed_time": "1:35:57", "remaining_time": "0:01:31", "throughput": 4271.89, "total_tokens": 24593320}
|
| 7532 |
+
{"current_steps": 37565, "total_steps": 38160, "loss": 0.5447, "lr": 7.428873263223279e-07, "epoch": 19.688155136268342, "percentage": 98.44, "elapsed_time": "1:35:57", "remaining_time": "0:01:31", "throughput": 4271.86, "total_tokens": 24595848}
|
| 7533 |
+
{"current_steps": 37570, "total_steps": 38160, "loss": 0.3625, "lr": 7.304780812513734e-07, "epoch": 19.69077568134172, "percentage": 98.45, "elapsed_time": "1:35:58", "remaining_time": "0:01:30", "throughput": 4271.85, "total_tokens": 24598600}
|
| 7534 |
+
{"current_steps": 37575, "total_steps": 38160, "loss": 0.4306, "lr": 7.181732776684325e-07, "epoch": 19.693396226415093, "percentage": 98.47, "elapsed_time": "1:35:58", "remaining_time": "0:01:29", "throughput": 4271.86, "total_tokens": 24601544}
|
| 7535 |
+
{"current_steps": 37580, "total_steps": 38160, "loss": 0.4179, "lr": 7.059729181475572e-07, "epoch": 19.69601677148847, "percentage": 98.48, "elapsed_time": "1:35:59", "remaining_time": "0:01:28", "throughput": 4271.88, "total_tokens": 24604552}
|
| 7536 |
+
{"current_steps": 37585, "total_steps": 38160, "loss": 0.5022, "lr": 6.938770052409282e-07, "epoch": 19.698637316561843, "percentage": 98.49, "elapsed_time": "1:36:00", "remaining_time": "0:01:28", "throughput": 4271.88, "total_tokens": 24607432}
|
| 7537 |
+
{"current_steps": 37590, "total_steps": 38160, "loss": 0.4782, "lr": 6.81885541478855e-07, "epoch": 19.70125786163522, "percentage": 98.51, "elapsed_time": "1:36:01", "remaining_time": "0:01:27", "throughput": 4271.97, "total_tokens": 24611464}
|
| 7538 |
+
{"current_steps": 37595, "total_steps": 38160, "loss": 0.4861, "lr": 6.699985293697197e-07, "epoch": 19.703878406708597, "percentage": 98.52, "elapsed_time": "1:36:01", "remaining_time": "0:01:26", "throughput": 4271.97, "total_tokens": 24614344}
|
| 7539 |
+
{"current_steps": 37600, "total_steps": 38160, "loss": 0.484, "lr": 6.582159714003111e-07, "epoch": 19.70649895178197, "percentage": 98.53, "elapsed_time": "1:36:02", "remaining_time": "0:01:25", "throughput": 4272.03, "total_tokens": 24617864}
|
| 7540 |
+
{"current_steps": 37605, "total_steps": 38160, "loss": 0.3802, "lr": 6.465378700352687e-07, "epoch": 19.709119496855347, "percentage": 98.55, "elapsed_time": "1:36:03", "remaining_time": "0:01:25", "throughput": 4272.05, "total_tokens": 24620904}
|
| 7541 |
+
{"current_steps": 37610, "total_steps": 38160, "loss": 0.3046, "lr": 6.349642277176382e-07, "epoch": 19.71174004192872, "percentage": 98.56, "elapsed_time": "1:36:03", "remaining_time": "0:01:24", "throughput": 4272.03, "total_tokens": 24623560}
|
| 7542 |
+
{"current_steps": 37615, "total_steps": 38160, "loss": 0.3874, "lr": 6.23495046868372e-07, "epoch": 19.714360587002098, "percentage": 98.57, "elapsed_time": "1:36:04", "remaining_time": "0:01:23", "throughput": 4272.09, "total_tokens": 24627016}
|
| 7543 |
+
{"current_steps": 37620, "total_steps": 38160, "loss": 0.3777, "lr": 6.121303298868286e-07, "epoch": 19.71698113207547, "percentage": 98.58, "elapsed_time": "1:36:05", "remaining_time": "0:01:22", "throughput": 4272.15, "total_tokens": 24630600}
|
| 7544 |
+
{"current_steps": 37625, "total_steps": 38160, "loss": 0.5426, "lr": 6.008700791502175e-07, "epoch": 19.719601677148848, "percentage": 98.6, "elapsed_time": "1:36:06", "remaining_time": "0:01:21", "throughput": 4272.21, "total_tokens": 24634248}
|
| 7545 |
+
{"current_steps": 37630, "total_steps": 38160, "loss": 0.3276, "lr": 5.8971429701421e-07, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "1:36:06", "remaining_time": "0:01:21", "throughput": 4272.23, "total_tokens": 24637352}
|
| 7546 |
+
{"current_steps": 37635, "total_steps": 38160, "loss": 0.435, "lr": 5.786629858123283e-07, "epoch": 19.7248427672956, "percentage": 98.62, "elapsed_time": "1:36:07", "remaining_time": "0:01:20", "throughput": 4272.21, "total_tokens": 24639912}
|
| 7547 |
+
{"current_steps": 37640, "total_steps": 38160, "loss": 0.3397, "lr": 5.677161478565008e-07, "epoch": 19.72746331236897, "percentage": 98.64, "elapsed_time": "1:36:08", "remaining_time": "0:01:19", "throughput": 4272.18, "total_tokens": 24642568}
|
| 7548 |
+
{"current_steps": 37645, "total_steps": 38160, "loss": 0.3255, "lr": 5.56873785436618e-07, "epoch": 19.73008385744235, "percentage": 98.65, "elapsed_time": "1:36:08", "remaining_time": "0:01:18", "throughput": 4272.17, "total_tokens": 24645416}
|
| 7549 |
+
{"current_steps": 37650, "total_steps": 38160, "loss": 0.4195, "lr": 5.4613590082081e-07, "epoch": 19.732704402515722, "percentage": 98.66, "elapsed_time": "1:36:09", "remaining_time": "0:01:18", "throughput": 4272.19, "total_tokens": 24648360}
|
| 7550 |
+
{"current_steps": 37655, "total_steps": 38160, "loss": 0.4968, "lr": 5.355024962552801e-07, "epoch": 19.7353249475891, "percentage": 98.68, "elapsed_time": "1:36:10", "remaining_time": "0:01:17", "throughput": 4272.14, "total_tokens": 24650728}
|
| 7551 |
+
{"current_steps": 37660, "total_steps": 38160, "loss": 0.5147, "lr": 5.249735739644157e-07, "epoch": 19.737945492662472, "percentage": 98.69, "elapsed_time": "1:36:10", "remaining_time": "0:01:16", "throughput": 4272.16, "total_tokens": 24653928}
|
| 7552 |
+
{"current_steps": 37665, "total_steps": 38160, "loss": 0.3989, "lr": 5.145491361508436e-07, "epoch": 19.74056603773585, "percentage": 98.7, "elapsed_time": "1:36:11", "remaining_time": "0:01:15", "throughput": 4272.22, "total_tokens": 24657608}
|
| 7553 |
+
{"current_steps": 37670, "total_steps": 38160, "loss": 0.327, "lr": 5.042291849950975e-07, "epoch": 19.743186582809223, "percentage": 98.72, "elapsed_time": "1:36:12", "remaining_time": "0:01:15", "throughput": 4272.24, "total_tokens": 24660744}
|
| 7554 |
+
{"current_steps": 37675, "total_steps": 38160, "loss": 0.4067, "lr": 4.940137226560615e-07, "epoch": 19.7458071278826, "percentage": 98.73, "elapsed_time": "1:36:13", "remaining_time": "0:01:14", "throughput": 4272.28, "total_tokens": 24664136}
|
| 7555 |
+
{"current_steps": 37680, "total_steps": 38160, "loss": 0.4415, "lr": 4.839027512706928e-07, "epoch": 19.748427672955973, "percentage": 98.74, "elapsed_time": "1:36:13", "remaining_time": "0:01:13", "throughput": 4272.27, "total_tokens": 24666920}
|
| 7556 |
+
{"current_steps": 37685, "total_steps": 38160, "loss": 0.4559, "lr": 4.7389627295407743e-07, "epoch": 19.75104821802935, "percentage": 98.76, "elapsed_time": "1:36:14", "remaining_time": "0:01:12", "throughput": 4272.27, "total_tokens": 24669800}
|
| 7557 |
+
{"current_steps": 37690, "total_steps": 38160, "loss": 0.4294, "lr": 4.6399428979948534e-07, "epoch": 19.753668763102727, "percentage": 98.77, "elapsed_time": "1:36:15", "remaining_time": "0:01:12", "throughput": 4272.25, "total_tokens": 24672488}
|
| 7558 |
+
{"current_steps": 37695, "total_steps": 38160, "loss": 0.4655, "lr": 4.541968038782596e-07, "epoch": 19.7562893081761, "percentage": 98.78, "elapsed_time": "1:36:15", "remaining_time": "0:01:11", "throughput": 4272.31, "total_tokens": 24676168}
|
| 7559 |
+
{"current_steps": 37700, "total_steps": 38160, "loss": 0.5222, "lr": 4.445038172399829e-07, "epoch": 19.758909853249477, "percentage": 98.79, "elapsed_time": "1:36:16", "remaining_time": "0:01:10", "throughput": 4272.32, "total_tokens": 24679176}
|
| 7560 |
+
{"current_steps": 37705, "total_steps": 38160, "loss": 0.3447, "lr": 4.3491533191225563e-07, "epoch": 19.76153039832285, "percentage": 98.81, "elapsed_time": "1:36:17", "remaining_time": "0:01:09", "throughput": 4272.3, "total_tokens": 24681864}
|
| 7561 |
+
{"current_steps": 37710, "total_steps": 38160, "loss": 0.4929, "lr": 4.254313499009177e-07, "epoch": 19.764150943396228, "percentage": 98.82, "elapsed_time": "1:36:17", "remaining_time": "0:01:08", "throughput": 4272.3, "total_tokens": 24684712}
|
| 7562 |
+
{"current_steps": 37715, "total_steps": 38160, "loss": 0.5149, "lr": 4.1605187318982664e-07, "epoch": 19.7667714884696, "percentage": 98.83, "elapsed_time": "1:36:18", "remaining_time": "0:01:08", "throughput": 4272.27, "total_tokens": 24687336}
|
| 7563 |
+
{"current_steps": 37720, "total_steps": 38160, "loss": 0.32, "lr": 4.067769037411906e-07, "epoch": 19.769392033542978, "percentage": 98.85, "elapsed_time": "1:36:19", "remaining_time": "0:01:07", "throughput": 4272.31, "total_tokens": 24690664}
|
| 7564 |
+
{"current_steps": 37725, "total_steps": 38160, "loss": 0.4732, "lr": 3.9760644349517984e-07, "epoch": 19.77201257861635, "percentage": 98.86, "elapsed_time": "1:36:19", "remaining_time": "0:01:06", "throughput": 4272.37, "total_tokens": 24694216}
|
| 7565 |
+
{"current_steps": 37730, "total_steps": 38160, "loss": 0.4679, "lr": 3.885404943700932e-07, "epoch": 19.77463312368973, "percentage": 98.87, "elapsed_time": "1:36:20", "remaining_time": "0:01:05", "throughput": 4272.35, "total_tokens": 24696936}
|
| 7566 |
+
{"current_steps": 37735, "total_steps": 38160, "loss": 0.3807, "lr": 3.795790582624692e-07, "epoch": 19.7772536687631, "percentage": 98.89, "elapsed_time": "1:36:21", "remaining_time": "0:01:05", "throughput": 4272.32, "total_tokens": 24699400}
|
| 7567 |
+
{"current_steps": 37740, "total_steps": 38160, "loss": 0.4702, "lr": 3.707221370469749e-07, "epoch": 19.77987421383648, "percentage": 98.9, "elapsed_time": "1:36:21", "remaining_time": "0:01:04", "throughput": 4272.33, "total_tokens": 24702376}
|
| 7568 |
+
{"current_steps": 37745, "total_steps": 38160, "loss": 0.3502, "lr": 3.6196973257629494e-07, "epoch": 19.782494758909852, "percentage": 98.91, "elapsed_time": "1:36:22", "remaining_time": "0:01:03", "throughput": 4272.44, "total_tokens": 24706568}
|
| 7569 |
+
{"current_steps": 37750, "total_steps": 38160, "loss": 0.4598, "lr": 3.533218466813537e-07, "epoch": 19.78511530398323, "percentage": 98.93, "elapsed_time": "1:36:23", "remaining_time": "0:01:02", "throughput": 4272.43, "total_tokens": 24709384}
|
| 7570 |
+
{"current_steps": 37755, "total_steps": 38160, "loss": 0.3839, "lr": 3.447784811712595e-07, "epoch": 19.787735849056602, "percentage": 98.94, "elapsed_time": "1:36:24", "remaining_time": "0:01:02", "throughput": 4272.46, "total_tokens": 24712552}
|
| 7571 |
+
{"current_steps": 37760, "total_steps": 38160, "loss": 0.3107, "lr": 3.363396378331385e-07, "epoch": 19.79035639412998, "percentage": 98.95, "elapsed_time": "1:36:24", "remaining_time": "0:01:01", "throughput": 4272.45, "total_tokens": 24715400}
|
| 7572 |
+
{"current_steps": 37765, "total_steps": 38160, "loss": 0.4568, "lr": 3.280053184323006e-07, "epoch": 19.792976939203353, "percentage": 98.96, "elapsed_time": "1:36:25", "remaining_time": "0:01:00", "throughput": 4272.46, "total_tokens": 24718376}
|
| 7573 |
+
{"current_steps": 37770, "total_steps": 38160, "loss": 0.5715, "lr": 3.1977552471218476e-07, "epoch": 19.79559748427673, "percentage": 98.98, "elapsed_time": "1:36:26", "remaining_time": "0:00:59", "throughput": 4272.47, "total_tokens": 24721416}
|
| 7574 |
+
{"current_steps": 37775, "total_steps": 38160, "loss": 0.4629, "lr": 3.116502583943581e-07, "epoch": 19.798218029350103, "percentage": 98.99, "elapsed_time": "1:36:26", "remaining_time": "0:00:58", "throughput": 4272.46, "total_tokens": 24724232}
|
| 7575 |
+
{"current_steps": 37780, "total_steps": 38160, "loss": 0.5984, "lr": 3.036295211785722e-07, "epoch": 19.80083857442348, "percentage": 99.0, "elapsed_time": "1:36:28", "remaining_time": "0:00:58", "throughput": 4272.77, "total_tokens": 24731656}
|
| 7576 |
+
{"current_steps": 37785, "total_steps": 38160, "loss": 0.5101, "lr": 2.957133147425961e-07, "epoch": 19.803459119496857, "percentage": 99.02, "elapsed_time": "1:36:28", "remaining_time": "0:00:57", "throughput": 4272.75, "total_tokens": 24734344}
|
| 7577 |
+
{"current_steps": 37790, "total_steps": 38160, "loss": 0.4073, "lr": 2.879016407425494e-07, "epoch": 19.80607966457023, "percentage": 99.03, "elapsed_time": "1:36:29", "remaining_time": "0:00:56", "throughput": 4272.77, "total_tokens": 24737416}
|
| 7578 |
+
{"current_steps": 37795, "total_steps": 38160, "loss": 0.3778, "lr": 2.8019450081240295e-07, "epoch": 19.808700209643607, "percentage": 99.04, "elapsed_time": "1:36:30", "remaining_time": "0:00:55", "throughput": 4272.79, "total_tokens": 24740648}
|
| 7579 |
+
{"current_steps": 37800, "total_steps": 38160, "loss": 0.3535, "lr": 2.7259189656447803e-07, "epoch": 19.81132075471698, "percentage": 99.06, "elapsed_time": "1:36:31", "remaining_time": "0:00:55", "throughput": 4272.88, "total_tokens": 24744712}
|
| 7580 |
+
{"current_steps": 37805, "total_steps": 38160, "loss": 0.3883, "lr": 2.650938295891692e-07, "epoch": 19.813941299790358, "percentage": 99.07, "elapsed_time": "1:36:31", "remaining_time": "0:00:54", "throughput": 4272.88, "total_tokens": 24747592}
|
| 7581 |
+
{"current_steps": 37810, "total_steps": 38160, "loss": 0.382, "lr": 2.5770030145494395e-07, "epoch": 19.81656184486373, "percentage": 99.08, "elapsed_time": "1:36:32", "remaining_time": "0:00:53", "throughput": 4272.91, "total_tokens": 24750920}
|
| 7582 |
+
{"current_steps": 37815, "total_steps": 38160, "loss": 0.3801, "lr": 2.504113137083985e-07, "epoch": 19.819182389937108, "percentage": 99.1, "elapsed_time": "1:36:33", "remaining_time": "0:00:52", "throughput": 4273.1, "total_tokens": 24756200}
|
| 7583 |
+
{"current_steps": 37820, "total_steps": 38160, "loss": 0.4084, "lr": 2.4322686787442425e-07, "epoch": 19.82180293501048, "percentage": 99.11, "elapsed_time": "1:36:34", "remaining_time": "0:00:52", "throughput": 4273.1, "total_tokens": 24759144}
|
| 7584 |
+
{"current_steps": 37825, "total_steps": 38160, "loss": 0.4071, "lr": 2.3614696545581904e-07, "epoch": 19.82442348008386, "percentage": 99.12, "elapsed_time": "1:36:34", "remaining_time": "0:00:51", "throughput": 4273.09, "total_tokens": 24761992}
|
| 7585 |
+
{"current_steps": 37830, "total_steps": 38160, "loss": 0.434, "lr": 2.2917160793367585e-07, "epoch": 19.82704402515723, "percentage": 99.14, "elapsed_time": "1:36:35", "remaining_time": "0:00:50", "throughput": 4273.08, "total_tokens": 24764744}
|
| 7586 |
+
{"current_steps": 37835, "total_steps": 38160, "loss": 0.5165, "lr": 2.2230079676716086e-07, "epoch": 19.82966457023061, "percentage": 99.15, "elapsed_time": "1:36:36", "remaining_time": "0:00:49", "throughput": 4273.06, "total_tokens": 24767336}
|
| 7587 |
+
{"current_steps": 37840, "total_steps": 38160, "loss": 0.4396, "lr": 2.1553453339356875e-07, "epoch": 19.832285115303982, "percentage": 99.16, "elapsed_time": "1:36:36", "remaining_time": "0:00:49", "throughput": 4273.06, "total_tokens": 24770344}
|
| 7588 |
+
{"current_steps": 37845, "total_steps": 38160, "loss": 0.4717, "lr": 2.0887281922826738e-07, "epoch": 19.83490566037736, "percentage": 99.17, "elapsed_time": "1:36:37", "remaining_time": "0:00:48", "throughput": 4273.1, "total_tokens": 24773704}
|
| 7589 |
+
{"current_steps": 37850, "total_steps": 38160, "loss": 0.4201, "lr": 2.023156556648642e-07, "epoch": 19.837526205450732, "percentage": 99.19, "elapsed_time": "1:36:38", "remaining_time": "0:00:47", "throughput": 4273.12, "total_tokens": 24776840}
|
| 7590 |
+
{"current_steps": 37855, "total_steps": 38160, "loss": 0.4535, "lr": 1.9586304407503975e-07, "epoch": 19.84014675052411, "percentage": 99.2, "elapsed_time": "1:36:39", "remaining_time": "0:00:46", "throughput": 4273.23, "total_tokens": 24780904}
|
| 7591 |
+
{"current_steps": 37860, "total_steps": 38160, "loss": 0.3967, "lr": 1.8951498580860315e-07, "epoch": 19.842767295597483, "percentage": 99.21, "elapsed_time": "1:36:39", "remaining_time": "0:00:45", "throughput": 4273.24, "total_tokens": 24784104}
|
| 7592 |
+
{"current_steps": 37865, "total_steps": 38160, "loss": 0.3925, "lr": 1.832714821934922e-07, "epoch": 19.84538784067086, "percentage": 99.23, "elapsed_time": "1:36:40", "remaining_time": "0:00:45", "throughput": 4273.37, "total_tokens": 24788712}
|
| 7593 |
+
{"current_steps": 37870, "total_steps": 38160, "loss": 0.3871, "lr": 1.7713253453577328e-07, "epoch": 19.848008385744233, "percentage": 99.24, "elapsed_time": "1:36:41", "remaining_time": "0:00:44", "throughput": 4273.38, "total_tokens": 24791784}
|
| 7594 |
+
{"current_steps": 37875, "total_steps": 38160, "loss": 0.5115, "lr": 1.7109814411964132e-07, "epoch": 19.85062893081761, "percentage": 99.25, "elapsed_time": "1:36:42", "remaining_time": "0:00:43", "throughput": 4273.41, "total_tokens": 24795016}
|
| 7595 |
+
{"current_steps": 37880, "total_steps": 38160, "loss": 0.4774, "lr": 1.651683122074754e-07, "epoch": 19.853249475890987, "percentage": 99.27, "elapsed_time": "1:36:42", "remaining_time": "0:00:42", "throughput": 4273.47, "total_tokens": 24798600}
|
| 7596 |
+
{"current_steps": 37885, "total_steps": 38160, "loss": 0.511, "lr": 1.5934304003961675e-07, "epoch": 19.85587002096436, "percentage": 99.28, "elapsed_time": "1:36:43", "remaining_time": "0:00:42", "throughput": 4273.45, "total_tokens": 24801288}
|
| 7597 |
+
{"current_steps": 37890, "total_steps": 38160, "loss": 0.3803, "lr": 1.5362232883475713e-07, "epoch": 19.858490566037737, "percentage": 99.29, "elapsed_time": "1:36:44", "remaining_time": "0:00:41", "throughput": 4273.43, "total_tokens": 24803976}
|
| 7598 |
+
{"current_steps": 37895, "total_steps": 38160, "loss": 0.5029, "lr": 1.4800617978949492e-07, "epoch": 19.86111111111111, "percentage": 99.31, "elapsed_time": "1:36:44", "remaining_time": "0:00:40", "throughput": 4273.41, "total_tokens": 24806632}
|
| 7599 |
+
{"current_steps": 37900, "total_steps": 38160, "loss": 0.3478, "lr": 1.424945940787792e-07, "epoch": 19.863731656184488, "percentage": 99.32, "elapsed_time": "1:36:45", "remaining_time": "0:00:39", "throughput": 4273.38, "total_tokens": 24809160}
|
| 7600 |
+
{"current_steps": 37905, "total_steps": 38160, "loss": 0.5061, "lr": 1.3708757285552098e-07, "epoch": 19.86635220125786, "percentage": 99.33, "elapsed_time": "1:36:46", "remaining_time": "0:00:39", "throughput": 4273.42, "total_tokens": 24812584}
|
| 7601 |
+
{"current_steps": 37910, "total_steps": 38160, "loss": 0.5318, "lr": 1.3178511725076004e-07, "epoch": 19.868972746331238, "percentage": 99.34, "elapsed_time": "1:36:47", "remaining_time": "0:00:38", "throughput": 4273.47, "total_tokens": 24816104}
|
| 7602 |
+
{"current_steps": 37915, "total_steps": 38160, "loss": 0.4369, "lr": 1.265872283738312e-07, "epoch": 19.87159329140461, "percentage": 99.36, "elapsed_time": "1:36:47", "remaining_time": "0:00:37", "throughput": 4273.48, "total_tokens": 24819112}
|
| 7603 |
+
{"current_steps": 37920, "total_steps": 38160, "loss": 0.3261, "lr": 1.2149390731192033e-07, "epoch": 19.87421383647799, "percentage": 99.37, "elapsed_time": "1:36:48", "remaining_time": "0:00:36", "throughput": 4273.52, "total_tokens": 24822504}
|
| 7604 |
+
{"current_steps": 37925, "total_steps": 38160, "loss": 0.3822, "lr": 1.1650515513061955e-07, "epoch": 19.87683438155136, "percentage": 99.38, "elapsed_time": "1:36:49", "remaining_time": "0:00:35", "throughput": 4273.51, "total_tokens": 24825224}
|
| 7605 |
+
{"current_steps": 37930, "total_steps": 38160, "loss": 0.5135, "lr": 1.1162097287342743e-07, "epoch": 19.87945492662474, "percentage": 99.4, "elapsed_time": "1:36:49", "remaining_time": "0:00:35", "throughput": 4273.51, "total_tokens": 24828104}
|
| 7606 |
+
{"current_steps": 37935, "total_steps": 38160, "loss": 0.3931, "lr": 1.0684136156213775e-07, "epoch": 19.882075471698112, "percentage": 99.41, "elapsed_time": "1:36:50", "remaining_time": "0:00:34", "throughput": 4273.56, "total_tokens": 24831592}
|
| 7607 |
+
{"current_steps": 37940, "total_steps": 38160, "loss": 0.4654, "lr": 1.0216632219650634e-07, "epoch": 19.88469601677149, "percentage": 99.42, "elapsed_time": "1:36:51", "remaining_time": "0:00:33", "throughput": 4273.59, "total_tokens": 24834824}
|
| 7608 |
+
{"current_steps": 37945, "total_steps": 38160, "loss": 0.4507, "lr": 9.759585575458418e-08, "epoch": 19.887316561844862, "percentage": 99.44, "elapsed_time": "1:36:51", "remaining_time": "0:00:32", "throughput": 4273.64, "total_tokens": 24838184}
|
| 7609 |
+
{"current_steps": 37950, "total_steps": 38160, "loss": 0.3944, "lr": 9.312996319238432e-08, "epoch": 19.88993710691824, "percentage": 99.45, "elapsed_time": "1:36:52", "remaining_time": "0:00:32", "throughput": 4273.68, "total_tokens": 24841576}
|
| 7610 |
+
{"current_steps": 37955, "total_steps": 38160, "loss": 0.4949, "lr": 8.876864544421493e-08, "epoch": 19.892557651991613, "percentage": 99.46, "elapsed_time": "1:36:53", "remaining_time": "0:00:31", "throughput": 4273.66, "total_tokens": 24844232}
|
| 7611 |
+
{"current_steps": 37960, "total_steps": 38160, "loss": 0.3763, "lr": 8.451190342229077e-08, "epoch": 19.89517819706499, "percentage": 99.48, "elapsed_time": "1:36:54", "remaining_time": "0:00:30", "throughput": 4273.67, "total_tokens": 24847144}
|
| 7612 |
+
{"current_steps": 37965, "total_steps": 38160, "loss": 0.4322, "lr": 8.035973801717722e-08, "epoch": 19.897798742138363, "percentage": 99.49, "elapsed_time": "1:36:54", "remaining_time": "0:00:29", "throughput": 4273.71, "total_tokens": 24850568}
|
| 7613 |
+
{"current_steps": 37970, "total_steps": 38160, "loss": 0.3356, "lr": 7.631215009740178e-08, "epoch": 19.90041928721174, "percentage": 99.5, "elapsed_time": "1:36:55", "remaining_time": "0:00:29", "throughput": 4273.66, "total_tokens": 24852808}
|
| 7614 |
+
{"current_steps": 37975, "total_steps": 38160, "loss": 0.5061, "lr": 7.236914050973153e-08, "epoch": 19.903039832285117, "percentage": 99.52, "elapsed_time": "1:36:56", "remaining_time": "0:00:28", "throughput": 4273.74, "total_tokens": 24856744}
|
| 7615 |
+
{"current_steps": 37980, "total_steps": 38160, "loss": 0.4624, "lr": 6.853071007895117e-08, "epoch": 19.90566037735849, "percentage": 99.53, "elapsed_time": "1:36:56", "remaining_time": "0:00:27", "throughput": 4273.78, "total_tokens": 24860296}
|
| 7616 |
+
{"current_steps": 37985, "total_steps": 38160, "loss": 0.3917, "lr": 6.479685960797399e-08, "epoch": 19.908280922431867, "percentage": 99.54, "elapsed_time": "1:36:57", "remaining_time": "0:00:26", "throughput": 4273.81, "total_tokens": 24863560}
|
| 7617 |
+
{"current_steps": 37990, "total_steps": 38160, "loss": 0.363, "lr": 6.116758987800841e-08, "epoch": 19.91090146750524, "percentage": 99.55, "elapsed_time": "1:36:58", "remaining_time": "0:00:26", "throughput": 4273.8, "total_tokens": 24866344}
|
| 7618 |
+
{"current_steps": 37995, "total_steps": 38160, "loss": 0.4618, "lr": 5.7642901648113924e-08, "epoch": 19.913522012578618, "percentage": 99.57, "elapsed_time": "1:36:59", "remaining_time": "0:00:25", "throughput": 4273.84, "total_tokens": 24869800}
|
| 7619 |
+
{"current_steps": 38000, "total_steps": 38160, "loss": 0.3622, "lr": 5.422279565570065e-08, "epoch": 19.91614255765199, "percentage": 99.58, "elapsed_time": "1:36:59", "remaining_time": "0:00:24", "throughput": 4273.81, "total_tokens": 24872264}
|
| 7620 |
+
{"current_steps": 38005, "total_steps": 38160, "loss": 0.4558, "lr": 5.090727261619632e-08, "epoch": 19.918763102725368, "percentage": 99.59, "elapsed_time": "1:37:00", "remaining_time": "0:00:23", "throughput": 4273.86, "total_tokens": 24875816}
|
| 7621 |
+
{"current_steps": 38010, "total_steps": 38160, "loss": 0.4209, "lr": 4.7696333223212765e-08, "epoch": 19.92138364779874, "percentage": 99.61, "elapsed_time": "1:37:01", "remaining_time": "0:00:22", "throughput": 4273.86, "total_tokens": 24878856}
|
| 7622 |
+
{"current_steps": 38015, "total_steps": 38160, "loss": 0.4151, "lr": 4.4589978148323884e-08, "epoch": 19.92400419287212, "percentage": 99.62, "elapsed_time": "1:37:01", "remaining_time": "0:00:22", "throughput": 4273.87, "total_tokens": 24881864}
|
| 7623 |
+
{"current_steps": 38020, "total_steps": 38160, "loss": 0.4558, "lr": 4.1588208041454244e-08, "epoch": 19.92662473794549, "percentage": 99.63, "elapsed_time": "1:37:02", "remaining_time": "0:00:21", "throughput": 4273.85, "total_tokens": 24884488}
|
| 7624 |
+
{"current_steps": 38025, "total_steps": 38160, "loss": 0.3358, "lr": 3.8691023530545985e-08, "epoch": 19.92924528301887, "percentage": 99.65, "elapsed_time": "1:37:03", "remaining_time": "0:00:20", "throughput": 4273.9, "total_tokens": 24887944}
|
| 7625 |
+
{"current_steps": 38030, "total_steps": 38160, "loss": 0.4284, "lr": 3.589842522155884e-08, "epoch": 19.931865828092242, "percentage": 99.66, "elapsed_time": "1:37:03", "remaining_time": "0:00:19", "throughput": 4273.94, "total_tokens": 24891400}
|
| 7626 |
+
{"current_steps": 38035, "total_steps": 38160, "loss": 0.3623, "lr": 3.321041369874767e-08, "epoch": 19.93448637316562, "percentage": 99.67, "elapsed_time": "1:37:04", "remaining_time": "0:00:19", "throughput": 4273.89, "total_tokens": 24893672}
|
| 7627 |
+
{"current_steps": 38040, "total_steps": 38160, "loss": 0.4336, "lr": 3.0626989524384916e-08, "epoch": 19.937106918238992, "percentage": 99.69, "elapsed_time": "1:37:05", "remaining_time": "0:00:18", "throughput": 4273.97, "total_tokens": 24897544}
|
| 7628 |
+
{"current_steps": 38045, "total_steps": 38160, "loss": 0.5873, "lr": 2.8148153238927167e-08, "epoch": 19.93972746331237, "percentage": 99.7, "elapsed_time": "1:37:06", "remaining_time": "0:00:17", "throughput": 4274.01, "total_tokens": 24900904}
|
| 7629 |
+
{"current_steps": 38050, "total_steps": 38160, "loss": 0.3799, "lr": 2.5773905360904072e-08, "epoch": 19.942348008385743, "percentage": 99.71, "elapsed_time": "1:37:06", "remaining_time": "0:00:16", "throughput": 4274.11, "total_tokens": 24905064}
|
| 7630 |
+
{"current_steps": 38055, "total_steps": 38160, "loss": 0.4358, "lr": 2.3504246386918392e-08, "epoch": 19.94496855345912, "percentage": 99.72, "elapsed_time": "1:37:07", "remaining_time": "0:00:16", "throughput": 4274.15, "total_tokens": 24908392}
|
| 7631 |
+
{"current_steps": 38060, "total_steps": 38160, "loss": 0.3958, "lr": 2.133917679186803e-08, "epoch": 19.947589098532493, "percentage": 99.74, "elapsed_time": "1:37:08", "remaining_time": "0:00:15", "throughput": 4274.15, "total_tokens": 24911336}
|
| 7632 |
+
{"current_steps": 38065, "total_steps": 38160, "loss": 0.3703, "lr": 1.9278697028557447e-08, "epoch": 19.95020964360587, "percentage": 99.75, "elapsed_time": "1:37:09", "remaining_time": "0:00:14", "throughput": 4274.28, "total_tokens": 24915880}
|
| 7633 |
+
{"current_steps": 38070, "total_steps": 38160, "loss": 0.545, "lr": 1.7322807528086238e-08, "epoch": 19.952830188679247, "percentage": 99.76, "elapsed_time": "1:37:10", "remaining_time": "0:00:13", "throughput": 4274.33, "total_tokens": 24919464}
|
| 7634 |
+
{"current_steps": 38075, "total_steps": 38160, "loss": 0.2569, "lr": 1.547150869957159e-08, "epoch": 19.95545073375262, "percentage": 99.78, "elapsed_time": "1:37:10", "remaining_time": "0:00:13", "throughput": 4274.33, "total_tokens": 24922312}
|
| 7635 |
+
{"current_steps": 38080, "total_steps": 38160, "loss": 0.4558, "lr": 1.3724800930314806e-08, "epoch": 19.958071278825997, "percentage": 99.79, "elapsed_time": "1:37:11", "remaining_time": "0:00:12", "throughput": 4274.39, "total_tokens": 24925928}
|
| 7636 |
+
{"current_steps": 38085, "total_steps": 38160, "loss": 0.3754, "lr": 1.2082684585634773e-08, "epoch": 19.96069182389937, "percentage": 99.8, "elapsed_time": "1:37:12", "remaining_time": "0:00:11", "throughput": 4274.42, "total_tokens": 24929224}
|
| 7637 |
+
{"current_steps": 38090, "total_steps": 38160, "loss": 0.3613, "lr": 1.0545160009145516e-08, "epoch": 19.963312368972748, "percentage": 99.82, "elapsed_time": "1:37:12", "remaining_time": "0:00:10", "throughput": 4274.39, "total_tokens": 24931752}
|
| 7638 |
+
{"current_steps": 38095, "total_steps": 38160, "loss": 0.6552, "lr": 9.112227522423133e-09, "epoch": 19.96593291404612, "percentage": 99.83, "elapsed_time": "1:37:13", "remaining_time": "0:00:09", "throughput": 4274.38, "total_tokens": 24934504}
|
| 7639 |
+
{"current_steps": 38100, "total_steps": 38160, "loss": 0.5286, "lr": 7.783887425172331e-09, "epoch": 19.968553459119498, "percentage": 99.84, "elapsed_time": "1:37:14", "remaining_time": "0:00:09", "throughput": 4274.39, "total_tokens": 24937512}
|
| 7640 |
+
{"current_steps": 38105, "total_steps": 38160, "loss": 0.3216, "lr": 6.560139995392955e-09, "epoch": 19.97117400419287, "percentage": 99.86, "elapsed_time": "1:37:14", "remaining_time": "0:00:08", "throughput": 4274.4, "total_tokens": 24940520}
|
| 7641 |
+
{"current_steps": 38110, "total_steps": 38160, "loss": 0.3609, "lr": 5.440985488935901e-09, "epoch": 19.97379454926625, "percentage": 99.87, "elapsed_time": "1:37:15", "remaining_time": "0:00:07", "throughput": 4274.35, "total_tokens": 24942888}
|
| 7642 |
+
{"current_steps": 38115, "total_steps": 38160, "loss": 0.4675, "lr": 4.426424140058227e-09, "epoch": 19.97641509433962, "percentage": 99.88, "elapsed_time": "1:37:16", "remaining_time": "0:00:06", "throughput": 4274.4, "total_tokens": 24946408}
|
| 7643 |
+
{"current_steps": 38120, "total_steps": 38160, "loss": 0.6317, "lr": 3.5164561608680424e-09, "epoch": 19.979035639413, "percentage": 99.9, "elapsed_time": "1:37:16", "remaining_time": "0:00:06", "throughput": 4274.39, "total_tokens": 24948968}
|
| 7644 |
+
{"current_steps": 38125, "total_steps": 38160, "loss": 0.3686, "lr": 2.7110817417685953e-09, "epoch": 19.981656184486372, "percentage": 99.91, "elapsed_time": "1:37:17", "remaining_time": "0:00:05", "throughput": 4274.38, "total_tokens": 24951752}
|
| 7645 |
+
{"current_steps": 38130, "total_steps": 38160, "loss": 0.4576, "lr": 2.010301051291741e-09, "epoch": 19.98427672955975, "percentage": 99.92, "elapsed_time": "1:37:18", "remaining_time": "0:00:04", "throughput": 4274.39, "total_tokens": 24954696}
|
| 7646 |
+
{"current_steps": 38135, "total_steps": 38160, "loss": 0.4868, "lr": 1.4141142359314074e-09, "epoch": 19.986897274633122, "percentage": 99.93, "elapsed_time": "1:37:19", "remaining_time": "0:00:03", "throughput": 4274.5, "total_tokens": 24959048}
|
| 7647 |
+
{"current_steps": 38140, "total_steps": 38160, "loss": 0.31, "lr": 9.22521420476663e-10, "epoch": 19.9895178197065, "percentage": 99.95, "elapsed_time": "1:37:19", "remaining_time": "0:00:03", "throughput": 4274.48, "total_tokens": 24961640}
|
| 7648 |
+
{"current_steps": 38145, "total_steps": 38160, "loss": 0.3882, "lr": 5.355227077341596e-10, "epoch": 19.992138364779873, "percentage": 99.96, "elapsed_time": "1:37:20", "remaining_time": "0:00:02", "throughput": 4274.44, "total_tokens": 24963976}
|
| 7649 |
+
{"current_steps": 38150, "total_steps": 38160, "loss": 0.3728, "lr": 2.5311817863915566e-10, "epoch": 19.99475890985325, "percentage": 99.97, "elapsed_time": "1:37:20", "remaining_time": "0:00:01", "throughput": 4274.44, "total_tokens": 24966760}
|
| 7650 |
+
{"current_steps": 38155, "total_steps": 38160, "loss": 0.3599, "lr": 7.53078923110273e-11, "epoch": 19.997379454926623, "percentage": 99.99, "elapsed_time": "1:37:21", "remaining_time": "0:00:00", "throughput": 4274.48, "total_tokens": 24970216}
|
| 7651 |
+
{"current_steps": 38160, "total_steps": 38160, "loss": 0.7746, "lr": 2.0918859422458526e-12, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:37:22", "remaining_time": "0:00:00", "throughput": 4274.49, "total_tokens": 24973864}
|
| 7652 |
+
{"current_steps": 38160, "total_steps": 38160, "eval_loss": 0.4553232491016388, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:37:35", "remaining_time": "0:00:00", "throughput": 4264.69, "total_tokens": 24973864}
|
| 7653 |
+
{"current_steps": 38160, "total_steps": 38160, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:37:37", "remaining_time": "0:00:00", "throughput": 4263.22, "total_tokens": 24973864}
|