Training in progress, step 12293
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +132 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1638528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0d834435a5774f261eff8e00b29084e06208e0e69547dc2b69ac0d89a18b251b
|
| 3 |
size 1638528
|
trainer_log.jsonl
CHANGED
|
@@ -2346,3 +2346,135 @@
|
|
| 2346 |
{"current_steps": 11645, "total_steps": 12940, "loss": 0.3782, "lr": 1.512301019405582e-06, "epoch": 8.999227202472952, "percentage": 89.99, "elapsed_time": "0:39:00", "remaining_time": "0:04:20", "throughput": 1678.76, "total_tokens": 3928576}
|
| 2347 |
{"current_steps": 11646, "total_steps": 12940, "eval_loss": 0.5048345923423767, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:39:11", "remaining_time": "0:04:21", "throughput": 1670.49, "total_tokens": 3928704}
|
| 2348 |
{"current_steps": 11650, "total_steps": 12940, "loss": 0.415, "lr": 1.5007724935268557e-06, "epoch": 9.003091190108192, "percentage": 90.03, "elapsed_time": "0:39:14", "remaining_time": "0:04:20", "throughput": 1669.21, "total_tokens": 3929984}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2346 |
{"current_steps": 11645, "total_steps": 12940, "loss": 0.3782, "lr": 1.512301019405582e-06, "epoch": 8.999227202472952, "percentage": 89.99, "elapsed_time": "0:39:00", "remaining_time": "0:04:20", "throughput": 1678.76, "total_tokens": 3928576}
|
| 2347 |
{"current_steps": 11646, "total_steps": 12940, "eval_loss": 0.5048345923423767, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:39:11", "remaining_time": "0:04:21", "throughput": 1670.49, "total_tokens": 3928704}
|
| 2348 |
{"current_steps": 11650, "total_steps": 12940, "loss": 0.415, "lr": 1.5007724935268557e-06, "epoch": 9.003091190108192, "percentage": 90.03, "elapsed_time": "0:39:14", "remaining_time": "0:04:20", "throughput": 1669.21, "total_tokens": 3929984}
|
| 2349 |
+
{"current_steps": 11655, "total_steps": 12940, "loss": 0.3829, "lr": 1.489286718000879e-06, "epoch": 9.006955177743432, "percentage": 90.07, "elapsed_time": "0:39:15", "remaining_time": "0:04:19", "throughput": 1669.37, "total_tokens": 3931968}
|
| 2350 |
+
{"current_steps": 11660, "total_steps": 12940, "loss": 0.572, "lr": 1.477843713722843e-06, "epoch": 9.01081916537867, "percentage": 90.11, "elapsed_time": "0:39:16", "remaining_time": "0:04:18", "throughput": 1669.47, "total_tokens": 3933728}
|
| 2351 |
+
{"current_steps": 11665, "total_steps": 12940, "loss": 0.4516, "lr": 1.4664435015101258e-06, "epoch": 9.01468315301391, "percentage": 90.15, "elapsed_time": "0:39:17", "remaining_time": "0:04:17", "throughput": 1669.56, "total_tokens": 3935488}
|
| 2352 |
+
{"current_steps": 11670, "total_steps": 12940, "loss": 0.344, "lr": 1.455086102102271e-06, "epoch": 9.018547140649149, "percentage": 90.19, "elapsed_time": "0:39:18", "remaining_time": "0:04:16", "throughput": 1669.6, "total_tokens": 3937056}
|
| 2353 |
+
{"current_steps": 11675, "total_steps": 12940, "loss": 0.3834, "lr": 1.4437715361609345e-06, "epoch": 9.02241112828439, "percentage": 90.22, "elapsed_time": "0:39:19", "remaining_time": "0:04:15", "throughput": 1669.73, "total_tokens": 3938976}
|
| 2354 |
+
{"current_steps": 11680, "total_steps": 12940, "loss": 0.2955, "lr": 1.4324998242698368e-06, "epoch": 9.02627511591963, "percentage": 90.26, "elapsed_time": "0:39:19", "remaining_time": "0:04:14", "throughput": 1669.73, "total_tokens": 3940384}
|
| 2355 |
+
{"current_steps": 11685, "total_steps": 12940, "loss": 0.4866, "lr": 1.4212709869347363e-06, "epoch": 9.030139103554868, "percentage": 90.3, "elapsed_time": "0:39:20", "remaining_time": "0:04:13", "throughput": 1669.82, "total_tokens": 3942176}
|
| 2356 |
+
{"current_steps": 11690, "total_steps": 12940, "loss": 0.3119, "lr": 1.4100850445834175e-06, "epoch": 9.034003091190108, "percentage": 90.34, "elapsed_time": "0:39:21", "remaining_time": "0:04:12", "throughput": 1669.87, "total_tokens": 3943776}
|
| 2357 |
+
{"current_steps": 11695, "total_steps": 12940, "loss": 0.4354, "lr": 1.3989420175656049e-06, "epoch": 9.037867078825348, "percentage": 90.38, "elapsed_time": "0:39:22", "remaining_time": "0:04:11", "throughput": 1670.0, "total_tokens": 3945664}
|
| 2358 |
+
{"current_steps": 11700, "total_steps": 12940, "loss": 0.3953, "lr": 1.3878419261529525e-06, "epoch": 9.041731066460587, "percentage": 90.42, "elapsed_time": "0:39:23", "remaining_time": "0:04:10", "throughput": 1670.14, "total_tokens": 3947584}
|
| 2359 |
+
{"current_steps": 11705, "total_steps": 12940, "loss": 0.4879, "lr": 1.3767847905390124e-06, "epoch": 9.045595054095827, "percentage": 90.46, "elapsed_time": "0:39:24", "remaining_time": "0:04:09", "throughput": 1670.25, "total_tokens": 3949344}
|
| 2360 |
+
{"current_steps": 11710, "total_steps": 12940, "loss": 0.3548, "lr": 1.3657706308391971e-06, "epoch": 9.049459041731067, "percentage": 90.49, "elapsed_time": "0:39:25", "remaining_time": "0:04:08", "throughput": 1670.28, "total_tokens": 3950880}
|
| 2361 |
+
{"current_steps": 11715, "total_steps": 12940, "loss": 0.3711, "lr": 1.35479946709072e-06, "epoch": 9.053323029366306, "percentage": 90.53, "elapsed_time": "0:39:26", "remaining_time": "0:04:07", "throughput": 1670.45, "total_tokens": 3952928}
|
| 2362 |
+
{"current_steps": 11720, "total_steps": 12940, "loss": 0.4858, "lr": 1.3438713192525793e-06, "epoch": 9.057187017001546, "percentage": 90.57, "elapsed_time": "0:39:27", "remaining_time": "0:04:06", "throughput": 1670.52, "total_tokens": 3954624}
|
| 2363 |
+
{"current_steps": 11725, "total_steps": 12940, "loss": 0.3906, "lr": 1.332986207205525e-06, "epoch": 9.061051004636786, "percentage": 90.61, "elapsed_time": "0:39:28", "remaining_time": "0:04:05", "throughput": 1670.63, "total_tokens": 3956352}
|
| 2364 |
+
{"current_steps": 11730, "total_steps": 12940, "loss": 0.5325, "lr": 1.3221441507520167e-06, "epoch": 9.064914992272024, "percentage": 90.65, "elapsed_time": "0:39:29", "remaining_time": "0:04:04", "throughput": 1670.73, "total_tokens": 3958144}
|
| 2365 |
+
{"current_steps": 11735, "total_steps": 12940, "loss": 0.4584, "lr": 1.311345169616171e-06, "epoch": 9.068778979907265, "percentage": 90.69, "elapsed_time": "0:39:30", "remaining_time": "0:04:03", "throughput": 1670.75, "total_tokens": 3959712}
|
| 2366 |
+
{"current_steps": 11740, "total_steps": 12940, "loss": 0.3971, "lr": 1.3005892834437594e-06, "epoch": 9.072642967542503, "percentage": 90.73, "elapsed_time": "0:39:30", "remaining_time": "0:04:02", "throughput": 1670.81, "total_tokens": 3961312}
|
| 2367 |
+
{"current_steps": 11745, "total_steps": 12940, "loss": 0.3496, "lr": 1.2898765118021406e-06, "epoch": 9.076506955177743, "percentage": 90.77, "elapsed_time": "0:39:31", "remaining_time": "0:04:01", "throughput": 1670.87, "total_tokens": 3962976}
|
| 2368 |
+
{"current_steps": 11750, "total_steps": 12940, "loss": 0.4629, "lr": 1.279206874180247e-06, "epoch": 9.080370942812984, "percentage": 90.8, "elapsed_time": "0:39:32", "remaining_time": "0:04:00", "throughput": 1670.9, "total_tokens": 3964480}
|
| 2369 |
+
{"current_steps": 11755, "total_steps": 12940, "loss": 0.3183, "lr": 1.2685803899885301e-06, "epoch": 9.084234930448222, "percentage": 90.84, "elapsed_time": "0:39:33", "remaining_time": "0:03:59", "throughput": 1671.05, "total_tokens": 3966368}
|
| 2370 |
+
{"current_steps": 11760, "total_steps": 12940, "loss": 0.3382, "lr": 1.2579970785589507e-06, "epoch": 9.088098918083462, "percentage": 90.88, "elapsed_time": "0:39:34", "remaining_time": "0:03:58", "throughput": 1671.1, "total_tokens": 3967968}
|
| 2371 |
+
{"current_steps": 11765, "total_steps": 12940, "loss": 0.3175, "lr": 1.247456959144913e-06, "epoch": 9.091962905718702, "percentage": 90.92, "elapsed_time": "0:39:35", "remaining_time": "0:03:57", "throughput": 1671.21, "total_tokens": 3969824}
|
| 2372 |
+
{"current_steps": 11770, "total_steps": 12940, "loss": 0.3196, "lr": 1.2369600509212543e-06, "epoch": 9.09582689335394, "percentage": 90.96, "elapsed_time": "0:39:36", "remaining_time": "0:03:56", "throughput": 1671.24, "total_tokens": 3971296}
|
| 2373 |
+
{"current_steps": 11775, "total_steps": 12940, "loss": 0.5826, "lr": 1.2265063729842068e-06, "epoch": 9.099690880989181, "percentage": 91.0, "elapsed_time": "0:39:37", "remaining_time": "0:03:55", "throughput": 1671.31, "total_tokens": 3972992}
|
| 2374 |
+
{"current_steps": 11780, "total_steps": 12940, "loss": 0.459, "lr": 1.216095944351342e-06, "epoch": 9.103554868624421, "percentage": 91.04, "elapsed_time": "0:39:38", "remaining_time": "0:03:54", "throughput": 1671.4, "total_tokens": 3974784}
|
| 2375 |
+
{"current_steps": 11785, "total_steps": 12940, "loss": 0.6045, "lr": 1.2057287839615544e-06, "epoch": 9.10741885625966, "percentage": 91.07, "elapsed_time": "0:39:39", "remaining_time": "0:03:53", "throughput": 1671.48, "total_tokens": 3976512}
|
| 2376 |
+
{"current_steps": 11790, "total_steps": 12940, "loss": 0.3237, "lr": 1.195404910675038e-06, "epoch": 9.1112828438949, "percentage": 91.11, "elapsed_time": "0:39:39", "remaining_time": "0:03:52", "throughput": 1671.58, "total_tokens": 3978240}
|
| 2377 |
+
{"current_steps": 11795, "total_steps": 12940, "loss": 0.6548, "lr": 1.1851243432732278e-06, "epoch": 9.115146831530138, "percentage": 91.15, "elapsed_time": "0:39:40", "remaining_time": "0:03:51", "throughput": 1671.72, "total_tokens": 3980192}
|
| 2378 |
+
{"current_steps": 11800, "total_steps": 12940, "loss": 0.3307, "lr": 1.1748871004587714e-06, "epoch": 9.119010819165378, "percentage": 91.19, "elapsed_time": "0:39:41", "remaining_time": "0:03:50", "throughput": 1671.74, "total_tokens": 3981696}
|
| 2379 |
+
{"current_steps": 11805, "total_steps": 12940, "loss": 0.3915, "lr": 1.164693200855499e-06, "epoch": 9.122874806800619, "percentage": 91.23, "elapsed_time": "0:39:42", "remaining_time": "0:03:49", "throughput": 1671.74, "total_tokens": 3983104}
|
| 2380 |
+
{"current_steps": 11810, "total_steps": 12940, "loss": 0.3583, "lr": 1.1545426630084093e-06, "epoch": 9.126738794435857, "percentage": 91.27, "elapsed_time": "0:39:43", "remaining_time": "0:03:48", "throughput": 1671.75, "total_tokens": 3984544}
|
| 2381 |
+
{"current_steps": 11815, "total_steps": 12940, "loss": 0.3939, "lr": 1.1444355053835926e-06, "epoch": 9.130602782071097, "percentage": 91.31, "elapsed_time": "0:39:44", "remaining_time": "0:03:47", "throughput": 1671.81, "total_tokens": 3986176}
|
| 2382 |
+
{"current_steps": 11820, "total_steps": 12940, "loss": 0.3638, "lr": 1.134371746368229e-06, "epoch": 9.134466769706338, "percentage": 91.34, "elapsed_time": "0:39:45", "remaining_time": "0:03:46", "throughput": 1671.88, "total_tokens": 3987872}
|
| 2383 |
+
{"current_steps": 11825, "total_steps": 12940, "loss": 0.3699, "lr": 1.1243514042705517e-06, "epoch": 9.138330757341576, "percentage": 91.38, "elapsed_time": "0:39:46", "remaining_time": "0:03:44", "throughput": 1672.0, "total_tokens": 3989696}
|
| 2384 |
+
{"current_steps": 11830, "total_steps": 12940, "loss": 0.32, "lr": 1.1143744973198062e-06, "epoch": 9.142194744976816, "percentage": 91.42, "elapsed_time": "0:39:47", "remaining_time": "0:03:43", "throughput": 1672.06, "total_tokens": 3991328}
|
| 2385 |
+
{"current_steps": 11835, "total_steps": 12940, "loss": 0.4116, "lr": 1.1044410436662156e-06, "epoch": 9.146058732612056, "percentage": 91.46, "elapsed_time": "0:39:47", "remaining_time": "0:03:42", "throughput": 1672.11, "total_tokens": 3992960}
|
| 2386 |
+
{"current_steps": 11840, "total_steps": 12940, "loss": 0.4835, "lr": 1.0945510613809523e-06, "epoch": 9.149922720247295, "percentage": 91.5, "elapsed_time": "0:39:48", "remaining_time": "0:03:41", "throughput": 1672.17, "total_tokens": 3994560}
|
| 2387 |
+
{"current_steps": 11845, "total_steps": 12940, "loss": 0.395, "lr": 1.0847045684561076e-06, "epoch": 9.153786707882535, "percentage": 91.54, "elapsed_time": "0:39:49", "remaining_time": "0:03:40", "throughput": 1672.32, "total_tokens": 3996480}
|
| 2388 |
+
{"current_steps": 11850, "total_steps": 12940, "loss": 0.4228, "lr": 1.0749015828046632e-06, "epoch": 9.157650695517773, "percentage": 91.58, "elapsed_time": "0:39:50", "remaining_time": "0:03:39", "throughput": 1672.36, "total_tokens": 3998016}
|
| 2389 |
+
{"current_steps": 11855, "total_steps": 12940, "loss": 0.3089, "lr": 1.0651421222604346e-06, "epoch": 9.161514683153014, "percentage": 91.62, "elapsed_time": "0:39:51", "remaining_time": "0:03:38", "throughput": 1672.43, "total_tokens": 3999712}
|
| 2390 |
+
{"current_steps": 11860, "total_steps": 12940, "loss": 0.4504, "lr": 1.0554262045780694e-06, "epoch": 9.165378670788254, "percentage": 91.65, "elapsed_time": "0:39:52", "remaining_time": "0:03:37", "throughput": 1672.5, "total_tokens": 4001408}
|
| 2391 |
+
{"current_steps": 11865, "total_steps": 12940, "loss": 0.3183, "lr": 1.0457538474329897e-06, "epoch": 9.169242658423492, "percentage": 91.69, "elapsed_time": "0:39:53", "remaining_time": "0:03:36", "throughput": 1672.56, "total_tokens": 4003072}
|
| 2392 |
+
{"current_steps": 11870, "total_steps": 12940, "loss": 0.5185, "lr": 1.0361250684213868e-06, "epoch": 9.173106646058732, "percentage": 91.73, "elapsed_time": "0:39:54", "remaining_time": "0:03:35", "throughput": 1672.57, "total_tokens": 4004576}
|
| 2393 |
+
{"current_steps": 11875, "total_steps": 12940, "loss": 0.4194, "lr": 1.0265398850601544e-06, "epoch": 9.176970633693973, "percentage": 91.77, "elapsed_time": "0:39:55", "remaining_time": "0:03:34", "throughput": 1672.58, "total_tokens": 4006016}
|
| 2394 |
+
{"current_steps": 11880, "total_steps": 12940, "loss": 0.3538, "lr": 1.0169983147868934e-06, "epoch": 9.180834621329211, "percentage": 91.81, "elapsed_time": "0:39:55", "remaining_time": "0:03:33", "throughput": 1672.58, "total_tokens": 4007456}
|
| 2395 |
+
{"current_steps": 11885, "total_steps": 12940, "loss": 0.3572, "lr": 1.0075003749598467e-06, "epoch": 9.184698608964451, "percentage": 91.85, "elapsed_time": "0:39:56", "remaining_time": "0:03:32", "throughput": 1672.68, "total_tokens": 4009280}
|
| 2396 |
+
{"current_steps": 11890, "total_steps": 12940, "loss": 0.406, "lr": 9.98046082857898e-07, "epoch": 9.188562596599692, "percentage": 91.89, "elapsed_time": "0:39:57", "remaining_time": "0:03:31", "throughput": 1672.67, "total_tokens": 4010688}
|
| 2397 |
+
{"current_steps": 11895, "total_steps": 12940, "loss": 0.3241, "lr": 9.886354556805227e-07, "epoch": 9.19242658423493, "percentage": 91.92, "elapsed_time": "0:39:58", "remaining_time": "0:03:30", "throughput": 1672.76, "total_tokens": 4012480}
|
| 2398 |
+
{"current_steps": 11900, "total_steps": 12940, "loss": 0.3106, "lr": 9.792685105477512e-07, "epoch": 9.19629057187017, "percentage": 91.96, "elapsed_time": "0:39:59", "remaining_time": "0:03:29", "throughput": 1672.77, "total_tokens": 4013920}
|
| 2399 |
+
{"current_steps": 11905, "total_steps": 12940, "loss": 0.3213, "lr": 9.69945264500155e-07, "epoch": 9.20015455950541, "percentage": 92.0, "elapsed_time": "0:40:00", "remaining_time": "0:03:28", "throughput": 1672.82, "total_tokens": 4015456}
|
| 2400 |
+
{"current_steps": 11910, "total_steps": 12940, "loss": 0.3601, "lr": 9.606657344988028e-07, "epoch": 9.204018547140649, "percentage": 92.04, "elapsed_time": "0:40:01", "remaining_time": "0:03:27", "throughput": 1672.88, "total_tokens": 4017088}
|
| 2401 |
+
{"current_steps": 11915, "total_steps": 12940, "loss": 0.4321, "lr": 9.514299374252384e-07, "epoch": 9.207882534775889, "percentage": 92.08, "elapsed_time": "0:40:02", "remaining_time": "0:03:26", "throughput": 1672.93, "total_tokens": 4018688}
|
| 2402 |
+
{"current_steps": 11920, "total_steps": 12940, "loss": 0.2887, "lr": 9.42237890081446e-07, "epoch": 9.211746522411127, "percentage": 92.12, "elapsed_time": "0:40:03", "remaining_time": "0:03:25", "throughput": 1672.99, "total_tokens": 4020352}
|
| 2403 |
+
{"current_steps": 11925, "total_steps": 12940, "loss": 0.4599, "lr": 9.330896091898078e-07, "epoch": 9.215610510046368, "percentage": 92.16, "elapsed_time": "0:40:03", "remaining_time": "0:03:24", "throughput": 1673.05, "total_tokens": 4021984}
|
| 2404 |
+
{"current_steps": 11930, "total_steps": 12940, "loss": 0.351, "lr": 9.239851113930997e-07, "epoch": 9.219474497681608, "percentage": 92.19, "elapsed_time": "0:40:04", "remaining_time": "0:03:23", "throughput": 1673.16, "total_tokens": 4023872}
|
| 2405 |
+
{"current_steps": 11935, "total_steps": 12940, "loss": 0.288, "lr": 9.149244132544393e-07, "epoch": 9.223338485316846, "percentage": 92.23, "elapsed_time": "0:40:05", "remaining_time": "0:03:22", "throughput": 1673.21, "total_tokens": 4025536}
|
| 2406 |
+
{"current_steps": 11940, "total_steps": 12940, "loss": 0.3241, "lr": 9.059075312572635e-07, "epoch": 9.227202472952087, "percentage": 92.27, "elapsed_time": "0:40:06", "remaining_time": "0:03:21", "throughput": 1673.24, "total_tokens": 4027040}
|
| 2407 |
+
{"current_steps": 11945, "total_steps": 12940, "loss": 0.3883, "lr": 8.969344818052977e-07, "epoch": 9.231066460587327, "percentage": 92.31, "elapsed_time": "0:40:07", "remaining_time": "0:03:20", "throughput": 1673.39, "total_tokens": 4028960}
|
| 2408 |
+
{"current_steps": 11950, "total_steps": 12940, "loss": 0.4248, "lr": 8.880052812225314e-07, "epoch": 9.234930448222565, "percentage": 92.35, "elapsed_time": "0:40:08", "remaining_time": "0:03:19", "throughput": 1673.47, "total_tokens": 4030688}
|
| 2409 |
+
{"current_steps": 11955, "total_steps": 12940, "loss": 0.6813, "lr": 8.791199457531735e-07, "epoch": 9.238794435857805, "percentage": 92.39, "elapsed_time": "0:40:09", "remaining_time": "0:03:18", "throughput": 1673.49, "total_tokens": 4032128}
|
| 2410 |
+
{"current_steps": 11960, "total_steps": 12940, "loss": 0.346, "lr": 8.702784915616352e-07, "epoch": 9.242658423493046, "percentage": 92.43, "elapsed_time": "0:40:10", "remaining_time": "0:03:17", "throughput": 1673.59, "total_tokens": 4033952}
|
| 2411 |
+
{"current_steps": 11965, "total_steps": 12940, "loss": 0.333, "lr": 8.614809347325031e-07, "epoch": 9.246522411128284, "percentage": 92.47, "elapsed_time": "0:40:11", "remaining_time": "0:03:16", "throughput": 1673.69, "total_tokens": 4035744}
|
| 2412 |
+
{"current_steps": 11970, "total_steps": 12940, "loss": 0.4332, "lr": 8.527272912705025e-07, "epoch": 9.250386398763524, "percentage": 92.5, "elapsed_time": "0:40:12", "remaining_time": "0:03:15", "throughput": 1673.76, "total_tokens": 4037440}
|
| 2413 |
+
{"current_steps": 11975, "total_steps": 12940, "loss": 0.5924, "lr": 8.440175771004699e-07, "epoch": 9.254250386398763, "percentage": 92.54, "elapsed_time": "0:40:13", "remaining_time": "0:03:14", "throughput": 1673.82, "total_tokens": 4039072}
|
| 2414 |
+
{"current_steps": 11980, "total_steps": 12940, "loss": 0.3603, "lr": 8.353518080673195e-07, "epoch": 9.258114374034003, "percentage": 92.58, "elapsed_time": "0:40:13", "remaining_time": "0:03:13", "throughput": 1673.92, "total_tokens": 4040832}
|
| 2415 |
+
{"current_steps": 11985, "total_steps": 12940, "loss": 0.3178, "lr": 8.267299999360267e-07, "epoch": 9.261978361669243, "percentage": 92.62, "elapsed_time": "0:40:14", "remaining_time": "0:03:12", "throughput": 1673.99, "total_tokens": 4042528}
|
| 2416 |
+
{"current_steps": 11990, "total_steps": 12940, "loss": 0.3962, "lr": 8.181521683915921e-07, "epoch": 9.265842349304481, "percentage": 92.66, "elapsed_time": "0:40:15", "remaining_time": "0:03:11", "throughput": 1674.07, "total_tokens": 4044224}
|
| 2417 |
+
{"current_steps": 11995, "total_steps": 12940, "loss": 0.4961, "lr": 8.096183290390053e-07, "epoch": 9.269706336939722, "percentage": 92.7, "elapsed_time": "0:40:16", "remaining_time": "0:03:10", "throughput": 1674.16, "total_tokens": 4045920}
|
| 2418 |
+
{"current_steps": 12000, "total_steps": 12940, "loss": 0.4487, "lr": 8.011284974032363e-07, "epoch": 9.273570324574962, "percentage": 92.74, "elapsed_time": "0:40:17", "remaining_time": "0:03:09", "throughput": 1674.23, "total_tokens": 4047584}
|
| 2419 |
+
{"current_steps": 12005, "total_steps": 12940, "loss": 0.285, "lr": 7.926826889291833e-07, "epoch": 9.2774343122102, "percentage": 92.77, "elapsed_time": "0:40:18", "remaining_time": "0:03:08", "throughput": 1674.31, "total_tokens": 4049216}
|
| 2420 |
+
{"current_steps": 12010, "total_steps": 12940, "loss": 0.3205, "lr": 7.842809189816641e-07, "epoch": 9.28129829984544, "percentage": 92.81, "elapsed_time": "0:40:19", "remaining_time": "0:03:07", "throughput": 1674.4, "total_tokens": 4050912}
|
| 2421 |
+
{"current_steps": 12015, "total_steps": 12940, "loss": 0.3262, "lr": 7.759232028453744e-07, "epoch": 9.28516228748068, "percentage": 92.85, "elapsed_time": "0:40:20", "remaining_time": "0:03:06", "throughput": 1674.5, "total_tokens": 4052704}
|
| 2422 |
+
{"current_steps": 12020, "total_steps": 12940, "loss": 0.4121, "lr": 7.676095557248769e-07, "epoch": 9.28902627511592, "percentage": 92.89, "elapsed_time": "0:40:21", "remaining_time": "0:03:05", "throughput": 1674.59, "total_tokens": 4054368}
|
| 2423 |
+
{"current_steps": 12025, "total_steps": 12940, "loss": 0.3629, "lr": 7.593399927445454e-07, "epoch": 9.29289026275116, "percentage": 92.93, "elapsed_time": "0:40:21", "remaining_time": "0:03:04", "throughput": 1674.65, "total_tokens": 4056000}
|
| 2424 |
+
{"current_steps": 12030, "total_steps": 12940, "loss": 0.3439, "lr": 7.511145289485738e-07, "epoch": 9.2967542503864, "percentage": 92.97, "elapsed_time": "0:40:22", "remaining_time": "0:03:03", "throughput": 1674.75, "total_tokens": 4057824}
|
| 2425 |
+
{"current_steps": 12035, "total_steps": 12940, "loss": 0.5464, "lr": 7.429331793009225e-07, "epoch": 9.300618238021638, "percentage": 93.01, "elapsed_time": "0:40:23", "remaining_time": "0:03:02", "throughput": 1674.89, "total_tokens": 4059744}
|
| 2426 |
+
{"current_steps": 12040, "total_steps": 12940, "loss": 0.3348, "lr": 7.347959586852915e-07, "epoch": 9.304482225656878, "percentage": 93.04, "elapsed_time": "0:40:24", "remaining_time": "0:03:01", "throughput": 1674.93, "total_tokens": 4061312}
|
| 2427 |
+
{"current_steps": 12045, "total_steps": 12940, "loss": 0.5156, "lr": 7.267028819051058e-07, "epoch": 9.308346213292117, "percentage": 93.08, "elapsed_time": "0:40:25", "remaining_time": "0:03:00", "throughput": 1674.94, "total_tokens": 4062752}
|
| 2428 |
+
{"current_steps": 12050, "total_steps": 12940, "loss": 0.33, "lr": 7.186539636834855e-07, "epoch": 9.312210200927357, "percentage": 93.12, "elapsed_time": "0:40:26", "remaining_time": "0:02:59", "throughput": 1674.94, "total_tokens": 4064224}
|
| 2429 |
+
{"current_steps": 12055, "total_steps": 12940, "loss": 0.4544, "lr": 7.106492186632147e-07, "epoch": 9.316074188562597, "percentage": 93.16, "elapsed_time": "0:40:27", "remaining_time": "0:02:58", "throughput": 1675.01, "total_tokens": 4065920}
|
| 2430 |
+
{"current_steps": 12060, "total_steps": 12940, "loss": 0.4436, "lr": 7.026886614067141e-07, "epoch": 9.319938176197835, "percentage": 93.2, "elapsed_time": "0:40:28", "remaining_time": "0:02:57", "throughput": 1675.12, "total_tokens": 4067744}
|
| 2431 |
+
{"current_steps": 12065, "total_steps": 12940, "loss": 0.3134, "lr": 6.947723063960132e-07, "epoch": 9.323802163833076, "percentage": 93.24, "elapsed_time": "0:40:29", "remaining_time": "0:02:56", "throughput": 1675.12, "total_tokens": 4069152}
|
| 2432 |
+
{"current_steps": 12070, "total_steps": 12940, "loss": 0.4172, "lr": 6.869001680327447e-07, "epoch": 9.327666151468316, "percentage": 93.28, "elapsed_time": "0:40:30", "remaining_time": "0:02:55", "throughput": 1675.22, "total_tokens": 4070976}
|
| 2433 |
+
{"current_steps": 12075, "total_steps": 12940, "loss": 0.3875, "lr": 6.790722606380806e-07, "epoch": 9.331530139103554, "percentage": 93.32, "elapsed_time": "0:40:30", "remaining_time": "0:02:54", "throughput": 1675.23, "total_tokens": 4072416}
|
| 2434 |
+
{"current_steps": 12080, "total_steps": 12940, "loss": 0.4109, "lr": 6.712885984527378e-07, "epoch": 9.335394126738795, "percentage": 93.35, "elapsed_time": "0:40:31", "remaining_time": "0:02:53", "throughput": 1675.33, "total_tokens": 4074208}
|
| 2435 |
+
{"current_steps": 12085, "total_steps": 12940, "loss": 0.4116, "lr": 6.635491956369394e-07, "epoch": 9.339258114374035, "percentage": 93.39, "elapsed_time": "0:40:32", "remaining_time": "0:02:52", "throughput": 1675.41, "total_tokens": 4075904}
|
| 2436 |
+
{"current_steps": 12090, "total_steps": 12940, "loss": 0.3383, "lr": 6.558540662703922e-07, "epoch": 9.343122102009273, "percentage": 93.43, "elapsed_time": "0:40:33", "remaining_time": "0:02:51", "throughput": 1675.48, "total_tokens": 4077568}
|
| 2437 |
+
{"current_steps": 12095, "total_steps": 12940, "loss": 0.3387, "lr": 6.482032243522618e-07, "epoch": 9.346986089644513, "percentage": 93.47, "elapsed_time": "0:40:34", "remaining_time": "0:02:50", "throughput": 1675.63, "total_tokens": 4079520}
|
| 2438 |
+
{"current_steps": 12100, "total_steps": 12940, "loss": 0.567, "lr": 6.405966838011313e-07, "epoch": 9.350850077279752, "percentage": 93.51, "elapsed_time": "0:40:35", "remaining_time": "0:02:49", "throughput": 1675.7, "total_tokens": 4081280}
|
| 2439 |
+
{"current_steps": 12105, "total_steps": 12940, "loss": 0.3708, "lr": 6.330344584550063e-07, "epoch": 9.354714064914992, "percentage": 93.55, "elapsed_time": "0:40:36", "remaining_time": "0:02:48", "throughput": 1675.77, "total_tokens": 4082976}
|
| 2440 |
+
{"current_steps": 12110, "total_steps": 12940, "loss": 0.3918, "lr": 6.255165620712711e-07, "epoch": 9.358578052550232, "percentage": 93.59, "elapsed_time": "0:40:37", "remaining_time": "0:02:47", "throughput": 1675.85, "total_tokens": 4084704}
|
| 2441 |
+
{"current_steps": 12115, "total_steps": 12940, "loss": 0.3938, "lr": 6.18043008326652e-07, "epoch": 9.36244204018547, "percentage": 93.62, "elapsed_time": "0:40:38", "remaining_time": "0:02:46", "throughput": 1675.91, "total_tokens": 4086368}
|
| 2442 |
+
{"current_steps": 12120, "total_steps": 12940, "loss": 0.3442, "lr": 6.106138108172233e-07, "epoch": 9.36630602782071, "percentage": 93.66, "elapsed_time": "0:40:39", "remaining_time": "0:02:45", "throughput": 1676.02, "total_tokens": 4088192}
|
| 2443 |
+
{"current_steps": 12125, "total_steps": 12940, "loss": 0.3421, "lr": 6.032289830583515e-07, "epoch": 9.370170015455951, "percentage": 93.7, "elapsed_time": "0:40:40", "remaining_time": "0:02:44", "throughput": 1676.07, "total_tokens": 4089824}
|
| 2444 |
+
{"current_steps": 12130, "total_steps": 12940, "loss": 0.5586, "lr": 5.958885384846958e-07, "epoch": 9.37403400309119, "percentage": 93.74, "elapsed_time": "0:40:41", "remaining_time": "0:02:43", "throughput": 1676.14, "total_tokens": 4091552}
|
| 2445 |
+
{"current_steps": 12135, "total_steps": 12940, "loss": 0.5366, "lr": 5.885924904501627e-07, "epoch": 9.37789799072643, "percentage": 93.78, "elapsed_time": "0:40:41", "remaining_time": "0:02:41", "throughput": 1676.21, "total_tokens": 4093312}
|
| 2446 |
+
{"current_steps": 12140, "total_steps": 12940, "loss": 0.3096, "lr": 5.81340852227899e-07, "epoch": 9.38176197836167, "percentage": 93.82, "elapsed_time": "0:40:42", "remaining_time": "0:02:40", "throughput": 1676.29, "total_tokens": 4095040}
|
| 2447 |
+
{"current_steps": 12145, "total_steps": 12940, "loss": 0.3299, "lr": 5.741336370102573e-07, "epoch": 9.385625965996908, "percentage": 93.86, "elapsed_time": "0:40:43", "remaining_time": "0:02:39", "throughput": 1676.36, "total_tokens": 4096704}
|
| 2448 |
+
{"current_steps": 12150, "total_steps": 12940, "loss": 0.4277, "lr": 5.669708579087718e-07, "epoch": 9.389489953632149, "percentage": 93.89, "elapsed_time": "0:40:44", "remaining_time": "0:02:38", "throughput": 1676.46, "total_tokens": 4098432}
|
| 2449 |
+
{"current_steps": 12155, "total_steps": 12940, "loss": 0.6135, "lr": 5.598525279541438e-07, "epoch": 9.393353941267389, "percentage": 93.93, "elapsed_time": "0:40:45", "remaining_time": "0:02:37", "throughput": 1676.48, "total_tokens": 4099968}
|
| 2450 |
+
{"current_steps": 12160, "total_steps": 12940, "loss": 0.6291, "lr": 5.527786600962093e-07, "epoch": 9.397217928902627, "percentage": 93.97, "elapsed_time": "0:40:46", "remaining_time": "0:02:36", "throughput": 1676.63, "total_tokens": 4102016}
|
| 2451 |
+
{"current_steps": 12165, "total_steps": 12940, "loss": 0.3263, "lr": 5.4574926720391e-07, "epoch": 9.401081916537867, "percentage": 94.01, "elapsed_time": "0:40:47", "remaining_time": "0:02:35", "throughput": 1676.68, "total_tokens": 4103616}
|
| 2452 |
+
{"current_steps": 12170, "total_steps": 12940, "loss": 0.4533, "lr": 5.387643620652888e-07, "epoch": 9.404945904173106, "percentage": 94.05, "elapsed_time": "0:40:48", "remaining_time": "0:02:34", "throughput": 1676.76, "total_tokens": 4105376}
|
| 2453 |
+
{"current_steps": 12175, "total_steps": 12940, "loss": 0.5429, "lr": 5.318239573874534e-07, "epoch": 9.408809891808346, "percentage": 94.09, "elapsed_time": "0:40:49", "remaining_time": "0:02:33", "throughput": 1676.91, "total_tokens": 4107296}
|
| 2454 |
+
{"current_steps": 12180, "total_steps": 12940, "loss": 0.3542, "lr": 5.24928065796551e-07, "epoch": 9.412673879443586, "percentage": 94.13, "elapsed_time": "0:40:50", "remaining_time": "0:02:32", "throughput": 1676.99, "total_tokens": 4109056}
|
| 2455 |
+
{"current_steps": 12185, "total_steps": 12940, "loss": 0.569, "lr": 5.18076699837744e-07, "epoch": 9.416537867078825, "percentage": 94.17, "elapsed_time": "0:40:51", "remaining_time": "0:02:31", "throughput": 1677.1, "total_tokens": 4110816}
|
| 2456 |
+
{"current_steps": 12190, "total_steps": 12940, "loss": 0.3652, "lr": 5.112698719752119e-07, "epoch": 9.420401854714065, "percentage": 94.2, "elapsed_time": "0:40:52", "remaining_time": "0:02:30", "throughput": 1677.15, "total_tokens": 4112416}
|
| 2457 |
+
{"current_steps": 12195, "total_steps": 12940, "loss": 0.4423, "lr": 5.045075945920935e-07, "epoch": 9.424265842349305, "percentage": 94.24, "elapsed_time": "0:40:52", "remaining_time": "0:02:29", "throughput": 1677.21, "total_tokens": 4114112}
|
| 2458 |
+
{"current_steps": 12200, "total_steps": 12940, "loss": 0.4525, "lr": 4.97789879990479e-07, "epoch": 9.428129829984544, "percentage": 94.28, "elapsed_time": "0:40:53", "remaining_time": "0:02:28", "throughput": 1677.31, "total_tokens": 4115872}
|
| 2459 |
+
{"current_steps": 12205, "total_steps": 12940, "loss": 0.3924, "lr": 4.911167403913979e-07, "epoch": 9.431993817619784, "percentage": 94.32, "elapsed_time": "0:40:54", "remaining_time": "0:02:27", "throughput": 1677.39, "total_tokens": 4117600}
|
| 2460 |
+
{"current_steps": 12210, "total_steps": 12940, "loss": 0.4197, "lr": 4.844881879347896e-07, "epoch": 9.435857805255024, "percentage": 94.36, "elapsed_time": "0:40:55", "remaining_time": "0:02:26", "throughput": 1677.46, "total_tokens": 4119360}
|
| 2461 |
+
{"current_steps": 12215, "total_steps": 12940, "loss": 0.4111, "lr": 4.779042346794688e-07, "epoch": 9.439721792890262, "percentage": 94.4, "elapsed_time": "0:40:56", "remaining_time": "0:02:25", "throughput": 1677.55, "total_tokens": 4121088}
|
| 2462 |
+
{"current_steps": 12220, "total_steps": 12940, "loss": 0.3731, "lr": 4.7136489260311553e-07, "epoch": 9.443585780525503, "percentage": 94.44, "elapsed_time": "0:40:57", "remaining_time": "0:02:24", "throughput": 1677.56, "total_tokens": 4122592}
|
| 2463 |
+
{"current_steps": 12225, "total_steps": 12940, "loss": 0.3655, "lr": 4.648701736022637e-07, "epoch": 9.447449768160741, "percentage": 94.47, "elapsed_time": "0:40:58", "remaining_time": "0:02:23", "throughput": 1677.57, "total_tokens": 4124160}
|
| 2464 |
+
{"current_steps": 12230, "total_steps": 12940, "loss": 0.38, "lr": 4.5842008949225913e-07, "epoch": 9.451313755795981, "percentage": 94.51, "elapsed_time": "0:40:59", "remaining_time": "0:02:22", "throughput": 1677.65, "total_tokens": 4125920}
|
| 2465 |
+
{"current_steps": 12235, "total_steps": 12940, "loss": 0.3092, "lr": 4.5201465200724327e-07, "epoch": 9.455177743431221, "percentage": 94.55, "elapsed_time": "0:41:00", "remaining_time": "0:02:21", "throughput": 1677.78, "total_tokens": 4127808}
|
| 2466 |
+
{"current_steps": 12240, "total_steps": 12940, "loss": 0.3973, "lr": 4.456538728001475e-07, "epoch": 9.45904173106646, "percentage": 94.59, "elapsed_time": "0:41:01", "remaining_time": "0:02:20", "throughput": 1677.92, "total_tokens": 4129760}
|
| 2467 |
+
{"current_steps": 12245, "total_steps": 12940, "loss": 0.5579, "lr": 4.393377634426432e-07, "epoch": 9.4629057187017, "percentage": 94.63, "elapsed_time": "0:41:02", "remaining_time": "0:02:19", "throughput": 1677.93, "total_tokens": 4131264}
|
| 2468 |
+
{"current_steps": 12250, "total_steps": 12940, "loss": 0.3081, "lr": 4.3306633542515574e-07, "epoch": 9.46676970633694, "percentage": 94.67, "elapsed_time": "0:41:03", "remaining_time": "0:02:18", "throughput": 1678.06, "total_tokens": 4133152}
|
| 2469 |
+
{"current_steps": 12255, "total_steps": 12940, "loss": 0.4624, "lr": 4.268396001568087e-07, "epoch": 9.470633693972179, "percentage": 94.71, "elapsed_time": "0:41:03", "remaining_time": "0:02:17", "throughput": 1678.11, "total_tokens": 4134816}
|
| 2470 |
+
{"current_steps": 12260, "total_steps": 12940, "loss": 0.3868, "lr": 4.206575689654324e-07, "epoch": 9.474497681607419, "percentage": 94.74, "elapsed_time": "0:41:04", "remaining_time": "0:02:16", "throughput": 1678.2, "total_tokens": 4136576}
|
| 2471 |
+
{"current_steps": 12265, "total_steps": 12940, "loss": 0.535, "lr": 4.1452025309751673e-07, "epoch": 9.478361669242659, "percentage": 94.78, "elapsed_time": "0:41:05", "remaining_time": "0:02:15", "throughput": 1678.27, "total_tokens": 4138208}
|
| 2472 |
+
{"current_steps": 12270, "total_steps": 12940, "loss": 0.5027, "lr": 4.0842766371822216e-07, "epoch": 9.482225656877898, "percentage": 94.82, "elapsed_time": "0:41:06", "remaining_time": "0:02:14", "throughput": 1678.24, "total_tokens": 4139552}
|
| 2473 |
+
{"current_steps": 12275, "total_steps": 12940, "loss": 0.3818, "lr": 4.02379811911327e-07, "epoch": 9.486089644513138, "percentage": 94.86, "elapsed_time": "0:41:07", "remaining_time": "0:02:13", "throughput": 1678.31, "total_tokens": 4141216}
|
| 2474 |
+
{"current_steps": 12280, "total_steps": 12940, "loss": 0.4078, "lr": 3.963767086792275e-07, "epoch": 9.489953632148378, "percentage": 94.9, "elapsed_time": "0:41:08", "remaining_time": "0:02:12", "throughput": 1678.37, "total_tokens": 4142848}
|
| 2475 |
+
{"current_steps": 12285, "total_steps": 12940, "loss": 0.4355, "lr": 3.9041836494291275e-07, "epoch": 9.493817619783616, "percentage": 94.94, "elapsed_time": "0:41:09", "remaining_time": "0:02:11", "throughput": 1678.4, "total_tokens": 4144416}
|
| 2476 |
+
{"current_steps": 12290, "total_steps": 12940, "loss": 0.3722, "lr": 3.845047915419397e-07, "epoch": 9.497681607418857, "percentage": 94.98, "elapsed_time": "0:41:10", "remaining_time": "0:02:10", "throughput": 1678.46, "total_tokens": 4146112}
|
| 2477 |
+
{"current_steps": 12293, "total_steps": 12940, "eval_loss": 0.5038707852363586, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:41:22", "remaining_time": "0:02:10", "throughput": 1670.72, "total_tokens": 4147200}
|
| 2478 |
+
{"current_steps": 12295, "total_steps": 12940, "loss": 0.2858, "lr": 3.7863599923442516e-07, "epoch": 9.501545595054095, "percentage": 95.02, "elapsed_time": "0:41:24", "remaining_time": "0:02:10", "throughput": 1669.79, "total_tokens": 4148032}
|
| 2479 |
+
{"current_steps": 12300, "total_steps": 12940, "loss": 0.3949, "lr": 3.728119986970147e-07, "epoch": 9.505409582689335, "percentage": 95.05, "elapsed_time": "0:41:25", "remaining_time": "0:02:09", "throughput": 1669.85, "total_tokens": 4149664}
|
| 2480 |
+
{"current_steps": 12305, "total_steps": 12940, "loss": 0.3249, "lr": 3.670328005248663e-07, "epoch": 9.509273570324575, "percentage": 95.09, "elapsed_time": "0:41:25", "remaining_time": "0:02:08", "throughput": 1669.9, "total_tokens": 4151328}
|