Training in progress, step 3400
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +41 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1638528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f7d19ddbccb1734cd355ddee30937370fd5559ac8e537d30816e80addf62b4d8
|
| 3 |
size 1638528
|
trainer_log.jsonl
CHANGED
|
@@ -654,3 +654,44 @@
|
|
| 654 |
{"current_steps": 3195, "total_steps": 40000, "loss": 6.0947, "lr": 0.2953050368891902, "epoch": 0.10226618014211639, "percentage": 7.99, "elapsed_time": "3:03:12", "remaining_time": "1 day, 11:10:32", "throughput": 393.38, "total_tokens": 4324368}
|
| 655 |
{"current_steps": 3200, "total_steps": 40000, "loss": 6.6672, "lr": 0.29529040352989805, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:03:14", "remaining_time": "1 day, 11:07:20", "throughput": 393.92, "total_tokens": 4331088}
|
| 656 |
{"current_steps": 3200, "total_steps": 40000, "eval_loss": 5.711532115936279, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:13:58", "remaining_time": "1 day, 13:10:39", "throughput": 372.14, "total_tokens": 4331088}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 654 |
{"current_steps": 3195, "total_steps": 40000, "loss": 6.0947, "lr": 0.2953050368891902, "epoch": 0.10226618014211639, "percentage": 7.99, "elapsed_time": "3:03:12", "remaining_time": "1 day, 11:10:32", "throughput": 393.38, "total_tokens": 4324368}
|
| 655 |
{"current_steps": 3200, "total_steps": 40000, "loss": 6.6672, "lr": 0.29529040352989805, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:03:14", "remaining_time": "1 day, 11:07:20", "throughput": 393.92, "total_tokens": 4331088}
|
| 656 |
{"current_steps": 3200, "total_steps": 40000, "eval_loss": 5.711532115936279, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:13:58", "remaining_time": "1 day, 13:10:39", "throughput": 372.14, "total_tokens": 4331088}
|
| 657 |
+
{"current_steps": 3205, "total_steps": 40000, "loss": 6.3491, "lr": 0.29527574776499993, "epoch": 0.10258626208309327, "percentage": 8.01, "elapsed_time": "3:14:01", "remaining_time": "1 day, 13:07:30", "throughput": 372.61, "total_tokens": 4337776}
|
| 658 |
+
{"current_steps": 3210, "total_steps": 40000, "loss": 5.5269, "lr": 0.2952610695967558, "epoch": 0.10274630305358172, "percentage": 8.03, "elapsed_time": "3:14:03", "remaining_time": "1 day, 13:04:08", "throughput": 373.13, "total_tokens": 4344624}
|
| 659 |
+
{"current_steps": 3215, "total_steps": 40000, "loss": 6.3063, "lr": 0.29524636902742935, "epoch": 0.10290634402407016, "percentage": 8.04, "elapsed_time": "3:14:05", "remaining_time": "1 day, 13:00:46", "throughput": 373.67, "total_tokens": 4351664}
|
| 660 |
+
{"current_steps": 3220, "total_steps": 40000, "loss": 6.1282, "lr": 0.2952316460592875, "epoch": 0.10306638499455861, "percentage": 8.05, "elapsed_time": "3:14:07", "remaining_time": "1 day, 12:57:25", "throughput": 374.2, "total_tokens": 4358576}
|
| 661 |
+
{"current_steps": 3225, "total_steps": 40000, "loss": 5.7549, "lr": 0.29521690069460066, "epoch": 0.10322642596504705, "percentage": 8.06, "elapsed_time": "3:14:09", "remaining_time": "1 day, 12:54:04", "throughput": 374.7, "total_tokens": 4365216}
|
| 662 |
+
{"current_steps": 3230, "total_steps": 40000, "loss": 5.9063, "lr": 0.29520213293564285, "epoch": 0.10338646693553549, "percentage": 8.08, "elapsed_time": "3:14:11", "remaining_time": "1 day, 12:50:44", "throughput": 375.21, "total_tokens": 4371936}
|
| 663 |
+
{"current_steps": 3235, "total_steps": 40000, "loss": 5.0874, "lr": 0.29518734278469144, "epoch": 0.10354650790602395, "percentage": 8.09, "elapsed_time": "3:14:13", "remaining_time": "1 day, 12:47:24", "throughput": 375.72, "total_tokens": 4378640}
|
| 664 |
+
{"current_steps": 3240, "total_steps": 40000, "loss": 4.9757, "lr": 0.29517253024402723, "epoch": 0.10370654887651239, "percentage": 8.1, "elapsed_time": "3:14:15", "remaining_time": "1 day, 12:44:05", "throughput": 376.22, "total_tokens": 4385248}
|
| 665 |
+
{"current_steps": 3245, "total_steps": 40000, "loss": 5.3763, "lr": 0.2951576953159345, "epoch": 0.10386658984700083, "percentage": 8.11, "elapsed_time": "3:14:18", "remaining_time": "1 day, 12:40:46", "throughput": 376.74, "total_tokens": 4392080}
|
| 666 |
+
{"current_steps": 3250, "total_steps": 40000, "loss": 5.6235, "lr": 0.29514283800270097, "epoch": 0.10402663081748928, "percentage": 8.12, "elapsed_time": "3:14:20", "remaining_time": "1 day, 12:37:28", "throughput": 377.25, "total_tokens": 4398784}
|
| 667 |
+
{"current_steps": 3255, "total_steps": 40000, "loss": 5.7336, "lr": 0.2951279583066179, "epoch": 0.10418667178797772, "percentage": 8.14, "elapsed_time": "3:14:22", "remaining_time": "1 day, 12:34:11", "throughput": 377.75, "total_tokens": 4405376}
|
| 668 |
+
{"current_steps": 3260, "total_steps": 40000, "loss": 4.9623, "lr": 0.2951130562299798, "epoch": 0.10434671275846617, "percentage": 8.15, "elapsed_time": "3:14:24", "remaining_time": "1 day, 12:30:54", "throughput": 378.26, "total_tokens": 4412112}
|
| 669 |
+
{"current_steps": 3265, "total_steps": 40000, "loss": 6.2228, "lr": 0.29509813177508487, "epoch": 0.10450675372895461, "percentage": 8.16, "elapsed_time": "3:14:26", "remaining_time": "1 day, 12:27:38", "throughput": 378.77, "total_tokens": 4418816}
|
| 670 |
+
{"current_steps": 3270, "total_steps": 40000, "loss": 5.5275, "lr": 0.2950831849442346, "epoch": 0.10466679469944305, "percentage": 8.18, "elapsed_time": "3:14:28", "remaining_time": "1 day, 12:24:23", "throughput": 379.27, "total_tokens": 4425472}
|
| 671 |
+
{"current_steps": 3275, "total_steps": 40000, "loss": 5.694, "lr": 0.2950682157397339, "epoch": 0.1048268356699315, "percentage": 8.19, "elapsed_time": "3:14:30", "remaining_time": "1 day, 12:21:08", "throughput": 379.78, "total_tokens": 4432176}
|
| 672 |
+
{"current_steps": 3280, "total_steps": 40000, "loss": 6.2268, "lr": 0.2950532241638914, "epoch": 0.10498687664041995, "percentage": 8.2, "elapsed_time": "3:14:32", "remaining_time": "1 day, 12:17:53", "throughput": 380.29, "total_tokens": 4438928}
|
| 673 |
+
{"current_steps": 3285, "total_steps": 40000, "loss": 6.0347, "lr": 0.2950382102190188, "epoch": 0.10514691761090839, "percentage": 8.21, "elapsed_time": "3:14:34", "remaining_time": "1 day, 12:14:39", "throughput": 380.77, "total_tokens": 4445216}
|
| 674 |
+
{"current_steps": 3290, "total_steps": 40000, "loss": 5.4231, "lr": 0.2950231739074316, "epoch": 0.10530695858139684, "percentage": 8.22, "elapsed_time": "3:14:36", "remaining_time": "1 day, 12:11:26", "throughput": 381.26, "total_tokens": 4451760}
|
| 675 |
+
{"current_steps": 3295, "total_steps": 40000, "loss": 5.4835, "lr": 0.29500811523144843, "epoch": 0.10546699955188528, "percentage": 8.24, "elapsed_time": "3:14:38", "remaining_time": "1 day, 12:08:14", "throughput": 381.79, "total_tokens": 4458752}
|
| 676 |
+
{"current_steps": 3300, "total_steps": 40000, "loss": 6.0333, "lr": 0.2949930341933917, "epoch": 0.10562704052237373, "percentage": 8.25, "elapsed_time": "3:14:40", "remaining_time": "1 day, 12:05:02", "throughput": 382.31, "total_tokens": 4465632}
|
| 677 |
+
{"current_steps": 3305, "total_steps": 40000, "loss": 6.0783, "lr": 0.29497793079558693, "epoch": 0.10578708149286217, "percentage": 8.26, "elapsed_time": "3:14:42", "remaining_time": "1 day, 12:01:50", "throughput": 382.81, "total_tokens": 4472224}
|
| 678 |
+
{"current_steps": 3310, "total_steps": 40000, "loss": 5.506, "lr": 0.2949628050403633, "epoch": 0.10594712246335061, "percentage": 8.28, "elapsed_time": "3:14:44", "remaining_time": "1 day, 11:58:38", "throughput": 383.29, "total_tokens": 4478624}
|
| 679 |
+
{"current_steps": 3315, "total_steps": 40000, "loss": 5.1095, "lr": 0.2949476569300535, "epoch": 0.10610716343383907, "percentage": 8.29, "elapsed_time": "3:14:46", "remaining_time": "1 day, 11:55:28", "throughput": 383.81, "total_tokens": 4485488}
|
| 680 |
+
{"current_steps": 3320, "total_steps": 40000, "loss": 6.4218, "lr": 0.29493248646699344, "epoch": 0.1062672044043275, "percentage": 8.3, "elapsed_time": "3:14:48", "remaining_time": "1 day, 11:52:20", "throughput": 384.36, "total_tokens": 4492736}
|
| 681 |
+
{"current_steps": 3325, "total_steps": 40000, "loss": 5.7727, "lr": 0.29491729365352265, "epoch": 0.10642724537481596, "percentage": 8.31, "elapsed_time": "3:14:50", "remaining_time": "1 day, 11:49:11", "throughput": 384.87, "total_tokens": 4499504}
|
| 682 |
+
{"current_steps": 3330, "total_steps": 40000, "loss": 5.8368, "lr": 0.29490207849198397, "epoch": 0.1065872863453044, "percentage": 8.33, "elapsed_time": "3:14:52", "remaining_time": "1 day, 11:46:02", "throughput": 385.38, "total_tokens": 4506192}
|
| 683 |
+
{"current_steps": 3335, "total_steps": 40000, "loss": 5.4325, "lr": 0.29488684098472384, "epoch": 0.10674732731579284, "percentage": 8.34, "elapsed_time": "3:14:54", "remaining_time": "1 day, 11:42:54", "throughput": 385.88, "total_tokens": 4512880}
|
| 684 |
+
{"current_steps": 3340, "total_steps": 40000, "loss": 5.2622, "lr": 0.2948715811340921, "epoch": 0.10690736828628129, "percentage": 8.35, "elapsed_time": "3:14:57", "remaining_time": "1 day, 11:39:47", "throughput": 386.41, "total_tokens": 4519824}
|
| 685 |
+
{"current_steps": 3345, "total_steps": 40000, "loss": 6.3227, "lr": 0.294856298942442, "epoch": 0.10706740925676973, "percentage": 8.36, "elapsed_time": "3:14:59", "remaining_time": "1 day, 11:36:41", "throughput": 386.94, "total_tokens": 4526880}
|
| 686 |
+
{"current_steps": 3350, "total_steps": 40000, "loss": 5.5516, "lr": 0.2948409944121302, "epoch": 0.10722745022725817, "percentage": 8.38, "elapsed_time": "3:15:01", "remaining_time": "1 day, 11:33:35", "throughput": 387.5, "total_tokens": 4534304}
|
| 687 |
+
{"current_steps": 3355, "total_steps": 40000, "loss": 5.4318, "lr": 0.29482566754551687, "epoch": 0.10738749119774663, "percentage": 8.39, "elapsed_time": "3:15:03", "remaining_time": "1 day, 11:30:29", "throughput": 388.0, "total_tokens": 4540880}
|
| 688 |
+
{"current_steps": 3360, "total_steps": 40000, "loss": 5.9571, "lr": 0.2948103183449656, "epoch": 0.10754753216823507, "percentage": 8.4, "elapsed_time": "3:15:05", "remaining_time": "1 day, 11:27:24", "throughput": 388.5, "total_tokens": 4547520}
|
| 689 |
+
{"current_steps": 3365, "total_steps": 40000, "loss": 6.1462, "lr": 0.2947949468128435, "epoch": 0.10770757313872352, "percentage": 8.41, "elapsed_time": "3:15:07", "remaining_time": "1 day, 11:24:19", "throughput": 389.0, "total_tokens": 4554240}
|
| 690 |
+
{"current_steps": 3370, "total_steps": 40000, "loss": 6.0525, "lr": 0.2947795529515209, "epoch": 0.10786761410921196, "percentage": 8.43, "elapsed_time": "3:15:09", "remaining_time": "1 day, 11:21:15", "throughput": 389.52, "total_tokens": 4561040}
|
| 691 |
+
{"current_steps": 3375, "total_steps": 40000, "loss": 5.5309, "lr": 0.29476413676337193, "epoch": 0.1080276550797004, "percentage": 8.44, "elapsed_time": "3:15:11", "remaining_time": "1 day, 11:18:12", "throughput": 390.03, "total_tokens": 4567920}
|
| 692 |
+
{"current_steps": 3380, "total_steps": 40000, "loss": 5.2485, "lr": 0.2947486982507738, "epoch": 0.10818769605018885, "percentage": 8.45, "elapsed_time": "3:15:13", "remaining_time": "1 day, 11:15:09", "throughput": 390.54, "total_tokens": 4574592}
|
| 693 |
+
{"current_steps": 3385, "total_steps": 40000, "loss": 6.0799, "lr": 0.29473323741610735, "epoch": 0.10834773702067729, "percentage": 8.46, "elapsed_time": "3:15:15", "remaining_time": "1 day, 11:12:06", "throughput": 391.03, "total_tokens": 4581184}
|
| 694 |
+
{"current_steps": 3390, "total_steps": 40000, "loss": 6.0579, "lr": 0.2947177542617569, "epoch": 0.10850777799116573, "percentage": 8.48, "elapsed_time": "3:15:17", "remaining_time": "1 day, 11:09:04", "throughput": 391.54, "total_tokens": 4587968}
|
| 695 |
+
{"current_steps": 3395, "total_steps": 40000, "loss": 5.4002, "lr": 0.2947022487901101, "epoch": 0.10866781896165419, "percentage": 8.49, "elapsed_time": "3:15:19", "remaining_time": "1 day, 11:06:02", "throughput": 392.02, "total_tokens": 4594368}
|
| 696 |
+
{"current_steps": 3400, "total_steps": 40000, "loss": 6.1049, "lr": 0.2946867210035581, "epoch": 0.10882785993214263, "percentage": 8.5, "elapsed_time": "3:15:21", "remaining_time": "1 day, 11:03:02", "throughput": 392.58, "total_tokens": 4601728}
|
| 697 |
+
{"current_steps": 3400, "total_steps": 40000, "eval_loss": 5.739169597625732, "epoch": 0.10882785993214263, "percentage": 8.5, "elapsed_time": "3:26:06", "remaining_time": "1 day, 12:58:37", "throughput": 372.12, "total_tokens": 4601728}
|