Training in progress, step 3400
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +41 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1638528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:077f6172d297e9484da04d81f08213a2515cd9081914f96995243bba8161f701
|
| 3 |
size 1638528
|
trainer_log.jsonl
CHANGED
|
@@ -654,3 +654,44 @@
|
|
| 654 |
{"current_steps": 3195, "total_steps": 40000, "loss": 6.1542, "lr": 0.2953050368891902, "epoch": 0.10226618014211639, "percentage": 7.99, "elapsed_time": "3:00:32", "remaining_time": "1 day, 10:39:43", "throughput": 399.21, "total_tokens": 4324368}
|
| 655 |
{"current_steps": 3200, "total_steps": 40000, "loss": 6.6138, "lr": 0.29529040352989805, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:00:34", "remaining_time": "1 day, 10:36:35", "throughput": 399.76, "total_tokens": 4331088}
|
| 656 |
{"current_steps": 3200, "total_steps": 40000, "eval_loss": 5.739660263061523, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:11:08", "remaining_time": "1 day, 12:38:06", "throughput": 377.65, "total_tokens": 4331088}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 654 |
{"current_steps": 3195, "total_steps": 40000, "loss": 6.1542, "lr": 0.2953050368891902, "epoch": 0.10226618014211639, "percentage": 7.99, "elapsed_time": "3:00:32", "remaining_time": "1 day, 10:39:43", "throughput": 399.21, "total_tokens": 4324368}
|
| 655 |
{"current_steps": 3200, "total_steps": 40000, "loss": 6.6138, "lr": 0.29529040352989805, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:00:34", "remaining_time": "1 day, 10:36:35", "throughput": 399.76, "total_tokens": 4331088}
|
| 656 |
{"current_steps": 3200, "total_steps": 40000, "eval_loss": 5.739660263061523, "epoch": 0.10242622111260483, "percentage": 8.0, "elapsed_time": "3:11:08", "remaining_time": "1 day, 12:38:06", "throughput": 377.65, "total_tokens": 4331088}
|
| 657 |
+
{"current_steps": 3205, "total_steps": 40000, "loss": 6.3304, "lr": 0.29527574776499993, "epoch": 0.10258626208309327, "percentage": 8.01, "elapsed_time": "3:11:11", "remaining_time": "1 day, 12:34:58", "throughput": 378.14, "total_tokens": 4337776}
|
| 658 |
+
{"current_steps": 3210, "total_steps": 40000, "loss": 5.5037, "lr": 0.2952610695967558, "epoch": 0.10274630305358172, "percentage": 8.03, "elapsed_time": "3:11:13", "remaining_time": "1 day, 12:31:38", "throughput": 378.66, "total_tokens": 4344624}
|
| 659 |
+
{"current_steps": 3215, "total_steps": 40000, "loss": 6.3196, "lr": 0.29524636902742935, "epoch": 0.10290634402407016, "percentage": 8.04, "elapsed_time": "3:11:15", "remaining_time": "1 day, 12:28:20", "throughput": 379.21, "total_tokens": 4351664}
|
| 660 |
+
{"current_steps": 3220, "total_steps": 40000, "loss": 6.1192, "lr": 0.2952316460592875, "epoch": 0.10306638499455861, "percentage": 8.05, "elapsed_time": "3:11:17", "remaining_time": "1 day, 12:25:02", "throughput": 379.74, "total_tokens": 4358576}
|
| 661 |
+
{"current_steps": 3225, "total_steps": 40000, "loss": 5.7225, "lr": 0.29521690069460066, "epoch": 0.10322642596504705, "percentage": 8.06, "elapsed_time": "3:11:19", "remaining_time": "1 day, 12:21:44", "throughput": 380.25, "total_tokens": 4365216}
|
| 662 |
+
{"current_steps": 3230, "total_steps": 40000, "loss": 5.8469, "lr": 0.29520213293564285, "epoch": 0.10338646693553549, "percentage": 8.08, "elapsed_time": "3:11:21", "remaining_time": "1 day, 12:18:27", "throughput": 380.77, "total_tokens": 4371936}
|
| 663 |
+
{"current_steps": 3235, "total_steps": 40000, "loss": 5.1085, "lr": 0.29518734278469144, "epoch": 0.10354650790602395, "percentage": 8.09, "elapsed_time": "3:11:23", "remaining_time": "1 day, 12:15:10", "throughput": 381.29, "total_tokens": 4378640}
|
| 664 |
+
{"current_steps": 3240, "total_steps": 40000, "loss": 4.9587, "lr": 0.29517253024402723, "epoch": 0.10370654887651239, "percentage": 8.1, "elapsed_time": "3:11:25", "remaining_time": "1 day, 12:11:54", "throughput": 381.8, "total_tokens": 4385248}
|
| 665 |
+
{"current_steps": 3245, "total_steps": 40000, "loss": 5.3926, "lr": 0.2951576953159345, "epoch": 0.10386658984700083, "percentage": 8.11, "elapsed_time": "3:11:27", "remaining_time": "1 day, 12:08:38", "throughput": 382.32, "total_tokens": 4392080}
|
| 666 |
+
{"current_steps": 3250, "total_steps": 40000, "loss": 5.626, "lr": 0.29514283800270097, "epoch": 0.10402663081748928, "percentage": 8.12, "elapsed_time": "3:11:29", "remaining_time": "1 day, 12:05:24", "throughput": 382.84, "total_tokens": 4398784}
|
| 667 |
+
{"current_steps": 3255, "total_steps": 40000, "loss": 5.6925, "lr": 0.2951279583066179, "epoch": 0.10418667178797772, "percentage": 8.14, "elapsed_time": "3:11:31", "remaining_time": "1 day, 12:02:09", "throughput": 383.35, "total_tokens": 4405376}
|
| 668 |
+
{"current_steps": 3260, "total_steps": 40000, "loss": 4.9893, "lr": 0.2951130562299798, "epoch": 0.10434671275846617, "percentage": 8.15, "elapsed_time": "3:11:33", "remaining_time": "1 day, 11:58:56", "throughput": 383.86, "total_tokens": 4412112}
|
| 669 |
+
{"current_steps": 3265, "total_steps": 40000, "loss": 6.1722, "lr": 0.29509813177508487, "epoch": 0.10450675372895461, "percentage": 8.16, "elapsed_time": "3:11:36", "remaining_time": "1 day, 11:55:43", "throughput": 384.38, "total_tokens": 4418816}
|
| 670 |
+
{"current_steps": 3270, "total_steps": 40000, "loss": 5.5052, "lr": 0.2950831849442346, "epoch": 0.10466679469944305, "percentage": 8.18, "elapsed_time": "3:11:38", "remaining_time": "1 day, 11:52:30", "throughput": 384.89, "total_tokens": 4425472}
|
| 671 |
+
{"current_steps": 3275, "total_steps": 40000, "loss": 5.7296, "lr": 0.2950682157397339, "epoch": 0.1048268356699315, "percentage": 8.19, "elapsed_time": "3:11:40", "remaining_time": "1 day, 11:49:18", "throughput": 385.4, "total_tokens": 4432176}
|
| 672 |
+
{"current_steps": 3280, "total_steps": 40000, "loss": 6.2374, "lr": 0.2950532241638914, "epoch": 0.10498687664041995, "percentage": 8.2, "elapsed_time": "3:11:42", "remaining_time": "1 day, 11:46:07", "throughput": 385.92, "total_tokens": 4438928}
|
| 673 |
+
{"current_steps": 3285, "total_steps": 40000, "loss": 6.0145, "lr": 0.2950382102190188, "epoch": 0.10514691761090839, "percentage": 8.21, "elapsed_time": "3:11:44", "remaining_time": "1 day, 11:42:56", "throughput": 386.4, "total_tokens": 4445216}
|
| 674 |
+
{"current_steps": 3290, "total_steps": 40000, "loss": 5.4508, "lr": 0.2950231739074316, "epoch": 0.10530695858139684, "percentage": 8.22, "elapsed_time": "3:11:46", "remaining_time": "1 day, 11:39:45", "throughput": 386.9, "total_tokens": 4451760}
|
| 675 |
+
{"current_steps": 3295, "total_steps": 40000, "loss": 5.5077, "lr": 0.29500811523144843, "epoch": 0.10546699955188528, "percentage": 8.24, "elapsed_time": "3:11:48", "remaining_time": "1 day, 11:36:36", "throughput": 387.44, "total_tokens": 4458752}
|
| 676 |
+
{"current_steps": 3300, "total_steps": 40000, "loss": 6.0674, "lr": 0.2949930341933917, "epoch": 0.10562704052237373, "percentage": 8.25, "elapsed_time": "3:11:50", "remaining_time": "1 day, 11:33:27", "throughput": 387.97, "total_tokens": 4465632}
|
| 677 |
+
{"current_steps": 3305, "total_steps": 40000, "loss": 6.0529, "lr": 0.29497793079558693, "epoch": 0.10578708149286217, "percentage": 8.26, "elapsed_time": "3:11:52", "remaining_time": "1 day, 11:30:18", "throughput": 388.48, "total_tokens": 4472224}
|
| 678 |
+
{"current_steps": 3310, "total_steps": 40000, "loss": 5.5106, "lr": 0.2949628050403633, "epoch": 0.10594712246335061, "percentage": 8.28, "elapsed_time": "3:11:54", "remaining_time": "1 day, 11:27:10", "throughput": 388.97, "total_tokens": 4478624}
|
| 679 |
+
{"current_steps": 3315, "total_steps": 40000, "loss": 5.0853, "lr": 0.2949476569300535, "epoch": 0.10610716343383907, "percentage": 8.29, "elapsed_time": "3:11:56", "remaining_time": "1 day, 11:24:03", "throughput": 389.49, "total_tokens": 4485488}
|
| 680 |
+
{"current_steps": 3320, "total_steps": 40000, "loss": 6.4041, "lr": 0.29493248646699344, "epoch": 0.1062672044043275, "percentage": 8.3, "elapsed_time": "3:11:58", "remaining_time": "1 day, 11:20:57", "throughput": 390.05, "total_tokens": 4492736}
|
| 681 |
+
{"current_steps": 3325, "total_steps": 40000, "loss": 5.7114, "lr": 0.29491729365352265, "epoch": 0.10642724537481596, "percentage": 8.31, "elapsed_time": "3:12:00", "remaining_time": "1 day, 11:17:51", "throughput": 390.57, "total_tokens": 4499504}
|
| 682 |
+
{"current_steps": 3330, "total_steps": 40000, "loss": 5.8162, "lr": 0.29490207849198397, "epoch": 0.1065872863453044, "percentage": 8.33, "elapsed_time": "3:12:02", "remaining_time": "1 day, 11:14:45", "throughput": 391.08, "total_tokens": 4506192}
|
| 683 |
+
{"current_steps": 3335, "total_steps": 40000, "loss": 5.4379, "lr": 0.29488684098472384, "epoch": 0.10674732731579284, "percentage": 8.34, "elapsed_time": "3:12:04", "remaining_time": "1 day, 11:11:40", "throughput": 391.59, "total_tokens": 4512880}
|
| 684 |
+
{"current_steps": 3340, "total_steps": 40000, "loss": 5.2765, "lr": 0.2948715811340921, "epoch": 0.10690736828628129, "percentage": 8.35, "elapsed_time": "3:12:06", "remaining_time": "1 day, 11:08:36", "throughput": 392.12, "total_tokens": 4519824}
|
| 685 |
+
{"current_steps": 3345, "total_steps": 40000, "loss": 6.32, "lr": 0.294856298942442, "epoch": 0.10706740925676973, "percentage": 8.36, "elapsed_time": "3:12:08", "remaining_time": "1 day, 11:05:32", "throughput": 392.66, "total_tokens": 4526880}
|
| 686 |
+
{"current_steps": 3350, "total_steps": 40000, "loss": 5.5361, "lr": 0.2948409944121302, "epoch": 0.10722745022725817, "percentage": 8.38, "elapsed_time": "3:12:10", "remaining_time": "1 day, 11:02:30", "throughput": 393.23, "total_tokens": 4534304}
|
| 687 |
+
{"current_steps": 3355, "total_steps": 40000, "loss": 5.4795, "lr": 0.29482566754551687, "epoch": 0.10738749119774663, "percentage": 8.39, "elapsed_time": "3:12:12", "remaining_time": "1 day, 10:59:27", "throughput": 393.74, "total_tokens": 4540880}
|
| 688 |
+
{"current_steps": 3360, "total_steps": 40000, "loss": 5.937, "lr": 0.2948103183449656, "epoch": 0.10754753216823507, "percentage": 8.4, "elapsed_time": "3:12:14", "remaining_time": "1 day, 10:56:24", "throughput": 394.24, "total_tokens": 4547520}
|
| 689 |
+
{"current_steps": 3365, "total_steps": 40000, "loss": 6.1634, "lr": 0.2947949468128435, "epoch": 0.10770757313872352, "percentage": 8.41, "elapsed_time": "3:12:16", "remaining_time": "1 day, 10:53:22", "throughput": 394.76, "total_tokens": 4554240}
|
| 690 |
+
{"current_steps": 3370, "total_steps": 40000, "loss": 6.0973, "lr": 0.2947795529515209, "epoch": 0.10786761410921196, "percentage": 8.43, "elapsed_time": "3:12:18", "remaining_time": "1 day, 10:50:21", "throughput": 395.27, "total_tokens": 4561040}
|
| 691 |
+
{"current_steps": 3375, "total_steps": 40000, "loss": 5.5619, "lr": 0.29476413676337193, "epoch": 0.1080276550797004, "percentage": 8.44, "elapsed_time": "3:12:20", "remaining_time": "1 day, 10:47:21", "throughput": 395.8, "total_tokens": 4567920}
|
| 692 |
+
{"current_steps": 3380, "total_steps": 40000, "loss": 5.2372, "lr": 0.2947486982507738, "epoch": 0.10818769605018885, "percentage": 8.45, "elapsed_time": "3:12:23", "remaining_time": "1 day, 10:44:20", "throughput": 396.31, "total_tokens": 4574592}
|
| 693 |
+
{"current_steps": 3385, "total_steps": 40000, "loss": 6.1078, "lr": 0.29473323741610735, "epoch": 0.10834773702067729, "percentage": 8.46, "elapsed_time": "3:12:25", "remaining_time": "1 day, 10:41:20", "throughput": 396.81, "total_tokens": 4581184}
|
| 694 |
+
{"current_steps": 3390, "total_steps": 40000, "loss": 6.0475, "lr": 0.2947177542617569, "epoch": 0.10850777799116573, "percentage": 8.48, "elapsed_time": "3:12:27", "remaining_time": "1 day, 10:38:21", "throughput": 397.33, "total_tokens": 4587968}
|
| 695 |
+
{"current_steps": 3395, "total_steps": 40000, "loss": 5.4173, "lr": 0.2947022487901101, "epoch": 0.10866781896165419, "percentage": 8.49, "elapsed_time": "3:12:29", "remaining_time": "1 day, 10:35:22", "throughput": 397.81, "total_tokens": 4594368}
|
| 696 |
+
{"current_steps": 3400, "total_steps": 40000, "loss": 6.1053, "lr": 0.2946867210035581, "epoch": 0.10882785993214263, "percentage": 8.5, "elapsed_time": "3:12:31", "remaining_time": "1 day, 10:32:25", "throughput": 398.38, "total_tokens": 4601728}
|
| 697 |
+
{"current_steps": 3400, "total_steps": 40000, "eval_loss": 5.716908931732178, "epoch": 0.10882785993214263, "percentage": 8.5, "elapsed_time": "3:23:05", "remaining_time": "1 day, 12:26:14", "throughput": 377.64, "total_tokens": 4601728}
|